diff options
| author | Jørgen P. Tjernø <[email protected]> | 2013-12-02 19:31:46 -0800 |
|---|---|---|
| committer | Jørgen P. Tjernø <[email protected]> | 2013-12-02 19:46:31 -0800 |
| commit | f56bb35301836e56582a575a75864392a0177875 (patch) | |
| tree | de61ddd39de3e7df52759711950b4c288592f0dc /mp/src/public/mathlib | |
| parent | Mark some more files as text. (diff) | |
| download | source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.tar.xz source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.zip | |
Fix line endings. WHAMMY.
Diffstat (limited to 'mp/src/public/mathlib')
23 files changed, 13790 insertions, 13790 deletions
diff --git a/mp/src/public/mathlib/amd3dx.h b/mp/src/public/mathlib/amd3dx.h index 05eb663e..9dab1bfd 100644 --- a/mp/src/public/mathlib/amd3dx.h +++ b/mp/src/public/mathlib/amd3dx.h @@ -1,1188 +1,1188 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-/******************************************************************************
-
- Copyright (c) 1999 Advanced Micro Devices, Inc.
-
- LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY
- EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY,
- NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY
- PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY
- DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS,
- BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR
- INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY
- OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION
- OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY
- NOT APPLY TO YOU.
-
- AMD does not assume any responsibility for any errors which may appear in the
- Materials nor any responsibility to support or update the Materials. AMD retains
- the right to make changes to its test specifications at any time, without notice.
-
- NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any
- further information, software, technical information, know-how, or show-how
- available to you.
-
- So that all may benefit from your experience, please report any problems
- or suggestions about this software to [email protected]
-
- AMD Developer Technologies, M/S 585
- Advanced Micro Devices, Inc.
- 5900 E. Ben White Blvd.
- Austin, TX 78741
-
-*******************************************************************************
-
- AMD3DX.H
-
- MACRO FORMAT
- ============
- This file contains inline assembly macros that
- generate AMD-3D instructions in binary format.
- Therefore, C or C++ programmers can use AMD-3D instructions
- without any penalty in their C or C++ source code.
-
- The macro name and format conventions are as follows:
-
-
- 1. First argument of macro is a destination and
- second argument is a source operand.
- ex) _asm PFCMPEQ (mm3, mm4)
- | |
- dst src
-
- 2. The destination operand can be m0 to m7 only.
- The source operand can be any one of the register
- m0 to m7 or _eax, _ecx, _edx, _ebx, _esi, or _edi
- that contains effective address.
- ex) _asm PFRCP (MM7, MM6)
- ex) _asm PFRCPIT2 (mm0, mm4)
- ex) _asm PFMUL (mm3, _edi)
-
- 3. The prefetch(w) takes one src operand _eax, _ecx, _edx,
- _ebx, _esi, or _edi that contains effective address.
- ex) _asm PREFETCH (_edi)
-
- For WATCOM C/C++ users, when using #pragma aux instead of
- _asm, all macro names should be prefixed by a p_ or P_.
- Macros should not be enclosed in quotes.
- ex) p_pfrcp (MM7,MM6)
-
- NOTE: Not all instruction macros, nor all possible
- combinations of operands have been explicitly
- tested. If any errors are found, please report
- them.
-
- EXAMPLE
- =======
- The following program doesn't do anything useful, but it shows you
- how to use inline assembly AMD-3D instructions in C.
- Note that this will only work in a flat memory model in which the
- segment registers cs, ds, ss and es point to the same
- linear address space, totalling less than 4GB.
-
- Used Microsoft VC++ 5.0
-
- #include <stdio.h>
- #include "amd3dx.h"
-
- void main ()
- {
- float x = (float)1.25;
- float y = (float)1.25;
- float z, zz;
-
- _asm {
- movd mm1, x
- movd mm2, y
- pfmul (mm1, mm2)
- movd z, mm1
- femms
- }
-
- printf ("value of z = %f\n", z);
-
- //
- // Demonstration of using the memory instead of
- // multimedia register
- //
- _asm {
- movd mm3, x
- lea esi, y // load effective address of y
- pfmul (mm3, _esi)
- movd zz, mm3
- femms
- }
-
- printf ("value of zz = %f\n", zz);
- }
-
- #pragma aux EXAMPLE with WATCOM C/C++ v11.x
- ===========================================
-
- extern void Add(float *__Dest, float *__A, float *__B);
- #pragma aux Add = \
- p_femms \
- "movd mm6,[esi]" \
- p_pfadd(mm6,_edi) \
- "movd [ebx],mm6" \
- p_femms \
- parm [ebx] [esi] [edi];
-
-*******************************************************************************/
-
-#ifndef _K3DMACROSINCLUDED_
-#define _K3DMACROSINCLUDED_
-
-#if defined (__WATCOMC__)
-
-// The WATCOM C/C++ version of the 3DNow! macros.
-//
-// The older, combined register style for WATCOM C/C++ macros is not
-// supported.
-
-/* Operand defines for instructions with two operands */
-#define _k3d_mm0_mm0 0xc0
-#define _k3d_mm0_mm1 0xc1
-#define _k3d_mm0_mm2 0xc2
-#define _k3d_mm0_mm3 0xc3
-#define _k3d_mm0_mm4 0xc4
-#define _k3d_mm0_mm5 0xc5
-#define _k3d_mm0_mm6 0xc6
-#define _k3d_mm0_mm7 0xc7
-#define _k3d_mm0_eax 0x00
-#define _k3d_mm0_ecx 0x01
-#define _k3d_mm0_edx 0x02
-#define _k3d_mm0_ebx 0x03
-#define _k3d_mm0_esi 0x06
-#define _k3d_mm0_edi 0x07
-#define _k3d_mm1_mm0 0xc8
-#define _k3d_mm1_mm1 0xc9
-#define _k3d_mm1_mm2 0xca
-#define _k3d_mm1_mm3 0xcb
-#define _k3d_mm1_mm4 0xcc
-#define _k3d_mm1_mm5 0xcd
-#define _k3d_mm1_mm6 0xce
-#define _k3d_mm1_mm7 0xcf
-#define _k3d_mm1_eax 0x08
-#define _k3d_mm1_ecx 0x09
-#define _k3d_mm1_edx 0x0a
-#define _k3d_mm1_ebx 0x0b
-#define _k3d_mm1_esi 0x0e
-#define _k3d_mm1_edi 0x0f
-#define _k3d_mm2_mm0 0xd0
-#define _k3d_mm2_mm1 0xd1
-#define _k3d_mm2_mm2 0xd2
-#define _k3d_mm2_mm3 0xd3
-#define _k3d_mm2_mm4 0xd4
-#define _k3d_mm2_mm5 0xd5
-#define _k3d_mm2_mm6 0xd6
-#define _k3d_mm2_mm7 0xd7
-#define _k3d_mm2_eax 0x10
-#define _k3d_mm2_ecx 0x11
-#define _k3d_mm2_edx 0x12
-#define _k3d_mm2_ebx 0x13
-#define _k3d_mm2_esi 0x16
-#define _k3d_mm2_edi 0x17
-#define _k3d_mm3_mm0 0xd8
-#define _k3d_mm3_mm1 0xd9
-#define _k3d_mm3_mm2 0xda
-#define _k3d_mm3_mm3 0xdb
-#define _k3d_mm3_mm4 0xdc
-#define _k3d_mm3_mm5 0xdd
-#define _k3d_mm3_mm6 0xde
-#define _k3d_mm3_mm7 0xdf
-#define _k3d_mm3_eax 0x18
-#define _k3d_mm3_ecx 0x19
-#define _k3d_mm3_edx 0x1a
-#define _k3d_mm3_ebx 0x1b
-#define _k3d_mm3_esi 0x1e
-#define _k3d_mm3_edi 0x1f
-#define _k3d_mm4_mm0 0xe0
-#define _k3d_mm4_mm1 0xe1
-#define _k3d_mm4_mm2 0xe2
-#define _k3d_mm4_mm3 0xe3
-#define _k3d_mm4_mm4 0xe4
-#define _k3d_mm4_mm5 0xe5
-#define _k3d_mm4_mm6 0xe6
-#define _k3d_mm4_mm7 0xe7
-#define _k3d_mm4_eax 0x20
-#define _k3d_mm4_ecx 0x21
-#define _k3d_mm4_edx 0x22
-#define _k3d_mm4_ebx 0x23
-#define _k3d_mm4_esi 0x26
-#define _k3d_mm4_edi 0x27
-#define _k3d_mm5_mm0 0xe8
-#define _k3d_mm5_mm1 0xe9
-#define _k3d_mm5_mm2 0xea
-#define _k3d_mm5_mm3 0xeb
-#define _k3d_mm5_mm4 0xec
-#define _k3d_mm5_mm5 0xed
-#define _k3d_mm5_mm6 0xee
-#define _k3d_mm5_mm7 0xef
-#define _k3d_mm5_eax 0x28
-#define _k3d_mm5_ecx 0x29
-#define _k3d_mm5_edx 0x2a
-#define _k3d_mm5_ebx 0x2b
-#define _k3d_mm5_esi 0x2e
-#define _k3d_mm5_edi 0x2f
-#define _k3d_mm6_mm0 0xf0
-#define _k3d_mm6_mm1 0xf1
-#define _k3d_mm6_mm2 0xf2
-#define _k3d_mm6_mm3 0xf3
-#define _k3d_mm6_mm4 0xf4
-#define _k3d_mm6_mm5 0xf5
-#define _k3d_mm6_mm6 0xf6
-#define _k3d_mm6_mm7 0xf7
-#define _k3d_mm6_eax 0x30
-#define _k3d_mm6_ecx 0x31
-#define _k3d_mm6_edx 0x32
-#define _k3d_mm6_ebx 0x33
-#define _k3d_mm6_esi 0x36
-#define _k3d_mm6_edi 0x37
-#define _k3d_mm7_mm0 0xf8
-#define _k3d_mm7_mm1 0xf9
-#define _k3d_mm7_mm2 0xfa
-#define _k3d_mm7_mm3 0xfb
-#define _k3d_mm7_mm4 0xfc
-#define _k3d_mm7_mm5 0xfd
-#define _k3d_mm7_mm6 0xfe
-#define _k3d_mm7_mm7 0xff
-#define _k3d_mm7_eax 0x38
-#define _k3d_mm7_ecx 0x39
-#define _k3d_mm7_edx 0x3a
-#define _k3d_mm7_ebx 0x3b
-#define _k3d_mm7_esi 0x3e
-#define _k3d_mm7_edi 0x3f
-
-#define _k3d_name_xlat_m0 _mm0
-#define _k3d_name_xlat_m1 _mm1
-#define _k3d_name_xlat_m2 _mm2
-#define _k3d_name_xlat_m3 _mm3
-#define _k3d_name_xlat_m4 _mm4
-#define _k3d_name_xlat_m5 _mm5
-#define _k3d_name_xlat_m6 _mm6
-#define _k3d_name_xlat_m7 _mm7
-#define _k3d_name_xlat_M0 _mm0
-#define _k3d_name_xlat_M1 _mm1
-#define _k3d_name_xlat_M2 _mm2
-#define _k3d_name_xlat_M3 _mm3
-#define _k3d_name_xlat_M4 _mm4
-#define _k3d_name_xlat_M5 _mm5
-#define _k3d_name_xlat_M6 _mm6
-#define _k3d_name_xlat_M7 _mm7
-#define _k3d_name_xlat_mm0 _mm0
-#define _k3d_name_xlat_mm1 _mm1
-#define _k3d_name_xlat_mm2 _mm2
-#define _k3d_name_xlat_mm3 _mm3
-#define _k3d_name_xlat_mm4 _mm4
-#define _k3d_name_xlat_mm5 _mm5
-#define _k3d_name_xlat_mm6 _mm6
-#define _k3d_name_xlat_mm7 _mm7
-#define _k3d_name_xlat_MM0 _mm0
-#define _k3d_name_xlat_MM1 _mm1
-#define _k3d_name_xlat_MM2 _mm2
-#define _k3d_name_xlat_MM3 _mm3
-#define _k3d_name_xlat_MM4 _mm4
-#define _k3d_name_xlat_MM5 _mm5
-#define _k3d_name_xlat_MM6 _mm6
-#define _k3d_name_xlat_MM7 _mm7
-#define _k3d_name_xlat_eax _eax
-#define _k3d_name_xlat_ebx _ebx
-#define _k3d_name_xlat_ecx _ecx
-#define _k3d_name_xlat_edx _edx
-#define _k3d_name_xlat_esi _esi
-#define _k3d_name_xlat_edi _edi
-#define _k3d_name_xlat_ebp _ebp
-#define _k3d_name_xlat_EAX _eax
-#define _k3d_name_xlat_EBX _ebx
-#define _k3d_name_xlat_ECX _ecx
-#define _k3d_name_xlat_EDX _edx
-#define _k3d_name_xlat_ESI _esi
-#define _k3d_name_xlat_EDI _edi
-#define _k3d_name_xlat_EBP _ebp
-#define _k3d_name_xlat__eax _eax
-#define _k3d_name_xlat__ebx _ebx
-#define _k3d_name_xlat__ecx _ecx
-#define _k3d_name_xlat__edx _edx
-#define _k3d_name_xlat__esi _esi
-#define _k3d_name_xlat__edi _edi
-#define _k3d_name_xlat__ebp _ebp
-#define _k3d_name_xlat__EAX _eax
-#define _k3d_name_xlat__EBX _ebx
-#define _k3d_name_xlat__ECX _ecx
-#define _k3d_name_xlat__EDX _edx
-#define _k3d_name_xlat__ESI _esi
-#define _k3d_name_xlat__EDI _edi
-#define _k3d_name_xlat__EBP _ebp
-
-#define _k3d_xglue3(a,b,c) a##b##c
-#define _k3d_glue3(a,b,c) _k3d_xglue3(a,b,c)
-#define _k3d_MODRM(dst, src) _k3d_glue3(_k3d,_k3d_name_xlat_##dst,_k3d_name_xlat_##src)
-
-/* Operand defines for prefetch and prefetchw */
-
-#define _k3d_pref_eax 0x00
-#define _k3d_pref_ecx 0x01
-#define _k3d_pref_edx 0x02
-#define _k3d_pref_ebx 0x03
-#define _k3d_pref_esi 0x06
-#define _k3d_pref_edi 0x07
-#define _k3d_pref_EAX 0x00
-#define _k3d_pref_ECX 0x01
-#define _k3d_pref_EDX 0x02
-#define _k3d_pref_EBX 0x03
-#define _k3d_pref_ESI 0x06
-#define _k3d_pref_EDI 0x07
-#define _k3d_prefw_eax 0x08
-#define _k3d_prefw_ecx 0x09
-#define _k3d_prefw_edx 0x0A
-#define _k3d_prefw_ebx 0x0B
-#define _k3d_prefw_esi 0x0E
-#define _k3d_prefw_edi 0x0F
-#define _k3d_prefw_EAX 0x08
-#define _k3d_prefw_ECX 0x09
-#define _k3d_prefw_EDX 0x0A
-#define _k3d_prefw_EBX 0x0B
-#define _k3d_prefw_ESI 0x0E
-#define _k3d_prefw_EDI 0x0F
-
-/* Defines for 3DNow! instructions */
-#define PF2ID(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x1d
-#define PFACC(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xae
-#define PFADD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9e
-#define PFCMPEQ(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb0
-#define PFCMPGE(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x90
-#define PFCMPGT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa0
-#define PFMAX(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa4
-#define PFMIN(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x94
-#define PFMUL(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb4
-#define PFRCP(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x96
-#define PFRCPIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa6
-#define PFRCPIT2(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb6
-#define PFRSQRT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x97
-#define PFRSQIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa7
-#define PFSUB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9a
-#define PFSUBR(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xaa
-#define PI2FD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x0d
-#define FEMMS db 0x0f, 0x0e
-#define PAVGUSB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xbf
-#define PMULHRW(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb7
-#define PREFETCH(src) db 0x0f, 0x0d, _k3d_pref_##src
-#define PREFETCHW(src) db 0x0f, 0x0d, _k3d_prefw_##src
-#define CPUID db 0x0f, 0xa2
-
-/* Defines for new, K7 opcodes */
-#define PFNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8a
-#define FPPNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8e
-#define PSWAPD(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0xbb
-#define PMINUB(dst,src) db 0x0f, 0xda, _k3d_MODRM(dst,src)
-#define PMAXUB(dst,src) db 0x0f, 0xde, _k3d_MODRM(dst,src)
-#define PMINSW(dst,src) db 0x0f, 0xea, _k3d_MODRM(dst,src)
-#define PMAXSW(dst,src) db 0x0f, 0xee, _k3d_MODRM(dst,src)
-#define PMULHUW(dst,src) db 0x0f, 0xe4, _k3d_MODRM(dst,src)
-#define PAVGB(dst,src) db 0x0f, 0xe0, _k3d_MODRM(dst,src)
-#define PAVGW(dst,src) db 0x0f, 0xe3, _k3d_MODRM(dst,src)
-#define PSADBW(dst,src) db 0x0f, 0xf6, _k3d_MODRM(dst,src)
-#define PMOVMSKB(dst,src) db 0x0f, 0xd7, _k3d_MODRM(dst,src)
-#define PMASKMOVQ(dst,src) db 0x0f, 0xf7, _k3d_MODRM(dst,src)
-#define PINSRW(dst,src,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src), msk
-#define PEXTRW(dst,src,msk) db 0x0f, 0xc5, _k3d_MODRM(dst,src), msk
-#define PSHUFW(dst,src,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src), msk
-#define MOVNTQ(dst,src) db 0x0f, 0xe7, _k3d_MODRM(src,dst)
-#define SFENCE db 0x0f, 0xae, 0xf8
-
-/* Memory/offset versions of the opcodes */
-#define PF2IDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x1d
-#define PFACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xae
-#define PFADDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9e
-#define PFCMPEQM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb0
-#define PFCMPGEM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x90
-#define PFCMPGTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa0
-#define PFMAXM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa4
-#define PFMINM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x94
-#define PFMULM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb4
-#define PFRCPM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x96
-#define PFRCPIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa6
-#define PFRCPIT2M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb6
-#define PFRSQRTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x97
-#define PFRSQIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa7
-#define PFSUBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9a
-#define PFSUBRM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xaa
-#define PI2FDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x0d
-#define PAVGUSBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbf
-#define PMULHRWM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb7
-
-
-/* Memory/offset versions of the new, K7 opcodes */
-#define PFNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8a
-#define FPPNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8e
-#define PSWAPDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbb
-#define PMINUBM(dst,src,off) db 0x0f, 0xda, _k3d_MODRM(dst,src) | 0x40, off
-#define PMAXUBM(dst,src,off) db 0x0f, 0xde, _k3d_MODRM(dst,src) | 0x40, off
-#define PMINSWM(dst,src,off) db 0x0f, 0xea, _k3d_MODRM(dst,src) | 0x40, off
-#define PMAXSWM(dst,src,off) db 0x0f, 0xee, _k3d_MODRM(dst,src) | 0x40, off
-#define PMULHUWM(dst,src,off) db 0x0f, 0xe4, _k3d_MODRM(dst,src) | 0x40, off
-#define PAVGBM(dst,src,off) db 0x0f, 0xe0, _k3d_MODRM(dst,src) | 0x40, off
-#define PAVGWM(dst,src,off) db 0x0f, 0xe3, _k3d_MODRM(dst,src) | 0x40, off
-#define PSADBWM(dst,src,off) db 0x0f, 0xf6, _k3d_MODRM(dst,src) | 0x40, off
-#define PMOVMSKBM(dst,src,off) db 0x0f, 0xd7, _k3d_MODRM(dst,src) | 0x40, off
-#define PMASKMOVQM(dst,src,off) db 0x0f, 0xf7, _k3d_MODRM(dst,src) | 0x40, off
-#define MOVNTQM(dst,src,off) db 0x0f, 0xe7, _k3d_MODRM(src,dst) | 0x40, off
-#define PINSRWM(dst,src,off,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src) | 0x40, off, msk
-#define PSHUFWM(dst,src,off,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src) | 0x40, off, msk
-
-
-/* Defines for 3DNow! instructions for use in pragmas */
-#define p_pf2id(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x1d
-#define p_pfacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xae
-#define p_pfadd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9e
-#define p_pfcmpeq(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb0
-#define p_pfcmpge(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x90
-#define p_pfcmpgt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa0
-#define p_pfmax(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa4
-#define p_pfmin(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x94
-#define p_pfmul(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb4
-#define p_pfrcp(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x96
-#define p_pfrcpit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa6
-#define p_pfrcpit2(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb6
-#define p_pfrsqrt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x97
-#define p_pfrsqit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa7
-#define p_pfsub(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9a
-#define p_pfsubr(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xaa
-#define p_pi2fd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x0d
-#define p_femms 0x0f 0x0e
-#define p_pavgusb(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xbf
-#define p_pmulhrw(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb7
-#define p_prefetch(src) 0x0f 0x0d _k3d_pref_##src
-#define p_prefetchw(src) 0x0f 0x0d _k3d_prefw_##src
-#define P_PFNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a
-#define P_FPPNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e
-#define P_PSWAPD(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb
-#define P_PMINUB(dst,src) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMAXUB(dst,src) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMINSW(dst,src) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMAXSW(dst,src) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMULHUW(dst,src) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PAVGB(dst,src) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PAVGW(dst,src) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PSADBW(dst,src) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMOVMSKB(dst,src) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMASKMOVQ(dst,src) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PINSRW(dst,src,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk
-#define P_PEXTRW(dst,src,msk) 0x0f 0xc5 (_k3d_MODRM(dst,src) | 0x40) off msk
-#define P_PSHUFW(dst,src,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk
-#define P_MOVNTQ(dst,src) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off
-
-#define P_PF2IDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x1d
-#define P_PFACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xae
-#define P_PFADDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9e
-#define P_PFCMPEQM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb0
-#define P_PFCMPGEM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x90
-#define P_PFCMPGTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa0
-#define P_PFMAXM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa4
-#define P_PFMINM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x94
-#define P_PFMULM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb4
-#define P_PFRCPM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x96
-#define P_PFRCPIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa6
-#define P_PFRCPIT2M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb6
-#define P_PFRSQRTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x97
-#define P_PFRSQIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa7
-#define P_PFSUBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9a
-#define P_PFSUBRM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xaa
-#define P_PI2FDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x0d
-#define P_PAVGUSBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbf
-#define P_PMULHRWM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb7
-#define P_PFNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a
-#define P_FPPNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e
-#define P_PSWAPDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb
-#define P_PMINUBM(dst,src,off) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMAXUBM(dst,src,off) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMINSWM(dst,src,off) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMAXSWM(dst,src,off) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMULHUWM(dst,src,off) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PAVGBM(dst,src,off) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PAVGWM(dst,src,off) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PSADBWM(dst,src,off) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMOVMSKBM(dst,src,off) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_MOVNTQM(dst,src,off) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off
-#define P_PMASKMOVQM(dst,src,off) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PINSRWM(dst,src,off,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk
-#define P_PSHUFWM(dst,src,off,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk
-
-
-#define P_PF2ID(dst,src) p_pf2id(dst,src)
-#define P_PFACC(dst,src) p_pfacc(dst,src)
-#define P_PFADD(dst,src) p_pfadd(dst,src)
-#define P_PFCMPEQ(dst,src) p_pfcmpeq(dst,src)
-#define P_PFCMPGE(dst,src) p_pfcmpge(dst,src)
-#define P_PFCMPGT(dst,src) p_pfcmpgt(dst,src)
-#define P_PFMAX(dst,src) p_pfmax(dst,src)
-#define P_PFMIN(dst,src) p_pfmin(dst,src)
-#define P_PFMUL(dst,src) p_pfmul(dst,src)
-#define P_PFRCP(dst,src) p_pfrcp(dst,src)
-#define P_PFRCPIT1(dst,src) p_pfrcpit1(dst,src)
-#define P_PFRCPIT2(dst,src) p_pfrcpit2(dst,src)
-#define P_PFRSQRT(dst,src) p_pfrsqrt(dst,src)
-#define P_PFRSQIT1(dst,src) p_pfrsqit1(dst,src)
-#define P_PFSUB(dst,src) p_pfsub(dst,src)
-#define P_PFSUBR(dst,src) p_pfsubr(dst,src)
-#define P_PI2FD(dst,src) p_pi2fd(dst,src)
-#define P_FEMMS p_femms
-#define P_PAVGUSB(dst,src) p_pavgusb(dst,src)
-#define P_PMULHRW(dst,src) p_pmulhrw(dst,src)
-#define P_PREFETCH(src) p_prefetch(src)
-#define P_PREFETCHW(src) p_prefetchw(src)
-#define p_CPUID 0x0f 0xa2
-#define p_pf2idm(dst,src,off) P_PF2IDM(dst,src,off)
-#define p_pfaccm(dst,src,off) P_PFACCM(dst,src,off)
-#define p_pfaddm(dst,src,off) P_PFADDM(dst,src,off)
-#define p_pfcmpeqm(dst,src,off) P_PFCMPEQM(dst,src,off)
-#define p_pfcmpgem(dst,src,off) P_PFCMPGEM(dst,src,off)
-#define p_pfcmpgtm(dst,src,off) P_PFCMPGTM(dst,src,off)
-#define p_pfmaxm(dst,src,off) P_PFMAXM(dst,src,off)
-#define p_pfminm(dst,src,off) P_PFMINM(dst,src,off)
-#define p_pfmulm(dst,src,off) P_PFMULM(dst,src,off)
-#define p_pfrcpm(dst,src,off) P_PFRCPM(dst,src,off)
-#define p_pfrcpit1m(dst,src,off) P_PFRCPIT1M(dst,src,off)
-#define p_pfrcpit2m(dst,src,off) P_PFRCPIT2M(dst,src,off)
-#define p_pfrsqrtm(dst,src,off) P_PFRSQRTM(dst,src,off)
-#define p_pfrsqit1m(dst,src,off) P_PFRSQIT1M(dst,src,off)
-#define p_pfsubm(dst,src,off) P_PFSUBM(dst,src,off)
-#define p_pfsubrm(dst,src,off) P_PFSUBRM(dst,src,off)
-#define p_pi2fdm(dst,src,off) P_PI2FDM(dst,src,off)
-#define p_pavgusbm(dst,src,off) P_PAVGUSBM(dst,src,off)
-#define p_pmulhrwm(dst,src,off) P_PMULHRWM(dst,src,off)
-
-#define P_PFNACC(dst,src) p_pfnacc(dst,src)
-#define P_FPPNACC(dst,src) p_pfpnacc(dst,src)
-#define P_PSWAPD(dst,src) p_pswapd(dst,src)
-#define P_PMINUB(dst,src) p_pminub(dst,src)
-#define P_PMAXUB(dst,src) p_pmaxub(dst,src)
-#define P_PMINSW(dst,src) p_pminsw(dst,src)
-#define P_PMAXSW(dst,src) p_pmaxsw(dst,src)
-#define P_PMULHUW(dst,src) p_pmulhuw(dst,src)
-#define P_PAVGB(dst,src) p_pavgb(dst,src)
-#define P_PAVGW(dst,src) p_avgw(dst,src)
-#define P_PSADBW(dst,src) p_psadbw(dst,src)
-#define P_PMOVMSKB(dst,src) p_pmovmskb(dst,src)
-#define P_PMASKMOVQ(dst,src) p_pmaskmovq(dst,src)
-#define P_PINSRW(dst,src,msk) p_pinsrw(dst,src)
-#define P_PEXTRW(dst,src,msk) p_pextrw(dst,src)
-#define P_PSHUFW(dst,src,msk) p_pshufw(dst,src)
-#define P_MOVNTQ(dst,src) p_movntq(dst,src)
-
-#define P_PFNACCM(dst,src,off) p_pfnaccm(dst,src,off)
-#define P_FPPNACCM(dst,src,off) p_pfpnaccm(dst,src,off)
-#define P_PSWAPDM(dst,src,off) p_pswapdm(dst,src,off)
-#define P_PMINUBM(dst,src,off) p_pminubm(dst,src,off)
-#define P_PMAXUBM(dst,src,off) p_pmaxubm(dst,src,off)
-#define P_PMINSWM(dst,src,off) p_pminswm(dst,src,off)
-#define P_PMAXSWM(dst,src,off) p_pmaxswm(dst,src,off)
-#define P_PMULHUWM(dst,src,off) p_pmulhuwm(dst,src,off)
-#define P_PAVGBM(dst,src,off) p_pavgbm(dst,src,off)
-#define P_PAVGWM(dst,src,off) p_avgwm(dst,src,off)
-#define P_PSADBWM(dst,src,off) p_psadbwm(dst,src,off)
-#define P_PMOVMSKBM(dst,src,off) p_pmovmskbm(dst,src,off)
-#define P_PMASKMOVQM(dst,src,off) p_pmaskmovqm(dst,src,off)
-#define P_PINSRWM(dst,src,off,msk) p_pinsrwm(dst,src,off,msk)
-#define P_PSHUFWM(dst,src,off,msk) p_pshufwm(dst,src,off,msk)
-#define P_MOVNTQM(dst,src,off) p_movntqm(dst,src,off)
-
-#elif defined (_MSC_VER) && !defined (__MWERKS__)
-// The Microsoft Visual C++ version of the 3DNow! macros.
-
-// Stop the "no EMMS" warning, since it doesn't detect FEMMS properly
-#pragma warning(disable:4799)
-
-// Defines for operands.
-#define _K3D_MM0 0xc0
-#define _K3D_MM1 0xc1
-#define _K3D_MM2 0xc2
-#define _K3D_MM3 0xc3
-#define _K3D_MM4 0xc4
-#define _K3D_MM5 0xc5
-#define _K3D_MM6 0xc6
-#define _K3D_MM7 0xc7
-#define _K3D_mm0 0xc0
-#define _K3D_mm1 0xc1
-#define _K3D_mm2 0xc2
-#define _K3D_mm3 0xc3
-#define _K3D_mm4 0xc4
-#define _K3D_mm5 0xc5
-#define _K3D_mm6 0xc6
-#define _K3D_mm7 0xc7
-#define _K3D_EAX 0x00
-#define _K3D_ECX 0x01
-#define _K3D_EDX 0x02
-#define _K3D_EBX 0x03
-#define _K3D_ESI 0x06
-#define _K3D_EDI 0x07
-#define _K3D_eax 0x00
-#define _K3D_ecx 0x01
-#define _K3D_edx 0x02
-#define _K3D_ebx 0x03
-#define _K3D_esi 0x06
-#define _K3D_edi 0x07
-
-// These defines are for compatibility with the previous version of the header file.
-#define _K3D_M0 0xc0
-#define _K3D_M1 0xc1
-#define _K3D_M2 0xc2
-#define _K3D_M3 0xc3
-#define _K3D_M4 0xc4
-#define _K3D_M5 0xc5
-#define _K3D_M6 0xc6
-#define _K3D_M7 0xc7
-#define _K3D_m0 0xc0
-#define _K3D_m1 0xc1
-#define _K3D_m2 0xc2
-#define _K3D_m3 0xc3
-#define _K3D_m4 0xc4
-#define _K3D_m5 0xc5
-#define _K3D_m6 0xc6
-#define _K3D_m7 0xc7
-#define _K3D__EAX 0x00
-#define _K3D__ECX 0x01
-#define _K3D__EDX 0x02
-#define _K3D__EBX 0x03
-#define _K3D__ESI 0x06
-#define _K3D__EDI 0x07
-#define _K3D__eax 0x00
-#define _K3D__ecx 0x01
-#define _K3D__edx 0x02
-#define _K3D__ebx 0x03
-#define _K3D__esi 0x06
-#define _K3D__edi 0x07
-
-// General 3DNow! instruction format that is supported by
-// these macros. Note that only the most basic form of memory
-// operands are supported by these macros.
-
-#define InjK3DOps(dst,src,inst) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0f \
- _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \
- _asm _emit _3DNowOpcode##inst \
-}
-
-#define InjK3DMOps(dst,src,off,inst) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0f \
- _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \
- _asm _emit off \
- _asm _emit _3DNowOpcode##inst \
-}
-
-#define InjMMXOps(dst,src,inst) \
-{ \
- _asm _emit 0x0f \
- _asm _emit _3DNowOpcode##inst \
- _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \
-}
-
-#define InjMMXMOps(dst,src,off,inst) \
-{ \
- _asm _emit 0x0f \
- _asm _emit _3DNowOpcode##inst \
- _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \
- _asm _emit off \
-}
-
-#define _3DNowOpcodePF2ID 0x1d
-#define _3DNowOpcodePFACC 0xae
-#define _3DNowOpcodePFADD 0x9e
-#define _3DNowOpcodePFCMPEQ 0xb0
-#define _3DNowOpcodePFCMPGE 0x90
-#define _3DNowOpcodePFCMPGT 0xa0
-#define _3DNowOpcodePFMAX 0xa4
-#define _3DNowOpcodePFMIN 0x94
-#define _3DNowOpcodePFMUL 0xb4
-#define _3DNowOpcodePFRCP 0x96
-#define _3DNowOpcodePFRCPIT1 0xa6
-#define _3DNowOpcodePFRCPIT2 0xb6
-#define _3DNowOpcodePFRSQRT 0x97
-#define _3DNowOpcodePFRSQIT1 0xa7
-#define _3DNowOpcodePFSUB 0x9a
-#define _3DNowOpcodePFSUBR 0xaa
-#define _3DNowOpcodePI2FD 0x0d
-#define _3DNowOpcodePAVGUSB 0xbf
-#define _3DNowOpcodePMULHRW 0xb7
-#define _3DNowOpcodePFNACC 0x8a
-#define _3DNowOpcodeFPPNACC 0x8e
-#define _3DNowOpcodePSWAPD 0xbb
-#define _3DNowOpcodePMINUB 0xda
-#define _3DNowOpcodePMAXUB 0xde
-#define _3DNowOpcodePMINSW 0xea
-#define _3DNowOpcodePMAXSW 0xee
-#define _3DNowOpcodePMULHUW 0xe4
-#define _3DNowOpcodePAVGB 0xe0
-#define _3DNowOpcodePAVGW 0xe3
-#define _3DNowOpcodePSADBW 0xf6
-#define _3DNowOpcodePMOVMSKB 0xd7
-#define _3DNowOpcodePMASKMOVQ 0xf7
-#define _3DNowOpcodePINSRW 0xc4
-#define _3DNowOpcodePEXTRW 0xc5
-#define _3DNowOpcodePSHUFW 0x70
-#define _3DNowOpcodeMOVNTQ 0xe7
-#define _3DNowOpcodePREFETCHT 0x18
-
-
-#define PF2ID(dst,src) InjK3DOps(dst, src, PF2ID)
-#define PFACC(dst,src) InjK3DOps(dst, src, PFACC)
-#define PFADD(dst,src) InjK3DOps(dst, src, PFADD)
-#define PFCMPEQ(dst,src) InjK3DOps(dst, src, PFCMPEQ)
-#define PFCMPGE(dst,src) InjK3DOps(dst, src, PFCMPGE)
-#define PFCMPGT(dst,src) InjK3DOps(dst, src, PFCMPGT)
-#define PFMAX(dst,src) InjK3DOps(dst, src, PFMAX)
-#define PFMIN(dst,src) InjK3DOps(dst, src, PFMIN)
-#define PFMUL(dst,src) InjK3DOps(dst, src, PFMUL)
-#define PFRCP(dst,src) InjK3DOps(dst, src, PFRCP)
-#define PFRCPIT1(dst,src) InjK3DOps(dst, src, PFRCPIT1)
-#define PFRCPIT2(dst,src) InjK3DOps(dst, src, PFRCPIT2)
-#define PFRSQRT(dst,src) InjK3DOps(dst, src, PFRSQRT)
-#define PFRSQIT1(dst,src) InjK3DOps(dst, src, PFRSQIT1)
-#define PFSUB(dst,src) InjK3DOps(dst, src, PFSUB)
-#define PFSUBR(dst,src) InjK3DOps(dst, src, PFSUBR)
-#define PI2FD(dst,src) InjK3DOps(dst, src, PI2FD)
-#define PAVGUSB(dst,src) InjK3DOps(dst, src, PAVGUSB)
-#define PMULHRW(dst,src) InjK3DOps(dst, src, PMULHRW)
-
-#define FEMMS \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0e \
-}
-
-#define PREFETCH(src) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit (_K3D_##src & 0x07) \
-}
-
-/* Prefetch with a short offset, < 127 or > -127
- Careful! Doesn't check for your offset being
- in range. */
-
-#define PREFETCHM(src,off) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit (0x40 | (_K3D_##src & 0x07)) \
- _asm _emit off \
-}
-
-/* Prefetch with a long offset */
-
-#define PREFETCHMLONG(src,off) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit (0x80 | (_K3D_##src & 0x07)) \
- _asm _emit (off & 0x000000ff) \
- _asm _emit (off & 0x0000ff00) >> 8 \
- _asm _emit (off & 0x00ff0000) >> 16 \
- _asm _emit (off & 0xff000000) >> 24 \
-}
-
-#define PREFETCHW(src) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit (0x08 | (_K3D_##src & 0x07)) \
-}
-
-#define PREFETCHWM(src,off) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit 0x48 | (_K3D_##src & 0x07) \
- _asm _emit off \
-}
-
-#define PREFETCHWMLONG(src,off) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit 0x88 | (_K3D_##src & 0x07) \
- _asm _emit (off & 0x000000ff) \
- _asm _emit (off & 0x0000ff00) >> 8 \
- _asm _emit (off & 0x00ff0000) >> 16 \
- _asm _emit (off & 0xff000000) >> 24 \
-}
-/* CPUID: hand-assembled as the two opcode bytes 0x0f, 0xa2. */
-#define CPUID \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0xa2 \
-}
-
-
-/* Defines for new, K7 opcodes.
-   SFENCE is hand-assembled as the three bytes 0x0f, 0xae, 0xf8. */
-#define SFENCE \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0xae \
- _asm _emit 0xf8 \
-}
-/* Register-form K7 macros. Each one forwards dst, src and the instruction
-   name to InjK3DOps or InjMMXOps (presumably defined earlier in this
-   header). PINSRW/PEXTRW/PSHUFW emit msk as one extra byte afterwards.
-   MOVNTQ hands its operands to InjMMXOps in swapped order. The
-   PREFETCHNTA/T0/T1/T2 macros all use the PREFETCHT instruction name and
-   differ only in passing mm0..mm3 in the dst position. */
-#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC)
-#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC)
-#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD)
-#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB)
-#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB)
-#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW)
-#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW)
-#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW)
-#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB)
-#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW)
-#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW)
-#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB)
-#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ)
-#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) _asm _emit msk
-#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) _asm _emit msk
-#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) _asm _emit msk
-#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ)
-#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT)
-#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT)
-#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT)
-#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT)
-
-
-/* Memory/offset versions of the opcodes: the M-suffixed macros take an
-   extra off argument and forward dst, src, off and the instruction name
-   to InjK3DMOps. */
-#define PAVGUSBM(dst,src,off) InjK3DMOps(dst,src,off,PAVGUSB)
-#define PF2IDM(dst,src,off) InjK3DMOps(dst,src,off,PF2ID)
-#define PFACCM(dst,src,off) InjK3DMOps(dst,src,off,PFACC)
-#define PFADDM(dst,src,off) InjK3DMOps(dst,src,off,PFADD)
-#define PFCMPEQM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPEQ)
-#define PFCMPGEM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGE)
-#define PFCMPGTM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGT)
-#define PFMAXM(dst,src,off) InjK3DMOps(dst,src,off,PFMAX)
-#define PFMINM(dst,src,off) InjK3DMOps(dst,src,off,PFMIN)
-#define PFMULM(dst,src,off) InjK3DMOps(dst,src,off,PFMUL)
-#define PFRCPM(dst,src,off) InjK3DMOps(dst,src,off,PFRCP)
-#define PFRCPIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT1)
-#define PFRCPIT2M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT2)
-#define PFRSQRTM(dst,src,off) InjK3DMOps(dst,src,off,PFRSQRT)
-#define PFRSQIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRSQIT1)
-#define PFSUBM(dst,src,off) InjK3DMOps(dst,src,off,PFSUB)
-#define PFSUBRM(dst,src,off) InjK3DMOps(dst,src,off,PFSUBR)
-#define PI2FDM(dst,src,off) InjK3DMOps(dst,src,off,PI2FD)
-#define PMULHRWM(dst,src,off) InjK3DMOps(dst,src,off,PMULHRW)
-
-
-/* Memory/offset versions of the K7 opcodes: same operands as the register
-   forms plus off, forwarded to InjK3DMOps or InjMMXMOps. PINSRWM and
-   PSHUFWM emit msk as one extra byte afterwards.
-   NOTE(review): no PEXTRWM appears in this group, although the lower-case
-   pextrwm alias further down expands to PEXTRWM - confirm this is
-   intentional. */
-#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC)
-#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC)
-#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD)
-#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB)
-#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB)
-#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW)
-#define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW)
-#define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW)
-#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB)
-#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW)
-#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW)
-#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB)
-#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ)
-#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW) _asm _emit msk
-#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW) _asm _emit msk
-#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ)
-#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT)
-#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT)
-#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT)
-#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT)
-
-
-#else
-
-/* Assume built-in support for 3DNow! opcodes, replace macros with opcodes.
-   Each upper-case macro expands to the lower-case mnemonic followed by
-   its operands, so nothing is hand-assembled in this branch. */
-#define PAVGUSB(dst,src) pavgusb dst,src
-#define PF2ID(dst,src) pf2id dst,src
-#define PFACC(dst,src) pfacc dst,src
-#define PFADD(dst,src) pfadd dst,src
-#define PFCMPEQ(dst,src) pfcmpeq dst,src
-#define PFCMPGE(dst,src) pfcmpge dst,src
-#define PFCMPGT(dst,src) pfcmpgt dst,src
-#define PFMAX(dst,src) pfmax dst,src
-#define PFMIN(dst,src) pfmin dst,src
-#define PFMUL(dst,src) pfmul dst,src
-#define PFRCP(dst,src) pfrcp dst,src
-#define PFRCPIT1(dst,src) pfrcpit1 dst,src
-#define PFRCPIT2(dst,src) pfrcpit2 dst,src
-#define PFRSQRT(dst,src) pfrsqrt dst,src
-#define PFRSQIT1(dst,src) pfrsqit1 dst,src
-#define PFSUB(dst,src) pfsub dst,src
-#define PFSUBR(dst,src) pfsubr dst,src
-#define PI2FD(dst,src) pi2fd dst,src
-#define PMULHRW(dst,src) pmulhrw dst,src
-#define PREFETCH(src) prefetch src
-#define PREFETCHW(src) prefetchw src
-/* Memory/offset forms for the built-in branch: each expands to
-   <mnemonic> dst,[src+off]. Only PAVGUSBM spells the mnemonic in lower
-   case; the others write the upper-case name, which is left as-is by the
-   preprocessor because the function-like macro of that name is only
-   expanded when followed by '('. */
-#define PAVGUSBM(dst,src,off) pavgusb dst,[src+off]
-#define PF2IDM(dst,src,off) PF2ID dst,[src+off]
-#define PFACCM(dst,src,off) PFACC dst,[src+off]
-#define PFADDM(dst,src,off) PFADD dst,[src+off]
-#define PFCMPEQM(dst,src,off) PFCMPEQ dst,[src+off]
-#define PFCMPGEM(dst,src,off) PFCMPGE dst,[src+off]
-#define PFCMPGTM(dst,src,off) PFCMPGT dst,[src+off]
-#define PFMAXM(dst,src,off) PFMAX dst,[src+off]
-#define PFMINM(dst,src,off) PFMIN dst,[src+off]
-#define PFMULM(dst,src,off) PFMUL dst,[src+off]
-#define PFRCPM(dst,src,off) PFRCP dst,[src+off]
-#define PFRCPIT1M(dst,src,off) PFRCPIT1 dst,[src+off]
-#define PFRCPIT2M(dst,src,off) PFRCPIT2 dst,[src+off]
-#define PFRSQRTM(dst,src,off) PFRSQRT dst,[src+off]
-#define PFRSQIT1M(dst,src,off) PFRSQIT1 dst,[src+off]
-#define PFSUBM(dst,src,off) PFSUB dst,[src+off]
-#define PFSUBRM(dst,src,off) PFSUBR dst,[src+off]
-#define PI2FDM(dst,src,off) PI2FD dst,[src+off]
-#define PMULHRWM(dst,src,off) PMULHRW dst,[src+off]
-
-
-#if defined (__MWERKS__)
-// At the moment, CodeWarrior does not support these opcodes, so hand-assemble them
-
-// Defines for operands.
-// These codes are token-pasted (_K3D_##dst / _K3D_##src) into the operand
-// byte built by the Inj* macros below. Both upper- and lower-case register
-// spellings are provided. Each name is defined exactly once.
-#define _K3D_MM0 0xc0
-#define _K3D_MM1 0xc1
-#define _K3D_MM2 0xc2
-#define _K3D_MM3 0xc3
-#define _K3D_MM4 0xc4
-#define _K3D_MM5 0xc5
-#define _K3D_MM6 0xc6
-#define _K3D_MM7 0xc7
-#define _K3D_mm0 0xc0
-#define _K3D_mm1 0xc1
-#define _K3D_mm2 0xc2
-#define _K3D_mm3 0xc3
-#define _K3D_mm4 0xc4
-#define _K3D_mm5 0xc5
-#define _K3D_mm6 0xc6
-#define _K3D_mm7 0xc7
-#define _K3D_EAX 0x00
-#define _K3D_ECX 0x01
-#define _K3D_EDX 0x02
-#define _K3D_EBX 0x03
-#define _K3D_ESI 0x06
-#define _K3D_EDI 0x07
-#define _K3D_eax 0x00
-#define _K3D_ecx 0x01
-#define _K3D_edx 0x02
-#define _K3D_ebx 0x03
-#define _K3D_esi 0x06
-#define _K3D_edi 0x07
-/* Hand-assembly helpers: each expands to a db byte list.
-   InjK3DOps / InjK3DMOps emit 0x0f, 0x0f, the operand byte, (off for the
-   M form,) and the _3DNowOpcode<inst> byte last.
-   InjMMXOps / InjMMXMOps emit 0x0f, the _3DNowOpcode<inst> byte, then the
-   operand byte (followed by off for the M form).
-   The operand byte is ((_K3D_<dst> & 0x3f) << 3) | _K3D_<src>; the M forms
-   additionally OR 0x40 into it. */
-#define InjK3DOps(dst,src,inst) \
- db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src), _3DNowOpcode##inst
-
-#define InjK3DMOps(dst,src,off,inst) \
- db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off, _3DNowOpcode##inst
-
-#define InjMMXOps(dst,src,inst) \
- db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src)
-
-#define InjMMXMOps(dst,src,off,inst) \
- db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off
-
-/* Register-form K7 macros, hand-assembled through InjK3DOps / InjMMXOps.
-   PINSRW/PEXTRW/PSHUFW append msk to the db byte list with ", msk", the
-   same way the memory/offset forms (PINSRWM etc.) do, instead of starting
-   a second db statement on the same line.
-   MOVNTQ hands its operands to InjMMXOps in swapped order, and the
-   PREFETCHNTA/T0/T1/T2 macros differ only in passing mm0..mm3 as dst. */
-#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC)
-#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC)
-#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD)
-#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB)
-#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB)
-#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW)
-#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW)
-#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW)
-#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB)
-#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW)
-#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW)
-#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB)
-#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ)
-#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW), msk
-#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW), msk
-#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW), msk
-#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ)
-#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT)
-#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT)
-#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT)
-#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT)
-
-
-/* Memory/offset versions of the K7 opcodes, hand-assembled through
-   InjK3DMOps / InjMMXMOps. PINSRWM/PEXTRWM/PSHUFWM extend the db byte
-   list with ", msk"; MOVNTQM passes its operands in swapped order. */
-#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC)
-#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC)
-#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD)
-#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB)
-#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB)
-#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW)
-#define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW)
-#define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW)
-#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB)
-#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW)
-#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW)
-#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB)
-#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ)
-#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW), msk
-#define PEXTRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PEXTRW), msk
-#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW), msk
-#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ)
-#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT)
-#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT)
-#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT)
-#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT)
-
-
-#else
-/* Non-CodeWarrior branch: nothing is hand-assembled. Each macro expands to
-   its own name used as the mnemonic, followed by the operands; the name
-   is not expanded again inside its own replacement text. */
-#define PFNACC(dst,src) PFNACC dst,src
-#define PFPNACC(dst,src) PFPNACC dst,src
-#define PSWAPD(dst,src) PSWAPD dst,src
-#define PMINUB(dst,src) PMINUB dst,src
-#define PMAXUB(dst,src) PMAXUB dst,src
-#define PMINSW(dst,src) PMINSW dst,src
-#define PMAXSW(dst,src) PMAXSW dst,src
-#define PMULHUW(dst,src) PMULHUW dst,src
-#define PAVGB(dst,src) PAVGB dst,src
-#define PAVGW(dst,src) PAVGW dst,src
-#define PSADBW(dst,src) PSADBW dst,src
-#define PMOVMSKB(dst,src) PMOVMSKB dst,src
-#define PMASKMOVQ(dst,src) PMASKMOVQ dst,src
-#define PINSRW(dst,src,msk) PINSRW dst,src,msk
-#define PEXTRW(dst,src,msk) PEXTRW dst,src,msk
-#define PSHUFW(dst,src,msk) PSHUFW dst,src,msk
-#define MOVNTQ(dst,src) MOVNTQ dst,src
-/* Memory/offset forms: <mnemonic> dst,[src+off], with ",msk" appended for
-   PINSRWM/PEXTRWM/PSHUFWM. */
-#define PFNACCM(dst,src,off) PFNACC dst,[src+off]
-#define PFPNACCM(dst,src,off) PFPNACC dst,[src+off]
-#define PSWAPDM(dst,src,off) PSWAPD dst,[src+off]
-#define PMINUBM(dst,src,off) PMINUB dst,[src+off]
-#define PMAXUBM(dst,src,off) PMAXUB dst,[src+off]
-#define PMINSWM(dst,src,off) PMINSW dst,[src+off]
-#define PMAXSWM(dst,src,off) PMAXSW dst,[src+off]
-#define PMULHUWM(dst,src,off) PMULHUW dst,[src+off]
-#define PAVGBM(dst,src,off) PAVGB dst,[src+off]
-#define PAVGWM(dst,src,off) PAVGW dst,[src+off]
-#define PSADBWM(dst,src,off) PSADBW dst,[src+off]
-#define PMOVMSKBM(dst,src,off) PMOVMSKB dst,[src+off]
-#define PMASKMOVQM(dst,src,off) PMASKMOVQ dst,[src+off]
-#define PINSRWM(dst,src,off,msk) PINSRW dst,[src+off],msk
-#define PEXTRWM(dst,src,off,msk) PEXTRW dst,[src+off],msk
-#define PSHUFWM(dst,src,off,msk) PSHUFW dst,[src+off],msk
-#define MOVNTQM(dst,src,off) MOVNTQ dst,[src+off]
-
-#endif
-
-#endif
-
-/* Just to deal with lower case: every lower-case name below forwards its
-   arguments, unchanged and in the same order, to the upper-case macro of
-   the same name. */
-#define pf2id(dst,src) PF2ID(dst,src)
-#define pfacc(dst,src) PFACC(dst,src)
-#define pfadd(dst,src) PFADD(dst,src)
-#define pfcmpeq(dst,src) PFCMPEQ(dst,src)
-#define pfcmpge(dst,src) PFCMPGE(dst,src)
-#define pfcmpgt(dst,src) PFCMPGT(dst,src)
-#define pfmax(dst,src) PFMAX(dst,src)
-#define pfmin(dst,src) PFMIN(dst,src)
-#define pfmul(dst,src) PFMUL(dst,src)
-#define pfrcp(dst,src) PFRCP(dst,src)
-#define pfrcpit1(dst,src) PFRCPIT1(dst,src)
-#define pfrcpit2(dst,src) PFRCPIT2(dst,src)
-#define pfrsqrt(dst,src) PFRSQRT(dst,src)
-#define pfrsqit1(dst,src) PFRSQIT1(dst,src)
-#define pfsub(dst,src) PFSUB(dst,src)
-#define pfsubr(dst,src) PFSUBR(dst,src)
-#define pi2fd(dst,src) PI2FD(dst,src)
-#define femms FEMMS
-#define pavgusb(dst,src) PAVGUSB(dst,src)
-#define pmulhrw(dst,src) PMULHRW(dst,src)
-#define prefetch(src) PREFETCH(src)
-#define prefetchw(src) PREFETCHW(src)
-
-#define prefetchm(src,off) PREFETCHM(src,off)
-#define prefetchmlong(src,off) PREFETCHMLONG(src,off)
-#define prefetchwm(src,off) PREFETCHWM(src,off)
-#define prefetchwmlong(src,off) PREFETCHWMLONG(src,off)
-
-#define pfnacc(dst,src) PFNACC(dst,src)
-#define pfpnacc(dst,src) PFPNACC(dst,src)
-#define pswapd(dst,src) PSWAPD(dst,src)
-#define pminub(dst,src) PMINUB(dst,src)
-#define pmaxub(dst,src) PMAXUB(dst,src)
-#define pminsw(dst,src) PMINSW(dst,src)
-#define pmaxsw(dst,src) PMAXSW(dst,src)
-#define pmulhuw(dst,src) PMULHUW(dst,src)
-#define pavgb(dst,src) PAVGB(dst,src)
-#define pavgw(dst,src) PAVGW(dst,src)
-#define psadbw(dst,src) PSADBW(dst,src)
-#define pmovmskb(dst,src) PMOVMSKB(dst,src)
-#define pmaskmovq(dst,src) PMASKMOVQ(dst,src)
-#define pinsrw(dst,src,msk) PINSRW(dst,src,msk)
-#define pextrw(dst,src,msk) PEXTRW(dst,src,msk)
-#define pshufw(dst,src,msk) PSHUFW(dst,src,msk)
-#define movntq(dst,src) MOVNTQ(dst,src)
-#define prefetchnta(mem) PREFETCHNTA(mem)
-#define prefetcht0(mem) PREFETCHT0(mem)
-#define prefetcht1(mem) PREFETCHT1(mem)
-#define prefetcht2(mem) PREFETCHT2(mem)
-
-/* Lower-case aliases for the memory/offset macros, plus cpuid and sfence:
-   each forwards its arguments unchanged to the upper-case macro. */
-#define pavgusbm(dst,src,off) PAVGUSBM(dst,src,off)
-#define pf2idm(dst,src,off) PF2IDM(dst,src,off)
-#define pfaccm(dst,src,off) PFACCM(dst,src,off)
-#define pfaddm(dst,src,off) PFADDM(dst,src,off)
-#define pfcmpeqm(dst,src,off) PFCMPEQM(dst,src,off)
-#define pfcmpgem(dst,src,off) PFCMPGEM(dst,src,off)
-#define pfcmpgtm(dst,src,off) PFCMPGTM(dst,src,off)
-#define pfmaxm(dst,src,off) PFMAXM(dst,src,off)
-#define pfminm(dst,src,off) PFMINM(dst,src,off)
-#define pfmulm(dst,src,off) PFMULM(dst,src,off)
-#define pfrcpm(dst,src,off) PFRCPM(dst,src,off)
-#define pfrcpit1m(dst,src,off) PFRCPIT1M(dst,src,off)
-#define pfrcpit2m(dst,src,off) PFRCPIT2M(dst,src,off)
-#define pfrsqrtm(dst,src,off) PFRSQRTM(dst,src,off)
-#define pfrsqit1m(dst,src,off) PFRSQIT1M(dst,src,off)
-#define pfsubm(dst,src,off) PFSUBM(dst,src,off)
-#define pfsubrm(dst,src,off) PFSUBRM(dst,src,off)
-#define pi2fdm(dst,src,off) PI2FDM(dst,src,off)
-#define pmulhrwm(dst,src,off) PMULHRWM(dst,src,off)
-#define cpuid CPUID
-#define sfence SFENCE
-
-/* Lower-case aliases for the K7 memory/offset macros. Each forwards its
-   arguments unchanged to the upper-case macro of the same name; the
-   prefetch*m aliases therefore go to the two-argument PREFETCH*M macros,
-   not to the one-argument register forms. */
-#define pfnaccm(dst,src,off) PFNACCM(dst,src,off)
-#define pfpnaccm(dst,src,off) PFPNACCM(dst,src,off)
-#define pswapdm(dst,src,off) PSWAPDM(dst,src,off)
-#define pminubm(dst,src,off) PMINUBM(dst,src,off)
-#define pmaxubm(dst,src,off) PMAXUBM(dst,src,off)
-#define pminswm(dst,src,off) PMINSWM(dst,src,off)
-#define pmaxswm(dst,src,off) PMAXSWM(dst,src,off)
-#define pmulhuwm(dst,src,off) PMULHUWM(dst,src,off)
-#define pavgbm(dst,src,off) PAVGBM(dst,src,off)
-#define pavgwm(dst,src,off) PAVGWM(dst,src,off)
-#define psadbwm(dst,src,off) PSADBWM(dst,src,off)
-#define pmovmskbm(dst,src,off) PMOVMSKBM(dst,src,off)
-#define pmaskmovqm(dst,src,off) PMASKMOVQM(dst,src,off)
-#define pinsrwm(dst,src,off,msk) PINSRWM(dst,src,off,msk)
-#define pextrwm(dst,src,off,msk) PEXTRWM(dst,src,off,msk)
-#define pshufwm(dst,src,off,msk) PSHUFWM(dst,src,off,msk)
-#define movntqm(dst,src,off) MOVNTQM(dst,src,off)
-#define prefetchntam(mem,off) PREFETCHNTAM(mem,off)
-#define prefetcht0m(mem,off) PREFETCHT0M(mem,off)
-#define prefetcht1m(mem,off) PREFETCHT1M(mem,off)
-#define prefetcht2m(mem,off) PREFETCHT2M(mem,off)
-
-#endif
+//========= Copyright Valve Corporation, All rights reserved. ============// +/****************************************************************************** + + Copyright (c) 1999 Advanced Micro Devices, Inc. + + LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY + EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY, + NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY + PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY + DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS, + BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR + INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY + OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION + OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY + NOT APPLY TO YOU. + + AMD does not assume any responsibility for any errors which may appear in the + Materials nor any responsibility to support or update the Materials. AMD retains + the right to make changes to its test specifications at any time, without notice. + + NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any + further information, software, technical information, know-how, or show-how + available to you. + + So that all may benefit from your experience, please report any problems + or suggestions about this software to [email protected] + + AMD Developer Technologies, M/S 585 + Advanced Micro Devices, Inc. + 5900 E. Ben White Blvd. + Austin, TX 78741 + +******************************************************************************* + + AMD3DX.H + + MACRO FORMAT + ============ + This file contains inline assembly macros that + generate AMD-3D instructions in binary format. + Therefore, C or C++ programmer can use AMD-3D instructions + without any penalty in their C or C++ source code. 
+ + The macro's name and format conventions are as follow: + + + 1. First argument of macro is a destination and + second argument is a source operand. + ex) _asm PFCMPEQ (mm3, mm4) + | | + dst src + + 2. The destination operand can be m0 to m7 only. + The source operand can be any one of the register + m0 to m7 or _eax, _ecx, _edx, _ebx, _esi, or _edi + that contains effective address. + ex) _asm PFRCP (MM7, MM6) + ex) _asm PFRCPIT2 (mm0, mm4) + ex) _asm PFMUL (mm3, _edi) + + 3. The prefetch(w) takes one src operand _eax, ecx, _edx, + _ebx, _esi, or _edi that contains effective address. + ex) _asm PREFETCH (_edi) + + For WATCOM C/C++ users, when using #pragma aux instead if + _asm, all macro names should be prefixed by a p_ or P_. + Macros should not be enclosed in quotes. + ex) p_pfrcp (MM7,MM6) + + NOTE: Not all instruction macros, nor all possible + combinations of operands have been explicitely + tested. If any errors are found, please report + them. + + EXAMPLE + ======= + Following program doesn't do anything but it shows you + how to use inline assembly AMD-3D instructions in C. + Note that this will only work in flat memory model which + segment registers cs, ds, ss and es point to the same + linear address space total less than 4GB. 
+ + Used Microsoft VC++ 5.0 + + #include <stdio.h> + #include "amd3d.h" + + void main () + { + float x = (float)1.25; + float y = (float)1.25; + float z, zz; + + _asm { + movd mm1, x + movd mm2, y + pfmul (mm1, mm2) + movd z, mm1 + femms + } + + printf ("value of z = %f\n", z); + + // + // Demonstration of using the memory instead of + // multimedia register + // + _asm { + movd mm3, x + lea esi, y // load effective address of y + pfmul (mm3, _esi) + movd zz, mm3 + femms + } + + printf ("value of zz = %f\n", zz); + } + + #pragma aux EXAMPLE with WATCOM C/C++ v11.x + =========================================== + + extern void Add(float *__Dest, float *__A, float *__B); + #pragma aux Add = \ + p_femms \ + "movd mm6,[esi]" \ + p_pfadd(mm6,_edi) \ + "movd [ebx],mm6" \ + p_femms \ + parm [ebx] [esi] [edi]; + +*******************************************************************************/ + +#ifndef _K3DMACROSINCLUDED_ +#define _K3DMACROSINCLUDED_ + +#if defined (__WATCOMC__) + +// The WATCOM C/C++ version of the 3DNow! macros. +// +// The older, compbined register style for WATCOM C/C++ macros is not +// supported. 
+ +/* Operand defines for instructions two operands */ +#define _k3d_mm0_mm0 0xc0 +#define _k3d_mm0_mm1 0xc1 +#define _k3d_mm0_mm2 0xc2 +#define _k3d_mm0_mm3 0xc3 +#define _k3d_mm0_mm4 0xc4 +#define _k3d_mm0_mm5 0xc5 +#define _k3d_mm0_mm6 0xc6 +#define _k3d_mm0_mm7 0xc7 +#define _k3d_mm0_eax 0x00 +#define _k3d_mm0_ecx 0x01 +#define _k3d_mm0_edx 0x02 +#define _k3d_mm0_ebx 0x03 +#define _k3d_mm0_esi 0x06 +#define _k3d_mm0_edi 0x07 +#define _k3d_mm1_mm0 0xc8 +#define _k3d_mm1_mm1 0xc9 +#define _k3d_mm1_mm2 0xca +#define _k3d_mm1_mm3 0xcb +#define _k3d_mm1_mm4 0xcc +#define _k3d_mm1_mm5 0xcd +#define _k3d_mm1_mm6 0xce +#define _k3d_mm1_mm7 0xcf +#define _k3d_mm1_eax 0x08 +#define _k3d_mm1_ecx 0x09 +#define _k3d_mm1_edx 0x0a +#define _k3d_mm1_ebx 0x0b +#define _k3d_mm1_esi 0x0e +#define _k3d_mm1_edi 0x0f +#define _k3d_mm2_mm0 0xd0 +#define _k3d_mm2_mm1 0xd1 +#define _k3d_mm2_mm2 0xd2 +#define _k3d_mm2_mm3 0xd3 +#define _k3d_mm2_mm4 0xd4 +#define _k3d_mm2_mm5 0xd5 +#define _k3d_mm2_mm6 0xd6 +#define _k3d_mm2_mm7 0xd7 +#define _k3d_mm2_eax 0x10 +#define _k3d_mm2_ecx 0x11 +#define _k3d_mm2_edx 0x12 +#define _k3d_mm2_ebx 0x13 +#define _k3d_mm2_esi 0x16 +#define _k3d_mm2_edi 0x17 +#define _k3d_mm3_mm0 0xd8 +#define _k3d_mm3_mm1 0xd9 +#define _k3d_mm3_mm2 0xda +#define _k3d_mm3_mm3 0xdb +#define _k3d_mm3_mm4 0xdc +#define _k3d_mm3_mm5 0xdd +#define _k3d_mm3_mm6 0xde +#define _k3d_mm3_mm7 0xdf +#define _k3d_mm3_eax 0x18 +#define _k3d_mm3_ecx 0x19 +#define _k3d_mm3_edx 0x1a +#define _k3d_mm3_ebx 0x1b +#define _k3d_mm3_esi 0x1e +#define _k3d_mm3_edi 0x1f +#define _k3d_mm4_mm0 0xe0 +#define _k3d_mm4_mm1 0xe1 +#define _k3d_mm4_mm2 0xe2 +#define _k3d_mm4_mm3 0xe3 +#define _k3d_mm4_mm4 0xe4 +#define _k3d_mm4_mm5 0xe5 +#define _k3d_mm4_mm6 0xe6 +#define _k3d_mm4_mm7 0xe7 +#define _k3d_mm4_eax 0x20 +#define _k3d_mm4_ecx 0x21 +#define _k3d_mm4_edx 0x22 +#define _k3d_mm4_ebx 0x23 +#define _k3d_mm4_esi 0x26 +#define _k3d_mm4_edi 0x27 +#define _k3d_mm5_mm0 0xe8 +#define _k3d_mm5_mm1 0xe9 
+#define _k3d_mm5_mm2 0xea +#define _k3d_mm5_mm3 0xeb +#define _k3d_mm5_mm4 0xec +#define _k3d_mm5_mm5 0xed +#define _k3d_mm5_mm6 0xee +#define _k3d_mm5_mm7 0xef +#define _k3d_mm5_eax 0x28 +#define _k3d_mm5_ecx 0x29 +#define _k3d_mm5_edx 0x2a +#define _k3d_mm5_ebx 0x2b +#define _k3d_mm5_esi 0x2e +#define _k3d_mm5_edi 0x2f +#define _k3d_mm6_mm0 0xf0 +#define _k3d_mm6_mm1 0xf1 +#define _k3d_mm6_mm2 0xf2 +#define _k3d_mm6_mm3 0xf3 +#define _k3d_mm6_mm4 0xf4 +#define _k3d_mm6_mm5 0xf5 +#define _k3d_mm6_mm6 0xf6 +#define _k3d_mm6_mm7 0xf7 +#define _k3d_mm6_eax 0x30 +#define _k3d_mm6_ecx 0x31 +#define _k3d_mm6_edx 0x32 +#define _k3d_mm6_ebx 0x33 +#define _k3d_mm6_esi 0x36 +#define _k3d_mm6_edi 0x37 +#define _k3d_mm7_mm0 0xf8 +#define _k3d_mm7_mm1 0xf9 +#define _k3d_mm7_mm2 0xfa +#define _k3d_mm7_mm3 0xfb +#define _k3d_mm7_mm4 0xfc +#define _k3d_mm7_mm5 0xfd +#define _k3d_mm7_mm6 0xfe +#define _k3d_mm7_mm7 0xff +#define _k3d_mm7_eax 0x38 +#define _k3d_mm7_ecx 0x39 +#define _k3d_mm7_edx 0x3a +#define _k3d_mm7_ebx 0x3b +#define _k3d_mm7_esi 0x3e +#define _k3d_mm7_edi 0x3f + +#define _k3d_name_xlat_m0 _mm0 +#define _k3d_name_xlat_m1 _mm1 +#define _k3d_name_xlat_m2 _mm2 +#define _k3d_name_xlat_m3 _mm3 +#define _k3d_name_xlat_m4 _mm4 +#define _k3d_name_xlat_m5 _mm5 +#define _k3d_name_xlat_m6 _mm6 +#define _k3d_name_xlat_m7 _mm7 +#define _k3d_name_xlat_M0 _mm0 +#define _k3d_name_xlat_M1 _mm1 +#define _k3d_name_xlat_M2 _mm2 +#define _k3d_name_xlat_M3 _mm3 +#define _k3d_name_xlat_M4 _mm4 +#define _k3d_name_xlat_M5 _mm5 +#define _k3d_name_xlat_M6 _mm6 +#define _k3d_name_xlat_M7 _mm7 +#define _k3d_name_xlat_mm0 _mm0 +#define _k3d_name_xlat_mm1 _mm1 +#define _k3d_name_xlat_mm2 _mm2 +#define _k3d_name_xlat_mm3 _mm3 +#define _k3d_name_xlat_mm4 _mm4 +#define _k3d_name_xlat_mm5 _mm5 +#define _k3d_name_xlat_mm6 _mm6 +#define _k3d_name_xlat_mm7 _mm7 +#define _k3d_name_xlat_MM0 _mm0 +#define _k3d_name_xlat_MM1 _mm1 +#define _k3d_name_xlat_MM2 _mm2 +#define _k3d_name_xlat_MM3 _mm3 +#define 
_k3d_name_xlat_MM4 _mm4 +#define _k3d_name_xlat_MM5 _mm5 +#define _k3d_name_xlat_MM6 _mm6 +#define _k3d_name_xlat_MM7 _mm7 +#define _k3d_name_xlat_eax _eax +#define _k3d_name_xlat_ebx _ebx +#define _k3d_name_xlat_ecx _ecx +#define _k3d_name_xlat_edx _edx +#define _k3d_name_xlat_esi _esi +#define _k3d_name_xlat_edi _edi +#define _k3d_name_xlat_ebp _ebp +#define _k3d_name_xlat_EAX _eax +#define _k3d_name_xlat_EBX _ebx +#define _k3d_name_xlat_ECX _ecx +#define _k3d_name_xlat_EDX _edx +#define _k3d_name_xlat_ESI _esi +#define _k3d_name_xlat_EDI _edi +#define _k3d_name_xlat_EBP _ebp +#define _k3d_name_xlat__eax _eax +#define _k3d_name_xlat__ebx _ebx +#define _k3d_name_xlat__ecx _ecx +#define _k3d_name_xlat__edx _edx +#define _k3d_name_xlat__esi _esi +#define _k3d_name_xlat__edi _edi +#define _k3d_name_xlat__ebp _ebp +#define _k3d_name_xlat__EAX _eax +#define _k3d_name_xlat__EBX _ebx +#define _k3d_name_xlat__ECX _ecx +#define _k3d_name_xlat__EDX _edx +#define _k3d_name_xlat__ESI _esi +#define _k3d_name_xlat__EDI _edi +#define _k3d_name_xlat__EBP _ebp + +#define _k3d_xglue3(a,b,c) a##b##c +#define _k3d_glue3(a,b,c) _k3d_xglue3(a,b,c) +#define _k3d_MODRM(dst, src) _k3d_glue3(_k3d,_k3d_name_xlat_##dst,_k3d_name_xlat_##src) + +/* Operand defines for prefetch and prefetchw */ + +#define _k3d_pref_eax 0x00 +#define _k3d_pref_ecx 0x01 +#define _k3d_pref_edx 0x02 +#define _k3d_pref_ebx 0x03 +#define _k3d_pref_esi 0x06 +#define _k3d_pref_edi 0x07 +#define _k3d_pref_EAX 0x00 +#define _k3d_pref_ECX 0x01 +#define _k3d_pref_EDX 0x02 +#define _k3d_pref_EBX 0x03 +#define _k3d_pref_ESI 0x06 +#define _k3d_pref_EDI 0x07 +#define _k3d_prefw_eax 0x08 +#define _k3d_prefw_ecx 0x09 +#define _k3d_prefw_edx 0x0A +#define _k3d_prefw_ebx 0x0B +#define _k3d_prefw_esi 0x0E +#define _k3d_prefw_edi 0x0F +#define _k3d_prefw_EAX 0x08 +#define _k3d_prefw_ECX 0x09 +#define _k3d_prefw_EDX 0x0A +#define _k3d_prefw_EBX 0x0B +#define _k3d_prefw_ESI 0x0E +#define _k3d_prefw_EDI 0x0F + +/* Defines for 3DNow! 
instructions */ +#define PF2ID(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x1d +#define PFACC(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xae +#define PFADD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9e +#define PFCMPEQ(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb0 +#define PFCMPGE(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x90 +#define PFCMPGT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa0 +#define PFMAX(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa4 +#define PFMIN(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x94 +#define PFMUL(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb4 +#define PFRCP(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x96 +#define PFRCPIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa6 +#define PFRCPIT2(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb6 +#define PFRSQRT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x97 +#define PFRSQIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa7 +#define PFSUB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9a +#define PFSUBR(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xaa +#define PI2FD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x0d +#define FEMMS db 0x0f, 0x0e +#define PAVGUSB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xbf +#define PMULHRW(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb7 +#define PREFETCH(src) db 0x0f, 0x0d, _k3d_pref_##src +#define PREFETCHW(src) db 0x0f, 0x0d, _k3d_prefw_##src +#define CPUID db 0x0f, 0xa2 + +/* Defines for new, K7 opcodes */ +#define PFNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8a +#define FPPNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8e +#define PSWAPD(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0xbb +#define PMINUB(dst,src) db 0x0f, 0xda, _k3d_MODRM(dst,src) +#define PMAXUB(dst,src) db 0x0f, 0xde, _k3d_MODRM(dst,src) +#define PMINSW(dst,src) db 0x0f, 0xea, _k3d_MODRM(dst,src) +#define PMAXSW(dst,src) db 0x0f, 0xee, _k3d_MODRM(dst,src) +#define PMULHUW(dst,src) db 0x0f, 0xe4, 
_k3d_MODRM(dst,src) +#define PAVGB(dst,src) db 0x0f, 0xe0, _k3d_MODRM(dst,src) +#define PAVGW(dst,src) db 0x0f, 0xe3, _k3d_MODRM(dst,src) +#define PSADBW(dst,src) db 0x0f, 0xf6, _k3d_MODRM(dst,src) +#define PMOVMSKB(dst,src) db 0x0f, 0xd7, _k3d_MODRM(dst,src) +#define PMASKMOVQ(dst,src) db 0x0f, 0xf7, _k3d_MODRM(dst,src) +#define PINSRW(dst,src,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src), msk +#define PEXTRW(dst,src,msk) db 0x0f, 0xc5, _k3d_MODRM(dst,src), msk +#define PSHUFW(dst,src,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src), msk +#define MOVNTQ(dst,src) db 0x0f, 0xe7, _k3d_MODRM(src,dst) +#define SFENCE db 0x0f, 0xae, 0xf8 + +/* Memory/offset versions of the opcodes */ +#define PF2IDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x1d +#define PFACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xae +#define PFADDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9e +#define PFCMPEQM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb0 +#define PFCMPGEM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x90 +#define PFCMPGTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa0 +#define PFMAXM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa4 +#define PFMINM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x94 +#define PFMULM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb4 +#define PFRCPM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x96 +#define PFRCPIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa6 +#define PFRCPIT2M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb6 +#define PFRSQRTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x97 +#define PFRSQIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa7 +#define PFSUBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9a +#define PFSUBRM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 
0xaa +#define PI2FDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x0d +#define PAVGUSBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbf +#define PMULHRWM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb7 + + +/* Memory/offset versions of the new, K7 opcodes */ +#define PFNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8a +#define FPPNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8e +#define PSWAPDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbb +#define PMINUBM(dst,src,off) db 0x0f, 0xda, _k3d_MODRM(dst,src) | 0x40, off +#define PMAXUBM(dst,src,off) db 0x0f, 0xde, _k3d_MODRM(dst,src) | 0x40, off +#define PMINSWM(dst,src,off) db 0x0f, 0xea, _k3d_MODRM(dst,src) | 0x40, off +#define PMAXSWM(dst,src,off) db 0x0f, 0xee, _k3d_MODRM(dst,src) | 0x40, off +#define PMULHUWM(dst,src,off) db 0x0f, 0xe4, _k3d_MODRM(dst,src) | 0x40, off +#define PAVGBM(dst,src,off) db 0x0f, 0xe0, _k3d_MODRM(dst,src) | 0x40, off +#define PAVGWM(dst,src,off) db 0x0f, 0xe3, _k3d_MODRM(dst,src) | 0x40, off +#define PSADBWM(dst,src,off) db 0x0f, 0xf6, _k3d_MODRM(dst,src) | 0x40, off +#define PMOVMSKBM(dst,src,off) db 0x0f, 0xd7, _k3d_MODRM(dst,src) | 0x40, off +#define PMASKMOVQM(dst,src,off) db 0x0f, 0xf7, _k3d_MODRM(dst,src) | 0x40, off +#define MOVNTQM(dst,src,off) db 0x0f, 0xe7, _k3d_MODRM(src,dst) | 0x40, off +#define PINSRWM(dst,src,off,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src) | 0x40, off, msk +#define PSHUFWM(dst,src,off,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src) | 0x40, off, msk + + +/* Defines for 3DNow! 
instructions for use in pragmas */ +#define p_pf2id(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x1d +#define p_pfacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xae +#define p_pfadd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9e +#define p_pfcmpeq(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb0 +#define p_pfcmpge(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x90 +#define p_pfcmpgt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa0 +#define p_pfmax(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa4 +#define p_pfmin(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x94 +#define p_pfmul(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb4 +#define p_pfrcp(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x96 +#define p_pfrcpit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa6 +#define p_pfrcpit2(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb6 +#define p_pfrsqrt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x97 +#define p_pfrsqit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa7 +#define p_pfsub(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9a +#define p_pfsubr(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xaa +#define p_pi2fd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x0d +#define p_femms 0x0f 0x0e +#define p_pavgusb(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xbf +#define p_pmulhrw(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb7 +#define p_prefetch(src) 0x0f 0x0d _k3d_pref_##src +#define p_prefetchw(src) 0x0f 0x0d _k3d_prefw_##src +#define P_PFNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a +#define P_FPPNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e +#define P_PSWAPD(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb +#define P_PMINUB(dst,src) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMAXUB(dst,src) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMINSW(dst,src) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMAXSW(dst,src) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMULHUW(dst,src) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PAVGB(dst,src) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off 
+#define P_PAVGW(dst,src) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PSADBW(dst,src) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMOVMSKB(dst,src) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMASKMOVQ(dst,src) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PINSRW(dst,src,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk +#define P_PEXTRW(dst,src,msk) 0x0f 0xc5 (_k3d_MODRM(dst,src) | 0x40) off msk +#define P_PSHUFW(dst,src,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk +#define P_MOVNTQ(dst,src) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off + +#define P_PF2IDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x1d +#define P_PFACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xae +#define P_PFADDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9e +#define P_PFCMPEQM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb0 +#define P_PFCMPGEM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x90 +#define P_PFCMPGTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa0 +#define P_PFMAXM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa4 +#define P_PFMINM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x94 +#define P_PFMULM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb4 +#define P_PFRCPM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x96 +#define P_PFRCPIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa6 +#define P_PFRCPIT2M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb6 +#define P_PFRSQRTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x97 +#define P_PFRSQIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa7 +#define P_PFSUBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9a +#define P_PFSUBRM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xaa +#define P_PI2FDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x0d +#define P_PAVGUSBM(dst,src,off) 0x0f 0x0f 
(_k3d_MODRM(dst,src) | 0x40) off 0xbf +#define P_PMULHRWM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb7 +#define P_PFNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a +#define P_FPPNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e +#define P_PSWAPDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb +#define P_PMINUBM(dst,src,off) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMAXUBM(dst,src,off) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMINSWM(dst,src,off) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMAXSWM(dst,src,off) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMULHUWM(dst,src,off) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PAVGBM(dst,src,off) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PAVGWM(dst,src,off) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PSADBWM(dst,src,off) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMOVMSKBM(dst,src,off) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off +#define P_MOVNTQM(dst,src,off) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off +#define P_PMASKMOVQM(dst,src,off) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PINSRWM(dst,src,off,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk +#define P_PSHUFWM(dst,src,off,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk + + +#define P_PF2ID(dst,src) p_pf2id(dst,src) +#define P_PFACC(dst,src) p_pfacc(dst,src) +#define P_PFADD(dst,src) p_pfadd(dst,src) +#define P_PFCMPEQ(dst,src) p_pfcmpeq(dst,src) +#define P_PFCMPGE(dst,src) p_pfcmpge(dst,src) +#define P_PFCMPGT(dst,src) p_pfcmpgt(dst,src) +#define P_PFMAX(dst,src) p_pfmax(dst,src) +#define P_PFMIN(dst,src) p_pfmin(dst,src) +#define P_PFMUL(dst,src) p_pfmul(dst,src) +#define P_PFRCP(dst,src) p_pfrcp(dst,src) +#define P_PFRCPIT1(dst,src) p_pfrcpit1(dst,src) +#define P_PFRCPIT2(dst,src) p_pfrcpit2(dst,src) +#define P_PFRSQRT(dst,src) p_pfrsqrt(dst,src) +#define P_PFRSQIT1(dst,src) p_pfrsqit1(dst,src) 
+#define P_PFSUB(dst,src) p_pfsub(dst,src) +#define P_PFSUBR(dst,src) p_pfsubr(dst,src) +#define P_PI2FD(dst,src) p_pi2fd(dst,src) +#define P_FEMMS p_femms +#define P_PAVGUSB(dst,src) p_pavgusb(dst,src) +#define P_PMULHRW(dst,src) p_pmulhrw(dst,src) +#define P_PREFETCH(src) p_prefetch(src) +#define P_PREFETCHW(src) p_prefetchw(src) +#define p_CPUID 0x0f 0xa2 +#define p_pf2idm(dst,src,off) P_PF2IDM(dst,src,off) +#define p_pfaccm(dst,src,off) P_PFACCM(dst,src,off) +#define p_pfaddm(dst,src,off) P_PFADDM(dst,src,off) +#define p_pfcmpeqm(dst,src,off) P_PFCMPEQM(dst,src,off) +#define p_pfcmpgem(dst,src,off) P_PFCMPGEM(dst,src,off) +#define p_pfcmpgtm(dst,src,off) P_PFCMPGTM(dst,src,off) +#define p_pfmaxm(dst,src,off) P_PFMAXM(dst,src,off) +#define p_pfminm(dst,src,off) P_PFMINM(dst,src,off) +#define p_pfmulm(dst,src,off) P_PFMULM(dst,src,off) +#define p_pfrcpm(dst,src,off) P_PFRCPM(dst,src,off) +#define p_pfrcpit1m(dst,src,off) P_PFRCPIT1M(dst,src,off) +#define p_pfrcpit2m(dst,src,off) P_PFRCPIT2M(dst,src,off) +#define p_pfrsqrtm(dst,src,off) P_PFRSQRTM(dst,src,off) +#define p_pfrsqit1m(dst,src,off) P_PFRSQIT1M(dst,src,off) +#define p_pfsubm(dst,src,off) P_PFSUBM(dst,src,off) +#define p_pfsubrm(dst,src,off) P_PFSUBRM(dst,src,off) +#define p_pi2fdm(dst,src,off) P_PI2FDM(dst,src,off) +#define p_pavgusbm(dst,src,off) P_PAVGUSBM(dst,src,off) +#define p_pmulhrwm(dst,src,off) P_PMULHRWM(dst,src,off) + +#define P_PFNACC(dst,src) p_pfnacc(dst,src) +#define P_FPPNACC(dst,src) p_pfpnacc(dst,src) +#define P_PSWAPD(dst,src) p_pswapd(dst,src) +#define P_PMINUB(dst,src) p_pminub(dst,src) +#define P_PMAXUB(dst,src) p_pmaxub(dst,src) +#define P_PMINSW(dst,src) p_pminsw(dst,src) +#define P_PMAXSW(dst,src) p_pmaxsw(dst,src) +#define P_PMULHUW(dst,src) p_pmulhuw(dst,src) +#define P_PAVGB(dst,src) p_pavgb(dst,src) +#define P_PAVGW(dst,src) p_avgw(dst,src) +#define P_PSADBW(dst,src) p_psadbw(dst,src) +#define P_PMOVMSKB(dst,src) p_pmovmskb(dst,src) +#define P_PMASKMOVQ(dst,src) 
p_pmaskmovq(dst,src) +#define P_PINSRW(dst,src,msk) p_pinsrw(dst,src) +#define P_PEXTRW(dst,src,msk) p_pextrw(dst,src) +#define P_PSHUFW(dst,src,msk) p_pshufw(dst,src) +#define P_MOVNTQ(dst,src) p_movntq(dst,src) + +#define P_PFNACCM(dst,src,off) p_pfnaccm(dst,src,off) +#define P_FPPNACCM(dst,src,off) p_pfpnaccm(dst,src,off) +#define P_PSWAPDM(dst,src,off) p_pswapdm(dst,src,off) +#define P_PMINUBM(dst,src,off) p_pminubm(dst,src,off) +#define P_PMAXUBM(dst,src,off) p_pmaxubm(dst,src,off) +#define P_PMINSWM(dst,src,off) p_pminswm(dst,src,off) +#define P_PMAXSWM(dst,src,off) p_pmaxswm(dst,src,off) +#define P_PMULHUWM(dst,src,off) p_pmulhuwm(dst,src,off) +#define P_PAVGBM(dst,src,off) p_pavgbm(dst,src,off) +#define P_PAVGWM(dst,src,off) p_avgwm(dst,src,off) +#define P_PSADBWM(dst,src,off) p_psadbwm(dst,src,off) +#define P_PMOVMSKBM(dst,src,off) p_pmovmskbm(dst,src,off) +#define P_PMASKMOVQM(dst,src,off) p_pmaskmovqm(dst,src,off) +#define P_PINSRWM(dst,src,off,msk) p_pinsrwm(dst,src,off,msk) +#define P_PSHUFWM(dst,src,off,msk) p_pshufwm(dst,src,off,msk) +#define P_MOVNTQM(dst,src,off) p_movntqm(dst,src,off) + +#elif defined (_MSC_VER) && !defined (__MWERKS__) +// The Microsoft Visual C++ version of the 3DNow! macros. + +// Stop the "no EMMS" warning, since it doesn't detect FEMMS properly +#pragma warning(disable:4799) + +// Defines for operands. 
+#define _K3D_MM0 0xc0 +#define _K3D_MM1 0xc1 +#define _K3D_MM2 0xc2 +#define _K3D_MM3 0xc3 +#define _K3D_MM4 0xc4 +#define _K3D_MM5 0xc5 +#define _K3D_MM6 0xc6 +#define _K3D_MM7 0xc7 +#define _K3D_mm0 0xc0 +#define _K3D_mm1 0xc1 +#define _K3D_mm2 0xc2 +#define _K3D_mm3 0xc3 +#define _K3D_mm4 0xc4 +#define _K3D_mm5 0xc5 +#define _K3D_mm6 0xc6 +#define _K3D_mm7 0xc7 +#define _K3D_EAX 0x00 +#define _K3D_ECX 0x01 +#define _K3D_EDX 0x02 +#define _K3D_EBX 0x03 +#define _K3D_ESI 0x06 +#define _K3D_EDI 0x07 +#define _K3D_eax 0x00 +#define _K3D_ecx 0x01 +#define _K3D_edx 0x02 +#define _K3D_ebx 0x03 +#define _K3D_esi 0x06 +#define _K3D_edi 0x07 + +// These defines are for compatibility with the previous version of the header file. +#define _K3D_M0 0xc0 +#define _K3D_M1 0xc1 +#define _K3D_M2 0xc2 +#define _K3D_M3 0xc3 +#define _K3D_M4 0xc4 +#define _K3D_M5 0xc5 +#define _K3D_M6 0xc6 +#define _K3D_M7 0xc7 +#define _K3D_m0 0xc0 +#define _K3D_m1 0xc1 +#define _K3D_m2 0xc2 +#define _K3D_m3 0xc3 +#define _K3D_m4 0xc4 +#define _K3D_m5 0xc5 +#define _K3D_m6 0xc6 +#define _K3D_m7 0xc7 +#define _K3D__EAX 0x00 +#define _K3D__ECX 0x01 +#define _K3D__EDX 0x02 +#define _K3D__EBX 0x03 +#define _K3D__ESI 0x06 +#define _K3D__EDI 0x07 +#define _K3D__eax 0x00 +#define _K3D__ecx 0x01 +#define _K3D__edx 0x02 +#define _K3D__ebx 0x03 +#define _K3D__esi 0x06 +#define _K3D__edi 0x07 + +// General 3DNow! instruction format that is supported by +// these macros. Note that only the most basic form of memory +// operands are supported by these macros. 
+ +#define InjK3DOps(dst,src,inst) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0f \ + _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \ + _asm _emit _3DNowOpcode##inst \ +} + +#define InjK3DMOps(dst,src,off,inst) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0f \ + _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \ + _asm _emit off \ + _asm _emit _3DNowOpcode##inst \ +} + +#define InjMMXOps(dst,src,inst) \ +{ \ + _asm _emit 0x0f \ + _asm _emit _3DNowOpcode##inst \ + _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \ +} + +#define InjMMXMOps(dst,src,off,inst) \ +{ \ + _asm _emit 0x0f \ + _asm _emit _3DNowOpcode##inst \ + _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \ + _asm _emit off \ +} + +#define _3DNowOpcodePF2ID 0x1d +#define _3DNowOpcodePFACC 0xae +#define _3DNowOpcodePFADD 0x9e +#define _3DNowOpcodePFCMPEQ 0xb0 +#define _3DNowOpcodePFCMPGE 0x90 +#define _3DNowOpcodePFCMPGT 0xa0 +#define _3DNowOpcodePFMAX 0xa4 +#define _3DNowOpcodePFMIN 0x94 +#define _3DNowOpcodePFMUL 0xb4 +#define _3DNowOpcodePFRCP 0x96 +#define _3DNowOpcodePFRCPIT1 0xa6 +#define _3DNowOpcodePFRCPIT2 0xb6 +#define _3DNowOpcodePFRSQRT 0x97 +#define _3DNowOpcodePFRSQIT1 0xa7 +#define _3DNowOpcodePFSUB 0x9a +#define _3DNowOpcodePFSUBR 0xaa +#define _3DNowOpcodePI2FD 0x0d +#define _3DNowOpcodePAVGUSB 0xbf +#define _3DNowOpcodePMULHRW 0xb7 +#define _3DNowOpcodePFNACC 0x8a +#define _3DNowOpcodeFPPNACC 0x8e +#define _3DNowOpcodePSWAPD 0xbb +#define _3DNowOpcodePMINUB 0xda +#define _3DNowOpcodePMAXUB 0xde +#define _3DNowOpcodePMINSW 0xea +#define _3DNowOpcodePMAXSW 0xee +#define _3DNowOpcodePMULHUW 0xe4 +#define _3DNowOpcodePAVGB 0xe0 +#define _3DNowOpcodePAVGW 0xe3 +#define _3DNowOpcodePSADBW 0xf6 +#define _3DNowOpcodePMOVMSKB 0xd7 +#define _3DNowOpcodePMASKMOVQ 0xf7 +#define _3DNowOpcodePINSRW 0xc4 +#define _3DNowOpcodePEXTRW 0xc5 +#define _3DNowOpcodePSHUFW 0x70 +#define _3DNowOpcodeMOVNTQ 0xe7 +#define _3DNowOpcodePREFETCHT 0x18 + + +#define PF2ID(dst,src) InjK3DOps(dst, 
src, PF2ID) +#define PFACC(dst,src) InjK3DOps(dst, src, PFACC) +#define PFADD(dst,src) InjK3DOps(dst, src, PFADD) +#define PFCMPEQ(dst,src) InjK3DOps(dst, src, PFCMPEQ) +#define PFCMPGE(dst,src) InjK3DOps(dst, src, PFCMPGE) +#define PFCMPGT(dst,src) InjK3DOps(dst, src, PFCMPGT) +#define PFMAX(dst,src) InjK3DOps(dst, src, PFMAX) +#define PFMIN(dst,src) InjK3DOps(dst, src, PFMIN) +#define PFMUL(dst,src) InjK3DOps(dst, src, PFMUL) +#define PFRCP(dst,src) InjK3DOps(dst, src, PFRCP) +#define PFRCPIT1(dst,src) InjK3DOps(dst, src, PFRCPIT1) +#define PFRCPIT2(dst,src) InjK3DOps(dst, src, PFRCPIT2) +#define PFRSQRT(dst,src) InjK3DOps(dst, src, PFRSQRT) +#define PFRSQIT1(dst,src) InjK3DOps(dst, src, PFRSQIT1) +#define PFSUB(dst,src) InjK3DOps(dst, src, PFSUB) +#define PFSUBR(dst,src) InjK3DOps(dst, src, PFSUBR) +#define PI2FD(dst,src) InjK3DOps(dst, src, PI2FD) +#define PAVGUSB(dst,src) InjK3DOps(dst, src, PAVGUSB) +#define PMULHRW(dst,src) InjK3DOps(dst, src, PMULHRW) + +#define FEMMS \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0e \ +} + +#define PREFETCH(src) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit (_K3D_##src & 0x07) \ +} + +/* Prefetch with a short offset, < 127 or > -127 + Carefull! Doesn't check for your offset being + in range. 
*/ + +#define PREFETCHM(src,off) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit (0x40 | (_K3D_##src & 0x07)) \ + _asm _emit off \ +} + +/* Prefetch with a long offset */ + +#define PREFETCHMLONG(src,off) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit (0x80 | (_K3D_##src & 0x07)) \ + _asm _emit (off & 0x000000ff) \ + _asm _emit (off & 0x0000ff00) >> 8 \ + _asm _emit (off & 0x00ff0000) >> 16 \ + _asm _emit (off & 0xff000000) >> 24 \ +} + +#define PREFETCHW(src) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit (0x08 | (_K3D_##src & 0x07)) \ +} + +#define PREFETCHWM(src,off) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit 0x48 | (_K3D_##src & 0x07) \ + _asm _emit off \ +} + +#define PREFETCHWMLONG(src,off) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit 0x88 | (_K3D_##src & 0x07) \ + _asm _emit (off & 0x000000ff) \ + _asm _emit (off & 0x0000ff00) >> 8 \ + _asm _emit (off & 0x00ff0000) >> 16 \ + _asm _emit (off & 0xff000000) >> 24 \ +} + +#define CPUID \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0xa2 \ +} + + +/* Defines for new, K7 opcodes */ +#define SFENCE \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0xae \ + _asm _emit 0xf8 \ +} + +#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC) +#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC) +#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD) +#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB) +#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB) +#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW) +#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW) +#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW) +#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB) +#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW) +#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW) +#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB) +#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ) +#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) _asm _emit msk +#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) 
_asm _emit msk +#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) _asm _emit msk +#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ) +#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT) +#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT) +#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT) +#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT) + + +/* Memory/offset versions of the opcodes */ +#define PAVGUSBM(dst,src,off) InjK3DMOps(dst,src,off,PAVGUSB) +#define PF2IDM(dst,src,off) InjK3DMOps(dst,src,off,PF2ID) +#define PFACCM(dst,src,off) InjK3DMOps(dst,src,off,PFACC) +#define PFADDM(dst,src,off) InjK3DMOps(dst,src,off,PFADD) +#define PFCMPEQM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPEQ) +#define PFCMPGEM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGE) +#define PFCMPGTM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGT) +#define PFMAXM(dst,src,off) InjK3DMOps(dst,src,off,PFMAX) +#define PFMINM(dst,src,off) InjK3DMOps(dst,src,off,PFMIN) +#define PFMULM(dst,src,off) InjK3DMOps(dst,src,off,PFMUL) +#define PFRCPM(dst,src,off) InjK3DMOps(dst,src,off,PFRCP) +#define PFRCPIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT1) +#define PFRCPIT2M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT2) +#define PFRSQRTM(dst,src,off) InjK3DMOps(dst,src,off,PFRSQRT) +#define PFRSQIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRSQIT1) +#define PFSUBM(dst,src,off) InjK3DMOps(dst,src,off,PFSUB) +#define PFSUBRM(dst,src,off) InjK3DMOps(dst,src,off,PFSUBR) +#define PI2FDM(dst,src,off) InjK3DMOps(dst,src,off,PI2FD) +#define PMULHRWM(dst,src,off) InjK3DMOps(dst,src,off,PMULHRW) + + +/* Memory/offset versions of the K7 opcodes */ +#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC) +#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC) +#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD) +#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB) +#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB) +#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW) +#define 
PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW) +#define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW) +#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB) +#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW) +#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW) +#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB) +#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ) +#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW) _asm _emit msk +#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW) _asm _emit msk +#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ) +#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT) +#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT) +#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT) +#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT) + + +#else + +/* Assume built-in support for 3DNow! opcodes, replace macros with opcodes */ +#define PAVGUSB(dst,src) pavgusb dst,src +#define PF2ID(dst,src) pf2id dst,src +#define PFACC(dst,src) pfacc dst,src +#define PFADD(dst,src) pfadd dst,src +#define PFCMPEQ(dst,src) pfcmpeq dst,src +#define PFCMPGE(dst,src) pfcmpge dst,src +#define PFCMPGT(dst,src) pfcmpgt dst,src +#define PFMAX(dst,src) pfmax dst,src +#define PFMIN(dst,src) pfmin dst,src +#define PFMUL(dst,src) pfmul dst,src +#define PFRCP(dst,src) pfrcp dst,src +#define PFRCPIT1(dst,src) pfrcpit1 dst,src +#define PFRCPIT2(dst,src) pfrcpit2 dst,src +#define PFRSQRT(dst,src) pfrsqrt dst,src +#define PFRSQIT1(dst,src) pfrsqit1 dst,src +#define PFSUB(dst,src) pfsub dst,src +#define PFSUBR(dst,src) pfsubr dst,src +#define PI2FD(dst,src) pi2fd dst,src +#define PMULHRW(dst,src) pmulhrw dst,src +#define PREFETCH(src) prefetch src +#define PREFETCHW(src) prefetchw src + +#define PAVGUSBM(dst,src,off) pavgusb dst,[src+off] +#define PF2IDM(dst,src,off) PF2ID dst,[src+off] +#define PFACCM(dst,src,off) PFACC dst,[src+off] +#define 
PFADDM(dst,src,off) PFADD dst,[src+off] +#define PFCMPEQM(dst,src,off) PFCMPEQ dst,[src+off] +#define PFCMPGEM(dst,src,off) PFCMPGE dst,[src+off] +#define PFCMPGTM(dst,src,off) PFCMPGT dst,[src+off] +#define PFMAXM(dst,src,off) PFMAX dst,[src+off] +#define PFMINM(dst,src,off) PFMIN dst,[src+off] +#define PFMULM(dst,src,off) PFMUL dst,[src+off] +#define PFRCPM(dst,src,off) PFRCP dst,[src+off] +#define PFRCPIT1M(dst,src,off) PFRCPIT1 dst,[src+off] +#define PFRCPIT2M(dst,src,off) PFRCPIT2 dst,[src+off] +#define PFRSQRTM(dst,src,off) PFRSQRT dst,[src+off] +#define PFRSQIT1M(dst,src,off) PFRSQIT1 dst,[src+off] +#define PFSUBM(dst,src,off) PFSUB dst,[src+off] +#define PFSUBRM(dst,src,off) PFSUBR dst,[src+off] +#define PI2FDM(dst,src,off) PI2FD dst,[src+off] +#define PMULHRWM(dst,src,off) PMULHRW dst,[src+off] + + +#if defined (__MWERKS__) +// At the moment, CodeWarrior does not support these opcodes, so hand-assemble them + +// Defines for operands. +#define _K3D_MM0 0xc0 +#define _K3D_MM1 0xc1 +#define _K3D_MM2 0xc2 +#define _K3D_MM3 0xc3 +#define _K3D_MM4 0xc4 +#define _K3D_MM5 0xc5 +#define _K3D_MM6 0xc6 +#define _K3D_MM7 0xc7 +#define _K3D_mm0 0xc0 +#define _K3D_mm1 0xc1 +#define _K3D_mm2 0xc2 +#define _K3D_mm3 0xc3 +#define _K3D_mm4 0xc4 +#define _K3D_mm5 0xc5 +#define _K3D_mm6 0xc6 +#define _K3D_mm7 0xc7 +#define _K3D_EAX 0x00 +#define _K3D_ECX 0x01 +#define _K3D_EDX 0x02 +#define _K3D_EBX 0x03 +#define _K3D_ESI 0x06 +#define _K3D_EDI 0x07 +#define _K3D_eax 0x00 +#define _K3D_ecx 0x01 +#define _K3D_edx 0x02 +#define _K3D_ebx 0x03 +#define _K3D_esi 0x06 +#define _K3D_edi 0x07 +#define _K3D_EAX 0x00 +#define _K3D_ECX 0x01 +#define _K3D_EDX 0x02 +#define _K3D_EBX 0x03 +#define _K3D_ESI 0x06 +#define _K3D_EDI 0x07 +#define _K3D_eax 0x00 +#define _K3D_ecx 0x01 +#define _K3D_edx 0x02 +#define _K3D_ebx 0x03 +#define _K3D_esi 0x06 +#define _K3D_edi 0x07 + +#define InjK3DOps(dst,src,inst) \ + db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src), _3DNowOpcode##inst + 
+#define InjK3DMOps(dst,src,off,inst) \ + db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off, _3DNowOpcode##inst + +#define InjMMXOps(dst,src,inst) \ + db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src) + +#define InjMMXMOps(dst,src,off,inst) \ + db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off + +#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC) +#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC) +#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD) +#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB) +#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB) +#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW) +#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW) +#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW) +#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB) +#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW) +#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW) +#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB) +#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ) +#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) db msk +#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) db msk +#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) db msk +#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ) +#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT) +#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT) +#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT) +#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT) + + +/* Memory/offset versions of the K7 opcodes */ +#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC) +#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC) +#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD) +#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB) +#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB) +#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW) +#define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW) +#define 
PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW) +#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB) +#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW) +#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW) +#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB) +#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ) +#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW), msk +#define PEXTRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PEXTRW), msk +#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW), msk +#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ) +#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT) +#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT) +#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT) +#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT) + + +#else + +#define PFNACC(dst,src) PFNACC dst,src +#define PFPNACC(dst,src) PFPNACC dst,src +#define PSWAPD(dst,src) PSWAPD dst,src +#define PMINUB(dst,src) PMINUB dst,src +#define PMAXUB(dst,src) PMAXUB dst,src +#define PMINSW(dst,src) PMINSW dst,src +#define PMAXSW(dst,src) PMAXSW dst,src +#define PMULHUW(dst,src) PMULHUW dst,src +#define PAVGB(dst,src) PAVGB dst,src +#define PAVGW(dst,src) PAVGW dst,src +#define PSADBW(dst,src) PSADBW dst,src +#define PMOVMSKB(dst,src) PMOVMSKB dst,src +#define PMASKMOVQ(dst,src) PMASKMOVQ dst,src +#define PINSRW(dst,src,msk) PINSRW dst,src,msk +#define PEXTRW(dst,src,msk) PEXTRW dst,src,msk +#define PSHUFW(dst,src,msk) PSHUFW dst,src,msk +#define MOVNTQ(dst,src) MOVNTQ dst,src + +#define PFNACCM(dst,src,off) PFNACC dst,[src+off] +#define PFPNACCM(dst,src,off) PFPNACC dst,[src+off] +#define PSWAPDM(dst,src,off) PSWAPD dst,[src+off] +#define PMINUBM(dst,src,off) PMINUB dst,[src+off] +#define PMAXUBM(dst,src,off) PMAXUB dst,[src+off] +#define PMINSWM(dst,src,off) PMINSW dst,[src+off] +#define PMAXSWM(dst,src,off) PMAXSW dst,[src+off] +#define 
PMULHUWM(dst,src,off) PMULHUW dst,[src+off] +#define PAVGBM(dst,src,off) PAVGB dst,[src+off] +#define PAVGWM(dst,src,off) PAVGW dst,[src+off] +#define PSADBWM(dst,src,off) PSADBW dst,[src+off] +#define PMOVMSKBM(dst,src,off) PMOVMSKB dst,[src+off] +#define PMASKMOVQM(dst,src,off) PMASKMOVQ dst,[src+off] +#define PINSRWM(dst,src,off,msk) PINSRW dst,[src+off],msk +#define PEXTRWM(dst,src,off,msk) PEXTRW dst,[src+off],msk +#define PSHUFWM(dst,src,off,msk) PSHUFW dst,[src+off],msk +#define MOVNTQM(dst,src,off) MOVNTQ dst,[src+off] + +#endif + +#endif + +/* Just to deal with lower case. */ +#define pf2id(dst,src) PF2ID(dst,src) +#define pfacc(dst,src) PFACC(dst,src) +#define pfadd(dst,src) PFADD(dst,src) +#define pfcmpeq(dst,src) PFCMPEQ(dst,src) +#define pfcmpge(dst,src) PFCMPGE(dst,src) +#define pfcmpgt(dst,src) PFCMPGT(dst,src) +#define pfmax(dst,src) PFMAX(dst,src) +#define pfmin(dst,src) PFMIN(dst,src) +#define pfmul(dst,src) PFMUL(dst,src) +#define pfrcp(dst,src) PFRCP(dst,src) +#define pfrcpit1(dst,src) PFRCPIT1(dst,src) +#define pfrcpit2(dst,src) PFRCPIT2(dst,src) +#define pfrsqrt(dst,src) PFRSQRT(dst,src) +#define pfrsqit1(dst,src) PFRSQIT1(dst,src) +#define pfsub(dst,src) PFSUB(dst,src) +#define pfsubr(dst,src) PFSUBR(dst,src) +#define pi2fd(dst,src) PI2FD(dst,src) +#define femms FEMMS +#define pavgusb(dst,src) PAVGUSB(dst,src) +#define pmulhrw(dst,src) PMULHRW(dst,src) +#define prefetch(src) PREFETCH(src) +#define prefetchw(src) PREFETCHW(src) + +#define prefetchm(src,off) PREFETCHM(src,off) +#define prefetchmlong(src,off) PREFETCHMLONG(src,off) +#define prefetchwm(src,off) PREFETCHWM(src,off) +#define prefetchwmlong(src,off) PREFETCHWMLONG(src,off) + +#define pfnacc(dst,src) PFNACC(dst,src) +#define pfpnacc(dst,src) PFPNACC(dst,src) +#define pswapd(dst,src) PSWAPD(dst,src) +#define pminub(dst,src) PMINUB(dst,src) +#define pmaxub(dst,src) PMAXUB(dst,src) +#define pminsw(dst,src) PMINSW(dst,src) +#define pmaxsw(dst,src) PMAXSW(dst,src) +#define 
pmulhuw(dst,src) PMULHUW(dst,src) +#define pavgb(dst,src) PAVGB(dst,src) +#define pavgw(dst,src) PAVGW(dst,src) +#define psadbw(dst,src) PSADBW(dst,src) +#define pmovmskb(dst,src) PMOVMSKB(dst,src) +#define pmaskmovq(dst,src) PMASKMOVQ(dst,src) +#define pinsrw(dst,src,msk) PINSRW(dst,src,msk) +#define pextrw(dst,src,msk) PEXTRW(dst,src,msk) +#define pshufw(dst,src,msk) PSHUFW(dst,src,msk) +#define movntq(dst,src) MOVNTQ(dst,src) +#define prefetchnta(mem) PREFETCHNTA(mem) +#define prefetcht0(mem) PREFETCHT0(mem) +#define prefetcht1(mem) PREFETCHT1(mem) +#define prefetcht2(mem) PREFETCHT2(mem) + + +#define pavgusbm(dst,src,off) PAVGUSBM(dst,src,off) +#define pf2idm(dst,src,off) PF2IDM(dst,src,off) +#define pfaccm(dst,src,off) PFACCM(dst,src,off) +#define pfaddm(dst,src,off) PFADDM(dst,src,off) +#define pfcmpeqm(dst,src,off) PFCMPEQM(dst,src,off) +#define pfcmpgem(dst,src,off) PFCMPGEM(dst,src,off) +#define pfcmpgtm(dst,src,off) PFCMPGTM(dst,src,off) +#define pfmaxm(dst,src,off) PFMAXM(dst,src,off) +#define pfminm(dst,src,off) PFMINM(dst,src,off) +#define pfmulm(dst,src,off) PFMULM(dst,src,off) +#define pfrcpm(dst,src,off) PFRCPM(dst,src,off) +#define pfrcpit1m(dst,src,off) PFRCPIT1M(dst,src,off) +#define pfrcpit2m(dst,src,off) PFRCPIT2M(dst,src,off) +#define pfrsqrtm(dst,src,off) PFRSQRTM(dst,src,off) +#define pfrsqit1m(dst,src,off) PFRSQIT1M(dst,src,off) +#define pfsubm(dst,src,off) PFSUBM(dst,src,off) +#define pfsubrm(dst,src,off) PFSUBRM(dst,src,off) +#define pi2fdm(dst,src,off) PI2FDM(dst,src,off) +#define pmulhrwm(dst,src,off) PMULHRWM(dst,src,off) +#define cpuid CPUID +#define sfence SFENCE + +#define pfnaccm(dst,src,off) PFNACCM(dst,src,off) +#define pfpnaccm(dst,src,off) PFPNACCM(dst,src,off) +#define pswapdm(dst,src,off) PSWAPDM(dst,src,off) +#define pminubm(dst,src,off) PMINUBM(dst,src,off) +#define pmaxubm(dst,src,off) PMAXUBM(dst,src,off) +#define pminswm(dst,src,off) PMINSWM(dst,src,off) +#define pmaxswm(dst,src,off) PMAXSWM(dst,src,off) +#define 
pmulhuwm(dst,src,off) PMULHUWM(dst,src,off) +#define pavgbm(dst,src,off) PAVGBM(dst,src,off) +#define pavgwm(dst,src,off) PAVGWM(dst,src,off) +#define psadbwm(dst,src,off) PSADBWM(dst,src,off) +#define pmovmskbm(dst,src,off) PMOVMSKBM(dst,src,off) +#define pmaskmovqm(dst,src,off) PMASKMOVQM(dst,src,off) +#define pinsrwm(dst,src,off,msk) PINSRWM(dst,src,off,msk) +#define pextrwm(dst,src,off,msk) PEXTRWM(dst,src,off,msk) +#define pshufwm(dst,src,off,msk) PSHUFWM(dst,src,off,msk) +#define movntqm(dst,src,off) MOVNTQM(dst,src,off) +#define prefetchntam(mem,off) PREFETCHNTA(mem,off) +#define prefetcht0m(mem,off) PREFETCHT0(mem,off) +#define prefetcht1m(mem,off) PREFETCHT1(mem,off) +#define prefetcht2m(mem,off) PREFETCHT2(mem,off) + +#endif diff --git a/mp/src/public/mathlib/anorms.h b/mp/src/public/mathlib/anorms.h index ae759eb1..4f653835 100644 --- a/mp/src/public/mathlib/anorms.h +++ b/mp/src/public/mathlib/anorms.h @@ -1,25 +1,25 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//=============================================================================//
-
-#ifndef ANORMS_H
-#define ANORMS_H
-#ifdef _WIN32
-#pragma once
-#endif
-
-
-#include "mathlib/vector.h"
-
-
-#define NUMVERTEXNORMALS 162
-
-// the angle between consecutive g_anorms[] vectors is ~14.55 degrees
-#define VERTEXNORMAL_CONE_INNER_ANGLE DEG2RAD(7.275)
-
-extern Vector g_anorms[NUMVERTEXNORMALS];
-
-
-#endif // ANORMS_H
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//=============================================================================// + +#ifndef ANORMS_H +#define ANORMS_H +#ifdef _WIN32 +#pragma once +#endif + + +#include "mathlib/vector.h" + + +#define NUMVERTEXNORMALS 162 + +// the angle between consecutive g_anorms[] vectors is ~14.55 degrees +#define VERTEXNORMAL_CONE_INNER_ANGLE DEG2RAD(7.275) + +extern Vector g_anorms[NUMVERTEXNORMALS]; + + +#endif // ANORMS_H diff --git a/mp/src/public/mathlib/bumpvects.h b/mp/src/public/mathlib/bumpvects.h index e0ba73fb..6939ca05 100644 --- a/mp/src/public/mathlib/bumpvects.h +++ b/mp/src/public/mathlib/bumpvects.h @@ -1,37 +1,37 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $Workfile: $
-// $Date: $
-// $NoKeywords: $
-//=============================================================================//
-
-#ifndef BUMPVECTS_H
-#define BUMPVECTS_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include "mathlib/mathlib.h"
-
-#define OO_SQRT_2 0.70710676908493042f
-#define OO_SQRT_3 0.57735025882720947f
-#define OO_SQRT_6 0.40824821591377258f
-// sqrt( 2 / 3 )
-#define OO_SQRT_2_OVER_3 0.81649661064147949f
-
-#define NUM_BUMP_VECTS 3
-
-const TableVector g_localBumpBasis[NUM_BUMP_VECTS] =
-{
- { OO_SQRT_2_OVER_3, 0.0f, OO_SQRT_3 },
- { -OO_SQRT_6, OO_SQRT_2, OO_SQRT_3 },
- { -OO_SQRT_6, -OO_SQRT_2, OO_SQRT_3 }
-};
-
-void GetBumpNormals( const Vector& sVect, const Vector& tVect, const Vector& flatNormal,
- const Vector& phongNormal, Vector bumpNormals[NUM_BUMP_VECTS] );
-
-#endif // BUMPVECTS_H
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $Workfile: $ +// $Date: $ +// $NoKeywords: $ +//=============================================================================// + +#ifndef BUMPVECTS_H +#define BUMPVECTS_H + +#ifdef _WIN32 +#pragma once +#endif + +#include "mathlib/mathlib.h" + +#define OO_SQRT_2 0.70710676908493042f +#define OO_SQRT_3 0.57735025882720947f +#define OO_SQRT_6 0.40824821591377258f +// sqrt( 2 / 3 ) +#define OO_SQRT_2_OVER_3 0.81649661064147949f + +#define NUM_BUMP_VECTS 3 + +const TableVector g_localBumpBasis[NUM_BUMP_VECTS] = +{ + { OO_SQRT_2_OVER_3, 0.0f, OO_SQRT_3 }, + { -OO_SQRT_6, OO_SQRT_2, OO_SQRT_3 }, + { -OO_SQRT_6, -OO_SQRT_2, OO_SQRT_3 } +}; + +void GetBumpNormals( const Vector& sVect, const Vector& tVect, const Vector& flatNormal, + const Vector& phongNormal, Vector bumpNormals[NUM_BUMP_VECTS] ); + +#endif // BUMPVECTS_H diff --git a/mp/src/public/mathlib/compressed_3d_unitvec.h b/mp/src/public/mathlib/compressed_3d_unitvec.h index d9f2f597..a92dba22 100644 --- a/mp/src/public/mathlib/compressed_3d_unitvec.h +++ b/mp/src/public/mathlib/compressed_3d_unitvec.h @@ -1,284 +1,284 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-#ifndef _3D_UNITVEC_H
-#define _3D_UNITVEC_H
-
-
// Expands to the out-of-class definitions of cUnitVector's two static members.
// NOTE(review): presumably intended to be expanded in exactly one translation
// unit -- confirm where it is used.
#define UNITVEC_DECLARE_STATICS \
	float cUnitVector::mUVAdjustment[0x2000]; \
	Vector cUnitVector::mTmpVec;

// upper 3 bits
#define SIGN_MASK 0xe000
#define XSIGN_MASK 0x8000
#define YSIGN_MASK 0x4000
#define ZSIGN_MASK 0x2000

// middle 6 bits - xbits
#define TOP_MASK 0x1f80

// lower 7 bits - ybits
#define BOTTOM_MASK 0x007f
-
-// unitcomp.cpp : A Unit Vector to 16-bit word conversion
-// algorithm based on work of Rafael Baptista ([email protected])
-// Accuracy improved by O.D. ([email protected])
-// Used with Permission.
-
-// a compressed unit vector. reasonable fidelty for unit
-// vectors in a 16 bit package. Good enough for surface normals
-// we hope.
-class cUnitVector // : public c3dMathObject
-{
-public:
- cUnitVector() { mVec = 0; }
- cUnitVector( const Vector& vec )
- {
- packVector( vec );
- }
- cUnitVector( unsigned short val ) { mVec = val; }
-
- cUnitVector& operator=( const Vector& vec )
- { packVector( vec ); return *this; }
-
- operator Vector()
- {
- unpackVector( mTmpVec );
- return mTmpVec;
- }
-
- void packVector( const Vector& vec )
- {
- // convert from Vector to cUnitVector
-
- Assert( vec.IsValid());
- Vector tmp = vec;
-
- // input vector does not have to be unit length
- // Assert( tmp.length() <= 1.001f );
-
- mVec = 0;
- if ( tmp.x < 0 ) { mVec |= XSIGN_MASK; tmp.x = -tmp.x; }
- if ( tmp.y < 0 ) { mVec |= YSIGN_MASK; tmp.y = -tmp.y; }
- if ( tmp.z < 0 ) { mVec |= ZSIGN_MASK; tmp.z = -tmp.z; }
-
- // project the normal onto the plane that goes through
- // X0=(1,0,0),Y0=(0,1,0),Z0=(0,0,1).
- // on that plane we choose an (projective!) coordinate system
- // such that X0->(0,0), Y0->(126,0), Z0->(0,126),(0,0,0)->Infinity
-
- // a little slower... old pack was 4 multiplies and 2 adds.
- // This is 2 multiplies, 2 adds, and a divide....
- float w = 126.0f / ( tmp.x + tmp.y + tmp.z );
- long xbits = (long)( tmp.x * w );
- long ybits = (long)( tmp.y * w );
-
- Assert( xbits < 127 );
- Assert( xbits >= 0 );
- Assert( ybits < 127 );
- Assert( ybits >= 0 );
-
- // Now we can be sure that 0<=xp<=126, 0<=yp<=126, 0<=xp+yp<=126
- // however for the sampling we want to transform this triangle
- // into a rectangle.
- if ( xbits >= 64 )
- {
- xbits = 127 - xbits;
- ybits = 127 - ybits;
- }
-
- // now we that have xp in the range (0,127) and yp in
- // the range (0,63), we can pack all the bits together
- mVec |= ( xbits << 7 );
- mVec |= ybits;
- }
-
- void unpackVector( Vector& vec )
- {
- // if we do a straightforward backward transform
- // we will get points on the plane X0,Y0,Z0
- // however we need points on a sphere that goes through
- // these points. Therefore we need to adjust x,y,z so
- // that x^2+y^2+z^2=1 by normalizing the vector. We have
- // already precalculated the amount by which we need to
- // scale, so all we do is a table lookup and a
- // multiplication
-
- // get the x and y bits
- long xbits = (( mVec & TOP_MASK ) >> 7 );
- long ybits = ( mVec & BOTTOM_MASK );
-
- // map the numbers back to the triangle (0,0)-(0,126)-(126,0)
- if (( xbits + ybits ) >= 127 )
- {
- xbits = 127 - xbits;
- ybits = 127 - ybits;
- }
-
- // do the inverse transform and normalization
- // costs 3 extra multiplies and 2 subtracts. No big deal.
- float uvadj = mUVAdjustment[mVec & ~SIGN_MASK];
- vec.x = uvadj * (float) xbits;
- vec.y = uvadj * (float) ybits;
- vec.z = uvadj * (float)( 126 - xbits - ybits );
-
- // set all the sign bits
- if ( mVec & XSIGN_MASK ) vec.x = -vec.x;
- if ( mVec & YSIGN_MASK ) vec.y = -vec.y;
- if ( mVec & ZSIGN_MASK ) vec.z = -vec.z;
-
- Assert( vec.IsValid());
- }
-
- static void initializeStatics()
- {
- for ( int idx = 0; idx < 0x2000; idx++ )
- {
- long xbits = idx >> 7;
- long ybits = idx & BOTTOM_MASK;
-
- // map the numbers back to the triangle (0,0)-(0,127)-(127,0)
- if (( xbits + ybits ) >= 127 )
- {
- xbits = 127 - xbits;
- ybits = 127 - ybits;
- }
-
- // convert to 3D vectors
- float x = (float)xbits;
- float y = (float)ybits;
- float z = (float)( 126 - xbits - ybits );
-
- // calculate the amount of normalization required
- mUVAdjustment[idx] = 1.0f / sqrtf( y*y + z*z + x*x );
- Assert( _finite( mUVAdjustment[idx]));
-
- //cerr << mUVAdjustment[idx] << "\t";
- //if ( xbits == 0 ) cerr << "\n";
- }
- }
-
-#if 0
- void test()
- {
- #define TEST_RANGE 4
- #define TEST_RANDOM 100
- #define TEST_ANGERROR 1.0
-
- float maxError = 0;
- float avgError = 0;
- int numVecs = 0;
-
- {for ( int x = -TEST_RANGE; x < TEST_RANGE; x++ )
- {
- for ( int y = -TEST_RANGE; y < TEST_RANGE; y++ )
- {
- for ( int z = -TEST_RANGE; z < TEST_RANGE; z++ )
- {
- if (( x + y + z ) == 0 ) continue;
-
- Vector vec( (float)x, (float)y, (float)z );
- Vector vec2;
-
- vec.normalize();
- packVector( vec );
- unpackVector( vec2 );
-
- float ang = vec.dot( vec2 );
- ang = (( fabs( ang ) > 0.99999f ) ? 0 : (float)acos(ang));
-
- if (( ang > TEST_ANGERROR ) | ( !_finite( ang )))
- {
- cerr << "error: " << ang << endl;
- cerr << "orig vec: " << vec.x << ",\t"
- << vec.y << ",\t" << vec.z << "\tmVec: "
- << mVec << endl;
- cerr << "quantized vec2: " << vec2.x
- << ",\t" << vec2.y << ",\t"
- << vec2.z << endl << endl;
- }
- avgError += ang;
- numVecs++;
- if ( maxError < ang ) maxError = ang;
- }
- }
- }}
-
- for ( int w = 0; w < TEST_RANDOM; w++ )
- {
- Vector vec( genRandom(), genRandom(), genRandom());
- Vector vec2;
- vec.normalize();
-
- packVector( vec );
- unpackVector( vec2 );
-
- float ang =vec.dot( vec2 );
- ang = (( ang > 0.999f ) ? 0 : (float)acos(ang));
-
- if (( ang > TEST_ANGERROR ) | ( !_finite( ang )))
- {
- cerr << "error: " << ang << endl;
- cerr << "orig vec: " << vec.x << ",\t"
- << vec.y << ",\t" << vec.z << "\tmVec: "
- << mVec << endl;
- cerr << "quantized vec2: " << vec2.x << ",\t"
- << vec2.y << ",\t"
- << vec2.z << endl << endl;
- }
- avgError += ang;
- numVecs++;
- if ( maxError < ang ) maxError = ang;
- }
-
- { for ( int x = 0; x < 50; x++ )
- {
- Vector vec( (float)x, 25.0f, 0.0f );
- Vector vec2;
-
- vec.normalize();
- packVector( vec );
- unpackVector( vec2 );
-
- float ang = vec.dot( vec2 );
- ang = (( fabs( ang ) > 0.999f ) ? 0 : (float)acos(ang));
-
- if (( ang > TEST_ANGERROR ) | ( !_finite( ang )))
- {
- cerr << "error: " << ang << endl;
- cerr << "orig vec: " << vec.x << ",\t"
- << vec.y << ",\t" << vec.z << "\tmVec: "
- << mVec << endl;
- cerr << " quantized vec2: " << vec2.x << ",\t"
- << vec2.y << ",\t" << vec2.z << endl << endl;
- }
-
- avgError += ang;
- numVecs++;
- if ( maxError < ang ) maxError = ang;
- }}
-
- cerr << "max angle error: " << maxError
- << ", average error: " << avgError / numVecs
- << ", num tested vecs: " << numVecs << endl;
- }
-
- friend ostream& operator<< ( ostream& os, const cUnitVector& vec )
- { os << vec.mVec; return os; }
-#endif
-
-//protected: // !!!!
-
- unsigned short mVec;
- static float mUVAdjustment[0x2000];
- static Vector mTmpVec;
-};
-
-#endif // _3D_VECTOR_H
-
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// +#ifndef _3D_UNITVEC_H +#define _3D_UNITVEC_H + + +#define UNITVEC_DECLARE_STATICS \ + float cUnitVector::mUVAdjustment[0x2000]; \ + Vector cUnitVector::mTmpVec; + +// upper 3 bits +#define SIGN_MASK 0xe000 +#define XSIGN_MASK 0x8000 +#define YSIGN_MASK 0x4000 +#define ZSIGN_MASK 0x2000 + +// middle 6 bits - xbits +#define TOP_MASK 0x1f80 + +// lower 7 bits - ybits +#define BOTTOM_MASK 0x007f + +// unitcomp.cpp : A Unit Vector to 16-bit word conversion +// algorithm based on work of Rafael Baptista ([email protected]) +// Accuracy improved by O.D. ([email protected]) +// Used with Permission. + +// a compressed unit vector. reasonable fidelty for unit +// vectors in a 16 bit package. Good enough for surface normals +// we hope. +class cUnitVector // : public c3dMathObject +{ +public: + cUnitVector() { mVec = 0; } + cUnitVector( const Vector& vec ) + { + packVector( vec ); + } + cUnitVector( unsigned short val ) { mVec = val; } + + cUnitVector& operator=( const Vector& vec ) + { packVector( vec ); return *this; } + + operator Vector() + { + unpackVector( mTmpVec ); + return mTmpVec; + } + + void packVector( const Vector& vec ) + { + // convert from Vector to cUnitVector + + Assert( vec.IsValid()); + Vector tmp = vec; + + // input vector does not have to be unit length + // Assert( tmp.length() <= 1.001f ); + + mVec = 0; + if ( tmp.x < 0 ) { mVec |= XSIGN_MASK; tmp.x = -tmp.x; } + if ( tmp.y < 0 ) { mVec |= YSIGN_MASK; tmp.y = -tmp.y; } + if ( tmp.z < 0 ) { mVec |= ZSIGN_MASK; tmp.z = -tmp.z; } + + // project the normal onto the plane that goes through + // X0=(1,0,0),Y0=(0,1,0),Z0=(0,0,1). + // on that plane we choose an (projective!) coordinate system + // such that X0->(0,0), Y0->(126,0), Z0->(0,126),(0,0,0)->Infinity + + // a little slower... 
old pack was 4 multiplies and 2 adds. + // This is 2 multiplies, 2 adds, and a divide.... + float w = 126.0f / ( tmp.x + tmp.y + tmp.z ); + long xbits = (long)( tmp.x * w ); + long ybits = (long)( tmp.y * w ); + + Assert( xbits < 127 ); + Assert( xbits >= 0 ); + Assert( ybits < 127 ); + Assert( ybits >= 0 ); + + // Now we can be sure that 0<=xp<=126, 0<=yp<=126, 0<=xp+yp<=126 + // however for the sampling we want to transform this triangle + // into a rectangle. + if ( xbits >= 64 ) + { + xbits = 127 - xbits; + ybits = 127 - ybits; + } + + // now we that have xp in the range (0,127) and yp in + // the range (0,63), we can pack all the bits together + mVec |= ( xbits << 7 ); + mVec |= ybits; + } + + void unpackVector( Vector& vec ) + { + // if we do a straightforward backward transform + // we will get points on the plane X0,Y0,Z0 + // however we need points on a sphere that goes through + // these points. Therefore we need to adjust x,y,z so + // that x^2+y^2+z^2=1 by normalizing the vector. We have + // already precalculated the amount by which we need to + // scale, so all we do is a table lookup and a + // multiplication + + // get the x and y bits + long xbits = (( mVec & TOP_MASK ) >> 7 ); + long ybits = ( mVec & BOTTOM_MASK ); + + // map the numbers back to the triangle (0,0)-(0,126)-(126,0) + if (( xbits + ybits ) >= 127 ) + { + xbits = 127 - xbits; + ybits = 127 - ybits; + } + + // do the inverse transform and normalization + // costs 3 extra multiplies and 2 subtracts. No big deal. 
+ float uvadj = mUVAdjustment[mVec & ~SIGN_MASK]; + vec.x = uvadj * (float) xbits; + vec.y = uvadj * (float) ybits; + vec.z = uvadj * (float)( 126 - xbits - ybits ); + + // set all the sign bits + if ( mVec & XSIGN_MASK ) vec.x = -vec.x; + if ( mVec & YSIGN_MASK ) vec.y = -vec.y; + if ( mVec & ZSIGN_MASK ) vec.z = -vec.z; + + Assert( vec.IsValid()); + } + + static void initializeStatics() + { + for ( int idx = 0; idx < 0x2000; idx++ ) + { + long xbits = idx >> 7; + long ybits = idx & BOTTOM_MASK; + + // map the numbers back to the triangle (0,0)-(0,127)-(127,0) + if (( xbits + ybits ) >= 127 ) + { + xbits = 127 - xbits; + ybits = 127 - ybits; + } + + // convert to 3D vectors + float x = (float)xbits; + float y = (float)ybits; + float z = (float)( 126 - xbits - ybits ); + + // calculate the amount of normalization required + mUVAdjustment[idx] = 1.0f / sqrtf( y*y + z*z + x*x ); + Assert( _finite( mUVAdjustment[idx])); + + //cerr << mUVAdjustment[idx] << "\t"; + //if ( xbits == 0 ) cerr << "\n"; + } + } + +#if 0 + void test() + { + #define TEST_RANGE 4 + #define TEST_RANDOM 100 + #define TEST_ANGERROR 1.0 + + float maxError = 0; + float avgError = 0; + int numVecs = 0; + + {for ( int x = -TEST_RANGE; x < TEST_RANGE; x++ ) + { + for ( int y = -TEST_RANGE; y < TEST_RANGE; y++ ) + { + for ( int z = -TEST_RANGE; z < TEST_RANGE; z++ ) + { + if (( x + y + z ) == 0 ) continue; + + Vector vec( (float)x, (float)y, (float)z ); + Vector vec2; + + vec.normalize(); + packVector( vec ); + unpackVector( vec2 ); + + float ang = vec.dot( vec2 ); + ang = (( fabs( ang ) > 0.99999f ) ? 
0 : (float)acos(ang)); + + if (( ang > TEST_ANGERROR ) | ( !_finite( ang ))) + { + cerr << "error: " << ang << endl; + cerr << "orig vec: " << vec.x << ",\t" + << vec.y << ",\t" << vec.z << "\tmVec: " + << mVec << endl; + cerr << "quantized vec2: " << vec2.x + << ",\t" << vec2.y << ",\t" + << vec2.z << endl << endl; + } + avgError += ang; + numVecs++; + if ( maxError < ang ) maxError = ang; + } + } + }} + + for ( int w = 0; w < TEST_RANDOM; w++ ) + { + Vector vec( genRandom(), genRandom(), genRandom()); + Vector vec2; + vec.normalize(); + + packVector( vec ); + unpackVector( vec2 ); + + float ang =vec.dot( vec2 ); + ang = (( ang > 0.999f ) ? 0 : (float)acos(ang)); + + if (( ang > TEST_ANGERROR ) | ( !_finite( ang ))) + { + cerr << "error: " << ang << endl; + cerr << "orig vec: " << vec.x << ",\t" + << vec.y << ",\t" << vec.z << "\tmVec: " + << mVec << endl; + cerr << "quantized vec2: " << vec2.x << ",\t" + << vec2.y << ",\t" + << vec2.z << endl << endl; + } + avgError += ang; + numVecs++; + if ( maxError < ang ) maxError = ang; + } + + { for ( int x = 0; x < 50; x++ ) + { + Vector vec( (float)x, 25.0f, 0.0f ); + Vector vec2; + + vec.normalize(); + packVector( vec ); + unpackVector( vec2 ); + + float ang = vec.dot( vec2 ); + ang = (( fabs( ang ) > 0.999f ) ? 0 : (float)acos(ang)); + + if (( ang > TEST_ANGERROR ) | ( !_finite( ang ))) + { + cerr << "error: " << ang << endl; + cerr << "orig vec: " << vec.x << ",\t" + << vec.y << ",\t" << vec.z << "\tmVec: " + << mVec << endl; + cerr << " quantized vec2: " << vec2.x << ",\t" + << vec2.y << ",\t" << vec2.z << endl << endl; + } + + avgError += ang; + numVecs++; + if ( maxError < ang ) maxError = ang; + }} + + cerr << "max angle error: " << maxError + << ", average error: " << avgError / numVecs + << ", num tested vecs: " << numVecs << endl; + } + + friend ostream& operator<< ( ostream& os, const cUnitVector& vec ) + { os << vec.mVec; return os; } +#endif + +//protected: // !!!! 
+ + unsigned short mVec; + static float mUVAdjustment[0x2000]; + static Vector mTmpVec; +}; + +#endif // _3D_VECTOR_H + + diff --git a/mp/src/public/mathlib/compressed_light_cube.h b/mp/src/public/mathlib/compressed_light_cube.h index a720808f..207f92db 100644 --- a/mp/src/public/mathlib/compressed_light_cube.h +++ b/mp/src/public/mathlib/compressed_light_cube.h @@ -1,24 +1,24 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//=============================================================================//
-
-#ifndef COMPRESSED_LIGHT_CUBE_H
-#define COMPRESSED_LIGHT_CUBE_H
-#ifdef _WIN32
-#pragma once
-#endif
-
-
-#include "mathlib/mathlib.h"
-
-
// Six ColorRGBExp32 entries.
// NOTE(review): presumably one colour per cube face; the face ordering is not
// visible in this header -- confirm against the code that fills m_Color.
struct CompressedLightCube
{
	DECLARE_BYTESWAP_DATADESC();
	ColorRGBExp32 m_Color[6];
};
-
-
-#endif // COMPRESSED_LIGHT_CUBE_H
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//=============================================================================// + +#ifndef COMPRESSED_LIGHT_CUBE_H +#define COMPRESSED_LIGHT_CUBE_H +#ifdef _WIN32 +#pragma once +#endif + + +#include "mathlib/mathlib.h" + + +struct CompressedLightCube +{ + DECLARE_BYTESWAP_DATADESC(); + ColorRGBExp32 m_Color[6]; +}; + + +#endif // COMPRESSED_LIGHT_CUBE_H diff --git a/mp/src/public/mathlib/compressed_vector.h b/mp/src/public/mathlib/compressed_vector.h index 6eb3ac5d..6a495229 100644 --- a/mp/src/public/mathlib/compressed_vector.h +++ b/mp/src/public/mathlib/compressed_vector.h @@ -1,608 +1,608 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef COMPRESSED_VECTOR_H
-#define COMPRESSED_VECTOR_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include <math.h>
-#include <float.h>
-
-// For vec_t, put this somewhere else?
-#include "basetypes.h"
-
-// For rand(). We really need a library!
-#include <stdlib.h>
-
-#include "tier0/dbg.h"
-#include "mathlib/vector.h"
-
-#include "mathlib/mathlib.h"
-
-#if defined( _X360 )
-#pragma bitfield_order( push, lsb_to_msb )
-#endif
-//=========================================================
-// fit a 3D vector into 32 bits
-//=========================================================
-
-class Vector32
-{
-public:
- // Construction/destruction:
- Vector32(void);
- Vector32(vec_t X, vec_t Y, vec_t Z);
-
- // assignment
- Vector32& operator=(const Vector &vOther);
- operator Vector ();
-
-private:
- unsigned short x:10;
- unsigned short y:10;
- unsigned short z:10;
- unsigned short exp:2;
-};
-
-inline Vector32& Vector32::operator=(const Vector &vOther)
-{
- CHECK_VALID(vOther);
-
- static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f };
-
- float fmax = Max( fabs( vOther.x ), fabs( vOther.y ) );
- fmax = Max( fmax, (float)fabs( vOther.z ) );
-
- for (exp = 0; exp < 3; exp++)
- {
- if (fmax < expScale[exp])
- break;
- }
- Assert( fmax < expScale[exp] );
-
- float fexp = 512.0f / expScale[exp];
-
- x = Clamp( (int)(vOther.x * fexp) + 512, 0, 1023 );
- y = Clamp( (int)(vOther.y * fexp) + 512, 0, 1023 );
- z = Clamp( (int)(vOther.z * fexp) + 512, 0, 1023 );
- return *this;
-}
-
-
-inline Vector32::operator Vector ()
-{
- Vector tmp;
-
- static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f };
-
- float fexp = expScale[exp] / 512.0f;
-
- tmp.x = (((int)x) - 512) * fexp;
- tmp.y = (((int)y) - 512) * fexp;
- tmp.z = (((int)z) - 512) * fexp;
- return tmp;
-}
-
-
-//=========================================================
-// Fit a unit vector into 32 bits
-//=========================================================
-
-class Normal32
-{
-public:
- // Construction/destruction:
- Normal32(void);
- Normal32(vec_t X, vec_t Y, vec_t Z);
-
- // assignment
- Normal32& operator=(const Vector &vOther);
- operator Vector ();
-
-private:
- unsigned short x:15;
- unsigned short y:15;
- unsigned short zneg:1;
-};
-
-
-inline Normal32& Normal32::operator=(const Vector &vOther)
-{
- CHECK_VALID(vOther);
-
- x = Clamp( (int)(vOther.x * 16384) + 16384, 0, 32767 );
- y = Clamp( (int)(vOther.y * 16384) + 16384, 0, 32767 );
- zneg = (vOther.z < 0);
- //x = vOther.x;
- //y = vOther.y;
- //z = vOther.z;
- return *this;
-}
-
-
-inline Normal32::operator Vector ()
-{
- Vector tmp;
-
- tmp.x = ((int)x - 16384) * (1 / 16384.0);
- tmp.y = ((int)y - 16384) * (1 / 16384.0);
- tmp.z = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y );
- if (zneg)
- tmp.z = -tmp.z;
- return tmp;
-}
-
-
-//=========================================================
-// 64 bit Quaternion
-//=========================================================
-
-class Quaternion64
-{
-public:
- // Construction/destruction:
- Quaternion64(void);
- Quaternion64(vec_t X, vec_t Y, vec_t Z);
-
- // assignment
- // Quaternion& operator=(const Quaternion64 &vOther);
- Quaternion64& operator=(const Quaternion &vOther);
- operator Quaternion ();
-private:
- uint64 x:21;
- uint64 y:21;
- uint64 z:21;
- uint64 wneg:1;
-};
-
-
-inline Quaternion64::operator Quaternion ()
-{
- Quaternion tmp;
-
- // shift to -1048576, + 1048575, then round down slightly to -1.0 < x < 1.0
- tmp.x = ((int)x - 1048576) * (1 / 1048576.5f);
- tmp.y = ((int)y - 1048576) * (1 / 1048576.5f);
- tmp.z = ((int)z - 1048576) * (1 / 1048576.5f);
- tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z );
- if (wneg)
- tmp.w = -tmp.w;
- return tmp;
-}
-
-inline Quaternion64& Quaternion64::operator=(const Quaternion &vOther)
-{
- CHECK_VALID(vOther);
-
- x = Clamp( (int)(vOther.x * 1048576) + 1048576, 0, 2097151 );
- y = Clamp( (int)(vOther.y * 1048576) + 1048576, 0, 2097151 );
- z = Clamp( (int)(vOther.z * 1048576) + 1048576, 0, 2097151 );
- wneg = (vOther.w < 0);
- return *this;
-}
-
-//=========================================================
-// 48 bit Quaternion
-//=========================================================
-
-class Quaternion48
-{
-public:
- // Construction/destruction:
- Quaternion48(void);
- Quaternion48(vec_t X, vec_t Y, vec_t Z);
-
- // assignment
- // Quaternion& operator=(const Quaternion48 &vOther);
- Quaternion48& operator=(const Quaternion &vOther);
- operator Quaternion ();
-private:
- unsigned short x:16;
- unsigned short y:16;
- unsigned short z:15;
- unsigned short wneg:1;
-};
-
-
-inline Quaternion48::operator Quaternion ()
-{
- Quaternion tmp;
-
- tmp.x = ((int)x - 32768) * (1 / 32768.0);
- tmp.y = ((int)y - 32768) * (1 / 32768.0);
- tmp.z = ((int)z - 16384) * (1 / 16384.0);
- tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z );
- if (wneg)
- tmp.w = -tmp.w;
- return tmp;
-}
-
-inline Quaternion48& Quaternion48::operator=(const Quaternion &vOther)
-{
- CHECK_VALID(vOther);
-
- x = Clamp( (int)(vOther.x * 32768) + 32768, 0, 65535 );
- y = Clamp( (int)(vOther.y * 32768) + 32768, 0, 65535 );
- z = Clamp( (int)(vOther.z * 16384) + 16384, 0, 32767 );
- wneg = (vOther.w < 0);
- return *this;
-}
-
-//=========================================================
-// 32 bit Quaternion
-//=========================================================
-
-class Quaternion32
-{
-public:
- // Construction/destruction:
- Quaternion32(void);
- Quaternion32(vec_t X, vec_t Y, vec_t Z);
-
- // assignment
- // Quaternion& operator=(const Quaternion48 &vOther);
- Quaternion32& operator=(const Quaternion &vOther);
- operator Quaternion ();
-private:
- unsigned int x:11;
- unsigned int y:10;
- unsigned int z:10;
- unsigned int wneg:1;
-};
-
-
-inline Quaternion32::operator Quaternion ()
-{
- Quaternion tmp;
-
- tmp.x = ((int)x - 1024) * (1 / 1024.0);
- tmp.y = ((int)y - 512) * (1 / 512.0);
- tmp.z = ((int)z - 512) * (1 / 512.0);
- tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z );
- if (wneg)
- tmp.w = -tmp.w;
- return tmp;
-}
-
-inline Quaternion32& Quaternion32::operator=(const Quaternion &vOther)
-{
- CHECK_VALID(vOther);
-
- x = Clamp( (int)(vOther.x * 1024) + 1024, 0, 2047 );
- y = Clamp( (int)(vOther.y * 512) + 512, 0, 1023 );
- z = Clamp( (int)(vOther.z * 512) + 512, 0, 1023 );
- wneg = (vOther.w < 0);
- return *this;
-}
-
-//=========================================================
-// 16 bit float
-//=========================================================
-
-
// Exponent biases of the 32-bit and 16-bit float encodings.
const int float32bias = 127;
const int float16bias = 15;

// Largest finite magnitude a float16 can hold; inputs are clamped to +/- this.
const float maxfloat16bits = 65504.0f;

// 16-bit float (1 sign bit, 5 exponent bits, 10 mantissa bits).
// The conversions are deliberately not IEEE-faithful at the edges:
//   - float -> half: out-of-range values and infinity saturate to +/-65504,
//     NaN and float denormals become zero, mantissas are truncated;
//   - half -> float: an infinity bit pattern reads as +/-65504, NaN reads as 0.
// The unions below assume bit-fields are allocated from the least significant
// bit upwards, which is the layout the rest of this file relies on as well.
class float16
{
public:
	//float16() {}
	//float16( float f ) { m_storage.rawWord = ConvertFloatTo16bits(f); }

	void Init() { m_storage.rawWord = 0; }
//	float16& operator=(const float16 &other) { m_storage.rawWord = other.m_storage.rawWord; return *this; }
//	float16& operator=(const float &other) { m_storage.rawWord = ConvertFloatTo16bits(other); return *this; }
//	operator unsigned short () { return m_storage.rawWord; }
//	operator float () { return Convert16bitFloatTo32bits( m_storage.rawWord ); }

	// Raw 16-bit encoding.
	unsigned short GetBits() const
	{
		return m_storage.rawWord;
	}
	// Decoded value as a 32-bit float.
	float GetFloat() const
	{
		return Convert16bitFloatTo32bits( m_storage.rawWord );
	}
	// Encodes 'in' (clamped to +/- maxfloat16bits) into this object.
	void SetFloat( float in )
	{
		m_storage.rawWord = ConvertFloatTo16bits( in );
	}

	bool IsInfinity() const
	{
		return m_storage.bits.biased_exponent == 31 && m_storage.bits.mantissa == 0;
	}
	bool IsNaN() const
	{
		return m_storage.bits.biased_exponent == 31 && m_storage.bits.mantissa != 0;
	}

	// Comparisons are on the raw encoding, not on the decoded value.
	bool operator==(const float16 other) const { return m_storage.rawWord == other.m_storage.rawWord; }
	bool operator!=(const float16 other) const { return m_storage.rawWord != other.m_storage.rawWord; }

//	bool operator< (const float other) const { return GetFloat() < other; }
//	bool operator> (const float other) const { return GetFloat() > other; }

protected:
	// Field view of a 32-bit float.
	union float32bits
	{
		float rawFloat;
		struct
		{
			unsigned int mantissa : 23;
			unsigned int biased_exponent : 8;
			unsigned int sign : 1;
		} bits;
	};

	// Field view of a 16-bit float.
	union float16bits
	{
		unsigned short rawWord;
		struct
		{
			unsigned short mantissa : 10;
			unsigned short biased_exponent : 5;
			unsigned short sign : 1;
		} bits;
	};

	static bool IsNaN( float16bits in )
	{
		return in.bits.biased_exponent == 31 && in.bits.mantissa != 0;
	}
	static bool IsInfinity( float16bits in )
	{
		return in.bits.biased_exponent == 31 && in.bits.mantissa == 0;
	}

	// Encodes a 32-bit float as a 16-bit float (half denormals are 0x0001 - 0x03ff).
	static unsigned short ConvertFloatTo16bits( float input )
	{
		if ( input > maxfloat16bits )
			input = maxfloat16bits;
		else if ( input < -maxfloat16bits )
			input = -maxfloat16bits;

		float32bits inFloat;
		inFloat.rawFloat = input;

		// Start from an all-zero word so every path below yields a fully
		// defined result; only non-zero fields need to be written.
		float16bits output;
		output.rawWord = 0;
		output.bits.sign = inFloat.bits.sign;

		if ( inFloat.bits.biased_exponent == 0 )
		{
			// zero, or a float denorm -- both map to (signed) zero
		}
		else if ( inFloat.bits.biased_exponent == 0xff )
		{
			if ( inFloat.bits.mantissa == 0 )
			{
				// infinity maps to maxfloat
				output.bits.mantissa = 0x3ff;
				output.bits.biased_exponent = 0x1e;
			}
			// otherwise NaN, which maps to zero
		}
		else
		{
			// regular number
			const int new_exp = (int)inFloat.bits.biased_exponent - float32bias;

			if ( new_exp < -24 )
			{
				// too small even for a half denorm: maps to 0
			}
			else if ( new_exp < -14 )
			{
				// this maps to a denorm; exp_val is in 1..10 here
				const unsigned int exp_val = ( unsigned int )( -14 - new_exp );
				output.bits.mantissa = ( 1 << ( 10 - exp_val ) ) + ( inFloat.bits.mantissa >> ( 13 + exp_val ) );
			}
			else if ( new_exp > 15 )
			{
				// too big. . . maps to maxfloat
				output.bits.mantissa = 0x3ff;
				output.bits.biased_exponent = 0x1e;
			}
			else
			{
				output.bits.biased_exponent = new_exp + float16bias;
				output.bits.mantissa = ( inFloat.bits.mantissa >> 13 );
			}
		}
		return output.rawWord;
	}

	// Decodes a 16-bit float encoding into a 32-bit float.
	static float Convert16bitFloatTo32bits( unsigned short input )
	{
		// Go through the union rather than casting pointers between
		// unrelated types.
		float16bits inFloat;
		inFloat.rawWord = input;

		if( IsInfinity( inFloat ) )
		{
			return maxfloat16bits * ( ( inFloat.bits.sign == 1 ) ? -1.0f : 1.0f );
		}
		if( IsNaN( inFloat ) )
		{
			return 0.0;
		}

		float32bits output;
		if( inFloat.bits.biased_exponent == 0 && inFloat.bits.mantissa != 0 )
		{
			// denorm
			const float half_denorm = (1.0f/16384.0f); // 2^-14
			float mantissa = ((float)(inFloat.bits.mantissa)) / 1024.0f;
			float sgn = (inFloat.bits.sign)? -1.0f :1.0f;
			output.rawFloat = sgn*mantissa*half_denorm;
		}
		else
		{
			// regular number (or zero, whose exponent stays 0)
			const unsigned half_exponent = inFloat.bits.biased_exponent;

			output.rawFloat = 0.0f;
			output.bits.mantissa = ( (unsigned)inFloat.bits.mantissa ) << (23-10);
			output.bits.biased_exponent = ( half_exponent != 0 ) ? ( half_exponent - float16bias + float32bias ) : 0;
			output.bits.sign = inFloat.bits.sign;
		}

		return output.rawFloat;
	}


	float16bits m_storage;
};
-
-class float16_with_assign : public float16
-{
-public:
- float16_with_assign() {}
- float16_with_assign( float f ) { m_storage.rawWord = ConvertFloatTo16bits(f); }
-
- float16& operator=(const float16 &other) { m_storage.rawWord = ((float16_with_assign &)other).m_storage.rawWord; return *this; }
- float16& operator=(const float &other) { m_storage.rawWord = ConvertFloatTo16bits(other); return *this; }
-// operator unsigned short () const { return m_storage.rawWord; }
- operator float () const { return Convert16bitFloatTo32bits( m_storage.rawWord ); }
-};
-
-//=========================================================
-// Fit a 3D vector in 48 bits
-//=========================================================
-
-class Vector48
-{
-public:
- // Construction/destruction:
- Vector48(void) {}
- Vector48(vec_t X, vec_t Y, vec_t Z) { x.SetFloat( X ); y.SetFloat( Y ); z.SetFloat( Z ); }
-
- // assignment
- Vector48& operator=(const Vector &vOther);
- operator Vector ();
-
- const float operator[]( int i ) const { return (((float16 *)this)[i]).GetFloat(); }
-
- float16 x;
- float16 y;
- float16 z;
-};
-
-inline Vector48& Vector48::operator=(const Vector &vOther)
-{
- CHECK_VALID(vOther);
-
- x.SetFloat( vOther.x );
- y.SetFloat( vOther.y );
- z.SetFloat( vOther.z );
- return *this;
-}
-
-
-inline Vector48::operator Vector ()
-{
- Vector tmp;
-
- tmp.x = x.GetFloat();
- tmp.y = y.GetFloat();
- tmp.z = z.GetFloat();
-
- return tmp;
-}
-
-//=========================================================
-// Fit a 2D vector in 32 bits
-//=========================================================
-
-class Vector2d32
-{
-public:
- // Construction/destruction:
- Vector2d32(void) {}
- Vector2d32(vec_t X, vec_t Y) { x.SetFloat( X ); y.SetFloat( Y ); }
-
- // assignment
- Vector2d32& operator=(const Vector &vOther);
- Vector2d32& operator=(const Vector2D &vOther);
-
- operator Vector2D ();
-
- void Init( vec_t ix = 0.f, vec_t iy = 0.f);
-
- float16_with_assign x;
- float16_with_assign y;
-};
-
-inline Vector2d32& Vector2d32::operator=(const Vector2D &vOther)
-{
- x.SetFloat( vOther.x );
- y.SetFloat( vOther.y );
- return *this;
-}
-
-inline Vector2d32::operator Vector2D ()
-{
- Vector2D tmp;
-
- tmp.x = x.GetFloat();
- tmp.y = y.GetFloat();
-
- return tmp;
-}
-
-inline void Vector2d32::Init( vec_t ix, vec_t iy )
-{
- x.SetFloat(ix);
- y.SetFloat(iy);
-}
-
-#if defined( _X360 )
-#pragma bitfield_order( pop )
-#endif
-
-#endif
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef COMPRESSED_VECTOR_H +#define COMPRESSED_VECTOR_H + +#ifdef _WIN32 +#pragma once +#endif + +#include <math.h> +#include <float.h> + +// For vec_t, put this somewhere else? +#include "basetypes.h" + +// For rand(). We really need a library! +#include <stdlib.h> + +#include "tier0/dbg.h" +#include "mathlib/vector.h" + +#include "mathlib/mathlib.h" + +#if defined( _X360 ) +#pragma bitfield_order( push, lsb_to_msb ) +#endif +//========================================================= +// fit a 3D vector into 32 bits +//========================================================= + +class Vector32 +{ +public: + // Construction/destruction: + Vector32(void); + Vector32(vec_t X, vec_t Y, vec_t Z); + + // assignment + Vector32& operator=(const Vector &vOther); + operator Vector (); + +private: + unsigned short x:10; + unsigned short y:10; + unsigned short z:10; + unsigned short exp:2; +}; + +inline Vector32& Vector32::operator=(const Vector &vOther) +{ + CHECK_VALID(vOther); + + static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f }; + + float fmax = Max( fabs( vOther.x ), fabs( vOther.y ) ); + fmax = Max( fmax, (float)fabs( vOther.z ) ); + + for (exp = 0; exp < 3; exp++) + { + if (fmax < expScale[exp]) + break; + } + Assert( fmax < expScale[exp] ); + + float fexp = 512.0f / expScale[exp]; + + x = Clamp( (int)(vOther.x * fexp) + 512, 0, 1023 ); + y = Clamp( (int)(vOther.y * fexp) + 512, 0, 1023 ); + z = Clamp( (int)(vOther.z * fexp) + 512, 0, 1023 ); + return *this; +} + + +inline Vector32::operator Vector () +{ + Vector tmp; + + static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f }; + + float fexp = expScale[exp] / 512.0f; + + tmp.x = (((int)x) - 512) * fexp; + tmp.y = (((int)y) - 512) * fexp; + tmp.z = (((int)z) - 512) * fexp; + return tmp; +} + + 
+//========================================================= +// Fit a unit vector into 32 bits +//========================================================= + +class Normal32 +{ +public: + // Construction/destruction: + Normal32(void); + Normal32(vec_t X, vec_t Y, vec_t Z); + + // assignment + Normal32& operator=(const Vector &vOther); + operator Vector (); + +private: + unsigned short x:15; + unsigned short y:15; + unsigned short zneg:1; +}; + + +inline Normal32& Normal32::operator=(const Vector &vOther) +{ + CHECK_VALID(vOther); + + x = Clamp( (int)(vOther.x * 16384) + 16384, 0, 32767 ); + y = Clamp( (int)(vOther.y * 16384) + 16384, 0, 32767 ); + zneg = (vOther.z < 0); + //x = vOther.x; + //y = vOther.y; + //z = vOther.z; + return *this; +} + + +inline Normal32::operator Vector () +{ + Vector tmp; + + tmp.x = ((int)x - 16384) * (1 / 16384.0); + tmp.y = ((int)y - 16384) * (1 / 16384.0); + tmp.z = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y ); + if (zneg) + tmp.z = -tmp.z; + return tmp; +} + + +//========================================================= +// 64 bit Quaternion +//========================================================= + +class Quaternion64 +{ +public: + // Construction/destruction: + Quaternion64(void); + Quaternion64(vec_t X, vec_t Y, vec_t Z); + + // assignment + // Quaternion& operator=(const Quaternion64 &vOther); + Quaternion64& operator=(const Quaternion &vOther); + operator Quaternion (); +private: + uint64 x:21; + uint64 y:21; + uint64 z:21; + uint64 wneg:1; +}; + + +inline Quaternion64::operator Quaternion () +{ + Quaternion tmp; + + // shift to -1048576, + 1048575, then round down slightly to -1.0 < x < 1.0 + tmp.x = ((int)x - 1048576) * (1 / 1048576.5f); + tmp.y = ((int)y - 1048576) * (1 / 1048576.5f); + tmp.z = ((int)z - 1048576) * (1 / 1048576.5f); + tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z ); + if (wneg) + tmp.w = -tmp.w; + return tmp; +} + +inline Quaternion64& Quaternion64::operator=(const Quaternion &vOther) +{ + 
CHECK_VALID(vOther); + + x = Clamp( (int)(vOther.x * 1048576) + 1048576, 0, 2097151 ); + y = Clamp( (int)(vOther.y * 1048576) + 1048576, 0, 2097151 ); + z = Clamp( (int)(vOther.z * 1048576) + 1048576, 0, 2097151 ); + wneg = (vOther.w < 0); + return *this; +} + +//========================================================= +// 48 bit Quaternion +//========================================================= + +class Quaternion48 +{ +public: + // Construction/destruction: + Quaternion48(void); + Quaternion48(vec_t X, vec_t Y, vec_t Z); + + // assignment + // Quaternion& operator=(const Quaternion48 &vOther); + Quaternion48& operator=(const Quaternion &vOther); + operator Quaternion (); +private: + unsigned short x:16; + unsigned short y:16; + unsigned short z:15; + unsigned short wneg:1; +}; + + +inline Quaternion48::operator Quaternion () +{ + Quaternion tmp; + + tmp.x = ((int)x - 32768) * (1 / 32768.0); + tmp.y = ((int)y - 32768) * (1 / 32768.0); + tmp.z = ((int)z - 16384) * (1 / 16384.0); + tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z ); + if (wneg) + tmp.w = -tmp.w; + return tmp; +} + +inline Quaternion48& Quaternion48::operator=(const Quaternion &vOther) +{ + CHECK_VALID(vOther); + + x = Clamp( (int)(vOther.x * 32768) + 32768, 0, 65535 ); + y = Clamp( (int)(vOther.y * 32768) + 32768, 0, 65535 ); + z = Clamp( (int)(vOther.z * 16384) + 16384, 0, 32767 ); + wneg = (vOther.w < 0); + return *this; +} + +//========================================================= +// 32 bit Quaternion +//========================================================= + +class Quaternion32 +{ +public: + // Construction/destruction: + Quaternion32(void); + Quaternion32(vec_t X, vec_t Y, vec_t Z); + + // assignment + // Quaternion& operator=(const Quaternion48 &vOther); + Quaternion32& operator=(const Quaternion &vOther); + operator Quaternion (); +private: + unsigned int x:11; + unsigned int y:10; + unsigned int z:10; + unsigned int wneg:1; +}; + + +inline Quaternion32::operator 
Quaternion () +{ + Quaternion tmp; + + tmp.x = ((int)x - 1024) * (1 / 1024.0); + tmp.y = ((int)y - 512) * (1 / 512.0); + tmp.z = ((int)z - 512) * (1 / 512.0); + tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z ); + if (wneg) + tmp.w = -tmp.w; + return tmp; +} + +inline Quaternion32& Quaternion32::operator=(const Quaternion &vOther) +{ + CHECK_VALID(vOther); + + x = Clamp( (int)(vOther.x * 1024) + 1024, 0, 2047 ); + y = Clamp( (int)(vOther.y * 512) + 512, 0, 1023 ); + z = Clamp( (int)(vOther.z * 512) + 512, 0, 1023 ); + wneg = (vOther.w < 0); + return *this; +} + +//========================================================= +// 16 bit float +//========================================================= + + +const int float32bias = 127; +const int float16bias = 15; + +const float maxfloat16bits = 65504.0f; + +class float16 +{ +public: + //float16() {} + //float16( float f ) { m_storage.rawWord = ConvertFloatTo16bits(f); } + + void Init() { m_storage.rawWord = 0; } +// float16& operator=(const float16 &other) { m_storage.rawWord = other.m_storage.rawWord; return *this; } +// float16& operator=(const float &other) { m_storage.rawWord = ConvertFloatTo16bits(other); return *this; } +// operator unsigned short () { return m_storage.rawWord; } +// operator float () { return Convert16bitFloatTo32bits( m_storage.rawWord ); } + unsigned short GetBits() const + { + return m_storage.rawWord; + } + float GetFloat() const + { + return Convert16bitFloatTo32bits( m_storage.rawWord ); + } + void SetFloat( float in ) + { + m_storage.rawWord = ConvertFloatTo16bits( in ); + } + + bool IsInfinity() const + { + return m_storage.bits.biased_exponent == 31 && m_storage.bits.mantissa == 0; + } + bool IsNaN() const + { + return m_storage.bits.biased_exponent == 31 && m_storage.bits.mantissa != 0; + } + + bool operator==(const float16 other) const { return m_storage.rawWord == other.m_storage.rawWord; } + bool operator!=(const float16 other) const { return m_storage.rawWord != 
other.m_storage.rawWord; } + +// bool operator< (const float other) const { return GetFloat() < other; } +// bool operator> (const float other) const { return GetFloat() > other; } + +protected: + union float32bits + { + float rawFloat; + struct + { + unsigned int mantissa : 23; + unsigned int biased_exponent : 8; + unsigned int sign : 1; + } bits; + }; + + union float16bits + { + unsigned short rawWord; + struct + { + unsigned short mantissa : 10; + unsigned short biased_exponent : 5; + unsigned short sign : 1; + } bits; + }; + + static bool IsNaN( float16bits in ) + { + return in.bits.biased_exponent == 31 && in.bits.mantissa != 0; + } + static bool IsInfinity( float16bits in ) + { + return in.bits.biased_exponent == 31 && in.bits.mantissa == 0; + } + + // 0x0001 - 0x03ff + static unsigned short ConvertFloatTo16bits( float input ) + { + if ( input > maxfloat16bits ) + input = maxfloat16bits; + else if ( input < -maxfloat16bits ) + input = -maxfloat16bits; + + float16bits output; + float32bits inFloat; + + inFloat.rawFloat = input; + + output.bits.sign = inFloat.bits.sign; + + if ( (inFloat.bits.biased_exponent==0) && (inFloat.bits.mantissa==0) ) + { + // zero + output.bits.mantissa = 0; + output.bits.biased_exponent = 0; + } + else if ( (inFloat.bits.biased_exponent==0) && (inFloat.bits.mantissa!=0) ) + { + // denorm -- denorm float maps to 0 half + output.bits.mantissa = 0; + output.bits.biased_exponent = 0; + } + else if ( (inFloat.bits.biased_exponent==0xff) && (inFloat.bits.mantissa==0) ) + { +#if 0 + // infinity + output.bits.mantissa = 0; + output.bits.biased_exponent = 31; +#else + // infinity maps to maxfloat + output.bits.mantissa = 0x3ff; + output.bits.biased_exponent = 0x1e; +#endif + } + else if ( (inFloat.bits.biased_exponent==0xff) && (inFloat.bits.mantissa!=0) ) + { +#if 0 + // NaN + output.bits.mantissa = 1; + output.bits.biased_exponent = 31; +#else + // NaN maps to zero + output.bits.mantissa = 0; + output.bits.biased_exponent = 0; +#endif + } + 
else + { + // regular number + int new_exp = inFloat.bits.biased_exponent-127; + + if (new_exp<-24) + { + // this maps to 0 + output.bits.mantissa = 0; + output.bits.biased_exponent = 0; + } + + if (new_exp<-14) + { + // this maps to a denorm + output.bits.biased_exponent = 0; + unsigned int exp_val = ( unsigned int )( -14 - ( inFloat.bits.biased_exponent - float32bias ) ); + if( exp_val > 0 && exp_val < 11 ) + { + output.bits.mantissa = ( 1 << ( 10 - exp_val ) ) + ( inFloat.bits.mantissa >> ( 13 + exp_val ) ); + } + } + else if (new_exp>15) + { +#if 0 + // map this value to infinity + output.bits.mantissa = 0; + output.bits.biased_exponent = 31; +#else + // to big. . . maps to maxfloat + output.bits.mantissa = 0x3ff; + output.bits.biased_exponent = 0x1e; +#endif + } + else + { + output.bits.biased_exponent = new_exp+15; + output.bits.mantissa = (inFloat.bits.mantissa >> 13); + } + } + return output.rawWord; + } + + static float Convert16bitFloatTo32bits( unsigned short input ) + { + float32bits output; + const float16bits &inFloat = *((float16bits *)&input); + + if( IsInfinity( inFloat ) ) + { + return maxfloat16bits * ( ( inFloat.bits.sign == 1 ) ? -1.0f : 1.0f ); + } + if( IsNaN( inFloat ) ) + { + return 0.0; + } + if( inFloat.bits.biased_exponent == 0 && inFloat.bits.mantissa != 0 ) + { + // denorm + const float half_denorm = (1.0f/16384.0f); // 2^-14 + float mantissa = ((float)(inFloat.bits.mantissa)) / 1024.0f; + float sgn = (inFloat.bits.sign)? 
-1.0f :1.0f; + output.rawFloat = sgn*mantissa*half_denorm; + } + else + { + // regular number + unsigned mantissa = inFloat.bits.mantissa; + unsigned biased_exponent = inFloat.bits.biased_exponent; + unsigned sign = ((unsigned)inFloat.bits.sign) << 31; + biased_exponent = ( (biased_exponent - float16bias + float32bias) * (biased_exponent != 0) ) << 23; + mantissa <<= (23-10); + + *((unsigned *)&output) = ( mantissa | biased_exponent | sign ); + } + + return output.rawFloat; + } + + + float16bits m_storage; +}; + +class float16_with_assign : public float16 +{ +public: + float16_with_assign() {} + float16_with_assign( float f ) { m_storage.rawWord = ConvertFloatTo16bits(f); } + + float16& operator=(const float16 &other) { m_storage.rawWord = ((float16_with_assign &)other).m_storage.rawWord; return *this; } + float16& operator=(const float &other) { m_storage.rawWord = ConvertFloatTo16bits(other); return *this; } +// operator unsigned short () const { return m_storage.rawWord; } + operator float () const { return Convert16bitFloatTo32bits( m_storage.rawWord ); } +}; + +//========================================================= +// Fit a 3D vector in 48 bits +//========================================================= + +class Vector48 +{ +public: + // Construction/destruction: + Vector48(void) {} + Vector48(vec_t X, vec_t Y, vec_t Z) { x.SetFloat( X ); y.SetFloat( Y ); z.SetFloat( Z ); } + + // assignment + Vector48& operator=(const Vector &vOther); + operator Vector (); + + const float operator[]( int i ) const { return (((float16 *)this)[i]).GetFloat(); } + + float16 x; + float16 y; + float16 z; +}; + +inline Vector48& Vector48::operator=(const Vector &vOther) +{ + CHECK_VALID(vOther); + + x.SetFloat( vOther.x ); + y.SetFloat( vOther.y ); + z.SetFloat( vOther.z ); + return *this; +} + + +inline Vector48::operator Vector () +{ + Vector tmp; + + tmp.x = x.GetFloat(); + tmp.y = y.GetFloat(); + tmp.z = z.GetFloat(); + + return tmp; +} + 
+//========================================================= +// Fit a 2D vector in 32 bits +//========================================================= + +class Vector2d32 +{ +public: + // Construction/destruction: + Vector2d32(void) {} + Vector2d32(vec_t X, vec_t Y) { x.SetFloat( X ); y.SetFloat( Y ); } + + // assignment + Vector2d32& operator=(const Vector &vOther); + Vector2d32& operator=(const Vector2D &vOther); + + operator Vector2D (); + + void Init( vec_t ix = 0.f, vec_t iy = 0.f); + + float16_with_assign x; + float16_with_assign y; +}; + +inline Vector2d32& Vector2d32::operator=(const Vector2D &vOther) +{ + x.SetFloat( vOther.x ); + y.SetFloat( vOther.y ); + return *this; +} + +inline Vector2d32::operator Vector2D () +{ + Vector2D tmp; + + tmp.x = x.GetFloat(); + tmp.y = y.GetFloat(); + + return tmp; +} + +inline void Vector2d32::Init( vec_t ix, vec_t iy ) +{ + x.SetFloat(ix); + y.SetFloat(iy); +} + +#if defined( _X360 ) +#pragma bitfield_order( pop ) +#endif + +#endif + diff --git a/mp/src/public/mathlib/halton.h b/mp/src/public/mathlib/halton.h index 204e5fd5..44df68ff 100644 --- a/mp/src/public/mathlib/halton.h +++ b/mp/src/public/mathlib/halton.h @@ -1,71 +1,71 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-// $Id$
-
-// halton.h - classes, etc for generating numbers using the Halton pseudo-random sequence. See
-// http://halton-sequences.wikiverse.org/.
-//
-// what this function is useful for is any sort of sampling/integration problem where
-// you want to solve it by random sampling. Each call the NextValue() generates
-// a random number between 0 and 1, in an unclumped manner, so that the space can be more
-// or less evenly sampled with a minimum number of samples.
-//
-// It is NOT useful for generating random numbers dynamically, since the outputs aren't
-// particularly random.
-//
-// To generate multidimensional sample values (points in a plane, etc), use two
-// HaltonSequenceGenerator_t's, with different (primes) bases.
-
-#ifndef HALTON_H
-#define HALTON_H
-
-#include <tier0/platform.h>
-#include <mathlib/vector.h>
-
-class HaltonSequenceGenerator_t
-{
- int seed;
- int base;
- float fbase; //< base as a float
-
-public:
- HaltonSequenceGenerator_t(int base); //< base MUST be prime, >=2
-
- float GetElement(int element);
-
- inline float NextValue(void)
- {
- return GetElement(seed++);
- }
-
-};
-
-
-class DirectionalSampler_t //< pseudo-random sphere sampling
-{
- HaltonSequenceGenerator_t zdot;
- HaltonSequenceGenerator_t vrot;
-public:
- DirectionalSampler_t(void)
- : zdot(2),vrot(3)
- {
- }
-
- Vector NextValue(void)
- {
- float zvalue=zdot.NextValue();
- zvalue=2*zvalue-1.0; // map from 0..1 to -1..1
- float phi=acos(zvalue);
- // now, generate a random rotation angle for x/y
- float theta=2.0*M_PI*vrot.NextValue();
- float sin_p=sin(phi);
- return Vector(cos(theta)*sin_p,
- sin(theta)*sin_p,
- zvalue);
-
- }
-};
-
-
-
-
-#endif // halton_h
+//========= Copyright Valve Corporation, All rights reserved. ============// +// $Id$ + +// halton.h - classes, etc for generating numbers using the Halton pseudo-random sequence. See +// http://halton-sequences.wikiverse.org/. +// +// what this function is useful for is any sort of sampling/integration problem where +// you want to solve it by random sampling. Each call the NextValue() generates +// a random number between 0 and 1, in an unclumped manner, so that the space can be more +// or less evenly sampled with a minimum number of samples. +// +// It is NOT useful for generating random numbers dynamically, since the outputs aren't +// particularly random. +// +// To generate multidimensional sample values (points in a plane, etc), use two +// HaltonSequenceGenerator_t's, with different (primes) bases. + +#ifndef HALTON_H +#define HALTON_H + +#include <tier0/platform.h> +#include <mathlib/vector.h> + +class HaltonSequenceGenerator_t +{ + int seed; + int base; + float fbase; //< base as a float + +public: + HaltonSequenceGenerator_t(int base); //< base MUST be prime, >=2 + + float GetElement(int element); + + inline float NextValue(void) + { + return GetElement(seed++); + } + +}; + + +class DirectionalSampler_t //< pseudo-random sphere sampling +{ + HaltonSequenceGenerator_t zdot; + HaltonSequenceGenerator_t vrot; +public: + DirectionalSampler_t(void) + : zdot(2),vrot(3) + { + } + + Vector NextValue(void) + { + float zvalue=zdot.NextValue(); + zvalue=2*zvalue-1.0; // map from 0..1 to -1..1 + float phi=acos(zvalue); + // now, generate a random rotation angle for x/y + float theta=2.0*M_PI*vrot.NextValue(); + float sin_p=sin(phi); + return Vector(cos(theta)*sin_p, + sin(theta)*sin_p, + zvalue); + + } +}; + + + + +#endif // halton_h diff --git a/mp/src/public/mathlib/lightdesc.h b/mp/src/public/mathlib/lightdesc.h index d03e3e19..1096d623 100644 --- a/mp/src/public/mathlib/lightdesc.h +++ b/mp/src/public/mathlib/lightdesc.h @@ -1,173 +1,173 @@ -//========= 
Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//===========================================================================//
-
-// light structure definitions.
-#ifndef LIGHTDESC_H
-#define LIGHTDESC_H
-
-#include <mathlib/ssemath.h>
-#include <mathlib/vector.h>
-
-//-----------------------------------------------------------------------------
-// Light structure
-//-----------------------------------------------------------------------------
-
-enum LightType_t
-{
- MATERIAL_LIGHT_DISABLE = 0,
- MATERIAL_LIGHT_POINT,
- MATERIAL_LIGHT_DIRECTIONAL,
- MATERIAL_LIGHT_SPOT,
-};
-
-enum LightType_OptimizationFlags_t
-{
- LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 = 1,
- LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 = 2,
- LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 = 4,
- LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED = 8,
-};
-
-struct LightDesc_t
-{
- LightType_t m_Type; //< MATERIAL_LIGHT_xxx
- Vector m_Color; //< color+intensity
- Vector m_Position; //< light source center position
- Vector m_Direction; //< for SPOT, direction it is pointing
- float m_Range; //< distance range for light.0=infinite
- float m_Falloff; //< angular falloff exponent for spot lights
- float m_Attenuation0; //< constant distance falloff term
- float m_Attenuation1; //< linear term of falloff
- float m_Attenuation2; //< quadatic term of falloff
- float m_Theta; //< inner cone angle. no angular falloff
- //< within this cone
- float m_Phi; //< outer cone angle
-
- // the values below are derived from the above settings for optimizations
- // These aren't used by DX8. . used for software lighting.
- float m_ThetaDot;
- float m_PhiDot;
- unsigned int m_Flags;
-protected:
- float OneOver_ThetaDot_Minus_PhiDot;
- float m_RangeSquared;
-public:
-
- void RecalculateDerivedValues(void); // calculate m_xxDot, m_Type for changed parms
-
- LightDesc_t(void)
- {
- }
-
- // constructors for various useful subtypes
-
- // a point light with infinite range
- LightDesc_t( const Vector &pos, const Vector &color )
- {
- InitPoint( pos, color );
- }
-
- /// a simple light. cone boundaries in radians. you pass a look_at point and the
- /// direciton is derived from that.
- LightDesc_t( const Vector &pos, const Vector &color, const Vector &point_at,
- float inner_cone_boundary, float outer_cone_boundary )
- {
- InitSpot( pos, color, point_at, inner_cone_boundary, outer_cone_boundary );
- }
-
- void InitPoint( const Vector &pos, const Vector &color );
- void InitDirectional( const Vector &dir, const Vector &color );
- void InitSpot(const Vector &pos, const Vector &color, const Vector &point_at,
- float inner_cone_boundary, float outer_cone_boundary );
-
- /// Given 4 points and 4 normals, ADD lighting from this light into "color".
- void ComputeLightAtPoints( const FourVectors &pos, const FourVectors &normal,
- FourVectors &color, bool DoHalfLambert=false ) const;
- void ComputeNonincidenceLightAtPoints( const FourVectors &pos, FourVectors &color ) const;
- void ComputeLightAtPointsForDirectional( const FourVectors &pos,
- const FourVectors &normal,
- FourVectors &color, bool DoHalfLambert=false ) const;
-
- // warning - modifies color!!! set color first!!
- void SetupOldStyleAttenuation( float fQuadatricAttn, float fLinearAttn, float fConstantAttn );
-
- void SetupNewStyleAttenuation( float fFiftyPercentDistance, float fZeroPercentDistance );
-
-
-/// given a direction relative to the light source position, is this ray within the
- /// light cone (for spotlights..non spots consider all rays to be within their cone)
- bool IsDirectionWithinLightCone(const Vector &rdir) const
- {
- return ((m_Type!=MATERIAL_LIGHT_SPOT) || (rdir.Dot(m_Direction)>=m_PhiDot));
- }
-
- float OneOverThetaDotMinusPhiDot() const
- {
- return OneOver_ThetaDot_Minus_PhiDot;
- }
-};
-
-
-//-----------------------------------------------------------------------------
-// a point light with infinite range
-//-----------------------------------------------------------------------------
-inline void LightDesc_t::InitPoint( const Vector &pos, const Vector &color )
-{
- m_Type=MATERIAL_LIGHT_POINT;
- m_Color=color;
- m_Position=pos;
- m_Range=0.0; // infinite
- m_Attenuation0=1.0;
- m_Attenuation1=0;
- m_Attenuation2=0;
- RecalculateDerivedValues();
-}
-
-
-//-----------------------------------------------------------------------------
-// a directional light with infinite range
-//-----------------------------------------------------------------------------
-inline void LightDesc_t::InitDirectional( const Vector &dir, const Vector &color )
-{
- m_Type=MATERIAL_LIGHT_DIRECTIONAL;
- m_Color=color;
- m_Direction=dir;
- m_Range=0.0; // infinite
- m_Attenuation0=1.0;
- m_Attenuation1=0;
- m_Attenuation2=0;
- RecalculateDerivedValues();
-}
-
-
-//-----------------------------------------------------------------------------
-// a simple light. cone boundaries in radians. you pass a look_at point and the
-// direciton is derived from that.
-//-----------------------------------------------------------------------------
-inline void LightDesc_t::InitSpot(const Vector &pos, const Vector &color, const Vector &point_at,
- float inner_cone_boundary, float outer_cone_boundary)
-{
- m_Type=MATERIAL_LIGHT_SPOT;
- m_Color=color;
- m_Position=pos;
- m_Direction=point_at;
- m_Direction-=pos;
- VectorNormalizeFast(m_Direction);
- m_Falloff=5.0; // linear angle falloff
- m_Theta=inner_cone_boundary;
- m_Phi=outer_cone_boundary;
-
- m_Range=0.0; // infinite
-
- m_Attenuation0=1.0;
- m_Attenuation1=0;
- m_Attenuation2=0;
- RecalculateDerivedValues();
-}
-
-
-#endif
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//===========================================================================// + +// light structure definitions. +#ifndef LIGHTDESC_H +#define LIGHTDESC_H + +#include <mathlib/ssemath.h> +#include <mathlib/vector.h> + +//----------------------------------------------------------------------------- +// Light structure +//----------------------------------------------------------------------------- + +enum LightType_t +{ + MATERIAL_LIGHT_DISABLE = 0, + MATERIAL_LIGHT_POINT, + MATERIAL_LIGHT_DIRECTIONAL, + MATERIAL_LIGHT_SPOT, +}; + +enum LightType_OptimizationFlags_t +{ + LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 = 1, + LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 = 2, + LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 = 4, + LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED = 8, +}; + +struct LightDesc_t +{ + LightType_t m_Type; //< MATERIAL_LIGHT_xxx + Vector m_Color; //< color+intensity + Vector m_Position; //< light source center position + Vector m_Direction; //< for SPOT, direction it is pointing + float m_Range; //< distance range for light.0=infinite + float m_Falloff; //< angular falloff exponent for spot lights + float m_Attenuation0; //< constant distance falloff term + float m_Attenuation1; //< linear term of falloff + float m_Attenuation2; //< quadatic term of falloff + float m_Theta; //< inner cone angle. no angular falloff + //< within this cone + float m_Phi; //< outer cone angle + + // the values below are derived from the above settings for optimizations + // These aren't used by DX8. . used for software lighting. 
+ float m_ThetaDot; + float m_PhiDot; + unsigned int m_Flags; +protected: + float OneOver_ThetaDot_Minus_PhiDot; + float m_RangeSquared; +public: + + void RecalculateDerivedValues(void); // calculate m_xxDot, m_Type for changed parms + + LightDesc_t(void) + { + } + + // constructors for various useful subtypes + + // a point light with infinite range + LightDesc_t( const Vector &pos, const Vector &color ) + { + InitPoint( pos, color ); + } + + /// a simple light. cone boundaries in radians. you pass a look_at point and the + /// direciton is derived from that. + LightDesc_t( const Vector &pos, const Vector &color, const Vector &point_at, + float inner_cone_boundary, float outer_cone_boundary ) + { + InitSpot( pos, color, point_at, inner_cone_boundary, outer_cone_boundary ); + } + + void InitPoint( const Vector &pos, const Vector &color ); + void InitDirectional( const Vector &dir, const Vector &color ); + void InitSpot(const Vector &pos, const Vector &color, const Vector &point_at, + float inner_cone_boundary, float outer_cone_boundary ); + + /// Given 4 points and 4 normals, ADD lighting from this light into "color". + void ComputeLightAtPoints( const FourVectors &pos, const FourVectors &normal, + FourVectors &color, bool DoHalfLambert=false ) const; + void ComputeNonincidenceLightAtPoints( const FourVectors &pos, FourVectors &color ) const; + void ComputeLightAtPointsForDirectional( const FourVectors &pos, + const FourVectors &normal, + FourVectors &color, bool DoHalfLambert=false ) const; + + // warning - modifies color!!! set color first!! 
+ void SetupOldStyleAttenuation( float fQuadatricAttn, float fLinearAttn, float fConstantAttn ); + + void SetupNewStyleAttenuation( float fFiftyPercentDistance, float fZeroPercentDistance ); + + +/// given a direction relative to the light source position, is this ray within the + /// light cone (for spotlights..non spots consider all rays to be within their cone) + bool IsDirectionWithinLightCone(const Vector &rdir) const + { + return ((m_Type!=MATERIAL_LIGHT_SPOT) || (rdir.Dot(m_Direction)>=m_PhiDot)); + } + + float OneOverThetaDotMinusPhiDot() const + { + return OneOver_ThetaDot_Minus_PhiDot; + } +}; + + +//----------------------------------------------------------------------------- +// a point light with infinite range +//----------------------------------------------------------------------------- +inline void LightDesc_t::InitPoint( const Vector &pos, const Vector &color ) +{ + m_Type=MATERIAL_LIGHT_POINT; + m_Color=color; + m_Position=pos; + m_Range=0.0; // infinite + m_Attenuation0=1.0; + m_Attenuation1=0; + m_Attenuation2=0; + RecalculateDerivedValues(); +} + + +//----------------------------------------------------------------------------- +// a directional light with infinite range +//----------------------------------------------------------------------------- +inline void LightDesc_t::InitDirectional( const Vector &dir, const Vector &color ) +{ + m_Type=MATERIAL_LIGHT_DIRECTIONAL; + m_Color=color; + m_Direction=dir; + m_Range=0.0; // infinite + m_Attenuation0=1.0; + m_Attenuation1=0; + m_Attenuation2=0; + RecalculateDerivedValues(); +} + + +//----------------------------------------------------------------------------- +// a simple light. cone boundaries in radians. you pass a look_at point and the +// direciton is derived from that. 
+//----------------------------------------------------------------------------- +inline void LightDesc_t::InitSpot(const Vector &pos, const Vector &color, const Vector &point_at, + float inner_cone_boundary, float outer_cone_boundary) +{ + m_Type=MATERIAL_LIGHT_SPOT; + m_Color=color; + m_Position=pos; + m_Direction=point_at; + m_Direction-=pos; + VectorNormalizeFast(m_Direction); + m_Falloff=5.0; // linear angle falloff + m_Theta=inner_cone_boundary; + m_Phi=outer_cone_boundary; + + m_Range=0.0; // infinite + + m_Attenuation0=1.0; + m_Attenuation1=0; + m_Attenuation2=0; + RecalculateDerivedValues(); +} + + +#endif + diff --git a/mp/src/public/mathlib/math_pfns.h b/mp/src/public/mathlib/math_pfns.h index 4436eab5..d43411ce 100644 --- a/mp/src/public/mathlib/math_pfns.h +++ b/mp/src/public/mathlib/math_pfns.h @@ -1,80 +1,80 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//=====================================================================================//
-
-#ifndef _MATH_PFNS_H_
-#define _MATH_PFNS_H_
-
-#if defined( _X360 )
-#include <xboxmath.h>
-#endif
-
-#if !defined( _X360 )
-
-// These globals are initialized by mathlib and redirected based on available fpu features
-extern float (*pfSqrt)(float x);
-extern float (*pfRSqrt)(float x);
-extern float (*pfRSqrtFast)(float x);
-extern void (*pfFastSinCos)(float x, float *s, float *c);
-extern float (*pfFastCos)(float x);
-
-// The following are not declared as macros because they are often used in limiting situations,
-// and sometimes the compiler simply refuses to inline them for some reason
-#define FastSqrt(x) (*pfSqrt)(x)
-#define FastRSqrt(x) (*pfRSqrt)(x)
-#define FastRSqrtFast(x) (*pfRSqrtFast)(x)
-#define FastSinCos(x,s,c) (*pfFastSinCos)(x,s,c)
-#define FastCos(x) (*pfFastCos)(x)
-
-#if defined(__i386__) || defined(_M_IX86)
-// On x86, the inline FPU or SSE sqrt instruction is faster than
-// the overhead of setting up a function call and saving/restoring
-// the FPU or SSE register state and can be scheduled better, too.
-#undef FastSqrt
-#define FastSqrt(x) ::sqrtf(x)
-#endif
-
-#endif // !_X360
-
-#if defined( _X360 )
-
-FORCEINLINE float _VMX_Sqrt( float x )
-{
- return __fsqrts( x );
-}
-
-FORCEINLINE float _VMX_RSqrt( float x )
-{
- float rroot = __frsqrte( x );
-
- // Single iteration NewtonRaphson on reciprocal square root estimate
- return (0.5f * rroot) * (3.0f - (x * rroot) * rroot);
-}
-
-FORCEINLINE float _VMX_RSqrtFast( float x )
-{
- return __frsqrte( x );
-}
-
-FORCEINLINE void _VMX_SinCos( float a, float *pS, float *pC )
-{
- XMScalarSinCos( pS, pC, a );
-}
-
-FORCEINLINE float _VMX_Cos( float a )
-{
- return XMScalarCos( a );
-}
-
-// the 360 has fixed hw and calls directly
-#define FastSqrt(x) _VMX_Sqrt(x)
-#define FastRSqrt(x) _VMX_RSqrt(x)
-#define FastRSqrtFast(x) _VMX_RSqrtFast(x)
-#define FastSinCos(x,s,c) _VMX_SinCos(x,s,c)
-#define FastCos(x) _VMX_Cos(x)
-
-#endif // _X360
-
-#endif // _MATH_PFNS_H_
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//=====================================================================================// + +#ifndef _MATH_PFNS_H_ +#define _MATH_PFNS_H_ + +#if defined( _X360 ) +#include <xboxmath.h> +#endif + +#if !defined( _X360 ) + +// These globals are initialized by mathlib and redirected based on available fpu features +extern float (*pfSqrt)(float x); +extern float (*pfRSqrt)(float x); +extern float (*pfRSqrtFast)(float x); +extern void (*pfFastSinCos)(float x, float *s, float *c); +extern float (*pfFastCos)(float x); + +// The following are not declared as macros because they are often used in limiting situations, +// and sometimes the compiler simply refuses to inline them for some reason +#define FastSqrt(x) (*pfSqrt)(x) +#define FastRSqrt(x) (*pfRSqrt)(x) +#define FastRSqrtFast(x) (*pfRSqrtFast)(x) +#define FastSinCos(x,s,c) (*pfFastSinCos)(x,s,c) +#define FastCos(x) (*pfFastCos)(x) + +#if defined(__i386__) || defined(_M_IX86) +// On x86, the inline FPU or SSE sqrt instruction is faster than +// the overhead of setting up a function call and saving/restoring +// the FPU or SSE register state and can be scheduled better, too. 
+#undef FastSqrt +#define FastSqrt(x) ::sqrtf(x) +#endif + +#endif // !_X360 + +#if defined( _X360 ) + +FORCEINLINE float _VMX_Sqrt( float x ) +{ + return __fsqrts( x ); +} + +FORCEINLINE float _VMX_RSqrt( float x ) +{ + float rroot = __frsqrte( x ); + + // Single iteration NewtonRaphson on reciprocal square root estimate + return (0.5f * rroot) * (3.0f - (x * rroot) * rroot); +} + +FORCEINLINE float _VMX_RSqrtFast( float x ) +{ + return __frsqrte( x ); +} + +FORCEINLINE void _VMX_SinCos( float a, float *pS, float *pC ) +{ + XMScalarSinCos( pS, pC, a ); +} + +FORCEINLINE float _VMX_Cos( float a ) +{ + return XMScalarCos( a ); +} + +// the 360 has fixed hw and calls directly +#define FastSqrt(x) _VMX_Sqrt(x) +#define FastRSqrt(x) _VMX_RSqrt(x) +#define FastRSqrtFast(x) _VMX_RSqrtFast(x) +#define FastSinCos(x,s,c) _VMX_SinCos(x,s,c) +#define FastCos(x) _VMX_Cos(x) + +#endif // _X360 + +#endif // _MATH_PFNS_H_ diff --git a/mp/src/public/mathlib/mathlib.h b/mp/src/public/mathlib/mathlib.h index e1873cd0..f734ae68 100644 --- a/mp/src/public/mathlib/mathlib.h +++ b/mp/src/public/mathlib/mathlib.h @@ -1,2186 +1,2186 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//===========================================================================//
-
-#ifndef MATH_LIB_H
-#define MATH_LIB_H
-
-#include <math.h>
-#include "tier0/basetypes.h"
-#include "tier0/commonmacros.h"
-#include "mathlib/vector.h"
-#include "mathlib/vector2d.h"
-#include "tier0/dbg.h"
-
-#include "mathlib/math_pfns.h"
-
-#if defined(__i386__) || defined(_M_IX86)
-// For MMX intrinsics
-#include <xmmintrin.h>
-#endif
-
-// XXX remove me
-#undef clamp
-
-// Uncomment this to enable FP exceptions in parts of the code.
-// This can help track down FP bugs. However the code is not
-// FP exception clean so this not a turnkey operation.
-//#define FP_EXCEPTIONS_ENABLED
-
-
-#ifdef FP_EXCEPTIONS_ENABLED
-#include <float.h> // For _clearfp and _controlfp_s
-#endif
-
-// FPExceptionDisabler and FPExceptionEnabler taken from my blog post
-// at http://www.altdevblogaday.com/2012/04/20/exceptional-floating-point/
-
-// Declare an object of this type in a scope in order to suppress
-// all floating-point exceptions temporarily. The old exception
-// state will be reset at the end.
-class FPExceptionDisabler
-{
-public:
-#ifdef FP_EXCEPTIONS_ENABLED
- FPExceptionDisabler();
- ~FPExceptionDisabler();
-
-private:
- unsigned int mOldValues;
-#else
- FPExceptionDisabler() {}
- ~FPExceptionDisabler() {}
-#endif
-
-private:
- // Make the copy constructor and assignment operator private
- // and unimplemented to prohibit copying.
- FPExceptionDisabler(const FPExceptionDisabler&);
- FPExceptionDisabler& operator=(const FPExceptionDisabler&);
-};
-
-// Declare an object of this type in a scope in order to enable a
-// specified set of floating-point exceptions temporarily. The old
-// exception state will be reset at the end.
-// This class can be nested.
-class FPExceptionEnabler
-{
-public:
- // Overflow, divide-by-zero, and invalid-operation are the FP
- // exceptions most frequently associated with bugs.
-#ifdef FP_EXCEPTIONS_ENABLED
- FPExceptionEnabler(unsigned int enableBits = _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID);
- ~FPExceptionEnabler();
-
-private:
- unsigned int mOldValues;
-#else
- FPExceptionEnabler(unsigned int enableBits = 0)
- {
- }
- ~FPExceptionEnabler()
- {
- }
-#endif
-
-private:
- // Make the copy constructor and assignment operator private
- // and unimplemented to prohibit copying.
- FPExceptionEnabler(const FPExceptionEnabler&);
- FPExceptionEnabler& operator=(const FPExceptionEnabler&);
-};
-
-
-
-#ifdef DEBUG // stop crashing edit-and-continue
-FORCEINLINE float clamp( float val, float minVal, float maxVal )
-{
- if ( maxVal < minVal )
- return maxVal;
- else if( val < minVal )
- return minVal;
- else if( val > maxVal )
- return maxVal;
- else
- return val;
-}
-#else // DEBUG
-FORCEINLINE float clamp( float val, float minVal, float maxVal )
-{
-#if defined(__i386__) || defined(_M_IX86)
- _mm_store_ss( &val,
- _mm_min_ss(
- _mm_max_ss(
- _mm_load_ss(&val),
- _mm_load_ss(&minVal) ),
- _mm_load_ss(&maxVal) ) );
-#else
- val = fpmax(minVal, val);
- val = fpmin(maxVal, val);
-#endif
- return val;
-}
-#endif // DEBUG
-
-//
-// Returns a clamped value in the range [min, max].
-//
-template< class T >
-inline T clamp( T const &val, T const &minVal, T const &maxVal )
-{
- if ( maxVal < minVal )
- return maxVal;
- else if( val < minVal )
- return minVal;
- else if( val > maxVal )
- return maxVal;
- else
- return val;
-}
-
-
-// plane_t structure
-// !!! if this is changed, it must be changed in asm code too !!!
-// FIXME: does the asm code even exist anymore?
-// FIXME: this should move to a different file
-struct cplane_t
-{
- Vector normal;
- float dist;
- byte type; // for fast side tests
- byte signbits; // signx + (signy<<1) + (signz<<1)
- byte pad[2];
-
-#ifdef VECTOR_NO_SLOW_OPERATIONS
- cplane_t() {}
-
-private:
- // No copy constructors allowed if we're in optimal mode
- cplane_t(const cplane_t& vOther);
-#endif
-};
-
-// structure offset for asm code
-#define CPLANE_NORMAL_X 0
-#define CPLANE_NORMAL_Y 4
-#define CPLANE_NORMAL_Z 8
-#define CPLANE_DIST 12
-#define CPLANE_TYPE 16
-#define CPLANE_SIGNBITS 17
-#define CPLANE_PAD0 18
-#define CPLANE_PAD1 19
-
-// 0-2 are axial planes
-#define PLANE_X 0
-#define PLANE_Y 1
-#define PLANE_Z 2
-
-// 3-5 are non-axial planes snapped to the nearest
-#define PLANE_ANYX 3
-#define PLANE_ANYY 4
-#define PLANE_ANYZ 5
-
-
-//-----------------------------------------------------------------------------
-// Frustum plane indices.
-// WARNING: there is code that depends on these values
-//-----------------------------------------------------------------------------
-
-enum
-{
- FRUSTUM_RIGHT = 0,
- FRUSTUM_LEFT = 1,
- FRUSTUM_TOP = 2,
- FRUSTUM_BOTTOM = 3,
- FRUSTUM_NEARZ = 4,
- FRUSTUM_FARZ = 5,
- FRUSTUM_NUMPLANES = 6
-};
-
-extern int SignbitsForPlane( cplane_t *out );
-
-class Frustum_t
-{
-public:
- void SetPlane( int i, int nType, const Vector &vecNormal, float dist )
- {
- m_Plane[i].normal = vecNormal;
- m_Plane[i].dist = dist;
- m_Plane[i].type = nType;
- m_Plane[i].signbits = SignbitsForPlane( &m_Plane[i] );
- m_AbsNormal[i].Init( fabs(vecNormal.x), fabs(vecNormal.y), fabs(vecNormal.z) );
- }
-
- inline const cplane_t *GetPlane( int i ) const { return &m_Plane[i]; }
- inline const Vector &GetAbsNormal( int i ) const { return m_AbsNormal[i]; }
-
-private:
- cplane_t m_Plane[FRUSTUM_NUMPLANES];
- Vector m_AbsNormal[FRUSTUM_NUMPLANES];
-};
-
-// Computes Y fov from an X fov and a screen aspect ratio + X from Y
-float CalcFovY( float flFovX, float flScreenAspect );
-float CalcFovX( float flFovY, float flScreenAspect );
-
-// Generate a frustum based on perspective view parameters
-// NOTE: FOV is specified in degrees, as the *full* view angle (not half-angle)
-void GeneratePerspectiveFrustum( const Vector& origin, const QAngle &angles, float flZNear, float flZFar, float flFovX, float flAspectRatio, Frustum_t &frustum );
-void GeneratePerspectiveFrustum( const Vector& origin, const Vector &forward, const Vector &right, const Vector &up, float flZNear, float flZFar, float flFovX, float flFovY, Frustum_t &frustum );
-
-// Cull the world-space bounding box to the specified frustum.
-bool R_CullBox( const Vector& mins, const Vector& maxs, const Frustum_t &frustum );
-bool R_CullBoxSkipNear( const Vector& mins, const Vector& maxs, const Frustum_t &frustum );
-
-struct matrix3x4_t
-{
- matrix3x4_t() {}
- matrix3x4_t(
- float m00, float m01, float m02, float m03,
- float m10, float m11, float m12, float m13,
- float m20, float m21, float m22, float m23 )
- {
- m_flMatVal[0][0] = m00; m_flMatVal[0][1] = m01; m_flMatVal[0][2] = m02; m_flMatVal[0][3] = m03;
- m_flMatVal[1][0] = m10; m_flMatVal[1][1] = m11; m_flMatVal[1][2] = m12; m_flMatVal[1][3] = m13;
- m_flMatVal[2][0] = m20; m_flMatVal[2][1] = m21; m_flMatVal[2][2] = m22; m_flMatVal[2][3] = m23;
- }
-
- //-----------------------------------------------------------------------------
- // Creates a matrix where the X axis = forward
- // the Y axis = left, and the Z axis = up
- //-----------------------------------------------------------------------------
- void Init( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
- {
- m_flMatVal[0][0] = xAxis.x; m_flMatVal[0][1] = yAxis.x; m_flMatVal[0][2] = zAxis.x; m_flMatVal[0][3] = vecOrigin.x;
- m_flMatVal[1][0] = xAxis.y; m_flMatVal[1][1] = yAxis.y; m_flMatVal[1][2] = zAxis.y; m_flMatVal[1][3] = vecOrigin.y;
- m_flMatVal[2][0] = xAxis.z; m_flMatVal[2][1] = yAxis.z; m_flMatVal[2][2] = zAxis.z; m_flMatVal[2][3] = vecOrigin.z;
- }
-
- //-----------------------------------------------------------------------------
- // Creates a matrix where the X axis = forward
- // the Y axis = left, and the Z axis = up
- //-----------------------------------------------------------------------------
- matrix3x4_t( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin )
- {
- Init( xAxis, yAxis, zAxis, vecOrigin );
- }
-
- inline void Invalidate( void )
- {
- for (int i = 0; i < 3; i++)
- {
- for (int j = 0; j < 4; j++)
- {
- m_flMatVal[i][j] = VEC_T_NAN;
- }
- }
- }
-
- float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
- const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; }
- float *Base() { return &m_flMatVal[0][0]; }
- const float *Base() const { return &m_flMatVal[0][0]; }
-
- float m_flMatVal[3][4];
-};
-
-
-#ifndef M_PI
- #define M_PI 3.14159265358979323846 // matches value in gcc v2 math.h
-#endif
-
-#define M_PI_F ((float)(M_PI)) // Shouldn't collide with anything.
-
-// NJS: Inlined to prevent floats from being autopromoted to doubles, as with the old system.
-#ifndef RAD2DEG
- #define RAD2DEG( x ) ( (float)(x) * (float)(180.f / M_PI_F) )
-#endif
-
-#ifndef DEG2RAD
- #define DEG2RAD( x ) ( (float)(x) * (float)(M_PI_F / 180.f) )
-#endif
-
-// Used to represent sides of things like planes.
-#define SIDE_FRONT 0
-#define SIDE_BACK 1
-#define SIDE_ON 2
-#define SIDE_CROSS -2 // necessary for polylib.c
-
-#define ON_VIS_EPSILON 0.01 // necessary for vvis (flow.c) -- again look into moving later!
-#define EQUAL_EPSILON 0.001 // necessary for vbsp (faces.c) -- should look into moving it there?
-
-extern bool s_bMathlibInitialized;
-
-extern const Vector vec3_origin;
-extern const QAngle vec3_angle;
-extern const Quaternion quat_identity;
-extern const Vector vec3_invalid;
-extern const int nanmask;
-
-#define IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask)
-
-FORCEINLINE vec_t DotProduct(const vec_t *v1, const vec_t *v2)
-{
- return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];
-}
-FORCEINLINE void VectorSubtract(const vec_t *a, const vec_t *b, vec_t *c)
-{
- c[0]=a[0]-b[0];
- c[1]=a[1]-b[1];
- c[2]=a[2]-b[2];
-}
-FORCEINLINE void VectorAdd(const vec_t *a, const vec_t *b, vec_t *c)
-{
- c[0]=a[0]+b[0];
- c[1]=a[1]+b[1];
- c[2]=a[2]+b[2];
-}
-FORCEINLINE void VectorCopy(const vec_t *a, vec_t *b)
-{
- b[0]=a[0];
- b[1]=a[1];
- b[2]=a[2];
-}
-FORCEINLINE void VectorClear(vec_t *a)
-{
- a[0]=a[1]=a[2]=0;
-}
-
-FORCEINLINE float VectorMaximum(const vec_t *v)
-{
- return max( v[0], max( v[1], v[2] ) );
-}
-
-FORCEINLINE float VectorMaximum(const Vector& v)
-{
- return max( v.x, max( v.y, v.z ) );
-}
-
-FORCEINLINE void VectorScale (const float* in, vec_t scale, float* out)
-{
- out[0] = in[0]*scale;
- out[1] = in[1]*scale;
- out[2] = in[2]*scale;
-}
-
-
-// Cannot be forceinline as they have overloads:
-inline void VectorFill(vec_t *a, float b)
-{
- a[0]=a[1]=a[2]=b;
-}
-
-inline void VectorNegate(vec_t *a)
-{
- a[0]=-a[0];
- a[1]=-a[1];
- a[2]=-a[2];
-}
-
-
-//#define VectorMaximum(a) ( max( (a)[0], max( (a)[1], (a)[2] ) ) )
-#define Vector2Clear(x) {(x)[0]=(x)[1]=0;}
-#define Vector2Negate(x) {(x)[0]=-((x)[0]);(x)[1]=-((x)[1]);}
-#define Vector2Copy(a,b) {(b)[0]=(a)[0];(b)[1]=(a)[1];}
-#define Vector2Subtract(a,b,c) {(c)[0]=(a)[0]-(b)[0];(c)[1]=(a)[1]-(b)[1];}
-#define Vector2Add(a,b,c) {(c)[0]=(a)[0]+(b)[0];(c)[1]=(a)[1]+(b)[1];}
-#define Vector2Scale(a,b,c) {(c)[0]=(b)*(a)[0];(c)[1]=(b)*(a)[1];}
-
-// NJS: Some functions in VBSP still need to use these for dealing with mixing vec4's and shorts with vec_t's.
-// remove when no longer needed.
-#define VECTOR_COPY( A, B ) do { (B)[0] = (A)[0]; (B)[1] = (A)[1]; (B)[2]=(A)[2]; } while(0)
-#define DOT_PRODUCT( A, B ) ( (A)[0]*(B)[0] + (A)[1]*(B)[1] + (A)[2]*(B)[2] )
-
-FORCEINLINE void VectorMAInline( const float* start, float scale, const float* direction, float* dest )
-{
- dest[0]=start[0]+direction[0]*scale;
- dest[1]=start[1]+direction[1]*scale;
- dest[2]=start[2]+direction[2]*scale;
-}
-
-FORCEINLINE void VectorMAInline( const Vector& start, float scale, const Vector& direction, Vector& dest )
-{
- dest.x=start.x+direction.x*scale;
- dest.y=start.y+direction.y*scale;
- dest.z=start.z+direction.z*scale;
-}
-
-FORCEINLINE void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest )
-{
- VectorMAInline(start, scale, direction, dest);
-}
-
-FORCEINLINE void VectorMA( const float * start, float scale, const float *direction, float *dest )
-{
- VectorMAInline(start, scale, direction, dest);
-}
-
-
-int VectorCompare (const float *v1, const float *v2);
-
-inline float VectorLength(const float *v)
-{
- return FastSqrt( v[0]*v[0] + v[1]*v[1] + v[2]*v[2] + FLT_EPSILON );
-}
-
-void CrossProduct (const float *v1, const float *v2, float *cross);
-
-qboolean VectorsEqual( const float *v1, const float *v2 );
-
-inline vec_t RoundInt (vec_t in)
-{
- return floor(in + 0.5f);
-}
-
-int Q_log2(int val);
-
-// Math routines done in optimized assembly math package routines
-void inline SinCos( float radians, float *sine, float *cosine )
-{
-#if defined( _X360 )
- XMScalarSinCos( sine, cosine, radians );
-#elif defined( PLATFORM_WINDOWS_PC32 )
- _asm
- {
- fld DWORD PTR [radians]
- fsincos
-
- mov edx, DWORD PTR [cosine]
- mov eax, DWORD PTR [sine]
-
- fstp DWORD PTR [edx]
- fstp DWORD PTR [eax]
- }
-#elif defined( PLATFORM_WINDOWS_PC64 )
- *sine = sin( radians );
- *cosine = cos( radians );
-#elif defined( POSIX )
- register double __cosr, __sinr;
- __asm ("fsincos" : "=t" (__cosr), "=u" (__sinr) : "0" (radians));
-
- *sine = __sinr;
- *cosine = __cosr;
-#endif
-}
-
-#define SIN_TABLE_SIZE 256
-#define FTOIBIAS 12582912.f
-extern float SinCosTable[SIN_TABLE_SIZE];
-
-inline float TableCos( float theta )
-{
- union
- {
- int i;
- float f;
- } ftmp;
-
- // ideally, the following should compile down to: theta * constant + constant, changing any of these constants from defines sometimes fubars this.
- ftmp.f = theta * ( float )( SIN_TABLE_SIZE / ( 2.0f * M_PI ) ) + ( FTOIBIAS + ( SIN_TABLE_SIZE / 4 ) );
- return SinCosTable[ ftmp.i & ( SIN_TABLE_SIZE - 1 ) ];
-}
-
-inline float TableSin( float theta )
-{
- union
- {
- int i;
- float f;
- } ftmp;
-
- // ideally, the following should compile down to: theta * constant + constant
- ftmp.f = theta * ( float )( SIN_TABLE_SIZE / ( 2.0f * M_PI ) ) + FTOIBIAS;
- return SinCosTable[ ftmp.i & ( SIN_TABLE_SIZE - 1 ) ];
-}
-
-template<class T>
-FORCEINLINE T Square( T const &a )
-{
- return a * a;
-}
-
-
-// return the smallest power of two >= x.
-// returns 0 if x == 0 or x > 0x80000000 (ie numbers that would be negative if x was signed)
-// NOTE: the old code took an int, and if you pass in an int of 0x80000000 casted to a uint,
-// you'll get 0x80000000, which is correct for uints, instead of 0, which was correct for ints
-FORCEINLINE uint SmallestPowerOfTwoGreaterOrEqual( uint x )
-{
- x -= 1;
- x |= x >> 1;
- x |= x >> 2;
- x |= x >> 4;
- x |= x >> 8;
- x |= x >> 16;
- return x + 1;
-}
-
-// return the largest power of two <= x. Will return 0 if passed 0
-FORCEINLINE uint LargestPowerOfTwoLessThanOrEqual( uint x )
-{
- if ( x >= 0x80000000 )
- return 0x80000000;
-
- return SmallestPowerOfTwoGreaterOrEqual( x + 1 ) >> 1;
-}
-
-
-// Math routines for optimizing division
-void FloorDivMod (double numer, double denom, int *quotient, int *rem);
-int GreatestCommonDivisor (int i1, int i2);
-
-// Test for FPU denormal mode
-bool IsDenormal( const float &val );
-
-// MOVEMENT INFO
-enum
-{
- PITCH = 0, // up / down
- YAW, // left / right
- ROLL // fall over
-};
-
-void MatrixAngles( const matrix3x4_t & matrix, float *angles ); // !!!!
-void MatrixVectors( const matrix3x4_t &matrix, Vector* pForward, Vector *pRight, Vector *pUp );
-void VectorTransform (const float *in1, const matrix3x4_t & in2, float *out);
-void VectorITransform (const float *in1, const matrix3x4_t & in2, float *out);
-void VectorRotate( const float *in1, const matrix3x4_t & in2, float *out);
-void VectorRotate( const Vector &in1, const QAngle &in2, Vector &out );
-void VectorRotate( const Vector &in1, const Quaternion &in2, Vector &out );
-void VectorIRotate( const float *in1, const matrix3x4_t & in2, float *out);
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-QAngle TransformAnglesToLocalSpace( const QAngle &angles, const matrix3x4_t &parentMatrix );
-QAngle TransformAnglesToWorldSpace( const QAngle &angles, const matrix3x4_t &parentMatrix );
-
-#endif
-
-void MatrixInitialize( matrix3x4_t &mat, const Vector &vecOrigin, const Vector &vecXAxis, const Vector &vecYAxis, const Vector &vecZAxis );
-void MatrixCopy( const matrix3x4_t &in, matrix3x4_t &out );
-void MatrixInvert( const matrix3x4_t &in, matrix3x4_t &out );
-
-// Matrix equality test
-bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float flTolerance = 1e-5 );
-
-void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out );
-void MatrixSetColumn( const Vector &in, int column, matrix3x4_t &out );
-
-inline void MatrixGetTranslation( const matrix3x4_t &in, Vector &out )
-{
- MatrixGetColumn ( in, 3, out );
-}
-
-inline void MatrixSetTranslation( const Vector &in, matrix3x4_t &out )
-{
- MatrixSetColumn ( in, 3, out );
-}
-
-void MatrixScaleBy ( const float flScale, matrix3x4_t &out );
-void MatrixScaleByZero ( matrix3x4_t &out );
-
-//void DecomposeRotation( const matrix3x4_t &mat, float *out );
-void ConcatRotations (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out);
-void ConcatTransforms (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out);
-
-// For identical interface w/ VMatrix
-inline void MatrixMultiply ( const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out )
-{
- ConcatTransforms( in1, in2, out );
-}
-
-void QuaternionSlerp( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt );
-void QuaternionSlerpNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt );
-void QuaternionBlend( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt );
-void QuaternionBlendNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt );
-void QuaternionIdentityBlend( const Quaternion &p, float t, Quaternion &qt );
-float QuaternionAngleDiff( const Quaternion &p, const Quaternion &q );
-void QuaternionScale( const Quaternion &p, float t, Quaternion &q );
-void QuaternionAlign( const Quaternion &p, const Quaternion &q, Quaternion &qt );
-float QuaternionDotProduct( const Quaternion &p, const Quaternion &q );
-void QuaternionConjugate( const Quaternion &p, Quaternion &q );
-void QuaternionInvert( const Quaternion &p, Quaternion &q );
-float QuaternionNormalize( Quaternion &q );
-void QuaternionAdd( const Quaternion &p, const Quaternion &q, Quaternion &qt );
-void QuaternionMult( const Quaternion &p, const Quaternion &q, Quaternion &qt );
-void QuaternionMatrix( const Quaternion &q, matrix3x4_t &matrix );
-void QuaternionMatrix( const Quaternion &q, const Vector &pos, matrix3x4_t &matrix );
-void QuaternionAngles( const Quaternion &q, QAngle &angles );
-void AngleQuaternion( const QAngle& angles, Quaternion &qt );
-void QuaternionAngles( const Quaternion &q, RadianEuler &angles );
-void AngleQuaternion( RadianEuler const &angles, Quaternion &qt );
-void QuaternionAxisAngle( const Quaternion &q, Vector &axis, float &angle );
-void AxisAngleQuaternion( const Vector &axis, float angle, Quaternion &q );
-void BasisToQuaternion( const Vector &vecForward, const Vector &vecRight, const Vector &vecUp, Quaternion &q );
-void MatrixQuaternion( const matrix3x4_t &mat, Quaternion &q );
-
-// A couple methods to find the dot product of a vector with a matrix row or column...
-inline float MatrixRowDotProduct( const matrix3x4_t &in1, int row, const Vector& in2 )
-{
- Assert( (row >= 0) && (row < 3) );
- return DotProduct( in1[row], in2.Base() );
-}
-
-inline float MatrixColumnDotProduct( const matrix3x4_t &in1, int col, const Vector& in2 )
-{
- Assert( (col >= 0) && (col < 4) );
- return in1[0][col] * in2[0] + in1[1][col] * in2[1] + in1[2][col] * in2[2];
-}
-
-int __cdecl BoxOnPlaneSide (const float *emins, const float *emaxs, const cplane_t *plane);
-
-inline float anglemod(float a)
-{
- a = (360.f/65536) * ((int)(a*(65536.f/360.0f)) & 65535);
- return a;
-}
-
-// Remap a value in the range [A,B] to [C,D].
-inline float RemapVal( float val, float A, float B, float C, float D)
-{
- if ( A == B )
- return val >= B ? D : C;
- return C + (D - C) * (val - A) / (B - A);
-}
-
-inline float RemapValClamped( float val, float A, float B, float C, float D)
-{
- if ( A == B )
- return val >= B ? D : C;
- float cVal = (val - A) / (B - A);
- cVal = clamp( cVal, 0.0f, 1.0f );
-
- return C + (D - C) * cVal;
-}
-
-// Returns A + (B-A)*flPercent.
-// float Lerp( float flPercent, float A, float B );
-template <class T>
-FORCEINLINE T Lerp( float flPercent, T const &A, T const &B )
-{
- return A + (B - A) * flPercent;
-}
-
-FORCEINLINE float Sqr( float f )
-{
- return f*f;
-}
-
-// 5-argument floating point linear interpolation.
-// FLerp(f1,f2,i1,i2,x)=
-// f1 at x=i1
-// f2 at x=i2
-// smooth lerp between f1 and f2 at x>i1 and x<i2
-// extrapolation for x<i1 or x>i2
-//
-// If you know a function f(x)'s value (f1) at position i1, and its value (f2) at position i2,
-// the function can be linearly interpolated with FLerp(f1,f2,i1,i2,x)
-// i2=i1 will cause a divide by zero.
-static inline float FLerp(float f1, float f2, float i1, float i2, float x)
-{
- return f1+(f2-f1)*(x-i1)/(i2-i1);
-}
-
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// YWB: Specialization for interpolating euler angles via quaternions...
-template<> FORCEINLINE QAngle Lerp<QAngle>( float flPercent, const QAngle& q1, const QAngle& q2 )
-{
- // Avoid precision errors
- if ( q1 == q2 )
- return q1;
-
- Quaternion src, dest;
-
- // Convert to quaternions
- AngleQuaternion( q1, src );
- AngleQuaternion( q2, dest );
-
- Quaternion result;
-
- // Slerp
- QuaternionSlerp( src, dest, flPercent, result );
-
- // Convert to euler
- QAngle output;
- QuaternionAngles( result, output );
- return output;
-}
-
-#else
-
-#pragma error
-
-// NOTE NOTE: I haven't tested this!! It may not work! Check out interpolatedvar.cpp in the client dll to try it
-template<> FORCEINLINE QAngleByValue Lerp<QAngleByValue>( float flPercent, const QAngleByValue& q1, const QAngleByValue& q2 )
-{
- // Avoid precision errors
- if ( q1 == q2 )
- return q1;
-
- Quaternion src, dest;
-
- // Convert to quaternions
- AngleQuaternion( q1, src );
- AngleQuaternion( q2, dest );
-
- Quaternion result;
-
- // Slerp
- QuaternionSlerp( src, dest, flPercent, result );
-
- // Convert to euler
- QAngleByValue output;
- QuaternionAngles( result, output );
- return output;
-}
-
-#endif // VECTOR_NO_SLOW_OPERATIONS
-
-
-/// Same as swap(), but won't cause problems with std::swap
-template <class T>
-FORCEINLINE void V_swap( T& x, T& y )
-{
- T temp = x;
- x = y;
- y = temp;
-}
-
-template <class T> FORCEINLINE T AVG(T a, T b)
-{
- return (a+b)/2;
-}
-
-// number of elements in an array of static size
-#define NELEMS(x) ARRAYSIZE(x)
-
-// XYZ macro, for printf type functions - ex printf("%f %f %f",XYZ(myvector));
-#define XYZ(v) (v).x,(v).y,(v).z
-
-
-inline float Sign( float x )
-{
- return (x <0.0f) ? -1.0f : 1.0f;
-}
-
-//
-// Clamps the input integer to the given array bounds.
-// Equivalent to the following, but without using any branches:
-//
-// if( n < 0 ) return 0;
-// else if ( n > maxindex ) return maxindex;
-// else return n;
-//
-// This is not always a clear performance win, but when you have situations where a clamped
-// value is thrashing against a boundary this is a big win. (ie, valid, invalid, valid, invalid, ...)
-//
-// Note: This code has been run against all possible integers.
-//
-inline int ClampArrayBounds( int n, unsigned maxindex )
-{
- // mask is 0 if less than 4096, 0xFFFFFFFF if greater than
- unsigned int inrangemask = 0xFFFFFFFF + (((unsigned) n) > maxindex );
- unsigned int lessthan0mask = 0xFFFFFFFF + ( n >= 0 );
-
- // If the result was valid, set the result, (otherwise sets zero)
- int result = (inrangemask & n);
-
- // if the result was out of range or zero.
- result |= ((~inrangemask) & (~lessthan0mask)) & maxindex;
-
- return result;
-}
-
-
-#define BOX_ON_PLANE_SIDE(emins, emaxs, p) \
- (((p)->type < 3)? \
- ( \
- ((p)->dist <= (emins)[(p)->type])? \
- 1 \
- : \
- ( \
- ((p)->dist >= (emaxs)[(p)->type])?\
- 2 \
- : \
- 3 \
- ) \
- ) \
- : \
- BoxOnPlaneSide( (emins), (emaxs), (p)))
-
-//-----------------------------------------------------------------------------
-// FIXME: Vector versions.... the float versions will go away hopefully soon!
-//-----------------------------------------------------------------------------
-
-void AngleVectors (const QAngle& angles, Vector *forward);
-void AngleVectors (const QAngle& angles, Vector *forward, Vector *right, Vector *up);
-void AngleVectorsTranspose (const QAngle& angles, Vector *forward, Vector *right, Vector *up);
-void AngleMatrix (const QAngle &angles, matrix3x4_t &mat );
-void AngleMatrix( const QAngle &angles, const Vector &position, matrix3x4_t &mat );
-void AngleMatrix (const RadianEuler &angles, matrix3x4_t &mat );
-void AngleMatrix( RadianEuler const &angles, const Vector &position, matrix3x4_t &mat );
-void AngleIMatrix (const QAngle &angles, matrix3x4_t &mat );
-void AngleIMatrix (const QAngle &angles, const Vector &position, matrix3x4_t &mat );
-void AngleIMatrix (const RadianEuler &angles, matrix3x4_t &mat );
-void VectorAngles( const Vector &forward, QAngle &angles );
-void VectorAngles( const Vector &forward, const Vector &pseudoup, QAngle &angles );
-void VectorMatrix( const Vector &forward, matrix3x4_t &mat );
-void VectorVectors( const Vector &forward, Vector &right, Vector &up );
-void SetIdentityMatrix( matrix3x4_t &mat );
-void SetScaleMatrix( float x, float y, float z, matrix3x4_t &dst );
-void MatrixBuildRotationAboutAxis( const Vector &vAxisOfRot, float angleDegrees, matrix3x4_t &dst );
-
-inline void SetScaleMatrix( float flScale, matrix3x4_t &dst )
-{
- SetScaleMatrix( flScale, flScale, flScale, dst );
-}
-
-inline void SetScaleMatrix( const Vector& scale, matrix3x4_t &dst )
-{
- SetScaleMatrix( scale.x, scale.y, scale.z, dst );
-}
-
-// Computes the inverse transpose
-void MatrixTranspose( matrix3x4_t& mat );
-void MatrixTranspose( const matrix3x4_t& src, matrix3x4_t& dst );
-void MatrixInverseTranspose( const matrix3x4_t& src, matrix3x4_t& dst );
-
-inline void PositionMatrix( const Vector &position, matrix3x4_t &mat )
-{
- MatrixSetColumn( position, 3, mat );
-}
-
-inline void MatrixPosition( const matrix3x4_t &matrix, Vector &position )
-{
- MatrixGetColumn( matrix, 3, position );
-}
-
-inline void VectorRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out)
-{
- VectorRotate( &in1.x, in2, &out.x );
-}
-
-inline void VectorIRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out)
-{
- VectorIRotate( &in1.x, in2, &out.x );
-}
-
-inline void MatrixAngles( const matrix3x4_t &matrix, QAngle &angles )
-{
- MatrixAngles( matrix, &angles.x );
-}
-
-inline void MatrixAngles( const matrix3x4_t &matrix, QAngle &angles, Vector &position )
-{
- MatrixAngles( matrix, angles );
- MatrixPosition( matrix, position );
-}
-
-inline void MatrixAngles( const matrix3x4_t &matrix, RadianEuler &angles )
-{
- MatrixAngles( matrix, &angles.x );
-
- angles.Init( DEG2RAD( angles.z ), DEG2RAD( angles.x ), DEG2RAD( angles.y ) );
-}
-
-void MatrixAngles( const matrix3x4_t &mat, RadianEuler &angles, Vector &position );
-
-void MatrixAngles( const matrix3x4_t &mat, Quaternion &q, Vector &position );
-
-inline int VectorCompare (const Vector& v1, const Vector& v2)
-{
- return v1 == v2;
-}
-
-inline void VectorTransform (const Vector& in1, const matrix3x4_t &in2, Vector &out)
-{
- VectorTransform( &in1.x, in2, &out.x );
-}
-
-inline void VectorITransform (const Vector& in1, const matrix3x4_t &in2, Vector &out)
-{
- VectorITransform( &in1.x, in2, &out.x );
-}
-
-/*
-inline void DecomposeRotation( const matrix3x4_t &mat, Vector &out )
-{
- DecomposeRotation( mat, &out.x );
-}
-*/
-
-inline int BoxOnPlaneSide (const Vector& emins, const Vector& emaxs, const cplane_t *plane )
-{
- return BoxOnPlaneSide( &emins.x, &emaxs.x, plane );
-}
-
-inline void VectorFill(Vector& a, float b)
-{
- a[0]=a[1]=a[2]=b;
-}
-
-inline void VectorNegate(Vector& a)
-{
- a[0] = -a[0];
- a[1] = -a[1];
- a[2] = -a[2];
-}
-
-inline vec_t VectorAvg(Vector& a)
-{
- return ( a[0] + a[1] + a[2] ) / 3;
-}
-
-//-----------------------------------------------------------------------------
-// Box/plane test (slow version)
-//-----------------------------------------------------------------------------
-inline int FASTCALL BoxOnPlaneSide2 (const Vector& emins, const Vector& emaxs, const cplane_t *p, float tolerance = 0.f )
-{
- Vector corners[2];
-
- if (p->normal[0] < 0)
- {
- corners[0][0] = emins[0];
- corners[1][0] = emaxs[0];
- }
- else
- {
- corners[1][0] = emins[0];
- corners[0][0] = emaxs[0];
- }
-
- if (p->normal[1] < 0)
- {
- corners[0][1] = emins[1];
- corners[1][1] = emaxs[1];
- }
- else
- {
- corners[1][1] = emins[1];
- corners[0][1] = emaxs[1];
- }
-
- if (p->normal[2] < 0)
- {
- corners[0][2] = emins[2];
- corners[1][2] = emaxs[2];
- }
- else
- {
- corners[1][2] = emins[2];
- corners[0][2] = emaxs[2];
- }
-
- int sides = 0;
-
- float dist1 = DotProduct (p->normal, corners[0]) - p->dist;
- if (dist1 >= tolerance)
- sides = 1;
-
- float dist2 = DotProduct (p->normal, corners[1]) - p->dist;
- if (dist2 < -tolerance)
- sides |= 2;
-
- return sides;
-}
-
-//-----------------------------------------------------------------------------
-// Helpers for bounding box construction
-//-----------------------------------------------------------------------------
-
-void ClearBounds (Vector& mins, Vector& maxs);
-void AddPointToBounds (const Vector& v, Vector& mins, Vector& maxs);
-
-//
-// COLORSPACE/GAMMA CONVERSION STUFF
-//
-void BuildGammaTable( float gamma, float texGamma, float brightness, int overbright );
-
-// convert texture to linear 0..1 value
-inline float TexLightToLinear( int c, int exponent )
-{
- extern float power2_n[256];
- Assert( exponent >= -128 && exponent <= 127 );
- return ( float )c * power2_n[exponent+128];
-}
-
-
-// convert texture to linear 0..1 value
-int LinearToTexture( float f );
-// converts 0..1 linear value to screen gamma (0..255)
-int LinearToScreenGamma( float f );
-float TextureToLinear( int c );
-
-// compressed color format
-struct ColorRGBExp32
-{
- byte r, g, b;
- signed char exponent;
-};
-
-void ColorRGBExp32ToVector( const ColorRGBExp32& in, Vector& out );
-void VectorToColorRGBExp32( const Vector& v, ColorRGBExp32 &c );
-
-// solve for "x" where "a x^2 + b x + c = 0", return true if solution exists
-bool SolveQuadratic( float a, float b, float c, float &root1, float &root2 );
-
-// solves for "a, b, c" where "a x^2 + b x + c = y", return true if solution exists
-bool SolveInverseQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c );
-
-// solves for a,b,c specified as above, except that it always creates a monotonically increasing or
-// decreasing curve if the data is monotonically increasing or decreasing. In order to enforce the
-// monoticity condition, it is possible that the resulting quadratic will only approximate the data
-// instead of interpolating it. This code is not especially fast.
-bool SolveInverseQuadraticMonotonic( float x1, float y1, float x2, float y2,
- float x3, float y3, float &a, float &b, float &c );
-
-
-
-
-// solves for "a, b, c" where "1/(a x^2 + b x + c ) = y", return true if solution exists
-bool SolveInverseReciprocalQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c );
-
-// rotate a vector around the Z axis (YAW)
-void VectorYawRotate( const Vector& in, float flYaw, Vector &out);
-
-
-// Bias takes an X value between 0 and 1 and returns another value between 0 and 1
-// The curve is biased towards 0 or 1 based on biasAmt, which is between 0 and 1.
-// Lower values of biasAmt bias the curve towards 0 and higher values bias it towards 1.
-//
-// For example, with biasAmt = 0.2, the curve looks like this:
-//
-// 1
-// | *
-// | *
-// | *
-// | **
-// | **
-// | ****
-// |*********
-// |___________________
-// 0 1
-//
-//
-// With biasAmt = 0.8, the curve looks like this:
-//
-// 1
-// | **************
-// | **
-// | *
-// | *
-// |*
-// |*
-// |*
-// |___________________
-// 0 1
-//
-// With a biasAmt of 0.5, Bias returns X.
-float Bias( float x, float biasAmt );
-
-
-// Gain is similar to Bias, but biasAmt biases towards or away from 0.5.
-// Lower bias values bias towards 0.5 and higher bias values bias away from it.
-//
-// For example, with biasAmt = 0.2, the curve looks like this:
-//
-// 1
-// | *
-// | *
-// | **
-// | ***************
-// | **
-// | *
-// |*
-// |___________________
-// 0 1
-//
-//
-// With biasAmt = 0.8, the curve looks like this:
-//
-// 1
-// | *****
-// | ***
-// | *
-// | *
-// | *
-// | ***
-// |*****
-// |___________________
-// 0 1
-float Gain( float x, float biasAmt );
-
-
-// SmoothCurve maps a 0-1 value into another 0-1 value based on a cosine wave
-// where the derivatives of the function at 0 and 1 (and 0.5) are 0. This is useful for
-// any fadein/fadeout effect where it should start and end smoothly.
-//
-// The curve looks like this:
-//
-// 1
-// | **
-// | * *
-// | * *
-// | * *
-// | * *
-// | ** **
-// |*** ***
-// |___________________
-// 0 1
-//
-float SmoothCurve( float x );
-
-
-// This works like SmoothCurve, with two changes:
-//
-// 1. Instead of the curve peaking at 0.5, it will peak at flPeakPos.
-// (So if you specify flPeakPos=0.2, then the peak will slide to the left).
-//
-// 2. flPeakSharpness is a 0-1 value controlling the sharpness of the peak.
-// Low values blunt the peak and high values sharpen the peak.
-float SmoothCurve_Tweak( float x, float flPeakPos=0.5, float flPeakSharpness=0.5 );
-
-
-//float ExponentialDecay( float halflife, float dt );
-//float ExponentialDecay( float decayTo, float decayTime, float dt );
-
// Returns the fraction of a value that remains after 'dt' when the value drops
// to 50% every 'halflife' (both in the same time unit).
// The division means 'halflife' must be non-zero.
inline float ExponentialDecay( float halflife, float dt )
{
	// ln(0.5) == -0.69314718055994530941723212145818
	const float flLnHalf = -0.69314718f;
	const float flExponent = flLnHalf / halflife * dt;
	return expf( flExponent );
}
-
// Returns the fraction of a value that remains after 'dt' when the value
// shrinks to the factor 'decayTo' over 'decayTime'.
// 'decayTime' must be non-zero and 'decayTo' must be positive (it is passed to logf).
inline float ExponentialDecay( float decayTo, float decayTime, float dt )
{
	const float flRate = logf( decayTo ) / decayTime;
	return expf( flRate * dt );
}
-
// Get the integrated distance traveled, i.e. the integral over [0, dt] of
// decayTo^(t / decayTime).
// decayTo is factor the value should decay to in decayTime
// dt is the time relative to the last velocity update
inline float ExponentialDecayIntegral( float decayTo, float decayTime, float dt )
{
	const float flLogDecayTo = logf( decayTo );
	if ( flLogDecayTo == 0.0f )
	{
		// decayTo == 1 means no decay at all. The closed form below would
		// evaluate to 0/0 (NaN); the limit of the integral is simply dt.
		return dt;
	}

	return (powf( decayTo, dt / decayTime) * decayTime - decayTime) / flLogDecayTo;
}
-
// Hermite basis function for smooth interpolation: evaluates 3v^2 - 2v^3.
// Similar to Gain() above, but very cheap to call.
// 'value' should be between 0 and 1 inclusive.
inline float SimpleSpline( float value )
{
	const float flSquared = value * value;

	// Ease-in / ease-out: the two terms of 3v^2 - 2v^3
	const float flRise = 3 * flSquared;
	const float flFall = 2 * flSquared * value;
	return ( flRise - flFall );
}
-
-// remaps a value in [startInterval, startInterval+rangeInterval] from linear to
-// spline using SimpleSpline
-inline float SimpleSplineRemapVal( float val, float A, float B, float C, float D)
-{
- if ( A == B )
- return val >= B ? D : C;
- float cVal = (val - A) / (B - A);
- return C + (D - C) * SimpleSpline( cVal );
-}
-
-// remaps a value in [startInterval, startInterval+rangeInterval] from linear to
-// spline using SimpleSpline
-inline float SimpleSplineRemapValClamped( float val, float A, float B, float C, float D )
-{
- if ( A == B )
- return val >= B ? D : C;
- float cVal = (val - A) / (B - A);
- cVal = clamp( cVal, 0.0f, 1.0f );
- return C + (D - C) * SimpleSpline( cVal );
-}
-
// Converts a float to an int using the hardware float->int conversion
// (rounding, not truncating; see Float2Int for truncation).
//
// x86 / x64: a single SSE scalar conversion. 64-bit x86 targets identified by
// __x86_64__ / _M_X64 are included so that non-Windows x64 builds use the same
// path instead of stopping at the #error below.
// Xbox 360: uses __fctiw and returns the second int of the stored double;
// the FPU control word is asserted beforehand when Assert is available.
FORCEINLINE int RoundFloatToInt(float f)
{
#if defined(__i386__) || defined(_M_IX86) || defined( PLATFORM_WINDOWS_PC64 ) || defined(__x86_64__) || defined(_M_X64)
	return _mm_cvtss_si32(_mm_load_ss(&f));
#elif defined( _X360 )
#ifdef Assert
	Assert( IsFPUControlWordSet() );
#endif
	union
	{
		double flResult;
		int pResult[2];
	};
	flResult = __fctiw( f );
	return pResult[1];
#else
#error Unknown architecture
#endif
}
-
-FORCEINLINE unsigned char RoundFloatToByte(float f)
-{
- int nResult = RoundFloatToInt(f);
-#ifdef Assert
- Assert( (nResult & ~0xFF) == 0 );
-#endif
- return (unsigned char) nResult;
-}
-
-FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f)
-{
-#if defined( _X360 )
-#ifdef Assert
- Assert( IsFPUControlWordSet() );
-#endif
- union
- {
- double flResult;
- int pIntResult[2];
- unsigned long pResult[2];
- };
- flResult = __fctiw( f );
- Assert( pIntResult[1] >= 0 );
- return pResult[1];
-#else // !X360
-
-#if defined( PLATFORM_WINDOWS_PC64 )
- uint nRet = ( uint ) f;
- if ( nRet & 1 )
- {
- if ( ( f - floor( f ) >= 0.5 ) )
- {
- nRet++;
- }
- }
- else
- {
- if ( ( f - floor( f ) > 0.5 ) )
- {
- nRet++;
- }
- }
- return nRet;
-#else // PLATFORM_WINDOWS_PC64
- unsigned char nResult[8];
-
- #if defined( _WIN32 )
- __asm
- {
- fld f
- fistp qword ptr nResult
- }
- #elif POSIX
- __asm __volatile__ (
- "fistpl %0;": "=m" (nResult): "t" (f) : "st"
- );
- #endif
-
- return *((unsigned long*)nResult);
-#endif // PLATFORM_WINDOWS_PC64
-#endif // !X360
-}
-
-FORCEINLINE bool IsIntegralValue( float flValue, float flTolerance = 0.001f )
-{
- return fabs( RoundFloatToInt( flValue ) - flValue ) < flTolerance;
-}
-
// Fast, accurate ftol: converts a float to an int via a plain C cast on
// non-360 targets, and via __fctiwz on Xbox 360.
FORCEINLINE int Float2Int( float a )
{
#if !defined( _X360 )
	// Rely on compiler to generate CVTTSS2SI on x86
	return (int) a;
#else // X360
	// __fctiwz leaves the integer in a double; return its second int word
	union
	{
		double flResult;
		int pResult[2];
	};
	flResult = __fctiwz( a );
	return pResult[1];
#endif
}
-
// Over 15x faster than: (int)floor(value)
// Returns the floor of 'a' as an int.
inline int Floor2Int( float a )
{
#if defined( __i386__ )
	// Convert to int and back, compare, subtract one if too big
	const __m128 vInput = _mm_set_ss(a);
	int nFloor = _mm_cvtss_si32(vInput);
	const __m128 vRoundTrip = _mm_cvt_si2ss(_mm_setzero_ps(), nFloor);
	nFloor -= _mm_comigt_ss( vRoundTrip, vInput );
	return nFloor;
#else
	return static_cast<int>( floor(a) );
#endif
}
-
-//-----------------------------------------------------------------------------
-// Fast color conversion from float to unsigned char
-//-----------------------------------------------------------------------------
-FORCEINLINE unsigned int FastFToC( float c )
-{
-#if defined( __i386__ )
- // IEEE float bit manipulation works for values between [0, 1<<23)
- union { float f; int i; } convert = { c*255.0f + (float)(1<<23) };
- return convert.i & 255;
-#else
- // consoles CPUs suffer from load-hit-store penalty
- return Float2Int( c * 255.0f );
-#endif
-}
-
-//-----------------------------------------------------------------------------
-// Fast conversion from float to integer with magnitude less than 2**22
-//-----------------------------------------------------------------------------
-FORCEINLINE int FastFloatToSmallInt( float c )
-{
-#if defined( __i386__ )
- // IEEE float bit manipulation works for values between [-1<<22, 1<<22)
- union { float f; int i; } convert = { c + (float)(3<<22) };
- return (convert.i & ((1<<23)-1)) - (1<<22);
-#else
- // consoles CPUs suffer from load-hit-store penalty
- return Float2Int( c );
-#endif
-}
-
-//-----------------------------------------------------------------------------
-// Purpose: Bound input float to .001 (millisecond) boundary
-// Input : in -
-// Output : inline float
-//-----------------------------------------------------------------------------
-inline float ClampToMsec( float in )
-{
- int msec = Floor2Int( in * 1000.0f + 0.5f );
- return 0.001f * msec;
-}
-
// Over 15x faster than: (int)ceil(value)
// Returns the ceiling of 'a' as an int.
inline int Ceil2Int( float a )
{
#if defined( __i386__ )
	// Convert to int and back, compare, add one if too small
	const __m128 vInput = _mm_load_ss(&a);
	int nCeil = _mm_cvtss_si32(vInput);
	const __m128 vRoundTrip = _mm_cvt_si2ss(_mm_setzero_ps(), nCeil);
	nCeil += _mm_comilt_ss( vRoundTrip, vInput );
	return nCeil;
#else
	return static_cast<int>( ceil(a) );
#endif
}
-
-
// Regular signed area of triangle [A,B,C].
// A, B and C may be any expressions with .x and .y members; each argument is
// parenthesized so that expressions such as *pVert expand correctly.
#define TriArea2D( A, B, C ) \
	( 0.5f * ( ( (B).x - (A).x ) * ( (C).y - (A).y ) - ( (B).y - (A).y ) * ( (C).x - (A).x ) ) )

// This version doesn't premultiply by 0.5f, so it's the signed area of the
// parallelogram spanned by (B - A) and (C - A) instead (twice the triangle area).
#define TriArea2DTimesTwo( A, B, C ) \
	( ( ( (B).x - (A).x ) * ( (C).y - (A).y ) - ( (B).y - (A).y ) * ( (C).x - (A).x ) ) )
-
-
-// Get the barycentric coordinates of "pt" in triangle [A,B,C].
-inline void GetBarycentricCoords2D(
- Vector2D const &A,
- Vector2D const &B,
- Vector2D const &C,
- Vector2D const &pt,
- float bcCoords[3] )
-{
- // Note, because to top and bottom are both x2, the issue washes out in the composite
- float invTriArea = 1.0f / TriArea2DTimesTwo( A, B, C );
-
- // NOTE: We assume here that the lightmap coordinate vertices go counterclockwise.
- // If not, TriArea2D() is negated so this works out right.
- bcCoords[0] = TriArea2DTimesTwo( B, C, pt ) * invTriArea;
- bcCoords[1] = TriArea2DTimesTwo( C, A, pt ) * invTriArea;
- bcCoords[2] = TriArea2DTimesTwo( A, B, pt ) * invTriArea;
-}
-
-
-// Return true of the sphere might touch the box (the sphere is actually treated
-// like a box itself, so this may return true if the sphere's bounding box touches
-// a corner of the box but the sphere itself doesn't).
-inline bool QuickBoxSphereTest(
- const Vector& vOrigin,
- float flRadius,
- const Vector& bbMin,
- const Vector& bbMax )
-{
- return vOrigin.x - flRadius < bbMax.x && vOrigin.x + flRadius > bbMin.x &&
- vOrigin.y - flRadius < bbMax.y && vOrigin.y + flRadius > bbMin.y &&
- vOrigin.z - flRadius < bbMax.z && vOrigin.z + flRadius > bbMin.z;
-}
-
-
-// Return true of the boxes intersect (but not if they just touch).
-inline bool QuickBoxIntersectTest(
- const Vector& vBox1Min,
- const Vector& vBox1Max,
- const Vector& vBox2Min,
- const Vector& vBox2Max )
-{
- return
- vBox1Min.x < vBox2Max.x && vBox1Max.x > vBox2Min.x &&
- vBox1Min.y < vBox2Max.y && vBox1Max.y > vBox2Min.y &&
- vBox1Min.z < vBox2Max.z && vBox1Max.z > vBox2Min.z;
-}
-
-
-extern float GammaToLinearFullRange( float gamma );
-extern float LinearToGammaFullRange( float linear );
-extern float GammaToLinear( float gamma );
-extern float LinearToGamma( float linear );
-
-extern float SrgbGammaToLinear( float flSrgbGammaValue );
-extern float SrgbLinearToGamma( float flLinearValue );
-extern float X360GammaToLinear( float fl360GammaValue );
-extern float X360LinearToGamma( float flLinearValue );
-extern float SrgbGammaTo360Gamma( float flSrgbGammaValue );
-
-// linear (0..4) to screen corrected vertex space (0..1?)
-FORCEINLINE float LinearToVertexLight( float f )
-{
- extern float lineartovertex[4096];
-
- // Gotta clamp before the multiply; could overflow...
- // assume 0..4 range
- int i = RoundFloatToInt( f * 1024.f );
-
- // Presumably the comman case will be not to clamp, so check that first:
- if( (unsigned)i > 4095 )
- {
- if ( i < 0 )
- i = 0; // Compare to zero instead of 4095 to save 4 bytes in the instruction stream
- else
- i = 4095;
- }
-
- return lineartovertex[i];
-}
-
-
-FORCEINLINE unsigned char LinearToLightmap( float f )
-{
- extern unsigned char lineartolightmap[4096];
-
- // Gotta clamp before the multiply; could overflow...
- int i = RoundFloatToInt( f * 1024.f ); // assume 0..4 range
-
- // Presumably the comman case will be not to clamp, so check that first:
- if ( (unsigned)i > 4095 )
- {
- if ( i < 0 )
- i = 0; // Compare to zero instead of 4095 to save 4 bytes in the instruction stream
- else
- i = 4095;
- }
-
- return lineartolightmap[i];
-}
-
-FORCEINLINE void ColorClamp( Vector& color )
-{
- float maxc = max( color.x, max( color.y, color.z ) );
- if ( maxc > 1.0f )
- {
- float ooMax = 1.0f / maxc;
- color.x *= ooMax;
- color.y *= ooMax;
- color.z *= ooMax;
- }
-
- if ( color[0] < 0.f ) color[0] = 0.f;
- if ( color[1] < 0.f ) color[1] = 0.f;
- if ( color[2] < 0.f ) color[2] = 0.f;
-}
-
-inline void ColorClampTruncate( Vector& color )
-{
- if (color[0] > 1.0f) color[0] = 1.0f; else if (color[0] < 0.0f) color[0] = 0.0f;
- if (color[1] > 1.0f) color[1] = 1.0f; else if (color[1] < 0.0f) color[1] = 0.0f;
- if (color[2] > 1.0f) color[2] = 1.0f; else if (color[2] < 0.0f) color[2] = 0.0f;
-}
-
-// Interpolate a Catmull-Rom spline.
-// t is a [0,1] value and interpolates a curve between p2 and p3.
-void Catmull_Rom_Spline(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector &output );
-
-// Interpolate a Catmull-Rom spline.
-// Returns the tangent of the point at t of the spline
-void Catmull_Rom_Spline_Tangent(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector &output );
-
-// area under the curve [0..t]
-void Catmull_Rom_Spline_Integral(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// area under the curve [0..1]
-void Catmull_Rom_Spline_Integral(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- Vector& output );
-
-// Interpolate a Catmull-Rom spline.
-// Normalize p2->p1 and p3->p4 to be the same length as p2->p3
-void Catmull_Rom_Spline_Normalize(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector &output );
-
-// area under the curve [0..t]
-// Normalize p2->p1 and p3->p4 to be the same length as p2->p3
-void Catmull_Rom_Spline_Integral_Normalize(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// Interpolate a Catmull-Rom spline.
-// Normalize p2.x->p1.x and p3.x->p4.x to be the same length as p2.x->p3.x
-void Catmull_Rom_Spline_NormalizeX(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector &output );
-
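-// area under the curve [0..t] (NOTE: the declaration below repeats the Catmull_Rom_Spline_NormalizeX signature declared just above; this comment looks like a copy/paste leftover)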
-void Catmull_Rom_Spline_NormalizeX(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// Interpolate a Hermite spline.
-// t is a [0,1] value and interpolates a curve between p1 and p2 with the deltas d1 and d2.
-void Hermite_Spline(
- const Vector &p1,
- const Vector &p2,
- const Vector &d1,
- const Vector &d2,
- float t,
- Vector& output );
-
-float Hermite_Spline(
- float p1,
- float p2,
- float d1,
- float d2,
- float t );
-
-// t is a [0,1] value and interpolates a curve between p1 and p2 with the slopes p0->p1 and p1->p2
-void Hermite_Spline(
- const Vector &p0,
- const Vector &p1,
- const Vector &p2,
- float t,
- Vector& output );
-
-float Hermite_Spline(
- float p0,
- float p1,
- float p2,
- float t );
-
-
-void Hermite_SplineBasis( float t, float basis[] );
-
-void Hermite_Spline(
- const Quaternion &q0,
- const Quaternion &q1,
- const Quaternion &q2,
- float t,
- Quaternion &output );
-
-
-// See http://en.wikipedia.org/wiki/Kochanek-Bartels_curves
-//
-// Tension: -1 = Round -> 1 = Tight
-// Bias: -1 = Pre-shoot (bias left) -> 1 = Post-shoot (bias right)
-// Continuity: -1 = Box corners -> 1 = Inverted corners
-//
-// If T=B=C=0 it's the same matrix as Catmull-Rom.
-// If T=1 & B=C=0 it's the same as Cubic.
-// If T=B=0 & C=-1 it's just linear interpolation
-//
-// See http://news.povray.org/povray.binaries.tutorials/attachment/%[email protected]%3E/Splines.bas.txt
-// for example code and descriptions of various spline types...
-//
-void Kochanek_Bartels_Spline(
- float tension,
- float bias,
- float continuity,
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-void Kochanek_Bartels_Spline_NormalizeX(
- float tension,
- float bias,
- float continuity,
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// See link at Kochanek_Bartels_Spline for info on the basis matrix used
-void Cubic_Spline(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-void Cubic_Spline_NormalizeX(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// See link at Kochanek_Bartels_Spline for info on the basis matrix used
-void BSpline(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-void BSpline_NormalizeX(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-// See link at Kochanek_Bartels_Spline for info on the basis matrix used
-void Parabolic_Spline(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
-void Parabolic_Spline_NormalizeX(
- const Vector &p1,
- const Vector &p2,
- const Vector &p3,
- const Vector &p4,
- float t,
- Vector& output );
-
// quintic interpolating polynomial from Perlin.
// 0->0, 1->1, smooth-in between with smooth tangents
// Evaluates 6t^5 - 15t^4 + 10t^3 as t^3 * ( t * ( 6t - 15 ) + 10 ).
FORCEINLINE float QuinticInterpolatingPolynomial(float t)
{
	const float flCubed = t * t * t;

	// The literals are doubles, so this factor is evaluated in double precision
	const double flQuadraticFactor = t * ( t * 6.0 - 15.0 ) + 10.0;

	return flCubed * flQuadraticFactor;
}
-
-// given a table of sorted tabulated positions, return the two indices and blendfactor to linear
-// interpolate. Does a search. Can be used to find the blend value to interpolate between
-// keyframes.
-void GetInterpolationData( float const *pKnotPositions,
- float const *pKnotValues,
- int nNumValuesinList,
- int nInterpolationRange,
- float flPositionToInterpolateAt,
- bool bWrap,
- float *pValueA,
- float *pValueB,
- float *pInterpolationValue);
-
-float RangeCompressor( float flValue, float flMin, float flMax, float flBase );
-
-// Get the minimum distance from vOrigin to the bounding box defined by [mins,maxs]
-// using voronoi regions.
-// 0 is returned if the origin is inside the box.
-float CalcSqrDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point );
-void CalcClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut );
-void CalcSqrDistAndClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut, float &distSqrOut );
-
-inline float CalcDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point )
-{
- float flDistSqr = CalcSqrDistanceToAABB( mins, maxs, point );
- return sqrt(flDistSqr);
-}
-
-// Get the closest point from P to the (infinite) line through vLineA and vLineB and
-// calculate the shortest distance from P to the line.
-// If you pass in a value for t, it will tell you the t for (A + (B-A)t) to get the closest point.
-// If the closest point lies on the segment between A and B, then 0 <= t <= 1.
-void CalcClosestPointOnLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *t=0 );
-float CalcDistanceToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 );
-float CalcDistanceSqrToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 );
-
-// The same three functions as above, except now the line is closed between A and B.
-void CalcClosestPointOnLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *t=0 );
-float CalcDistanceToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 );
-float CalcDistanceSqrToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 );
-
-// A function to compute the closest line segment connecting two lines (or false if the lines are parallel, etc.)
-bool CalcLineToLineIntersectionSegment(
- const Vector& p1,const Vector& p2,const Vector& p3,const Vector& p4,Vector *s1,Vector *s2,
- float *t1, float *t2 );
-
-// The above functions in 2D
-void CalcClosestPointOnLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, Vector2D &vClosest, float *t=0 );
-float CalcDistanceToLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 );
-float CalcDistanceSqrToLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 );
-void CalcClosestPointOnLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, Vector2D &vClosest, float *t=0 );
-float CalcDistanceToLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 );
-float CalcDistanceSqrToLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 );
-
// Init the mathlib
// 'overbright' is an int, so its default is written as the integer literal 2
// (it was previously spelled 2.0f and converted implicitly).
void MathLib_Init( float gamma = 2.2f, float texGamma = 2.2f, float brightness = 0.0f, int overbright = 2, bool bAllow3DNow = true, bool bAllowSSE = true, bool bAllowSSE2 = true, bool bAllowMMX = true );
-bool MathLib_3DNowEnabled( void );
-bool MathLib_MMXEnabled( void );
-bool MathLib_SSEEnabled( void );
-bool MathLib_SSE2Enabled( void );
-
-float Approach( float target, float value, float speed );
-float ApproachAngle( float target, float value, float speed );
-float AngleDiff( float destAngle, float srcAngle );
-float AngleDistance( float next, float cur );
-float AngleNormalize( float angle );
-
-// ensure that 0 <= angle <= 360
-float AngleNormalizePositive( float angle );
-
-bool AnglesAreEqual( float a, float b, float tolerance = 0.0f );
-
-
-void RotationDeltaAxisAngle( const QAngle &srcAngles, const QAngle &destAngles, Vector &deltaAxis, float &deltaAngle );
-void RotationDelta( const QAngle &srcAngles, const QAngle &destAngles, QAngle *out );
-
-void ComputeTrianglePlane( const Vector& v1, const Vector& v2, const Vector& v3, Vector& normal, float& intercept );
-int PolyFromPlane( Vector *outVerts, const Vector& normal, float dist, float fHalfScale = 9000.0f );
-int ClipPolyToPlane( Vector *inVerts, int vertCount, Vector *outVerts, const Vector& normal, float dist, float fOnPlaneEpsilon = 0.1f );
-int ClipPolyToPlane_Precise( double *inVerts, int vertCount, double *outVerts, const double *normal, double dist, double fOnPlaneEpsilon = 0.1 );
-
-//-----------------------------------------------------------------------------
-// Computes a reasonable tangent space for a triangle
-//-----------------------------------------------------------------------------
-void CalcTriangleTangentSpace( const Vector &p0, const Vector &p1, const Vector &p2,
- const Vector2D &t0, const Vector2D &t1, const Vector2D& t2,
- Vector &sVect, Vector &tVect );
-
-//-----------------------------------------------------------------------------
-// Transforms a AABB into another space; which will inherently grow the box.
-//-----------------------------------------------------------------------------
-void TransformAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut );
-
-//-----------------------------------------------------------------------------
-// Uses the inverse transform of in1
-//-----------------------------------------------------------------------------
-void ITransformAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut );
-
-//-----------------------------------------------------------------------------
-// Rotates a AABB into another space; which will inherently grow the box.
-// (same as TransformAABB, but doesn't take the translation into account)
-//-----------------------------------------------------------------------------
-void RotateAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut );
-
-//-----------------------------------------------------------------------------
-// Uses the inverse transform of in1
-//-----------------------------------------------------------------------------
-void IRotateAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut );
-
-//-----------------------------------------------------------------------------
-// Transform a plane
-//-----------------------------------------------------------------------------
-inline void MatrixTransformPlane( const matrix3x4_t &src, const cplane_t &inPlane, cplane_t &outPlane )
-{
- // What we want to do is the following:
- // 1) transform the normal into the new space.
- // 2) Determine a point on the old plane given by plane dist * plane normal
- // 3) Transform that point into the new space
- // 4) Plane dist = DotProduct( new normal, new point )
-
- // An optimized version, which works if the plane is orthogonal.
- // 1) Transform the normal into the new space
- // 2) Realize that transforming the old plane point into the new space
- // is given by [ d * n'x + Tx, d * n'y + Ty, d * n'z + Tz ]
- // where d = old plane dist, n' = transformed normal, Tn = translational component of transform
- // 3) Compute the new plane dist using the dot product of the normal result of #2
-
- // For a correct result, this should be an inverse-transpose matrix
- // but that only matters if there are nonuniform scale or skew factors in this matrix.
- VectorRotate( inPlane.normal, src, outPlane.normal );
- outPlane.dist = inPlane.dist * DotProduct( outPlane.normal, outPlane.normal );
- outPlane.dist += outPlane.normal.x * src[0][3] + outPlane.normal.y * src[1][3] + outPlane.normal.z * src[2][3];
-}
-
-inline void MatrixITransformPlane( const matrix3x4_t &src, const cplane_t &inPlane, cplane_t &outPlane )
-{
- // The trick here is that Tn = translational component of transform,
- // but for an inverse transform, Tn = - R^-1 * T
- Vector vecTranslation;
- MatrixGetColumn( src, 3, vecTranslation );
-
- Vector vecInvTranslation;
- VectorIRotate( vecTranslation, src, vecInvTranslation );
-
- VectorIRotate( inPlane.normal, src, outPlane.normal );
- outPlane.dist = inPlane.dist * DotProduct( outPlane.normal, outPlane.normal );
- outPlane.dist -= outPlane.normal.x * vecInvTranslation[0] + outPlane.normal.y * vecInvTranslation[1] + outPlane.normal.z * vecInvTranslation[2];
-}
-
-int CeilPow2( int in );
-int FloorPow2( int in );
-
// Unpacks a normal stored as three signed fields in one 32-bit word:
// x in bits 0..10 (11 bits), y in bits 11..21 (11 bits), z in bits 22..31 (10 bits).
// This is the inverse of PackNormal_HEND3N, which stores each component as a
// masked two's-complement integer scaled by 1023 (x, y) or 511 (z).
// Writes pNormal[0..2] and returns pNormal.
FORCEINLINE float * UnpackNormal_HEND3N( const unsigned int *pPackedNormal, float *pNormal )
{
	int temp[3];

	// Sign-extend each field: when the top bit of the field is set the value is
	// negative, so subtract 2^bits. (Computing 2^bits - value instead would
	// return the magnitude and lose the sign.)
	temp[0] = ((*pPackedNormal >> 0L) & 0x7ff);
	if ( temp[0] & 0x400 )
	{
		temp[0] -= 2048;
	}
	temp[1] = ((*pPackedNormal >> 11L) & 0x7ff);
	if ( temp[1] & 0x400 )
	{
		temp[1] -= 2048;
	}
	temp[2] = ((*pPackedNormal >> 22L) & 0x3ff);
	if ( temp[2] & 0x200 )
	{
		temp[2] -= 1024;
	}

	pNormal[0] = (float)temp[0] * 1.0f/1023.0f;
	pNormal[1] = (float)temp[1] * 1.0f/1023.0f;
	pNormal[2] = (float)temp[2] * 1.0f/511.0f;
	return pNormal;
}
-
-FORCEINLINE unsigned int * PackNormal_HEND3N( const float *pNormal, unsigned int *pPackedNormal )
-{
- int temp[3];
-
- temp[0] = Float2Int( pNormal[0] * 1023.0f );
- temp[1] = Float2Int( pNormal[1] * 1023.0f );
- temp[2] = Float2Int( pNormal[2] * 511.0f );
-
- // the normal is out of bounds, determine the source and fix
- // clamping would be even more of a slowdown here
- Assert( temp[0] >= -1023 && temp[0] <= 1023 );
- Assert( temp[1] >= -1023 && temp[1] <= 1023 );
- Assert( temp[2] >= -511 && temp[2] <= 511 );
-
- *pPackedNormal = ( ( temp[2] & 0x3ff ) << 22L ) |
- ( ( temp[1] & 0x7ff ) << 11L ) |
- ( ( temp[0] & 0x7ff ) << 0L );
- return pPackedNormal;
-}
-
-FORCEINLINE unsigned int * PackNormal_HEND3N( float nx, float ny, float nz, unsigned int *pPackedNormal )
-{
- int temp[3];
-
- temp[0] = Float2Int( nx * 1023.0f );
- temp[1] = Float2Int( ny * 1023.0f );
- temp[2] = Float2Int( nz * 511.0f );
-
- // the normal is out of bounds, determine the source and fix
- // clamping would be even more of a slowdown here
- Assert( temp[0] >= -1023 && temp[0] <= 1023 );
- Assert( temp[1] >= -1023 && temp[1] <= 1023 );
- Assert( temp[2] >= -511 && temp[2] <= 511 );
-
- *pPackedNormal = ( ( temp[2] & 0x3ff ) << 22L ) |
- ( ( temp[1] & 0x7ff ) << 11L ) |
- ( ( temp[0] & 0x7ff ) << 0L );
- return pPackedNormal;
-}
-
// Unpacks from Jason's 2-short format (fills in a 4th binormal-sign (+1/-1) value, if this is a tangent vector).
// The low 16 bits hold X and the high 16 bits hold Y, each biased by 16384 and
// scaled by 16384; the sign of the X short carries the sign of Z and the sign
// of the Y short carries the binormal sign. Z is rebuilt as sqrt(1 - x^2 - y^2).
// Writes pNormal[0..2] (and pNormal[3] when bIsTangent is set) and returns pNormal.
FORCEINLINE float * UnpackNormal_SHORT2( const unsigned int *pPackedNormal, float *pNormal, bool bIsTangent = false )
{
	// FIXME: short math is slow on 360 - use ints here instead (bit-twiddle to deal w/ the sign bits)
	short iX = (*pPackedNormal & 0x0000FFFF);
	short iY = (*pPackedNormal & 0xFFFF0000) >> 16;

	float zSign = +1;
	if ( iX < 0 )
	{
		zSign = -1;
		iX = -iX;
	}
	float tSign = +1;
	if ( iY < 0 )
	{
		tSign = -1;
		iY = -iY;
	}

	pNormal[0] = ( iX - 16384.0f ) / 16384.0f;
	pNormal[1] = ( iY - 16384.0f ) / 16384.0f;

	// Quantization of X and Y can push x^2 + y^2 slightly above 1 for normals
	// whose Z is near zero; clamp so sqrtf never sees a negative value (NaN).
	float zSquared = 1.0f - ( pNormal[0]*pNormal[0] + pNormal[1]*pNormal[1] );
	if ( zSquared < 0.0f )
	{
		zSquared = 0.0f;
	}
	pNormal[2] = zSign*sqrtf( zSquared );

	if ( bIsTangent )
	{
		pNormal[3] = tSign;
	}

	return pNormal;
}
-
// Pack a vector (ASSUMED TO BE NORMALIZED) into Jason's 4-byte (SHORT2) format.
// This simply reconstructs Z from X & Y. It uses the sign bits of the X & Y coords
// to reconstruct the sign of Z and, if this is a tangent vector, the sign of the
// binormal (this is needed because tangent/binormal vectors are supposed to follow
// UV gradients, but shaders reconstruct the binormal from the tangent and normal
// assuming that they form a right-handed basis).
// Writes *pPackedNormal (X in the low 16 bits, Y in the high 16 bits) and
// returns pPackedNormal.
FORCEINLINE unsigned int * PackNormal_SHORT2( float nx, float ny, float nz, unsigned int *pPackedNormal, float binormalSign = +1.0f )
{
	nx += 1;					// [-1,+1] -> [0,2]
	ny += 1;
	nx *= 16384.0f;				// [ 0, 2] -> [0,32768]
	ny *= 16384.0f;

	// '0' and '32768' values are invalid encodings
	nx = max( nx, 1.0f );		// Make sure there are no zero values
	ny = max( ny, 1.0f );
	nx = min( nx, 32767.0f );	// Make sure there are no 32768 values
	ny = min( ny, 32767.0f );

	if ( nz < 0.0f )
		nx = -nx;				// Set the sign bit for z

	ny *= binormalSign;			// Set the sign bit for the binormal (use when encoding a tangent vector)

	// FIXME: short math is slow on 360 - use ints here instead (bit-twiddle to deal w/ the sign bits), also use Float2Int()
	short sX = (short)nx;		// signed short [1,32767]
	short sY = (short)ny;

	// Assemble the word in unsigned arithmetic: left-shifting a negative signed
	// value is undefined before C++20, and the mask on sX strips the
	// sign-extension bits that would otherwise spill into the Y half.
	const unsigned int nLowWord = (unsigned int)sX & 0x0000FFFF;
	const unsigned int nHighWord = (unsigned int)sY << 16;
	*pPackedNormal = nLowWord | nHighWord;

	return pPackedNormal;
}
-
-FORCEINLINE unsigned int * PackNormal_SHORT2( const float *pNormal, unsigned int *pPackedNormal, float binormalSign = +1.0f )
-{
- return PackNormal_SHORT2( pNormal[0], pNormal[1], pNormal[2], pPackedNormal, binormalSign );
-}
-
// Unpacks a UBYTE4 normal (for a tangent, the result's fourth component receives the binormal 'sign').
// A normal is read from the two low bytes of *pPackedNormal, a tangent from
// the two high bytes. Each byte is peeled in two steps: first the sign of Z
// (or of the binormal), then the sign of the component itself, leaving a
// 0..63 magnitude on the x+y+z=1 plane. Z is rebuilt as 1 - x - y and the
// result is normalized.
// Writes pNormal[0..2] (and pNormal[3] when bIsTangent is set) and returns pNormal.
FORCEINLINE float * UnpackNormal_UBYTE4( const unsigned int *pPackedNormal, float *pNormal, bool bIsTangent = false )
{
	unsigned char cX, cY;
	if ( bIsTangent )
	{
		cX = *pPackedNormal >> 16;					// Unpack Z
		cY = *pPackedNormal >> 24;					// Unpack W
	}
	else
	{
		cX = *pPackedNormal >>  0;					// Unpack X
		cY = *pPackedNormal >>  8;					// Unpack Y
	}

	float x = cX - 128.0f;
	float y = cY - 128.0f;
	float z;

	float zSignBit = x < 0 ? 1.0f : 0.0f;			// z and t negative bits (like slt asm instruction)
	float tSignBit = y < 0 ? 1.0f : 0.0f;
	float zSign    = -( 2*zSignBit - 1 );			// z and t signs
	float tSign    = -( 2*tSignBit - 1 );

	x = x*zSign - zSignBit;							// 0..127
	y = y*tSign - tSignBit;
	x = x - 64;										// -64..63
	y = y - 64;

	float xSignBit = x < 0 ? 1.0f : 0.0f;			// x and y negative bits (like slt asm instruction)
	float ySignBit = y < 0 ? 1.0f : 0.0f;
	float xSign    = -( 2*xSignBit - 1 );			// x and y signs
	float ySign    = -( 2*ySignBit - 1 );

	x = ( x*xSign - xSignBit ) / 63.0f;				// 0..1 range
	y = ( y*ySign - ySignBit ) / 63.0f;
	z = 1.0f - x - y;

	float oolen	 = 1.0f / sqrt( x*x + y*y + z*z );	// Normalize and
	x			*= oolen * xSign;					// Recover signs
	y			*= oolen * ySign;
	z			*= oolen * zSign;

	pNormal[0] = x;
	pNormal[1] = y;
	pNormal[2] = z;
	if ( bIsTangent )
	{
		pNormal[3] = tSign;
	}

	return pNormal;
}
-
-//////////////////////////////////////////////////////////////////////////////
-// See: http://www.oroboro.com/rafael/docserv.php/index/programming/article/unitv2
-//
-// UBYTE4 encoding, using per-octant projection onto x+y+z=1
-// Assume input vector is already unit length
-// (a zero vector makes the projection below divide by zero)
-//
-// binormalSign specifies 'sign' of binormal, stored in t sign bit of tangent
-// (lets the shader know whether norm/tan/bin form a right-handed basis)
-// It must be exactly +1.0f or -1.0f (asserted below).
-//
-// bIsTangent is used to specify which WORD of the output to store the data
-// The expected usage is to call once with the normal and once with
-// the tangent and binormal sign flag, bitwise OR'ing the returned DWORDs
-// NOTE: each call assigns *pPackedNormal (the other WORD is written as zero), so the
-// two calls need separate output DWORDs whose values are then OR'd together.
-//
-// Returns pPackedNormal.
-FORCEINLINE unsigned int * PackNormal_UBYTE4( float nx, float ny, float nz, unsigned int *pPackedNormal, bool bIsTangent = false, float binormalSign = +1.0f )
-{
-    float xSign = nx < 0.0f ? -1.0f : 1.0f; // -1 or 1 sign
-    float ySign = ny < 0.0f ? -1.0f : 1.0f;
-    float zSign = nz < 0.0f ? -1.0f : 1.0f;
-    float tSign = binormalSign;
-    Assert( ( binormalSign == +1.0f ) || ( binormalSign == -1.0f ) );
-
-    float xSignBit = 0.5f*( 1 - xSign ); // [-1,+1] -> [1,0]
-    float ySignBit = 0.5f*( 1 - ySign ); // 1 is negative bit (like slt instruction)
-    float zSignBit = 0.5f*( 1 - zSign );
-    float tSignBit = 0.5f*( 1 - binormalSign );
-
-    float absX = xSign*nx; // 0..1 range (abs)
-    float absY = ySign*ny;
-    float absZ = zSign*nz;
-
-    float xbits = absX / ( absX + absY + absZ ); // Project onto x+y+z=1 plane
-    float ybits = absY / ( absX + absY + absZ );
-
-    xbits *= 63; // 0..63
-    ybits *= 63;
-
-    xbits = xbits * xSign - xSignBit; // -64..63 range
-    ybits = ybits * ySign - ySignBit;
-    xbits += 64.0f; // 0..127 range
-    ybits += 64.0f;
-
-    xbits = xbits * zSign - zSignBit; // Negate based on z and t
-    ybits = ybits * tSign - tSignBit; // -128..127 range
-
-    xbits += 128.0f; // 0..255 range
-    ybits += 128.0f;
-
-    unsigned char cX = (unsigned char) xbits;
-    unsigned char cY = (unsigned char) ybits;
-
-    // Widen to unsigned int before shifting. An unsigned char operand is promoted to
-    // (signed) int, so shifting a byte >= 128 left by 24 would move a set bit into the
-    // sign bit of that int. Shifting as unsigned yields the same packed bits without that.
-    unsigned int nX = cX;
-    unsigned int nY = cY;
-
-    if ( !bIsTangent )
-        *pPackedNormal = (nX << 0) | (nY << 8); // xy for normal
-    else
-        *pPackedNormal = (nX << 16) | (nY << 24); // zw for tangent
-
-    return pPackedNormal;
-}
-
-// Array convenience overload: pNormal[0..2] supply x, y and z, and every other argument
-// is passed straight through to the component-wise PackNormal_UBYTE4 overload, whose
-// return value is returned unchanged.
-FORCEINLINE unsigned int * PackNormal_UBYTE4( const float *pNormal, unsigned int *pPackedNormal, bool bIsTangent = false, float binormalSign = +1.0f )
-{
-    unsigned int *pResult = PackNormal_UBYTE4( pNormal[0], pNormal[1], pNormal[2], pPackedNormal, bIsTangent, binormalSign );
-    return pResult;
-}
-
-
-//-----------------------------------------------------------------------------
-// Convert RGB to HSV
-//-----------------------------------------------------------------------------
-void RGBtoHSV( const Vector &rgb, Vector &hsv );
-
-
-//-----------------------------------------------------------------------------
-// Convert HSV to RGB
-//-----------------------------------------------------------------------------
-void HSVtoRGB( const Vector &hsv, Vector &rgb );
-
-
-//-----------------------------------------------------------------------------
-// Fast version of pow and log
-//-----------------------------------------------------------------------------
-
-float FastLog2(float i); // log2( i )
-float FastPow2(float i); // 2^i
-float FastPow(float a, float b); // a^b
-float FastPow10( float i ); // 10^i
-
-//-----------------------------------------------------------------------------
-// For testing float equality
-//-----------------------------------------------------------------------------
-
-// Absolute-tolerance comparison of two floats: true when the magnitude of (a - b)
-// does not exceed epsilon.
-inline bool CloseEnough( float a, float b, float epsilon = EQUAL_EPSILON )
-{
-    if ( fabs( a - b ) <= epsilon )
-    {
-        return true;
-    }
-
-    return false;
-}
-
-// Per-component absolute-tolerance comparison: true only when the x, y and z
-// differences are each within epsilon. Components are checked in x, y, z order and
-// the first one found out of tolerance ends the test.
-inline bool CloseEnough( const Vector &a, const Vector &b, float epsilon = EQUAL_EPSILON )
-{
-    if ( !( fabs( a.x - b.x ) <= epsilon ) )
-        return false;
-
-    if ( !( fabs( a.y - b.y ) <= epsilon ) )
-        return false;
-
-    return fabs( a.z - b.z ) <= epsilon;
-}
-
-// Fast compare
-// maxUlps is the maximum error in terms of Units in the Last Place. This
-// specifies how big an error we are willing to accept in terms of the value
-// of the least significant digit of the floating point number's
-// representation. maxUlps can also be interpreted in terms of how many
-// representable floats we are willing to accept between A and B.
-// This function will allow maxUlps-1 floats between A and B.
-bool AlmostEqual(float a, float b, int maxUlps = 10);
-
-// Vector overload: true only when the x, y and z components each pass the scalar
-// AlmostEqual with the same maxUlps. Components are checked in x, y, z order and the
-// first one that fails ends the test.
-inline bool AlmostEqual( const Vector &a, const Vector &b, int maxUlps = 10)
-{
-    if ( !AlmostEqual( a.x, b.x, maxUlps ) )
-        return false;
-
-    if ( !AlmostEqual( a.y, b.y, maxUlps ) )
-        return false;
-
-    return AlmostEqual( a.z, b.z, maxUlps );
-}
-
-
-#endif // MATH_LIB_H
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//===========================================================================// + +#ifndef MATH_LIB_H +#define MATH_LIB_H + +#include <math.h> +#include "tier0/basetypes.h" +#include "tier0/commonmacros.h" +#include "mathlib/vector.h" +#include "mathlib/vector2d.h" +#include "tier0/dbg.h" + +#include "mathlib/math_pfns.h" + +#if defined(__i386__) || defined(_M_IX86) +// For MMX intrinsics +#include <xmmintrin.h> +#endif + +// XXX remove me +#undef clamp + +// Uncomment this to enable FP exceptions in parts of the code. +// This can help track down FP bugs. However the code is not +// FP exception clean so this not a turnkey operation. +//#define FP_EXCEPTIONS_ENABLED + + +#ifdef FP_EXCEPTIONS_ENABLED +#include <float.h> // For _clearfp and _controlfp_s +#endif + +// FPExceptionDisabler and FPExceptionEnabler taken from my blog post +// at http://www.altdevblogaday.com/2012/04/20/exceptional-floating-point/ + +// Declare an object of this type in a scope in order to suppress +// all floating-point exceptions temporarily. The old exception +// state will be reset at the end. +class FPExceptionDisabler +{ +public: +#ifdef FP_EXCEPTIONS_ENABLED + FPExceptionDisabler(); + ~FPExceptionDisabler(); + +private: + unsigned int mOldValues; +#else + FPExceptionDisabler() {} + ~FPExceptionDisabler() {} +#endif + +private: + // Make the copy constructor and assignment operator private + // and unimplemented to prohibit copying. + FPExceptionDisabler(const FPExceptionDisabler&); + FPExceptionDisabler& operator=(const FPExceptionDisabler&); +}; + +// Declare an object of this type in a scope in order to enable a +// specified set of floating-point exceptions temporarily. The old +// exception state will be reset at the end. +// This class can be nested. 
+class FPExceptionEnabler +{ +public: + // Overflow, divide-by-zero, and invalid-operation are the FP + // exceptions most frequently associated with bugs. +#ifdef FP_EXCEPTIONS_ENABLED + FPExceptionEnabler(unsigned int enableBits = _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID); + ~FPExceptionEnabler(); + +private: + unsigned int mOldValues; +#else + FPExceptionEnabler(unsigned int enableBits = 0) + { + } + ~FPExceptionEnabler() + { + } +#endif + +private: + // Make the copy constructor and assignment operator private + // and unimplemented to prohibit copying. + FPExceptionEnabler(const FPExceptionEnabler&); + FPExceptionEnabler& operator=(const FPExceptionEnabler&); +}; + + + +#ifdef DEBUG // stop crashing edit-and-continue +FORCEINLINE float clamp( float val, float minVal, float maxVal ) +{ + if ( maxVal < minVal ) + return maxVal; + else if( val < minVal ) + return minVal; + else if( val > maxVal ) + return maxVal; + else + return val; +} +#else // DEBUG +FORCEINLINE float clamp( float val, float minVal, float maxVal ) +{ +#if defined(__i386__) || defined(_M_IX86) + _mm_store_ss( &val, + _mm_min_ss( + _mm_max_ss( + _mm_load_ss(&val), + _mm_load_ss(&minVal) ), + _mm_load_ss(&maxVal) ) ); +#else + val = fpmax(minVal, val); + val = fpmin(maxVal, val); +#endif + return val; +} +#endif // DEBUG + +// +// Returns a clamped value in the range [min, max]. +// +template< class T > +inline T clamp( T const &val, T const &minVal, T const &maxVal ) +{ + if ( maxVal < minVal ) + return maxVal; + else if( val < minVal ) + return minVal; + else if( val > maxVal ) + return maxVal; + else + return val; +} + + +// plane_t structure +// !!! if this is changed, it must be changed in asm code too !!! +// FIXME: does the asm code even exist anymore? 
+// FIXME: this should move to a different file +struct cplane_t +{ + Vector normal; + float dist; + byte type; // for fast side tests + byte signbits; // signx + (signy<<1) + (signz<<1) + byte pad[2]; + +#ifdef VECTOR_NO_SLOW_OPERATIONS + cplane_t() {} + +private: + // No copy constructors allowed if we're in optimal mode + cplane_t(const cplane_t& vOther); +#endif +}; + +// structure offset for asm code +#define CPLANE_NORMAL_X 0 +#define CPLANE_NORMAL_Y 4 +#define CPLANE_NORMAL_Z 8 +#define CPLANE_DIST 12 +#define CPLANE_TYPE 16 +#define CPLANE_SIGNBITS 17 +#define CPLANE_PAD0 18 +#define CPLANE_PAD1 19 + +// 0-2 are axial planes +#define PLANE_X 0 +#define PLANE_Y 1 +#define PLANE_Z 2 + +// 3-5 are non-axial planes snapped to the nearest +#define PLANE_ANYX 3 +#define PLANE_ANYY 4 +#define PLANE_ANYZ 5 + + +//----------------------------------------------------------------------------- +// Frustum plane indices. +// WARNING: there is code that depends on these values +//----------------------------------------------------------------------------- + +enum +{ + FRUSTUM_RIGHT = 0, + FRUSTUM_LEFT = 1, + FRUSTUM_TOP = 2, + FRUSTUM_BOTTOM = 3, + FRUSTUM_NEARZ = 4, + FRUSTUM_FARZ = 5, + FRUSTUM_NUMPLANES = 6 +}; + +extern int SignbitsForPlane( cplane_t *out ); + +class Frustum_t +{ +public: + void SetPlane( int i, int nType, const Vector &vecNormal, float dist ) + { + m_Plane[i].normal = vecNormal; + m_Plane[i].dist = dist; + m_Plane[i].type = nType; + m_Plane[i].signbits = SignbitsForPlane( &m_Plane[i] ); + m_AbsNormal[i].Init( fabs(vecNormal.x), fabs(vecNormal.y), fabs(vecNormal.z) ); + } + + inline const cplane_t *GetPlane( int i ) const { return &m_Plane[i]; } + inline const Vector &GetAbsNormal( int i ) const { return m_AbsNormal[i]; } + +private: + cplane_t m_Plane[FRUSTUM_NUMPLANES]; + Vector m_AbsNormal[FRUSTUM_NUMPLANES]; +}; + +// Computes Y fov from an X fov and a screen aspect ratio + X from Y +float CalcFovY( float flFovX, float flScreenAspect ); +float 
CalcFovX( float flFovY, float flScreenAspect ); + +// Generate a frustum based on perspective view parameters +// NOTE: FOV is specified in degrees, as the *full* view angle (not half-angle) +void GeneratePerspectiveFrustum( const Vector& origin, const QAngle &angles, float flZNear, float flZFar, float flFovX, float flAspectRatio, Frustum_t &frustum ); +void GeneratePerspectiveFrustum( const Vector& origin, const Vector &forward, const Vector &right, const Vector &up, float flZNear, float flZFar, float flFovX, float flFovY, Frustum_t &frustum ); + +// Cull the world-space bounding box to the specified frustum. +bool R_CullBox( const Vector& mins, const Vector& maxs, const Frustum_t &frustum ); +bool R_CullBoxSkipNear( const Vector& mins, const Vector& maxs, const Frustum_t &frustum ); + +struct matrix3x4_t +{ + matrix3x4_t() {} + matrix3x4_t( + float m00, float m01, float m02, float m03, + float m10, float m11, float m12, float m13, + float m20, float m21, float m22, float m23 ) + { + m_flMatVal[0][0] = m00; m_flMatVal[0][1] = m01; m_flMatVal[0][2] = m02; m_flMatVal[0][3] = m03; + m_flMatVal[1][0] = m10; m_flMatVal[1][1] = m11; m_flMatVal[1][2] = m12; m_flMatVal[1][3] = m13; + m_flMatVal[2][0] = m20; m_flMatVal[2][1] = m21; m_flMatVal[2][2] = m22; m_flMatVal[2][3] = m23; + } + + //----------------------------------------------------------------------------- + // Creates a matrix where the X axis = forward + // the Y axis = left, and the Z axis = up + //----------------------------------------------------------------------------- + void Init( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin ) + { + m_flMatVal[0][0] = xAxis.x; m_flMatVal[0][1] = yAxis.x; m_flMatVal[0][2] = zAxis.x; m_flMatVal[0][3] = vecOrigin.x; + m_flMatVal[1][0] = xAxis.y; m_flMatVal[1][1] = yAxis.y; m_flMatVal[1][2] = zAxis.y; m_flMatVal[1][3] = vecOrigin.y; + m_flMatVal[2][0] = xAxis.z; m_flMatVal[2][1] = yAxis.z; m_flMatVal[2][2] = zAxis.z; m_flMatVal[2][3] 
= vecOrigin.z; + } + + //----------------------------------------------------------------------------- + // Creates a matrix where the X axis = forward + // the Y axis = left, and the Z axis = up + //----------------------------------------------------------------------------- + matrix3x4_t( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin ) + { + Init( xAxis, yAxis, zAxis, vecOrigin ); + } + + inline void Invalidate( void ) + { + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < 4; j++) + { + m_flMatVal[i][j] = VEC_T_NAN; + } + } + } + + float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; } + const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; } + float *Base() { return &m_flMatVal[0][0]; } + const float *Base() const { return &m_flMatVal[0][0]; } + + float m_flMatVal[3][4]; +}; + + +#ifndef M_PI + #define M_PI 3.14159265358979323846 // matches value in gcc v2 math.h +#endif + +#define M_PI_F ((float)(M_PI)) // Shouldn't collide with anything. + +// NJS: Inlined to prevent floats from being autopromoted to doubles, as with the old system. +#ifndef RAD2DEG + #define RAD2DEG( x ) ( (float)(x) * (float)(180.f / M_PI_F) ) +#endif + +#ifndef DEG2RAD + #define DEG2RAD( x ) ( (float)(x) * (float)(M_PI_F / 180.f) ) +#endif + +// Used to represent sides of things like planes. +#define SIDE_FRONT 0 +#define SIDE_BACK 1 +#define SIDE_ON 2 +#define SIDE_CROSS -2 // necessary for polylib.c + +#define ON_VIS_EPSILON 0.01 // necessary for vvis (flow.c) -- again look into moving later! +#define EQUAL_EPSILON 0.001 // necessary for vbsp (faces.c) -- should look into moving it there? 
+ +extern bool s_bMathlibInitialized; + +extern const Vector vec3_origin; +extern const QAngle vec3_angle; +extern const Quaternion quat_identity; +extern const Vector vec3_invalid; +extern const int nanmask; + +#define IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask) + +FORCEINLINE vec_t DotProduct(const vec_t *v1, const vec_t *v2) +{ + return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2]; +} +FORCEINLINE void VectorSubtract(const vec_t *a, const vec_t *b, vec_t *c) +{ + c[0]=a[0]-b[0]; + c[1]=a[1]-b[1]; + c[2]=a[2]-b[2]; +} +FORCEINLINE void VectorAdd(const vec_t *a, const vec_t *b, vec_t *c) +{ + c[0]=a[0]+b[0]; + c[1]=a[1]+b[1]; + c[2]=a[2]+b[2]; +} +FORCEINLINE void VectorCopy(const vec_t *a, vec_t *b) +{ + b[0]=a[0]; + b[1]=a[1]; + b[2]=a[2]; +} +FORCEINLINE void VectorClear(vec_t *a) +{ + a[0]=a[1]=a[2]=0; +} + +FORCEINLINE float VectorMaximum(const vec_t *v) +{ + return max( v[0], max( v[1], v[2] ) ); +} + +FORCEINLINE float VectorMaximum(const Vector& v) +{ + return max( v.x, max( v.y, v.z ) ); +} + +FORCEINLINE void VectorScale (const float* in, vec_t scale, float* out) +{ + out[0] = in[0]*scale; + out[1] = in[1]*scale; + out[2] = in[2]*scale; +} + + +// Cannot be forceinline as they have overloads: +inline void VectorFill(vec_t *a, float b) +{ + a[0]=a[1]=a[2]=b; +} + +inline void VectorNegate(vec_t *a) +{ + a[0]=-a[0]; + a[1]=-a[1]; + a[2]=-a[2]; +} + + +//#define VectorMaximum(a) ( max( (a)[0], max( (a)[1], (a)[2] ) ) ) +#define Vector2Clear(x) {(x)[0]=(x)[1]=0;} +#define Vector2Negate(x) {(x)[0]=-((x)[0]);(x)[1]=-((x)[1]);} +#define Vector2Copy(a,b) {(b)[0]=(a)[0];(b)[1]=(a)[1];} +#define Vector2Subtract(a,b,c) {(c)[0]=(a)[0]-(b)[0];(c)[1]=(a)[1]-(b)[1];} +#define Vector2Add(a,b,c) {(c)[0]=(a)[0]+(b)[0];(c)[1]=(a)[1]+(b)[1];} +#define Vector2Scale(a,b,c) {(c)[0]=(b)*(a)[0];(c)[1]=(b)*(a)[1];} + +// NJS: Some functions in VBSP still need to use these for dealing with mixing vec4's and shorts with vec_t's. +// remove when no longer needed. 
+#define VECTOR_COPY( A, B ) do { (B)[0] = (A)[0]; (B)[1] = (A)[1]; (B)[2]=(A)[2]; } while(0) +#define DOT_PRODUCT( A, B ) ( (A)[0]*(B)[0] + (A)[1]*(B)[1] + (A)[2]*(B)[2] ) + +FORCEINLINE void VectorMAInline( const float* start, float scale, const float* direction, float* dest ) +{ + dest[0]=start[0]+direction[0]*scale; + dest[1]=start[1]+direction[1]*scale; + dest[2]=start[2]+direction[2]*scale; +} + +FORCEINLINE void VectorMAInline( const Vector& start, float scale, const Vector& direction, Vector& dest ) +{ + dest.x=start.x+direction.x*scale; + dest.y=start.y+direction.y*scale; + dest.z=start.z+direction.z*scale; +} + +FORCEINLINE void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest ) +{ + VectorMAInline(start, scale, direction, dest); +} + +FORCEINLINE void VectorMA( const float * start, float scale, const float *direction, float *dest ) +{ + VectorMAInline(start, scale, direction, dest); +} + + +int VectorCompare (const float *v1, const float *v2); + +inline float VectorLength(const float *v) +{ + return FastSqrt( v[0]*v[0] + v[1]*v[1] + v[2]*v[2] + FLT_EPSILON ); +} + +void CrossProduct (const float *v1, const float *v2, float *cross); + +qboolean VectorsEqual( const float *v1, const float *v2 ); + +inline vec_t RoundInt (vec_t in) +{ + return floor(in + 0.5f); +} + +int Q_log2(int val); + +// Math routines done in optimized assembly math package routines +void inline SinCos( float radians, float *sine, float *cosine ) +{ +#if defined( _X360 ) + XMScalarSinCos( sine, cosine, radians ); +#elif defined( PLATFORM_WINDOWS_PC32 ) + _asm + { + fld DWORD PTR [radians] + fsincos + + mov edx, DWORD PTR [cosine] + mov eax, DWORD PTR [sine] + + fstp DWORD PTR [edx] + fstp DWORD PTR [eax] + } +#elif defined( PLATFORM_WINDOWS_PC64 ) + *sine = sin( radians ); + *cosine = cos( radians ); +#elif defined( POSIX ) + register double __cosr, __sinr; + __asm ("fsincos" : "=t" (__cosr), "=u" (__sinr) : "0" (radians)); + + *sine = __sinr; + *cosine 
= __cosr; +#endif +} + +#define SIN_TABLE_SIZE 256 +#define FTOIBIAS 12582912.f +extern float SinCosTable[SIN_TABLE_SIZE]; + +inline float TableCos( float theta ) +{ + union + { + int i; + float f; + } ftmp; + + // ideally, the following should compile down to: theta * constant + constant, changing any of these constants from defines sometimes fubars this. + ftmp.f = theta * ( float )( SIN_TABLE_SIZE / ( 2.0f * M_PI ) ) + ( FTOIBIAS + ( SIN_TABLE_SIZE / 4 ) ); + return SinCosTable[ ftmp.i & ( SIN_TABLE_SIZE - 1 ) ]; +} + +inline float TableSin( float theta ) +{ + union + { + int i; + float f; + } ftmp; + + // ideally, the following should compile down to: theta * constant + constant + ftmp.f = theta * ( float )( SIN_TABLE_SIZE / ( 2.0f * M_PI ) ) + FTOIBIAS; + return SinCosTable[ ftmp.i & ( SIN_TABLE_SIZE - 1 ) ]; +} + +template<class T> +FORCEINLINE T Square( T const &a ) +{ + return a * a; +} + + +// return the smallest power of two >= x. +// returns 0 if x == 0 or x > 0x80000000 (ie numbers that would be negative if x was signed) +// NOTE: the old code took an int, and if you pass in an int of 0x80000000 casted to a uint, +// you'll get 0x80000000, which is correct for uints, instead of 0, which was correct for ints +FORCEINLINE uint SmallestPowerOfTwoGreaterOrEqual( uint x ) +{ + x -= 1; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return x + 1; +} + +// return the largest power of two <= x. 
Will return 0 if passed 0 +FORCEINLINE uint LargestPowerOfTwoLessThanOrEqual( uint x ) +{ + if ( x >= 0x80000000 ) + return 0x80000000; + + return SmallestPowerOfTwoGreaterOrEqual( x + 1 ) >> 1; +} + + +// Math routines for optimizing division +void FloorDivMod (double numer, double denom, int *quotient, int *rem); +int GreatestCommonDivisor (int i1, int i2); + +// Test for FPU denormal mode +bool IsDenormal( const float &val ); + +// MOVEMENT INFO +enum +{ + PITCH = 0, // up / down + YAW, // left / right + ROLL // fall over +}; + +void MatrixAngles( const matrix3x4_t & matrix, float *angles ); // !!!! +void MatrixVectors( const matrix3x4_t &matrix, Vector* pForward, Vector *pRight, Vector *pUp ); +void VectorTransform (const float *in1, const matrix3x4_t & in2, float *out); +void VectorITransform (const float *in1, const matrix3x4_t & in2, float *out); +void VectorRotate( const float *in1, const matrix3x4_t & in2, float *out); +void VectorRotate( const Vector &in1, const QAngle &in2, Vector &out ); +void VectorRotate( const Vector &in1, const Quaternion &in2, Vector &out ); +void VectorIRotate( const float *in1, const matrix3x4_t & in2, float *out); + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +QAngle TransformAnglesToLocalSpace( const QAngle &angles, const matrix3x4_t &parentMatrix ); +QAngle TransformAnglesToWorldSpace( const QAngle &angles, const matrix3x4_t &parentMatrix ); + +#endif + +void MatrixInitialize( matrix3x4_t &mat, const Vector &vecOrigin, const Vector &vecXAxis, const Vector &vecYAxis, const Vector &vecZAxis ); +void MatrixCopy( const matrix3x4_t &in, matrix3x4_t &out ); +void MatrixInvert( const matrix3x4_t &in, matrix3x4_t &out ); + +// Matrix equality test +bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float flTolerance = 1e-5 ); + +void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out ); +void MatrixSetColumn( const Vector &in, int column, matrix3x4_t &out ); + +inline void MatrixGetTranslation( const 
matrix3x4_t &in, Vector &out ) +{ + MatrixGetColumn ( in, 3, out ); +} + +inline void MatrixSetTranslation( const Vector &in, matrix3x4_t &out ) +{ + MatrixSetColumn ( in, 3, out ); +} + +void MatrixScaleBy ( const float flScale, matrix3x4_t &out ); +void MatrixScaleByZero ( matrix3x4_t &out ); + +//void DecomposeRotation( const matrix3x4_t &mat, float *out ); +void ConcatRotations (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out); +void ConcatTransforms (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out); + +// For identical interface w/ VMatrix +inline void MatrixMultiply ( const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out ) +{ + ConcatTransforms( in1, in2, out ); +} + +void QuaternionSlerp( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt ); +void QuaternionSlerpNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt ); +void QuaternionBlend( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt ); +void QuaternionBlendNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt ); +void QuaternionIdentityBlend( const Quaternion &p, float t, Quaternion &qt ); +float QuaternionAngleDiff( const Quaternion &p, const Quaternion &q ); +void QuaternionScale( const Quaternion &p, float t, Quaternion &q ); +void QuaternionAlign( const Quaternion &p, const Quaternion &q, Quaternion &qt ); +float QuaternionDotProduct( const Quaternion &p, const Quaternion &q ); +void QuaternionConjugate( const Quaternion &p, Quaternion &q ); +void QuaternionInvert( const Quaternion &p, Quaternion &q ); +float QuaternionNormalize( Quaternion &q ); +void QuaternionAdd( const Quaternion &p, const Quaternion &q, Quaternion &qt ); +void QuaternionMult( const Quaternion &p, const Quaternion &q, Quaternion &qt ); +void QuaternionMatrix( const Quaternion &q, matrix3x4_t &matrix ); +void QuaternionMatrix( const Quaternion &q, const Vector &pos, matrix3x4_t &matrix ); +void QuaternionAngles( 
const Quaternion &q, QAngle &angles ); +void AngleQuaternion( const QAngle& angles, Quaternion &qt ); +void QuaternionAngles( const Quaternion &q, RadianEuler &angles ); +void AngleQuaternion( RadianEuler const &angles, Quaternion &qt ); +void QuaternionAxisAngle( const Quaternion &q, Vector &axis, float &angle ); +void AxisAngleQuaternion( const Vector &axis, float angle, Quaternion &q ); +void BasisToQuaternion( const Vector &vecForward, const Vector &vecRight, const Vector &vecUp, Quaternion &q ); +void MatrixQuaternion( const matrix3x4_t &mat, Quaternion &q ); + +// A couple methods to find the dot product of a vector with a matrix row or column... +inline float MatrixRowDotProduct( const matrix3x4_t &in1, int row, const Vector& in2 ) +{ + Assert( (row >= 0) && (row < 3) ); + return DotProduct( in1[row], in2.Base() ); +} + +inline float MatrixColumnDotProduct( const matrix3x4_t &in1, int col, const Vector& in2 ) +{ + Assert( (col >= 0) && (col < 4) ); + return in1[0][col] * in2[0] + in1[1][col] * in2[1] + in1[2][col] * in2[2]; +} + +int __cdecl BoxOnPlaneSide (const float *emins, const float *emaxs, const cplane_t *plane); + +inline float anglemod(float a) +{ + a = (360.f/65536) * ((int)(a*(65536.f/360.0f)) & 65535); + return a; +} + +// Remap a value in the range [A,B] to [C,D]. +inline float RemapVal( float val, float A, float B, float C, float D) +{ + if ( A == B ) + return val >= B ? D : C; + return C + (D - C) * (val - A) / (B - A); +} + +inline float RemapValClamped( float val, float A, float B, float C, float D) +{ + if ( A == B ) + return val >= B ? D : C; + float cVal = (val - A) / (B - A); + cVal = clamp( cVal, 0.0f, 1.0f ); + + return C + (D - C) * cVal; +} + +// Returns A + (B-A)*flPercent. 
+// float Lerp( float flPercent, float A, float B ); +template <class T> +FORCEINLINE T Lerp( float flPercent, T const &A, T const &B ) +{ + return A + (B - A) * flPercent; +} + +FORCEINLINE float Sqr( float f ) +{ + return f*f; +} + +// 5-argument floating point linear interpolation. +// FLerp(f1,f2,i1,i2,x)= +// f1 at x=i1 +// f2 at x=i2 +// smooth lerp between f1 and f2 at x>i1 and x<i2 +// extrapolation for x<i1 or x>i2 +// +// If you know a function f(x)'s value (f1) at position i1, and its value (f2) at position i2, +// the function can be linearly interpolated with FLerp(f1,f2,i1,i2,x) +// i2=i1 will cause a divide by zero. +static inline float FLerp(float f1, float f2, float i1, float i2, float x) +{ + return f1+(f2-f1)*(x-i1)/(i2-i1); +} + + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +// YWB: Specialization for interpolating euler angles via quaternions... +template<> FORCEINLINE QAngle Lerp<QAngle>( float flPercent, const QAngle& q1, const QAngle& q2 ) +{ + // Avoid precision errors + if ( q1 == q2 ) + return q1; + + Quaternion src, dest; + + // Convert to quaternions + AngleQuaternion( q1, src ); + AngleQuaternion( q2, dest ); + + Quaternion result; + + // Slerp + QuaternionSlerp( src, dest, flPercent, result ); + + // Convert to euler + QAngle output; + QuaternionAngles( result, output ); + return output; +} + +#else + +#pragma error + +// NOTE NOTE: I haven't tested this!! It may not work! 
Check out interpolatedvar.cpp in the client dll to try it +template<> FORCEINLINE QAngleByValue Lerp<QAngleByValue>( float flPercent, const QAngleByValue& q1, const QAngleByValue& q2 ) +{ + // Avoid precision errors + if ( q1 == q2 ) + return q1; + + Quaternion src, dest; + + // Convert to quaternions + AngleQuaternion( q1, src ); + AngleQuaternion( q2, dest ); + + Quaternion result; + + // Slerp + QuaternionSlerp( src, dest, flPercent, result ); + + // Convert to euler + QAngleByValue output; + QuaternionAngles( result, output ); + return output; +} + +#endif // VECTOR_NO_SLOW_OPERATIONS + + +/// Same as swap(), but won't cause problems with std::swap +template <class T> +FORCEINLINE void V_swap( T& x, T& y ) +{ + T temp = x; + x = y; + y = temp; +} + +template <class T> FORCEINLINE T AVG(T a, T b) +{ + return (a+b)/2; +} + +// number of elements in an array of static size +#define NELEMS(x) ARRAYSIZE(x) + +// XYZ macro, for printf type functions - ex printf("%f %f %f",XYZ(myvector)); +#define XYZ(v) (v).x,(v).y,(v).z + + +inline float Sign( float x ) +{ + return (x <0.0f) ? -1.0f : 1.0f; +} + +// +// Clamps the input integer to the given array bounds. +// Equivalent to the following, but without using any branches: +// +// if( n < 0 ) return 0; +// else if ( n > maxindex ) return maxindex; +// else return n; +// +// This is not always a clear performance win, but when you have situations where a clamped +// value is thrashing against a boundary this is a big win. (ie, valid, invalid, valid, invalid, ...) +// +// Note: This code has been run against all possible integers. 
+// +inline int ClampArrayBounds( int n, unsigned maxindex ) +{ + // mask is 0 if less than 4096, 0xFFFFFFFF if greater than + unsigned int inrangemask = 0xFFFFFFFF + (((unsigned) n) > maxindex ); + unsigned int lessthan0mask = 0xFFFFFFFF + ( n >= 0 ); + + // If the result was valid, set the result, (otherwise sets zero) + int result = (inrangemask & n); + + // if the result was out of range or zero. + result |= ((~inrangemask) & (~lessthan0mask)) & maxindex; + + return result; +} + + +#define BOX_ON_PLANE_SIDE(emins, emaxs, p) \ + (((p)->type < 3)? \ + ( \ + ((p)->dist <= (emins)[(p)->type])? \ + 1 \ + : \ + ( \ + ((p)->dist >= (emaxs)[(p)->type])?\ + 2 \ + : \ + 3 \ + ) \ + ) \ + : \ + BoxOnPlaneSide( (emins), (emaxs), (p))) + +//----------------------------------------------------------------------------- +// FIXME: Vector versions.... the float versions will go away hopefully soon! +//----------------------------------------------------------------------------- + +void AngleVectors (const QAngle& angles, Vector *forward); +void AngleVectors (const QAngle& angles, Vector *forward, Vector *right, Vector *up); +void AngleVectorsTranspose (const QAngle& angles, Vector *forward, Vector *right, Vector *up); +void AngleMatrix (const QAngle &angles, matrix3x4_t &mat ); +void AngleMatrix( const QAngle &angles, const Vector &position, matrix3x4_t &mat ); +void AngleMatrix (const RadianEuler &angles, matrix3x4_t &mat ); +void AngleMatrix( RadianEuler const &angles, const Vector &position, matrix3x4_t &mat ); +void AngleIMatrix (const QAngle &angles, matrix3x4_t &mat ); +void AngleIMatrix (const QAngle &angles, const Vector &position, matrix3x4_t &mat ); +void AngleIMatrix (const RadianEuler &angles, matrix3x4_t &mat ); +void VectorAngles( const Vector &forward, QAngle &angles ); +void VectorAngles( const Vector &forward, const Vector &pseudoup, QAngle &angles ); +void VectorMatrix( const Vector &forward, matrix3x4_t &mat ); +void VectorVectors( const Vector &forward, 
Vector &right, Vector &up ); +void SetIdentityMatrix( matrix3x4_t &mat ); +void SetScaleMatrix( float x, float y, float z, matrix3x4_t &dst ); +void MatrixBuildRotationAboutAxis( const Vector &vAxisOfRot, float angleDegrees, matrix3x4_t &dst ); + +inline void SetScaleMatrix( float flScale, matrix3x4_t &dst ) +{ + SetScaleMatrix( flScale, flScale, flScale, dst ); +} + +inline void SetScaleMatrix( const Vector& scale, matrix3x4_t &dst ) +{ + SetScaleMatrix( scale.x, scale.y, scale.z, dst ); +} + +// Computes the inverse transpose +void MatrixTranspose( matrix3x4_t& mat ); +void MatrixTranspose( const matrix3x4_t& src, matrix3x4_t& dst ); +void MatrixInverseTranspose( const matrix3x4_t& src, matrix3x4_t& dst ); + +inline void PositionMatrix( const Vector &position, matrix3x4_t &mat ) +{ + MatrixSetColumn( position, 3, mat ); +} + +inline void MatrixPosition( const matrix3x4_t &matrix, Vector &position ) +{ + MatrixGetColumn( matrix, 3, position ); +} + +inline void VectorRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out) +{ + VectorRotate( &in1.x, in2, &out.x ); +} + +inline void VectorIRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out) +{ + VectorIRotate( &in1.x, in2, &out.x ); +} + +inline void MatrixAngles( const matrix3x4_t &matrix, QAngle &angles ) +{ + MatrixAngles( matrix, &angles.x ); +} + +inline void MatrixAngles( const matrix3x4_t &matrix, QAngle &angles, Vector &position ) +{ + MatrixAngles( matrix, angles ); + MatrixPosition( matrix, position ); +} + +inline void MatrixAngles( const matrix3x4_t &matrix, RadianEuler &angles ) +{ + MatrixAngles( matrix, &angles.x ); + + angles.Init( DEG2RAD( angles.z ), DEG2RAD( angles.x ), DEG2RAD( angles.y ) ); +} + +void MatrixAngles( const matrix3x4_t &mat, RadianEuler &angles, Vector &position ); + +void MatrixAngles( const matrix3x4_t &mat, Quaternion &q, Vector &position ); + +inline int VectorCompare (const Vector& v1, const Vector& v2) +{ + return v1 == v2; +} + +inline void 
VectorTransform (const Vector& in1, const matrix3x4_t &in2, Vector &out) +{ + VectorTransform( &in1.x, in2, &out.x ); +} + +inline void VectorITransform (const Vector& in1, const matrix3x4_t &in2, Vector &out) +{ + VectorITransform( &in1.x, in2, &out.x ); +} + +/* +inline void DecomposeRotation( const matrix3x4_t &mat, Vector &out ) +{ + DecomposeRotation( mat, &out.x ); +} +*/ + +inline int BoxOnPlaneSide (const Vector& emins, const Vector& emaxs, const cplane_t *plane ) +{ + return BoxOnPlaneSide( &emins.x, &emaxs.x, plane ); +} + +inline void VectorFill(Vector& a, float b) +{ + a[0]=a[1]=a[2]=b; +} + +inline void VectorNegate(Vector& a) +{ + a[0] = -a[0]; + a[1] = -a[1]; + a[2] = -a[2]; +} + +inline vec_t VectorAvg(Vector& a) +{ + return ( a[0] + a[1] + a[2] ) / 3; +} + +//----------------------------------------------------------------------------- +// Box/plane test (slow version) +//----------------------------------------------------------------------------- +inline int FASTCALL BoxOnPlaneSide2 (const Vector& emins, const Vector& emaxs, const cplane_t *p, float tolerance = 0.f ) +{ + Vector corners[2]; + + if (p->normal[0] < 0) + { + corners[0][0] = emins[0]; + corners[1][0] = emaxs[0]; + } + else + { + corners[1][0] = emins[0]; + corners[0][0] = emaxs[0]; + } + + if (p->normal[1] < 0) + { + corners[0][1] = emins[1]; + corners[1][1] = emaxs[1]; + } + else + { + corners[1][1] = emins[1]; + corners[0][1] = emaxs[1]; + } + + if (p->normal[2] < 0) + { + corners[0][2] = emins[2]; + corners[1][2] = emaxs[2]; + } + else + { + corners[1][2] = emins[2]; + corners[0][2] = emaxs[2]; + } + + int sides = 0; + + float dist1 = DotProduct (p->normal, corners[0]) - p->dist; + if (dist1 >= tolerance) + sides = 1; + + float dist2 = DotProduct (p->normal, corners[1]) - p->dist; + if (dist2 < -tolerance) + sides |= 2; + + return sides; +} + +//----------------------------------------------------------------------------- +// Helpers for bounding box construction 
+//----------------------------------------------------------------------------- + +void ClearBounds (Vector& mins, Vector& maxs); +void AddPointToBounds (const Vector& v, Vector& mins, Vector& maxs); + +// +// COLORSPACE/GAMMA CONVERSION STUFF +// +void BuildGammaTable( float gamma, float texGamma, float brightness, int overbright ); + +// convert texture to linear 0..1 value +inline float TexLightToLinear( int c, int exponent ) +{ + extern float power2_n[256]; + Assert( exponent >= -128 && exponent <= 127 ); + return ( float )c * power2_n[exponent+128]; +} + + +// convert texture to linear 0..1 value +int LinearToTexture( float f ); +// converts 0..1 linear value to screen gamma (0..255) +int LinearToScreenGamma( float f ); +float TextureToLinear( int c ); + +// compressed color format +struct ColorRGBExp32 +{ + byte r, g, b; + signed char exponent; +}; + +void ColorRGBExp32ToVector( const ColorRGBExp32& in, Vector& out ); +void VectorToColorRGBExp32( const Vector& v, ColorRGBExp32 &c ); + +// solve for "x" where "a x^2 + b x + c = 0", return true if solution exists +bool SolveQuadratic( float a, float b, float c, float &root1, float &root2 ); + +// solves for "a, b, c" where "a x^2 + b x + c = y", return true if solution exists +bool SolveInverseQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c ); + +// solves for a,b,c specified as above, except that it always creates a monotonically increasing or +// decreasing curve if the data is monotonically increasing or decreasing. In order to enforce the +// monoticity condition, it is possible that the resulting quadratic will only approximate the data +// instead of interpolating it. This code is not especially fast. 
+bool SolveInverseQuadraticMonotonic( float x1, float y1, float x2, float y2, + float x3, float y3, float &a, float &b, float &c ); + + + + +// solves for "a, b, c" where "1/(a x^2 + b x + c ) = y", return true if solution exists +bool SolveInverseReciprocalQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c ); + +// rotate a vector around the Z axis (YAW) +void VectorYawRotate( const Vector& in, float flYaw, Vector &out); + + +// Bias takes an X value between 0 and 1 and returns another value between 0 and 1 +// The curve is biased towards 0 or 1 based on biasAmt, which is between 0 and 1. +// Lower values of biasAmt bias the curve towards 0 and higher values bias it towards 1. +// +// For example, with biasAmt = 0.2, the curve looks like this: +// +// 1 +// | * +// | * +// | * +// | ** +// | ** +// | **** +// |********* +// |___________________ +// 0 1 +// +// +// With biasAmt = 0.8, the curve looks like this: +// +// 1 +// | ************** +// | ** +// | * +// | * +// |* +// |* +// |* +// |___________________ +// 0 1 +// +// With a biasAmt of 0.5, Bias returns X. +float Bias( float x, float biasAmt ); + + +// Gain is similar to Bias, but biasAmt biases towards or away from 0.5. +// Lower bias values bias towards 0.5 and higher bias values bias away from it. +// +// For example, with biasAmt = 0.2, the curve looks like this: +// +// 1 +// | * +// | * +// | ** +// | *************** +// | ** +// | * +// |* +// |___________________ +// 0 1 +// +// +// With biasAmt = 0.8, the curve looks like this: +// +// 1 +// | ***** +// | *** +// | * +// | * +// | * +// | *** +// |***** +// |___________________ +// 0 1 +float Gain( float x, float biasAmt ); + + +// SmoothCurve maps a 0-1 value into another 0-1 value based on a cosine wave +// where the derivatives of the function at 0 and 1 (and 0.5) are 0. This is useful for +// any fadein/fadeout effect where it should start and end smoothly. 
+// +// The curve looks like this: +// +// 1 +// | ** +// | * * +// | * * +// | * * +// | * * +// | ** ** +// |*** *** +// |___________________ +// 0 1 +// +float SmoothCurve( float x ); + + +// This works like SmoothCurve, with two changes: +// +// 1. Instead of the curve peaking at 0.5, it will peak at flPeakPos. +// (So if you specify flPeakPos=0.2, then the peak will slide to the left). +// +// 2. flPeakSharpness is a 0-1 value controlling the sharpness of the peak. +// Low values blunt the peak and high values sharpen the peak. +float SmoothCurve_Tweak( float x, float flPeakPos=0.5, float flPeakSharpness=0.5 ); + + +//float ExponentialDecay( float halflife, float dt ); +//float ExponentialDecay( float decayTo, float decayTime, float dt ); + +// halflife is time for value to reach 50% +inline float ExponentialDecay( float halflife, float dt ) +{ + // log(0.5) == -0.69314718055994530941723212145818 + return expf( -0.69314718f / halflife * dt); +} + +// decayTo is factor the value should decay to in decayTime +inline float ExponentialDecay( float decayTo, float decayTime, float dt ) +{ + return expf( logf( decayTo ) / decayTime * dt); +} + +// Get the integrated distanced traveled +// decayTo is factor the value should decay to in decayTime +// dt is the time relative to the last velocity update +inline float ExponentialDecayIntegral( float decayTo, float decayTime, float dt ) +{ + return (powf( decayTo, dt / decayTime) * decayTime - decayTime) / logf( decayTo ); +} + +// hermite basis function for smooth interpolation +// Similar to Gain() above, but very cheap to call +// value should be between 0 & 1 inclusive +inline float SimpleSpline( float value ) +{ + float valueSquared = value * value; + + // Nice little ease-in, ease-out spline-like curve + return (3 * valueSquared - 2 * valueSquared * value); +} + +// remaps a value in [startInterval, startInterval+rangeInterval] from linear to +// spline using SimpleSpline +inline float SimpleSplineRemapVal( float 
val, float A, float B, float C, float D) +{ + if ( A == B ) + return val >= B ? D : C; + float cVal = (val - A) / (B - A); + return C + (D - C) * SimpleSpline( cVal ); +} + +// remaps a value in [startInterval, startInterval+rangeInterval] from linear to +// spline using SimpleSpline +inline float SimpleSplineRemapValClamped( float val, float A, float B, float C, float D ) +{ + if ( A == B ) + return val >= B ? D : C; + float cVal = (val - A) / (B - A); + cVal = clamp( cVal, 0.0f, 1.0f ); + return C + (D - C) * SimpleSpline( cVal ); +} + +FORCEINLINE int RoundFloatToInt(float f) +{ +#if defined(__i386__) || defined(_M_IX86) || defined( PLATFORM_WINDOWS_PC64 ) + return _mm_cvtss_si32(_mm_load_ss(&f)); +#elif defined( _X360 ) +#ifdef Assert + Assert( IsFPUControlWordSet() ); +#endif + union + { + double flResult; + int pResult[2]; + }; + flResult = __fctiw( f ); + return pResult[1]; +#else +#error Unknown architecture +#endif +} + +FORCEINLINE unsigned char RoundFloatToByte(float f) +{ + int nResult = RoundFloatToInt(f); +#ifdef Assert + Assert( (nResult & ~0xFF) == 0 ); +#endif + return (unsigned char) nResult; +} + +FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f) +{ +#if defined( _X360 ) +#ifdef Assert + Assert( IsFPUControlWordSet() ); +#endif + union + { + double flResult; + int pIntResult[2]; + unsigned long pResult[2]; + }; + flResult = __fctiw( f ); + Assert( pIntResult[1] >= 0 ); + return pResult[1]; +#else // !X360 + +#if defined( PLATFORM_WINDOWS_PC64 ) + uint nRet = ( uint ) f; + if ( nRet & 1 ) + { + if ( ( f - floor( f ) >= 0.5 ) ) + { + nRet++; + } + } + else + { + if ( ( f - floor( f ) > 0.5 ) ) + { + nRet++; + } + } + return nRet; +#else // PLATFORM_WINDOWS_PC64 + unsigned char nResult[8]; + + #if defined( _WIN32 ) + __asm + { + fld f + fistp qword ptr nResult + } + #elif POSIX + __asm __volatile__ ( + "fistpl %0;": "=m" (nResult): "t" (f) : "st" + ); + #endif + + return *((unsigned long*)nResult); +#endif // PLATFORM_WINDOWS_PC64 +#endif 
// !X360 +} + +FORCEINLINE bool IsIntegralValue( float flValue, float flTolerance = 0.001f ) +{ + return fabs( RoundFloatToInt( flValue ) - flValue ) < flTolerance; +} + +// Fast, accurate ftol: +FORCEINLINE int Float2Int( float a ) +{ +#if defined( _X360 ) + union + { + double flResult; + int pResult[2]; + }; + flResult = __fctiwz( a ); + return pResult[1]; +#else // !X360 + // Rely on compiler to generate CVTTSS2SI on x86 + return (int) a; +#endif +} + +// Over 15x faster than: (int)floor(value) +inline int Floor2Int( float a ) +{ + int RetVal; +#if defined( __i386__ ) + // Convert to int and back, compare, subtract one if too big + __m128 a128 = _mm_set_ss(a); + RetVal = _mm_cvtss_si32(a128); + __m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal); + RetVal -= _mm_comigt_ss( rounded128, a128 ); +#else + RetVal = static_cast<int>( floor(a) ); +#endif + return RetVal; +} + +//----------------------------------------------------------------------------- +// Fast color conversion from float to unsigned char +//----------------------------------------------------------------------------- +FORCEINLINE unsigned int FastFToC( float c ) +{ +#if defined( __i386__ ) + // IEEE float bit manipulation works for values between [0, 1<<23) + union { float f; int i; } convert = { c*255.0f + (float)(1<<23) }; + return convert.i & 255; +#else + // consoles CPUs suffer from load-hit-store penalty + return Float2Int( c * 255.0f ); +#endif +} + +//----------------------------------------------------------------------------- +// Fast conversion from float to integer with magnitude less than 2**22 +//----------------------------------------------------------------------------- +FORCEINLINE int FastFloatToSmallInt( float c ) +{ +#if defined( __i386__ ) + // IEEE float bit manipulation works for values between [-1<<22, 1<<22) + union { float f; int i; } convert = { c + (float)(3<<22) }; + return (convert.i & ((1<<23)-1)) - (1<<22); +#else + // consoles CPUs suffer from 
load-hit-store penalty + return Float2Int( c ); +#endif +} + +//----------------------------------------------------------------------------- +// Purpose: Bound input float to .001 (millisecond) boundary +// Input : in - +// Output : inline float +//----------------------------------------------------------------------------- +inline float ClampToMsec( float in ) +{ + int msec = Floor2Int( in * 1000.0f + 0.5f ); + return 0.001f * msec; +} + +// Over 15x faster than: (int)ceil(value) +inline int Ceil2Int( float a ) +{ + int RetVal; +#if defined( __i386__ ) + // Convert to int and back, compare, add one if too small + __m128 a128 = _mm_load_ss(&a); + RetVal = _mm_cvtss_si32(a128); + __m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal); + RetVal += _mm_comilt_ss( rounded128, a128 ); +#else + RetVal = static_cast<int>( ceil(a) ); +#endif + return RetVal; +} + + +// Regular signed area of triangle +#define TriArea2D( A, B, C ) \ + ( 0.5f * ( ( B.x - A.x ) * ( C.y - A.y ) - ( B.y - A.y ) * ( C.x - A.x ) ) ) + +// This version doesn't premultiply by 0.5f, so it's the area of the rectangle instead +#define TriArea2DTimesTwo( A, B, C ) \ + ( ( ( B.x - A.x ) * ( C.y - A.y ) - ( B.y - A.y ) * ( C.x - A.x ) ) ) + + +// Get the barycentric coordinates of "pt" in triangle [A,B,C]. +inline void GetBarycentricCoords2D( + Vector2D const &A, + Vector2D const &B, + Vector2D const &C, + Vector2D const &pt, + float bcCoords[3] ) +{ + // Note, because to top and bottom are both x2, the issue washes out in the composite + float invTriArea = 1.0f / TriArea2DTimesTwo( A, B, C ); + + // NOTE: We assume here that the lightmap coordinate vertices go counterclockwise. + // If not, TriArea2D() is negated so this works out right. 
+ bcCoords[0] = TriArea2DTimesTwo( B, C, pt ) * invTriArea; + bcCoords[1] = TriArea2DTimesTwo( C, A, pt ) * invTriArea; + bcCoords[2] = TriArea2DTimesTwo( A, B, pt ) * invTriArea; +} + + +// Return true of the sphere might touch the box (the sphere is actually treated +// like a box itself, so this may return true if the sphere's bounding box touches +// a corner of the box but the sphere itself doesn't). +inline bool QuickBoxSphereTest( + const Vector& vOrigin, + float flRadius, + const Vector& bbMin, + const Vector& bbMax ) +{ + return vOrigin.x - flRadius < bbMax.x && vOrigin.x + flRadius > bbMin.x && + vOrigin.y - flRadius < bbMax.y && vOrigin.y + flRadius > bbMin.y && + vOrigin.z - flRadius < bbMax.z && vOrigin.z + flRadius > bbMin.z; +} + + +// Return true of the boxes intersect (but not if they just touch). +inline bool QuickBoxIntersectTest( + const Vector& vBox1Min, + const Vector& vBox1Max, + const Vector& vBox2Min, + const Vector& vBox2Max ) +{ + return + vBox1Min.x < vBox2Max.x && vBox1Max.x > vBox2Min.x && + vBox1Min.y < vBox2Max.y && vBox1Max.y > vBox2Min.y && + vBox1Min.z < vBox2Max.z && vBox1Max.z > vBox2Min.z; +} + + +extern float GammaToLinearFullRange( float gamma ); +extern float LinearToGammaFullRange( float linear ); +extern float GammaToLinear( float gamma ); +extern float LinearToGamma( float linear ); + +extern float SrgbGammaToLinear( float flSrgbGammaValue ); +extern float SrgbLinearToGamma( float flLinearValue ); +extern float X360GammaToLinear( float fl360GammaValue ); +extern float X360LinearToGamma( float flLinearValue ); +extern float SrgbGammaTo360Gamma( float flSrgbGammaValue ); + +// linear (0..4) to screen corrected vertex space (0..1?) +FORCEINLINE float LinearToVertexLight( float f ) +{ + extern float lineartovertex[4096]; + + // Gotta clamp before the multiply; could overflow... 
+ // assume 0..4 range + int i = RoundFloatToInt( f * 1024.f ); + + // Presumably the comman case will be not to clamp, so check that first: + if( (unsigned)i > 4095 ) + { + if ( i < 0 ) + i = 0; // Compare to zero instead of 4095 to save 4 bytes in the instruction stream + else + i = 4095; + } + + return lineartovertex[i]; +} + + +FORCEINLINE unsigned char LinearToLightmap( float f ) +{ + extern unsigned char lineartolightmap[4096]; + + // Gotta clamp before the multiply; could overflow... + int i = RoundFloatToInt( f * 1024.f ); // assume 0..4 range + + // Presumably the comman case will be not to clamp, so check that first: + if ( (unsigned)i > 4095 ) + { + if ( i < 0 ) + i = 0; // Compare to zero instead of 4095 to save 4 bytes in the instruction stream + else + i = 4095; + } + + return lineartolightmap[i]; +} + +FORCEINLINE void ColorClamp( Vector& color ) +{ + float maxc = max( color.x, max( color.y, color.z ) ); + if ( maxc > 1.0f ) + { + float ooMax = 1.0f / maxc; + color.x *= ooMax; + color.y *= ooMax; + color.z *= ooMax; + } + + if ( color[0] < 0.f ) color[0] = 0.f; + if ( color[1] < 0.f ) color[1] = 0.f; + if ( color[2] < 0.f ) color[2] = 0.f; +} + +inline void ColorClampTruncate( Vector& color ) +{ + if (color[0] > 1.0f) color[0] = 1.0f; else if (color[0] < 0.0f) color[0] = 0.0f; + if (color[1] > 1.0f) color[1] = 1.0f; else if (color[1] < 0.0f) color[1] = 0.0f; + if (color[2] > 1.0f) color[2] = 1.0f; else if (color[2] < 0.0f) color[2] = 0.0f; +} + +// Interpolate a Catmull-Rom spline. +// t is a [0,1] value and interpolates a curve between p2 and p3. +void Catmull_Rom_Spline( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector &output ); + +// Interpolate a Catmull-Rom spline. 
+// Returns the tangent of the point at t of the spline +void Catmull_Rom_Spline_Tangent( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector &output ); + +// area under the curve [0..t] +void Catmull_Rom_Spline_Integral( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// area under the curve [0..1] +void Catmull_Rom_Spline_Integral( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + Vector& output ); + +// Interpolate a Catmull-Rom spline. +// Normalize p2->p1 and p3->p4 to be the same length as p2->p3 +void Catmull_Rom_Spline_Normalize( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector &output ); + +// area under the curve [0..t] +// Normalize p2->p1 and p3->p4 to be the same length as p2->p3 +void Catmull_Rom_Spline_Integral_Normalize( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// Interpolate a Catmull-Rom spline. +// Normalize p2.x->p1.x and p3.x->p4.x to be the same length as p2.x->p3.x +void Catmull_Rom_Spline_NormalizeX( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector &output ); + +// area under the curve [0..t] +void Catmull_Rom_Spline_NormalizeX( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// Interpolate a Hermite spline. +// t is a [0,1] value and interpolates a curve between p1 and p2 with the deltas d1 and d2. 
+void Hermite_Spline( + const Vector &p1, + const Vector &p2, + const Vector &d1, + const Vector &d2, + float t, + Vector& output ); + +float Hermite_Spline( + float p1, + float p2, + float d1, + float d2, + float t ); + +// t is a [0,1] value and interpolates a curve between p1 and p2 with the slopes p0->p1 and p1->p2 +void Hermite_Spline( + const Vector &p0, + const Vector &p1, + const Vector &p2, + float t, + Vector& output ); + +float Hermite_Spline( + float p0, + float p1, + float p2, + float t ); + + +void Hermite_SplineBasis( float t, float basis[] ); + +void Hermite_Spline( + const Quaternion &q0, + const Quaternion &q1, + const Quaternion &q2, + float t, + Quaternion &output ); + + +// See http://en.wikipedia.org/wiki/Kochanek-Bartels_curves +// +// Tension: -1 = Round -> 1 = Tight +// Bias: -1 = Pre-shoot (bias left) -> 1 = Post-shoot (bias right) +// Continuity: -1 = Box corners -> 1 = Inverted corners +// +// If T=B=C=0 it's the same matrix as Catmull-Rom. +// If T=1 & B=C=0 it's the same as Cubic. +// If T=B=0 & C=-1 it's just linear interpolation +// +// See http://news.povray.org/povray.binaries.tutorials/attachment/%[email protected]%3E/Splines.bas.txt +// for example code and descriptions of various spline types... 
+// +void Kochanek_Bartels_Spline( + float tension, + float bias, + float continuity, + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +void Kochanek_Bartels_Spline_NormalizeX( + float tension, + float bias, + float continuity, + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// See link at Kochanek_Bartels_Spline for info on the basis matrix used +void Cubic_Spline( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +void Cubic_Spline_NormalizeX( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// See link at Kochanek_Bartels_Spline for info on the basis matrix used +void BSpline( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +void BSpline_NormalizeX( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// See link at Kochanek_Bartels_Spline for info on the basis matrix used +void Parabolic_Spline( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +void Parabolic_Spline_NormalizeX( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// quintic interpolating polynomial from Perlin. +// 0->0, 1->1, smooth-in between with smooth tangents +FORCEINLINE float QuinticInterpolatingPolynomial(float t) +{ + // 6t^5-15t^4+10t^3 + return t * t * t *( t * ( t* 6.0 - 15.0 ) + 10.0 ); +} + +// given a table of sorted tabulated positions, return the two indices and blendfactor to linear +// interpolate. Does a search. Can be used to find the blend value to interpolate between +// keyframes. 
+void GetInterpolationData( float const *pKnotPositions, + float const *pKnotValues, + int nNumValuesinList, + int nInterpolationRange, + float flPositionToInterpolateAt, + bool bWrap, + float *pValueA, + float *pValueB, + float *pInterpolationValue); + +float RangeCompressor( float flValue, float flMin, float flMax, float flBase ); + +// Get the minimum distance from vOrigin to the bounding box defined by [mins,maxs] +// using voronoi regions. +// 0 is returned if the origin is inside the box. +float CalcSqrDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point ); +void CalcClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut ); +void CalcSqrDistAndClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut, float &distSqrOut ); + +inline float CalcDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point ) +{ + float flDistSqr = CalcSqrDistanceToAABB( mins, maxs, point ); + return sqrt(flDistSqr); +} + +// Get the closest point from P to the (infinite) line through vLineA and vLineB and +// calculate the shortest distance from P to the line. +// If you pass in a value for t, it will tell you the t for (A + (B-A)t) to get the closest point. +// If the closest point lies on the segment between A and B, then 0 <= t <= 1. +void CalcClosestPointOnLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *t=0 ); +float CalcDistanceToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 ); +float CalcDistanceSqrToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 ); + +// The same three functions as above, except now the line is closed between A and B. 
+void CalcClosestPointOnLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *t=0 ); +float CalcDistanceToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 ); +float CalcDistanceSqrToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 ); + +// A function to compute the closes line segment connnection two lines (or false if the lines are parallel, etc.) +bool CalcLineToLineIntersectionSegment( + const Vector& p1,const Vector& p2,const Vector& p3,const Vector& p4,Vector *s1,Vector *s2, + float *t1, float *t2 ); + +// The above functions in 2D +void CalcClosestPointOnLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, Vector2D &vClosest, float *t=0 ); +float CalcDistanceToLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 ); +float CalcDistanceSqrToLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 ); +void CalcClosestPointOnLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, Vector2D &vClosest, float *t=0 ); +float CalcDistanceToLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 ); +float CalcDistanceSqrToLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 ); + +// Init the mathlib +void MathLib_Init( float gamma = 2.2f, float texGamma = 2.2f, float brightness = 0.0f, int overbright = 2.0f, bool bAllow3DNow = true, bool bAllowSSE = true, bool bAllowSSE2 = true, bool bAllowMMX = true ); +bool MathLib_3DNowEnabled( void ); +bool MathLib_MMXEnabled( void ); +bool MathLib_SSEEnabled( void ); +bool MathLib_SSE2Enabled( void ); + +float Approach( float target, float value, float speed ); +float ApproachAngle( float target, float value, float speed ); +float AngleDiff( float destAngle, float srcAngle ); +float AngleDistance( float next, float cur ); +float 
AngleNormalize( float angle ); + +// ensure that 0 <= angle <= 360 +float AngleNormalizePositive( float angle ); + +bool AnglesAreEqual( float a, float b, float tolerance = 0.0f ); + + +void RotationDeltaAxisAngle( const QAngle &srcAngles, const QAngle &destAngles, Vector &deltaAxis, float &deltaAngle ); +void RotationDelta( const QAngle &srcAngles, const QAngle &destAngles, QAngle *out ); + +void ComputeTrianglePlane( const Vector& v1, const Vector& v2, const Vector& v3, Vector& normal, float& intercept ); +int PolyFromPlane( Vector *outVerts, const Vector& normal, float dist, float fHalfScale = 9000.0f ); +int ClipPolyToPlane( Vector *inVerts, int vertCount, Vector *outVerts, const Vector& normal, float dist, float fOnPlaneEpsilon = 0.1f ); +int ClipPolyToPlane_Precise( double *inVerts, int vertCount, double *outVerts, const double *normal, double dist, double fOnPlaneEpsilon = 0.1 ); + +//----------------------------------------------------------------------------- +// Computes a reasonable tangent space for a triangle +//----------------------------------------------------------------------------- +void CalcTriangleTangentSpace( const Vector &p0, const Vector &p1, const Vector &p2, + const Vector2D &t0, const Vector2D &t1, const Vector2D& t2, + Vector &sVect, Vector &tVect ); + +//----------------------------------------------------------------------------- +// Transforms a AABB into another space; which will inherently grow the box. 
+//----------------------------------------------------------------------------- +void TransformAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut ); + +//----------------------------------------------------------------------------- +// Uses the inverse transform of in1 +//----------------------------------------------------------------------------- +void ITransformAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut ); + +//----------------------------------------------------------------------------- +// Rotates a AABB into another space; which will inherently grow the box. +// (same as TransformAABB, but doesn't take the translation into account) +//----------------------------------------------------------------------------- +void RotateAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut ); + +//----------------------------------------------------------------------------- +// Uses the inverse transform of in1 +//----------------------------------------------------------------------------- +void IRotateAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut ); + +//----------------------------------------------------------------------------- +// Transform a plane +//----------------------------------------------------------------------------- +inline void MatrixTransformPlane( const matrix3x4_t &src, const cplane_t &inPlane, cplane_t &outPlane ) +{ + // What we want to do is the following: + // 1) transform the normal into the new space. + // 2) Determine a point on the old plane given by plane dist * plane normal + // 3) Transform that point into the new space + // 4) Plane dist = DotProduct( new normal, new point ) + + // An optimized version, which works if the plane is orthogonal. 
+ // 1) Transform the normal into the new space + // 2) Realize that transforming the old plane point into the new space + // is given by [ d * n'x + Tx, d * n'y + Ty, d * n'z + Tz ] + // where d = old plane dist, n' = transformed normal, Tn = translational component of transform + // 3) Compute the new plane dist using the dot product of the normal result of #2 + + // For a correct result, this should be an inverse-transpose matrix + // but that only matters if there are nonuniform scale or skew factors in this matrix. + VectorRotate( inPlane.normal, src, outPlane.normal ); + outPlane.dist = inPlane.dist * DotProduct( outPlane.normal, outPlane.normal ); + outPlane.dist += outPlane.normal.x * src[0][3] + outPlane.normal.y * src[1][3] + outPlane.normal.z * src[2][3]; +} + +inline void MatrixITransformPlane( const matrix3x4_t &src, const cplane_t &inPlane, cplane_t &outPlane ) +{ + // The trick here is that Tn = translational component of transform, + // but for an inverse transform, Tn = - R^-1 * T + Vector vecTranslation; + MatrixGetColumn( src, 3, vecTranslation ); + + Vector vecInvTranslation; + VectorIRotate( vecTranslation, src, vecInvTranslation ); + + VectorIRotate( inPlane.normal, src, outPlane.normal ); + outPlane.dist = inPlane.dist * DotProduct( outPlane.normal, outPlane.normal ); + outPlane.dist -= outPlane.normal.x * vecInvTranslation[0] + outPlane.normal.y * vecInvTranslation[1] + outPlane.normal.z * vecInvTranslation[2]; +} + +int CeilPow2( int in ); +int FloorPow2( int in ); + +FORCEINLINE float * UnpackNormal_HEND3N( const unsigned int *pPackedNormal, float *pNormal ) +{ + int temp[3]; + temp[0] = ((*pPackedNormal >> 0L) & 0x7ff); + if ( temp[0] & 0x400 ) + { + temp[0] = 2048 - temp[0]; + } + temp[1] = ((*pPackedNormal >> 11L) & 0x7ff); + if ( temp[1] & 0x400 ) + { + temp[1] = 2048 - temp[1]; + } + temp[2] = ((*pPackedNormal >> 22L) & 0x3ff); + if ( temp[2] & 0x200 ) + { + temp[2] = 1024 - temp[2]; + } + pNormal[0] = (float)temp[0] * 1.0f/1023.0f; + 
pNormal[1] = (float)temp[1] * 1.0f/1023.0f; + pNormal[2] = (float)temp[2] * 1.0f/511.0f; + return pNormal; +} + +FORCEINLINE unsigned int * PackNormal_HEND3N( const float *pNormal, unsigned int *pPackedNormal ) +{ + int temp[3]; + + temp[0] = Float2Int( pNormal[0] * 1023.0f ); + temp[1] = Float2Int( pNormal[1] * 1023.0f ); + temp[2] = Float2Int( pNormal[2] * 511.0f ); + + // the normal is out of bounds, determine the source and fix + // clamping would be even more of a slowdown here + Assert( temp[0] >= -1023 && temp[0] <= 1023 ); + Assert( temp[1] >= -1023 && temp[1] <= 1023 ); + Assert( temp[2] >= -511 && temp[2] <= 511 ); + + *pPackedNormal = ( ( temp[2] & 0x3ff ) << 22L ) | + ( ( temp[1] & 0x7ff ) << 11L ) | + ( ( temp[0] & 0x7ff ) << 0L ); + return pPackedNormal; +} + +FORCEINLINE unsigned int * PackNormal_HEND3N( float nx, float ny, float nz, unsigned int *pPackedNormal ) +{ + int temp[3]; + + temp[0] = Float2Int( nx * 1023.0f ); + temp[1] = Float2Int( ny * 1023.0f ); + temp[2] = Float2Int( nz * 511.0f ); + + // the normal is out of bounds, determine the source and fix + // clamping would be even more of a slowdown here + Assert( temp[0] >= -1023 && temp[0] <= 1023 ); + Assert( temp[1] >= -1023 && temp[1] <= 1023 ); + Assert( temp[2] >= -511 && temp[2] <= 511 ); + + *pPackedNormal = ( ( temp[2] & 0x3ff ) << 22L ) | + ( ( temp[1] & 0x7ff ) << 11L ) | + ( ( temp[0] & 0x7ff ) << 0L ); + return pPackedNormal; +} + +FORCEINLINE float * UnpackNormal_SHORT2( const unsigned int *pPackedNormal, float *pNormal, bool bIsTangent = FALSE ) +{ + // Unpacks from Jason's 2-short format (fills in a 4th binormal-sign (+1/-1) value, if this is a tangent vector) + + // FIXME: short math is slow on 360 - use ints here instead (bit-twiddle to deal w/ the sign bits) + short iX = (*pPackedNormal & 0x0000FFFF); + short iY = (*pPackedNormal & 0xFFFF0000) >> 16; + + float zSign = +1; + if ( iX < 0 ) + { + zSign = -1; + iX = -iX; + } + float tSign = +1; + if ( iY < 0 ) + { + tSign = -1; 
+ iY = -iY; + } + + pNormal[0] = ( iX - 16384.0f ) / 16384.0f; + pNormal[1] = ( iY - 16384.0f ) / 16384.0f; + pNormal[2] = zSign*sqrtf( 1.0f - ( pNormal[0]*pNormal[0] + pNormal[1]*pNormal[1] ) ); + if ( bIsTangent ) + { + pNormal[3] = tSign; + } + + return pNormal; +} + +FORCEINLINE unsigned int * PackNormal_SHORT2( float nx, float ny, float nz, unsigned int *pPackedNormal, float binormalSign = +1.0f ) +{ + // Pack a vector (ASSUMED TO BE NORMALIZED) into Jason's 4-byte (SHORT2) format. + // This simply reconstructs Z from X & Y. It uses the sign bits of the X & Y coords + // to reconstruct the sign of Z and, if this is a tangent vector, the sign of the + // binormal (this is needed because tangent/binormal vectors are supposed to follow + // UV gradients, but shaders reconstruct the binormal from the tangent and normal + // assuming that they form a right-handed basis). + + nx += 1; // [-1,+1] -> [0,2] + ny += 1; + nx *= 16384.0f; // [ 0, 2] -> [0,32768] + ny *= 16384.0f; + + // '0' and '32768' values are invalid encodings + nx = max( nx, 1.0f ); // Make sure there are no zero values + ny = max( ny, 1.0f ); + nx = min( nx, 32767.0f ); // Make sure there are no 32768 values + ny = min( ny, 32767.0f ); + + if ( nz < 0.0f ) + nx = -nx; // Set the sign bit for z + + ny *= binormalSign; // Set the sign bit for the binormal (use when encoding a tangent vector) + + // FIXME: short math is slow on 360 - use ints here instead (bit-twiddle to deal w/ the sign bits), also use Float2Int() + short sX = (short)nx; // signed short [1,32767] + short sY = (short)ny; + + *pPackedNormal = ( sX & 0x0000FFFF ) | ( sY << 16 ); // NOTE: The mask is necessary (if sX is negative and cast to an int...) 
+ + return pPackedNormal; +} + +FORCEINLINE unsigned int * PackNormal_SHORT2( const float *pNormal, unsigned int *pPackedNormal, float binormalSign = +1.0f ) +{ + return PackNormal_SHORT2( pNormal[0], pNormal[1], pNormal[2], pPackedNormal, binormalSign ); +} + +// Unpacks a UBYTE4 normal (for a tangent, the result's fourth component receives the binormal 'sign') +FORCEINLINE float * UnpackNormal_UBYTE4( const unsigned int *pPackedNormal, float *pNormal, bool bIsTangent = FALSE ) +{ + unsigned char cX, cY; + if ( bIsTangent ) + { + cX = *pPackedNormal >> 16; // Unpack Z + cY = *pPackedNormal >> 24; // Unpack W + } + else + { + cX = *pPackedNormal >> 0; // Unpack X + cY = *pPackedNormal >> 8; // Unpack Y + } + + float x = cX - 128.0f; + float y = cY - 128.0f; + float z; + + float zSignBit = x < 0 ? 1.0f : 0.0f; // z and t negative bits (like slt asm instruction) + float tSignBit = y < 0 ? 1.0f : 0.0f; + float zSign = -( 2*zSignBit - 1 ); // z and t signs + float tSign = -( 2*tSignBit - 1 ); + + x = x*zSign - zSignBit; // 0..127 + y = y*tSign - tSignBit; + x = x - 64; // -64..63 + y = y - 64; + + float xSignBit = x < 0 ? 1.0f : 0.0f; // x and y negative bits (like slt asm instruction) + float ySignBit = y < 0 ? 
1.0f : 0.0f; + float xSign = -( 2*xSignBit - 1 ); // x and y signs + float ySign = -( 2*ySignBit - 1 ); + + x = ( x*xSign - xSignBit ) / 63.0f; // 0..1 range + y = ( y*ySign - ySignBit ) / 63.0f; + z = 1.0f - x - y; + + float oolen = 1.0f / sqrt( x*x + y*y + z*z ); // Normalize and + x *= oolen * xSign; // Recover signs + y *= oolen * ySign; + z *= oolen * zSign; + + pNormal[0] = x; + pNormal[1] = y; + pNormal[2] = z; + if ( bIsTangent ) + { + pNormal[3] = tSign; + } + + return pNormal; +} + +////////////////////////////////////////////////////////////////////////////// +// See: http://www.oroboro.com/rafael/docserv.php/index/programming/article/unitv2 +// +// UBYTE4 encoding, using per-octant projection onto x+y+z=1 +// Assume input vector is already unit length +// +// binormalSign specifies 'sign' of binormal, stored in t sign bit of tangent +// (lets the shader know whether norm/tan/bin form a right-handed basis) +// +// bIsTangent is used to specify which WORD of the output to store the data +// The expected usage is to call once with the normal and once with +// the tangent and binormal sign flag, bitwise OR'ing the returned DWORDs +FORCEINLINE unsigned int * PackNormal_UBYTE4( float nx, float ny, float nz, unsigned int *pPackedNormal, bool bIsTangent = false, float binormalSign = +1.0f ) +{ + float xSign = nx < 0.0f ? -1.0f : 1.0f; // -1 or 1 sign + float ySign = ny < 0.0f ? -1.0f : 1.0f; + float zSign = nz < 0.0f ? 
-1.0f : 1.0f; + float tSign = binormalSign; + Assert( ( binormalSign == +1.0f ) || ( binormalSign == -1.0f ) ); + + float xSignBit = 0.5f*( 1 - xSign ); // [-1,+1] -> [1,0] + float ySignBit = 0.5f*( 1 - ySign ); // 1 is negative bit (like slt instruction) + float zSignBit = 0.5f*( 1 - zSign ); + float tSignBit = 0.5f*( 1 - binormalSign ); + + float absX = xSign*nx; // 0..1 range (abs) + float absY = ySign*ny; + float absZ = zSign*nz; + + float xbits = absX / ( absX + absY + absZ ); // Project onto x+y+z=1 plane + float ybits = absY / ( absX + absY + absZ ); + + xbits *= 63; // 0..63 + ybits *= 63; + + xbits = xbits * xSign - xSignBit; // -64..63 range + ybits = ybits * ySign - ySignBit; + xbits += 64.0f; // 0..127 range + ybits += 64.0f; + + xbits = xbits * zSign - zSignBit; // Negate based on z and t + ybits = ybits * tSign - tSignBit; // -128..127 range + + xbits += 128.0f; // 0..255 range + ybits += 128.0f; + + unsigned char cX = (unsigned char) xbits; + unsigned char cY = (unsigned char) ybits; + + if ( !bIsTangent ) + *pPackedNormal = (cX << 0) | (cY << 8); // xy for normal + else + *pPackedNormal = (cX << 16) | (cY << 24); // zw for tangent + + return pPackedNormal; +} + +FORCEINLINE unsigned int * PackNormal_UBYTE4( const float *pNormal, unsigned int *pPackedNormal, bool bIsTangent = false, float binormalSign = +1.0f ) +{ + return PackNormal_UBYTE4( pNormal[0], pNormal[1], pNormal[2], pPackedNormal, bIsTangent, binormalSign ); +} + + +//----------------------------------------------------------------------------- +// Convert RGB to HSV +//----------------------------------------------------------------------------- +void RGBtoHSV( const Vector &rgb, Vector &hsv ); + + +//----------------------------------------------------------------------------- +// Convert HSV to RGB +//----------------------------------------------------------------------------- +void HSVtoRGB( const Vector &hsv, Vector &rgb ); + + 
+//----------------------------------------------------------------------------- +// Fast version of pow and log +//----------------------------------------------------------------------------- + +float FastLog2(float i); // log2( i ) +float FastPow2(float i); // 2^i +float FastPow(float a, float b); // a^b +float FastPow10( float i ); // 10^i + +//----------------------------------------------------------------------------- +// For testing float equality +//----------------------------------------------------------------------------- + +inline bool CloseEnough( float a, float b, float epsilon = EQUAL_EPSILON ) +{ + return fabs( a - b ) <= epsilon; +} + +inline bool CloseEnough( const Vector &a, const Vector &b, float epsilon = EQUAL_EPSILON ) +{ + return fabs( a.x - b.x ) <= epsilon && + fabs( a.y - b.y ) <= epsilon && + fabs( a.z - b.z ) <= epsilon; +} + +// Fast compare +// maxUlps is the maximum error in terms of Units in the Last Place. This +// specifies how big an error we are willing to accept in terms of the value +// of the least significant digit of the floating point number�s +// representation. maxUlps can also be interpreted in terms of how many +// representable floats we are willing to accept between A and B. +// This function will allow maxUlps-1 floats between A and B. +bool AlmostEqual(float a, float b, int maxUlps = 10); + +inline bool AlmostEqual( const Vector &a, const Vector &b, int maxUlps = 10) +{ + return AlmostEqual( a.x, b.x, maxUlps ) && + AlmostEqual( a.y, b.y, maxUlps ) && + AlmostEqual( a.z, b.z, maxUlps ); +} + + +#endif // MATH_BASE_H + diff --git a/mp/src/public/mathlib/matrixmath.h b/mp/src/public/mathlib/matrixmath.h index 40de0c02..9c7f207b 100644 --- a/mp/src/public/mathlib/matrixmath.h +++ b/mp/src/public/mathlib/matrixmath.h @@ -1,385 +1,385 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// A set of generic, template-based matrix functions.
-//===========================================================================//
-
-#ifndef MATRIXMATH_H
-#define MATRIXMATH_H
-
-#include <stdarg.h>
-
-// The operations in this file can perform basic matrix operations on matrices represented
-// using any class that supports the necessary operations:
-//
-// .Element( row, col ) - return the element at a given matrox position
-// .SetElement( row, col, val ) - modify an element
-// .Width(), .Height() - get dimensions
-// .SetDimensions( nrows, ncols) - set a matrix to be un-initted and the appropriate size
-//
-// Generally, vectors can be used with these functions by using N x 1 matrices to represent them.
-// Matrices are addressed as row, column, and indices are 0-based
-//
-//
-// Note that the template versions of these routines are defined for generality - it is expected
-// that template specialization is used for common high performance cases.
-
-namespace MatrixMath
-{
- /// M *= flScaleValue. Multiplies every element of matrix, in place, by flScaleValue.
- template<class MATRIXCLASS>
- void ScaleMatrix( MATRIXCLASS &matrix, float flScaleValue )
- {
- for( int i = 0; i < matrix.Height(); i++ ) // i = row index
- {
- for( int j = 0; j < matrix.Width(); j++ ) // j = column index
- {
- matrix.SetElement( i, j, flScaleValue * matrix.Element( i, j ) );
- }
- }
- }
-
- /// AppendElement - same as setting the element, except it only works when all calls
- /// happen in top to bottom, left to right order, and you have to call FinishedAppending when
- /// done. For normal matrix classes this is no different than SetElement, but for
- /// CSparseMatrix, it is an accelerated way to fill a matrix from scratch.
- template<class MATRIXCLASS>
- FORCEINLINE void AppendElement( MATRIXCLASS &matrix, int nRow, int nCol, float flValue )
- {
- matrix.SetElement( nRow, nCol, flValue ); // default implementation: a plain SetElement
- }
-
- template<class MATRIXCLASS> // Companion to AppendElement: call once after the last element has been appended.
- FORCEINLINE void FinishedAppending( MATRIXCLASS &matrix ) {} // default implementation: nothing to finalize
-
- /// M += fl. Adds the scalar flAddend to every element of matrix, in place.
- template<class MATRIXCLASS>
- void AddToMatrix( MATRIXCLASS &matrix, float flAddend )
- {
- for( int i = 0; i < matrix.Height(); i++ ) // i = row index
- {
- for( int j = 0; j < matrix.Width(); j++ ) // j = column index
- {
- matrix.SetElement( i, j, flAddend + matrix.Element( i, j ) );
- }
- }
- }
-
- /// Transpose: resizes *pMatrixOut to Width x Height of matrixIn and fills it so that out(i,j) = in(j,i).
- template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
- void TransposeMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
- {
- pMatrixOut->SetDimensions( matrixIn.Width(), matrixIn.Height() ); // rows and columns swap
- for( int i = 0; i < pMatrixOut->Height(); i++ )
- {
- for( int j = 0; j < pMatrixOut->Width(); j++ )
- {
- AppendElement( *pMatrixOut, i, j, matrixIn.Element( j, i ) ); // output is written in row-major order, as AppendElement requires
- }
- }
- FinishedAppending( *pMatrixOut );
- }
-
- /// Copy: resizes *pMatrixOut to the dimensions of matrixIn and copies every element across.
- template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
- void CopyMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
- {
- pMatrixOut->SetDimensions( matrixIn.Height(), matrixIn.Width() );
- for( int i = 0; i < matrixIn.Height(); i++ )
- {
- for( int j = 0; j < matrixIn.Width(); j++ )
- {
- AppendElement( *pMatrixOut, i, j, matrixIn.Element( i, j ) ); // appended in row-major order
- }
- }
- FinishedAppending( *pMatrixOut );
- }
-
-
-
- /// M += M. Adds matrixIn element-wise into *pMatrixOut. *pMatrixOut is not resized; the loops run over matrixIn's dimensions, so the output presumably must already be at least that large.
- template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
- void AddMatrixToMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
- {
- for( int i = 0; i < matrixIn.Height(); i++ )
- {
- for( int j = 0; j < matrixIn.Width(); j++ )
- {
- pMatrixOut->SetElement( i, j, pMatrixOut->Element( i, j ) + matrixIn.Element( i, j ) );
- }
- }
- }
-
- // M += scale * M. Adds flScale * matrixIn element-wise into *pMatrixOut. *pMatrixOut is not resized; the loops run over matrixIn's dimensions.
- template<class MATRIXCLASSIN, class MATRIXCLASSOUT>
- void AddScaledMatrixToMatrix( float flScale, MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut )
- {
- for( int i = 0; i < matrixIn.Height(); i++ )
- {
- for( int j = 0; j < matrixIn.Width(); j++ )
- {
- pMatrixOut->SetElement( i, j, pMatrixOut->Element( i, j ) + flScale * matrixIn.Element( i, j ) );
- }
- }
- }
-
-
- // Sets *pMatrixOut to a scaled identity: flDiagonalValue where row == column, 0 elsewhere. Uses the matrix's current dimensions (SetDimensions is not called here).
- template<class MATRIXCLASSOUT>
- void SetMatrixToIdentity( MATRIXCLASSOUT *pMatrixOut, float flDiagonalValue = 1.0 )
- {
- for( int i = 0; i < pMatrixOut->Height(); i++ )
- {
- for( int j = 0; j < pMatrixOut->Width(); j++ )
- {
- AppendElement( *pMatrixOut, i, j, ( i == j ) ? flDiagonalValue : 0 ); // appended in row-major order
- }
- }
- FinishedAppending( *pMatrixOut );
- }
-
- //// Simple way to initialize a matrix with constants from code: resizes *pMatrix to nRows x nCols, then reads nRows*nCols variadic values in row-major order.
- template<class MATRIXCLASSOUT>
- void SetMatrixValues( MATRIXCLASSOUT *pMatrix, int nRows, int nCols, ... )
- {
- va_list argPtr;
- va_start( argPtr, nCols );
-
- pMatrix->SetDimensions( nRows, nCols );
- for( int nRow = 0; nRow < nRows; nRow++ )
- {
- for( int nCol = 0; nCol < nCols; nCol++ )
- {
- double flNewValue = va_arg( argPtr, double ); // every variadic argument is read as a double: pass 1.0, not 1 (floats promote to double, ints do not)
- pMatrix->SetElement( nRow, nCol, flNewValue );
- }
- }
- va_end( argPtr );
- }
-
-
- /// Row and column accessors: treat a row or a column of a matrix as a column vector. MatrixRowAccessor exposes row nRow of the matrix as a Width() x 1 read-only vector.
- template<class MATRIXTYPE> class MatrixRowAccessor
- {
- public:
- FORCEINLINE MatrixRowAccessor( MATRIXTYPE const &matrix, int nRow ) // keeps a pointer to matrix, not a copy
- {
- m_pMatrix = &matrix;
- m_nRow = nRow;
- }
-
- FORCEINLINE float Element( int nRow, int nCol ) const
- {
- Assert( nCol == 0 );
- return m_pMatrix->Element( m_nRow, nRow ); // the vector's row index selects the column within the wrapped row
- }
-
- FORCEINLINE int Width( void ) const { return 1; };
- FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); } // one entry per column of the wrapped matrix
-
- private:
- MATRIXTYPE const *m_pMatrix; // wrapped matrix (not owned)
- int m_nRow; // row of the wrapped matrix being exposed
- };
-
- template<class MATRIXTYPE> class MatrixColumnAccessor // Exposes column nColumn of a matrix as a Height() x 1 read-only vector.
- {
- public:
- FORCEINLINE MatrixColumnAccessor( MATRIXTYPE const &matrix, int nColumn ) // keeps a pointer to matrix, not a copy
- {
- m_pMatrix = &matrix;
- m_nColumn = nColumn;
- }
-
- FORCEINLINE float Element( int nRow, int nColumn ) const
- {
- Assert( nColumn == 0 );
- return m_pMatrix->Element( nRow, m_nColumn );
- }
-
- FORCEINLINE int Width( void ) const { return 1; }
- FORCEINLINE int Height( void ) const { return m_pMatrix->Height(); } // one entry per row of the wrapped matrix
- private:
- MATRIXTYPE const *m_pMatrix; // wrapped matrix (not owned)
- int m_nColumn; // column of the wrapped matrix being exposed
- };
-
- /// This translator acts as a read-only proxy for the transposed matrix: Element( r, c ) reads ( c, r ) of the wrapped matrix, and Width/Height are swapped.
- template<class MATRIXTYPE> class MatrixTransposeAccessor
- {
- public:
- FORCEINLINE MatrixTransposeAccessor( MATRIXTYPE const & matrix ) // keeps a pointer to matrix, not a copy
- {
- m_pMatrix = &matrix;
- }
-
- FORCEINLINE float Element( int nRow, int nColumn ) const
- {
- return m_pMatrix->Element( nColumn, nRow );
- }
-
- FORCEINLINE int Width( void ) const { return m_pMatrix->Height(); }
- FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); }
- private:
- MATRIXTYPE const *m_pMatrix; // wrapped matrix (not owned)
- };
-
- /// This transpose returns a wrapper around its argument, allowing things like AddMatrixToMatrix( TransposeMatrix( matA ), &matB ) without an extra copy. The wrapper points at matrixIn, so matrixIn must outlive it.
- template<class MATRIXCLASSIN>
- MatrixTransposeAccessor<MATRIXCLASSIN> TransposeMatrix( MATRIXCLASSIN const &matrixIn )
- {
- return MatrixTransposeAccessor<MATRIXCLASSIN>( matrixIn );
- }
-
-
- /// Retrieve rows and columns. MatrixColumn wraps column nColumn of matrix in a MatrixColumnAccessor (no copy is made).
- template<class MATRIXTYPE>
- FORCEINLINE MatrixColumnAccessor<MATRIXTYPE> MatrixColumn( MATRIXTYPE const &matrix, int nColumn )
- {
- return MatrixColumnAccessor<MATRIXTYPE>( matrix, nColumn );
- }
-
- template<class MATRIXTYPE> // MatrixRow wraps row nRow of matrix in a MatrixRowAccessor (no copy is made).
- FORCEINLINE MatrixRowAccessor<MATRIXTYPE> MatrixRow( MATRIXTYPE const &matrix, int nRow )
- {
- return MatrixRowAccessor<MATRIXTYPE>( matrix, nRow );
- }
-
- //// Dot product between two N x 1 vectors (or rows and/or columns via accessors). Both arguments must have Width() == 1 and equal Height().
- template<class MATRIXACCESSORATYPE, class MATRIXACCESSORBTYPE >
- float InnerProduct( MATRIXACCESSORATYPE const &vecA, MATRIXACCESSORBTYPE const &vecB )
- {
- Assert( vecA.Width() == 1 );
- Assert( vecB.Width() == 1 );
- Assert( vecA.Height() == vecB.Height() );
- double flResult = 0; // accumulated in double, narrowed to float on return
- for( int i = 0; i < vecA.Height(); i++ )
- {
- flResult += vecA.Element( i, 0 ) * vecB.Element( i, 0 );
- }
- return flResult;
- }
-
-
-
- /// Matrix x matrix multiplication: *pMatrixOut = matA * matB. Requires matA.Width() == matB.Height(); the output is resized to matA.Height() x matB.Width().
- template<class MATRIXATYPE, class MATRIXBTYPE, class MATRIXOUTTYPE>
- void MatrixMultiply( MATRIXATYPE const &matA, MATRIXBTYPE const &matB, MATRIXOUTTYPE *pMatrixOut )
- {
- Assert( matA.Width() == matB.Height() );
- pMatrixOut->SetDimensions( matA.Height(), matB.Width() );
- for( int i = 0; i < matA.Height(); i++ )
- {
- for( int j = 0; j < matB.Width(); j++ )
- {
- pMatrixOut->SetElement( i, j, InnerProduct( MatrixRow( matA, i ), MatrixColumn( matB, j ) ) ); // out(i,j) = row i of A . column j of B
- }
- }
- }
-
- /// Solve Ax=B via the conjugate gradient method. Code and naming conventions based on the
- /// wikipedia article.
- ///
- /// matA        - system matrix; only used through MatrixMultiply( matA, v, &out ).
- /// vecB        - right-hand side, an N x 1 matrix.
- /// vecX        - in: initial guess, out: refined solution (updated in place).
- /// flTolerance - iteration stops once the squared residual norm ( r . r ) drops below this.
- ///
- /// At most 100 iterations are run. NOTE(review): as in the textbook method, convergence
- /// presumably requires matA to be symmetric positive definite - nothing here checks it.
- template<class ATYPE, class XTYPE, class BTYPE>
- void ConjugateGradient( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 )
- {
- // r = b - A * x : residual of the initial guess
- XTYPE vecR;
- vecR.SetDimensions( vecX.Height(), 1 );
- MatrixMultiply( matA, vecX, &vecR );
- ScaleMatrix( vecR, -1 );
- AddMatrixToMatrix( vecB, &vecR );
- // p = r : first search direction
- XTYPE vecP;
- CopyMatrix( vecR, &vecP );
- float flRsOld = InnerProduct( vecR, vecR );
- for( int nIter = 0; nIter < 100; nIter++ )
- {
- XTYPE vecAp;
- MatrixMultiply( matA, vecP, &vecAp );
- float flDivisor = InnerProduct( vecAp, vecP );
- if ( flDivisor == 0.0f )
- {
- // p . Ap is zero when the residual is already exactly zero (p == 0) or the method has
- // broken down. Dividing would produce NaN/Inf and poison vecX, so keep the current x.
- break;
- }
- float flAlpha = flRsOld / flDivisor; // step length along p
- AddScaledMatrixToMatrix( flAlpha, vecP, &vecX ); // x += alpha * p
- AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR ); // r -= alpha * A * p
- float flRsNew = InnerProduct( vecR, vecR );
- if ( flRsNew < flTolerance )
- {
- break;
- }
- // p = r + ( rsNew / rsOld ) * p : next search direction
- ScaleMatrix( vecP, flRsNew / flRsOld );
- AddMatrixToMatrix( vecR, &vecP );
- flRsOld = flRsNew;
- }
- }
-
- /// Solve (A'*A) x=B via the conjugate gradient method. Code and naming conventions based on
- /// the wikipedia article. Same as ConjugateGradient above but allows passing in two matrices whose
- /// product is used as the A matrix (in order to preserve sparsity).
- ///
- /// matA, matAPrime - the system matrix is applied as matAPrime * ( matA * v ); the product
- ///                   matrix itself is never formed.
- /// vecB            - right-hand side, an N x 1 matrix.
- /// vecX            - in: initial guess, out: refined solution (updated in place).
- /// flTolerance     - iteration stops once the squared residual norm ( r . r ) drops below this.
- ///
- /// At most 100 iterations are run.
- template<class ATYPE, class APRIMETYPE, class XTYPE, class BTYPE>
- void ConjugateGradient( ATYPE const &matA, APRIMETYPE const &matAPrime, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 )
- {
- // r = b - A' * ( A * x ) : residual of the initial guess
- XTYPE vecR1;
- vecR1.SetDimensions( vecX.Height(), 1 );
- MatrixMultiply( matA, vecX, &vecR1 );
- XTYPE vecR;
- vecR.SetDimensions( vecR1.Height(), 1 );
- MatrixMultiply( matAPrime, vecR1, &vecR );
- ScaleMatrix( vecR, -1 );
- AddMatrixToMatrix( vecB, &vecR );
- // p = r : first search direction
- XTYPE vecP;
- CopyMatrix( vecR, &vecP );
- float flRsOld = InnerProduct( vecR, vecR );
- for( int nIter = 0; nIter < 100; nIter++ )
- {
- // Ap = A' * ( A * p ), computed in two steps
- XTYPE vecAp1;
- MatrixMultiply( matA, vecP, &vecAp1 );
- XTYPE vecAp;
- MatrixMultiply( matAPrime, vecAp1, &vecAp );
- float flDivisor = InnerProduct( vecAp, vecP );
- if ( flDivisor == 0.0f )
- {
- // p . Ap is zero when the residual is already exactly zero (p == 0) or the method has
- // broken down. Dividing would produce NaN/Inf and poison vecX, so keep the current x.
- break;
- }
- float flAlpha = flRsOld / flDivisor; // step length along p
- AddScaledMatrixToMatrix( flAlpha, vecP, &vecX ); // x += alpha * p
- AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR ); // r -= alpha * Ap
- float flRsNew = InnerProduct( vecR, vecR );
- if ( flRsNew < flTolerance )
- {
- break;
- }
- // p = r + ( rsNew / rsOld ) * p : next search direction
- ScaleMatrix( vecP, flRsNew / flRsOld );
- AddMatrixToMatrix( vecR, &vecP );
- flRsOld = flRsNew;
- }
- }
-
-
- template<class ATYPE, class XTYPE, class BTYPE> // Least-squares solve of A x ~= b through the normal equations (A'A) x = A'b, using the two-matrix ConjugateGradient.
- void LeastSquaresFit( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX )
- {
- // generate the right-hand side of the normal equations: beta = A' * b
- BTYPE vecBeta;
- MatrixMath::MatrixMultiply( MatrixMath::TransposeMatrix( matA ), vecB, &vecBeta ); // transpose accessor, no copy of A
-
- vecX.SetDimensions( matA.Width(), 1 );
- MatrixMath::SetMatrixToIdentity( &vecX ); // initial guess: on an N x 1 matrix this yields element 0 = 1, the rest 0
-
- ATYPE matATransposed;
- TransposeMatrix( matA, &matATransposed ); // explicit transposed copy, passed as matAPrime below
- ConjugateGradient( matA, matATransposed, vecBeta, vecX, 1.0e-20 );
- }
-
-};
-
-/// A simple fixed-size NUMROWS x NUMCOLS float matrix class providing the Element/SetElement/Width/Height/SetDimensions interface the MatrixMath templates expect.
-template<int NUMROWS, int NUMCOLS> class CFixedMatrix
-{
-public:
- FORCEINLINE int Width( void ) const { return NUMCOLS; }
- FORCEINLINE int Height( void ) const { return NUMROWS; }
- FORCEINLINE float Element( int nRow, int nCol ) const { return m_flValues[nRow][nCol]; } // no bounds checking
- FORCEINLINE void SetElement( int nRow, int nCol, float flValue ) { m_flValues[nRow][nCol] = flValue; } // no bounds checking
- FORCEINLINE void SetDimensions( int nNumRows, int nNumCols ) { Assert( ( nNumRows == NUMROWS ) && ( nNumCols == NUMCOLS ) ); } // size is fixed: this only asserts that the request matches
-
-private:
- float m_flValues[NUMROWS][NUMCOLS]; // row-major storage; no constructor is declared here, so contents start uninitialized
-};
-
-
-
-#endif //matrixmath_h
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// A set of generic, template-based matrix functions. +//===========================================================================// + +#ifndef MATRIXMATH_H +#define MATRIXMATH_H + +#include <stdarg.h> + +// The operations in this file can perform basic matrix operations on matrices represented +// using any class that supports the necessary operations: +// +// .Element( row, col ) - return the element at a given matrox position +// .SetElement( row, col, val ) - modify an element +// .Width(), .Height() - get dimensions +// .SetDimensions( nrows, ncols) - set a matrix to be un-initted and the appropriate size +// +// Generally, vectors can be used with these functions by using N x 1 matrices to represent them. +// Matrices are addressed as row, column, and indices are 0-based +// +// +// Note that the template versions of these routines are defined for generality - it is expected +// that template specialization is used for common high performance cases. + +namespace MatrixMath +{ + /// M *= flScaleValue + template<class MATRIXCLASS> + void ScaleMatrix( MATRIXCLASS &matrix, float flScaleValue ) + { + for( int i = 0; i < matrix.Height(); i++ ) + { + for( int j = 0; j < matrix.Width(); j++ ) + { + matrix.SetElement( i, j, flScaleValue * matrix.Element( i, j ) ); + } + } + } + + /// AppendElementToMatrix - same as setting the element, except only works when all calls + /// happen in top to bottom left to right order, end you have to call FinishedAppending when + /// done. For normal matrix classes this is not different then SetElement, but for + /// CSparseMatrix, it is an accelerated way to fill a matrix from scratch. 
+ template<class MATRIXCLASS> + FORCEINLINE void AppendElement( MATRIXCLASS &matrix, int nRow, int nCol, float flValue ) + { + matrix.SetElement( nRow, nCol, flValue ); // default implementation + } + + template<class MATRIXCLASS> + FORCEINLINE void FinishedAppending( MATRIXCLASS &matrix ) {} // default implementation + + /// M += fl + template<class MATRIXCLASS> + void AddToMatrix( MATRIXCLASS &matrix, float flAddend ) + { + for( int i = 0; i < matrix.Height(); i++ ) + { + for( int j = 0; j < matrix.Width(); j++ ) + { + matrix.SetElement( i, j, flAddend + matrix.Element( i, j ) ); + } + } + } + + /// transpose + template<class MATRIXCLASSIN, class MATRIXCLASSOUT> + void TransposeMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut ) + { + pMatrixOut->SetDimensions( matrixIn.Width(), matrixIn.Height() ); + for( int i = 0; i < pMatrixOut->Height(); i++ ) + { + for( int j = 0; j < pMatrixOut->Width(); j++ ) + { + AppendElement( *pMatrixOut, i, j, matrixIn.Element( j, i ) ); + } + } + FinishedAppending( *pMatrixOut ); + } + + /// copy + template<class MATRIXCLASSIN, class MATRIXCLASSOUT> + void CopyMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut ) + { + pMatrixOut->SetDimensions( matrixIn.Height(), matrixIn.Width() ); + for( int i = 0; i < matrixIn.Height(); i++ ) + { + for( int j = 0; j < matrixIn.Width(); j++ ) + { + AppendElement( *pMatrixOut, i, j, matrixIn.Element( i, j ) ); + } + } + FinishedAppending( *pMatrixOut ); + } + + + + /// M+=M + template<class MATRIXCLASSIN, class MATRIXCLASSOUT> + void AddMatrixToMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut ) + { + for( int i = 0; i < matrixIn.Height(); i++ ) + { + for( int j = 0; j < matrixIn.Width(); j++ ) + { + pMatrixOut->SetElement( i, j, pMatrixOut->Element( i, j ) + matrixIn.Element( i, j ) ); + } + } + } + + // M += scale * M + template<class MATRIXCLASSIN, class MATRIXCLASSOUT> + void AddScaledMatrixToMatrix( float flScale, MATRIXCLASSIN const &matrixIn, 
MATRIXCLASSOUT *pMatrixOut ) + { + for( int i = 0; i < matrixIn.Height(); i++ ) + { + for( int j = 0; j < matrixIn.Width(); j++ ) + { + pMatrixOut->SetElement( i, j, pMatrixOut->Element( i, j ) + flScale * matrixIn.Element( i, j ) ); + } + } + } + + + // simple way to initialize a matrix with constants from code. + template<class MATRIXCLASSOUT> + void SetMatrixToIdentity( MATRIXCLASSOUT *pMatrixOut, float flDiagonalValue = 1.0 ) + { + for( int i = 0; i < pMatrixOut->Height(); i++ ) + { + for( int j = 0; j < pMatrixOut->Width(); j++ ) + { + AppendElement( *pMatrixOut, i, j, ( i == j ) ? flDiagonalValue : 0 ); + } + } + FinishedAppending( *pMatrixOut ); + } + + //// simple way to initialize a matrix with constants from code + template<class MATRIXCLASSOUT> + void SetMatrixValues( MATRIXCLASSOUT *pMatrix, int nRows, int nCols, ... ) + { + va_list argPtr; + va_start( argPtr, nCols ); + + pMatrix->SetDimensions( nRows, nCols ); + for( int nRow = 0; nRow < nRows; nRow++ ) + { + for( int nCol = 0; nCol < nCols; nCol++ ) + { + double flNewValue = va_arg( argPtr, double ); + pMatrix->SetElement( nRow, nCol, flNewValue ); + } + } + va_end( argPtr ); + } + + + /// row and colum accessors. 
treat a row or a column as a column vector + template<class MATRIXTYPE> class MatrixRowAccessor + { + public: + FORCEINLINE MatrixRowAccessor( MATRIXTYPE const &matrix, int nRow ) + { + m_pMatrix = &matrix; + m_nRow = nRow; + } + + FORCEINLINE float Element( int nRow, int nCol ) const + { + Assert( nCol == 0 ); + return m_pMatrix->Element( m_nRow, nRow ); + } + + FORCEINLINE int Width( void ) const { return 1; }; + FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); } + + private: + MATRIXTYPE const *m_pMatrix; + int m_nRow; + }; + + template<class MATRIXTYPE> class MatrixColumnAccessor + { + public: + FORCEINLINE MatrixColumnAccessor( MATRIXTYPE const &matrix, int nColumn ) + { + m_pMatrix = &matrix; + m_nColumn = nColumn; + } + + FORCEINLINE float Element( int nRow, int nColumn ) const + { + Assert( nColumn == 0 ); + return m_pMatrix->Element( nRow, m_nColumn ); + } + + FORCEINLINE int Width( void ) const { return 1; } + FORCEINLINE int Height( void ) const { return m_pMatrix->Height(); } + private: + MATRIXTYPE const *m_pMatrix; + int m_nColumn; + }; + + /// this translator acts as a proxy for the transposed matrix + template<class MATRIXTYPE> class MatrixTransposeAccessor + { + public: + FORCEINLINE MatrixTransposeAccessor( MATRIXTYPE const & matrix ) + { + m_pMatrix = &matrix; + } + + FORCEINLINE float Element( int nRow, int nColumn ) const + { + return m_pMatrix->Element( nColumn, nRow ); + } + + FORCEINLINE int Width( void ) const { return m_pMatrix->Height(); } + FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); } + private: + MATRIXTYPE const *m_pMatrix; + }; + + /// this tranpose returns a wrapper around it's argument, allowing things like AddMatrixToMatrix( Transpose( matA ), &matB ) without an extra copy + template<class MATRIXCLASSIN> + MatrixTransposeAccessor<MATRIXCLASSIN> TransposeMatrix( MATRIXCLASSIN const &matrixIn ) + { + return MatrixTransposeAccessor<MATRIXCLASSIN>( matrixIn ); + } + + + /// retrieve rows and 
columns + template<class MATRIXTYPE> + FORCEINLINE MatrixColumnAccessor<MATRIXTYPE> MatrixColumn( MATRIXTYPE const &matrix, int nColumn ) + { + return MatrixColumnAccessor<MATRIXTYPE>( matrix, nColumn ); + } + + template<class MATRIXTYPE> + FORCEINLINE MatrixRowAccessor<MATRIXTYPE> MatrixRow( MATRIXTYPE const &matrix, int nRow ) + { + return MatrixRowAccessor<MATRIXTYPE>( matrix, nRow ); + } + + //// dot product between vectors (or rows and/or columns via accessors) + template<class MATRIXACCESSORATYPE, class MATRIXACCESSORBTYPE > + float InnerProduct( MATRIXACCESSORATYPE const &vecA, MATRIXACCESSORBTYPE const &vecB ) + { + Assert( vecA.Width() == 1 ); + Assert( vecB.Width() == 1 ); + Assert( vecA.Height() == vecB.Height() ); + double flResult = 0; + for( int i = 0; i < vecA.Height(); i++ ) + { + flResult += vecA.Element( i, 0 ) * vecB.Element( i, 0 ); + } + return flResult; + } + + + + /// matrix x matrix multiplication + template<class MATRIXATYPE, class MATRIXBTYPE, class MATRIXOUTTYPE> + void MatrixMultiply( MATRIXATYPE const &matA, MATRIXBTYPE const &matB, MATRIXOUTTYPE *pMatrixOut ) + { + Assert( matA.Width() == matB.Height() ); + pMatrixOut->SetDimensions( matA.Height(), matB.Width() ); + for( int i = 0; i < matA.Height(); i++ ) + { + for( int j = 0; j < matB.Width(); j++ ) + { + pMatrixOut->SetElement( i, j, InnerProduct( MatrixRow( matA, i ), MatrixColumn( matB, j ) ) ); + } + } + } + + /// solve Ax=B via the conjugate graident method. Code and naming conventions based on the + /// wikipedia article. 
+ template<class ATYPE, class XTYPE, class BTYPE> + void ConjugateGradient( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 ) + { + XTYPE vecR; + vecR.SetDimensions( vecX.Height(), 1 ); + MatrixMultiply( matA, vecX, &vecR ); + ScaleMatrix( vecR, -1 ); + AddMatrixToMatrix( vecB, &vecR ); + XTYPE vecP; + CopyMatrix( vecR, &vecP ); + float flRsOld = InnerProduct( vecR, vecR ); + for( int nIter = 0; nIter < 100; nIter++ ) + { + XTYPE vecAp; + MatrixMultiply( matA, vecP, &vecAp ); + float flDivisor = InnerProduct( vecAp, vecP ); + float flAlpha = flRsOld / flDivisor; + AddScaledMatrixToMatrix( flAlpha, vecP, &vecX ); + AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR ); + float flRsNew = InnerProduct( vecR, vecR ); + if ( flRsNew < flTolerance ) + { + break; + } + ScaleMatrix( vecP, flRsNew / flRsOld ); + AddMatrixToMatrix( vecR, &vecP ); + flRsOld = flRsNew; + } + } + + /// solve (A'*A) x=B via the conjugate gradient method. Code and naming conventions based on + /// the wikipedia article. 
Same as Conjugate gradient but allows passing in two matrices whose + /// product is used as the A matrix (in order to preserve sparsity) + template<class ATYPE, class APRIMETYPE, class XTYPE, class BTYPE> + void ConjugateGradient( ATYPE const &matA, APRIMETYPE const &matAPrime, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 ) + { + XTYPE vecR1; + vecR1.SetDimensions( vecX.Height(), 1 ); + MatrixMultiply( matA, vecX, &vecR1 ); + XTYPE vecR; + vecR.SetDimensions( vecR1.Height(), 1 ); + MatrixMultiply( matAPrime, vecR1, &vecR ); + ScaleMatrix( vecR, -1 ); + AddMatrixToMatrix( vecB, &vecR ); + XTYPE vecP; + CopyMatrix( vecR, &vecP ); + float flRsOld = InnerProduct( vecR, vecR ); + for( int nIter = 0; nIter < 100; nIter++ ) + { + XTYPE vecAp1; + MatrixMultiply( matA, vecP, &vecAp1 ); + XTYPE vecAp; + MatrixMultiply( matAPrime, vecAp1, &vecAp ); + float flDivisor = InnerProduct( vecAp, vecP ); + float flAlpha = flRsOld / flDivisor; + AddScaledMatrixToMatrix( flAlpha, vecP, &vecX ); + AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR ); + float flRsNew = InnerProduct( vecR, vecR ); + if ( flRsNew < flTolerance ) + { + break; + } + ScaleMatrix( vecP, flRsNew / flRsOld ); + AddMatrixToMatrix( vecR, &vecP ); + flRsOld = flRsNew; + } + } + + + template<class ATYPE, class XTYPE, class BTYPE> + void LeastSquaresFit( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX ) + { + // now, generate the normal equations + BTYPE vecBeta; + MatrixMath::MatrixMultiply( MatrixMath::TransposeMatrix( matA ), vecB, &vecBeta ); + + vecX.SetDimensions( matA.Width(), 1 ); + MatrixMath::SetMatrixToIdentity( &vecX ); + + ATYPE matATransposed; + TransposeMatrix( matA, &matATransposed ); + ConjugateGradient( matA, matATransposed, vecBeta, vecX, 1.0e-20 ); + } + +}; + +/// a simple fixed-size matrix class +template<int NUMROWS, int NUMCOLS> class CFixedMatrix +{ +public: + FORCEINLINE int Width( void ) const { return NUMCOLS; } + FORCEINLINE int Height( void ) const { return NUMROWS; } + 
FORCEINLINE float Element( int nRow, int nCol ) const { return m_flValues[nRow][nCol]; } + FORCEINLINE void SetElement( int nRow, int nCol, float flValue ) { m_flValues[nRow][nCol] = flValue; } + FORCEINLINE void SetDimensions( int nNumRows, int nNumCols ) { Assert( ( nNumRows == NUMROWS ) && ( nNumCols == NUMCOLS ) ); } + +private: + float m_flValues[NUMROWS][NUMCOLS]; +}; + + + +#endif //matrixmath_h diff --git a/mp/src/public/mathlib/noise.h b/mp/src/public/mathlib/noise.h index 0aec2efe..19d3f729 100644 --- a/mp/src/public/mathlib/noise.h +++ b/mp/src/public/mathlib/noise.h @@ -1,35 +1,35 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//=====================================================================================//
-
-#ifndef NOISE_H
-#define NOISE_H
-
-#include <math.h>
-#include "basetypes.h"
-#include "mathlib/vector.h"
-#include "tier0/dbg.h"
-
-
-// The following code is the c-ification of Ken Perlin's new noise algorithm
-// "JAVA REFERENCE IMPLEMENTATION OF IMPROVED NOISE - COPYRIGHT 2002 KEN PERLIN"
-// as available here: http://mrl.nyu.edu/~perlin/noise/
-// It generates a single octave of noise in the -1..1 range.
-// This should at some point probably replace SparseConvolutionNoise - jd
-float ImprovedPerlinNoise( Vector const &pnt );
-
-// Get the noise value at a point. Output range is 0..1.
-float SparseConvolutionNoise( Vector const &pnt );
-
-// Get the noise value at a point, passing a custom noise shaping function. The noise shaping
-// function should map the domain 0..1 to 0..1.
-float SparseConvolutionNoise(Vector const &pnt, float (*pNoiseShapeFunction)(float) );
-
-// Returns a 1/f noise. More octaves take longer.
-float FractalNoise( Vector const &pnt, int n_octaves );
-
-// Returns an abs(f)*1/f noise, i.e. turbulence.
-float Turbulence( Vector const &pnt, int n_octaves );
-#endif // NOISE_H
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//=====================================================================================// + +#ifndef NOISE_H +#define NOISE_H + +#include <math.h> +#include "basetypes.h" +#include "mathlib/vector.h" +#include "tier0/dbg.h" + + +// The following code is the c-ification of Ken Perlin's new noise algorithm +// "JAVA REFERENCE IMPLEMENTATION OF IMPROVED NOISE - COPYRIGHT 2002 KEN PERLIN" +// as available here: http://mrl.nyu.edu/~perlin/noise/ +// it generates a single octave of noise in the -1..1 range +// this should at some point probably replace SparseConvolutionNoise - jd +float ImprovedPerlinNoise( Vector const &pnt ); + +// get the noise value at a point. Output range is 0..1. +float SparseConvolutionNoise( Vector const &pnt ); + +// get the noise value at a point, passing a custom noise shaping function. The noise shaping +// function should map the domain 0..1 to 0..1. +float SparseConvolutionNoise(Vector const &pnt, float (*pNoiseShapeFunction)(float) ); + +// returns a 1/f noise. more octaves take longer +float FractalNoise( Vector const &pnt, int n_octaves ); + +// returns a abs(f)*1/f noise i.e. turbulence +float Turbulence( Vector const &pnt, int n_octaves ); +#endif // NOISE_H diff --git a/mp/src/public/mathlib/polyhedron.h b/mp/src/public/mathlib/polyhedron.h index 6c51d432..38b465c7 100644 --- a/mp/src/public/mathlib/polyhedron.h +++ b/mp/src/public/mathlib/polyhedron.h @@ -1,73 +1,73 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef POLYHEDRON_H_
-#define POLYHEDRON_H_
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include "mathlib/mathlib.h"
-
-
-
-struct Polyhedron_IndexedLine_t // One line (edge), stored as a pair of point indices.
-{
- unsigned short iPointIndices[2]; // presumably indices into CPolyhedron::pVertices for the two end points - TODO confirm
-};
-
-struct Polyhedron_IndexedLineReference_t // A polygon's reference to a line, plus which way the polygon traverses it.
-{
- unsigned short iLineIndex; // presumably an index into CPolyhedron::pLines - TODO confirm
- unsigned char iEndPointIndex; //since two polygons reference any one line, one needs to traverse the line backwards, this flags that behavior
-};
-
-struct Polyhedron_IndexedPolygon_t // One polygon (face), described by a run of entries plus its normal.
-{
- unsigned short iFirstIndex; // presumably the first of this polygon's entries in CPolyhedron::pIndices - TODO confirm
- unsigned short iIndexCount; // presumably how many consecutive entries belong to this polygon - TODO confirm
- Vector polyNormal; // normal of the polygon
-};
-
-class CPolyhedron //made into a class because it's going virtual to support distinctions between temp and permanent versions
-{
-public:
- Vector *pVertices; // array of points
- Polyhedron_IndexedLine_t *pLines; // array of lines
- Polyhedron_IndexedLineReference_t *pIndices; // array of line references
- Polyhedron_IndexedPolygon_t *pPolygons; // array of polygons
-
- unsigned short iVertexCount; // presumably the element count of pVertices - TODO confirm
- unsigned short iLineCount; // presumably the element count of pLines - TODO confirm
- unsigned short iIndexCount; // presumably the element count of pIndices - TODO confirm
- unsigned short iPolygonCount; // presumably the element count of pPolygons - TODO confirm
-
- virtual ~CPolyhedron( void ) {};
- virtual void Release( void ) = 0; // pure virtual: each concrete version decides how the polyhedron is released
- Vector Center( void ); // declared here, defined elsewhere
-};
-
-class CPolyhedron_AllocByNew : public CPolyhedron // CPolyhedron that can only be created through the static Allocate() factory.
-{
-public:
- virtual void Release( void ); // overrides CPolyhedron::Release; defined elsewhere
- static CPolyhedron_AllocByNew *Allocate( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ); //creates the polyhedron along with enough memory to hold all its data in a single allocation
-
-private:
- CPolyhedron_AllocByNew( void ) { }; //CPolyhedron_AllocByNew::Allocate() is the only way to create one of these.
-};
-
-CPolyhedron *GeneratePolyhedronFromPlanes( const float *pOutwardFacingPlanes, int iPlaneCount, float fOnPlaneEpsilon, bool bUseTemporaryMemory = false ); //be sure to polyhedron->Release()
-CPolyhedron *ClipPolyhedron( const CPolyhedron *pExistingPolyhedron, const float *pOutwardFacingPlanes, int iPlaneCount, float fOnPlaneEpsilon, bool bUseTemporaryMemory = false ); //this does NOT modify/delete the existing polyhedron
-
-CPolyhedron *GetTempPolyhedron( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ); //grab the temporary polyhedron. Avoids new/delete for quick work. Can only be in use by one chunk of code at a time
-
-
-#endif //#ifndef POLYHEDRON_H_
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef POLYHEDRON_H_ +#define POLYHEDRON_H_ + +#ifdef _WIN32 +#pragma once +#endif + +#include "mathlib/mathlib.h" + + + +struct Polyhedron_IndexedLine_t +{ + unsigned short iPointIndices[2]; +}; + +struct Polyhedron_IndexedLineReference_t +{ + unsigned short iLineIndex; + unsigned char iEndPointIndex; //since two polygons reference any one line, one needs to traverse the line backwards, this flags that behavior +}; + +struct Polyhedron_IndexedPolygon_t +{ + unsigned short iFirstIndex; + unsigned short iIndexCount; + Vector polyNormal; +}; + +class CPolyhedron //made into a class because it's going virtual to support distinctions between temp and permanent versions +{ +public: + Vector *pVertices; + Polyhedron_IndexedLine_t *pLines; + Polyhedron_IndexedLineReference_t *pIndices; + Polyhedron_IndexedPolygon_t *pPolygons; + + unsigned short iVertexCount; + unsigned short iLineCount; + unsigned short iIndexCount; + unsigned short iPolygonCount; + + virtual ~CPolyhedron( void ) {}; + virtual void Release( void ) = 0; + Vector Center( void ); +}; + +class CPolyhedron_AllocByNew : public CPolyhedron +{ +public: + virtual void Release( void ); + static CPolyhedron_AllocByNew *Allocate( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ); //creates the polyhedron along with enough memory to hold all it's data in a single allocation + +private: + CPolyhedron_AllocByNew( void ) { }; //CPolyhedron_AllocByNew::Allocate() is the only way to create one of these. 
+}; + +CPolyhedron *GeneratePolyhedronFromPlanes( const float *pOutwardFacingPlanes, int iPlaneCount, float fOnPlaneEpsilon, bool bUseTemporaryMemory = false ); //be sure to polyhedron->Release() +CPolyhedron *ClipPolyhedron( const CPolyhedron *pExistingPolyhedron, const float *pOutwardFacingPlanes, int iPlaneCount, float fOnPlaneEpsilon, bool bUseTemporaryMemory = false ); //this does NOT modify/delete the existing polyhedron + +CPolyhedron *GetTempPolyhedron( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ); //grab the temporary polyhedron. Avoids new/delete for quick work. Can only be in use by one chunk of code at a time + + +#endif //#ifndef POLYHEDRON_H_ + diff --git a/mp/src/public/mathlib/quantize.h b/mp/src/public/mathlib/quantize.h index c43b1530..5e5b7423 100644 --- a/mp/src/public/mathlib/quantize.h +++ b/mp/src/public/mathlib/quantize.h @@ -1,141 +1,141 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-#ifndef QUANTIZE_H
-#define QUANTIZE_H
-
-#ifndef STRING_H
-#include <string.h>
-#endif
-
-#define MAXDIMS 768
-#define MAXQUANT 16000
-
-
-#include <tier0/platform.h>
-
-struct Sample;
-
-struct QuantizedValue { // Node of the quantization tree: inner nodes split into two Children, leaf nodes carry a value.
- double MinError; // minimum possible error. used
- // for neighbor searches.
- struct QuantizedValue *Children[2]; // splits
- int32 value; // only exists for leaf nodes
- struct Sample *Samples; // every sample quantized into this
- // entry
- int32 NSamples; // how many were quantized to this.
- int32 TotSamples; // NOTE(review): undocumented in the original; presumably a total sample count - confirm
- double *ErrorMeasure; // variance measure for each dimension
- double TotalError; // sum of errors
- uint8 *Mean; // average value of each dimension
- uint8 *Mins; // min box for children and this
- uint8 *Maxs; // max box for children and this
- int NQuant; // the number of samples which were
- // quantized to this node since the
- // last time OptimizeQuantizer()
- // was called.
- int *Sums; // sum used by OptimizeQuantizer
- int sortdim; // dimension currently sorted along.
-};
-
-struct Sample {
- int32 ID; // identifier of this sample. can
- // be used for any purpose.
- int32 Count; // number of samples this sample
- // represents
- int32 QNum; // what value this sample ended up quantized
- // to.
- struct QuantizedValue *qptr; // ptr to what this was quantized to.
- uint8 Value[1]; // array of values for multi-dimensional
- // variables.
-};
-
-void FreeQuantization(struct QuantizedValue *t);
-
-struct QuantizedValue *Quantize(struct Sample *s, int nsamples, int ndims,
- int nvalues, uint8 *weights, int value0=0);
-
-int CompressSamples(struct Sample *s, int nsamples, int ndims);
-
-struct QuantizedValue *FindMatch(uint8 const *sample,
- int ndims,uint8 *weights,
- struct QuantizedValue *QTable);
-void PrintSamples(struct Sample const *s, int nsamples, int ndims);
-
-struct QuantizedValue *FindQNode(struct QuantizedValue const *q, int32 code);
-
-inline struct Sample *NthSample(struct Sample *s, int i, int nd)
-{
- uint8 *r=(uint8 *) s;
- r+=i*(sizeof(*s)+(nd-1));
- return (struct Sample *) r;
-}
-
-inline struct Sample *AllocSamples(int ns, int nd)
-{
- size_t size5=(sizeof(struct Sample)+(nd-1))*ns;
- void *ret=new uint8[size5];
- memset(ret,0,size5);
- for(int i=0;i<ns;i++)
- NthSample((struct Sample *)ret,i,nd)->Count=1;
- return (struct Sample *) ret;
-}
-
-
-// MinimumError: what is the min error which will occur if quantizing
-// a sample to the given qnode? This is just the error if the qnode
-// is a leaf.
-double MinimumError(struct QuantizedValue const *q, uint8 const *sample,
- int ndims, uint8 const *weights);
-double MaximumError(struct QuantizedValue const *q, uint8 const *sample,
- int ndims, uint8 const *weights);
-
-void PrintQTree(struct QuantizedValue const *p,int idlevel=0);
-void OptimizeQuantizer(struct QuantizedValue *q, int ndims);
-
-// RecalculateVelues: update the means in a sample tree, based upon
-// the samples. can be used to reoptimize when samples are deleted,
-// for instance.
-
-void RecalculateValues(struct QuantizedValue *q, int ndims);
-
-extern double SquaredError; // may be reset and examined. updated by
- // FindMatch()
-
-
-
-
-// the routines below can be used for uniform quantization via dart-throwing.
-typedef void (*GENERATOR)(void *); // generate a random sample
-typedef double (*COMPARER)(void const *a, void const *b);
-
-void *DartThrow(int NResults, int NTries, size_t itemsize, GENERATOR gen,
- COMPARER cmp);
-void *FindClosestDart(void *items,int NResults, size_t itemsize,
- COMPARER cmp, void *lookfor, int *idx);
-
-
-
-
-// color quantization of 24 bit images
-#define QUANTFLAGS_NODITHER 1 // don't do Floyd-steinberg dither
-
-extern void ColorQuantize(
-uint8 const *pImage, // 4 byte pixels ARGB
-int nWidth,
-int nHeight,
-int nFlags, // QUANTFLAGS_xxx
-int nColors, // # of colors to fill in in palette
-uint8 *pOutPixels, // where to store resulting 8 bit pixels
-uint8 *pOutPalette, // where to store resulting 768-byte palette
-int nFirstColor); // first color to use in mapping
-
-
-
-
-
-#endif
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// +#ifndef QUANTIZE_H +#define QUANTIZE_H + +#ifndef STRING_H +#include <string.h> +#endif + +#define MAXDIMS 768 +#define MAXQUANT 16000 + + +#include <tier0/platform.h> + +struct Sample; + +struct QuantizedValue { + double MinError; // minimum possible error. used + // for neighbor searches. + struct QuantizedValue *Children[2]; // splits + int32 value; // only exists for leaf nodes + struct Sample *Samples; // every sample quantized into this + // entry + int32 NSamples; // how many were quantized to this. + int32 TotSamples; + double *ErrorMeasure; // variance measure for each dimension + double TotalError; // sum of errors + uint8 *Mean; // average value of each dimension + uint8 *Mins; // min box for children and this + uint8 *Maxs; // max box for children and this + int NQuant; // the number of samples which were + // quantzied to this node since the + // last time OptimizeQuantizer() + // was called. + int *Sums; // sum used by OptimizeQuantizer + int sortdim; // dimension currently sorted along. +}; + +struct Sample { + int32 ID; // identifier of this sample. can + // be used for any purpose. + int32 Count; // number of samples this sample + // represents + int32 QNum; // what value this sample ended up quantized + // to. + struct QuantizedValue *qptr; // ptr to what this was quantized to. + uint8 Value[1]; // array of values for multi-dimensional + // variables. 
+}; + +void FreeQuantization(struct QuantizedValue *t); + +struct QuantizedValue *Quantize(struct Sample *s, int nsamples, int ndims, + int nvalues, uint8 *weights, int value0=0); + +int CompressSamples(struct Sample *s, int nsamples, int ndims); + +struct QuantizedValue *FindMatch(uint8 const *sample, + int ndims,uint8 *weights, + struct QuantizedValue *QTable); +void PrintSamples(struct Sample const *s, int nsamples, int ndims); + +struct QuantizedValue *FindQNode(struct QuantizedValue const *q, int32 code); + +inline struct Sample *NthSample(struct Sample *s, int i, int nd) +{ + uint8 *r=(uint8 *) s; + r+=i*(sizeof(*s)+(nd-1)); + return (struct Sample *) r; +} + +inline struct Sample *AllocSamples(int ns, int nd) +{ + size_t size5=(sizeof(struct Sample)+(nd-1))*ns; + void *ret=new uint8[size5]; + memset(ret,0,size5); + for(int i=0;i<ns;i++) + NthSample((struct Sample *)ret,i,nd)->Count=1; + return (struct Sample *) ret; +} + + +// MinimumError: what is the min error which will occur if quantizing +// a sample to the given qnode? This is just the error if the qnode +// is a leaf. +double MinimumError(struct QuantizedValue const *q, uint8 const *sample, + int ndims, uint8 const *weights); +double MaximumError(struct QuantizedValue const *q, uint8 const *sample, + int ndims, uint8 const *weights); + +void PrintQTree(struct QuantizedValue const *p,int idlevel=0); +void OptimizeQuantizer(struct QuantizedValue *q, int ndims); + +// RecalculateVelues: update the means in a sample tree, based upon +// the samples. can be used to reoptimize when samples are deleted, +// for instance. + +void RecalculateValues(struct QuantizedValue *q, int ndims); + +extern double SquaredError; // may be reset and examined. updated by + // FindMatch() + + + + +// the routines below can be used for uniform quantization via dart-throwing. 
+typedef void (*GENERATOR)(void *); // generate a random sample +typedef double (*COMPARER)(void const *a, void const *b); + +void *DartThrow(int NResults, int NTries, size_t itemsize, GENERATOR gen, + COMPARER cmp); +void *FindClosestDart(void *items,int NResults, size_t itemsize, + COMPARER cmp, void *lookfor, int *idx); + + + + +// color quantization of 24 bit images +#define QUANTFLAGS_NODITHER 1 // don't do Floyd-steinberg dither + +extern void ColorQuantize( +uint8 const *pImage, // 4 byte pixels ARGB +int nWidth, +int nHeight, +int nFlags, // QUANTFLAGS_xxx +int nColors, // # of colors to fill in in palette +uint8 *pOutPixels, // where to store resulting 8 bit pixels +uint8 *pOutPalette, // where to store resulting 768-byte palette +int nFirstColor); // first color to use in mapping + + + + + +#endif diff --git a/mp/src/public/mathlib/simdvectormatrix.h b/mp/src/public/mathlib/simdvectormatrix.h index ba830787..f88cd328 100644 --- a/mp/src/public/mathlib/simdvectormatrix.h +++ b/mp/src/public/mathlib/simdvectormatrix.h @@ -1,142 +1,142 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: Provide a class (SSE/SIMD only) holding a 2d matrix of class FourVectors,
-// for high speed processing in tools.
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef SIMDVECTORMATRIX_H
-#define SIMDVECTORMATRIX_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-
-#include <string.h>
-#include "tier0/platform.h"
-#include "tier0/dbg.h"
-#include "tier1/utlsoacontainer.h"
-#include "mathlib/ssemath.h"
-
-class CSIMDVectorMatrix
-{
-public:
- int m_nWidth; // in actual vectors
- int m_nHeight;
-
- int m_nPaddedWidth; // # of 4x wide elements
-
- FourVectors *m_pData;
-
-protected:
- void Init( void )
- {
- m_pData = NULL;
- m_nWidth = 0;
- m_nHeight = 0;
- m_nPaddedWidth = 0;
- }
-
- int NVectors( void ) const
- {
- return m_nHeight * m_nPaddedWidth;
- }
-
-public:
- // constructors and destructors
- CSIMDVectorMatrix( void )
- {
- Init();
- }
-
- ~CSIMDVectorMatrix( void )
- {
- if ( m_pData )
- delete[] m_pData;
- }
-
- // set up storage and fields for m x n matrix. destroys old data
- void SetSize( int width, int height )
- {
- if ( ( ! m_pData ) || ( width != m_nWidth ) || ( height != m_nHeight ) )
- {
- if ( m_pData )
- delete[] m_pData;
-
- m_nWidth = width;
- m_nHeight = height;
-
- m_nPaddedWidth = ( m_nWidth + 3) >> 2;
- m_pData = NULL;
- if ( width && height )
- m_pData = new FourVectors[ m_nPaddedWidth * m_nHeight ];
- }
- }
-
- CSIMDVectorMatrix( int width, int height )
- {
- Init();
- SetSize( width, height );
- }
-
- CSIMDVectorMatrix &operator=( CSIMDVectorMatrix const &src )
- {
- SetSize( src.m_nWidth, src.m_nHeight );
- if ( m_pData )
- memcpy( m_pData, src.m_pData, m_nHeight*m_nPaddedWidth*sizeof(m_pData[0]) );
- return *this;
- }
-
- CSIMDVectorMatrix &operator+=( CSIMDVectorMatrix const &src );
-
- CSIMDVectorMatrix &operator*=( Vector const &src );
-
- // create from an RGBA float bitmap. alpha ignored.
- void CreateFromRGBA_FloatImageData(int srcwidth, int srcheight, float const *srcdata );
-
- // create from 3 fields in a csoa
- void CreateFromCSOAAttributes( CSOAContainer const *pSrc,
- int nAttrIdx0, int nAttrIdx1, int nAttrIdx2 );
-
- // Element access. If you are calling this a lot, you don't want to use this class, because
- // you're not getting the sse advantage
- Vector Element(int x, int y) const
- {
- Assert( m_pData );
- Assert( x < m_nWidth );
- Assert( y < m_nHeight );
- Vector ret;
- FourVectors const *pData=m_pData+y*m_nPaddedWidth+(x >> 2);
-
- int xo=(x & 3);
- ret.x=pData->X( xo );
- ret.y=pData->Y( xo );
- ret.z=pData->Z( xo );
- return ret;
- }
-
- //addressing the individual fourvectors elements
- FourVectors &CompoundElement(int x, int y)
- {
- Assert( m_pData );
- Assert( y < m_nHeight );
- Assert( x < m_nPaddedWidth );
- return m_pData[x + m_nPaddedWidth*y ];
- }
-
- // math operations on the whole image
- void Clear( void )
- {
- Assert( m_pData );
- memset( m_pData, 0, m_nHeight*m_nPaddedWidth*sizeof(m_pData[0]) );
- }
-
- void RaiseToPower( float power );
-};
-
-
-
-#endif
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: Provide a class (SSE/SIMD only) holding a 2d matrix of class FourVectors, +// for high speed processing in tools. +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef SIMDVECTORMATRIX_H +#define SIMDVECTORMATRIX_H + +#ifdef _WIN32 +#pragma once +#endif + + +#include <string.h> +#include "tier0/platform.h" +#include "tier0/dbg.h" +#include "tier1/utlsoacontainer.h" +#include "mathlib/ssemath.h" + +class CSIMDVectorMatrix +{ +public: + int m_nWidth; // in actual vectors + int m_nHeight; + + int m_nPaddedWidth; // # of 4x wide elements + + FourVectors *m_pData; + +protected: + void Init( void ) + { + m_pData = NULL; + m_nWidth = 0; + m_nHeight = 0; + m_nPaddedWidth = 0; + } + + int NVectors( void ) const + { + return m_nHeight * m_nPaddedWidth; + } + +public: + // constructors and destructors + CSIMDVectorMatrix( void ) + { + Init(); + } + + ~CSIMDVectorMatrix( void ) + { + if ( m_pData ) + delete[] m_pData; + } + + // set up storage and fields for m x n matrix. destroys old data + void SetSize( int width, int height ) + { + if ( ( ! m_pData ) || ( width != m_nWidth ) || ( height != m_nHeight ) ) + { + if ( m_pData ) + delete[] m_pData; + + m_nWidth = width; + m_nHeight = height; + + m_nPaddedWidth = ( m_nWidth + 3) >> 2; + m_pData = NULL; + if ( width && height ) + m_pData = new FourVectors[ m_nPaddedWidth * m_nHeight ]; + } + } + + CSIMDVectorMatrix( int width, int height ) + { + Init(); + SetSize( width, height ); + } + + CSIMDVectorMatrix &operator=( CSIMDVectorMatrix const &src ) + { + SetSize( src.m_nWidth, src.m_nHeight ); + if ( m_pData ) + memcpy( m_pData, src.m_pData, m_nHeight*m_nPaddedWidth*sizeof(m_pData[0]) ); + return *this; + } + + CSIMDVectorMatrix &operator+=( CSIMDVectorMatrix const &src ); + + CSIMDVectorMatrix &operator*=( Vector const &src ); + + // create from an RGBA float bitmap. 
alpha ignored. + void CreateFromRGBA_FloatImageData(int srcwidth, int srcheight, float const *srcdata ); + + // create from 3 fields in a csoa + void CreateFromCSOAAttributes( CSOAContainer const *pSrc, + int nAttrIdx0, int nAttrIdx1, int nAttrIdx2 ); + + // Element access. If you are calling this a lot, you don't want to use this class, because + // you're not getting the sse advantage + Vector Element(int x, int y) const + { + Assert( m_pData ); + Assert( x < m_nWidth ); + Assert( y < m_nHeight ); + Vector ret; + FourVectors const *pData=m_pData+y*m_nPaddedWidth+(x >> 2); + + int xo=(x & 3); + ret.x=pData->X( xo ); + ret.y=pData->Y( xo ); + ret.z=pData->Z( xo ); + return ret; + } + + //addressing the individual fourvectors elements + FourVectors &CompoundElement(int x, int y) + { + Assert( m_pData ); + Assert( y < m_nHeight ); + Assert( x < m_nPaddedWidth ); + return m_pData[x + m_nPaddedWidth*y ]; + } + + // math operations on the whole image + void Clear( void ) + { + Assert( m_pData ); + memset( m_pData, 0, m_nHeight*m_nPaddedWidth*sizeof(m_pData[0]) ); + } + + void RaiseToPower( float power ); +}; + + + +#endif diff --git a/mp/src/public/mathlib/spherical_geometry.h b/mp/src/public/mathlib/spherical_geometry.h index a32d96ac..04310f43 100644 --- a/mp/src/public/mathlib/spherical_geometry.h +++ b/mp/src/public/mathlib/spherical_geometry.h @@ -1,73 +1,73 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: Functions for spherical geometry.
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef SPHERICAL_GEOMETRY_H
-#define SPHERICAL_GEOMETRY_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include <math.h>
-#include <float.h>
-
-// see http://mathworld.wolfram.com/SphericalTrigonometry.html
-
-// return the spherical distance, in radians, between 2 points on the unit sphere.
-FORCEINLINE float UnitSphereLineSegmentLength( Vector const &a, Vector const &b )
-{
- // check unit length
- Assert( fabs( VectorLength( a ) - 1.0 ) < 1.0e-3 );
- Assert( fabs( VectorLength( b ) - 1.0 ) < 1.0e-3 );
- return acos( DotProduct( a, b ) );
-}
-
-
-// given 3 points on the unit sphere, return the spherical area (in radians) of the triangle they form.
-// valid for "small" triangles.
-FORCEINLINE float UnitSphereTriangleArea( Vector const &a, Vector const &b , Vector const &c )
-{
- float flLengthA = UnitSphereLineSegmentLength( b, c );
- float flLengthB = UnitSphereLineSegmentLength( c, a );
- float flLengthC = UnitSphereLineSegmentLength( a, b );
-
- if ( ( flLengthA == 0. ) || ( flLengthB == 0. ) || ( flLengthC == 0. ) )
- return 0.; // zero area triangle
-
- // now, find the 3 incribed angles for the triangle
- float flHalfSumLens = 0.5 * ( flLengthA + flLengthB + flLengthC );
- float flSinSums = sin( flHalfSumLens );
- float flSinSMinusA= sin( flHalfSumLens - flLengthA );
- float flSinSMinusB= sin( flHalfSumLens - flLengthB );
- float flSinSMinusC= sin( flHalfSumLens - flLengthC );
-
- float flTanAOver2 = sqrt ( ( flSinSMinusB * flSinSMinusC ) / ( flSinSums * flSinSMinusA ) );
- float flTanBOver2 = sqrt ( ( flSinSMinusA * flSinSMinusC ) / ( flSinSums * flSinSMinusB ) );
- float flTanCOver2 = sqrt ( ( flSinSMinusA * flSinSMinusB ) / ( flSinSums * flSinSMinusC ) );
-
- // Girards formula : area = sum of angles - pi.
- return 2.0 * ( atan( flTanAOver2 ) + atan( flTanBOver2 ) + atan( flTanCOver2 ) ) - M_PI;
-}
-
-// spherical harmonics-related functions. Best explanation at http://www.research.scea.com/gdc2003/spherical-harmonic-lighting.pdf
-
-// Evaluate associated legendre polynomial P( l, m ) at flX, using recurrence relation
-float AssociatedLegendrePolynomial( int nL, int nM, float flX );
-
-// Evaluate order N spherical harmonic with spherical coordinates
-// nL = band, 0..N
-// nM = -nL .. nL
-// theta = 0..M_PI
-// phi = 0.. 2 * M_PHI
-float SphericalHarmonic( int nL, int nM, float flTheta, float flPhi );
-
-// evaluate spherical harmonic with normalized vector direction
-float SphericalHarmonic( int nL, int nM, Vector const &vecDirection );
-
-
-#endif // SPHERICAL_GEOMETRY_H
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: Functions for spherical geometry. +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef SPHERICAL_GEOMETRY_H +#define SPHERICAL_GEOMETRY_H + +#ifdef _WIN32 +#pragma once +#endif + +#include <math.h> +#include <float.h> + +// see http://mathworld.wolfram.com/SphericalTrigonometry.html + +// return the spherical distance, in radians, between 2 points on the unit sphere. +FORCEINLINE float UnitSphereLineSegmentLength( Vector const &a, Vector const &b ) +{ + // check unit length + Assert( fabs( VectorLength( a ) - 1.0 ) < 1.0e-3 ); + Assert( fabs( VectorLength( b ) - 1.0 ) < 1.0e-3 ); + return acos( DotProduct( a, b ) ); +} + + +// given 3 points on the unit sphere, return the spherical area (in radians) of the triangle they form. +// valid for "small" triangles. +FORCEINLINE float UnitSphereTriangleArea( Vector const &a, Vector const &b , Vector const &c ) +{ + float flLengthA = UnitSphereLineSegmentLength( b, c ); + float flLengthB = UnitSphereLineSegmentLength( c, a ); + float flLengthC = UnitSphereLineSegmentLength( a, b ); + + if ( ( flLengthA == 0. ) || ( flLengthB == 0. ) || ( flLengthC == 0. ) ) + return 0.; // zero area triangle + + // now, find the 3 incribed angles for the triangle + float flHalfSumLens = 0.5 * ( flLengthA + flLengthB + flLengthC ); + float flSinSums = sin( flHalfSumLens ); + float flSinSMinusA= sin( flHalfSumLens - flLengthA ); + float flSinSMinusB= sin( flHalfSumLens - flLengthB ); + float flSinSMinusC= sin( flHalfSumLens - flLengthC ); + + float flTanAOver2 = sqrt ( ( flSinSMinusB * flSinSMinusC ) / ( flSinSums * flSinSMinusA ) ); + float flTanBOver2 = sqrt ( ( flSinSMinusA * flSinSMinusC ) / ( flSinSums * flSinSMinusB ) ); + float flTanCOver2 = sqrt ( ( flSinSMinusA * flSinSMinusB ) / ( flSinSums * flSinSMinusC ) ); + + // Girards formula : area = sum of angles - pi. 
+ return 2.0 * ( atan( flTanAOver2 ) + atan( flTanBOver2 ) + atan( flTanCOver2 ) ) - M_PI; +} + +// spherical harmonics-related functions. Best explanation at http://www.research.scea.com/gdc2003/spherical-harmonic-lighting.pdf + +// Evaluate associated legendre polynomial P( l, m ) at flX, using recurrence relation +float AssociatedLegendrePolynomial( int nL, int nM, float flX ); + +// Evaluate order N spherical harmonic with spherical coordinates +// nL = band, 0..N +// nM = -nL .. nL +// theta = 0..M_PI +// phi = 0.. 2 * M_PHI +float SphericalHarmonic( int nL, int nM, float flTheta, float flPhi ); + +// evaluate spherical harmonic with normalized vector direction +float SphericalHarmonic( int nL, int nM, Vector const &vecDirection ); + + +#endif // SPHERICAL_GEOMETRY_H diff --git a/mp/src/public/mathlib/ssemath.h b/mp/src/public/mathlib/ssemath.h index b25fbd09..6691df12 100644 --- a/mp/src/public/mathlib/ssemath.h +++ b/mp/src/public/mathlib/ssemath.h @@ -1,3098 +1,3098 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: - defines SIMD "structure of arrays" classes and functions.
-//
-//===========================================================================//
-#ifndef SSEMATH_H
-#define SSEMATH_H
-
-#if defined( _X360 )
-#include <xboxmath.h>
-#else
-#include <xmmintrin.h>
-#endif
-
-#include <mathlib/vector.h>
-#include <mathlib/mathlib.h>
-
-#if defined(GNUC)
-#define USE_STDC_FOR_SIMD 0
-#else
-#define USE_STDC_FOR_SIMD 0
-#endif
-
-#if (!defined(_X360) && (USE_STDC_FOR_SIMD == 0))
-#define _SSE1 1
-#endif
-
-// I thought about defining a class/union for the SIMD packed floats instead of using fltx4,
-// but decided against it because (a) the nature of SIMD code which includes comparisons is to blur
-// the relationship between packed floats and packed integer types and (b) not sure that the
-// compiler would handle generating good code for the intrinsics.
-
-#if USE_STDC_FOR_SIMD
-
-typedef union
-{
- float m128_f32[4];
- uint32 m128_u32[4];
-} fltx4;
-
-typedef fltx4 i32x4;
-typedef fltx4 u32x4;
-
-#elif ( defined( _X360 ) )
-
-typedef union
-{
- // This union allows float/int access (which generally shouldn't be done in inner loops)
- __vector4 vmx;
- float m128_f32[4];
- uint32 m128_u32[4];
-} fltx4_union;
-
-typedef __vector4 fltx4;
-typedef __vector4 i32x4; // a VMX register; just a way of making it explicit that we're doing integer ops.
-typedef __vector4 u32x4; // a VMX register; just a way of making it explicit that we're doing unsigned integer ops.
-
-#else
-
-typedef __m128 fltx4;
-typedef __m128 i32x4;
-typedef __m128 u32x4;
-
-#endif
-
-// The FLTX4 type is a fltx4 used as a parameter to a function.
-// On the 360, the best way to do this is pass-by-copy on the registers.
-// On the PC, the best way is to pass by const reference.
-// The compiler will sometimes, but not always, replace a pass-by-const-ref
-// with a pass-in-reg on the 360; to avoid this confusion, you can
-// explicitly use a FLTX4 as the parameter type.
-#ifdef _X360
-typedef __vector4 FLTX4;
-#else
-typedef const fltx4 & FLTX4;
-#endif
-
-// A 16-byte aligned int32 datastructure
-// (for use when writing out fltx4's as SIGNED
-// ints).
-struct ALIGN16 intx4
-{
- int32 m_i32[4];
-
- inline int & operator[](int which)
- {
- return m_i32[which];
- }
-
- inline const int & operator[](int which) const
- {
- return m_i32[which];
- }
-
- inline int32 *Base() {
- return m_i32;
- }
-
- inline const int32 *Base() const
- {
- return m_i32;
- }
-
- inline const bool operator==(const intx4 &other) const
- {
- return m_i32[0] == other.m_i32[0] &&
- m_i32[1] == other.m_i32[1] &&
- m_i32[2] == other.m_i32[2] &&
- m_i32[3] == other.m_i32[3] ;
- }
-} ALIGN16_POST;
-
-
-#if defined( _DEBUG ) && defined( _X360 )
-FORCEINLINE void TestVPUFlags()
-{
- // Check that the VPU is in the appropriate (Java-compliant) mode (see 3.2.1 in altivec_pem.pdf on xds.xbox.com)
- __vector4 a;
- __asm
- {
- mfvscr a;
- }
- unsigned int * flags = (unsigned int *)&a;
- unsigned int controlWord = flags[3];
- Assert(controlWord == 0);
-}
-#else // _DEBUG
-FORCEINLINE void TestVPUFlags() {}
-#endif // _DEBUG
-
-
-// useful constants in SIMD packed float format:
-// (note: some of these aren't stored on the 360,
-// but are manufactured directly in one or two
-// instructions, saving a load and possible L2
-// miss.)
-#ifndef _X360
-extern const fltx4 Four_Zeros; // 0 0 0 0
-extern const fltx4 Four_Ones; // 1 1 1 1
-extern const fltx4 Four_Twos; // 2 2 2 2
-extern const fltx4 Four_Threes; // 3 3 3 3
-extern const fltx4 Four_Fours; // guess.
-extern const fltx4 Four_Point225s; // .225 .225 .225 .225
-extern const fltx4 Four_PointFives; // .5 .5 .5 .5
-extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
-extern const fltx4 Four_2ToThe21s; // (1<<21)..
-extern const fltx4 Four_2ToThe22s; // (1<<22)..
-extern const fltx4 Four_2ToThe23s; // (1<<23)..
-extern const fltx4 Four_2ToThe24s; // (1<<24)..
-extern const fltx4 Four_Origin; // 0 0 0 1 (origin point, like vr0 on the PS2)
-extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1
-#else
-#define Four_Zeros XMVectorZero() // 0 0 0 0
-#define Four_Ones XMVectorSplatOne() // 1 1 1 1
-extern const fltx4 Four_Twos; // 2 2 2 2
-extern const fltx4 Four_Threes; // 3 3 3 3
-extern const fltx4 Four_Fours; // guess.
-extern const fltx4 Four_Point225s; // .225 .225 .225 .225
-extern const fltx4 Four_PointFives; // .5 .5 .5 .5
-extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON
-extern const fltx4 Four_2ToThe21s; // (1<<21)..
-extern const fltx4 Four_2ToThe22s; // (1<<22)..
-extern const fltx4 Four_2ToThe23s; // (1<<23)..
-extern const fltx4 Four_2ToThe24s; // (1<<24)..
-extern const fltx4 Four_Origin; // 0 0 0 1 (origin point, like vr0 on the PS2)
-extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1
-#endif
-extern const fltx4 Four_FLT_MAX; // FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX
-extern const fltx4 Four_Negative_FLT_MAX; // -FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX
-extern const fltx4 g_SIMD_0123; // 0 1 2 3 as float
-
-// external aligned integer constants
-extern const ALIGN16 int32 g_SIMD_clear_signmask[] ALIGN16_POST; // 0x7fffffff x 4
-extern const ALIGN16 int32 g_SIMD_signmask[] ALIGN16_POST; // 0x80000000 x 4
-extern const ALIGN16 int32 g_SIMD_lsbmask[] ALIGN16_POST; // 0xfffffffe x 4
-extern const ALIGN16 int32 g_SIMD_clear_wmask[] ALIGN16_POST; // -1 -1 -1 0
-extern const ALIGN16 int32 g_SIMD_ComponentMask[4][4] ALIGN16_POST; // [0xFFFFFFFF 0 0 0], [0 0xFFFFFFFF 0 0], [0 0 0xFFFFFFFF 0], [0 0 0 0xFFFFFFFF]
-extern const ALIGN16 int32 g_SIMD_AllOnesMask[] ALIGN16_POST; // ~0,~0,~0,~0
-extern const ALIGN16 int32 g_SIMD_Low16BitsMask[] ALIGN16_POST; // 0xffff x 4
-
-// this mask is used for skipping the tail of things. If you have N elements in an array, and wish
-// to mask out the tail, g_SIMD_SkipTailMask[N & 3] what you want to use for the last iteration.
-extern const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST;
-
-// Define prefetch macros.
-// The characteristics of cache and prefetch are completely
-// different between the different platforms, so you DO NOT
-// want to just define one macro that maps to every platform
-// intrinsic under the hood -- you need to prefetch at different
-// intervals between x86 and PPC, for example, and that is
-// a higher level code change.
-// On the other hand, I'm tired of typing #ifdef _X360
-// all over the place, so this is just a nop on Intel, PS3.
-#ifdef _X360
-#define PREFETCH360(address, offset) __dcbt(offset,address)
-#else
-#define PREFETCH360(x,y) // nothing
-#endif
-
-#if USE_STDC_FOR_SIMD
-
-//---------------------------------------------------------------------
-// Standard C (fallback/Linux) implementation (only there for compat - slow)
-//---------------------------------------------------------------------
-
-FORCEINLINE float SubFloat( const fltx4 & a, int idx )
-{
- return a.m128_f32[ idx ];
-}
-
-FORCEINLINE float & SubFloat( fltx4 & a, int idx )
-{
- return a.m128_f32[idx];
-}
-
-FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
-{
- return a.m128_u32[idx];
-}
-
-FORCEINLINE uint32 & SubInt( fltx4 & a, int idx )
-{
- return a.m128_u32[idx];
-}
-
-// Return one in the fastest way -- on the x360, faster even than loading.
-FORCEINLINE fltx4 LoadZeroSIMD( void )
-{
- return Four_Zeros;
-}
-
-// Return one in the fastest way -- on the x360, faster even than loading.
-FORCEINLINE fltx4 LoadOneSIMD( void )
-{
- return Four_Ones;
-}
-
-FORCEINLINE fltx4 SplatXSIMD( const fltx4 & a )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = SubFloat( a, 0 );
- SubFloat( retVal, 1 ) = SubFloat( a, 0 );
- SubFloat( retVal, 2 ) = SubFloat( a, 0 );
- SubFloat( retVal, 3 ) = SubFloat( a, 0 );
- return retVal;
-}
-
-FORCEINLINE fltx4 SplatYSIMD( fltx4 a )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = SubFloat( a, 1 );
- SubFloat( retVal, 1 ) = SubFloat( a, 1 );
- SubFloat( retVal, 2 ) = SubFloat( a, 1 );
- SubFloat( retVal, 3 ) = SubFloat( a, 1 );
- return retVal;
-}
-
-FORCEINLINE fltx4 SplatZSIMD( fltx4 a )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = SubFloat( a, 2 );
- SubFloat( retVal, 1 ) = SubFloat( a, 2 );
- SubFloat( retVal, 2 ) = SubFloat( a, 2 );
- SubFloat( retVal, 3 ) = SubFloat( a, 2 );
- return retVal;
-}
-
-FORCEINLINE fltx4 SplatWSIMD( fltx4 a )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = SubFloat( a, 3 );
- SubFloat( retVal, 1 ) = SubFloat( a, 3 );
- SubFloat( retVal, 2 ) = SubFloat( a, 3 );
- SubFloat( retVal, 3 ) = SubFloat( a, 3 );
- return retVal;
-}
-
-FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x )
-{
- fltx4 result = a;
- SubFloat( result, 0 ) = SubFloat( x, 0 );
- return result;
-}
-
-FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y )
-{
- fltx4 result = a;
- SubFloat( result, 1 ) = SubFloat( y, 1 );
- return result;
-}
-
-FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z )
-{
- fltx4 result = a;
- SubFloat( result, 2 ) = SubFloat( z, 2 );
- return result;
-}
-
-FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w )
-{
- fltx4 result = a;
- SubFloat( result, 3 ) = SubFloat( w, 3 );
- return result;
-}
-
-FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue )
-{
- fltx4 result = a;
- SubFloat( result, nComponent ) = flValue;
- return result;
-}
-
-// a b c d -> b c d a
-FORCEINLINE fltx4 RotateLeft( const fltx4 & a )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = SubFloat( a, 1 );
- SubFloat( retVal, 1 ) = SubFloat( a, 2 );
- SubFloat( retVal, 2 ) = SubFloat( a, 3 );
- SubFloat( retVal, 3 ) = SubFloat( a, 0 );
- return retVal;
-}
-
-// a b c d -> c d a b
-FORCEINLINE fltx4 RotateLeft2( const fltx4 & a )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = SubFloat( a, 2 );
- SubFloat( retVal, 1 ) = SubFloat( a, 3 );
- SubFloat( retVal, 2 ) = SubFloat( a, 0 );
- SubFloat( retVal, 3 ) = SubFloat( a, 1 );
- return retVal;
-}
-
-#define BINOP(op) \
- fltx4 retVal; \
- SubFloat( retVal, 0 ) = ( SubFloat( a, 0 ) op SubFloat( b, 0 ) ); \
- SubFloat( retVal, 1 ) = ( SubFloat( a, 1 ) op SubFloat( b, 1 ) ); \
- SubFloat( retVal, 2 ) = ( SubFloat( a, 2 ) op SubFloat( b, 2 ) ); \
- SubFloat( retVal, 3 ) = ( SubFloat( a, 3 ) op SubFloat( b, 3 ) ); \
- return retVal;
-
-#define IBINOP(op) \
- fltx4 retVal; \
- SubInt( retVal, 0 ) = ( SubInt( a, 0 ) op SubInt ( b, 0 ) ); \
- SubInt( retVal, 1 ) = ( SubInt( a, 1 ) op SubInt ( b, 1 ) ); \
- SubInt( retVal, 2 ) = ( SubInt( a, 2 ) op SubInt ( b, 2 ) ); \
- SubInt( retVal, 3 ) = ( SubInt( a, 3 ) op SubInt ( b, 3 ) ); \
- return retVal;
-
-FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b )
-{
- BINOP(+);
-}
-
-FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b
-{
- BINOP(-);
-};
-
-FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b
-{
- BINOP(*);
-}
-
-FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b
-{
- BINOP(/);
-}
-
-
-FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c
-{
- return AddSIMD( MulSIMD(a,b), c );
-}
-
-FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b
-{
- return SubSIMD( c, MulSIMD(a,b) );
-};
-
-
-FORCEINLINE fltx4 SinSIMD( const fltx4 &radians )
-{
- fltx4 result;
- SubFloat( result, 0 ) = sin( SubFloat( radians, 0 ) );
- SubFloat( result, 1 ) = sin( SubFloat( radians, 1 ) );
- SubFloat( result, 2 ) = sin( SubFloat( radians, 2 ) );
- SubFloat( result, 3 ) = sin( SubFloat( radians, 3 ) );
- return result;
-}
-
-FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
-{
- SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) );
- SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) );
- SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) );
-}
-
-FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
-{
- SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) );
- SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) );
- SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) );
- SinCos( SubFloat( radians, 3 ), &SubFloat( sine, 3 ), &SubFloat( cosine, 3 ) );
-}
-
-FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine )
-{
- fltx4 result;
- SubFloat( result, 0 ) = asin( SubFloat( sine, 0 ) );
- SubFloat( result, 1 ) = asin( SubFloat( sine, 1 ) );
- SubFloat( result, 2 ) = asin( SubFloat( sine, 2 ) );
- SubFloat( result, 3 ) = asin( SubFloat( sine, 3 ) );
- return result;
-}
-
-FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs )
-{
- fltx4 result;
- SubFloat( result, 0 ) = acos( SubFloat( cs, 0 ) );
- SubFloat( result, 1 ) = acos( SubFloat( cs, 1 ) );
- SubFloat( result, 2 ) = acos( SubFloat( cs, 2 ) );
- SubFloat( result, 3 ) = acos( SubFloat( cs, 3 ) );
- return result;
-}
-
-// tan^-1(a/b) .. ie, pass sin in as a and cos in as b
-// Each of the four lanes is computed independently with atan2( a[i], b[i] ).
-FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
-{
- fltx4 result;
- SubFloat( result, 0 ) = atan2( SubFloat( a, 0 ), SubFloat( b, 0 ) );
- SubFloat( result, 1 ) = atan2( SubFloat( a, 1 ), SubFloat( b, 1 ) );
- SubFloat( result, 2 ) = atan2( SubFloat( a, 2 ), SubFloat( b, 2 ) );
- SubFloat( result, 3 ) = atan2( SubFloat( a, 3 ), SubFloat( b, 3 ) );
- return result;
-}
-
-FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = max( SubFloat( a, 0 ), SubFloat( b, 0 ) );
- SubFloat( retVal, 1 ) = max( SubFloat( a, 1 ), SubFloat( b, 1 ) );
- SubFloat( retVal, 2 ) = max( SubFloat( a, 2 ), SubFloat( b, 2 ) );
- SubFloat( retVal, 3 ) = max( SubFloat( a, 3 ), SubFloat( b, 3 ) );
- return retVal;
-}
-
-FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = min( SubFloat( a, 0 ), SubFloat( b, 0 ) );
- SubFloat( retVal, 1 ) = min( SubFloat( a, 1 ), SubFloat( b, 1 ) );
- SubFloat( retVal, 2 ) = min( SubFloat( a, 2 ), SubFloat( b, 2 ) );
- SubFloat( retVal, 3 ) = min( SubFloat( a, 3 ), SubFloat( b, 3 ) );
- return retVal;
-}
-
-FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b
-{
- IBINOP(&);
-}
-
-FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b
-{
- fltx4 retVal;
- SubInt( retVal, 0 ) = ~SubInt( a, 0 ) & SubInt( b, 0 );
- SubInt( retVal, 1 ) = ~SubInt( a, 1 ) & SubInt( b, 1 );
- SubInt( retVal, 2 ) = ~SubInt( a, 2 ) & SubInt( b, 2 );
- SubInt( retVal, 3 ) = ~SubInt( a, 3 ) & SubInt( b, 3 );
- return retVal;
-}
-
-FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b
-{
- IBINOP(^);
-}
-
-FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b
-{
- IBINOP(|);
-}
-
-FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a
-{
- fltx4 retval;
- SubFloat( retval, 0 ) = -SubFloat( a, 0 );
- SubFloat( retval, 1 ) = -SubFloat( a, 1 );
- SubFloat( retval, 2 ) = -SubFloat( a, 2 );
- SubFloat( retval, 3 ) = -SubFloat( a, 3 );
-
- return retval;
-}
-
-// True only when every one of the four float lanes compares equal to zero.
-// Lanes are tested in order x, y, z, w and the scan stops at the first miss.
-FORCEINLINE bool IsAllZeros( const fltx4 & a ) // all floats of a zero?
-{
- for ( int nLane = 0; nLane < 4; ++nLane )
- {
-  if ( !( SubFloat( a, nLane ) == 0.0 ) )
-  {
-   return false;
-  }
- }
- return true;
-}
-
-
-// for branching when a.xyzw > b.xyzw
-FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b )
-{
- return SubFloat(a,0) > SubFloat(b,0) &&
- SubFloat(a,1) > SubFloat(b,1) &&
- SubFloat(a,2) > SubFloat(b,2) &&
- SubFloat(a,3) > SubFloat(b,3);
-}
-
-// for branching when a.xyzw >= b.xyzw
-FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b )
-{
- return SubFloat(a,0) >= SubFloat(b,0) &&
- SubFloat(a,1) >= SubFloat(b,1) &&
- SubFloat(a,2) >= SubFloat(b,2) &&
- SubFloat(a,3) >= SubFloat(b,3);
-}
-
-// Branch helper: true only when each lane of a compares equal to the
-// matching lane of b. Lanes are compared in order and the scan stops at
-// the first mismatch.
-FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b )
-{
- for ( int nLane = 0; nLane < 4; ++nLane )
- {
-  if ( !( SubFloat(a,nLane) == SubFloat(b,nLane) ) )
-  {
-   return false;
-  }
- }
- return true;
-}
-
-// Packs the top (sign) bit of each lane into the low four bits of the
-// result: sign(x) -> bit 0, sign(y) -> bit 1, sign(z) -> bit 2, sign(w) -> bit 3.
-FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set
-{
- int nSignBits = 0;
-
- for ( int nLane = 0; nLane < 4; ++nLane )
- {
-  // Isolate bit 31 of the lane, then shift it down so it lands on bit nLane.
-  nSignBits |= ( SubInt( a, nLane ) & 0x80000000 ) >> ( 31 - nLane );
- }
-
- return nSignBits;
-}
-
-// True when at least one lane has its sign bit set, as reported by TestSignSIMD.
-FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0)
-{
- const int nSignMask = TestSignSIMD( a );
- return nSignMask != 0;
-}
-
-FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0
-{
- fltx4 retVal;
- SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) == SubFloat( b, 0 )) ? ~0 : 0;
- SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) == SubFloat( b, 1 )) ? ~0 : 0;
- SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) == SubFloat( b, 2 )) ? ~0 : 0;
- SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) == SubFloat( b, 3 )) ? ~0 : 0;
- return retVal;
-}
-
-FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0
-{
- fltx4 retVal;
- SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) > SubFloat( b, 0 )) ? ~0 : 0;
- SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) > SubFloat( b, 1 )) ? ~0 : 0;
- SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) > SubFloat( b, 2 )) ? ~0 : 0;
- SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) > SubFloat( b, 3 )) ? ~0 : 0;
- return retVal;
-}
-
-FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0
-{
- fltx4 retVal;
- SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) >= SubFloat( b, 0 )) ? ~0 : 0;
- SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) >= SubFloat( b, 1 )) ? ~0 : 0;
- SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) >= SubFloat( b, 2 )) ? ~0 : 0;
- SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) >= SubFloat( b, 3 )) ? ~0 : 0;
- return retVal;
-}
-
-FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a<b) ? ~0:0
-{
- fltx4 retVal;
- SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) < SubFloat( b, 0 )) ? ~0 : 0;
- SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) < SubFloat( b, 1 )) ? ~0 : 0;
- SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) < SubFloat( b, 2 )) ? ~0 : 0;
- SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) < SubFloat( b, 3 )) ? ~0 : 0;
- return retVal;
-}
-
-FORCEINLINE fltx4 CmpLeSIMD( const fltx4 & a, const fltx4 & b ) // (a<=b) ? ~0:0
-{
- fltx4 retVal;
- SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) <= SubFloat( b, 0 )) ? ~0 : 0;
- SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) <= SubFloat( b, 1 )) ? ~0 : 0;
- SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) <= SubFloat( b, 2 )) ? ~0 : 0;
- SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) <= SubFloat( b, 3 )) ? ~0 : 0;
- return retVal;
-}
-
-FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <= b && a >= -b) ? ~0 : 0
-{
- fltx4 retVal;
- SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) <= SubFloat( b, 0 ) && SubFloat( a, 0 ) >= -SubFloat( b, 0 ) ) ? ~0 : 0;
- SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) <= SubFloat( b, 1 ) && SubFloat( a, 1 ) >= -SubFloat( b, 1 ) ) ? ~0 : 0;
- SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) <= SubFloat( b, 2 ) && SubFloat( a, 2 ) >= -SubFloat( b, 2 ) ) ? ~0 : 0;
- SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) <= SubFloat( b, 3 ) && SubFloat( a, 3 ) >= -SubFloat( b, 3 ) ) ? ~0 : 0;
- return retVal;
-}
-
-
-// Bitwise select: result = ( ReplacementMask & NewValue ) | ( ~ReplacementMask & OldValue ).
-// Bits set in the mask take NewValue; cleared bits keep OldValue.
-FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue )
-{
- fltx4 takenFromNew = AndSIMD( ReplacementMask, NewValue );
- fltx4 keptFromOld = AndNotSIMD( ReplacementMask, OldValue );
- return OrSIMD( takenFromNew, keptFromOld );
-}
-
-// Broadcast: returns a vector whose four float lanes all hold flValue.
-FORCEINLINE fltx4 ReplicateX4( float flValue ) // a,a,a,a
-{
- fltx4 splat;
- for ( int nLane = 0; nLane < 4; ++nLane )
- {
-  SubFloat( splat, nLane ) = flValue;
- }
- return splat;
-}
-
-/// Broadcast a 32 bit integer: every lane of the returned vector, viewed
-/// through SubInt(), is assigned nValue.
-FORCEINLINE fltx4 ReplicateIX4( int nValue )
-{
- fltx4 splat;
- for ( int nLane = 0; nLane < 4; ++nLane )
- {
-  SubInt( splat, nLane ) = nValue;
- }
- return splat;
-}
-
-// Round towards positive infinity
-FORCEINLINE fltx4 CeilSIMD( const fltx4 &a )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = ceil( SubFloat( a, 0 ) );
- SubFloat( retVal, 1 ) = ceil( SubFloat( a, 1 ) );
- SubFloat( retVal, 2 ) = ceil( SubFloat( a, 2 ) );
- SubFloat( retVal, 3 ) = ceil( SubFloat( a, 3 ) );
- return retVal;
-
-}
-
-// Round towards negative infinity
-FORCEINLINE fltx4 FloorSIMD( const fltx4 &a )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = floor( SubFloat( a, 0 ) );
- SubFloat( retVal, 1 ) = floor( SubFloat( a, 1 ) );
- SubFloat( retVal, 2 ) = floor( SubFloat( a, 2 ) );
- SubFloat( retVal, 3 ) = floor( SubFloat( a, 3 ) );
- return retVal;
-
-}
-
-FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = sqrt( SubFloat( a, 0 ) );
- SubFloat( retVal, 1 ) = sqrt( SubFloat( a, 1 ) );
- SubFloat( retVal, 2 ) = sqrt( SubFloat( a, 2 ) );
- SubFloat( retVal, 3 ) = sqrt( SubFloat( a, 3 ) );
- return retVal;
-}
-
-FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a)
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = sqrt( SubFloat( a, 0 ) );
- SubFloat( retVal, 1 ) = sqrt( SubFloat( a, 1 ) );
- SubFloat( retVal, 2 ) = sqrt( SubFloat( a, 2 ) );
- SubFloat( retVal, 3 ) = sqrt( SubFloat( a, 3 ) );
- return retVal;
-}
-
-FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = 1.0 / sqrt( SubFloat( a, 0 ) );
- SubFloat( retVal, 1 ) = 1.0 / sqrt( SubFloat( a, 1 ) );
- SubFloat( retVal, 2 ) = 1.0 / sqrt( SubFloat( a, 2 ) );
- SubFloat( retVal, 3 ) = 1.0 / sqrt( SubFloat( a, 3 ) );
- return retVal;
-}
-
-FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = 1.0 / sqrt( SubFloat( a, 0 ) != 0.0f ? SubFloat( a, 0 ) : FLT_EPSILON );
- SubFloat( retVal, 1 ) = 1.0 / sqrt( SubFloat( a, 1 ) != 0.0f ? SubFloat( a, 1 ) : FLT_EPSILON );
- SubFloat( retVal, 2 ) = 1.0 / sqrt( SubFloat( a, 2 ) != 0.0f ? SubFloat( a, 2 ) : FLT_EPSILON );
- SubFloat( retVal, 3 ) = 1.0 / sqrt( SubFloat( a, 3 ) != 0.0f ? SubFloat( a, 3 ) : FLT_EPSILON );
- return retVal;
-}
-
-FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a)
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = 1.0 / sqrt( SubFloat( a, 0 ) );
- SubFloat( retVal, 1 ) = 1.0 / sqrt( SubFloat( a, 1 ) );
- SubFloat( retVal, 2 ) = 1.0 / sqrt( SubFloat( a, 2 ) );
- SubFloat( retVal, 3 ) = 1.0 / sqrt( SubFloat( a, 3 ) );
- return retVal;
-}
-
-FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = 1.0 / SubFloat( a, 0 );
- SubFloat( retVal, 1 ) = 1.0 / SubFloat( a, 1 );
- SubFloat( retVal, 2 ) = 1.0 / SubFloat( a, 2 );
- SubFloat( retVal, 3 ) = 1.0 / SubFloat( a, 3 );
- return retVal;
-}
-
-FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = 1.0 / SubFloat( a, 0 );
- SubFloat( retVal, 1 ) = 1.0 / SubFloat( a, 1 );
- SubFloat( retVal, 2 ) = 1.0 / SubFloat( a, 2 );
- SubFloat( retVal, 3 ) = 1.0 / SubFloat( a, 3 );
- return retVal;
-}
-
-/// 1/x for all 4 values.
-/// 1/0 will result in a big but NOT infinite result
-FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = 1.0 / (SubFloat( a, 0 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 0 ));
- SubFloat( retVal, 1 ) = 1.0 / (SubFloat( a, 1 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 1 ));
- SubFloat( retVal, 2 ) = 1.0 / (SubFloat( a, 2 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 2 ));
- SubFloat( retVal, 3 ) = 1.0 / (SubFloat( a, 3 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 3 ));
- return retVal;
-}
-
-FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = 1.0 / (SubFloat( a, 0 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 0 ));
- SubFloat( retVal, 1 ) = 1.0 / (SubFloat( a, 1 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 1 ));
- SubFloat( retVal, 2 ) = 1.0 / (SubFloat( a, 2 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 2 ));
- SubFloat( retVal, 3 ) = 1.0 / (SubFloat( a, 3 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 3 ));
- return retVal;
-}
-
-// 2^x for all values (the antilog)
-FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower )
-{
- fltx4 retVal;
- SubFloat( retVal, 0 ) = powf( 2, SubFloat(toPower, 0) );
- SubFloat( retVal, 1 ) = powf( 2, SubFloat(toPower, 1) );
- SubFloat( retVal, 2 ) = powf( 2, SubFloat(toPower, 2) );
- SubFloat( retVal, 3 ) = powf( 2, SubFloat(toPower, 3) );
-
- return retVal;
-}
-
-FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b )
-{
- float flDot = SubFloat( a, 0 ) * SubFloat( b, 0 ) +
- SubFloat( a, 1 ) * SubFloat( b, 1 ) +
- SubFloat( a, 2 ) * SubFloat( b, 2 );
- return ReplicateX4( flDot );
-}
-
-FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b )
-{
- float flDot = SubFloat( a, 0 ) * SubFloat( b, 0 ) +
- SubFloat( a, 1 ) * SubFloat( b, 1 ) +
- SubFloat( a, 2 ) * SubFloat( b, 2 ) +
- SubFloat( a, 3 ) * SubFloat( b, 3 );
- return ReplicateX4( flDot );
-}
-
-// Clamps every lane of `in` to the range given by the matching lanes of
-// `min` and `max`: the upper bound is applied first, then the lower bound.
-FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max)
-{
- fltx4 cappedAbove = MinSIMD( max, in );
- return MaxSIMD( min, cappedAbove );
-}
-
-// Squelch the w component of a vector to +0.0.
-// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy)
-// Returns a copy of `a` whose x, y and z lanes are untouched and whose
-// w lane (index 3) is zero.
-FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a )
-{
- fltx4 retval;
- retval = a;
- // w is lane 3; lane 0 is x and must be preserved.
- SubFloat( retval, 3 ) = 0;
- return retval;
-}
-
-FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD )
-{
- return *( reinterpret_cast< const fltx4 *> ( pSIMD ) );
-}
-
-// Load a 3-vector (as opposed to LoadUnalignedSIMD, which loads a 4-vec).
-// Only the three floats at pSIMD are read, so the source may be a bare
-// float[3] with nothing readable after it; the w lane of the result is
-// set to 0 rather than to whatever happens to follow in memory.
-FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD )
-{
- const float *pFloats = reinterpret_cast< const float * >( pSIMD );
- fltx4 retval;
- SubFloat( retval, 0 ) = pFloats[0];
- SubFloat( retval, 1 ) = pFloats[1];
- SubFloat( retval, 2 ) = pFloats[2];
- SubFloat( retval, 3 ) = 0.0f;
- return retval;
-}
-
-FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD )
-{
- return *( reinterpret_cast< const fltx4 *> ( pSIMD ) );
-}
-
-// for the transitional class -- load a 3-by VectorAligned and squash its w component
-FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD )
-{
- fltx4 retval = LoadAlignedSIMD(pSIMD.Base());
- // squelch w
- SubInt( retval, 3 ) = 0;
- return retval;
-}
-
-FORCEINLINE void StoreAlignedSIMD( float *pSIMD, const fltx4 & a )
-{
- *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a;
-}
-
-FORCEINLINE void StoreUnalignedSIMD( float *pSIMD, const fltx4 & a )
-{
- *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a;
-}
-
-// Writes the x, y and z lanes of `a` to pSIMD[0..2]; the w lane is not
-// stored, so only three floats of the destination are touched.
-FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a )
-{
- for ( int nLane = 0; nLane < 3; ++nLane )
- {
-  pSIMD[nLane] = SubFloat(a, nLane);
- }
-}
-
-// strongly typed -- syntactic castor oil used for typechecking as we transition to SIMD
-FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a )
-{
- StoreAlignedSIMD(pSIMD->Base(),a);
-}
-
-// Transposes, in place, the 4x4 matrix whose rows are x, y, z and w by
-// swapping each element above the diagonal with its mirror below it.
-FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w )
-{
-#define SWAP_FLOATS( _a_, _ia_, _b_, _ib_ ) { float tmp = SubFloat( _a_, _ia_ ); SubFloat( _a_, _ia_ ) = SubFloat( _b_, _ib_ ); SubFloat( _b_, _ib_ ) = tmp; }
- SWAP_FLOATS( x, 1, y, 0 );
- SWAP_FLOATS( x, 2, z, 0 );
- SWAP_FLOATS( x, 3, w, 0 );
- SWAP_FLOATS( y, 2, z, 1 );
- SWAP_FLOATS( y, 3, w, 1 );
- SWAP_FLOATS( z, 3, w, 2 );
- // The helper is private to this function; drop it so it does not leak
- // into every translation unit that includes this header.
-#undef SWAP_FLOATS
-}
-
-// Takes the minimum of a.x, a.y and a.z (w is ignored) and broadcasts
-// it to all four lanes of the return value.
-FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a )
-{
- float lowestXY = min( SubFloat(a, 0), SubFloat(a, 1) );
- float lowest = min( lowestXY, SubFloat(a, 2) );
- return ReplicateX4(lowest);
-}
-
-// Takes the maximum of a.x, a.y and a.z (w is ignored) and broadcasts
-// it to all four lanes of the return value.
-FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a )
-{
- float highestXY = max( SubFloat(a, 0), SubFloat(a, 1) );
- float highest = max( highestXY, SubFloat(a, 2) );
- return ReplicateX4(highest);
-}
-
-// Converts the four float lanes of vSrc to SIGNED INTS and stores them
-// in *pDest:  pDest->x = Int (vSrc.x), and likewise for y, z, w.
-// Each lane is assigned straight to the destination element, so the
-// conversion is the element type's ordinary assignment from float.
-// Note: some architectures can do a fixed-point conversion when the fix
-// depth is an immediate, but a function parameter cannot be guaranteed
-// to be an immediate, so no depth argument is offered here.
-FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc)
-{
- intx4 &dest = *pDest;
- for ( int nLane = 0; nLane < 4; ++nLane )
- {
-  dest[nLane] = SubFloat(vSrc, nLane);
- }
-}
-
-// ------------------------------------
-// INTEGER SIMD OPERATIONS.
-// ------------------------------------
-// Splat: assigns the signed integer nValue to all four lanes of the
-// returned vector through their SubInt() view.
-FORCEINLINE fltx4 IntSetImmediateSIMD( int nValue )
-{
- fltx4 splat;
- // Filled from w down to x, the same order the chained assignment used.
- for ( int nLane = 3; nLane >= 0; --nLane )
- {
-  SubInt( splat, nLane ) = nValue;
- }
- return splat;
-}
-
-// Load 4 aligned words into a SIMD register
-FORCEINLINE i32x4 LoadAlignedIntSIMD(const void * RESTRICT pSIMD)
-{
- return *( reinterpret_cast< const i32x4 *> ( pSIMD ) );
-}
-
-// Load 4 unaligned words into a SIMD register
-FORCEINLINE i32x4 LoadUnalignedIntSIMD( const void * RESTRICT pSIMD)
-{
- return *( reinterpret_cast< const i32x4 *> ( pSIMD ) );
-}
-
-// save into four words, 16-byte aligned
-FORCEINLINE void StoreAlignedIntSIMD( int32 *pSIMD, const fltx4 & a )
-{
- *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a;
-}
-
-FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a )
-{
- *( reinterpret_cast< i32x4 *> ( pSIMD.Base() ) ) = a;
-}
-
-FORCEINLINE void StoreUnalignedIntSIMD( int32 *pSIMD, const fltx4 & a )
-{
- *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a;
-}
-
-// Take a fltx4 containing fixed-point uints and
-// return them as single precision floats. No
-// fixed point conversion is done.
-// Each lane of vSrcA is read through SubInt() and cast to float.
-// NOTE(review): assumes SubInt() yields the lane as an unsigned 32-bit
-// value, as its use in TestSignSIMD suggests -- confirm against its declaration.
-FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const u32x4 &vSrcA )
-{
- Assert(0); /* pc has no such operation */
- fltx4 retval;
- // Convert the SOURCE lanes; reading retval here would use uninitialized data.
- SubFloat( retval, 0 ) = ( (float) SubInt( vSrcA, 0 ) );
- SubFloat( retval, 1 ) = ( (float) SubInt( vSrcA, 1 ) );
- SubFloat( retval, 2 ) = ( (float) SubInt( vSrcA, 2 ) );
- SubFloat( retval, 3 ) = ( (float) SubInt( vSrcA, 3 ) );
- return retval;
-}
-
-
-#if 0 /* pc has no such op */
-// Take a fltx4 containing fixed-point sints and
-// return them as single precision floats. No
-// fixed point conversion is done.
-FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA )
-{
- fltx4 retval;
- SubFloat( retval, 0 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[0])) );
- SubFloat( retval, 1 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[1])) );
- SubFloat( retval, 2 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[2])) );
- SubFloat( retval, 3 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[3])) );
- return retval;
-}
-
-
-/*
- works on fltx4's as if they are four uints.
- the first parameter contains the words to be shifted,
- the second contains the amount to shift by AS INTS
-
- for i = 0 to 3
- shift = vSrcB_i*32:(i*32)+4
- vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift
-*/
-FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB)
-{
- i32x4 retval;
- SubInt(retval, 0) = SubInt(vSrcA, 0) << SubInt(vSrcB, 0);
- SubInt(retval, 1) = SubInt(vSrcA, 1) << SubInt(vSrcB, 1);
- SubInt(retval, 2) = SubInt(vSrcA, 2) << SubInt(vSrcB, 2);
- SubInt(retval, 3) = SubInt(vSrcA, 3) << SubInt(vSrcB, 3);
-
-
- return retval;
-}
-#endif
-
-#elif ( defined( _X360 ) )
-
-//---------------------------------------------------------------------
-// X360 implementation
-//---------------------------------------------------------------------
-
-FORCEINLINE float & FloatSIMD( fltx4 & a, int idx )
-{
- fltx4_union & a_union = (fltx4_union &)a;
- return a_union.m128_f32[idx];
-}
-
-FORCEINLINE unsigned int & UIntSIMD( fltx4 & a, int idx )
-{
- fltx4_union & a_union = (fltx4_union &)a;
- return a_union.m128_u32[idx];
-}
-
-FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b )
-{
- return __vaddfp( a, b );
-}
-
-FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b
-{
- return __vsubfp( a, b );
-}
-
-FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b
-{
- return __vmulfp( a, b );
-}
-
-FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c
-{
- return __vmaddfp( a, b, c );
-}
-
-// Multiply-subtract as a single __vnmsubfp intrinsic; per the signature
-// comment the result is c - a*b in every lane.
-FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b
-{
- return __vnmsubfp( a, b, c );
-}
-
-FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b )
-{
- return __vmsum3fp( a, b );
-}
-
-FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b )
-{
- return __vmsum4fp( a, b );
-}
-
-FORCEINLINE fltx4 SinSIMD( const fltx4 &radians )
-{
- return XMVectorSin( radians );
-}
-
-FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
-{
- XMVectorSinCos( &sine, &cosine, radians );
-}
-
-FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
-{
- XMVectorSinCos( &sine, &cosine, radians );
-}
-
-FORCEINLINE void CosSIMD( fltx4 &cosine, const fltx4 &radians )
-{
- cosine = XMVectorCos( radians );
-}
-
-FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine )
-{
- return XMVectorASin( sine );
-}
-
-FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs )
-{
- return XMVectorACos( cs );
-}
-
-// tan^-1(a/b) .. ie, pass sin in as a and cos in as b
-// Forwards both operands unchanged to XMVectorATan2.
-FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
-{
- return XMVectorATan2( a, b );
-}
-
-// DivSIMD defined further down, since it uses ReciprocalSIMD
-
-FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
-{
- return __vmaxfp( a, b );
-}
-
-FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
-{
- return __vminfp( a, b );
-}
-
-FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b
-{
- return __vand( a, b );
-}
-
-FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b
-{
- // NOTE: a and b are swapped in the call: SSE complements the first argument, VMX the second
- return __vandc( b, a );
-}
-
-FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b
-{
- return __vxor( a, b );
-}
-
-FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b
-{
- return __vor( a, b );
-}
-
-FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a
-{
- return XMVectorNegate(a);
-}
-
-FORCEINLINE bool IsAllZeros( const fltx4 & a ) // all floats of a zero?
-{
- unsigned int equalFlags = 0;
- __vcmpeqfpR( a, Four_Zeros, &equalFlags );
- return XMComparisonAllTrue( equalFlags );
-}
-
-FORCEINLINE bool IsAnyZeros( const fltx4 & a ) // any floats are zero?
-{
- unsigned int conditionregister;
- XMVectorEqualR(&conditionregister, a, XMVectorZero());
- return XMComparisonAnyTrue(conditionregister);
-}
-
-FORCEINLINE bool IsAnyXYZZero( const fltx4 &a ) // are any of x,y,z zero?
-{
- // copy a's x component into w, in case w was zero.
- fltx4 temp = __vrlimi(a, a, 1, 1);
- unsigned int conditionregister;
- XMVectorEqualR(&conditionregister, temp, XMVectorZero());
- return XMComparisonAnyTrue(conditionregister);
-}
-
-// for branching when a.xyzw > b.xyzw
-FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b )
-{
- unsigned int cr;
- XMVectorGreaterR(&cr,a,b);
- return XMComparisonAllTrue(cr);
-}
-
-// for branching when a.xyzw >= b.xyzw
-FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b )
-{
- unsigned int cr;
- XMVectorGreaterOrEqualR(&cr,a,b);
- return XMComparisonAllTrue(cr);
-}
-
-// For branching if all a.xyzw == b.xyzw
-FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b )
-{
- unsigned int cr;
- XMVectorEqualR(&cr,a,b);
- return XMComparisonAllTrue(cr);
-}
-
-
-FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set
-{
- // NOTE: this maps to SSE way better than it does to VMX (most code uses IsAnyNegative(), though)
- int nRet = 0;
-
- const fltx4_union & a_union = (const fltx4_union &)a;
- nRet |= ( a_union.m128_u32[0] & 0x80000000 ) >> 31; // sign(x) -> bit 0
- nRet |= ( a_union.m128_u32[1] & 0x80000000 ) >> 30; // sign(y) -> bit 1
- nRet |= ( a_union.m128_u32[2] & 0x80000000 ) >> 29; // sign(z) -> bit 2
- nRet |= ( a_union.m128_u32[3] & 0x80000000 ) >> 28; // sign(w) -> bit 3
-
- return nRet;
-}
-
-// Squelch the w component of a vector to +0.0.
-// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy)
-FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a )
-{
- return __vrlimi( a, __vzero(), 1, 0 );
-}
-
-FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0)
-{
- // NOTE: this tests the top bits of each vector element using integer math
- // (so it ignores NaNs - it will return true for "-NaN")
- unsigned int equalFlags = 0;
- fltx4 signMask = __vspltisw( -1 ); // 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF (low order 5 bits of each element = 31)
- signMask = __vslw( signMask, signMask ); // 0x80000000 0x80000000 0x80000000 0x80000000
- __vcmpequwR( Four_Zeros, __vand( signMask, a ), &equalFlags );
- return !XMComparisonAllTrue( equalFlags );
-}
-
-FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0
-{
- return __vcmpeqfp( a, b );
-}
-
-
-FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0
-{
- return __vcmpgtfp( a, b );
-}
-
-FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0
-{
- return __vcmpgefp( a, b );
-}
-
-FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a<b) ? ~0:0
-{
- return __vcmpgtfp( b, a );
-}
-
-FORCEINLINE fltx4 CmpLeSIMD( const fltx4 & a, const fltx4 & b ) // (a<=b) ? ~0:0
-{
- return __vcmpgefp( b, a );
-}
-
-FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <= b && a >= -b) ? ~0 : 0
-{
- return XMVectorInBounds( a, b );
-}
-
-// returned[i] = ReplacementMask[i] == 0 ? OldValue : NewValue
-FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue )
-{
- return __vsel( OldValue, NewValue, ReplacementMask );
-}
-
-// AKA "Broadcast", "Splat"
-FORCEINLINE fltx4 ReplicateX4( float flValue ) // a,a,a,a
-{
- // NOTE: if flValue comes from a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
- float * pValue = &flValue;
- Assert( pValue );
- Assert( ((unsigned int)pValue & 3) == 0);
- return __vspltw( __lvlx( pValue, 0 ), 0 );
-}
-
-FORCEINLINE fltx4 ReplicateX4( const float *pValue ) // a,a,a,a
-{
- Assert( pValue );
- return __vspltw( __lvlx( pValue, 0 ), 0 );
-}
-
-/// replicate a single 32 bit integer value to all 4 components of an m128
-FORCEINLINE fltx4 ReplicateIX4( int nValue )
-{
- // NOTE: if nValue comes from a register, this causes a Load-Hit-Store stall (should not mix ints with fltx4s!)
- int * pValue = &nValue;
- Assert( pValue );
- Assert( ((unsigned int)pValue & 3) == 0);
- return __vspltw( __lvlx( pValue, 0 ), 0 );
-}
-
-// Round towards positive infinity
-FORCEINLINE fltx4 CeilSIMD( const fltx4 &a )
-{
- return __vrfip(a);
-}
-
-// Round towards nearest integer
-FORCEINLINE fltx4 RoundSIMD( const fltx4 &a )
-{
- return __vrfin(a);
-}
-
-// Round towards negative infinity
-FORCEINLINE fltx4 FloorSIMD( const fltx4 &a )
-{
- return __vrfim(a);
-}
-
-FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less
-{
- // This is emulated from rsqrt
- return XMVectorSqrtEst( a );
-}
-
-FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a)
-{
- // This is emulated from rsqrt
- return XMVectorSqrt( a );
-}
-
-FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less
-{
- return __vrsqrtefp( a );
-}
-
-FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a )
-{
- // Convert zeros to epsilons
- fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros );
- fltx4 a_safe = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) );
- return ReciprocalSqrtEstSIMD( a_safe );
-}
-
-FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a)
-{
- // This uses Newton-Raphson to improve the HW result
- return XMVectorReciprocalSqrt( a );
-}
-
-FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less
-{
- return __vrefp( a );
-}
-
-/// 1/x for all 4 values. uses reciprocal approximation instruction plus newton iteration.
-/// No error checking!
-FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a
-{
- // This uses Newton-Raphson to improve the HW result
- return XMVectorReciprocal( a );
-}
-
-// FIXME: on 360, this is very slow, since it uses ReciprocalSIMD (do we need DivEstSIMD?)
-FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b
-{
- return MulSIMD( ReciprocalSIMD( b ), a );
-}
-
-/// 1/x for all 4 values.
-/// 1/0 will result in a big but NOT infinite result
-FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a )
-{
- // Convert zeros to epsilons
- fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros );
- fltx4 a_safe = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) );
- return ReciprocalEstSIMD( a_safe );
-}
-
-FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a )
-{
- // Convert zeros to epsilons
- fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros );
- fltx4 a_safe = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) );
- return ReciprocalSIMD( a_safe );
-
- // FIXME: This could be faster (BUT: it doesn't preserve the sign of -0.0, whereas the above does)
- // fltx4 zeroMask = CmpEqSIMD( Four_Zeros, a );
- // fltx4 a_safe = XMVectorSelect( a, Four_Epsilons, zeroMask );
- // return ReciprocalSIMD( a_safe );
-}
-
-// CHRISG: is it worth doing integer bitfiddling for this?
-// 2^x for all values (the antilog)
-FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower )
-{
- return XMVectorExp(toPower);
-}
-
-// Clamps the components of a vector to a specified minimum and maximum range.
-FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max)
-{
- return XMVectorClamp(in, min, max);
-}
-
-FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD )
-{
- return XMLoadVector4( pSIMD );
-}
-
-// load a 3-vector (as opposed to LoadUnalignedSIMD, which loads a 4-vec).
-FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD )
-{
- return XMLoadVector3( pSIMD );
-}
-
-FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD )
-{
- return *( reinterpret_cast< const fltx4 *> ( pSIMD ) );
-}
-
-// for the transitional class -- load a 3-by VectorAligned and squash its w component
-FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD )
-{
- fltx4 out = XMLoadVector3A(pSIMD.Base());
- // squelch w
- return __vrlimi( out, __vzero(), 1, 0 );
-}
-
-// for the transitional class -- load a 3-by VectorAligned and squash its w component
-FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned * RESTRICT pSIMD )
-{
- fltx4 out = XMLoadVector3A(pSIMD);
- // squelch w
- return __vrlimi( out, __vzero(), 1, 0 );
-}
-
-FORCEINLINE void StoreAlignedSIMD( float *pSIMD, const fltx4 & a )
-{
- *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a;
-}
-
-FORCEINLINE void StoreUnalignedSIMD( float *pSIMD, const fltx4 & a )
-{
- XMStoreVector4( pSIMD, a );
-}
-
-FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a )
-{
- XMStoreVector3( pSIMD, a );
-}
-
-
-// strongly typed -- for typechecking as we transition to SIMD
-FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a )
-{
- XMStoreVector3A(pSIMD->Base(),a);
-}
-
-
-// Converts the four floats in vSrc to SIGNED integers and stores them in
-// *pDest, i.e. pDest->x = Int( vSrc.x ) and so on.
-// The fixed-point depth is hard-wired to 0 here: some architectures take
-// the depth as an immediate operand, and a function parameter cannot be
-// guaranteed to be an immediate.
-FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc)
-{
-    XMStoreVector4A( pDest->Base(), __vctsxs( vSrc, 0 ) );
-}
-
-// In-place 4x4 transpose: x, y, z and w are treated as the rows of a matrix
-// and overwritten with the rows of its transpose.
-FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w )
-{
-    const XMMATRIX transposed = XMMatrixTranspose( _XMMATRIX( x, y, z, w ) );
-    x = transposed.r[0];
-    y = transposed.r[1];
-    z = transposed.r[2];
-    w = transposed.r[3];
-}
-
-// Return zero in the fastest way -- faster even than loading.
-// (All four components are zero; produced by XMVectorZero.)
-FORCEINLINE fltx4 LoadZeroSIMD( void )
-{
-    const fltx4 fl4Zero = XMVectorZero();
-    return fl4Zero;
-}
-
-// Return one in every component in the fastest way -- faster even than
-// loading. Produced by XMVectorSplatOne.
-FORCEINLINE fltx4 LoadOneSIMD( void )
-{
-    const fltx4 fl4One = XMVectorSplatOne();
-    return fl4One;
-}
-
-// Replicates the x component of a into all four components.
-FORCEINLINE fltx4 SplatXSIMD( fltx4 a )
-{
-    const fltx4 fl4Splat = XMVectorSplatX( a );
-    return fl4Splat;
-}
-
-// Replicates the y component of a into all four components.
-FORCEINLINE fltx4 SplatYSIMD( fltx4 a )
-{
-    const fltx4 fl4Splat = XMVectorSplatY( a );
-    return fl4Splat;
-}
-
-// Replicates the z component of a into all four components.
-FORCEINLINE fltx4 SplatZSIMD( fltx4 a )
-{
-    const fltx4 fl4Splat = XMVectorSplatZ( a );
-    return fl4Splat;
-}
-
-// Replicates the w component of a into all four components.
-FORCEINLINE fltx4 SplatWSIMD( fltx4 a )
-{
-    const fltx4 fl4Splat = XMVectorSplatW( a );
-    return fl4Splat;
-}
-
-// Returns a with its x component taken from x (__vrlimi mask 8, no rotate).
-FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x )
-{
-    return __vrlimi( a, x, 8, 0 );
-}
-
-// Returns a with its y component taken from y (__vrlimi mask 4, no rotate).
-FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y )
-{
-    return __vrlimi( a, y, 4, 0 );
-}
-
-// Returns a with its z component taken from z (__vrlimi mask 2, no rotate).
-FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z )
-{
-    return __vrlimi( a, z, 2, 0 );
-}
-
-// Returns a with its w component taken from w (__vrlimi mask 1, no rotate).
-FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w )
-{
-    return __vrlimi( a, w, 1, 0 );
-}
-
-// Returns a copy of a in which the component selected by nComponent is
-// replaced by flValue. flValue is first replicated to all four lanes, then
-// merged in with __vrlimi using the mask looked up in s_nVrlimiMask (the
-// table holds 8, 4, 2, 1 for indices 0..3).
-// nComponent indexes the table without any range check.
-// NOTE(review): the mask passed to __vrlimi here is a runtime value, while
-// the neighbouring SetXSIMD..SetWSIMD pass literals -- confirm the intrinsic
-// accepts a non-constant mask.
-FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue )
-{
- static int s_nVrlimiMask[4] = { 8, 4, 2, 1 };
- fltx4 val = ReplicateX4( flValue );
- fltx4 result = __vrlimi(a, val, s_nVrlimiMask[nComponent], 0);
- return result;
-}
-
-// Rotates the components of a left by one position: all four lanes
-// (mask 8|4|2|1) are replaced from a rotated by 1.
-FORCEINLINE fltx4 RotateLeft( const fltx4 & a )
-{
-    fltx4 fl4Rotated = a;
-    return __vrlimi( fl4Rotated, a, 8 | 4 | 2 | 1, 1 );
-}
-
-// Rotates the components of a left by two positions: all four lanes
-// (mask 8|4|2|1) are replaced from a rotated by 2.
-FORCEINLINE fltx4 RotateLeft2( const fltx4 & a )
-{
-    fltx4 fl4Rotated = a;
-    return __vrlimi( fl4Rotated, a, 8 | 4 | 2 | 1, 2 );
-}
-
-
-
-// Find the lowest of a.x, a.y and a.z and replicate it to every component
-// of the return value. a.w is ignored.
-// Though this is only five instructions long, they are all dependent,
-// making this stall city. Forcing this inline should hopefully help with
-// scheduling.
-// Method: two rotated copies of a are built with __vrlimi so that y and
-// then z land in the x lane; MinSIMD folds them together and the x lane
-// of the result is splatted.
-FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a )
-{
- // a is [x,y,z,G] (where G is garbage)
- // rotate left by one
- fltx4 compareOne = a ;
- compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 );
- // compareOne is [y,z,G,G]
- fltx4 retval = MinSIMD( a, compareOne );
- // retVal is [min(x,y), min(y,z), G, G]
- compareOne = __vrlimi( compareOne, a, 8 , 2);
- // compareOne is [z, G, G, G]
- retval = MinSIMD( retval, compareOne );
- // retVal = [ min(min(x,y),z), G, G, G ]
-
- // splat the x component out to the whole vector and return
- return SplatXSIMD( retval );
-}
-
-// Find the highest of a.x, a.y and a.z and replicate it to every component
-// of the return value. a.w is ignored.
-// Though this is only five instructions long, they are all dependent,
-// making this stall city. Forcing this inline should hopefully help with
-// scheduling.
-// Method: two rotated copies of a are built with __vrlimi so that y and
-// then z land in the x lane; MaxSIMD folds them together and the x lane
-// of the result is splatted.
-FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a )
-{
- // a is [x,y,z,G] (where G is garbage)
- // rotate left by one
- fltx4 compareOne = a ;
- compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 );
- // compareOne is [y,z,G,G]
- fltx4 retval = MaxSIMD( a, compareOne );
- // retVal is [max(x,y), max(y,z), G, G]
- compareOne = __vrlimi( compareOne, a, 8 , 2);
- // compareOne is [z, G, G, G]
- retval = MaxSIMD( retval, compareOne );
- // retVal = [ max(max(x,y),z), G, G, G ]
-
- // splat the x component out to the whole vector and return
- return SplatXSIMD( retval );
-}
-
-
-// Transform many (horizontal) points in place by a 3x4 matrix whose three
-// rows have already been loaded into fltx4 registers (mRow1..mRow3).
-// pVectors is the array of points and numVectors its length. The points
-// must be 16-byte aligned. They are points rather than vectors because the
-// w component is assumed to be 1.
-// To spare yourself the annoyance of loading the matrix rows yourself, use
-// one of the matrix3x4_t overloads declared after this one.
-void TransformManyPointsBy(VectorAligned * RESTRICT pVectors, unsigned int numVectors, FLTX4 mRow1, FLTX4 mRow2, FLTX4 mRow3);
-
-// Transform many (horizontal) points in place by a 3x4 matrix.
-// The points must be 16-byte aligned and are treated as points (w assumed
-// to be 1). The matrix itself need not be aligned: its three rows are
-// fetched with unaligned loads and forwarded to the register overload.
-FORCEINLINE void TransformManyPointsBy(VectorAligned * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t &pMatrix)
-{
-    const fltx4 fl4Row0 = LoadUnalignedSIMD( pMatrix[0] );
-    const fltx4 fl4Row1 = LoadUnalignedSIMD( pMatrix[1] );
-    const fltx4 fl4Row2 = LoadUnalignedSIMD( pMatrix[2] );
-    TransformManyPointsBy( pVectors, numVectors, fl4Row0, fl4Row1, fl4Row2 );
-}
-
-// Transform many (horizontal) points in place by a 3x4 matrix.
-// The points must be 16-byte aligned and are treated as points (w assumed
-// to be 1). Here the matrix must also sit on a 16-byte boundary: its three
-// rows are fetched with aligned loads and forwarded to the register
-// overload.
-FORCEINLINE void TransformManyPointsByA(VectorAligned * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t &pMatrix)
-{
-    const fltx4 fl4Row0 = LoadAlignedSIMD( pMatrix[0] );
-    const fltx4 fl4Row1 = LoadAlignedSIMD( pMatrix[1] );
-    const fltx4 fl4Row2 = LoadAlignedSIMD( pMatrix[2] );
-    TransformManyPointsBy( pVectors, numVectors, fl4Row0, fl4Row1, fl4Row2 );
-}
-
-// ------------------------------------
-// INTEGER SIMD OPERATIONS.
-// ------------------------------------
-
-// Loads four aligned words from pSIMD into a SIMD register (XMLoadVector4A).
-FORCEINLINE i32x4 LoadAlignedIntSIMD( const void * RESTRICT pSIMD)
-{
-    const i32x4 i4Loaded = XMLoadVector4A( pSIMD );
-    return i4Loaded;
-}
-
-// Loads four unaligned words from pSIMD into a SIMD register (XMLoadVector4).
-FORCEINLINE i32x4 LoadUnalignedIntSIMD(const void * RESTRICT pSIMD)
-{
-    const i32x4 i4Loaded = XMLoadVector4( pSIMD );
-    return i4Loaded;
-}
-
-// Saves a into four words at pSIMD, which must be 16-byte aligned; the
-// store is an assignment through a reinterpreted i32x4 pointer.
-FORCEINLINE void StoreAlignedIntSIMD( int32 *pSIMD, const fltx4 & a )
-{
-    i32x4 *pDest = reinterpret_cast< i32x4 * >( pSIMD );
-    *pDest = a;
-}
-
-// Saves a into the storage of an intx4 (via its Base() pointer) by
-// assigning through a reinterpreted i32x4 pointer.
-FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a )
-{
-    i32x4 *pDest = reinterpret_cast< i32x4 * >( pSIMD.Base() );
-    *pDest = a;
-}
-
-// Saves a into four words at pSIMD through XMStoreVector4.
-FORCEINLINE void StoreUnalignedIntSIMD( int32 *pSIMD, const fltx4 & a )
-{
-    int32 *pDest = pSIMD;
-    XMStoreVector4( pDest, a );
-}
-
-
-// Takes a register holding four unsigned ints and returns them as single
-// precision floats. No fixed-point scaling is applied (depth 0).
-FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const i32x4 &vSrcA )
-{
-    const fltx4 fl4Converted = __vcfux( vSrcA, 0 );
-    return fl4Converted;
-}
-
-
-// Takes a register holding four signed ints and returns them as single
-// precision floats. No fixed-point scaling is applied (depth 0).
-FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA )
-{
-    const fltx4 fl4Converted = __vcfsx( vSrcA, 0 );
-    return fl4Converted;
-}
-
-// Take a fltx4 containing fixed-point uints and
-// return them as single precision floats. Each uint
-// will be divided by 2^immed after conversion
-// (eg, this is fixed point math).
-// Written as a macro so that uImmed reaches __vcfux unchanged.
-/* as if:
- FORCEINLINE fltx4 UnsignedFixedIntConvertToFltSIMD( const i32x4 &vSrcA, unsigned int uImmed )
- {
- return __vcfux( vSrcA, uImmed );
- }
-*/
-#define UnsignedFixedIntConvertToFltSIMD(vSrcA, uImmed) (__vcfux( (vSrcA), (uImmed) ))
-
-// Take a fltx4 containing fixed-point sints and
-// return them as single precision floats. Each int
-// will be divided by 2^immed (eg, this is fixed point
-// math).
-// Written as a macro so that uImmed reaches __vcfsx unchanged.
-/* as if:
- FORCEINLINE fltx4 SignedFixedIntConvertToFltSIMD( const i32x4 &vSrcA, unsigned int uImmed )
- {
- return __vcfsx( vSrcA, uImmed );
- }
-*/
-#define SignedFixedIntConvertToFltSIMD(vSrcA, uImmed) (__vcfsx( (vSrcA), (uImmed) ))
-
-// Set all components of a vector to a signed immediate int number.
-// Written as a macro so that x reaches __vspltisw unchanged.
-/* as if:
- FORCEINLINE fltx4 IntSetImmediateSIMD(int toImmediate)
- {
- return __vspltisw( toImmediate );
- }
-*/
-#define IntSetImmediateSIMD(x) (__vspltisw(x))
-
-/*
-    Treats both fltx4 arguments as four unsigned 32-bit words.
-    vSrcA holds the words to shift; vSrcB holds the shift amounts AS INTS.
-
-    for i = 0 to 3
-        shift       = bits 0..4 of word i of vSrcB
-        result word i = word i of vSrcA << shift
-*/
-FORCEINLINE fltx4 IntShiftLeftWordSIMD(fltx4 vSrcA, fltx4 vSrcB)
-{
-    const fltx4 fl4Shifted = __vslw( vSrcA, vSrcB );
-    return fl4Shifted;
-}
-
-// Reads component idx of a as a float by viewing the register as a
-// fltx4_union.
-// NOTE: if the output goes into a register, this causes a Load-Hit-Store
-// stall (don't mix fpu/vpu math!)
-FORCEINLINE float SubFloat( const fltx4 & a, int idx )
-{
-    const fltx4_union &lanes = reinterpret_cast< const fltx4_union & >( a );
-    return lanes.m128_f32[ idx ];
-}
-
-// Returns a writable reference to component idx of a, obtained by viewing
-// the register as a fltx4_union.
-FORCEINLINE float & SubFloat( fltx4 & a, int idx )
-{
-    fltx4_union &lanes = reinterpret_cast< fltx4_union & >( a );
-    return lanes.m128_f32[ idx ];
-}
-
-// Converts all four floats of a to unsigned ints with __vctuxs (depth 0)
-// and returns the converted word at index idx.
-FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx )
-{
-    fltx4 fl4AsUints = __vctuxs( a, 0 );
-    const fltx4_union &lanes = reinterpret_cast< const fltx4_union & >( fl4AsUints );
-    return lanes.m128_u32[ idx ];
-}
-
-
-// Reads word idx of a as a uint32 (raw bits, no conversion) by viewing the
-// register as a fltx4_union.
-FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
-{
-    const fltx4_union &lanes = reinterpret_cast< const fltx4_union & >( a );
-    return lanes.m128_u32[ idx ];
-}
-
-// Returns a writable uint32 reference to word idx of a, obtained by viewing
-// the register as a fltx4_union.
-FORCEINLINE uint32 & SubInt( fltx4 & a, int idx )
-{
-    fltx4_union &lanes = reinterpret_cast< fltx4_union & >( a );
-    return lanes.m128_u32[ idx ];
-}
-
-#else
-
-//---------------------------------------------------------------------
-// Intel/SSE implementation
-//---------------------------------------------------------------------
-
-// Stores the four floats of a to pSIMD with the aligned SSE store.
-FORCEINLINE void StoreAlignedSIMD( float * RESTRICT pSIMD, const fltx4 & a )
-{
-    float *pDest = pSIMD;
-    _mm_store_ps( pDest, a );
-}
-
-// Stores the four floats of a to pSIMD with the unaligned SSE store.
-FORCEINLINE void StoreUnalignedSIMD( float * RESTRICT pSIMD, const fltx4 & a )
-{
-    float *pDest = pSIMD;
-    _mm_storeu_ps( pDest, a );
-}
-
-
-// Forward declarations: the rotate helpers are defined further down but are
-// needed by the three-float store below.
-FORCEINLINE fltx4 RotateLeft( const fltx4 & a );
-FORCEINLINE fltx4 RotateLeft2( const fltx4 & a );
-
-// Writes a.x, a.y and a.z to pSIMD[0], pSIMD[1] and pSIMD[2] using three
-// scalar stores; the rotations bring y and then z into the lowest lane.
-FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a )
-{
-    _mm_store_ss( pSIMD, a );
-    const fltx4 fl4YInLow = RotateLeft( a );
-    _mm_store_ss( pSIMD + 1, fl4YInLow );
-    const fltx4 fl4ZInLow = RotateLeft2( a );
-    _mm_store_ss( pSIMD + 2, fl4ZInLow );
-}
-
-// Strongly typed store used for type checking during the move to SIMD.
-// Forwards to StoreAlignedSIMD on the vector's Base() pointer, so all four
-// floats of a are written.
-FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a )
-{
-    float *pDest = pSIMD->Base();
-    StoreAlignedSIMD( pDest, a );
-}
-
-// Loads four floats from pSIMD with the aligned SSE load.
-FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD )
-{
-    const float *pSrc = reinterpret_cast< const float * >( pSIMD );
-    return _mm_load_ps( pSrc );
-}
-
-// Bitwise AND of the two registers.
-FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b
-{
-    const fltx4 fl4Bits = _mm_and_ps( a, b );
-    return fl4Bits;
-}
-
-// Bitwise AND of b with the complement of a (note: it is a that is inverted).
-FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b
-{
-    const fltx4 fl4Bits = _mm_andnot_ps( a, b );
-    return fl4Bits;
-}
-
-// Bitwise exclusive OR of the two registers.
-FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b
-{
-    const fltx4 fl4Bits = _mm_xor_ps( a, b );
-    return fl4Bits;
-}
-
-// Bitwise OR of the two registers.
-FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b
-{
-    const fltx4 fl4Bits = _mm_or_ps( a, b );
-    return fl4Bits;
-}
-
-// Squelches the w component of a to +0.0 by ANDing with the
-// g_SIMD_clear_wmask bit mask; x, y and z pass through.
-// Most efficient when written as a = SetWToZeroSIMD(a) (avoids a copy).
-FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a )
-{
-    const fltx4 fl4KeepXyz = LoadAlignedSIMD( g_SIMD_clear_wmask );
-    return AndSIMD( a, fl4KeepXyz );
-}
-
-// Transitional overload: loads a VectorAligned with an aligned load and
-// then squashes the w component of the result to zero.
-FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD )
-{
-    const fltx4 fl4Raw = LoadAlignedSIMD( pSIMD.Base() );
-    return SetWToZeroSIMD( fl4Raw );
-}
-
-// Loads four floats from pSIMD with the unaligned SSE load.
-FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD )
-{
-    const float *pSrc = reinterpret_cast< const float * >( pSIMD );
-    return _mm_loadu_ps( pSrc );
-}
-
-// Three-vector load. On this path it is the same unaligned four-float load
-// as LoadUnalignedSIMD, so a fourth float past the three is also read and
-// ends up in w.
-FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD )
-{
-    const float *pSrc = reinterpret_cast< const float * >( pSIMD );
-    return _mm_loadu_ps( pSrc );
-}
-
-/// Replicates the bit pattern of a single 32-bit integer into all four
-/// components of an m128. The integer's storage is read as a float, placed
-/// in the low lane, and then shuffled across the register.
-FORCEINLINE fltx4 ReplicateIX4( int i )
-{
-    const fltx4 fl4LowLane = _mm_set_ss( *reinterpret_cast< float * >( &i ) );
-    return _mm_shuffle_ps( fl4LowLane, fl4LowLane, 0 );
-}
-
-
-// Replicates flValue into all four components: it is placed in the low
-// lane and then shuffled across the register.
-FORCEINLINE fltx4 ReplicateX4( float flValue )
-{
-    const __m128 fl4LowLane = _mm_set_ss( flValue );
-    return _mm_shuffle_ps( fl4LowLane, fl4LowLane, 0 );
-}
-
-
-// Reads component idx of a as a float. POSIX builds index the register's
-// storage through a float pointer; other builds use the m128_f32 member.
-// NOTE: if the output goes into a register, this causes a Load-Hit-Store
-// stall (don't mix fpu/vpu math!)
-FORCEINLINE float SubFloat( const fltx4 & a, int idx )
-{
-#ifdef POSIX
-    return ( reinterpret_cast< float const * >( &a ) )[ idx ];
-#else
-    return a.m128_f32[ idx ];
-#endif
-}
-
-// Returns a writable reference to component idx of a. POSIX builds index
-// the register's storage through a float pointer; other builds use the
-// m128_f32 member.
-FORCEINLINE float & SubFloat( fltx4 & a, int idx )
-{
-#ifdef POSIX
-    return ( reinterpret_cast< float * >( &a ) )[ idx ];
-#else
-    return a.m128_f32[ idx ];
-#endif
-}
-
-// Reads component idx of a as a float and converts that value to uint32
-// with an ordinary C++ cast.
-FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx )
-{
-    const float flComponent = SubFloat( a, idx );
-    return (uint32)flComponent;
-}
-
-// Reads word idx of a as a uint32 (raw bits, no conversion). POSIX builds
-// index the register's storage through a uint32 pointer; other builds use
-// the m128_u32 member.
-FORCEINLINE uint32 SubInt( const fltx4 & a, int idx )
-{
-#ifdef POSIX
-    return ( reinterpret_cast< uint32 const * >( &a ) )[ idx ];
-#else
-    return a.m128_u32[ idx ];
-#endif
-}
-
-// Returns a writable uint32 reference to word idx of a. POSIX builds index
-// the register's storage through a uint32 pointer; other builds use the
-// m128_u32 member.
-FORCEINLINE uint32 & SubInt( fltx4 & a, int idx )
-{
-#ifdef POSIX
-    return ( reinterpret_cast< uint32 * >( &a ) )[ idx ];
-#else
-    return a.m128_u32[ idx ];
-#endif
-}
-
-// Return zero in all four components.
-// Uses _mm_setzero_ps, which produces the zero register without reading
-// the Four_Zeros constant from memory.
-FORCEINLINE fltx4 LoadZeroSIMD( void )
-{
-    return _mm_setzero_ps();
-}
-
-// Return one in all four components. On this path the value is simply a
-// copy of the Four_Ones constant.
-FORCEINLINE fltx4 LoadOneSIMD( void )
-{
-    const fltx4 fl4Ones = Four_Ones;
-    return fl4Ones;
-}
-
-// Bitwise select: where ReplacementMask has bits set the result takes the
-// bits of NewValue, elsewhere it keeps the bits of OldValue.
-FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue )
-{
-    const fltx4 fl4FromNew = AndSIMD( ReplacementMask, NewValue );
-    const fltx4 fl4FromOld = AndNotSIMD( ReplacementMask, OldValue );
-    return OrSIMD( fl4FromNew, fl4FromOld );
-}
-
-// Remember, SSE numbers its words 3 2 1 0.
-// The way we want to specify shuffles is backwards from that default:
-// MM_SHUFFLE_REV takes its selectors in array index order, so argument a
-// picks the source word for result word 0 and d picks it for result word 3.
-// It simply forwards to _MM_SHUFFLE with the arguments reversed.
-#define MM_SHUFFLE_REV(a,b,c,d) _MM_SHUFFLE(d,c,b,a)
-
-// Replicates component 0 (x) of a into all four components.
-FORCEINLINE fltx4 SplatXSIMD( fltx4 const & a )
-{
-    const fltx4 fl4Splat = _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 0, 0, 0, 0 ) );
-    return fl4Splat;
-}
-
-// Replicates component 1 (y) of a into all four components.
-FORCEINLINE fltx4 SplatYSIMD( fltx4 const &a )
-{
-    const fltx4 fl4Splat = _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 1, 1, 1, 1 ) );
-    return fl4Splat;
-}
-
-// Replicates component 2 (z) of a into all four components.
-FORCEINLINE fltx4 SplatZSIMD( fltx4 const &a )
-{
-    const fltx4 fl4Splat = _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 2, 2, 2, 2 ) );
-    return fl4Splat;
-}
-
-// Replicates component 3 (w) of a into all four components.
-FORCEINLINE fltx4 SplatWSIMD( fltx4 const &a )
-{
-    const fltx4 fl4Splat = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 3, 3, 3 ) );
-    return fl4Splat;
-}
-
-// Returns a with the bits selected by g_SIMD_ComponentMask[0] replaced by
-// the corresponding bits of x.
-FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x )
-{
-    const fltx4 fl4Select = LoadAlignedSIMD( g_SIMD_ComponentMask[0] );
-    return MaskedAssign( fl4Select, x, a );
-}
-
-// Returns a with the bits selected by g_SIMD_ComponentMask[1] replaced by
-// the corresponding bits of y.
-FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y )
-{
-    const fltx4 fl4Select = LoadAlignedSIMD( g_SIMD_ComponentMask[1] );
-    return MaskedAssign( fl4Select, y, a );
-}
-
-// Returns a with the bits selected by g_SIMD_ComponentMask[2] replaced by
-// the corresponding bits of z.
-FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z )
-{
-    const fltx4 fl4Select = LoadAlignedSIMD( g_SIMD_ComponentMask[2] );
-    return MaskedAssign( fl4Select, z, a );
-}
-
-// Returns a with the bits selected by g_SIMD_ComponentMask[3] replaced by
-// the corresponding bits of w.
-FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w )
-{
-    const fltx4 fl4Select = LoadAlignedSIMD( g_SIMD_ComponentMask[3] );
-    return MaskedAssign( fl4Select, w, a );
-}
-
-// Returns a with the bits selected by g_SIMD_ComponentMask[nComponent]
-// replaced by flValue (replicated to all lanes first). nComponent indexes
-// the mask table without a range check.
-FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue )
-{
-    const fltx4 fl4Value = ReplicateX4( flValue );
-    const fltx4 fl4Select = LoadAlignedSIMD( g_SIMD_ComponentMask[nComponent] );
-    return MaskedAssign( fl4Select, fl4Value, a );
-}
-
-// Rotate components left by one: a b c d -> b c d a
-FORCEINLINE fltx4 RotateLeft( const fltx4 & a )
-{
-    const fltx4 fl4Rotated = _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 1, 2, 3, 0 ) );
-    return fl4Rotated;
-}
-
-// Rotate components left by two: a b c d -> c d a b
-FORCEINLINE fltx4 RotateLeft2( const fltx4 & a )
-{
-    const fltx4 fl4Rotated = _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 2, 3, 0, 1 ) );
-    return fl4Rotated;
-}
-
-// Rotate components right by one: a b c d -> d a b c
-// Result word 0 comes from source word 3, word 1 from 0, word 2 from 1 and
-// word 3 from 2. (The previous selector, _MM_SHUFFLE( 0, 3, 2, 1 ), is the
-// same constant RotateLeft uses and produced b c d a.)
-FORCEINLINE fltx4 RotateRight( const fltx4 & a )
-{
-    return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 3, 0, 1, 2 ) );
-}
-
-// Rotate components right by two: a b c d -> c d a b
-FORCEINLINE fltx4 RotateRight2( const fltx4 & a )
-{
-    const fltx4 fl4Rotated = _mm_shuffle_ps( a, a, _MM_SHUFFLE( 1, 0, 3, 2 ) );
-    return fl4Rotated;
-}
-
-
-// Component-wise sum.
-FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b ) // a+b
-{
-    const fltx4 fl4Sum = _mm_add_ps( a, b );
-    return fl4Sum;
-}
-
-// Component-wise difference.
-FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b
-{
-    const fltx4 fl4Difference = _mm_sub_ps( a, b );
-    return fl4Difference;
-}
-
-// Component-wise product.
-FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b
-{
-    const fltx4 fl4Product = _mm_mul_ps( a, b );
-    return fl4Product;
-}
-
-// Component-wise quotient.
-FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b
-{
-    const fltx4 fl4Quotient = _mm_div_ps( a, b );
-    return fl4Quotient;
-}
-
-// Multiply-add built from a separate multiply and add.
-FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c
-{
-    const fltx4 fl4Product = MulSIMD( a, b );
-    return AddSIMD( fl4Product, c );
-}
-
-// Multiply-subtract built from a separate multiply and subtract; note the
-// product is what gets subtracted from c.
-FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b
-{
-    const fltx4 fl4Product = MulSIMD( a, b );
-    return SubSIMD( c, fl4Product );
-}
-
-// Three-component dot product of a and b (w ignored), replicated into all
-// four components of the result. The sum is done in scalar code.
-FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b )
-{
-    const fltx4 fl4Products = MulSIMD( a, b );
-    return ReplicateX4( SubFloat( fl4Products, 0 ) + SubFloat( fl4Products, 1 ) + SubFloat( fl4Products, 2 ) );
-}
-
-// Four-component dot product of a and b, replicated into all four
-// components of the result. The sum is done in scalar code.
-FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b )
-{
-    const fltx4 fl4Products = MulSIMD( a, b );
-    return ReplicateX4( SubFloat( fl4Products, 0 ) + SubFloat( fl4Products, 1 ) + SubFloat( fl4Products, 2 ) + SubFloat( fl4Products, 3 ) );
-}
-
-// Sine of each of the four components, computed one lane at a time with
-// the scalar sin().
-//TODO: implement as four-way Taylor series (see xbox implementation)
-FORCEINLINE fltx4 SinSIMD( const fltx4 &radians )
-{
-    fltx4 fl4Sines;
-    for ( int iLane = 0; iLane < 4; ++iLane )
-    {
-        SubFloat( fl4Sines, iLane ) = sin( SubFloat( radians, iLane ) );
-    }
-    return fl4Sines;
-}
-
-// Writes sine and cosine of the x, y and z components of radians into the
-// matching components of the outputs. The w components of sine and cosine
-// are not written.
-FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
-{
-    // FIXME: Make a fast SSE version
-    for ( int iLane = 0; iLane < 3; ++iLane )
-    {
-        SinCos( SubFloat( radians, iLane ), &SubFloat( sine, iLane ), &SubFloat( cosine, iLane ) );
-    }
-}
-
-// Writes sine and cosine of all four components of radians into the
-// matching components of the outputs, one lane at a time.
-FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians )
-{
-    // FIXME: Make a fast SSE version
-    for ( int iLane = 0; iLane < 4; ++iLane )
-    {
-        SinCos( SubFloat( radians, iLane ), &SubFloat( sine, iLane ), &SubFloat( cosine, iLane ) );
-    }
-}
-
-// Arc sine of each of the four components, computed one lane at a time
-// with the scalar asin().
-//TODO: implement as four-way Taylor series (see xbox implementation)
-FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine )
-{
-    // FIXME: Make a fast SSE version
-    fltx4 fl4Angles;
-    for ( int iLane = 0; iLane < 4; ++iLane )
-    {
-        SubFloat( fl4Angles, iLane ) = asin( SubFloat( sine, iLane ) );
-    }
-    return fl4Angles;
-}
-
-// Arc cosine of each of the four components, computed one lane at a time
-// with the scalar acos().
-FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs )
-{
-    fltx4 fl4Angles;
-    for ( int iLane = 0; iLane < 4; ++iLane )
-    {
-        SubFloat( fl4Angles, iLane ) = acos( SubFloat( cs, iLane ) );
-    }
-    return fl4Angles;
-}
-
-// tan^-1(a/b) per component, i.e. pass the sine in as a and the cosine in
-// as b. Computed one lane at a time with the scalar atan2().
-FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b )
-{
-    fltx4 fl4Angles;
-    for ( int iLane = 0; iLane < 4; ++iLane )
-    {
-        SubFloat( fl4Angles, iLane ) = atan2( SubFloat( a, iLane ), SubFloat( b, iLane ) );
-    }
-    return fl4Angles;
-}
-
-// Negation, computed as zero minus a.
-FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a
-{
-    const fltx4 fl4Zero = LoadZeroSIMD();
-    return SubSIMD( fl4Zero, a );
-}
-
-// Gathers the high (sign) bit of each of the four floats into the low four
-// bits of the returned int.
-FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set
-{
-    const int nSignBits = _mm_movemask_ps( a );
-    return nSignBits;
-}
-
-// True when at least one component of a has its sign bit set.
-FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0)
-{
-    const int nSignBits = TestSignSIMD( a );
-    return nSignBits != 0;
-}
-
-// Per-component equality test producing an all-ones or all-zeros mask.
-FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0
-{
-    const fltx4 fl4Mask = _mm_cmpeq_ps( a, b );
-    return fl4Mask;
-}
-
-// Per-component greater-than test producing an all-ones or all-zeros mask.
-FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0
-{
-    const fltx4 fl4Mask = _mm_cmpgt_ps( a, b );
-    return fl4Mask;
-}
-
-// Per-component greater-or-equal test producing an all-ones or all-zeros mask.
-FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0
-{
-    const fltx4 fl4Mask = _mm_cmpge_ps( a, b );
-    return fl4Mask;
-}
-
-// Per-component less-than test producing an all-ones or all-zeros mask.
-FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a<b) ? ~0:0
-{
-    const fltx4 fl4Mask = _mm_cmplt_ps( a, b );
-    return fl4Mask;
-}
-
-// Per-component less-or-equal test producing an all-ones or all-zeros mask.
-FORCEINLINE fltx4 CmpLeSIMD( const fltx4 & a, const fltx4 & b ) // (a<=b) ? ~0:0
-{
-    const fltx4 fl4Mask = _mm_cmple_ps( a, b );
-    return fl4Mask;
-}
-
-// For branching when a.xyzw > b.xyzw. Implemented as "no component of the
-// a <= b mask is set".
-FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b )
-{
-    const int nLessOrEqualBits = TestSignSIMD( CmpLeSIMD( a, b ) );
-    return nLessOrEqualBits == 0;
-}
-
-// For branching when a.xyzw >= b.xyzw. Implemented as "no component of the
-// a < b mask is set".
-FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b )
-{
-    const int nLessBits = TestSignSIMD( CmpLtSIMD( a, b ) );
-    return nLessBits == 0;
-}
-
-// For branching if all a.xyzw == b.xyzw: every one of the four bits of the
-// equality mask must be set.
-FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b )
-{
-    const int nEqualBits = TestSignSIMD( CmpEqSIMD( a, b ) );
-    return nEqualBits == 0xf;
-}
-
-// Per-component test that a lies within [-b, b], producing an all-ones or
-// all-zeros mask.
-FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <= b && a >= -b) ? ~0 : 0
-{
-    const fltx4 fl4BelowUpper = CmpLeSIMD( a, b );
-    const fltx4 fl4AboveLower = CmpGeSIMD( a, NegSIMD( b ) );
-    return AndSIMD( fl4BelowUpper, fl4AboveLower );
-}
-
-// Component-wise minimum.
-FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b)
-{
-    const fltx4 fl4Smaller = _mm_min_ps( a, b );
-    return fl4Smaller;
-}
-
-// Component-wise maximum.
-FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b)
-{
-    const fltx4 fl4Larger = _mm_max_ps( a, b );
-    return fl4Larger;
-}
-
-
-
-// SSE lacks rounding operations. Really.
-// They can be emulated by setting the rounding mode for the whole processor
-// and converting to int and back, but every change of rounding mode clears
-// out the entire pipeline, so that cannot be done per operation -- it has
-// to happen once, before the loop that would call these.
-//
-// Round towards positive infinity. This version sidesteps the issue by
-// calling the scalar ceil() on each of the four components.
-FORCEINLINE fltx4 CeilSIMD( const fltx4 &a )
-{
-    fltx4 fl4Rounded;
-    for ( int iLane = 0; iLane < 4; ++iLane )
-    {
-        SubFloat( fl4Rounded, iLane ) = ceil( SubFloat( a, iLane ) );
-    }
-    return fl4Rounded;
-}
-
-// Absolute value of a fltx4; only declared here, defined elsewhere.
-fltx4 fabs( const fltx4 & x );
-// Round towards negative infinity
-// This is the implementation that was here before; it assumes
-// you are in round-to-floor mode, which I guess is usually the
-// case for us vis-a-vis SSE. It's totally unnecessary on
-// VMX, which has a native floor op.
-// Method: works on |val|. Adding and then subtracting Four_2ToThe23s
-// leaves an integer-valued float; wherever that came out larger than |val|
-// one is subtracted; finally the original sign bit is XORed back in.
-// NOTE(review): because the magnitude is floored before the sign is
-// restored, a negative non-integer input appears to be rounded toward zero
-// (e.g. -1.5 -> -1) rather than toward negative infinity -- confirm whether
-// callers rely on this.
-FORCEINLINE fltx4 FloorSIMD( const fltx4 &val )
-{
- fltx4 fl4Abs = fabs( val );
- // (|val| + 2^23) - 2^23 drops the fractional bits of |val|
- fltx4 ival = SubSIMD( AddSIMD( fl4Abs, Four_2ToThe23s ), Four_2ToThe23s );
- // where the step above rounded up, step back down by one
- ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Ones ), ival );
- return XorSIMD( ival, XorSIMD( val, fl4Abs ) ); // restore sign bits
-}
-
-
-
-// True when every component of var compares equal to zero.
-inline bool IsAllZeros( const fltx4 & var )
-{
-    const int nZeroBits = TestSignSIMD( CmpEqSIMD( var, Four_Zeros ) );
-    return nZeroBits == 0xF;
-}
-
-// Square root estimate. On this path it is the same _mm_sqrt_ps call that
-// SqrtSIMD uses.
-FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less
-{
-    const fltx4 fl4Root = _mm_sqrt_ps( a );
-    return fl4Root;
-}
-
-// Component-wise square root.
-FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a)
-{
-    const fltx4 fl4Root = _mm_sqrt_ps( a );
-    return fl4Root;
-}
-
-// Component-wise reciprocal square root estimate (_mm_rsqrt_ps).
-FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less
-{
-    const fltx4 fl4Estimate = _mm_rsqrt_ps( a );
-    return fl4Estimate;
-}
-
-// Reciprocal square root estimate that avoids taking the estimate of
-// exact zero: components equal to zero have Four_Epsilons ORed in first.
-FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a )
-{
-    const fltx4 fl4IsZero = CmpEqSIMD( a, Four_Zeros );
-    const fltx4 fl4Patch = AndSIMD( Four_Epsilons, fl4IsZero );
-    const fltx4 fl4Safe = OrSIMD( a, fl4Patch );
-    return ReciprocalSqrtEstSIMD( fl4Safe );
-}
-
-/// Reciprocal square root with one Newton iteration on top of the hardware
-/// estimate, for higher precision than ReciprocalSqrtEstSIMD.
-/// Iteration for 1/sqrt(a): y(n+1) = 1/2 (y(n)*(3-a*y(n)^2))
-FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a)
-{
-    const fltx4 fl4Y = ReciprocalSqrtEstSIMD( a );
-    const fltx4 fl4AYSquared = MulSIMD( a, MulSIMD( fl4Y, fl4Y ) );
-    const fltx4 fl4Unscaled = MulSIMD( fl4Y, SubSIMD( Four_Threes, fl4AYSquared ) );
-    return MulSIMD( Four_PointFives, fl4Unscaled );
-}
-
-// Component-wise reciprocal estimate (_mm_rcp_ps).
-FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less
-{
-    const fltx4 fl4Estimate = _mm_rcp_ps( a );
-    return fl4Estimate;
-}
-
-/// 1/x for all 4 values, more or less.
-/// 1/0 will result in a big but NOT infinite result: components equal to
-/// zero have Four_Epsilons ORed in before the estimate is taken.
-FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a )
-{
-    const fltx4 fl4IsZero = CmpEqSIMD( a, Four_Zeros );
-    const fltx4 fl4Patch = AndSIMD( Four_Epsilons, fl4IsZero );
-    const fltx4 fl4Safe = OrSIMD( a, fl4Patch );
-    return ReciprocalEstSIMD( fl4Safe );
-}
-
-/// 1/x for all 4 values: reciprocal approximation instruction plus one
-/// Newton iteration, Y(n+1) = 2*Y(n)-a*Y(n)^2.
-/// No error checking!
-FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a
-{
-    const fltx4 fl4Y = ReciprocalEstSIMD( a );
-    const fltx4 fl4TwoY = AddSIMD( fl4Y, fl4Y );
-    const fltx4 fl4AYSquared = MulSIMD( a, MulSIMD( fl4Y, fl4Y ) );
-    return SubSIMD( fl4TwoY, fl4AYSquared );
-}
-
-/// 1/x for all 4 values.
-/// 1/0 will result in a big but NOT infinite result: components equal to
-/// zero have Four_Epsilons ORed in before ReciprocalSIMD is applied.
-FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a )
-{
-    const fltx4 fl4IsZero = CmpEqSIMD( a, Four_Zeros );
-    const fltx4 fl4Patch = AndSIMD( Four_Epsilons, fl4IsZero );
-    const fltx4 fl4Safe = OrSIMD( a, fl4Patch );
-    return ReciprocalSIMD( fl4Safe );
-}
-
-// CHRISG: is it worth doing integer bitfiddling for this?
-// 2^x for all values (the antilog), computed one component at a time with
-// the scalar powf().
-FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower )
-{
-    fltx4 fl4Result;
-    for ( int iLane = 0; iLane < 4; ++iLane )
-    {
-        SubFloat( fl4Result, iLane ) = powf( 2, SubFloat(toPower, iLane) );
-    }
-    return fl4Result;
-}
-
-// Clamps each component of 'in' to the range [min, max]: first capped at
-// max, then raised to at least min.
-FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max)
-{
-    const fltx4 fl4Capped = MinSIMD( max, in );
-    return MaxSIMD( min, fl4Capped );
-}
-
-// In-place 4x4 transpose: x, y, z and w are treated as the rows of a matrix
-// and overwritten with the rows of its transpose (_MM_TRANSPOSE4_PS).
-FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w)
-{
-    _MM_TRANSPOSE4_PS( x, y, z, w );
-}
-
-// Finds the lowest of a.x, a.y and a.z and replicates it to every component
-// of the result; a.w is ignored (only lane 0 of the intermediates is kept).
-FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 &a )
-{
-    // a is [x,y,z,G] (G = garbage); rotating left by one gives [y,z,G,x]
-    const fltx4 fl4MinXY = MinSIMD( a, RotateLeft( a ) );
-    // lane 0 now holds min(x,y); rotating left by two gives [z,G,x,y]
-    const fltx4 fl4MinXYZ = MinSIMD( fl4MinXY, RotateLeft2( a ) );
-    // lane 0 holds min(min(x,y),z): splat it to the whole vector
-    return SplatXSIMD( fl4MinXYZ );
-}
-
-// Finds the highest of a.x, a.y and a.z and replicates it to every
-// component of the result; a.w is ignored (only lane 0 of the intermediates
-// is kept).
-FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 &a )
-{
-    // a is [x,y,z,G] (G = garbage); rotating left by one gives [y,z,G,x]
-    const fltx4 fl4MaxXY = MaxSIMD( a, RotateLeft( a ) );
-    // lane 0 now holds max(x,y); rotating left by two gives [z,G,x,y]
-    const fltx4 fl4MaxXYZ = MaxSIMD( fl4MaxXY, RotateLeft2( a ) );
-    // lane 0 holds max(max(x,y),z): splat it to the whole vector
-    return SplatXSIMD( fl4MaxXYZ );
-}
-
-// ------------------------------------
-// INTEGER SIMD OPERATIONS.
-// ------------------------------------
-
-
-#if 0 /* pc does not have these ops */
-// Splat all components of a vector to a signed immediate int number.
-// Compiled out by the surrounding #if 0; kept as a scalar reference that
-// writes the same integer into each of the four words.
-FORCEINLINE fltx4 IntSetImmediateSIMD(int to)
-{
- //CHRISG: SSE2 has this, but not SSE1. What to do?
- fltx4 retval;
- SubInt( retval, 0 ) = to;
- SubInt( retval, 1 ) = to;
- SubInt( retval, 2 ) = to;
- SubInt( retval, 3 ) = to;
- return retval;
-}
-#endif
-
-// Loads four aligned words into a SIMD register. The data goes through the
-// float load unchanged; no conversion takes place.
-FORCEINLINE i32x4 LoadAlignedIntSIMD( const void * RESTRICT pSIMD)
-{
-    const float *pSrc = reinterpret_cast< const float * >( pSIMD );
-    return _mm_load_ps( pSrc );
-}
-
-// Loads four unaligned words into a SIMD register. The data goes through
-// the float load unchanged; no conversion takes place.
-FORCEINLINE i32x4 LoadUnalignedIntSIMD( const void * RESTRICT pSIMD)
-{
-    const float *pSrc = reinterpret_cast< const float * >( pSIMD );
-    return _mm_loadu_ps( pSrc );
-}
-
-// Saves a into four words at pSIMD, which must be 16-byte aligned. The
-// bits are stored as-is through the float store.
-FORCEINLINE void StoreAlignedIntSIMD( int32 * RESTRICT pSIMD, const fltx4 & a )
-{
-    float *pDest = reinterpret_cast< float * >( pSIMD );
-    _mm_store_ps( pDest, a );
-}
-
-// Saves a into the storage of an intx4 (via its Base() pointer) with the
-// aligned float store; the bits are stored as-is.
-FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a )
-{
-    float *pDest = reinterpret_cast< float * >( pSIMD.Base() );
-    _mm_store_ps( pDest, a );
-}
-
-// Saves a into four words at pSIMD with the unaligned float store; the
-// bits are stored as-is.
-FORCEINLINE void StoreUnalignedIntSIMD( int32 * RESTRICT pSIMD, const fltx4 & a )
-{
-    float *pDest = reinterpret_cast< float * >( pSIMD );
-    _mm_storeu_ps( pDest, a );
-}
-
-
-// CHRISG: the conversion functions all seem to operate on m64's only...
-// how do we make them work here?
-
-// Takes a register holding four unsigned ints and returns them as single
-// precision floats. No fixed-point scaling is done.
-// Each word of vSrcA is read as a uint32 and converted with an ordinary
-// scalar cast. (The words are read from the argument itself; the earlier
-// code converted the not-yet-initialised result register instead.)
-FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const u32x4 &vSrcA )
-{
-    const uint32 *pSrcWords = reinterpret_cast<const uint32 *>( &vSrcA );
-    fltx4 retval;
-    SubFloat( retval, 0 ) = ( (float) pSrcWords[0] );
-    SubFloat( retval, 1 ) = ( (float) pSrcWords[1] );
-    SubFloat( retval, 2 ) = ( (float) pSrcWords[2] );
-    SubFloat( retval, 3 ) = ( (float) pSrcWords[3] );
-    return retval;
-}
-
-
-// Takes a register holding four signed ints and returns them as single
-// precision floats. No fixed-point scaling is done; each word of vSrcA is
-// read as an int32 and converted with an ordinary scalar cast.
-FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA )
-{
-    const int32 *pSrcWords = reinterpret_cast<const int32 *>( &vSrcA );
-    fltx4 fl4Converted;
-    for ( int iLane = 0; iLane < 4; ++iLane )
-    {
-        SubFloat( fl4Converted, iLane ) = (float) pSrcWords[iLane];
-    }
-    return fl4Converted;
-}
-
-/*
-    Works on the registers as if they hold four unsigned 32-bit words.
-    vSrcA contains the words to be shifted,
-    vSrcB contains the amounts to shift by AS INTS.
-
-    for i = 0 to 3
-        shift         = low five bits (0..4) of word i of vSrcB
-        result word i = word i of vSrcA << shift
-
-    Only the low five bits of each shift amount are used, as specified
-    above; this also keeps the C++ shift defined for amounts of 32 or more.
-*/
-FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB)
-{
-    i32x4 retval;
-    SubInt(retval, 0) = SubInt(vSrcA, 0) << ( SubInt(vSrcB, 0) & 31 );
-    SubInt(retval, 1) = SubInt(vSrcA, 1) << ( SubInt(vSrcB, 1) & 31 );
-    SubInt(retval, 2) = SubInt(vSrcA, 2) << ( SubInt(vSrcB, 2) & 31 );
-    SubInt(retval, 3) = SubInt(vSrcA, 3) << ( SubInt(vSrcB, 3) & 31 );
-
-    return retval;
-}
-
-
-// Converts the four floats in vSrc to SIGNED integers and stores them in
-// *pDest, i.e. pDest->x = Int( vSrc.x ) and so on.
-// No fixed-point depth parameter is offered: some architectures take it as
-// an immediate, and a function parameter cannot be guaranteed to be one.
-// The conversion is done two components at a time through the __m64
-// conversion intrinsic; _mm_empty() is called once both halves are stored.
-FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc)
-{
-    const __m64 lowPair = _mm_cvttps_pi32( vSrc );
-    // move the upper two floats down so they can be converted as well
-    const __m64 highPair = _mm_cvttps_pi32( _mm_movehl_ps( vSrc, vSrc ) );
-
-    __m64 *pLowOut = reinterpret_cast<__m64 *>( &(*pDest)[0] );
-    __m64 *pHighOut = reinterpret_cast<__m64 *>( &(*pDest)[2] );
-    *pLowOut = lowPair;
-    *pHighOut = highPair;
-
-    _mm_empty();
-}
-
-
-
-#endif
-
-
-
-/// class FourVectors stores 4 independent vectors for use in SIMD processing. These vectors are
-/// stored in the format x x x x y y y y z z z z so that they can be efficiently SIMD-accelerated.
-class ALIGN16 FourVectors
-{
-public:
- fltx4 x, y, z;
-
- FORCEINLINE void DuplicateVector(Vector const &v) //< set all 4 vectors to the same vector value
- {
- x=ReplicateX4(v.x);
- y=ReplicateX4(v.y);
- z=ReplicateX4(v.z);
- }
-
- FORCEINLINE fltx4 const & operator[](int idx) const
- {
- return *((&x)+idx);
- }
-
- FORCEINLINE fltx4 & operator[](int idx)
- {
- return *((&x)+idx);
- }
-
- FORCEINLINE void operator+=(FourVectors const &b) //< add 4 vectors to another 4 vectors
- {
- x=AddSIMD(x,b.x);
- y=AddSIMD(y,b.y);
- z=AddSIMD(z,b.z);
- }
-
- FORCEINLINE void operator-=(FourVectors const &b) //< subtract 4 vectors from another 4
- {
- x=SubSIMD(x,b.x);
- y=SubSIMD(y,b.y);
- z=SubSIMD(z,b.z);
- }
-
- FORCEINLINE void operator*=(FourVectors const &b) //< scale all four vectors per component scale
- {
- x=MulSIMD(x,b.x);
- y=MulSIMD(y,b.y);
- z=MulSIMD(z,b.z);
- }
-
- FORCEINLINE void operator*=(const fltx4 & scale) //< scale
- {
- x=MulSIMD(x,scale);
- y=MulSIMD(y,scale);
- z=MulSIMD(z,scale);
- }
-
- FORCEINLINE void operator*=(float scale) //< uniformly scale all 4 vectors
- {
- fltx4 scalepacked = ReplicateX4(scale);
- *this *= scalepacked;
- }
-
- FORCEINLINE fltx4 operator*(FourVectors const &b) const //< 4 dot products
- {
- fltx4 dot=MulSIMD(x,b.x);
- dot=MaddSIMD(y,b.y,dot);
- dot=MaddSIMD(z,b.z,dot);
- return dot;
- }
-
- FORCEINLINE fltx4 operator*(Vector const &b) const //< dot product all 4 vectors with 1 vector
- {
- fltx4 dot=MulSIMD(x,ReplicateX4(b.x));
- dot=MaddSIMD(y,ReplicateX4(b.y), dot);
- dot=MaddSIMD(z,ReplicateX4(b.z), dot);
- return dot;
- }
-
- FORCEINLINE void VProduct(FourVectors const &b) //< component by component mul
- {
- x=MulSIMD(x,b.x);
- y=MulSIMD(y,b.y);
- z=MulSIMD(z,b.z);
- }
- FORCEINLINE void MakeReciprocal(void) //< (x,y,z)=(1/x,1/y,1/z)
- {
- x=ReciprocalSIMD(x);
- y=ReciprocalSIMD(y);
- z=ReciprocalSIMD(z);
- }
-
- FORCEINLINE void MakeReciprocalSaturate(void) //< (x,y,z)=(1/x,1/y,1/z), 1/0=1.0e23
- {
- x=ReciprocalSaturateSIMD(x);
- y=ReciprocalSaturateSIMD(y);
- z=ReciprocalSaturateSIMD(z);
- }
-
- // Assume the given matrix is a rotation, and rotate these vectors by it.
- // If you have a long list of FourVectors structures that you all want
- // to rotate by the same matrix, use FourVectors::RotateManyBy() instead.
- inline void RotateBy(const matrix3x4_t& matrix);
-
- /// You can use this to rotate a long array of FourVectors all by the same
- /// matrix. The first parameter is the head of the array. The second is the
- /// number of vectors to rotate. The third is the matrix.
- static void RotateManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix );
-
- /// Assume the vectors are points, and transform them in place by the matrix.
- inline void TransformBy(const matrix3x4_t& matrix);
-
- /// You can use this to Transform a long array of FourVectors all by the same
- /// matrix. The first parameter is the head of the array. The second is the
- /// number of vectors to rotate. The third is the matrix. The fourth is the
- /// output buffer, which must not overlap the pVectors buffer. This is not
- /// an in-place transformation.
- static void TransformManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix, FourVectors * RESTRICT pOut );
-
- /// You can use this to Transform a long array of FourVectors all by the same
- /// matrix. The first parameter is the head of the array. The second is the
- /// number of vectors to rotate. The third is the matrix. The fourth is the
- /// output buffer, which must not overlap the pVectors buffer.
- /// This is an in-place transformation.
- static void TransformManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix );
-
- // X(),Y(),Z() - get at the desired component of the i'th (0..3) vector.
- FORCEINLINE const float & X(int idx) const
- {
- // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!)
- return SubFloat( (fltx4 &)x, idx );
- }
-
- FORCEINLINE const float & Y(int idx) const
- {
- return SubFloat( (fltx4 &)y, idx );
- }
-
- FORCEINLINE const float & Z(int idx) const
- {
- return SubFloat( (fltx4 &)z, idx );
- }
-
- FORCEINLINE float & X(int idx)
- {
- return SubFloat( x, idx );
- }
-
- FORCEINLINE float & Y(int idx)
- {
- return SubFloat( y, idx );
- }
-
- FORCEINLINE float & Z(int idx)
- {
- return SubFloat( z, idx );
- }
-
- FORCEINLINE Vector Vec(int idx) const //< unpack one of the vectors
- {
- return Vector( X(idx), Y(idx), Z(idx) );
- }
-
- FourVectors(void)
- {
- }
-
- FourVectors( FourVectors const &src )
- {
- x=src.x;
- y=src.y;
- z=src.z;
- }
-
- FORCEINLINE void operator=( FourVectors const &src )
- {
- x=src.x;
- y=src.y;
- z=src.z;
- }
-
- /// LoadAndSwizzle - load 4 Vectors into a FourVectors, performing transpose op
- FORCEINLINE void LoadAndSwizzle(Vector const &a, Vector const &b, Vector const &c, Vector const &d)
- {
- // TransposeSIMD has large sub-expressions that the compiler can't eliminate on x360
- // use an unfolded implementation here
-#if _X360
- fltx4 tx = LoadUnalignedSIMD( &a.x );
- fltx4 ty = LoadUnalignedSIMD( &b.x );
- fltx4 tz = LoadUnalignedSIMD( &c.x );
- fltx4 tw = LoadUnalignedSIMD( &d.x );
- fltx4 r0 = __vmrghw(tx, tz);
- fltx4 r1 = __vmrghw(ty, tw);
- fltx4 r2 = __vmrglw(tx, tz);
- fltx4 r3 = __vmrglw(ty, tw);
-
- x = __vmrghw(r0, r1);
- y = __vmrglw(r0, r1);
- z = __vmrghw(r2, r3);
-#else
- x = LoadUnalignedSIMD( &( a.x ));
- y = LoadUnalignedSIMD( &( b.x ));
- z = LoadUnalignedSIMD( &( c.x ));
- fltx4 w = LoadUnalignedSIMD( &( d.x ));
- // now, matrix is:
- // x y z ?
- // x y z ?
- // x y z ?
- // x y z ?
- TransposeSIMD(x, y, z, w);
-#endif
- }
-
- /// LoadAndSwizzleAligned - load 4 Vectors into a FourVectors, performing transpose op.
- /// all 4 vectors must be 128 bit boundary
- FORCEINLINE void LoadAndSwizzleAligned(const float *RESTRICT a, const float *RESTRICT b, const float *RESTRICT c, const float *RESTRICT d)
- {
-#if _X360
- fltx4 tx = LoadAlignedSIMD(a);
- fltx4 ty = LoadAlignedSIMD(b);
- fltx4 tz = LoadAlignedSIMD(c);
- fltx4 tw = LoadAlignedSIMD(d);
- fltx4 r0 = __vmrghw(tx, tz);
- fltx4 r1 = __vmrghw(ty, tw);
- fltx4 r2 = __vmrglw(tx, tz);
- fltx4 r3 = __vmrglw(ty, tw);
-
- x = __vmrghw(r0, r1);
- y = __vmrglw(r0, r1);
- z = __vmrghw(r2, r3);
-#else
- x = LoadAlignedSIMD( a );
- y = LoadAlignedSIMD( b );
- z = LoadAlignedSIMD( c );
- fltx4 w = LoadAlignedSIMD( d );
- // now, matrix is:
- // x y z ?
- // x y z ?
- // x y z ?
- // x y z ?
- TransposeSIMD( x, y, z, w );
-#endif
- }
-
- FORCEINLINE void LoadAndSwizzleAligned(Vector const &a, Vector const &b, Vector const &c, Vector const &d)
- {
- LoadAndSwizzleAligned( &a.x, &b.x, &c.x, &d.x );
- }
-
- /// return the squared length of all 4 vectors
- FORCEINLINE fltx4 length2(void) const
- {
- return (*this)*(*this);
- }
-
- /// return the approximate length of all 4 vectors. uses the sqrt approximation instruction
- FORCEINLINE fltx4 length(void) const
- {
- return SqrtEstSIMD(length2());
- }
-
- /// normalize all 4 vectors in place. not mega-accurate (uses reciprocal approximation instruction)
- FORCEINLINE void VectorNormalizeFast(void)
- {
- fltx4 mag_sq=(*this)*(*this); // length^2
- (*this) *= ReciprocalSqrtEstSIMD(mag_sq); // *(1.0/sqrt(length^2))
- }
-
- /// normalize all 4 vectors in place.
- FORCEINLINE void VectorNormalize(void)
- {
- fltx4 mag_sq=(*this)*(*this); // length^2
- (*this) *= ReciprocalSqrtSIMD(mag_sq); // *(1.0/sqrt(length^2))
- }
-
- /// construct a FourVectors from 4 separate Vectors
- FORCEINLINE FourVectors(Vector const &a, Vector const &b, Vector const &c, Vector const &d)
- {
- LoadAndSwizzle(a,b,c,d);
- }
-
- /// construct a FourVectors from 4 separate Vectors
- FORCEINLINE FourVectors(VectorAligned const &a, VectorAligned const &b, VectorAligned const &c, VectorAligned const &d)
- {
- LoadAndSwizzleAligned(a,b,c,d);
- }
-
- FORCEINLINE fltx4 DistToSqr( FourVectors const &pnt )
- {
- fltx4 fl4dX = SubSIMD( pnt.x, x );
- fltx4 fl4dY = SubSIMD( pnt.y, y );
- fltx4 fl4dZ = SubSIMD( pnt.z, z );
- return AddSIMD( MulSIMD( fl4dX, fl4dX), AddSIMD( MulSIMD( fl4dY, fl4dY ), MulSIMD( fl4dZ, fl4dZ ) ) );
-
- }
-
- FORCEINLINE fltx4 TValueOfClosestPointOnLine( FourVectors const &p0, FourVectors const &p1 ) const
- {
- FourVectors lineDelta = p1;
- lineDelta -= p0;
- fltx4 OOlineDirDotlineDir = ReciprocalSIMD( p1 * p1 );
- FourVectors v4OurPnt = *this;
- v4OurPnt -= p0;
- return MulSIMD( OOlineDirDotlineDir, v4OurPnt * lineDelta );
- }
-
- FORCEINLINE fltx4 DistSqrToLineSegment( FourVectors const &p0, FourVectors const &p1 ) const
- {
- FourVectors lineDelta = p1;
- FourVectors v4OurPnt = *this;
- v4OurPnt -= p0;
- lineDelta -= p0;
-
- fltx4 OOlineDirDotlineDir = ReciprocalSIMD( lineDelta * lineDelta );
-
- fltx4 fl4T = MulSIMD( OOlineDirDotlineDir, v4OurPnt * lineDelta );
-
- fl4T = MinSIMD( fl4T, Four_Ones );
- fl4T = MaxSIMD( fl4T, Four_Zeros );
- lineDelta *= fl4T;
- return v4OurPnt.DistToSqr( lineDelta );
- }
-
-};
-
-/// form 4 cross products
-inline FourVectors operator ^(const FourVectors &a, const FourVectors &b)
-{
- FourVectors ret;
- ret.x=SubSIMD(MulSIMD(a.y,b.z),MulSIMD(a.z,b.y));
- ret.y=SubSIMD(MulSIMD(a.z,b.x),MulSIMD(a.x,b.z));
- ret.z=SubSIMD(MulSIMD(a.x,b.y),MulSIMD(a.y,b.x));
- return ret;
-}
-
-/// component-by-componentwise MAX operator
-inline FourVectors maximum(const FourVectors &a, const FourVectors &b)
-{
- FourVectors ret;
- ret.x=MaxSIMD(a.x,b.x);
- ret.y=MaxSIMD(a.y,b.y);
- ret.z=MaxSIMD(a.z,b.z);
- return ret;
-}
-
-/// component-by-componentwise MIN operator
-inline FourVectors minimum(const FourVectors &a, const FourVectors &b)
-{
- FourVectors ret;
- ret.x=MinSIMD(a.x,b.x);
- ret.y=MinSIMD(a.y,b.y);
- ret.z=MinSIMD(a.z,b.z);
- return ret;
-}
-
-/// calculate reflection vector. incident and normal dir assumed normalized
-FORCEINLINE FourVectors VectorReflect( const FourVectors &incident, const FourVectors &normal )
-{
- FourVectors ret = incident;
- fltx4 iDotNx2 = incident * normal;
- iDotNx2 = AddSIMD( iDotNx2, iDotNx2 );
- FourVectors nPart = normal;
- nPart *= iDotNx2;
- ret -= nPart; // i-2(n*i)n
- return ret;
-}
-
-/// calculate slide vector. removes all components of a vector which are perpendicular to a normal vector.
-FORCEINLINE FourVectors VectorSlide( const FourVectors &incident, const FourVectors &normal )
-{
- FourVectors ret = incident;
- fltx4 iDotN = incident * normal;
- FourVectors nPart = normal;
- nPart *= iDotN;
- ret -= nPart; // i-(n*i)n
- return ret;
-}
-
-
-// Assume the given matrix is a rotation, and rotate these vectors by it.
-// If you have a long list of FourVectors structures that you all want
-// to rotate by the same matrix, use FourVectors::RotateManyBy() instead.
-void FourVectors::RotateBy(const matrix3x4_t& matrix)
-{
- // Splat out each of the entries in the matrix to a fltx4. Do this
- // in the order that we will need them, to hide latency. I'm
- // avoiding making an array of them, so that they'll remain in
- // registers.
- fltx4 matSplat00, matSplat01, matSplat02,
- matSplat10, matSplat11, matSplat12,
- matSplat20, matSplat21, matSplat22;
-
- {
- // Load the matrix into local vectors. Sadly, matrix3x4_ts are
- // often unaligned. The w components will be the tranpose row of
- // the matrix, but we don't really care about that.
- fltx4 matCol0 = LoadUnalignedSIMD( matrix[0] );
- fltx4 matCol1 = LoadUnalignedSIMD( matrix[1] );
- fltx4 matCol2 = LoadUnalignedSIMD( matrix[2] );
-
- matSplat00 = SplatXSIMD( matCol0 );
- matSplat01 = SplatYSIMD( matCol0 );
- matSplat02 = SplatZSIMD( matCol0 );
-
- matSplat10 = SplatXSIMD( matCol1 );
- matSplat11 = SplatYSIMD( matCol1 );
- matSplat12 = SplatZSIMD( matCol1 );
-
- matSplat20 = SplatXSIMD( matCol2 );
- matSplat21 = SplatYSIMD( matCol2 );
- matSplat22 = SplatZSIMD( matCol2 );
- }
-
- // Trust in the compiler to schedule these operations correctly:
- fltx4 outX, outY, outZ;
- outX = AddSIMD( AddSIMD( MulSIMD( x, matSplat00 ), MulSIMD( y, matSplat01 ) ), MulSIMD( z, matSplat02 ) );
- outY = AddSIMD( AddSIMD( MulSIMD( x, matSplat10 ), MulSIMD( y, matSplat11 ) ), MulSIMD( z, matSplat12 ) );
- outZ = AddSIMD( AddSIMD( MulSIMD( x, matSplat20 ), MulSIMD( y, matSplat21 ) ), MulSIMD( z, matSplat22 ) );
-
- x = outX;
- y = outY;
- z = outZ;
-}
-
-// Assume the given matrix is a rotation, and rotate these vectors by it.
-// If you have a long list of FourVectors structures that you all want
-// to rotate by the same matrix, use FourVectors::RotateManyBy() instead.
-void FourVectors::TransformBy(const matrix3x4_t& matrix)
-{
- // Splat out each of the entries in the matrix to a fltx4. Do this
- // in the order that we will need them, to hide latency. I'm
- // avoiding making an array of them, so that they'll remain in
- // registers.
- fltx4 matSplat00, matSplat01, matSplat02,
- matSplat10, matSplat11, matSplat12,
- matSplat20, matSplat21, matSplat22;
-
- {
- // Load the matrix into local vectors. Sadly, matrix3x4_ts are
- // often unaligned. The w components will be the tranpose row of
- // the matrix, but we don't really care about that.
- fltx4 matCol0 = LoadUnalignedSIMD( matrix[0] );
- fltx4 matCol1 = LoadUnalignedSIMD( matrix[1] );
- fltx4 matCol2 = LoadUnalignedSIMD( matrix[2] );
-
- matSplat00 = SplatXSIMD( matCol0 );
- matSplat01 = SplatYSIMD( matCol0 );
- matSplat02 = SplatZSIMD( matCol0 );
-
- matSplat10 = SplatXSIMD( matCol1 );
- matSplat11 = SplatYSIMD( matCol1 );
- matSplat12 = SplatZSIMD( matCol1 );
-
- matSplat20 = SplatXSIMD( matCol2 );
- matSplat21 = SplatYSIMD( matCol2 );
- matSplat22 = SplatZSIMD( matCol2 );
- }
-
- // Trust in the compiler to schedule these operations correctly:
- fltx4 outX, outY, outZ;
-
- outX = MaddSIMD( z, matSplat02, AddSIMD( MulSIMD( x, matSplat00 ), MulSIMD( y, matSplat01 ) ) );
- outY = MaddSIMD( z, matSplat12, AddSIMD( MulSIMD( x, matSplat10 ), MulSIMD( y, matSplat11 ) ) );
- outZ = MaddSIMD( z, matSplat22, AddSIMD( MulSIMD( x, matSplat20 ), MulSIMD( y, matSplat21 ) ) );
-
- x = AddSIMD( outX, ReplicateX4( matrix[0][3] ));
- y = AddSIMD( outY, ReplicateX4( matrix[1][3] ));
- z = AddSIMD( outZ, ReplicateX4( matrix[2][3] ));
-}
-
-
-
-/// quick, low quality perlin-style noise() function suitable for real time use.
-/// return value is -1..1. Only reliable around +/- 1 million or so.
-fltx4 NoiseSIMD( const fltx4 & x, const fltx4 & y, const fltx4 & z );
-fltx4 NoiseSIMD( FourVectors const &v );
-
-// vector valued noise direction
-FourVectors DNoiseSIMD( FourVectors const &v );
-
-// vector value "curl" noise function. see http://hyperphysics.phy-astr.gsu.edu/hbase/curl.html
-FourVectors CurlNoiseSIMD( FourVectors const &v );
-
-
-/// calculate the absolute value of a packed single
-inline fltx4 fabs( const fltx4 & x )
-{
- return AndSIMD( x, LoadAlignedSIMD( g_SIMD_clear_signmask ) );
-}
-
-/// negate all four components of a SIMD packed single
-inline fltx4 fnegate( const fltx4 & x )
-{
- return XorSIMD( x, LoadAlignedSIMD( g_SIMD_signmask ) );
-}
-
-
-fltx4 Pow_FixedPoint_Exponent_SIMD( const fltx4 & x, int exponent);
-
-// PowSIMD - raise a SIMD register to a power. This is analogous to the C pow() function, with some
-// restictions: fractional exponents are only handled with 2 bits of precision. Basically,
-// fractions of 0,.25,.5, and .75 are handled. PowSIMD(x,.30) will be the same as PowSIMD(x,.25).
-// negative and fractional powers are handled by the SIMD reciprocal and square root approximation
-// instructions and so are not especially accurate ----Note that this routine does not raise
-// numeric exceptions because it uses SIMD--- This routine is O(log2(exponent)).
-inline fltx4 PowSIMD( const fltx4 & x, float exponent )
-{
- return Pow_FixedPoint_Exponent_SIMD(x,(int) (4.0*exponent));
-}
-
-
-
-// random number generation - generate 4 random numbers quickly.
-
-void SeedRandSIMD(uint32 seed); // seed the random # generator
-fltx4 RandSIMD( int nContext = 0 ); // return 4 numbers in the 0..1 range
-
-// for multithreaded, you need to use these and use the argument form of RandSIMD:
-int GetSIMDRandContext( void );
-void ReleaseSIMDRandContext( int nContext );
-
-FORCEINLINE fltx4 RandSignedSIMD( void ) // -1..1
-{
- return SubSIMD( MulSIMD( Four_Twos, RandSIMD() ), Four_Ones );
-}
-
-
-// SIMD versions of mathlib simplespline functions
-// hermite basis function for smooth interpolation
-// Similar to Gain() above, but very cheap to call
-// value should be between 0 & 1 inclusive
-inline fltx4 SimpleSpline( const fltx4 & value )
-{
- // Arranged to avoid a data dependency between these two MULs:
- fltx4 valueDoubled = MulSIMD( value, Four_Twos );
- fltx4 valueSquared = MulSIMD( value, value );
-
- // Nice little ease-in, ease-out spline-like curve
- return SubSIMD(
- MulSIMD( Four_Threes, valueSquared ),
- MulSIMD( valueDoubled, valueSquared ) );
-}
-
-// remaps a value in [startInterval, startInterval+rangeInterval] from linear to
-// spline using SimpleSpline
-inline fltx4 SimpleSplineRemapValWithDeltas( const fltx4 & val,
- const fltx4 & A, const fltx4 & BMinusA,
- const fltx4 & OneOverBMinusA, const fltx4 & C,
- const fltx4 & DMinusC )
-{
-// if ( A == B )
-// return val >= B ? D : C;
- fltx4 cVal = MulSIMD( SubSIMD( val, A), OneOverBMinusA );
- return AddSIMD( C, MulSIMD( DMinusC, SimpleSpline( cVal ) ) );
-}
-
-inline fltx4 SimpleSplineRemapValWithDeltasClamped( const fltx4 & val,
- const fltx4 & A, const fltx4 & BMinusA,
- const fltx4 & OneOverBMinusA, const fltx4 & C,
- const fltx4 & DMinusC )
-{
-// if ( A == B )
-// return val >= B ? D : C;
- fltx4 cVal = MulSIMD( SubSIMD( val, A), OneOverBMinusA );
- cVal = MinSIMD( Four_Ones, MaxSIMD( Four_Zeros, cVal ) );
- return AddSIMD( C, MulSIMD( DMinusC, SimpleSpline( cVal ) ) );
-}
-
-FORCEINLINE fltx4 FracSIMD( const fltx4 &val )
-{
- fltx4 fl4Abs = fabs( val );
- fltx4 ival = SubSIMD( AddSIMD( fl4Abs, Four_2ToThe23s ), Four_2ToThe23s );
- ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Ones ), ival );
- return XorSIMD( SubSIMD( fl4Abs, ival ), XorSIMD( val, fl4Abs ) ); // restore sign bits
-}
-
-FORCEINLINE fltx4 Mod2SIMD( const fltx4 &val )
-{
- fltx4 fl4Abs = fabs( val );
- fltx4 ival = SubSIMD( AndSIMD( LoadAlignedSIMD( (float *) g_SIMD_lsbmask ), AddSIMD( fl4Abs, Four_2ToThe23s ) ), Four_2ToThe23s );
- ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Twos ), ival );
- return XorSIMD( SubSIMD( fl4Abs, ival ), XorSIMD( val, fl4Abs ) ); // restore sign bits
-}
-
-FORCEINLINE fltx4 Mod2SIMDPositiveInput( const fltx4 &val )
-{
- fltx4 ival = SubSIMD( AndSIMD( LoadAlignedSIMD( g_SIMD_lsbmask ), AddSIMD( val, Four_2ToThe23s ) ), Four_2ToThe23s );
- ival = MaskedAssign( CmpGtSIMD( ival, val ), SubSIMD( ival, Four_Twos ), ival );
- return SubSIMD( val, ival );
-}
-
-
-// approximate sin of an angle, with -1..1 representing the whole sin wave period instead of -pi..pi.
-// no range reduction is done - for values outside of 0..1 you won't like the results
-FORCEINLINE fltx4 _SinEst01SIMD( const fltx4 &val )
-{
- // really rough approximation - x*(4-x*4) - a parabola. s(0) = 0, s(.5) = 1, s(1)=0, smooth in-between.
- // sufficient for simple oscillation.
- return MulSIMD( val, SubSIMD( Four_Fours, MulSIMD( val, Four_Fours ) ) );
-}
-
-FORCEINLINE fltx4 _Sin01SIMD( const fltx4 &val )
-{
- // not a bad approximation : parabola always over-estimates. Squared parabola always
- // underestimates. So lets blend between them: goodsin = badsin + .225*( badsin^2-badsin)
- fltx4 fl4BadEst = MulSIMD( val, SubSIMD( Four_Fours, MulSIMD( val, Four_Fours ) ) );
- return AddSIMD( MulSIMD( Four_Point225s, SubSIMD( MulSIMD( fl4BadEst, fl4BadEst ), fl4BadEst ) ), fl4BadEst );
-}
-
-// full range useable implementations
-FORCEINLINE fltx4 SinEst01SIMD( const fltx4 &val )
-{
- fltx4 fl4Abs = fabs( val );
- fltx4 fl4Reduced2 = Mod2SIMDPositiveInput( fl4Abs );
- fltx4 fl4OddMask = CmpGeSIMD( fl4Reduced2, Four_Ones );
- fltx4 fl4val = SubSIMD( fl4Reduced2, AndSIMD( Four_Ones, fl4OddMask ) );
- fltx4 fl4Sin = _SinEst01SIMD( fl4val );
- fl4Sin = XorSIMD( fl4Sin, AndSIMD( LoadAlignedSIMD( g_SIMD_signmask ), XorSIMD( val, fl4OddMask ) ) );
- return fl4Sin;
-
-}
-
-FORCEINLINE fltx4 Sin01SIMD( const fltx4 &val )
-{
- fltx4 fl4Abs = fabs( val );
- fltx4 fl4Reduced2 = Mod2SIMDPositiveInput( fl4Abs );
- fltx4 fl4OddMask = CmpGeSIMD( fl4Reduced2, Four_Ones );
- fltx4 fl4val = SubSIMD( fl4Reduced2, AndSIMD( Four_Ones, fl4OddMask ) );
- fltx4 fl4Sin = _Sin01SIMD( fl4val );
- fl4Sin = XorSIMD( fl4Sin, AndSIMD( LoadAlignedSIMD( g_SIMD_signmask ), XorSIMD( val, fl4OddMask ) ) );
- return fl4Sin;
-
-}
-
-// Schlick style Bias approximation see graphics gems 4 : bias(t,a)= t/( (1/a-2)*(1-t)+1)
-
-FORCEINLINE fltx4 PreCalcBiasParameter( const fltx4 &bias_parameter )
-{
- // convert perlin-style-bias parameter to the value right for the approximation
- return SubSIMD( ReciprocalSIMD( bias_parameter ), Four_Twos );
-}
-
-FORCEINLINE fltx4 BiasSIMD( const fltx4 &val, const fltx4 &precalc_param )
-{
- // similar to bias function except pass precalced bias value from calling PreCalcBiasParameter.
-
- //!!speed!! use reciprocal est?
- //!!speed!! could save one op by precalcing _2_ values
- return DivSIMD( val, AddSIMD( MulSIMD( precalc_param, SubSIMD( Four_Ones, val ) ), Four_Ones ) );
-}
-
-//-----------------------------------------------------------------------------
-// Box/plane test
-// NOTE: The w component of emins + emaxs must be 1 for this to work
-//-----------------------------------------------------------------------------
-FORCEINLINE int BoxOnPlaneSideSIMD( const fltx4& emins, const fltx4& emaxs, const cplane_t *p, float tolerance = 0.f )
-{
- fltx4 corners[2];
- fltx4 normal = LoadUnalignedSIMD( p->normal.Base() );
- fltx4 dist = ReplicateX4( -p->dist );
- normal = SetWSIMD( normal, dist );
- fltx4 t4 = ReplicateX4( tolerance );
- fltx4 negt4 = ReplicateX4( -tolerance );
- fltx4 cmp = CmpGeSIMD( normal, Four_Zeros );
- corners[0] = MaskedAssign( cmp, emaxs, emins );
- corners[1] = MaskedAssign( cmp, emins, emaxs );
- fltx4 dot1 = Dot4SIMD( normal, corners[0] );
- fltx4 dot2 = Dot4SIMD( normal, corners[1] );
- cmp = CmpGeSIMD( dot1, t4 );
- fltx4 cmp2 = CmpGtSIMD( negt4, dot2 );
- fltx4 result = MaskedAssign( cmp, Four_Ones, Four_Zeros );
- fltx4 result2 = MaskedAssign( cmp2, Four_Twos, Four_Zeros );
- result = AddSIMD( result, result2 );
- intx4 sides;
- ConvertStoreAsIntsSIMD( &sides, result );
- return sides[0];
-}
-
-#endif // _ssemath_h
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: - defines SIMD "structure of arrays" classes and functions. +// +//===========================================================================// +#ifndef SSEMATH_H +#define SSEMATH_H + +#if defined( _X360 ) +#include <xboxmath.h> +#else +#include <xmmintrin.h> +#endif + +#include <mathlib/vector.h> +#include <mathlib/mathlib.h> + +#if defined(GNUC) +#define USE_STDC_FOR_SIMD 0 +#else +#define USE_STDC_FOR_SIMD 0 +#endif + +#if (!defined(_X360) && (USE_STDC_FOR_SIMD == 0)) +#define _SSE1 1 +#endif + +// I thought about defining a class/union for the SIMD packed floats instead of using fltx4, +// but decided against it because (a) the nature of SIMD code which includes comparisons is to blur +// the relationship between packed floats and packed integer types and (b) not sure that the +// compiler would handle generating good code for the intrinsics. + +#if USE_STDC_FOR_SIMD + +typedef union +{ + float m128_f32[4]; + uint32 m128_u32[4]; +} fltx4; + +typedef fltx4 i32x4; +typedef fltx4 u32x4; + +#elif ( defined( _X360 ) ) + +typedef union +{ + // This union allows float/int access (which generally shouldn't be done in inner loops) + __vector4 vmx; + float m128_f32[4]; + uint32 m128_u32[4]; +} fltx4_union; + +typedef __vector4 fltx4; +typedef __vector4 i32x4; // a VMX register; just a way of making it explicit that we're doing integer ops. +typedef __vector4 u32x4; // a VMX register; just a way of making it explicit that we're doing unsigned integer ops. + +#else + +typedef __m128 fltx4; +typedef __m128 i32x4; +typedef __m128 u32x4; + +#endif + +// The FLTX4 type is a fltx4 used as a parameter to a function. +// On the 360, the best way to do this is pass-by-copy on the registers. +// On the PC, the best way is to pass by const reference. 
+// The compiler will sometimes, but not always, replace a pass-by-const-ref +// with a pass-in-reg on the 360; to avoid this confusion, you can +// explicitly use a FLTX4 as the parameter type. +#ifdef _X360 +typedef __vector4 FLTX4; +#else +typedef const fltx4 & FLTX4; +#endif + +// A 16-byte aligned int32 datastructure +// (for use when writing out fltx4's as SIGNED +// ints). +struct ALIGN16 intx4 +{ + int32 m_i32[4]; + + inline int & operator[](int which) + { + return m_i32[which]; + } + + inline const int & operator[](int which) const + { + return m_i32[which]; + } + + inline int32 *Base() { + return m_i32; + } + + inline const int32 *Base() const + { + return m_i32; + } + + inline const bool operator==(const intx4 &other) const + { + return m_i32[0] == other.m_i32[0] && + m_i32[1] == other.m_i32[1] && + m_i32[2] == other.m_i32[2] && + m_i32[3] == other.m_i32[3] ; + } +} ALIGN16_POST; + + +#if defined( _DEBUG ) && defined( _X360 ) +FORCEINLINE void TestVPUFlags() +{ + // Check that the VPU is in the appropriate (Java-compliant) mode (see 3.2.1 in altivec_pem.pdf on xds.xbox.com) + __vector4 a; + __asm + { + mfvscr a; + } + unsigned int * flags = (unsigned int *)&a; + unsigned int controlWord = flags[3]; + Assert(controlWord == 0); +} +#else // _DEBUG +FORCEINLINE void TestVPUFlags() {} +#endif // _DEBUG + + +// useful constants in SIMD packed float format: +// (note: some of these aren't stored on the 360, +// but are manufactured directly in one or two +// instructions, saving a load and possible L2 +// miss.) +#ifndef _X360 +extern const fltx4 Four_Zeros; // 0 0 0 0 +extern const fltx4 Four_Ones; // 1 1 1 1 +extern const fltx4 Four_Twos; // 2 2 2 2 +extern const fltx4 Four_Threes; // 3 3 3 3 +extern const fltx4 Four_Fours; // guess. 
+extern const fltx4 Four_Point225s; // .225 .225 .225 .225 +extern const fltx4 Four_PointFives; // .5 .5 .5 .5 +extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON +extern const fltx4 Four_2ToThe21s; // (1<<21).. +extern const fltx4 Four_2ToThe22s; // (1<<22).. +extern const fltx4 Four_2ToThe23s; // (1<<23).. +extern const fltx4 Four_2ToThe24s; // (1<<24).. +extern const fltx4 Four_Origin; // 0 0 0 1 (origin point, like vr0 on the PS2) +extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1 +#else +#define Four_Zeros XMVectorZero() // 0 0 0 0 +#define Four_Ones XMVectorSplatOne() // 1 1 1 1 +extern const fltx4 Four_Twos; // 2 2 2 2 +extern const fltx4 Four_Threes; // 3 3 3 3 +extern const fltx4 Four_Fours; // guess. +extern const fltx4 Four_Point225s; // .225 .225 .225 .225 +extern const fltx4 Four_PointFives; // .5 .5 .5 .5 +extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON +extern const fltx4 Four_2ToThe21s; // (1<<21).. +extern const fltx4 Four_2ToThe22s; // (1<<22).. +extern const fltx4 Four_2ToThe23s; // (1<<23).. +extern const fltx4 Four_2ToThe24s; // (1<<24).. 
+extern const fltx4 Four_Origin; // 0 0 0 1 (origin point, like vr0 on the PS2) +extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1 +#endif +extern const fltx4 Four_FLT_MAX; // FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX +extern const fltx4 Four_Negative_FLT_MAX; // -FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX +extern const fltx4 g_SIMD_0123; // 0 1 2 3 as float + +// external aligned integer constants +extern const ALIGN16 int32 g_SIMD_clear_signmask[] ALIGN16_POST; // 0x7fffffff x 4 +extern const ALIGN16 int32 g_SIMD_signmask[] ALIGN16_POST; // 0x80000000 x 4 +extern const ALIGN16 int32 g_SIMD_lsbmask[] ALIGN16_POST; // 0xfffffffe x 4 +extern const ALIGN16 int32 g_SIMD_clear_wmask[] ALIGN16_POST; // -1 -1 -1 0 +extern const ALIGN16 int32 g_SIMD_ComponentMask[4][4] ALIGN16_POST; // [0xFFFFFFFF 0 0 0], [0 0xFFFFFFFF 0 0], [0 0 0xFFFFFFFF 0], [0 0 0 0xFFFFFFFF] +extern const ALIGN16 int32 g_SIMD_AllOnesMask[] ALIGN16_POST; // ~0,~0,~0,~0 +extern const ALIGN16 int32 g_SIMD_Low16BitsMask[] ALIGN16_POST; // 0xffff x 4 + +// this mask is used for skipping the tail of things. If you have N elements in an array, and wish +// to mask out the tail, g_SIMD_SkipTailMask[N & 3] what you want to use for the last iteration. +extern const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST; + +// Define prefetch macros. +// The characteristics of cache and prefetch are completely +// different between the different platforms, so you DO NOT +// want to just define one macro that maps to every platform +// intrinsic under the hood -- you need to prefetch at different +// intervals between x86 and PPC, for example, and that is +// a higher level code change. +// On the other hand, I'm tired of typing #ifdef _X360 +// all over the place, so this is just a nop on Intel, PS3. 
+#ifdef _X360 +#define PREFETCH360(address, offset) __dcbt(offset,address) +#else +#define PREFETCH360(x,y) // nothing +#endif + +#if USE_STDC_FOR_SIMD + +//--------------------------------------------------------------------- +// Standard C (fallback/Linux) implementation (only there for compat - slow) +//--------------------------------------------------------------------- + +FORCEINLINE float SubFloat( const fltx4 & a, int idx ) +{ + return a.m128_f32[ idx ]; +} + +FORCEINLINE float & SubFloat( fltx4 & a, int idx ) +{ + return a.m128_f32[idx]; +} + +FORCEINLINE uint32 SubInt( const fltx4 & a, int idx ) +{ + return a.m128_u32[idx]; +} + +FORCEINLINE uint32 & SubInt( fltx4 & a, int idx ) +{ + return a.m128_u32[idx]; +} + +// Return one in the fastest way -- on the x360, faster even than loading. +FORCEINLINE fltx4 LoadZeroSIMD( void ) +{ + return Four_Zeros; +} + +// Return one in the fastest way -- on the x360, faster even than loading. +FORCEINLINE fltx4 LoadOneSIMD( void ) +{ + return Four_Ones; +} + +FORCEINLINE fltx4 SplatXSIMD( const fltx4 & a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = SubFloat( a, 0 ); + SubFloat( retVal, 1 ) = SubFloat( a, 0 ); + SubFloat( retVal, 2 ) = SubFloat( a, 0 ); + SubFloat( retVal, 3 ) = SubFloat( a, 0 ); + return retVal; +} + +FORCEINLINE fltx4 SplatYSIMD( fltx4 a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = SubFloat( a, 1 ); + SubFloat( retVal, 1 ) = SubFloat( a, 1 ); + SubFloat( retVal, 2 ) = SubFloat( a, 1 ); + SubFloat( retVal, 3 ) = SubFloat( a, 1 ); + return retVal; +} + +FORCEINLINE fltx4 SplatZSIMD( fltx4 a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = SubFloat( a, 2 ); + SubFloat( retVal, 1 ) = SubFloat( a, 2 ); + SubFloat( retVal, 2 ) = SubFloat( a, 2 ); + SubFloat( retVal, 3 ) = SubFloat( a, 2 ); + return retVal; +} + +FORCEINLINE fltx4 SplatWSIMD( fltx4 a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = SubFloat( a, 3 ); + SubFloat( retVal, 1 ) = SubFloat( a, 3 ); + SubFloat( retVal, 2 ) = SubFloat( a, 3 ); + 
SubFloat( retVal, 3 ) = SubFloat( a, 3 ); + return retVal; +} + +FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x ) +{ + fltx4 result = a; + SubFloat( result, 0 ) = SubFloat( x, 0 ); + return result; +} + +FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y ) +{ + fltx4 result = a; + SubFloat( result, 1 ) = SubFloat( y, 1 ); + return result; +} + +FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z ) +{ + fltx4 result = a; + SubFloat( result, 2 ) = SubFloat( z, 2 ); + return result; +} + +FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w ) +{ + fltx4 result = a; + SubFloat( result, 3 ) = SubFloat( w, 3 ); + return result; +} + +FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue ) +{ + fltx4 result = a; + SubFloat( result, nComponent ) = flValue; + return result; +} + +// a b c d -> b c d a +FORCEINLINE fltx4 RotateLeft( const fltx4 & a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = SubFloat( a, 1 ); + SubFloat( retVal, 1 ) = SubFloat( a, 2 ); + SubFloat( retVal, 2 ) = SubFloat( a, 3 ); + SubFloat( retVal, 3 ) = SubFloat( a, 0 ); + return retVal; +} + +// a b c d -> c d a b +FORCEINLINE fltx4 RotateLeft2( const fltx4 & a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = SubFloat( a, 2 ); + SubFloat( retVal, 1 ) = SubFloat( a, 3 ); + SubFloat( retVal, 2 ) = SubFloat( a, 0 ); + SubFloat( retVal, 3 ) = SubFloat( a, 1 ); + return retVal; +} + +#define BINOP(op) \ + fltx4 retVal; \ + SubFloat( retVal, 0 ) = ( SubFloat( a, 0 ) op SubFloat( b, 0 ) ); \ + SubFloat( retVal, 1 ) = ( SubFloat( a, 1 ) op SubFloat( b, 1 ) ); \ + SubFloat( retVal, 2 ) = ( SubFloat( a, 2 ) op SubFloat( b, 2 ) ); \ + SubFloat( retVal, 3 ) = ( SubFloat( a, 3 ) op SubFloat( b, 3 ) ); \ + return retVal; + +#define IBINOP(op) \ + fltx4 retVal; \ + SubInt( retVal, 0 ) = ( SubInt( a, 0 ) op SubInt ( b, 0 ) ); \ + SubInt( retVal, 1 ) = ( SubInt( a, 1 ) op SubInt ( b, 1 ) ); \ + SubInt( retVal, 2 ) = ( SubInt( a, 2 ) op SubInt ( b, 2 ) ); \ + 
SubInt( retVal, 3 ) = ( SubInt( a, 3 ) op SubInt ( b, 3 ) ); \ + return retVal; + +FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b ) +{ + BINOP(+); +} + +FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b +{ + BINOP(-); +}; + +FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b +{ + BINOP(*); +} + +FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b +{ + BINOP(/); +} + + +FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c +{ + return AddSIMD( MulSIMD(a,b), c ); +} + +FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b +{ + return SubSIMD( c, MulSIMD(a,b) ); +}; + + +FORCEINLINE fltx4 SinSIMD( const fltx4 &radians ) +{ + fltx4 result; + SubFloat( result, 0 ) = sin( SubFloat( radians, 0 ) ); + SubFloat( result, 1 ) = sin( SubFloat( radians, 1 ) ); + SubFloat( result, 2 ) = sin( SubFloat( radians, 2 ) ); + SubFloat( result, 3 ) = sin( SubFloat( radians, 3 ) ); + return result; +} + +FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) +{ + SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) ); + SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) ); + SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) ); +} + +FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) +{ + SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) ); + SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) ); + SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) ); + SinCos( SubFloat( radians, 3 ), &SubFloat( sine, 3 ), &SubFloat( cosine, 3 ) ); +} + +FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine ) +{ + fltx4 result; + SubFloat( result, 0 ) = asin( SubFloat( sine, 0 ) ); + SubFloat( result, 1 ) = asin( SubFloat( sine, 1 ) ); + SubFloat( result, 2 ) = asin( 
SubFloat( sine, 2 ) ); + SubFloat( result, 3 ) = asin( SubFloat( sine, 3 ) ); + return result; +} + +FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs ) +{ + fltx4 result; + SubFloat( result, 0 ) = acos( SubFloat( cs, 0 ) ); + SubFloat( result, 1 ) = acos( SubFloat( cs, 1 ) ); + SubFloat( result, 2 ) = acos( SubFloat( cs, 2 ) ); + SubFloat( result, 3 ) = acos( SubFloat( cs, 3 ) ); + return result; +} + +// tan^1(a/b) .. ie, pass sin in as a and cos in as b +FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b ) +{ + fltx4 result; + SubFloat( result, 0 ) = atan2( SubFloat( a, 0 ), SubFloat( b, 0 ) ); + SubFloat( result, 1 ) = atan2( SubFloat( a, 1 ), SubFloat( b, 1 ) ); + SubFloat( result, 2 ) = atan2( SubFloat( a, 2 ), SubFloat( b, 2 ) ); + SubFloat( result, 3 ) = atan2( SubFloat( a, 3 ), SubFloat( b, 3 ) ); + return result; +} + +FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = max( SubFloat( a, 0 ), SubFloat( b, 0 ) ); + SubFloat( retVal, 1 ) = max( SubFloat( a, 1 ), SubFloat( b, 1 ) ); + SubFloat( retVal, 2 ) = max( SubFloat( a, 2 ), SubFloat( b, 2 ) ); + SubFloat( retVal, 3 ) = max( SubFloat( a, 3 ), SubFloat( b, 3 ) ); + return retVal; +} + +FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = min( SubFloat( a, 0 ), SubFloat( b, 0 ) ); + SubFloat( retVal, 1 ) = min( SubFloat( a, 1 ), SubFloat( b, 1 ) ); + SubFloat( retVal, 2 ) = min( SubFloat( a, 2 ), SubFloat( b, 2 ) ); + SubFloat( retVal, 3 ) = min( SubFloat( a, 3 ), SubFloat( b, 3 ) ); + return retVal; +} + +FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b +{ + IBINOP(&); +} + +FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b +{ + fltx4 retVal; + SubInt( retVal, 0 ) = ~SubInt( a, 0 ) & SubInt( b, 0 ); + SubInt( retVal, 1 ) = ~SubInt( a, 1 ) & SubInt( b, 1 ); + SubInt( retVal, 2 ) = ~SubInt( a, 2 ) & SubInt( b, 2 ); + 
SubInt( retVal, 3 ) = ~SubInt( a, 3 ) & SubInt( b, 3 ); + return retVal; +} + +FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b +{ + IBINOP(^); +} + +FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b +{ + IBINOP(|); +} + +FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a +{ + fltx4 retval; + SubFloat( retval, 0 ) = -SubFloat( a, 0 ); + SubFloat( retval, 1 ) = -SubFloat( a, 1 ); + SubFloat( retval, 2 ) = -SubFloat( a, 2 ); + SubFloat( retval, 3 ) = -SubFloat( a, 3 ); + + return retval; +} + +FORCEINLINE bool IsAllZeros( const fltx4 & a ) // all floats of a zero? +{ + return ( SubFloat( a, 0 ) == 0.0 ) && + ( SubFloat( a, 1 ) == 0.0 ) && + ( SubFloat( a, 2 ) == 0.0 ) && + ( SubFloat( a, 3 ) == 0.0 ) ; +} + + +// for branching when a.xyzw > b.xyzw +FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b ) +{ + return SubFloat(a,0) > SubFloat(b,0) && + SubFloat(a,1) > SubFloat(b,1) && + SubFloat(a,2) > SubFloat(b,2) && + SubFloat(a,3) > SubFloat(b,3); +} + +// for branching when a.xyzw >= b.xyzw +FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b ) +{ + return SubFloat(a,0) >= SubFloat(b,0) && + SubFloat(a,1) >= SubFloat(b,1) && + SubFloat(a,2) >= SubFloat(b,2) && + SubFloat(a,3) >= SubFloat(b,3); +} + +// For branching if all a.xyzw == b.xyzw +FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b ) +{ + return SubFloat(a,0) == SubFloat(b,0) && + SubFloat(a,1) == SubFloat(b,1) && + SubFloat(a,2) == SubFloat(b,2) && + SubFloat(a,3) == SubFloat(b,3); +} + +FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set +{ + int nRet = 0; + + nRet |= ( SubInt( a, 0 ) & 0x80000000 ) >> 31; // sign(x) -> bit 0 + nRet |= ( SubInt( a, 1 ) & 0x80000000 ) >> 30; // sign(y) -> bit 1 + nRet |= ( SubInt( a, 2 ) & 0x80000000 ) >> 29; // sign(z) -> bit 2 + nRet |= ( SubInt( a, 3 ) & 0x80000000 ) >> 28; // sign(w) -> bit 3 + + return nRet; +} + +FORCEINLINE bool 
IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0) +{ + return (0 != TestSignSIMD( a )); +} + +FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0 +{ + fltx4 retVal; + SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) == SubFloat( b, 0 )) ? ~0 : 0; + SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) == SubFloat( b, 1 )) ? ~0 : 0; + SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) == SubFloat( b, 2 )) ? ~0 : 0; + SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) == SubFloat( b, 3 )) ? ~0 : 0; + return retVal; +} + +FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0 +{ + fltx4 retVal; + SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) > SubFloat( b, 0 )) ? ~0 : 0; + SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) > SubFloat( b, 1 )) ? ~0 : 0; + SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) > SubFloat( b, 2 )) ? ~0 : 0; + SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) > SubFloat( b, 3 )) ? ~0 : 0; + return retVal; +} + +FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0 +{ + fltx4 retVal; + SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) >= SubFloat( b, 0 )) ? ~0 : 0; + SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) >= SubFloat( b, 1 )) ? ~0 : 0; + SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) >= SubFloat( b, 2 )) ? ~0 : 0; + SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) >= SubFloat( b, 3 )) ? ~0 : 0; + return retVal; +} + +FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a<b) ? ~0:0 +{ + fltx4 retVal; + SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) < SubFloat( b, 0 )) ? ~0 : 0; + SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) < SubFloat( b, 1 )) ? ~0 : 0; + SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) < SubFloat( b, 2 )) ? ~0 : 0; + SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) < SubFloat( b, 3 )) ? ~0 : 0; + return retVal; +} + +FORCEINLINE fltx4 CmpLeSIMD( const fltx4 & a, const fltx4 & b ) // (a<=b) ? ~0:0 +{ + fltx4 retVal; + SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) <= SubFloat( b, 0 )) ? 
~0 : 0; + SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) <= SubFloat( b, 1 )) ? ~0 : 0; + SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) <= SubFloat( b, 2 )) ? ~0 : 0; + SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) <= SubFloat( b, 3 )) ? ~0 : 0; + return retVal; +} + +FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <= b && a >= -b) ? ~0 : 0 +{ + fltx4 retVal; + SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) <= SubFloat( b, 0 ) && SubFloat( a, 0 ) >= -SubFloat( b, 0 ) ) ? ~0 : 0; + SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) <= SubFloat( b, 1 ) && SubFloat( a, 1 ) >= -SubFloat( b, 1 ) ) ? ~0 : 0; + SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) <= SubFloat( b, 2 ) && SubFloat( a, 2 ) >= -SubFloat( b, 2 ) ) ? ~0 : 0; + SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) <= SubFloat( b, 3 ) && SubFloat( a, 3 ) >= -SubFloat( b, 3 ) ) ? ~0 : 0; + return retVal; +} + + +FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue ) +{ + return OrSIMD( + AndSIMD( ReplacementMask, NewValue ), + AndNotSIMD( ReplacementMask, OldValue ) ); +} + +FORCEINLINE fltx4 ReplicateX4( float flValue ) // a,a,a,a +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = flValue; + SubFloat( retVal, 1 ) = flValue; + SubFloat( retVal, 2 ) = flValue; + SubFloat( retVal, 3 ) = flValue; + return retVal; +} + +/// replicate a single 32 bit integer value to all 4 components of an m128 +FORCEINLINE fltx4 ReplicateIX4( int nValue ) +{ + fltx4 retVal; + SubInt( retVal, 0 ) = nValue; + SubInt( retVal, 1 ) = nValue; + SubInt( retVal, 2 ) = nValue; + SubInt( retVal, 3 ) = nValue; + return retVal; + +} + +// Round towards positive infinity +FORCEINLINE fltx4 CeilSIMD( const fltx4 &a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = ceil( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = ceil( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = ceil( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = ceil( SubFloat( a, 3 ) ); + return retVal; + +} + +// Round towards negative infinity +FORCEINLINE 
fltx4 FloorSIMD( const fltx4 &a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = floor( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = floor( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = floor( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = floor( SubFloat( a, 3 ) ); + return retVal; + +} + +FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = sqrt( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = sqrt( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = sqrt( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = sqrt( SubFloat( a, 3 ) ); + return retVal; +} + +FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = sqrt( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = sqrt( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = sqrt( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = sqrt( SubFloat( a, 3 ) ); + return retVal; +} + +FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / sqrt( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = 1.0 / sqrt( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = 1.0 / sqrt( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = 1.0 / sqrt( SubFloat( a, 3 ) ); + return retVal; +} + +FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / sqrt( SubFloat( a, 0 ) != 0.0f ? SubFloat( a, 0 ) : FLT_EPSILON ); + SubFloat( retVal, 1 ) = 1.0 / sqrt( SubFloat( a, 1 ) != 0.0f ? SubFloat( a, 1 ) : FLT_EPSILON ); + SubFloat( retVal, 2 ) = 1.0 / sqrt( SubFloat( a, 2 ) != 0.0f ? SubFloat( a, 2 ) : FLT_EPSILON ); + SubFloat( retVal, 3 ) = 1.0 / sqrt( SubFloat( a, 3 ) != 0.0f ? 
SubFloat( a, 3 ) : FLT_EPSILON ); + return retVal; +} + +FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / sqrt( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = 1.0 / sqrt( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = 1.0 / sqrt( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = 1.0 / sqrt( SubFloat( a, 3 ) ); + return retVal; +} + +FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / SubFloat( a, 0 ); + SubFloat( retVal, 1 ) = 1.0 / SubFloat( a, 1 ); + SubFloat( retVal, 2 ) = 1.0 / SubFloat( a, 2 ); + SubFloat( retVal, 3 ) = 1.0 / SubFloat( a, 3 ); + return retVal; +} + +FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / SubFloat( a, 0 ); + SubFloat( retVal, 1 ) = 1.0 / SubFloat( a, 1 ); + SubFloat( retVal, 2 ) = 1.0 / SubFloat( a, 2 ); + SubFloat( retVal, 3 ) = 1.0 / SubFloat( a, 3 ); + return retVal; +} + +/// 1/x for all 4 values. +/// 1/0 will result in a big but NOT infinite result +FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / (SubFloat( a, 0 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 0 )); + SubFloat( retVal, 1 ) = 1.0 / (SubFloat( a, 1 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 1 )); + SubFloat( retVal, 2 ) = 1.0 / (SubFloat( a, 2 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 2 )); + SubFloat( retVal, 3 ) = 1.0 / (SubFloat( a, 3 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 3 )); + return retVal; +} + +FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / (SubFloat( a, 0 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 0 )); + SubFloat( retVal, 1 ) = 1.0 / (SubFloat( a, 1 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 1 )); + SubFloat( retVal, 2 ) = 1.0 / (SubFloat( a, 2 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 2 )); + SubFloat( retVal, 3 ) = 1.0 / (SubFloat( a, 3 ) == 0.0f ? 
FLT_EPSILON : SubFloat( a, 3 )); + return retVal; +} + +// 2^x for all values (the antilog) +FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = powf( 2, SubFloat(toPower, 0) ); + SubFloat( retVal, 1 ) = powf( 2, SubFloat(toPower, 1) ); + SubFloat( retVal, 2 ) = powf( 2, SubFloat(toPower, 2) ); + SubFloat( retVal, 3 ) = powf( 2, SubFloat(toPower, 3) ); + + return retVal; +} + +FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b ) +{ + float flDot = SubFloat( a, 0 ) * SubFloat( b, 0 ) + + SubFloat( a, 1 ) * SubFloat( b, 1 ) + + SubFloat( a, 2 ) * SubFloat( b, 2 ); + return ReplicateX4( flDot ); +} + +FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b ) +{ + float flDot = SubFloat( a, 0 ) * SubFloat( b, 0 ) + + SubFloat( a, 1 ) * SubFloat( b, 1 ) + + SubFloat( a, 2 ) * SubFloat( b, 2 ) + + SubFloat( a, 3 ) * SubFloat( b, 3 ); + return ReplicateX4( flDot ); +} + +// Clamps the components of a vector to a specified minimum and maximum range. +FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max) +{ + return MaxSIMD( min, MinSIMD( max, in ) ); +} + +// Squelch the w component of a vector to +0.0. 
+// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy) +FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a ) +{ + fltx4 retval; + retval = a; + SubFloat( retval, 0 ) = 0; + return retval; +} + +FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD ) +{ + return *( reinterpret_cast< const fltx4 *> ( pSIMD ) ); +} + +FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD ) +{ + return *( reinterpret_cast< const fltx4 *> ( pSIMD ) ); +} + +FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD ) +{ + return *( reinterpret_cast< const fltx4 *> ( pSIMD ) ); +} + +// for the transitional class -- load a 3-by VectorAligned and squash its w component +FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD ) +{ + fltx4 retval = LoadAlignedSIMD(pSIMD.Base()); + // squelch w + SubInt( retval, 3 ) = 0; + return retval; +} + +FORCEINLINE void StoreAlignedSIMD( float *pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a; +} + +FORCEINLINE void StoreUnalignedSIMD( float *pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a; +} + +FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a ) +{ + *pSIMD = SubFloat(a, 0); + *(pSIMD+1) = SubFloat(a, 1); + *(pSIMD+2) = SubFloat(a, 2); +} + +// strongly typed -- syntactic castor oil used for typechecking as we transition to SIMD +FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a ) +{ + StoreAlignedSIMD(pSIMD->Base(),a); +} + +FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w ) +{ +#define SWAP_FLOATS( _a_, _ia_, _b_, _ib_ ) { float tmp = SubFloat( _a_, _ia_ ); SubFloat( _a_, _ia_ ) = SubFloat( _b_, _ib_ ); SubFloat( _b_, _ib_ ) = tmp; } + SWAP_FLOATS( x, 1, y, 0 ); + SWAP_FLOATS( x, 2, z, 0 ); + SWAP_FLOATS( x, 3, w, 0 ); + SWAP_FLOATS( y, 2, z, 1 ); + SWAP_FLOATS( y, 3, w, 1 ); + SWAP_FLOATS( z, 3, w, 2 ); +} + +// find the lowest component of a.x, a.y, a.z, +// and replicate it to the whole return 
value. +FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a ) +{ + float lowest = min( min( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2)); + return ReplicateX4(lowest); +} + +// find the highest component of a.x, a.y, a.z, +// and replicate it to the whole return value. +FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a ) +{ + float highest = max( max( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2)); + return ReplicateX4(highest); +} + +// Fixed-point conversion and save as SIGNED INTS. +// pDest->x = Int (vSrc.x) +// note: some architectures have means of doing +// fixed point conversion when the fix depth is +// specified as an immediate.. but there is no way +// to guarantee an immediate as a parameter to function +// like this. +FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc) +{ + (*pDest)[0] = SubFloat(vSrc, 0); + (*pDest)[1] = SubFloat(vSrc, 1); + (*pDest)[2] = SubFloat(vSrc, 2); + (*pDest)[3] = SubFloat(vSrc, 3); +} + +// ------------------------------------ +// INTEGER SIMD OPERATIONS. +// ------------------------------------ +// splat all components of a vector to a signed immediate int number. 
+FORCEINLINE fltx4 IntSetImmediateSIMD( int nValue ) +{ + fltx4 retval; + SubInt( retval, 0 ) = SubInt( retval, 1 ) = SubInt( retval, 2 ) = SubInt( retval, 3) = nValue; + return retval; +} + +// Load 4 aligned words into a SIMD register +FORCEINLINE i32x4 LoadAlignedIntSIMD(const void * RESTRICT pSIMD) +{ + return *( reinterpret_cast< const i32x4 *> ( pSIMD ) ); +} + +// Load 4 unaligned words into a SIMD register +FORCEINLINE i32x4 LoadUnalignedIntSIMD( const void * RESTRICT pSIMD) +{ + return *( reinterpret_cast< const i32x4 *> ( pSIMD ) ); +} + +// save into four words, 16-byte aligned +FORCEINLINE void StoreAlignedIntSIMD( int32 *pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a; +} + +FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< i32x4 *> ( pSIMD.Base() ) ) = a; +} + +FORCEINLINE void StoreUnalignedIntSIMD( int32 *pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a; +} + +// Take a fltx4 containing fixed-point uints and +// return them as single precision floats. No +// fixed point conversion is done. +FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const u32x4 &vSrcA ) +{ + Assert(0); /* pc has no such operation */ + fltx4 retval; + SubFloat( retval, 0 ) = ( (float) SubInt( retval, 0 ) ); + SubFloat( retval, 1 ) = ( (float) SubInt( retval, 1 ) ); + SubFloat( retval, 2 ) = ( (float) SubInt( retval, 2 ) ); + SubFloat( retval, 3 ) = ( (float) SubInt( retval, 3 ) ); + return retval; +} + + +#if 0 /* pc has no such op */ +// Take a fltx4 containing fixed-point sints and +// return them as single precision floats. No +// fixed point conversion is done. 
+FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA ) +{ + fltx4 retval; + SubFloat( retval, 0 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[0])) ); + SubFloat( retval, 1 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[1])) ); + SubFloat( retval, 2 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[2])) ); + SubFloat( retval, 3 ) = ( (float) (reinterpret_cast<int32 *>(&vSrcA.m128_s32[3])) ); + return retval; +} + + +/* + works on fltx4's as if they are four uints. + the first parameter contains the words to be shifted, + the second contains the amount to shift by AS INTS + + for i = 0 to 3 + shift = vSrcB_i*32:(i*32)+4 + vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift +*/ +FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB) +{ + i32x4 retval; + SubInt(retval, 0) = SubInt(vSrcA, 0) << SubInt(vSrcB, 0); + SubInt(retval, 1) = SubInt(vSrcA, 1) << SubInt(vSrcB, 1); + SubInt(retval, 2) = SubInt(vSrcA, 2) << SubInt(vSrcB, 2); + SubInt(retval, 3) = SubInt(vSrcA, 3) << SubInt(vSrcB, 3); + + + return retval; +} +#endif + +#elif ( defined( _X360 ) ) + +//--------------------------------------------------------------------- +// X360 implementation +//--------------------------------------------------------------------- + +FORCEINLINE float & FloatSIMD( fltx4 & a, int idx ) +{ + fltx4_union & a_union = (fltx4_union &)a; + return a_union.m128_f32[idx]; +} + +FORCEINLINE unsigned int & UIntSIMD( fltx4 & a, int idx ) +{ + fltx4_union & a_union = (fltx4_union &)a; + return a_union.m128_u32[idx]; +} + +FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b ) +{ + return __vaddfp( a, b ); +} + +FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b +{ + return __vsubfp( a, b ); +} + +FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b +{ + return __vmulfp( a, b ); +} + +FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c +{ + return 
__vmaddfp( a, b, c ); +} + +FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b +{ + return __vnmsubfp( a, b, c ); +}; + +FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b ) +{ + return __vmsum3fp( a, b ); +} + +FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b ) +{ + return __vmsum4fp( a, b ); +} + +FORCEINLINE fltx4 SinSIMD( const fltx4 &radians ) +{ + return XMVectorSin( radians ); +} + +FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) +{ + XMVectorSinCos( &sine, &cosine, radians ); +} + +FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) +{ + XMVectorSinCos( &sine, &cosine, radians ); +} + +FORCEINLINE void CosSIMD( fltx4 &cosine, const fltx4 &radians ) +{ + cosine = XMVectorCos( radians ); +} + +FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine ) +{ + return XMVectorASin( sine ); +} + +FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs ) +{ + return XMVectorACos( cs ); +} + +// tan^1(a/b) .. 
ie, pass sin in as a and cos in as b +FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b ) +{ + return XMVectorATan2( a, b ); +} + +// DivSIMD defined further down, since it uses ReciprocalSIMD + +FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b) +{ + return __vmaxfp( a, b ); +} + +FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b) +{ + return __vminfp( a, b ); +} + +FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b +{ + return __vand( a, b ); +} + +FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b +{ + // NOTE: a and b are swapped in the call: SSE complements the first argument, VMX the second + return __vandc( b, a ); +} + +FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b +{ + return __vxor( a, b ); +} + +FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b +{ + return __vor( a, b ); +} + +FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a +{ + return XMVectorNegate(a); +} + +FORCEINLINE bool IsAllZeros( const fltx4 & a ) // all floats of a zero? +{ + unsigned int equalFlags = 0; + __vcmpeqfpR( a, Four_Zeros, &equalFlags ); + return XMComparisonAllTrue( equalFlags ); +} + +FORCEINLINE bool IsAnyZeros( const fltx4 & a ) // any floats are zero? +{ + unsigned int conditionregister; + XMVectorEqualR(&conditionregister, a, XMVectorZero()); + return XMComparisonAnyTrue(conditionregister); +} + +FORCEINLINE bool IsAnyXYZZero( const fltx4 &a ) // are any of x,y,z zero? +{ + // copy a's x component into w, in case w was zero. 
+ fltx4 temp = __vrlimi(a, a, 1, 1); + unsigned int conditionregister; + XMVectorEqualR(&conditionregister, temp, XMVectorZero()); + return XMComparisonAnyTrue(conditionregister); +} + +// for branching when a.xyzw > b.xyzw +FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b ) +{ + unsigned int cr; + XMVectorGreaterR(&cr,a,b); + return XMComparisonAllTrue(cr); +} + +// for branching when a.xyzw >= b.xyzw +FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b ) +{ + unsigned int cr; + XMVectorGreaterOrEqualR(&cr,a,b); + return XMComparisonAllTrue(cr); +} + +// For branching if all a.xyzw == b.xyzw +FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b ) +{ + unsigned int cr; + XMVectorEqualR(&cr,a,b); + return XMComparisonAllTrue(cr); +} + + +FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set +{ + // NOTE: this maps to SSE way better than it does to VMX (most code uses IsAnyNegative(), though) + int nRet = 0; + + const fltx4_union & a_union = (const fltx4_union &)a; + nRet |= ( a_union.m128_u32[0] & 0x80000000 ) >> 31; // sign(x) -> bit 0 + nRet |= ( a_union.m128_u32[1] & 0x80000000 ) >> 30; // sign(y) -> bit 1 + nRet |= ( a_union.m128_u32[2] & 0x80000000 ) >> 29; // sign(z) -> bit 2 + nRet |= ( a_union.m128_u32[3] & 0x80000000 ) >> 28; // sign(w) -> bit 3 + + return nRet; +} + +// Squelch the w component of a vector to +0.0. 
+// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy) +FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a ) +{ + return __vrlimi( a, __vzero(), 1, 0 ); +} + +FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0) +{ + // NOTE: this tests the top bits of each vector element using integer math + // (so it ignores NaNs - it will return true for "-NaN") + unsigned int equalFlags = 0; + fltx4 signMask = __vspltisw( -1 ); // 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF (low order 5 bits of each element = 31) + signMask = __vslw( signMask, signMask ); // 0x80000000 0x80000000 0x80000000 0x80000000 + __vcmpequwR( Four_Zeros, __vand( signMask, a ), &equalFlags ); + return !XMComparisonAllTrue( equalFlags ); +} + +FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0 +{ + return __vcmpeqfp( a, b ); +} + + +FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0 +{ + return __vcmpgtfp( a, b ); +} + +FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0 +{ + return __vcmpgefp( a, b ); +} + +FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a<b) ? ~0:0 +{ + return __vcmpgtfp( b, a ); +} + +FORCEINLINE fltx4 CmpLeSIMD( const fltx4 & a, const fltx4 & b ) // (a<=b) ? ~0:0 +{ + return __vcmpgefp( b, a ); +} + +FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <= b && a >= -b) ? ~0 : 0 +{ + return XMVectorInBounds( a, b ); +} + +// returned[i] = ReplacementMask[i] == 0 ? OldValue : NewValue +FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue ) +{ + return __vsel( OldValue, NewValue, ReplacementMask ); +} + +// AKA "Broadcast", "Splat" +FORCEINLINE fltx4 ReplicateX4( float flValue ) // a,a,a,a +{ + // NOTE: if flValue comes from a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) 
+ float * pValue = &flValue; + Assert( pValue ); + Assert( ((unsigned int)pValue & 3) == 0); + return __vspltw( __lvlx( pValue, 0 ), 0 ); +} + +FORCEINLINE fltx4 ReplicateX4( const float *pValue ) // a,a,a,a +{ + Assert( pValue ); + return __vspltw( __lvlx( pValue, 0 ), 0 ); +} + +/// replicate a single 32 bit integer value to all 4 components of an m128 +FORCEINLINE fltx4 ReplicateIX4( int nValue ) +{ + // NOTE: if nValue comes from a register, this causes a Load-Hit-Store stall (should not mix ints with fltx4s!) + int * pValue = &nValue; + Assert( pValue ); + Assert( ((unsigned int)pValue & 3) == 0); + return __vspltw( __lvlx( pValue, 0 ), 0 ); +} + +// Round towards positive infinity +FORCEINLINE fltx4 CeilSIMD( const fltx4 &a ) +{ + return __vrfip(a); +} + +// Round towards nearest integer +FORCEINLINE fltx4 RoundSIMD( const fltx4 &a ) +{ + return __vrfin(a); +} + +// Round towards negative infinity +FORCEINLINE fltx4 FloorSIMD( const fltx4 &a ) +{ + return __vrfim(a); +} + +FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less +{ + // This is emulated from rsqrt + return XMVectorSqrtEst( a ); +} + +FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a) +{ + // This is emulated from rsqrt + return XMVectorSqrt( a ); +} + +FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less +{ + return __vrsqrtefp( a ); +} + +FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a ) +{ + // Convert zeros to epsilons + fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); + fltx4 a_safe = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); + return ReciprocalSqrtEstSIMD( a_safe ); +} + +FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a) +{ + // This uses Newton-Raphson to improve the HW result + return XMVectorReciprocalSqrt( a ); +} + +FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less +{ + return __vrefp( a ); +} + +/// 1/x for all 4 values. 
uses reciprocal approximation instruction plus newton iteration. +/// No error checking! +FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a +{ + // This uses Newton-Raphson to improve the HW result + return XMVectorReciprocal( a ); +} + +// FIXME: on 360, this is very slow, since it uses ReciprocalSIMD (do we need DivEstSIMD?) +FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b +{ + return MulSIMD( ReciprocalSIMD( b ), a ); +} + +/// 1/x for all 4 values. +/// 1/0 will result in a big but NOT infinite result +FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a ) +{ + // Convert zeros to epsilons + fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); + fltx4 a_safe = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); + return ReciprocalEstSIMD( a_safe ); +} + +FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a ) +{ + // Convert zeros to epsilons + fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); + fltx4 a_safe = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); + return ReciprocalSIMD( a_safe ); + + // FIXME: This could be faster (BUT: it doesn't preserve the sign of -0.0, whereas the above does) + // fltx4 zeroMask = CmpEqSIMD( Four_Zeros, a ); + // fltx4 a_safe = XMVectorSelect( a, Four_Epsilons, zeroMask ); + // return ReciprocalSIMD( a_safe ); +} + +// CHRISG: is it worth doing integer bitfiddling for this? +// 2^x for all values (the antilog) +FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower ) +{ + return XMVectorExp(toPower); +} + +// Clamps the components of a vector to a specified minimum and maximum range. +FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max) +{ + return XMVectorClamp(in, min, max); +} + +FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD ) +{ + return XMLoadVector4( pSIMD ); +} + +// load a 3-vector (as opposed to LoadUnalignedSIMD, which loads a 4-vec). 
+FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD ) +{ + return XMLoadVector3( pSIMD ); +} + +FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD ) +{ + return *( reinterpret_cast< const fltx4 *> ( pSIMD ) ); +} + +// for the transitional class -- load a 3-by VectorAligned and squash its w component +FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD ) +{ + fltx4 out = XMLoadVector3A(pSIMD.Base()); + // squelch w + return __vrlimi( out, __vzero(), 1, 0 ); +} + +// for the transitional class -- load a 3-by VectorAligned and squash its w component +FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned * RESTRICT pSIMD ) +{ + fltx4 out = XMLoadVector3A(pSIMD); + // squelch w + return __vrlimi( out, __vzero(), 1, 0 ); +} + +FORCEINLINE void StoreAlignedSIMD( float *pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a; +} + +FORCEINLINE void StoreUnalignedSIMD( float *pSIMD, const fltx4 & a ) +{ + XMStoreVector4( pSIMD, a ); +} + +FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a ) +{ + XMStoreVector3( pSIMD, a ); +} + + +// strongly typed -- for typechecking as we transition to SIMD +FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a ) +{ + XMStoreVector3A(pSIMD->Base(),a); +} + + +// Fixed-point conversion and save as SIGNED INTS. +// pDest->x = Int (vSrc.x) +// note: some architectures have means of doing +// fixed point conversion when the fix depth is +// specified as an immediate.. but there is no way +// to guarantee an immediate as a parameter to function +// like this. 
+FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc) +{ + fltx4 asInt = __vctsxs( vSrc, 0 ); + XMStoreVector4A(pDest->Base(), asInt); +} + +FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w ) +{ + XMMATRIX xyzwMatrix = _XMMATRIX( x, y, z, w ); + xyzwMatrix = XMMatrixTranspose( xyzwMatrix ); + x = xyzwMatrix.r[0]; + y = xyzwMatrix.r[1]; + z = xyzwMatrix.r[2]; + w = xyzwMatrix.r[3]; +} + +// Return one in the fastest way -- faster even than loading. +FORCEINLINE fltx4 LoadZeroSIMD( void ) +{ + return XMVectorZero(); +} + +// Return one in the fastest way -- faster even than loading. +FORCEINLINE fltx4 LoadOneSIMD( void ) +{ + return XMVectorSplatOne(); +} + +FORCEINLINE fltx4 SplatXSIMD( fltx4 a ) +{ + return XMVectorSplatX( a ); +} + +FORCEINLINE fltx4 SplatYSIMD( fltx4 a ) +{ + return XMVectorSplatY( a ); +} + +FORCEINLINE fltx4 SplatZSIMD( fltx4 a ) +{ + return XMVectorSplatZ( a ); +} + +FORCEINLINE fltx4 SplatWSIMD( fltx4 a ) +{ + return XMVectorSplatW( a ); +} + +FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x ) +{ + fltx4 result = __vrlimi(a, x, 8, 0); + return result; +} + +FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y ) +{ + fltx4 result = __vrlimi(a, y, 4, 0); + return result; +} + +FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z ) +{ + fltx4 result = __vrlimi(a, z, 2, 0); + return result; +} + +FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w ) +{ + fltx4 result = __vrlimi(a, w, 1, 0); + return result; +} + +FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue ) +{ + static int s_nVrlimiMask[4] = { 8, 4, 2, 1 }; + fltx4 val = ReplicateX4( flValue ); + fltx4 result = __vrlimi(a, val, s_nVrlimiMask[nComponent], 0); + return result; +} + +FORCEINLINE fltx4 RotateLeft( const fltx4 & a ) +{ + fltx4 compareOne = a; + return __vrlimi( compareOne, a, 8 | 4 | 2 | 1, 1 ); +} + +FORCEINLINE fltx4 RotateLeft2( const fltx4 & a ) +{ + fltx4 
compareOne = a; + return __vrlimi( compareOne, a, 8 | 4 | 2 | 1, 2 ); +} + + + +// find the lowest component of a.x, a.y, a.z, +// and replicate it to the whole return value. +// ignores a.w. +// Though this is only five instructions long, +// they are all dependent, making this stall city. +// Forcing this inline should hopefully help with scheduling. +FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a ) +{ + // a is [x,y,z,G] (where G is garbage) + // rotate left by one + fltx4 compareOne = a ; + compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 ); + // compareOne is [y,z,G,G] + fltx4 retval = MinSIMD( a, compareOne ); + // retVal is [min(x,y), min(y,z), G, G] + compareOne = __vrlimi( compareOne, a, 8 , 2); + // compareOne is [z, G, G, G] + retval = MinSIMD( retval, compareOne ); + // retVal = [ min(min(x,y),z), G, G, G ] + + // splat the x component out to the whole vector and return + return SplatXSIMD( retval ); +} + +// find the highest component of a.x, a.y, a.z, +// and replicate it to the whole return value. +// ignores a.w. +// Though this is only five instructions long, +// they are all dependent, making this stall city. +// Forcing this inline should hopefully help with scheduling. +FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a ) +{ + // a is [x,y,z,G] (where G is garbage) + // rotate left by one + fltx4 compareOne = a ; + compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 ); + // compareOne is [y,z,G,G] + fltx4 retval = MaxSIMD( a, compareOne ); + // retVal is [max(x,y), max(y,z), G, G] + compareOne = __vrlimi( compareOne, a, 8 , 2); + // compareOne is [z, G, G, G] + retval = MaxSIMD( retval, compareOne ); + // retVal = [ max(max(x,y),z), G, G, G ] + + // splat the x component out to the whole vector and return + return SplatXSIMD( retval ); +} + + +// Transform many (horizontal) points in-place by a 3x4 matrix, +// here already loaded onto three fltx4 registers. +// The points must be stored as 16-byte aligned. 
They are points +// and not vectors because we assume the w-component to be 1. +// To spare yourself the annoyance of loading the matrix yourself, +// use one of the overloads below. +void TransformManyPointsBy(VectorAligned * RESTRICT pVectors, unsigned int numVectors, FLTX4 mRow1, FLTX4 mRow2, FLTX4 mRow3); + +// Transform many (horizontal) points in-place by a 3x4 matrix. +// The points must be stored as 16-byte aligned. They are points +// and not vectors because we assume the w-component to be 1. +// In this function, the matrix need not be aligned. +FORCEINLINE void TransformManyPointsBy(VectorAligned * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t &pMatrix) +{ + return TransformManyPointsBy(pVectors, numVectors, + LoadUnalignedSIMD( pMatrix[0] ), LoadUnalignedSIMD( pMatrix[1] ), LoadUnalignedSIMD( pMatrix[2] ) ); +} + +// Transform many (horizontal) points in-place by a 3x4 matrix. +// The points must be stored as 16-byte aligned. They are points +// and not vectors because we assume the w-component to be 1. +// In this function, the matrix must itself be aligned on a 16-byte +// boundary. +FORCEINLINE void TransformManyPointsByA(VectorAligned * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t &pMatrix) +{ + return TransformManyPointsBy(pVectors, numVectors, + LoadAlignedSIMD( pMatrix[0] ), LoadAlignedSIMD( pMatrix[1] ), LoadAlignedSIMD( pMatrix[2] ) ); +} + +// ------------------------------------ +// INTEGER SIMD OPERATIONS. 
+// ------------------------------------ + +// Load 4 aligned words into a SIMD register +FORCEINLINE i32x4 LoadAlignedIntSIMD( const void * RESTRICT pSIMD) +{ + return XMLoadVector4A(pSIMD); +} + +// Load 4 unaligned words into a SIMD register +FORCEINLINE i32x4 LoadUnalignedIntSIMD(const void * RESTRICT pSIMD) +{ + return XMLoadVector4( pSIMD ); +} + +// save into four words, 16-byte aligned +FORCEINLINE void StoreAlignedIntSIMD( int32 *pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a; +} + +FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< i32x4 *> ( pSIMD.Base() ) ) = a; +} + +FORCEINLINE void StoreUnalignedIntSIMD( int32 *pSIMD, const fltx4 & a ) +{ + XMStoreVector4(pSIMD, a); +} + + +// Take a fltx4 containing fixed-point uints and +// return them as single precision floats. No +// fixed point conversion is done. +FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const i32x4 &vSrcA ) +{ + return __vcfux( vSrcA, 0 ); +} + + +// Take a fltx4 containing fixed-point sints and +// return them as single precision floats. No +// fixed point conversion is done. +FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA ) +{ + return __vcfsx( vSrcA, 0 ); +} + +// Take a fltx4 containing fixed-point uints and +// return them as single precision floats. Each uint +// will be divided by 2^immed after conversion +// (eg, this is fixed point math). +/* as if: + FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const i32x4 &vSrcA, unsigned int uImmed ) + { + return __vcfux( vSrcA, uImmed ); + } +*/ +#define UnsignedFixedIntConvertToFltSIMD(vSrcA, uImmed) (__vcfux( (vSrcA), (uImmed) )) + +// Take a fltx4 containing fixed-point sints and +// return them as single precision floats. Each int +// will be divided by 2^immed (eg, this is fixed point +// math). 
+/* as if: + FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA, unsigned int uImmed ) + { + return __vcfsx( vSrcA, uImmed ); + } +*/ +#define SignedFixedIntConvertToFltSIMD(vSrcA, uImmed) (__vcfsx( (vSrcA), (uImmed) )) + +// set all components of a vector to a signed immediate int number. +/* as if: + FORCEINLINE fltx4 IntSetImmediateSIMD(int toImmediate) + { + return __vspltisw( toImmediate ); + } +*/ +#define IntSetImmediateSIMD(x) (__vspltisw(x)) + +/* + works on fltx4's as if they are four uints. + the first parameter contains the words to be shifted, + the second contains the amount to shift by AS INTS + + for i = 0 to 3 + shift = vSrcB_i*32:(i*32)+4 + vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift +*/ +FORCEINLINE fltx4 IntShiftLeftWordSIMD(fltx4 vSrcA, fltx4 vSrcB) +{ + return __vslw(vSrcA, vSrcB); +} + +FORCEINLINE float SubFloat( const fltx4 & a, int idx ) +{ + // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) 
+ const fltx4_union & a_union = (const fltx4_union &)a; + return a_union.m128_f32[ idx ]; +} + +FORCEINLINE float & SubFloat( fltx4 & a, int idx ) +{ + fltx4_union & a_union = (fltx4_union &)a; + return a_union.m128_f32[idx]; +} + +FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx ) +{ + fltx4 t = __vctuxs( a, 0 ); + const fltx4_union & a_union = (const fltx4_union &)t; + return a_union.m128_u32[idx]; +} + + +FORCEINLINE uint32 SubInt( const fltx4 & a, int idx ) +{ + const fltx4_union & a_union = (const fltx4_union &)a; + return a_union.m128_u32[idx]; +} + +FORCEINLINE uint32 & SubInt( fltx4 & a, int idx ) +{ + fltx4_union & a_union = (fltx4_union &)a; + return a_union.m128_u32[idx]; +} + +#else + +//--------------------------------------------------------------------- +// Intel/SSE implementation +//--------------------------------------------------------------------- + +FORCEINLINE void StoreAlignedSIMD( float * RESTRICT pSIMD, const fltx4 & a ) +{ + _mm_store_ps( pSIMD, a ); +} + +FORCEINLINE void StoreUnalignedSIMD( float * RESTRICT pSIMD, const fltx4 & a ) +{ + _mm_storeu_ps( pSIMD, a ); +} + + +FORCEINLINE fltx4 RotateLeft( const fltx4 & a ); +FORCEINLINE fltx4 RotateLeft2( const fltx4 & a ); + +FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a ) +{ + _mm_store_ss(pSIMD, a); + _mm_store_ss(pSIMD+1, RotateLeft(a)); + _mm_store_ss(pSIMD+2, RotateLeft2(a)); +} + +// strongly typed -- syntactic castor oil used for typechecking as we transition to SIMD +FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a ) +{ + StoreAlignedSIMD( pSIMD->Base(),a ); +} + +FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD ) +{ + return _mm_load_ps( reinterpret_cast< const float *> ( pSIMD ) ); +} + +FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b +{ + return _mm_and_ps( a, b ); +} + +FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b +{ + return _mm_andnot_ps( a, b ); 
+} + +FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b +{ + return _mm_xor_ps( a, b ); +} + +FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b +{ + return _mm_or_ps( a, b ); +} + +// Squelch the w component of a vector to +0.0. +// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy) +FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a ) +{ + return AndSIMD( a, LoadAlignedSIMD( g_SIMD_clear_wmask ) ); +} + +// for the transitional class -- load a 3-by VectorAligned and squash its w component +FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD ) +{ + return SetWToZeroSIMD( LoadAlignedSIMD(pSIMD.Base()) ); +} + +FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD ) +{ + return _mm_loadu_ps( reinterpret_cast<const float *>( pSIMD ) ); +} + +FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD ) +{ + return _mm_loadu_ps( reinterpret_cast<const float *>( pSIMD ) ); +} + +/// replicate a single 32 bit integer value to all 4 components of an m128 +FORCEINLINE fltx4 ReplicateIX4( int i ) +{ + fltx4 value = _mm_set_ss( * ( ( float *) &i ) );; + return _mm_shuffle_ps( value, value, 0); +} + + +FORCEINLINE fltx4 ReplicateX4( float flValue ) +{ + __m128 value = _mm_set_ss( flValue ); + return _mm_shuffle_ps( value, value, 0 ); +} + + +FORCEINLINE float SubFloat( const fltx4 & a, int idx ) +{ + // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) 
+#ifndef POSIX + return a.m128_f32[ idx ]; +#else + return (reinterpret_cast<float const *>(&a))[idx]; +#endif +} + +FORCEINLINE float & SubFloat( fltx4 & a, int idx ) +{ +#ifndef POSIX + return a.m128_f32[ idx ]; +#else + return (reinterpret_cast<float *>(&a))[idx]; +#endif +} + +FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx ) +{ + return (uint32)SubFloat(a,idx); +} + +FORCEINLINE uint32 SubInt( const fltx4 & a, int idx ) +{ +#ifndef POSIX + return a.m128_u32[idx]; +#else + return (reinterpret_cast<uint32 const *>(&a))[idx]; +#endif +} + +FORCEINLINE uint32 & SubInt( fltx4 & a, int idx ) +{ +#ifndef POSIX + return a.m128_u32[idx]; +#else + return (reinterpret_cast<uint32 *>(&a))[idx]; +#endif +} + +// Return one in the fastest way -- on the x360, faster even than loading. +FORCEINLINE fltx4 LoadZeroSIMD( void ) +{ + return Four_Zeros; +} + +// Return one in the fastest way -- on the x360, faster even than loading. +FORCEINLINE fltx4 LoadOneSIMD( void ) +{ + return Four_Ones; +} + +FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue ) +{ + return OrSIMD( + AndSIMD( ReplacementMask, NewValue ), + AndNotSIMD( ReplacementMask, OldValue ) ); +} + +// remember, the SSE numbers its words 3 2 1 0 +// The way we want to specify shuffles is backwards from the default +// MM_SHUFFLE_REV is in array index order (default is reversed) +#define MM_SHUFFLE_REV(a,b,c,d) _MM_SHUFFLE(d,c,b,a) + +FORCEINLINE fltx4 SplatXSIMD( fltx4 const & a ) +{ + return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 0, 0, 0, 0 ) ); +} + +FORCEINLINE fltx4 SplatYSIMD( fltx4 const &a ) +{ + return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 1, 1, 1, 1 ) ); +} + +FORCEINLINE fltx4 SplatZSIMD( fltx4 const &a ) +{ + return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 2, 2, 2, 2 ) ); +} + +FORCEINLINE fltx4 SplatWSIMD( fltx4 const &a ) +{ + return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 3, 3, 3 ) ); +} + +FORCEINLINE fltx4 SetXSIMD( const fltx4& a, 
const fltx4& x ) +{ + fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[0] ), x, a ); + return result; +} + +FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y ) +{ + fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[1] ), y, a ); + return result; +} + +FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z ) +{ + fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[2] ), z, a ); + return result; +} + +FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w ) +{ + fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[3] ), w, a ); + return result; +} + +FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue ) +{ + fltx4 val = ReplicateX4( flValue ); + fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[nComponent] ), val, a ); + return result; +} + +// a b c d -> b c d a +FORCEINLINE fltx4 RotateLeft( const fltx4 & a ) +{ + return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 1, 2, 3, 0 ) ); +} + +// a b c d -> c d a b +FORCEINLINE fltx4 RotateLeft2( const fltx4 & a ) +{ + return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 2, 3, 0, 1 ) ); +} + +// a b c d -> d a b c +FORCEINLINE fltx4 RotateRight( const fltx4 & a ) +{ + return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 0, 3, 2, 1) ); +} + +// a b c d -> c d a b +FORCEINLINE fltx4 RotateRight2( const fltx4 & a ) +{ + return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 1, 0, 3, 2 ) ); +} + + +FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b ) // a+b +{ + return _mm_add_ps( a, b ); +}; + +FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b +{ + return _mm_sub_ps( a, b ); +}; + +FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b +{ + return _mm_mul_ps( a, b ); +}; + +FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b +{ + return _mm_div_ps( a, b ); +}; + +FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c +{ + return AddSIMD( 
MulSIMD(a,b), c ); +} + +FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b +{ + return SubSIMD( c, MulSIMD(a,b) ); +}; + +FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b ) +{ + fltx4 m = MulSIMD( a, b ); + float flDot = SubFloat( m, 0 ) + SubFloat( m, 1 ) + SubFloat( m, 2 ); + return ReplicateX4( flDot ); +} + +FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b ) +{ + fltx4 m = MulSIMD( a, b ); + float flDot = SubFloat( m, 0 ) + SubFloat( m, 1 ) + SubFloat( m, 2 ) + SubFloat( m, 3 ); + return ReplicateX4( flDot ); +} + +//TODO: implement as four-way Taylor series (see xbox implementation) +FORCEINLINE fltx4 SinSIMD( const fltx4 &radians ) +{ + fltx4 result; + SubFloat( result, 0 ) = sin( SubFloat( radians, 0 ) ); + SubFloat( result, 1 ) = sin( SubFloat( radians, 1 ) ); + SubFloat( result, 2 ) = sin( SubFloat( radians, 2 ) ); + SubFloat( result, 3 ) = sin( SubFloat( radians, 3 ) ); + return result; +} + +FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) +{ + // FIXME: Make a fast SSE version + SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) ); + SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) ); + SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) ); +} + +FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) // a*b + c +{ + // FIXME: Make a fast SSE version + SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) ); + SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) ); + SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) ); + SinCos( SubFloat( radians, 3 ), &SubFloat( sine, 3 ), &SubFloat( cosine, 3 ) ); +} + +//TODO: implement as four-way Taylor series (see xbox implementation) +FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine ) +{ + // FIXME: Make a fast SSE version + fltx4 result; + 
SubFloat( result, 0 ) = asin( SubFloat( sine, 0 ) ); + SubFloat( result, 1 ) = asin( SubFloat( sine, 1 ) ); + SubFloat( result, 2 ) = asin( SubFloat( sine, 2 ) ); + SubFloat( result, 3 ) = asin( SubFloat( sine, 3 ) ); + return result; +} + +FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs ) +{ + fltx4 result; + SubFloat( result, 0 ) = acos( SubFloat( cs, 0 ) ); + SubFloat( result, 1 ) = acos( SubFloat( cs, 1 ) ); + SubFloat( result, 2 ) = acos( SubFloat( cs, 2 ) ); + SubFloat( result, 3 ) = acos( SubFloat( cs, 3 ) ); + return result; +} + +// tan^1(a/b) .. ie, pass sin in as a and cos in as b +FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b ) +{ + fltx4 result; + SubFloat( result, 0 ) = atan2( SubFloat( a, 0 ), SubFloat( b, 0 ) ); + SubFloat( result, 1 ) = atan2( SubFloat( a, 1 ), SubFloat( b, 1 ) ); + SubFloat( result, 2 ) = atan2( SubFloat( a, 2 ), SubFloat( b, 2 ) ); + SubFloat( result, 3 ) = atan2( SubFloat( a, 3 ), SubFloat( b, 3 ) ); + return result; +} + +FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a +{ + return SubSIMD(LoadZeroSIMD(),a); +} + +FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set +{ + return _mm_movemask_ps( a ); +} + +FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0) +{ + return (0 != TestSignSIMD( a )); +} + +FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0 +{ + return _mm_cmpeq_ps( a, b ); +} + +FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0 +{ + return _mm_cmpgt_ps( a, b ); +} + +FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0 +{ + return _mm_cmpge_ps( a, b ); +} + +FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a<b) ? ~0:0 +{ + return _mm_cmplt_ps( a, b ); +} + +FORCEINLINE fltx4 CmpLeSIMD( const fltx4 & a, const fltx4 & b ) // (a<=b) ? 
~0:0 +{ + return _mm_cmple_ps( a, b ); +} + +// for branching when a.xyzw > b.xyzw +FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b ) +{ + return TestSignSIMD( CmpLeSIMD( a, b ) ) == 0; +} + +// for branching when a.xyzw >= b.xyzw +FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b ) +{ + return TestSignSIMD( CmpLtSIMD( a, b ) ) == 0; +} + +// For branching if all a.xyzw == b.xyzw +FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b ) +{ + return TestSignSIMD( CmpEqSIMD( a, b ) ) == 0xf; +} + +FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <= b && a >= -b) ? ~0 : 0 +{ + return AndSIMD( CmpLeSIMD(a,b), CmpGeSIMD(a, NegSIMD(b)) ); +} + +FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b) +{ + return _mm_min_ps( a, b ); +} + +FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b) +{ + return _mm_max_ps( a, b ); +} + + + +// SSE lacks rounding operations. +// Really. +// You can emulate them by setting the rounding mode for the +// whole processor and then converting to int, and then back again. +// But every time you set the rounding mode, you clear out the +// entire pipeline. So, I can't do them per operation. You +// have to do it once, before the loop that would call these. +// Round towards positive infinity +FORCEINLINE fltx4 CeilSIMD( const fltx4 &a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = ceil( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = ceil( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = ceil( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = ceil( SubFloat( a, 3 ) ); + return retVal; + +} + +fltx4 fabs( const fltx4 & x ); +// Round towards negative infinity +// This is the implementation that was here before; it assumes +// you are in round-to-floor mode, which I guess is usually the +// case for us vis-a-vis SSE. It's totally unnecessary on +// VMX, which has a native floor op. 
+FORCEINLINE fltx4 FloorSIMD( const fltx4 &val ) +{ + fltx4 fl4Abs = fabs( val ); + fltx4 ival = SubSIMD( AddSIMD( fl4Abs, Four_2ToThe23s ), Four_2ToThe23s ); + ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Ones ), ival ); + return XorSIMD( ival, XorSIMD( val, fl4Abs ) ); // restore sign bits +} + + + +inline bool IsAllZeros( const fltx4 & var ) +{ + return TestSignSIMD( CmpEqSIMD( var, Four_Zeros ) ) == 0xF; +} + +FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less +{ + return _mm_sqrt_ps( a ); +} + +FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a) +{ + return _mm_sqrt_ps( a ); +} + +FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less +{ + return _mm_rsqrt_ps( a ); +} + +FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a ) +{ + fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); + fltx4 ret = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); + ret = ReciprocalSqrtEstSIMD( ret ); + return ret; +} + +/// uses newton iteration for higher precision results than ReciprocalSqrtEstSIMD +FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a) +{ + fltx4 guess = ReciprocalSqrtEstSIMD( a ); + // newton iteration for 1/sqrt(a) : y(n+1) = 1/2 (y(n)*(3-a*y(n)^2)); + guess = MulSIMD( guess, SubSIMD( Four_Threes, MulSIMD( a, MulSIMD( guess, guess )))); + guess = MulSIMD( Four_PointFives, guess); + return guess; +} + +FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less +{ + return _mm_rcp_ps( a ); +} + +/// 1/x for all 4 values, more or less +/// 1/0 will result in a big but NOT infinite result +FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a ) +{ + fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); + fltx4 ret = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); + ret = ReciprocalEstSIMD( ret ); + return ret; +} + +/// 1/x for all 4 values. uses reciprocal approximation instruction plus newton iteration. +/// No error checking! 
+FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a +{ + fltx4 ret = ReciprocalEstSIMD( a ); + // newton iteration is: Y(n+1) = 2*Y(n)-a*Y(n)^2 + ret = SubSIMD( AddSIMD( ret, ret ), MulSIMD( a, MulSIMD( ret, ret ) ) ); + return ret; +} + +/// 1/x for all 4 values. +/// 1/0 will result in a big but NOT infinite result +FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a ) +{ + fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); + fltx4 ret = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); + ret = ReciprocalSIMD( ret ); + return ret; +} + +// CHRISG: is it worth doing integer bitfiddling for this? +// 2^x for all values (the antilog) +FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower ) +{ + fltx4 retval; + SubFloat( retval, 0 ) = powf( 2, SubFloat(toPower, 0) ); + SubFloat( retval, 1 ) = powf( 2, SubFloat(toPower, 1) ); + SubFloat( retval, 2 ) = powf( 2, SubFloat(toPower, 2) ); + SubFloat( retval, 3 ) = powf( 2, SubFloat(toPower, 3) ); + + return retval; +} + +// Clamps the components of a vector to a specified minimum and maximum range. +FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max) +{ + return MaxSIMD( min, MinSIMD( max, in ) ); +} + +FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w) +{ + _MM_TRANSPOSE4_PS( x, y, z, w ); +} + +FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 &a ) +{ + // a is [x,y,z,G] (where G is garbage) + // rotate left by one + fltx4 compareOne = RotateLeft( a ); + // compareOne is [y,z,G,x] + fltx4 retval = MinSIMD( a, compareOne ); + // retVal is [min(x,y), ... ] + compareOne = RotateLeft2( a ); + // compareOne is [z, G, x, y] + retval = MinSIMD( retval, compareOne ); + // retVal = [ min(min(x,y),z)..] 
+ // splat the x component out to the whole vector and return + return SplatXSIMD( retval ); + +} + +FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 &a ) +{ + // a is [x,y,z,G] (where G is garbage) + // rotate left by one + fltx4 compareOne = RotateLeft( a ); + // compareOne is [y,z,G,x] + fltx4 retval = MaxSIMD( a, compareOne ); + // retVal is [max(x,y), ... ] + compareOne = RotateLeft2( a ); + // compareOne is [z, G, x, y] + retval = MaxSIMD( retval, compareOne ); + // retVal = [ max(max(x,y),z)..] + // splat the x component out to the whole vector and return + return SplatXSIMD( retval ); + +} + +// ------------------------------------ +// INTEGER SIMD OPERATIONS. +// ------------------------------------ + + +#if 0 /* pc does not have these ops */ +// splat all components of a vector to a signed immediate int number. +FORCEINLINE fltx4 IntSetImmediateSIMD(int to) +{ + //CHRISG: SSE2 has this, but not SSE1. What to do? + fltx4 retval; + SubInt( retval, 0 ) = to; + SubInt( retval, 1 ) = to; + SubInt( retval, 2 ) = to; + SubInt( retval, 3 ) = to; + return retval; +} +#endif + +// Load 4 aligned words into a SIMD register +FORCEINLINE i32x4 LoadAlignedIntSIMD( const void * RESTRICT pSIMD) +{ + return _mm_load_ps( reinterpret_cast<const float *>(pSIMD) ); +} + +// Load 4 unaligned words into a SIMD register +FORCEINLINE i32x4 LoadUnalignedIntSIMD( const void * RESTRICT pSIMD) +{ + return _mm_loadu_ps( reinterpret_cast<const float *>(pSIMD) ); +} + +// save into four words, 16-byte aligned +FORCEINLINE void StoreAlignedIntSIMD( int32 * RESTRICT pSIMD, const fltx4 & a ) +{ + _mm_store_ps( reinterpret_cast<float *>(pSIMD), a ); +} + +FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a ) +{ + _mm_store_ps( reinterpret_cast<float *>(pSIMD.Base()), a ); +} + +FORCEINLINE void StoreUnalignedIntSIMD( int32 * RESTRICT pSIMD, const fltx4 & a ) +{ + _mm_storeu_ps( reinterpret_cast<float *>(pSIMD), a ); +} + + +// CHRISG: the conversion functions all seem to 
operate on m64's only... +// how do we make them work here? + +// Take a fltx4 containing fixed-point uints and +// return them as single precision floats. No +// fixed point conversion is done. +FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const u32x4 &vSrcA ) +{ + fltx4 retval; + SubFloat( retval, 0 ) = ( (float) SubInt( retval, 0 ) ); + SubFloat( retval, 1 ) = ( (float) SubInt( retval, 1 ) ); + SubFloat( retval, 2 ) = ( (float) SubInt( retval, 2 ) ); + SubFloat( retval, 3 ) = ( (float) SubInt( retval, 3 ) ); + return retval; +} + + +// Take a fltx4 containing fixed-point sints and +// return them as single precision floats. No +// fixed point conversion is done. +FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA ) +{ + fltx4 retval; + SubFloat( retval, 0 ) = ( (float) (reinterpret_cast<const int32 *>(&vSrcA)[0])); + SubFloat( retval, 1 ) = ( (float) (reinterpret_cast<const int32 *>(&vSrcA)[1])); + SubFloat( retval, 2 ) = ( (float) (reinterpret_cast<const int32 *>(&vSrcA)[2])); + SubFloat( retval, 3 ) = ( (float) (reinterpret_cast<const int32 *>(&vSrcA)[3])); + return retval; +} + +/* + works on fltx4's as if they are four uints. + the first parameter contains the words to be shifted, + the second contains the amount to shift by AS INTS + + for i = 0 to 3 + shift = vSrcB_i*32:(i*32)+4 + vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift +*/ +FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB) +{ + i32x4 retval; + SubInt(retval, 0) = SubInt(vSrcA, 0) << SubInt(vSrcB, 0); + SubInt(retval, 1) = SubInt(vSrcA, 1) << SubInt(vSrcB, 1); + SubInt(retval, 2) = SubInt(vSrcA, 2) << SubInt(vSrcB, 2); + SubInt(retval, 3) = SubInt(vSrcA, 3) << SubInt(vSrcB, 3); + + + return retval; +} + + +// Fixed-point conversion and save as SIGNED INTS. +// pDest->x = Int (vSrc.x) +// note: some architectures have means of doing +// fixed point conversion when the fix depth is +// specified as an immediate.. 
but there is no way +// to guarantee an immediate as a parameter to function +// like this. +FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc) +{ + __m64 bottom = _mm_cvttps_pi32( vSrc ); + __m64 top = _mm_cvttps_pi32( _mm_movehl_ps(vSrc,vSrc) ); + + *reinterpret_cast<__m64 *>(&(*pDest)[0]) = bottom; + *reinterpret_cast<__m64 *>(&(*pDest)[2]) = top; + + _mm_empty(); +} + + + +#endif + + + +/// class FourVectors stores 4 independent vectors for use in SIMD processing. These vectors are +/// stored in the format x x x x y y y y z z z z so that they can be efficiently SIMD-accelerated. +class ALIGN16 FourVectors +{ +public: + fltx4 x, y, z; + + FORCEINLINE void DuplicateVector(Vector const &v) //< set all 4 vectors to the same vector value + { + x=ReplicateX4(v.x); + y=ReplicateX4(v.y); + z=ReplicateX4(v.z); + } + + FORCEINLINE fltx4 const & operator[](int idx) const + { + return *((&x)+idx); + } + + FORCEINLINE fltx4 & operator[](int idx) + { + return *((&x)+idx); + } + + FORCEINLINE void operator+=(FourVectors const &b) //< add 4 vectors to another 4 vectors + { + x=AddSIMD(x,b.x); + y=AddSIMD(y,b.y); + z=AddSIMD(z,b.z); + } + + FORCEINLINE void operator-=(FourVectors const &b) //< subtract 4 vectors from another 4 + { + x=SubSIMD(x,b.x); + y=SubSIMD(y,b.y); + z=SubSIMD(z,b.z); + } + + FORCEINLINE void operator*=(FourVectors const &b) //< scale all four vectors per component scale + { + x=MulSIMD(x,b.x); + y=MulSIMD(y,b.y); + z=MulSIMD(z,b.z); + } + + FORCEINLINE void operator*=(const fltx4 & scale) //< scale + { + x=MulSIMD(x,scale); + y=MulSIMD(y,scale); + z=MulSIMD(z,scale); + } + + FORCEINLINE void operator*=(float scale) //< uniformly scale all 4 vectors + { + fltx4 scalepacked = ReplicateX4(scale); + *this *= scalepacked; + } + + FORCEINLINE fltx4 operator*(FourVectors const &b) const //< 4 dot products + { + fltx4 dot=MulSIMD(x,b.x); + dot=MaddSIMD(y,b.y,dot); + dot=MaddSIMD(z,b.z,dot); + return dot; + } + + FORCEINLINE fltx4 
operator*(Vector const &b) const //< dot product all 4 vectors with 1 vector + { + fltx4 dot=MulSIMD(x,ReplicateX4(b.x)); + dot=MaddSIMD(y,ReplicateX4(b.y), dot); + dot=MaddSIMD(z,ReplicateX4(b.z), dot); + return dot; + } + + FORCEINLINE void VProduct(FourVectors const &b) //< component by component mul + { + x=MulSIMD(x,b.x); + y=MulSIMD(y,b.y); + z=MulSIMD(z,b.z); + } + FORCEINLINE void MakeReciprocal(void) //< (x,y,z)=(1/x,1/y,1/z) + { + x=ReciprocalSIMD(x); + y=ReciprocalSIMD(y); + z=ReciprocalSIMD(z); + } + + FORCEINLINE void MakeReciprocalSaturate(void) //< (x,y,z)=(1/x,1/y,1/z), 1/0=1.0e23 + { + x=ReciprocalSaturateSIMD(x); + y=ReciprocalSaturateSIMD(y); + z=ReciprocalSaturateSIMD(z); + } + + // Assume the given matrix is a rotation, and rotate these vectors by it. + // If you have a long list of FourVectors structures that you all want + // to rotate by the same matrix, use FourVectors::RotateManyBy() instead. + inline void RotateBy(const matrix3x4_t& matrix); + + /// You can use this to rotate a long array of FourVectors all by the same + /// matrix. The first parameter is the head of the array. The second is the + /// number of vectors to rotate. The third is the matrix. + static void RotateManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix ); + + /// Assume the vectors are points, and transform them in place by the matrix. + inline void TransformBy(const matrix3x4_t& matrix); + + /// You can use this to Transform a long array of FourVectors all by the same + /// matrix. The first parameter is the head of the array. The second is the + /// number of vectors to rotate. The third is the matrix. The fourth is the + /// output buffer, which must not overlap the pVectors buffer. This is not + /// an in-place transformation. 
+ static void TransformManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix, FourVectors * RESTRICT pOut ); + + /// You can use this to Transform a long array of FourVectors all by the same + /// matrix. The first parameter is the head of the array. The second is the + /// number of vectors to rotate. The third is the matrix. The fourth is the + /// output buffer, which must not overlap the pVectors buffer. + /// This is an in-place transformation. + static void TransformManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix ); + + // X(),Y(),Z() - get at the desired component of the i'th (0..3) vector. + FORCEINLINE const float & X(int idx) const + { + // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) + return SubFloat( (fltx4 &)x, idx ); + } + + FORCEINLINE const float & Y(int idx) const + { + return SubFloat( (fltx4 &)y, idx ); + } + + FORCEINLINE const float & Z(int idx) const + { + return SubFloat( (fltx4 &)z, idx ); + } + + FORCEINLINE float & X(int idx) + { + return SubFloat( x, idx ); + } + + FORCEINLINE float & Y(int idx) + { + return SubFloat( y, idx ); + } + + FORCEINLINE float & Z(int idx) + { + return SubFloat( z, idx ); + } + + FORCEINLINE Vector Vec(int idx) const //< unpack one of the vectors + { + return Vector( X(idx), Y(idx), Z(idx) ); + } + + FourVectors(void) + { + } + + FourVectors( FourVectors const &src ) + { + x=src.x; + y=src.y; + z=src.z; + } + + FORCEINLINE void operator=( FourVectors const &src ) + { + x=src.x; + y=src.y; + z=src.z; + } + + /// LoadAndSwizzle - load 4 Vectors into a FourVectors, performing transpose op + FORCEINLINE void LoadAndSwizzle(Vector const &a, Vector const &b, Vector const &c, Vector const &d) + { + // TransposeSIMD has large sub-expressions that the compiler can't eliminate on x360 + // use an unfolded implementation here +#if _X360 + fltx4 tx = LoadUnalignedSIMD( 
&a.x ); + fltx4 ty = LoadUnalignedSIMD( &b.x ); + fltx4 tz = LoadUnalignedSIMD( &c.x ); + fltx4 tw = LoadUnalignedSIMD( &d.x ); + fltx4 r0 = __vmrghw(tx, tz); + fltx4 r1 = __vmrghw(ty, tw); + fltx4 r2 = __vmrglw(tx, tz); + fltx4 r3 = __vmrglw(ty, tw); + + x = __vmrghw(r0, r1); + y = __vmrglw(r0, r1); + z = __vmrghw(r2, r3); +#else + x = LoadUnalignedSIMD( &( a.x )); + y = LoadUnalignedSIMD( &( b.x )); + z = LoadUnalignedSIMD( &( c.x )); + fltx4 w = LoadUnalignedSIMD( &( d.x )); + // now, matrix is: + // x y z ? + // x y z ? + // x y z ? + // x y z ? + TransposeSIMD(x, y, z, w); +#endif + } + + /// LoadAndSwizzleAligned - load 4 Vectors into a FourVectors, performing transpose op. + /// all 4 vectors must be 128 bit boundary + FORCEINLINE void LoadAndSwizzleAligned(const float *RESTRICT a, const float *RESTRICT b, const float *RESTRICT c, const float *RESTRICT d) + { +#if _X360 + fltx4 tx = LoadAlignedSIMD(a); + fltx4 ty = LoadAlignedSIMD(b); + fltx4 tz = LoadAlignedSIMD(c); + fltx4 tw = LoadAlignedSIMD(d); + fltx4 r0 = __vmrghw(tx, tz); + fltx4 r1 = __vmrghw(ty, tw); + fltx4 r2 = __vmrglw(tx, tz); + fltx4 r3 = __vmrglw(ty, tw); + + x = __vmrghw(r0, r1); + y = __vmrglw(r0, r1); + z = __vmrghw(r2, r3); +#else + x = LoadAlignedSIMD( a ); + y = LoadAlignedSIMD( b ); + z = LoadAlignedSIMD( c ); + fltx4 w = LoadAlignedSIMD( d ); + // now, matrix is: + // x y z ? + // x y z ? + // x y z ? + // x y z ? + TransposeSIMD( x, y, z, w ); +#endif + } + + FORCEINLINE void LoadAndSwizzleAligned(Vector const &a, Vector const &b, Vector const &c, Vector const &d) + { + LoadAndSwizzleAligned( &a.x, &b.x, &c.x, &d.x ); + } + + /// return the squared length of all 4 vectors + FORCEINLINE fltx4 length2(void) const + { + return (*this)*(*this); + } + + /// return the approximate length of all 4 vectors. uses the sqrt approximation instruction + FORCEINLINE fltx4 length(void) const + { + return SqrtEstSIMD(length2()); + } + + /// normalize all 4 vectors in place. 
not mega-accurate (uses reciprocal approximation instruction) + FORCEINLINE void VectorNormalizeFast(void) + { + fltx4 mag_sq=(*this)*(*this); // length^2 + (*this) *= ReciprocalSqrtEstSIMD(mag_sq); // *(1.0/sqrt(length^2)) + } + + /// normalize all 4 vectors in place. + FORCEINLINE void VectorNormalize(void) + { + fltx4 mag_sq=(*this)*(*this); // length^2 + (*this) *= ReciprocalSqrtSIMD(mag_sq); // *(1.0/sqrt(length^2)) + } + + /// construct a FourVectors from 4 separate Vectors + FORCEINLINE FourVectors(Vector const &a, Vector const &b, Vector const &c, Vector const &d) + { + LoadAndSwizzle(a,b,c,d); + } + + /// construct a FourVectors from 4 separate Vectors + FORCEINLINE FourVectors(VectorAligned const &a, VectorAligned const &b, VectorAligned const &c, VectorAligned const &d) + { + LoadAndSwizzleAligned(a,b,c,d); + } + + FORCEINLINE fltx4 DistToSqr( FourVectors const &pnt ) + { + fltx4 fl4dX = SubSIMD( pnt.x, x ); + fltx4 fl4dY = SubSIMD( pnt.y, y ); + fltx4 fl4dZ = SubSIMD( pnt.z, z ); + return AddSIMD( MulSIMD( fl4dX, fl4dX), AddSIMD( MulSIMD( fl4dY, fl4dY ), MulSIMD( fl4dZ, fl4dZ ) ) ); + + } + + FORCEINLINE fltx4 TValueOfClosestPointOnLine( FourVectors const &p0, FourVectors const &p1 ) const + { + FourVectors lineDelta = p1; + lineDelta -= p0; + fltx4 OOlineDirDotlineDir = ReciprocalSIMD( p1 * p1 ); + FourVectors v4OurPnt = *this; + v4OurPnt -= p0; + return MulSIMD( OOlineDirDotlineDir, v4OurPnt * lineDelta ); + } + + FORCEINLINE fltx4 DistSqrToLineSegment( FourVectors const &p0, FourVectors const &p1 ) const + { + FourVectors lineDelta = p1; + FourVectors v4OurPnt = *this; + v4OurPnt -= p0; + lineDelta -= p0; + + fltx4 OOlineDirDotlineDir = ReciprocalSIMD( lineDelta * lineDelta ); + + fltx4 fl4T = MulSIMD( OOlineDirDotlineDir, v4OurPnt * lineDelta ); + + fl4T = MinSIMD( fl4T, Four_Ones ); + fl4T = MaxSIMD( fl4T, Four_Zeros ); + lineDelta *= fl4T; + return v4OurPnt.DistToSqr( lineDelta ); + } + +}; + +/// form 4 cross products +inline FourVectors operator 
^(const FourVectors &a, const FourVectors &b) +{ + FourVectors ret; + ret.x=SubSIMD(MulSIMD(a.y,b.z),MulSIMD(a.z,b.y)); + ret.y=SubSIMD(MulSIMD(a.z,b.x),MulSIMD(a.x,b.z)); + ret.z=SubSIMD(MulSIMD(a.x,b.y),MulSIMD(a.y,b.x)); + return ret; +} + +/// component-by-componentwise MAX operator +inline FourVectors maximum(const FourVectors &a, const FourVectors &b) +{ + FourVectors ret; + ret.x=MaxSIMD(a.x,b.x); + ret.y=MaxSIMD(a.y,b.y); + ret.z=MaxSIMD(a.z,b.z); + return ret; +} + +/// component-by-componentwise MIN operator +inline FourVectors minimum(const FourVectors &a, const FourVectors &b) +{ + FourVectors ret; + ret.x=MinSIMD(a.x,b.x); + ret.y=MinSIMD(a.y,b.y); + ret.z=MinSIMD(a.z,b.z); + return ret; +} + +/// calculate reflection vector. incident and normal dir assumed normalized +FORCEINLINE FourVectors VectorReflect( const FourVectors &incident, const FourVectors &normal ) +{ + FourVectors ret = incident; + fltx4 iDotNx2 = incident * normal; + iDotNx2 = AddSIMD( iDotNx2, iDotNx2 ); + FourVectors nPart = normal; + nPart *= iDotNx2; + ret -= nPart; // i-2(n*i)n + return ret; +} + +/// calculate slide vector. removes all components of a vector which are perpendicular to a normal vector. +FORCEINLINE FourVectors VectorSlide( const FourVectors &incident, const FourVectors &normal ) +{ + FourVectors ret = incident; + fltx4 iDotN = incident * normal; + FourVectors nPart = normal; + nPart *= iDotN; + ret -= nPart; // i-(n*i)n + return ret; +} + + +// Assume the given matrix is a rotation, and rotate these vectors by it. +// If you have a long list of FourVectors structures that you all want +// to rotate by the same matrix, use FourVectors::RotateManyBy() instead. +void FourVectors::RotateBy(const matrix3x4_t& matrix) +{ + // Splat out each of the entries in the matrix to a fltx4. Do this + // in the order that we will need them, to hide latency. I'm + // avoiding making an array of them, so that they'll remain in + // registers. 
+ fltx4 matSplat00, matSplat01, matSplat02, + matSplat10, matSplat11, matSplat12, + matSplat20, matSplat21, matSplat22; + + { + // Load the matrix into local vectors. Sadly, matrix3x4_ts are + // often unaligned. The w components will be the tranpose row of + // the matrix, but we don't really care about that. + fltx4 matCol0 = LoadUnalignedSIMD( matrix[0] ); + fltx4 matCol1 = LoadUnalignedSIMD( matrix[1] ); + fltx4 matCol2 = LoadUnalignedSIMD( matrix[2] ); + + matSplat00 = SplatXSIMD( matCol0 ); + matSplat01 = SplatYSIMD( matCol0 ); + matSplat02 = SplatZSIMD( matCol0 ); + + matSplat10 = SplatXSIMD( matCol1 ); + matSplat11 = SplatYSIMD( matCol1 ); + matSplat12 = SplatZSIMD( matCol1 ); + + matSplat20 = SplatXSIMD( matCol2 ); + matSplat21 = SplatYSIMD( matCol2 ); + matSplat22 = SplatZSIMD( matCol2 ); + } + + // Trust in the compiler to schedule these operations correctly: + fltx4 outX, outY, outZ; + outX = AddSIMD( AddSIMD( MulSIMD( x, matSplat00 ), MulSIMD( y, matSplat01 ) ), MulSIMD( z, matSplat02 ) ); + outY = AddSIMD( AddSIMD( MulSIMD( x, matSplat10 ), MulSIMD( y, matSplat11 ) ), MulSIMD( z, matSplat12 ) ); + outZ = AddSIMD( AddSIMD( MulSIMD( x, matSplat20 ), MulSIMD( y, matSplat21 ) ), MulSIMD( z, matSplat22 ) ); + + x = outX; + y = outY; + z = outZ; +} + +// Assume the given matrix is a rotation, and rotate these vectors by it. +// If you have a long list of FourVectors structures that you all want +// to rotate by the same matrix, use FourVectors::RotateManyBy() instead. +void FourVectors::TransformBy(const matrix3x4_t& matrix) +{ + // Splat out each of the entries in the matrix to a fltx4. Do this + // in the order that we will need them, to hide latency. I'm + // avoiding making an array of them, so that they'll remain in + // registers. + fltx4 matSplat00, matSplat01, matSplat02, + matSplat10, matSplat11, matSplat12, + matSplat20, matSplat21, matSplat22; + + { + // Load the matrix into local vectors. Sadly, matrix3x4_ts are + // often unaligned. 
The w components will be the tranpose row of + // the matrix, but we don't really care about that. + fltx4 matCol0 = LoadUnalignedSIMD( matrix[0] ); + fltx4 matCol1 = LoadUnalignedSIMD( matrix[1] ); + fltx4 matCol2 = LoadUnalignedSIMD( matrix[2] ); + + matSplat00 = SplatXSIMD( matCol0 ); + matSplat01 = SplatYSIMD( matCol0 ); + matSplat02 = SplatZSIMD( matCol0 ); + + matSplat10 = SplatXSIMD( matCol1 ); + matSplat11 = SplatYSIMD( matCol1 ); + matSplat12 = SplatZSIMD( matCol1 ); + + matSplat20 = SplatXSIMD( matCol2 ); + matSplat21 = SplatYSIMD( matCol2 ); + matSplat22 = SplatZSIMD( matCol2 ); + } + + // Trust in the compiler to schedule these operations correctly: + fltx4 outX, outY, outZ; + + outX = MaddSIMD( z, matSplat02, AddSIMD( MulSIMD( x, matSplat00 ), MulSIMD( y, matSplat01 ) ) ); + outY = MaddSIMD( z, matSplat12, AddSIMD( MulSIMD( x, matSplat10 ), MulSIMD( y, matSplat11 ) ) ); + outZ = MaddSIMD( z, matSplat22, AddSIMD( MulSIMD( x, matSplat20 ), MulSIMD( y, matSplat21 ) ) ); + + x = AddSIMD( outX, ReplicateX4( matrix[0][3] )); + y = AddSIMD( outY, ReplicateX4( matrix[1][3] )); + z = AddSIMD( outZ, ReplicateX4( matrix[2][3] )); +} + + + +/// quick, low quality perlin-style noise() function suitable for real time use. +/// return value is -1..1. Only reliable around +/- 1 million or so. +fltx4 NoiseSIMD( const fltx4 & x, const fltx4 & y, const fltx4 & z ); +fltx4 NoiseSIMD( FourVectors const &v ); + +// vector valued noise direction +FourVectors DNoiseSIMD( FourVectors const &v ); + +// vector value "curl" noise function. 
see http://hyperphysics.phy-astr.gsu.edu/hbase/curl.html +FourVectors CurlNoiseSIMD( FourVectors const &v ); + + +/// calculate the absolute value of a packed single +inline fltx4 fabs( const fltx4 & x ) +{ + return AndSIMD( x, LoadAlignedSIMD( g_SIMD_clear_signmask ) ); +} + +/// negate all four components of a SIMD packed single +inline fltx4 fnegate( const fltx4 & x ) +{ + return XorSIMD( x, LoadAlignedSIMD( g_SIMD_signmask ) ); +} + + +fltx4 Pow_FixedPoint_Exponent_SIMD( const fltx4 & x, int exponent); + +// PowSIMD - raise a SIMD register to a power. This is analogous to the C pow() function, with some +// restictions: fractional exponents are only handled with 2 bits of precision. Basically, +// fractions of 0,.25,.5, and .75 are handled. PowSIMD(x,.30) will be the same as PowSIMD(x,.25). +// negative and fractional powers are handled by the SIMD reciprocal and square root approximation +// instructions and so are not especially accurate ----Note that this routine does not raise +// numeric exceptions because it uses SIMD--- This routine is O(log2(exponent)). +inline fltx4 PowSIMD( const fltx4 & x, float exponent ) +{ + return Pow_FixedPoint_Exponent_SIMD(x,(int) (4.0*exponent)); +} + + + +// random number generation - generate 4 random numbers quickly. 
+ +void SeedRandSIMD(uint32 seed); // seed the random # generator +fltx4 RandSIMD( int nContext = 0 ); // return 4 numbers in the 0..1 range + +// for multithreaded, you need to use these and use the argument form of RandSIMD: +int GetSIMDRandContext( void ); +void ReleaseSIMDRandContext( int nContext ); + +FORCEINLINE fltx4 RandSignedSIMD( void ) // -1..1 +{ + return SubSIMD( MulSIMD( Four_Twos, RandSIMD() ), Four_Ones ); +} + + +// SIMD versions of mathlib simplespline functions +// hermite basis function for smooth interpolation +// Similar to Gain() above, but very cheap to call +// value should be between 0 & 1 inclusive +inline fltx4 SimpleSpline( const fltx4 & value ) +{ + // Arranged to avoid a data dependency between these two MULs: + fltx4 valueDoubled = MulSIMD( value, Four_Twos ); + fltx4 valueSquared = MulSIMD( value, value ); + + // Nice little ease-in, ease-out spline-like curve + return SubSIMD( + MulSIMD( Four_Threes, valueSquared ), + MulSIMD( valueDoubled, valueSquared ) ); +} + +// remaps a value in [startInterval, startInterval+rangeInterval] from linear to +// spline using SimpleSpline +inline fltx4 SimpleSplineRemapValWithDeltas( const fltx4 & val, + const fltx4 & A, const fltx4 & BMinusA, + const fltx4 & OneOverBMinusA, const fltx4 & C, + const fltx4 & DMinusC ) +{ +// if ( A == B ) +// return val >= B ? D : C; + fltx4 cVal = MulSIMD( SubSIMD( val, A), OneOverBMinusA ); + return AddSIMD( C, MulSIMD( DMinusC, SimpleSpline( cVal ) ) ); +} + +inline fltx4 SimpleSplineRemapValWithDeltasClamped( const fltx4 & val, + const fltx4 & A, const fltx4 & BMinusA, + const fltx4 & OneOverBMinusA, const fltx4 & C, + const fltx4 & DMinusC ) +{ +// if ( A == B ) +// return val >= B ? 
D : C; + fltx4 cVal = MulSIMD( SubSIMD( val, A), OneOverBMinusA ); + cVal = MinSIMD( Four_Ones, MaxSIMD( Four_Zeros, cVal ) ); + return AddSIMD( C, MulSIMD( DMinusC, SimpleSpline( cVal ) ) ); +} + +FORCEINLINE fltx4 FracSIMD( const fltx4 &val ) +{ + fltx4 fl4Abs = fabs( val ); + fltx4 ival = SubSIMD( AddSIMD( fl4Abs, Four_2ToThe23s ), Four_2ToThe23s ); + ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Ones ), ival ); + return XorSIMD( SubSIMD( fl4Abs, ival ), XorSIMD( val, fl4Abs ) ); // restore sign bits +} + +FORCEINLINE fltx4 Mod2SIMD( const fltx4 &val ) +{ + fltx4 fl4Abs = fabs( val ); + fltx4 ival = SubSIMD( AndSIMD( LoadAlignedSIMD( (float *) g_SIMD_lsbmask ), AddSIMD( fl4Abs, Four_2ToThe23s ) ), Four_2ToThe23s ); + ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Twos ), ival ); + return XorSIMD( SubSIMD( fl4Abs, ival ), XorSIMD( val, fl4Abs ) ); // restore sign bits +} + +FORCEINLINE fltx4 Mod2SIMDPositiveInput( const fltx4 &val ) +{ + fltx4 ival = SubSIMD( AndSIMD( LoadAlignedSIMD( g_SIMD_lsbmask ), AddSIMD( val, Four_2ToThe23s ) ), Four_2ToThe23s ); + ival = MaskedAssign( CmpGtSIMD( ival, val ), SubSIMD( ival, Four_Twos ), ival ); + return SubSIMD( val, ival ); +} + + +// approximate sin of an angle, with -1..1 representing the whole sin wave period instead of -pi..pi. +// no range reduction is done - for values outside of 0..1 you won't like the results +FORCEINLINE fltx4 _SinEst01SIMD( const fltx4 &val ) +{ + // really rough approximation - x*(4-x*4) - a parabola. s(0) = 0, s(.5) = 1, s(1)=0, smooth in-between. + // sufficient for simple oscillation. + return MulSIMD( val, SubSIMD( Four_Fours, MulSIMD( val, Four_Fours ) ) ); +} + +FORCEINLINE fltx4 _Sin01SIMD( const fltx4 &val ) +{ + // not a bad approximation : parabola always over-estimates. Squared parabola always + // underestimates. 
So lets blend between them: goodsin = badsin + .225*( badsin^2-badsin) + fltx4 fl4BadEst = MulSIMD( val, SubSIMD( Four_Fours, MulSIMD( val, Four_Fours ) ) ); + return AddSIMD( MulSIMD( Four_Point225s, SubSIMD( MulSIMD( fl4BadEst, fl4BadEst ), fl4BadEst ) ), fl4BadEst ); +} + +// full range useable implementations +FORCEINLINE fltx4 SinEst01SIMD( const fltx4 &val ) +{ + fltx4 fl4Abs = fabs( val ); + fltx4 fl4Reduced2 = Mod2SIMDPositiveInput( fl4Abs ); + fltx4 fl4OddMask = CmpGeSIMD( fl4Reduced2, Four_Ones ); + fltx4 fl4val = SubSIMD( fl4Reduced2, AndSIMD( Four_Ones, fl4OddMask ) ); + fltx4 fl4Sin = _SinEst01SIMD( fl4val ); + fl4Sin = XorSIMD( fl4Sin, AndSIMD( LoadAlignedSIMD( g_SIMD_signmask ), XorSIMD( val, fl4OddMask ) ) ); + return fl4Sin; + +} + +FORCEINLINE fltx4 Sin01SIMD( const fltx4 &val ) +{ + fltx4 fl4Abs = fabs( val ); + fltx4 fl4Reduced2 = Mod2SIMDPositiveInput( fl4Abs ); + fltx4 fl4OddMask = CmpGeSIMD( fl4Reduced2, Four_Ones ); + fltx4 fl4val = SubSIMD( fl4Reduced2, AndSIMD( Four_Ones, fl4OddMask ) ); + fltx4 fl4Sin = _Sin01SIMD( fl4val ); + fl4Sin = XorSIMD( fl4Sin, AndSIMD( LoadAlignedSIMD( g_SIMD_signmask ), XorSIMD( val, fl4OddMask ) ) ); + return fl4Sin; + +} + +// Schlick style Bias approximation see graphics gems 4 : bias(t,a)= t/( (1/a-2)*(1-t)+1) + +FORCEINLINE fltx4 PreCalcBiasParameter( const fltx4 &bias_parameter ) +{ + // convert perlin-style-bias parameter to the value right for the approximation + return SubSIMD( ReciprocalSIMD( bias_parameter ), Four_Twos ); +} + +FORCEINLINE fltx4 BiasSIMD( const fltx4 &val, const fltx4 &precalc_param ) +{ + // similar to bias function except pass precalced bias value from calling PreCalcBiasParameter. + + //!!speed!! use reciprocal est? + //!!speed!! 
could save one op by precalcing _2_ values + return DivSIMD( val, AddSIMD( MulSIMD( precalc_param, SubSIMD( Four_Ones, val ) ), Four_Ones ) ); +} + +//----------------------------------------------------------------------------- +// Box/plane test +// NOTE: The w component of emins + emaxs must be 1 for this to work +//----------------------------------------------------------------------------- +FORCEINLINE int BoxOnPlaneSideSIMD( const fltx4& emins, const fltx4& emaxs, const cplane_t *p, float tolerance = 0.f ) +{ + fltx4 corners[2]; + fltx4 normal = LoadUnalignedSIMD( p->normal.Base() ); + fltx4 dist = ReplicateX4( -p->dist ); + normal = SetWSIMD( normal, dist ); + fltx4 t4 = ReplicateX4( tolerance ); + fltx4 negt4 = ReplicateX4( -tolerance ); + fltx4 cmp = CmpGeSIMD( normal, Four_Zeros ); + corners[0] = MaskedAssign( cmp, emaxs, emins ); + corners[1] = MaskedAssign( cmp, emins, emaxs ); + fltx4 dot1 = Dot4SIMD( normal, corners[0] ); + fltx4 dot2 = Dot4SIMD( normal, corners[1] ); + cmp = CmpGeSIMD( dot1, t4 ); + fltx4 cmp2 = CmpGtSIMD( negt4, dot2 ); + fltx4 result = MaskedAssign( cmp, Four_Ones, Four_Zeros ); + fltx4 result2 = MaskedAssign( cmp2, Four_Twos, Four_Zeros ); + result = AddSIMD( result, result2 ); + intx4 sides; + ConvertStoreAsIntsSIMD( &sides, result ); + return sides[0]; +} + +#endif // _ssemath_h diff --git a/mp/src/public/mathlib/ssequaternion.h b/mp/src/public/mathlib/ssequaternion.h index 5d60961e..825a9e45 100644 --- a/mp/src/public/mathlib/ssequaternion.h +++ b/mp/src/public/mathlib/ssequaternion.h @@ -1,367 +1,367 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: - defines SIMD "structure of arrays" classes and functions.
-//
-//===========================================================================//
-#ifndef SSEQUATMATH_H
-#define SSEQUATMATH_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-
-#include "mathlib/ssemath.h"
-
-// Use this #define to allow SSE versions of Quaternion math
-// to exist on PC.
-// On PC, certain horizontal vector operations are not supported.
-// This causes the SSE implementation of quaternion math to mix the
-// vector and scalar floating point units, which is extremely
-// performance negative if you don't compile to native SSE2 (which
-// we don't as of Sept 1, 2007). So, it's best not to allow these
-// functions to exist at all. It's not good enough to simply replace
-// the contents of the functions with scalar math, because each call
-// to LoadAligned and StoreAligned will result in an unnecssary copy
-// of the quaternion, and several moves to and from the XMM registers.
-//
-// Basically, the problem you run into is that for efficient SIMD code,
-// you need to load the quaternions and vectors into SIMD registers and
-// keep them there as long as possible while doing only SIMD math,
-// whereas for efficient scalar code, each time you copy onto or ever
-// use a fltx4, it hoses your pipeline. So the difference has to be
-// in the management of temporary variables in the calling function,
-// not inside the math functions.
-//
-// If you compile assuming the presence of SSE2, the MSVC will abandon
-// the traditional x87 FPU operations altogether and make everything use
-// the SSE2 registers, which lessens this problem a little.
-
-// permitted only on 360, as we've done careful tuning on its Altivec math:
-#ifdef _X360
-#define ALLOW_SIMD_QUATERNION_MATH 1 // not on PC!
-#endif
-
-
-
-//---------------------------------------------------------------------
-// Load/store quaternions
-//---------------------------------------------------------------------
-#ifndef _X360
-#if ALLOW_SIMD_QUATERNION_MATH
-// Using STDC or SSE
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD )
-{
- fltx4 retval = LoadAlignedSIMD( pSIMD.Base() );
- return retval;
-}
-
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD )
-{
- fltx4 retval = LoadAlignedSIMD( pSIMD );
- return retval;
-}
-
-FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a )
-{
- StoreAlignedSIMD( pSIMD->Base(), a );
-}
-#endif
-#else
-
-// for the transitional class -- load a QuaternionAligned
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD )
-{
- fltx4 retval = XMLoadVector4A( pSIMD.Base() );
- return retval;
-}
-
-FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD )
-{
- fltx4 retval = XMLoadVector4A( pSIMD );
- return retval;
-}
-
-FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a )
-{
- XMStoreVector4A( pSIMD->Base(), a );
-}
-
-#endif
-
-
-#if ALLOW_SIMD_QUATERNION_MATH
-//---------------------------------------------------------------------
-// Make sure quaternions are within 180 degrees of one another, if not, reverse q
-//---------------------------------------------------------------------
-FORCEINLINE fltx4 QuaternionAlignSIMD( const fltx4 &p, const fltx4 &q )
-{
- // decide if one of the quaternions is backwards
- fltx4 a = SubSIMD( p, q );
- fltx4 b = AddSIMD( p, q );
- a = Dot4SIMD( a, a );
- b = Dot4SIMD( b, b );
- fltx4 cmp = CmpGtSIMD( a, b );
- fltx4 result = MaskedAssign( cmp, NegSIMD(q), q );
- return result;
-}
-
-//---------------------------------------------------------------------
-// Normalize Quaternion
-//---------------------------------------------------------------------
-#if USE_STDC_FOR_SIMD
-
-FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q )
-{
- fltx4 radius, result;
- radius = Dot4SIMD( q, q );
-
- if ( SubFloat( radius, 0 ) ) // > FLT_EPSILON && ((radius < 1.0f - 4*FLT_EPSILON) || (radius > 1.0f + 4*FLT_EPSILON))
- {
- float iradius = 1.0f / sqrt( SubFloat( radius, 0 ) );
- result = ReplicateX4( iradius );
- result = MulSIMD( result, q );
- return result;
- }
- return q;
-}
-
-#else
-
-// SSE + X360 implementation
-FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q )
-{
- fltx4 radius, result, mask;
- radius = Dot4SIMD( q, q );
- mask = CmpEqSIMD( radius, Four_Zeros ); // all ones iff radius = 0
- result = ReciprocalSqrtSIMD( radius );
- result = MulSIMD( result, q );
- return MaskedAssign( mask, q, result ); // if radius was 0, just return q
-}
-
-#endif
-
-
-//---------------------------------------------------------------------
-// 0.0 returns p, 1.0 return q.
-//---------------------------------------------------------------------
-FORCEINLINE fltx4 QuaternionBlendNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- fltx4 sclp, sclq, result;
- sclq = ReplicateX4( t );
- sclp = SubSIMD( Four_Ones, sclq );
- result = MulSIMD( sclp, p );
- result = MaddSIMD( sclq, q, result );
- return QuaternionNormalizeSIMD( result );
-}
-
-
-//---------------------------------------------------------------------
-// Blend Quaternions
-//---------------------------------------------------------------------
-FORCEINLINE fltx4 QuaternionBlendSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- // decide if one of the quaternions is backwards
- fltx4 q2, result;
- q2 = QuaternionAlignSIMD( p, q );
- result = QuaternionBlendNoAlignSIMD( p, q2, t );
- return result;
-}
-
-
-//---------------------------------------------------------------------
-// Multiply Quaternions
-//---------------------------------------------------------------------
-#ifndef _X360
-
-// SSE and STDC
-FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q )
-{
- // decide if one of the quaternions is backwards
- fltx4 q2, result;
- q2 = QuaternionAlignSIMD( p, q );
- SubFloat( result, 0 ) = SubFloat( p, 0 ) * SubFloat( q2, 3 ) + SubFloat( p, 1 ) * SubFloat( q2, 2 ) - SubFloat( p, 2 ) * SubFloat( q2, 1 ) + SubFloat( p, 3 ) * SubFloat( q2, 0 );
- SubFloat( result, 1 ) = -SubFloat( p, 0 ) * SubFloat( q2, 2 ) + SubFloat( p, 1 ) * SubFloat( q2, 3 ) + SubFloat( p, 2 ) * SubFloat( q2, 0 ) + SubFloat( p, 3 ) * SubFloat( q2, 1 );
- SubFloat( result, 2 ) = SubFloat( p, 0 ) * SubFloat( q2, 1 ) - SubFloat( p, 1 ) * SubFloat( q2, 0 ) + SubFloat( p, 2 ) * SubFloat( q2, 3 ) + SubFloat( p, 3 ) * SubFloat( q2, 2 );
- SubFloat( result, 3 ) = -SubFloat( p, 0 ) * SubFloat( q2, 0 ) - SubFloat( p, 1 ) * SubFloat( q2, 1 ) - SubFloat( p, 2 ) * SubFloat( q2, 2 ) + SubFloat( p, 3 ) * SubFloat( q2, 3 );
- return result;
-}
-
-#else
-
-// X360
-extern const fltx4 g_QuatMultRowSign[4];
-FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q )
-{
- fltx4 q2, row, result;
- q2 = QuaternionAlignSIMD( p, q );
-
- row = XMVectorSwizzle( q2, 3, 2, 1, 0 );
- row = MulSIMD( row, g_QuatMultRowSign[0] );
- result = Dot4SIMD( row, p );
-
- row = XMVectorSwizzle( q2, 2, 3, 0, 1 );
- row = MulSIMD( row, g_QuatMultRowSign[1] );
- row = Dot4SIMD( row, p );
- result = __vrlimi( result, row, 4, 0 );
-
- row = XMVectorSwizzle( q2, 1, 0, 3, 2 );
- row = MulSIMD( row, g_QuatMultRowSign[2] );
- row = Dot4SIMD( row, p );
- result = __vrlimi( result, row, 2, 0 );
-
- row = MulSIMD( q2, g_QuatMultRowSign[3] );
- row = Dot4SIMD( row, p );
- result = __vrlimi( result, row, 1, 0 );
- return result;
-}
-
-#endif
-
-
-//---------------------------------------------------------------------
-// Quaternion scale
-//---------------------------------------------------------------------
-#ifndef _X360
-
-// SSE and STDC
-FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
-{
- float r;
- fltx4 q;
-
- // FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to
- // use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
- float sinom = sqrt( SubFloat( p, 0 ) * SubFloat( p, 0 ) + SubFloat( p, 1 ) * SubFloat( p, 1 ) + SubFloat( p, 2 ) * SubFloat( p, 2 ) );
- sinom = min( sinom, 1.f );
-
- float sinsom = sin( asin( sinom ) * t );
-
- t = sinsom / (sinom + FLT_EPSILON);
- SubFloat( q, 0 ) = t * SubFloat( p, 0 );
- SubFloat( q, 1 ) = t * SubFloat( p, 1 );
- SubFloat( q, 2 ) = t * SubFloat( p, 2 );
-
- // rescale rotation
- r = 1.0f - sinsom * sinsom;
-
- // Assert( r >= 0 );
- if (r < 0.0f)
- r = 0.0f;
- r = sqrt( r );
-
- // keep sign of rotation
- SubFloat( q, 3 ) = fsel( SubFloat( p, 3 ), r, -r );
- return q;
-}
-
-#else
-
-// X360
-FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t )
-{
- fltx4 sinom = Dot3SIMD( p, p );
- sinom = SqrtSIMD( sinom );
- sinom = MinSIMD( sinom, Four_Ones );
- fltx4 sinsom = ArcSinSIMD( sinom );
- fltx4 t4 = ReplicateX4( t );
- sinsom = MulSIMD( sinsom, t4 );
- sinsom = SinSIMD( sinsom );
- sinom = AddSIMD( sinom, Four_Epsilons );
- sinom = ReciprocalSIMD( sinom );
- t4 = MulSIMD( sinsom, sinom );
- fltx4 result = MulSIMD( p, t4 );
-
- // rescale rotation
- sinsom = MulSIMD( sinsom, sinsom );
- fltx4 r = SubSIMD( Four_Ones, sinsom );
- r = MaxSIMD( r, Four_Zeros );
- r = SqrtSIMD( r );
-
- // keep sign of rotation
- fltx4 cmp = CmpGeSIMD( p, Four_Zeros );
- r = MaskedAssign( cmp, r, NegSIMD( r ) );
-
- result = __vrlimi(result, r, 1, 0);
- return result;
-}
-
-#endif
-
-
-//-----------------------------------------------------------------------------
-// Quaternion sphereical linear interpolation
-//-----------------------------------------------------------------------------
-#ifndef _X360
-
-// SSE and STDC
-FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- float omega, cosom, sinom, sclp, sclq;
-
- fltx4 result;
-
- // 0.0 returns p, 1.0 return q.
- cosom = SubFloat( p, 0 ) * SubFloat( q, 0 ) + SubFloat( p, 1 ) * SubFloat( q, 1 ) +
- SubFloat( p, 2 ) * SubFloat( q, 2 ) + SubFloat( p, 3 ) * SubFloat( q, 3 );
-
- if ( (1.0f + cosom ) > 0.000001f )
- {
- if ( (1.0f - cosom ) > 0.000001f )
- {
- omega = acos( cosom );
- sinom = sin( omega );
- sclp = sin( (1.0f - t)*omega) / sinom;
- sclq = sin( t*omega ) / sinom;
- }
- else
- {
- // TODO: add short circuit for cosom == 1.0f?
- sclp = 1.0f - t;
- sclq = t;
- }
- SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( q, 0 );
- SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( q, 1 );
- SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( q, 2 );
- SubFloat( result, 3 ) = sclp * SubFloat( p, 3 ) + sclq * SubFloat( q, 3 );
- }
- else
- {
- SubFloat( result, 0 ) = -SubFloat( q, 1 );
- SubFloat( result, 1 ) = SubFloat( q, 0 );
- SubFloat( result, 2 ) = -SubFloat( q, 3 );
- SubFloat( result, 3 ) = SubFloat( q, 2 );
- sclp = sin( (1.0f - t) * (0.5f * M_PI));
- sclq = sin( t * (0.5f * M_PI));
- SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( result, 0 );
- SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( result, 1 );
- SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( result, 2 );
- }
-
- return result;
-}
-
-#else
-
-// X360
-FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- return XMQuaternionSlerp( p, q, t );
-}
-
-#endif
-
-
-FORCEINLINE fltx4 QuaternionSlerpSIMD( const fltx4 &p, const fltx4 &q, float t )
-{
- fltx4 q2, result;
- q2 = QuaternionAlignSIMD( p, q );
- result = QuaternionSlerpNoAlignSIMD( p, q2, t );
- return result;
-}
-
-
-#endif // ALLOW_SIMD_QUATERNION_MATH
-
-#endif // SSEQUATMATH_H
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: - defines SIMD "structure of arrays" classes and functions. +// +//===========================================================================// +#ifndef SSEQUATMATH_H +#define SSEQUATMATH_H + +#ifdef _WIN32 +#pragma once +#endif + + +#include "mathlib/ssemath.h" + +// Use this #define to allow SSE versions of Quaternion math +// to exist on PC. +// On PC, certain horizontal vector operations are not supported. +// This causes the SSE implementation of quaternion math to mix the +// vector and scalar floating point units, which is extremely +// performance negative if you don't compile to native SSE2 (which +// we don't as of Sept 1, 2007). So, it's best not to allow these +// functions to exist at all. It's not good enough to simply replace +// the contents of the functions with scalar math, because each call +// to LoadAligned and StoreAligned will result in an unnecssary copy +// of the quaternion, and several moves to and from the XMM registers. +// +// Basically, the problem you run into is that for efficient SIMD code, +// you need to load the quaternions and vectors into SIMD registers and +// keep them there as long as possible while doing only SIMD math, +// whereas for efficient scalar code, each time you copy onto or ever +// use a fltx4, it hoses your pipeline. So the difference has to be +// in the management of temporary variables in the calling function, +// not inside the math functions. +// +// If you compile assuming the presence of SSE2, the MSVC will abandon +// the traditional x87 FPU operations altogether and make everything use +// the SSE2 registers, which lessens this problem a little. + +// permitted only on 360, as we've done careful tuning on its Altivec math: +#ifdef _X360 +#define ALLOW_SIMD_QUATERNION_MATH 1 // not on PC! 
+#endif + + + +//--------------------------------------------------------------------- +// Load/store quaternions +//--------------------------------------------------------------------- +#ifndef _X360 +#if ALLOW_SIMD_QUATERNION_MATH +// Using STDC or SSE +FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD ) +{ + fltx4 retval = LoadAlignedSIMD( pSIMD.Base() ); + return retval; +} + +FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD ) +{ + fltx4 retval = LoadAlignedSIMD( pSIMD ); + return retval; +} + +FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a ) +{ + StoreAlignedSIMD( pSIMD->Base(), a ); +} +#endif +#else + +// for the transitional class -- load a QuaternionAligned +FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD ) +{ + fltx4 retval = XMLoadVector4A( pSIMD.Base() ); + return retval; +} + +FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD ) +{ + fltx4 retval = XMLoadVector4A( pSIMD ); + return retval; +} + +FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a ) +{ + XMStoreVector4A( pSIMD->Base(), a ); +} + +#endif + + +#if ALLOW_SIMD_QUATERNION_MATH +//--------------------------------------------------------------------- +// Make sure quaternions are within 180 degrees of one another, if not, reverse q +//--------------------------------------------------------------------- +FORCEINLINE fltx4 QuaternionAlignSIMD( const fltx4 &p, const fltx4 &q ) +{ + // decide if one of the quaternions is backwards + fltx4 a = SubSIMD( p, q ); + fltx4 b = AddSIMD( p, q ); + a = Dot4SIMD( a, a ); + b = Dot4SIMD( b, b ); + fltx4 cmp = CmpGtSIMD( a, b ); + fltx4 result = MaskedAssign( cmp, NegSIMD(q), q ); + return result; +} + +//--------------------------------------------------------------------- +// Normalize Quaternion +//--------------------------------------------------------------------- +#if USE_STDC_FOR_SIMD + 
+FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q ) +{ + fltx4 radius, result; + radius = Dot4SIMD( q, q ); + + if ( SubFloat( radius, 0 ) ) // > FLT_EPSILON && ((radius < 1.0f - 4*FLT_EPSILON) || (radius > 1.0f + 4*FLT_EPSILON)) + { + float iradius = 1.0f / sqrt( SubFloat( radius, 0 ) ); + result = ReplicateX4( iradius ); + result = MulSIMD( result, q ); + return result; + } + return q; +} + +#else + +// SSE + X360 implementation +FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q ) +{ + fltx4 radius, result, mask; + radius = Dot4SIMD( q, q ); + mask = CmpEqSIMD( radius, Four_Zeros ); // all ones iff radius = 0 + result = ReciprocalSqrtSIMD( radius ); + result = MulSIMD( result, q ); + return MaskedAssign( mask, q, result ); // if radius was 0, just return q +} + +#endif + + +//--------------------------------------------------------------------- +// 0.0 returns p, 1.0 return q. +//--------------------------------------------------------------------- +FORCEINLINE fltx4 QuaternionBlendNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + fltx4 sclp, sclq, result; + sclq = ReplicateX4( t ); + sclp = SubSIMD( Four_Ones, sclq ); + result = MulSIMD( sclp, p ); + result = MaddSIMD( sclq, q, result ); + return QuaternionNormalizeSIMD( result ); +} + + +//--------------------------------------------------------------------- +// Blend Quaternions +//--------------------------------------------------------------------- +FORCEINLINE fltx4 QuaternionBlendSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + // decide if one of the quaternions is backwards + fltx4 q2, result; + q2 = QuaternionAlignSIMD( p, q ); + result = QuaternionBlendNoAlignSIMD( p, q2, t ); + return result; +} + + +//--------------------------------------------------------------------- +// Multiply Quaternions +//--------------------------------------------------------------------- +#ifndef _X360 + +// SSE and STDC +FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q ) 
+{ + // decide if one of the quaternions is backwards + fltx4 q2, result; + q2 = QuaternionAlignSIMD( p, q ); + SubFloat( result, 0 ) = SubFloat( p, 0 ) * SubFloat( q2, 3 ) + SubFloat( p, 1 ) * SubFloat( q2, 2 ) - SubFloat( p, 2 ) * SubFloat( q2, 1 ) + SubFloat( p, 3 ) * SubFloat( q2, 0 ); + SubFloat( result, 1 ) = -SubFloat( p, 0 ) * SubFloat( q2, 2 ) + SubFloat( p, 1 ) * SubFloat( q2, 3 ) + SubFloat( p, 2 ) * SubFloat( q2, 0 ) + SubFloat( p, 3 ) * SubFloat( q2, 1 ); + SubFloat( result, 2 ) = SubFloat( p, 0 ) * SubFloat( q2, 1 ) - SubFloat( p, 1 ) * SubFloat( q2, 0 ) + SubFloat( p, 2 ) * SubFloat( q2, 3 ) + SubFloat( p, 3 ) * SubFloat( q2, 2 ); + SubFloat( result, 3 ) = -SubFloat( p, 0 ) * SubFloat( q2, 0 ) - SubFloat( p, 1 ) * SubFloat( q2, 1 ) - SubFloat( p, 2 ) * SubFloat( q2, 2 ) + SubFloat( p, 3 ) * SubFloat( q2, 3 ); + return result; +} + +#else + +// X360 +extern const fltx4 g_QuatMultRowSign[4]; +FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q ) +{ + fltx4 q2, row, result; + q2 = QuaternionAlignSIMD( p, q ); + + row = XMVectorSwizzle( q2, 3, 2, 1, 0 ); + row = MulSIMD( row, g_QuatMultRowSign[0] ); + result = Dot4SIMD( row, p ); + + row = XMVectorSwizzle( q2, 2, 3, 0, 1 ); + row = MulSIMD( row, g_QuatMultRowSign[1] ); + row = Dot4SIMD( row, p ); + result = __vrlimi( result, row, 4, 0 ); + + row = XMVectorSwizzle( q2, 1, 0, 3, 2 ); + row = MulSIMD( row, g_QuatMultRowSign[2] ); + row = Dot4SIMD( row, p ); + result = __vrlimi( result, row, 2, 0 ); + + row = MulSIMD( q2, g_QuatMultRowSign[3] ); + row = Dot4SIMD( row, p ); + result = __vrlimi( result, row, 1, 0 ); + return result; +} + +#endif + + +//--------------------------------------------------------------------- +// Quaternion scale +//--------------------------------------------------------------------- +#ifndef _X360 + +// SSE and STDC +FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t ) +{ + float r; + fltx4 q; + + // FIXME: nick, this isn't overly sensitive to 
accuracy, and it may be faster to + // use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale. + float sinom = sqrt( SubFloat( p, 0 ) * SubFloat( p, 0 ) + SubFloat( p, 1 ) * SubFloat( p, 1 ) + SubFloat( p, 2 ) * SubFloat( p, 2 ) ); + sinom = min( sinom, 1.f ); + + float sinsom = sin( asin( sinom ) * t ); + + t = sinsom / (sinom + FLT_EPSILON); + SubFloat( q, 0 ) = t * SubFloat( p, 0 ); + SubFloat( q, 1 ) = t * SubFloat( p, 1 ); + SubFloat( q, 2 ) = t * SubFloat( p, 2 ); + + // rescale rotation + r = 1.0f - sinsom * sinsom; + + // Assert( r >= 0 ); + if (r < 0.0f) + r = 0.0f; + r = sqrt( r ); + + // keep sign of rotation + SubFloat( q, 3 ) = fsel( SubFloat( p, 3 ), r, -r ); + return q; +} + +#else + +// X360 +FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t ) +{ + fltx4 sinom = Dot3SIMD( p, p ); + sinom = SqrtSIMD( sinom ); + sinom = MinSIMD( sinom, Four_Ones ); + fltx4 sinsom = ArcSinSIMD( sinom ); + fltx4 t4 = ReplicateX4( t ); + sinsom = MulSIMD( sinsom, t4 ); + sinsom = SinSIMD( sinsom ); + sinom = AddSIMD( sinom, Four_Epsilons ); + sinom = ReciprocalSIMD( sinom ); + t4 = MulSIMD( sinsom, sinom ); + fltx4 result = MulSIMD( p, t4 ); + + // rescale rotation + sinsom = MulSIMD( sinsom, sinsom ); + fltx4 r = SubSIMD( Four_Ones, sinsom ); + r = MaxSIMD( r, Four_Zeros ); + r = SqrtSIMD( r ); + + // keep sign of rotation + fltx4 cmp = CmpGeSIMD( p, Four_Zeros ); + r = MaskedAssign( cmp, r, NegSIMD( r ) ); + + result = __vrlimi(result, r, 1, 0); + return result; +} + +#endif + + +//----------------------------------------------------------------------------- +// Quaternion sphereical linear interpolation +//----------------------------------------------------------------------------- +#ifndef _X360 + +// SSE and STDC +FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + float omega, cosom, sinom, sclp, sclq; + + fltx4 result; + + // 0.0 returns p, 1.0 return q. 
+ cosom = SubFloat( p, 0 ) * SubFloat( q, 0 ) + SubFloat( p, 1 ) * SubFloat( q, 1 ) + + SubFloat( p, 2 ) * SubFloat( q, 2 ) + SubFloat( p, 3 ) * SubFloat( q, 3 ); + + if ( (1.0f + cosom ) > 0.000001f ) + { + if ( (1.0f - cosom ) > 0.000001f ) + { + omega = acos( cosom ); + sinom = sin( omega ); + sclp = sin( (1.0f - t)*omega) / sinom; + sclq = sin( t*omega ) / sinom; + } + else + { + // TODO: add short circuit for cosom == 1.0f? + sclp = 1.0f - t; + sclq = t; + } + SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( q, 0 ); + SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( q, 1 ); + SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( q, 2 ); + SubFloat( result, 3 ) = sclp * SubFloat( p, 3 ) + sclq * SubFloat( q, 3 ); + } + else + { + SubFloat( result, 0 ) = -SubFloat( q, 1 ); + SubFloat( result, 1 ) = SubFloat( q, 0 ); + SubFloat( result, 2 ) = -SubFloat( q, 3 ); + SubFloat( result, 3 ) = SubFloat( q, 2 ); + sclp = sin( (1.0f - t) * (0.5f * M_PI)); + sclq = sin( t * (0.5f * M_PI)); + SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( result, 0 ); + SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( result, 1 ); + SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( result, 2 ); + } + + return result; +} + +#else + +// X360 +FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + return XMQuaternionSlerp( p, q, t ); +} + +#endif + + +FORCEINLINE fltx4 QuaternionSlerpSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + fltx4 q2, result; + q2 = QuaternionAlignSIMD( p, q ); + result = QuaternionSlerpNoAlignSIMD( p, q2, t ); + return result; +} + + +#endif // ALLOW_SIMD_QUATERNION_MATH + +#endif // SSEQUATMATH_H + diff --git a/mp/src/public/mathlib/vector.h b/mp/src/public/mathlib/vector.h index 4b361640..c19261d7 100644 --- a/mp/src/public/mathlib/vector.h +++ b/mp/src/public/mathlib/vector.h @@ -1,2312 +1,2312 @@ -//========= Copyright 
Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef VECTOR_H
-#define VECTOR_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include <math.h>
-#include <float.h>
-
-// For vec_t, put this somewhere else?
-#include "tier0/basetypes.h"
-
-// For rand(). We really need a library!
-#include <stdlib.h>
-
-#ifndef _X360
-// For MMX intrinsics
-#include <xmmintrin.h>
-#endif
-
-#include "tier0/dbg.h"
-#include "tier0/threadtools.h"
-#include "mathlib/vector2d.h"
-#include "mathlib/math_pfns.h"
-#include "minmax.h"
-
-// Uncomment this to add extra Asserts to check for NANs, uninitialized vecs, etc.
-//#define VECTOR_PARANOIA 1
-
-// Uncomment this to make sure we don't do anything slow with our vectors
-//#define VECTOR_NO_SLOW_OPERATIONS 1
-
-
-// Used to make certain code easier to read.
-#define X_INDEX 0
-#define Y_INDEX 1
-#define Z_INDEX 2
-
-
-#ifdef VECTOR_PARANOIA
-#define CHECK_VALID( _v) Assert( (_v).IsValid() )
-#else
-#ifdef GNUC
-#define CHECK_VALID( _v)
-#else
-#define CHECK_VALID( _v) 0
-#endif
-#endif
-
-#define VecToString(v) (static_cast<const char *>(CFmtStr("(%f, %f, %f)", (v).x, (v).y, (v).z))) // ** Note: this generates a temporary, don't hold reference!
-
-class VectorByValue;
-
-//=========================================================
-// 3D Vector
-//=========================================================
-class Vector
-{
-public:
- // Members
- vec_t x, y, z;
-
- // Construction/destruction:
- Vector(void);
- Vector(vec_t X, vec_t Y, vec_t Z);
- explicit Vector(vec_t XYZ); ///< broadcast initialize
-
- // Initialization
- void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f);
- // TODO (Ilya): Should there be an init that takes a single float for consistency?
-
- // Got any nasty NAN's?
- bool IsValid() const;
- void Invalidate();
-
- // array access...
- vec_t operator[](int i) const;
- vec_t& operator[](int i);
-
- // Base address...
- vec_t* Base();
- vec_t const* Base() const;
-
- // Cast to Vector2D...
- Vector2D& AsVector2D();
- const Vector2D& AsVector2D() const;
-
- // Initialization methods
- void Random( vec_t minVal, vec_t maxVal );
- inline void Zero(); ///< zero out a vector
-
- // equality
- bool operator==(const Vector& v) const;
- bool operator!=(const Vector& v) const;
-
- // arithmetic operations
- FORCEINLINE Vector& operator+=(const Vector &v);
- FORCEINLINE Vector& operator-=(const Vector &v);
- FORCEINLINE Vector& operator*=(const Vector &v);
- FORCEINLINE Vector& operator*=(float s);
- FORCEINLINE Vector& operator/=(const Vector &v);
- FORCEINLINE Vector& operator/=(float s);
- FORCEINLINE Vector& operator+=(float fl) ; ///< broadcast add
- FORCEINLINE Vector& operator-=(float fl) ; ///< broadcast sub
-
-// negate the vector components
- void Negate();
-
- // Get the vector's magnitude.
- inline vec_t Length() const;
-
- // Get the vector's magnitude squared.
- FORCEINLINE vec_t LengthSqr(void) const
- {
- CHECK_VALID(*this);
- return (x*x + y*y + z*z);
- }
-
- // return true if this vector is (0,0,0) within tolerance
- bool IsZero( float tolerance = 0.01f ) const
- {
- return (x > -tolerance && x < tolerance &&
- y > -tolerance && y < tolerance &&
- z > -tolerance && z < tolerance);
- }
-
- vec_t NormalizeInPlace();
- Vector Normalized() const;
- bool IsLengthGreaterThan( float val ) const;
- bool IsLengthLessThan( float val ) const;
-
- // check if a vector is within the box defined by two other vectors
- FORCEINLINE bool WithinAABox( Vector const &boxmin, Vector const &boxmax);
-
- // Get the distance from this vector to the other one.
- vec_t DistTo(const Vector &vOther) const;
-
- // Get the distance from this vector to the other one squared.
- // NJS: note, VC wasn't inlining it correctly in several deeply nested inlines due to being an 'out of line' inline.
- // may be able to tidy this up after switching to VC7
- FORCEINLINE vec_t DistToSqr(const Vector &vOther) const
- {
- Vector delta;
-
- delta.x = x - vOther.x;
- delta.y = y - vOther.y;
- delta.z = z - vOther.z;
-
- return delta.LengthSqr();
- }
-
- // Copy
- void CopyToArray(float* rgfl) const;
-
- // Multiply, add, and assign to this (ie: *this = a + b * scalar). This
- // is about 12% faster than the actual vector equation (because it's done per-component
- // rather than per-vector).
- void MulAdd(const Vector& a, const Vector& b, float scalar);
-
- // Dot product.
- vec_t Dot(const Vector& vOther) const;
-
- // assignment
- Vector& operator=(const Vector &vOther);
-
- // 2d
- vec_t Length2D(void) const;
- vec_t Length2DSqr(void) const;
-
- operator VectorByValue &() { return *((VectorByValue *)(this)); }
- operator const VectorByValue &() const { return *((const VectorByValue *)(this)); }
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // copy constructors
-// Vector(const Vector &vOther);
-
- // arithmetic operations
- Vector operator-(void) const;
-
- Vector operator+(const Vector& v) const;
- Vector operator-(const Vector& v) const;
- Vector operator*(const Vector& v) const;
- Vector operator/(const Vector& v) const;
- Vector operator*(float fl) const;
- Vector operator/(float fl) const;
-
- // Cross product between two vectors.
- Vector Cross(const Vector &vOther) const;
-
- // Returns a vector with the min or max in X, Y, and Z.
- Vector Min(const Vector &vOther) const;
- Vector Max(const Vector &vOther) const;
-
-#else
-
-private:
- // No copy constructors allowed if we're in optimal mode
- Vector(const Vector& vOther);
-#endif
-};
-
-FORCEINLINE void NetworkVarConstruct( Vector &v ) { v.Zero(); }
-
-
-#define USE_M64S ( ( !defined( _X360 ) ) )
-
-
-
-//=========================================================
-// 4D Short Vector (aligned on 8-byte boundary)
-//=========================================================
-class ALIGN8 ShortVector
-{
-public:
-
- short x, y, z, w;
-
- // Initialization
- void Init(short ix = 0, short iy = 0, short iz = 0, short iw = 0 );
-
-
-#if USE_M64S
- __m64 &AsM64() { return *(__m64*)&x; }
- const __m64 &AsM64() const { return *(const __m64*)&x; }
-#endif
-
- // Setter
- void Set( const ShortVector& vOther );
- void Set( const short ix, const short iy, const short iz, const short iw );
-
- // array access...
- short operator[](int i) const;
- short& operator[](int i);
-
- // Base address...
- short* Base();
- short const* Base() const;
-
- // equality
- bool operator==(const ShortVector& v) const;
- bool operator!=(const ShortVector& v) const;
-
- // Arithmetic operations
- FORCEINLINE ShortVector& operator+=(const ShortVector &v);
- FORCEINLINE ShortVector& operator-=(const ShortVector &v);
- FORCEINLINE ShortVector& operator*=(const ShortVector &v);
- FORCEINLINE ShortVector& operator*=(float s);
- FORCEINLINE ShortVector& operator/=(const ShortVector &v);
- FORCEINLINE ShortVector& operator/=(float s);
- FORCEINLINE ShortVector operator*(float fl) const;
-
-private:
-
- // No copy constructors allowed if we're in optimal mode
-// ShortVector(ShortVector const& vOther);
-
- // No assignment operators either...
-// ShortVector& operator=( ShortVector const& src );
-
-} ALIGN8_POST;
-
-
-
-
-
-
-//=========================================================
-// 4D Integer Vector
-//=========================================================
-class IntVector4D
-{
-public:
-
- int x, y, z, w;
-
- // Initialization
- void Init(int ix = 0, int iy = 0, int iz = 0, int iw = 0 );
-
-#if USE_M64S
- __m64 &AsM64() { return *(__m64*)&x; }
- const __m64 &AsM64() const { return *(const __m64*)&x; }
-#endif
-
- // Setter
- void Set( const IntVector4D& vOther );
- void Set( const int ix, const int iy, const int iz, const int iw );
-
- // array access...
- int operator[](int i) const;
- int& operator[](int i);
-
- // Base address...
- int* Base();
- int const* Base() const;
-
- // equality
- bool operator==(const IntVector4D& v) const;
- bool operator!=(const IntVector4D& v) const;
-
- // Arithmetic operations
- FORCEINLINE IntVector4D& operator+=(const IntVector4D &v);
- FORCEINLINE IntVector4D& operator-=(const IntVector4D &v);
- FORCEINLINE IntVector4D& operator*=(const IntVector4D &v);
- FORCEINLINE IntVector4D& operator*=(float s);
- FORCEINLINE IntVector4D& operator/=(const IntVector4D &v);
- FORCEINLINE IntVector4D& operator/=(float s);
- FORCEINLINE IntVector4D operator*(float fl) const;
-
-private:
-
- // No copy constructors allowed if we're in optimal mode
- // IntVector4D(IntVector4D const& vOther);
-
- // No assignment operators either...
- // IntVector4D& operator=( IntVector4D const& src );
-
-};
-
-
-
-//-----------------------------------------------------------------------------
-// Allows us to specifically pass the vector by value when we need to
-//-----------------------------------------------------------------------------
-class VectorByValue : public Vector
-{
-public:
- // Construction/destruction:
- VectorByValue(void) : Vector() {}
- VectorByValue(vec_t X, vec_t Y, vec_t Z) : Vector( X, Y, Z ) {}
- VectorByValue(const VectorByValue& vOther) { *this = vOther; }
-};
-
-
-//-----------------------------------------------------------------------------
-// Utility to simplify table construction. No constructor means can use
-// traditional C-style initialization
-//-----------------------------------------------------------------------------
-class TableVector
-{
-public:
- vec_t x, y, z;
-
- operator Vector &() { return *((Vector *)(this)); }
- operator const Vector &() const { return *((const Vector *)(this)); }
-
- // array access...
- inline vec_t& operator[](int i)
- {
- Assert( (i >= 0) && (i < 3) );
- return ((vec_t*)this)[i];
- }
-
- inline vec_t operator[](int i) const
- {
- Assert( (i >= 0) && (i < 3) );
- return ((vec_t*)this)[i];
- }
-};
-
-
-//-----------------------------------------------------------------------------
-// Here's where we add all those lovely SSE optimized routines
-//-----------------------------------------------------------------------------
-
-class ALIGN16 VectorAligned : public Vector
-{
-public:
- inline VectorAligned(void) {};
- inline VectorAligned(vec_t X, vec_t Y, vec_t Z)
- {
- Init(X,Y,Z);
- }
-
-#ifdef VECTOR_NO_SLOW_OPERATIONS
-
-private:
- // No copy constructors allowed if we're in optimal mode
- VectorAligned(const VectorAligned& vOther);
- VectorAligned(const Vector &vOther);
-
-#else
-public:
- explicit VectorAligned(const Vector &vOther)
- {
- Init(vOther.x, vOther.y, vOther.z);
- }
-
- VectorAligned& operator=(const Vector &vOther)
- {
- Init(vOther.x, vOther.y, vOther.z);
- return *this;
- }
-
-#endif
- float w; // this space is used anyway
-} ALIGN16_POST;
-
-//-----------------------------------------------------------------------------
-// Vector related operations
-//-----------------------------------------------------------------------------
-
-// Vector clear
-FORCEINLINE void VectorClear( Vector& a );
-
-// Copy
-FORCEINLINE void VectorCopy( const Vector& src, Vector& dst );
-
-// Vector arithmetic
-FORCEINLINE void VectorAdd( const Vector& a, const Vector& b, Vector& result );
-FORCEINLINE void VectorSubtract( const Vector& a, const Vector& b, Vector& result );
-FORCEINLINE void VectorMultiply( const Vector& a, vec_t b, Vector& result );
-FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& result );
-FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& result );
-FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& result );
-inline void VectorScale ( const Vector& in, vec_t scale, Vector& result );
-// Don't mark this as inline in its function declaration. That's only necessary on its
-// definition, and 'inline' here leads to gcc warnings.
-void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest );
-
-// Vector equality with tolerance
-bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance = 0.0f );
-
-#define VectorExpand(v) (v).x, (v).y, (v).z
-
-
-// Normalization
-// FIXME: Can't use quite yet
-//vec_t VectorNormalize( Vector& v );
-
-// Length
-inline vec_t VectorLength( const Vector& v );
-
-// Dot Product
-FORCEINLINE vec_t DotProduct(const Vector& a, const Vector& b);
-
-// Cross product
-void CrossProduct(const Vector& a, const Vector& b, Vector& result );
-
-// Store the min or max of each of x, y, and z into the result.
-void VectorMin( const Vector &a, const Vector &b, Vector &result );
-void VectorMax( const Vector &a, const Vector &b, Vector &result );
-
-// Linearly interpolate between two vectors
-void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest );
-Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t );
-
-FORCEINLINE Vector ReplicateToVector( float x )
-{
- return Vector( x, x, x );
-}
-
-// check if a point is in the field of a view of an object. supports up to 180 degree fov.
-FORCEINLINE bool PointWithinViewAngle( Vector const &vecSrcPosition,
- Vector const &vecTargetPosition,
- Vector const &vecLookDirection, float flCosHalfFOV )
-{
- Vector vecDelta = vecTargetPosition - vecSrcPosition;
- float cosDiff = DotProduct( vecLookDirection, vecDelta );
-
- if ( cosDiff < 0 )
- return false;
-
- float flLen2 = vecDelta.LengthSqr();
-
- // a/sqrt(b) > c == a^2 > b * c ^2
- return ( cosDiff * cosDiff > flLen2 * flCosHalfFOV * flCosHalfFOV );
-
-}
-
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// Cross product
-Vector CrossProduct( const Vector& a, const Vector& b );
-
-// Random vector creation
-Vector RandomVector( vec_t minVal, vec_t maxVal );
-
-#endif
-
-float RandomVectorInUnitSphere( Vector *pVector );
-float RandomVectorInUnitCircle( Vector2D *pVector );
-
-
-//-----------------------------------------------------------------------------
-//
-// Inlined Vector methods
-//
-//-----------------------------------------------------------------------------
-
-
-//-----------------------------------------------------------------------------
-// constructors
-//-----------------------------------------------------------------------------
-inline Vector::Vector(void)
-{
-#ifdef _DEBUG
-#ifdef VECTOR_PARANOIA
- // Initialize to NAN to catch errors
- x = y = z = VEC_T_NAN;
-#endif
-#endif
-}
-
-inline Vector::Vector(vec_t X, vec_t Y, vec_t Z)
-{
- x = X; y = Y; z = Z;
- CHECK_VALID(*this);
-}
-
-inline Vector::Vector(vec_t XYZ)
-{
- x = y = z = XYZ;
- CHECK_VALID(*this);
-}
-
-//inline Vector::Vector(const float *pFloat)
-//{
-// Assert( pFloat );
-// x = pFloat[0]; y = pFloat[1]; z = pFloat[2];
-// CHECK_VALID(*this);
-//}
-
-#if 0
-//-----------------------------------------------------------------------------
-// copy constructor
-//-----------------------------------------------------------------------------
-
-inline Vector::Vector(const Vector &vOther)
-{
- CHECK_VALID(vOther);
- x = vOther.x; y = vOther.y; z = vOther.z;
-}
-#endif
-
-//-----------------------------------------------------------------------------
-// initialization
-//-----------------------------------------------------------------------------
-
-inline void Vector::Init( vec_t ix, vec_t iy, vec_t iz )
-{
- x = ix; y = iy; z = iz;
- CHECK_VALID(*this);
-}
-
-inline void Vector::Random( vec_t minVal, vec_t maxVal )
-{
- x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- z = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- CHECK_VALID(*this);
-}
-
-// This should really be a single opcode on the PowerPC (move r0 onto the vec reg)
-inline void Vector::Zero()
-{
- x = y = z = 0.0f;
-}
-
-inline void VectorClear( Vector& a )
-{
- a.x = a.y = a.z = 0.0f;
-}
-
-//-----------------------------------------------------------------------------
-// assignment
-//-----------------------------------------------------------------------------
-
-inline Vector& Vector::operator=(const Vector &vOther)
-{
- CHECK_VALID(vOther);
- x=vOther.x; y=vOther.y; z=vOther.z;
- return *this;
-}
-
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-inline vec_t& Vector::operator[](int i)
-{
- Assert( (i >= 0) && (i < 3) );
- return ((vec_t*)this)[i];
-}
-
-inline vec_t Vector::operator[](int i) const
-{
- Assert( (i >= 0) && (i < 3) );
- return ((vec_t*)this)[i];
-}
-
-
-//-----------------------------------------------------------------------------
-// Base address...
-//-----------------------------------------------------------------------------
-inline vec_t* Vector::Base()
-{
- return (vec_t*)this;
-}
-
-inline vec_t const* Vector::Base() const
-{
- return (vec_t const*)this;
-}
-
-//-----------------------------------------------------------------------------
-// Cast to Vector2D...
-//-----------------------------------------------------------------------------
-
-inline Vector2D& Vector::AsVector2D()
-{
- return *(Vector2D*)this;
-}
-
-inline const Vector2D& Vector::AsVector2D() const
-{
- return *(const Vector2D*)this;
-}
-
-//-----------------------------------------------------------------------------
-// IsValid?
-//-----------------------------------------------------------------------------
-
-inline bool Vector::IsValid() const
-{
- return IsFinite(x) && IsFinite(y) && IsFinite(z);
-}
-
-//-----------------------------------------------------------------------------
-// Invalidate
-//-----------------------------------------------------------------------------
-
-inline void Vector::Invalidate()
-{
-//#ifdef _DEBUG
-//#ifdef VECTOR_PARANOIA
- x = y = z = VEC_T_NAN;
-//#endif
-//#endif
-}
-
-//-----------------------------------------------------------------------------
-// comparison
-//-----------------------------------------------------------------------------
-
-inline bool Vector::operator==( const Vector& src ) const
-{
- CHECK_VALID(src);
- CHECK_VALID(*this);
- return (src.x == x) && (src.y == y) && (src.z == z);
-}
-
-inline bool Vector::operator!=( const Vector& src ) const
-{
- CHECK_VALID(src);
- CHECK_VALID(*this);
- return (src.x != x) || (src.y != y) || (src.z != z);
-}
-
-
-//-----------------------------------------------------------------------------
-// Copy
-//-----------------------------------------------------------------------------
-
-FORCEINLINE void VectorCopy( const Vector& src, Vector& dst )
-{
- CHECK_VALID(src);
- dst.x = src.x;
- dst.y = src.y;
- dst.z = src.z;
-}
-
-inline void Vector::CopyToArray(float* rgfl) const
-{
- Assert( rgfl );
- CHECK_VALID(*this);
- rgfl[0] = x, rgfl[1] = y, rgfl[2] = z;
-}
-
-//-----------------------------------------------------------------------------
-// standard math operations
-//-----------------------------------------------------------------------------
-// #pragma message("TODO: these should be SSE")
-
-inline void Vector::Negate()
-{
- CHECK_VALID(*this);
- x = -x; y = -y; z = -z;
-}
-
-FORCEINLINE Vector& Vector::operator+=(const Vector& v)
-{
- CHECK_VALID(*this);
- CHECK_VALID(v);
- x+=v.x; y+=v.y; z += v.z;
- return *this;
-}
-
-FORCEINLINE Vector& Vector::operator-=(const Vector& v)
-{
- CHECK_VALID(*this);
- CHECK_VALID(v);
- x-=v.x; y-=v.y; z -= v.z;
- return *this;
-}
-
-FORCEINLINE Vector& Vector::operator*=(float fl)
-{
- x *= fl;
- y *= fl;
- z *= fl;
- CHECK_VALID(*this);
- return *this;
-}
-
-FORCEINLINE Vector& Vector::operator*=(const Vector& v)
-{
- CHECK_VALID(v);
- x *= v.x;
- y *= v.y;
- z *= v.z;
- CHECK_VALID(*this);
- return *this;
-}
-
-// this ought to be an opcode.
-FORCEINLINE Vector& Vector::operator+=(float fl)
-{
- x += fl;
- y += fl;
- z += fl;
- CHECK_VALID(*this);
- return *this;
-}
-
-FORCEINLINE Vector& Vector::operator-=(float fl)
-{
- x -= fl;
- y -= fl;
- z -= fl;
- CHECK_VALID(*this);
- return *this;
-}
-
-
-
-FORCEINLINE Vector& Vector::operator/=(float fl)
-{
- Assert( fl != 0.0f );
- float oofl = 1.0f / fl;
- x *= oofl;
- y *= oofl;
- z *= oofl;
- CHECK_VALID(*this);
- return *this;
-}
-
-FORCEINLINE Vector& Vector::operator/=(const Vector& v)
-{
- CHECK_VALID(v);
- Assert( v.x != 0.0f && v.y != 0.0f && v.z != 0.0f );
- x /= v.x;
- y /= v.y;
- z /= v.z;
- CHECK_VALID(*this);
- return *this;
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// Inlined Short Vector methods
-//
-//-----------------------------------------------------------------------------
-
-
-inline void ShortVector::Init( short ix, short iy, short iz, short iw )
-{
- x = ix; y = iy; z = iz; w = iw;
-}
-
-FORCEINLINE void ShortVector::Set( const ShortVector& vOther )
-{
- x = vOther.x;
- y = vOther.y;
- z = vOther.z;
- w = vOther.w;
-}
-
-FORCEINLINE void ShortVector::Set( const short ix, const short iy, const short iz, const short iw )
-{
- x = ix;
- y = iy;
- z = iz;
- w = iw;
-}
-
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-inline short ShortVector::operator[](int i) const
-{
- Assert( (i >= 0) && (i < 4) );
- return ((short*)this)[i];
-}
-
-inline short& ShortVector::operator[](int i)
-{
- Assert( (i >= 0) && (i < 4) );
- return ((short*)this)[i];
-}
-
-//-----------------------------------------------------------------------------
-// Base address...
-//-----------------------------------------------------------------------------
-inline short* ShortVector::Base()
-{
- return (short*)this;
-}
-
-inline short const* ShortVector::Base() const
-{
- return (short const*)this;
-}
-
-
-//-----------------------------------------------------------------------------
-// comparison
-//-----------------------------------------------------------------------------
-
-inline bool ShortVector::operator==( const ShortVector& src ) const
-{
- return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w);
-}
-
-inline bool ShortVector::operator!=( const ShortVector& src ) const
-{
- return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w);
-}
-
-
-
-//-----------------------------------------------------------------------------
-// standard math operations
-//-----------------------------------------------------------------------------
-
-FORCEINLINE ShortVector& ShortVector::operator+=(const ShortVector& v)
-{
- x+=v.x; y+=v.y; z += v.z; w += v.w;
- return *this;
-}
-
-FORCEINLINE ShortVector& ShortVector::operator-=(const ShortVector& v)
-{
- x-=v.x; y-=v.y; z -= v.z; w -= v.w;
- return *this;
-}
-
-FORCEINLINE ShortVector& ShortVector::operator*=(float fl)
-{
- x *= fl;
- y *= fl;
- z *= fl;
- w *= fl;
- return *this;
-}
-
-FORCEINLINE ShortVector& ShortVector::operator*=(const ShortVector& v)
-{
- x *= v.x;
- y *= v.y;
- z *= v.z;
- w *= v.w;
- return *this;
-}
-
-FORCEINLINE ShortVector& ShortVector::operator/=(float fl)
-{
- Assert( fl != 0.0f );
- float oofl = 1.0f / fl;
- x *= oofl;
- y *= oofl;
- z *= oofl;
- w *= oofl;
- return *this;
-}
-
-FORCEINLINE ShortVector& ShortVector::operator/=(const ShortVector& v)
-{
- Assert( v.x != 0 && v.y != 0 && v.z != 0 && v.w != 0 );
- x /= v.x;
- y /= v.y;
- z /= v.z;
- w /= v.w;
- return *this;
-}
-
-FORCEINLINE void ShortVectorMultiply( const ShortVector& src, float fl, ShortVector& res )
-{
- Assert( IsFinite(fl) );
- res.x = src.x * fl;
- res.y = src.y * fl;
- res.z = src.z * fl;
- res.w = src.w * fl;
-}
-
-FORCEINLINE ShortVector ShortVector::operator*(float fl) const
-{
- ShortVector res;
- ShortVectorMultiply( *this, fl, res );
- return res;
-}
-
-
-
-
-
-
-//-----------------------------------------------------------------------------
-//
-// Inlined Integer Vector methods
-//
-//-----------------------------------------------------------------------------
-
-
-inline void IntVector4D::Init( int ix, int iy, int iz, int iw )
-{
- x = ix; y = iy; z = iz; w = iw;
-}
-
-FORCEINLINE void IntVector4D::Set( const IntVector4D& vOther )
-{
- x = vOther.x;
- y = vOther.y;
- z = vOther.z;
- w = vOther.w;
-}
-
-FORCEINLINE void IntVector4D::Set( const int ix, const int iy, const int iz, const int iw )
-{
- x = ix;
- y = iy;
- z = iz;
- w = iw;
-}
-
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-inline int IntVector4D::operator[](int i) const
-{
- Assert( (i >= 0) && (i < 4) );
- return ((int*)this)[i];
-}
-
-inline int& IntVector4D::operator[](int i)
-{
- Assert( (i >= 0) && (i < 4) );
- return ((int*)this)[i];
-}
-
-//-----------------------------------------------------------------------------
-// Base address...
-//-----------------------------------------------------------------------------
-inline int* IntVector4D::Base()
-{
- return (int*)this;
-}
-
-inline int const* IntVector4D::Base() const
-{
- return (int const*)this;
-}
-
-
-//-----------------------------------------------------------------------------
-// comparison
-//-----------------------------------------------------------------------------
-
-inline bool IntVector4D::operator==( const IntVector4D& src ) const
-{
- return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w);
-}
-
-inline bool IntVector4D::operator!=( const IntVector4D& src ) const
-{
- return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w);
-}
-
-
-
-//-----------------------------------------------------------------------------
-// standard math operations
-//-----------------------------------------------------------------------------
-
-FORCEINLINE IntVector4D& IntVector4D::operator+=(const IntVector4D& v)
-{
- x+=v.x; y+=v.y; z += v.z; w += v.w;
- return *this;
-}
-
-FORCEINLINE IntVector4D& IntVector4D::operator-=(const IntVector4D& v)
-{
- x-=v.x; y-=v.y; z -= v.z; w -= v.w;
- return *this;
-}
-
-FORCEINLINE IntVector4D& IntVector4D::operator*=(float fl)
-{
- x *= fl;
- y *= fl;
- z *= fl;
- w *= fl;
- return *this;
-}
-
-FORCEINLINE IntVector4D& IntVector4D::operator*=(const IntVector4D& v)
-{
- x *= v.x;
- y *= v.y;
- z *= v.z;
- w *= v.w;
- return *this;
-}
-
-FORCEINLINE IntVector4D& IntVector4D::operator/=(float fl)
-{
- Assert( fl != 0.0f );
- float oofl = 1.0f / fl;
- x *= oofl;
- y *= oofl;
- z *= oofl;
- w *= oofl;
- return *this;
-}
-
-FORCEINLINE IntVector4D& IntVector4D::operator/=(const IntVector4D& v)
-{
- Assert( v.x != 0 && v.y != 0 && v.z != 0 && v.w != 0 );
- x /= v.x;
- y /= v.y;
- z /= v.z;
- w /= v.w;
- return *this;
-}
-
-FORCEINLINE void IntVector4DMultiply( const IntVector4D& src, float fl, IntVector4D& res )
-{
- Assert( IsFinite(fl) );
- res.x = src.x * fl;
- res.y = src.y * fl;
- res.z = src.z * fl;
- res.w = src.w * fl;
-}
-
-FORCEINLINE IntVector4D IntVector4D::operator*(float fl) const
-{
- IntVector4D res;
- IntVector4DMultiply( *this, fl, res );
- return res;
-}
-
-
-
-// =======================
-
-
-FORCEINLINE void VectorAdd( const Vector& a, const Vector& b, Vector& c )
-{
- CHECK_VALID(a);
- CHECK_VALID(b);
- c.x = a.x + b.x;
- c.y = a.y + b.y;
- c.z = a.z + b.z;
-}
-
-FORCEINLINE void VectorSubtract( const Vector& a, const Vector& b, Vector& c )
-{
- CHECK_VALID(a);
- CHECK_VALID(b);
- c.x = a.x - b.x;
- c.y = a.y - b.y;
- c.z = a.z - b.z;
-}
-
-FORCEINLINE void VectorMultiply( const Vector& a, vec_t b, Vector& c )
-{
- CHECK_VALID(a);
- Assert( IsFinite(b) );
- c.x = a.x * b;
- c.y = a.y * b;
- c.z = a.z * b;
-}
-
-FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& c )
-{
- CHECK_VALID(a);
- CHECK_VALID(b);
- c.x = a.x * b.x;
- c.y = a.y * b.y;
- c.z = a.z * b.z;
-}
-
-// for backwards compatability
-inline void VectorScale ( const Vector& in, vec_t scale, Vector& result )
-{
- VectorMultiply( in, scale, result );
-}
-
-
-FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& c )
-{
- CHECK_VALID(a);
- Assert( b != 0.0f );
- vec_t oob = 1.0f / b;
- c.x = a.x * oob;
- c.y = a.y * oob;
- c.z = a.z * oob;
-}
-
-FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& c )
-{
- CHECK_VALID(a);
- CHECK_VALID(b);
- Assert( (b.x != 0.0f) && (b.y != 0.0f) && (b.z != 0.0f) );
- c.x = a.x / b.x;
- c.y = a.y / b.y;
- c.z = a.z / b.z;
-}
-
-// FIXME: Remove
-// For backwards compatability
-inline void Vector::MulAdd(const Vector& a, const Vector& b, float scalar)
-{
- CHECK_VALID(a);
- CHECK_VALID(b);
- x = a.x + b.x * scalar;
- y = a.y + b.y * scalar;
- z = a.z + b.z * scalar;
-}
-
-inline void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest )
-{
- CHECK_VALID(src1);
- CHECK_VALID(src2);
- dest.x = src1.x + (src2.x - src1.x) * t;
- dest.y = src1.y + (src2.y - src1.y) * t;
- dest.z = src1.z + (src2.z - src1.z) * t;
-}
-
-inline Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t )
-{
- Vector result;
- VectorLerp( src1, src2, t, result );
- return result;
-}
-
-//-----------------------------------------------------------------------------
-// Temporary storage for vector results so const Vector& results can be returned
-//-----------------------------------------------------------------------------
-inline Vector &AllocTempVector()
-{
- static Vector s_vecTemp[128];
- static CInterlockedInt s_nIndex;
-
- int nIndex;
- for (;;)
- {
- int nOldIndex = s_nIndex;
- nIndex = ( (nOldIndex + 0x10001) & 0x7F );
-
- if ( s_nIndex.AssignIf( nOldIndex, nIndex ) )
- {
- break;
- }
- ThreadPause();
- }
- return s_vecTemp[nIndex & 0xffff];
-}
-
-
-
-//-----------------------------------------------------------------------------
-// dot, cross
-//-----------------------------------------------------------------------------
-FORCEINLINE vec_t DotProduct(const Vector& a, const Vector& b)
-{
- CHECK_VALID(a);
- CHECK_VALID(b);
- return( a.x*b.x + a.y*b.y + a.z*b.z );
-}
-
-// for backwards compatability
-inline vec_t Vector::Dot( const Vector& vOther ) const
-{
- CHECK_VALID(vOther);
- return DotProduct( *this, vOther );
-}
-
-inline void CrossProduct(const Vector& a, const Vector& b, Vector& result )
-{
- CHECK_VALID(a);
- CHECK_VALID(b);
- Assert( &a != &result );
- Assert( &b != &result );
- result.x = a.y*b.z - a.z*b.y;
- result.y = a.z*b.x - a.x*b.z;
- result.z = a.x*b.y - a.y*b.x;
-}
-
-inline vec_t DotProductAbs( const Vector &v0, const Vector &v1 )
-{
- CHECK_VALID(v0);
- CHECK_VALID(v1);
- return FloatMakePositive(v0.x*v1.x) + FloatMakePositive(v0.y*v1.y) + FloatMakePositive(v0.z*v1.z);
-}
-
-inline vec_t DotProductAbs( const Vector &v0, const float *v1 )
-{
- return FloatMakePositive(v0.x * v1[0]) + FloatMakePositive(v0.y * v1[1]) + FloatMakePositive(v0.z * v1[2]);
-}
-
-//-----------------------------------------------------------------------------
-// length
-//-----------------------------------------------------------------------------
-
-inline vec_t VectorLength( const Vector& v )
-{
- CHECK_VALID(v);
- return (vec_t)FastSqrt(v.x*v.x + v.y*v.y + v.z*v.z);
-}
-
-
-inline vec_t Vector::Length(void) const
-{
- CHECK_VALID(*this);
- return VectorLength( *this );
-}
-
-
-//-----------------------------------------------------------------------------
-// Normalization
-//-----------------------------------------------------------------------------
-
-/*
-// FIXME: Can't use until we're un-macroed in mathlib.h
-inline vec_t VectorNormalize( Vector& v )
-{
- Assert( v.IsValid() );
- vec_t l = v.Length();
- if (l != 0.0f)
- {
- v /= l;
- }
- else
- {
- // FIXME:
- // Just copying the existing implemenation; shouldn't res.z == 0?
- v.x = v.y = 0.0f; v.z = 1.0f;
- }
- return l;
-}
-*/
-
-
-// check a point against a box
-bool Vector::WithinAABox( Vector const &boxmin, Vector const &boxmax)
-{
- return (
- ( x >= boxmin.x ) && ( x <= boxmax.x) &&
- ( y >= boxmin.y ) && ( y <= boxmax.y) &&
- ( z >= boxmin.z ) && ( z <= boxmax.z)
- );
-}
-
-//-----------------------------------------------------------------------------
-// Get the distance from this vector to the other one
-//-----------------------------------------------------------------------------
-inline vec_t Vector::DistTo(const Vector &vOther) const
-{
- Vector delta;
- VectorSubtract( *this, vOther, delta );
- return delta.Length();
-}
-
-
-//-----------------------------------------------------------------------------
-// Vector equality with tolerance
-//-----------------------------------------------------------------------------
-inline bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance )
-{
- if (FloatMakePositive(src1.x - src2.x) > tolerance)
- return false;
- if (FloatMakePositive(src1.y - src2.y) > tolerance)
- return false;
- return (FloatMakePositive(src1.z - src2.z) <= tolerance);
-}
-
-
-//-----------------------------------------------------------------------------
-// Computes the closest point to vecTarget no farther than flMaxDist from vecStart
-//-----------------------------------------------------------------------------
-inline void ComputeClosestPoint( const Vector& vecStart, float flMaxDist, const Vector& vecTarget, Vector *pResult )
-{
- Vector vecDelta;
- VectorSubtract( vecTarget, vecStart, vecDelta );
- float flDistSqr = vecDelta.LengthSqr();
- if ( flDistSqr <= flMaxDist * flMaxDist )
- {
- *pResult = vecTarget;
- }
- else
- {
- vecDelta /= FastSqrt( flDistSqr );
- VectorMA( vecStart, flMaxDist, vecDelta, *pResult );
- }
-}
-
-
-//-----------------------------------------------------------------------------
-// Takes the absolute value of a vector
-//-----------------------------------------------------------------------------
-inline void VectorAbs( const Vector& src, Vector& dst )
-{
-    // Each destination component depends only on the matching source
-    // component, so src and dst may refer to the same vector.
-    dst.x = FloatMakePositive( src.x );
-    dst.y = FloatMakePositive( src.y );
-    dst.z = FloatMakePositive( src.z );
-}
-
-
-//-----------------------------------------------------------------------------
-//
-// Slow methods
-//
-//-----------------------------------------------------------------------------
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-//-----------------------------------------------------------------------------
-// Returns a vector with the min or max in X, Y, and Z.
-//-----------------------------------------------------------------------------
-inline Vector Vector::Min(const Vector &vOther) const
-{
-    // Pick the smaller value per axis; on a tie (or unordered compare) vOther's value is used.
-    const vec_t flMinX = ( x < vOther.x ) ? x : vOther.x;
-    const vec_t flMinY = ( y < vOther.y ) ? y : vOther.y;
-    const vec_t flMinZ = ( z < vOther.z ) ? z : vOther.z;
-    return Vector( flMinX, flMinY, flMinZ );
-}
-
-inline Vector Vector::Max(const Vector &vOther) const
-{
-    // Pick the larger value per axis; on a tie (or unordered compare) vOther's value is used.
-    const vec_t flMaxX = ( x > vOther.x ) ? x : vOther.x;
-    const vec_t flMaxY = ( y > vOther.y ) ? y : vOther.y;
-    const vec_t flMaxZ = ( z > vOther.z ) ? z : vOther.z;
-    return Vector( flMaxX, flMaxY, flMaxZ );
-}
-
-
-//-----------------------------------------------------------------------------
-// arithmetic operations
-//-----------------------------------------------------------------------------
-
-inline Vector Vector::operator-(void) const
-{
-    // Unary minus: flip the sign of every component.
-    const vec_t flNegX = -x;
-    const vec_t flNegY = -y;
-    const vec_t flNegZ = -z;
-    return Vector( flNegX, flNegY, flNegZ );
-}
-
-inline Vector Vector::operator+(const Vector& v) const
-{
-    // Component-wise sum, delegated to VectorAdd.
-    Vector vecSum;
-    VectorAdd( *this, v, vecSum );
-    return vecSum;
-}
-
-inline Vector Vector::operator-(const Vector& v) const
-{
-    // Component-wise difference (this - v), delegated to VectorSubtract.
-    Vector vecDifference;
-    VectorSubtract( *this, v, vecDifference );
-    return vecDifference;
-}
-
-inline Vector Vector::operator*(float fl) const
-{
-    // Uniform scale by a scalar, delegated to VectorMultiply.
-    Vector vecScaled;
-    VectorMultiply( *this, fl, vecScaled );
-    return vecScaled;
-}
-
-inline Vector Vector::operator*(const Vector& v) const
-{
-    // Vector-by-vector product, delegated to the Vector overload of VectorMultiply
-    // (not a dot or cross product).
-    Vector vecProduct;
-    VectorMultiply( *this, v, vecProduct );
-    return vecProduct;
-}
-
-inline Vector Vector::operator/(float fl) const
-{
-    // Uniform division by a scalar, delegated to VectorDivide.
-    Vector vecQuotient;
-    VectorDivide( *this, fl, vecQuotient );
-    return vecQuotient;
-}
-
-inline Vector Vector::operator/(const Vector& v) const
-{
-    // Vector-by-vector quotient, delegated to the Vector overload of VectorDivide.
-    Vector vecQuotient;
-    VectorDivide( *this, v, vecQuotient );
-    return vecQuotient;
-}
-
-inline Vector operator*(float fl, const Vector& v)
-{
-    // Scalar-on-the-left form; forwards to the member Vector::operator*(float).
-    return v.operator*( fl );
-}
-
-//-----------------------------------------------------------------------------
-// cross product
-//-----------------------------------------------------------------------------
-
-inline Vector Vector::Cross(const Vector& vOther) const
-{
-    // this x vOther, delegated to the three-argument CrossProduct.
-    Vector vecCross;
-    CrossProduct( *this, vOther, vecCross );
-    return vecCross;
-}
-
-//-----------------------------------------------------------------------------
-// 2D
-//-----------------------------------------------------------------------------
-
-inline vec_t Vector::Length2D(void) const
-{
-    // Length of the (x, y) part only; z is ignored.
-    const vec_t flLength = (vec_t)FastSqrt( x*x + y*y );
-    return flLength;
-}
-
-inline vec_t Vector::Length2DSqr(void) const
-{
-    // Squared length of the (x, y) part only; z is ignored and no square root is taken.
-    return x*x + y*y;
-}
-
-inline Vector CrossProduct(const Vector& a, const Vector& b)
-{
-    // Return-by-value form of the cross product a x b.
-    const vec_t flCrossX = a.y*b.z - a.z*b.y;
-    const vec_t flCrossY = a.z*b.x - a.x*b.z;
-    const vec_t flCrossZ = a.x*b.y - a.y*b.x;
-    return Vector( flCrossX, flCrossY, flCrossZ );
-}
-
-inline void VectorMin( const Vector &a, const Vector &b, Vector &result )
-{
-    // Per-axis minimum of a and b via fpmin; result may alias either input
-    // because each component is read before it is written.
-    result.x = fpmin( a.x, b.x );
-    result.y = fpmin( a.y, b.y );
-    result.z = fpmin( a.z, b.z );
-}
-
-inline void VectorMax( const Vector &a, const Vector &b, Vector &result )
-{
-    // Per-axis maximum of a and b via fpmax; result may alias either input
-    // because each component is read before it is written.
-    result.x = fpmax( a.x, b.x );
-    result.y = fpmax( a.y, b.y );
-    result.z = fpmax( a.z, b.z );
-}
-
-// Computes the volume of the axis-aligned box spanned by vecMins and vecMaxs.
-// If a max component is below the matching min component the extent on that
-// axis is negative and the sign carries through to the result.
-inline float ComputeVolume( const Vector &vecMins, const Vector &vecMaxs )
-{
-    Vector vecDelta;
-    VectorSubtract( vecMaxs, vecMins, vecDelta );
-    // Volume is the product of the three extents. The previous
-    // DotProduct( vecDelta, vecDelta ) returned the squared length of the
-    // box diagonal, which is not a volume.
-    return vecDelta.x * vecDelta.y * vecDelta.z;
-}
-
-// Get a random vector.
-inline Vector RandomVector( float minVal, float maxVal )
-{
-    // Forwards the range to Vector::Random and returns the result by value.
-    Vector vecRandom;
-    vecRandom.Random( minVal, maxVal );
-    return vecRandom;
-}
-
-#endif //slow
-
-//-----------------------------------------------------------------------------
-// Helper debugging stuff....
-//-----------------------------------------------------------------------------
-
-inline bool operator==( float const* f, const Vector& v )
-{
-    // Debug trap: this overload always asserts and never compares anything.
-    // Presumably it exists to catch accidental pointer-vs-Vector comparisons.
-    Assert( 0 );
-    return false;
-}
-
-inline bool operator==( const Vector& v, float const* f )
-{
-    // Debug trap: this overload always asserts and never compares anything.
-    // Presumably it exists to catch accidental Vector-vs-pointer comparisons.
-    Assert( 0 );
-    return false;
-}
-
-inline bool operator!=( float const* f, const Vector& v )
-{
-    // Debug trap: always asserts. Note the result is false here as well,
-    // the same value the matching operator== trap returns.
-    Assert( 0 );
-    return false;
-}
-
-inline bool operator!=( const Vector& v, float const* f )
-{
-    // Debug trap: always asserts. Note the result is false here as well,
-    // the same value the matching operator== trap returns.
-    Assert( 0 );
-    return false;
-}
-
-
-//-----------------------------------------------------------------------------
-// AngularImpulse
-//-----------------------------------------------------------------------------
-// AngularImpulses are exponential maps (an axis scaled by a "twist" angle in degrees)
-typedef Vector AngularImpulse;
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-inline AngularImpulse RandomAngularImpulse( float minVal, float maxVal )
-{
-    // AngularImpulse is a typedef of Vector, so this simply uses Vector::Random.
-    AngularImpulse vecImpulse;
-    vecImpulse.Random( minVal, maxVal );
-    return vecImpulse;
-}
-
-#endif
-
-
-//-----------------------------------------------------------------------------
-// Quaternion
-//-----------------------------------------------------------------------------
-
-class RadianEuler;
-
-class Quaternion                // same data-layout as engine's vec4_t,
-{                               //      which is a vec_t[4]
-public:
-    // Default construction leaves x, y, z, w uninitialized, except in _DEBUG
-    // builds with VECTOR_PARANOIA defined, where they are set to NaN so that
-    // use-before-set is easier to catch.
-    inline Quaternion(void) {
-#ifdef _DEBUG
-#ifdef VECTOR_PARANOIA
-        x = y = z = w = VEC_T_NAN;
-#endif
-#endif
-    }
-    // Component-wise construction.
-    inline Quaternion(vec_t ix, vec_t iy, vec_t iz, vec_t iw) : x(ix), y(iy), z(iz), w(iw) { }
-    // Implicit conversion from Euler angles (defined out of line).
-    inline Quaternion(RadianEuler const &angle);    // evil auto type promotion!!!
-
-    // Sets all four components; every argument defaults to 0.
-    inline void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f, vec_t iw=0.0f)    { x = ix; y = iy; z = iz; w = iw; }
-
-    // Validity checks (defined out of line).
-    bool IsValid() const;
-    void Invalidate();
-
-    // Exact, component-wise comparison.
-    bool operator==( const Quaternion &src ) const;
-    bool operator!=( const Quaternion &src ) const;
-
-    // Raw access to the components as a vec_t[4]; relies on x, y, z, w being
-    // the only data members, declared in that order.
-    vec_t* Base()                   { return (vec_t*)this; }
-    // The const overload now casts to a const pointer instead of casting the
-    // constness of *this away.
-    const vec_t* Base() const       { return (const vec_t*)this; }
-
-    // array access...
-    vec_t operator[](int i) const;
-    vec_t& operator[](int i);
-
-    vec_t x, y, z, w;
-};
-
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-inline vec_t& Quaternion::operator[](int i)
-{
-    // Valid indices are 0..3 (x, y, z, w); only checked by Assert.
-    Assert( (i >= 0) && (i < 4) );
-    vec_t *pComponents = (vec_t*)this;
-    return pComponents[i];
-}
-
-inline vec_t Quaternion::operator[](int i) const
-{
-    // Valid indices are 0..3 (x, y, z, w); only checked by Assert.
-    Assert( (i >= 0) && (i < 4) );
-    const vec_t *pComponents = (const vec_t*)this;
-    return pComponents[i];
-}
-
-
-//-----------------------------------------------------------------------------
-// Equality test
-//-----------------------------------------------------------------------------
-inline bool Quaternion::operator==( const Quaternion &src ) const
-{
-    // Exact (bitwise-value) float comparison, bailing out on the first mismatch.
-    if ( !( x == src.x ) )
-        return false;
-    if ( !( y == src.y ) )
-        return false;
-    if ( !( z == src.z ) )
-        return false;
-    return ( w == src.w );
-}
-
-inline bool Quaternion::operator!=( const Quaternion &src ) const
-{
-    // Defined strictly as the negation of operator==.
-    return !( *this == src );
-}
-
-
-//-----------------------------------------------------------------------------
-// Quaternion equality with tolerance
-//-----------------------------------------------------------------------------
-inline bool QuaternionsAreEqual( const Quaternion& src1, const Quaternion& src2, float tolerance )
-{
-    // Components are tested in x, y, z, w order and evaluation stops at the
-    // first one whose absolute difference exceeds the tolerance.
-    return !( FloatMakePositive( src1.x - src2.x ) > tolerance )
-        && !( FloatMakePositive( src1.y - src2.y ) > tolerance )
-        && !( FloatMakePositive( src1.z - src2.z ) > tolerance )
-        && ( FloatMakePositive( src1.w - src2.w ) <= tolerance );
-}
-
-
-//-----------------------------------------------------------------------------
-// Here's where we add all those lovely SSE optimized routines
-//-----------------------------------------------------------------------------
-class ALIGN16 QuaternionAligned : public Quaternion
-{
-public:
-    // Components are whatever the Quaternion default constructor leaves them as.
-    inline QuaternionAligned(void) {}
-
-    // Component-wise construction, routed through Quaternion::Init.
-    inline QuaternionAligned(vec_t X, vec_t Y, vec_t Z, vec_t W)
-    {
-        Init( X, Y, Z, W );
-    }
-
-#ifdef VECTOR_NO_SLOW_OPERATIONS
-
-private:
-    // Declared private with no body here: copying is disallowed in optimal mode.
-    QuaternionAligned(const QuaternionAligned& vOther);
-    QuaternionAligned(const Quaternion &vOther);
-
-#else
-public:
-    // Explicit conversion from an unaligned Quaternion.
-    explicit QuaternionAligned(const Quaternion &vOther)
-    {
-        Init( vOther.x, vOther.y, vOther.z, vOther.w );
-    }
-
-    // Assignment from an unaligned Quaternion copies all four components.
-    QuaternionAligned& operator=(const Quaternion &vOther)
-    {
-        Init( vOther.x, vOther.y, vOther.z, vOther.w );
-        return *this;
-    }
-
-#endif
-} ALIGN16_POST;
-
-
-//-----------------------------------------------------------------------------
-// Radian Euler angle aligned to axis (NOT ROLL/PITCH/YAW)
-//-----------------------------------------------------------------------------
-class QAngle;
-class RadianEuler
-{
-public:
-    // Default construction leaves x, y, z uninitialized.
-    inline RadianEuler(void)                            { }
-    // Component-wise construction.
-    inline RadianEuler(vec_t X, vec_t Y, vec_t Z) : x(X), y(Y), z(Z) { }
-    // Implicit conversions (defined out of line).
-    inline RadianEuler(Quaternion const &q);    // evil auto type promotion!!!
-    inline RadianEuler(QAngle const &angles);   // evil auto type promotion!!!
-
-    // Sets all three components; every argument defaults to 0.
-    inline void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f)
-    {
-        x = ix;
-        y = iy;
-        z = iz;
-    }
-
-    // Conversion back to a degree-based QAngle.
-    QAngle ToQAngle( void ) const;
-    bool IsValid() const;
-    void Invalidate();
-
-    // Indexed access to the components (0 = x, 1 = y, 2 = z).
-    vec_t operator[](int i) const;
-    vec_t& operator[](int i);
-
-    vec_t x, y, z;
-};
-
-
-extern void AngleQuaternion( RadianEuler const &angles, Quaternion &qt );
-extern void QuaternionAngles( Quaternion const &q, RadianEuler &angles );
-
-// Zeroes all four components of a Quaternion.
-FORCEINLINE void NetworkVarConstruct( Quaternion &q )
-{
-    q.w = 0.0f;
-    q.z = 0.0f;
-    q.y = 0.0f;
-    q.x = 0.0f;
-}
-
-inline Quaternion::Quaternion(RadianEuler const &angle)
-{
-    // Conversion constructor: AngleQuaternion fills in this object from the Euler angles.
-    Quaternion &qSelf = *this;
-    AngleQuaternion( angle, qSelf );
-}
-
-inline bool Quaternion::IsValid() const
-{
-    // Valid means every component passes IsFinite; checked in x, y, z, w order.
-    if ( !IsFinite( x ) )
-        return false;
-    if ( !IsFinite( y ) )
-        return false;
-    if ( !IsFinite( z ) )
-        return false;
-    return IsFinite( w );
-}
-
-inline void Quaternion::Invalidate()
-{
-    // The _DEBUG / VECTOR_PARANOIA guards that once wrapped this are commented
-    // out in the source, so the components are set to NaN in every build.
-    w = VEC_T_NAN;
-    z = VEC_T_NAN;
-    y = VEC_T_NAN;
-    x = VEC_T_NAN;
-}
-
-inline RadianEuler::RadianEuler(Quaternion const &q)
-{
-    // Conversion constructor: QuaternionAngles fills in this object from the quaternion.
-    RadianEuler &anglesSelf = *this;
-    QuaternionAngles( q, anglesSelf );
-}
-
-inline void VectorCopy( RadianEuler const& src, RadianEuler &dst )
-{
-    // Validate the source (only active with VECTOR_PARANOIA), then copy field by field.
-    CHECK_VALID(src);
-    dst.x = src.x;
-    dst.y = src.y;
-    dst.z = src.z;
-}
-
-inline void VectorScale( RadianEuler const& src, float b, RadianEuler &dst )
-{
-    // Both the source angles and the scale factor are sanity-checked first.
-    CHECK_VALID(src);
-    Assert( IsFinite(b) );
-
-    // dst = src * b, per component; src and dst may be the same object.
-    dst.x = src.x * b;
-    dst.y = src.y * b;
-    dst.z = src.z * b;
-}
-
-inline bool RadianEuler::IsValid() const
-{
-    // Valid means every component passes IsFinite; checked in x, y, z order.
-    if ( !IsFinite( x ) )
-        return false;
-    if ( !IsFinite( y ) )
-        return false;
-    return IsFinite( z );
-}
-
-inline void RadianEuler::Invalidate()
-{
-    // The _DEBUG / VECTOR_PARANOIA guards that once wrapped this are commented
-    // out in the source, so the components are set to NaN in every build.
-    z = VEC_T_NAN;
-    y = VEC_T_NAN;
-    x = VEC_T_NAN;
-}
-
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-inline vec_t& RadianEuler::operator[](int i)
-{
-    // Valid indices are 0..2 (x, y, z); only checked by Assert.
-    Assert( (i >= 0) && (i < 3) );
-    vec_t *pComponents = (vec_t*)this;
-    return pComponents[i];
-}
-
-inline vec_t RadianEuler::operator[](int i) const
-{
-    // Valid indices are 0..2 (x, y, z); only checked by Assert.
-    Assert( (i >= 0) && (i < 3) );
-    const vec_t *pComponents = (const vec_t*)this;
-    return pComponents[i];
-}
-
-
-//-----------------------------------------------------------------------------
-// Degree Euler QAngle pitch, yaw, roll
-//-----------------------------------------------------------------------------
-class QAngleByValue;
-
-// Euler angle triple stored as three vec_t members x, y, z.
-// The data members come first and are the only ones, which is what the
-// pointer-cast based Base() / operator[] / QAngleByValue conversions rely on.
-class QAngle
-{
-public:
- // Members
- vec_t x, y, z;
-
- // Construction/destruction
- QAngle(void);
- QAngle(vec_t X, vec_t Y, vec_t Z);
-// QAngle(RadianEuler const &angles); // evil auto type promotion!!!
-
- // Allow pass-by-value: reinterprets this object as a QAngleByValue
- operator QAngleByValue &() { return *((QAngleByValue *)(this)); }
- operator const QAngleByValue &() const { return *((const QAngleByValue *)(this)); }
-
- // Initialization
- void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f);
- void Random( vec_t minVal, vec_t maxVal );
-
- // Got any nasty NAN's?
- bool IsValid() const;
- void Invalidate();
-
- // array access...
- vec_t operator[](int i) const;
- vec_t& operator[](int i);
-
- // Base address...
- vec_t* Base();
- vec_t const* Base() const;
-
- // equality
- bool operator==(const QAngle& v) const;
- bool operator!=(const QAngle& v) const;
-
- // arithmetic operations
- QAngle& operator+=(const QAngle &v);
- QAngle& operator-=(const QAngle &v);
- QAngle& operator*=(float s);
- QAngle& operator/=(float s);
-
- // Get the magnitude of the (x, y, z) triple.
- vec_t Length() const;
- vec_t LengthSqr() const;
-
- // negate the QAngle components
- //void Negate();
-
- // Assignment is always available, even when copy construction is disabled below.
- QAngle& operator=( const QAngle& src );
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // copy constructors
-
- // arithmetic operations that return a new QAngle by value
- QAngle operator-(void) const;
-
- QAngle operator+(const QAngle& v) const;
- QAngle operator-(const QAngle& v) const;
- QAngle operator*(float fl) const;
- QAngle operator/(float fl) const;
-#else
-
-private:
- // No copy constructors allowed if we're in optimal mode
- QAngle(const QAngle& vOther);
-
-#endif
-};
-
-// Zeroes all three components of a QAngle.
-FORCEINLINE void NetworkVarConstruct( QAngle &q )
-{
-    q.z = 0.0f;
-    q.y = 0.0f;
-    q.x = 0.0f;
-}
-
-//-----------------------------------------------------------------------------
-// Allows us to specifically pass the vector by value when we need to
-//-----------------------------------------------------------------------------
-// QAngle subclass that adds no members and always has a public copy
-// constructor, so a QAngle can be passed by value through this type.
-class QAngleByValue : public QAngle
-{
-public:
- // Construction/destruction:
- QAngleByValue(void) : QAngle() {}
- QAngleByValue(vec_t X, vec_t Y, vec_t Z) : QAngle( X, Y, Z ) {}
- // Copies via QAngle::operator= rather than the QAngle copy constructor,
- // which is private when VECTOR_NO_SLOW_OPERATIONS is defined.
- QAngleByValue(const QAngleByValue& vOther) { *this = vOther; }
-};
-
-
-inline void VectorAdd( const QAngle& a, const QAngle& b, QAngle& result )
-{
-    // Both operands are validated (only active with VECTOR_PARANOIA) before use.
-    CHECK_VALID(a);
-    CHECK_VALID(b);
-
-    // result = a + b, per component; result may alias either operand.
-    result.x = a.x + b.x;
-    result.y = a.y + b.y;
-    result.z = a.z + b.z;
-}
-
-inline void VectorMA( const QAngle &start, float scale, const QAngle &direction, QAngle &dest )
-{
-    // Both angle operands are validated (only active with VECTOR_PARANOIA) before use.
-    CHECK_VALID(start);
-    CHECK_VALID(direction);
-
-    // Multiply-add: dest = start + scale * direction, per component.
-    dest.x = start.x + scale * direction.x;
-    dest.y = start.y + scale * direction.y;
-    dest.z = start.z + scale * direction.z;
-}
-
-
-//-----------------------------------------------------------------------------
-// constructors
-//-----------------------------------------------------------------------------
-inline QAngle::QAngle(void)
-{
-    // Components stay uninitialized unless both _DEBUG and VECTOR_PARANOIA
-    // are defined, in which case they are set to NaN to catch errors.
-#if defined( _DEBUG ) && defined( VECTOR_PARANOIA )
-    z = VEC_T_NAN;
-    y = VEC_T_NAN;
-    x = VEC_T_NAN;
-#endif
-}
-
-inline QAngle::QAngle(vec_t X, vec_t Y, vec_t Z)
-{
-    // Store the components, then validate the finished object.
-    x = X;
-    y = Y;
-    z = Z;
-    CHECK_VALID(*this);
-}
-
-
-//-----------------------------------------------------------------------------
-// initialization
-//-----------------------------------------------------------------------------
-inline void QAngle::Init( vec_t ix, vec_t iy, vec_t iz )
-{
-    // Overwrite all three components, then validate the result.
-    x = ix;
-    y = iy;
-    z = iz;
-    CHECK_VALID(*this);
-}
-
-inline void QAngle::Random( vec_t minVal, vec_t maxVal )
-{
-    // One rand() draw per component, consumed in x, y, z order. Each draw is
-    // divided by VALVE_RAND_MAX and mapped onto the range starting at minVal.
-    x = minVal + ( (float)rand() / VALVE_RAND_MAX ) * ( maxVal - minVal );
-
-    y = minVal + ( (float)rand() / VALVE_RAND_MAX ) * ( maxVal - minVal );
-
-    z = minVal + ( (float)rand() / VALVE_RAND_MAX ) * ( maxVal - minVal );
-
-    CHECK_VALID(*this);
-}
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-inline QAngle RandomAngle( float minVal, float maxVal )
-{
-    // Draw the three values through Vector::Random, then repackage them as a QAngle.
-    Vector vecRandom;
-    vecRandom.Random( minVal, maxVal );
-    return QAngle( vecRandom.x, vecRandom.y, vecRandom.z );
-}
-
-#endif
-
-
-inline RadianEuler::RadianEuler(QAngle const &angles)
-{
-    // Degrees -> radians, with the components permuted:
-    // this->x comes from angles.z, this->y from angles.x, this->z from angles.y.
-    const vec_t flRadX = angles.z * 3.14159265358979323846f / 180.f;
-    const vec_t flRadY = angles.x * 3.14159265358979323846f / 180.f;
-    const vec_t flRadZ = angles.y * 3.14159265358979323846f / 180.f;
-    Init( flRadX, flRadY, flRadZ );
-}
-
-
-
-
-inline QAngle RadianEuler::ToQAngle( void) const
-{
-    // Radians -> degrees, undoing the permutation applied by the QAngle
-    // constructor: QAngle.x comes from y, QAngle.y from z, QAngle.z from x.
-    const vec_t flDegX = y * 180.f / 3.14159265358979323846f;
-    const vec_t flDegY = z * 180.f / 3.14159265358979323846f;
-    const vec_t flDegZ = x * 180.f / 3.14159265358979323846f;
-    return QAngle( flDegX, flDegY, flDegZ );
-}
-
-
-//-----------------------------------------------------------------------------
-// assignment
-//-----------------------------------------------------------------------------
-inline QAngle& QAngle::operator=(const QAngle &vOther)
-{
-    // Validate the source (only active with VECTOR_PARANOIA), then copy field by field.
-    CHECK_VALID(vOther);
-    x = vOther.x;
-    y = vOther.y;
-    z = vOther.z;
-    return *this;
-}
-
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-inline vec_t& QAngle::operator[](int i)
-{
-    // Valid indices are 0..2 (x, y, z); only checked by Assert.
-    Assert( (i >= 0) && (i < 3) );
-    vec_t *pComponents = (vec_t*)this;
-    return pComponents[i];
-}
-
-inline vec_t QAngle::operator[](int i) const
-{
-    // Valid indices are 0..2 (x, y, z); only checked by Assert.
-    Assert( (i >= 0) && (i < 3) );
-    const vec_t *pComponents = (const vec_t*)this;
-    return pComponents[i];
-}
-
-
-//-----------------------------------------------------------------------------
-// Base address...
-//-----------------------------------------------------------------------------
-inline vec_t* QAngle::Base()
-{
-    // The object itself is reinterpreted as an array of vec_t starting at x.
-    vec_t *pFirst = (vec_t*)this;
-    return pFirst;
-}
-
-inline vec_t const* QAngle::Base() const
-{
-    // The object itself is reinterpreted as a read-only array of vec_t starting at x.
-    vec_t const *pFirst = (vec_t const*)this;
-    return pFirst;
-}
-
-
-//-----------------------------------------------------------------------------
-// IsValid?
-//-----------------------------------------------------------------------------
-inline bool QAngle::IsValid() const
-{
-    // Valid means every component passes IsFinite; checked in x, y, z order.
-    if ( !IsFinite( x ) )
-        return false;
-    if ( !IsFinite( y ) )
-        return false;
-    return IsFinite( z );
-}
-
-//-----------------------------------------------------------------------------
-// Invalidate
-//-----------------------------------------------------------------------------
-
-inline void QAngle::Invalidate()
-{
-    // The _DEBUG / VECTOR_PARANOIA guards that once wrapped this are commented
-    // out in the source, so the components are set to NaN in every build.
-    z = VEC_T_NAN;
-    y = VEC_T_NAN;
-    x = VEC_T_NAN;
-}
-
-//-----------------------------------------------------------------------------
-// comparison
-//-----------------------------------------------------------------------------
-inline bool QAngle::operator==( const QAngle& src ) const
-{
-    CHECK_VALID(src);
-    CHECK_VALID(*this);
-
-    // Exact float comparison, bailing out on the first mismatching component.
-    if ( !( src.x == x ) )
-        return false;
-    if ( !( src.y == y ) )
-        return false;
-    return ( src.z == z );
-}
-
-inline bool QAngle::operator!=( const QAngle& src ) const
-{
-    CHECK_VALID(src);
-    CHECK_VALID(*this);
-
-    // Any single differing component makes the angles unequal.
-    if ( src.x != x )
-        return true;
-    if ( src.y != y )
-        return true;
-    return ( src.z != z );
-}
-
-
-//-----------------------------------------------------------------------------
-// Copy
-//-----------------------------------------------------------------------------
-inline void VectorCopy( const QAngle& src, QAngle& dst )
-{
-    // Validate the source (only active with VECTOR_PARANOIA), then copy field by field.
-    CHECK_VALID(src);
-    dst.x = src.x;
-    dst.y = src.y;
-    dst.z = src.z;
-}
-
-
-//-----------------------------------------------------------------------------
-// standard math operations
-//-----------------------------------------------------------------------------
-inline QAngle& QAngle::operator+=(const QAngle& v)
-{
-    // Both operands are validated before the in-place add.
-    CHECK_VALID(*this);
-    CHECK_VALID(v);
-    x += v.x;
-    y += v.y;
-    z += v.z;
-    return *this;
-}
-
-inline QAngle& QAngle::operator-=(const QAngle& v)
-{
-    // Both operands are validated before the in-place subtract.
-    CHECK_VALID(*this);
-    CHECK_VALID(v);
-    x -= v.x;
-    y -= v.y;
-    z -= v.z;
-    return *this;
-}
-
-inline QAngle& QAngle::operator*=(float fl)
-{
-    // Scale in place; validation happens on the result, not the input.
-    x *= fl;
-    y *= fl;
-    z *= fl;
-
-    CHECK_VALID(*this);
-    return *this;
-}
-
-inline QAngle& QAngle::operator/=(float fl)
-{
-    Assert( fl != 0.0f );
-
-    // Divide once, then multiply each component by the reciprocal.
-    const float flReciprocal = 1.0f / fl;
-    x *= flReciprocal;
-    y *= flReciprocal;
-    z *= flReciprocal;
-
-    CHECK_VALID(*this);
-    return *this;
-}
-
-
-//-----------------------------------------------------------------------------
-// length
-//-----------------------------------------------------------------------------
-inline vec_t QAngle::Length( ) const
-{
-    CHECK_VALID(*this);
-    // Square root of the squared magnitude of the (x, y, z) triple.
-    const vec_t flLengthSqr = LengthSqr( );
-    return (vec_t)FastSqrt( flLengthSqr );
-}
-
-
-inline vec_t QAngle::LengthSqr( ) const
-{
-    CHECK_VALID(*this);
-    // Sum of the squared components; no square root is taken.
-    return ( x * x + y * y + z * z );
-}
-
-
-//-----------------------------------------------------------------------------
-// QAngle equality with tolerance
-//-----------------------------------------------------------------------------
-inline bool QAnglesAreEqual( const QAngle& src1, const QAngle& src2, float tolerance = 0.0f )
-{
-    // Components are tested in x, y, z order and evaluation stops at the
-    // first one whose absolute difference exceeds the tolerance. With the
-    // default tolerance of 0 this requires a zero difference on every axis.
-    return !( FloatMakePositive( src1.x - src2.x ) > tolerance )
-        && !( FloatMakePositive( src1.y - src2.y ) > tolerance )
-        && ( FloatMakePositive( src1.z - src2.z ) <= tolerance );
-}
-
-
-//-----------------------------------------------------------------------------
-// arithmetic operations (SLOW!!)
-//-----------------------------------------------------------------------------
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-inline QAngle QAngle::operator-(void) const
-{
-    // Unary minus: flip the sign of every component.
-    return QAngle( -x, -y, -z );
-}
-
-inline QAngle QAngle::operator+(const QAngle& v) const
-{
-    // Component-wise sum, written straight into a default-constructed QAngle.
-    QAngle angSum;
-    angSum.x = x + v.x;
-    angSum.y = y + v.y;
-    angSum.z = z + v.z;
-    return angSum;
-}
-
-inline QAngle QAngle::operator-(const QAngle& v) const
-{
-    // Component-wise difference (this - v), written into a default-constructed QAngle.
-    QAngle angDifference;
-    angDifference.x = x - v.x;
-    angDifference.y = y - v.y;
-    angDifference.z = z - v.z;
-    return angDifference;
-}
-
-inline QAngle QAngle::operator*(float fl) const
-{
-    // Uniform scale by a scalar, written into a default-constructed QAngle.
-    QAngle angScaled;
-    angScaled.x = x * fl;
-    angScaled.y = y * fl;
-    angScaled.z = z * fl;
-    return angScaled;
-}
-
-// Returns this angle with every component divided by fl.
-// fl must be non-zero; this is checked by Assert only, matching operator/=.
-inline QAngle QAngle::operator/(float fl) const
-{
-    // Same zero-divisor guard as QAngle::operator/=, which this overload lacked.
-    Assert( fl != 0.0f );
-    QAngle res;
-    // True division is kept (rather than multiplying by a reciprocal) so
-    // results are unchanged for every non-zero divisor.
-    res.x = x / fl;
-    res.y = y / fl;
-    res.z = z / fl;
-    return res;
-}
-
-inline QAngle operator*(float fl, const QAngle& v)
-{
-    // Scalar-on-the-left form; forwards to the member QAngle::operator*(float).
-    return v * fl;
-}
-
-#endif // VECTOR_NO_SLOW_OPERATIONS
-
-
-//-----------------------------------------------------------------------------
-// NOTE: These are not completely correct. The representations are not equivalent
-// unless the QAngle represents a rotational impulse along a coordinate axis (x,y,z)
-inline void QAngleToAngularImpulse( const QAngle &angles, AngularImpulse &impulse )
-{
-    // Pure component shuffle, no scaling: (x, y, z) of the impulse come from
-    // (z, x, y) of the angles.
-    impulse.z = angles.y;
-    impulse.y = angles.x;
-    impulse.x = angles.z;
-}
-
-inline void AngularImpulseToQAngle( const AngularImpulse &impulse, QAngle &angles )
-{
-    // Inverse of the shuffle in QAngleToAngularImpulse: (x, y, z) of the
-    // angles come from (y, z, x) of the impulse.
-    angles.z = impulse.x;
-    angles.y = impulse.z;
-    angles.x = impulse.y;
-}
-
-#if !defined( _X360 )
-
-// Returns 1 / max( 1, |v|^2 ) for the three floats at v, so the result never exceeds 1.
-// v must point at at least three readable floats.
-FORCEINLINE vec_t InvRSquared( float const *v )
-{
-#if defined(__i386__) || defined(_M_IX86)
- // 32-bit x86 path. A tiny epsilon is added to the squared length before clamping.
- float sqrlen = v[0]*v[0]+v[1]*v[1]+v[2]*v[2] + 1.0e-10f, result;
- // _mm_rcp_ss is a hardware reciprocal approximation, so this result is not an exact division.
- _mm_store_ss(&result, _mm_rcp_ss( _mm_max_ss( _mm_set_ss(1.0f), _mm_load_ss(&sqrlen) ) ));
- return result;
-#else
- // Generic path: exact division, and no epsilon is added here.
- // NOTE(review): the two paths can therefore differ slightly in their results - confirm that is acceptable.
- return 1.f/fpmax(1.f, v[0]*v[0]+v[1]*v[1]+v[2]*v[2]);
-#endif
-}
-
-FORCEINLINE vec_t InvRSquared( const Vector &v )
-{
-    // Forward to the float-pointer overload, starting at the x member.
-    float const *pComponents = &v.x;
-    return InvRSquared( pComponents );
-}
-
-#if defined(__i386__) || defined(_M_IX86)
-// Writes an approximation of 1 / sqrt( a ) to *out.
-// Starts from the hardware estimate and applies one refinement step:
-//   xr' = xr * 0.5 * ( 3 - a * xr * xr )
-inline void _SSE_RSqrtInline( float a, float* out )
-{
- __m128 xx = _mm_load_ss( &a );
- // initial hardware estimate of 1/sqrt(a)
- __m128 xr = _mm_rsqrt_ss( xx );
- __m128 xt;
- // xt = xr * xr
- xt = _mm_mul_ss( xr, xr );
- // xt = a * xr^2
- xt = _mm_mul_ss( xt, xx );
- // xt = 3 - a * xr^2
- xt = _mm_sub_ss( _mm_set_ss(3.f), xt );
- // xt = 0.5 * ( 3 - a * xr^2 )
- xt = _mm_mul_ss( xt, _mm_set_ss(0.5f) );
- // refined estimate
- xr = _mm_mul_ss( xr, xt );
- _mm_store_ss( out, xr );
-}
-#endif
-
-// FIXME: Change this back to a #define once we get rid of the vec_t version
-// Normalizes vec in place and returns a float (on the SSE path, the approximate original length).
-FORCEINLINE float VectorNormalize( Vector& vec )
-{
-// The inline SSE path is only enabled on 32-bit x86 when DEBUG is not defined.
-// NOTE(review): DO_SSE_OPTIMIZATION is #defined inside this function and never #undef'd,
-// so it stays defined for the rest of the translation unit.
-// NOTE(review): this tests DEBUG while other code in this file tests _DEBUG - confirm which is intended.
-#ifndef DEBUG // stop crashing my edit-and-continue!
- #if defined(__i386__) || defined(_M_IX86)
- #define DO_SSE_OPTIMIZATION
- #endif
-#endif
-
-#if defined( DO_SSE_OPTIMIZATION )
- // The small epsilon keeps the reciprocal square root away from an exact zero input.
- float sqrlen = vec.LengthSqr() + 1.0e-10f, invlen;
- _SSE_RSqrtInline(sqrlen, &invlen);
- vec.x *= invlen;
- vec.y *= invlen;
- vec.z *= invlen;
- // len^2 * (1/len) ~= len
- return sqrlen * invlen;
-#else
- // Otherwise dispatch through the pfVectorNormalize function pointer, which is defined elsewhere.
- extern float (FASTCALL *pfVectorNormalize)(Vector& v);
- return (*pfVectorNormalize)(vec);
-#endif
-}
-
-// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s
-FORCEINLINE float VectorNormalize( float * v )
-{
-    // Treat the three floats at v as a Vector and normalize them in place.
-    Vector &vecAlias = *( reinterpret_cast<Vector *>( v ) );
-    return VectorNormalize( vecAlias );
-}
-
-FORCEINLINE void VectorNormalizeFast( Vector &vec )
-{
-    // On this platform the fast variant is just VectorNormalize with the
-    // returned length discarded.
-    (void)VectorNormalize( vec );
-}
-
-#else
-
-FORCEINLINE float _VMX_InvRSquared( const Vector &v )
-{
-    // Take the reciprocal length, then dot it with itself and return the x lane.
-    XMVECTOR xmRecipLen = XMVector3ReciprocalLength( XMLoadVector3( v.Base() ) );
-    xmRecipLen = XMVector3Dot( xmRecipLen, xmRecipLen );
-    return xmRecipLen.x;
-}
-
-// call directly
-FORCEINLINE float _VMX_VectorNormalize( Vector &vec )
-{
-    // Measure the length first; it is also the return value.
-    const float flLength = XMVector3Length( XMLoadVector3( vec.Base() ) ).x;
-
-    // FLT_EPSILON keeps the division defined for a zero-length vector.
-    const float flInvLength = 1.f / (flLength + FLT_EPSILON );
-    vec.x *= flInvLength;
-    vec.y *= flInvLength;
-    vec.z *= flInvLength;
-    return flLength;
-}
-
-#define InvRSquared(x) _VMX_InvRSquared(x)
-
-// FIXME: Change this back to a #define once we get rid of the vec_t version
-FORCEINLINE float VectorNormalize( Vector& v )
-{
-    // Xbox 360 build: forward to the VMX implementation and pass its result through.
-    const float flLength = _VMX_VectorNormalize( v );
-    return flLength;
-}
-// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s
-FORCEINLINE float VectorNormalize( float *pV )
-{
-    // Treat the three floats at pV as a Vector and normalize them in place.
-    Vector &vecAlias = *( reinterpret_cast<Vector*>( pV ) );
-    return _VMX_VectorNormalize( vecAlias );
-}
-
-// call directly
-FORCEINLINE void VectorNormalizeFast( Vector &vec )
-{
-    // Uses the estimated-length intrinsic instead of the exact one.
-    XMVECTOR xmLengthEst = XMVector3LengthEst( XMLoadVector3( vec.Base() ) );
-
-    // FLT_EPSILON keeps the division defined for a zero-length vector.
-    const float flInvLength = 1.f / (xmLengthEst.x + FLT_EPSILON);
-    vec.x *= flInvLength;
-    vec.y *= flInvLength;
-    vec.z *= flInvLength;
-}
-
-#endif // _X360
-
-
-inline vec_t Vector::NormalizeInPlace()
-{
-    // Member-function front end for the free function VectorNormalize.
-    Vector &vecSelf = *this;
-    return VectorNormalize( vecSelf );
-}
-
-inline Vector Vector::Normalized() const
-{
-    // Normalize a copy so that *this is left untouched.
-    Vector vecUnit = *this;
-    VectorNormalize( vecUnit );
-    return vecUnit;
-}
-
-inline bool Vector::IsLengthGreaterThan( float val ) const
-{
-    // Compares squared quantities, so no square root is needed. Because val
-    // is squared, its sign does not affect the result.
-    return ( LengthSqr() > val*val );
-}
-
-inline bool Vector::IsLengthLessThan( float val ) const
-{
-    // Compares squared quantities, so no square root is needed. Because val
-    // is squared, its sign does not affect the result.
-    return ( LengthSqr() < val*val );
-}
-
-#endif
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef VECTOR_H +#define VECTOR_H + +#ifdef _WIN32 +#pragma once +#endif + +#include <math.h> +#include <float.h> + +// For vec_t, put this somewhere else? +#include "tier0/basetypes.h" + +// For rand(). We really need a library! +#include <stdlib.h> + +#ifndef _X360 +// For MMX intrinsics +#include <xmmintrin.h> +#endif + +#include "tier0/dbg.h" +#include "tier0/threadtools.h" +#include "mathlib/vector2d.h" +#include "mathlib/math_pfns.h" +#include "minmax.h" + +// Uncomment this to add extra Asserts to check for NANs, uninitialized vecs, etc. +//#define VECTOR_PARANOIA 1 + +// Uncomment this to make sure we don't do anything slow with our vectors +//#define VECTOR_NO_SLOW_OPERATIONS 1 + + +// Used to make certain code easier to read. +#define X_INDEX 0 +#define Y_INDEX 1 +#define Z_INDEX 2 + + +#ifdef VECTOR_PARANOIA +#define CHECK_VALID( _v) Assert( (_v).IsValid() ) +#else +#ifdef GNUC +#define CHECK_VALID( _v) +#else +#define CHECK_VALID( _v) 0 +#endif +#endif + +#define VecToString(v) (static_cast<const char *>(CFmtStr("(%f, %f, %f)", (v).x, (v).y, (v).z))) // ** Note: this generates a temporary, don't hold reference! + +class VectorByValue; + +//========================================================= +// 3D Vector +//========================================================= +class Vector +{ +public: + // Members + vec_t x, y, z; + + // Construction/destruction: + Vector(void); + Vector(vec_t X, vec_t Y, vec_t Z); + explicit Vector(vec_t XYZ); ///< broadcast initialize + + // Initialization + void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f); + // TODO (Ilya): Should there be an init that takes a single float for consistency? + + // Got any nasty NAN's? + bool IsValid() const; + void Invalidate(); + + // array access... 
+ vec_t operator[](int i) const; + vec_t& operator[](int i); + + // Base address... + vec_t* Base(); + vec_t const* Base() const; + + // Cast to Vector2D... + Vector2D& AsVector2D(); + const Vector2D& AsVector2D() const; + + // Initialization methods + void Random( vec_t minVal, vec_t maxVal ); + inline void Zero(); ///< zero out a vector + + // equality + bool operator==(const Vector& v) const; + bool operator!=(const Vector& v) const; + + // arithmetic operations + FORCEINLINE Vector& operator+=(const Vector &v); + FORCEINLINE Vector& operator-=(const Vector &v); + FORCEINLINE Vector& operator*=(const Vector &v); + FORCEINLINE Vector& operator*=(float s); + FORCEINLINE Vector& operator/=(const Vector &v); + FORCEINLINE Vector& operator/=(float s); + FORCEINLINE Vector& operator+=(float fl) ; ///< broadcast add + FORCEINLINE Vector& operator-=(float fl) ; ///< broadcast sub + +// negate the vector components + void Negate(); + + // Get the vector's magnitude. + inline vec_t Length() const; + + // Get the vector's magnitude squared. + FORCEINLINE vec_t LengthSqr(void) const + { + CHECK_VALID(*this); + return (x*x + y*y + z*z); + } + + // return true if this vector is (0,0,0) within tolerance + bool IsZero( float tolerance = 0.01f ) const + { + return (x > -tolerance && x < tolerance && + y > -tolerance && y < tolerance && + z > -tolerance && z < tolerance); + } + + vec_t NormalizeInPlace(); + Vector Normalized() const; + bool IsLengthGreaterThan( float val ) const; + bool IsLengthLessThan( float val ) const; + + // check if a vector is within the box defined by two other vectors + FORCEINLINE bool WithinAABox( Vector const &boxmin, Vector const &boxmax); + + // Get the distance from this vector to the other one. + vec_t DistTo(const Vector &vOther) const; + + // Get the distance from this vector to the other one squared. + // NJS: note, VC wasn't inlining it correctly in several deeply nested inlines due to being an 'out of line' inline. 
+ // may be able to tidy this up after switching to VC7 + FORCEINLINE vec_t DistToSqr(const Vector &vOther) const + { + Vector delta; + + delta.x = x - vOther.x; + delta.y = y - vOther.y; + delta.z = z - vOther.z; + + return delta.LengthSqr(); + } + + // Copy + void CopyToArray(float* rgfl) const; + + // Multiply, add, and assign to this (ie: *this = a + b * scalar). This + // is about 12% faster than the actual vector equation (because it's done per-component + // rather than per-vector). + void MulAdd(const Vector& a, const Vector& b, float scalar); + + // Dot product. + vec_t Dot(const Vector& vOther) const; + + // assignment + Vector& operator=(const Vector &vOther); + + // 2d + vec_t Length2D(void) const; + vec_t Length2DSqr(void) const; + + operator VectorByValue &() { return *((VectorByValue *)(this)); } + operator const VectorByValue &() const { return *((const VectorByValue *)(this)); } + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // copy constructors +// Vector(const Vector &vOther); + + // arithmetic operations + Vector operator-(void) const; + + Vector operator+(const Vector& v) const; + Vector operator-(const Vector& v) const; + Vector operator*(const Vector& v) const; + Vector operator/(const Vector& v) const; + Vector operator*(float fl) const; + Vector operator/(float fl) const; + + // Cross product between two vectors. + Vector Cross(const Vector &vOther) const; + + // Returns a vector with the min or max in X, Y, and Z. 
+ Vector Min(const Vector &vOther) const; + Vector Max(const Vector &vOther) const; + +#else + +private: + // No copy constructors allowed if we're in optimal mode + Vector(const Vector& vOther); +#endif +}; + +FORCEINLINE void NetworkVarConstruct( Vector &v ) { v.Zero(); } + + +#define USE_M64S ( ( !defined( _X360 ) ) ) + + + +//========================================================= +// 4D Short Vector (aligned on 8-byte boundary) +//========================================================= +class ALIGN8 ShortVector +{ +public: + + short x, y, z, w; + + // Initialization + void Init(short ix = 0, short iy = 0, short iz = 0, short iw = 0 ); + + +#if USE_M64S + __m64 &AsM64() { return *(__m64*)&x; } + const __m64 &AsM64() const { return *(const __m64*)&x; } +#endif + + // Setter + void Set( const ShortVector& vOther ); + void Set( const short ix, const short iy, const short iz, const short iw ); + + // array access... + short operator[](int i) const; + short& operator[](int i); + + // Base address... + short* Base(); + short const* Base() const; + + // equality + bool operator==(const ShortVector& v) const; + bool operator!=(const ShortVector& v) const; + + // Arithmetic operations + FORCEINLINE ShortVector& operator+=(const ShortVector &v); + FORCEINLINE ShortVector& operator-=(const ShortVector &v); + FORCEINLINE ShortVector& operator*=(const ShortVector &v); + FORCEINLINE ShortVector& operator*=(float s); + FORCEINLINE ShortVector& operator/=(const ShortVector &v); + FORCEINLINE ShortVector& operator/=(float s); + FORCEINLINE ShortVector operator*(float fl) const; + +private: + + // No copy constructors allowed if we're in optimal mode +// ShortVector(ShortVector const& vOther); + + // No assignment operators either... 
+// ShortVector& operator=( ShortVector const& src ); + +} ALIGN8_POST; + + + + + + +//========================================================= +// 4D Integer Vector +//========================================================= +class IntVector4D +{ +public: + + int x, y, z, w; + + // Initialization + void Init(int ix = 0, int iy = 0, int iz = 0, int iw = 0 ); + +#if USE_M64S + __m64 &AsM64() { return *(__m64*)&x; } + const __m64 &AsM64() const { return *(const __m64*)&x; } +#endif + + // Setter + void Set( const IntVector4D& vOther ); + void Set( const int ix, const int iy, const int iz, const int iw ); + + // array access... + int operator[](int i) const; + int& operator[](int i); + + // Base address... + int* Base(); + int const* Base() const; + + // equality + bool operator==(const IntVector4D& v) const; + bool operator!=(const IntVector4D& v) const; + + // Arithmetic operations + FORCEINLINE IntVector4D& operator+=(const IntVector4D &v); + FORCEINLINE IntVector4D& operator-=(const IntVector4D &v); + FORCEINLINE IntVector4D& operator*=(const IntVector4D &v); + FORCEINLINE IntVector4D& operator*=(float s); + FORCEINLINE IntVector4D& operator/=(const IntVector4D &v); + FORCEINLINE IntVector4D& operator/=(float s); + FORCEINLINE IntVector4D operator*(float fl) const; + +private: + + // No copy constructors allowed if we're in optimal mode + // IntVector4D(IntVector4D const& vOther); + + // No assignment operators either... 
+ // IntVector4D& operator=( IntVector4D const& src ); + +}; + + + +//----------------------------------------------------------------------------- +// Allows us to specifically pass the vector by value when we need to +//----------------------------------------------------------------------------- +class VectorByValue : public Vector +{ +public: + // Construction/destruction: + VectorByValue(void) : Vector() {} + VectorByValue(vec_t X, vec_t Y, vec_t Z) : Vector( X, Y, Z ) {} + VectorByValue(const VectorByValue& vOther) { *this = vOther; } +}; + + +//----------------------------------------------------------------------------- +// Utility to simplify table construction. No constructor means can use +// traditional C-style initialization +//----------------------------------------------------------------------------- +class TableVector +{ +public: + vec_t x, y, z; + + operator Vector &() { return *((Vector *)(this)); } + operator const Vector &() const { return *((const Vector *)(this)); } + + // array access... 
+ inline vec_t& operator[](int i) + { + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; + } + + inline vec_t operator[](int i) const + { + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; + } +}; + + +//----------------------------------------------------------------------------- +// Here's where we add all those lovely SSE optimized routines +//----------------------------------------------------------------------------- + +class ALIGN16 VectorAligned : public Vector +{ +public: + inline VectorAligned(void) {}; + inline VectorAligned(vec_t X, vec_t Y, vec_t Z) + { + Init(X,Y,Z); + } + +#ifdef VECTOR_NO_SLOW_OPERATIONS + +private: + // No copy constructors allowed if we're in optimal mode + VectorAligned(const VectorAligned& vOther); + VectorAligned(const Vector &vOther); + +#else +public: + explicit VectorAligned(const Vector &vOther) + { + Init(vOther.x, vOther.y, vOther.z); + } + + VectorAligned& operator=(const Vector &vOther) + { + Init(vOther.x, vOther.y, vOther.z); + return *this; + } + +#endif + float w; // this space is used anyway +} ALIGN16_POST; + +//----------------------------------------------------------------------------- +// Vector related operations +//----------------------------------------------------------------------------- + +// Vector clear +FORCEINLINE void VectorClear( Vector& a ); + +// Copy +FORCEINLINE void VectorCopy( const Vector& src, Vector& dst ); + +// Vector arithmetic +FORCEINLINE void VectorAdd( const Vector& a, const Vector& b, Vector& result ); +FORCEINLINE void VectorSubtract( const Vector& a, const Vector& b, Vector& result ); +FORCEINLINE void VectorMultiply( const Vector& a, vec_t b, Vector& result ); +FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& result ); +FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& result ); +FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& result ); +inline void VectorScale ( const Vector& in, vec_t scale, 
Vector& result ); +// Don't mark this as inline in its function declaration. That's only necessary on its +// definition, and 'inline' here leads to gcc warnings. +void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest ); + +// Vector equality with tolerance +bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance = 0.0f ); + +#define VectorExpand(v) (v).x, (v).y, (v).z + + +// Normalization +// FIXME: Can't use quite yet +//vec_t VectorNormalize( Vector& v ); + +// Length +inline vec_t VectorLength( const Vector& v ); + +// Dot Product +FORCEINLINE vec_t DotProduct(const Vector& a, const Vector& b); + +// Cross product +void CrossProduct(const Vector& a, const Vector& b, Vector& result ); + +// Store the min or max of each of x, y, and z into the result. +void VectorMin( const Vector &a, const Vector &b, Vector &result ); +void VectorMax( const Vector &a, const Vector &b, Vector &result ); + +// Linearly interpolate between two vectors +void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest ); +Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t ); + +FORCEINLINE Vector ReplicateToVector( float x ) +{ + return Vector( x, x, x ); +} + +// check if a point is in the field of a view of an object. supports up to 180 degree fov. 
+FORCEINLINE bool PointWithinViewAngle( Vector const &vecSrcPosition, + Vector const &vecTargetPosition, + Vector const &vecLookDirection, float flCosHalfFOV ) +{ + Vector vecDelta = vecTargetPosition - vecSrcPosition; + float cosDiff = DotProduct( vecLookDirection, vecDelta ); + + if ( cosDiff < 0 ) + return false; + + float flLen2 = vecDelta.LengthSqr(); + + // a/sqrt(b) > c == a^2 > b * c ^2 + return ( cosDiff * cosDiff > flLen2 * flCosHalfFOV * flCosHalfFOV ); + +} + + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +// Cross product +Vector CrossProduct( const Vector& a, const Vector& b ); + +// Random vector creation +Vector RandomVector( vec_t minVal, vec_t maxVal ); + +#endif + +float RandomVectorInUnitSphere( Vector *pVector ); +float RandomVectorInUnitCircle( Vector2D *pVector ); + + +//----------------------------------------------------------------------------- +// +// Inlined Vector methods +// +//----------------------------------------------------------------------------- + + +//----------------------------------------------------------------------------- +// constructors +//----------------------------------------------------------------------------- +inline Vector::Vector(void) +{ +#ifdef _DEBUG +#ifdef VECTOR_PARANOIA + // Initialize to NAN to catch errors + x = y = z = VEC_T_NAN; +#endif +#endif +} + +inline Vector::Vector(vec_t X, vec_t Y, vec_t Z) +{ + x = X; y = Y; z = Z; + CHECK_VALID(*this); +} + +inline Vector::Vector(vec_t XYZ) +{ + x = y = z = XYZ; + CHECK_VALID(*this); +} + +//inline Vector::Vector(const float *pFloat) +//{ +// Assert( pFloat ); +// x = pFloat[0]; y = pFloat[1]; z = pFloat[2]; +// CHECK_VALID(*this); +//} + +#if 0 +//----------------------------------------------------------------------------- +// copy constructor +//----------------------------------------------------------------------------- + +inline Vector::Vector(const Vector &vOther) +{ + CHECK_VALID(vOther); + x = vOther.x; y = vOther.y; z = vOther.z; +} +#endif + 
+//----------------------------------------------------------------------------- +// initialization +//----------------------------------------------------------------------------- + +inline void Vector::Init( vec_t ix, vec_t iy, vec_t iz ) +{ + x = ix; y = iy; z = iz; + CHECK_VALID(*this); +} + +inline void Vector::Random( vec_t minVal, vec_t maxVal ) +{ + x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + z = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + CHECK_VALID(*this); +} + +// This should really be a single opcode on the PowerPC (move r0 onto the vec reg) +inline void Vector::Zero() +{ + x = y = z = 0.0f; +} + +inline void VectorClear( Vector& a ) +{ + a.x = a.y = a.z = 0.0f; +} + +//----------------------------------------------------------------------------- +// assignment +//----------------------------------------------------------------------------- + +inline Vector& Vector::operator=(const Vector &vOther) +{ + CHECK_VALID(vOther); + x=vOther.x; y=vOther.y; z=vOther.z; + return *this; +} + + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- +inline vec_t& Vector::operator[](int i) +{ + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; +} + +inline vec_t Vector::operator[](int i) const +{ + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; +} + + +//----------------------------------------------------------------------------- +// Base address... +//----------------------------------------------------------------------------- +inline vec_t* Vector::Base() +{ + return (vec_t*)this; +} + +inline vec_t const* Vector::Base() const +{ + return (vec_t const*)this; +} + +//----------------------------------------------------------------------------- +// Cast to Vector2D... 
+//----------------------------------------------------------------------------- + +inline Vector2D& Vector::AsVector2D() +{ + return *(Vector2D*)this; +} + +inline const Vector2D& Vector::AsVector2D() const +{ + return *(const Vector2D*)this; +} + +//----------------------------------------------------------------------------- +// IsValid? +//----------------------------------------------------------------------------- + +inline bool Vector::IsValid() const +{ + return IsFinite(x) && IsFinite(y) && IsFinite(z); +} + +//----------------------------------------------------------------------------- +// Invalidate +//----------------------------------------------------------------------------- + +inline void Vector::Invalidate() +{ +//#ifdef _DEBUG +//#ifdef VECTOR_PARANOIA + x = y = z = VEC_T_NAN; +//#endif +//#endif +} + +//----------------------------------------------------------------------------- +// comparison +//----------------------------------------------------------------------------- + +inline bool Vector::operator==( const Vector& src ) const +{ + CHECK_VALID(src); + CHECK_VALID(*this); + return (src.x == x) && (src.y == y) && (src.z == z); +} + +inline bool Vector::operator!=( const Vector& src ) const +{ + CHECK_VALID(src); + CHECK_VALID(*this); + return (src.x != x) || (src.y != y) || (src.z != z); +} + + +//----------------------------------------------------------------------------- +// Copy +//----------------------------------------------------------------------------- + +FORCEINLINE void VectorCopy( const Vector& src, Vector& dst ) +{ + CHECK_VALID(src); + dst.x = src.x; + dst.y = src.y; + dst.z = src.z; +} + +inline void Vector::CopyToArray(float* rgfl) const +{ + Assert( rgfl ); + CHECK_VALID(*this); + rgfl[0] = x, rgfl[1] = y, rgfl[2] = z; +} + +//----------------------------------------------------------------------------- +// standard math operations +//----------------------------------------------------------------------------- +// #pragma 
message("TODO: these should be SSE") + +inline void Vector::Negate() +{ + CHECK_VALID(*this); + x = -x; y = -y; z = -z; +} + +FORCEINLINE Vector& Vector::operator+=(const Vector& v) +{ + CHECK_VALID(*this); + CHECK_VALID(v); + x+=v.x; y+=v.y; z += v.z; + return *this; +} + +FORCEINLINE Vector& Vector::operator-=(const Vector& v) +{ + CHECK_VALID(*this); + CHECK_VALID(v); + x-=v.x; y-=v.y; z -= v.z; + return *this; +} + +FORCEINLINE Vector& Vector::operator*=(float fl) +{ + x *= fl; + y *= fl; + z *= fl; + CHECK_VALID(*this); + return *this; +} + +FORCEINLINE Vector& Vector::operator*=(const Vector& v) +{ + CHECK_VALID(v); + x *= v.x; + y *= v.y; + z *= v.z; + CHECK_VALID(*this); + return *this; +} + +// this ought to be an opcode. +FORCEINLINE Vector& Vector::operator+=(float fl) +{ + x += fl; + y += fl; + z += fl; + CHECK_VALID(*this); + return *this; +} + +FORCEINLINE Vector& Vector::operator-=(float fl) +{ + x -= fl; + y -= fl; + z -= fl; + CHECK_VALID(*this); + return *this; +} + + + +FORCEINLINE Vector& Vector::operator/=(float fl) +{ + Assert( fl != 0.0f ); + float oofl = 1.0f / fl; + x *= oofl; + y *= oofl; + z *= oofl; + CHECK_VALID(*this); + return *this; +} + +FORCEINLINE Vector& Vector::operator/=(const Vector& v) +{ + CHECK_VALID(v); + Assert( v.x != 0.0f && v.y != 0.0f && v.z != 0.0f ); + x /= v.x; + y /= v.y; + z /= v.z; + CHECK_VALID(*this); + return *this; +} + + + +//----------------------------------------------------------------------------- +// +// Inlined Short Vector methods +// +//----------------------------------------------------------------------------- + + +inline void ShortVector::Init( short ix, short iy, short iz, short iw ) +{ + x = ix; y = iy; z = iz; w = iw; +} + +FORCEINLINE void ShortVector::Set( const ShortVector& vOther ) +{ + x = vOther.x; + y = vOther.y; + z = vOther.z; + w = vOther.w; +} + +FORCEINLINE void ShortVector::Set( const short ix, const short iy, const short iz, const short iw ) +{ + x = ix; + y = iy; + z = iz; + w 
= iw; +} + + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- +inline short ShortVector::operator[](int i) const +{ + Assert( (i >= 0) && (i < 4) ); + return ((short*)this)[i]; +} + +inline short& ShortVector::operator[](int i) +{ + Assert( (i >= 0) && (i < 4) ); + return ((short*)this)[i]; +} + +//----------------------------------------------------------------------------- +// Base address... +//----------------------------------------------------------------------------- +inline short* ShortVector::Base() +{ + return (short*)this; +} + +inline short const* ShortVector::Base() const +{ + return (short const*)this; +} + + +//----------------------------------------------------------------------------- +// comparison +//----------------------------------------------------------------------------- + +inline bool ShortVector::operator==( const ShortVector& src ) const +{ + return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w); +} + +inline bool ShortVector::operator!=( const ShortVector& src ) const +{ + return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w); +} + + + +//----------------------------------------------------------------------------- +// standard math operations +//----------------------------------------------------------------------------- + +FORCEINLINE ShortVector& ShortVector::operator+=(const ShortVector& v) +{ + x+=v.x; y+=v.y; z += v.z; w += v.w; + return *this; +} + +FORCEINLINE ShortVector& ShortVector::operator-=(const ShortVector& v) +{ + x-=v.x; y-=v.y; z -= v.z; w -= v.w; + return *this; +} + +FORCEINLINE ShortVector& ShortVector::operator*=(float fl) +{ + x *= fl; + y *= fl; + z *= fl; + w *= fl; + return *this; +} + +FORCEINLINE ShortVector& ShortVector::operator*=(const ShortVector& v) +{ + x *= v.x; + y *= v.y; + z *= v.z; + w *= v.w; + return *this; +} + +FORCEINLINE ShortVector& 
ShortVector::operator/=(float fl) +{ + Assert( fl != 0.0f ); + float oofl = 1.0f / fl; + x *= oofl; + y *= oofl; + z *= oofl; + w *= oofl; + return *this; +} + +FORCEINLINE ShortVector& ShortVector::operator/=(const ShortVector& v) +{ + Assert( v.x != 0 && v.y != 0 && v.z != 0 && v.w != 0 ); + x /= v.x; + y /= v.y; + z /= v.z; + w /= v.w; + return *this; +} + +FORCEINLINE void ShortVectorMultiply( const ShortVector& src, float fl, ShortVector& res ) +{ + Assert( IsFinite(fl) ); + res.x = src.x * fl; + res.y = src.y * fl; + res.z = src.z * fl; + res.w = src.w * fl; +} + +FORCEINLINE ShortVector ShortVector::operator*(float fl) const +{ + ShortVector res; + ShortVectorMultiply( *this, fl, res ); + return res; +} + + + + + + +//----------------------------------------------------------------------------- +// +// Inlined Integer Vector methods +// +//----------------------------------------------------------------------------- + + +inline void IntVector4D::Init( int ix, int iy, int iz, int iw ) +{ + x = ix; y = iy; z = iz; w = iw; +} + +FORCEINLINE void IntVector4D::Set( const IntVector4D& vOther ) +{ + x = vOther.x; + y = vOther.y; + z = vOther.z; + w = vOther.w; +} + +FORCEINLINE void IntVector4D::Set( const int ix, const int iy, const int iz, const int iw ) +{ + x = ix; + y = iy; + z = iz; + w = iw; +} + + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- +inline int IntVector4D::operator[](int i) const +{ + Assert( (i >= 0) && (i < 4) ); + return ((int*)this)[i]; +} + +inline int& IntVector4D::operator[](int i) +{ + Assert( (i >= 0) && (i < 4) ); + return ((int*)this)[i]; +} + +//----------------------------------------------------------------------------- +// Base address... 
+//----------------------------------------------------------------------------- +inline int* IntVector4D::Base() +{ + return (int*)this; +} + +inline int const* IntVector4D::Base() const +{ + return (int const*)this; +} + + +//----------------------------------------------------------------------------- +// comparison +//----------------------------------------------------------------------------- + +inline bool IntVector4D::operator==( const IntVector4D& src ) const +{ + return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w); +} + +inline bool IntVector4D::operator!=( const IntVector4D& src ) const +{ + return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w); +} + + + +//----------------------------------------------------------------------------- +// standard math operations +//----------------------------------------------------------------------------- + +FORCEINLINE IntVector4D& IntVector4D::operator+=(const IntVector4D& v) +{ + x+=v.x; y+=v.y; z += v.z; w += v.w; + return *this; +} + +FORCEINLINE IntVector4D& IntVector4D::operator-=(const IntVector4D& v) +{ + x-=v.x; y-=v.y; z -= v.z; w -= v.w; + return *this; +} + +FORCEINLINE IntVector4D& IntVector4D::operator*=(float fl) +{ + x *= fl; + y *= fl; + z *= fl; + w *= fl; + return *this; +} + +FORCEINLINE IntVector4D& IntVector4D::operator*=(const IntVector4D& v) +{ + x *= v.x; + y *= v.y; + z *= v.z; + w *= v.w; + return *this; +} + +FORCEINLINE IntVector4D& IntVector4D::operator/=(float fl) +{ + Assert( fl != 0.0f ); + float oofl = 1.0f / fl; + x *= oofl; + y *= oofl; + z *= oofl; + w *= oofl; + return *this; +} + +FORCEINLINE IntVector4D& IntVector4D::operator/=(const IntVector4D& v) +{ + Assert( v.x != 0 && v.y != 0 && v.z != 0 && v.w != 0 ); + x /= v.x; + y /= v.y; + z /= v.z; + w /= v.w; + return *this; +} + +FORCEINLINE void IntVector4DMultiply( const IntVector4D& src, float fl, IntVector4D& res ) +{ + Assert( IsFinite(fl) ); + res.x = src.x * fl; + res.y = src.y * fl; + res.z = 
src.z * fl; + res.w = src.w * fl; +} + +FORCEINLINE IntVector4D IntVector4D::operator*(float fl) const +{ + IntVector4D res; + IntVector4DMultiply( *this, fl, res ); + return res; +} + + + +// ======================= + + +FORCEINLINE void VectorAdd( const Vector& a, const Vector& b, Vector& c ) +{ + CHECK_VALID(a); + CHECK_VALID(b); + c.x = a.x + b.x; + c.y = a.y + b.y; + c.z = a.z + b.z; +} + +FORCEINLINE void VectorSubtract( const Vector& a, const Vector& b, Vector& c ) +{ + CHECK_VALID(a); + CHECK_VALID(b); + c.x = a.x - b.x; + c.y = a.y - b.y; + c.z = a.z - b.z; +} + +FORCEINLINE void VectorMultiply( const Vector& a, vec_t b, Vector& c ) +{ + CHECK_VALID(a); + Assert( IsFinite(b) ); + c.x = a.x * b; + c.y = a.y * b; + c.z = a.z * b; +} + +FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& c ) +{ + CHECK_VALID(a); + CHECK_VALID(b); + c.x = a.x * b.x; + c.y = a.y * b.y; + c.z = a.z * b.z; +} + +// for backwards compatability +inline void VectorScale ( const Vector& in, vec_t scale, Vector& result ) +{ + VectorMultiply( in, scale, result ); +} + + +FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& c ) +{ + CHECK_VALID(a); + Assert( b != 0.0f ); + vec_t oob = 1.0f / b; + c.x = a.x * oob; + c.y = a.y * oob; + c.z = a.z * oob; +} + +FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& c ) +{ + CHECK_VALID(a); + CHECK_VALID(b); + Assert( (b.x != 0.0f) && (b.y != 0.0f) && (b.z != 0.0f) ); + c.x = a.x / b.x; + c.y = a.y / b.y; + c.z = a.z / b.z; +} + +// FIXME: Remove +// For backwards compatability +inline void Vector::MulAdd(const Vector& a, const Vector& b, float scalar) +{ + CHECK_VALID(a); + CHECK_VALID(b); + x = a.x + b.x * scalar; + y = a.y + b.y * scalar; + z = a.z + b.z * scalar; +} + +inline void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest ) +{ + CHECK_VALID(src1); + CHECK_VALID(src2); + dest.x = src1.x + (src2.x - src1.x) * t; + dest.y = src1.y + (src2.y - src1.y) * t; + 
dest.z = src1.z + (src2.z - src1.z) * t; +} + +inline Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t ) +{ + Vector result; + VectorLerp( src1, src2, t, result ); + return result; +} + +//----------------------------------------------------------------------------- +// Temporary storage for vector results so const Vector& results can be returned +//----------------------------------------------------------------------------- +inline Vector &AllocTempVector() +{ + static Vector s_vecTemp[128]; + static CInterlockedInt s_nIndex; + + int nIndex; + for (;;) + { + int nOldIndex = s_nIndex; + nIndex = ( (nOldIndex + 0x10001) & 0x7F ); + + if ( s_nIndex.AssignIf( nOldIndex, nIndex ) ) + { + break; + } + ThreadPause(); + } + return s_vecTemp[nIndex & 0xffff]; +} + + + +//----------------------------------------------------------------------------- +// dot, cross +//----------------------------------------------------------------------------- +FORCEINLINE vec_t DotProduct(const Vector& a, const Vector& b) +{ + CHECK_VALID(a); + CHECK_VALID(b); + return( a.x*b.x + a.y*b.y + a.z*b.z ); +} + +// for backwards compatability +inline vec_t Vector::Dot( const Vector& vOther ) const +{ + CHECK_VALID(vOther); + return DotProduct( *this, vOther ); +} + +inline void CrossProduct(const Vector& a, const Vector& b, Vector& result ) +{ + CHECK_VALID(a); + CHECK_VALID(b); + Assert( &a != &result ); + Assert( &b != &result ); + result.x = a.y*b.z - a.z*b.y; + result.y = a.z*b.x - a.x*b.z; + result.z = a.x*b.y - a.y*b.x; +} + +inline vec_t DotProductAbs( const Vector &v0, const Vector &v1 ) +{ + CHECK_VALID(v0); + CHECK_VALID(v1); + return FloatMakePositive(v0.x*v1.x) + FloatMakePositive(v0.y*v1.y) + FloatMakePositive(v0.z*v1.z); +} + +inline vec_t DotProductAbs( const Vector &v0, const float *v1 ) +{ + return FloatMakePositive(v0.x * v1[0]) + FloatMakePositive(v0.y * v1[1]) + FloatMakePositive(v0.z * v1[2]); +} + 
+//----------------------------------------------------------------------------- +// length +//----------------------------------------------------------------------------- + +inline vec_t VectorLength( const Vector& v ) +{ + CHECK_VALID(v); + return (vec_t)FastSqrt(v.x*v.x + v.y*v.y + v.z*v.z); +} + + +inline vec_t Vector::Length(void) const +{ + CHECK_VALID(*this); + return VectorLength( *this ); +} + + +//----------------------------------------------------------------------------- +// Normalization +//----------------------------------------------------------------------------- + +/* +// FIXME: Can't use until we're un-macroed in mathlib.h +inline vec_t VectorNormalize( Vector& v ) +{ + Assert( v.IsValid() ); + vec_t l = v.Length(); + if (l != 0.0f) + { + v /= l; + } + else + { + // FIXME: + // Just copying the existing implemenation; shouldn't res.z == 0? + v.x = v.y = 0.0f; v.z = 1.0f; + } + return l; +} +*/ + + +// check a point against a box +bool Vector::WithinAABox( Vector const &boxmin, Vector const &boxmax) +{ + return ( + ( x >= boxmin.x ) && ( x <= boxmax.x) && + ( y >= boxmin.y ) && ( y <= boxmax.y) && + ( z >= boxmin.z ) && ( z <= boxmax.z) + ); +} + +//----------------------------------------------------------------------------- +// Get the distance from this vector to the other one +//----------------------------------------------------------------------------- +inline vec_t Vector::DistTo(const Vector &vOther) const +{ + Vector delta; + VectorSubtract( *this, vOther, delta ); + return delta.Length(); +} + + +//----------------------------------------------------------------------------- +// Vector equality with tolerance +//----------------------------------------------------------------------------- +inline bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance ) +{ + if (FloatMakePositive(src1.x - src2.x) > tolerance) + return false; + if (FloatMakePositive(src1.y - src2.y) > tolerance) + return false; + return 
(FloatMakePositive(src1.z - src2.z) <= tolerance); +} + + +//----------------------------------------------------------------------------- +// Computes the closest point to vecTarget no farther than flMaxDist from vecStart +//----------------------------------------------------------------------------- +inline void ComputeClosestPoint( const Vector& vecStart, float flMaxDist, const Vector& vecTarget, Vector *pResult ) +{ + Vector vecDelta; + VectorSubtract( vecTarget, vecStart, vecDelta ); + float flDistSqr = vecDelta.LengthSqr(); + if ( flDistSqr <= flMaxDist * flMaxDist ) + { + *pResult = vecTarget; + } + else + { + vecDelta /= FastSqrt( flDistSqr ); + VectorMA( vecStart, flMaxDist, vecDelta, *pResult ); + } +} + + +//----------------------------------------------------------------------------- +// Takes the absolute value of a vector +//----------------------------------------------------------------------------- +inline void VectorAbs( const Vector& src, Vector& dst ) +{ + dst.x = FloatMakePositive(src.x); + dst.y = FloatMakePositive(src.y); + dst.z = FloatMakePositive(src.z); +} + + +//----------------------------------------------------------------------------- +// +// Slow methods +// +//----------------------------------------------------------------------------- + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +//----------------------------------------------------------------------------- +// Returns a vector with the min or max in X, Y, and Z. +//----------------------------------------------------------------------------- +inline Vector Vector::Min(const Vector &vOther) const +{ + return Vector(x < vOther.x ? x : vOther.x, + y < vOther.y ? y : vOther.y, + z < vOther.z ? z : vOther.z); +} + +inline Vector Vector::Max(const Vector &vOther) const +{ + return Vector(x > vOther.x ? x : vOther.x, + y > vOther.y ? y : vOther.y, + z > vOther.z ? 
z : vOther.z); +} + + +//----------------------------------------------------------------------------- +// arithmetic operations +//----------------------------------------------------------------------------- + +inline Vector Vector::operator-(void) const +{ + return Vector(-x,-y,-z); +} + +inline Vector Vector::operator+(const Vector& v) const +{ + Vector res; + VectorAdd( *this, v, res ); + return res; +} + +inline Vector Vector::operator-(const Vector& v) const +{ + Vector res; + VectorSubtract( *this, v, res ); + return res; +} + +inline Vector Vector::operator*(float fl) const +{ + Vector res; + VectorMultiply( *this, fl, res ); + return res; +} + +inline Vector Vector::operator*(const Vector& v) const +{ + Vector res; + VectorMultiply( *this, v, res ); + return res; +} + +inline Vector Vector::operator/(float fl) const +{ + Vector res; + VectorDivide( *this, fl, res ); + return res; +} + +inline Vector Vector::operator/(const Vector& v) const +{ + Vector res; + VectorDivide( *this, v, res ); + return res; +} + +inline Vector operator*(float fl, const Vector& v) +{ + return v * fl; +} + +//----------------------------------------------------------------------------- +// cross product +//----------------------------------------------------------------------------- + +inline Vector Vector::Cross(const Vector& vOther) const +{ + Vector res; + CrossProduct( *this, vOther, res ); + return res; +} + +//----------------------------------------------------------------------------- +// 2D +//----------------------------------------------------------------------------- + +inline vec_t Vector::Length2D(void) const +{ + return (vec_t)FastSqrt(x*x + y*y); +} + +inline vec_t Vector::Length2DSqr(void) const +{ + return (x*x + y*y); +} + +inline Vector CrossProduct(const Vector& a, const Vector& b) +{ + return Vector( a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x ); +} + +inline void VectorMin( const Vector &a, const Vector &b, Vector &result ) +{ + result.x = 
fpmin(a.x, b.x); + result.y = fpmin(a.y, b.y); + result.z = fpmin(a.z, b.z); +} + +inline void VectorMax( const Vector &a, const Vector &b, Vector &result ) +{ + result.x = fpmax(a.x, b.x); + result.y = fpmax(a.y, b.y); + result.z = fpmax(a.z, b.z); +} + +inline float ComputeVolume( const Vector &vecMins, const Vector &vecMaxs ) +{ + Vector vecDelta; + VectorSubtract( vecMaxs, vecMins, vecDelta ); + return DotProduct( vecDelta, vecDelta ); +} + +// Get a random vector. +inline Vector RandomVector( float minVal, float maxVal ) +{ + Vector random; + random.Random( minVal, maxVal ); + return random; +} + +#endif //slow + +//----------------------------------------------------------------------------- +// Helper debugging stuff.... +//----------------------------------------------------------------------------- + +inline bool operator==( float const* f, const Vector& v ) +{ + // AIIIEEEE!!!! + Assert(0); + return false; +} + +inline bool operator==( const Vector& v, float const* f ) +{ + // AIIIEEEE!!!! + Assert(0); + return false; +} + +inline bool operator!=( float const* f, const Vector& v ) +{ + // AIIIEEEE!!!! + Assert(0); + return false; +} + +inline bool operator!=( const Vector& v, float const* f ) +{ + // AIIIEEEE!!!! 
+ Assert(0); + return false; +} + + +//----------------------------------------------------------------------------- +// AngularImpulse +//----------------------------------------------------------------------------- +// AngularImpulse are exponetial maps (an axis scaled by a "twist" angle in degrees) +typedef Vector AngularImpulse; + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +inline AngularImpulse RandomAngularImpulse( float minVal, float maxVal ) +{ + AngularImpulse angImp; + angImp.Random( minVal, maxVal ); + return angImp; +} + +#endif + + +//----------------------------------------------------------------------------- +// Quaternion +//----------------------------------------------------------------------------- + +class RadianEuler; + +class Quaternion // same data-layout as engine's vec4_t, +{ // which is a vec_t[4] +public: + inline Quaternion(void) { + + // Initialize to NAN to catch errors +#ifdef _DEBUG +#ifdef VECTOR_PARANOIA + x = y = z = w = VEC_T_NAN; +#endif +#endif + } + inline Quaternion(vec_t ix, vec_t iy, vec_t iz, vec_t iw) : x(ix), y(iy), z(iz), w(iw) { } + inline Quaternion(RadianEuler const &angle); // evil auto type promotion!!! + + inline void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f, vec_t iw=0.0f) { x = ix; y = iy; z = iz; w = iw; } + + bool IsValid() const; + void Invalidate(); + + bool operator==( const Quaternion &src ) const; + bool operator!=( const Quaternion &src ) const; + + vec_t* Base() { return (vec_t*)this; } + const vec_t* Base() const { return (vec_t*)this; } + + // array access... 
+ vec_t operator[](int i) const; + vec_t& operator[](int i); + + vec_t x, y, z, w; +}; + + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- +inline vec_t& Quaternion::operator[](int i) +{ + Assert( (i >= 0) && (i < 4) ); + return ((vec_t*)this)[i]; +} + +inline vec_t Quaternion::operator[](int i) const +{ + Assert( (i >= 0) && (i < 4) ); + return ((vec_t*)this)[i]; +} + + +//----------------------------------------------------------------------------- +// Equality test +//----------------------------------------------------------------------------- +inline bool Quaternion::operator==( const Quaternion &src ) const +{ + return ( x == src.x ) && ( y == src.y ) && ( z == src.z ) && ( w == src.w ); +} + +inline bool Quaternion::operator!=( const Quaternion &src ) const +{ + return !operator==( src ); +} + + +//----------------------------------------------------------------------------- +// Quaternion equality with tolerance +//----------------------------------------------------------------------------- +inline bool QuaternionsAreEqual( const Quaternion& src1, const Quaternion& src2, float tolerance ) +{ + if (FloatMakePositive(src1.x - src2.x) > tolerance) + return false; + if (FloatMakePositive(src1.y - src2.y) > tolerance) + return false; + if (FloatMakePositive(src1.z - src2.z) > tolerance) + return false; + return (FloatMakePositive(src1.w - src2.w) <= tolerance); +} + + +//----------------------------------------------------------------------------- +// Here's where we add all those lovely SSE optimized routines +//----------------------------------------------------------------------------- +class ALIGN16 QuaternionAligned : public Quaternion +{ +public: + inline QuaternionAligned(void) {}; + inline QuaternionAligned(vec_t X, vec_t Y, vec_t Z, vec_t W) + { + Init(X,Y,Z,W); + } + +#ifdef VECTOR_NO_SLOW_OPERATIONS + +private: + // No 
copy constructors allowed if we're in optimal mode + QuaternionAligned(const QuaternionAligned& vOther); + QuaternionAligned(const Quaternion &vOther); + +#else +public: + explicit QuaternionAligned(const Quaternion &vOther) + { + Init(vOther.x, vOther.y, vOther.z, vOther.w); + } + + QuaternionAligned& operator=(const Quaternion &vOther) + { + Init(vOther.x, vOther.y, vOther.z, vOther.w); + return *this; + } + +#endif +} ALIGN16_POST; + + +//----------------------------------------------------------------------------- +// Radian Euler angle aligned to axis (NOT ROLL/PITCH/YAW) +//----------------------------------------------------------------------------- +class QAngle; +class RadianEuler +{ +public: + inline RadianEuler(void) { } + inline RadianEuler(vec_t X, vec_t Y, vec_t Z) { x = X; y = Y; z = Z; } + inline RadianEuler(Quaternion const &q); // evil auto type promotion!!! + inline RadianEuler(QAngle const &angles); // evil auto type promotion!!! + + // Initialization + inline void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f) { x = ix; y = iy; z = iz; } + + // conversion to qangle + QAngle ToQAngle( void ) const; + bool IsValid() const; + void Invalidate(); + + // array access... 
+ vec_t operator[](int i) const; + vec_t& operator[](int i); + + vec_t x, y, z; +}; + + +extern void AngleQuaternion( RadianEuler const &angles, Quaternion &qt ); +extern void QuaternionAngles( Quaternion const &q, RadianEuler &angles ); + +FORCEINLINE void NetworkVarConstruct( Quaternion &q ) { q.x = q.y = q.z = q.w = 0.0f; } + +inline Quaternion::Quaternion(RadianEuler const &angle) +{ + AngleQuaternion( angle, *this ); +} + +inline bool Quaternion::IsValid() const +{ + return IsFinite(x) && IsFinite(y) && IsFinite(z) && IsFinite(w); +} + +inline void Quaternion::Invalidate() +{ +//#ifdef _DEBUG +//#ifdef VECTOR_PARANOIA + x = y = z = w = VEC_T_NAN; +//#endif +//#endif +} + +inline RadianEuler::RadianEuler(Quaternion const &q) +{ + QuaternionAngles( q, *this ); +} + +inline void VectorCopy( RadianEuler const& src, RadianEuler &dst ) +{ + CHECK_VALID(src); + dst.x = src.x; + dst.y = src.y; + dst.z = src.z; +} + +inline void VectorScale( RadianEuler const& src, float b, RadianEuler &dst ) +{ + CHECK_VALID(src); + Assert( IsFinite(b) ); + dst.x = src.x * b; + dst.y = src.y * b; + dst.z = src.z * b; +} + +inline bool RadianEuler::IsValid() const +{ + return IsFinite(x) && IsFinite(y) && IsFinite(z); +} + +inline void RadianEuler::Invalidate() +{ +//#ifdef _DEBUG +//#ifdef VECTOR_PARANOIA + x = y = z = VEC_T_NAN; +//#endif +//#endif +} + + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- +inline vec_t& RadianEuler::operator[](int i) +{ + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; +} + +inline vec_t RadianEuler::operator[](int i) const +{ + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; +} + + +//----------------------------------------------------------------------------- +// Degree Euler QAngle pitch, yaw, roll +//----------------------------------------------------------------------------- +class QAngleByValue; 
+ +class QAngle +{ +public: + // Members + vec_t x, y, z; + + // Construction/destruction + QAngle(void); + QAngle(vec_t X, vec_t Y, vec_t Z); +// QAngle(RadianEuler const &angles); // evil auto type promotion!!! + + // Allow pass-by-value + operator QAngleByValue &() { return *((QAngleByValue *)(this)); } + operator const QAngleByValue &() const { return *((const QAngleByValue *)(this)); } + + // Initialization + void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f); + void Random( vec_t minVal, vec_t maxVal ); + + // Got any nasty NAN's? + bool IsValid() const; + void Invalidate(); + + // array access... + vec_t operator[](int i) const; + vec_t& operator[](int i); + + // Base address... + vec_t* Base(); + vec_t const* Base() const; + + // equality + bool operator==(const QAngle& v) const; + bool operator!=(const QAngle& v) const; + + // arithmetic operations + QAngle& operator+=(const QAngle &v); + QAngle& operator-=(const QAngle &v); + QAngle& operator*=(float s); + QAngle& operator/=(float s); + + // Get the vector's magnitude. + vec_t Length() const; + vec_t LengthSqr() const; + + // negate the QAngle components + //void Negate(); + + // No assignment operators either... 
+ QAngle& operator=( const QAngle& src ); + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // copy constructors + + // arithmetic operations + QAngle operator-(void) const; + + QAngle operator+(const QAngle& v) const; + QAngle operator-(const QAngle& v) const; + QAngle operator*(float fl) const; + QAngle operator/(float fl) const; +#else + +private: + // No copy constructors allowed if we're in optimal mode + QAngle(const QAngle& vOther); + +#endif +}; + +FORCEINLINE void NetworkVarConstruct( QAngle &q ) { q.x = q.y = q.z = 0.0f; } + +//----------------------------------------------------------------------------- +// Allows us to specifically pass the vector by value when we need to +//----------------------------------------------------------------------------- +class QAngleByValue : public QAngle +{ +public: + // Construction/destruction: + QAngleByValue(void) : QAngle() {} + QAngleByValue(vec_t X, vec_t Y, vec_t Z) : QAngle( X, Y, Z ) {} + QAngleByValue(const QAngleByValue& vOther) { *this = vOther; } +}; + + +inline void VectorAdd( const QAngle& a, const QAngle& b, QAngle& result ) +{ + CHECK_VALID(a); + CHECK_VALID(b); + result.x = a.x + b.x; + result.y = a.y + b.y; + result.z = a.z + b.z; +} + +inline void VectorMA( const QAngle &start, float scale, const QAngle &direction, QAngle &dest ) +{ + CHECK_VALID(start); + CHECK_VALID(direction); + dest.x = start.x + scale * direction.x; + dest.y = start.y + scale * direction.y; + dest.z = start.z + scale * direction.z; +} + + +//----------------------------------------------------------------------------- +// constructors +//----------------------------------------------------------------------------- +inline QAngle::QAngle(void) +{ +#ifdef _DEBUG +#ifdef VECTOR_PARANOIA + // Initialize to NAN to catch errors + x = y = z = VEC_T_NAN; +#endif +#endif +} + +inline QAngle::QAngle(vec_t X, vec_t Y, vec_t Z) +{ + x = X; y = Y; z = Z; + CHECK_VALID(*this); +} + + 
+//----------------------------------------------------------------------------- +// initialization +//----------------------------------------------------------------------------- +inline void QAngle::Init( vec_t ix, vec_t iy, vec_t iz ) +{ + x = ix; y = iy; z = iz; + CHECK_VALID(*this); +} + +inline void QAngle::Random( vec_t minVal, vec_t maxVal ) +{ + x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + z = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + CHECK_VALID(*this); +} + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +inline QAngle RandomAngle( float minVal, float maxVal ) +{ + Vector random; + random.Random( minVal, maxVal ); + QAngle ret( random.x, random.y, random.z ); + return ret; +} + +#endif + + +inline RadianEuler::RadianEuler(QAngle const &angles) +{ + Init( + angles.z * 3.14159265358979323846f / 180.f, + angles.x * 3.14159265358979323846f / 180.f, + angles.y * 3.14159265358979323846f / 180.f ); +} + + + + +inline QAngle RadianEuler::ToQAngle( void) const +{ + return QAngle( + y * 180.f / 3.14159265358979323846f, + z * 180.f / 3.14159265358979323846f, + x * 180.f / 3.14159265358979323846f ); +} + + +//----------------------------------------------------------------------------- +// assignment +//----------------------------------------------------------------------------- +inline QAngle& QAngle::operator=(const QAngle &vOther) +{ + CHECK_VALID(vOther); + x=vOther.x; y=vOther.y; z=vOther.z; + return *this; +} + + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- +inline vec_t& QAngle::operator[](int i) +{ + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; +} + +inline vec_t QAngle::operator[](int i) const +{ + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; +} + + 
+//----------------------------------------------------------------------------- +// Base address... +//----------------------------------------------------------------------------- +inline vec_t* QAngle::Base() +{ + return (vec_t*)this; +} + +inline vec_t const* QAngle::Base() const +{ + return (vec_t const*)this; +} + + +//----------------------------------------------------------------------------- +// IsValid? +//----------------------------------------------------------------------------- +inline bool QAngle::IsValid() const +{ + return IsFinite(x) && IsFinite(y) && IsFinite(z); +} + +//----------------------------------------------------------------------------- +// Invalidate +//----------------------------------------------------------------------------- + +inline void QAngle::Invalidate() +{ +//#ifdef _DEBUG +//#ifdef VECTOR_PARANOIA + x = y = z = VEC_T_NAN; +//#endif +//#endif +} + +//----------------------------------------------------------------------------- +// comparison +//----------------------------------------------------------------------------- +inline bool QAngle::operator==( const QAngle& src ) const +{ + CHECK_VALID(src); + CHECK_VALID(*this); + return (src.x == x) && (src.y == y) && (src.z == z); +} + +inline bool QAngle::operator!=( const QAngle& src ) const +{ + CHECK_VALID(src); + CHECK_VALID(*this); + return (src.x != x) || (src.y != y) || (src.z != z); +} + + +//----------------------------------------------------------------------------- +// Copy +//----------------------------------------------------------------------------- +inline void VectorCopy( const QAngle& src, QAngle& dst ) +{ + CHECK_VALID(src); + dst.x = src.x; + dst.y = src.y; + dst.z = src.z; +} + + +//----------------------------------------------------------------------------- +// standard math operations +//----------------------------------------------------------------------------- +inline QAngle& QAngle::operator+=(const QAngle& v) +{ + CHECK_VALID(*this); + 
CHECK_VALID(v); + x+=v.x; y+=v.y; z += v.z; + return *this; +} + +inline QAngle& QAngle::operator-=(const QAngle& v) +{ + CHECK_VALID(*this); + CHECK_VALID(v); + x-=v.x; y-=v.y; z -= v.z; + return *this; +} + +inline QAngle& QAngle::operator*=(float fl) +{ + x *= fl; + y *= fl; + z *= fl; + CHECK_VALID(*this); + return *this; +} + +inline QAngle& QAngle::operator/=(float fl) +{ + Assert( fl != 0.0f ); + float oofl = 1.0f / fl; + x *= oofl; + y *= oofl; + z *= oofl; + CHECK_VALID(*this); + return *this; +} + + +//----------------------------------------------------------------------------- +// length +//----------------------------------------------------------------------------- +inline vec_t QAngle::Length( ) const +{ + CHECK_VALID(*this); + return (vec_t)FastSqrt( LengthSqr( ) ); +} + + +inline vec_t QAngle::LengthSqr( ) const +{ + CHECK_VALID(*this); + return x * x + y * y + z * z; +} + + +//----------------------------------------------------------------------------- +// Vector equality with tolerance +//----------------------------------------------------------------------------- +inline bool QAnglesAreEqual( const QAngle& src1, const QAngle& src2, float tolerance = 0.0f ) +{ + if (FloatMakePositive(src1.x - src2.x) > tolerance) + return false; + if (FloatMakePositive(src1.y - src2.y) > tolerance) + return false; + return (FloatMakePositive(src1.z - src2.z) <= tolerance); +} + + +//----------------------------------------------------------------------------- +// arithmetic operations (SLOW!!) 
+//----------------------------------------------------------------------------- +#ifndef VECTOR_NO_SLOW_OPERATIONS + +inline QAngle QAngle::operator-(void) const +{ + QAngle ret(-x,-y,-z); + return ret; +} + +inline QAngle QAngle::operator+(const QAngle& v) const +{ + QAngle res; + res.x = x + v.x; + res.y = y + v.y; + res.z = z + v.z; + return res; +} + +inline QAngle QAngle::operator-(const QAngle& v) const +{ + QAngle res; + res.x = x - v.x; + res.y = y - v.y; + res.z = z - v.z; + return res; +} + +inline QAngle QAngle::operator*(float fl) const +{ + QAngle res; + res.x = x * fl; + res.y = y * fl; + res.z = z * fl; + return res; +} + +inline QAngle QAngle::operator/(float fl) const +{ + QAngle res; + res.x = x / fl; + res.y = y / fl; + res.z = z / fl; + return res; +} + +inline QAngle operator*(float fl, const QAngle& v) +{ + QAngle ret( v * fl ); + return ret; +} + +#endif // VECTOR_NO_SLOW_OPERATIONS + + +//----------------------------------------------------------------------------- +// NOTE: These are not completely correct. 
The representations are not equivalent +// unless the QAngle represents a rotational impulse along a coordinate axis (x,y,z) +inline void QAngleToAngularImpulse( const QAngle &angles, AngularImpulse &impulse ) +{ + impulse.x = angles.z; + impulse.y = angles.x; + impulse.z = angles.y; +} + +inline void AngularImpulseToQAngle( const AngularImpulse &impulse, QAngle &angles ) +{ + angles.x = impulse.y; + angles.y = impulse.z; + angles.z = impulse.x; +} + +#if !defined( _X360 ) + +FORCEINLINE vec_t InvRSquared( float const *v ) +{ +#if defined(__i386__) || defined(_M_IX86) + float sqrlen = v[0]*v[0]+v[1]*v[1]+v[2]*v[2] + 1.0e-10f, result; + _mm_store_ss(&result, _mm_rcp_ss( _mm_max_ss( _mm_set_ss(1.0f), _mm_load_ss(&sqrlen) ) )); + return result; +#else + return 1.f/fpmax(1.f, v[0]*v[0]+v[1]*v[1]+v[2]*v[2]); +#endif +} + +FORCEINLINE vec_t InvRSquared( const Vector &v ) +{ + return InvRSquared(&v.x); +} + +#if defined(__i386__) || defined(_M_IX86) +inline void _SSE_RSqrtInline( float a, float* out ) +{ + __m128 xx = _mm_load_ss( &a ); + __m128 xr = _mm_rsqrt_ss( xx ); + __m128 xt; + xt = _mm_mul_ss( xr, xr ); + xt = _mm_mul_ss( xt, xx ); + xt = _mm_sub_ss( _mm_set_ss(3.f), xt ); + xt = _mm_mul_ss( xt, _mm_set_ss(0.5f) ); + xr = _mm_mul_ss( xr, xt ); + _mm_store_ss( out, xr ); +} +#endif + +// FIXME: Change this back to a #define once we get rid of the vec_t version +FORCEINLINE float VectorNormalize( Vector& vec ) +{ +#ifndef DEBUG // stop crashing my edit-and-continue! 
+ #if defined(__i386__) || defined(_M_IX86) + #define DO_SSE_OPTIMIZATION + #endif +#endif + +#if defined( DO_SSE_OPTIMIZATION ) + float sqrlen = vec.LengthSqr() + 1.0e-10f, invlen; + _SSE_RSqrtInline(sqrlen, &invlen); + vec.x *= invlen; + vec.y *= invlen; + vec.z *= invlen; + return sqrlen * invlen; +#else + extern float (FASTCALL *pfVectorNormalize)(Vector& v); + return (*pfVectorNormalize)(vec); +#endif +} + +// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s +FORCEINLINE float VectorNormalize( float * v ) +{ + return VectorNormalize(*(reinterpret_cast<Vector *>(v))); +} + +FORCEINLINE void VectorNormalizeFast( Vector &vec ) +{ + VectorNormalize(vec); +} + +#else + +FORCEINLINE float _VMX_InvRSquared( const Vector &v ) +{ + XMVECTOR xmV = XMVector3ReciprocalLength( XMLoadVector3( v.Base() ) ); + xmV = XMVector3Dot( xmV, xmV ); + return xmV.x; +} + +// call directly +FORCEINLINE float _VMX_VectorNormalize( Vector &vec ) +{ + float mag = XMVector3Length( XMLoadVector3( vec.Base() ) ).x; + float den = 1.f / (mag + FLT_EPSILON ); + vec.x *= den; + vec.y *= den; + vec.z *= den; + return mag; +} + +#define InvRSquared(x) _VMX_InvRSquared(x) + +// FIXME: Change this back to a #define once we get rid of the vec_t version +FORCEINLINE float VectorNormalize( Vector& v ) +{ + return _VMX_VectorNormalize( v ); +} +// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s +FORCEINLINE float VectorNormalize( float *pV ) +{ + return _VMX_VectorNormalize(*(reinterpret_cast<Vector*>(pV))); +} + +// call directly +FORCEINLINE void VectorNormalizeFast( Vector &vec ) +{ + XMVECTOR xmV = XMVector3LengthEst( XMLoadVector3( vec.Base() ) ); + float den = 1.f / (xmV.x + FLT_EPSILON); + vec.x *= den; + vec.y *= den; + vec.z *= den; +} + +#endif // _X360 + + +inline vec_t Vector::NormalizeInPlace() +{ + return VectorNormalize( *this ); +} + +inline Vector Vector::Normalized() const +{ + Vector norm = *this; + VectorNormalize( 
norm ); + return norm; +} + +inline bool Vector::IsLengthGreaterThan( float val ) const +{ + return LengthSqr() > val*val; +} + +inline bool Vector::IsLengthLessThan( float val ) const +{ + return LengthSqr() < val*val; +} + +#endif + diff --git a/mp/src/public/mathlib/vector2d.h b/mp/src/public/mathlib/vector2d.h index 2c6bb242..41385589 100644 --- a/mp/src/public/mathlib/vector2d.h +++ b/mp/src/public/mathlib/vector2d.h @@ -1,670 +1,670 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef VECTOR2D_H
-#define VECTOR2D_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include <math.h>
-#include <float.h>
-
-// For vec_t, put this somewhere else?
-#include "tier0/basetypes.h"
-
-// For rand(). We really need a library!
-#include <stdlib.h>
-
-#include "tier0/dbg.h"
-#include "mathlib/math_pfns.h"
-
-//=========================================================
-// 2D Vector2D
-//=========================================================
-
-class Vector2D
-{
-public:
- // Members
- vec_t x, y;
-
- // Construction/destruction
- Vector2D(void);
- Vector2D(vec_t X, vec_t Y);
- Vector2D(const float *pFloat);
-
- // Initialization
- void Init(vec_t ix=0.0f, vec_t iy=0.0f);
-
- // Got any nasty NAN's?
- bool IsValid() const;
-
- // array access...
- vec_t operator[](int i) const;
- vec_t& operator[](int i);
-
- // Base address...
- vec_t* Base();
- vec_t const* Base() const;
-
- // Initialization methods
- void Random( float minVal, float maxVal );
-
- // equality
- bool operator==(const Vector2D& v) const;
- bool operator!=(const Vector2D& v) const;
-
- // arithmetic operations
- Vector2D& operator+=(const Vector2D &v);
- Vector2D& operator-=(const Vector2D &v);
- Vector2D& operator*=(const Vector2D &v);
- Vector2D& operator*=(float s);
- Vector2D& operator/=(const Vector2D &v);
- Vector2D& operator/=(float s);
-
- // negate the Vector2D components
- void Negate();
-
- // Get the Vector2D's magnitude.
- vec_t Length() const;
-
- // Get the Vector2D's magnitude squared.
- vec_t LengthSqr(void) const;
-
- // return true if this vector is (0,0) within tolerance
- bool IsZero( float tolerance = 0.01f ) const
- {
- return (x > -tolerance && x < tolerance &&
- y > -tolerance && y < tolerance);
- }
-
- // Normalize in place and return the old length.
- vec_t NormalizeInPlace();
-
- // Compare length.
- bool IsLengthGreaterThan( float val ) const;
- bool IsLengthLessThan( float val ) const;
-
- // Get the distance from this Vector2D to the other one.
- vec_t DistTo(const Vector2D &vOther) const;
-
- // Get the distance from this Vector2D to the other one squared.
- vec_t DistToSqr(const Vector2D &vOther) const;
-
- // Copy
- void CopyToArray(float* rgfl) const;
-
- // Multiply, add, and assign to this (ie: *this = a + b * scalar). This
- // is about 12% faster than the actual Vector2D equation (because it's done per-component
- // rather than per-Vector2D).
- void MulAdd(const Vector2D& a, const Vector2D& b, float scalar);
-
- // Dot product.
- vec_t Dot(const Vector2D& vOther) const;
-
- // assignment
- Vector2D& operator=(const Vector2D &vOther);
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // copy constructors
- Vector2D(const Vector2D &vOther);
-
- // arithmetic operations
- Vector2D operator-(void) const;
-
- Vector2D operator+(const Vector2D& v) const;
- Vector2D operator-(const Vector2D& v) const;
- Vector2D operator*(const Vector2D& v) const;
- Vector2D operator/(const Vector2D& v) const;
- Vector2D operator*(float fl) const;
- Vector2D operator/(float fl) const;
-
- // Cross product between two vectors.
- Vector2D Cross(const Vector2D &vOther) const;
-
- // Returns a Vector2D with the min or max in X, Y, and Z.
- Vector2D Min(const Vector2D &vOther) const;
- Vector2D Max(const Vector2D &vOther) const;
-
-#else
-
-private:
- // No copy constructors allowed if we're in optimal mode
- Vector2D(const Vector2D& vOther);
-#endif
-};
-
-//-----------------------------------------------------------------------------
-
-const Vector2D vec2_origin(0,0);
-const Vector2D vec2_invalid( FLT_MAX, FLT_MAX );
-
-//-----------------------------------------------------------------------------
-// Vector2D related operations
-//-----------------------------------------------------------------------------
-
-// Vector2D clear
-void Vector2DClear( Vector2D& a );
-
-// Copy
-void Vector2DCopy( const Vector2D& src, Vector2D& dst );
-
-// Vector2D arithmetic
-void Vector2DAdd( const Vector2D& a, const Vector2D& b, Vector2D& result );
-void Vector2DSubtract( const Vector2D& a, const Vector2D& b, Vector2D& result );
-void Vector2DMultiply( const Vector2D& a, vec_t b, Vector2D& result );
-void Vector2DMultiply( const Vector2D& a, const Vector2D& b, Vector2D& result );
-void Vector2DDivide( const Vector2D& a, vec_t b, Vector2D& result );
-void Vector2DDivide( const Vector2D& a, const Vector2D& b, Vector2D& result );
-void Vector2DMA( const Vector2D& start, float s, const Vector2D& dir, Vector2D& result );
-
-// Store the min or max of each of x, y, and z into the result.
-void Vector2DMin( const Vector2D &a, const Vector2D &b, Vector2D &result );
-void Vector2DMax( const Vector2D &a, const Vector2D &b, Vector2D &result );
-
-#define Vector2DExpand( v ) (v).x, (v).y
-
-// Normalization
-vec_t Vector2DNormalize( Vector2D& v );
-
-// Length
-vec_t Vector2DLength( const Vector2D& v );
-
-// Dot Product
-vec_t DotProduct2D(const Vector2D& a, const Vector2D& b);
-
-// Linearly interpolate between two vectors
-void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D& dest );
-
-
-//-----------------------------------------------------------------------------
-//
-// Inlined Vector2D methods
-//
-//-----------------------------------------------------------------------------
-
-
-//-----------------------------------------------------------------------------
-// constructors
-//-----------------------------------------------------------------------------
-
-inline Vector2D::Vector2D(void)
-{
-#ifdef _DEBUG
- // Initialize to NAN to catch errors
- x = y = VEC_T_NAN;
-#endif
-}
-
-inline Vector2D::Vector2D(vec_t X, vec_t Y)
-{
- x = X; y = Y;
- Assert( IsValid() );
-}
-
-inline Vector2D::Vector2D(const float *pFloat)
-{
- Assert( pFloat );
- x = pFloat[0]; y = pFloat[1];
- Assert( IsValid() );
-}
-
-
-//-----------------------------------------------------------------------------
-// copy constructor
-//-----------------------------------------------------------------------------
-
-inline Vector2D::Vector2D(const Vector2D &vOther)
-{
- Assert( vOther.IsValid() );
- x = vOther.x; y = vOther.y;
-}
-
-//-----------------------------------------------------------------------------
-// initialization
-//-----------------------------------------------------------------------------
-
-inline void Vector2D::Init( vec_t ix, vec_t iy )
-{
- x = ix; y = iy;
- Assert( IsValid() );
-}
-
-inline void Vector2D::Random( float minVal, float maxVal )
-{
- x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
-}
-
-inline void Vector2DClear( Vector2D& a )
-{
- a.x = a.y = 0.0f;
-}
-
-//-----------------------------------------------------------------------------
-// assignment
-//-----------------------------------------------------------------------------
-
-inline Vector2D& Vector2D::operator=(const Vector2D &vOther)
-{
- Assert( vOther.IsValid() );
- x=vOther.x; y=vOther.y;
- return *this;
-}
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-
-inline vec_t& Vector2D::operator[](int i)
-{
- Assert( (i >= 0) && (i < 2) );
- return ((vec_t*)this)[i];
-}
-
-inline vec_t Vector2D::operator[](int i) const
-{
- Assert( (i >= 0) && (i < 2) );
- return ((vec_t*)this)[i];
-}
-
-//-----------------------------------------------------------------------------
-// Base address...
-//-----------------------------------------------------------------------------
-
-inline vec_t* Vector2D::Base()
-{
- return (vec_t*)this;
-}
-
-inline vec_t const* Vector2D::Base() const
-{
- return (vec_t const*)this;
-}
-
-//-----------------------------------------------------------------------------
-// IsValid?
-//-----------------------------------------------------------------------------
-
-inline bool Vector2D::IsValid() const
-{
- return IsFinite(x) && IsFinite(y);
-}
-
-//-----------------------------------------------------------------------------
-// comparison
-//-----------------------------------------------------------------------------
-
-inline bool Vector2D::operator==( const Vector2D& src ) const
-{
- Assert( src.IsValid() && IsValid() );
- return (src.x == x) && (src.y == y);
-}
-
-inline bool Vector2D::operator!=( const Vector2D& src ) const
-{
- Assert( src.IsValid() && IsValid() );
- return (src.x != x) || (src.y != y);
-}
-
-
-//-----------------------------------------------------------------------------
-// Copy
-//-----------------------------------------------------------------------------
-
-inline void Vector2DCopy( const Vector2D& src, Vector2D& dst )
-{
- Assert( src.IsValid() );
- dst.x = src.x;
- dst.y = src.y;
-}
-
-inline void Vector2D::CopyToArray(float* rgfl) const
-{
- Assert( IsValid() );
- Assert( rgfl );
- rgfl[0] = x; rgfl[1] = y;
-}
-
-//-----------------------------------------------------------------------------
-// standard math operations
-//-----------------------------------------------------------------------------
-
-inline void Vector2D::Negate()
-{
- Assert( IsValid() );
- x = -x; y = -y;
-}
-
-inline Vector2D& Vector2D::operator+=(const Vector2D& v)
-{
- Assert( IsValid() && v.IsValid() );
- x+=v.x; y+=v.y;
- return *this;
-}
-
-inline Vector2D& Vector2D::operator-=(const Vector2D& v)
-{
- Assert( IsValid() && v.IsValid() );
- x-=v.x; y-=v.y;
- return *this;
-}
-
-inline Vector2D& Vector2D::operator*=(float fl)
-{
- x *= fl;
- y *= fl;
- Assert( IsValid() );
- return *this;
-}
-
-inline Vector2D& Vector2D::operator*=(const Vector2D& v)
-{
- x *= v.x;
- y *= v.y;
- Assert( IsValid() );
- return *this;
-}
-
-inline Vector2D& Vector2D::operator/=(float fl)
-{
- Assert( fl != 0.0f );
- float oofl = 1.0f / fl;
- x *= oofl;
- y *= oofl;
- Assert( IsValid() );
- return *this;
-}
-
-inline Vector2D& Vector2D::operator/=(const Vector2D& v)
-{
- Assert( v.x != 0.0f && v.y != 0.0f );
- x /= v.x;
- y /= v.y;
- Assert( IsValid() );
- return *this;
-}
-
-inline void Vector2DAdd( const Vector2D& a, const Vector2D& b, Vector2D& c )
-{
- Assert( a.IsValid() && b.IsValid() );
- c.x = a.x + b.x;
- c.y = a.y + b.y;
-}
-
-inline void Vector2DSubtract( const Vector2D& a, const Vector2D& b, Vector2D& c )
-{
- Assert( a.IsValid() && b.IsValid() );
- c.x = a.x - b.x;
- c.y = a.y - b.y;
-}
-
-inline void Vector2DMultiply( const Vector2D& a, vec_t b, Vector2D& c )
-{
- Assert( a.IsValid() && IsFinite(b) );
- c.x = a.x * b;
- c.y = a.y * b;
-}
-
-inline void Vector2DMultiply( const Vector2D& a, const Vector2D& b, Vector2D& c )
-{
- Assert( a.IsValid() && b.IsValid() );
- c.x = a.x * b.x;
- c.y = a.y * b.y;
-}
-
-
-inline void Vector2DDivide( const Vector2D& a, vec_t b, Vector2D& c )
-{
- Assert( a.IsValid() );
- Assert( b != 0.0f );
- vec_t oob = 1.0f / b;
- c.x = a.x * oob;
- c.y = a.y * oob;
-}
-
-inline void Vector2DDivide( const Vector2D& a, const Vector2D& b, Vector2D& c )
-{
- Assert( a.IsValid() );
- Assert( (b.x != 0.0f) && (b.y != 0.0f) );
- c.x = a.x / b.x;
- c.y = a.y / b.y;
-}
-
-inline void Vector2DMA( const Vector2D& start, float s, const Vector2D& dir, Vector2D& result )
-{
- Assert( start.IsValid() && IsFinite(s) && dir.IsValid() );
- result.x = start.x + s*dir.x;
- result.y = start.y + s*dir.y;
-}
-
-// FIXME: Remove
-// For backwards compatability
-inline void Vector2D::MulAdd(const Vector2D& a, const Vector2D& b, float scalar)
-{
- x = a.x + b.x * scalar;
- y = a.y + b.y * scalar;
-}
-
-inline void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D& dest )
-{
- dest[0] = src1[0] + (src2[0] - src1[0]) * t;
- dest[1] = src1[1] + (src2[1] - src1[1]) * t;
-}
-
-//-----------------------------------------------------------------------------
-// dot, cross
-//-----------------------------------------------------------------------------
-inline vec_t DotProduct2D(const Vector2D& a, const Vector2D& b)
-{
- Assert( a.IsValid() && b.IsValid() );
- return( a.x*b.x + a.y*b.y );
-}
-
-// for backwards compatability
-inline vec_t Vector2D::Dot( const Vector2D& vOther ) const
-{
- return DotProduct2D( *this, vOther );
-}
-
-
-//-----------------------------------------------------------------------------
-// length
-//-----------------------------------------------------------------------------
-inline vec_t Vector2DLength( const Vector2D& v )
-{
- Assert( v.IsValid() );
- return (vec_t)FastSqrt(v.x*v.x + v.y*v.y);
-}
-
-inline vec_t Vector2D::LengthSqr(void) const
-{
- Assert( IsValid() );
- return (x*x + y*y);
-}
-
-inline vec_t Vector2D::NormalizeInPlace()
-{
- return Vector2DNormalize( *this );
-}
-
-inline bool Vector2D::IsLengthGreaterThan( float val ) const
-{
- return LengthSqr() > val*val;
-}
-
-inline bool Vector2D::IsLengthLessThan( float val ) const
-{
- return LengthSqr() < val*val;
-}
-
-inline vec_t Vector2D::Length(void) const
-{
- return Vector2DLength( *this );
-}
-
-
-inline void Vector2DMin( const Vector2D &a, const Vector2D &b, Vector2D &result )
-{
- result.x = (a.x < b.x) ? a.x : b.x;
- result.y = (a.y < b.y) ? a.y : b.y;
-}
-
-
-inline void Vector2DMax( const Vector2D &a, const Vector2D &b, Vector2D &result )
-{
- result.x = (a.x > b.x) ? a.x : b.x;
- result.y = (a.y > b.y) ? a.y : b.y;
-}
-
-
-//-----------------------------------------------------------------------------
-// Normalization
-//-----------------------------------------------------------------------------
-inline vec_t Vector2DNormalize( Vector2D& v )
-{
- Assert( v.IsValid() );
- vec_t l = v.Length();
- if (l != 0.0f)
- {
- v /= l;
- }
- else
- {
- v.x = v.y = 0.0f;
- }
- return l;
-}
-
-
-//-----------------------------------------------------------------------------
-// Get the distance from this Vector2D to the other one
-//-----------------------------------------------------------------------------
-inline vec_t Vector2D::DistTo(const Vector2D &vOther) const
-{
- Vector2D delta;
- Vector2DSubtract( *this, vOther, delta );
- return delta.Length();
-}
-
-inline vec_t Vector2D::DistToSqr(const Vector2D &vOther) const
-{
- Vector2D delta;
- Vector2DSubtract( *this, vOther, delta );
- return delta.LengthSqr();
-}
-
-
-//-----------------------------------------------------------------------------
-// Computes the closest point to vecTarget no farther than flMaxDist from vecStart
-//-----------------------------------------------------------------------------
-inline void ComputeClosestPoint2D( const Vector2D& vecStart, float flMaxDist, const Vector2D& vecTarget, Vector2D *pResult )
-{
- Vector2D vecDelta;
- Vector2DSubtract( vecTarget, vecStart, vecDelta );
- float flDistSqr = vecDelta.LengthSqr();
- if ( flDistSqr <= flMaxDist * flMaxDist )
- {
- *pResult = vecTarget;
- }
- else
- {
- vecDelta /= FastSqrt( flDistSqr );
- Vector2DMA( vecStart, flMaxDist, vecDelta, *pResult );
- }
-}
-
-
-
-//-----------------------------------------------------------------------------
-//
-// Slow methods
-//
-//-----------------------------------------------------------------------------
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-//-----------------------------------------------------------------------------
-// Returns a Vector2D with the min or max in X, Y, and Z.
-//-----------------------------------------------------------------------------
-
-inline Vector2D Vector2D::Min(const Vector2D &vOther) const
-{
- return Vector2D(x < vOther.x ? x : vOther.x,
- y < vOther.y ? y : vOther.y);
-}
-
-inline Vector2D Vector2D::Max(const Vector2D &vOther) const
-{
- return Vector2D(x > vOther.x ? x : vOther.x,
- y > vOther.y ? y : vOther.y);
-}
-
-
-//-----------------------------------------------------------------------------
-// arithmetic operations
-//-----------------------------------------------------------------------------
-
-inline Vector2D Vector2D::operator-(void) const
-{
- return Vector2D(-x,-y);
-}
-
-inline Vector2D Vector2D::operator+(const Vector2D& v) const
-{
- Vector2D res;
- Vector2DAdd( *this, v, res );
- return res;
-}
-
-inline Vector2D Vector2D::operator-(const Vector2D& v) const
-{
- Vector2D res;
- Vector2DSubtract( *this, v, res );
- return res;
-}
-
-inline Vector2D Vector2D::operator*(float fl) const
-{
- Vector2D res;
- Vector2DMultiply( *this, fl, res );
- return res;
-}
-
-inline Vector2D Vector2D::operator*(const Vector2D& v) const
-{
- Vector2D res;
- Vector2DMultiply( *this, v, res );
- return res;
-}
-
-inline Vector2D Vector2D::operator/(float fl) const
-{
- Vector2D res;
- Vector2DDivide( *this, fl, res );
- return res;
-}
-
-inline Vector2D Vector2D::operator/(const Vector2D& v) const
-{
- Vector2D res;
- Vector2DDivide( *this, v, res );
- return res;
-}
-
-inline Vector2D operator*(float fl, const Vector2D& v)
-{
- return v * fl;
-}
-
-#endif //slow
-
-#endif // VECTOR2D_H
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef VECTOR2D_H +#define VECTOR2D_H + +#ifdef _WIN32 +#pragma once +#endif + +#include <math.h> +#include <float.h> + +// For vec_t, put this somewhere else? +#include "tier0/basetypes.h" + +// For rand(). We really need a library! +#include <stdlib.h> + +#include "tier0/dbg.h" +#include "mathlib/math_pfns.h" + +//========================================================= +// 2D Vector2D +//========================================================= + +class Vector2D +{ +public: + // Members + vec_t x, y; + + // Construction/destruction + Vector2D(void); + Vector2D(vec_t X, vec_t Y); + Vector2D(const float *pFloat); + + // Initialization + void Init(vec_t ix=0.0f, vec_t iy=0.0f); + + // Got any nasty NAN's? + bool IsValid() const; + + // array access... + vec_t operator[](int i) const; + vec_t& operator[](int i); + + // Base address... + vec_t* Base(); + vec_t const* Base() const; + + // Initialization methods + void Random( float minVal, float maxVal ); + + // equality + bool operator==(const Vector2D& v) const; + bool operator!=(const Vector2D& v) const; + + // arithmetic operations + Vector2D& operator+=(const Vector2D &v); + Vector2D& operator-=(const Vector2D &v); + Vector2D& operator*=(const Vector2D &v); + Vector2D& operator*=(float s); + Vector2D& operator/=(const Vector2D &v); + Vector2D& operator/=(float s); + + // negate the Vector2D components + void Negate(); + + // Get the Vector2D's magnitude. + vec_t Length() const; + + // Get the Vector2D's magnitude squared. + vec_t LengthSqr(void) const; + + // return true if this vector is (0,0) within tolerance + bool IsZero( float tolerance = 0.01f ) const + { + return (x > -tolerance && x < tolerance && + y > -tolerance && y < tolerance); + } + + // Normalize in place and return the old length. 
+ vec_t NormalizeInPlace(); + + // Compare length. + bool IsLengthGreaterThan( float val ) const; + bool IsLengthLessThan( float val ) const; + + // Get the distance from this Vector2D to the other one. + vec_t DistTo(const Vector2D &vOther) const; + + // Get the distance from this Vector2D to the other one squared. + vec_t DistToSqr(const Vector2D &vOther) const; + + // Copy + void CopyToArray(float* rgfl) const; + + // Multiply, add, and assign to this (ie: *this = a + b * scalar). This + // is about 12% faster than the actual Vector2D equation (because it's done per-component + // rather than per-Vector2D). + void MulAdd(const Vector2D& a, const Vector2D& b, float scalar); + + // Dot product. + vec_t Dot(const Vector2D& vOther) const; + + // assignment + Vector2D& operator=(const Vector2D &vOther); + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // copy constructors + Vector2D(const Vector2D &vOther); + + // arithmetic operations + Vector2D operator-(void) const; + + Vector2D operator+(const Vector2D& v) const; + Vector2D operator-(const Vector2D& v) const; + Vector2D operator*(const Vector2D& v) const; + Vector2D operator/(const Vector2D& v) const; + Vector2D operator*(float fl) const; + Vector2D operator/(float fl) const; + + // Cross product between two vectors. + Vector2D Cross(const Vector2D &vOther) const; + + // Returns a Vector2D with the min or max in X, Y, and Z. 
+ Vector2D Min(const Vector2D &vOther) const; + Vector2D Max(const Vector2D &vOther) const; + +#else + +private: + // No copy constructors allowed if we're in optimal mode + Vector2D(const Vector2D& vOther); +#endif +}; + +//----------------------------------------------------------------------------- + +const Vector2D vec2_origin(0,0); +const Vector2D vec2_invalid( FLT_MAX, FLT_MAX ); + +//----------------------------------------------------------------------------- +// Vector2D related operations +//----------------------------------------------------------------------------- + +// Vector2D clear +void Vector2DClear( Vector2D& a ); + +// Copy +void Vector2DCopy( const Vector2D& src, Vector2D& dst ); + +// Vector2D arithmetic +void Vector2DAdd( const Vector2D& a, const Vector2D& b, Vector2D& result ); +void Vector2DSubtract( const Vector2D& a, const Vector2D& b, Vector2D& result ); +void Vector2DMultiply( const Vector2D& a, vec_t b, Vector2D& result ); +void Vector2DMultiply( const Vector2D& a, const Vector2D& b, Vector2D& result ); +void Vector2DDivide( const Vector2D& a, vec_t b, Vector2D& result ); +void Vector2DDivide( const Vector2D& a, const Vector2D& b, Vector2D& result ); +void Vector2DMA( const Vector2D& start, float s, const Vector2D& dir, Vector2D& result ); + +// Store the min or max of each of x, y, and z into the result. 
+void Vector2DMin( const Vector2D &a, const Vector2D &b, Vector2D &result ); +void Vector2DMax( const Vector2D &a, const Vector2D &b, Vector2D &result ); + +#define Vector2DExpand( v ) (v).x, (v).y + +// Normalization +vec_t Vector2DNormalize( Vector2D& v ); + +// Length +vec_t Vector2DLength( const Vector2D& v ); + +// Dot Product +vec_t DotProduct2D(const Vector2D& a, const Vector2D& b); + +// Linearly interpolate between two vectors +void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D& dest ); + + +//----------------------------------------------------------------------------- +// +// Inlined Vector2D methods +// +//----------------------------------------------------------------------------- + + +//----------------------------------------------------------------------------- +// constructors +//----------------------------------------------------------------------------- + +inline Vector2D::Vector2D(void) +{ +#ifdef _DEBUG + // Initialize to NAN to catch errors + x = y = VEC_T_NAN; +#endif +} + +inline Vector2D::Vector2D(vec_t X, vec_t Y) +{ + x = X; y = Y; + Assert( IsValid() ); +} + +inline Vector2D::Vector2D(const float *pFloat) +{ + Assert( pFloat ); + x = pFloat[0]; y = pFloat[1]; + Assert( IsValid() ); +} + + +//----------------------------------------------------------------------------- +// copy constructor +//----------------------------------------------------------------------------- + +inline Vector2D::Vector2D(const Vector2D &vOther) +{ + Assert( vOther.IsValid() ); + x = vOther.x; y = vOther.y; +} + +//----------------------------------------------------------------------------- +// initialization +//----------------------------------------------------------------------------- + +inline void Vector2D::Init( vec_t ix, vec_t iy ) +{ + x = ix; y = iy; + Assert( IsValid() ); +} + +inline void Vector2D::Random( float minVal, float maxVal ) +{ + x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + y = minVal + 
((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); +} + +inline void Vector2DClear( Vector2D& a ) +{ + a.x = a.y = 0.0f; +} + +//----------------------------------------------------------------------------- +// assignment +//----------------------------------------------------------------------------- + +inline Vector2D& Vector2D::operator=(const Vector2D &vOther) +{ + Assert( vOther.IsValid() ); + x=vOther.x; y=vOther.y; + return *this; +} + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- + +inline vec_t& Vector2D::operator[](int i) +{ + Assert( (i >= 0) && (i < 2) ); + return ((vec_t*)this)[i]; +} + +inline vec_t Vector2D::operator[](int i) const +{ + Assert( (i >= 0) && (i < 2) ); + return ((vec_t*)this)[i]; +} + +//----------------------------------------------------------------------------- +// Base address... +//----------------------------------------------------------------------------- + +inline vec_t* Vector2D::Base() +{ + return (vec_t*)this; +} + +inline vec_t const* Vector2D::Base() const +{ + return (vec_t const*)this; +} + +//----------------------------------------------------------------------------- +// IsValid? 
+//----------------------------------------------------------------------------- + +inline bool Vector2D::IsValid() const +{ + return IsFinite(x) && IsFinite(y); +} + +//----------------------------------------------------------------------------- +// comparison +//----------------------------------------------------------------------------- + +inline bool Vector2D::operator==( const Vector2D& src ) const +{ + Assert( src.IsValid() && IsValid() ); + return (src.x == x) && (src.y == y); +} + +inline bool Vector2D::operator!=( const Vector2D& src ) const +{ + Assert( src.IsValid() && IsValid() ); + return (src.x != x) || (src.y != y); +} + + +//----------------------------------------------------------------------------- +// Copy +//----------------------------------------------------------------------------- + +inline void Vector2DCopy( const Vector2D& src, Vector2D& dst ) +{ + Assert( src.IsValid() ); + dst.x = src.x; + dst.y = src.y; +} + +inline void Vector2D::CopyToArray(float* rgfl) const +{ + Assert( IsValid() ); + Assert( rgfl ); + rgfl[0] = x; rgfl[1] = y; +} + +//----------------------------------------------------------------------------- +// standard math operations +//----------------------------------------------------------------------------- + +inline void Vector2D::Negate() +{ + Assert( IsValid() ); + x = -x; y = -y; +} + +inline Vector2D& Vector2D::operator+=(const Vector2D& v) +{ + Assert( IsValid() && v.IsValid() ); + x+=v.x; y+=v.y; + return *this; +} + +inline Vector2D& Vector2D::operator-=(const Vector2D& v) +{ + Assert( IsValid() && v.IsValid() ); + x-=v.x; y-=v.y; + return *this; +} + +inline Vector2D& Vector2D::operator*=(float fl) +{ + x *= fl; + y *= fl; + Assert( IsValid() ); + return *this; +} + +inline Vector2D& Vector2D::operator*=(const Vector2D& v) +{ + x *= v.x; + y *= v.y; + Assert( IsValid() ); + return *this; +} + +inline Vector2D& Vector2D::operator/=(float fl) +{ + Assert( fl != 0.0f ); + float oofl = 1.0f / fl; + x *= oofl; + 
y *= oofl; + Assert( IsValid() ); + return *this; +} + +inline Vector2D& Vector2D::operator/=(const Vector2D& v) +{ + Assert( v.x != 0.0f && v.y != 0.0f ); + x /= v.x; + y /= v.y; + Assert( IsValid() ); + return *this; +} + +inline void Vector2DAdd( const Vector2D& a, const Vector2D& b, Vector2D& c ) +{ + Assert( a.IsValid() && b.IsValid() ); + c.x = a.x + b.x; + c.y = a.y + b.y; +} + +inline void Vector2DSubtract( const Vector2D& a, const Vector2D& b, Vector2D& c ) +{ + Assert( a.IsValid() && b.IsValid() ); + c.x = a.x - b.x; + c.y = a.y - b.y; +} + +inline void Vector2DMultiply( const Vector2D& a, vec_t b, Vector2D& c ) +{ + Assert( a.IsValid() && IsFinite(b) ); + c.x = a.x * b; + c.y = a.y * b; +} + +inline void Vector2DMultiply( const Vector2D& a, const Vector2D& b, Vector2D& c ) +{ + Assert( a.IsValid() && b.IsValid() ); + c.x = a.x * b.x; + c.y = a.y * b.y; +} + + +inline void Vector2DDivide( const Vector2D& a, vec_t b, Vector2D& c ) +{ + Assert( a.IsValid() ); + Assert( b != 0.0f ); + vec_t oob = 1.0f / b; + c.x = a.x * oob; + c.y = a.y * oob; +} + +inline void Vector2DDivide( const Vector2D& a, const Vector2D& b, Vector2D& c ) +{ + Assert( a.IsValid() ); + Assert( (b.x != 0.0f) && (b.y != 0.0f) ); + c.x = a.x / b.x; + c.y = a.y / b.y; +} + +inline void Vector2DMA( const Vector2D& start, float s, const Vector2D& dir, Vector2D& result ) +{ + Assert( start.IsValid() && IsFinite(s) && dir.IsValid() ); + result.x = start.x + s*dir.x; + result.y = start.y + s*dir.y; +} + +// FIXME: Remove +// For backwards compatability +inline void Vector2D::MulAdd(const Vector2D& a, const Vector2D& b, float scalar) +{ + x = a.x + b.x * scalar; + y = a.y + b.y * scalar; +} + +inline void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D& dest ) +{ + dest[0] = src1[0] + (src2[0] - src1[0]) * t; + dest[1] = src1[1] + (src2[1] - src1[1]) * t; +} + +//----------------------------------------------------------------------------- +// dot, cross 
+//----------------------------------------------------------------------------- +inline vec_t DotProduct2D(const Vector2D& a, const Vector2D& b) +{ + Assert( a.IsValid() && b.IsValid() ); + return( a.x*b.x + a.y*b.y ); +} + +// for backwards compatability +inline vec_t Vector2D::Dot( const Vector2D& vOther ) const +{ + return DotProduct2D( *this, vOther ); +} + + +//----------------------------------------------------------------------------- +// length +//----------------------------------------------------------------------------- +inline vec_t Vector2DLength( const Vector2D& v ) +{ + Assert( v.IsValid() ); + return (vec_t)FastSqrt(v.x*v.x + v.y*v.y); +} + +inline vec_t Vector2D::LengthSqr(void) const +{ + Assert( IsValid() ); + return (x*x + y*y); +} + +inline vec_t Vector2D::NormalizeInPlace() +{ + return Vector2DNormalize( *this ); +} + +inline bool Vector2D::IsLengthGreaterThan( float val ) const +{ + return LengthSqr() > val*val; +} + +inline bool Vector2D::IsLengthLessThan( float val ) const +{ + return LengthSqr() < val*val; +} + +inline vec_t Vector2D::Length(void) const +{ + return Vector2DLength( *this ); +} + + +inline void Vector2DMin( const Vector2D &a, const Vector2D &b, Vector2D &result ) +{ + result.x = (a.x < b.x) ? a.x : b.x; + result.y = (a.y < b.y) ? a.y : b.y; +} + + +inline void Vector2DMax( const Vector2D &a, const Vector2D &b, Vector2D &result ) +{ + result.x = (a.x > b.x) ? a.x : b.x; + result.y = (a.y > b.y) ? 
a.y : b.y; +} + + +//----------------------------------------------------------------------------- +// Normalization +//----------------------------------------------------------------------------- +inline vec_t Vector2DNormalize( Vector2D& v ) +{ + Assert( v.IsValid() ); + vec_t l = v.Length(); + if (l != 0.0f) + { + v /= l; + } + else + { + v.x = v.y = 0.0f; + } + return l; +} + + +//----------------------------------------------------------------------------- +// Get the distance from this Vector2D to the other one +//----------------------------------------------------------------------------- +inline vec_t Vector2D::DistTo(const Vector2D &vOther) const +{ + Vector2D delta; + Vector2DSubtract( *this, vOther, delta ); + return delta.Length(); +} + +inline vec_t Vector2D::DistToSqr(const Vector2D &vOther) const +{ + Vector2D delta; + Vector2DSubtract( *this, vOther, delta ); + return delta.LengthSqr(); +} + + +//----------------------------------------------------------------------------- +// Computes the closest point to vecTarget no farther than flMaxDist from vecStart +//----------------------------------------------------------------------------- +inline void ComputeClosestPoint2D( const Vector2D& vecStart, float flMaxDist, const Vector2D& vecTarget, Vector2D *pResult ) +{ + Vector2D vecDelta; + Vector2DSubtract( vecTarget, vecStart, vecDelta ); + float flDistSqr = vecDelta.LengthSqr(); + if ( flDistSqr <= flMaxDist * flMaxDist ) + { + *pResult = vecTarget; + } + else + { + vecDelta /= FastSqrt( flDistSqr ); + Vector2DMA( vecStart, flMaxDist, vecDelta, *pResult ); + } +} + + + +//----------------------------------------------------------------------------- +// +// Slow methods +// +//----------------------------------------------------------------------------- + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +//----------------------------------------------------------------------------- +// Returns a Vector2D with the min or max in X, Y, and Z. 
+//----------------------------------------------------------------------------- + +inline Vector2D Vector2D::Min(const Vector2D &vOther) const +{ + return Vector2D(x < vOther.x ? x : vOther.x, + y < vOther.y ? y : vOther.y); +} + +inline Vector2D Vector2D::Max(const Vector2D &vOther) const +{ + return Vector2D(x > vOther.x ? x : vOther.x, + y > vOther.y ? y : vOther.y); +} + + +//----------------------------------------------------------------------------- +// arithmetic operations +//----------------------------------------------------------------------------- + +inline Vector2D Vector2D::operator-(void) const +{ + return Vector2D(-x,-y); +} + +inline Vector2D Vector2D::operator+(const Vector2D& v) const +{ + Vector2D res; + Vector2DAdd( *this, v, res ); + return res; +} + +inline Vector2D Vector2D::operator-(const Vector2D& v) const +{ + Vector2D res; + Vector2DSubtract( *this, v, res ); + return res; +} + +inline Vector2D Vector2D::operator*(float fl) const +{ + Vector2D res; + Vector2DMultiply( *this, fl, res ); + return res; +} + +inline Vector2D Vector2D::operator*(const Vector2D& v) const +{ + Vector2D res; + Vector2DMultiply( *this, v, res ); + return res; +} + +inline Vector2D Vector2D::operator/(float fl) const +{ + Vector2D res; + Vector2DDivide( *this, fl, res ); + return res; +} + +inline Vector2D Vector2D::operator/(const Vector2D& v) const +{ + Vector2D res; + Vector2DDivide( *this, v, res ); + return res; +} + +inline Vector2D operator*(float fl, const Vector2D& v) +{ + return v * fl; +} + +#endif //slow + +#endif // VECTOR2D_H + diff --git a/mp/src/public/mathlib/vector4d.h b/mp/src/public/mathlib/vector4d.h index 53052e4d..2b20c882 100644 --- a/mp/src/public/mathlib/vector4d.h +++ b/mp/src/public/mathlib/vector4d.h @@ -1,686 +1,686 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-
-#ifndef VECTOR4D_H
-#define VECTOR4D_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include <math.h>
-#include <stdlib.h> // For rand(). We really need a library!
-#include <float.h>
-#if !defined( _X360 )
-#include <xmmintrin.h> // For SSE
-#endif
-#include "basetypes.h" // For vec_t, put this somewhere else?
-#include "tier0/dbg.h"
-#include "mathlib/math_pfns.h"
-
-// forward declarations
-class Vector;
-class Vector2D;
-
-//=========================================================
-// 4D Vector4D
-//=========================================================
-
-class Vector4D
-{
-public:
- // Members
- vec_t x, y, z, w;
-
- // Construction/destruction
- Vector4D(void);
- Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W);
- Vector4D(const float *pFloat);
-
- // Initialization
- void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f, vec_t iw=0.0f);
-
- // Got any nasty NAN's?
- bool IsValid() const;
-
- // array access...
- vec_t operator[](int i) const;
- vec_t& operator[](int i);
-
- // Base address...
- inline vec_t* Base();
- inline vec_t const* Base() const;
-
- // Cast to Vector and Vector2D...
- Vector& AsVector3D();
- Vector const& AsVector3D() const;
-
- Vector2D& AsVector2D();
- Vector2D const& AsVector2D() const;
-
- // Initialization methods
- void Random( vec_t minVal, vec_t maxVal );
-
- // equality
- bool operator==(const Vector4D& v) const;
- bool operator!=(const Vector4D& v) const;
-
- // arithmetic operations
- Vector4D& operator+=(const Vector4D &v);
- Vector4D& operator-=(const Vector4D &v);
- Vector4D& operator*=(const Vector4D &v);
- Vector4D& operator*=(float s);
- Vector4D& operator/=(const Vector4D &v);
- Vector4D& operator/=(float s);
-
- // negate the Vector4D components
- void Negate();
-
- // Get the Vector4D's magnitude.
- vec_t Length() const;
-
- // Get the Vector4D's magnitude squared.
- vec_t LengthSqr(void) const;
-
- // return true if this vector is (0,0,0,0) within tolerance
- bool IsZero( float tolerance = 0.01f ) const
- {
- return (x > -tolerance && x < tolerance &&
- y > -tolerance && y < tolerance &&
- z > -tolerance && z < tolerance &&
- w > -tolerance && w < tolerance);
- }
-
- // Get the distance from this Vector4D to the other one.
- vec_t DistTo(const Vector4D &vOther) const;
-
- // Get the distance from this Vector4D to the other one squared.
- vec_t DistToSqr(const Vector4D &vOther) const;
-
- // Copy
- void CopyToArray(float* rgfl) const;
-
- // Multiply, add, and assign to this (ie: *this = a + b * scalar). This
- // is about 12% faster than the actual Vector4D equation (because it's done per-component
- // rather than per-Vector4D).
- void MulAdd(Vector4D const& a, Vector4D const& b, float scalar);
-
- // Dot product.
- vec_t Dot(Vector4D const& vOther) const;
-
- // No copy constructors allowed if we're in optimal mode
-#ifdef VECTOR_NO_SLOW_OPERATIONS
-private:
-#else
-public:
-#endif
- Vector4D(Vector4D const& vOther);
-
- // No assignment operators either...
- Vector4D& operator=( Vector4D const& src );
-};
-
-const Vector4D vec4_origin( 0.0f, 0.0f, 0.0f, 0.0f );
-const Vector4D vec4_invalid( FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX );
-
-//-----------------------------------------------------------------------------
-// SSE optimized routines
-//-----------------------------------------------------------------------------
-
-class ALIGN16 Vector4DAligned : public Vector4D
-{
-public:
- Vector4DAligned(void) {}
- Vector4DAligned( vec_t X, vec_t Y, vec_t Z, vec_t W );
-
- inline void Set( vec_t X, vec_t Y, vec_t Z, vec_t W );
- inline void InitZero( void );
-
- inline __m128 &AsM128() { return *(__m128*)&x; }
- inline const __m128 &AsM128() const { return *(const __m128*)&x; }
-
-private:
- // No copy constructors allowed if we're in optimal mode
- Vector4DAligned( Vector4DAligned const& vOther );
-
- // No assignment operators either...
- Vector4DAligned& operator=( Vector4DAligned const& src );
-} ALIGN16_POST;
-
-//-----------------------------------------------------------------------------
-// Vector4D related operations
-//-----------------------------------------------------------------------------
-
-// Vector4D clear
-void Vector4DClear( Vector4D& a );
-
-// Copy
-void Vector4DCopy( Vector4D const& src, Vector4D& dst );
-
-// Vector4D arithmetic
-void Vector4DAdd( Vector4D const& a, Vector4D const& b, Vector4D& result );
-void Vector4DSubtract( Vector4D const& a, Vector4D const& b, Vector4D& result );
-void Vector4DMultiply( Vector4D const& a, vec_t b, Vector4D& result );
-void Vector4DMultiply( Vector4D const& a, Vector4D const& b, Vector4D& result );
-void Vector4DDivide( Vector4D const& a, vec_t b, Vector4D& result );
-void Vector4DDivide( Vector4D const& a, Vector4D const& b, Vector4D& result );
-void Vector4DMA( Vector4D const& start, float s, Vector4D const& dir, Vector4D& result );
-
-// Vector4DAligned arithmetic
-void Vector4DMultiplyAligned( Vector4DAligned const& a, vec_t b, Vector4DAligned& result );
-
-
-#define Vector4DExpand( v ) (v).x, (v).y, (v).z, (v).w
-
-// Normalization
-vec_t Vector4DNormalize( Vector4D& v );
-
-// Length
-vec_t Vector4DLength( Vector4D const& v );
-
-// Dot Product
-vec_t DotProduct4D(Vector4D const& a, Vector4D const& b);
-
-// Linearly interpolate between two vectors
-void Vector4DLerp(Vector4D const& src1, Vector4D const& src2, vec_t t, Vector4D& dest );
-
-
-//-----------------------------------------------------------------------------
-//
-// Inlined Vector4D methods
-//
-//-----------------------------------------------------------------------------
-
-
-//-----------------------------------------------------------------------------
-// constructors
-//-----------------------------------------------------------------------------
-
-inline Vector4D::Vector4D(void)
-{
-#ifdef _DEBUG
- // Initialize to NAN to catch errors
- x = y = z = w = VEC_T_NAN;
-#endif
-}
-
-inline Vector4D::Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W )
-{
- x = X; y = Y; z = Z; w = W;
- Assert( IsValid() );
-}
-
-inline Vector4D::Vector4D(const float *pFloat)
-{
- Assert( pFloat );
- x = pFloat[0]; y = pFloat[1]; z = pFloat[2]; w = pFloat[3];
- Assert( IsValid() );
-}
-
-
-//-----------------------------------------------------------------------------
-// copy constructor
-//-----------------------------------------------------------------------------
-
-inline Vector4D::Vector4D(const Vector4D &vOther)
-{
- Assert( vOther.IsValid() );
- x = vOther.x; y = vOther.y; z = vOther.z; w = vOther.w;
-}
-
-//-----------------------------------------------------------------------------
-// initialization
-//-----------------------------------------------------------------------------
-
-inline void Vector4D::Init( vec_t ix, vec_t iy, vec_t iz, vec_t iw )
-{
- x = ix; y = iy; z = iz; w = iw;
- Assert( IsValid() );
-}
-
-inline void Vector4D::Random( vec_t minVal, vec_t maxVal )
-{
- x = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- y = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- z = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
- w = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
-}
-
-inline void Vector4DClear( Vector4D& a )
-{
- a.x = a.y = a.z = a.w = 0.0f;
-}
-
-//-----------------------------------------------------------------------------
-// assignment
-//-----------------------------------------------------------------------------
-
-inline Vector4D& Vector4D::operator=(const Vector4D &vOther)
-{
- Assert( vOther.IsValid() );
- x=vOther.x; y=vOther.y; z=vOther.z; w=vOther.w;
- return *this;
-}
-
-//-----------------------------------------------------------------------------
-// Array access
-//-----------------------------------------------------------------------------
-
-inline vec_t& Vector4D::operator[](int i)
-{
- Assert( (i >= 0) && (i < 4) );
- return ((vec_t*)this)[i];
-}
-
-inline vec_t Vector4D::operator[](int i) const
-{
- Assert( (i >= 0) && (i < 4) );
- return ((vec_t*)this)[i];
-}
-
-//-----------------------------------------------------------------------------
-// Cast to Vector and Vector2D...
-//-----------------------------------------------------------------------------
-
-inline Vector& Vector4D::AsVector3D()
-{
- return *(Vector*)this;
-}
-
-inline Vector const& Vector4D::AsVector3D() const
-{
- return *(Vector const*)this;
-}
-
-inline Vector2D& Vector4D::AsVector2D()
-{
- return *(Vector2D*)this;
-}
-
-inline Vector2D const& Vector4D::AsVector2D() const
-{
- return *(Vector2D const*)this;
-}
-
-//-----------------------------------------------------------------------------
-// Base address...
-//-----------------------------------------------------------------------------
-
-inline vec_t* Vector4D::Base()
-{
- return (vec_t*)this;
-}
-
-inline vec_t const* Vector4D::Base() const
-{
- return (vec_t const*)this;
-}
-
-//-----------------------------------------------------------------------------
-// IsValid?
-//-----------------------------------------------------------------------------
-
-inline bool Vector4D::IsValid() const
-{
- return IsFinite(x) && IsFinite(y) && IsFinite(z) && IsFinite(w);
-}
-
-//-----------------------------------------------------------------------------
-// comparison
-//-----------------------------------------------------------------------------
-
-inline bool Vector4D::operator==( Vector4D const& src ) const
-{
- Assert( src.IsValid() && IsValid() );
- return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w);
-}
-
-inline bool Vector4D::operator!=( Vector4D const& src ) const
-{
- Assert( src.IsValid() && IsValid() );
- return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w);
-}
-
-
-//-----------------------------------------------------------------------------
-// Copy
-//-----------------------------------------------------------------------------
-
-inline void Vector4DCopy( Vector4D const& src, Vector4D& dst )
-{
- Assert( src.IsValid() );
- dst.x = src.x;
- dst.y = src.y;
- dst.z = src.z;
- dst.w = src.w;
-}
-
-inline void Vector4D::CopyToArray(float* rgfl) const
-{
- Assert( IsValid() );
- Assert( rgfl );
- rgfl[0] = x; rgfl[1] = y; rgfl[2] = z; rgfl[3] = w;
-}
-
-//-----------------------------------------------------------------------------
-// standard math operations
-//-----------------------------------------------------------------------------
-
-inline void Vector4D::Negate()
-{
- Assert( IsValid() );
- x = -x; y = -y; z = -z; w = -w;
-}
-
-inline Vector4D& Vector4D::operator+=(const Vector4D& v)
-{
- Assert( IsValid() && v.IsValid() );
- x+=v.x; y+=v.y; z += v.z; w += v.w;
- return *this;
-}
-
-inline Vector4D& Vector4D::operator-=(const Vector4D& v)
-{
- Assert( IsValid() && v.IsValid() );
- x-=v.x; y-=v.y; z -= v.z; w -= v.w;
- return *this;
-}
-
-inline Vector4D& Vector4D::operator*=(float fl)
-{
- x *= fl;
- y *= fl;
- z *= fl;
- w *= fl;
- Assert( IsValid() );
- return *this;
-}
-
-inline Vector4D& Vector4D::operator*=(Vector4D const& v)
-{
- x *= v.x;
- y *= v.y;
- z *= v.z;
- w *= v.w;
- Assert( IsValid() );
- return *this;
-}
-
-inline Vector4D& Vector4D::operator/=(float fl)
-{
- Assert( fl != 0.0f );
- float oofl = 1.0f / fl;
- x *= oofl;
- y *= oofl;
- z *= oofl;
- w *= oofl;
- Assert( IsValid() );
- return *this;
-}
-
-inline Vector4D& Vector4D::operator/=(Vector4D const& v)
-{
- Assert( v.x != 0.0f && v.y != 0.0f && v.z != 0.0f && v.w != 0.0f );
- x /= v.x;
- y /= v.y;
- z /= v.z;
- w /= v.w;
- Assert( IsValid() );
- return *this;
-}
-
-inline void Vector4DAdd( Vector4D const& a, Vector4D const& b, Vector4D& c )
-{
- Assert( a.IsValid() && b.IsValid() );
- c.x = a.x + b.x;
- c.y = a.y + b.y;
- c.z = a.z + b.z;
- c.w = a.w + b.w;
-}
-
-inline void Vector4DSubtract( Vector4D const& a, Vector4D const& b, Vector4D& c )
-{
- Assert( a.IsValid() && b.IsValid() );
- c.x = a.x - b.x;
- c.y = a.y - b.y;
- c.z = a.z - b.z;
- c.w = a.w - b.w;
-}
-
-inline void Vector4DMultiply( Vector4D const& a, vec_t b, Vector4D& c )
-{
- Assert( a.IsValid() && IsFinite(b) );
- c.x = a.x * b;
- c.y = a.y * b;
- c.z = a.z * b;
- c.w = a.w * b;
-}
-
-inline void Vector4DMultiply( Vector4D const& a, Vector4D const& b, Vector4D& c )
-{
- Assert( a.IsValid() && b.IsValid() );
- c.x = a.x * b.x;
- c.y = a.y * b.y;
- c.z = a.z * b.z;
- c.w = a.w * b.w;
-}
-
-inline void Vector4DDivide( Vector4D const& a, vec_t b, Vector4D& c )
-{
- Assert( a.IsValid() );
- Assert( b != 0.0f );
- vec_t oob = 1.0f / b;
- c.x = a.x * oob;
- c.y = a.y * oob;
- c.z = a.z * oob;
- c.w = a.w * oob;
-}
-
-inline void Vector4DDivide( Vector4D const& a, Vector4D const& b, Vector4D& c )
-{
- Assert( a.IsValid() );
- Assert( (b.x != 0.0f) && (b.y != 0.0f) && (b.z != 0.0f) && (b.w != 0.0f) );
- c.x = a.x / b.x;
- c.y = a.y / b.y;
- c.z = a.z / b.z;
- c.w = a.w / b.w;
-}
-
-inline void Vector4DMA( Vector4D const& start, float s, Vector4D const& dir, Vector4D& result )
-{
- Assert( start.IsValid() && IsFinite(s) && dir.IsValid() );
- result.x = start.x + s*dir.x;
- result.y = start.y + s*dir.y;
- result.z = start.z + s*dir.z;
- result.w = start.w + s*dir.w;
-}
-
-// FIXME: Remove
-// For backwards compatability
-inline void Vector4D::MulAdd(Vector4D const& a, Vector4D const& b, float scalar)
-{
- x = a.x + b.x * scalar;
- y = a.y + b.y * scalar;
- z = a.z + b.z * scalar;
- w = a.w + b.w * scalar;
-}
-
-inline void Vector4DLerp(const Vector4D& src1, const Vector4D& src2, vec_t t, Vector4D& dest )
-{
- dest[0] = src1[0] + (src2[0] - src1[0]) * t;
- dest[1] = src1[1] + (src2[1] - src1[1]) * t;
- dest[2] = src1[2] + (src2[2] - src1[2]) * t;
- dest[3] = src1[3] + (src2[3] - src1[3]) * t;
-}
-
-//-----------------------------------------------------------------------------
-// dot, cross
-//-----------------------------------------------------------------------------
-
-inline vec_t DotProduct4D(const Vector4D& a, const Vector4D& b)
-{
- Assert( a.IsValid() && b.IsValid() );
- return( a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w );
-}
-
-// for backwards compatability
-inline vec_t Vector4D::Dot( Vector4D const& vOther ) const
-{
- return DotProduct4D( *this, vOther );
-}
-
-
-//-----------------------------------------------------------------------------
-// length
-//-----------------------------------------------------------------------------
-
-inline vec_t Vector4DLength( Vector4D const& v )
-{
- Assert( v.IsValid() );
- return (vec_t)FastSqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w);
-}
-
-inline vec_t Vector4D::LengthSqr(void) const
-{
- Assert( IsValid() );
- return (x*x + y*y + z*z + w*w);
-}
-
-inline vec_t Vector4D::Length(void) const
-{
- return Vector4DLength( *this );
-}
-
-
-//-----------------------------------------------------------------------------
-// Normalization
-//-----------------------------------------------------------------------------
-
-// FIXME: Can't use until we're un-macroed in mathlib.h
-inline vec_t Vector4DNormalize( Vector4D& v )
-{
- Assert( v.IsValid() );
- vec_t l = v.Length();
- if (l != 0.0f)
- {
- v /= l;
- }
- else
- {
- v.x = v.y = v.z = v.w = 0.0f;
- }
- return l;
-}
-
-//-----------------------------------------------------------------------------
-// Get the distance from this Vector4D to the other one
-//-----------------------------------------------------------------------------
-
-inline vec_t Vector4D::DistTo(const Vector4D &vOther) const
-{
- Vector4D delta;
- Vector4DSubtract( *this, vOther, delta );
- return delta.Length();
-}
-
-inline vec_t Vector4D::DistToSqr(const Vector4D &vOther) const
-{
- Vector4D delta;
- Vector4DSubtract( *this, vOther, delta );
- return delta.LengthSqr();
-}
-
-
-//-----------------------------------------------------------------------------
-// Vector4DAligned routines
-//-----------------------------------------------------------------------------
-
-inline Vector4DAligned::Vector4DAligned( vec_t X, vec_t Y, vec_t Z, vec_t W )
-{
- x = X; y = Y; z = Z; w = W;
- Assert( IsValid() );
-}
-
-inline void Vector4DAligned::Set( vec_t X, vec_t Y, vec_t Z, vec_t W )
-{
- x = X; y = Y; z = Z; w = W;
- Assert( IsValid() );
-}
-
-inline void Vector4DAligned::InitZero( void )
-{
-#if !defined( _X360 )
- this->AsM128() = _mm_set1_ps( 0.0f );
-#else
- this->AsM128() = __vspltisw( 0 );
-#endif
- Assert( IsValid() );
-}
-
-inline void Vector4DMultiplyAligned( Vector4DAligned const& a, Vector4DAligned const& b, Vector4DAligned& c )
-{
- Assert( a.IsValid() && b.IsValid() );
-#if !defined( _X360 )
- c.x = a.x * b.x;
- c.y = a.y * b.y;
- c.z = a.z * b.z;
- c.w = a.w * b.w;
-#else
- c.AsM128() = __vmulfp( a.AsM128(), b.AsM128() );
-#endif
-}
-
-inline void Vector4DWeightMAD( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB )
-{
- Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) );
-
-#if !defined( _X360 )
- vOutA.x += vInA.x * w;
- vOutA.y += vInA.y * w;
- vOutA.z += vInA.z * w;
- vOutA.w += vInA.w * w;
-
- vOutB.x += vInB.x * w;
- vOutB.y += vInB.y * w;
- vOutB.z += vInB.z * w;
- vOutB.w += vInB.w * w;
-#else
- __vector4 temp;
-
- temp = __lvlx( &w, 0 );
- temp = __vspltw( temp, 0 );
-
- vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() );
- vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() );
-#endif
-}
-
-inline void Vector4DWeightMADSSE( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB )
-{
- Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) );
-
-#if !defined( _X360 )
- // Replicate scalar float out to 4 components
- __m128 packed = _mm_set1_ps( w );
-
- // 4D SSE Vector MAD
- vOutA.AsM128() = _mm_add_ps( vOutA.AsM128(), _mm_mul_ps( vInA.AsM128(), packed ) );
- vOutB.AsM128() = _mm_add_ps( vOutB.AsM128(), _mm_mul_ps( vInB.AsM128(), packed ) );
-#else
- __vector4 temp;
-
- temp = __lvlx( &w, 0 );
- temp = __vspltw( temp, 0 );
-
- vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() );
- vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() );
-#endif
-}
-
-#endif // VECTOR4D_H
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef VECTOR4D_H +#define VECTOR4D_H + +#ifdef _WIN32 +#pragma once +#endif + +#include <math.h> +#include <stdlib.h> // For rand(). We really need a library! +#include <float.h> +#if !defined( _X360 ) +#include <xmmintrin.h> // For SSE +#endif +#include "basetypes.h" // For vec_t, put this somewhere else? +#include "tier0/dbg.h" +#include "mathlib/math_pfns.h" + +// forward declarations +class Vector; +class Vector2D; + +//========================================================= +// 4D Vector4D +//========================================================= + +class Vector4D +{ +public: + // Members + vec_t x, y, z, w; + + // Construction/destruction + Vector4D(void); + Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W); + Vector4D(const float *pFloat); + + // Initialization + void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f, vec_t iw=0.0f); + + // Got any nasty NAN's? + bool IsValid() const; + + // array access... + vec_t operator[](int i) const; + vec_t& operator[](int i); + + // Base address... + inline vec_t* Base(); + inline vec_t const* Base() const; + + // Cast to Vector and Vector2D... + Vector& AsVector3D(); + Vector const& AsVector3D() const; + + Vector2D& AsVector2D(); + Vector2D const& AsVector2D() const; + + // Initialization methods + void Random( vec_t minVal, vec_t maxVal ); + + // equality + bool operator==(const Vector4D& v) const; + bool operator!=(const Vector4D& v) const; + + // arithmetic operations + Vector4D& operator+=(const Vector4D &v); + Vector4D& operator-=(const Vector4D &v); + Vector4D& operator*=(const Vector4D &v); + Vector4D& operator*=(float s); + Vector4D& operator/=(const Vector4D &v); + Vector4D& operator/=(float s); + + // negate the Vector4D components + void Negate(); + + // Get the Vector4D's magnitude. 
+ vec_t Length() const; + + // Get the Vector4D's magnitude squared. + vec_t LengthSqr(void) const; + + // return true if this vector is (0,0,0,0) within tolerance + bool IsZero( float tolerance = 0.01f ) const + { + return (x > -tolerance && x < tolerance && + y > -tolerance && y < tolerance && + z > -tolerance && z < tolerance && + w > -tolerance && w < tolerance); + } + + // Get the distance from this Vector4D to the other one. + vec_t DistTo(const Vector4D &vOther) const; + + // Get the distance from this Vector4D to the other one squared. + vec_t DistToSqr(const Vector4D &vOther) const; + + // Copy + void CopyToArray(float* rgfl) const; + + // Multiply, add, and assign to this (ie: *this = a + b * scalar). This + // is about 12% faster than the actual Vector4D equation (because it's done per-component + // rather than per-Vector4D). + void MulAdd(Vector4D const& a, Vector4D const& b, float scalar); + + // Dot product. + vec_t Dot(Vector4D const& vOther) const; + + // No copy constructors allowed if we're in optimal mode +#ifdef VECTOR_NO_SLOW_OPERATIONS +private: +#else +public: +#endif + Vector4D(Vector4D const& vOther); + + // No assignment operators either... 
+ Vector4D& operator=( Vector4D const& src ); +}; + +const Vector4D vec4_origin( 0.0f, 0.0f, 0.0f, 0.0f ); +const Vector4D vec4_invalid( FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX ); + +//----------------------------------------------------------------------------- +// SSE optimized routines +//----------------------------------------------------------------------------- + +class ALIGN16 Vector4DAligned : public Vector4D +{ +public: + Vector4DAligned(void) {} + Vector4DAligned( vec_t X, vec_t Y, vec_t Z, vec_t W ); + + inline void Set( vec_t X, vec_t Y, vec_t Z, vec_t W ); + inline void InitZero( void ); + + inline __m128 &AsM128() { return *(__m128*)&x; } + inline const __m128 &AsM128() const { return *(const __m128*)&x; } + +private: + // No copy constructors allowed if we're in optimal mode + Vector4DAligned( Vector4DAligned const& vOther ); + + // No assignment operators either... + Vector4DAligned& operator=( Vector4DAligned const& src ); +} ALIGN16_POST; + +//----------------------------------------------------------------------------- +// Vector4D related operations +//----------------------------------------------------------------------------- + +// Vector4D clear +void Vector4DClear( Vector4D& a ); + +// Copy +void Vector4DCopy( Vector4D const& src, Vector4D& dst ); + +// Vector4D arithmetic +void Vector4DAdd( Vector4D const& a, Vector4D const& b, Vector4D& result ); +void Vector4DSubtract( Vector4D const& a, Vector4D const& b, Vector4D& result ); +void Vector4DMultiply( Vector4D const& a, vec_t b, Vector4D& result ); +void Vector4DMultiply( Vector4D const& a, Vector4D const& b, Vector4D& result ); +void Vector4DDivide( Vector4D const& a, vec_t b, Vector4D& result ); +void Vector4DDivide( Vector4D const& a, Vector4D const& b, Vector4D& result ); +void Vector4DMA( Vector4D const& start, float s, Vector4D const& dir, Vector4D& result ); + +// Vector4DAligned arithmetic +void Vector4DMultiplyAligned( Vector4DAligned const& a, vec_t b, Vector4DAligned& result ); + + 
+#define Vector4DExpand( v ) (v).x, (v).y, (v).z, (v).w + +// Normalization +vec_t Vector4DNormalize( Vector4D& v ); + +// Length +vec_t Vector4DLength( Vector4D const& v ); + +// Dot Product +vec_t DotProduct4D(Vector4D const& a, Vector4D const& b); + +// Linearly interpolate between two vectors +void Vector4DLerp(Vector4D const& src1, Vector4D const& src2, vec_t t, Vector4D& dest ); + + +//----------------------------------------------------------------------------- +// +// Inlined Vector4D methods +// +//----------------------------------------------------------------------------- + + +//----------------------------------------------------------------------------- +// constructors +//----------------------------------------------------------------------------- + +inline Vector4D::Vector4D(void) +{ +#ifdef _DEBUG + // Initialize to NAN to catch errors + x = y = z = w = VEC_T_NAN; +#endif +} + +inline Vector4D::Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W ) +{ + x = X; y = Y; z = Z; w = W; + Assert( IsValid() ); +} + +inline Vector4D::Vector4D(const float *pFloat) +{ + Assert( pFloat ); + x = pFloat[0]; y = pFloat[1]; z = pFloat[2]; w = pFloat[3]; + Assert( IsValid() ); +} + + +//----------------------------------------------------------------------------- +// copy constructor +//----------------------------------------------------------------------------- + +inline Vector4D::Vector4D(const Vector4D &vOther) +{ + Assert( vOther.IsValid() ); + x = vOther.x; y = vOther.y; z = vOther.z; w = vOther.w; +} + +//----------------------------------------------------------------------------- +// initialization +//----------------------------------------------------------------------------- + +inline void Vector4D::Init( vec_t ix, vec_t iy, vec_t iz, vec_t iw ) +{ + x = ix; y = iy; z = iz; w = iw; + Assert( IsValid() ); +} + +inline void Vector4D::Random( vec_t minVal, vec_t maxVal ) +{ + x = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + y = minVal + 
((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + z = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + w = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal); +} + +inline void Vector4DClear( Vector4D& a ) +{ + a.x = a.y = a.z = a.w = 0.0f; +} + +//----------------------------------------------------------------------------- +// assignment +//----------------------------------------------------------------------------- + +inline Vector4D& Vector4D::operator=(const Vector4D &vOther) +{ + Assert( vOther.IsValid() ); + x=vOther.x; y=vOther.y; z=vOther.z; w=vOther.w; + return *this; +} + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- + +inline vec_t& Vector4D::operator[](int i) +{ + Assert( (i >= 0) && (i < 4) ); + return ((vec_t*)this)[i]; +} + +inline vec_t Vector4D::operator[](int i) const +{ + Assert( (i >= 0) && (i < 4) ); + return ((vec_t*)this)[i]; +} + +//----------------------------------------------------------------------------- +// Cast to Vector and Vector2D... +//----------------------------------------------------------------------------- + +inline Vector& Vector4D::AsVector3D() +{ + return *(Vector*)this; +} + +inline Vector const& Vector4D::AsVector3D() const +{ + return *(Vector const*)this; +} + +inline Vector2D& Vector4D::AsVector2D() +{ + return *(Vector2D*)this; +} + +inline Vector2D const& Vector4D::AsVector2D() const +{ + return *(Vector2D const*)this; +} + +//----------------------------------------------------------------------------- +// Base address... +//----------------------------------------------------------------------------- + +inline vec_t* Vector4D::Base() +{ + return (vec_t*)this; +} + +inline vec_t const* Vector4D::Base() const +{ + return (vec_t const*)this; +} + +//----------------------------------------------------------------------------- +// IsValid? 
+//----------------------------------------------------------------------------- + +inline bool Vector4D::IsValid() const +{ + return IsFinite(x) && IsFinite(y) && IsFinite(z) && IsFinite(w); +} + +//----------------------------------------------------------------------------- +// comparison +//----------------------------------------------------------------------------- + +inline bool Vector4D::operator==( Vector4D const& src ) const +{ + Assert( src.IsValid() && IsValid() ); + return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w); +} + +inline bool Vector4D::operator!=( Vector4D const& src ) const +{ + Assert( src.IsValid() && IsValid() ); + return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w); +} + + +//----------------------------------------------------------------------------- +// Copy +//----------------------------------------------------------------------------- + +inline void Vector4DCopy( Vector4D const& src, Vector4D& dst ) +{ + Assert( src.IsValid() ); + dst.x = src.x; + dst.y = src.y; + dst.z = src.z; + dst.w = src.w; +} + +inline void Vector4D::CopyToArray(float* rgfl) const +{ + Assert( IsValid() ); + Assert( rgfl ); + rgfl[0] = x; rgfl[1] = y; rgfl[2] = z; rgfl[3] = w; +} + +//----------------------------------------------------------------------------- +// standard math operations +//----------------------------------------------------------------------------- + +inline void Vector4D::Negate() +{ + Assert( IsValid() ); + x = -x; y = -y; z = -z; w = -w; +} + +inline Vector4D& Vector4D::operator+=(const Vector4D& v) +{ + Assert( IsValid() && v.IsValid() ); + x+=v.x; y+=v.y; z += v.z; w += v.w; + return *this; +} + +inline Vector4D& Vector4D::operator-=(const Vector4D& v) +{ + Assert( IsValid() && v.IsValid() ); + x-=v.x; y-=v.y; z -= v.z; w -= v.w; + return *this; +} + +inline Vector4D& Vector4D::operator*=(float fl) +{ + x *= fl; + y *= fl; + z *= fl; + w *= fl; + Assert( IsValid() ); + return *this; +} + +inline 
Vector4D& Vector4D::operator*=(Vector4D const& v) +{ + x *= v.x; + y *= v.y; + z *= v.z; + w *= v.w; + Assert( IsValid() ); + return *this; +} + +inline Vector4D& Vector4D::operator/=(float fl) +{ + Assert( fl != 0.0f ); + float oofl = 1.0f / fl; + x *= oofl; + y *= oofl; + z *= oofl; + w *= oofl; + Assert( IsValid() ); + return *this; +} + +inline Vector4D& Vector4D::operator/=(Vector4D const& v) +{ + Assert( v.x != 0.0f && v.y != 0.0f && v.z != 0.0f && v.w != 0.0f ); + x /= v.x; + y /= v.y; + z /= v.z; + w /= v.w; + Assert( IsValid() ); + return *this; +} + +inline void Vector4DAdd( Vector4D const& a, Vector4D const& b, Vector4D& c ) +{ + Assert( a.IsValid() && b.IsValid() ); + c.x = a.x + b.x; + c.y = a.y + b.y; + c.z = a.z + b.z; + c.w = a.w + b.w; +} + +inline void Vector4DSubtract( Vector4D const& a, Vector4D const& b, Vector4D& c ) +{ + Assert( a.IsValid() && b.IsValid() ); + c.x = a.x - b.x; + c.y = a.y - b.y; + c.z = a.z - b.z; + c.w = a.w - b.w; +} + +inline void Vector4DMultiply( Vector4D const& a, vec_t b, Vector4D& c ) +{ + Assert( a.IsValid() && IsFinite(b) ); + c.x = a.x * b; + c.y = a.y * b; + c.z = a.z * b; + c.w = a.w * b; +} + +inline void Vector4DMultiply( Vector4D const& a, Vector4D const& b, Vector4D& c ) +{ + Assert( a.IsValid() && b.IsValid() ); + c.x = a.x * b.x; + c.y = a.y * b.y; + c.z = a.z * b.z; + c.w = a.w * b.w; +} + +inline void Vector4DDivide( Vector4D const& a, vec_t b, Vector4D& c ) +{ + Assert( a.IsValid() ); + Assert( b != 0.0f ); + vec_t oob = 1.0f / b; + c.x = a.x * oob; + c.y = a.y * oob; + c.z = a.z * oob; + c.w = a.w * oob; +} + +inline void Vector4DDivide( Vector4D const& a, Vector4D const& b, Vector4D& c ) +{ + Assert( a.IsValid() ); + Assert( (b.x != 0.0f) && (b.y != 0.0f) && (b.z != 0.0f) && (b.w != 0.0f) ); + c.x = a.x / b.x; + c.y = a.y / b.y; + c.z = a.z / b.z; + c.w = a.w / b.w; +} + +inline void Vector4DMA( Vector4D const& start, float s, Vector4D const& dir, Vector4D& result ) +{ + Assert( start.IsValid() && 
IsFinite(s) && dir.IsValid() ); + result.x = start.x + s*dir.x; + result.y = start.y + s*dir.y; + result.z = start.z + s*dir.z; + result.w = start.w + s*dir.w; +} + +// FIXME: Remove +// For backwards compatability +inline void Vector4D::MulAdd(Vector4D const& a, Vector4D const& b, float scalar) +{ + x = a.x + b.x * scalar; + y = a.y + b.y * scalar; + z = a.z + b.z * scalar; + w = a.w + b.w * scalar; +} + +inline void Vector4DLerp(const Vector4D& src1, const Vector4D& src2, vec_t t, Vector4D& dest ) +{ + dest[0] = src1[0] + (src2[0] - src1[0]) * t; + dest[1] = src1[1] + (src2[1] - src1[1]) * t; + dest[2] = src1[2] + (src2[2] - src1[2]) * t; + dest[3] = src1[3] + (src2[3] - src1[3]) * t; +} + +//----------------------------------------------------------------------------- +// dot, cross +//----------------------------------------------------------------------------- + +inline vec_t DotProduct4D(const Vector4D& a, const Vector4D& b) +{ + Assert( a.IsValid() && b.IsValid() ); + return( a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w ); +} + +// for backwards compatability +inline vec_t Vector4D::Dot( Vector4D const& vOther ) const +{ + return DotProduct4D( *this, vOther ); +} + + +//----------------------------------------------------------------------------- +// length +//----------------------------------------------------------------------------- + +inline vec_t Vector4DLength( Vector4D const& v ) +{ + Assert( v.IsValid() ); + return (vec_t)FastSqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w); +} + +inline vec_t Vector4D::LengthSqr(void) const +{ + Assert( IsValid() ); + return (x*x + y*y + z*z + w*w); +} + +inline vec_t Vector4D::Length(void) const +{ + return Vector4DLength( *this ); +} + + +//----------------------------------------------------------------------------- +// Normalization +//----------------------------------------------------------------------------- + +// FIXME: Can't use until we're un-macroed in mathlib.h +inline vec_t Vector4DNormalize( Vector4D& v ) +{ + 
Assert( v.IsValid() ); + vec_t l = v.Length(); + if (l != 0.0f) + { + v /= l; + } + else + { + v.x = v.y = v.z = v.w = 0.0f; + } + return l; +} + +//----------------------------------------------------------------------------- +// Get the distance from this Vector4D to the other one +//----------------------------------------------------------------------------- + +inline vec_t Vector4D::DistTo(const Vector4D &vOther) const +{ + Vector4D delta; + Vector4DSubtract( *this, vOther, delta ); + return delta.Length(); +} + +inline vec_t Vector4D::DistToSqr(const Vector4D &vOther) const +{ + Vector4D delta; + Vector4DSubtract( *this, vOther, delta ); + return delta.LengthSqr(); +} + + +//----------------------------------------------------------------------------- +// Vector4DAligned routines +//----------------------------------------------------------------------------- + +inline Vector4DAligned::Vector4DAligned( vec_t X, vec_t Y, vec_t Z, vec_t W ) +{ + x = X; y = Y; z = Z; w = W; + Assert( IsValid() ); +} + +inline void Vector4DAligned::Set( vec_t X, vec_t Y, vec_t Z, vec_t W ) +{ + x = X; y = Y; z = Z; w = W; + Assert( IsValid() ); +} + +inline void Vector4DAligned::InitZero( void ) +{ +#if !defined( _X360 ) + this->AsM128() = _mm_set1_ps( 0.0f ); +#else + this->AsM128() = __vspltisw( 0 ); +#endif + Assert( IsValid() ); +} + +inline void Vector4DMultiplyAligned( Vector4DAligned const& a, Vector4DAligned const& b, Vector4DAligned& c ) +{ + Assert( a.IsValid() && b.IsValid() ); +#if !defined( _X360 ) + c.x = a.x * b.x; + c.y = a.y * b.y; + c.z = a.z * b.z; + c.w = a.w * b.w; +#else + c.AsM128() = __vmulfp( a.AsM128(), b.AsM128() ); +#endif +} + +inline void Vector4DWeightMAD( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB ) +{ + Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) ); + +#if !defined( _X360 ) + vOutA.x += vInA.x * w; + vOutA.y += vInA.y * w; + vOutA.z += vInA.z * w; + vOutA.w += 
vInA.w * w; + + vOutB.x += vInB.x * w; + vOutB.y += vInB.y * w; + vOutB.z += vInB.z * w; + vOutB.w += vInB.w * w; +#else + __vector4 temp; + + temp = __lvlx( &w, 0 ); + temp = __vspltw( temp, 0 ); + + vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() ); + vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() ); +#endif +} + +inline void Vector4DWeightMADSSE( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB ) +{ + Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) ); + +#if !defined( _X360 ) + // Replicate scalar float out to 4 components + __m128 packed = _mm_set1_ps( w ); + + // 4D SSE Vector MAD + vOutA.AsM128() = _mm_add_ps( vOutA.AsM128(), _mm_mul_ps( vInA.AsM128(), packed ) ); + vOutB.AsM128() = _mm_add_ps( vOutB.AsM128(), _mm_mul_ps( vInB.AsM128(), packed ) ); +#else + __vector4 temp; + + temp = __lvlx( &w, 0 ); + temp = __vspltw( temp, 0 ); + + vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() ); + vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() ); +#endif +} + +#endif // VECTOR4D_H + diff --git a/mp/src/public/mathlib/vmatrix.h b/mp/src/public/mathlib/vmatrix.h index e09a964f..2c536672 100644 --- a/mp/src/public/mathlib/vmatrix.h +++ b/mp/src/public/mathlib/vmatrix.h @@ -1,950 +1,950 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $NoKeywords: $
-//
-//=============================================================================//
-//
-// VMatrix always postmultiply vectors as in Ax = b.
-// Given a set of basis vectors ((F)orward, (L)eft, (U)p), and a (T)ranslation,
-// a matrix to transform a vector into that space looks like this:
-// Fx Lx Ux Tx
-// Fy Ly Uy Ty
-// Fz Lz Uz Tz
-// 0 0 0 1
-
-// Note that concatenating matrices needs to multiply them in reverse order.
-// ie: if I want to apply matrix A, B, then C, the equation needs to look like this:
-// C * B * A * v
-// ie:
-// v = A * v;
-// v = B * v;
-// v = C * v;
-//=============================================================================
-
-#ifndef VMATRIX_H
-#define VMATRIX_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include <string.h>
-#include "mathlib/vector.h"
-#include "mathlib/vplane.h"
-#include "mathlib/vector4d.h"
-#include "mathlib/mathlib.h"
-
-struct cplane_t;
-
-
-class VMatrix
-{
-public:
-
- VMatrix();
- VMatrix(
- vec_t m00, vec_t m01, vec_t m02, vec_t m03,
- vec_t m10, vec_t m11, vec_t m12, vec_t m13,
- vec_t m20, vec_t m21, vec_t m22, vec_t m23,
- vec_t m30, vec_t m31, vec_t m32, vec_t m33
- );
-
- // Creates a matrix where the X axis = forward
- // the Y axis = left, and the Z axis = up
- VMatrix( const Vector& forward, const Vector& left, const Vector& up );
- VMatrix( const Vector& forward, const Vector& left, const Vector& up, const Vector& translation );
-
- // Construct from a 3x4 matrix
- VMatrix( const matrix3x4_t& matrix3x4 );
-
- // Set the values in the matrix.
- void Init(
- vec_t m00, vec_t m01, vec_t m02, vec_t m03,
- vec_t m10, vec_t m11, vec_t m12, vec_t m13,
- vec_t m20, vec_t m21, vec_t m22, vec_t m23,
- vec_t m30, vec_t m31, vec_t m32, vec_t m33
- );
-
-
- // Initialize from a 3x4
- void Init( const matrix3x4_t& matrix3x4 );
-
- // array access
- inline float* operator[](int i)
- {
- return m[i];
- }
-
- inline const float* operator[](int i) const
- {
- return m[i];
- }
-
- // Get a pointer to m[0][0]
- inline float *Base()
- {
- return &m[0][0];
- }
-
- inline const float *Base() const
- {
- return &m[0][0];
- }
-
- void SetLeft(const Vector &vLeft);
- void SetUp(const Vector &vUp);
- void SetForward(const Vector &vForward);
-
- void GetBasisVectors(Vector &vForward, Vector &vLeft, Vector &vUp) const;
- void SetBasisVectors(const Vector &vForward, const Vector &vLeft, const Vector &vUp);
-
- // Get/set the translation.
- Vector & GetTranslation( Vector &vTrans ) const;
- void SetTranslation(const Vector &vTrans);
-
- void PreTranslate(const Vector &vTrans);
- void PostTranslate(const Vector &vTrans);
-
- matrix3x4_t& As3x4();
- const matrix3x4_t& As3x4() const;
- void CopyFrom3x4( const matrix3x4_t &m3x4 );
- void Set3x4( matrix3x4_t& matrix3x4 ) const;
-
- bool operator==( const VMatrix& src ) const;
- bool operator!=( const VMatrix& src ) const { return !( *this == src ); }
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // Access the basis vectors.
- Vector GetLeft() const;
- Vector GetUp() const;
- Vector GetForward() const;
- Vector GetTranslation() const;
-#endif
-
-
-// Matrix->vector operations.
-public:
- // Multiply by a 3D vector (same as operator*).
- void V3Mul(const Vector &vIn, Vector &vOut) const;
-
- // Multiply by a 4D vector.
- void V4Mul(const Vector4D &vIn, Vector4D &vOut) const;
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // Applies the rotation (ignores translation in the matrix). (This just calls VMul3x3).
- Vector ApplyRotation(const Vector &vVec) const;
-
- // Multiply by a vector (divides by w, assumes input w is 1).
- Vector operator*(const Vector &vVec) const;
-
- // Multiply by the upper 3x3 part of the matrix (ie: only apply rotation).
- Vector VMul3x3(const Vector &vVec) const;
-
- // Apply the inverse (transposed) rotation (only works on pure rotation matrix)
- Vector VMul3x3Transpose(const Vector &vVec) const;
-
- // Multiply by the upper 3 rows.
- Vector VMul4x3(const Vector &vVec) const;
-
- // Apply the inverse (transposed) transformation (only works on pure rotation/translation)
- Vector VMul4x3Transpose(const Vector &vVec) const;
-#endif
-
-
-// Matrix->plane operations.
-public:
- // Transform the plane. The matrix can only contain translation and rotation.
- void TransformPlane( const VPlane &inPlane, VPlane &outPlane ) const;
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // Just calls TransformPlane and returns the result.
- VPlane operator*(const VPlane &thePlane) const;
-#endif
-
-// Matrix->matrix operations.
-public:
-
- VMatrix& operator=(const VMatrix &mOther);
-
- // Multiply two matrices (out = this * vm).
- void MatrixMul( const VMatrix &vm, VMatrix &out ) const;
-
- // Add two matrices.
- const VMatrix& operator+=(const VMatrix &other);
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // Just calls MatrixMul and returns the result.
- VMatrix operator*(const VMatrix &mOther) const;
-
- // Add/Subtract two matrices.
- VMatrix operator+(const VMatrix &other) const;
- VMatrix operator-(const VMatrix &other) const;
-
- // Negation.
- VMatrix operator-() const;
-
- // Return inverse matrix. Be careful because the results are undefined
- // if the matrix doesn't have an inverse (ie: InverseGeneral returns false).
- VMatrix operator~() const;
-#endif
-
-// Matrix operations.
-public:
- // Set to identity.
- void Identity();
-
- bool IsIdentity() const;
-
- // Setup a matrix for origin and angles.
- void SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles );
-
- // General inverse. This may fail so check the return!
- bool InverseGeneral(VMatrix &vInverse) const;
-
- // Does a fast inverse, assuming the matrix only contains translation and rotation.
- void InverseTR( VMatrix &mRet ) const;
-
- // Usually used for debug checks. Returns true if the upper 3x3 contains
- // unit vectors and they are all orthogonal.
- bool IsRotationMatrix() const;
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // This calls the other InverseTR and returns the result.
- VMatrix InverseTR() const;
-
- // Get the scale of the matrix's basis vectors.
- Vector GetScale() const;
-
- // (Fast) multiply by a scaling matrix setup from vScale.
- VMatrix Scale(const Vector &vScale);
-
- // Normalize the basis vectors.
- VMatrix NormalizeBasisVectors() const;
-
- // Transpose.
- VMatrix Transpose() const;
-
- // Transpose upper-left 3x3.
- VMatrix Transpose3x3() const;
-#endif
-
-public:
- // The matrix.
- vec_t m[4][4];
-};
-
-
-
-//-----------------------------------------------------------------------------
-// Helper functions.
-//-----------------------------------------------------------------------------
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// Setup an identity matrix.
-VMatrix SetupMatrixIdentity();
-
-// Setup as a scaling matrix.
-VMatrix SetupMatrixScale(const Vector &vScale);
-
-// Setup a translation matrix.
-VMatrix SetupMatrixTranslation(const Vector &vTranslation);
-
-// Setup a matrix to reflect around the plane.
-VMatrix SetupMatrixReflection(const VPlane &thePlane);
-
-// Setup a matrix to project from vOrigin onto thePlane.
-VMatrix SetupMatrixProjection(const Vector &vOrigin, const VPlane &thePlane);
-
-// Setup a matrix to rotate the specified amount around the specified axis.
-VMatrix SetupMatrixAxisRot(const Vector &vAxis, vec_t fDegrees);
-
-// Setup a matrix from euler angles. Just sets identity and calls MatrixAngles.
-VMatrix SetupMatrixAngles(const QAngle &vAngles);
-
-// Setup a matrix for origin and angles.
-VMatrix SetupMatrixOrgAngles(const Vector &origin, const QAngle &vAngles);
-
-#endif
-
-#define VMatToString(mat) (static_cast<const char *>(CFmtStr("[ (%f, %f, %f), (%f, %f, %f), (%f, %f, %f), (%f, %f, %f) ]", mat.m[0][0], mat.m[0][1], mat.m[0][2], mat.m[0][3], mat.m[1][0], mat.m[1][1], mat.m[1][2], mat.m[1][3], mat.m[2][0], mat.m[2][1], mat.m[2][2], mat.m[2][3], mat.m[3][0], mat.m[3][1], mat.m[3][2], mat.m[3][3] ))) // ** Note: this generates a temporary, don't hold reference!
-
-//-----------------------------------------------------------------------------
-// Returns the point at the intersection on the 3 planes.
-// Returns false if it can't be solved (2 or more planes are parallel).
-//-----------------------------------------------------------------------------
-bool PlaneIntersection( const VPlane &vp1, const VPlane &vp2, const VPlane &vp3, Vector &vOut );
-
-
-//-----------------------------------------------------------------------------
-// These methods are faster. Use them if you want faster code
-//-----------------------------------------------------------------------------
-void MatrixSetIdentity( VMatrix &dst );
-void MatrixTranspose( const VMatrix& src, VMatrix& dst );
-void MatrixCopy( const VMatrix& src, VMatrix& dst );
-void MatrixMultiply( const VMatrix& src1, const VMatrix& src2, VMatrix& dst );
-
-// Accessors
-// Get/Set copy three elements of column nCol (or row nRow) between the matrix and a Vector.
-// Parameter names match the inline definitions further down in this header.
-void MatrixGetColumn( const VMatrix &src, int nCol, Vector *pColumn );
-void MatrixSetColumn( VMatrix &src, int nCol, const Vector &column );
-void MatrixGetRow( const VMatrix &src, int nRow, Vector *pRow );
-void MatrixSetRow( VMatrix &dst, int nRow, const Vector &row );
-
-// Vector3DMultiply treats src2 as if it's a direction vector
-void Vector3DMultiply( const VMatrix& src1, const Vector& src2, Vector& dst );
-
-// Vector3DMultiplyPosition treats src2 as if it's a point (adds the translation)
-inline void Vector3DMultiplyPosition( const VMatrix& src1, const VectorByValue src2, Vector& dst );
-
-// Vector3DMultiplyPositionProjective treats src2 as if it's a point
-// and does the perspective divide at the end
-void Vector3DMultiplyPositionProjective( const VMatrix& src1, const Vector &src2, Vector& dst );
-
-// Vector3DMultiplyProjective treats src2 as if it's a direction
-// and does the perspective divide at the end
-// NOTE: src1 had better be an inverse transpose to use this correctly
-void Vector3DMultiplyProjective( const VMatrix& src1, const Vector &src2, Vector& dst );
-
-void Vector4DMultiply( const VMatrix& src1, const Vector4D& src2, Vector4D& dst );
-
-// Same as Vector4DMultiply except that src2 has an implicit W of 1
-void Vector4DMultiplyPosition( const VMatrix& src1, const Vector &src2, Vector4D& dst );
-
-// Multiplies the vector by the transpose of the matrix
-void Vector3DMultiplyTranspose( const VMatrix& src1, const Vector& src2, Vector& dst );
-void Vector4DMultiplyTranspose( const VMatrix& src1, const Vector4D& src2, Vector4D& dst );
-
-// Transform a plane
-void MatrixTransformPlane( const VMatrix &src, const cplane_t &inPlane, cplane_t &outPlane );
-
-// Transform a plane that has an axis-aligned normal
-void MatrixTransformAxisAlignedPlane( const VMatrix &src, int nDim, float flSign, float flDist, cplane_t &outPlane );
-
-void MatrixBuildTranslation( VMatrix& dst, float x, float y, float z );
-void MatrixBuildTranslation( VMatrix& dst, const Vector &translation );
-
-// Builds a translation matrix from 'translation', multiplies dst by it (dst is the
-// first operand of MatrixMultiply) and stores the product back into dst.
-inline void MatrixTranslate( VMatrix& dst, const Vector &translation )
-{
-    VMatrix translationMat;
-    MatrixBuildTranslation( translationMat, translation );
-
-    // The product goes to a scratch matrix first and is then assigned over dst.
-    VMatrix product;
-    MatrixMultiply( dst, translationMat, product );
-    dst = product;
-}
-
-
-void MatrixBuildRotationAboutAxis( VMatrix& dst, const Vector& vAxisOfRot, float angleDegrees );
-void MatrixBuildRotateZ( VMatrix& dst, float angleDegrees );
-
-// Builds a rotation of angleDegrees about vAxisOfRot, multiplies dst by it (dst is the
-// first operand of MatrixMultiply) and stores the product back into dst.
-inline void MatrixRotate( VMatrix& dst, const Vector& vAxisOfRot, float angleDegrees )
-{
-    VMatrix rotationMat;
-    MatrixBuildRotationAboutAxis( rotationMat, vAxisOfRot, angleDegrees );
-
-    // The product goes to a scratch matrix first and is then assigned over dst.
-    VMatrix product;
-    MatrixMultiply( dst, rotationMat, product );
-    dst = product;
-}
-
-// Builds a rotation matrix that rotates one direction vector into another
-void MatrixBuildRotation( VMatrix &dst, const Vector& initialDirection, const Vector& finalDirection );
-
-// Builds a scale matrix
-void MatrixBuildScale( VMatrix &dst, float x, float y, float z );
-void MatrixBuildScale( VMatrix &dst, const Vector& scale );
-
-// Build a perspective matrix.
-// zNear and zFar are assumed to be positive.
-// You end up looking down positive Z, X is to the right, Y is up.
-// X range: [0..1]
-// Y range: [0..1]
-// Z range: [0..1]
-void MatrixBuildPerspective( VMatrix &dst, float fovX, float fovY, float zNear, float zFar );
-
-//-----------------------------------------------------------------------------
-// Given a projection matrix, transform the extremes of its volume into world space and
-// get a bounding box.
-//-----------------------------------------------------------------------------
-void CalculateAABBFromProjectionMatrix( const VMatrix &worldToVolume, Vector *pMins, Vector *pMaxs );
-
-//-----------------------------------------------------------------------------
-// Given a projection matrix, transform the extremes of its volume into world space and
-// get a bounding sphere.
-//-----------------------------------------------------------------------------
-void CalculateSphereFromProjectionMatrix( const VMatrix &worldToVolume, Vector *pCenter, float *pflRadius );
-
-//-----------------------------------------------------------------------------
-// Given an inverse projection matrix, transform the extremes of its volume into world space and
-// get a bounding box.
-//-----------------------------------------------------------------------------
-void CalculateAABBFromProjectionMatrixInverse( const VMatrix &volumeToWorld, Vector *pMins, Vector *pMaxs );
-
-//-----------------------------------------------------------------------------
-// Given an inverse projection matrix, transform the extremes of its volume into world space and
-// get a bounding sphere.
-//-----------------------------------------------------------------------------
-void CalculateSphereFromProjectionMatrixInverse( const VMatrix &volumeToWorld, Vector *pCenter, float *pflRadius );
-
-//-----------------------------------------------------------------------------
-// Calculate frustum planes given a clip->world space transform.
-//-----------------------------------------------------------------------------
-void FrustumPlanesFromMatrix( const VMatrix &clipToWorld, Frustum_t &frustum );
-
-//-----------------------------------------------------------------------------
-// Setup a matrix from euler angles.
-//-----------------------------------------------------------------------------
-void MatrixFromAngles( const QAngle& vAngles, VMatrix& dst );
-
-//-----------------------------------------------------------------------------
-// Creates euler angles from a matrix
-//-----------------------------------------------------------------------------
-void MatrixToAngles( const VMatrix& src, QAngle& vAngles );
-
-//-----------------------------------------------------------------------------
-// Does a fast inverse, assuming the matrix only contains translation and rotation.
-//-----------------------------------------------------------------------------
-void MatrixInverseTR( const VMatrix& src, VMatrix &dst );
-
-//-----------------------------------------------------------------------------
-// Inverts any matrix at all
-//-----------------------------------------------------------------------------
-bool MatrixInverseGeneral(const VMatrix& src, VMatrix& dst);
-
-//-----------------------------------------------------------------------------
-// Computes the inverse transpose
-//-----------------------------------------------------------------------------
-void MatrixInverseTranspose( const VMatrix& src, VMatrix& dst );
-
-
-
-//-----------------------------------------------------------------------------
-// VMatrix inlines.
-//-----------------------------------------------------------------------------
-// Default constructor: the body is empty, so no element of m is assigned here.
-inline VMatrix::VMatrix() {}
-
-// Element-wise constructor: forwards all sixteen values, in the same order, to Init().
-inline VMatrix::VMatrix(
-    vec_t m00, vec_t m01, vec_t m02, vec_t m03,
-    vec_t m10, vec_t m11, vec_t m12, vec_t m13,
-    vec_t m20, vec_t m21, vec_t m22, vec_t m23,
-    vec_t m30, vec_t m31, vec_t m32, vec_t m33)
-{
-    this->Init( m00, m01, m02, m03,
-                m10, m11, m12, m13,
-                m20, m21, m22, m23,
-                m30, m31, m32, m33 );
-}
-
-
-// Construct from a 3x4 matrix; all the work is done by Init( const matrix3x4_t& ).
-inline VMatrix::VMatrix( const matrix3x4_t& matrix3x4 )
-{
-    this->Init( matrix3x4 );
-}
-
-
-//-----------------------------------------------------------------------------
-// Builds a matrix from three axes: X axis = forward, Y axis = left, Z axis = up.
-// Each axis is written down one column; the fourth column and the fourth row
-// are (0, 0, 0, 1).
-//-----------------------------------------------------------------------------
-inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis )
-{
-    // Row 0 holds the x components, row 1 the y components, row 2 the z components.
-    this->Init( xAxis.x, yAxis.x, zAxis.x, 0.0f,
-                xAxis.y, yAxis.y, zAxis.y, 0.0f,
-                xAxis.z, yAxis.z, zAxis.z, 0.0f,
-                0.0f,    0.0f,    0.0f,    1.0f );
-}
-
-// Same as the three-axis constructor, but the fourth column holds 'translation'
-// instead of zeros. The bottom row is (0, 0, 0, 1).
-inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector& translation )
-{
-    this->Init( xAxis.x, yAxis.x, zAxis.x, translation.x,
-                xAxis.y, yAxis.y, zAxis.y, translation.y,
-                xAxis.z, yAxis.z, zAxis.z, translation.z,
-                0.0f,    0.0f,    0.0f,    1.0f );
-}
-
-
-// Sets every element of the matrix; argument mRC is stored in m[R][C].
-inline void VMatrix::Init(
-    vec_t m00, vec_t m01, vec_t m02, vec_t m03,
-    vec_t m10, vec_t m11, vec_t m12, vec_t m13,
-    vec_t m20, vec_t m21, vec_t m22, vec_t m23,
-    vec_t m30, vec_t m31, vec_t m32, vec_t m33
-    )
-{
-    // Lay the arguments out as a table, then copy it element by element.
-    const vec_t values[4][4] =
-    {
-        { m00, m01, m02, m03 },
-        { m10, m11, m12, m13 },
-        { m20, m21, m22, m23 },
-        { m30, m31, m32, m33 }
-    };
-
-    for ( int iRow = 0; iRow < 4; ++iRow )
-    {
-        for ( int iCol = 0; iCol < 4; ++iCol )
-        {
-            m[iRow][iCol] = values[iRow][iCol];
-        }
-    }
-}
-
-
-//-----------------------------------------------------------------------------
-// Initialize from a 3x4: the first sizeof( matrix3x4_t ) bytes of m are copied
-// from the source, then the fourth row is set to (0, 0, 0, 1).
-//-----------------------------------------------------------------------------
-inline void VMatrix::Init( const matrix3x4_t& matrix3x4 )
-{
-    memcpy( m, matrix3x4.Base(), sizeof( matrix3x4_t ) );
-
-    // Fill in the row that the 3x4 does not have.
-    m[3][0] = m[3][1] = m[3][2] = 0.0f;
-    m[3][3] = 1.0f;
-}
-
-
-//-----------------------------------------------------------------------------
-// Methods related to the basis vectors of the matrix
-//-----------------------------------------------------------------------------
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// Returns the forward basis vector: the first three elements of column 0.
-inline Vector VMatrix::GetForward() const
-{
-    const int kForwardCol = 0;
-    return Vector( m[0][kForwardCol], m[1][kForwardCol], m[2][kForwardCol] );
-}
-
-// Returns the left basis vector: the first three elements of column 1.
-inline Vector VMatrix::GetLeft() const
-{
-    const int kLeftCol = 1;
-    return Vector( m[0][kLeftCol], m[1][kLeftCol], m[2][kLeftCol] );
-}
-
-// Returns the up basis vector: the first three elements of column 2.
-inline Vector VMatrix::GetUp() const
-{
-    const int kUpCol = 2;
-    return Vector( m[0][kUpCol], m[1][kUpCol], m[2][kUpCol] );
-}
-
-#endif
-
-// Writes vForward into the first three elements of column 0; m[3][0] is not touched.
-inline void VMatrix::SetForward(const Vector &vForward)
-{
-    const int kForwardCol = 0;
-    m[0][kForwardCol] = vForward.x;
-    m[1][kForwardCol] = vForward.y;
-    m[2][kForwardCol] = vForward.z;
-}
-
-// Writes vLeft into the first three elements of column 1; m[3][1] is not touched.
-inline void VMatrix::SetLeft(const Vector &vLeft)
-{
-    const int kLeftCol = 1;
-    m[0][kLeftCol] = vLeft.x;
-    m[1][kLeftCol] = vLeft.y;
-    m[2][kLeftCol] = vLeft.z;
-}
-
-// Writes vUp into the first three elements of column 2; m[3][2] is not touched.
-inline void VMatrix::SetUp(const Vector &vUp)
-{
-    const int kUpCol = 2;
-    m[0][kUpCol] = vUp.x;
-    m[1][kUpCol] = vUp.y;
-    m[2][kUpCol] = vUp.z;
-}
-
-// Reads the three basis vectors out of columns 0 (forward), 1 (left) and 2 (up).
-inline void VMatrix::GetBasisVectors(Vector &vForward, Vector &vLeft, Vector &vUp) const
-{
-    enum { kForwardCol = 0, kLeftCol = 1, kUpCol = 2 };
-
-    vForward.Init( m[0][kForwardCol], m[1][kForwardCol], m[2][kForwardCol] );
-    vLeft.Init( m[0][kLeftCol], m[1][kLeftCol], m[2][kLeftCol] );
-    vUp.Init( m[0][kUpCol], m[1][kUpCol], m[2][kUpCol] );
-}
-
-// Stores the three basis vectors into columns 0 (forward), 1 (left) and 2 (up).
-// Writes the same nine elements as SetForward + SetLeft + SetUp, row by row.
-inline void VMatrix::SetBasisVectors(const Vector &vForward, const Vector &vLeft, const Vector &vUp)
-{
-    m[0][0] = vForward.x;   m[0][1] = vLeft.x;   m[0][2] = vUp.x;
-    m[1][0] = vForward.y;   m[1][1] = vLeft.y;   m[1][2] = vUp.y;
-    m[2][0] = vForward.z;   m[2][1] = vLeft.z;   m[2][2] = vUp.z;
-}
-
-
-//-----------------------------------------------------------------------------
-// Methods related to the translation component of the matrix
-//-----------------------------------------------------------------------------
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// Returns the translation: the first three elements of column 3.
-inline Vector VMatrix::GetTranslation() const
-{
-    const int kTranslationCol = 3;
-    return Vector( m[0][kTranslationCol], m[1][kTranslationCol], m[2][kTranslationCol] );
-}
-
-#endif
-
-// Copies the translation (first three elements of column 3) into vTrans and
-// returns a reference to that same vTrans.
-inline Vector& VMatrix::GetTranslation( Vector &vTrans ) const
-{
-    const int kTranslationCol = 3;
-    vTrans.x = m[0][kTranslationCol];
-    vTrans.y = m[1][kTranslationCol];
-    vTrans.z = m[2][kTranslationCol];
-    return vTrans;
-}
-
-// Overwrites the translation (first three elements of column 3) with vTrans.
-inline void VMatrix::SetTranslation(const Vector &vTrans)
-{
-    const int kTranslationCol = 3;
-    m[0][kTranslationCol] = vTrans.x;
-    m[1][kTranslationCol] = vTrans.y;
-    m[2][kTranslationCol] = vTrans.z;
-}
-
-
-//-----------------------------------------------------------------------------
-// Apply translation to this matrix in the input space: vTrans is transformed
-// as a point by this matrix and the result becomes the new translation column.
-//-----------------------------------------------------------------------------
-inline void VMatrix::PreTranslate(const Vector &vTrans)
-{
-    Vector vNewTranslation;
-    Vector3DMultiplyPosition( *this, vTrans, vNewTranslation );
-
-    // Same three stores as writing m[0][3], m[1][3], m[2][3] by hand.
-    SetTranslation( vNewTranslation );
-}
-
-
-//-----------------------------------------------------------------------------
-// Apply translation to this matrix in the output space: vTrans is added
-// component-wise to the existing translation column.
-//-----------------------------------------------------------------------------
-inline void VMatrix::PostTranslate(const Vector &vTrans)
-{
-    const int kTranslationCol = 3;
-    m[0][kTranslationCol] += vTrans.x;
-    m[1][kTranslationCol] += vTrans.y;
-    m[2][kTranslationCol] += vTrans.z;
-}
-
-// Reinterprets this object in place as a read-only matrix3x4_t; no data is copied.
-inline const matrix3x4_t& VMatrix::As3x4() const
-{
-    const matrix3x4_t *pAs3x4 = reinterpret_cast<const matrix3x4_t*>( this );
-    return *pAs3x4;
-}
-
-// Reinterprets this object in place as a writable matrix3x4_t; no data is copied.
-inline matrix3x4_t& VMatrix::As3x4()
-{
-    matrix3x4_t *pAs3x4 = reinterpret_cast<matrix3x4_t*>( this );
-    return *pAs3x4;
-}
-
-// Copies a 3x4 into the top of this matrix and sets the fourth row to (0, 0, 0, 1).
-// Init( const matrix3x4_t& ) performs exactly that memcpy plus the same four stores.
-inline void VMatrix::CopyFrom3x4( const matrix3x4_t &m3x4 )
-{
-    Init( m3x4 );
-}
-
-// Copies the first sizeof( matrix3x4_t ) bytes of this matrix into matrix3x4.
-inline void VMatrix::Set3x4( matrix3x4_t& matrix3x4 ) const
-{
-    const vec_t *pSrc = &m[0][0];
-    memcpy( matrix3x4.Base(), pSrc, sizeof( matrix3x4_t ) );
-}
-
-
-//-----------------------------------------------------------------------------
-// Matrix math operations
-//-----------------------------------------------------------------------------
-// Adds 'other' to this matrix element by element and returns *this.
-inline const VMatrix& VMatrix::operator+=(const VMatrix &other)
-{
-    // Walk the 16 elements as one flat array.
-    vec_t *pDst = &m[0][0];
-    const vec_t *pSrc = &other.m[0][0];
-    for ( int iElem = 0; iElem < 16; ++iElem )
-    {
-        pDst[iElem] += pSrc[iElem];
-    }
-
-    return *this;
-}
-
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// Returns the element-wise sum of this matrix and 'other'.
-inline VMatrix VMatrix::operator+(const VMatrix &other) const
-{
-    VMatrix sum;
-    for ( int iRow = 0; iRow < 4; ++iRow )
-    {
-        for ( int iCol = 0; iCol < 4; ++iCol )
-        {
-            sum.m[iRow][iCol] = m[iRow][iCol] + other.m[iRow][iCol];
-        }
-    }
-    return sum;
-}
-
-// Returns the element-wise difference (this - other).
-inline VMatrix VMatrix::operator-(const VMatrix &other) const
-{
-    VMatrix diff;
-
-    // Walk the 16 elements as one flat array.
-    vec_t *pOut = &diff.m[0][0];
-    const vec_t *pLhs = &m[0][0];
-    const vec_t *pRhs = &other.m[0][0];
-    for ( int iElem = 0; iElem < 16; ++iElem )
-    {
-        pOut[iElem] = pLhs[iElem] - pRhs[iElem];
-    }
-
-    return diff;
-}
-
-// Unary negation: returns a matrix in which every element is the negative of
-// the corresponding element of this matrix. This matrix is not modified.
-inline VMatrix VMatrix::operator-() const
-{
-    VMatrix ret;
-    for ( int i = 0; i < 4; i++ )
-    {
-        for ( int j = 0; j < 4; j++ )
-        {
-            // The sign flip is the whole point of this operator; a plain copy
-            // would make -M indistinguishable from M.
-            ret.m[i][j] = -m[i][j];
-        }
-    }
-    return ret;
-}
-
-#endif // VECTOR_NO_SLOW_OPERATIONS
-
-
-//-----------------------------------------------------------------------------
-// Vector transformation
-//-----------------------------------------------------------------------------
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// Transforms vVec as a point: upper 3x3 times vVec plus the translation column.
-// Only rows 0..2 are used; no divide by w is performed here.
-inline Vector VMatrix::operator*(const Vector &vVec) const
-{
-    Vector vResult;
-
-    vResult.x = m[0][0]*vVec.x + m[0][1]*vVec.y + m[0][2]*vVec.z + m[0][3];
-    vResult.y = m[1][0]*vVec.x + m[1][1]*vVec.y + m[1][2]*vVec.z + m[1][3];
-    vResult.z = m[2][0]*vVec.x + m[2][1]*vVec.y + m[2][2]*vVec.z + m[2][3];
-
-    return vResult;
-}
-
-// Multiplies vVec by the upper three rows of the matrix (rotation plus translation)
-// by delegating to Vector3DMultiplyPosition.
-inline Vector VMatrix::VMul4x3(const Vector &vVec) const
-{
-    Vector vTransformed;
-    Vector3DMultiplyPosition( *this, vVec, vTransformed );
-    return vTransformed;
-}
-
-
-// Subtracts the translation column from vVec, then multiplies the result by the
-// transpose of the upper 3x3.
-inline Vector VMatrix::VMul4x3Transpose(const Vector &vVec) const
-{
-    // Remove the translation first.
-    const vec_t dx = vVec.x - m[0][3];
-    const vec_t dy = vVec.y - m[1][3];
-    const vec_t dz = vVec.z - m[2][3];
-
-    // Transposed 3x3: each output component uses one column of m.
-    return Vector(
-        m[0][0]*dx + m[1][0]*dy + m[2][0]*dz,
-        m[0][1]*dx + m[1][1]*dy + m[2][1]*dz,
-        m[0][2]*dx + m[1][2]*dy + m[2][2]*dz
-        );
-}
-
-// Multiplies vVec by the upper 3x3 only; the translation column is ignored.
-inline Vector VMatrix::VMul3x3(const Vector &vVec) const
-{
-    const vec_t flX = m[0][0]*vVec.x + m[0][1]*vVec.y + m[0][2]*vVec.z;
-    const vec_t flY = m[1][0]*vVec.x + m[1][1]*vVec.y + m[1][2]*vVec.z;
-    const vec_t flZ = m[2][0]*vVec.x + m[2][1]*vVec.y + m[2][2]*vVec.z;
-    return Vector( flX, flY, flZ );
-}
-
-// Multiplies vVec by the transpose of the upper 3x3; the translation column is ignored.
-inline Vector VMatrix::VMul3x3Transpose(const Vector &vVec) const
-{
-    const vec_t flX = m[0][0]*vVec.x + m[1][0]*vVec.y + m[2][0]*vVec.z;
-    const vec_t flY = m[0][1]*vVec.x + m[1][1]*vVec.y + m[2][1]*vVec.z;
-    const vec_t flZ = m[0][2]*vVec.x + m[1][2]*vVec.y + m[2][2]*vVec.z;
-    return Vector( flX, flY, flZ );
-}
-
-#endif // VECTOR_NO_SLOW_OPERATIONS
-
-
-// Transforms vIn as a point with an implicit w of 1, then multiplies x, y and z by
-// the reciprocal of the resulting w (row 3). There is no guard against w being zero.
-// vOut is written component by component while vIn is still being read.
-inline void VMatrix::V3Mul(const Vector &vIn, Vector &vOut) const
-{
-    const vec_t flInvW = 1.0f / (m[3][0]*vIn.x + m[3][1]*vIn.y + m[3][2]*vIn.z + m[3][3]);
-
-    vOut.x = (m[0][0]*vIn.x + m[0][1]*vIn.y + m[0][2]*vIn.z + m[0][3]) * flInvW;
-    vOut.y = (m[1][0]*vIn.x + m[1][1]*vIn.y + m[1][2]*vIn.z + m[1][3]) * flInvW;
-    vOut.z = (m[2][0]*vIn.x + m[2][1]*vIn.y + m[2][2]*vIn.z + m[2][3]) * flInvW;
-}
-
-// Full 4x4 times 4D vector: vOut[r] is the dot product of row r with vIn.
-// vOut is written one component at a time, in row order, while vIn is still being read.
-inline void VMatrix::V4Mul(const Vector4D &vIn, Vector4D &vOut) const
-{
-    for ( int iRow = 0; iRow < 4; ++iRow )
-    {
-        vOut[iRow] = m[iRow][0]*vIn[0] + m[iRow][1]*vIn[1] + m[iRow][2]*vIn[2] + m[iRow][3]*vIn[3];
-    }
-}
-
-
-//-----------------------------------------------------------------------------
-// Plane transformation
-//-----------------------------------------------------------------------------
-// Transforms inPlane into outPlane: the normal goes through Vector3DMultiply, and
-// the distance becomes inDist * (n . n) + (n . translation), where n is the
-// transformed normal.
-inline void VMatrix::TransformPlane( const VPlane &inPlane, VPlane &outPlane ) const
-{
-    Vector3DMultiply( *this, inPlane.m_Normal, outPlane.m_Normal );
-
-    // Scale the incoming distance by the squared length of the new normal.
-    const vec_t flNormalLenSqr = DotProduct( outPlane.m_Normal, outPlane.m_Normal );
-    outPlane.m_Dist = inPlane.m_Dist * flNormalLenSqr;
-
-    // Then offset it by the translation projected onto the new normal.
-    Vector vTranslation;
-    GetTranslation( vTranslation );
-    outPlane.m_Dist += DotProduct( outPlane.m_Normal, vTranslation );
-}
-
-
-//-----------------------------------------------------------------------------
-// Other random stuff
-//-----------------------------------------------------------------------------
-// Member-function form of MatrixSetIdentity, applied to this matrix.
-inline void VMatrix::Identity()
-{
-    VMatrix &self = *this;
-    MatrixSetIdentity( self );
-}
-
-
-// Returns true only if every element compares exactly equal to the identity
-// matrix (1.0f on the diagonal, 0.0f elsewhere); no tolerance is applied.
-inline bool VMatrix::IsIdentity() const
-{
-    for ( int iRow = 0; iRow < 4; ++iRow )
-    {
-        for ( int iCol = 0; iCol < 4; ++iCol )
-        {
-            const vec_t flExpected = ( iRow == iCol ) ? 1.0f : 0.0f;
-            if ( !( m[iRow][iCol] == flExpected ) )
-                return false;
-        }
-    }
-    return true;
-}
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// Applies only the upper 3x3 to vVec; simply forwards to VMul3x3.
-inline Vector VMatrix::ApplyRotation(const Vector &vVec) const
-{
-    return this->VMul3x3( vVec );
-}
-
-// Returns the matrix produced by InverseGeneral. The bool returned by
-// InverseGeneral is not checked, so a failed inversion is not reported to the caller.
-inline VMatrix VMatrix::operator~() const
-{
-    VMatrix inverse;
-    InverseGeneral( inverse );
-    return inverse;
-}
-
-#endif
-
-
-//-----------------------------------------------------------------------------
-// Accessors
-//-----------------------------------------------------------------------------
-// Copies rows 0..2 of column nCol of src into *pColumn. nCol must be in [0, 3].
-inline void MatrixGetColumn( const VMatrix &src, int nCol, Vector *pColumn )
-{
-    Assert( (nCol >= 0) && (nCol <= 3) );
-
-    pColumn->x = src.m[0][nCol];
-    pColumn->y = src.m[1][nCol];
-    pColumn->z = src.m[2][nCol];
-}
-
-// Writes 'column' into rows 0..2 of column nCol of src; row 3 is left alone.
-// nCol must be in [0, 3].
-inline void MatrixSetColumn( VMatrix &src, int nCol, const Vector &column )
-{
-    Assert( (nCol >= 0) && (nCol <= 3) );
-
-    vec_t (&rows)[4][4] = src.m;
-    rows[0][nCol] = column.x;
-    rows[1][nCol] = column.y;
-    rows[2][nCol] = column.z;
-}
-
-// Copies row nRow of src into *pRow by viewing the start of that row as a Vector.
-// nRow must be in [0, 3].
-inline void MatrixGetRow( const VMatrix &src, int nRow, Vector *pRow )
-{
-    Assert( (nRow >= 0) && (nRow <= 3) );
-
-    const Vector *pSrcRow = reinterpret_cast<const Vector*>( src[nRow] );
-    *pRow = *pSrcRow;
-}
-
-// Overwrites the start of row nRow of dst with 'row' by viewing it as a Vector.
-// nRow must be in [0, 3].
-inline void MatrixSetRow( VMatrix &dst, int nRow, const Vector &row )
-{
-    Assert( (nRow >= 0) && (nRow <= 3) );
-
-    Vector *pDstRow = reinterpret_cast<Vector*>( dst[nRow] );
-    *pDstRow = row;
-}
-
-
-//-----------------------------------------------------------------------------
-// Vector3DMultiplyPosition treats src2 as a point: upper 3x3 times src2, plus
-// the translation column.
-//-----------------------------------------------------------------------------
-// NJS: src2 is taken by value rather than by reference, so the body needs no
-// branches or extra copy to cope with src2 and dst being the same object.
-inline void Vector3DMultiplyPosition( const VMatrix& src1, const VectorByValue src2, Vector& dst )
-{
-    for ( int iRow = 0; iRow < 3; ++iRow )
-    {
-        dst[iRow] = src1[iRow][0] * src2.x + src1[iRow][1] * src2.y + src1[iRow][2] * src2.z + src1[iRow][3];
-    }
-}
-
-
-//-----------------------------------------------------------------------------
-// Transform a plane whose normal lies along axis nDim with sign flSign and
-// distance flDist. The new normal is column nDim of src scaled by flSign.
-//-----------------------------------------------------------------------------
-inline void MatrixTransformAxisAlignedPlane( const VMatrix &src, int nDim, float flSign, float flDist, cplane_t &outPlane )
-{
-    // See MatrixTransformPlane in the .cpp file for an explanation of the algorithm.
-    MatrixGetColumn( src, nDim, &outPlane.normal );
-    outPlane.normal *= flSign;
-    outPlane.dist = flDist * DotProduct( outPlane.normal, outPlane.normal );
-
-    // NOTE: Written out by hand because it doesn't inline (inline depth isn't large enough).
-    // Equivalent to: outPlane.dist += DotProduct( outPlane.normal, src.GetTranslation() );
-    const Vector &vNormal = outPlane.normal;
-    outPlane.dist += vNormal.x * src.m[0][3] + vNormal.y * src.m[1][3] + vNormal.z * src.m[2][3];
-}
-
-
-//-----------------------------------------------------------------------------
-// Matrix equality test: returns true when no element of the 4x4 differs between
-// src1 and src2 by more than flTolerance.
-//-----------------------------------------------------------------------------
-inline bool MatricesAreEqual( const VMatrix &src1, const VMatrix &src2, float flTolerance )
-{
-    // A VMatrix is 4x4, so all four rows and columns are compared. Stopping at
-    // 3x3 would skip the translation column and the bottom row entirely.
-    for ( int i = 0; i < 4; ++i )
-    {
-        for ( int j = 0; j < 4; ++j )
-        {
-            if ( fabs( src1[i][j] - src2[i][j] ) > flTolerance )
-                return false;
-        }
-    }
-    return true;
-}
-
-//-----------------------------------------------------------------------------
-// Orthographic and perspective projection matrix helpers.
-//-----------------------------------------------------------------------------
-void MatrixBuildOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar );
-void MatrixBuildPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar );
-void MatrixBuildPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right );
-void MatrixBuildPerspectiveZRange( VMatrix& dst, double flZNear, double flZFar );
-
-// Builds an orthographic matrix with MatrixBuildOrtho, multiplies dst by it (dst is
-// the first operand of MatrixMultiply) and stores the product back into dst.
-inline void MatrixOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar )
-{
-    VMatrix ortho, product;
-    MatrixBuildOrtho( ortho, left, top, right, bottom, zNear, zFar );
-    MatrixMultiply( dst, ortho, product );
-    dst = product;
-}
-
-// Builds a perspective matrix with MatrixBuildPerspectiveX, multiplies dst by it
-// (dst is the first operand of MatrixMultiply) and stores the product back into dst.
-inline void MatrixPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar )
-{
-    VMatrix perspective, product;
-    MatrixBuildPerspectiveX( perspective, flFovX, flAspect, flZNear, flZFar );
-    MatrixMultiply( dst, perspective, product );
-    dst = product;
-}
-
-// Builds an off-center perspective matrix with MatrixBuildPerspectiveOffCenterX,
-// multiplies dst by it (dst is the first operand of MatrixMultiply) and stores the
-// product back into dst.
-inline void MatrixPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right )
-{
-    VMatrix perspective, product;
-    MatrixBuildPerspectiveOffCenterX( perspective, flFovX, flAspect, flZNear, flZFar, bottom, top, left, right );
-    MatrixMultiply( dst, perspective, product );
-    dst = product;
-}
-
-#endif
-
-
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// +// +// VMatrix always postmultiply vectors as in Ax = b. +// Given a set of basis vectors ((F)orward, (L)eft, (U)p), and a (T)ranslation, +// a matrix to transform a vector into that space looks like this: +// Fx Lx Ux Tx +// Fy Ly Uy Ty +// Fz Lz Uz Tz +// 0 0 0 1 + +// Note that concatenating matrices needs to multiply them in reverse order. +// ie: if I want to apply matrix A, B, then C, the equation needs to look like this: +// C * B * A * v +// ie: +// v = A * v; +// v = B * v; +// v = C * v; +//============================================================================= + +#ifndef VMATRIX_H +#define VMATRIX_H + +#ifdef _WIN32 +#pragma once +#endif + +#include <string.h> +#include "mathlib/vector.h" +#include "mathlib/vplane.h" +#include "mathlib/vector4d.h" +#include "mathlib/mathlib.h" + +struct cplane_t; + + +class VMatrix +{ +public: + + VMatrix(); + VMatrix( + vec_t m00, vec_t m01, vec_t m02, vec_t m03, + vec_t m10, vec_t m11, vec_t m12, vec_t m13, + vec_t m20, vec_t m21, vec_t m22, vec_t m23, + vec_t m30, vec_t m31, vec_t m32, vec_t m33 + ); + + // Creates a matrix where the X axis = forward + // the Y axis = left, and the Z axis = up + VMatrix( const Vector& forward, const Vector& left, const Vector& up ); + VMatrix( const Vector& forward, const Vector& left, const Vector& up, const Vector& translation ); + + // Construct from a 3x4 matrix + VMatrix( const matrix3x4_t& matrix3x4 ); + + // Set the values in the matrix. 
+ void Init( + vec_t m00, vec_t m01, vec_t m02, vec_t m03, + vec_t m10, vec_t m11, vec_t m12, vec_t m13, + vec_t m20, vec_t m21, vec_t m22, vec_t m23, + vec_t m30, vec_t m31, vec_t m32, vec_t m33 + ); + + + // Initialize from a 3x4 + void Init( const matrix3x4_t& matrix3x4 ); + + // array access + inline float* operator[](int i) + { + return m[i]; + } + + inline const float* operator[](int i) const + { + return m[i]; + } + + // Get a pointer to m[0][0] + inline float *Base() + { + return &m[0][0]; + } + + inline const float *Base() const + { + return &m[0][0]; + } + + void SetLeft(const Vector &vLeft); + void SetUp(const Vector &vUp); + void SetForward(const Vector &vForward); + + void GetBasisVectors(Vector &vForward, Vector &vLeft, Vector &vUp) const; + void SetBasisVectors(const Vector &vForward, const Vector &vLeft, const Vector &vUp); + + // Get/set the translation. + Vector & GetTranslation( Vector &vTrans ) const; + void SetTranslation(const Vector &vTrans); + + void PreTranslate(const Vector &vTrans); + void PostTranslate(const Vector &vTrans); + + matrix3x4_t& As3x4(); + const matrix3x4_t& As3x4() const; + void CopyFrom3x4( const matrix3x4_t &m3x4 ); + void Set3x4( matrix3x4_t& matrix3x4 ) const; + + bool operator==( const VMatrix& src ) const; + bool operator!=( const VMatrix& src ) const { return !( *this == src ); } + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // Access the basis vectors. + Vector GetLeft() const; + Vector GetUp() const; + Vector GetForward() const; + Vector GetTranslation() const; +#endif + + +// Matrix->vector operations. +public: + // Multiply by a 3D vector (same as operator*). + void V3Mul(const Vector &vIn, Vector &vOut) const; + + // Multiply by a 4D vector. + void V4Mul(const Vector4D &vIn, Vector4D &vOut) const; + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // Applies the rotation (ignores translation in the matrix). (This just calls VMul3x3). 
+ Vector ApplyRotation(const Vector &vVec) const; + + // Multiply by a vector (divides by w, assumes input w is 1). + Vector operator*(const Vector &vVec) const; + + // Multiply by the upper 3x3 part of the matrix (ie: only apply rotation). + Vector VMul3x3(const Vector &vVec) const; + + // Apply the inverse (transposed) rotation (only works on pure rotation matrix) + Vector VMul3x3Transpose(const Vector &vVec) const; + + // Multiply by the upper 3 rows. + Vector VMul4x3(const Vector &vVec) const; + + // Apply the inverse (transposed) transformation (only works on pure rotation/translation) + Vector VMul4x3Transpose(const Vector &vVec) const; +#endif + + +// Matrix->plane operations. +public: + // Transform the plane. The matrix can only contain translation and rotation. + void TransformPlane( const VPlane &inPlane, VPlane &outPlane ) const; + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // Just calls TransformPlane and returns the result. + VPlane operator*(const VPlane &thePlane) const; +#endif + +// Matrix->matrix operations. +public: + + VMatrix& operator=(const VMatrix &mOther); + + // Multiply two matrices (out = this * vm). + void MatrixMul( const VMatrix &vm, VMatrix &out ) const; + + // Add two matrices. + const VMatrix& operator+=(const VMatrix &other); + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // Just calls MatrixMul and returns the result. + VMatrix operator*(const VMatrix &mOther) const; + + // Add/Subtract two matrices. + VMatrix operator+(const VMatrix &other) const; + VMatrix operator-(const VMatrix &other) const; + + // Negation. + VMatrix operator-() const; + + // Return inverse matrix. Be careful because the results are undefined + // if the matrix doesn't have an inverse (ie: InverseGeneral returns false). + VMatrix operator~() const; +#endif + +// Matrix operations. +public: + // Set to identity. + void Identity(); + + bool IsIdentity() const; + + // Setup a matrix for origin and angles. 
+ void SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles ); + + // General inverse. This may fail so check the return! + bool InverseGeneral(VMatrix &vInverse) const; + + // Does a fast inverse, assuming the matrix only contains translation and rotation. + void InverseTR( VMatrix &mRet ) const; + + // Usually used for debug checks. Returns true if the upper 3x3 contains + // unit vectors and they are all orthogonal. + bool IsRotationMatrix() const; + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // This calls the other InverseTR and returns the result. + VMatrix InverseTR() const; + + // Get the scale of the matrix's basis vectors. + Vector GetScale() const; + + // (Fast) multiply by a scaling matrix setup from vScale. + VMatrix Scale(const Vector &vScale); + + // Normalize the basis vectors. + VMatrix NormalizeBasisVectors() const; + + // Transpose. + VMatrix Transpose() const; + + // Transpose upper-left 3x3. + VMatrix Transpose3x3() const; +#endif + +public: + // The matrix. + vec_t m[4][4]; +}; + + + +//----------------------------------------------------------------------------- +// Helper functions. +//----------------------------------------------------------------------------- + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +// Setup an identity matrix. +VMatrix SetupMatrixIdentity(); + +// Setup as a scaling matrix. +VMatrix SetupMatrixScale(const Vector &vScale); + +// Setup a translation matrix. +VMatrix SetupMatrixTranslation(const Vector &vTranslation); + +// Setup a matrix to reflect around the plane. +VMatrix SetupMatrixReflection(const VPlane &thePlane); + +// Setup a matrix to project from vOrigin onto thePlane. +VMatrix SetupMatrixProjection(const Vector &vOrigin, const VPlane &thePlane); + +// Setup a matrix to rotate the specified amount around the specified axis. +VMatrix SetupMatrixAxisRot(const Vector &vAxis, vec_t fDegrees); + +// Setup a matrix from euler angles. Just sets identity and calls MatrixAngles. 
+VMatrix SetupMatrixAngles(const QAngle &vAngles); + +// Setup a matrix for origin and angles. +VMatrix SetupMatrixOrgAngles(const Vector &origin, const QAngle &vAngles); + +#endif + +#define VMatToString(mat) (static_cast<const char *>(CFmtStr("[ (%f, %f, %f), (%f, %f, %f), (%f, %f, %f), (%f, %f, %f) ]", mat.m[0][0], mat.m[0][1], mat.m[0][2], mat.m[0][3], mat.m[1][0], mat.m[1][1], mat.m[1][2], mat.m[1][3], mat.m[2][0], mat.m[2][1], mat.m[2][2], mat.m[2][3], mat.m[3][0], mat.m[3][1], mat.m[3][2], mat.m[3][3] ))) // ** Note: this generates a temporary, don't hold reference! + +//----------------------------------------------------------------------------- +// Returns the point at the intersection on the 3 planes. +// Returns false if it can't be solved (2 or more planes are parallel). +//----------------------------------------------------------------------------- +bool PlaneIntersection( const VPlane &vp1, const VPlane &vp2, const VPlane &vp3, Vector &vOut ); + + +//----------------------------------------------------------------------------- +// These methods are faster. 
Use them if you want faster code +//----------------------------------------------------------------------------- +void MatrixSetIdentity( VMatrix &dst ); +void MatrixTranspose( const VMatrix& src, VMatrix& dst ); +void MatrixCopy( const VMatrix& src, VMatrix& dst ); +void MatrixMultiply( const VMatrix& src1, const VMatrix& src2, VMatrix& dst ); + +// Accessors +void MatrixGetColumn( const VMatrix &src, int nCol, Vector *pColumn ); +void MatrixSetColumn( VMatrix &src, int nCol, const Vector &column ); +void MatrixGetRow( const VMatrix &src, int nCol, Vector *pColumn ); +void MatrixSetRow( VMatrix &src, int nCol, const Vector &column ); + +// Vector3DMultiply treats src2 as if it's a direction vector +void Vector3DMultiply( const VMatrix& src1, const Vector& src2, Vector& dst ); + +// Vector3DMultiplyPosition treats src2 as if it's a point (adds the translation) +inline void Vector3DMultiplyPosition( const VMatrix& src1, const VectorByValue src2, Vector& dst ); + +// Vector3DMultiplyPositionProjective treats src2 as if it's a point +// and does the perspective divide at the end +void Vector3DMultiplyPositionProjective( const VMatrix& src1, const Vector &src2, Vector& dst ); + +// Vector3DMultiplyPosition treats src2 as if it's a direction +// and does the perspective divide at the end +// NOTE: src1 had better be an inverse transpose to use this correctly +void Vector3DMultiplyProjective( const VMatrix& src1, const Vector &src2, Vector& dst ); + +void Vector4DMultiply( const VMatrix& src1, const Vector4D& src2, Vector4D& dst ); + +// Same as Vector4DMultiply except that src2 has an implicit W of 1 +void Vector4DMultiplyPosition( const VMatrix& src1, const Vector &src2, Vector4D& dst ); + +// Multiplies the vector by the transpose of the matrix +void Vector3DMultiplyTranspose( const VMatrix& src1, const Vector& src2, Vector& dst ); +void Vector4DMultiplyTranspose( const VMatrix& src1, const Vector4D& src2, Vector4D& dst ); + +// Transform a plane +void 
MatrixTransformPlane( const VMatrix &src, const cplane_t &inPlane, cplane_t &outPlane ); + +// Transform a plane that has an axis-aligned normal +void MatrixTransformAxisAlignedPlane( const VMatrix &src, int nDim, float flSign, float flDist, cplane_t &outPlane ); + +void MatrixBuildTranslation( VMatrix& dst, float x, float y, float z ); +void MatrixBuildTranslation( VMatrix& dst, const Vector &translation ); + +inline void MatrixTranslate( VMatrix& dst, const Vector &translation ) +{ + VMatrix matTranslation, temp; + MatrixBuildTranslation( matTranslation, translation ); + MatrixMultiply( dst, matTranslation, temp ); + dst = temp; +} + + +void MatrixBuildRotationAboutAxis( VMatrix& dst, const Vector& vAxisOfRot, float angleDegrees ); +void MatrixBuildRotateZ( VMatrix& dst, float angleDegrees ); + +inline void MatrixRotate( VMatrix& dst, const Vector& vAxisOfRot, float angleDegrees ) +{ + VMatrix rotation, temp; + MatrixBuildRotationAboutAxis( rotation, vAxisOfRot, angleDegrees ); + MatrixMultiply( dst, rotation, temp ); + dst = temp; +} + +// Builds a rotation matrix that rotates one direction vector into another +void MatrixBuildRotation( VMatrix &dst, const Vector& initialDirection, const Vector& finalDirection ); + +// Builds a scale matrix +void MatrixBuildScale( VMatrix &dst, float x, float y, float z ); +void MatrixBuildScale( VMatrix &dst, const Vector& scale ); + +// Build a perspective matrix. +// zNear and zFar are assumed to be positive. +// You end up looking down positive Z, X is to the right, Y is up. +// X range: [0..1] +// Y range: [0..1] +// Z range: [0..1] +void MatrixBuildPerspective( VMatrix &dst, float fovX, float fovY, float zNear, float zFar ); + +//----------------------------------------------------------------------------- +// Given a projection matrix, take the extremes of the space in transformed into world space and +// get a bounding box. 
+//----------------------------------------------------------------------------- +void CalculateAABBFromProjectionMatrix( const VMatrix &worldToVolume, Vector *pMins, Vector *pMaxs ); + +//----------------------------------------------------------------------------- +// Given a projection matrix, take the extremes of the space in transformed into world space and +// get a bounding sphere. +//----------------------------------------------------------------------------- +void CalculateSphereFromProjectionMatrix( const VMatrix &worldToVolume, Vector *pCenter, float *pflRadius ); + +//----------------------------------------------------------------------------- +// Given an inverse projection matrix, take the extremes of the space in transformed into world space and +// get a bounding box. +//----------------------------------------------------------------------------- +void CalculateAABBFromProjectionMatrixInverse( const VMatrix &volumeToWorld, Vector *pMins, Vector *pMaxs ); + +//----------------------------------------------------------------------------- +// Given an inverse projection matrix, take the extremes of the space in transformed into world space and +// get a bounding sphere. +//----------------------------------------------------------------------------- +void CalculateSphereFromProjectionMatrixInverse( const VMatrix &volumeToWorld, Vector *pCenter, float *pflRadius ); + +//----------------------------------------------------------------------------- +// Calculate frustum planes given a clip->world space transform. +//----------------------------------------------------------------------------- +void FrustumPlanesFromMatrix( const VMatrix &clipToWorld, Frustum_t &frustum ); + +//----------------------------------------------------------------------------- +// Setup a matrix from euler angles. 
+//----------------------------------------------------------------------------- +void MatrixFromAngles( const QAngle& vAngles, VMatrix& dst ); + +//----------------------------------------------------------------------------- +// Creates euler angles from a matrix +//----------------------------------------------------------------------------- +void MatrixToAngles( const VMatrix& src, QAngle& vAngles ); + +//----------------------------------------------------------------------------- +// Does a fast inverse, assuming the matrix only contains translation and rotation. +//----------------------------------------------------------------------------- +void MatrixInverseTR( const VMatrix& src, VMatrix &dst ); + +//----------------------------------------------------------------------------- +// Inverts any matrix at all +//----------------------------------------------------------------------------- +bool MatrixInverseGeneral(const VMatrix& src, VMatrix& dst); + +//----------------------------------------------------------------------------- +// Computes the inverse transpose +//----------------------------------------------------------------------------- +void MatrixInverseTranspose( const VMatrix& src, VMatrix& dst ); + + + +//----------------------------------------------------------------------------- +// VMatrix inlines. 
+//-----------------------------------------------------------------------------
+// Default constructor: no element of m is assigned here.
+inline VMatrix::VMatrix()
+{
+}
+
+// Element-wise constructor; arguments are in row-major order (mRC = row R, column C).
+inline VMatrix::VMatrix(
+	vec_t m00, vec_t m01, vec_t m02, vec_t m03,
+	vec_t m10, vec_t m11, vec_t m12, vec_t m13,
+	vec_t m20, vec_t m21, vec_t m22, vec_t m23,
+	vec_t m30, vec_t m31, vec_t m32, vec_t m33)
+{
+	Init( m00, m01, m02, m03,
+		  m10, m11, m12, m13,
+		  m20, m21, m22, m23,
+		  m30, m31, m32, m33 );
+}
+
+
+// Constructs from a 3x4 matrix; see Init( const matrix3x4_t& ).
+inline VMatrix::VMatrix( const matrix3x4_t& matrix3x4 )
+{
+	Init( matrix3x4 );
+}
+
+
+//-----------------------------------------------------------------------------
+// Creates a matrix where the X axis = forward
+// the Y axis = left, and the Z axis = up
+//-----------------------------------------------------------------------------
+inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis )
+{
+	// The three axes become columns 0..2 of the upper 3x3.
+	SetBasisVectors( xAxis, yAxis, zAxis );
+
+	// No translation.
+	m[0][3] = 0.0f;
+	m[1][3] = 0.0f;
+	m[2][3] = 0.0f;
+
+	// Bottom row is (0, 0, 0, 1).
+	m[3][0] = 0.0f;
+	m[3][1] = 0.0f;
+	m[3][2] = 0.0f;
+	m[3][3] = 1.0f;
+}
+
+// Same as the three-axis constructor, with 'translation' stored in column 3.
+inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector& translation )
+{
+	SetBasisVectors( xAxis, yAxis, zAxis );
+	SetTranslation( translation );
+
+	// Bottom row is (0, 0, 0, 1).
+	m[3][0] = 0.0f;
+	m[3][1] = 0.0f;
+	m[3][2] = 0.0f;
+	m[3][3] = 1.0f;
+}
+
+
+// Assigns all 16 elements; arguments are in row-major order (mRC = row R, column C).
+inline void VMatrix::Init(
+	vec_t m00, vec_t m01, vec_t m02, vec_t m03,
+	vec_t m10, vec_t m11, vec_t m12, vec_t m13,
+	vec_t m20, vec_t m21, vec_t m22, vec_t m23,
+	vec_t m30, vec_t m31, vec_t m32, vec_t m33
+	)
+{
+	const vec_t values[4][4] =
+	{
+		{ m00, m01, m02, m03 },
+		{ m10, m11, m12, m13 },
+		{ m20, m21, m22, m23 },
+		{ m30, m31, m32, m33 },
+	};
+
+	for ( int iRow = 0; iRow < 4; ++iRow )
+	{
+		for ( int iCol = 0; iCol < 4; ++iCol )
+		{
+			m[iRow][iCol] = values[iRow][iCol];
+		}
+	}
+}
+
+
+//-----------------------------------------------------------------------------
+// Initialize from a 3x4
+//----------------------------------------------------------------------------- +inline void VMatrix::Init( const matrix3x4_t& matrix3x4 ) +{ + memcpy(m, matrix3x4.Base(), sizeof( matrix3x4_t ) ); + + m[3][0] = 0.0f; + m[3][1] = 0.0f; + m[3][2] = 0.0f; + m[3][3] = 1.0f; +} + + +//----------------------------------------------------------------------------- +// Methods related to the basis vectors of the matrix +//----------------------------------------------------------------------------- + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +inline Vector VMatrix::GetForward() const +{ + return Vector(m[0][0], m[1][0], m[2][0]); +} + +inline Vector VMatrix::GetLeft() const +{ + return Vector(m[0][1], m[1][1], m[2][1]); +} + +inline Vector VMatrix::GetUp() const +{ + return Vector(m[0][2], m[1][2], m[2][2]); +} + +#endif + +inline void VMatrix::SetForward(const Vector &vForward) +{ + m[0][0] = vForward.x; + m[1][0] = vForward.y; + m[2][0] = vForward.z; +} + +inline void VMatrix::SetLeft(const Vector &vLeft) +{ + m[0][1] = vLeft.x; + m[1][1] = vLeft.y; + m[2][1] = vLeft.z; +} + +inline void VMatrix::SetUp(const Vector &vUp) +{ + m[0][2] = vUp.x; + m[1][2] = vUp.y; + m[2][2] = vUp.z; +} + +inline void VMatrix::GetBasisVectors(Vector &vForward, Vector &vLeft, Vector &vUp) const +{ + vForward.Init( m[0][0], m[1][0], m[2][0] ); + vLeft.Init( m[0][1], m[1][1], m[2][1] ); + vUp.Init( m[0][2], m[1][2], m[2][2] ); +} + +inline void VMatrix::SetBasisVectors(const Vector &vForward, const Vector &vLeft, const Vector &vUp) +{ + SetForward(vForward); + SetLeft(vLeft); + SetUp(vUp); +} + + +//----------------------------------------------------------------------------- +// Methods related to the translation component of the matrix +//----------------------------------------------------------------------------- +#ifndef VECTOR_NO_SLOW_OPERATIONS + +inline Vector VMatrix::GetTranslation() const +{ + return Vector(m[0][3], m[1][3], m[2][3]); +} + +#endif + +inline Vector& 
VMatrix::GetTranslation( Vector &vTrans ) const +{ + vTrans.x = m[0][3]; + vTrans.y = m[1][3]; + vTrans.z = m[2][3]; + return vTrans; +} + +inline void VMatrix::SetTranslation(const Vector &vTrans) +{ + m[0][3] = vTrans.x; + m[1][3] = vTrans.y; + m[2][3] = vTrans.z; +} + + +//----------------------------------------------------------------------------- +// appply translation to this matrix in the input space +//----------------------------------------------------------------------------- +inline void VMatrix::PreTranslate(const Vector &vTrans) +{ + Vector tmp; + Vector3DMultiplyPosition( *this, vTrans, tmp ); + m[0][3] = tmp.x; + m[1][3] = tmp.y; + m[2][3] = tmp.z; +} + + +//----------------------------------------------------------------------------- +// appply translation to this matrix in the output space +//----------------------------------------------------------------------------- +inline void VMatrix::PostTranslate(const Vector &vTrans) +{ + m[0][3] += vTrans.x; + m[1][3] += vTrans.y; + m[2][3] += vTrans.z; +} + +inline const matrix3x4_t& VMatrix::As3x4() const +{ + return *((const matrix3x4_t*)this); +} + +inline matrix3x4_t& VMatrix::As3x4() +{ + return *((matrix3x4_t*)this); +} + +inline void VMatrix::CopyFrom3x4( const matrix3x4_t &m3x4 ) +{ + memcpy( m, m3x4.Base(), sizeof( matrix3x4_t ) ); + m[3][0] = m[3][1] = m[3][2] = 0; + m[3][3] = 1; +} + +inline void VMatrix::Set3x4( matrix3x4_t& matrix3x4 ) const +{ + memcpy(matrix3x4.Base(), m, sizeof( matrix3x4_t ) ); +} + + +//----------------------------------------------------------------------------- +// Matrix math operations +//----------------------------------------------------------------------------- +inline const VMatrix& VMatrix::operator+=(const VMatrix &other) +{ + for(int i=0; i < 4; i++) + { + for(int j=0; j < 4; j++) + { + m[i][j] += other.m[i][j]; + } + } + + return *this; +} + + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +inline VMatrix VMatrix::operator+(const VMatrix &other) const +{ + VMatrix 
ret; + for(int i=0; i < 16; i++) + { + ((float*)ret.m)[i] = ((float*)m)[i] + ((float*)other.m)[i]; + } + return ret; +} + +inline VMatrix VMatrix::operator-(const VMatrix &other) const +{ + VMatrix ret; + + for(int i=0; i < 4; i++) + { + for(int j=0; j < 4; j++) + { + ret.m[i][j] = m[i][j] - other.m[i][j]; + } + } + + return ret; +} + +inline VMatrix VMatrix::operator-() const +{ + VMatrix ret; + for( int i=0; i < 16; i++ ) + { + ((float*)ret.m)[i] = ((float*)m)[i]; + } + return ret; +} + +#endif // VECTOR_NO_SLOW_OPERATIONS + + +//----------------------------------------------------------------------------- +// Vector transformation +//----------------------------------------------------------------------------- + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +inline Vector VMatrix::operator*(const Vector &vVec) const +{ + Vector vRet; + vRet.x = m[0][0]*vVec.x + m[0][1]*vVec.y + m[0][2]*vVec.z + m[0][3]; + vRet.y = m[1][0]*vVec.x + m[1][1]*vVec.y + m[1][2]*vVec.z + m[1][3]; + vRet.z = m[2][0]*vVec.x + m[2][1]*vVec.y + m[2][2]*vVec.z + m[2][3]; + + return vRet; +} + +inline Vector VMatrix::VMul4x3(const Vector &vVec) const +{ + Vector vResult; + Vector3DMultiplyPosition( *this, vVec, vResult ); + return vResult; +} + + +inline Vector VMatrix::VMul4x3Transpose(const Vector &vVec) const +{ + Vector tmp = vVec; + tmp.x -= m[0][3]; + tmp.y -= m[1][3]; + tmp.z -= m[2][3]; + + return Vector( + m[0][0]*tmp.x + m[1][0]*tmp.y + m[2][0]*tmp.z, + m[0][1]*tmp.x + m[1][1]*tmp.y + m[2][1]*tmp.z, + m[0][2]*tmp.x + m[1][2]*tmp.y + m[2][2]*tmp.z + ); +} + +inline Vector VMatrix::VMul3x3(const Vector &vVec) const +{ + return Vector( + m[0][0]*vVec.x + m[0][1]*vVec.y + m[0][2]*vVec.z, + m[1][0]*vVec.x + m[1][1]*vVec.y + m[1][2]*vVec.z, + m[2][0]*vVec.x + m[2][1]*vVec.y + m[2][2]*vVec.z + ); +} + +inline Vector VMatrix::VMul3x3Transpose(const Vector &vVec) const +{ + return Vector( + m[0][0]*vVec.x + m[1][0]*vVec.y + m[2][0]*vVec.z, + m[0][1]*vVec.x + m[1][1]*vVec.y + m[2][1]*vVec.z, + 
m[0][2]*vVec.x + m[1][2]*vVec.y + m[2][2]*vVec.z + ); +} + +#endif // VECTOR_NO_SLOW_OPERATIONS + + +inline void VMatrix::V3Mul(const Vector &vIn, Vector &vOut) const +{ + vec_t rw; + + rw = 1.0f / (m[3][0]*vIn.x + m[3][1]*vIn.y + m[3][2]*vIn.z + m[3][3]); + vOut.x = (m[0][0]*vIn.x + m[0][1]*vIn.y + m[0][2]*vIn.z + m[0][3]) * rw; + vOut.y = (m[1][0]*vIn.x + m[1][1]*vIn.y + m[1][2]*vIn.z + m[1][3]) * rw; + vOut.z = (m[2][0]*vIn.x + m[2][1]*vIn.y + m[2][2]*vIn.z + m[2][3]) * rw; +} + +inline void VMatrix::V4Mul(const Vector4D &vIn, Vector4D &vOut) const +{ + vOut[0] = m[0][0]*vIn[0] + m[0][1]*vIn[1] + m[0][2]*vIn[2] + m[0][3]*vIn[3]; + vOut[1] = m[1][0]*vIn[0] + m[1][1]*vIn[1] + m[1][2]*vIn[2] + m[1][3]*vIn[3]; + vOut[2] = m[2][0]*vIn[0] + m[2][1]*vIn[1] + m[2][2]*vIn[2] + m[2][3]*vIn[3]; + vOut[3] = m[3][0]*vIn[0] + m[3][1]*vIn[1] + m[3][2]*vIn[2] + m[3][3]*vIn[3]; +} + + +//----------------------------------------------------------------------------- +// Plane transformation +//----------------------------------------------------------------------------- +inline void VMatrix::TransformPlane( const VPlane &inPlane, VPlane &outPlane ) const +{ + Vector vTrans; + Vector3DMultiply( *this, inPlane.m_Normal, outPlane.m_Normal ); + outPlane.m_Dist = inPlane.m_Dist * DotProduct( outPlane.m_Normal, outPlane.m_Normal ); + outPlane.m_Dist += DotProduct( outPlane.m_Normal, GetTranslation( vTrans ) ); +} + + +//----------------------------------------------------------------------------- +// Other random stuff +//----------------------------------------------------------------------------- +inline void VMatrix::Identity() +{ + MatrixSetIdentity( *this ); +} + + +inline bool VMatrix::IsIdentity() const +{ + return + m[0][0] == 1.0f && m[0][1] == 0.0f && m[0][2] == 0.0f && m[0][3] == 0.0f && + m[1][0] == 0.0f && m[1][1] == 1.0f && m[1][2] == 0.0f && m[1][3] == 0.0f && + m[2][0] == 0.0f && m[2][1] == 0.0f && m[2][2] == 1.0f && m[2][3] == 0.0f && + m[3][0] == 0.0f && m[3][1] == 
0.0f && m[3][2] == 0.0f && m[3][3] == 1.0f; +} + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +inline Vector VMatrix::ApplyRotation(const Vector &vVec) const +{ + return VMul3x3(vVec); +} + +inline VMatrix VMatrix::operator~() const +{ + VMatrix mRet; + InverseGeneral(mRet); + return mRet; +} + +#endif + + +//----------------------------------------------------------------------------- +// Accessors +//----------------------------------------------------------------------------- +inline void MatrixGetColumn( const VMatrix &src, int nCol, Vector *pColumn ) +{ + Assert( (nCol >= 0) && (nCol <= 3) ); + + pColumn->x = src[0][nCol]; + pColumn->y = src[1][nCol]; + pColumn->z = src[2][nCol]; +} + +inline void MatrixSetColumn( VMatrix &src, int nCol, const Vector &column ) +{ + Assert( (nCol >= 0) && (nCol <= 3) ); + + src.m[0][nCol] = column.x; + src.m[1][nCol] = column.y; + src.m[2][nCol] = column.z; +} + +inline void MatrixGetRow( const VMatrix &src, int nRow, Vector *pRow ) +{ + Assert( (nRow >= 0) && (nRow <= 3) ); + *pRow = *(Vector*)src[nRow]; +} + +inline void MatrixSetRow( VMatrix &dst, int nRow, const Vector &row ) +{ + Assert( (nRow >= 0) && (nRow <= 3) ); + *(Vector*)dst[nRow] = row; +} + + +//----------------------------------------------------------------------------- +// Vector3DMultiplyPosition treats src2 as if it's a point (adds the translation) +//----------------------------------------------------------------------------- +// NJS: src2 is passed in as a full vector rather than a reference to prevent the need +// for 2 branches and a potential copy in the body. (ie, handling the case when the src2 +// reference is the same as the dst reference ). 
+inline void Vector3DMultiplyPosition( const VMatrix& src1, const VectorByValue src2, Vector& dst ) +{ + dst[0] = src1[0][0] * src2.x + src1[0][1] * src2.y + src1[0][2] * src2.z + src1[0][3]; + dst[1] = src1[1][0] * src2.x + src1[1][1] * src2.y + src1[1][2] * src2.z + src1[1][3]; + dst[2] = src1[2][0] * src2.x + src1[2][1] * src2.y + src1[2][2] * src2.z + src1[2][3]; +} + + +//----------------------------------------------------------------------------- +// Transform a plane that has an axis-aligned normal +//----------------------------------------------------------------------------- +inline void MatrixTransformAxisAlignedPlane( const VMatrix &src, int nDim, float flSign, float flDist, cplane_t &outPlane ) +{ + // See MatrixTransformPlane in the .cpp file for an explanation of the algorithm. + MatrixGetColumn( src, nDim, &outPlane.normal ); + outPlane.normal *= flSign; + outPlane.dist = flDist * DotProduct( outPlane.normal, outPlane.normal ); + + // NOTE: Writing this out by hand because it doesn't inline (inline depth isn't large enough) + // This should read outPlane.dist += DotProduct( outPlane.normal, src.GetTranslation ); + outPlane.dist += outPlane.normal.x * src.m[0][3] + outPlane.normal.y * src.m[1][3] + outPlane.normal.z * src.m[2][3]; +} + + +//----------------------------------------------------------------------------- +// Matrix equality test +//----------------------------------------------------------------------------- +inline bool MatricesAreEqual( const VMatrix &src1, const VMatrix &src2, float flTolerance ) +{ + for ( int i = 0; i < 3; ++i ) + { + for ( int j = 0; j < 3; ++j ) + { + if ( fabs( src1[i][j] - src2[i][j] ) > flTolerance ) + return false; + } + } + return true; +} + +//----------------------------------------------------------------------------- +// +//----------------------------------------------------------------------------- +void MatrixBuildOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, 
double zFar ); +void MatrixBuildPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar ); +void MatrixBuildPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right ); +void MatrixBuildPerspectiveZRange( VMatrix& dst, double flZNear, double flZFar ); + +inline void MatrixOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar ) +{ + VMatrix mat; + MatrixBuildOrtho( mat, left, top, right, bottom, zNear, zFar ); + + VMatrix temp; + MatrixMultiply( dst, mat, temp ); + dst = temp; +} + +inline void MatrixPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar ) +{ + VMatrix mat; + MatrixBuildPerspectiveX( mat, flFovX, flAspect, flZNear, flZFar ); + + VMatrix temp; + MatrixMultiply( dst, mat, temp ); + dst = temp; +} + +inline void MatrixPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right ) +{ + VMatrix mat; + MatrixBuildPerspectiveOffCenterX( mat, flFovX, flAspect, flZNear, flZFar, bottom, top, left, right ); + + VMatrix temp; + MatrixMultiply( dst, mat, temp ); + dst = temp; +} + +#endif + + diff --git a/mp/src/public/mathlib/vplane.h b/mp/src/public/mathlib/vplane.h index 2c4441de..dd3d4a9a 100644 --- a/mp/src/public/mathlib/vplane.h +++ b/mp/src/public/mathlib/vplane.h @@ -1,182 +1,182 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-// $Workfile: $
-// $Date: $
-// $NoKeywords: $
-//=============================================================================//
-
-#ifndef VPLANE_H
-#define VPLANE_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include "mathlib/vector.h"
-
-typedef int SideType;
-
-// Used to represent sides of things like planes.
-#define SIDE_FRONT 0
-#define SIDE_BACK 1
-#define SIDE_ON 2
-
-#define VP_EPSILON 0.01f
-
-
-class VPlane // Plane stored as a normal (m_Normal) and an offset along it (m_Dist).
-{
-public:
- VPlane(); // Assigns nothing to m_Normal or m_Dist.
- VPlane(const Vector &vNormal, vec_t dist); // Stores vNormal and dist as given.
-
- void Init(const Vector &vNormal, vec_t dist); // Performs the same assignments as the two-argument constructor.
-
- // Return the distance from the point to the plane, computed as
- vec_t DistTo(const Vector &vVec) const; // vVec.Dot(m_Normal) - m_Dist (signed; a true distance only when m_Normal is unit length).
-
- // Copy. Assigns m_Normal and m_Dist from thePlane and returns *this.
- VPlane& operator=(const VPlane &thePlane);
-
- // Returns SIDE_ON, SIDE_FRONT, or SIDE_BACK: FRONT when DistTo >= sideEpsilon, BACK when DistTo <= -sideEpsilon.
- // The epsilon for SIDE_ON can be passed in.
- SideType GetPointSide(const Vector &vPoint, vec_t sideEpsilon=VP_EPSILON) const;
-
- // Returns SIDE_FRONT (DistTo > 0) or SIDE_BACK (everything else, including points exactly on the plane).
- SideType GetPointSideExact(const Vector &vPoint) const;
-
- // Classify the box with respect to the plane using GetPointSideExact on its 8 corners.
- // Returns SIDE_ON (corners disagree), SIDE_FRONT, or SIDE_BACK
- SideType BoxOnPlaneSide(const Vector &vMin, const Vector &vMax) const;
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
- // Flip the plane: returns a plane with negated normal and distance; this object is not modified.
- VPlane Flip();
-
- // Get a point on the plane (normal*dist).
- Vector GetPointOnPlane() const;
-
- // Snap the specified point to the plane (along the plane's normal): vPoint - m_Normal * DistTo(vPoint).
- Vector SnapPointToPlane(const Vector &vPoint) const;
-#endif
-
-public:
- Vector m_Normal; // Plane normal; nothing in this class normalizes it.
- vec_t m_Dist; // Offset: DistTo() is zero for points whose dot product with m_Normal equals m_Dist.
-
-#ifdef VECTOR_NO_SLOW_OPERATIONS
-private:
- // No copy constructors allowed if we're in optimal mode (declared private here to block copies)
- VPlane(const VPlane& vOther);
-#endif
-};
-
-
-//-----------------------------------------------------------------------------
-// Inlines.
-//-----------------------------------------------------------------------------
-// Default constructor: assigns nothing to m_Normal or m_Dist.
-inline VPlane::VPlane()
-{
-}
-
-// Builds the plane from a normal and a distance by forwarding to Init().
-inline VPlane::VPlane(const Vector &vNormal, vec_t dist)
-{
-	Init( vNormal, dist );
-}
-
-// (Re)initializes the plane: dist goes to m_Dist and vNormal is copied to m_Normal.
-inline void VPlane::Init(const Vector &vNormal, vec_t dist)
-{
-	m_Dist = dist;
-	m_Normal = vNormal;
-}
-
-// Signed offset of vVec from the plane: its projection onto m_Normal minus m_Dist.
-inline vec_t VPlane::DistTo(const Vector &vVec) const
-{
-	const vec_t flProjection = vVec.Dot( m_Normal );
-	return flProjection - m_Dist;
-}
-
-// Member-wise copy of thePlane; returns *this so assignments can be chained.
-inline VPlane& VPlane::operator=(const VPlane &thePlane)
-{
-	m_Dist = thePlane.m_Dist;
-	m_Normal = thePlane.m_Normal;
-	return *this;
-}
-
-#ifndef VECTOR_NO_SLOW_OPERATIONS
-
-// Returns a plane with both the normal and the distance negated; this plane is left unchanged.
-inline VPlane VPlane::Flip()
-{
-	VPlane flipped( -m_Normal, -m_Dist );
-	return flipped;
-}
-
-// Returns the normal scaled by the plane distance (normal*dist).
-inline Vector VPlane::GetPointOnPlane() const
-{
-	Vector vOnPlane = m_Normal * m_Dist;
-	return vOnPlane;
-}
-
-// Moves vPoint along the plane normal by its signed DistTo() offset and returns the result.
-inline Vector VPlane::SnapPointToPlane(const Vector &vPoint) const
-{
-	const vec_t flOffset = DistTo( vPoint );
-	return vPoint - m_Normal * flOffset;
-}
-
-#endif
-
-// Classifies vPoint with a tolerance band: SIDE_FRONT at or beyond +sideEpsilon,
-// SIDE_BACK at or beyond -sideEpsilon, SIDE_ON for anything in between.
-inline SideType VPlane::GetPointSide(const Vector &vPoint, vec_t sideEpsilon) const
-{
-	const vec_t flSigned = DistTo( vPoint );
-
-	if ( flSigned >= sideEpsilon )
-		return SIDE_FRONT;
-
-	if ( flSigned <= -sideEpsilon )
-		return SIDE_BACK;
-
-	return SIDE_ON;
-}
-
-// Classifies vPoint with no tolerance: SIDE_FRONT only when DistTo() is strictly
-// positive, SIDE_BACK otherwise (so points exactly on the plane count as back).
-inline SideType VPlane::GetPointSideExact(const Vector &vPoint) const
-{
-	if ( DistTo( vPoint ) > 0.0f )
-		return SIDE_FRONT;
-
-	return SIDE_BACK;
-}
-
-
-// BUGBUG: This should either simply use the implementation in mathlib or cease to exist.
-// mathlib implementation is much more efficient. Check to see that VPlane isn't used in
-// performance critical code.
-//
-// Classifies each of the box's 8 corners with GetPointSideExact(). Returns SIDE_ON
-// as soon as a corner disagrees with the first one, otherwise the side they share.
-inline SideType VPlane::BoxOnPlaneSide(const Vector &vMin, const Vector &vMax) const
-{
-	TableVector vCorners[8] =
-	{
-		{ vMin.x, vMin.y, vMin.z },
-		{ vMin.x, vMin.y, vMax.z },
-		{ vMin.x, vMax.y, vMax.z },
-		{ vMin.x, vMax.y, vMin.z },
-
-		{ vMax.x, vMin.y, vMin.z },
-		{ vMax.x, vMin.y, vMax.z },
-		{ vMax.x, vMax.y, vMax.z },
-		{ vMax.x, vMax.y, vMin.z },
-	};
-
-	// The first corner sets the reference side.
-	const int referenceSide = GetPointSideExact( vCorners[0] );
-
-	for ( int iCorner = 1; iCorner < 8; iCorner++ )
-	{
-		// Any disagreement means the box crosses the plane.
-		if ( GetPointSideExact( vCorners[iCorner] ) != referenceSide )
-			return SIDE_ON;
-	}
-
-	// Every corner is on the same side.
-	return referenceSide;
-}
-
-
-
-
-#endif // VPLANE_H
+//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $Workfile: $ +// $Date: $ +// $NoKeywords: $ +//=============================================================================// + +#ifndef VPLANE_H +#define VPLANE_H + +#ifdef _WIN32 +#pragma once +#endif + +#include "mathlib/vector.h" + +typedef int SideType; + +// Used to represent sides of things like planes. +#define SIDE_FRONT 0 +#define SIDE_BACK 1 +#define SIDE_ON 2 + +#define VP_EPSILON 0.01f + + +class VPlane +{ +public: + VPlane(); + VPlane(const Vector &vNormal, vec_t dist); + + void Init(const Vector &vNormal, vec_t dist); + + // Return the distance from the point to the plane. + vec_t DistTo(const Vector &vVec) const; + + // Copy. + VPlane& operator=(const VPlane &thePlane); + + // Returns SIDE_ON, SIDE_FRONT, or SIDE_BACK. + // The epsilon for SIDE_ON can be passed in. + SideType GetPointSide(const Vector &vPoint, vec_t sideEpsilon=VP_EPSILON) const; + + // Returns SIDE_FRONT or SIDE_BACK. + SideType GetPointSideExact(const Vector &vPoint) const; + + // Classify the box with respect to the plane. + // Returns SIDE_ON, SIDE_FRONT, or SIDE_BACK + SideType BoxOnPlaneSide(const Vector &vMin, const Vector &vMax) const; + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // Flip the plane. + VPlane Flip(); + + // Get a point on the plane (normal*dist). + Vector GetPointOnPlane() const; + + // Snap the specified point to the plane (along the plane's normal). + Vector SnapPointToPlane(const Vector &vPoint) const; +#endif + +public: + Vector m_Normal; + vec_t m_Dist; + +#ifdef VECTOR_NO_SLOW_OPERATIONS +private: + // No copy constructors allowed if we're in optimal mode + VPlane(const VPlane& vOther); +#endif +}; + + +//----------------------------------------------------------------------------- +// Inlines. 
+//----------------------------------------------------------------------------- +inline VPlane::VPlane() +{ +} + +inline VPlane::VPlane(const Vector &vNormal, vec_t dist) +{ + m_Normal = vNormal; + m_Dist = dist; +} + +inline void VPlane::Init(const Vector &vNormal, vec_t dist) +{ + m_Normal = vNormal; + m_Dist = dist; +} + +inline vec_t VPlane::DistTo(const Vector &vVec) const +{ + return vVec.Dot(m_Normal) - m_Dist; +} + +inline VPlane& VPlane::operator=(const VPlane &thePlane) +{ + m_Normal = thePlane.m_Normal; + m_Dist = thePlane.m_Dist; + return *this; +} + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +inline VPlane VPlane::Flip() +{ + return VPlane(-m_Normal, -m_Dist); +} + +inline Vector VPlane::GetPointOnPlane() const +{ + return m_Normal * m_Dist; +} + +inline Vector VPlane::SnapPointToPlane(const Vector &vPoint) const +{ + return vPoint - m_Normal * DistTo(vPoint); +} + +#endif + +inline SideType VPlane::GetPointSide(const Vector &vPoint, vec_t sideEpsilon) const +{ + vec_t fDist; + + fDist = DistTo(vPoint); + if(fDist >= sideEpsilon) + return SIDE_FRONT; + else if(fDist <= -sideEpsilon) + return SIDE_BACK; + else + return SIDE_ON; +} + +inline SideType VPlane::GetPointSideExact(const Vector &vPoint) const +{ + return DistTo(vPoint) > 0.0f ? SIDE_FRONT : SIDE_BACK; +} + + +// BUGBUG: This should either simply use the implementation in mathlib or cease to exist. +// mathlib implementation is much more efficient. Check to see that VPlane isn't used in +// performance critical code. 
+inline SideType VPlane::BoxOnPlaneSide(const Vector &vMin, const Vector &vMax) const +{ + int i, firstSide, side; + TableVector vPoints[8] = + { + { vMin.x, vMin.y, vMin.z }, + { vMin.x, vMin.y, vMax.z }, + { vMin.x, vMax.y, vMax.z }, + { vMin.x, vMax.y, vMin.z }, + + { vMax.x, vMin.y, vMin.z }, + { vMax.x, vMin.y, vMax.z }, + { vMax.x, vMax.y, vMax.z }, + { vMax.x, vMax.y, vMin.z }, + }; + + firstSide = GetPointSideExact(vPoints[0]); + for(i=1; i < 8; i++) + { + side = GetPointSideExact(vPoints[i]); + + // Does the box cross the plane? + if(side != firstSide) + return SIDE_ON; + } + + // Ok, they're all on the same side, return that. + return firstSide; +} + + + + +#endif // VPLANE_H |