From f56bb35301836e56582a575a75864392a0177875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20P=2E=20Tjern=C3=B8?= Date: Mon, 2 Dec 2013 19:31:46 -0800 Subject: Fix line endings. WHAMMY. --- mp/src/public/mathlib/amd3dx.h | 2376 +++++----- mp/src/public/mathlib/anorms.h | 50 +- mp/src/public/mathlib/bumpvects.h | 74 +- mp/src/public/mathlib/compressed_3d_unitvec.h | 568 +-- mp/src/public/mathlib/compressed_light_cube.h | 48 +- mp/src/public/mathlib/compressed_vector.h | 1216 ++--- mp/src/public/mathlib/halton.h | 142 +- mp/src/public/mathlib/lightdesc.h | 346 +- mp/src/public/mathlib/math_pfns.h | 160 +- mp/src/public/mathlib/mathlib.h | 4372 ++++++++--------- mp/src/public/mathlib/matrixmath.h | 770 +-- mp/src/public/mathlib/noise.h | 70 +- mp/src/public/mathlib/polyhedron.h | 146 +- mp/src/public/mathlib/quantize.h | 282 +- mp/src/public/mathlib/simdvectormatrix.h | 284 +- mp/src/public/mathlib/spherical_geometry.h | 146 +- mp/src/public/mathlib/ssemath.h | 6196 ++++++++++++------------- mp/src/public/mathlib/ssequaternion.h | 734 +-- mp/src/public/mathlib/vector.h | 4624 +++++++++--------- mp/src/public/mathlib/vector2d.h | 1340 +++--- mp/src/public/mathlib/vector4d.h | 1372 +++--- mp/src/public/mathlib/vmatrix.h | 1900 ++++---- mp/src/public/mathlib/vplane.h | 364 +- 23 files changed, 13790 insertions(+), 13790 deletions(-) (limited to 'mp/src/public/mathlib') diff --git a/mp/src/public/mathlib/amd3dx.h b/mp/src/public/mathlib/amd3dx.h index 05eb663e..9dab1bfd 100644 --- a/mp/src/public/mathlib/amd3dx.h +++ b/mp/src/public/mathlib/amd3dx.h @@ -1,1188 +1,1188 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -/****************************************************************************** - - Copyright (c) 1999 Advanced Micro Devices, Inc. 
- - LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY - EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY, - NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY - PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY - DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS, - BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR - INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY - OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION - OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY - NOT APPLY TO YOU. - - AMD does not assume any responsibility for any errors which may appear in the - Materials nor any responsibility to support or update the Materials. AMD retains - the right to make changes to its test specifications at any time, without notice. - - NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any - further information, software, technical information, know-how, or show-how - available to you. - - So that all may benefit from your experience, please report any problems - or suggestions about this software to 3dsdk.support@amd.com - - AMD Developer Technologies, M/S 585 - Advanced Micro Devices, Inc. - 5900 E. Ben White Blvd. - Austin, TX 78741 - 3dsdk.support@amd.com - -******************************************************************************* - - AMD3DX.H - - MACRO FORMAT - ============ - This file contains inline assembly macros that - generate AMD-3D instructions in binary format. - Therefore, C or C++ programmer can use AMD-3D instructions - without any penalty in their C or C++ source code. - - The macro's name and format conventions are as follow: - - - 1. First argument of macro is a destination and - second argument is a source operand. - ex) _asm PFCMPEQ (mm3, mm4) - | | - dst src - - 2. 
The destination operand can be m0 to m7 only. - The source operand can be any one of the register - m0 to m7 or _eax, _ecx, _edx, _ebx, _esi, or _edi - that contains effective address. - ex) _asm PFRCP (MM7, MM6) - ex) _asm PFRCPIT2 (mm0, mm4) - ex) _asm PFMUL (mm3, _edi) - - 3. The prefetch(w) takes one src operand _eax, ecx, _edx, - _ebx, _esi, or _edi that contains effective address. - ex) _asm PREFETCH (_edi) - - For WATCOM C/C++ users, when using #pragma aux instead if - _asm, all macro names should be prefixed by a p_ or P_. - Macros should not be enclosed in quotes. - ex) p_pfrcp (MM7,MM6) - - NOTE: Not all instruction macros, nor all possible - combinations of operands have been explicitely - tested. If any errors are found, please report - them. - - EXAMPLE - ======= - Following program doesn't do anything but it shows you - how to use inline assembly AMD-3D instructions in C. - Note that this will only work in flat memory model which - segment registers cs, ds, ss and es point to the same - linear address space total less than 4GB. 
- - Used Microsoft VC++ 5.0 - - #include - #include "amd3d.h" - - void main () - { - float x = (float)1.25; - float y = (float)1.25; - float z, zz; - - _asm { - movd mm1, x - movd mm2, y - pfmul (mm1, mm2) - movd z, mm1 - femms - } - - printf ("value of z = %f\n", z); - - // - // Demonstration of using the memory instead of - // multimedia register - // - _asm { - movd mm3, x - lea esi, y // load effective address of y - pfmul (mm3, _esi) - movd zz, mm3 - femms - } - - printf ("value of zz = %f\n", zz); - } - - #pragma aux EXAMPLE with WATCOM C/C++ v11.x - =========================================== - - extern void Add(float *__Dest, float *__A, float *__B); - #pragma aux Add = \ - p_femms \ - "movd mm6,[esi]" \ - p_pfadd(mm6,_edi) \ - "movd [ebx],mm6" \ - p_femms \ - parm [ebx] [esi] [edi]; - -*******************************************************************************/ - -#ifndef _K3DMACROSINCLUDED_ -#define _K3DMACROSINCLUDED_ - -#if defined (__WATCOMC__) - -// The WATCOM C/C++ version of the 3DNow! macros. -// -// The older, compbined register style for WATCOM C/C++ macros is not -// supported. 
- -/* Operand defines for instructions two operands */ -#define _k3d_mm0_mm0 0xc0 -#define _k3d_mm0_mm1 0xc1 -#define _k3d_mm0_mm2 0xc2 -#define _k3d_mm0_mm3 0xc3 -#define _k3d_mm0_mm4 0xc4 -#define _k3d_mm0_mm5 0xc5 -#define _k3d_mm0_mm6 0xc6 -#define _k3d_mm0_mm7 0xc7 -#define _k3d_mm0_eax 0x00 -#define _k3d_mm0_ecx 0x01 -#define _k3d_mm0_edx 0x02 -#define _k3d_mm0_ebx 0x03 -#define _k3d_mm0_esi 0x06 -#define _k3d_mm0_edi 0x07 -#define _k3d_mm1_mm0 0xc8 -#define _k3d_mm1_mm1 0xc9 -#define _k3d_mm1_mm2 0xca -#define _k3d_mm1_mm3 0xcb -#define _k3d_mm1_mm4 0xcc -#define _k3d_mm1_mm5 0xcd -#define _k3d_mm1_mm6 0xce -#define _k3d_mm1_mm7 0xcf -#define _k3d_mm1_eax 0x08 -#define _k3d_mm1_ecx 0x09 -#define _k3d_mm1_edx 0x0a -#define _k3d_mm1_ebx 0x0b -#define _k3d_mm1_esi 0x0e -#define _k3d_mm1_edi 0x0f -#define _k3d_mm2_mm0 0xd0 -#define _k3d_mm2_mm1 0xd1 -#define _k3d_mm2_mm2 0xd2 -#define _k3d_mm2_mm3 0xd3 -#define _k3d_mm2_mm4 0xd4 -#define _k3d_mm2_mm5 0xd5 -#define _k3d_mm2_mm6 0xd6 -#define _k3d_mm2_mm7 0xd7 -#define _k3d_mm2_eax 0x10 -#define _k3d_mm2_ecx 0x11 -#define _k3d_mm2_edx 0x12 -#define _k3d_mm2_ebx 0x13 -#define _k3d_mm2_esi 0x16 -#define _k3d_mm2_edi 0x17 -#define _k3d_mm3_mm0 0xd8 -#define _k3d_mm3_mm1 0xd9 -#define _k3d_mm3_mm2 0xda -#define _k3d_mm3_mm3 0xdb -#define _k3d_mm3_mm4 0xdc -#define _k3d_mm3_mm5 0xdd -#define _k3d_mm3_mm6 0xde -#define _k3d_mm3_mm7 0xdf -#define _k3d_mm3_eax 0x18 -#define _k3d_mm3_ecx 0x19 -#define _k3d_mm3_edx 0x1a -#define _k3d_mm3_ebx 0x1b -#define _k3d_mm3_esi 0x1e -#define _k3d_mm3_edi 0x1f -#define _k3d_mm4_mm0 0xe0 -#define _k3d_mm4_mm1 0xe1 -#define _k3d_mm4_mm2 0xe2 -#define _k3d_mm4_mm3 0xe3 -#define _k3d_mm4_mm4 0xe4 -#define _k3d_mm4_mm5 0xe5 -#define _k3d_mm4_mm6 0xe6 -#define _k3d_mm4_mm7 0xe7 -#define _k3d_mm4_eax 0x20 -#define _k3d_mm4_ecx 0x21 -#define _k3d_mm4_edx 0x22 -#define _k3d_mm4_ebx 0x23 -#define _k3d_mm4_esi 0x26 -#define _k3d_mm4_edi 0x27 -#define _k3d_mm5_mm0 0xe8 -#define _k3d_mm5_mm1 0xe9 
-#define _k3d_mm5_mm2 0xea -#define _k3d_mm5_mm3 0xeb -#define _k3d_mm5_mm4 0xec -#define _k3d_mm5_mm5 0xed -#define _k3d_mm5_mm6 0xee -#define _k3d_mm5_mm7 0xef -#define _k3d_mm5_eax 0x28 -#define _k3d_mm5_ecx 0x29 -#define _k3d_mm5_edx 0x2a -#define _k3d_mm5_ebx 0x2b -#define _k3d_mm5_esi 0x2e -#define _k3d_mm5_edi 0x2f -#define _k3d_mm6_mm0 0xf0 -#define _k3d_mm6_mm1 0xf1 -#define _k3d_mm6_mm2 0xf2 -#define _k3d_mm6_mm3 0xf3 -#define _k3d_mm6_mm4 0xf4 -#define _k3d_mm6_mm5 0xf5 -#define _k3d_mm6_mm6 0xf6 -#define _k3d_mm6_mm7 0xf7 -#define _k3d_mm6_eax 0x30 -#define _k3d_mm6_ecx 0x31 -#define _k3d_mm6_edx 0x32 -#define _k3d_mm6_ebx 0x33 -#define _k3d_mm6_esi 0x36 -#define _k3d_mm6_edi 0x37 -#define _k3d_mm7_mm0 0xf8 -#define _k3d_mm7_mm1 0xf9 -#define _k3d_mm7_mm2 0xfa -#define _k3d_mm7_mm3 0xfb -#define _k3d_mm7_mm4 0xfc -#define _k3d_mm7_mm5 0xfd -#define _k3d_mm7_mm6 0xfe -#define _k3d_mm7_mm7 0xff -#define _k3d_mm7_eax 0x38 -#define _k3d_mm7_ecx 0x39 -#define _k3d_mm7_edx 0x3a -#define _k3d_mm7_ebx 0x3b -#define _k3d_mm7_esi 0x3e -#define _k3d_mm7_edi 0x3f - -#define _k3d_name_xlat_m0 _mm0 -#define _k3d_name_xlat_m1 _mm1 -#define _k3d_name_xlat_m2 _mm2 -#define _k3d_name_xlat_m3 _mm3 -#define _k3d_name_xlat_m4 _mm4 -#define _k3d_name_xlat_m5 _mm5 -#define _k3d_name_xlat_m6 _mm6 -#define _k3d_name_xlat_m7 _mm7 -#define _k3d_name_xlat_M0 _mm0 -#define _k3d_name_xlat_M1 _mm1 -#define _k3d_name_xlat_M2 _mm2 -#define _k3d_name_xlat_M3 _mm3 -#define _k3d_name_xlat_M4 _mm4 -#define _k3d_name_xlat_M5 _mm5 -#define _k3d_name_xlat_M6 _mm6 -#define _k3d_name_xlat_M7 _mm7 -#define _k3d_name_xlat_mm0 _mm0 -#define _k3d_name_xlat_mm1 _mm1 -#define _k3d_name_xlat_mm2 _mm2 -#define _k3d_name_xlat_mm3 _mm3 -#define _k3d_name_xlat_mm4 _mm4 -#define _k3d_name_xlat_mm5 _mm5 -#define _k3d_name_xlat_mm6 _mm6 -#define _k3d_name_xlat_mm7 _mm7 -#define _k3d_name_xlat_MM0 _mm0 -#define _k3d_name_xlat_MM1 _mm1 -#define _k3d_name_xlat_MM2 _mm2 -#define _k3d_name_xlat_MM3 _mm3 -#define 
_k3d_name_xlat_MM4 _mm4 -#define _k3d_name_xlat_MM5 _mm5 -#define _k3d_name_xlat_MM6 _mm6 -#define _k3d_name_xlat_MM7 _mm7 -#define _k3d_name_xlat_eax _eax -#define _k3d_name_xlat_ebx _ebx -#define _k3d_name_xlat_ecx _ecx -#define _k3d_name_xlat_edx _edx -#define _k3d_name_xlat_esi _esi -#define _k3d_name_xlat_edi _edi -#define _k3d_name_xlat_ebp _ebp -#define _k3d_name_xlat_EAX _eax -#define _k3d_name_xlat_EBX _ebx -#define _k3d_name_xlat_ECX _ecx -#define _k3d_name_xlat_EDX _edx -#define _k3d_name_xlat_ESI _esi -#define _k3d_name_xlat_EDI _edi -#define _k3d_name_xlat_EBP _ebp -#define _k3d_name_xlat__eax _eax -#define _k3d_name_xlat__ebx _ebx -#define _k3d_name_xlat__ecx _ecx -#define _k3d_name_xlat__edx _edx -#define _k3d_name_xlat__esi _esi -#define _k3d_name_xlat__edi _edi -#define _k3d_name_xlat__ebp _ebp -#define _k3d_name_xlat__EAX _eax -#define _k3d_name_xlat__EBX _ebx -#define _k3d_name_xlat__ECX _ecx -#define _k3d_name_xlat__EDX _edx -#define _k3d_name_xlat__ESI _esi -#define _k3d_name_xlat__EDI _edi -#define _k3d_name_xlat__EBP _ebp - -#define _k3d_xglue3(a,b,c) a##b##c -#define _k3d_glue3(a,b,c) _k3d_xglue3(a,b,c) -#define _k3d_MODRM(dst, src) _k3d_glue3(_k3d,_k3d_name_xlat_##dst,_k3d_name_xlat_##src) - -/* Operand defines for prefetch and prefetchw */ - -#define _k3d_pref_eax 0x00 -#define _k3d_pref_ecx 0x01 -#define _k3d_pref_edx 0x02 -#define _k3d_pref_ebx 0x03 -#define _k3d_pref_esi 0x06 -#define _k3d_pref_edi 0x07 -#define _k3d_pref_EAX 0x00 -#define _k3d_pref_ECX 0x01 -#define _k3d_pref_EDX 0x02 -#define _k3d_pref_EBX 0x03 -#define _k3d_pref_ESI 0x06 -#define _k3d_pref_EDI 0x07 -#define _k3d_prefw_eax 0x08 -#define _k3d_prefw_ecx 0x09 -#define _k3d_prefw_edx 0x0A -#define _k3d_prefw_ebx 0x0B -#define _k3d_prefw_esi 0x0E -#define _k3d_prefw_edi 0x0F -#define _k3d_prefw_EAX 0x08 -#define _k3d_prefw_ECX 0x09 -#define _k3d_prefw_EDX 0x0A -#define _k3d_prefw_EBX 0x0B -#define _k3d_prefw_ESI 0x0E -#define _k3d_prefw_EDI 0x0F - -/* Defines for 3DNow! 
instructions */ -#define PF2ID(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x1d -#define PFACC(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xae -#define PFADD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9e -#define PFCMPEQ(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb0 -#define PFCMPGE(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x90 -#define PFCMPGT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa0 -#define PFMAX(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa4 -#define PFMIN(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x94 -#define PFMUL(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb4 -#define PFRCP(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x96 -#define PFRCPIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa6 -#define PFRCPIT2(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb6 -#define PFRSQRT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x97 -#define PFRSQIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa7 -#define PFSUB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9a -#define PFSUBR(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xaa -#define PI2FD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x0d -#define FEMMS db 0x0f, 0x0e -#define PAVGUSB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xbf -#define PMULHRW(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb7 -#define PREFETCH(src) db 0x0f, 0x0d, _k3d_pref_##src -#define PREFETCHW(src) db 0x0f, 0x0d, _k3d_prefw_##src -#define CPUID db 0x0f, 0xa2 - -/* Defines for new, K7 opcodes */ -#define PFNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8a -#define FPPNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8e -#define PSWAPD(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0xbb -#define PMINUB(dst,src) db 0x0f, 0xda, _k3d_MODRM(dst,src) -#define PMAXUB(dst,src) db 0x0f, 0xde, _k3d_MODRM(dst,src) -#define PMINSW(dst,src) db 0x0f, 0xea, _k3d_MODRM(dst,src) -#define PMAXSW(dst,src) db 0x0f, 0xee, _k3d_MODRM(dst,src) -#define PMULHUW(dst,src) db 0x0f, 0xe4, 
_k3d_MODRM(dst,src) -#define PAVGB(dst,src) db 0x0f, 0xe0, _k3d_MODRM(dst,src) -#define PAVGW(dst,src) db 0x0f, 0xe3, _k3d_MODRM(dst,src) -#define PSADBW(dst,src) db 0x0f, 0xf6, _k3d_MODRM(dst,src) -#define PMOVMSKB(dst,src) db 0x0f, 0xd7, _k3d_MODRM(dst,src) -#define PMASKMOVQ(dst,src) db 0x0f, 0xf7, _k3d_MODRM(dst,src) -#define PINSRW(dst,src,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src), msk -#define PEXTRW(dst,src,msk) db 0x0f, 0xc5, _k3d_MODRM(dst,src), msk -#define PSHUFW(dst,src,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src), msk -#define MOVNTQ(dst,src) db 0x0f, 0xe7, _k3d_MODRM(src,dst) -#define SFENCE db 0x0f, 0xae, 0xf8 - -/* Memory/offset versions of the opcodes */ -#define PF2IDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x1d -#define PFACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xae -#define PFADDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9e -#define PFCMPEQM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb0 -#define PFCMPGEM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x90 -#define PFCMPGTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa0 -#define PFMAXM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa4 -#define PFMINM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x94 -#define PFMULM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb4 -#define PFRCPM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x96 -#define PFRCPIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa6 -#define PFRCPIT2M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb6 -#define PFRSQRTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x97 -#define PFRSQIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa7 -#define PFSUBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9a -#define PFSUBRM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 
0xaa -#define PI2FDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x0d -#define PAVGUSBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbf -#define PMULHRWM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb7 - - -/* Memory/offset versions of the new, K7 opcodes */ -#define PFNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8a -#define FPPNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8e -#define PSWAPDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbb -#define PMINUBM(dst,src,off) db 0x0f, 0xda, _k3d_MODRM(dst,src) | 0x40, off -#define PMAXUBM(dst,src,off) db 0x0f, 0xde, _k3d_MODRM(dst,src) | 0x40, off -#define PMINSWM(dst,src,off) db 0x0f, 0xea, _k3d_MODRM(dst,src) | 0x40, off -#define PMAXSWM(dst,src,off) db 0x0f, 0xee, _k3d_MODRM(dst,src) | 0x40, off -#define PMULHUWM(dst,src,off) db 0x0f, 0xe4, _k3d_MODRM(dst,src) | 0x40, off -#define PAVGBM(dst,src,off) db 0x0f, 0xe0, _k3d_MODRM(dst,src) | 0x40, off -#define PAVGWM(dst,src,off) db 0x0f, 0xe3, _k3d_MODRM(dst,src) | 0x40, off -#define PSADBWM(dst,src,off) db 0x0f, 0xf6, _k3d_MODRM(dst,src) | 0x40, off -#define PMOVMSKBM(dst,src,off) db 0x0f, 0xd7, _k3d_MODRM(dst,src) | 0x40, off -#define PMASKMOVQM(dst,src,off) db 0x0f, 0xf7, _k3d_MODRM(dst,src) | 0x40, off -#define MOVNTQM(dst,src,off) db 0x0f, 0xe7, _k3d_MODRM(src,dst) | 0x40, off -#define PINSRWM(dst,src,off,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src) | 0x40, off, msk -#define PSHUFWM(dst,src,off,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src) | 0x40, off, msk - - -/* Defines for 3DNow! 
instructions for use in pragmas */ -#define p_pf2id(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x1d -#define p_pfacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xae -#define p_pfadd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9e -#define p_pfcmpeq(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb0 -#define p_pfcmpge(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x90 -#define p_pfcmpgt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa0 -#define p_pfmax(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa4 -#define p_pfmin(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x94 -#define p_pfmul(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb4 -#define p_pfrcp(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x96 -#define p_pfrcpit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa6 -#define p_pfrcpit2(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb6 -#define p_pfrsqrt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x97 -#define p_pfrsqit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa7 -#define p_pfsub(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9a -#define p_pfsubr(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xaa -#define p_pi2fd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x0d -#define p_femms 0x0f 0x0e -#define p_pavgusb(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xbf -#define p_pmulhrw(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb7 -#define p_prefetch(src) 0x0f 0x0d _k3d_pref_##src -#define p_prefetchw(src) 0x0f 0x0d _k3d_prefw_##src -#define P_PFNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a -#define P_FPPNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e -#define P_PSWAPD(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb -#define P_PMINUB(dst,src) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off -#define P_PMAXUB(dst,src) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off -#define P_PMINSW(dst,src) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off -#define P_PMAXSW(dst,src) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off -#define P_PMULHUW(dst,src) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off -#define P_PAVGB(dst,src) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off 
-#define P_PAVGW(dst,src) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off -#define P_PSADBW(dst,src) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off -#define P_PMOVMSKB(dst,src) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off -#define P_PMASKMOVQ(dst,src) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off -#define P_PINSRW(dst,src,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk -#define P_PEXTRW(dst,src,msk) 0x0f 0xc5 (_k3d_MODRM(dst,src) | 0x40) off msk -#define P_PSHUFW(dst,src,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk -#define P_MOVNTQ(dst,src) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off - -#define P_PF2IDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x1d -#define P_PFACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xae -#define P_PFADDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9e -#define P_PFCMPEQM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb0 -#define P_PFCMPGEM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x90 -#define P_PFCMPGTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa0 -#define P_PFMAXM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa4 -#define P_PFMINM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x94 -#define P_PFMULM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb4 -#define P_PFRCPM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x96 -#define P_PFRCPIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa6 -#define P_PFRCPIT2M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb6 -#define P_PFRSQRTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x97 -#define P_PFRSQIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa7 -#define P_PFSUBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9a -#define P_PFSUBRM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xaa -#define P_PI2FDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x0d -#define P_PAVGUSBM(dst,src,off) 0x0f 0x0f 
(_k3d_MODRM(dst,src) | 0x40) off 0xbf -#define P_PMULHRWM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb7 -#define P_PFNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a -#define P_FPPNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e -#define P_PSWAPDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb -#define P_PMINUBM(dst,src,off) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off -#define P_PMAXUBM(dst,src,off) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off -#define P_PMINSWM(dst,src,off) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off -#define P_PMAXSWM(dst,src,off) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off -#define P_PMULHUWM(dst,src,off) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off -#define P_PAVGBM(dst,src,off) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off -#define P_PAVGWM(dst,src,off) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off -#define P_PSADBWM(dst,src,off) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off -#define P_PMOVMSKBM(dst,src,off) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off -#define P_MOVNTQM(dst,src,off) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off -#define P_PMASKMOVQM(dst,src,off) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off -#define P_PINSRWM(dst,src,off,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk -#define P_PSHUFWM(dst,src,off,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk - - -#define P_PF2ID(dst,src) p_pf2id(dst,src) -#define P_PFACC(dst,src) p_pfacc(dst,src) -#define P_PFADD(dst,src) p_pfadd(dst,src) -#define P_PFCMPEQ(dst,src) p_pfcmpeq(dst,src) -#define P_PFCMPGE(dst,src) p_pfcmpge(dst,src) -#define P_PFCMPGT(dst,src) p_pfcmpgt(dst,src) -#define P_PFMAX(dst,src) p_pfmax(dst,src) -#define P_PFMIN(dst,src) p_pfmin(dst,src) -#define P_PFMUL(dst,src) p_pfmul(dst,src) -#define P_PFRCP(dst,src) p_pfrcp(dst,src) -#define P_PFRCPIT1(dst,src) p_pfrcpit1(dst,src) -#define P_PFRCPIT2(dst,src) p_pfrcpit2(dst,src) -#define P_PFRSQRT(dst,src) p_pfrsqrt(dst,src) -#define P_PFRSQIT1(dst,src) p_pfrsqit1(dst,src) 
-#define P_PFSUB(dst,src) p_pfsub(dst,src) -#define P_PFSUBR(dst,src) p_pfsubr(dst,src) -#define P_PI2FD(dst,src) p_pi2fd(dst,src) -#define P_FEMMS p_femms -#define P_PAVGUSB(dst,src) p_pavgusb(dst,src) -#define P_PMULHRW(dst,src) p_pmulhrw(dst,src) -#define P_PREFETCH(src) p_prefetch(src) -#define P_PREFETCHW(src) p_prefetchw(src) -#define p_CPUID 0x0f 0xa2 -#define p_pf2idm(dst,src,off) P_PF2IDM(dst,src,off) -#define p_pfaccm(dst,src,off) P_PFACCM(dst,src,off) -#define p_pfaddm(dst,src,off) P_PFADDM(dst,src,off) -#define p_pfcmpeqm(dst,src,off) P_PFCMPEQM(dst,src,off) -#define p_pfcmpgem(dst,src,off) P_PFCMPGEM(dst,src,off) -#define p_pfcmpgtm(dst,src,off) P_PFCMPGTM(dst,src,off) -#define p_pfmaxm(dst,src,off) P_PFMAXM(dst,src,off) -#define p_pfminm(dst,src,off) P_PFMINM(dst,src,off) -#define p_pfmulm(dst,src,off) P_PFMULM(dst,src,off) -#define p_pfrcpm(dst,src,off) P_PFRCPM(dst,src,off) -#define p_pfrcpit1m(dst,src,off) P_PFRCPIT1M(dst,src,off) -#define p_pfrcpit2m(dst,src,off) P_PFRCPIT2M(dst,src,off) -#define p_pfrsqrtm(dst,src,off) P_PFRSQRTM(dst,src,off) -#define p_pfrsqit1m(dst,src,off) P_PFRSQIT1M(dst,src,off) -#define p_pfsubm(dst,src,off) P_PFSUBM(dst,src,off) -#define p_pfsubrm(dst,src,off) P_PFSUBRM(dst,src,off) -#define p_pi2fdm(dst,src,off) P_PI2FDM(dst,src,off) -#define p_pavgusbm(dst,src,off) P_PAVGUSBM(dst,src,off) -#define p_pmulhrwm(dst,src,off) P_PMULHRWM(dst,src,off) - -#define P_PFNACC(dst,src) p_pfnacc(dst,src) -#define P_FPPNACC(dst,src) p_pfpnacc(dst,src) -#define P_PSWAPD(dst,src) p_pswapd(dst,src) -#define P_PMINUB(dst,src) p_pminub(dst,src) -#define P_PMAXUB(dst,src) p_pmaxub(dst,src) -#define P_PMINSW(dst,src) p_pminsw(dst,src) -#define P_PMAXSW(dst,src) p_pmaxsw(dst,src) -#define P_PMULHUW(dst,src) p_pmulhuw(dst,src) -#define P_PAVGB(dst,src) p_pavgb(dst,src) -#define P_PAVGW(dst,src) p_avgw(dst,src) -#define P_PSADBW(dst,src) p_psadbw(dst,src) -#define P_PMOVMSKB(dst,src) p_pmovmskb(dst,src) -#define P_PMASKMOVQ(dst,src) 
p_pmaskmovq(dst,src) -#define P_PINSRW(dst,src,msk) p_pinsrw(dst,src) -#define P_PEXTRW(dst,src,msk) p_pextrw(dst,src) -#define P_PSHUFW(dst,src,msk) p_pshufw(dst,src) -#define P_MOVNTQ(dst,src) p_movntq(dst,src) - -#define P_PFNACCM(dst,src,off) p_pfnaccm(dst,src,off) -#define P_FPPNACCM(dst,src,off) p_pfpnaccm(dst,src,off) -#define P_PSWAPDM(dst,src,off) p_pswapdm(dst,src,off) -#define P_PMINUBM(dst,src,off) p_pminubm(dst,src,off) -#define P_PMAXUBM(dst,src,off) p_pmaxubm(dst,src,off) -#define P_PMINSWM(dst,src,off) p_pminswm(dst,src,off) -#define P_PMAXSWM(dst,src,off) p_pmaxswm(dst,src,off) -#define P_PMULHUWM(dst,src,off) p_pmulhuwm(dst,src,off) -#define P_PAVGBM(dst,src,off) p_pavgbm(dst,src,off) -#define P_PAVGWM(dst,src,off) p_avgwm(dst,src,off) -#define P_PSADBWM(dst,src,off) p_psadbwm(dst,src,off) -#define P_PMOVMSKBM(dst,src,off) p_pmovmskbm(dst,src,off) -#define P_PMASKMOVQM(dst,src,off) p_pmaskmovqm(dst,src,off) -#define P_PINSRWM(dst,src,off,msk) p_pinsrwm(dst,src,off,msk) -#define P_PSHUFWM(dst,src,off,msk) p_pshufwm(dst,src,off,msk) -#define P_MOVNTQM(dst,src,off) p_movntqm(dst,src,off) - -#elif defined (_MSC_VER) && !defined (__MWERKS__) -// The Microsoft Visual C++ version of the 3DNow! macros. - -// Stop the "no EMMS" warning, since it doesn't detect FEMMS properly -#pragma warning(disable:4799) - -// Defines for operands. 
-#define _K3D_MM0 0xc0 -#define _K3D_MM1 0xc1 -#define _K3D_MM2 0xc2 -#define _K3D_MM3 0xc3 -#define _K3D_MM4 0xc4 -#define _K3D_MM5 0xc5 -#define _K3D_MM6 0xc6 -#define _K3D_MM7 0xc7 -#define _K3D_mm0 0xc0 -#define _K3D_mm1 0xc1 -#define _K3D_mm2 0xc2 -#define _K3D_mm3 0xc3 -#define _K3D_mm4 0xc4 -#define _K3D_mm5 0xc5 -#define _K3D_mm6 0xc6 -#define _K3D_mm7 0xc7 -#define _K3D_EAX 0x00 -#define _K3D_ECX 0x01 -#define _K3D_EDX 0x02 -#define _K3D_EBX 0x03 -#define _K3D_ESI 0x06 -#define _K3D_EDI 0x07 -#define _K3D_eax 0x00 -#define _K3D_ecx 0x01 -#define _K3D_edx 0x02 -#define _K3D_ebx 0x03 -#define _K3D_esi 0x06 -#define _K3D_edi 0x07 - -// These defines are for compatibility with the previous version of the header file. -#define _K3D_M0 0xc0 -#define _K3D_M1 0xc1 -#define _K3D_M2 0xc2 -#define _K3D_M3 0xc3 -#define _K3D_M4 0xc4 -#define _K3D_M5 0xc5 -#define _K3D_M6 0xc6 -#define _K3D_M7 0xc7 -#define _K3D_m0 0xc0 -#define _K3D_m1 0xc1 -#define _K3D_m2 0xc2 -#define _K3D_m3 0xc3 -#define _K3D_m4 0xc4 -#define _K3D_m5 0xc5 -#define _K3D_m6 0xc6 -#define _K3D_m7 0xc7 -#define _K3D__EAX 0x00 -#define _K3D__ECX 0x01 -#define _K3D__EDX 0x02 -#define _K3D__EBX 0x03 -#define _K3D__ESI 0x06 -#define _K3D__EDI 0x07 -#define _K3D__eax 0x00 -#define _K3D__ecx 0x01 -#define _K3D__edx 0x02 -#define _K3D__ebx 0x03 -#define _K3D__esi 0x06 -#define _K3D__edi 0x07 - -// General 3DNow! instruction format that is supported by -// these macros. Note that only the most basic form of memory -// operands are supported by these macros. 
- -#define InjK3DOps(dst,src,inst) \ -{ \ - _asm _emit 0x0f \ - _asm _emit 0x0f \ - _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \ - _asm _emit _3DNowOpcode##inst \ -} - -#define InjK3DMOps(dst,src,off,inst) \ -{ \ - _asm _emit 0x0f \ - _asm _emit 0x0f \ - _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \ - _asm _emit off \ - _asm _emit _3DNowOpcode##inst \ -} - -#define InjMMXOps(dst,src,inst) \ -{ \ - _asm _emit 0x0f \ - _asm _emit _3DNowOpcode##inst \ - _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \ -} - -#define InjMMXMOps(dst,src,off,inst) \ -{ \ - _asm _emit 0x0f \ - _asm _emit _3DNowOpcode##inst \ - _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \ - _asm _emit off \ -} - -#define _3DNowOpcodePF2ID 0x1d -#define _3DNowOpcodePFACC 0xae -#define _3DNowOpcodePFADD 0x9e -#define _3DNowOpcodePFCMPEQ 0xb0 -#define _3DNowOpcodePFCMPGE 0x90 -#define _3DNowOpcodePFCMPGT 0xa0 -#define _3DNowOpcodePFMAX 0xa4 -#define _3DNowOpcodePFMIN 0x94 -#define _3DNowOpcodePFMUL 0xb4 -#define _3DNowOpcodePFRCP 0x96 -#define _3DNowOpcodePFRCPIT1 0xa6 -#define _3DNowOpcodePFRCPIT2 0xb6 -#define _3DNowOpcodePFRSQRT 0x97 -#define _3DNowOpcodePFRSQIT1 0xa7 -#define _3DNowOpcodePFSUB 0x9a -#define _3DNowOpcodePFSUBR 0xaa -#define _3DNowOpcodePI2FD 0x0d -#define _3DNowOpcodePAVGUSB 0xbf -#define _3DNowOpcodePMULHRW 0xb7 -#define _3DNowOpcodePFNACC 0x8a -#define _3DNowOpcodeFPPNACC 0x8e -#define _3DNowOpcodePSWAPD 0xbb -#define _3DNowOpcodePMINUB 0xda -#define _3DNowOpcodePMAXUB 0xde -#define _3DNowOpcodePMINSW 0xea -#define _3DNowOpcodePMAXSW 0xee -#define _3DNowOpcodePMULHUW 0xe4 -#define _3DNowOpcodePAVGB 0xe0 -#define _3DNowOpcodePAVGW 0xe3 -#define _3DNowOpcodePSADBW 0xf6 -#define _3DNowOpcodePMOVMSKB 0xd7 -#define _3DNowOpcodePMASKMOVQ 0xf7 -#define _3DNowOpcodePINSRW 0xc4 -#define _3DNowOpcodePEXTRW 0xc5 -#define _3DNowOpcodePSHUFW 0x70 -#define _3DNowOpcodeMOVNTQ 0xe7 -#define _3DNowOpcodePREFETCHT 0x18 - - -#define PF2ID(dst,src) InjK3DOps(dst, 
src, PF2ID) -#define PFACC(dst,src) InjK3DOps(dst, src, PFACC) -#define PFADD(dst,src) InjK3DOps(dst, src, PFADD) -#define PFCMPEQ(dst,src) InjK3DOps(dst, src, PFCMPEQ) -#define PFCMPGE(dst,src) InjK3DOps(dst, src, PFCMPGE) -#define PFCMPGT(dst,src) InjK3DOps(dst, src, PFCMPGT) -#define PFMAX(dst,src) InjK3DOps(dst, src, PFMAX) -#define PFMIN(dst,src) InjK3DOps(dst, src, PFMIN) -#define PFMUL(dst,src) InjK3DOps(dst, src, PFMUL) -#define PFRCP(dst,src) InjK3DOps(dst, src, PFRCP) -#define PFRCPIT1(dst,src) InjK3DOps(dst, src, PFRCPIT1) -#define PFRCPIT2(dst,src) InjK3DOps(dst, src, PFRCPIT2) -#define PFRSQRT(dst,src) InjK3DOps(dst, src, PFRSQRT) -#define PFRSQIT1(dst,src) InjK3DOps(dst, src, PFRSQIT1) -#define PFSUB(dst,src) InjK3DOps(dst, src, PFSUB) -#define PFSUBR(dst,src) InjK3DOps(dst, src, PFSUBR) -#define PI2FD(dst,src) InjK3DOps(dst, src, PI2FD) -#define PAVGUSB(dst,src) InjK3DOps(dst, src, PAVGUSB) -#define PMULHRW(dst,src) InjK3DOps(dst, src, PMULHRW) - -#define FEMMS \ -{ \ - _asm _emit 0x0f \ - _asm _emit 0x0e \ -} - -#define PREFETCH(src) \ -{ \ - _asm _emit 0x0f \ - _asm _emit 0x0d \ - _asm _emit (_K3D_##src & 0x07) \ -} - -/* Prefetch with a short offset, < 127 or > -127 - Carefull! Doesn't check for your offset being - in range. 
*/ - -#define PREFETCHM(src,off) \ -{ \ - _asm _emit 0x0f \ - _asm _emit 0x0d \ - _asm _emit (0x40 | (_K3D_##src & 0x07)) \ - _asm _emit off \ -} - -/* Prefetch with a long offset */ - -#define PREFETCHMLONG(src,off) \ -{ \ - _asm _emit 0x0f \ - _asm _emit 0x0d \ - _asm _emit (0x80 | (_K3D_##src & 0x07)) \ - _asm _emit (off & 0x000000ff) \ - _asm _emit (off & 0x0000ff00) >> 8 \ - _asm _emit (off & 0x00ff0000) >> 16 \ - _asm _emit (off & 0xff000000) >> 24 \ -} - -#define PREFETCHW(src) \ -{ \ - _asm _emit 0x0f \ - _asm _emit 0x0d \ - _asm _emit (0x08 | (_K3D_##src & 0x07)) \ -} - -#define PREFETCHWM(src,off) \ -{ \ - _asm _emit 0x0f \ - _asm _emit 0x0d \ - _asm _emit 0x48 | (_K3D_##src & 0x07) \ - _asm _emit off \ -} - -#define PREFETCHWMLONG(src,off) \ -{ \ - _asm _emit 0x0f \ - _asm _emit 0x0d \ - _asm _emit 0x88 | (_K3D_##src & 0x07) \ - _asm _emit (off & 0x000000ff) \ - _asm _emit (off & 0x0000ff00) >> 8 \ - _asm _emit (off & 0x00ff0000) >> 16 \ - _asm _emit (off & 0xff000000) >> 24 \ -} - -#define CPUID \ -{ \ - _asm _emit 0x0f \ - _asm _emit 0xa2 \ -} - - -/* Defines for new, K7 opcodes */ -#define SFENCE \ -{ \ - _asm _emit 0x0f \ - _asm _emit 0xae \ - _asm _emit 0xf8 \ -} - -#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC) -#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC) -#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD) -#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB) -#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB) -#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW) -#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW) -#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW) -#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB) -#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW) -#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW) -#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB) -#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ) -#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) _asm _emit msk -#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) 
_asm _emit msk -#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) _asm _emit msk -#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ) -#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT) -#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT) -#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT) -#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT) - - -/* Memory/offset versions of the opcodes */ -#define PAVGUSBM(dst,src,off) InjK3DMOps(dst,src,off,PAVGUSB) -#define PF2IDM(dst,src,off) InjK3DMOps(dst,src,off,PF2ID) -#define PFACCM(dst,src,off) InjK3DMOps(dst,src,off,PFACC) -#define PFADDM(dst,src,off) InjK3DMOps(dst,src,off,PFADD) -#define PFCMPEQM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPEQ) -#define PFCMPGEM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGE) -#define PFCMPGTM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGT) -#define PFMAXM(dst,src,off) InjK3DMOps(dst,src,off,PFMAX) -#define PFMINM(dst,src,off) InjK3DMOps(dst,src,off,PFMIN) -#define PFMULM(dst,src,off) InjK3DMOps(dst,src,off,PFMUL) -#define PFRCPM(dst,src,off) InjK3DMOps(dst,src,off,PFRCP) -#define PFRCPIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT1) -#define PFRCPIT2M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT2) -#define PFRSQRTM(dst,src,off) InjK3DMOps(dst,src,off,PFRSQRT) -#define PFRSQIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRSQIT1) -#define PFSUBM(dst,src,off) InjK3DMOps(dst,src,off,PFSUB) -#define PFSUBRM(dst,src,off) InjK3DMOps(dst,src,off,PFSUBR) -#define PI2FDM(dst,src,off) InjK3DMOps(dst,src,off,PI2FD) -#define PMULHRWM(dst,src,off) InjK3DMOps(dst,src,off,PMULHRW) - - -/* Memory/offset versions of the K7 opcodes */ -#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC) -#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC) -#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD) -#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB) -#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB) -#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW) -#define 
PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW) -#define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW) -#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB) -#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW) -#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW) -#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB) -#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ) -#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW) _asm _emit msk -#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW) _asm _emit msk -#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ) -#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT) -#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT) -#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT) -#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT) - - -#else - -/* Assume built-in support for 3DNow! opcodes, replace macros with opcodes */ -#define PAVGUSB(dst,src) pavgusb dst,src -#define PF2ID(dst,src) pf2id dst,src -#define PFACC(dst,src) pfacc dst,src -#define PFADD(dst,src) pfadd dst,src -#define PFCMPEQ(dst,src) pfcmpeq dst,src -#define PFCMPGE(dst,src) pfcmpge dst,src -#define PFCMPGT(dst,src) pfcmpgt dst,src -#define PFMAX(dst,src) pfmax dst,src -#define PFMIN(dst,src) pfmin dst,src -#define PFMUL(dst,src) pfmul dst,src -#define PFRCP(dst,src) pfrcp dst,src -#define PFRCPIT1(dst,src) pfrcpit1 dst,src -#define PFRCPIT2(dst,src) pfrcpit2 dst,src -#define PFRSQRT(dst,src) pfrsqrt dst,src -#define PFRSQIT1(dst,src) pfrsqit1 dst,src -#define PFSUB(dst,src) pfsub dst,src -#define PFSUBR(dst,src) pfsubr dst,src -#define PI2FD(dst,src) pi2fd dst,src -#define PMULHRW(dst,src) pmulhrw dst,src -#define PREFETCH(src) prefetch src -#define PREFETCHW(src) prefetchw src - -#define PAVGUSBM(dst,src,off) pavgusb dst,[src+off] -#define PF2IDM(dst,src,off) PF2ID dst,[src+off] -#define PFACCM(dst,src,off) PFACC dst,[src+off] -#define 
PFADDM(dst,src,off) PFADD dst,[src+off] -#define PFCMPEQM(dst,src,off) PFCMPEQ dst,[src+off] -#define PFCMPGEM(dst,src,off) PFCMPGE dst,[src+off] -#define PFCMPGTM(dst,src,off) PFCMPGT dst,[src+off] -#define PFMAXM(dst,src,off) PFMAX dst,[src+off] -#define PFMINM(dst,src,off) PFMIN dst,[src+off] -#define PFMULM(dst,src,off) PFMUL dst,[src+off] -#define PFRCPM(dst,src,off) PFRCP dst,[src+off] -#define PFRCPIT1M(dst,src,off) PFRCPIT1 dst,[src+off] -#define PFRCPIT2M(dst,src,off) PFRCPIT2 dst,[src+off] -#define PFRSQRTM(dst,src,off) PFRSQRT dst,[src+off] -#define PFRSQIT1M(dst,src,off) PFRSQIT1 dst,[src+off] -#define PFSUBM(dst,src,off) PFSUB dst,[src+off] -#define PFSUBRM(dst,src,off) PFSUBR dst,[src+off] -#define PI2FDM(dst,src,off) PI2FD dst,[src+off] -#define PMULHRWM(dst,src,off) PMULHRW dst,[src+off] - - -#if defined (__MWERKS__) -// At the moment, CodeWarrior does not support these opcodes, so hand-assemble them - -// Defines for operands. -#define _K3D_MM0 0xc0 -#define _K3D_MM1 0xc1 -#define _K3D_MM2 0xc2 -#define _K3D_MM3 0xc3 -#define _K3D_MM4 0xc4 -#define _K3D_MM5 0xc5 -#define _K3D_MM6 0xc6 -#define _K3D_MM7 0xc7 -#define _K3D_mm0 0xc0 -#define _K3D_mm1 0xc1 -#define _K3D_mm2 0xc2 -#define _K3D_mm3 0xc3 -#define _K3D_mm4 0xc4 -#define _K3D_mm5 0xc5 -#define _K3D_mm6 0xc6 -#define _K3D_mm7 0xc7 -#define _K3D_EAX 0x00 -#define _K3D_ECX 0x01 -#define _K3D_EDX 0x02 -#define _K3D_EBX 0x03 -#define _K3D_ESI 0x06 -#define _K3D_EDI 0x07 -#define _K3D_eax 0x00 -#define _K3D_ecx 0x01 -#define _K3D_edx 0x02 -#define _K3D_ebx 0x03 -#define _K3D_esi 0x06 -#define _K3D_edi 0x07 -#define _K3D_EAX 0x00 -#define _K3D_ECX 0x01 -#define _K3D_EDX 0x02 -#define _K3D_EBX 0x03 -#define _K3D_ESI 0x06 -#define _K3D_EDI 0x07 -#define _K3D_eax 0x00 -#define _K3D_ecx 0x01 -#define _K3D_edx 0x02 -#define _K3D_ebx 0x03 -#define _K3D_esi 0x06 -#define _K3D_edi 0x07 - -#define InjK3DOps(dst,src,inst) \ - db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src), _3DNowOpcode##inst - 
-#define InjK3DMOps(dst,src,off,inst) \ - db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off, _3DNowOpcode##inst - -#define InjMMXOps(dst,src,inst) \ - db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src) - -#define InjMMXMOps(dst,src,off,inst) \ - db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off - -#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC) -#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC) -#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD) -#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB) -#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB) -#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW) -#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW) -#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW) -#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB) -#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW) -#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW) -#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB) -#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ) -#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) db msk -#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) db msk -#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) db msk -#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ) -#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT) -#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT) -#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT) -#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT) - - -/* Memory/offset versions of the K7 opcodes */ -#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC) -#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC) -#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD) -#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB) -#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB) -#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW) -#define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW) -#define 
PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW) -#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB) -#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW) -#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW) -#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB) -#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ) -#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW), msk -#define PEXTRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PEXTRW), msk -#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW), msk -#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ) -#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT) -#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT) -#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT) -#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT) - - -#else - -#define PFNACC(dst,src) PFNACC dst,src -#define PFPNACC(dst,src) PFPNACC dst,src -#define PSWAPD(dst,src) PSWAPD dst,src -#define PMINUB(dst,src) PMINUB dst,src -#define PMAXUB(dst,src) PMAXUB dst,src -#define PMINSW(dst,src) PMINSW dst,src -#define PMAXSW(dst,src) PMAXSW dst,src -#define PMULHUW(dst,src) PMULHUW dst,src -#define PAVGB(dst,src) PAVGB dst,src -#define PAVGW(dst,src) PAVGW dst,src -#define PSADBW(dst,src) PSADBW dst,src -#define PMOVMSKB(dst,src) PMOVMSKB dst,src -#define PMASKMOVQ(dst,src) PMASKMOVQ dst,src -#define PINSRW(dst,src,msk) PINSRW dst,src,msk -#define PEXTRW(dst,src,msk) PEXTRW dst,src,msk -#define PSHUFW(dst,src,msk) PSHUFW dst,src,msk -#define MOVNTQ(dst,src) MOVNTQ dst,src - -#define PFNACCM(dst,src,off) PFNACC dst,[src+off] -#define PFPNACCM(dst,src,off) PFPNACC dst,[src+off] -#define PSWAPDM(dst,src,off) PSWAPD dst,[src+off] -#define PMINUBM(dst,src,off) PMINUB dst,[src+off] -#define PMAXUBM(dst,src,off) PMAXUB dst,[src+off] -#define PMINSWM(dst,src,off) PMINSW dst,[src+off] -#define PMAXSWM(dst,src,off) PMAXSW dst,[src+off] -#define 
PMULHUWM(dst,src,off) PMULHUW dst,[src+off] -#define PAVGBM(dst,src,off) PAVGB dst,[src+off] -#define PAVGWM(dst,src,off) PAVGW dst,[src+off] -#define PSADBWM(dst,src,off) PSADBW dst,[src+off] -#define PMOVMSKBM(dst,src,off) PMOVMSKB dst,[src+off] -#define PMASKMOVQM(dst,src,off) PMASKMOVQ dst,[src+off] -#define PINSRWM(dst,src,off,msk) PINSRW dst,[src+off],msk -#define PEXTRWM(dst,src,off,msk) PEXTRW dst,[src+off],msk -#define PSHUFWM(dst,src,off,msk) PSHUFW dst,[src+off],msk -#define MOVNTQM(dst,src,off) MOVNTQ dst,[src+off] - -#endif - -#endif - -/* Just to deal with lower case. */ -#define pf2id(dst,src) PF2ID(dst,src) -#define pfacc(dst,src) PFACC(dst,src) -#define pfadd(dst,src) PFADD(dst,src) -#define pfcmpeq(dst,src) PFCMPEQ(dst,src) -#define pfcmpge(dst,src) PFCMPGE(dst,src) -#define pfcmpgt(dst,src) PFCMPGT(dst,src) -#define pfmax(dst,src) PFMAX(dst,src) -#define pfmin(dst,src) PFMIN(dst,src) -#define pfmul(dst,src) PFMUL(dst,src) -#define pfrcp(dst,src) PFRCP(dst,src) -#define pfrcpit1(dst,src) PFRCPIT1(dst,src) -#define pfrcpit2(dst,src) PFRCPIT2(dst,src) -#define pfrsqrt(dst,src) PFRSQRT(dst,src) -#define pfrsqit1(dst,src) PFRSQIT1(dst,src) -#define pfsub(dst,src) PFSUB(dst,src) -#define pfsubr(dst,src) PFSUBR(dst,src) -#define pi2fd(dst,src) PI2FD(dst,src) -#define femms FEMMS -#define pavgusb(dst,src) PAVGUSB(dst,src) -#define pmulhrw(dst,src) PMULHRW(dst,src) -#define prefetch(src) PREFETCH(src) -#define prefetchw(src) PREFETCHW(src) - -#define prefetchm(src,off) PREFETCHM(src,off) -#define prefetchmlong(src,off) PREFETCHMLONG(src,off) -#define prefetchwm(src,off) PREFETCHWM(src,off) -#define prefetchwmlong(src,off) PREFETCHWMLONG(src,off) - -#define pfnacc(dst,src) PFNACC(dst,src) -#define pfpnacc(dst,src) PFPNACC(dst,src) -#define pswapd(dst,src) PSWAPD(dst,src) -#define pminub(dst,src) PMINUB(dst,src) -#define pmaxub(dst,src) PMAXUB(dst,src) -#define pminsw(dst,src) PMINSW(dst,src) -#define pmaxsw(dst,src) PMAXSW(dst,src) -#define 
pmulhuw(dst,src) PMULHUW(dst,src) -#define pavgb(dst,src) PAVGB(dst,src) -#define pavgw(dst,src) PAVGW(dst,src) -#define psadbw(dst,src) PSADBW(dst,src) -#define pmovmskb(dst,src) PMOVMSKB(dst,src) -#define pmaskmovq(dst,src) PMASKMOVQ(dst,src) -#define pinsrw(dst,src,msk) PINSRW(dst,src,msk) -#define pextrw(dst,src,msk) PEXTRW(dst,src,msk) -#define pshufw(dst,src,msk) PSHUFW(dst,src,msk) -#define movntq(dst,src) MOVNTQ(dst,src) -#define prefetchnta(mem) PREFETCHNTA(mem) -#define prefetcht0(mem) PREFETCHT0(mem) -#define prefetcht1(mem) PREFETCHT1(mem) -#define prefetcht2(mem) PREFETCHT2(mem) - - -#define pavgusbm(dst,src,off) PAVGUSBM(dst,src,off) -#define pf2idm(dst,src,off) PF2IDM(dst,src,off) -#define pfaccm(dst,src,off) PFACCM(dst,src,off) -#define pfaddm(dst,src,off) PFADDM(dst,src,off) -#define pfcmpeqm(dst,src,off) PFCMPEQM(dst,src,off) -#define pfcmpgem(dst,src,off) PFCMPGEM(dst,src,off) -#define pfcmpgtm(dst,src,off) PFCMPGTM(dst,src,off) -#define pfmaxm(dst,src,off) PFMAXM(dst,src,off) -#define pfminm(dst,src,off) PFMINM(dst,src,off) -#define pfmulm(dst,src,off) PFMULM(dst,src,off) -#define pfrcpm(dst,src,off) PFRCPM(dst,src,off) -#define pfrcpit1m(dst,src,off) PFRCPIT1M(dst,src,off) -#define pfrcpit2m(dst,src,off) PFRCPIT2M(dst,src,off) -#define pfrsqrtm(dst,src,off) PFRSQRTM(dst,src,off) -#define pfrsqit1m(dst,src,off) PFRSQIT1M(dst,src,off) -#define pfsubm(dst,src,off) PFSUBM(dst,src,off) -#define pfsubrm(dst,src,off) PFSUBRM(dst,src,off) -#define pi2fdm(dst,src,off) PI2FDM(dst,src,off) -#define pmulhrwm(dst,src,off) PMULHRWM(dst,src,off) -#define cpuid CPUID -#define sfence SFENCE - -#define pfnaccm(dst,src,off) PFNACCM(dst,src,off) -#define pfpnaccm(dst,src,off) PFPNACCM(dst,src,off) -#define pswapdm(dst,src,off) PSWAPDM(dst,src,off) -#define pminubm(dst,src,off) PMINUBM(dst,src,off) -#define pmaxubm(dst,src,off) PMAXUBM(dst,src,off) -#define pminswm(dst,src,off) PMINSWM(dst,src,off) -#define pmaxswm(dst,src,off) PMAXSWM(dst,src,off) -#define 
pmulhuwm(dst,src,off) PMULHUWM(dst,src,off) -#define pavgbm(dst,src,off) PAVGBM(dst,src,off) -#define pavgwm(dst,src,off) PAVGWM(dst,src,off) -#define psadbwm(dst,src,off) PSADBWM(dst,src,off) -#define pmovmskbm(dst,src,off) PMOVMSKBM(dst,src,off) -#define pmaskmovqm(dst,src,off) PMASKMOVQM(dst,src,off) -#define pinsrwm(dst,src,off,msk) PINSRWM(dst,src,off,msk) -#define pextrwm(dst,src,off,msk) PEXTRWM(dst,src,off,msk) -#define pshufwm(dst,src,off,msk) PSHUFWM(dst,src,off,msk) -#define movntqm(dst,src,off) MOVNTQM(dst,src,off) -#define prefetchntam(mem,off) PREFETCHNTA(mem,off) -#define prefetcht0m(mem,off) PREFETCHT0(mem,off) -#define prefetcht1m(mem,off) PREFETCHT1(mem,off) -#define prefetcht2m(mem,off) PREFETCHT2(mem,off) - -#endif +//========= Copyright Valve Corporation, All rights reserved. ============// +/****************************************************************************** + + Copyright (c) 1999 Advanced Micro Devices, Inc. + + LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY + EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY, + NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY + PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY + DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS, + BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR + INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY + OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION + OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY + NOT APPLY TO YOU. + + AMD does not assume any responsibility for any errors which may appear in the + Materials nor any responsibility to support or update the Materials. AMD retains + the right to make changes to its test specifications at any time, without notice. 
+ + NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any + further information, software, technical information, know-how, or show-how + available to you. + + So that all may benefit from your experience, please report any problems + or suggestions about this software to 3dsdk.support@amd.com + + AMD Developer Technologies, M/S 585 + Advanced Micro Devices, Inc. + 5900 E. Ben White Blvd. + Austin, TX 78741 + 3dsdk.support@amd.com + +******************************************************************************* + + AMD3DX.H + + MACRO FORMAT + ============ + This file contains inline assembly macros that + generate AMD-3D instructions in binary format. + Therefore, C or C++ programmers can use AMD-3D instructions + without any penalty in their C or C++ source code. + + The macro's name and format conventions are as follows: + + + 1. The first argument of a macro is the destination and + the second argument is the source operand. + ex) _asm PFCMPEQ (mm3, mm4) + | | + dst src + + 2. The destination operand can be mm0 to mm7 only. + The source operand can be any one of the registers + mm0 to mm7 or _eax, _ecx, _edx, _ebx, _esi, or _edi + that contains the effective address. + ex) _asm PFRCP (MM7, MM6) + ex) _asm PFRCPIT2 (mm0, mm4) + ex) _asm PFMUL (mm3, _edi) + + 3. The prefetch(w) takes one src operand _eax, _ecx, _edx, + _ebx, _esi, or _edi that contains the effective address. + ex) _asm PREFETCH (_edi) + + For WATCOM C/C++ users, when using #pragma aux instead of + _asm, all macro names should be prefixed by a p_ or P_. + Macros should not be enclosed in quotes. + ex) p_pfrcp (MM7,MM6) + + NOTE: Not all instruction macros, nor all possible + combinations of operands have been explicitly + tested. If any errors are found, please report + them. + + EXAMPLE + ======= + The following program doesn't do anything, but it shows you + how to use inline assembly AMD-3D instructions in C. 
+ Note that this will only work in a flat memory model in which + the segment registers cs, ds, ss and es point to the same + linear address space totalling less than 4GB. + + Used Microsoft VC++ 5.0 + + #include <stdio.h> + #include "amd3d.h" + + void main () + { + float x = (float)1.25; + float y = (float)1.25; + float z, zz; + + _asm { + movd mm1, x + movd mm2, y + pfmul (mm1, mm2) + movd z, mm1 + femms + } + + printf ("value of z = %f\n", z); + + // + // Demonstration of using a memory operand instead of + // a multimedia register + // + _asm { + movd mm3, x + lea esi, y // load effective address of y + pfmul (mm3, _esi) + movd zz, mm3 + femms + } + + printf ("value of zz = %f\n", zz); + } + + #pragma aux EXAMPLE with WATCOM C/C++ v11.x + =========================================== + + extern void Add(float *__Dest, float *__A, float *__B); + #pragma aux Add = \ + p_femms \ + "movd mm6,[esi]" \ + p_pfadd(mm6,_edi) \ + "movd [ebx],mm6" \ + p_femms \ + parm [ebx] [esi] [edi]; + +*******************************************************************************/ + +#ifndef _K3DMACROSINCLUDED_ +#define _K3DMACROSINCLUDED_ + +#if defined (__WATCOMC__) + +// The WATCOM C/C++ version of the 3DNow! macros. +// +// The older, combined register style for WATCOM C/C++ macros is not +// supported. 
+ +/* Operand defines for two-operand instructions. Each _k3d_<dst>_<src> constant is the operand (ModRM) byte emitted for that destination/source pair. With an MMX register as the source the value is 0xc0 | (dst << 3) | src; with a general register holding the address of the source the value is (dst << 3) | reg, where eax=0, ecx=1, edx=2, ebx=3, esi=6 and edi=7 (there are no entries for register codes 4 and 5). */ +#define _k3d_mm0_mm0 0xc0 +#define _k3d_mm0_mm1 0xc1 +#define _k3d_mm0_mm2 0xc2 +#define _k3d_mm0_mm3 0xc3 +#define _k3d_mm0_mm4 0xc4 +#define _k3d_mm0_mm5 0xc5 +#define _k3d_mm0_mm6 0xc6 +#define _k3d_mm0_mm7 0xc7 +#define _k3d_mm0_eax 0x00 +#define _k3d_mm0_ecx 0x01 +#define _k3d_mm0_edx 0x02 +#define _k3d_mm0_ebx 0x03 +#define _k3d_mm0_esi 0x06 +#define _k3d_mm0_edi 0x07 +#define _k3d_mm1_mm0 0xc8 +#define _k3d_mm1_mm1 0xc9 +#define _k3d_mm1_mm2 0xca +#define _k3d_mm1_mm3 0xcb +#define _k3d_mm1_mm4 0xcc +#define _k3d_mm1_mm5 0xcd +#define _k3d_mm1_mm6 0xce +#define _k3d_mm1_mm7 0xcf +#define _k3d_mm1_eax 0x08 +#define _k3d_mm1_ecx 0x09 +#define _k3d_mm1_edx 0x0a +#define _k3d_mm1_ebx 0x0b +#define _k3d_mm1_esi 0x0e +#define _k3d_mm1_edi 0x0f +#define _k3d_mm2_mm0 0xd0 +#define _k3d_mm2_mm1 0xd1 +#define _k3d_mm2_mm2 0xd2 +#define _k3d_mm2_mm3 0xd3 +#define _k3d_mm2_mm4 0xd4 +#define _k3d_mm2_mm5 0xd5 +#define _k3d_mm2_mm6 0xd6 +#define _k3d_mm2_mm7 0xd7 +#define _k3d_mm2_eax 0x10 +#define _k3d_mm2_ecx 0x11 +#define _k3d_mm2_edx 0x12 +#define _k3d_mm2_ebx 0x13 +#define _k3d_mm2_esi 0x16 +#define _k3d_mm2_edi 0x17 +#define _k3d_mm3_mm0 0xd8 +#define _k3d_mm3_mm1 0xd9 +#define _k3d_mm3_mm2 0xda +#define _k3d_mm3_mm3 0xdb +#define _k3d_mm3_mm4 0xdc +#define _k3d_mm3_mm5 0xdd +#define _k3d_mm3_mm6 0xde +#define _k3d_mm3_mm7 0xdf +#define _k3d_mm3_eax 0x18 +#define _k3d_mm3_ecx 0x19 +#define _k3d_mm3_edx 0x1a +#define _k3d_mm3_ebx 0x1b +#define _k3d_mm3_esi 0x1e +#define _k3d_mm3_edi 0x1f +#define _k3d_mm4_mm0 0xe0 +#define _k3d_mm4_mm1 0xe1 +#define _k3d_mm4_mm2 0xe2 +#define _k3d_mm4_mm3 0xe3 +#define _k3d_mm4_mm4 0xe4 +#define _k3d_mm4_mm5 0xe5 +#define _k3d_mm4_mm6 0xe6 +#define _k3d_mm4_mm7 0xe7 +#define _k3d_mm4_eax 0x20 +#define _k3d_mm4_ecx 0x21 +#define _k3d_mm4_edx 0x22 +#define _k3d_mm4_ebx 0x23 +#define _k3d_mm4_esi 0x26 +#define _k3d_mm4_edi 0x27 +#define _k3d_mm5_mm0 0xe8 +#define _k3d_mm5_mm1 0xe9 
+#define _k3d_mm5_mm2 0xea +#define _k3d_mm5_mm3 0xeb +#define _k3d_mm5_mm4 0xec +#define _k3d_mm5_mm5 0xed +#define _k3d_mm5_mm6 0xee +#define _k3d_mm5_mm7 0xef +#define _k3d_mm5_eax 0x28 +#define _k3d_mm5_ecx 0x29 +#define _k3d_mm5_edx 0x2a +#define _k3d_mm5_ebx 0x2b +#define _k3d_mm5_esi 0x2e +#define _k3d_mm5_edi 0x2f +#define _k3d_mm6_mm0 0xf0 +#define _k3d_mm6_mm1 0xf1 +#define _k3d_mm6_mm2 0xf2 +#define _k3d_mm6_mm3 0xf3 +#define _k3d_mm6_mm4 0xf4 +#define _k3d_mm6_mm5 0xf5 +#define _k3d_mm6_mm6 0xf6 +#define _k3d_mm6_mm7 0xf7 +#define _k3d_mm6_eax 0x30 +#define _k3d_mm6_ecx 0x31 +#define _k3d_mm6_edx 0x32 +#define _k3d_mm6_ebx 0x33 +#define _k3d_mm6_esi 0x36 +#define _k3d_mm6_edi 0x37 +#define _k3d_mm7_mm0 0xf8 +#define _k3d_mm7_mm1 0xf9 +#define _k3d_mm7_mm2 0xfa +#define _k3d_mm7_mm3 0xfb +#define _k3d_mm7_mm4 0xfc +#define _k3d_mm7_mm5 0xfd +#define _k3d_mm7_mm6 0xfe +#define _k3d_mm7_mm7 0xff +#define _k3d_mm7_eax 0x38 +#define _k3d_mm7_ecx 0x39 +#define _k3d_mm7_edx 0x3a +#define _k3d_mm7_ebx 0x3b +#define _k3d_mm7_esi 0x3e +#define _k3d_mm7_edi 0x3f + +#define _k3d_name_xlat_m0 _mm0 +#define _k3d_name_xlat_m1 _mm1 +#define _k3d_name_xlat_m2 _mm2 +#define _k3d_name_xlat_m3 _mm3 +#define _k3d_name_xlat_m4 _mm4 +#define _k3d_name_xlat_m5 _mm5 +#define _k3d_name_xlat_m6 _mm6 +#define _k3d_name_xlat_m7 _mm7 +#define _k3d_name_xlat_M0 _mm0 +#define _k3d_name_xlat_M1 _mm1 +#define _k3d_name_xlat_M2 _mm2 +#define _k3d_name_xlat_M3 _mm3 +#define _k3d_name_xlat_M4 _mm4 +#define _k3d_name_xlat_M5 _mm5 +#define _k3d_name_xlat_M6 _mm6 +#define _k3d_name_xlat_M7 _mm7 +#define _k3d_name_xlat_mm0 _mm0 +#define _k3d_name_xlat_mm1 _mm1 +#define _k3d_name_xlat_mm2 _mm2 +#define _k3d_name_xlat_mm3 _mm3 +#define _k3d_name_xlat_mm4 _mm4 +#define _k3d_name_xlat_mm5 _mm5 +#define _k3d_name_xlat_mm6 _mm6 +#define _k3d_name_xlat_mm7 _mm7 +#define _k3d_name_xlat_MM0 _mm0 +#define _k3d_name_xlat_MM1 _mm1 +#define _k3d_name_xlat_MM2 _mm2 +#define _k3d_name_xlat_MM3 _mm3 +#define 
_k3d_name_xlat_MM4 _mm4 +#define _k3d_name_xlat_MM5 _mm5 +#define _k3d_name_xlat_MM6 _mm6 +#define _k3d_name_xlat_MM7 _mm7 +#define _k3d_name_xlat_eax _eax +#define _k3d_name_xlat_ebx _ebx +#define _k3d_name_xlat_ecx _ecx +#define _k3d_name_xlat_edx _edx +#define _k3d_name_xlat_esi _esi +#define _k3d_name_xlat_edi _edi +#define _k3d_name_xlat_ebp _ebp +#define _k3d_name_xlat_EAX _eax +#define _k3d_name_xlat_EBX _ebx +#define _k3d_name_xlat_ECX _ecx +#define _k3d_name_xlat_EDX _edx +#define _k3d_name_xlat_ESI _esi +#define _k3d_name_xlat_EDI _edi +#define _k3d_name_xlat_EBP _ebp +#define _k3d_name_xlat__eax _eax +#define _k3d_name_xlat__ebx _ebx +#define _k3d_name_xlat__ecx _ecx +#define _k3d_name_xlat__edx _edx +#define _k3d_name_xlat__esi _esi +#define _k3d_name_xlat__edi _edi +#define _k3d_name_xlat__ebp _ebp +#define _k3d_name_xlat__EAX _eax +#define _k3d_name_xlat__EBX _ebx +#define _k3d_name_xlat__ECX _ecx +#define _k3d_name_xlat__EDX _edx +#define _k3d_name_xlat__ESI _esi +#define _k3d_name_xlat__EDI _edi +#define _k3d_name_xlat__EBP _ebp + +#define _k3d_xglue3(a,b,c) a##b##c +#define _k3d_glue3(a,b,c) _k3d_xglue3(a,b,c) +#define _k3d_MODRM(dst, src) _k3d_glue3(_k3d,_k3d_name_xlat_##dst,_k3d_name_xlat_##src) + +/* Operand defines for prefetch and prefetchw */ + +#define _k3d_pref_eax 0x00 +#define _k3d_pref_ecx 0x01 +#define _k3d_pref_edx 0x02 +#define _k3d_pref_ebx 0x03 +#define _k3d_pref_esi 0x06 +#define _k3d_pref_edi 0x07 +#define _k3d_pref_EAX 0x00 +#define _k3d_pref_ECX 0x01 +#define _k3d_pref_EDX 0x02 +#define _k3d_pref_EBX 0x03 +#define _k3d_pref_ESI 0x06 +#define _k3d_pref_EDI 0x07 +#define _k3d_prefw_eax 0x08 +#define _k3d_prefw_ecx 0x09 +#define _k3d_prefw_edx 0x0A +#define _k3d_prefw_ebx 0x0B +#define _k3d_prefw_esi 0x0E +#define _k3d_prefw_edi 0x0F +#define _k3d_prefw_EAX 0x08 +#define _k3d_prefw_ECX 0x09 +#define _k3d_prefw_EDX 0x0A +#define _k3d_prefw_EBX 0x0B +#define _k3d_prefw_ESI 0x0E +#define _k3d_prefw_EDI 0x0F + +/* Defines for 3DNow! 
instructions */ +#define PF2ID(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x1d +#define PFACC(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xae +#define PFADD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9e +#define PFCMPEQ(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb0 +#define PFCMPGE(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x90 +#define PFCMPGT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa0 +#define PFMAX(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa4 +#define PFMIN(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x94 +#define PFMUL(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb4 +#define PFRCP(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x96 +#define PFRCPIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa6 +#define PFRCPIT2(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb6 +#define PFRSQRT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x97 +#define PFRSQIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa7 +#define PFSUB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9a +#define PFSUBR(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xaa +#define PI2FD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x0d +#define FEMMS db 0x0f, 0x0e +#define PAVGUSB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xbf +#define PMULHRW(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb7 +#define PREFETCH(src) db 0x0f, 0x0d, _k3d_pref_##src +#define PREFETCHW(src) db 0x0f, 0x0d, _k3d_prefw_##src +#define CPUID db 0x0f, 0xa2 + +/* Defines for new, K7 opcodes */ +#define PFNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8a +#define FPPNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8e +#define PSWAPD(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0xbb +#define PMINUB(dst,src) db 0x0f, 0xda, _k3d_MODRM(dst,src) +#define PMAXUB(dst,src) db 0x0f, 0xde, _k3d_MODRM(dst,src) +#define PMINSW(dst,src) db 0x0f, 0xea, _k3d_MODRM(dst,src) +#define PMAXSW(dst,src) db 0x0f, 0xee, _k3d_MODRM(dst,src) +#define PMULHUW(dst,src) db 0x0f, 0xe4, 
_k3d_MODRM(dst,src) +#define PAVGB(dst,src) db 0x0f, 0xe0, _k3d_MODRM(dst,src) +#define PAVGW(dst,src) db 0x0f, 0xe3, _k3d_MODRM(dst,src) +#define PSADBW(dst,src) db 0x0f, 0xf6, _k3d_MODRM(dst,src) +#define PMOVMSKB(dst,src) db 0x0f, 0xd7, _k3d_MODRM(dst,src) +#define PMASKMOVQ(dst,src) db 0x0f, 0xf7, _k3d_MODRM(dst,src) +#define PINSRW(dst,src,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src), msk +#define PEXTRW(dst,src,msk) db 0x0f, 0xc5, _k3d_MODRM(dst,src), msk +#define PSHUFW(dst,src,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src), msk +#define MOVNTQ(dst,src) db 0x0f, 0xe7, _k3d_MODRM(src,dst) +#define SFENCE db 0x0f, 0xae, 0xf8 + +/* Memory/offset versions of the opcodes */ +#define PF2IDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x1d +#define PFACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xae +#define PFADDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9e +#define PFCMPEQM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb0 +#define PFCMPGEM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x90 +#define PFCMPGTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa0 +#define PFMAXM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa4 +#define PFMINM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x94 +#define PFMULM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb4 +#define PFRCPM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x96 +#define PFRCPIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa6 +#define PFRCPIT2M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb6 +#define PFRSQRTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x97 +#define PFRSQIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa7 +#define PFSUBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9a +#define PFSUBRM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 
0xaa +#define PI2FDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x0d +#define PAVGUSBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbf +#define PMULHRWM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb7 + + +/* Memory/offset versions of the new, K7 opcodes */ +#define PFNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8a +#define FPPNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8e +#define PSWAPDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbb +#define PMINUBM(dst,src,off) db 0x0f, 0xda, _k3d_MODRM(dst,src) | 0x40, off +#define PMAXUBM(dst,src,off) db 0x0f, 0xde, _k3d_MODRM(dst,src) | 0x40, off +#define PMINSWM(dst,src,off) db 0x0f, 0xea, _k3d_MODRM(dst,src) | 0x40, off +#define PMAXSWM(dst,src,off) db 0x0f, 0xee, _k3d_MODRM(dst,src) | 0x40, off +#define PMULHUWM(dst,src,off) db 0x0f, 0xe4, _k3d_MODRM(dst,src) | 0x40, off +#define PAVGBM(dst,src,off) db 0x0f, 0xe0, _k3d_MODRM(dst,src) | 0x40, off +#define PAVGWM(dst,src,off) db 0x0f, 0xe3, _k3d_MODRM(dst,src) | 0x40, off +#define PSADBWM(dst,src,off) db 0x0f, 0xf6, _k3d_MODRM(dst,src) | 0x40, off +#define PMOVMSKBM(dst,src,off) db 0x0f, 0xd7, _k3d_MODRM(dst,src) | 0x40, off +#define PMASKMOVQM(dst,src,off) db 0x0f, 0xf7, _k3d_MODRM(dst,src) | 0x40, off +#define MOVNTQM(dst,src,off) db 0x0f, 0xe7, _k3d_MODRM(src,dst) | 0x40, off +#define PINSRWM(dst,src,off,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src) | 0x40, off, msk +#define PSHUFWM(dst,src,off,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src) | 0x40, off, msk + + +/* Defines for 3DNow! 
instructions for use in pragmas */ +#define p_pf2id(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x1d +#define p_pfacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xae +#define p_pfadd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9e +#define p_pfcmpeq(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb0 +#define p_pfcmpge(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x90 +#define p_pfcmpgt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa0 +#define p_pfmax(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa4 +#define p_pfmin(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x94 +#define p_pfmul(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb4 +#define p_pfrcp(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x96 +#define p_pfrcpit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa6 +#define p_pfrcpit2(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb6 +#define p_pfrsqrt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x97 +#define p_pfrsqit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa7 +#define p_pfsub(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9a +#define p_pfsubr(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xaa +#define p_pi2fd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x0d +#define p_femms 0x0f 0x0e +#define p_pavgusb(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xbf +#define p_pmulhrw(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb7 +#define p_prefetch(src) 0x0f 0x0d _k3d_pref_##src +#define p_prefetchw(src) 0x0f 0x0d _k3d_prefw_##src +#define P_PFNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a +#define P_FPPNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e +#define P_PSWAPD(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb +#define P_PMINUB(dst,src) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMAXUB(dst,src) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMINSW(dst,src) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMAXSW(dst,src) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMULHUW(dst,src) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PAVGB(dst,src) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off 
+/* Register-form pragma encodings of the remaining K7 opcodes: 0x0f, opcode, plain ModRM.
+   These take no `off` parameter, so no disp8 bit (0x40) and no offset byte are emitted. */
+#define P_PAVGW(dst,src)            0x0f 0xe3 _k3d_MODRM(dst,src)
+#define P_PSADBW(dst,src)           0x0f 0xf6 _k3d_MODRM(dst,src)
+#define P_PMOVMSKB(dst,src)         0x0f 0xd7 _k3d_MODRM(dst,src)
+#define P_PMASKMOVQ(dst,src)        0x0f 0xf7 _k3d_MODRM(dst,src)
+#define P_PINSRW(dst,src,msk)       0x0f 0xc4 _k3d_MODRM(dst,src) msk
+#define P_PEXTRW(dst,src,msk)       0x0f 0xc5 _k3d_MODRM(dst,src) msk
+#define P_PSHUFW(dst,src,msk)       0x0f 0x70 _k3d_MODRM(dst,src) msk
+#define P_MOVNTQ(dst,src)           0x0f 0xe7 _k3d_MODRM(src,dst)
+/* Memory/offset pragma encodings: ModRM | 0x40 followed by the offset byte `off`. */
+#define P_PF2IDM(dst,src,off)       0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x1d
+#define P_PFACCM(dst,src,off)       0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xae
+#define P_PFADDM(dst,src,off)       0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9e
+#define P_PFCMPEQM(dst,src,off)     0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb0
+#define P_PFCMPGEM(dst,src,off)     0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x90
+#define P_PFCMPGTM(dst,src,off)     0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa0
+#define P_PFMAXM(dst,src,off)       0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa4
+#define P_PFMINM(dst,src,off)       0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x94
+#define P_PFMULM(dst,src,off)       0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb4
+#define P_PFRCPM(dst,src,off)       0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x96
+#define P_PFRCPIT1M(dst,src,off)    0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa6
+#define P_PFRCPIT2M(dst,src,off)    0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb6
+#define P_PFRSQRTM(dst,src,off)     0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x97
+#define P_PFRSQIT1M(dst,src,off)    0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa7
+#define P_PFSUBM(dst,src,off)       0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9a
+#define P_PFSUBRM(dst,src,off)      0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xaa
+#define P_PI2FDM(dst,src,off)       0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x0d
+#define P_PAVGUSBM(dst,src,off)     0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbf
+#define P_PMULHRWM(dst,src,off)     0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb7
+#define P_PFNACCM(dst,src,off)      0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a
+#define P_FPPNACCM(dst,src,off)     0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e
+#define P_PSWAPDM(dst,src,off)      0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb
+#define P_PMINUBM(dst,src,off)      0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PMAXUBM(dst,src,off)      0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PMINSWM(dst,src,off)      0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PMAXSWM(dst,src,off)      0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PMULHUWM(dst,src,off)     0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PAVGBM(dst,src,off)       0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PAVGWM(dst,src,off)       0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PSADBWM(dst,src,off)      0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PMOVMSKBM(dst,src,off)    0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off
+#define P_MOVNTQM(dst,src,off)      0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off
+#define P_PMASKMOVQM(dst,src,off)   0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off
+#define P_PINSRWM(dst,src,off,msk)  0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk
+#define P_PSHUFWM(dst,src,off,msk)  0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk
+
+/* Upper-case spellings of the register-form 3DNow! pragma macros (aliases of p_*). */
+#define P_PF2ID(dst,src)            p_pf2id(dst,src)
+#define P_PFACC(dst,src)            p_pfacc(dst,src)
+#define P_PFADD(dst,src)            p_pfadd(dst,src)
+#define P_PFCMPEQ(dst,src)          p_pfcmpeq(dst,src)
+#define P_PFCMPGE(dst,src)          p_pfcmpge(dst,src)
+#define P_PFCMPGT(dst,src)          p_pfcmpgt(dst,src)
+#define P_PFMAX(dst,src)            p_pfmax(dst,src)
+#define P_PFMIN(dst,src)            p_pfmin(dst,src)
+#define P_PFMUL(dst,src)            p_pfmul(dst,src)
+#define P_PFRCP(dst,src)            p_pfrcp(dst,src)
+#define P_PFRCPIT1(dst,src)         p_pfrcpit1(dst,src)
+#define P_PFRCPIT2(dst,src)         p_pfrcpit2(dst,src)
+#define P_PFRSQRT(dst,src)          p_pfrsqrt(dst,src)
+#define P_PFRSQIT1(dst,src)         p_pfrsqit1(dst,src)
+#define P_PFSUB(dst,src)            p_pfsub(dst,src)
+#define P_PFSUBR(dst,src)           p_pfsubr(dst,src)
+#define P_PI2FD(dst,src)            p_pi2fd(dst,src)
+#define P_FEMMS                     p_femms
+#define P_PAVGUSB(dst,src)          p_pavgusb(dst,src)
+#define P_PMULHRW(dst,src)          p_pmulhrw(dst,src)
+#define P_PREFETCH(src)             p_prefetch(src)
+#define P_PREFETCHW(src)            p_prefetchw(src)
+#define p_CPUID                     0x0f 0xa2
+#define p_pf2idm(dst,src,off)       P_PF2IDM(dst,src,off)
+#define p_pfaccm(dst,src,off)       P_PFACCM(dst,src,off)
+#define p_pfaddm(dst,src,off)       P_PFADDM(dst,src,off)
+#define p_pfcmpeqm(dst,src,off)     P_PFCMPEQM(dst,src,off)
+#define p_pfcmpgem(dst,src,off)     P_PFCMPGEM(dst,src,off)
+#define p_pfcmpgtm(dst,src,off)     P_PFCMPGTM(dst,src,off)
+#define p_pfmaxm(dst,src,off)       P_PFMAXM(dst,src,off)
+#define p_pfminm(dst,src,off)       P_PFMINM(dst,src,off)
+#define p_pfmulm(dst,src,off)       P_PFMULM(dst,src,off)
+#define p_pfrcpm(dst,src,off)       P_PFRCPM(dst,src,off)
+#define p_pfrcpit1m(dst,src,off)    P_PFRCPIT1M(dst,src,off)
+#define p_pfrcpit2m(dst,src,off)    P_PFRCPIT2M(dst,src,off)
+#define p_pfrsqrtm(dst,src,off)     P_PFRSQRTM(dst,src,off)
+#define p_pfrsqit1m(dst,src,off)    P_PFRSQIT1M(dst,src,off)
+#define p_pfsubm(dst,src,off)       P_PFSUBM(dst,src,off)
+#define p_pfsubrm(dst,src,off)      P_PFSUBRM(dst,src,off)
+#define p_pi2fdm(dst,src,off)       P_PI2FDM(dst,src,off)
+#define p_pavgusbm(dst,src,off)     P_PAVGUSBM(dst,src,off)
+#define p_pmulhrwm(dst,src,off)     P_PMULHRWM(dst,src,off)
+/* Lower-case spellings of the K7 pragma macros. The P_* byte encodings are defined earlier in this header, so the alias points from p_* to P_*. */
+#define p_pfnacc(dst,src)           P_PFNACC(dst,src)
+#define p_pfpnacc(dst,src)          P_FPPNACC(dst,src)
+#define p_pswapd(dst,src)           P_PSWAPD(dst,src)
+#define p_pminub(dst,src)           P_PMINUB(dst,src)
+#define p_pmaxub(dst,src)           P_PMAXUB(dst,src)
+#define p_pminsw(dst,src)           P_PMINSW(dst,src)
+#define p_pmaxsw(dst,src)           P_PMAXSW(dst,src)
+#define p_pmulhuw(dst,src)          P_PMULHUW(dst,src)
+#define p_pavgb(dst,src)            P_PAVGB(dst,src)
+#define p_pavgw(dst,src)            P_PAVGW(dst,src)
+#define p_psadbw(dst,src)           P_PSADBW(dst,src)
+#define p_pmovmskb(dst,src)         P_PMOVMSKB(dst,src)
+#define p_pmaskmovq(dst,src)        P_PMASKMOVQ(dst,src)
+#define p_pinsrw(dst,src,msk)       P_PINSRW(dst,src,msk)
+#define p_pextrw(dst,src,msk)       P_PEXTRW(dst,src,msk)
+#define p_pshufw(dst,src,msk)       P_PSHUFW(dst,src,msk)
+#define p_movntq(dst,src)           P_MOVNTQ(dst,src)
+
+#define p_pfnaccm(dst,src,off)      P_PFNACCM(dst,src,off)
+#define p_pfpnaccm(dst,src,off)     P_FPPNACCM(dst,src,off)
+#define p_pswapdm(dst,src,off)      P_PSWAPDM(dst,src,off)
+#define p_pminubm(dst,src,off)      P_PMINUBM(dst,src,off)
+#define p_pmaxubm(dst,src,off)      P_PMAXUBM(dst,src,off)
+#define p_pminswm(dst,src,off)      P_PMINSWM(dst,src,off)
+#define p_pmaxswm(dst,src,off)      P_PMAXSWM(dst,src,off)
+#define p_pmulhuwm(dst,src,off)     P_PMULHUWM(dst,src,off)
+#define p_pavgbm(dst,src,off)       P_PAVGBM(dst,src,off)
+#define p_pavgwm(dst,src,off)       P_PAVGWM(dst,src,off)
+#define p_psadbwm(dst,src,off)      P_PSADBWM(dst,src,off)
+#define p_pmovmskbm(dst,src,off)    P_PMOVMSKBM(dst,src,off)
+#define p_pmaskmovqm(dst,src,off)   P_PMASKMOVQM(dst,src,off)
+#define p_pinsrwm(dst,src,off,msk)  P_PINSRWM(dst,src,off,msk)
+#define p_pshufwm(dst,src,off,msk)  P_PSHUFWM(dst,src,off,msk)
+#define p_movntqm(dst,src,off)      P_MOVNTQM(dst,src,off)
+
+#elif defined (_MSC_VER) && !defined (__MWERKS__)
+// The Microsoft Visual C++ version of the 3DNow! macros.
+
+// Stop the "no EMMS" warning, since it doesn't detect FEMMS properly
+#pragma warning(disable:4799)
+
+// Defines for operands.
+#define _K3D_MM0 0xc0
+#define _K3D_MM1 0xc1
+#define _K3D_MM2 0xc2
+#define _K3D_MM3 0xc3
+#define _K3D_MM4 0xc4
+#define _K3D_MM5 0xc5
+#define _K3D_MM6 0xc6
+#define _K3D_MM7 0xc7
+#define _K3D_mm0 0xc0
+#define _K3D_mm1 0xc1
+#define _K3D_mm2 0xc2
+#define _K3D_mm3 0xc3
+#define _K3D_mm4 0xc4
+#define _K3D_mm5 0xc5
+#define _K3D_mm6 0xc6
+#define _K3D_mm7 0xc7
+#define _K3D_EAX 0x00
+#define _K3D_ECX 0x01
+#define _K3D_EDX 0x02
+#define _K3D_EBX 0x03
+#define _K3D_ESI 0x06
+#define _K3D_EDI 0x07
+#define _K3D_eax 0x00
+#define _K3D_ecx 0x01
+#define _K3D_edx 0x02
+#define _K3D_ebx 0x03
+#define _K3D_esi 0x06
+#define _K3D_edi 0x07
+// Operand codes: MMn/mmn are 0xc0 | n and the listed general registers are their register number (values 4 and 5 are not provided).
+// These defines are for compatibility with the previous version of the header file.
+#define _K3D_M0 0xc0
+#define _K3D_M1 0xc1
+#define _K3D_M2 0xc2
+#define _K3D_M3 0xc3
+#define _K3D_M4 0xc4
+#define _K3D_M5 0xc5
+#define _K3D_M6 0xc6
+#define _K3D_M7 0xc7
+#define _K3D_m0 0xc0
+#define _K3D_m1 0xc1
+#define _K3D_m2 0xc2
+#define _K3D_m3 0xc3
+#define _K3D_m4 0xc4
+#define _K3D_m5 0xc5
+#define _K3D_m6 0xc6
+#define _K3D_m7 0xc7
+#define _K3D__EAX 0x00
+#define _K3D__ECX 0x01
+#define _K3D__EDX 0x02
+#define _K3D__EBX 0x03
+#define _K3D__ESI 0x06
+#define _K3D__EDI 0x07
+#define _K3D__eax 0x00
+#define _K3D__ecx 0x01
+#define _K3D__edx 0x02
+#define _K3D__ebx 0x03
+#define _K3D__esi 0x06
+#define _K3D__edi 0x07
+// The Inj* macros that follow paste these names (_K3D_##dst, _K3D_##src) and combine them as ((dst & 0x3f) << 3) | src.
+// General 3DNow! instruction format that is supported by
+// these macros. Note that only the most basic form of memory
+// operands is supported by these macros.
+ +#define InjK3DOps(dst,src,inst) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0f \ + _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \ + _asm _emit _3DNowOpcode##inst \ +} + +#define InjK3DMOps(dst,src,off,inst) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0f \ + _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \ + _asm _emit off \ + _asm _emit _3DNowOpcode##inst \ +} + +#define InjMMXOps(dst,src,inst) \ +{ \ + _asm _emit 0x0f \ + _asm _emit _3DNowOpcode##inst \ + _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \ +} + +#define InjMMXMOps(dst,src,off,inst) \ +{ \ + _asm _emit 0x0f \ + _asm _emit _3DNowOpcode##inst \ + _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \ + _asm _emit off \ +} + +#define _3DNowOpcodePF2ID 0x1d +#define _3DNowOpcodePFACC 0xae +#define _3DNowOpcodePFADD 0x9e +#define _3DNowOpcodePFCMPEQ 0xb0 +#define _3DNowOpcodePFCMPGE 0x90 +#define _3DNowOpcodePFCMPGT 0xa0 +#define _3DNowOpcodePFMAX 0xa4 +#define _3DNowOpcodePFMIN 0x94 +#define _3DNowOpcodePFMUL 0xb4 +#define _3DNowOpcodePFRCP 0x96 +#define _3DNowOpcodePFRCPIT1 0xa6 +#define _3DNowOpcodePFRCPIT2 0xb6 +#define _3DNowOpcodePFRSQRT 0x97 +#define _3DNowOpcodePFRSQIT1 0xa7 +#define _3DNowOpcodePFSUB 0x9a +#define _3DNowOpcodePFSUBR 0xaa +#define _3DNowOpcodePI2FD 0x0d +#define _3DNowOpcodePAVGUSB 0xbf +#define _3DNowOpcodePMULHRW 0xb7 +#define _3DNowOpcodePFNACC 0x8a +#define _3DNowOpcodeFPPNACC 0x8e +#define _3DNowOpcodePSWAPD 0xbb +#define _3DNowOpcodePMINUB 0xda +#define _3DNowOpcodePMAXUB 0xde +#define _3DNowOpcodePMINSW 0xea +#define _3DNowOpcodePMAXSW 0xee +#define _3DNowOpcodePMULHUW 0xe4 +#define _3DNowOpcodePAVGB 0xe0 +#define _3DNowOpcodePAVGW 0xe3 +#define _3DNowOpcodePSADBW 0xf6 +#define _3DNowOpcodePMOVMSKB 0xd7 +#define _3DNowOpcodePMASKMOVQ 0xf7 +#define _3DNowOpcodePINSRW 0xc4 +#define _3DNowOpcodePEXTRW 0xc5 +#define _3DNowOpcodePSHUFW 0x70 +#define _3DNowOpcodeMOVNTQ 0xe7 +#define _3DNowOpcodePREFETCHT 0x18 + + +#define PF2ID(dst,src) InjK3DOps(dst, 
src, PF2ID) +#define PFACC(dst,src) InjK3DOps(dst, src, PFACC) +#define PFADD(dst,src) InjK3DOps(dst, src, PFADD) +#define PFCMPEQ(dst,src) InjK3DOps(dst, src, PFCMPEQ) +#define PFCMPGE(dst,src) InjK3DOps(dst, src, PFCMPGE) +#define PFCMPGT(dst,src) InjK3DOps(dst, src, PFCMPGT) +#define PFMAX(dst,src) InjK3DOps(dst, src, PFMAX) +#define PFMIN(dst,src) InjK3DOps(dst, src, PFMIN) +#define PFMUL(dst,src) InjK3DOps(dst, src, PFMUL) +#define PFRCP(dst,src) InjK3DOps(dst, src, PFRCP) +#define PFRCPIT1(dst,src) InjK3DOps(dst, src, PFRCPIT1) +#define PFRCPIT2(dst,src) InjK3DOps(dst, src, PFRCPIT2) +#define PFRSQRT(dst,src) InjK3DOps(dst, src, PFRSQRT) +#define PFRSQIT1(dst,src) InjK3DOps(dst, src, PFRSQIT1) +#define PFSUB(dst,src) InjK3DOps(dst, src, PFSUB) +#define PFSUBR(dst,src) InjK3DOps(dst, src, PFSUBR) +#define PI2FD(dst,src) InjK3DOps(dst, src, PI2FD) +#define PAVGUSB(dst,src) InjK3DOps(dst, src, PAVGUSB) +#define PMULHRW(dst,src) InjK3DOps(dst, src, PMULHRW) + +#define FEMMS \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0e \ +} + +#define PREFETCH(src) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit (_K3D_##src & 0x07) \ +} + +/* Prefetch with a short offset, < 127 or > -127 + Carefull! Doesn't check for your offset being + in range. 
*/ + +#define PREFETCHM(src,off) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit (0x40 | (_K3D_##src & 0x07)) \ + _asm _emit off \ +} + +/* Prefetch with a long offset */ + +#define PREFETCHMLONG(src,off) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit (0x80 | (_K3D_##src & 0x07)) \ + _asm _emit (off & 0x000000ff) \ + _asm _emit (off & 0x0000ff00) >> 8 \ + _asm _emit (off & 0x00ff0000) >> 16 \ + _asm _emit (off & 0xff000000) >> 24 \ +} + +#define PREFETCHW(src) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit (0x08 | (_K3D_##src & 0x07)) \ +} + +#define PREFETCHWM(src,off) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit 0x48 | (_K3D_##src & 0x07) \ + _asm _emit off \ +} + +#define PREFETCHWMLONG(src,off) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit 0x88 | (_K3D_##src & 0x07) \ + _asm _emit (off & 0x000000ff) \ + _asm _emit (off & 0x0000ff00) >> 8 \ + _asm _emit (off & 0x00ff0000) >> 16 \ + _asm _emit (off & 0xff000000) >> 24 \ +} + +#define CPUID \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0xa2 \ +} + + +/* Defines for new, K7 opcodes */ +#define SFENCE \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0xae \ + _asm _emit 0xf8 \ +} + +#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC) +#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC) +#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD) +#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB) +#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB) +#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW) +#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW) +#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW) +#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB) +#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW) +#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW) +#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB) +#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ) +#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) _asm _emit msk +#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) 
_asm _emit msk +#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) _asm _emit msk +#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ) +#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT) +#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT) +#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT) +#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT) + + +/* Memory/offset versions of the opcodes */ +#define PAVGUSBM(dst,src,off) InjK3DMOps(dst,src,off,PAVGUSB) +#define PF2IDM(dst,src,off) InjK3DMOps(dst,src,off,PF2ID) +#define PFACCM(dst,src,off) InjK3DMOps(dst,src,off,PFACC) +#define PFADDM(dst,src,off) InjK3DMOps(dst,src,off,PFADD) +#define PFCMPEQM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPEQ) +#define PFCMPGEM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGE) +#define PFCMPGTM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGT) +#define PFMAXM(dst,src,off) InjK3DMOps(dst,src,off,PFMAX) +#define PFMINM(dst,src,off) InjK3DMOps(dst,src,off,PFMIN) +#define PFMULM(dst,src,off) InjK3DMOps(dst,src,off,PFMUL) +#define PFRCPM(dst,src,off) InjK3DMOps(dst,src,off,PFRCP) +#define PFRCPIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT1) +#define PFRCPIT2M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT2) +#define PFRSQRTM(dst,src,off) InjK3DMOps(dst,src,off,PFRSQRT) +#define PFRSQIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRSQIT1) +#define PFSUBM(dst,src,off) InjK3DMOps(dst,src,off,PFSUB) +#define PFSUBRM(dst,src,off) InjK3DMOps(dst,src,off,PFSUBR) +#define PI2FDM(dst,src,off) InjK3DMOps(dst,src,off,PI2FD) +#define PMULHRWM(dst,src,off) InjK3DMOps(dst,src,off,PMULHRW) + + +/* Memory/offset versions of the K7 opcodes */ +#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC) +#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC) +#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD) +#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB) +#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB) +#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW) +#define 
PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW) +#define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW) +#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB) +#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW) +#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW) +#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB) +#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ) +#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW) _asm _emit msk +#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW) _asm _emit msk +#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ) +#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT) +#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT) +#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT) +#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT) + + +#else + +/* Assume built-in support for 3DNow! opcodes, replace macros with opcodes */ +#define PAVGUSB(dst,src) pavgusb dst,src +#define PF2ID(dst,src) pf2id dst,src +#define PFACC(dst,src) pfacc dst,src +#define PFADD(dst,src) pfadd dst,src +#define PFCMPEQ(dst,src) pfcmpeq dst,src +#define PFCMPGE(dst,src) pfcmpge dst,src +#define PFCMPGT(dst,src) pfcmpgt dst,src +#define PFMAX(dst,src) pfmax dst,src +#define PFMIN(dst,src) pfmin dst,src +#define PFMUL(dst,src) pfmul dst,src +#define PFRCP(dst,src) pfrcp dst,src +#define PFRCPIT1(dst,src) pfrcpit1 dst,src +#define PFRCPIT2(dst,src) pfrcpit2 dst,src +#define PFRSQRT(dst,src) pfrsqrt dst,src +#define PFRSQIT1(dst,src) pfrsqit1 dst,src +#define PFSUB(dst,src) pfsub dst,src +#define PFSUBR(dst,src) pfsubr dst,src +#define PI2FD(dst,src) pi2fd dst,src +#define PMULHRW(dst,src) pmulhrw dst,src +#define PREFETCH(src) prefetch src +#define PREFETCHW(src) prefetchw src + +#define PAVGUSBM(dst,src,off) pavgusb dst,[src+off] +#define PF2IDM(dst,src,off) PF2ID dst,[src+off] +#define PFACCM(dst,src,off) PFACC dst,[src+off] +#define 
PFADDM(dst,src,off) PFADD dst,[src+off] +#define PFCMPEQM(dst,src,off) PFCMPEQ dst,[src+off] +#define PFCMPGEM(dst,src,off) PFCMPGE dst,[src+off] +#define PFCMPGTM(dst,src,off) PFCMPGT dst,[src+off] +#define PFMAXM(dst,src,off) PFMAX dst,[src+off] +#define PFMINM(dst,src,off) PFMIN dst,[src+off] +#define PFMULM(dst,src,off) PFMUL dst,[src+off] +#define PFRCPM(dst,src,off) PFRCP dst,[src+off] +#define PFRCPIT1M(dst,src,off) PFRCPIT1 dst,[src+off] +#define PFRCPIT2M(dst,src,off) PFRCPIT2 dst,[src+off] +#define PFRSQRTM(dst,src,off) PFRSQRT dst,[src+off] +#define PFRSQIT1M(dst,src,off) PFRSQIT1 dst,[src+off] +#define PFSUBM(dst,src,off) PFSUB dst,[src+off] +#define PFSUBRM(dst,src,off) PFSUBR dst,[src+off] +#define PI2FDM(dst,src,off) PI2FD dst,[src+off] +#define PMULHRWM(dst,src,off) PMULHRW dst,[src+off] + + +#if defined (__MWERKS__) +// At the moment, CodeWarrior does not support these opcodes, so hand-assemble them + +// Defines for operands. +#define _K3D_MM0 0xc0 +#define _K3D_MM1 0xc1 +#define _K3D_MM2 0xc2 +#define _K3D_MM3 0xc3 +#define _K3D_MM4 0xc4 +#define _K3D_MM5 0xc5 +#define _K3D_MM6 0xc6 +#define _K3D_MM7 0xc7 +#define _K3D_mm0 0xc0 +#define _K3D_mm1 0xc1 +#define _K3D_mm2 0xc2 +#define _K3D_mm3 0xc3 +#define _K3D_mm4 0xc4 +#define _K3D_mm5 0xc5 +#define _K3D_mm6 0xc6 +#define _K3D_mm7 0xc7 +#define _K3D_EAX 0x00 +#define _K3D_ECX 0x01 +#define _K3D_EDX 0x02 +#define _K3D_EBX 0x03 +#define _K3D_ESI 0x06 +#define _K3D_EDI 0x07 +#define _K3D_eax 0x00 +#define _K3D_ecx 0x01 +#define _K3D_edx 0x02 +#define _K3D_ebx 0x03 +#define _K3D_esi 0x06 +#define _K3D_edi 0x07 +#define _K3D_EAX 0x00 +#define _K3D_ECX 0x01 +#define _K3D_EDX 0x02 +#define _K3D_EBX 0x03 +#define _K3D_ESI 0x06 +#define _K3D_EDI 0x07 +#define _K3D_eax 0x00 +#define _K3D_ecx 0x01 +#define _K3D_edx 0x02 +#define _K3D_ebx 0x03 +#define _K3D_esi 0x06 +#define _K3D_edi 0x07 + +#define InjK3DOps(dst,src,inst) \ + db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src), _3DNowOpcode##inst + 
+#define InjK3DMOps(dst,src,off,inst) \
+    db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off, _3DNowOpcode##inst
+
+#define InjMMXOps(dst,src,inst) \
+    db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src)
+
+#define InjMMXMOps(dst,src,off,inst) \
+    db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off
+/* NOTE(review): the _3DNowOpcode* constants are only visibly defined in the MSVC branch -- confirm they are reachable from this branch. */
+#define PFNACC(dst,src)             InjK3DOps(dst,src,PFNACC)
+#define PFPNACC(dst,src)            InjK3DOps(dst,src,PFPNACC)
+#define PSWAPD(dst,src)             InjK3DOps(dst,src,PSWAPD)
+#define PMINUB(dst,src)             InjMMXOps(dst,src,PMINUB)
+#define PMAXUB(dst,src)             InjMMXOps(dst,src,PMAXUB)
+#define PMINSW(dst,src)             InjMMXOps(dst,src,PMINSW)
+#define PMAXSW(dst,src)             InjMMXOps(dst,src,PMAXSW)
+#define PMULHUW(dst,src)            InjMMXOps(dst,src,PMULHUW)
+#define PAVGB(dst,src)              InjMMXOps(dst,src,PAVGB)
+#define PAVGW(dst,src)              InjMMXOps(dst,src,PAVGW)
+#define PSADBW(dst,src)             InjMMXOps(dst,src,PSADBW)
+#define PMOVMSKB(dst,src)           InjMMXOps(dst,src,PMOVMSKB)
+#define PMASKMOVQ(dst,src)          InjMMXOps(dst,src,PMASKMOVQ)
+#define PINSRW(dst,src,msk)         InjMMXOps(dst,src,PINSRW), msk
+#define PEXTRW(dst,src,msk)         InjMMXOps(dst,src,PEXTRW), msk
+#define PSHUFW(dst,src,msk)         InjMMXOps(dst,src,PSHUFW), msk
+#define MOVNTQ(dst,src)             InjMMXOps(src,dst,MOVNTQ)
+#define PREFETCHNTA(mem)            InjMMXOps(mm0,mem,PREFETCHT)
+#define PREFETCHT0(mem)             InjMMXOps(mm1,mem,PREFETCHT)
+#define PREFETCHT1(mem)             InjMMXOps(mm2,mem,PREFETCHT)
+#define PREFETCHT2(mem)             InjMMXOps(mm3,mem,PREFETCHT)
+
+/* The mask byte of PINSRW/PEXTRW/PSHUFW above is appended to the db list with ", msk", matching the ...M forms below. */
+/* Memory/offset versions of the K7 opcodes */
+#define PFNACCM(dst,src,off)        InjK3DMOps(dst,src,off,PFNACC)
+#define PFPNACCM(dst,src,off)       InjK3DMOps(dst,src,off,PFPNACC)
+#define PSWAPDM(dst,src,off)        InjK3DMOps(dst,src,off,PSWAPD)
+#define PMINUBM(dst,src,off)        InjMMXMOps(dst,src,off,PMINUB)
+#define PMAXUBM(dst,src,off)        InjMMXMOps(dst,src,off,PMAXUB)
+#define PMINSWM(dst,src,off)        InjMMXMOps(dst,src,off,PMINSW)
+#define PMAXSWM(dst,src,off)        InjMMXMOps(dst,src,off,PMAXSW)
+#define PMULHUWM(dst,src,off)       InjMMXMOps(dst,src,off,PMULHUW)
+#define PAVGBM(dst,src,off)         InjMMXMOps(dst,src,off,PAVGB)
+#define PAVGWM(dst,src,off)         InjMMXMOps(dst,src,off,PAVGW)
+#define PSADBWM(dst,src,off)        InjMMXMOps(dst,src,off,PSADBW)
+#define PMOVMSKBM(dst,src,off)      InjMMXMOps(dst,src,off,PMOVMSKB)
+#define PMASKMOVQM(dst,src,off)     InjMMXMOps(dst,src,off,PMASKMOVQ)
+#define PINSRWM(dst,src,off,msk)    InjMMXMOps(dst,src,off,PINSRW), msk
+#define PEXTRWM(dst,src,off,msk)    InjMMXMOps(dst,src,off,PEXTRW), msk
+#define PSHUFWM(dst,src,off,msk)    InjMMXMOps(dst,src,off,PSHUFW), msk
+#define MOVNTQM(dst,src,off)        InjMMXMOps(src,dst,off,MOVNTQ)
+#define PREFETCHNTAM(mem,off)       InjMMXMOps(mm0,mem,off,PREFETCHT)
+#define PREFETCHT0M(mem,off)        InjMMXMOps(mm1,mem,off,PREFETCHT)
+#define PREFETCHT1M(mem,off)        InjMMXMOps(mm2,mem,off,PREFETCHT)
+#define PREFETCHT2M(mem,off)        InjMMXMOps(mm3,mem,off,PREFETCHT)
+
+
+#else
+/* Assembler with built-in K7 support: each macro expands to the mnemonic itself. */
+#define PFNACC(dst,src)             PFNACC dst,src
+#define PFPNACC(dst,src)            PFPNACC dst,src
+#define PSWAPD(dst,src)             PSWAPD dst,src
+#define PMINUB(dst,src)             PMINUB dst,src
+#define PMAXUB(dst,src)             PMAXUB dst,src
+#define PMINSW(dst,src)             PMINSW dst,src
+#define PMAXSW(dst,src)             PMAXSW dst,src
+#define PMULHUW(dst,src)            PMULHUW dst,src
+#define PAVGB(dst,src)              PAVGB dst,src
+#define PAVGW(dst,src)              PAVGW dst,src
+#define PSADBW(dst,src)             PSADBW dst,src
+#define PMOVMSKB(dst,src)           PMOVMSKB dst,src
+#define PMASKMOVQ(dst,src)          PMASKMOVQ dst,src
+#define PINSRW(dst,src,msk)         PINSRW dst,src,msk
+#define PEXTRW(dst,src,msk)         PEXTRW dst,src,msk
+#define PSHUFW(dst,src,msk)         PSHUFW dst,src,msk
+#define MOVNTQ(dst,src)             MOVNTQ dst,src
+
+#define PFNACCM(dst,src,off)        PFNACC dst,[src+off]
+#define PFPNACCM(dst,src,off)       PFPNACC dst,[src+off]
+#define PSWAPDM(dst,src,off)        PSWAPD dst,[src+off]
+#define PMINUBM(dst,src,off)        PMINUB dst,[src+off]
+#define PMAXUBM(dst,src,off)        PMAXUB dst,[src+off]
+#define PMINSWM(dst,src,off)        PMINSW dst,[src+off]
+#define PMAXSWM(dst,src,off)        PMAXSW dst,[src+off]
+#define PMULHUWM(dst,src,off)       PMULHUW dst,[src+off]
+#define PAVGBM(dst,src,off)         PAVGB dst,[src+off]
+#define PAVGWM(dst,src,off)         PAVGW dst,[src+off]
+#define PSADBWM(dst,src,off)        PSADBW dst,[src+off]
+#define PMOVMSKBM(dst,src,off)      PMOVMSKB dst,[src+off]
+#define PMASKMOVQM(dst,src,off)     PMASKMOVQ dst,[src+off]
+#define PINSRWM(dst,src,off,msk)    PINSRW dst,[src+off],msk
+#define PEXTRWM(dst,src,off,msk)    PEXTRW dst,[src+off],msk
+#define PSHUFWM(dst,src,off,msk)    PSHUFW dst,[src+off],msk
+#define MOVNTQM(dst,src,off)        MOVNTQ dst,[src+off]
+
+#endif
+
+#endif
+
+/* Just to deal with lower case. */
+#define pf2id(dst,src)              PF2ID(dst,src)
+#define pfacc(dst,src)              PFACC(dst,src)
+#define pfadd(dst,src)              PFADD(dst,src)
+#define pfcmpeq(dst,src)            PFCMPEQ(dst,src)
+#define pfcmpge(dst,src)            PFCMPGE(dst,src)
+#define pfcmpgt(dst,src)            PFCMPGT(dst,src)
+#define pfmax(dst,src)              PFMAX(dst,src)
+#define pfmin(dst,src)              PFMIN(dst,src)
+#define pfmul(dst,src)              PFMUL(dst,src)
+#define pfrcp(dst,src)              PFRCP(dst,src)
+#define pfrcpit1(dst,src)           PFRCPIT1(dst,src)
+#define pfrcpit2(dst,src)           PFRCPIT2(dst,src)
+#define pfrsqrt(dst,src)            PFRSQRT(dst,src)
+#define pfrsqit1(dst,src)           PFRSQIT1(dst,src)
+#define pfsub(dst,src)              PFSUB(dst,src)
+#define pfsubr(dst,src)             PFSUBR(dst,src)
+#define pi2fd(dst,src)              PI2FD(dst,src)
+#define femms                       FEMMS
+#define pavgusb(dst,src)            PAVGUSB(dst,src)
+#define pmulhrw(dst,src)            PMULHRW(dst,src)
+#define prefetch(src)               PREFETCH(src)
+#define prefetchw(src)              PREFETCHW(src)
+
+#define prefetchm(src,off)          PREFETCHM(src,off)
+#define prefetchmlong(src,off)      PREFETCHMLONG(src,off)
+#define prefetchwm(src,off)         PREFETCHWM(src,off)
+#define prefetchwmlong(src,off)     PREFETCHWMLONG(src,off)
+
+#define pfnacc(dst,src)             PFNACC(dst,src)
+#define pfpnacc(dst,src)            PFPNACC(dst,src)
+#define pswapd(dst,src)             PSWAPD(dst,src)
+#define pminub(dst,src)             PMINUB(dst,src)
+#define pmaxub(dst,src)             PMAXUB(dst,src)
+#define pminsw(dst,src)             PMINSW(dst,src)
+#define pmaxsw(dst,src)             PMAXSW(dst,src)
+#define pmulhuw(dst,src)            PMULHUW(dst,src)
+#define pavgb(dst,src)              PAVGB(dst,src)
+#define pavgw(dst,src)              PAVGW(dst,src)
+#define psadbw(dst,src)             PSADBW(dst,src)
+#define pmovmskb(dst,src)           PMOVMSKB(dst,src)
+#define pmaskmovq(dst,src)          PMASKMOVQ(dst,src)
+#define pinsrw(dst,src,msk)         PINSRW(dst,src,msk)
+#define pextrw(dst,src,msk)         PEXTRW(dst,src,msk)
+#define pshufw(dst,src,msk)         PSHUFW(dst,src,msk)
+#define movntq(dst,src)             MOVNTQ(dst,src)
+#define prefetchnta(mem)            PREFETCHNTA(mem)
+#define prefetcht0(mem)             PREFETCHT0(mem)
+#define prefetcht1(mem)             PREFETCHT1(mem)
+#define prefetcht2(mem)             PREFETCHT2(mem)
+
+
+#define pavgusbm(dst,src,off)       PAVGUSBM(dst,src,off)
+#define pf2idm(dst,src,off)         PF2IDM(dst,src,off)
+#define pfaccm(dst,src,off)         PFACCM(dst,src,off)
+#define pfaddm(dst,src,off)         PFADDM(dst,src,off)
+#define pfcmpeqm(dst,src,off)       PFCMPEQM(dst,src,off)
+#define pfcmpgem(dst,src,off)       PFCMPGEM(dst,src,off)
+#define pfcmpgtm(dst,src,off)       PFCMPGTM(dst,src,off)
+#define pfmaxm(dst,src,off)         PFMAXM(dst,src,off)
+#define pfminm(dst,src,off)         PFMINM(dst,src,off)
+#define pfmulm(dst,src,off)         PFMULM(dst,src,off)
+#define pfrcpm(dst,src,off)         PFRCPM(dst,src,off)
+#define pfrcpit1m(dst,src,off)      PFRCPIT1M(dst,src,off)
+#define pfrcpit2m(dst,src,off)      PFRCPIT2M(dst,src,off)
+#define pfrsqrtm(dst,src,off)       PFRSQRTM(dst,src,off)
+#define pfrsqit1m(dst,src,off)      PFRSQIT1M(dst,src,off)
+#define pfsubm(dst,src,off)         PFSUBM(dst,src,off)
+#define pfsubrm(dst,src,off)        PFSUBRM(dst,src,off)
+#define pi2fdm(dst,src,off)         PI2FDM(dst,src,off)
+#define pmulhrwm(dst,src,off)       PMULHRWM(dst,src,off)
+#define cpuid                       CPUID
+#define sfence                      SFENCE
+/* Lower-case memory/offset aliases; each forwards to the matching ...M macro. */
+#define pfnaccm(dst,src,off)        PFNACCM(dst,src,off)
+#define pfpnaccm(dst,src,off)       PFPNACCM(dst,src,off)
+#define pswapdm(dst,src,off)        PSWAPDM(dst,src,off)
+#define pminubm(dst,src,off)        PMINUBM(dst,src,off)
+#define pmaxubm(dst,src,off)        PMAXUBM(dst,src,off)
+#define pminswm(dst,src,off)        PMINSWM(dst,src,off)
+#define pmaxswm(dst,src,off)        PMAXSWM(dst,src,off)
+#define pmulhuwm(dst,src,off)       PMULHUWM(dst,src,off)
+#define pavgbm(dst,src,off)         PAVGBM(dst,src,off)
+#define pavgwm(dst,src,off)         PAVGWM(dst,src,off)
+#define psadbwm(dst,src,off)        PSADBWM(dst,src,off)
+#define pmovmskbm(dst,src,off)      PMOVMSKBM(dst,src,off)
+#define pmaskmovqm(dst,src,off)     PMASKMOVQM(dst,src,off)
+#define pinsrwm(dst,src,off,msk)    PINSRWM(dst,src,off,msk)
+#define pextrwm(dst,src,off,msk)    PEXTRWM(dst,src,off,msk)
+#define pshufwm(dst,src,off,msk)    PSHUFWM(dst,src,off,msk)
+#define movntqm(dst,src,off)        MOVNTQM(dst,src,off)
+#define prefetchntam(mem,off)       PREFETCHNTAM(mem,off)
+#define prefetcht0m(mem,off)        PREFETCHT0M(mem,off)
+#define prefetcht1m(mem,off)        PREFETCHT1M(mem,off)
+#define prefetcht2m(mem,off)        PREFETCHT2M(mem,off)
+
+#endif
diff --git a/mp/src/public/mathlib/anorms.h b/mp/src/public/mathlib/anorms.h
index ae759eb1..4f653835 100644
--- a/mp/src/public/mathlib/anorms.h
+++ b/mp/src/public/mathlib/anorms.h
@@ -1,25 +1,25 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//=============================================================================//
-
-#ifndef ANORMS_H
-#define ANORMS_H
-#ifdef _WIN32
-#pragma once
-#endif
-
-
-#include "mathlib/vector.h"
-
-
-#define NUMVERTEXNORMALS 162
-
-// the angle between consecutive g_anorms[] vectors is ~14.55 degrees
-#define VERTEXNORMAL_CONE_INNER_ANGLE DEG2RAD(7.275)
-
-extern Vector g_anorms[NUMVERTEXNORMALS];
-
-
-#endif // ANORMS_H
+//========= Copyright Valve Corporation, All rights reserved.
============// +// +// Purpose: +// +//=============================================================================// + +#ifndef ANORMS_H +#define ANORMS_H +#ifdef _WIN32 +#pragma once +#endif + + +#include "mathlib/vector.h" + + +#define NUMVERTEXNORMALS 162 + +// the angle between consecutive g_anorms[] vectors is ~14.55 degrees +#define VERTEXNORMAL_CONE_INNER_ANGLE DEG2RAD(7.275) + +extern Vector g_anorms[NUMVERTEXNORMALS]; + + +#endif // ANORMS_H diff --git a/mp/src/public/mathlib/bumpvects.h b/mp/src/public/mathlib/bumpvects.h index e0ba73fb..6939ca05 100644 --- a/mp/src/public/mathlib/bumpvects.h +++ b/mp/src/public/mathlib/bumpvects.h @@ -1,37 +1,37 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -// $Workfile: $ -// $Date: $ -// $NoKeywords: $ -//=============================================================================// - -#ifndef BUMPVECTS_H -#define BUMPVECTS_H - -#ifdef _WIN32 -#pragma once -#endif - -#include "mathlib/mathlib.h" - -#define OO_SQRT_2 0.70710676908493042f -#define OO_SQRT_3 0.57735025882720947f -#define OO_SQRT_6 0.40824821591377258f -// sqrt( 2 / 3 ) -#define OO_SQRT_2_OVER_3 0.81649661064147949f - -#define NUM_BUMP_VECTS 3 - -const TableVector g_localBumpBasis[NUM_BUMP_VECTS] = -{ - { OO_SQRT_2_OVER_3, 0.0f, OO_SQRT_3 }, - { -OO_SQRT_6, OO_SQRT_2, OO_SQRT_3 }, - { -OO_SQRT_6, -OO_SQRT_2, OO_SQRT_3 } -}; - -void GetBumpNormals( const Vector& sVect, const Vector& tVect, const Vector& flatNormal, - const Vector& phongNormal, Vector bumpNormals[NUM_BUMP_VECTS] ); - -#endif // BUMPVECTS_H +//========= Copyright Valve Corporation, All rights reserved. 
============// +// +// Purpose: +// +// $Workfile: $ +// $Date: $ +// $NoKeywords: $ +//=============================================================================// + +#ifndef BUMPVECTS_H +#define BUMPVECTS_H + +#ifdef _WIN32 +#pragma once +#endif + +#include "mathlib/mathlib.h" + +#define OO_SQRT_2 0.70710676908493042f +#define OO_SQRT_3 0.57735025882720947f +#define OO_SQRT_6 0.40824821591377258f +// sqrt( 2 / 3 ) +#define OO_SQRT_2_OVER_3 0.81649661064147949f + +#define NUM_BUMP_VECTS 3 + +const TableVector g_localBumpBasis[NUM_BUMP_VECTS] = +{ + { OO_SQRT_2_OVER_3, 0.0f, OO_SQRT_3 }, + { -OO_SQRT_6, OO_SQRT_2, OO_SQRT_3 }, + { -OO_SQRT_6, -OO_SQRT_2, OO_SQRT_3 } +}; + +void GetBumpNormals( const Vector& sVect, const Vector& tVect, const Vector& flatNormal, + const Vector& phongNormal, Vector bumpNormals[NUM_BUMP_VECTS] ); + +#endif // BUMPVECTS_H diff --git a/mp/src/public/mathlib/compressed_3d_unitvec.h b/mp/src/public/mathlib/compressed_3d_unitvec.h index d9f2f597..a92dba22 100644 --- a/mp/src/public/mathlib/compressed_3d_unitvec.h +++ b/mp/src/public/mathlib/compressed_3d_unitvec.h @@ -1,284 +1,284 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -// $NoKeywords: $ -// -//=============================================================================// -#ifndef _3D_UNITVEC_H -#define _3D_UNITVEC_H - - -#define UNITVEC_DECLARE_STATICS \ - float cUnitVector::mUVAdjustment[0x2000]; \ - Vector cUnitVector::mTmpVec; - -// upper 3 bits -#define SIGN_MASK 0xe000 -#define XSIGN_MASK 0x8000 -#define YSIGN_MASK 0x4000 -#define ZSIGN_MASK 0x2000 - -// middle 6 bits - xbits -#define TOP_MASK 0x1f80 - -// lower 7 bits - ybits -#define BOTTOM_MASK 0x007f - -// unitcomp.cpp : A Unit Vector to 16-bit word conversion -// algorithm based on work of Rafael Baptista (rafael@oroboro.com) -// Accuracy improved by O.D. (punkfloyd@rocketmail.com) -// Used with Permission. - -// a compressed unit vector. 
reasonable fidelty for unit -// vectors in a 16 bit package. Good enough for surface normals -// we hope. -class cUnitVector // : public c3dMathObject -{ -public: - cUnitVector() { mVec = 0; } - cUnitVector( const Vector& vec ) - { - packVector( vec ); - } - cUnitVector( unsigned short val ) { mVec = val; } - - cUnitVector& operator=( const Vector& vec ) - { packVector( vec ); return *this; } - - operator Vector() - { - unpackVector( mTmpVec ); - return mTmpVec; - } - - void packVector( const Vector& vec ) - { - // convert from Vector to cUnitVector - - Assert( vec.IsValid()); - Vector tmp = vec; - - // input vector does not have to be unit length - // Assert( tmp.length() <= 1.001f ); - - mVec = 0; - if ( tmp.x < 0 ) { mVec |= XSIGN_MASK; tmp.x = -tmp.x; } - if ( tmp.y < 0 ) { mVec |= YSIGN_MASK; tmp.y = -tmp.y; } - if ( tmp.z < 0 ) { mVec |= ZSIGN_MASK; tmp.z = -tmp.z; } - - // project the normal onto the plane that goes through - // X0=(1,0,0),Y0=(0,1,0),Z0=(0,0,1). - // on that plane we choose an (projective!) coordinate system - // such that X0->(0,0), Y0->(126,0), Z0->(0,126),(0,0,0)->Infinity - - // a little slower... old pack was 4 multiplies and 2 adds. - // This is 2 multiplies, 2 adds, and a divide.... - float w = 126.0f / ( tmp.x + tmp.y + tmp.z ); - long xbits = (long)( tmp.x * w ); - long ybits = (long)( tmp.y * w ); - - Assert( xbits < 127 ); - Assert( xbits >= 0 ); - Assert( ybits < 127 ); - Assert( ybits >= 0 ); - - // Now we can be sure that 0<=xp<=126, 0<=yp<=126, 0<=xp+yp<=126 - // however for the sampling we want to transform this triangle - // into a rectangle. 
- if ( xbits >= 64 ) - { - xbits = 127 - xbits; - ybits = 127 - ybits; - } - - // now we that have xp in the range (0,127) and yp in - // the range (0,63), we can pack all the bits together - mVec |= ( xbits << 7 ); - mVec |= ybits; - } - - void unpackVector( Vector& vec ) - { - // if we do a straightforward backward transform - // we will get points on the plane X0,Y0,Z0 - // however we need points on a sphere that goes through - // these points. Therefore we need to adjust x,y,z so - // that x^2+y^2+z^2=1 by normalizing the vector. We have - // already precalculated the amount by which we need to - // scale, so all we do is a table lookup and a - // multiplication - - // get the x and y bits - long xbits = (( mVec & TOP_MASK ) >> 7 ); - long ybits = ( mVec & BOTTOM_MASK ); - - // map the numbers back to the triangle (0,0)-(0,126)-(126,0) - if (( xbits + ybits ) >= 127 ) - { - xbits = 127 - xbits; - ybits = 127 - ybits; - } - - // do the inverse transform and normalization - // costs 3 extra multiplies and 2 subtracts. No big deal. 
- float uvadj = mUVAdjustment[mVec & ~SIGN_MASK]; - vec.x = uvadj * (float) xbits; - vec.y = uvadj * (float) ybits; - vec.z = uvadj * (float)( 126 - xbits - ybits ); - - // set all the sign bits - if ( mVec & XSIGN_MASK ) vec.x = -vec.x; - if ( mVec & YSIGN_MASK ) vec.y = -vec.y; - if ( mVec & ZSIGN_MASK ) vec.z = -vec.z; - - Assert( vec.IsValid()); - } - - static void initializeStatics() - { - for ( int idx = 0; idx < 0x2000; idx++ ) - { - long xbits = idx >> 7; - long ybits = idx & BOTTOM_MASK; - - // map the numbers back to the triangle (0,0)-(0,127)-(127,0) - if (( xbits + ybits ) >= 127 ) - { - xbits = 127 - xbits; - ybits = 127 - ybits; - } - - // convert to 3D vectors - float x = (float)xbits; - float y = (float)ybits; - float z = (float)( 126 - xbits - ybits ); - - // calculate the amount of normalization required - mUVAdjustment[idx] = 1.0f / sqrtf( y*y + z*z + x*x ); - Assert( _finite( mUVAdjustment[idx])); - - //cerr << mUVAdjustment[idx] << "\t"; - //if ( xbits == 0 ) cerr << "\n"; - } - } - -#if 0 - void test() - { - #define TEST_RANGE 4 - #define TEST_RANDOM 100 - #define TEST_ANGERROR 1.0 - - float maxError = 0; - float avgError = 0; - int numVecs = 0; - - {for ( int x = -TEST_RANGE; x < TEST_RANGE; x++ ) - { - for ( int y = -TEST_RANGE; y < TEST_RANGE; y++ ) - { - for ( int z = -TEST_RANGE; z < TEST_RANGE; z++ ) - { - if (( x + y + z ) == 0 ) continue; - - Vector vec( (float)x, (float)y, (float)z ); - Vector vec2; - - vec.normalize(); - packVector( vec ); - unpackVector( vec2 ); - - float ang = vec.dot( vec2 ); - ang = (( fabs( ang ) > 0.99999f ) ? 
0 : (float)acos(ang)); - - if (( ang > TEST_ANGERROR ) | ( !_finite( ang ))) - { - cerr << "error: " << ang << endl; - cerr << "orig vec: " << vec.x << ",\t" - << vec.y << ",\t" << vec.z << "\tmVec: " - << mVec << endl; - cerr << "quantized vec2: " << vec2.x - << ",\t" << vec2.y << ",\t" - << vec2.z << endl << endl; - } - avgError += ang; - numVecs++; - if ( maxError < ang ) maxError = ang; - } - } - }} - - for ( int w = 0; w < TEST_RANDOM; w++ ) - { - Vector vec( genRandom(), genRandom(), genRandom()); - Vector vec2; - vec.normalize(); - - packVector( vec ); - unpackVector( vec2 ); - - float ang =vec.dot( vec2 ); - ang = (( ang > 0.999f ) ? 0 : (float)acos(ang)); - - if (( ang > TEST_ANGERROR ) | ( !_finite( ang ))) - { - cerr << "error: " << ang << endl; - cerr << "orig vec: " << vec.x << ",\t" - << vec.y << ",\t" << vec.z << "\tmVec: " - << mVec << endl; - cerr << "quantized vec2: " << vec2.x << ",\t" - << vec2.y << ",\t" - << vec2.z << endl << endl; - } - avgError += ang; - numVecs++; - if ( maxError < ang ) maxError = ang; - } - - { for ( int x = 0; x < 50; x++ ) - { - Vector vec( (float)x, 25.0f, 0.0f ); - Vector vec2; - - vec.normalize(); - packVector( vec ); - unpackVector( vec2 ); - - float ang = vec.dot( vec2 ); - ang = (( fabs( ang ) > 0.999f ) ? 0 : (float)acos(ang)); - - if (( ang > TEST_ANGERROR ) | ( !_finite( ang ))) - { - cerr << "error: " << ang << endl; - cerr << "orig vec: " << vec.x << ",\t" - << vec.y << ",\t" << vec.z << "\tmVec: " - << mVec << endl; - cerr << " quantized vec2: " << vec2.x << ",\t" - << vec2.y << ",\t" << vec2.z << endl << endl; - } - - avgError += ang; - numVecs++; - if ( maxError < ang ) maxError = ang; - }} - - cerr << "max angle error: " << maxError - << ", average error: " << avgError / numVecs - << ", num tested vecs: " << numVecs << endl; - } - - friend ostream& operator<< ( ostream& os, const cUnitVector& vec ) - { os << vec.mVec; return os; } -#endif - -//protected: // !!!! 
- - unsigned short mVec; - static float mUVAdjustment[0x2000]; - static Vector mTmpVec; -}; - -#endif // _3D_VECTOR_H - - +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// +#ifndef _3D_UNITVEC_H +#define _3D_UNITVEC_H + + +#define UNITVEC_DECLARE_STATICS \ + float cUnitVector::mUVAdjustment[0x2000]; \ + Vector cUnitVector::mTmpVec; + +// upper 3 bits +#define SIGN_MASK 0xe000 +#define XSIGN_MASK 0x8000 +#define YSIGN_MASK 0x4000 +#define ZSIGN_MASK 0x2000 + +// middle 6 bits - xbits +#define TOP_MASK 0x1f80 + +// lower 7 bits - ybits +#define BOTTOM_MASK 0x007f + +// unitcomp.cpp : A Unit Vector to 16-bit word conversion +// algorithm based on work of Rafael Baptista (rafael@oroboro.com) +// Accuracy improved by O.D. (punkfloyd@rocketmail.com) +// Used with Permission. + +// a compressed unit vector. reasonable fidelty for unit +// vectors in a 16 bit package. Good enough for surface normals +// we hope. +class cUnitVector // : public c3dMathObject +{ +public: + cUnitVector() { mVec = 0; } + cUnitVector( const Vector& vec ) + { + packVector( vec ); + } + cUnitVector( unsigned short val ) { mVec = val; } + + cUnitVector& operator=( const Vector& vec ) + { packVector( vec ); return *this; } + + operator Vector() + { + unpackVector( mTmpVec ); + return mTmpVec; + } + + void packVector( const Vector& vec ) + { + // convert from Vector to cUnitVector + + Assert( vec.IsValid()); + Vector tmp = vec; + + // input vector does not have to be unit length + // Assert( tmp.length() <= 1.001f ); + + mVec = 0; + if ( tmp.x < 0 ) { mVec |= XSIGN_MASK; tmp.x = -tmp.x; } + if ( tmp.y < 0 ) { mVec |= YSIGN_MASK; tmp.y = -tmp.y; } + if ( tmp.z < 0 ) { mVec |= ZSIGN_MASK; tmp.z = -tmp.z; } + + // project the normal onto the plane that goes through + // X0=(1,0,0),Y0=(0,1,0),Z0=(0,0,1). + // on that plane we choose an (projective!) 
coordinate system + // such that X0->(0,0), Y0->(126,0), Z0->(0,126),(0,0,0)->Infinity + + // a little slower... old pack was 4 multiplies and 2 adds. + // This is 2 multiplies, 2 adds, and a divide.... + float w = 126.0f / ( tmp.x + tmp.y + tmp.z ); + long xbits = (long)( tmp.x * w ); + long ybits = (long)( tmp.y * w ); + + Assert( xbits < 127 ); + Assert( xbits >= 0 ); + Assert( ybits < 127 ); + Assert( ybits >= 0 ); + + // Now we can be sure that 0<=xp<=126, 0<=yp<=126, 0<=xp+yp<=126 + // however for the sampling we want to transform this triangle + // into a rectangle. + if ( xbits >= 64 ) + { + xbits = 127 - xbits; + ybits = 127 - ybits; + } + + // now we that have xp in the range (0,127) and yp in + // the range (0,63), we can pack all the bits together + mVec |= ( xbits << 7 ); + mVec |= ybits; + } + + void unpackVector( Vector& vec ) + { + // if we do a straightforward backward transform + // we will get points on the plane X0,Y0,Z0 + // however we need points on a sphere that goes through + // these points. Therefore we need to adjust x,y,z so + // that x^2+y^2+z^2=1 by normalizing the vector. We have + // already precalculated the amount by which we need to + // scale, so all we do is a table lookup and a + // multiplication + + // get the x and y bits + long xbits = (( mVec & TOP_MASK ) >> 7 ); + long ybits = ( mVec & BOTTOM_MASK ); + + // map the numbers back to the triangle (0,0)-(0,126)-(126,0) + if (( xbits + ybits ) >= 127 ) + { + xbits = 127 - xbits; + ybits = 127 - ybits; + } + + // do the inverse transform and normalization + // costs 3 extra multiplies and 2 subtracts. No big deal. 
+ float uvadj = mUVAdjustment[mVec & ~SIGN_MASK]; + vec.x = uvadj * (float) xbits; + vec.y = uvadj * (float) ybits; + vec.z = uvadj * (float)( 126 - xbits - ybits ); + + // set all the sign bits + if ( mVec & XSIGN_MASK ) vec.x = -vec.x; + if ( mVec & YSIGN_MASK ) vec.y = -vec.y; + if ( mVec & ZSIGN_MASK ) vec.z = -vec.z; + + Assert( vec.IsValid()); + } + + static void initializeStatics() + { + for ( int idx = 0; idx < 0x2000; idx++ ) + { + long xbits = idx >> 7; + long ybits = idx & BOTTOM_MASK; + + // map the numbers back to the triangle (0,0)-(0,127)-(127,0) + if (( xbits + ybits ) >= 127 ) + { + xbits = 127 - xbits; + ybits = 127 - ybits; + } + + // convert to 3D vectors + float x = (float)xbits; + float y = (float)ybits; + float z = (float)( 126 - xbits - ybits ); + + // calculate the amount of normalization required + mUVAdjustment[idx] = 1.0f / sqrtf( y*y + z*z + x*x ); + Assert( _finite( mUVAdjustment[idx])); + + //cerr << mUVAdjustment[idx] << "\t"; + //if ( xbits == 0 ) cerr << "\n"; + } + } + +#if 0 + void test() + { + #define TEST_RANGE 4 + #define TEST_RANDOM 100 + #define TEST_ANGERROR 1.0 + + float maxError = 0; + float avgError = 0; + int numVecs = 0; + + {for ( int x = -TEST_RANGE; x < TEST_RANGE; x++ ) + { + for ( int y = -TEST_RANGE; y < TEST_RANGE; y++ ) + { + for ( int z = -TEST_RANGE; z < TEST_RANGE; z++ ) + { + if (( x + y + z ) == 0 ) continue; + + Vector vec( (float)x, (float)y, (float)z ); + Vector vec2; + + vec.normalize(); + packVector( vec ); + unpackVector( vec2 ); + + float ang = vec.dot( vec2 ); + ang = (( fabs( ang ) > 0.99999f ) ? 
0 : (float)acos(ang)); + + if (( ang > TEST_ANGERROR ) | ( !_finite( ang ))) + { + cerr << "error: " << ang << endl; + cerr << "orig vec: " << vec.x << ",\t" + << vec.y << ",\t" << vec.z << "\tmVec: " + << mVec << endl; + cerr << "quantized vec2: " << vec2.x + << ",\t" << vec2.y << ",\t" + << vec2.z << endl << endl; + } + avgError += ang; + numVecs++; + if ( maxError < ang ) maxError = ang; + } + } + }} + + for ( int w = 0; w < TEST_RANDOM; w++ ) + { + Vector vec( genRandom(), genRandom(), genRandom()); + Vector vec2; + vec.normalize(); + + packVector( vec ); + unpackVector( vec2 ); + + float ang =vec.dot( vec2 ); + ang = (( ang > 0.999f ) ? 0 : (float)acos(ang)); + + if (( ang > TEST_ANGERROR ) | ( !_finite( ang ))) + { + cerr << "error: " << ang << endl; + cerr << "orig vec: " << vec.x << ",\t" + << vec.y << ",\t" << vec.z << "\tmVec: " + << mVec << endl; + cerr << "quantized vec2: " << vec2.x << ",\t" + << vec2.y << ",\t" + << vec2.z << endl << endl; + } + avgError += ang; + numVecs++; + if ( maxError < ang ) maxError = ang; + } + + { for ( int x = 0; x < 50; x++ ) + { + Vector vec( (float)x, 25.0f, 0.0f ); + Vector vec2; + + vec.normalize(); + packVector( vec ); + unpackVector( vec2 ); + + float ang = vec.dot( vec2 ); + ang = (( fabs( ang ) > 0.999f ) ? 0 : (float)acos(ang)); + + if (( ang > TEST_ANGERROR ) | ( !_finite( ang ))) + { + cerr << "error: " << ang << endl; + cerr << "orig vec: " << vec.x << ",\t" + << vec.y << ",\t" << vec.z << "\tmVec: " + << mVec << endl; + cerr << " quantized vec2: " << vec2.x << ",\t" + << vec2.y << ",\t" << vec2.z << endl << endl; + } + + avgError += ang; + numVecs++; + if ( maxError < ang ) maxError = ang; + }} + + cerr << "max angle error: " << maxError + << ", average error: " << avgError / numVecs + << ", num tested vecs: " << numVecs << endl; + } + + friend ostream& operator<< ( ostream& os, const cUnitVector& vec ) + { os << vec.mVec; return os; } +#endif + +//protected: // !!!! 
+ + unsigned short mVec; + static float mUVAdjustment[0x2000]; + static Vector mTmpVec; +}; + +#endif // _3D_VECTOR_H + + diff --git a/mp/src/public/mathlib/compressed_light_cube.h b/mp/src/public/mathlib/compressed_light_cube.h index a720808f..207f92db 100644 --- a/mp/src/public/mathlib/compressed_light_cube.h +++ b/mp/src/public/mathlib/compressed_light_cube.h @@ -1,24 +1,24 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -//=============================================================================// - -#ifndef COMPRESSED_LIGHT_CUBE_H -#define COMPRESSED_LIGHT_CUBE_H -#ifdef _WIN32 -#pragma once -#endif - - -#include "mathlib/mathlib.h" - - -struct CompressedLightCube -{ - DECLARE_BYTESWAP_DATADESC(); - ColorRGBExp32 m_Color[6]; -}; - - -#endif // COMPRESSED_LIGHT_CUBE_H +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//=============================================================================// + +#ifndef COMPRESSED_LIGHT_CUBE_H +#define COMPRESSED_LIGHT_CUBE_H +#ifdef _WIN32 +#pragma once +#endif + + +#include "mathlib/mathlib.h" + + +struct CompressedLightCube +{ + DECLARE_BYTESWAP_DATADESC(); + ColorRGBExp32 m_Color[6]; +}; + + +#endif // COMPRESSED_LIGHT_CUBE_H diff --git a/mp/src/public/mathlib/compressed_vector.h b/mp/src/public/mathlib/compressed_vector.h index 6eb3ac5d..6a495229 100644 --- a/mp/src/public/mathlib/compressed_vector.h +++ b/mp/src/public/mathlib/compressed_vector.h @@ -1,608 +1,608 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -// $NoKeywords: $ -// -//=============================================================================// - -#ifndef COMPRESSED_VECTOR_H -#define COMPRESSED_VECTOR_H - -#ifdef _WIN32 -#pragma once -#endif - -#include -#include - -// For vec_t, put this somewhere else? -#include "basetypes.h" - -// For rand(). We really need a library! 
-#include - -#include "tier0/dbg.h" -#include "mathlib/vector.h" - -#include "mathlib/mathlib.h" - -#if defined( _X360 ) -#pragma bitfield_order( push, lsb_to_msb ) -#endif -//========================================================= -// fit a 3D vector into 32 bits -//========================================================= - -class Vector32 -{ -public: - // Construction/destruction: - Vector32(void); - Vector32(vec_t X, vec_t Y, vec_t Z); - - // assignment - Vector32& operator=(const Vector &vOther); - operator Vector (); - -private: - unsigned short x:10; - unsigned short y:10; - unsigned short z:10; - unsigned short exp:2; -}; - -inline Vector32& Vector32::operator=(const Vector &vOther) -{ - CHECK_VALID(vOther); - - static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f }; - - float fmax = Max( fabs( vOther.x ), fabs( vOther.y ) ); - fmax = Max( fmax, (float)fabs( vOther.z ) ); - - for (exp = 0; exp < 3; exp++) - { - if (fmax < expScale[exp]) - break; - } - Assert( fmax < expScale[exp] ); - - float fexp = 512.0f / expScale[exp]; - - x = Clamp( (int)(vOther.x * fexp) + 512, 0, 1023 ); - y = Clamp( (int)(vOther.y * fexp) + 512, 0, 1023 ); - z = Clamp( (int)(vOther.z * fexp) + 512, 0, 1023 ); - return *this; -} - - -inline Vector32::operator Vector () -{ - Vector tmp; - - static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f }; - - float fexp = expScale[exp] / 512.0f; - - tmp.x = (((int)x) - 512) * fexp; - tmp.y = (((int)y) - 512) * fexp; - tmp.z = (((int)z) - 512) * fexp; - return tmp; -} - - -//========================================================= -// Fit a unit vector into 32 bits -//========================================================= - -class Normal32 -{ -public: - // Construction/destruction: - Normal32(void); - Normal32(vec_t X, vec_t Y, vec_t Z); - - // assignment - Normal32& operator=(const Vector &vOther); - operator Vector (); - -private: - unsigned short x:15; - unsigned short y:15; - unsigned short zneg:1; -}; - - -inline Normal32& 
Normal32::operator=(const Vector &vOther) -{ - CHECK_VALID(vOther); - - x = Clamp( (int)(vOther.x * 16384) + 16384, 0, 32767 ); - y = Clamp( (int)(vOther.y * 16384) + 16384, 0, 32767 ); - zneg = (vOther.z < 0); - //x = vOther.x; - //y = vOther.y; - //z = vOther.z; - return *this; -} - - -inline Normal32::operator Vector () -{ - Vector tmp; - - tmp.x = ((int)x - 16384) * (1 / 16384.0); - tmp.y = ((int)y - 16384) * (1 / 16384.0); - tmp.z = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y ); - if (zneg) - tmp.z = -tmp.z; - return tmp; -} - - -//========================================================= -// 64 bit Quaternion -//========================================================= - -class Quaternion64 -{ -public: - // Construction/destruction: - Quaternion64(void); - Quaternion64(vec_t X, vec_t Y, vec_t Z); - - // assignment - // Quaternion& operator=(const Quaternion64 &vOther); - Quaternion64& operator=(const Quaternion &vOther); - operator Quaternion (); -private: - uint64 x:21; - uint64 y:21; - uint64 z:21; - uint64 wneg:1; -}; - - -inline Quaternion64::operator Quaternion () -{ - Quaternion tmp; - - // shift to -1048576, + 1048575, then round down slightly to -1.0 < x < 1.0 - tmp.x = ((int)x - 1048576) * (1 / 1048576.5f); - tmp.y = ((int)y - 1048576) * (1 / 1048576.5f); - tmp.z = ((int)z - 1048576) * (1 / 1048576.5f); - tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z ); - if (wneg) - tmp.w = -tmp.w; - return tmp; -} - -inline Quaternion64& Quaternion64::operator=(const Quaternion &vOther) -{ - CHECK_VALID(vOther); - - x = Clamp( (int)(vOther.x * 1048576) + 1048576, 0, 2097151 ); - y = Clamp( (int)(vOther.y * 1048576) + 1048576, 0, 2097151 ); - z = Clamp( (int)(vOther.z * 1048576) + 1048576, 0, 2097151 ); - wneg = (vOther.w < 0); - return *this; -} - -//========================================================= -// 48 bit Quaternion -//========================================================= - -class Quaternion48 -{ -public: - // 
Construction/destruction: - Quaternion48(void); - Quaternion48(vec_t X, vec_t Y, vec_t Z); - - // assignment - // Quaternion& operator=(const Quaternion48 &vOther); - Quaternion48& operator=(const Quaternion &vOther); - operator Quaternion (); -private: - unsigned short x:16; - unsigned short y:16; - unsigned short z:15; - unsigned short wneg:1; -}; - - -inline Quaternion48::operator Quaternion () -{ - Quaternion tmp; - - tmp.x = ((int)x - 32768) * (1 / 32768.0); - tmp.y = ((int)y - 32768) * (1 / 32768.0); - tmp.z = ((int)z - 16384) * (1 / 16384.0); - tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z ); - if (wneg) - tmp.w = -tmp.w; - return tmp; -} - -inline Quaternion48& Quaternion48::operator=(const Quaternion &vOther) -{ - CHECK_VALID(vOther); - - x = Clamp( (int)(vOther.x * 32768) + 32768, 0, 65535 ); - y = Clamp( (int)(vOther.y * 32768) + 32768, 0, 65535 ); - z = Clamp( (int)(vOther.z * 16384) + 16384, 0, 32767 ); - wneg = (vOther.w < 0); - return *this; -} - -//========================================================= -// 32 bit Quaternion -//========================================================= - -class Quaternion32 -{ -public: - // Construction/destruction: - Quaternion32(void); - Quaternion32(vec_t X, vec_t Y, vec_t Z); - - // assignment - // Quaternion& operator=(const Quaternion48 &vOther); - Quaternion32& operator=(const Quaternion &vOther); - operator Quaternion (); -private: - unsigned int x:11; - unsigned int y:10; - unsigned int z:10; - unsigned int wneg:1; -}; - - -inline Quaternion32::operator Quaternion () -{ - Quaternion tmp; - - tmp.x = ((int)x - 1024) * (1 / 1024.0); - tmp.y = ((int)y - 512) * (1 / 512.0); - tmp.z = ((int)z - 512) * (1 / 512.0); - tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z ); - if (wneg) - tmp.w = -tmp.w; - return tmp; -} - -inline Quaternion32& Quaternion32::operator=(const Quaternion &vOther) -{ - CHECK_VALID(vOther); - - x = Clamp( (int)(vOther.x * 1024) + 1024, 0, 2047 ); - y = 
Clamp( (int)(vOther.y * 512) + 512, 0, 1023 ); - z = Clamp( (int)(vOther.z * 512) + 512, 0, 1023 ); - wneg = (vOther.w < 0); - return *this; -} - -//========================================================= -// 16 bit float -//========================================================= - - -const int float32bias = 127; -const int float16bias = 15; - -const float maxfloat16bits = 65504.0f; - -class float16 -{ -public: - //float16() {} - //float16( float f ) { m_storage.rawWord = ConvertFloatTo16bits(f); } - - void Init() { m_storage.rawWord = 0; } -// float16& operator=(const float16 &other) { m_storage.rawWord = other.m_storage.rawWord; return *this; } -// float16& operator=(const float &other) { m_storage.rawWord = ConvertFloatTo16bits(other); return *this; } -// operator unsigned short () { return m_storage.rawWord; } -// operator float () { return Convert16bitFloatTo32bits( m_storage.rawWord ); } - unsigned short GetBits() const - { - return m_storage.rawWord; - } - float GetFloat() const - { - return Convert16bitFloatTo32bits( m_storage.rawWord ); - } - void SetFloat( float in ) - { - m_storage.rawWord = ConvertFloatTo16bits( in ); - } - - bool IsInfinity() const - { - return m_storage.bits.biased_exponent == 31 && m_storage.bits.mantissa == 0; - } - bool IsNaN() const - { - return m_storage.bits.biased_exponent == 31 && m_storage.bits.mantissa != 0; - } - - bool operator==(const float16 other) const { return m_storage.rawWord == other.m_storage.rawWord; } - bool operator!=(const float16 other) const { return m_storage.rawWord != other.m_storage.rawWord; } - -// bool operator< (const float other) const { return GetFloat() < other; } -// bool operator> (const float other) const { return GetFloat() > other; } - -protected: - union float32bits - { - float rawFloat; - struct - { - unsigned int mantissa : 23; - unsigned int biased_exponent : 8; - unsigned int sign : 1; - } bits; - }; - - union float16bits - { - unsigned short rawWord; - struct - { - unsigned short 
mantissa : 10; - unsigned short biased_exponent : 5; - unsigned short sign : 1; - } bits; - }; - - static bool IsNaN( float16bits in ) - { - return in.bits.biased_exponent == 31 && in.bits.mantissa != 0; - } - static bool IsInfinity( float16bits in ) - { - return in.bits.biased_exponent == 31 && in.bits.mantissa == 0; - } - - // 0x0001 - 0x03ff - static unsigned short ConvertFloatTo16bits( float input ) - { - if ( input > maxfloat16bits ) - input = maxfloat16bits; - else if ( input < -maxfloat16bits ) - input = -maxfloat16bits; - - float16bits output; - float32bits inFloat; - - inFloat.rawFloat = input; - - output.bits.sign = inFloat.bits.sign; - - if ( (inFloat.bits.biased_exponent==0) && (inFloat.bits.mantissa==0) ) - { - // zero - output.bits.mantissa = 0; - output.bits.biased_exponent = 0; - } - else if ( (inFloat.bits.biased_exponent==0) && (inFloat.bits.mantissa!=0) ) - { - // denorm -- denorm float maps to 0 half - output.bits.mantissa = 0; - output.bits.biased_exponent = 0; - } - else if ( (inFloat.bits.biased_exponent==0xff) && (inFloat.bits.mantissa==0) ) - { -#if 0 - // infinity - output.bits.mantissa = 0; - output.bits.biased_exponent = 31; -#else - // infinity maps to maxfloat - output.bits.mantissa = 0x3ff; - output.bits.biased_exponent = 0x1e; -#endif - } - else if ( (inFloat.bits.biased_exponent==0xff) && (inFloat.bits.mantissa!=0) ) - { -#if 0 - // NaN - output.bits.mantissa = 1; - output.bits.biased_exponent = 31; -#else - // NaN maps to zero - output.bits.mantissa = 0; - output.bits.biased_exponent = 0; -#endif - } - else - { - // regular number - int new_exp = inFloat.bits.biased_exponent-127; - - if (new_exp<-24) - { - // this maps to 0 - output.bits.mantissa = 0; - output.bits.biased_exponent = 0; - } - - if (new_exp<-14) - { - // this maps to a denorm - output.bits.biased_exponent = 0; - unsigned int exp_val = ( unsigned int )( -14 - ( inFloat.bits.biased_exponent - float32bias ) ); - if( exp_val > 0 && exp_val < 11 ) - { - 
output.bits.mantissa = ( 1 << ( 10 - exp_val ) ) + ( inFloat.bits.mantissa >> ( 13 + exp_val ) ); - } - } - else if (new_exp>15) - { -#if 0 - // map this value to infinity - output.bits.mantissa = 0; - output.bits.biased_exponent = 31; -#else - // to big. . . maps to maxfloat - output.bits.mantissa = 0x3ff; - output.bits.biased_exponent = 0x1e; -#endif - } - else - { - output.bits.biased_exponent = new_exp+15; - output.bits.mantissa = (inFloat.bits.mantissa >> 13); - } - } - return output.rawWord; - } - - static float Convert16bitFloatTo32bits( unsigned short input ) - { - float32bits output; - const float16bits &inFloat = *((float16bits *)&input); - - if( IsInfinity( inFloat ) ) - { - return maxfloat16bits * ( ( inFloat.bits.sign == 1 ) ? -1.0f : 1.0f ); - } - if( IsNaN( inFloat ) ) - { - return 0.0; - } - if( inFloat.bits.biased_exponent == 0 && inFloat.bits.mantissa != 0 ) - { - // denorm - const float half_denorm = (1.0f/16384.0f); // 2^-14 - float mantissa = ((float)(inFloat.bits.mantissa)) / 1024.0f; - float sgn = (inFloat.bits.sign)? 
-1.0f :1.0f; - output.rawFloat = sgn*mantissa*half_denorm; - } - else - { - // regular number - unsigned mantissa = inFloat.bits.mantissa; - unsigned biased_exponent = inFloat.bits.biased_exponent; - unsigned sign = ((unsigned)inFloat.bits.sign) << 31; - biased_exponent = ( (biased_exponent - float16bias + float32bias) * (biased_exponent != 0) ) << 23; - mantissa <<= (23-10); - - *((unsigned *)&output) = ( mantissa | biased_exponent | sign ); - } - - return output.rawFloat; - } - - - float16bits m_storage; -}; - -class float16_with_assign : public float16 -{ -public: - float16_with_assign() {} - float16_with_assign( float f ) { m_storage.rawWord = ConvertFloatTo16bits(f); } - - float16& operator=(const float16 &other) { m_storage.rawWord = ((float16_with_assign &)other).m_storage.rawWord; return *this; } - float16& operator=(const float &other) { m_storage.rawWord = ConvertFloatTo16bits(other); return *this; } -// operator unsigned short () const { return m_storage.rawWord; } - operator float () const { return Convert16bitFloatTo32bits( m_storage.rawWord ); } -}; - -//========================================================= -// Fit a 3D vector in 48 bits -//========================================================= - -class Vector48 -{ -public: - // Construction/destruction: - Vector48(void) {} - Vector48(vec_t X, vec_t Y, vec_t Z) { x.SetFloat( X ); y.SetFloat( Y ); z.SetFloat( Z ); } - - // assignment - Vector48& operator=(const Vector &vOther); - operator Vector (); - - const float operator[]( int i ) const { return (((float16 *)this)[i]).GetFloat(); } - - float16 x; - float16 y; - float16 z; -}; - -inline Vector48& Vector48::operator=(const Vector &vOther) -{ - CHECK_VALID(vOther); - - x.SetFloat( vOther.x ); - y.SetFloat( vOther.y ); - z.SetFloat( vOther.z ); - return *this; -} - - -inline Vector48::operator Vector () -{ - Vector tmp; - - tmp.x = x.GetFloat(); - tmp.y = y.GetFloat(); - tmp.z = z.GetFloat(); - - return tmp; -} - 
-//========================================================= -// Fit a 2D vector in 32 bits -//========================================================= - -class Vector2d32 -{ -public: - // Construction/destruction: - Vector2d32(void) {} - Vector2d32(vec_t X, vec_t Y) { x.SetFloat( X ); y.SetFloat( Y ); } - - // assignment - Vector2d32& operator=(const Vector &vOther); - Vector2d32& operator=(const Vector2D &vOther); - - operator Vector2D (); - - void Init( vec_t ix = 0.f, vec_t iy = 0.f); - - float16_with_assign x; - float16_with_assign y; -}; - -inline Vector2d32& Vector2d32::operator=(const Vector2D &vOther) -{ - x.SetFloat( vOther.x ); - y.SetFloat( vOther.y ); - return *this; -} - -inline Vector2d32::operator Vector2D () -{ - Vector2D tmp; - - tmp.x = x.GetFloat(); - tmp.y = y.GetFloat(); - - return tmp; -} - -inline void Vector2d32::Init( vec_t ix, vec_t iy ) -{ - x.SetFloat(ix); - y.SetFloat(iy); -} - -#if defined( _X360 ) -#pragma bitfield_order( pop ) -#endif - -#endif - +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef COMPRESSED_VECTOR_H +#define COMPRESSED_VECTOR_H + +#ifdef _WIN32 +#pragma once +#endif + +#include +#include + +// For vec_t, put this somewhere else? +#include "basetypes.h" + +// For rand(). We really need a library! 
+// NOTE(review): the header name on the next line was lost when this page was
+// extracted (angle-bracket text stripped) -- restore it from the upstream file.
+#include
+
+#include "tier0/dbg.h"
+#include "mathlib/vector.h"
+
+#include "mathlib/mathlib.h"
+
+#if defined( _X360 )
+#pragma bitfield_order( push, lsb_to_msb )
+#endif
+//=========================================================
+// fit a 3D vector into 32 bits
+//=========================================================
+
+// Fixed-point packing of a Vector: three 10-bit components plus a 2-bit index
+// (exp) into a shared scale table { 4, 16, 32, 64 }.
+// NOTE(review): the bit-fields are declared on unsigned short; whether
+// 10+10+10+2 bits really pack into 32 bits is implementation-defined --
+// confirm sizeof(Vector32) on every target compiler.
+class Vector32
+{
+public:
+    // Construction/destruction:
+    // (declared here; no definitions are visible in this header section)
+    Vector32(void);
+    Vector32(vec_t X, vec_t Y, vec_t Z);
+
+    // assignment
+    Vector32& operator=(const Vector &vOther);
+    operator Vector ();
+
+private:
+    unsigned short x:10;
+    unsigned short y:10;
+    unsigned short z:10;
+    unsigned short exp:2;
+};
+
+// Encode: pick the first scale in expScale that exceeds the largest absolute
+// component, then store each component as (int)(v * 512 / scale) + 512 clamped
+// to [0, 1023].
+inline Vector32& Vector32::operator=(const Vector &vOther)
+{
+    CHECK_VALID(vOther);
+
+    static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f };
+
+    float fmax = Max( fabs( vOther.x ), fabs( vOther.y ) );
+    fmax = Max( fmax, (float)fabs( vOther.z ) );
+
+    // exp is the 2-bit member; the loop leaves it at 3 when no smaller scale fits
+    for (exp = 0; exp < 3; exp++)
+    {
+        if (fmax < expScale[exp])
+            break;
+    }
+    // components with magnitude >= 64 are out of range for this format
+    Assert( fmax < expScale[exp] );
+
+    float fexp = 512.0f / expScale[exp];
+
+    x = Clamp( (int)(vOther.x * fexp) + 512, 0, 1023 );
+    y = Clamp( (int)(vOther.y * fexp) + 512, 0, 1023 );
+    z = Clamp( (int)(vOther.z * fexp) + 512, 0, 1023 );
+    return *this;
+}
+
+
+// Decode: undo the +512 bias and rescale by expScale[exp] / 512.
+inline Vector32::operator Vector ()
+{
+    Vector tmp;
+
+    static float expScale[4] = { 4.0f, 16.0f, 32.f, 64.f };
+
+    float fexp = expScale[exp] / 512.0f;
+
+    tmp.x = (((int)x) - 512) * fexp;
+    tmp.y = (((int)y) - 512) * fexp;
+    tmp.z = (((int)z) - 512) * fexp;
+    return tmp;
+}
+
+
+//=========================================================
+// Fit a unit vector into 32 bits
+//=========================================================
+
+// Stores x and y as 15-bit fixed point and only the sign of z; z itself is
+// recomputed on decode, so the input is expected to be unit length.
+// NOTE(review): same bit-field packing caveat as Vector32 (unsigned short base).
+class Normal32
+{
+public:
+    // Construction/destruction:
+    // (declared here; no definitions are visible in this header section)
+    Normal32(void);
+    Normal32(vec_t X, vec_t Y, vec_t Z);
+
+    // assignment
+    Normal32& operator=(const Vector &vOther);
+    operator Vector ();
+
+private:
+    unsigned short x:15;
+    unsigned short y:15;
+    unsigned short zneg:1;
+};
+
+
+// Encode: x,y -> (int)(v * 16384) + 16384 clamped to [0, 32767]; zneg = (z < 0).
+inline Normal32& Normal32::operator=(const Vector &vOther)
+{
+    CHECK_VALID(vOther);
+
+    x = Clamp( (int)(vOther.x * 16384) + 16384, 0, 32767 );
+    y = Clamp( (int)(vOther.y * 16384) + 16384, 0, 32767 );
+    zneg = (vOther.z < 0);
+    //x = vOther.x;
+    //y = vOther.y;
+    //z = vOther.z;
+    return *this;
+}
+
+
+// Decode: rebuild z as sqrt(1 - x^2 - y^2) and apply the stored sign.
+// NOTE(review): the radicand is not clamped at zero here -- confirm it cannot
+// go slightly negative for the inputs callers actually store.
+inline Normal32::operator Vector ()
+{
+    Vector tmp;
+
+    tmp.x = ((int)x - 16384) * (1 / 16384.0);
+    tmp.y = ((int)y - 16384) * (1 / 16384.0);
+    tmp.z = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y );
+    if (zneg)
+        tmp.z = -tmp.z;
+    return tmp;
+}
+
+
+//=========================================================
+// 64 bit Quaternion
+//=========================================================
+
+// x, y, z at 21 bits each plus the sign of w. w is not stored: decode
+// recomputes it as sqrt(1 - x^2 - y^2 - z^2), which only reproduces the
+// original for a unit-length quaternion.
+class Quaternion64
+{
+public:
+    // Construction/destruction:
+    // (declared here; no definitions are visible in this header section)
+    Quaternion64(void);
+    Quaternion64(vec_t X, vec_t Y, vec_t Z);
+
+    // assignment
+    // Quaternion& operator=(const Quaternion64 &vOther);
+    Quaternion64& operator=(const Quaternion &vOther);
+    operator Quaternion ();
+private:
+    uint64 x:21;
+    uint64 y:21;
+    uint64 z:21;
+    uint64 wneg:1;
+};
+
+
+// Decode to a full Quaternion.
+inline Quaternion64::operator Quaternion ()
+{
+    Quaternion tmp;
+
+    // shift to -1048576, + 1048575, then round down slightly to -1.0 < x < 1.0
+    tmp.x = ((int)x - 1048576) * (1 / 1048576.5f);
+    tmp.y = ((int)y - 1048576) * (1 / 1048576.5f);
+    tmp.z = ((int)z - 1048576) * (1 / 1048576.5f);
+    tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z );
+    if (wneg)
+        tmp.w = -tmp.w;
+    return tmp;
+}
+
+// Encode: each of x,y,z -> (int)(v * 1048576) + 1048576 clamped to 21 bits.
+inline Quaternion64& Quaternion64::operator=(const Quaternion &vOther)
+{
+    CHECK_VALID(vOther);
+
+    x = Clamp( (int)(vOther.x * 1048576) + 1048576, 0, 2097151 );
+    y = Clamp( (int)(vOther.y * 1048576) + 1048576, 0, 2097151 );
+    z = Clamp( (int)(vOther.z * 1048576) + 1048576, 0, 2097151 );
+    wneg = (vOther.w < 0);
+    return *this;
+}
+
+//=========================================================
+// 48 bit Quaternion
+//=========================================================
+
+// x and y at 16 bits, z at 15 bits, plus the sign of w; w is recomputed on
+// decode exactly as in Quaternion64.
+class Quaternion48
+{
+public:
+    // Construction/destruction:
+    // (declared here; no definitions are visible in this header section)
+    Quaternion48(void);
+    Quaternion48(vec_t X, vec_t Y, vec_t Z);
+
+    // assignment
+    // Quaternion& operator=(const Quaternion48 &vOther);
+    Quaternion48& operator=(const Quaternion &vOther);
+    operator Quaternion ();
+private:
+    unsigned short x:16;
+    unsigned short y:16;
+    unsigned short z:15;
+    unsigned short wneg:1;
+};
+
+
+// Decode: z uses a 16384 bias/scale because it has one bit less than x and y.
+inline Quaternion48::operator Quaternion ()
+{
+    Quaternion tmp;
+
+    tmp.x = ((int)x - 32768) * (1 / 32768.0);
+    tmp.y = ((int)y - 32768) * (1 / 32768.0);
+    tmp.z = ((int)z - 16384) * (1 / 16384.0);
+    tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z );
+    if (wneg)
+        tmp.w = -tmp.w;
+    return tmp;
+}
+
+// Encode: scale, bias and clamp each component to its bit width.
+inline Quaternion48& Quaternion48::operator=(const Quaternion &vOther)
+{
+    CHECK_VALID(vOther);
+
+    x = Clamp( (int)(vOther.x * 32768) + 32768, 0, 65535 );
+    y = Clamp( (int)(vOther.y * 32768) + 32768, 0, 65535 );
+    z = Clamp( (int)(vOther.z * 16384) + 16384, 0, 32767 );
+    wneg = (vOther.w < 0);
+    return *this;
+}
+
+//=========================================================
+// 32 bit Quaternion
+//=========================================================
+
+// x at 11 bits, y and z at 10 bits, plus the sign of w; w is recomputed on
+// decode exactly as in Quaternion64.
+class Quaternion32
+{
+public:
+    // Construction/destruction:
+    // (declared here; no definitions are visible in this header section)
+    Quaternion32(void);
+    Quaternion32(vec_t X, vec_t Y, vec_t Z);
+
+    // assignment
+    // Quaternion& operator=(const Quaternion48 &vOther);
+    Quaternion32& operator=(const Quaternion &vOther);
+    operator Quaternion ();
+private:
+    unsigned int x:11;
+    unsigned int y:10;
+    unsigned int z:10;
+    unsigned int wneg:1;
+};
+
+
+// Decode: x uses a 1024 bias/scale, y and z use 512.
+inline Quaternion32::operator Quaternion ()
+{
+    Quaternion tmp;
+
+    tmp.x = ((int)x - 1024) * (1 / 1024.0);
+    tmp.y = ((int)y - 512) * (1 / 512.0);
+    tmp.z = ((int)z - 512) * (1 / 512.0);
+    tmp.w = sqrt( 1 - tmp.x * tmp.x - tmp.y * tmp.y - tmp.z * tmp.z );
+    if (wneg)
+        tmp.w = -tmp.w;
+    return tmp;
+}
+
+// Encode: scale, bias and clamp each component to its bit width.
+inline Quaternion32& Quaternion32::operator=(const Quaternion &vOther)
+{
+    CHECK_VALID(vOther);
+
+    x = Clamp( (int)(vOther.x * 1024) + 1024, 0, 2047 );
+    y = Clamp( (int)(vOther.y * 512) + 512, 0, 1023 );
+    z = Clamp( (int)(vOther.z * 512) + 512, 0, 1023 );
+    wneg = (vOther.w < 0);
+    return *this;
+}
+
+//=========================================================
+// 16 bit float
+//=========================================================
+
+
+// Exponent biases of the 32-bit and 16-bit layouts used below.
+const int float32bias = 127;
+const int float16bias = 15;
+
+// Largest magnitude the converters below will produce or accept.
+const float maxfloat16bits = 65504.0f;
+
+// 16-bit float storage (1 sign, 5 exponent, 10 mantissa bits -- see
+// float16bits). This base class has no constructors or assignment operators;
+// float16_with_assign adds them.
+class float16
+{
+public:
+    //float16() {}
+    //float16( float f ) { m_storage.rawWord = ConvertFloatTo16bits(f); }
+
+    void Init() { m_storage.rawWord = 0; }
+//  float16& operator=(const float16 &other) { m_storage.rawWord = other.m_storage.rawWord; return *this; }
+//  float16& operator=(const float &other) { m_storage.rawWord = ConvertFloatTo16bits(other); return *this; }
+//  operator unsigned short () { return m_storage.rawWord; }
+//  operator float () { return Convert16bitFloatTo32bits( m_storage.rawWord ); }
+    // Raw 16-bit pattern.
+    unsigned short GetBits() const
+    {
+        return m_storage.rawWord;
+    }
+    // Stored value expanded to a 32-bit float.
+    float GetFloat() const
+    {
+        return Convert16bitFloatTo32bits( m_storage.rawWord );
+    }
+    // Store a 32-bit float (clamped and truncated by ConvertFloatTo16bits).
+    void SetFloat( float in )
+    {
+        m_storage.rawWord = ConvertFloatTo16bits( in );
+    }
+
+    // Exponent field all ones, mantissa zero.
+    bool IsInfinity() const
+    {
+        return m_storage.bits.biased_exponent == 31 && m_storage.bits.mantissa == 0;
+    }
+    // Exponent field all ones, mantissa non-zero.
+    bool IsNaN() const
+    {
+        return m_storage.bits.biased_exponent == 31 && m_storage.bits.mantissa != 0;
+    }
+
+    // Equality compares the raw bit patterns, not the decoded float values.
+    bool operator==(const float16 other) const { return m_storage.rawWord == other.m_storage.rawWord; }
+    bool operator!=(const float16 other) const { return m_storage.rawWord != other.m_storage.rawWord; }
+
+//  bool operator< (const float other) const { return GetFloat() < other; }
+//  bool operator> (const float other) const { return GetFloat() > other; }
+
+protected:
+    // Field view of a 32-bit float.
+    // NOTE(review): relies on the compiler's bit-field ordering and on reading
+    // a union member other than the one last written -- confirm for each target.
+    union float32bits
+    {
+        float rawFloat;
+        struct
+        {
+            unsigned int mantissa : 23;
+            unsigned int biased_exponent : 8;
+            unsigned int sign : 1;
+        } bits;
+    };
+
+    // Field view of the 16-bit storage word.
+    union float16bits
+    {
+        unsigned short rawWord;
+        struct
+        {
+            unsigned short mantissa : 10;
+            unsigned short biased_exponent : 5;
+            unsigned short sign : 1;
+        } bits;
+    };
+
+    static bool IsNaN( float16bits in )
+    {
+        return in.bits.biased_exponent == 31 && in.bits.mantissa != 0;
+    }
+    static bool IsInfinity( float16bits in )
+    {
+        return in.bits.biased_exponent == 31 && in.bits.mantissa == 0;
+    }
+
+    // 0x0001 - 0x03ff
+    // Convert a 32-bit float to the 16-bit pattern. The input is first clamped
+    // to +/-maxfloat16bits; 32-bit denormals and NaN become zero; the mantissa
+    // is truncated (shifted right), not rounded.
+    static unsigned short ConvertFloatTo16bits( float input )
+    {
+        if ( input > maxfloat16bits )
+            input = maxfloat16bits;
+        else if ( input < -maxfloat16bits )
+            input = -maxfloat16bits;
+
+        float16bits output;
+        float32bits inFloat;
+
+        inFloat.rawFloat = input;
+
+        output.bits.sign = inFloat.bits.sign;
+
+        if ( (inFloat.bits.biased_exponent==0) && (inFloat.bits.mantissa==0) )
+        {
+            // zero
+            output.bits.mantissa = 0;
+            output.bits.biased_exponent = 0;
+        }
+        else if ( (inFloat.bits.biased_exponent==0) && (inFloat.bits.mantissa!=0) )
+        {
+            // denorm -- denorm float maps to 0 half
+            output.bits.mantissa = 0;
+            output.bits.biased_exponent = 0;
+        }
+        else if ( (inFloat.bits.biased_exponent==0xff) && (inFloat.bits.mantissa==0) )
+        {
+            // NOTE(review): the clamp at the top already replaces +/-infinity
+            // with +/-maxfloat16bits, so this branch looks unreachable -- confirm.
+#if 0
+            // infinity
+            output.bits.mantissa = 0;
+            output.bits.biased_exponent = 31;
+#else
+            // infinity maps to maxfloat
+            output.bits.mantissa = 0x3ff;
+            output.bits.biased_exponent = 0x1e;
+#endif
+        }
+        else if ( (inFloat.bits.biased_exponent==0xff) && (inFloat.bits.mantissa!=0) )
+        {
+#if 0
+            // NaN
+            output.bits.mantissa = 1;
+            output.bits.biased_exponent = 31;
+#else
+            // NaN maps to zero
+            output.bits.mantissa = 0;
+            output.bits.biased_exponent = 0;
+#endif
+        }
+        else
+        {
+            // regular number
+            int new_exp = inFloat.bits.biased_exponent-127;
+
+            if (new_exp<-24)
+            {
+                // this maps to 0
+                output.bits.mantissa = 0;
+                output.bits.biased_exponent = 0;
+            }
+
+            // Not an else-if: values below 2^-24 also enter the next branch,
+            // but exp_val is then >= 11, so the inner test fails and the zero
+            // mantissa written above is kept.
+            if (new_exp<-14)
+            {
+                // this maps to a denorm
+                output.bits.biased_exponent = 0;
+                unsigned int exp_val = ( unsigned int )( -14 - ( inFloat.bits.biased_exponent - float32bias ) );
+                if( exp_val > 0 && exp_val < 11 )
+                {
+                    // re-insert the implicit leading 1, shifted down by exp_val
+                    output.bits.mantissa = ( 1 << ( 10 - exp_val ) ) + ( inFloat.bits.mantissa >> ( 13 + exp_val ) );
+                }
+            }
+            else if (new_exp>15)
+            {
+#if 0
+                // map this value to infinity
+                output.bits.mantissa = 0;
+                output.bits.biased_exponent = 31;
+#else
+                // too big. . . maps to maxfloat
+                output.bits.mantissa = 0x3ff;
+                output.bits.biased_exponent = 0x1e;
+#endif
+            }
+            else
+            {
+                // re-bias the exponent and drop the low 13 mantissa bits
+                output.bits.biased_exponent = new_exp+15;
+                output.bits.mantissa = (inFloat.bits.mantissa >> 13);
+            }
+        }
+        return output.rawWord;
+    }
+
+    // Convert a 16-bit pattern back to a 32-bit float. Infinity patterns
+    // decode to +/-maxfloat16bits and NaN patterns decode to 0.
+    // NOTE(review): both pointer casts below reinterpret storage through an
+    // unrelated type -- confirm this is acceptable under the build's aliasing
+    // settings.
+    static float Convert16bitFloatTo32bits( unsigned short input )
+    {
+        float32bits output;
+        const float16bits &inFloat = *((float16bits *)&input);
+
+        if( IsInfinity( inFloat ) )
+        {
+            return maxfloat16bits * ( ( inFloat.bits.sign == 1 ) ? -1.0f : 1.0f );
+        }
+        if( IsNaN( inFloat ) )
+        {
+            return 0.0;
+        }
+        if( inFloat.bits.biased_exponent == 0 && inFloat.bits.mantissa != 0 )
+        {
+            // denorm
+            const float half_denorm = (1.0f/16384.0f); // 2^-14
+            float mantissa = ((float)(inFloat.bits.mantissa)) / 1024.0f;
+            float sgn = (inFloat.bits.sign)? -1.0f :1.0f;
+            output.rawFloat = sgn*mantissa*half_denorm;
+        }
+        else
+        {
+            // regular number
+            unsigned mantissa = inFloat.bits.mantissa;
+            unsigned biased_exponent = inFloat.bits.biased_exponent;
+            unsigned sign = ((unsigned)inFloat.bits.sign) << 31;
+            // the (biased_exponent != 0) factor keeps a zero input as zero
+            biased_exponent = ( (biased_exponent - float16bias + float32bias) * (biased_exponent != 0) ) << 23;
+            mantissa <<= (23-10);
+
+            *((unsigned *)&output) = ( mantissa | biased_exponent | sign );
+        }
+
+        return output.rawFloat;
+    }
+
+
+    float16bits m_storage;
+};
+
+// float16 plus construction from float, assignment, and implicit conversion
+// back to float. Both assignment operators return the base type float16&.
+class float16_with_assign : public float16
+{
+public:
+    float16_with_assign() {}
+    float16_with_assign( float f ) { m_storage.rawWord = ConvertFloatTo16bits(f); }
+
+    // the cast is how this class reaches the protected m_storage of a plain float16
+    float16& operator=(const float16 &other) { m_storage.rawWord = ((float16_with_assign &)other).m_storage.rawWord; return *this; }
+    float16& operator=(const float &other) { m_storage.rawWord = ConvertFloatTo16bits(other); return *this; }
+//  operator unsigned short () const { return m_storage.rawWord; }
+    operator float () const { return Convert16bitFloatTo32bits( m_storage.rawWord ); }
+};
+
+//=========================================================
+// Fit a 3D vector in 48 bits
+//=========================================================
+
+// Three float16 components; conversion to and from Vector goes through
+// float16::SetFloat / GetFloat.
+class Vector48
+{
+public:
+    // Construction/destruction:
+    Vector48(void) {}
+    Vector48(vec_t X, vec_t Y, vec_t Z) { x.SetFloat( X ); y.SetFloat( Y ); z.SetFloat( Z ); }
+
+    // assignment
+    Vector48& operator=(const Vector &vOther);
+    operator Vector ();
+
+    // Indexes x, y, z by treating this object as an array of float16.
+    // NOTE(review): assumes the three members are laid out contiguously with
+    // no padding; i is not range-checked.
+    const float operator[]( int i ) const { return (((float16 *)this)[i]).GetFloat(); }
+
+    float16 x;
+    float16 y;
+    float16 z;
+};
+
+// Encode each component with float16::SetFloat.
+inline Vector48& Vector48::operator=(const Vector &vOther)
+{
+    CHECK_VALID(vOther);
+
+    x.SetFloat( vOther.x );
+    y.SetFloat( vOther.y );
+    z.SetFloat( vOther.z );
+    return *this;
+}
+
+
+// Decode each component with float16::GetFloat.
+inline Vector48::operator Vector ()
+{
+    Vector tmp;
+
+    tmp.x = x.GetFloat();
+    tmp.y = y.GetFloat();
+    tmp.z = z.GetFloat();
+
+    return tmp;
+}
+
+//=========================================================
+// Fit a 2D vector in 32 bits
+//=========================================================
+
+// Two 16-bit float components (float16_with_assign), so conversion to and
+// from Vector2D goes through the 16-bit float encode/decode of that type.
+class Vector2d32
+{
+public:
+    // Construction/destruction:
+    Vector2d32(void) {}
+    Vector2d32(vec_t X, vec_t Y) { x.SetFloat( X ); y.SetFloat( Y ); }
+
+    // assignment
+    // NOTE(review): the Vector overload is only declared; no definition is
+    // visible in this header section.
+    Vector2d32& operator=(const Vector &vOther);
+    Vector2d32& operator=(const Vector2D &vOther);
+
+    operator Vector2D ();
+
+    void Init( vec_t ix = 0.f, vec_t iy = 0.f);
+
+    float16_with_assign x;
+    float16_with_assign y;
+};
+
+// Encode both components of a Vector2D.
+inline Vector2d32& Vector2d32::operator=(const Vector2D &vOther)
+{
+    x.SetFloat( vOther.x );
+    y.SetFloat( vOther.y );
+    return *this;
+}
+
+// Decode to a Vector2D.
+inline Vector2d32::operator Vector2D ()
+{
+    Vector2D tmp;
+
+    tmp.x = x.GetFloat();
+    tmp.y = y.GetFloat();
+
+    return tmp;
+}
+
+// Set both components; each argument defaults to 0 in the declaration.
+inline void Vector2d32::Init( vec_t ix, vec_t iy )
+{
+    x.SetFloat(ix);
+    y.SetFloat(iy);
+}
+
+#if defined( _X360 )
+#pragma bitfield_order( pop )
+#endif
+
+#endif
+
diff --git a/mp/src/public/mathlib/halton.h b/mp/src/public/mathlib/halton.h
index 204e5fd5..44df68ff 100644
--- a/mp/src/public/mathlib/halton.h
+++ b/mp/src/public/mathlib/halton.h
@@ -1,71 +1,71 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-// $Id$
-
-// halton.h - classes, etc for generating numbers using the Halton pseudo-random sequence. See
-// http://halton-sequences.wikiverse.org/.
-//
-// what this function is useful for is any sort of sampling/integration problem where
-// you want to solve it by random sampling. Each call the NextValue() generates
-// a random number between 0 and 1, in an unclumped manner, so that the space can be more
-// or less evenly sampled with a minimum number of samples.
-//
-// It is NOT useful for generating random numbers dynamically, since the outputs aren't
-// particularly random.
-// -// To generate multidimensional sample values (points in a plane, etc), use two -// HaltonSequenceGenerator_t's, with different (primes) bases. - -#ifndef HALTON_H -#define HALTON_H - -#include -#include - -class HaltonSequenceGenerator_t -{ - int seed; - int base; - float fbase; //< base as a float - -public: - HaltonSequenceGenerator_t(int base); //< base MUST be prime, >=2 - - float GetElement(int element); - - inline float NextValue(void) - { - return GetElement(seed++); - } - -}; - - -class DirectionalSampler_t //< pseudo-random sphere sampling -{ - HaltonSequenceGenerator_t zdot; - HaltonSequenceGenerator_t vrot; -public: - DirectionalSampler_t(void) - : zdot(2),vrot(3) - { - } - - Vector NextValue(void) - { - float zvalue=zdot.NextValue(); - zvalue=2*zvalue-1.0; // map from 0..1 to -1..1 - float phi=acos(zvalue); - // now, generate a random rotation angle for x/y - float theta=2.0*M_PI*vrot.NextValue(); - float sin_p=sin(phi); - return Vector(cos(theta)*sin_p, - sin(theta)*sin_p, - zvalue); - - } -}; - - - - -#endif // halton_h +//========= Copyright Valve Corporation, All rights reserved. ============// +// $Id$ + +// halton.h - classes, etc for generating numbers using the Halton pseudo-random sequence. See +// http://halton-sequences.wikiverse.org/. +// +// what this function is useful for is any sort of sampling/integration problem where +// you want to solve it by random sampling. Each call the NextValue() generates +// a random number between 0 and 1, in an unclumped manner, so that the space can be more +// or less evenly sampled with a minimum number of samples. +// +// It is NOT useful for generating random numbers dynamically, since the outputs aren't +// particularly random. +// +// To generate multidimensional sample values (points in a plane, etc), use two +// HaltonSequenceGenerator_t's, with different (primes) bases. 
+
+#ifndef HALTON_H
+#define HALTON_H
+
+// NOTE(review): the header names on the next two lines were lost when this
+// page was extracted (angle-bracket text stripped) -- restore them from the
+// upstream file.
+#include
+#include
+
+// Generator for one Halton sequence in a given base. Only NextValue() is
+// defined inline; the constructor and GetElement() are defined elsewhere.
+class HaltonSequenceGenerator_t
+{
+    int seed;       // index of the next element NextValue() will return
+    int base;
+    float fbase;    //< base as a float
+
+public:
+    HaltonSequenceGenerator_t(int base);    //< base MUST be prime, >=2
+
+    // Value of the sequence at the given index.
+    float GetElement(int element);
+
+    // Return the element at the current index, then advance the index.
+    inline float NextValue(void)
+    {
+        return GetElement(seed++);
+    }
+
+};
+
+
+// Produces direction vectors from two Halton sequences: base 2 drives the z
+// component and base 3 drives the rotation angle around z.
+class DirectionalSampler_t  //< pseudo-random sphere sampling
+{
+    HaltonSequenceGenerator_t zdot;
+    HaltonSequenceGenerator_t vrot;
+public:
+    DirectionalSampler_t(void)
+        : zdot(2),vrot(3)
+    {
+    }
+
+    // Next direction: z comes from the base-2 sequence remapped to -1..1, the
+    // x/y angle from the base-3 sequence scaled by 2*pi; sin(acos(z)) gives
+    // the radius of the circle at that height.
+    Vector NextValue(void)
+    {
+        float zvalue=zdot.NextValue();
+        zvalue=2*zvalue-1.0;    // map from 0..1 to -1..1
+        float phi=acos(zvalue);
+        // now, generate a random rotation angle for x/y
+        float theta=2.0*M_PI*vrot.NextValue();
+        float sin_p=sin(phi);
+        return Vector(cos(theta)*sin_p,
+                      sin(theta)*sin_p,
+                      zvalue);
+
+    }
+};
+
+
+
+
+#endif // halton_h
diff --git a/mp/src/public/mathlib/lightdesc.h b/mp/src/public/mathlib/lightdesc.h
index d03e3e19..1096d623 100644
--- a/mp/src/public/mathlib/lightdesc.h
+++ b/mp/src/public/mathlib/lightdesc.h
@@ -1,173 +1,173 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose:
-//
-//===========================================================================//
-
-// light structure definitions.
-#ifndef LIGHTDESC_H -#define LIGHTDESC_H - -#include -#include - -//----------------------------------------------------------------------------- -// Light structure -//----------------------------------------------------------------------------- - -enum LightType_t -{ - MATERIAL_LIGHT_DISABLE = 0, - MATERIAL_LIGHT_POINT, - MATERIAL_LIGHT_DIRECTIONAL, - MATERIAL_LIGHT_SPOT, -}; - -enum LightType_OptimizationFlags_t -{ - LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 = 1, - LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 = 2, - LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 = 4, - LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED = 8, -}; - -struct LightDesc_t -{ - LightType_t m_Type; //< MATERIAL_LIGHT_xxx - Vector m_Color; //< color+intensity - Vector m_Position; //< light source center position - Vector m_Direction; //< for SPOT, direction it is pointing - float m_Range; //< distance range for light.0=infinite - float m_Falloff; //< angular falloff exponent for spot lights - float m_Attenuation0; //< constant distance falloff term - float m_Attenuation1; //< linear term of falloff - float m_Attenuation2; //< quadatic term of falloff - float m_Theta; //< inner cone angle. no angular falloff - //< within this cone - float m_Phi; //< outer cone angle - - // the values below are derived from the above settings for optimizations - // These aren't used by DX8. . used for software lighting. - float m_ThetaDot; - float m_PhiDot; - unsigned int m_Flags; -protected: - float OneOver_ThetaDot_Minus_PhiDot; - float m_RangeSquared; -public: - - void RecalculateDerivedValues(void); // calculate m_xxDot, m_Type for changed parms - - LightDesc_t(void) - { - } - - // constructors for various useful subtypes - - // a point light with infinite range - LightDesc_t( const Vector &pos, const Vector &color ) - { - InitPoint( pos, color ); - } - - /// a simple light. cone boundaries in radians. you pass a look_at point and the - /// direciton is derived from that. 
- LightDesc_t( const Vector &pos, const Vector &color, const Vector &point_at, - float inner_cone_boundary, float outer_cone_boundary ) - { - InitSpot( pos, color, point_at, inner_cone_boundary, outer_cone_boundary ); - } - - void InitPoint( const Vector &pos, const Vector &color ); - void InitDirectional( const Vector &dir, const Vector &color ); - void InitSpot(const Vector &pos, const Vector &color, const Vector &point_at, - float inner_cone_boundary, float outer_cone_boundary ); - - /// Given 4 points and 4 normals, ADD lighting from this light into "color". - void ComputeLightAtPoints( const FourVectors &pos, const FourVectors &normal, - FourVectors &color, bool DoHalfLambert=false ) const; - void ComputeNonincidenceLightAtPoints( const FourVectors &pos, FourVectors &color ) const; - void ComputeLightAtPointsForDirectional( const FourVectors &pos, - const FourVectors &normal, - FourVectors &color, bool DoHalfLambert=false ) const; - - // warning - modifies color!!! set color first!! 
- void SetupOldStyleAttenuation( float fQuadatricAttn, float fLinearAttn, float fConstantAttn ); - - void SetupNewStyleAttenuation( float fFiftyPercentDistance, float fZeroPercentDistance ); - - -/// given a direction relative to the light source position, is this ray within the - /// light cone (for spotlights..non spots consider all rays to be within their cone) - bool IsDirectionWithinLightCone(const Vector &rdir) const - { - return ((m_Type!=MATERIAL_LIGHT_SPOT) || (rdir.Dot(m_Direction)>=m_PhiDot)); - } - - float OneOverThetaDotMinusPhiDot() const - { - return OneOver_ThetaDot_Minus_PhiDot; - } -}; - - -//----------------------------------------------------------------------------- -// a point light with infinite range -//----------------------------------------------------------------------------- -inline void LightDesc_t::InitPoint( const Vector &pos, const Vector &color ) -{ - m_Type=MATERIAL_LIGHT_POINT; - m_Color=color; - m_Position=pos; - m_Range=0.0; // infinite - m_Attenuation0=1.0; - m_Attenuation1=0; - m_Attenuation2=0; - RecalculateDerivedValues(); -} - - -//----------------------------------------------------------------------------- -// a directional light with infinite range -//----------------------------------------------------------------------------- -inline void LightDesc_t::InitDirectional( const Vector &dir, const Vector &color ) -{ - m_Type=MATERIAL_LIGHT_DIRECTIONAL; - m_Color=color; - m_Direction=dir; - m_Range=0.0; // infinite - m_Attenuation0=1.0; - m_Attenuation1=0; - m_Attenuation2=0; - RecalculateDerivedValues(); -} - - -//----------------------------------------------------------------------------- -// a simple light. cone boundaries in radians. you pass a look_at point and the -// direciton is derived from that. 
-//----------------------------------------------------------------------------- -inline void LightDesc_t::InitSpot(const Vector &pos, const Vector &color, const Vector &point_at, - float inner_cone_boundary, float outer_cone_boundary) -{ - m_Type=MATERIAL_LIGHT_SPOT; - m_Color=color; - m_Position=pos; - m_Direction=point_at; - m_Direction-=pos; - VectorNormalizeFast(m_Direction); - m_Falloff=5.0; // linear angle falloff - m_Theta=inner_cone_boundary; - m_Phi=outer_cone_boundary; - - m_Range=0.0; // infinite - - m_Attenuation0=1.0; - m_Attenuation1=0; - m_Attenuation2=0; - RecalculateDerivedValues(); -} - - -#endif - +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//===========================================================================// + +// light structure definitions. +#ifndef LIGHTDESC_H +#define LIGHTDESC_H + +#include +#include + +//----------------------------------------------------------------------------- +// Light structure +//----------------------------------------------------------------------------- + +enum LightType_t +{ + MATERIAL_LIGHT_DISABLE = 0, + MATERIAL_LIGHT_POINT, + MATERIAL_LIGHT_DIRECTIONAL, + MATERIAL_LIGHT_SPOT, +}; + +enum LightType_OptimizationFlags_t +{ + LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION0 = 1, + LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION1 = 2, + LIGHTTYPE_OPTIMIZATIONFLAGS_HAS_ATTENUATION2 = 4, + LIGHTTYPE_OPTIMIZATIONFLAGS_DERIVED_VALUES_CALCED = 8, +}; + +struct LightDesc_t +{ + LightType_t m_Type; //< MATERIAL_LIGHT_xxx + Vector m_Color; //< color+intensity + Vector m_Position; //< light source center position + Vector m_Direction; //< for SPOT, direction it is pointing + float m_Range; //< distance range for light.0=infinite + float m_Falloff; //< angular falloff exponent for spot lights + float m_Attenuation0; //< constant distance falloff term + float m_Attenuation1; //< linear term of falloff + float m_Attenuation2; //< quadatic term of falloff + float 
m_Theta; //< inner cone angle. no angular falloff + //< within this cone + float m_Phi; //< outer cone angle + + // the values below are derived from the above settings for optimizations + // These aren't used by DX8. . used for software lighting. + float m_ThetaDot; + float m_PhiDot; + unsigned int m_Flags; +protected: + float OneOver_ThetaDot_Minus_PhiDot; + float m_RangeSquared; +public: + + void RecalculateDerivedValues(void); // calculate m_xxDot, m_Type for changed parms + + LightDesc_t(void) + { + } + + // constructors for various useful subtypes + + // a point light with infinite range + LightDesc_t( const Vector &pos, const Vector &color ) + { + InitPoint( pos, color ); + } + + /// a simple light. cone boundaries in radians. you pass a look_at point and the + /// direciton is derived from that. + LightDesc_t( const Vector &pos, const Vector &color, const Vector &point_at, + float inner_cone_boundary, float outer_cone_boundary ) + { + InitSpot( pos, color, point_at, inner_cone_boundary, outer_cone_boundary ); + } + + void InitPoint( const Vector &pos, const Vector &color ); + void InitDirectional( const Vector &dir, const Vector &color ); + void InitSpot(const Vector &pos, const Vector &color, const Vector &point_at, + float inner_cone_boundary, float outer_cone_boundary ); + + /// Given 4 points and 4 normals, ADD lighting from this light into "color". + void ComputeLightAtPoints( const FourVectors &pos, const FourVectors &normal, + FourVectors &color, bool DoHalfLambert=false ) const; + void ComputeNonincidenceLightAtPoints( const FourVectors &pos, FourVectors &color ) const; + void ComputeLightAtPointsForDirectional( const FourVectors &pos, + const FourVectors &normal, + FourVectors &color, bool DoHalfLambert=false ) const; + + // warning - modifies color!!! set color first!! 
+ void SetupOldStyleAttenuation( float fQuadatricAttn, float fLinearAttn, float fConstantAttn ); + + void SetupNewStyleAttenuation( float fFiftyPercentDistance, float fZeroPercentDistance ); + + +/// given a direction relative to the light source position, is this ray within the + /// light cone (for spotlights..non spots consider all rays to be within their cone) + bool IsDirectionWithinLightCone(const Vector &rdir) const + { + return ((m_Type!=MATERIAL_LIGHT_SPOT) || (rdir.Dot(m_Direction)>=m_PhiDot)); + } + + float OneOverThetaDotMinusPhiDot() const + { + return OneOver_ThetaDot_Minus_PhiDot; + } +}; + + +//----------------------------------------------------------------------------- +// a point light with infinite range +//----------------------------------------------------------------------------- +inline void LightDesc_t::InitPoint( const Vector &pos, const Vector &color ) +{ + m_Type=MATERIAL_LIGHT_POINT; + m_Color=color; + m_Position=pos; + m_Range=0.0; // infinite + m_Attenuation0=1.0; + m_Attenuation1=0; + m_Attenuation2=0; + RecalculateDerivedValues(); +} + + +//----------------------------------------------------------------------------- +// a directional light with infinite range +//----------------------------------------------------------------------------- +inline void LightDesc_t::InitDirectional( const Vector &dir, const Vector &color ) +{ + m_Type=MATERIAL_LIGHT_DIRECTIONAL; + m_Color=color; + m_Direction=dir; + m_Range=0.0; // infinite + m_Attenuation0=1.0; + m_Attenuation1=0; + m_Attenuation2=0; + RecalculateDerivedValues(); +} + + +//----------------------------------------------------------------------------- +// a simple light. cone boundaries in radians. you pass a look_at point and the +// direciton is derived from that. 
+//----------------------------------------------------------------------------- +inline void LightDesc_t::InitSpot(const Vector &pos, const Vector &color, const Vector &point_at, + float inner_cone_boundary, float outer_cone_boundary) +{ + m_Type=MATERIAL_LIGHT_SPOT; + m_Color=color; + m_Position=pos; + m_Direction=point_at; + m_Direction-=pos; + VectorNormalizeFast(m_Direction); + m_Falloff=5.0; // linear angle falloff + m_Theta=inner_cone_boundary; + m_Phi=outer_cone_boundary; + + m_Range=0.0; // infinite + + m_Attenuation0=1.0; + m_Attenuation1=0; + m_Attenuation2=0; + RecalculateDerivedValues(); +} + + +#endif + diff --git a/mp/src/public/mathlib/math_pfns.h b/mp/src/public/mathlib/math_pfns.h index 4436eab5..d43411ce 100644 --- a/mp/src/public/mathlib/math_pfns.h +++ b/mp/src/public/mathlib/math_pfns.h @@ -1,80 +1,80 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -//=====================================================================================// - -#ifndef _MATH_PFNS_H_ -#define _MATH_PFNS_H_ - -#if defined( _X360 ) -#include -#endif - -#if !defined( _X360 ) - -// These globals are initialized by mathlib and redirected based on available fpu features -extern float (*pfSqrt)(float x); -extern float (*pfRSqrt)(float x); -extern float (*pfRSqrtFast)(float x); -extern void (*pfFastSinCos)(float x, float *s, float *c); -extern float (*pfFastCos)(float x); - -// The following are not declared as macros because they are often used in limiting situations, -// and sometimes the compiler simply refuses to inline them for some reason -#define FastSqrt(x) (*pfSqrt)(x) -#define FastRSqrt(x) (*pfRSqrt)(x) -#define FastRSqrtFast(x) (*pfRSqrtFast)(x) -#define FastSinCos(x,s,c) (*pfFastSinCos)(x,s,c) -#define FastCos(x) (*pfFastCos)(x) - -#if defined(__i386__) || defined(_M_IX86) -// On x86, the inline FPU or SSE sqrt instruction is faster than -// the overhead of setting up a function call and saving/restoring -// 
the FPU or SSE register state and can be scheduled better, too. -#undef FastSqrt -#define FastSqrt(x) ::sqrtf(x) -#endif - -#endif // !_X360 - -#if defined( _X360 ) - -FORCEINLINE float _VMX_Sqrt( float x ) -{ - return __fsqrts( x ); -} - -FORCEINLINE float _VMX_RSqrt( float x ) -{ - float rroot = __frsqrte( x ); - - // Single iteration NewtonRaphson on reciprocal square root estimate - return (0.5f * rroot) * (3.0f - (x * rroot) * rroot); -} - -FORCEINLINE float _VMX_RSqrtFast( float x ) -{ - return __frsqrte( x ); -} - -FORCEINLINE void _VMX_SinCos( float a, float *pS, float *pC ) -{ - XMScalarSinCos( pS, pC, a ); -} - -FORCEINLINE float _VMX_Cos( float a ) -{ - return XMScalarCos( a ); -} - -// the 360 has fixed hw and calls directly -#define FastSqrt(x) _VMX_Sqrt(x) -#define FastRSqrt(x) _VMX_RSqrt(x) -#define FastRSqrtFast(x) _VMX_RSqrtFast(x) -#define FastSinCos(x,s,c) _VMX_SinCos(x,s,c) -#define FastCos(x) _VMX_Cos(x) - -#endif // _X360 - -#endif // _MATH_PFNS_H_ +//========= Copyright Valve Corporation, All rights reserved. 
============// +// +// Purpose: +// +//=====================================================================================// + +#ifndef _MATH_PFNS_H_ +#define _MATH_PFNS_H_ + +#if defined( _X360 ) +#include +#endif + +#if !defined( _X360 ) + +// These globals are initialized by mathlib and redirected based on available fpu features +extern float (*pfSqrt)(float x); +extern float (*pfRSqrt)(float x); +extern float (*pfRSqrtFast)(float x); +extern void (*pfFastSinCos)(float x, float *s, float *c); +extern float (*pfFastCos)(float x); + +// The following are not declared as macros because they are often used in limiting situations, +// and sometimes the compiler simply refuses to inline them for some reason +#define FastSqrt(x) (*pfSqrt)(x) +#define FastRSqrt(x) (*pfRSqrt)(x) +#define FastRSqrtFast(x) (*pfRSqrtFast)(x) +#define FastSinCos(x,s,c) (*pfFastSinCos)(x,s,c) +#define FastCos(x) (*pfFastCos)(x) + +#if defined(__i386__) || defined(_M_IX86) +// On x86, the inline FPU or SSE sqrt instruction is faster than +// the overhead of setting up a function call and saving/restoring +// the FPU or SSE register state and can be scheduled better, too. 
+#undef FastSqrt +#define FastSqrt(x) ::sqrtf(x) +#endif + +#endif // !_X360 + +#if defined( _X360 ) + +FORCEINLINE float _VMX_Sqrt( float x ) +{ + return __fsqrts( x ); +} + +FORCEINLINE float _VMX_RSqrt( float x ) +{ + float rroot = __frsqrte( x ); + + // Single iteration NewtonRaphson on reciprocal square root estimate + return (0.5f * rroot) * (3.0f - (x * rroot) * rroot); +} + +FORCEINLINE float _VMX_RSqrtFast( float x ) +{ + return __frsqrte( x ); +} + +FORCEINLINE void _VMX_SinCos( float a, float *pS, float *pC ) +{ + XMScalarSinCos( pS, pC, a ); +} + +FORCEINLINE float _VMX_Cos( float a ) +{ + return XMScalarCos( a ); +} + +// the 360 has fixed hw and calls directly +#define FastSqrt(x) _VMX_Sqrt(x) +#define FastRSqrt(x) _VMX_RSqrt(x) +#define FastRSqrtFast(x) _VMX_RSqrtFast(x) +#define FastSinCos(x,s,c) _VMX_SinCos(x,s,c) +#define FastCos(x) _VMX_Cos(x) + +#endif // _X360 + +#endif // _MATH_PFNS_H_ diff --git a/mp/src/public/mathlib/mathlib.h b/mp/src/public/mathlib/mathlib.h index e1873cd0..f734ae68 100644 --- a/mp/src/public/mathlib/mathlib.h +++ b/mp/src/public/mathlib/mathlib.h @@ -1,2186 +1,2186 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -//===========================================================================// - -#ifndef MATH_LIB_H -#define MATH_LIB_H - -#include -#include "tier0/basetypes.h" -#include "tier0/commonmacros.h" -#include "mathlib/vector.h" -#include "mathlib/vector2d.h" -#include "tier0/dbg.h" - -#include "mathlib/math_pfns.h" - -#if defined(__i386__) || defined(_M_IX86) -// For MMX intrinsics -#include -#endif - -// XXX remove me -#undef clamp - -// Uncomment this to enable FP exceptions in parts of the code. -// This can help track down FP bugs. However the code is not -// FP exception clean so this not a turnkey operation. 
-//#define FP_EXCEPTIONS_ENABLED - - -#ifdef FP_EXCEPTIONS_ENABLED -#include // For _clearfp and _controlfp_s -#endif - -// FPExceptionDisabler and FPExceptionEnabler taken from my blog post -// at http://www.altdevblogaday.com/2012/04/20/exceptional-floating-point/ - -// Declare an object of this type in a scope in order to suppress -// all floating-point exceptions temporarily. The old exception -// state will be reset at the end. -class FPExceptionDisabler -{ -public: -#ifdef FP_EXCEPTIONS_ENABLED - FPExceptionDisabler(); - ~FPExceptionDisabler(); - -private: - unsigned int mOldValues; -#else - FPExceptionDisabler() {} - ~FPExceptionDisabler() {} -#endif - -private: - // Make the copy constructor and assignment operator private - // and unimplemented to prohibit copying. - FPExceptionDisabler(const FPExceptionDisabler&); - FPExceptionDisabler& operator=(const FPExceptionDisabler&); -}; - -// Declare an object of this type in a scope in order to enable a -// specified set of floating-point exceptions temporarily. The old -// exception state will be reset at the end. -// This class can be nested. -class FPExceptionEnabler -{ -public: - // Overflow, divide-by-zero, and invalid-operation are the FP - // exceptions most frequently associated with bugs. -#ifdef FP_EXCEPTIONS_ENABLED - FPExceptionEnabler(unsigned int enableBits = _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID); - ~FPExceptionEnabler(); - -private: - unsigned int mOldValues; -#else - FPExceptionEnabler(unsigned int enableBits = 0) - { - } - ~FPExceptionEnabler() - { - } -#endif - -private: - // Make the copy constructor and assignment operator private - // and unimplemented to prohibit copying. 
- FPExceptionEnabler(const FPExceptionEnabler&); - FPExceptionEnabler& operator=(const FPExceptionEnabler&); -}; - - - -#ifdef DEBUG // stop crashing edit-and-continue -FORCEINLINE float clamp( float val, float minVal, float maxVal ) -{ - if ( maxVal < minVal ) - return maxVal; - else if( val < minVal ) - return minVal; - else if( val > maxVal ) - return maxVal; - else - return val; -} -#else // DEBUG -FORCEINLINE float clamp( float val, float minVal, float maxVal ) -{ -#if defined(__i386__) || defined(_M_IX86) - _mm_store_ss( &val, - _mm_min_ss( - _mm_max_ss( - _mm_load_ss(&val), - _mm_load_ss(&minVal) ), - _mm_load_ss(&maxVal) ) ); -#else - val = fpmax(minVal, val); - val = fpmin(maxVal, val); -#endif - return val; -} -#endif // DEBUG - -// -// Returns a clamped value in the range [min, max]. -// -template< class T > -inline T clamp( T const &val, T const &minVal, T const &maxVal ) -{ - if ( maxVal < minVal ) - return maxVal; - else if( val < minVal ) - return minVal; - else if( val > maxVal ) - return maxVal; - else - return val; -} - - -// plane_t structure -// !!! if this is changed, it must be changed in asm code too !!! -// FIXME: does the asm code even exist anymore? 
-// FIXME: this should move to a different file -struct cplane_t -{ - Vector normal; - float dist; - byte type; // for fast side tests - byte signbits; // signx + (signy<<1) + (signz<<1) - byte pad[2]; - -#ifdef VECTOR_NO_SLOW_OPERATIONS - cplane_t() {} - -private: - // No copy constructors allowed if we're in optimal mode - cplane_t(const cplane_t& vOther); -#endif -}; - -// structure offset for asm code -#define CPLANE_NORMAL_X 0 -#define CPLANE_NORMAL_Y 4 -#define CPLANE_NORMAL_Z 8 -#define CPLANE_DIST 12 -#define CPLANE_TYPE 16 -#define CPLANE_SIGNBITS 17 -#define CPLANE_PAD0 18 -#define CPLANE_PAD1 19 - -// 0-2 are axial planes -#define PLANE_X 0 -#define PLANE_Y 1 -#define PLANE_Z 2 - -// 3-5 are non-axial planes snapped to the nearest -#define PLANE_ANYX 3 -#define PLANE_ANYY 4 -#define PLANE_ANYZ 5 - - -//----------------------------------------------------------------------------- -// Frustum plane indices. -// WARNING: there is code that depends on these values -//----------------------------------------------------------------------------- - -enum -{ - FRUSTUM_RIGHT = 0, - FRUSTUM_LEFT = 1, - FRUSTUM_TOP = 2, - FRUSTUM_BOTTOM = 3, - FRUSTUM_NEARZ = 4, - FRUSTUM_FARZ = 5, - FRUSTUM_NUMPLANES = 6 -}; - -extern int SignbitsForPlane( cplane_t *out ); - -class Frustum_t -{ -public: - void SetPlane( int i, int nType, const Vector &vecNormal, float dist ) - { - m_Plane[i].normal = vecNormal; - m_Plane[i].dist = dist; - m_Plane[i].type = nType; - m_Plane[i].signbits = SignbitsForPlane( &m_Plane[i] ); - m_AbsNormal[i].Init( fabs(vecNormal.x), fabs(vecNormal.y), fabs(vecNormal.z) ); - } - - inline const cplane_t *GetPlane( int i ) const { return &m_Plane[i]; } - inline const Vector &GetAbsNormal( int i ) const { return m_AbsNormal[i]; } - -private: - cplane_t m_Plane[FRUSTUM_NUMPLANES]; - Vector m_AbsNormal[FRUSTUM_NUMPLANES]; -}; - -// Computes Y fov from an X fov and a screen aspect ratio + X from Y -float CalcFovY( float flFovX, float flScreenAspect ); -float 
CalcFovX( float flFovY, float flScreenAspect ); - -// Generate a frustum based on perspective view parameters -// NOTE: FOV is specified in degrees, as the *full* view angle (not half-angle) -void GeneratePerspectiveFrustum( const Vector& origin, const QAngle &angles, float flZNear, float flZFar, float flFovX, float flAspectRatio, Frustum_t &frustum ); -void GeneratePerspectiveFrustum( const Vector& origin, const Vector &forward, const Vector &right, const Vector &up, float flZNear, float flZFar, float flFovX, float flFovY, Frustum_t &frustum ); - -// Cull the world-space bounding box to the specified frustum. -bool R_CullBox( const Vector& mins, const Vector& maxs, const Frustum_t &frustum ); -bool R_CullBoxSkipNear( const Vector& mins, const Vector& maxs, const Frustum_t &frustum ); - -struct matrix3x4_t -{ - matrix3x4_t() {} - matrix3x4_t( - float m00, float m01, float m02, float m03, - float m10, float m11, float m12, float m13, - float m20, float m21, float m22, float m23 ) - { - m_flMatVal[0][0] = m00; m_flMatVal[0][1] = m01; m_flMatVal[0][2] = m02; m_flMatVal[0][3] = m03; - m_flMatVal[1][0] = m10; m_flMatVal[1][1] = m11; m_flMatVal[1][2] = m12; m_flMatVal[1][3] = m13; - m_flMatVal[2][0] = m20; m_flMatVal[2][1] = m21; m_flMatVal[2][2] = m22; m_flMatVal[2][3] = m23; - } - - //----------------------------------------------------------------------------- - // Creates a matrix where the X axis = forward - // the Y axis = left, and the Z axis = up - //----------------------------------------------------------------------------- - void Init( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin ) - { - m_flMatVal[0][0] = xAxis.x; m_flMatVal[0][1] = yAxis.x; m_flMatVal[0][2] = zAxis.x; m_flMatVal[0][3] = vecOrigin.x; - m_flMatVal[1][0] = xAxis.y; m_flMatVal[1][1] = yAxis.y; m_flMatVal[1][2] = zAxis.y; m_flMatVal[1][3] = vecOrigin.y; - m_flMatVal[2][0] = xAxis.z; m_flMatVal[2][1] = yAxis.z; m_flMatVal[2][2] = zAxis.z; m_flMatVal[2][3] 
= vecOrigin.z; - } - - //----------------------------------------------------------------------------- - // Creates a matrix where the X axis = forward - // the Y axis = left, and the Z axis = up - //----------------------------------------------------------------------------- - matrix3x4_t( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin ) - { - Init( xAxis, yAxis, zAxis, vecOrigin ); - } - - inline void Invalidate( void ) - { - for (int i = 0; i < 3; i++) - { - for (int j = 0; j < 4; j++) - { - m_flMatVal[i][j] = VEC_T_NAN; - } - } - } - - float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; } - const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; } - float *Base() { return &m_flMatVal[0][0]; } - const float *Base() const { return &m_flMatVal[0][0]; } - - float m_flMatVal[3][4]; -}; - - -#ifndef M_PI - #define M_PI 3.14159265358979323846 // matches value in gcc v2 math.h -#endif - -#define M_PI_F ((float)(M_PI)) // Shouldn't collide with anything. - -// NJS: Inlined to prevent floats from being autopromoted to doubles, as with the old system. -#ifndef RAD2DEG - #define RAD2DEG( x ) ( (float)(x) * (float)(180.f / M_PI_F) ) -#endif - -#ifndef DEG2RAD - #define DEG2RAD( x ) ( (float)(x) * (float)(M_PI_F / 180.f) ) -#endif - -// Used to represent sides of things like planes. -#define SIDE_FRONT 0 -#define SIDE_BACK 1 -#define SIDE_ON 2 -#define SIDE_CROSS -2 // necessary for polylib.c - -#define ON_VIS_EPSILON 0.01 // necessary for vvis (flow.c) -- again look into moving later! -#define EQUAL_EPSILON 0.001 // necessary for vbsp (faces.c) -- should look into moving it there? 
- -extern bool s_bMathlibInitialized; - -extern const Vector vec3_origin; -extern const QAngle vec3_angle; -extern const Quaternion quat_identity; -extern const Vector vec3_invalid; -extern const int nanmask; - -#define IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask) - -FORCEINLINE vec_t DotProduct(const vec_t *v1, const vec_t *v2) -{ - return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2]; -} -FORCEINLINE void VectorSubtract(const vec_t *a, const vec_t *b, vec_t *c) -{ - c[0]=a[0]-b[0]; - c[1]=a[1]-b[1]; - c[2]=a[2]-b[2]; -} -FORCEINLINE void VectorAdd(const vec_t *a, const vec_t *b, vec_t *c) -{ - c[0]=a[0]+b[0]; - c[1]=a[1]+b[1]; - c[2]=a[2]+b[2]; -} -FORCEINLINE void VectorCopy(const vec_t *a, vec_t *b) -{ - b[0]=a[0]; - b[1]=a[1]; - b[2]=a[2]; -} -FORCEINLINE void VectorClear(vec_t *a) -{ - a[0]=a[1]=a[2]=0; -} - -FORCEINLINE float VectorMaximum(const vec_t *v) -{ - return max( v[0], max( v[1], v[2] ) ); -} - -FORCEINLINE float VectorMaximum(const Vector& v) -{ - return max( v.x, max( v.y, v.z ) ); -} - -FORCEINLINE void VectorScale (const float* in, vec_t scale, float* out) -{ - out[0] = in[0]*scale; - out[1] = in[1]*scale; - out[2] = in[2]*scale; -} - - -// Cannot be forceinline as they have overloads: -inline void VectorFill(vec_t *a, float b) -{ - a[0]=a[1]=a[2]=b; -} - -inline void VectorNegate(vec_t *a) -{ - a[0]=-a[0]; - a[1]=-a[1]; - a[2]=-a[2]; -} - - -//#define VectorMaximum(a) ( max( (a)[0], max( (a)[1], (a)[2] ) ) ) -#define Vector2Clear(x) {(x)[0]=(x)[1]=0;} -#define Vector2Negate(x) {(x)[0]=-((x)[0]);(x)[1]=-((x)[1]);} -#define Vector2Copy(a,b) {(b)[0]=(a)[0];(b)[1]=(a)[1];} -#define Vector2Subtract(a,b,c) {(c)[0]=(a)[0]-(b)[0];(c)[1]=(a)[1]-(b)[1];} -#define Vector2Add(a,b,c) {(c)[0]=(a)[0]+(b)[0];(c)[1]=(a)[1]+(b)[1];} -#define Vector2Scale(a,b,c) {(c)[0]=(b)*(a)[0];(c)[1]=(b)*(a)[1];} - -// NJS: Some functions in VBSP still need to use these for dealing with mixing vec4's and shorts with vec_t's. -// remove when no longer needed. 
-#define VECTOR_COPY( A, B ) do { (B)[0] = (A)[0]; (B)[1] = (A)[1]; (B)[2]=(A)[2]; } while(0) -#define DOT_PRODUCT( A, B ) ( (A)[0]*(B)[0] + (A)[1]*(B)[1] + (A)[2]*(B)[2] ) - -FORCEINLINE void VectorMAInline( const float* start, float scale, const float* direction, float* dest ) -{ - dest[0]=start[0]+direction[0]*scale; - dest[1]=start[1]+direction[1]*scale; - dest[2]=start[2]+direction[2]*scale; -} - -FORCEINLINE void VectorMAInline( const Vector& start, float scale, const Vector& direction, Vector& dest ) -{ - dest.x=start.x+direction.x*scale; - dest.y=start.y+direction.y*scale; - dest.z=start.z+direction.z*scale; -} - -FORCEINLINE void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest ) -{ - VectorMAInline(start, scale, direction, dest); -} - -FORCEINLINE void VectorMA( const float * start, float scale, const float *direction, float *dest ) -{ - VectorMAInline(start, scale, direction, dest); -} - - -int VectorCompare (const float *v1, const float *v2); - -inline float VectorLength(const float *v) -{ - return FastSqrt( v[0]*v[0] + v[1]*v[1] + v[2]*v[2] + FLT_EPSILON ); -} - -void CrossProduct (const float *v1, const float *v2, float *cross); - -qboolean VectorsEqual( const float *v1, const float *v2 ); - -inline vec_t RoundInt (vec_t in) -{ - return floor(in + 0.5f); -} - -int Q_log2(int val); - -// Math routines done in optimized assembly math package routines -void inline SinCos( float radians, float *sine, float *cosine ) -{ -#if defined( _X360 ) - XMScalarSinCos( sine, cosine, radians ); -#elif defined( PLATFORM_WINDOWS_PC32 ) - _asm - { - fld DWORD PTR [radians] - fsincos - - mov edx, DWORD PTR [cosine] - mov eax, DWORD PTR [sine] - - fstp DWORD PTR [edx] - fstp DWORD PTR [eax] - } -#elif defined( PLATFORM_WINDOWS_PC64 ) - *sine = sin( radians ); - *cosine = cos( radians ); -#elif defined( POSIX ) - register double __cosr, __sinr; - __asm ("fsincos" : "=t" (__cosr), "=u" (__sinr) : "0" (radians)); - - *sine = __sinr; - *cosine 
= __cosr; -#endif -} - -#define SIN_TABLE_SIZE 256 -#define FTOIBIAS 12582912.f -extern float SinCosTable[SIN_TABLE_SIZE]; - -inline float TableCos( float theta ) -{ - union - { - int i; - float f; - } ftmp; - - // ideally, the following should compile down to: theta * constant + constant, changing any of these constants from defines sometimes fubars this. - ftmp.f = theta * ( float )( SIN_TABLE_SIZE / ( 2.0f * M_PI ) ) + ( FTOIBIAS + ( SIN_TABLE_SIZE / 4 ) ); - return SinCosTable[ ftmp.i & ( SIN_TABLE_SIZE - 1 ) ]; -} - -inline float TableSin( float theta ) -{ - union - { - int i; - float f; - } ftmp; - - // ideally, the following should compile down to: theta * constant + constant - ftmp.f = theta * ( float )( SIN_TABLE_SIZE / ( 2.0f * M_PI ) ) + FTOIBIAS; - return SinCosTable[ ftmp.i & ( SIN_TABLE_SIZE - 1 ) ]; -} - -template -FORCEINLINE T Square( T const &a ) -{ - return a * a; -} - - -// return the smallest power of two >= x. -// returns 0 if x == 0 or x > 0x80000000 (ie numbers that would be negative if x was signed) -// NOTE: the old code took an int, and if you pass in an int of 0x80000000 casted to a uint, -// you'll get 0x80000000, which is correct for uints, instead of 0, which was correct for ints -FORCEINLINE uint SmallestPowerOfTwoGreaterOrEqual( uint x ) -{ - x -= 1; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - return x + 1; -} - -// return the largest power of two <= x. 
Will return 0 if passed 0 -FORCEINLINE uint LargestPowerOfTwoLessThanOrEqual( uint x ) -{ - if ( x >= 0x80000000 ) - return 0x80000000; - - return SmallestPowerOfTwoGreaterOrEqual( x + 1 ) >> 1; -} - - -// Math routines for optimizing division -void FloorDivMod (double numer, double denom, int *quotient, int *rem); -int GreatestCommonDivisor (int i1, int i2); - -// Test for FPU denormal mode -bool IsDenormal( const float &val ); - -// MOVEMENT INFO -enum -{ - PITCH = 0, // up / down - YAW, // left / right - ROLL // fall over -}; - -void MatrixAngles( const matrix3x4_t & matrix, float *angles ); // !!!! -void MatrixVectors( const matrix3x4_t &matrix, Vector* pForward, Vector *pRight, Vector *pUp ); -void VectorTransform (const float *in1, const matrix3x4_t & in2, float *out); -void VectorITransform (const float *in1, const matrix3x4_t & in2, float *out); -void VectorRotate( const float *in1, const matrix3x4_t & in2, float *out); -void VectorRotate( const Vector &in1, const QAngle &in2, Vector &out ); -void VectorRotate( const Vector &in1, const Quaternion &in2, Vector &out ); -void VectorIRotate( const float *in1, const matrix3x4_t & in2, float *out); - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -QAngle TransformAnglesToLocalSpace( const QAngle &angles, const matrix3x4_t &parentMatrix ); -QAngle TransformAnglesToWorldSpace( const QAngle &angles, const matrix3x4_t &parentMatrix ); - -#endif - -void MatrixInitialize( matrix3x4_t &mat, const Vector &vecOrigin, const Vector &vecXAxis, const Vector &vecYAxis, const Vector &vecZAxis ); -void MatrixCopy( const matrix3x4_t &in, matrix3x4_t &out ); -void MatrixInvert( const matrix3x4_t &in, matrix3x4_t &out ); - -// Matrix equality test -bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float flTolerance = 1e-5 ); - -void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out ); -void MatrixSetColumn( const Vector &in, int column, matrix3x4_t &out ); - -inline void MatrixGetTranslation( const 
matrix3x4_t &in, Vector &out ) -{ - MatrixGetColumn ( in, 3, out ); -} - -inline void MatrixSetTranslation( const Vector &in, matrix3x4_t &out ) -{ - MatrixSetColumn ( in, 3, out ); -} - -void MatrixScaleBy ( const float flScale, matrix3x4_t &out ); -void MatrixScaleByZero ( matrix3x4_t &out ); - -//void DecomposeRotation( const matrix3x4_t &mat, float *out ); -void ConcatRotations (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out); -void ConcatTransforms (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out); - -// For identical interface w/ VMatrix -inline void MatrixMultiply ( const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out ) -{ - ConcatTransforms( in1, in2, out ); -} - -void QuaternionSlerp( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt ); -void QuaternionSlerpNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt ); -void QuaternionBlend( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt ); -void QuaternionBlendNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt ); -void QuaternionIdentityBlend( const Quaternion &p, float t, Quaternion &qt ); -float QuaternionAngleDiff( const Quaternion &p, const Quaternion &q ); -void QuaternionScale( const Quaternion &p, float t, Quaternion &q ); -void QuaternionAlign( const Quaternion &p, const Quaternion &q, Quaternion &qt ); -float QuaternionDotProduct( const Quaternion &p, const Quaternion &q ); -void QuaternionConjugate( const Quaternion &p, Quaternion &q ); -void QuaternionInvert( const Quaternion &p, Quaternion &q ); -float QuaternionNormalize( Quaternion &q ); -void QuaternionAdd( const Quaternion &p, const Quaternion &q, Quaternion &qt ); -void QuaternionMult( const Quaternion &p, const Quaternion &q, Quaternion &qt ); -void QuaternionMatrix( const Quaternion &q, matrix3x4_t &matrix ); -void QuaternionMatrix( const Quaternion &q, const Vector &pos, matrix3x4_t &matrix ); -void QuaternionAngles( 
const Quaternion &q, QAngle &angles ); -void AngleQuaternion( const QAngle& angles, Quaternion &qt ); -void QuaternionAngles( const Quaternion &q, RadianEuler &angles ); -void AngleQuaternion( RadianEuler const &angles, Quaternion &qt ); -void QuaternionAxisAngle( const Quaternion &q, Vector &axis, float &angle ); -void AxisAngleQuaternion( const Vector &axis, float angle, Quaternion &q ); -void BasisToQuaternion( const Vector &vecForward, const Vector &vecRight, const Vector &vecUp, Quaternion &q ); -void MatrixQuaternion( const matrix3x4_t &mat, Quaternion &q ); - -// A couple methods to find the dot product of a vector with a matrix row or column... -inline float MatrixRowDotProduct( const matrix3x4_t &in1, int row, const Vector& in2 ) -{ - Assert( (row >= 0) && (row < 3) ); - return DotProduct( in1[row], in2.Base() ); -} - -inline float MatrixColumnDotProduct( const matrix3x4_t &in1, int col, const Vector& in2 ) -{ - Assert( (col >= 0) && (col < 4) ); - return in1[0][col] * in2[0] + in1[1][col] * in2[1] + in1[2][col] * in2[2]; -} - -int __cdecl BoxOnPlaneSide (const float *emins, const float *emaxs, const cplane_t *plane); - -inline float anglemod(float a) -{ - a = (360.f/65536) * ((int)(a*(65536.f/360.0f)) & 65535); - return a; -} - -// Remap a value in the range [A,B] to [C,D]. -inline float RemapVal( float val, float A, float B, float C, float D) -{ - if ( A == B ) - return val >= B ? D : C; - return C + (D - C) * (val - A) / (B - A); -} - -inline float RemapValClamped( float val, float A, float B, float C, float D) -{ - if ( A == B ) - return val >= B ? D : C; - float cVal = (val - A) / (B - A); - cVal = clamp( cVal, 0.0f, 1.0f ); - - return C + (D - C) * cVal; -} - -// Returns A + (B-A)*flPercent. 
-// float Lerp( float flPercent, float A, float B ); -template -FORCEINLINE T Lerp( float flPercent, T const &A, T const &B ) -{ - return A + (B - A) * flPercent; -} - -FORCEINLINE float Sqr( float f ) -{ - return f*f; -} - -// 5-argument floating point linear interpolation. -// FLerp(f1,f2,i1,i2,x)= -// f1 at x=i1 -// f2 at x=i2 -// smooth lerp between f1 and f2 at x>i1 and xi2 -// -// If you know a function f(x)'s value (f1) at position i1, and its value (f2) at position i2, -// the function can be linearly interpolated with FLerp(f1,f2,i1,i2,x) -// i2=i1 will cause a divide by zero. -static inline float FLerp(float f1, float f2, float i1, float i2, float x) -{ - return f1+(f2-f1)*(x-i1)/(i2-i1); -} - - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -// YWB: Specialization for interpolating euler angles via quaternions... -template<> FORCEINLINE QAngle Lerp( float flPercent, const QAngle& q1, const QAngle& q2 ) -{ - // Avoid precision errors - if ( q1 == q2 ) - return q1; - - Quaternion src, dest; - - // Convert to quaternions - AngleQuaternion( q1, src ); - AngleQuaternion( q2, dest ); - - Quaternion result; - - // Slerp - QuaternionSlerp( src, dest, flPercent, result ); - - // Convert to euler - QAngle output; - QuaternionAngles( result, output ); - return output; -} - -#else - -#pragma error - -// NOTE NOTE: I haven't tested this!! It may not work! 
Check out interpolatedvar.cpp in the client dll to try it -template<> FORCEINLINE QAngleByValue Lerp( float flPercent, const QAngleByValue& q1, const QAngleByValue& q2 ) -{ - // Avoid precision errors - if ( q1 == q2 ) - return q1; - - Quaternion src, dest; - - // Convert to quaternions - AngleQuaternion( q1, src ); - AngleQuaternion( q2, dest ); - - Quaternion result; - - // Slerp - QuaternionSlerp( src, dest, flPercent, result ); - - // Convert to euler - QAngleByValue output; - QuaternionAngles( result, output ); - return output; -} - -#endif // VECTOR_NO_SLOW_OPERATIONS - - -/// Same as swap(), but won't cause problems with std::swap -template -FORCEINLINE void V_swap( T& x, T& y ) -{ - T temp = x; - x = y; - y = temp; -} - -template FORCEINLINE T AVG(T a, T b) -{ - return (a+b)/2; -} - -// number of elements in an array of static size -#define NELEMS(x) ARRAYSIZE(x) - -// XYZ macro, for printf type functions - ex printf("%f %f %f",XYZ(myvector)); -#define XYZ(v) (v).x,(v).y,(v).z - - -inline float Sign( float x ) -{ - return (x <0.0f) ? -1.0f : 1.0f; -} - -// -// Clamps the input integer to the given array bounds. -// Equivalent to the following, but without using any branches: -// -// if( n < 0 ) return 0; -// else if ( n > maxindex ) return maxindex; -// else return n; -// -// This is not always a clear performance win, but when you have situations where a clamped -// value is thrashing against a boundary this is a big win. (ie, valid, invalid, valid, invalid, ...) -// -// Note: This code has been run against all possible integers. -// -inline int ClampArrayBounds( int n, unsigned maxindex ) -{ - // mask is 0 if less than 4096, 0xFFFFFFFF if greater than - unsigned int inrangemask = 0xFFFFFFFF + (((unsigned) n) > maxindex ); - unsigned int lessthan0mask = 0xFFFFFFFF + ( n >= 0 ); - - // If the result was valid, set the result, (otherwise sets zero) - int result = (inrangemask & n); - - // if the result was out of range or zero. 
- result |= ((~inrangemask) & (~lessthan0mask)) & maxindex; - - return result; -} - - -#define BOX_ON_PLANE_SIDE(emins, emaxs, p) \ - (((p)->type < 3)? \ - ( \ - ((p)->dist <= (emins)[(p)->type])? \ - 1 \ - : \ - ( \ - ((p)->dist >= (emaxs)[(p)->type])?\ - 2 \ - : \ - 3 \ - ) \ - ) \ - : \ - BoxOnPlaneSide( (emins), (emaxs), (p))) - -//----------------------------------------------------------------------------- -// FIXME: Vector versions.... the float versions will go away hopefully soon! -//----------------------------------------------------------------------------- - -void AngleVectors (const QAngle& angles, Vector *forward); -void AngleVectors (const QAngle& angles, Vector *forward, Vector *right, Vector *up); -void AngleVectorsTranspose (const QAngle& angles, Vector *forward, Vector *right, Vector *up); -void AngleMatrix (const QAngle &angles, matrix3x4_t &mat ); -void AngleMatrix( const QAngle &angles, const Vector &position, matrix3x4_t &mat ); -void AngleMatrix (const RadianEuler &angles, matrix3x4_t &mat ); -void AngleMatrix( RadianEuler const &angles, const Vector &position, matrix3x4_t &mat ); -void AngleIMatrix (const QAngle &angles, matrix3x4_t &mat ); -void AngleIMatrix (const QAngle &angles, const Vector &position, matrix3x4_t &mat ); -void AngleIMatrix (const RadianEuler &angles, matrix3x4_t &mat ); -void VectorAngles( const Vector &forward, QAngle &angles ); -void VectorAngles( const Vector &forward, const Vector &pseudoup, QAngle &angles ); -void VectorMatrix( const Vector &forward, matrix3x4_t &mat ); -void VectorVectors( const Vector &forward, Vector &right, Vector &up ); -void SetIdentityMatrix( matrix3x4_t &mat ); -void SetScaleMatrix( float x, float y, float z, matrix3x4_t &dst ); -void MatrixBuildRotationAboutAxis( const Vector &vAxisOfRot, float angleDegrees, matrix3x4_t &dst ); - -inline void SetScaleMatrix( float flScale, matrix3x4_t &dst ) -{ - SetScaleMatrix( flScale, flScale, flScale, dst ); -} - -inline void SetScaleMatrix( const 
Vector& scale, matrix3x4_t &dst ) -{ - SetScaleMatrix( scale.x, scale.y, scale.z, dst ); -} - -// Computes the inverse transpose -void MatrixTranspose( matrix3x4_t& mat ); -void MatrixTranspose( const matrix3x4_t& src, matrix3x4_t& dst ); -void MatrixInverseTranspose( const matrix3x4_t& src, matrix3x4_t& dst ); - -inline void PositionMatrix( const Vector &position, matrix3x4_t &mat ) -{ - MatrixSetColumn( position, 3, mat ); -} - -inline void MatrixPosition( const matrix3x4_t &matrix, Vector &position ) -{ - MatrixGetColumn( matrix, 3, position ); -} - -inline void VectorRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out) -{ - VectorRotate( &in1.x, in2, &out.x ); -} - -inline void VectorIRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out) -{ - VectorIRotate( &in1.x, in2, &out.x ); -} - -inline void MatrixAngles( const matrix3x4_t &matrix, QAngle &angles ) -{ - MatrixAngles( matrix, &angles.x ); -} - -inline void MatrixAngles( const matrix3x4_t &matrix, QAngle &angles, Vector &position ) -{ - MatrixAngles( matrix, angles ); - MatrixPosition( matrix, position ); -} - -inline void MatrixAngles( const matrix3x4_t &matrix, RadianEuler &angles ) -{ - MatrixAngles( matrix, &angles.x ); - - angles.Init( DEG2RAD( angles.z ), DEG2RAD( angles.x ), DEG2RAD( angles.y ) ); -} - -void MatrixAngles( const matrix3x4_t &mat, RadianEuler &angles, Vector &position ); - -void MatrixAngles( const matrix3x4_t &mat, Quaternion &q, Vector &position ); - -inline int VectorCompare (const Vector& v1, const Vector& v2) -{ - return v1 == v2; -} - -inline void VectorTransform (const Vector& in1, const matrix3x4_t &in2, Vector &out) -{ - VectorTransform( &in1.x, in2, &out.x ); -} - -inline void VectorITransform (const Vector& in1, const matrix3x4_t &in2, Vector &out) -{ - VectorITransform( &in1.x, in2, &out.x ); -} - -/* -inline void DecomposeRotation( const matrix3x4_t &mat, Vector &out ) -{ - DecomposeRotation( mat, &out.x ); -} -*/ - -inline int BoxOnPlaneSide (const 
Vector& emins, const Vector& emaxs, const cplane_t *plane ) -{ - return BoxOnPlaneSide( &emins.x, &emaxs.x, plane ); -} - -inline void VectorFill(Vector& a, float b) -{ - a[0]=a[1]=a[2]=b; -} - -inline void VectorNegate(Vector& a) -{ - a[0] = -a[0]; - a[1] = -a[1]; - a[2] = -a[2]; -} - -inline vec_t VectorAvg(Vector& a) -{ - return ( a[0] + a[1] + a[2] ) / 3; -} - -//----------------------------------------------------------------------------- -// Box/plane test (slow version) -//----------------------------------------------------------------------------- -inline int FASTCALL BoxOnPlaneSide2 (const Vector& emins, const Vector& emaxs, const cplane_t *p, float tolerance = 0.f ) -{ - Vector corners[2]; - - if (p->normal[0] < 0) - { - corners[0][0] = emins[0]; - corners[1][0] = emaxs[0]; - } - else - { - corners[1][0] = emins[0]; - corners[0][0] = emaxs[0]; - } - - if (p->normal[1] < 0) - { - corners[0][1] = emins[1]; - corners[1][1] = emaxs[1]; - } - else - { - corners[1][1] = emins[1]; - corners[0][1] = emaxs[1]; - } - - if (p->normal[2] < 0) - { - corners[0][2] = emins[2]; - corners[1][2] = emaxs[2]; - } - else - { - corners[1][2] = emins[2]; - corners[0][2] = emaxs[2]; - } - - int sides = 0; - - float dist1 = DotProduct (p->normal, corners[0]) - p->dist; - if (dist1 >= tolerance) - sides = 1; - - float dist2 = DotProduct (p->normal, corners[1]) - p->dist; - if (dist2 < -tolerance) - sides |= 2; - - return sides; -} - -//----------------------------------------------------------------------------- -// Helpers for bounding box construction -//----------------------------------------------------------------------------- - -void ClearBounds (Vector& mins, Vector& maxs); -void AddPointToBounds (const Vector& v, Vector& mins, Vector& maxs); - -// -// COLORSPACE/GAMMA CONVERSION STUFF -// -void BuildGammaTable( float gamma, float texGamma, float brightness, int overbright ); - -// convert texture to linear 0..1 value -inline float TexLightToLinear( int c, int exponent ) 
-{ - extern float power2_n[256]; - Assert( exponent >= -128 && exponent <= 127 ); - return ( float )c * power2_n[exponent+128]; -} - - -// convert texture to linear 0..1 value -int LinearToTexture( float f ); -// converts 0..1 linear value to screen gamma (0..255) -int LinearToScreenGamma( float f ); -float TextureToLinear( int c ); - -// compressed color format -struct ColorRGBExp32 -{ - byte r, g, b; - signed char exponent; -}; - -void ColorRGBExp32ToVector( const ColorRGBExp32& in, Vector& out ); -void VectorToColorRGBExp32( const Vector& v, ColorRGBExp32 &c ); - -// solve for "x" where "a x^2 + b x + c = 0", return true if solution exists -bool SolveQuadratic( float a, float b, float c, float &root1, float &root2 ); - -// solves for "a, b, c" where "a x^2 + b x + c = y", return true if solution exists -bool SolveInverseQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c ); - -// solves for a,b,c specified as above, except that it always creates a monotonically increasing or -// decreasing curve if the data is monotonically increasing or decreasing. In order to enforce the -// monoticity condition, it is possible that the resulting quadratic will only approximate the data -// instead of interpolating it. This code is not especially fast. -bool SolveInverseQuadraticMonotonic( float x1, float y1, float x2, float y2, - float x3, float y3, float &a, float &b, float &c ); - - - - -// solves for "a, b, c" where "1/(a x^2 + b x + c ) = y", return true if solution exists -bool SolveInverseReciprocalQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c ); - -// rotate a vector around the Z axis (YAW) -void VectorYawRotate( const Vector& in, float flYaw, Vector &out); - - -// Bias takes an X value between 0 and 1 and returns another value between 0 and 1 -// The curve is biased towards 0 or 1 based on biasAmt, which is between 0 and 1. 
-// Lower values of biasAmt bias the curve towards 0 and higher values bias it towards 1. -// -// For example, with biasAmt = 0.2, the curve looks like this: -// -// 1 -// | * -// | * -// | * -// | ** -// | ** -// | **** -// |********* -// |___________________ -// 0 1 -// -// -// With biasAmt = 0.8, the curve looks like this: -// -// 1 -// | ************** -// | ** -// | * -// | * -// |* -// |* -// |* -// |___________________ -// 0 1 -// -// With a biasAmt of 0.5, Bias returns X. -float Bias( float x, float biasAmt ); - - -// Gain is similar to Bias, but biasAmt biases towards or away from 0.5. -// Lower bias values bias towards 0.5 and higher bias values bias away from it. -// -// For example, with biasAmt = 0.2, the curve looks like this: -// -// 1 -// | * -// | * -// | ** -// | *************** -// | ** -// | * -// |* -// |___________________ -// 0 1 -// -// -// With biasAmt = 0.8, the curve looks like this: -// -// 1 -// | ***** -// | *** -// | * -// | * -// | * -// | *** -// |***** -// |___________________ -// 0 1 -float Gain( float x, float biasAmt ); - - -// SmoothCurve maps a 0-1 value into another 0-1 value based on a cosine wave -// where the derivatives of the function at 0 and 1 (and 0.5) are 0. This is useful for -// any fadein/fadeout effect where it should start and end smoothly. -// -// The curve looks like this: -// -// 1 -// | ** -// | * * -// | * * -// | * * -// | * * -// | ** ** -// |*** *** -// |___________________ -// 0 1 -// -float SmoothCurve( float x ); - - -// This works like SmoothCurve, with two changes: -// -// 1. Instead of the curve peaking at 0.5, it will peak at flPeakPos. -// (So if you specify flPeakPos=0.2, then the peak will slide to the left). -// -// 2. flPeakSharpness is a 0-1 value controlling the sharpness of the peak. -// Low values blunt the peak and high values sharpen the peak. 
-float SmoothCurve_Tweak( float x, float flPeakPos=0.5, float flPeakSharpness=0.5 ); - - -//float ExponentialDecay( float halflife, float dt ); -//float ExponentialDecay( float decayTo, float decayTime, float dt ); - -// halflife is time for value to reach 50% -inline float ExponentialDecay( float halflife, float dt ) -{ - // log(0.5) == -0.69314718055994530941723212145818 - return expf( -0.69314718f / halflife * dt); -} - -// decayTo is factor the value should decay to in decayTime -inline float ExponentialDecay( float decayTo, float decayTime, float dt ) -{ - return expf( logf( decayTo ) / decayTime * dt); -} - -// Get the integrated distanced traveled -// decayTo is factor the value should decay to in decayTime -// dt is the time relative to the last velocity update -inline float ExponentialDecayIntegral( float decayTo, float decayTime, float dt ) -{ - return (powf( decayTo, dt / decayTime) * decayTime - decayTime) / logf( decayTo ); -} - -// hermite basis function for smooth interpolation -// Similar to Gain() above, but very cheap to call -// value should be between 0 & 1 inclusive -inline float SimpleSpline( float value ) -{ - float valueSquared = value * value; - - // Nice little ease-in, ease-out spline-like curve - return (3 * valueSquared - 2 * valueSquared * value); -} - -// remaps a value in [startInterval, startInterval+rangeInterval] from linear to -// spline using SimpleSpline -inline float SimpleSplineRemapVal( float val, float A, float B, float C, float D) -{ - if ( A == B ) - return val >= B ? D : C; - float cVal = (val - A) / (B - A); - return C + (D - C) * SimpleSpline( cVal ); -} - -// remaps a value in [startInterval, startInterval+rangeInterval] from linear to -// spline using SimpleSpline -inline float SimpleSplineRemapValClamped( float val, float A, float B, float C, float D ) -{ - if ( A == B ) - return val >= B ? 
D : C; - float cVal = (val - A) / (B - A); - cVal = clamp( cVal, 0.0f, 1.0f ); - return C + (D - C) * SimpleSpline( cVal ); -} - -FORCEINLINE int RoundFloatToInt(float f) -{ -#if defined(__i386__) || defined(_M_IX86) || defined( PLATFORM_WINDOWS_PC64 ) - return _mm_cvtss_si32(_mm_load_ss(&f)); -#elif defined( _X360 ) -#ifdef Assert - Assert( IsFPUControlWordSet() ); -#endif - union - { - double flResult; - int pResult[2]; - }; - flResult = __fctiw( f ); - return pResult[1]; -#else -#error Unknown architecture -#endif -} - -FORCEINLINE unsigned char RoundFloatToByte(float f) -{ - int nResult = RoundFloatToInt(f); -#ifdef Assert - Assert( (nResult & ~0xFF) == 0 ); -#endif - return (unsigned char) nResult; -} - -FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f) -{ -#if defined( _X360 ) -#ifdef Assert - Assert( IsFPUControlWordSet() ); -#endif - union - { - double flResult; - int pIntResult[2]; - unsigned long pResult[2]; - }; - flResult = __fctiw( f ); - Assert( pIntResult[1] >= 0 ); - return pResult[1]; -#else // !X360 - -#if defined( PLATFORM_WINDOWS_PC64 ) - uint nRet = ( uint ) f; - if ( nRet & 1 ) - { - if ( ( f - floor( f ) >= 0.5 ) ) - { - nRet++; - } - } - else - { - if ( ( f - floor( f ) > 0.5 ) ) - { - nRet++; - } - } - return nRet; -#else // PLATFORM_WINDOWS_PC64 - unsigned char nResult[8]; - - #if defined( _WIN32 ) - __asm - { - fld f - fistp qword ptr nResult - } - #elif POSIX - __asm __volatile__ ( - "fistpl %0;": "=m" (nResult): "t" (f) : "st" - ); - #endif - - return *((unsigned long*)nResult); -#endif // PLATFORM_WINDOWS_PC64 -#endif // !X360 -} - -FORCEINLINE bool IsIntegralValue( float flValue, float flTolerance = 0.001f ) -{ - return fabs( RoundFloatToInt( flValue ) - flValue ) < flTolerance; -} - -// Fast, accurate ftol: -FORCEINLINE int Float2Int( float a ) -{ -#if defined( _X360 ) - union - { - double flResult; - int pResult[2]; - }; - flResult = __fctiwz( a ); - return pResult[1]; -#else // !X360 - // Rely on compiler to generate 
CVTTSS2SI on x86 - return (int) a; -#endif -} - -// Over 15x faster than: (int)floor(value) -inline int Floor2Int( float a ) -{ - int RetVal; -#if defined( __i386__ ) - // Convert to int and back, compare, subtract one if too big - __m128 a128 = _mm_set_ss(a); - RetVal = _mm_cvtss_si32(a128); - __m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal); - RetVal -= _mm_comigt_ss( rounded128, a128 ); -#else - RetVal = static_cast( floor(a) ); -#endif - return RetVal; -} - -//----------------------------------------------------------------------------- -// Fast color conversion from float to unsigned char -//----------------------------------------------------------------------------- -FORCEINLINE unsigned int FastFToC( float c ) -{ -#if defined( __i386__ ) - // IEEE float bit manipulation works for values between [0, 1<<23) - union { float f; int i; } convert = { c*255.0f + (float)(1<<23) }; - return convert.i & 255; -#else - // consoles CPUs suffer from load-hit-store penalty - return Float2Int( c * 255.0f ); -#endif -} - -//----------------------------------------------------------------------------- -// Fast conversion from float to integer with magnitude less than 2**22 -//----------------------------------------------------------------------------- -FORCEINLINE int FastFloatToSmallInt( float c ) -{ -#if defined( __i386__ ) - // IEEE float bit manipulation works for values between [-1<<22, 1<<22) - union { float f; int i; } convert = { c + (float)(3<<22) }; - return (convert.i & ((1<<23)-1)) - (1<<22); -#else - // consoles CPUs suffer from load-hit-store penalty - return Float2Int( c ); -#endif -} - -//----------------------------------------------------------------------------- -// Purpose: Bound input float to .001 (millisecond) boundary -// Input : in - -// Output : inline float -//----------------------------------------------------------------------------- -inline float ClampToMsec( float in ) -{ - int msec = Floor2Int( in * 1000.0f + 0.5f ); - return 
0.001f * msec; -} - -// Over 15x faster than: (int)ceil(value) -inline int Ceil2Int( float a ) -{ - int RetVal; -#if defined( __i386__ ) - // Convert to int and back, compare, add one if too small - __m128 a128 = _mm_load_ss(&a); - RetVal = _mm_cvtss_si32(a128); - __m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal); - RetVal += _mm_comilt_ss( rounded128, a128 ); -#else - RetVal = static_cast( ceil(a) ); -#endif - return RetVal; -} - - -// Regular signed area of triangle -#define TriArea2D( A, B, C ) \ - ( 0.5f * ( ( B.x - A.x ) * ( C.y - A.y ) - ( B.y - A.y ) * ( C.x - A.x ) ) ) - -// This version doesn't premultiply by 0.5f, so it's the area of the rectangle instead -#define TriArea2DTimesTwo( A, B, C ) \ - ( ( ( B.x - A.x ) * ( C.y - A.y ) - ( B.y - A.y ) * ( C.x - A.x ) ) ) - - -// Get the barycentric coordinates of "pt" in triangle [A,B,C]. -inline void GetBarycentricCoords2D( - Vector2D const &A, - Vector2D const &B, - Vector2D const &C, - Vector2D const &pt, - float bcCoords[3] ) -{ - // Note, because to top and bottom are both x2, the issue washes out in the composite - float invTriArea = 1.0f / TriArea2DTimesTwo( A, B, C ); - - // NOTE: We assume here that the lightmap coordinate vertices go counterclockwise. - // If not, TriArea2D() is negated so this works out right. - bcCoords[0] = TriArea2DTimesTwo( B, C, pt ) * invTriArea; - bcCoords[1] = TriArea2DTimesTwo( C, A, pt ) * invTriArea; - bcCoords[2] = TriArea2DTimesTwo( A, B, pt ) * invTriArea; -} - - -// Return true of the sphere might touch the box (the sphere is actually treated -// like a box itself, so this may return true if the sphere's bounding box touches -// a corner of the box but the sphere itself doesn't). 
-inline bool QuickBoxSphereTest( - const Vector& vOrigin, - float flRadius, - const Vector& bbMin, - const Vector& bbMax ) -{ - return vOrigin.x - flRadius < bbMax.x && vOrigin.x + flRadius > bbMin.x && - vOrigin.y - flRadius < bbMax.y && vOrigin.y + flRadius > bbMin.y && - vOrigin.z - flRadius < bbMax.z && vOrigin.z + flRadius > bbMin.z; -} - - -// Return true of the boxes intersect (but not if they just touch). -inline bool QuickBoxIntersectTest( - const Vector& vBox1Min, - const Vector& vBox1Max, - const Vector& vBox2Min, - const Vector& vBox2Max ) -{ - return - vBox1Min.x < vBox2Max.x && vBox1Max.x > vBox2Min.x && - vBox1Min.y < vBox2Max.y && vBox1Max.y > vBox2Min.y && - vBox1Min.z < vBox2Max.z && vBox1Max.z > vBox2Min.z; -} - - -extern float GammaToLinearFullRange( float gamma ); -extern float LinearToGammaFullRange( float linear ); -extern float GammaToLinear( float gamma ); -extern float LinearToGamma( float linear ); - -extern float SrgbGammaToLinear( float flSrgbGammaValue ); -extern float SrgbLinearToGamma( float flLinearValue ); -extern float X360GammaToLinear( float fl360GammaValue ); -extern float X360LinearToGamma( float flLinearValue ); -extern float SrgbGammaTo360Gamma( float flSrgbGammaValue ); - -// linear (0..4) to screen corrected vertex space (0..1?) -FORCEINLINE float LinearToVertexLight( float f ) -{ - extern float lineartovertex[4096]; - - // Gotta clamp before the multiply; could overflow... - // assume 0..4 range - int i = RoundFloatToInt( f * 1024.f ); - - // Presumably the comman case will be not to clamp, so check that first: - if( (unsigned)i > 4095 ) - { - if ( i < 0 ) - i = 0; // Compare to zero instead of 4095 to save 4 bytes in the instruction stream - else - i = 4095; - } - - return lineartovertex[i]; -} - - -FORCEINLINE unsigned char LinearToLightmap( float f ) -{ - extern unsigned char lineartolightmap[4096]; - - // Gotta clamp before the multiply; could overflow... 
- int i = RoundFloatToInt( f * 1024.f ); // assume 0..4 range - - // Presumably the comman case will be not to clamp, so check that first: - if ( (unsigned)i > 4095 ) - { - if ( i < 0 ) - i = 0; // Compare to zero instead of 4095 to save 4 bytes in the instruction stream - else - i = 4095; - } - - return lineartolightmap[i]; -} - -FORCEINLINE void ColorClamp( Vector& color ) -{ - float maxc = max( color.x, max( color.y, color.z ) ); - if ( maxc > 1.0f ) - { - float ooMax = 1.0f / maxc; - color.x *= ooMax; - color.y *= ooMax; - color.z *= ooMax; - } - - if ( color[0] < 0.f ) color[0] = 0.f; - if ( color[1] < 0.f ) color[1] = 0.f; - if ( color[2] < 0.f ) color[2] = 0.f; -} - -inline void ColorClampTruncate( Vector& color ) -{ - if (color[0] > 1.0f) color[0] = 1.0f; else if (color[0] < 0.0f) color[0] = 0.0f; - if (color[1] > 1.0f) color[1] = 1.0f; else if (color[1] < 0.0f) color[1] = 0.0f; - if (color[2] > 1.0f) color[2] = 1.0f; else if (color[2] < 0.0f) color[2] = 0.0f; -} - -// Interpolate a Catmull-Rom spline. -// t is a [0,1] value and interpolates a curve between p2 and p3. -void Catmull_Rom_Spline( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector &output ); - -// Interpolate a Catmull-Rom spline. -// Returns the tangent of the point at t of the spline -void Catmull_Rom_Spline_Tangent( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector &output ); - -// area under the curve [0..t] -void Catmull_Rom_Spline_Integral( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector& output ); - -// area under the curve [0..1] -void Catmull_Rom_Spline_Integral( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - Vector& output ); - -// Interpolate a Catmull-Rom spline. 
-// Normalize p2->p1 and p3->p4 to be the same length as p2->p3 -void Catmull_Rom_Spline_Normalize( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector &output ); - -// area under the curve [0..t] -// Normalize p2->p1 and p3->p4 to be the same length as p2->p3 -void Catmull_Rom_Spline_Integral_Normalize( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector& output ); - -// Interpolate a Catmull-Rom spline. -// Normalize p2.x->p1.x and p3.x->p4.x to be the same length as p2.x->p3.x -void Catmull_Rom_Spline_NormalizeX( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector &output ); - -// area under the curve [0..t] -void Catmull_Rom_Spline_NormalizeX( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector& output ); - -// Interpolate a Hermite spline. -// t is a [0,1] value and interpolates a curve between p1 and p2 with the deltas d1 and d2. 
-void Hermite_Spline( - const Vector &p1, - const Vector &p2, - const Vector &d1, - const Vector &d2, - float t, - Vector& output ); - -float Hermite_Spline( - float p1, - float p2, - float d1, - float d2, - float t ); - -// t is a [0,1] value and interpolates a curve between p1 and p2 with the slopes p0->p1 and p1->p2 -void Hermite_Spline( - const Vector &p0, - const Vector &p1, - const Vector &p2, - float t, - Vector& output ); - -float Hermite_Spline( - float p0, - float p1, - float p2, - float t ); - - -void Hermite_SplineBasis( float t, float basis[] ); - -void Hermite_Spline( - const Quaternion &q0, - const Quaternion &q1, - const Quaternion &q2, - float t, - Quaternion &output ); - - -// See http://en.wikipedia.org/wiki/Kochanek-Bartels_curves -// -// Tension: -1 = Round -> 1 = Tight -// Bias: -1 = Pre-shoot (bias left) -> 1 = Post-shoot (bias right) -// Continuity: -1 = Box corners -> 1 = Inverted corners -// -// If T=B=C=0 it's the same matrix as Catmull-Rom. -// If T=1 & B=C=0 it's the same as Cubic. -// If T=B=0 & C=-1 it's just linear interpolation -// -// See http://news.povray.org/povray.binaries.tutorials/attachment/%3CXns91B880592482seed7@povray.org%3E/Splines.bas.txt -// for example code and descriptions of various spline types... 
-// -void Kochanek_Bartels_Spline( - float tension, - float bias, - float continuity, - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector& output ); - -void Kochanek_Bartels_Spline_NormalizeX( - float tension, - float bias, - float continuity, - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector& output ); - -// See link at Kochanek_Bartels_Spline for info on the basis matrix used -void Cubic_Spline( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector& output ); - -void Cubic_Spline_NormalizeX( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector& output ); - -// See link at Kochanek_Bartels_Spline for info on the basis matrix used -void BSpline( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector& output ); - -void BSpline_NormalizeX( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector& output ); - -// See link at Kochanek_Bartels_Spline for info on the basis matrix used -void Parabolic_Spline( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector& output ); - -void Parabolic_Spline_NormalizeX( - const Vector &p1, - const Vector &p2, - const Vector &p3, - const Vector &p4, - float t, - Vector& output ); - -// quintic interpolating polynomial from Perlin. -// 0->0, 1->1, smooth-in between with smooth tangents -FORCEINLINE float QuinticInterpolatingPolynomial(float t) -{ - // 6t^5-15t^4+10t^3 - return t * t * t *( t * ( t* 6.0 - 15.0 ) + 10.0 ); -} - -// given a table of sorted tabulated positions, return the two indices and blendfactor to linear -// interpolate. Does a search. Can be used to find the blend value to interpolate between -// keyframes. 
-void GetInterpolationData( float const *pKnotPositions, - float const *pKnotValues, - int nNumValuesinList, - int nInterpolationRange, - float flPositionToInterpolateAt, - bool bWrap, - float *pValueA, - float *pValueB, - float *pInterpolationValue); - -float RangeCompressor( float flValue, float flMin, float flMax, float flBase ); - -// Get the minimum distance from vOrigin to the bounding box defined by [mins,maxs] -// using voronoi regions. -// 0 is returned if the origin is inside the box. -float CalcSqrDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point ); -void CalcClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut ); -void CalcSqrDistAndClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut, float &distSqrOut ); - -inline float CalcDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point ) -{ - float flDistSqr = CalcSqrDistanceToAABB( mins, maxs, point ); - return sqrt(flDistSqr); -} - -// Get the closest point from P to the (infinite) line through vLineA and vLineB and -// calculate the shortest distance from P to the line. -// If you pass in a value for t, it will tell you the t for (A + (B-A)t) to get the closest point. -// If the closest point lies on the segment between A and B, then 0 <= t <= 1. -void CalcClosestPointOnLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *t=0 ); -float CalcDistanceToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 ); -float CalcDistanceSqrToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 ); - -// The same three functions as above, except now the line is closed between A and B. 
-void CalcClosestPointOnLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *t=0 ); -float CalcDistanceToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 ); -float CalcDistanceSqrToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 ); - -// A function to compute the closes line segment connnection two lines (or false if the lines are parallel, etc.) -bool CalcLineToLineIntersectionSegment( - const Vector& p1,const Vector& p2,const Vector& p3,const Vector& p4,Vector *s1,Vector *s2, - float *t1, float *t2 ); - -// The above functions in 2D -void CalcClosestPointOnLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, Vector2D &vClosest, float *t=0 ); -float CalcDistanceToLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 ); -float CalcDistanceSqrToLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 ); -void CalcClosestPointOnLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, Vector2D &vClosest, float *t=0 ); -float CalcDistanceToLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 ); -float CalcDistanceSqrToLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 ); - -// Init the mathlib -void MathLib_Init( float gamma = 2.2f, float texGamma = 2.2f, float brightness = 0.0f, int overbright = 2.0f, bool bAllow3DNow = true, bool bAllowSSE = true, bool bAllowSSE2 = true, bool bAllowMMX = true ); -bool MathLib_3DNowEnabled( void ); -bool MathLib_MMXEnabled( void ); -bool MathLib_SSEEnabled( void ); -bool MathLib_SSE2Enabled( void ); - -float Approach( float target, float value, float speed ); -float ApproachAngle( float target, float value, float speed ); -float AngleDiff( float destAngle, float srcAngle ); -float AngleDistance( float next, float cur ); -float 
AngleNormalize( float angle ); - -// ensure that 0 <= angle <= 360 -float AngleNormalizePositive( float angle ); - -bool AnglesAreEqual( float a, float b, float tolerance = 0.0f ); - - -void RotationDeltaAxisAngle( const QAngle &srcAngles, const QAngle &destAngles, Vector &deltaAxis, float &deltaAngle ); -void RotationDelta( const QAngle &srcAngles, const QAngle &destAngles, QAngle *out ); - -void ComputeTrianglePlane( const Vector& v1, const Vector& v2, const Vector& v3, Vector& normal, float& intercept ); -int PolyFromPlane( Vector *outVerts, const Vector& normal, float dist, float fHalfScale = 9000.0f ); -int ClipPolyToPlane( Vector *inVerts, int vertCount, Vector *outVerts, const Vector& normal, float dist, float fOnPlaneEpsilon = 0.1f ); -int ClipPolyToPlane_Precise( double *inVerts, int vertCount, double *outVerts, const double *normal, double dist, double fOnPlaneEpsilon = 0.1 ); - -//----------------------------------------------------------------------------- -// Computes a reasonable tangent space for a triangle -//----------------------------------------------------------------------------- -void CalcTriangleTangentSpace( const Vector &p0, const Vector &p1, const Vector &p2, - const Vector2D &t0, const Vector2D &t1, const Vector2D& t2, - Vector &sVect, Vector &tVect ); - -//----------------------------------------------------------------------------- -// Transforms a AABB into another space; which will inherently grow the box. 
-//----------------------------------------------------------------------------- -void TransformAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut ); - -//----------------------------------------------------------------------------- -// Uses the inverse transform of in1 -//----------------------------------------------------------------------------- -void ITransformAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut ); - -//----------------------------------------------------------------------------- -// Rotates a AABB into another space; which will inherently grow the box. -// (same as TransformAABB, but doesn't take the translation into account) -//----------------------------------------------------------------------------- -void RotateAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut ); - -//----------------------------------------------------------------------------- -// Uses the inverse transform of in1 -//----------------------------------------------------------------------------- -void IRotateAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut ); - -//----------------------------------------------------------------------------- -// Transform a plane -//----------------------------------------------------------------------------- -inline void MatrixTransformPlane( const matrix3x4_t &src, const cplane_t &inPlane, cplane_t &outPlane ) -{ - // What we want to do is the following: - // 1) transform the normal into the new space. - // 2) Determine a point on the old plane given by plane dist * plane normal - // 3) Transform that point into the new space - // 4) Plane dist = DotProduct( new normal, new point ) - - // An optimized version, which works if the plane is orthogonal. 
- // 1) Transform the normal into the new space - // 2) Realize that transforming the old plane point into the new space - // is given by [ d * n'x + Tx, d * n'y + Ty, d * n'z + Tz ] - // where d = old plane dist, n' = transformed normal, Tn = translational component of transform - // 3) Compute the new plane dist using the dot product of the normal result of #2 - - // For a correct result, this should be an inverse-transpose matrix - // but that only matters if there are nonuniform scale or skew factors in this matrix. - VectorRotate( inPlane.normal, src, outPlane.normal ); - outPlane.dist = inPlane.dist * DotProduct( outPlane.normal, outPlane.normal ); - outPlane.dist += outPlane.normal.x * src[0][3] + outPlane.normal.y * src[1][3] + outPlane.normal.z * src[2][3]; -} - -inline void MatrixITransformPlane( const matrix3x4_t &src, const cplane_t &inPlane, cplane_t &outPlane ) -{ - // The trick here is that Tn = translational component of transform, - // but for an inverse transform, Tn = - R^-1 * T - Vector vecTranslation; - MatrixGetColumn( src, 3, vecTranslation ); - - Vector vecInvTranslation; - VectorIRotate( vecTranslation, src, vecInvTranslation ); - - VectorIRotate( inPlane.normal, src, outPlane.normal ); - outPlane.dist = inPlane.dist * DotProduct( outPlane.normal, outPlane.normal ); - outPlane.dist -= outPlane.normal.x * vecInvTranslation[0] + outPlane.normal.y * vecInvTranslation[1] + outPlane.normal.z * vecInvTranslation[2]; -} - -int CeilPow2( int in ); -int FloorPow2( int in ); - -FORCEINLINE float * UnpackNormal_HEND3N( const unsigned int *pPackedNormal, float *pNormal ) -{ - int temp[3]; - temp[0] = ((*pPackedNormal >> 0L) & 0x7ff); - if ( temp[0] & 0x400 ) - { - temp[0] = 2048 - temp[0]; - } - temp[1] = ((*pPackedNormal >> 11L) & 0x7ff); - if ( temp[1] & 0x400 ) - { - temp[1] = 2048 - temp[1]; - } - temp[2] = ((*pPackedNormal >> 22L) & 0x3ff); - if ( temp[2] & 0x200 ) - { - temp[2] = 1024 - temp[2]; - } - pNormal[0] = (float)temp[0] * 1.0f/1023.0f; - 
pNormal[1] = (float)temp[1] * 1.0f/1023.0f; - pNormal[2] = (float)temp[2] * 1.0f/511.0f; - return pNormal; -} - -FORCEINLINE unsigned int * PackNormal_HEND3N( const float *pNormal, unsigned int *pPackedNormal ) -{ - int temp[3]; - - temp[0] = Float2Int( pNormal[0] * 1023.0f ); - temp[1] = Float2Int( pNormal[1] * 1023.0f ); - temp[2] = Float2Int( pNormal[2] * 511.0f ); - - // the normal is out of bounds, determine the source and fix - // clamping would be even more of a slowdown here - Assert( temp[0] >= -1023 && temp[0] <= 1023 ); - Assert( temp[1] >= -1023 && temp[1] <= 1023 ); - Assert( temp[2] >= -511 && temp[2] <= 511 ); - - *pPackedNormal = ( ( temp[2] & 0x3ff ) << 22L ) | - ( ( temp[1] & 0x7ff ) << 11L ) | - ( ( temp[0] & 0x7ff ) << 0L ); - return pPackedNormal; -} - -FORCEINLINE unsigned int * PackNormal_HEND3N( float nx, float ny, float nz, unsigned int *pPackedNormal ) -{ - int temp[3]; - - temp[0] = Float2Int( nx * 1023.0f ); - temp[1] = Float2Int( ny * 1023.0f ); - temp[2] = Float2Int( nz * 511.0f ); - - // the normal is out of bounds, determine the source and fix - // clamping would be even more of a slowdown here - Assert( temp[0] >= -1023 && temp[0] <= 1023 ); - Assert( temp[1] >= -1023 && temp[1] <= 1023 ); - Assert( temp[2] >= -511 && temp[2] <= 511 ); - - *pPackedNormal = ( ( temp[2] & 0x3ff ) << 22L ) | - ( ( temp[1] & 0x7ff ) << 11L ) | - ( ( temp[0] & 0x7ff ) << 0L ); - return pPackedNormal; -} - -FORCEINLINE float * UnpackNormal_SHORT2( const unsigned int *pPackedNormal, float *pNormal, bool bIsTangent = FALSE ) -{ - // Unpacks from Jason's 2-short format (fills in a 4th binormal-sign (+1/-1) value, if this is a tangent vector) - - // FIXME: short math is slow on 360 - use ints here instead (bit-twiddle to deal w/ the sign bits) - short iX = (*pPackedNormal & 0x0000FFFF); - short iY = (*pPackedNormal & 0xFFFF0000) >> 16; - - float zSign = +1; - if ( iX < 0 ) - { - zSign = -1; - iX = -iX; - } - float tSign = +1; - if ( iY < 0 ) - { - tSign = -1; 
- iY = -iY; - } - - pNormal[0] = ( iX - 16384.0f ) / 16384.0f; - pNormal[1] = ( iY - 16384.0f ) / 16384.0f; - pNormal[2] = zSign*sqrtf( 1.0f - ( pNormal[0]*pNormal[0] + pNormal[1]*pNormal[1] ) ); - if ( bIsTangent ) - { - pNormal[3] = tSign; - } - - return pNormal; -} - -FORCEINLINE unsigned int * PackNormal_SHORT2( float nx, float ny, float nz, unsigned int *pPackedNormal, float binormalSign = +1.0f ) -{ - // Pack a vector (ASSUMED TO BE NORMALIZED) into Jason's 4-byte (SHORT2) format. - // This simply reconstructs Z from X & Y. It uses the sign bits of the X & Y coords - // to reconstruct the sign of Z and, if this is a tangent vector, the sign of the - // binormal (this is needed because tangent/binormal vectors are supposed to follow - // UV gradients, but shaders reconstruct the binormal from the tangent and normal - // assuming that they form a right-handed basis). - - nx += 1; // [-1,+1] -> [0,2] - ny += 1; - nx *= 16384.0f; // [ 0, 2] -> [0,32768] - ny *= 16384.0f; - - // '0' and '32768' values are invalid encodings - nx = max( nx, 1.0f ); // Make sure there are no zero values - ny = max( ny, 1.0f ); - nx = min( nx, 32767.0f ); // Make sure there are no 32768 values - ny = min( ny, 32767.0f ); - - if ( nz < 0.0f ) - nx = -nx; // Set the sign bit for z - - ny *= binormalSign; // Set the sign bit for the binormal (use when encoding a tangent vector) - - // FIXME: short math is slow on 360 - use ints here instead (bit-twiddle to deal w/ the sign bits), also use Float2Int() - short sX = (short)nx; // signed short [1,32767] - short sY = (short)ny; - - *pPackedNormal = ( sX & 0x0000FFFF ) | ( sY << 16 ); // NOTE: The mask is necessary (if sX is negative and cast to an int...) 
- - return pPackedNormal; -} - -FORCEINLINE unsigned int * PackNormal_SHORT2( const float *pNormal, unsigned int *pPackedNormal, float binormalSign = +1.0f ) -{ - return PackNormal_SHORT2( pNormal[0], pNormal[1], pNormal[2], pPackedNormal, binormalSign ); -} - -// Unpacks a UBYTE4 normal (for a tangent, the result's fourth component receives the binormal 'sign') -FORCEINLINE float * UnpackNormal_UBYTE4( const unsigned int *pPackedNormal, float *pNormal, bool bIsTangent = FALSE ) -{ - unsigned char cX, cY; - if ( bIsTangent ) - { - cX = *pPackedNormal >> 16; // Unpack Z - cY = *pPackedNormal >> 24; // Unpack W - } - else - { - cX = *pPackedNormal >> 0; // Unpack X - cY = *pPackedNormal >> 8; // Unpack Y - } - - float x = cX - 128.0f; - float y = cY - 128.0f; - float z; - - float zSignBit = x < 0 ? 1.0f : 0.0f; // z and t negative bits (like slt asm instruction) - float tSignBit = y < 0 ? 1.0f : 0.0f; - float zSign = -( 2*zSignBit - 1 ); // z and t signs - float tSign = -( 2*tSignBit - 1 ); - - x = x*zSign - zSignBit; // 0..127 - y = y*tSign - tSignBit; - x = x - 64; // -64..63 - y = y - 64; - - float xSignBit = x < 0 ? 1.0f : 0.0f; // x and y negative bits (like slt asm instruction) - float ySignBit = y < 0 ? 
1.0f : 0.0f; - float xSign = -( 2*xSignBit - 1 ); // x and y signs - float ySign = -( 2*ySignBit - 1 ); - - x = ( x*xSign - xSignBit ) / 63.0f; // 0..1 range - y = ( y*ySign - ySignBit ) / 63.0f; - z = 1.0f - x - y; - - float oolen = 1.0f / sqrt( x*x + y*y + z*z ); // Normalize and - x *= oolen * xSign; // Recover signs - y *= oolen * ySign; - z *= oolen * zSign; - - pNormal[0] = x; - pNormal[1] = y; - pNormal[2] = z; - if ( bIsTangent ) - { - pNormal[3] = tSign; - } - - return pNormal; -} - -////////////////////////////////////////////////////////////////////////////// -// See: http://www.oroboro.com/rafael/docserv.php/index/programming/article/unitv2 -// -// UBYTE4 encoding, using per-octant projection onto x+y+z=1 -// Assume input vector is already unit length -// -// binormalSign specifies 'sign' of binormal, stored in t sign bit of tangent -// (lets the shader know whether norm/tan/bin form a right-handed basis) -// -// bIsTangent is used to specify which WORD of the output to store the data -// The expected usage is to call once with the normal and once with -// the tangent and binormal sign flag, bitwise OR'ing the returned DWORDs -FORCEINLINE unsigned int * PackNormal_UBYTE4( float nx, float ny, float nz, unsigned int *pPackedNormal, bool bIsTangent = false, float binormalSign = +1.0f ) -{ - float xSign = nx < 0.0f ? -1.0f : 1.0f; // -1 or 1 sign - float ySign = ny < 0.0f ? -1.0f : 1.0f; - float zSign = nz < 0.0f ? 
-1.0f : 1.0f; - float tSign = binormalSign; - Assert( ( binormalSign == +1.0f ) || ( binormalSign == -1.0f ) ); - - float xSignBit = 0.5f*( 1 - xSign ); // [-1,+1] -> [1,0] - float ySignBit = 0.5f*( 1 - ySign ); // 1 is negative bit (like slt instruction) - float zSignBit = 0.5f*( 1 - zSign ); - float tSignBit = 0.5f*( 1 - binormalSign ); - - float absX = xSign*nx; // 0..1 range (abs) - float absY = ySign*ny; - float absZ = zSign*nz; - - float xbits = absX / ( absX + absY + absZ ); // Project onto x+y+z=1 plane - float ybits = absY / ( absX + absY + absZ ); - - xbits *= 63; // 0..63 - ybits *= 63; - - xbits = xbits * xSign - xSignBit; // -64..63 range - ybits = ybits * ySign - ySignBit; - xbits += 64.0f; // 0..127 range - ybits += 64.0f; - - xbits = xbits * zSign - zSignBit; // Negate based on z and t - ybits = ybits * tSign - tSignBit; // -128..127 range - - xbits += 128.0f; // 0..255 range - ybits += 128.0f; - - unsigned char cX = (unsigned char) xbits; - unsigned char cY = (unsigned char) ybits; - - if ( !bIsTangent ) - *pPackedNormal = (cX << 0) | (cY << 8); // xy for normal - else - *pPackedNormal = (cX << 16) | (cY << 24); // zw for tangent - - return pPackedNormal; -} - -FORCEINLINE unsigned int * PackNormal_UBYTE4( const float *pNormal, unsigned int *pPackedNormal, bool bIsTangent = false, float binormalSign = +1.0f ) -{ - return PackNormal_UBYTE4( pNormal[0], pNormal[1], pNormal[2], pPackedNormal, bIsTangent, binormalSign ); -} - - -//----------------------------------------------------------------------------- -// Convert RGB to HSV -//----------------------------------------------------------------------------- -void RGBtoHSV( const Vector &rgb, Vector &hsv ); - - -//----------------------------------------------------------------------------- -// Convert HSV to RGB -//----------------------------------------------------------------------------- -void HSVtoRGB( const Vector &hsv, Vector &rgb ); - - 
-//----------------------------------------------------------------------------- -// Fast version of pow and log -//----------------------------------------------------------------------------- - -float FastLog2(float i); // log2( i ) -float FastPow2(float i); // 2^i -float FastPow(float a, float b); // a^b -float FastPow10( float i ); // 10^i - -//----------------------------------------------------------------------------- -// For testing float equality -//----------------------------------------------------------------------------- - -inline bool CloseEnough( float a, float b, float epsilon = EQUAL_EPSILON ) -{ - return fabs( a - b ) <= epsilon; -} - -inline bool CloseEnough( const Vector &a, const Vector &b, float epsilon = EQUAL_EPSILON ) -{ - return fabs( a.x - b.x ) <= epsilon && - fabs( a.y - b.y ) <= epsilon && - fabs( a.z - b.z ) <= epsilon; -} - -// Fast compare -// maxUlps is the maximum error in terms of Units in the Last Place. This -// specifies how big an error we are willing to accept in terms of the value -// of the least significant digit of the floating point number’s -// representation. maxUlps can also be interpreted in terms of how many -// representable floats we are willing to accept between A and B. -// This function will allow maxUlps-1 floats between A and B. -bool AlmostEqual(float a, float b, int maxUlps = 10); - -inline bool AlmostEqual( const Vector &a, const Vector &b, int maxUlps = 10) -{ - return AlmostEqual( a.x, b.x, maxUlps ) && - AlmostEqual( a.y, b.y, maxUlps ) && - AlmostEqual( a.z, b.z, maxUlps ); -} - - -#endif // MATH_BASE_H - +//========= Copyright Valve Corporation, All rights reserved. 
============// +// +// Purpose: +// +//===========================================================================// + +#ifndef MATH_LIB_H +#define MATH_LIB_H + +#include +#include "tier0/basetypes.h" +#include "tier0/commonmacros.h" +#include "mathlib/vector.h" +#include "mathlib/vector2d.h" +#include "tier0/dbg.h" + +#include "mathlib/math_pfns.h" + +#if defined(__i386__) || defined(_M_IX86) +// For MMX intrinsics +#include +#endif + +// XXX remove me +#undef clamp + +// Uncomment this to enable FP exceptions in parts of the code. +// This can help track down FP bugs. However the code is not +// FP exception clean so this not a turnkey operation. +//#define FP_EXCEPTIONS_ENABLED + + +#ifdef FP_EXCEPTIONS_ENABLED +#include // For _clearfp and _controlfp_s +#endif + +// FPExceptionDisabler and FPExceptionEnabler taken from my blog post +// at http://www.altdevblogaday.com/2012/04/20/exceptional-floating-point/ + +// Declare an object of this type in a scope in order to suppress +// all floating-point exceptions temporarily. The old exception +// state will be reset at the end. +class FPExceptionDisabler +{ +public: +#ifdef FP_EXCEPTIONS_ENABLED + FPExceptionDisabler(); + ~FPExceptionDisabler(); + +private: + unsigned int mOldValues; +#else + FPExceptionDisabler() {} + ~FPExceptionDisabler() {} +#endif + +private: + // Make the copy constructor and assignment operator private + // and unimplemented to prohibit copying. + FPExceptionDisabler(const FPExceptionDisabler&); + FPExceptionDisabler& operator=(const FPExceptionDisabler&); +}; + +// Declare an object of this type in a scope in order to enable a +// specified set of floating-point exceptions temporarily. The old +// exception state will be reset at the end. +// This class can be nested. +class FPExceptionEnabler +{ +public: + // Overflow, divide-by-zero, and invalid-operation are the FP + // exceptions most frequently associated with bugs. 
+#ifdef FP_EXCEPTIONS_ENABLED + FPExceptionEnabler(unsigned int enableBits = _EM_OVERFLOW | _EM_ZERODIVIDE | _EM_INVALID); + ~FPExceptionEnabler(); + +private: + unsigned int mOldValues; +#else + FPExceptionEnabler(unsigned int enableBits = 0) + { + } + ~FPExceptionEnabler() + { + } +#endif + +private: + // Make the copy constructor and assignment operator private + // and unimplemented to prohibit copying. + FPExceptionEnabler(const FPExceptionEnabler&); + FPExceptionEnabler& operator=(const FPExceptionEnabler&); +}; + + + +#ifdef DEBUG // stop crashing edit-and-continue +FORCEINLINE float clamp( float val, float minVal, float maxVal ) +{ + if ( maxVal < minVal ) + return maxVal; + else if( val < minVal ) + return minVal; + else if( val > maxVal ) + return maxVal; + else + return val; +} +#else // DEBUG +FORCEINLINE float clamp( float val, float minVal, float maxVal ) +{ +#if defined(__i386__) || defined(_M_IX86) + _mm_store_ss( &val, + _mm_min_ss( + _mm_max_ss( + _mm_load_ss(&val), + _mm_load_ss(&minVal) ), + _mm_load_ss(&maxVal) ) ); +#else + val = fpmax(minVal, val); + val = fpmin(maxVal, val); +#endif + return val; +} +#endif // DEBUG + +// +// Returns a clamped value in the range [min, max]. +// +template< class T > +inline T clamp( T const &val, T const &minVal, T const &maxVal ) +{ + if ( maxVal < minVal ) + return maxVal; + else if( val < minVal ) + return minVal; + else if( val > maxVal ) + return maxVal; + else + return val; +} + + +// plane_t structure +// !!! if this is changed, it must be changed in asm code too !!! +// FIXME: does the asm code even exist anymore? 
+// FIXME: this should move to a different file +struct cplane_t +{ + Vector normal; + float dist; + byte type; // for fast side tests + byte signbits; // signx + (signy<<1) + (signz<<1) + byte pad[2]; + +#ifdef VECTOR_NO_SLOW_OPERATIONS + cplane_t() {} + +private: + // No copy constructors allowed if we're in optimal mode + cplane_t(const cplane_t& vOther); +#endif +}; + +// structure offset for asm code +#define CPLANE_NORMAL_X 0 +#define CPLANE_NORMAL_Y 4 +#define CPLANE_NORMAL_Z 8 +#define CPLANE_DIST 12 +#define CPLANE_TYPE 16 +#define CPLANE_SIGNBITS 17 +#define CPLANE_PAD0 18 +#define CPLANE_PAD1 19 + +// 0-2 are axial planes +#define PLANE_X 0 +#define PLANE_Y 1 +#define PLANE_Z 2 + +// 3-5 are non-axial planes snapped to the nearest +#define PLANE_ANYX 3 +#define PLANE_ANYY 4 +#define PLANE_ANYZ 5 + + +//----------------------------------------------------------------------------- +// Frustum plane indices. +// WARNING: there is code that depends on these values +//----------------------------------------------------------------------------- + +enum +{ + FRUSTUM_RIGHT = 0, + FRUSTUM_LEFT = 1, + FRUSTUM_TOP = 2, + FRUSTUM_BOTTOM = 3, + FRUSTUM_NEARZ = 4, + FRUSTUM_FARZ = 5, + FRUSTUM_NUMPLANES = 6 +}; + +extern int SignbitsForPlane( cplane_t *out ); + +class Frustum_t +{ +public: + void SetPlane( int i, int nType, const Vector &vecNormal, float dist ) + { + m_Plane[i].normal = vecNormal; + m_Plane[i].dist = dist; + m_Plane[i].type = nType; + m_Plane[i].signbits = SignbitsForPlane( &m_Plane[i] ); + m_AbsNormal[i].Init( fabs(vecNormal.x), fabs(vecNormal.y), fabs(vecNormal.z) ); + } + + inline const cplane_t *GetPlane( int i ) const { return &m_Plane[i]; } + inline const Vector &GetAbsNormal( int i ) const { return m_AbsNormal[i]; } + +private: + cplane_t m_Plane[FRUSTUM_NUMPLANES]; + Vector m_AbsNormal[FRUSTUM_NUMPLANES]; +}; + +// Computes Y fov from an X fov and a screen aspect ratio + X from Y +float CalcFovY( float flFovX, float flScreenAspect ); +float 
CalcFovX( float flFovY, float flScreenAspect ); + +// Generate a frustum based on perspective view parameters +// NOTE: FOV is specified in degrees, as the *full* view angle (not half-angle) +void GeneratePerspectiveFrustum( const Vector& origin, const QAngle &angles, float flZNear, float flZFar, float flFovX, float flAspectRatio, Frustum_t &frustum ); +void GeneratePerspectiveFrustum( const Vector& origin, const Vector &forward, const Vector &right, const Vector &up, float flZNear, float flZFar, float flFovX, float flFovY, Frustum_t &frustum ); + +// Cull the world-space bounding box to the specified frustum. +bool R_CullBox( const Vector& mins, const Vector& maxs, const Frustum_t &frustum ); +bool R_CullBoxSkipNear( const Vector& mins, const Vector& maxs, const Frustum_t &frustum ); + +struct matrix3x4_t +{ + matrix3x4_t() {} + matrix3x4_t( + float m00, float m01, float m02, float m03, + float m10, float m11, float m12, float m13, + float m20, float m21, float m22, float m23 ) + { + m_flMatVal[0][0] = m00; m_flMatVal[0][1] = m01; m_flMatVal[0][2] = m02; m_flMatVal[0][3] = m03; + m_flMatVal[1][0] = m10; m_flMatVal[1][1] = m11; m_flMatVal[1][2] = m12; m_flMatVal[1][3] = m13; + m_flMatVal[2][0] = m20; m_flMatVal[2][1] = m21; m_flMatVal[2][2] = m22; m_flMatVal[2][3] = m23; + } + + //----------------------------------------------------------------------------- + // Creates a matrix where the X axis = forward + // the Y axis = left, and the Z axis = up + //----------------------------------------------------------------------------- + void Init( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin ) + { + m_flMatVal[0][0] = xAxis.x; m_flMatVal[0][1] = yAxis.x; m_flMatVal[0][2] = zAxis.x; m_flMatVal[0][3] = vecOrigin.x; + m_flMatVal[1][0] = xAxis.y; m_flMatVal[1][1] = yAxis.y; m_flMatVal[1][2] = zAxis.y; m_flMatVal[1][3] = vecOrigin.y; + m_flMatVal[2][0] = xAxis.z; m_flMatVal[2][1] = yAxis.z; m_flMatVal[2][2] = zAxis.z; m_flMatVal[2][3] 
= vecOrigin.z; + } + + //----------------------------------------------------------------------------- + // Creates a matrix where the X axis = forward + // the Y axis = left, and the Z axis = up + //----------------------------------------------------------------------------- + matrix3x4_t( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector &vecOrigin ) + { + Init( xAxis, yAxis, zAxis, vecOrigin ); + } + + inline void Invalidate( void ) + { + for (int i = 0; i < 3; i++) + { + for (int j = 0; j < 4; j++) + { + m_flMatVal[i][j] = VEC_T_NAN; + } + } + } + + float *operator[]( int i ) { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; } + const float *operator[]( int i ) const { Assert(( i >= 0 ) && ( i < 3 )); return m_flMatVal[i]; } + float *Base() { return &m_flMatVal[0][0]; } + const float *Base() const { return &m_flMatVal[0][0]; } + + float m_flMatVal[3][4]; +}; + + +#ifndef M_PI + #define M_PI 3.14159265358979323846 // matches value in gcc v2 math.h +#endif + +#define M_PI_F ((float)(M_PI)) // Shouldn't collide with anything. + +// NJS: Inlined to prevent floats from being autopromoted to doubles, as with the old system. +#ifndef RAD2DEG + #define RAD2DEG( x ) ( (float)(x) * (float)(180.f / M_PI_F) ) +#endif + +#ifndef DEG2RAD + #define DEG2RAD( x ) ( (float)(x) * (float)(M_PI_F / 180.f) ) +#endif + +// Used to represent sides of things like planes. +#define SIDE_FRONT 0 +#define SIDE_BACK 1 +#define SIDE_ON 2 +#define SIDE_CROSS -2 // necessary for polylib.c + +#define ON_VIS_EPSILON 0.01 // necessary for vvis (flow.c) -- again look into moving later! +#define EQUAL_EPSILON 0.001 // necessary for vbsp (faces.c) -- should look into moving it there? 
+ +extern bool s_bMathlibInitialized; + +extern const Vector vec3_origin; +extern const QAngle vec3_angle; +extern const Quaternion quat_identity; +extern const Vector vec3_invalid; +extern const int nanmask; + +#define IS_NAN(x) (((*(int *)&x)&nanmask)==nanmask) + +FORCEINLINE vec_t DotProduct(const vec_t *v1, const vec_t *v2) +{ + return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2]; +} +FORCEINLINE void VectorSubtract(const vec_t *a, const vec_t *b, vec_t *c) +{ + c[0]=a[0]-b[0]; + c[1]=a[1]-b[1]; + c[2]=a[2]-b[2]; +} +FORCEINLINE void VectorAdd(const vec_t *a, const vec_t *b, vec_t *c) +{ + c[0]=a[0]+b[0]; + c[1]=a[1]+b[1]; + c[2]=a[2]+b[2]; +} +FORCEINLINE void VectorCopy(const vec_t *a, vec_t *b) +{ + b[0]=a[0]; + b[1]=a[1]; + b[2]=a[2]; +} +FORCEINLINE void VectorClear(vec_t *a) +{ + a[0]=a[1]=a[2]=0; +} + +FORCEINLINE float VectorMaximum(const vec_t *v) +{ + return max( v[0], max( v[1], v[2] ) ); +} + +FORCEINLINE float VectorMaximum(const Vector& v) +{ + return max( v.x, max( v.y, v.z ) ); +} + +FORCEINLINE void VectorScale (const float* in, vec_t scale, float* out) +{ + out[0] = in[0]*scale; + out[1] = in[1]*scale; + out[2] = in[2]*scale; +} + + +// Cannot be forceinline as they have overloads: +inline void VectorFill(vec_t *a, float b) +{ + a[0]=a[1]=a[2]=b; +} + +inline void VectorNegate(vec_t *a) +{ + a[0]=-a[0]; + a[1]=-a[1]; + a[2]=-a[2]; +} + + +//#define VectorMaximum(a) ( max( (a)[0], max( (a)[1], (a)[2] ) ) ) +#define Vector2Clear(x) {(x)[0]=(x)[1]=0;} +#define Vector2Negate(x) {(x)[0]=-((x)[0]);(x)[1]=-((x)[1]);} +#define Vector2Copy(a,b) {(b)[0]=(a)[0];(b)[1]=(a)[1];} +#define Vector2Subtract(a,b,c) {(c)[0]=(a)[0]-(b)[0];(c)[1]=(a)[1]-(b)[1];} +#define Vector2Add(a,b,c) {(c)[0]=(a)[0]+(b)[0];(c)[1]=(a)[1]+(b)[1];} +#define Vector2Scale(a,b,c) {(c)[0]=(b)*(a)[0];(c)[1]=(b)*(a)[1];} + +// NJS: Some functions in VBSP still need to use these for dealing with mixing vec4's and shorts with vec_t's. +// remove when no longer needed. 
+#define VECTOR_COPY( A, B ) do { (B)[0] = (A)[0]; (B)[1] = (A)[1]; (B)[2]=(A)[2]; } while(0) +#define DOT_PRODUCT( A, B ) ( (A)[0]*(B)[0] + (A)[1]*(B)[1] + (A)[2]*(B)[2] ) + +FORCEINLINE void VectorMAInline( const float* start, float scale, const float* direction, float* dest ) +{ + dest[0]=start[0]+direction[0]*scale; + dest[1]=start[1]+direction[1]*scale; + dest[2]=start[2]+direction[2]*scale; +} + +FORCEINLINE void VectorMAInline( const Vector& start, float scale, const Vector& direction, Vector& dest ) +{ + dest.x=start.x+direction.x*scale; + dest.y=start.y+direction.y*scale; + dest.z=start.z+direction.z*scale; +} + +FORCEINLINE void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest ) +{ + VectorMAInline(start, scale, direction, dest); +} + +FORCEINLINE void VectorMA( const float * start, float scale, const float *direction, float *dest ) +{ + VectorMAInline(start, scale, direction, dest); +} + + +int VectorCompare (const float *v1, const float *v2); + +inline float VectorLength(const float *v) +{ + return FastSqrt( v[0]*v[0] + v[1]*v[1] + v[2]*v[2] + FLT_EPSILON ); +} + +void CrossProduct (const float *v1, const float *v2, float *cross); + +qboolean VectorsEqual( const float *v1, const float *v2 ); + +inline vec_t RoundInt (vec_t in) +{ + return floor(in + 0.5f); +} + +int Q_log2(int val); + +// Math routines done in optimized assembly math package routines +void inline SinCos( float radians, float *sine, float *cosine ) +{ +#if defined( _X360 ) + XMScalarSinCos( sine, cosine, radians ); +#elif defined( PLATFORM_WINDOWS_PC32 ) + _asm + { + fld DWORD PTR [radians] + fsincos + + mov edx, DWORD PTR [cosine] + mov eax, DWORD PTR [sine] + + fstp DWORD PTR [edx] + fstp DWORD PTR [eax] + } +#elif defined( PLATFORM_WINDOWS_PC64 ) + *sine = sin( radians ); + *cosine = cos( radians ); +#elif defined( POSIX ) + register double __cosr, __sinr; + __asm ("fsincos" : "=t" (__cosr), "=u" (__sinr) : "0" (radians)); + + *sine = __sinr; + *cosine 
= __cosr; +#endif +} + +#define SIN_TABLE_SIZE 256 +#define FTOIBIAS 12582912.f +extern float SinCosTable[SIN_TABLE_SIZE]; + +inline float TableCos( float theta ) +{ + union + { + int i; + float f; + } ftmp; + + // ideally, the following should compile down to: theta * constant + constant, changing any of these constants from defines sometimes fubars this. + ftmp.f = theta * ( float )( SIN_TABLE_SIZE / ( 2.0f * M_PI ) ) + ( FTOIBIAS + ( SIN_TABLE_SIZE / 4 ) ); + return SinCosTable[ ftmp.i & ( SIN_TABLE_SIZE - 1 ) ]; +} + +inline float TableSin( float theta ) +{ + union + { + int i; + float f; + } ftmp; + + // ideally, the following should compile down to: theta * constant + constant + ftmp.f = theta * ( float )( SIN_TABLE_SIZE / ( 2.0f * M_PI ) ) + FTOIBIAS; + return SinCosTable[ ftmp.i & ( SIN_TABLE_SIZE - 1 ) ]; +} + +template +FORCEINLINE T Square( T const &a ) +{ + return a * a; +} + + +// return the smallest power of two >= x. +// returns 0 if x == 0 or x > 0x80000000 (ie numbers that would be negative if x was signed) +// NOTE: the old code took an int, and if you pass in an int of 0x80000000 casted to a uint, +// you'll get 0x80000000, which is correct for uints, instead of 0, which was correct for ints +FORCEINLINE uint SmallestPowerOfTwoGreaterOrEqual( uint x ) +{ + x -= 1; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return x + 1; +} + +// return the largest power of two <= x. 
Will return 0 if passed 0 +FORCEINLINE uint LargestPowerOfTwoLessThanOrEqual( uint x ) +{ + if ( x >= 0x80000000 ) + return 0x80000000; + + return SmallestPowerOfTwoGreaterOrEqual( x + 1 ) >> 1; +} + + +// Math routines for optimizing division +void FloorDivMod (double numer, double denom, int *quotient, int *rem); +int GreatestCommonDivisor (int i1, int i2); + +// Test for FPU denormal mode +bool IsDenormal( const float &val ); + +// MOVEMENT INFO +enum +{ + PITCH = 0, // up / down + YAW, // left / right + ROLL // fall over +}; + +void MatrixAngles( const matrix3x4_t & matrix, float *angles ); // !!!! +void MatrixVectors( const matrix3x4_t &matrix, Vector* pForward, Vector *pRight, Vector *pUp ); +void VectorTransform (const float *in1, const matrix3x4_t & in2, float *out); +void VectorITransform (const float *in1, const matrix3x4_t & in2, float *out); +void VectorRotate( const float *in1, const matrix3x4_t & in2, float *out); +void VectorRotate( const Vector &in1, const QAngle &in2, Vector &out ); +void VectorRotate( const Vector &in1, const Quaternion &in2, Vector &out ); +void VectorIRotate( const float *in1, const matrix3x4_t & in2, float *out); + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +QAngle TransformAnglesToLocalSpace( const QAngle &angles, const matrix3x4_t &parentMatrix ); +QAngle TransformAnglesToWorldSpace( const QAngle &angles, const matrix3x4_t &parentMatrix ); + +#endif + +void MatrixInitialize( matrix3x4_t &mat, const Vector &vecOrigin, const Vector &vecXAxis, const Vector &vecYAxis, const Vector &vecZAxis ); +void MatrixCopy( const matrix3x4_t &in, matrix3x4_t &out ); +void MatrixInvert( const matrix3x4_t &in, matrix3x4_t &out ); + +// Matrix equality test +bool MatricesAreEqual( const matrix3x4_t &src1, const matrix3x4_t &src2, float flTolerance = 1e-5 ); + +void MatrixGetColumn( const matrix3x4_t &in, int column, Vector &out ); +void MatrixSetColumn( const Vector &in, int column, matrix3x4_t &out ); + +inline void MatrixGetTranslation( const 
matrix3x4_t &in, Vector &out ) +{ + MatrixGetColumn ( in, 3, out ); +} + +inline void MatrixSetTranslation( const Vector &in, matrix3x4_t &out ) +{ + MatrixSetColumn ( in, 3, out ); +} + +void MatrixScaleBy ( const float flScale, matrix3x4_t &out ); +void MatrixScaleByZero ( matrix3x4_t &out ); + +//void DecomposeRotation( const matrix3x4_t &mat, float *out ); +void ConcatRotations (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out); +void ConcatTransforms (const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out); + +// For identical interface w/ VMatrix +inline void MatrixMultiply ( const matrix3x4_t &in1, const matrix3x4_t &in2, matrix3x4_t &out ) +{ + ConcatTransforms( in1, in2, out ); +} + +void QuaternionSlerp( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt ); +void QuaternionSlerpNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt ); +void QuaternionBlend( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt ); +void QuaternionBlendNoAlign( const Quaternion &p, const Quaternion &q, float t, Quaternion &qt ); +void QuaternionIdentityBlend( const Quaternion &p, float t, Quaternion &qt ); +float QuaternionAngleDiff( const Quaternion &p, const Quaternion &q ); +void QuaternionScale( const Quaternion &p, float t, Quaternion &q ); +void QuaternionAlign( const Quaternion &p, const Quaternion &q, Quaternion &qt ); +float QuaternionDotProduct( const Quaternion &p, const Quaternion &q ); +void QuaternionConjugate( const Quaternion &p, Quaternion &q ); +void QuaternionInvert( const Quaternion &p, Quaternion &q ); +float QuaternionNormalize( Quaternion &q ); +void QuaternionAdd( const Quaternion &p, const Quaternion &q, Quaternion &qt ); +void QuaternionMult( const Quaternion &p, const Quaternion &q, Quaternion &qt ); +void QuaternionMatrix( const Quaternion &q, matrix3x4_t &matrix ); +void QuaternionMatrix( const Quaternion &q, const Vector &pos, matrix3x4_t &matrix ); +void QuaternionAngles( 
const Quaternion &q, QAngle &angles ); +void AngleQuaternion( const QAngle& angles, Quaternion &qt ); +void QuaternionAngles( const Quaternion &q, RadianEuler &angles ); +void AngleQuaternion( RadianEuler const &angles, Quaternion &qt ); +void QuaternionAxisAngle( const Quaternion &q, Vector &axis, float &angle ); +void AxisAngleQuaternion( const Vector &axis, float angle, Quaternion &q ); +void BasisToQuaternion( const Vector &vecForward, const Vector &vecRight, const Vector &vecUp, Quaternion &q ); +void MatrixQuaternion( const matrix3x4_t &mat, Quaternion &q ); + +// A couple methods to find the dot product of a vector with a matrix row or column... +inline float MatrixRowDotProduct( const matrix3x4_t &in1, int row, const Vector& in2 ) +{ + Assert( (row >= 0) && (row < 3) ); + return DotProduct( in1[row], in2.Base() ); +} + +inline float MatrixColumnDotProduct( const matrix3x4_t &in1, int col, const Vector& in2 ) +{ + Assert( (col >= 0) && (col < 4) ); + return in1[0][col] * in2[0] + in1[1][col] * in2[1] + in1[2][col] * in2[2]; +} + +int __cdecl BoxOnPlaneSide (const float *emins, const float *emaxs, const cplane_t *plane); + +inline float anglemod(float a) +{ + a = (360.f/65536) * ((int)(a*(65536.f/360.0f)) & 65535); + return a; +} + +// Remap a value in the range [A,B] to [C,D]. +inline float RemapVal( float val, float A, float B, float C, float D) +{ + if ( A == B ) + return val >= B ? D : C; + return C + (D - C) * (val - A) / (B - A); +} + +inline float RemapValClamped( float val, float A, float B, float C, float D) +{ + if ( A == B ) + return val >= B ? D : C; + float cVal = (val - A) / (B - A); + cVal = clamp( cVal, 0.0f, 1.0f ); + + return C + (D - C) * cVal; +} + +// Returns A + (B-A)*flPercent. 
+// float Lerp( float flPercent, float A, float B ); +template +FORCEINLINE T Lerp( float flPercent, T const &A, T const &B ) +{ + return A + (B - A) * flPercent; +} + +FORCEINLINE float Sqr( float f ) +{ + return f*f; +} + +// 5-argument floating point linear interpolation. +// FLerp(f1,f2,i1,i2,x)= +// f1 at x=i1 +// f2 at x=i2 +// smooth lerp between f1 and f2 at x>i1 and xi2 +// +// If you know a function f(x)'s value (f1) at position i1, and its value (f2) at position i2, +// the function can be linearly interpolated with FLerp(f1,f2,i1,i2,x) +// i2=i1 will cause a divide by zero. +static inline float FLerp(float f1, float f2, float i1, float i2, float x) +{ + return f1+(f2-f1)*(x-i1)/(i2-i1); +} + + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +// YWB: Specialization for interpolating euler angles via quaternions... +template<> FORCEINLINE QAngle Lerp( float flPercent, const QAngle& q1, const QAngle& q2 ) +{ + // Avoid precision errors + if ( q1 == q2 ) + return q1; + + Quaternion src, dest; + + // Convert to quaternions + AngleQuaternion( q1, src ); + AngleQuaternion( q2, dest ); + + Quaternion result; + + // Slerp + QuaternionSlerp( src, dest, flPercent, result ); + + // Convert to euler + QAngle output; + QuaternionAngles( result, output ); + return output; +} + +#else + +#pragma error + +// NOTE NOTE: I haven't tested this!! It may not work! 
Check out interpolatedvar.cpp in the client dll to try it +template<> FORCEINLINE QAngleByValue Lerp( float flPercent, const QAngleByValue& q1, const QAngleByValue& q2 ) +{ + // Avoid precision errors + if ( q1 == q2 ) + return q1; + + Quaternion src, dest; + + // Convert to quaternions + AngleQuaternion( q1, src ); + AngleQuaternion( q2, dest ); + + Quaternion result; + + // Slerp + QuaternionSlerp( src, dest, flPercent, result ); + + // Convert to euler + QAngleByValue output; + QuaternionAngles( result, output ); + return output; +} + +#endif // VECTOR_NO_SLOW_OPERATIONS + + +/// Same as swap(), but won't cause problems with std::swap +template +FORCEINLINE void V_swap( T& x, T& y ) +{ + T temp = x; + x = y; + y = temp; +} + +template FORCEINLINE T AVG(T a, T b) +{ + return (a+b)/2; +} + +// number of elements in an array of static size +#define NELEMS(x) ARRAYSIZE(x) + +// XYZ macro, for printf type functions - ex printf("%f %f %f",XYZ(myvector)); +#define XYZ(v) (v).x,(v).y,(v).z + + +inline float Sign( float x ) +{ + return (x <0.0f) ? -1.0f : 1.0f; +} + +// +// Clamps the input integer to the given array bounds. +// Equivalent to the following, but without using any branches: +// +// if( n < 0 ) return 0; +// else if ( n > maxindex ) return maxindex; +// else return n; +// +// This is not always a clear performance win, but when you have situations where a clamped +// value is thrashing against a boundary this is a big win. (ie, valid, invalid, valid, invalid, ...) +// +// Note: This code has been run against all possible integers. +// +inline int ClampArrayBounds( int n, unsigned maxindex ) +{ + // mask is 0 if less than 4096, 0xFFFFFFFF if greater than + unsigned int inrangemask = 0xFFFFFFFF + (((unsigned) n) > maxindex ); + unsigned int lessthan0mask = 0xFFFFFFFF + ( n >= 0 ); + + // If the result was valid, set the result, (otherwise sets zero) + int result = (inrangemask & n); + + // if the result was out of range or zero. 
+ result |= ((~inrangemask) & (~lessthan0mask)) & maxindex; + + return result; +} + + +#define BOX_ON_PLANE_SIDE(emins, emaxs, p) \ + (((p)->type < 3)? \ + ( \ + ((p)->dist <= (emins)[(p)->type])? \ + 1 \ + : \ + ( \ + ((p)->dist >= (emaxs)[(p)->type])?\ + 2 \ + : \ + 3 \ + ) \ + ) \ + : \ + BoxOnPlaneSide( (emins), (emaxs), (p))) + +//----------------------------------------------------------------------------- +// FIXME: Vector versions.... the float versions will go away hopefully soon! +//----------------------------------------------------------------------------- + +void AngleVectors (const QAngle& angles, Vector *forward); +void AngleVectors (const QAngle& angles, Vector *forward, Vector *right, Vector *up); +void AngleVectorsTranspose (const QAngle& angles, Vector *forward, Vector *right, Vector *up); +void AngleMatrix (const QAngle &angles, matrix3x4_t &mat ); +void AngleMatrix( const QAngle &angles, const Vector &position, matrix3x4_t &mat ); +void AngleMatrix (const RadianEuler &angles, matrix3x4_t &mat ); +void AngleMatrix( RadianEuler const &angles, const Vector &position, matrix3x4_t &mat ); +void AngleIMatrix (const QAngle &angles, matrix3x4_t &mat ); +void AngleIMatrix (const QAngle &angles, const Vector &position, matrix3x4_t &mat ); +void AngleIMatrix (const RadianEuler &angles, matrix3x4_t &mat ); +void VectorAngles( const Vector &forward, QAngle &angles ); +void VectorAngles( const Vector &forward, const Vector &pseudoup, QAngle &angles ); +void VectorMatrix( const Vector &forward, matrix3x4_t &mat ); +void VectorVectors( const Vector &forward, Vector &right, Vector &up ); +void SetIdentityMatrix( matrix3x4_t &mat ); +void SetScaleMatrix( float x, float y, float z, matrix3x4_t &dst ); +void MatrixBuildRotationAboutAxis( const Vector &vAxisOfRot, float angleDegrees, matrix3x4_t &dst ); + +inline void SetScaleMatrix( float flScale, matrix3x4_t &dst ) +{ + SetScaleMatrix( flScale, flScale, flScale, dst ); +} + +inline void SetScaleMatrix( const 
Vector& scale, matrix3x4_t &dst ) +{ + SetScaleMatrix( scale.x, scale.y, scale.z, dst ); +} + +// Computes the inverse transpose +void MatrixTranspose( matrix3x4_t& mat ); +void MatrixTranspose( const matrix3x4_t& src, matrix3x4_t& dst ); +void MatrixInverseTranspose( const matrix3x4_t& src, matrix3x4_t& dst ); + +inline void PositionMatrix( const Vector &position, matrix3x4_t &mat ) +{ + MatrixSetColumn( position, 3, mat ); +} + +inline void MatrixPosition( const matrix3x4_t &matrix, Vector &position ) +{ + MatrixGetColumn( matrix, 3, position ); +} + +inline void VectorRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out) +{ + VectorRotate( &in1.x, in2, &out.x ); +} + +inline void VectorIRotate( const Vector& in1, const matrix3x4_t &in2, Vector &out) +{ + VectorIRotate( &in1.x, in2, &out.x ); +} + +inline void MatrixAngles( const matrix3x4_t &matrix, QAngle &angles ) +{ + MatrixAngles( matrix, &angles.x ); +} + +inline void MatrixAngles( const matrix3x4_t &matrix, QAngle &angles, Vector &position ) +{ + MatrixAngles( matrix, angles ); + MatrixPosition( matrix, position ); +} + +inline void MatrixAngles( const matrix3x4_t &matrix, RadianEuler &angles ) +{ + MatrixAngles( matrix, &angles.x ); + + angles.Init( DEG2RAD( angles.z ), DEG2RAD( angles.x ), DEG2RAD( angles.y ) ); +} + +void MatrixAngles( const matrix3x4_t &mat, RadianEuler &angles, Vector &position ); + +void MatrixAngles( const matrix3x4_t &mat, Quaternion &q, Vector &position ); + +inline int VectorCompare (const Vector& v1, const Vector& v2) +{ + return v1 == v2; +} + +inline void VectorTransform (const Vector& in1, const matrix3x4_t &in2, Vector &out) +{ + VectorTransform( &in1.x, in2, &out.x ); +} + +inline void VectorITransform (const Vector& in1, const matrix3x4_t &in2, Vector &out) +{ + VectorITransform( &in1.x, in2, &out.x ); +} + +/* +inline void DecomposeRotation( const matrix3x4_t &mat, Vector &out ) +{ + DecomposeRotation( mat, &out.x ); +} +*/ + +inline int BoxOnPlaneSide (const 
Vector& emins, const Vector& emaxs, const cplane_t *plane ) +{ + return BoxOnPlaneSide( &emins.x, &emaxs.x, plane ); +} + +inline void VectorFill(Vector& a, float b) +{ + a[0]=a[1]=a[2]=b; +} + +inline void VectorNegate(Vector& a) +{ + a[0] = -a[0]; + a[1] = -a[1]; + a[2] = -a[2]; +} + +inline vec_t VectorAvg(Vector& a) +{ + return ( a[0] + a[1] + a[2] ) / 3; +} + +//----------------------------------------------------------------------------- +// Box/plane test (slow version) +//----------------------------------------------------------------------------- +inline int FASTCALL BoxOnPlaneSide2 (const Vector& emins, const Vector& emaxs, const cplane_t *p, float tolerance = 0.f ) +{ + Vector corners[2]; + + if (p->normal[0] < 0) + { + corners[0][0] = emins[0]; + corners[1][0] = emaxs[0]; + } + else + { + corners[1][0] = emins[0]; + corners[0][0] = emaxs[0]; + } + + if (p->normal[1] < 0) + { + corners[0][1] = emins[1]; + corners[1][1] = emaxs[1]; + } + else + { + corners[1][1] = emins[1]; + corners[0][1] = emaxs[1]; + } + + if (p->normal[2] < 0) + { + corners[0][2] = emins[2]; + corners[1][2] = emaxs[2]; + } + else + { + corners[1][2] = emins[2]; + corners[0][2] = emaxs[2]; + } + + int sides = 0; + + float dist1 = DotProduct (p->normal, corners[0]) - p->dist; + if (dist1 >= tolerance) + sides = 1; + + float dist2 = DotProduct (p->normal, corners[1]) - p->dist; + if (dist2 < -tolerance) + sides |= 2; + + return sides; +} + +//----------------------------------------------------------------------------- +// Helpers for bounding box construction +//----------------------------------------------------------------------------- + +void ClearBounds (Vector& mins, Vector& maxs); +void AddPointToBounds (const Vector& v, Vector& mins, Vector& maxs); + +// +// COLORSPACE/GAMMA CONVERSION STUFF +// +void BuildGammaTable( float gamma, float texGamma, float brightness, int overbright ); + +// convert texture to linear 0..1 value +inline float TexLightToLinear( int c, int exponent ) 
+{ + extern float power2_n[256]; + Assert( exponent >= -128 && exponent <= 127 ); + return ( float )c * power2_n[exponent+128]; +} + + +// convert texture to linear 0..1 value +int LinearToTexture( float f ); +// converts 0..1 linear value to screen gamma (0..255) +int LinearToScreenGamma( float f ); +float TextureToLinear( int c ); + +// compressed color format +struct ColorRGBExp32 +{ + byte r, g, b; + signed char exponent; +}; + +void ColorRGBExp32ToVector( const ColorRGBExp32& in, Vector& out ); +void VectorToColorRGBExp32( const Vector& v, ColorRGBExp32 &c ); + +// solve for "x" where "a x^2 + b x + c = 0", return true if solution exists +bool SolveQuadratic( float a, float b, float c, float &root1, float &root2 ); + +// solves for "a, b, c" where "a x^2 + b x + c = y", return true if solution exists +bool SolveInverseQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c ); + +// solves for a,b,c specified as above, except that it always creates a monotonically increasing or +// decreasing curve if the data is monotonically increasing or decreasing. In order to enforce the +// monoticity condition, it is possible that the resulting quadratic will only approximate the data +// instead of interpolating it. This code is not especially fast. +bool SolveInverseQuadraticMonotonic( float x1, float y1, float x2, float y2, + float x3, float y3, float &a, float &b, float &c ); + + + + +// solves for "a, b, c" where "1/(a x^2 + b x + c ) = y", return true if solution exists +bool SolveInverseReciprocalQuadratic( float x1, float y1, float x2, float y2, float x3, float y3, float &a, float &b, float &c ); + +// rotate a vector around the Z axis (YAW) +void VectorYawRotate( const Vector& in, float flYaw, Vector &out); + + +// Bias takes an X value between 0 and 1 and returns another value between 0 and 1 +// The curve is biased towards 0 or 1 based on biasAmt, which is between 0 and 1. 
+// Lower values of biasAmt bias the curve towards 0 and higher values bias it towards 1. +// +// For example, with biasAmt = 0.2, the curve looks like this: +// +// 1 +// | * +// | * +// | * +// | ** +// | ** +// | **** +// |********* +// |___________________ +// 0 1 +// +// +// With biasAmt = 0.8, the curve looks like this: +// +// 1 +// | ************** +// | ** +// | * +// | * +// |* +// |* +// |* +// |___________________ +// 0 1 +// +// With a biasAmt of 0.5, Bias returns X. +float Bias( float x, float biasAmt ); + + +// Gain is similar to Bias, but biasAmt biases towards or away from 0.5. +// Lower bias values bias towards 0.5 and higher bias values bias away from it. +// +// For example, with biasAmt = 0.2, the curve looks like this: +// +// 1 +// | * +// | * +// | ** +// | *************** +// | ** +// | * +// |* +// |___________________ +// 0 1 +// +// +// With biasAmt = 0.8, the curve looks like this: +// +// 1 +// | ***** +// | *** +// | * +// | * +// | * +// | *** +// |***** +// |___________________ +// 0 1 +float Gain( float x, float biasAmt ); + + +// SmoothCurve maps a 0-1 value into another 0-1 value based on a cosine wave +// where the derivatives of the function at 0 and 1 (and 0.5) are 0. This is useful for +// any fadein/fadeout effect where it should start and end smoothly. +// +// The curve looks like this: +// +// 1 +// | ** +// | * * +// | * * +// | * * +// | * * +// | ** ** +// |*** *** +// |___________________ +// 0 1 +// +float SmoothCurve( float x ); + + +// This works like SmoothCurve, with two changes: +// +// 1. Instead of the curve peaking at 0.5, it will peak at flPeakPos. +// (So if you specify flPeakPos=0.2, then the peak will slide to the left). +// +// 2. flPeakSharpness is a 0-1 value controlling the sharpness of the peak. +// Low values blunt the peak and high values sharpen the peak. 
+float SmoothCurve_Tweak( float x, float flPeakPos=0.5, float flPeakSharpness=0.5 ); + + +//float ExponentialDecay( float halflife, float dt ); +//float ExponentialDecay( float decayTo, float decayTime, float dt ); + +// halflife is time for value to reach 50% +inline float ExponentialDecay( float halflife, float dt ) +{ + // log(0.5) == -0.69314718055994530941723212145818 + return expf( -0.69314718f / halflife * dt); +} + +// decayTo is factor the value should decay to in decayTime +inline float ExponentialDecay( float decayTo, float decayTime, float dt ) +{ + return expf( logf( decayTo ) / decayTime * dt); +} + +// Get the integrated distanced traveled +// decayTo is factor the value should decay to in decayTime +// dt is the time relative to the last velocity update +inline float ExponentialDecayIntegral( float decayTo, float decayTime, float dt ) +{ + return (powf( decayTo, dt / decayTime) * decayTime - decayTime) / logf( decayTo ); +} + +// hermite basis function for smooth interpolation +// Similar to Gain() above, but very cheap to call +// value should be between 0 & 1 inclusive +inline float SimpleSpline( float value ) +{ + float valueSquared = value * value; + + // Nice little ease-in, ease-out spline-like curve + return (3 * valueSquared - 2 * valueSquared * value); +} + +// remaps a value in [startInterval, startInterval+rangeInterval] from linear to +// spline using SimpleSpline +inline float SimpleSplineRemapVal( float val, float A, float B, float C, float D) +{ + if ( A == B ) + return val >= B ? D : C; + float cVal = (val - A) / (B - A); + return C + (D - C) * SimpleSpline( cVal ); +} + +// remaps a value in [startInterval, startInterval+rangeInterval] from linear to +// spline using SimpleSpline +inline float SimpleSplineRemapValClamped( float val, float A, float B, float C, float D ) +{ + if ( A == B ) + return val >= B ? 
D : C; + float cVal = (val - A) / (B - A); + cVal = clamp( cVal, 0.0f, 1.0f ); + return C + (D - C) * SimpleSpline( cVal ); +} + +FORCEINLINE int RoundFloatToInt(float f) +{ +#if defined(__i386__) || defined(_M_IX86) || defined( PLATFORM_WINDOWS_PC64 ) + return _mm_cvtss_si32(_mm_load_ss(&f)); +#elif defined( _X360 ) +#ifdef Assert + Assert( IsFPUControlWordSet() ); +#endif + union + { + double flResult; + int pResult[2]; + }; + flResult = __fctiw( f ); + return pResult[1]; +#else +#error Unknown architecture +#endif +} + +FORCEINLINE unsigned char RoundFloatToByte(float f) +{ + int nResult = RoundFloatToInt(f); +#ifdef Assert + Assert( (nResult & ~0xFF) == 0 ); +#endif + return (unsigned char) nResult; +} + +FORCEINLINE unsigned long RoundFloatToUnsignedLong(float f) +{ +#if defined( _X360 ) +#ifdef Assert + Assert( IsFPUControlWordSet() ); +#endif + union + { + double flResult; + int pIntResult[2]; + unsigned long pResult[2]; + }; + flResult = __fctiw( f ); + Assert( pIntResult[1] >= 0 ); + return pResult[1]; +#else // !X360 + +#if defined( PLATFORM_WINDOWS_PC64 ) + uint nRet = ( uint ) f; + if ( nRet & 1 ) + { + if ( ( f - floor( f ) >= 0.5 ) ) + { + nRet++; + } + } + else + { + if ( ( f - floor( f ) > 0.5 ) ) + { + nRet++; + } + } + return nRet; +#else // PLATFORM_WINDOWS_PC64 + unsigned char nResult[8]; + + #if defined( _WIN32 ) + __asm + { + fld f + fistp qword ptr nResult + } + #elif POSIX + __asm __volatile__ ( + "fistpl %0;": "=m" (nResult): "t" (f) : "st" + ); + #endif + + return *((unsigned long*)nResult); +#endif // PLATFORM_WINDOWS_PC64 +#endif // !X360 +} + +FORCEINLINE bool IsIntegralValue( float flValue, float flTolerance = 0.001f ) +{ + return fabs( RoundFloatToInt( flValue ) - flValue ) < flTolerance; +} + +// Fast, accurate ftol: +FORCEINLINE int Float2Int( float a ) +{ +#if defined( _X360 ) + union + { + double flResult; + int pResult[2]; + }; + flResult = __fctiwz( a ); + return pResult[1]; +#else // !X360 + // Rely on compiler to generate 
CVTTSS2SI on x86 + return (int) a; +#endif +} + +// Over 15x faster than: (int)floor(value) +inline int Floor2Int( float a ) +{ + int RetVal; +#if defined( __i386__ ) + // Convert to int and back, compare, subtract one if too big + __m128 a128 = _mm_set_ss(a); + RetVal = _mm_cvtss_si32(a128); + __m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal); + RetVal -= _mm_comigt_ss( rounded128, a128 ); +#else + RetVal = static_cast( floor(a) ); +#endif + return RetVal; +} + +//----------------------------------------------------------------------------- +// Fast color conversion from float to unsigned char +//----------------------------------------------------------------------------- +FORCEINLINE unsigned int FastFToC( float c ) +{ +#if defined( __i386__ ) + // IEEE float bit manipulation works for values between [0, 1<<23) + union { float f; int i; } convert = { c*255.0f + (float)(1<<23) }; + return convert.i & 255; +#else + // consoles CPUs suffer from load-hit-store penalty + return Float2Int( c * 255.0f ); +#endif +} + +//----------------------------------------------------------------------------- +// Fast conversion from float to integer with magnitude less than 2**22 +//----------------------------------------------------------------------------- +FORCEINLINE int FastFloatToSmallInt( float c ) +{ +#if defined( __i386__ ) + // IEEE float bit manipulation works for values between [-1<<22, 1<<22) + union { float f; int i; } convert = { c + (float)(3<<22) }; + return (convert.i & ((1<<23)-1)) - (1<<22); +#else + // consoles CPUs suffer from load-hit-store penalty + return Float2Int( c ); +#endif +} + +//----------------------------------------------------------------------------- +// Purpose: Bound input float to .001 (millisecond) boundary +// Input : in - +// Output : inline float +//----------------------------------------------------------------------------- +inline float ClampToMsec( float in ) +{ + int msec = Floor2Int( in * 1000.0f + 0.5f ); + return 
0.001f * msec; +} + +// Over 15x faster than: (int)ceil(value) +inline int Ceil2Int( float a ) +{ + int RetVal; +#if defined( __i386__ ) + // Convert to int and back, compare, add one if too small + __m128 a128 = _mm_load_ss(&a); + RetVal = _mm_cvtss_si32(a128); + __m128 rounded128 = _mm_cvt_si2ss(_mm_setzero_ps(), RetVal); + RetVal += _mm_comilt_ss( rounded128, a128 ); +#else + RetVal = static_cast( ceil(a) ); +#endif + return RetVal; +} + + +// Regular signed area of triangle +#define TriArea2D( A, B, C ) \ + ( 0.5f * ( ( B.x - A.x ) * ( C.y - A.y ) - ( B.y - A.y ) * ( C.x - A.x ) ) ) + +// This version doesn't premultiply by 0.5f, so it's the area of the rectangle instead +#define TriArea2DTimesTwo( A, B, C ) \ + ( ( ( B.x - A.x ) * ( C.y - A.y ) - ( B.y - A.y ) * ( C.x - A.x ) ) ) + + +// Get the barycentric coordinates of "pt" in triangle [A,B,C]. +inline void GetBarycentricCoords2D( + Vector2D const &A, + Vector2D const &B, + Vector2D const &C, + Vector2D const &pt, + float bcCoords[3] ) +{ + // Note, because to top and bottom are both x2, the issue washes out in the composite + float invTriArea = 1.0f / TriArea2DTimesTwo( A, B, C ); + + // NOTE: We assume here that the lightmap coordinate vertices go counterclockwise. + // If not, TriArea2D() is negated so this works out right. + bcCoords[0] = TriArea2DTimesTwo( B, C, pt ) * invTriArea; + bcCoords[1] = TriArea2DTimesTwo( C, A, pt ) * invTriArea; + bcCoords[2] = TriArea2DTimesTwo( A, B, pt ) * invTriArea; +} + + +// Return true of the sphere might touch the box (the sphere is actually treated +// like a box itself, so this may return true if the sphere's bounding box touches +// a corner of the box but the sphere itself doesn't). 
+inline bool QuickBoxSphereTest( + const Vector& vOrigin, + float flRadius, + const Vector& bbMin, + const Vector& bbMax ) +{ + return vOrigin.x - flRadius < bbMax.x && vOrigin.x + flRadius > bbMin.x && + vOrigin.y - flRadius < bbMax.y && vOrigin.y + flRadius > bbMin.y && + vOrigin.z - flRadius < bbMax.z && vOrigin.z + flRadius > bbMin.z; +} + + +// Return true of the boxes intersect (but not if they just touch). +inline bool QuickBoxIntersectTest( + const Vector& vBox1Min, + const Vector& vBox1Max, + const Vector& vBox2Min, + const Vector& vBox2Max ) +{ + return + vBox1Min.x < vBox2Max.x && vBox1Max.x > vBox2Min.x && + vBox1Min.y < vBox2Max.y && vBox1Max.y > vBox2Min.y && + vBox1Min.z < vBox2Max.z && vBox1Max.z > vBox2Min.z; +} + + +extern float GammaToLinearFullRange( float gamma ); +extern float LinearToGammaFullRange( float linear ); +extern float GammaToLinear( float gamma ); +extern float LinearToGamma( float linear ); + +extern float SrgbGammaToLinear( float flSrgbGammaValue ); +extern float SrgbLinearToGamma( float flLinearValue ); +extern float X360GammaToLinear( float fl360GammaValue ); +extern float X360LinearToGamma( float flLinearValue ); +extern float SrgbGammaTo360Gamma( float flSrgbGammaValue ); + +// linear (0..4) to screen corrected vertex space (0..1?) +FORCEINLINE float LinearToVertexLight( float f ) +{ + extern float lineartovertex[4096]; + + // Gotta clamp before the multiply; could overflow... + // assume 0..4 range + int i = RoundFloatToInt( f * 1024.f ); + + // Presumably the comman case will be not to clamp, so check that first: + if( (unsigned)i > 4095 ) + { + if ( i < 0 ) + i = 0; // Compare to zero instead of 4095 to save 4 bytes in the instruction stream + else + i = 4095; + } + + return lineartovertex[i]; +} + + +FORCEINLINE unsigned char LinearToLightmap( float f ) +{ + extern unsigned char lineartolightmap[4096]; + + // Gotta clamp before the multiply; could overflow... 
+ int i = RoundFloatToInt( f * 1024.f ); // assume 0..4 range + + // Presumably the comman case will be not to clamp, so check that first: + if ( (unsigned)i > 4095 ) + { + if ( i < 0 ) + i = 0; // Compare to zero instead of 4095 to save 4 bytes in the instruction stream + else + i = 4095; + } + + return lineartolightmap[i]; +} + +FORCEINLINE void ColorClamp( Vector& color ) +{ + float maxc = max( color.x, max( color.y, color.z ) ); + if ( maxc > 1.0f ) + { + float ooMax = 1.0f / maxc; + color.x *= ooMax; + color.y *= ooMax; + color.z *= ooMax; + } + + if ( color[0] < 0.f ) color[0] = 0.f; + if ( color[1] < 0.f ) color[1] = 0.f; + if ( color[2] < 0.f ) color[2] = 0.f; +} + +inline void ColorClampTruncate( Vector& color ) +{ + if (color[0] > 1.0f) color[0] = 1.0f; else if (color[0] < 0.0f) color[0] = 0.0f; + if (color[1] > 1.0f) color[1] = 1.0f; else if (color[1] < 0.0f) color[1] = 0.0f; + if (color[2] > 1.0f) color[2] = 1.0f; else if (color[2] < 0.0f) color[2] = 0.0f; +} + +// Interpolate a Catmull-Rom spline. +// t is a [0,1] value and interpolates a curve between p2 and p3. +void Catmull_Rom_Spline( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector &output ); + +// Interpolate a Catmull-Rom spline. +// Returns the tangent of the point at t of the spline +void Catmull_Rom_Spline_Tangent( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector &output ); + +// area under the curve [0..t] +void Catmull_Rom_Spline_Integral( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// area under the curve [0..1] +void Catmull_Rom_Spline_Integral( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + Vector& output ); + +// Interpolate a Catmull-Rom spline. 
+// Normalize p2->p1 and p3->p4 to be the same length as p2->p3 +void Catmull_Rom_Spline_Normalize( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector &output ); + +// area under the curve [0..t] +// Normalize p2->p1 and p3->p4 to be the same length as p2->p3 +void Catmull_Rom_Spline_Integral_Normalize( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// Interpolate a Catmull-Rom spline. +// Normalize p2.x->p1.x and p3.x->p4.x to be the same length as p2.x->p3.x +void Catmull_Rom_Spline_NormalizeX( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector &output ); + +// area under the curve [0..t] +void Catmull_Rom_Spline_NormalizeX( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// Interpolate a Hermite spline. +// t is a [0,1] value and interpolates a curve between p1 and p2 with the deltas d1 and d2. 
+void Hermite_Spline( + const Vector &p1, + const Vector &p2, + const Vector &d1, + const Vector &d2, + float t, + Vector& output ); + +float Hermite_Spline( + float p1, + float p2, + float d1, + float d2, + float t ); + +// t is a [0,1] value and interpolates a curve between p1 and p2 with the slopes p0->p1 and p1->p2 +void Hermite_Spline( + const Vector &p0, + const Vector &p1, + const Vector &p2, + float t, + Vector& output ); + +float Hermite_Spline( + float p0, + float p1, + float p2, + float t ); + + +void Hermite_SplineBasis( float t, float basis[] ); + +void Hermite_Spline( + const Quaternion &q0, + const Quaternion &q1, + const Quaternion &q2, + float t, + Quaternion &output ); + + +// See http://en.wikipedia.org/wiki/Kochanek-Bartels_curves +// +// Tension: -1 = Round -> 1 = Tight +// Bias: -1 = Pre-shoot (bias left) -> 1 = Post-shoot (bias right) +// Continuity: -1 = Box corners -> 1 = Inverted corners +// +// If T=B=C=0 it's the same matrix as Catmull-Rom. +// If T=1 & B=C=0 it's the same as Cubic. +// If T=B=0 & C=-1 it's just linear interpolation +// +// See http://news.povray.org/povray.binaries.tutorials/attachment/%3CXns91B880592482seed7@povray.org%3E/Splines.bas.txt +// for example code and descriptions of various spline types... 
+// +void Kochanek_Bartels_Spline( + float tension, + float bias, + float continuity, + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +void Kochanek_Bartels_Spline_NormalizeX( + float tension, + float bias, + float continuity, + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// See link at Kochanek_Bartels_Spline for info on the basis matrix used +void Cubic_Spline( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +void Cubic_Spline_NormalizeX( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// See link at Kochanek_Bartels_Spline for info on the basis matrix used +void BSpline( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +void BSpline_NormalizeX( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// See link at Kochanek_Bartels_Spline for info on the basis matrix used +void Parabolic_Spline( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +void Parabolic_Spline_NormalizeX( + const Vector &p1, + const Vector &p2, + const Vector &p3, + const Vector &p4, + float t, + Vector& output ); + +// quintic interpolating polynomial from Perlin. +// 0->0, 1->1, smooth-in between with smooth tangents +FORCEINLINE float QuinticInterpolatingPolynomial(float t) +{ + // 6t^5-15t^4+10t^3 + return t * t * t *( t * ( t* 6.0 - 15.0 ) + 10.0 ); +} + +// given a table of sorted tabulated positions, return the two indices and blendfactor to linear +// interpolate. Does a search. Can be used to find the blend value to interpolate between +// keyframes. 
+void GetInterpolationData( float const *pKnotPositions, + float const *pKnotValues, + int nNumValuesinList, + int nInterpolationRange, + float flPositionToInterpolateAt, + bool bWrap, + float *pValueA, + float *pValueB, + float *pInterpolationValue); + +float RangeCompressor( float flValue, float flMin, float flMax, float flBase ); + +// Get the minimum distance from vOrigin to the bounding box defined by [mins,maxs] +// using voronoi regions. +// 0 is returned if the origin is inside the box. +float CalcSqrDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point ); +void CalcClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut ); +void CalcSqrDistAndClosestPointOnAABB( const Vector &mins, const Vector &maxs, const Vector &point, Vector &closestOut, float &distSqrOut ); + +inline float CalcDistanceToAABB( const Vector &mins, const Vector &maxs, const Vector &point ) +{ + float flDistSqr = CalcSqrDistanceToAABB( mins, maxs, point ); + return sqrt(flDistSqr); +} + +// Get the closest point from P to the (infinite) line through vLineA and vLineB and +// calculate the shortest distance from P to the line. +// If you pass in a value for t, it will tell you the t for (A + (B-A)t) to get the closest point. +// If the closest point lies on the segment between A and B, then 0 <= t <= 1. +void CalcClosestPointOnLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *t=0 ); +float CalcDistanceToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 ); +float CalcDistanceSqrToLine( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 ); + +// The same three functions as above, except now the line is closed between A and B. 
+void CalcClosestPointOnLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, Vector &vClosest, float *t=0 ); +float CalcDistanceToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 ); +float CalcDistanceSqrToLineSegment( const Vector &P, const Vector &vLineA, const Vector &vLineB, float *t=0 ); + +// A function to compute the closes line segment connnection two lines (or false if the lines are parallel, etc.) +bool CalcLineToLineIntersectionSegment( + const Vector& p1,const Vector& p2,const Vector& p3,const Vector& p4,Vector *s1,Vector *s2, + float *t1, float *t2 ); + +// The above functions in 2D +void CalcClosestPointOnLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, Vector2D &vClosest, float *t=0 ); +float CalcDistanceToLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 ); +float CalcDistanceSqrToLine2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 ); +void CalcClosestPointOnLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, Vector2D &vClosest, float *t=0 ); +float CalcDistanceToLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 ); +float CalcDistanceSqrToLineSegment2D( Vector2D const &P, Vector2D const &vLineA, Vector2D const &vLineB, float *t=0 ); + +// Init the mathlib +void MathLib_Init( float gamma = 2.2f, float texGamma = 2.2f, float brightness = 0.0f, int overbright = 2.0f, bool bAllow3DNow = true, bool bAllowSSE = true, bool bAllowSSE2 = true, bool bAllowMMX = true ); +bool MathLib_3DNowEnabled( void ); +bool MathLib_MMXEnabled( void ); +bool MathLib_SSEEnabled( void ); +bool MathLib_SSE2Enabled( void ); + +float Approach( float target, float value, float speed ); +float ApproachAngle( float target, float value, float speed ); +float AngleDiff( float destAngle, float srcAngle ); +float AngleDistance( float next, float cur ); +float 
AngleNormalize( float angle ); + +// ensure that 0 <= angle <= 360 +float AngleNormalizePositive( float angle ); + +bool AnglesAreEqual( float a, float b, float tolerance = 0.0f ); + + +void RotationDeltaAxisAngle( const QAngle &srcAngles, const QAngle &destAngles, Vector &deltaAxis, float &deltaAngle ); +void RotationDelta( const QAngle &srcAngles, const QAngle &destAngles, QAngle *out ); + +void ComputeTrianglePlane( const Vector& v1, const Vector& v2, const Vector& v3, Vector& normal, float& intercept ); +int PolyFromPlane( Vector *outVerts, const Vector& normal, float dist, float fHalfScale = 9000.0f ); +int ClipPolyToPlane( Vector *inVerts, int vertCount, Vector *outVerts, const Vector& normal, float dist, float fOnPlaneEpsilon = 0.1f ); +int ClipPolyToPlane_Precise( double *inVerts, int vertCount, double *outVerts, const double *normal, double dist, double fOnPlaneEpsilon = 0.1 ); + +//----------------------------------------------------------------------------- +// Computes a reasonable tangent space for a triangle +//----------------------------------------------------------------------------- +void CalcTriangleTangentSpace( const Vector &p0, const Vector &p1, const Vector &p2, + const Vector2D &t0, const Vector2D &t1, const Vector2D& t2, + Vector &sVect, Vector &tVect ); + +//----------------------------------------------------------------------------- +// Transforms a AABB into another space; which will inherently grow the box. 
+//----------------------------------------------------------------------------- +void TransformAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut ); + +//----------------------------------------------------------------------------- +// Uses the inverse transform of in1 +//----------------------------------------------------------------------------- +void ITransformAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut ); + +//----------------------------------------------------------------------------- +// Rotates a AABB into another space; which will inherently grow the box. +// (same as TransformAABB, but doesn't take the translation into account) +//----------------------------------------------------------------------------- +void RotateAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut ); + +//----------------------------------------------------------------------------- +// Uses the inverse transform of in1 +//----------------------------------------------------------------------------- +void IRotateAABB( const matrix3x4_t &in1, const Vector &vecMinsIn, const Vector &vecMaxsIn, Vector &vecMinsOut, Vector &vecMaxsOut ); + +//----------------------------------------------------------------------------- +// Transform a plane +//----------------------------------------------------------------------------- +inline void MatrixTransformPlane( const matrix3x4_t &src, const cplane_t &inPlane, cplane_t &outPlane ) +{ + // What we want to do is the following: + // 1) transform the normal into the new space. + // 2) Determine a point on the old plane given by plane dist * plane normal + // 3) Transform that point into the new space + // 4) Plane dist = DotProduct( new normal, new point ) + + // An optimized version, which works if the plane is orthogonal. 
+ // 1) Transform the normal into the new space + // 2) Realize that transforming the old plane point into the new space + // is given by [ d * n'x + Tx, d * n'y + Ty, d * n'z + Tz ] + // where d = old plane dist, n' = transformed normal, Tn = translational component of transform + // 3) Compute the new plane dist using the dot product of the normal result of #2 + + // For a correct result, this should be an inverse-transpose matrix + // but that only matters if there are nonuniform scale or skew factors in this matrix. + VectorRotate( inPlane.normal, src, outPlane.normal ); + outPlane.dist = inPlane.dist * DotProduct( outPlane.normal, outPlane.normal ); + outPlane.dist += outPlane.normal.x * src[0][3] + outPlane.normal.y * src[1][3] + outPlane.normal.z * src[2][3]; +} + +inline void MatrixITransformPlane( const matrix3x4_t &src, const cplane_t &inPlane, cplane_t &outPlane ) +{ + // The trick here is that Tn = translational component of transform, + // but for an inverse transform, Tn = - R^-1 * T + Vector vecTranslation; + MatrixGetColumn( src, 3, vecTranslation ); + + Vector vecInvTranslation; + VectorIRotate( vecTranslation, src, vecInvTranslation ); + + VectorIRotate( inPlane.normal, src, outPlane.normal ); + outPlane.dist = inPlane.dist * DotProduct( outPlane.normal, outPlane.normal ); + outPlane.dist -= outPlane.normal.x * vecInvTranslation[0] + outPlane.normal.y * vecInvTranslation[1] + outPlane.normal.z * vecInvTranslation[2]; +} + +int CeilPow2( int in ); +int FloorPow2( int in ); + +FORCEINLINE float * UnpackNormal_HEND3N( const unsigned int *pPackedNormal, float *pNormal ) +{ + int temp[3]; + temp[0] = ((*pPackedNormal >> 0L) & 0x7ff); + if ( temp[0] & 0x400 ) + { + temp[0] = 2048 - temp[0]; + } + temp[1] = ((*pPackedNormal >> 11L) & 0x7ff); + if ( temp[1] & 0x400 ) + { + temp[1] = 2048 - temp[1]; + } + temp[2] = ((*pPackedNormal >> 22L) & 0x3ff); + if ( temp[2] & 0x200 ) + { + temp[2] = 1024 - temp[2]; + } + pNormal[0] = (float)temp[0] * 1.0f/1023.0f; + 
pNormal[1] = (float)temp[1] * 1.0f/1023.0f; + pNormal[2] = (float)temp[2] * 1.0f/511.0f; + return pNormal; +} + +FORCEINLINE unsigned int * PackNormal_HEND3N( const float *pNormal, unsigned int *pPackedNormal ) +{ + int temp[3]; + + temp[0] = Float2Int( pNormal[0] * 1023.0f ); + temp[1] = Float2Int( pNormal[1] * 1023.0f ); + temp[2] = Float2Int( pNormal[2] * 511.0f ); + + // the normal is out of bounds, determine the source and fix + // clamping would be even more of a slowdown here + Assert( temp[0] >= -1023 && temp[0] <= 1023 ); + Assert( temp[1] >= -1023 && temp[1] <= 1023 ); + Assert( temp[2] >= -511 && temp[2] <= 511 ); + + *pPackedNormal = ( ( temp[2] & 0x3ff ) << 22L ) | + ( ( temp[1] & 0x7ff ) << 11L ) | + ( ( temp[0] & 0x7ff ) << 0L ); + return pPackedNormal; +} + +FORCEINLINE unsigned int * PackNormal_HEND3N( float nx, float ny, float nz, unsigned int *pPackedNormal ) +{ + int temp[3]; + + temp[0] = Float2Int( nx * 1023.0f ); + temp[1] = Float2Int( ny * 1023.0f ); + temp[2] = Float2Int( nz * 511.0f ); + + // the normal is out of bounds, determine the source and fix + // clamping would be even more of a slowdown here + Assert( temp[0] >= -1023 && temp[0] <= 1023 ); + Assert( temp[1] >= -1023 && temp[1] <= 1023 ); + Assert( temp[2] >= -511 && temp[2] <= 511 ); + + *pPackedNormal = ( ( temp[2] & 0x3ff ) << 22L ) | + ( ( temp[1] & 0x7ff ) << 11L ) | + ( ( temp[0] & 0x7ff ) << 0L ); + return pPackedNormal; +} + +FORCEINLINE float * UnpackNormal_SHORT2( const unsigned int *pPackedNormal, float *pNormal, bool bIsTangent = FALSE ) +{ + // Unpacks from Jason's 2-short format (fills in a 4th binormal-sign (+1/-1) value, if this is a tangent vector) + + // FIXME: short math is slow on 360 - use ints here instead (bit-twiddle to deal w/ the sign bits) + short iX = (*pPackedNormal & 0x0000FFFF); + short iY = (*pPackedNormal & 0xFFFF0000) >> 16; + + float zSign = +1; + if ( iX < 0 ) + { + zSign = -1; + iX = -iX; + } + float tSign = +1; + if ( iY < 0 ) + { + tSign = -1; 
+ iY = -iY; + } + + pNormal[0] = ( iX - 16384.0f ) / 16384.0f; + pNormal[1] = ( iY - 16384.0f ) / 16384.0f; + pNormal[2] = zSign*sqrtf( 1.0f - ( pNormal[0]*pNormal[0] + pNormal[1]*pNormal[1] ) ); + if ( bIsTangent ) + { + pNormal[3] = tSign; + } + + return pNormal; +} + +FORCEINLINE unsigned int * PackNormal_SHORT2( float nx, float ny, float nz, unsigned int *pPackedNormal, float binormalSign = +1.0f ) +{ + // Pack a vector (ASSUMED TO BE NORMALIZED) into Jason's 4-byte (SHORT2) format. + // This simply reconstructs Z from X & Y. It uses the sign bits of the X & Y coords + // to reconstruct the sign of Z and, if this is a tangent vector, the sign of the + // binormal (this is needed because tangent/binormal vectors are supposed to follow + // UV gradients, but shaders reconstruct the binormal from the tangent and normal + // assuming that they form a right-handed basis). + + nx += 1; // [-1,+1] -> [0,2] + ny += 1; + nx *= 16384.0f; // [ 0, 2] -> [0,32768] + ny *= 16384.0f; + + // '0' and '32768' values are invalid encodings + nx = max( nx, 1.0f ); // Make sure there are no zero values + ny = max( ny, 1.0f ); + nx = min( nx, 32767.0f ); // Make sure there are no 32768 values + ny = min( ny, 32767.0f ); + + if ( nz < 0.0f ) + nx = -nx; // Set the sign bit for z + + ny *= binormalSign; // Set the sign bit for the binormal (use when encoding a tangent vector) + + // FIXME: short math is slow on 360 - use ints here instead (bit-twiddle to deal w/ the sign bits), also use Float2Int() + short sX = (short)nx; // signed short [1,32767] + short sY = (short)ny; + + *pPackedNormal = ( sX & 0x0000FFFF ) | ( sY << 16 ); // NOTE: The mask is necessary (if sX is negative and cast to an int...) 
+ + return pPackedNormal; +} + +FORCEINLINE unsigned int * PackNormal_SHORT2( const float *pNormal, unsigned int *pPackedNormal, float binormalSign = +1.0f ) +{ + return PackNormal_SHORT2( pNormal[0], pNormal[1], pNormal[2], pPackedNormal, binormalSign ); +} + +// Unpacks a UBYTE4 normal (for a tangent, the result's fourth component receives the binormal 'sign') +FORCEINLINE float * UnpackNormal_UBYTE4( const unsigned int *pPackedNormal, float *pNormal, bool bIsTangent = FALSE ) +{ + unsigned char cX, cY; + if ( bIsTangent ) + { + cX = *pPackedNormal >> 16; // Unpack Z + cY = *pPackedNormal >> 24; // Unpack W + } + else + { + cX = *pPackedNormal >> 0; // Unpack X + cY = *pPackedNormal >> 8; // Unpack Y + } + + float x = cX - 128.0f; + float y = cY - 128.0f; + float z; + + float zSignBit = x < 0 ? 1.0f : 0.0f; // z and t negative bits (like slt asm instruction) + float tSignBit = y < 0 ? 1.0f : 0.0f; + float zSign = -( 2*zSignBit - 1 ); // z and t signs + float tSign = -( 2*tSignBit - 1 ); + + x = x*zSign - zSignBit; // 0..127 + y = y*tSign - tSignBit; + x = x - 64; // -64..63 + y = y - 64; + + float xSignBit = x < 0 ? 1.0f : 0.0f; // x and y negative bits (like slt asm instruction) + float ySignBit = y < 0 ? 
1.0f : 0.0f; + float xSign = -( 2*xSignBit - 1 ); // x and y signs + float ySign = -( 2*ySignBit - 1 ); + + x = ( x*xSign - xSignBit ) / 63.0f; // 0..1 range + y = ( y*ySign - ySignBit ) / 63.0f; + z = 1.0f - x - y; + + float oolen = 1.0f / sqrt( x*x + y*y + z*z ); // Normalize and + x *= oolen * xSign; // Recover signs + y *= oolen * ySign; + z *= oolen * zSign; + + pNormal[0] = x; + pNormal[1] = y; + pNormal[2] = z; + if ( bIsTangent ) + { + pNormal[3] = tSign; + } + + return pNormal; +} + +////////////////////////////////////////////////////////////////////////////// +// See: http://www.oroboro.com/rafael/docserv.php/index/programming/article/unitv2 +// +// UBYTE4 encoding, using per-octant projection onto x+y+z=1 +// Assume input vector is already unit length +// +// binormalSign specifies 'sign' of binormal, stored in t sign bit of tangent +// (lets the shader know whether norm/tan/bin form a right-handed basis) +// +// bIsTangent is used to specify which WORD of the output to store the data +// The expected usage is to call once with the normal and once with +// the tangent and binormal sign flag, bitwise OR'ing the returned DWORDs +FORCEINLINE unsigned int * PackNormal_UBYTE4( float nx, float ny, float nz, unsigned int *pPackedNormal, bool bIsTangent = false, float binormalSign = +1.0f ) +{ + float xSign = nx < 0.0f ? -1.0f : 1.0f; // -1 or 1 sign + float ySign = ny < 0.0f ? -1.0f : 1.0f; + float zSign = nz < 0.0f ? 
-1.0f : 1.0f; + float tSign = binormalSign; + Assert( ( binormalSign == +1.0f ) || ( binormalSign == -1.0f ) ); + + float xSignBit = 0.5f*( 1 - xSign ); // [-1,+1] -> [1,0] + float ySignBit = 0.5f*( 1 - ySign ); // 1 is negative bit (like slt instruction) + float zSignBit = 0.5f*( 1 - zSign ); + float tSignBit = 0.5f*( 1 - binormalSign ); + + float absX = xSign*nx; // 0..1 range (abs) + float absY = ySign*ny; + float absZ = zSign*nz; + + float xbits = absX / ( absX + absY + absZ ); // Project onto x+y+z=1 plane + float ybits = absY / ( absX + absY + absZ ); + + xbits *= 63; // 0..63 + ybits *= 63; + + xbits = xbits * xSign - xSignBit; // -64..63 range + ybits = ybits * ySign - ySignBit; + xbits += 64.0f; // 0..127 range + ybits += 64.0f; + + xbits = xbits * zSign - zSignBit; // Negate based on z and t + ybits = ybits * tSign - tSignBit; // -128..127 range + + xbits += 128.0f; // 0..255 range + ybits += 128.0f; + + unsigned char cX = (unsigned char) xbits; + unsigned char cY = (unsigned char) ybits; + + if ( !bIsTangent ) + *pPackedNormal = (cX << 0) | (cY << 8); // xy for normal + else + *pPackedNormal = (cX << 16) | (cY << 24); // zw for tangent + + return pPackedNormal; +} + +FORCEINLINE unsigned int * PackNormal_UBYTE4( const float *pNormal, unsigned int *pPackedNormal, bool bIsTangent = false, float binormalSign = +1.0f ) +{ + return PackNormal_UBYTE4( pNormal[0], pNormal[1], pNormal[2], pPackedNormal, bIsTangent, binormalSign ); +} + + +//----------------------------------------------------------------------------- +// Convert RGB to HSV +//----------------------------------------------------------------------------- +void RGBtoHSV( const Vector &rgb, Vector &hsv ); + + +//----------------------------------------------------------------------------- +// Convert HSV to RGB +//----------------------------------------------------------------------------- +void HSVtoRGB( const Vector &hsv, Vector &rgb ); + + 
+//----------------------------------------------------------------------------- +// Fast version of pow and log +//----------------------------------------------------------------------------- + +float FastLog2(float i); // log2( i ) +float FastPow2(float i); // 2^i +float FastPow(float a, float b); // a^b +float FastPow10( float i ); // 10^i + +//----------------------------------------------------------------------------- +// For testing float equality +//----------------------------------------------------------------------------- + +inline bool CloseEnough( float a, float b, float epsilon = EQUAL_EPSILON ) +{ + return fabs( a - b ) <= epsilon; +} + +inline bool CloseEnough( const Vector &a, const Vector &b, float epsilon = EQUAL_EPSILON ) +{ + return fabs( a.x - b.x ) <= epsilon && + fabs( a.y - b.y ) <= epsilon && + fabs( a.z - b.z ) <= epsilon; +} + +// Fast compare +// maxUlps is the maximum error in terms of Units in the Last Place. This +// specifies how big an error we are willing to accept in terms of the value +// of the least significant digit of the floating point number’s +// representation. maxUlps can also be interpreted in terms of how many +// representable floats we are willing to accept between A and B. +// This function will allow maxUlps-1 floats between A and B. +bool AlmostEqual(float a, float b, int maxUlps = 10); + +inline bool AlmostEqual( const Vector &a, const Vector &b, int maxUlps = 10) +{ + return AlmostEqual( a.x, b.x, maxUlps ) && + AlmostEqual( a.y, b.y, maxUlps ) && + AlmostEqual( a.z, b.z, maxUlps ); +} + + +#endif // MATH_BASE_H + diff --git a/mp/src/public/mathlib/matrixmath.h b/mp/src/public/mathlib/matrixmath.h index 40de0c02..9c7f207b 100644 --- a/mp/src/public/mathlib/matrixmath.h +++ b/mp/src/public/mathlib/matrixmath.h @@ -1,385 +1,385 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -// A set of generic, template-based matrix functions. 
-//===========================================================================// - -#ifndef MATRIXMATH_H -#define MATRIXMATH_H - -#include - -// The operations in this file can perform basic matrix operations on matrices represented -// using any class that supports the necessary operations: -// -// .Element( row, col ) - return the element at a given matrox position -// .SetElement( row, col, val ) - modify an element -// .Width(), .Height() - get dimensions -// .SetDimensions( nrows, ncols) - set a matrix to be un-initted and the appropriate size -// -// Generally, vectors can be used with these functions by using N x 1 matrices to represent them. -// Matrices are addressed as row, column, and indices are 0-based -// -// -// Note that the template versions of these routines are defined for generality - it is expected -// that template specialization is used for common high performance cases. - -namespace MatrixMath -{ - /// M *= flScaleValue - template - void ScaleMatrix( MATRIXCLASS &matrix, float flScaleValue ) - { - for( int i = 0; i < matrix.Height(); i++ ) - { - for( int j = 0; j < matrix.Width(); j++ ) - { - matrix.SetElement( i, j, flScaleValue * matrix.Element( i, j ) ); - } - } - } - - /// AppendElementToMatrix - same as setting the element, except only works when all calls - /// happen in top to bottom left to right order, end you have to call FinishedAppending when - /// done. For normal matrix classes this is not different then SetElement, but for - /// CSparseMatrix, it is an accelerated way to fill a matrix from scratch. 
- template - FORCEINLINE void AppendElement( MATRIXCLASS &matrix, int nRow, int nCol, float flValue ) - { - matrix.SetElement( nRow, nCol, flValue ); // default implementation - } - - template - FORCEINLINE void FinishedAppending( MATRIXCLASS &matrix ) {} // default implementation - - /// M += fl - template - void AddToMatrix( MATRIXCLASS &matrix, float flAddend ) - { - for( int i = 0; i < matrix.Height(); i++ ) - { - for( int j = 0; j < matrix.Width(); j++ ) - { - matrix.SetElement( i, j, flAddend + matrix.Element( i, j ) ); - } - } - } - - /// transpose - template - void TransposeMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut ) - { - pMatrixOut->SetDimensions( matrixIn.Width(), matrixIn.Height() ); - for( int i = 0; i < pMatrixOut->Height(); i++ ) - { - for( int j = 0; j < pMatrixOut->Width(); j++ ) - { - AppendElement( *pMatrixOut, i, j, matrixIn.Element( j, i ) ); - } - } - FinishedAppending( *pMatrixOut ); - } - - /// copy - template - void CopyMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut ) - { - pMatrixOut->SetDimensions( matrixIn.Height(), matrixIn.Width() ); - for( int i = 0; i < matrixIn.Height(); i++ ) - { - for( int j = 0; j < matrixIn.Width(); j++ ) - { - AppendElement( *pMatrixOut, i, j, matrixIn.Element( i, j ) ); - } - } - FinishedAppending( *pMatrixOut ); - } - - - - /// M+=M - template - void AddMatrixToMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut ) - { - for( int i = 0; i < matrixIn.Height(); i++ ) - { - for( int j = 0; j < matrixIn.Width(); j++ ) - { - pMatrixOut->SetElement( i, j, pMatrixOut->Element( i, j ) + matrixIn.Element( i, j ) ); - } - } - } - - // M += scale * M - template - void AddScaledMatrixToMatrix( float flScale, MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut ) - { - for( int i = 0; i < matrixIn.Height(); i++ ) - { - for( int j = 0; j < matrixIn.Width(); j++ ) - { - pMatrixOut->SetElement( i, j, pMatrixOut->Element( i, j ) + flScale * matrixIn.Element( i, j ) 
); - } - } - } - - - // simple way to initialize a matrix with constants from code. - template - void SetMatrixToIdentity( MATRIXCLASSOUT *pMatrixOut, float flDiagonalValue = 1.0 ) - { - for( int i = 0; i < pMatrixOut->Height(); i++ ) - { - for( int j = 0; j < pMatrixOut->Width(); j++ ) - { - AppendElement( *pMatrixOut, i, j, ( i == j ) ? flDiagonalValue : 0 ); - } - } - FinishedAppending( *pMatrixOut ); - } - - //// simple way to initialize a matrix with constants from code - template - void SetMatrixValues( MATRIXCLASSOUT *pMatrix, int nRows, int nCols, ... ) - { - va_list argPtr; - va_start( argPtr, nCols ); - - pMatrix->SetDimensions( nRows, nCols ); - for( int nRow = 0; nRow < nRows; nRow++ ) - { - for( int nCol = 0; nCol < nCols; nCol++ ) - { - double flNewValue = va_arg( argPtr, double ); - pMatrix->SetElement( nRow, nCol, flNewValue ); - } - } - va_end( argPtr ); - } - - - /// row and colum accessors. treat a row or a column as a column vector - template class MatrixRowAccessor - { - public: - FORCEINLINE MatrixRowAccessor( MATRIXTYPE const &matrix, int nRow ) - { - m_pMatrix = &matrix; - m_nRow = nRow; - } - - FORCEINLINE float Element( int nRow, int nCol ) const - { - Assert( nCol == 0 ); - return m_pMatrix->Element( m_nRow, nRow ); - } - - FORCEINLINE int Width( void ) const { return 1; }; - FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); } - - private: - MATRIXTYPE const *m_pMatrix; - int m_nRow; - }; - - template class MatrixColumnAccessor - { - public: - FORCEINLINE MatrixColumnAccessor( MATRIXTYPE const &matrix, int nColumn ) - { - m_pMatrix = &matrix; - m_nColumn = nColumn; - } - - FORCEINLINE float Element( int nRow, int nColumn ) const - { - Assert( nColumn == 0 ); - return m_pMatrix->Element( nRow, m_nColumn ); - } - - FORCEINLINE int Width( void ) const { return 1; } - FORCEINLINE int Height( void ) const { return m_pMatrix->Height(); } - private: - MATRIXTYPE const *m_pMatrix; - int m_nColumn; - }; - - /// this translator acts 
as a proxy for the transposed matrix - template class MatrixTransposeAccessor - { - public: - FORCEINLINE MatrixTransposeAccessor( MATRIXTYPE const & matrix ) - { - m_pMatrix = &matrix; - } - - FORCEINLINE float Element( int nRow, int nColumn ) const - { - return m_pMatrix->Element( nColumn, nRow ); - } - - FORCEINLINE int Width( void ) const { return m_pMatrix->Height(); } - FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); } - private: - MATRIXTYPE const *m_pMatrix; - }; - - /// this tranpose returns a wrapper around it's argument, allowing things like AddMatrixToMatrix( Transpose( matA ), &matB ) without an extra copy - template - MatrixTransposeAccessor TransposeMatrix( MATRIXCLASSIN const &matrixIn ) - { - return MatrixTransposeAccessor( matrixIn ); - } - - - /// retrieve rows and columns - template - FORCEINLINE MatrixColumnAccessor MatrixColumn( MATRIXTYPE const &matrix, int nColumn ) - { - return MatrixColumnAccessor( matrix, nColumn ); - } - - template - FORCEINLINE MatrixRowAccessor MatrixRow( MATRIXTYPE const &matrix, int nRow ) - { - return MatrixRowAccessor( matrix, nRow ); - } - - //// dot product between vectors (or rows and/or columns via accessors) - template - float InnerProduct( MATRIXACCESSORATYPE const &vecA, MATRIXACCESSORBTYPE const &vecB ) - { - Assert( vecA.Width() == 1 ); - Assert( vecB.Width() == 1 ); - Assert( vecA.Height() == vecB.Height() ); - double flResult = 0; - for( int i = 0; i < vecA.Height(); i++ ) - { - flResult += vecA.Element( i, 0 ) * vecB.Element( i, 0 ); - } - return flResult; - } - - - - /// matrix x matrix multiplication - template - void MatrixMultiply( MATRIXATYPE const &matA, MATRIXBTYPE const &matB, MATRIXOUTTYPE *pMatrixOut ) - { - Assert( matA.Width() == matB.Height() ); - pMatrixOut->SetDimensions( matA.Height(), matB.Width() ); - for( int i = 0; i < matA.Height(); i++ ) - { - for( int j = 0; j < matB.Width(); j++ ) - { - pMatrixOut->SetElement( i, j, InnerProduct( MatrixRow( matA, i ), 
MatrixColumn( matB, j ) ) ); - } - } - } - - /// solve Ax=B via the conjugate graident method. Code and naming conventions based on the - /// wikipedia article. - template - void ConjugateGradient( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 ) - { - XTYPE vecR; - vecR.SetDimensions( vecX.Height(), 1 ); - MatrixMultiply( matA, vecX, &vecR ); - ScaleMatrix( vecR, -1 ); - AddMatrixToMatrix( vecB, &vecR ); - XTYPE vecP; - CopyMatrix( vecR, &vecP ); - float flRsOld = InnerProduct( vecR, vecR ); - for( int nIter = 0; nIter < 100; nIter++ ) - { - XTYPE vecAp; - MatrixMultiply( matA, vecP, &vecAp ); - float flDivisor = InnerProduct( vecAp, vecP ); - float flAlpha = flRsOld / flDivisor; - AddScaledMatrixToMatrix( flAlpha, vecP, &vecX ); - AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR ); - float flRsNew = InnerProduct( vecR, vecR ); - if ( flRsNew < flTolerance ) - { - break; - } - ScaleMatrix( vecP, flRsNew / flRsOld ); - AddMatrixToMatrix( vecR, &vecP ); - flRsOld = flRsNew; - } - } - - /// solve (A'*A) x=B via the conjugate gradient method. Code and naming conventions based on - /// the wikipedia article. 
Same as Conjugate gradient but allows passing in two matrices whose - /// product is used as the A matrix (in order to preserve sparsity) - template - void ConjugateGradient( ATYPE const &matA, APRIMETYPE const &matAPrime, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 ) - { - XTYPE vecR1; - vecR1.SetDimensions( vecX.Height(), 1 ); - MatrixMultiply( matA, vecX, &vecR1 ); - XTYPE vecR; - vecR.SetDimensions( vecR1.Height(), 1 ); - MatrixMultiply( matAPrime, vecR1, &vecR ); - ScaleMatrix( vecR, -1 ); - AddMatrixToMatrix( vecB, &vecR ); - XTYPE vecP; - CopyMatrix( vecR, &vecP ); - float flRsOld = InnerProduct( vecR, vecR ); - for( int nIter = 0; nIter < 100; nIter++ ) - { - XTYPE vecAp1; - MatrixMultiply( matA, vecP, &vecAp1 ); - XTYPE vecAp; - MatrixMultiply( matAPrime, vecAp1, &vecAp ); - float flDivisor = InnerProduct( vecAp, vecP ); - float flAlpha = flRsOld / flDivisor; - AddScaledMatrixToMatrix( flAlpha, vecP, &vecX ); - AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR ); - float flRsNew = InnerProduct( vecR, vecR ); - if ( flRsNew < flTolerance ) - { - break; - } - ScaleMatrix( vecP, flRsNew / flRsOld ); - AddMatrixToMatrix( vecR, &vecP ); - flRsOld = flRsNew; - } - } - - - template - void LeastSquaresFit( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX ) - { - // now, generate the normal equations - BTYPE vecBeta; - MatrixMath::MatrixMultiply( MatrixMath::TransposeMatrix( matA ), vecB, &vecBeta ); - - vecX.SetDimensions( matA.Width(), 1 ); - MatrixMath::SetMatrixToIdentity( &vecX ); - - ATYPE matATransposed; - TransposeMatrix( matA, &matATransposed ); - ConjugateGradient( matA, matATransposed, vecBeta, vecX, 1.0e-20 ); - } - -}; - -/// a simple fixed-size matrix class -template class CFixedMatrix -{ -public: - FORCEINLINE int Width( void ) const { return NUMCOLS; } - FORCEINLINE int Height( void ) const { return NUMROWS; } - FORCEINLINE float Element( int nRow, int nCol ) const { return m_flValues[nRow][nCol]; } - FORCEINLINE void SetElement( int 
nRow, int nCol, float flValue ) { m_flValues[nRow][nCol] = flValue; } - FORCEINLINE void SetDimensions( int nNumRows, int nNumCols ) { Assert( ( nNumRows == NUMROWS ) && ( nNumCols == NUMCOLS ) ); } - -private: - float m_flValues[NUMROWS][NUMCOLS]; -}; - - - -#endif //matrixmath_h +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// A set of generic, template-based matrix functions. +//===========================================================================// + +#ifndef MATRIXMATH_H +#define MATRIXMATH_H + +#include + +// The operations in this file can perform basic matrix operations on matrices represented +// using any class that supports the necessary operations: +// +// .Element( row, col ) - return the element at a given matrox position +// .SetElement( row, col, val ) - modify an element +// .Width(), .Height() - get dimensions +// .SetDimensions( nrows, ncols) - set a matrix to be un-initted and the appropriate size +// +// Generally, vectors can be used with these functions by using N x 1 matrices to represent them. +// Matrices are addressed as row, column, and indices are 0-based +// +// +// Note that the template versions of these routines are defined for generality - it is expected +// that template specialization is used for common high performance cases. + +namespace MatrixMath +{ + /// M *= flScaleValue + template + void ScaleMatrix( MATRIXCLASS &matrix, float flScaleValue ) + { + for( int i = 0; i < matrix.Height(); i++ ) + { + for( int j = 0; j < matrix.Width(); j++ ) + { + matrix.SetElement( i, j, flScaleValue * matrix.Element( i, j ) ); + } + } + } + + /// AppendElementToMatrix - same as setting the element, except only works when all calls + /// happen in top to bottom left to right order, end you have to call FinishedAppending when + /// done. For normal matrix classes this is not different then SetElement, but for + /// CSparseMatrix, it is an accelerated way to fill a matrix from scratch. 
+ template + FORCEINLINE void AppendElement( MATRIXCLASS &matrix, int nRow, int nCol, float flValue ) + { + matrix.SetElement( nRow, nCol, flValue ); // default implementation + } + + template + FORCEINLINE void FinishedAppending( MATRIXCLASS &matrix ) {} // default implementation + + /// M += fl + template + void AddToMatrix( MATRIXCLASS &matrix, float flAddend ) + { + for( int i = 0; i < matrix.Height(); i++ ) + { + for( int j = 0; j < matrix.Width(); j++ ) + { + matrix.SetElement( i, j, flAddend + matrix.Element( i, j ) ); + } + } + } + + /// transpose + template + void TransposeMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut ) + { + pMatrixOut->SetDimensions( matrixIn.Width(), matrixIn.Height() ); + for( int i = 0; i < pMatrixOut->Height(); i++ ) + { + for( int j = 0; j < pMatrixOut->Width(); j++ ) + { + AppendElement( *pMatrixOut, i, j, matrixIn.Element( j, i ) ); + } + } + FinishedAppending( *pMatrixOut ); + } + + /// copy + template + void CopyMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut ) + { + pMatrixOut->SetDimensions( matrixIn.Height(), matrixIn.Width() ); + for( int i = 0; i < matrixIn.Height(); i++ ) + { + for( int j = 0; j < matrixIn.Width(); j++ ) + { + AppendElement( *pMatrixOut, i, j, matrixIn.Element( i, j ) ); + } + } + FinishedAppending( *pMatrixOut ); + } + + + + /// M+=M + template + void AddMatrixToMatrix( MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut ) + { + for( int i = 0; i < matrixIn.Height(); i++ ) + { + for( int j = 0; j < matrixIn.Width(); j++ ) + { + pMatrixOut->SetElement( i, j, pMatrixOut->Element( i, j ) + matrixIn.Element( i, j ) ); + } + } + } + + // M += scale * M + template + void AddScaledMatrixToMatrix( float flScale, MATRIXCLASSIN const &matrixIn, MATRIXCLASSOUT *pMatrixOut ) + { + for( int i = 0; i < matrixIn.Height(); i++ ) + { + for( int j = 0; j < matrixIn.Width(); j++ ) + { + pMatrixOut->SetElement( i, j, pMatrixOut->Element( i, j ) + flScale * matrixIn.Element( i, j ) 
); + } + } + } + + + // simple way to initialize a matrix with constants from code. + template + void SetMatrixToIdentity( MATRIXCLASSOUT *pMatrixOut, float flDiagonalValue = 1.0 ) + { + for( int i = 0; i < pMatrixOut->Height(); i++ ) + { + for( int j = 0; j < pMatrixOut->Width(); j++ ) + { + AppendElement( *pMatrixOut, i, j, ( i == j ) ? flDiagonalValue : 0 ); + } + } + FinishedAppending( *pMatrixOut ); + } + + //// simple way to initialize a matrix with constants from code + template + void SetMatrixValues( MATRIXCLASSOUT *pMatrix, int nRows, int nCols, ... ) + { + va_list argPtr; + va_start( argPtr, nCols ); + + pMatrix->SetDimensions( nRows, nCols ); + for( int nRow = 0; nRow < nRows; nRow++ ) + { + for( int nCol = 0; nCol < nCols; nCol++ ) + { + double flNewValue = va_arg( argPtr, double ); + pMatrix->SetElement( nRow, nCol, flNewValue ); + } + } + va_end( argPtr ); + } + + + /// row and colum accessors. treat a row or a column as a column vector + template class MatrixRowAccessor + { + public: + FORCEINLINE MatrixRowAccessor( MATRIXTYPE const &matrix, int nRow ) + { + m_pMatrix = &matrix; + m_nRow = nRow; + } + + FORCEINLINE float Element( int nRow, int nCol ) const + { + Assert( nCol == 0 ); + return m_pMatrix->Element( m_nRow, nRow ); + } + + FORCEINLINE int Width( void ) const { return 1; }; + FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); } + + private: + MATRIXTYPE const *m_pMatrix; + int m_nRow; + }; + + template class MatrixColumnAccessor + { + public: + FORCEINLINE MatrixColumnAccessor( MATRIXTYPE const &matrix, int nColumn ) + { + m_pMatrix = &matrix; + m_nColumn = nColumn; + } + + FORCEINLINE float Element( int nRow, int nColumn ) const + { + Assert( nColumn == 0 ); + return m_pMatrix->Element( nRow, m_nColumn ); + } + + FORCEINLINE int Width( void ) const { return 1; } + FORCEINLINE int Height( void ) const { return m_pMatrix->Height(); } + private: + MATRIXTYPE const *m_pMatrix; + int m_nColumn; + }; + + /// this translator acts 
as a proxy for the transposed matrix + template class MatrixTransposeAccessor + { + public: + FORCEINLINE MatrixTransposeAccessor( MATRIXTYPE const & matrix ) + { + m_pMatrix = &matrix; + } + + FORCEINLINE float Element( int nRow, int nColumn ) const + { + return m_pMatrix->Element( nColumn, nRow ); + } + + FORCEINLINE int Width( void ) const { return m_pMatrix->Height(); } + FORCEINLINE int Height( void ) const { return m_pMatrix->Width(); } + private: + MATRIXTYPE const *m_pMatrix; + }; + + /// this tranpose returns a wrapper around it's argument, allowing things like AddMatrixToMatrix( Transpose( matA ), &matB ) without an extra copy + template + MatrixTransposeAccessor TransposeMatrix( MATRIXCLASSIN const &matrixIn ) + { + return MatrixTransposeAccessor( matrixIn ); + } + + + /// retrieve rows and columns + template + FORCEINLINE MatrixColumnAccessor MatrixColumn( MATRIXTYPE const &matrix, int nColumn ) + { + return MatrixColumnAccessor( matrix, nColumn ); + } + + template + FORCEINLINE MatrixRowAccessor MatrixRow( MATRIXTYPE const &matrix, int nRow ) + { + return MatrixRowAccessor( matrix, nRow ); + } + + //// dot product between vectors (or rows and/or columns via accessors) + template + float InnerProduct( MATRIXACCESSORATYPE const &vecA, MATRIXACCESSORBTYPE const &vecB ) + { + Assert( vecA.Width() == 1 ); + Assert( vecB.Width() == 1 ); + Assert( vecA.Height() == vecB.Height() ); + double flResult = 0; + for( int i = 0; i < vecA.Height(); i++ ) + { + flResult += vecA.Element( i, 0 ) * vecB.Element( i, 0 ); + } + return flResult; + } + + + + /// matrix x matrix multiplication + template + void MatrixMultiply( MATRIXATYPE const &matA, MATRIXBTYPE const &matB, MATRIXOUTTYPE *pMatrixOut ) + { + Assert( matA.Width() == matB.Height() ); + pMatrixOut->SetDimensions( matA.Height(), matB.Width() ); + for( int i = 0; i < matA.Height(); i++ ) + { + for( int j = 0; j < matB.Width(); j++ ) + { + pMatrixOut->SetElement( i, j, InnerProduct( MatrixRow( matA, i ), 
MatrixColumn( matB, j ) ) ); + } + } + } + + /// solve Ax=B via the conjugate graident method. Code and naming conventions based on the + /// wikipedia article. + template + void ConjugateGradient( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 ) + { + XTYPE vecR; + vecR.SetDimensions( vecX.Height(), 1 ); + MatrixMultiply( matA, vecX, &vecR ); + ScaleMatrix( vecR, -1 ); + AddMatrixToMatrix( vecB, &vecR ); + XTYPE vecP; + CopyMatrix( vecR, &vecP ); + float flRsOld = InnerProduct( vecR, vecR ); + for( int nIter = 0; nIter < 100; nIter++ ) + { + XTYPE vecAp; + MatrixMultiply( matA, vecP, &vecAp ); + float flDivisor = InnerProduct( vecAp, vecP ); + float flAlpha = flRsOld / flDivisor; + AddScaledMatrixToMatrix( flAlpha, vecP, &vecX ); + AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR ); + float flRsNew = InnerProduct( vecR, vecR ); + if ( flRsNew < flTolerance ) + { + break; + } + ScaleMatrix( vecP, flRsNew / flRsOld ); + AddMatrixToMatrix( vecR, &vecP ); + flRsOld = flRsNew; + } + } + + /// solve (A'*A) x=B via the conjugate gradient method. Code and naming conventions based on + /// the wikipedia article. 
Same as Conjugate gradient but allows passing in two matrices whose + /// product is used as the A matrix (in order to preserve sparsity) + template + void ConjugateGradient( ATYPE const &matA, APRIMETYPE const &matAPrime, BTYPE const &vecB, XTYPE &vecX, float flTolerance = 1.0e-20 ) + { + XTYPE vecR1; + vecR1.SetDimensions( vecX.Height(), 1 ); + MatrixMultiply( matA, vecX, &vecR1 ); + XTYPE vecR; + vecR.SetDimensions( vecR1.Height(), 1 ); + MatrixMultiply( matAPrime, vecR1, &vecR ); + ScaleMatrix( vecR, -1 ); + AddMatrixToMatrix( vecB, &vecR ); + XTYPE vecP; + CopyMatrix( vecR, &vecP ); + float flRsOld = InnerProduct( vecR, vecR ); + for( int nIter = 0; nIter < 100; nIter++ ) + { + XTYPE vecAp1; + MatrixMultiply( matA, vecP, &vecAp1 ); + XTYPE vecAp; + MatrixMultiply( matAPrime, vecAp1, &vecAp ); + float flDivisor = InnerProduct( vecAp, vecP ); + float flAlpha = flRsOld / flDivisor; + AddScaledMatrixToMatrix( flAlpha, vecP, &vecX ); + AddScaledMatrixToMatrix( -flAlpha, vecAp, &vecR ); + float flRsNew = InnerProduct( vecR, vecR ); + if ( flRsNew < flTolerance ) + { + break; + } + ScaleMatrix( vecP, flRsNew / flRsOld ); + AddMatrixToMatrix( vecR, &vecP ); + flRsOld = flRsNew; + } + } + + + template + void LeastSquaresFit( ATYPE const &matA, BTYPE const &vecB, XTYPE &vecX ) + { + // now, generate the normal equations + BTYPE vecBeta; + MatrixMath::MatrixMultiply( MatrixMath::TransposeMatrix( matA ), vecB, &vecBeta ); + + vecX.SetDimensions( matA.Width(), 1 ); + MatrixMath::SetMatrixToIdentity( &vecX ); + + ATYPE matATransposed; + TransposeMatrix( matA, &matATransposed ); + ConjugateGradient( matA, matATransposed, vecBeta, vecX, 1.0e-20 ); + } + +}; + +/// a simple fixed-size matrix class +template class CFixedMatrix +{ +public: + FORCEINLINE int Width( void ) const { return NUMCOLS; } + FORCEINLINE int Height( void ) const { return NUMROWS; } + FORCEINLINE float Element( int nRow, int nCol ) const { return m_flValues[nRow][nCol]; } + FORCEINLINE void SetElement( int 
nRow, int nCol, float flValue ) { m_flValues[nRow][nCol] = flValue; } + FORCEINLINE void SetDimensions( int nNumRows, int nNumCols ) { Assert( ( nNumRows == NUMROWS ) && ( nNumCols == NUMCOLS ) ); } + +private: + float m_flValues[NUMROWS][NUMCOLS]; +}; + + + +#endif //matrixmath_h diff --git a/mp/src/public/mathlib/noise.h b/mp/src/public/mathlib/noise.h index 0aec2efe..19d3f729 100644 --- a/mp/src/public/mathlib/noise.h +++ b/mp/src/public/mathlib/noise.h @@ -1,35 +1,35 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -//=====================================================================================// - -#ifndef NOISE_H -#define NOISE_H - -#include -#include "basetypes.h" -#include "mathlib/vector.h" -#include "tier0/dbg.h" - - -// The following code is the c-ification of Ken Perlin's new noise algorithm -// "JAVA REFERENCE IMPLEMENTATION OF IMPROVED NOISE - COPYRIGHT 2002 KEN PERLIN" -// as available here: http://mrl.nyu.edu/~perlin/noise/ -// it generates a single octave of noise in the -1..1 range -// this should at some point probably replace SparseConvolutionNoise - jd -float ImprovedPerlinNoise( Vector const &pnt ); - -// get the noise value at a point. Output range is 0..1. -float SparseConvolutionNoise( Vector const &pnt ); - -// get the noise value at a point, passing a custom noise shaping function. The noise shaping -// function should map the domain 0..1 to 0..1. -float SparseConvolutionNoise(Vector const &pnt, float (*pNoiseShapeFunction)(float) ); - -// returns a 1/f noise. more octaves take longer -float FractalNoise( Vector const &pnt, int n_octaves ); - -// returns a abs(f)*1/f noise i.e. turbulence -float Turbulence( Vector const &pnt, int n_octaves ); -#endif // NOISE_H +//========= Copyright Valve Corporation, All rights reserved. 
============// +// +// Purpose: +// +//=====================================================================================// + +#ifndef NOISE_H +#define NOISE_H + +#include +#include "basetypes.h" +#include "mathlib/vector.h" +#include "tier0/dbg.h" + + +// The following code is the c-ification of Ken Perlin's new noise algorithm +// "JAVA REFERENCE IMPLEMENTATION OF IMPROVED NOISE - COPYRIGHT 2002 KEN PERLIN" +// as available here: http://mrl.nyu.edu/~perlin/noise/ +// it generates a single octave of noise in the -1..1 range +// this should at some point probably replace SparseConvolutionNoise - jd +float ImprovedPerlinNoise( Vector const &pnt ); + +// get the noise value at a point. Output range is 0..1. +float SparseConvolutionNoise( Vector const &pnt ); + +// get the noise value at a point, passing a custom noise shaping function. The noise shaping +// function should map the domain 0..1 to 0..1. +float SparseConvolutionNoise(Vector const &pnt, float (*pNoiseShapeFunction)(float) ); + +// returns a 1/f noise. more octaves take longer +float FractalNoise( Vector const &pnt, int n_octaves ); + +// returns a abs(f)*1/f noise i.e. turbulence +float Turbulence( Vector const &pnt, int n_octaves ); +#endif // NOISE_H diff --git a/mp/src/public/mathlib/polyhedron.h b/mp/src/public/mathlib/polyhedron.h index 6c51d432..38b465c7 100644 --- a/mp/src/public/mathlib/polyhedron.h +++ b/mp/src/public/mathlib/polyhedron.h @@ -1,73 +1,73 @@ -//========= Copyright Valve Corporation, All rights reserved. 
============// -// -// Purpose: -// -// $NoKeywords: $ -// -//=============================================================================// - -#ifndef POLYHEDRON_H_ -#define POLYHEDRON_H_ - -#ifdef _WIN32 -#pragma once -#endif - -#include "mathlib/mathlib.h" - - - -struct Polyhedron_IndexedLine_t -{ - unsigned short iPointIndices[2]; -}; - -struct Polyhedron_IndexedLineReference_t -{ - unsigned short iLineIndex; - unsigned char iEndPointIndex; //since two polygons reference any one line, one needs to traverse the line backwards, this flags that behavior -}; - -struct Polyhedron_IndexedPolygon_t -{ - unsigned short iFirstIndex; - unsigned short iIndexCount; - Vector polyNormal; -}; - -class CPolyhedron //made into a class because it's going virtual to support distinctions between temp and permanent versions -{ -public: - Vector *pVertices; - Polyhedron_IndexedLine_t *pLines; - Polyhedron_IndexedLineReference_t *pIndices; - Polyhedron_IndexedPolygon_t *pPolygons; - - unsigned short iVertexCount; - unsigned short iLineCount; - unsigned short iIndexCount; - unsigned short iPolygonCount; - - virtual ~CPolyhedron( void ) {}; - virtual void Release( void ) = 0; - Vector Center( void ); -}; - -class CPolyhedron_AllocByNew : public CPolyhedron -{ -public: - virtual void Release( void ); - static CPolyhedron_AllocByNew *Allocate( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ); //creates the polyhedron along with enough memory to hold all it's data in a single allocation - -private: - CPolyhedron_AllocByNew( void ) { }; //CPolyhedron_AllocByNew::Allocate() is the only way to create one of these. 
-}; - -CPolyhedron *GeneratePolyhedronFromPlanes( const float *pOutwardFacingPlanes, int iPlaneCount, float fOnPlaneEpsilon, bool bUseTemporaryMemory = false ); //be sure to polyhedron->Release() -CPolyhedron *ClipPolyhedron( const CPolyhedron *pExistingPolyhedron, const float *pOutwardFacingPlanes, int iPlaneCount, float fOnPlaneEpsilon, bool bUseTemporaryMemory = false ); //this does NOT modify/delete the existing polyhedron - -CPolyhedron *GetTempPolyhedron( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ); //grab the temporary polyhedron. Avoids new/delete for quick work. Can only be in use by one chunk of code at a time - - -#endif //#ifndef POLYHEDRON_H_ - +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef POLYHEDRON_H_ +#define POLYHEDRON_H_ + +#ifdef _WIN32 +#pragma once +#endif + +#include "mathlib/mathlib.h" + + + +struct Polyhedron_IndexedLine_t +{ + unsigned short iPointIndices[2]; +}; + +struct Polyhedron_IndexedLineReference_t +{ + unsigned short iLineIndex; + unsigned char iEndPointIndex; //since two polygons reference any one line, one needs to traverse the line backwards, this flags that behavior +}; + +struct Polyhedron_IndexedPolygon_t +{ + unsigned short iFirstIndex; + unsigned short iIndexCount; + Vector polyNormal; +}; + +class CPolyhedron //made into a class because it's going virtual to support distinctions between temp and permanent versions +{ +public: + Vector *pVertices; + Polyhedron_IndexedLine_t *pLines; + Polyhedron_IndexedLineReference_t *pIndices; + Polyhedron_IndexedPolygon_t *pPolygons; + + unsigned short iVertexCount; + unsigned short iLineCount; + unsigned short iIndexCount; + unsigned short iPolygonCount; + + virtual ~CPolyhedron( void ) {}; + virtual void Release( void ) = 0; + Vector Center( void ); +}; + +class 
CPolyhedron_AllocByNew : public CPolyhedron +{ +public: + virtual void Release( void ); + static CPolyhedron_AllocByNew *Allocate( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ); //creates the polyhedron along with enough memory to hold all it's data in a single allocation + +private: + CPolyhedron_AllocByNew( void ) { }; //CPolyhedron_AllocByNew::Allocate() is the only way to create one of these. +}; + +CPolyhedron *GeneratePolyhedronFromPlanes( const float *pOutwardFacingPlanes, int iPlaneCount, float fOnPlaneEpsilon, bool bUseTemporaryMemory = false ); //be sure to polyhedron->Release() +CPolyhedron *ClipPolyhedron( const CPolyhedron *pExistingPolyhedron, const float *pOutwardFacingPlanes, int iPlaneCount, float fOnPlaneEpsilon, bool bUseTemporaryMemory = false ); //this does NOT modify/delete the existing polyhedron + +CPolyhedron *GetTempPolyhedron( unsigned short iVertices, unsigned short iLines, unsigned short iIndices, unsigned short iPolygons ); //grab the temporary polyhedron. Avoids new/delete for quick work. Can only be in use by one chunk of code at a time + + +#endif //#ifndef POLYHEDRON_H_ + diff --git a/mp/src/public/mathlib/quantize.h b/mp/src/public/mathlib/quantize.h index c43b1530..5e5b7423 100644 --- a/mp/src/public/mathlib/quantize.h +++ b/mp/src/public/mathlib/quantize.h @@ -1,141 +1,141 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -// $NoKeywords: $ -// -//=============================================================================// -#ifndef QUANTIZE_H -#define QUANTIZE_H - -#ifndef STRING_H -#include -#endif - -#define MAXDIMS 768 -#define MAXQUANT 16000 - - -#include - -struct Sample; - -struct QuantizedValue { - double MinError; // minimum possible error. used - // for neighbor searches. 
- struct QuantizedValue *Children[2]; // splits - int32 value; // only exists for leaf nodes - struct Sample *Samples; // every sample quantized into this - // entry - int32 NSamples; // how many were quantized to this. - int32 TotSamples; - double *ErrorMeasure; // variance measure for each dimension - double TotalError; // sum of errors - uint8 *Mean; // average value of each dimension - uint8 *Mins; // min box for children and this - uint8 *Maxs; // max box for children and this - int NQuant; // the number of samples which were - // quantzied to this node since the - // last time OptimizeQuantizer() - // was called. - int *Sums; // sum used by OptimizeQuantizer - int sortdim; // dimension currently sorted along. -}; - -struct Sample { - int32 ID; // identifier of this sample. can - // be used for any purpose. - int32 Count; // number of samples this sample - // represents - int32 QNum; // what value this sample ended up quantized - // to. - struct QuantizedValue *qptr; // ptr to what this was quantized to. - uint8 Value[1]; // array of values for multi-dimensional - // variables. 
-}; - -void FreeQuantization(struct QuantizedValue *t); - -struct QuantizedValue *Quantize(struct Sample *s, int nsamples, int ndims, - int nvalues, uint8 *weights, int value0=0); - -int CompressSamples(struct Sample *s, int nsamples, int ndims); - -struct QuantizedValue *FindMatch(uint8 const *sample, - int ndims,uint8 *weights, - struct QuantizedValue *QTable); -void PrintSamples(struct Sample const *s, int nsamples, int ndims); - -struct QuantizedValue *FindQNode(struct QuantizedValue const *q, int32 code); - -inline struct Sample *NthSample(struct Sample *s, int i, int nd) -{ - uint8 *r=(uint8 *) s; - r+=i*(sizeof(*s)+(nd-1)); - return (struct Sample *) r; -} - -inline struct Sample *AllocSamples(int ns, int nd) -{ - size_t size5=(sizeof(struct Sample)+(nd-1))*ns; - void *ret=new uint8[size5]; - memset(ret,0,size5); - for(int i=0;iCount=1; - return (struct Sample *) ret; -} - - -// MinimumError: what is the min error which will occur if quantizing -// a sample to the given qnode? This is just the error if the qnode -// is a leaf. -double MinimumError(struct QuantizedValue const *q, uint8 const *sample, - int ndims, uint8 const *weights); -double MaximumError(struct QuantizedValue const *q, uint8 const *sample, - int ndims, uint8 const *weights); - -void PrintQTree(struct QuantizedValue const *p,int idlevel=0); -void OptimizeQuantizer(struct QuantizedValue *q, int ndims); - -// RecalculateVelues: update the means in a sample tree, based upon -// the samples. can be used to reoptimize when samples are deleted, -// for instance. - -void RecalculateValues(struct QuantizedValue *q, int ndims); - -extern double SquaredError; // may be reset and examined. updated by - // FindMatch() - - - - -// the routines below can be used for uniform quantization via dart-throwing. 
-typedef void (*GENERATOR)(void *); // generate a random sample -typedef double (*COMPARER)(void const *a, void const *b); - -void *DartThrow(int NResults, int NTries, size_t itemsize, GENERATOR gen, - COMPARER cmp); -void *FindClosestDart(void *items,int NResults, size_t itemsize, - COMPARER cmp, void *lookfor, int *idx); - - - - -// color quantization of 24 bit images -#define QUANTFLAGS_NODITHER 1 // don't do Floyd-steinberg dither - -extern void ColorQuantize( -uint8 const *pImage, // 4 byte pixels ARGB -int nWidth, -int nHeight, -int nFlags, // QUANTFLAGS_xxx -int nColors, // # of colors to fill in in palette -uint8 *pOutPixels, // where to store resulting 8 bit pixels -uint8 *pOutPalette, // where to store resulting 768-byte palette -int nFirstColor); // first color to use in mapping - - - - - -#endif +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// +#ifndef QUANTIZE_H +#define QUANTIZE_H + +#ifndef STRING_H +#include +#endif + +#define MAXDIMS 768 +#define MAXQUANT 16000 + + +#include + +struct Sample; + +struct QuantizedValue { + double MinError; // minimum possible error. used + // for neighbor searches. + struct QuantizedValue *Children[2]; // splits + int32 value; // only exists for leaf nodes + struct Sample *Samples; // every sample quantized into this + // entry + int32 NSamples; // how many were quantized to this. + int32 TotSamples; + double *ErrorMeasure; // variance measure for each dimension + double TotalError; // sum of errors + uint8 *Mean; // average value of each dimension + uint8 *Mins; // min box for children and this + uint8 *Maxs; // max box for children and this + int NQuant; // the number of samples which were + // quantzied to this node since the + // last time OptimizeQuantizer() + // was called. 
+ int *Sums; // sum used by OptimizeQuantizer + int sortdim; // dimension currently sorted along. +}; + +struct Sample { + int32 ID; // identifier of this sample. can + // be used for any purpose. + int32 Count; // number of samples this sample + // represents + int32 QNum; // what value this sample ended up quantized + // to. + struct QuantizedValue *qptr; // ptr to what this was quantized to. + uint8 Value[1]; // array of values for multi-dimensional + // variables. +}; + +void FreeQuantization(struct QuantizedValue *t); + +struct QuantizedValue *Quantize(struct Sample *s, int nsamples, int ndims, + int nvalues, uint8 *weights, int value0=0); + +int CompressSamples(struct Sample *s, int nsamples, int ndims); + +struct QuantizedValue *FindMatch(uint8 const *sample, + int ndims,uint8 *weights, + struct QuantizedValue *QTable); +void PrintSamples(struct Sample const *s, int nsamples, int ndims); + +struct QuantizedValue *FindQNode(struct QuantizedValue const *q, int32 code); + +inline struct Sample *NthSample(struct Sample *s, int i, int nd) +{ + uint8 *r=(uint8 *) s; + r+=i*(sizeof(*s)+(nd-1)); + return (struct Sample *) r; +} + +inline struct Sample *AllocSamples(int ns, int nd) +{ + size_t size5=(sizeof(struct Sample)+(nd-1))*ns; + void *ret=new uint8[size5]; + memset(ret,0,size5); + for(int i=0;iCount=1; + return (struct Sample *) ret; +} + + +// MinimumError: what is the min error which will occur if quantizing +// a sample to the given qnode? This is just the error if the qnode +// is a leaf. +double MinimumError(struct QuantizedValue const *q, uint8 const *sample, + int ndims, uint8 const *weights); +double MaximumError(struct QuantizedValue const *q, uint8 const *sample, + int ndims, uint8 const *weights); + +void PrintQTree(struct QuantizedValue const *p,int idlevel=0); +void OptimizeQuantizer(struct QuantizedValue *q, int ndims); + +// RecalculateVelues: update the means in a sample tree, based upon +// the samples. 
can be used to reoptimize when samples are deleted, +// for instance. + +void RecalculateValues(struct QuantizedValue *q, int ndims); + +extern double SquaredError; // may be reset and examined. updated by + // FindMatch() + + + + +// the routines below can be used for uniform quantization via dart-throwing. +typedef void (*GENERATOR)(void *); // generate a random sample +typedef double (*COMPARER)(void const *a, void const *b); + +void *DartThrow(int NResults, int NTries, size_t itemsize, GENERATOR gen, + COMPARER cmp); +void *FindClosestDart(void *items,int NResults, size_t itemsize, + COMPARER cmp, void *lookfor, int *idx); + + + + +// color quantization of 24 bit images +#define QUANTFLAGS_NODITHER 1 // don't do Floyd-steinberg dither + +extern void ColorQuantize( +uint8 const *pImage, // 4 byte pixels ARGB +int nWidth, +int nHeight, +int nFlags, // QUANTFLAGS_xxx +int nColors, // # of colors to fill in in palette +uint8 *pOutPixels, // where to store resulting 8 bit pixels +uint8 *pOutPalette, // where to store resulting 768-byte palette +int nFirstColor); // first color to use in mapping + + + + + +#endif diff --git a/mp/src/public/mathlib/simdvectormatrix.h b/mp/src/public/mathlib/simdvectormatrix.h index ba830787..f88cd328 100644 --- a/mp/src/public/mathlib/simdvectormatrix.h +++ b/mp/src/public/mathlib/simdvectormatrix.h @@ -1,142 +1,142 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: Provide a class (SSE/SIMD only) holding a 2d matrix of class FourVectors, -// for high speed processing in tools. 
-// -// $NoKeywords: $ -// -//=============================================================================// - -#ifndef SIMDVECTORMATRIX_H -#define SIMDVECTORMATRIX_H - -#ifdef _WIN32 -#pragma once -#endif - - -#include -#include "tier0/platform.h" -#include "tier0/dbg.h" -#include "tier1/utlsoacontainer.h" -#include "mathlib/ssemath.h" - -class CSIMDVectorMatrix -{ -public: - int m_nWidth; // in actual vectors - int m_nHeight; - - int m_nPaddedWidth; // # of 4x wide elements - - FourVectors *m_pData; - -protected: - void Init( void ) - { - m_pData = NULL; - m_nWidth = 0; - m_nHeight = 0; - m_nPaddedWidth = 0; - } - - int NVectors( void ) const - { - return m_nHeight * m_nPaddedWidth; - } - -public: - // constructors and destructors - CSIMDVectorMatrix( void ) - { - Init(); - } - - ~CSIMDVectorMatrix( void ) - { - if ( m_pData ) - delete[] m_pData; - } - - // set up storage and fields for m x n matrix. destroys old data - void SetSize( int width, int height ) - { - if ( ( ! m_pData ) || ( width != m_nWidth ) || ( height != m_nHeight ) ) - { - if ( m_pData ) - delete[] m_pData; - - m_nWidth = width; - m_nHeight = height; - - m_nPaddedWidth = ( m_nWidth + 3) >> 2; - m_pData = NULL; - if ( width && height ) - m_pData = new FourVectors[ m_nPaddedWidth * m_nHeight ]; - } - } - - CSIMDVectorMatrix( int width, int height ) - { - Init(); - SetSize( width, height ); - } - - CSIMDVectorMatrix &operator=( CSIMDVectorMatrix const &src ) - { - SetSize( src.m_nWidth, src.m_nHeight ); - if ( m_pData ) - memcpy( m_pData, src.m_pData, m_nHeight*m_nPaddedWidth*sizeof(m_pData[0]) ); - return *this; - } - - CSIMDVectorMatrix &operator+=( CSIMDVectorMatrix const &src ); - - CSIMDVectorMatrix &operator*=( Vector const &src ); - - // create from an RGBA float bitmap. alpha ignored. 
- void CreateFromRGBA_FloatImageData(int srcwidth, int srcheight, float const *srcdata ); - - // create from 3 fields in a csoa - void CreateFromCSOAAttributes( CSOAContainer const *pSrc, - int nAttrIdx0, int nAttrIdx1, int nAttrIdx2 ); - - // Element access. If you are calling this a lot, you don't want to use this class, because - // you're not getting the sse advantage - Vector Element(int x, int y) const - { - Assert( m_pData ); - Assert( x < m_nWidth ); - Assert( y < m_nHeight ); - Vector ret; - FourVectors const *pData=m_pData+y*m_nPaddedWidth+(x >> 2); - - int xo=(x & 3); - ret.x=pData->X( xo ); - ret.y=pData->Y( xo ); - ret.z=pData->Z( xo ); - return ret; - } - - //addressing the individual fourvectors elements - FourVectors &CompoundElement(int x, int y) - { - Assert( m_pData ); - Assert( y < m_nHeight ); - Assert( x < m_nPaddedWidth ); - return m_pData[x + m_nPaddedWidth*y ]; - } - - // math operations on the whole image - void Clear( void ) - { - Assert( m_pData ); - memset( m_pData, 0, m_nHeight*m_nPaddedWidth*sizeof(m_pData[0]) ); - } - - void RaiseToPower( float power ); -}; - - - -#endif +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: Provide a class (SSE/SIMD only) holding a 2d matrix of class FourVectors, +// for high speed processing in tools. 
+// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef SIMDVECTORMATRIX_H +#define SIMDVECTORMATRIX_H + +#ifdef _WIN32 +#pragma once +#endif + + +#include +#include "tier0/platform.h" +#include "tier0/dbg.h" +#include "tier1/utlsoacontainer.h" +#include "mathlib/ssemath.h" + +class CSIMDVectorMatrix +{ +public: + int m_nWidth; // in actual vectors + int m_nHeight; + + int m_nPaddedWidth; // # of 4x wide elements + + FourVectors *m_pData; + +protected: + void Init( void ) + { + m_pData = NULL; + m_nWidth = 0; + m_nHeight = 0; + m_nPaddedWidth = 0; + } + + int NVectors( void ) const + { + return m_nHeight * m_nPaddedWidth; + } + +public: + // constructors and destructors + CSIMDVectorMatrix( void ) + { + Init(); + } + + ~CSIMDVectorMatrix( void ) + { + if ( m_pData ) + delete[] m_pData; + } + + // set up storage and fields for m x n matrix. destroys old data + void SetSize( int width, int height ) + { + if ( ( ! m_pData ) || ( width != m_nWidth ) || ( height != m_nHeight ) ) + { + if ( m_pData ) + delete[] m_pData; + + m_nWidth = width; + m_nHeight = height; + + m_nPaddedWidth = ( m_nWidth + 3) >> 2; + m_pData = NULL; + if ( width && height ) + m_pData = new FourVectors[ m_nPaddedWidth * m_nHeight ]; + } + } + + CSIMDVectorMatrix( int width, int height ) + { + Init(); + SetSize( width, height ); + } + + CSIMDVectorMatrix &operator=( CSIMDVectorMatrix const &src ) + { + SetSize( src.m_nWidth, src.m_nHeight ); + if ( m_pData ) + memcpy( m_pData, src.m_pData, m_nHeight*m_nPaddedWidth*sizeof(m_pData[0]) ); + return *this; + } + + CSIMDVectorMatrix &operator+=( CSIMDVectorMatrix const &src ); + + CSIMDVectorMatrix &operator*=( Vector const &src ); + + // create from an RGBA float bitmap. alpha ignored. 
+ void CreateFromRGBA_FloatImageData(int srcwidth, int srcheight, float const *srcdata ); + + // create from 3 fields in a csoa + void CreateFromCSOAAttributes( CSOAContainer const *pSrc, + int nAttrIdx0, int nAttrIdx1, int nAttrIdx2 ); + + // Element access. If you are calling this a lot, you don't want to use this class, because + // you're not getting the sse advantage + Vector Element(int x, int y) const + { + Assert( m_pData ); + Assert( x < m_nWidth ); + Assert( y < m_nHeight ); + Vector ret; + FourVectors const *pData=m_pData+y*m_nPaddedWidth+(x >> 2); + + int xo=(x & 3); + ret.x=pData->X( xo ); + ret.y=pData->Y( xo ); + ret.z=pData->Z( xo ); + return ret; + } + + //addressing the individual fourvectors elements + FourVectors &CompoundElement(int x, int y) + { + Assert( m_pData ); + Assert( y < m_nHeight ); + Assert( x < m_nPaddedWidth ); + return m_pData[x + m_nPaddedWidth*y ]; + } + + // math operations on the whole image + void Clear( void ) + { + Assert( m_pData ); + memset( m_pData, 0, m_nHeight*m_nPaddedWidth*sizeof(m_pData[0]) ); + } + + void RaiseToPower( float power ); +}; + + + +#endif diff --git a/mp/src/public/mathlib/spherical_geometry.h b/mp/src/public/mathlib/spherical_geometry.h index a32d96ac..04310f43 100644 --- a/mp/src/public/mathlib/spherical_geometry.h +++ b/mp/src/public/mathlib/spherical_geometry.h @@ -1,73 +1,73 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: Functions for spherical geometry. -// -// $NoKeywords: $ -// -//=============================================================================// - -#ifndef SPHERICAL_GEOMETRY_H -#define SPHERICAL_GEOMETRY_H - -#ifdef _WIN32 -#pragma once -#endif - -#include -#include - -// see http://mathworld.wolfram.com/SphericalTrigonometry.html - -// return the spherical distance, in radians, between 2 points on the unit sphere. 
-FORCEINLINE float UnitSphereLineSegmentLength( Vector const &a, Vector const &b ) -{ - // check unit length - Assert( fabs( VectorLength( a ) - 1.0 ) < 1.0e-3 ); - Assert( fabs( VectorLength( b ) - 1.0 ) < 1.0e-3 ); - return acos( DotProduct( a, b ) ); -} - - -// given 3 points on the unit sphere, return the spherical area (in radians) of the triangle they form. -// valid for "small" triangles. -FORCEINLINE float UnitSphereTriangleArea( Vector const &a, Vector const &b , Vector const &c ) -{ - float flLengthA = UnitSphereLineSegmentLength( b, c ); - float flLengthB = UnitSphereLineSegmentLength( c, a ); - float flLengthC = UnitSphereLineSegmentLength( a, b ); - - if ( ( flLengthA == 0. ) || ( flLengthB == 0. ) || ( flLengthC == 0. ) ) - return 0.; // zero area triangle - - // now, find the 3 incribed angles for the triangle - float flHalfSumLens = 0.5 * ( flLengthA + flLengthB + flLengthC ); - float flSinSums = sin( flHalfSumLens ); - float flSinSMinusA= sin( flHalfSumLens - flLengthA ); - float flSinSMinusB= sin( flHalfSumLens - flLengthB ); - float flSinSMinusC= sin( flHalfSumLens - flLengthC ); - - float flTanAOver2 = sqrt ( ( flSinSMinusB * flSinSMinusC ) / ( flSinSums * flSinSMinusA ) ); - float flTanBOver2 = sqrt ( ( flSinSMinusA * flSinSMinusC ) / ( flSinSums * flSinSMinusB ) ); - float flTanCOver2 = sqrt ( ( flSinSMinusA * flSinSMinusB ) / ( flSinSums * flSinSMinusC ) ); - - // Girards formula : area = sum of angles - pi. - return 2.0 * ( atan( flTanAOver2 ) + atan( flTanBOver2 ) + atan( flTanCOver2 ) ) - M_PI; -} - -// spherical harmonics-related functions. Best explanation at http://www.research.scea.com/gdc2003/spherical-harmonic-lighting.pdf - -// Evaluate associated legendre polynomial P( l, m ) at flX, using recurrence relation -float AssociatedLegendrePolynomial( int nL, int nM, float flX ); - -// Evaluate order N spherical harmonic with spherical coordinates -// nL = band, 0..N -// nM = -nL .. nL -// theta = 0..M_PI -// phi = 0.. 
2 * M_PHI -float SphericalHarmonic( int nL, int nM, float flTheta, float flPhi ); - -// evaluate spherical harmonic with normalized vector direction -float SphericalHarmonic( int nL, int nM, Vector const &vecDirection ); - - -#endif // SPHERICAL_GEOMETRY_H +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: Functions for spherical geometry. +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef SPHERICAL_GEOMETRY_H +#define SPHERICAL_GEOMETRY_H + +#ifdef _WIN32 +#pragma once +#endif + +#include +#include + +// see http://mathworld.wolfram.com/SphericalTrigonometry.html + +// return the spherical distance, in radians, between 2 points on the unit sphere. +FORCEINLINE float UnitSphereLineSegmentLength( Vector const &a, Vector const &b ) +{ + // check unit length + Assert( fabs( VectorLength( a ) - 1.0 ) < 1.0e-3 ); + Assert( fabs( VectorLength( b ) - 1.0 ) < 1.0e-3 ); + return acos( DotProduct( a, b ) ); +} + + +// given 3 points on the unit sphere, return the spherical area (in radians) of the triangle they form. +// valid for "small" triangles. +FORCEINLINE float UnitSphereTriangleArea( Vector const &a, Vector const &b , Vector const &c ) +{ + float flLengthA = UnitSphereLineSegmentLength( b, c ); + float flLengthB = UnitSphereLineSegmentLength( c, a ); + float flLengthC = UnitSphereLineSegmentLength( a, b ); + + if ( ( flLengthA == 0. ) || ( flLengthB == 0. ) || ( flLengthC == 0. 
) ) + return 0.; // zero area triangle + + // now, find the 3 incribed angles for the triangle + float flHalfSumLens = 0.5 * ( flLengthA + flLengthB + flLengthC ); + float flSinSums = sin( flHalfSumLens ); + float flSinSMinusA= sin( flHalfSumLens - flLengthA ); + float flSinSMinusB= sin( flHalfSumLens - flLengthB ); + float flSinSMinusC= sin( flHalfSumLens - flLengthC ); + + float flTanAOver2 = sqrt ( ( flSinSMinusB * flSinSMinusC ) / ( flSinSums * flSinSMinusA ) ); + float flTanBOver2 = sqrt ( ( flSinSMinusA * flSinSMinusC ) / ( flSinSums * flSinSMinusB ) ); + float flTanCOver2 = sqrt ( ( flSinSMinusA * flSinSMinusB ) / ( flSinSums * flSinSMinusC ) ); + + // Girards formula : area = sum of angles - pi. + return 2.0 * ( atan( flTanAOver2 ) + atan( flTanBOver2 ) + atan( flTanCOver2 ) ) - M_PI; +} + +// spherical harmonics-related functions. Best explanation at http://www.research.scea.com/gdc2003/spherical-harmonic-lighting.pdf + +// Evaluate associated legendre polynomial P( l, m ) at flX, using recurrence relation +float AssociatedLegendrePolynomial( int nL, int nM, float flX ); + +// Evaluate order N spherical harmonic with spherical coordinates +// nL = band, 0..N +// nM = -nL .. nL +// theta = 0..M_PI +// phi = 0.. 2 * M_PHI +float SphericalHarmonic( int nL, int nM, float flTheta, float flPhi ); + +// evaluate spherical harmonic with normalized vector direction +float SphericalHarmonic( int nL, int nM, Vector const &vecDirection ); + + +#endif // SPHERICAL_GEOMETRY_H diff --git a/mp/src/public/mathlib/ssemath.h b/mp/src/public/mathlib/ssemath.h index b25fbd09..6691df12 100644 --- a/mp/src/public/mathlib/ssemath.h +++ b/mp/src/public/mathlib/ssemath.h @@ -1,3098 +1,3098 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: - defines SIMD "structure of arrays" classes and functions. 
-// -//===========================================================================// -#ifndef SSEMATH_H -#define SSEMATH_H - -#if defined( _X360 ) -#include -#else -#include -#endif - -#include -#include - -#if defined(GNUC) -#define USE_STDC_FOR_SIMD 0 -#else -#define USE_STDC_FOR_SIMD 0 -#endif - -#if (!defined(_X360) && (USE_STDC_FOR_SIMD == 0)) -#define _SSE1 1 -#endif - -// I thought about defining a class/union for the SIMD packed floats instead of using fltx4, -// but decided against it because (a) the nature of SIMD code which includes comparisons is to blur -// the relationship between packed floats and packed integer types and (b) not sure that the -// compiler would handle generating good code for the intrinsics. - -#if USE_STDC_FOR_SIMD - -typedef union -{ - float m128_f32[4]; - uint32 m128_u32[4]; -} fltx4; - -typedef fltx4 i32x4; -typedef fltx4 u32x4; - -#elif ( defined( _X360 ) ) - -typedef union -{ - // This union allows float/int access (which generally shouldn't be done in inner loops) - __vector4 vmx; - float m128_f32[4]; - uint32 m128_u32[4]; -} fltx4_union; - -typedef __vector4 fltx4; -typedef __vector4 i32x4; // a VMX register; just a way of making it explicit that we're doing integer ops. -typedef __vector4 u32x4; // a VMX register; just a way of making it explicit that we're doing unsigned integer ops. - -#else - -typedef __m128 fltx4; -typedef __m128 i32x4; -typedef __m128 u32x4; - -#endif - -// The FLTX4 type is a fltx4 used as a parameter to a function. -// On the 360, the best way to do this is pass-by-copy on the registers. -// On the PC, the best way is to pass by const reference. -// The compiler will sometimes, but not always, replace a pass-by-const-ref -// with a pass-in-reg on the 360; to avoid this confusion, you can -// explicitly use a FLTX4 as the parameter type. 
-#ifdef _X360 -typedef __vector4 FLTX4; -#else -typedef const fltx4 & FLTX4; -#endif - -// A 16-byte aligned int32 datastructure -// (for use when writing out fltx4's as SIGNED -// ints). -struct ALIGN16 intx4 -{ - int32 m_i32[4]; - - inline int & operator[](int which) - { - return m_i32[which]; - } - - inline const int & operator[](int which) const - { - return m_i32[which]; - } - - inline int32 *Base() { - return m_i32; - } - - inline const int32 *Base() const - { - return m_i32; - } - - inline const bool operator==(const intx4 &other) const - { - return m_i32[0] == other.m_i32[0] && - m_i32[1] == other.m_i32[1] && - m_i32[2] == other.m_i32[2] && - m_i32[3] == other.m_i32[3] ; - } -} ALIGN16_POST; - - -#if defined( _DEBUG ) && defined( _X360 ) -FORCEINLINE void TestVPUFlags() -{ - // Check that the VPU is in the appropriate (Java-compliant) mode (see 3.2.1 in altivec_pem.pdf on xds.xbox.com) - __vector4 a; - __asm - { - mfvscr a; - } - unsigned int * flags = (unsigned int *)&a; - unsigned int controlWord = flags[3]; - Assert(controlWord == 0); -} -#else // _DEBUG -FORCEINLINE void TestVPUFlags() {} -#endif // _DEBUG - - -// useful constants in SIMD packed float format: -// (note: some of these aren't stored on the 360, -// but are manufactured directly in one or two -// instructions, saving a load and possible L2 -// miss.) -#ifndef _X360 -extern const fltx4 Four_Zeros; // 0 0 0 0 -extern const fltx4 Four_Ones; // 1 1 1 1 -extern const fltx4 Four_Twos; // 2 2 2 2 -extern const fltx4 Four_Threes; // 3 3 3 3 -extern const fltx4 Four_Fours; // guess. -extern const fltx4 Four_Point225s; // .225 .225 .225 .225 -extern const fltx4 Four_PointFives; // .5 .5 .5 .5 -extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON -extern const fltx4 Four_2ToThe21s; // (1<<21).. -extern const fltx4 Four_2ToThe22s; // (1<<22).. -extern const fltx4 Four_2ToThe23s; // (1<<23).. -extern const fltx4 Four_2ToThe24s; // (1<<24).. 
-extern const fltx4 Four_Origin; // 0 0 0 1 (origin point, like vr0 on the PS2) -extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1 -#else -#define Four_Zeros XMVectorZero() // 0 0 0 0 -#define Four_Ones XMVectorSplatOne() // 1 1 1 1 -extern const fltx4 Four_Twos; // 2 2 2 2 -extern const fltx4 Four_Threes; // 3 3 3 3 -extern const fltx4 Four_Fours; // guess. -extern const fltx4 Four_Point225s; // .225 .225 .225 .225 -extern const fltx4 Four_PointFives; // .5 .5 .5 .5 -extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON -extern const fltx4 Four_2ToThe21s; // (1<<21).. -extern const fltx4 Four_2ToThe22s; // (1<<22).. -extern const fltx4 Four_2ToThe23s; // (1<<23).. -extern const fltx4 Four_2ToThe24s; // (1<<24).. -extern const fltx4 Four_Origin; // 0 0 0 1 (origin point, like vr0 on the PS2) -extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1 -#endif -extern const fltx4 Four_FLT_MAX; // FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX -extern const fltx4 Four_Negative_FLT_MAX; // -FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX -extern const fltx4 g_SIMD_0123; // 0 1 2 3 as float - -// external aligned integer constants -extern const ALIGN16 int32 g_SIMD_clear_signmask[] ALIGN16_POST; // 0x7fffffff x 4 -extern const ALIGN16 int32 g_SIMD_signmask[] ALIGN16_POST; // 0x80000000 x 4 -extern const ALIGN16 int32 g_SIMD_lsbmask[] ALIGN16_POST; // 0xfffffffe x 4 -extern const ALIGN16 int32 g_SIMD_clear_wmask[] ALIGN16_POST; // -1 -1 -1 0 -extern const ALIGN16 int32 g_SIMD_ComponentMask[4][4] ALIGN16_POST; // [0xFFFFFFFF 0 0 0], [0 0xFFFFFFFF 0 0], [0 0 0xFFFFFFFF 0], [0 0 0 0xFFFFFFFF] -extern const ALIGN16 int32 g_SIMD_AllOnesMask[] ALIGN16_POST; // ~0,~0,~0,~0 -extern const ALIGN16 int32 g_SIMD_Low16BitsMask[] ALIGN16_POST; // 0xffff x 4 - -// this mask is used for skipping the tail of things. If you have N elements in an array, and wish -// to mask out the tail, g_SIMD_SkipTailMask[N & 3] what you want to use for the last iteration. 
-extern const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST; - -// Define prefetch macros. -// The characteristics of cache and prefetch are completely -// different between the different platforms, so you DO NOT -// want to just define one macro that maps to every platform -// intrinsic under the hood -- you need to prefetch at different -// intervals between x86 and PPC, for example, and that is -// a higher level code change. -// On the other hand, I'm tired of typing #ifdef _X360 -// all over the place, so this is just a nop on Intel, PS3. -#ifdef _X360 -#define PREFETCH360(address, offset) __dcbt(offset,address) -#else -#define PREFETCH360(x,y) // nothing -#endif - -#if USE_STDC_FOR_SIMD - -//--------------------------------------------------------------------- -// Standard C (fallback/Linux) implementation (only there for compat - slow) -//--------------------------------------------------------------------- - -FORCEINLINE float SubFloat( const fltx4 & a, int idx ) -{ - return a.m128_f32[ idx ]; -} - -FORCEINLINE float & SubFloat( fltx4 & a, int idx ) -{ - return a.m128_f32[idx]; -} - -FORCEINLINE uint32 SubInt( const fltx4 & a, int idx ) -{ - return a.m128_u32[idx]; -} - -FORCEINLINE uint32 & SubInt( fltx4 & a, int idx ) -{ - return a.m128_u32[idx]; -} - -// Return one in the fastest way -- on the x360, faster even than loading. -FORCEINLINE fltx4 LoadZeroSIMD( void ) -{ - return Four_Zeros; -} - -// Return one in the fastest way -- on the x360, faster even than loading. 
-FORCEINLINE fltx4 LoadOneSIMD( void ) -{ - return Four_Ones; -} - -FORCEINLINE fltx4 SplatXSIMD( const fltx4 & a ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = SubFloat( a, 0 ); - SubFloat( retVal, 1 ) = SubFloat( a, 0 ); - SubFloat( retVal, 2 ) = SubFloat( a, 0 ); - SubFloat( retVal, 3 ) = SubFloat( a, 0 ); - return retVal; -} - -FORCEINLINE fltx4 SplatYSIMD( fltx4 a ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = SubFloat( a, 1 ); - SubFloat( retVal, 1 ) = SubFloat( a, 1 ); - SubFloat( retVal, 2 ) = SubFloat( a, 1 ); - SubFloat( retVal, 3 ) = SubFloat( a, 1 ); - return retVal; -} - -FORCEINLINE fltx4 SplatZSIMD( fltx4 a ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = SubFloat( a, 2 ); - SubFloat( retVal, 1 ) = SubFloat( a, 2 ); - SubFloat( retVal, 2 ) = SubFloat( a, 2 ); - SubFloat( retVal, 3 ) = SubFloat( a, 2 ); - return retVal; -} - -FORCEINLINE fltx4 SplatWSIMD( fltx4 a ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = SubFloat( a, 3 ); - SubFloat( retVal, 1 ) = SubFloat( a, 3 ); - SubFloat( retVal, 2 ) = SubFloat( a, 3 ); - SubFloat( retVal, 3 ) = SubFloat( a, 3 ); - return retVal; -} - -FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x ) -{ - fltx4 result = a; - SubFloat( result, 0 ) = SubFloat( x, 0 ); - return result; -} - -FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y ) -{ - fltx4 result = a; - SubFloat( result, 1 ) = SubFloat( y, 1 ); - return result; -} - -FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z ) -{ - fltx4 result = a; - SubFloat( result, 2 ) = SubFloat( z, 2 ); - return result; -} - -FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w ) -{ - fltx4 result = a; - SubFloat( result, 3 ) = SubFloat( w, 3 ); - return result; -} - -FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue ) -{ - fltx4 result = a; - SubFloat( result, nComponent ) = flValue; - return result; -} - -// a b c d -> b c d a -FORCEINLINE fltx4 RotateLeft( const fltx4 & a ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) 
= SubFloat( a, 1 ); - SubFloat( retVal, 1 ) = SubFloat( a, 2 ); - SubFloat( retVal, 2 ) = SubFloat( a, 3 ); - SubFloat( retVal, 3 ) = SubFloat( a, 0 ); - return retVal; -} - -// a b c d -> c d a b -FORCEINLINE fltx4 RotateLeft2( const fltx4 & a ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = SubFloat( a, 2 ); - SubFloat( retVal, 1 ) = SubFloat( a, 3 ); - SubFloat( retVal, 2 ) = SubFloat( a, 0 ); - SubFloat( retVal, 3 ) = SubFloat( a, 1 ); - return retVal; -} - -#define BINOP(op) \ - fltx4 retVal; \ - SubFloat( retVal, 0 ) = ( SubFloat( a, 0 ) op SubFloat( b, 0 ) ); \ - SubFloat( retVal, 1 ) = ( SubFloat( a, 1 ) op SubFloat( b, 1 ) ); \ - SubFloat( retVal, 2 ) = ( SubFloat( a, 2 ) op SubFloat( b, 2 ) ); \ - SubFloat( retVal, 3 ) = ( SubFloat( a, 3 ) op SubFloat( b, 3 ) ); \ - return retVal; - -#define IBINOP(op) \ - fltx4 retVal; \ - SubInt( retVal, 0 ) = ( SubInt( a, 0 ) op SubInt ( b, 0 ) ); \ - SubInt( retVal, 1 ) = ( SubInt( a, 1 ) op SubInt ( b, 1 ) ); \ - SubInt( retVal, 2 ) = ( SubInt( a, 2 ) op SubInt ( b, 2 ) ); \ - SubInt( retVal, 3 ) = ( SubInt( a, 3 ) op SubInt ( b, 3 ) ); \ - return retVal; - -FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b ) -{ - BINOP(+); -} - -FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b -{ - BINOP(-); -}; - -FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b -{ - BINOP(*); -} - -FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b -{ - BINOP(/); -} - - -FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c -{ - return AddSIMD( MulSIMD(a,b), c ); -} - -FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b -{ - return SubSIMD( c, MulSIMD(a,b) ); -}; - - -FORCEINLINE fltx4 SinSIMD( const fltx4 &radians ) -{ - fltx4 result; - SubFloat( result, 0 ) = sin( SubFloat( radians, 0 ) ); - SubFloat( result, 1 ) = sin( SubFloat( radians, 1 ) ); - SubFloat( result, 2 ) = sin( SubFloat( radians, 2 ) ); - 
SubFloat( result, 3 ) = sin( SubFloat( radians, 3 ) ); - return result; -} - -FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) -{ - SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) ); - SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) ); - SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) ); -} - -FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) -{ - SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) ); - SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) ); - SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) ); - SinCos( SubFloat( radians, 3 ), &SubFloat( sine, 3 ), &SubFloat( cosine, 3 ) ); -} - -FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine ) -{ - fltx4 result; - SubFloat( result, 0 ) = asin( SubFloat( sine, 0 ) ); - SubFloat( result, 1 ) = asin( SubFloat( sine, 1 ) ); - SubFloat( result, 2 ) = asin( SubFloat( sine, 2 ) ); - SubFloat( result, 3 ) = asin( SubFloat( sine, 3 ) ); - return result; -} - -FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs ) -{ - fltx4 result; - SubFloat( result, 0 ) = acos( SubFloat( cs, 0 ) ); - SubFloat( result, 1 ) = acos( SubFloat( cs, 1 ) ); - SubFloat( result, 2 ) = acos( SubFloat( cs, 2 ) ); - SubFloat( result, 3 ) = acos( SubFloat( cs, 3 ) ); - return result; -} - -// tan^1(a/b) .. 
ie, pass sin in as a and cos in as b -FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b ) -{ - fltx4 result; - SubFloat( result, 0 ) = atan2( SubFloat( a, 0 ), SubFloat( b, 0 ) ); - SubFloat( result, 1 ) = atan2( SubFloat( a, 1 ), SubFloat( b, 1 ) ); - SubFloat( result, 2 ) = atan2( SubFloat( a, 2 ), SubFloat( b, 2 ) ); - SubFloat( result, 3 ) = atan2( SubFloat( a, 3 ), SubFloat( b, 3 ) ); - return result; -} - -FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = max( SubFloat( a, 0 ), SubFloat( b, 0 ) ); - SubFloat( retVal, 1 ) = max( SubFloat( a, 1 ), SubFloat( b, 1 ) ); - SubFloat( retVal, 2 ) = max( SubFloat( a, 2 ), SubFloat( b, 2 ) ); - SubFloat( retVal, 3 ) = max( SubFloat( a, 3 ), SubFloat( b, 3 ) ); - return retVal; -} - -FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = min( SubFloat( a, 0 ), SubFloat( b, 0 ) ); - SubFloat( retVal, 1 ) = min( SubFloat( a, 1 ), SubFloat( b, 1 ) ); - SubFloat( retVal, 2 ) = min( SubFloat( a, 2 ), SubFloat( b, 2 ) ); - SubFloat( retVal, 3 ) = min( SubFloat( a, 3 ), SubFloat( b, 3 ) ); - return retVal; -} - -FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b -{ - IBINOP(&); -} - -FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b -{ - fltx4 retVal; - SubInt( retVal, 0 ) = ~SubInt( a, 0 ) & SubInt( b, 0 ); - SubInt( retVal, 1 ) = ~SubInt( a, 1 ) & SubInt( b, 1 ); - SubInt( retVal, 2 ) = ~SubInt( a, 2 ) & SubInt( b, 2 ); - SubInt( retVal, 3 ) = ~SubInt( a, 3 ) & SubInt( b, 3 ); - return retVal; -} - -FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b -{ - IBINOP(^); -} - -FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b -{ - IBINOP(|); -} - -FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a -{ - fltx4 retval; - SubFloat( retval, 0 ) = -SubFloat( a, 0 ); - SubFloat( retval, 1 ) = -SubFloat( a, 
1 ); - SubFloat( retval, 2 ) = -SubFloat( a, 2 ); - SubFloat( retval, 3 ) = -SubFloat( a, 3 ); - - return retval; -} - -FORCEINLINE bool IsAllZeros( const fltx4 & a ) // all floats of a zero? -{ - return ( SubFloat( a, 0 ) == 0.0 ) && - ( SubFloat( a, 1 ) == 0.0 ) && - ( SubFloat( a, 2 ) == 0.0 ) && - ( SubFloat( a, 3 ) == 0.0 ) ; -} - - -// for branching when a.xyzw > b.xyzw -FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b ) -{ - return SubFloat(a,0) > SubFloat(b,0) && - SubFloat(a,1) > SubFloat(b,1) && - SubFloat(a,2) > SubFloat(b,2) && - SubFloat(a,3) > SubFloat(b,3); -} - -// for branching when a.xyzw >= b.xyzw -FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b ) -{ - return SubFloat(a,0) >= SubFloat(b,0) && - SubFloat(a,1) >= SubFloat(b,1) && - SubFloat(a,2) >= SubFloat(b,2) && - SubFloat(a,3) >= SubFloat(b,3); -} - -// For branching if all a.xyzw == b.xyzw -FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b ) -{ - return SubFloat(a,0) == SubFloat(b,0) && - SubFloat(a,1) == SubFloat(b,1) && - SubFloat(a,2) == SubFloat(b,2) && - SubFloat(a,3) == SubFloat(b,3); -} - -FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set -{ - int nRet = 0; - - nRet |= ( SubInt( a, 0 ) & 0x80000000 ) >> 31; // sign(x) -> bit 0 - nRet |= ( SubInt( a, 1 ) & 0x80000000 ) >> 30; // sign(y) -> bit 1 - nRet |= ( SubInt( a, 2 ) & 0x80000000 ) >> 29; // sign(z) -> bit 2 - nRet |= ( SubInt( a, 3 ) & 0x80000000 ) >> 28; // sign(w) -> bit 3 - - return nRet; -} - -FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0) -{ - return (0 != TestSignSIMD( a )); -} - -FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0 -{ - fltx4 retVal; - SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) == SubFloat( b, 0 )) ? ~0 : 0; - SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) == SubFloat( b, 1 )) ? 
~0 : 0; - SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) == SubFloat( b, 2 )) ? ~0 : 0; - SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) == SubFloat( b, 3 )) ? ~0 : 0; - return retVal; -} - -FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0 -{ - fltx4 retVal; - SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) > SubFloat( b, 0 )) ? ~0 : 0; - SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) > SubFloat( b, 1 )) ? ~0 : 0; - SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) > SubFloat( b, 2 )) ? ~0 : 0; - SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) > SubFloat( b, 3 )) ? ~0 : 0; - return retVal; -} - -FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0 -{ - fltx4 retVal; - SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) >= SubFloat( b, 0 )) ? ~0 : 0; - SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) >= SubFloat( b, 1 )) ? ~0 : 0; - SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) >= SubFloat( b, 2 )) ? ~0 : 0; - SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) >= SubFloat( b, 3 )) ? ~0 : 0; - return retVal; -} - -FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a= -b) ? ~0 : 0 -{ - fltx4 retVal; - SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) <= SubFloat( b, 0 ) && SubFloat( a, 0 ) >= -SubFloat( b, 0 ) ) ? ~0 : 0; - SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) <= SubFloat( b, 1 ) && SubFloat( a, 1 ) >= -SubFloat( b, 1 ) ) ? ~0 : 0; - SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) <= SubFloat( b, 2 ) && SubFloat( a, 2 ) >= -SubFloat( b, 2 ) ) ? ~0 : 0; - SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) <= SubFloat( b, 3 ) && SubFloat( a, 3 ) >= -SubFloat( b, 3 ) ) ? 
~0 : 0; - return retVal; -} - - -FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue ) -{ - return OrSIMD( - AndSIMD( ReplacementMask, NewValue ), - AndNotSIMD( ReplacementMask, OldValue ) ); -} - -FORCEINLINE fltx4 ReplicateX4( float flValue ) // a,a,a,a -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = flValue; - SubFloat( retVal, 1 ) = flValue; - SubFloat( retVal, 2 ) = flValue; - SubFloat( retVal, 3 ) = flValue; - return retVal; -} - -/// replicate a single 32 bit integer value to all 4 components of an m128 -FORCEINLINE fltx4 ReplicateIX4( int nValue ) -{ - fltx4 retVal; - SubInt( retVal, 0 ) = nValue; - SubInt( retVal, 1 ) = nValue; - SubInt( retVal, 2 ) = nValue; - SubInt( retVal, 3 ) = nValue; - return retVal; - -} - -// Round towards positive infinity -FORCEINLINE fltx4 CeilSIMD( const fltx4 &a ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = ceil( SubFloat( a, 0 ) ); - SubFloat( retVal, 1 ) = ceil( SubFloat( a, 1 ) ); - SubFloat( retVal, 2 ) = ceil( SubFloat( a, 2 ) ); - SubFloat( retVal, 3 ) = ceil( SubFloat( a, 3 ) ); - return retVal; - -} - -// Round towards negative infinity -FORCEINLINE fltx4 FloorSIMD( const fltx4 &a ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = floor( SubFloat( a, 0 ) ); - SubFloat( retVal, 1 ) = floor( SubFloat( a, 1 ) ); - SubFloat( retVal, 2 ) = floor( SubFloat( a, 2 ) ); - SubFloat( retVal, 3 ) = floor( SubFloat( a, 3 ) ); - return retVal; - -} - -FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = sqrt( SubFloat( a, 0 ) ); - SubFloat( retVal, 1 ) = sqrt( SubFloat( a, 1 ) ); - SubFloat( retVal, 2 ) = sqrt( SubFloat( a, 2 ) ); - SubFloat( retVal, 3 ) = sqrt( SubFloat( a, 3 ) ); - return retVal; -} - -FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = sqrt( SubFloat( a, 0 ) ); - SubFloat( retVal, 1 ) = sqrt( SubFloat( a, 1 ) ); - SubFloat( retVal, 2 ) = sqrt( 
SubFloat( a, 2 ) ); - SubFloat( retVal, 3 ) = sqrt( SubFloat( a, 3 ) ); - return retVal; -} - -FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = 1.0 / sqrt( SubFloat( a, 0 ) ); - SubFloat( retVal, 1 ) = 1.0 / sqrt( SubFloat( a, 1 ) ); - SubFloat( retVal, 2 ) = 1.0 / sqrt( SubFloat( a, 2 ) ); - SubFloat( retVal, 3 ) = 1.0 / sqrt( SubFloat( a, 3 ) ); - return retVal; -} - -FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = 1.0 / sqrt( SubFloat( a, 0 ) != 0.0f ? SubFloat( a, 0 ) : FLT_EPSILON ); - SubFloat( retVal, 1 ) = 1.0 / sqrt( SubFloat( a, 1 ) != 0.0f ? SubFloat( a, 1 ) : FLT_EPSILON ); - SubFloat( retVal, 2 ) = 1.0 / sqrt( SubFloat( a, 2 ) != 0.0f ? SubFloat( a, 2 ) : FLT_EPSILON ); - SubFloat( retVal, 3 ) = 1.0 / sqrt( SubFloat( a, 3 ) != 0.0f ? SubFloat( a, 3 ) : FLT_EPSILON ); - return retVal; -} - -FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = 1.0 / sqrt( SubFloat( a, 0 ) ); - SubFloat( retVal, 1 ) = 1.0 / sqrt( SubFloat( a, 1 ) ); - SubFloat( retVal, 2 ) = 1.0 / sqrt( SubFloat( a, 2 ) ); - SubFloat( retVal, 3 ) = 1.0 / sqrt( SubFloat( a, 3 ) ); - return retVal; -} - -FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = 1.0 / SubFloat( a, 0 ); - SubFloat( retVal, 1 ) = 1.0 / SubFloat( a, 1 ); - SubFloat( retVal, 2 ) = 1.0 / SubFloat( a, 2 ); - SubFloat( retVal, 3 ) = 1.0 / SubFloat( a, 3 ); - return retVal; -} - -FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = 1.0 / SubFloat( a, 0 ); - SubFloat( retVal, 1 ) = 1.0 / SubFloat( a, 1 ); - SubFloat( retVal, 2 ) = 1.0 / SubFloat( a, 2 ); - SubFloat( retVal, 3 ) = 1.0 / SubFloat( a, 3 ); - return retVal; -} - -/// 1/x for all 4 values. 
-/// 1/0 will result in a big but NOT infinite result -FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = 1.0 / (SubFloat( a, 0 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 0 )); - SubFloat( retVal, 1 ) = 1.0 / (SubFloat( a, 1 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 1 )); - SubFloat( retVal, 2 ) = 1.0 / (SubFloat( a, 2 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 2 )); - SubFloat( retVal, 3 ) = 1.0 / (SubFloat( a, 3 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 3 )); - return retVal; -} - -FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = 1.0 / (SubFloat( a, 0 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 0 )); - SubFloat( retVal, 1 ) = 1.0 / (SubFloat( a, 1 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 1 )); - SubFloat( retVal, 2 ) = 1.0 / (SubFloat( a, 2 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 2 )); - SubFloat( retVal, 3 ) = 1.0 / (SubFloat( a, 3 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 3 )); - return retVal; -} - -// 2^x for all values (the antilog) -FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = powf( 2, SubFloat(toPower, 0) ); - SubFloat( retVal, 1 ) = powf( 2, SubFloat(toPower, 1) ); - SubFloat( retVal, 2 ) = powf( 2, SubFloat(toPower, 2) ); - SubFloat( retVal, 3 ) = powf( 2, SubFloat(toPower, 3) ); - - return retVal; -} - -FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b ) -{ - float flDot = SubFloat( a, 0 ) * SubFloat( b, 0 ) + - SubFloat( a, 1 ) * SubFloat( b, 1 ) + - SubFloat( a, 2 ) * SubFloat( b, 2 ); - return ReplicateX4( flDot ); -} - -FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b ) -{ - float flDot = SubFloat( a, 0 ) * SubFloat( b, 0 ) + - SubFloat( a, 1 ) * SubFloat( b, 1 ) + - SubFloat( a, 2 ) * SubFloat( b, 2 ) + - SubFloat( a, 3 ) * SubFloat( b, 3 ); - return ReplicateX4( flDot ); -} - -// Clamps the components of a vector to a specified minimum and maximum range. 
-FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max) -{ - return MaxSIMD( min, MinSIMD( max, in ) ); -} - -// Squelch the w component of a vector to +0.0. -// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy) -FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a ) -{ - fltx4 retval; - retval = a; - SubFloat( retval, 0 ) = 0; - return retval; -} - -FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD ) -{ - return *( reinterpret_cast< const fltx4 *> ( pSIMD ) ); -} - -FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD ) -{ - return *( reinterpret_cast< const fltx4 *> ( pSIMD ) ); -} - -FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD ) -{ - return *( reinterpret_cast< const fltx4 *> ( pSIMD ) ); -} - -// for the transitional class -- load a 3-by VectorAligned and squash its w component -FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD ) -{ - fltx4 retval = LoadAlignedSIMD(pSIMD.Base()); - // squelch w - SubInt( retval, 3 ) = 0; - return retval; -} - -FORCEINLINE void StoreAlignedSIMD( float *pSIMD, const fltx4 & a ) -{ - *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a; -} - -FORCEINLINE void StoreUnalignedSIMD( float *pSIMD, const fltx4 & a ) -{ - *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a; -} - -FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a ) -{ - *pSIMD = SubFloat(a, 0); - *(pSIMD+1) = SubFloat(a, 1); - *(pSIMD+2) = SubFloat(a, 2); -} - -// strongly typed -- syntactic castor oil used for typechecking as we transition to SIMD -FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a ) -{ - StoreAlignedSIMD(pSIMD->Base(),a); -} - -FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w ) -{ -#define SWAP_FLOATS( _a_, _ia_, _b_, _ib_ ) { float tmp = SubFloat( _a_, _ia_ ); SubFloat( _a_, _ia_ ) = SubFloat( _b_, _ib_ ); SubFloat( _b_, _ib_ ) = tmp; } - SWAP_FLOATS( x, 1, y, 0 ); - SWAP_FLOATS( x, 2, z, 0 ); - SWAP_FLOATS( x, 3, w, 0 ); - 
SWAP_FLOATS( y, 2, z, 1 ); - SWAP_FLOATS( y, 3, w, 1 ); - SWAP_FLOATS( z, 3, w, 2 ); -} - -// find the lowest component of a.x, a.y, a.z, -// and replicate it to the whole return value. -FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a ) -{ - float lowest = min( min( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2)); - return ReplicateX4(lowest); -} - -// find the highest component of a.x, a.y, a.z, -// and replicate it to the whole return value. -FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a ) -{ - float highest = max( max( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2)); - return ReplicateX4(highest); -} - -// Fixed-point conversion and save as SIGNED INTS. -// pDest->x = Int (vSrc.x) -// note: some architectures have means of doing -// fixed point conversion when the fix depth is -// specified as an immediate.. but there is no way -// to guarantee an immediate as a parameter to function -// like this. -FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc) -{ - (*pDest)[0] = SubFloat(vSrc, 0); - (*pDest)[1] = SubFloat(vSrc, 1); - (*pDest)[2] = SubFloat(vSrc, 2); - (*pDest)[3] = SubFloat(vSrc, 3); -} - -// ------------------------------------ -// INTEGER SIMD OPERATIONS. -// ------------------------------------ -// splat all components of a vector to a signed immediate int number. 
-FORCEINLINE fltx4 IntSetImmediateSIMD( int nValue ) -{ - fltx4 retval; - SubInt( retval, 0 ) = SubInt( retval, 1 ) = SubInt( retval, 2 ) = SubInt( retval, 3) = nValue; - return retval; -} - -// Load 4 aligned words into a SIMD register -FORCEINLINE i32x4 LoadAlignedIntSIMD(const void * RESTRICT pSIMD) -{ - return *( reinterpret_cast< const i32x4 *> ( pSIMD ) ); -} - -// Load 4 unaligned words into a SIMD register -FORCEINLINE i32x4 LoadUnalignedIntSIMD( const void * RESTRICT pSIMD) -{ - return *( reinterpret_cast< const i32x4 *> ( pSIMD ) ); -} - -// save into four words, 16-byte aligned -FORCEINLINE void StoreAlignedIntSIMD( int32 *pSIMD, const fltx4 & a ) -{ - *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a; -} - -FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a ) -{ - *( reinterpret_cast< i32x4 *> ( pSIMD.Base() ) ) = a; -} - -FORCEINLINE void StoreUnalignedIntSIMD( int32 *pSIMD, const fltx4 & a ) -{ - *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a; -} - -// Take a fltx4 containing fixed-point uints and -// return them as single precision floats. No -// fixed point conversion is done. -FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const u32x4 &vSrcA ) -{ - Assert(0); /* pc has no such operation */ - fltx4 retval; - SubFloat( retval, 0 ) = ( (float) SubInt( retval, 0 ) ); - SubFloat( retval, 1 ) = ( (float) SubInt( retval, 1 ) ); - SubFloat( retval, 2 ) = ( (float) SubInt( retval, 2 ) ); - SubFloat( retval, 3 ) = ( (float) SubInt( retval, 3 ) ); - return retval; -} - - -#if 0 /* pc has no such op */ -// Take a fltx4 containing fixed-point sints and -// return them as single precision floats. No -// fixed point conversion is done. 
-FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA ) -{ - fltx4 retval; - SubFloat( retval, 0 ) = ( (float) (reinterpret_cast(&vSrcA.m128_s32[0])) ); - SubFloat( retval, 1 ) = ( (float) (reinterpret_cast(&vSrcA.m128_s32[1])) ); - SubFloat( retval, 2 ) = ( (float) (reinterpret_cast(&vSrcA.m128_s32[2])) ); - SubFloat( retval, 3 ) = ( (float) (reinterpret_cast(&vSrcA.m128_s32[3])) ); - return retval; -} - - -/* - works on fltx4's as if they are four uints. - the first parameter contains the words to be shifted, - the second contains the amount to shift by AS INTS - - for i = 0 to 3 - shift = vSrcB_i*32:(i*32)+4 - vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift -*/ -FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB) -{ - i32x4 retval; - SubInt(retval, 0) = SubInt(vSrcA, 0) << SubInt(vSrcB, 0); - SubInt(retval, 1) = SubInt(vSrcA, 1) << SubInt(vSrcB, 1); - SubInt(retval, 2) = SubInt(vSrcA, 2) << SubInt(vSrcB, 2); - SubInt(retval, 3) = SubInt(vSrcA, 3) << SubInt(vSrcB, 3); - - - return retval; -} -#endif - -#elif ( defined( _X360 ) ) - -//--------------------------------------------------------------------- -// X360 implementation -//--------------------------------------------------------------------- - -FORCEINLINE float & FloatSIMD( fltx4 & a, int idx ) -{ - fltx4_union & a_union = (fltx4_union &)a; - return a_union.m128_f32[idx]; -} - -FORCEINLINE unsigned int & UIntSIMD( fltx4 & a, int idx ) -{ - fltx4_union & a_union = (fltx4_union &)a; - return a_union.m128_u32[idx]; -} - -FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b ) -{ - return __vaddfp( a, b ); -} - -FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b -{ - return __vsubfp( a, b ); -} - -FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b -{ - return __vmulfp( a, b ); -} - -FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c -{ - return __vmaddfp( a, b, c ); -} - -FORCEINLINE 
fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b -{ - return __vnmsubfp( a, b, c ); -}; - -FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b ) -{ - return __vmsum3fp( a, b ); -} - -FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b ) -{ - return __vmsum4fp( a, b ); -} - -FORCEINLINE fltx4 SinSIMD( const fltx4 &radians ) -{ - return XMVectorSin( radians ); -} - -FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) -{ - XMVectorSinCos( &sine, &cosine, radians ); -} - -FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) -{ - XMVectorSinCos( &sine, &cosine, radians ); -} - -FORCEINLINE void CosSIMD( fltx4 &cosine, const fltx4 &radians ) -{ - cosine = XMVectorCos( radians ); -} - -FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine ) -{ - return XMVectorASin( sine ); -} - -FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs ) -{ - return XMVectorACos( cs ); -} - -// tan^1(a/b) .. ie, pass sin in as a and cos in as b -FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b ) -{ - return XMVectorATan2( a, b ); -} - -// DivSIMD defined further down, since it uses ReciprocalSIMD - -FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b) -{ - return __vmaxfp( a, b ); -} - -FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b) -{ - return __vminfp( a, b ); -} - -FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b -{ - return __vand( a, b ); -} - -FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b -{ - // NOTE: a and b are swapped in the call: SSE complements the first argument, VMX the second - return __vandc( b, a ); -} - -FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b -{ - return __vxor( a, b ); -} - -FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b -{ - return __vor( a, b ); -} - -FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a -{ - return 
XMVectorNegate(a); -} - -FORCEINLINE bool IsAllZeros( const fltx4 & a ) // all floats of a zero? -{ - unsigned int equalFlags = 0; - __vcmpeqfpR( a, Four_Zeros, &equalFlags ); - return XMComparisonAllTrue( equalFlags ); -} - -FORCEINLINE bool IsAnyZeros( const fltx4 & a ) // any floats are zero? -{ - unsigned int conditionregister; - XMVectorEqualR(&conditionregister, a, XMVectorZero()); - return XMComparisonAnyTrue(conditionregister); -} - -FORCEINLINE bool IsAnyXYZZero( const fltx4 &a ) // are any of x,y,z zero? -{ - // copy a's x component into w, in case w was zero. - fltx4 temp = __vrlimi(a, a, 1, 1); - unsigned int conditionregister; - XMVectorEqualR(&conditionregister, temp, XMVectorZero()); - return XMComparisonAnyTrue(conditionregister); -} - -// for branching when a.xyzw > b.xyzw -FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b ) -{ - unsigned int cr; - XMVectorGreaterR(&cr,a,b); - return XMComparisonAllTrue(cr); -} - -// for branching when a.xyzw >= b.xyzw -FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b ) -{ - unsigned int cr; - XMVectorGreaterOrEqualR(&cr,a,b); - return XMComparisonAllTrue(cr); -} - -// For branching if all a.xyzw == b.xyzw -FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b ) -{ - unsigned int cr; - XMVectorEqualR(&cr,a,b); - return XMComparisonAllTrue(cr); -} - - -FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set -{ - // NOTE: this maps to SSE way better than it does to VMX (most code uses IsAnyNegative(), though) - int nRet = 0; - - const fltx4_union & a_union = (const fltx4_union &)a; - nRet |= ( a_union.m128_u32[0] & 0x80000000 ) >> 31; // sign(x) -> bit 0 - nRet |= ( a_union.m128_u32[1] & 0x80000000 ) >> 30; // sign(y) -> bit 1 - nRet |= ( a_union.m128_u32[2] & 0x80000000 ) >> 29; // sign(z) -> bit 2 - nRet |= ( a_union.m128_u32[3] & 0x80000000 ) >> 28; // sign(w) -> bit 3 - - return nRet; -} - -// Squelch the w component of a vector to 
+0.0. -// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy) -FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a ) -{ - return __vrlimi( a, __vzero(), 1, 0 ); -} - -FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0) -{ - // NOTE: this tests the top bits of each vector element using integer math - // (so it ignores NaNs - it will return true for "-NaN") - unsigned int equalFlags = 0; - fltx4 signMask = __vspltisw( -1 ); // 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF (low order 5 bits of each element = 31) - signMask = __vslw( signMask, signMask ); // 0x80000000 0x80000000 0x80000000 0x80000000 - __vcmpequwR( Four_Zeros, __vand( signMask, a ), &equalFlags ); - return !XMComparisonAllTrue( equalFlags ); -} - -FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0 -{ - return __vcmpeqfp( a, b ); -} - - -FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0 -{ - return __vcmpgtfp( a, b ); -} - -FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0 -{ - return __vcmpgefp( a, b ); -} - -FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a= -b) ? ~0 : 0 -{ - return XMVectorInBounds( a, b ); -} - -// returned[i] = ReplacementMask[i] == 0 ? OldValue : NewValue -FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue ) -{ - return __vsel( OldValue, NewValue, ReplacementMask ); -} - -// AKA "Broadcast", "Splat" -FORCEINLINE fltx4 ReplicateX4( float flValue ) // a,a,a,a -{ - // NOTE: if flValue comes from a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) 
- float * pValue = &flValue; - Assert( pValue ); - Assert( ((unsigned int)pValue & 3) == 0); - return __vspltw( __lvlx( pValue, 0 ), 0 ); -} - -FORCEINLINE fltx4 ReplicateX4( const float *pValue ) // a,a,a,a -{ - Assert( pValue ); - return __vspltw( __lvlx( pValue, 0 ), 0 ); -} - -/// replicate a single 32 bit integer value to all 4 components of an m128 -FORCEINLINE fltx4 ReplicateIX4( int nValue ) -{ - // NOTE: if nValue comes from a register, this causes a Load-Hit-Store stall (should not mix ints with fltx4s!) - int * pValue = &nValue; - Assert( pValue ); - Assert( ((unsigned int)pValue & 3) == 0); - return __vspltw( __lvlx( pValue, 0 ), 0 ); -} - -// Round towards positive infinity -FORCEINLINE fltx4 CeilSIMD( const fltx4 &a ) -{ - return __vrfip(a); -} - -// Round towards nearest integer -FORCEINLINE fltx4 RoundSIMD( const fltx4 &a ) -{ - return __vrfin(a); -} - -// Round towards negative infinity -FORCEINLINE fltx4 FloorSIMD( const fltx4 &a ) -{ - return __vrfim(a); -} - -FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less -{ - // This is emulated from rsqrt - return XMVectorSqrtEst( a ); -} - -FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a) -{ - // This is emulated from rsqrt - return XMVectorSqrt( a ); -} - -FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less -{ - return __vrsqrtefp( a ); -} - -FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a ) -{ - // Convert zeros to epsilons - fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); - fltx4 a_safe = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); - return ReciprocalSqrtEstSIMD( a_safe ); -} - -FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a) -{ - // This uses Newton-Raphson to improve the HW result - return XMVectorReciprocalSqrt( a ); -} - -FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less -{ - return __vrefp( a ); -} - -/// 1/x for all 4 values. 
uses reciprocal approximation instruction plus newton iteration. -/// No error checking! -FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a -{ - // This uses Newton-Raphson to improve the HW result - return XMVectorReciprocal( a ); -} - -// FIXME: on 360, this is very slow, since it uses ReciprocalSIMD (do we need DivEstSIMD?) -FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b -{ - return MulSIMD( ReciprocalSIMD( b ), a ); -} - -/// 1/x for all 4 values. -/// 1/0 will result in a big but NOT infinite result -FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a ) -{ - // Convert zeros to epsilons - fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); - fltx4 a_safe = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); - return ReciprocalEstSIMD( a_safe ); -} - -FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a ) -{ - // Convert zeros to epsilons - fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); - fltx4 a_safe = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); - return ReciprocalSIMD( a_safe ); - - // FIXME: This could be faster (BUT: it doesn't preserve the sign of -0.0, whereas the above does) - // fltx4 zeroMask = CmpEqSIMD( Four_Zeros, a ); - // fltx4 a_safe = XMVectorSelect( a, Four_Epsilons, zeroMask ); - // return ReciprocalSIMD( a_safe ); -} - -// CHRISG: is it worth doing integer bitfiddling for this? -// 2^x for all values (the antilog) -FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower ) -{ - return XMVectorExp(toPower); -} - -// Clamps the components of a vector to a specified minimum and maximum range. -FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max) -{ - return XMVectorClamp(in, min, max); -} - -FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD ) -{ - return XMLoadVector4( pSIMD ); -} - -// load a 3-vector (as opposed to LoadUnalignedSIMD, which loads a 4-vec). 
-FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD ) -{ - return XMLoadVector3( pSIMD ); -} - -FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD ) -{ - return *( reinterpret_cast< const fltx4 *> ( pSIMD ) ); -} - -// for the transitional class -- load a 3-by VectorAligned and squash its w component -FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD ) -{ - fltx4 out = XMLoadVector3A(pSIMD.Base()); - // squelch w - return __vrlimi( out, __vzero(), 1, 0 ); -} - -// for the transitional class -- load a 3-by VectorAligned and squash its w component -FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned * RESTRICT pSIMD ) -{ - fltx4 out = XMLoadVector3A(pSIMD); - // squelch w - return __vrlimi( out, __vzero(), 1, 0 ); -} - -FORCEINLINE void StoreAlignedSIMD( float *pSIMD, const fltx4 & a ) -{ - *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a; -} - -FORCEINLINE void StoreUnalignedSIMD( float *pSIMD, const fltx4 & a ) -{ - XMStoreVector4( pSIMD, a ); -} - -FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a ) -{ - XMStoreVector3( pSIMD, a ); -} - - -// strongly typed -- for typechecking as we transition to SIMD -FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a ) -{ - XMStoreVector3A(pSIMD->Base(),a); -} - - -// Fixed-point conversion and save as SIGNED INTS. -// pDest->x = Int (vSrc.x) -// note: some architectures have means of doing -// fixed point conversion when the fix depth is -// specified as an immediate.. but there is no way -// to guarantee an immediate as a parameter to function -// like this. 
-FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc) -{ - fltx4 asInt = __vctsxs( vSrc, 0 ); - XMStoreVector4A(pDest->Base(), asInt); -} - -FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w ) -{ - XMMATRIX xyzwMatrix = _XMMATRIX( x, y, z, w ); - xyzwMatrix = XMMatrixTranspose( xyzwMatrix ); - x = xyzwMatrix.r[0]; - y = xyzwMatrix.r[1]; - z = xyzwMatrix.r[2]; - w = xyzwMatrix.r[3]; -} - -// Return one in the fastest way -- faster even than loading. -FORCEINLINE fltx4 LoadZeroSIMD( void ) -{ - return XMVectorZero(); -} - -// Return one in the fastest way -- faster even than loading. -FORCEINLINE fltx4 LoadOneSIMD( void ) -{ - return XMVectorSplatOne(); -} - -FORCEINLINE fltx4 SplatXSIMD( fltx4 a ) -{ - return XMVectorSplatX( a ); -} - -FORCEINLINE fltx4 SplatYSIMD( fltx4 a ) -{ - return XMVectorSplatY( a ); -} - -FORCEINLINE fltx4 SplatZSIMD( fltx4 a ) -{ - return XMVectorSplatZ( a ); -} - -FORCEINLINE fltx4 SplatWSIMD( fltx4 a ) -{ - return XMVectorSplatW( a ); -} - -FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x ) -{ - fltx4 result = __vrlimi(a, x, 8, 0); - return result; -} - -FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y ) -{ - fltx4 result = __vrlimi(a, y, 4, 0); - return result; -} - -FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z ) -{ - fltx4 result = __vrlimi(a, z, 2, 0); - return result; -} - -FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w ) -{ - fltx4 result = __vrlimi(a, w, 1, 0); - return result; -} - -FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue ) -{ - static int s_nVrlimiMask[4] = { 8, 4, 2, 1 }; - fltx4 val = ReplicateX4( flValue ); - fltx4 result = __vrlimi(a, val, s_nVrlimiMask[nComponent], 0); - return result; -} - -FORCEINLINE fltx4 RotateLeft( const fltx4 & a ) -{ - fltx4 compareOne = a; - return __vrlimi( compareOne, a, 8 | 4 | 2 | 1, 1 ); -} - -FORCEINLINE fltx4 RotateLeft2( const fltx4 & a ) -{ - fltx4 
compareOne = a; - return __vrlimi( compareOne, a, 8 | 4 | 2 | 1, 2 ); -} - - - -// find the lowest component of a.x, a.y, a.z, -// and replicate it to the whole return value. -// ignores a.w. -// Though this is only five instructions long, -// they are all dependent, making this stall city. -// Forcing this inline should hopefully help with scheduling. -FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a ) -{ - // a is [x,y,z,G] (where G is garbage) - // rotate left by one - fltx4 compareOne = a ; - compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 ); - // compareOne is [y,z,G,G] - fltx4 retval = MinSIMD( a, compareOne ); - // retVal is [min(x,y), min(y,z), G, G] - compareOne = __vrlimi( compareOne, a, 8 , 2); - // compareOne is [z, G, G, G] - retval = MinSIMD( retval, compareOne ); - // retVal = [ min(min(x,y),z), G, G, G ] - - // splat the x component out to the whole vector and return - return SplatXSIMD( retval ); -} - -// find the highest component of a.x, a.y, a.z, -// and replicate it to the whole return value. -// ignores a.w. -// Though this is only five instructions long, -// they are all dependent, making this stall city. -// Forcing this inline should hopefully help with scheduling. -FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a ) -{ - // a is [x,y,z,G] (where G is garbage) - // rotate left by one - fltx4 compareOne = a ; - compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 ); - // compareOne is [y,z,G,G] - fltx4 retval = MaxSIMD( a, compareOne ); - // retVal is [max(x,y), max(y,z), G, G] - compareOne = __vrlimi( compareOne, a, 8 , 2); - // compareOne is [z, G, G, G] - retval = MaxSIMD( retval, compareOne ); - // retVal = [ max(max(x,y),z), G, G, G ] - - // splat the x component out to the whole vector and return - return SplatXSIMD( retval ); -} - - -// Transform many (horizontal) points in-place by a 3x4 matrix, -// here already loaded onto three fltx4 registers. -// The points must be stored as 16-byte aligned. 
They are points -// and not vectors because we assume the w-component to be 1. -// To spare yourself the annoyance of loading the matrix yourself, -// use one of the overloads below. -void TransformManyPointsBy(VectorAligned * RESTRICT pVectors, unsigned int numVectors, FLTX4 mRow1, FLTX4 mRow2, FLTX4 mRow3); - -// Transform many (horizontal) points in-place by a 3x4 matrix. -// The points must be stored as 16-byte aligned. They are points -// and not vectors because we assume the w-component to be 1. -// In this function, the matrix need not be aligned. -FORCEINLINE void TransformManyPointsBy(VectorAligned * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t &pMatrix) -{ - return TransformManyPointsBy(pVectors, numVectors, - LoadUnalignedSIMD( pMatrix[0] ), LoadUnalignedSIMD( pMatrix[1] ), LoadUnalignedSIMD( pMatrix[2] ) ); -} - -// Transform many (horizontal) points in-place by a 3x4 matrix. -// The points must be stored as 16-byte aligned. They are points -// and not vectors because we assume the w-component to be 1. -// In this function, the matrix must itself be aligned on a 16-byte -// boundary. -FORCEINLINE void TransformManyPointsByA(VectorAligned * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t &pMatrix) -{ - return TransformManyPointsBy(pVectors, numVectors, - LoadAlignedSIMD( pMatrix[0] ), LoadAlignedSIMD( pMatrix[1] ), LoadAlignedSIMD( pMatrix[2] ) ); -} - -// ------------------------------------ -// INTEGER SIMD OPERATIONS. 
-// ------------------------------------ - -// Load 4 aligned words into a SIMD register -FORCEINLINE i32x4 LoadAlignedIntSIMD( const void * RESTRICT pSIMD) -{ - return XMLoadVector4A(pSIMD); -} - -// Load 4 unaligned words into a SIMD register -FORCEINLINE i32x4 LoadUnalignedIntSIMD(const void * RESTRICT pSIMD) -{ - return XMLoadVector4( pSIMD ); -} - -// save into four words, 16-byte aligned -FORCEINLINE void StoreAlignedIntSIMD( int32 *pSIMD, const fltx4 & a ) -{ - *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a; -} - -FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a ) -{ - *( reinterpret_cast< i32x4 *> ( pSIMD.Base() ) ) = a; -} - -FORCEINLINE void StoreUnalignedIntSIMD( int32 *pSIMD, const fltx4 & a ) -{ - XMStoreVector4(pSIMD, a); -} - - -// Take a fltx4 containing fixed-point uints and -// return them as single precision floats. No -// fixed point conversion is done. -FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const i32x4 &vSrcA ) -{ - return __vcfux( vSrcA, 0 ); -} - - -// Take a fltx4 containing fixed-point sints and -// return them as single precision floats. No -// fixed point conversion is done. -FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA ) -{ - return __vcfsx( vSrcA, 0 ); -} - -// Take a fltx4 containing fixed-point uints and -// return them as single precision floats. Each uint -// will be divided by 2^immed after conversion -// (eg, this is fixed point math). -/* as if: - FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const i32x4 &vSrcA, unsigned int uImmed ) - { - return __vcfux( vSrcA, uImmed ); - } -*/ -#define UnsignedFixedIntConvertToFltSIMD(vSrcA, uImmed) (__vcfux( (vSrcA), (uImmed) )) - -// Take a fltx4 containing fixed-point sints and -// return them as single precision floats. Each int -// will be divided by 2^immed (eg, this is fixed point -// math). 
-/* as if: - FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA, unsigned int uImmed ) - { - return __vcfsx( vSrcA, uImmed ); - } -*/ -#define SignedFixedIntConvertToFltSIMD(vSrcA, uImmed) (__vcfsx( (vSrcA), (uImmed) )) - -// set all components of a vector to a signed immediate int number. -/* as if: - FORCEINLINE fltx4 IntSetImmediateSIMD(int toImmediate) - { - return __vspltisw( toImmediate ); - } -*/ -#define IntSetImmediateSIMD(x) (__vspltisw(x)) - -/* - works on fltx4's as if they are four uints. - the first parameter contains the words to be shifted, - the second contains the amount to shift by AS INTS - - for i = 0 to 3 - shift = vSrcB_i*32:(i*32)+4 - vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift -*/ -FORCEINLINE fltx4 IntShiftLeftWordSIMD(fltx4 vSrcA, fltx4 vSrcB) -{ - return __vslw(vSrcA, vSrcB); -} - -FORCEINLINE float SubFloat( const fltx4 & a, int idx ) -{ - // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) 
- const fltx4_union & a_union = (const fltx4_union &)a; - return a_union.m128_f32[ idx ]; -} - -FORCEINLINE float & SubFloat( fltx4 & a, int idx ) -{ - fltx4_union & a_union = (fltx4_union &)a; - return a_union.m128_f32[idx]; -} - -FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx ) -{ - fltx4 t = __vctuxs( a, 0 ); - const fltx4_union & a_union = (const fltx4_union &)t; - return a_union.m128_u32[idx]; -} - - -FORCEINLINE uint32 SubInt( const fltx4 & a, int idx ) -{ - const fltx4_union & a_union = (const fltx4_union &)a; - return a_union.m128_u32[idx]; -} - -FORCEINLINE uint32 & SubInt( fltx4 & a, int idx ) -{ - fltx4_union & a_union = (fltx4_union &)a; - return a_union.m128_u32[idx]; -} - -#else - -//--------------------------------------------------------------------- -// Intel/SSE implementation -//--------------------------------------------------------------------- - -FORCEINLINE void StoreAlignedSIMD( float * RESTRICT pSIMD, const fltx4 & a ) -{ - _mm_store_ps( pSIMD, a ); -} - -FORCEINLINE void StoreUnalignedSIMD( float * RESTRICT pSIMD, const fltx4 & a ) -{ - _mm_storeu_ps( pSIMD, a ); -} - - -FORCEINLINE fltx4 RotateLeft( const fltx4 & a ); -FORCEINLINE fltx4 RotateLeft2( const fltx4 & a ); - -FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a ) -{ - _mm_store_ss(pSIMD, a); - _mm_store_ss(pSIMD+1, RotateLeft(a)); - _mm_store_ss(pSIMD+2, RotateLeft2(a)); -} - -// strongly typed -- syntactic castor oil used for typechecking as we transition to SIMD -FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a ) -{ - StoreAlignedSIMD( pSIMD->Base(),a ); -} - -FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD ) -{ - return _mm_load_ps( reinterpret_cast< const float *> ( pSIMD ) ); -} - -FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b -{ - return _mm_and_ps( a, b ); -} - -FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b -{ - return _mm_andnot_ps( a, b ); 
-} - -FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b -{ - return _mm_xor_ps( a, b ); -} - -FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b -{ - return _mm_or_ps( a, b ); -} - -// Squelch the w component of a vector to +0.0. -// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy) -FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a ) -{ - return AndSIMD( a, LoadAlignedSIMD( g_SIMD_clear_wmask ) ); -} - -// for the transitional class -- load a 3-by VectorAligned and squash its w component -FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD ) -{ - return SetWToZeroSIMD( LoadAlignedSIMD(pSIMD.Base()) ); -} - -FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD ) -{ - return _mm_loadu_ps( reinterpret_cast( pSIMD ) ); -} - -FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD ) -{ - return _mm_loadu_ps( reinterpret_cast( pSIMD ) ); -} - -/// replicate a single 32 bit integer value to all 4 components of an m128 -FORCEINLINE fltx4 ReplicateIX4( int i ) -{ - fltx4 value = _mm_set_ss( * ( ( float *) &i ) );; - return _mm_shuffle_ps( value, value, 0); -} - - -FORCEINLINE fltx4 ReplicateX4( float flValue ) -{ - __m128 value = _mm_set_ss( flValue ); - return _mm_shuffle_ps( value, value, 0 ); -} - - -FORCEINLINE float SubFloat( const fltx4 & a, int idx ) -{ - // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) 
-#ifndef POSIX - return a.m128_f32[ idx ]; -#else - return (reinterpret_cast(&a))[idx]; -#endif -} - -FORCEINLINE float & SubFloat( fltx4 & a, int idx ) -{ -#ifndef POSIX - return a.m128_f32[ idx ]; -#else - return (reinterpret_cast(&a))[idx]; -#endif -} - -FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx ) -{ - return (uint32)SubFloat(a,idx); -} - -FORCEINLINE uint32 SubInt( const fltx4 & a, int idx ) -{ -#ifndef POSIX - return a.m128_u32[idx]; -#else - return (reinterpret_cast(&a))[idx]; -#endif -} - -FORCEINLINE uint32 & SubInt( fltx4 & a, int idx ) -{ -#ifndef POSIX - return a.m128_u32[idx]; -#else - return (reinterpret_cast(&a))[idx]; -#endif -} - -// Return one in the fastest way -- on the x360, faster even than loading. -FORCEINLINE fltx4 LoadZeroSIMD( void ) -{ - return Four_Zeros; -} - -// Return one in the fastest way -- on the x360, faster even than loading. -FORCEINLINE fltx4 LoadOneSIMD( void ) -{ - return Four_Ones; -} - -FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue ) -{ - return OrSIMD( - AndSIMD( ReplacementMask, NewValue ), - AndNotSIMD( ReplacementMask, OldValue ) ); -} - -// remember, the SSE numbers its words 3 2 1 0 -// The way we want to specify shuffles is backwards from the default -// MM_SHUFFLE_REV is in array index order (default is reversed) -#define MM_SHUFFLE_REV(a,b,c,d) _MM_SHUFFLE(d,c,b,a) - -FORCEINLINE fltx4 SplatXSIMD( fltx4 const & a ) -{ - return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 0, 0, 0, 0 ) ); -} - -FORCEINLINE fltx4 SplatYSIMD( fltx4 const &a ) -{ - return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 1, 1, 1, 1 ) ); -} - -FORCEINLINE fltx4 SplatZSIMD( fltx4 const &a ) -{ - return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 2, 2, 2, 2 ) ); -} - -FORCEINLINE fltx4 SplatWSIMD( fltx4 const &a ) -{ - return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 3, 3, 3 ) ); -} - -FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x ) -{ - fltx4 result = MaskedAssign( 
LoadAlignedSIMD( g_SIMD_ComponentMask[0] ), x, a ); - return result; -} - -FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y ) -{ - fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[1] ), y, a ); - return result; -} - -FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z ) -{ - fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[2] ), z, a ); - return result; -} - -FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w ) -{ - fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[3] ), w, a ); - return result; -} - -FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue ) -{ - fltx4 val = ReplicateX4( flValue ); - fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[nComponent] ), val, a ); - return result; -} - -// a b c d -> b c d a -FORCEINLINE fltx4 RotateLeft( const fltx4 & a ) -{ - return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 1, 2, 3, 0 ) ); -} - -// a b c d -> c d a b -FORCEINLINE fltx4 RotateLeft2( const fltx4 & a ) -{ - return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 2, 3, 0, 1 ) ); -} - -// a b c d -> d a b c -FORCEINLINE fltx4 RotateRight( const fltx4 & a ) -{ - return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 0, 3, 2, 1) ); -} - -// a b c d -> c d a b -FORCEINLINE fltx4 RotateRight2( const fltx4 & a ) -{ - return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 1, 0, 3, 2 ) ); -} - - -FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b ) // a+b -{ - return _mm_add_ps( a, b ); -}; - -FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b -{ - return _mm_sub_ps( a, b ); -}; - -FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b -{ - return _mm_mul_ps( a, b ); -}; - -FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b -{ - return _mm_div_ps( a, b ); -}; - -FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c -{ - return AddSIMD( MulSIMD(a,b), c ); -} - -FORCEINLINE fltx4 MsubSIMD( 
const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b -{ - return SubSIMD( c, MulSIMD(a,b) ); -}; - -FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b ) -{ - fltx4 m = MulSIMD( a, b ); - float flDot = SubFloat( m, 0 ) + SubFloat( m, 1 ) + SubFloat( m, 2 ); - return ReplicateX4( flDot ); -} - -FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b ) -{ - fltx4 m = MulSIMD( a, b ); - float flDot = SubFloat( m, 0 ) + SubFloat( m, 1 ) + SubFloat( m, 2 ) + SubFloat( m, 3 ); - return ReplicateX4( flDot ); -} - -//TODO: implement as four-way Taylor series (see xbox implementation) -FORCEINLINE fltx4 SinSIMD( const fltx4 &radians ) -{ - fltx4 result; - SubFloat( result, 0 ) = sin( SubFloat( radians, 0 ) ); - SubFloat( result, 1 ) = sin( SubFloat( radians, 1 ) ); - SubFloat( result, 2 ) = sin( SubFloat( radians, 2 ) ); - SubFloat( result, 3 ) = sin( SubFloat( radians, 3 ) ); - return result; -} - -FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) -{ - // FIXME: Make a fast SSE version - SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) ); - SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) ); - SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) ); -} - -FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) // a*b + c -{ - // FIXME: Make a fast SSE version - SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) ); - SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) ); - SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) ); - SinCos( SubFloat( radians, 3 ), &SubFloat( sine, 3 ), &SubFloat( cosine, 3 ) ); -} - -//TODO: implement as four-way Taylor series (see xbox implementation) -FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine ) -{ - // FIXME: Make a fast SSE version - fltx4 result; - SubFloat( result, 0 ) = asin( SubFloat( sine, 0 ) ); - 
SubFloat( result, 1 ) = asin( SubFloat( sine, 1 ) ); - SubFloat( result, 2 ) = asin( SubFloat( sine, 2 ) ); - SubFloat( result, 3 ) = asin( SubFloat( sine, 3 ) ); - return result; -} - -FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs ) -{ - fltx4 result; - SubFloat( result, 0 ) = acos( SubFloat( cs, 0 ) ); - SubFloat( result, 1 ) = acos( SubFloat( cs, 1 ) ); - SubFloat( result, 2 ) = acos( SubFloat( cs, 2 ) ); - SubFloat( result, 3 ) = acos( SubFloat( cs, 3 ) ); - return result; -} - -// tan^1(a/b) .. ie, pass sin in as a and cos in as b -FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b ) -{ - fltx4 result; - SubFloat( result, 0 ) = atan2( SubFloat( a, 0 ), SubFloat( b, 0 ) ); - SubFloat( result, 1 ) = atan2( SubFloat( a, 1 ), SubFloat( b, 1 ) ); - SubFloat( result, 2 ) = atan2( SubFloat( a, 2 ), SubFloat( b, 2 ) ); - SubFloat( result, 3 ) = atan2( SubFloat( a, 3 ), SubFloat( b, 3 ) ); - return result; -} - -FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a -{ - return SubSIMD(LoadZeroSIMD(),a); -} - -FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set -{ - return _mm_movemask_ps( a ); -} - -FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0) -{ - return (0 != TestSignSIMD( a )); -} - -FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0 -{ - return _mm_cmpeq_ps( a, b ); -} - -FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0 -{ - return _mm_cmpgt_ps( a, b ); -} - -FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? 
~0:0 -{ - return _mm_cmpge_ps( a, b ); -} - -FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a b.xyzw -FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b ) -{ - return TestSignSIMD( CmpLeSIMD( a, b ) ) == 0; -} - -// for branching when a.xyzw >= b.xyzw -FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b ) -{ - return TestSignSIMD( CmpLtSIMD( a, b ) ) == 0; -} - -// For branching if all a.xyzw == b.xyzw -FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b ) -{ - return TestSignSIMD( CmpEqSIMD( a, b ) ) == 0xf; -} - -FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <= b && a >= -b) ? ~0 : 0 -{ - return AndSIMD( CmpLeSIMD(a,b), CmpGeSIMD(a, NegSIMD(b)) ); -} - -FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b) -{ - return _mm_min_ps( a, b ); -} - -FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b) -{ - return _mm_max_ps( a, b ); -} - - - -// SSE lacks rounding operations. -// Really. -// You can emulate them by setting the rounding mode for the -// whole processor and then converting to int, and then back again. -// But every time you set the rounding mode, you clear out the -// entire pipeline. So, I can't do them per operation. You -// have to do it once, before the loop that would call these. -// Round towards positive infinity -FORCEINLINE fltx4 CeilSIMD( const fltx4 &a ) -{ - fltx4 retVal; - SubFloat( retVal, 0 ) = ceil( SubFloat( a, 0 ) ); - SubFloat( retVal, 1 ) = ceil( SubFloat( a, 1 ) ); - SubFloat( retVal, 2 ) = ceil( SubFloat( a, 2 ) ); - SubFloat( retVal, 3 ) = ceil( SubFloat( a, 3 ) ); - return retVal; - -} - -fltx4 fabs( const fltx4 & x ); -// Round towards negative infinity -// This is the implementation that was here before; it assumes -// you are in round-to-floor mode, which I guess is usually the -// case for us vis-a-vis SSE. It's totally unnecessary on -// VMX, which has a native floor op. 
-FORCEINLINE fltx4 FloorSIMD( const fltx4 &val ) -{ - fltx4 fl4Abs = fabs( val ); - fltx4 ival = SubSIMD( AddSIMD( fl4Abs, Four_2ToThe23s ), Four_2ToThe23s ); - ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Ones ), ival ); - return XorSIMD( ival, XorSIMD( val, fl4Abs ) ); // restore sign bits -} - - - -inline bool IsAllZeros( const fltx4 & var ) -{ - return TestSignSIMD( CmpEqSIMD( var, Four_Zeros ) ) == 0xF; -} - -FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less -{ - return _mm_sqrt_ps( a ); -} - -FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a) -{ - return _mm_sqrt_ps( a ); -} - -FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less -{ - return _mm_rsqrt_ps( a ); -} - -FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a ) -{ - fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); - fltx4 ret = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); - ret = ReciprocalSqrtEstSIMD( ret ); - return ret; -} - -/// uses newton iteration for higher precision results than ReciprocalSqrtEstSIMD -FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a) -{ - fltx4 guess = ReciprocalSqrtEstSIMD( a ); - // newton iteration for 1/sqrt(a) : y(n+1) = 1/2 (y(n)*(3-a*y(n)^2)); - guess = MulSIMD( guess, SubSIMD( Four_Threes, MulSIMD( a, MulSIMD( guess, guess )))); - guess = MulSIMD( Four_PointFives, guess); - return guess; -} - -FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less -{ - return _mm_rcp_ps( a ); -} - -/// 1/x for all 4 values, more or less -/// 1/0 will result in a big but NOT infinite result -FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a ) -{ - fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); - fltx4 ret = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); - ret = ReciprocalEstSIMD( ret ); - return ret; -} - -/// 1/x for all 4 values. uses reciprocal approximation instruction plus newton iteration. -/// No error checking! 
-FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a -{ - fltx4 ret = ReciprocalEstSIMD( a ); - // newton iteration is: Y(n+1) = 2*Y(n)-a*Y(n)^2 - ret = SubSIMD( AddSIMD( ret, ret ), MulSIMD( a, MulSIMD( ret, ret ) ) ); - return ret; -} - -/// 1/x for all 4 values. -/// 1/0 will result in a big but NOT infinite result -FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a ) -{ - fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); - fltx4 ret = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); - ret = ReciprocalSIMD( ret ); - return ret; -} - -// CHRISG: is it worth doing integer bitfiddling for this? -// 2^x for all values (the antilog) -FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower ) -{ - fltx4 retval; - SubFloat( retval, 0 ) = powf( 2, SubFloat(toPower, 0) ); - SubFloat( retval, 1 ) = powf( 2, SubFloat(toPower, 1) ); - SubFloat( retval, 2 ) = powf( 2, SubFloat(toPower, 2) ); - SubFloat( retval, 3 ) = powf( 2, SubFloat(toPower, 3) ); - - return retval; -} - -// Clamps the components of a vector to a specified minimum and maximum range. -FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max) -{ - return MaxSIMD( min, MinSIMD( max, in ) ); -} - -FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w) -{ - _MM_TRANSPOSE4_PS( x, y, z, w ); -} - -FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 &a ) -{ - // a is [x,y,z,G] (where G is garbage) - // rotate left by one - fltx4 compareOne = RotateLeft( a ); - // compareOne is [y,z,G,x] - fltx4 retval = MinSIMD( a, compareOne ); - // retVal is [min(x,y), ... ] - compareOne = RotateLeft2( a ); - // compareOne is [z, G, x, y] - retval = MinSIMD( retval, compareOne ); - // retVal = [ min(min(x,y),z)..] 
- // splat the x component out to the whole vector and return - return SplatXSIMD( retval ); - -} - -FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 &a ) -{ - // a is [x,y,z,G] (where G is garbage) - // rotate left by one - fltx4 compareOne = RotateLeft( a ); - // compareOne is [y,z,G,x] - fltx4 retval = MaxSIMD( a, compareOne ); - // retVal is [max(x,y), ... ] - compareOne = RotateLeft2( a ); - // compareOne is [z, G, x, y] - retval = MaxSIMD( retval, compareOne ); - // retVal = [ max(max(x,y),z)..] - // splat the x component out to the whole vector and return - return SplatXSIMD( retval ); - -} - -// ------------------------------------ -// INTEGER SIMD OPERATIONS. -// ------------------------------------ - - -#if 0 /* pc does not have these ops */ -// splat all components of a vector to a signed immediate int number. -FORCEINLINE fltx4 IntSetImmediateSIMD(int to) -{ - //CHRISG: SSE2 has this, but not SSE1. What to do? - fltx4 retval; - SubInt( retval, 0 ) = to; - SubInt( retval, 1 ) = to; - SubInt( retval, 2 ) = to; - SubInt( retval, 3 ) = to; - return retval; -} -#endif - -// Load 4 aligned words into a SIMD register -FORCEINLINE i32x4 LoadAlignedIntSIMD( const void * RESTRICT pSIMD) -{ - return _mm_load_ps( reinterpret_cast(pSIMD) ); -} - -// Load 4 unaligned words into a SIMD register -FORCEINLINE i32x4 LoadUnalignedIntSIMD( const void * RESTRICT pSIMD) -{ - return _mm_loadu_ps( reinterpret_cast(pSIMD) ); -} - -// save into four words, 16-byte aligned -FORCEINLINE void StoreAlignedIntSIMD( int32 * RESTRICT pSIMD, const fltx4 & a ) -{ - _mm_store_ps( reinterpret_cast(pSIMD), a ); -} - -FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a ) -{ - _mm_store_ps( reinterpret_cast(pSIMD.Base()), a ); -} - -FORCEINLINE void StoreUnalignedIntSIMD( int32 * RESTRICT pSIMD, const fltx4 & a ) -{ - _mm_storeu_ps( reinterpret_cast(pSIMD), a ); -} - - -// CHRISG: the conversion functions all seem to operate on m64's only... -// how do we make them work here? 
- -// Take a fltx4 containing fixed-point uints and -// return them as single precision floats. No -// fixed point conversion is done. -FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const u32x4 &vSrcA ) -{ - fltx4 retval; - SubFloat( retval, 0 ) = ( (float) SubInt( retval, 0 ) ); - SubFloat( retval, 1 ) = ( (float) SubInt( retval, 1 ) ); - SubFloat( retval, 2 ) = ( (float) SubInt( retval, 2 ) ); - SubFloat( retval, 3 ) = ( (float) SubInt( retval, 3 ) ); - return retval; -} - - -// Take a fltx4 containing fixed-point sints and -// return them as single precision floats. No -// fixed point conversion is done. -FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA ) -{ - fltx4 retval; - SubFloat( retval, 0 ) = ( (float) (reinterpret_cast(&vSrcA)[0])); - SubFloat( retval, 1 ) = ( (float) (reinterpret_cast(&vSrcA)[1])); - SubFloat( retval, 2 ) = ( (float) (reinterpret_cast(&vSrcA)[2])); - SubFloat( retval, 3 ) = ( (float) (reinterpret_cast(&vSrcA)[3])); - return retval; -} - -/* - works on fltx4's as if they are four uints. - the first parameter contains the words to be shifted, - the second contains the amount to shift by AS INTS - - for i = 0 to 3 - shift = vSrcB_i*32:(i*32)+4 - vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift -*/ -FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB) -{ - i32x4 retval; - SubInt(retval, 0) = SubInt(vSrcA, 0) << SubInt(vSrcB, 0); - SubInt(retval, 1) = SubInt(vSrcA, 1) << SubInt(vSrcB, 1); - SubInt(retval, 2) = SubInt(vSrcA, 2) << SubInt(vSrcB, 2); - SubInt(retval, 3) = SubInt(vSrcA, 3) << SubInt(vSrcB, 3); - - - return retval; -} - - -// Fixed-point conversion and save as SIGNED INTS. -// pDest->x = Int (vSrc.x) -// note: some architectures have means of doing -// fixed point conversion when the fix depth is -// specified as an immediate.. but there is no way -// to guarantee an immediate as a parameter to function -// like this. 
-FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc) -{ - __m64 bottom = _mm_cvttps_pi32( vSrc ); - __m64 top = _mm_cvttps_pi32( _mm_movehl_ps(vSrc,vSrc) ); - - *reinterpret_cast<__m64 *>(&(*pDest)[0]) = bottom; - *reinterpret_cast<__m64 *>(&(*pDest)[2]) = top; - - _mm_empty(); -} - - - -#endif - - - -/// class FourVectors stores 4 independent vectors for use in SIMD processing. These vectors are -/// stored in the format x x x x y y y y z z z z so that they can be efficiently SIMD-accelerated. -class ALIGN16 FourVectors -{ -public: - fltx4 x, y, z; - - FORCEINLINE void DuplicateVector(Vector const &v) //< set all 4 vectors to the same vector value - { - x=ReplicateX4(v.x); - y=ReplicateX4(v.y); - z=ReplicateX4(v.z); - } - - FORCEINLINE fltx4 const & operator[](int idx) const - { - return *((&x)+idx); - } - - FORCEINLINE fltx4 & operator[](int idx) - { - return *((&x)+idx); - } - - FORCEINLINE void operator+=(FourVectors const &b) //< add 4 vectors to another 4 vectors - { - x=AddSIMD(x,b.x); - y=AddSIMD(y,b.y); - z=AddSIMD(z,b.z); - } - - FORCEINLINE void operator-=(FourVectors const &b) //< subtract 4 vectors from another 4 - { - x=SubSIMD(x,b.x); - y=SubSIMD(y,b.y); - z=SubSIMD(z,b.z); - } - - FORCEINLINE void operator*=(FourVectors const &b) //< scale all four vectors per component scale - { - x=MulSIMD(x,b.x); - y=MulSIMD(y,b.y); - z=MulSIMD(z,b.z); - } - - FORCEINLINE void operator*=(const fltx4 & scale) //< scale - { - x=MulSIMD(x,scale); - y=MulSIMD(y,scale); - z=MulSIMD(z,scale); - } - - FORCEINLINE void operator*=(float scale) //< uniformly scale all 4 vectors - { - fltx4 scalepacked = ReplicateX4(scale); - *this *= scalepacked; - } - - FORCEINLINE fltx4 operator*(FourVectors const &b) const //< 4 dot products - { - fltx4 dot=MulSIMD(x,b.x); - dot=MaddSIMD(y,b.y,dot); - dot=MaddSIMD(z,b.z,dot); - return dot; - } - - FORCEINLINE fltx4 operator*(Vector const &b) const //< dot product all 4 vectors with 1 vector - { - fltx4 
dot=MulSIMD(x,ReplicateX4(b.x)); - dot=MaddSIMD(y,ReplicateX4(b.y), dot); - dot=MaddSIMD(z,ReplicateX4(b.z), dot); - return dot; - } - - FORCEINLINE void VProduct(FourVectors const &b) //< component by component mul - { - x=MulSIMD(x,b.x); - y=MulSIMD(y,b.y); - z=MulSIMD(z,b.z); - } - FORCEINLINE void MakeReciprocal(void) //< (x,y,z)=(1/x,1/y,1/z) - { - x=ReciprocalSIMD(x); - y=ReciprocalSIMD(y); - z=ReciprocalSIMD(z); - } - - FORCEINLINE void MakeReciprocalSaturate(void) //< (x,y,z)=(1/x,1/y,1/z), 1/0=1.0e23 - { - x=ReciprocalSaturateSIMD(x); - y=ReciprocalSaturateSIMD(y); - z=ReciprocalSaturateSIMD(z); - } - - // Assume the given matrix is a rotation, and rotate these vectors by it. - // If you have a long list of FourVectors structures that you all want - // to rotate by the same matrix, use FourVectors::RotateManyBy() instead. - inline void RotateBy(const matrix3x4_t& matrix); - - /// You can use this to rotate a long array of FourVectors all by the same - /// matrix. The first parameter is the head of the array. The second is the - /// number of vectors to rotate. The third is the matrix. - static void RotateManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix ); - - /// Assume the vectors are points, and transform them in place by the matrix. - inline void TransformBy(const matrix3x4_t& matrix); - - /// You can use this to Transform a long array of FourVectors all by the same - /// matrix. The first parameter is the head of the array. The second is the - /// number of vectors to rotate. The third is the matrix. The fourth is the - /// output buffer, which must not overlap the pVectors buffer. This is not - /// an in-place transformation. - static void TransformManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix, FourVectors * RESTRICT pOut ); - - /// You can use this to Transform a long array of FourVectors all by the same - /// matrix. 
The first parameter is the head of the array. The second is the - /// number of vectors to rotate. The third is the matrix. The fourth is the - /// output buffer, which must not overlap the pVectors buffer. - /// This is an in-place transformation. - static void TransformManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix ); - - // X(),Y(),Z() - get at the desired component of the i'th (0..3) vector. - FORCEINLINE const float & X(int idx) const - { - // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) - return SubFloat( (fltx4 &)x, idx ); - } - - FORCEINLINE const float & Y(int idx) const - { - return SubFloat( (fltx4 &)y, idx ); - } - - FORCEINLINE const float & Z(int idx) const - { - return SubFloat( (fltx4 &)z, idx ); - } - - FORCEINLINE float & X(int idx) - { - return SubFloat( x, idx ); - } - - FORCEINLINE float & Y(int idx) - { - return SubFloat( y, idx ); - } - - FORCEINLINE float & Z(int idx) - { - return SubFloat( z, idx ); - } - - FORCEINLINE Vector Vec(int idx) const //< unpack one of the vectors - { - return Vector( X(idx), Y(idx), Z(idx) ); - } - - FourVectors(void) - { - } - - FourVectors( FourVectors const &src ) - { - x=src.x; - y=src.y; - z=src.z; - } - - FORCEINLINE void operator=( FourVectors const &src ) - { - x=src.x; - y=src.y; - z=src.z; - } - - /// LoadAndSwizzle - load 4 Vectors into a FourVectors, performing transpose op - FORCEINLINE void LoadAndSwizzle(Vector const &a, Vector const &b, Vector const &c, Vector const &d) - { - // TransposeSIMD has large sub-expressions that the compiler can't eliminate on x360 - // use an unfolded implementation here -#if _X360 - fltx4 tx = LoadUnalignedSIMD( &a.x ); - fltx4 ty = LoadUnalignedSIMD( &b.x ); - fltx4 tz = LoadUnalignedSIMD( &c.x ); - fltx4 tw = LoadUnalignedSIMD( &d.x ); - fltx4 r0 = __vmrghw(tx, tz); - fltx4 r1 = __vmrghw(ty, tw); - fltx4 r2 = __vmrglw(tx, tz); - fltx4 r3 = __vmrglw(ty, tw); 
- - x = __vmrghw(r0, r1); - y = __vmrglw(r0, r1); - z = __vmrghw(r2, r3); -#else - x = LoadUnalignedSIMD( &( a.x )); - y = LoadUnalignedSIMD( &( b.x )); - z = LoadUnalignedSIMD( &( c.x )); - fltx4 w = LoadUnalignedSIMD( &( d.x )); - // now, matrix is: - // x y z ? - // x y z ? - // x y z ? - // x y z ? - TransposeSIMD(x, y, z, w); -#endif - } - - /// LoadAndSwizzleAligned - load 4 Vectors into a FourVectors, performing transpose op. - /// all 4 vectors must be 128 bit boundary - FORCEINLINE void LoadAndSwizzleAligned(const float *RESTRICT a, const float *RESTRICT b, const float *RESTRICT c, const float *RESTRICT d) - { -#if _X360 - fltx4 tx = LoadAlignedSIMD(a); - fltx4 ty = LoadAlignedSIMD(b); - fltx4 tz = LoadAlignedSIMD(c); - fltx4 tw = LoadAlignedSIMD(d); - fltx4 r0 = __vmrghw(tx, tz); - fltx4 r1 = __vmrghw(ty, tw); - fltx4 r2 = __vmrglw(tx, tz); - fltx4 r3 = __vmrglw(ty, tw); - - x = __vmrghw(r0, r1); - y = __vmrglw(r0, r1); - z = __vmrghw(r2, r3); -#else - x = LoadAlignedSIMD( a ); - y = LoadAlignedSIMD( b ); - z = LoadAlignedSIMD( c ); - fltx4 w = LoadAlignedSIMD( d ); - // now, matrix is: - // x y z ? - // x y z ? - // x y z ? - // x y z ? - TransposeSIMD( x, y, z, w ); -#endif - } - - FORCEINLINE void LoadAndSwizzleAligned(Vector const &a, Vector const &b, Vector const &c, Vector const &d) - { - LoadAndSwizzleAligned( &a.x, &b.x, &c.x, &d.x ); - } - - /// return the squared length of all 4 vectors - FORCEINLINE fltx4 length2(void) const - { - return (*this)*(*this); - } - - /// return the approximate length of all 4 vectors. uses the sqrt approximation instruction - FORCEINLINE fltx4 length(void) const - { - return SqrtEstSIMD(length2()); - } - - /// normalize all 4 vectors in place. not mega-accurate (uses reciprocal approximation instruction) - FORCEINLINE void VectorNormalizeFast(void) - { - fltx4 mag_sq=(*this)*(*this); // length^2 - (*this) *= ReciprocalSqrtEstSIMD(mag_sq); // *(1.0/sqrt(length^2)) - } - - /// normalize all 4 vectors in place. 
- FORCEINLINE void VectorNormalize(void) - { - fltx4 mag_sq=(*this)*(*this); // length^2 - (*this) *= ReciprocalSqrtSIMD(mag_sq); // *(1.0/sqrt(length^2)) - } - - /// construct a FourVectors from 4 separate Vectors - FORCEINLINE FourVectors(Vector const &a, Vector const &b, Vector const &c, Vector const &d) - { - LoadAndSwizzle(a,b,c,d); - } - - /// construct a FourVectors from 4 separate Vectors - FORCEINLINE FourVectors(VectorAligned const &a, VectorAligned const &b, VectorAligned const &c, VectorAligned const &d) - { - LoadAndSwizzleAligned(a,b,c,d); - } - - FORCEINLINE fltx4 DistToSqr( FourVectors const &pnt ) - { - fltx4 fl4dX = SubSIMD( pnt.x, x ); - fltx4 fl4dY = SubSIMD( pnt.y, y ); - fltx4 fl4dZ = SubSIMD( pnt.z, z ); - return AddSIMD( MulSIMD( fl4dX, fl4dX), AddSIMD( MulSIMD( fl4dY, fl4dY ), MulSIMD( fl4dZ, fl4dZ ) ) ); - - } - - FORCEINLINE fltx4 TValueOfClosestPointOnLine( FourVectors const &p0, FourVectors const &p1 ) const - { - FourVectors lineDelta = p1; - lineDelta -= p0; - fltx4 OOlineDirDotlineDir = ReciprocalSIMD( p1 * p1 ); - FourVectors v4OurPnt = *this; - v4OurPnt -= p0; - return MulSIMD( OOlineDirDotlineDir, v4OurPnt * lineDelta ); - } - - FORCEINLINE fltx4 DistSqrToLineSegment( FourVectors const &p0, FourVectors const &p1 ) const - { - FourVectors lineDelta = p1; - FourVectors v4OurPnt = *this; - v4OurPnt -= p0; - lineDelta -= p0; - - fltx4 OOlineDirDotlineDir = ReciprocalSIMD( lineDelta * lineDelta ); - - fltx4 fl4T = MulSIMD( OOlineDirDotlineDir, v4OurPnt * lineDelta ); - - fl4T = MinSIMD( fl4T, Four_Ones ); - fl4T = MaxSIMD( fl4T, Four_Zeros ); - lineDelta *= fl4T; - return v4OurPnt.DistToSqr( lineDelta ); - } - -}; - -/// form 4 cross products -inline FourVectors operator ^(const FourVectors &a, const FourVectors &b) -{ - FourVectors ret; - ret.x=SubSIMD(MulSIMD(a.y,b.z),MulSIMD(a.z,b.y)); - ret.y=SubSIMD(MulSIMD(a.z,b.x),MulSIMD(a.x,b.z)); - ret.z=SubSIMD(MulSIMD(a.x,b.y),MulSIMD(a.y,b.x)); - return ret; -} - -/// 
component-by-componentwise MAX operator -inline FourVectors maximum(const FourVectors &a, const FourVectors &b) -{ - FourVectors ret; - ret.x=MaxSIMD(a.x,b.x); - ret.y=MaxSIMD(a.y,b.y); - ret.z=MaxSIMD(a.z,b.z); - return ret; -} - -/// component-by-componentwise MIN operator -inline FourVectors minimum(const FourVectors &a, const FourVectors &b) -{ - FourVectors ret; - ret.x=MinSIMD(a.x,b.x); - ret.y=MinSIMD(a.y,b.y); - ret.z=MinSIMD(a.z,b.z); - return ret; -} - -/// calculate reflection vector. incident and normal dir assumed normalized -FORCEINLINE FourVectors VectorReflect( const FourVectors &incident, const FourVectors &normal ) -{ - FourVectors ret = incident; - fltx4 iDotNx2 = incident * normal; - iDotNx2 = AddSIMD( iDotNx2, iDotNx2 ); - FourVectors nPart = normal; - nPart *= iDotNx2; - ret -= nPart; // i-2(n*i)n - return ret; -} - -/// calculate slide vector. removes all components of a vector which are perpendicular to a normal vector. -FORCEINLINE FourVectors VectorSlide( const FourVectors &incident, const FourVectors &normal ) -{ - FourVectors ret = incident; - fltx4 iDotN = incident * normal; - FourVectors nPart = normal; - nPart *= iDotN; - ret -= nPart; // i-(n*i)n - return ret; -} - - -// Assume the given matrix is a rotation, and rotate these vectors by it. -// If you have a long list of FourVectors structures that you all want -// to rotate by the same matrix, use FourVectors::RotateManyBy() instead. -void FourVectors::RotateBy(const matrix3x4_t& matrix) -{ - // Splat out each of the entries in the matrix to a fltx4. Do this - // in the order that we will need them, to hide latency. I'm - // avoiding making an array of them, so that they'll remain in - // registers. - fltx4 matSplat00, matSplat01, matSplat02, - matSplat10, matSplat11, matSplat12, - matSplat20, matSplat21, matSplat22; - - { - // Load the matrix into local vectors. Sadly, matrix3x4_ts are - // often unaligned. 
The w components will be the tranpose row of - // the matrix, but we don't really care about that. - fltx4 matCol0 = LoadUnalignedSIMD( matrix[0] ); - fltx4 matCol1 = LoadUnalignedSIMD( matrix[1] ); - fltx4 matCol2 = LoadUnalignedSIMD( matrix[2] ); - - matSplat00 = SplatXSIMD( matCol0 ); - matSplat01 = SplatYSIMD( matCol0 ); - matSplat02 = SplatZSIMD( matCol0 ); - - matSplat10 = SplatXSIMD( matCol1 ); - matSplat11 = SplatYSIMD( matCol1 ); - matSplat12 = SplatZSIMD( matCol1 ); - - matSplat20 = SplatXSIMD( matCol2 ); - matSplat21 = SplatYSIMD( matCol2 ); - matSplat22 = SplatZSIMD( matCol2 ); - } - - // Trust in the compiler to schedule these operations correctly: - fltx4 outX, outY, outZ; - outX = AddSIMD( AddSIMD( MulSIMD( x, matSplat00 ), MulSIMD( y, matSplat01 ) ), MulSIMD( z, matSplat02 ) ); - outY = AddSIMD( AddSIMD( MulSIMD( x, matSplat10 ), MulSIMD( y, matSplat11 ) ), MulSIMD( z, matSplat12 ) ); - outZ = AddSIMD( AddSIMD( MulSIMD( x, matSplat20 ), MulSIMD( y, matSplat21 ) ), MulSIMD( z, matSplat22 ) ); - - x = outX; - y = outY; - z = outZ; -} - -// Assume the given matrix is a rotation, and rotate these vectors by it. -// If you have a long list of FourVectors structures that you all want -// to rotate by the same matrix, use FourVectors::RotateManyBy() instead. -void FourVectors::TransformBy(const matrix3x4_t& matrix) -{ - // Splat out each of the entries in the matrix to a fltx4. Do this - // in the order that we will need them, to hide latency. I'm - // avoiding making an array of them, so that they'll remain in - // registers. - fltx4 matSplat00, matSplat01, matSplat02, - matSplat10, matSplat11, matSplat12, - matSplat20, matSplat21, matSplat22; - - { - // Load the matrix into local vectors. Sadly, matrix3x4_ts are - // often unaligned. The w components will be the tranpose row of - // the matrix, but we don't really care about that. 
- fltx4 matCol0 = LoadUnalignedSIMD( matrix[0] ); - fltx4 matCol1 = LoadUnalignedSIMD( matrix[1] ); - fltx4 matCol2 = LoadUnalignedSIMD( matrix[2] ); - - matSplat00 = SplatXSIMD( matCol0 ); - matSplat01 = SplatYSIMD( matCol0 ); - matSplat02 = SplatZSIMD( matCol0 ); - - matSplat10 = SplatXSIMD( matCol1 ); - matSplat11 = SplatYSIMD( matCol1 ); - matSplat12 = SplatZSIMD( matCol1 ); - - matSplat20 = SplatXSIMD( matCol2 ); - matSplat21 = SplatYSIMD( matCol2 ); - matSplat22 = SplatZSIMD( matCol2 ); - } - - // Trust in the compiler to schedule these operations correctly: - fltx4 outX, outY, outZ; - - outX = MaddSIMD( z, matSplat02, AddSIMD( MulSIMD( x, matSplat00 ), MulSIMD( y, matSplat01 ) ) ); - outY = MaddSIMD( z, matSplat12, AddSIMD( MulSIMD( x, matSplat10 ), MulSIMD( y, matSplat11 ) ) ); - outZ = MaddSIMD( z, matSplat22, AddSIMD( MulSIMD( x, matSplat20 ), MulSIMD( y, matSplat21 ) ) ); - - x = AddSIMD( outX, ReplicateX4( matrix[0][3] )); - y = AddSIMD( outY, ReplicateX4( matrix[1][3] )); - z = AddSIMD( outZ, ReplicateX4( matrix[2][3] )); -} - - - -/// quick, low quality perlin-style noise() function suitable for real time use. -/// return value is -1..1. Only reliable around +/- 1 million or so. -fltx4 NoiseSIMD( const fltx4 & x, const fltx4 & y, const fltx4 & z ); -fltx4 NoiseSIMD( FourVectors const &v ); - -// vector valued noise direction -FourVectors DNoiseSIMD( FourVectors const &v ); - -// vector value "curl" noise function. 
see http://hyperphysics.phy-astr.gsu.edu/hbase/curl.html -FourVectors CurlNoiseSIMD( FourVectors const &v ); - - -/// calculate the absolute value of a packed single -inline fltx4 fabs( const fltx4 & x ) -{ - return AndSIMD( x, LoadAlignedSIMD( g_SIMD_clear_signmask ) ); -} - -/// negate all four components of a SIMD packed single -inline fltx4 fnegate( const fltx4 & x ) -{ - return XorSIMD( x, LoadAlignedSIMD( g_SIMD_signmask ) ); -} - - -fltx4 Pow_FixedPoint_Exponent_SIMD( const fltx4 & x, int exponent); - -// PowSIMD - raise a SIMD register to a power. This is analogous to the C pow() function, with some -// restictions: fractional exponents are only handled with 2 bits of precision. Basically, -// fractions of 0,.25,.5, and .75 are handled. PowSIMD(x,.30) will be the same as PowSIMD(x,.25). -// negative and fractional powers are handled by the SIMD reciprocal and square root approximation -// instructions and so are not especially accurate ----Note that this routine does not raise -// numeric exceptions because it uses SIMD--- This routine is O(log2(exponent)). -inline fltx4 PowSIMD( const fltx4 & x, float exponent ) -{ - return Pow_FixedPoint_Exponent_SIMD(x,(int) (4.0*exponent)); -} - - - -// random number generation - generate 4 random numbers quickly. 
- -void SeedRandSIMD(uint32 seed); // seed the random # generator -fltx4 RandSIMD( int nContext = 0 ); // return 4 numbers in the 0..1 range - -// for multithreaded, you need to use these and use the argument form of RandSIMD: -int GetSIMDRandContext( void ); -void ReleaseSIMDRandContext( int nContext ); - -FORCEINLINE fltx4 RandSignedSIMD( void ) // -1..1 -{ - return SubSIMD( MulSIMD( Four_Twos, RandSIMD() ), Four_Ones ); -} - - -// SIMD versions of mathlib simplespline functions -// hermite basis function for smooth interpolation -// Similar to Gain() above, but very cheap to call -// value should be between 0 & 1 inclusive -inline fltx4 SimpleSpline( const fltx4 & value ) -{ - // Arranged to avoid a data dependency between these two MULs: - fltx4 valueDoubled = MulSIMD( value, Four_Twos ); - fltx4 valueSquared = MulSIMD( value, value ); - - // Nice little ease-in, ease-out spline-like curve - return SubSIMD( - MulSIMD( Four_Threes, valueSquared ), - MulSIMD( valueDoubled, valueSquared ) ); -} - -// remaps a value in [startInterval, startInterval+rangeInterval] from linear to -// spline using SimpleSpline -inline fltx4 SimpleSplineRemapValWithDeltas( const fltx4 & val, - const fltx4 & A, const fltx4 & BMinusA, - const fltx4 & OneOverBMinusA, const fltx4 & C, - const fltx4 & DMinusC ) -{ -// if ( A == B ) -// return val >= B ? D : C; - fltx4 cVal = MulSIMD( SubSIMD( val, A), OneOverBMinusA ); - return AddSIMD( C, MulSIMD( DMinusC, SimpleSpline( cVal ) ) ); -} - -inline fltx4 SimpleSplineRemapValWithDeltasClamped( const fltx4 & val, - const fltx4 & A, const fltx4 & BMinusA, - const fltx4 & OneOverBMinusA, const fltx4 & C, - const fltx4 & DMinusC ) -{ -// if ( A == B ) -// return val >= B ? 
D : C; - fltx4 cVal = MulSIMD( SubSIMD( val, A), OneOverBMinusA ); - cVal = MinSIMD( Four_Ones, MaxSIMD( Four_Zeros, cVal ) ); - return AddSIMD( C, MulSIMD( DMinusC, SimpleSpline( cVal ) ) ); -} - -FORCEINLINE fltx4 FracSIMD( const fltx4 &val ) -{ - fltx4 fl4Abs = fabs( val ); - fltx4 ival = SubSIMD( AddSIMD( fl4Abs, Four_2ToThe23s ), Four_2ToThe23s ); - ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Ones ), ival ); - return XorSIMD( SubSIMD( fl4Abs, ival ), XorSIMD( val, fl4Abs ) ); // restore sign bits -} - -FORCEINLINE fltx4 Mod2SIMD( const fltx4 &val ) -{ - fltx4 fl4Abs = fabs( val ); - fltx4 ival = SubSIMD( AndSIMD( LoadAlignedSIMD( (float *) g_SIMD_lsbmask ), AddSIMD( fl4Abs, Four_2ToThe23s ) ), Four_2ToThe23s ); - ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Twos ), ival ); - return XorSIMD( SubSIMD( fl4Abs, ival ), XorSIMD( val, fl4Abs ) ); // restore sign bits -} - -FORCEINLINE fltx4 Mod2SIMDPositiveInput( const fltx4 &val ) -{ - fltx4 ival = SubSIMD( AndSIMD( LoadAlignedSIMD( g_SIMD_lsbmask ), AddSIMD( val, Four_2ToThe23s ) ), Four_2ToThe23s ); - ival = MaskedAssign( CmpGtSIMD( ival, val ), SubSIMD( ival, Four_Twos ), ival ); - return SubSIMD( val, ival ); -} - - -// approximate sin of an angle, with -1..1 representing the whole sin wave period instead of -pi..pi. -// no range reduction is done - for values outside of 0..1 you won't like the results -FORCEINLINE fltx4 _SinEst01SIMD( const fltx4 &val ) -{ - // really rough approximation - x*(4-x*4) - a parabola. s(0) = 0, s(.5) = 1, s(1)=0, smooth in-between. - // sufficient for simple oscillation. - return MulSIMD( val, SubSIMD( Four_Fours, MulSIMD( val, Four_Fours ) ) ); -} - -FORCEINLINE fltx4 _Sin01SIMD( const fltx4 &val ) -{ - // not a bad approximation : parabola always over-estimates. Squared parabola always - // underestimates. 
So lets blend between them: goodsin = badsin + .225*( badsin^2-badsin) - fltx4 fl4BadEst = MulSIMD( val, SubSIMD( Four_Fours, MulSIMD( val, Four_Fours ) ) ); - return AddSIMD( MulSIMD( Four_Point225s, SubSIMD( MulSIMD( fl4BadEst, fl4BadEst ), fl4BadEst ) ), fl4BadEst ); -} - -// full range useable implementations -FORCEINLINE fltx4 SinEst01SIMD( const fltx4 &val ) -{ - fltx4 fl4Abs = fabs( val ); - fltx4 fl4Reduced2 = Mod2SIMDPositiveInput( fl4Abs ); - fltx4 fl4OddMask = CmpGeSIMD( fl4Reduced2, Four_Ones ); - fltx4 fl4val = SubSIMD( fl4Reduced2, AndSIMD( Four_Ones, fl4OddMask ) ); - fltx4 fl4Sin = _SinEst01SIMD( fl4val ); - fl4Sin = XorSIMD( fl4Sin, AndSIMD( LoadAlignedSIMD( g_SIMD_signmask ), XorSIMD( val, fl4OddMask ) ) ); - return fl4Sin; - -} - -FORCEINLINE fltx4 Sin01SIMD( const fltx4 &val ) -{ - fltx4 fl4Abs = fabs( val ); - fltx4 fl4Reduced2 = Mod2SIMDPositiveInput( fl4Abs ); - fltx4 fl4OddMask = CmpGeSIMD( fl4Reduced2, Four_Ones ); - fltx4 fl4val = SubSIMD( fl4Reduced2, AndSIMD( Four_Ones, fl4OddMask ) ); - fltx4 fl4Sin = _Sin01SIMD( fl4val ); - fl4Sin = XorSIMD( fl4Sin, AndSIMD( LoadAlignedSIMD( g_SIMD_signmask ), XorSIMD( val, fl4OddMask ) ) ); - return fl4Sin; - -} - -// Schlick style Bias approximation see graphics gems 4 : bias(t,a)= t/( (1/a-2)*(1-t)+1) - -FORCEINLINE fltx4 PreCalcBiasParameter( const fltx4 &bias_parameter ) -{ - // convert perlin-style-bias parameter to the value right for the approximation - return SubSIMD( ReciprocalSIMD( bias_parameter ), Four_Twos ); -} - -FORCEINLINE fltx4 BiasSIMD( const fltx4 &val, const fltx4 &precalc_param ) -{ - // similar to bias function except pass precalced bias value from calling PreCalcBiasParameter. - - //!!speed!! use reciprocal est? - //!!speed!! 
could save one op by precalcing _2_ values - return DivSIMD( val, AddSIMD( MulSIMD( precalc_param, SubSIMD( Four_Ones, val ) ), Four_Ones ) ); -} - -//----------------------------------------------------------------------------- -// Box/plane test -// NOTE: The w component of emins + emaxs must be 1 for this to work -//----------------------------------------------------------------------------- -FORCEINLINE int BoxOnPlaneSideSIMD( const fltx4& emins, const fltx4& emaxs, const cplane_t *p, float tolerance = 0.f ) -{ - fltx4 corners[2]; - fltx4 normal = LoadUnalignedSIMD( p->normal.Base() ); - fltx4 dist = ReplicateX4( -p->dist ); - normal = SetWSIMD( normal, dist ); - fltx4 t4 = ReplicateX4( tolerance ); - fltx4 negt4 = ReplicateX4( -tolerance ); - fltx4 cmp = CmpGeSIMD( normal, Four_Zeros ); - corners[0] = MaskedAssign( cmp, emaxs, emins ); - corners[1] = MaskedAssign( cmp, emins, emaxs ); - fltx4 dot1 = Dot4SIMD( normal, corners[0] ); - fltx4 dot2 = Dot4SIMD( normal, corners[1] ); - cmp = CmpGeSIMD( dot1, t4 ); - fltx4 cmp2 = CmpGtSIMD( negt4, dot2 ); - fltx4 result = MaskedAssign( cmp, Four_Ones, Four_Zeros ); - fltx4 result2 = MaskedAssign( cmp2, Four_Twos, Four_Zeros ); - result = AddSIMD( result, result2 ); - intx4 sides; - ConvertStoreAsIntsSIMD( &sides, result ); - return sides[0]; -} - -#endif // _ssemath_h +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: - defines SIMD "structure of arrays" classes and functions. 
+// +//===========================================================================// +#ifndef SSEMATH_H +#define SSEMATH_H + +#if defined( _X360 ) +#include +#else +#include +#endif + +#include +#include + +#if defined(GNUC) +#define USE_STDC_FOR_SIMD 0 +#else +#define USE_STDC_FOR_SIMD 0 +#endif + +#if (!defined(_X360) && (USE_STDC_FOR_SIMD == 0)) +#define _SSE1 1 +#endif + +// I thought about defining a class/union for the SIMD packed floats instead of using fltx4, +// but decided against it because (a) the nature of SIMD code which includes comparisons is to blur +// the relationship between packed floats and packed integer types and (b) not sure that the +// compiler would handle generating good code for the intrinsics. + +#if USE_STDC_FOR_SIMD + +typedef union +{ + float m128_f32[4]; + uint32 m128_u32[4]; +} fltx4; + +typedef fltx4 i32x4; +typedef fltx4 u32x4; + +#elif ( defined( _X360 ) ) + +typedef union +{ + // This union allows float/int access (which generally shouldn't be done in inner loops) + __vector4 vmx; + float m128_f32[4]; + uint32 m128_u32[4]; +} fltx4_union; + +typedef __vector4 fltx4; +typedef __vector4 i32x4; // a VMX register; just a way of making it explicit that we're doing integer ops. +typedef __vector4 u32x4; // a VMX register; just a way of making it explicit that we're doing unsigned integer ops. + +#else + +typedef __m128 fltx4; +typedef __m128 i32x4; +typedef __m128 u32x4; + +#endif + +// The FLTX4 type is a fltx4 used as a parameter to a function. +// On the 360, the best way to do this is pass-by-copy on the registers. +// On the PC, the best way is to pass by const reference. +// The compiler will sometimes, but not always, replace a pass-by-const-ref +// with a pass-in-reg on the 360; to avoid this confusion, you can +// explicitly use a FLTX4 as the parameter type. 
+#ifdef _X360 +typedef __vector4 FLTX4; +#else +typedef const fltx4 & FLTX4; +#endif + +// A 16-byte aligned int32 datastructure +// (for use when writing out fltx4's as SIGNED +// ints). +struct ALIGN16 intx4 +{ + int32 m_i32[4]; + + inline int & operator[](int which) + { + return m_i32[which]; + } + + inline const int & operator[](int which) const + { + return m_i32[which]; + } + + inline int32 *Base() { + return m_i32; + } + + inline const int32 *Base() const + { + return m_i32; + } + + inline const bool operator==(const intx4 &other) const + { + return m_i32[0] == other.m_i32[0] && + m_i32[1] == other.m_i32[1] && + m_i32[2] == other.m_i32[2] && + m_i32[3] == other.m_i32[3] ; + } +} ALIGN16_POST; + + +#if defined( _DEBUG ) && defined( _X360 ) +FORCEINLINE void TestVPUFlags() +{ + // Check that the VPU is in the appropriate (Java-compliant) mode (see 3.2.1 in altivec_pem.pdf on xds.xbox.com) + __vector4 a; + __asm + { + mfvscr a; + } + unsigned int * flags = (unsigned int *)&a; + unsigned int controlWord = flags[3]; + Assert(controlWord == 0); +} +#else // _DEBUG +FORCEINLINE void TestVPUFlags() {} +#endif // _DEBUG + + +// useful constants in SIMD packed float format: +// (note: some of these aren't stored on the 360, +// but are manufactured directly in one or two +// instructions, saving a load and possible L2 +// miss.) +#ifndef _X360 +extern const fltx4 Four_Zeros; // 0 0 0 0 +extern const fltx4 Four_Ones; // 1 1 1 1 +extern const fltx4 Four_Twos; // 2 2 2 2 +extern const fltx4 Four_Threes; // 3 3 3 3 +extern const fltx4 Four_Fours; // guess. +extern const fltx4 Four_Point225s; // .225 .225 .225 .225 +extern const fltx4 Four_PointFives; // .5 .5 .5 .5 +extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON +extern const fltx4 Four_2ToThe21s; // (1<<21).. +extern const fltx4 Four_2ToThe22s; // (1<<22).. +extern const fltx4 Four_2ToThe23s; // (1<<23).. +extern const fltx4 Four_2ToThe24s; // (1<<24).. 
+extern const fltx4 Four_Origin; // 0 0 0 1 (origin point, like vr0 on the PS2) +extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1 +#else +#define Four_Zeros XMVectorZero() // 0 0 0 0 +#define Four_Ones XMVectorSplatOne() // 1 1 1 1 +extern const fltx4 Four_Twos; // 2 2 2 2 +extern const fltx4 Four_Threes; // 3 3 3 3 +extern const fltx4 Four_Fours; // guess. +extern const fltx4 Four_Point225s; // .225 .225 .225 .225 +extern const fltx4 Four_PointFives; // .5 .5 .5 .5 +extern const fltx4 Four_Epsilons; // FLT_EPSILON FLT_EPSILON FLT_EPSILON FLT_EPSILON +extern const fltx4 Four_2ToThe21s; // (1<<21).. +extern const fltx4 Four_2ToThe22s; // (1<<22).. +extern const fltx4 Four_2ToThe23s; // (1<<23).. +extern const fltx4 Four_2ToThe24s; // (1<<24).. +extern const fltx4 Four_Origin; // 0 0 0 1 (origin point, like vr0 on the PS2) +extern const fltx4 Four_NegativeOnes; // -1 -1 -1 -1 +#endif +extern const fltx4 Four_FLT_MAX; // FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX +extern const fltx4 Four_Negative_FLT_MAX; // -FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX +extern const fltx4 g_SIMD_0123; // 0 1 2 3 as float + +// external aligned integer constants +extern const ALIGN16 int32 g_SIMD_clear_signmask[] ALIGN16_POST; // 0x7fffffff x 4 +extern const ALIGN16 int32 g_SIMD_signmask[] ALIGN16_POST; // 0x80000000 x 4 +extern const ALIGN16 int32 g_SIMD_lsbmask[] ALIGN16_POST; // 0xfffffffe x 4 +extern const ALIGN16 int32 g_SIMD_clear_wmask[] ALIGN16_POST; // -1 -1 -1 0 +extern const ALIGN16 int32 g_SIMD_ComponentMask[4][4] ALIGN16_POST; // [0xFFFFFFFF 0 0 0], [0 0xFFFFFFFF 0 0], [0 0 0xFFFFFFFF 0], [0 0 0 0xFFFFFFFF] +extern const ALIGN16 int32 g_SIMD_AllOnesMask[] ALIGN16_POST; // ~0,~0,~0,~0 +extern const ALIGN16 int32 g_SIMD_Low16BitsMask[] ALIGN16_POST; // 0xffff x 4 + +// this mask is used for skipping the tail of things. If you have N elements in an array, and wish +// to mask out the tail, g_SIMD_SkipTailMask[N & 3] what you want to use for the last iteration. 
+extern const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST; + +// Define prefetch macros. +// The characteristics of cache and prefetch are completely +// different between the different platforms, so you DO NOT +// want to just define one macro that maps to every platform +// intrinsic under the hood -- you need to prefetch at different +// intervals between x86 and PPC, for example, and that is +// a higher level code change. +// On the other hand, I'm tired of typing #ifdef _X360 +// all over the place, so this is just a nop on Intel, PS3. +#ifdef _X360 +#define PREFETCH360(address, offset) __dcbt(offset,address) +#else +#define PREFETCH360(x,y) // nothing +#endif + +#if USE_STDC_FOR_SIMD + +//--------------------------------------------------------------------- +// Standard C (fallback/Linux) implementation (only there for compat - slow) +//--------------------------------------------------------------------- + +FORCEINLINE float SubFloat( const fltx4 & a, int idx ) +{ + return a.m128_f32[ idx ]; +} + +FORCEINLINE float & SubFloat( fltx4 & a, int idx ) +{ + return a.m128_f32[idx]; +} + +FORCEINLINE uint32 SubInt( const fltx4 & a, int idx ) +{ + return a.m128_u32[idx]; +} + +FORCEINLINE uint32 & SubInt( fltx4 & a, int idx ) +{ + return a.m128_u32[idx]; +} + +// Return one in the fastest way -- on the x360, faster even than loading. +FORCEINLINE fltx4 LoadZeroSIMD( void ) +{ + return Four_Zeros; +} + +// Return one in the fastest way -- on the x360, faster even than loading. 
+FORCEINLINE fltx4 LoadOneSIMD( void ) +{ + return Four_Ones; +} + +FORCEINLINE fltx4 SplatXSIMD( const fltx4 & a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = SubFloat( a, 0 ); + SubFloat( retVal, 1 ) = SubFloat( a, 0 ); + SubFloat( retVal, 2 ) = SubFloat( a, 0 ); + SubFloat( retVal, 3 ) = SubFloat( a, 0 ); + return retVal; +} + +FORCEINLINE fltx4 SplatYSIMD( fltx4 a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = SubFloat( a, 1 ); + SubFloat( retVal, 1 ) = SubFloat( a, 1 ); + SubFloat( retVal, 2 ) = SubFloat( a, 1 ); + SubFloat( retVal, 3 ) = SubFloat( a, 1 ); + return retVal; +} + +FORCEINLINE fltx4 SplatZSIMD( fltx4 a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = SubFloat( a, 2 ); + SubFloat( retVal, 1 ) = SubFloat( a, 2 ); + SubFloat( retVal, 2 ) = SubFloat( a, 2 ); + SubFloat( retVal, 3 ) = SubFloat( a, 2 ); + return retVal; +} + +FORCEINLINE fltx4 SplatWSIMD( fltx4 a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = SubFloat( a, 3 ); + SubFloat( retVal, 1 ) = SubFloat( a, 3 ); + SubFloat( retVal, 2 ) = SubFloat( a, 3 ); + SubFloat( retVal, 3 ) = SubFloat( a, 3 ); + return retVal; +} + +FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x ) +{ + fltx4 result = a; + SubFloat( result, 0 ) = SubFloat( x, 0 ); + return result; +} + +FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y ) +{ + fltx4 result = a; + SubFloat( result, 1 ) = SubFloat( y, 1 ); + return result; +} + +FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z ) +{ + fltx4 result = a; + SubFloat( result, 2 ) = SubFloat( z, 2 ); + return result; +} + +FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w ) +{ + fltx4 result = a; + SubFloat( result, 3 ) = SubFloat( w, 3 ); + return result; +} + +FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue ) +{ + fltx4 result = a; + SubFloat( result, nComponent ) = flValue; + return result; +} + +// a b c d -> b c d a +FORCEINLINE fltx4 RotateLeft( const fltx4 & a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) 
= SubFloat( a, 1 ); + SubFloat( retVal, 1 ) = SubFloat( a, 2 ); + SubFloat( retVal, 2 ) = SubFloat( a, 3 ); + SubFloat( retVal, 3 ) = SubFloat( a, 0 ); + return retVal; +} + +// a b c d -> c d a b +FORCEINLINE fltx4 RotateLeft2( const fltx4 & a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = SubFloat( a, 2 ); + SubFloat( retVal, 1 ) = SubFloat( a, 3 ); + SubFloat( retVal, 2 ) = SubFloat( a, 0 ); + SubFloat( retVal, 3 ) = SubFloat( a, 1 ); + return retVal; +} + +#define BINOP(op) \ + fltx4 retVal; \ + SubFloat( retVal, 0 ) = ( SubFloat( a, 0 ) op SubFloat( b, 0 ) ); \ + SubFloat( retVal, 1 ) = ( SubFloat( a, 1 ) op SubFloat( b, 1 ) ); \ + SubFloat( retVal, 2 ) = ( SubFloat( a, 2 ) op SubFloat( b, 2 ) ); \ + SubFloat( retVal, 3 ) = ( SubFloat( a, 3 ) op SubFloat( b, 3 ) ); \ + return retVal; + +#define IBINOP(op) \ + fltx4 retVal; \ + SubInt( retVal, 0 ) = ( SubInt( a, 0 ) op SubInt ( b, 0 ) ); \ + SubInt( retVal, 1 ) = ( SubInt( a, 1 ) op SubInt ( b, 1 ) ); \ + SubInt( retVal, 2 ) = ( SubInt( a, 2 ) op SubInt ( b, 2 ) ); \ + SubInt( retVal, 3 ) = ( SubInt( a, 3 ) op SubInt ( b, 3 ) ); \ + return retVal; + +FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b ) +{ + BINOP(+); +} + +FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b +{ + BINOP(-); +}; + +FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b +{ + BINOP(*); +} + +FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b +{ + BINOP(/); +} + + +FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c +{ + return AddSIMD( MulSIMD(a,b), c ); +} + +FORCEINLINE fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b +{ + return SubSIMD( c, MulSIMD(a,b) ); +}; + + +FORCEINLINE fltx4 SinSIMD( const fltx4 &radians ) +{ + fltx4 result; + SubFloat( result, 0 ) = sin( SubFloat( radians, 0 ) ); + SubFloat( result, 1 ) = sin( SubFloat( radians, 1 ) ); + SubFloat( result, 2 ) = sin( SubFloat( radians, 2 ) ); + 
SubFloat( result, 3 ) = sin( SubFloat( radians, 3 ) ); + return result; +} + +FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) +{ + SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) ); + SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) ); + SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) ); +} + +FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) +{ + SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) ); + SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) ); + SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) ); + SinCos( SubFloat( radians, 3 ), &SubFloat( sine, 3 ), &SubFloat( cosine, 3 ) ); +} + +FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine ) +{ + fltx4 result; + SubFloat( result, 0 ) = asin( SubFloat( sine, 0 ) ); + SubFloat( result, 1 ) = asin( SubFloat( sine, 1 ) ); + SubFloat( result, 2 ) = asin( SubFloat( sine, 2 ) ); + SubFloat( result, 3 ) = asin( SubFloat( sine, 3 ) ); + return result; +} + +FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs ) +{ + fltx4 result; + SubFloat( result, 0 ) = acos( SubFloat( cs, 0 ) ); + SubFloat( result, 1 ) = acos( SubFloat( cs, 1 ) ); + SubFloat( result, 2 ) = acos( SubFloat( cs, 2 ) ); + SubFloat( result, 3 ) = acos( SubFloat( cs, 3 ) ); + return result; +} + +// tan^1(a/b) .. 
ie, pass sin in as a and cos in as b +FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b ) +{ + fltx4 result; + SubFloat( result, 0 ) = atan2( SubFloat( a, 0 ), SubFloat( b, 0 ) ); + SubFloat( result, 1 ) = atan2( SubFloat( a, 1 ), SubFloat( b, 1 ) ); + SubFloat( result, 2 ) = atan2( SubFloat( a, 2 ), SubFloat( b, 2 ) ); + SubFloat( result, 3 ) = atan2( SubFloat( a, 3 ), SubFloat( b, 3 ) ); + return result; +} + +FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = max( SubFloat( a, 0 ), SubFloat( b, 0 ) ); + SubFloat( retVal, 1 ) = max( SubFloat( a, 1 ), SubFloat( b, 1 ) ); + SubFloat( retVal, 2 ) = max( SubFloat( a, 2 ), SubFloat( b, 2 ) ); + SubFloat( retVal, 3 ) = max( SubFloat( a, 3 ), SubFloat( b, 3 ) ); + return retVal; +} + +FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = min( SubFloat( a, 0 ), SubFloat( b, 0 ) ); + SubFloat( retVal, 1 ) = min( SubFloat( a, 1 ), SubFloat( b, 1 ) ); + SubFloat( retVal, 2 ) = min( SubFloat( a, 2 ), SubFloat( b, 2 ) ); + SubFloat( retVal, 3 ) = min( SubFloat( a, 3 ), SubFloat( b, 3 ) ); + return retVal; +} + +FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b +{ + IBINOP(&); +} + +FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b +{ + fltx4 retVal; + SubInt( retVal, 0 ) = ~SubInt( a, 0 ) & SubInt( b, 0 ); + SubInt( retVal, 1 ) = ~SubInt( a, 1 ) & SubInt( b, 1 ); + SubInt( retVal, 2 ) = ~SubInt( a, 2 ) & SubInt( b, 2 ); + SubInt( retVal, 3 ) = ~SubInt( a, 3 ) & SubInt( b, 3 ); + return retVal; +} + +FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b +{ + IBINOP(^); +} + +FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b +{ + IBINOP(|); +} + +FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a +{ + fltx4 retval; + SubFloat( retval, 0 ) = -SubFloat( a, 0 ); + SubFloat( retval, 1 ) = -SubFloat( a, 
1 ); + SubFloat( retval, 2 ) = -SubFloat( a, 2 ); + SubFloat( retval, 3 ) = -SubFloat( a, 3 ); + + return retval; +} + +FORCEINLINE bool IsAllZeros( const fltx4 & a ) // all floats of a zero? +{ + return ( SubFloat( a, 0 ) == 0.0 ) && + ( SubFloat( a, 1 ) == 0.0 ) && + ( SubFloat( a, 2 ) == 0.0 ) && + ( SubFloat( a, 3 ) == 0.0 ) ; +} + + +// for branching when a.xyzw > b.xyzw +FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b ) +{ + return SubFloat(a,0) > SubFloat(b,0) && + SubFloat(a,1) > SubFloat(b,1) && + SubFloat(a,2) > SubFloat(b,2) && + SubFloat(a,3) > SubFloat(b,3); +} + +// for branching when a.xyzw >= b.xyzw +FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b ) +{ + return SubFloat(a,0) >= SubFloat(b,0) && + SubFloat(a,1) >= SubFloat(b,1) && + SubFloat(a,2) >= SubFloat(b,2) && + SubFloat(a,3) >= SubFloat(b,3); +} + +// For branching if all a.xyzw == b.xyzw +FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b ) +{ + return SubFloat(a,0) == SubFloat(b,0) && + SubFloat(a,1) == SubFloat(b,1) && + SubFloat(a,2) == SubFloat(b,2) && + SubFloat(a,3) == SubFloat(b,3); +} + +FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set +{ + int nRet = 0; + + nRet |= ( SubInt( a, 0 ) & 0x80000000 ) >> 31; // sign(x) -> bit 0 + nRet |= ( SubInt( a, 1 ) & 0x80000000 ) >> 30; // sign(y) -> bit 1 + nRet |= ( SubInt( a, 2 ) & 0x80000000 ) >> 29; // sign(z) -> bit 2 + nRet |= ( SubInt( a, 3 ) & 0x80000000 ) >> 28; // sign(w) -> bit 3 + + return nRet; +} + +FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0) +{ + return (0 != TestSignSIMD( a )); +} + +FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0 +{ + fltx4 retVal; + SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) == SubFloat( b, 0 )) ? ~0 : 0; + SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) == SubFloat( b, 1 )) ? 
~0 : 0; + SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) == SubFloat( b, 2 )) ? ~0 : 0; + SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) == SubFloat( b, 3 )) ? ~0 : 0; + return retVal; +} + +FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0 +{ + fltx4 retVal; + SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) > SubFloat( b, 0 )) ? ~0 : 0; + SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) > SubFloat( b, 1 )) ? ~0 : 0; + SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) > SubFloat( b, 2 )) ? ~0 : 0; + SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) > SubFloat( b, 3 )) ? ~0 : 0; + return retVal; +} + +FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0 +{ + fltx4 retVal; + SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) >= SubFloat( b, 0 )) ? ~0 : 0; + SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) >= SubFloat( b, 1 )) ? ~0 : 0; + SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) >= SubFloat( b, 2 )) ? ~0 : 0; + SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) >= SubFloat( b, 3 )) ? ~0 : 0; + return retVal; +} + +FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a= -b) ? ~0 : 0 +{ + fltx4 retVal; + SubInt( retVal, 0 ) = ( SubFloat( a, 0 ) <= SubFloat( b, 0 ) && SubFloat( a, 0 ) >= -SubFloat( b, 0 ) ) ? ~0 : 0; + SubInt( retVal, 1 ) = ( SubFloat( a, 1 ) <= SubFloat( b, 1 ) && SubFloat( a, 1 ) >= -SubFloat( b, 1 ) ) ? ~0 : 0; + SubInt( retVal, 2 ) = ( SubFloat( a, 2 ) <= SubFloat( b, 2 ) && SubFloat( a, 2 ) >= -SubFloat( b, 2 ) ) ? ~0 : 0; + SubInt( retVal, 3 ) = ( SubFloat( a, 3 ) <= SubFloat( b, 3 ) && SubFloat( a, 3 ) >= -SubFloat( b, 3 ) ) ? 
~0 : 0; + return retVal; +} + + +FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue ) +{ + return OrSIMD( + AndSIMD( ReplacementMask, NewValue ), + AndNotSIMD( ReplacementMask, OldValue ) ); +} + +FORCEINLINE fltx4 ReplicateX4( float flValue ) // a,a,a,a +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = flValue; + SubFloat( retVal, 1 ) = flValue; + SubFloat( retVal, 2 ) = flValue; + SubFloat( retVal, 3 ) = flValue; + return retVal; +} + +/// replicate a single 32 bit integer value to all 4 components of an m128 +FORCEINLINE fltx4 ReplicateIX4( int nValue ) +{ + fltx4 retVal; + SubInt( retVal, 0 ) = nValue; + SubInt( retVal, 1 ) = nValue; + SubInt( retVal, 2 ) = nValue; + SubInt( retVal, 3 ) = nValue; + return retVal; + +} + +// Round towards positive infinity +FORCEINLINE fltx4 CeilSIMD( const fltx4 &a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = ceil( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = ceil( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = ceil( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = ceil( SubFloat( a, 3 ) ); + return retVal; + +} + +// Round towards negative infinity +FORCEINLINE fltx4 FloorSIMD( const fltx4 &a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = floor( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = floor( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = floor( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = floor( SubFloat( a, 3 ) ); + return retVal; + +} + +FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = sqrt( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = sqrt( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = sqrt( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = sqrt( SubFloat( a, 3 ) ); + return retVal; +} + +FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = sqrt( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = sqrt( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = sqrt( 
SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = sqrt( SubFloat( a, 3 ) ); + return retVal; +} + +FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / sqrt( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = 1.0 / sqrt( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = 1.0 / sqrt( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = 1.0 / sqrt( SubFloat( a, 3 ) ); + return retVal; +} + +FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / sqrt( SubFloat( a, 0 ) != 0.0f ? SubFloat( a, 0 ) : FLT_EPSILON ); + SubFloat( retVal, 1 ) = 1.0 / sqrt( SubFloat( a, 1 ) != 0.0f ? SubFloat( a, 1 ) : FLT_EPSILON ); + SubFloat( retVal, 2 ) = 1.0 / sqrt( SubFloat( a, 2 ) != 0.0f ? SubFloat( a, 2 ) : FLT_EPSILON ); + SubFloat( retVal, 3 ) = 1.0 / sqrt( SubFloat( a, 3 ) != 0.0f ? SubFloat( a, 3 ) : FLT_EPSILON ); + return retVal; +} + +FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / sqrt( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = 1.0 / sqrt( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = 1.0 / sqrt( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = 1.0 / sqrt( SubFloat( a, 3 ) ); + return retVal; +} + +FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / SubFloat( a, 0 ); + SubFloat( retVal, 1 ) = 1.0 / SubFloat( a, 1 ); + SubFloat( retVal, 2 ) = 1.0 / SubFloat( a, 2 ); + SubFloat( retVal, 3 ) = 1.0 / SubFloat( a, 3 ); + return retVal; +} + +FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / SubFloat( a, 0 ); + SubFloat( retVal, 1 ) = 1.0 / SubFloat( a, 1 ); + SubFloat( retVal, 2 ) = 1.0 / SubFloat( a, 2 ); + SubFloat( retVal, 3 ) = 1.0 / SubFloat( a, 3 ); + return retVal; +} + +/// 1/x for all 4 values. 
+/// 1/0 will result in a big but NOT infinite result +FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / (SubFloat( a, 0 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 0 )); + SubFloat( retVal, 1 ) = 1.0 / (SubFloat( a, 1 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 1 )); + SubFloat( retVal, 2 ) = 1.0 / (SubFloat( a, 2 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 2 )); + SubFloat( retVal, 3 ) = 1.0 / (SubFloat( a, 3 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 3 )); + return retVal; +} + +FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = 1.0 / (SubFloat( a, 0 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 0 )); + SubFloat( retVal, 1 ) = 1.0 / (SubFloat( a, 1 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 1 )); + SubFloat( retVal, 2 ) = 1.0 / (SubFloat( a, 2 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 2 )); + SubFloat( retVal, 3 ) = 1.0 / (SubFloat( a, 3 ) == 0.0f ? FLT_EPSILON : SubFloat( a, 3 )); + return retVal; +} + +// 2^x for all values (the antilog) +FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = powf( 2, SubFloat(toPower, 0) ); + SubFloat( retVal, 1 ) = powf( 2, SubFloat(toPower, 1) ); + SubFloat( retVal, 2 ) = powf( 2, SubFloat(toPower, 2) ); + SubFloat( retVal, 3 ) = powf( 2, SubFloat(toPower, 3) ); + + return retVal; +} + +FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b ) +{ + float flDot = SubFloat( a, 0 ) * SubFloat( b, 0 ) + + SubFloat( a, 1 ) * SubFloat( b, 1 ) + + SubFloat( a, 2 ) * SubFloat( b, 2 ); + return ReplicateX4( flDot ); +} + +FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b ) +{ + float flDot = SubFloat( a, 0 ) * SubFloat( b, 0 ) + + SubFloat( a, 1 ) * SubFloat( b, 1 ) + + SubFloat( a, 2 ) * SubFloat( b, 2 ) + + SubFloat( a, 3 ) * SubFloat( b, 3 ); + return ReplicateX4( flDot ); +} + +// Clamps the components of a vector to a specified minimum and maximum range. 
+FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max) +{ + return MaxSIMD( min, MinSIMD( max, in ) ); +} + +// Squelch the w component of a vector to +0.0. +// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy) +FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a ) +{ + fltx4 retval; + retval = a; + SubFloat( retval, 0 ) = 0; + return retval; +} + +FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD ) +{ + return *( reinterpret_cast< const fltx4 *> ( pSIMD ) ); +} + +FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD ) +{ + return *( reinterpret_cast< const fltx4 *> ( pSIMD ) ); +} + +FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD ) +{ + return *( reinterpret_cast< const fltx4 *> ( pSIMD ) ); +} + +// for the transitional class -- load a 3-by VectorAligned and squash its w component +FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD ) +{ + fltx4 retval = LoadAlignedSIMD(pSIMD.Base()); + // squelch w + SubInt( retval, 3 ) = 0; + return retval; +} + +FORCEINLINE void StoreAlignedSIMD( float *pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a; +} + +FORCEINLINE void StoreUnalignedSIMD( float *pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a; +} + +FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a ) +{ + *pSIMD = SubFloat(a, 0); + *(pSIMD+1) = SubFloat(a, 1); + *(pSIMD+2) = SubFloat(a, 2); +} + +// strongly typed -- syntactic castor oil used for typechecking as we transition to SIMD +FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a ) +{ + StoreAlignedSIMD(pSIMD->Base(),a); +} + +FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w ) +{ +#define SWAP_FLOATS( _a_, _ia_, _b_, _ib_ ) { float tmp = SubFloat( _a_, _ia_ ); SubFloat( _a_, _ia_ ) = SubFloat( _b_, _ib_ ); SubFloat( _b_, _ib_ ) = tmp; } + SWAP_FLOATS( x, 1, y, 0 ); + SWAP_FLOATS( x, 2, z, 0 ); + SWAP_FLOATS( x, 3, w, 0 ); + 
SWAP_FLOATS( y, 2, z, 1 ); + SWAP_FLOATS( y, 3, w, 1 ); + SWAP_FLOATS( z, 3, w, 2 ); +} + +// find the lowest component of a.x, a.y, a.z, +// and replicate it to the whole return value. +FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a ) +{ + float lowest = min( min( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2)); + return ReplicateX4(lowest); +} + +// find the highest component of a.x, a.y, a.z, +// and replicate it to the whole return value. +FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a ) +{ + float highest = max( max( SubFloat(a, 0), SubFloat(a, 1) ), SubFloat(a, 2)); + return ReplicateX4(highest); +} + +// Fixed-point conversion and save as SIGNED INTS. +// pDest->x = Int (vSrc.x) +// note: some architectures have means of doing +// fixed point conversion when the fix depth is +// specified as an immediate.. but there is no way +// to guarantee an immediate as a parameter to function +// like this. +FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc) +{ + (*pDest)[0] = SubFloat(vSrc, 0); + (*pDest)[1] = SubFloat(vSrc, 1); + (*pDest)[2] = SubFloat(vSrc, 2); + (*pDest)[3] = SubFloat(vSrc, 3); +} + +// ------------------------------------ +// INTEGER SIMD OPERATIONS. +// ------------------------------------ +// splat all components of a vector to a signed immediate int number. 
+FORCEINLINE fltx4 IntSetImmediateSIMD( int nValue ) +{ + fltx4 retval; + SubInt( retval, 0 ) = SubInt( retval, 1 ) = SubInt( retval, 2 ) = SubInt( retval, 3) = nValue; + return retval; +} + +// Load 4 aligned words into a SIMD register +FORCEINLINE i32x4 LoadAlignedIntSIMD(const void * RESTRICT pSIMD) +{ + return *( reinterpret_cast< const i32x4 *> ( pSIMD ) ); +} + +// Load 4 unaligned words into a SIMD register +FORCEINLINE i32x4 LoadUnalignedIntSIMD( const void * RESTRICT pSIMD) +{ + return *( reinterpret_cast< const i32x4 *> ( pSIMD ) ); +} + +// save into four words, 16-byte aligned +FORCEINLINE void StoreAlignedIntSIMD( int32 *pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a; +} + +FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< i32x4 *> ( pSIMD.Base() ) ) = a; +} + +FORCEINLINE void StoreUnalignedIntSIMD( int32 *pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a; +} + +// Take a fltx4 containing fixed-point uints and +// return them as single precision floats. No +// fixed point conversion is done. +FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const u32x4 &vSrcA ) +{ + Assert(0); /* pc has no such operation */ + fltx4 retval; + SubFloat( retval, 0 ) = ( (float) SubInt( retval, 0 ) ); + SubFloat( retval, 1 ) = ( (float) SubInt( retval, 1 ) ); + SubFloat( retval, 2 ) = ( (float) SubInt( retval, 2 ) ); + SubFloat( retval, 3 ) = ( (float) SubInt( retval, 3 ) ); + return retval; +} + + +#if 0 /* pc has no such op */ +// Take a fltx4 containing fixed-point sints and +// return them as single precision floats. No +// fixed point conversion is done. 
+FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA ) +{ + fltx4 retval; + SubFloat( retval, 0 ) = ( (float) (reinterpret_cast(&vSrcA.m128_s32[0])) ); + SubFloat( retval, 1 ) = ( (float) (reinterpret_cast(&vSrcA.m128_s32[1])) ); + SubFloat( retval, 2 ) = ( (float) (reinterpret_cast(&vSrcA.m128_s32[2])) ); + SubFloat( retval, 3 ) = ( (float) (reinterpret_cast(&vSrcA.m128_s32[3])) ); + return retval; +} + + +/* + works on fltx4's as if they are four uints. + the first parameter contains the words to be shifted, + the second contains the amount to shift by AS INTS + + for i = 0 to 3 + shift = vSrcB_i*32:(i*32)+4 + vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift +*/ +FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB) +{ + i32x4 retval; + SubInt(retval, 0) = SubInt(vSrcA, 0) << SubInt(vSrcB, 0); + SubInt(retval, 1) = SubInt(vSrcA, 1) << SubInt(vSrcB, 1); + SubInt(retval, 2) = SubInt(vSrcA, 2) << SubInt(vSrcB, 2); + SubInt(retval, 3) = SubInt(vSrcA, 3) << SubInt(vSrcB, 3); + + + return retval; +} +#endif + +#elif ( defined( _X360 ) ) + +//--------------------------------------------------------------------- +// X360 implementation +//--------------------------------------------------------------------- + +FORCEINLINE float & FloatSIMD( fltx4 & a, int idx ) +{ + fltx4_union & a_union = (fltx4_union &)a; + return a_union.m128_f32[idx]; +} + +FORCEINLINE unsigned int & UIntSIMD( fltx4 & a, int idx ) +{ + fltx4_union & a_union = (fltx4_union &)a; + return a_union.m128_u32[idx]; +} + +FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b ) +{ + return __vaddfp( a, b ); +} + +FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b +{ + return __vsubfp( a, b ); +} + +FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b +{ + return __vmulfp( a, b ); +} + +FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c +{ + return __vmaddfp( a, b, c ); +} + +FORCEINLINE 
fltx4 MsubSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b +{ + return __vnmsubfp( a, b, c ); +}; + +FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b ) +{ + return __vmsum3fp( a, b ); +} + +FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b ) +{ + return __vmsum4fp( a, b ); +} + +FORCEINLINE fltx4 SinSIMD( const fltx4 &radians ) +{ + return XMVectorSin( radians ); +} + +FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) +{ + XMVectorSinCos( &sine, &cosine, radians ); +} + +FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) +{ + XMVectorSinCos( &sine, &cosine, radians ); +} + +FORCEINLINE void CosSIMD( fltx4 &cosine, const fltx4 &radians ) +{ + cosine = XMVectorCos( radians ); +} + +FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine ) +{ + return XMVectorASin( sine ); +} + +FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs ) +{ + return XMVectorACos( cs ); +} + +// tan^1(a/b) .. ie, pass sin in as a and cos in as b +FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b ) +{ + return XMVectorATan2( a, b ); +} + +// DivSIMD defined further down, since it uses ReciprocalSIMD + +FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b) +{ + return __vmaxfp( a, b ); +} + +FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b) +{ + return __vminfp( a, b ); +} + +FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b +{ + return __vand( a, b ); +} + +FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b +{ + // NOTE: a and b are swapped in the call: SSE complements the first argument, VMX the second + return __vandc( b, a ); +} + +FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b +{ + return __vxor( a, b ); +} + +FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b +{ + return __vor( a, b ); +} + +FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a +{ + return 
XMVectorNegate(a); +} + +FORCEINLINE bool IsAllZeros( const fltx4 & a ) // all floats of a zero? +{ + unsigned int equalFlags = 0; + __vcmpeqfpR( a, Four_Zeros, &equalFlags ); + return XMComparisonAllTrue( equalFlags ); +} + +FORCEINLINE bool IsAnyZeros( const fltx4 & a ) // any floats are zero? +{ + unsigned int conditionregister; + XMVectorEqualR(&conditionregister, a, XMVectorZero()); + return XMComparisonAnyTrue(conditionregister); +} + +FORCEINLINE bool IsAnyXYZZero( const fltx4 &a ) // are any of x,y,z zero? +{ + // copy a's x component into w, in case w was zero. + fltx4 temp = __vrlimi(a, a, 1, 1); + unsigned int conditionregister; + XMVectorEqualR(&conditionregister, temp, XMVectorZero()); + return XMComparisonAnyTrue(conditionregister); +} + +// for branching when a.xyzw > b.xyzw +FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b ) +{ + unsigned int cr; + XMVectorGreaterR(&cr,a,b); + return XMComparisonAllTrue(cr); +} + +// for branching when a.xyzw >= b.xyzw +FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b ) +{ + unsigned int cr; + XMVectorGreaterOrEqualR(&cr,a,b); + return XMComparisonAllTrue(cr); +} + +// For branching if all a.xyzw == b.xyzw +FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b ) +{ + unsigned int cr; + XMVectorEqualR(&cr,a,b); + return XMComparisonAllTrue(cr); +} + + +FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set +{ + // NOTE: this maps to SSE way better than it does to VMX (most code uses IsAnyNegative(), though) + int nRet = 0; + + const fltx4_union & a_union = (const fltx4_union &)a; + nRet |= ( a_union.m128_u32[0] & 0x80000000 ) >> 31; // sign(x) -> bit 0 + nRet |= ( a_union.m128_u32[1] & 0x80000000 ) >> 30; // sign(y) -> bit 1 + nRet |= ( a_union.m128_u32[2] & 0x80000000 ) >> 29; // sign(z) -> bit 2 + nRet |= ( a_union.m128_u32[3] & 0x80000000 ) >> 28; // sign(w) -> bit 3 + + return nRet; +} + +// Squelch the w component of a vector to 
+0.0. +// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy) +FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a ) +{ + return __vrlimi( a, __vzero(), 1, 0 ); +} + +FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0) +{ + // NOTE: this tests the top bits of each vector element using integer math + // (so it ignores NaNs - it will return true for "-NaN") + unsigned int equalFlags = 0; + fltx4 signMask = __vspltisw( -1 ); // 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF 0xFFFFFFFF (low order 5 bits of each element = 31) + signMask = __vslw( signMask, signMask ); // 0x80000000 0x80000000 0x80000000 0x80000000 + __vcmpequwR( Four_Zeros, __vand( signMask, a ), &equalFlags ); + return !XMComparisonAllTrue( equalFlags ); +} + +FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0 +{ + return __vcmpeqfp( a, b ); +} + + +FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0 +{ + return __vcmpgtfp( a, b ); +} + +FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? ~0:0 +{ + return __vcmpgefp( a, b ); +} + +FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a= -b) ? ~0 : 0 +{ + return XMVectorInBounds( a, b ); +} + +// returned[i] = ReplacementMask[i] == 0 ? OldValue : NewValue +FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue ) +{ + return __vsel( OldValue, NewValue, ReplacementMask ); +} + +// AKA "Broadcast", "Splat" +FORCEINLINE fltx4 ReplicateX4( float flValue ) // a,a,a,a +{ + // NOTE: if flValue comes from a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) 
+ float * pValue = &flValue; + Assert( pValue ); + Assert( ((unsigned int)pValue & 3) == 0); + return __vspltw( __lvlx( pValue, 0 ), 0 ); +} + +FORCEINLINE fltx4 ReplicateX4( const float *pValue ) // a,a,a,a +{ + Assert( pValue ); + return __vspltw( __lvlx( pValue, 0 ), 0 ); +} + +/// replicate a single 32 bit integer value to all 4 components of an m128 +FORCEINLINE fltx4 ReplicateIX4( int nValue ) +{ + // NOTE: if nValue comes from a register, this causes a Load-Hit-Store stall (should not mix ints with fltx4s!) + int * pValue = &nValue; + Assert( pValue ); + Assert( ((unsigned int)pValue & 3) == 0); + return __vspltw( __lvlx( pValue, 0 ), 0 ); +} + +// Round towards positive infinity +FORCEINLINE fltx4 CeilSIMD( const fltx4 &a ) +{ + return __vrfip(a); +} + +// Round towards nearest integer +FORCEINLINE fltx4 RoundSIMD( const fltx4 &a ) +{ + return __vrfin(a); +} + +// Round towards negative infinity +FORCEINLINE fltx4 FloorSIMD( const fltx4 &a ) +{ + return __vrfim(a); +} + +FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less +{ + // This is emulated from rsqrt + return XMVectorSqrtEst( a ); +} + +FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a) +{ + // This is emulated from rsqrt + return XMVectorSqrt( a ); +} + +FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less +{ + return __vrsqrtefp( a ); +} + +FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a ) +{ + // Convert zeros to epsilons + fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); + fltx4 a_safe = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); + return ReciprocalSqrtEstSIMD( a_safe ); +} + +FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a) +{ + // This uses Newton-Raphson to improve the HW result + return XMVectorReciprocalSqrt( a ); +} + +FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less +{ + return __vrefp( a ); +} + +/// 1/x for all 4 values. 
uses reciprocal approximation instruction plus newton iteration. +/// No error checking! +FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a +{ + // This uses Newton-Raphson to improve the HW result + return XMVectorReciprocal( a ); +} + +// FIXME: on 360, this is very slow, since it uses ReciprocalSIMD (do we need DivEstSIMD?) +FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b +{ + return MulSIMD( ReciprocalSIMD( b ), a ); +} + +/// 1/x for all 4 values. +/// 1/0 will result in a big but NOT infinite result +FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a ) +{ + // Convert zeros to epsilons + fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); + fltx4 a_safe = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); + return ReciprocalEstSIMD( a_safe ); +} + +FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a ) +{ + // Convert zeros to epsilons + fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); + fltx4 a_safe = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); + return ReciprocalSIMD( a_safe ); + + // FIXME: This could be faster (BUT: it doesn't preserve the sign of -0.0, whereas the above does) + // fltx4 zeroMask = CmpEqSIMD( Four_Zeros, a ); + // fltx4 a_safe = XMVectorSelect( a, Four_Epsilons, zeroMask ); + // return ReciprocalSIMD( a_safe ); +} + +// CHRISG: is it worth doing integer bitfiddling for this? +// 2^x for all values (the antilog) +FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower ) +{ + return XMVectorExp(toPower); +} + +// Clamps the components of a vector to a specified minimum and maximum range. +FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max) +{ + return XMVectorClamp(in, min, max); +} + +FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD ) +{ + return XMLoadVector4( pSIMD ); +} + +// load a 3-vector (as opposed to LoadUnalignedSIMD, which loads a 4-vec). 
+FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD ) +{ + return XMLoadVector3( pSIMD ); +} + +FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD ) +{ + return *( reinterpret_cast< const fltx4 *> ( pSIMD ) ); +} + +// for the transitional class -- load a 3-by VectorAligned and squash its w component +FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD ) +{ + fltx4 out = XMLoadVector3A(pSIMD.Base()); + // squelch w + return __vrlimi( out, __vzero(), 1, 0 ); +} + +// for the transitional class -- load a 3-by VectorAligned and squash its w component +FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned * RESTRICT pSIMD ) +{ + fltx4 out = XMLoadVector3A(pSIMD); + // squelch w + return __vrlimi( out, __vzero(), 1, 0 ); +} + +FORCEINLINE void StoreAlignedSIMD( float *pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< fltx4 *> ( pSIMD ) ) = a; +} + +FORCEINLINE void StoreUnalignedSIMD( float *pSIMD, const fltx4 & a ) +{ + XMStoreVector4( pSIMD, a ); +} + +FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a ) +{ + XMStoreVector3( pSIMD, a ); +} + + +// strongly typed -- for typechecking as we transition to SIMD +FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a ) +{ + XMStoreVector3A(pSIMD->Base(),a); +} + + +// Fixed-point conversion and save as SIGNED INTS. +// pDest->x = Int (vSrc.x) +// note: some architectures have means of doing +// fixed point conversion when the fix depth is +// specified as an immediate.. but there is no way +// to guarantee an immediate as a parameter to function +// like this. 
+FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc) +{ + fltx4 asInt = __vctsxs( vSrc, 0 ); + XMStoreVector4A(pDest->Base(), asInt); +} + +FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w ) +{ + XMMATRIX xyzwMatrix = _XMMATRIX( x, y, z, w ); + xyzwMatrix = XMMatrixTranspose( xyzwMatrix ); + x = xyzwMatrix.r[0]; + y = xyzwMatrix.r[1]; + z = xyzwMatrix.r[2]; + w = xyzwMatrix.r[3]; +} + +// Return one in the fastest way -- faster even than loading. +FORCEINLINE fltx4 LoadZeroSIMD( void ) +{ + return XMVectorZero(); +} + +// Return one in the fastest way -- faster even than loading. +FORCEINLINE fltx4 LoadOneSIMD( void ) +{ + return XMVectorSplatOne(); +} + +FORCEINLINE fltx4 SplatXSIMD( fltx4 a ) +{ + return XMVectorSplatX( a ); +} + +FORCEINLINE fltx4 SplatYSIMD( fltx4 a ) +{ + return XMVectorSplatY( a ); +} + +FORCEINLINE fltx4 SplatZSIMD( fltx4 a ) +{ + return XMVectorSplatZ( a ); +} + +FORCEINLINE fltx4 SplatWSIMD( fltx4 a ) +{ + return XMVectorSplatW( a ); +} + +FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x ) +{ + fltx4 result = __vrlimi(a, x, 8, 0); + return result; +} + +FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y ) +{ + fltx4 result = __vrlimi(a, y, 4, 0); + return result; +} + +FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z ) +{ + fltx4 result = __vrlimi(a, z, 2, 0); + return result; +} + +FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w ) +{ + fltx4 result = __vrlimi(a, w, 1, 0); + return result; +} + +FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue ) +{ + static int s_nVrlimiMask[4] = { 8, 4, 2, 1 }; + fltx4 val = ReplicateX4( flValue ); + fltx4 result = __vrlimi(a, val, s_nVrlimiMask[nComponent], 0); + return result; +} + +FORCEINLINE fltx4 RotateLeft( const fltx4 & a ) +{ + fltx4 compareOne = a; + return __vrlimi( compareOne, a, 8 | 4 | 2 | 1, 1 ); +} + +FORCEINLINE fltx4 RotateLeft2( const fltx4 & a ) +{ + fltx4 
compareOne = a; + return __vrlimi( compareOne, a, 8 | 4 | 2 | 1, 2 ); +} + + + +// find the lowest component of a.x, a.y, a.z, +// and replicate it to the whole return value. +// ignores a.w. +// Though this is only five instructions long, +// they are all dependent, making this stall city. +// Forcing this inline should hopefully help with scheduling. +FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 & a ) +{ + // a is [x,y,z,G] (where G is garbage) + // rotate left by one + fltx4 compareOne = a ; + compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 ); + // compareOne is [y,z,G,G] + fltx4 retval = MinSIMD( a, compareOne ); + // retVal is [min(x,y), min(y,z), G, G] + compareOne = __vrlimi( compareOne, a, 8 , 2); + // compareOne is [z, G, G, G] + retval = MinSIMD( retval, compareOne ); + // retVal = [ min(min(x,y),z), G, G, G ] + + // splat the x component out to the whole vector and return + return SplatXSIMD( retval ); +} + +// find the highest component of a.x, a.y, a.z, +// and replicate it to the whole return value. +// ignores a.w. +// Though this is only five instructions long, +// they are all dependent, making this stall city. +// Forcing this inline should hopefully help with scheduling. +FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 & a ) +{ + // a is [x,y,z,G] (where G is garbage) + // rotate left by one + fltx4 compareOne = a ; + compareOne = __vrlimi( compareOne, a, 8 | 4 , 1 ); + // compareOne is [y,z,G,G] + fltx4 retval = MaxSIMD( a, compareOne ); + // retVal is [max(x,y), max(y,z), G, G] + compareOne = __vrlimi( compareOne, a, 8 , 2); + // compareOne is [z, G, G, G] + retval = MaxSIMD( retval, compareOne ); + // retVal = [ max(max(x,y),z), G, G, G ] + + // splat the x component out to the whole vector and return + return SplatXSIMD( retval ); +} + + +// Transform many (horizontal) points in-place by a 3x4 matrix, +// here already loaded onto three fltx4 registers. +// The points must be stored as 16-byte aligned. 
They are points +// and not vectors because we assume the w-component to be 1. +// To spare yourself the annoyance of loading the matrix yourself, +// use one of the overloads below. +void TransformManyPointsBy(VectorAligned * RESTRICT pVectors, unsigned int numVectors, FLTX4 mRow1, FLTX4 mRow2, FLTX4 mRow3); + +// Transform many (horizontal) points in-place by a 3x4 matrix. +// The points must be stored as 16-byte aligned. They are points +// and not vectors because we assume the w-component to be 1. +// In this function, the matrix need not be aligned. +FORCEINLINE void TransformManyPointsBy(VectorAligned * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t &pMatrix) +{ + return TransformManyPointsBy(pVectors, numVectors, + LoadUnalignedSIMD( pMatrix[0] ), LoadUnalignedSIMD( pMatrix[1] ), LoadUnalignedSIMD( pMatrix[2] ) ); +} + +// Transform many (horizontal) points in-place by a 3x4 matrix. +// The points must be stored as 16-byte aligned. They are points +// and not vectors because we assume the w-component to be 1. +// In this function, the matrix must itself be aligned on a 16-byte +// boundary. +FORCEINLINE void TransformManyPointsByA(VectorAligned * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t &pMatrix) +{ + return TransformManyPointsBy(pVectors, numVectors, + LoadAlignedSIMD( pMatrix[0] ), LoadAlignedSIMD( pMatrix[1] ), LoadAlignedSIMD( pMatrix[2] ) ); +} + +// ------------------------------------ +// INTEGER SIMD OPERATIONS. 
+// ------------------------------------ + +// Load 4 aligned words into a SIMD register +FORCEINLINE i32x4 LoadAlignedIntSIMD( const void * RESTRICT pSIMD) +{ + return XMLoadVector4A(pSIMD); +} + +// Load 4 unaligned words into a SIMD register +FORCEINLINE i32x4 LoadUnalignedIntSIMD(const void * RESTRICT pSIMD) +{ + return XMLoadVector4( pSIMD ); +} + +// save into four words, 16-byte aligned +FORCEINLINE void StoreAlignedIntSIMD( int32 *pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< i32x4 *> ( pSIMD ) ) = a; +} + +FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a ) +{ + *( reinterpret_cast< i32x4 *> ( pSIMD.Base() ) ) = a; +} + +FORCEINLINE void StoreUnalignedIntSIMD( int32 *pSIMD, const fltx4 & a ) +{ + XMStoreVector4(pSIMD, a); +} + + +// Take a fltx4 containing fixed-point uints and +// return them as single precision floats. No +// fixed point conversion is done. +FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const i32x4 &vSrcA ) +{ + return __vcfux( vSrcA, 0 ); +} + + +// Take a fltx4 containing fixed-point sints and +// return them as single precision floats. No +// fixed point conversion is done. +FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA ) +{ + return __vcfsx( vSrcA, 0 ); +} + +// Take a fltx4 containing fixed-point uints and +// return them as single precision floats. Each uint +// will be divided by 2^immed after conversion +// (eg, this is fixed point math). +/* as if: + FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const i32x4 &vSrcA, unsigned int uImmed ) + { + return __vcfux( vSrcA, uImmed ); + } +*/ +#define UnsignedFixedIntConvertToFltSIMD(vSrcA, uImmed) (__vcfux( (vSrcA), (uImmed) )) + +// Take a fltx4 containing fixed-point sints and +// return them as single precision floats. Each int +// will be divided by 2^immed (eg, this is fixed point +// math). 
+/* as if: + FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA, unsigned int uImmed ) + { + return __vcfsx( vSrcA, uImmed ); + } +*/ +#define SignedFixedIntConvertToFltSIMD(vSrcA, uImmed) (__vcfsx( (vSrcA), (uImmed) )) + +// set all components of a vector to a signed immediate int number. +/* as if: + FORCEINLINE fltx4 IntSetImmediateSIMD(int toImmediate) + { + return __vspltisw( toImmediate ); + } +*/ +#define IntSetImmediateSIMD(x) (__vspltisw(x)) + +/* + works on fltx4's as if they are four uints. + the first parameter contains the words to be shifted, + the second contains the amount to shift by AS INTS + + for i = 0 to 3 + shift = vSrcB_i*32:(i*32)+4 + vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift +*/ +FORCEINLINE fltx4 IntShiftLeftWordSIMD(fltx4 vSrcA, fltx4 vSrcB) +{ + return __vslw(vSrcA, vSrcB); +} + +FORCEINLINE float SubFloat( const fltx4 & a, int idx ) +{ + // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) 
+ const fltx4_union & a_union = (const fltx4_union &)a; + return a_union.m128_f32[ idx ]; +} + +FORCEINLINE float & SubFloat( fltx4 & a, int idx ) +{ + fltx4_union & a_union = (fltx4_union &)a; + return a_union.m128_f32[idx]; +} + +FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx ) +{ + fltx4 t = __vctuxs( a, 0 ); + const fltx4_union & a_union = (const fltx4_union &)t; + return a_union.m128_u32[idx]; +} + + +FORCEINLINE uint32 SubInt( const fltx4 & a, int idx ) +{ + const fltx4_union & a_union = (const fltx4_union &)a; + return a_union.m128_u32[idx]; +} + +FORCEINLINE uint32 & SubInt( fltx4 & a, int idx ) +{ + fltx4_union & a_union = (fltx4_union &)a; + return a_union.m128_u32[idx]; +} + +#else + +//--------------------------------------------------------------------- +// Intel/SSE implementation +//--------------------------------------------------------------------- + +FORCEINLINE void StoreAlignedSIMD( float * RESTRICT pSIMD, const fltx4 & a ) +{ + _mm_store_ps( pSIMD, a ); +} + +FORCEINLINE void StoreUnalignedSIMD( float * RESTRICT pSIMD, const fltx4 & a ) +{ + _mm_storeu_ps( pSIMD, a ); +} + + +FORCEINLINE fltx4 RotateLeft( const fltx4 & a ); +FORCEINLINE fltx4 RotateLeft2( const fltx4 & a ); + +FORCEINLINE void StoreUnaligned3SIMD( float *pSIMD, const fltx4 & a ) +{ + _mm_store_ss(pSIMD, a); + _mm_store_ss(pSIMD+1, RotateLeft(a)); + _mm_store_ss(pSIMD+2, RotateLeft2(a)); +} + +// strongly typed -- syntactic castor oil used for typechecking as we transition to SIMD +FORCEINLINE void StoreAligned3SIMD( VectorAligned * RESTRICT pSIMD, const fltx4 & a ) +{ + StoreAlignedSIMD( pSIMD->Base(),a ); +} + +FORCEINLINE fltx4 LoadAlignedSIMD( const void *pSIMD ) +{ + return _mm_load_ps( reinterpret_cast< const float *> ( pSIMD ) ); +} + +FORCEINLINE fltx4 AndSIMD( const fltx4 & a, const fltx4 & b ) // a & b +{ + return _mm_and_ps( a, b ); +} + +FORCEINLINE fltx4 AndNotSIMD( const fltx4 & a, const fltx4 & b ) // ~a & b +{ + return _mm_andnot_ps( a, b ); 
+} + +FORCEINLINE fltx4 XorSIMD( const fltx4 & a, const fltx4 & b ) // a ^ b +{ + return _mm_xor_ps( a, b ); +} + +FORCEINLINE fltx4 OrSIMD( const fltx4 & a, const fltx4 & b ) // a | b +{ + return _mm_or_ps( a, b ); +} + +// Squelch the w component of a vector to +0.0. +// Most efficient when you say a = SetWToZeroSIMD(a) (avoids a copy) +FORCEINLINE fltx4 SetWToZeroSIMD( const fltx4 & a ) +{ + return AndSIMD( a, LoadAlignedSIMD( g_SIMD_clear_wmask ) ); +} + +// for the transitional class -- load a 3-by VectorAligned and squash its w component +FORCEINLINE fltx4 LoadAlignedSIMD( const VectorAligned & pSIMD ) +{ + return SetWToZeroSIMD( LoadAlignedSIMD(pSIMD.Base()) ); +} + +FORCEINLINE fltx4 LoadUnalignedSIMD( const void *pSIMD ) +{ + return _mm_loadu_ps( reinterpret_cast( pSIMD ) ); +} + +FORCEINLINE fltx4 LoadUnaligned3SIMD( const void *pSIMD ) +{ + return _mm_loadu_ps( reinterpret_cast( pSIMD ) ); +} + +/// replicate a single 32 bit integer value to all 4 components of an m128 +FORCEINLINE fltx4 ReplicateIX4( int i ) +{ + fltx4 value = _mm_set_ss( * ( ( float *) &i ) );; + return _mm_shuffle_ps( value, value, 0); +} + + +FORCEINLINE fltx4 ReplicateX4( float flValue ) +{ + __m128 value = _mm_set_ss( flValue ); + return _mm_shuffle_ps( value, value, 0 ); +} + + +FORCEINLINE float SubFloat( const fltx4 & a, int idx ) +{ + // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) 
+#ifndef POSIX + return a.m128_f32[ idx ]; +#else + return (reinterpret_cast(&a))[idx]; +#endif +} + +FORCEINLINE float & SubFloat( fltx4 & a, int idx ) +{ +#ifndef POSIX + return a.m128_f32[ idx ]; +#else + return (reinterpret_cast(&a))[idx]; +#endif +} + +FORCEINLINE uint32 SubFloatConvertToInt( const fltx4 & a, int idx ) +{ + return (uint32)SubFloat(a,idx); +} + +FORCEINLINE uint32 SubInt( const fltx4 & a, int idx ) +{ +#ifndef POSIX + return a.m128_u32[idx]; +#else + return (reinterpret_cast(&a))[idx]; +#endif +} + +FORCEINLINE uint32 & SubInt( fltx4 & a, int idx ) +{ +#ifndef POSIX + return a.m128_u32[idx]; +#else + return (reinterpret_cast(&a))[idx]; +#endif +} + +// Return one in the fastest way -- on the x360, faster even than loading. +FORCEINLINE fltx4 LoadZeroSIMD( void ) +{ + return Four_Zeros; +} + +// Return one in the fastest way -- on the x360, faster even than loading. +FORCEINLINE fltx4 LoadOneSIMD( void ) +{ + return Four_Ones; +} + +FORCEINLINE fltx4 MaskedAssign( const fltx4 & ReplacementMask, const fltx4 & NewValue, const fltx4 & OldValue ) +{ + return OrSIMD( + AndSIMD( ReplacementMask, NewValue ), + AndNotSIMD( ReplacementMask, OldValue ) ); +} + +// remember, the SSE numbers its words 3 2 1 0 +// The way we want to specify shuffles is backwards from the default +// MM_SHUFFLE_REV is in array index order (default is reversed) +#define MM_SHUFFLE_REV(a,b,c,d) _MM_SHUFFLE(d,c,b,a) + +FORCEINLINE fltx4 SplatXSIMD( fltx4 const & a ) +{ + return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 0, 0, 0, 0 ) ); +} + +FORCEINLINE fltx4 SplatYSIMD( fltx4 const &a ) +{ + return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 1, 1, 1, 1 ) ); +} + +FORCEINLINE fltx4 SplatZSIMD( fltx4 const &a ) +{ + return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 2, 2, 2, 2 ) ); +} + +FORCEINLINE fltx4 SplatWSIMD( fltx4 const &a ) +{ + return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 3, 3, 3, 3 ) ); +} + +FORCEINLINE fltx4 SetXSIMD( const fltx4& a, const fltx4& x ) +{ + fltx4 result = MaskedAssign( 
LoadAlignedSIMD( g_SIMD_ComponentMask[0] ), x, a ); + return result; +} + +FORCEINLINE fltx4 SetYSIMD( const fltx4& a, const fltx4& y ) +{ + fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[1] ), y, a ); + return result; +} + +FORCEINLINE fltx4 SetZSIMD( const fltx4& a, const fltx4& z ) +{ + fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[2] ), z, a ); + return result; +} + +FORCEINLINE fltx4 SetWSIMD( const fltx4& a, const fltx4& w ) +{ + fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[3] ), w, a ); + return result; +} + +FORCEINLINE fltx4 SetComponentSIMD( const fltx4& a, int nComponent, float flValue ) +{ + fltx4 val = ReplicateX4( flValue ); + fltx4 result = MaskedAssign( LoadAlignedSIMD( g_SIMD_ComponentMask[nComponent] ), val, a ); + return result; +} + +// a b c d -> b c d a +FORCEINLINE fltx4 RotateLeft( const fltx4 & a ) +{ + return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 1, 2, 3, 0 ) ); +} + +// a b c d -> c d a b +FORCEINLINE fltx4 RotateLeft2( const fltx4 & a ) +{ + return _mm_shuffle_ps( a, a, MM_SHUFFLE_REV( 2, 3, 0, 1 ) ); +} + +// a b c d -> d a b c +FORCEINLINE fltx4 RotateRight( const fltx4 & a ) +{ + return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 0, 3, 2, 1) ); +} + +// a b c d -> c d a b +FORCEINLINE fltx4 RotateRight2( const fltx4 & a ) +{ + return _mm_shuffle_ps( a, a, _MM_SHUFFLE( 1, 0, 3, 2 ) ); +} + + +FORCEINLINE fltx4 AddSIMD( const fltx4 & a, const fltx4 & b ) // a+b +{ + return _mm_add_ps( a, b ); +}; + +FORCEINLINE fltx4 SubSIMD( const fltx4 & a, const fltx4 & b ) // a-b +{ + return _mm_sub_ps( a, b ); +}; + +FORCEINLINE fltx4 MulSIMD( const fltx4 & a, const fltx4 & b ) // a*b +{ + return _mm_mul_ps( a, b ); +}; + +FORCEINLINE fltx4 DivSIMD( const fltx4 & a, const fltx4 & b ) // a/b +{ + return _mm_div_ps( a, b ); +}; + +FORCEINLINE fltx4 MaddSIMD( const fltx4 & a, const fltx4 & b, const fltx4 & c ) // a*b + c +{ + return AddSIMD( MulSIMD(a,b), c ); +} + +FORCEINLINE fltx4 MsubSIMD( 
const fltx4 & a, const fltx4 & b, const fltx4 & c ) // c - a*b +{ + return SubSIMD( c, MulSIMD(a,b) ); +}; + +FORCEINLINE fltx4 Dot3SIMD( const fltx4 &a, const fltx4 &b ) +{ + fltx4 m = MulSIMD( a, b ); + float flDot = SubFloat( m, 0 ) + SubFloat( m, 1 ) + SubFloat( m, 2 ); + return ReplicateX4( flDot ); +} + +FORCEINLINE fltx4 Dot4SIMD( const fltx4 &a, const fltx4 &b ) +{ + fltx4 m = MulSIMD( a, b ); + float flDot = SubFloat( m, 0 ) + SubFloat( m, 1 ) + SubFloat( m, 2 ) + SubFloat( m, 3 ); + return ReplicateX4( flDot ); +} + +//TODO: implement as four-way Taylor series (see xbox implementation) +FORCEINLINE fltx4 SinSIMD( const fltx4 &radians ) +{ + fltx4 result; + SubFloat( result, 0 ) = sin( SubFloat( radians, 0 ) ); + SubFloat( result, 1 ) = sin( SubFloat( radians, 1 ) ); + SubFloat( result, 2 ) = sin( SubFloat( radians, 2 ) ); + SubFloat( result, 3 ) = sin( SubFloat( radians, 3 ) ); + return result; +} + +FORCEINLINE void SinCos3SIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) +{ + // FIXME: Make a fast SSE version + SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) ); + SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) ); + SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) ); +} + +FORCEINLINE void SinCosSIMD( fltx4 &sine, fltx4 &cosine, const fltx4 &radians ) // a*b + c +{ + // FIXME: Make a fast SSE version + SinCos( SubFloat( radians, 0 ), &SubFloat( sine, 0 ), &SubFloat( cosine, 0 ) ); + SinCos( SubFloat( radians, 1 ), &SubFloat( sine, 1 ), &SubFloat( cosine, 1 ) ); + SinCos( SubFloat( radians, 2 ), &SubFloat( sine, 2 ), &SubFloat( cosine, 2 ) ); + SinCos( SubFloat( radians, 3 ), &SubFloat( sine, 3 ), &SubFloat( cosine, 3 ) ); +} + +//TODO: implement as four-way Taylor series (see xbox implementation) +FORCEINLINE fltx4 ArcSinSIMD( const fltx4 &sine ) +{ + // FIXME: Make a fast SSE version + fltx4 result; + SubFloat( result, 0 ) = asin( SubFloat( sine, 0 ) ); + 
SubFloat( result, 1 ) = asin( SubFloat( sine, 1 ) ); + SubFloat( result, 2 ) = asin( SubFloat( sine, 2 ) ); + SubFloat( result, 3 ) = asin( SubFloat( sine, 3 ) ); + return result; +} + +FORCEINLINE fltx4 ArcCosSIMD( const fltx4 &cs ) +{ + fltx4 result; + SubFloat( result, 0 ) = acos( SubFloat( cs, 0 ) ); + SubFloat( result, 1 ) = acos( SubFloat( cs, 1 ) ); + SubFloat( result, 2 ) = acos( SubFloat( cs, 2 ) ); + SubFloat( result, 3 ) = acos( SubFloat( cs, 3 ) ); + return result; +} + +// tan^1(a/b) .. ie, pass sin in as a and cos in as b +FORCEINLINE fltx4 ArcTan2SIMD( const fltx4 &a, const fltx4 &b ) +{ + fltx4 result; + SubFloat( result, 0 ) = atan2( SubFloat( a, 0 ), SubFloat( b, 0 ) ); + SubFloat( result, 1 ) = atan2( SubFloat( a, 1 ), SubFloat( b, 1 ) ); + SubFloat( result, 2 ) = atan2( SubFloat( a, 2 ), SubFloat( b, 2 ) ); + SubFloat( result, 3 ) = atan2( SubFloat( a, 3 ), SubFloat( b, 3 ) ); + return result; +} + +FORCEINLINE fltx4 NegSIMD(const fltx4 &a) // negate: -a +{ + return SubSIMD(LoadZeroSIMD(),a); +} + +FORCEINLINE int TestSignSIMD( const fltx4 & a ) // mask of which floats have the high bit set +{ + return _mm_movemask_ps( a ); +} + +FORCEINLINE bool IsAnyNegative( const fltx4 & a ) // (a.x < 0) || (a.y < 0) || (a.z < 0) || (a.w < 0) +{ + return (0 != TestSignSIMD( a )); +} + +FORCEINLINE fltx4 CmpEqSIMD( const fltx4 & a, const fltx4 & b ) // (a==b) ? ~0:0 +{ + return _mm_cmpeq_ps( a, b ); +} + +FORCEINLINE fltx4 CmpGtSIMD( const fltx4 & a, const fltx4 & b ) // (a>b) ? ~0:0 +{ + return _mm_cmpgt_ps( a, b ); +} + +FORCEINLINE fltx4 CmpGeSIMD( const fltx4 & a, const fltx4 & b ) // (a>=b) ? 
~0:0 +{ + return _mm_cmpge_ps( a, b ); +} + +FORCEINLINE fltx4 CmpLtSIMD( const fltx4 & a, const fltx4 & b ) // (a b.xyzw +FORCEINLINE bool IsAllGreaterThan( const fltx4 &a, const fltx4 &b ) +{ + return TestSignSIMD( CmpLeSIMD( a, b ) ) == 0; +} + +// for branching when a.xyzw >= b.xyzw +FORCEINLINE bool IsAllGreaterThanOrEq( const fltx4 &a, const fltx4 &b ) +{ + return TestSignSIMD( CmpLtSIMD( a, b ) ) == 0; +} + +// For branching if all a.xyzw == b.xyzw +FORCEINLINE bool IsAllEqual( const fltx4 & a, const fltx4 & b ) +{ + return TestSignSIMD( CmpEqSIMD( a, b ) ) == 0xf; +} + +FORCEINLINE fltx4 CmpInBoundsSIMD( const fltx4 & a, const fltx4 & b ) // (a <= b && a >= -b) ? ~0 : 0 +{ + return AndSIMD( CmpLeSIMD(a,b), CmpGeSIMD(a, NegSIMD(b)) ); +} + +FORCEINLINE fltx4 MinSIMD( const fltx4 & a, const fltx4 & b ) // min(a,b) +{ + return _mm_min_ps( a, b ); +} + +FORCEINLINE fltx4 MaxSIMD( const fltx4 & a, const fltx4 & b ) // max(a,b) +{ + return _mm_max_ps( a, b ); +} + + + +// SSE lacks rounding operations. +// Really. +// You can emulate them by setting the rounding mode for the +// whole processor and then converting to int, and then back again. +// But every time you set the rounding mode, you clear out the +// entire pipeline. So, I can't do them per operation. You +// have to do it once, before the loop that would call these. +// Round towards positive infinity +FORCEINLINE fltx4 CeilSIMD( const fltx4 &a ) +{ + fltx4 retVal; + SubFloat( retVal, 0 ) = ceil( SubFloat( a, 0 ) ); + SubFloat( retVal, 1 ) = ceil( SubFloat( a, 1 ) ); + SubFloat( retVal, 2 ) = ceil( SubFloat( a, 2 ) ); + SubFloat( retVal, 3 ) = ceil( SubFloat( a, 3 ) ); + return retVal; + +} + +fltx4 fabs( const fltx4 & x ); +// Round towards negative infinity +// This is the implementation that was here before; it assumes +// you are in round-to-floor mode, which I guess is usually the +// case for us vis-a-vis SSE. It's totally unnecessary on +// VMX, which has a native floor op. 
+FORCEINLINE fltx4 FloorSIMD( const fltx4 &val ) +{ + fltx4 fl4Abs = fabs( val ); + fltx4 ival = SubSIMD( AddSIMD( fl4Abs, Four_2ToThe23s ), Four_2ToThe23s ); + ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Ones ), ival ); + return XorSIMD( ival, XorSIMD( val, fl4Abs ) ); // restore sign bits +} + + + +inline bool IsAllZeros( const fltx4 & var ) +{ + return TestSignSIMD( CmpEqSIMD( var, Four_Zeros ) ) == 0xF; +} + +FORCEINLINE fltx4 SqrtEstSIMD( const fltx4 & a ) // sqrt(a), more or less +{ + return _mm_sqrt_ps( a ); +} + +FORCEINLINE fltx4 SqrtSIMD( const fltx4 & a ) // sqrt(a) +{ + return _mm_sqrt_ps( a ); +} + +FORCEINLINE fltx4 ReciprocalSqrtEstSIMD( const fltx4 & a ) // 1/sqrt(a), more or less +{ + return _mm_rsqrt_ps( a ); +} + +FORCEINLINE fltx4 ReciprocalSqrtEstSaturateSIMD( const fltx4 & a ) +{ + fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); + fltx4 ret = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); + ret = ReciprocalSqrtEstSIMD( ret ); + return ret; +} + +/// uses newton iteration for higher precision results than ReciprocalSqrtEstSIMD +FORCEINLINE fltx4 ReciprocalSqrtSIMD( const fltx4 & a ) // 1/sqrt(a) +{ + fltx4 guess = ReciprocalSqrtEstSIMD( a ); + // newton iteration for 1/sqrt(a) : y(n+1) = 1/2 (y(n)*(3-a*y(n)^2)); + guess = MulSIMD( guess, SubSIMD( Four_Threes, MulSIMD( a, MulSIMD( guess, guess )))); + guess = MulSIMD( Four_PointFives, guess); + return guess; +} + +FORCEINLINE fltx4 ReciprocalEstSIMD( const fltx4 & a ) // 1/a, more or less +{ + return _mm_rcp_ps( a ); +} + +/// 1/x for all 4 values, more or less +/// 1/0 will result in a big but NOT infinite result +FORCEINLINE fltx4 ReciprocalEstSaturateSIMD( const fltx4 & a ) +{ + fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); + fltx4 ret = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); + ret = ReciprocalEstSIMD( ret ); + return ret; +} + +/// 1/x for all 4 values. uses reciprocal approximation instruction plus newton iteration. +/// No error checking! 
+FORCEINLINE fltx4 ReciprocalSIMD( const fltx4 & a ) // 1/a +{ + fltx4 ret = ReciprocalEstSIMD( a ); + // newton iteration is: Y(n+1) = 2*Y(n)-a*Y(n)^2 + ret = SubSIMD( AddSIMD( ret, ret ), MulSIMD( a, MulSIMD( ret, ret ) ) ); + return ret; +} + +/// 1/x for all 4 values. +/// 1/0 will result in a big but NOT infinite result +FORCEINLINE fltx4 ReciprocalSaturateSIMD( const fltx4 & a ) +{ + fltx4 zero_mask = CmpEqSIMD( a, Four_Zeros ); + fltx4 ret = OrSIMD( a, AndSIMD( Four_Epsilons, zero_mask ) ); + ret = ReciprocalSIMD( ret ); + return ret; +} + +// CHRISG: is it worth doing integer bitfiddling for this? +// 2^x for all values (the antilog) +FORCEINLINE fltx4 ExpSIMD( const fltx4 &toPower ) +{ + fltx4 retval; + SubFloat( retval, 0 ) = powf( 2, SubFloat(toPower, 0) ); + SubFloat( retval, 1 ) = powf( 2, SubFloat(toPower, 1) ); + SubFloat( retval, 2 ) = powf( 2, SubFloat(toPower, 2) ); + SubFloat( retval, 3 ) = powf( 2, SubFloat(toPower, 3) ); + + return retval; +} + +// Clamps the components of a vector to a specified minimum and maximum range. +FORCEINLINE fltx4 ClampVectorSIMD( FLTX4 in, FLTX4 min, FLTX4 max) +{ + return MaxSIMD( min, MinSIMD( max, in ) ); +} + +FORCEINLINE void TransposeSIMD( fltx4 & x, fltx4 & y, fltx4 & z, fltx4 & w) +{ + _MM_TRANSPOSE4_PS( x, y, z, w ); +} + +FORCEINLINE fltx4 FindLowestSIMD3( const fltx4 &a ) +{ + // a is [x,y,z,G] (where G is garbage) + // rotate left by one + fltx4 compareOne = RotateLeft( a ); + // compareOne is [y,z,G,x] + fltx4 retval = MinSIMD( a, compareOne ); + // retVal is [min(x,y), ... ] + compareOne = RotateLeft2( a ); + // compareOne is [z, G, x, y] + retval = MinSIMD( retval, compareOne ); + // retVal = [ min(min(x,y),z)..] 
+ // splat the x component out to the whole vector and return + return SplatXSIMD( retval ); + +} + +FORCEINLINE fltx4 FindHighestSIMD3( const fltx4 &a ) +{ + // a is [x,y,z,G] (where G is garbage) + // rotate left by one + fltx4 compareOne = RotateLeft( a ); + // compareOne is [y,z,G,x] + fltx4 retval = MaxSIMD( a, compareOne ); + // retVal is [max(x,y), ... ] + compareOne = RotateLeft2( a ); + // compareOne is [z, G, x, y] + retval = MaxSIMD( retval, compareOne ); + // retVal = [ max(max(x,y),z)..] + // splat the x component out to the whole vector and return + return SplatXSIMD( retval ); + +} + +// ------------------------------------ +// INTEGER SIMD OPERATIONS. +// ------------------------------------ + + +#if 0 /* pc does not have these ops */ +// splat all components of a vector to a signed immediate int number. +FORCEINLINE fltx4 IntSetImmediateSIMD(int to) +{ + //CHRISG: SSE2 has this, but not SSE1. What to do? + fltx4 retval; + SubInt( retval, 0 ) = to; + SubInt( retval, 1 ) = to; + SubInt( retval, 2 ) = to; + SubInt( retval, 3 ) = to; + return retval; +} +#endif + +// Load 4 aligned words into a SIMD register +FORCEINLINE i32x4 LoadAlignedIntSIMD( const void * RESTRICT pSIMD) +{ + return _mm_load_ps( reinterpret_cast(pSIMD) ); +} + +// Load 4 unaligned words into a SIMD register +FORCEINLINE i32x4 LoadUnalignedIntSIMD( const void * RESTRICT pSIMD) +{ + return _mm_loadu_ps( reinterpret_cast(pSIMD) ); +} + +// save into four words, 16-byte aligned +FORCEINLINE void StoreAlignedIntSIMD( int32 * RESTRICT pSIMD, const fltx4 & a ) +{ + _mm_store_ps( reinterpret_cast(pSIMD), a ); +} + +FORCEINLINE void StoreAlignedIntSIMD( intx4 &pSIMD, const fltx4 & a ) +{ + _mm_store_ps( reinterpret_cast(pSIMD.Base()), a ); +} + +FORCEINLINE void StoreUnalignedIntSIMD( int32 * RESTRICT pSIMD, const fltx4 & a ) +{ + _mm_storeu_ps( reinterpret_cast(pSIMD), a ); +} + + +// CHRISG: the conversion functions all seem to operate on m64's only... +// how do we make them work here? 
+ +// Take a fltx4 containing fixed-point uints and +// return them as single precision floats. No +// fixed point conversion is done. +FORCEINLINE fltx4 UnsignedIntConvertToFltSIMD( const u32x4 &vSrcA ) +{ + fltx4 retval; + SubFloat( retval, 0 ) = ( (float) SubInt( retval, 0 ) ); + SubFloat( retval, 1 ) = ( (float) SubInt( retval, 1 ) ); + SubFloat( retval, 2 ) = ( (float) SubInt( retval, 2 ) ); + SubFloat( retval, 3 ) = ( (float) SubInt( retval, 3 ) ); + return retval; +} + + +// Take a fltx4 containing fixed-point sints and +// return them as single precision floats. No +// fixed point conversion is done. +FORCEINLINE fltx4 SignedIntConvertToFltSIMD( const i32x4 &vSrcA ) +{ + fltx4 retval; + SubFloat( retval, 0 ) = ( (float) (reinterpret_cast(&vSrcA)[0])); + SubFloat( retval, 1 ) = ( (float) (reinterpret_cast(&vSrcA)[1])); + SubFloat( retval, 2 ) = ( (float) (reinterpret_cast(&vSrcA)[2])); + SubFloat( retval, 3 ) = ( (float) (reinterpret_cast(&vSrcA)[3])); + return retval; +} + +/* + works on fltx4's as if they are four uints. + the first parameter contains the words to be shifted, + the second contains the amount to shift by AS INTS + + for i = 0 to 3 + shift = vSrcB_i*32:(i*32)+4 + vReturned_i*32:(i*32)+31 = vSrcA_i*32:(i*32)+31 << shift +*/ +FORCEINLINE i32x4 IntShiftLeftWordSIMD(const i32x4 &vSrcA, const i32x4 &vSrcB) +{ + i32x4 retval; + SubInt(retval, 0) = SubInt(vSrcA, 0) << SubInt(vSrcB, 0); + SubInt(retval, 1) = SubInt(vSrcA, 1) << SubInt(vSrcB, 1); + SubInt(retval, 2) = SubInt(vSrcA, 2) << SubInt(vSrcB, 2); + SubInt(retval, 3) = SubInt(vSrcA, 3) << SubInt(vSrcB, 3); + + + return retval; +} + + +// Fixed-point conversion and save as SIGNED INTS. +// pDest->x = Int (vSrc.x) +// note: some architectures have means of doing +// fixed point conversion when the fix depth is +// specified as an immediate.. but there is no way +// to guarantee an immediate as a parameter to function +// like this. 
+FORCEINLINE void ConvertStoreAsIntsSIMD(intx4 * RESTRICT pDest, const fltx4 &vSrc) +{ + __m64 bottom = _mm_cvttps_pi32( vSrc ); + __m64 top = _mm_cvttps_pi32( _mm_movehl_ps(vSrc,vSrc) ); + + *reinterpret_cast<__m64 *>(&(*pDest)[0]) = bottom; + *reinterpret_cast<__m64 *>(&(*pDest)[2]) = top; + + _mm_empty(); +} + + + +#endif + + + +/// class FourVectors stores 4 independent vectors for use in SIMD processing. These vectors are +/// stored in the format x x x x y y y y z z z z so that they can be efficiently SIMD-accelerated. +class ALIGN16 FourVectors +{ +public: + fltx4 x, y, z; + + FORCEINLINE void DuplicateVector(Vector const &v) //< set all 4 vectors to the same vector value + { + x=ReplicateX4(v.x); + y=ReplicateX4(v.y); + z=ReplicateX4(v.z); + } + + FORCEINLINE fltx4 const & operator[](int idx) const + { + return *((&x)+idx); + } + + FORCEINLINE fltx4 & operator[](int idx) + { + return *((&x)+idx); + } + + FORCEINLINE void operator+=(FourVectors const &b) //< add 4 vectors to another 4 vectors + { + x=AddSIMD(x,b.x); + y=AddSIMD(y,b.y); + z=AddSIMD(z,b.z); + } + + FORCEINLINE void operator-=(FourVectors const &b) //< subtract 4 vectors from another 4 + { + x=SubSIMD(x,b.x); + y=SubSIMD(y,b.y); + z=SubSIMD(z,b.z); + } + + FORCEINLINE void operator*=(FourVectors const &b) //< scale all four vectors per component scale + { + x=MulSIMD(x,b.x); + y=MulSIMD(y,b.y); + z=MulSIMD(z,b.z); + } + + FORCEINLINE void operator*=(const fltx4 & scale) //< scale + { + x=MulSIMD(x,scale); + y=MulSIMD(y,scale); + z=MulSIMD(z,scale); + } + + FORCEINLINE void operator*=(float scale) //< uniformly scale all 4 vectors + { + fltx4 scalepacked = ReplicateX4(scale); + *this *= scalepacked; + } + + FORCEINLINE fltx4 operator*(FourVectors const &b) const //< 4 dot products + { + fltx4 dot=MulSIMD(x,b.x); + dot=MaddSIMD(y,b.y,dot); + dot=MaddSIMD(z,b.z,dot); + return dot; + } + + FORCEINLINE fltx4 operator*(Vector const &b) const //< dot product all 4 vectors with 1 vector + { + fltx4 
dot=MulSIMD(x,ReplicateX4(b.x)); + dot=MaddSIMD(y,ReplicateX4(b.y), dot); + dot=MaddSIMD(z,ReplicateX4(b.z), dot); + return dot; + } + + FORCEINLINE void VProduct(FourVectors const &b) //< component by component mul + { + x=MulSIMD(x,b.x); + y=MulSIMD(y,b.y); + z=MulSIMD(z,b.z); + } + FORCEINLINE void MakeReciprocal(void) //< (x,y,z)=(1/x,1/y,1/z) + { + x=ReciprocalSIMD(x); + y=ReciprocalSIMD(y); + z=ReciprocalSIMD(z); + } + + FORCEINLINE void MakeReciprocalSaturate(void) //< (x,y,z)=(1/x,1/y,1/z), 1/0=1.0e23 + { + x=ReciprocalSaturateSIMD(x); + y=ReciprocalSaturateSIMD(y); + z=ReciprocalSaturateSIMD(z); + } + + // Assume the given matrix is a rotation, and rotate these vectors by it. + // If you have a long list of FourVectors structures that you all want + // to rotate by the same matrix, use FourVectors::RotateManyBy() instead. + inline void RotateBy(const matrix3x4_t& matrix); + + /// You can use this to rotate a long array of FourVectors all by the same + /// matrix. The first parameter is the head of the array. The second is the + /// number of vectors to rotate. The third is the matrix. + static void RotateManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix ); + + /// Assume the vectors are points, and transform them in place by the matrix. + inline void TransformBy(const matrix3x4_t& matrix); + + /// You can use this to Transform a long array of FourVectors all by the same + /// matrix. The first parameter is the head of the array. The second is the + /// number of vectors to rotate. The third is the matrix. The fourth is the + /// output buffer, which must not overlap the pVectors buffer. This is not + /// an in-place transformation. + static void TransformManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix, FourVectors * RESTRICT pOut ); + + /// You can use this to Transform a long array of FourVectors all by the same + /// matrix. 
The first parameter is the head of the array. The second is the + /// number of vectors to rotate. The third is the matrix. The fourth is the + /// output buffer, which must not overlap the pVectors buffer. + /// This is an in-place transformation. + static void TransformManyBy(FourVectors * RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix ); + + // X(),Y(),Z() - get at the desired component of the i'th (0..3) vector. + FORCEINLINE const float & X(int idx) const + { + // NOTE: if the output goes into a register, this causes a Load-Hit-Store stall (don't mix fpu/vpu math!) + return SubFloat( (fltx4 &)x, idx ); + } + + FORCEINLINE const float & Y(int idx) const + { + return SubFloat( (fltx4 &)y, idx ); + } + + FORCEINLINE const float & Z(int idx) const + { + return SubFloat( (fltx4 &)z, idx ); + } + + FORCEINLINE float & X(int idx) + { + return SubFloat( x, idx ); + } + + FORCEINLINE float & Y(int idx) + { + return SubFloat( y, idx ); + } + + FORCEINLINE float & Z(int idx) + { + return SubFloat( z, idx ); + } + + FORCEINLINE Vector Vec(int idx) const //< unpack one of the vectors + { + return Vector( X(idx), Y(idx), Z(idx) ); + } + + FourVectors(void) + { + } + + FourVectors( FourVectors const &src ) + { + x=src.x; + y=src.y; + z=src.z; + } + + FORCEINLINE void operator=( FourVectors const &src ) + { + x=src.x; + y=src.y; + z=src.z; + } + + /// LoadAndSwizzle - load 4 Vectors into a FourVectors, performing transpose op + FORCEINLINE void LoadAndSwizzle(Vector const &a, Vector const &b, Vector const &c, Vector const &d) + { + // TransposeSIMD has large sub-expressions that the compiler can't eliminate on x360 + // use an unfolded implementation here +#if _X360 + fltx4 tx = LoadUnalignedSIMD( &a.x ); + fltx4 ty = LoadUnalignedSIMD( &b.x ); + fltx4 tz = LoadUnalignedSIMD( &c.x ); + fltx4 tw = LoadUnalignedSIMD( &d.x ); + fltx4 r0 = __vmrghw(tx, tz); + fltx4 r1 = __vmrghw(ty, tw); + fltx4 r2 = __vmrglw(tx, tz); + fltx4 r3 = __vmrglw(ty, tw); 
+ + x = __vmrghw(r0, r1); + y = __vmrglw(r0, r1); + z = __vmrghw(r2, r3); +#else + x = LoadUnalignedSIMD( &( a.x )); + y = LoadUnalignedSIMD( &( b.x )); + z = LoadUnalignedSIMD( &( c.x )); + fltx4 w = LoadUnalignedSIMD( &( d.x )); + // now, matrix is: + // x y z ? + // x y z ? + // x y z ? + // x y z ? + TransposeSIMD(x, y, z, w); +#endif + } + + /// LoadAndSwizzleAligned - load 4 Vectors into a FourVectors, performing transpose op. + /// all 4 vectors must be 128 bit boundary + FORCEINLINE void LoadAndSwizzleAligned(const float *RESTRICT a, const float *RESTRICT b, const float *RESTRICT c, const float *RESTRICT d) + { +#if _X360 + fltx4 tx = LoadAlignedSIMD(a); + fltx4 ty = LoadAlignedSIMD(b); + fltx4 tz = LoadAlignedSIMD(c); + fltx4 tw = LoadAlignedSIMD(d); + fltx4 r0 = __vmrghw(tx, tz); + fltx4 r1 = __vmrghw(ty, tw); + fltx4 r2 = __vmrglw(tx, tz); + fltx4 r3 = __vmrglw(ty, tw); + + x = __vmrghw(r0, r1); + y = __vmrglw(r0, r1); + z = __vmrghw(r2, r3); +#else + x = LoadAlignedSIMD( a ); + y = LoadAlignedSIMD( b ); + z = LoadAlignedSIMD( c ); + fltx4 w = LoadAlignedSIMD( d ); + // now, matrix is: + // x y z ? + // x y z ? + // x y z ? + // x y z ? + TransposeSIMD( x, y, z, w ); +#endif + } + + FORCEINLINE void LoadAndSwizzleAligned(Vector const &a, Vector const &b, Vector const &c, Vector const &d) + { + LoadAndSwizzleAligned( &a.x, &b.x, &c.x, &d.x ); + } + + /// return the squared length of all 4 vectors + FORCEINLINE fltx4 length2(void) const + { + return (*this)*(*this); + } + + /// return the approximate length of all 4 vectors. uses the sqrt approximation instruction + FORCEINLINE fltx4 length(void) const + { + return SqrtEstSIMD(length2()); + } + + /// normalize all 4 vectors in place. not mega-accurate (uses reciprocal approximation instruction) + FORCEINLINE void VectorNormalizeFast(void) + { + fltx4 mag_sq=(*this)*(*this); // length^2 + (*this) *= ReciprocalSqrtEstSIMD(mag_sq); // *(1.0/sqrt(length^2)) + } + + /// normalize all 4 vectors in place. 
+ FORCEINLINE void VectorNormalize(void) + { + fltx4 mag_sq=(*this)*(*this); // length^2 + (*this) *= ReciprocalSqrtSIMD(mag_sq); // *(1.0/sqrt(length^2)) + } + + /// construct a FourVectors from 4 separate Vectors + FORCEINLINE FourVectors(Vector const &a, Vector const &b, Vector const &c, Vector const &d) + { + LoadAndSwizzle(a,b,c,d); + } + + /// construct a FourVectors from 4 separate Vectors + FORCEINLINE FourVectors(VectorAligned const &a, VectorAligned const &b, VectorAligned const &c, VectorAligned const &d) + { + LoadAndSwizzleAligned(a,b,c,d); + } + + FORCEINLINE fltx4 DistToSqr( FourVectors const &pnt ) + { + fltx4 fl4dX = SubSIMD( pnt.x, x ); + fltx4 fl4dY = SubSIMD( pnt.y, y ); + fltx4 fl4dZ = SubSIMD( pnt.z, z ); + return AddSIMD( MulSIMD( fl4dX, fl4dX), AddSIMD( MulSIMD( fl4dY, fl4dY ), MulSIMD( fl4dZ, fl4dZ ) ) ); + + } + + FORCEINLINE fltx4 TValueOfClosestPointOnLine( FourVectors const &p0, FourVectors const &p1 ) const + { + FourVectors lineDelta = p1; + lineDelta -= p0; + fltx4 OOlineDirDotlineDir = ReciprocalSIMD( p1 * p1 ); + FourVectors v4OurPnt = *this; + v4OurPnt -= p0; + return MulSIMD( OOlineDirDotlineDir, v4OurPnt * lineDelta ); + } + + FORCEINLINE fltx4 DistSqrToLineSegment( FourVectors const &p0, FourVectors const &p1 ) const + { + FourVectors lineDelta = p1; + FourVectors v4OurPnt = *this; + v4OurPnt -= p0; + lineDelta -= p0; + + fltx4 OOlineDirDotlineDir = ReciprocalSIMD( lineDelta * lineDelta ); + + fltx4 fl4T = MulSIMD( OOlineDirDotlineDir, v4OurPnt * lineDelta ); + + fl4T = MinSIMD( fl4T, Four_Ones ); + fl4T = MaxSIMD( fl4T, Four_Zeros ); + lineDelta *= fl4T; + return v4OurPnt.DistToSqr( lineDelta ); + } + +}; + +/// form 4 cross products +inline FourVectors operator ^(const FourVectors &a, const FourVectors &b) +{ + FourVectors ret; + ret.x=SubSIMD(MulSIMD(a.y,b.z),MulSIMD(a.z,b.y)); + ret.y=SubSIMD(MulSIMD(a.z,b.x),MulSIMD(a.x,b.z)); + ret.z=SubSIMD(MulSIMD(a.x,b.y),MulSIMD(a.y,b.x)); + return ret; +} + +/// 
component-by-componentwise MAX operator +inline FourVectors maximum(const FourVectors &a, const FourVectors &b) +{ + FourVectors ret; + ret.x=MaxSIMD(a.x,b.x); + ret.y=MaxSIMD(a.y,b.y); + ret.z=MaxSIMD(a.z,b.z); + return ret; +} + +/// component-by-componentwise MIN operator +inline FourVectors minimum(const FourVectors &a, const FourVectors &b) +{ + FourVectors ret; + ret.x=MinSIMD(a.x,b.x); + ret.y=MinSIMD(a.y,b.y); + ret.z=MinSIMD(a.z,b.z); + return ret; +} + +/// calculate reflection vector. incident and normal dir assumed normalized +FORCEINLINE FourVectors VectorReflect( const FourVectors &incident, const FourVectors &normal ) +{ + FourVectors ret = incident; + fltx4 iDotNx2 = incident * normal; + iDotNx2 = AddSIMD( iDotNx2, iDotNx2 ); + FourVectors nPart = normal; + nPart *= iDotNx2; + ret -= nPart; // i-2(n*i)n + return ret; +} + +/// calculate slide vector. removes all components of a vector which are perpendicular to a normal vector. +FORCEINLINE FourVectors VectorSlide( const FourVectors &incident, const FourVectors &normal ) +{ + FourVectors ret = incident; + fltx4 iDotN = incident * normal; + FourVectors nPart = normal; + nPart *= iDotN; + ret -= nPart; // i-(n*i)n + return ret; +} + + +// Assume the given matrix is a rotation, and rotate these vectors by it. +// If you have a long list of FourVectors structures that you all want +// to rotate by the same matrix, use FourVectors::RotateManyBy() instead. +void FourVectors::RotateBy(const matrix3x4_t& matrix) +{ + // Splat out each of the entries in the matrix to a fltx4. Do this + // in the order that we will need them, to hide latency. I'm + // avoiding making an array of them, so that they'll remain in + // registers. + fltx4 matSplat00, matSplat01, matSplat02, + matSplat10, matSplat11, matSplat12, + matSplat20, matSplat21, matSplat22; + + { + // Load the matrix into local vectors. Sadly, matrix3x4_ts are + // often unaligned. 
The w components will presumably hold the translation column of + // the matrix (matrix[i][3]) -- TODO confirm -- but we don't really care about that. + fltx4 matCol0 = LoadUnalignedSIMD( matrix[0] ); + fltx4 matCol1 = LoadUnalignedSIMD( matrix[1] ); + fltx4 matCol2 = LoadUnalignedSIMD( matrix[2] ); + + matSplat00 = SplatXSIMD( matCol0 ); + matSplat01 = SplatYSIMD( matCol0 ); + matSplat02 = SplatZSIMD( matCol0 ); + + matSplat10 = SplatXSIMD( matCol1 ); + matSplat11 = SplatYSIMD( matCol1 ); + matSplat12 = SplatZSIMD( matCol1 ); + + matSplat20 = SplatXSIMD( matCol2 ); + matSplat21 = SplatYSIMD( matCol2 ); + matSplat22 = SplatZSIMD( matCol2 ); + } + + // Trust in the compiler to schedule these operations correctly: + fltx4 outX, outY, outZ; + outX = AddSIMD( AddSIMD( MulSIMD( x, matSplat00 ), MulSIMD( y, matSplat01 ) ), MulSIMD( z, matSplat02 ) ); + outY = AddSIMD( AddSIMD( MulSIMD( x, matSplat10 ), MulSIMD( y, matSplat11 ) ), MulSIMD( z, matSplat12 ) ); + outZ = AddSIMD( AddSIMD( MulSIMD( x, matSplat20 ), MulSIMD( y, matSplat21 ) ), MulSIMD( z, matSplat22 ) ); + + x = outX; + y = outY; + z = outZ; +} + +// Transform these vectors by the given matrix: apply its 3x3 part exactly as +// RotateBy() does, then add matrix[i][3] to each component as a translation. +// NOTE(review): RotateBy() points to FourVectors::RotateManyBy() for long lists; +// presumably a batched transform counterpart exists as well -- confirm. +void FourVectors::TransformBy(const matrix3x4_t& matrix) +{ + // Splat out each of the entries in the matrix to a fltx4. Do this + // in the order that we will need them, to hide latency. I'm + // avoiding making an array of them, so that they'll remain in + // registers. + fltx4 matSplat00, matSplat01, matSplat02, + matSplat10, matSplat11, matSplat12, + matSplat20, matSplat21, matSplat22; + + { + // Load the matrix into local vectors. Sadly, matrix3x4_ts are + // often unaligned. The w components are not used by the splats below; + // the translation (matrix[i][3]) is added separately at the end.
+ fltx4 matCol0 = LoadUnalignedSIMD( matrix[0] ); + fltx4 matCol1 = LoadUnalignedSIMD( matrix[1] ); + fltx4 matCol2 = LoadUnalignedSIMD( matrix[2] ); + + matSplat00 = SplatXSIMD( matCol0 ); + matSplat01 = SplatYSIMD( matCol0 ); + matSplat02 = SplatZSIMD( matCol0 ); + + matSplat10 = SplatXSIMD( matCol1 ); + matSplat11 = SplatYSIMD( matCol1 ); + matSplat12 = SplatZSIMD( matCol1 ); + + matSplat20 = SplatXSIMD( matCol2 ); + matSplat21 = SplatYSIMD( matCol2 ); + matSplat22 = SplatZSIMD( matCol2 ); + } + + // Trust in the compiler to schedule these operations correctly: + fltx4 outX, outY, outZ; + + outX = MaddSIMD( z, matSplat02, AddSIMD( MulSIMD( x, matSplat00 ), MulSIMD( y, matSplat01 ) ) ); + outY = MaddSIMD( z, matSplat12, AddSIMD( MulSIMD( x, matSplat10 ), MulSIMD( y, matSplat11 ) ) ); + outZ = MaddSIMD( z, matSplat22, AddSIMD( MulSIMD( x, matSplat20 ), MulSIMD( y, matSplat21 ) ) ); + + x = AddSIMD( outX, ReplicateX4( matrix[0][3] )); + y = AddSIMD( outY, ReplicateX4( matrix[1][3] )); + z = AddSIMD( outZ, ReplicateX4( matrix[2][3] )); +} + + + +/// quick, low quality perlin-style noise() function suitable for real time use. +/// return value is -1..1. Only reliable around +/- 1 million or so. +fltx4 NoiseSIMD( const fltx4 & x, const fltx4 & y, const fltx4 & z ); +fltx4 NoiseSIMD( FourVectors const &v ); + +// vector valued noise direction +FourVectors DNoiseSIMD( FourVectors const &v ); + +// vector value "curl" noise function. 
see http://hyperphysics.phy-astr.gsu.edu/hbase/curl.html +FourVectors CurlNoiseSIMD( FourVectors const &v ); + + +/// calculate the absolute value of a packed single +inline fltx4 fabs( const fltx4 & x ) +{ + return AndSIMD( x, LoadAlignedSIMD( g_SIMD_clear_signmask ) ); +} + +/// negate all four components of a SIMD packed single +inline fltx4 fnegate( const fltx4 & x ) +{ + return XorSIMD( x, LoadAlignedSIMD( g_SIMD_signmask ) ); +} + + +fltx4 Pow_FixedPoint_Exponent_SIMD( const fltx4 & x, int exponent); + +// PowSIMD - raise a SIMD register to a power. This is analogous to the C pow() function, with some +// restrictions: fractional exponents are only handled with 2 bits of precision. Basically, +// fractions of 0,.25,.5, and .75 are handled. PowSIMD(x,.30) will be the same as PowSIMD(x,.25). +// The exponent is converted with (int)(4.0*exponent), i.e. it is truncated toward zero. +// negative and fractional powers are handled by the SIMD reciprocal and square root approximation +// instructions and so are not especially accurate ----Note that this routine does not raise +// numeric exceptions because it uses SIMD--- This routine is O(log2(exponent)). +inline fltx4 PowSIMD( const fltx4 & x, float exponent ) +{ + return Pow_FixedPoint_Exponent_SIMD(x,(int) (4.0*exponent)); +} + + + +// random number generation - generate 4 random numbers quickly. 
+ +void SeedRandSIMD(uint32 seed); // seed the random # generator +fltx4 RandSIMD( int nContext = 0 ); // return 4 numbers in the 0..1 range + +// for multithreaded, you need to use these and use the argument form of RandSIMD: +int GetSIMDRandContext( void ); +void ReleaseSIMDRandContext( int nContext ); + +FORCEINLINE fltx4 RandSignedSIMD( void ) // -1..1 +{ + return SubSIMD( MulSIMD( Four_Twos, RandSIMD() ), Four_Ones ); +} + + +// SIMD versions of mathlib simplespline functions +// hermite basis function for smooth interpolation +// Similar to Gain() above, but very cheap to call +// value should be between 0 & 1 inclusive +inline fltx4 SimpleSpline( const fltx4 & value ) +{ + // Arranged to avoid a data dependency between these two MULs: + fltx4 valueDoubled = MulSIMD( value, Four_Twos ); + fltx4 valueSquared = MulSIMD( value, value ); + + // Nice little ease-in, ease-out spline-like curve + return SubSIMD( + MulSIMD( Four_Threes, valueSquared ), + MulSIMD( valueDoubled, valueSquared ) ); +} + +// remaps a value in [startInterval, startInterval+rangeInterval] from linear to +// spline using SimpleSpline +inline fltx4 SimpleSplineRemapValWithDeltas( const fltx4 & val, + const fltx4 & A, const fltx4 & BMinusA, + const fltx4 & OneOverBMinusA, const fltx4 & C, + const fltx4 & DMinusC ) +{ +// if ( A == B ) +// return val >= B ? D : C; + fltx4 cVal = MulSIMD( SubSIMD( val, A), OneOverBMinusA ); + return AddSIMD( C, MulSIMD( DMinusC, SimpleSpline( cVal ) ) ); +} + +inline fltx4 SimpleSplineRemapValWithDeltasClamped( const fltx4 & val, + const fltx4 & A, const fltx4 & BMinusA, + const fltx4 & OneOverBMinusA, const fltx4 & C, + const fltx4 & DMinusC ) +{ +// if ( A == B ) +// return val >= B ? 
D : C; + fltx4 cVal = MulSIMD( SubSIMD( val, A), OneOverBMinusA ); + cVal = MinSIMD( Four_Ones, MaxSIMD( Four_Zeros, cVal ) ); + return AddSIMD( C, MulSIMD( DMinusC, SimpleSpline( cVal ) ) ); +} + +FORCEINLINE fltx4 FracSIMD( const fltx4 &val ) +{ + fltx4 fl4Abs = fabs( val ); + fltx4 ival = SubSIMD( AddSIMD( fl4Abs, Four_2ToThe23s ), Four_2ToThe23s ); + ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Ones ), ival ); + return XorSIMD( SubSIMD( fl4Abs, ival ), XorSIMD( val, fl4Abs ) ); // restore sign bits +} + +FORCEINLINE fltx4 Mod2SIMD( const fltx4 &val ) +{ + fltx4 fl4Abs = fabs( val ); + fltx4 ival = SubSIMD( AndSIMD( LoadAlignedSIMD( (float *) g_SIMD_lsbmask ), AddSIMD( fl4Abs, Four_2ToThe23s ) ), Four_2ToThe23s ); + ival = MaskedAssign( CmpGtSIMD( ival, fl4Abs ), SubSIMD( ival, Four_Twos ), ival ); + return XorSIMD( SubSIMD( fl4Abs, ival ), XorSIMD( val, fl4Abs ) ); // restore sign bits +} + +FORCEINLINE fltx4 Mod2SIMDPositiveInput( const fltx4 &val ) +{ + fltx4 ival = SubSIMD( AndSIMD( LoadAlignedSIMD( g_SIMD_lsbmask ), AddSIMD( val, Four_2ToThe23s ) ), Four_2ToThe23s ); + ival = MaskedAssign( CmpGtSIMD( ival, val ), SubSIMD( ival, Four_Twos ), ival ); + return SubSIMD( val, ival ); +} + + +// approximate sin of an angle, with -1..1 representing the whole sin wave period instead of -pi..pi. +// no range reduction is done - for values outside of 0..1 you won't like the results +FORCEINLINE fltx4 _SinEst01SIMD( const fltx4 &val ) +{ + // really rough approximation - x*(4-x*4) - a parabola. s(0) = 0, s(.5) = 1, s(1)=0, smooth in-between. + // sufficient for simple oscillation. + return MulSIMD( val, SubSIMD( Four_Fours, MulSIMD( val, Four_Fours ) ) ); +} + +FORCEINLINE fltx4 _Sin01SIMD( const fltx4 &val ) +{ + // not a bad approximation : parabola always over-estimates. Squared parabola always + // underestimates. 
So lets blend between them: goodsin = badsin + .225*( badsin^2-badsin) + fltx4 fl4BadEst = MulSIMD( val, SubSIMD( Four_Fours, MulSIMD( val, Four_Fours ) ) ); + return AddSIMD( MulSIMD( Four_Point225s, SubSIMD( MulSIMD( fl4BadEst, fl4BadEst ), fl4BadEst ) ), fl4BadEst ); +} + +// full range useable implementations +FORCEINLINE fltx4 SinEst01SIMD( const fltx4 &val ) +{ + fltx4 fl4Abs = fabs( val ); + fltx4 fl4Reduced2 = Mod2SIMDPositiveInput( fl4Abs ); + fltx4 fl4OddMask = CmpGeSIMD( fl4Reduced2, Four_Ones ); + fltx4 fl4val = SubSIMD( fl4Reduced2, AndSIMD( Four_Ones, fl4OddMask ) ); + fltx4 fl4Sin = _SinEst01SIMD( fl4val ); + fl4Sin = XorSIMD( fl4Sin, AndSIMD( LoadAlignedSIMD( g_SIMD_signmask ), XorSIMD( val, fl4OddMask ) ) ); + return fl4Sin; + +} + +FORCEINLINE fltx4 Sin01SIMD( const fltx4 &val ) +{ + fltx4 fl4Abs = fabs( val ); + fltx4 fl4Reduced2 = Mod2SIMDPositiveInput( fl4Abs ); + fltx4 fl4OddMask = CmpGeSIMD( fl4Reduced2, Four_Ones ); + fltx4 fl4val = SubSIMD( fl4Reduced2, AndSIMD( Four_Ones, fl4OddMask ) ); + fltx4 fl4Sin = _Sin01SIMD( fl4val ); + fl4Sin = XorSIMD( fl4Sin, AndSIMD( LoadAlignedSIMD( g_SIMD_signmask ), XorSIMD( val, fl4OddMask ) ) ); + return fl4Sin; + +} + +// Schlick style Bias approximation see graphics gems 4 : bias(t,a)= t/( (1/a-2)*(1-t)+1) + +FORCEINLINE fltx4 PreCalcBiasParameter( const fltx4 &bias_parameter ) +{ + // convert perlin-style-bias parameter to the value right for the approximation + return SubSIMD( ReciprocalSIMD( bias_parameter ), Four_Twos ); +} + +FORCEINLINE fltx4 BiasSIMD( const fltx4 &val, const fltx4 &precalc_param ) +{ + // similar to bias function except pass precalced bias value from calling PreCalcBiasParameter. + + //!!speed!! use reciprocal est? + //!!speed!! 
could save one op by precalcing _2_ values + return DivSIMD( val, AddSIMD( MulSIMD( precalc_param, SubSIMD( Four_Ones, val ) ), Four_Ones ) ); +} + +//----------------------------------------------------------------------------- +// Box/plane test +// NOTE: The w component of emins + emaxs must be 1 for this to work +//----------------------------------------------------------------------------- +FORCEINLINE int BoxOnPlaneSideSIMD( const fltx4& emins, const fltx4& emaxs, const cplane_t *p, float tolerance = 0.f ) +{ + fltx4 corners[2]; + fltx4 normal = LoadUnalignedSIMD( p->normal.Base() ); + fltx4 dist = ReplicateX4( -p->dist ); + normal = SetWSIMD( normal, dist ); + fltx4 t4 = ReplicateX4( tolerance ); + fltx4 negt4 = ReplicateX4( -tolerance ); + fltx4 cmp = CmpGeSIMD( normal, Four_Zeros ); + corners[0] = MaskedAssign( cmp, emaxs, emins ); + corners[1] = MaskedAssign( cmp, emins, emaxs ); + fltx4 dot1 = Dot4SIMD( normal, corners[0] ); + fltx4 dot2 = Dot4SIMD( normal, corners[1] ); + cmp = CmpGeSIMD( dot1, t4 ); + fltx4 cmp2 = CmpGtSIMD( negt4, dot2 ); + fltx4 result = MaskedAssign( cmp, Four_Ones, Four_Zeros ); + fltx4 result2 = MaskedAssign( cmp2, Four_Twos, Four_Zeros ); + result = AddSIMD( result, result2 ); + intx4 sides; + ConvertStoreAsIntsSIMD( &sides, result ); + return sides[0]; +} + +#endif // _ssemath_h diff --git a/mp/src/public/mathlib/ssequaternion.h b/mp/src/public/mathlib/ssequaternion.h index 5d60961e..825a9e45 100644 --- a/mp/src/public/mathlib/ssequaternion.h +++ b/mp/src/public/mathlib/ssequaternion.h @@ -1,367 +1,367 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: - defines SIMD "structure of arrays" classes and functions. 
-// -//===========================================================================// -#ifndef SSEQUATMATH_H -#define SSEQUATMATH_H - -#ifdef _WIN32 -#pragma once -#endif - - -#include "mathlib/ssemath.h" - -// Use this #define to allow SSE versions of Quaternion math -// to exist on PC. -// On PC, certain horizontal vector operations are not supported. -// This causes the SSE implementation of quaternion math to mix the -// vector and scalar floating point units, which is extremely -// performance negative if you don't compile to native SSE2 (which -// we don't as of Sept 1, 2007). So, it's best not to allow these -// functions to exist at all. It's not good enough to simply replace -// the contents of the functions with scalar math, because each call -// to LoadAligned and StoreAligned will result in an unnecssary copy -// of the quaternion, and several moves to and from the XMM registers. -// -// Basically, the problem you run into is that for efficient SIMD code, -// you need to load the quaternions and vectors into SIMD registers and -// keep them there as long as possible while doing only SIMD math, -// whereas for efficient scalar code, each time you copy onto or ever -// use a fltx4, it hoses your pipeline. So the difference has to be -// in the management of temporary variables in the calling function, -// not inside the math functions. -// -// If you compile assuming the presence of SSE2, the MSVC will abandon -// the traditional x87 FPU operations altogether and make everything use -// the SSE2 registers, which lessens this problem a little. - -// permitted only on 360, as we've done careful tuning on its Altivec math: -#ifdef _X360 -#define ALLOW_SIMD_QUATERNION_MATH 1 // not on PC! 
-#endif - - - -//--------------------------------------------------------------------- -// Load/store quaternions -//--------------------------------------------------------------------- -#ifndef _X360 -#if ALLOW_SIMD_QUATERNION_MATH -// Using STDC or SSE -FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD ) -{ - fltx4 retval = LoadAlignedSIMD( pSIMD.Base() ); - return retval; -} - -FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD ) -{ - fltx4 retval = LoadAlignedSIMD( pSIMD ); - return retval; -} - -FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a ) -{ - StoreAlignedSIMD( pSIMD->Base(), a ); -} -#endif -#else - -// for the transitional class -- load a QuaternionAligned -FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD ) -{ - fltx4 retval = XMLoadVector4A( pSIMD.Base() ); - return retval; -} - -FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD ) -{ - fltx4 retval = XMLoadVector4A( pSIMD ); - return retval; -} - -FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a ) -{ - XMStoreVector4A( pSIMD->Base(), a ); -} - -#endif - - -#if ALLOW_SIMD_QUATERNION_MATH -//--------------------------------------------------------------------- -// Make sure quaternions are within 180 degrees of one another, if not, reverse q -//--------------------------------------------------------------------- -FORCEINLINE fltx4 QuaternionAlignSIMD( const fltx4 &p, const fltx4 &q ) -{ - // decide if one of the quaternions is backwards - fltx4 a = SubSIMD( p, q ); - fltx4 b = AddSIMD( p, q ); - a = Dot4SIMD( a, a ); - b = Dot4SIMD( b, b ); - fltx4 cmp = CmpGtSIMD( a, b ); - fltx4 result = MaskedAssign( cmp, NegSIMD(q), q ); - return result; -} - -//--------------------------------------------------------------------- -// Normalize Quaternion -//--------------------------------------------------------------------- -#if USE_STDC_FOR_SIMD - 
-FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q ) -{ - fltx4 radius, result; - radius = Dot4SIMD( q, q ); - - if ( SubFloat( radius, 0 ) ) // > FLT_EPSILON && ((radius < 1.0f - 4*FLT_EPSILON) || (radius > 1.0f + 4*FLT_EPSILON)) - { - float iradius = 1.0f / sqrt( SubFloat( radius, 0 ) ); - result = ReplicateX4( iradius ); - result = MulSIMD( result, q ); - return result; - } - return q; -} - -#else - -// SSE + X360 implementation -FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q ) -{ - fltx4 radius, result, mask; - radius = Dot4SIMD( q, q ); - mask = CmpEqSIMD( radius, Four_Zeros ); // all ones iff radius = 0 - result = ReciprocalSqrtSIMD( radius ); - result = MulSIMD( result, q ); - return MaskedAssign( mask, q, result ); // if radius was 0, just return q -} - -#endif - - -//--------------------------------------------------------------------- -// 0.0 returns p, 1.0 return q. -//--------------------------------------------------------------------- -FORCEINLINE fltx4 QuaternionBlendNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t ) -{ - fltx4 sclp, sclq, result; - sclq = ReplicateX4( t ); - sclp = SubSIMD( Four_Ones, sclq ); - result = MulSIMD( sclp, p ); - result = MaddSIMD( sclq, q, result ); - return QuaternionNormalizeSIMD( result ); -} - - -//--------------------------------------------------------------------- -// Blend Quaternions -//--------------------------------------------------------------------- -FORCEINLINE fltx4 QuaternionBlendSIMD( const fltx4 &p, const fltx4 &q, float t ) -{ - // decide if one of the quaternions is backwards - fltx4 q2, result; - q2 = QuaternionAlignSIMD( p, q ); - result = QuaternionBlendNoAlignSIMD( p, q2, t ); - return result; -} - - -//--------------------------------------------------------------------- -// Multiply Quaternions -//--------------------------------------------------------------------- -#ifndef _X360 - -// SSE and STDC -FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q ) 
-{ - // decide if one of the quaternions is backwards - fltx4 q2, result; - q2 = QuaternionAlignSIMD( p, q ); - SubFloat( result, 0 ) = SubFloat( p, 0 ) * SubFloat( q2, 3 ) + SubFloat( p, 1 ) * SubFloat( q2, 2 ) - SubFloat( p, 2 ) * SubFloat( q2, 1 ) + SubFloat( p, 3 ) * SubFloat( q2, 0 ); - SubFloat( result, 1 ) = -SubFloat( p, 0 ) * SubFloat( q2, 2 ) + SubFloat( p, 1 ) * SubFloat( q2, 3 ) + SubFloat( p, 2 ) * SubFloat( q2, 0 ) + SubFloat( p, 3 ) * SubFloat( q2, 1 ); - SubFloat( result, 2 ) = SubFloat( p, 0 ) * SubFloat( q2, 1 ) - SubFloat( p, 1 ) * SubFloat( q2, 0 ) + SubFloat( p, 2 ) * SubFloat( q2, 3 ) + SubFloat( p, 3 ) * SubFloat( q2, 2 ); - SubFloat( result, 3 ) = -SubFloat( p, 0 ) * SubFloat( q2, 0 ) - SubFloat( p, 1 ) * SubFloat( q2, 1 ) - SubFloat( p, 2 ) * SubFloat( q2, 2 ) + SubFloat( p, 3 ) * SubFloat( q2, 3 ); - return result; -} - -#else - -// X360 -extern const fltx4 g_QuatMultRowSign[4]; -FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q ) -{ - fltx4 q2, row, result; - q2 = QuaternionAlignSIMD( p, q ); - - row = XMVectorSwizzle( q2, 3, 2, 1, 0 ); - row = MulSIMD( row, g_QuatMultRowSign[0] ); - result = Dot4SIMD( row, p ); - - row = XMVectorSwizzle( q2, 2, 3, 0, 1 ); - row = MulSIMD( row, g_QuatMultRowSign[1] ); - row = Dot4SIMD( row, p ); - result = __vrlimi( result, row, 4, 0 ); - - row = XMVectorSwizzle( q2, 1, 0, 3, 2 ); - row = MulSIMD( row, g_QuatMultRowSign[2] ); - row = Dot4SIMD( row, p ); - result = __vrlimi( result, row, 2, 0 ); - - row = MulSIMD( q2, g_QuatMultRowSign[3] ); - row = Dot4SIMD( row, p ); - result = __vrlimi( result, row, 1, 0 ); - return result; -} - -#endif - - -//--------------------------------------------------------------------- -// Quaternion scale -//--------------------------------------------------------------------- -#ifndef _X360 - -// SSE and STDC -FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t ) -{ - float r; - fltx4 q; - - // FIXME: nick, this isn't overly sensitive to 
accuracy, and it may be faster to - // use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale. - float sinom = sqrt( SubFloat( p, 0 ) * SubFloat( p, 0 ) + SubFloat( p, 1 ) * SubFloat( p, 1 ) + SubFloat( p, 2 ) * SubFloat( p, 2 ) ); - sinom = min( sinom, 1.f ); - - float sinsom = sin( asin( sinom ) * t ); - - t = sinsom / (sinom + FLT_EPSILON); - SubFloat( q, 0 ) = t * SubFloat( p, 0 ); - SubFloat( q, 1 ) = t * SubFloat( p, 1 ); - SubFloat( q, 2 ) = t * SubFloat( p, 2 ); - - // rescale rotation - r = 1.0f - sinsom * sinsom; - - // Assert( r >= 0 ); - if (r < 0.0f) - r = 0.0f; - r = sqrt( r ); - - // keep sign of rotation - SubFloat( q, 3 ) = fsel( SubFloat( p, 3 ), r, -r ); - return q; -} - -#else - -// X360 -FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t ) -{ - fltx4 sinom = Dot3SIMD( p, p ); - sinom = SqrtSIMD( sinom ); - sinom = MinSIMD( sinom, Four_Ones ); - fltx4 sinsom = ArcSinSIMD( sinom ); - fltx4 t4 = ReplicateX4( t ); - sinsom = MulSIMD( sinsom, t4 ); - sinsom = SinSIMD( sinsom ); - sinom = AddSIMD( sinom, Four_Epsilons ); - sinom = ReciprocalSIMD( sinom ); - t4 = MulSIMD( sinsom, sinom ); - fltx4 result = MulSIMD( p, t4 ); - - // rescale rotation - sinsom = MulSIMD( sinsom, sinsom ); - fltx4 r = SubSIMD( Four_Ones, sinsom ); - r = MaxSIMD( r, Four_Zeros ); - r = SqrtSIMD( r ); - - // keep sign of rotation - fltx4 cmp = CmpGeSIMD( p, Four_Zeros ); - r = MaskedAssign( cmp, r, NegSIMD( r ) ); - - result = __vrlimi(result, r, 1, 0); - return result; -} - -#endif - - -//----------------------------------------------------------------------------- -// Quaternion sphereical linear interpolation -//----------------------------------------------------------------------------- -#ifndef _X360 - -// SSE and STDC -FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t ) -{ - float omega, cosom, sinom, sclp, sclq; - - fltx4 result; - - // 0.0 returns p, 1.0 return q. 
- cosom = SubFloat( p, 0 ) * SubFloat( q, 0 ) + SubFloat( p, 1 ) * SubFloat( q, 1 ) + - SubFloat( p, 2 ) * SubFloat( q, 2 ) + SubFloat( p, 3 ) * SubFloat( q, 3 ); - - if ( (1.0f + cosom ) > 0.000001f ) - { - if ( (1.0f - cosom ) > 0.000001f ) - { - omega = acos( cosom ); - sinom = sin( omega ); - sclp = sin( (1.0f - t)*omega) / sinom; - sclq = sin( t*omega ) / sinom; - } - else - { - // TODO: add short circuit for cosom == 1.0f? - sclp = 1.0f - t; - sclq = t; - } - SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( q, 0 ); - SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( q, 1 ); - SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( q, 2 ); - SubFloat( result, 3 ) = sclp * SubFloat( p, 3 ) + sclq * SubFloat( q, 3 ); - } - else - { - SubFloat( result, 0 ) = -SubFloat( q, 1 ); - SubFloat( result, 1 ) = SubFloat( q, 0 ); - SubFloat( result, 2 ) = -SubFloat( q, 3 ); - SubFloat( result, 3 ) = SubFloat( q, 2 ); - sclp = sin( (1.0f - t) * (0.5f * M_PI)); - sclq = sin( t * (0.5f * M_PI)); - SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( result, 0 ); - SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( result, 1 ); - SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( result, 2 ); - } - - return result; -} - -#else - -// X360 -FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t ) -{ - return XMQuaternionSlerp( p, q, t ); -} - -#endif - - -FORCEINLINE fltx4 QuaternionSlerpSIMD( const fltx4 &p, const fltx4 &q, float t ) -{ - fltx4 q2, result; - q2 = QuaternionAlignSIMD( p, q ); - result = QuaternionSlerpNoAlignSIMD( p, q2, t ); - return result; -} - - -#endif // ALLOW_SIMD_QUATERNION_MATH - -#endif // SSEQUATMATH_H - +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: - defines SIMD "structure of arrays" classes and functions. 
+// +//===========================================================================// +#ifndef SSEQUATMATH_H +#define SSEQUATMATH_H + +#ifdef _WIN32 +#pragma once +#endif + + +#include "mathlib/ssemath.h" + +// Use this #define to allow SSE versions of Quaternion math +// to exist on PC. +// On PC, certain horizontal vector operations are not supported. +// This causes the SSE implementation of quaternion math to mix the +// vector and scalar floating point units, which is extremely +// performance negative if you don't compile to native SSE2 (which +// we don't as of Sept 1, 2007). So, it's best not to allow these +// functions to exist at all. It's not good enough to simply replace +// the contents of the functions with scalar math, because each call +// to LoadAligned and StoreAligned will result in an unnecessary copy +// of the quaternion, and several moves to and from the XMM registers. +// +// Basically, the problem you run into is that for efficient SIMD code, +// you need to load the quaternions and vectors into SIMD registers and +// keep them there as long as possible while doing only SIMD math, +// whereas for efficient scalar code, each time you copy onto or ever +// use a fltx4, it hoses your pipeline. So the difference has to be +// in the management of temporary variables in the calling function, +// not inside the math functions. +// +// If you compile assuming the presence of SSE2, the MSVC will abandon +// the traditional x87 FPU operations altogether and make everything use +// the SSE2 registers, which lessens this problem a little. + +// permitted only on 360, as we've done careful tuning on its Altivec math: +#ifdef _X360 +#define ALLOW_SIMD_QUATERNION_MATH 1 // not on PC!
+#endif + + + +//--------------------------------------------------------------------- +// Load/store quaternions +//--------------------------------------------------------------------- +#ifndef _X360 +#if ALLOW_SIMD_QUATERNION_MATH +// Using STDC or SSE +// load a QuaternionAligned (by reference) into a SIMD register via its float base pointer +FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD ) +{ + fltx4 retval = LoadAlignedSIMD( pSIMD.Base() ); + return retval; +} + +// load a QuaternionAligned (by pointer). Must forward pSIMD->Base(): passing pSIMD itself +// would select this same overload again (exact match) and recurse forever. +FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD ) +{ + fltx4 retval = LoadAlignedSIMD( pSIMD->Base() ); + return retval; +} + +// store a SIMD register into a QuaternionAligned via its float base pointer +FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a ) +{ + StoreAlignedSIMD( pSIMD->Base(), a ); +} +#endif +#else + +// for the transitional class -- load a QuaternionAligned +FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned & pSIMD ) +{ + fltx4 retval = XMLoadVector4A( pSIMD.Base() ); + return retval; +} + +FORCEINLINE fltx4 LoadAlignedSIMD( const QuaternionAligned * RESTRICT pSIMD ) +{ + fltx4 retval = XMLoadVector4A( pSIMD ); + return retval; +} + +FORCEINLINE void StoreAlignedSIMD( QuaternionAligned * RESTRICT pSIMD, const fltx4 & a ) +{ + XMStoreVector4A( pSIMD->Base(), a ); +} + +#endif + + +#if ALLOW_SIMD_QUATERNION_MATH +//--------------------------------------------------------------------- +// Make sure quaternions are within 180 degrees of one another, if not, reverse q +//--------------------------------------------------------------------- +FORCEINLINE fltx4 QuaternionAlignSIMD( const fltx4 &p, const fltx4 &q ) +{ + // decide if one of the quaternions is backwards + fltx4 a = SubSIMD( p, q ); + fltx4 b = AddSIMD( p, q ); + a = Dot4SIMD( a, a ); + b = Dot4SIMD( b, b ); + fltx4 cmp = CmpGtSIMD( a, b ); + fltx4 result = MaskedAssign( cmp, NegSIMD(q), q ); + return result; +} + +//--------------------------------------------------------------------- +// Normalize Quaternion +//--------------------------------------------------------------------- +#if USE_STDC_FOR_SIMD + 
+FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q ) +{ + fltx4 radius, result; + radius = Dot4SIMD( q, q ); + + if ( SubFloat( radius, 0 ) ) // > FLT_EPSILON && ((radius < 1.0f - 4*FLT_EPSILON) || (radius > 1.0f + 4*FLT_EPSILON)) + { + float iradius = 1.0f / sqrt( SubFloat( radius, 0 ) ); + result = ReplicateX4( iradius ); + result = MulSIMD( result, q ); + return result; + } + return q; +} + +#else + +// SSE + X360 implementation +FORCEINLINE fltx4 QuaternionNormalizeSIMD( const fltx4 &q ) +{ + fltx4 radius, result, mask; + radius = Dot4SIMD( q, q ); + mask = CmpEqSIMD( radius, Four_Zeros ); // all ones iff radius = 0 + result = ReciprocalSqrtSIMD( radius ); + result = MulSIMD( result, q ); + return MaskedAssign( mask, q, result ); // if radius was 0, just return q +} + +#endif + + +//--------------------------------------------------------------------- +// 0.0 returns p, 1.0 return q. +//--------------------------------------------------------------------- +FORCEINLINE fltx4 QuaternionBlendNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + fltx4 sclp, sclq, result; + sclq = ReplicateX4( t ); + sclp = SubSIMD( Four_Ones, sclq ); + result = MulSIMD( sclp, p ); + result = MaddSIMD( sclq, q, result ); + return QuaternionNormalizeSIMD( result ); +} + + +//--------------------------------------------------------------------- +// Blend Quaternions +//--------------------------------------------------------------------- +FORCEINLINE fltx4 QuaternionBlendSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + // decide if one of the quaternions is backwards + fltx4 q2, result; + q2 = QuaternionAlignSIMD( p, q ); + result = QuaternionBlendNoAlignSIMD( p, q2, t ); + return result; +} + + +//--------------------------------------------------------------------- +// Multiply Quaternions +//--------------------------------------------------------------------- +#ifndef _X360 + +// SSE and STDC +FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q ) 
+{ + // decide if one of the quaternions is backwards + fltx4 q2, result; + q2 = QuaternionAlignSIMD( p, q ); + SubFloat( result, 0 ) = SubFloat( p, 0 ) * SubFloat( q2, 3 ) + SubFloat( p, 1 ) * SubFloat( q2, 2 ) - SubFloat( p, 2 ) * SubFloat( q2, 1 ) + SubFloat( p, 3 ) * SubFloat( q2, 0 ); + SubFloat( result, 1 ) = -SubFloat( p, 0 ) * SubFloat( q2, 2 ) + SubFloat( p, 1 ) * SubFloat( q2, 3 ) + SubFloat( p, 2 ) * SubFloat( q2, 0 ) + SubFloat( p, 3 ) * SubFloat( q2, 1 ); + SubFloat( result, 2 ) = SubFloat( p, 0 ) * SubFloat( q2, 1 ) - SubFloat( p, 1 ) * SubFloat( q2, 0 ) + SubFloat( p, 2 ) * SubFloat( q2, 3 ) + SubFloat( p, 3 ) * SubFloat( q2, 2 ); + SubFloat( result, 3 ) = -SubFloat( p, 0 ) * SubFloat( q2, 0 ) - SubFloat( p, 1 ) * SubFloat( q2, 1 ) - SubFloat( p, 2 ) * SubFloat( q2, 2 ) + SubFloat( p, 3 ) * SubFloat( q2, 3 ); + return result; +} + +#else + +// X360 +extern const fltx4 g_QuatMultRowSign[4]; +FORCEINLINE fltx4 QuaternionMultSIMD( const fltx4 &p, const fltx4 &q ) +{ + fltx4 q2, row, result; + q2 = QuaternionAlignSIMD( p, q ); + + row = XMVectorSwizzle( q2, 3, 2, 1, 0 ); + row = MulSIMD( row, g_QuatMultRowSign[0] ); + result = Dot4SIMD( row, p ); + + row = XMVectorSwizzle( q2, 2, 3, 0, 1 ); + row = MulSIMD( row, g_QuatMultRowSign[1] ); + row = Dot4SIMD( row, p ); + result = __vrlimi( result, row, 4, 0 ); + + row = XMVectorSwizzle( q2, 1, 0, 3, 2 ); + row = MulSIMD( row, g_QuatMultRowSign[2] ); + row = Dot4SIMD( row, p ); + result = __vrlimi( result, row, 2, 0 ); + + row = MulSIMD( q2, g_QuatMultRowSign[3] ); + row = Dot4SIMD( row, p ); + result = __vrlimi( result, row, 1, 0 ); + return result; +} + +#endif + + +//--------------------------------------------------------------------- +// Quaternion scale +//--------------------------------------------------------------------- +#ifndef _X360 + +// SSE and STDC +FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t ) +{ + float r; + fltx4 q; + + // FIXME: nick, this isn't overly sensitive to 
accuracy, and it may be faster to + // use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale. + float sinom = sqrt( SubFloat( p, 0 ) * SubFloat( p, 0 ) + SubFloat( p, 1 ) * SubFloat( p, 1 ) + SubFloat( p, 2 ) * SubFloat( p, 2 ) ); + sinom = min( sinom, 1.f ); + + float sinsom = sin( asin( sinom ) * t ); + + t = sinsom / (sinom + FLT_EPSILON); + SubFloat( q, 0 ) = t * SubFloat( p, 0 ); + SubFloat( q, 1 ) = t * SubFloat( p, 1 ); + SubFloat( q, 2 ) = t * SubFloat( p, 2 ); + + // rescale rotation + r = 1.0f - sinsom * sinsom; + + // Assert( r >= 0 ); + if (r < 0.0f) + r = 0.0f; + r = sqrt( r ); + + // keep sign of rotation + SubFloat( q, 3 ) = fsel( SubFloat( p, 3 ), r, -r ); + return q; +} + +#else + +// X360 +FORCEINLINE fltx4 QuaternionScaleSIMD( const fltx4 &p, float t ) +{ + fltx4 sinom = Dot3SIMD( p, p ); + sinom = SqrtSIMD( sinom ); + sinom = MinSIMD( sinom, Four_Ones ); + fltx4 sinsom = ArcSinSIMD( sinom ); + fltx4 t4 = ReplicateX4( t ); + sinsom = MulSIMD( sinsom, t4 ); + sinsom = SinSIMD( sinsom ); + sinom = AddSIMD( sinom, Four_Epsilons ); + sinom = ReciprocalSIMD( sinom ); + t4 = MulSIMD( sinsom, sinom ); + fltx4 result = MulSIMD( p, t4 ); + + // rescale rotation + sinsom = MulSIMD( sinsom, sinsom ); + fltx4 r = SubSIMD( Four_Ones, sinsom ); + r = MaxSIMD( r, Four_Zeros ); + r = SqrtSIMD( r ); + + // keep sign of rotation + fltx4 cmp = CmpGeSIMD( p, Four_Zeros ); + r = MaskedAssign( cmp, r, NegSIMD( r ) ); + + result = __vrlimi(result, r, 1, 0); + return result; +} + +#endif + + +//----------------------------------------------------------------------------- +// Quaternion sphereical linear interpolation +//----------------------------------------------------------------------------- +#ifndef _X360 + +// SSE and STDC +FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + float omega, cosom, sinom, sclp, sclq; + + fltx4 result; + + // 0.0 returns p, 1.0 return q. 
+ cosom = SubFloat( p, 0 ) * SubFloat( q, 0 ) + SubFloat( p, 1 ) * SubFloat( q, 1 ) + + SubFloat( p, 2 ) * SubFloat( q, 2 ) + SubFloat( p, 3 ) * SubFloat( q, 3 ); + + if ( (1.0f + cosom ) > 0.000001f ) + { + if ( (1.0f - cosom ) > 0.000001f ) + { + omega = acos( cosom ); + sinom = sin( omega ); + sclp = sin( (1.0f - t)*omega) / sinom; + sclq = sin( t*omega ) / sinom; + } + else + { + // TODO: add short circuit for cosom == 1.0f? + sclp = 1.0f - t; + sclq = t; + } + SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( q, 0 ); + SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( q, 1 ); + SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( q, 2 ); + SubFloat( result, 3 ) = sclp * SubFloat( p, 3 ) + sclq * SubFloat( q, 3 ); + } + else + { + SubFloat( result, 0 ) = -SubFloat( q, 1 ); + SubFloat( result, 1 ) = SubFloat( q, 0 ); + SubFloat( result, 2 ) = -SubFloat( q, 3 ); + SubFloat( result, 3 ) = SubFloat( q, 2 ); + sclp = sin( (1.0f - t) * (0.5f * M_PI)); + sclq = sin( t * (0.5f * M_PI)); + SubFloat( result, 0 ) = sclp * SubFloat( p, 0 ) + sclq * SubFloat( result, 0 ); + SubFloat( result, 1 ) = sclp * SubFloat( p, 1 ) + sclq * SubFloat( result, 1 ); + SubFloat( result, 2 ) = sclp * SubFloat( p, 2 ) + sclq * SubFloat( result, 2 ); + } + + return result; +} + +#else + +// X360 +FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + return XMQuaternionSlerp( p, q, t ); +} + +#endif + + +FORCEINLINE fltx4 QuaternionSlerpSIMD( const fltx4 &p, const fltx4 &q, float t ) +{ + fltx4 q2, result; + q2 = QuaternionAlignSIMD( p, q ); + result = QuaternionSlerpNoAlignSIMD( p, q2, t ); + return result; +} + + +#endif // ALLOW_SIMD_QUATERNION_MATH + +#endif // SSEQUATMATH_H + diff --git a/mp/src/public/mathlib/vector.h b/mp/src/public/mathlib/vector.h index 4b361640..c19261d7 100644 --- a/mp/src/public/mathlib/vector.h +++ b/mp/src/public/mathlib/vector.h @@ -1,2312 +1,2312 @@ -//========= Copyright 
Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -// $NoKeywords: $ -// -//=============================================================================// - -#ifndef VECTOR_H -#define VECTOR_H - -#ifdef _WIN32 -#pragma once -#endif - -#include -#include - -// For vec_t, put this somewhere else? -#include "tier0/basetypes.h" - -// For rand(). We really need a library! -#include - -#ifndef _X360 -// For MMX intrinsics -#include -#endif - -#include "tier0/dbg.h" -#include "tier0/threadtools.h" -#include "mathlib/vector2d.h" -#include "mathlib/math_pfns.h" -#include "minmax.h" - -// Uncomment this to add extra Asserts to check for NANs, uninitialized vecs, etc. -//#define VECTOR_PARANOIA 1 - -// Uncomment this to make sure we don't do anything slow with our vectors -//#define VECTOR_NO_SLOW_OPERATIONS 1 - - -// Used to make certain code easier to read. -#define X_INDEX 0 -#define Y_INDEX 1 -#define Z_INDEX 2 - - -#ifdef VECTOR_PARANOIA -#define CHECK_VALID( _v) Assert( (_v).IsValid() ) -#else -#ifdef GNUC -#define CHECK_VALID( _v) -#else -#define CHECK_VALID( _v) 0 -#endif -#endif - -#define VecToString(v) (static_cast(CFmtStr("(%f, %f, %f)", (v).x, (v).y, (v).z))) // ** Note: this generates a temporary, don't hold reference! - -class VectorByValue; - -//========================================================= -// 3D Vector -//========================================================= -class Vector -{ -public: - // Members - vec_t x, y, z; - - // Construction/destruction: - Vector(void); - Vector(vec_t X, vec_t Y, vec_t Z); - explicit Vector(vec_t XYZ); ///< broadcast initialize - - // Initialization - void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f); - // TODO (Ilya): Should there be an init that takes a single float for consistency? - - // Got any nasty NAN's? - bool IsValid() const; - void Invalidate(); - - // array access... - vec_t operator[](int i) const; - vec_t& operator[](int i); - - // Base address... 
- vec_t* Base(); - vec_t const* Base() const; - - // Cast to Vector2D... - Vector2D& AsVector2D(); - const Vector2D& AsVector2D() const; - - // Initialization methods - void Random( vec_t minVal, vec_t maxVal ); - inline void Zero(); ///< zero out a vector - - // equality - bool operator==(const Vector& v) const; - bool operator!=(const Vector& v) const; - - // arithmetic operations - FORCEINLINE Vector& operator+=(const Vector &v); - FORCEINLINE Vector& operator-=(const Vector &v); - FORCEINLINE Vector& operator*=(const Vector &v); - FORCEINLINE Vector& operator*=(float s); - FORCEINLINE Vector& operator/=(const Vector &v); - FORCEINLINE Vector& operator/=(float s); - FORCEINLINE Vector& operator+=(float fl) ; ///< broadcast add - FORCEINLINE Vector& operator-=(float fl) ; ///< broadcast sub - -// negate the vector components - void Negate(); - - // Get the vector's magnitude. - inline vec_t Length() const; - - // Get the vector's magnitude squared. - FORCEINLINE vec_t LengthSqr(void) const - { - CHECK_VALID(*this); - return (x*x + y*y + z*z); - } - - // return true if this vector is (0,0,0) within tolerance - bool IsZero( float tolerance = 0.01f ) const - { - return (x > -tolerance && x < tolerance && - y > -tolerance && y < tolerance && - z > -tolerance && z < tolerance); - } - - vec_t NormalizeInPlace(); - Vector Normalized() const; - bool IsLengthGreaterThan( float val ) const; - bool IsLengthLessThan( float val ) const; - - // check if a vector is within the box defined by two other vectors - FORCEINLINE bool WithinAABox( Vector const &boxmin, Vector const &boxmax); - - // Get the distance from this vector to the other one. - vec_t DistTo(const Vector &vOther) const; - - // Get the distance from this vector to the other one squared. - // NJS: note, VC wasn't inlining it correctly in several deeply nested inlines due to being an 'out of line' inline. 
- // may be able to tidy this up after switching to VC7 - FORCEINLINE vec_t DistToSqr(const Vector &vOther) const - { - Vector delta; - - delta.x = x - vOther.x; - delta.y = y - vOther.y; - delta.z = z - vOther.z; - - return delta.LengthSqr(); - } - - // Copy - void CopyToArray(float* rgfl) const; - - // Multiply, add, and assign to this (ie: *this = a + b * scalar). This - // is about 12% faster than the actual vector equation (because it's done per-component - // rather than per-vector). - void MulAdd(const Vector& a, const Vector& b, float scalar); - - // Dot product. - vec_t Dot(const Vector& vOther) const; - - // assignment - Vector& operator=(const Vector &vOther); - - // 2d - vec_t Length2D(void) const; - vec_t Length2DSqr(void) const; - - operator VectorByValue &() { return *((VectorByValue *)(this)); } - operator const VectorByValue &() const { return *((const VectorByValue *)(this)); } - -#ifndef VECTOR_NO_SLOW_OPERATIONS - // copy constructors -// Vector(const Vector &vOther); - - // arithmetic operations - Vector operator-(void) const; - - Vector operator+(const Vector& v) const; - Vector operator-(const Vector& v) const; - Vector operator*(const Vector& v) const; - Vector operator/(const Vector& v) const; - Vector operator*(float fl) const; - Vector operator/(float fl) const; - - // Cross product between two vectors. - Vector Cross(const Vector &vOther) const; - - // Returns a vector with the min or max in X, Y, and Z. 
- Vector Min(const Vector &vOther) const; - Vector Max(const Vector &vOther) const; - -#else - -private: - // No copy constructors allowed if we're in optimal mode - Vector(const Vector& vOther); -#endif -}; - -FORCEINLINE void NetworkVarConstruct( Vector &v ) { v.Zero(); } - - -#define USE_M64S ( ( !defined( _X360 ) ) ) - - - -//========================================================= -// 4D Short Vector (aligned on 8-byte boundary) -//========================================================= -class ALIGN8 ShortVector -{ -public: - - short x, y, z, w; - - // Initialization - void Init(short ix = 0, short iy = 0, short iz = 0, short iw = 0 ); - - -#if USE_M64S - __m64 &AsM64() { return *(__m64*)&x; } - const __m64 &AsM64() const { return *(const __m64*)&x; } -#endif - - // Setter - void Set( const ShortVector& vOther ); - void Set( const short ix, const short iy, const short iz, const short iw ); - - // array access... - short operator[](int i) const; - short& operator[](int i); - - // Base address... - short* Base(); - short const* Base() const; - - // equality - bool operator==(const ShortVector& v) const; - bool operator!=(const ShortVector& v) const; - - // Arithmetic operations - FORCEINLINE ShortVector& operator+=(const ShortVector &v); - FORCEINLINE ShortVector& operator-=(const ShortVector &v); - FORCEINLINE ShortVector& operator*=(const ShortVector &v); - FORCEINLINE ShortVector& operator*=(float s); - FORCEINLINE ShortVector& operator/=(const ShortVector &v); - FORCEINLINE ShortVector& operator/=(float s); - FORCEINLINE ShortVector operator*(float fl) const; - -private: - - // No copy constructors allowed if we're in optimal mode -// ShortVector(ShortVector const& vOther); - - // No assignment operators either... 
-// ShortVector& operator=( ShortVector const& src ); - -} ALIGN8_POST; - - - - - - -//========================================================= -// 4D Integer Vector -//========================================================= -class IntVector4D -{ -public: - - int x, y, z, w; - - // Initialization - void Init(int ix = 0, int iy = 0, int iz = 0, int iw = 0 ); - -#if USE_M64S - __m64 &AsM64() { return *(__m64*)&x; } - const __m64 &AsM64() const { return *(const __m64*)&x; } -#endif - - // Setter - void Set( const IntVector4D& vOther ); - void Set( const int ix, const int iy, const int iz, const int iw ); - - // array access... - int operator[](int i) const; - int& operator[](int i); - - // Base address... - int* Base(); - int const* Base() const; - - // equality - bool operator==(const IntVector4D& v) const; - bool operator!=(const IntVector4D& v) const; - - // Arithmetic operations - FORCEINLINE IntVector4D& operator+=(const IntVector4D &v); - FORCEINLINE IntVector4D& operator-=(const IntVector4D &v); - FORCEINLINE IntVector4D& operator*=(const IntVector4D &v); - FORCEINLINE IntVector4D& operator*=(float s); - FORCEINLINE IntVector4D& operator/=(const IntVector4D &v); - FORCEINLINE IntVector4D& operator/=(float s); - FORCEINLINE IntVector4D operator*(float fl) const; - -private: - - // No copy constructors allowed if we're in optimal mode - // IntVector4D(IntVector4D const& vOther); - - // No assignment operators either... 
- // IntVector4D& operator=( IntVector4D const& src ); - -}; - - - -//----------------------------------------------------------------------------- -// Allows us to specifically pass the vector by value when we need to -//----------------------------------------------------------------------------- -class VectorByValue : public Vector -{ -public: - // Construction/destruction: - VectorByValue(void) : Vector() {} - VectorByValue(vec_t X, vec_t Y, vec_t Z) : Vector( X, Y, Z ) {} - VectorByValue(const VectorByValue& vOther) { *this = vOther; } -}; - - -//----------------------------------------------------------------------------- -// Utility to simplify table construction. No constructor means can use -// traditional C-style initialization -//----------------------------------------------------------------------------- -class TableVector -{ -public: - vec_t x, y, z; - - operator Vector &() { return *((Vector *)(this)); } - operator const Vector &() const { return *((const Vector *)(this)); } - - // array access... 
- inline vec_t& operator[](int i) - { - Assert( (i >= 0) && (i < 3) ); - return ((vec_t*)this)[i]; - } - - inline vec_t operator[](int i) const - { - Assert( (i >= 0) && (i < 3) ); - return ((vec_t*)this)[i]; - } -}; - - -//----------------------------------------------------------------------------- -// Here's where we add all those lovely SSE optimized routines -//----------------------------------------------------------------------------- - -class ALIGN16 VectorAligned : public Vector -{ -public: - inline VectorAligned(void) {}; - inline VectorAligned(vec_t X, vec_t Y, vec_t Z) - { - Init(X,Y,Z); - } - -#ifdef VECTOR_NO_SLOW_OPERATIONS - -private: - // No copy constructors allowed if we're in optimal mode - VectorAligned(const VectorAligned& vOther); - VectorAligned(const Vector &vOther); - -#else -public: - explicit VectorAligned(const Vector &vOther) - { - Init(vOther.x, vOther.y, vOther.z); - } - - VectorAligned& operator=(const Vector &vOther) - { - Init(vOther.x, vOther.y, vOther.z); - return *this; - } - -#endif - float w; // this space is used anyway -} ALIGN16_POST; - -//----------------------------------------------------------------------------- -// Vector related operations -//----------------------------------------------------------------------------- - -// Vector clear -FORCEINLINE void VectorClear( Vector& a ); - -// Copy -FORCEINLINE void VectorCopy( const Vector& src, Vector& dst ); - -// Vector arithmetic -FORCEINLINE void VectorAdd( const Vector& a, const Vector& b, Vector& result ); -FORCEINLINE void VectorSubtract( const Vector& a, const Vector& b, Vector& result ); -FORCEINLINE void VectorMultiply( const Vector& a, vec_t b, Vector& result ); -FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& result ); -FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& result ); -FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& result ); -inline void VectorScale ( const Vector& in, vec_t scale, 
Vector& result ); -// Don't mark this as inline in its function declaration. That's only necessary on its -// definition, and 'inline' here leads to gcc warnings. -void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest ); - -// Vector equality with tolerance -bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance = 0.0f ); - -#define VectorExpand(v) (v).x, (v).y, (v).z - - -// Normalization -// FIXME: Can't use quite yet -//vec_t VectorNormalize( Vector& v ); - -// Length -inline vec_t VectorLength( const Vector& v ); - -// Dot Product -FORCEINLINE vec_t DotProduct(const Vector& a, const Vector& b); - -// Cross product -void CrossProduct(const Vector& a, const Vector& b, Vector& result ); - -// Store the min or max of each of x, y, and z into the result. -void VectorMin( const Vector &a, const Vector &b, Vector &result ); -void VectorMax( const Vector &a, const Vector &b, Vector &result ); - -// Linearly interpolate between two vectors -void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest ); -Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t ); - -FORCEINLINE Vector ReplicateToVector( float x ) -{ - return Vector( x, x, x ); -} - -// check if a point is in the field of a view of an object. supports up to 180 degree fov. 
-FORCEINLINE bool PointWithinViewAngle( Vector const &vecSrcPosition, - Vector const &vecTargetPosition, - Vector const &vecLookDirection, float flCosHalfFOV ) -{ - Vector vecDelta = vecTargetPosition - vecSrcPosition; - float cosDiff = DotProduct( vecLookDirection, vecDelta ); - - if ( cosDiff < 0 ) - return false; - - float flLen2 = vecDelta.LengthSqr(); - - // a/sqrt(b) > c == a^2 > b * c ^2 - return ( cosDiff * cosDiff > flLen2 * flCosHalfFOV * flCosHalfFOV ); - -} - - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -// Cross product -Vector CrossProduct( const Vector& a, const Vector& b ); - -// Random vector creation -Vector RandomVector( vec_t minVal, vec_t maxVal ); - -#endif - -float RandomVectorInUnitSphere( Vector *pVector ); -float RandomVectorInUnitCircle( Vector2D *pVector ); - - -//----------------------------------------------------------------------------- -// -// Inlined Vector methods -// -//----------------------------------------------------------------------------- - - -//----------------------------------------------------------------------------- -// constructors -//----------------------------------------------------------------------------- -inline Vector::Vector(void) -{ -#ifdef _DEBUG -#ifdef VECTOR_PARANOIA - // Initialize to NAN to catch errors - x = y = z = VEC_T_NAN; -#endif -#endif -} - -inline Vector::Vector(vec_t X, vec_t Y, vec_t Z) -{ - x = X; y = Y; z = Z; - CHECK_VALID(*this); -} - -inline Vector::Vector(vec_t XYZ) -{ - x = y = z = XYZ; - CHECK_VALID(*this); -} - -//inline Vector::Vector(const float *pFloat) -//{ -// Assert( pFloat ); -// x = pFloat[0]; y = pFloat[1]; z = pFloat[2]; -// CHECK_VALID(*this); -//} - -#if 0 -//----------------------------------------------------------------------------- -// copy constructor -//----------------------------------------------------------------------------- - -inline Vector::Vector(const Vector &vOther) -{ - CHECK_VALID(vOther); - x = vOther.x; y = vOther.y; z = vOther.z; -} -#endif - 
-//----------------------------------------------------------------------------- -// initialization -//----------------------------------------------------------------------------- - -inline void Vector::Init( vec_t ix, vec_t iy, vec_t iz ) -{ - x = ix; y = iy; z = iz; - CHECK_VALID(*this); -} - -inline void Vector::Random( vec_t minVal, vec_t maxVal ) -{ - x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); - y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); - z = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); - CHECK_VALID(*this); -} - -// This should really be a single opcode on the PowerPC (move r0 onto the vec reg) -inline void Vector::Zero() -{ - x = y = z = 0.0f; -} - -inline void VectorClear( Vector& a ) -{ - a.x = a.y = a.z = 0.0f; -} - -//----------------------------------------------------------------------------- -// assignment -//----------------------------------------------------------------------------- - -inline Vector& Vector::operator=(const Vector &vOther) -{ - CHECK_VALID(vOther); - x=vOther.x; y=vOther.y; z=vOther.z; - return *this; -} - - -//----------------------------------------------------------------------------- -// Array access -//----------------------------------------------------------------------------- -inline vec_t& Vector::operator[](int i) -{ - Assert( (i >= 0) && (i < 3) ); - return ((vec_t*)this)[i]; -} - -inline vec_t Vector::operator[](int i) const -{ - Assert( (i >= 0) && (i < 3) ); - return ((vec_t*)this)[i]; -} - - -//----------------------------------------------------------------------------- -// Base address... -//----------------------------------------------------------------------------- -inline vec_t* Vector::Base() -{ - return (vec_t*)this; -} - -inline vec_t const* Vector::Base() const -{ - return (vec_t const*)this; -} - -//----------------------------------------------------------------------------- -// Cast to Vector2D... 
-//----------------------------------------------------------------------------- - -inline Vector2D& Vector::AsVector2D() -{ - return *(Vector2D*)this; -} - -inline const Vector2D& Vector::AsVector2D() const -{ - return *(const Vector2D*)this; -} - -//----------------------------------------------------------------------------- -// IsValid? -//----------------------------------------------------------------------------- - -inline bool Vector::IsValid() const -{ - return IsFinite(x) && IsFinite(y) && IsFinite(z); -} - -//----------------------------------------------------------------------------- -// Invalidate -//----------------------------------------------------------------------------- - -inline void Vector::Invalidate() -{ -//#ifdef _DEBUG -//#ifdef VECTOR_PARANOIA - x = y = z = VEC_T_NAN; -//#endif -//#endif -} - -//----------------------------------------------------------------------------- -// comparison -//----------------------------------------------------------------------------- - -inline bool Vector::operator==( const Vector& src ) const -{ - CHECK_VALID(src); - CHECK_VALID(*this); - return (src.x == x) && (src.y == y) && (src.z == z); -} - -inline bool Vector::operator!=( const Vector& src ) const -{ - CHECK_VALID(src); - CHECK_VALID(*this); - return (src.x != x) || (src.y != y) || (src.z != z); -} - - -//----------------------------------------------------------------------------- -// Copy -//----------------------------------------------------------------------------- - -FORCEINLINE void VectorCopy( const Vector& src, Vector& dst ) -{ - CHECK_VALID(src); - dst.x = src.x; - dst.y = src.y; - dst.z = src.z; -} - -inline void Vector::CopyToArray(float* rgfl) const -{ - Assert( rgfl ); - CHECK_VALID(*this); - rgfl[0] = x, rgfl[1] = y, rgfl[2] = z; -} - -//----------------------------------------------------------------------------- -// standard math operations -//----------------------------------------------------------------------------- -// #pragma 
message("TODO: these should be SSE") - -inline void Vector::Negate() -{ - CHECK_VALID(*this); - x = -x; y = -y; z = -z; -} - -FORCEINLINE Vector& Vector::operator+=(const Vector& v) -{ - CHECK_VALID(*this); - CHECK_VALID(v); - x+=v.x; y+=v.y; z += v.z; - return *this; -} - -FORCEINLINE Vector& Vector::operator-=(const Vector& v) -{ - CHECK_VALID(*this); - CHECK_VALID(v); - x-=v.x; y-=v.y; z -= v.z; - return *this; -} - -FORCEINLINE Vector& Vector::operator*=(float fl) -{ - x *= fl; - y *= fl; - z *= fl; - CHECK_VALID(*this); - return *this; -} - -FORCEINLINE Vector& Vector::operator*=(const Vector& v) -{ - CHECK_VALID(v); - x *= v.x; - y *= v.y; - z *= v.z; - CHECK_VALID(*this); - return *this; -} - -// this ought to be an opcode. -FORCEINLINE Vector& Vector::operator+=(float fl) -{ - x += fl; - y += fl; - z += fl; - CHECK_VALID(*this); - return *this; -} - -FORCEINLINE Vector& Vector::operator-=(float fl) -{ - x -= fl; - y -= fl; - z -= fl; - CHECK_VALID(*this); - return *this; -} - - - -FORCEINLINE Vector& Vector::operator/=(float fl) -{ - Assert( fl != 0.0f ); - float oofl = 1.0f / fl; - x *= oofl; - y *= oofl; - z *= oofl; - CHECK_VALID(*this); - return *this; -} - -FORCEINLINE Vector& Vector::operator/=(const Vector& v) -{ - CHECK_VALID(v); - Assert( v.x != 0.0f && v.y != 0.0f && v.z != 0.0f ); - x /= v.x; - y /= v.y; - z /= v.z; - CHECK_VALID(*this); - return *this; -} - - - -//----------------------------------------------------------------------------- -// -// Inlined Short Vector methods -// -//----------------------------------------------------------------------------- - - -inline void ShortVector::Init( short ix, short iy, short iz, short iw ) -{ - x = ix; y = iy; z = iz; w = iw; -} - -FORCEINLINE void ShortVector::Set( const ShortVector& vOther ) -{ - x = vOther.x; - y = vOther.y; - z = vOther.z; - w = vOther.w; -} - -FORCEINLINE void ShortVector::Set( const short ix, const short iy, const short iz, const short iw ) -{ - x = ix; - y = iy; - z = iz; - w 
= iw; -} - - -//----------------------------------------------------------------------------- -// Array access -//----------------------------------------------------------------------------- -inline short ShortVector::operator[](int i) const -{ - Assert( (i >= 0) && (i < 4) ); - return ((short*)this)[i]; -} - -inline short& ShortVector::operator[](int i) -{ - Assert( (i >= 0) && (i < 4) ); - return ((short*)this)[i]; -} - -//----------------------------------------------------------------------------- -// Base address... -//----------------------------------------------------------------------------- -inline short* ShortVector::Base() -{ - return (short*)this; -} - -inline short const* ShortVector::Base() const -{ - return (short const*)this; -} - - -//----------------------------------------------------------------------------- -// comparison -//----------------------------------------------------------------------------- - -inline bool ShortVector::operator==( const ShortVector& src ) const -{ - return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w); -} - -inline bool ShortVector::operator!=( const ShortVector& src ) const -{ - return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w); -} - - - -//----------------------------------------------------------------------------- -// standard math operations -//----------------------------------------------------------------------------- - -FORCEINLINE ShortVector& ShortVector::operator+=(const ShortVector& v) -{ - x+=v.x; y+=v.y; z += v.z; w += v.w; - return *this; -} - -FORCEINLINE ShortVector& ShortVector::operator-=(const ShortVector& v) -{ - x-=v.x; y-=v.y; z -= v.z; w -= v.w; - return *this; -} - -FORCEINLINE ShortVector& ShortVector::operator*=(float fl) -{ - x *= fl; - y *= fl; - z *= fl; - w *= fl; - return *this; -} - -FORCEINLINE ShortVector& ShortVector::operator*=(const ShortVector& v) -{ - x *= v.x; - y *= v.y; - z *= v.z; - w *= v.w; - return *this; -} - -FORCEINLINE ShortVector& 
ShortVector::operator/=(float fl) -{ - Assert( fl != 0.0f ); - float oofl = 1.0f / fl; - x *= oofl; - y *= oofl; - z *= oofl; - w *= oofl; - return *this; -} - -FORCEINLINE ShortVector& ShortVector::operator/=(const ShortVector& v) -{ - Assert( v.x != 0 && v.y != 0 && v.z != 0 && v.w != 0 ); - x /= v.x; - y /= v.y; - z /= v.z; - w /= v.w; - return *this; -} - -FORCEINLINE void ShortVectorMultiply( const ShortVector& src, float fl, ShortVector& res ) -{ - Assert( IsFinite(fl) ); - res.x = src.x * fl; - res.y = src.y * fl; - res.z = src.z * fl; - res.w = src.w * fl; -} - -FORCEINLINE ShortVector ShortVector::operator*(float fl) const -{ - ShortVector res; - ShortVectorMultiply( *this, fl, res ); - return res; -} - - - - - - -//----------------------------------------------------------------------------- -// -// Inlined Integer Vector methods -// -//----------------------------------------------------------------------------- - - -inline void IntVector4D::Init( int ix, int iy, int iz, int iw ) -{ - x = ix; y = iy; z = iz; w = iw; -} - -FORCEINLINE void IntVector4D::Set( const IntVector4D& vOther ) -{ - x = vOther.x; - y = vOther.y; - z = vOther.z; - w = vOther.w; -} - -FORCEINLINE void IntVector4D::Set( const int ix, const int iy, const int iz, const int iw ) -{ - x = ix; - y = iy; - z = iz; - w = iw; -} - - -//----------------------------------------------------------------------------- -// Array access -//----------------------------------------------------------------------------- -inline int IntVector4D::operator[](int i) const -{ - Assert( (i >= 0) && (i < 4) ); - return ((int*)this)[i]; -} - -inline int& IntVector4D::operator[](int i) -{ - Assert( (i >= 0) && (i < 4) ); - return ((int*)this)[i]; -} - -//----------------------------------------------------------------------------- -// Base address... 
-//----------------------------------------------------------------------------- -inline int* IntVector4D::Base() -{ - return (int*)this; -} - -inline int const* IntVector4D::Base() const -{ - return (int const*)this; -} - - -//----------------------------------------------------------------------------- -// comparison -//----------------------------------------------------------------------------- - -inline bool IntVector4D::operator==( const IntVector4D& src ) const -{ - return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w); -} - -inline bool IntVector4D::operator!=( const IntVector4D& src ) const -{ - return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w); -} - - - -//----------------------------------------------------------------------------- -// standard math operations -//----------------------------------------------------------------------------- - -FORCEINLINE IntVector4D& IntVector4D::operator+=(const IntVector4D& v) -{ - x+=v.x; y+=v.y; z += v.z; w += v.w; - return *this; -} - -FORCEINLINE IntVector4D& IntVector4D::operator-=(const IntVector4D& v) -{ - x-=v.x; y-=v.y; z -= v.z; w -= v.w; - return *this; -} - -FORCEINLINE IntVector4D& IntVector4D::operator*=(float fl) -{ - x *= fl; - y *= fl; - z *= fl; - w *= fl; - return *this; -} - -FORCEINLINE IntVector4D& IntVector4D::operator*=(const IntVector4D& v) -{ - x *= v.x; - y *= v.y; - z *= v.z; - w *= v.w; - return *this; -} - -FORCEINLINE IntVector4D& IntVector4D::operator/=(float fl) -{ - Assert( fl != 0.0f ); - float oofl = 1.0f / fl; - x *= oofl; - y *= oofl; - z *= oofl; - w *= oofl; - return *this; -} - -FORCEINLINE IntVector4D& IntVector4D::operator/=(const IntVector4D& v) -{ - Assert( v.x != 0 && v.y != 0 && v.z != 0 && v.w != 0 ); - x /= v.x; - y /= v.y; - z /= v.z; - w /= v.w; - return *this; -} - -FORCEINLINE void IntVector4DMultiply( const IntVector4D& src, float fl, IntVector4D& res ) -{ - Assert( IsFinite(fl) ); - res.x = src.x * fl; - res.y = src.y * fl; - res.z = 
src.z * fl; - res.w = src.w * fl; -} - -FORCEINLINE IntVector4D IntVector4D::operator*(float fl) const -{ - IntVector4D res; - IntVector4DMultiply( *this, fl, res ); - return res; -} - - - -// ======================= - - -FORCEINLINE void VectorAdd( const Vector& a, const Vector& b, Vector& c ) -{ - CHECK_VALID(a); - CHECK_VALID(b); - c.x = a.x + b.x; - c.y = a.y + b.y; - c.z = a.z + b.z; -} - -FORCEINLINE void VectorSubtract( const Vector& a, const Vector& b, Vector& c ) -{ - CHECK_VALID(a); - CHECK_VALID(b); - c.x = a.x - b.x; - c.y = a.y - b.y; - c.z = a.z - b.z; -} - -FORCEINLINE void VectorMultiply( const Vector& a, vec_t b, Vector& c ) -{ - CHECK_VALID(a); - Assert( IsFinite(b) ); - c.x = a.x * b; - c.y = a.y * b; - c.z = a.z * b; -} - -FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& c ) -{ - CHECK_VALID(a); - CHECK_VALID(b); - c.x = a.x * b.x; - c.y = a.y * b.y; - c.z = a.z * b.z; -} - -// for backwards compatability -inline void VectorScale ( const Vector& in, vec_t scale, Vector& result ) -{ - VectorMultiply( in, scale, result ); -} - - -FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& c ) -{ - CHECK_VALID(a); - Assert( b != 0.0f ); - vec_t oob = 1.0f / b; - c.x = a.x * oob; - c.y = a.y * oob; - c.z = a.z * oob; -} - -FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& c ) -{ - CHECK_VALID(a); - CHECK_VALID(b); - Assert( (b.x != 0.0f) && (b.y != 0.0f) && (b.z != 0.0f) ); - c.x = a.x / b.x; - c.y = a.y / b.y; - c.z = a.z / b.z; -} - -// FIXME: Remove -// For backwards compatability -inline void Vector::MulAdd(const Vector& a, const Vector& b, float scalar) -{ - CHECK_VALID(a); - CHECK_VALID(b); - x = a.x + b.x * scalar; - y = a.y + b.y * scalar; - z = a.z + b.z * scalar; -} - -inline void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest ) -{ - CHECK_VALID(src1); - CHECK_VALID(src2); - dest.x = src1.x + (src2.x - src1.x) * t; - dest.y = src1.y + (src2.y - src1.y) * t; - 
dest.z = src1.z + (src2.z - src1.z) * t; -} - -inline Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t ) -{ - Vector result; - VectorLerp( src1, src2, t, result ); - return result; -} - -//----------------------------------------------------------------------------- -// Temporary storage for vector results so const Vector& results can be returned -//----------------------------------------------------------------------------- -inline Vector &AllocTempVector() -{ - static Vector s_vecTemp[128]; - static CInterlockedInt s_nIndex; - - int nIndex; - for (;;) - { - int nOldIndex = s_nIndex; - nIndex = ( (nOldIndex + 0x10001) & 0x7F ); - - if ( s_nIndex.AssignIf( nOldIndex, nIndex ) ) - { - break; - } - ThreadPause(); - } - return s_vecTemp[nIndex & 0xffff]; -} - - - -//----------------------------------------------------------------------------- -// dot, cross -//----------------------------------------------------------------------------- -FORCEINLINE vec_t DotProduct(const Vector& a, const Vector& b) -{ - CHECK_VALID(a); - CHECK_VALID(b); - return( a.x*b.x + a.y*b.y + a.z*b.z ); -} - -// for backwards compatability -inline vec_t Vector::Dot( const Vector& vOther ) const -{ - CHECK_VALID(vOther); - return DotProduct( *this, vOther ); -} - -inline void CrossProduct(const Vector& a, const Vector& b, Vector& result ) -{ - CHECK_VALID(a); - CHECK_VALID(b); - Assert( &a != &result ); - Assert( &b != &result ); - result.x = a.y*b.z - a.z*b.y; - result.y = a.z*b.x - a.x*b.z; - result.z = a.x*b.y - a.y*b.x; -} - -inline vec_t DotProductAbs( const Vector &v0, const Vector &v1 ) -{ - CHECK_VALID(v0); - CHECK_VALID(v1); - return FloatMakePositive(v0.x*v1.x) + FloatMakePositive(v0.y*v1.y) + FloatMakePositive(v0.z*v1.z); -} - -inline vec_t DotProductAbs( const Vector &v0, const float *v1 ) -{ - return FloatMakePositive(v0.x * v1[0]) + FloatMakePositive(v0.y * v1[1]) + FloatMakePositive(v0.z * v1[2]); -} - 
-//----------------------------------------------------------------------------- -// length -//----------------------------------------------------------------------------- - -inline vec_t VectorLength( const Vector& v ) -{ - CHECK_VALID(v); - return (vec_t)FastSqrt(v.x*v.x + v.y*v.y + v.z*v.z); -} - - -inline vec_t Vector::Length(void) const -{ - CHECK_VALID(*this); - return VectorLength( *this ); -} - - -//----------------------------------------------------------------------------- -// Normalization -//----------------------------------------------------------------------------- - -/* -// FIXME: Can't use until we're un-macroed in mathlib.h -inline vec_t VectorNormalize( Vector& v ) -{ - Assert( v.IsValid() ); - vec_t l = v.Length(); - if (l != 0.0f) - { - v /= l; - } - else - { - // FIXME: - // Just copying the existing implemenation; shouldn't res.z == 0? - v.x = v.y = 0.0f; v.z = 1.0f; - } - return l; -} -*/ - - -// check a point against a box -bool Vector::WithinAABox( Vector const &boxmin, Vector const &boxmax) -{ - return ( - ( x >= boxmin.x ) && ( x <= boxmax.x) && - ( y >= boxmin.y ) && ( y <= boxmax.y) && - ( z >= boxmin.z ) && ( z <= boxmax.z) - ); -} - -//----------------------------------------------------------------------------- -// Get the distance from this vector to the other one -//----------------------------------------------------------------------------- -inline vec_t Vector::DistTo(const Vector &vOther) const -{ - Vector delta; - VectorSubtract( *this, vOther, delta ); - return delta.Length(); -} - - -//----------------------------------------------------------------------------- -// Vector equality with tolerance -//----------------------------------------------------------------------------- -inline bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance ) -{ - if (FloatMakePositive(src1.x - src2.x) > tolerance) - return false; - if (FloatMakePositive(src1.y - src2.y) > tolerance) - return false; - return 
(FloatMakePositive(src1.z - src2.z) <= tolerance); -} - - -//----------------------------------------------------------------------------- -// Computes the closest point to vecTarget no farther than flMaxDist from vecStart -//----------------------------------------------------------------------------- -inline void ComputeClosestPoint( const Vector& vecStart, float flMaxDist, const Vector& vecTarget, Vector *pResult ) -{ - Vector vecDelta; - VectorSubtract( vecTarget, vecStart, vecDelta ); - float flDistSqr = vecDelta.LengthSqr(); - if ( flDistSqr <= flMaxDist * flMaxDist ) - { - *pResult = vecTarget; - } - else - { - vecDelta /= FastSqrt( flDistSqr ); - VectorMA( vecStart, flMaxDist, vecDelta, *pResult ); - } -} - - -//----------------------------------------------------------------------------- -// Takes the absolute value of a vector -//----------------------------------------------------------------------------- -inline void VectorAbs( const Vector& src, Vector& dst ) -{ - dst.x = FloatMakePositive(src.x); - dst.y = FloatMakePositive(src.y); - dst.z = FloatMakePositive(src.z); -} - - -//----------------------------------------------------------------------------- -// -// Slow methods -// -//----------------------------------------------------------------------------- - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -//----------------------------------------------------------------------------- -// Returns a vector with the min or max in X, Y, and Z. -//----------------------------------------------------------------------------- -inline Vector Vector::Min(const Vector &vOther) const -{ - return Vector(x < vOther.x ? x : vOther.x, - y < vOther.y ? y : vOther.y, - z < vOther.z ? z : vOther.z); -} - -inline Vector Vector::Max(const Vector &vOther) const -{ - return Vector(x > vOther.x ? x : vOther.x, - y > vOther.y ? y : vOther.y, - z > vOther.z ? 
z : vOther.z); -} - - -//----------------------------------------------------------------------------- -// arithmetic operations -//----------------------------------------------------------------------------- - -inline Vector Vector::operator-(void) const -{ - return Vector(-x,-y,-z); -} - -inline Vector Vector::operator+(const Vector& v) const -{ - Vector res; - VectorAdd( *this, v, res ); - return res; -} - -inline Vector Vector::operator-(const Vector& v) const -{ - Vector res; - VectorSubtract( *this, v, res ); - return res; -} - -inline Vector Vector::operator*(float fl) const -{ - Vector res; - VectorMultiply( *this, fl, res ); - return res; -} - -inline Vector Vector::operator*(const Vector& v) const -{ - Vector res; - VectorMultiply( *this, v, res ); - return res; -} - -inline Vector Vector::operator/(float fl) const -{ - Vector res; - VectorDivide( *this, fl, res ); - return res; -} - -inline Vector Vector::operator/(const Vector& v) const -{ - Vector res; - VectorDivide( *this, v, res ); - return res; -} - -inline Vector operator*(float fl, const Vector& v) -{ - return v * fl; -} - -//----------------------------------------------------------------------------- -// cross product -//----------------------------------------------------------------------------- - -inline Vector Vector::Cross(const Vector& vOther) const -{ - Vector res; - CrossProduct( *this, vOther, res ); - return res; -} - -//----------------------------------------------------------------------------- -// 2D -//----------------------------------------------------------------------------- - -inline vec_t Vector::Length2D(void) const -{ - return (vec_t)FastSqrt(x*x + y*y); -} - -inline vec_t Vector::Length2DSqr(void) const -{ - return (x*x + y*y); -} - -inline Vector CrossProduct(const Vector& a, const Vector& b) -{ - return Vector( a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x ); -} - -inline void VectorMin( const Vector &a, const Vector &b, Vector &result ) -{ - result.x = 
fpmin(a.x, b.x); - result.y = fpmin(a.y, b.y); - result.z = fpmin(a.z, b.z); -} - -inline void VectorMax( const Vector &a, const Vector &b, Vector &result ) -{ - result.x = fpmax(a.x, b.x); - result.y = fpmax(a.y, b.y); - result.z = fpmax(a.z, b.z); -} - -inline float ComputeVolume( const Vector &vecMins, const Vector &vecMaxs ) -{ - Vector vecDelta; - VectorSubtract( vecMaxs, vecMins, vecDelta ); - return DotProduct( vecDelta, vecDelta ); -} - -// Get a random vector. -inline Vector RandomVector( float minVal, float maxVal ) -{ - Vector random; - random.Random( minVal, maxVal ); - return random; -} - -#endif //slow - -//----------------------------------------------------------------------------- -// Helper debugging stuff.... -//----------------------------------------------------------------------------- - -inline bool operator==( float const* f, const Vector& v ) -{ - // AIIIEEEE!!!! - Assert(0); - return false; -} - -inline bool operator==( const Vector& v, float const* f ) -{ - // AIIIEEEE!!!! - Assert(0); - return false; -} - -inline bool operator!=( float const* f, const Vector& v ) -{ - // AIIIEEEE!!!! - Assert(0); - return false; -} - -inline bool operator!=( const Vector& v, float const* f ) -{ - // AIIIEEEE!!!! 
- Assert(0); - return false; -} - - -//----------------------------------------------------------------------------- -// AngularImpulse -//----------------------------------------------------------------------------- -// AngularImpulse are exponetial maps (an axis scaled by a "twist" angle in degrees) -typedef Vector AngularImpulse; - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -inline AngularImpulse RandomAngularImpulse( float minVal, float maxVal ) -{ - AngularImpulse angImp; - angImp.Random( minVal, maxVal ); - return angImp; -} - -#endif - - -//----------------------------------------------------------------------------- -// Quaternion -//----------------------------------------------------------------------------- - -class RadianEuler; - -class Quaternion // same data-layout as engine's vec4_t, -{ // which is a vec_t[4] -public: - inline Quaternion(void) { - - // Initialize to NAN to catch errors -#ifdef _DEBUG -#ifdef VECTOR_PARANOIA - x = y = z = w = VEC_T_NAN; -#endif -#endif - } - inline Quaternion(vec_t ix, vec_t iy, vec_t iz, vec_t iw) : x(ix), y(iy), z(iz), w(iw) { } - inline Quaternion(RadianEuler const &angle); // evil auto type promotion!!! - - inline void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f, vec_t iw=0.0f) { x = ix; y = iy; z = iz; w = iw; } - - bool IsValid() const; - void Invalidate(); - - bool operator==( const Quaternion &src ) const; - bool operator!=( const Quaternion &src ) const; - - vec_t* Base() { return (vec_t*)this; } - const vec_t* Base() const { return (vec_t*)this; } - - // array access... 
- vec_t operator[](int i) const; - vec_t& operator[](int i); - - vec_t x, y, z, w; -}; - - -//----------------------------------------------------------------------------- -// Array access -//----------------------------------------------------------------------------- -inline vec_t& Quaternion::operator[](int i) -{ - Assert( (i >= 0) && (i < 4) ); - return ((vec_t*)this)[i]; -} - -inline vec_t Quaternion::operator[](int i) const -{ - Assert( (i >= 0) && (i < 4) ); - return ((vec_t*)this)[i]; -} - - -//----------------------------------------------------------------------------- -// Equality test -//----------------------------------------------------------------------------- -inline bool Quaternion::operator==( const Quaternion &src ) const -{ - return ( x == src.x ) && ( y == src.y ) && ( z == src.z ) && ( w == src.w ); -} - -inline bool Quaternion::operator!=( const Quaternion &src ) const -{ - return !operator==( src ); -} - - -//----------------------------------------------------------------------------- -// Quaternion equality with tolerance -//----------------------------------------------------------------------------- -inline bool QuaternionsAreEqual( const Quaternion& src1, const Quaternion& src2, float tolerance ) -{ - if (FloatMakePositive(src1.x - src2.x) > tolerance) - return false; - if (FloatMakePositive(src1.y - src2.y) > tolerance) - return false; - if (FloatMakePositive(src1.z - src2.z) > tolerance) - return false; - return (FloatMakePositive(src1.w - src2.w) <= tolerance); -} - - -//----------------------------------------------------------------------------- -// Here's where we add all those lovely SSE optimized routines -//----------------------------------------------------------------------------- -class ALIGN16 QuaternionAligned : public Quaternion -{ -public: - inline QuaternionAligned(void) {}; - inline QuaternionAligned(vec_t X, vec_t Y, vec_t Z, vec_t W) - { - Init(X,Y,Z,W); - } - -#ifdef VECTOR_NO_SLOW_OPERATIONS - -private: - // No 
copy constructors allowed if we're in optimal mode - QuaternionAligned(const QuaternionAligned& vOther); - QuaternionAligned(const Quaternion &vOther); - -#else -public: - explicit QuaternionAligned(const Quaternion &vOther) - { - Init(vOther.x, vOther.y, vOther.z, vOther.w); - } - - QuaternionAligned& operator=(const Quaternion &vOther) - { - Init(vOther.x, vOther.y, vOther.z, vOther.w); - return *this; - } - -#endif -} ALIGN16_POST; - - -//----------------------------------------------------------------------------- -// Radian Euler angle aligned to axis (NOT ROLL/PITCH/YAW) -//----------------------------------------------------------------------------- -class QAngle; -class RadianEuler -{ -public: - inline RadianEuler(void) { } - inline RadianEuler(vec_t X, vec_t Y, vec_t Z) { x = X; y = Y; z = Z; } - inline RadianEuler(Quaternion const &q); // evil auto type promotion!!! - inline RadianEuler(QAngle const &angles); // evil auto type promotion!!! - - // Initialization - inline void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f) { x = ix; y = iy; z = iz; } - - // conversion to qangle - QAngle ToQAngle( void ) const; - bool IsValid() const; - void Invalidate(); - - // array access... 
- vec_t operator[](int i) const; - vec_t& operator[](int i); - - vec_t x, y, z; -}; - - -extern void AngleQuaternion( RadianEuler const &angles, Quaternion &qt ); -extern void QuaternionAngles( Quaternion const &q, RadianEuler &angles ); - -FORCEINLINE void NetworkVarConstruct( Quaternion &q ) { q.x = q.y = q.z = q.w = 0.0f; } - -inline Quaternion::Quaternion(RadianEuler const &angle) -{ - AngleQuaternion( angle, *this ); -} - -inline bool Quaternion::IsValid() const -{ - return IsFinite(x) && IsFinite(y) && IsFinite(z) && IsFinite(w); -} - -inline void Quaternion::Invalidate() -{ -//#ifdef _DEBUG -//#ifdef VECTOR_PARANOIA - x = y = z = w = VEC_T_NAN; -//#endif -//#endif -} - -inline RadianEuler::RadianEuler(Quaternion const &q) -{ - QuaternionAngles( q, *this ); -} - -inline void VectorCopy( RadianEuler const& src, RadianEuler &dst ) -{ - CHECK_VALID(src); - dst.x = src.x; - dst.y = src.y; - dst.z = src.z; -} - -inline void VectorScale( RadianEuler const& src, float b, RadianEuler &dst ) -{ - CHECK_VALID(src); - Assert( IsFinite(b) ); - dst.x = src.x * b; - dst.y = src.y * b; - dst.z = src.z * b; -} - -inline bool RadianEuler::IsValid() const -{ - return IsFinite(x) && IsFinite(y) && IsFinite(z); -} - -inline void RadianEuler::Invalidate() -{ -//#ifdef _DEBUG -//#ifdef VECTOR_PARANOIA - x = y = z = VEC_T_NAN; -//#endif -//#endif -} - - -//----------------------------------------------------------------------------- -// Array access -//----------------------------------------------------------------------------- -inline vec_t& RadianEuler::operator[](int i) -{ - Assert( (i >= 0) && (i < 3) ); - return ((vec_t*)this)[i]; -} - -inline vec_t RadianEuler::operator[](int i) const -{ - Assert( (i >= 0) && (i < 3) ); - return ((vec_t*)this)[i]; -} - - -//----------------------------------------------------------------------------- -// Degree Euler QAngle pitch, yaw, roll -//----------------------------------------------------------------------------- -class QAngleByValue; 
- -class QAngle -{ -public: - // Members - vec_t x, y, z; - - // Construction/destruction - QAngle(void); - QAngle(vec_t X, vec_t Y, vec_t Z); -// QAngle(RadianEuler const &angles); // evil auto type promotion!!! - - // Allow pass-by-value - operator QAngleByValue &() { return *((QAngleByValue *)(this)); } - operator const QAngleByValue &() const { return *((const QAngleByValue *)(this)); } - - // Initialization - void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f); - void Random( vec_t minVal, vec_t maxVal ); - - // Got any nasty NAN's? - bool IsValid() const; - void Invalidate(); - - // array access... - vec_t operator[](int i) const; - vec_t& operator[](int i); - - // Base address... - vec_t* Base(); - vec_t const* Base() const; - - // equality - bool operator==(const QAngle& v) const; - bool operator!=(const QAngle& v) const; - - // arithmetic operations - QAngle& operator+=(const QAngle &v); - QAngle& operator-=(const QAngle &v); - QAngle& operator*=(float s); - QAngle& operator/=(float s); - - // Get the vector's magnitude. - vec_t Length() const; - vec_t LengthSqr() const; - - // negate the QAngle components - //void Negate(); - - // No assignment operators either... 
- QAngle& operator=( const QAngle& src ); - -#ifndef VECTOR_NO_SLOW_OPERATIONS - // copy constructors - - // arithmetic operations - QAngle operator-(void) const; - - QAngle operator+(const QAngle& v) const; - QAngle operator-(const QAngle& v) const; - QAngle operator*(float fl) const; - QAngle operator/(float fl) const; -#else - -private: - // No copy constructors allowed if we're in optimal mode - QAngle(const QAngle& vOther); - -#endif -}; - -FORCEINLINE void NetworkVarConstruct( QAngle &q ) { q.x = q.y = q.z = 0.0f; } - -//----------------------------------------------------------------------------- -// Allows us to specifically pass the vector by value when we need to -//----------------------------------------------------------------------------- -class QAngleByValue : public QAngle -{ -public: - // Construction/destruction: - QAngleByValue(void) : QAngle() {} - QAngleByValue(vec_t X, vec_t Y, vec_t Z) : QAngle( X, Y, Z ) {} - QAngleByValue(const QAngleByValue& vOther) { *this = vOther; } -}; - - -inline void VectorAdd( const QAngle& a, const QAngle& b, QAngle& result ) -{ - CHECK_VALID(a); - CHECK_VALID(b); - result.x = a.x + b.x; - result.y = a.y + b.y; - result.z = a.z + b.z; -} - -inline void VectorMA( const QAngle &start, float scale, const QAngle &direction, QAngle &dest ) -{ - CHECK_VALID(start); - CHECK_VALID(direction); - dest.x = start.x + scale * direction.x; - dest.y = start.y + scale * direction.y; - dest.z = start.z + scale * direction.z; -} - - -//----------------------------------------------------------------------------- -// constructors -//----------------------------------------------------------------------------- -inline QAngle::QAngle(void) -{ -#ifdef _DEBUG -#ifdef VECTOR_PARANOIA - // Initialize to NAN to catch errors - x = y = z = VEC_T_NAN; -#endif -#endif -} - -inline QAngle::QAngle(vec_t X, vec_t Y, vec_t Z) -{ - x = X; y = Y; z = Z; - CHECK_VALID(*this); -} - - 
-//----------------------------------------------------------------------------- -// initialization -//----------------------------------------------------------------------------- -inline void QAngle::Init( vec_t ix, vec_t iy, vec_t iz ) -{ - x = ix; y = iy; z = iz; - CHECK_VALID(*this); -} - -inline void QAngle::Random( vec_t minVal, vec_t maxVal ) -{ - x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); - y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); - z = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); - CHECK_VALID(*this); -} - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -inline QAngle RandomAngle( float minVal, float maxVal ) -{ - Vector random; - random.Random( minVal, maxVal ); - QAngle ret( random.x, random.y, random.z ); - return ret; -} - -#endif - - -inline RadianEuler::RadianEuler(QAngle const &angles) -{ - Init( - angles.z * 3.14159265358979323846f / 180.f, - angles.x * 3.14159265358979323846f / 180.f, - angles.y * 3.14159265358979323846f / 180.f ); -} - - - - -inline QAngle RadianEuler::ToQAngle( void) const -{ - return QAngle( - y * 180.f / 3.14159265358979323846f, - z * 180.f / 3.14159265358979323846f, - x * 180.f / 3.14159265358979323846f ); -} - - -//----------------------------------------------------------------------------- -// assignment -//----------------------------------------------------------------------------- -inline QAngle& QAngle::operator=(const QAngle &vOther) -{ - CHECK_VALID(vOther); - x=vOther.x; y=vOther.y; z=vOther.z; - return *this; -} - - -//----------------------------------------------------------------------------- -// Array access -//----------------------------------------------------------------------------- -inline vec_t& QAngle::operator[](int i) -{ - Assert( (i >= 0) && (i < 3) ); - return ((vec_t*)this)[i]; -} - -inline vec_t QAngle::operator[](int i) const -{ - Assert( (i >= 0) && (i < 3) ); - return ((vec_t*)this)[i]; -} - - 
-//----------------------------------------------------------------------------- -// Base address... -//----------------------------------------------------------------------------- -inline vec_t* QAngle::Base() -{ - return (vec_t*)this; -} - -inline vec_t const* QAngle::Base() const -{ - return (vec_t const*)this; -} - - -//----------------------------------------------------------------------------- -// IsValid? -//----------------------------------------------------------------------------- -inline bool QAngle::IsValid() const -{ - return IsFinite(x) && IsFinite(y) && IsFinite(z); -} - -//----------------------------------------------------------------------------- -// Invalidate -//----------------------------------------------------------------------------- - -inline void QAngle::Invalidate() -{ -//#ifdef _DEBUG -//#ifdef VECTOR_PARANOIA - x = y = z = VEC_T_NAN; -//#endif -//#endif -} - -//----------------------------------------------------------------------------- -// comparison -//----------------------------------------------------------------------------- -inline bool QAngle::operator==( const QAngle& src ) const -{ - CHECK_VALID(src); - CHECK_VALID(*this); - return (src.x == x) && (src.y == y) && (src.z == z); -} - -inline bool QAngle::operator!=( const QAngle& src ) const -{ - CHECK_VALID(src); - CHECK_VALID(*this); - return (src.x != x) || (src.y != y) || (src.z != z); -} - - -//----------------------------------------------------------------------------- -// Copy -//----------------------------------------------------------------------------- -inline void VectorCopy( const QAngle& src, QAngle& dst ) -{ - CHECK_VALID(src); - dst.x = src.x; - dst.y = src.y; - dst.z = src.z; -} - - -//----------------------------------------------------------------------------- -// standard math operations -//----------------------------------------------------------------------------- -inline QAngle& QAngle::operator+=(const QAngle& v) -{ - CHECK_VALID(*this); - 
CHECK_VALID(v); - x+=v.x; y+=v.y; z += v.z; - return *this; -} - -inline QAngle& QAngle::operator-=(const QAngle& v) -{ - CHECK_VALID(*this); - CHECK_VALID(v); - x-=v.x; y-=v.y; z -= v.z; - return *this; -} - -inline QAngle& QAngle::operator*=(float fl) -{ - x *= fl; - y *= fl; - z *= fl; - CHECK_VALID(*this); - return *this; -} - -inline QAngle& QAngle::operator/=(float fl) -{ - Assert( fl != 0.0f ); - float oofl = 1.0f / fl; - x *= oofl; - y *= oofl; - z *= oofl; - CHECK_VALID(*this); - return *this; -} - - -//----------------------------------------------------------------------------- -// length -//----------------------------------------------------------------------------- -inline vec_t QAngle::Length( ) const -{ - CHECK_VALID(*this); - return (vec_t)FastSqrt( LengthSqr( ) ); -} - - -inline vec_t QAngle::LengthSqr( ) const -{ - CHECK_VALID(*this); - return x * x + y * y + z * z; -} - - -//----------------------------------------------------------------------------- -// Vector equality with tolerance -//----------------------------------------------------------------------------- -inline bool QAnglesAreEqual( const QAngle& src1, const QAngle& src2, float tolerance = 0.0f ) -{ - if (FloatMakePositive(src1.x - src2.x) > tolerance) - return false; - if (FloatMakePositive(src1.y - src2.y) > tolerance) - return false; - return (FloatMakePositive(src1.z - src2.z) <= tolerance); -} - - -//----------------------------------------------------------------------------- -// arithmetic operations (SLOW!!) 
-//----------------------------------------------------------------------------- -#ifndef VECTOR_NO_SLOW_OPERATIONS - -inline QAngle QAngle::operator-(void) const -{ - QAngle ret(-x,-y,-z); - return ret; -} - -inline QAngle QAngle::operator+(const QAngle& v) const -{ - QAngle res; - res.x = x + v.x; - res.y = y + v.y; - res.z = z + v.z; - return res; -} - -inline QAngle QAngle::operator-(const QAngle& v) const -{ - QAngle res; - res.x = x - v.x; - res.y = y - v.y; - res.z = z - v.z; - return res; -} - -inline QAngle QAngle::operator*(float fl) const -{ - QAngle res; - res.x = x * fl; - res.y = y * fl; - res.z = z * fl; - return res; -} - -inline QAngle QAngle::operator/(float fl) const -{ - QAngle res; - res.x = x / fl; - res.y = y / fl; - res.z = z / fl; - return res; -} - -inline QAngle operator*(float fl, const QAngle& v) -{ - QAngle ret( v * fl ); - return ret; -} - -#endif // VECTOR_NO_SLOW_OPERATIONS - - -//----------------------------------------------------------------------------- -// NOTE: These are not completely correct. 
The representations are not equivalent -// unless the QAngle represents a rotational impulse along a coordinate axis (x,y,z) -inline void QAngleToAngularImpulse( const QAngle &angles, AngularImpulse &impulse ) -{ - impulse.x = angles.z; - impulse.y = angles.x; - impulse.z = angles.y; -} - -inline void AngularImpulseToQAngle( const AngularImpulse &impulse, QAngle &angles ) -{ - angles.x = impulse.y; - angles.y = impulse.z; - angles.z = impulse.x; -} - -#if !defined( _X360 ) - -FORCEINLINE vec_t InvRSquared( float const *v ) -{ -#if defined(__i386__) || defined(_M_IX86) - float sqrlen = v[0]*v[0]+v[1]*v[1]+v[2]*v[2] + 1.0e-10f, result; - _mm_store_ss(&result, _mm_rcp_ss( _mm_max_ss( _mm_set_ss(1.0f), _mm_load_ss(&sqrlen) ) )); - return result; -#else - return 1.f/fpmax(1.f, v[0]*v[0]+v[1]*v[1]+v[2]*v[2]); -#endif -} - -FORCEINLINE vec_t InvRSquared( const Vector &v ) -{ - return InvRSquared(&v.x); -} - -#if defined(__i386__) || defined(_M_IX86) -inline void _SSE_RSqrtInline( float a, float* out ) -{ - __m128 xx = _mm_load_ss( &a ); - __m128 xr = _mm_rsqrt_ss( xx ); - __m128 xt; - xt = _mm_mul_ss( xr, xr ); - xt = _mm_mul_ss( xt, xx ); - xt = _mm_sub_ss( _mm_set_ss(3.f), xt ); - xt = _mm_mul_ss( xt, _mm_set_ss(0.5f) ); - xr = _mm_mul_ss( xr, xt ); - _mm_store_ss( out, xr ); -} -#endif - -// FIXME: Change this back to a #define once we get rid of the vec_t version -FORCEINLINE float VectorNormalize( Vector& vec ) -{ -#ifndef DEBUG // stop crashing my edit-and-continue! 
- #if defined(__i386__) || defined(_M_IX86) - #define DO_SSE_OPTIMIZATION - #endif -#endif - -#if defined( DO_SSE_OPTIMIZATION ) - float sqrlen = vec.LengthSqr() + 1.0e-10f, invlen; - _SSE_RSqrtInline(sqrlen, &invlen); - vec.x *= invlen; - vec.y *= invlen; - vec.z *= invlen; - return sqrlen * invlen; -#else - extern float (FASTCALL *pfVectorNormalize)(Vector& v); - return (*pfVectorNormalize)(vec); -#endif -} - -// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s -FORCEINLINE float VectorNormalize( float * v ) -{ - return VectorNormalize(*(reinterpret_cast(v))); -} - -FORCEINLINE void VectorNormalizeFast( Vector &vec ) -{ - VectorNormalize(vec); -} - -#else - -FORCEINLINE float _VMX_InvRSquared( const Vector &v ) -{ - XMVECTOR xmV = XMVector3ReciprocalLength( XMLoadVector3( v.Base() ) ); - xmV = XMVector3Dot( xmV, xmV ); - return xmV.x; -} - -// call directly -FORCEINLINE float _VMX_VectorNormalize( Vector &vec ) -{ - float mag = XMVector3Length( XMLoadVector3( vec.Base() ) ).x; - float den = 1.f / (mag + FLT_EPSILON ); - vec.x *= den; - vec.y *= den; - vec.z *= den; - return mag; -} - -#define InvRSquared(x) _VMX_InvRSquared(x) - -// FIXME: Change this back to a #define once we get rid of the vec_t version -FORCEINLINE float VectorNormalize( Vector& v ) -{ - return _VMX_VectorNormalize( v ); -} -// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s -FORCEINLINE float VectorNormalize( float *pV ) -{ - return _VMX_VectorNormalize(*(reinterpret_cast(pV))); -} - -// call directly -FORCEINLINE void VectorNormalizeFast( Vector &vec ) -{ - XMVECTOR xmV = XMVector3LengthEst( XMLoadVector3( vec.Base() ) ); - float den = 1.f / (xmV.x + FLT_EPSILON); - vec.x *= den; - vec.y *= den; - vec.z *= den; -} - -#endif // _X360 - - -inline vec_t Vector::NormalizeInPlace() -{ - return VectorNormalize( *this ); -} - -inline Vector Vector::Normalized() const -{ - Vector norm = *this; - VectorNormalize( norm ); - return 
norm; -} - -inline bool Vector::IsLengthGreaterThan( float val ) const -{ - return LengthSqr() > val*val; -} - -inline bool Vector::IsLengthLessThan( float val ) const -{ - return LengthSqr() < val*val; -} - -#endif - +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef VECTOR_H +#define VECTOR_H + +#ifdef _WIN32 +#pragma once +#endif + +#include +#include + +// For vec_t, put this somewhere else? +#include "tier0/basetypes.h" + +// For rand(). We really need a library! +#include + +#ifndef _X360 +// For MMX intrinsics +#include +#endif + +#include "tier0/dbg.h" +#include "tier0/threadtools.h" +#include "mathlib/vector2d.h" +#include "mathlib/math_pfns.h" +#include "minmax.h" + +// Uncomment this to add extra Asserts to check for NANs, uninitialized vecs, etc. +//#define VECTOR_PARANOIA 1 + +// Uncomment this to make sure we don't do anything slow with our vectors +//#define VECTOR_NO_SLOW_OPERATIONS 1 + + +// Used to make certain code easier to read. +#define X_INDEX 0 +#define Y_INDEX 1 +#define Z_INDEX 2 + + +#ifdef VECTOR_PARANOIA +#define CHECK_VALID( _v) Assert( (_v).IsValid() ) +#else +#ifdef GNUC +#define CHECK_VALID( _v) +#else +#define CHECK_VALID( _v) 0 +#endif +#endif + +#define VecToString(v) (static_cast(CFmtStr("(%f, %f, %f)", (v).x, (v).y, (v).z))) // ** Note: this generates a temporary, don't hold reference! 
+ +class VectorByValue; + +//========================================================= +// 3D Vector +//========================================================= +class Vector +{ +public: + // Members + vec_t x, y, z; + + // Construction/destruction: + Vector(void); + Vector(vec_t X, vec_t Y, vec_t Z); + explicit Vector(vec_t XYZ); ///< broadcast initialize + + // Initialization + void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f); + // TODO (Ilya): Should there be an init that takes a single float for consistency? + + // Got any nasty NAN's? + bool IsValid() const; + void Invalidate(); + + // array access... + vec_t operator[](int i) const; + vec_t& operator[](int i); + + // Base address... + vec_t* Base(); + vec_t const* Base() const; + + // Cast to Vector2D... + Vector2D& AsVector2D(); + const Vector2D& AsVector2D() const; + + // Initialization methods + void Random( vec_t minVal, vec_t maxVal ); + inline void Zero(); ///< zero out a vector + + // equality + bool operator==(const Vector& v) const; + bool operator!=(const Vector& v) const; + + // arithmetic operations + FORCEINLINE Vector& operator+=(const Vector &v); + FORCEINLINE Vector& operator-=(const Vector &v); + FORCEINLINE Vector& operator*=(const Vector &v); + FORCEINLINE Vector& operator*=(float s); + FORCEINLINE Vector& operator/=(const Vector &v); + FORCEINLINE Vector& operator/=(float s); + FORCEINLINE Vector& operator+=(float fl) ; ///< broadcast add + FORCEINLINE Vector& operator-=(float fl) ; ///< broadcast sub + +// negate the vector components + void Negate(); + + // Get the vector's magnitude. + inline vec_t Length() const; + + // Get the vector's magnitude squared. 
+ FORCEINLINE vec_t LengthSqr(void) const + { + CHECK_VALID(*this); + return (x*x + y*y + z*z); + } + + // return true if this vector is (0,0,0) within tolerance + bool IsZero( float tolerance = 0.01f ) const + { + return (x > -tolerance && x < tolerance && + y > -tolerance && y < tolerance && + z > -tolerance && z < tolerance); + } + + vec_t NormalizeInPlace(); + Vector Normalized() const; + bool IsLengthGreaterThan( float val ) const; + bool IsLengthLessThan( float val ) const; + + // check if a vector is within the box defined by two other vectors + FORCEINLINE bool WithinAABox( Vector const &boxmin, Vector const &boxmax); + + // Get the distance from this vector to the other one. + vec_t DistTo(const Vector &vOther) const; + + // Get the distance from this vector to the other one squared. + // NJS: note, VC wasn't inlining it correctly in several deeply nested inlines due to being an 'out of line' inline. + // may be able to tidy this up after switching to VC7 + FORCEINLINE vec_t DistToSqr(const Vector &vOther) const + { + Vector delta; + + delta.x = x - vOther.x; + delta.y = y - vOther.y; + delta.z = z - vOther.z; + + return delta.LengthSqr(); + } + + // Copy + void CopyToArray(float* rgfl) const; + + // Multiply, add, and assign to this (ie: *this = a + b * scalar). This + // is about 12% faster than the actual vector equation (because it's done per-component + // rather than per-vector). + void MulAdd(const Vector& a, const Vector& b, float scalar); + + // Dot product. 
+ vec_t Dot(const Vector& vOther) const; + + // assignment + Vector& operator=(const Vector &vOther); + + // 2d + vec_t Length2D(void) const; + vec_t Length2DSqr(void) const; + + operator VectorByValue &() { return *((VectorByValue *)(this)); } + operator const VectorByValue &() const { return *((const VectorByValue *)(this)); } + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // copy constructors +// Vector(const Vector &vOther); + + // arithmetic operations + Vector operator-(void) const; + + Vector operator+(const Vector& v) const; + Vector operator-(const Vector& v) const; + Vector operator*(const Vector& v) const; + Vector operator/(const Vector& v) const; + Vector operator*(float fl) const; + Vector operator/(float fl) const; + + // Cross product between two vectors. + Vector Cross(const Vector &vOther) const; + + // Returns a vector with the min or max in X, Y, and Z. + Vector Min(const Vector &vOther) const; + Vector Max(const Vector &vOther) const; + +#else + +private: + // No copy constructors allowed if we're in optimal mode + Vector(const Vector& vOther); +#endif +}; + +FORCEINLINE void NetworkVarConstruct( Vector &v ) { v.Zero(); } + + +#define USE_M64S ( ( !defined( _X360 ) ) ) + + + +//========================================================= +// 4D Short Vector (aligned on 8-byte boundary) +//========================================================= +class ALIGN8 ShortVector +{ +public: + + short x, y, z, w; + + // Initialization + void Init(short ix = 0, short iy = 0, short iz = 0, short iw = 0 ); + + +#if USE_M64S + __m64 &AsM64() { return *(__m64*)&x; } + const __m64 &AsM64() const { return *(const __m64*)&x; } +#endif + + // Setter + void Set( const ShortVector& vOther ); + void Set( const short ix, const short iy, const short iz, const short iw ); + + // array access... + short operator[](int i) const; + short& operator[](int i); + + // Base address... 
+ short* Base(); + short const* Base() const; + + // equality + bool operator==(const ShortVector& v) const; + bool operator!=(const ShortVector& v) const; + + // Arithmetic operations + FORCEINLINE ShortVector& operator+=(const ShortVector &v); + FORCEINLINE ShortVector& operator-=(const ShortVector &v); + FORCEINLINE ShortVector& operator*=(const ShortVector &v); + FORCEINLINE ShortVector& operator*=(float s); + FORCEINLINE ShortVector& operator/=(const ShortVector &v); + FORCEINLINE ShortVector& operator/=(float s); + FORCEINLINE ShortVector operator*(float fl) const; + +private: + + // No copy constructors allowed if we're in optimal mode +// ShortVector(ShortVector const& vOther); + + // No assignment operators either... +// ShortVector& operator=( ShortVector const& src ); + +} ALIGN8_POST; + + + + + + +//========================================================= +// 4D Integer Vector +//========================================================= +class IntVector4D +{ +public: + + int x, y, z, w; + + // Initialization + void Init(int ix = 0, int iy = 0, int iz = 0, int iw = 0 ); + +#if USE_M64S + __m64 &AsM64() { return *(__m64*)&x; } + const __m64 &AsM64() const { return *(const __m64*)&x; } +#endif + + // Setter + void Set( const IntVector4D& vOther ); + void Set( const int ix, const int iy, const int iz, const int iw ); + + // array access... + int operator[](int i) const; + int& operator[](int i); + + // Base address... 
+ int* Base(); + int const* Base() const; + + // equality + bool operator==(const IntVector4D& v) const; + bool operator!=(const IntVector4D& v) const; + + // Arithmetic operations + FORCEINLINE IntVector4D& operator+=(const IntVector4D &v); + FORCEINLINE IntVector4D& operator-=(const IntVector4D &v); + FORCEINLINE IntVector4D& operator*=(const IntVector4D &v); + FORCEINLINE IntVector4D& operator*=(float s); + FORCEINLINE IntVector4D& operator/=(const IntVector4D &v); + FORCEINLINE IntVector4D& operator/=(float s); + FORCEINLINE IntVector4D operator*(float fl) const; + +private: + + // No copy constructors allowed if we're in optimal mode + // IntVector4D(IntVector4D const& vOther); + + // No assignment operators either... + // IntVector4D& operator=( IntVector4D const& src ); + +}; + + + +//----------------------------------------------------------------------------- +// Allows us to specifically pass the vector by value when we need to +//----------------------------------------------------------------------------- +class VectorByValue : public Vector +{ +public: + // Construction/destruction: + VectorByValue(void) : Vector() {} + VectorByValue(vec_t X, vec_t Y, vec_t Z) : Vector( X, Y, Z ) {} + VectorByValue(const VectorByValue& vOther) { *this = vOther; } +}; + + +//----------------------------------------------------------------------------- +// Utility to simplify table construction. No constructor means can use +// traditional C-style initialization +//----------------------------------------------------------------------------- +class TableVector +{ +public: + vec_t x, y, z; + + operator Vector &() { return *((Vector *)(this)); } + operator const Vector &() const { return *((const Vector *)(this)); } + + // array access... 
+ inline vec_t& operator[](int i) + { + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; + } + + inline vec_t operator[](int i) const + { + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; + } +}; + + +//----------------------------------------------------------------------------- +// Here's where we add all those lovely SSE optimized routines +//----------------------------------------------------------------------------- + +class ALIGN16 VectorAligned : public Vector +{ +public: + inline VectorAligned(void) {}; + inline VectorAligned(vec_t X, vec_t Y, vec_t Z) + { + Init(X,Y,Z); + } + +#ifdef VECTOR_NO_SLOW_OPERATIONS + +private: + // No copy constructors allowed if we're in optimal mode + VectorAligned(const VectorAligned& vOther); + VectorAligned(const Vector &vOther); + +#else +public: + explicit VectorAligned(const Vector &vOther) + { + Init(vOther.x, vOther.y, vOther.z); + } + + VectorAligned& operator=(const Vector &vOther) + { + Init(vOther.x, vOther.y, vOther.z); + return *this; + } + +#endif + float w; // this space is used anyway +} ALIGN16_POST; + +//----------------------------------------------------------------------------- +// Vector related operations +//----------------------------------------------------------------------------- + +// Vector clear +FORCEINLINE void VectorClear( Vector& a ); + +// Copy +FORCEINLINE void VectorCopy( const Vector& src, Vector& dst ); + +// Vector arithmetic +FORCEINLINE void VectorAdd( const Vector& a, const Vector& b, Vector& result ); +FORCEINLINE void VectorSubtract( const Vector& a, const Vector& b, Vector& result ); +FORCEINLINE void VectorMultiply( const Vector& a, vec_t b, Vector& result ); +FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& result ); +FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& result ); +FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& result ); +inline void VectorScale ( const Vector& in, vec_t scale, 
Vector& result ); +// Don't mark this as inline in its function declaration. That's only necessary on its +// definition, and 'inline' here leads to gcc warnings. +void VectorMA( const Vector& start, float scale, const Vector& direction, Vector& dest ); + +// Vector equality with tolerance +bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance = 0.0f ); + +#define VectorExpand(v) (v).x, (v).y, (v).z + + +// Normalization +// FIXME: Can't use quite yet +//vec_t VectorNormalize( Vector& v ); + +// Length +inline vec_t VectorLength( const Vector& v ); + +// Dot Product +FORCEINLINE vec_t DotProduct(const Vector& a, const Vector& b); + +// Cross product +void CrossProduct(const Vector& a, const Vector& b, Vector& result ); + +// Store the min or max of each of x, y, and z into the result. +void VectorMin( const Vector &a, const Vector &b, Vector &result ); +void VectorMax( const Vector &a, const Vector &b, Vector &result ); + +// Linearly interpolate between two vectors +void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest ); +Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t ); + +FORCEINLINE Vector ReplicateToVector( float x ) +{ + return Vector( x, x, x ); +} + +// check if a point is in the field of a view of an object. supports up to 180 degree fov. 
+FORCEINLINE bool PointWithinViewAngle( Vector const &vecSrcPosition, + Vector const &vecTargetPosition, + Vector const &vecLookDirection, float flCosHalfFOV ) +{ + Vector vecDelta = vecTargetPosition - vecSrcPosition; + float cosDiff = DotProduct( vecLookDirection, vecDelta ); + + if ( cosDiff < 0 ) + return false; + + float flLen2 = vecDelta.LengthSqr(); + + // a/sqrt(b) > c == a^2 > b * c ^2 + return ( cosDiff * cosDiff > flLen2 * flCosHalfFOV * flCosHalfFOV ); + +} + + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +// Cross product +Vector CrossProduct( const Vector& a, const Vector& b ); + +// Random vector creation +Vector RandomVector( vec_t minVal, vec_t maxVal ); + +#endif + +float RandomVectorInUnitSphere( Vector *pVector ); +float RandomVectorInUnitCircle( Vector2D *pVector ); + + +//----------------------------------------------------------------------------- +// +// Inlined Vector methods +// +//----------------------------------------------------------------------------- + + +//----------------------------------------------------------------------------- +// constructors +//----------------------------------------------------------------------------- +inline Vector::Vector(void) +{ +#ifdef _DEBUG +#ifdef VECTOR_PARANOIA + // Initialize to NAN to catch errors + x = y = z = VEC_T_NAN; +#endif +#endif +} + +inline Vector::Vector(vec_t X, vec_t Y, vec_t Z) +{ + x = X; y = Y; z = Z; + CHECK_VALID(*this); +} + +inline Vector::Vector(vec_t XYZ) +{ + x = y = z = XYZ; + CHECK_VALID(*this); +} + +//inline Vector::Vector(const float *pFloat) +//{ +// Assert( pFloat ); +// x = pFloat[0]; y = pFloat[1]; z = pFloat[2]; +// CHECK_VALID(*this); +//} + +#if 0 +//----------------------------------------------------------------------------- +// copy constructor +//----------------------------------------------------------------------------- + +inline Vector::Vector(const Vector &vOther) +{ + CHECK_VALID(vOther); + x = vOther.x; y = vOther.y; z = vOther.z; +} +#endif + 
+//----------------------------------------------------------------------------- +// initialization +//----------------------------------------------------------------------------- + +inline void Vector::Init( vec_t ix, vec_t iy, vec_t iz ) +{ + x = ix; y = iy; z = iz; + CHECK_VALID(*this); +} + +inline void Vector::Random( vec_t minVal, vec_t maxVal ) +{ + x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + z = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + CHECK_VALID(*this); +} + +// This should really be a single opcode on the PowerPC (move r0 onto the vec reg) +inline void Vector::Zero() +{ + x = y = z = 0.0f; +} + +inline void VectorClear( Vector& a ) +{ + a.x = a.y = a.z = 0.0f; +} + +//----------------------------------------------------------------------------- +// assignment +//----------------------------------------------------------------------------- + +inline Vector& Vector::operator=(const Vector &vOther) +{ + CHECK_VALID(vOther); + x=vOther.x; y=vOther.y; z=vOther.z; + return *this; +} + + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- +inline vec_t& Vector::operator[](int i) +{ + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; +} + +inline vec_t Vector::operator[](int i) const +{ + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; +} + + +//----------------------------------------------------------------------------- +// Base address... +//----------------------------------------------------------------------------- +inline vec_t* Vector::Base() +{ + return (vec_t*)this; +} + +inline vec_t const* Vector::Base() const +{ + return (vec_t const*)this; +} + +//----------------------------------------------------------------------------- +// Cast to Vector2D... 
+//----------------------------------------------------------------------------- + +inline Vector2D& Vector::AsVector2D() +{ + return *(Vector2D*)this; +} + +inline const Vector2D& Vector::AsVector2D() const +{ + return *(const Vector2D*)this; +} + +//----------------------------------------------------------------------------- +// IsValid? +//----------------------------------------------------------------------------- + +inline bool Vector::IsValid() const +{ + return IsFinite(x) && IsFinite(y) && IsFinite(z); +} + +//----------------------------------------------------------------------------- +// Invalidate +//----------------------------------------------------------------------------- + +inline void Vector::Invalidate() +{ +//#ifdef _DEBUG +//#ifdef VECTOR_PARANOIA + x = y = z = VEC_T_NAN; +//#endif +//#endif +} + +//----------------------------------------------------------------------------- +// comparison +//----------------------------------------------------------------------------- + +inline bool Vector::operator==( const Vector& src ) const +{ + CHECK_VALID(src); + CHECK_VALID(*this); + return (src.x == x) && (src.y == y) && (src.z == z); +} + +inline bool Vector::operator!=( const Vector& src ) const +{ + CHECK_VALID(src); + CHECK_VALID(*this); + return (src.x != x) || (src.y != y) || (src.z != z); +} + + +//----------------------------------------------------------------------------- +// Copy +//----------------------------------------------------------------------------- + +FORCEINLINE void VectorCopy( const Vector& src, Vector& dst ) +{ + CHECK_VALID(src); + dst.x = src.x; + dst.y = src.y; + dst.z = src.z; +} + +inline void Vector::CopyToArray(float* rgfl) const +{ + Assert( rgfl ); + CHECK_VALID(*this); + rgfl[0] = x, rgfl[1] = y, rgfl[2] = z; +} + +//----------------------------------------------------------------------------- +// standard math operations +//----------------------------------------------------------------------------- +// #pragma 
message("TODO: these should be SSE") + +inline void Vector::Negate() +{ + CHECK_VALID(*this); + x = -x; y = -y; z = -z; +} + +FORCEINLINE Vector& Vector::operator+=(const Vector& v) +{ + CHECK_VALID(*this); + CHECK_VALID(v); + x+=v.x; y+=v.y; z += v.z; + return *this; +} + +FORCEINLINE Vector& Vector::operator-=(const Vector& v) +{ + CHECK_VALID(*this); + CHECK_VALID(v); + x-=v.x; y-=v.y; z -= v.z; + return *this; +} + +FORCEINLINE Vector& Vector::operator*=(float fl) +{ + x *= fl; + y *= fl; + z *= fl; + CHECK_VALID(*this); + return *this; +} + +FORCEINLINE Vector& Vector::operator*=(const Vector& v) +{ + CHECK_VALID(v); + x *= v.x; + y *= v.y; + z *= v.z; + CHECK_VALID(*this); + return *this; +} + +// this ought to be an opcode. +FORCEINLINE Vector& Vector::operator+=(float fl) +{ + x += fl; + y += fl; + z += fl; + CHECK_VALID(*this); + return *this; +} + +FORCEINLINE Vector& Vector::operator-=(float fl) +{ + x -= fl; + y -= fl; + z -= fl; + CHECK_VALID(*this); + return *this; +} + + + +FORCEINLINE Vector& Vector::operator/=(float fl) +{ + Assert( fl != 0.0f ); + float oofl = 1.0f / fl; + x *= oofl; + y *= oofl; + z *= oofl; + CHECK_VALID(*this); + return *this; +} + +FORCEINLINE Vector& Vector::operator/=(const Vector& v) +{ + CHECK_VALID(v); + Assert( v.x != 0.0f && v.y != 0.0f && v.z != 0.0f ); + x /= v.x; + y /= v.y; + z /= v.z; + CHECK_VALID(*this); + return *this; +} + + + +//----------------------------------------------------------------------------- +// +// Inlined Short Vector methods +// +//----------------------------------------------------------------------------- + + +inline void ShortVector::Init( short ix, short iy, short iz, short iw ) +{ + x = ix; y = iy; z = iz; w = iw; +} + +FORCEINLINE void ShortVector::Set( const ShortVector& vOther ) +{ + x = vOther.x; + y = vOther.y; + z = vOther.z; + w = vOther.w; +} + +FORCEINLINE void ShortVector::Set( const short ix, const short iy, const short iz, const short iw ) +{ + x = ix; + y = iy; + z = iz; + w 
= iw; +} + + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- +inline short ShortVector::operator[](int i) const +{ + Assert( (i >= 0) && (i < 4) ); + return ((short*)this)[i]; +} + +inline short& ShortVector::operator[](int i) +{ + Assert( (i >= 0) && (i < 4) ); + return ((short*)this)[i]; +} + +//----------------------------------------------------------------------------- +// Base address... +//----------------------------------------------------------------------------- +inline short* ShortVector::Base() +{ + return (short*)this; +} + +inline short const* ShortVector::Base() const +{ + return (short const*)this; +} + + +//----------------------------------------------------------------------------- +// comparison +//----------------------------------------------------------------------------- + +inline bool ShortVector::operator==( const ShortVector& src ) const +{ + return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w); +} + +inline bool ShortVector::operator!=( const ShortVector& src ) const +{ + return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w); +} + + + +//----------------------------------------------------------------------------- +// standard math operations +//----------------------------------------------------------------------------- + +FORCEINLINE ShortVector& ShortVector::operator+=(const ShortVector& v) +{ + x+=v.x; y+=v.y; z += v.z; w += v.w; + return *this; +} + +FORCEINLINE ShortVector& ShortVector::operator-=(const ShortVector& v) +{ + x-=v.x; y-=v.y; z -= v.z; w -= v.w; + return *this; +} + +FORCEINLINE ShortVector& ShortVector::operator*=(float fl) +{ + x *= fl; + y *= fl; + z *= fl; + w *= fl; + return *this; +} + +FORCEINLINE ShortVector& ShortVector::operator*=(const ShortVector& v) +{ + x *= v.x; + y *= v.y; + z *= v.z; + w *= v.w; + return *this; +} + +FORCEINLINE ShortVector& 
ShortVector::operator/=(float fl) +{ + Assert( fl != 0.0f ); + float oofl = 1.0f / fl; + x *= oofl; + y *= oofl; + z *= oofl; + w *= oofl; + return *this; +} + +FORCEINLINE ShortVector& ShortVector::operator/=(const ShortVector& v) +{ + Assert( v.x != 0 && v.y != 0 && v.z != 0 && v.w != 0 ); + x /= v.x; + y /= v.y; + z /= v.z; + w /= v.w; + return *this; +} + +FORCEINLINE void ShortVectorMultiply( const ShortVector& src, float fl, ShortVector& res ) +{ + Assert( IsFinite(fl) ); + res.x = src.x * fl; + res.y = src.y * fl; + res.z = src.z * fl; + res.w = src.w * fl; +} + +FORCEINLINE ShortVector ShortVector::operator*(float fl) const +{ + ShortVector res; + ShortVectorMultiply( *this, fl, res ); + return res; +} + + + + + + +//----------------------------------------------------------------------------- +// +// Inlined Integer Vector methods +// +//----------------------------------------------------------------------------- + + +inline void IntVector4D::Init( int ix, int iy, int iz, int iw ) +{ + x = ix; y = iy; z = iz; w = iw; +} + +FORCEINLINE void IntVector4D::Set( const IntVector4D& vOther ) +{ + x = vOther.x; + y = vOther.y; + z = vOther.z; + w = vOther.w; +} + +FORCEINLINE void IntVector4D::Set( const int ix, const int iy, const int iz, const int iw ) +{ + x = ix; + y = iy; + z = iz; + w = iw; +} + + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- +inline int IntVector4D::operator[](int i) const +{ + Assert( (i >= 0) && (i < 4) ); + return ((int*)this)[i]; +} + +inline int& IntVector4D::operator[](int i) +{ + Assert( (i >= 0) && (i < 4) ); + return ((int*)this)[i]; +} + +//----------------------------------------------------------------------------- +// Base address... 
+//----------------------------------------------------------------------------- +inline int* IntVector4D::Base() +{ + return (int*)this; +} + +inline int const* IntVector4D::Base() const +{ + return (int const*)this; +} + + +//----------------------------------------------------------------------------- +// comparison +//----------------------------------------------------------------------------- + +inline bool IntVector4D::operator==( const IntVector4D& src ) const +{ + return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w); +} + +inline bool IntVector4D::operator!=( const IntVector4D& src ) const +{ + return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w); +} + + + +//----------------------------------------------------------------------------- +// standard math operations +//----------------------------------------------------------------------------- + +FORCEINLINE IntVector4D& IntVector4D::operator+=(const IntVector4D& v) +{ + x+=v.x; y+=v.y; z += v.z; w += v.w; + return *this; +} + +FORCEINLINE IntVector4D& IntVector4D::operator-=(const IntVector4D& v) +{ + x-=v.x; y-=v.y; z -= v.z; w -= v.w; + return *this; +} + +FORCEINLINE IntVector4D& IntVector4D::operator*=(float fl) +{ + x *= fl; + y *= fl; + z *= fl; + w *= fl; + return *this; +} + +FORCEINLINE IntVector4D& IntVector4D::operator*=(const IntVector4D& v) +{ + x *= v.x; + y *= v.y; + z *= v.z; + w *= v.w; + return *this; +} + +FORCEINLINE IntVector4D& IntVector4D::operator/=(float fl) +{ + Assert( fl != 0.0f ); + float oofl = 1.0f / fl; + x *= oofl; + y *= oofl; + z *= oofl; + w *= oofl; + return *this; +} + +FORCEINLINE IntVector4D& IntVector4D::operator/=(const IntVector4D& v) +{ + Assert( v.x != 0 && v.y != 0 && v.z != 0 && v.w != 0 ); + x /= v.x; + y /= v.y; + z /= v.z; + w /= v.w; + return *this; +} + +FORCEINLINE void IntVector4DMultiply( const IntVector4D& src, float fl, IntVector4D& res ) +{ + Assert( IsFinite(fl) ); + res.x = src.x * fl; + res.y = src.y * fl; + res.z = 
src.z * fl; + res.w = src.w * fl; +} + +FORCEINLINE IntVector4D IntVector4D::operator*(float fl) const +{ + IntVector4D res; + IntVector4DMultiply( *this, fl, res ); + return res; +} + + + +// ======================= + + +FORCEINLINE void VectorAdd( const Vector& a, const Vector& b, Vector& c ) +{ + CHECK_VALID(a); + CHECK_VALID(b); + c.x = a.x + b.x; + c.y = a.y + b.y; + c.z = a.z + b.z; +} + +FORCEINLINE void VectorSubtract( const Vector& a, const Vector& b, Vector& c ) +{ + CHECK_VALID(a); + CHECK_VALID(b); + c.x = a.x - b.x; + c.y = a.y - b.y; + c.z = a.z - b.z; +} + +FORCEINLINE void VectorMultiply( const Vector& a, vec_t b, Vector& c ) +{ + CHECK_VALID(a); + Assert( IsFinite(b) ); + c.x = a.x * b; + c.y = a.y * b; + c.z = a.z * b; +} + +FORCEINLINE void VectorMultiply( const Vector& a, const Vector& b, Vector& c ) +{ + CHECK_VALID(a); + CHECK_VALID(b); + c.x = a.x * b.x; + c.y = a.y * b.y; + c.z = a.z * b.z; +} + +// for backwards compatability +inline void VectorScale ( const Vector& in, vec_t scale, Vector& result ) +{ + VectorMultiply( in, scale, result ); +} + + +FORCEINLINE void VectorDivide( const Vector& a, vec_t b, Vector& c ) +{ + CHECK_VALID(a); + Assert( b != 0.0f ); + vec_t oob = 1.0f / b; + c.x = a.x * oob; + c.y = a.y * oob; + c.z = a.z * oob; +} + +FORCEINLINE void VectorDivide( const Vector& a, const Vector& b, Vector& c ) +{ + CHECK_VALID(a); + CHECK_VALID(b); + Assert( (b.x != 0.0f) && (b.y != 0.0f) && (b.z != 0.0f) ); + c.x = a.x / b.x; + c.y = a.y / b.y; + c.z = a.z / b.z; +} + +// FIXME: Remove +// For backwards compatability +inline void Vector::MulAdd(const Vector& a, const Vector& b, float scalar) +{ + CHECK_VALID(a); + CHECK_VALID(b); + x = a.x + b.x * scalar; + y = a.y + b.y * scalar; + z = a.z + b.z * scalar; +} + +inline void VectorLerp(const Vector& src1, const Vector& src2, vec_t t, Vector& dest ) +{ + CHECK_VALID(src1); + CHECK_VALID(src2); + dest.x = src1.x + (src2.x - src1.x) * t; + dest.y = src1.y + (src2.y - src1.y) * t; + 
dest.z = src1.z + (src2.z - src1.z) * t; +} + +inline Vector VectorLerp(const Vector& src1, const Vector& src2, vec_t t ) +{ + Vector result; + VectorLerp( src1, src2, t, result ); + return result; +} + +//----------------------------------------------------------------------------- +// Temporary storage for vector results so const Vector& results can be returned +//----------------------------------------------------------------------------- +inline Vector &AllocTempVector() +{ + static Vector s_vecTemp[128]; + static CInterlockedInt s_nIndex; + + int nIndex; + for (;;) + { + int nOldIndex = s_nIndex; + nIndex = ( (nOldIndex + 0x10001) & 0x7F ); + + if ( s_nIndex.AssignIf( nOldIndex, nIndex ) ) + { + break; + } + ThreadPause(); + } + return s_vecTemp[nIndex & 0xffff]; +} + + + +//----------------------------------------------------------------------------- +// dot, cross +//----------------------------------------------------------------------------- +FORCEINLINE vec_t DotProduct(const Vector& a, const Vector& b) +{ + CHECK_VALID(a); + CHECK_VALID(b); + return( a.x*b.x + a.y*b.y + a.z*b.z ); +} + +// for backwards compatability +inline vec_t Vector::Dot( const Vector& vOther ) const +{ + CHECK_VALID(vOther); + return DotProduct( *this, vOther ); +} + +inline void CrossProduct(const Vector& a, const Vector& b, Vector& result ) +{ + CHECK_VALID(a); + CHECK_VALID(b); + Assert( &a != &result ); + Assert( &b != &result ); + result.x = a.y*b.z - a.z*b.y; + result.y = a.z*b.x - a.x*b.z; + result.z = a.x*b.y - a.y*b.x; +} + +inline vec_t DotProductAbs( const Vector &v0, const Vector &v1 ) +{ + CHECK_VALID(v0); + CHECK_VALID(v1); + return FloatMakePositive(v0.x*v1.x) + FloatMakePositive(v0.y*v1.y) + FloatMakePositive(v0.z*v1.z); +} + +inline vec_t DotProductAbs( const Vector &v0, const float *v1 ) +{ + return FloatMakePositive(v0.x * v1[0]) + FloatMakePositive(v0.y * v1[1]) + FloatMakePositive(v0.z * v1[2]); +} + 
+//----------------------------------------------------------------------------- +// length +//----------------------------------------------------------------------------- + +inline vec_t VectorLength( const Vector& v ) +{ + CHECK_VALID(v); + return (vec_t)FastSqrt(v.x*v.x + v.y*v.y + v.z*v.z); +} + + +inline vec_t Vector::Length(void) const +{ + CHECK_VALID(*this); + return VectorLength( *this ); +} + + +//----------------------------------------------------------------------------- +// Normalization +//----------------------------------------------------------------------------- + +/* +// FIXME: Can't use until we're un-macroed in mathlib.h +inline vec_t VectorNormalize( Vector& v ) +{ + Assert( v.IsValid() ); + vec_t l = v.Length(); + if (l != 0.0f) + { + v /= l; + } + else + { + // FIXME: + // Just copying the existing implemenation; shouldn't res.z == 0? + v.x = v.y = 0.0f; v.z = 1.0f; + } + return l; +} +*/ + + +// check a point against a box +bool Vector::WithinAABox( Vector const &boxmin, Vector const &boxmax) +{ + return ( + ( x >= boxmin.x ) && ( x <= boxmax.x) && + ( y >= boxmin.y ) && ( y <= boxmax.y) && + ( z >= boxmin.z ) && ( z <= boxmax.z) + ); +} + +//----------------------------------------------------------------------------- +// Get the distance from this vector to the other one +//----------------------------------------------------------------------------- +inline vec_t Vector::DistTo(const Vector &vOther) const +{ + Vector delta; + VectorSubtract( *this, vOther, delta ); + return delta.Length(); +} + + +//----------------------------------------------------------------------------- +// Vector equality with tolerance +//----------------------------------------------------------------------------- +inline bool VectorsAreEqual( const Vector& src1, const Vector& src2, float tolerance ) +{ + if (FloatMakePositive(src1.x - src2.x) > tolerance) + return false; + if (FloatMakePositive(src1.y - src2.y) > tolerance) + return false; + return 
(FloatMakePositive(src1.z - src2.z) <= tolerance); +} + + +//----------------------------------------------------------------------------- +// Computes the closest point to vecTarget no farther than flMaxDist from vecStart +//----------------------------------------------------------------------------- +inline void ComputeClosestPoint( const Vector& vecStart, float flMaxDist, const Vector& vecTarget, Vector *pResult ) +{ + Vector vecDelta; + VectorSubtract( vecTarget, vecStart, vecDelta ); + float flDistSqr = vecDelta.LengthSqr(); + if ( flDistSqr <= flMaxDist * flMaxDist ) + { + *pResult = vecTarget; + } + else + { + vecDelta /= FastSqrt( flDistSqr ); + VectorMA( vecStart, flMaxDist, vecDelta, *pResult ); + } +} + + +//----------------------------------------------------------------------------- +// Takes the absolute value of a vector +//----------------------------------------------------------------------------- +inline void VectorAbs( const Vector& src, Vector& dst ) +{ + dst.x = FloatMakePositive(src.x); + dst.y = FloatMakePositive(src.y); + dst.z = FloatMakePositive(src.z); +} + + +//----------------------------------------------------------------------------- +// +// Slow methods +// +//----------------------------------------------------------------------------- + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +//----------------------------------------------------------------------------- +// Returns a vector with the min or max in X, Y, and Z. +//----------------------------------------------------------------------------- +inline Vector Vector::Min(const Vector &vOther) const +{ + return Vector(x < vOther.x ? x : vOther.x, + y < vOther.y ? y : vOther.y, + z < vOther.z ? z : vOther.z); +} + +inline Vector Vector::Max(const Vector &vOther) const +{ + return Vector(x > vOther.x ? x : vOther.x, + y > vOther.y ? y : vOther.y, + z > vOther.z ? 
z : vOther.z); +} + + +//----------------------------------------------------------------------------- +// arithmetic operations +//----------------------------------------------------------------------------- + +inline Vector Vector::operator-(void) const +{ + return Vector(-x,-y,-z); +} + +inline Vector Vector::operator+(const Vector& v) const +{ + Vector res; + VectorAdd( *this, v, res ); + return res; +} + +inline Vector Vector::operator-(const Vector& v) const +{ + Vector res; + VectorSubtract( *this, v, res ); + return res; +} + +inline Vector Vector::operator*(float fl) const +{ + Vector res; + VectorMultiply( *this, fl, res ); + return res; +} + +inline Vector Vector::operator*(const Vector& v) const +{ + Vector res; + VectorMultiply( *this, v, res ); + return res; +} + +inline Vector Vector::operator/(float fl) const +{ + Vector res; + VectorDivide( *this, fl, res ); + return res; +} + +inline Vector Vector::operator/(const Vector& v) const +{ + Vector res; + VectorDivide( *this, v, res ); + return res; +} + +inline Vector operator*(float fl, const Vector& v) +{ + return v * fl; +} + +//----------------------------------------------------------------------------- +// cross product +//----------------------------------------------------------------------------- + +inline Vector Vector::Cross(const Vector& vOther) const +{ + Vector res; + CrossProduct( *this, vOther, res ); + return res; +} + +//----------------------------------------------------------------------------- +// 2D +//----------------------------------------------------------------------------- + +inline vec_t Vector::Length2D(void) const +{ + return (vec_t)FastSqrt(x*x + y*y); +} + +inline vec_t Vector::Length2DSqr(void) const +{ + return (x*x + y*y); +} + +inline Vector CrossProduct(const Vector& a, const Vector& b) +{ + return Vector( a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x ); +} + +inline void VectorMin( const Vector &a, const Vector &b, Vector &result ) +{ + result.x = 
fpmin(a.x, b.x); + result.y = fpmin(a.y, b.y); + result.z = fpmin(a.z, b.z); +} + +inline void VectorMax( const Vector &a, const Vector &b, Vector &result ) +{ + result.x = fpmax(a.x, b.x); + result.y = fpmax(a.y, b.y); + result.z = fpmax(a.z, b.z); +} + +inline float ComputeVolume( const Vector &vecMins, const Vector &vecMaxs ) +{ + Vector vecDelta; + VectorSubtract( vecMaxs, vecMins, vecDelta ); + return DotProduct( vecDelta, vecDelta ); +} + +// Get a random vector. +inline Vector RandomVector( float minVal, float maxVal ) +{ + Vector random; + random.Random( minVal, maxVal ); + return random; +} + +#endif //slow + +//----------------------------------------------------------------------------- +// Helper debugging stuff.... +//----------------------------------------------------------------------------- + +inline bool operator==( float const* f, const Vector& v ) +{ + // AIIIEEEE!!!! + Assert(0); + return false; +} + +inline bool operator==( const Vector& v, float const* f ) +{ + // AIIIEEEE!!!! + Assert(0); + return false; +} + +inline bool operator!=( float const* f, const Vector& v ) +{ + // AIIIEEEE!!!! + Assert(0); + return false; +} + +inline bool operator!=( const Vector& v, float const* f ) +{ + // AIIIEEEE!!!! 
+ Assert(0); + return false; +} + + +//----------------------------------------------------------------------------- +// AngularImpulse +//----------------------------------------------------------------------------- +// AngularImpulse are exponetial maps (an axis scaled by a "twist" angle in degrees) +typedef Vector AngularImpulse; + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +inline AngularImpulse RandomAngularImpulse( float minVal, float maxVal ) +{ + AngularImpulse angImp; + angImp.Random( minVal, maxVal ); + return angImp; +} + +#endif + + +//----------------------------------------------------------------------------- +// Quaternion +//----------------------------------------------------------------------------- + +class RadianEuler; + +class Quaternion // same data-layout as engine's vec4_t, +{ // which is a vec_t[4] +public: + inline Quaternion(void) { + + // Initialize to NAN to catch errors +#ifdef _DEBUG +#ifdef VECTOR_PARANOIA + x = y = z = w = VEC_T_NAN; +#endif +#endif + } + inline Quaternion(vec_t ix, vec_t iy, vec_t iz, vec_t iw) : x(ix), y(iy), z(iz), w(iw) { } + inline Quaternion(RadianEuler const &angle); // evil auto type promotion!!! + + inline void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f, vec_t iw=0.0f) { x = ix; y = iy; z = iz; w = iw; } + + bool IsValid() const; + void Invalidate(); + + bool operator==( const Quaternion &src ) const; + bool operator!=( const Quaternion &src ) const; + + vec_t* Base() { return (vec_t*)this; } + const vec_t* Base() const { return (vec_t*)this; } + + // array access... 
+ vec_t operator[](int i) const; + vec_t& operator[](int i); + + vec_t x, y, z, w; +}; + + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- +inline vec_t& Quaternion::operator[](int i) +{ + Assert( (i >= 0) && (i < 4) ); + return ((vec_t*)this)[i]; +} + +inline vec_t Quaternion::operator[](int i) const +{ + Assert( (i >= 0) && (i < 4) ); + return ((vec_t*)this)[i]; +} + + +//----------------------------------------------------------------------------- +// Equality test +//----------------------------------------------------------------------------- +inline bool Quaternion::operator==( const Quaternion &src ) const +{ + return ( x == src.x ) && ( y == src.y ) && ( z == src.z ) && ( w == src.w ); +} + +inline bool Quaternion::operator!=( const Quaternion &src ) const +{ + return !operator==( src ); +} + + +//----------------------------------------------------------------------------- +// Quaternion equality with tolerance +//----------------------------------------------------------------------------- +inline bool QuaternionsAreEqual( const Quaternion& src1, const Quaternion& src2, float tolerance ) +{ + if (FloatMakePositive(src1.x - src2.x) > tolerance) + return false; + if (FloatMakePositive(src1.y - src2.y) > tolerance) + return false; + if (FloatMakePositive(src1.z - src2.z) > tolerance) + return false; + return (FloatMakePositive(src1.w - src2.w) <= tolerance); +} + + +//----------------------------------------------------------------------------- +// Here's where we add all those lovely SSE optimized routines +//----------------------------------------------------------------------------- +class ALIGN16 QuaternionAligned : public Quaternion +{ +public: + inline QuaternionAligned(void) {}; + inline QuaternionAligned(vec_t X, vec_t Y, vec_t Z, vec_t W) + { + Init(X,Y,Z,W); + } + +#ifdef VECTOR_NO_SLOW_OPERATIONS + +private: + // No 
copy constructors allowed if we're in optimal mode + QuaternionAligned(const QuaternionAligned& vOther); + QuaternionAligned(const Quaternion &vOther); + +#else +public: + explicit QuaternionAligned(const Quaternion &vOther) + { + Init(vOther.x, vOther.y, vOther.z, vOther.w); + } + + QuaternionAligned& operator=(const Quaternion &vOther) + { + Init(vOther.x, vOther.y, vOther.z, vOther.w); + return *this; + } + +#endif +} ALIGN16_POST; + + +//----------------------------------------------------------------------------- +// Radian Euler angle aligned to axis (NOT ROLL/PITCH/YAW) +//----------------------------------------------------------------------------- +class QAngle; +class RadianEuler +{ +public: + inline RadianEuler(void) { } + inline RadianEuler(vec_t X, vec_t Y, vec_t Z) { x = X; y = Y; z = Z; } + inline RadianEuler(Quaternion const &q); // evil auto type promotion!!! + inline RadianEuler(QAngle const &angles); // evil auto type promotion!!! + + // Initialization + inline void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f) { x = ix; y = iy; z = iz; } + + // conversion to qangle + QAngle ToQAngle( void ) const; + bool IsValid() const; + void Invalidate(); + + // array access... 
+ vec_t operator[](int i) const; + vec_t& operator[](int i); + + vec_t x, y, z; +}; + + +extern void AngleQuaternion( RadianEuler const &angles, Quaternion &qt ); +extern void QuaternionAngles( Quaternion const &q, RadianEuler &angles ); + +FORCEINLINE void NetworkVarConstruct( Quaternion &q ) { q.x = q.y = q.z = q.w = 0.0f; } + +inline Quaternion::Quaternion(RadianEuler const &angle) +{ + AngleQuaternion( angle, *this ); +} + +inline bool Quaternion::IsValid() const +{ + return IsFinite(x) && IsFinite(y) && IsFinite(z) && IsFinite(w); +} + +inline void Quaternion::Invalidate() +{ +//#ifdef _DEBUG +//#ifdef VECTOR_PARANOIA + x = y = z = w = VEC_T_NAN; +//#endif +//#endif +} + +inline RadianEuler::RadianEuler(Quaternion const &q) +{ + QuaternionAngles( q, *this ); +} + +inline void VectorCopy( RadianEuler const& src, RadianEuler &dst ) +{ + CHECK_VALID(src); + dst.x = src.x; + dst.y = src.y; + dst.z = src.z; +} + +inline void VectorScale( RadianEuler const& src, float b, RadianEuler &dst ) +{ + CHECK_VALID(src); + Assert( IsFinite(b) ); + dst.x = src.x * b; + dst.y = src.y * b; + dst.z = src.z * b; +} + +inline bool RadianEuler::IsValid() const +{ + return IsFinite(x) && IsFinite(y) && IsFinite(z); +} + +inline void RadianEuler::Invalidate() +{ +//#ifdef _DEBUG +//#ifdef VECTOR_PARANOIA + x = y = z = VEC_T_NAN; +//#endif +//#endif +} + + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- +inline vec_t& RadianEuler::operator[](int i) +{ + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; +} + +inline vec_t RadianEuler::operator[](int i) const +{ + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; +} + + +//----------------------------------------------------------------------------- +// Degree Euler QAngle pitch, yaw, roll +//----------------------------------------------------------------------------- +class QAngleByValue; 
+ +class QAngle +{ +public: + // Members + vec_t x, y, z; + + // Construction/destruction + QAngle(void); + QAngle(vec_t X, vec_t Y, vec_t Z); +// QAngle(RadianEuler const &angles); // evil auto type promotion!!! + + // Allow pass-by-value + operator QAngleByValue &() { return *((QAngleByValue *)(this)); } + operator const QAngleByValue &() const { return *((const QAngleByValue *)(this)); } + + // Initialization + void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f); + void Random( vec_t minVal, vec_t maxVal ); + + // Got any nasty NAN's? + bool IsValid() const; + void Invalidate(); + + // array access... + vec_t operator[](int i) const; + vec_t& operator[](int i); + + // Base address... + vec_t* Base(); + vec_t const* Base() const; + + // equality + bool operator==(const QAngle& v) const; + bool operator!=(const QAngle& v) const; + + // arithmetic operations + QAngle& operator+=(const QAngle &v); + QAngle& operator-=(const QAngle &v); + QAngle& operator*=(float s); + QAngle& operator/=(float s); + + // Get the vector's magnitude. + vec_t Length() const; + vec_t LengthSqr() const; + + // negate the QAngle components + //void Negate(); + + // No assignment operators either... 
+ QAngle& operator=( const QAngle& src ); + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // copy constructors + + // arithmetic operations + QAngle operator-(void) const; + + QAngle operator+(const QAngle& v) const; + QAngle operator-(const QAngle& v) const; + QAngle operator*(float fl) const; + QAngle operator/(float fl) const; +#else + +private: + // No copy constructors allowed if we're in optimal mode + QAngle(const QAngle& vOther); + +#endif +}; + +FORCEINLINE void NetworkVarConstruct( QAngle &q ) { q.x = q.y = q.z = 0.0f; } + +//----------------------------------------------------------------------------- +// Allows us to specifically pass the vector by value when we need to +//----------------------------------------------------------------------------- +class QAngleByValue : public QAngle +{ +public: + // Construction/destruction: + QAngleByValue(void) : QAngle() {} + QAngleByValue(vec_t X, vec_t Y, vec_t Z) : QAngle( X, Y, Z ) {} + QAngleByValue(const QAngleByValue& vOther) { *this = vOther; } +}; + + +inline void VectorAdd( const QAngle& a, const QAngle& b, QAngle& result ) +{ + CHECK_VALID(a); + CHECK_VALID(b); + result.x = a.x + b.x; + result.y = a.y + b.y; + result.z = a.z + b.z; +} + +inline void VectorMA( const QAngle &start, float scale, const QAngle &direction, QAngle &dest ) +{ + CHECK_VALID(start); + CHECK_VALID(direction); + dest.x = start.x + scale * direction.x; + dest.y = start.y + scale * direction.y; + dest.z = start.z + scale * direction.z; +} + + +//----------------------------------------------------------------------------- +// constructors +//----------------------------------------------------------------------------- +inline QAngle::QAngle(void) +{ +#ifdef _DEBUG +#ifdef VECTOR_PARANOIA + // Initialize to NAN to catch errors + x = y = z = VEC_T_NAN; +#endif +#endif +} + +inline QAngle::QAngle(vec_t X, vec_t Y, vec_t Z) +{ + x = X; y = Y; z = Z; + CHECK_VALID(*this); +} + + 
+//----------------------------------------------------------------------------- +// initialization +//----------------------------------------------------------------------------- +inline void QAngle::Init( vec_t ix, vec_t iy, vec_t iz ) +{ + x = ix; y = iy; z = iz; + CHECK_VALID(*this); +} + +inline void QAngle::Random( vec_t minVal, vec_t maxVal ) +{ + x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + z = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + CHECK_VALID(*this); +} + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +inline QAngle RandomAngle( float minVal, float maxVal ) +{ + Vector random; + random.Random( minVal, maxVal ); + QAngle ret( random.x, random.y, random.z ); + return ret; +} + +#endif + + +inline RadianEuler::RadianEuler(QAngle const &angles) +{ + Init( + angles.z * 3.14159265358979323846f / 180.f, + angles.x * 3.14159265358979323846f / 180.f, + angles.y * 3.14159265358979323846f / 180.f ); +} + + + + +inline QAngle RadianEuler::ToQAngle( void) const +{ + return QAngle( + y * 180.f / 3.14159265358979323846f, + z * 180.f / 3.14159265358979323846f, + x * 180.f / 3.14159265358979323846f ); +} + + +//----------------------------------------------------------------------------- +// assignment +//----------------------------------------------------------------------------- +inline QAngle& QAngle::operator=(const QAngle &vOther) +{ + CHECK_VALID(vOther); + x=vOther.x; y=vOther.y; z=vOther.z; + return *this; +} + + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- +inline vec_t& QAngle::operator[](int i) +{ + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; +} + +inline vec_t QAngle::operator[](int i) const +{ + Assert( (i >= 0) && (i < 3) ); + return ((vec_t*)this)[i]; +} + + 
+//----------------------------------------------------------------------------- +// Base address... +//----------------------------------------------------------------------------- +inline vec_t* QAngle::Base() +{ + return (vec_t*)this; +} + +inline vec_t const* QAngle::Base() const +{ + return (vec_t const*)this; +} + + +//----------------------------------------------------------------------------- +// IsValid? +//----------------------------------------------------------------------------- +inline bool QAngle::IsValid() const +{ + return IsFinite(x) && IsFinite(y) && IsFinite(z); +} + +//----------------------------------------------------------------------------- +// Invalidate +//----------------------------------------------------------------------------- + +inline void QAngle::Invalidate() +{ +//#ifdef _DEBUG +//#ifdef VECTOR_PARANOIA + x = y = z = VEC_T_NAN; +//#endif +//#endif +} + +//----------------------------------------------------------------------------- +// comparison +//----------------------------------------------------------------------------- +inline bool QAngle::operator==( const QAngle& src ) const +{ + CHECK_VALID(src); + CHECK_VALID(*this); + return (src.x == x) && (src.y == y) && (src.z == z); +} + +inline bool QAngle::operator!=( const QAngle& src ) const +{ + CHECK_VALID(src); + CHECK_VALID(*this); + return (src.x != x) || (src.y != y) || (src.z != z); +} + + +//----------------------------------------------------------------------------- +// Copy +//----------------------------------------------------------------------------- +inline void VectorCopy( const QAngle& src, QAngle& dst ) +{ + CHECK_VALID(src); + dst.x = src.x; + dst.y = src.y; + dst.z = src.z; +} + + +//----------------------------------------------------------------------------- +// standard math operations +//----------------------------------------------------------------------------- +inline QAngle& QAngle::operator+=(const QAngle& v) +{ + CHECK_VALID(*this); + 
CHECK_VALID(v); + x+=v.x; y+=v.y; z += v.z; + return *this; +} + +inline QAngle& QAngle::operator-=(const QAngle& v) +{ + CHECK_VALID(*this); + CHECK_VALID(v); + x-=v.x; y-=v.y; z -= v.z; + return *this; +} + +inline QAngle& QAngle::operator*=(float fl) +{ + x *= fl; + y *= fl; + z *= fl; + CHECK_VALID(*this); + return *this; +} + +inline QAngle& QAngle::operator/=(float fl) +{ + Assert( fl != 0.0f ); + float oofl = 1.0f / fl; + x *= oofl; + y *= oofl; + z *= oofl; + CHECK_VALID(*this); + return *this; +} + + +//----------------------------------------------------------------------------- +// length +//----------------------------------------------------------------------------- +inline vec_t QAngle::Length( ) const +{ + CHECK_VALID(*this); + return (vec_t)FastSqrt( LengthSqr( ) ); +} + + +inline vec_t QAngle::LengthSqr( ) const +{ + CHECK_VALID(*this); + return x * x + y * y + z * z; +} + + +//----------------------------------------------------------------------------- +// Vector equality with tolerance +//----------------------------------------------------------------------------- +inline bool QAnglesAreEqual( const QAngle& src1, const QAngle& src2, float tolerance = 0.0f ) +{ + if (FloatMakePositive(src1.x - src2.x) > tolerance) + return false; + if (FloatMakePositive(src1.y - src2.y) > tolerance) + return false; + return (FloatMakePositive(src1.z - src2.z) <= tolerance); +} + + +//----------------------------------------------------------------------------- +// arithmetic operations (SLOW!!) 
+//----------------------------------------------------------------------------- +#ifndef VECTOR_NO_SLOW_OPERATIONS + +inline QAngle QAngle::operator-(void) const +{ + QAngle ret(-x,-y,-z); + return ret; +} + +inline QAngle QAngle::operator+(const QAngle& v) const +{ + QAngle res; + res.x = x + v.x; + res.y = y + v.y; + res.z = z + v.z; + return res; +} + +inline QAngle QAngle::operator-(const QAngle& v) const +{ + QAngle res; + res.x = x - v.x; + res.y = y - v.y; + res.z = z - v.z; + return res; +} + +inline QAngle QAngle::operator*(float fl) const +{ + QAngle res; + res.x = x * fl; + res.y = y * fl; + res.z = z * fl; + return res; +} + +inline QAngle QAngle::operator/(float fl) const +{ + QAngle res; + res.x = x / fl; + res.y = y / fl; + res.z = z / fl; + return res; +} + +inline QAngle operator*(float fl, const QAngle& v) +{ + QAngle ret( v * fl ); + return ret; +} + +#endif // VECTOR_NO_SLOW_OPERATIONS + + +//----------------------------------------------------------------------------- +// NOTE: These are not completely correct. 
The representations are not equivalent +// unless the QAngle represents a rotational impulse along a coordinate axis (x,y,z) +inline void QAngleToAngularImpulse( const QAngle &angles, AngularImpulse &impulse ) +{ + impulse.x = angles.z; + impulse.y = angles.x; + impulse.z = angles.y; +} + +inline void AngularImpulseToQAngle( const AngularImpulse &impulse, QAngle &angles ) +{ + angles.x = impulse.y; + angles.y = impulse.z; + angles.z = impulse.x; +} + +#if !defined( _X360 ) + +FORCEINLINE vec_t InvRSquared( float const *v ) +{ +#if defined(__i386__) || defined(_M_IX86) + float sqrlen = v[0]*v[0]+v[1]*v[1]+v[2]*v[2] + 1.0e-10f, result; + _mm_store_ss(&result, _mm_rcp_ss( _mm_max_ss( _mm_set_ss(1.0f), _mm_load_ss(&sqrlen) ) )); + return result; +#else + return 1.f/fpmax(1.f, v[0]*v[0]+v[1]*v[1]+v[2]*v[2]); +#endif +} + +FORCEINLINE vec_t InvRSquared( const Vector &v ) +{ + return InvRSquared(&v.x); +} + +#if defined(__i386__) || defined(_M_IX86) +inline void _SSE_RSqrtInline( float a, float* out ) +{ + __m128 xx = _mm_load_ss( &a ); + __m128 xr = _mm_rsqrt_ss( xx ); + __m128 xt; + xt = _mm_mul_ss( xr, xr ); + xt = _mm_mul_ss( xt, xx ); + xt = _mm_sub_ss( _mm_set_ss(3.f), xt ); + xt = _mm_mul_ss( xt, _mm_set_ss(0.5f) ); + xr = _mm_mul_ss( xr, xt ); + _mm_store_ss( out, xr ); +} +#endif + +// FIXME: Change this back to a #define once we get rid of the vec_t version +FORCEINLINE float VectorNormalize( Vector& vec ) +{ +#ifndef DEBUG // stop crashing my edit-and-continue! 
+ #if defined(__i386__) || defined(_M_IX86) + #define DO_SSE_OPTIMIZATION + #endif +#endif + +#if defined( DO_SSE_OPTIMIZATION ) + float sqrlen = vec.LengthSqr() + 1.0e-10f, invlen; + _SSE_RSqrtInline(sqrlen, &invlen); + vec.x *= invlen; + vec.y *= invlen; + vec.z *= invlen; + return sqrlen * invlen; +#else + extern float (FASTCALL *pfVectorNormalize)(Vector& v); + return (*pfVectorNormalize)(vec); +#endif +} + +// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s +FORCEINLINE float VectorNormalize( float * v ) +{ + return VectorNormalize(*(reinterpret_cast(v))); +} + +FORCEINLINE void VectorNormalizeFast( Vector &vec ) +{ + VectorNormalize(vec); +} + +#else + +FORCEINLINE float _VMX_InvRSquared( const Vector &v ) +{ + XMVECTOR xmV = XMVector3ReciprocalLength( XMLoadVector3( v.Base() ) ); + xmV = XMVector3Dot( xmV, xmV ); + return xmV.x; +} + +// call directly +FORCEINLINE float _VMX_VectorNormalize( Vector &vec ) +{ + float mag = XMVector3Length( XMLoadVector3( vec.Base() ) ).x; + float den = 1.f / (mag + FLT_EPSILON ); + vec.x *= den; + vec.y *= den; + vec.z *= den; + return mag; +} + +#define InvRSquared(x) _VMX_InvRSquared(x) + +// FIXME: Change this back to a #define once we get rid of the vec_t version +FORCEINLINE float VectorNormalize( Vector& v ) +{ + return _VMX_VectorNormalize( v ); +} +// FIXME: Obsolete version of VectorNormalize, once we remove all the friggin float*s +FORCEINLINE float VectorNormalize( float *pV ) +{ + return _VMX_VectorNormalize(*(reinterpret_cast(pV))); +} + +// call directly +FORCEINLINE void VectorNormalizeFast( Vector &vec ) +{ + XMVECTOR xmV = XMVector3LengthEst( XMLoadVector3( vec.Base() ) ); + float den = 1.f / (xmV.x + FLT_EPSILON); + vec.x *= den; + vec.y *= den; + vec.z *= den; +} + +#endif // _X360 + + +inline vec_t Vector::NormalizeInPlace() +{ + return VectorNormalize( *this ); +} + +inline Vector Vector::Normalized() const +{ + Vector norm = *this; + VectorNormalize( norm ); + return 
norm; +} + +inline bool Vector::IsLengthGreaterThan( float val ) const +{ + return LengthSqr() > val*val; +} + +inline bool Vector::IsLengthLessThan( float val ) const +{ + return LengthSqr() < val*val; +} + +#endif + diff --git a/mp/src/public/mathlib/vector2d.h b/mp/src/public/mathlib/vector2d.h index 2c6bb242..41385589 100644 --- a/mp/src/public/mathlib/vector2d.h +++ b/mp/src/public/mathlib/vector2d.h @@ -1,670 +1,670 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -// $NoKeywords: $ -// -//=============================================================================// - -#ifndef VECTOR2D_H -#define VECTOR2D_H - -#ifdef _WIN32 -#pragma once -#endif - -#include -#include - -// For vec_t, put this somewhere else? -#include "tier0/basetypes.h" - -// For rand(). We really need a library! -#include - -#include "tier0/dbg.h" -#include "mathlib/math_pfns.h" - -//========================================================= -// 2D Vector2D -//========================================================= - -class Vector2D -{ -public: - // Members - vec_t x, y; - - // Construction/destruction - Vector2D(void); - Vector2D(vec_t X, vec_t Y); - Vector2D(const float *pFloat); - - // Initialization - void Init(vec_t ix=0.0f, vec_t iy=0.0f); - - // Got any nasty NAN's? - bool IsValid() const; - - // array access... - vec_t operator[](int i) const; - vec_t& operator[](int i); - - // Base address... 
- vec_t* Base(); - vec_t const* Base() const; - - // Initialization methods - void Random( float minVal, float maxVal ); - - // equality - bool operator==(const Vector2D& v) const; - bool operator!=(const Vector2D& v) const; - - // arithmetic operations - Vector2D& operator+=(const Vector2D &v); - Vector2D& operator-=(const Vector2D &v); - Vector2D& operator*=(const Vector2D &v); - Vector2D& operator*=(float s); - Vector2D& operator/=(const Vector2D &v); - Vector2D& operator/=(float s); - - // negate the Vector2D components - void Negate(); - - // Get the Vector2D's magnitude. - vec_t Length() const; - - // Get the Vector2D's magnitude squared. - vec_t LengthSqr(void) const; - - // return true if this vector is (0,0) within tolerance - bool IsZero( float tolerance = 0.01f ) const - { - return (x > -tolerance && x < tolerance && - y > -tolerance && y < tolerance); - } - - // Normalize in place and return the old length. - vec_t NormalizeInPlace(); - - // Compare length. - bool IsLengthGreaterThan( float val ) const; - bool IsLengthLessThan( float val ) const; - - // Get the distance from this Vector2D to the other one. - vec_t DistTo(const Vector2D &vOther) const; - - // Get the distance from this Vector2D to the other one squared. - vec_t DistToSqr(const Vector2D &vOther) const; - - // Copy - void CopyToArray(float* rgfl) const; - - // Multiply, add, and assign to this (ie: *this = a + b * scalar). This - // is about 12% faster than the actual Vector2D equation (because it's done per-component - // rather than per-Vector2D). - void MulAdd(const Vector2D& a, const Vector2D& b, float scalar); - - // Dot product. 
- vec_t Dot(const Vector2D& vOther) const; - - // assignment - Vector2D& operator=(const Vector2D &vOther); - -#ifndef VECTOR_NO_SLOW_OPERATIONS - // copy constructors - Vector2D(const Vector2D &vOther); - - // arithmetic operations - Vector2D operator-(void) const; - - Vector2D operator+(const Vector2D& v) const; - Vector2D operator-(const Vector2D& v) const; - Vector2D operator*(const Vector2D& v) const; - Vector2D operator/(const Vector2D& v) const; - Vector2D operator*(float fl) const; - Vector2D operator/(float fl) const; - - // Cross product between two vectors. - Vector2D Cross(const Vector2D &vOther) const; - - // Returns a Vector2D with the min or max in X, Y, and Z. - Vector2D Min(const Vector2D &vOther) const; - Vector2D Max(const Vector2D &vOther) const; - -#else - -private: - // No copy constructors allowed if we're in optimal mode - Vector2D(const Vector2D& vOther); -#endif -}; - -//----------------------------------------------------------------------------- - -const Vector2D vec2_origin(0,0); -const Vector2D vec2_invalid( FLT_MAX, FLT_MAX ); - -//----------------------------------------------------------------------------- -// Vector2D related operations -//----------------------------------------------------------------------------- - -// Vector2D clear -void Vector2DClear( Vector2D& a ); - -// Copy -void Vector2DCopy( const Vector2D& src, Vector2D& dst ); - -// Vector2D arithmetic -void Vector2DAdd( const Vector2D& a, const Vector2D& b, Vector2D& result ); -void Vector2DSubtract( const Vector2D& a, const Vector2D& b, Vector2D& result ); -void Vector2DMultiply( const Vector2D& a, vec_t b, Vector2D& result ); -void Vector2DMultiply( const Vector2D& a, const Vector2D& b, Vector2D& result ); -void Vector2DDivide( const Vector2D& a, vec_t b, Vector2D& result ); -void Vector2DDivide( const Vector2D& a, const Vector2D& b, Vector2D& result ); -void Vector2DMA( const Vector2D& start, float s, const Vector2D& dir, Vector2D& result ); - -// Store the min or 
max of each of x, y, and z into the result. -void Vector2DMin( const Vector2D &a, const Vector2D &b, Vector2D &result ); -void Vector2DMax( const Vector2D &a, const Vector2D &b, Vector2D &result ); - -#define Vector2DExpand( v ) (v).x, (v).y - -// Normalization -vec_t Vector2DNormalize( Vector2D& v ); - -// Length -vec_t Vector2DLength( const Vector2D& v ); - -// Dot Product -vec_t DotProduct2D(const Vector2D& a, const Vector2D& b); - -// Linearly interpolate between two vectors -void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D& dest ); - - -//----------------------------------------------------------------------------- -// -// Inlined Vector2D methods -// -//----------------------------------------------------------------------------- - - -//----------------------------------------------------------------------------- -// constructors -//----------------------------------------------------------------------------- - -inline Vector2D::Vector2D(void) -{ -#ifdef _DEBUG - // Initialize to NAN to catch errors - x = y = VEC_T_NAN; -#endif -} - -inline Vector2D::Vector2D(vec_t X, vec_t Y) -{ - x = X; y = Y; - Assert( IsValid() ); -} - -inline Vector2D::Vector2D(const float *pFloat) -{ - Assert( pFloat ); - x = pFloat[0]; y = pFloat[1]; - Assert( IsValid() ); -} - - -//----------------------------------------------------------------------------- -// copy constructor -//----------------------------------------------------------------------------- - -inline Vector2D::Vector2D(const Vector2D &vOther) -{ - Assert( vOther.IsValid() ); - x = vOther.x; y = vOther.y; -} - -//----------------------------------------------------------------------------- -// initialization -//----------------------------------------------------------------------------- - -inline void Vector2D::Init( vec_t ix, vec_t iy ) -{ - x = ix; y = iy; - Assert( IsValid() ); -} - -inline void Vector2D::Random( float minVal, float maxVal ) -{ - x = minVal + ((float)rand() / 
VALVE_RAND_MAX) * (maxVal - minVal); - y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); -} - -inline void Vector2DClear( Vector2D& a ) -{ - a.x = a.y = 0.0f; -} - -//----------------------------------------------------------------------------- -// assignment -//----------------------------------------------------------------------------- - -inline Vector2D& Vector2D::operator=(const Vector2D &vOther) -{ - Assert( vOther.IsValid() ); - x=vOther.x; y=vOther.y; - return *this; -} - -//----------------------------------------------------------------------------- -// Array access -//----------------------------------------------------------------------------- - -inline vec_t& Vector2D::operator[](int i) -{ - Assert( (i >= 0) && (i < 2) ); - return ((vec_t*)this)[i]; -} - -inline vec_t Vector2D::operator[](int i) const -{ - Assert( (i >= 0) && (i < 2) ); - return ((vec_t*)this)[i]; -} - -//----------------------------------------------------------------------------- -// Base address... -//----------------------------------------------------------------------------- - -inline vec_t* Vector2D::Base() -{ - return (vec_t*)this; -} - -inline vec_t const* Vector2D::Base() const -{ - return (vec_t const*)this; -} - -//----------------------------------------------------------------------------- -// IsValid? 
-//----------------------------------------------------------------------------- - -inline bool Vector2D::IsValid() const -{ - return IsFinite(x) && IsFinite(y); -} - -//----------------------------------------------------------------------------- -// comparison -//----------------------------------------------------------------------------- - -inline bool Vector2D::operator==( const Vector2D& src ) const -{ - Assert( src.IsValid() && IsValid() ); - return (src.x == x) && (src.y == y); -} - -inline bool Vector2D::operator!=( const Vector2D& src ) const -{ - Assert( src.IsValid() && IsValid() ); - return (src.x != x) || (src.y != y); -} - - -//----------------------------------------------------------------------------- -// Copy -//----------------------------------------------------------------------------- - -inline void Vector2DCopy( const Vector2D& src, Vector2D& dst ) -{ - Assert( src.IsValid() ); - dst.x = src.x; - dst.y = src.y; -} - -inline void Vector2D::CopyToArray(float* rgfl) const -{ - Assert( IsValid() ); - Assert( rgfl ); - rgfl[0] = x; rgfl[1] = y; -} - -//----------------------------------------------------------------------------- -// standard math operations -//----------------------------------------------------------------------------- - -inline void Vector2D::Negate() -{ - Assert( IsValid() ); - x = -x; y = -y; -} - -inline Vector2D& Vector2D::operator+=(const Vector2D& v) -{ - Assert( IsValid() && v.IsValid() ); - x+=v.x; y+=v.y; - return *this; -} - -inline Vector2D& Vector2D::operator-=(const Vector2D& v) -{ - Assert( IsValid() && v.IsValid() ); - x-=v.x; y-=v.y; - return *this; -} - -inline Vector2D& Vector2D::operator*=(float fl) -{ - x *= fl; - y *= fl; - Assert( IsValid() ); - return *this; -} - -inline Vector2D& Vector2D::operator*=(const Vector2D& v) -{ - x *= v.x; - y *= v.y; - Assert( IsValid() ); - return *this; -} - -inline Vector2D& Vector2D::operator/=(float fl) -{ - Assert( fl != 0.0f ); - float oofl = 1.0f / fl; - x *= oofl; - 
y *= oofl; - Assert( IsValid() ); - return *this; -} - -inline Vector2D& Vector2D::operator/=(const Vector2D& v) -{ - Assert( v.x != 0.0f && v.y != 0.0f ); - x /= v.x; - y /= v.y; - Assert( IsValid() ); - return *this; -} - -inline void Vector2DAdd( const Vector2D& a, const Vector2D& b, Vector2D& c ) -{ - Assert( a.IsValid() && b.IsValid() ); - c.x = a.x + b.x; - c.y = a.y + b.y; -} - -inline void Vector2DSubtract( const Vector2D& a, const Vector2D& b, Vector2D& c ) -{ - Assert( a.IsValid() && b.IsValid() ); - c.x = a.x - b.x; - c.y = a.y - b.y; -} - -inline void Vector2DMultiply( const Vector2D& a, vec_t b, Vector2D& c ) -{ - Assert( a.IsValid() && IsFinite(b) ); - c.x = a.x * b; - c.y = a.y * b; -} - -inline void Vector2DMultiply( const Vector2D& a, const Vector2D& b, Vector2D& c ) -{ - Assert( a.IsValid() && b.IsValid() ); - c.x = a.x * b.x; - c.y = a.y * b.y; -} - - -inline void Vector2DDivide( const Vector2D& a, vec_t b, Vector2D& c ) -{ - Assert( a.IsValid() ); - Assert( b != 0.0f ); - vec_t oob = 1.0f / b; - c.x = a.x * oob; - c.y = a.y * oob; -} - -inline void Vector2DDivide( const Vector2D& a, const Vector2D& b, Vector2D& c ) -{ - Assert( a.IsValid() ); - Assert( (b.x != 0.0f) && (b.y != 0.0f) ); - c.x = a.x / b.x; - c.y = a.y / b.y; -} - -inline void Vector2DMA( const Vector2D& start, float s, const Vector2D& dir, Vector2D& result ) -{ - Assert( start.IsValid() && IsFinite(s) && dir.IsValid() ); - result.x = start.x + s*dir.x; - result.y = start.y + s*dir.y; -} - -// FIXME: Remove -// For backwards compatability -inline void Vector2D::MulAdd(const Vector2D& a, const Vector2D& b, float scalar) -{ - x = a.x + b.x * scalar; - y = a.y + b.y * scalar; -} - -inline void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D& dest ) -{ - dest[0] = src1[0] + (src2[0] - src1[0]) * t; - dest[1] = src1[1] + (src2[1] - src1[1]) * t; -} - -//----------------------------------------------------------------------------- -// dot, cross 
-//----------------------------------------------------------------------------- -inline vec_t DotProduct2D(const Vector2D& a, const Vector2D& b) -{ - Assert( a.IsValid() && b.IsValid() ); - return( a.x*b.x + a.y*b.y ); -} - -// for backwards compatability -inline vec_t Vector2D::Dot( const Vector2D& vOther ) const -{ - return DotProduct2D( *this, vOther ); -} - - -//----------------------------------------------------------------------------- -// length -//----------------------------------------------------------------------------- -inline vec_t Vector2DLength( const Vector2D& v ) -{ - Assert( v.IsValid() ); - return (vec_t)FastSqrt(v.x*v.x + v.y*v.y); -} - -inline vec_t Vector2D::LengthSqr(void) const -{ - Assert( IsValid() ); - return (x*x + y*y); -} - -inline vec_t Vector2D::NormalizeInPlace() -{ - return Vector2DNormalize( *this ); -} - -inline bool Vector2D::IsLengthGreaterThan( float val ) const -{ - return LengthSqr() > val*val; -} - -inline bool Vector2D::IsLengthLessThan( float val ) const -{ - return LengthSqr() < val*val; -} - -inline vec_t Vector2D::Length(void) const -{ - return Vector2DLength( *this ); -} - - -inline void Vector2DMin( const Vector2D &a, const Vector2D &b, Vector2D &result ) -{ - result.x = (a.x < b.x) ? a.x : b.x; - result.y = (a.y < b.y) ? a.y : b.y; -} - - -inline void Vector2DMax( const Vector2D &a, const Vector2D &b, Vector2D &result ) -{ - result.x = (a.x > b.x) ? a.x : b.x; - result.y = (a.y > b.y) ? 
a.y : b.y; -} - - -//----------------------------------------------------------------------------- -// Normalization -//----------------------------------------------------------------------------- -inline vec_t Vector2DNormalize( Vector2D& v ) -{ - Assert( v.IsValid() ); - vec_t l = v.Length(); - if (l != 0.0f) - { - v /= l; - } - else - { - v.x = v.y = 0.0f; - } - return l; -} - - -//----------------------------------------------------------------------------- -// Get the distance from this Vector2D to the other one -//----------------------------------------------------------------------------- -inline vec_t Vector2D::DistTo(const Vector2D &vOther) const -{ - Vector2D delta; - Vector2DSubtract( *this, vOther, delta ); - return delta.Length(); -} - -inline vec_t Vector2D::DistToSqr(const Vector2D &vOther) const -{ - Vector2D delta; - Vector2DSubtract( *this, vOther, delta ); - return delta.LengthSqr(); -} - - -//----------------------------------------------------------------------------- -// Computes the closest point to vecTarget no farther than flMaxDist from vecStart -//----------------------------------------------------------------------------- -inline void ComputeClosestPoint2D( const Vector2D& vecStart, float flMaxDist, const Vector2D& vecTarget, Vector2D *pResult ) -{ - Vector2D vecDelta; - Vector2DSubtract( vecTarget, vecStart, vecDelta ); - float flDistSqr = vecDelta.LengthSqr(); - if ( flDistSqr <= flMaxDist * flMaxDist ) - { - *pResult = vecTarget; - } - else - { - vecDelta /= FastSqrt( flDistSqr ); - Vector2DMA( vecStart, flMaxDist, vecDelta, *pResult ); - } -} - - - -//----------------------------------------------------------------------------- -// -// Slow methods -// -//----------------------------------------------------------------------------- - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -//----------------------------------------------------------------------------- -// Returns a Vector2D with the min or max in X, Y, and Z. 
-//----------------------------------------------------------------------------- - -inline Vector2D Vector2D::Min(const Vector2D &vOther) const -{ - return Vector2D(x < vOther.x ? x : vOther.x, - y < vOther.y ? y : vOther.y); -} - -inline Vector2D Vector2D::Max(const Vector2D &vOther) const -{ - return Vector2D(x > vOther.x ? x : vOther.x, - y > vOther.y ? y : vOther.y); -} - - -//----------------------------------------------------------------------------- -// arithmetic operations -//----------------------------------------------------------------------------- - -inline Vector2D Vector2D::operator-(void) const -{ - return Vector2D(-x,-y); -} - -inline Vector2D Vector2D::operator+(const Vector2D& v) const -{ - Vector2D res; - Vector2DAdd( *this, v, res ); - return res; -} - -inline Vector2D Vector2D::operator-(const Vector2D& v) const -{ - Vector2D res; - Vector2DSubtract( *this, v, res ); - return res; -} - -inline Vector2D Vector2D::operator*(float fl) const -{ - Vector2D res; - Vector2DMultiply( *this, fl, res ); - return res; -} - -inline Vector2D Vector2D::operator*(const Vector2D& v) const -{ - Vector2D res; - Vector2DMultiply( *this, v, res ); - return res; -} - -inline Vector2D Vector2D::operator/(float fl) const -{ - Vector2D res; - Vector2DDivide( *this, fl, res ); - return res; -} - -inline Vector2D Vector2D::operator/(const Vector2D& v) const -{ - Vector2D res; - Vector2DDivide( *this, v, res ); - return res; -} - -inline Vector2D operator*(float fl, const Vector2D& v) -{ - return v * fl; -} - -#endif //slow - -#endif // VECTOR2D_H - +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef VECTOR2D_H +#define VECTOR2D_H + +#ifdef _WIN32 +#pragma once +#endif + +#include +#include + +// For vec_t, put this somewhere else? +#include "tier0/basetypes.h" + +// For rand(). We really need a library! 
+#include + +#include "tier0/dbg.h" +#include "mathlib/math_pfns.h" + +//========================================================= +// 2D Vector2D +//========================================================= + +class Vector2D +{ +public: + // Members + vec_t x, y; + + // Construction/destruction + Vector2D(void); + Vector2D(vec_t X, vec_t Y); + Vector2D(const float *pFloat); + + // Initialization + void Init(vec_t ix=0.0f, vec_t iy=0.0f); + + // Got any nasty NAN's? + bool IsValid() const; + + // array access... + vec_t operator[](int i) const; + vec_t& operator[](int i); + + // Base address... + vec_t* Base(); + vec_t const* Base() const; + + // Initialization methods + void Random( float minVal, float maxVal ); + + // equality + bool operator==(const Vector2D& v) const; + bool operator!=(const Vector2D& v) const; + + // arithmetic operations + Vector2D& operator+=(const Vector2D &v); + Vector2D& operator-=(const Vector2D &v); + Vector2D& operator*=(const Vector2D &v); + Vector2D& operator*=(float s); + Vector2D& operator/=(const Vector2D &v); + Vector2D& operator/=(float s); + + // negate the Vector2D components + void Negate(); + + // Get the Vector2D's magnitude. + vec_t Length() const; + + // Get the Vector2D's magnitude squared. + vec_t LengthSqr(void) const; + + // return true if this vector is (0,0) within tolerance + bool IsZero( float tolerance = 0.01f ) const + { + return (x > -tolerance && x < tolerance && + y > -tolerance && y < tolerance); + } + + // Normalize in place and return the old length. + vec_t NormalizeInPlace(); + + // Compare length. + bool IsLengthGreaterThan( float val ) const; + bool IsLengthLessThan( float val ) const; + + // Get the distance from this Vector2D to the other one. + vec_t DistTo(const Vector2D &vOther) const; + + // Get the distance from this Vector2D to the other one squared. 
+ vec_t DistToSqr(const Vector2D &vOther) const; + + // Copy + void CopyToArray(float* rgfl) const; + + // Multiply, add, and assign to this (ie: *this = a + b * scalar). This + // is about 12% faster than the actual Vector2D equation (because it's done per-component + // rather than per-Vector2D). + void MulAdd(const Vector2D& a, const Vector2D& b, float scalar); + + // Dot product. + vec_t Dot(const Vector2D& vOther) const; + + // assignment + Vector2D& operator=(const Vector2D &vOther); + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // copy constructors + Vector2D(const Vector2D &vOther); + + // arithmetic operations + Vector2D operator-(void) const; + + Vector2D operator+(const Vector2D& v) const; + Vector2D operator-(const Vector2D& v) const; + Vector2D operator*(const Vector2D& v) const; + Vector2D operator/(const Vector2D& v) const; + Vector2D operator*(float fl) const; + Vector2D operator/(float fl) const; + + // Cross product between two vectors. + Vector2D Cross(const Vector2D &vOther) const; + + // Returns a Vector2D with the min or max in X, Y, and Z. 
+ Vector2D Min(const Vector2D &vOther) const; + Vector2D Max(const Vector2D &vOther) const; + +#else + +private: + // No copy constructors allowed if we're in optimal mode + Vector2D(const Vector2D& vOther); +#endif +}; + +//----------------------------------------------------------------------------- + +const Vector2D vec2_origin(0,0); +const Vector2D vec2_invalid( FLT_MAX, FLT_MAX ); + +//----------------------------------------------------------------------------- +// Vector2D related operations +//----------------------------------------------------------------------------- + +// Vector2D clear +void Vector2DClear( Vector2D& a ); + +// Copy +void Vector2DCopy( const Vector2D& src, Vector2D& dst ); + +// Vector2D arithmetic +void Vector2DAdd( const Vector2D& a, const Vector2D& b, Vector2D& result ); +void Vector2DSubtract( const Vector2D& a, const Vector2D& b, Vector2D& result ); +void Vector2DMultiply( const Vector2D& a, vec_t b, Vector2D& result ); +void Vector2DMultiply( const Vector2D& a, const Vector2D& b, Vector2D& result ); +void Vector2DDivide( const Vector2D& a, vec_t b, Vector2D& result ); +void Vector2DDivide( const Vector2D& a, const Vector2D& b, Vector2D& result ); +void Vector2DMA( const Vector2D& start, float s, const Vector2D& dir, Vector2D& result ); + +// Store the min or max of each of x, y, and z into the result. 
+void Vector2DMin( const Vector2D &a, const Vector2D &b, Vector2D &result ); +void Vector2DMax( const Vector2D &a, const Vector2D &b, Vector2D &result ); + +#define Vector2DExpand( v ) (v).x, (v).y + +// Normalization +vec_t Vector2DNormalize( Vector2D& v ); + +// Length +vec_t Vector2DLength( const Vector2D& v ); + +// Dot Product +vec_t DotProduct2D(const Vector2D& a, const Vector2D& b); + +// Linearly interpolate between two vectors +void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D& dest ); + + +//----------------------------------------------------------------------------- +// +// Inlined Vector2D methods +// +//----------------------------------------------------------------------------- + + +//----------------------------------------------------------------------------- +// constructors +//----------------------------------------------------------------------------- + +inline Vector2D::Vector2D(void) +{ +#ifdef _DEBUG + // Initialize to NAN to catch errors + x = y = VEC_T_NAN; +#endif +} + +inline Vector2D::Vector2D(vec_t X, vec_t Y) +{ + x = X; y = Y; + Assert( IsValid() ); +} + +inline Vector2D::Vector2D(const float *pFloat) +{ + Assert( pFloat ); + x = pFloat[0]; y = pFloat[1]; + Assert( IsValid() ); +} + + +//----------------------------------------------------------------------------- +// copy constructor +//----------------------------------------------------------------------------- + +inline Vector2D::Vector2D(const Vector2D &vOther) +{ + Assert( vOther.IsValid() ); + x = vOther.x; y = vOther.y; +} + +//----------------------------------------------------------------------------- +// initialization +//----------------------------------------------------------------------------- + +inline void Vector2D::Init( vec_t ix, vec_t iy ) +{ + x = ix; y = iy; + Assert( IsValid() ); +} + +inline void Vector2D::Random( float minVal, float maxVal ) +{ + x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + y = minVal + 
((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal); +} + +inline void Vector2DClear( Vector2D& a ) +{ + a.x = a.y = 0.0f; +} + +//----------------------------------------------------------------------------- +// assignment +//----------------------------------------------------------------------------- + +inline Vector2D& Vector2D::operator=(const Vector2D &vOther) +{ + Assert( vOther.IsValid() ); + x=vOther.x; y=vOther.y; + return *this; +} + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- + +inline vec_t& Vector2D::operator[](int i) +{ + Assert( (i >= 0) && (i < 2) ); + return ((vec_t*)this)[i]; +} + +inline vec_t Vector2D::operator[](int i) const +{ + Assert( (i >= 0) && (i < 2) ); + return ((vec_t*)this)[i]; +} + +//----------------------------------------------------------------------------- +// Base address... +//----------------------------------------------------------------------------- + +inline vec_t* Vector2D::Base() +{ + return (vec_t*)this; +} + +inline vec_t const* Vector2D::Base() const +{ + return (vec_t const*)this; +} + +//----------------------------------------------------------------------------- +// IsValid? 
+//----------------------------------------------------------------------------- + +inline bool Vector2D::IsValid() const +{ + return IsFinite(x) && IsFinite(y); +} + +//----------------------------------------------------------------------------- +// comparison +//----------------------------------------------------------------------------- + +inline bool Vector2D::operator==( const Vector2D& src ) const +{ + Assert( src.IsValid() && IsValid() ); + return (src.x == x) && (src.y == y); +} + +inline bool Vector2D::operator!=( const Vector2D& src ) const +{ + Assert( src.IsValid() && IsValid() ); + return (src.x != x) || (src.y != y); +} + + +//----------------------------------------------------------------------------- +// Copy +//----------------------------------------------------------------------------- + +inline void Vector2DCopy( const Vector2D& src, Vector2D& dst ) +{ + Assert( src.IsValid() ); + dst.x = src.x; + dst.y = src.y; +} + +inline void Vector2D::CopyToArray(float* rgfl) const +{ + Assert( IsValid() ); + Assert( rgfl ); + rgfl[0] = x; rgfl[1] = y; +} + +//----------------------------------------------------------------------------- +// standard math operations +//----------------------------------------------------------------------------- + +inline void Vector2D::Negate() +{ + Assert( IsValid() ); + x = -x; y = -y; +} + +inline Vector2D& Vector2D::operator+=(const Vector2D& v) +{ + Assert( IsValid() && v.IsValid() ); + x+=v.x; y+=v.y; + return *this; +} + +inline Vector2D& Vector2D::operator-=(const Vector2D& v) +{ + Assert( IsValid() && v.IsValid() ); + x-=v.x; y-=v.y; + return *this; +} + +inline Vector2D& Vector2D::operator*=(float fl) +{ + x *= fl; + y *= fl; + Assert( IsValid() ); + return *this; +} + +inline Vector2D& Vector2D::operator*=(const Vector2D& v) +{ + x *= v.x; + y *= v.y; + Assert( IsValid() ); + return *this; +} + +inline Vector2D& Vector2D::operator/=(float fl) +{ + Assert( fl != 0.0f ); + float oofl = 1.0f / fl; + x *= oofl; + 
y *= oofl; + Assert( IsValid() ); + return *this; +} + +inline Vector2D& Vector2D::operator/=(const Vector2D& v) +{ + Assert( v.x != 0.0f && v.y != 0.0f ); + x /= v.x; + y /= v.y; + Assert( IsValid() ); + return *this; +} + +inline void Vector2DAdd( const Vector2D& a, const Vector2D& b, Vector2D& c ) +{ + Assert( a.IsValid() && b.IsValid() ); + c.x = a.x + b.x; + c.y = a.y + b.y; +} + +inline void Vector2DSubtract( const Vector2D& a, const Vector2D& b, Vector2D& c ) +{ + Assert( a.IsValid() && b.IsValid() ); + c.x = a.x - b.x; + c.y = a.y - b.y; +} + +inline void Vector2DMultiply( const Vector2D& a, vec_t b, Vector2D& c ) +{ + Assert( a.IsValid() && IsFinite(b) ); + c.x = a.x * b; + c.y = a.y * b; +} + +inline void Vector2DMultiply( const Vector2D& a, const Vector2D& b, Vector2D& c ) +{ + Assert( a.IsValid() && b.IsValid() ); + c.x = a.x * b.x; + c.y = a.y * b.y; +} + + +inline void Vector2DDivide( const Vector2D& a, vec_t b, Vector2D& c ) +{ + Assert( a.IsValid() ); + Assert( b != 0.0f ); + vec_t oob = 1.0f / b; + c.x = a.x * oob; + c.y = a.y * oob; +} + +inline void Vector2DDivide( const Vector2D& a, const Vector2D& b, Vector2D& c ) +{ + Assert( a.IsValid() ); + Assert( (b.x != 0.0f) && (b.y != 0.0f) ); + c.x = a.x / b.x; + c.y = a.y / b.y; +} + +inline void Vector2DMA( const Vector2D& start, float s, const Vector2D& dir, Vector2D& result ) +{ + Assert( start.IsValid() && IsFinite(s) && dir.IsValid() ); + result.x = start.x + s*dir.x; + result.y = start.y + s*dir.y; +} + +// FIXME: Remove +// For backwards compatability +inline void Vector2D::MulAdd(const Vector2D& a, const Vector2D& b, float scalar) +{ + x = a.x + b.x * scalar; + y = a.y + b.y * scalar; +} + +inline void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D& dest ) +{ + dest[0] = src1[0] + (src2[0] - src1[0]) * t; + dest[1] = src1[1] + (src2[1] - src1[1]) * t; +} + +//----------------------------------------------------------------------------- +// dot, cross 
+//----------------------------------------------------------------------------- +inline vec_t DotProduct2D(const Vector2D& a, const Vector2D& b) +{ + Assert( a.IsValid() && b.IsValid() ); + return( a.x*b.x + a.y*b.y ); +} + +// for backwards compatability +inline vec_t Vector2D::Dot( const Vector2D& vOther ) const +{ + return DotProduct2D( *this, vOther ); +} + + +//----------------------------------------------------------------------------- +// length +//----------------------------------------------------------------------------- +inline vec_t Vector2DLength( const Vector2D& v ) +{ + Assert( v.IsValid() ); + return (vec_t)FastSqrt(v.x*v.x + v.y*v.y); +} + +inline vec_t Vector2D::LengthSqr(void) const +{ + Assert( IsValid() ); + return (x*x + y*y); +} + +inline vec_t Vector2D::NormalizeInPlace() +{ + return Vector2DNormalize( *this ); +} + +inline bool Vector2D::IsLengthGreaterThan( float val ) const +{ + return LengthSqr() > val*val; +} + +inline bool Vector2D::IsLengthLessThan( float val ) const +{ + return LengthSqr() < val*val; +} + +inline vec_t Vector2D::Length(void) const +{ + return Vector2DLength( *this ); +} + + +inline void Vector2DMin( const Vector2D &a, const Vector2D &b, Vector2D &result ) +{ + result.x = (a.x < b.x) ? a.x : b.x; + result.y = (a.y < b.y) ? a.y : b.y; +} + + +inline void Vector2DMax( const Vector2D &a, const Vector2D &b, Vector2D &result ) +{ + result.x = (a.x > b.x) ? a.x : b.x; + result.y = (a.y > b.y) ? 
a.y : b.y; +} + + +//----------------------------------------------------------------------------- +// Normalization +//----------------------------------------------------------------------------- +inline vec_t Vector2DNormalize( Vector2D& v ) +{ + Assert( v.IsValid() ); + vec_t l = v.Length(); + if (l != 0.0f) + { + v /= l; + } + else + { + v.x = v.y = 0.0f; + } + return l; +} + + +//----------------------------------------------------------------------------- +// Get the distance from this Vector2D to the other one +//----------------------------------------------------------------------------- +inline vec_t Vector2D::DistTo(const Vector2D &vOther) const +{ + Vector2D delta; + Vector2DSubtract( *this, vOther, delta ); + return delta.Length(); +} + +inline vec_t Vector2D::DistToSqr(const Vector2D &vOther) const +{ + Vector2D delta; + Vector2DSubtract( *this, vOther, delta ); + return delta.LengthSqr(); +} + + +//----------------------------------------------------------------------------- +// Computes the closest point to vecTarget no farther than flMaxDist from vecStart +//----------------------------------------------------------------------------- +inline void ComputeClosestPoint2D( const Vector2D& vecStart, float flMaxDist, const Vector2D& vecTarget, Vector2D *pResult ) +{ + Vector2D vecDelta; + Vector2DSubtract( vecTarget, vecStart, vecDelta ); + float flDistSqr = vecDelta.LengthSqr(); + if ( flDistSqr <= flMaxDist * flMaxDist ) + { + *pResult = vecTarget; + } + else + { + vecDelta /= FastSqrt( flDistSqr ); + Vector2DMA( vecStart, flMaxDist, vecDelta, *pResult ); + } +} + + + +//----------------------------------------------------------------------------- +// +// Slow methods +// +//----------------------------------------------------------------------------- + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +//----------------------------------------------------------------------------- +// Returns a Vector2D with the min or max in X, Y, and Z. 
+//----------------------------------------------------------------------------- + +inline Vector2D Vector2D::Min(const Vector2D &vOther) const +{ + return Vector2D(x < vOther.x ? x : vOther.x, + y < vOther.y ? y : vOther.y); +} + +inline Vector2D Vector2D::Max(const Vector2D &vOther) const +{ + return Vector2D(x > vOther.x ? x : vOther.x, + y > vOther.y ? y : vOther.y); +} + + +//----------------------------------------------------------------------------- +// arithmetic operations +//----------------------------------------------------------------------------- + +inline Vector2D Vector2D::operator-(void) const +{ + return Vector2D(-x,-y); +} + +inline Vector2D Vector2D::operator+(const Vector2D& v) const +{ + Vector2D res; + Vector2DAdd( *this, v, res ); + return res; +} + +inline Vector2D Vector2D::operator-(const Vector2D& v) const +{ + Vector2D res; + Vector2DSubtract( *this, v, res ); + return res; +} + +inline Vector2D Vector2D::operator*(float fl) const +{ + Vector2D res; + Vector2DMultiply( *this, fl, res ); + return res; +} + +inline Vector2D Vector2D::operator*(const Vector2D& v) const +{ + Vector2D res; + Vector2DMultiply( *this, v, res ); + return res; +} + +inline Vector2D Vector2D::operator/(float fl) const +{ + Vector2D res; + Vector2DDivide( *this, fl, res ); + return res; +} + +inline Vector2D Vector2D::operator/(const Vector2D& v) const +{ + Vector2D res; + Vector2DDivide( *this, v, res ); + return res; +} + +inline Vector2D operator*(float fl, const Vector2D& v) +{ + return v * fl; +} + +#endif //slow + +#endif // VECTOR2D_H + diff --git a/mp/src/public/mathlib/vector4d.h b/mp/src/public/mathlib/vector4d.h index 53052e4d..2b20c882 100644 --- a/mp/src/public/mathlib/vector4d.h +++ b/mp/src/public/mathlib/vector4d.h @@ -1,686 +1,686 @@ -//========= Copyright Valve Corporation, All rights reserved. 
============// -// -// Purpose: -// -// $NoKeywords: $ -// -//=============================================================================// - -#ifndef VECTOR4D_H -#define VECTOR4D_H - -#ifdef _WIN32 -#pragma once -#endif - -#include -#include // For rand(). We really need a library! -#include -#if !defined( _X360 ) -#include // For SSE -#endif -#include "basetypes.h" // For vec_t, put this somewhere else? -#include "tier0/dbg.h" -#include "mathlib/math_pfns.h" - -// forward declarations -class Vector; -class Vector2D; - -//========================================================= -// 4D Vector4D -//========================================================= - -class Vector4D -{ -public: - // Members - vec_t x, y, z, w; - - // Construction/destruction - Vector4D(void); - Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W); - Vector4D(const float *pFloat); - - // Initialization - void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f, vec_t iw=0.0f); - - // Got any nasty NAN's? - bool IsValid() const; - - // array access... - vec_t operator[](int i) const; - vec_t& operator[](int i); - - // Base address... - inline vec_t* Base(); - inline vec_t const* Base() const; - - // Cast to Vector and Vector2D... - Vector& AsVector3D(); - Vector const& AsVector3D() const; - - Vector2D& AsVector2D(); - Vector2D const& AsVector2D() const; - - // Initialization methods - void Random( vec_t minVal, vec_t maxVal ); - - // equality - bool operator==(const Vector4D& v) const; - bool operator!=(const Vector4D& v) const; - - // arithmetic operations - Vector4D& operator+=(const Vector4D &v); - Vector4D& operator-=(const Vector4D &v); - Vector4D& operator*=(const Vector4D &v); - Vector4D& operator*=(float s); - Vector4D& operator/=(const Vector4D &v); - Vector4D& operator/=(float s); - - // negate the Vector4D components - void Negate(); - - // Get the Vector4D's magnitude. - vec_t Length() const; - - // Get the Vector4D's magnitude squared. 
- vec_t LengthSqr(void) const; - - // return true if this vector is (0,0,0,0) within tolerance - bool IsZero( float tolerance = 0.01f ) const - { - return (x > -tolerance && x < tolerance && - y > -tolerance && y < tolerance && - z > -tolerance && z < tolerance && - w > -tolerance && w < tolerance); - } - - // Get the distance from this Vector4D to the other one. - vec_t DistTo(const Vector4D &vOther) const; - - // Get the distance from this Vector4D to the other one squared. - vec_t DistToSqr(const Vector4D &vOther) const; - - // Copy - void CopyToArray(float* rgfl) const; - - // Multiply, add, and assign to this (ie: *this = a + b * scalar). This - // is about 12% faster than the actual Vector4D equation (because it's done per-component - // rather than per-Vector4D). - void MulAdd(Vector4D const& a, Vector4D const& b, float scalar); - - // Dot product. - vec_t Dot(Vector4D const& vOther) const; - - // No copy constructors allowed if we're in optimal mode -#ifdef VECTOR_NO_SLOW_OPERATIONS -private: -#else -public: -#endif - Vector4D(Vector4D const& vOther); - - // No assignment operators either... 
- Vector4D& operator=( Vector4D const& src ); -}; - -const Vector4D vec4_origin( 0.0f, 0.0f, 0.0f, 0.0f ); -const Vector4D vec4_invalid( FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX ); - -//----------------------------------------------------------------------------- -// SSE optimized routines -//----------------------------------------------------------------------------- - -class ALIGN16 Vector4DAligned : public Vector4D -{ -public: - Vector4DAligned(void) {} - Vector4DAligned( vec_t X, vec_t Y, vec_t Z, vec_t W ); - - inline void Set( vec_t X, vec_t Y, vec_t Z, vec_t W ); - inline void InitZero( void ); - - inline __m128 &AsM128() { return *(__m128*)&x; } - inline const __m128 &AsM128() const { return *(const __m128*)&x; } - -private: - // No copy constructors allowed if we're in optimal mode - Vector4DAligned( Vector4DAligned const& vOther ); - - // No assignment operators either... - Vector4DAligned& operator=( Vector4DAligned const& src ); -} ALIGN16_POST; - -//----------------------------------------------------------------------------- -// Vector4D related operations -//----------------------------------------------------------------------------- - -// Vector4D clear -void Vector4DClear( Vector4D& a ); - -// Copy -void Vector4DCopy( Vector4D const& src, Vector4D& dst ); - -// Vector4D arithmetic -void Vector4DAdd( Vector4D const& a, Vector4D const& b, Vector4D& result ); -void Vector4DSubtract( Vector4D const& a, Vector4D const& b, Vector4D& result ); -void Vector4DMultiply( Vector4D const& a, vec_t b, Vector4D& result ); -void Vector4DMultiply( Vector4D const& a, Vector4D const& b, Vector4D& result ); -void Vector4DDivide( Vector4D const& a, vec_t b, Vector4D& result ); -void Vector4DDivide( Vector4D const& a, Vector4D const& b, Vector4D& result ); -void Vector4DMA( Vector4D const& start, float s, Vector4D const& dir, Vector4D& result ); - -// Vector4DAligned arithmetic -void Vector4DMultiplyAligned( Vector4DAligned const& a, vec_t b, Vector4DAligned& result ); - - 
-#define Vector4DExpand( v ) (v).x, (v).y, (v).z, (v).w - -// Normalization -vec_t Vector4DNormalize( Vector4D& v ); - -// Length -vec_t Vector4DLength( Vector4D const& v ); - -// Dot Product -vec_t DotProduct4D(Vector4D const& a, Vector4D const& b); - -// Linearly interpolate between two vectors -void Vector4DLerp(Vector4D const& src1, Vector4D const& src2, vec_t t, Vector4D& dest ); - - -//----------------------------------------------------------------------------- -// -// Inlined Vector4D methods -// -//----------------------------------------------------------------------------- - - -//----------------------------------------------------------------------------- -// constructors -//----------------------------------------------------------------------------- - -inline Vector4D::Vector4D(void) -{ -#ifdef _DEBUG - // Initialize to NAN to catch errors - x = y = z = w = VEC_T_NAN; -#endif -} - -inline Vector4D::Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W ) -{ - x = X; y = Y; z = Z; w = W; - Assert( IsValid() ); -} - -inline Vector4D::Vector4D(const float *pFloat) -{ - Assert( pFloat ); - x = pFloat[0]; y = pFloat[1]; z = pFloat[2]; w = pFloat[3]; - Assert( IsValid() ); -} - - -//----------------------------------------------------------------------------- -// copy constructor -//----------------------------------------------------------------------------- - -inline Vector4D::Vector4D(const Vector4D &vOther) -{ - Assert( vOther.IsValid() ); - x = vOther.x; y = vOther.y; z = vOther.z; w = vOther.w; -} - -//----------------------------------------------------------------------------- -// initialization -//----------------------------------------------------------------------------- - -inline void Vector4D::Init( vec_t ix, vec_t iy, vec_t iz, vec_t iw ) -{ - x = ix; y = iy; z = iz; w = iw; - Assert( IsValid() ); -} - -inline void Vector4D::Random( vec_t minVal, vec_t maxVal ) -{ - x = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal); - y = minVal + 
((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal); - z = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal); - w = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal); -} - -inline void Vector4DClear( Vector4D& a ) -{ - a.x = a.y = a.z = a.w = 0.0f; -} - -//----------------------------------------------------------------------------- -// assignment -//----------------------------------------------------------------------------- - -inline Vector4D& Vector4D::operator=(const Vector4D &vOther) -{ - Assert( vOther.IsValid() ); - x=vOther.x; y=vOther.y; z=vOther.z; w=vOther.w; - return *this; -} - -//----------------------------------------------------------------------------- -// Array access -//----------------------------------------------------------------------------- - -inline vec_t& Vector4D::operator[](int i) -{ - Assert( (i >= 0) && (i < 4) ); - return ((vec_t*)this)[i]; -} - -inline vec_t Vector4D::operator[](int i) const -{ - Assert( (i >= 0) && (i < 4) ); - return ((vec_t*)this)[i]; -} - -//----------------------------------------------------------------------------- -// Cast to Vector and Vector2D... -//----------------------------------------------------------------------------- - -inline Vector& Vector4D::AsVector3D() -{ - return *(Vector*)this; -} - -inline Vector const& Vector4D::AsVector3D() const -{ - return *(Vector const*)this; -} - -inline Vector2D& Vector4D::AsVector2D() -{ - return *(Vector2D*)this; -} - -inline Vector2D const& Vector4D::AsVector2D() const -{ - return *(Vector2D const*)this; -} - -//----------------------------------------------------------------------------- -// Base address... -//----------------------------------------------------------------------------- - -inline vec_t* Vector4D::Base() -{ - return (vec_t*)this; -} - -inline vec_t const* Vector4D::Base() const -{ - return (vec_t const*)this; -} - -//----------------------------------------------------------------------------- -// IsValid? 
-//----------------------------------------------------------------------------- - -inline bool Vector4D::IsValid() const -{ - return IsFinite(x) && IsFinite(y) && IsFinite(z) && IsFinite(w); -} - -//----------------------------------------------------------------------------- -// comparison -//----------------------------------------------------------------------------- - -inline bool Vector4D::operator==( Vector4D const& src ) const -{ - Assert( src.IsValid() && IsValid() ); - return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w); -} - -inline bool Vector4D::operator!=( Vector4D const& src ) const -{ - Assert( src.IsValid() && IsValid() ); - return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w); -} - - -//----------------------------------------------------------------------------- -// Copy -//----------------------------------------------------------------------------- - -inline void Vector4DCopy( Vector4D const& src, Vector4D& dst ) -{ - Assert( src.IsValid() ); - dst.x = src.x; - dst.y = src.y; - dst.z = src.z; - dst.w = src.w; -} - -inline void Vector4D::CopyToArray(float* rgfl) const -{ - Assert( IsValid() ); - Assert( rgfl ); - rgfl[0] = x; rgfl[1] = y; rgfl[2] = z; rgfl[3] = w; -} - -//----------------------------------------------------------------------------- -// standard math operations -//----------------------------------------------------------------------------- - -inline void Vector4D::Negate() -{ - Assert( IsValid() ); - x = -x; y = -y; z = -z; w = -w; -} - -inline Vector4D& Vector4D::operator+=(const Vector4D& v) -{ - Assert( IsValid() && v.IsValid() ); - x+=v.x; y+=v.y; z += v.z; w += v.w; - return *this; -} - -inline Vector4D& Vector4D::operator-=(const Vector4D& v) -{ - Assert( IsValid() && v.IsValid() ); - x-=v.x; y-=v.y; z -= v.z; w -= v.w; - return *this; -} - -inline Vector4D& Vector4D::operator*=(float fl) -{ - x *= fl; - y *= fl; - z *= fl; - w *= fl; - Assert( IsValid() ); - return *this; -} - -inline 
Vector4D& Vector4D::operator*=(Vector4D const& v) -{ - x *= v.x; - y *= v.y; - z *= v.z; - w *= v.w; - Assert( IsValid() ); - return *this; -} - -inline Vector4D& Vector4D::operator/=(float fl) -{ - Assert( fl != 0.0f ); - float oofl = 1.0f / fl; - x *= oofl; - y *= oofl; - z *= oofl; - w *= oofl; - Assert( IsValid() ); - return *this; -} - -inline Vector4D& Vector4D::operator/=(Vector4D const& v) -{ - Assert( v.x != 0.0f && v.y != 0.0f && v.z != 0.0f && v.w != 0.0f ); - x /= v.x; - y /= v.y; - z /= v.z; - w /= v.w; - Assert( IsValid() ); - return *this; -} - -inline void Vector4DAdd( Vector4D const& a, Vector4D const& b, Vector4D& c ) -{ - Assert( a.IsValid() && b.IsValid() ); - c.x = a.x + b.x; - c.y = a.y + b.y; - c.z = a.z + b.z; - c.w = a.w + b.w; -} - -inline void Vector4DSubtract( Vector4D const& a, Vector4D const& b, Vector4D& c ) -{ - Assert( a.IsValid() && b.IsValid() ); - c.x = a.x - b.x; - c.y = a.y - b.y; - c.z = a.z - b.z; - c.w = a.w - b.w; -} - -inline void Vector4DMultiply( Vector4D const& a, vec_t b, Vector4D& c ) -{ - Assert( a.IsValid() && IsFinite(b) ); - c.x = a.x * b; - c.y = a.y * b; - c.z = a.z * b; - c.w = a.w * b; -} - -inline void Vector4DMultiply( Vector4D const& a, Vector4D const& b, Vector4D& c ) -{ - Assert( a.IsValid() && b.IsValid() ); - c.x = a.x * b.x; - c.y = a.y * b.y; - c.z = a.z * b.z; - c.w = a.w * b.w; -} - -inline void Vector4DDivide( Vector4D const& a, vec_t b, Vector4D& c ) -{ - Assert( a.IsValid() ); - Assert( b != 0.0f ); - vec_t oob = 1.0f / b; - c.x = a.x * oob; - c.y = a.y * oob; - c.z = a.z * oob; - c.w = a.w * oob; -} - -inline void Vector4DDivide( Vector4D const& a, Vector4D const& b, Vector4D& c ) -{ - Assert( a.IsValid() ); - Assert( (b.x != 0.0f) && (b.y != 0.0f) && (b.z != 0.0f) && (b.w != 0.0f) ); - c.x = a.x / b.x; - c.y = a.y / b.y; - c.z = a.z / b.z; - c.w = a.w / b.w; -} - -inline void Vector4DMA( Vector4D const& start, float s, Vector4D const& dir, Vector4D& result ) -{ - Assert( start.IsValid() && 
IsFinite(s) && dir.IsValid() ); - result.x = start.x + s*dir.x; - result.y = start.y + s*dir.y; - result.z = start.z + s*dir.z; - result.w = start.w + s*dir.w; -} - -// FIXME: Remove -// For backwards compatability -inline void Vector4D::MulAdd(Vector4D const& a, Vector4D const& b, float scalar) -{ - x = a.x + b.x * scalar; - y = a.y + b.y * scalar; - z = a.z + b.z * scalar; - w = a.w + b.w * scalar; -} - -inline void Vector4DLerp(const Vector4D& src1, const Vector4D& src2, vec_t t, Vector4D& dest ) -{ - dest[0] = src1[0] + (src2[0] - src1[0]) * t; - dest[1] = src1[1] + (src2[1] - src1[1]) * t; - dest[2] = src1[2] + (src2[2] - src1[2]) * t; - dest[3] = src1[3] + (src2[3] - src1[3]) * t; -} - -//----------------------------------------------------------------------------- -// dot, cross -//----------------------------------------------------------------------------- - -inline vec_t DotProduct4D(const Vector4D& a, const Vector4D& b) -{ - Assert( a.IsValid() && b.IsValid() ); - return( a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w ); -} - -// for backwards compatability -inline vec_t Vector4D::Dot( Vector4D const& vOther ) const -{ - return DotProduct4D( *this, vOther ); -} - - -//----------------------------------------------------------------------------- -// length -//----------------------------------------------------------------------------- - -inline vec_t Vector4DLength( Vector4D const& v ) -{ - Assert( v.IsValid() ); - return (vec_t)FastSqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w); -} - -inline vec_t Vector4D::LengthSqr(void) const -{ - Assert( IsValid() ); - return (x*x + y*y + z*z + w*w); -} - -inline vec_t Vector4D::Length(void) const -{ - return Vector4DLength( *this ); -} - - -//----------------------------------------------------------------------------- -// Normalization -//----------------------------------------------------------------------------- - -// FIXME: Can't use until we're un-macroed in mathlib.h -inline vec_t Vector4DNormalize( Vector4D& v ) -{ - 
Assert( v.IsValid() ); - vec_t l = v.Length(); - if (l != 0.0f) - { - v /= l; - } - else - { - v.x = v.y = v.z = v.w = 0.0f; - } - return l; -} - -//----------------------------------------------------------------------------- -// Get the distance from this Vector4D to the other one -//----------------------------------------------------------------------------- - -inline vec_t Vector4D::DistTo(const Vector4D &vOther) const -{ - Vector4D delta; - Vector4DSubtract( *this, vOther, delta ); - return delta.Length(); -} - -inline vec_t Vector4D::DistToSqr(const Vector4D &vOther) const -{ - Vector4D delta; - Vector4DSubtract( *this, vOther, delta ); - return delta.LengthSqr(); -} - - -//----------------------------------------------------------------------------- -// Vector4DAligned routines -//----------------------------------------------------------------------------- - -inline Vector4DAligned::Vector4DAligned( vec_t X, vec_t Y, vec_t Z, vec_t W ) -{ - x = X; y = Y; z = Z; w = W; - Assert( IsValid() ); -} - -inline void Vector4DAligned::Set( vec_t X, vec_t Y, vec_t Z, vec_t W ) -{ - x = X; y = Y; z = Z; w = W; - Assert( IsValid() ); -} - -inline void Vector4DAligned::InitZero( void ) -{ -#if !defined( _X360 ) - this->AsM128() = _mm_set1_ps( 0.0f ); -#else - this->AsM128() = __vspltisw( 0 ); -#endif - Assert( IsValid() ); -} - -inline void Vector4DMultiplyAligned( Vector4DAligned const& a, Vector4DAligned const& b, Vector4DAligned& c ) -{ - Assert( a.IsValid() && b.IsValid() ); -#if !defined( _X360 ) - c.x = a.x * b.x; - c.y = a.y * b.y; - c.z = a.z * b.z; - c.w = a.w * b.w; -#else - c.AsM128() = __vmulfp( a.AsM128(), b.AsM128() ); -#endif -} - -inline void Vector4DWeightMAD( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB ) -{ - Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) ); - -#if !defined( _X360 ) - vOutA.x += vInA.x * w; - vOutA.y += vInA.y * w; - vOutA.z += vInA.z * w; - vOutA.w += 
vInA.w * w; - - vOutB.x += vInB.x * w; - vOutB.y += vInB.y * w; - vOutB.z += vInB.z * w; - vOutB.w += vInB.w * w; -#else - __vector4 temp; - - temp = __lvlx( &w, 0 ); - temp = __vspltw( temp, 0 ); - - vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() ); - vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() ); -#endif -} - -inline void Vector4DWeightMADSSE( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB ) -{ - Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) ); - -#if !defined( _X360 ) - // Replicate scalar float out to 4 components - __m128 packed = _mm_set1_ps( w ); - - // 4D SSE Vector MAD - vOutA.AsM128() = _mm_add_ps( vOutA.AsM128(), _mm_mul_ps( vInA.AsM128(), packed ) ); - vOutB.AsM128() = _mm_add_ps( vOutB.AsM128(), _mm_mul_ps( vInB.AsM128(), packed ) ); -#else - __vector4 temp; - - temp = __lvlx( &w, 0 ); - temp = __vspltw( temp, 0 ); - - vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() ); - vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() ); -#endif -} - -#endif // VECTOR4D_H - +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// + +#ifndef VECTOR4D_H +#define VECTOR4D_H + +#ifdef _WIN32 +#pragma once +#endif + +#include +#include // For rand(). We really need a library! +#include +#if !defined( _X360 ) +#include // For SSE +#endif +#include "basetypes.h" // For vec_t, put this somewhere else? 
+#include "tier0/dbg.h" +#include "mathlib/math_pfns.h" + +// forward declarations +class Vector; +class Vector2D; + +//========================================================= +// 4D Vector4D +//========================================================= + +class Vector4D +{ +public: + // Members + vec_t x, y, z, w; + + // Construction/destruction + Vector4D(void); + Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W); + Vector4D(const float *pFloat); + + // Initialization + void Init(vec_t ix=0.0f, vec_t iy=0.0f, vec_t iz=0.0f, vec_t iw=0.0f); + + // Got any nasty NAN's? + bool IsValid() const; + + // array access... + vec_t operator[](int i) const; + vec_t& operator[](int i); + + // Base address... + inline vec_t* Base(); + inline vec_t const* Base() const; + + // Cast to Vector and Vector2D... + Vector& AsVector3D(); + Vector const& AsVector3D() const; + + Vector2D& AsVector2D(); + Vector2D const& AsVector2D() const; + + // Initialization methods + void Random( vec_t minVal, vec_t maxVal ); + + // equality + bool operator==(const Vector4D& v) const; + bool operator!=(const Vector4D& v) const; + + // arithmetic operations + Vector4D& operator+=(const Vector4D &v); + Vector4D& operator-=(const Vector4D &v); + Vector4D& operator*=(const Vector4D &v); + Vector4D& operator*=(float s); + Vector4D& operator/=(const Vector4D &v); + Vector4D& operator/=(float s); + + // negate the Vector4D components + void Negate(); + + // Get the Vector4D's magnitude. + vec_t Length() const; + + // Get the Vector4D's magnitude squared. + vec_t LengthSqr(void) const; + + // return true if this vector is (0,0,0,0) within tolerance + bool IsZero( float tolerance = 0.01f ) const + { + return (x > -tolerance && x < tolerance && + y > -tolerance && y < tolerance && + z > -tolerance && z < tolerance && + w > -tolerance && w < tolerance); + } + + // Get the distance from this Vector4D to the other one. 
+ vec_t DistTo(const Vector4D &vOther) const; + + // Get the distance from this Vector4D to the other one squared. + vec_t DistToSqr(const Vector4D &vOther) const; + + // Copy + void CopyToArray(float* rgfl) const; + + // Multiply, add, and assign to this (ie: *this = a + b * scalar). This + // is about 12% faster than the actual Vector4D equation (because it's done per-component + // rather than per-Vector4D). + void MulAdd(Vector4D const& a, Vector4D const& b, float scalar); + + // Dot product. + vec_t Dot(Vector4D const& vOther) const; + + // No copy constructors allowed if we're in optimal mode +#ifdef VECTOR_NO_SLOW_OPERATIONS +private: +#else +public: +#endif + Vector4D(Vector4D const& vOther); + + // No assignment operators either... + Vector4D& operator=( Vector4D const& src ); +}; + +const Vector4D vec4_origin( 0.0f, 0.0f, 0.0f, 0.0f ); +const Vector4D vec4_invalid( FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX ); + +//----------------------------------------------------------------------------- +// SSE optimized routines +//----------------------------------------------------------------------------- + +class ALIGN16 Vector4DAligned : public Vector4D +{ +public: + Vector4DAligned(void) {} + Vector4DAligned( vec_t X, vec_t Y, vec_t Z, vec_t W ); + + inline void Set( vec_t X, vec_t Y, vec_t Z, vec_t W ); + inline void InitZero( void ); + + inline __m128 &AsM128() { return *(__m128*)&x; } + inline const __m128 &AsM128() const { return *(const __m128*)&x; } + +private: + // No copy constructors allowed if we're in optimal mode + Vector4DAligned( Vector4DAligned const& vOther ); + + // No assignment operators either... 
+ Vector4DAligned& operator=( Vector4DAligned const& src ); +} ALIGN16_POST; + +//----------------------------------------------------------------------------- +// Vector4D related operations +//----------------------------------------------------------------------------- + +// Vector4D clear +void Vector4DClear( Vector4D& a ); + +// Copy +void Vector4DCopy( Vector4D const& src, Vector4D& dst ); + +// Vector4D arithmetic +void Vector4DAdd( Vector4D const& a, Vector4D const& b, Vector4D& result ); +void Vector4DSubtract( Vector4D const& a, Vector4D const& b, Vector4D& result ); +void Vector4DMultiply( Vector4D const& a, vec_t b, Vector4D& result ); +void Vector4DMultiply( Vector4D const& a, Vector4D const& b, Vector4D& result ); +void Vector4DDivide( Vector4D const& a, vec_t b, Vector4D& result ); +void Vector4DDivide( Vector4D const& a, Vector4D const& b, Vector4D& result ); +void Vector4DMA( Vector4D const& start, float s, Vector4D const& dir, Vector4D& result ); + +// Vector4DAligned arithmetic +void Vector4DMultiplyAligned( Vector4DAligned const& a, vec_t b, Vector4DAligned& result ); + + +#define Vector4DExpand( v ) (v).x, (v).y, (v).z, (v).w + +// Normalization +vec_t Vector4DNormalize( Vector4D& v ); + +// Length +vec_t Vector4DLength( Vector4D const& v ); + +// Dot Product +vec_t DotProduct4D(Vector4D const& a, Vector4D const& b); + +// Linearly interpolate between two vectors +void Vector4DLerp(Vector4D const& src1, Vector4D const& src2, vec_t t, Vector4D& dest ); + + +//----------------------------------------------------------------------------- +// +// Inlined Vector4D methods +// +//----------------------------------------------------------------------------- + + +//----------------------------------------------------------------------------- +// constructors +//----------------------------------------------------------------------------- + +inline Vector4D::Vector4D(void) +{ +#ifdef _DEBUG + // Initialize to NAN to catch errors + x = y = z = w = 
VEC_T_NAN; +#endif +} + +inline Vector4D::Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W ) +{ + x = X; y = Y; z = Z; w = W; + Assert( IsValid() ); +} + +inline Vector4D::Vector4D(const float *pFloat) +{ + Assert( pFloat ); + x = pFloat[0]; y = pFloat[1]; z = pFloat[2]; w = pFloat[3]; + Assert( IsValid() ); +} + + +//----------------------------------------------------------------------------- +// copy constructor +//----------------------------------------------------------------------------- + +inline Vector4D::Vector4D(const Vector4D &vOther) +{ + Assert( vOther.IsValid() ); + x = vOther.x; y = vOther.y; z = vOther.z; w = vOther.w; +} + +//----------------------------------------------------------------------------- +// initialization +//----------------------------------------------------------------------------- + +inline void Vector4D::Init( vec_t ix, vec_t iy, vec_t iz, vec_t iw ) +{ + x = ix; y = iy; z = iz; w = iw; + Assert( IsValid() ); +} + +inline void Vector4D::Random( vec_t minVal, vec_t maxVal ) +{ + x = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + y = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + z = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal); + w = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal); +} + +inline void Vector4DClear( Vector4D& a ) +{ + a.x = a.y = a.z = a.w = 0.0f; +} + +//----------------------------------------------------------------------------- +// assignment +//----------------------------------------------------------------------------- + +inline Vector4D& Vector4D::operator=(const Vector4D &vOther) +{ + Assert( vOther.IsValid() ); + x=vOther.x; y=vOther.y; z=vOther.z; w=vOther.w; + return *this; +} + +//----------------------------------------------------------------------------- +// Array access +//----------------------------------------------------------------------------- + +inline vec_t& Vector4D::operator[](int i) +{ + Assert( (i >= 0) && (i < 4) ); + 
return ((vec_t*)this)[i]; +} + +inline vec_t Vector4D::operator[](int i) const +{ + Assert( (i >= 0) && (i < 4) ); + return ((vec_t*)this)[i]; +} + +//----------------------------------------------------------------------------- +// Cast to Vector and Vector2D... +//----------------------------------------------------------------------------- + +inline Vector& Vector4D::AsVector3D() +{ + return *(Vector*)this; +} + +inline Vector const& Vector4D::AsVector3D() const +{ + return *(Vector const*)this; +} + +inline Vector2D& Vector4D::AsVector2D() +{ + return *(Vector2D*)this; +} + +inline Vector2D const& Vector4D::AsVector2D() const +{ + return *(Vector2D const*)this; +} + +//----------------------------------------------------------------------------- +// Base address... +//----------------------------------------------------------------------------- + +inline vec_t* Vector4D::Base() +{ + return (vec_t*)this; +} + +inline vec_t const* Vector4D::Base() const +{ + return (vec_t const*)this; +} + +//----------------------------------------------------------------------------- +// IsValid? 
+//----------------------------------------------------------------------------- + +inline bool Vector4D::IsValid() const +{ + return IsFinite(x) && IsFinite(y) && IsFinite(z) && IsFinite(w); +} + +//----------------------------------------------------------------------------- +// comparison +//----------------------------------------------------------------------------- + +inline bool Vector4D::operator==( Vector4D const& src ) const +{ + Assert( src.IsValid() && IsValid() ); + return (src.x == x) && (src.y == y) && (src.z == z) && (src.w == w); +} + +inline bool Vector4D::operator!=( Vector4D const& src ) const +{ + Assert( src.IsValid() && IsValid() ); + return (src.x != x) || (src.y != y) || (src.z != z) || (src.w != w); +} + + +//----------------------------------------------------------------------------- +// Copy +//----------------------------------------------------------------------------- + +inline void Vector4DCopy( Vector4D const& src, Vector4D& dst ) +{ + Assert( src.IsValid() ); + dst.x = src.x; + dst.y = src.y; + dst.z = src.z; + dst.w = src.w; +} + +inline void Vector4D::CopyToArray(float* rgfl) const +{ + Assert( IsValid() ); + Assert( rgfl ); + rgfl[0] = x; rgfl[1] = y; rgfl[2] = z; rgfl[3] = w; +} + +//----------------------------------------------------------------------------- +// standard math operations +//----------------------------------------------------------------------------- + +inline void Vector4D::Negate() +{ + Assert( IsValid() ); + x = -x; y = -y; z = -z; w = -w; +} + +inline Vector4D& Vector4D::operator+=(const Vector4D& v) +{ + Assert( IsValid() && v.IsValid() ); + x+=v.x; y+=v.y; z += v.z; w += v.w; + return *this; +} + +inline Vector4D& Vector4D::operator-=(const Vector4D& v) +{ + Assert( IsValid() && v.IsValid() ); + x-=v.x; y-=v.y; z -= v.z; w -= v.w; + return *this; +} + +inline Vector4D& Vector4D::operator*=(float fl) +{ + x *= fl; + y *= fl; + z *= fl; + w *= fl; + Assert( IsValid() ); + return *this; +} + +inline 
Vector4D& Vector4D::operator*=(Vector4D const& v) +{ + x *= v.x; + y *= v.y; + z *= v.z; + w *= v.w; + Assert( IsValid() ); + return *this; +} + +inline Vector4D& Vector4D::operator/=(float fl) +{ + Assert( fl != 0.0f ); + float oofl = 1.0f / fl; + x *= oofl; + y *= oofl; + z *= oofl; + w *= oofl; + Assert( IsValid() ); + return *this; +} + +inline Vector4D& Vector4D::operator/=(Vector4D const& v) +{ + Assert( v.x != 0.0f && v.y != 0.0f && v.z != 0.0f && v.w != 0.0f ); + x /= v.x; + y /= v.y; + z /= v.z; + w /= v.w; + Assert( IsValid() ); + return *this; +} + +inline void Vector4DAdd( Vector4D const& a, Vector4D const& b, Vector4D& c ) +{ + Assert( a.IsValid() && b.IsValid() ); + c.x = a.x + b.x; + c.y = a.y + b.y; + c.z = a.z + b.z; + c.w = a.w + b.w; +} + +inline void Vector4DSubtract( Vector4D const& a, Vector4D const& b, Vector4D& c ) +{ + Assert( a.IsValid() && b.IsValid() ); + c.x = a.x - b.x; + c.y = a.y - b.y; + c.z = a.z - b.z; + c.w = a.w - b.w; +} + +inline void Vector4DMultiply( Vector4D const& a, vec_t b, Vector4D& c ) +{ + Assert( a.IsValid() && IsFinite(b) ); + c.x = a.x * b; + c.y = a.y * b; + c.z = a.z * b; + c.w = a.w * b; +} + +inline void Vector4DMultiply( Vector4D const& a, Vector4D const& b, Vector4D& c ) +{ + Assert( a.IsValid() && b.IsValid() ); + c.x = a.x * b.x; + c.y = a.y * b.y; + c.z = a.z * b.z; + c.w = a.w * b.w; +} + +inline void Vector4DDivide( Vector4D const& a, vec_t b, Vector4D& c ) +{ + Assert( a.IsValid() ); + Assert( b != 0.0f ); + vec_t oob = 1.0f / b; + c.x = a.x * oob; + c.y = a.y * oob; + c.z = a.z * oob; + c.w = a.w * oob; +} + +inline void Vector4DDivide( Vector4D const& a, Vector4D const& b, Vector4D& c ) +{ + Assert( a.IsValid() ); + Assert( (b.x != 0.0f) && (b.y != 0.0f) && (b.z != 0.0f) && (b.w != 0.0f) ); + c.x = a.x / b.x; + c.y = a.y / b.y; + c.z = a.z / b.z; + c.w = a.w / b.w; +} + +inline void Vector4DMA( Vector4D const& start, float s, Vector4D const& dir, Vector4D& result ) +{ + Assert( start.IsValid() && 
IsFinite(s) && dir.IsValid() ); + result.x = start.x + s*dir.x; + result.y = start.y + s*dir.y; + result.z = start.z + s*dir.z; + result.w = start.w + s*dir.w; +} + +// FIXME: Remove +// For backwards compatability +inline void Vector4D::MulAdd(Vector4D const& a, Vector4D const& b, float scalar) +{ + x = a.x + b.x * scalar; + y = a.y + b.y * scalar; + z = a.z + b.z * scalar; + w = a.w + b.w * scalar; +} + +inline void Vector4DLerp(const Vector4D& src1, const Vector4D& src2, vec_t t, Vector4D& dest ) +{ + dest[0] = src1[0] + (src2[0] - src1[0]) * t; + dest[1] = src1[1] + (src2[1] - src1[1]) * t; + dest[2] = src1[2] + (src2[2] - src1[2]) * t; + dest[3] = src1[3] + (src2[3] - src1[3]) * t; +} + +//----------------------------------------------------------------------------- +// dot, cross +//----------------------------------------------------------------------------- + +inline vec_t DotProduct4D(const Vector4D& a, const Vector4D& b) +{ + Assert( a.IsValid() && b.IsValid() ); + return( a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w ); +} + +// for backwards compatability +inline vec_t Vector4D::Dot( Vector4D const& vOther ) const +{ + return DotProduct4D( *this, vOther ); +} + + +//----------------------------------------------------------------------------- +// length +//----------------------------------------------------------------------------- + +inline vec_t Vector4DLength( Vector4D const& v ) +{ + Assert( v.IsValid() ); + return (vec_t)FastSqrt(v.x*v.x + v.y*v.y + v.z*v.z + v.w*v.w); +} + +inline vec_t Vector4D::LengthSqr(void) const +{ + Assert( IsValid() ); + return (x*x + y*y + z*z + w*w); +} + +inline vec_t Vector4D::Length(void) const +{ + return Vector4DLength( *this ); +} + + +//----------------------------------------------------------------------------- +// Normalization +//----------------------------------------------------------------------------- + +// FIXME: Can't use until we're un-macroed in mathlib.h +inline vec_t Vector4DNormalize( Vector4D& v ) +{ + 
Assert( v.IsValid() ); + vec_t l = v.Length(); + if (l != 0.0f) + { + v /= l; + } + else + { + v.x = v.y = v.z = v.w = 0.0f; + } + return l; +} + +//----------------------------------------------------------------------------- +// Get the distance from this Vector4D to the other one +//----------------------------------------------------------------------------- + +inline vec_t Vector4D::DistTo(const Vector4D &vOther) const +{ + Vector4D delta; + Vector4DSubtract( *this, vOther, delta ); + return delta.Length(); +} + +inline vec_t Vector4D::DistToSqr(const Vector4D &vOther) const +{ + Vector4D delta; + Vector4DSubtract( *this, vOther, delta ); + return delta.LengthSqr(); +} + + +//----------------------------------------------------------------------------- +// Vector4DAligned routines +//----------------------------------------------------------------------------- + +inline Vector4DAligned::Vector4DAligned( vec_t X, vec_t Y, vec_t Z, vec_t W ) +{ + x = X; y = Y; z = Z; w = W; + Assert( IsValid() ); +} + +inline void Vector4DAligned::Set( vec_t X, vec_t Y, vec_t Z, vec_t W ) +{ + x = X; y = Y; z = Z; w = W; + Assert( IsValid() ); +} + +inline void Vector4DAligned::InitZero( void ) +{ +#if !defined( _X360 ) + this->AsM128() = _mm_set1_ps( 0.0f ); +#else + this->AsM128() = __vspltisw( 0 ); +#endif + Assert( IsValid() ); +} + +inline void Vector4DMultiplyAligned( Vector4DAligned const& a, Vector4DAligned const& b, Vector4DAligned& c ) +{ + Assert( a.IsValid() && b.IsValid() ); +#if !defined( _X360 ) + c.x = a.x * b.x; + c.y = a.y * b.y; + c.z = a.z * b.z; + c.w = a.w * b.w; +#else + c.AsM128() = __vmulfp( a.AsM128(), b.AsM128() ); +#endif +} + +inline void Vector4DWeightMAD( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB ) +{ + Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) ); + +#if !defined( _X360 ) + vOutA.x += vInA.x * w; + vOutA.y += vInA.y * w; + vOutA.z += vInA.z * w; + vOutA.w += 
vInA.w * w; + + vOutB.x += vInB.x * w; + vOutB.y += vInB.y * w; + vOutB.z += vInB.z * w; + vOutB.w += vInB.w * w; +#else + __vector4 temp; + + temp = __lvlx( &w, 0 ); + temp = __vspltw( temp, 0 ); + + vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() ); + vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() ); +#endif +} + +inline void Vector4DWeightMADSSE( vec_t w, Vector4DAligned const& vInA, Vector4DAligned& vOutA, Vector4DAligned const& vInB, Vector4DAligned& vOutB ) +{ + Assert( vInA.IsValid() && vInB.IsValid() && IsFinite(w) ); + +#if !defined( _X360 ) + // Replicate scalar float out to 4 components + __m128 packed = _mm_set1_ps( w ); + + // 4D SSE Vector MAD + vOutA.AsM128() = _mm_add_ps( vOutA.AsM128(), _mm_mul_ps( vInA.AsM128(), packed ) ); + vOutB.AsM128() = _mm_add_ps( vOutB.AsM128(), _mm_mul_ps( vInB.AsM128(), packed ) ); +#else + __vector4 temp; + + temp = __lvlx( &w, 0 ); + temp = __vspltw( temp, 0 ); + + vOutA.AsM128() = __vmaddfp( vInA.AsM128(), temp, vOutA.AsM128() ); + vOutB.AsM128() = __vmaddfp( vInB.AsM128(), temp, vOutB.AsM128() ); +#endif +} + +#endif // VECTOR4D_H + diff --git a/mp/src/public/mathlib/vmatrix.h b/mp/src/public/mathlib/vmatrix.h index e09a964f..2c536672 100644 --- a/mp/src/public/mathlib/vmatrix.h +++ b/mp/src/public/mathlib/vmatrix.h @@ -1,950 +1,950 @@ -//========= Copyright Valve Corporation, All rights reserved. ============// -// -// Purpose: -// -// $NoKeywords: $ -// -//=============================================================================// -// -// VMatrix always postmultiply vectors as in Ax = b. -// Given a set of basis vectors ((F)orward, (L)eft, (U)p), and a (T)ranslation, -// a matrix to transform a vector into that space looks like this: -// Fx Lx Ux Tx -// Fy Ly Uy Ty -// Fz Lz Uz Tz -// 0 0 0 1 - -// Note that concatenating matrices needs to multiply them in reverse order. 
-// ie: if I want to apply matrix A, B, then C, the equation needs to look like this: -// C * B * A * v -// ie: -// v = A * v; -// v = B * v; -// v = C * v; -//============================================================================= - -#ifndef VMATRIX_H -#define VMATRIX_H - -#ifdef _WIN32 -#pragma once -#endif - -#include -#include "mathlib/vector.h" -#include "mathlib/vplane.h" -#include "mathlib/vector4d.h" -#include "mathlib/mathlib.h" - -struct cplane_t; - - -class VMatrix -{ -public: - - VMatrix(); - VMatrix( - vec_t m00, vec_t m01, vec_t m02, vec_t m03, - vec_t m10, vec_t m11, vec_t m12, vec_t m13, - vec_t m20, vec_t m21, vec_t m22, vec_t m23, - vec_t m30, vec_t m31, vec_t m32, vec_t m33 - ); - - // Creates a matrix where the X axis = forward - // the Y axis = left, and the Z axis = up - VMatrix( const Vector& forward, const Vector& left, const Vector& up ); - VMatrix( const Vector& forward, const Vector& left, const Vector& up, const Vector& translation ); - - // Construct from a 3x4 matrix - VMatrix( const matrix3x4_t& matrix3x4 ); - - // Set the values in the matrix. - void Init( - vec_t m00, vec_t m01, vec_t m02, vec_t m03, - vec_t m10, vec_t m11, vec_t m12, vec_t m13, - vec_t m20, vec_t m21, vec_t m22, vec_t m23, - vec_t m30, vec_t m31, vec_t m32, vec_t m33 - ); - - - // Initialize from a 3x4 - void Init( const matrix3x4_t& matrix3x4 ); - - // array access - inline float* operator[](int i) - { - return m[i]; - } - - inline const float* operator[](int i) const - { - return m[i]; - } - - // Get a pointer to m[0][0] - inline float *Base() - { - return &m[0][0]; - } - - inline const float *Base() const - { - return &m[0][0]; - } - - void SetLeft(const Vector &vLeft); - void SetUp(const Vector &vUp); - void SetForward(const Vector &vForward); - - void GetBasisVectors(Vector &vForward, Vector &vLeft, Vector &vUp) const; - void SetBasisVectors(const Vector &vForward, const Vector &vLeft, const Vector &vUp); - - // Get/set the translation. 
- Vector & GetTranslation( Vector &vTrans ) const; - void SetTranslation(const Vector &vTrans); - - void PreTranslate(const Vector &vTrans); - void PostTranslate(const Vector &vTrans); - - matrix3x4_t& As3x4(); - const matrix3x4_t& As3x4() const; - void CopyFrom3x4( const matrix3x4_t &m3x4 ); - void Set3x4( matrix3x4_t& matrix3x4 ) const; - - bool operator==( const VMatrix& src ) const; - bool operator!=( const VMatrix& src ) const { return !( *this == src ); } - -#ifndef VECTOR_NO_SLOW_OPERATIONS - // Access the basis vectors. - Vector GetLeft() const; - Vector GetUp() const; - Vector GetForward() const; - Vector GetTranslation() const; -#endif - - -// Matrix->vector operations. -public: - // Multiply by a 3D vector (same as operator*). - void V3Mul(const Vector &vIn, Vector &vOut) const; - - // Multiply by a 4D vector. - void V4Mul(const Vector4D &vIn, Vector4D &vOut) const; - -#ifndef VECTOR_NO_SLOW_OPERATIONS - // Applies the rotation (ignores translation in the matrix). (This just calls VMul3x3). - Vector ApplyRotation(const Vector &vVec) const; - - // Multiply by a vector (divides by w, assumes input w is 1). - Vector operator*(const Vector &vVec) const; - - // Multiply by the upper 3x3 part of the matrix (ie: only apply rotation). - Vector VMul3x3(const Vector &vVec) const; - - // Apply the inverse (transposed) rotation (only works on pure rotation matrix) - Vector VMul3x3Transpose(const Vector &vVec) const; - - // Multiply by the upper 3 rows. - Vector VMul4x3(const Vector &vVec) const; - - // Apply the inverse (transposed) transformation (only works on pure rotation/translation) - Vector VMul4x3Transpose(const Vector &vVec) const; -#endif - - -// Matrix->plane operations. -public: - // Transform the plane. The matrix can only contain translation and rotation. - void TransformPlane( const VPlane &inPlane, VPlane &outPlane ) const; - -#ifndef VECTOR_NO_SLOW_OPERATIONS - // Just calls TransformPlane and returns the result. 
- VPlane operator*(const VPlane &thePlane) const; -#endif - -// Matrix->matrix operations. -public: - - VMatrix& operator=(const VMatrix &mOther); - - // Multiply two matrices (out = this * vm). - void MatrixMul( const VMatrix &vm, VMatrix &out ) const; - - // Add two matrices. - const VMatrix& operator+=(const VMatrix &other); - -#ifndef VECTOR_NO_SLOW_OPERATIONS - // Just calls MatrixMul and returns the result. - VMatrix operator*(const VMatrix &mOther) const; - - // Add/Subtract two matrices. - VMatrix operator+(const VMatrix &other) const; - VMatrix operator-(const VMatrix &other) const; - - // Negation. - VMatrix operator-() const; - - // Return inverse matrix. Be careful because the results are undefined - // if the matrix doesn't have an inverse (ie: InverseGeneral returns false). - VMatrix operator~() const; -#endif - -// Matrix operations. -public: - // Set to identity. - void Identity(); - - bool IsIdentity() const; - - // Setup a matrix for origin and angles. - void SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles ); - - // General inverse. This may fail so check the return! - bool InverseGeneral(VMatrix &vInverse) const; - - // Does a fast inverse, assuming the matrix only contains translation and rotation. - void InverseTR( VMatrix &mRet ) const; - - // Usually used for debug checks. Returns true if the upper 3x3 contains - // unit vectors and they are all orthogonal. - bool IsRotationMatrix() const; - -#ifndef VECTOR_NO_SLOW_OPERATIONS - // This calls the other InverseTR and returns the result. - VMatrix InverseTR() const; - - // Get the scale of the matrix's basis vectors. - Vector GetScale() const; - - // (Fast) multiply by a scaling matrix setup from vScale. - VMatrix Scale(const Vector &vScale); - - // Normalize the basis vectors. - VMatrix NormalizeBasisVectors() const; - - // Transpose. - VMatrix Transpose() const; - - // Transpose upper-left 3x3. - VMatrix Transpose3x3() const; -#endif - -public: - // The matrix. 
- vec_t m[4][4]; -}; - - - -//----------------------------------------------------------------------------- -// Helper functions. -//----------------------------------------------------------------------------- - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -// Setup an identity matrix. -VMatrix SetupMatrixIdentity(); - -// Setup as a scaling matrix. -VMatrix SetupMatrixScale(const Vector &vScale); - -// Setup a translation matrix. -VMatrix SetupMatrixTranslation(const Vector &vTranslation); - -// Setup a matrix to reflect around the plane. -VMatrix SetupMatrixReflection(const VPlane &thePlane); - -// Setup a matrix to project from vOrigin onto thePlane. -VMatrix SetupMatrixProjection(const Vector &vOrigin, const VPlane &thePlane); - -// Setup a matrix to rotate the specified amount around the specified axis. -VMatrix SetupMatrixAxisRot(const Vector &vAxis, vec_t fDegrees); - -// Setup a matrix from euler angles. Just sets identity and calls MatrixAngles. -VMatrix SetupMatrixAngles(const QAngle &vAngles); - -// Setup a matrix for origin and angles. -VMatrix SetupMatrixOrgAngles(const Vector &origin, const QAngle &vAngles); - -#endif - -#define VMatToString(mat) (static_cast(CFmtStr("[ (%f, %f, %f), (%f, %f, %f), (%f, %f, %f), (%f, %f, %f) ]", mat.m[0][0], mat.m[0][1], mat.m[0][2], mat.m[0][3], mat.m[1][0], mat.m[1][1], mat.m[1][2], mat.m[1][3], mat.m[2][0], mat.m[2][1], mat.m[2][2], mat.m[2][3], mat.m[3][0], mat.m[3][1], mat.m[3][2], mat.m[3][3] ))) // ** Note: this generates a temporary, don't hold reference! - -//----------------------------------------------------------------------------- -// Returns the point at the intersection on the 3 planes. -// Returns false if it can't be solved (2 or more planes are parallel). 
-//----------------------------------------------------------------------------- -bool PlaneIntersection( const VPlane &vp1, const VPlane &vp2, const VPlane &vp3, Vector &vOut ); - - -//----------------------------------------------------------------------------- -// These methods are faster. Use them if you want faster code -//----------------------------------------------------------------------------- -void MatrixSetIdentity( VMatrix &dst ); -void MatrixTranspose( const VMatrix& src, VMatrix& dst ); -void MatrixCopy( const VMatrix& src, VMatrix& dst ); -void MatrixMultiply( const VMatrix& src1, const VMatrix& src2, VMatrix& dst ); - -// Accessors -void MatrixGetColumn( const VMatrix &src, int nCol, Vector *pColumn ); -void MatrixSetColumn( VMatrix &src, int nCol, const Vector &column ); -void MatrixGetRow( const VMatrix &src, int nCol, Vector *pColumn ); -void MatrixSetRow( VMatrix &src, int nCol, const Vector &column ); - -// Vector3DMultiply treats src2 as if it's a direction vector -void Vector3DMultiply( const VMatrix& src1, const Vector& src2, Vector& dst ); - -// Vector3DMultiplyPosition treats src2 as if it's a point (adds the translation) -inline void Vector3DMultiplyPosition( const VMatrix& src1, const VectorByValue src2, Vector& dst ); - -// Vector3DMultiplyPositionProjective treats src2 as if it's a point -// and does the perspective divide at the end -void Vector3DMultiplyPositionProjective( const VMatrix& src1, const Vector &src2, Vector& dst ); - -// Vector3DMultiplyPosition treats src2 as if it's a direction -// and does the perspective divide at the end -// NOTE: src1 had better be an inverse transpose to use this correctly -void Vector3DMultiplyProjective( const VMatrix& src1, const Vector &src2, Vector& dst ); - -void Vector4DMultiply( const VMatrix& src1, const Vector4D& src2, Vector4D& dst ); - -// Same as Vector4DMultiply except that src2 has an implicit W of 1 -void Vector4DMultiplyPosition( const VMatrix& src1, const Vector &src2, 
Vector4D& dst ); - -// Multiplies the vector by the transpose of the matrix -void Vector3DMultiplyTranspose( const VMatrix& src1, const Vector& src2, Vector& dst ); -void Vector4DMultiplyTranspose( const VMatrix& src1, const Vector4D& src2, Vector4D& dst ); - -// Transform a plane -void MatrixTransformPlane( const VMatrix &src, const cplane_t &inPlane, cplane_t &outPlane ); - -// Transform a plane that has an axis-aligned normal -void MatrixTransformAxisAlignedPlane( const VMatrix &src, int nDim, float flSign, float flDist, cplane_t &outPlane ); - -void MatrixBuildTranslation( VMatrix& dst, float x, float y, float z ); -void MatrixBuildTranslation( VMatrix& dst, const Vector &translation ); - -inline void MatrixTranslate( VMatrix& dst, const Vector &translation ) -{ - VMatrix matTranslation, temp; - MatrixBuildTranslation( matTranslation, translation ); - MatrixMultiply( dst, matTranslation, temp ); - dst = temp; -} - - -void MatrixBuildRotationAboutAxis( VMatrix& dst, const Vector& vAxisOfRot, float angleDegrees ); -void MatrixBuildRotateZ( VMatrix& dst, float angleDegrees ); - -inline void MatrixRotate( VMatrix& dst, const Vector& vAxisOfRot, float angleDegrees ) -{ - VMatrix rotation, temp; - MatrixBuildRotationAboutAxis( rotation, vAxisOfRot, angleDegrees ); - MatrixMultiply( dst, rotation, temp ); - dst = temp; -} - -// Builds a rotation matrix that rotates one direction vector into another -void MatrixBuildRotation( VMatrix &dst, const Vector& initialDirection, const Vector& finalDirection ); - -// Builds a scale matrix -void MatrixBuildScale( VMatrix &dst, float x, float y, float z ); -void MatrixBuildScale( VMatrix &dst, const Vector& scale ); - -// Build a perspective matrix. -// zNear and zFar are assumed to be positive. -// You end up looking down positive Z, X is to the right, Y is up. 
-// X range: [0..1] -// Y range: [0..1] -// Z range: [0..1] -void MatrixBuildPerspective( VMatrix &dst, float fovX, float fovY, float zNear, float zFar ); - -//----------------------------------------------------------------------------- -// Given a projection matrix, take the extremes of the space in transformed into world space and -// get a bounding box. -//----------------------------------------------------------------------------- -void CalculateAABBFromProjectionMatrix( const VMatrix &worldToVolume, Vector *pMins, Vector *pMaxs ); - -//----------------------------------------------------------------------------- -// Given a projection matrix, take the extremes of the space in transformed into world space and -// get a bounding sphere. -//----------------------------------------------------------------------------- -void CalculateSphereFromProjectionMatrix( const VMatrix &worldToVolume, Vector *pCenter, float *pflRadius ); - -//----------------------------------------------------------------------------- -// Given an inverse projection matrix, take the extremes of the space in transformed into world space and -// get a bounding box. -//----------------------------------------------------------------------------- -void CalculateAABBFromProjectionMatrixInverse( const VMatrix &volumeToWorld, Vector *pMins, Vector *pMaxs ); - -//----------------------------------------------------------------------------- -// Given an inverse projection matrix, take the extremes of the space in transformed into world space and -// get a bounding sphere. -//----------------------------------------------------------------------------- -void CalculateSphereFromProjectionMatrixInverse( const VMatrix &volumeToWorld, Vector *pCenter, float *pflRadius ); - -//----------------------------------------------------------------------------- -// Calculate frustum planes given a clip->world space transform. 
-//----------------------------------------------------------------------------- -void FrustumPlanesFromMatrix( const VMatrix &clipToWorld, Frustum_t &frustum ); - -//----------------------------------------------------------------------------- -// Setup a matrix from euler angles. -//----------------------------------------------------------------------------- -void MatrixFromAngles( const QAngle& vAngles, VMatrix& dst ); - -//----------------------------------------------------------------------------- -// Creates euler angles from a matrix -//----------------------------------------------------------------------------- -void MatrixToAngles( const VMatrix& src, QAngle& vAngles ); - -//----------------------------------------------------------------------------- -// Does a fast inverse, assuming the matrix only contains translation and rotation. -//----------------------------------------------------------------------------- -void MatrixInverseTR( const VMatrix& src, VMatrix &dst ); - -//----------------------------------------------------------------------------- -// Inverts any matrix at all -//----------------------------------------------------------------------------- -bool MatrixInverseGeneral(const VMatrix& src, VMatrix& dst); - -//----------------------------------------------------------------------------- -// Computes the inverse transpose -//----------------------------------------------------------------------------- -void MatrixInverseTranspose( const VMatrix& src, VMatrix& dst ); - - - -//----------------------------------------------------------------------------- -// VMatrix inlines. 
-//----------------------------------------------------------------------------- -inline VMatrix::VMatrix() -{ -} - -inline VMatrix::VMatrix( - vec_t m00, vec_t m01, vec_t m02, vec_t m03, - vec_t m10, vec_t m11, vec_t m12, vec_t m13, - vec_t m20, vec_t m21, vec_t m22, vec_t m23, - vec_t m30, vec_t m31, vec_t m32, vec_t m33) -{ - Init( - m00, m01, m02, m03, - m10, m11, m12, m13, - m20, m21, m22, m23, - m30, m31, m32, m33 - ); -} - - -inline VMatrix::VMatrix( const matrix3x4_t& matrix3x4 ) -{ - Init( matrix3x4 ); -} - - -//----------------------------------------------------------------------------- -// Creates a matrix where the X axis = forward -// the Y axis = left, and the Z axis = up -//----------------------------------------------------------------------------- -inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis ) -{ - Init( - xAxis.x, yAxis.x, zAxis.x, 0.0f, - xAxis.y, yAxis.y, zAxis.y, 0.0f, - xAxis.z, yAxis.z, zAxis.z, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - ); -} - -inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector& translation ) -{ - Init( - xAxis.x, yAxis.x, zAxis.x, translation.x, - xAxis.y, yAxis.y, zAxis.y, translation.y, - xAxis.z, yAxis.z, zAxis.z, translation.z, - 0.0f, 0.0f, 0.0f, 1.0f - ); -} - - -inline void VMatrix::Init( - vec_t m00, vec_t m01, vec_t m02, vec_t m03, - vec_t m10, vec_t m11, vec_t m12, vec_t m13, - vec_t m20, vec_t m21, vec_t m22, vec_t m23, - vec_t m30, vec_t m31, vec_t m32, vec_t m33 - ) -{ - m[0][0] = m00; - m[0][1] = m01; - m[0][2] = m02; - m[0][3] = m03; - - m[1][0] = m10; - m[1][1] = m11; - m[1][2] = m12; - m[1][3] = m13; - - m[2][0] = m20; - m[2][1] = m21; - m[2][2] = m22; - m[2][3] = m23; - - m[3][0] = m30; - m[3][1] = m31; - m[3][2] = m32; - m[3][3] = m33; -} - - -//----------------------------------------------------------------------------- -// Initialize from a 3x4 
-//----------------------------------------------------------------------------- -inline void VMatrix::Init( const matrix3x4_t& matrix3x4 ) -{ - memcpy(m, matrix3x4.Base(), sizeof( matrix3x4_t ) ); - - m[3][0] = 0.0f; - m[3][1] = 0.0f; - m[3][2] = 0.0f; - m[3][3] = 1.0f; -} - - -//----------------------------------------------------------------------------- -// Methods related to the basis vectors of the matrix -//----------------------------------------------------------------------------- - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -inline Vector VMatrix::GetForward() const -{ - return Vector(m[0][0], m[1][0], m[2][0]); -} - -inline Vector VMatrix::GetLeft() const -{ - return Vector(m[0][1], m[1][1], m[2][1]); -} - -inline Vector VMatrix::GetUp() const -{ - return Vector(m[0][2], m[1][2], m[2][2]); -} - -#endif - -inline void VMatrix::SetForward(const Vector &vForward) -{ - m[0][0] = vForward.x; - m[1][0] = vForward.y; - m[2][0] = vForward.z; -} - -inline void VMatrix::SetLeft(const Vector &vLeft) -{ - m[0][1] = vLeft.x; - m[1][1] = vLeft.y; - m[2][1] = vLeft.z; -} - -inline void VMatrix::SetUp(const Vector &vUp) -{ - m[0][2] = vUp.x; - m[1][2] = vUp.y; - m[2][2] = vUp.z; -} - -inline void VMatrix::GetBasisVectors(Vector &vForward, Vector &vLeft, Vector &vUp) const -{ - vForward.Init( m[0][0], m[1][0], m[2][0] ); - vLeft.Init( m[0][1], m[1][1], m[2][1] ); - vUp.Init( m[0][2], m[1][2], m[2][2] ); -} - -inline void VMatrix::SetBasisVectors(const Vector &vForward, const Vector &vLeft, const Vector &vUp) -{ - SetForward(vForward); - SetLeft(vLeft); - SetUp(vUp); -} - - -//----------------------------------------------------------------------------- -// Methods related to the translation component of the matrix -//----------------------------------------------------------------------------- -#ifndef VECTOR_NO_SLOW_OPERATIONS - -inline Vector VMatrix::GetTranslation() const -{ - return Vector(m[0][3], m[1][3], m[2][3]); -} - -#endif - -inline Vector& 
VMatrix::GetTranslation( Vector &vTrans ) const -{ - vTrans.x = m[0][3]; - vTrans.y = m[1][3]; - vTrans.z = m[2][3]; - return vTrans; -} - -inline void VMatrix::SetTranslation(const Vector &vTrans) -{ - m[0][3] = vTrans.x; - m[1][3] = vTrans.y; - m[2][3] = vTrans.z; -} - - -//----------------------------------------------------------------------------- -// appply translation to this matrix in the input space -//----------------------------------------------------------------------------- -inline void VMatrix::PreTranslate(const Vector &vTrans) -{ - Vector tmp; - Vector3DMultiplyPosition( *this, vTrans, tmp ); - m[0][3] = tmp.x; - m[1][3] = tmp.y; - m[2][3] = tmp.z; -} - - -//----------------------------------------------------------------------------- -// appply translation to this matrix in the output space -//----------------------------------------------------------------------------- -inline void VMatrix::PostTranslate(const Vector &vTrans) -{ - m[0][3] += vTrans.x; - m[1][3] += vTrans.y; - m[2][3] += vTrans.z; -} - -inline const matrix3x4_t& VMatrix::As3x4() const -{ - return *((const matrix3x4_t*)this); -} - -inline matrix3x4_t& VMatrix::As3x4() -{ - return *((matrix3x4_t*)this); -} - -inline void VMatrix::CopyFrom3x4( const matrix3x4_t &m3x4 ) -{ - memcpy( m, m3x4.Base(), sizeof( matrix3x4_t ) ); - m[3][0] = m[3][1] = m[3][2] = 0; - m[3][3] = 1; -} - -inline void VMatrix::Set3x4( matrix3x4_t& matrix3x4 ) const -{ - memcpy(matrix3x4.Base(), m, sizeof( matrix3x4_t ) ); -} - - -//----------------------------------------------------------------------------- -// Matrix math operations -//----------------------------------------------------------------------------- -inline const VMatrix& VMatrix::operator+=(const VMatrix &other) -{ - for(int i=0; i < 4; i++) - { - for(int j=0; j < 4; j++) - { - m[i][j] += other.m[i][j]; - } - } - - return *this; -} - - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -inline VMatrix VMatrix::operator+(const VMatrix &other) const -{ - VMatrix 
ret; - for(int i=0; i < 16; i++) - { - ((float*)ret.m)[i] = ((float*)m)[i] + ((float*)other.m)[i]; - } - return ret; -} - -inline VMatrix VMatrix::operator-(const VMatrix &other) const -{ - VMatrix ret; - - for(int i=0; i < 4; i++) - { - for(int j=0; j < 4; j++) - { - ret.m[i][j] = m[i][j] - other.m[i][j]; - } - } - - return ret; -} - -inline VMatrix VMatrix::operator-() const -{ - VMatrix ret; - for( int i=0; i < 16; i++ ) - { - ((float*)ret.m)[i] = ((float*)m)[i]; - } - return ret; -} - -#endif // VECTOR_NO_SLOW_OPERATIONS - - -//----------------------------------------------------------------------------- -// Vector transformation -//----------------------------------------------------------------------------- - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -inline Vector VMatrix::operator*(const Vector &vVec) const -{ - Vector vRet; - vRet.x = m[0][0]*vVec.x + m[0][1]*vVec.y + m[0][2]*vVec.z + m[0][3]; - vRet.y = m[1][0]*vVec.x + m[1][1]*vVec.y + m[1][2]*vVec.z + m[1][3]; - vRet.z = m[2][0]*vVec.x + m[2][1]*vVec.y + m[2][2]*vVec.z + m[2][3]; - - return vRet; -} - -inline Vector VMatrix::VMul4x3(const Vector &vVec) const -{ - Vector vResult; - Vector3DMultiplyPosition( *this, vVec, vResult ); - return vResult; -} - - -inline Vector VMatrix::VMul4x3Transpose(const Vector &vVec) const -{ - Vector tmp = vVec; - tmp.x -= m[0][3]; - tmp.y -= m[1][3]; - tmp.z -= m[2][3]; - - return Vector( - m[0][0]*tmp.x + m[1][0]*tmp.y + m[2][0]*tmp.z, - m[0][1]*tmp.x + m[1][1]*tmp.y + m[2][1]*tmp.z, - m[0][2]*tmp.x + m[1][2]*tmp.y + m[2][2]*tmp.z - ); -} - -inline Vector VMatrix::VMul3x3(const Vector &vVec) const -{ - return Vector( - m[0][0]*vVec.x + m[0][1]*vVec.y + m[0][2]*vVec.z, - m[1][0]*vVec.x + m[1][1]*vVec.y + m[1][2]*vVec.z, - m[2][0]*vVec.x + m[2][1]*vVec.y + m[2][2]*vVec.z - ); -} - -inline Vector VMatrix::VMul3x3Transpose(const Vector &vVec) const -{ - return Vector( - m[0][0]*vVec.x + m[1][0]*vVec.y + m[2][0]*vVec.z, - m[0][1]*vVec.x + m[1][1]*vVec.y + m[2][1]*vVec.z, - 
m[0][2]*vVec.x + m[1][2]*vVec.y + m[2][2]*vVec.z - ); -} - -#endif // VECTOR_NO_SLOW_OPERATIONS - - -inline void VMatrix::V3Mul(const Vector &vIn, Vector &vOut) const -{ - vec_t rw; - - rw = 1.0f / (m[3][0]*vIn.x + m[3][1]*vIn.y + m[3][2]*vIn.z + m[3][3]); - vOut.x = (m[0][0]*vIn.x + m[0][1]*vIn.y + m[0][2]*vIn.z + m[0][3]) * rw; - vOut.y = (m[1][0]*vIn.x + m[1][1]*vIn.y + m[1][2]*vIn.z + m[1][3]) * rw; - vOut.z = (m[2][0]*vIn.x + m[2][1]*vIn.y + m[2][2]*vIn.z + m[2][3]) * rw; -} - -inline void VMatrix::V4Mul(const Vector4D &vIn, Vector4D &vOut) const -{ - vOut[0] = m[0][0]*vIn[0] + m[0][1]*vIn[1] + m[0][2]*vIn[2] + m[0][3]*vIn[3]; - vOut[1] = m[1][0]*vIn[0] + m[1][1]*vIn[1] + m[1][2]*vIn[2] + m[1][3]*vIn[3]; - vOut[2] = m[2][0]*vIn[0] + m[2][1]*vIn[1] + m[2][2]*vIn[2] + m[2][3]*vIn[3]; - vOut[3] = m[3][0]*vIn[0] + m[3][1]*vIn[1] + m[3][2]*vIn[2] + m[3][3]*vIn[3]; -} - - -//----------------------------------------------------------------------------- -// Plane transformation -//----------------------------------------------------------------------------- -inline void VMatrix::TransformPlane( const VPlane &inPlane, VPlane &outPlane ) const -{ - Vector vTrans; - Vector3DMultiply( *this, inPlane.m_Normal, outPlane.m_Normal ); - outPlane.m_Dist = inPlane.m_Dist * DotProduct( outPlane.m_Normal, outPlane.m_Normal ); - outPlane.m_Dist += DotProduct( outPlane.m_Normal, GetTranslation( vTrans ) ); -} - - -//----------------------------------------------------------------------------- -// Other random stuff -//----------------------------------------------------------------------------- -inline void VMatrix::Identity() -{ - MatrixSetIdentity( *this ); -} - - -inline bool VMatrix::IsIdentity() const -{ - return - m[0][0] == 1.0f && m[0][1] == 0.0f && m[0][2] == 0.0f && m[0][3] == 0.0f && - m[1][0] == 0.0f && m[1][1] == 1.0f && m[1][2] == 0.0f && m[1][3] == 0.0f && - m[2][0] == 0.0f && m[2][1] == 0.0f && m[2][2] == 1.0f && m[2][3] == 0.0f && - m[3][0] == 0.0f && m[3][1] == 
0.0f && m[3][2] == 0.0f && m[3][3] == 1.0f; -} - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -inline Vector VMatrix::ApplyRotation(const Vector &vVec) const -{ - return VMul3x3(vVec); -} - -inline VMatrix VMatrix::operator~() const -{ - VMatrix mRet; - InverseGeneral(mRet); - return mRet; -} - -#endif - - -//----------------------------------------------------------------------------- -// Accessors -//----------------------------------------------------------------------------- -inline void MatrixGetColumn( const VMatrix &src, int nCol, Vector *pColumn ) -{ - Assert( (nCol >= 0) && (nCol <= 3) ); - - pColumn->x = src[0][nCol]; - pColumn->y = src[1][nCol]; - pColumn->z = src[2][nCol]; -} - -inline void MatrixSetColumn( VMatrix &src, int nCol, const Vector &column ) -{ - Assert( (nCol >= 0) && (nCol <= 3) ); - - src.m[0][nCol] = column.x; - src.m[1][nCol] = column.y; - src.m[2][nCol] = column.z; -} - -inline void MatrixGetRow( const VMatrix &src, int nRow, Vector *pRow ) -{ - Assert( (nRow >= 0) && (nRow <= 3) ); - *pRow = *(Vector*)src[nRow]; -} - -inline void MatrixSetRow( VMatrix &dst, int nRow, const Vector &row ) -{ - Assert( (nRow >= 0) && (nRow <= 3) ); - *(Vector*)dst[nRow] = row; -} - - -//----------------------------------------------------------------------------- -// Vector3DMultiplyPosition treats src2 as if it's a point (adds the translation) -//----------------------------------------------------------------------------- -// NJS: src2 is passed in as a full vector rather than a reference to prevent the need -// for 2 branches and a potential copy in the body. (ie, handling the case when the src2 -// reference is the same as the dst reference ). 
-inline void Vector3DMultiplyPosition( const VMatrix& src1, const VectorByValue src2, Vector& dst ) -{ - dst[0] = src1[0][0] * src2.x + src1[0][1] * src2.y + src1[0][2] * src2.z + src1[0][3]; - dst[1] = src1[1][0] * src2.x + src1[1][1] * src2.y + src1[1][2] * src2.z + src1[1][3]; - dst[2] = src1[2][0] * src2.x + src1[2][1] * src2.y + src1[2][2] * src2.z + src1[2][3]; -} - - -//----------------------------------------------------------------------------- -// Transform a plane that has an axis-aligned normal -//----------------------------------------------------------------------------- -inline void MatrixTransformAxisAlignedPlane( const VMatrix &src, int nDim, float flSign, float flDist, cplane_t &outPlane ) -{ - // See MatrixTransformPlane in the .cpp file for an explanation of the algorithm. - MatrixGetColumn( src, nDim, &outPlane.normal ); - outPlane.normal *= flSign; - outPlane.dist = flDist * DotProduct( outPlane.normal, outPlane.normal ); - - // NOTE: Writing this out by hand because it doesn't inline (inline depth isn't large enough) - // This should read outPlane.dist += DotProduct( outPlane.normal, src.GetTranslation ); - outPlane.dist += outPlane.normal.x * src.m[0][3] + outPlane.normal.y * src.m[1][3] + outPlane.normal.z * src.m[2][3]; -} - - -//----------------------------------------------------------------------------- -// Matrix equality test -//----------------------------------------------------------------------------- -inline bool MatricesAreEqual( const VMatrix &src1, const VMatrix &src2, float flTolerance ) -{ - for ( int i = 0; i < 3; ++i ) - { - for ( int j = 0; j < 3; ++j ) - { - if ( fabs( src1[i][j] - src2[i][j] ) > flTolerance ) - return false; - } - } - return true; -} - -//----------------------------------------------------------------------------- -// -//----------------------------------------------------------------------------- -void MatrixBuildOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, 
double zFar ); -void MatrixBuildPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar ); -void MatrixBuildPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right ); -void MatrixBuildPerspectiveZRange( VMatrix& dst, double flZNear, double flZFar ); - -inline void MatrixOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar ) -{ - VMatrix mat; - MatrixBuildOrtho( mat, left, top, right, bottom, zNear, zFar ); - - VMatrix temp; - MatrixMultiply( dst, mat, temp ); - dst = temp; -} - -inline void MatrixPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar ) -{ - VMatrix mat; - MatrixBuildPerspectiveX( mat, flFovX, flAspect, flZNear, flZFar ); - - VMatrix temp; - MatrixMultiply( dst, mat, temp ); - dst = temp; -} - -inline void MatrixPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right ) -{ - VMatrix mat; - MatrixBuildPerspectiveOffCenterX( mat, flFovX, flAspect, flZNear, flZFar, bottom, top, left, right ); - - VMatrix temp; - MatrixMultiply( dst, mat, temp ); - dst = temp; -} - -#endif - - +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +// $NoKeywords: $ +// +//=============================================================================// +// +// VMatrix always postmultiply vectors as in Ax = b. +// Given a set of basis vectors ((F)orward, (L)eft, (U)p), and a (T)ranslation, +// a matrix to transform a vector into that space looks like this: +// Fx Lx Ux Tx +// Fy Ly Uy Ty +// Fz Lz Uz Tz +// 0 0 0 1 + +// Note that concatenating matrices needs to multiply them in reverse order. 
+// ie: if I want to apply matrix A, B, then C, the equation needs to look like this: +// C * B * A * v +// ie: +// v = A * v; +// v = B * v; +// v = C * v; +//============================================================================= + +#ifndef VMATRIX_H +#define VMATRIX_H + +#ifdef _WIN32 +#pragma once +#endif + +#include +#include "mathlib/vector.h" +#include "mathlib/vplane.h" +#include "mathlib/vector4d.h" +#include "mathlib/mathlib.h" + +struct cplane_t; + + +class VMatrix +{ +public: + + VMatrix(); + VMatrix( + vec_t m00, vec_t m01, vec_t m02, vec_t m03, + vec_t m10, vec_t m11, vec_t m12, vec_t m13, + vec_t m20, vec_t m21, vec_t m22, vec_t m23, + vec_t m30, vec_t m31, vec_t m32, vec_t m33 + ); + + // Creates a matrix where the X axis = forward + // the Y axis = left, and the Z axis = up + VMatrix( const Vector& forward, const Vector& left, const Vector& up ); + VMatrix( const Vector& forward, const Vector& left, const Vector& up, const Vector& translation ); + + // Construct from a 3x4 matrix + VMatrix( const matrix3x4_t& matrix3x4 ); + + // Set the values in the matrix. + void Init( + vec_t m00, vec_t m01, vec_t m02, vec_t m03, + vec_t m10, vec_t m11, vec_t m12, vec_t m13, + vec_t m20, vec_t m21, vec_t m22, vec_t m23, + vec_t m30, vec_t m31, vec_t m32, vec_t m33 + ); + + + // Initialize from a 3x4 + void Init( const matrix3x4_t& matrix3x4 ); + + // array access + inline float* operator[](int i) + { + return m[i]; + } + + inline const float* operator[](int i) const + { + return m[i]; + } + + // Get a pointer to m[0][0] + inline float *Base() + { + return &m[0][0]; + } + + inline const float *Base() const + { + return &m[0][0]; + } + + void SetLeft(const Vector &vLeft); + void SetUp(const Vector &vUp); + void SetForward(const Vector &vForward); + + void GetBasisVectors(Vector &vForward, Vector &vLeft, Vector &vUp) const; + void SetBasisVectors(const Vector &vForward, const Vector &vLeft, const Vector &vUp); + + // Get/set the translation. 
+ Vector & GetTranslation( Vector &vTrans ) const; + void SetTranslation(const Vector &vTrans); + + void PreTranslate(const Vector &vTrans); + void PostTranslate(const Vector &vTrans); + + matrix3x4_t& As3x4(); + const matrix3x4_t& As3x4() const; + void CopyFrom3x4( const matrix3x4_t &m3x4 ); + void Set3x4( matrix3x4_t& matrix3x4 ) const; + + bool operator==( const VMatrix& src ) const; + bool operator!=( const VMatrix& src ) const { return !( *this == src ); } + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // Access the basis vectors. + Vector GetLeft() const; + Vector GetUp() const; + Vector GetForward() const; + Vector GetTranslation() const; +#endif + + +// Matrix->vector operations. +public: + // Multiply by a 3D vector (same as operator*). + void V3Mul(const Vector &vIn, Vector &vOut) const; + + // Multiply by a 4D vector. + void V4Mul(const Vector4D &vIn, Vector4D &vOut) const; + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // Applies the rotation (ignores translation in the matrix). (This just calls VMul3x3). + Vector ApplyRotation(const Vector &vVec) const; + + // Multiply by a vector (divides by w, assumes input w is 1). + Vector operator*(const Vector &vVec) const; + + // Multiply by the upper 3x3 part of the matrix (ie: only apply rotation). + Vector VMul3x3(const Vector &vVec) const; + + // Apply the inverse (transposed) rotation (only works on pure rotation matrix) + Vector VMul3x3Transpose(const Vector &vVec) const; + + // Multiply by the upper 3 rows. + Vector VMul4x3(const Vector &vVec) const; + + // Apply the inverse (transposed) transformation (only works on pure rotation/translation) + Vector VMul4x3Transpose(const Vector &vVec) const; +#endif + + +// Matrix->plane operations. +public: + // Transform the plane. The matrix can only contain translation and rotation. + void TransformPlane( const VPlane &inPlane, VPlane &outPlane ) const; + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // Just calls TransformPlane and returns the result. 
+ VPlane operator*(const VPlane &thePlane) const; +#endif + +// Matrix->matrix operations. +public: + + VMatrix& operator=(const VMatrix &mOther); + + // Multiply two matrices (out = this * vm). + void MatrixMul( const VMatrix &vm, VMatrix &out ) const; + + // Add two matrices. + const VMatrix& operator+=(const VMatrix &other); + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // Just calls MatrixMul and returns the result. + VMatrix operator*(const VMatrix &mOther) const; + + // Add/Subtract two matrices. + VMatrix operator+(const VMatrix &other) const; + VMatrix operator-(const VMatrix &other) const; + + // Negation. + VMatrix operator-() const; + + // Return inverse matrix. Be careful because the results are undefined + // if the matrix doesn't have an inverse (ie: InverseGeneral returns false). + VMatrix operator~() const; +#endif + +// Matrix operations. +public: + // Set to identity. + void Identity(); + + bool IsIdentity() const; + + // Setup a matrix for origin and angles. + void SetupMatrixOrgAngles( const Vector &origin, const QAngle &vAngles ); + + // General inverse. This may fail so check the return! + bool InverseGeneral(VMatrix &vInverse) const; + + // Does a fast inverse, assuming the matrix only contains translation and rotation. + void InverseTR( VMatrix &mRet ) const; + + // Usually used for debug checks. Returns true if the upper 3x3 contains + // unit vectors and they are all orthogonal. + bool IsRotationMatrix() const; + +#ifndef VECTOR_NO_SLOW_OPERATIONS + // This calls the other InverseTR and returns the result. + VMatrix InverseTR() const; + + // Get the scale of the matrix's basis vectors. + Vector GetScale() const; + + // (Fast) multiply by a scaling matrix setup from vScale. + VMatrix Scale(const Vector &vScale); + + // Normalize the basis vectors. + VMatrix NormalizeBasisVectors() const; + + // Transpose. + VMatrix Transpose() const; + + // Transpose upper-left 3x3. + VMatrix Transpose3x3() const; +#endif + +public: + // The matrix. 
+ vec_t m[4][4]; +}; + + + +//----------------------------------------------------------------------------- +// Helper functions. +//----------------------------------------------------------------------------- + +#ifndef VECTOR_NO_SLOW_OPERATIONS + +// Setup an identity matrix. +VMatrix SetupMatrixIdentity(); + +// Setup as a scaling matrix. +VMatrix SetupMatrixScale(const Vector &vScale); + +// Setup a translation matrix. +VMatrix SetupMatrixTranslation(const Vector &vTranslation); + +// Setup a matrix to reflect around the plane. +VMatrix SetupMatrixReflection(const VPlane &thePlane); + +// Setup a matrix to project from vOrigin onto thePlane. +VMatrix SetupMatrixProjection(const Vector &vOrigin, const VPlane &thePlane); + +// Setup a matrix to rotate the specified amount around the specified axis. +VMatrix SetupMatrixAxisRot(const Vector &vAxis, vec_t fDegrees); + +// Setup a matrix from euler angles. Just sets identity and calls MatrixAngles. +VMatrix SetupMatrixAngles(const QAngle &vAngles); + +// Setup a matrix for origin and angles. +VMatrix SetupMatrixOrgAngles(const Vector &origin, const QAngle &vAngles); + +#endif + +#define VMatToString(mat) (static_cast(CFmtStr("[ (%f, %f, %f), (%f, %f, %f), (%f, %f, %f), (%f, %f, %f) ]", mat.m[0][0], mat.m[0][1], mat.m[0][2], mat.m[0][3], mat.m[1][0], mat.m[1][1], mat.m[1][2], mat.m[1][3], mat.m[2][0], mat.m[2][1], mat.m[2][2], mat.m[2][3], mat.m[3][0], mat.m[3][1], mat.m[3][2], mat.m[3][3] ))) // ** Note: this generates a temporary, don't hold reference! + +//----------------------------------------------------------------------------- +// Returns the point at the intersection on the 3 planes. +// Returns false if it can't be solved (2 or more planes are parallel). 
+//----------------------------------------------------------------------------- +bool PlaneIntersection( const VPlane &vp1, const VPlane &vp2, const VPlane &vp3, Vector &vOut ); + + +//----------------------------------------------------------------------------- +// These methods are faster. Use them if you want faster code +//----------------------------------------------------------------------------- +void MatrixSetIdentity( VMatrix &dst ); +void MatrixTranspose( const VMatrix& src, VMatrix& dst ); +void MatrixCopy( const VMatrix& src, VMatrix& dst ); +void MatrixMultiply( const VMatrix& src1, const VMatrix& src2, VMatrix& dst ); + +// Accessors +void MatrixGetColumn( const VMatrix &src, int nCol, Vector *pColumn ); +void MatrixSetColumn( VMatrix &src, int nCol, const Vector &column ); +void MatrixGetRow( const VMatrix &src, int nCol, Vector *pColumn ); +void MatrixSetRow( VMatrix &src, int nCol, const Vector &column ); + +// Vector3DMultiply treats src2 as if it's a direction vector +void Vector3DMultiply( const VMatrix& src1, const Vector& src2, Vector& dst ); + +// Vector3DMultiplyPosition treats src2 as if it's a point (adds the translation) +inline void Vector3DMultiplyPosition( const VMatrix& src1, const VectorByValue src2, Vector& dst ); + +// Vector3DMultiplyPositionProjective treats src2 as if it's a point +// and does the perspective divide at the end +void Vector3DMultiplyPositionProjective( const VMatrix& src1, const Vector &src2, Vector& dst ); + +// Vector3DMultiplyPosition treats src2 as if it's a direction +// and does the perspective divide at the end +// NOTE: src1 had better be an inverse transpose to use this correctly +void Vector3DMultiplyProjective( const VMatrix& src1, const Vector &src2, Vector& dst ); + +void Vector4DMultiply( const VMatrix& src1, const Vector4D& src2, Vector4D& dst ); + +// Same as Vector4DMultiply except that src2 has an implicit W of 1 +void Vector4DMultiplyPosition( const VMatrix& src1, const Vector &src2, 
Vector4D& dst ); + +// Multiplies the vector by the transpose of the matrix +void Vector3DMultiplyTranspose( const VMatrix& src1, const Vector& src2, Vector& dst ); +void Vector4DMultiplyTranspose( const VMatrix& src1, const Vector4D& src2, Vector4D& dst ); + +// Transform a plane +void MatrixTransformPlane( const VMatrix &src, const cplane_t &inPlane, cplane_t &outPlane ); + +// Transform a plane that has an axis-aligned normal +void MatrixTransformAxisAlignedPlane( const VMatrix &src, int nDim, float flSign, float flDist, cplane_t &outPlane ); + +void MatrixBuildTranslation( VMatrix& dst, float x, float y, float z ); +void MatrixBuildTranslation( VMatrix& dst, const Vector &translation ); + +inline void MatrixTranslate( VMatrix& dst, const Vector &translation ) +{ + VMatrix matTranslation, temp; + MatrixBuildTranslation( matTranslation, translation ); + MatrixMultiply( dst, matTranslation, temp ); + dst = temp; +} + + +void MatrixBuildRotationAboutAxis( VMatrix& dst, const Vector& vAxisOfRot, float angleDegrees ); +void MatrixBuildRotateZ( VMatrix& dst, float angleDegrees ); + +inline void MatrixRotate( VMatrix& dst, const Vector& vAxisOfRot, float angleDegrees ) +{ + VMatrix rotation, temp; + MatrixBuildRotationAboutAxis( rotation, vAxisOfRot, angleDegrees ); + MatrixMultiply( dst, rotation, temp ); + dst = temp; +} + +// Builds a rotation matrix that rotates one direction vector into another +void MatrixBuildRotation( VMatrix &dst, const Vector& initialDirection, const Vector& finalDirection ); + +// Builds a scale matrix +void MatrixBuildScale( VMatrix &dst, float x, float y, float z ); +void MatrixBuildScale( VMatrix &dst, const Vector& scale ); + +// Build a perspective matrix. +// zNear and zFar are assumed to be positive. +// You end up looking down positive Z, X is to the right, Y is up. 
+// X range: [0..1]
+// Y range: [0..1]
+// Z range: [0..1]
+void MatrixBuildPerspective( VMatrix &dst, float fovX, float fovY, float zNear, float zFar );
+
+//-----------------------------------------------------------------------------
+// Given a projection matrix, take the extremes of the volume, transform them into world space, and
+// get a bounding box.
+//-----------------------------------------------------------------------------
+void CalculateAABBFromProjectionMatrix( const VMatrix &worldToVolume, Vector *pMins, Vector *pMaxs );
+
+//-----------------------------------------------------------------------------
+// Given a projection matrix, take the extremes of the volume, transform them into world space, and
+// get a bounding sphere.
+//-----------------------------------------------------------------------------
+void CalculateSphereFromProjectionMatrix( const VMatrix &worldToVolume, Vector *pCenter, float *pflRadius );
+
+//-----------------------------------------------------------------------------
+// Given an inverse projection matrix, take the extremes of the volume, transform them into world space, and
+// get a bounding box.
+//-----------------------------------------------------------------------------
+void CalculateAABBFromProjectionMatrixInverse( const VMatrix &volumeToWorld, Vector *pMins, Vector *pMaxs );
+
+//-----------------------------------------------------------------------------
+// Given an inverse projection matrix, take the extremes of the volume, transform them into world space, and
+// get a bounding sphere.
+//-----------------------------------------------------------------------------
+void CalculateSphereFromProjectionMatrixInverse( const VMatrix &volumeToWorld, Vector *pCenter, float *pflRadius );
+
+//-----------------------------------------------------------------------------
+// Calculate frustum planes given a clip->world space transform.
+//-----------------------------------------------------------------------------
+void FrustumPlanesFromMatrix( const VMatrix &clipToWorld, Frustum_t &frustum );
+
+//-----------------------------------------------------------------------------
+// Set up a matrix from Euler angles.
+//-----------------------------------------------------------------------------
+void MatrixFromAngles( const QAngle& vAngles, VMatrix& dst );
+
+//-----------------------------------------------------------------------------
+// Creates Euler angles from a matrix
+//-----------------------------------------------------------------------------
+void MatrixToAngles( const VMatrix& src, QAngle& vAngles );
+
+//-----------------------------------------------------------------------------
+// Does a fast inverse, assuming the matrix only contains translation and rotation.
+//-----------------------------------------------------------------------------
+void MatrixInverseTR( const VMatrix& src, VMatrix &dst );
+
+//-----------------------------------------------------------------------------
+// Inverts any matrix at all. NOTE(review): the bool presumably reports whether an inverse exists -- confirm in the .cpp
+//-----------------------------------------------------------------------------
+bool MatrixInverseGeneral(const VMatrix& src, VMatrix& dst);
+
+//-----------------------------------------------------------------------------
+// Computes the inverse transpose
+//-----------------------------------------------------------------------------
+void MatrixInverseTranspose( const VMatrix& src, VMatrix& dst );
+
+
+
+//-----------------------------------------------------------------------------
+// VMatrix inlines.
+//-----------------------------------------------------------------------------
+inline VMatrix::VMatrix()    // empty body: no element is assigned here
+{
+}
+
+inline VMatrix::VMatrix(
+    vec_t m00, vec_t m01, vec_t m02, vec_t m03,
+    vec_t m10, vec_t m11, vec_t m12, vec_t m13,
+    vec_t m20, vec_t m21, vec_t m22, vec_t m23,
+    vec_t m30, vec_t m31, vec_t m32, vec_t m33)
+{
+    Init(
+        m00, m01, m02, m03,
+        m10, m11, m12, m13,
+        m20, m21, m22, m23,
+        m30, m31, m32, m33
+        );
+}
+
+
+inline VMatrix::VMatrix( const matrix3x4_t& matrix3x4 )
+{
+    Init( matrix3x4 );
+}
+
+
+//-----------------------------------------------------------------------------
+// Creates a matrix where the X axis = forward
+// the Y axis = left, and the Z axis = up (each axis is stored as one column)
+//-----------------------------------------------------------------------------
+inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis )
+{
+    Init(
+        xAxis.x, yAxis.x, zAxis.x, 0.0f,
+        xAxis.y, yAxis.y, zAxis.y, 0.0f,
+        xAxis.z, yAxis.z, zAxis.z, 0.0f,
+        0.0f, 0.0f, 0.0f, 1.0f
+        );
+}
+
+inline VMatrix::VMatrix( const Vector& xAxis, const Vector& yAxis, const Vector& zAxis, const Vector& translation )
+{
+    Init(
+        xAxis.x, yAxis.x, zAxis.x, translation.x,    // translation fills the fourth column
+        xAxis.y, yAxis.y, zAxis.y, translation.y,
+        xAxis.z, yAxis.z, zAxis.z, translation.z,
+        0.0f, 0.0f, 0.0f, 1.0f
+        );
+}
+
+
+inline void VMatrix::Init(
+    vec_t m00, vec_t m01, vec_t m02, vec_t m03,
+    vec_t m10, vec_t m11, vec_t m12, vec_t m13,
+    vec_t m20, vec_t m21, vec_t m22, vec_t m23,
+    vec_t m30, vec_t m31, vec_t m32, vec_t m33
+    )
+{
+    m[0][0] = m00;    // argument mRC is stored at m[R][C]
+    m[0][1] = m01;
+    m[0][2] = m02;
+    m[0][3] = m03;
+
+    m[1][0] = m10;
+    m[1][1] = m11;
+    m[1][2] = m12;
+    m[1][3] = m13;
+
+    m[2][0] = m20;
+    m[2][1] = m21;
+    m[2][2] = m22;
+    m[2][3] = m23;
+
+    m[3][0] = m30;
+    m[3][1] = m31;
+    m[3][2] = m32;
+    m[3][3] = m33;
+}
+
+
+//-----------------------------------------------------------------------------
+// Initialize from a 3x4
+//-----------------------------------------------------------------------------
+inline void VMatrix::Init( const matrix3x4_t& matrix3x4 )
+{
+    memcpy(m, matrix3x4.Base(), sizeof( matrix3x4_t ) );    // copies the 3x4 into the start of m
+
+    m[3][0] = 0.0f;    // the last row is then set to (0, 0, 0, 1)
+    m[3][1] = 0.0f;
+    m[3][2] = 0.0f;
+    m[3][3] = 1.0f;
+}
+
+
+//-----------------------------------------------------------------------------
+// Methods related to the basis vectors of the matrix
+//-----------------------------------------------------------------------------
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+inline Vector VMatrix::GetForward() const
+{
+    return Vector(m[0][0], m[1][0], m[2][0]);
+}
+
+inline Vector VMatrix::GetLeft() const
+{
+    return Vector(m[0][1], m[1][1], m[2][1]);
+}
+
+inline Vector VMatrix::GetUp() const
+{
+    return Vector(m[0][2], m[1][2], m[2][2]);
+}
+
+#endif
+
+inline void VMatrix::SetForward(const Vector &vForward)
+{
+    m[0][0] = vForward.x;
+    m[1][0] = vForward.y;
+    m[2][0] = vForward.z;
+}
+
+inline void VMatrix::SetLeft(const Vector &vLeft)
+{
+    m[0][1] = vLeft.x;
+    m[1][1] = vLeft.y;
+    m[2][1] = vLeft.z;
+}
+
+inline void VMatrix::SetUp(const Vector &vUp)
+{
+    m[0][2] = vUp.x;
+    m[1][2] = vUp.y;
+    m[2][2] = vUp.z;
+}
+
+inline void VMatrix::GetBasisVectors(Vector &vForward, Vector &vLeft, Vector &vUp) const
+{
+    vForward.Init( m[0][0], m[1][0], m[2][0] );
+    vLeft.Init( m[0][1], m[1][1], m[2][1] );
+    vUp.Init( m[0][2], m[1][2], m[2][2] );
+}
+
+inline void VMatrix::SetBasisVectors(const Vector &vForward, const Vector &vLeft, const Vector &vUp)
+{
+    SetForward(vForward);
+    SetLeft(vLeft);
+    SetUp(vUp);
+}
+
+
+//-----------------------------------------------------------------------------
+// Methods related to the translation component of the matrix
+//-----------------------------------------------------------------------------
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+inline Vector VMatrix::GetTranslation() const
+{
+    return Vector(m[0][3], m[1][3], m[2][3]);
+}
+
+#endif
+
+inline Vector& VMatrix::GetTranslation( Vector &vTrans ) const
+{
+    vTrans.x = m[0][3];
+    vTrans.y = m[1][3];
+    vTrans.z = m[2][3];
+    return vTrans;
+}
+
+inline void VMatrix::SetTranslation(const Vector &vTrans)
+{
+    m[0][3] = vTrans.x;
+    m[1][3] = vTrans.y;
+    m[2][3] = vTrans.z;
+}
+
+
+//-----------------------------------------------------------------------------
+// Apply translation to this matrix in the input space
+//-----------------------------------------------------------------------------
+inline void VMatrix::PreTranslate(const Vector &vTrans)
+{
+    Vector tmp;
+    Vector3DMultiplyPosition( *this, vTrans, tmp );
+    m[0][3] = tmp.x;
+    m[1][3] = tmp.y;
+    m[2][3] = tmp.z;
+}
+
+
+//-----------------------------------------------------------------------------
+// Apply translation to this matrix in the output space
+//-----------------------------------------------------------------------------
+inline void VMatrix::PostTranslate(const Vector &vTrans)
+{
+    m[0][3] += vTrans.x;
+    m[1][3] += vTrans.y;
+    m[2][3] += vTrans.z;
+}
+
+inline const matrix3x4_t& VMatrix::As3x4() const
+{
+    return *((const matrix3x4_t*)this);
+}
+
+inline matrix3x4_t& VMatrix::As3x4()
+{
+    return *((matrix3x4_t*)this);
+}
+
+inline void VMatrix::CopyFrom3x4( const matrix3x4_t &m3x4 )
+{
+    memcpy( m, m3x4.Base(), sizeof( matrix3x4_t ) );
+    m[3][0] = m[3][1] = m[3][2] = 0;
+    m[3][3] = 1;
+}
+
+inline void VMatrix::Set3x4( matrix3x4_t& matrix3x4 ) const
+{
+    memcpy(matrix3x4.Base(), m, sizeof( matrix3x4_t ) );
+}
+
+
+//-----------------------------------------------------------------------------
+// Matrix math operations
+//-----------------------------------------------------------------------------
+inline const VMatrix& VMatrix::operator+=(const VMatrix &other)
+{
+    for(int i=0; i < 4; i++)
+    {
+        for(int j=0; j < 4; j++)
+        {
+            m[i][j] += other.m[i][j];
+        }
+    }
+
+    return *this;
+}
+
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+inline VMatrix VMatrix::operator+(const VMatrix &other) const
+{
+    VMatrix ret;
+    for(int i=0; i < 16; i++)
+    {
+        ((float*)ret.m)[i] = ((float*)m)[i] + ((float*)other.m)[i];
+    }
+    return ret;
+}
+
+inline VMatrix VMatrix::operator-(const VMatrix &other) const
+{
+    VMatrix ret;
+
+    for(int i=0; i < 4; i++)
+    {
+        for(int j=0; j < 4; j++)
+        {
+            ret.m[i][j] = m[i][j] - other.m[i][j];
+        }
+    }
+
+    return ret;
+}
+
+inline VMatrix VMatrix::operator-() const    // unary minus: returns the element-wise negation
+{
+    VMatrix ret;
+    for( int i=0; i < 16; i++ )
+    {
+        ((float*)ret.m)[i] = -((float*)m)[i];    // negate each element; this used to copy it unchanged
+    }
+    return ret;
+}
+
+#endif // VECTOR_NO_SLOW_OPERATIONS
+
+
+//-----------------------------------------------------------------------------
+// Vector transformation
+//-----------------------------------------------------------------------------
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+inline Vector VMatrix::operator*(const Vector &vVec) const
+{
+    Vector vRet;
+    vRet.x = m[0][0]*vVec.x + m[0][1]*vVec.y + m[0][2]*vVec.z + m[0][3];
+    vRet.y = m[1][0]*vVec.x + m[1][1]*vVec.y + m[1][2]*vVec.z + m[1][3];
+    vRet.z = m[2][0]*vVec.x + m[2][1]*vVec.y + m[2][2]*vVec.z + m[2][3];
+
+    return vRet;
+}
+
+inline Vector VMatrix::VMul4x3(const Vector &vVec) const
+{
+    Vector vResult;
+    Vector3DMultiplyPosition( *this, vVec, vResult );
+    return vResult;
+}
+
+
+inline Vector VMatrix::VMul4x3Transpose(const Vector &vVec) const
+{
+    Vector tmp = vVec;
+    tmp.x -= m[0][3];
+    tmp.y -= m[1][3];
+    tmp.z -= m[2][3];
+
+    return Vector(
+        m[0][0]*tmp.x + m[1][0]*tmp.y + m[2][0]*tmp.z,
+        m[0][1]*tmp.x + m[1][1]*tmp.y + m[2][1]*tmp.z,
+        m[0][2]*tmp.x + m[1][2]*tmp.y + m[2][2]*tmp.z
+        );
+}
+
+inline Vector VMatrix::VMul3x3(const Vector &vVec) const
+{
+    return Vector(
+        m[0][0]*vVec.x + m[0][1]*vVec.y + m[0][2]*vVec.z,
+        m[1][0]*vVec.x + m[1][1]*vVec.y + m[1][2]*vVec.z,
+        m[2][0]*vVec.x + m[2][1]*vVec.y + m[2][2]*vVec.z
+        );
+}
+
+inline Vector VMatrix::VMul3x3Transpose(const Vector &vVec) const
+{
+    return Vector(
+        m[0][0]*vVec.x + m[1][0]*vVec.y + m[2][0]*vVec.z,
+        m[0][1]*vVec.x + m[1][1]*vVec.y + m[2][1]*vVec.z,
+        m[0][2]*vVec.x + m[1][2]*vVec.y + m[2][2]*vVec.z
+        );
+}
+
+#endif // VECTOR_NO_SLOW_OPERATIONS
+
+
+inline void VMatrix::V3Mul(const Vector &vIn, Vector &vOut) const
+{
+    vec_t rw;
+
+    rw = 1.0f / (m[3][0]*vIn.x + m[3][1]*vIn.y + m[3][2]*vIn.z + m[3][3]);    // reciprocal of w; no guard against w == 0
+    vOut.x = (m[0][0]*vIn.x + m[0][1]*vIn.y + m[0][2]*vIn.z + m[0][3]) * rw;
+    vOut.y = (m[1][0]*vIn.x + m[1][1]*vIn.y + m[1][2]*vIn.z + m[1][3]) * rw;
+    vOut.z = (m[2][0]*vIn.x + m[2][1]*vIn.y + m[2][2]*vIn.z + m[2][3]) * rw;
+}
+
+inline void VMatrix::V4Mul(const Vector4D &vIn, Vector4D &vOut) const
+{
+    vOut[0] = m[0][0]*vIn[0] + m[0][1]*vIn[1] + m[0][2]*vIn[2] + m[0][3]*vIn[3];
+    vOut[1] = m[1][0]*vIn[0] + m[1][1]*vIn[1] + m[1][2]*vIn[2] + m[1][3]*vIn[3];
+    vOut[2] = m[2][0]*vIn[0] + m[2][1]*vIn[1] + m[2][2]*vIn[2] + m[2][3]*vIn[3];
+    vOut[3] = m[3][0]*vIn[0] + m[3][1]*vIn[1] + m[3][2]*vIn[2] + m[3][3]*vIn[3];
+}
+
+
+//-----------------------------------------------------------------------------
+// Plane transformation
+//-----------------------------------------------------------------------------
+inline void VMatrix::TransformPlane( const VPlane &inPlane, VPlane &outPlane ) const
+{
+    Vector vTrans;
+    Vector3DMultiply( *this, inPlane.m_Normal, outPlane.m_Normal );
+    outPlane.m_Dist = inPlane.m_Dist * DotProduct( outPlane.m_Normal, outPlane.m_Normal );
+    outPlane.m_Dist += DotProduct( outPlane.m_Normal, GetTranslation( vTrans ) );
+}
+
+
+//-----------------------------------------------------------------------------
+// Other random stuff
+//-----------------------------------------------------------------------------
+inline void VMatrix::Identity()
+{
+    MatrixSetIdentity( *this );
+}
+
+
+inline bool VMatrix::IsIdentity() const
+{
+    return    // exact floating-point comparison, no tolerance
+        m[0][0] == 1.0f && m[0][1] == 0.0f && m[0][2] == 0.0f && m[0][3] == 0.0f &&
+        m[1][0] == 0.0f && m[1][1] == 1.0f && m[1][2] == 0.0f && m[1][3] == 0.0f &&
+        m[2][0] == 0.0f && m[2][1] == 0.0f && m[2][2] == 1.0f && m[2][3] == 0.0f &&
+        m[3][0] == 0.0f && m[3][1] == 0.0f && m[3][2] == 0.0f && m[3][3] == 1.0f;
+}
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+inline Vector VMatrix::ApplyRotation(const Vector &vVec) const
+{
+    return VMul3x3(vVec);
+}
+
+inline VMatrix VMatrix::operator~() const
+{
+    VMatrix mRet;
+    InverseGeneral(mRet);
+    return mRet;
+}
+
+#endif
+
+
+//-----------------------------------------------------------------------------
+// Accessors
+//-----------------------------------------------------------------------------
+inline void MatrixGetColumn( const VMatrix &src, int nCol, Vector *pColumn )
+{
+    Assert( (nCol >= 0) && (nCol <= 3) );
+
+    pColumn->x = src[0][nCol];
+    pColumn->y = src[1][nCol];
+    pColumn->z = src[2][nCol];
+}
+
+inline void MatrixSetColumn( VMatrix &src, int nCol, const Vector &column )
+{
+    Assert( (nCol >= 0) && (nCol <= 3) );
+
+    src.m[0][nCol] = column.x;
+    src.m[1][nCol] = column.y;
+    src.m[2][nCol] = column.z;
+}
+
+inline void MatrixGetRow( const VMatrix &src, int nRow, Vector *pRow )
+{
+    Assert( (nRow >= 0) && (nRow <= 3) );
+    *pRow = *(Vector*)src[nRow];
+}
+
+inline void MatrixSetRow( VMatrix &dst, int nRow, const Vector &row )
+{
+    Assert( (nRow >= 0) && (nRow <= 3) );
+    *(Vector*)dst[nRow] = row;
+}
+
+
+//-----------------------------------------------------------------------------
+// Vector3DMultiplyPosition treats src2 as if it's a point (adds the translation)
+//-----------------------------------------------------------------------------
+// NJS: src2 is passed in as a full vector rather than a reference to prevent the need
+// for 2 branches and a potential copy in the body. (ie, handling the case when the src2
+// reference is the same as the dst reference ).
+inline void Vector3DMultiplyPosition( const VMatrix& src1, const VectorByValue src2, Vector& dst )
+{
+    dst[0] = src1[0][0] * src2.x + src1[0][1] * src2.y + src1[0][2] * src2.z + src1[0][3];
+    dst[1] = src1[1][0] * src2.x + src1[1][1] * src2.y + src1[1][2] * src2.z + src1[1][3];
+    dst[2] = src1[2][0] * src2.x + src1[2][1] * src2.y + src1[2][2] * src2.z + src1[2][3];
+}
+
+
+//-----------------------------------------------------------------------------
+// Transform a plane that has an axis-aligned normal
+//-----------------------------------------------------------------------------
+inline void MatrixTransformAxisAlignedPlane( const VMatrix &src, int nDim, float flSign, float flDist, cplane_t &outPlane )
+{
+    // See MatrixTransformPlane in the .cpp file for an explanation of the algorithm.
+    MatrixGetColumn( src, nDim, &outPlane.normal );
+    outPlane.normal *= flSign;
+    outPlane.dist = flDist * DotProduct( outPlane.normal, outPlane.normal );
+
+    // NOTE: Writing this out by hand because it doesn't inline (inline depth isn't large enough)
+    // This should read outPlane.dist += DotProduct( outPlane.normal, src.GetTranslation );
+    outPlane.dist += outPlane.normal.x * src.m[0][3] + outPlane.normal.y * src.m[1][3] + outPlane.normal.z * src.m[2][3];
+}
+
+
+//-----------------------------------------------------------------------------
+// Matrix equality test: all 16 elements must agree to within flTolerance
+//-----------------------------------------------------------------------------
+inline bool MatricesAreEqual( const VMatrix &src1, const VMatrix &src2, float flTolerance )
+{
+    for ( int i = 0; i < 4; ++i )    // all four rows; stopping at 3 skipped the bottom row
+    {
+        for ( int j = 0; j < 4; ++j )    // all four columns; stopping at 3 skipped the translation column
+        {
+            if ( fabs( src1[i][j] - src2[i][j] ) > flTolerance )
+                return false;
+        }
+    }
+    return true;
+}
+
+//-----------------------------------------------------------------------------
+// Projection matrix builders, and wrappers that multiply dst by the built matrix
+//-----------------------------------------------------------------------------
+void MatrixBuildOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar );
+void MatrixBuildPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar );
+void MatrixBuildPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right );
+void MatrixBuildPerspectiveZRange( VMatrix& dst, double flZNear, double flZFar );
+
+inline void MatrixOrtho( VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar )
+{
+    VMatrix mat;
+    MatrixBuildOrtho( mat, left, top, right, bottom, zNear, zFar );
+
+    VMatrix temp;
+    MatrixMultiply( dst, mat, temp );
+    dst = temp;
+}
+
+inline void MatrixPerspectiveX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar )
+{
+    VMatrix mat;
+    MatrixBuildPerspectiveX( mat, flFovX, flAspect, flZNear, flZFar );
+
+    VMatrix temp;
+    MatrixMultiply( dst, mat, temp );
+    dst = temp;
+}
+
+inline void MatrixPerspectiveOffCenterX( VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right )
+{
+    VMatrix mat;
+    MatrixBuildPerspectiveOffCenterX( mat, flFovX, flAspect, flZNear, flZFar, bottom, top, left, right );
+
+    VMatrix temp;
+    MatrixMultiply( dst, mat, temp );
+    dst = temp;
+}
+
+#endif
+
+
diff --git a/mp/src/public/mathlib/vplane.h b/mp/src/public/mathlib/vplane.h
index 2c4441de..dd3d4a9a 100644
--- a/mp/src/public/mathlib/vplane.h
+++ b/mp/src/public/mathlib/vplane.h
@@ -1,182 +1,182 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: 
-//
-// $Workfile: $
-// $Date: $
-// $NoKeywords: $
-//=============================================================================//
-
-#ifndef VPLANE_H
-#define VPLANE_H
-
-#ifdef _WIN32
-#pragma once
-#endif
-
-#include "mathlib/vector.h"
-
-typedef int SideType;
-
-// Used to represent sides of things like planes.
-#define SIDE_FRONT 0 -#define SIDE_BACK 1 -#define SIDE_ON 2 - -#define VP_EPSILON 0.01f - - -class VPlane -{ -public: - VPlane(); - VPlane(const Vector &vNormal, vec_t dist); - - void Init(const Vector &vNormal, vec_t dist); - - // Return the distance from the point to the plane. - vec_t DistTo(const Vector &vVec) const; - - // Copy. - VPlane& operator=(const VPlane &thePlane); - - // Returns SIDE_ON, SIDE_FRONT, or SIDE_BACK. - // The epsilon for SIDE_ON can be passed in. - SideType GetPointSide(const Vector &vPoint, vec_t sideEpsilon=VP_EPSILON) const; - - // Returns SIDE_FRONT or SIDE_BACK. - SideType GetPointSideExact(const Vector &vPoint) const; - - // Classify the box with respect to the plane. - // Returns SIDE_ON, SIDE_FRONT, or SIDE_BACK - SideType BoxOnPlaneSide(const Vector &vMin, const Vector &vMax) const; - -#ifndef VECTOR_NO_SLOW_OPERATIONS - // Flip the plane. - VPlane Flip(); - - // Get a point on the plane (normal*dist). - Vector GetPointOnPlane() const; - - // Snap the specified point to the plane (along the plane's normal). - Vector SnapPointToPlane(const Vector &vPoint) const; -#endif - -public: - Vector m_Normal; - vec_t m_Dist; - -#ifdef VECTOR_NO_SLOW_OPERATIONS -private: - // No copy constructors allowed if we're in optimal mode - VPlane(const VPlane& vOther); -#endif -}; - - -//----------------------------------------------------------------------------- -// Inlines. 
-//----------------------------------------------------------------------------- -inline VPlane::VPlane() -{ -} - -inline VPlane::VPlane(const Vector &vNormal, vec_t dist) -{ - m_Normal = vNormal; - m_Dist = dist; -} - -inline void VPlane::Init(const Vector &vNormal, vec_t dist) -{ - m_Normal = vNormal; - m_Dist = dist; -} - -inline vec_t VPlane::DistTo(const Vector &vVec) const -{ - return vVec.Dot(m_Normal) - m_Dist; -} - -inline VPlane& VPlane::operator=(const VPlane &thePlane) -{ - m_Normal = thePlane.m_Normal; - m_Dist = thePlane.m_Dist; - return *this; -} - -#ifndef VECTOR_NO_SLOW_OPERATIONS - -inline VPlane VPlane::Flip() -{ - return VPlane(-m_Normal, -m_Dist); -} - -inline Vector VPlane::GetPointOnPlane() const -{ - return m_Normal * m_Dist; -} - -inline Vector VPlane::SnapPointToPlane(const Vector &vPoint) const -{ - return vPoint - m_Normal * DistTo(vPoint); -} - -#endif - -inline SideType VPlane::GetPointSide(const Vector &vPoint, vec_t sideEpsilon) const -{ - vec_t fDist; - - fDist = DistTo(vPoint); - if(fDist >= sideEpsilon) - return SIDE_FRONT; - else if(fDist <= -sideEpsilon) - return SIDE_BACK; - else - return SIDE_ON; -} - -inline SideType VPlane::GetPointSideExact(const Vector &vPoint) const -{ - return DistTo(vPoint) > 0.0f ? SIDE_FRONT : SIDE_BACK; -} - - -// BUGBUG: This should either simply use the implementation in mathlib or cease to exist. -// mathlib implementation is much more efficient. Check to see that VPlane isn't used in -// performance critical code. 
-inline SideType VPlane::BoxOnPlaneSide(const Vector &vMin, const Vector &vMax) const
-{
-    int i, firstSide, side;
-    TableVector vPoints[8] =
-    {
-        { vMin.x, vMin.y, vMin.z },
-        { vMin.x, vMin.y, vMax.z },
-        { vMin.x, vMax.y, vMax.z },
-        { vMin.x, vMax.y, vMin.z },
-
-        { vMax.x, vMin.y, vMin.z },
-        { vMax.x, vMin.y, vMax.z },
-        { vMax.x, vMax.y, vMax.z },
-        { vMax.x, vMax.y, vMin.z },
-    };
-
-    firstSide = GetPointSideExact(vPoints[0]);
-    for(i=1; i < 8; i++)
-    {
-        side = GetPointSideExact(vPoints[i]);
-
-        // Does the box cross the plane?
-        if(side != firstSide)
-            return SIDE_ON;
-    }
-
-    // Ok, they're all on the same side, return that.
-    return firstSide;
-}
-
-
-
-
-#endif // VPLANE_H
+//========= Copyright Valve Corporation, All rights reserved. ============//
+//
+// Purpose: 
+//
+// $Workfile: $
+// $Date: $
+// $NoKeywords: $
+//=============================================================================//
+
+#ifndef VPLANE_H
+#define VPLANE_H
+
+#ifdef _WIN32
+#pragma once
+#endif
+
+#include "mathlib/vector.h"
+
+typedef int SideType;
+
+// Used to represent sides of things like planes.
+#define SIDE_FRONT 0
+#define SIDE_BACK 1
+#define SIDE_ON 2
+
+#define VP_EPSILON 0.01f
+
+
+class VPlane
+{
+public:
+    VPlane();
+    VPlane(const Vector &vNormal, vec_t dist);
+
+    void Init(const Vector &vNormal, vec_t dist);
+
+    // Return the signed distance from the point to the plane (dot(point, normal) - dist).
+    vec_t DistTo(const Vector &vVec) const;
+
+    // Copy.
+    VPlane& operator=(const VPlane &thePlane);
+
+    // Returns SIDE_ON, SIDE_FRONT, or SIDE_BACK.
+    // The epsilon for SIDE_ON can be passed in.
+    SideType GetPointSide(const Vector &vPoint, vec_t sideEpsilon=VP_EPSILON) const;
+
+    // Returns SIDE_FRONT or SIDE_BACK (never SIDE_ON; a distance of exactly 0 counts as SIDE_BACK).
+    SideType GetPointSideExact(const Vector &vPoint) const;
+
+    // Classify the box with respect to the plane.
+    // Returns SIDE_ON (corners on both sides), SIDE_FRONT, or SIDE_BACK
+    SideType BoxOnPlaneSide(const Vector &vMin, const Vector &vMax) const;
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+    // Return a flipped copy of the plane (negated normal and distance); this plane is not modified.
+    VPlane Flip() const;
+
+    // Get a point on the plane (normal*dist).
+    Vector GetPointOnPlane() const;
+
+    // Snap the specified point to the plane (along the plane's normal).
+    Vector SnapPointToPlane(const Vector &vPoint) const;
+#endif
+
+public:
+    Vector m_Normal;
+    vec_t m_Dist;
+
+#ifdef VECTOR_NO_SLOW_OPERATIONS
+private:
+    // No copy constructors allowed if we're in optimal mode
+    VPlane(const VPlane& vOther);
+#endif
+};
+
+
+//-----------------------------------------------------------------------------
+// Inlines.
+//-----------------------------------------------------------------------------
+inline VPlane::VPlane()
+{
+}
+
+inline VPlane::VPlane(const Vector &vNormal, vec_t dist)
+{
+    m_Normal = vNormal;
+    m_Dist = dist;
+}
+
+inline void VPlane::Init(const Vector &vNormal, vec_t dist)
+{
+    m_Normal = vNormal;
+    m_Dist = dist;
+}
+
+inline vec_t VPlane::DistTo(const Vector &vVec) const
+{
+    return vVec.Dot(m_Normal) - m_Dist;
+}
+
+inline VPlane& VPlane::operator=(const VPlane &thePlane)
+{
+    m_Normal = thePlane.m_Normal;
+    m_Dist = thePlane.m_Dist;
+    return *this;
+}
+
+#ifndef VECTOR_NO_SLOW_OPERATIONS
+
+inline VPlane VPlane::Flip() const    // const: builds a new plane and leaves *this untouched
+{
+    return VPlane(-m_Normal, -m_Dist);
+}
+
+inline Vector VPlane::GetPointOnPlane() const
+{
+    return m_Normal * m_Dist;
+}
+
+inline Vector VPlane::SnapPointToPlane(const Vector &vPoint) const
+{
+    return vPoint - m_Normal * DistTo(vPoint);
+}
+
+#endif
+
+inline SideType VPlane::GetPointSide(const Vector &vPoint, vec_t sideEpsilon) const
+{
+    vec_t fDist;
+
+    fDist = DistTo(vPoint);
+    if(fDist >= sideEpsilon)
+        return SIDE_FRONT;
+    else if(fDist <= -sideEpsilon)
+        return SIDE_BACK;
+    else
+        return SIDE_ON;
+}
+
+inline SideType VPlane::GetPointSideExact(const Vector &vPoint) const
+{
+    return DistTo(vPoint) > 0.0f ? SIDE_FRONT : SIDE_BACK;
+}
+
+
+// BUGBUG: This should either simply use the implementation in mathlib or cease to exist.
+// mathlib implementation is much more efficient. Check to see that VPlane isn't used in
+// performance critical code.
+inline SideType VPlane::BoxOnPlaneSide(const Vector &vMin, const Vector &vMax) const
+{
+    int i, firstSide, side;
+    TableVector vPoints[8] =    // the eight corners of the box
+    {
+        { vMin.x, vMin.y, vMin.z },
+        { vMin.x, vMin.y, vMax.z },
+        { vMin.x, vMax.y, vMax.z },
+        { vMin.x, vMax.y, vMin.z },
+
+        { vMax.x, vMin.y, vMin.z },
+        { vMax.x, vMin.y, vMax.z },
+        { vMax.x, vMax.y, vMax.z },
+        { vMax.x, vMax.y, vMin.z },
+    };
+
+    firstSide = GetPointSideExact(vPoints[0]);
+    for(i=1; i < 8; i++)
+    {
+        side = GetPointSideExact(vPoints[i]);
+
+        // Does the box cross the plane?
+        if(side != firstSide)
+            return SIDE_ON;
+    }
+
+    // Ok, they're all on the same side, return that.
+    return firstSide;
+}
+
+
+
+
+#endif // VPLANE_H
-- cgit v1.2.3