diff options
| author | Jørgen P. Tjernø <[email protected]> | 2013-12-02 19:31:46 -0800 |
|---|---|---|
| committer | Jørgen P. Tjernø <[email protected]> | 2013-12-02 19:46:31 -0800 |
| commit | f56bb35301836e56582a575a75864392a0177875 (patch) | |
| tree | de61ddd39de3e7df52759711950b4c288592f0dc /sp/src/public/mathlib/amd3dx.h | |
| parent | Mark some more files as text. (diff) | |
| download | source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.tar.xz source-sdk-2013-f56bb35301836e56582a575a75864392a0177875.zip | |
Fix line endings. WHAMMY.
Diffstat (limited to 'sp/src/public/mathlib/amd3dx.h')
| -rw-r--r-- | sp/src/public/mathlib/amd3dx.h | 2376 |
1 files changed, 1188 insertions, 1188 deletions
diff --git a/sp/src/public/mathlib/amd3dx.h b/sp/src/public/mathlib/amd3dx.h index 05eb663e..9dab1bfd 100644 --- a/sp/src/public/mathlib/amd3dx.h +++ b/sp/src/public/mathlib/amd3dx.h @@ -1,1188 +1,1188 @@ -//========= Copyright Valve Corporation, All rights reserved. ============//
-/******************************************************************************
-
- Copyright (c) 1999 Advanced Micro Devices, Inc.
-
- LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY
- EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY,
- NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY
- PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY
- DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS,
- BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR
- INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY
- OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION
- OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY
- NOT APPLY TO YOU.
-
- AMD does not assume any responsibility for any errors which may appear in the
- Materials nor any responsibility to support or update the Materials. AMD retains
- the right to make changes to its test specifications at any time, without notice.
-
- NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any
- further information, software, technical information, know-how, or show-how
- available to you.
-
- So that all may benefit from your experience, please report any problems
- or suggestions about this software to [email protected]
-
- AMD Developer Technologies, M/S 585
- Advanced Micro Devices, Inc.
- 5900 E. Ben White Blvd.
- Austin, TX 78741
-
-*******************************************************************************
-
- AMD3DX.H
-
- MACRO FORMAT
- ============
- This file contains inline assembly macros that
- generate AMD-3D instructions in binary format.
- Therefore, C or C++ programmer can use AMD-3D instructions
- without any penalty in their C or C++ source code.
-
- The macro name and format conventions are as follows:
-
-
- 1. First argument of macro is a destination and
- second argument is a source operand.
- ex) _asm PFCMPEQ (mm3, mm4)
- | |
- dst src
-
- 2. The destination operand can be m0 to m7 only.
- The source operand can be any one of the register
- m0 to m7 or _eax, _ecx, _edx, _ebx, _esi, or _edi
- that contains effective address.
- ex) _asm PFRCP (MM7, MM6)
- ex) _asm PFRCPIT2 (mm0, mm4)
- ex) _asm PFMUL (mm3, _edi)
-
- 3. The prefetch(w) takes one src operand _eax, _ecx, _edx,
- _ebx, _esi, or _edi that contains effective address.
- ex) _asm PREFETCH (_edi)
-
- For WATCOM C/C++ users, when using #pragma aux instead of
- _asm, all macro names should be prefixed by a p_ or P_.
- Macros should not be enclosed in quotes.
- ex) p_pfrcp (MM7,MM6)
-
- NOTE: Not all instruction macros, nor all possible
- combinations of operands have been explicitly
- tested. If any errors are found, please report
- them.
-
- EXAMPLE
- =======
- Following program doesn't do anything but it shows you
- how to use inline assembly AMD-3D instructions in C.
- Note that this will only work in a flat memory model, in which
- the segment registers cs, ds, ss and es all point to the same
- linear address space of less than 4GB in total.
-
- Used Microsoft VC++ 5.0
-
- #include <stdio.h>
- #include "amd3dx.h"
-
- void main ()
- {
- float x = (float)1.25;
- float y = (float)1.25;
- float z, zz;
-
- _asm {
- movd mm1, x
- movd mm2, y
- pfmul (mm1, mm2)
- movd z, mm1
- femms
- }
-
- printf ("value of z = %f\n", z);
-
- //
- // Demonstration of using the memory instead of
- // multimedia register
- //
- _asm {
- movd mm3, x
- lea esi, y // load effective address of y
- pfmul (mm3, _esi)
- movd zz, mm3
- femms
- }
-
- printf ("value of zz = %f\n", zz);
- }
-
- #pragma aux EXAMPLE with WATCOM C/C++ v11.x
- ===========================================
-
- extern void Add(float *__Dest, float *__A, float *__B);
- #pragma aux Add = \
- p_femms \
- "movd mm6,[esi]" \
- p_pfadd(mm6,_edi) \
- "movd [ebx],mm6" \
- p_femms \
- parm [ebx] [esi] [edi];
-
-*******************************************************************************/
-
-#ifndef _K3DMACROSINCLUDED_
-#define _K3DMACROSINCLUDED_
-
-#if defined (__WATCOMC__)
-
-// The WATCOM C/C++ version of the 3DNow! macros.
-//
-// The older, combined register style for WATCOM C/C++ macros is not
-// supported.
-
-/* Operand (ModRM byte) defines for instructions with two operands.
-   _k3d_MODRM(dst, src) pastes its translated operand names into one of the
-   _k3d_<dst>_<src> identifiers below, whose value is the byte to emit:
-     register source : 0xc0 | (dst << 3) | src    (e.g. _k3d_mm1_mm0 = 0xc8)
-     memory source   : (dst << 3) | base          (e.g. _k3d_mm1_eax = 0x08)
-   with base codes eax=0, ecx=1, edx=2, ebx=3, esi=6, edi=7.  Only mm0..mm7
-   destinations and these six address registers have entries (there is no
-   _k3d_mmN_ebp entry, although the name translation table accepts ebp). */
-/* dst = mm0 */
-#define _k3d_mm0_mm0 0xc0
-#define _k3d_mm0_mm1 0xc1
-#define _k3d_mm0_mm2 0xc2
-#define _k3d_mm0_mm3 0xc3
-#define _k3d_mm0_mm4 0xc4
-#define _k3d_mm0_mm5 0xc5
-#define _k3d_mm0_mm6 0xc6
-#define _k3d_mm0_mm7 0xc7
-#define _k3d_mm0_eax 0x00
-#define _k3d_mm0_ecx 0x01
-#define _k3d_mm0_edx 0x02
-#define _k3d_mm0_ebx 0x03
-#define _k3d_mm0_esi 0x06
-#define _k3d_mm0_edi 0x07
-/* dst = mm1 */
-#define _k3d_mm1_mm0 0xc8
-#define _k3d_mm1_mm1 0xc9
-#define _k3d_mm1_mm2 0xca
-#define _k3d_mm1_mm3 0xcb
-#define _k3d_mm1_mm4 0xcc
-#define _k3d_mm1_mm5 0xcd
-#define _k3d_mm1_mm6 0xce
-#define _k3d_mm1_mm7 0xcf
-#define _k3d_mm1_eax 0x08
-#define _k3d_mm1_ecx 0x09
-#define _k3d_mm1_edx 0x0a
-#define _k3d_mm1_ebx 0x0b
-#define _k3d_mm1_esi 0x0e
-#define _k3d_mm1_edi 0x0f
-/* dst = mm2 */
-#define _k3d_mm2_mm0 0xd0
-#define _k3d_mm2_mm1 0xd1
-#define _k3d_mm2_mm2 0xd2
-#define _k3d_mm2_mm3 0xd3
-#define _k3d_mm2_mm4 0xd4
-#define _k3d_mm2_mm5 0xd5
-#define _k3d_mm2_mm6 0xd6
-#define _k3d_mm2_mm7 0xd7
-#define _k3d_mm2_eax 0x10
-#define _k3d_mm2_ecx 0x11
-#define _k3d_mm2_edx 0x12
-#define _k3d_mm2_ebx 0x13
-#define _k3d_mm2_esi 0x16
-#define _k3d_mm2_edi 0x17
-/* dst = mm3 */
-#define _k3d_mm3_mm0 0xd8
-#define _k3d_mm3_mm1 0xd9
-#define _k3d_mm3_mm2 0xda
-#define _k3d_mm3_mm3 0xdb
-#define _k3d_mm3_mm4 0xdc
-#define _k3d_mm3_mm5 0xdd
-#define _k3d_mm3_mm6 0xde
-#define _k3d_mm3_mm7 0xdf
-#define _k3d_mm3_eax 0x18
-#define _k3d_mm3_ecx 0x19
-#define _k3d_mm3_edx 0x1a
-#define _k3d_mm3_ebx 0x1b
-#define _k3d_mm3_esi 0x1e
-#define _k3d_mm3_edi 0x1f
-/* dst = mm4 */
-#define _k3d_mm4_mm0 0xe0
-#define _k3d_mm4_mm1 0xe1
-#define _k3d_mm4_mm2 0xe2
-#define _k3d_mm4_mm3 0xe3
-#define _k3d_mm4_mm4 0xe4
-#define _k3d_mm4_mm5 0xe5
-#define _k3d_mm4_mm6 0xe6
-#define _k3d_mm4_mm7 0xe7
-#define _k3d_mm4_eax 0x20
-#define _k3d_mm4_ecx 0x21
-#define _k3d_mm4_edx 0x22
-#define _k3d_mm4_ebx 0x23
-#define _k3d_mm4_esi 0x26
-#define _k3d_mm4_edi 0x27
-/* dst = mm5 */
-#define _k3d_mm5_mm0 0xe8
-#define _k3d_mm5_mm1 0xe9
-#define _k3d_mm5_mm2 0xea
-#define _k3d_mm5_mm3 0xeb
-#define _k3d_mm5_mm4 0xec
-#define _k3d_mm5_mm5 0xed
-#define _k3d_mm5_mm6 0xee
-#define _k3d_mm5_mm7 0xef
-#define _k3d_mm5_eax 0x28
-#define _k3d_mm5_ecx 0x29
-#define _k3d_mm5_edx 0x2a
-#define _k3d_mm5_ebx 0x2b
-#define _k3d_mm5_esi 0x2e
-#define _k3d_mm5_edi 0x2f
-/* dst = mm6 */
-#define _k3d_mm6_mm0 0xf0
-#define _k3d_mm6_mm1 0xf1
-#define _k3d_mm6_mm2 0xf2
-#define _k3d_mm6_mm3 0xf3
-#define _k3d_mm6_mm4 0xf4
-#define _k3d_mm6_mm5 0xf5
-#define _k3d_mm6_mm6 0xf6
-#define _k3d_mm6_mm7 0xf7
-#define _k3d_mm6_eax 0x30
-#define _k3d_mm6_ecx 0x31
-#define _k3d_mm6_edx 0x32
-#define _k3d_mm6_ebx 0x33
-#define _k3d_mm6_esi 0x36
-#define _k3d_mm6_edi 0x37
-/* dst = mm7 */
-#define _k3d_mm7_mm0 0xf8
-#define _k3d_mm7_mm1 0xf9
-#define _k3d_mm7_mm2 0xfa
-#define _k3d_mm7_mm3 0xfb
-#define _k3d_mm7_mm4 0xfc
-#define _k3d_mm7_mm5 0xfd
-#define _k3d_mm7_mm6 0xfe
-#define _k3d_mm7_mm7 0xff
-#define _k3d_mm7_eax 0x38
-#define _k3d_mm7_ecx 0x39
-#define _k3d_mm7_edx 0x3a
-#define _k3d_mm7_ebx 0x3b
-#define _k3d_mm7_esi 0x3e
-#define _k3d_mm7_edi 0x3f
-
-/* Operand-name translation table.  Maps every accepted spelling of an
-   operand (mN / MN / mmN / MMN, and reg / REG / _reg / _REG) to the
-   canonical _mmN or _reg suffix that _k3d_MODRM() pastes together to form
-   a _k3d_<dst>_<src> table name. */
-/* MMX registers: mN and MN spellings */
-#define _k3d_name_xlat_m0 _mm0
-#define _k3d_name_xlat_m1 _mm1
-#define _k3d_name_xlat_m2 _mm2
-#define _k3d_name_xlat_m3 _mm3
-#define _k3d_name_xlat_m4 _mm4
-#define _k3d_name_xlat_m5 _mm5
-#define _k3d_name_xlat_m6 _mm6
-#define _k3d_name_xlat_m7 _mm7
-#define _k3d_name_xlat_M0 _mm0
-#define _k3d_name_xlat_M1 _mm1
-#define _k3d_name_xlat_M2 _mm2
-#define _k3d_name_xlat_M3 _mm3
-#define _k3d_name_xlat_M4 _mm4
-#define _k3d_name_xlat_M5 _mm5
-#define _k3d_name_xlat_M6 _mm6
-#define _k3d_name_xlat_M7 _mm7
-/* MMX registers: mmN and MMN spellings */
-#define _k3d_name_xlat_mm0 _mm0
-#define _k3d_name_xlat_mm1 _mm1
-#define _k3d_name_xlat_mm2 _mm2
-#define _k3d_name_xlat_mm3 _mm3
-#define _k3d_name_xlat_mm4 _mm4
-#define _k3d_name_xlat_mm5 _mm5
-#define _k3d_name_xlat_mm6 _mm6
-#define _k3d_name_xlat_mm7 _mm7
-#define _k3d_name_xlat_MM0 _mm0
-#define _k3d_name_xlat_MM1 _mm1
-#define _k3d_name_xlat_MM2 _mm2
-#define _k3d_name_xlat_MM3 _mm3
-#define _k3d_name_xlat_MM4 _mm4
-#define _k3d_name_xlat_MM5 _mm5
-#define _k3d_name_xlat_MM6 _mm6
-#define _k3d_name_xlat_MM7 _mm7
-/* Address registers without a leading underscore */
-#define _k3d_name_xlat_eax _eax
-#define _k3d_name_xlat_ebx _ebx
-#define _k3d_name_xlat_ecx _ecx
-#define _k3d_name_xlat_edx _edx
-#define _k3d_name_xlat_esi _esi
-#define _k3d_name_xlat_edi _edi
-#define _k3d_name_xlat_ebp _ebp
-#define _k3d_name_xlat_EAX _eax
-#define _k3d_name_xlat_EBX _ebx
-#define _k3d_name_xlat_ECX _ecx
-#define _k3d_name_xlat_EDX _edx
-#define _k3d_name_xlat_ESI _esi
-#define _k3d_name_xlat_EDI _edi
-#define _k3d_name_xlat_EBP _ebp
-/* Address registers with a leading underscore (_eax style) */
-#define _k3d_name_xlat__eax _eax
-#define _k3d_name_xlat__ebx _ebx
-#define _k3d_name_xlat__ecx _ecx
-#define _k3d_name_xlat__edx _edx
-#define _k3d_name_xlat__esi _esi
-#define _k3d_name_xlat__edi _edi
-#define _k3d_name_xlat__ebp _ebp
-#define _k3d_name_xlat__EAX _eax
-#define _k3d_name_xlat__EBX _ebx
-#define _k3d_name_xlat__ECX _ecx
-#define _k3d_name_xlat__EDX _edx
-#define _k3d_name_xlat__ESI _esi
-#define _k3d_name_xlat__EDI _edi
-#define _k3d_name_xlat__EBP _ebp
-
-/* Token-pasting helpers.  _k3d_glue3 goes through _k3d_xglue3 so that its
-   arguments are macro-expanded before they are pasted.  _k3d_MODRM(dst, src)
-   translates both operand names through the _k3d_name_xlat_ table and glues
-   the results onto _k3d, yielding a _k3d_<dst>_<src> operand-byte name. */
-#define _k3d_xglue3(head,mid,tail) head##mid##tail
-#define _k3d_glue3(head,mid,tail) _k3d_xglue3(head,mid,tail)
-#define _k3d_MODRM(dst, src) \
-    _k3d_glue3(_k3d, _k3d_name_xlat_##dst, _k3d_name_xlat_##src)
-
-/* Operand defines for prefetch and prefetchw.
-   PREFETCH(src) / PREFETCHW(src) paste the register name directly onto
-   _k3d_pref_ / _k3d_prefw_ (no name translation), so only the exact
-   lower-case and upper-case spellings listed here are accepted.
-   _k3d_pref_*  is the bare base-register code; _k3d_prefw_* is the same
-   code with 0x08 added. */
-
-#define _k3d_pref_eax 0x00
-#define _k3d_pref_ecx 0x01
-#define _k3d_pref_edx 0x02
-#define _k3d_pref_ebx 0x03
-#define _k3d_pref_esi 0x06
-#define _k3d_pref_edi 0x07
-#define _k3d_pref_EAX 0x00
-#define _k3d_pref_ECX 0x01
-#define _k3d_pref_EDX 0x02
-#define _k3d_pref_EBX 0x03
-#define _k3d_pref_ESI 0x06
-#define _k3d_pref_EDI 0x07
-#define _k3d_prefw_eax 0x08
-#define _k3d_prefw_ecx 0x09
-#define _k3d_prefw_edx 0x0A
-#define _k3d_prefw_ebx 0x0B
-#define _k3d_prefw_esi 0x0E
-#define _k3d_prefw_edi 0x0F
-#define _k3d_prefw_EAX 0x08
-#define _k3d_prefw_ECX 0x09
-#define _k3d_prefw_EDX 0x0A
-#define _k3d_prefw_EBX 0x0B
-#define _k3d_prefw_ESI 0x0E
-#define _k3d_prefw_EDI 0x0F
-
-/* Defines for 3DNow! instructions.
-   Each two-operand macro expands to a `db` directive that emits
-   0x0f, 0x0f, the operand byte from _k3d_MODRM(dst, src), and finally the
-   instruction-specific opcode byte.  FEMMS, PREFETCH/PREFETCHW and CPUID
-   emit their own fixed byte sequences. */
-#define PF2ID(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x1d
-#define PFACC(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xae
-#define PFADD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9e
-#define PFCMPEQ(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb0
-#define PFCMPGE(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x90
-#define PFCMPGT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa0
-#define PFMAX(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa4
-#define PFMIN(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x94
-#define PFMUL(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb4
-#define PFRCP(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x96
-#define PFRCPIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa6
-#define PFRCPIT2(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb6
-#define PFRSQRT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x97
-#define PFRSQIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa7
-#define PFSUB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9a
-#define PFSUBR(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xaa
-#define PI2FD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x0d
-#define FEMMS db 0x0f, 0x0e
-#define PAVGUSB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xbf
-#define PMULHRW(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb7
-/* src is pasted straight onto _k3d_pref_ / _k3d_prefw_ to pick the operand byte */
-#define PREFETCH(src) db 0x0f, 0x0d, _k3d_pref_##src
-#define PREFETCHW(src) db 0x0f, 0x0d, _k3d_prefw_##src
-#define CPUID db 0x0f, 0xa2
-
-/* Defines for new, K7 opcodes.
-   PFNACC, PFPNACC and PSWAPD use the 3DNow! layout (0x0f, 0x0f, operand
-   byte, opcode byte).  The remaining macros use the MMX layout (0x0f,
-   opcode byte, operand byte), followed by the immediate `msk` where the
-   macro takes one.  MOVNTQ passes (src,dst) to _k3d_MODRM, i.e. its dst is
-   the address register and its src the MMX register.
-   NOTE(review): the operand table only has _k3d_mmN_* entries, so PEXTRW /
-   PMOVMSKB with a general-register destination have no operand byte to
-   resolve to - confirm before relying on them. */
-#define PFNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8a
-#define PFPNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8e
-/* FPPNACC is a historical misspelling of PFPNACC; kept so existing users still build. */
-#define FPPNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8e
-#define PSWAPD(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0xbb
-#define PMINUB(dst,src) db 0x0f, 0xda, _k3d_MODRM(dst,src)
-#define PMAXUB(dst,src) db 0x0f, 0xde, _k3d_MODRM(dst,src)
-#define PMINSW(dst,src) db 0x0f, 0xea, _k3d_MODRM(dst,src)
-#define PMAXSW(dst,src) db 0x0f, 0xee, _k3d_MODRM(dst,src)
-#define PMULHUW(dst,src) db 0x0f, 0xe4, _k3d_MODRM(dst,src)
-#define PAVGB(dst,src) db 0x0f, 0xe0, _k3d_MODRM(dst,src)
-#define PAVGW(dst,src) db 0x0f, 0xe3, _k3d_MODRM(dst,src)
-#define PSADBW(dst,src) db 0x0f, 0xf6, _k3d_MODRM(dst,src)
-#define PMOVMSKB(dst,src) db 0x0f, 0xd7, _k3d_MODRM(dst,src)
-#define PMASKMOVQ(dst,src) db 0x0f, 0xf7, _k3d_MODRM(dst,src)
-#define PINSRW(dst,src,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src), msk
-#define PEXTRW(dst,src,msk) db 0x0f, 0xc5, _k3d_MODRM(dst,src), msk
-#define PSHUFW(dst,src,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src), msk
-#define MOVNTQ(dst,src) db 0x0f, 0xe7, _k3d_MODRM(src,dst)
-#define SFENCE db 0x0f, 0xae, 0xf8
-
-/* Memory/offset versions of the opcodes.
-   Same byte layout as the register forms, except that the operand byte is
-   OR-ed with 0x40 and the one-byte `off` is emitted right after it, before
-   the trailing opcode byte.  src is expected to be an address register:
-   for an MMX-register src the operand byte already has the 0x40 bit set,
-   so the OR changes nothing while `off` is still emitted. */
-#define PF2IDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x1d
-#define PFACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xae
-#define PFADDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9e
-#define PFCMPEQM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb0
-#define PFCMPGEM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x90
-#define PFCMPGTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa0
-#define PFMAXM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa4
-#define PFMINM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x94
-#define PFMULM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb4
-#define PFRCPM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x96
-#define PFRCPIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa6
-#define PFRCPIT2M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb6
-#define PFRSQRTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x97
-#define PFRSQIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa7
-#define PFSUBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9a
-#define PFSUBRM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xaa
-#define PI2FDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x0d
-#define PAVGUSBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbf
-#define PMULHRWM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb7
-
-
-/* Memory/offset versions of the new, K7 opcodes.
-   As with the register forms, PFNACCM / PFPNACCM / PSWAPDM use the 3DNow!
-   layout (opcode byte last) and the rest use the MMX layout (opcode byte
-   second).  In every case the operand byte is OR-ed with 0x40 and followed
-   by the one-byte `off`; `msk`, where present, is emitted last. */
-#define PFNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8a
-#define PFPNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8e
-/* FPPNACCM is a historical misspelling of PFPNACCM; kept so existing users still build. */
-#define FPPNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8e
-#define PSWAPDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbb
-#define PMINUBM(dst,src,off) db 0x0f, 0xda, _k3d_MODRM(dst,src) | 0x40, off
-#define PMAXUBM(dst,src,off) db 0x0f, 0xde, _k3d_MODRM(dst,src) | 0x40, off
-#define PMINSWM(dst,src,off) db 0x0f, 0xea, _k3d_MODRM(dst,src) | 0x40, off
-#define PMAXSWM(dst,src,off) db 0x0f, 0xee, _k3d_MODRM(dst,src) | 0x40, off
-#define PMULHUWM(dst,src,off) db 0x0f, 0xe4, _k3d_MODRM(dst,src) | 0x40, off
-#define PAVGBM(dst,src,off) db 0x0f, 0xe0, _k3d_MODRM(dst,src) | 0x40, off
-#define PAVGWM(dst,src,off) db 0x0f, 0xe3, _k3d_MODRM(dst,src) | 0x40, off
-#define PSADBWM(dst,src,off) db 0x0f, 0xf6, _k3d_MODRM(dst,src) | 0x40, off
-#define PMOVMSKBM(dst,src,off) db 0x0f, 0xd7, _k3d_MODRM(dst,src) | 0x40, off
-#define PMASKMOVQM(dst,src,off) db 0x0f, 0xf7, _k3d_MODRM(dst,src) | 0x40, off
-#define MOVNTQM(dst,src,off) db 0x0f, 0xe7, _k3d_MODRM(src,dst) | 0x40, off
-#define PINSRWM(dst,src,off,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src) | 0x40, off, msk
-#define PSHUFWM(dst,src,off,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src) | 0x40, off, msk
-
-
-/* Defines for 3DNow! instructions for use in pragmas.
-   These expand to the same bytes as the upper-case db macros, but as a
-   bare, space-separated byte list with no `db` and no commas, which is the
-   form shown in the #pragma aux example in the header comment. */
-#define p_pf2id(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x1d
-#define p_pfacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xae
-#define p_pfadd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9e
-#define p_pfcmpeq(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb0
-#define p_pfcmpge(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x90
-#define p_pfcmpgt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa0
-#define p_pfmax(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa4
-#define p_pfmin(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x94
-#define p_pfmul(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb4
-#define p_pfrcp(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x96
-#define p_pfrcpit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa6
-#define p_pfrcpit2(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb6
-#define p_pfrsqrt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x97
-#define p_pfrsqit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa7
-#define p_pfsub(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9a
-#define p_pfsubr(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xaa
-#define p_pi2fd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x0d
-#define p_femms 0x0f 0x0e
-#define p_pavgusb(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xbf
-#define p_pmulhrw(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb7
-#define p_prefetch(src) 0x0f 0x0d _k3d_pref_##src
-#define p_prefetchw(src) 0x0f 0x0d _k3d_prefw_##src
-#define P_PFNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a
-#define P_FPPNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e
-#define P_PSWAPD(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb
-#define P_PMINUB(dst,src) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMAXUB(dst,src) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMINSW(dst,src) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMAXSW(dst,src) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMULHUW(dst,src) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PAVGB(dst,src) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PAVGW(dst,src) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PSADBW(dst,src) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMOVMSKB(dst,src) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMASKMOVQ(dst,src) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PINSRW(dst,src,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk
-#define P_PEXTRW(dst,src,msk) 0x0f 0xc5 (_k3d_MODRM(dst,src) | 0x40) off msk
-#define P_PSHUFW(dst,src,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk
-#define P_MOVNTQ(dst,src) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off
-
-#define P_PF2IDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x1d
-#define P_PFACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xae
-#define P_PFADDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9e
-#define P_PFCMPEQM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb0
-#define P_PFCMPGEM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x90
-#define P_PFCMPGTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa0
-#define P_PFMAXM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa4
-#define P_PFMINM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x94
-#define P_PFMULM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb4
-#define P_PFRCPM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x96
-#define P_PFRCPIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa6
-#define P_PFRCPIT2M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb6
-#define P_PFRSQRTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x97
-#define P_PFRSQIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa7
-#define P_PFSUBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9a
-#define P_PFSUBRM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xaa
-#define P_PI2FDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x0d
-#define P_PAVGUSBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbf
-#define P_PMULHRWM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb7
-#define P_PFNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a
-#define P_FPPNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e
-#define P_PSWAPDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb
-#define P_PMINUBM(dst,src,off) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMAXUBM(dst,src,off) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMINSWM(dst,src,off) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMAXSWM(dst,src,off) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMULHUWM(dst,src,off) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PAVGBM(dst,src,off) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PAVGWM(dst,src,off) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PSADBWM(dst,src,off) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PMOVMSKBM(dst,src,off) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_MOVNTQM(dst,src,off) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off
-#define P_PMASKMOVQM(dst,src,off) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off
-#define P_PINSRWM(dst,src,off,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk
-#define P_PSHUFWM(dst,src,off,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk
-
-
-#define P_PF2ID(dst,src) p_pf2id(dst,src)
-#define P_PFACC(dst,src) p_pfacc(dst,src)
-#define P_PFADD(dst,src) p_pfadd(dst,src)
-#define P_PFCMPEQ(dst,src) p_pfcmpeq(dst,src)
-#define P_PFCMPGE(dst,src) p_pfcmpge(dst,src)
-#define P_PFCMPGT(dst,src) p_pfcmpgt(dst,src)
-#define P_PFMAX(dst,src) p_pfmax(dst,src)
-#define P_PFMIN(dst,src) p_pfmin(dst,src)
-#define P_PFMUL(dst,src) p_pfmul(dst,src)
-#define P_PFRCP(dst,src) p_pfrcp(dst,src)
-#define P_PFRCPIT1(dst,src) p_pfrcpit1(dst,src)
-#define P_PFRCPIT2(dst,src) p_pfrcpit2(dst,src)
-#define P_PFRSQRT(dst,src) p_pfrsqrt(dst,src)
-#define P_PFRSQIT1(dst,src) p_pfrsqit1(dst,src)
-#define P_PFSUB(dst,src) p_pfsub(dst,src)
-#define P_PFSUBR(dst,src) p_pfsubr(dst,src)
-#define P_PI2FD(dst,src) p_pi2fd(dst,src)
-#define P_FEMMS p_femms
-#define P_PAVGUSB(dst,src) p_pavgusb(dst,src)
-#define P_PMULHRW(dst,src) p_pmulhrw(dst,src)
-#define P_PREFETCH(src) p_prefetch(src)
-#define P_PREFETCHW(src) p_prefetchw(src)
-#define p_CPUID 0x0f 0xa2
-#define p_pf2idm(dst,src,off) P_PF2IDM(dst,src,off)
-#define p_pfaccm(dst,src,off) P_PFACCM(dst,src,off)
-#define p_pfaddm(dst,src,off) P_PFADDM(dst,src,off)
-#define p_pfcmpeqm(dst,src,off) P_PFCMPEQM(dst,src,off)
-#define p_pfcmpgem(dst,src,off) P_PFCMPGEM(dst,src,off)
-#define p_pfcmpgtm(dst,src,off) P_PFCMPGTM(dst,src,off)
-#define p_pfmaxm(dst,src,off) P_PFMAXM(dst,src,off)
-#define p_pfminm(dst,src,off) P_PFMINM(dst,src,off)
-#define p_pfmulm(dst,src,off) P_PFMULM(dst,src,off)
-#define p_pfrcpm(dst,src,off) P_PFRCPM(dst,src,off)
-#define p_pfrcpit1m(dst,src,off) P_PFRCPIT1M(dst,src,off)
-#define p_pfrcpit2m(dst,src,off) P_PFRCPIT2M(dst,src,off)
-#define p_pfrsqrtm(dst,src,off) P_PFRSQRTM(dst,src,off)
-#define p_pfrsqit1m(dst,src,off) P_PFRSQIT1M(dst,src,off)
-#define p_pfsubm(dst,src,off) P_PFSUBM(dst,src,off)
-#define p_pfsubrm(dst,src,off) P_PFSUBRM(dst,src,off)
-#define p_pi2fdm(dst,src,off) P_PI2FDM(dst,src,off)
-#define p_pavgusbm(dst,src,off) P_PAVGUSBM(dst,src,off)
-#define p_pmulhrwm(dst,src,off) P_PMULHRWM(dst,src,off)
-
-#define P_PFNACC(dst,src) p_pfnacc(dst,src)
-#define P_FPPNACC(dst,src) p_pfpnacc(dst,src)
-#define P_PSWAPD(dst,src) p_pswapd(dst,src)
-#define P_PMINUB(dst,src) p_pminub(dst,src)
-#define P_PMAXUB(dst,src) p_pmaxub(dst,src)
-#define P_PMINSW(dst,src) p_pminsw(dst,src)
-#define P_PMAXSW(dst,src) p_pmaxsw(dst,src)
-#define P_PMULHUW(dst,src) p_pmulhuw(dst,src)
-#define P_PAVGB(dst,src) p_pavgb(dst,src)
-#define P_PAVGW(dst,src) p_avgw(dst,src)
-#define P_PSADBW(dst,src) p_psadbw(dst,src)
-#define P_PMOVMSKB(dst,src) p_pmovmskb(dst,src)
-#define P_PMASKMOVQ(dst,src) p_pmaskmovq(dst,src)
-#define P_PINSRW(dst,src,msk) p_pinsrw(dst,src)
-#define P_PEXTRW(dst,src,msk) p_pextrw(dst,src)
-#define P_PSHUFW(dst,src,msk) p_pshufw(dst,src)
-#define P_MOVNTQ(dst,src) p_movntq(dst,src)
-
-#define P_PFNACCM(dst,src,off) p_pfnaccm(dst,src,off)
-#define P_FPPNACCM(dst,src,off) p_pfpnaccm(dst,src,off)
-#define P_PSWAPDM(dst,src,off) p_pswapdm(dst,src,off)
-#define P_PMINUBM(dst,src,off) p_pminubm(dst,src,off)
-#define P_PMAXUBM(dst,src,off) p_pmaxubm(dst,src,off)
-#define P_PMINSWM(dst,src,off) p_pminswm(dst,src,off)
-#define P_PMAXSWM(dst,src,off) p_pmaxswm(dst,src,off)
-#define P_PMULHUWM(dst,src,off) p_pmulhuwm(dst,src,off)
-#define P_PAVGBM(dst,src,off) p_pavgbm(dst,src,off)
-#define P_PAVGWM(dst,src,off) p_avgwm(dst,src,off)
-#define P_PSADBWM(dst,src,off) p_psadbwm(dst,src,off)
-#define P_PMOVMSKBM(dst,src,off) p_pmovmskbm(dst,src,off)
-#define P_PMASKMOVQM(dst,src,off) p_pmaskmovqm(dst,src,off)
-#define P_PINSRWM(dst,src,off,msk) p_pinsrwm(dst,src,off,msk)
-#define P_PSHUFWM(dst,src,off,msk) p_pshufwm(dst,src,off,msk)
-#define P_MOVNTQM(dst,src,off) p_movntqm(dst,src,off)
-
-#elif defined (_MSC_VER) && !defined (__MWERKS__)
-// The Microsoft Visual C++ version of the 3DNow! macros.
-
-// Stop the "no EMMS" warning, since it doesn't detect FEMMS properly
-#pragma warning(disable:4799)
-
-// Defines for operands.
-// The Inj*Ops and PREFETCH* macros paste the operand name onto _K3D_ to
-// look up its code.  MMX registers map to 0xc0 + register number; the
-// address registers map to their bare codes (eax=0, ecx=1, edx=2, ebx=3,
-// esi=6, edi=7).  Both upper-case and lower-case spellings are provided.
-#define _K3D_MM0 0xc0
-#define _K3D_MM1 0xc1
-#define _K3D_MM2 0xc2
-#define _K3D_MM3 0xc3
-#define _K3D_MM4 0xc4
-#define _K3D_MM5 0xc5
-#define _K3D_MM6 0xc6
-#define _K3D_MM7 0xc7
-#define _K3D_mm0 0xc0
-#define _K3D_mm1 0xc1
-#define _K3D_mm2 0xc2
-#define _K3D_mm3 0xc3
-#define _K3D_mm4 0xc4
-#define _K3D_mm5 0xc5
-#define _K3D_mm6 0xc6
-#define _K3D_mm7 0xc7
-#define _K3D_EAX 0x00
-#define _K3D_ECX 0x01
-#define _K3D_EDX 0x02
-#define _K3D_EBX 0x03
-#define _K3D_ESI 0x06
-#define _K3D_EDI 0x07
-#define _K3D_eax 0x00
-#define _K3D_ecx 0x01
-#define _K3D_edx 0x02
-#define _K3D_ebx 0x03
-#define _K3D_esi 0x06
-#define _K3D_edi 0x07
-
-// These defines are for compatibility with the previous version of the header file.
-// They give the older operand spellings (M0..M7 / m0..m7 and the
-// underscore-prefixed _EAX.. / _eax.. names) the same codes as the
-// current mmN / register names.
-#define _K3D_M0 0xc0
-#define _K3D_M1 0xc1
-#define _K3D_M2 0xc2
-#define _K3D_M3 0xc3
-#define _K3D_M4 0xc4
-#define _K3D_M5 0xc5
-#define _K3D_M6 0xc6
-#define _K3D_M7 0xc7
-#define _K3D_m0 0xc0
-#define _K3D_m1 0xc1
-#define _K3D_m2 0xc2
-#define _K3D_m3 0xc3
-#define _K3D_m4 0xc4
-#define _K3D_m5 0xc5
-#define _K3D_m6 0xc6
-#define _K3D_m7 0xc7
-#define _K3D__EAX 0x00
-#define _K3D__ECX 0x01
-#define _K3D__EDX 0x02
-#define _K3D__EBX 0x03
-#define _K3D__ESI 0x06
-#define _K3D__EDI 0x07
-#define _K3D__eax 0x00
-#define _K3D__ecx 0x01
-#define _K3D__edx 0x02
-#define _K3D__ebx 0x03
-#define _K3D__esi 0x06
-#define _K3D__edi 0x07
-
-// General 3DNow! instruction format that is supported by
-// these macros. Note that only the most basic form of memory
-// operands are supported by these macros.
-//
-// All four macros build the operand byte the same way: the low bits of the
-// dst code are shifted left by 3 and OR-ed with the src code, so a register
-// src (0xc0..0xc7) contributes the 0xc0 bits and an address-register src
-// (0x00..0x07) does not.  `inst` is pasted onto _3DNowOpcode to select the
-// opcode byte.
-//   InjK3DOps  : 0x0f 0x0f <operand> <opcode>
-//   InjK3DMOps : 0x0f 0x0f <operand | 0x40> <off> <opcode>
-//   InjMMXOps  : 0x0f <opcode> <operand>
-//   InjMMXMOps : 0x0f <opcode> <operand | 0x40> <off>
-// `off` is emitted as a single byte.
-
-#define InjK3DOps(dst,src,inst) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0f \
- _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \
- _asm _emit _3DNowOpcode##inst \
-}
-
-#define InjK3DMOps(dst,src,off,inst) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0f \
- _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \
- _asm _emit off \
- _asm _emit _3DNowOpcode##inst \
-}
-
-#define InjMMXOps(dst,src,inst) \
-{ \
- _asm _emit 0x0f \
- _asm _emit _3DNowOpcode##inst \
- _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \
-}
-
-#define InjMMXMOps(dst,src,off,inst) \
-{ \
- _asm _emit 0x0f \
- _asm _emit _3DNowOpcode##inst \
- _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \
- _asm _emit off \
-}
-
-// Opcode bytes selected by the `inst` argument of the Inj*Ops macros
-// (pasted onto _3DNowOpcode).  For InjK3DOps / InjK3DMOps the value is the
-// trailing opcode byte; for InjMMXOps / InjMMXMOps it is the byte that
-// follows the 0x0f prefix.
-#define _3DNowOpcodePF2ID 0x1d
-#define _3DNowOpcodePFACC 0xae
-#define _3DNowOpcodePFADD 0x9e
-#define _3DNowOpcodePFCMPEQ 0xb0
-#define _3DNowOpcodePFCMPGE 0x90
-#define _3DNowOpcodePFCMPGT 0xa0
-#define _3DNowOpcodePFMAX 0xa4
-#define _3DNowOpcodePFMIN 0x94
-#define _3DNowOpcodePFMUL 0xb4
-#define _3DNowOpcodePFRCP 0x96
-#define _3DNowOpcodePFRCPIT1 0xa6
-#define _3DNowOpcodePFRCPIT2 0xb6
-#define _3DNowOpcodePFRSQRT 0x97
-#define _3DNowOpcodePFRSQIT1 0xa7
-#define _3DNowOpcodePFSUB 0x9a
-#define _3DNowOpcodePFSUBR 0xaa
-#define _3DNowOpcodePI2FD 0x0d
-#define _3DNowOpcodePAVGUSB 0xbf
-#define _3DNowOpcodePMULHRW 0xb7
-#define _3DNowOpcodePFNACC 0x8a
-// PFPNACC is the spelling the PFPNACC() macro pastes; FPPNACC is the
-// historical misspelling, kept so any existing user of it still builds.
-#define _3DNowOpcodePFPNACC 0x8e
-#define _3DNowOpcodeFPPNACC 0x8e
-#define _3DNowOpcodePSWAPD 0xbb
-#define _3DNowOpcodePMINUB 0xda
-#define _3DNowOpcodePMAXUB 0xde
-#define _3DNowOpcodePMINSW 0xea
-#define _3DNowOpcodePMAXSW 0xee
-#define _3DNowOpcodePMULHUW 0xe4
-#define _3DNowOpcodePAVGB 0xe0
-#define _3DNowOpcodePAVGW 0xe3
-#define _3DNowOpcodePSADBW 0xf6
-#define _3DNowOpcodePMOVMSKB 0xd7
-#define _3DNowOpcodePMASKMOVQ 0xf7
-#define _3DNowOpcodePINSRW 0xc4
-#define _3DNowOpcodePEXTRW 0xc5
-#define _3DNowOpcodePSHUFW 0x70
-#define _3DNowOpcodeMOVNTQ 0xe7
-#define _3DNowOpcodePREFETCHT 0x18
-
-
-// 3DNow! instruction macros (register or plain [reg] source).  Each one
-// forwards to InjK3DOps with its own name as the opcode selector, so it
-// emits 0x0f 0x0f <operand byte> <_3DNowOpcode<name>>.
-#define PF2ID(dst,src) InjK3DOps(dst, src, PF2ID)
-#define PFACC(dst,src) InjK3DOps(dst, src, PFACC)
-#define PFADD(dst,src) InjK3DOps(dst, src, PFADD)
-#define PFCMPEQ(dst,src) InjK3DOps(dst, src, PFCMPEQ)
-#define PFCMPGE(dst,src) InjK3DOps(dst, src, PFCMPGE)
-#define PFCMPGT(dst,src) InjK3DOps(dst, src, PFCMPGT)
-#define PFMAX(dst,src) InjK3DOps(dst, src, PFMAX)
-#define PFMIN(dst,src) InjK3DOps(dst, src, PFMIN)
-#define PFMUL(dst,src) InjK3DOps(dst, src, PFMUL)
-#define PFRCP(dst,src) InjK3DOps(dst, src, PFRCP)
-#define PFRCPIT1(dst,src) InjK3DOps(dst, src, PFRCPIT1)
-#define PFRCPIT2(dst,src) InjK3DOps(dst, src, PFRCPIT2)
-#define PFRSQRT(dst,src) InjK3DOps(dst, src, PFRSQRT)
-#define PFRSQIT1(dst,src) InjK3DOps(dst, src, PFRSQIT1)
-#define PFSUB(dst,src) InjK3DOps(dst, src, PFSUB)
-#define PFSUBR(dst,src) InjK3DOps(dst, src, PFSUBR)
-#define PI2FD(dst,src) InjK3DOps(dst, src, PI2FD)
-#define PAVGUSB(dst,src) InjK3DOps(dst, src, PAVGUSB)
-#define PMULHRW(dst,src) InjK3DOps(dst, src, PMULHRW)
-
-// FEMMS: emits the two-byte sequence 0x0f 0x0e.
-#define FEMMS \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0e \
-}
-
-// PREFETCH(src): emits 0x0f 0x0d followed by the low three bits of the
-// src register code, i.e. a plain [src] operand with no offset.
-#define PREFETCH(src) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit (_K3D_##src & 0x07) \
-}
-
-/* Prefetch with a short offset: off is emitted as a single byte
-   (8-bit signed displacement, -128 <= off <= 127).
-   Careful! The macro doesn't check that your offset is in range. */
-
-// PREFETCHM(src,off): like PREFETCH, but sets 0x40 in the operand byte and
-// emits `off` as one extra byte after it.
-#define PREFETCHM(src,off) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit (0x40 | (_K3D_##src & 0x07)) \
- _asm _emit off \
-}
-
-/* Prefetch with a long offset: sets 0x80 in the operand byte and emits
-   `off` as four bytes, least-significant byte first.  `off` is
-   parenthesized in each mask so that an expression argument (e.g. a+b) is
-   masked as a whole rather than only its last term. */
-
-#define PREFETCHMLONG(src,off) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit (0x80 | (_K3D_##src & 0x07)) \
- _asm _emit ((off) & 0x000000ff) \
- _asm _emit ((off) & 0x0000ff00) >> 8 \
- _asm _emit ((off) & 0x00ff0000) >> 16 \
- _asm _emit ((off) & 0xff000000) >> 24 \
-}
-
-// PREFETCHW(src): same bytes as PREFETCH, with 0x08 OR-ed into the
-// operand byte.
-#define PREFETCHW(src) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit (0x08 | (_K3D_##src & 0x07)) \
-}
-
-// PREFETCHWM(src,off): PREFETCHW with a one-byte offset; the operand byte
-// is 0x48 (0x40 | 0x08) OR-ed with the src register code, then `off`.
-#define PREFETCHWM(src,off) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit 0x48 | (_K3D_##src & 0x07) \
- _asm _emit off \
-}
-
-// PREFETCHWMLONG(src,off): PREFETCHW with a four-byte offset; the operand
-// byte is 0x88 (0x80 | 0x08) OR-ed with the src register code, followed by
-// `off` least-significant byte first.  `off` is parenthesized in each mask
-// so that an expression argument is masked as a whole.
-#define PREFETCHWMLONG(src,off) \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0x0d \
- _asm _emit 0x88 | (_K3D_##src & 0x07) \
- _asm _emit ((off) & 0x000000ff) \
- _asm _emit ((off) & 0x0000ff00) >> 8 \
- _asm _emit ((off) & 0x00ff0000) >> 16 \
- _asm _emit ((off) & 0xff000000) >> 24 \
-}
-
-/* CPUID: takes no operands and emits the two bytes 0x0f 0xa2. */
-#define CPUID \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0xa2 \
-}
-
-
-/* Defines for new, K7 opcodes */
-/* SFENCE: takes no operands and emits the three bytes 0x0f 0xae 0xf8. */
-#define SFENCE \
-{ \
- _asm _emit 0x0f \
- _asm _emit 0xae \
- _asm _emit 0xf8 \
-}
-
-/* K7 two-operand wrappers.  The first three go through InjK3DOps, the rest
-   through InjMMXOps (both defined elsewhere in this header).
-   - PINSRW / PEXTRW / PSHUFW emit `msk` as one extra byte after the
-     injected instruction.
-   - MOVNTQ forwards its operands in swapped order (src first).
-   - The PREFETCHNTA / PREFETCHT0..2 wrappers all use the PREFETCHT opcode
-     name and differ only in passing mm0..mm3 as the first operand.
-     NOTE(review): presumably that operand only selects the prefetch
-     variant -- confirm against the InjMMXOps definition. */
-#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC)
-#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC)
-#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD)
-#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB)
-#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB)
-#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW)
-#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW)
-#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW)
-#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB)
-#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW)
-#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW)
-#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB)
-#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ)
-#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) _asm _emit msk
-#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) _asm _emit msk
-#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) _asm _emit msk
-#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ)
-#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT)
-#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT)
-#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT)
-#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT)
-
-
-/* Memory/offset versions of the opcodes */
-/* Each ...M wrapper forwards dst, src, off and the name of the matching
-   register-form instruction to InjK3DMOps (defined elsewhere in this
-   header). */
-#define PAVGUSBM(dst,src,off) InjK3DMOps(dst,src,off,PAVGUSB)
-#define PF2IDM(dst,src,off) InjK3DMOps(dst,src,off,PF2ID)
-#define PFACCM(dst,src,off) InjK3DMOps(dst,src,off,PFACC)
-#define PFADDM(dst,src,off) InjK3DMOps(dst,src,off,PFADD)
-#define PFCMPEQM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPEQ)
-#define PFCMPGEM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGE)
-#define PFCMPGTM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGT)
-#define PFMAXM(dst,src,off) InjK3DMOps(dst,src,off,PFMAX)
-#define PFMINM(dst,src,off) InjK3DMOps(dst,src,off,PFMIN)
-#define PFMULM(dst,src,off) InjK3DMOps(dst,src,off,PFMUL)
-#define PFRCPM(dst,src,off) InjK3DMOps(dst,src,off,PFRCP)
-#define PFRCPIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT1)
-#define PFRCPIT2M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT2)
-#define PFRSQRTM(dst,src,off) InjK3DMOps(dst,src,off,PFRSQRT)
-#define PFRSQIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRSQIT1)
-#define PFSUBM(dst,src,off) InjK3DMOps(dst,src,off,PFSUB)
-#define PFSUBRM(dst,src,off) InjK3DMOps(dst,src,off,PFSUBR)
-#define PI2FDM(dst,src,off) InjK3DMOps(dst,src,off,PI2FD)
-#define PMULHRWM(dst,src,off) InjK3DMOps(dst,src,off,PMULHRW)
-
-
-/* Memory/offset versions of the K7 opcodes */
-/* The first three wrappers go through InjK3DMOps, the rest through
-   InjMMXMOps (both defined elsewhere in this header).  PINSRWM and PSHUFWM
-   emit `msk` as one extra byte; MOVNTQM forwards src before dst.
-   NOTE(review): this list has no PEXTRWM, although the lowercase alias
-   pextrwm at the end of the header forwards to PEXTRWM -- confirm whether
-   the omission is intentional. */
-#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC)
-#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC)
-#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD)
-#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB)
-#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB)
-#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW)
-#define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW)
-#define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW)
-#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB)
-#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW)
-#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW)
-#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB)
-#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ)
-#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW) _asm _emit msk
-#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW) _asm _emit msk
-#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ)
-#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT)
-#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT)
-#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT)
-#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT)
-
-
-#else
-
-/* Assume built-in support for 3DNow! opcodes, replace macros with opcodes */
-/* Each macro expands to the lowercase mnemonic followed by its operands,
-   so the assembler receives an ordinary instruction line. */
-#define PAVGUSB(dst,src) pavgusb dst,src
-#define PF2ID(dst,src) pf2id dst,src
-#define PFACC(dst,src) pfacc dst,src
-#define PFADD(dst,src) pfadd dst,src
-#define PFCMPEQ(dst,src) pfcmpeq dst,src
-#define PFCMPGE(dst,src) pfcmpge dst,src
-#define PFCMPGT(dst,src) pfcmpgt dst,src
-#define PFMAX(dst,src) pfmax dst,src
-#define PFMIN(dst,src) pfmin dst,src
-#define PFMUL(dst,src) pfmul dst,src
-#define PFRCP(dst,src) pfrcp dst,src
-#define PFRCPIT1(dst,src) pfrcpit1 dst,src
-#define PFRCPIT2(dst,src) pfrcpit2 dst,src
-#define PFRSQRT(dst,src) pfrsqrt dst,src
-#define PFRSQIT1(dst,src) pfrsqit1 dst,src
-#define PFSUB(dst,src) pfsub dst,src
-#define PFSUBR(dst,src) pfsubr dst,src
-#define PI2FD(dst,src) pi2fd dst,src
-#define PMULHRW(dst,src) pmulhrw dst,src
-#define PREFETCH(src) prefetch src
-#define PREFETCHW(src) prefetchw src
-
-/* Memory forms: the source operand is written as [src+off].
-   Only PAVGUSBM spells its mnemonic in lowercase; the others use the
-   uppercase name.  Those uppercase names are not followed by '(' here, so
-   the function-like macros of the same name are not expanded. */
-#define PAVGUSBM(dst,src,off) pavgusb dst,[src+off]
-#define PF2IDM(dst,src,off) PF2ID dst,[src+off]
-#define PFACCM(dst,src,off) PFACC dst,[src+off]
-#define PFADDM(dst,src,off) PFADD dst,[src+off]
-#define PFCMPEQM(dst,src,off) PFCMPEQ dst,[src+off]
-#define PFCMPGEM(dst,src,off) PFCMPGE dst,[src+off]
-#define PFCMPGTM(dst,src,off) PFCMPGT dst,[src+off]
-#define PFMAXM(dst,src,off) PFMAX dst,[src+off]
-#define PFMINM(dst,src,off) PFMIN dst,[src+off]
-#define PFMULM(dst,src,off) PFMUL dst,[src+off]
-#define PFRCPM(dst,src,off) PFRCP dst,[src+off]
-#define PFRCPIT1M(dst,src,off) PFRCPIT1 dst,[src+off]
-#define PFRCPIT2M(dst,src,off) PFRCPIT2 dst,[src+off]
-#define PFRSQRTM(dst,src,off) PFRSQRT dst,[src+off]
-#define PFRSQIT1M(dst,src,off) PFRSQIT1 dst,[src+off]
-#define PFSUBM(dst,src,off) PFSUB dst,[src+off]
-#define PFSUBRM(dst,src,off) PFSUBR dst,[src+off]
-#define PI2FDM(dst,src,off) PI2FD dst,[src+off]
-#define PMULHRWM(dst,src,off) PMULHRW dst,[src+off]
-
-
-#if defined (__MWERKS__)
-// At the moment, CodeWarrior does not support these opcodes, so hand-assemble them
-
-// Defines for operands.
-// The Inj* macros below paste the operand spelling onto _K3D_ to look up
-// its code.  MMX registers map to 0xc0 plus the register number; the
-// general-purpose registers map to the bare register number.
-#define _K3D_MM0 0xc0
-#define _K3D_MM1 0xc1
-#define _K3D_MM2 0xc2
-#define _K3D_MM3 0xc3
-#define _K3D_MM4 0xc4
-#define _K3D_MM5 0xc5
-#define _K3D_MM6 0xc6
-#define _K3D_MM7 0xc7
-#define _K3D_mm0 0xc0
-#define _K3D_mm1 0xc1
-#define _K3D_mm2 0xc2
-#define _K3D_mm3 0xc3
-#define _K3D_mm4 0xc4
-#define _K3D_mm5 0xc5
-#define _K3D_mm6 0xc6
-#define _K3D_mm7 0xc7
-#define _K3D_EAX 0x00
-#define _K3D_ECX 0x01
-#define _K3D_EDX 0x02
-#define _K3D_EBX 0x03
-#define _K3D_ESI 0x06
-#define _K3D_EDI 0x07
-#define _K3D_eax 0x00
-#define _K3D_ecx 0x01
-#define _K3D_edx 0x02
-#define _K3D_ebx 0x03
-#define _K3D_esi 0x06
-#define _K3D_edi 0x07
-// Underscore-prefixed spellings (_eax, _EDI, ...), the form used for memory
-// operands in the usage notes at the top of this header.  These replace a
-// second, verbatim copy of the twelve defines above.
-#define _K3D__EAX 0x00
-#define _K3D__ECX 0x01
-#define _K3D__EDX 0x02
-#define _K3D__EBX 0x03
-#define _K3D__ESI 0x06
-#define _K3D__EDI 0x07
-#define _K3D__eax 0x00
-#define _K3D__ecx 0x01
-#define _K3D__edx 0x02
-#define _K3D__ebx 0x03
-#define _K3D__esi 0x06
-#define _K3D__edi 0x07
-
-/* InjK3DOps: emits 0x0f 0x0f, an operand byte, then _3DNowOpcode<inst>.
-   The operand byte is the low six bits of the dst code shifted left by
-   three, OR'ed with the src code. */
-#define InjK3DOps(dst,src,inst) \
- db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src), _3DNowOpcode##inst
-
-/* InjK3DMOps: as InjK3DOps, but with 0x40 OR'ed into the operand byte and
-   `off` emitted between the operand byte and the opcode. */
-#define InjK3DMOps(dst,src,off,inst) \
- db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off, _3DNowOpcode##inst
-
-/* InjMMXOps: emits 0x0f, _3DNowOpcode<inst>, then the operand byte. */
-#define InjMMXOps(dst,src,inst) \
- db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src)
-
-/* InjMMXMOps: as InjMMXOps, but with 0x40 OR'ed into the operand byte and
-   `off` emitted last. */
-#define InjMMXMOps(dst,src,off,inst) \
- db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off
-
-/* K7 two-operand wrappers for the hand-assembled path: the first three use
-   InjK3DOps, the rest use InjMMXOps. */
-#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC)
-#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC)
-#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD)
-#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB)
-#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB)
-#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW)
-#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW)
-#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW)
-#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB)
-#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW)
-#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW)
-#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB)
-#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ)
-/* The immediate byte is appended to the db list that InjMMXOps starts,
-   the same way the memory forms (PINSRWM, PEXTRWM, PSHUFWM) of this branch
-   do it, instead of opening a second db directive on the same line. */
-#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW), msk
-#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW), msk
-#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW), msk
-/* MOVNTQ hands its operands to InjMMXOps in swapped order (src first).
-   The four prefetch wrappers share the PREFETCHT opcode name and pass
-   mm0..mm3 as the first operand, which InjMMXOps places in bits 3-5 of
-   the operand byte. */
-#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ)
-#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT)
-#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT)
-#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT)
-#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT)
-
-
-/* Memory/offset versions of the K7 opcodes */
-/* The first three wrappers use InjK3DMOps, the rest use InjMMXMOps.
-   PINSRWM / PEXTRWM / PSHUFWM append `msk` to the db list; MOVNTQM passes
-   src before dst. */
-#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC)
-#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC)
-#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD)
-#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB)
-#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB)
-#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW)
-#define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW)
-#define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW)
-#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB)
-#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW)
-#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW)
-#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB)
-#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ)
-#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW), msk
-#define PEXTRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PEXTRW), msk
-#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW), msk
-#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ)
-#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT)
-#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT)
-#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT)
-#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT)
-
-
-#else
-
-/* Each macro expands to its own name followed by the operands.  A macro
-   name is not re-expanded inside its own replacement, so the assembler
-   receives the plain mnemonic. */
-#define PFNACC(dst,src) PFNACC dst,src
-#define PFPNACC(dst,src) PFPNACC dst,src
-#define PSWAPD(dst,src) PSWAPD dst,src
-#define PMINUB(dst,src) PMINUB dst,src
-#define PMAXUB(dst,src) PMAXUB dst,src
-#define PMINSW(dst,src) PMINSW dst,src
-#define PMAXSW(dst,src) PMAXSW dst,src
-#define PMULHUW(dst,src) PMULHUW dst,src
-#define PAVGB(dst,src) PAVGB dst,src
-#define PAVGW(dst,src) PAVGW dst,src
-#define PSADBW(dst,src) PSADBW dst,src
-#define PMOVMSKB(dst,src) PMOVMSKB dst,src
-#define PMASKMOVQ(dst,src) PMASKMOVQ dst,src
-#define PINSRW(dst,src,msk) PINSRW dst,src,msk
-#define PEXTRW(dst,src,msk) PEXTRW dst,src,msk
-#define PSHUFW(dst,src,msk) PSHUFW dst,src,msk
-#define MOVNTQ(dst,src) MOVNTQ dst,src
-
-/* Memory forms: the second operand is written as [src+off]; the three
-   masked instructions pass `msk` as a trailing operand. */
-#define PFNACCM(dst,src,off) PFNACC dst,[src+off]
-#define PFPNACCM(dst,src,off) PFPNACC dst,[src+off]
-#define PSWAPDM(dst,src,off) PSWAPD dst,[src+off]
-#define PMINUBM(dst,src,off) PMINUB dst,[src+off]
-#define PMAXUBM(dst,src,off) PMAXUB dst,[src+off]
-#define PMINSWM(dst,src,off) PMINSW dst,[src+off]
-#define PMAXSWM(dst,src,off) PMAXSW dst,[src+off]
-#define PMULHUWM(dst,src,off) PMULHUW dst,[src+off]
-#define PAVGBM(dst,src,off) PAVGB dst,[src+off]
-#define PAVGWM(dst,src,off) PAVGW dst,[src+off]
-#define PSADBWM(dst,src,off) PSADBW dst,[src+off]
-#define PMOVMSKBM(dst,src,off) PMOVMSKB dst,[src+off]
-#define PMASKMOVQM(dst,src,off) PMASKMOVQ dst,[src+off]
-#define PINSRWM(dst,src,off,msk) PINSRW dst,[src+off],msk
-#define PEXTRWM(dst,src,off,msk) PEXTRW dst,[src+off],msk
-#define PSHUFWM(dst,src,off,msk) PSHUFW dst,[src+off],msk
-#define MOVNTQM(dst,src,off) MOVNTQ dst,[src+off]
-
-#endif
-
-#endif
-
-/* Just to deal with lower case. */
-/* Lowercase spellings of the two-operand macros; each forwards its
-   arguments unchanged to the uppercase macro of the same name. */
-#define pf2id(dst,src) PF2ID(dst,src)
-#define pfacc(dst,src) PFACC(dst,src)
-#define pfadd(dst,src) PFADD(dst,src)
-#define pfcmpeq(dst,src) PFCMPEQ(dst,src)
-#define pfcmpge(dst,src) PFCMPGE(dst,src)
-#define pfcmpgt(dst,src) PFCMPGT(dst,src)
-#define pfmax(dst,src) PFMAX(dst,src)
-#define pfmin(dst,src) PFMIN(dst,src)
-#define pfmul(dst,src) PFMUL(dst,src)
-#define pfrcp(dst,src) PFRCP(dst,src)
-#define pfrcpit1(dst,src) PFRCPIT1(dst,src)
-#define pfrcpit2(dst,src) PFRCPIT2(dst,src)
-#define pfrsqrt(dst,src) PFRSQRT(dst,src)
-#define pfrsqit1(dst,src) PFRSQIT1(dst,src)
-#define pfsub(dst,src) PFSUB(dst,src)
-#define pfsubr(dst,src) PFSUBR(dst,src)
-#define pi2fd(dst,src) PI2FD(dst,src)
-#define femms FEMMS
-#define pavgusb(dst,src) PAVGUSB(dst,src)
-#define pmulhrw(dst,src) PMULHRW(dst,src)
-#define prefetch(src) PREFETCH(src)
-#define prefetchw(src) PREFETCHW(src)
-
-/* Lowercase spellings of the prefetch-with-offset macros. */
-#define prefetchm(src,off) PREFETCHM(src,off)
-#define prefetchmlong(src,off) PREFETCHMLONG(src,off)
-#define prefetchwm(src,off) PREFETCHWM(src,off)
-#define prefetchwmlong(src,off) PREFETCHWMLONG(src,off)
-
-/* Lowercase spellings of the K7 macros; arguments are forwarded
-   unchanged to the uppercase macro of the same name. */
-#define pfnacc(dst,src) PFNACC(dst,src)
-#define pfpnacc(dst,src) PFPNACC(dst,src)
-#define pswapd(dst,src) PSWAPD(dst,src)
-#define pminub(dst,src) PMINUB(dst,src)
-#define pmaxub(dst,src) PMAXUB(dst,src)
-#define pminsw(dst,src) PMINSW(dst,src)
-#define pmaxsw(dst,src) PMAXSW(dst,src)
-#define pmulhuw(dst,src) PMULHUW(dst,src)
-#define pavgb(dst,src) PAVGB(dst,src)
-#define pavgw(dst,src) PAVGW(dst,src)
-#define psadbw(dst,src) PSADBW(dst,src)
-#define pmovmskb(dst,src) PMOVMSKB(dst,src)
-#define pmaskmovq(dst,src) PMASKMOVQ(dst,src)
-#define pinsrw(dst,src,msk) PINSRW(dst,src,msk)
-#define pextrw(dst,src,msk) PEXTRW(dst,src,msk)
-#define pshufw(dst,src,msk) PSHUFW(dst,src,msk)
-#define movntq(dst,src) MOVNTQ(dst,src)
-#define prefetchnta(mem) PREFETCHNTA(mem)
-#define prefetcht0(mem) PREFETCHT0(mem)
-#define prefetcht1(mem) PREFETCHT1(mem)
-#define prefetcht2(mem) PREFETCHT2(mem)
-
-
-/* Lowercase spellings of the memory/offset macros, plus cpuid and sfence. */
-#define pavgusbm(dst,src,off) PAVGUSBM(dst,src,off)
-#define pf2idm(dst,src,off) PF2IDM(dst,src,off)
-#define pfaccm(dst,src,off) PFACCM(dst,src,off)
-#define pfaddm(dst,src,off) PFADDM(dst,src,off)
-#define pfcmpeqm(dst,src,off) PFCMPEQM(dst,src,off)
-#define pfcmpgem(dst,src,off) PFCMPGEM(dst,src,off)
-#define pfcmpgtm(dst,src,off) PFCMPGTM(dst,src,off)
-#define pfmaxm(dst,src,off) PFMAXM(dst,src,off)
-#define pfminm(dst,src,off) PFMINM(dst,src,off)
-#define pfmulm(dst,src,off) PFMULM(dst,src,off)
-#define pfrcpm(dst,src,off) PFRCPM(dst,src,off)
-#define pfrcpit1m(dst,src,off) PFRCPIT1M(dst,src,off)
-#define pfrcpit2m(dst,src,off) PFRCPIT2M(dst,src,off)
-#define pfrsqrtm(dst,src,off) PFRSQRTM(dst,src,off)
-#define pfrsqit1m(dst,src,off) PFRSQIT1M(dst,src,off)
-#define pfsubm(dst,src,off) PFSUBM(dst,src,off)
-#define pfsubrm(dst,src,off) PFSUBRM(dst,src,off)
-#define pi2fdm(dst,src,off) PI2FDM(dst,src,off)
-#define pmulhrwm(dst,src,off) PMULHRWM(dst,src,off)
-#define cpuid CPUID
-#define sfence SFENCE
-
-/* Lowercase spellings of the K7 memory/offset macros. */
-#define pfnaccm(dst,src,off) PFNACCM(dst,src,off)
-#define pfpnaccm(dst,src,off) PFPNACCM(dst,src,off)
-#define pswapdm(dst,src,off) PSWAPDM(dst,src,off)
-#define pminubm(dst,src,off) PMINUBM(dst,src,off)
-#define pmaxubm(dst,src,off) PMAXUBM(dst,src,off)
-#define pminswm(dst,src,off) PMINSWM(dst,src,off)
-#define pmaxswm(dst,src,off) PMAXSWM(dst,src,off)
-#define pmulhuwm(dst,src,off) PMULHUWM(dst,src,off)
-#define pavgbm(dst,src,off) PAVGBM(dst,src,off)
-#define pavgwm(dst,src,off) PAVGWM(dst,src,off)
-#define psadbwm(dst,src,off) PSADBWM(dst,src,off)
-#define pmovmskbm(dst,src,off) PMOVMSKBM(dst,src,off)
-#define pmaskmovqm(dst,src,off) PMASKMOVQM(dst,src,off)
-#define pinsrwm(dst,src,off,msk) PINSRWM(dst,src,off,msk)
-#define pextrwm(dst,src,off,msk) PEXTRWM(dst,src,off,msk)
-#define pshufwm(dst,src,off,msk) PSHUFWM(dst,src,off,msk)
-#define movntqm(dst,src,off) MOVNTQM(dst,src,off)
-/* Lowercase spellings of the prefetch-with-offset macros.  They forward to
-   the two-argument ...M macros; the one-argument PREFETCHNTA / PREFETCHT0..2
-   macros cannot accept the (mem,off) pair. */
-#define prefetchntam(mem,off) PREFETCHNTAM(mem,off)
-#define prefetcht0m(mem,off) PREFETCHT0M(mem,off)
-#define prefetcht1m(mem,off) PREFETCHT1M(mem,off)
-#define prefetcht2m(mem,off) PREFETCHT2M(mem,off)
-
-#endif
+//========= Copyright Valve Corporation, All rights reserved. ============// +/****************************************************************************** + + Copyright (c) 1999 Advanced Micro Devices, Inc. + + LIMITATION OF LIABILITY: THE MATERIALS ARE PROVIDED *AS IS* WITHOUT ANY + EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY, + NONINFRINGEMENT OF THIRD-PARTY INTELLECTUAL PROPERTY, OR FITNESS FOR ANY + PARTICULAR PURPOSE. IN NO EVENT SHALL AMD OR ITS SUPPLIERS BE LIABLE FOR ANY + DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS, + BUSINESS INTERRUPTION, LOSS OF INFORMATION) ARISING OUT OF THE USE OF OR + INABILITY TO USE THE MATERIALS, EVEN IF AMD HAS BEEN ADVISED OF THE POSSIBILITY + OF SUCH DAMAGES. BECAUSE SOME JURISDICTIONS PROHIBIT THE EXCLUSION OR LIMITATION + OF LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE LIMITATION MAY + NOT APPLY TO YOU. + + AMD does not assume any responsibility for any errors which may appear in the + Materials nor any responsibility to support or update the Materials. AMD retains + the right to make changes to its test specifications at any time, without notice. + + NO SUPPORT OBLIGATION: AMD is not obligated to furnish, support, or make any + further information, software, technical information, know-how, or show-how + available to you. + + So that all may benefit from your experience, please report any problems + or suggestions about this software to [email protected] + + AMD Developer Technologies, M/S 585 + Advanced Micro Devices, Inc. + 5900 E. Ben White Blvd. + Austin, TX 78741 + +******************************************************************************* + + AMD3DX.H + + MACRO FORMAT + ============ + This file contains inline assembly macros that + generate AMD-3D instructions in binary format. + Therefore, C or C++ programmer can use AMD-3D instructions + without any penalty in their C or C++ source code. 
+ + The macro's name and format conventions are as follow: + + + 1. First argument of macro is a destination and + second argument is a source operand. + ex) _asm PFCMPEQ (mm3, mm4) + | | + dst src + + 2. The destination operand can be m0 to m7 only. + The source operand can be any one of the register + m0 to m7 or _eax, _ecx, _edx, _ebx, _esi, or _edi + that contains effective address. + ex) _asm PFRCP (MM7, MM6) + ex) _asm PFRCPIT2 (mm0, mm4) + ex) _asm PFMUL (mm3, _edi) + + 3. The prefetch(w) takes one src operand _eax, ecx, _edx, + _ebx, _esi, or _edi that contains effective address. + ex) _asm PREFETCH (_edi) + + For WATCOM C/C++ users, when using #pragma aux instead if + _asm, all macro names should be prefixed by a p_ or P_. + Macros should not be enclosed in quotes. + ex) p_pfrcp (MM7,MM6) + + NOTE: Not all instruction macros, nor all possible + combinations of operands have been explicitely + tested. If any errors are found, please report + them. + + EXAMPLE + ======= + Following program doesn't do anything but it shows you + how to use inline assembly AMD-3D instructions in C. + Note that this will only work in flat memory model which + segment registers cs, ds, ss and es point to the same + linear address space total less than 4GB. 
+ + Used Microsoft VC++ 5.0 + + #include <stdio.h> + #include "amd3d.h" + + void main () + { + float x = (float)1.25; + float y = (float)1.25; + float z, zz; + + _asm { + movd mm1, x + movd mm2, y + pfmul (mm1, mm2) + movd z, mm1 + femms + } + + printf ("value of z = %f\n", z); + + // + // Demonstration of using the memory instead of + // multimedia register + // + _asm { + movd mm3, x + lea esi, y // load effective address of y + pfmul (mm3, _esi) + movd zz, mm3 + femms + } + + printf ("value of zz = %f\n", zz); + } + + #pragma aux EXAMPLE with WATCOM C/C++ v11.x + =========================================== + + extern void Add(float *__Dest, float *__A, float *__B); + #pragma aux Add = \ + p_femms \ + "movd mm6,[esi]" \ + p_pfadd(mm6,_edi) \ + "movd [ebx],mm6" \ + p_femms \ + parm [ebx] [esi] [edi]; + +*******************************************************************************/ + +#ifndef _K3DMACROSINCLUDED_ +#define _K3DMACROSINCLUDED_ + +#if defined (__WATCOMC__) + +// The WATCOM C/C++ version of the 3DNow! macros. +// +// The older, compbined register style for WATCOM C/C++ macros is not +// supported. 
+ +/* Operand defines for instructions two operands */ +#define _k3d_mm0_mm0 0xc0 +#define _k3d_mm0_mm1 0xc1 +#define _k3d_mm0_mm2 0xc2 +#define _k3d_mm0_mm3 0xc3 +#define _k3d_mm0_mm4 0xc4 +#define _k3d_mm0_mm5 0xc5 +#define _k3d_mm0_mm6 0xc6 +#define _k3d_mm0_mm7 0xc7 +#define _k3d_mm0_eax 0x00 +#define _k3d_mm0_ecx 0x01 +#define _k3d_mm0_edx 0x02 +#define _k3d_mm0_ebx 0x03 +#define _k3d_mm0_esi 0x06 +#define _k3d_mm0_edi 0x07 +#define _k3d_mm1_mm0 0xc8 +#define _k3d_mm1_mm1 0xc9 +#define _k3d_mm1_mm2 0xca +#define _k3d_mm1_mm3 0xcb +#define _k3d_mm1_mm4 0xcc +#define _k3d_mm1_mm5 0xcd +#define _k3d_mm1_mm6 0xce +#define _k3d_mm1_mm7 0xcf +#define _k3d_mm1_eax 0x08 +#define _k3d_mm1_ecx 0x09 +#define _k3d_mm1_edx 0x0a +#define _k3d_mm1_ebx 0x0b +#define _k3d_mm1_esi 0x0e +#define _k3d_mm1_edi 0x0f +#define _k3d_mm2_mm0 0xd0 +#define _k3d_mm2_mm1 0xd1 +#define _k3d_mm2_mm2 0xd2 +#define _k3d_mm2_mm3 0xd3 +#define _k3d_mm2_mm4 0xd4 +#define _k3d_mm2_mm5 0xd5 +#define _k3d_mm2_mm6 0xd6 +#define _k3d_mm2_mm7 0xd7 +#define _k3d_mm2_eax 0x10 +#define _k3d_mm2_ecx 0x11 +#define _k3d_mm2_edx 0x12 +#define _k3d_mm2_ebx 0x13 +#define _k3d_mm2_esi 0x16 +#define _k3d_mm2_edi 0x17 +#define _k3d_mm3_mm0 0xd8 +#define _k3d_mm3_mm1 0xd9 +#define _k3d_mm3_mm2 0xda +#define _k3d_mm3_mm3 0xdb +#define _k3d_mm3_mm4 0xdc +#define _k3d_mm3_mm5 0xdd +#define _k3d_mm3_mm6 0xde +#define _k3d_mm3_mm7 0xdf +#define _k3d_mm3_eax 0x18 +#define _k3d_mm3_ecx 0x19 +#define _k3d_mm3_edx 0x1a +#define _k3d_mm3_ebx 0x1b +#define _k3d_mm3_esi 0x1e +#define _k3d_mm3_edi 0x1f +#define _k3d_mm4_mm0 0xe0 +#define _k3d_mm4_mm1 0xe1 +#define _k3d_mm4_mm2 0xe2 +#define _k3d_mm4_mm3 0xe3 +#define _k3d_mm4_mm4 0xe4 +#define _k3d_mm4_mm5 0xe5 +#define _k3d_mm4_mm6 0xe6 +#define _k3d_mm4_mm7 0xe7 +#define _k3d_mm4_eax 0x20 +#define _k3d_mm4_ecx 0x21 +#define _k3d_mm4_edx 0x22 +#define _k3d_mm4_ebx 0x23 +#define _k3d_mm4_esi 0x26 +#define _k3d_mm4_edi 0x27 +#define _k3d_mm5_mm0 0xe8 +#define _k3d_mm5_mm1 0xe9 
+#define _k3d_mm5_mm2 0xea +#define _k3d_mm5_mm3 0xeb +#define _k3d_mm5_mm4 0xec +#define _k3d_mm5_mm5 0xed +#define _k3d_mm5_mm6 0xee +#define _k3d_mm5_mm7 0xef +#define _k3d_mm5_eax 0x28 +#define _k3d_mm5_ecx 0x29 +#define _k3d_mm5_edx 0x2a +#define _k3d_mm5_ebx 0x2b +#define _k3d_mm5_esi 0x2e +#define _k3d_mm5_edi 0x2f +#define _k3d_mm6_mm0 0xf0 +#define _k3d_mm6_mm1 0xf1 +#define _k3d_mm6_mm2 0xf2 +#define _k3d_mm6_mm3 0xf3 +#define _k3d_mm6_mm4 0xf4 +#define _k3d_mm6_mm5 0xf5 +#define _k3d_mm6_mm6 0xf6 +#define _k3d_mm6_mm7 0xf7 +#define _k3d_mm6_eax 0x30 +#define _k3d_mm6_ecx 0x31 +#define _k3d_mm6_edx 0x32 +#define _k3d_mm6_ebx 0x33 +#define _k3d_mm6_esi 0x36 +#define _k3d_mm6_edi 0x37 +#define _k3d_mm7_mm0 0xf8 +#define _k3d_mm7_mm1 0xf9 +#define _k3d_mm7_mm2 0xfa +#define _k3d_mm7_mm3 0xfb +#define _k3d_mm7_mm4 0xfc +#define _k3d_mm7_mm5 0xfd +#define _k3d_mm7_mm6 0xfe +#define _k3d_mm7_mm7 0xff +#define _k3d_mm7_eax 0x38 +#define _k3d_mm7_ecx 0x39 +#define _k3d_mm7_edx 0x3a +#define _k3d_mm7_ebx 0x3b +#define _k3d_mm7_esi 0x3e +#define _k3d_mm7_edi 0x3f + +#define _k3d_name_xlat_m0 _mm0 +#define _k3d_name_xlat_m1 _mm1 +#define _k3d_name_xlat_m2 _mm2 +#define _k3d_name_xlat_m3 _mm3 +#define _k3d_name_xlat_m4 _mm4 +#define _k3d_name_xlat_m5 _mm5 +#define _k3d_name_xlat_m6 _mm6 +#define _k3d_name_xlat_m7 _mm7 +#define _k3d_name_xlat_M0 _mm0 +#define _k3d_name_xlat_M1 _mm1 +#define _k3d_name_xlat_M2 _mm2 +#define _k3d_name_xlat_M3 _mm3 +#define _k3d_name_xlat_M4 _mm4 +#define _k3d_name_xlat_M5 _mm5 +#define _k3d_name_xlat_M6 _mm6 +#define _k3d_name_xlat_M7 _mm7 +#define _k3d_name_xlat_mm0 _mm0 +#define _k3d_name_xlat_mm1 _mm1 +#define _k3d_name_xlat_mm2 _mm2 +#define _k3d_name_xlat_mm3 _mm3 +#define _k3d_name_xlat_mm4 _mm4 +#define _k3d_name_xlat_mm5 _mm5 +#define _k3d_name_xlat_mm6 _mm6 +#define _k3d_name_xlat_mm7 _mm7 +#define _k3d_name_xlat_MM0 _mm0 +#define _k3d_name_xlat_MM1 _mm1 +#define _k3d_name_xlat_MM2 _mm2 +#define _k3d_name_xlat_MM3 _mm3 +#define 
_k3d_name_xlat_MM4 _mm4 +#define _k3d_name_xlat_MM5 _mm5 +#define _k3d_name_xlat_MM6 _mm6 +#define _k3d_name_xlat_MM7 _mm7 +#define _k3d_name_xlat_eax _eax +#define _k3d_name_xlat_ebx _ebx +#define _k3d_name_xlat_ecx _ecx +#define _k3d_name_xlat_edx _edx +#define _k3d_name_xlat_esi _esi +#define _k3d_name_xlat_edi _edi +#define _k3d_name_xlat_ebp _ebp +#define _k3d_name_xlat_EAX _eax +#define _k3d_name_xlat_EBX _ebx +#define _k3d_name_xlat_ECX _ecx +#define _k3d_name_xlat_EDX _edx +#define _k3d_name_xlat_ESI _esi +#define _k3d_name_xlat_EDI _edi +#define _k3d_name_xlat_EBP _ebp +#define _k3d_name_xlat__eax _eax +#define _k3d_name_xlat__ebx _ebx +#define _k3d_name_xlat__ecx _ecx +#define _k3d_name_xlat__edx _edx +#define _k3d_name_xlat__esi _esi +#define _k3d_name_xlat__edi _edi +#define _k3d_name_xlat__ebp _ebp +#define _k3d_name_xlat__EAX _eax +#define _k3d_name_xlat__EBX _ebx +#define _k3d_name_xlat__ECX _ecx +#define _k3d_name_xlat__EDX _edx +#define _k3d_name_xlat__ESI _esi +#define _k3d_name_xlat__EDI _edi +#define _k3d_name_xlat__EBP _ebp + +#define _k3d_xglue3(a,b,c) a##b##c +#define _k3d_glue3(a,b,c) _k3d_xglue3(a,b,c) +#define _k3d_MODRM(dst, src) _k3d_glue3(_k3d,_k3d_name_xlat_##dst,_k3d_name_xlat_##src) + +/* Operand defines for prefetch and prefetchw */ + +#define _k3d_pref_eax 0x00 +#define _k3d_pref_ecx 0x01 +#define _k3d_pref_edx 0x02 +#define _k3d_pref_ebx 0x03 +#define _k3d_pref_esi 0x06 +#define _k3d_pref_edi 0x07 +#define _k3d_pref_EAX 0x00 +#define _k3d_pref_ECX 0x01 +#define _k3d_pref_EDX 0x02 +#define _k3d_pref_EBX 0x03 +#define _k3d_pref_ESI 0x06 +#define _k3d_pref_EDI 0x07 +#define _k3d_prefw_eax 0x08 +#define _k3d_prefw_ecx 0x09 +#define _k3d_prefw_edx 0x0A +#define _k3d_prefw_ebx 0x0B +#define _k3d_prefw_esi 0x0E +#define _k3d_prefw_edi 0x0F +#define _k3d_prefw_EAX 0x08 +#define _k3d_prefw_ECX 0x09 +#define _k3d_prefw_EDX 0x0A +#define _k3d_prefw_EBX 0x0B +#define _k3d_prefw_ESI 0x0E +#define _k3d_prefw_EDI 0x0F + +/* Defines for 3DNow! 
instructions */ +#define PF2ID(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x1d +#define PFACC(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xae +#define PFADD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9e +#define PFCMPEQ(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb0 +#define PFCMPGE(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x90 +#define PFCMPGT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa0 +#define PFMAX(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa4 +#define PFMIN(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x94 +#define PFMUL(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb4 +#define PFRCP(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x96 +#define PFRCPIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa6 +#define PFRCPIT2(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb6 +#define PFRSQRT(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x97 +#define PFRSQIT1(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xa7 +#define PFSUB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x9a +#define PFSUBR(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xaa +#define PI2FD(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0x0d +#define FEMMS db 0x0f, 0x0e +#define PAVGUSB(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xbf +#define PMULHRW(dst, src) db 0x0f, 0x0f, _k3d_MODRM(dst, src), 0xb7 +#define PREFETCH(src) db 0x0f, 0x0d, _k3d_pref_##src +#define PREFETCHW(src) db 0x0f, 0x0d, _k3d_prefw_##src +#define CPUID db 0x0f, 0xa2 + +/* Defines for new, K7 opcodes */ +#define PFNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8a +#define FPPNACC(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0x8e +#define PSWAPD(dst,src) db 0x0f, 0x0f, _k3d_MODRM(dst,src), 0xbb +#define PMINUB(dst,src) db 0x0f, 0xda, _k3d_MODRM(dst,src) +#define PMAXUB(dst,src) db 0x0f, 0xde, _k3d_MODRM(dst,src) +#define PMINSW(dst,src) db 0x0f, 0xea, _k3d_MODRM(dst,src) +#define PMAXSW(dst,src) db 0x0f, 0xee, _k3d_MODRM(dst,src) +#define PMULHUW(dst,src) db 0x0f, 0xe4, 
_k3d_MODRM(dst,src) +#define PAVGB(dst,src) db 0x0f, 0xe0, _k3d_MODRM(dst,src) +#define PAVGW(dst,src) db 0x0f, 0xe3, _k3d_MODRM(dst,src) +#define PSADBW(dst,src) db 0x0f, 0xf6, _k3d_MODRM(dst,src) +#define PMOVMSKB(dst,src) db 0x0f, 0xd7, _k3d_MODRM(dst,src) +#define PMASKMOVQ(dst,src) db 0x0f, 0xf7, _k3d_MODRM(dst,src) +#define PINSRW(dst,src,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src), msk +#define PEXTRW(dst,src,msk) db 0x0f, 0xc5, _k3d_MODRM(dst,src), msk +#define PSHUFW(dst,src,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src), msk +#define MOVNTQ(dst,src) db 0x0f, 0xe7, _k3d_MODRM(src,dst) +#define SFENCE db 0x0f, 0xae, 0xf8 + +/* Memory/offset versions of the opcodes */ +#define PF2IDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x1d +#define PFACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xae +#define PFADDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9e +#define PFCMPEQM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb0 +#define PFCMPGEM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x90 +#define PFCMPGTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa0 +#define PFMAXM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa4 +#define PFMINM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x94 +#define PFMULM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb4 +#define PFRCPM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x96 +#define PFRCPIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa6 +#define PFRCPIT2M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb6 +#define PFRSQRTM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x97 +#define PFRSQIT1M(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xa7 +#define PFSUBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x9a +#define PFSUBRM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 
0xaa +#define PI2FDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x0d +#define PAVGUSBM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbf +#define PMULHRWM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xb7 + + +/* Memory/offset versions of the new, K7 opcodes */ +#define PFNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8a +#define FPPNACCM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0x8e +#define PSWAPDM(dst,src,off) db 0x0f, 0x0f, _k3d_MODRM(dst,src) | 0x40, off, 0xbb +#define PMINUBM(dst,src,off) db 0x0f, 0xda, _k3d_MODRM(dst,src) | 0x40, off +#define PMAXUBM(dst,src,off) db 0x0f, 0xde, _k3d_MODRM(dst,src) | 0x40, off +#define PMINSWM(dst,src,off) db 0x0f, 0xea, _k3d_MODRM(dst,src) | 0x40, off +#define PMAXSWM(dst,src,off) db 0x0f, 0xee, _k3d_MODRM(dst,src) | 0x40, off +#define PMULHUWM(dst,src,off) db 0x0f, 0xe4, _k3d_MODRM(dst,src) | 0x40, off +#define PAVGBM(dst,src,off) db 0x0f, 0xe0, _k3d_MODRM(dst,src) | 0x40, off +#define PAVGWM(dst,src,off) db 0x0f, 0xe3, _k3d_MODRM(dst,src) | 0x40, off +#define PSADBWM(dst,src,off) db 0x0f, 0xf6, _k3d_MODRM(dst,src) | 0x40, off +#define PMOVMSKBM(dst,src,off) db 0x0f, 0xd7, _k3d_MODRM(dst,src) | 0x40, off +#define PMASKMOVQM(dst,src,off) db 0x0f, 0xf7, _k3d_MODRM(dst,src) | 0x40, off +#define MOVNTQM(dst,src,off) db 0x0f, 0xe7, _k3d_MODRM(src,dst) | 0x40, off +#define PINSRWM(dst,src,off,msk) db 0x0f, 0xc4, _k3d_MODRM(dst,src) | 0x40, off, msk +#define PSHUFWM(dst,src,off,msk) db 0x0f, 0x70, _k3d_MODRM(dst,src) | 0x40, off, msk + + +/* Defines for 3DNow! 
instructions for use in pragmas */ +#define p_pf2id(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x1d +#define p_pfacc(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xae +#define p_pfadd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9e +#define p_pfcmpeq(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb0 +#define p_pfcmpge(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x90 +#define p_pfcmpgt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa0 +#define p_pfmax(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa4 +#define p_pfmin(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x94 +#define p_pfmul(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb4 +#define p_pfrcp(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x96 +#define p_pfrcpit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa6 +#define p_pfrcpit2(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb6 +#define p_pfrsqrt(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x97 +#define p_pfrsqit1(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xa7 +#define p_pfsub(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x9a +#define p_pfsubr(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xaa +#define p_pi2fd(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0x0d +#define p_femms 0x0f 0x0e +#define p_pavgusb(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xbf +#define p_pmulhrw(dst,src) 0x0f 0x0f _k3d_MODRM(dst,src) 0xb7 +#define p_prefetch(src) 0x0f 0x0d _k3d_pref_##src +#define p_prefetchw(src) 0x0f 0x0d _k3d_prefw_##src +#define P_PFNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a +#define P_FPPNACC(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e +#define P_PSWAPD(dst,src) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb +#define P_PMINUB(dst,src) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMAXUB(dst,src) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMINSW(dst,src) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMAXSW(dst,src) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMULHUW(dst,src) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PAVGB(dst,src) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off 
+#define P_PAVGW(dst,src) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PSADBW(dst,src) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMOVMSKB(dst,src) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMASKMOVQ(dst,src) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PINSRW(dst,src,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk +#define P_PEXTRW(dst,src,msk) 0x0f 0xc5 (_k3d_MODRM(dst,src) | 0x40) off msk +#define P_PSHUFW(dst,src,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk +#define P_MOVNTQ(dst,src) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off + +#define P_PF2IDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x1d +#define P_PFACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xae +#define P_PFADDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9e +#define P_PFCMPEQM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb0 +#define P_PFCMPGEM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x90 +#define P_PFCMPGTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa0 +#define P_PFMAXM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa4 +#define P_PFMINM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x94 +#define P_PFMULM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb4 +#define P_PFRCPM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x96 +#define P_PFRCPIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa6 +#define P_PFRCPIT2M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb6 +#define P_PFRSQRTM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x97 +#define P_PFRSQIT1M(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xa7 +#define P_PFSUBM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x9a +#define P_PFSUBRM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xaa +#define P_PI2FDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x0d +#define P_PAVGUSBM(dst,src,off) 0x0f 0x0f 
(_k3d_MODRM(dst,src) | 0x40) off 0xbf +#define P_PMULHRWM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xb7 +#define P_PFNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8a +#define P_FPPNACCM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0x8e +#define P_PSWAPDM(dst,src,off) 0x0f 0x0f (_k3d_MODRM(dst,src) | 0x40) off 0xbb +#define P_PMINUBM(dst,src,off) 0x0f 0xda (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMAXUBM(dst,src,off) 0x0f 0xde (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMINSWM(dst,src,off) 0x0f 0xea (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMAXSWM(dst,src,off) 0x0f 0xee (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMULHUWM(dst,src,off) 0x0f 0xe4 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PAVGBM(dst,src,off) 0x0f 0xe0 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PAVGWM(dst,src,off) 0x0f 0xe3 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PSADBWM(dst,src,off) 0x0f 0xf6 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PMOVMSKBM(dst,src,off) 0x0f 0xd7 (_k3d_MODRM(dst,src) | 0x40) off +#define P_MOVNTQM(dst,src,off) 0x0f 0xe7 (_k3d_MODRM(src,dst) | 0x40) off +#define P_PMASKMOVQM(dst,src,off) 0x0f 0xf7 (_k3d_MODRM(dst,src) | 0x40) off +#define P_PINSRWM(dst,src,off,msk) 0x0f 0xc4 (_k3d_MODRM(dst,src) | 0x40) off msk +#define P_PSHUFWM(dst,src,off,msk) 0x0f 0x70 (_k3d_MODRM(dst,src) | 0x40) off msk + + +#define P_PF2ID(dst,src) p_pf2id(dst,src) +#define P_PFACC(dst,src) p_pfacc(dst,src) +#define P_PFADD(dst,src) p_pfadd(dst,src) +#define P_PFCMPEQ(dst,src) p_pfcmpeq(dst,src) +#define P_PFCMPGE(dst,src) p_pfcmpge(dst,src) +#define P_PFCMPGT(dst,src) p_pfcmpgt(dst,src) +#define P_PFMAX(dst,src) p_pfmax(dst,src) +#define P_PFMIN(dst,src) p_pfmin(dst,src) +#define P_PFMUL(dst,src) p_pfmul(dst,src) +#define P_PFRCP(dst,src) p_pfrcp(dst,src) +#define P_PFRCPIT1(dst,src) p_pfrcpit1(dst,src) +#define P_PFRCPIT2(dst,src) p_pfrcpit2(dst,src) +#define P_PFRSQRT(dst,src) p_pfrsqrt(dst,src) +#define P_PFRSQIT1(dst,src) p_pfrsqit1(dst,src) 
+#define P_PFSUB(dst,src) p_pfsub(dst,src) +#define P_PFSUBR(dst,src) p_pfsubr(dst,src) +#define P_PI2FD(dst,src) p_pi2fd(dst,src) +#define P_FEMMS p_femms +#define P_PAVGUSB(dst,src) p_pavgusb(dst,src) +#define P_PMULHRW(dst,src) p_pmulhrw(dst,src) +#define P_PREFETCH(src) p_prefetch(src) +#define P_PREFETCHW(src) p_prefetchw(src) +#define p_CPUID 0x0f 0xa2 +#define p_pf2idm(dst,src,off) P_PF2IDM(dst,src,off) +#define p_pfaccm(dst,src,off) P_PFACCM(dst,src,off) +#define p_pfaddm(dst,src,off) P_PFADDM(dst,src,off) +#define p_pfcmpeqm(dst,src,off) P_PFCMPEQM(dst,src,off) +#define p_pfcmpgem(dst,src,off) P_PFCMPGEM(dst,src,off) +#define p_pfcmpgtm(dst,src,off) P_PFCMPGTM(dst,src,off) +#define p_pfmaxm(dst,src,off) P_PFMAXM(dst,src,off) +#define p_pfminm(dst,src,off) P_PFMINM(dst,src,off) +#define p_pfmulm(dst,src,off) P_PFMULM(dst,src,off) +#define p_pfrcpm(dst,src,off) P_PFRCPM(dst,src,off) +#define p_pfrcpit1m(dst,src,off) P_PFRCPIT1M(dst,src,off) +#define p_pfrcpit2m(dst,src,off) P_PFRCPIT2M(dst,src,off) +#define p_pfrsqrtm(dst,src,off) P_PFRSQRTM(dst,src,off) +#define p_pfrsqit1m(dst,src,off) P_PFRSQIT1M(dst,src,off) +#define p_pfsubm(dst,src,off) P_PFSUBM(dst,src,off) +#define p_pfsubrm(dst,src,off) P_PFSUBRM(dst,src,off) +#define p_pi2fdm(dst,src,off) P_PI2FDM(dst,src,off) +#define p_pavgusbm(dst,src,off) P_PAVGUSBM(dst,src,off) +#define p_pmulhrwm(dst,src,off) P_PMULHRWM(dst,src,off) + +#define P_PFNACC(dst,src) p_pfnacc(dst,src) +#define P_FPPNACC(dst,src) p_pfpnacc(dst,src) +#define P_PSWAPD(dst,src) p_pswapd(dst,src) +#define P_PMINUB(dst,src) p_pminub(dst,src) +#define P_PMAXUB(dst,src) p_pmaxub(dst,src) +#define P_PMINSW(dst,src) p_pminsw(dst,src) +#define P_PMAXSW(dst,src) p_pmaxsw(dst,src) +#define P_PMULHUW(dst,src) p_pmulhuw(dst,src) +#define P_PAVGB(dst,src) p_pavgb(dst,src) +#define P_PAVGW(dst,src) p_avgw(dst,src) +#define P_PSADBW(dst,src) p_psadbw(dst,src) +#define P_PMOVMSKB(dst,src) p_pmovmskb(dst,src) +#define P_PMASKMOVQ(dst,src) 
p_pmaskmovq(dst,src) +#define P_PINSRW(dst,src,msk) p_pinsrw(dst,src) +#define P_PEXTRW(dst,src,msk) p_pextrw(dst,src) +#define P_PSHUFW(dst,src,msk) p_pshufw(dst,src) +#define P_MOVNTQ(dst,src) p_movntq(dst,src) + +#define P_PFNACCM(dst,src,off) p_pfnaccm(dst,src,off) +#define P_FPPNACCM(dst,src,off) p_pfpnaccm(dst,src,off) +#define P_PSWAPDM(dst,src,off) p_pswapdm(dst,src,off) +#define P_PMINUBM(dst,src,off) p_pminubm(dst,src,off) +#define P_PMAXUBM(dst,src,off) p_pmaxubm(dst,src,off) +#define P_PMINSWM(dst,src,off) p_pminswm(dst,src,off) +#define P_PMAXSWM(dst,src,off) p_pmaxswm(dst,src,off) +#define P_PMULHUWM(dst,src,off) p_pmulhuwm(dst,src,off) +#define P_PAVGBM(dst,src,off) p_pavgbm(dst,src,off) +#define P_PAVGWM(dst,src,off) p_avgwm(dst,src,off) +#define P_PSADBWM(dst,src,off) p_psadbwm(dst,src,off) +#define P_PMOVMSKBM(dst,src,off) p_pmovmskbm(dst,src,off) +#define P_PMASKMOVQM(dst,src,off) p_pmaskmovqm(dst,src,off) +#define P_PINSRWM(dst,src,off,msk) p_pinsrwm(dst,src,off,msk) +#define P_PSHUFWM(dst,src,off,msk) p_pshufwm(dst,src,off,msk) +#define P_MOVNTQM(dst,src,off) p_movntqm(dst,src,off) + +#elif defined (_MSC_VER) && !defined (__MWERKS__) +// The Microsoft Visual C++ version of the 3DNow! macros. + +// Stop the "no EMMS" warning, since it doesn't detect FEMMS properly +#pragma warning(disable:4799) + +// Defines for operands. 
// Operand encodings, looked up by token pasting (_K3D_##name) in the
// instruction macros.  MMX registers are 0xc0 + register number; general
// registers are their plain register number.  Each name is provided in
// upper and lower case.

// MMX registers.
#define _K3D_MM0    0xc0
#define _K3D_mm0    0xc0
#define _K3D_MM1    0xc1
#define _K3D_mm1    0xc1
#define _K3D_MM2    0xc2
#define _K3D_mm2    0xc2
#define _K3D_MM3    0xc3
#define _K3D_mm3    0xc3
#define _K3D_MM4    0xc4
#define _K3D_mm4    0xc4
#define _K3D_MM5    0xc5
#define _K3D_mm5    0xc5
#define _K3D_MM6    0xc6
#define _K3D_mm6    0xc6
#define _K3D_MM7    0xc7
#define _K3D_mm7    0xc7

// General-purpose registers (only these six are provided).
#define _K3D_EAX    0x00
#define _K3D_eax    0x00
#define _K3D_ECX    0x01
#define _K3D_ecx    0x01
#define _K3D_EDX    0x02
#define _K3D_edx    0x02
#define _K3D_EBX    0x03
#define _K3D_ebx    0x03
#define _K3D_ESI    0x06
#define _K3D_esi    0x06
#define _K3D_EDI    0x07
#define _K3D_edi    0x07

// These defines are for compatibility with the previous version of the header file.
// MMX registers spelled M0..M7 / m0..m7.
#define _K3D_M0     0xc0
#define _K3D_m0     0xc0
#define _K3D_M1     0xc1
#define _K3D_m1     0xc1
#define _K3D_M2     0xc2
#define _K3D_m2     0xc2
#define _K3D_M3     0xc3
#define _K3D_m3     0xc3
#define _K3D_M4     0xc4
#define _K3D_m4     0xc4
#define _K3D_M5     0xc5
#define _K3D_m5     0xc5
#define _K3D_M6     0xc6
#define _K3D_m6     0xc6
#define _K3D_M7     0xc7
#define _K3D_m7     0xc7

// General-purpose registers spelled with a leading underscore (_EAX, _eax, ...).
#define _K3D__EAX   0x00
#define _K3D__eax   0x00
#define _K3D__ECX   0x01
#define _K3D__ecx   0x01
#define _K3D__EDX   0x02
#define _K3D__edx   0x02
#define _K3D__EBX   0x03
#define _K3D__ebx   0x03
#define _K3D__ESI   0x06
#define _K3D__esi   0x06
#define _K3D__EDI   0x07
#define _K3D__edi   0x07

// General 3DNow! instruction format that is supported by
// these macros. Note that only the most basic form of memory
// operands are supported by these macros.
+ +#define InjK3DOps(dst,src,inst) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0f \ + _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \ + _asm _emit _3DNowOpcode##inst \ +} + +#define InjK3DMOps(dst,src,off,inst) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0f \ + _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \ + _asm _emit off \ + _asm _emit _3DNowOpcode##inst \ +} + +#define InjMMXOps(dst,src,inst) \ +{ \ + _asm _emit 0x0f \ + _asm _emit _3DNowOpcode##inst \ + _asm _emit ((_K3D_##dst & 0x3f) << 3) | _K3D_##src \ +} + +#define InjMMXMOps(dst,src,off,inst) \ +{ \ + _asm _emit 0x0f \ + _asm _emit _3DNowOpcode##inst \ + _asm _emit (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40) \ + _asm _emit off \ +} + +#define _3DNowOpcodePF2ID 0x1d +#define _3DNowOpcodePFACC 0xae +#define _3DNowOpcodePFADD 0x9e +#define _3DNowOpcodePFCMPEQ 0xb0 +#define _3DNowOpcodePFCMPGE 0x90 +#define _3DNowOpcodePFCMPGT 0xa0 +#define _3DNowOpcodePFMAX 0xa4 +#define _3DNowOpcodePFMIN 0x94 +#define _3DNowOpcodePFMUL 0xb4 +#define _3DNowOpcodePFRCP 0x96 +#define _3DNowOpcodePFRCPIT1 0xa6 +#define _3DNowOpcodePFRCPIT2 0xb6 +#define _3DNowOpcodePFRSQRT 0x97 +#define _3DNowOpcodePFRSQIT1 0xa7 +#define _3DNowOpcodePFSUB 0x9a +#define _3DNowOpcodePFSUBR 0xaa +#define _3DNowOpcodePI2FD 0x0d +#define _3DNowOpcodePAVGUSB 0xbf +#define _3DNowOpcodePMULHRW 0xb7 +#define _3DNowOpcodePFNACC 0x8a +#define _3DNowOpcodeFPPNACC 0x8e +#define _3DNowOpcodePSWAPD 0xbb +#define _3DNowOpcodePMINUB 0xda +#define _3DNowOpcodePMAXUB 0xde +#define _3DNowOpcodePMINSW 0xea +#define _3DNowOpcodePMAXSW 0xee +#define _3DNowOpcodePMULHUW 0xe4 +#define _3DNowOpcodePAVGB 0xe0 +#define _3DNowOpcodePAVGW 0xe3 +#define _3DNowOpcodePSADBW 0xf6 +#define _3DNowOpcodePMOVMSKB 0xd7 +#define _3DNowOpcodePMASKMOVQ 0xf7 +#define _3DNowOpcodePINSRW 0xc4 +#define _3DNowOpcodePEXTRW 0xc5 +#define _3DNowOpcodePSHUFW 0x70 +#define _3DNowOpcodeMOVNTQ 0xe7 +#define _3DNowOpcodePREFETCHT 0x18 + + +#define PF2ID(dst,src) InjK3DOps(dst, 
src, PF2ID) +#define PFACC(dst,src) InjK3DOps(dst, src, PFACC) +#define PFADD(dst,src) InjK3DOps(dst, src, PFADD) +#define PFCMPEQ(dst,src) InjK3DOps(dst, src, PFCMPEQ) +#define PFCMPGE(dst,src) InjK3DOps(dst, src, PFCMPGE) +#define PFCMPGT(dst,src) InjK3DOps(dst, src, PFCMPGT) +#define PFMAX(dst,src) InjK3DOps(dst, src, PFMAX) +#define PFMIN(dst,src) InjK3DOps(dst, src, PFMIN) +#define PFMUL(dst,src) InjK3DOps(dst, src, PFMUL) +#define PFRCP(dst,src) InjK3DOps(dst, src, PFRCP) +#define PFRCPIT1(dst,src) InjK3DOps(dst, src, PFRCPIT1) +#define PFRCPIT2(dst,src) InjK3DOps(dst, src, PFRCPIT2) +#define PFRSQRT(dst,src) InjK3DOps(dst, src, PFRSQRT) +#define PFRSQIT1(dst,src) InjK3DOps(dst, src, PFRSQIT1) +#define PFSUB(dst,src) InjK3DOps(dst, src, PFSUB) +#define PFSUBR(dst,src) InjK3DOps(dst, src, PFSUBR) +#define PI2FD(dst,src) InjK3DOps(dst, src, PI2FD) +#define PAVGUSB(dst,src) InjK3DOps(dst, src, PAVGUSB) +#define PMULHRW(dst,src) InjK3DOps(dst, src, PMULHRW) + +#define FEMMS \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0e \ +} + +#define PREFETCH(src) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit (_K3D_##src & 0x07) \ +} + +/* Prefetch with a short offset, < 127 or > -127 + Carefull! Doesn't check for your offset being + in range. 
*/ + +#define PREFETCHM(src,off) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit (0x40 | (_K3D_##src & 0x07)) \ + _asm _emit off \ +} + +/* Prefetch with a long offset */ + +#define PREFETCHMLONG(src,off) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit (0x80 | (_K3D_##src & 0x07)) \ + _asm _emit (off & 0x000000ff) \ + _asm _emit (off & 0x0000ff00) >> 8 \ + _asm _emit (off & 0x00ff0000) >> 16 \ + _asm _emit (off & 0xff000000) >> 24 \ +} + +#define PREFETCHW(src) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit (0x08 | (_K3D_##src & 0x07)) \ +} + +#define PREFETCHWM(src,off) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit 0x48 | (_K3D_##src & 0x07) \ + _asm _emit off \ +} + +#define PREFETCHWMLONG(src,off) \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0x0d \ + _asm _emit 0x88 | (_K3D_##src & 0x07) \ + _asm _emit (off & 0x000000ff) \ + _asm _emit (off & 0x0000ff00) >> 8 \ + _asm _emit (off & 0x00ff0000) >> 16 \ + _asm _emit (off & 0xff000000) >> 24 \ +} + +#define CPUID \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0xa2 \ +} + + +/* Defines for new, K7 opcodes */ +#define SFENCE \ +{ \ + _asm _emit 0x0f \ + _asm _emit 0xae \ + _asm _emit 0xf8 \ +} + +#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC) +#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC) +#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD) +#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB) +#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB) +#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW) +#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW) +#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW) +#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB) +#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW) +#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW) +#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB) +#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ) +#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) _asm _emit msk +#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) 
_asm _emit msk +#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) _asm _emit msk +#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ) +#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT) +#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT) +#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT) +#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT) + + +/* Memory/offset versions of the opcodes */ +#define PAVGUSBM(dst,src,off) InjK3DMOps(dst,src,off,PAVGUSB) +#define PF2IDM(dst,src,off) InjK3DMOps(dst,src,off,PF2ID) +#define PFACCM(dst,src,off) InjK3DMOps(dst,src,off,PFACC) +#define PFADDM(dst,src,off) InjK3DMOps(dst,src,off,PFADD) +#define PFCMPEQM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPEQ) +#define PFCMPGEM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGE) +#define PFCMPGTM(dst,src,off) InjK3DMOps(dst,src,off,PFCMPGT) +#define PFMAXM(dst,src,off) InjK3DMOps(dst,src,off,PFMAX) +#define PFMINM(dst,src,off) InjK3DMOps(dst,src,off,PFMIN) +#define PFMULM(dst,src,off) InjK3DMOps(dst,src,off,PFMUL) +#define PFRCPM(dst,src,off) InjK3DMOps(dst,src,off,PFRCP) +#define PFRCPIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT1) +#define PFRCPIT2M(dst,src,off) InjK3DMOps(dst,src,off,PFRCPIT2) +#define PFRSQRTM(dst,src,off) InjK3DMOps(dst,src,off,PFRSQRT) +#define PFRSQIT1M(dst,src,off) InjK3DMOps(dst,src,off,PFRSQIT1) +#define PFSUBM(dst,src,off) InjK3DMOps(dst,src,off,PFSUB) +#define PFSUBRM(dst,src,off) InjK3DMOps(dst,src,off,PFSUBR) +#define PI2FDM(dst,src,off) InjK3DMOps(dst,src,off,PI2FD) +#define PMULHRWM(dst,src,off) InjK3DMOps(dst,src,off,PMULHRW) + + +/* Memory/offset versions of the K7 opcodes */ +#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC) +#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC) +#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD) +#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB) +#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB) +#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW) +#define 
PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW) +#define PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW) +#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB) +#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW) +#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW) +#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB) +#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ) +#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW) _asm _emit msk +#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW) _asm _emit msk +#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ) +#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT) +#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT) +#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT) +#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT) + + +#else + +/* Assume built-in support for 3DNow! opcodes, replace macros with opcodes */ +#define PAVGUSB(dst,src) pavgusb dst,src +#define PF2ID(dst,src) pf2id dst,src +#define PFACC(dst,src) pfacc dst,src +#define PFADD(dst,src) pfadd dst,src +#define PFCMPEQ(dst,src) pfcmpeq dst,src +#define PFCMPGE(dst,src) pfcmpge dst,src +#define PFCMPGT(dst,src) pfcmpgt dst,src +#define PFMAX(dst,src) pfmax dst,src +#define PFMIN(dst,src) pfmin dst,src +#define PFMUL(dst,src) pfmul dst,src +#define PFRCP(dst,src) pfrcp dst,src +#define PFRCPIT1(dst,src) pfrcpit1 dst,src +#define PFRCPIT2(dst,src) pfrcpit2 dst,src +#define PFRSQRT(dst,src) pfrsqrt dst,src +#define PFRSQIT1(dst,src) pfrsqit1 dst,src +#define PFSUB(dst,src) pfsub dst,src +#define PFSUBR(dst,src) pfsubr dst,src +#define PI2FD(dst,src) pi2fd dst,src +#define PMULHRW(dst,src) pmulhrw dst,src +#define PREFETCH(src) prefetch src +#define PREFETCHW(src) prefetchw src + +#define PAVGUSBM(dst,src,off) pavgusb dst,[src+off] +#define PF2IDM(dst,src,off) PF2ID dst,[src+off] +#define PFACCM(dst,src,off) PFACC dst,[src+off] +#define 
PFADDM(dst,src,off) PFADD dst,[src+off] +#define PFCMPEQM(dst,src,off) PFCMPEQ dst,[src+off] +#define PFCMPGEM(dst,src,off) PFCMPGE dst,[src+off] +#define PFCMPGTM(dst,src,off) PFCMPGT dst,[src+off] +#define PFMAXM(dst,src,off) PFMAX dst,[src+off] +#define PFMINM(dst,src,off) PFMIN dst,[src+off] +#define PFMULM(dst,src,off) PFMUL dst,[src+off] +#define PFRCPM(dst,src,off) PFRCP dst,[src+off] +#define PFRCPIT1M(dst,src,off) PFRCPIT1 dst,[src+off] +#define PFRCPIT2M(dst,src,off) PFRCPIT2 dst,[src+off] +#define PFRSQRTM(dst,src,off) PFRSQRT dst,[src+off] +#define PFRSQIT1M(dst,src,off) PFRSQIT1 dst,[src+off] +#define PFSUBM(dst,src,off) PFSUB dst,[src+off] +#define PFSUBRM(dst,src,off) PFSUBR dst,[src+off] +#define PI2FDM(dst,src,off) PI2FD dst,[src+off] +#define PMULHRWM(dst,src,off) PMULHRW dst,[src+off] + + +#if defined (__MWERKS__) +// At the moment, CodeWarrior does not support these opcodes, so hand-assemble them + +// Defines for operands. +#define _K3D_MM0 0xc0 +#define _K3D_MM1 0xc1 +#define _K3D_MM2 0xc2 +#define _K3D_MM3 0xc3 +#define _K3D_MM4 0xc4 +#define _K3D_MM5 0xc5 +#define _K3D_MM6 0xc6 +#define _K3D_MM7 0xc7 +#define _K3D_mm0 0xc0 +#define _K3D_mm1 0xc1 +#define _K3D_mm2 0xc2 +#define _K3D_mm3 0xc3 +#define _K3D_mm4 0xc4 +#define _K3D_mm5 0xc5 +#define _K3D_mm6 0xc6 +#define _K3D_mm7 0xc7 +#define _K3D_EAX 0x00 +#define _K3D_ECX 0x01 +#define _K3D_EDX 0x02 +#define _K3D_EBX 0x03 +#define _K3D_ESI 0x06 +#define _K3D_EDI 0x07 +#define _K3D_eax 0x00 +#define _K3D_ecx 0x01 +#define _K3D_edx 0x02 +#define _K3D_ebx 0x03 +#define _K3D_esi 0x06 +#define _K3D_edi 0x07 +#define _K3D_EAX 0x00 +#define _K3D_ECX 0x01 +#define _K3D_EDX 0x02 +#define _K3D_EBX 0x03 +#define _K3D_ESI 0x06 +#define _K3D_EDI 0x07 +#define _K3D_eax 0x00 +#define _K3D_ecx 0x01 +#define _K3D_edx 0x02 +#define _K3D_ebx 0x03 +#define _K3D_esi 0x06 +#define _K3D_edi 0x07 + +#define InjK3DOps(dst,src,inst) \ + db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src), _3DNowOpcode##inst + 
+#define InjK3DMOps(dst,src,off,inst) \ + db 0x0f, 0x0f, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off, _3DNowOpcode##inst + +#define InjMMXOps(dst,src,inst) \ + db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src) + +#define InjMMXMOps(dst,src,off,inst) \ + db 0x0f, _3DNowOpcode##inst, (((_K3D_##dst & 0x3f) << 3) | _K3D_##src | 0x40), off + +#define PFNACC(dst,src) InjK3DOps(dst,src,PFNACC) +#define PFPNACC(dst,src) InjK3DOps(dst,src,PFPNACC) +#define PSWAPD(dst,src) InjK3DOps(dst,src,PSWAPD) +#define PMINUB(dst,src) InjMMXOps(dst,src,PMINUB) +#define PMAXUB(dst,src) InjMMXOps(dst,src,PMAXUB) +#define PMINSW(dst,src) InjMMXOps(dst,src,PMINSW) +#define PMAXSW(dst,src) InjMMXOps(dst,src,PMAXSW) +#define PMULHUW(dst,src) InjMMXOps(dst,src,PMULHUW) +#define PAVGB(dst,src) InjMMXOps(dst,src,PAVGB) +#define PAVGW(dst,src) InjMMXOps(dst,src,PAVGW) +#define PSADBW(dst,src) InjMMXOps(dst,src,PSADBW) +#define PMOVMSKB(dst,src) InjMMXOps(dst,src,PMOVMSKB) +#define PMASKMOVQ(dst,src) InjMMXOps(dst,src,PMASKMOVQ) +#define PINSRW(dst,src,msk) InjMMXOps(dst,src,PINSRW) db msk +#define PEXTRW(dst,src,msk) InjMMXOps(dst,src,PEXTRW) db msk +#define PSHUFW(dst,src,msk) InjMMXOps(dst,src,PSHUFW) db msk +#define MOVNTQ(dst,src) InjMMXOps(src,dst,MOVNTQ) +#define PREFETCHNTA(mem) InjMMXOps(mm0,mem,PREFETCHT) +#define PREFETCHT0(mem) InjMMXOps(mm1,mem,PREFETCHT) +#define PREFETCHT1(mem) InjMMXOps(mm2,mem,PREFETCHT) +#define PREFETCHT2(mem) InjMMXOps(mm3,mem,PREFETCHT) + + +/* Memory/offset versions of the K7 opcodes */ +#define PFNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFNACC) +#define PFPNACCM(dst,src,off) InjK3DMOps(dst,src,off,PFPNACC) +#define PSWAPDM(dst,src,off) InjK3DMOps(dst,src,off,PSWAPD) +#define PMINUBM(dst,src,off) InjMMXMOps(dst,src,off,PMINUB) +#define PMAXUBM(dst,src,off) InjMMXMOps(dst,src,off,PMAXUB) +#define PMINSWM(dst,src,off) InjMMXMOps(dst,src,off,PMINSW) +#define PMAXSWM(dst,src,off) InjMMXMOps(dst,src,off,PMAXSW) +#define 
PMULHUWM(dst,src,off) InjMMXMOps(dst,src,off,PMULHUW) +#define PAVGBM(dst,src,off) InjMMXMOps(dst,src,off,PAVGB) +#define PAVGWM(dst,src,off) InjMMXMOps(dst,src,off,PAVGW) +#define PSADBWM(dst,src,off) InjMMXMOps(dst,src,off,PSADBW) +#define PMOVMSKBM(dst,src,off) InjMMXMOps(dst,src,off,PMOVMSKB) +#define PMASKMOVQM(dst,src,off) InjMMXMOps(dst,src,off,PMASKMOVQ) +#define PINSRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PINSRW), msk +#define PEXTRWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PEXTRW), msk +#define PSHUFWM(dst,src,off,msk) InjMMXMOps(dst,src,off,PSHUFW), msk +#define MOVNTQM(dst,src,off) InjMMXMOps(src,dst,off,MOVNTQ) +#define PREFETCHNTAM(mem,off) InjMMXMOps(mm0,mem,off,PREFETCHT) +#define PREFETCHT0M(mem,off) InjMMXMOps(mm1,mem,off,PREFETCHT) +#define PREFETCHT1M(mem,off) InjMMXMOps(mm2,mem,off,PREFETCHT) +#define PREFETCHT2M(mem,off) InjMMXMOps(mm3,mem,off,PREFETCHT) + + +#else + +#define PFNACC(dst,src) PFNACC dst,src +#define PFPNACC(dst,src) PFPNACC dst,src +#define PSWAPD(dst,src) PSWAPD dst,src +#define PMINUB(dst,src) PMINUB dst,src +#define PMAXUB(dst,src) PMAXUB dst,src +#define PMINSW(dst,src) PMINSW dst,src +#define PMAXSW(dst,src) PMAXSW dst,src +#define PMULHUW(dst,src) PMULHUW dst,src +#define PAVGB(dst,src) PAVGB dst,src +#define PAVGW(dst,src) PAVGW dst,src +#define PSADBW(dst,src) PSADBW dst,src +#define PMOVMSKB(dst,src) PMOVMSKB dst,src +#define PMASKMOVQ(dst,src) PMASKMOVQ dst,src +#define PINSRW(dst,src,msk) PINSRW dst,src,msk +#define PEXTRW(dst,src,msk) PEXTRW dst,src,msk +#define PSHUFW(dst,src,msk) PSHUFW dst,src,msk +#define MOVNTQ(dst,src) MOVNTQ dst,src + +#define PFNACCM(dst,src,off) PFNACC dst,[src+off] +#define PFPNACCM(dst,src,off) PFPNACC dst,[src+off] +#define PSWAPDM(dst,src,off) PSWAPD dst,[src+off] +#define PMINUBM(dst,src,off) PMINUB dst,[src+off] +#define PMAXUBM(dst,src,off) PMAXUB dst,[src+off] +#define PMINSWM(dst,src,off) PMINSW dst,[src+off] +#define PMAXSWM(dst,src,off) PMAXSW dst,[src+off] +#define 
PMULHUWM(dst,src,off) PMULHUW dst,[src+off] +#define PAVGBM(dst,src,off) PAVGB dst,[src+off] +#define PAVGWM(dst,src,off) PAVGW dst,[src+off] +#define PSADBWM(dst,src,off) PSADBW dst,[src+off] +#define PMOVMSKBM(dst,src,off) PMOVMSKB dst,[src+off] +#define PMASKMOVQM(dst,src,off) PMASKMOVQ dst,[src+off] +#define PINSRWM(dst,src,off,msk) PINSRW dst,[src+off],msk +#define PEXTRWM(dst,src,off,msk) PEXTRW dst,[src+off],msk +#define PSHUFWM(dst,src,off,msk) PSHUFW dst,[src+off],msk +#define MOVNTQM(dst,src,off) MOVNTQ dst,[src+off] + +#endif + +#endif + +/* Just to deal with lower case. */ +#define pf2id(dst,src) PF2ID(dst,src) +#define pfacc(dst,src) PFACC(dst,src) +#define pfadd(dst,src) PFADD(dst,src) +#define pfcmpeq(dst,src) PFCMPEQ(dst,src) +#define pfcmpge(dst,src) PFCMPGE(dst,src) +#define pfcmpgt(dst,src) PFCMPGT(dst,src) +#define pfmax(dst,src) PFMAX(dst,src) +#define pfmin(dst,src) PFMIN(dst,src) +#define pfmul(dst,src) PFMUL(dst,src) +#define pfrcp(dst,src) PFRCP(dst,src) +#define pfrcpit1(dst,src) PFRCPIT1(dst,src) +#define pfrcpit2(dst,src) PFRCPIT2(dst,src) +#define pfrsqrt(dst,src) PFRSQRT(dst,src) +#define pfrsqit1(dst,src) PFRSQIT1(dst,src) +#define pfsub(dst,src) PFSUB(dst,src) +#define pfsubr(dst,src) PFSUBR(dst,src) +#define pi2fd(dst,src) PI2FD(dst,src) +#define femms FEMMS +#define pavgusb(dst,src) PAVGUSB(dst,src) +#define pmulhrw(dst,src) PMULHRW(dst,src) +#define prefetch(src) PREFETCH(src) +#define prefetchw(src) PREFETCHW(src) + +#define prefetchm(src,off) PREFETCHM(src,off) +#define prefetchmlong(src,off) PREFETCHMLONG(src,off) +#define prefetchwm(src,off) PREFETCHWM(src,off) +#define prefetchwmlong(src,off) PREFETCHWMLONG(src,off) + +#define pfnacc(dst,src) PFNACC(dst,src) +#define pfpnacc(dst,src) PFPNACC(dst,src) +#define pswapd(dst,src) PSWAPD(dst,src) +#define pminub(dst,src) PMINUB(dst,src) +#define pmaxub(dst,src) PMAXUB(dst,src) +#define pminsw(dst,src) PMINSW(dst,src) +#define pmaxsw(dst,src) PMAXSW(dst,src) +#define 
pmulhuw(dst,src) PMULHUW(dst,src) +#define pavgb(dst,src) PAVGB(dst,src) +#define pavgw(dst,src) PAVGW(dst,src) +#define psadbw(dst,src) PSADBW(dst,src) +#define pmovmskb(dst,src) PMOVMSKB(dst,src) +#define pmaskmovq(dst,src) PMASKMOVQ(dst,src) +#define pinsrw(dst,src,msk) PINSRW(dst,src,msk) +#define pextrw(dst,src,msk) PEXTRW(dst,src,msk) +#define pshufw(dst,src,msk) PSHUFW(dst,src,msk) +#define movntq(dst,src) MOVNTQ(dst,src) +#define prefetchnta(mem) PREFETCHNTA(mem) +#define prefetcht0(mem) PREFETCHT0(mem) +#define prefetcht1(mem) PREFETCHT1(mem) +#define prefetcht2(mem) PREFETCHT2(mem) + + +#define pavgusbm(dst,src,off) PAVGUSBM(dst,src,off) +#define pf2idm(dst,src,off) PF2IDM(dst,src,off) +#define pfaccm(dst,src,off) PFACCM(dst,src,off) +#define pfaddm(dst,src,off) PFADDM(dst,src,off) +#define pfcmpeqm(dst,src,off) PFCMPEQM(dst,src,off) +#define pfcmpgem(dst,src,off) PFCMPGEM(dst,src,off) +#define pfcmpgtm(dst,src,off) PFCMPGTM(dst,src,off) +#define pfmaxm(dst,src,off) PFMAXM(dst,src,off) +#define pfminm(dst,src,off) PFMINM(dst,src,off) +#define pfmulm(dst,src,off) PFMULM(dst,src,off) +#define pfrcpm(dst,src,off) PFRCPM(dst,src,off) +#define pfrcpit1m(dst,src,off) PFRCPIT1M(dst,src,off) +#define pfrcpit2m(dst,src,off) PFRCPIT2M(dst,src,off) +#define pfrsqrtm(dst,src,off) PFRSQRTM(dst,src,off) +#define pfrsqit1m(dst,src,off) PFRSQIT1M(dst,src,off) +#define pfsubm(dst,src,off) PFSUBM(dst,src,off) +#define pfsubrm(dst,src,off) PFSUBRM(dst,src,off) +#define pi2fdm(dst,src,off) PI2FDM(dst,src,off) +#define pmulhrwm(dst,src,off) PMULHRWM(dst,src,off) +#define cpuid CPUID +#define sfence SFENCE + +#define pfnaccm(dst,src,off) PFNACCM(dst,src,off) +#define pfpnaccm(dst,src,off) PFPNACCM(dst,src,off) +#define pswapdm(dst,src,off) PSWAPDM(dst,src,off) +#define pminubm(dst,src,off) PMINUBM(dst,src,off) +#define pmaxubm(dst,src,off) PMAXUBM(dst,src,off) +#define pminswm(dst,src,off) PMINSWM(dst,src,off) +#define pmaxswm(dst,src,off) PMAXSWM(dst,src,off) +#define 
pmulhuwm(dst,src,off) PMULHUWM(dst,src,off) +#define pavgbm(dst,src,off) PAVGBM(dst,src,off) +#define pavgwm(dst,src,off) PAVGWM(dst,src,off) +#define psadbwm(dst,src,off) PSADBWM(dst,src,off) +#define pmovmskbm(dst,src,off) PMOVMSKBM(dst,src,off) +#define pmaskmovqm(dst,src,off) PMASKMOVQM(dst,src,off) +#define pinsrwm(dst,src,off,msk) PINSRWM(dst,src,off,msk) +#define pextrwm(dst,src,off,msk) PEXTRWM(dst,src,off,msk) +#define pshufwm(dst,src,off,msk) PSHUFWM(dst,src,off,msk) +#define movntqm(dst,src,off) MOVNTQM(dst,src,off) +#define prefetchntam(mem,off) PREFETCHNTA(mem,off) +#define prefetcht0m(mem,off) PREFETCHT0(mem,off) +#define prefetcht1m(mem,off) PREFETCHT1(mem,off) +#define prefetcht2m(mem,off) PREFETCHT2(mem,off) + +#endif |