diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PxShared/src/foundation/include/windows/PsWindowsIntrinsics.h | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'PxShared/src/foundation/include/windows/PsWindowsIntrinsics.h')
| -rw-r--r-- | PxShared/src/foundation/include/windows/PsWindowsIntrinsics.h | 190 |
1 files changed, 190 insertions, 0 deletions
diff --git a/PxShared/src/foundation/include/windows/PsWindowsIntrinsics.h b/PxShared/src/foundation/include/windows/PsWindowsIntrinsics.h new file mode 100644 index 00000000..ccf6d620 --- /dev/null +++ b/PxShared/src/foundation/include/windows/PsWindowsIntrinsics.h @@ -0,0 +1,190 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSWINDOWSINTRINSICS_H +#define PSFOUNDATION_PSWINDOWSINTRINSICS_H + +#include "Ps.h" +#include "foundation/PxAssert.h" + +// this file is for internal intrinsics - that is, intrinsics that are used in +// cross platform code but do not appear in the API + +#if !PX_WINDOWS_FAMILY +#error "This file should only be included by Windows builds!!" +#endif + +#pragma warning(push) +//'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives' +#pragma warning(disable : 4668) +#if PX_VC == 10 +#pragma warning(disable : 4987) // nonstandard extension used: 'throw (...)' +#endif +#include <intrin.h> +#pragma warning(pop) + +#pragma warning(push) +#pragma warning(disable : 4985) // 'symbol name': attributes not present on previous declaration +#include <math.h> +#pragma warning(pop) + +#include <float.h> +#include <mmintrin.h> + +#pragma intrinsic(_BitScanForward) +#pragma intrinsic(_BitScanReverse) + +namespace physx +{ +namespace shdfnd +{ + +/* +* Implements a memory barrier +*/ +PX_FORCE_INLINE void memoryBarrier() +{ + _ReadWriteBarrier(); + /* long Barrier; + __asm { + xchg Barrier, eax + }*/ +} + +/*! +Returns the index of the highest set bit. Not valid for zero arg. +*/ +PX_FORCE_INLINE uint32_t highestSetBitUnsafe(uint32_t v) +{ + unsigned long retval; + _BitScanReverse(&retval, v); + return retval; +} + +/*! +Returns the index of the highest set bit. Undefined for zero arg. +*/ +PX_FORCE_INLINE uint32_t lowestSetBitUnsafe(uint32_t v) +{ + unsigned long retval; + _BitScanForward(&retval, v); + return retval; +} + +/*! +Returns the number of leading zeros in v. Returns 32 for v=0. +*/ +PX_FORCE_INLINE uint32_t countLeadingZeros(uint32_t v) +{ + if(v) + { + unsigned long bsr = (unsigned long)-1; + _BitScanReverse(&bsr, v); + return 31 - bsr; + } + else + return 32; +} + +/*! +Prefetch aligned cache size around \c ptr+offset. +*/ +#if !PX_ARM +PX_FORCE_INLINE void prefetchLine(const void* ptr, uint32_t offset = 0) +{ + // cache line on X86/X64 is 64-bytes so a 128-byte prefetch would require 2 prefetches. + // However, we can only dispatch a limited number of prefetch instructions so we opt to prefetch just 1 cache line + /*_mm_prefetch(((const char*)ptr + offset), _MM_HINT_T0);*/ + // We get slightly better performance prefetching to non-temporal addresses instead of all cache levels + _mm_prefetch(((const char*)ptr + offset), _MM_HINT_NTA); +} +#else +PX_FORCE_INLINE void prefetchLine(const void* ptr, uint32_t offset = 0) +{ + // arm does have 32b cache line size + __prefetch(((const char*)ptr + offset)); +} +#endif + +/*! +Prefetch \c count bytes starting at \c ptr. +*/ +#if !PX_ARM +PX_FORCE_INLINE void prefetch(const void* ptr, uint32_t count = 1) +{ + const char* cp = (char*)ptr; + uint64_t p = size_t(ptr); + uint64_t startLine = p >> 6, endLine = (p + count - 1) >> 6; + uint64_t lines = endLine - startLine + 1; + do + { + prefetchLine(cp); + cp += 64; + } while(--lines); +} +#else +PX_FORCE_INLINE void prefetch(const void* ptr, uint32_t count = 1) +{ + const char* cp = (char*)ptr; + uint32_t p = size_t(ptr); + uint32_t startLine = p >> 5, endLine = (p + count - 1) >> 5; + uint32_t lines = endLine - startLine + 1; + do + { + prefetchLine(cp); + cp += 32; + } while(--lines); +} +#endif + +//! \brief platform-specific reciprocal +PX_CUDA_CALLABLE PX_FORCE_INLINE float recipFast(float a) +{ + return 1.0f / a; +} + +//! \brief platform-specific fast reciprocal square root +PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrtFast(float a) +{ + return 1.0f / ::sqrtf(a); +} + +//! \brief platform-specific floor +PX_CUDA_CALLABLE PX_FORCE_INLINE float floatFloor(float x) +{ + return ::floorf(x); +} + +#define NS_EXPECT_TRUE(x) x +#define NS_EXPECT_FALSE(x) x + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSWINDOWSINTRINSICS_H |