diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Internal.h | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Internal.h')
| -rw-r--r-- | PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Internal.h | 265 |
1 files changed, 265 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Internal.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Internal.h new file mode 100644 index 00000000..07df2109 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Internal.h @@ -0,0 +1,265 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV4_INTERNAL_H +#define GU_BV4_INTERNAL_H + +#include "CmPhysXCommon.h" +#include "PsFPU.h" + + static PX_FORCE_INLINE PxU32 getChildOffset(PxU32 data) { return data>>GU_BV4_CHILD_OFFSET_SHIFT_COUNT; } + static PX_FORCE_INLINE PxU32 getChildType(PxU32 data) { return (data>>1)&3; } + + // PT: the general structure is that there is a root "process stream" function which is the entry point for the query. + // It then calls "process node" functions for each traversed node, except for the Slabs-based raycast versions that deal + // with 4 nodes at a time within the "process stream" function itself. When a leaf is found, "doLeafTest" functors + // passed to the "process stream" entry point are called. +#ifdef GU_BV4_USE_SLABS + #define processStreamNoOrder BV4_ProcessStreamSwizzledNoOrder + #define processStreamOrdered BV4_ProcessStreamSwizzledOrdered + #define processStreamRayNoOrder(a, b) BV4_ProcessStreamKajiyaNoOrder<a, b> + #define processStreamRayOrdered(a, b) BV4_ProcessStreamKajiyaOrdered<a, b> +#else + #define processStreamNoOrder BV4_ProcessStreamNoOrder + #define processStreamOrdered BV4_ProcessStreamOrdered2 + #define processStreamRayNoOrder(a, b) BV4_ProcessStreamNoOrder<b> + #define processStreamRayOrdered(a, b) BV4_ProcessStreamOrdered2<b> +#endif + +#ifndef GU_BV4_USE_SLABS +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + // PT: see http://www.codercorner.com/blog/?p=734 + + // PT: TODO: refactor with dup in bucket pruner (TA34704) + PX_FORCE_INLINE PxU32 computeDirMask(const PxVec3& dir) + { + // XYZ + // --- + // --+ + // -+- + // -++ + // +-- + // +-+ + // ++- + // +++ + + const PxU32 X = PX_IR(dir.x)>>31; + const PxU32 Y = PX_IR(dir.y)>>31; + const PxU32 Z = PX_IR(dir.z)>>31; + const PxU32 bitIndex = Z|(Y<<1)|(X<<2); + return 1u<<bitIndex; + } + + // 0 0 0 PP PN NP NN 0 1 2 3 + // 0 0 1 PP PN NN NP 0 1 3 2 + // 0 1 0 PN PP NP NN 1 0 2 3 + // 0 1 1 PN PP NN NP 1 0 3 2 + // 1 0 0 NP NN PP PN 2 3 0 1 + // 1 0 1 NN NP PP PN 3 2 0 1 + // 1 1 0 NP NN PN PP 2 3 1 0 + // 1 1 1 NN NP PN PP 3 2 1 0 + static const PxU8 order[] = { + 0,1,2,3, + 0,1,3,2, + 1,0,2,3, + 1,0,3,2, + 2,3,0,1, + 3,2,0,1, + 2,3,1,0, + 3,2,1,0, + }; + + PX_FORCE_INLINE PxU32 decodePNS(const BVDataPacked* PX_RESTRICT node, const PxU32 dirMask) + { + const PxU32 bit0 = (node[0].decodePNSNoShift() & dirMask) ? 1u : 0; + const PxU32 bit1 = (node[1].decodePNSNoShift() & dirMask) ? 1u : 0; + const PxU32 bit2 = (node[2].decodePNSNoShift() & dirMask) ? 1u : 0; //### potentially reads past the end of the stream here! + return bit2|(bit1<<1)|(bit0<<2); + } +#endif // GU_BV4_PRECOMPUTED_NODE_SORT + + #define PNS_BLOCK(i, a, b, c, d) \ + case i: \ + { \ + if(code & (1<<a)) { stack[nb++] = node[a].getChildData(); } \ + if(code & (1<<b)) { stack[nb++] = node[b].getChildData(); } \ + if(code & (1<<c)) { stack[nb++] = node[c].getChildData(); } \ + if(code & (1<<d)) { stack[nb++] = node[d].getChildData(); } \ + }break; + + #define PNS_BLOCK1(i, a, b, c, d) \ + case i: \ + { \ + stack[nb] = node[a].getChildData(); nb += (code & (1<<a))?1:0; \ + stack[nb] = node[b].getChildData(); nb += (code & (1<<b))?1:0; \ + stack[nb] = node[c].getChildData(); nb += (code & (1<<c))?1:0; \ + stack[nb] = node[d].getChildData(); nb += (code & (1<<d))?1:0; \ + }break; + + #define PNS_BLOCK2(a, b, c, d) { \ + if(code & (1<<a)) { stack[nb++] = node[a].getChildData(); } \ + if(code & (1<<b)) { stack[nb++] = node[b].getChildData(); } \ + if(code & (1<<c)) { stack[nb++] = node[c].getChildData(); } \ + if(code & (1<<d)) { stack[nb++] = node[d].getChildData(); } } \ + +#if PX_INTEL_FAMILY + template<class LeafTestT, class ParamsT> + static Ps::IntBool BV4_ProcessStreamNoOrder(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + const PxU32 nodeType = getChildType(childData); + + if(nodeType>1 && BV4_ProcessNodeNoOrder<LeafTestT, 3>(stack, nb, node, params)) + return 1; + if(nodeType>0 && BV4_ProcessNodeNoOrder<LeafTestT, 2>(stack, nb, node, params)) + return 1; + if(BV4_ProcessNodeNoOrder<LeafTestT, 1>(stack, nb, node, params)) + return 1; + if(BV4_ProcessNodeNoOrder<LeafTestT, 0>(stack, nb, node, params)) + return 1; + + }while(nb); + + return 0; + } + + template<class LeafTestT, class ParamsT> + static void BV4_ProcessStreamOrdered(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + const PxU32 dirMask = computeDirMask(params->mLocalDir)<<3; + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + + const PxU8* PX_RESTRICT ord = order + decodePNS(node, dirMask)*4; + const PxU32 limit = 2 + getChildType(childData); + + BV4_ProcessNodeOrdered<LeafTestT>(stack, nb, node, params, ord[0], limit); + BV4_ProcessNodeOrdered<LeafTestT>(stack, nb, node, params, ord[1], limit); + BV4_ProcessNodeOrdered<LeafTestT>(stack, nb, node, params, ord[2], limit); + BV4_ProcessNodeOrdered<LeafTestT>(stack, nb, node, params, ord[3], limit); + }while(Nb); + } + + // Alternative, experimental version using PNS + template<class LeafTestT, class ParamsT> + static void BV4_ProcessStreamOrdered2(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + const PxU32 X = PX_IR(params->mLocalDir_Padded.x)>>31; + const PxU32 Y = PX_IR(params->mLocalDir_Padded.y)>>31; + const PxU32 Z = PX_IR(params->mLocalDir_Padded.z)>>31; + const PxU32 bitIndex = 3+(Z|(Y<<1)|(X<<2)); + const PxU32 dirMask = 1u<<bitIndex; + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + const PxU32 nodeType = getChildType(childData); + + PxU32 code = 0; + BV4_ProcessNodeOrdered2<LeafTestT, 0>(code, node, params); + BV4_ProcessNodeOrdered2<LeafTestT, 1>(code, node, params); + if(nodeType>0) + BV4_ProcessNodeOrdered2<LeafTestT, 2>(code, node, params); + if(nodeType>1) + BV4_ProcessNodeOrdered2<LeafTestT, 3>(code, node, params); + + if(code) + { + // PT: TODO: check which implementation is best on each platform (TA34704) +#define FOURTH_TEST // Version avoids computing the PNS index, and also avoids all non-constant shifts. Full of branches though. Fastest on Win32. +#ifdef FOURTH_TEST + { + if(node[0].decodePNSNoShift() & dirMask) // Bit2 + { + if(node[1].decodePNSNoShift() & dirMask) // Bit1 + { + if(node[2].decodePNSNoShift() & dirMask) // Bit0 + PNS_BLOCK2(3,2,1,0) // 7 + else + PNS_BLOCK2(2,3,1,0) // 6 + } + else + { + if(node[2].decodePNSNoShift() & dirMask) // Bit0 + PNS_BLOCK2(3,2,0,1) // 5 + else + PNS_BLOCK2(2,3,0,1) // 4 + } + } + else + { + if(node[1].decodePNSNoShift() & dirMask) // Bit1 + { + if(node[2].decodePNSNoShift() & dirMask) // Bit0 + PNS_BLOCK2(1,0,3,2) // 3 + else + PNS_BLOCK2(1,0,2,3) // 2 + } + else + { + if(node[2].decodePNSNoShift() & dirMask) // Bit0 + PNS_BLOCK2(0,1,3,2) // 1 + else + PNS_BLOCK2(0,1,2,3) // 0 + } + } + } +#endif + } + }while(nb); + } +#endif // PX_INTEL_FAMILY +#endif // GU_BV4_USE_SLABS + +#endif // GU_BV4_INTERNAL_H |