diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/GeomUtils/src/mesh | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/GeomUtils/src/mesh')
58 files changed, 15683 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32.cpp new file mode 100644 index 00000000..1ee2a683 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32.cpp @@ -0,0 +1,277 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. 
All rights reserved. + +#include "foundation/PxMemory.h" +#include "GuBV32.h" +#include "GuSerialize.h" +#include "CmUtils.h" +#include "PsUtilities.h" + +using namespace physx; +using namespace Gu; + +#define DELETEARRAY(x) if (x) { delete []x; x = NULL; } + + +BV32Tree::BV32Tree(SourceMesh* meshInterface, const PxBounds3& localBounds) +{ + reset(); + init(meshInterface, localBounds); +} + +BV32Tree::BV32Tree() +{ + reset(); +} + +void BV32Tree::release() +{ + if (!mUserAllocated) + { + DELETEARRAY(mNodes); + PX_FREE_AND_RESET(mPackedNodes); + } + mNodes = NULL; + mNbNodes = 0; +} + +BV32Tree::~BV32Tree() +{ + release(); +} + +void BV32Tree::reset() +{ + mMeshInterface = NULL; + mNbNodes = 0; + mNodes = NULL; + mNbPackedNodes = 0; + mPackedNodes = NULL; + mInitData = 0; + mUserAllocated = false; +} + +void BV32Tree::operator=(BV32Tree& v) +{ + mMeshInterface = v.mMeshInterface; + mLocalBounds = v.mLocalBounds; + mNbNodes = v.mNbNodes; + mNodes = v.mNodes; + mInitData = v.mInitData; + mUserAllocated = v.mUserAllocated; + v.reset(); +} + +bool BV32Tree::init(SourceMesh* meshInterface, const PxBounds3& localBounds) +{ + mMeshInterface = meshInterface; + mLocalBounds.init(localBounds); + return true; +} + +// PX_SERIALIZATION +BV32Tree::BV32Tree(const PxEMPTY) +{ + mUserAllocated = true; +} + +void BV32Tree::exportExtraData(PxSerializationContext& stream) +{ + stream.alignData(16); + stream.writeData(mNodes, mNbNodes*sizeof(BVDataPacked)); +} + +void BV32Tree::importExtraData(PxDeserializationContext& context) +{ + context.alignExtraData(16); + mNodes = context.readExtraData<BV32Data>(mNbNodes); +} +//~PX_SERIALIZATION + +bool BV32Tree::load(PxInputStream& stream, PxU32 meshVersion) +{ + PX_ASSERT(!mUserAllocated); + PX_UNUSED(meshVersion); + + release(); + + PxI8 a, b, c, d; + readChunk(a, b, c, d, stream); + if (a != 'B' || b != 'V' || c != '3' || d != '2') + return false; + + const PxU32 version = 1; + const bool mismatch = (shdfnd::littleEndian() == 1); + if 
(readDword(mismatch, stream) != version) + return false; + + mLocalBounds.mCenter.x = readFloat(mismatch, stream); + mLocalBounds.mCenter.y = readFloat(mismatch, stream); + mLocalBounds.mCenter.z = readFloat(mismatch, stream); + mLocalBounds.mExtentsMagnitude = readFloat(mismatch, stream); + + mInitData = readDword(mismatch, stream); + + /*const PxU32 nbNodes = readDword(mismatch, stream); + mNbNodes = nbNodes; + + if (nbNodes) + { + BV32Data* nodes = PX_NEW(BV32Data)[nbNodes]; + + mNodes = nodes; + Cm::markSerializedMem(nodes, sizeof(BV32Data)*nbNodes); + + for (PxU32 i = 0; i<nbNodes; i++) + { + BV32Data& node = nodes[i]; + + readFloatBuffer(&node.mCenter.x, 3, mismatch, stream); + node.mData = readDword(mismatch, stream); + readFloatBuffer(&node.mExtents.x, 3, mismatch, stream); + } + }*/ + + + //read SOA format node data + const PxU32 nbPackedNodes = readDword(mismatch, stream); + mNbPackedNodes = nbPackedNodes; + + if (nbPackedNodes) + { + mPackedNodes = reinterpret_cast<BV32DataPacked*>(PX_ALLOC(sizeof(BV32DataPacked)*nbPackedNodes, "BV32DataPacked")); + + Cm::markSerializedMem(mPackedNodes, sizeof(BV32DataPacked)*nbPackedNodes); + + for (PxU32 i = 0; i < nbPackedNodes; ++i) + { + BV32DataPacked& node = mPackedNodes[i]; + node.mNbNodes = readDword(mismatch, stream); + PX_ASSERT(node.mNbNodes > 0); + ReadDwordBuffer(node.mData, node.mNbNodes, mismatch, stream); + const PxU32 nbElements = 4 * node.mNbNodes; + readFloatBuffer(&node.mCenter[0].x, nbElements, mismatch, stream); + readFloatBuffer(&node.mExtents[0].x, nbElements, mismatch, stream); + + } + } + + return true; +} + + +void BV32Tree::calculateLeafNode(BV32Data& node) +{ + if (!node.isLeaf()) + { + const PxU32 nbChildren = node.getNbChildren(); + const PxU32 offset = node.getChildOffset(); + //calcualte how many children nodes are leaf nodes + PxU32 nbLeafNodes = 0; + for (PxU32 i = 0; i < nbChildren; ++i) + { + BV32Data& child = mNodes[offset + i]; + + if (child.isLeaf()) + { + nbLeafNodes++; + } + } + 
+ node.mNbLeafNodes = nbLeafNodes; + for (PxU32 i = 0; i < nbChildren; ++i) + { + BV32Data& child = mNodes[offset + i]; + calculateLeafNode(child); + } + + } +} + + + +void BV32Tree::createSOAformatNode(BV32DataPacked& packedData, const BV32Data& node, const PxU32 childOffset, PxU32& currentIndex, PxU32& nbPackedNodes) +{ + + //found the next 32 nodes and fill it in SOA format + + const PxU32 nbChildren = node.getNbChildren(); + const PxU32 offset = node.getChildOffset(); + + + for (PxU32 i = 0; i < nbChildren; ++i) + { + BV32Data& child = mNodes[offset + i]; + + packedData.mCenter[i] = PxVec4(child.mCenter, 0.f); + packedData.mExtents[i] = PxVec4(child.mExtents, 0.f); + packedData.mData[i] = PxU32(child.mData); + } + + packedData.mNbNodes = nbChildren; + + PxU32 NbToGo = 0; + PxU32 NextIDs[32]; + memset(NextIDs, PX_INVALID_U32, sizeof(PxU32) * 32); + const BV32Data* ChildNodes[32]; + memset(ChildNodes, 0, sizeof(BV32Data*) * 32); + + + for (PxU32 i = 0; i< nbChildren; i++) + { + BV32Data& child = mNodes[offset + i]; + + if (!child.isLeaf()) + { + const PxU32 NextID = currentIndex; + + const PxU32 ChildSize = child.getNbChildren() - child.mNbLeafNodes; + currentIndex += ChildSize; + + //packedData.mData[i] = (packedData.mData[i] & ((1 << GU_BV4_CHILD_OFFSET_SHIFT_COUNT) - 1)) | (NextID << GU_BV4_CHILD_OFFSET_SHIFT_COUNT); + packedData.mData[i] = (packedData.mData[i] & ((1 << GU_BV4_CHILD_OFFSET_SHIFT_COUNT) - 1)) | ((childOffset + NbToGo) << GU_BV4_CHILD_OFFSET_SHIFT_COUNT); + + NextIDs[NbToGo] = NextID; + ChildNodes[NbToGo] = &child; + NbToGo++; + } + } + + nbPackedNodes += NbToGo; + for (PxU32 i = 0; i < NbToGo; ++i) + { + const BV32Data& child = *ChildNodes[i]; + + BV32DataPacked& childData = mPackedNodes[childOffset+i]; + + createSOAformatNode(childData, child, NextIDs[i], currentIndex, nbPackedNodes); + + } + +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32.h new file mode 100644 index 00000000..4caf67d5 
--- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32.h @@ -0,0 +1,146 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef GU_BV32_H
#define GU_BV32_H

#include "foundation/PxBounds3.h"
#include "PxSerialFramework.h"
#include "PsUserAllocated.h"
#include "GuBV4.h"
#include "CmPhysXCommon.h"
#include "PsArray.h"
#include "foundation/PxVec4.h"

namespace physx
{
	namespace Gu
	{
		// One node of a 32-wide bounding-volume tree (AOS layout).
		// mData packs leaf/internal info in its low bits:
		//   bit 0    : 1 = leaf, 0 = internal node
		//   leaf     : bits 1..6 = referenced triangle count, bits 7.. = triangle start index
		//   internal : bits 1..(GU_BV4_CHILD_OFFSET_SHIFT_COUNT-1) = child count,
		//              bits GU_BV4_CHILD_OFFSET_SHIFT_COUNT.. = child node offset
		struct BV32Data : public physx::shdfnd::UserAllocated
		{
			PxVec3	mCenter;       // AABB center
			PxU32	mNbLeafNodes;  // number of direct children that are leaves (filled by BV32Tree::calculateLeafNode)
			PxVec3	mExtents;      // AABB half-extents (negative => empty, see setEmpty)
			size_t	mData;         // packed leaf/child data, layout above


			PX_FORCE_INLINE BV32Data() : mNbLeafNodes(0), mData(PX_INVALID_U32)
			{
				setEmpty();
			}

			// Non-zero when this node is a leaf (tests bit 0).
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 isLeaf() const { return mData & 1; }

			// if the node is leaf,
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getNbReferencedTriangles() const { PX_ASSERT(isLeaf()); return PxU32((mData >>1)&63); }
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getTriangleStartIndex() const { PX_ASSERT(isLeaf()); return PxU32(mData >> 7); }

			// if the node isn't leaf, we will get the childOffset
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getChildOffset() const { PX_ASSERT(!isLeaf()); return PxU32(mData >> GU_BV4_CHILD_OFFSET_SHIFT_COUNT); }
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getNbChildren() const { PX_ASSERT(!isLeaf()); return ((mData) & ((1 << GU_BV4_CHILD_OFFSET_SHIFT_COUNT) - 1))>>1; }

			// Reconstruct the AABB min/max corners from center/extents.
			PX_CUDA_CALLABLE PX_FORCE_INLINE void getMinMax(PxVec3& min, PxVec3& max) const
			{
				min = mCenter - mExtents;
				max = mCenter + mExtents;
			}

			// Mark the node's bounds as empty (negative extents).
			PX_FORCE_INLINE void setEmpty()
			{
				mCenter = PxVec3(0.0f, 0.0f, 0.0f);
				mExtents = PxVec3(-1.0f, -1.0f, -1.0f);
			}

		};

		// SOA-packed block of up to 32 sibling nodes: all centers, then all
		// extents, then all packed data words. 16-byte aligned for vector loads.
		// mData[i] uses the same bit layout as BV32Data::mData (truncated to 32 bits).
		PX_ALIGN_PREFIX(16)
		struct BV32DataPacked
		{
			PxVec4 mCenter[32];   // child AABB centers (w unused)
			PxVec4 mExtents[32];  // child AABB half-extents (w unused)
			PxU32 mData[32];      // packed leaf/child data per child
			PxU32 mNbNodes;       // number of valid children in this block
			PxU32 pad[3];         // keep the struct 16-byte aligned in size

			PX_CUDA_CALLABLE PX_FORCE_INLINE BV32DataPacked() : mNbNodes(0)
			{
			}

			// Non-zero when child 'index' is a leaf (tests bit 0).
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 isLeaf(const PxU32 index) const { return mData[index] & 1; }
			// if the node is leaf,
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getNbReferencedTriangles(const PxU32 index) const { PX_ASSERT(isLeaf(index)); return (mData[index] >> 1) & 63; }
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getTriangleStartIndex(const PxU32 index) const { PX_ASSERT(isLeaf(index)); return (mData[index] >> 7); }
			// if the node isn't leaf, we will get the childOffset
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getChildOffset(const PxU32 index) const { PX_ASSERT(!isLeaf(index)); return mData[index] >> GU_BV4_CHILD_OFFSET_SHIFT_COUNT; }
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getNbChildren(const PxU32 index) const { PX_ASSERT(!isLeaf(index)); return ((mData[index])& ((1 << GU_BV4_CHILD_OFFSET_SHIFT_COUNT) - 1)) >> 1; }
		}
		PX_ALIGN_SUFFIX(16);

		// 32-wide BV tree over a triangle mesh. Holds both the AOS node array
		// (mNodes) and the SOA packed blocks (mPackedNodes) built from it.
		class BV32Tree : public physx::shdfnd::UserAllocated
		{
		public:
			// PX_SERIALIZATION
			BV32Tree(const PxEMPTY);
			void exportExtraData(PxSerializationContext&);
			void importExtraData(PxDeserializationContext& context);
			static void getBinaryMetaData(PxOutputStream& stream);
			//~PX_SERIALIZATION

			PX_PHYSX_COMMON_API BV32Tree();
			PX_PHYSX_COMMON_API BV32Tree(SourceMesh* meshInterface, const PxBounds3& localBounds);
			PX_PHYSX_COMMON_API ~BV32Tree();

			bool load(PxInputStream& stream, PxU32 meshVersion); // converts to proper endian at load time

			void calculateLeafNode(BV32Data& node);
			void createSOAformatNode(BV32DataPacked& packedData, const BV32Data& node, const PxU32 childOffset, PxU32& currentIndex, PxU32& nbPackedNodes);

			void reset();
			void operator = (BV32Tree& v);

			bool init(SourceMesh* meshInterface, const PxBounds3& localBounds);
			void release();

			SourceMesh* mMeshInterface;  // mesh this tree indexes into (not owned)
			LocalBounds mLocalBounds;    // tree bounds in mesh-local space

			PxU32 mNbNodes;              // number of AOS nodes
			BV32Data* mNodes;            // AOS node array (owned unless mUserAllocated)
			BV32DataPacked* mPackedNodes;// SOA packed node blocks (owned unless mUserAllocated)
			PxU32 mNbPackedNodes;        // number of SOA blocks
			PxU32 mInitData;             // root child count + 1 (flattening start index)
			// NOTE(review): original comment claimed a BV4-style layout
			// constraint ("right after mCenterOrMinCoeff/mExtentsOrMaxCoeff")
			// that does not match this struct — confirm before reordering members.
			bool mUserAllocated;         // true when node memory is serialized/user-owned
			bool mPadding[3];            // pad to 4-byte boundary
		};

	} // namespace Gu
}

#endif // GU_BV32_H
a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32Build.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32Build.cpp new file mode 100644 index 00000000..da62280f --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32Build.cpp @@ -0,0 +1,530 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. 
All rights reserved. + +#include "foundation/PxVec4.h" +#include "GuBV32Build.h" +#include "GuBV32.h" +#include "PxTriangle.h" +#include "CmPhysXCommon.h" +#include "PsBasicTemplates.h" +#include "GuCenterExtents.h" +#include "GuBV4Build.h" +#include "PsAllocator.h" + +using namespace physx; +using namespace Gu; + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#define DELETESINGLE(x) if (x) { delete x; x = NULL; } +#define DELETEARRAY(x) if (x) { delete []x; x = NULL; } + +struct BV32Node : public physx::shdfnd::UserAllocated +{ + BV32Node() : mNbChildBVNodes(0) + {} + + BV32Data mBVData[32]; + PxU32 mNbChildBVNodes; + + PX_FORCE_INLINE size_t isLeaf(PxU32 i) const { return mBVData[i].mData & 1; } + PX_FORCE_INLINE PxU32 getPrimitive(PxU32 i) const { return PxU32(mBVData[i].mData >> 1); } + PX_FORCE_INLINE const BV32Node* getChild(PxU32 i) const { return reinterpret_cast<BV32Node*>(mBVData[i].mData); } + + + PxU32 getSize() const + { + return sizeof(BV32Data)*mNbChildBVNodes; + } +}; + + +static void fillInNodes(const AABBTreeNode* current_node, const PxU32 startIndex, const PxU32 endIndex, const AABBTreeNode** NODES, PxU32& stat) +{ + + if (startIndex + 1 == endIndex) + { + //fill in nodes + const AABBTreeNode* P = current_node->getPos(); + const AABBTreeNode* N = current_node->getNeg(); + NODES[startIndex] = P; + NODES[endIndex] = N; + stat += 2; + } + else + { + const AABBTreeNode* P = current_node->getPos(); + const AABBTreeNode* N = current_node->getNeg(); + const PxU32 midIndex = startIndex + ((endIndex - startIndex) / 2); + if (!P->isLeaf()) + fillInNodes(P, startIndex, midIndex, NODES, stat); + else + { + NODES[startIndex] = P; + stat++; + } + + if (!N->isLeaf()) + fillInNodes(N, midIndex + 1, endIndex, NODES, stat); + else + { + NODES[midIndex + 1] = N; + stat++; + } + } +} + + + +static void setPrimitive(const AABBTree& source, BV32Node* node32, PxU32 i, const AABBTreeNode* node, float epsilon) +{ + const PxU32 nbPrims = 
node->getNbPrimitives(); + PX_ASSERT(nbPrims<=32); + const PxU32* indexBase = source.getIndices(); + const PxU32* prims = node->getPrimitives(); + const PxU32 offset = PxU32(prims - indexBase); + +#if BV32_VALIDATE + for (PxU32 j = 0; j<nbPrims; j++) + { + PX_ASSERT(prims[j] == offset + j); + } +#endif + const PxU32 primitiveIndex = (offset << 6) | (nbPrims & 63); + + node32->mBVData[i].mCenter = node->getAABB().getCenter(); + node32->mBVData[i].mExtents = node->getAABB().getExtents(); + if (epsilon != 0.0f) + node32->mBVData[i].mExtents += PxVec3(epsilon, epsilon, epsilon); + node32->mBVData[i].mData = (primitiveIndex << 1) | 1; +} + +static BV32Node* setNode(const AABBTree& source, BV32Node* node32, PxU32 i, const AABBTreeNode* node, float epsilon) +{ + BV32Node* child = NULL; + + if (node) + { + if (node->isLeaf()) + { + setPrimitive(source, node32, i, node, epsilon); + } + else + { + node32->mBVData[i].mCenter = node->getAABB().getCenter(); + node32->mBVData[i].mExtents = node->getAABB().getExtents(); + if (epsilon != 0.0f) + node32->mBVData[i].mExtents += PxVec3(epsilon, epsilon, epsilon); + + child = PX_NEW(BV32Node); + node32->mBVData[i].mData = size_t(child); + } + } + + return child; +} + + +static void _BuildBV32(const AABBTree& source, BV32Node* tmp, const AABBTreeNode* current_node, float epsilon, PxU32& nbNodes) +{ + PX_ASSERT(!current_node->isLeaf()); + + const AABBTreeNode* NODES[32]; + memset(NODES, 0, sizeof(AABBTreeNode*) * 32); + + fillInNodes(current_node, 0, 31, NODES, tmp->mNbChildBVNodes); + + PxU32 left = 0; + PxU32 right = 31; + + while (left < right) + { + + //sweep from the front + while (left<right) + { + //found a hole + if (NODES[left] == NULL) + break; + left++; + } + + //sweep from the back + while (left < right) + { + //found a node + if (NODES[right]) + break; + right--; + } + + if (left != right) + { + //swap left and right + const AABBTreeNode* node = NODES[right]; + NODES[right] = NODES[left]; + NODES[left] = node; + } + + } + + 
nbNodes += tmp->mNbChildBVNodes; + + for (PxU32 i = 0; i < tmp->mNbChildBVNodes; ++i) + { + const AABBTreeNode* tempNode = NODES[i]; + BV32Node* Child = setNode(source, tmp, i, tempNode, epsilon); + if (Child) + { + _BuildBV32(source, Child, tempNode, epsilon, nbNodes); + } + } + +} + +// +//static void validateTree(const AABBTree& Source, const AABBTreeNode* currentNode) +//{ +// if (currentNode->isLeaf()) +// { +// const PxU32* indexBase = Source.getIndices(); +// const PxU32* prims = currentNode->getPrimitives(); +// const PxU32 offset = PxU32(prims - indexBase); +// const PxU32 nbPrims = currentNode->getNbPrimitives(); +// for (PxU32 j = 0; j<nbPrims; j++) +// { +// PX_ASSERT(prims[j] == offset + j); +// } +// } +// else +// { +// const AABBTreeNode* pos = currentNode->getPos(); +// validateTree(Source, pos); +// const AABBTreeNode* neg = currentNode->getNeg(); +// validateTree(Source, neg); +// } +//} + +#if BV32_VALIDATE +static void validateNodeBound(const BV32Node* currentNode, SourceMesh* mesh) +{ + const PxU32 nbNodes = currentNode->mNbChildBVNodes; + for (PxU32 i = 0; i < nbNodes; ++i) + { + const BV32Node* node = currentNode->getChild(i); + if (currentNode->isLeaf(i)) + { + BV32Data data = currentNode->mBVData[i]; + PxU32 nbTriangles = data.getNbReferencedTriangles(); + PxU32 startIndex = data.getTriangleStartIndex(); + const IndTri32* triIndices = mesh->getTris32(); + const PxVec3* verts = mesh->getVerts(); + PxVec3 min(PX_MAX_F32, PX_MAX_F32, PX_MAX_F32); + PxVec3 max(-PX_MAX_F32, -PX_MAX_F32, -PX_MAX_F32); + for (PxU32 j = 0; j < nbTriangles; ++j) + { + IndTri32 index = triIndices[startIndex + j]; + + for (PxU32 k = 0; k < 3; ++k) + { + const PxVec3& v = verts[index.mRef[k]]; + + min.x = (min.x > v.x) ? v.x : min.x; + min.y = (min.y > v.y) ? v.y : min.y; + min.z = (min.z > v.z) ? v.z : min.z; + + max.x = (max.x < v.x) ? v.x : max.x; + max.y = (max.y > v.y) ? v.y : max.y; + max.z = (max.z > v.z) ? 
v.z : max.z; + } + } + + PxVec3 dMin, dMax; + data.getMinMax(dMin, dMax); + PX_ASSERT(dMin.x <= min.x && dMin.y <= min.y && dMin.z <= min.z); + PX_ASSERT(dMax.x >= max.x && dMax.y >= max.y && dMax.z >= min.z); + + } + else + { + validateNodeBound(node, mesh); + } + } +} +#endif + +static bool BuildBV32Internal(BV32Tree& bv32Tree, const AABBTree& Source, SourceMesh* mesh, float epsilon) +{ + if (mesh->getNbTriangles() <= 32) + { + bv32Tree.mNbPackedNodes = 1; + bv32Tree.mPackedNodes = reinterpret_cast<BV32DataPacked*>(PX_ALLOC(sizeof(BV32DataPacked), "BV32DataPacked")); + BV32DataPacked& packedData = bv32Tree.mPackedNodes[0]; + packedData.mNbNodes = 1; + packedData.mCenter[0] = PxVec4(Source.getBV().getCenter(), 0.f); + packedData.mExtents[0] = PxVec4(Source.getBV().getExtents(), 0.f); + packedData.mData[0] = (mesh->getNbTriangles() << 1) | 1; + return bv32Tree.init(mesh, Source.getBV()); + } + + { + struct Local + { + static void _CheckMD(const AABBTreeNode* current_node, PxU32& md, PxU32& cd) + { + cd++; + md = PxMax(md, cd); + + if (current_node->getPos()) { _CheckMD(current_node->getPos(), md, cd); cd--; } + if (current_node->getNeg()) { _CheckMD(current_node->getNeg(), md, cd); cd--; } + } + + static void _Check(AABBTreeNode* current_node) + { + if (current_node->isLeaf()) + return; + + AABBTreeNode* P = const_cast<AABBTreeNode*>(current_node->getPos()); + AABBTreeNode* N = const_cast<AABBTreeNode*>(current_node->getNeg()); + { + PxU32 MDP = 0; PxU32 CDP = 0; _CheckMD(P, MDP, CDP); + PxU32 MDN = 0; PxU32 CDN = 0; _CheckMD(N, MDN, CDN); + + if (MDP>MDN) + // if(MDP<MDN) + { + Ps::swap(*P, *N); + Ps::swap(P, N); + } + } + _Check(P); + _Check(N); + } + }; + Local::_Check(const_cast<AABBTreeNode*>(Source.getNodes())); + } + + + PxU32 nbNodes = 1; + BV32Node* Root32 = PX_NEW(BV32Node); + + + _BuildBV32(Source, Root32, Source.getNodes(), epsilon, nbNodes); + +#if BV32_VALIDATE + validateNodeBound(Root32, mesh); +#endif + + if (!bv32Tree.init(mesh, Source.getBV())) + 
return false; + BV32Tree* T = &bv32Tree; + + // Version with variable-sized nodes in single stream + { + struct Local + { + static void _Flatten(BV32Data* const dest, const PxU32 box_id, PxU32& current_id, const BV32Node* current, PxU32& max_depth, PxU32& current_depth, const PxU32 nb_nodes) + { + // Entering a new node => increase depth + current_depth++; + // Keep track of max depth + if (current_depth>max_depth) + max_depth = current_depth; + + for (PxU32 i = 0; i<current->mNbChildBVNodes; i++) + { + dest[box_id + i].mCenter = current->mBVData[i].mCenter; + dest[box_id + i].mExtents = current->mBVData[i].mExtents; + dest[box_id + i].mData = PxU32(current->mBVData[i].mData); + + PX_ASSERT(box_id + i < nb_nodes); + } + + PxU32 NbToGo = 0; + PxU32 NextIDs[32]; + memset(NextIDs, PX_INVALID_U32, sizeof(PxU32)*32); + const BV32Node* ChildNodes[32]; + memset(ChildNodes, 0, sizeof(BV32Node*)*32); + + BV32Data* data = dest + box_id; + for (PxU32 i = 0; i<current->mNbChildBVNodes; i++) + { + PX_ASSERT(current->mBVData[i].mData != PX_INVALID_U32); + + if (!current->isLeaf(i)) + { + + const BV32Node* ChildNode = current->getChild(i); + + const PxU32 NextID = current_id; + + const PxU32 ChildSize = ChildNode->mNbChildBVNodes; + current_id += ChildSize; + + const PxU32 ChildType = ChildNode->mNbChildBVNodes << 1; + data[i].mData = size_t(ChildType + (NextID << GU_BV4_CHILD_OFFSET_SHIFT_COUNT)); + //PX_ASSERT(data[i].mData == size_t(ChildType+(NextID<<3))); + + PX_ASSERT(box_id + i < nb_nodes); + + NextIDs[NbToGo] = NextID; + ChildNodes[NbToGo] = ChildNode; + NbToGo++; + } + } + + + + for (PxU32 i = 0; i<NbToGo; i++) + { + _Flatten(dest, NextIDs[i], current_id, ChildNodes[i], max_depth, current_depth, nb_nodes); + current_depth--; + } + + DELETESINGLE(current); + } + }; + + + PxU32 CurID = Root32->mNbChildBVNodes+1; + + BV32Data* Nodes = PX_NEW(BV32Data)[nbNodes]; + Nodes[0].mCenter = Source.getBV().getCenter(); + Nodes[0].mExtents = Source.getBV().getExtents(); + + const 
PxU32 ChildType = Root32->mNbChildBVNodes << 1; + Nodes[0].mData = size_t(ChildType + (1 << GU_BV4_CHILD_OFFSET_SHIFT_COUNT)); + + const PxU32 nbChilden = Nodes[0].getNbChildren(); + + PX_UNUSED(nbChilden); + + + T->mInitData = CurID; + PxU32 MaxDepth = 0; + PxU32 CurrentDepth = 0; + + Local::_Flatten(Nodes, 1, CurID, Root32, MaxDepth, CurrentDepth, nbNodes); + + PX_ASSERT(CurID == nbNodes); + + T->mNbNodes = nbNodes; + + T->mNodes = Nodes; + } + + + bv32Tree.calculateLeafNode(bv32Tree.mNodes[0]); + + bv32Tree.mPackedNodes = reinterpret_cast<BV32DataPacked*>(PX_ALLOC(sizeof(BV32DataPacked)*nbNodes, "BV32DataPacked")); + bv32Tree.mNbPackedNodes = nbNodes; + + PxU32 nbPackedNodes = 1; + PxU32 currentIndex = bv32Tree.mNodes[0].getNbChildren() - bv32Tree.mNodes[0].mNbLeafNodes + 1; + BV32DataPacked& packedData = bv32Tree.mPackedNodes[0]; + bv32Tree.createSOAformatNode(packedData, bv32Tree.mNodes[0], 1, currentIndex, nbPackedNodes); + + bv32Tree.mNbPackedNodes = nbPackedNodes; + + PX_ASSERT(nbPackedNodes == currentIndex); + PX_ASSERT(nbPackedNodes > 0); + + return true; +} + +///// + +struct ReorderData32 +{ + const SourceMesh* mMesh; + PxU32* mOrder; + PxU32 mNbTrisPerLeaf; + PxU32 mIndex; + PxU32 mNbTris; + PxU32 mStats[32]; +}; + +static bool gReorderCallback(const AABBTreeNode* current, PxU32 /*depth*/, void* userData) +{ + ReorderData32* Data = reinterpret_cast<ReorderData32*>(userData); + if (current->isLeaf()) + { + const PxU32 n = current->getNbPrimitives(); + PX_ASSERT(n > 0); + PX_ASSERT(n <= Data->mNbTrisPerLeaf); + Data->mStats[n-1]++; + PxU32* Prims = const_cast<PxU32*>(current->getPrimitives()); + + for (PxU32 i = 0; i<n; i++) + { + PX_ASSERT(Prims[i]<Data->mNbTris); + Data->mOrder[Data->mIndex] = Prims[i]; + PX_ASSERT(Data->mIndex<Data->mNbTris); + Prims[i] = Data->mIndex; + Data->mIndex++; + } + } + return true; +} + + +bool physx::Gu::BuildBV32Ex(BV32Tree& tree, SourceMesh& mesh, float epsilon, PxU32 nbTrisPerLeaf) +{ + const PxU32 nbTris = 
mesh.mNbTris; + + AABBTree Source; + if (!Source.buildFromMesh(mesh, nbTrisPerLeaf)) + return false; + + + { + PxU32* order = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbTris, "BV32")); + ReorderData32 RD; + RD.mMesh = &mesh; + RD.mOrder = order; + RD.mNbTrisPerLeaf = nbTrisPerLeaf; + RD.mIndex = 0; + RD.mNbTris = nbTris; + for (PxU32 i = 0; i<32; i++) + RD.mStats[i] = 0; + Source.walk(gReorderCallback, &RD); + PX_ASSERT(RD.mIndex == nbTris); + mesh.remapTopology(order); + PX_FREE(order); + // for(PxU32 i=0;i<16;i++) + // printf("%d: %d\n", i, RD.mStats[i]); + } + + + //if (mesh.getNbTriangles() <= nbTrisPerLeaf) + // return tree.init(&mesh, Source.getBV()); + + return BuildBV32Internal(tree, Source, &mesh, epsilon); +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32Build.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32Build.h new file mode 100644 index 00000000..68b8ebaf --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32Build.h @@ -0,0 +1,50 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV32_BUILD_H +#define GU_BV32_BUILD_H + +#include "foundation/PxSimpleTypes.h" +#include "common/PxPhysXCommonConfig.h" + +#define BV32_VALIDATE 0 + +namespace physx +{ + namespace Gu + { + class BV32Tree; + class SourceMesh; + + PX_PHYSX_COMMON_API bool BuildBV32Ex(BV32Tree& tree, SourceMesh& mesh, float epsilon, PxU32 nbTrisPerLeaf); + + } // namespace Gu +} + +#endif // GU_BV32_BUILD_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4.cpp new file mode 100644 index 00000000..b7e0f4d0 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4.cpp @@ -0,0 +1,261 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxMemory.h" +#include "GuBV4.h" +#include "GuSerialize.h" +#include "CmUtils.h" +#include "PsUtilities.h" + +using namespace physx; +using namespace Gu; + +#define DELETEARRAY(x) if (x) { delete []x; x = NULL; } + +SourceMesh::SourceMesh() +{ + reset(); +} + +SourceMesh::~SourceMesh() +{ + PX_FREE_AND_RESET(mRemap); +} + +void SourceMesh::reset() +{ + mNbVerts = 0; + mVerts = NULL; + mNbTris = 0; + mTriangles32 = NULL; + mTriangles16 = NULL; + mRemap = NULL; +} + +void SourceMesh::operator=(SourceMesh& v) +{ + mNbVerts = v.mNbVerts; + mVerts = v.mVerts; + mNbTris = v.mNbTris; + mTriangles32 = v.mTriangles32; + mTriangles16 = v.mTriangles16; + mRemap = v.mRemap; + v.reset(); +} + +void SourceMesh::remapTopology(const PxU32* order) +{ + if(!mNbTris) + return; + + if(mTriangles32) + { + IndTri32* newTopo = PX_NEW(IndTri32)[mNbTris]; + for(PxU32 i=0;i<mNbTris;i++) + newTopo[i] = mTriangles32[order[i]]; + + PxMemCopy(mTriangles32, newTopo, sizeof(IndTri32)*mNbTris); + DELETEARRAY(newTopo); + } + else + { + PX_ASSERT(mTriangles16); + IndTri16* newTopo = PX_NEW(IndTri16)[mNbTris]; + for(PxU32 i=0;i<mNbTris;i++) + newTopo[i] = mTriangles16[order[i]]; + + PxMemCopy(mTriangles16, newTopo, sizeof(IndTri16)*mNbTris); + DELETEARRAY(newTopo); + } + + { + PxU32* newMap = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mNbTris, "OPC2")); + for(PxU32 i=0;i<mNbTris;i++) + newMap[i] = mRemap ? 
mRemap[order[i]] : order[i]; + + PX_FREE_AND_RESET(mRemap); + mRemap = newMap; + } +} + +bool SourceMesh::isValid() const +{ + if(!mNbTris || !mNbVerts) return false; + if(!mVerts) return false; + if(!mTriangles32 && !mTriangles16) return false; + return true; +} + +///// + +BV4Tree::BV4Tree(SourceMesh* meshInterface, const PxBounds3& localBounds) +{ + reset(); + init(meshInterface, localBounds); +} + +BV4Tree::BV4Tree() +{ + reset(); +} + +void BV4Tree::release() +{ + if(!mUserAllocated) + { +#ifdef GU_BV4_USE_SLABS + PX_DELETE_AND_RESET(mNodes); +#else + DELETEARRAY(mNodes); +#endif + } + + mNodes = NULL; + mNbNodes = 0; +} + +BV4Tree::~BV4Tree() +{ + release(); +} + +void BV4Tree::reset() +{ + mMeshInterface = NULL; + mNbNodes = 0; + mNodes = NULL; + mInitData = 0; +#ifdef GU_BV4_QUANTIZED_TREE + mCenterOrMinCoeff = PxVec3(0.0f); + mExtentsOrMaxCoeff = PxVec3(0.0f); +#endif + mUserAllocated = false; +} + +void BV4Tree::operator=(BV4Tree& v) +{ + mMeshInterface = v.mMeshInterface; + mLocalBounds = v.mLocalBounds; + mNbNodes = v.mNbNodes; + mNodes = v.mNodes; + mInitData = v.mInitData; +#ifdef GU_BV4_QUANTIZED_TREE + mCenterOrMinCoeff = v.mCenterOrMinCoeff; + mExtentsOrMaxCoeff = v.mExtentsOrMaxCoeff; +#endif + mUserAllocated = v.mUserAllocated; + v.reset(); +} + +bool BV4Tree::init(SourceMesh* meshInterface, const PxBounds3& localBounds) +{ + mMeshInterface = meshInterface; + mLocalBounds.init(localBounds); + return true; +} + +// PX_SERIALIZATION +BV4Tree::BV4Tree(const PxEMPTY) +{ + mUserAllocated = true; +} + +void BV4Tree::exportExtraData(PxSerializationContext& stream) +{ + stream.alignData(16); + stream.writeData(mNodes, mNbNodes*sizeof(BVDataPacked)); +} + +void BV4Tree::importExtraData(PxDeserializationContext& context) +{ + context.alignExtraData(16); + mNodes = context.readExtraData<BVDataPacked>(mNbNodes); +} +//~PX_SERIALIZATION + +bool BV4Tree::load(PxInputStream& stream, PxU32 meshVersion) +{ + PX_ASSERT(!mUserAllocated); + PX_UNUSED(meshVersion); + 
+ release(); + + PxI8 a, b, c, d; + readChunk(a, b, c, d, stream); + if(a!='B' || b!='V' || c!='4' || d!=' ') + return false; + + const PxU32 version = 1; + const bool mismatch = (shdfnd::littleEndian() == 1); + if(readDword(mismatch, stream) != version) + return false; + + mLocalBounds.mCenter.x = readFloat(mismatch, stream); + mLocalBounds.mCenter.y = readFloat(mismatch, stream); + mLocalBounds.mCenter.z = readFloat(mismatch, stream); + mLocalBounds.mExtentsMagnitude = readFloat(mismatch, stream); + + mInitData = readDword(mismatch, stream); + +#ifdef GU_BV4_QUANTIZED_TREE + mCenterOrMinCoeff.x = readFloat(mismatch, stream); + mCenterOrMinCoeff.y = readFloat(mismatch, stream); + mCenterOrMinCoeff.z = readFloat(mismatch, stream); + mExtentsOrMaxCoeff.x = readFloat(mismatch, stream); + mExtentsOrMaxCoeff.y = readFloat(mismatch, stream); + mExtentsOrMaxCoeff.z = readFloat(mismatch, stream); +#endif + const PxU32 nbNodes = readDword(mismatch, stream); + mNbNodes = nbNodes; + + if(nbNodes) + { +#ifdef GU_BV4_USE_SLABS + BVDataPacked* nodes = reinterpret_cast<BVDataPacked*>(PX_ALLOC(sizeof(BVDataPacked)*nbNodes, "BV4 nodes")); // PT: PX_NEW breaks alignment here +#else + BVDataPacked* nodes = PX_NEW(BVDataPacked)[nbNodes]; +#endif + mNodes = nodes; + Cm::markSerializedMem(nodes, sizeof(BVDataPacked)*nbNodes); + + for(PxU32 i=0;i<nbNodes;i++) + { + BVDataPacked& node = nodes[i]; +#ifdef GU_BV4_QUANTIZED_TREE + readWordBuffer(&node.mAABB.mData[0].mExtents, 6, mismatch, stream); +#else + readFloatBuffer(&node.mAABB.mCenter.x, 6, mismatch, stream); +#endif + node.mData = readDword(mismatch, stream); + } + } + else mNodes = NULL; + + return true; +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4.h new file mode 100644 index 00000000..8746ef08 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4.h @@ -0,0 +1,254 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of 
NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_BV4_H +#define GU_BV4_H + +#include "foundation/PxBounds3.h" +#include "PxSerialFramework.h" +#include "PsUserAllocated.h" +#include "GuBV4Settings.h" + +#define V4LoadU_Safe V4LoadU +#define V4LoadA_Safe V4LoadA +#define V4StoreA_Safe V4StoreA +#define V4StoreU_Safe V4StoreU + +namespace physx +{ +namespace Gu +{ + + struct VertexPointers + { + const PxVec3* Vertex[3]; + }; + + class IndTri32 : public physx::shdfnd::UserAllocated + { + public: + PX_FORCE_INLINE IndTri32() {} + PX_FORCE_INLINE IndTri32(PxU32 r0, PxU32 r1, PxU32 r2) { mRef[0]=r0; mRef[1]=r1; mRef[2]=r2; } + PX_FORCE_INLINE IndTri32(const IndTri32& triangle) + { + mRef[0] = triangle.mRef[0]; + mRef[1] = triangle.mRef[1]; + mRef[2] = triangle.mRef[2]; + } + PX_FORCE_INLINE ~IndTri32() {} + PxU32 mRef[3]; + }; + PX_COMPILE_TIME_ASSERT(sizeof(IndTri32)==12); + + class IndTri16 : public physx::shdfnd::UserAllocated + { + public: + PX_FORCE_INLINE IndTri16() {} + PX_FORCE_INLINE IndTri16(PxU16 r0, PxU16 r1, PxU16 r2) { mRef[0]=r0; mRef[1]=r1; mRef[2]=r2; } + PX_FORCE_INLINE IndTri16(const IndTri16& triangle) + { + mRef[0] = triangle.mRef[0]; + mRef[1] = triangle.mRef[1]; + mRef[2] = triangle.mRef[2]; + } + PX_FORCE_INLINE ~IndTri16() {} + PxU16 mRef[3]; + }; + PX_COMPILE_TIME_ASSERT(sizeof(IndTri16)==6); + + PX_FORCE_INLINE void getVertexReferences(PxU32& vref0, PxU32& vref1, PxU32& vref2, PxU32 index, const IndTri32* T32, const IndTri16* T16) + { + if(T32) + { + const IndTri32* PX_RESTRICT tri = T32 + index; + vref0 = tri->mRef[0]; + vref1 = tri->mRef[1]; + vref2 = tri->mRef[2]; + } + else + { + const IndTri16* PX_RESTRICT tri = T16 + index; + vref0 = tri->mRef[0]; + vref1 = tri->mRef[1]; + vref2 = tri->mRef[2]; + } + } + + class SourceMesh : public physx::shdfnd::UserAllocated + { + public: + PX_PHYSX_COMMON_API SourceMesh(); + PX_PHYSX_COMMON_API ~SourceMesh(); + // PX_SERIALIZATION + SourceMesh(const PxEMPTY) {} + static void getBinaryMetaData(PxOutputStream& stream); + 
//~PX_SERIALIZATION + + void reset(); + void operator = (SourceMesh& v); + + PxU32 mNbVerts; + const PxVec3* mVerts; + PxU32 mNbTris; + IndTri32* mTriangles32; + IndTri16* mTriangles16; + + PX_FORCE_INLINE PxU32 getNbTriangles() const { return mNbTris; } + PX_FORCE_INLINE PxU32 getNbVertices() const { return mNbVerts; } + PX_FORCE_INLINE const IndTri32* getTris32() const { return mTriangles32; } + PX_FORCE_INLINE const IndTri16* getTris16() const { return mTriangles16; } + PX_FORCE_INLINE const PxVec3* getVerts() const { return mVerts; } + + PX_FORCE_INLINE void setNbTriangles(PxU32 nb) { mNbTris = nb; } + PX_FORCE_INLINE void setNbVertices(PxU32 nb) { mNbVerts = nb; } + + PX_FORCE_INLINE void setPointers(IndTri32* tris32, IndTri16* tris16, const PxVec3* verts) + { + mTriangles32 = tris32; + mTriangles16 = tris16; + mVerts = verts; + } + + PX_FORCE_INLINE void initRemap() { mRemap = NULL; } + PX_FORCE_INLINE const PxU32* getRemap() const { return mRemap; } + PX_FORCE_INLINE void releaseRemap() { PX_FREE_AND_RESET(mRemap); } + void remapTopology(const PxU32* order); + + bool isValid() const; + + PX_FORCE_INLINE void getTriangle(VertexPointers& vp, PxU32 index) const + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, index, mTriangles32, mTriangles16); + vp.Vertex[0] = mVerts + VRef0; + vp.Vertex[1] = mVerts + VRef1; + vp.Vertex[2] = mVerts + VRef2; + } + private: + PxU32* mRemap; + }; + + struct LocalBounds + { + LocalBounds() : mCenter(PxVec3(0.0f)), mExtentsMagnitude(0.0f) {} + + PxVec3 mCenter; + float mExtentsMagnitude; + + PX_FORCE_INLINE void init(const PxBounds3& bounds) + { + mCenter = bounds.getCenter(); + // PT: TODO: compute mag first, then multiplies by 0.5f (TA34704) + mExtentsMagnitude = bounds.getExtents().magnitude(); + } + }; + +#ifdef GU_BV4_QUANTIZED_TREE + class QuantizedAABB + { + public: + + struct Data + { + PxU16 mExtents; //!< Quantized extents + PxI16 mCenter; //!< Quantized center + }; + Data mData[3]; + }; + 
PX_COMPILE_TIME_ASSERT(sizeof(QuantizedAABB)==12); +#endif + + ///// + + #define GU_BV4_CHILD_OFFSET_SHIFT_COUNT 11 + + struct BVDataPacked : public physx::shdfnd::UserAllocated + { +#ifdef GU_BV4_QUANTIZED_TREE + QuantizedAABB mAABB; +#else + CenterExtents mAABB; +#endif + PxU32 mData; + + PX_FORCE_INLINE PxU32 isLeaf() const { return mData&1; } + PX_FORCE_INLINE PxU32 getPrimitive() const { return mData>>1; } + PX_FORCE_INLINE PxU32 getChildOffset() const { return mData>>GU_BV4_CHILD_OFFSET_SHIFT_COUNT;} + PX_FORCE_INLINE PxU32 getChildType() const { return (mData>>1)&3; } + PX_FORCE_INLINE PxU32 getChildData() const { return mData; } + + PX_FORCE_INLINE void encodePNS(PxU32 code) + { + PX_ASSERT(code<256); + mData |= code<<3; + } + PX_FORCE_INLINE PxU32 decodePNSNoShift() const { return mData; } + }; + + // PT: TODO: align class to 16? (TA34704) + class BV4Tree : public physx::shdfnd::UserAllocated + { + public: + // PX_SERIALIZATION + BV4Tree(const PxEMPTY); + void exportExtraData(PxSerializationContext&); + void importExtraData(PxDeserializationContext& context); + static void getBinaryMetaData(PxOutputStream& stream); + //~PX_SERIALIZATION + + PX_PHYSX_COMMON_API BV4Tree(); + PX_PHYSX_COMMON_API BV4Tree(SourceMesh* meshInterface, const PxBounds3& localBounds); + PX_PHYSX_COMMON_API ~BV4Tree(); + + bool load(PxInputStream& stream, PxU32 meshVersion); // converts to proper endian at load time + + void reset(); + void operator = (BV4Tree& v); + + bool init(SourceMesh* meshInterface, const PxBounds3& localBounds); + void release(); + + SourceMesh* mMeshInterface; + LocalBounds mLocalBounds; + + PxU32 mNbNodes; + BVDataPacked* mNodes; + PxU32 mInitData; +#ifdef GU_BV4_QUANTIZED_TREE + PxVec3 mCenterOrMinCoeff; // PT: dequantization coeff, either for Center or Min (depending on AABB format) + PxVec3 mExtentsOrMaxCoeff; // PT: dequantization coeff, either for Extents or Max (depending on AABB format) +#endif + bool mUserAllocated; // PT: please keep these 4 bytes 
right after mCenterOrMinCoeff/mExtentsOrMaxCoeff for safe V4 loading + bool mPadding[3]; + }; + +} // namespace Gu +} + +#endif // GU_BV4_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp new file mode 100644 index 00000000..fbe97042 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp @@ -0,0 +1,1294 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. 
+// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxVec4.h" +#include "GuBV4Build.h" +#include "GuBV4.h" +#include "PxTriangle.h" +#include "CmPhysXCommon.h" +#include "PsBasicTemplates.h" +#include "GuCenterExtents.h" + +using namespace physx; +using namespace Gu; + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#define GU_BV4_USE_NODE_POOLS + +#define DELETESINGLE(x) if (x) { delete x; x = NULL; } +#define DELETEARRAY(x) if (x) { delete []x; x = NULL; } + +static PX_FORCE_INLINE PxU32 largestAxis(const PxVec4& v) +{ + const float* Vals = &v.x; + PxU32 m = 0; + if(Vals[1] > Vals[m]) m = 1; + if(Vals[2] > Vals[m]) m = 2; + return m; +} + +AABBTree::AABBTree() : mIndices(NULL), mPool(NULL), mTotalNbNodes(0) +{ +} + +AABBTree::~AABBTree() +{ + release(); +} + +void AABBTree::release() +{ + DELETEARRAY(mPool); + PX_FREE_AND_RESET(mIndices); +} + +static PxU32 local_Split(const AABBTreeNode* PX_RESTRICT node, const PxBounds3* PX_RESTRICT /*Boxes*/, const PxVec3* PX_RESTRICT centers, PxU32 axis) +{ + const PxU32 nb = node->mNbPrimitives; + PxU32* PX_RESTRICT prims = node->mNodePrimitives; + + // Get node split value + const float splitValue = node->mBV.getCenter(axis); + + PxU32 nbPos = 0; + // Loop through all node-related primitives. Their indices range from mNodePrimitives[0] to mNodePrimitives[mNbPrimitives-1]. + // Those indices map the global list in the tree builder. + const size_t ptrValue = size_t(centers) + axis*sizeof(float); + const PxVec3* PX_RESTRICT centersX = reinterpret_cast<const PxVec3*>(ptrValue); + + for(PxU32 i=0;i<nb;i++) + { + // Get index in global list + const PxU32 index = prims[i]; + + // Test against the splitting value. The primitive value is tested against the enclosing-box center. 
+ // [We only need an approximate partition of the enclosing box here.] + const float primitiveValue = centersX[index].x; + + // Reorganize the list of indices in this order: positive - negative. + if(primitiveValue > splitValue) + { + // Swap entries + prims[i] = prims[nbPos]; + prims[nbPos] = index; + // Count primitives assigned to positive space + nbPos++; + } + } + return nbPos; +} + +static bool local_Subdivide(AABBTreeNode* PX_RESTRICT node, const PxBounds3* PX_RESTRICT boxes, const PxVec3* PX_RESTRICT centers, BuildStats& stats, const AABBTreeNode* const PX_RESTRICT node_base, PxU32 limit) +{ + const PxU32* PX_RESTRICT prims = node->mNodePrimitives; + const PxU32 nb = node->mNbPrimitives; + + // Compute bv & means at the same time + Vec4V meansV; + { + Vec4V minV = V4LoadU(&boxes[prims[0]].minimum.x); + Vec4V maxV = V4LoadU(&boxes[prims[0]].maximum.x); + meansV = V4LoadU(¢ers[prims[0]].x); + + for(PxU32 i=1;i<nb;i++) + { + const PxU32 index = prims[i]; + minV = V4Min(minV, V4LoadU(&boxes[index].minimum.x)); + maxV = V4Max(maxV, V4LoadU(&boxes[index].maximum.x)); + meansV = V4Add(meansV, V4LoadU(¢ers[index].x)); + } + const float coeffNb = 1.0f/float(nb); + meansV = V4Scale(meansV, FLoad(coeffNb)); + +// BV4_ALIGN16(PxVec4 mergedMin); +// BV4_ALIGN16(PxVec4 mergedMax); + PX_ALIGN_PREFIX(16) PxVec4 mergedMin PX_ALIGN_SUFFIX(16); + PX_ALIGN_PREFIX(16) PxVec4 mergedMax PX_ALIGN_SUFFIX(16); + + V4StoreA_Safe(minV, &mergedMin.x); + V4StoreA_Safe(maxV, &mergedMax.x); + node->mBV.minimum = PxVec3(mergedMin.x, mergedMin.y, mergedMin.z); + node->mBV.maximum = PxVec3(mergedMax.x, mergedMax.y, mergedMax.z); + } + +// // Stop subdividing if we reach a leaf node. This is always performed here, +// // else we could end in trouble if user overrides this. 
+// if(nb==1) +// return false; + if(nb<=limit) + return false; + + bool validSplit = true; + PxU32 nbPos; + { + // Compute variances + Vec4V varsV = V4Zero(); + for(PxU32 i=0;i<nb;i++) + { + const PxU32 index = prims[i]; + Vec4V centerV = V4LoadU(¢ers[index].x); + centerV = V4Sub(centerV, meansV); + centerV = V4Mul(centerV, centerV); + varsV = V4Add(varsV, centerV); + } + const float coeffNb1 = 1.0f/float(nb-1); + varsV = V4Scale(varsV, FLoad(coeffNb1)); + +// BV4_ALIGN16(PxVec4 vars); + PX_ALIGN_PREFIX(16) PxVec4 vars PX_ALIGN_SUFFIX(16); + V4StoreA_Safe(varsV, &vars.x); + + // Choose axis with greatest variance + const PxU32 axis = largestAxis(vars); + + // Split along the axis + nbPos = local_Split(node, boxes, centers, axis); + + // Check split validity + if(!nbPos || nbPos==nb) + validSplit = false; + } + + // Check the subdivision has been successful + if(!validSplit) + { + // Here, all boxes lie in the same sub-space. Two strategies: + // - if the tree *must* be complete, make an arbitrary 50-50 split + // - else stop subdividing +// if(nb>limit) + { + nbPos = node->mNbPrimitives>>1; + + if(1) + { + // Test 3 axis, take the best + float results[3]; + nbPos = local_Split(node, boxes, centers, 0); results[0] = float(nbPos)/float(node->mNbPrimitives); + nbPos = local_Split(node, boxes, centers, 1); results[1] = float(nbPos)/float(node->mNbPrimitives); + nbPos = local_Split(node, boxes, centers, 2); results[2] = float(nbPos)/float(node->mNbPrimitives); + results[0]-=0.5f; results[0]*=results[0]; + results[1]-=0.5f; results[1]*=results[1]; + results[2]-=0.5f; results[2]*=results[2]; + PxU32 Min=0; + if(results[1]<results[Min]) Min = 1; + if(results[2]<results[Min]) Min = 2; + + // Split along the axis + nbPos = local_Split(node, boxes, centers, Min); + + // Check split validity + if(!nbPos || nbPos==node->mNbPrimitives) + nbPos = node->mNbPrimitives>>1; + } + } + //else return + } + + // Now create children and assign their pointers. 
+ // We use a pre-allocated linear pool for complete trees [Opcode 1.3] + const PxU32 count = stats.getCount(); + node->mPos = size_t(node_base + count); + + // Update stats + stats.increaseCount(2); + + // Assign children + AABBTreeNode* pos = const_cast<AABBTreeNode*>(node->getPos()); + AABBTreeNode* neg = const_cast<AABBTreeNode*>(node->getNeg()); + pos->mNodePrimitives = node->mNodePrimitives; + pos->mNbPrimitives = nbPos; + neg->mNodePrimitives = node->mNodePrimitives + nbPos; + neg->mNbPrimitives = node->mNbPrimitives - nbPos; + return true; +} + +static void local_BuildHierarchy(AABBTreeNode* PX_RESTRICT node, const PxBounds3* PX_RESTRICT Boxes, const PxVec3* PX_RESTRICT centers, BuildStats& stats, const AABBTreeNode* const PX_RESTRICT node_base, PxU32 limit) +{ + if(local_Subdivide(node, Boxes, centers, stats, node_base, limit)) + { + AABBTreeNode* pos = const_cast<AABBTreeNode*>(node->getPos()); + AABBTreeNode* neg = const_cast<AABBTreeNode*>(node->getNeg()); + local_BuildHierarchy(pos, Boxes, centers, stats, node_base, limit); + local_BuildHierarchy(neg, Boxes, centers, stats, node_base, limit); + } +} + +bool AABBTree::buildFromMesh(SourceMesh& mesh, PxU32 limit) +{ + const PxU32 nbBoxes = mesh.getNbTriangles(); + if(!nbBoxes) + return false; + PxBounds3* boxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*(nbBoxes+1), "BV4")); // PT: +1 to safely V4Load/V4Store the last element + PxVec3* centers = reinterpret_cast<PxVec3*>(PX_ALLOC(sizeof(PxVec3)*(nbBoxes+1), "BV4")); // PT: +1 to safely V4Load/V4Store the last element + const FloatV halfV = FLoad(0.5f); + for(PxU32 i=0;i<nbBoxes;i++) + { + VertexPointers VP; + mesh.getTriangle(VP, i); + + const Vec4V v0V = V4LoadU(&VP.Vertex[0]->x); + const Vec4V v1V = V4LoadU(&VP.Vertex[1]->x); + const Vec4V v2V = V4LoadU(&VP.Vertex[2]->x); + Vec4V minV = V4Min(v0V, v1V); + minV = V4Min(minV, v2V); + Vec4V maxV = V4Max(v0V, v1V); + maxV = V4Max(maxV, v2V); + V4StoreU_Safe(minV, &boxes[i].minimum.x); // PT: 
safe because 'maximum' follows 'minimum' + V4StoreU_Safe(maxV, &boxes[i].maximum.x); // PT: safe because we allocated one more box + + const Vec4V centerV = V4Scale(V4Add(maxV, minV), halfV); + V4StoreU_Safe(centerV, ¢ers[i].x); // PT: safe because we allocated one more PxVec3 + } + + { + // Release previous tree + release(); + + // Init stats + BuildStats Stats; + Stats.setCount(1); + + // Initialize indices. This list will be modified during build. + mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbBoxes, "BV4 indices")); + // Identity permutation + for(PxU32 i=0;i<nbBoxes;i++) + mIndices[i] = i; + + // Use a linear array for complete trees (since we can predict the final number of nodes) [Opcode 1.3] + // Allocate a pool of nodes + // PT: TODO: optimize memory here (TA34704) + mPool = PX_NEW(AABBTreeNode)[nbBoxes*2 - 1]; + + // Setup initial node. Here we have a complete permutation of the app's primitives. + mPool->mNodePrimitives = mIndices; + mPool->mNbPrimitives = nbBoxes; + + // Build the hierarchy + local_BuildHierarchy(mPool, boxes, centers, Stats, mPool, limit); + + // Get back total number of nodes + mTotalNbNodes = Stats.getCount(); + } + + PX_FREE(centers); + PX_FREE(boxes); + return true; +} + +PxU32 AABBTree::walk(WalkingCallback cb, void* userData) const +{ + // Call it without callback to compute max depth + PxU32 maxDepth = 0; + PxU32 currentDepth = 0; + + struct Local + { + static void _Walk(const AABBTreeNode* current_node, PxU32& max_depth, PxU32& current_depth, WalkingCallback callback, void* userData_) + { + // Checkings + if(!current_node) + return; + // Entering a new node => increase depth + current_depth++; + // Keep track of max depth + if(current_depth>max_depth) + max_depth = current_depth; + + // Callback + if(callback && !(callback)(current_node, current_depth, userData_)) + return; + + // Recurse + if(current_node->getPos()) { _Walk(current_node->getPos(), max_depth, current_depth, callback, userData_); current_depth--; 
} + if(current_node->getNeg()) { _Walk(current_node->getNeg(), max_depth, current_depth, callback, userData_); current_depth--; } + } + }; + Local::_Walk(mPool, maxDepth, currentDepth, cb, userData); + return maxDepth; +} + + + +#include "GuBV4_Internal.h" + +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT +// PT: see http://www.codercorner.com/blog/?p=734 +static PxU32 precomputeNodeSorting(const PxBounds3& box0, const PxBounds3& box1) +{ + const PxVec3 C0 = box0.getCenter(); + const PxVec3 C1 = box1.getCenter(); + + PxVec3 dirPPP(1.0f, 1.0f, 1.0f); dirPPP.normalize(); + PxVec3 dirPPN(1.0f, 1.0f, -1.0f); dirPPN.normalize(); + PxVec3 dirPNP(1.0f, -1.0f, 1.0f); dirPNP.normalize(); + PxVec3 dirPNN(1.0f, -1.0f, -1.0f); dirPNN.normalize(); + PxVec3 dirNPP(-1.0f, 1.0f, 1.0f); dirNPP.normalize(); + PxVec3 dirNPN(-1.0f, 1.0f, -1.0f); dirNPN.normalize(); + PxVec3 dirNNP(-1.0f, -1.0f, 1.0f); dirNNP.normalize(); + PxVec3 dirNNN(-1.0f, -1.0f, -1.0f); dirNNN.normalize(); + + const PxVec3 deltaC = C0 - C1; + const bool bPPP = deltaC.dot(dirPPP)<0.0f; + const bool bPPN = deltaC.dot(dirPPN)<0.0f; + const bool bPNP = deltaC.dot(dirPNP)<0.0f; + const bool bPNN = deltaC.dot(dirPNN)<0.0f; + const bool bNPP = deltaC.dot(dirNPP)<0.0f; + const bool bNPN = deltaC.dot(dirNPN)<0.0f; + const bool bNNP = deltaC.dot(dirNNP)<0.0f; + const bool bNNN = deltaC.dot(dirNNN)<0.0f; + + PxU32 code = 0; + if(!bPPP) + code |= (1<<7); // Bit 0: PPP + if(!bPPN) + code |= (1<<6); // Bit 1: PPN + if(!bPNP) + code |= (1<<5); // Bit 2: PNP + if(!bPNN) + code |= (1<<4); // Bit 3: PNN + if(!bNPP) + code |= (1<<3); // Bit 4: NPP + if(!bNPN) + code |= (1<<2); // Bit 5: NPN + if(!bNNP) + code |= (1<<1); // Bit 6: NNP + if(!bNNN) + code |= (1<<0); // Bit 7: NNN + return code; +} +#endif + +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Common.h" +#endif + +static void setEmpty(CenterExtents& box) +{ + box.mCenter = PxVec3(0.0f, 0.0f, 0.0f); + box.mExtents = PxVec3(-1.0f, -1.0f, -1.0f); +} + +// Data: +// 1 bit for leaf/no leaf +// 
2 bits for child-node type +// 8 bits for PNS +// => 32 - 1 - 2 - 8 = 21 bits left for encoding triangle index or node *offset* +// => limited to 2.097.152 triangles +// => and 2Mb-large trees (this one may not work out well in practice) +// ==> lines marked with //* have been changed to address this. Now we don't store offsets in bytes directly +// but in BVData indices. There's more work at runtime calculating addresses, but now the format can support +// 2 million single nodes. +// +// That being said we only need 3*8 = 24 bits in total, so that could be only 6 bits in each BVData. +// For type0: we have 2 nodes, we need 8 bits => 6 bits/node = 12 bits available, ok +// For type1: we have 3 nodes, we need 8*2 = 16 bits => 6 bits/node = 18 bits available, ok +// For type2: we have 4 nodes, we need 8*3 = 24 bits => 6 bits/node = 24 bits available, ok +//#pragma pack(1) +struct BVData : public physx::shdfnd::UserAllocated +{ + BVData(); + CenterExtents mAABB; + size_t mData; +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + PxU32 mTempPNS; +#endif +}; +//#pragma pack() + +BVData::BVData() : mData(PX_INVALID_U32) +{ + setEmpty(mAABB); +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + mTempPNS = 0; +#endif +} + +struct BV4Node : public physx::shdfnd::UserAllocated +{ + PX_FORCE_INLINE BV4Node() {} + PX_FORCE_INLINE ~BV4Node() {} + + BVData mBVData[4]; + + PX_FORCE_INLINE size_t isLeaf(PxU32 i) const { return mBVData[i].mData&1; } + PX_FORCE_INLINE PxU32 getPrimitive(PxU32 i) const { return PxU32(mBVData[i].mData>>1); } + PX_FORCE_INLINE const BV4Node* getChild(PxU32 i) const { return reinterpret_cast<BV4Node*>(mBVData[i].mData); } + + PxU32 getType() const + { + PxU32 Nb=0; + for(PxU32 i=0;i<4;i++) + { + if(mBVData[i].mData!=PX_INVALID_U32) + Nb++; + } + return Nb; + } + + PxU32 getSize() const + { + const PxU32 type = getType(); + return sizeof(BVData)*type; + } +}; + +#define NB_NODES_PER_SLAB 256 +struct BV4BuildParams +{ + PX_FORCE_INLINE BV4BuildParams(float epsilon) : 
mEpsilon(epsilon) +#ifdef GU_BV4_USE_NODE_POOLS + ,mTop(NULL) +#endif + {} + ~BV4BuildParams(); + + // Stats + PxU32 mNbNodes; + PxU32 mStats[4]; + + // + float mEpsilon; + +#ifdef GU_BV4_USE_NODE_POOLS + // + struct Slab : public physx::shdfnd::UserAllocated + { + BV4Node mNodes[NB_NODES_PER_SLAB]; + PxU32 mNbUsedNodes; + Slab* mNext; + }; + Slab* mTop; + + BV4Node* allocateNode(); + void releaseNodes(); +#endif +}; + +BV4BuildParams::~BV4BuildParams() +{ +#ifdef GU_BV4_USE_NODE_POOLS + releaseNodes(); +#endif +} + +#ifdef GU_BV4_USE_NODE_POOLS +BV4Node* BV4BuildParams::allocateNode() +{ + if(!mTop || mTop->mNbUsedNodes==NB_NODES_PER_SLAB) + { + Slab* newSlab = PX_NEW(Slab); + newSlab->mNbUsedNodes = 0; + newSlab->mNext = mTop; + mTop = newSlab; + } + return &mTop->mNodes[mTop->mNbUsedNodes++]; +} + +void BV4BuildParams::releaseNodes() +{ + Slab* current = mTop; + while(current) + { + Slab* next = current->mNext; + PX_DELETE(current); + current = next; + } + mTop = NULL; +} +#endif + +static void setPrimitive(const AABBTree& source, BV4Node* node4, PxU32 i, const AABBTreeNode* node, float epsilon) +{ + const PxU32 nbPrims = node->getNbPrimitives(); + PX_ASSERT(nbPrims<16); + const PxU32* indexBase = source.getIndices(); + const PxU32* prims = node->getPrimitives(); + const PxU32 offset = PxU32(prims - indexBase); + for(PxU32 j=0;j<nbPrims;j++) + { + PX_ASSERT(prims[j] == offset+j); + } + const PxU32 primitiveIndex = (offset<<4)|(nbPrims&15); + + node4->mBVData[i].mAABB = node->getAABB(); + if(epsilon!=0.0f) + node4->mBVData[i].mAABB.mExtents += PxVec3(epsilon, epsilon, epsilon); + node4->mBVData[i].mData = (primitiveIndex<<1)|1; +} + +static BV4Node* setNode(const AABBTree& source, BV4Node* node4, PxU32 i, const AABBTreeNode* node, BV4BuildParams& params) +{ + BV4Node* child = NULL; + if(node->isLeaf()) + { + setPrimitive(source, node4, i, node, params.mEpsilon); + } + else + { + node4->mBVData[i].mAABB = node->getAABB(); + if(params.mEpsilon!=0.0f) + 
node4->mBVData[i].mAABB.mExtents += PxVec3(params.mEpsilon); + + params.mNbNodes++; +#ifdef GU_BV4_USE_NODE_POOLS + child = params.allocateNode(); +#else + child = PX_NEW(BV4Node); +#endif + node4->mBVData[i].mData = size_t(child); + } + return child; +} + +static void _BuildBV4(const AABBTree& source, BV4Node* tmp, const AABBTreeNode* current_node, BV4BuildParams& params) +{ + PX_ASSERT(!current_node->isLeaf()); + + // In the regular tree we have current node A, and: + // ____A____ + // P N + // __|__ __|__ + // PP PN NP NN + // + // For PNS we have: + // bit0 to sort P|N + // bit1 to sort PP|PN + // bit2 to sort NP|NN + // + // As much as possible we need to preserve the original order in BV4, if we want to reuse the same PNS bits. + // + // bit0|bit1|bit2 Order 8bits code + // 0 0 0 PP PN NP NN 0 1 2 3 + // 0 0 1 PP PN NN NP 0 1 3 2 + // 0 1 0 PN PP NP NN 1 0 2 3 + // 0 1 1 PN PP NN NP 1 0 3 2 + // 1 0 0 NP NN PP PN 2 3 0 1 + // 1 0 1 NN NP PP PN 3 2 0 1 + // 1 1 0 NP NN PN PP 2 3 1 0 + // 1 1 1 NN NP PN PP 3 2 1 0 + // + // So we can fetch/compute the sequence from the bits, combine it with limitations from the node type, and process the nodes in order. In theory. + // 8*8bits => the whole thing fits in a single 64bit register, so we could potentially use a "register LUT" here. 
+ + const AABBTreeNode* P = current_node->getPos(); + const AABBTreeNode* N = current_node->getNeg(); + + const bool PLeaf = P->isLeaf(); + const bool NLeaf = N->isLeaf(); + + if(PLeaf) + { + if(NLeaf) + { + // Case 1: P and N are both leaves: + // ____A____ + // P N + // => store as (P,N) and keep bit0 + params.mStats[0]++; + // PN leaves => store 2 triangle pointers, lose 50% of node space + setPrimitive(source, tmp, 0, P, params.mEpsilon); + setPrimitive(source, tmp, 1, N, params.mEpsilon); + +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV); +#endif + } + else + { + // Case 2: P leaf, N no leaf + // ____A____ + // P N + // __|__ + // NP NN + // => store as (P,NP,NN), keep bit0 and bit2 + params.mStats[1]++; + // P leaf => store 1 triangle pointers and 2 node pointers + // => 3 slots used, 25% wasted + setPrimitive(source, tmp, 0, P, params.mEpsilon); + + // + + const AABBTreeNode* NP = N->getPos(); + const AABBTreeNode* NN = N->getNeg(); + +//#define NODE_FUSION +#ifdef NODE_FUSION + PxU32 c=0; + BV4Node* ChildNP; + if(!NP->isLeaf() && NP->getPos()->isLeaf() && NP->getNeg()->isLeaf()) + { + // Drag the terminal leaves directly into this BV4 node, drop internal node NP + setPrimitive(source, tmp, 1, NP->getPos(), params.mEpsilon); + setPrimitive(source, tmp, 2, NP->getNeg(), params.mEpsilon); + ChildNP = NULL; + params.mStats[1]--; + params.mStats[3]++; + c=1; + } + else + { + ChildNP = setNode(source, tmp, 1, NP, params); + } + + BV4Node* ChildNN; + if(c==0 && !NN->isLeaf() && NN->getPos()->isLeaf() && NN->getNeg()->isLeaf()) + { + // Drag the terminal leaves directly into this BV4 node, drop internal node NN + setPrimitive(source, tmp, 2, NN->getPos(), params.mEpsilon); + setPrimitive(source, tmp, 3, NN->getNeg(), params.mEpsilon); + ChildNN = NULL; + params.mStats[1]--; + params.mStats[3]++; + } + else + { + ChildNN = setNode(source, tmp, 2+c, NN, params); + } + + //BV4Node* ChildNN = setNode(tmp, 2+c, NN, 
epsilon, params); +#else + BV4Node* ChildNP = setNode(source, tmp, 1, NP, params); + BV4Node* ChildNN = setNode(source, tmp, 2, NN, params); +#endif + +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV); + tmp->mBVData[2].mTempPNS = precomputeNodeSorting(NP->mBV, NN->mBV); +#endif + if(ChildNP) + _BuildBV4(source, ChildNP, NP, params); + if(ChildNN) + _BuildBV4(source, ChildNN, NN, params); + } + } + else + { + if(NLeaf) + { + // Case 3: P no leaf, N leaf + // ____A____ + // P N + // __|__ + // PP PN + // => store as (PP,PN,N), keep bit0 and bit1 + params.mStats[2]++; + + // N leaf => store 1 triangle pointers and 2 node pointers + // => 3 slots used, 25% wasted + setPrimitive(source, tmp, 2, N, params.mEpsilon); + + // + + const AABBTreeNode* PP = P->getPos(); + const AABBTreeNode* PN = P->getNeg(); + + BV4Node* ChildPP = setNode(source, tmp, 0, PP, params); + BV4Node* ChildPN = setNode(source, tmp, 1, PN, params); + +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV); + tmp->mBVData[1].mTempPNS = precomputeNodeSorting(PP->mBV, PN->mBV); +#endif + if(ChildPP) + _BuildBV4(source, ChildPP, PP, params); + if(ChildPN) + _BuildBV4(source, ChildPN, PN, params); + } + else + { + // Case 4: P and N are no leaves: + // => store as (PP,PN,NP,NN), keep bit0/bit1/bit2 + params.mStats[3]++; + + // No leaves => store 4 node pointers + const AABBTreeNode* PP = P->getPos(); + const AABBTreeNode* PN = P->getNeg(); + const AABBTreeNode* NP = N->getPos(); + const AABBTreeNode* NN = N->getNeg(); + + BV4Node* ChildPP = setNode(source, tmp, 0, PP, params); + BV4Node* ChildPN = setNode(source, tmp, 1, PN, params); + BV4Node* ChildNP = setNode(source, tmp, 2, NP, params); + BV4Node* ChildNN = setNode(source, tmp, 3, NN, params); + +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV); + tmp->mBVData[1].mTempPNS = 
precomputeNodeSorting(PP->mBV, PN->mBV); + tmp->mBVData[2].mTempPNS = precomputeNodeSorting(NP->mBV, NN->mBV); +#endif + if(ChildPP) + _BuildBV4(source, ChildPP, PP, params); + if(ChildPN) + _BuildBV4(source, ChildPN, PN, params); + if(ChildNP) + _BuildBV4(source, ChildNP, NP, params); + if(ChildNN) + _BuildBV4(source, ChildNN, NN, params); + } + } +} + +static bool BuildBV4Internal(BV4Tree& tree, const AABBTree& Source, SourceMesh* mesh, float epsilon) +{ + if(mesh->getNbTriangles()<=4) + return tree.init(mesh, Source.getBV()); + + { + struct Local + { + static void _CheckMD(const AABBTreeNode* current_node, PxU32& md, PxU32& cd) + { + cd++; + md = PxMax(md, cd); + + if(current_node->getPos()) { _CheckMD(current_node->getPos(), md, cd); cd--; } + if(current_node->getNeg()) { _CheckMD(current_node->getNeg(), md, cd); cd--; } + } + + static void _Check(AABBTreeNode* current_node) + { + if(current_node->isLeaf()) + return; + + AABBTreeNode* P = const_cast<AABBTreeNode*>(current_node->getPos()); + AABBTreeNode* N = const_cast<AABBTreeNode*>(current_node->getNeg()); + { + PxU32 MDP = 0; PxU32 CDP = 0; _CheckMD(P, MDP, CDP); + PxU32 MDN = 0; PxU32 CDN = 0; _CheckMD(N, MDN, CDN); + + if(MDP>MDN) +// if(MDP<MDN) + { + Ps::swap(*P, *N); + Ps::swap(P, N); + } + } + _Check(P); + _Check(N); + } + }; + Local::_Check(const_cast<AABBTreeNode*>(Source.getNodes())); + } + + BV4BuildParams Params(epsilon); + Params.mNbNodes=1; // Root node + Params.mStats[0]=0; + Params.mStats[1]=0; + Params.mStats[2]=0; + Params.mStats[3]=0; + +#ifdef GU_BV4_USE_NODE_POOLS + BV4Node* Root = Params.allocateNode(); +#else + BV4Node* Root = PX_NEW(BV4Node); +#endif + _BuildBV4(Source, Root, Source.getNodes(), Params); + + if(!tree.init(mesh, Source.getBV())) + return false; + BV4Tree* T = &tree; + + // Version with variable-sized nodes in single stream + { + struct Local + { +#ifdef GU_BV4_QUANTIZED_TREE + #ifdef GU_BV4_USE_SLABS + static void _ComputeMaxValues(const BV4Node* current, PxVec3& MinMax, 
PxVec3& MaxMax) + { + for(PxU32 i=0;i<4;i++) + { + if(current->mBVData[i].mData!=PX_INVALID_U32) + { + const CenterExtents& Box = current->mBVData[i].mAABB; + const PxVec3 Min = Box.mCenter - Box.mExtents; + const PxVec3 Max = Box.mCenter + Box.mExtents; + if(fabsf(Min.x)>MinMax.x) MinMax.x = fabsf(Min.x); + if(fabsf(Min.y)>MinMax.y) MinMax.y = fabsf(Min.y); + if(fabsf(Min.z)>MinMax.z) MinMax.z = fabsf(Min.z); + if(fabsf(Max.x)>MaxMax.x) MaxMax.x = fabsf(Max.x); + if(fabsf(Max.y)>MaxMax.y) MaxMax.y = fabsf(Max.y); + if(fabsf(Max.z)>MaxMax.z) MaxMax.z = fabsf(Max.z); + if(!current->isLeaf(i)) + { + const BV4Node* ChildNode = current->getChild(i); + _ComputeMaxValues(ChildNode, MinMax, MaxMax); + } + } + } + } + #else + static void _ComputeMaxValues(const BV4Node* current, PxVec3& CMax, PxVec3& EMax) + { + for(PxU32 i=0;i<4;i++) + { + if(current->mBVData[i].mData!=PX_INVALID_U32) + { + const CenterExtents& Box = current->mBVData[i].mAABB; + if(fabsf(Box.mCenter.x)>CMax.x) CMax.x = fabsf(Box.mCenter.x); + if(fabsf(Box.mCenter.y)>CMax.y) CMax.y = fabsf(Box.mCenter.y); + if(fabsf(Box.mCenter.z)>CMax.z) CMax.z = fabsf(Box.mCenter.z); + if(fabsf(Box.mExtents.x)>EMax.x) EMax.x = fabsf(Box.mExtents.x); + if(fabsf(Box.mExtents.y)>EMax.y) EMax.y = fabsf(Box.mExtents.y); + if(fabsf(Box.mExtents.z)>EMax.z) EMax.z = fabsf(Box.mExtents.z); + + if(!current->isLeaf(i)) + { + const BV4Node* ChildNode = current->getChild(i); + _ComputeMaxValues(ChildNode, CMax, EMax); + } + } + } + } + #endif +#endif + + static void _Flatten(BVDataPacked* const dest, const PxU32 box_id, PxU32& current_id, const BV4Node* current, PxU32& max_depth, PxU32& current_depth +#ifdef GU_BV4_QUANTIZED_TREE + , const PxVec3& CQuantCoeff, const PxVec3& EQuantCoeff, + const PxVec3& mCenterCoeff, const PxVec3& mExtentsCoeff +#endif + ) + { + // Entering a new node => increase depth + current_depth++; + // Keep track of max depth + if(current_depth>max_depth) + max_depth = current_depth; + +// dest[box_id] = 
*current; + const PxU32 CurrentType = current->getType(); + for(PxU32 i=0;i<CurrentType;i++) + { +#ifdef GU_BV4_QUANTIZED_TREE + const CenterExtents& Box = current->mBVData[i].mAABB; + #ifdef GU_BV4_USE_SLABS + const PxVec3 m = Box.mCenter - Box.mExtents; + const PxVec3 M = Box.mCenter + Box.mExtents; + + dest[box_id+i].mAABB.mData[0].mCenter = PxI16(m.x * CQuantCoeff.x); + dest[box_id+i].mAABB.mData[1].mCenter = PxI16(m.y * CQuantCoeff.y); + dest[box_id+i].mAABB.mData[2].mCenter = PxI16(m.z * CQuantCoeff.z); + dest[box_id+i].mAABB.mData[0].mExtents = PxU16(PxI16(M.x * EQuantCoeff.x)); + dest[box_id+i].mAABB.mData[1].mExtents = PxU16(PxI16(M.y * EQuantCoeff.y)); + dest[box_id+i].mAABB.mData[2].mExtents = PxU16(PxI16(M.z * EQuantCoeff.z)); + + if(1) + { + for(PxU32 j=0;j<3;j++) + { + // Dequantize the min/max +// const float qmin = float(dest[box_id+i].mAABB.mData[j].mCenter) * mCenterCoeff[j]; +// const float qmax = float(PxI16(dest[box_id+i].mAABB.mData[j].mExtents)) * mExtentsCoeff[j]; + // Compare real & dequantized values +/* if(qmax<M[j] || qmin>m[j]) + { + int stop=1; + }*/ + bool CanLeave; + do + { + CanLeave=true; + const float qmin = float(dest[box_id+i].mAABB.mData[j].mCenter) * mCenterCoeff[j]; + const float qmax = float(PxI16(dest[box_id+i].mAABB.mData[j].mExtents)) * mExtentsCoeff[j]; + + if(qmax<M[j]) + { +// if(dest[box_id+i].mAABB.mData[j].mExtents!=0xffff) + if(dest[box_id+i].mAABB.mData[j].mExtents!=0x7fff) + { + dest[box_id+i].mAABB.mData[j].mExtents++; + CanLeave = false; + } + } + if(qmin>m[j]) + { + if(dest[box_id+i].mAABB.mData[j].mCenter) + { + dest[box_id+i].mAABB.mData[j].mCenter--; + CanLeave = false; + } + } + }while(!CanLeave); + } + } + #else + dest[box_id+i].mAABB.mData[0].mCenter = PxI16(Box.mCenter.x * CQuantCoeff.x); + dest[box_id+i].mAABB.mData[1].mCenter = PxI16(Box.mCenter.y * CQuantCoeff.y); + dest[box_id+i].mAABB.mData[2].mCenter = PxI16(Box.mCenter.z * CQuantCoeff.z); + dest[box_id+i].mAABB.mData[0].mExtents = 
PxU16(Box.mExtents.x * EQuantCoeff.x); + dest[box_id+i].mAABB.mData[1].mExtents = PxU16(Box.mExtents.y * EQuantCoeff.y); + dest[box_id+i].mAABB.mData[2].mExtents = PxU16(Box.mExtents.z * EQuantCoeff.z); + + // Fix quantized boxes + if(1) + { + // Make sure the quantized box is still valid + const PxVec3 Max = Box.mCenter + Box.mExtents; + const PxVec3 Min = Box.mCenter - Box.mExtents; + // For each axis + for(PxU32 j=0;j<3;j++) + { // Dequantize the box center + const float qc = float(dest[box_id+i].mAABB.mData[j].mCenter) * mCenterCoeff[j]; + bool FixMe=true; + do + { // Dequantize the box extent + const float qe = float(dest[box_id+i].mAABB.mData[j].mExtents) * mExtentsCoeff[j]; + // Compare real & dequantized values + if(qc+qe<Max[j] || qc-qe>Min[j]) dest[box_id+i].mAABB.mData[j].mExtents++; + else FixMe=false; + // Prevent wrapping + if(!dest[box_id+i].mAABB.mData[j].mExtents) + { + dest[box_id+i].mAABB.mData[j].mExtents=0xffff; + FixMe=false; + } + }while(FixMe); + } + } + #endif +#else + #ifdef GU_BV4_USE_SLABS + // Compute min & max right here. 
Store temp as Center/Extents = Min/Max + const CenterExtents& Box = current->mBVData[i].mAABB; + dest[box_id+i].mAABB.mCenter = Box.mCenter - Box.mExtents; + dest[box_id+i].mAABB.mExtents = Box.mCenter + Box.mExtents; + #else + dest[box_id+i].mAABB = current->mBVData[i].mAABB; + #endif +#endif + dest[box_id+i].mData = PxU32(current->mBVData[i].mData); +// dest[box_id+i].encodePNS(current->mBVData[i].mTempPNS); + } + + PxU32 NbToGo=0; + PxU32 NextIDs[4] = {PX_INVALID_U32, PX_INVALID_U32, PX_INVALID_U32, PX_INVALID_U32}; + const BV4Node* ChildNodes[4] = {NULL,NULL,NULL,NULL}; + + BVDataPacked* data = dest+box_id; + for(PxU32 i=0;i<4;i++) + { + if(current->mBVData[i].mData!=PX_INVALID_U32 && !current->isLeaf(i)) + { + const BV4Node* ChildNode = current->getChild(i); + + const PxU32 NextID = current_id; +#ifdef GU_BV4_USE_SLABS + current_id += 4; +#else + const PxU32 ChildSize = ChildNode->getType(); + current_id += ChildSize; +#endif + const PxU32 ChildType = (ChildNode->getType()-2)<<1; + data[i].mData = size_t(ChildType+(NextID<<GU_BV4_CHILD_OFFSET_SHIFT_COUNT)); + //PX_ASSERT(data[i].mData == size_t(ChildType+(NextID<<3))); + + NextIDs[NbToGo] = NextID; + ChildNodes[NbToGo] = ChildNode; + NbToGo++; + +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + data[i].encodePNS(current->mBVData[i].mTempPNS); +#endif +//#define DEPTH_FIRST +#ifdef DEPTH_FIRST + _Flatten(dest, NextID, current_id, ChildNode, max_depth, current_depth + #ifdef GU_BV4_QUANTIZED_TREE + , CQuantCoeff, EQuantCoeff, mCenterCoeff, mExtentsCoeff + #endif + ); + current_depth--; +#endif + } +#ifdef GU_BV4_USE_SLABS + if(current->mBVData[i].mData==PX_INVALID_U32) + { + #ifdef GU_BV4_QUANTIZED_TREE + data[i].mAABB.mData[0].mExtents = 0; + data[i].mAABB.mData[1].mExtents = 0; + data[i].mAABB.mData[2].mExtents = 0; + data[i].mAABB.mData[0].mCenter = 0; + data[i].mAABB.mData[1].mCenter = 0; + data[i].mAABB.mData[2].mCenter = 0; + #else + data[i].mAABB.mCenter = PxVec3(0.0f); + data[i].mAABB.mExtents = PxVec3(0.0f); + 
#endif + data[i].mData = PX_INVALID_U32; + } +#endif + } + +#ifndef DEPTH_FIRST + for(PxU32 i=0;i<NbToGo;i++) + { + _Flatten(dest, NextIDs[i], current_id, ChildNodes[i], max_depth, current_depth + #ifdef GU_BV4_QUANTIZED_TREE + , CQuantCoeff, EQuantCoeff, mCenterCoeff, mExtentsCoeff + #endif + ); + current_depth--; + } +#endif +#ifndef GU_BV4_USE_NODE_POOLS + DELETESINGLE(current); +#endif + } + }; + + const PxU32 NbSingleNodes = Params.mStats[0]*2+(Params.mStats[1]+Params.mStats[2])*3+Params.mStats[3]*4; + + PxU32 CurID = Root->getType(); + PxU32 InitData = PX_INVALID_U32; +#ifdef GU_BV4_USE_SLABS + PX_UNUSED(NbSingleNodes); + const PxU32 NbNeeded = (Params.mStats[0]+Params.mStats[1]+Params.mStats[2]+Params.mStats[3])*4; + BVDataPacked* Nodes = reinterpret_cast<BVDataPacked*>(PX_ALLOC(sizeof(BVDataPacked)*NbNeeded, "BV4 nodes")); // PT: PX_NEW breaks alignment here +// BVDataPacked* Nodes = PX_NEW(BVDataPacked)[NbNeeded]; + + if(CurID==2) + { + InitData = 0; + } + else if(CurID==3) + { + InitData = 2; + } + else if(CurID==4) + { + InitData = 4; + } + + CurID = 4; +// PxU32 CurID = 4; +// PxU32 InitData = 4; +#else + BVDataPacked* Nodes = PX_NEW(BVDataPacked)[NbSingleNodes]; + + if(CurID==2) + { + InitData = 0; + } + else if(CurID==3) + { + InitData = 2; + } + else if(CurID==4) + { + InitData = 4; + } +#endif + + T->mInitData = InitData; + PxU32 MaxDepth = 0; + PxU32 CurrentDepth = 0; +#ifdef GU_BV4_QUANTIZED_TREE + #ifdef GU_BV4_USE_SLABS + PxVec3 MinQuantCoeff, MaxQuantCoeff; + { + // Get max values + PxVec3 MinMax(-FLT_MAX); + PxVec3 MaxMax(-FLT_MAX); + Local::_ComputeMaxValues(Root, MinMax, MaxMax); + + const PxU32 nbm=15; + + // Compute quantization coeffs + const float MinCoeff = float((1<<nbm)-1); + const float MaxCoeff = float((1<<nbm)-1); + MinQuantCoeff.x = MinMax.x!=0.0f ? MinCoeff/MinMax.x : 0.0f; + MinQuantCoeff.y = MinMax.y!=0.0f ? MinCoeff/MinMax.y : 0.0f; + MinQuantCoeff.z = MinMax.z!=0.0f ? 
MinCoeff/MinMax.z : 0.0f; + MaxQuantCoeff.x = MaxMax.x!=0.0f ? MaxCoeff/MaxMax.x : 0.0f; + MaxQuantCoeff.y = MaxMax.y!=0.0f ? MaxCoeff/MaxMax.y : 0.0f; + MaxQuantCoeff.z = MaxMax.z!=0.0f ? MaxCoeff/MaxMax.z : 0.0f; + // Compute and save dequantization coeffs + T->mCenterOrMinCoeff.x = MinMax.x/MinCoeff; + T->mCenterOrMinCoeff.y = MinMax.y/MinCoeff; + T->mCenterOrMinCoeff.z = MinMax.z/MinCoeff; + T->mExtentsOrMaxCoeff.x = MaxMax.x/MaxCoeff; + T->mExtentsOrMaxCoeff.y = MaxMax.y/MaxCoeff; + T->mExtentsOrMaxCoeff.z = MaxMax.z/MaxCoeff; + } + Local::_Flatten(Nodes, 0, CurID, Root, MaxDepth, CurrentDepth, MinQuantCoeff, MaxQuantCoeff, T->mCenterOrMinCoeff, T->mExtentsOrMaxCoeff); + #else + PxVec3 CQuantCoeff, EQuantCoeff; + { + // Get max values + PxVec3 CMax(-FLT_MAX); + PxVec3 EMax(-FLT_MAX); + Local::_ComputeMaxValues(Root, CMax, EMax); + + const PxU32 nbc=15; + const PxU32 nbe=16; +// const PxU32 nbc=7; +// const PxU32 nbe=8; + + const float UnitQuantError = 2.0f/65535.0f; + EMax.x += CMax.x*UnitQuantError; + EMax.y += CMax.y*UnitQuantError; + EMax.z += CMax.z*UnitQuantError; + + // Compute quantization coeffs + const float CCoeff = float((1<<nbc)-1); + CQuantCoeff.x = CMax.x!=0.0f ? CCoeff/CMax.x : 0.0f; + CQuantCoeff.y = CMax.y!=0.0f ? CCoeff/CMax.y : 0.0f; + CQuantCoeff.z = CMax.z!=0.0f ? CCoeff/CMax.z : 0.0f; + const float ECoeff = float((1<<nbe)-32); + EQuantCoeff.x = EMax.x!=0.0f ? ECoeff/EMax.x : 0.0f; + EQuantCoeff.y = EMax.y!=0.0f ? ECoeff/EMax.y : 0.0f; + EQuantCoeff.z = EMax.z!=0.0f ? 
ECoeff/EMax.z : 0.0f; + // Compute and save dequantization coeffs + T->mCenterOrMinCoeff.x = CMax.x/CCoeff; + T->mCenterOrMinCoeff.y = CMax.y/CCoeff; + T->mCenterOrMinCoeff.z = CMax.z/CCoeff; + T->mExtentsOrMaxCoeff.x = EMax.x/ECoeff; + T->mExtentsOrMaxCoeff.y = EMax.y/ECoeff; + T->mExtentsOrMaxCoeff.z = EMax.z/ECoeff; + } + Local::_Flatten(Nodes, 0, CurID, Root, MaxDepth, CurrentDepth, CQuantCoeff, EQuantCoeff, T->mCenterOrMinCoeff, T->mExtentsOrMaxCoeff); + #endif +#else + Local::_Flatten(Nodes, 0, CurID, Root, MaxDepth, CurrentDepth); +#endif + +#ifdef GU_BV4_USE_NODE_POOLS + Params.releaseNodes(); +#endif + +#ifdef GU_BV4_USE_SLABS + { + PX_ASSERT(sizeof(BVDataSwizzled)==sizeof(BVDataPacked)*4); + BVDataPacked* Copy = PX_NEW(BVDataPacked)[NbNeeded]; + memcpy(Copy, Nodes, sizeof(BVDataPacked)*NbNeeded); + for(PxU32 i=0;i<NbNeeded/4;i++) + { + const BVDataPacked* Src = Copy + i*4; + BVDataSwizzled* Dst = reinterpret_cast<BVDataSwizzled*>(Nodes + i*4); + for(PxU32 j=0;j<4;j++) + { + // We previously stored m/M within c/e so we just need to swizzle now + #ifdef GU_BV4_QUANTIZED_TREE + const QuantizedAABB& Box = Src[j].mAABB; + Dst->mX[j].mMin = Box.mData[0].mCenter; + Dst->mY[j].mMin = Box.mData[1].mCenter; + Dst->mZ[j].mMin = Box.mData[2].mCenter; + Dst->mX[j].mMax = PxI16(Box.mData[0].mExtents); + Dst->mY[j].mMax = PxI16(Box.mData[1].mExtents); + Dst->mZ[j].mMax = PxI16(Box.mData[2].mExtents); + #else + const CenterExtents& Box = Src[j].mAABB; + Dst->mMinX[j] = Box.mCenter.x; + Dst->mMinY[j] = Box.mCenter.y; + Dst->mMinZ[j] = Box.mCenter.z; + Dst->mMaxX[j] = Box.mExtents.x; + Dst->mMaxY[j] = Box.mExtents.y; + Dst->mMaxZ[j] = Box.mExtents.z; + #endif + Dst->mData[j] = Src[j].mData; + } + } + DELETEARRAY(Copy); + } + T->mNbNodes = NbNeeded; +#else + PX_ASSERT(CurID==NbSingleNodes); + T->mNbNodes = NbSingleNodes; +#endif + T->mNodes = Nodes; + } + return true; +} + +///// + +struct ReorderData +{ + const SourceMesh* mMesh; + PxU32* mOrder; + PxU32 mNbTrisPerLeaf; + 
PxU32 mIndex; + PxU32 mNbTris; + PxU32 mStats[16]; +}; +static bool gReorderCallback(const AABBTreeNode* current, PxU32 /*depth*/, void* userData) +{ + ReorderData* Data = reinterpret_cast<ReorderData*>(userData); + if(current->isLeaf()) + { + const PxU32 n = current->getNbPrimitives(); + PX_ASSERT(n<=Data->mNbTrisPerLeaf); + Data->mStats[n]++; + PxU32* Prims = const_cast<PxU32*>(current->getPrimitives()); + + for(PxU32 i=0;i<n;i++) + { + PX_ASSERT(Prims[i]<Data->mNbTris); + Data->mOrder[Data->mIndex] = Prims[i]; + PX_ASSERT(Data->mIndex<Data->mNbTris); + Prims[i] = Data->mIndex; + Data->mIndex++; + } + } + return true; +} + +bool physx::Gu::BuildBV4Ex(BV4Tree& tree, SourceMesh& mesh, float epsilon, PxU32 nbTrisPerLeaf) +{ + const PxU32 nbTris = mesh.mNbTris; + + AABBTree Source; + if(!Source.buildFromMesh(mesh, nbTrisPerLeaf)) + return false; + + { + PxU32* order = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbTris, "BV4")); + ReorderData RD; + RD.mMesh = &mesh; + RD.mOrder = order; + RD.mNbTrisPerLeaf = nbTrisPerLeaf; + RD.mIndex = 0; + RD.mNbTris = nbTris; + for(PxU32 i=0;i<16;i++) + RD.mStats[i] = 0; + Source.walk(gReorderCallback, &RD); + PX_ASSERT(RD.mIndex==nbTris); + mesh.remapTopology(order); + PX_FREE(order); +// for(PxU32 i=0;i<16;i++) +// printf("%d: %d\n", i, RD.mStats[i]); + } + + if(mesh.getNbTriangles()<=nbTrisPerLeaf) + return tree.init(&mesh, Source.getBV()); + + return BuildBV4Internal(tree, Source, &mesh, epsilon); +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.h new file mode 100644 index 00000000..eb2d9e99 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.h @@ -0,0 +1,125 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV4_BUILD_H +#define GU_BV4_BUILD_H + +#include "foundation/PxSimpleTypes.h" +#include "GuBV4.h" + +namespace physx +{ +namespace Gu +{ + class BV4Tree; + class SourceMesh; + + //! 
Contains AABB-tree build statistics + // PT: TODO: this is a duplicate of the SQ structure (TA34704) + struct BuildStats + { + BuildStats() : mCount(0), mTotalPrims(0) {} + + PxU32 mCount; //!< Number of nodes created + PxU32 mTotalPrims; //!< Total accumulated number of primitives. Should be much higher than the source + //!< number of prims, since it accumulates all prims covered by each node (i.e. internal + //!< nodes too, not just leaf ones) + + PX_FORCE_INLINE void reset() { mCount = mTotalPrims = 0; } + + PX_FORCE_INLINE void setCount(PxU32 nb) { mCount=nb; } + PX_FORCE_INLINE void increaseCount(PxU32 nb) { mCount+=nb; } + PX_FORCE_INLINE PxU32 getCount() const { return mCount; } + }; + + // PT: TODO: refactor with SQ version (TA34704) + class AABBTreeNode : public physx::shdfnd::UserAllocated + { + public: + PX_FORCE_INLINE AABBTreeNode() : mPos(0), mNodePrimitives(NULL), mNbPrimitives(0) + { + } + PX_FORCE_INLINE ~AABBTreeNode() + { + mPos = 0; + mNodePrimitives = NULL; // This was just a shortcut to the global list => no release + mNbPrimitives = 0; + } + // Data access + PX_FORCE_INLINE const PxBounds3& getAABB() const { return mBV; } + + PX_FORCE_INLINE const AABBTreeNode* getPos() const { return reinterpret_cast<const AABBTreeNode*>(mPos); } + PX_FORCE_INLINE const AABBTreeNode* getNeg() const { const AABBTreeNode* P = getPos(); return P ? 
P+1 : NULL; } + + PX_FORCE_INLINE bool isLeaf() const { return !getPos(); } + + PxBounds3 mBV; // Global bounding-volume enclosing all the node-related primitives + size_t mPos; // "Positive" & "Negative" children + + // Data access + PX_FORCE_INLINE const PxU32* getPrimitives() const { return mNodePrimitives; } + PX_FORCE_INLINE PxU32 getNbPrimitives() const { return mNbPrimitives; } + + PxU32* mNodePrimitives; //!< Node-related primitives (shortcut to a position in mIndices below) + PxU32 mNbPrimitives; //!< Number of primitives for this node + }; + + typedef bool (*WalkingCallback) (const AABBTreeNode* current, PxU32 depth, void* userData); + + // PT: TODO: refactor with SQ version (TA34704) + class AABBTree : public physx::shdfnd::UserAllocated + { + public: + AABBTree(); + ~AABBTree(); + + bool buildFromMesh(SourceMesh& mesh, PxU32 limit); + void release(); + + PX_FORCE_INLINE const PxU32* getIndices() const { return mIndices; } //!< Catch the indices + PX_FORCE_INLINE PxU32 getNbNodes() const { return mTotalNbNodes; } //!< Catch the number of nodes + + PX_FORCE_INLINE const PxU32* getPrimitives() const { return mPool->mNodePrimitives; } + PX_FORCE_INLINE PxU32 getNbPrimitives() const { return mPool->mNbPrimitives; } + PX_FORCE_INLINE const AABBTreeNode* getNodes() const { return mPool; } + PX_FORCE_INLINE const PxBounds3& getBV() const { return mPool->mBV; } + + PxU32 walk(WalkingCallback callback, void* userData) const; + private: + PxU32* mIndices; //!< Indices in the app list. Indices are reorganized during build (permutation). + AABBTreeNode* mPool; //!< Linear pool of nodes for complete trees. Null otherwise. [Opcode 1.3] + PxU32 mTotalNbNodes; //!< Number of nodes in the tree. 
+ }; + + PX_PHYSX_COMMON_API bool BuildBV4Ex(BV4Tree& tree, SourceMesh& mesh, float epsilon, PxU32 nbTrisPerLeaf); + +} // namespace Gu +} + +#endif // GU_BV4_BUILD_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Settings.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Settings.h new file mode 100644 index 00000000..9807e526 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Settings.h @@ -0,0 +1,39 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. 
+// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV4_SETTINGS_H +#define GU_BV4_SETTINGS_H + + // PT: "BV4" ported from "Opcode 2.0". Available compile-time options are: + #define GU_BV4_STACK_SIZE 256 // Default size of local stacks for non-recursive traversals. + #define GU_BV4_PRECOMPUTED_NODE_SORT // Use node sorting or not. This should probably always be enabled. + #define GU_BV4_QUANTIZED_TREE // Use AABB quantization/compression or not. + #define GU_BV4_USE_SLABS // Use swizzled data format or not. Swizzled = faster raycasts, but slower overlaps & larger trees. + +#endif // GU_BV4_SETTINGS_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_AABBAABBSweepTest.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_AABBAABBSweepTest.h new file mode 100644 index 00000000..1131edad --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_AABBAABBSweepTest.h @@ -0,0 +1,114 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
#ifndef GU_BV4_AABB_AABB_SWEEP_TEST_H
#define GU_BV4_AABB_AABB_SWEEP_TEST_H

// NOTE(review): this whole header is only compiled when the slab format is
// disabled (GU_BV4_USE_SLABS undefined) on Intel-family targets.
#ifndef GU_BV4_USE_SLABS
#if PX_INTEL_FAMILY
	// Swept-box vs node-box overlap test (SSE version).
	// Separating-axis test between the node box (inflated by the swept box
	// extents 'extents2') and the motion segment stored in 'params':
	// first the 3 axis-aligned axes, then the 3 (direction x axis) cross axes
	// via the swizzled |f| > fg formulation. Returns 1 on overlap, 0 if a
	// separating axis was found.
	PX_FORCE_INLINE Ps::IntBool BV4_SegmentAABBOverlap(const PxVec3& center, const PxVec3& extents, const PxVec3& extents2, const RayParams* PX_RESTRICT params)
	{
		const PxU32 maskI = 0x7fffffff;	// Clears the sign bit => fabsf on 4 lanes
		const Vec4V fdirV = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
		const Vec4V extentsV = V4Add(V4LoadU(&extents.x), V4LoadU(&extents2.x));
		const Vec4V DV = V4Sub(V4LoadA_Safe(&params->mData2_PaddedAligned.x), V4LoadU(&center.x));
		__m128 absDV = _mm_and_ps(DV, _mm_load1_ps((float*)&maskI));
		absDV = _mm_cmpgt_ps(absDV, V4Add(extentsV, fdirV));
		const PxU32 test = (PxU32)_mm_movemask_ps(absDV);
		if(test&7)
			return 0;	// Separated on an axis-aligned axis

		{
			// Cross-axes test: f = data.zyx * D.xzy - data.xzy * D.zyx
			const Vec4V dataZYX_V = V4LoadA_Safe(&params->mData_PaddedAligned.x);
			const __m128 dataXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dataZYX_V), _MM_SHUFFLE(3,0,2,1)));
			const __m128 DXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(DV), _MM_SHUFFLE(3,0,2,1)));
			const Vec4V fV = V4Sub(V4Mul(dataZYX_V, DXZY_V), V4Mul(dataXZY_V, DV));

			const Vec4V fdirZYX_V = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
			const __m128 fdirXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(fdirZYX_V), _MM_SHUFFLE(3,0,2,1)));
			const __m128 extentsXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1)));
			// PT: TODO: use V4MulAdd here (TA34704)
			const Vec4V fg = V4Add(V4Mul(extentsV, fdirXZY_V), V4Mul(extentsXZY_V, fdirZYX_V));

			__m128 absfV = _mm_and_ps(fV, _mm_load1_ps((float*)&maskI));
			absfV = _mm_cmpgt_ps(absfV, fg);
			const PxU32 test2 = (PxU32)_mm_movemask_ps(absfV);
			if(test2&7)
				return 0;	// Separated on a cross axis
			return 1;
		}
	}

#ifdef GU_BV4_QUANTIZED_TREE
	// Same test as above, but the node box is dequantized on the fly:
	// each 32-bit lane of node->mAABB.mData packs the quantized extent (max)
	// in the low 16 bits and the quantized center (min) in the high 16 bits.
	template<class T>
	PX_FORCE_INLINE Ps::IntBool BV4_SegmentAABBOverlap(const T* PX_RESTRICT node, const PxVec3& extents2, const RayParams* PX_RESTRICT params)
	{
		const __m128i testV = _mm_load_si128((__m128i*)node->mAABB.mData);
		const __m128i qextentsV = _mm_and_si128(testV, _mm_set1_epi32(0x0000ffff));
		const __m128i qcenterV = _mm_srai_epi32(testV, 16);
		const Vec4V centerV0 = V4Mul(_mm_cvtepi32_ps(qcenterV), V4LoadA_Safe(&params->mCenterOrMinCoeff_PaddedAligned.x));
		const Vec4V extentsV0 = V4Mul(_mm_cvtepi32_ps(qextentsV), V4LoadA_Safe(&params->mExtentsOrMaxCoeff_PaddedAligned.x));

		const PxU32 maskI = 0x7fffffff;	// Clears the sign bit => fabsf on 4 lanes
		const Vec4V fdirV = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
		const Vec4V extentsV = V4Add(extentsV0, V4LoadU(&extents2.x));
		const Vec4V DV = V4Sub(V4LoadA_Safe(&params->mData2_PaddedAligned.x), centerV0);
		__m128 absDV = _mm_and_ps(DV, _mm_load1_ps((float*)&maskI));
		absDV = _mm_cmpgt_ps(absDV, V4Add(extentsV, fdirV));
		const PxU32 test = (PxU32)_mm_movemask_ps(absDV);
		if(test&7)
			return 0;	// Separated on an axis-aligned axis

		{
			// Cross-axes test, identical to the non-quantized version.
			const Vec4V dataZYX_V = V4LoadA_Safe(&params->mData_PaddedAligned.x);
			const __m128 dataXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dataZYX_V), _MM_SHUFFLE(3,0,2,1)));
			const __m128 DXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(DV), _MM_SHUFFLE(3,0,2,1)));
			const Vec4V fV = V4Sub(V4Mul(dataZYX_V, DXZY_V), V4Mul(dataXZY_V, DV));

			const Vec4V fdirZYX_V = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
			const __m128 fdirXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(fdirZYX_V), _MM_SHUFFLE(3,0,2,1)));
			const __m128 extentsXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1)));
			// PT: TODO: use V4MulAdd here (TA34704)
			const Vec4V fg = V4Add(V4Mul(extentsV, fdirXZY_V), V4Mul(extentsXZY_V, fdirZYX_V));

			__m128 absfV = _mm_and_ps(fV, _mm_load1_ps((float*)&maskI));
			absfV = _mm_cmpgt_ps(absfV, fg);
			const PxU32 test2 = (PxU32)_mm_movemask_ps(absfV);
			if(test2&7)
				return 0;	// Separated on a cross axis
			return 1;
		}
	}
#endif
#endif
#endif

#endif // GU_BV4_AABB_AABB_SWEEP_TEST_H
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "GuBV4.h" +using namespace physx; +using namespace Gu; + +#if PX_INTEL_FAMILY +#define SWEEP_AABB_IMPL +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; +#include "GuBV4_BoxSweep_Internal.h" +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxBoxOverlapTest.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxBoxOverlapTest.h new file mode 100644 index 00000000..ff696a38 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxBoxOverlapTest.h @@ -0,0 +1,201 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_BV4_BOX_BOX_OVERLAP_TEST_H +#define GU_BV4_BOX_BOX_OVERLAP_TEST_H + +#if PX_INTEL_FAMILY +#ifndef GU_BV4_USE_SLABS + PX_FORCE_INLINE Ps::IntBool BV4_BoxBoxOverlap(const PxVec3& extents, const PxVec3& center, const OBBTestParams* PX_RESTRICT params) + { + const PxU32 maskI = 0x7fffffff; + + const Vec4V extentsV = V4LoadU(&extents.x); + + const Vec4V TV = V4Sub(V4LoadA_Safe(¶ms->mTBoxToModel_PaddedAligned.x), V4LoadU(¢er.x)); + { + __m128 absTV = _mm_and_ps(TV, _mm_load1_ps((float*)&maskI)); + absTV = _mm_cmpgt_ps(absTV, V4Add(extentsV, V4LoadA_Safe(¶ms->mBB_PaddedAligned.x))); + const PxU32 test = (PxU32)_mm_movemask_ps(absTV); + if(test&7) + return 0; + } + + __m128 tV; + { + const __m128 T_YZX_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(TV), _MM_SHUFFLE(3,0,2,1))); + const __m128 T_ZXY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(TV), _MM_SHUFFLE(3,1,0,2))); + + tV = V4Mul(TV, V4LoadA_Safe(¶ms->mPreca0_PaddedAligned.x)); + tV = V4Add(tV, V4Mul(T_YZX_V, V4LoadA_Safe(¶ms->mPreca1_PaddedAligned.x))); + tV = V4Add(tV, V4Mul(T_ZXY_V, V4LoadA_Safe(¶ms->mPreca2_PaddedAligned.x))); + } + + __m128 t2V; + { + const __m128 extents_YZX_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1))); + const __m128 extents_ZXY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,1,0,2))); + + t2V = V4Mul(extentsV, V4LoadA_Safe(¶ms->mPreca0b_PaddedAligned.x)); + t2V = V4Add(t2V, V4Mul(extents_YZX_V, V4LoadA_Safe(¶ms->mPreca1b_PaddedAligned.x))); + t2V = V4Add(t2V, V4Mul(extents_ZXY_V, V4LoadA_Safe(¶ms->mPreca2b_PaddedAligned.x))); + t2V = V4Add(t2V, V4LoadA_Safe(¶ms->mBoxExtents_PaddedAligned.x)); + } + + { + __m128 abstV = _mm_and_ps(tV, _mm_load1_ps((float*)&maskI)); + abstV = _mm_cmpgt_ps(abstV, t2V); + const PxU32 test = (PxU32)_mm_movemask_ps(abstV); + if(test&7) + return 0; + } + return 1; + } + +#ifdef GU_BV4_QUANTIZED_TREE + template<class T> + PX_FORCE_INLINE Ps::IntBool 
BV4_BoxBoxOverlap(const T* PX_RESTRICT node, const OBBTestParams* PX_RESTRICT params) + { +#define NEW_VERSION +#ifdef NEW_VERSION + SSE_CONST4(maskV, 0x7fffffff); + SSE_CONST4(maskQV, 0x0000ffff); +#else + const PxU32 maskI = 0x7fffffff; +#endif + + Vec4V centerV = V4LoadA((float*)node->mAABB.mData); +#ifdef NEW_VERSION + __m128 extentsV = _mm_castsi128_ps(_mm_and_si128(_mm_castps_si128(centerV), SSE_CONST(maskQV))); +#else + __m128 extentsV = _mm_castsi128_ps(_mm_and_si128(_mm_castps_si128(centerV), _mm_set1_epi32(0x0000ffff))); +#endif + extentsV = V4Mul(_mm_cvtepi32_ps(_mm_castps_si128(extentsV)), V4LoadA_Safe(¶ms->mExtentsOrMaxCoeff_PaddedAligned.x)); + centerV = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(centerV), 16)); + centerV = V4Mul(_mm_cvtepi32_ps(_mm_castps_si128(centerV)), V4LoadA_Safe(¶ms->mCenterOrMinCoeff_PaddedAligned.x)); + + const Vec4V TV = V4Sub(V4LoadA_Safe(¶ms->mTBoxToModel_PaddedAligned.x), centerV); + { +#ifdef NEW_VERSION + __m128 absTV = _mm_and_ps(TV, SSE_CONSTF(maskV)); +#else + __m128 absTV = _mm_and_ps(TV, _mm_load1_ps((float*)&maskI)); +#endif + + absTV = _mm_cmpgt_ps(absTV, V4Add(extentsV, V4LoadA_Safe(¶ms->mBB_PaddedAligned.x))); + const PxU32 test = (PxU32)_mm_movemask_ps(absTV); + if(test&7) + return 0; + } + + __m128 tV; + { + const __m128 T_YZX_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(TV), _MM_SHUFFLE(3,0,2,1))); + const __m128 T_ZXY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(TV), _MM_SHUFFLE(3,1,0,2))); + + tV = V4Mul(TV, V4LoadA_Safe(¶ms->mPreca0_PaddedAligned.x)); + tV = V4Add(tV, V4Mul(T_YZX_V, V4LoadA_Safe(¶ms->mPreca1_PaddedAligned.x))); + tV = V4Add(tV, V4Mul(T_ZXY_V, V4LoadA_Safe(¶ms->mPreca2_PaddedAligned.x))); + } + + __m128 t2V; + { + const __m128 extents_YZX_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1))); + const __m128 extents_ZXY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,1,0,2))); + + t2V = 
V4Mul(extentsV, V4LoadA_Safe(¶ms->mPreca0b_PaddedAligned.x)); + t2V = V4Add(t2V, V4Mul(extents_YZX_V, V4LoadA_Safe(¶ms->mPreca1b_PaddedAligned.x))); + t2V = V4Add(t2V, V4Mul(extents_ZXY_V, V4LoadA_Safe(¶ms->mPreca2b_PaddedAligned.x))); + t2V = V4Add(t2V, V4LoadA_Safe(¶ms->mBoxExtents_PaddedAligned.x)); + } + + { +#ifdef NEW_VERSION + __m128 abstV = _mm_and_ps(tV, SSE_CONSTF(maskV)); +#else + __m128 abstV = _mm_and_ps(tV, _mm_load1_ps((float*)&maskI)); +#endif + abstV = _mm_cmpgt_ps(abstV, t2V); + const PxU32 test = (PxU32)_mm_movemask_ps(abstV); + if(test&7) + return 0; + } + return 1; + } +#endif // GU_BV4_QUANTIZED_TREE +#endif // GU_BV4_USE_SLABS + +#ifdef GU_BV4_USE_SLABS + PX_FORCE_INLINE Ps::IntBool BV4_BoxBoxOverlap(const __m128 boxCenter, const __m128 extentsV, const OBBTestParams* PX_RESTRICT params) + { + const PxU32 maskI = 0x7fffffff; + + const Vec4V TV = V4Sub(V4LoadA_Safe(¶ms->mTBoxToModel_PaddedAligned.x), boxCenter); + { + __m128 absTV = _mm_and_ps(TV, _mm_load1_ps(reinterpret_cast<const float*>(&maskI))); + absTV = _mm_cmpgt_ps(absTV, V4Add(extentsV, V4LoadA_Safe(¶ms->mBB_PaddedAligned.x))); + const PxU32 test = PxU32(_mm_movemask_ps(absTV)); + if(test&7) + return 0; + } + + __m128 tV; + { + const __m128 T_YZX_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(TV), _MM_SHUFFLE(3,0,2,1))); + const __m128 T_ZXY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(TV), _MM_SHUFFLE(3,1,0,2))); + + tV = V4Mul(TV, V4LoadA_Safe(¶ms->mPreca0_PaddedAligned.x)); + tV = V4Add(tV, V4Mul(T_YZX_V, V4LoadA_Safe(¶ms->mPreca1_PaddedAligned.x))); + tV = V4Add(tV, V4Mul(T_ZXY_V, V4LoadA_Safe(¶ms->mPreca2_PaddedAligned.x))); + } + + __m128 t2V; + { + const __m128 extents_YZX_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1))); + const __m128 extents_ZXY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,1,0,2))); + + t2V = V4Mul(extentsV, V4LoadA_Safe(¶ms->mPreca0b_PaddedAligned.x)); + t2V = 
V4Add(t2V, V4Mul(extents_YZX_V, V4LoadA_Safe(¶ms->mPreca1b_PaddedAligned.x))); + t2V = V4Add(t2V, V4Mul(extents_ZXY_V, V4LoadA_Safe(¶ms->mPreca2b_PaddedAligned.x))); + t2V = V4Add(t2V, V4LoadA_Safe(¶ms->mBoxExtents_PaddedAligned.x)); + } + + { + __m128 abstV = _mm_and_ps(tV, _mm_load1_ps(reinterpret_cast<const float*>(&maskI))); + abstV = _mm_cmpgt_ps(abstV, t2V); + const PxU32 test = PxU32(_mm_movemask_ps(abstV)); + if(test&7) + return 0; + } + return 1; + } +#endif // GU_BV4_USE_SLABS +#endif // PX_INTEL_FAMILY + +#endif // GU_BV4_BOX_BOX_OVERLAP_TEST_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxOverlap.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxOverlap.cpp new file mode 100644 index 00000000..febf7261 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxOverlap.cpp @@ -0,0 +1,473 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "GuBV4.h" +using namespace physx; +using namespace Gu; + +#if PX_INTEL_FAMILY + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#include "GuInternal.h" +#include "GuDistancePointSegment.h" +#include "GuIntersectionCapsuleTriangle.h" +#include "GuIntersectionTriangleBox.h" + +#include "GuBV4_BoxOverlap_Internal.h" +#include "GuBV4_BoxBoxOverlapTest.h" + +// Box overlap any + +struct OBBParams : OBBTestParams +{ + const IndTri32* PX_RESTRICT mTris32; + const IndTri16* PX_RESTRICT mTris16; + const PxVec3* PX_RESTRICT mVerts; + + PxMat33 mRModelToBox_Padded; //!< Rotation from model space to obb space + Vec3p mTModelToBox_Padded; //!< Translation from model space to obb space +}; + +// PT: TODO: this used to be inlined so we lost some perf by moving to PhysX's version. Revisit. 
(TA34704) +Ps::IntBool intersectTriangleBoxBV4(const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, + const PxMat33& rotModelToBox, const PxVec3& transModelToBox, const PxVec3& extents); +namespace +{ +class LeafFunction_BoxOverlapAny +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(const OBBParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + if(intersectTriangleBoxBV4(params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params->mRModelToBox_Padded, params->mTModelToBox_Padded, params->mBoxExtents_PaddedAligned)) + return 1; + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +template<class ParamsT> +static PX_FORCE_INLINE void setupBoxParams(ParamsT* PX_RESTRICT params, const Box& localBox, const BV4Tree* PX_RESTRICT tree, const SourceMesh* PX_RESTRICT mesh) +{ + invertBoxMatrix(params->mRModelToBox_Padded, params->mTModelToBox_Padded, localBox); + params->mTBoxToModel_PaddedAligned = localBox.center; + + setupMeshPointersAndQuantizedCoeffs(params, mesh, tree); + + params->precomputeBoxData(localBox.extents, &localBox.rot); +} + +/////////////////////////////////////////////////////////////////////////////// + +#include "GuBV4_Internal.h" +#include "GuBV4_BoxBoxOverlapTest.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs.h" +#endif +#include "GuBV4_ProcessStreamNoOrder_OBBOBB.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs_SwizzledNoOrder.h" +#endif + +Ps::IntBool BV4_OverlapBoxAny(const Box& box, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + Box localBox; + computeLocalBox(localBox, box, worldm_Aligned); + + OBBParams Params; + setupBoxParams(&Params, localBox, &tree, mesh); + + if(tree.mNodes) + { + return 
processStreamNoOrder<LeafFunction_BoxOverlapAny>(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + return LeafFunction_BoxOverlapAny::doLeafTest(&Params, nbTris); + } +} + + +// Box overlap all + +struct OBBParamsAll : OBBParams +{ + PxU32 mNbHits; + PxU32 mMaxNbHits; + PxU32* mHits; +}; + +namespace +{ +class LeafFunction_BoxOverlapAll +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(OBBParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + if(intersectTriangleBoxBV4(params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params->mRModelToBox_Padded, params->mTModelToBox_Padded, params->mBoxExtents_PaddedAligned)) + { + OBBParamsAll* ParamsAll = static_cast<OBBParamsAll*>(params); + ParamsAll->mHits[ParamsAll->mNbHits] = primIndex; + ParamsAll->mNbHits++; + if(ParamsAll->mNbHits==ParamsAll->mMaxNbHits) + return 1; + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; + +} + +PxU32 BV4_OverlapBoxAll(const Box& box, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxU32* results, PxU32 size, bool& overflow) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + Box localBox; + computeLocalBox(localBox, box, worldm_Aligned); + + OBBParamsAll Params; + Params.mNbHits = 0; + Params.mMaxNbHits = size; + Params.mHits = results; + setupBoxParams(&Params, localBox, &tree, mesh); + + if(tree.mNodes) + { + overflow = processStreamNoOrder<LeafFunction_BoxOverlapAll>(tree.mNodes, tree.mInitData, &Params)!=0; + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + overflow = LeafFunction_BoxOverlapAll::doLeafTest(&Params, nbTris)!=0; + } + return Params.mNbHits; +} + +// Box overlap - callback version + +struct OBBParamsCB : OBBParams +{ + 
MeshOverlapCallback mCallback; + void* mUserData; +}; + +namespace +{ +class LeafFunction_BoxOverlapCB +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(const OBBParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + if(intersectTriangleBoxBV4(params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params->mRModelToBox_Padded, params->mTModelToBox_Padded, params->mBoxExtents_PaddedAligned)) + { + const PxU32 vrefs[3] = { VRef0, VRef1, VRef2 }; + if((params->mCallback)(params->mUserData, params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], primIndex, vrefs)) + return 1; + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +void BV4_OverlapBoxCB(const Box& localBox, const BV4Tree& tree, MeshOverlapCallback callback, void* userData) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + OBBParamsCB Params; + Params.mCallback = callback; + Params.mUserData = userData; + setupBoxParams(&Params, localBox, &tree, mesh); + + if(tree.mNodes) + { + processStreamNoOrder<LeafFunction_BoxOverlapCB>(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + LeafFunction_BoxOverlapCB::doLeafTest(&Params, nbTris); + } +} + +// Capsule overlap any + +struct CapsuleParamsAny : OBBParams +{ + Capsule mLocalCapsule; // Capsule in mesh space + CapsuleTriangleOverlapData mData; +}; + +// PT: TODO: try to refactor this one with the PhysX version (TA34704) +static bool CapsuleVsTriangle_SAT(const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, const CapsuleParamsAny* PX_RESTRICT params) +{ +// PX_ASSERT(capsule.p0!=capsule.p1); + + { + const PxReal d2 = distancePointSegmentSquaredInternal(params->mLocalCapsule.p0, params->mData.mCapsuleDir, p0); + 
if(d2<=params->mLocalCapsule.radius*params->mLocalCapsule.radius) + return 1; + } + + const PxVec3 N = (p0 - p1).cross(p0 - p2); + + if(!testAxis(p0, p1, p2, params->mLocalCapsule, N)) + return 0; + + const float BDotB = params->mData.mBDotB; + const float oneOverBDotB = params->mData.mOneOverBDotB; + const PxVec3& capP0 = params->mLocalCapsule.p0; + const PxVec3& capDir = params->mData.mCapsuleDir; + + if(!testAxis(p0, p1, p2, params->mLocalCapsule, computeEdgeAxis(p0, p1 - p0, capP0, capDir, BDotB, oneOverBDotB))) + return 0; + + if(!testAxis(p0, p1, p2, params->mLocalCapsule, computeEdgeAxis(p1, p2 - p1, capP0, capDir, BDotB, oneOverBDotB))) + return 0; + + if(!testAxis(p0, p1, p2, params->mLocalCapsule, computeEdgeAxis(p2, p0 - p2, capP0, capDir, BDotB, oneOverBDotB))) + return 0; + + return 1; +} + +static Ps::IntBool PX_FORCE_INLINE __CapsuleTriangle(const CapsuleParamsAny* PX_RESTRICT params, PxU32 primIndex) +{ + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + return CapsuleVsTriangle_SAT(params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params); +} + +namespace +{ +class LeafFunction_CapsuleOverlapAny +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(const OBBParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(__CapsuleTriangle(static_cast<const CapsuleParamsAny*>(params), primIndex)) + return 1; + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +template<class ParamsT> +static PX_FORCE_INLINE void setupCapsuleParams(ParamsT* PX_RESTRICT params, const Capsule& capsule, const BV4Tree* PX_RESTRICT tree, const PxMat44* PX_RESTRICT worldm_Aligned, const SourceMesh* PX_RESTRICT mesh) +{ + computeLocalCapsule(params->mLocalCapsule, capsule, worldm_Aligned); + + params->mData.init(params->mLocalCapsule); + + Box localBox; + computeBoxAroundCapsule(params->mLocalCapsule, localBox); + + 
setupBoxParams(params, localBox, tree, mesh); +} + +Ps::IntBool BV4_OverlapCapsuleAny(const Capsule& capsule, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + CapsuleParamsAny Params; + setupCapsuleParams(&Params, capsule, &tree, worldm_Aligned, mesh); + + if(tree.mNodes) + { + return processStreamNoOrder<LeafFunction_CapsuleOverlapAny>(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + return LeafFunction_CapsuleOverlapAny::doLeafTest(&Params, nbTris); + } +} + + +// Capsule overlap all + +struct CapsuleParamsAll : CapsuleParamsAny +{ + PxU32 mNbHits; + PxU32 mMaxNbHits; + PxU32* mHits; +}; + +namespace +{ +class LeafFunction_CapsuleOverlapAll +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(OBBParams* PX_RESTRICT params, PxU32 primIndex) + { + CapsuleParamsAll* ParamsAll = static_cast<CapsuleParamsAll*>(params); + + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(__CapsuleTriangle(ParamsAll, primIndex)) + { + ParamsAll->mHits[ParamsAll->mNbHits] = primIndex; + ParamsAll->mNbHits++; + if(ParamsAll->mNbHits==ParamsAll->mMaxNbHits) + return 1; + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +PxU32 BV4_OverlapCapsuleAll(const Capsule& capsule, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxU32* results, PxU32 size, bool& overflow) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + CapsuleParamsAll Params; + Params.mNbHits = 0; + Params.mMaxNbHits = size; + Params.mHits = results; + setupCapsuleParams(&Params, capsule, &tree, worldm_Aligned, mesh); + + if(tree.mNodes) + { + overflow = processStreamNoOrder<LeafFunction_CapsuleOverlapAll>(tree.mNodes, tree.mInitData, &Params)!=0; + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + overflow = LeafFunction_CapsuleOverlapAll::doLeafTest(&Params, nbTris)!=0; + } 
+ return Params.mNbHits; +} + +// Capsule overlap - callback version + +struct CapsuleParamsCB : CapsuleParamsAny +{ + MeshOverlapCallback mCallback; + void* mUserData; +}; + +namespace +{ +class LeafFunction_CapsuleOverlapCB +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(const CapsuleParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + if(CapsuleVsTriangle_SAT(p0, p1, p2, params)) + { + const PxU32 vrefs[3] = { VRef0, VRef1, VRef2 }; + if((params->mCallback)(params->mUserData, p0, p1, p2, primIndex, vrefs)) + return 1; + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +// PT: this one is currently not used +void BV4_OverlapCapsuleCB(const Capsule& capsule, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, MeshOverlapCallback callback, void* userData) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + CapsuleParamsCB Params; + Params.mCallback = callback; + Params.mUserData = userData; + setupCapsuleParams(&Params, capsule, &tree, worldm_Aligned, mesh); + + if(tree.mNodes) + { + processStreamNoOrder<LeafFunction_CapsuleOverlapCB>(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + LeafFunction_CapsuleOverlapCB::doLeafTest(&Params, nbTris); + } +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxOverlap_Internal.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxOverlap_Internal.h new file mode 100644 index 00000000..410af5b8 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxOverlap_Internal.h @@ -0,0 +1,105 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a 
form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_BV4_BOX_OVERLAP_INTERNAL_H +#define GU_BV4_BOX_OVERLAP_INTERNAL_H + +#include "GuBV4_Common.h" + + template<class ParamsT> + PX_FORCE_INLINE void precomputeData(ParamsT* PX_RESTRICT dst, PxMat33* PX_RESTRICT absRot, const PxMat33* PX_RESTRICT boxToModelR) + { + // Precompute absolute box-to-model rotation matrix + dst->mPreca0_PaddedAligned.x = boxToModelR->column0.x; + dst->mPreca0_PaddedAligned.y = boxToModelR->column1.y; + dst->mPreca0_PaddedAligned.z = boxToModelR->column2.z; + + dst->mPreca1_PaddedAligned.x = boxToModelR->column0.y; + dst->mPreca1_PaddedAligned.y = boxToModelR->column1.z; + dst->mPreca1_PaddedAligned.z = boxToModelR->column2.x; + + dst->mPreca2_PaddedAligned.x = boxToModelR->column0.z; + dst->mPreca2_PaddedAligned.y = boxToModelR->column1.x; + dst->mPreca2_PaddedAligned.z = boxToModelR->column2.y; + + // Epsilon value prevents floating-point inaccuracies (strategy borrowed from RAPID) + const PxReal epsilon = 1e-6f; + absRot->column0.x = dst->mPreca0b_PaddedAligned.x = epsilon + fabsf(boxToModelR->column0.x); + absRot->column0.y = dst->mPreca1b_PaddedAligned.x = epsilon + fabsf(boxToModelR->column0.y); + absRot->column0.z = dst->mPreca2b_PaddedAligned.x = epsilon + fabsf(boxToModelR->column0.z); + + absRot->column1.x = dst->mPreca2b_PaddedAligned.y = epsilon + fabsf(boxToModelR->column1.x); + absRot->column1.y = dst->mPreca0b_PaddedAligned.y = epsilon + fabsf(boxToModelR->column1.y); + absRot->column1.z = dst->mPreca1b_PaddedAligned.y = epsilon + fabsf(boxToModelR->column1.z); + + absRot->column2.x = dst->mPreca1b_PaddedAligned.z = epsilon + fabsf(boxToModelR->column2.x); + absRot->column2.y = dst->mPreca2b_PaddedAligned.z = epsilon + fabsf(boxToModelR->column2.y); + absRot->column2.z = dst->mPreca0b_PaddedAligned.z = epsilon + fabsf(boxToModelR->column2.z); + } + + template<class ParamsT> + PX_FORCE_INLINE void setupBoxData(ParamsT* PX_RESTRICT dst, const PxVec3& extents, const PxMat33* PX_RESTRICT mAR) + { + 
dst->mBoxExtents_PaddedAligned = extents; + + const float Ex = extents.x; + const float Ey = extents.y; + const float Ez = extents.z; + dst->mBB_PaddedAligned.x = Ex*mAR->column0.x + Ey*mAR->column1.x + Ez*mAR->column2.x; + dst->mBB_PaddedAligned.y = Ex*mAR->column0.y + Ey*mAR->column1.y + Ez*mAR->column2.y; + dst->mBB_PaddedAligned.z = Ex*mAR->column0.z + Ey*mAR->column1.z + Ez*mAR->column2.z; + } + + struct OBBTestParams // Data needed to perform the OBB-OBB overlap test + { +#ifdef GU_BV4_QUANTIZED_TREE + BV4_ALIGN16(Vec3p mCenterOrMinCoeff_PaddedAligned); + BV4_ALIGN16(Vec3p mExtentsOrMaxCoeff_PaddedAligned); +#endif + BV4_ALIGN16(Vec3p mTBoxToModel_PaddedAligned); //!< Translation from obb space to model space + BV4_ALIGN16(Vec3p mBB_PaddedAligned); + BV4_ALIGN16(Vec3p mBoxExtents_PaddedAligned); + + BV4_ALIGN16(Vec3p mPreca0_PaddedAligned); + BV4_ALIGN16(Vec3p mPreca1_PaddedAligned); + BV4_ALIGN16(Vec3p mPreca2_PaddedAligned); + BV4_ALIGN16(Vec3p mPreca0b_PaddedAligned); + BV4_ALIGN16(Vec3p mPreca1b_PaddedAligned); + BV4_ALIGN16(Vec3p mPreca2b_PaddedAligned); + + PX_FORCE_INLINE void precomputeBoxData(const PxVec3& extents, const PxMat33* PX_RESTRICT box_to_model) + { + PxMat33 absRot; //!< Absolute rotation matrix + precomputeData(this, &absRot, box_to_model); + + setupBoxData(this, extents, &absRot); + } + }; + +#endif // GU_BV4_BOX_OVERLAP_INTERNAL_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxSweep_Internal.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxSweep_Internal.h new file mode 100644 index 00000000..ed595e39 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxSweep_Internal.h @@ -0,0 +1,512 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "GuSweepTriangleUtils.h" +#include "GuSweepBoxTriangle_FeatureBased.h" +#include "GuSweepBoxTriangle_SAT.h" +#include "GuBV4_BoxOverlap_Internal.h" + +// PT: for box-sweeps please refer to \\sw\physx\PhysXSDK\3.4\trunk\InternalDocumentation\GU\Sweep strategies.ppt. 
+// We use: +// - method 3 if the box is an AABB (SWEEP_AABB_IMPL is defined) +// - method 2 if the box is an OBB (SWEEP_AABB_IMPL is undefined) + +#ifdef SWEEP_AABB_IMPL + // PT: TODO: refactor structure (TA34704) + struct RayParams + { + #ifdef GU_BV4_QUANTIZED_TREE + BV4_ALIGN16(Vec3p mCenterOrMinCoeff_PaddedAligned); + BV4_ALIGN16(Vec3p mExtentsOrMaxCoeff_PaddedAligned); + #endif + #ifndef GU_BV4_USE_SLABS + BV4_ALIGN16(Vec3p mData2_PaddedAligned); + BV4_ALIGN16(Vec3p mFDir_PaddedAligned); + BV4_ALIGN16(Vec3p mData_PaddedAligned); + BV4_ALIGN16(Vec3p mLocalDir_PaddedAligned); + #endif + BV4_ALIGN16(Vec3p mOrigin_Padded); // PT: TODO: this one could be switched to PaddedAligned & V4LoadA (TA34704) + }; + + #include "GuBV4_AABBAABBSweepTest.h" +#else + #include "GuBV4_BoxBoxOverlapTest.h" +#endif + +#include "GuBV4_BoxSweep_Params.h" + +static PX_FORCE_INLINE Vec4V multiply3x3V(const Vec4V p, const PxMat33& mat_Padded) +{ + const FloatV xxxV = V4GetX(p); + const FloatV yyyV = V4GetY(p); + const FloatV zzzV = V4GetZ(p); + + Vec4V ResV = V4Scale(V4LoadU_Safe(&mat_Padded.column0.x), xxxV); + ResV = V4Add(ResV, V4Scale(V4LoadU_Safe(&mat_Padded.column1.x), yyyV)); + ResV = V4Add(ResV, V4Scale(V4LoadU_Safe(&mat_Padded.column2.x), zzzV)); + + return ResV; +} + +// PT: TODO: __fastcall removed to make it compile everywhere. Revisit. 
+static bool /*__fastcall*/ triBoxSweep(BoxSweepParams* PX_RESTRICT params, PxU32 primIndex, bool nodeSorting=true) +{ + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + // Don't bother doing the actual sweep test if the triangle is too far away + if(1) + { + const float dp0 = p0.dot(params->mLocalDir_Padded); + const float dp1 = p1.dot(params->mLocalDir_Padded); + const float dp2 = p2.dot(params->mLocalDir_Padded); + + float TriMin = PxMin(dp0, dp1); + TriMin = PxMin(TriMin, dp2); + + if(TriMin >= params->mOffset + params->mStabbedFace.mDistance) + return false; + } + + TrianglePadded triBoxSpace; + const Vec4V transModelToBoxV = V4LoadU_Safe(¶ms->mTModelToBox_Padded.x); + const Vec4V v0V = V4Add(multiply3x3V(V4LoadU_Safe(&p0.x), params->mRModelToBox_Padded), transModelToBoxV); + V4StoreU_Safe(v0V, &triBoxSpace.verts[0].x); + const Vec4V v1V = V4Add(multiply3x3V(V4LoadU_Safe(&p1.x), params->mRModelToBox_Padded), transModelToBoxV); + V4StoreU_Safe(v1V, &triBoxSpace.verts[1].x); + const Vec4V v2V = V4Add(multiply3x3V(V4LoadU_Safe(&p2.x), params->mRModelToBox_Padded), transModelToBoxV); + V4StoreU_Safe(v2V, &triBoxSpace.verts[2].x); + + float Dist; + if(triBoxSweepTestBoxSpace_inlined(triBoxSpace, params->mOriginalExtents_Padded, params->mOriginalDir_Padded*params->mStabbedFace.mDistance, params->mOneOverDir_Padded, 1.0f, Dist, params->mBackfaceCulling)) + { + // PT: TODO: these muls & divs may not be needed at all - we just pass the unit dir/inverse dir to the sweep code. Revisit. 
(TA34704) + Dist *= params->mStabbedFace.mDistance; + params->mOneOverDir_Padded = params->mOneOverOriginalDir / Dist; + params->mStabbedFace.mDistance = Dist; + params->mStabbedFace.mTriangleID = primIndex; + // PT: TODO: revisit this (TA34704) + params->mP0 = triBoxSpace.verts[0]; + params->mP1 = triBoxSpace.verts[1]; + params->mP2 = triBoxSpace.verts[2]; +// V4StoreU_Safe(v0V, ¶ms->mP0.x); +// V4StoreU_Safe(v1V, ¶ms->mP1.x); +// V4StoreU_Safe(v2V, ¶ms->mP2.x); + + if(nodeSorting) + { +#ifdef SWEEP_AABB_IMPL + #ifndef GU_BV4_USE_SLABS + setupRayData(params, Dist, params->mOrigin_Padded, params->mLocalDir_PaddedAligned); + #endif +#else + params->ShrinkOBB(Dist); +#endif + } + return true; + } + return false; +} + +namespace +{ +class LeafFunction_BoxSweepClosest +{ +public: + static PX_FORCE_INLINE void doLeafTest(BoxSweepParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + triBoxSweep(params, primIndex); + primIndex++; + }while(nbToGo--); + } +}; + +class LeafFunction_BoxSweepAny +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(BoxSweepParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(triBoxSweep(params, primIndex)) + return 1; + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +// PT: TODO: refactor with sphere/capsule versions (TA34704) +static PX_FORCE_INLINE bool computeImpactData(const Box& box, const PxVec3& dir, SweepHit* PX_RESTRICT hit, const BoxSweepParams* PX_RESTRICT params, bool isDoubleSided, bool meshBothSides) +{ + if(params->mStabbedFace.mTriangleID==PX_INVALID_U32) + return false; // We didn't touch any triangle + + if(hit) + { + const float t = params->mStabbedFace.mDistance; + hit->mTriangleID = params->mStabbedFace.mTriangleID; + hit->mDistance = t; + + if(t==0.0f) + { + hit->mPos = PxVec3(0.0f); + hit->mNormal = -dir; + } + else + { + // PT: TODO: revisit/optimize/use this (TA34704) + const PxTriangle 
triInBoxSpace(params->mP0, params->mP1, params->mP2); + PxHitFlags outFlags = PxHitFlag::Enum(0); + computeBoxLocalImpact(hit->mPos, hit->mNormal, outFlags, box, params->mOriginalDir_Padded, triInBoxSpace, PxHitFlag::ePOSITION|PxHitFlag::eNORMAL, isDoubleSided, meshBothSides, t); + } + } + return true; +} + +template<class ParamsT> +static PX_FORCE_INLINE void setupBoxSweepParams(ParamsT* PX_RESTRICT params, const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree* PX_RESTRICT tree, const SourceMesh* PX_RESTRICT mesh, PxU32 flags) +{ + params->mStabbedFace.mTriangleID = PX_INVALID_U32; + setupParamsFlags(params, flags); + + setupMeshPointersAndQuantizedCoeffs(params, mesh, tree); + + prepareSweepData(localBox, localDir, maxDist, params); + +#ifdef SWEEP_AABB_IMPL + params->mOrigin_Padded = localBox.center; + #ifndef GU_BV4_USE_SLABS + params->mLocalDir_PaddedAligned = localDir; + setupRayData(params, maxDist, localBox.center, localDir); + #endif +#endif +} + +#include "GuBV4_Internal.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs.h" +#endif +#ifdef SWEEP_AABB_IMPL + #include "GuBV4_ProcessStreamOrdered_SegmentAABB_Inflated.h" + #include "GuBV4_ProcessStreamNoOrder_SegmentAABB_Inflated.h" + #ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs_KajiyaNoOrder.h" + #include "GuBV4_Slabs_KajiyaOrdered.h" + #endif +#else + #include "GuBV4_ProcessStreamOrdered_OBBOBB.h" + #include "GuBV4_ProcessStreamNoOrder_OBBOBB.h" + #ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs_SwizzledNoOrder.h" + #include "GuBV4_Slabs_SwizzledOrdered.h" + #endif +#endif + +#ifdef SWEEP_AABB_IMPL +Ps::IntBool Sweep_AABB_BV4(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags) +#else +Ps::IntBool Sweep_OBB_BV4(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags) +#endif +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + BoxSweepParams 
Params; + setupBoxSweepParams(&Params, localBox, localDir, maxDist, &tree, mesh, flags); + + if(tree.mNodes) + { +#ifdef SWEEP_AABB_IMPL + if(Params.mEarlyExit) + processStreamRayNoOrder(1, LeafFunction_BoxSweepAny)(tree.mNodes, tree.mInitData, &Params); + else + processStreamRayOrdered(1, LeafFunction_BoxSweepClosest)(tree.mNodes, tree.mInitData, &Params); +#else + if(Params.mEarlyExit) + processStreamNoOrder<LeafFunction_BoxSweepAny>(tree.mNodes, tree.mInitData, &Params); + else + processStreamOrdered<LeafFunction_BoxSweepClosest>(tree.mNodes, tree.mInitData, &Params); +#endif + } + else + doBruteForceTests<LeafFunction_BoxSweepAny, LeafFunction_BoxSweepClosest>(mesh->getNbTriangles(), &Params); + + return computeImpactData(localBox, localDir, hit, &Params, (flags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (flags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); +} + + + +// PT: box sweep callback version - currently not used + +namespace +{ + struct BoxSweepParamsCB : BoxSweepParams + { + // PT: these new members are only here to call computeImpactData during traversal :( + // PT: TODO: most of them may not be needed + Box mBoxCB; // Box in original space (maybe not local/mesh space) + PxVec3 mDirCB; // Dir in original space (maybe not local/mesh space) + const PxMat44* mWorldm_Aligned; + PxU32 mFlags; + + SweepUnlimitedCallback mCallback; + void* mUserData; + float mMaxDist; + bool mNodeSorting; + }; + +class LeafFunction_BoxSweepCB +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(BoxSweepParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(triBoxSweep(params, primIndex, params->mNodeSorting)) + { + // PT: TODO: in this version we must compute the impact data immediately, + // which is a terrible idea in general, but I'm not sure what else I can do. 
+ SweepHit hit; + const bool b = computeImpactData(params->mBoxCB, params->mDirCB, &hit, params, (params->mFlags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (params->mFlags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); + PX_ASSERT(b); + + // PT: then replicate part from BV4_BoxSweepSingle: + if(b && params->mWorldm_Aligned) + { + // Move to world space + // PT: TODO: optimize (TA34704) + hit.mPos = params->mWorldm_Aligned->transform(hit.mPos); + hit.mNormal = params->mWorldm_Aligned->rotate(hit.mNormal); + } + + reportUnlimitedCallbackHit(params, hit); + } + + primIndex++; + }while(nbToGo--); + + return 0; + } +}; + +} + +// PT: for design decisions in this function, refer to the comments of BV4_GenericSweepCB(). +// PT: 'worldm_Aligned' is only here to move back results to world space, but input is already in local space. +#ifdef SWEEP_AABB_IMPL +void Sweep_AABB_BV4_CB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting) +#else +void Sweep_OBB_BV4_CB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting) +#endif +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + BoxSweepParamsCB Params; + Params.mBoxCB = localBox; + Params.mDirCB = localDir; + Params.mWorldm_Aligned = worldm_Aligned; + Params.mFlags = flags; + + Params.mCallback = callback; + Params.mUserData = userData; + Params.mMaxDist = maxDist; + Params.mNodeSorting = nodeSorting; + setupBoxSweepParams(&Params, localBox, localDir, maxDist, &tree, mesh, flags); + + PX_ASSERT(!Params.mEarlyExit); + + if(tree.mNodes) + { + if(nodeSorting) + { +#ifdef SWEEP_AABB_IMPL + processStreamRayOrdered(1, LeafFunction_BoxSweepCB)(tree.mNodes, tree.mInitData, &Params); +#else + 
processStreamOrdered<LeafFunction_BoxSweepCB>(tree.mNodes, tree.mInitData, &Params); +#endif + } + else + { +#ifdef SWEEP_AABB_IMPL + processStreamRayNoOrder(1, LeafFunction_BoxSweepCB)(tree.mNodes, tree.mInitData, &Params); +#else + processStreamNoOrder<LeafFunction_BoxSweepCB>(tree.mNodes, tree.mInitData, &Params); +#endif + } + } + else + doBruteForceTests<LeafFunction_BoxSweepCB, LeafFunction_BoxSweepCB>(mesh->getNbTriangles(), &Params); +} + + + + +// New callback-based box sweeps. Reuses code above, allow early exits. Some init code may be done in vain +// since the leaf tests are not performed (we don't do box-sweeps-vs-tri since the box is only a BV around +// the actual shape, say a convex) + +namespace +{ +struct GenericSweepParamsCB : BoxSweepParams +{ + MeshSweepCallback mCallback; + void* mUserData; +}; + +class LeafFunction_BoxSweepClosestCB +{ +public: + static PX_FORCE_INLINE void doLeafTest(GenericSweepParamsCB* PX_RESTRICT params, PxU32 prim_index) + { + PxU32 nbToGo = getNbPrimitives(prim_index); + do + { + // PT: in the regular version we'd do a box-vs-triangle sweep test here + // Instead we just grab the triangle and send it to the callback + // + // This can be used for regular "closest hit" sweeps, when the scale is not identity or + // when the box is just around a more complex shape (e.g. convex). In this case we want + // the calling code to compute a convex-triangle distance, and then we want to shrink + // the ray/box while doing an ordered traversal. + // + // For "sweep all" or "sweep any" purposes we want to either report all hits or early exit + // as soon as we find one. There is no need for shrinking or ordered traversals here. 
+ + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, prim_index, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + // Don't bother doing the actual sweep test if the triangle is too far away + const float dp0 = p0.dot(params->mLocalDir_Padded); + const float dp1 = p1.dot(params->mLocalDir_Padded); + const float dp2 = p2.dot(params->mLocalDir_Padded); + + float TriMin = PxMin(dp0, dp1); + TriMin = PxMin(TriMin, dp2); + + if(TriMin < params->mOffset + params->mStabbedFace.mDistance) + { +// const PxU32 vrefs[3] = { VRef0, VRef1, VRef2 }; + float Dist = params->mStabbedFace.mDistance; + if((params->mCallback)(params->mUserData, p0, p1, p2, prim_index, /*vrefs,*/ Dist)) + return; // PT: TODO: we return here but the ordered path doesn't really support early exits (TA34704) + + if(Dist<params->mStabbedFace.mDistance) + { + params->mStabbedFace.mDistance = Dist; + params->mStabbedFace.mTriangleID = prim_index; +#ifdef SWEEP_AABB_IMPL + #ifndef GU_BV4_USE_SLABS + setupRayData(params, Dist, params->mOrigin_Padded, params->mLocalDir_PaddedAligned); + #endif +#else + params->ShrinkOBB(Dist); +#endif + } + } + + prim_index++; + }while(nbToGo--); + } +}; + +class LeafFunction_BoxSweepAnyCB +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(GenericSweepParamsCB* PX_RESTRICT params, PxU32 prim_index) + { + PxU32 nbToGo = getNbPrimitives(prim_index); + do + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, prim_index, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + { +// const PxU32 vrefs[3] = { VRef0, VRef1, VRef2 }; + float Dist = params->mStabbedFace.mDistance; + if((params->mCallback)(params->mUserData, p0, p1, p2, prim_index, /*vrefs,*/ Dist)) + return 1; + } + + prim_index++; + 
}while(nbToGo--); + + return 0; + } +}; +} + +#ifdef SWEEP_AABB_IMPL +void GenericSweep_AABB_CB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, MeshSweepCallback callback, void* userData, PxU32 flags) +#else +void GenericSweep_OBB_CB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, MeshSweepCallback callback, void* userData, PxU32 flags) +#endif +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + GenericSweepParamsCB Params; + Params.mCallback = callback; + Params.mUserData = userData; + setupBoxSweepParams(&Params, localBox, localDir, maxDist, &tree, mesh, flags); + + if(tree.mNodes) + { +#ifdef SWEEP_AABB_IMPL + if(Params.mEarlyExit) + processStreamRayNoOrder(1, LeafFunction_BoxSweepAnyCB)(tree.mNodes, tree.mInitData, &Params); + else + processStreamRayOrdered(1, LeafFunction_BoxSweepClosestCB)(tree.mNodes, tree.mInitData, &Params); +#else + if(Params.mEarlyExit) + processStreamNoOrder<LeafFunction_BoxSweepAnyCB>(tree.mNodes, tree.mInitData, &Params); + else + processStreamOrdered<LeafFunction_BoxSweepClosestCB>(tree.mNodes, tree.mInitData, &Params); +#endif + } + else + doBruteForceTests<LeafFunction_BoxSweepAnyCB, LeafFunction_BoxSweepClosestCB>(mesh->getNbTriangles(), &Params); +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxSweep_Params.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxSweep_Params.h new file mode 100644 index 00000000..6869783f --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxSweep_Params.h @@ -0,0 +1,211 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +// This is used by the box-sweep & capsule-sweep code + +#if PX_VC + #pragma warning(disable: 4505) // unreferenced local function has been removed +#endif + +#include "PsBasicTemplates.h" + +namespace +{ +#ifdef SWEEP_AABB_IMPL +struct BoxSweepParams : RayParams +#else +struct BoxSweepParams : OBBTestParams +#endif +{ + const IndTri32* PX_RESTRICT mTris32; + const IndTri16* PX_RESTRICT mTris16; + const PxVec3* PX_RESTRICT mVerts; + +#ifndef SWEEP_AABB_IMPL + Box mLocalBox; +#endif + PxVec3 mLocalDir_Padded; + RaycastHitInternal mStabbedFace; + + PxU32 mBackfaceCulling; + PxU32 mEarlyExit; + + PxVec3 mP0, mP1, mP2; + PxVec3 mBestTriNormal; + + float mOffset; + PxVec3 mProj; + PxVec3 mDP; + +#ifndef SWEEP_AABB_IMPL + PxMat33 mAR; //!< Absolute rotation matrix +#endif + + PxMat33 mRModelToBox_Padded; //!< Rotation from model space to obb space + PxVec3 mTModelToBox_Padded; //!< Translation from model space to obb space + PxVec3 mOriginalExtents_Padded; + PxVec3 mOriginalDir_Padded; + PxVec3 mOneOverDir_Padded; + PxVec3 mOneOverOriginalDir; + +#ifndef SWEEP_AABB_IMPL + PX_FORCE_INLINE void ShrinkOBB(float d) + { + const PxVec3 BoxExtents = mDP + d * mProj; + mTBoxToModel_PaddedAligned = mLocalBox.center + mLocalDir_Padded*d*0.5f; + + setupBoxData(this, BoxExtents, &mAR); + } +#endif +}; +} + +// PT: TODO: check asm again in PhysX version, compare to original (TA34704) +static void prepareSweepData(const Box& box, const PxVec3& dir, float maxDist, BoxSweepParams* PX_RESTRICT params) +{ + invertBoxMatrix(params->mRModelToBox_Padded, params->mTModelToBox_Padded, box); + + params->mOriginalExtents_Padded = box.extents; + + const PxVec3 OriginalDir = params->mRModelToBox_Padded.transform(dir); + params->mOriginalDir_Padded = OriginalDir; + + const PxVec3 OneOverOriginalDir(OriginalDir.x!=0.0f ? 1.0f/OriginalDir.x : 0.0f, + OriginalDir.y!=0.0f ? 1.0f/OriginalDir.y : 0.0f, + OriginalDir.z!=0.0f ? 
1.0f/OriginalDir.z : 0.0f); + + params->mOneOverOriginalDir = OneOverOriginalDir; + params->mOneOverDir_Padded = OneOverOriginalDir / maxDist; + + { + const Box& LocalBox = box; + const PxVec3& LocalDir = dir; + + params->mLocalDir_Padded = LocalDir; + params->mStabbedFace.mDistance = maxDist; +#ifndef SWEEP_AABB_IMPL + params->mLocalBox = LocalBox; // PT: TODO: check asm for operator= +#endif + + PxMat33 boxToModelR; + + // Original code: + // OBB::CreateOBB(LocalBox, LocalDir, 0.5f) + { + PxVec3 R1, R2; + { + float dd[3]; + dd[0] = fabsf(LocalBox.rot.column0.dot(LocalDir)); + dd[1] = fabsf(LocalBox.rot.column1.dot(LocalDir)); + dd[2] = fabsf(LocalBox.rot.column2.dot(LocalDir)); + float dmax = dd[0]; + PxU32 ax0=1; + PxU32 ax1=2; + if(dd[1]>dmax) + { + dmax=dd[1]; + ax0=0; + ax1=2; + } + if(dd[2]>dmax) + { + dmax=dd[2]; + ax0=0; + ax1=1; + } + if(dd[ax1]<dd[ax0]) + Ps::swap(ax0, ax1); + + R1 = LocalBox.rot[ax0]; + R1 -= R1.dot(LocalDir)*LocalDir; // Project to plane whose normal is dir + R1.normalize(); + R2 = LocalDir.cross(R1); + } + // Original code: + // mRot = params->mRBoxToModel + boxToModelR.column0 = LocalDir; + boxToModelR.column1 = R1; + boxToModelR.column2 = R2; + + // Original code: + // float Offset[3]; + // 0.5f comes from the Offset[r]*0.5f, doesn't mean 'd' is 0.5f + params->mProj.x = 0.5f; + params->mProj.y = LocalDir.dot(R1)*0.5f; + params->mProj.z = LocalDir.dot(R2)*0.5f; + + // Original code: + //mExtents[r] = Offset[r]*0.5f + fabsf(box.mRot[0]|R)*box.mExtents.x + fabsf(box.mRot[1]|R)*box.mExtents.y + fabsf(box.mRot[2]|R)*box.mExtents.z; + // => we store the first part of the computation, minus 'Offset[r]*0.5f' + for(PxU32 r=0;r<3;r++) + { + const PxVec3& R = boxToModelR[r]; + params->mDP[r] = fabsf(LocalBox.rot.column0.dot(R)*LocalBox.extents.x) + + fabsf(LocalBox.rot.column1.dot(R)*LocalBox.extents.y) + + fabsf(LocalBox.rot.column2.dot(R)*LocalBox.extents.z); + } + // In the original code, both mCenter & mExtents depend on 'd', and thus we 
will need to recompute these two members. + // + // For mExtents we have: + // + // float Offset[3]; + // Offset[0] = d; + // Offset[1] = d*(dir|R1); + // Offset[2] = d*(dir|R2); + // + // mExtents[r] = Offset[r]*0.5f + fabsf(box.mRot[0]|R)*box.mExtents.x + fabsf(box.mRot[1]|R)*box.mExtents.y + fabsf(box.mRot[2]|R)*box.mExtents.z; + // <=> mExtents[r] = Offset[r]*0.5f + Params.mDP[r]; We precompute the second part that doesn't depend on d, stored in mDP + // <=> mExtents[r] = Params.mProj[r]*d + Params.mDP[r]; We extract d from the first part, store what is left in mProj + // + // Thus in ShrinkOBB the code needed to update the extents is just: + // mBoxExtents = mDP + d * mProj; + // + // For mCenter we have: + // + // mCenter = box.mCenter + dir*d*0.5f; + // + // So we simply use this formula directly, with the new d. Result is stored in 'mTBoxToModel' +/* + PX_FORCE_INLINE void ShrinkOBB(float d) + { + mBoxExtents = mDP + d * mProj; + mTBoxToModel = mLocalBox.mCenter + mLocalDir*d*0.5f; +*/ + } + + // This one is for culling tris, unrelated to CreateOBB + params->mOffset = params->mDP.x + LocalBox.center.dot(LocalDir); + +#ifndef SWEEP_AABB_IMPL + precomputeData(params, ¶ms->mAR, &boxToModelR); + + params->ShrinkOBB(maxDist); +#endif + } +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweep.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweep.cpp new file mode 100644 index 00000000..c8d8a5c2 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweep.cpp @@ -0,0 +1,173 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "GuBV4.h" +#include "GuSweepSphereTriangle.h" +using namespace physx; +using namespace Gu; + +#if PX_INTEL_FAMILY + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#include "GuSIMDHelpers.h" +#include "GuInternal.h" + +#include "GuBV4_BoxOverlap_Internal.h" +#include "GuBV4_BoxSweep_Params.h" + +namespace +{ + struct CapsuleSweepParams : BoxSweepParams + { + Capsule mLocalCapsule; + PxVec3 mCapsuleCenter; + PxVec3 mExtrusionDir; + PxU32 mEarlyExit; + float mBestAlignmentValue; + float mBestDistance; + float mMaxDist; + }; +} + +#include "GuBV4_CapsuleSweep_Internal.h" +#include "GuBV4_Internal.h" + +#include "GuBV4_BoxBoxOverlapTest.h" + +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs.h" +#endif +#include "GuBV4_ProcessStreamOrdered_OBBOBB.h" +#include "GuBV4_ProcessStreamNoOrder_OBBOBB.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs_SwizzledNoOrder.h" + #include "GuBV4_Slabs_SwizzledOrdered.h" +#endif + +Ps::IntBool BV4_CapsuleSweepSingle(const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + CapsuleSweepParams Params; + setupCapsuleParams(&Params, capsule, dir, maxDist, &tree, mesh, flags); + + if(tree.mNodes) + { + if(Params.mEarlyExit) + processStreamNoOrder<LeafFunction_CapsuleSweepAny>(tree.mNodes, tree.mInitData, &Params); + else + processStreamOrdered<LeafFunction_CapsuleSweepClosest>(tree.mNodes, tree.mInitData, &Params); + } + else + doBruteForceTests<LeafFunction_CapsuleSweepAny, LeafFunction_CapsuleSweepClosest>(mesh->getNbTriangles(), &Params); + + return computeImpactDataT<ImpactFunctionCapsule>(capsule, dir, hit, &Params, NULL, (flags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (flags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); +} + +// PT: capsule sweep callback version - currently not used + +namespace +{ + struct CapsuleSweepParamsCB : CapsuleSweepParams + { + // PT: these new members are only here 
to call computeImpactDataT during traversal :( + // PT: TODO: most of them may not be needed + // PT: TODO: for example mCapsuleCB probably dup of mLocalCapsule + Capsule mCapsuleCB; // Capsule in original space (maybe not local/mesh space) + PxVec3 mDirCB; // Dir in original space (maybe not local/mesh space) + const PxMat44* mWorldm_Aligned; + PxU32 mFlags; + + SweepUnlimitedCallback mCallback; + void* mUserData; + float mMaxDist; + bool mNodeSorting; + }; + +class LeafFunction_CapsuleSweepCB +{ +public: + + static PX_FORCE_INLINE Ps::IntBool doLeafTest(CapsuleSweepParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(triCapsuleSweep(params, primIndex, params->mNodeSorting)) + { + // PT: TODO: in this version we must compute the impact data immediately, + // which is a terrible idea in general, but I'm not sure what else I can do. + SweepHit hit; + const bool b = computeImpactDataT<ImpactFunctionCapsule>(params->mCapsuleCB, params->mDirCB, &hit, params, params->mWorldm_Aligned, (params->mFlags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (params->mFlags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); + PX_ASSERT(b); + PX_UNUSED(b); + + reportUnlimitedCallbackHit(params, hit); + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +// PT: for design decisions in this function, refer to the comments of BV4_GenericSweepCB(). 
+void BV4_CapsuleSweepCB(const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + CapsuleSweepParamsCB Params; + Params.mCapsuleCB = capsule; + Params.mDirCB = dir; + Params.mWorldm_Aligned = worldm_Aligned; + Params.mFlags = flags; + + Params.mCallback = callback; + Params.mUserData = userData; + Params.mMaxDist = maxDist; + Params.mNodeSorting = nodeSorting; + setupCapsuleParams(&Params, capsule, dir, maxDist, &tree, mesh, flags); + + PX_ASSERT(!Params.mEarlyExit); + + if(tree.mNodes) + { + if(nodeSorting) + processStreamOrdered<LeafFunction_CapsuleSweepCB>(tree.mNodes, tree.mInitData, &Params); + else + processStreamNoOrder<LeafFunction_CapsuleSweepCB>(tree.mNodes, tree.mInitData, &Params); + } + else + doBruteForceTests<LeafFunction_CapsuleSweepCB, LeafFunction_CapsuleSweepCB>(mesh->getNbTriangles(), &Params); +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweepAA.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweepAA.cpp new file mode 100644 index 00000000..1fd6aa05 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweepAA.cpp @@ -0,0 +1,111 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "GuBV4.h" +#include "GuSweepSphereTriangle.h" +using namespace physx; +using namespace Gu; + +#if PX_INTEL_FAMILY + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#include "GuBV4_Common.h" +#include "GuInternal.h" + +#define SWEEP_AABB_IMPL + + // PT: TODO: refactor structure (TA34704) + struct RayParams + { + #ifdef GU_BV4_QUANTIZED_TREE + BV4_ALIGN16(Vec3p mCenterOrMinCoeff_PaddedAligned); + BV4_ALIGN16(Vec3p mExtentsOrMaxCoeff_PaddedAligned); + #endif + #ifndef GU_BV4_USE_SLABS + BV4_ALIGN16(Vec3p mData2_PaddedAligned); + BV4_ALIGN16(Vec3p mFDir_PaddedAligned); + BV4_ALIGN16(Vec3p mData_PaddedAligned); + BV4_ALIGN16(Vec3p mLocalDir_PaddedAligned); + #endif + BV4_ALIGN16(Vec3p mOrigin_Padded); // PT: TODO: this one could be switched to PaddedAligned & V4LoadA (TA34704) + }; + +#include "GuBV4_BoxSweep_Params.h" + +namespace +{ + struct CapsuleSweepParams : BoxSweepParams + { + Capsule mLocalCapsule; + PxVec3 mCapsuleCenter; + PxVec3 mExtrusionDir; + PxU32 mEarlyExit; + float mBestAlignmentValue; + float mBestDistance; + float mMaxDist; + }; +} + +#include "GuBV4_CapsuleSweep_Internal.h" +#include "GuBV4_Internal.h" + +#include "GuBV4_AABBAABBSweepTest.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs.h" +#endif +#include "GuBV4_ProcessStreamOrdered_SegmentAABB_Inflated.h" +#include "GuBV4_ProcessStreamNoOrder_SegmentAABB_Inflated.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs_KajiyaNoOrder.h" + #include "GuBV4_Slabs_KajiyaOrdered.h" +#endif + +Ps::IntBool BV4_CapsuleSweepSingleAA(const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + CapsuleSweepParams Params; + setupCapsuleParams(&Params, capsule, dir, maxDist, &tree, mesh, flags); + + if(tree.mNodes) + { + if(Params.mEarlyExit) + processStreamRayNoOrder(1, LeafFunction_CapsuleSweepAny)(tree.mNodes, tree.mInitData, &Params); + else + 
processStreamRayOrdered(1, LeafFunction_CapsuleSweepClosest)(tree.mNodes, tree.mInitData, &Params); + } + else + doBruteForceTests<LeafFunction_CapsuleSweepAny, LeafFunction_CapsuleSweepClosest>(mesh->getNbTriangles(), &Params); + + return computeImpactDataT<ImpactFunctionCapsule>(capsule, dir, hit, &Params, NULL, (flags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (flags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweep_Internal.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweep_Internal.h new file mode 100644 index 00000000..260ba0af --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweep_Internal.h @@ -0,0 +1,434 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. 
Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV4_CAPSULE_SWEEP_INTERNAL_H +#define GU_BV4_CAPSULE_SWEEP_INTERNAL_H + +// PT: for capsule-sweeps please refer to \\sw\physx\PhysXSDK\3.4\trunk\InternalDocumentation\GU\Sweep strategies.ppt. +// We use: +// - method 3 if the capsule is axis-aligned (SWEEP_AABB_IMPL is defined) +// - method 2 otherwise (SWEEP_AABB_IMPL is undefined) + +// PT: TODO: get rid of that one +static PX_FORCE_INLINE bool sweepSphereVSTriangle( const PxVec3& center, const float radius, + const PxVec3* PX_RESTRICT triVerts, const PxVec3& triUnitNormal, + const PxVec3& unitDir, + float& curT, bool& directHit) +{ + float currentDistance; + if(!sweepSphereVSTri(triVerts, triUnitNormal, center, radius, unitDir, currentDistance, directHit, true)) + return false; + + // PT: using ">" or ">=" is enough to block the CCT or not in the DE5967 visual test. Change to ">=" if a repro is needed. + if(currentDistance > curT) + return false; + curT = currentDistance; + return true; +} + +static PX_FORCE_INLINE bool sweepSphereVSQuad( const PxVec3& center, const float radius, + const PxVec3* PX_RESTRICT quadVerts, const PxVec3& quadUnitNormal, + const PxVec3& unitDir, + float& curT) +{ + float currentDistance; + if(!sweepSphereVSQuad(quadVerts, quadUnitNormal, center, radius, unitDir, currentDistance)) + return false; + + // PT: using ">" or ">=" is enough to block the CCT or not in the DE5967 visual test. Change to ">=" if a repro is needed. 
+ if(currentDistance > curT) + return false; + curT = currentDistance; + return true; +} + +/////////////////////////////////////////////////////////////////////////////// + +// PT: TODO: __fastcall removed to make it compile everywhere. Revisit. +static bool /*__fastcall*/ testTri( const CapsuleSweepParams* PX_RESTRICT params, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, const PxVec3& N, + const PxVec3& unitDir, const float capsuleRadius, const float dpc0, float& curT, bool& status) +{ + // PT: TODO: check the assembly here (TA34704) + PxVec3 currentTri[3]; + // PT: TODO: optimize this copy (TA34704) + currentTri[0] = p0; + currentTri[1] = p1; + currentTri[2] = p2; + + // PT: beware, culling is only ok on the sphere I think + if(rejectTriangle(params->mCapsuleCenter, unitDir, curT, capsuleRadius, currentTri, dpc0)) + return false; + + float magnitude = N.magnitude(); + if(magnitude==0.0f) + return false; + + PxVec3 triNormal = N / magnitude; + + bool DirectHit; + if(sweepSphereVSTriangle(params->mCapsuleCenter, capsuleRadius, currentTri, triNormal, unitDir, curT, DirectHit)) + { + status = true; + } + return DirectHit; +} + +// PT: TODO: __fastcall removed to make it compile everywhere. Revisit. 
+static void /*__fastcall*/ testQuad(const CapsuleSweepParams* PX_RESTRICT params, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, const PxVec3& p3, const PxVec3& N, + const PxVec3& unitDir, const float capsuleRadius, const float dpc0, float& curT, bool& status) +{ + // PT: TODO: optimize this copy (TA34704) + PxVec3 currentQuad[4]; + currentQuad[0] = p0; + currentQuad[1] = p1; + currentQuad[2] = p2; + currentQuad[3] = p3; + + // PT: beware, culling is only ok on the sphere I think + if(rejectQuad(params->mCapsuleCenter, unitDir, curT, capsuleRadius, currentQuad, dpc0)) + return; + + float magnitude = N.magnitude(); + if(magnitude==0.0f) + return; + + PxVec3 triNormal = N / magnitude; + + if(sweepSphereVSQuad(params->mCapsuleCenter, capsuleRadius, currentQuad, triNormal, unitDir, curT)) + { + status = true; + } +} + +static PX_FORCE_INLINE float Set2(const PxVec3& p0, const PxVec3& n, const PxVec3& p) +{ + return (p-p0).dot(n); +} + +static PX_FORCE_INLINE bool sweepCapsuleVsTriangle(const CapsuleSweepParams* PX_RESTRICT params, const PxTriangle& triangle, float& t, bool isDoubleSided, PxVec3& normal) +{ + const PxVec3& unitDir = params->mLocalDir_Padded; + + // Create triangle normal + PxVec3 denormalizedNormal = (triangle.verts[0] - triangle.verts[1]).cross(triangle.verts[0] - triangle.verts[2]); + + normal = denormalizedNormal; + + // Backface culling + const bool culled = denormalizedNormal.dot(unitDir) > 0.0f; + if(culled) + { + if(!isDoubleSided) + return false; + + denormalizedNormal = -denormalizedNormal; + } + + const float capsuleRadius = params->mLocalCapsule.radius; + float curT = params->mStabbedFace.mDistance; + const float dpc0 = params->mCapsuleCenter.dot(unitDir); + + bool status = false; + + // Extrude mesh on the fly + const PxVec3 p0 = triangle.verts[0] - params->mExtrusionDir; + const PxVec3 p1 = triangle.verts[1+culled] - params->mExtrusionDir; + const PxVec3 p2 = triangle.verts[2-culled] - params->mExtrusionDir; + + const PxVec3 p0b = 
triangle.verts[0] + params->mExtrusionDir; + const PxVec3 p1b = triangle.verts[1+culled] + params->mExtrusionDir; + const PxVec3 p2b = triangle.verts[2-culled] + params->mExtrusionDir; + + const float extrusionSign = denormalizedNormal.dot(params->mExtrusionDir); + + const PxVec3 p2b_p1b = p2b - p1b; + const PxVec3 p0b_p1b = p0b - p1b; + const PxVec3 p2b_p2 = 2.0f * params->mExtrusionDir; + const PxVec3 p1_p1b = -p2b_p2; + + const PxVec3 N1 = p2b_p1b.cross(p0b_p1b); + const float dp0 = Set2(p0b, N1, params->mCapsuleCenter); + + const PxVec3 N2 = (p2 - p1).cross(p0 - p1); + const float dp1 = -Set2(p0, N2, params->mCapsuleCenter); + + bool directHit; + if(extrusionSign >= 0.0f) + directHit = testTri(params, p0b, p1b, p2b, N1, unitDir, capsuleRadius, dpc0, curT, status); + else + directHit = testTri(params, p0, p1, p2, N2, unitDir, capsuleRadius, dpc0, curT, status); + + const PxVec3 N3 = p2b_p1b.cross(p1_p1b); + const float dp2 = -Set2(p1, N3, params->mCapsuleCenter); + if(!directHit) + { + const float dp = N3.dot(unitDir); + if(dp*extrusionSign>=0.0f) + testQuad(params, p1, p1b, p2, p2b, N3, unitDir, capsuleRadius, dpc0, curT, status); + } + + const PxVec3 N5 = p2b_p2.cross(p0 - p2); + const float dp3 = -Set2(p0, N5, params->mCapsuleCenter); + if(!directHit) + { + const float dp = N5.dot(unitDir); + if(dp*extrusionSign>=0.0f) + testQuad(params, p2, p2b, p0, p0b, N5, unitDir, capsuleRadius, dpc0, curT, status); + } + + const PxVec3 N7 = p1_p1b.cross(p0b_p1b); + const float dp4 = -Set2(p0b, N7, params->mCapsuleCenter); + if(!directHit) + { + const float dp = N7.dot(unitDir); + if(dp*extrusionSign>=0.0f) + testQuad(params, p0, p0b, p1, p1b, N7, unitDir, capsuleRadius, dpc0, curT, status); + } + + if(1) + { + bool originInside = true; + if(extrusionSign<0.0f) + { + if(dp0<0.0f || dp1<0.0f || dp2<0.0f || dp3<0.0f || dp4<0.0f) + originInside = false; + } + else + { + if(dp0>0.0f || dp1>0.0f || dp2>0.0f || dp3>0.0f || dp4>0.0f) + originInside = false; + } + 
if(originInside) + { + t = 0.0f; + return true; + } + } + + if(!status) + return false; // We didn't touch any triangle + + t = curT; + + return true; +} + +// PT: TODO: __fastcall removed to make it compile everywhere. Revisit. +static bool /*__fastcall*/ triCapsuleSweep(CapsuleSweepParams* PX_RESTRICT params, PxU32 primIndex, bool nodeSorting=true) +{ + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + const PxTriangle Tri(p0, p1, p2); // PT: TODO: check calls to empty ctor/dtor here (TA34704) + + const bool isDoubleSided = params->mBackfaceCulling==0; + + float Dist; + PxVec3 denormalizedNormal; + if(sweepCapsuleVsTriangle(params, Tri, Dist, isDoubleSided, denormalizedNormal)) + { + const PxReal distEpsilon = GU_EPSILON_SAME_DISTANCE; // pick a farther hit within distEpsilon that is more opposing than the previous closest hit + const PxReal alignmentValue = computeAlignmentValue(denormalizedNormal, params->mLocalDir_Padded); + + if(keepTriangle(Dist, alignmentValue, params->mBestDistance, params->mBestAlignmentValue, params->mMaxDist, distEpsilon)) + { + params->mStabbedFace.mDistance = Dist; + params->mStabbedFace.mTriangleID = primIndex; + + params->mP0 = p0; + params->mP1 = p1; + params->mP2 = p2; + + params->mBestDistance = PxMin(params->mBestDistance, Dist); // exact lower bound + params->mBestAlignmentValue = alignmentValue; + params->mBestTriNormal = denormalizedNormal; + + if(nodeSorting) + { +#ifdef SWEEP_AABB_IMPL + #ifndef GU_BV4_USE_SLABS + setupRayData(params, Dist, params->mOrigin_Padded, params->mLocalDir_PaddedAligned); + #endif +#else + params->ShrinkOBB(Dist); +#endif + } + return true; + } + } + return false; +} + +#include "GuDistanceSegmentTriangleSIMD.h" + +namespace +{ +class LeafFunction_CapsuleSweepClosest +{ +public: + static 
PX_FORCE_INLINE void doLeafTest(CapsuleSweepParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + triCapsuleSweep(params, primIndex); + primIndex++; + }while(nbToGo--); + } +}; + +class LeafFunction_CapsuleSweepAny +{ +public: + + static PX_FORCE_INLINE Ps::IntBool doLeafTest(CapsuleSweepParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(triCapsuleSweep(params, primIndex)) + return 1; + primIndex++; + }while(nbToGo--); + + return 0; + } +}; + +class ImpactFunctionCapsule +{ +public: + static PX_FORCE_INLINE void computeImpact(PxVec3& impactPos, PxVec3& impactNormal, const Capsule& capsule, const PxVec3& dir, const PxReal t, const TrianglePadded& triangle) + { + const PxVec3 delta = dir * t; + const Vec3p P0 = capsule.p0 + delta; + const Vec3p P1 = capsule.p1 + delta; + Vec3V pointOnSeg, pointOnTri; + distanceSegmentTriangleSquared( + // PT: we use Vec3p so it is safe to V4LoadU P0 and P1 + V3LoadU_SafeReadW(P0), V3LoadU_SafeReadW(P1), + // PT: we use TrianglePadded so it is safe to V4LoadU the triangle vertices + V3LoadU_SafeReadW(triangle.verts[0]), V3LoadU_SafeReadW(triangle.verts[1]), V3LoadU_SafeReadW(triangle.verts[2]), + pointOnSeg, pointOnTri); + + PxVec3 localImpactPos, tmp; + V3StoreU(pointOnTri, localImpactPos); + V3StoreU(pointOnSeg, tmp); + + // PT: TODO: refactor with computeSphereTriImpactData (TA34704) + PxVec3 localImpactNormal = tmp - localImpactPos; + const float M = localImpactNormal.magnitude(); + if(M<1e-3f) + { + localImpactNormal = (triangle.verts[0] - triangle.verts[1]).cross(triangle.verts[0] - triangle.verts[2]); + localImpactNormal.normalize(); + } + else + localImpactNormal /= M; + + impactPos = localImpactPos; + impactNormal = localImpactNormal; + } +}; +} + +static void computeBoxAroundCapsule(const Capsule& capsule, Box& box, PxVec3& extrusionDir) +{ + // Box center = center of the two capsule's endpoints + box.center = 
capsule.computeCenter();

	// Half extrusion vector: from the segment midpoint towards p0 (length = half segment length).
	extrusionDir = (capsule.p0 - capsule.p1)*0.5f;
	const PxF32 d = extrusionDir.magnitude();

	// Box extents
	// x: radius + half segment length (x axis is aligned with the segment, see below)
	// y/z: radius
	box.extents.x = capsule.radius + d;
	box.extents.y = capsule.radius;
	box.extents.z = capsule.radius;

	// Box orientation
	if(d==0.0f)
	{
		// Degenerate capsule (p0==p1): it's a sphere, any orientation works.
		box.rot = PxMat33(PxIdentity);
	}
	else
	{
		PxVec3 dir, right, up;
		Ps::computeBasis(capsule.p0, capsule.p1, dir, right, up);
		box.setAxes(dir, right, up);
	}
}

// Initializes the shared capsule-sweep parameters:
// - best-hit tracking (distance / normal alignment) and query flags
// - mesh pointers (and quantization coeffs for quantized trees)
// - the local capsule, its bounding box and extrusion vector
// - the traversal data: inflated AABB sweep under SWEEP_AABB_IMPL, OBB sweep otherwise
template<class ParamsT>
static PX_FORCE_INLINE void setupCapsuleParams(ParamsT* PX_RESTRICT params, const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree* PX_RESTRICT tree, const SourceMesh* PX_RESTRICT mesh, PxU32 flags)
{
	params->mStabbedFace.mTriangleID = PX_INVALID_U32;
	// NOTE(review): 2.0f presumably exceeds any computeAlignmentValue() result so the first
	// kept hit always wins — TODO confirm against computeAlignmentValue.
	params->mBestAlignmentValue = 2.0f;
	// Start slightly beyond maxDist so hits exactly at maxDist are still kept.
	params->mBestDistance = maxDist + GU_EPSILON_SAME_DISTANCE;
	params->mMaxDist = maxDist;

	setupParamsFlags(params, flags);

	setupMeshPointersAndQuantizedCoeffs(params, mesh, tree);

	params->mLocalCapsule = capsule;

	Box localBox;
	computeBoxAroundCapsule(capsule, localBox, params->mExtrusionDir);

	params->mCapsuleCenter = localBox.center;

	const PxVec3& localDir = dir;

#ifdef SWEEP_AABB_IMPL
	// Axis-aligned codepath: sweep an AABB inflated by the radius around the capsule segment.
	const PxVec3& localP0 = params->mLocalCapsule.p0;
	const PxVec3& localP1 = params->mLocalCapsule.p1;
	const PxVec3 sweepOrigin = (localP0+localP1)*0.5f;
	const PxVec3 sweepExtents = PxVec3(params->mLocalCapsule.radius) + (localP0-localP1).abs()*0.5f;

	#ifndef GU_BV4_USE_SLABS
	params->mLocalDir_PaddedAligned = localDir;
	#endif
	params->mOrigin_Padded = sweepOrigin;

	const Box aabb(sweepOrigin, sweepExtents, PxMat33(PxIdentity));
	prepareSweepData(aabb, localDir, maxDist, params);	// PT: TODO: optimize this call for idt rotation (TA34704)

	#ifndef GU_BV4_USE_SLABS
	setupRayData(params, maxDist, sweepOrigin, localDir);
	#endif
#else
	// General codepath: sweep the capsule's OBB through the tree.
	prepareSweepData(localBox, localDir, maxDist, params);
#endif
}

#endif // GU_BV4_CAPSULE_SWEEP_INTERNAL_H

diff --git 
a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Common.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Common.h new file mode 100644 index 00000000..2596785f --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Common.h @@ -0,0 +1,437 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. 
All rights reserved. + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Include Guard +#ifndef GU_BV4_COMMON_H +#define GU_BV4_COMMON_H + +#include "foundation/PxMat44.h" +#include "GuBox.h" +#include "GuSphere.h" +#include "GuCapsule.h" +#include "GuSIMDHelpers.h" + +#define BV4_ALIGN16(x) PX_ALIGN_PREFIX(16) x PX_ALIGN_SUFFIX(16) + +namespace physx +{ +namespace Gu +{ + enum QueryModifierFlag + { + QUERY_MODIFIER_ANY_HIT = (1<<0), + QUERY_MODIFIER_DOUBLE_SIDED = (1<<1), + QUERY_MODIFIER_MESH_BOTH_SIDES = (1<<2) + }; + + template<class ParamsT> + PX_FORCE_INLINE void setupParamsFlags(ParamsT* PX_RESTRICT params, PxU32 flags) + { + params->mBackfaceCulling = (flags & (QUERY_MODIFIER_DOUBLE_SIDED|QUERY_MODIFIER_MESH_BOTH_SIDES)) ? 0 : 1u; + params->mEarlyExit = flags & QUERY_MODIFIER_ANY_HIT; + } + + enum HitCode + { + HIT_NONE = 0, //!< No hit + HIT_CONTINUE = 1, //!< Hit found, but keep looking for closer one + HIT_EXIT = 2 //!< Hit found, you can early-exit (raycast any) + }; + + class RaycastHitInternal : public physx::shdfnd::UserAllocated + { + public: + PX_FORCE_INLINE RaycastHitInternal() {} + PX_FORCE_INLINE ~RaycastHitInternal() {} + + float mDistance; + PxU32 mTriangleID; + }; + + class SweepHit : public physx::shdfnd::UserAllocated + { + public: + PX_FORCE_INLINE SweepHit() {} + PX_FORCE_INLINE ~SweepHit() {} + + PxU32 mTriangleID; //!< Index of touched face + float mDistance; //!< Impact distance + + PxVec3 mPos; + PxVec3 mNormal; + }; + + typedef HitCode (*MeshRayCallback) (void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, float dist, float u, float v); + typedef bool (*MeshOverlapCallback) (void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, const PxU32* vertexIndices); + typedef bool (*MeshSweepCallback) (void* 
userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, /*const PxU32* vertexIndices,*/ float& dist); + typedef bool (*SweepUnlimitedCallback) (void* userData, const SweepHit& hit); + + template<class ParamsT> + PX_FORCE_INLINE void reportUnlimitedCallbackHit(ParamsT* PX_RESTRICT params, const SweepHit& hit) + { + // PT: we can't reuse the MeshSweepCallback here since it's designed for doing the sweep test inside the callback + // (in the user's code) rather than inside the traversal code. So we use the SweepUnlimitedCallback instead to + // report the already fully computed hit to users. + // PT: TODO: this may not be very efficient, since computing the full hit is expensive. If we use this codepath + // to implement the Epic Tweak, the resulting code will not be optimal. + (params->mCallback)(params->mUserData, hit); + + // PT: the existing traversal code already shrunk the ray. For real "sweep all" calls we must undo that by reseting the max dist. + // (params->mStabbedFace.mDistance is used in computeImpactDataX code, so we need it before that point - we can't simply avoid + // modifying this value before this point). 
+ if(!params->mNodeSorting) + params->mStabbedFace.mDistance = params->mMaxDist; + } + + PX_FORCE_INLINE void invertPRMatrix(PxMat44* PX_RESTRICT dest, const PxMat44* PX_RESTRICT src) + { + const float m30 = src->column3.x; + const float m31 = src->column3.y; + const float m32 = src->column3.z; + + const float m00 = src->column0.x; + const float m01 = src->column0.y; + const float m02 = src->column0.z; + + dest->column0.x = m00; + dest->column1.x = m01; + dest->column2.x = m02; + dest->column3.x = -(m30*m00 + m31*m01 + m32*m02); + + const float m10 = src->column1.x; + const float m11 = src->column1.y; + const float m12 = src->column1.z; + + dest->column0.y = m10; + dest->column1.y = m11; + dest->column2.y = m12; + dest->column3.y = -(m30*m10 + m31*m11 + m32*m12); + + const float m20 = src->column2.x; + const float m21 = src->column2.y; + const float m22 = src->column2.z; + + dest->column0.z = m20; + dest->column1.z = m21; + dest->column2.z = m22; + dest->column3.z = -(m30*m20 + m31*m21 + m32*m22); + + dest->column0.w = 0.0f; + dest->column1.w = 0.0f; + dest->column2.w = 0.0f; + dest->column3.w = 1.0f; + } + + PX_FORCE_INLINE void invertBoxMatrix(PxMat33& m, PxVec3& t, const Gu::Box& box) + { + const float m30 = box.center.x; + const float m31 = box.center.y; + const float m32 = box.center.z; + + const float m00 = box.rot.column0.x; + const float m01 = box.rot.column0.y; + const float m02 = box.rot.column0.z; + + m.column0.x = m00; + m.column1.x = m01; + m.column2.x = m02; + t.x = -(m30*m00 + m31*m01 + m32*m02); + + const float m10 = box.rot.column1.x; + const float m11 = box.rot.column1.y; + const float m12 = box.rot.column1.z; + + m.column0.y = m10; + m.column1.y = m11; + m.column2.y = m12; + t.y = -(m30*m10 + m31*m11 + m32*m12); + + const float m20 = box.rot.column2.x; + const float m21 = box.rot.column2.y; + const float m22 = box.rot.column2.z; + + m.column0.z = m20; + m.column1.z = m21; + m.column2.z = m22; + t.z = -(m30*m20 + m31*m21 + m32*m22); + } + +#ifdef 
GU_BV4_USE_SLABS + // PT: this class moved here to make things compile with pedantic compilers. + struct BVDataSwizzled : public physx::shdfnd::UserAllocated + { + #ifdef GU_BV4_QUANTIZED_TREE + struct Data + { + PxI16 mMin; //!< Quantized min + PxI16 mMax; //!< Quantized max + }; + + Data mX[4]; + Data mY[4]; + Data mZ[4]; + #else + float mMinX[4]; + float mMinY[4]; + float mMinZ[4]; + float mMaxX[4]; + float mMaxY[4]; + float mMaxZ[4]; + #endif + PxU32 mData[4]; + + PX_FORCE_INLINE PxU32 isLeaf(PxU32 i) const { return mData[i]&1; } + PX_FORCE_INLINE PxU32 getPrimitive(PxU32 i) const { return mData[i]>>1; } + PX_FORCE_INLINE PxU32 getChildOffset(PxU32 i) const { return mData[i]>>GU_BV4_CHILD_OFFSET_SHIFT_COUNT; } + PX_FORCE_INLINE PxU32 getChildType(PxU32 i) const { return (mData[i]>>1)&3; } + PX_FORCE_INLINE PxU32 getChildData(PxU32 i) const { return mData[i]; } + PX_FORCE_INLINE PxU32 decodePNSNoShift(PxU32 i) const { return mData[i]; } + }; +#else + #define SSE_CONST4(name, val) static const __declspec(align(16)) PxU32 name[4] = { (val), (val), (val), (val) } + #define SSE_CONST(name) *(const __m128i *)&name + #define SSE_CONSTF(name) *(const __m128 *)&name +#endif + + PX_FORCE_INLINE PxU32 getNbPrimitives(PxU32& primIndex) + { + PxU32 NbToGo = (primIndex & 15)-1; + primIndex>>=4; + return NbToGo; + } + + template<class ParamsT> + PX_FORCE_INLINE void setupMeshPointersAndQuantizedCoeffs(ParamsT* PX_RESTRICT params, const SourceMesh* PX_RESTRICT mesh, const BV4Tree* PX_RESTRICT tree) + { + params->mTris32 = mesh->getTris32(); + params->mTris16 = mesh->getTris16(); + params->mVerts = mesh->getVerts(); + +#ifdef GU_BV4_QUANTIZED_TREE + V4StoreA_Safe(V4LoadU_Safe(&tree->mCenterOrMinCoeff.x), ¶ms->mCenterOrMinCoeff_PaddedAligned.x); + V4StoreA_Safe(V4LoadU_Safe(&tree->mExtentsOrMaxCoeff.x), ¶ms->mExtentsOrMaxCoeff_PaddedAligned.x); +#else + PX_UNUSED(tree); +#endif + } + + PX_FORCE_INLINE void rotateBox(Gu::Box& dst, const PxMat44& m, const Gu::Box& src) + { + // 
The extents remain constant + dst.extents = src.extents; + // The center gets x-formed + dst.center = m.transform(src.center); + // Combine rotations + // PT: TODO: revisit.. this is awkward... grab 3x3 part of 4x4 matrix (TA34704) + const PxMat33 tmp( PxVec3(m.column0.x, m.column0.y, m.column0.z), + PxVec3(m.column1.x, m.column1.y, m.column1.z), + PxVec3(m.column2.x, m.column2.y, m.column2.z)); + dst.rot = tmp * src.rot; + } + + PX_FORCE_INLINE PxVec3 inverseRotate(const PxMat44* PX_RESTRICT src, const PxVec3& p) + { + const float m00 = src->column0.x; + const float m01 = src->column0.y; + const float m02 = src->column0.z; + + const float m10 = src->column1.x; + const float m11 = src->column1.y; + const float m12 = src->column1.z; + + const float m20 = src->column2.x; + const float m21 = src->column2.y; + const float m22 = src->column2.z; + + return PxVec3( m00*p.x + m01*p.y + m02*p.z, + m10*p.x + m11*p.y + m12*p.z, + m20*p.x + m21*p.y + m22*p.z); + } + + PX_FORCE_INLINE PxVec3 inverseTransform(const PxMat44* PX_RESTRICT src, const PxVec3& p) + { + const float m30 = src->column3.x; + const float m31 = src->column3.y; + const float m32 = src->column3.z; + + const float m00 = src->column0.x; + const float m01 = src->column0.y; + const float m02 = src->column0.z; + + const float m10 = src->column1.x; + const float m11 = src->column1.y; + const float m12 = src->column1.z; + + const float m20 = src->column2.x; + const float m21 = src->column2.y; + const float m22 = src->column2.z; + + return PxVec3( m00*p.x + m01*p.y + m02*p.z -(m30*m00 + m31*m01 + m32*m02), + m10*p.x + m11*p.y + m12*p.z -(m30*m10 + m31*m11 + m32*m12), + m20*p.x + m21*p.y + m22*p.z -(m30*m20 + m31*m21 + m32*m22)); + } + + PX_FORCE_INLINE void computeLocalRay(PxVec3& localDir, PxVec3& localOrigin, const PxVec3& dir, const PxVec3& origin, const PxMat44* PX_RESTRICT worldm_Aligned) + { + if(worldm_Aligned) + { + localDir = inverseRotate(worldm_Aligned, dir); + localOrigin = 
inverseTransform(worldm_Aligned, origin); + } + else + { + localDir = dir; + localOrigin = origin; + } + } + + PX_FORCE_INLINE void computeLocalSphere(float& radius2, PxVec3& local_center, const Sphere& sphere, const PxMat44* PX_RESTRICT worldm_Aligned) + { + radius2 = sphere.radius * sphere.radius; + if(worldm_Aligned) + { + local_center = inverseTransform(worldm_Aligned, sphere.center); + } + else + { + local_center = sphere.center; + } + } + + PX_FORCE_INLINE void computeLocalCapsule(Capsule& localCapsule, const Capsule& capsule, const PxMat44* PX_RESTRICT worldm_Aligned) + { + localCapsule.radius = capsule.radius; + if(worldm_Aligned) + { + localCapsule.p0 = inverseTransform(worldm_Aligned, capsule.p0); + localCapsule.p1 = inverseTransform(worldm_Aligned, capsule.p1); + } + else + { + localCapsule.p0 = capsule.p0; + localCapsule.p1 = capsule.p1; + } + } + + PX_FORCE_INLINE void computeLocalBox(Gu::Box& dst, const Gu::Box& src, const PxMat44* PX_RESTRICT worldm_Aligned) + { + if(worldm_Aligned) + { + PxMat44 invWorldM; + invertPRMatrix(&invWorldM, worldm_Aligned); + + rotateBox(dst, invWorldM, src); + } + else + { + dst = src; // PT: TODO: check asm for operator= (TA34704) + } + } + + template<class ImpactFunctionT, class ShapeT, class ParamsT> + static PX_FORCE_INLINE bool computeImpactDataT(const ShapeT& shape, const PxVec3& dir, SweepHit* PX_RESTRICT hit, const ParamsT* PX_RESTRICT params, const PxMat44* PX_RESTRICT worldm, bool isDoubleSided, bool meshBothSides) + { + if(params->mStabbedFace.mTriangleID==PX_INVALID_U32) + return false; // We didn't touch any triangle + + if(hit) + { + const float t = params->mStabbedFace.mDistance; + hit->mTriangleID = params->mStabbedFace.mTriangleID; + hit->mDistance = t; + + if(t==0.0f) + { + hit->mPos = PxVec3(0.0f); + hit->mNormal = -dir; + } + else + { + // PT: TODO: we shouldn't compute impact in world space, and in fact moving this to local space is necessary if we want to reuse this for box-sweeps (TA34704) + 
TrianglePadded WP; + if(worldm) + { + WP.verts[0] = worldm->transform(params->mP0); + WP.verts[1] = worldm->transform(params->mP1); + WP.verts[2] = worldm->transform(params->mP2); + } + else + { + WP.verts[0] = params->mP0; + WP.verts[1] = params->mP1; + WP.verts[2] = params->mP2; + } + + PxVec3 impactNormal; + ImpactFunctionT::computeImpact(hit->mPos, impactNormal, shape, dir, t, WP); + + // PT: by design, returned normal is opposed to the sweep direction. + if(shouldFlipNormal(impactNormal, meshBothSides, isDoubleSided, params->mBestTriNormal, dir)) + impactNormal = -impactNormal; + + hit->mNormal = impactNormal; + } + } + return true; + } + + // PT: we don't create a structure for small meshes with just a few triangles. We use brute-force tests on these. + template<class LeafFunction_AnyT, class LeafFunction_ClosestT, class ParamsT> + static void doBruteForceTests(PxU32 nbTris, ParamsT* PX_RESTRICT params) + { + PX_ASSERT(nbTris<16); + if(params->mEarlyExit) + LeafFunction_AnyT::doLeafTest(params, nbTris); + else + LeafFunction_ClosestT::doLeafTest(params, nbTris); + } + +#if PX_INTEL_FAMILY +#ifndef GU_BV4_USE_SLABS + template<class ParamsT> + PX_FORCE_INLINE void setupRayData(ParamsT* PX_RESTRICT params, float max_dist, const PxVec3& origin, const PxVec3& dir) + { + const float Half = 0.5f*max_dist; + const FloatV HalfV = FLoad(Half); + const Vec4V DataV = V4Scale(V4LoadU(&dir.x), HalfV); + const Vec4V Data2V = V4Add(V4LoadU(&origin.x), DataV); + const PxU32 MaskI = 0x7fffffff; + const Vec4V FDirV = _mm_and_ps(_mm_load1_ps((float*)&MaskI), DataV); + V4StoreA_Safe(DataV, ¶ms->mData_PaddedAligned.x); + V4StoreA_Safe(Data2V, ¶ms->mData2_PaddedAligned.x); + V4StoreA_Safe(FDirV, ¶ms->mFDir_PaddedAligned.x); + } +#endif +#endif + +} +} + +#endif // GU_BV4_COMMON_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Internal.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Internal.h new file mode 100644 index 00000000..07df2109 --- /dev/null +++ 
b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Internal.h @@ -0,0 +1,265 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef GU_BV4_INTERNAL_H
#define GU_BV4_INTERNAL_H

#include "CmPhysXCommon.h"
#include "PsFPU.h"

	// Child pointers are packed into a single 32-bit value: the node offset in
	// the upper bits, and a 2-bit "type" in bits 1-2. The type gates how many of
	// the 4 children are processed (see nodeType usage in the traversals below).
	static PX_FORCE_INLINE PxU32 getChildOffset(PxU32 data)	{ return data>>GU_BV4_CHILD_OFFSET_SHIFT_COUNT;	}
	static PX_FORCE_INLINE PxU32 getChildType(PxU32 data)	{ return (data>>1)&3;	}

	// PT: the general structure is that there is a root "process stream" function which is the entry point for the query.
	// It then calls "process node" functions for each traversed node, except for the Slabs-based raycast versions that deal
	// with 4 nodes at a time within the "process stream" function itself. When a leaf is found, "doLeafTest" functors
	// passed to the "process stream" entry point are called.
#ifdef GU_BV4_USE_SLABS
	#define processStreamNoOrder	BV4_ProcessStreamSwizzledNoOrder
	#define processStreamOrdered	BV4_ProcessStreamSwizzledOrdered
	#define processStreamRayNoOrder(a, b)	BV4_ProcessStreamKajiyaNoOrder<a, b>
	#define processStreamRayOrdered(a, b)	BV4_ProcessStreamKajiyaOrdered<a, b>
#else
	#define processStreamNoOrder	BV4_ProcessStreamNoOrder
	#define processStreamOrdered	BV4_ProcessStreamOrdered2
	#define processStreamRayNoOrder(a, b)	BV4_ProcessStreamNoOrder<b>
	#define processStreamRayOrdered(a, b)	BV4_ProcessStreamOrdered2<b>
#endif

#ifndef GU_BV4_USE_SLABS
#ifdef GU_BV4_PRECOMPUTED_NODE_SORT
	// PT: see http://www.codercorner.com/blog/?p=734

	// PT: TODO: refactor with dup in bucket pruner (TA34704)
	// Builds an 8-bit one-hot mask from the sign bits of the direction vector.
	// NOTE(review): assumes PX_IR yields an unsigned reinterpretation of the
	// float, so >>31 produces 0/1 - confirm against PsFPU.h.
	PX_FORCE_INLINE PxU32 computeDirMask(const PxVec3& dir)
	{
		// XYZ
		// ---
		// --+
		// -+-
		// -++
		// +--
		// +-+
		// ++-
		// +++

		const PxU32 X = PX_IR(dir.x)>>31;
		const PxU32 Y = PX_IR(dir.y)>>31;
		const PxU32 Z = PX_IR(dir.z)>>31;
		const PxU32 bitIndex = Z|(Y<<1)|(X<<2);
		return 1u<<bitIndex;
	}

	// Precomputed child-visit orders, indexed by the 3-bit code from decodePNS():
	// 0 0 0	PP PN NP NN		0 1 2 3
	// 0 0 1	PP PN NN NP		0 1 3 2
	// 0 1 0	PN PP NP NN		1 0 2 3
	// 0 1 1	PN PP NN NP		1 0 3 2
	// 1 0 0	NP NN PP PN		2 3 0 1
	// 1 0 1	NN NP PP PN		3 2 0 1
	// 1 1 0	NP NN PN PP		2 3 1 0
	// 1 1 1	NN NP PN PP		3 2 1 0
	static const PxU8 order[] = {
		0,1,2,3,
		0,1,3,2,
		1,0,2,3,
		1,0,3,2,
		2,3,0,1,
		3,2,0,1,
		2,3,1,0,
		3,2,1,0,
	};

	// Extracts the 3-bit PNS code for a node by testing the stored PNS bits of
	// its first three children against the direction mask.
	PX_FORCE_INLINE PxU32 decodePNS(const BVDataPacked* PX_RESTRICT node, const PxU32 dirMask)
	{
		const PxU32 bit0 = (node[0].decodePNSNoShift() & dirMask) ? 1u : 0;
		const PxU32 bit1 = (node[1].decodePNSNoShift() & dirMask) ? 1u : 0;
		const PxU32 bit2 = (node[2].decodePNSNoShift() & dirMask) ? 1u : 0;	//### potentially reads past the end of the stream here!
		return bit2|(bit1<<1)|(bit0<<2);
	}
#endif	// GU_BV4_PRECOMPUTED_NODE_SORT

	// Three variants of the "push overlapped children (flagged in 'code') onto
	// the stack" snippet: PNS_BLOCK/PNS_BLOCK1 are switch-case forms,
	// PNS_BLOCK2 is a plain block (used by the FOURTH_TEST implementation).
	#define PNS_BLOCK(i, a, b, c, d)	\
	case i:							\
	{								\
		if(code & (1<<a))	{ stack[nb++] = node[a].getChildData();	}	\
		if(code & (1<<b))	{ stack[nb++] = node[b].getChildData();	}	\
		if(code & (1<<c))	{ stack[nb++] = node[c].getChildData();	}	\
		if(code & (1<<d))	{ stack[nb++] = node[d].getChildData();	}	\
	}break;

	#define PNS_BLOCK1(i, a, b, c, d)	\
	case i:								\
	{									\
		stack[nb] = node[a].getChildData();	nb += (code & (1<<a))?1:0;	\
		stack[nb] = node[b].getChildData();	nb += (code & (1<<b))?1:0;	\
		stack[nb] = node[c].getChildData();	nb += (code & (1<<c))?1:0;	\
		stack[nb] = node[d].getChildData();	nb += (code & (1<<d))?1:0;	\
	}break;

	#define PNS_BLOCK2(a, b, c, d)	{							\
		if(code & (1<<a))	{ stack[nb++] = node[a].getChildData();	}	\
		if(code & (1<<b))	{ stack[nb++] = node[b].getChildData();	}	\
		if(code & (1<<c))	{ stack[nb++] = node[c].getChildData();	}	\
		if(code & (1<<d))	{ stack[nb++] = node[d].getChildData();	}	}	\

#if PX_INTEL_FAMILY
	// Any-hit, unordered traversal: returns 1 as soon as a leaf test succeeds.
	// Children 0 and 1 are always processed; 2 and 3 only when nodeType allows.
	template<class LeafTestT, class ParamsT>
	static Ps::IntBool BV4_ProcessStreamNoOrder(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params)
	{
		const BVDataPacked* root = node;

		PxU32 nb=1;
		PxU32 stack[GU_BV4_STACK_SIZE];
		stack[0] = initData;

		do
		{
			const PxU32 childData = stack[--nb];
			node = root + getChildOffset(childData);
			const PxU32 nodeType = 
getChildType(childData); + + if(nodeType>1 && BV4_ProcessNodeNoOrder<LeafTestT, 3>(stack, nb, node, params)) + return 1; + if(nodeType>0 && BV4_ProcessNodeNoOrder<LeafTestT, 2>(stack, nb, node, params)) + return 1; + if(BV4_ProcessNodeNoOrder<LeafTestT, 1>(stack, nb, node, params)) + return 1; + if(BV4_ProcessNodeNoOrder<LeafTestT, 0>(stack, nb, node, params)) + return 1; + + }while(nb); + + return 0; + } + + template<class LeafTestT, class ParamsT> + static void BV4_ProcessStreamOrdered(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + const PxU32 dirMask = computeDirMask(params->mLocalDir)<<3; + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + + const PxU8* PX_RESTRICT ord = order + decodePNS(node, dirMask)*4; + const PxU32 limit = 2 + getChildType(childData); + + BV4_ProcessNodeOrdered<LeafTestT>(stack, nb, node, params, ord[0], limit); + BV4_ProcessNodeOrdered<LeafTestT>(stack, nb, node, params, ord[1], limit); + BV4_ProcessNodeOrdered<LeafTestT>(stack, nb, node, params, ord[2], limit); + BV4_ProcessNodeOrdered<LeafTestT>(stack, nb, node, params, ord[3], limit); + }while(Nb); + } + + // Alternative, experimental version using PNS + template<class LeafTestT, class ParamsT> + static void BV4_ProcessStreamOrdered2(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + const PxU32 X = PX_IR(params->mLocalDir_Padded.x)>>31; + const PxU32 Y = PX_IR(params->mLocalDir_Padded.y)>>31; + const PxU32 Z = PX_IR(params->mLocalDir_Padded.z)>>31; + const PxU32 bitIndex = 3+(Z|(Y<<1)|(X<<2)); + const PxU32 dirMask = 1u<<bitIndex; + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + const PxU32 nodeType = 
getChildType(childData);

			// Test the (up to 4) children; overlapped internal children set their
			// bit in "code", overlapped leaves are tested immediately.
			PxU32 code = 0;
			BV4_ProcessNodeOrdered2<LeafTestT, 0>(code, node, params);
			BV4_ProcessNodeOrdered2<LeafTestT, 1>(code, node, params);
			if(nodeType>0)
				BV4_ProcessNodeOrdered2<LeafTestT, 2>(code, node, params);
			if(nodeType>1)
				BV4_ProcessNodeOrdered2<LeafTestT, 3>(code, node, params);

			if(code)
			{
				// PT: TODO: check which implementation is best on each platform (TA34704)
#define FOURTH_TEST	// Version avoids computing the PNS index, and also avoids all non-constant shifts. Full of branches though. Fastest on Win32.
#ifdef FOURTH_TEST
				{
					// Branchy equivalent of "order[decodePNS(...)*4]": the PNS bits
					// of children 0..2 select one of the 8 precomputed push orders
					// (the // N comments give the corresponding order[] row).
					if(node[0].decodePNSNoShift() & dirMask)	// Bit2
					{
						if(node[1].decodePNSNoShift() & dirMask)	// Bit1
						{
							if(node[2].decodePNSNoShift() & dirMask)	// Bit0
								PNS_BLOCK2(3,2,1,0)	// 7
							else
								PNS_BLOCK2(2,3,1,0)	// 6
						}
						else
						{
							if(node[2].decodePNSNoShift() & dirMask)	// Bit0
								PNS_BLOCK2(3,2,0,1)	// 5
							else
								PNS_BLOCK2(2,3,0,1)	// 4
						}
					}
					else
					{
						if(node[1].decodePNSNoShift() & dirMask)	// Bit1
						{
							if(node[2].decodePNSNoShift() & dirMask)	// Bit0
								PNS_BLOCK2(1,0,3,2)	// 3
							else
								PNS_BLOCK2(1,0,2,3)	// 2
						}
						else
						{
							if(node[2].decodePNSNoShift() & dirMask)	// Bit0
								PNS_BLOCK2(0,1,3,2)	// 1
							else
								PNS_BLOCK2(0,1,2,3)	// 0
						}
					}
				}
#endif
			}
		}while(nb);
	}
#endif	// PX_INTEL_FAMILY
#endif	// GU_BV4_USE_SLABS

#endif // GU_BV4_INTERNAL_H
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_OBBSweep.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_OBBSweep.cpp
new file mode 100644
index 00000000..c578b359
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_OBBSweep.cpp
@@ -0,0 +1,170 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "GuBV4.h"
using namespace physx;
using namespace Gu;

#if PX_INTEL_FAMILY
#include "PsVecMath.h"
using namespace physx::shdfnd::aos;
#include "GuBV4_BoxSweep_Internal.h"

// Forward declarations of the AABB-specialized sweep codepaths (defined elsewhere).
Ps::IntBool Sweep_AABB_BV4(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags);
void GenericSweep_AABB_CB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, MeshSweepCallback callback, void* userData, PxU32 flags);
void Sweep_AABB_BV4_CB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting);

// PT: TODO: optimize this (TA34704)
// Moves the swept box and the sweep direction into mesh-local space by
// inverting the (position+rotation) world matrix; passthrough when no matrix.
static PX_FORCE_INLINE void computeLocalData(Box& localBox, PxVec3& localDir, const Box& box, const PxVec3& dir, const PxMat44* PX_RESTRICT worldm_Aligned)
{
	if(worldm_Aligned)
	{
		PxMat44 IWM;
		invertPRMatrix(&IWM, worldm_Aligned);

		localDir = IWM.rotate(dir);

		rotateBox(localBox, IWM, box);
	}
	else
	{
		localDir = dir;
		localBox = box;	// PT: TODO: check asm for operator= (TA34704)
	}
}

// True when each component's magnitude is within 1e-3 of 0 or of 1, i.e. the
// axis is (numerically) aligned with one of the coordinate axes.
static PX_FORCE_INLINE bool isAxisAligned(const PxVec3& axis)
{
	const PxReal minLimit = 1e-3f;
	const PxReal maxLimit = 1.0f - 1e-3f;

	const PxReal absX = PxAbs(axis.x);
	if(absX>minLimit && absX<maxLimit)
		return false;

	const PxReal absY = PxAbs(axis.y);
	if(absY>minLimit && absY<maxLimit)
		return false;

	const PxReal absZ = PxAbs(axis.z);
	if(absZ>minLimit && absZ<maxLimit)
		return false;

	return true;
}

// True when all three rotation columns are axis-aligned, i.e. the OBB is
// effectively an AABB and the cheaper AABB codepath can be used.
static PX_FORCE_INLINE bool isAABB(const Box& box)
{
	if(!isAxisAligned(box.rot.column0))
		return false;
	if(!isAxisAligned(box.rot.column1))
		return false;
	if(!isAxisAligned(box.rot.column2))
		return false;
	return true;
}

// Closest-hit box sweep against the BV4 tree. The query runs in mesh-local
// space; when a world matrix is given, the returned hit position/normal are
// transformed back to world space.
Ps::IntBool BV4_BoxSweepSingle(const Box& box, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepHit* PX_RESTRICT hit, PxU32 flags)
{
	Box localBox;
	PxVec3 localDir;
	computeLocalData(localBox, localDir, box, dir, worldm_Aligned);

	Ps::IntBool Status;
	if(isAABB(localBox))
		Status = Sweep_AABB_BV4(localBox, localDir, maxDist, tree, hit, flags);
	else
		Status = Sweep_OBB_BV4(localBox, localDir, maxDist, tree, hit, flags);
	if(Status && worldm_Aligned)
	{
		// Move to world space
		// PT: TODO: optimize (TA34704)
		hit->mPos = worldm_Aligned->transform(hit->mPos);
		hit->mNormal = worldm_Aligned->rotate(hit->mNormal);
	}
	return Status;
}

// PT: for design decisions in this function, refer to the comments of BV4_GenericSweepCB().
// "Sweep all" box sweep: every hit is reported through the callback.
void BV4_BoxSweepCB(const Box& box, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting)
{
	Box localBox;
	PxVec3 localDir;
	computeLocalData(localBox, localDir, box, dir, worldm_Aligned);

	if(isAABB(localBox))
		Sweep_AABB_BV4_CB(localBox, localDir, maxDist, tree, worldm_Aligned, callback, userData, flags, nodeSorting);
	else
		Sweep_OBB_BV4_CB(localBox, localDir, maxDist, tree, worldm_Aligned, callback, userData, flags, nodeSorting);
}


// PT: this generic sweep uses an OBB because this is the most versatile volume, but it does not mean this function is
// a "box sweep function" per-se. In fact it could be used all alone to implement all sweeps in the SDK (but that would
// have an impact on performance).
//
// So the idea here is simply to provide and use a generic function for everything that the BV4 code does not support directly.
// In particular this should be used:
// - for convex sweeps (where the OBB is the box around the swept convex)
// - for non-trivial sphere/capsule/box sweeps where mesh scaling or inflation is involved
//
// By design we don't do leaf tests inside the BV4 traversal code here (because we don't support them, e.g. convex
// sweeps. If we could do them inside the BV4 traversal code, like we do for regular sweeps, then this would not be a generic
// sweep function, but instead a built-in, natively supported query). So the leaf tests are performed outside of BV4, in the
// client code, through MeshSweepCallback. This has a direct impact on the design & parameters of MeshSweepCallback.
//
// On the other hand this is used for "regular sweeps with shapes we don't natively support", i.e. SweepSingle kind of queries.
// This means that we need to support an early-exit codepath (without node-sorting) and a regular sweep single codepath (with
// node sorting) for this generic function. The leaf tests are external, but everything traversal-related should be exactly the
// same as the regular box-sweep function otherwise.
//
// As a consequence, this function is not well-suited to implement "unlimited results" kind of queries, a.k.a. "sweep all":
//
// - for regular sphere/capsule/box "sweep all" queries, the leaf tests should be internal (same as sweep single queries). This
// means the existing MeshSweepCallback can't be reused.
//
// - there is no need to support "sweep any" (it is already supported by the other sweep functions).
//
// - there may be no need for ordered traversal/node sorting/ray shrinking, since we want to return all results anyway. But this
// may not be true if the "sweep all" function is used to emulate the Epic Tweak. In that case we still want to shrink the ray
// and use node sorting. Since both versions are useful, we should probably have a bool param to enable/disable node sorting.
//
// - we are interested in all hits so we can't delay the computation of impact data (computing it only once in the end, for the
// closest hit). We actually need to compute the data for all hits, possibly within the traversal code.
void BV4_GenericSweepCB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, MeshSweepCallback callback, void* userData, bool anyHit)
{
	const PxU32 flags = anyHit ? PxU32(QUERY_MODIFIER_ANY_HIT) : 0;

	if(isAABB(localBox))
		GenericSweep_AABB_CB(localBox, localDir, maxDist, tree, callback, userData, flags);
	else
		GenericSweep_OBB_CB(localBox, localDir, maxDist, tree, callback, userData, flags);
}

#endif
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_OBBOBB.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_OBBOBB.h
new file mode 100644
index 00000000..9c55cd66
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_OBBOBB.h
@@ -0,0 +1,73 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use.
// No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef GU_BV4_PROCESS_STREAM_NOORDER_OBB_OBB_H
#define GU_BV4_PROCESS_STREAM_NOORDER_OBB_OBB_H

#ifdef GU_BV4_USE_SLABS
	// OBB-vs-node-bounds test for the swizzled (slabs) node layout. An
	// overlapped leaf is tested immediately (early-out when the leaf test
	// reports a hit); an overlapped internal child is pushed on the stack.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE Ps::IntBool BV4_ProcessNodeNoOrder_Swizzled(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataSwizzled* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
		OPC_SLABS_GET_CE(i)	// brings centerV/extentsV for child i into scope

		if(BV4_BoxBoxOverlap(centerV, extentsV, params))
		{
			if(node->isLeaf(i))
			{
				if(LeafTestT::doLeafTest(params, node->getPrimitive(i)))
					return 1;
			}
			else
				Stack[Nb++] = node->getChildData(i);
		}
		return 0;
	}
#else
	// Same logic for the packed node layout. NOTE the (extents, center)
	// argument order of this BV4_BoxBoxOverlap overload.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE Ps::IntBool BV4_ProcessNodeNoOrder(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_BoxBoxOverlap(node+i, params))
	#else
		if(BV4_BoxBoxOverlap(node[i].mAABB.mExtents, node[i].mAABB.mCenter, params))
	#endif
		{
			if(node[i].isLeaf())
			{
				if(LeafTestT::doLeafTest(params, node[i].getPrimitive()))
					return 1;
			}
			else
				Stack[Nb++] = node[i].getChildData();
		}
		return 0;
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_NOORDER_OBB_OBB_H
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SegmentAABB.h 
b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SegmentAABB.h new file mode 100644 index 00000000..86ea5f97 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SegmentAABB.h @@ -0,0 +1,55 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. 
// All rights reserved.

#ifndef GU_BV4_PROCESS_STREAM_NOORDER_SEGMENT_AABB_H
#define GU_BV4_PROCESS_STREAM_NOORDER_SEGMENT_AABB_H

#ifndef GU_BV4_USE_SLABS
	// Segment (raycast)-vs-node-AABB, unordered variant: an overlapped leaf is
	// tested immediately (early-out when the leaf test reports a hit); an
	// overlapped internal child is pushed on the traversal stack.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE Ps::IntBool BV4_ProcessNodeNoOrder(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_SegmentAABBOverlap(node+i, params))
	#else
		if(BV4_SegmentAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params))
	#endif
		{
			if(node[i].isLeaf())
			{
				if(LeafTestT::doLeafTest(params, node[i].getPrimitive()))
					return 1;
			}
			else
				Stack[Nb++] = node[i].getChildData();
		}
		return 0;
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_NOORDER_SEGMENT_AABB_H
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SegmentAABB_Inflated.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SegmentAABB_Inflated.h
new file mode 100644
index 00000000..7bf3285c
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SegmentAABB_Inflated.h
@@ -0,0 +1,55 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef GU_BV4_PROCESS_STREAM_NOORDER_SEGMENT_AABB_INFLATED_H
#define GU_BV4_PROCESS_STREAM_NOORDER_SEGMENT_AABB_INFLATED_H

#ifndef GU_BV4_USE_SLABS
	// Same as the plain segment-AABB variant, but the overlap test additionally
	// takes params->mOriginalExtents_Padded - presumably the swept shape's
	// extents, inflating the node bounds (confirm against BV4_SegmentAABBOverlap).
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE Ps::IntBool BV4_ProcessNodeNoOrder(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_SegmentAABBOverlap(node+i, params->mOriginalExtents_Padded, params))
	#else
		if(BV4_SegmentAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params->mOriginalExtents_Padded, params))
	#endif
		{
			if(node[i].isLeaf())
			{
				if(LeafTestT::doLeafTest(params, node[i].getPrimitive()))
					return 1;
			}
			else
				Stack[Nb++] = node[i].getChildData();
		}
		return 0;
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_NOORDER_SEGMENT_AABB_INFLATED_H
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SphereAABB.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SphereAABB.h
new file mode 100644
index 00000000..52d1dce5
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SphereAABB.h @@ -0,0 +1,74 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef GU_BV4_PROCESS_STREAM_NOORDER_SPHERE_AABB_H
#define GU_BV4_PROCESS_STREAM_NOORDER_SPHERE_AABB_H

#ifdef GU_BV4_USE_SLABS
	// Sphere-vs-node-bounds test, swizzled (slabs) layout. Uses the CE2 variant
	// of the center/extents extraction macro (the CE variant is kept commented
	// out); both are expected to bring centerV/extentsV into scope.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE Ps::IntBool BV4_ProcessNodeNoOrder_Swizzled(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataSwizzled* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
//		OPC_SLABS_GET_CE(i)
		OPC_SLABS_GET_CE2(i)

		if(BV4_SphereAABBOverlap(centerV, extentsV, params))
		{
			if(node->isLeaf(i))
			{
				if(LeafTestT::doLeafTest(params, node->getPrimitive(i)))
					return 1;
			}
			else
				Stack[Nb++] = node->getChildData(i);
		}
		return 0;
	}
#else
	// Packed layout version; early-outs as soon as a leaf test reports a hit.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE Ps::IntBool BV4_ProcessNodeNoOrder(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_SphereAABBOverlap(node+i, params))
	#else
		if(BV4_SphereAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params))
	#endif
		{
			if(node[i].isLeaf())
			{
				if(LeafTestT::doLeafTest(params, node[i].getPrimitive()))
					return 1;
			}
			else
				Stack[Nb++] = node[i].getChildData();
		}
		return 0;
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_NOORDER_SPHERE_AABB_H
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamOrdered_OBBOBB.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamOrdered_OBBOBB.h
new file mode 100644
index 00000000..24dd9757
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamOrdered_OBBOBB.h
@@ -0,0 +1,81 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef GU_BV4_PROCESS_STREAM_ORDERED_OBB_OBB_H
#define GU_BV4_PROCESS_STREAM_ORDERED_OBB_OBB_H

// Per-node visitors for the *ordered* BV4 stream traversal, specialized for
// OBB-vs-AABB culling (used by box overlap/sweep queries). "Ordered" traversal
// visits children by a precomputed near-to-far order; the leaf test here returns
// void — closest-hit shrinking happens inside the leaf functor itself.

#ifdef GU_BV4_USE_SLABS
	// Swizzled-node variant: tests child i and ORs bit i into 'code' when the
	// child is an overlapping internal node; leaves are tested immediately.
	// OPC_SLABS_GET_CE(i) expands to declarations of centerV/extentsV
	// ((max+min)/2 and (max-min)/2, see GuBV4_Slabs.h).
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered2_Swizzled(PxU32& code, const BVDataSwizzled* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
		OPC_SLABS_GET_CE(i)

		if(BV4_BoxBoxOverlap(centerV, extentsV, params))
		{
			if(node->isLeaf(i))
				LeafTestT::doLeafTest(params, node->getPrimitive(i));
			else
				code |= 1<<i;	// remember overlapping internal child for ordered push
		}
	}
#else
	// Packed-node variant with runtime child index i; i<limit guards against
	// reading past the node group's child count.
	// NOTE(review): the non-quantized call passes (mExtents, mCenter) — reversed
	// relative to the segment/sphere headers, which pass center first. This
	// presumably matches the non-quantized BV4_BoxBoxOverlap overload's parameter
	// order — confirm against its declaration before "fixing".
	template<class LeafTestT, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params, PxU32 i, PxU32 limit)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(i<limit && BV4_BoxBoxOverlap(node+i, params))
	#else
		if(i<limit && BV4_BoxBoxOverlap(node[i].mAABB.mExtents, node[i].mAABB.mCenter, params))
	#endif
		{
			if(node[i].isLeaf())
				LeafTestT::doLeafTest(params, node[i].getPrimitive());
			else
				Stack[Nb++] = node[i].getChildData();
		}
	}

	// Compile-time-index variant: accumulates overlapping internal children as
	// bits in 'code' instead of pushing them directly.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered2(PxU32& code, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_BoxBoxOverlap(node+i, params))
	#else
		if(BV4_BoxBoxOverlap(node[i].mAABB.mExtents, node[i].mAABB.mCenter, params))
	#endif
		{
			if(node[i].isLeaf())
				LeafTestT::doLeafTest(params, node[i].getPrimitive());
			else
				code |= 1<<i;
		}
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_ORDERED_OBB_OBB_H
under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef GU_BV4_PROCESS_STREAM_ORDERED_SEGMENT_AABB_H
#define GU_BV4_PROCESS_STREAM_ORDERED_SEGMENT_AABB_H

// Per-node visitors for the *ordered* BV4 stream traversal, specialized for
// segment-vs-AABB culling (raycast closest-hit). Only compiled when the slabs
// traversal is disabled: with GU_BV4_USE_SLABS the ray path uses the Kajiya
// slab kernels instead (see GuBV4_Slabs_KajiyaOrdered.h includes in GuBV4_Raycast.cpp).

#ifndef GU_BV4_USE_SLABS
	// Runtime-index variant; i<limit guards the node group's child count.
	// Overlapping internal children are pushed on Stack (Nb in/out); leaves are
	// tested immediately via LeafTestT.
	template<class LeafTestT, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params, PxU32 i, PxU32 limit)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(i<limit && BV4_SegmentAABBOverlap(node+i, params))
	#else
		if(i<limit && BV4_SegmentAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params))
	#endif
		{
			if(node[i].isLeaf())
				LeafTestT::doLeafTest(params, node[i].getPrimitive());
			else
				Stack[Nb++] = node[i].getChildData();
		}
	}

	// Compile-time-index variant: records overlapping internal children as bit i
	// of 'code' so the caller can push them in its preferred (sorted) order.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered2(PxU32& code, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_SegmentAABBOverlap(node+i, params))
	#else
		if(BV4_SegmentAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params))
	#endif
		{
			if(node[i].isLeaf())
				LeafTestT::doLeafTest(params, node[i].getPrimitive());
			else
				code |= 1<<i;
		}
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_ORDERED_SEGMENT_AABB_H
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef GU_BV4_PROCESS_STREAM_ORDERED_SEGMENT_AABB_INFLATED_H
#define GU_BV4_PROCESS_STREAM_ORDERED_SEGMENT_AABB_INFLATED_H

// Per-node visitors for the *ordered* BV4 stream traversal, specialized for an
// *inflated* segment-vs-AABB test: node bounds are grown by the query volume's
// extents (swept-box / fat raycast style queries). Only compiled when the slabs
// traversal is disabled.

#ifndef GU_BV4_USE_SLABS
	// Runtime-index variant. The extra argument passes the inflation extents
	// (params->mOriginalExtents) into the overlap test; i<limit guards the
	// node group's child count.
	template<class LeafTestT, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params, PxU32 i, PxU32 limit)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(i<limit && BV4_SegmentAABBOverlap(node+i, params->mOriginalExtents, params))
	#else
		if(i<limit && BV4_SegmentAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params->mOriginalExtents, params))
	#endif
		{
			if(node[i].isLeaf())
				LeafTestT::doLeafTest(params, node[i].getPrimitive());
			else
				Stack[Nb++] = node[i].getChildData();
		}
	}

	// Compile-time-index variant: accumulates overlapping internal children as
	// bits in 'code'.
	// NOTE(review): this overload reads mOriginalExtents_Padded while the one
	// above reads mOriginalExtents — presumably both fields exist on ParamsT and
	// alias the same value with different padding guarantees; confirm the
	// asymmetry is intentional before unifying.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered2(PxU32& code, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_SegmentAABBOverlap(node+i, params->mOriginalExtents_Padded, params))
	#else
		if(BV4_SegmentAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params->mOriginalExtents_Padded, params))
	#endif
		{
			if(node[i].isLeaf())
				LeafTestT::doLeafTest(params, node[i].getPrimitive());
			else
				code |= 1<<i;
		}
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_ORDERED_SEGMENT_AABB_INFLATED_H
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "GuBV4.h"
using namespace physx;
using namespace Gu;

#if PX_INTEL_FAMILY

#include "PxQueryReport.h"
#include "GuInternal.h"

#include "GuIntersectionRayTriangle.h"

#include "PsVecMath.h"
using namespace physx::shdfnd::aos;

#include "GuBV4_Common.h"

// Internal raycast hit record extended with barycentric (u, v) coordinates, so
// the best hit's impact data can be recomputed once at the end of the query.
class RaycastHitInternalUV : public RaycastHitInternal
{
	public:
	PX_FORCE_INLINE		RaycastHitInternalUV()	{}
	PX_FORCE_INLINE		~RaycastHitInternalUV()	{}

	float	mU, mV;		// barycentric hit coordinates within the triangle
};

// Ray-vs-triangle test (Moller-Trumbore style), templated over the params
// struct so both plain and callback raycast paths can share it.
// On hit, fills mStabbedFace.distance/u/v and returns 1; returns 0 on miss,
// on rays parallel to the triangle plane (|det| below the culling epsilon),
// and on intersections behind the ray origin (d < 0).
// params->mBackfaceCulling selects the one-sided branch (which also defers the
// costly 1/det divide until after all rejection tests); params->mGeomEpsilon
// enlarges the accepted (u, v) range to tolerate hits on triangle borders.
template<class T>
PX_FORCE_INLINE Ps::IntBool RayTriOverlapT(PxRaycastHit& mStabbedFace, const PxVec3& vert0, const PxVec3& vert1, const PxVec3& vert2, const T* PX_RESTRICT params)
{
	// Find vectors for two edges sharing vert0
	const PxVec3 edge1 = vert1 - vert0;
	const PxVec3 edge2 = vert2 - vert0;

	// Begin calculating determinant - also used to calculate U parameter
	const PxVec3 pvec = params->mLocalDir_Padded.cross(edge2);

	// If determinant is near zero, ray lies in plane of triangle
	const float det = edge1.dot(pvec);

	if(params->mBackfaceCulling)
	{
		if(det<GU_CULLING_EPSILON_RAY_TRIANGLE)
			return 0;

		// Calculate distance from vert0 to ray origin
		const PxVec3 tvec = params->mOrigin_Padded - vert0;

		// Calculate U parameter and test bounds
		const float u = tvec.dot(pvec);

		// Scale the epsilon by det so u/v can be range-tested before dividing
		const PxReal enlargeCoeff = params->mGeomEpsilon*det;
		const PxReal uvlimit = -enlargeCoeff;
		const PxReal uvlimit2 = det + enlargeCoeff;

		if(u < uvlimit || u > uvlimit2)
			return 0;

		// Prepare to test V parameter
		const PxVec3 qvec = tvec.cross(edge1);

		// Calculate V parameter and test bounds
		const float v = params->mLocalDir_Padded.dot(qvec);
		if(v < uvlimit || (u + v) > uvlimit2)
			return 0;

		// Calculate t, scale parameters, ray intersects triangle
		const float d = edge2.dot(qvec);
		// Det > 0 so we can early exit here
		// Intersection point is valid if distance is positive (else it can just be a face behind the orig point)
		if(d<0.0f)
			return 0;

		// Else go on
		const float OneOverDet = 1.0f / det;
		mStabbedFace.distance = d * OneOverDet;
		mStabbedFace.u = u * OneOverDet;
		mStabbedFace.v = v * OneOverDet;
	}
	else
	{
		if(PxAbs(det)<GU_CULLING_EPSILON_RAY_TRIANGLE)
			return 0;

		const float OneOverDet = 1.0f / det;

		const PxVec3 tvec = params->mOrigin_Padded - vert0;

		const float u = tvec.dot(pvec) * OneOverDet;
		if(u<-params->mGeomEpsilon || u>1.0f+params->mGeomEpsilon)
			return 0;

		// prepare to test V parameter
		const PxVec3 qvec = tvec.cross(edge1);

		// Calculate V parameter and test bounds
		const float v = params->mLocalDir_Padded.dot(qvec) * OneOverDet;
		if(v < -params->mGeomEpsilon || (u + v) > 1.0f + params->mGeomEpsilon)
			return 0;

		// Calculate t, ray intersects triangle
		const float d = edge2.dot(qvec) * OneOverDet;
		// Intersection point is valid if distance is positive (else it can just be a face behind the orig point)
		if(d<0.0f)
			return 0;
		mStabbedFace.distance = d;
		mStabbedFace.u = u;
		mStabbedFace.v = v;
	}
	return 1;
}

#if PX_VC
#pragma warning ( disable : 4324 )	// structure was padded due to alignment specifier (expected: aligned members below)
#endif

// Shared parameter block for all BV4 raycast variants. Holds the local-space
// ray, mesh topology/vertex pointers, the best hit so far, and the SIMD-aligned
// scratch data consumed by the segment-vs-AABB node tests.
struct RayParams
{
#ifdef GU_BV4_QUANTIZED_TREE
	// Dequantization coefficients for quantized node bounds
	BV4_ALIGN16(Vec3p	mCenterOrMinCoeff_PaddedAligned);
	BV4_ALIGN16(Vec3p	mExtentsOrMaxCoeff_PaddedAligned);
#endif
// Organized in the order they are accessed
#ifndef GU_BV4_USE_SLABS
	// Precomputed ray data used by the non-slabs BV4_SegmentAABBOverlap test
	// (set up by setupRayData; refreshed when the best hit distance shrinks)
	BV4_ALIGN16(Vec3p	mData2_PaddedAligned);
	BV4_ALIGN16(Vec3p	mFDir_PaddedAligned);
	BV4_ALIGN16(Vec3p	mData_PaddedAligned);
#endif
	const IndTri32* PX_RESTRICT	mTris32;	// 32-bit triangle indices (or null)
	const IndTri16* PX_RESTRICT	mTris16;	// 16-bit triangle indices (or null)
	const PxVec3* PX_RESTRICT	mVerts;		// mesh vertices
	PxVec3	mLocalDir_Padded;				// ray direction in mesh-local space
	PxVec3	mOrigin_Padded;					// ray origin in mesh-local space

	float	mGeomEpsilon;					// uv-range tolerance for border hits
	PxU32	mBackfaceCulling;				// nonzero => one-sided triangle test

	RaycastHitInternalUV	mStabbedFace;	// best hit so far (mDistance doubles as the clip distance)
	PxU32					mEarlyExit;		// nonzero => any-hit query

	PxVec3	mOriginalExtents_Padded;	// Added to please the slabs code

	// Vertices of the best-hit triangle, kept so computeImpactData can rebuild
	// the impact position/normal once at the end
	BV4_ALIGN16(Vec3p	mP0_PaddedAligned);
	BV4_ALIGN16(Vec3p	mP1_PaddedAligned);
	BV4_ALIGN16(Vec3p	mP2_PaddedAligned);
};
///////////////////////////////////////////////////////////////////////////////

// Records a new best hit: caches the triangle's vertices (for later impact
// reconstruction) and copies index/distance/uv into params->mStabbedFace.
static PX_FORCE_INLINE void updateParamsAfterImpact(RayParams* PX_RESTRICT params, PxU32 primIndex, PxU32 VRef0, PxU32 VRef1, PxU32 VRef2, const PxRaycastHit& StabbedFace)
{
	// Unaligned vertex loads, aligned stores into the padded scratch slots
	V4StoreA_Safe(V4LoadU_Safe(&params->mVerts[VRef0].x), &params->mP0_PaddedAligned.x);
	V4StoreA_Safe(V4LoadU_Safe(&params->mVerts[VRef1].x), &params->mP1_PaddedAligned.x);
	V4StoreA_Safe(V4LoadU_Safe(&params->mVerts[VRef2].x), &params->mP2_PaddedAligned.x);

	params->mStabbedFace.mTriangleID = primIndex;
	params->mStabbedFace.mDistance = StabbedFace.distance;
	params->mStabbedFace.mU = StabbedFace.u;
	params->mStabbedFace.mV = StabbedFace.v;
}

namespace
{
// Leaf functor for closest-hit raycasts: tests every triangle in the leaf,
// keeps the nearest hit, and shrinks the traversal's clip distance. Always
// returns 0 (never aborts the traversal).
class LeafFunction_RaycastClosest
{
public:
	static /*PX_FORCE_INLINE*/ Ps::IntBool doLeafTest(RayParams* PX_RESTRICT params, PxU32 primIndex)
	{
		// Aligned stack buffer reused for each candidate hit
		PX_ALIGN_PREFIX(16) char buffer[sizeof(PxRaycastHit)] PX_ALIGN_SUFFIX(16);
		PxRaycastHit& StabbedFace = reinterpret_cast<PxRaycastHit&>(buffer);

		// primIndex packs the leaf's primitive count in its high bits
		PxU32 nbToGo = getNbPrimitives(primIndex);
		do
		{
			PxU32 VRef0, VRef1, VRef2;
			getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16);

			if(RayTriOverlapT<RayParams>(StabbedFace, params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params))
			{
				if(StabbedFace.distance<params->mStabbedFace.mDistance)	//### just for a corner case UT in PhysX :(
				{
					updateParamsAfterImpact(params, primIndex, VRef0, VRef1, VRef2, StabbedFace);

#ifndef GU_BV4_USE_SLABS
					// Refresh precomputed ray data with the shorter clip distance
					setupRayData(params, StabbedFace.distance, params->mOrigin_Padded, params->mLocalDir_Padded);
#endif
				}
			}

			primIndex++;
		}while(nbToGo--);

		return 0;
	}
};

// Leaf functor for any-hit raycasts: returns 1 on the first accepted hit so
// the unordered traversal can abort immediately.
class LeafFunction_RaycastAny
{
public:
	static /*PX_FORCE_INLINE*/ Ps::IntBool doLeafTest(RayParams* PX_RESTRICT params, PxU32 primIndex)
	{
		PxU32 nbToGo = getNbPrimitives(primIndex);
		do
		{
			PxU32 VRef0, VRef1, VRef2;
			getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16);

			PX_ALIGN_PREFIX(16) char buffer[sizeof(PxRaycastHit)] PX_ALIGN_SUFFIX(16);
			PxRaycastHit& StabbedFace = reinterpret_cast<PxRaycastHit&>(buffer);
			if(RayTriOverlapT<RayParams>(StabbedFace, params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params))
			{
				if(StabbedFace.distance<params->mStabbedFace.mDistance)	//### just for a corner case UT in PhysX :(
				{
					// Hit data is still recorded: callers read it even for any-hit
					updateParamsAfterImpact(params, primIndex, VRef0, VRef1, VRef2, StabbedFace);
					return 1;
				}
			}

			primIndex++;
		}while(nbToGo--);

		return 0;
	}
};
}

// Rotates p by the 3x3 part of mat (columns must be 16-byte aligned);
// translation is deliberately not applied (used for normals as well as points).
static PX_FORCE_INLINE Vec4V multiply3x3V_Aligned(const Vec4V p, const PxMat44* PX_RESTRICT mat)
{
	const FloatV xxxV = V4GetX(p);
	const FloatV yyyV = V4GetY(p);
	const FloatV zzzV = V4GetZ(p);

	Vec4V ResV = V4Scale(V4LoadA(&mat->column0.x), xxxV);
	ResV = V4Add(ResV, V4Scale(V4LoadA(&mat->column1.x), yyyV));
	ResV = V4Add(ResV, V4Scale(V4LoadA(&mat->column2.x), zzzV));
	return ResV;
}

// Converts the best recorded hit into a full PxRaycastHit: interpolates the
// impact position from the cached triangle vertices and barycentrics, derives
// the (normalized) triangle normal, and transforms both to world space when
// worldm_Aligned is provided. Returns whether a hit was recorded at all.
static PX_FORCE_INLINE Ps::IntBool computeImpactData(PxRaycastHit* PX_RESTRICT hit, const RayParams* PX_RESTRICT params, const PxMat44* PX_RESTRICT worldm_Aligned, PxHitFlags /*hitFlags*/)
{
	if(params->mStabbedFace.mTriangleID!=PX_INVALID_U32 /*&& !params->mEarlyExit*/)	//### PhysX needs the raycast data even for "any hit" :(
	{
		const float u = params->mStabbedFace.mU;
		const float v = params->mStabbedFace.mV;
		const float d = params->mStabbedFace.mDistance;
		const PxU32 id = params->mStabbedFace.mTriangleID;
		hit->u = u;
		hit->v = v;
		hit->distance = d;
		hit->faceIndex = id;

		{
			const Vec4V P0V = V4LoadA_Safe(&params->mP0_PaddedAligned.x);
			const Vec4V P1V = V4LoadA_Safe(&params->mP1_PaddedAligned.x);
			const Vec4V P2V = V4LoadA_Safe(&params->mP2_PaddedAligned.x);

			const FloatV uV = FLoad(params->mStabbedFace.mU);
			const FloatV vV = FLoad(params->mStabbedFace.mV);
			const float w = 1.0f - params->mStabbedFace.mU - params->mStabbedFace.mV;
			const FloatV wV = FLoad(w);
			//pt = (1.0f - u - v)*p0 + u*p1 + v*p2;
			Vec4V LocalPtV = V4Scale(P1V, uV);
			LocalPtV = V4Add(LocalPtV, V4Scale(P2V, vV));
			LocalPtV = V4Add(LocalPtV, V4Scale(P0V, wV));

			const Vec4V LocalNormalV = V4Cross(V4Sub(P0V, P1V), V4Sub(P0V, P2V));

			BV4_ALIGN16(Vec3p tmp_PaddedAligned);
			if(worldm_Aligned)
			{
				const Vec4V TransV = V4LoadA(&worldm_Aligned->column3.x);
				V4StoreU_Safe(V4Add(multiply3x3V_Aligned(LocalPtV, worldm_Aligned), TransV), &hit->position.x);
				V4StoreA_Safe(multiply3x3V_Aligned(LocalNormalV, worldm_Aligned), &tmp_PaddedAligned.x);
			}
			else
			{
				V4StoreU_Safe(LocalPtV, &hit->position.x);
				V4StoreA_Safe(LocalNormalV, &tmp_PaddedAligned.x);
			}
			tmp_PaddedAligned.normalize();
			hit->normal = tmp_PaddedAligned;	// PT: TODO: check asm here (TA34704)
		}
	}
	return params->mStabbedFace.mTriangleID!=PX_INVALID_U32;
}

// Returns a conservative upper bound on how far the ray can travel while still
// intersecting the mesh's local bounds, by projecting origin and bounds onto
// the ray direction; used to clip the initial raycast distance.
static PX_FORCE_INLINE float clipRay(const PxVec3& ray_orig, const PxVec3& ray_dir, const LocalBounds& local_bounds)
{
	const float dpc = local_bounds.mCenter.dot(ray_dir);
	const float dpMin = dpc - local_bounds.mExtentsMagnitude;
	const float dpMax = dpc + local_bounds.mExtentsMagnitude;
	const float dpO = ray_orig.dot(ray_dir);
	const float boxLength = local_bounds.mExtentsMagnitude * 2.0f;
	const float distToBox = PxMin(fabsf(dpMin - dpO), fabsf(dpMax - dpO));
	return distToBox + boxLength * 2.0f;
}

// Common setup for all raycast entry points: transforms the ray into mesh-local
// space, clips maxDist against the mesh bounds, resets the best-hit record, and
// wires up mesh/coefficient pointers (plus the non-slabs precomputed ray data).
template<class ParamsT>
static PX_FORCE_INLINE void setupRayParams(ParamsT* PX_RESTRICT params, const PxVec3& origin, const PxVec3& dir, const BV4Tree* PX_RESTRICT tree, const PxMat44* PX_RESTRICT world, const SourceMesh* PX_RESTRICT mesh, float maxDist, float geomEpsilon, PxU32 flags)
{
	params->mGeomEpsilon = geomEpsilon;
	setupParamsFlags(params, flags);

	computeLocalRay(params->mLocalDir_Padded, params->mOrigin_Padded, dir, origin, world);

	// PT: TODO: clipRay may not be needed with GU_BV4_USE_SLABS (TA34704)
	const float MaxDist = clipRay(params->mOrigin_Padded, params->mLocalDir_Padded, tree->mLocalBounds);
	maxDist = PxMin(maxDist, MaxDist);
	params->mStabbedFace.mDistance = maxDist;
	params->mStabbedFace.mTriangleID = PX_INVALID_U32;	// "no hit yet"

	setupMeshPointersAndQuantizedCoeffs(params, mesh, tree);

#ifndef GU_BV4_USE_SLABS
	setupRayData(params, maxDist, params->mOrigin_Padded, params->mLocalDir_Padded);
#endif
}

#include "GuBV4_Internal.h"
#ifdef GU_BV4_USE_SLABS
	#include "GuBV4_Slabs.h"
#endif
#include "GuBV4_ProcessStreamOrdered_SegmentAABB.h"
#ifdef GU_BV4_USE_SLABS
	#include "GuBV4_Slabs_KajiyaNoOrder.h"
	#include "GuBV4_Slabs_KajiyaOrdered.h"
#endif

#ifndef GU_BV4_USE_SLABS
#ifdef GU_BV4_QUANTIZED_TREE

#define NEW_VERSION

// Segment-vs-AABB separating-axis test against a *quantized* node: dequantizes
// center/extents in-register (low 16 bits = extents, high 16 = center, scaled
// by the tree coefficients), then tests the 3 axis-aligned axes followed by the
// 3 cross-product axes. Returns 1 on overlap.
static PX_FORCE_INLINE /*PX_NOINLINE*/ Ps::IntBool BV4_SegmentAABBOverlap(const BVDataPacked* PX_RESTRICT node, const RayParams* PX_RESTRICT params)
{
#ifdef NEW_VERSION
	SSE_CONST4(maskV, 0x7fffffff);	// clears sign bits => fabs
	SSE_CONST4(maskQV, 0x0000ffff);	// isolates the quantized extents halfword
#else
	const PxU32 maskI = 0x7fffffff;
#endif

	Vec4V centerV = V4LoadA((float*)node->mAABB.mData);
#ifdef NEW_VERSION
	__m128 extentsV = _mm_castsi128_ps(_mm_and_si128(_mm_castps_si128(centerV), SSE_CONST(maskQV)));
#else
	__m128 extentsV = _mm_castsi128_ps(_mm_and_si128(_mm_castps_si128(centerV), _mm_set1_epi32(0x0000ffff)));
#endif
	extentsV = V4Mul(_mm_cvtepi32_ps(_mm_castps_si128(extentsV)), V4LoadA_Safe(&params->mExtentsOrMaxCoeff_PaddedAligned.x));
	// Arithmetic shift recovers the signed 16-bit center values
	centerV = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(centerV), 16));
	centerV = V4Mul(_mm_cvtepi32_ps(_mm_castps_si128(centerV)), V4LoadA_Safe(&params->mCenterOrMinCoeff_PaddedAligned.x));

	const Vec4V fdirV = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
	const Vec4V DV = V4Sub(V4LoadA_Safe(&params->mData2_PaddedAligned.x), centerV);

#ifdef NEW_VERSION
	__m128 absDV = _mm_and_ps(DV, SSE_CONSTF(maskV));
#else
	__m128 absDV = _mm_and_ps(DV, _mm_load1_ps((float*)&maskI));
#endif

	// Axis-aligned axes: separated when |D| > extents + fdir on any of x/y/z
	// (movemask picks up the sign bit of (extents+fdir) - |D|)
	absDV = V4Sub(V4Add(extentsV, fdirV), absDV);
	const PxU32 test = (PxU32)_mm_movemask_ps(absDV);
	if(test&7)
		return 0;

	if(1)
	{
		// Cross-product axes (dir x axis): |f| > g => separated
		const Vec4V dataZYX_V = V4LoadA_Safe(&params->mData_PaddedAligned.x);
		const __m128 dataXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dataZYX_V), _MM_SHUFFLE(3,0,2,1)));
		const __m128 DXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(DV), _MM_SHUFFLE(3,0,2,1)));
		const Vec4V fV = V4Sub(V4Mul(dataZYX_V, DXZY_V), V4Mul(dataXZY_V, DV));

		const Vec4V fdirZYX_V = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
		const __m128 fdirXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(fdirZYX_V), _MM_SHUFFLE(3,0,2,1)));
		const __m128 extentsXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1)));
		// PT: TODO: use V4MulAdd here (TA34704)
		const Vec4V fg = V4Add(V4Mul(extentsV, fdirXZY_V), V4Mul(extentsXZY_V, fdirZYX_V));

#ifdef NEW_VERSION
		__m128 absfV = _mm_and_ps(fV, SSE_CONSTF(maskV));
#else
		__m128 absfV = _mm_and_ps(fV, _mm_load1_ps((float*)&maskI));
#endif
		absfV = V4Sub(fg, absfV);
		const PxU32 test2 = (PxU32)_mm_movemask_ps(absfV);

		if(test2&7)
			return 0;
		return 1;
	}
}
#else
// Segment-vs-AABB separating-axis test against float center/extents: same axis
// tests as the quantized version but uses cmpgt instead of sign-bit subtraction.
static PX_FORCE_INLINE /*PX_NOINLINE*/ Ps::IntBool BV4_SegmentAABBOverlap(const PxVec3& center, const PxVec3& extents, const RayParams* PX_RESTRICT params)
{
	const PxU32 maskI = 0x7fffffff;

	const Vec4V fdirV = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
	const Vec4V extentsV = V4LoadU(&extents.x);

	const Vec4V DV = V4Sub(V4LoadA_Safe(&params->mData2_PaddedAligned.x), V4LoadU(&center.x));	//###center should be aligned

	__m128 absDV = _mm_and_ps(DV, _mm_load1_ps((float*)&maskI));

	// Axis-aligned axes
	absDV = _mm_cmpgt_ps(absDV, V4Add(extentsV, fdirV));
	const PxU32 test = (PxU32)_mm_movemask_ps(absDV);
	if(test&7)
		return 0;

	if(1)
	{
		// Cross-product axes
		const Vec4V dataZYX_V = V4LoadA_Safe(&params->mData_PaddedAligned.x);
		const __m128 dataXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dataZYX_V), _MM_SHUFFLE(3,0,2,1)));
		const __m128 DXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(DV), _MM_SHUFFLE(3,0,2,1)));
		const Vec4V fV = V4Sub(V4Mul(dataZYX_V, DXZY_V), V4Mul(dataXZY_V, DV));

		const Vec4V fdirZYX_V = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
		const __m128 fdirXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(fdirZYX_V), _MM_SHUFFLE(3,0,2,1)));
		const __m128 extentsXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1)));
		// PT: TODO: use V4MulAdd here (TA34704)
		const Vec4V fg = V4Add(V4Mul(extentsV, fdirXZY_V), V4Mul(extentsXZY_V, fdirZYX_V));

		__m128 absfV = _mm_and_ps(fV, _mm_load1_ps((float*)&maskI));
		absfV = _mm_cmpgt_ps(absfV, fg);
		const PxU32 test2 = (PxU32)_mm_movemask_ps(absfV);
		if(test2&7)
			return 0;
		return 1;
	}
}
#endif
#endif

// Main single-hit raycast entry point. Chooses the unordered traversal with an
// early-exit leaf functor for any-hit queries (mEarlyExit), or the ordered
// traversal for closest-hit; falls back to brute force for trees without nodes.
// Returns whether a hit was found; *hit is filled via computeImpactData.
Ps::IntBool BV4_RaycastSingle(const PxVec3& origin, const PxVec3& dir, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxRaycastHit* PX_RESTRICT hit, float maxDist, float geomEpsilon, PxU32 flags, PxHitFlags hitFlags)
{
	const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface;

	RayParams Params;
	setupRayParams(&Params, origin, dir, &tree, worldm_Aligned, mesh, maxDist, geomEpsilon, flags);

	if(tree.mNodes)
	{
		if(Params.mEarlyExit)
			processStreamRayNoOrder(0, LeafFunction_RaycastAny)(tree.mNodes, tree.mInitData, &Params);
		else
			processStreamRayOrdered(0, LeafFunction_RaycastClosest)(tree.mNodes, tree.mInitData, &Params);
	}
	else
		doBruteForceTests<LeafFunction_RaycastAny, LeafFunction_RaycastClosest>(mesh->getNbTriangles(), &Params);

	return computeImpactData(hit, &Params, worldm_Aligned, hitFlags);
}



// Callback-based version

namespace
{

// Raycast params extended with a user callback invoked per intersected triangle.
struct RayParamsCB : RayParams
{
	MeshRayCallback	mCallback;
	void*			mUserData;
};

// Leaf functor for the callback raycast: reports every intersected triangle to
// the user callback; returns 1 (abort traversal) when the callback says HIT_EXIT.
class LeafFunction_RaycastCB
{
public:
	static Ps::IntBool doLeafTest(RayParamsCB* PX_RESTRICT params, PxU32 primIndex)
	{
		PxU32 nbToGo = getNbPrimitives(primIndex);
		do
		{
			PxU32 VRef0, VRef1, VRef2;
			getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16);

			const PxVec3& p0 = params->mVerts[VRef0];
			const PxVec3& p1 = params->mVerts[VRef1];
			const PxVec3& p2 = params->mVerts[VRef2];

			PX_ALIGN_PREFIX(16) char buffer[sizeof(PxRaycastHit)] PX_ALIGN_SUFFIX(16);
			PxRaycastHit& StabbedFace = reinterpret_cast<PxRaycastHit&>(buffer);
			if(RayTriOverlapT<RayParams>(StabbedFace, p0, p1, p2, params))
			{
				HitCode Code = (params->mCallback)(params->mUserData, p0, p1, p2, primIndex, StabbedFace.distance, StabbedFace.u, StabbedFace.v);
				if(Code==HIT_EXIT)
					return 1;

				// PT: TODO: no shrinking here? (TA34704)
			}

			primIndex++;
		}while(nbToGo--);

		return 0;
	}
};

}

#include "GuBV4_ProcessStreamNoOrder_SegmentAABB.h"

// Callback raycast entry point: reports each intersected triangle to 'callback'
// via the unordered traversal (no hit sorting, no distance shrinking).
void BV4_RaycastCB(const PxVec3& origin, const PxVec3& dir, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, float maxDist, float geomEpsilon, PxU32 flags, MeshRayCallback callback, void* userData)
{
	const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface;

	//### beware, some parameters in the struct aren't used
	RayParamsCB Params;
	Params.mCallback = callback;
	Params.mUserData = userData;
	setupRayParams(&Params, origin, dir, &tree, worldm_Aligned, mesh, maxDist, geomEpsilon, flags);

	if(tree.mNodes)
	{
		processStreamRayNoOrder(0, LeafFunction_RaycastCB)(tree.mNodes, tree.mInitData, &Params);
	}
	else
	{
		// No tree: the whole mesh is one implicit leaf (small meshes only)
		const PxU32 nbTris = mesh->getNbTriangles();
		PX_ASSERT(nbTris<16);
//		if(Params.mEarlyExit)
//			LeafFunction_BoxSweepAnyCB::doLeafTest(&Params, nbTris);
//		else
			LeafFunction_RaycastCB::doLeafTest(&Params, nbTris);
	}
}

// Raycast all

namespace
{
// Raycast params extended with a user-supplied hit buffer for multi-hit queries.
struct RayParamsAll : RayParams
{
	PxU32			mNbHits;		// hits written so far
	PxU32			mMaxNbHits;		// capacity of mHits
	PxRaycastHit*	mHits;			// user-supplied output buffer
	const PxMat44*	mWorld_Aligned;	// for per-hit impact transform
	PxHitFlags		mHitFlags;
};

// Leaf functor for multi-hit raycasts: appends every intersected triangle to
// the hit buffer (with full impact data); returns 1 to abort when full.
class LeafFunction_RaycastAll
{
public:
	static /*PX_FORCE_INLINE*/ Ps::IntBool doLeafTest(RayParams* PX_RESTRICT p, PxU32 primIndex)
	{
		RayParamsAll* params = static_cast<RayParamsAll*>(p);

		PxU32 nbToGo = getNbPrimitives(primIndex);
		do
		{
			PxU32 VRef0, VRef1, VRef2;
			getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16);

			// Write the candidate hit directly into the next output slot
			PxRaycastHit& StabbedFace = params->mHits[params->mNbHits];
			if(RayTriOverlapT<RayParams>(StabbedFace, params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params))
			{
				updateParamsAfterImpact(params, primIndex, VRef0, VRef1, VRef2, StabbedFace);

				computeImpactData(&StabbedFace, params, params->mWorld_Aligned, params->mHitFlags);

				params->mNbHits++;
				if(params->mNbHits==params->mMaxNbHits)
					return 1;	// buffer full — stop traversal
			}
			primIndex++;
		}while(nbToGo--);

		return 0;
	}
};
}

// PT: this function is not used yet, but eventually it should be
// Multi-hit raycast: collects up to maxNbHits hits (unsorted) into 'hits'.
// Returns the number of hits written. Requires a built tree (no brute-force path).
PxU32 BV4_RaycastAll(const PxVec3& origin, const PxVec3& dir, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxRaycastHit* PX_RESTRICT hits, PxU32 maxNbHits, float maxDist, float geomEpsilon, PxU32 flags, PxHitFlags hitFlags)
{
	const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface;

	RayParamsAll Params;
	Params.mNbHits = 0;
	Params.mMaxNbHits = maxNbHits;
	Params.mHits = hits;
	Params.mWorld_Aligned = worldm_Aligned;
	Params.mHitFlags = hitFlags;
	setupRayParams(&Params, origin, dir, &tree, worldm_Aligned, mesh, maxDist, geomEpsilon, flags);

	if(tree.mNodes)
	{
		processStreamRayNoOrder(0, LeafFunction_RaycastAll)(tree.mNodes, tree.mInitData, &Params);
	}
	else
	{
		PX_ASSERT(0);	// brute-force path not implemented for "raycast all"
	}
	return Params.mNbHits;
}

#endif
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV4_SLABS_H +#define GU_BV4_SLABS_H + +#include "PsFPU.h" +#include "GuBV4_Common.h" + +#ifdef GU_BV4_USE_SLABS + + // PT: contains code for tree-traversal using the swizzled format. + // PT: ray traversal based on Kay & Kajiya's slab intersection code, but using SIMD to do 4 ray-vs-AABB tests at a time. 
+ // PT: other (ordered or unordered) traversals just process one node at a time, similar to the non-swizzled format. + + #define BV4_SLABS_FIX + #define BV4_SLABS_SORT + + #define PNS_BLOCK3(a, b, c, d) { \ + if(code2 & (1<<a)) { stack[nb++] = tn->getChildData(a); } \ + if(code2 & (1<<b)) { stack[nb++] = tn->getChildData(b); } \ + if(code2 & (1<<c)) { stack[nb++] = tn->getChildData(c); } \ + if(code2 & (1<<d)) { stack[nb++] = tn->getChildData(d); } } \ + + #ifdef GU_BV4_QUANTIZED_TREE + #define OPC_SLABS_GET_MIN_MAX(i) \ + const __m128i minVi = _mm_set_epi32(0, node->mZ[i].mMin, node->mY[i].mMin, node->mX[i].mMin); \ + const Vec4V minCoeffV = V4LoadA_Safe(¶ms->mCenterOrMinCoeff_PaddedAligned.x); \ + Vec4V minV = V4Mul(_mm_cvtepi32_ps(minVi), minCoeffV); \ + const __m128i maxVi = _mm_set_epi32(0, node->mZ[i].mMax, node->mY[i].mMax, node->mX[i].mMax); \ + const Vec4V maxCoeffV = V4LoadA_Safe(¶ms->mExtentsOrMaxCoeff_PaddedAligned.x); \ + Vec4V maxV = V4Mul(_mm_cvtepi32_ps(maxVi), maxCoeffV); \ + + #define OPC_SLABS_GET_CE(i) \ + OPC_SLABS_GET_MIN_MAX(i) \ + const FloatV HalfV = FLoad(0.5f); \ + const Vec4V centerV = V4Scale(V4Add(maxV, minV), HalfV); \ + const Vec4V extentsV = V4Scale(V4Sub(maxV, minV), HalfV); + + #define OPC_SLABS_GET_CE2(i) \ + OPC_SLABS_GET_MIN_MAX(i) \ + const Vec4V centerV = V4Add(maxV, minV); \ + const Vec4V extentsV = V4Sub(maxV, minV); + #else + #define OPC_SLABS_GET_CE(i) \ + const FloatV HalfV = FLoad(0.5f); \ + const Vec4V minV = _mm_set_ps(0.0f, node->mMinZ[i], node->mMinY[i], node->mMinX[i]); \ + const Vec4V maxV = _mm_set_ps(0.0f, node->mMaxZ[i], node->mMaxY[i], node->mMaxX[i]); \ + const Vec4V centerV = V4Scale(V4Add(maxV, minV), HalfV); \ + const Vec4V extentsV = V4Scale(V4Sub(maxV, minV), HalfV); + + #define OPC_SLABS_GET_CE2(i) \ + const Vec4V minV = _mm_set_ps(0.0f, node->mMinZ[i], node->mMinY[i], node->mMinX[i]); \ + const Vec4V maxV = _mm_set_ps(0.0f, node->mMaxZ[i], node->mMaxY[i], node->mMaxX[i]); \ + const Vec4V centerV = 
V4Add(maxV, minV); \ + const Vec4V extentsV = V4Sub(maxV, minV); + #endif // GU_BV4_QUANTIZED_TREE + +#if PX_PS4 + // PT: TODO: for some reason using the intrinsics directly produces a compile error on PS4. TODO: find a better fix. + PX_FORCE_INLINE __m128i my_mm_srai_epi32(__m128i a, int count) + { + return _mm_srai_epi32(a, count); + } + + PX_FORCE_INLINE __m128i my_mm_slli_epi32(__m128i a, int count) + { + return _mm_slli_epi32(a, count); + } +#else + #define my_mm_srai_epi32 _mm_srai_epi32 + #define my_mm_slli_epi32 _mm_slli_epi32 +#endif + +#define OPC_DEQ4(part2xV, part1xV, mMember, minCoeff, maxCoeff) \ +{ \ + part2xV = V4LoadA(reinterpret_cast<const float*>(tn->mMember)); \ + part1xV = _mm_castsi128_ps(_mm_and_si128(_mm_castps_si128(part2xV), _mm_set1_epi32(0x0000ffff))); \ + part1xV = _mm_castsi128_ps(my_mm_srai_epi32(my_mm_slli_epi32(_mm_castps_si128(part1xV), 16), 16)); \ + part1xV = V4Mul(_mm_cvtepi32_ps(_mm_castps_si128(part1xV)), minCoeff); \ + part2xV = _mm_castsi128_ps(my_mm_srai_epi32(_mm_castps_si128(part2xV), 16)); \ + part2xV = V4Mul(_mm_cvtepi32_ps(_mm_castps_si128(part2xV)), maxCoeff); \ +} + +#define SLABS_INIT\ + Vec4V maxT4 = V4Load(params->mStabbedFace.mDistance);\ + const Vec4V rayP = V4LoadU_Safe(¶ms->mOrigin_Padded.x);\ + Vec4V rayD = V4LoadU_Safe(¶ms->mLocalDir_Padded.x);\ + const VecU32V raySign = V4U32and(VecU32V_ReinterpretFrom_Vec4V(rayD), signMask);\ + const Vec4V rayDAbs = V4Abs(rayD);\ + Vec4V rayInvD = Vec4V_ReinterpretFrom_VecU32V(V4U32or(raySign, VecU32V_ReinterpretFrom_Vec4V(V4Max(rayDAbs, epsFloat4))));\ + rayD = rayInvD;\ + rayInvD = V4RecipFast(rayInvD);\ + rayInvD = V4Mul(rayInvD, V4NegMulSub(rayD, rayInvD, twos));\ + const Vec4V rayPinvD = V4NegMulSub(rayInvD, rayP, zeroes);\ + const Vec4V rayInvDsplatX = V4SplatElement<0>(rayInvD);\ + const Vec4V rayInvDsplatY = V4SplatElement<1>(rayInvD);\ + const Vec4V rayInvDsplatZ = V4SplatElement<2>(rayInvD);\ + const Vec4V rayPinvDsplatX = V4SplatElement<0>(rayPinvD);\ + const 
Vec4V rayPinvDsplatY = V4SplatElement<1>(rayPinvD);\ + const Vec4V rayPinvDsplatZ = V4SplatElement<2>(rayPinvD); + +#define SLABS_TEST\ + const Vec4V tminxa0 = V4MulAdd(minx4a, rayInvDsplatX, rayPinvDsplatX);\ + const Vec4V tminya0 = V4MulAdd(miny4a, rayInvDsplatY, rayPinvDsplatY);\ + const Vec4V tminza0 = V4MulAdd(minz4a, rayInvDsplatZ, rayPinvDsplatZ);\ + const Vec4V tmaxxa0 = V4MulAdd(maxx4a, rayInvDsplatX, rayPinvDsplatX);\ + const Vec4V tmaxya0 = V4MulAdd(maxy4a, rayInvDsplatY, rayPinvDsplatY);\ + const Vec4V tmaxza0 = V4MulAdd(maxz4a, rayInvDsplatZ, rayPinvDsplatZ);\ + const Vec4V tminxa = V4Min(tminxa0, tmaxxa0);\ + const Vec4V tmaxxa = V4Max(tminxa0, tmaxxa0);\ + const Vec4V tminya = V4Min(tminya0, tmaxya0);\ + const Vec4V tmaxya = V4Max(tminya0, tmaxya0);\ + const Vec4V tminza = V4Min(tminza0, tmaxza0);\ + const Vec4V tmaxza = V4Max(tminza0, tmaxza0);\ + const Vec4V maxOfNeasa = V4Max(V4Max(tminxa, tminya), tminza);\ + const Vec4V minOfFarsa = V4Min(V4Min(tmaxxa, tmaxya), tmaxza);\ + + #define SLABS_TEST2\ + __m128 ignore4a = _mm_cmpgt_ps(epsFloat4, minOfFarsa); /* if tfar is negative, ignore since its a ray, not a line */\ + ignore4a = _mm_or_ps(ignore4a, _mm_cmpgt_ps(maxOfNeasa, maxT4)); /* if tnear is over maxT, ignore this result */\ + __m128 resa4 = _mm_cmpgt_ps(maxOfNeasa, minOfFarsa); /* if 1 => fail */\ + resa4 = _mm_or_ps(resa4, ignore4a);\ + const int code = _mm_movemask_ps(resa4);\ + if(code==15)\ + continue; + +#define SLABS_PNS \ + if(code2) \ + { \ + if(tn->decodePNSNoShift(0) & dirMask) \ + { \ + if(tn->decodePNSNoShift(1) & dirMask) \ + { \ + if(tn->decodePNSNoShift(2) & dirMask) \ + PNS_BLOCK3(3,2,1,0) \ + else \ + PNS_BLOCK3(2,3,1,0) \ + } \ + else \ + { \ + if(tn->decodePNSNoShift(2) & dirMask) \ + PNS_BLOCK3(3,2,0,1) \ + else \ + PNS_BLOCK3(2,3,0,1) \ + } \ + } \ + else \ + { \ + if(tn->decodePNSNoShift(1) & dirMask) \ + { \ + if(tn->decodePNSNoShift(2) & dirMask) \ + PNS_BLOCK3(1,0,3,2) \ + else \ + PNS_BLOCK3(1,0,2,3) \ + } \ + else \ 
+ { \ + if(tn->decodePNSNoShift(2) & dirMask) \ + PNS_BLOCK3(0,1,3,2) \ + else \ + PNS_BLOCK3(0,1,2,3) \ + } \ + } \ + } + +#if PX_INTEL_FAMILY +namespace +{ + const VecU32V signMask = U4LoadXYZW((PxU32(1)<<31), (PxU32(1)<<31), (PxU32(1)<<31), (PxU32(1)<<31)); + const Vec4V epsFloat4 = V4Load(1e-9f); + const Vec4V zeroes = V4Zero(); + const Vec4V twos = V4Load(2.0f); + const Vec4V epsInflateFloat4 = V4Load(1e-7f); +} +#endif // PX_INTEL_FAMILY + +#endif // GU_BV4_USE_SLABS + +#endif // GU_BV4_SLABS_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_KajiyaNoOrder.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_KajiyaNoOrder.h new file mode 100644 index 00000000..45f4e4a9 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_KajiyaNoOrder.h @@ -0,0 +1,136 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV4_SLABS_KAJIYA_NO_ORDER_H +#define GU_BV4_SLABS_KAJIYA_NO_ORDER_H + + // Kajiya, no sort + template<int inflateT, class LeafTestT, class ParamsT> + static Ps::IntBool BV4_ProcessStreamKajiyaNoOrder(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + /// + + Vec4V fattenAABBsX, fattenAABBsY, fattenAABBsZ; + if(inflateT) + { + Vec4V fattenAABBs4 = V4LoadU_Safe(¶ms->mOriginalExtents_Padded.x); + fattenAABBs4 = V4Add(fattenAABBs4, epsInflateFloat4); // US2385 - shapes are "closed" meaning exactly touching shapes should report overlap + fattenAABBsX = V4SplatElement<0>(fattenAABBs4); + fattenAABBsY = V4SplatElement<1>(fattenAABBs4); + fattenAABBsZ = V4SplatElement<2>(fattenAABBs4); + } + + /// + + SLABS_INIT + +#ifdef GU_BV4_QUANTIZED_TREE + const Vec4V minCoeffV = V4LoadA_Safe(¶ms->mCenterOrMinCoeff_PaddedAligned.x); + const Vec4V maxCoeffV = V4LoadA_Safe(¶ms->mExtentsOrMaxCoeff_PaddedAligned.x); + const Vec4V minCoeffxV = V4SplatElement<0>(minCoeffV); + const Vec4V minCoeffyV = V4SplatElement<1>(minCoeffV); + const Vec4V minCoeffzV = V4SplatElement<2>(minCoeffV); + const Vec4V maxCoeffxV = V4SplatElement<0>(maxCoeffV); + const Vec4V maxCoeffyV = V4SplatElement<1>(maxCoeffV); + const Vec4V maxCoeffzV 
= V4SplatElement<2>(maxCoeffV); +#endif + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + + const BVDataSwizzled* tn = reinterpret_cast<const BVDataSwizzled*>(node); + +#ifdef GU_BV4_QUANTIZED_TREE + Vec4V minx4a; + Vec4V maxx4a; + OPC_DEQ4(maxx4a, minx4a, mX, minCoeffxV, maxCoeffxV) + + Vec4V miny4a; + Vec4V maxy4a; + OPC_DEQ4(maxy4a, miny4a, mY, minCoeffyV, maxCoeffyV) + + Vec4V minz4a; + Vec4V maxz4a; + OPC_DEQ4(maxz4a, minz4a, mZ, minCoeffzV, maxCoeffzV) +#else + Vec4V minx4a = V4LoadA(tn->mMinX); + Vec4V miny4a = V4LoadA(tn->mMinY); + Vec4V minz4a = V4LoadA(tn->mMinZ); + + Vec4V maxx4a = V4LoadA(tn->mMaxX); + Vec4V maxy4a = V4LoadA(tn->mMaxY); + Vec4V maxz4a = V4LoadA(tn->mMaxZ); +#endif + if(inflateT) + { + maxx4a = V4Add(maxx4a, fattenAABBsX); maxy4a = V4Add(maxy4a, fattenAABBsY); maxz4a = V4Add(maxz4a, fattenAABBsZ); + minx4a = V4Sub(minx4a, fattenAABBsX); miny4a = V4Sub(miny4a, fattenAABBsY); minz4a = V4Sub(minz4a, fattenAABBsZ); + } + + SLABS_TEST + + SLABS_TEST2 + +#define DO_LEAF_TEST(x) \ + {if(tn->isLeaf(x)) \ + { \ + if(LeafTestT::doLeafTest(params, tn->getPrimitive(x))) \ + return 1; \ + } \ + else \ + stack[nb++] = tn->getChildData(x);} + + const PxU32 nodeType = getChildType(childData); + if(!(code&8) && nodeType>1) + DO_LEAF_TEST(3) + + if(!(code&4) && nodeType>0) + DO_LEAF_TEST(2) + + if(!(code&2)) + DO_LEAF_TEST(1) + + if(!(code&1)) + DO_LEAF_TEST(0) + + }while(nb); + + return 0; + } +#undef DO_LEAF_TEST + +#endif // GU_BV4_SLABS_KAJIYA_NO_ORDER_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_KajiyaOrdered.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_KajiyaOrdered.h new file mode 100644 index 00000000..4bdcee3a --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_KajiyaOrdered.h @@ -0,0 +1,240 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_BV4_SLABS_KAJIYA_ORDERED_H +#define GU_BV4_SLABS_KAJIYA_ORDERED_H + + // Kajiya + PNS + template<const int inflateT, class LeafTestT, class ParamsT> + static void BV4_ProcessStreamKajiyaOrdered(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + +#ifdef BV4_SLABS_SORT + const PxU32* tmp = reinterpret_cast<const PxU32*>(¶ms->mLocalDir_Padded); + const PxU32 X = tmp[0]>>31; + const PxU32 Y = tmp[1]>>31; + const PxU32 Z = tmp[2]>>31; +// const PxU32 X = PX_IR(params->mLocalDir_Padded.x)>>31; +// const PxU32 Y = PX_IR(params->mLocalDir_Padded.y)>>31; +// const PxU32 Z = PX_IR(params->mLocalDir_Padded.z)>>31; + const PxU32 bitIndex = 3+(Z|(Y<<1)|(X<<2)); + const PxU32 dirMask = 1u<<bitIndex; +#endif + +#ifdef BV4_SLABS_FIX + BV4_ALIGN16(float distances4[4]); +#endif + /// + + Vec4V fattenAABBsX, fattenAABBsY, fattenAABBsZ; + if(inflateT) + { + Vec4V fattenAABBs4 = V4LoadU_Safe(¶ms->mOriginalExtents_Padded.x); + fattenAABBs4 = V4Add(fattenAABBs4, epsInflateFloat4); // US2385 - shapes are "closed" meaning exactly touching shapes should report overlap + fattenAABBsX = V4SplatElement<0>(fattenAABBs4); + fattenAABBsY = V4SplatElement<1>(fattenAABBs4); + fattenAABBsZ = V4SplatElement<2>(fattenAABBs4); + } + + /// + + SLABS_INIT + +#ifdef GU_BV4_QUANTIZED_TREE + const Vec4V minCoeffV = V4LoadA_Safe(¶ms->mCenterOrMinCoeff_PaddedAligned.x); + const Vec4V maxCoeffV = V4LoadA_Safe(¶ms->mExtentsOrMaxCoeff_PaddedAligned.x); + const Vec4V minCoeffxV = V4SplatElement<0>(minCoeffV); + const Vec4V minCoeffyV = V4SplatElement<1>(minCoeffV); + const Vec4V minCoeffzV = V4SplatElement<2>(minCoeffV); + const Vec4V maxCoeffxV = V4SplatElement<0>(maxCoeffV); + const Vec4V maxCoeffyV = V4SplatElement<1>(maxCoeffV); + const Vec4V maxCoeffzV = V4SplatElement<2>(maxCoeffV); +#endif + + do + { + const PxU32 childData = stack[--nb]; + node = 
root + getChildOffset(childData); + + const BVDataSwizzled* tn = reinterpret_cast<const BVDataSwizzled*>(node); + +#ifdef GU_BV4_QUANTIZED_TREE + Vec4V minx4a; + Vec4V maxx4a; + OPC_DEQ4(maxx4a, minx4a, mX, minCoeffxV, maxCoeffxV) + + Vec4V miny4a; + Vec4V maxy4a; + OPC_DEQ4(maxy4a, miny4a, mY, minCoeffyV, maxCoeffyV) + + Vec4V minz4a; + Vec4V maxz4a; + OPC_DEQ4(maxz4a, minz4a, mZ, minCoeffzV, maxCoeffzV) +#else + Vec4V minx4a = V4LoadA(tn->mMinX); + Vec4V miny4a = V4LoadA(tn->mMinY); + Vec4V minz4a = V4LoadA(tn->mMinZ); + + Vec4V maxx4a = V4LoadA(tn->mMaxX); + Vec4V maxy4a = V4LoadA(tn->mMaxY); + Vec4V maxz4a = V4LoadA(tn->mMaxZ); +#endif + if(inflateT) + { + maxx4a = V4Add(maxx4a, fattenAABBsX); maxy4a = V4Add(maxy4a, fattenAABBsY); maxz4a = V4Add(maxz4a, fattenAABBsZ); + minx4a = V4Sub(minx4a, fattenAABBsX); miny4a = V4Sub(miny4a, fattenAABBsY); minz4a = V4Sub(minz4a, fattenAABBsZ); + } + + SLABS_TEST + +#ifdef BV4_SLABS_FIX + if(inflateT) + _mm_store_ps(distances4, maxOfNeasa); +#endif + + SLABS_TEST2 + +#ifdef BV4_SLABS_SORT + #ifdef BV4_SLABS_FIX + // PT: for some unknown reason the PS4/Linux/OSX compilers fail to understand this version +/* #define DO_LEAF_TEST(x) \ + { \ + if(!inflateT) \ + { \ + if(tn->isLeaf(x)) \ + { \ + LeafTestT::doLeafTest(params, tn->getPrimitive(x)); \ + maxT4 = V4Load(params->mStabbedFace.mDistance); \ + } \ + else \ + { \ + code2 |= 1<<x; \ + } \ + } \ + else \ + { \ + if(distances4[x]<params->mStabbedFace.mDistance) \ + { \ + if(tn->isLeaf(x)) \ + { \ + LeafTestT::doLeafTest(params, tn->getPrimitive(x)); \ + maxT4 = V4Load(params->mStabbedFace.mDistance); \ + } \ + else \ + { \ + code2 |= 1<<x; \ + } \ + } \ + } \ + }*/ + + // PT: TODO: check that this version compiles to the same code as above. Redo benchmarks. 
+ #define DO_LEAF_TEST(x) \ + { \ + if(!inflateT || distances4[x]<params->mStabbedFace.mDistance + GU_EPSILON_SAME_DISTANCE) \ + { \ + if(tn->isLeaf(x)) \ + { \ + LeafTestT::doLeafTest(params, tn->getPrimitive(x)); \ + maxT4 = V4Load(params->mStabbedFace.mDistance); \ + } \ + else \ + { \ + code2 |= 1<<x; \ + } \ + } \ + } + + #else + #define DO_LEAF_TEST(x) \ + { \ + if(tn->isLeaf(x)) \ + { \ + LeafTestT::doLeafTest(params, tn->getPrimitive(x)); \ + maxT4 = V4Load(params->mStabbedFace.mDistance); \ + } \ + else \ + { \ + code2 |= 1<<x; \ + } \ + } + #endif + PxU32 code2 = 0; + const PxU32 nodeType = getChildType(childData); + + if(!(code&8) && nodeType>1) + DO_LEAF_TEST(3) + + if(!(code&4) && nodeType>0) + DO_LEAF_TEST(2) + + if(!(code&2)) + DO_LEAF_TEST(1) + + if(!(code&1)) + DO_LEAF_TEST(0) + + SLABS_PNS +#else + #define DO_LEAF_TEST(x) \ + {if(tn->isLeaf(x)) \ + { \ + LeafTestT::doLeafTest(params, tn->getPrimitive(x)); \ + maxT4 = V4Load(params->mStabbedFace.mDistance); \ + } \ + else \ + { \ + stack[nb++] = tn->getChildData(x); \ + }} + + + const PxU32 nodeType = getChildType(childData); + if(!(code&8) && nodeType>1) + DO_LEAF_TEST(3) + + if(!(code&4) && nodeType>0) + DO_LEAF_TEST(2) + + if(!(code&2)) + DO_LEAF_TEST(1) + + if(!(code&1)) + DO_LEAF_TEST(0) +#endif + + }while(nb); + } +#undef DO_LEAF_TEST + +#endif // GU_BV4_SLABS_KAJIYA_ORDERED_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_SwizzledNoOrder.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_SwizzledNoOrder.h new file mode 100644 index 00000000..a7717d7c --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_SwizzledNoOrder.h @@ -0,0 +1,66 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_BV4_SLABS_SWIZZLED_NO_ORDER_H +#define GU_BV4_SLABS_SWIZZLED_NO_ORDER_H + + // Generic, no sort + template<class LeafTestT, class ParamsT> + static Ps::IntBool BV4_ProcessStreamSwizzledNoOrder(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + + const BVDataSwizzled* tn = reinterpret_cast<const BVDataSwizzled*>(node); + + const PxU32 nodeType = getChildType(childData); + + if(nodeType>1 && BV4_ProcessNodeNoOrder_Swizzled<LeafTestT, 3>(stack, nb, tn, params)) + return 1; + if(nodeType>0 && BV4_ProcessNodeNoOrder_Swizzled<LeafTestT, 2>(stack, nb, tn, params)) + return 1; + if(BV4_ProcessNodeNoOrder_Swizzled<LeafTestT, 1>(stack, nb, tn, params)) + return 1; + if(BV4_ProcessNodeNoOrder_Swizzled<LeafTestT, 0>(stack, nb, tn, params)) + return 1; + + }while(nb); + + return 0; + } + +#endif // GU_BV4_SLABS_SWIZZLED_NO_ORDER_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_SwizzledOrdered.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_SwizzledOrdered.h new file mode 100644 index 00000000..4be851e1 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_SwizzledOrdered.h @@ -0,0 +1,74 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_BV4_SLABS_SWIZZLED_ORDERED_H +#define GU_BV4_SLABS_SWIZZLED_ORDERED_H + + // Generic + PNS + template<class LeafTestT, class ParamsT> + static void BV4_ProcessStreamSwizzledOrdered(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + const PxU32* tmp = reinterpret_cast<const PxU32*>(¶ms->mLocalDir_Padded); + const PxU32 X = tmp[0]>>31; + const PxU32 Y = tmp[1]>>31; + const PxU32 Z = tmp[2]>>31; +// const PxU32 X = PX_IR(params->mLocalDir_Padded.x)>>31; +// const PxU32 Y = PX_IR(params->mLocalDir_Padded.y)>>31; +// const PxU32 Z = PX_IR(params->mLocalDir_Padded.z)>>31; + const PxU32 bitIndex = 3+(Z|(Y<<1)|(X<<2)); + const PxU32 dirMask = 1u<<bitIndex; + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + const PxU32 nodeType = getChildType(childData); + + const BVDataSwizzled* tn = reinterpret_cast<const BVDataSwizzled*>(node); + + PxU32 code2 = 0; + BV4_ProcessNodeOrdered2_Swizzled<LeafTestT, 0>(code2, tn, params); + BV4_ProcessNodeOrdered2_Swizzled<LeafTestT, 1>(code2, tn, params); + if(nodeType>0) + BV4_ProcessNodeOrdered2_Swizzled<LeafTestT, 2>(code2, tn, params); + if(nodeType>1) + BV4_ProcessNodeOrdered2_Swizzled<LeafTestT, 3>(code2, tn, params); + + SLABS_PNS + + }while(nb); + } + +#endif // GU_BV4_SLABS_SWIZZLED_ORDERED_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_SphereOverlap.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_SphereOverlap.cpp new file mode 100644 index 00000000..d709e273 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_SphereOverlap.cpp @@ -0,0 +1,330 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "GuBV4.h" +using namespace physx; +using namespace Gu; + +#if PX_INTEL_FAMILY + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#include "GuBV4_Common.h" +#include "GuSphere.h" +#include "GuDistancePointTriangle.h" +#include "PsVecMath.h" + +using namespace physx::shdfnd::aos; + +#if PX_VC +#pragma warning ( disable : 4324 ) +#endif + +// Sphere overlap any + +struct SphereParams +{ + const IndTri32* PX_RESTRICT mTris32; + const IndTri16* PX_RESTRICT mTris16; + const PxVec3* PX_RESTRICT mVerts; + +#ifdef GU_BV4_QUANTIZED_TREE + BV4_ALIGN16(Vec3p mCenterOrMinCoeff_PaddedAligned); + BV4_ALIGN16(Vec3p mExtentsOrMaxCoeff_PaddedAligned); +#endif + + BV4_ALIGN16(PxVec3 mCenter_PaddedAligned); float mRadius2; +#ifdef GU_BV4_USE_SLABS + BV4_ALIGN16(PxVec3 mCenter_PaddedAligned2); float mRadius22; +#endif +}; + +#ifndef GU_BV4_QUANTIZED_TREE +// PT: TODO: refactor with bucket pruner code (TA34704) +static PX_FORCE_INLINE Ps::IntBool BV4_SphereAABBOverlap(const PxVec3& center, const PxVec3& extents, const SphereParams* PX_RESTRICT params) +{ + const Vec4V mCenter = V4LoadA_Safe(¶ms->mCenter_PaddedAligned.x); + const FloatV mRadius2 = FLoad(params->mRadius2); + + const Vec4V boxCenter = V4LoadU(¢er.x); + const Vec4V boxExtents = V4LoadU(&extents.x); + + const Vec4V offset = V4Sub(mCenter, boxCenter); + const Vec4V closest = V4Clamp(offset, V4Neg(boxExtents), boxExtents); + const Vec4V d = V4Sub(offset, closest); + + const PxU32 test = (PxU32)_mm_movemask_ps(FIsGrtrOrEq(mRadius2, V4Dot3(d, d))); + return (test & 0x7) == 0x7; +} +#endif + +static PX_FORCE_INLINE Ps::IntBool __SphereTriangle(const SphereParams* PX_RESTRICT params, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2) +{ + { + const float sqrDist = (p0 - params->mCenter_PaddedAligned).magnitudeSquared(); + if(sqrDist <= params->mRadius2) + return 1; + } + + const PxVec3 edge10 = p1 - p0; + const PxVec3 edge20 = p2 - p0; + const PxVec3 cp = 
closestPtPointTriangle2(params->mCenter_PaddedAligned, p0, p1, p2, edge10, edge20); + const float sqrDist = (cp - params->mCenter_PaddedAligned).magnitudeSquared(); + return sqrDist <= params->mRadius2; +} + +// PT: TODO: evaluate if SIMD distance function would be faster here (TA34704) +// PT: TODO: __fastcall removed to make it compile everywhere. Revisit. +static /*PX_FORCE_INLINE*/ Ps::IntBool /*__fastcall*/ __SphereTriangle(const SphereParams* PX_RESTRICT params, PxU32 primIndex) +{ + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + return __SphereTriangle(params, params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2]); +} + +namespace +{ +class LeafFunction_SphereOverlapAny +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(const SphereParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(__SphereTriangle(params, primIndex)) + return 1; + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +template<class ParamsT> +static PX_FORCE_INLINE void setupSphereParams(ParamsT* PX_RESTRICT params, const Sphere& sphere, const BV4Tree* PX_RESTRICT tree, const PxMat44* PX_RESTRICT worldm_Aligned, const SourceMesh* PX_RESTRICT mesh) +{ + computeLocalSphere(params->mRadius2, params->mCenter_PaddedAligned, sphere, worldm_Aligned); + +#ifdef GU_BV4_USE_SLABS + params->mCenter_PaddedAligned2 = params->mCenter_PaddedAligned*2.0f; + params->mRadius22 = params->mRadius2*4.0f; +#endif + + setupMeshPointersAndQuantizedCoeffs(params, mesh, tree); +} + +#include "GuBV4_Internal.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs.h" + + static PX_FORCE_INLINE Ps::IntBool BV4_SphereAABBOverlap(const Vec4V boxCenter, const Vec4V boxExtents, const SphereParams* PX_RESTRICT params) + { + const Vec4V mCenter = V4LoadA_Safe(¶ms->mCenter_PaddedAligned2.x); + const FloatV mRadius2 = FLoad(params->mRadius22); + + const Vec4V offset = 
V4Sub(mCenter, boxCenter); + const Vec4V closest = V4Clamp(offset, V4Neg(boxExtents), boxExtents); + const Vec4V d = V4Sub(offset, closest); + + const PxU32 test = PxU32(_mm_movemask_ps(FIsGrtrOrEq(mRadius2, V4Dot3(d, d)))); + return (test & 0x7) == 0x7; + } +#else + #ifdef GU_BV4_QUANTIZED_TREE + static PX_FORCE_INLINE Ps::IntBool BV4_SphereAABBOverlap(const BVDataPacked* PX_RESTRICT node, const SphereParams* PX_RESTRICT params) + { + const __m128i testV = _mm_load_si128((__m128i*)node->mAABB.mData); + const __m128i qextentsV = _mm_and_si128(testV, _mm_set1_epi32(0x0000ffff)); + const __m128i qcenterV = _mm_srai_epi32(testV, 16); + const Vec4V boxCenter = V4Mul(_mm_cvtepi32_ps(qcenterV), V4LoadA_Safe(¶ms->mCenterOrMinCoeff_PaddedAligned.x)); + const Vec4V boxExtents = V4Mul(_mm_cvtepi32_ps(qextentsV), V4LoadA_Safe(¶ms->mExtentsOrMaxCoeff_PaddedAligned.x)); + + const Vec4V mCenter = V4LoadA_Safe(¶ms->mCenter_PaddedAligned.x); + const FloatV mRadius2 = FLoad(params->mRadius2); + + const Vec4V offset = V4Sub(mCenter, boxCenter); + const Vec4V closest = V4Clamp(offset, V4Neg(boxExtents), boxExtents); + const Vec4V d = V4Sub(offset, closest); + + const PxU32 test = (PxU32)_mm_movemask_ps(FIsGrtrOrEq(mRadius2, V4Dot3(d, d))); + return (test & 0x7) == 0x7; + } + #endif +#endif + +#include "GuBV4_ProcessStreamNoOrder_SphereAABB.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs_SwizzledNoOrder.h" +#endif + +Ps::IntBool BV4_OverlapSphereAny(const Sphere& sphere, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + SphereParams Params; + setupSphereParams(&Params, sphere, &tree, worldm_Aligned, mesh); + + if(tree.mNodes) + { + return processStreamNoOrder<LeafFunction_SphereOverlapAny>(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + return LeafFunction_SphereOverlapAny::doLeafTest(&Params, nbTris); + } +} + +// Sphere 
overlap all + +struct SphereParamsAll : SphereParams +{ + PxU32 mNbHits; + PxU32 mMaxNbHits; + PxU32* mHits; +}; + +namespace +{ +class LeafFunction_SphereOverlapAll +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(SphereParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(__SphereTriangle(params, primIndex)) + { + SphereParamsAll* ParamsAll = static_cast<SphereParamsAll*>(params); + ParamsAll->mHits[ParamsAll->mNbHits] = primIndex; + ParamsAll->mNbHits++; + if(ParamsAll->mNbHits==ParamsAll->mMaxNbHits) + return 1; + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +PxU32 BV4_OverlapSphereAll(const Sphere& sphere, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxU32* results, PxU32 size, bool& overflow) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + SphereParamsAll Params; + Params.mNbHits = 0; + Params.mMaxNbHits = size; + Params.mHits = results; + + setupSphereParams(&Params, sphere, &tree, worldm_Aligned, mesh); + + if(tree.mNodes) + { + overflow = processStreamNoOrder<LeafFunction_SphereOverlapAll>(tree.mNodes, tree.mInitData, &Params)!=0; + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + overflow = LeafFunction_SphereOverlapAll::doLeafTest(&Params, nbTris)!=0; + } + return Params.mNbHits; +} + + +// Sphere overlap - callback version + +struct SphereParamsCB : SphereParams +{ + MeshOverlapCallback mCallback; + void* mUserData; +}; + +namespace +{ +class LeafFunction_SphereOverlapCB +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(const SphereParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + 
if(__SphereTriangle(params, p0, p1, p2)) + { + const PxU32 vrefs[3] = { VRef0, VRef1, VRef2 }; + if((params->mCallback)(params->mUserData, p0, p1, p2, primIndex, vrefs)) + return 1; + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +// PT: this one is currently not used +void BV4_OverlapSphereCB(const Sphere& sphere, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, MeshOverlapCallback callback, void* userData) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + SphereParamsCB Params; + Params.mCallback = callback; + Params.mUserData = userData; + setupSphereParams(&Params, sphere, &tree, worldm_Aligned, mesh); + + if(tree.mNodes) + { + processStreamNoOrder<LeafFunction_SphereOverlapCB>(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + LeafFunction_SphereOverlapCB::doLeafTest(&Params, nbTris); + } +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_SphereSweep.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_SphereSweep.cpp new file mode 100644 index 00000000..c955c7f5 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_SphereSweep.cpp @@ -0,0 +1,388 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxSimpleTypes.h" +#include "foundation/PxMat44.h" +#include "GuBV4.h" +#include "GuBox.h" +#include "GuSphere.h" +#include "GuSIMDHelpers.h" +#include "GuSweepSphereTriangle.h" + +using namespace physx; +using namespace Gu; + +#if PX_INTEL_FAMILY + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#include "GuBV4_Common.h" + +// PT: for sphere-sweeps we use method 3 in \\sw\physx\PhysXSDK\3.4\trunk\InternalDocumentation\GU\Sweep strategies.ppt + +namespace +{ + // PT: TODO: refactor structure (TA34704) + struct RayParams + { +#ifdef GU_BV4_QUANTIZED_TREE + BV4_ALIGN16(Vec3p mCenterOrMinCoeff_PaddedAligned); + BV4_ALIGN16(Vec3p mExtentsOrMaxCoeff_PaddedAligned); +#endif +#ifndef GU_BV4_USE_SLABS + BV4_ALIGN16(Vec3p mData2_PaddedAligned); + BV4_ALIGN16(Vec3p mFDir_PaddedAligned); + BV4_ALIGN16(Vec3p mData_PaddedAligned); +#endif + BV4_ALIGN16(Vec3p mLocalDir_Padded); // PT: TODO: this one could be switched to PaddedAligned & V4LoadA (TA34704) + BV4_ALIGN16(Vec3p mOrigin_Padded); // PT: TODO: this one could be switched to PaddedAligned & V4LoadA (TA34704) + }; + + struct SphereSweepParams : RayParams + { + const IndTri32* PX_RESTRICT mTris32; + const IndTri16* PX_RESTRICT mTris16; + const PxVec3* PX_RESTRICT mVerts; + + PxVec3 mOriginalExtents_Padded; + + RaycastHitInternal mStabbedFace; + PxU32 mBackfaceCulling; + PxU32 mEarlyExit; + + PxVec3 mP0, mP1, mP2; + PxVec3 mBestTriNormal; + float mBestAlignmentValue; + float mBestDistance; + float mMaxDist; + }; +} + +#include "GuBV4_AABBAABBSweepTest.h" + +// PT: TODO: __fastcall removed to make it compile everywhere. Revisit. 
+static bool /*__fastcall*/ triSphereSweep(SphereSweepParams* PX_RESTRICT params, PxU32 primIndex, bool nodeSorting=true) +{ + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + PxVec3 normal = (p1 - p0).cross(p2 - p0); + + // Backface culling + const bool culled = params->mBackfaceCulling && normal.dot(params->mLocalDir_Padded) > 0.0f; + if(culled) + return false; + + const PxTriangle T(p0, p1, p2); // PT: TODO: check potential bad ctor/dtor here (TA34704) <= or avoid creating the tri, not needed anymore + + normal.normalize(); + + // PT: TODO: we lost some perf when switching to PhysX version. Revisit/investigate. (TA34704) + float dist; + bool directHit; + if(!sweepSphereVSTri(T.verts, normal, params->mOrigin_Padded, params->mOriginalExtents_Padded.x, params->mLocalDir_Padded, dist, directHit, true)) + return false; + + const PxReal distEpsilon = GU_EPSILON_SAME_DISTANCE; // pick a farther hit within distEpsilon that is more opposing than the previous closest hit + const PxReal alignmentValue = computeAlignmentValue(normal, params->mLocalDir_Padded); + if(keepTriangle(dist, alignmentValue, params->mBestDistance, params->mBestAlignmentValue, params->mMaxDist, distEpsilon)) + { + params->mStabbedFace.mDistance = dist; + params->mStabbedFace.mTriangleID = primIndex; + params->mP0 = p0; + params->mP1 = p1; + params->mP2 = p2; + params->mBestDistance = PxMin(params->mBestDistance, dist); // exact lower bound + params->mBestAlignmentValue = alignmentValue; + params->mBestTriNormal = normal; + if(nodeSorting) + { +#ifndef GU_BV4_USE_SLABS + setupRayData(params, dist, params->mOrigin_Padded, params->mLocalDir_Padded); +#endif + } + return true; + } + return false; +} + +namespace +{ +class LeafFunction_SphereSweepClosest +{ +public: + static PX_FORCE_INLINE void 
doLeafTest(SphereSweepParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + triSphereSweep(params, primIndex); + primIndex++; + }while(nbToGo--); + } +}; + +class LeafFunction_SphereSweepAny +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(SphereSweepParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(triSphereSweep(params, primIndex)) + return 1; + + primIndex++; + }while(nbToGo--); + + return 0; + } +}; + +class ImpactFunctionSphere +{ +public: + static PX_FORCE_INLINE void computeImpact(PxVec3& impactPos, PxVec3& impactNormal, const Sphere& sphere, const PxVec3& dir, const PxReal t, const TrianglePadded& triangle) + { + computeSphereTriImpactData(impactPos, impactNormal, sphere.center, dir, t, triangle); + } +}; +} + +template<class ParamsT> +static PX_FORCE_INLINE void setupSphereParams(ParamsT* PX_RESTRICT params, const Sphere& sphere, const PxVec3& dir, float maxDist, const BV4Tree* PX_RESTRICT tree, const PxMat44* PX_RESTRICT worldm_Aligned, const SourceMesh* PX_RESTRICT mesh, PxU32 flags) +{ + params->mOriginalExtents_Padded = PxVec3(sphere.radius); + params->mStabbedFace.mTriangleID = PX_INVALID_U32; + params->mStabbedFace.mDistance = maxDist; + params->mBestDistance = PX_MAX_REAL; + params->mBestAlignmentValue = 2.0f; + params->mMaxDist = maxDist; + setupParamsFlags(params, flags); + + setupMeshPointersAndQuantizedCoeffs(params, mesh, tree); + + computeLocalRay(params->mLocalDir_Padded, params->mOrigin_Padded, dir, sphere.center, worldm_Aligned); + +#ifndef GU_BV4_USE_SLABS + setupRayData(params, maxDist, params->mOrigin_Padded, params->mLocalDir_Padded); +#endif +} + +#include "GuBV4_Internal.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs.h" +#endif +#include "GuBV4_ProcessStreamOrdered_SegmentAABB_Inflated.h" +#include "GuBV4_ProcessStreamNoOrder_SegmentAABB_Inflated.h" +#ifdef GU_BV4_USE_SLABS + #include 
"GuBV4_Slabs_KajiyaNoOrder.h" + #include "GuBV4_Slabs_KajiyaOrdered.h" +#endif + +Ps::IntBool BV4_SphereSweepSingle(const Sphere& sphere, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepHit* PX_RESTRICT hit, PxU32 flags) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + SphereSweepParams Params; + setupSphereParams(&Params, sphere, dir, maxDist, &tree, worldm_Aligned, mesh, flags); + + if(tree.mNodes) + { + if(Params.mEarlyExit) + processStreamRayNoOrder(1, LeafFunction_SphereSweepAny)(tree.mNodes, tree.mInitData, &Params); + else + processStreamRayOrdered(1, LeafFunction_SphereSweepClosest)(tree.mNodes, tree.mInitData, &Params); + } + else + doBruteForceTests<LeafFunction_SphereSweepAny, LeafFunction_SphereSweepClosest>(mesh->getNbTriangles(), &Params); + + return computeImpactDataT<ImpactFunctionSphere>(sphere, dir, hit, &Params, worldm_Aligned, (flags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (flags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); +} + +// PT: sphere sweep callback version - currently not used + +namespace +{ + struct SphereSweepParamsCB : SphereSweepParams + { + // PT: these new members are only here to call computeImpactDataT during traversal :( + // PT: TODO: most of them may not be needed if we just move sphere to local space before traversal + Sphere mSphere; // Sphere in original space (maybe not local/mesh space) + PxVec3 mDir; // Dir in original space (maybe not local/mesh space) + const PxMat44* mWorldm_Aligned; + PxU32 mFlags; + + SweepUnlimitedCallback mCallback; + void* mUserData; + float mMaxDist; + bool mNodeSorting; + }; + +class LeafFunction_SphereSweepCB +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(SphereSweepParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(triSphereSweep(params, primIndex, params->mNodeSorting)) + { + // PT: TODO: in this version we must compute the impact data immediately, + // 
which is a terrible idea in general, but I'm not sure what else I can do. + SweepHit hit; + const bool b = computeImpactDataT<ImpactFunctionSphere>(params->mSphere, params->mDir, &hit, params, params->mWorldm_Aligned, (params->mFlags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (params->mFlags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); + PX_ASSERT(b); + PX_UNUSED(b); + + reportUnlimitedCallbackHit(params, hit); + } + + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +// PT: for design decisions in this function, refer to the comments of BV4_GenericSweepCB(). +void BV4_SphereSweepCB(const Sphere& sphere, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + SphereSweepParamsCB Params; + Params.mSphere = sphere; + Params.mDir = dir; + Params.mWorldm_Aligned = worldm_Aligned; + Params.mFlags = flags; + + Params.mCallback = callback; + Params.mUserData = userData; + Params.mMaxDist = maxDist; + Params.mNodeSorting = nodeSorting; + setupSphereParams(&Params, sphere, dir, maxDist, &tree, worldm_Aligned, mesh, flags); + + PX_ASSERT(!Params.mEarlyExit); + + if(tree.mNodes) + { + if(nodeSorting) + processStreamRayOrdered(1, LeafFunction_SphereSweepCB)(tree.mNodes, tree.mInitData, &Params); + else + processStreamRayNoOrder(1, LeafFunction_SphereSweepCB)(tree.mNodes, tree.mInitData, &Params); + } + else + doBruteForceTests<LeafFunction_SphereSweepCB, LeafFunction_SphereSweepCB>(mesh->getNbTriangles(), &Params); +} + + +// Old box sweep callback version, using sphere code + +namespace +{ +struct BoxSweepParamsCB : SphereSweepParams +{ + MeshSweepCallback mCallback; + void* mUserData; +}; + +class ExLeafTestSweepCB +{ +public: + static PX_FORCE_INLINE void doLeafTest(BoxSweepParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + PxU32 VRef0, 
VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + { +// const PxU32 vrefs[3] = { VRef0, VRef1, VRef2 }; + float dist = params->mStabbedFace.mDistance; + if((params->mCallback)(params->mUserData, params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], primIndex, /*vrefs,*/ dist)) + return; + + if(dist<params->mStabbedFace.mDistance) + { + params->mStabbedFace.mDistance = dist; +#ifndef GU_BV4_USE_SLABS + setupRayData(params, dist, params->mOrigin_Padded, params->mLocalDir_Padded); +#endif + } + } + + primIndex++; + }while(nbToGo--); + } +}; +} + +void BV4_GenericSweepCB_Old(const PxVec3& origin, const PxVec3& extents, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, MeshSweepCallback callback, void* userData) +{ + BoxSweepParamsCB Params; + Params.mCallback = callback; + Params.mUserData = userData; + Params.mOriginalExtents_Padded = extents; + + Params.mStabbedFace.mTriangleID = PX_INVALID_U32; + Params.mStabbedFace.mDistance = maxDist; + + computeLocalRay(Params.mLocalDir_Padded, Params.mOrigin_Padded, dir, origin, worldm_Aligned); + +#ifndef GU_BV4_USE_SLABS + setupRayData(&Params, maxDist, Params.mOrigin_Padded, Params.mLocalDir_Padded); +#endif + + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + setupMeshPointersAndQuantizedCoeffs(&Params, mesh, &tree); + + if(tree.mNodes) + { + processStreamRayOrdered(1, ExLeafTestSweepCB)(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + ExLeafTestSweepCB::doLeafTest(&Params, nbTris); + } +} + +#endif + diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuMeshData.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMeshData.h new file mode 100644 index 00000000..37cdbcfc --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMeshData.h @@ -0,0 +1,298 @@ +// This code contains NVIDIA Confidential Information and is disclosed to 
you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_MESH_DATA_H +#define GU_MESH_DATA_H + +#include "foundation/PxSimpleTypes.h" +#include "foundation/PxVec4.h" +#include "foundation/PxBounds3.h" +#include "PsUserAllocated.h" +#include "CmPhysXCommon.h" +#include "PsAllocator.h" +#include "PxTriangleMesh.h" +#include "GuRTree.h" +#include "GuBV4.h" +#include "GuBV32.h" + +namespace physx +{ + +#define RTREE_COOK_VERSION 1 + +namespace Gu { + +// 1: support stackless collision trees for non-recursive collision queries +// 2: height field functionality not supported anymore +// 3: mass struct removed +// 4: bounding sphere removed +// 5: RTree added, opcode tree still in the binary image, physx 3.0 +// 6: opcode tree removed from binary image +// 7: convex decomposition is out +// 8: adjacency information added +// 9: removed leaf triangles and most of opcode data, changed rtree layout +// 10: float rtrees +// 11: new build, isLeaf added to page +// 12: isLeaf is now the lowest bit in ptrs +// 13: TA30159 removed deprecated convexEdgeThreshold and bumped version +// 14: added midphase ID + +#define PX_MESH_VERSION 14 + +// these flags are used to indicate/validate the contents of a cooked mesh file +enum InternalMeshSerialFlag +{ + IMSF_MATERIALS = (1<<0), //!< if set, the cooked mesh file contains per-triangle material indices + IMSF_FACE_REMAP = (1<<1), //!< if set, the cooked mesh file contains a remap table + IMSF_8BIT_INDICES = (1<<2), //!< if set, the cooked mesh file contains 8bit indices (topology) + IMSF_16BIT_INDICES = (1<<3), //!< if set, the cooked mesh file contains 16bit indices (topology) + IMSF_ADJACENCIES = (1<<4), //!< if set, the cooked mesh file contains adjacency structures + IMSF_GRB_DATA = (1<<5) //!< if set, the cooked mesh file contains GRB data structures +}; + + + +#if PX_VC +#pragma warning(push) +#pragma warning(disable: 4324) // Padding was added at the end of a structure because of a __declspec(align) value. 
+#endif + + class TriangleMeshData : public Ps::UserAllocated + { + public: + PxMeshMidPhase::Enum mType; + + PxU32 mNbVertices; + PxU32 mNbTriangles; + PxVec3* mVertices; + void* mTriangles; + + PxBounds3 mAABB; + PxU8* mExtraTrigData; + PxReal mGeomEpsilon; + + PxU8 mFlags; + PxU16* mMaterialIndices; + PxU32* mFaceRemap; + PxU32* mAdjacencies; + + // GRB data ------------------------- + void * mGRB_triIndices; //!< GRB: GPU-friendly tri indices(uint3) + + // TODO avoroshilov: adjacency info - duplicated, remove it and use 'mAdjacencies' and 'mExtraTrigData' see GuTriangleMesh.cpp:325 + void * mGRB_triAdjacencies; //!< GRB: adjacency data, with BOUNDARY and NONCONVEX flags (flags replace adj indices where applicable) [uin4] + PxU32 * mGRB_vertValency; //!< GRB: number of adjacent vertices to a vertex + PxU32 * mGRB_adjVertStart; //!< GRB: offset for each vertex in the adjacency list + PxU32 * mGRB_adjVertices; //!< GRB: list of adjacent vertices + + PxU32 mGRB_meshAdjVerticiesTotal; //!< GRB: total number of indices in the 'mGRB_adjVertices' + PxU32* mGRB_faceRemap; //!< GRB: this remap the GPU triangle indices to CPU triangle indices + + void* mGRB_BV32Tree; + // End of GRB data ------------------ + + TriangleMeshData() : + mNbVertices (0), + mNbTriangles (0), + mVertices (NULL), + mTriangles (NULL), + mAABB (PxBounds3::empty()), + mExtraTrigData (NULL), + mGeomEpsilon (0.0f), + mFlags (0), + mMaterialIndices (NULL), + mFaceRemap (NULL), + mAdjacencies (NULL), + + mGRB_triIndices (NULL), + mGRB_triAdjacencies (NULL), + mGRB_vertValency (NULL), + mGRB_adjVertStart (NULL), + mGRB_adjVertices (NULL), + + mGRB_meshAdjVerticiesTotal (0), + mGRB_faceRemap (NULL), + mGRB_BV32Tree (NULL) + + { + } + + virtual ~TriangleMeshData() + { + if(mVertices) + PX_FREE(mVertices); + if(mTriangles) + PX_FREE(mTriangles); + if(mMaterialIndices) + PX_DELETE_POD(mMaterialIndices); + if(mFaceRemap) + PX_DELETE_POD(mFaceRemap); + if(mAdjacencies) + PX_DELETE_POD(mAdjacencies); + 
if(mExtraTrigData) + PX_DELETE_POD(mExtraTrigData); + + + if (mGRB_triIndices) + PX_FREE(mGRB_triIndices); + if (mGRB_triAdjacencies) + PX_DELETE_POD(mGRB_triAdjacencies); + if (mGRB_vertValency) + PX_DELETE_POD(mGRB_vertValency); + if (mGRB_adjVertStart) + PX_DELETE_POD(mGRB_adjVertStart); + if (mGRB_adjVertices) + PX_DELETE_POD(mGRB_adjVertices); + + if (mGRB_faceRemap) + PX_DELETE_POD(mGRB_faceRemap); + + if (mGRB_BV32Tree) + { + Gu::BV32Tree* bv32Tree = reinterpret_cast<BV32Tree*>(mGRB_BV32Tree); + PX_DELETE(bv32Tree); + mGRB_BV32Tree = NULL; + } + + + } + + + PxVec3* allocateVertices(PxU32 nbVertices) + { + PX_ASSERT(!mVertices); + // PT: we allocate one more vertex to make sure it's safe to V4Load the last one + const PxU32 nbAllocatedVerts = nbVertices + 1; + mVertices = reinterpret_cast<PxVec3*>(PX_ALLOC(nbAllocatedVerts * sizeof(PxVec3), "PxVec3")); + mNbVertices = nbVertices; + return mVertices; + } + + void* allocateTriangles(PxU32 nbTriangles, bool force32Bit, PxU32 allocateGPUData = 0) + { + PX_ASSERT(mNbVertices); + PX_ASSERT(!mTriangles); + + bool index16 = mNbVertices <= 0xffff && !force32Bit; + if(index16) + mFlags |= PxTriangleMeshFlag::e16_BIT_INDICES; + + mTriangles = PX_ALLOC(nbTriangles * (index16 ? sizeof(PxU16) : sizeof(PxU32)) * 3, "mTriangles"); + if (allocateGPUData) + mGRB_triIndices = PX_ALLOC(nbTriangles * (index16 ? 
sizeof(PxU16) : sizeof(PxU32)) * 3, "mGRB_triIndices"); + mNbTriangles = nbTriangles; + return mTriangles; + } + + PxU16* allocateMaterials() + { + PX_ASSERT(mNbTriangles); + PX_ASSERT(!mMaterialIndices); + mMaterialIndices = PX_NEW(PxU16)[mNbTriangles]; + return mMaterialIndices; + } + + PxU32* allocateAdjacencies() + { + PX_ASSERT(mNbTriangles); + PX_ASSERT(!mAdjacencies); + mAdjacencies = PX_NEW(PxU32)[mNbTriangles*3]; + mFlags |= PxTriangleMeshFlag::eADJACENCY_INFO; + return mAdjacencies; + } + + PxU32* allocateFaceRemap() + { + PX_ASSERT(mNbTriangles); + PX_ASSERT(!mFaceRemap); + mFaceRemap = PX_NEW(PxU32)[mNbTriangles]; + return mFaceRemap; + } + + PxU8* allocateExtraTrigData() + { + PX_ASSERT(mNbTriangles); + PX_ASSERT(!mExtraTrigData); + mExtraTrigData = PX_NEW(PxU8)[mNbTriangles]; + return mExtraTrigData; + } + + PX_FORCE_INLINE void setTriangleAdjacency(PxU32 triangleIndex, PxU32 adjacency, PxU32 offset) + { + PX_ASSERT(mAdjacencies); + mAdjacencies[triangleIndex*3 + offset] = adjacency; + } + + PX_FORCE_INLINE bool has16BitIndices() const + { + return (mFlags & PxTriangleMeshFlag::e16_BIT_INDICES) ? 
true : false; + } + }; + + class RTreeTriangleData : public TriangleMeshData + { + public: + RTreeTriangleData() { mType = PxMeshMidPhase::eBVH33; } + virtual ~RTreeTriangleData() {} + + Gu::RTree mRTree; + }; + + class BV4TriangleData : public TriangleMeshData + { + public: + BV4TriangleData() { mType = PxMeshMidPhase::eBVH34; } + virtual ~BV4TriangleData() {} + + Gu::SourceMesh mMeshInterface; + Gu::BV4Tree mBV4Tree; + }; + + + class BV32TriangleData : public TriangleMeshData + { + public: + //using the same type as BV4 + BV32TriangleData() { mType = PxMeshMidPhase::eBVH34; } + virtual ~BV32TriangleData() {} + + Gu::SourceMesh mMeshInterface; + Gu::BV32Tree mBV32Tree; + }; + +#if PX_VC +#pragma warning(pop) +#endif + + +} // namespace Gu + +} + +#endif // #ifdef GU_MESH_DATA_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuMeshQuery.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMeshQuery.cpp new file mode 100644 index 00000000..74b352d5 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMeshQuery.cpp @@ -0,0 +1,312 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxProfiler.h" +#include "PxMeshQuery.h" +#include "GuInternal.h" +#include "PxSphereGeometry.h" +#include "PxGeometryQuery.h" +#include "GuEntityReport.h" +#include "GuHeightFieldUtil.h" +#include "GuBoxConversion.h" +#include "GuIntersectionTriangleBox.h" +#include "CmScaling.h" +#include "GuSweepTests.h" +#include "GuSIMDHelpers.h" +#include "GuMidphaseInterface.h" +#include "PsFPU.h" + +using namespace physx; +using namespace Gu; + +namespace { + + class HfTrianglesEntityReport2 : public EntityReport<PxU32>, public LimitedResults + { + public: + HfTrianglesEntityReport2( + PxU32* results, PxU32 maxResults, PxU32 startIndex, + HeightFieldUtil& hfUtil, + const PxVec3& boxCenter, const PxVec3& boxExtents, const PxQuat& boxRot, + bool aabbOverlap) : + LimitedResults (results, maxResults, startIndex), + mHfUtil (hfUtil), + mAABBOverlap (aabbOverlap) + { + buildFrom(mBox2Hf, boxCenter, boxExtents, boxRot); + } + + virtual bool onEvent(PxU32 nbEntities, PxU32* entities) + { + if(mAABBOverlap) + { + while(nbEntities--) + if(!add(*entities++)) + return false; + } + else + { + const PxTransform idt(PxIdentity); + for(PxU32 i=0; 
i<nbEntities; i++) + { + TrianglePadded tri; + mHfUtil.getTriangle(idt, tri, NULL, NULL, entities[i], false, false); // First parameter not needed if local space triangle is enough + + // PT: this one is safe because triangle class is padded + if(intersectTriangleBox(mBox2Hf, tri.verts[0], tri.verts[1], tri.verts[2])) + { + if(!add(entities[i])) + return false; + } + } + } + return true; + } + + HeightFieldUtil& mHfUtil; + BoxPadded mBox2Hf; + bool mAABBOverlap; + + private: + HfTrianglesEntityReport2& operator=(const HfTrianglesEntityReport2&); + }; + + +} // namespace + +void physx::PxMeshQuery::getTriangle(const PxTriangleMeshGeometry& triGeom, const PxTransform& globalPose, PxTriangleID triangleIndex, PxTriangle& triangle, PxU32* vertexIndices, PxU32* adjacencyIndices) +{ + TriangleMesh* tm = static_cast<TriangleMesh*>(triGeom.triangleMesh); + + PX_CHECK_AND_RETURN(triangleIndex<tm->getNbTriangles(), "PxMeshQuery::getTriangle: triangle index is out of bounds"); + + if(adjacencyIndices && !tm->getAdjacencies()) + Ps::getFoundation().error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, "Adjacency information not created. 
Set buildTriangleAdjacencies on Cooking params."); + + const Cm::Matrix34 vertex2worldSkew = globalPose * triGeom.scale; + tm->computeWorldTriangle(triangle, triangleIndex, vertex2worldSkew, triGeom.scale.hasNegativeDeterminant(), vertexIndices, adjacencyIndices); +} + +/////////////////////////////////////////////////////////////////////////////// + +void physx::PxMeshQuery::getTriangle(const PxHeightFieldGeometry& hfGeom, const PxTransform& globalPose, PxTriangleID triangleIndex, PxTriangle& triangle, PxU32* vertexIndices, PxU32* adjacencyIndices) +{ + HeightFieldUtil hfUtil(hfGeom); + + hfUtil.getTriangle(globalPose, triangle, vertexIndices, adjacencyIndices, triangleIndex, true, true); +} + +/////////////////////////////////////////////////////////////////////////////// + +PxU32 physx::PxMeshQuery::findOverlapTriangleMesh( + const PxGeometry& geom, const PxTransform& geomPose, + const PxTriangleMeshGeometry& meshGeom, const PxTransform& meshPose, + PxU32* results, PxU32 maxResults, PxU32 startIndex, bool& overflow) +{ + PX_SIMD_GUARD; + + LimitedResults limitedResults(results, maxResults, startIndex); + + TriangleMesh* tm = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + + switch(geom.getType()) + { + case PxGeometryType::eBOX: + { + const PxBoxGeometry& boxGeom = static_cast<const PxBoxGeometry&>(geom); + + Box box; + buildFrom(box, geomPose.p, boxGeom.halfExtents, geomPose.q); + + Midphase::intersectBoxVsMesh(box, *tm, meshPose, meshGeom.scale, &limitedResults); + break; + } + + case PxGeometryType::eCAPSULE: + { + const PxCapsuleGeometry& capsGeom = static_cast<const PxCapsuleGeometry&>(geom); + + Capsule capsule; + getCapsule(capsule, capsGeom, geomPose); + + Midphase::intersectCapsuleVsMesh(capsule, *tm, meshPose, meshGeom.scale, &limitedResults); + break; + } + + case PxGeometryType::eSPHERE: + { + const PxSphereGeometry& sphereGeom = static_cast<const PxSphereGeometry&>(geom); + Midphase::intersectSphereVsMesh(Sphere(geomPose.p, sphereGeom.radius), 
*tm, meshPose, meshGeom.scale, &limitedResults); + break; + } + + case PxGeometryType::ePLANE: + case PxGeometryType::eCONVEXMESH: + case PxGeometryType::eTRIANGLEMESH: + case PxGeometryType::eHEIGHTFIELD: + case PxGeometryType::eGEOMETRY_COUNT: + case PxGeometryType::eINVALID: + { + PX_CHECK_MSG(false, "findOverlapTriangleMesh: Only box, capsule and sphere geometries are supported."); + } + } + + overflow = limitedResults.mOverflow; + return limitedResults.mNbResults; +} + +/////////////////////////////////////////////////////////////////////////////// + +PxU32 physx::PxMeshQuery::findOverlapHeightField( const PxGeometry& geom, const PxTransform& geomPose, + const PxHeightFieldGeometry& hfGeom, const PxTransform& hfPose, + PxU32* results, PxU32 maxResults, PxU32 startIndex, bool& overflow) +{ + PX_SIMD_GUARD; + const PxTransform localPose0 = hfPose.transformInv(geomPose); + PxBoxGeometry boxGeom; + + switch(geom.getType()) + { + case PxGeometryType::eCAPSULE: + { + const PxCapsuleGeometry& cap = static_cast<const PxCapsuleGeometry&>(geom); + boxGeom.halfExtents = PxVec3(cap.halfHeight+cap.radius, cap.radius, cap.radius); + } + break; + case PxGeometryType::eSPHERE: + { + const PxSphereGeometry& sph = static_cast<const PxSphereGeometry&>(geom); + boxGeom.halfExtents = PxVec3(sph.radius, sph.radius, sph.radius); + } + break; + case PxGeometryType::eBOX: + boxGeom = static_cast<const PxBoxGeometry&>(geom); + break; + case PxGeometryType::ePLANE: + case PxGeometryType::eCONVEXMESH: + case PxGeometryType::eTRIANGLEMESH: + case PxGeometryType::eHEIGHTFIELD: + case PxGeometryType::eGEOMETRY_COUNT: + case PxGeometryType::eINVALID: + { + overflow = false; + PX_CHECK_AND_RETURN_VAL(false, "findOverlapHeightField: Only box, sphere and capsule queries are supported.", false); + } + } + + const bool isAABB = ((localPose0.q.x == 0.0f) && (localPose0.q.y == 0.0f) && (localPose0.q.z == 0.0f)); + + PxBounds3 bounds; + if (isAABB) + bounds = PxBounds3::centerExtents(localPose0.p, 
boxGeom.halfExtents); + else + bounds = PxBounds3::poseExtent(localPose0, boxGeom.halfExtents); // box.halfExtents is really extent + + HeightFieldUtil hfUtil(hfGeom); + HfTrianglesEntityReport2 entityReport(results, maxResults, startIndex, hfUtil, localPose0.p, boxGeom.halfExtents, localPose0.q, isAABB); + + hfUtil.overlapAABBTriangles(hfPose, bounds, 0, &entityReport); + overflow = entityReport.mOverflow; + return entityReport.mNbResults; +} + +/////////////////////////////////////////////////////////////////////////////// + +bool physx::PxMeshQuery::sweep( const PxVec3& unitDir, const PxReal maxDistance, + const PxGeometry& geom, const PxTransform& pose, + PxU32 triangleCount, const PxTriangle* triangles, + PxSweepHit& sweepHit, PxHitFlags hitFlags, + const PxU32* cachedIndex, const PxReal inflation, bool doubleSided) +{ + PX_SIMD_GUARD; + PX_CHECK_AND_RETURN_VAL(pose.isValid(), "PxMeshQuery::sweep(): pose is not valid.", false); + PX_CHECK_AND_RETURN_VAL(unitDir.isFinite(), "PxMeshQuery::sweep(): unitDir is not valid.", false); + PX_CHECK_AND_RETURN_VAL(PxIsFinite(maxDistance), "PxMeshQuery::sweep(): distance is not valid.", false); + PX_CHECK_AND_RETURN_VAL(maxDistance > 0, "PxMeshQuery::sweep(): sweep distance must be greater than 0.", false); + + PX_PROFILE_ZONE("MeshQuery.sweep", 0); + + const PxReal distance = PxMin(maxDistance, PX_MAX_SWEEP_DISTANCE); + + switch(geom.getType()) + { + case PxGeometryType::eSPHERE: + { + const PxSphereGeometry& sphereGeom = static_cast<const PxSphereGeometry&>(geom); + + // PT: TODO: technically this capsule with 0.0 half-height is invalid ("isValid" returns false) + const PxCapsuleGeometry capsuleGeom(sphereGeom.radius, 0.0f); + + return sweepCapsuleTriangles( triangleCount, triangles, doubleSided, capsuleGeom, pose, unitDir, distance, + sweepHit, cachedIndex, inflation, hitFlags); + } + + case PxGeometryType::eCAPSULE: + { + const PxCapsuleGeometry& capsuleGeom = static_cast<const PxCapsuleGeometry&>(geom); + + return 
sweepCapsuleTriangles( triangleCount, triangles, doubleSided, capsuleGeom, pose, unitDir, distance, + sweepHit, cachedIndex, inflation, hitFlags); + } + + case PxGeometryType::eBOX: + { + const PxBoxGeometry& boxGeom = static_cast<const PxBoxGeometry&>(geom); + + if(hitFlags & PxHitFlag::ePRECISE_SWEEP) + { + return sweepBoxTriangles_Precise( triangleCount, triangles, doubleSided, boxGeom, pose, unitDir, distance, sweepHit, cachedIndex, + inflation, hitFlags); + } + else + { + return sweepBoxTriangles( triangleCount, triangles, doubleSided, boxGeom, pose, unitDir, distance, sweepHit, cachedIndex, + inflation, hitFlags); + } + } + case PxGeometryType::ePLANE: + case PxGeometryType::eCONVEXMESH: + case PxGeometryType::eTRIANGLEMESH: + case PxGeometryType::eHEIGHTFIELD: + case PxGeometryType::eGEOMETRY_COUNT: + case PxGeometryType::eINVALID: + PX_CHECK_MSG(false, "PxMeshQuery::sweep(): geometry object parameter must be sphere, capsule or box geometry."); + } + return false; +} + +/////////////////////////////////////////////////////////////////////////////// + +// Exposing wrapper for Midphase::intersectOBB just for particles in order to avoid DelayLoad performance problem. 
This should be removed with particles in PhysX 3.5 (US16993) +void physx::Gu::intersectOBB_Particles(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned) +{ + Midphase::intersectOBB(mesh, obb, callback, bothTriangleSidesCollide, checkObbIsAligned); +} + +/////////////////////////////////////////////////////////////////////////////// diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseBV4.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseBV4.cpp new file mode 100644 index 00000000..1cab487f --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseBV4.cpp @@ -0,0 +1,999 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "GuBV4.h" +using namespace physx; +using namespace Gu; + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#include "GuSweepMesh.h" +#include "GuBV4Build.h" +#include "GuBV4_Common.h" +#include "GuSphere.h" +#include "GuCapsule.h" +#include "GuBoxConversion.h" +#include "GuConvexUtilsInternal.h" +#include "GuVecTriangle.h" +#include "GuIntersectionTriangleBox.h" +#include "GuIntersectionCapsuleTriangle.h" +#include "GuIntersectionRayBox.h" +#include "PxTriangleMeshGeometry.h" +#include "CmScaling.h" +#include "GuTriangleMeshBV4.h" + +// This file contains code specific to the BV4 midphase. 
+ +// PT: TODO: revisit/inline static sweep functions (TA34704) + +using namespace physx; +using namespace Gu; +using namespace Cm; + +#if PX_INTEL_FAMILY +Ps::IntBool BV4_RaycastSingle (const PxVec3& origin, const PxVec3& dir, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxRaycastHit* PX_RESTRICT hit, float maxDist, float geomEpsilon, PxU32 flags, PxHitFlags hitFlags); +PxU32 BV4_RaycastAll (const PxVec3& origin, const PxVec3& dir, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxRaycastHit* PX_RESTRICT hits, PxU32 maxNbHits, float maxDist, float geomEpsilon, PxU32 flags, PxHitFlags hitFlags); +void BV4_RaycastCB (const PxVec3& origin, const PxVec3& dir, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, float maxDist, float geomEpsilon, PxU32 flags, MeshRayCallback callback, void* userData); + +Ps::IntBool BV4_OverlapSphereAny (const Sphere& sphere, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned); +PxU32 BV4_OverlapSphereAll (const Sphere& sphere, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxU32* results, PxU32 size, bool& overflow); +void BV4_OverlapSphereCB (const Sphere& sphere, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, MeshOverlapCallback callback, void* userData); + +Ps::IntBool BV4_OverlapBoxAny (const Box& box, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned); +PxU32 BV4_OverlapBoxAll (const Box& box, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxU32* results, PxU32 size, bool& overflow); +void BV4_OverlapBoxCB (const Box& box, const BV4Tree& tree, MeshOverlapCallback callback, void* userData); + +Ps::IntBool BV4_OverlapCapsuleAny (const Capsule& capsule, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned); +PxU32 BV4_OverlapCapsuleAll (const Capsule& capsule, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxU32* results, PxU32 size, bool& overflow); +void BV4_OverlapCapsuleCB (const 
Capsule& capsule, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, MeshOverlapCallback callback, void* userData); + +Ps::IntBool BV4_SphereSweepSingle (const Sphere& sphere, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepHit* PX_RESTRICT hit, PxU32 flags); +void BV4_SphereSweepCB (const Sphere& sphere, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting); + +Ps::IntBool BV4_BoxSweepSingle (const Box& box, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepHit* PX_RESTRICT hit, PxU32 flags); +void BV4_BoxSweepCB (const Box& box, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting); + +Ps::IntBool BV4_CapsuleSweepSingle (const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags); +Ps::IntBool BV4_CapsuleSweepSingleAA(const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags); +void BV4_CapsuleSweepCB (const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags); +void BV4_CapsuleSweepAACB (const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags); + +void BV4_GenericSweepCB_Old (const PxVec3& origin, const PxVec3& extents, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, MeshSweepCallback callback, void* userData); +void BV4_GenericSweepCB (const Box& box, const PxVec3& dir, float maxDist, 
const BV4Tree& tree, MeshSweepCallback callback, void* userData, bool anyHit); + +static PX_FORCE_INLINE void setIdentity(PxMat44& m) +{ + m.column0 = PxVec4(1.0f, 0.0f, 0.0f, 0.0f); + m.column1 = PxVec4(0.0f, 1.0f, 0.0f, 0.0f); + m.column2 = PxVec4(0.0f, 0.0f, 1.0f, 0.0f); + m.column3 = PxVec4(0.0f, 0.0f, 0.0f, 1.0f); +} + +static PX_FORCE_INLINE void setRotation(PxMat44& m, const PxQuat& q) +{ + const PxReal x = q.x; + const PxReal y = q.y; + const PxReal z = q.z; + const PxReal w = q.w; + + const PxReal x2 = x + x; + const PxReal y2 = y + y; + const PxReal z2 = z + z; + + const PxReal xx = x2*x; + const PxReal yy = y2*y; + const PxReal zz = z2*z; + + const PxReal xy = x2*y; + const PxReal xz = x2*z; + const PxReal xw = x2*w; + + const PxReal yz = y2*z; + const PxReal yw = y2*w; + const PxReal zw = z2*w; + + m.column0 = PxVec4(1.0f - yy - zz, xy + zw, xz - yw, 0.0f); + m.column1 = PxVec4(xy - zw, 1.0f - xx - zz, yz + xw, 0.0f); + m.column2 = PxVec4(xz + yw, yz - xw, 1.0f - xx - yy, 0.0f); +} + +#define IEEE_1_0 0x3f800000 //!< integer representation of 1.0 +static PX_FORCE_INLINE const PxMat44* setupWorldMatrix(PxMat44& world, const float* meshPos, const float* meshRot) +{ +// world = PxMat44(PxIdentity); + setIdentity(world); + + bool isIdt = true; + if(meshRot) + { + const PxU32* Bin = reinterpret_cast<const PxU32*>(meshRot); + if(Bin[0]!=0 || Bin[1]!=0 || Bin[2]!=0 || Bin[3]!=IEEE_1_0) + { +// const PxQuat Q(meshRot[0], meshRot[1], meshRot[2], meshRot[3]); +// world = PxMat44(Q); + setRotation(world, PxQuat(meshRot[0], meshRot[1], meshRot[2], meshRot[3])); + isIdt = false; + } + } + + if(meshPos) + { + const PxU32* Bin = reinterpret_cast<const PxU32*>(meshPos); + if(Bin[0]!=0 || Bin[1]!=0 || Bin[2]!=0) + { +// world.setPosition(PxVec3(meshPos[0], meshPos[1], meshPos[2])); + world.column3.x = meshPos[0]; + world.column3.y = meshPos[1]; + world.column3.z = meshPos[2]; + isIdt = false; + } + } + return isIdt ? 
NULL : &world; +} + +static PX_FORCE_INLINE PxU32 setupFlags(bool anyHit, bool doubleSided, bool meshBothSides) +{ + PxU32 flags = 0; + if(anyHit) + flags |= QUERY_MODIFIER_ANY_HIT; + if(doubleSided) + flags |= QUERY_MODIFIER_DOUBLE_SIDED; + if(meshBothSides) + flags |= QUERY_MODIFIER_MESH_BOTH_SIDES; + return flags; +} + +static Ps::IntBool boxSweepVsMesh(SweepHit& h, const BV4Tree& tree, const float* meshPos, const float* meshRot, const Box& box, const PxVec3& dir, float maxDist, bool anyHit, bool doubleSided, bool meshBothSides) +{ + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, meshPos, meshRot); + + const PxU32 flags = setupFlags(anyHit, doubleSided, meshBothSides); + return BV4_BoxSweepSingle(box, dir, maxDist, tree, TM, &h, flags); +} + +static Ps::IntBool sphereSweepVsMesh(SweepHit& h, const BV4Tree& tree, const PxVec3& center, float radius, const PxVec3& dir, float maxDist, const PxMat44* TM, const PxU32 flags) +{ + // PT: TODO: avoid this copy (TA34704) + const Sphere tmp(center, radius); + + return BV4_SphereSweepSingle(tmp, dir, maxDist, tree, TM, &h, flags); +} + +static bool capsuleSweepVsMesh(SweepHit& h, const BV4Tree& tree, const Capsule& capsule, const PxVec3& dir, float maxDist, const PxMat44* TM, const PxU32 flags) +{ + Capsule localCapsule; + computeLocalCapsule(localCapsule, capsule, TM); + + // PT: TODO: optimize + PxVec3 localDir, unused; + computeLocalRay(localDir, unused, dir, dir, TM); + + const PxVec3 capsuleDir = localCapsule.p1 - localCapsule.p0; + PxU32 nbNullComponents = 0; + const float epsilon = 1e-3f; + if(PxAbs(capsuleDir.x)<epsilon) + nbNullComponents++; + if(PxAbs(capsuleDir.y)<epsilon) + nbNullComponents++; + if(PxAbs(capsuleDir.z)<epsilon) + nbNullComponents++; + + // PT: TODO: consider passing TM to BV4_CapsuleSweepSingleXX just to do the final transforms there instead + // of below. 
It would make the parameters slightly inconsistent (local input + world TM) but it might make + // the code better overall, more aligned with the "unlimited results" version. + Ps::IntBool status; + if(nbNullComponents==2) + { + status = BV4_CapsuleSweepSingleAA(localCapsule, localDir, maxDist, tree, &h, flags); + } + else + { + status = BV4_CapsuleSweepSingle(localCapsule, localDir, maxDist, tree, &h, flags); + } + if(status && TM) + { + h.mPos = TM->transform(h.mPos); + h.mNormal = TM->rotate(h.mNormal); + } + return status!=0; +} + +static PX_FORCE_INLINE void boxSweepVsMeshCBOld(const BV4Tree& tree, const float* meshPos, const float* meshRot, const PxVec3& center, const PxVec3& extents, const PxVec3& dir, float maxDist, MeshSweepCallback callback, void* userData) +{ + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, meshPos, meshRot); + + BV4_GenericSweepCB_Old(center, extents, dir, maxDist, tree, TM, callback, userData); +} + +// + +static PX_FORCE_INLINE bool raycastVsMesh(PxRaycastHit& hitData, const BV4Tree& tree, const float* meshPos, const float* meshRot, const PxVec3& orig, const PxVec3& dir, float maxDist, float geomEpsilon, bool doubleSided, PxHitFlags hitFlags) +{ + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, meshPos, meshRot); + + const bool anyHit = hitFlags & PxHitFlag::eMESH_ANY; + const PxU32 flags = setupFlags(anyHit, doubleSided, false); + + if(!BV4_RaycastSingle(orig, dir, tree, TM, &hitData, maxDist, geomEpsilon, flags, hitFlags)) + return false; + + return true; +} + +/*static PX_FORCE_INLINE PxU32 raycastVsMeshAll(PxRaycastHit* hits, PxU32 maxNbHits, const BV4Tree& tree, const float* meshPos, const float* meshRot, const PxVec3& orig, const PxVec3& dir, float maxDist, float geomEpsilon, bool doubleSided, PxHitFlags hitFlags) +{ + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, meshPos, meshRot); + + const bool anyHit = hitFlags & PxHitFlag::eMESH_ANY; + const 
PxU32 flags = setupFlags(anyHit, doubleSided, false); + + return BV4_RaycastAll(orig, dir, tree, TM, hits, maxNbHits, maxDist, geomEpsilon, flags, hitFlags); +}*/ + +static PX_FORCE_INLINE void raycastVsMeshCB(const BV4Tree& tree, const PxVec3& orig, const PxVec3& dir, float maxDist, float geomEpsilon, bool doubleSided, MeshRayCallback callback, void* userData) +{ + const PxU32 flags = setupFlags(false, doubleSided, false); + BV4_RaycastCB(orig, dir, tree, NULL, maxDist, geomEpsilon, flags, callback, userData); +} + +struct BV4RaycastCBParams +{ + PX_FORCE_INLINE BV4RaycastCBParams( PxRaycastHit* hits, PxU32 maxHits, const PxMeshScale* scale, const PxTransform* pose, + const Cm::Matrix34* world2vertexSkew, PxU32 hitFlags, + const PxVec3& rayDir, bool isDoubleSided, float distCoeff) : + mDstBase (hits), + mHitNum (0), + mMaxHits (maxHits), + mScale (scale), + mPose (pose), + mWorld2vertexSkew (world2vertexSkew), + mHitFlags (hitFlags), + mRayDir (rayDir), + mIsDoubleSided (isDoubleSided), + mDistCoeff (distCoeff) + { + } + + PxRaycastHit* mDstBase; + PxU32 mHitNum; + PxU32 mMaxHits; + const PxMeshScale* mScale; + const PxTransform* mPose; + const Cm::Matrix34* mWorld2vertexSkew; + PxU32 mHitFlags; + const PxVec3& mRayDir; + bool mIsDoubleSided; + float mDistCoeff; + +private: + BV4RaycastCBParams& operator=(const BV4RaycastCBParams&); +}; + +static PX_FORCE_INLINE PxVec3 processLocalNormal(const Cm::Matrix34* PX_RESTRICT world2vertexSkew, const PxTransform* PX_RESTRICT pose, const PxVec3& localNormal, const PxVec3& rayDir, const bool isDoubleSided) +{ + PxVec3 normal; + if(world2vertexSkew) + normal = world2vertexSkew->rotateTranspose(localNormal); + else + normal = pose->rotate(localNormal); + normal.normalize(); + + // PT: figure out correct normal orientation (DE7458) + // - if the mesh is single-sided the normal should be the regular triangle normal N, regardless of eMESH_BOTH_SIDES. + // - if the mesh is double-sided the correct normal can be either N or -N. 
We take the one opposed to ray direction. + if(isDoubleSided && normal.dot(rayDir) > 0.0f) + normal = -normal; + return normal; +} + +static HitCode gRayCallback(void* userData, const PxVec3& lp0, const PxVec3& lp1, const PxVec3& lp2, PxU32 triangleIndex, float dist, float u, float v) +{ + BV4RaycastCBParams* params = reinterpret_cast<BV4RaycastCBParams*>(userData); + +//const bool last = params->mHitNum == params->mMaxHits; + + //not worth concatenating to do 1 transform: PxMat34Legacy vertex2worldSkew = scaling.getVertex2WorldSkew(absPose); + // PT: TODO: revisit this for N hits + PX_ALIGN_PREFIX(16) char buffer[sizeof(PxRaycastHit)] PX_ALIGN_SUFFIX(16); + PxRaycastHit& hit = reinterpret_cast<PxRaycastHit&>(buffer); +//PxRaycastHit& hit = last ? (PxRaycastHit&)buffer : params->mDstBase[params->mHitNum]; + + hit.distance = dist * params->mDistCoeff; + hit.u = u; + hit.v = v; + hit.faceIndex = triangleIndex; + + PxVec3 localImpact = (1.0f - u - v)*lp0 + u*lp1 + v*lp2; + if(params->mWorld2vertexSkew) + { + localImpact = params->mScale->transform(localImpact); + if(params->mScale->hasNegativeDeterminant()) + Ps::swap<PxReal>(hit.u, hit.v); // have to swap the UVs though since they were computed in mesh local space + } + + hit.position = params->mPose->transform(localImpact); + hit.flags = PxHitFlag::ePOSITION|PxHitFlag::eDISTANCE|PxHitFlag::eUV|PxHitFlag::eFACE_INDEX; + + PxVec3 normal(0.0f); + // Compute additional information if needed + if(params->mHitFlags & PxHitFlag::eNORMAL) + { + const PxVec3 localNormal = (lp1 - lp0).cross(lp2 - lp0); + normal = processLocalNormal(params->mWorld2vertexSkew, params->mPose, localNormal, params->mRayDir, params->mIsDoubleSided); + hit.flags |= PxHitFlag::eNORMAL; + } + hit.normal = normal; + + // PT: no callback => store results in provided buffer + if(params->mHitNum == params->mMaxHits) +// if(last) + return HIT_EXIT; + + params->mDstBase[params->mHitNum++] = hit; +// params->mHitNum++; + + return HIT_NONE; +} + +PxU32 
physx::Gu::raycast_triangleMesh_BV4( const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose, + const PxVec3& rayOrigin, const PxVec3& rayDir, PxReal maxDist, + PxHitFlags hitFlags, PxU32 maxHits, PxRaycastHit* PX_RESTRICT hits) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4TriangleMesh* meshData = static_cast<const BV4TriangleMesh*>(mesh); + + const bool multipleHits = (maxHits > 1); + const bool idtScale = meshGeom.scale.isIdentity(); + + const bool isDoubleSided = meshGeom.meshFlags.isSet(PxMeshGeometryFlag::eDOUBLE_SIDED); + const bool bothSides = isDoubleSided || (hitFlags & PxHitFlag::eMESH_BOTH_SIDES); + + const BV4Tree& tree = static_cast<const BV4TriangleMesh*>(meshData)->getBV4Tree(); + if(idtScale && !multipleHits) + { + bool b = raycastVsMesh(*hits, tree, &pose.p.x, &pose.q.x, rayOrigin, rayDir, maxDist, meshData->getGeomEpsilon(), bothSides, hitFlags); + if(b) + { + PxHitFlags dstFlags = PxHitFlag::ePOSITION|PxHitFlag::eDISTANCE|PxHitFlag::eUV|PxHitFlag::eFACE_INDEX; + + // PT: TODO: pass flags to BV4 code (TA34704) + if(hitFlags & PxHitFlag::eNORMAL) + { + dstFlags |= PxHitFlag::eNORMAL; + if(isDoubleSided) + { + PxVec3 normal = hits->normal; + // PT: figure out correct normal orientation (DE7458) + // - if the mesh is single-sided the normal should be the regular triangle normal N, regardless of eMESH_BOTH_SIDES. + // - if the mesh is double-sided the correct normal can be either N or -N. We take the one opposed to ray direction. 
+ if(normal.dot(rayDir) > 0.0f) + normal = -normal; + hits->normal = normal; + } + } + else + { + hits->normal = PxVec3(0.0f); + } + hits->flags = dstFlags; + } + return PxU32(b); + } + +/* + if(idtScale && multipleHits) + { + PxU32 nbHits = raycastVsMeshAll(hits, maxHits, tree, &pose.p.x, &pose.q.x, rayOrigin, rayDir, maxDist, meshData->getGeomEpsilon(), bothSides, hitFlags); + + return nbHits; + } +*/ + + //scaling: transform the ray to vertex space + PxVec3 orig, dir; + Cm::Matrix34 world2vertexSkew; + Cm::Matrix34* world2vertexSkewP = NULL; + PxReal distCoeff = 1.0f; + if(idtScale) + { + orig = pose.transformInv(rayOrigin); + dir = pose.rotateInv(rayDir); + } + else + { + world2vertexSkew = meshGeom.scale.getInverse() * pose.getInverse(); + world2vertexSkewP = &world2vertexSkew; + orig = world2vertexSkew.transform(rayOrigin); + dir = world2vertexSkew.rotate(rayDir); + { + distCoeff = dir.normalize(); + maxDist *= distCoeff; + maxDist += 1e-3f; + distCoeff = 1.0f/distCoeff; + } + } + + if(!multipleHits) + { + bool b = raycastVsMesh(*hits, tree, NULL, NULL, orig, dir, maxDist, meshData->getGeomEpsilon(), bothSides, hitFlags); + if(b) + { + hits->distance *= distCoeff; + hits->position = pose.transform(meshGeom.scale.transform(hits->position)); + PxHitFlags dstFlags = PxHitFlag::ePOSITION|PxHitFlag::eDISTANCE|PxHitFlag::eUV|PxHitFlag::eFACE_INDEX; + + if(meshGeom.scale.hasNegativeDeterminant()) + Ps::swap<PxReal>(hits->u, hits->v); // have to swap the UVs though since they were computed in mesh local space + + // PT: TODO: pass flags to BV4 code (TA34704) + // Compute additional information if needed + if(hitFlags & PxHitFlag::eNORMAL) + { + dstFlags |= PxHitFlag::eNORMAL; + hits->normal = processLocalNormal(world2vertexSkewP, &pose, hits->normal, rayDir, isDoubleSided); + } + else + { + hits->normal = PxVec3(0.0f); + } + hits->flags = dstFlags; + } + return PxU32(b); + } + + BV4RaycastCBParams callback(hits, maxHits, &meshGeom.scale, &pose, world2vertexSkewP, 
hitFlags, rayDir, isDoubleSided, distCoeff); + + raycastVsMeshCB( static_cast<const BV4TriangleMesh*>(meshData)->getBV4Tree(), + orig, dir, + maxDist, meshData->getGeomEpsilon(), bothSides, + gRayCallback, &callback); + return callback.mHitNum; +} + +namespace +{ +struct IntersectShapeVsMeshCallback +{ + IntersectShapeVsMeshCallback(LimitedResults* results, bool flipNormal) : mResults(results), mAnyHits(false), mFlipNormal(flipNormal) {} + + LimitedResults* mResults; + bool mAnyHits; + bool mFlipNormal; + + PX_FORCE_INLINE bool recordHit(PxU32 faceIndex, Ps::IntBool hit) + { + if(hit) + { + mAnyHits = true; + if(mResults) + mResults->add(faceIndex); + else + return false; // abort traversal if we are only interested in firstContact (mResults is NULL) + } + return true; // if we are here, either no triangles were hit or multiple results are expected => continue traversal + } +}; + +// PT: TODO: get rid of this (TA34704) +struct IntersectSphereVsMeshCallback : IntersectShapeVsMeshCallback +{ + PX_FORCE_INLINE IntersectSphereVsMeshCallback(const PxMeshScale& meshScale, const PxTransform& meshTransform, const Sphere& sphere, LimitedResults* r, bool flipNormal) + : IntersectShapeVsMeshCallback(r, flipNormal) + { + mVertexToShapeSkew = meshScale.toMat33(); + mLocalCenter = meshTransform.transformInv(sphere.center); // sphereCenterInMeshSpace + mSphereRadius2 = sphere.radius*sphere.radius; + } + + PxMat33 mVertexToShapeSkew; + PxVec3 mLocalCenter; // PT: sphere center in local/mesh space + PxF32 mSphereRadius2; + + PX_FORCE_INLINE PxAgain processHit(PxU32 faceIndex, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2) + { + const Vec3V v0 = V3LoadU(mVertexToShapeSkew * av0); + const Vec3V v1 = V3LoadU(mVertexToShapeSkew * (mFlipNormal ? av2 : av1)); + const Vec3V v2 = V3LoadU(mVertexToShapeSkew * (mFlipNormal ? 
av1 : av2)); + + FloatV dummy1, dummy2; + Vec3V closestP; + PxReal dist2; + FStore(distancePointTriangleSquared(V3LoadU(mLocalCenter), v0, v1, v2, dummy1, dummy2, closestP), &dist2); + return recordHit(faceIndex, dist2 <= mSphereRadius2); + } +}; + +// PT: TODO: get rid of this (TA34704) +struct IntersectCapsuleVsMeshCallback : IntersectShapeVsMeshCallback +{ + PX_FORCE_INLINE IntersectCapsuleVsMeshCallback(const PxMeshScale& meshScale, const PxTransform& meshTransform, const Capsule& capsule, LimitedResults* r, bool flipNormal) + : IntersectShapeVsMeshCallback(r, flipNormal) + { + mVertexToShapeSkew = meshScale.toMat33(); + + // transform world capsule to mesh shape space + mLocalCapsule.p0 = meshTransform.transformInv(capsule.p0); + mLocalCapsule.p1 = meshTransform.transformInv(capsule.p1); + mLocalCapsule.radius = capsule.radius; + mParams.init(mLocalCapsule); + } + + PxMat33 mVertexToShapeSkew; + Capsule mLocalCapsule; // PT: capsule in mesh/local space + CapsuleTriangleOverlapData mParams; + + PX_FORCE_INLINE PxAgain processHit(PxU32 faceIndex, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2) + { + const PxVec3 v0 = mVertexToShapeSkew * av0; + const PxVec3 v1 = mVertexToShapeSkew * (mFlipNormal ? av2 : av1); + const PxVec3 v2 = mVertexToShapeSkew * (mFlipNormal ? 
av1 : av2); + const PxVec3 normal = (v0 - v1).cross(v0 - v2); + bool hit = intersectCapsuleTriangle(normal, v0, v1, v2, mLocalCapsule, mParams); + return recordHit(faceIndex, hit); + } +}; + +// PT: TODO: get rid of this (TA34704) +struct IntersectBoxVsMeshCallback : IntersectShapeVsMeshCallback +{ + PX_FORCE_INLINE IntersectBoxVsMeshCallback(const PxMeshScale& meshScale, const PxTransform& meshTransform, const Box& box, LimitedResults* r, bool flipNormal) + : IntersectShapeVsMeshCallback(r, flipNormal) + { + const PxMat33 vertexToShapeSkew = meshScale.toMat33(); + + // mesh scale needs to be included - inverse transform and optimize the box + const PxMat33 vertexToWorldSkew_Rot = PxMat33Padded(meshTransform.q) * vertexToShapeSkew; + const PxVec3& vertexToWorldSkew_Trans = meshTransform.p; + + Matrix34 tmp; + buildMatrixFromBox(tmp, box); + const Matrix34 inv = tmp.getInverseRT(); + const Matrix34 _vertexToWorldSkew(vertexToWorldSkew_Rot, vertexToWorldSkew_Trans); + + mVertexToBox = inv * _vertexToWorldSkew; + mBoxCenter = PxVec3(0.0f); + mBoxExtents = box.extents; // extents do not change + } + + Matrix34 mVertexToBox; + Vec3p mBoxExtents, mBoxCenter; + + PX_FORCE_INLINE PxAgain processHit(PxU32 faceIndex, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2) + { + const Vec3p v0 = mVertexToBox.transform(av0); + const Vec3p v1 = mVertexToBox.transform(mFlipNormal ? av2 : av1); + const Vec3p v2 = mVertexToBox.transform(mFlipNormal ? 
av1 : av2); + + // PT: this one is safe because we're using Vec3p for all parameters + const Ps::IntBool hit = intersectTriangleBox_Unsafe(mBoxCenter, mBoxExtents, v0, v1, v2); + return recordHit(faceIndex, hit); + } +}; +} + +static bool gSphereVsMeshCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, const PxU32* /*vertexIndices*/) +{ + IntersectSphereVsMeshCallback* callback = reinterpret_cast<IntersectSphereVsMeshCallback*>(userData); + return !callback->processHit(triangleIndex, p0, p1, p2); +} + +static bool gCapsuleVsMeshCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, const PxU32* /*vertexIndices*/) +{ + IntersectCapsuleVsMeshCallback* callback = reinterpret_cast<IntersectCapsuleVsMeshCallback*>(userData); + return !callback->processHit(triangleIndex, p0, p1, p2); +} + +static bool gBoxVsMeshCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, const PxU32* /*vertexIndices*/) +{ + IntersectBoxVsMeshCallback* callback = reinterpret_cast<IntersectBoxVsMeshCallback*>(userData); + return !callback->processHit(triangleIndex, p0, p1, p2); +} + +bool physx::Gu::intersectSphereVsMesh_BV4(const Sphere& sphere, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) +{ + PX_ASSERT(triMesh.getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4Tree& tree = static_cast<const BV4TriangleMesh&>(triMesh).getBV4Tree(); + + if(meshScale.isIdentity()) + { + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, &meshTransform.p.x, &meshTransform.q.x); + if(results) + { + const PxU32 nbResults = BV4_OverlapSphereAll(sphere, tree, TM, results->mResults, results->mMaxResults, results->mOverflow); + results->mNbResults = nbResults; + return nbResults!=0; + } + else + { + return BV4_OverlapSphereAny(sphere, tree, TM)!=0; + } + } + else + { + // 
PT: TODO: we don't need to use this callback here (TA34704) + IntersectSphereVsMeshCallback callback(meshScale, meshTransform, sphere, results, meshScale.hasNegativeDeterminant()); + + const Box worldOBB_(sphere.center, PxVec3(sphere.radius), PxMat33(PxIdentity)); + Box vertexOBB; + computeVertexSpaceOBB(vertexOBB, worldOBB_, meshTransform, meshScale); + + BV4_OverlapBoxCB(vertexOBB, tree, gSphereVsMeshCallback, &callback); + return callback.mAnyHits; + } +} + +bool physx::Gu::intersectBoxVsMesh_BV4(const Box& box, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) +{ + PX_ASSERT(triMesh.getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4Tree& tree = static_cast<const BV4TriangleMesh&>(triMesh).getBV4Tree(); + + if(meshScale.isIdentity()) + { + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, &meshTransform.p.x, &meshTransform.q.x); + if(results) + { + const PxU32 nbResults = BV4_OverlapBoxAll(box, tree, TM, results->mResults, results->mMaxResults, results->mOverflow); + results->mNbResults = nbResults; + return nbResults!=0; + } + else + { + return BV4_OverlapBoxAny(box, tree, TM)!=0; + } + } + else + { + // PT: TODO: we don't need to use this callback here (TA34704) + IntersectBoxVsMeshCallback callback(meshScale, meshTransform, box, results, meshScale.hasNegativeDeterminant()); + + Box vertexOBB; // query box in vertex space + computeVertexSpaceOBB(vertexOBB, box, meshTransform, meshScale); + + BV4_OverlapBoxCB(vertexOBB, tree, gBoxVsMeshCallback, &callback); + return callback.mAnyHits; + } +} + +bool physx::Gu::intersectCapsuleVsMesh_BV4(const Capsule& capsule, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) +{ + PX_ASSERT(triMesh.getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4Tree& tree = static_cast<const BV4TriangleMesh&>(triMesh).getBV4Tree(); + + 
if(meshScale.isIdentity()) + { + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, &meshTransform.p.x, &meshTransform.q.x); + if(results) + { + const PxU32 nbResults = BV4_OverlapCapsuleAll(capsule, tree, TM, results->mResults, results->mMaxResults, results->mOverflow); + results->mNbResults = nbResults; + return nbResults!=0; + } + else + { + return BV4_OverlapCapsuleAny(capsule, tree, TM)!=0; + } + } + else + { + // PT: TODO: we don't need to use this callback here (TA34704) + IntersectCapsuleVsMeshCallback callback(meshScale, meshTransform, capsule, results, meshScale.hasNegativeDeterminant()); + + // make vertex space OBB + Box vertexOBB; + Box worldOBB_; + worldOBB_.create(capsule); // AP: potential optimization (meshTransform.inverse is already in callback.mCapsule) + computeVertexSpaceOBB(vertexOBB, worldOBB_, meshTransform, meshScale); + + BV4_OverlapBoxCB(vertexOBB, tree, gCapsuleVsMeshCallback, &callback); + return callback.mAnyHits; + } +} + +// PT: TODO: get rid of this (TA34704) +static bool gVolumeCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, const PxU32* vertexIndices) +{ + MeshHitCallback<PxRaycastHit>* callback = reinterpret_cast<MeshHitCallback<PxRaycastHit>*>(userData); + PX_ALIGN_PREFIX(16) char buffer[sizeof(PxRaycastHit)] PX_ALIGN_SUFFIX(16); + PxRaycastHit& hit = reinterpret_cast<PxRaycastHit&>(buffer); + hit.faceIndex = triangleIndex; + PxReal dummy; + return !callback->processHit(hit, p0, p1, p2, dummy, vertexIndices); +} + +void physx::Gu::intersectOBB_BV4(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned) +{ + PX_UNUSED(checkObbIsAligned); + PX_UNUSED(bothTriangleSidesCollide); + BV4_OverlapBoxCB(obb, static_cast<const BV4TriangleMesh*>(mesh)->getBV4Tree(), gVolumeCallback, &callback); +} + + + + +#include "GuVecCapsule.h" +#include "GuSweepMTD.h" + +static bool 
gCapsuleMeshSweepCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, /*const PxU32* vertexIndices,*/ float& dist) +{ + SweepCapsuleMeshHitCallback* callback = reinterpret_cast<SweepCapsuleMeshHitCallback*>(userData); + PxRaycastHit meshHit; + meshHit.faceIndex = triangleIndex; + return !callback->SweepCapsuleMeshHitCallback::processHit(meshHit, p0, p1, p2, dist, NULL/*vertexIndices*/); +} + +// PT: TODO: refactor/share bits of this (TA34704) +bool physx::Gu::sweepCapsule_MeshGeom_BV4( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Capsule& lss, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4TriangleMesh* meshData = static_cast<const BV4TriangleMesh*>(mesh); + + const Capsule inflatedCapsule(lss.p0, lss.p1, lss.radius + inflation); + + const bool isIdentity = triMeshGeom.scale.isIdentity(); + bool isDoubleSided = (triMeshGeom.meshFlags & PxMeshGeometryFlag::eDOUBLE_SIDED); + const PxU32 meshBothSides = hitFlags & PxHitFlag::eMESH_BOTH_SIDES; + + if(isIdentity) + { + const BV4Tree& tree = meshData->getBV4Tree(); + const bool anyHit = hitFlags & PxHitFlag::eMESH_ANY; + + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, &pose.p.x, &pose.q.x); + + const PxU32 flags = setupFlags(anyHit, isDoubleSided, meshBothSides!=0); + + SweepHit hitData; + if(lss.p0==lss.p1) + { + if(!sphereSweepVsMesh(hitData, tree, inflatedCapsule.p0, inflatedCapsule.radius, unitDir, distance, TM, flags)) + return false; + } + else + { + if(!capsuleSweepVsMesh(hitData, tree, inflatedCapsule, unitDir, distance, TM, flags)) + return false; + } + + sweepHit.distance = hitData.mDistance; + sweepHit.position = hitData.mPos; + sweepHit.normal = hitData.mNormal; + sweepHit.faceIndex = hitData.mTriangleID; + + 
if(hitData.mDistance==0.0f) + { + sweepHit.flags = PxHitFlag::eDISTANCE | PxHitFlag::eNORMAL; + + if(meshBothSides) + isDoubleSided = true; + + // PT: TODO: consider using 'setInitialOverlapResults' here + bool hasContacts = false; + if(hitFlags & PxHitFlag::eMTD) + { + const Vec3V p0 = V3LoadU(inflatedCapsule.p0); + const Vec3V p1 = V3LoadU(inflatedCapsule.p1); + const FloatV radius = FLoad(lss.radius); + CapsuleV capsuleV; + capsuleV.initialize(p0, p1, radius); + + //we need to calculate the MTD + hasContacts = computeCapsule_TriangleMeshMTD(triMeshGeom, pose, capsuleV, inflatedCapsule.radius, isDoubleSided, sweepHit); + } + setupSweepHitForMTD(sweepHit, hasContacts, unitDir); + } + else + sweepHit.flags = PxHitFlag::eDISTANCE | PxHitFlag::ePOSITION | PxHitFlag::eNORMAL | PxHitFlag::eFACE_INDEX; + return true; + } + + // compute sweptAABB + const PxVec3 localP0 = pose.transformInv(inflatedCapsule.p0); + const PxVec3 localP1 = pose.transformInv(inflatedCapsule.p1); + PxVec3 sweepOrigin = (localP0+localP1)*0.5f; + PxVec3 sweepDir = pose.rotateInv(unitDir); + PxVec3 sweepExtents = PxVec3(inflatedCapsule.radius) + (localP0-localP1).abs()*0.5f; + PxReal distance1 = distance; + PxReal distCoef = 1.0f; + Matrix34 poseWithScale; + if(!isIdentity) + { + poseWithScale = pose * triMeshGeom.scale; + distance1 = computeSweepData(triMeshGeom, sweepOrigin, sweepExtents, sweepDir, distance); + distCoef = distance1 / distance; + } else + poseWithScale = Matrix34(pose); + + SweepCapsuleMeshHitCallback callback(sweepHit, poseWithScale, distance, isDoubleSided, inflatedCapsule, unitDir, hitFlags, triMeshGeom.scale.hasNegativeDeterminant(), distCoef); + + boxSweepVsMeshCBOld(meshData->getBV4Tree(), NULL, NULL, sweepOrigin, sweepExtents, sweepDir, distance1, gCapsuleMeshSweepCallback, &callback); + + if(meshBothSides) + isDoubleSided = true; + + return callback.finalizeHit(sweepHit, inflatedCapsule, triMeshGeom, pose, isDoubleSided); +} + +#include "GuSweepSharedTests.h" +static bool 
gBoxMeshSweepCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, /*const PxU32* vertexIndices,*/ float& dist) +{ + SweepBoxMeshHitCallback* callback = reinterpret_cast<SweepBoxMeshHitCallback*>(userData); + PxRaycastHit meshHit; + meshHit.faceIndex = triangleIndex; + return !callback->SweepBoxMeshHitCallback::processHit(meshHit, p0, p1, p2, dist, NULL/*vertexIndices*/); +} + +// PT: TODO: refactor/share bits of this (TA34704) +bool physx::Gu::sweepBox_MeshGeom_BV4( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Box& box, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4TriangleMesh* meshData = static_cast<const BV4TriangleMesh*>(mesh); + + const bool isIdentity = triMeshGeom.scale.isIdentity(); + + const bool meshBothSides = hitFlags & PxHitFlag::eMESH_BOTH_SIDES; + const bool isDoubleSided = triMeshGeom.meshFlags & PxMeshGeometryFlag::eDOUBLE_SIDED; + + if(isIdentity && inflation==0.0f) + { + const bool anyHit = hitFlags & PxHitFlag::eMESH_ANY; + + // PT: TODO: this is wrong, we shouldn't actually sweep the inflated version +// const PxVec3 inflated = (box.extents + PxVec3(inflation)) * 1.01f; + // PT: TODO: avoid this copy +// const Box tmp(box.center, inflated, box.rot); + + SweepHit hitData; +// if(!boxSweepVsMesh(hitData, meshData->getBV4Tree(), &pose.p.x, &pose.q.x, tmp, unitDir, distance, anyHit, isDoubleSided, meshBothSides)) + if(!boxSweepVsMesh(hitData, meshData->getBV4Tree(), &pose.p.x, &pose.q.x, box, unitDir, distance, anyHit, isDoubleSided, meshBothSides)) + return false; + + sweepHit.distance = hitData.mDistance; + sweepHit.position = hitData.mPos; + sweepHit.normal = hitData.mNormal; + sweepHit.faceIndex = hitData.mTriangleID; + + if(hitData.mDistance==0.0f) + { + sweepHit.flags = 
PxHitFlag::eDISTANCE | PxHitFlag::eNORMAL; + + const bool bothTriangleSidesCollide = isDoubleSided || meshBothSides; + const PxTransform boxTransform = box.getTransform(); + + bool hasContacts = false; + if(hitFlags & PxHitFlag::eMTD) + hasContacts = computeBox_TriangleMeshMTD(triMeshGeom, pose, box, boxTransform, inflation, bothTriangleSidesCollide, sweepHit); + + setupSweepHitForMTD(sweepHit, hasContacts, unitDir); + } + else + { + sweepHit.flags = PxHitFlag::eDISTANCE | PxHitFlag::ePOSITION | PxHitFlag::eNORMAL | PxHitFlag::eFACE_INDEX; + } + return true; + } + + // PT: TODO: revisit this codepath, we don't need to sweep an AABB all the time (TA34704) + + Matrix34 meshToWorldSkew; + PxVec3 sweptAABBMeshSpaceExtents, meshSpaceOrigin, meshSpaceDir; + + // Input sweep params: geom, pose, box, unitDir, distance + // We convert the origin from world space to mesh local space + // and convert the box+pose to mesh space AABB + if(isIdentity) + { + meshToWorldSkew = Matrix34(pose); + PxMat33 worldToMeshRot(pose.q.getConjugate()); // extract rotation matrix from pose.q + meshSpaceOrigin = worldToMeshRot.transform(box.center - pose.p); + meshSpaceDir = worldToMeshRot.transform(unitDir) * distance; + PxMat33 boxToMeshRot = worldToMeshRot * box.rot; + sweptAABBMeshSpaceExtents = boxToMeshRot.column0.abs() * box.extents.x + + boxToMeshRot.column1.abs() * box.extents.y + + boxToMeshRot.column2.abs() * box.extents.z; + } + else + { + meshToWorldSkew = pose * triMeshGeom.scale; + const PxMat33 meshToWorldSkew_Rot = PxMat33Padded(pose.q) * triMeshGeom.scale.toMat33(); + const PxVec3& meshToWorldSkew_Trans = pose.p; + + PxMat33 worldToVertexSkew_Rot; + PxVec3 worldToVertexSkew_Trans; + getInverse(worldToVertexSkew_Rot, worldToVertexSkew_Trans, meshToWorldSkew_Rot, meshToWorldSkew_Trans); + + //make vertex space OBB + Box vertexSpaceBox1; + const Matrix34 worldToVertexSkew(worldToVertexSkew_Rot, worldToVertexSkew_Trans); + vertexSpaceBox1 = transform(worldToVertexSkew, box); + // 
compute swept aabb + sweptAABBMeshSpaceExtents = vertexSpaceBox1.computeAABBExtent(); + + meshSpaceOrigin = worldToVertexSkew.transform(box.center); + meshSpaceDir = worldToVertexSkew.rotate(unitDir*distance); // also applies scale to direction/length + } + + sweptAABBMeshSpaceExtents += PxVec3(inflation); // inflate the bounds with additive inflation + sweptAABBMeshSpaceExtents *= 1.01f; // fatten the bounds to account for numerical discrepancies + + PxReal dirLen = PxMax(meshSpaceDir.magnitude(), 1e-5f); + PxReal distCoeff = 1.0f; + if (!isIdentity) + distCoeff = dirLen / distance; + + // Move to AABB space + Matrix34 worldToBox; + computeWorldToBoxMatrix(worldToBox, box); + + const bool bothTriangleSidesCollide = isDoubleSided || meshBothSides; + + const Matrix34Padded meshToBox = worldToBox*meshToWorldSkew; + const PxTransform boxTransform = box.getTransform(); // PT: TODO: this is not needed when there's no hit (TA34704) + + const PxVec3 localDir = worldToBox.rotate(unitDir); + const PxVec3 localDirDist = localDir*distance; + SweepBoxMeshHitCallback callback( // using eMULTIPLE with shrinkMaxT + CallbackMode::eMULTIPLE, meshToBox, distance, bothTriangleSidesCollide, box, localDirDist, localDir, unitDir, hitFlags, inflation, triMeshGeom.scale.hasNegativeDeterminant(), distCoeff); + + const PxVec3 dir = meshSpaceDir/dirLen; + boxSweepVsMeshCBOld(meshData->getBV4Tree(), NULL, NULL, meshSpaceOrigin, sweptAABBMeshSpaceExtents, dir, dirLen, gBoxMeshSweepCallback, &callback); + + return callback.finalizeHit(sweepHit, triMeshGeom, pose, boxTransform, localDir, meshBothSides, isDoubleSided); +} + +static bool gConvexVsMeshSweepCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, /*const PxU32* vertexIndices,*/ float& dist) +{ + SweepConvexMeshHitCallback* callback = reinterpret_cast<SweepConvexMeshHitCallback*>(userData); + PX_ALIGN_PREFIX(16) char buffer[sizeof(PxRaycastHit)] PX_ALIGN_SUFFIX(16); + PxRaycastHit& hit = 
reinterpret_cast<PxRaycastHit&>(buffer); + hit.faceIndex = triangleIndex; + return !callback->SweepConvexMeshHitCallback::processHit(hit, p0, p1, p2, dist, NULL/*vertexIndices*/); +} + +void physx::Gu::sweepConvex_MeshGeom_BV4(const TriangleMesh* mesh, const Box& hullBox, const PxVec3& localDir, const PxReal distance, SweepConvexMeshHitCallback& callback, bool anyHit) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4TriangleMesh* meshData = static_cast<const BV4TriangleMesh*>(mesh); + BV4_GenericSweepCB(hullBox, localDir, distance, meshData->getBV4Tree(), gConvexVsMeshSweepCallback, &callback, anyHit); +} + +#endif + diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseInterface.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseInterface.h new file mode 100644 index 00000000..b7cab6ef --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseInterface.h @@ -0,0 +1,420 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_MIDPHASE_INTERFACE_H +#define GU_MIDPHASE_INTERFACE_H + +#include "GuOverlapTests.h" +#include "GuRaycastTests.h" +#include "GuTriangleMesh.h" +#include "PsVecMath.h" + +// PT: this file contains the common interface for all midphase implementations. Specifically the Midphase namespace contains the +// midphase-related entry points, dispatching calls to the proper implementations depending on the triangle mesh's type. The rest of it +// is simply classes & structs shared by all implementations. 
+ +namespace physx +{ + class PxMeshScale; + class PxTriangleMeshGeometry; +namespace Cm +{ + class Matrix34; + class FastVertex2ShapeScaling; +} + +namespace Gu +{ + struct ConvexHullData; + + struct CallbackMode { enum Enum { eANY, eCLOSEST, eMULTIPLE }; }; + + template<typename HitType> + struct MeshHitCallback + { + CallbackMode::Enum mode; + + MeshHitCallback(CallbackMode::Enum aMode) : mode(aMode) {} + + PX_FORCE_INLINE bool inAnyMode() const { return mode == CallbackMode::eANY; } + PX_FORCE_INLINE bool inClosestMode() const { return mode == CallbackMode::eCLOSEST; } + PX_FORCE_INLINE bool inMultipleMode() const { return mode == CallbackMode::eMULTIPLE; } + + virtual PxAgain processHit( // all reported coords are in mesh local space including hit.position + const HitType& hit, const PxVec3& v0, const PxVec3& v1, const PxVec3& v2, PxReal& shrunkMaxT, const PxU32* vIndices) = 0; + + virtual ~MeshHitCallback() {} + }; + + struct SweepConvexMeshHitCallback; + + struct LimitedResults + { + PxU32* mResults; + PxU32 mNbResults; + PxU32 mMaxResults; + PxU32 mStartIndex; + PxU32 mNbSkipped; + bool mOverflow; + + PX_FORCE_INLINE LimitedResults(PxU32* results, PxU32 maxResults, PxU32 startIndex) + : mResults(results), mMaxResults(maxResults), mStartIndex(startIndex) + { + reset(); + } + + PX_FORCE_INLINE void reset() + { + mNbResults = 0; + mNbSkipped = 0; + mOverflow = false; + } + + PX_FORCE_INLINE bool add(PxU32 index) + { + if(mNbResults>=mMaxResults) + { + mOverflow = true; + return false; + } + + if(mNbSkipped>=mStartIndex) + mResults[mNbResults++] = index; + else + mNbSkipped++; + + return true; + } + }; + + // Exposing wrapper for Midphase::intersectOBB just for particles in order to avoid DelayLoad performance problem. 
This should be removed with particles in PhysX 3.5 (US16993) + PX_PHYSX_COMMON_API void intersectOBB_Particles(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned = true); + + // RTree forward declarations + PX_PHYSX_COMMON_API PxU32 raycast_triangleMesh_RTREE(const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose, + const PxVec3& rayOrigin, const PxVec3& rayDir, PxReal maxDist, + PxHitFlags hitFlags, PxU32 maxHits, PxRaycastHit* PX_RESTRICT hits); + PX_PHYSX_COMMON_API bool intersectSphereVsMesh_RTREE(const Sphere& sphere, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + PX_PHYSX_COMMON_API bool intersectBoxVsMesh_RTREE (const Box& box, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + PX_PHYSX_COMMON_API bool intersectCapsuleVsMesh_RTREE(const Capsule& capsule, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + PX_PHYSX_COMMON_API void intersectOBB_RTREE(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned); + PX_PHYSX_COMMON_API bool sweepCapsule_MeshGeom_RTREE( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Gu::Capsule& lss, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation); + PX_PHYSX_COMMON_API bool sweepBox_MeshGeom_RTREE( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Gu::Box& box, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation); + PX_PHYSX_COMMON_API void sweepConvex_MeshGeom_RTREE(const TriangleMesh* mesh, 
const Gu::Box& hullBox, const PxVec3& localDir, const PxReal distance, SweepConvexMeshHitCallback& callback, bool anyHit); + +#if PX_INTEL_FAMILY + // BV4 forward declarations + PX_PHYSX_COMMON_API PxU32 raycast_triangleMesh_BV4( const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose, + const PxVec3& rayOrigin, const PxVec3& rayDir, PxReal maxDist, + PxHitFlags hitFlags, PxU32 maxHits, PxRaycastHit* PX_RESTRICT hits); + PX_PHYSX_COMMON_API bool intersectSphereVsMesh_BV4 (const Sphere& sphere, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + PX_PHYSX_COMMON_API bool intersectBoxVsMesh_BV4 (const Box& box, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + PX_PHYSX_COMMON_API bool intersectCapsuleVsMesh_BV4 (const Capsule& capsule, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + PX_PHYSX_COMMON_API void intersectOBB_BV4(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned); + PX_PHYSX_COMMON_API bool sweepCapsule_MeshGeom_BV4( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Gu::Capsule& lss, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation); + PX_PHYSX_COMMON_API bool sweepBox_MeshGeom_BV4( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Gu::Box& box, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation); + PX_PHYSX_COMMON_API void sweepConvex_MeshGeom_BV4(const TriangleMesh* mesh, const Gu::Box& hullBox, const PxVec3& localDir, const PxReal distance, SweepConvexMeshHitCallback& callback, bool anyHit); +#endif 
+ + typedef PxU32 (*MidphaseRaycastFunction)( const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose, + const PxVec3& rayOrigin, const PxVec3& rayDir, PxReal maxDist, + PxHitFlags hitFlags, PxU32 maxHits, PxRaycastHit* PX_RESTRICT hits); + + typedef bool (*MidphaseSphereOverlapFunction) (const Sphere& sphere, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + typedef bool (*MidphaseBoxOverlapFunction) (const Box& box, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + typedef bool (*MidphaseCapsuleOverlapFunction) (const Capsule& capsule, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + typedef void (*MidphaseBoxCBOverlapFunction) (const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned); + + typedef bool (*MidphaseCapsuleSweepFunction)( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Gu::Capsule& lss, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation); + typedef bool (*MidphaseBoxSweepFunction)( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Gu::Box& box, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation); + typedef void (*MidphaseConvexSweepFunction)( const TriangleMesh* mesh, const Gu::Box& hullBox, const PxVec3& localDir, const PxReal distance, SweepConvexMeshHitCallback& callback, bool anyHit); + +namespace Midphase +{ + PX_FORCE_INLINE bool outputError() + { + static bool reportOnlyOnce = false; + if(!reportOnlyOnce) + { + reportOnlyOnce = true; + Ps::getFoundation().error(PxErrorCode::eINVALID_OPERATION, 
__FILE__, __LINE__, "BV4 midphase only supported on Intel platforms."); + } + return false; + } +} + + #if PX_INTEL_FAMILY + #else + static PxU32 unsupportedMidphase( const TriangleMesh*, const PxTriangleMeshGeometry&, const PxTransform&, + const PxVec3&, const PxVec3&, PxReal, + PxHitFlags, PxU32, PxRaycastHit* PX_RESTRICT) + { + return PxU32(Midphase::outputError()); + } + static bool unsupportedSphereOverlapMidphase(const Sphere&, const TriangleMesh&, const PxTransform&, const PxMeshScale&, LimitedResults*) + { + return Midphase::outputError(); + } + static bool unsupportedBoxOverlapMidphase(const Box&, const TriangleMesh&, const PxTransform&, const PxMeshScale&, LimitedResults*) + { + return Midphase::outputError(); + } + static bool unsupportedCapsuleOverlapMidphase(const Capsule&, const TriangleMesh&, const PxTransform&, const PxMeshScale&, LimitedResults*) + { + return Midphase::outputError(); + } + static void unsupportedBoxCBOverlapMidphase(const TriangleMesh*, const Box&, MeshHitCallback<PxRaycastHit>&, bool, bool) + { + Midphase::outputError(); + } + static bool unsupportedBoxSweepMidphase(const TriangleMesh*, const PxTriangleMeshGeometry&, const PxTransform&, const Gu::Box&, const PxVec3&, const PxReal, PxSweepHit&, PxHitFlags, const PxReal) + { + return Midphase::outputError(); + } + static bool unsupportedCapsuleSweepMidphase(const TriangleMesh*, const PxTriangleMeshGeometry&, const PxTransform&, const Gu::Capsule&, const PxVec3&, const PxReal, PxSweepHit&, PxHitFlags, const PxReal) + { + return Midphase::outputError(); + } + static void unsupportedConvexSweepMidphase(const TriangleMesh*, const Gu::Box&, const PxVec3&, const PxReal, SweepConvexMeshHitCallback&, bool) + { + Midphase::outputError(); + } + #endif + + static const MidphaseRaycastFunction gMidphaseRaycastTable[PxMeshMidPhase::eLAST] = + { + raycast_triangleMesh_RTREE, + #if PX_INTEL_FAMILY + raycast_triangleMesh_BV4, + #else + unsupportedMidphase, + #endif + }; + + static const 
MidphaseSphereOverlapFunction gMidphaseSphereOverlapTable[PxMeshMidPhase::eLAST] = + { + intersectSphereVsMesh_RTREE, + #if PX_INTEL_FAMILY + intersectSphereVsMesh_BV4, + #else + unsupportedSphereOverlapMidphase, + #endif + }; + + static const MidphaseBoxOverlapFunction gMidphaseBoxOverlapTable[PxMeshMidPhase::eLAST] = + { + intersectBoxVsMesh_RTREE, + #if PX_INTEL_FAMILY + intersectBoxVsMesh_BV4, + #else + unsupportedBoxOverlapMidphase, + #endif + }; + + static const MidphaseCapsuleOverlapFunction gMidphaseCapsuleOverlapTable[PxMeshMidPhase::eLAST] = + { + intersectCapsuleVsMesh_RTREE, + #if PX_INTEL_FAMILY + intersectCapsuleVsMesh_BV4, + #else + unsupportedCapsuleOverlapMidphase, + #endif + }; + + static const MidphaseBoxCBOverlapFunction gMidphaseBoxCBOverlapTable[PxMeshMidPhase::eLAST] = + { + intersectOBB_RTREE, + #if PX_INTEL_FAMILY + intersectOBB_BV4, + #else + unsupportedBoxCBOverlapMidphase, + #endif + }; + + static const MidphaseBoxSweepFunction gMidphaseBoxSweepTable[PxMeshMidPhase::eLAST] = + { + sweepBox_MeshGeom_RTREE, + #if PX_INTEL_FAMILY + sweepBox_MeshGeom_BV4, + #else + unsupportedBoxSweepMidphase, + #endif + }; + + static const MidphaseCapsuleSweepFunction gMidphaseCapsuleSweepTable[PxMeshMidPhase::eLAST] = + { + sweepCapsule_MeshGeom_RTREE, + #if PX_INTEL_FAMILY + sweepCapsule_MeshGeom_BV4, + #else + unsupportedCapsuleSweepMidphase, + #endif + }; + + static const MidphaseConvexSweepFunction gMidphaseConvexSweepTable[PxMeshMidPhase::eLAST] = + { + sweepConvex_MeshGeom_RTREE, + #if PX_INTEL_FAMILY + sweepConvex_MeshGeom_BV4, + #else + unsupportedConvexSweepMidphase, + #endif + }; + +namespace Midphase +{ + // \param[in] mesh triangle mesh to raycast against + // \param[in] meshGeom geometry object associated with the mesh + // \param[in] meshTransform pose/transform of geometry object + // \param[in] rayOrigin ray's origin + // \param[in] rayDir ray's unit dir + // \param[in] maxDist ray's length/max distance + // \param[in] hitFlags query 
behavior flags + // \param[in] maxHits max number of hits = size of 'hits' buffer + // \param[out] hits result buffer where to write raycast hits + // \return number of hits written to 'hits' result buffer + // \note there's no mechanism to report overflow. Returned number of hits is just clamped to maxHits. + PX_FORCE_INLINE PxU32 raycastTriangleMesh( const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& meshTransform, + const PxVec3& rayOrigin, const PxVec3& rayDir, PxReal maxDist, + PxHitFlags hitFlags, PxU32 maxHits, PxRaycastHit* PX_RESTRICT hits) + { + const PxU32 index = PxU32(mesh->getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + return gMidphaseRaycastTable[index](mesh, meshGeom, meshTransform, rayOrigin, rayDir, maxDist, hitFlags, maxHits, hits); + } + + // \param[in] sphere sphere + // \param[in] mesh triangle mesh + // \param[in] meshTransform pose/transform of triangle mesh + // \param[in] meshScale mesh scale + // \param[out] results results object if multiple hits are needed, NULL if a simple boolean answer is enough + // \return true if at least one overlap has been found + PX_FORCE_INLINE bool intersectSphereVsMesh(const Sphere& sphere, const TriangleMesh& mesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) + { + const PxU32 index = PxU32(mesh.getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + return gMidphaseSphereOverlapTable[index](sphere, mesh, meshTransform, meshScale, results); + } + + // \param[in] box box + // \param[in] mesh triangle mesh + // \param[in] meshTransform pose/transform of triangle mesh + // \param[in] meshScale mesh scale + // \param[out] results results object if multiple hits are needed, NULL if a simple boolean answer is enough + // \return true if at least one overlap has been found + PX_FORCE_INLINE bool intersectBoxVsMesh(const Box& box, const TriangleMesh& mesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, 
LimitedResults* results) + { + const PxU32 index = PxU32(mesh.getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + return gMidphaseBoxOverlapTable[index](box, mesh, meshTransform, meshScale, results); + } + + // \param[in] capsule capsule + // \param[in] mesh triangle mesh + // \param[in] meshTransform pose/transform of triangle mesh + // \param[in] meshScale mesh scale + // \param[out] results results object if multiple hits are needed, NULL if a simple boolean answer is enough + // \return true if at least one overlap has been found + PX_FORCE_INLINE bool intersectCapsuleVsMesh(const Capsule& capsule, const TriangleMesh& mesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) + { + const PxU32 index = PxU32(mesh.getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + return gMidphaseCapsuleOverlapTable[index](capsule, mesh, meshTransform, meshScale, results); + } + + // \param[in] mesh triangle mesh + // \param[in] box box + // \param[in] callback callback object, called each time a hit is found + // \param[in] bothTriangleSidesCollide true for double-sided meshes + // \param[in] checkObbIsAligned true to use a dedicated codepath for axis-aligned boxes + PX_FORCE_INLINE void intersectOBB(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned = true) + { + const PxU32 index = PxU32(mesh->getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + gMidphaseBoxCBOverlapTable[index](mesh, obb, callback, bothTriangleSidesCollide, checkObbIsAligned); + } + + // \param[in] mesh triangle mesh + // \param[in] meshGeom geometry object associated with the mesh + // \param[in] meshTransform pose/transform of geometry object + // \param[in] capsule swept capsule + // \param[in] unitDir sweep's unit dir + // \param[in] distance sweep's length/max distance + // \param[out] sweepHit hit result + // \param[in] hitFlags query behavior flags + // 
\param[in] inflation optional inflation value for swept shape + // \return true if a hit was found, false otherwise + PX_FORCE_INLINE bool sweepCapsuleVsMesh(const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& meshTransform, + const Gu::Capsule& capsule, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation) + { + const PxU32 index = PxU32(mesh->getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + return gMidphaseCapsuleSweepTable[index](mesh, meshGeom, meshTransform, capsule, unitDir, distance, sweepHit, hitFlags, inflation); + } + + // \param[in] mesh triangle mesh + // \param[in] meshGeom geometry object associated with the mesh + // \param[in] meshTransform pose/transform of geometry object + // \param[in] box swept box + // \param[in] unitDir sweep's unit dir + // \param[in] distance sweep's length/max distance + // \param[out] sweepHit hit result + // \param[in] hitFlags query behavior flags + // \param[in] inflation optional inflation value for swept shape + // \return true if a hit was found, false otherwise + PX_FORCE_INLINE bool sweepBoxVsMesh(const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& meshTransform, + const Gu::Box& box, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation) + { + const PxU32 index = PxU32(mesh->getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + return gMidphaseBoxSweepTable[index](mesh, meshGeom, meshTransform, box, unitDir, distance, sweepHit, hitFlags, inflation); + } + + // \param[in] mesh triangle mesh + // \param[in] hullBox hull's bounding box + // \param[in] localDir sweep's unit dir, in local/mesh space + // \param[in] distance sweep's length/max distance + // \param[in] callback callback object, called each time a hit is found + // \param[in] anyHit true for PxHitFlag::eMESH_ANY queries + PX_FORCE_INLINE void 
sweepConvexVsMesh(const TriangleMesh* mesh, const Gu::Box& hullBox, const PxVec3& localDir, const PxReal distance, SweepConvexMeshHitCallback& callback, bool anyHit) + { + const PxU32 index = PxU32(mesh->getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + gMidphaseConvexSweepTable[index](mesh, hullBox, localDir, distance, callback, anyHit); + } +} +} +} +#endif // GU_MIDPHASE_INTERFACE_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseRTree.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseRTree.cpp new file mode 100644 index 00000000..6133f0b8 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseRTree.cpp @@ -0,0 +1,886 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "GuSweepMesh.h" +#include "GuIntersectionRayTriangle.h" +#include "GuIntersectionCapsuleTriangle.h" +#include "GuIntersectionRayBox.h" +#include "GuIntersectionRayBoxSIMD.h" +#include "GuSphere.h" +#include "GuBoxConversion.h" +#include "GuConvexUtilsInternal.h" +#include "GuVecTriangle.h" +#include "GuIntersectionTriangleBox.h" +#include "GuSIMDHelpers.h" +#include "GuTriangleVertexPointers.h" +#include "GuTriangleMeshRTree.h" +#include "GuInternal.h" + +// This file contains code specific to the RTree midphase. 
+ +using namespace physx; +using namespace Cm; +using namespace Gu; +using namespace physx::shdfnd::aos; + +struct MeshRayCollider +{ + template <int tInflate, int tRayTest> + PX_PHYSX_COMMON_API static void collide( + const PxVec3& orig, const PxVec3& dir, // dir is not normalized (full length), both in mesh space (unless meshWorld is non-zero) + PxReal maxT, // maxT is from [0,1], if maxT is 0.0f, AABB traversal will be used + bool bothTriangleSidesCollide, const RTreeTriangleMesh* mesh, MeshHitCallback<PxRaycastHit>& callback, + const PxVec3* inflate = NULL); + + PX_PHYSX_COMMON_API static void collideOBB( + const Box& obb, bool bothTriangleSidesCollide, const RTreeTriangleMesh* mesh, MeshHitCallback<PxRaycastHit>& callback, + bool checkObbIsAligned = true); // perf hint, pass false if obb is rarely axis aligned +}; + +class SimpleRayTriOverlap +{ +public: + PX_FORCE_INLINE SimpleRayTriOverlap(const PxVec3& origin, const PxVec3& dir, bool bothSides, PxReal geomEpsilon) + : mOrigin(origin), mDir(dir), mBothSides(bothSides), mGeomEpsilon(geomEpsilon) + { + } + + PX_FORCE_INLINE Ps::IntBool overlap(const PxVec3& vert0, const PxVec3& vert1, const PxVec3& vert2, PxRaycastHit& hit) const + { + if(!intersectRayTriangle(mOrigin, mDir, vert0, vert1, vert2, hit.distance, hit.u, hit.v, !mBothSides, mGeomEpsilon)) + return false; + + if(hit.distance<-mGeomEpsilon) // test if the ray intersection t is really negative + return false; + + return true; + } + + PxVec3 mOrigin; + PxVec3 mDir; + bool mBothSides; + PxReal mGeomEpsilon; +}; + +using Gu::RTree; + +// This callback comes from RTree and decodes LeafTriangle indices stored in rtree into actual triangles +// This callback is needed because RTree doesn't know that it stores triangles since it's a general purpose spatial index + +#if PX_VC + #pragma warning(push) + #pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value. 
+#endif + +template <int tInflate, bool tRayTest> +struct RayRTreeCallback : RTree::CallbackRaycast, RTree::Callback +{ + MeshHitCallback<PxRaycastHit>& outerCallback; + PxI32 has16BitIndices; + const void* mTris; + const PxVec3* mVerts; + const PxVec3* mInflate; + const SimpleRayTriOverlap rayCollider; + PxReal maxT; + PxRaycastHit closestHit; // recorded closest hit over the whole traversal (only for callback mode eCLOSEST) + PxVec3 cv0, cv1, cv2; // PT: make sure these aren't last in the class, to safely V4Load them + PxU32 cis[3]; + bool hadClosestHit; + const bool closestMode; + Vec3V inflateV, rayOriginV, rayDirV; + + RayRTreeCallback( + PxReal geomEpsilon, MeshHitCallback<PxRaycastHit>& callback, + PxI32 has16BitIndices_, const void* tris, const PxVec3* verts, + const PxVec3& origin, const PxVec3& dir, PxReal maxT_, bool bothSides, const PxVec3* inflate) + : outerCallback(callback), has16BitIndices(has16BitIndices_), + mTris(tris), mVerts(verts), mInflate(inflate), rayCollider(origin, dir, bothSides, geomEpsilon), + maxT(maxT_), closestMode(callback.inClosestMode()) + { + PX_ASSERT(closestHit.distance == PX_MAX_REAL); + hadClosestHit = false; + if (tInflate) + inflateV = V3LoadU(*mInflate); + rayOriginV = V3LoadU(rayCollider.mOrigin); + rayDirV = V3LoadU(rayCollider.mDir); + } + + PX_FORCE_INLINE void getVertIndices(PxU32 triIndex, PxU32& i0, PxU32 &i1, PxU32 &i2) + { + if(has16BitIndices) + { + const PxU16* p = reinterpret_cast<const PxU16*>(mTris) + triIndex*3; + i0 = p[0]; i1 = p[1]; i2 = p[2]; + } + else + { + const PxU32* p = reinterpret_cast<const PxU32*>(mTris) + triIndex*3; + i0 = p[0]; i1 = p[1]; i2 = p[2]; + } + } + + virtual PX_FORCE_INLINE bool processResults(PxU32 NumTouched, PxU32* Touched, PxF32& newMaxT) + { + PX_ASSERT(NumTouched > 0); + // Loop through touched leaves + PxRaycastHit tempHit; + for(PxU32 leaf = 0; leaf<NumTouched; leaf++) + { + // Each leaf box has a set of triangles + LeafTriangles currentLeaf; + currentLeaf.Data = 
Touched[leaf]; + PxU32 nbLeafTris = currentLeaf.GetNbTriangles(); + PxU32 baseLeafTriIndex = currentLeaf.GetTriangleIndex(); + + for(PxU32 i = 0; i < nbLeafTris; i++) + { + PxU32 i0, i1, i2; + const PxU32 triangleIndex = baseLeafTriIndex+i; + getVertIndices(triangleIndex, i0, i1, i2); + + const PxVec3& v0 = mVerts[i0], &v1 = mVerts[i1], &v2 = mVerts[i2]; + const PxU32 vinds[3] = { i0, i1, i2 }; + + if (tRayTest) + { + Ps::IntBool overlap; + if (tInflate) + { + // AP: mesh skew is already included here (ray is pre-transformed) + Vec3V v0v = V3LoadU(v0), v1v = V3LoadU(v1), v2v = V3LoadU(v2); + Vec3V minB = V3Min(V3Min(v0v, v1v), v2v), maxB = V3Max(V3Max(v0v, v1v), v2v); + + // PT: we add an epsilon to max distance, to make sure we don't reject triangles that are just at the same + // distance as best triangle so far. We need to keep all of these to make sure we return the one with the + // best normal. + const float relativeEpsilon = GU_EPSILON_SAME_DISTANCE * PxMax(1.0f, maxT); + FloatV tNear, tFar; + overlap = intersectRayAABB2( + V3Sub(minB, inflateV), V3Add(maxB, inflateV), rayOriginV, rayDirV, FLoad(maxT+relativeEpsilon), tNear, tFar); + if (overlap) + { + // can't clip to tFar here because hitting the AABB doesn't guarantee that we can clip + // (since we can still miss the actual tri) + tempHit.distance = maxT; + tempHit.faceIndex = triangleIndex; + tempHit.u = tempHit.v = 0.0f; + } + } else + overlap = rayCollider.overlap(v0, v1, v2, tempHit) && tempHit.distance <= maxT; + if(!overlap) + continue; + } + tempHit.faceIndex = triangleIndex; + tempHit.flags = PxHitFlag::ePOSITION|PxHitFlag::eDISTANCE; + // Intersection point is valid if dist < segment's length + // We know dist>0 so we can use integers + if (closestMode) + { + if(tempHit.distance < closestHit.distance) + { + closestHit = tempHit; + newMaxT = PxMin(tempHit.distance, newMaxT); + cv0 = v0; cv1 = v1; cv2 = v2; + cis[0] = vinds[0]; cis[1] = vinds[1]; cis[2] = vinds[2]; + hadClosestHit = true; + } + } 
else + { + PxReal shrunkMaxT = newMaxT; + PxAgain again = outerCallback.processHit(tempHit, v0, v1, v2, shrunkMaxT, vinds); + if (!again) + return false; + if (shrunkMaxT < newMaxT) + { + newMaxT = shrunkMaxT; + maxT = shrunkMaxT; + } + } + + if (outerCallback.inAnyMode()) // early out if in ANY mode + return false; + } + + } // for(PxU32 leaf = 0; leaf<NumTouched; leaf++) + + return true; + } + + virtual bool processResults(PxU32 numTouched, PxU32* touched) + { + PxF32 dummy; + return RayRTreeCallback::processResults(numTouched, touched, dummy); + } + + + virtual ~RayRTreeCallback() + { + if (hadClosestHit) + { + PX_ASSERT(outerCallback.inClosestMode()); + outerCallback.processHit(closestHit, cv0, cv1, cv2, maxT, cis); + } + } + +private: + RayRTreeCallback& operator=(const RayRTreeCallback&); +}; + +#if PX_VC + #pragma warning(pop) +#endif + +void MeshRayCollider::collideOBB( + const Box& obb, bool bothTriangleSidesCollide, const RTreeTriangleMesh* mi, MeshHitCallback<PxRaycastHit>& callback, + bool checkObbIsAligned) +{ + const PxU32 maxResults = RTREE_N; // maxResults=rtree page size for more efficient early out + PxU32 buf[maxResults]; + RayRTreeCallback<false, false> rTreeCallback( + mi->getGeomEpsilon(), callback, mi->has16BitIndices(), mi->getTrianglesFast(), mi->getVerticesFast(), + PxVec3(0), PxVec3(0), 0.0f, bothTriangleSidesCollide, NULL); + if (checkObbIsAligned && PxAbs(PxQuat(obb.rot).w) > 0.9999f) + { + PxVec3 aabbExtents = obb.computeAABBExtent(); + mi->getRTree().traverseAABB(obb.center - aabbExtents, obb.center + aabbExtents, maxResults, buf, &rTreeCallback); + } else + mi->getRTree().traverseOBB(obb, maxResults, buf, &rTreeCallback); +} + +template <int tInflate, int tRayTest> +void MeshRayCollider::collide( + const PxVec3& orig, const PxVec3& dir, PxReal maxT, bool bothSides, + const RTreeTriangleMesh* mi, MeshHitCallback<PxRaycastHit>& callback, + const PxVec3* inflate) +{ + const PxU32 maxResults = RTREE_N; // maxResults=rtree page size for 
more efficient early out + PxU32 buf[maxResults]; + if (maxT == 0.0f) // AABB traversal path + { + RayRTreeCallback<tInflate, false> rTreeCallback( + mi->getGeomEpsilon(), callback, mi->has16BitIndices(), mi->getTrianglesFast(), mi->getVerticesFast(), + orig, dir, maxT, bothSides, inflate); + PxVec3 inflate1 = tInflate ? *inflate : PxVec3(0); // both maxT and inflate can be zero, so need to check tInflate + mi->getRTree().traverseAABB(orig-inflate1, orig+inflate1, maxResults, buf, &rTreeCallback); + } + else // ray traversal path + { + RayRTreeCallback<tInflate, tRayTest> rTreeCallback( + mi->getGeomEpsilon(), callback, mi->has16BitIndices(), mi->getTrianglesFast(), mi->getVerticesFast(), + orig, dir, maxT, bothSides, inflate); + mi->getRTree().traverseRay<tInflate>(orig, dir, maxResults, buf, &rTreeCallback, inflate, maxT); + } +} + + +#define TINST(a,b) \ +template void MeshRayCollider::collide<a,b>( \ + const PxVec3& orig, const PxVec3& dir, PxReal maxT, bool bothSides, const RTreeTriangleMesh* mesh, \ + MeshHitCallback<PxRaycastHit>& callback, const PxVec3* inflate); + +TINST(0,0) +TINST(1,0) +TINST(0,1) +TINST(1,1) + +#undef TINST + +#include "GuRaycastTests.h" +#include "PxTriangleMeshGeometry.h" +#include "GuTriangleMesh.h" +#include "CmScaling.h" + +struct RayMeshColliderCallback : public MeshHitCallback<PxRaycastHit> +{ + PxRaycastHit* mDstBase; + PxU32 mHitNum; + PxU32 mMaxHits; + const PxMeshScale* mScale; + const PxTransform* mPose; + const Matrix34* mWorld2vertexSkew; + PxU32 mHitFlags; + const PxVec3& mRayDir; + bool mIsDoubleSided; + float mDistCoeff; + + RayMeshColliderCallback( + CallbackMode::Enum mode_, PxRaycastHit* hits, PxU32 maxHits, const PxMeshScale* scale, const PxTransform* pose, + const Matrix34* world2vertexSkew, PxU32 hitFlags, const PxVec3& rayDir, bool isDoubleSided, float distCoeff) : + MeshHitCallback<PxRaycastHit> (mode_), + mDstBase (hits), + mHitNum (0), + mMaxHits (maxHits), + mScale (scale), + mPose (pose), + mWorld2vertexSkew 
(world2vertexSkew), + mHitFlags (hitFlags), + mRayDir (rayDir), + mIsDoubleSided (isDoubleSided), + mDistCoeff (distCoeff) + { + } + + // return false for early out + virtual bool processHit( + const PxRaycastHit& lHit, const PxVec3& lp0, const PxVec3& lp1, const PxVec3& lp2, PxReal&, const PxU32*) + { + const PxReal u = lHit.u, v = lHit.v; + const PxVec3 localImpact = (1.0f - u - v)*lp0 + u*lp1 + v*lp2; + + //not worth concatenating to do 1 transform: PxMat34Legacy vertex2worldSkew = scaling.getVertex2WorldSkew(absPose); + // PT: TODO: revisit this for N hits + PxRaycastHit hit = lHit; + hit.position = mPose->transform(mScale->transform(localImpact)); + hit.flags = PxHitFlag::ePOSITION|PxHitFlag::eDISTANCE|PxHitFlag::eUV|PxHitFlag::eFACE_INDEX; + hit.normal = PxVec3(0.0f); + hit.distance *= mDistCoeff; + + // Compute additional information if needed + if(mHitFlags & PxHitFlag::eNORMAL) + { + // User requested impact normal + const PxVec3 localNormal = (lp1 - lp0).cross(lp2 - lp0); + + if(mWorld2vertexSkew) + { + hit.normal = mWorld2vertexSkew->rotateTranspose(localNormal); + if (mScale->hasNegativeDeterminant()) + Ps::swap<PxReal>(hit.u, hit.v); // have to swap the UVs though since they were computed in mesh local space + } + else + hit.normal = hit.normal = mPose->rotate(localNormal); + hit.normal.normalize(); + + // PT: figure out correct normal orientation (DE7458) + // - if the mesh is single-sided the normal should be the regular triangle normal N, regardless of eMESH_BOTH_SIDES. + // - if the mesh is double-sided the correct normal can be either N or -N. We take the one opposed to ray direction. 
+ if(mIsDoubleSided && hit.normal.dot(mRayDir) > 0.0f) + hit.normal = -hit.normal; + + hit.flags |= PxHitFlag::eNORMAL; + } + + // PT: no callback => store results in provided buffer + if(mHitNum == mMaxHits) + return false; + + mDstBase[mHitNum++] = hit; + return true; + } + +private: + RayMeshColliderCallback& operator=(const RayMeshColliderCallback&); +}; + +PxU32 physx::Gu::raycast_triangleMesh_RTREE(const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose, + const PxVec3& rayOrigin, const PxVec3& rayDir, PxReal maxDist, + PxHitFlags hitFlags, PxU32 maxHits, PxRaycastHit* PX_RESTRICT hits) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH33); + + const RTreeTriangleMesh* meshData = static_cast<const RTreeTriangleMesh*>(mesh); + + //scaling: transform the ray to vertex space + + PxVec3 orig, dir; + Matrix34 world2vertexSkew; + Matrix34* world2vertexSkewP = NULL; + PxReal distCoeff = 1.0f; + if(meshGeom.scale.isIdentity()) + { + orig = pose.transformInv(rayOrigin); + dir = pose.rotateInv(rayDir); + } + else + { + world2vertexSkew = meshGeom.scale.getInverse() * pose.getInverse(); + world2vertexSkewP = &world2vertexSkew; + orig = world2vertexSkew.transform(rayOrigin); + dir = world2vertexSkew.rotate(rayDir); + { + distCoeff = dir.normalize(); + maxDist *= distCoeff; + maxDist += 1e-3f; + distCoeff = 1.0f / distCoeff; + } + } + + const bool isDoubleSided = meshGeom.meshFlags.isSet(PxMeshGeometryFlag::eDOUBLE_SIDED); + const bool bothSides = isDoubleSided || (hitFlags & PxHitFlag::eMESH_BOTH_SIDES); + + RayMeshColliderCallback callback( + (maxHits > 1) ? CallbackMode::eMULTIPLE : (hitFlags & PxHitFlag::eMESH_ANY ? 
CallbackMode::eANY : CallbackMode::eCLOSEST), + hits, maxHits, &meshGeom.scale, &pose, world2vertexSkewP, hitFlags, rayDir, isDoubleSided, distCoeff); + + MeshRayCollider::collide<0, 1>(orig, dir, maxDist, bothSides, static_cast<const RTreeTriangleMesh*>(meshData), callback, NULL); + return callback.mHitNum; +} + + +static PX_INLINE void computeSweptAABBAroundOBB( + const Box& obb, PxVec3& sweepOrigin, PxVec3& sweepExtents, PxVec3& sweepDir, PxReal& sweepLen) +{ + PxU32 other1, other2; + // largest axis of the OBB is the sweep direction, sum of abs of two other is the swept AABB extents + PxU32 lai = Ps::largestAxis(obb.extents, other1, other2); + PxVec3 longestAxis = obb.rot[lai]*obb.extents[lai]; + PxVec3 absOther1 = obb.rot[other1].abs()*obb.extents[other1]; + PxVec3 absOther2 = obb.rot[other2].abs()*obb.extents[other2]; + sweepOrigin = obb.center - longestAxis; + sweepExtents = absOther1 + absOther2 + PxVec3(GU_MIN_AABB_EXTENT); // see comments for GU_MIN_AABB_EXTENT + sweepLen = 2.0f; // length is already included in longestAxis + sweepDir = longestAxis; +} + +enum { eSPHERE, eCAPSULE, eBOX }; // values for tSCB + +#if PX_VC + #pragma warning(push) + #pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value. 
+ #pragma warning( disable : 4512 ) // assignment operator could not be generated +#endif + +namespace +{ +struct IntersectShapeVsMeshCallback : MeshHitCallback<PxRaycastHit> +{ + PX_NOCOPY(IntersectShapeVsMeshCallback) +public: + IntersectShapeVsMeshCallback(const PxMat33& vertexToShapeSkew, LimitedResults* results, bool flipNormal) + : MeshHitCallback<PxRaycastHit>(CallbackMode::eMULTIPLE), + mVertexToShapeSkew (vertexToShapeSkew), + mResults (results), + mAnyHits (false), + mFlipNormal (flipNormal) + { + } + virtual ~IntersectShapeVsMeshCallback(){} + + const PxMat33& mVertexToShapeSkew; // vertex to box without translation for boxes + LimitedResults* mResults; + bool mAnyHits; + bool mFlipNormal; + + PX_FORCE_INLINE bool recordHit(const PxRaycastHit& aHit, Ps::IntBool hit) + { + if(hit) + { + mAnyHits = true; + if(mResults) + mResults->add(aHit.faceIndex); + else + return false; // abort traversal if we are only interested in firstContact (mResults is NULL) + } + return true; // if we are here, either no triangles were hit or multiple results are expected => continue traversal + } +}; + +template<bool tScaleIsIdentity> +struct IntersectSphereVsMeshCallback : IntersectShapeVsMeshCallback +{ + IntersectSphereVsMeshCallback(const PxMat33& m, LimitedResults* r, bool flipNormal) : IntersectShapeVsMeshCallback(m, r, flipNormal) {} + virtual ~IntersectSphereVsMeshCallback(){} + PxF32 mMinDist2; + PxVec3 mLocalCenter; // PT: sphere center in local/mesh space + + virtual PxAgain processHit( // all reported coords are in mesh local space including hit.position + const PxRaycastHit& aHit, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2, PxReal&, const PxU32*) + { + const Vec3V v0 = V3LoadU(tScaleIsIdentity ? av0 : mVertexToShapeSkew * av0); + const Vec3V v1 = V3LoadU(tScaleIsIdentity ? av1 : mVertexToShapeSkew * (mFlipNormal ? av2 : av1)); + const Vec3V v2 = V3LoadU(tScaleIsIdentity ? av2 : mVertexToShapeSkew * (mFlipNormal ? 
av1 : av2)); + + FloatV dummy1, dummy2; + Vec3V closestP; + PxReal dist2; + FStore(distancePointTriangleSquared(V3LoadU(mLocalCenter), v0, v1, v2, dummy1, dummy2, closestP), &dist2); + return recordHit(aHit, dist2 <= mMinDist2); + } +}; + +template<bool tScaleIsIdentity> +struct IntersectCapsuleVsMeshCallback : IntersectShapeVsMeshCallback +{ + IntersectCapsuleVsMeshCallback(const PxMat33& m, LimitedResults* r, bool flipNormal) : IntersectShapeVsMeshCallback(m, r, flipNormal) {} + virtual ~IntersectCapsuleVsMeshCallback(){} + + Capsule mLocalCapsule; // PT: capsule in mesh/local space + CapsuleTriangleOverlapData mParams; + + virtual PxAgain processHit( // all reported coords are in mesh local space including hit.position + const PxRaycastHit& aHit, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2, PxReal&, const PxU32*) + { + bool hit; + if(tScaleIsIdentity) + { + const PxVec3 normal = (av0 - av1).cross(av0 - av2); + hit = intersectCapsuleTriangle(normal, av0, av1, av2, mLocalCapsule, mParams); + } + else + { + const PxVec3 v0 = mVertexToShapeSkew * av0; + const PxVec3 v1 = mVertexToShapeSkew * (mFlipNormal ? av2 : av1); + const PxVec3 v2 = mVertexToShapeSkew * (mFlipNormal ? 
av1 : av2); + const PxVec3 normal = (v0 - v1).cross(v0 - v2); + hit = intersectCapsuleTriangle(normal, v0, v1, v2, mLocalCapsule, mParams); + } + return recordHit(aHit, hit); + } +}; + +template<bool tScaleIsIdentity> +struct IntersectBoxVsMeshCallback : IntersectShapeVsMeshCallback +{ + IntersectBoxVsMeshCallback(const PxMat33& m, LimitedResults* r, bool flipNormal) : IntersectShapeVsMeshCallback(m, r, flipNormal) {} + virtual ~IntersectBoxVsMeshCallback(){} + + Matrix34 mVertexToBox; + Vec3p mBoxExtents, mBoxCenter; + + virtual PxAgain processHit( // all reported coords are in mesh local space including hit.position + const PxRaycastHit& aHit, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2, PxReal&, const PxU32*) + { + Vec3p v0, v1, v2; + if(tScaleIsIdentity) + { + v0 = mVertexToShapeSkew * av0; // transform from skewed mesh vertex to box space, + v1 = mVertexToShapeSkew * av1; // this includes inverse skew, inverse mesh shape transform and inverse box basis + v2 = mVertexToShapeSkew * av2; + } + else + { + v0 = mVertexToBox.transform(av0); + v1 = mVertexToBox.transform(mFlipNormal ? av2 : av1); + v2 = mVertexToBox.transform(mFlipNormal ? 
av1 : av2); + } + + // PT: this one is safe because we're using Vec3p for all parameters + const Ps::IntBool hit = intersectTriangleBox_Unsafe(mBoxCenter, mBoxExtents, v0, v1, v2); + return recordHit(aHit, hit); + } +}; +} + +#if PX_VC + #pragma warning(pop) +#endif + +template<int tSCB, bool idtMeshScale> +static bool intersectAnyVsMeshT( + const Sphere* worldSphere, const Capsule* worldCapsule, const Box* worldOBB, + const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, + LimitedResults* results) +{ + const bool flipNormal = meshScale.hasNegativeDeterminant(); + PxMat33 shapeToVertexSkew, vertexToShapeSkew; + if (!idtMeshScale && tSCB != eBOX) + { + vertexToShapeSkew = meshScale.toMat33(); + shapeToVertexSkew = vertexToShapeSkew.getInverse(); + } + + if (tSCB == eSPHERE) + { + IntersectSphereVsMeshCallback<idtMeshScale> callback(vertexToShapeSkew, results, flipNormal); + // transform sphere center from world to mesh shape space + const PxVec3 center = meshTransform.transformInv(worldSphere->center); + + // callback will transform verts + callback.mLocalCenter = center; + callback.mMinDist2 = worldSphere->radius*worldSphere->radius; + + PxVec3 sweepOrigin, sweepDir, sweepExtents; + PxReal sweepLen; + if (!idtMeshScale) + { + // AP: compute a swept AABB around an OBB around a skewed sphere + // TODO: we could do better than an AABB around OBB actually because we can slice off the corners.. 
+ const Box worldOBB_(worldSphere->center, PxVec3(worldSphere->radius), PxMat33(PxIdentity)); + Box vertexOBB; + computeVertexSpaceOBB(vertexOBB, worldOBB_, meshTransform, meshScale); + computeSweptAABBAroundOBB(vertexOBB, sweepOrigin, sweepExtents, sweepDir, sweepLen); + } else + { + sweepOrigin = center; + sweepDir = PxVec3(1.0f,0,0); + sweepLen = 0.0f; + sweepExtents = PxVec3(PxMax(worldSphere->radius, GU_MIN_AABB_EXTENT)); + } + + MeshRayCollider::collide<1, 1>(sweepOrigin, sweepDir, sweepLen, true, static_cast<const RTreeTriangleMesh*>(&triMesh), callback, &sweepExtents); + + return callback.mAnyHits; + } + else if (tSCB == eCAPSULE) + { + IntersectCapsuleVsMeshCallback<idtMeshScale> callback(vertexToShapeSkew, results, flipNormal); + const PxF32 radius = worldCapsule->radius; + + // transform world capsule to mesh shape space + callback.mLocalCapsule.p0 = meshTransform.transformInv(worldCapsule->p0); + callback.mLocalCapsule.p1 = meshTransform.transformInv(worldCapsule->p1); + callback.mLocalCapsule.radius = radius; + callback.mParams.init(callback.mLocalCapsule); + + if (idtMeshScale) + { + // traverse a sweptAABB around the capsule + const PxVec3 radius3(radius); + MeshRayCollider::collide<1, 0>(callback.mLocalCapsule.p0, callback.mLocalCapsule.p1-callback.mLocalCapsule.p0, 1.0f, true, static_cast<const RTreeTriangleMesh*>(&triMesh), callback, &radius3); + } + else + { + // make vertex space OBB + Box vertexOBB; + Box worldOBB_; + worldOBB_.create(*worldCapsule); // AP: potential optimization (meshTransform.inverse is already in callback.mCapsule) + computeVertexSpaceOBB(vertexOBB, worldOBB_, meshTransform, meshScale); + + MeshRayCollider::collideOBB(vertexOBB, true, static_cast<const RTreeTriangleMesh*>(&triMesh), callback); + } + return callback.mAnyHits; + } + else if (tSCB == eBOX) + { + Box vertexOBB; // query box in vertex space + if (idtMeshScale) + { + // mesh scale is identity - just inverse transform the box without optimization + vertexOBB = 
transformBoxOrthonormal(*worldOBB, meshTransform.getInverse()); + // mesh vertices will be transformed from skewed vertex space directly to box AABB space + // box inverse rotation is baked into the vertexToShapeSkew transform + // if meshScale is not identity, vertexOBB already effectively includes meshScale transform + PxVec3 boxCenter; + getInverse(vertexToShapeSkew, boxCenter, vertexOBB.rot, vertexOBB.center); + IntersectBoxVsMeshCallback<idtMeshScale> callback(vertexToShapeSkew, results, flipNormal); + + callback.mBoxCenter = -boxCenter; + callback.mBoxExtents = worldOBB->extents; // extents do not change + + MeshRayCollider::collideOBB(vertexOBB, true, static_cast<const RTreeTriangleMesh*>(&triMesh), callback); + + return callback.mAnyHits; + } else + { + computeVertexSpaceOBB(vertexOBB, *worldOBB, meshTransform, meshScale); + + // mesh scale needs to be included - inverse transform and optimize the box + const PxMat33 vertexToWorldSkew_Rot = PxMat33Padded(meshTransform.q) * meshScale.toMat33(); + const PxVec3& vertexToWorldSkew_Trans = meshTransform.p; + + Matrix34 tmp; + buildMatrixFromBox(tmp, *worldOBB); + const Matrix34 inv = tmp.getInverseRT(); + const Matrix34 _vertexToWorldSkew(vertexToWorldSkew_Rot, vertexToWorldSkew_Trans); + + IntersectBoxVsMeshCallback<idtMeshScale> callback(vertexToShapeSkew, results, flipNormal); + callback.mVertexToBox = inv * _vertexToWorldSkew; + callback.mBoxCenter = PxVec3(0.0f); + callback.mBoxExtents = worldOBB->extents; // extents do not change + + MeshRayCollider::collideOBB(vertexOBB, true, static_cast<const RTreeTriangleMesh*>(&triMesh), callback); + + return callback.mAnyHits; + } + } + else + { + PX_ASSERT(0); + return false; + } +} + +template<int tSCB> +static bool intersectAnyVsMesh( + const Sphere* worldSphere, const Capsule* worldCapsule, const Box* worldOBB, + const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, + LimitedResults* results) +{ + 
PX_ASSERT(triMesh.getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH33); + if (meshScale.isIdentity()) + return intersectAnyVsMeshT<tSCB, true>(worldSphere, worldCapsule, worldOBB, triMesh, meshTransform, meshScale, results); + else + return intersectAnyVsMeshT<tSCB, false>(worldSphere, worldCapsule, worldOBB, triMesh, meshTransform, meshScale, results); +} + +bool physx::Gu::intersectSphereVsMesh_RTREE(const Sphere& sphere, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) +{ + return intersectAnyVsMesh<eSPHERE>(&sphere, NULL, NULL, triMesh, meshTransform, meshScale, results); +} + +bool physx::Gu::intersectBoxVsMesh_RTREE(const Box& box, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) +{ + return intersectAnyVsMesh<eBOX>(NULL, NULL, &box, triMesh, meshTransform, meshScale, results); +} + +bool physx::Gu::intersectCapsuleVsMesh_RTREE(const Capsule& capsule, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) +{ + return intersectAnyVsMesh<eCAPSULE>(NULL, &capsule, NULL, triMesh, meshTransform, meshScale, results); +} + +void physx::Gu::intersectOBB_RTREE(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned) +{ + MeshRayCollider::collideOBB(obb, bothTriangleSidesCollide, static_cast<const RTreeTriangleMesh*>(mesh), callback, checkObbIsAligned); +} + +// PT: TODO: refactor/share bits of this +bool physx::Gu::sweepCapsule_MeshGeom_RTREE(const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Capsule& lss, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH33); + const RTreeTriangleMesh* meshData = 
static_cast<const RTreeTriangleMesh*>(mesh); + + const Capsule inflatedCapsule(lss.p0, lss.p1, lss.radius + inflation); + + const bool isIdentity = triMeshGeom.scale.isIdentity(); + bool isDoubleSided = (triMeshGeom.meshFlags & PxMeshGeometryFlag::eDOUBLE_SIDED); + const PxU32 meshBothSides = hitFlags & PxHitFlag::eMESH_BOTH_SIDES; + + // compute sweptAABB + const PxVec3 localP0 = pose.transformInv(inflatedCapsule.p0); + const PxVec3 localP1 = pose.transformInv(inflatedCapsule.p1); + PxVec3 sweepOrigin = (localP0+localP1)*0.5f; + PxVec3 sweepDir = pose.rotateInv(unitDir); + PxVec3 sweepExtents = PxVec3(inflatedCapsule.radius) + (localP0-localP1).abs()*0.5f; + PxReal distance1 = distance; + PxReal distCoeff = 1.0f; + Matrix34 poseWithScale; + if(!isIdentity) + { + poseWithScale = pose * triMeshGeom.scale; + distance1 = computeSweepData(triMeshGeom, sweepOrigin, sweepExtents, sweepDir, distance); + distCoeff = distance1 / distance; + } else + poseWithScale = Matrix34(pose); + + SweepCapsuleMeshHitCallback callback(sweepHit, poseWithScale, distance, isDoubleSided, inflatedCapsule, unitDir, hitFlags, triMeshGeom.scale.hasNegativeDeterminant(), distCoeff); + + MeshRayCollider::collide<1, 1>(sweepOrigin, sweepDir, distance1, true, meshData, callback, &sweepExtents); + + if(meshBothSides) + isDoubleSided = true; + + return callback.finalizeHit(sweepHit, inflatedCapsule, triMeshGeom, pose, isDoubleSided); +} + +#include "GuSweepSharedTests.h" + +// PT: TODO: refactor/share bits of this +bool physx::Gu::sweepBox_MeshGeom_RTREE(const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Box& box, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH33); + const RTreeTriangleMesh* meshData = static_cast<const RTreeTriangleMesh*>(mesh); + + const bool isIdentity = triMeshGeom.scale.isIdentity(); + + 
const bool meshBothSides = hitFlags & PxHitFlag::eMESH_BOTH_SIDES; + const bool isDoubleSided = triMeshGeom.meshFlags & PxMeshGeometryFlag::eDOUBLE_SIDED; + + Matrix34 meshToWorldSkew; + PxVec3 sweptAABBMeshSpaceExtents, meshSpaceOrigin, meshSpaceDir; + + // Input sweep params: geom, pose, box, unitDir, distance + // We convert the origin from world space to mesh local space + // and convert the box+pose to mesh space AABB + if(isIdentity) + { + meshToWorldSkew = Matrix34(pose); + PxMat33 worldToMeshRot(pose.q.getConjugate()); // extract rotation matrix from pose.q + meshSpaceOrigin = worldToMeshRot.transform(box.center - pose.p); + meshSpaceDir = worldToMeshRot.transform(unitDir) * distance; + PxMat33 boxToMeshRot = worldToMeshRot * box.rot; + sweptAABBMeshSpaceExtents = boxToMeshRot.column0.abs() * box.extents.x + + boxToMeshRot.column1.abs() * box.extents.y + + boxToMeshRot.column2.abs() * box.extents.z; + } + else + { + meshToWorldSkew = pose * triMeshGeom.scale; + const PxMat33 meshToWorldSkew_Rot = PxMat33Padded(pose.q) * triMeshGeom.scale.toMat33(); + const PxVec3& meshToWorldSkew_Trans = pose.p; + + PxMat33 worldToVertexSkew_Rot; + PxVec3 worldToVertexSkew_Trans; + getInverse(worldToVertexSkew_Rot, worldToVertexSkew_Trans, meshToWorldSkew_Rot, meshToWorldSkew_Trans); + + //make vertex space OBB + Box vertexSpaceBox1; + const Matrix34 worldToVertexSkew(worldToVertexSkew_Rot, worldToVertexSkew_Trans); + vertexSpaceBox1 = transform(worldToVertexSkew, box); + // compute swept aabb + sweptAABBMeshSpaceExtents = vertexSpaceBox1.computeAABBExtent(); + + meshSpaceOrigin = worldToVertexSkew.transform(box.center); + meshSpaceDir = worldToVertexSkew.rotate(unitDir*distance); // also applies scale to direction/length + } + + sweptAABBMeshSpaceExtents += PxVec3(inflation); // inflate the bounds with additive inflation + sweptAABBMeshSpaceExtents *= 1.01f; // fatten the bounds to account for numerical discrepancies + + PxReal dirLen = PxMax(meshSpaceDir.magnitude(), 
1e-5f); + PxReal distCoeff = 1.0f; + if (!isIdentity) + distCoeff = dirLen / distance; + + // Move to AABB space + Matrix34 worldToBox; + computeWorldToBoxMatrix(worldToBox, box); + + const bool bothTriangleSidesCollide = isDoubleSided || meshBothSides; + + const Matrix34Padded meshToBox = worldToBox*meshToWorldSkew; + const PxTransform boxTransform = box.getTransform(); + + const PxVec3 localDir = worldToBox.rotate(unitDir); + const PxVec3 localDirDist = localDir*distance; + SweepBoxMeshHitCallback callback( // using eMULTIPLE with shrinkMaxT + CallbackMode::eMULTIPLE, meshToBox, distance, bothTriangleSidesCollide, box, localDirDist, localDir, unitDir, hitFlags, inflation, triMeshGeom.scale.hasNegativeDeterminant(), distCoeff); + + MeshRayCollider::collide<1, 1>(meshSpaceOrigin, meshSpaceDir/dirLen, dirLen, bothTriangleSidesCollide, meshData, callback, &sweptAABBMeshSpaceExtents); + + return callback.finalizeHit(sweepHit, triMeshGeom, pose, boxTransform, localDir, meshBothSides, isDoubleSided); +} + +#include "GuInternal.h" +void physx::Gu::sweepConvex_MeshGeom_RTREE(const TriangleMesh* mesh, const Box& hullBox, const PxVec3& localDir, const PxReal distance, SweepConvexMeshHitCallback& callback, bool) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH33); + const RTreeTriangleMesh* meshData = static_cast<const RTreeTriangleMesh*>(mesh); + + // create temporal bounds + Box querySweptBox; + computeSweptBox(querySweptBox, hullBox.extents, hullBox.center, hullBox.rot, localDir, distance); + + MeshRayCollider::collideOBB(querySweptBox, true, meshData, callback); +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuOverlapTestsMesh.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuOverlapTestsMesh.cpp new file mode 100644 index 00000000..a44343e8 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuOverlapTestsMesh.cpp @@ -0,0 +1,241 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA 
software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "GuMidphaseInterface.h" +#include "CmScaling.h" +#include "GuSphere.h" +#include "GuInternal.h" +#include "GuConvexUtilsInternal.h" +#include "GuVecTriangle.h" +#include "GuVecConvexHull.h" +#include "GuConvexMesh.h" +#include "GuGJK.h" +#include "GuSweepSharedTests.h" + +using namespace physx; +using namespace Cm; +using namespace Gu; +using namespace physx::shdfnd::aos; + +// PT: TODO: remove this function, replace with Midphase:: call at calling sites (TA34704) +bool Gu::checkOverlapAABB_triangleGeom(const PxGeometry& geom, const PxTransform& pose, const PxBounds3& box) +{ + PX_ASSERT(geom.getType() == PxGeometryType::eTRIANGLEMESH); + const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom); + + // PT: TODO: pass AABB directly to interface + const Box obb(box.getCenter(), box.getExtents(), PxMat33(PxIdentity)); + + TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + return Midphase::intersectBoxVsMesh(obb, *meshData, pose, meshGeom.scale, NULL); +} + +bool GeomOverlapCallback_SphereMesh(GU_OVERLAP_FUNC_PARAMS) +{ + PX_ASSERT(geom0.getType()==PxGeometryType::eSPHERE); + PX_ASSERT(geom1.getType()==PxGeometryType::eTRIANGLEMESH); + PX_UNUSED(cache); + + const PxSphereGeometry& sphereGeom = static_cast<const PxSphereGeometry&>(geom0); + const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom1); + + const Sphere worldSphere(pose0.p, sphereGeom.radius); + + TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + return Midphase::intersectSphereVsMesh(worldSphere, *meshData, pose1, meshGeom.scale, NULL); +} + +bool GeomOverlapCallback_CapsuleMesh(GU_OVERLAP_FUNC_PARAMS) +{ + PX_ASSERT(geom0.getType()==PxGeometryType::eCAPSULE); + PX_ASSERT(geom1.getType()==PxGeometryType::eTRIANGLEMESH); + PX_UNUSED(cache); + + const PxCapsuleGeometry& capsuleGeom = static_cast<const PxCapsuleGeometry&>(geom0); + const PxTriangleMeshGeometry& meshGeom = 
static_cast<const PxTriangleMeshGeometry&>(geom1); + + TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + + Capsule capsule; + getCapsule(capsule, capsuleGeom, pose0); + return Midphase::intersectCapsuleVsMesh(capsule, *meshData, pose1, meshGeom.scale, NULL); +} + +bool GeomOverlapCallback_BoxMesh(GU_OVERLAP_FUNC_PARAMS) +{ + PX_ASSERT(geom0.getType()==PxGeometryType::eBOX); + PX_ASSERT(geom1.getType()==PxGeometryType::eTRIANGLEMESH); + PX_UNUSED(cache); + + const PxBoxGeometry& boxGeom = static_cast<const PxBoxGeometry&>(geom0); + const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom1); + + TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + + Box box; + buildFrom(box, pose0.p, boxGeom.halfExtents, pose0.q); + return Midphase::intersectBoxVsMesh(box, *meshData, pose1, meshGeom.scale, NULL); +} + +/////////////////////////////////////////////////////////////////////////////// +struct ConvexVsMeshOverlapCallback : MeshHitCallback<PxRaycastHit> +{ + PsMatTransformV MeshToBoxV; + Vec3V boxExtents; + + ConvexVsMeshOverlapCallback( + const ConvexMesh& cm, const PxMeshScale& convexScale, const FastVertex2ShapeScaling& meshScale, + const PxTransform& tr0, const PxTransform& tr1, bool identityScale, const Box& meshSpaceOBB) + : + MeshHitCallback<PxRaycastHit>(CallbackMode::eMULTIPLE), + mAnyHit (false), + mIdentityScale (identityScale) + { + if (!mIdentityScale) // not done in initializer list for performance + mMeshScale = Ps::aos::Mat33V( + V3LoadU(meshScale.getVertex2ShapeSkew().column0), + V3LoadU(meshScale.getVertex2ShapeSkew().column1), + V3LoadU(meshScale.getVertex2ShapeSkew().column2) ); + using namespace Ps::aos; + + const ConvexHullData* hullData = &cm.getHull(); + + const Vec3V vScale0 = V3LoadU_SafeReadW(convexScale.scale); // PT: safe because 'rotation' follows 'scale' in PxMeshScale + const QuatV vQuat0 = QuatVLoadU(&convexScale.rotation.x); + + mConvex = 
ConvexHullV(hullData, V3Zero(), vScale0, vQuat0, convexScale.isIdentity()); + aToB = PsMatTransformV(tr0.transformInv(tr1)); + + mIdentityScale = identityScale; + + { + // Move to AABB space + Matrix34 MeshToBox; + computeWorldToBoxMatrix(MeshToBox, meshSpaceOBB); + + const Vec3V base0 = V3LoadU(MeshToBox.m.column0); + const Vec3V base1 = V3LoadU(MeshToBox.m.column1); + const Vec3V base2 = V3LoadU(MeshToBox.m.column2); + const Mat33V matV(base0, base1, base2); + const Vec3V p = V3LoadU(MeshToBox.p); + MeshToBoxV = PsMatTransformV(p, matV); + boxExtents = V3LoadU(meshSpaceOBB.extents+PxVec3(0.001f)); + } + } + virtual ~ConvexVsMeshOverlapCallback() {} + + virtual PxAgain processHit( // all reported coords are in mesh local space including hit.position + const PxRaycastHit&, const PxVec3& v0a, const PxVec3& v1a, const PxVec3& v2a, PxReal&, const PxU32*) + { + using namespace Ps::aos; + Vec3V v0 = V3LoadU(v0a), v1 = V3LoadU(v1a), v2 = V3LoadU(v2a); + + // test triangle AABB in box space vs box AABB in box local space + const Vec3V triV0 = MeshToBoxV.transform(v0); // AP: MeshToBoxV already includes mesh scale so we have to use unscaled verts here + const Vec3V triV1 = MeshToBoxV.transform(v1); + const Vec3V triV2 = MeshToBoxV.transform(v2); + Vec3V triMn = V3Min(V3Min(triV0, triV1), triV2); + Vec3V triMx = V3Max(V3Max(triV0, triV1), triV2); + Vec3V negExtents = V3Neg(boxExtents); + BoolV minSeparated = V3IsGrtr(triMn, boxExtents), maxSeparated = V3IsGrtr(negExtents, triMx); + BoolV bSeparated = BAnyTrue3(BOr(minSeparated, maxSeparated)); + if (BAllEqTTTT(bSeparated)) + return true; // continue traversal + + if (!mIdentityScale) + { + v0 = M33MulV3(mMeshScale, v0); + v1 = M33MulV3(mMeshScale, v1); + v2 = M33MulV3(mMeshScale, v2); + } + + TriangleV triangle(v0, v1, v2); + Vec3V contactA, contactB, normal; + FloatV dist; + GjkStatus status; + RelativeConvex<TriangleV> convexA(triangle, aToB); + LocalConvex<ConvexHullV> convexB(mConvex); + status = gjk(convexA, convexB, 
aToB.p, FZero(), contactA, contactB, normal, dist); + if (status == GJK_CONTACT)// || FAllGrtrOrEq(mSqTolerance, sqDist)) + { + mAnyHit = true; + return false; // abort traversal + } + return true; // continue traversal + } + + ConvexHullV mConvex; + PsMatTransformV aToB; + Ps::aos::Mat33V mMeshScale; + bool mAnyHit; + bool mIdentityScale; + +private: + ConvexVsMeshOverlapCallback& operator=(const ConvexVsMeshOverlapCallback&); +}; + +// PT: TODO: refactor bits of this with convex-vs-mesh code +bool GeomOverlapCallback_ConvexMesh(GU_OVERLAP_FUNC_PARAMS) +{ + PX_ASSERT(geom0.getType()==PxGeometryType::eCONVEXMESH); + PX_ASSERT(geom1.getType()==PxGeometryType::eTRIANGLEMESH); + PX_UNUSED(cache); + + const PxConvexMeshGeometry& convexGeom = static_cast<const PxConvexMeshGeometry&>(geom0); + const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom1); + + ConvexMesh* cm = static_cast<ConvexMesh*>(convexGeom.convexMesh); + TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + + const bool idtScaleConvex = convexGeom.scale.isIdentity(); + const bool idtScaleMesh = meshGeom.scale.isIdentity(); + + FastVertex2ShapeScaling convexScaling; + if (!idtScaleConvex) + convexScaling.init(convexGeom.scale); + + FastVertex2ShapeScaling meshScaling; + if (!idtScaleMesh) + meshScaling.init(meshGeom.scale); + + const Matrix34 world0(pose0); + const Matrix34 world1(pose1); + + PX_ASSERT(!cm->getLocalBoundsFast().isEmpty()); + const PxBounds3 hullAABB = cm->getLocalBoundsFast().transformFast(convexScaling.getVertex2ShapeSkew()); + + Box hullOBB; + computeHullOBB(hullOBB, hullAABB, 0.0f, world0, world1, meshScaling, idtScaleMesh); + + ConvexVsMeshOverlapCallback cb(*cm, convexGeom.scale, meshScaling, pose0, pose1, idtScaleMesh, hullOBB); + Midphase::intersectOBB(meshData, hullOBB, cb, true, false); + + return cb.mAnyHit; +} + diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTree.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTree.cpp 
new file mode 100644 index 00000000..7556f4e0 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTree.cpp @@ -0,0 +1,466 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxPreprocessor.h" + +#define RTREE_TEXT_DUMP_ENABLE 0 +#if PX_P64_FAMILY +#define RTREE_PAGES_PER_POOL_SLAB 16384 // preallocate all pages in first batch to make sure we stay within 32 bits for relative pointers.. this is 2 megs +#else +#define RTREE_PAGES_PER_POOL_SLAB 128 +#endif + +#define INSERT_SCAN_LOOKAHEAD 1 // enable one level lookahead scan for determining which child page is best to insert a node into + +#define RTREE_INFLATION_EPSILON 5e-4f + +#include "GuRTree.h" +#include "PsSort.h" +#include "GuSerialize.h" +#include "CmUtils.h" +#include "PsUtilities.h" + +using namespace physx; +#if PX_ENABLE_DYNAMIC_MESH_RTREE +using namespace shdfnd::aos; +#endif +using Ps::Array; +using Ps::sort; +using namespace Gu; + +namespace physx +{ +namespace Gu { + +///////////////////////////////////////////////////////////////////////// +PxU32 RTree::mVersion = 1; + +bool RTree::save(PxOutputStream& stream) const +{ + // save the RTree root structure followed immediately by RTreePage pages to an output stream + bool mismatch = (Ps::littleEndian() == 1); + writeChunk('R', 'T', 'R', 'E', stream); + writeDword(mVersion, mismatch, stream); + writeFloatBuffer(&mBoundsMin.x, 4, mismatch, stream); + writeFloatBuffer(&mBoundsMax.x, 4, mismatch, stream); + writeFloatBuffer(&mInvDiagonal.x, 4, mismatch, stream); + writeFloatBuffer(&mDiagonalScaler.x, 4, mismatch, stream); + writeDword(mPageSize, mismatch, stream); + writeDword(mNumRootPages, mismatch, stream); + writeDword(mNumLevels, mismatch, stream); + writeDword(mTotalNodes, mismatch, stream); + writeDword(mTotalPages, mismatch, stream); + PxU32 unused = 0; // backwards compatibility + writeDword(unused, mismatch, stream); + for (PxU32 j = 0; j < mTotalPages; j++) + { + writeFloatBuffer(mPages[j].minx, RTREE_N, mismatch, stream); + writeFloatBuffer(mPages[j].miny, RTREE_N, mismatch, stream); + writeFloatBuffer(mPages[j].minz, RTREE_N, mismatch, stream); + writeFloatBuffer(mPages[j].maxx, RTREE_N, 
mismatch, stream); + writeFloatBuffer(mPages[j].maxy, RTREE_N, mismatch, stream); + writeFloatBuffer(mPages[j].maxz, RTREE_N, mismatch, stream); + WriteDwordBuffer(mPages[j].ptrs, RTREE_N, mismatch, stream); + } + + return true; +} + +///////////////////////////////////////////////////////////////////////// +bool RTree::load(PxInputStream& stream, PxU32 meshVersion) +{ + PX_UNUSED(meshVersion); + + release(); + + PxI8 a, b, c, d; + readChunk(a, b, c, d, stream); + if(a!='R' || b!='T' || c!='R' || d!='E') + return false; + + bool mismatch = (Ps::littleEndian() == 1); + if(readDword(mismatch, stream) != mVersion) + return false; + + readFloatBuffer(&mBoundsMin.x, 4, mismatch, stream); + readFloatBuffer(&mBoundsMax.x, 4, mismatch, stream); + readFloatBuffer(&mInvDiagonal.x, 4, mismatch, stream); + readFloatBuffer(&mDiagonalScaler.x, 4, mismatch, stream); + mPageSize = readDword(mismatch, stream); + mNumRootPages = readDword(mismatch, stream); + mNumLevels = readDword(mismatch, stream); + mTotalNodes = readDword(mismatch, stream); + mTotalPages = readDword(mismatch, stream); + PxU32 unused = readDword(mismatch, stream); PX_UNUSED(unused); // backwards compatibility + mPages = static_cast<RTreePage*>( + Ps::AlignedAllocator<128>().allocate(sizeof(RTreePage)*mTotalPages, __FILE__, __LINE__)); + Cm::markSerializedMem(mPages, sizeof(RTreePage)*mTotalPages); + for (PxU32 j = 0; j < mTotalPages; j++) + { + readFloatBuffer(mPages[j].minx, RTREE_N, mismatch, stream); + readFloatBuffer(mPages[j].miny, RTREE_N, mismatch, stream); + readFloatBuffer(mPages[j].minz, RTREE_N, mismatch, stream); + readFloatBuffer(mPages[j].maxx, RTREE_N, mismatch, stream); + readFloatBuffer(mPages[j].maxy, RTREE_N, mismatch, stream); + readFloatBuffer(mPages[j].maxz, RTREE_N, mismatch, stream); + ReadDwordBuffer(mPages[j].ptrs, RTREE_N, mismatch, stream); + } + + return true; +} + +///////////////////////////////////////////////////////////////////////// +PxU32 RTree::computeBottomLevelCount(PxU32 
multiplier) const +{ + PxU32 topCount = 0, curCount = mNumRootPages; + const RTreePage* rightMostPage = &mPages[mNumRootPages-1]; + PX_ASSERT(rightMostPage); + for (PxU32 level = 0; level < mNumLevels-1; level++) + { + topCount += curCount; + PxU32 nc = rightMostPage->nodeCount(); + PX_ASSERT(nc > 0 && nc <= RTREE_N); + // old version pointer, up to PX_MESH_VERSION 8 + PxU32 ptr = (rightMostPage->ptrs[nc-1]) * multiplier; + PX_ASSERT(ptr % sizeof(RTreePage) == 0); + const RTreePage* rightMostPageNext = mPages + (ptr / sizeof(RTreePage)); + curCount = PxU32(rightMostPageNext - rightMostPage); + rightMostPage = rightMostPageNext; + } + + return mTotalPages - topCount; +} + +///////////////////////////////////////////////////////////////////////// +RTree::RTree(const PxEMPTY) +{ + mFlags |= USER_ALLOCATED; +} + + +// PX_SERIALIZATION +///////////////////////////////////////////////////////////////////////// +void RTree::exportExtraData(PxSerializationContext& stream) +{ + stream.alignData(128); + stream.writeData(mPages, mTotalPages*sizeof(RTreePage)); +} + +///////////////////////////////////////////////////////////////////////// +void RTree::importExtraData(PxDeserializationContext& context) +{ + context.alignExtraData(128); + mPages = context.readExtraData<RTreePage>(mTotalPages); +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE PxU32 RTreePage::nodeCount() const +{ + for (int j = 0; j < RTREE_N; j ++) + if (minx[j] == MX) + return PxU32(j); + + return RTREE_N; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::clearNode(PxU32 nodeIndex) +{ + PX_ASSERT(nodeIndex < RTREE_N); + minx[nodeIndex] = miny[nodeIndex] = minz[nodeIndex] = MX; // initialize empty node with sentinels + maxx[nodeIndex] = maxy[nodeIndex] = maxz[nodeIndex] = MN; + ptrs[nodeIndex] = 0; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void 
RTreePage::getNode(const PxU32 nodeIndex, RTreeNodeQ& r) const +{ + PX_ASSERT(nodeIndex < RTREE_N); + r.minx = minx[nodeIndex]; + r.miny = miny[nodeIndex]; + r.minz = minz[nodeIndex]; + r.maxx = maxx[nodeIndex]; + r.maxy = maxy[nodeIndex]; + r.maxz = maxz[nodeIndex]; + r.ptr = ptrs[nodeIndex]; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::setEmpty(PxU32 startIndex) +{ + PX_ASSERT(startIndex < RTREE_N); + for (PxU32 j = startIndex; j < RTREE_N; j ++) + clearNode(j); +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::computeBounds(RTreeNodeQ& newBounds) +{ + RTreeValue _minx = MX, _miny = MX, _minz = MX, _maxx = MN, _maxy = MN, _maxz = MN; + for (PxU32 j = 0; j < RTREE_N; j++) + { + if (isEmpty(j)) + continue; + _minx = PxMin(_minx, minx[j]); + _miny = PxMin(_miny, miny[j]); + _minz = PxMin(_minz, minz[j]); + _maxx = PxMax(_maxx, maxx[j]); + _maxy = PxMax(_maxy, maxy[j]); + _maxz = PxMax(_maxz, maxz[j]); + } + newBounds.minx = _minx; + newBounds.miny = _miny; + newBounds.minz = _minz; + newBounds.maxx = _maxx; + newBounds.maxy = _maxy; + newBounds.maxz = _maxz; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::adjustChildBounds(PxU32 index, const RTreeNodeQ& adjChild) +{ + PX_ASSERT(index < RTREE_N); + minx[index] = adjChild.minx; + miny[index] = adjChild.miny; + minz[index] = adjChild.minz; + maxx[index] = adjChild.maxx; + maxy[index] = adjChild.maxy; + maxz[index] = adjChild.maxz; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::growChildBounds(PxU32 index, const RTreeNodeQ& child) +{ + PX_ASSERT(index < RTREE_N); + minx[index] = PxMin(minx[index], child.minx); + miny[index] = PxMin(miny[index], child.miny); + minz[index] = PxMin(minz[index], child.minz); + maxx[index] = PxMax(maxx[index], child.maxx); + 
maxy[index] = PxMax(maxy[index], child.maxy); + maxz[index] = PxMax(maxz[index], child.maxz); +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::copyNode(PxU32 targetIndex, const RTreePage& sourcePage, PxU32 sourceIndex) +{ + PX_ASSERT(targetIndex < RTREE_N); + PX_ASSERT(sourceIndex < RTREE_N); + minx[targetIndex] = sourcePage.minx[sourceIndex]; + miny[targetIndex] = sourcePage.miny[sourceIndex]; + minz[targetIndex] = sourcePage.minz[sourceIndex]; + maxx[targetIndex] = sourcePage.maxx[sourceIndex]; + maxy[targetIndex] = sourcePage.maxy[sourceIndex]; + maxz[targetIndex] = sourcePage.maxz[sourceIndex]; + ptrs[targetIndex] = sourcePage.ptrs[sourceIndex]; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::setNode(PxU32 targetIndex, const RTreeNodeQ& sourceNode) +{ + PX_ASSERT(targetIndex < RTREE_N); + minx[targetIndex] = sourceNode.minx; + miny[targetIndex] = sourceNode.miny; + minz[targetIndex] = sourceNode.minz; + maxx[targetIndex] = sourceNode.maxx; + maxy[targetIndex] = sourceNode.maxy; + maxz[targetIndex] = sourceNode.maxz; + ptrs[targetIndex] = sourceNode.ptr; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreeNodeQ::grow(const RTreePage& page, int nodeIndex) +{ + PX_ASSERT(nodeIndex < RTREE_N); + minx = PxMin(minx, page.minx[nodeIndex]); + miny = PxMin(miny, page.miny[nodeIndex]); + minz = PxMin(minz, page.minz[nodeIndex]); + maxx = PxMax(maxx, page.maxx[nodeIndex]); + maxy = PxMax(maxy, page.maxy[nodeIndex]); + maxz = PxMax(maxz, page.maxz[nodeIndex]); +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreeNodeQ::grow(const RTreeNodeQ& node) +{ + minx = PxMin(minx, node.minx); miny = PxMin(miny, node.miny); minz = PxMin(minz, node.minz); + maxx = PxMax(maxx, node.maxx); maxy = PxMax(maxy, node.maxy); maxz = PxMax(maxz, node.maxz); +} 
+ +///////////////////////////////////////////////////////////////////////// +#if PX_ENABLE_DYNAMIC_MESH_RTREE +void RTree::validateRecursive(PxU32 level, RTreeNodeQ parentBounds, RTreePage* page, CallbackRefit* cbLeaf) +#else +void RTree::validateRecursive(PxU32 level, RTreeNodeQ parentBounds, RTreePage* page) +#endif +{ + PX_UNUSED(parentBounds); + + static PxU32 validateCounter = 0; // this is to suppress a warning that recursive call has no side effects + validateCounter++; + + RTreeNodeQ n; + PxU32 pageNodeCount = page->nodeCount(); + for (PxU32 j = 0; j < pageNodeCount; j++) + { + page->getNode(j, n); + if (page->isEmpty(j)) + continue; + PX_ASSERT(n.minx >= parentBounds.minx); PX_ASSERT(n.miny >= parentBounds.miny); PX_ASSERT(n.minz >= parentBounds.minz); + PX_ASSERT(n.maxx <= parentBounds.maxx); PX_ASSERT(n.maxy <= parentBounds.maxy); PX_ASSERT(n.maxz <= parentBounds.maxz); + if (!n.isLeaf()) + { + PX_ASSERT((n.ptr&1) == 0); + RTreePage* childPage = reinterpret_cast<RTreePage*>(size_t(mPages) + n.ptr); +#if PX_ENABLE_DYNAMIC_MESH_RTREE + validateRecursive(level+1, n, childPage, cbLeaf); + } else if (cbLeaf) + { + Vec3V mnv, mxv; + cbLeaf->recomputeBounds(page->ptrs[j] & ~1, mnv, mxv); + PxVec3 mn3, mx3; V3StoreU(mnv, mn3); V3StoreU(mxv, mx3); + const PxBounds3 lb(mn3, mx3); + const PxVec3& mn = lb.minimum; const PxVec3& mx = lb.maximum; PX_UNUSED(mn); PX_UNUSED(mx); + PX_ASSERT(mn.x >= n.minx); PX_ASSERT(mn.y >= n.miny); PX_ASSERT(mn.z >= n.minz); + PX_ASSERT(mx.x <= n.maxx); PX_ASSERT(mx.y <= n.maxy); PX_ASSERT(mx.z <= n.maxz); + } +#else + validateRecursive(level+1, n, childPage); + } +#endif + } + RTreeNodeQ recomputedBounds; + page->computeBounds(recomputedBounds); + PX_ASSERT((recomputedBounds.minx - parentBounds.minx)<=RTREE_INFLATION_EPSILON); + PX_ASSERT((recomputedBounds.miny - parentBounds.miny)<=RTREE_INFLATION_EPSILON); + PX_ASSERT((recomputedBounds.minz - parentBounds.minz)<=RTREE_INFLATION_EPSILON); + PX_ASSERT((recomputedBounds.maxx - 
parentBounds.maxx)<=RTREE_INFLATION_EPSILON); + PX_ASSERT((recomputedBounds.maxy - parentBounds.maxy)<=RTREE_INFLATION_EPSILON); + PX_ASSERT((recomputedBounds.maxz - parentBounds.maxz)<=RTREE_INFLATION_EPSILON); +} + +///////////////////////////////////////////////////////////////////////// +#if PX_ENABLE_DYNAMIC_MESH_RTREE +void RTree::validate(CallbackRefit* cbLeaf) +#else +void RTree::validate() +#endif +{ + for (PxU32 j = 0; j < mNumRootPages; j++) + { + RTreeNodeQ rootBounds; + mPages[j].computeBounds(rootBounds); +#if PX_ENABLE_DYNAMIC_MESH_RTREE + validateRecursive(0, rootBounds, mPages+j, cbLeaf); +#else + validateRecursive(0, rootBounds, mPages+j); +#endif + } +} + +#if PX_ENABLE_DYNAMIC_MESH_RTREE +void RTree::refitAllStaticTree(CallbackRefit& cb, PxBounds3* retBounds) +{ + PxU8* treeNodes8 = reinterpret_cast<PxU8*>(mPages); + + // since pages are ordered we can scan back to front and the hierarchy will be updated + for (PxI32 iPage = PxI32(mTotalPages)-1; iPage>=0; iPage--) + { + RTreePage& page = mPages[iPage]; + for (PxU32 j = 0; j < RTREE_N; j++) + { + if (page.isEmpty(j)) + continue; + if (page.isLeaf(j)) + { + Vec3V childMn, childMx; + cb.recomputeBounds(page.ptrs[j]-1, childMn, childMx); // compute the bound around triangles + PxVec3 mn3, mx3; + V3StoreU(childMn, mn3); + V3StoreU(childMx, mx3); + page.minx[j] = mn3.x; page.miny[j] = mn3.y; page.minz[j] = mn3.z; + page.maxx[j] = mx3.x; page.maxy[j] = mx3.y; page.maxz[j] = mx3.z; + } else + { + const RTreePage* child = reinterpret_cast<const RTreePage*>(treeNodes8 + page.ptrs[j]); + PX_COMPILE_TIME_ASSERT(RTREE_N == 4); + bool first = true; + for (PxU32 k = 0; k < RTREE_N; k++) + { + if (child->isEmpty(k)) + continue; + if (first) + { + page.minx[j] = child->minx[k]; page.miny[j] = child->miny[k]; page.minz[j] = child->minz[k]; + page.maxx[j] = child->maxx[k]; page.maxy[j] = child->maxy[k]; page.maxz[j] = child->maxz[k]; + first = false; + } else + { + page.minx[j] = PxMin(page.minx[j], 
child->minx[k]); + page.miny[j] = PxMin(page.miny[j], child->miny[k]); + page.minz[j] = PxMin(page.minz[j], child->minz[k]); + page.maxx[j] = PxMax(page.maxx[j], child->maxx[k]); + page.maxy[j] = PxMax(page.maxy[j], child->maxy[k]); + page.maxz[j] = PxMax(page.maxz[j], child->maxz[k]); + } + } + } + } + } + + if (retBounds) + { + RTreeNodeQ bound1; + for (PxU32 ii = 0; ii<mNumRootPages; ii++) + { + mPages[ii].computeBounds(bound1); + if (ii == 0) + { + retBounds->minimum = PxVec3(bound1.minx, bound1.miny, bound1.minz); + retBounds->maximum = PxVec3(bound1.maxx, bound1.maxy, bound1.maxz); + } else + { + retBounds->minimum = retBounds->minimum.minimum(PxVec3(bound1.minx, bound1.miny, bound1.minz)); + retBounds->maximum = retBounds->maximum.maximum(PxVec3(bound1.maxx, bound1.maxy, bound1.maxz)); + } + } + } + +#if PX_CHECKED + validate(&cb); +#endif +} +#endif // PX_ENABLE_DYNAMIC_MESH_RTREE + +//~PX_SERIALIZATION +const RTreeValue RTreePage::MN = -PX_MAX_F32; +const RTreeValue RTreePage::MX = PX_MAX_F32; + +} // namespace Gu + +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTree.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTree.h new file mode 100644 index 00000000..48c54fc5 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTree.h @@ -0,0 +1,304 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_RTREE_H +#define GU_RTREE_H + +#include "foundation/PxSimpleTypes.h" +#include "foundation/PxVec4.h" +#include "foundation/PxBounds3.h" +#include "foundation/PxAssert.h" +#include "PsUserAllocated.h" // for PxSerializationContext +#include "PxSerialFramework.h" +#include "PxTriangleMesh.h" +#include "PsAlignedMalloc.h" + + +#if PX_ENABLE_DYNAMIC_MESH_RTREE +#include "PsVecMath.h" +#endif + +#define RTREE_N 4 // changing this number will affect the mesh format +PX_COMPILE_TIME_ASSERT(RTREE_N == 4 || RTREE_N == 8); // using the low 5 bits for storage of index(childPtr) for dynamic rtree + +namespace physx +{ + + +#if PX_VC +#pragma warning(push) +#pragma warning(disable: 4324) // Padding was added at the end of a structure because of a __declspec(align) value. 
#endif

namespace Gu {

	class Box;
	struct RTreePage;

	// Scalar type stored in the quantized tree nodes (plain 32-bit float here).
	typedef PxF32 RTreeValue;

	/////////////////////////////////////////////////////////////////////////
	// quantized untransposed RTree node - used for offline build and dynamic insertion
	// One AABB plus a child/leaf reference. The low bit of 'ptr' distinguishes a
	// leaf payload from a child-page reference; the remaining bits are the
	// offset/handle (see RTreePage::ptrs for the encoding).
	struct RTreeNodeQ
	{
		RTreeValue minx, miny, minz, maxx, maxy, maxz;
		PxU32 ptr; // lowest bit is leaf flag

		PX_FORCE_INLINE void setLeaf(bool set) { if (set) ptr |= 1; else ptr &= ~1; }
		PX_FORCE_INLINE PxU32 isLeaf() const { return ptr & 1; }
		PX_FORCE_INLINE void setEmpty();
		PX_FORCE_INLINE void grow(const RTreePage& page, int nodeIndex);
		PX_FORCE_INLINE void grow(const RTreeNodeQ& node);
	};

	/////////////////////////////////////////////////////////////////////////
	// RTreePage data structure, holds RTREE_N transposed nodes
	// Bounds are stored in structure-of-arrays ("transposed") layout so a single
	// 4-wide SIMD load fetches the same coordinate of RTREE_N sibling nodes.

	// RTreePage data structure, holds 8 transposed nodes
	// NOTE(review): comment says 8 but the arrays are sized RTREE_N (currently 4) -
	// the "8" appears to be stale text from an earlier configuration.
	PX_ALIGN_PREFIX(16)
	struct RTreePage
	{
		//= ATTENTION! =====================================================================================
		// Changing the data layout of this class breaks the binary serialization format. See comments for
		// PX_BINARY_SERIAL_VERSION. If a modification is required, please adjust the getBinaryMetaData
		// function. If the modification is made on a custom branch, please change PX_BINARY_SERIAL_VERSION
		// accordingly.
		//==================================================================================================

		// Sentinel values: an empty slot is encoded as [min=MX, max=MN], i.e. an
		// inverted (degenerate) range, which isEmpty() detects via minx > maxx.
		static const RTreeValue MN, MX;

		RTreeValue minx[RTREE_N]; // [min=MX, max=MN] is used as a sentinel range for empty bounds
		RTreeValue miny[RTREE_N];
		RTreeValue minz[RTREE_N];
		RTreeValue maxx[RTREE_N];
		RTreeValue maxy[RTREE_N];
		RTreeValue maxz[RTREE_N];
		PxU32 ptrs[RTREE_N]; // for static rtree this is an offset relative to the first page divided by 16, for dynamics it's an absolute pointer divided by 16

		PX_FORCE_INLINE PxU32 nodeCount() const; // returns the number of occupied nodes in this page
		PX_FORCE_INLINE void setEmpty(PxU32 startIndex = 0);
		PX_FORCE_INLINE bool isEmpty(PxU32 index) const { return minx[index] > maxx[index]; }
		PX_FORCE_INLINE void copyNode(PxU32 targetIndex, const RTreePage& sourcePage, PxU32 sourceIndex);
		PX_FORCE_INLINE void setNode(PxU32 targetIndex, const RTreeNodeQ& node);
		PX_FORCE_INLINE void clearNode(PxU32 nodeIndex);
		PX_FORCE_INLINE void getNode(PxU32 nodeIndex, RTreeNodeQ& result) const;
		PX_FORCE_INLINE void computeBounds(RTreeNodeQ& bounds);
		PX_FORCE_INLINE void adjustChildBounds(PxU32 index, const RTreeNodeQ& adjustedChildBounds);
		PX_FORCE_INLINE void growChildBounds(PxU32 index, const RTreeNodeQ& adjustedChildBounds);
		PX_FORCE_INLINE PxU32 getNodeHandle(PxU32 index) const;
		PX_FORCE_INLINE PxU32 isLeaf(PxU32 index) const { return ptrs[index] & 1; }
	} PX_ALIGN_SUFFIX(16);

	/////////////////////////////////////////////////////////////////////////
	// RTree root data structure
	// Owns (or references, see USER_ALLOCATED) the flat array of RTreePages and
	// provides AABB / OBB / ray traversal queries plus serialization support.
	PX_ALIGN_PREFIX(16)
	struct RTree
	{
		//= ATTENTION! =====================================================================================
		// Changing the data layout of this class breaks the binary serialization format. See comments for
		// PX_BINARY_SERIAL_VERSION. If a modification is required, please adjust the getBinaryMetaData
		// function. If the modification is made on a custom branch, please change PX_BINARY_SERIAL_VERSION
		// accordingly.
		//==================================================================================================
		// PX_SERIALIZATION
		RTree(const PxEMPTY);
		void exportExtraData(PxSerializationContext&);
		void importExtraData(PxDeserializationContext& context);
		static void getBinaryMetaData(PxOutputStream& stream);
		//~PX_SERIALIZATION

		PX_INLINE RTree(); // offline static rtree constructor used with cooking

		~RTree() { release(); }

		PX_INLINE void release();
		bool save(PxOutputStream& stream) const; // always saves as big endian
		bool load(PxInputStream& stream, PxU32 meshVersion); // converts to proper endian at load time

		////////////////////////////////////////////////////////////////////////////
		// QUERIES
		struct Callback
		{
			// result buffer should have room for at least RTREE_N items
			// should return true to continue traversal. If false is returned, traversal is aborted
			virtual bool processResults(PxU32 count, PxU32* buf) = 0;
			virtual void profile() {}
			virtual ~Callback() {}
		};

		struct CallbackRaycast
		{
			// result buffer should have room for at least RTREE_N items
			// should return true to continue traversal. If false is returned, traversal is aborted
			// newMaxT serves as both input and output, as input it's the maxT so far
			// set it to a new value (which should be smaller) and it will become the new far clip t
			virtual bool processResults(PxU32 count, PxU32* buf, PxF32& newMaxT) = 0;
			virtual ~CallbackRaycast() {}
		};

		// callback will be issued as soon as the buffer overflows maxResultsPerBlock-RTreePage:SIZE entries
		// use maxResults = RTreePage:SIZE and return false from callback for "first hit" early out
		void traverseAABB(
			const PxVec3& boxMin, const PxVec3& boxMax,
			const PxU32 maxResultsPerBlock, PxU32* resultsBlockBuf, Callback* processResultsBlockCallback) const;
		void traverseOBB(
			const Gu::Box& obb,
			const PxU32 maxResultsPerBlock, PxU32* resultsBlockBuf, Callback* processResultsBlockCallback) const;
		template <int inflate>
		//PX_PHYSX_COMMON_API
		void traverseRay(
			const PxVec3& rayOrigin, const PxVec3& rayDir, // dir doesn't have to be normalized and is B-A for raySegment
			const PxU32 maxResults, PxU32* resultsPtr,
			Gu::RTree::CallbackRaycast* callback,
			const PxVec3* inflateAABBs, // inflate tree's AABBs by this amount. This function turns into AABB sweep.
			PxF32 maxT = PX_MAX_F32 // maximum ray t parameter, p(t)=origin+t*dir; use 1.0f for ray segment
			) const;

#if PX_ENABLE_DYNAMIC_MESH_RTREE
		struct CallbackRefit
		{
			// In this callback index is the number stored in the RTree, which is a LeafTriangles object for current PhysX mesh
			virtual void recomputeBounds(PxU32 index, shdfnd::aos::Vec3V& mn, shdfnd::aos::Vec3V& mx) = 0;
			virtual ~CallbackRefit() {}
		};
		void refitAllStaticTree(CallbackRefit& cb, PxBounds3* resultMeshBounds); // faster version of refit for static RTree only
#endif


		////////////////////////////////////////////////////////////////////////////
		// DEBUG HELPER FUNCTIONS
#if PX_ENABLE_DYNAMIC_MESH_RTREE
		PX_PHYSX_COMMON_API void validate(CallbackRefit* cb = NULL); // verify that all children are indeed included in parent bounds
#else
		PX_PHYSX_COMMON_API void validate(); // verify that all children are indeed included in parent bounds
#endif
		void openTextDump();
		void closeTextDump();
		void textDump(const char* prefix);
		void maxscriptExport();
		PxU32 computeBottomLevelCount(PxU32 storedToMemMultiplier) const;

		////////////////////////////////////////////////////////////////////////////
		// DATA
		// remember to update save() and load() when adding or removing data
		PxVec4 mBoundsMin, mBoundsMax, mInvDiagonal, mDiagonalScaler; // 16
		PxU32 mPageSize;      // nodes per page; runtime asserts check it is 4 or 8
		PxU32 mNumRootPages;
		PxU32 mNumLevels;
		PxU32 mTotalNodes; // 16
		PxU32 mTotalPages;
		// USER_ALLOCATED: mPages memory is owned externally and must not be freed
		// by release(); IS_EDGE_SET semantics are not visible from this header.
		PxU32 mFlags; enum { USER_ALLOCATED = 0x1, IS_EDGE_SET = 0x2 };
		RTreePage* mPages;

		static PxU32 mVersion;

	protected:
		typedef PxU32 NodeHandle;
#if PX_ENABLE_DYNAMIC_MESH_RTREE
		void validateRecursive(PxU32 level, RTreeNodeQ parentBounds, RTreePage* page, CallbackRefit* cb = NULL);
#else
		void validateRecursive(PxU32 level, RTreeNodeQ parentBounds, RTreePage* page);
#endif

		friend struct RTreePage;
	} PX_ALIGN_SUFFIX(16);

#if PX_VC
#pragma warning(pop)
#endif

	/////////////////////////////////////////////////////////////////////////
	// Default (cooking-time) constructor: zero-initializes the fields that the
	// builder fills in later. Note mNumRootPages/mTotalPages/bounds members are
	// NOT initialized here; presumably the cooking path sets them - TODO confirm.
	PX_INLINE RTree::RTree()
	{
		mFlags = 0;
		mPages = NULL;
		mTotalNodes = 0;
		mNumLevels = 0;
		mPageSize = RTREE_N;
	}

	/////////////////////////////////////////////////////////////////////////
	// Frees the page array unless it was supplied by the user (USER_ALLOCATED),
	// e.g. when the tree was deserialized in-place from a user buffer.
	PX_INLINE void RTree::release()
	{
		if ((mFlags & USER_ALLOCATED) == 0 && mPages)
		{
			physx::shdfnd::AlignedAllocator<128>().deallocate(mPages);
			mPages = NULL;
		}
	}

	// explicit instantiations for traverseRay
	// XXX: dima: g++ 4.4 won't compile this => skipping by PX_UNIX_FAMILY
#if PX_X86 && !PX_UNIX_FAMILY
	template
	//PX_PHYSX_COMMON_API
	void RTree::traverseRay<0>(
		const PxVec3&, const PxVec3&, const PxU32, PxU32*, Gu::RTree::CallbackRaycast*, const PxVec3*, PxF32 maxT) const;
	template
	//PX_PHYSX_COMMON_API
	void RTree::traverseRay<1>(
		const PxVec3&, const PxVec3&, const PxU32, PxU32*, Gu::RTree::CallbackRaycast*, const PxVec3*, PxF32 maxT) const;
#endif

	/////////////////////////////////////////////////////////////////////////
	// Marks the node empty using the inverted sentinel range [MX, MN], which
	// RTreePage::isEmpty() detects via min > max.
	PX_FORCE_INLINE void RTreeNodeQ::setEmpty()
	{
		minx = miny = minz = RTreePage::MX;
		maxx = maxy = maxz = RTreePage::MN;
	}


	// bit 1 is always expected to be set to differentiate between leaf and non-leaf node
	//
	// Leaf payload bit layout (32 bits):
	//   bit  0      : leaf flag, always 1 for a leaf
	//   bits 1..4   : triangle count minus one (so 1..16 triangles)
	//   bits 5..31  : first triangle index (must fit in 27 bits, see assert)
	PX_FORCE_INLINE PxU32 LeafGetNbTriangles(PxU32 Data) { return ((Data>>1) & 15)+1; }
	PX_FORCE_INLINE PxU32 LeafGetTriangleIndex(PxU32 Data) { return Data>>5; }
	PX_FORCE_INLINE PxU32 LeafSetData(PxU32 nb, PxU32 index)
	{
		PX_ASSERT(nb>0 && nb<=16); PX_ASSERT(index < (1<<27));
		return (index<<5)|(((nb-1)&15)<<1) | 1;
	}

	// Thin wrapper over the packed leaf word above; this is what a leaf 'ptr'
	// in the tree decodes to for PhysX triangle meshes.
	struct LeafTriangles
	{
		PxU32 Data;

		// Gets number of triangles in the leaf, returns the number of triangles N, with 0 < N <= 16
		PX_FORCE_INLINE PxU32 GetNbTriangles() const { return LeafGetNbTriangles(Data); }

		// Gets triangle index for this leaf. Indexed model's array of indices retrieved with RTreeMidphase::GetIndices()
		PX_FORCE_INLINE PxU32 GetTriangleIndex() const { return LeafGetTriangleIndex(Data); }
		PX_FORCE_INLINE void SetData(PxU32 nb, PxU32 index) { Data = LeafSetData(nb, index); }
	};

	PX_COMPILE_TIME_ASSERT(sizeof(LeafTriangles)==4); // RTree has space for 4 bytes

} // namespace Gu

}

#endif // #ifdef PX_COLLISION_RTREE
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTreeQueries.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTreeQueries.cpp
new file mode 100644
index 00000000..9d7bd57a
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTreeQueries.cpp
@@ -0,0 +1,581 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

/*
General notes:

	rtree depth-first traversal looks like this:
	push top level page onto stack

	pop page from stack
	for each node in page
		if node overlaps with testrect
			push node's subpage

	we want to efficiently keep track of current stack level to know if the current page is a leaf or not
	(since we don't store a flag with the page due to no space, we can't determine it just by looking at current page)
	since we traverse depth first, the levels for nodes on the stack look like this:
	l0 l0 l1 l2 l2 l3 l3 l3 l4

	we can encode this as an array of 4 bits per level count into a 32-bit integer
	to simplify the code->level computation we also keep track of current level by incrementing the level whenever any subpages
	from current test page are pushed onto the stack
	when we pop a page off the stack we use this encoding to determine if we should decrement the stack level
*/

#include "foundation/PxBounds3.h"
#include "GuRTree.h"
#include "PsIntrinsics.h"
#include "GuBox.h"
#include "PsVecMath.h"
#include "PxQueryReport.h" // for PxAgain
#include "PsBitUtils.h"

//#define VERIFY_RTREE
#ifdef VERIFY_RTREE
#include "GuIntersectionRayBox.h"
#include "GuIntersectionBoxBox.h"
#include "stdio.h"
#endif

using namespace physx;
using namespace physx::shdfnd;
using namespace Ps::aos;

namespace physx
{
namespace Gu {

using namespace Ps::aos;

// Local shorthands for the vector-math wrappers used below.
#define v_absm(a) V4Andc(a, signMask)   // clear the sign bits => componentwise |a|
#define V4FromF32A(x) V4LoadA(x)
#define PxF32FV(x) FStore(x)
#define CAST_U8(a) reinterpret_cast<PxU8*>(a)

/////////////////////////////////////////////////////////////////////////
// Depth-first AABB query. Each popped stack entry is a byte offset (relative
// to mPages) of an RTreePage; all RTREE_N node AABBs in the page are tested
// against the query box with one SIMD pass, then overlapping leaves are
// reported through 'callback' and overlapping interior children are pushed.
// NOTE(review): the 128-entry traversal stack has no overflow check;
// presumably tree depth*fanout is bounded so it cannot overflow - confirm.
void RTree::traverseAABB(const PxVec3& boxMin, const PxVec3& boxMax, const PxU32 maxResults, PxU32* resultsPtr, Callback* callback) const
{
	PX_UNUSED(resultsPtr);

	PX_ASSERT(callback);
	PX_ASSERT(maxResults >= mPageSize);
	PX_UNUSED(maxResults);

	const PxU32 maxStack = 128;
	PxU32 stack1[maxStack];
	// stack base is offset by one entry into stack1; NOTE(review): presumably
	// headroom related to the branchless push pattern used by traverseRay - confirm.
	PxU32* stack = stack1+1;

	PX_ASSERT(mPages);
	PX_ASSERT((uintptr_t(mPages) & 127) == 0);
	PX_ASSERT((uintptr_t(this) & 15) == 0);

	// conservatively quantize the input box
	// (NOTE(review): no quantization is visible here anymore - these are plain
	// loads; the comment appears to predate a format change.)
	Vec4V nqMin = Vec4V_From_PxVec3_WUndefined(boxMin);
	Vec4V nqMax = Vec4V_From_PxVec3_WUndefined(boxMax);

	// Broadcast each query-box coordinate across a 4-lane register so one
	// compare tests the same coordinate of 4 tree nodes at once.
	Vec4V nqMinx4 = V4SplatElement<0>(nqMin);
	Vec4V nqMiny4 = V4SplatElement<1>(nqMin);
	Vec4V nqMinz4 = V4SplatElement<2>(nqMin);
	Vec4V nqMaxx4 = V4SplatElement<0>(nqMax);
	Vec4V nqMaxy4 = V4SplatElement<1>(nqMax);
	Vec4V nqMaxz4 = V4SplatElement<2>(nqMax);

	// on 64-bit platforms the dynamic rtree pointer is also relative to mPages
	PxU8* treeNodes8 = CAST_U8(mPages);
	PxU32* stackPtr = stack;

	// AP potential perf optimization - fetch the top level right away
	PX_ASSERT(RTREE_N == 4 || RTREE_N == 8);
	PX_ASSERT(Ps::isPowerOfTwo(mPageSize));

	// Seed the stack with every root page (as byte offsets from mPages).
	for (PxI32 j = PxI32(mNumRootPages-1); j >= 0; j --)
		*stackPtr++ = j*sizeof(RTreePage);

	// Cache of the most recently pushed entry: avoids re-reading stack memory
	// that was just written (load-hit-store) on the immediately following pop.
	PxU32 cacheTopValid = true;
	PxU32 cacheTop = 0;

	do {
		stackPtr--;
		PxU32 top;
		if (cacheTopValid) // branch is faster than lhs
			top = cacheTop;
		else
			top = stackPtr[0];
		PX_ASSERT(!cacheTopValid || stackPtr[0] == cacheTop);
		RTreePage* PX_RESTRICT tn = reinterpret_cast<RTreePage*>(treeNodes8 + top);
		const PxU32* ptrs = (reinterpret_cast<RTreePage*>(tn))->ptrs;

		// SoA layout: one aligned load per coordinate covers all nodes in the page.
		Vec4V minx4 = V4LoadA(tn->minx);
		Vec4V miny4 = V4LoadA(tn->miny);
		Vec4V minz4 = V4LoadA(tn->minz);
		Vec4V maxx4 = V4LoadA(tn->maxx);
		Vec4V maxy4 = V4LoadA(tn->maxy);
		Vec4V maxz4 = V4LoadA(tn->maxz);

		// AABB/AABB overlap test
		// A lane's mask is set iff the boxes are separated along some axis,
		// so resa[i] != 0 means "node i does NOT overlap the query box".
		// Empty slots (min > max sentinel) fail the test automatically.
		BoolV res0 = V4IsGrtr(nqMinx4, maxx4); BoolV res1 = V4IsGrtr(nqMiny4, maxy4); BoolV res2 = V4IsGrtr(nqMinz4, maxz4);
		BoolV res3 = V4IsGrtr(minx4, nqMaxx4); BoolV res4 = V4IsGrtr(miny4, nqMaxy4); BoolV res5 = V4IsGrtr(minz4, nqMaxz4);
		BoolV resx = BOr(BOr(BOr(res0, res1), BOr(res2, res3)), BOr(res4, res5));
		PX_ALIGN_PREFIX(16) PxU32 resa[RTREE_N] PX_ALIGN_SUFFIX(16);

		VecU32V res4x = VecU32V_From_BoolV(resx);
		U4StoreA(res4x, resa);

		cacheTopValid = false;
		for (PxU32 i = 0; i < RTREE_N; i++)
		{
			PxU32 ptr = ptrs[i] & ~1; // clear the isLeaf bit
			if (resa[i])
				continue;
			if (tn->isLeaf(i))
			{
				// Leaves are reported one at a time (count == 1).
				if (!callback->processResults(1, &ptr))
					return;
			}
			else
			{
				*(stackPtr++) = ptr;
				cacheTop = ptr;
				cacheTopValid = true;
			}
		}
	} while (stackPtr > stack);
}

namespace
{
	// Shared SIMD constants for the traversal routines below.
	const VecU32V signMask = U4LoadXYZW((PxU32(1)<<31), (PxU32(1)<<31), (PxU32(1)<<31), (PxU32(1)<<31));
	const Vec4V epsFloat4 = V4Load(1e-9f);
	const Vec4V zeroes = V4Zero();
	const Vec4V twos = V4Load(2.0f);
	const Vec4V epsInflateFloat4 = V4Load(1e-7f);
}

/////////////////////////////////////////////////////////////////////////
// Ray (inflate == 0) or inflated-AABB sweep (inflate == 1) traversal using a
// 4-wide slab (Kay-Kajiya) test per page. Unlike traverseAABB, stack entries
// here can be leaf payloads directly: the low bit of a pushed value marks a
// leaf, so leaf handling happens on pop. The callback may shrink maxT to
// clip the remainder of the traversal ("first hit" style queries).
template <int inflate>
void RTree::traverseRay(
	const PxVec3& rayOrigin, const PxVec3& rayDir,
	const PxU32 maxResults, PxU32* resultsPtr, Gu::RTree::CallbackRaycast* callback,
	const PxVec3* fattenAABBs, PxF32 maxT) const
{
	// implements Kay-Kajiya 4-way SIMD test
	PX_UNUSED(resultsPtr);
	PX_UNUSED(maxResults);

	const PxU32 maxStack = 128;
	PxU32 stack1[maxStack];
	PxU32* stack = stack1+1;

	PX_ASSERT(mPages);
	PX_ASSERT((uintptr_t(mPages) & 127) == 0);
	PX_ASSERT((uintptr_t(this) & 15) == 0);

	PxU8* treeNodes8 = CAST_U8(mPages);

	Vec4V fattenAABBsX, fattenAABBsY, fattenAABBsZ;
	PX_UNUSED(fattenAABBsX); PX_UNUSED(fattenAABBsY); PX_UNUSED(fattenAABBsZ);
	if (inflate)
	{
		Vec4V fattenAABBs4 = Vec4V_From_PxVec3_WUndefined(*fattenAABBs);
		fattenAABBs4 = V4Add(fattenAABBs4, epsInflateFloat4); // US2385 - shapes are "closed" meaning exactly touching shapes should report overlap
		fattenAABBsX = V4SplatElement<0>(fattenAABBs4);
		fattenAABBsY = V4SplatElement<1>(fattenAABBs4);
		fattenAABBsZ = V4SplatElement<2>(fattenAABBs4);
	}

	Vec4V maxT4;
	maxT4 = V4Load(maxT);
	Vec4V rayP = Vec4V_From_PxVec3_WUndefined(rayOrigin);
	Vec4V rayD = Vec4V_From_PxVec3_WUndefined(rayDir);
	// Clamp each |direction| component up to epsilon while keeping its sign,
	// so the reciprocal below never divides by (near) zero.
	VecU32V raySign = V4U32and(VecU32V_ReinterpretFrom_Vec4V(rayD), signMask);
	Vec4V rayDAbs = V4Abs(rayD); // abs value of rayD
	Vec4V rayInvD = Vec4V_ReinterpretFrom_VecU32V(V4U32or(raySign, VecU32V_ReinterpretFrom_Vec4V(V4Max(rayDAbs, epsFloat4)))); // clamp near-zero components up to epsilon
	rayD = rayInvD;

	//rayInvD = V4Recip(rayInvD);
	// Newton-Raphson iteration for reciprocal (see wikipedia):
	// X[n+1] = X[n]*(2-original*X[n]), X[0] = V4RecipFast estimate
	//rayInvD = rayInvD*(twos-rayD*rayInvD);
	rayInvD = V4RecipFast(rayInvD); // initial estimate, not accurate enough
	rayInvD = V4Mul(rayInvD, V4NegMulSub(rayD, rayInvD, twos));

	// P+tD=a; t=(a-P)/D
	// t=(a - p.x)*1/d.x = a/d.x +(- p.x/d.x)
	Vec4V rayPinvD = V4NegMulSub(rayInvD, rayP, zeroes);
	Vec4V rayInvDsplatX = V4SplatElement<0>(rayInvD);
	Vec4V rayInvDsplatY = V4SplatElement<1>(rayInvD);
	Vec4V rayInvDsplatZ = V4SplatElement<2>(rayInvD);
	Vec4V rayPinvDsplatX = V4SplatElement<0>(rayPinvD);
	Vec4V rayPinvDsplatY = V4SplatElement<1>(rayPinvD);
	Vec4V rayPinvDsplatZ = V4SplatElement<2>(rayPinvD);

	PX_ASSERT(RTREE_N == 4 || RTREE_N == 8);
	PX_ASSERT(mNumRootPages > 0);

	PxU32 stackPtr = 0;
	for (PxI32 j = PxI32(mNumRootPages-1); j >= 0; j --)
		stack[stackPtr++] = j*sizeof(RTreePage);

	PX_ALIGN_PREFIX(16) PxU32 resa[4] PX_ALIGN_SUFFIX(16);

	while (stackPtr)
	{
		PxU32 top = stack[--stackPtr];
		if (top&1) // isLeaf test
		{
			top--;
			PxF32 newMaxT = maxT;
			if (!callback->processResults(1, &top, newMaxT))
				return;
			/* shrink the ray if newMaxT is reduced compared to the original maxT */
			if (maxT != newMaxT)
			{
				PX_ASSERT(newMaxT < maxT);
				maxT = newMaxT;
				maxT4 = V4Load(newMaxT);
			}
			continue;
		}

		RTreePage* PX_RESTRICT tn = reinterpret_cast<RTreePage*>(treeNodes8 + top);

		// 6i load
		Vec4V minx4a = V4LoadA(tn->minx), miny4a = V4LoadA(tn->miny), minz4a = V4LoadA(tn->minz);
		Vec4V maxx4a = V4LoadA(tn->maxx), maxy4a = V4LoadA(tn->maxy), maxz4a = V4LoadA(tn->maxz);

		// 1i disabled test
		// AP scaffold - optimization opportunity - can save 2 instructions here
		VecU32V ignore4a = V4IsGrtrV32u(minx4a, maxx4a); // 1 if degenerate box (empty slot in the page)

		if (inflate)
		{
			// 6i
			maxx4a = V4Add(maxx4a, fattenAABBsX); maxy4a = V4Add(maxy4a, fattenAABBsY); maxz4a = V4Add(maxz4a, fattenAABBsZ);
			minx4a = V4Sub(minx4a, fattenAABBsX); miny4a = V4Sub(miny4a, fattenAABBsY); minz4a = V4Sub(minz4a, fattenAABBsZ);
		}

		// P+tD=a; t=(a-P)/D
		// t=(a - p.x)*1/d.x = a/d.x +(- p.x/d.x)
		// 6i
		Vec4V tminxa0 = V4MulAdd(minx4a, rayInvDsplatX, rayPinvDsplatX);
		Vec4V tminya0 = V4MulAdd(miny4a, rayInvDsplatY, rayPinvDsplatY);
		Vec4V tminza0 = V4MulAdd(minz4a, rayInvDsplatZ, rayPinvDsplatZ);
		Vec4V tmaxxa0 = V4MulAdd(maxx4a, rayInvDsplatX, rayPinvDsplatX);
		Vec4V tmaxya0 = V4MulAdd(maxy4a, rayInvDsplatY, rayPinvDsplatY);
		Vec4V tmaxza0 = V4MulAdd(maxz4a, rayInvDsplatZ, rayPinvDsplatZ);

		// test half-spaces
		// P+tD=dN
		// t = (d(N,D)-(P,D))/(D,D) , (D,D)=1

		// compute 4x dot products (N,D) and (P,N) for each AABB in the page

		// 6i
		// now compute tnear and tfar for each pair of planes for each box
		Vec4V tminxa = V4Min(tminxa0, tmaxxa0); Vec4V tmaxxa = V4Max(tminxa0, tmaxxa0);
		Vec4V tminya = V4Min(tminya0, tmaxya0); Vec4V tmaxya = V4Max(tminya0, tmaxya0);
		Vec4V tminza = V4Min(tminza0, tmaxza0); Vec4V tmaxza = V4Max(tminza0, tmaxza0);

		// 8i
		Vec4V maxOfNeasa = V4Max(V4Max(tminxa, tminya), tminza);
		Vec4V minOfFarsa = V4Min(V4Min(tmaxxa, tmaxya), tmaxza);
		ignore4a = V4U32or(ignore4a, V4IsGrtrV32u(epsFloat4, minOfFarsa)); // if tfar is negative, ignore since its a ray, not a line
		// AP scaffold: update the build to eliminate 3 more instructions for ignore4a above
		//VecU32V ignore4a = V4IsGrtrV32u(epsFloat4, minOfFarsa); // if tfar is negative, ignore since its a ray, not a line
		ignore4a = V4U32or(ignore4a, V4IsGrtrV32u(maxOfNeasa, maxT4)); // if tnear is over maxT, ignore this result

		// 2i
		VecU32V resa4 = V4IsGrtrV32u(maxOfNeasa, minOfFarsa); // if 1 => fail
		resa4 = V4U32or(resa4, ignore4a);

		// 1i
		V4U32StoreAligned(resa4, reinterpret_cast<VecU32V*>(resa));

		PxU32* ptrs = (reinterpret_cast<RTreePage*>(tn))->ptrs;

		// Branchless conditional push: resa[i] is 0 on hit and 0xFFFFFFFF on
		// miss, so (1+resa[i]) is 1 or 0 (unsigned wrap) - a missed child is
		// written to the slot but immediately overwritten/ignored.
		stack[stackPtr] = ptrs[0]; stackPtr += (1+resa[0]); // AP scaffold TODO: use VecU32add
		stack[stackPtr] = ptrs[1]; stackPtr += (1+resa[1]);
		stack[stackPtr] = ptrs[2]; stackPtr += (1+resa[2]);
		stack[stackPtr] = ptrs[3]; stackPtr += (1+resa[3]);
	}
}

template void RTree::traverseRay<0>(
	const PxVec3&, const PxVec3&, const PxU32, PxU32*, Gu::RTree::CallbackRaycast*, const PxVec3*, PxF32 maxT) const;
template void RTree::traverseRay<1>(
	const PxVec3&, const PxVec3&, const PxU32, PxU32*, Gu::RTree::CallbackRaycast*, const PxVec3*, PxF32 maxT) const;

/////////////////////////////////////////////////////////////////////////
// OBB query. Per page, tests the OBB against 4 node AABBs at once using a
// separating-axis check on two candidate axes derived from closest-point
// clamping between the boxes (d1 = p1OBB-p1ABB, d2 = p1OBB-p2ABB).
// NOTE(review): only two axes are tested, so this looks like a conservative
// test - it may pass boxes that a full SAT would reject, which is acceptable
// for a midphase cull; confirm against the narrow phase's expectations.
void RTree::traverseOBB(
	const Gu::Box& obb, const PxU32 maxResults, PxU32* resultsPtr, Gu::RTree::Callback* callback) const
{
	PX_UNUSED(resultsPtr);
	PX_UNUSED(maxResults);

	const PxU32 maxStack = 128;
	PxU32 stack[maxStack];

	PX_ASSERT(mPages);
	PX_ASSERT((uintptr_t(mPages) & 127) == 0);
	PX_ASSERT((uintptr_t(this) & 15) == 0);

	PxU8* treeNodes8 = CAST_U8(mPages);
	PxU32* stackPtr = stack;

	Vec4V ones, halves, eps;
	ones = V4Load(1.0f);
	halves = V4Load(0.5f);
	eps = V4Load(1e-6f);

	PX_UNUSED(ones);

	Vec4V obbO = Vec4V_From_PxVec3_WUndefined(obb.center);
	Vec4V obbE = Vec4V_From_PxVec3_WUndefined(obb.extents);
	// Gu::Box::rot matrix columns are the OBB axes
	Vec4V obbX = Vec4V_From_PxVec3_WUndefined(obb.rot.column0);
	Vec4V obbY = Vec4V_From_PxVec3_WUndefined(obb.rot.column1);
	Vec4V obbZ = Vec4V_From_PxVec3_WUndefined(obb.rot.column2);

#if PX_WINDOWS || PX_XBOXONE
	// Visual Studio compiler hangs with #defines
	// On VMX platforms we use #defines in the other branch of this #ifdef to avoid register spills (LHS)
	Vec4V obbESplatX = V4SplatElement<0>(obbE);
	Vec4V obbESplatY = V4SplatElement<1>(obbE);
	Vec4V obbESplatZ = V4SplatElement<2>(obbE);
	Vec4V obbESplatNegX = V4Sub(zeroes, obbESplatX);
	Vec4V obbESplatNegY = V4Sub(zeroes, obbESplatY);
	Vec4V obbESplatNegZ = V4Sub(zeroes, obbESplatZ);
	Vec4V obbXE = V4MulAdd(obbX, obbESplatX, zeroes); // scale axii by E
	Vec4V obbYE = V4MulAdd(obbY, obbESplatY, zeroes); // scale axii by E
	Vec4V obbZE = V4MulAdd(obbZ, obbESplatZ, zeroes); // scale axii by E
	Vec4V obbOSplatX = V4SplatElement<0>(obbO);
	Vec4V obbOSplatY = V4SplatElement<1>(obbO);
	Vec4V obbOSplatZ = V4SplatElement<2>(obbO);
	Vec4V obbXSplatX = V4SplatElement<0>(obbX);
	Vec4V obbXSplatY = V4SplatElement<1>(obbX);
	Vec4V obbXSplatZ = V4SplatElement<2>(obbX);
	Vec4V obbYSplatX = V4SplatElement<0>(obbY);
	Vec4V obbYSplatY = V4SplatElement<1>(obbY);
	Vec4V obbYSplatZ = V4SplatElement<2>(obbY);
	Vec4V obbZSplatX = V4SplatElement<0>(obbZ);
	Vec4V obbZSplatY = V4SplatElement<1>(obbZ);
	Vec4V obbZSplatZ = V4SplatElement<2>(obbZ);
	Vec4V obbXESplatX = V4SplatElement<0>(obbXE);
	Vec4V obbXESplatY = V4SplatElement<1>(obbXE);
	Vec4V obbXESplatZ = V4SplatElement<2>(obbXE);
	Vec4V obbYESplatX = V4SplatElement<0>(obbYE);
	Vec4V obbYESplatY = V4SplatElement<1>(obbYE);
	Vec4V obbYESplatZ = V4SplatElement<2>(obbYE);
	Vec4V obbZESplatX = V4SplatElement<0>(obbZE);
	Vec4V obbZESplatY = V4SplatElement<1>(obbZE);
	Vec4V obbZESplatZ = V4SplatElement<2>(obbZE);
#else
	#define obbESplatX V4SplatElement<0>(obbE)
	#define obbESplatY V4SplatElement<1>(obbE)
	#define obbESplatZ V4SplatElement<2>(obbE)
	#define obbESplatNegX V4Sub(zeroes, obbESplatX)
	#define obbESplatNegY V4Sub(zeroes, obbESplatY)
	#define obbESplatNegZ V4Sub(zeroes, obbESplatZ)
	#define obbXE V4MulAdd(obbX, obbESplatX, zeroes)
	#define obbYE V4MulAdd(obbY, obbESplatY, zeroes)
	#define obbZE V4MulAdd(obbZ, obbESplatZ, zeroes)
	#define obbOSplatX V4SplatElement<0>(obbO)
	#define obbOSplatY V4SplatElement<1>(obbO)
	#define obbOSplatZ V4SplatElement<2>(obbO)
	#define obbXSplatX V4SplatElement<0>(obbX)
	#define obbXSplatY V4SplatElement<1>(obbX)
	#define obbXSplatZ V4SplatElement<2>(obbX)
	#define obbYSplatX V4SplatElement<0>(obbY)
	#define obbYSplatY V4SplatElement<1>(obbY)
	#define obbYSplatZ V4SplatElement<2>(obbY)
	#define obbZSplatX V4SplatElement<0>(obbZ)
	#define obbZSplatY V4SplatElement<1>(obbZ)
	#define obbZSplatZ V4SplatElement<2>(obbZ)
	#define obbXESplatX V4SplatElement<0>(obbXE)
	#define obbXESplatY V4SplatElement<1>(obbXE)
	#define obbXESplatZ V4SplatElement<2>(obbXE)
	#define obbYESplatX V4SplatElement<0>(obbYE)
	#define obbYESplatY V4SplatElement<1>(obbYE)
	#define obbYESplatZ V4SplatElement<2>(obbYE)
	#define obbZESplatX V4SplatElement<0>(obbZE)
	#define obbZESplatY V4SplatElement<1>(obbZE)
	#define obbZESplatZ V4SplatElement<2>(obbZE)
#endif

	PX_ASSERT(mPageSize == 4 || mPageSize == 8);
	PX_ASSERT(mNumRootPages > 0);

	for (PxI32 j = PxI32(mNumRootPages-1); j >= 0; j --)
		*stackPtr++ = j*sizeof(RTreePage);
	PxU32 cacheTopValid = true;
	PxU32 cacheTop = 0;

	PX_ALIGN_PREFIX(16) PxU32 resa_[4] PX_ALIGN_SUFFIX(16);

	do {
		stackPtr--;

		PxU32 top;
		if (cacheTopValid) // branch is faster than lhs
			top = cacheTop;
		else
			top = stackPtr[0];
		PX_ASSERT(!cacheTopValid || top == cacheTop);
		RTreePage* PX_RESTRICT tn = reinterpret_cast<RTreePage*>(treeNodes8 + top);

		const PxU32 offs = 0;
		PxU32* ptrs = (reinterpret_cast<RTreePage*>(tn))->ptrs;

		// 6i
		Vec4V minx4a = V4LoadA(tn->minx+offs);
		Vec4V miny4a = V4LoadA(tn->miny+offs);
		Vec4V minz4a = V4LoadA(tn->minz+offs);
		Vec4V maxx4a = V4LoadA(tn->maxx+offs);
		Vec4V maxy4a = V4LoadA(tn->maxy+offs);
		Vec4V maxz4a = V4LoadA(tn->maxz+offs);

		VecU32V noOverlapa;
		VecU32V resa4u;
		{
			// PRECOMPUTE FOR A BLOCK
			// 109 instr per 4 OBB/AABB
			// ABB iteration 1, start with OBB origin as other point -- 6
			// (clamp the OBB center to each AABB => closest point on the AABB)
			Vec4V p1ABBxa = V4Max(minx4a, V4Min(maxx4a, obbOSplatX));
			Vec4V p1ABBya = V4Max(miny4a, V4Min(maxy4a, obbOSplatY));
			Vec4V p1ABBza = V4Max(minz4a, V4Min(maxz4a, obbOSplatZ));

			// OBB iteration 1, move to OBB space first -- 12
			Vec4V p1ABBOxa = V4Sub(p1ABBxa, obbOSplatX);
			Vec4V p1ABBOya = V4Sub(p1ABBya, obbOSplatY);
			Vec4V p1ABBOza = V4Sub(p1ABBza, obbOSplatZ);
			Vec4V obbPrjXa = V4MulAdd(p1ABBOxa, obbXSplatX, V4MulAdd(p1ABBOya, obbXSplatY, V4MulAdd(p1ABBOza, obbXSplatZ, zeroes)));
			Vec4V obbPrjYa = V4MulAdd(p1ABBOxa, obbYSplatX, V4MulAdd(p1ABBOya, obbYSplatY, V4MulAdd(p1ABBOza, obbYSplatZ, zeroes)));
			Vec4V obbPrjZa = V4MulAdd(p1ABBOxa, obbZSplatX, V4MulAdd(p1ABBOya, obbZSplatY, V4MulAdd(p1ABBOza, obbZSplatZ, zeroes)));
			// clamp AABB point in OBB space to OBB extents. Since we scaled the axii, the extents are [-1,1] -- 6
			Vec4V pOBBxa = V4Max(obbESplatNegX, V4Min(obbPrjXa, obbESplatX));
			Vec4V pOBBya = V4Max(obbESplatNegY, V4Min(obbPrjYa, obbESplatY));
			Vec4V pOBBza = V4Max(obbESplatNegZ, V4Min(obbPrjZa, obbESplatZ));
			// go back to AABB space. we have x,y,z in obb space, need to multiply by axii -- 9
			Vec4V p1OBBxa = V4MulAdd(pOBBxa, obbXSplatX, V4MulAdd(pOBBya, obbYSplatX, V4MulAdd(pOBBza, obbZSplatX, obbOSplatX)));
			Vec4V p1OBBya = V4MulAdd(pOBBxa, obbXSplatY, V4MulAdd(pOBBya, obbYSplatY, V4MulAdd(pOBBza, obbZSplatY, obbOSplatY)));
			Vec4V p1OBBza = V4MulAdd(pOBBxa, obbXSplatZ, V4MulAdd(pOBBya, obbYSplatZ, V4MulAdd(pOBBza, obbZSplatZ, obbOSplatZ)));

			// ABB iteration 2 -- 6 instructions
			Vec4V p2ABBxa = V4Max(minx4a, V4Min(maxx4a, p1OBBxa));
			Vec4V p2ABBya = V4Max(miny4a, V4Min(maxy4a, p1OBBya));
			Vec4V p2ABBza = V4Max(minz4a, V4Min(maxz4a, p1OBBza));
			// above blocks add up to 12+12+15=39 instr
			// END PRECOMPUTE FOR A BLOCK

			// for AABBs precompute extents and center -- 9i
			Vec4V abbCxa = V4MulAdd(V4Add(maxx4a, minx4a), halves, zeroes);
			Vec4V abbCya = V4MulAdd(V4Add(maxy4a, miny4a), halves, zeroes);
			Vec4V abbCza = V4MulAdd(V4Add(maxz4a, minz4a), halves, zeroes);
			Vec4V abbExa = V4Sub(maxx4a, abbCxa);
			Vec4V abbEya = V4Sub(maxy4a, abbCya);
			Vec4V abbEza = V4Sub(maxz4a, abbCza);

			// now test separating axes D1 = p1OBB-p1ABB and D2 = p1OBB-p2ABB -- 37 instructions per axis
			// D1 first -- 3 instructions
			Vec4V d1xa = V4Sub(p1OBBxa, p1ABBxa), d1ya = V4Sub(p1OBBya, p1ABBya), d1za = V4Sub(p1OBBza, p1ABBza);

			// for AABB compute projections of extents and center -- 6
			Vec4V abbExd1Prja = V4MulAdd(d1xa, abbExa, zeroes);
			Vec4V abbEyd1Prja = V4MulAdd(d1ya, abbEya, zeroes);
			Vec4V abbEzd1Prja = V4MulAdd(d1za, abbEza, zeroes);
			Vec4V abbCd1Prja = V4MulAdd(d1xa, abbCxa, V4MulAdd(d1ya, abbCya, V4MulAdd(d1za, abbCza, zeroes)));

			// for obb project each halfaxis and origin and add abs values of half-axis projections -- 12 instructions
			Vec4V obbXEd1Prja = V4MulAdd(d1xa, obbXESplatX, V4MulAdd(d1ya, obbXESplatY, V4MulAdd(d1za, obbXESplatZ, zeroes)));
			Vec4V obbYEd1Prja = V4MulAdd(d1xa, obbYESplatX, V4MulAdd(d1ya, obbYESplatY, V4MulAdd(d1za, obbYESplatZ, zeroes)));
			Vec4V obbZEd1Prja = V4MulAdd(d1xa, obbZESplatX, V4MulAdd(d1ya, obbZESplatY, V4MulAdd(d1za, obbZESplatZ, zeroes)));
			Vec4V obbOd1Prja = V4MulAdd(d1xa, obbOSplatX, V4MulAdd(d1ya, obbOSplatY, V4MulAdd(d1za, obbOSplatZ, zeroes)));

			// compare lengths between projected centers with sum of projected radii -- 16i
			// epsNoOverlapd1a guards against a degenerate (near-zero) axis: the
			// separation only counts if the projected center distance exceeds eps.
			Vec4V originDiffd1a = v_absm(V4Sub(abbCd1Prja, obbOd1Prja));
			Vec4V absABBRd1a = V4Add(V4Add(v_absm(abbExd1Prja), v_absm(abbEyd1Prja)), v_absm(abbEzd1Prja));
			Vec4V absOBBRd1a = V4Add(V4Add(v_absm(obbXEd1Prja), v_absm(obbYEd1Prja)), v_absm(obbZEd1Prja));
			VecU32V noOverlapd1a = V4IsGrtrV32u(V4Sub(originDiffd1a, eps), V4Add(absABBRd1a, absOBBRd1a));
			VecU32V epsNoOverlapd1a = V4IsGrtrV32u(originDiffd1a, eps);

			// D2 next (35 instr)
			// 3i
			Vec4V d2xa = V4Sub(p1OBBxa, p2ABBxa), d2ya = V4Sub(p1OBBya, p2ABBya), d2za = V4Sub(p1OBBza, p2ABBza);
			// for AABB compute projections of extents and center -- 6
			Vec4V abbExd2Prja = V4MulAdd(d2xa, abbExa, zeroes);
			Vec4V abbEyd2Prja = V4MulAdd(d2ya, abbEya, zeroes);
			Vec4V abbEzd2Prja = V4MulAdd(d2za, abbEza, zeroes);
			Vec4V abbCd2Prja = V4MulAdd(d2xa, abbCxa, V4MulAdd(d2ya, abbCya, V4MulAdd(d2za, abbCza, zeroes)));
			// for obb project each halfaxis and origin and add abs values of half-axis projections -- 12i
			Vec4V obbXEd2Prja = V4MulAdd(d2xa, obbXESplatX, V4MulAdd(d2ya, obbXESplatY, V4MulAdd(d2za, obbXESplatZ, zeroes)));
			Vec4V obbYEd2Prja = V4MulAdd(d2xa, obbYESplatX, V4MulAdd(d2ya, obbYESplatY, V4MulAdd(d2za, obbYESplatZ, zeroes)));
			Vec4V obbZEd2Prja = V4MulAdd(d2xa, obbZESplatX, V4MulAdd(d2ya, obbZESplatY, V4MulAdd(d2za, obbZESplatZ, zeroes)));
			Vec4V obbOd2Prja = V4MulAdd(d2xa, obbOSplatX, V4MulAdd(d2ya, obbOSplatY, V4MulAdd(d2za, obbOSplatZ, zeroes)));
			// compare lengths between projected centers with sum of projected radii -- 16i
			Vec4V originDiffd2a = v_absm(V4Sub(abbCd2Prja, obbOd2Prja));
			Vec4V absABBRd2a = V4Add(V4Add(v_absm(abbExd2Prja), v_absm(abbEyd2Prja)), v_absm(abbEzd2Prja));
			Vec4V absOBBRd2a = V4Add(V4Add(v_absm(obbXEd2Prja), v_absm(obbYEd2Prja)), v_absm(obbZEd2Prja));
			VecU32V noOverlapd2a = V4IsGrtrV32u(V4Sub(originDiffd2a, eps), V4Add(absABBRd2a, absOBBRd2a));
			VecU32V epsNoOverlapd2a = V4IsGrtrV32u(originDiffd2a, eps);

			// 8i
			noOverlapa = V4U32or(V4U32and(noOverlapd1a, epsNoOverlapd1a), V4U32and(noOverlapd2a, epsNoOverlapd2a));
			VecU32V ignore4a = V4IsGrtrV32u(minx4a, maxx4a); // 1 if degenerate box (empty slot)
			noOverlapa = V4U32or(noOverlapa, ignore4a);
			resa4u = V4U32Andc(U4Load(1), noOverlapa); // 1 & ~noOverlap
			V4U32StoreAligned(resa4u, reinterpret_cast<VecU32V*>(resa_));
			///// 8+16+12+6+3+16+12+6+3+9+6+9+6+12+6+6=136i from load to result
		}

		cacheTopValid = false;
		for (PxU32 i = 0; i < 4; i++)
		{
			PxU32 ptr = ptrs[i+offs] & ~1; // clear the isLeaf bit
			// Note: opposite polarity to traverseAABB - resa_[i] == 1 means
			// "node i potentially overlaps the OBB".
			if (resa_[i])
			{
				if (tn->isLeaf(i))
				{
					if (!callback->processResults(1, &ptr))
						return;
				}
				else
				{
					*(stackPtr++) = ptr;
					cacheTop = ptr;
					cacheTopValid = true;
				}
			}
		}
	} while (stackPtr > stack);
}

} // namespace Gu

}
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepConvexTri.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepConvexTri.h
new file mode 100644
index 00000000..15263717
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepConvexTri.h
@@ -0,0 +1,103 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.".
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 

#ifndef GU_SWEEP_CONVEX_TRI
#define GU_SWEEP_CONVEX_TRI

#include "GuVecTriangle.h"
#include "GuVecConvexHull.h"
#include "GuConvexMesh.h"
#include "PxConvexMeshGeometry.h"
#include "GuGJKRaycast.h"

// Sweeps a convex hull against a single triangle using a GJK raycast, updating 'hit'
// if a closer impact than 'shrunkDistance' is found.
//
// v0, v1, v2         triangle vertices in mesh local space
// convexHull         the swept convex, in its own local space
// meshToConvex       transform taking mesh-space points into convex local space
// convexTransfV      convex-local -> world transform, used to report impact point/normal in world space
// convexSpaceDir     sweep motion vector expressed in convex local space (already scaled by distance, negated by caller)
// unitDir            world-space sweep direction (used for initial-overlap reporting)
// meshSpaceUnitDir   sweep direction in mesh space (used for backface culling; see reversed-test warning below)
// fullDistance       total sweep distance; the GJK toi in [0,1] is rescaled by this
// shrunkDistance     current best distance; only hits strictly closer than this are reported
// hit                [out] sweep hit, filled only when the function returns true
// isDoubleSided      if true, backface culling is disabled
// inflation          margin added around the convex for the GJK query
// initialOverlap     [out] set to true when the shapes already overlap at toi <= 0
// faceIndex          triangle index stored in hit.faceIndex
//
// return true if hit, false if no hit
static PX_FORCE_INLINE bool sweepConvexVsTriangle(
	const PxVec3& v0, const PxVec3& v1, const PxVec3& v2,
	ConvexHullV& convexHull, const Ps::aos::PsMatTransformV& meshToConvex, const Ps::aos::PsTransformV& convexTransfV,
	const Ps::aos::Vec3VArg convexSpaceDir, const PxVec3& unitDir, const PxVec3& meshSpaceUnitDir,
	const Ps::aos::FloatVArg fullDistance, PxReal shrunkDistance,
	PxSweepHit& hit, bool isDoubleSided, const PxReal inflation, bool& initialOverlap, PxU32 faceIndex)
{
	using namespace Ps::aos;
	// Create triangle normal (not normalized; only its sign vs the sweep dir matters here)
	const PxVec3 denormalizedNormal = (v1 - v0).cross(v2 - v1);

	// Backface culling
	// PT: WARNING, the test is reversed compared to usual because we pass -unitDir to this function
	const bool culled = !isDoubleSided && (denormalizedNormal.dot(meshSpaceUnitDir) <= 0.0f);
	if(culled)
		return false;

	const Vec3V zeroV = V3Zero();
	const FloatV zero = FZero();

	const Vec3V p0 = V3LoadU(v0);	// in mesh local space
	const Vec3V p1 = V3LoadU(v1);
	const Vec3V p2 = V3LoadU(v2);

	// transform triangle verts from mesh local to convex local space
	TriangleV triangleV(meshToConvex.transform(p0), meshToConvex.transform(p1), meshToConvex.transform(p2));

	FloatV toi;
	Vec3V closestA, normal;

	LocalConvex<TriangleV> convexA(triangleV);
	LocalConvex<ConvexHullV> convexB(convexHull);
	const Vec3V initialSearchDir = V3Sub(triangleV.getCenter(), convexHull.getCenter());
	// run GJK raycast
	// sweep triangle in convex local space vs convex, closestA will be the impact point in convex local space
	const bool gjkHit = gjkRaycastPenetration<LocalConvex<TriangleV>, LocalConvex<ConvexHullV> >(
		convexA, convexB, initialSearchDir, zero, zeroV, convexSpaceDir, toi, normal, closestA, inflation, false);
	if(!gjkHit)
		return false;

	const FloatV minDist = FLoad(shrunkDistance);
	// move impact point / normal from convex local space back to world space
	const Vec3V destWorldPointA = convexTransfV.transform(closestA);
	const Vec3V destNormal = V3Normalize(convexTransfV.rotate(normal));

	// toi <= 0 means the shapes already overlap at the sweep start
	if(FAllGrtrOrEq(zero, toi))
	{
		initialOverlap = true;	// PT: TODO: redundant with hit distance, consider removing
		return setInitialOverlapResults(hit, unitDir, faceIndex);
	}

	const FloatV dist = FMul(toi, fullDistance);	// scale the toi to original full sweep distance
	if(FAllGrtr(minDist, dist))	// is current dist < minDist?
	{
		hit.faceIndex	= faceIndex;
		hit.flags		= PxHitFlag::eDISTANCE | PxHitFlag::ePOSITION | PxHitFlag::eNORMAL | PxHitFlag::eFACE_INDEX;
		V3StoreU(destWorldPointA, hit.position);
		V3StoreU(destNormal, hit.normal);
		FStore(dist, &hit.distance);
		return true;	// report a hit
	}
	return false;	// report no hit
}

#endif
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepMesh.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepMesh.h new file mode 100644 index 00000000..c1f8521e --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepMesh.h @@ -0,0 +1,169 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.".
NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef GU_SWEEP_MESH_H
#define GU_SWEEP_MESH_H

#include "GuMidphaseInterface.h"
#include "GuVecConvexHull.h"

namespace physx
{

namespace Gu
{
	// PT: class to make sure we can safely V4Load Matrix34's last column
	class Matrix34Padded : public Cm::Matrix34
	{
	public:
		PX_FORCE_INLINE	Matrix34Padded(const Matrix34& src) : Matrix34(src)	{}
		PX_FORCE_INLINE	Matrix34Padded()									{}
		PX_FORCE_INLINE	~Matrix34Padded()									{}
		PxU32	padding;	// pads the last (translation) column so a 4-wide unaligned load past it stays in-bounds
	};
	// NOTE(review): this expression asserts sizeof != 16, which does not match the padding
	// comment above; upstream PhysX uses an alignment check here ("0==(sizeof(Matrix34Padded) & 0x0f)").
	// The "==16" form looks like a transcription artifact of this dump — verify against the real file.
	PX_COMPILE_TIME_ASSERT(0==(sizeof(Matrix34Padded)==16));

	// PT: intermediate class containing shared bits of code & members
	struct SweepShapeMeshHitCallback : MeshHitCallback<PxRaycastHit>
	{
		SweepShapeMeshHitCallback(CallbackMode::Enum mode, const PxHitFlags& hitFlags, bool flipNormal, float distCoef);

		const PxHitFlags	mHitFlags;			// query flags (MTD, PRECISE_SWEEP, MESH_ANY, ...)
		bool				mStatus;			// Default is false, set to true if a valid hit is found. Stays true once true.
		bool				mInitialOverlap;	// Default is false, set to true if an initial overlap hit is found. Reset for each hit.
		bool				mFlipNormal;		// If negative scale is used we need to flip normal
		PxReal				mDistCoeff;			// dist coeff from unscaled to scaled distance

		void operator=(const SweepShapeMeshHitCallback&) {}
	};

	// Midphase callback for capsule-vs-mesh sweeps; degenerates to a sphere sweep when p0 == p1.
	struct SweepCapsuleMeshHitCallback : SweepShapeMeshHitCallback
	{
		PxSweepHit&			mSweepHit;				// result hit, updated as closer triangles are found
		const Cm::Matrix34&	mVertexToWorldSkew;		// mesh vertex space -> world space (includes mesh scale)
		const PxReal		mTrueSweepDistance;		// max sweep distance that can be used
		PxReal				mBestAlignmentValue;	// best alignment value for triangle normal
		PxReal				mBestDist;				// best distance, not the same as sweepHit.distance, can be shorter by epsilon
		const Capsule&		mCapsule;
		const PxVec3&		mUnitDir;
		const bool			mMeshDoubleSided;		// PT: true if PxMeshGeometryFlag::eDOUBLE_SIDED
		const bool			mIsSphere;				// true when the capsule has zero length

		SweepCapsuleMeshHitCallback(PxSweepHit& sweepHit, const Cm::Matrix34& worldMatrix, PxReal distance, bool meshDoubleSided,
									const Capsule& capsule, const PxVec3& unitDir, const PxHitFlags& hitFlags, bool flipNormal, float distCoef);

		virtual PxAgain processHit(const PxRaycastHit& aHit, const PxVec3& v0, const PxVec3& v1, const PxVec3& v2, PxReal& shrunkMaxT, const PxU32*);

		// PT: TODO: unify these operators
		void operator=(const SweepCapsuleMeshHitCallback&) {}

		// Converts the accumulated state into the final sweep result (including optional MTD on initial overlap).
		bool finalizeHit(	PxSweepHit& sweepHit, const Capsule& lss, const PxTriangleMeshGeometry& triMeshGeom,
							const PxTransform& pose, bool isDoubleSided) const;
	};

#if PX_VC
	#pragma warning(push)
	#pragma warning( disable : 4324 )	// Padding was added at the end of a structure because of a __declspec(align) value.
#endif

	// Midphase callback for box-vs-mesh sweeps. Works in box local space; supports both a
	// precise SAT path (PxHitFlag::ePRECISE_SWEEP) and a GJK-raycast path.
	struct SweepBoxMeshHitCallback : SweepShapeMeshHitCallback
	{
		const Matrix34Padded&			mMeshToBox;		// mesh vertex space -> box local space (padded for SIMD loads)
		PxReal							mDist, mDist0;	// current best distance / initial distance
		physx::shdfnd::aos::FloatV		mDistV;			// SIMD mirror of mDist
		const Box&						mBox;
		const PxVec3&					mLocalDir;		// sweep dir in box space
		const PxVec3&					mWorldUnitDir;	// sweep dir in world space
		PxReal							mInflation;
		PxTriangle						mHitTriangle;	// best triangle so far (box space), kept for normal-flip logic
		physx::shdfnd::aos::Vec3V		mMinClosestA;	// impact point of best hit (box space)
		physx::shdfnd::aos::Vec3V		mMinNormal;		// normal of best hit (box space)
		physx::shdfnd::aos::Vec3V		mLocalMotionV;	// remaining motion vector, shrunk as hits are found
		PxU32							mMinTriangleIndex;
		PxVec3							mOneOverDir;	// per-component reciprocal of mLocalDir (0 where dir is 0)
		const bool						mBothTriangleSidesCollide;	// PT: true if PxMeshGeometryFlag::eDOUBLE_SIDED || PxHitFlag::eMESH_BOTH_SIDES

		SweepBoxMeshHitCallback(CallbackMode::Enum mode_, const Matrix34Padded& meshToBox, PxReal distance, bool bothTriangleSidesCollide,
								const Box& box, const PxVec3& localMotion, const PxVec3& localDir, const PxVec3& unitDir,
								const PxHitFlags& hitFlags, const PxReal inflation, bool flipNormal, float distCoef);

		virtual ~SweepBoxMeshHitCallback() {}

		virtual PxAgain processHit(const PxRaycastHit& meshHit, const PxVec3& lp0, const PxVec3& lp1, const PxVec3& lp2, PxReal& shrinkMaxT, const PxU32*);

		// Converts the accumulated state into the final sweep result (including optional MTD on initial overlap).
		bool finalizeHit(	PxSweepHit& sweepHit, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose,
							const PxTransform& boxTransform, const PxVec3& localDir,
							bool meshBothSides, bool isDoubleSided) const;

	private:
		SweepBoxMeshHitCallback& operator=(const SweepBoxMeshHitCallback&);
	};

	// Midphase callback for convex-vs-mesh sweeps; delegates per-triangle work to sweepConvexVsTriangle.
	struct SweepConvexMeshHitCallback : SweepShapeMeshHitCallback
	{
		PxTriangle							mHitTriangle;		// best triangle so far (mesh space), kept for normal-flip logic
		ConvexHullV							mConvexHull;		// swept convex in its local space
		physx::shdfnd::aos::PsMatTransformV	mMeshToConvex;		// mesh space -> convex local space
		physx::shdfnd::aos::PsTransformV	mConvexPoseV;		// convex local -> world
		const Cm::FastVertex2ShapeScaling&	mMeshScale;			// mesh vertex -> shape scaling
		PxSweepHit							mSweepHit;			// stores either the closest or any hit depending on value of mAnyHit
		physx::shdfnd::aos::FloatV			mInitialDistance;	// full sweep distance
		physx::shdfnd::aos::Vec3V			mConvexSpaceDir;	// convexPose.rotateInv(-unit*distance)
		PxVec3								mUnitDir;
		PxVec3								mMeshSpaceUnitDir;
		PxReal								mInflation;
		const bool							mAnyHit;			// PxHitFlag::eMESH_ANY: stop at the first hit
		const bool							mBothTriangleSidesCollide;	// PT: true if PxMeshGeometryFlag::eDOUBLE_SIDED || PxHitFlag::eMESH_BOTH_SIDES

		SweepConvexMeshHitCallback(	const ConvexHullData& hull, const PxMeshScale& convexScale, const Cm::FastVertex2ShapeScaling& meshScale,
									const PxTransform& convexPose, const PxTransform& meshPose,
									const PxVec3& unitDir, const PxReal distance, PxHitFlags hitFlags, const bool bothTriangleSidesCollide, const PxReal inflation,
									const bool anyHit, float distCoef);

		virtual ~SweepConvexMeshHitCallback() {}

		virtual PxAgain processHit(const PxRaycastHit& hit, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2, PxReal& shrunkMaxT, const PxU32*);

		// Converts the accumulated state into the final sweep result (including optional MTD on initial overlap).
		bool finalizeHit(	PxSweepHit& sweepHit, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose,
							const PxConvexMeshGeometry& convexGeom, const PxTransform& convexPose,
							const PxVec3& unitDir, PxReal inflation,
							bool isMtd, bool meshBothSides, bool isDoubleSided, bool bothTriangleSidesCollide);

	private:
		SweepConvexMeshHitCallback& operator=(const SweepConvexMeshHitCallback&);
	};

#if PX_VC
	#pragma warning(pop)
#endif

}
}

#endif
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepsMesh.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepsMesh.cpp new file mode 100644 index 00000000..6efb85db --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepsMesh.cpp @@ -0,0 +1,602 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#include "GuSweepTests.h"
#include "GuSweepMesh.h"
#include "GuInternal.h"
#include "GuConvexUtilsInternal.h"
#include "CmScaling.h"
#include "GuVecShrunkBox.h"
#include "GuSweepMTD.h"
#include "GuVecCapsule.h"
#include "GuSweepBoxTriangle_SAT.h"
#include "GuSweepCapsuleTriangle.h"
#include "GuSweepSphereTriangle.h"
#include "GuDistancePointTriangle.h"
#include "GuCapsule.h"

using namespace physx;
using namespace Gu;
using namespace Cm;
using namespace physx::shdfnd::aos;

#include "GuSweepConvexTri.h"

///////////////////////////////////////////////////////////////////////////////

// Sweeps a sphere against one triangle: handles the initial-overlap case inline
// (closest-point test against the triangle) then defers to sweepSphereTriangles.
// Returns true and fills 'hit'/'triNormalOut' when the sphere touches the triangle
// within 'distance' along 'unitDir'.
static bool sweepSphereTriangle(const PxTriangle& tri,
								const PxVec3& center, PxReal radius,
								const PxVec3& unitDir, const PxReal distance,
								PxSweepHit& hit, PxVec3& triNormalOut,
								PxHitFlags hitFlags, bool isDoubleSided)
{
	const bool meshBothSides = hitFlags & PxHitFlag::eMESH_BOTH_SIDES;
	if(!(hitFlags & PxHitFlag::eASSUME_NO_INITIAL_OVERLAP))
	{
		const bool doBackfaceCulling = !isDoubleSided && !meshBothSides;

		// PT: test if shapes initially overlap
		// PT: add culling here for now, but could be made more efficiently...

		// Create triangle normal
		PxVec3 denormalizedNormal;
		tri.denormalizedNormal(denormalizedNormal);

		// Backface culling
		if(doBackfaceCulling && (denormalizedNormal.dot(unitDir) > 0.0f))
			return false;

		float s_unused, t_unused;
		const PxVec3 cp = closestPtPointTriangle(center, tri.verts[0], tri.verts[1], tri.verts[2], s_unused, t_unused);
		const PxReal dist2 = (cp - center).magnitudeSquared();
		// sphere already touches the triangle at t=0 -> report initial overlap
		if(dist2<=radius*radius)
		{
			triNormalOut = denormalizedNormal.getNormalized();
			return setInitialOverlapResults(hit, unitDir, 0);
		}
	}

	return sweepSphereTriangles(1, &tri,
						center, radius,
						unitDir, distance,
						NULL,
						hit, triNormalOut,
						isDoubleSided, meshBothSides, false, false);
}

///////////////////////////////////////////////////////////////////////////////

// Shared-state ctor for all mesh-sweep callbacks: stores flags and resets hit status.
SweepShapeMeshHitCallback::SweepShapeMeshHitCallback(CallbackMode::Enum mode, const PxHitFlags& hitFlags, bool flipNormal, float distCoef) :
	MeshHitCallback<PxRaycastHit>	(mode),
	mHitFlags						(hitFlags),
	mStatus							(false),
	mInitialOverlap					(false),
	mFlipNormal						(flipNormal),
	mDistCoeff						(distCoef)
{
}

///////////////////////////////////////////////////////////////////////////////

// Capsule-vs-mesh callback ctor. 'mBestDist' starts a hair beyond the sweep distance so the
// first real hit always wins; 'mIsSphere' collapses the capsule path to the cheaper sphere path.
SweepCapsuleMeshHitCallback::SweepCapsuleMeshHitCallback(
	PxSweepHit& sweepHit, const Matrix34& worldMatrix, PxReal distance, bool meshDoubleSided,
	const Capsule& capsule, const PxVec3& unitDir, const PxHitFlags& hitFlags, bool flipNormal, float distCoef) :
	SweepShapeMeshHitCallback	(CallbackMode::eMULTIPLE, hitFlags, flipNormal, distCoef),
	mSweepHit					(sweepHit),
	mVertexToWorldSkew			(worldMatrix),
	mTrueSweepDistance			(distance),
	mBestAlignmentValue			(2.0f),
	mBestDist					(distance + GU_EPSILON_SAME_DISTANCE),
	mCapsule					(capsule),
	mUnitDir					(unitDir),
	mMeshDoubleSided			(meshDoubleSided),
	mIsSphere					(capsule.p0 == capsule.p1)
{
	mSweepHit.distance = mTrueSweepDistance;
}

// Per-triangle visitor: sweeps the capsule (or sphere) against one mesh triangle and keeps
// the hit if it is closer, or equally close but better aligned with the sweep direction.
PxAgain SweepCapsuleMeshHitCallback::processHit( // all reported coords are in mesh local space including hit.position
	const PxRaycastHit& aHit, const PxVec3& v0, const PxVec3& v1, const PxVec3& v2, PxReal& shrunkMaxT, const PxU32*)
{
	// mFlipNormal swaps v1/v2 to keep winding consistent under negative mesh scale
	const PxTriangle tmpt(	mVertexToWorldSkew.transform(v0),
							mVertexToWorldSkew.transform(mFlipNormal ? v2 : v1),
							mVertexToWorldSkew.transform(mFlipNormal ? v1 : v2));

	PxSweepHit localHit;	// PT: TODO: ctor!
	PxVec3 triNormal;
	// pick a farther hit within distEpsilon that is more opposing than the previous closest hit
	// make it a relative epsilon to make sure it still works with large distances
	const PxReal distEpsilon = GU_EPSILON_SAME_DISTANCE * PxMax(1.0f, mSweepHit.distance);
	const float minD = mSweepHit.distance + distEpsilon;
	if(mIsSphere)
	{
		if(!::sweepSphereTriangle(	tmpt,
									mCapsule.p0, mCapsule.radius,
									mUnitDir, minD,
									localHit, triNormal,
									mHitFlags, mMeshDoubleSided))
			return true;
	}
	else
	{
		// PT: this one is safe because cullbox is NULL (no need to allocate one more triangle)
		if(!sweepCapsuleTriangles_Precise(	1, &tmpt,
											mCapsule,
											mUnitDir, minD,
											NULL,
											localHit, triNormal,
											mHitFlags, mMeshDoubleSided,
											NULL))
			return true;
	}

	const PxReal alignmentValue = computeAlignmentValue(triNormal, mUnitDir);
	if(keepTriangle(localHit.distance, alignmentValue, mBestDist, mBestAlignmentValue, mTrueSweepDistance, distEpsilon))
	{
		mBestAlignmentValue = alignmentValue;

		// AP: need to shrink the sweep distance passed into sweepCapsuleTriangles for correctness so that next sweep is closer
		shrunkMaxT = localHit.distance * mDistCoeff;	// shrunkMaxT is scaled

		mBestDist = PxMin(mBestDist, localHit.distance);	// exact lower bound
		mSweepHit.flags		= localHit.flags;
		mSweepHit.distance	= localHit.distance;
		mSweepHit.normal	= localHit.normal;
		mSweepHit.position	= localHit.position;
		mSweepHit.faceIndex	= aHit.faceIndex;

		mStatus = true;
		//ML:this is the initial overlap condition
		if(localHit.distance == 0.0f)
		{
			mInitialOverlap = true;
			return false;	// abort traversal: nothing can beat distance 0
		}
		if(mHitFlags & PxHitFlag::eMESH_ANY)
			return false;	// abort traversal
	}
	return true;
}

// Finalizes the capsule sweep: on initial overlap optionally computes the MTD,
// otherwise just sets the standard result flags. Returns false if no hit was found.
bool SweepCapsuleMeshHitCallback::finalizeHit(	PxSweepHit& sweepHit, const Capsule& lss, const PxTriangleMeshGeometry& triMeshGeom,
												const PxTransform& pose, bool isDoubleSided) const
{
	if(!mStatus)
		return false;

	if(mInitialOverlap)
	{
		// PT: TODO: consider using 'setInitialOverlapResults' here
		bool hasContacts = false;
		if(mHitFlags & PxHitFlag::eMTD)
		{
			const Vec3V p0 = V3LoadU(mCapsule.p0);
			const Vec3V p1 = V3LoadU(mCapsule.p1);
			const FloatV radius = FLoad(lss.radius);
			CapsuleV capsuleV;
			capsuleV.initialize(p0, p1, radius);

			//we need to calculate the MTD
			hasContacts = computeCapsule_TriangleMeshMTD(triMeshGeom, pose, capsuleV, mCapsule.radius, isDoubleSided, sweepHit);
		}
		setupSweepHitForMTD(sweepHit, hasContacts, mUnitDir);
	}
	else
	{
		sweepHit.flags = PxHitFlag::eDISTANCE | PxHitFlag::eNORMAL | PxHitFlag::ePOSITION | PxHitFlag::eFACE_INDEX;
	}
	return true;
}

///////////////////////////////////////////////////////////////////////////////

// Entry point for capsule-vs-triangle-mesh sweeps: extracts the mesh and hands off to the midphase.
bool sweepCapsule_MeshGeom(GU_CAPSULE_SWEEP_FUNC_PARAMS)
{
	PX_UNUSED(capsuleGeom_);
	PX_UNUSED(capsulePose_);

	PX_ASSERT(geom.getType() == PxGeometryType::eTRIANGLEMESH);
	const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom);

	TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh);

	return Midphase::sweepCapsuleVsMesh(meshData, meshGeom, pose, lss, unitDir, distance, sweepHit, hitFlags, inflation);
}

///////////////////////////////////////////////////////////////////////////////

	// same as 'mat.transform(p)' but using SIMD
	static PX_FORCE_INLINE Vec4V transformV(const Vec4V p, const Matrix34Padded& mat)
	{
		Vec4V ResV = V4Scale(V4LoadU(&mat.m.column0.x), V4GetX(p));
		ResV = V4ScaleAdd(V4LoadU(&mat.m.column1.x), V4GetY(p), ResV);
		ResV = V4ScaleAdd(V4LoadU(&mat.m.column2.x), V4GetZ(p), ResV);
		ResV = V4Add(ResV, V4LoadU(&mat.p.x));	// PT: this load is safe thanks to padding
		return ResV;
	}

///////////////////////////////////////////////////////////////////////////////

// Box-vs-mesh callback ctor. Precomputes the reciprocal sweep direction for the SAT path
// (0 where a component of the direction is 0).
SweepBoxMeshHitCallback::SweepBoxMeshHitCallback(	CallbackMode::Enum mode_, const Matrix34Padded& meshToBox, PxReal distance, bool bothTriangleSidesCollide,
													const Box& box, const PxVec3& localMotion, const PxVec3& localDir, const PxVec3& unitDir,
													const PxHitFlags& hitFlags, const PxReal inflation, bool flipNormal, float distCoef) :
	SweepShapeMeshHitCallback	(mode_, hitFlags, flipNormal, distCoef),
	mMeshToBox					(meshToBox),
	mDist						(distance),
	mBox						(box),
	mLocalDir					(localDir),
	mWorldUnitDir				(unitDir),
	mInflation					(inflation),
	mBothTriangleSidesCollide	(bothTriangleSidesCollide)
{
	mLocalMotionV = V3LoadU(localMotion);
	mDistV = FLoad(distance);
	mDist0 = distance;
	mOneOverDir = PxVec3(
		mLocalDir.x!=0.0f ? 1.0f/mLocalDir.x : 0.0f,
		mLocalDir.y!=0.0f ? 1.0f/mLocalDir.y : 0.0f,
		mLocalDir.z!=0.0f ? 1.0f/mLocalDir.z : 0.0f);
}

// Per-triangle visitor for box sweeps. Two code paths:
// - PxHitFlag::ePRECISE_SWEEP: SAT-based triangle-vs-box sweep in box space;
// - default: GJK raycast (triangle vs box), with optional backface culling.
// Both shrink the remaining sweep distance as closer hits are found.
PxAgain SweepBoxMeshHitCallback::processHit( // all reported coords are in mesh local space including hit.position
	const PxRaycastHit& meshHit, const PxVec3& lp0, const PxVec3& lp1, const PxVec3& lp2, PxReal& shrinkMaxT, const PxU32*)
{
	if(mHitFlags & PxHitFlag::ePRECISE_SWEEP)
	{
		// mFlipNormal swaps the last two verts to keep winding consistent under negative mesh scale
		const PxTriangle currentTriangle(
			mMeshToBox.transform(lp0),
			mMeshToBox.transform(mFlipNormal ? lp2 : lp1),
			mMeshToBox.transform(mFlipNormal ? lp1 : lp2));

		PxF32 t = PX_MAX_REAL;	// PT: could be better!
		if(!triBoxSweepTestBoxSpace(currentTriangle, mBox.extents, mLocalDir, mOneOverDir, mDist, t, !mBothTriangleSidesCollide))
			return true;

		if(t <= mDist)
		{
			// PT: test if shapes initially overlap
			mDist = t;
			shrinkMaxT = t * mDistCoeff;	// shrunkMaxT is scaled
			mMinClosestA = V3LoadU(currentTriangle.verts[0]);	// PT: this is arbitrary
			mMinNormal = V3LoadU(-mWorldUnitDir);
			mStatus = true;
			mMinTriangleIndex = meshHit.faceIndex;
			mHitTriangle = currentTriangle;
			if(t == 0.0f)
			{
				mInitialOverlap = true;
				return false;	// abort traversal
			}
		}
	}
	else
	{
		const FloatV zero = FZero();

		// PT: SIMD code similar to:
		// const Vec3V triV0 = V3LoadU(mMeshToBox.transform(lp0));
		// const Vec3V triV1 = V3LoadU(mMeshToBox.transform(lp1));
		// const Vec3V triV2 = V3LoadU(mMeshToBox.transform(lp2));
		//
		// SIMD version works but we need to ensure all loads are safe.
		// For incoming vertices they should either come from the vertex array or from a binary deserialized file.
		// For the vertex array we can just allocate one more vertex. For the binary file it should be ok as soon
		// as vertices aren't the last thing serialized in the file.
		// For the matrix only the last column is a problem, and we can easily solve that with some padding in the local class.
		const Vec3V triV0 = Vec3V_From_Vec4V(transformV(V4LoadU(&lp0.x), mMeshToBox));
		const Vec3V triV1 = Vec3V_From_Vec4V(transformV(V4LoadU(mFlipNormal ? &lp2.x : &lp1.x), mMeshToBox));
		const Vec3V triV2 = Vec3V_From_Vec4V(transformV(V4LoadU(mFlipNormal ? &lp1.x : &lp2.x), mMeshToBox));

		// backface culling against the motion direction
		if(!mBothTriangleSidesCollide)
		{
			const Vec3V triNormal = V3Cross(V3Sub(triV2, triV1),V3Sub(triV0, triV1));
			if(FAllGrtrOrEq(V3Dot(triNormal, mLocalMotionV), zero))
				return true;
		}

		const Vec3V zeroV = V3Zero();
		const Vec3V boxExtents = V3LoadU(mBox.extents);
		const BoxV boxV(zeroV, boxExtents);

		const TriangleV triangleV(triV0, triV1, triV2);

		FloatV lambda;
		Vec3V closestA, normal;//closestA and normal is in the local space of convex hull
		LocalConvex<TriangleV> convexA(triangleV);
		LocalConvex<BoxV> convexB(boxV);
		const Vec3V initialSearchDir = V3Sub(triangleV.getCenter(), boxV.getCenter());
		if(!gjkRaycastPenetration< LocalConvex<TriangleV>, LocalConvex<BoxV> >(convexA, convexB, initialSearchDir, zero, zeroV, mLocalMotionV, lambda, normal, closestA, mInflation, false))
			return true;

		mStatus = true;
		mMinClosestA = closestA;
		mMinTriangleIndex = meshHit.faceIndex;
		if(FAllGrtrOrEq(zero, lambda))	// lambda < 0? => initial overlap
		{
			mInitialOverlap = true;
			shrinkMaxT = 0.0f;
			mDistV = zero;
			mDist = 0.0f;
			mMinNormal = V3LoadU(-mWorldUnitDir);
			return false;
		}

		PxF32 f;
		FStore(lambda, &f);
		mDist = f*mDist;	// shrink dist
		mLocalMotionV = V3Scale(mLocalMotionV, lambda);	// shrink localMotion
		mDistV = FMul(mDistV, lambda);	// shrink distV
		mMinNormal = normal;
		if(mDist * mDistCoeff < shrinkMaxT)	// shrink shrinkMaxT
			shrinkMaxT = mDist * mDistCoeff;	// shrunkMaxT is scaled

		//mHitTriangle = currentTriangle;
		V3StoreU(triV0, mHitTriangle.verts[0]);
		V3StoreU(triV1, mHitTriangle.verts[1]);
		V3StoreU(triV2, mHitTriangle.verts[2]);
	}
	return true;
}

// Finalizes the box sweep: MTD on initial overlap, otherwise converts the best box-space
// impact data back to world space, fixing up the normal orientation for the
// mesh-both-sides / double-sided conventions.
bool SweepBoxMeshHitCallback::finalizeHit(	PxSweepHit& sweepHit, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose,
											const PxTransform& boxTransform, const PxVec3& localDir,
											bool meshBothSides, bool isDoubleSided) const
{
	if(!mStatus)
		return false;

	Vec3V minClosestA = mMinClosestA;
	Vec3V minNormal = mMinNormal;
	sweepHit.faceIndex = mMinTriangleIndex;

	if(mInitialOverlap)
	{
		bool hasContacts = false;
		if(mHitFlags & PxHitFlag::eMTD)
			hasContacts = computeBox_TriangleMeshMTD(triMeshGeom, pose, mBox, boxTransform, mInflation, mBothTriangleSidesCollide, sweepHit);

		setupSweepHitForMTD(sweepHit, hasContacts, mWorldUnitDir);
	}
	else
	{
		sweepHit.distance = mDist;
		sweepHit.flags = PxHitFlag::eDISTANCE | PxHitFlag::eFACE_INDEX;

		// PT: we need the "best triangle" normal in order to call 'shouldFlipNormal'. We stored the best
		// triangle in both GJK & precise codepaths (in box space). We use a dedicated 'shouldFlipNormal'
		// function that delays computing the triangle normal.
		// TODO: would still be more efficient to store the best normal directly, it's already computed at least
		// in the GJK codepath.

		const Vec3V p0 = V3LoadU(&boxTransform.p.x);
		const QuatV q0 = QuatVLoadU(&boxTransform.q.x);
		const PsTransformV boxPos(p0, q0);

		if(mHitFlags & PxHitFlag::ePRECISE_SWEEP)
		{
			computeBoxLocalImpact(sweepHit.position, sweepHit.normal, sweepHit.flags, mBox, localDir, mHitTriangle, mHitFlags, isDoubleSided, meshBothSides, mDist);
		}
		else
		{
			sweepHit.flags |= PxHitFlag::eNORMAL|PxHitFlag::ePOSITION;

			// PT: now for the GJK path, we must first always negate the returned normal. Similar to what happens in the precise path,
			// we can't delay this anymore: our normal must be properly oriented in order to call 'shouldFlipNormal'.
			minNormal = V3Neg(minNormal);

			// PT: this one is to ensure the normal respects the mesh-both-sides/double-sided convention
			PxVec3 tmp;
			V3StoreU(minNormal, tmp);

			if(shouldFlipNormal(tmp, meshBothSides, isDoubleSided, mHitTriangle, localDir, NULL))
				minNormal = V3Neg(minNormal);

			// PT: finally, this moves everything back to world space
			V3StoreU(boxPos.rotate(minNormal), sweepHit.normal);
			V3StoreU(boxPos.transform(minClosestA), sweepHit.position);
		}
	}
	return true;
}

///////////////////////////////////////////////////////////////////////////////

// Entry point for box-vs-triangle-mesh sweeps: extracts the mesh and hands off to the midphase.
bool sweepBox_MeshGeom(GU_BOX_SWEEP_FUNC_PARAMS)
{
	PX_ASSERT(geom.getType() == PxGeometryType::eTRIANGLEMESH);
	PX_UNUSED(boxPose_);
	PX_UNUSED(boxGeom_);

	const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom);

	TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh);

	return Midphase::sweepBoxVsMesh(meshData, meshGeom, pose, box, unitDir, distance, sweepHit, hitFlags, inflation);
}

///////////////////////////////////////////////////////////////////////////////

// Convex-vs-mesh callback ctor. Precomputes all the space transforms needed by
// sweepConvexVsTriangle: mesh->convex transform, convex-space motion vector, and the
// scaled convex hull itself.
SweepConvexMeshHitCallback::SweepConvexMeshHitCallback(	const ConvexHullData& hull, const PxMeshScale& convexScale, const FastVertex2ShapeScaling& meshScale,
														const PxTransform& convexPose, const PxTransform& meshPose,
														const PxVec3& unitDir, const PxReal distance, PxHitFlags hitFlags, const bool bothTriangleSidesCollide, const PxReal inflation,
														const bool anyHit, float distCoef) :
	SweepShapeMeshHitCallback	(CallbackMode::eMULTIPLE, hitFlags, meshScale.flipsNormal(), distCoef),
	mMeshScale					(meshScale),
	mUnitDir					(unitDir),
	mInflation					(inflation),
	mAnyHit						(anyHit),
	mBothTriangleSidesCollide	(bothTriangleSidesCollide)
{
	mSweepHit.distance = distance;	// this will be shrinking progressively as we sweep and clip the sweep length
	mSweepHit.faceIndex = 0xFFFFFFFF;

	mMeshSpaceUnitDir = meshPose.rotateInv(unitDir);

	const Vec3V worldDir = V3LoadU(unitDir);
	const FloatV dist = FLoad(distance);
	const QuatV q0 = QuatVLoadU(&meshPose.q.x);
	const Vec3V p0 = V3LoadU(&meshPose.p.x);

	const QuatV q1 = QuatVLoadU(&convexPose.q.x);
	const Vec3V p1 = V3LoadU(&convexPose.p.x);

	const PsTransformV meshPoseV(p0, q0);
	const PsTransformV convexPoseV(p1, q1);

	mMeshToConvex = convexPoseV.transformInv(meshPoseV);
	mConvexPoseV = convexPoseV;
	mConvexSpaceDir = convexPoseV.rotateInv(V3Neg(V3Scale(worldDir, dist)));
	mInitialDistance = dist;

	const Vec3V vScale = V3LoadU_SafeReadW(convexScale.scale);	// PT: safe because 'rotation' follows 'scale' in PxMeshScale
	const QuatV vQuat = QuatVLoadU(&convexScale.rotation.x);
	mConvexHull.initialize(&hull, V3Zero(), vScale, vQuat, convexScale.isIdentity());
}

// Per-triangle visitor for convex sweeps: scales the triangle into mesh shape space, then
// delegates to sweepConvexVsTriangle which updates mSweepHit on a closer hit.
PxAgain SweepConvexMeshHitCallback::processHit( // all reported coords are in mesh local space including hit.position
	const PxRaycastHit& hit, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2, PxReal& shrunkMaxT, const PxU32*)
{
	// mFlipNormal swaps the last two verts to keep winding consistent under negative mesh scale
	const PxVec3 v0 = mMeshScale * av0;
	const PxVec3 v1 = mMeshScale * (mFlipNormal ? av2 : av1);
	const PxVec3 v2 = mMeshScale * (mFlipNormal ? av1 : av2);

	// mSweepHit will be updated if sweep distance is < input mSweepHit.distance
	const PxReal oldDist = mSweepHit.distance;
	if(sweepConvexVsTriangle(
		v0, v1, v2, mConvexHull, mMeshToConvex, mConvexPoseV, mConvexSpaceDir,
		mUnitDir, mMeshSpaceUnitDir, mInitialDistance, oldDist, mSweepHit, mBothTriangleSidesCollide,
		mInflation, mInitialOverlap, hit.faceIndex))
	{
		mStatus = true;
		shrunkMaxT = mSweepHit.distance * mDistCoeff;	// shrunkMaxT is scaled

		// PT: added for 'shouldFlipNormal'
		mHitTriangle.verts[0] = v0;
		mHitTriangle.verts[1] = v1;
		mHitTriangle.verts[2] = v2;

		if(mAnyHit)
			return false;	// abort traversal

		if(mSweepHit.distance == 0.0f)
			return false;	// abort traversal: nothing can beat distance 0
	}
	return true;	// continue traversal
}

// Finalizes the convex sweep: MTD on initial overlap, otherwise copies the stored best hit,
// renormalizing and re-orienting the normal for the mesh-both-sides/double-sided convention.
bool SweepConvexMeshHitCallback::finalizeHit(	PxSweepHit& sweepHit, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose,
												const PxConvexMeshGeometry& convexGeom, const PxTransform& convexPose,
												const PxVec3& unitDir, PxReal inflation,
												bool isMtd, bool meshBothSides, bool isDoubleSided, bool bothTriangleSidesCollide)
{
	if(!mStatus)
		return false;

	if(mInitialOverlap)
	{
		bool hasContacts = false;
		if(isMtd)
			hasContacts = computeConvex_TriangleMeshMTD(meshGeom, pose, convexGeom, convexPose, inflation, bothTriangleSidesCollide, sweepHit);

		setupSweepHitForMTD(sweepHit, hasContacts, unitDir);

		sweepHit.faceIndex = mSweepHit.faceIndex;
	}
	else
	{
		sweepHit = mSweepHit;
		//sweepHit.position += unitDir * sweepHit.distance;
		sweepHit.normal = -sweepHit.normal;
		sweepHit.normal.normalize();

		// PT: this one is to ensure the normal respects the mesh-both-sides/double-sided convention
		// PT: beware, the best triangle is in mesh-space, but the impact data is in world-space already
		if(shouldFlipNormal(sweepHit.normal, meshBothSides, isDoubleSided, mHitTriangle, unitDir, &pose))
			sweepHit.normal = -sweepHit.normal;
	}
	return true;
}

+/////////////////////////////////////////////////////////////////////////////// + +bool sweepConvex_MeshGeom(GU_CONVEX_SWEEP_FUNC_PARAMS) +{ + PX_ASSERT(geom.getType() == PxGeometryType::eTRIANGLEMESH); + const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom); + + ConvexMesh* convexMesh = static_cast<ConvexMesh*>(convexGeom.convexMesh); + TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + + const bool idtScaleConvex = convexGeom.scale.isIdentity(); + const bool idtScaleMesh = meshGeom.scale.isIdentity(); + + FastVertex2ShapeScaling convexScaling; + if(!idtScaleConvex) + convexScaling.init(convexGeom.scale); + + FastVertex2ShapeScaling meshScaling; + if(!idtScaleMesh) + meshScaling.init(meshGeom.scale); + + PX_ASSERT(!convexMesh->getLocalBoundsFast().isEmpty()); + const PxBounds3 hullAABB = convexMesh->getLocalBoundsFast().transformFast(convexScaling.getVertex2ShapeSkew()); + + Box hullOBB; + computeHullOBB(hullOBB, hullAABB, 0.0f, Matrix34(convexPose), Matrix34(pose), meshScaling, idtScaleMesh); + + hullOBB.extents.x += inflation; + hullOBB.extents.y += inflation; + hullOBB.extents.z += inflation; + + const PxVec3 localDir = pose.rotateInv(unitDir); + + // inverse transform the sweep direction and distance to mesh space + PxVec3 meshSpaceSweepVector = meshScaling.getShape2VertexSkew().transform(localDir*distance); + const PxReal meshSpaceSweepDist = meshSpaceSweepVector.normalize(); + + PxReal distCoeff = 1.0f; + if (!idtScaleMesh) + distCoeff = meshSpaceSweepDist / distance; + + const bool meshBothSides = hitFlags & PxHitFlag::eMESH_BOTH_SIDES; + const bool isDoubleSided = meshGeom.meshFlags & PxMeshGeometryFlag::eDOUBLE_SIDED; + const bool bothTriangleSidesCollide = isDoubleSided || meshBothSides; + const bool anyHit = hitFlags & PxHitFlag::eMESH_ANY; + SweepConvexMeshHitCallback callback( + convexMesh->getHull(), convexGeom.scale, meshScaling, convexPose, pose, -unitDir, distance, hitFlags, + 
bothTriangleSidesCollide, inflation, anyHit, distCoeff); + + Midphase::sweepConvexVsMesh(meshData, hullOBB, meshSpaceSweepVector, meshSpaceSweepDist, callback, anyHit); + + const bool isMtd = hitFlags & PxHitFlag::eMTD; + return callback.finalizeHit(sweepHit, meshGeom, pose, convexGeom, convexPose, unitDir, inflation, isMtd, meshBothSides, isDoubleSided, bothTriangleSidesCollide); +} + +/////////////////////////////////////////////////////////////////////////////// + diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangle32.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangle32.h new file mode 100644 index 00000000..7607e730 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangle32.h @@ -0,0 +1,132 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. 
Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_TRIANGLE32_H +#define GU_TRIANGLE32_H + +#include "foundation/PxVec3.h" +#include "CmPhysXCommon.h" +#include "PsUtilities.h" + +namespace physx +{ +namespace Gu +{ + /** + \brief Structure used to store indices for a triangles points. T is either PxU32 or PxU16 + + */ + + template <class T> + struct TriangleT// : public Ps::UserAllocated + { + PX_INLINE TriangleT() {} + PX_INLINE TriangleT(T a, T b, T c) { v[0] = a; v[1] = b; v[2] = c; } + template <class TX> + PX_INLINE TriangleT(const TriangleT<TX>& other) { v[0] = other[0]; v[1] = other[1]; v[2] = other[2]; } + PX_INLINE T& operator[](T i) { return v[i]; } + template<class TX>//any type of TriangleT<>, possibly with different T + PX_INLINE TriangleT<T>& operator=(const TriangleT<TX>& i) { v[0]=i[0]; v[1]=i[1]; v[2]=i[2]; return *this; } + PX_INLINE const T& operator[](T i) const { return v[i]; } + + void flip() + { + Ps::swap(v[1], v[2]); + } + + PX_INLINE void center(const PxVec3* verts, PxVec3& center) const + { + const PxVec3& p0 = verts[v[0]]; + const PxVec3& p1 = verts[v[1]]; + const PxVec3& p2 = verts[v[2]]; + center = (p0+p1+p2)*0.33333333333333333333f; + } + + float area(const PxVec3* verts) const + { + const PxVec3& p0 = verts[v[0]]; + const PxVec3& p1 = verts[v[1]]; + const PxVec3& p2 = verts[v[2]]; + return ((p0-p1).cross(p0-p2)).magnitude() * 0.5f; + } + + PxU8 findEdge(T vref0, T vref1) const + { + if(v[0]==vref0 && v[1]==vref1) return 0; + else if(v[0]==vref1 && v[1]==vref0) return 
0; + else if(v[0]==vref0 && v[2]==vref1) return 1; + else if(v[0]==vref1 && v[2]==vref0) return 1; + else if(v[1]==vref0 && v[2]==vref1) return 2; + else if(v[1]==vref1 && v[2]==vref0) return 2; + return 0xff; + } + + // counter clock wise order + PxU8 findEdgeCCW(T vref0, T vref1) const + { + if(v[0]==vref0 && v[1]==vref1) return 0; + else if(v[0]==vref1 && v[1]==vref0) return 0; + else if(v[0]==vref0 && v[2]==vref1) return 2; + else if(v[0]==vref1 && v[2]==vref0) return 2; + else if(v[1]==vref0 && v[2]==vref1) return 1; + else if(v[1]==vref1 && v[2]==vref0) return 1; + return 0xff; + } + + bool replaceVertex(T oldref, T newref) + { + if(v[0]==oldref) { v[0] = newref; return true; } + else if(v[1]==oldref) { v[1] = newref; return true; } + else if(v[2]==oldref) { v[2] = newref; return true; } + return false; + } + + bool isDegenerate() const + { + if(v[0]==v[1]) return true; + if(v[1]==v[2]) return true; + if(v[2]==v[0]) return true; + return false; + } + + PX_INLINE void denormalizedNormal(const PxVec3* verts, PxVec3& normal) const + { + const PxVec3& p0 = verts[v[0]]; + const PxVec3& p1 = verts[v[1]]; + const PxVec3& p2 = verts[v[2]]; + normal = ((p2 - p1).cross(p0 - p1)); + } + + T v[3]; //vertex indices + }; +} + +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleCache.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleCache.h new file mode 100644 index 00000000..9dc2a453 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleCache.h @@ -0,0 +1,207 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_TRIANGLE_CACHE_H +#define GU_TRIANGLE_CACHE_H +#include "PsHash.h" +#include "PsUtilities.h" + +namespace physx +{ + namespace Gu + { + struct CachedEdge + { + protected: + PxU32 mId0, mId1; + public: + CachedEdge(PxU32 i0, PxU32 i1) + { + mId0 = PxMin(i0, i1); + mId1 = PxMax(i0, i1); + } + + CachedEdge() + { + } + + PxU32 getId0() const { return mId0; } + PxU32 getId1() const { return mId1; } + + bool operator == (const CachedEdge& other) const + { + return mId0 == other.mId0 && mId1 == other.mId1; + } + + PxU32 getHashCode() const + { + return Ps::hash(mId0 << 16 | mId1); + } + }; + + struct CachedVertex + { + private: + PxU32 mId; + public: + CachedVertex(PxU32 id) + { + mId = id; + } + + CachedVertex() + { + } + + PxU32 getId() const { return mId; } + + PxU32 getHashCode() const + { + return mId; + } + + bool operator == (const CachedVertex& other) const + { + return mId == other.mId; + } + }; + + template <typename Elem, PxU32 MaxCount> + struct CacheMap + { + PX_COMPILE_TIME_ASSERT(MaxCount < 0xFF); + Elem mCache[MaxCount]; + PxU8 mNextInd[MaxCount]; + PxU8 mIndex[MaxCount]; + PxU32 mSize; + + CacheMap() : mSize(0) + { + for(PxU32 a = 0; a < MaxCount; ++a) + { + mIndex[a] = 0xFF; + } + } + + bool addData(const Elem& data) + { + if(mSize == MaxCount) + return false; + + const PxU8 hash = PxU8(data.getHashCode() % MaxCount); + + PxU8 index = hash; + PxU8 nextInd = mIndex[hash]; + while(nextInd != 0xFF) + { + index = nextInd; + if(mCache[index] == data) + return false; + nextInd = mNextInd[nextInd]; + } + + if(mIndex[hash] == 0xFF) + { + mIndex[hash] = Ps::to8(mSize); + } + else + { + mNextInd[index] = Ps::to8(mSize); + } + mNextInd[mSize] = 0xFF; + mCache[mSize++] = data; + return true; + } + + bool contains(const Elem& data) const + { + PxU32 hash = (data.getHashCode() % MaxCount); + PxU8 index = mIndex[hash]; + + while(index != 0xFF) + { + if(mCache[index] == data) + return true; + index = mNextInd[index]; + } + return false; + } + + const Elem* 
get(const Elem& data) const + { + PxU32 hash = (data.getHashCode() % MaxCount); + PxU8 index = mIndex[hash]; + + while(index != 0xFF) + { + if(mCache[index] == data) + return &mCache[index]; + index = mNextInd[index]; + } + return NULL; + } + }; + + template <PxU32 MaxTriangles> + struct TriangleCache + { + PxVec3 mVertices[3*MaxTriangles]; + PxU32 mIndices[3*MaxTriangles]; + PxU32 mTriangleIndex[MaxTriangles]; + PxU8 mEdgeFlags[MaxTriangles]; + PxU32 mNumTriangles; + + TriangleCache() : mNumTriangles(0) + { + } + + PX_FORCE_INLINE bool isEmpty() const { return mNumTriangles == 0; } + PX_FORCE_INLINE bool isFull() const { return mNumTriangles == MaxTriangles; } + PX_FORCE_INLINE void reset() { mNumTriangles = 0; } + + void addTriangle(const PxVec3* verts, const PxU32* indices, PxU32 triangleIndex, PxU8 edgeFlag) + { + PX_ASSERT(mNumTriangles < MaxTriangles); + PxU32 triInd = mNumTriangles++; + PxU32 triIndMul3 = triInd*3; + mVertices[triIndMul3] = verts[0]; + mVertices[triIndMul3+1] = verts[1]; + mVertices[triIndMul3+2] = verts[2]; + mIndices[triIndMul3] = indices[0]; + mIndices[triIndMul3+1] = indices[1]; + mIndices[triIndMul3+2] = indices[2]; + mTriangleIndex[triInd] = triangleIndex; + mEdgeFlags[triInd] = edgeFlag; + } + }; + } +} + +#endif + diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMesh.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMesh.cpp new file mode 100644 index 00000000..be47d3e1 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMesh.cpp @@ -0,0 +1,457 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "PsIntrinsics.h" +#include "GuMidphaseInterface.h" +#include "GuSerialize.h" +#include "GuMeshFactory.h" +#include "CmRenderOutput.h" +#include "PxVisualizationParameter.h" +#include "GuConvexEdgeFlags.h" +#include "GuBox.h" +#include "PxMeshScale.h" +#include "CmUtils.h" + +using namespace physx; + +namespace physx +{ + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +static PxConcreteType::Enum gTable[] = { PxConcreteType::eTRIANGLE_MESH_BVH33, + PxConcreteType::eTRIANGLE_MESH_BVH34 + }; + +Gu::TriangleMesh::TriangleMesh(GuMeshFactory& factory, TriangleMeshData& d) +: PxTriangleMesh(PxType(gTable[d.mType]), PxBaseFlag::eOWNS_MEMORY | PxBaseFlag::eIS_RELEASABLE) +, mNbVertices (d.mNbVertices) +, mNbTriangles (d.mNbTriangles) +, mVertices (d.mVertices) +, mTriangles (d.mTriangles) +, mAABB (d.mAABB) +, mExtraTrigData (d.mExtraTrigData) +, mGeomEpsilon (d.mGeomEpsilon) +, mFlags (d.mFlags) +, mMaterialIndices (d.mMaterialIndices) +, mFaceRemap (d.mFaceRemap) +, mAdjacencies (d.mAdjacencies) + +, mMeshFactory (&factory) + +, mGRB_triIndices (d.mGRB_triIndices) + +, mGRB_triAdjacencies (d.mGRB_triAdjacencies) +, mGRB_vertValency (d.mGRB_vertValency) +, mGRB_adjVertStart (d.mGRB_adjVertStart) +, mGRB_adjVertices (d.mGRB_adjVertices) + +, mGRB_meshAdjVerticiesTotal (d.mGRB_meshAdjVerticiesTotal) +, mGRB_faceRemap (d.mGRB_faceRemap) +, mGRB_BV32Tree (d.mGRB_BV32Tree) +{ + // this constructor takes ownership of memory from the data object + d.mVertices = 0; + d.mTriangles = 0; + d.mExtraTrigData = 0; + d.mFaceRemap = 0; + d.mAdjacencies = 0; + d.mMaterialIndices = 0; + + d.mGRB_triIndices = 0; + + d.mGRB_triAdjacencies = 0; + d.mGRB_vertValency = 0; + d.mGRB_adjVertStart = 0; + d.mGRB_adjVertices = 0; + d.mGRB_faceRemap = 0; + d.mGRB_BV32Tree = 0; + + // PT: 'getPaddedBounds()' is only safe if we make sure 
the bounds member is followed by at least 32bits of data + PX_COMPILE_TIME_ASSERT(PX_OFFSET_OF(Gu::TriangleMesh, mExtraTrigData)>=PX_OFFSET_OF(Gu::TriangleMesh, mAABB)+4); + +} + +Gu::TriangleMesh::~TriangleMesh() +{ + if(getBaseFlags() & PxBaseFlag::eOWNS_MEMORY) + { + PX_FREE_AND_RESET(mExtraTrigData); + PX_FREE_AND_RESET(mFaceRemap); + PX_FREE_AND_RESET(mAdjacencies); + PX_FREE_AND_RESET(mMaterialIndices); + PX_FREE_AND_RESET(mTriangles); + PX_FREE_AND_RESET(mVertices); + + PX_FREE_AND_RESET(mGRB_triIndices); + + PX_FREE_AND_RESET(mGRB_triAdjacencies); + PX_FREE_AND_RESET(mGRB_vertValency); + PX_FREE_AND_RESET(mGRB_adjVertStart); + PX_FREE_AND_RESET(mGRB_adjVertices); + PX_FREE_AND_RESET(mGRB_faceRemap); + + BV32Tree* bv32Tree = reinterpret_cast<BV32Tree*>(mGRB_BV32Tree); + PX_DELETE_AND_RESET(bv32Tree); + + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// PT: used to be automatic but making it manual saves bytes in the internal mesh + +void Gu::TriangleMesh::exportExtraData(PxSerializationContext& stream) +{ + //PX_DEFINE_DYNAMIC_ARRAY(TriangleMesh, mVertices, PxField::eVEC3, mNbVertices, Ps::PxFieldFlag::eSERIALIZE), + if(mVertices) + { + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mVertices, mNbVertices * sizeof(PxVec3)); + } + + if(mTriangles) + { + const PxU32 triangleSize = mFlags & PxTriangleMeshFlag::e16_BIT_INDICES ? sizeof(PxU16) : sizeof(PxU32); + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mTriangles, mNbTriangles * 3 * triangleSize); + } + + //PX_DEFINE_DYNAMIC_ARRAY(TriangleMesh, mExtraTrigData, PxField::eBYTE, mNbTriangles, Ps::PxFieldFlag::eSERIALIZE), + if(mExtraTrigData) + { + // PT: it might not be needed to 16-byte align this array of PxU8.... 
+ stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mExtraTrigData, mNbTriangles * sizeof(PxU8)); + } + + if(mMaterialIndices) + { + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mMaterialIndices, mNbTriangles * sizeof(PxU16)); + } + + if(mFaceRemap) + { + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mFaceRemap, mNbTriangles * sizeof(PxU32)); + } + + if(mAdjacencies) + { + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mAdjacencies, mNbTriangles * sizeof(PxU32) * 3); + } +} + +void Gu::TriangleMesh::importExtraData(PxDeserializationContext& context) +{ + // PT: vertices are followed by indices, so it will be safe to V4Load vertices from a deserialized binary file + if(mVertices) + mVertices = context.readExtraData<PxVec3, PX_SERIAL_ALIGN>(mNbVertices); + + if(mTriangles) + { + if(mFlags & PxTriangleMeshFlag::e16_BIT_INDICES) + mTriangles = context.readExtraData<PxU16, PX_SERIAL_ALIGN>(3*mNbTriangles); + else + mTriangles = context.readExtraData<PxU32, PX_SERIAL_ALIGN>(3*mNbTriangles); + } + + if(mExtraTrigData) + mExtraTrigData = context.readExtraData<PxU8, PX_SERIAL_ALIGN>(mNbTriangles); + + if(mMaterialIndices) + mMaterialIndices = context.readExtraData<PxU16, PX_SERIAL_ALIGN>(mNbTriangles); + + if(mFaceRemap) + mFaceRemap = context.readExtraData<PxU32, PX_SERIAL_ALIGN>(mNbTriangles); + + if(mAdjacencies) + mAdjacencies = context.readExtraData<PxU32, PX_SERIAL_ALIGN>(3*mNbTriangles); +} + +void Gu::TriangleMesh::onRefCountZero() +{ + if(mMeshFactory->removeTriangleMesh(*this)) + { + const PxType type = getConcreteType(); + GuMeshFactory* mf = mMeshFactory; + Cm::deletePxBase(this); + mf->notifyFactoryListener(this, type); + return; + } + + // PT: if we reach this point, we didn't find the mesh in the Physics object => don't delete! + // This prevents deleting the object twice. 
+ Ps::getFoundation().error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, "Gu::TriangleMesh::release: double deletion detected!"); +} +//~PX_SERIALIZATION + +void Gu::TriangleMesh::release() +{ + decRefCount(); +} + +#if PX_ENABLE_DYNAMIC_MESH_RTREE +PxVec3 * Gu::TriangleMesh::getVerticesForModification() +{ + Ps::getFoundation().error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, "PxTriangleMesh::getVerticesForModification() is only supported for meshes with PxMeshMidPhase::eBVHDynamic."); + + return NULL; +} + +PxBounds3 Gu::TriangleMesh::refitBVH() +{ + Ps::getFoundation().error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, "PxTriangleMesh::refitBVH() is only supported for meshes with PxMeshMidPhase::eBVHDynamic."); + + return PxBounds3(mAABB.getMin(), mAABB.getMax()); +} +#endif + +#if PX_ENABLE_DEBUG_VISUALIZATION + +static void getTriangle(const Gu::TriangleMesh&, PxU32 i, PxVec3* wp, const PxVec3* vertices, const void* indices, bool has16BitIndices) +{ + PxU32 ref0, ref1, ref2; + + if(!has16BitIndices) + { + const PxU32* dtriangles = reinterpret_cast<const PxU32*>(indices); + ref0 = dtriangles[i*3+0]; + ref1 = dtriangles[i*3+1]; + ref2 = dtriangles[i*3+2]; + } + else + { + const PxU16* wtriangles = reinterpret_cast<const PxU16*>(indices); + ref0 = wtriangles[i*3+0]; + ref1 = wtriangles[i*3+1]; + ref2 = wtriangles[i*3+2]; + } + + wp[0] = vertices[ref0]; + wp[1] = vertices[ref1]; + wp[2] = vertices[ref2]; +} + +static void getTriangle(const Gu::TriangleMesh& mesh, PxU32 i, PxVec3* wp, const PxVec3* vertices, const void* indices, const Cm::Matrix34& absPose, bool has16BitIndices) +{ + PxVec3 localVerts[3]; + getTriangle(mesh, i, localVerts, vertices, indices, has16BitIndices); + + wp[0] = absPose.transform(localVerts[0]); + wp[1] = absPose.transform(localVerts[1]); + wp[2] = absPose.transform(localVerts[2]); +} + +static void visualizeActiveEdges(Cm::RenderOutput& out, const Gu::TriangleMesh& mesh, PxU32 nbTriangles, const PxU32* results, 
const Cm::Matrix34& absPose, const PxMat44& midt) +{ + const PxU8* extraTrigData = mesh.getExtraTrigData(); + PX_ASSERT(extraTrigData); + + const PxVec3* vertices = mesh.getVerticesFast(); + const void* indices = mesh.getTrianglesFast(); + + const PxU32 ecolor = PxU32(PxDebugColor::eARGB_YELLOW); + const bool has16Bit = mesh.has16BitIndices(); + for(PxU32 i=0; i<nbTriangles; i++) + { + const PxU32 index = results ? results[i] : i; + + PxVec3 wp[3]; + getTriangle(mesh, index, wp, vertices, indices, absPose, has16Bit); + + const PxU32 flags = extraTrigData[index]; + + if(flags & Gu::ETD_CONVEX_EDGE_01) + { + out << midt << ecolor << Cm::RenderOutput::LINES << wp[0] << wp[1]; + } + if(flags & Gu::ETD_CONVEX_EDGE_12) + { + out << midt << ecolor << Cm::RenderOutput::LINES << wp[1] << wp[2]; + } + if(flags & Gu::ETD_CONVEX_EDGE_20) + { + out << midt << ecolor << Cm::RenderOutput::LINES << wp[0] << wp[2]; + } + } +} + +void Gu::TriangleMesh::debugVisualize( + Cm::RenderOutput& out, const PxTransform& pose, const PxMeshScale& scaling, const PxBounds3& cullbox, + const PxU64 mask, const PxReal fscale, const PxU32 numMaterials) const +{ + PX_UNUSED(numMaterials); + + //bool cscale = !!(mask & ((PxU64)1 << PxVisualizationParameter::eCULL_BOX)); + const PxU64 cullBoxMask = PxU64(1) << PxVisualizationParameter::eCULL_BOX; + bool cscale = ((mask & cullBoxMask) == cullBoxMask); + + const PxMat44 midt(PxIdentity); + const Cm::Matrix34 absPose(PxMat33(pose.q) * scaling.toMat33(), pose.p); + + PxU32 nbTriangles = getNbTrianglesFast(); + const PxU32 nbVertices = getNbVerticesFast(); + const PxVec3* vertices = getVerticesFast(); + const void* indices = getTrianglesFast(); + + const PxDebugColor::Enum colors[] = + { + PxDebugColor::eARGB_BLACK, + PxDebugColor::eARGB_RED, + PxDebugColor::eARGB_GREEN, + PxDebugColor::eARGB_BLUE, + PxDebugColor::eARGB_YELLOW, + PxDebugColor::eARGB_MAGENTA, + PxDebugColor::eARGB_CYAN, + PxDebugColor::eARGB_WHITE, + PxDebugColor::eARGB_GREY, + 
PxDebugColor::eARGB_DARKRED, + PxDebugColor::eARGB_DARKGREEN, + PxDebugColor::eARGB_DARKBLUE, + }; + + const PxU32 colorCount = sizeof(colors)/sizeof(PxDebugColor::Enum); + + if(cscale) + { + const Gu::Box worldBox( + (cullbox.maximum + cullbox.minimum)*0.5f, + (cullbox.maximum - cullbox.minimum)*0.5f, + PxMat33(PxIdentity)); + + // PT: TODO: use the callback version here to avoid allocating this huge array + PxU32* results = reinterpret_cast<PxU32*>(PX_ALLOC_TEMP(sizeof(PxU32)*nbTriangles, "tmp triangle indices")); + LimitedResults limitedResults(results, nbTriangles, 0); + Midphase::intersectBoxVsMesh(worldBox, *this, pose, scaling, &limitedResults); + nbTriangles = limitedResults.mNbResults; + + if (fscale) + { + const PxU32 fcolor = PxU32(PxDebugColor::eARGB_DARKRED); + + for (PxU32 i=0; i<nbTriangles; i++) + { + const PxU32 index = results[i]; + PxVec3 wp[3]; + getTriangle(*this, index, wp, vertices, indices, absPose, has16BitIndices()); + + const PxVec3 center = (wp[0] + wp[1] + wp[2]) / 3.0f; + PxVec3 normal = (wp[0] - wp[1]).cross(wp[0] - wp[2]); + PX_ASSERT(!normal.isZero()); + normal = normal.getNormalized(); + + out << midt << fcolor << + Cm::DebugArrow(center, normal * fscale); + } + } + + if (mask & (PxU64(1) << PxVisualizationParameter::eCOLLISION_SHAPES)) + { + const PxU32 scolor = PxU32(PxDebugColor::eARGB_MAGENTA); + + out << midt << scolor; // PT: no need to output this for each segment! 
+ + PxDebugLine* segments = out.reserveSegments(nbTriangles*3); + for(PxU32 i=0; i<nbTriangles; i++) + { + const PxU32 index = results[i]; + PxVec3 wp[3]; + getTriangle(*this, index, wp, vertices, indices, absPose, has16BitIndices()); + segments[0] = PxDebugLine(wp[0], wp[1], scolor); + segments[1] = PxDebugLine(wp[1], wp[2], scolor); + segments[2] = PxDebugLine(wp[2], wp[0], scolor); + segments+=3; + } + } + + if ((mask & (PxU64(1) << PxVisualizationParameter::eCOLLISION_EDGES)) && mExtraTrigData) + visualizeActiveEdges(out, *this, nbTriangles, results, absPose, midt); + + PX_FREE(results); + } + else + { + if (fscale) + { + const PxU32 fcolor = PxU32(PxDebugColor::eARGB_DARKRED); + + for (PxU32 i=0; i<nbTriangles; i++) + { + PxVec3 wp[3]; + getTriangle(*this, i, wp, vertices, indices, absPose, has16BitIndices()); + + const PxVec3 center = (wp[0] + wp[1] + wp[2]) / 3.0f; + PxVec3 normal = (wp[0] - wp[1]).cross(wp[0] - wp[2]); + PX_ASSERT(!normal.isZero()); + normal = normal.getNormalized(); + + out << midt << fcolor << + Cm::DebugArrow(center, normal * fscale); + } + } + + if (mask & (PxU64(1) << PxVisualizationParameter::eCOLLISION_SHAPES)) + { + PxU32 scolor = PxU32(PxDebugColor::eARGB_MAGENTA); + + out << midt << scolor; // PT: no need to output this for each segment! 
+ + PxVec3* transformed = reinterpret_cast<PxVec3*>(PX_ALLOC(sizeof(PxVec3)*nbVertices, "PxVec3")); + for(PxU32 i=0;i<nbVertices;i++) + transformed[i] = absPose.transform(vertices[i]); + + PxDebugLine* segments = out.reserveSegments(nbTriangles*3); + for (PxU32 i=0; i<nbTriangles; i++) + { + PxVec3 wp[3]; + getTriangle(*this, i, wp, transformed, indices, has16BitIndices()); + const PxU32 localMaterialIndex = getTriangleMaterialIndex(i); + scolor = colors[localMaterialIndex % colorCount]; + + segments[0] = PxDebugLine(wp[0], wp[1], scolor); + segments[1] = PxDebugLine(wp[1], wp[2], scolor); + segments[2] = PxDebugLine(wp[2], wp[0], scolor); + segments+=3; + } + + PX_FREE(transformed); + } + + if ((mask & (PxU64(1) << PxVisualizationParameter::eCOLLISION_EDGES)) && mExtraTrigData) + visualizeActiveEdges(out, *this, nbTriangles, NULL, absPose, midt); + } +} + +#endif // #if PX_ENABLE_DEBUG_VISUALIZATION + +} // namespace physx diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMesh.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMesh.h new file mode 100644 index 00000000..854f43b5 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMesh.h @@ -0,0 +1,302 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_TRIANGLEMESH_H +#define GU_TRIANGLEMESH_H + +#include "foundation/PxIO.h" +#include "PxSimpleTriangleMesh.h" +#include "PxTriangleMeshGeometry.h" +#include "CmScaling.h" +#include "GuTriangleMesh.h" +#include "GuTriangle32.h" +#include "CmRefCountable.h" +#include "PxTriangle.h" +#include "PxTriangleMesh.h" +#include "CmRenderOutput.h" +#include "GuMeshData.h" +#include "GuCenterExtents.h" + +namespace physx +{ + +class GuMeshFactory; +class PxMeshScale; + +namespace Gu +{ + +#if PX_VC +#pragma warning(push) +#pragma warning(disable: 4324) // Padding was added at the end of a structure because of a __declspec(align) value. +#endif + +// Possible optimization: align the whole struct to cache line +class TriangleMesh : public PxTriangleMesh, public Ps::UserAllocated, public Cm::RefCountable +{ +//= ATTENTION! 
===================================================================================== +// Changing the data layout of this class breaks the binary serialization format. See comments for +// PX_BINARY_SERIAL_VERSION. If a modification is required, please adjust the getBinaryMetaData +// function. If the modification is made on a custom branch, please change PX_BINARY_SERIAL_VERSION +// accordingly. +//================================================================================================== +public: + +// PX_SERIALIZATION + TriangleMesh(PxBaseFlags baseFlags) : PxTriangleMesh(baseFlags), Cm::RefCountable(PxEmpty) {} + virtual void exportExtraData(PxSerializationContext& ctx); + void importExtraData(PxDeserializationContext&); + PX_PHYSX_COMMON_API static void getBinaryMetaData(PxOutputStream& stream); + virtual void release(); + + void resolveReferences(PxDeserializationContext& ) {} + virtual void requires(PxProcessPxBaseCallback&){} +//~PX_SERIALIZATION + +// Cm::RefCountable + virtual void onRefCountZero(); +//~Cm::RefCountable + + TriangleMesh(GuMeshFactory& factory, TriangleMeshData& data); + virtual ~TriangleMesh(); + +// PxTriangleMesh + virtual PxU32 getNbVertices() const { return mNbVertices; } + virtual const PxVec3* getVertices() const { return mVertices; } + virtual const PxU32* getTrianglesRemap() const { return mFaceRemap; } + virtual PxU32 getNbTriangles() const { return mNbTriangles; } + virtual const void* getTriangles() const { return mTriangles; } + virtual PxTriangleMeshFlags getTriangleMeshFlags() const { return PxTriangleMeshFlags(mFlags); } + virtual PxMaterialTableIndex getTriangleMaterialIndex(PxTriangleID triangleIndex) const { + return hasPerTriangleMaterials() ? 
getMaterials()[triangleIndex] : PxMaterialTableIndex(0xffff); } + +#if PX_ENABLE_DYNAMIC_MESH_RTREE + virtual PxVec3* getVerticesForModification(); + virtual PxBounds3 refitBVH(); +#endif + + virtual PxBounds3 getLocalBounds() const + { + PX_ASSERT(mAABB.isValid()); + return PxBounds3::centerExtents(mAABB.mCenter, mAABB.mExtents); + } + + virtual void acquireReference() { incRefCount(); } + virtual PxU32 getReferenceCount() const { return getRefCount(); } +//~PxTriangleMesh + // PT: this one is just to prevent instancing Gu::TriangleMesh. + // But you should use PxBase::getConcreteType() instead to avoid the virtual call. + virtual PxMeshMidPhase::Enum getMidphaseID() const = 0; + + PX_FORCE_INLINE const PxU32* getFaceRemap() const { return mFaceRemap; } + PX_FORCE_INLINE bool has16BitIndices() const { return (mFlags & PxMeshFlag::e16_BIT_INDICES) ? true : false; } + PX_FORCE_INLINE bool hasPerTriangleMaterials() const { return mMaterialIndices != NULL; } + PX_FORCE_INLINE PxU32 getNbVerticesFast() const { return mNbVertices; } + PX_FORCE_INLINE PxU32 getNbTrianglesFast() const { return mNbTriangles; } + PX_FORCE_INLINE const void* getTrianglesFast() const { return mTriangles; } + PX_FORCE_INLINE const PxVec3* getVerticesFast() const { return mVertices; } + PX_FORCE_INLINE const PxU32* getAdjacencies() const { return mAdjacencies; } + PX_FORCE_INLINE PxReal getGeomEpsilon() const { return mGeomEpsilon; } + PX_FORCE_INLINE const CenterExtents& getLocalBoundsFast() const { return mAABB; } + PX_FORCE_INLINE const PxU16* getMaterials() const { return mMaterialIndices; } + PX_FORCE_INLINE const PxU8* getExtraTrigData() const { return mExtraTrigData; } + + PX_FORCE_INLINE const CenterExtentsPadded& getPaddedBounds() const + { + // PT: see compile-time assert in cpp + return static_cast<const CenterExtentsPadded&>(mAABB); + } + + PX_FORCE_INLINE void computeWorldTriangle( + PxTriangle& worldTri, PxTriangleID triangleIndex, const Cm::Matrix34& worldMatrix, bool flipNormal 
= false, + PxU32* PX_RESTRICT vertexIndices=NULL, PxU32* PX_RESTRICT adjacencyIndices=NULL) const; + PX_FORCE_INLINE void getLocalTriangle(PxTriangle& localTri, PxTriangleID triangleIndex, bool flipNormal = false) const; + + void setMeshFactory(GuMeshFactory* factory) { mMeshFactory = factory; } + +protected: + PxU32 mNbVertices; + PxU32 mNbTriangles; + PxVec3* mVertices; + void* mTriangles; //!< 16 (<= 0xffff #vertices) or 32 bit trig indices (mNbTriangles * 3) + // 16 bytes block + + // PT: WARNING: bounds must be followed by at least 32bits of data for safe SIMD loading + CenterExtents mAABB; + PxU8* mExtraTrigData; //one per trig + PxReal mGeomEpsilon; //!< see comments in cooking code referencing this variable + // 16 bytes block + /* + low 3 bits (mask: 7) are the edge flags: + b001 = 1 = ignore edge 0 = edge v0-->v1 + b010 = 2 = ignore edge 1 = edge v0-->v2 + b100 = 4 = ignore edge 2 = edge v1-->v2 + */ + PxU8 mFlags; //!< Flag whether indices are 16 or 32 bits wide + //!< Flag whether triangle adajacencies are build + PxU16* mMaterialIndices; //!< the size of the array is numTriangles. + PxU32* mFaceRemap; //!< new faces to old faces mapping (after cleaning, etc). Usage: old = faceRemap[new] + PxU32* mAdjacencies; //!< Adjacency information for each face - 3 adjacent faces + //!< Set to 0xFFFFffff if no adjacent face + + GuMeshFactory* mMeshFactory; // PT: changed to pointer for serialization + +#if PX_ENABLE_DEBUG_VISUALIZATION +public: + /** + \brief Perform triangle mesh geometry debug visualization + + \param out Debug renderer. + \param pose World position. 
+ */ + void debugVisualize( Cm::RenderOutput& out, const PxTransform& pose, const PxMeshScale& scaling, const PxBounds3& cullbox, + const PxU64 mask, const PxReal fscale, const PxU32 numMaterials) const; +#endif + +public: + + // GRB data ------------------------- + void * mGRB_triIndices; //!< GRB: GPU-friendly tri indices [uint4] + + // TODO avoroshilov: cooking - adjacency info - duplicated, remove it and use 'mAdjacencies' and 'mExtraTrigData' see GuTriangleMesh.cpp:325 + void * mGRB_triAdjacencies; //!< GRB: adjacency data, with BOUNDARY and NONCONVEX flags (flags replace adj indices where applicable) + PxU32 * mGRB_vertValency; //!< GRB: number of adjacent vertices to a vertex + PxU32 * mGRB_adjVertStart; //!< GRB: offset for each vertex in the adjacency list + PxU32 * mGRB_adjVertices; //!< GRB: list of adjacent vertices + + PxU32 mGRB_meshAdjVerticiesTotal; //!< GRB: total number of indices in the 'mGRB_adjVertices' + PxU32* mGRB_faceRemap; //!< GRB : gpu to cpu triangle indice remap + void* mGRB_BV32Tree; //!< GRB: BV32 tree + // End of GRB data ------------------ + +}; + +#if PX_VC +#pragma warning(pop) +#endif + +} // namespace Gu + +PX_FORCE_INLINE void Gu::TriangleMesh::computeWorldTriangle(PxTriangle& worldTri, PxTriangleID triangleIndex, const Cm::Matrix34& worldMatrix, bool flipNormal, + PxU32* PX_RESTRICT vertexIndices, PxU32* PX_RESTRICT adjacencyIndices) const +{ + PxU32 vref0, vref1, vref2; + if(has16BitIndices()) + { + const Gu::TriangleT<PxU16>& T = (reinterpret_cast<const Gu::TriangleT<PxU16>*>(getTrianglesFast()))[triangleIndex]; + vref0 = T.v[0]; + vref1 = T.v[1]; + vref2 = T.v[2]; + } + else + { + const Gu::TriangleT<PxU32>& T = (reinterpret_cast<const Gu::TriangleT<PxU32>*>(getTrianglesFast()))[triangleIndex]; + vref0 = T.v[0]; + vref1 = T.v[1]; + vref2 = T.v[2]; + } + if (flipNormal) + Ps::swap<PxU32>(vref1, vref2); + const PxVec3* PX_RESTRICT vertices = getVerticesFast(); + worldTri.verts[0] = worldMatrix.transform(vertices[vref0]); + 
worldTri.verts[1] = worldMatrix.transform(vertices[vref1]); + worldTri.verts[2] = worldMatrix.transform(vertices[vref2]); + + if(vertexIndices) + { + vertexIndices[0] = vref0; + vertexIndices[1] = vref1; + vertexIndices[2] = vref2; + } + + if(adjacencyIndices) + { + if(getAdjacencies()) + { + adjacencyIndices[0] = flipNormal ? getAdjacencies()[triangleIndex*3 + 2] : getAdjacencies()[triangleIndex*3 + 0]; + adjacencyIndices[1] = getAdjacencies()[triangleIndex*3 + 1]; + adjacencyIndices[2] = flipNormal ? getAdjacencies()[triangleIndex*3 + 0] : getAdjacencies()[triangleIndex*3 + 2]; + } + else + { + adjacencyIndices[0] = 0xffffffff; + adjacencyIndices[1] = 0xffffffff; + adjacencyIndices[2] = 0xffffffff; + } + } +} + +PX_FORCE_INLINE void Gu::TriangleMesh::getLocalTriangle(PxTriangle& localTri, PxTriangleID triangleIndex, bool flipNormal) const +{ + PxU32 vref0, vref1, vref2; + if(has16BitIndices()) + { + const Gu::TriangleT<PxU16>& T = (reinterpret_cast<const Gu::TriangleT<PxU16>*>(getTrianglesFast()))[triangleIndex]; + vref0 = T.v[0]; + vref1 = T.v[1]; + vref2 = T.v[2]; + } + else + { + const Gu::TriangleT<PxU32>& T = (reinterpret_cast<const Gu::TriangleT<PxU32>*>(getTrianglesFast()))[triangleIndex]; + vref0 = T.v[0]; + vref1 = T.v[1]; + vref2 = T.v[2]; + } + if (flipNormal) + Ps::swap<PxU32>(vref1, vref2); + const PxVec3* PX_RESTRICT vertices = getVerticesFast(); + localTri.verts[0] = vertices[vref0]; + localTri.verts[1] = vertices[vref1]; + localTri.verts[2] = vertices[vref2]; +} + +PX_INLINE float computeSweepData(const PxTriangleMeshGeometry& triMeshGeom, /*const Cm::FastVertex2ShapeScaling& scaling,*/ PxVec3& sweepOrigin, PxVec3& sweepExtents, PxVec3& sweepDir, float distance) +{ + PX_ASSERT(!Cm::isEmpty(sweepOrigin, sweepExtents)); + + const PxVec3 endPt = sweepOrigin + sweepDir*distance; + PX_ASSERT(!Cm::isEmpty(endPt, sweepExtents)); + + const Cm::FastVertex2ShapeScaling meshScaling(triMeshGeom.scale.getInverse()); // shape to vertex transform + + const 
PxMat33& vertex2ShapeSkew = meshScaling.getVertex2ShapeSkew(); + + const PxVec3 originBoundsCenter = vertex2ShapeSkew * sweepOrigin; + const PxVec3 originBoundsExtents = Cm::basisExtent(vertex2ShapeSkew.column0, vertex2ShapeSkew.column1, vertex2ShapeSkew.column2, sweepExtents); + + sweepOrigin = originBoundsCenter; + sweepExtents = originBoundsExtents; + sweepDir = (vertex2ShapeSkew * endPt) - originBoundsCenter; + return sweepDir.normalizeSafe(); +} + +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshBV4.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshBV4.cpp new file mode 100644 index 00000000..f10409e2 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshBV4.cpp @@ -0,0 +1,76 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "GuTriangleMesh.h" +#include "GuTriangleMeshBV4.h" + +using namespace physx; + +namespace physx +{ + +Gu::BV4TriangleMesh::BV4TriangleMesh(GuMeshFactory& factory, TriangleMeshData& d) +: TriangleMesh(factory, d) +{ + PX_ASSERT(d.mType==PxMeshMidPhase::eBVH34); + + BV4TriangleData& bv4Data = static_cast<BV4TriangleData&>(d); + mMeshInterface = bv4Data.mMeshInterface; + mBV4Tree = bv4Data.mBV4Tree; + mBV4Tree.mMeshInterface = &mMeshInterface; +} + +Gu::TriangleMesh* Gu::BV4TriangleMesh::createObject(PxU8*& address, PxDeserializationContext& context) +{ + BV4TriangleMesh* obj = new (address) BV4TriangleMesh(PxBaseFlag::eIS_RELEASABLE); + address += sizeof(BV4TriangleMesh); + obj->importExtraData(context); + obj->resolveReferences(context); + return obj; +} + +void Gu::BV4TriangleMesh::exportExtraData(PxSerializationContext& stream) +{ + mBV4Tree.exportExtraData(stream); + TriangleMesh::exportExtraData(stream); +} + +void Gu::BV4TriangleMesh::importExtraData(PxDeserializationContext& context) +{ + mBV4Tree.importExtraData(context); + TriangleMesh::importExtraData(context); + + if(has16BitIndices()) + mMeshInterface.setPointers(NULL, const_cast<IndTri16*>(reinterpret_cast<const IndTri16*>(getTrianglesFast())), getVerticesFast()); + else + mMeshInterface.setPointers(const_cast<IndTri32*>(reinterpret_cast<const IndTri32*>(getTrianglesFast())), NULL, 
getVerticesFast()); + mBV4Tree.mMeshInterface = &mMeshInterface; +} + +} // namespace physx diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshBV4.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshBV4.h new file mode 100644 index 00000000..608f5d2d --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshBV4.h @@ -0,0 +1,76 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. 
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_TRIANGLEMESH_BV4_H +#define GU_TRIANGLEMESH_BV4_H + +#include "GuTriangleMesh.h" + +namespace physx +{ +class GuMeshFactory; + +namespace Gu +{ + +#if PX_VC +#pragma warning(push) +#pragma warning(disable: 4324) // Padding was added at the end of a structure because of a __declspec(align) value. +#endif + +class BV4TriangleMesh : public TriangleMesh +{ + public: + virtual const char* getConcreteTypeName() const { return "PxBVH34TriangleMesh"; } +// PX_SERIALIZATION + BV4TriangleMesh(PxBaseFlags baseFlags) : TriangleMesh(baseFlags), mMeshInterface(PxEmpty), mBV4Tree(PxEmpty) {} + PX_PHYSX_COMMON_API virtual void exportExtraData(PxSerializationContext& ctx); + void importExtraData(PxDeserializationContext&); + PX_PHYSX_COMMON_API static TriangleMesh* createObject(PxU8*& address, PxDeserializationContext& context); + PX_PHYSX_COMMON_API static void getBinaryMetaData(PxOutputStream& stream); +//~PX_SERIALIZATION + BV4TriangleMesh(GuMeshFactory& factory, TriangleMeshData& data); + virtual ~BV4TriangleMesh(){} + + virtual PxMeshMidPhase::Enum getMidphaseID() const { return PxMeshMidPhase::eBVH34; } + PX_FORCE_INLINE const Gu::BV4Tree& getBV4Tree() const { return mBV4Tree; } + private: + Gu::SourceMesh mMeshInterface; + Gu::BV4Tree mBV4Tree; +}; + +#if PX_VC +#pragma warning(pop) +#endif + +} // namespace Gu + +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshRTree.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshRTree.cpp new file mode 100644 index 00000000..ec5a1931 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshRTree.cpp @@ -0,0 +1,148 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "GuTriangleMesh.h" +#include "GuTriangleMeshRTree.h" +#if PX_ENABLE_DYNAMIC_MESH_RTREE +#include "GuConvexEdgeFlags.h" +#endif + +using namespace physx; + +namespace physx +{ + +Gu::RTreeTriangleMesh::RTreeTriangleMesh(GuMeshFactory& factory, TriangleMeshData& d) +: TriangleMesh(factory, d) +{ + PX_ASSERT(d.mType==PxMeshMidPhase::eBVH33); + + RTreeTriangleData& rtreeData = static_cast<RTreeTriangleData&>(d); + mRTree = rtreeData.mRTree; + rtreeData.mRTree.mPages = NULL; +} + +Gu::TriangleMesh* Gu::RTreeTriangleMesh::createObject(PxU8*& address, PxDeserializationContext& context) +{ + RTreeTriangleMesh* obj = new (address) RTreeTriangleMesh(PxBaseFlag::eIS_RELEASABLE); + address += sizeof(RTreeTriangleMesh); + obj->importExtraData(context); + obj->resolveReferences(context); + return obj; +} + +void Gu::RTreeTriangleMesh::exportExtraData(PxSerializationContext& stream) +{ + mRTree.exportExtraData(stream); + TriangleMesh::exportExtraData(stream); +} + +void Gu::RTreeTriangleMesh::importExtraData(PxDeserializationContext& context) +{ + mRTree.importExtraData(context); + TriangleMesh::importExtraData(context); +} + +#if PX_ENABLE_DYNAMIC_MESH_RTREE +PxVec3 * Gu::RTreeTriangleMesh::getVerticesForModification() +{ + return const_cast<PxVec3*>(getVertices()); +} + +template<typename IndexType> +struct RefitCallback : Gu::RTree::CallbackRefit +{ + const PxVec3* newPositions; + const IndexType* indices; + + RefitCallback(const PxVec3* aNewPositions, const IndexType* aIndices) : newPositions(aNewPositions), indices(aIndices) {} + PX_FORCE_INLINE ~RefitCallback() {} + + virtual void recomputeBounds(PxU32 index, shdfnd::aos::Vec3V& aMn, shdfnd::aos::Vec3V& aMx) + { + using namespace shdfnd::aos; + + // Each leaf box has a set of triangles + Gu::LeafTriangles currentLeaf; currentLeaf.Data = index; + PxU32 nbTris = currentLeaf.GetNbTriangles(); + PxU32 baseTri = currentLeaf.GetTriangleIndex(); + PX_ASSERT(nbTris > 0); + const IndexType* vInds = indices + 3 * baseTri; 
+ Vec3V vPos = V3LoadU(newPositions[vInds[0]]); + Vec3V mn = vPos, mx = vPos; + //PxBounds3 result(newPositions[vInds[0]], newPositions[vInds[0]]); + vPos = V3LoadU(newPositions[vInds[1]]); + mn = V3Min(mn, vPos); mx = V3Max(mx, vPos); + vPos = V3LoadU(newPositions[vInds[2]]); + mn = V3Min(mn, vPos); mx = V3Max(mx, vPos); + for (PxU32 i = 1; i < nbTris; i++) + { + const IndexType* vInds1 = indices + 3 * (baseTri + i); + vPos = V3LoadU(newPositions[vInds1[0]]); + mn = V3Min(mn, vPos); mx = V3Max(mx, vPos); + vPos = V3LoadU(newPositions[vInds1[1]]); + mn = V3Min(mn, vPos); mx = V3Max(mx, vPos); + vPos = V3LoadU(newPositions[vInds1[2]]); + mn = V3Min(mn, vPos); mx = V3Max(mx, vPos); + } + + aMn = mn; + aMx = mx; + } +}; + +PxBounds3 Gu::RTreeTriangleMesh::refitBVH() +{ + PxBounds3 meshBounds; + if (has16BitIndices()) + { + RefitCallback<PxU16> cb(mVertices, static_cast<const PxU16*>(mTriangles)); + mRTree.refitAllStaticTree(cb, &meshBounds); + } + else + { + RefitCallback<PxU32> cb(mVertices, static_cast<const PxU32*>(mTriangles)); + mRTree.refitAllStaticTree(cb, &meshBounds); + } + + // reset edge flags and remember we did that using a mesh flag (optimization) + if ((mRTree.mFlags & RTree::IS_EDGE_SET) == 0) + { + mRTree.mFlags |= RTree::IS_EDGE_SET; + const PxU32 nbTris = getNbTriangles(); + for (PxU32 i = 0; i < nbTris; i++) + mExtraTrigData[i] |= (Gu::ETD_CONVEX_EDGE_01 | Gu::ETD_CONVEX_EDGE_12 | Gu::ETD_CONVEX_EDGE_20); + } + + mAABB = meshBounds; + return meshBounds; +} +#endif + +} // namespace physx diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshRTree.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshRTree.h new file mode 100644 index 00000000..7c861663 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshRTree.h @@ -0,0 +1,81 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_TRIANGLEMESH_RTREE_H +#define GU_TRIANGLEMESH_RTREE_H + +#include "GuTriangleMesh.h" + +namespace physx +{ +class GuMeshFactory; + +namespace Gu +{ + +#if PX_VC +#pragma warning(push) +#pragma warning(disable: 4324) // Padding was added at the end of a structure because of a __declspec(align) value. 
+#endif + +class RTreeTriangleMesh : public TriangleMesh +{ + public: + virtual const char* getConcreteTypeName() const { return "PxBVH33TriangleMesh"; } +// PX_SERIALIZATION + RTreeTriangleMesh(PxBaseFlags baseFlags) : TriangleMesh(baseFlags), mRTree(PxEmpty) {} + PX_PHYSX_COMMON_API virtual void exportExtraData(PxSerializationContext& ctx); + void importExtraData(PxDeserializationContext&); + PX_PHYSX_COMMON_API static TriangleMesh* createObject(PxU8*& address, PxDeserializationContext& context); + PX_PHYSX_COMMON_API static void getBinaryMetaData(PxOutputStream& stream); +//~PX_SERIALIZATION + RTreeTriangleMesh(GuMeshFactory& factory, TriangleMeshData& data); + virtual ~RTreeTriangleMesh(){} + + virtual PxMeshMidPhase::Enum getMidphaseID() const { return PxMeshMidPhase::eBVH33; } + +#if PX_ENABLE_DYNAMIC_MESH_RTREE + virtual PxVec3* getVerticesForModification(); + virtual PxBounds3 refitBVH(); +#endif + + PX_FORCE_INLINE const Gu::RTree& getRTree() const { return mRTree; } + private: + Gu::RTree mRTree; +}; + +#if PX_VC +#pragma warning(pop) +#endif + +} // namespace Gu + +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleVertexPointers.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleVertexPointers.h new file mode 100644 index 00000000..952f6998 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleVertexPointers.h @@ -0,0 +1,65 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_TRIANGLE_VERTEX_POINTERS_H +#define GU_TRIANGLE_VERTEX_POINTERS_H + +#include "PxTriangleMesh.h" +#include "GuTriangleMesh.h" + +namespace physx { + namespace Gu { + + // PT: TODO: replace with Gu::TriangleMesh::getLocalTriangle(...) 
+ struct TriangleVertexPointers + { + static void PX_FORCE_INLINE getTriangleVerts(const TriangleMesh* mesh, PxU32 triangleIndex, PxVec3& v0, PxVec3& v1, PxVec3& v2) + { + const PxVec3* verts = mesh->getVerticesFast(); + if(mesh->has16BitIndices()) + { + const PxU16* tris = reinterpret_cast<const PxU16*>(mesh->getTrianglesFast()); + const PxU16* inds = tris+triangleIndex*3; + v0 = verts[inds[0]]; + v1 = verts[inds[1]]; + v2 = verts[inds[2]]; + } + else + { + const PxU32* tris = reinterpret_cast<const PxU32*>(mesh->getTrianglesFast()); + const PxU32* inds = tris+triangleIndex*3; + v0 = verts[inds[0]]; + v1 = verts[inds[1]]; + v2 = verts[inds[2]]; + } + } + }; +} } // physx, Gu + +#endif |