// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of NVIDIA CORPORATION nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Copyright (c) 2008-2018 NVIDIA Corporation. All rights reserved. // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "foundation/PxVec4.h"
#include "foundation/PxMemory.h"
#include "GuBV4Build.h"
#include "GuBV4.h"
#include "PxTriangle.h"
#include "CmPhysXCommon.h"
#include "PsBasicTemplates.h"
#include "GuCenterExtents.h"

using namespace physx;
using namespace Gu;

#include "PsVecMath.h"
using namespace physx::shdfnd::aos;

#define GU_BV4_USE_NODE_POOLS

#define DELETESINGLE(x)	if (x) { delete x;		x = NULL; }
#define DELETEARRAY(x)	if (x) { delete []x;	x = NULL; }

// Returns the index of the largest of the first three components of v (0=x, 1=y, 2=z).
// The w component is ignored. Used to select the split axis with the greatest variance.
static PX_FORCE_INLINE PxU32 largestAxis(const PxVec4& v)
{
	const float* Vals = &v.x;
	PxU32 m = 0;
	if(Vals[1] > Vals[m])
		m = 1;
	if(Vals[2] > Vals[m])
		m = 2;
	return m;
}

AABBTree::AABBTree() : mIndices(NULL), mPool(NULL), mTotalNbNodes(0)
{
}

AABBTree::~AABBTree()
{
	release();
}

// Frees the linear node pool and the primitive-index permutation.
void AABBTree::release()
{
	DELETEARRAY(mPool);
	PX_FREE_AND_RESET(mIndices);
}

// Partitions the primitives of 'node' against the center of the node's bounds along
// 'axis'. Primitives whose center lies above the split value are swapped to the front
// of the node's primitive range. Returns the number of primitives in the positive part.
static PxU32 local_Split(const AABBTreeNode* PX_RESTRICT node, const PxBounds3* PX_RESTRICT /*Boxes*/, const PxVec3* PX_RESTRICT centers, PxU32 axis)
{
	const PxU32 nb = node->mNbPrimitives;
	PxU32* PX_RESTRICT prims = node->mNodePrimitives;

	// Get node split value
	const float splitValue = node->mBV.getCenter(axis);

	PxU32 nbPos = 0;
	// Loop through all node-related primitives. Their indices range from mNodePrimitives[0] to mNodePrimitives[mNbPrimitives-1].
	// Those indices map the global list in the tree builder.

	// PT: offset the base pointer by 'axis' floats so the loop below can always read ".x".
	const size_t ptrValue = size_t(centers) + axis*sizeof(float);
	const PxVec3* PX_RESTRICT centersX = reinterpret_cast<const PxVec3*>(ptrValue);

	for(PxU32 i=0;i<nb;i++)
	{
		// Index in the global list
		const PxU32 index = prims[i];

		// Test the primitive's center against the splitting value
		if(centersX[index].x > splitValue)
		{
			// Swap entries
			prims[i] = prims[nbPos];
			prims[nbPos] = index;
			// Count primitives assigned to positive space
			nbPos++;
		}
	}
	return nbPos;
}

// Computes the node's bounds from its primitives and, if the node holds more than
// 'limit' primitives, splits it along the axis of largest centroid variance. On
// success the two children are carved out of the pre-allocated linear pool
// ('node_base' + current node count). Returns true if the node was subdivided.
static bool local_Subdivide(AABBTreeNode* PX_RESTRICT node, const PxBounds3* PX_RESTRICT boxes, const PxVec3* PX_RESTRICT centers, BuildStats& stats, const AABBTreeNode* const PX_RESTRICT node_base, PxU32 limit)
{
	const PxU32* PX_RESTRICT prims = node->mNodePrimitives;
	const PxU32 nb = node->mNbPrimitives;

	// Compute bv & means at the same time
	Vec4V meansV;
	{
		Vec4V minV = V4LoadU(&boxes[prims[0]].minimum.x);
		Vec4V maxV = V4LoadU(&boxes[prims[0]].maximum.x);
		meansV = V4LoadU(&centers[prims[0]].x);

		// NOTE(review): loop body reconstructed (stripped in source) — merge all
		// primitive boxes and accumulate the centers for the mean.
		for(PxU32 i=1;i<nb;i++)
		{
			const PxU32 index = prims[i];
			minV = V4Min(minV, V4LoadU(&boxes[index].minimum.x));
			maxV = V4Max(maxV, V4LoadU(&boxes[index].maximum.x));
			meansV = V4Add(meansV, V4LoadU(&centers[index].x));
		}

		PxVec4 mergedMin;
		V4StoreU(minV, &mergedMin.x);
		PxVec4 mergedMax;
		V4StoreU(maxV, &mergedMax.x);
		node->mBV.minimum = PxVec3(mergedMin.x, mergedMin.y, mergedMin.z);
		node->mBV.maximum = PxVec3(mergedMax.x, mergedMax.y, mergedMax.z);
	}

//	// Stop subdividing if we reach a leaf node. This is always performed here,
//	// else we could end in trouble if user overrides this.
//	if(nb==1)
//		return false;

	if(nb<=limit)
		return false;

	bool validSplit = true;
	PxU32 nbPos;
	{
		// Compute variances
		Vec4V varsV = V4Zero();
		// NOTE(review): reconstructed — mean & variance of primitive centers, then
		// split along the axis of largest variance.
		meansV = V4Scale(meansV, FLoad(1.0f/float(nb)));
		for(PxU32 i=0;i<nb;i++)
		{
			const PxU32 index = prims[i];
			Vec4V centerV = V4LoadU(&centers[index].x);
			centerV = V4Sub(centerV, meansV);
			centerV = V4Mul(centerV, centerV);
			varsV = V4Add(varsV, centerV);
		}
		varsV = V4Scale(varsV, FLoad(1.0f/float(nb-1)));
		PX_ALIGN(16, PxVec4) vars;
		V4StoreA(varsV, &vars.x);

		// Choose axis with greatest variance
		const PxU32 axis = largestAxis(vars);

		// Split along the axis
		nbPos = local_Split(node, boxes, centers, axis);

		// Check split validity
		if(!nbPos || nbPos==nb)
			validSplit = false;
	}

	// Check the subdivision has been successful
	if(!validSplit)
	{
		// Here, all boxes lie in the same sub-space. Two strategies:
		// - if we are over the split limit, make an arbitrary 50-50 split
		// - else stop subdividing
		if(nb>limit)
		{
			nbPos = node->mNbPrimitives>>1;
			if(1)
			{
				// Test 3 axis, take the best
				float results[3];
				nbPos = local_Split(node, boxes, centers, 0);	results[0] = float(nbPos)/float(node->mNbPrimitives);
				nbPos = local_Split(node, boxes, centers, 1);	results[1] = float(nbPos)/float(node->mNbPrimitives);
				nbPos = local_Split(node, boxes, centers, 2);	results[2] = float(nbPos)/float(node->mNbPrimitives);
				results[0]-=0.5f;	results[0]*=results[0];
				results[1]-=0.5f;	results[1]*=results[1];
				results[2]-=0.5f;	results[2]*=results[2];
				PxU32 Min=0;
				// NOTE(review): reconstructed — keep the axis whose split ratio is
				// closest to 50-50, and fall back to a 50-50 split if it degenerates.
				if(results[1]<results[Min])
					Min = 1;
				if(results[2]<results[Min])
					Min = 2;
				nbPos = local_Split(node, boxes, centers, Min);
				if(!nbPos || nbPos==node->mNbPrimitives)
					nbPos = node->mNbPrimitives>>1;
			}
		}
		//else return
	}

	// Now create children and assign their pointers.
	// We use a pre-allocated linear pool for complete trees [Opcode 1.3]
	const PxU32 count = stats.getCount();
	node->mPos = size_t(node_base + count);

	// Update stats
	stats.increaseCount(2);

	// Assign children
	AABBTreeNode* pos = const_cast<AABBTreeNode*>(node->getPos());
	AABBTreeNode* neg = const_cast<AABBTreeNode*>(node->getNeg());
	pos->mNodePrimitives = node->mNodePrimitives;
	pos->mNbPrimitives = nbPos;
	neg->mNodePrimitives = node->mNodePrimitives + nbPos;
	neg->mNbPrimitives = node->mNbPrimitives - nbPos;
	return true;
}

// Recursively subdivides the tree, depth-first, starting at 'node'.
static void local_BuildHierarchy(AABBTreeNode* PX_RESTRICT node, const PxBounds3* PX_RESTRICT Boxes, const PxVec3* PX_RESTRICT centers, BuildStats& stats, const AABBTreeNode* const PX_RESTRICT node_base, PxU32 limit)
{
	if(local_Subdivide(node, Boxes, centers, stats, node_base, limit))
	{
		AABBTreeNode* pos = const_cast<AABBTreeNode*>(node->getPos());
		AABBTreeNode* neg = const_cast<AABBTreeNode*>(node->getNeg());
		local_BuildHierarchy(pos, Boxes, centers, stats, node_base, limit);
		local_BuildHierarchy(neg, Boxes, centers, stats, node_base, limit);
	}
}

// Builds a binary AABB tree over the triangles of 'mesh'. Leaves hold at most 'limit'
// primitives. Returns false if the mesh has no triangles.
bool AABBTree::buildFromMesh(SourceMesh& mesh, PxU32 limit)
{
	const PxU32 nbBoxes = mesh.getNbTriangles();
	if(!nbBoxes)
		return false;

	PxBounds3* boxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*(nbBoxes+1), "BV4"));	// PT: +1 to safely V4Load/V4Store the last element
	PxVec3* centers = reinterpret_cast<PxVec3*>(PX_ALLOC(sizeof(PxVec3)*(nbBoxes+1), "BV4"));	// PT: +1 to safely V4Load/V4Store the last element

	const FloatV halfV = FLoad(0.5f);
	// NOTE(review): loop header & triangle fetch reconstructed — compute each
	// triangle's AABB and center.
	for(PxU32 i=0;i<nbBoxes;i++)
	{
		VertexPointers VP;
		mesh.getTriangle(VP, i);
		const Vec4V v0V = V4LoadU(&VP.Vertex[0]->x);
		const Vec4V v1V = V4LoadU(&VP.Vertex[1]->x);
		const Vec4V v2V = V4LoadU(&VP.Vertex[2]->x);
		Vec4V minV = V4Min(v0V, v1V);
		minV = V4Min(minV, v2V);
		Vec4V maxV = V4Max(v0V, v1V);
		maxV = V4Max(maxV, v2V);
		V4StoreU_Safe(minV, &boxes[i].minimum.x);	// PT: safe because 'maximum' follows 'minimum'
		V4StoreU_Safe(maxV, &boxes[i].maximum.x);	// PT: safe because we allocated one more box
		const Vec4V centerV = V4Scale(V4Add(maxV, minV), halfV);
		V4StoreU_Safe(centerV, &centers[i].x);	// PT: safe because we allocated one more PxVec3
	}

	{
		// Release previous tree
		release();

		// Init stats
		BuildStats Stats;
		Stats.setCount(1);

		// Initialize indices. This list will be modified during build.
		mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbBoxes, "BV4 indices"));
		// Identity permutation
		for(PxU32 i=0;i<nbBoxes;i++)
			mIndices[i] = i;

		// NOTE(review): pool allocation reconstructed — a binary tree with nbBoxes
		// leaves has at most 2*nbBoxes-1 nodes.
		mPool = PX_NEW(AABBTreeNode)[nbBoxes*2-1];

		// Setup initial node: it references the whole (identity) permutation.
		mPool->mNodePrimitives = mIndices;
		mPool->mNbPrimitives = nbBoxes;

		// Build the hierarchy
		local_BuildHierarchy(mPool, boxes, centers, Stats, mPool, limit);

		// Get back total number of nodes
		mTotalNbNodes = Stats.getCount();
	}

	PX_FREE(centers);
	PX_FREE(boxes);
	return true;
}

// Depth-first traversal. 'cb' is invoked for each node with the current depth;
// returning false from the callback prunes that branch. Returns the maximum depth
// reached. Call it without a callback to just compute max depth.
PxU32 AABBTree::walk(WalkingCallback cb, void* userData) const
{
	// Call it without callback to compute max depth
	PxU32 maxDepth = 0;
	PxU32 currentDepth = 0;

	struct Local
	{
		static void _Walk(const AABBTreeNode* current_node, PxU32& max_depth, PxU32& current_depth, WalkingCallback callback, void* userData_)
		{
			// Checkings
			if(!current_node)
				return;
			// Entering a new node => increase depth
			current_depth++;
			// Keep track of max depth
			if(current_depth>max_depth)
				max_depth = current_depth;
			// Callback
			if(callback && !(callback)(current_node, current_depth, userData_))
				return;
			// Recurse
			if(current_node->getPos())
			{
				_Walk(current_node->getPos(), max_depth, current_depth, callback, userData_);
				current_depth--;
			}
			if(current_node->getNeg())
			{
				_Walk(current_node->getNeg(), max_depth, current_depth, callback, userData_);
				current_depth--;
			}
		}
	};
	Local::_Walk(mPool, maxDepth, currentDepth, cb, userData);
	return maxDepth;
}

#include "GuBV4_Internal.h"

#ifdef GU_BV4_PRECOMPUTED_NODE_SORT
// PT: see http://www.codercorner.com/blog/?p=734
// Precomputes an 8-bit traversal-order code for a pair of sibling boxes: one bit per
// octant direction (PPP..NNN), derived from the sign of the projected center delta.
static PxU32 precomputeNodeSorting(const PxBounds3& box0, const PxBounds3& box1)
{
	const PxVec3 C0 = box0.getCenter();
	const PxVec3 C1 = box1.getCenter();

	// The 8 normalized octant directions
	PxVec3 dirPPP(1.0f, 1.0f, 1.0f);	dirPPP.normalize();
	PxVec3 dirPPN(1.0f, 1.0f, -1.0f);	dirPPN.normalize();
	PxVec3 dirPNP(1.0f, -1.0f, 1.0f);	dirPNP.normalize();
	PxVec3 dirPNN(1.0f, -1.0f, -1.0f);	dirPNN.normalize();
	PxVec3 dirNPP(-1.0f, 1.0f, 1.0f);	dirNPP.normalize();
	PxVec3 dirNPN(-1.0f, 1.0f, -1.0f);	dirNPN.normalize();
	PxVec3 dirNNP(-1.0f, -1.0f, 1.0f);	dirNNP.normalize();
	PxVec3 dirNNN(-1.0f, -1.0f, -1.0f);	dirNNN.normalize();

	const PxVec3 deltaC = C0 - C1;
	const bool bPPP = deltaC.dot(dirPPP)<0.0f;
	const bool bPPN = deltaC.dot(dirPPN)<0.0f;
	const bool bPNP = deltaC.dot(dirPNP)<0.0f;
	const bool bPNN = deltaC.dot(dirPNN)<0.0f;
	const bool bNPP = deltaC.dot(dirNPP)<0.0f;
	const bool bNPN = deltaC.dot(dirNPN)<0.0f;
	const bool bNNP = deltaC.dot(dirNNP)<0.0f;
	const bool bNNN = deltaC.dot(dirNNN)<0.0f;

	PxU32 code = 0;
	if(!bPPP)
		code |= (1<<7);	// Bit 0: PPP
	if(!bPPN)
		code |= (1<<6);	// Bit 1: PPN
	if(!bPNP)
		code |= (1<<5);	// Bit 2: PNP
	if(!bPNN)
		code |= (1<<4);	// Bit 3: PNN
	if(!bNPP)
		code |= (1<<3);	// Bit 4: NPP
	if(!bNPN)
		code |= (1<<2);	// Bit 5: NPN
	if(!bNNP)
		code |= (1<<1);	// Bit 6: NNP
	if(!bNNN)
		code |= (1<<0);	// Bit 7: NNN
	return code;
}
#endif

#ifdef GU_BV4_USE_SLABS
#include "GuBV4_Common.h"
#endif

// Marks a box as empty. Negative extents denote an unused/invalid box.
static void setEmpty(CenterExtents& box)
{
	box.mCenter = PxVec3(0.0f, 0.0f, 0.0f);
	box.mExtents = PxVec3(-1.0f, -1.0f, -1.0f);
}

// Data:
// 1 bit for leaf/no leaf
// 2 bits for child-node type
// 8 bits for PNS
// => 32 - 1 - 2 - 8 = 21 bits left for encoding triangle index or node *offset*
// => limited to 2.097.152 triangles
// => and 2Mb-large trees (this one may not work out well in practice)
// ==> lines marked with //* have been changed to address this. Now we don't store offsets in bytes directly
// but in BVData indices. There's more work at runtime calculating addresses, but now the format can support
// 2 million single nodes.
//
// That being said we only need 3*8 = 24 bits in total, so that could be only 6 bits in each BVData.
// For type0: we have 2 nodes, we need 8 bits => 6 bits/node = 12 bits available, ok // For type1: we have 3 nodes, we need 8*2 = 16 bits => 6 bits/node = 18 bits available, ok // For type2: we have 4 nodes, we need 8*3 = 24 bits => 6 bits/node = 24 bits available, ok //#pragma pack(1) struct BVData : public physx::shdfnd::UserAllocated { BVData(); CenterExtents mAABB; size_t mData; #ifdef GU_BV4_PRECOMPUTED_NODE_SORT PxU32 mTempPNS; #endif }; //#pragma pack() BVData::BVData() : mData(PX_INVALID_U32) { setEmpty(mAABB); #ifdef GU_BV4_PRECOMPUTED_NODE_SORT mTempPNS = 0; #endif } struct BV4Node : public physx::shdfnd::UserAllocated { PX_FORCE_INLINE BV4Node() {} PX_FORCE_INLINE ~BV4Node() {} BVData mBVData[4]; PX_FORCE_INLINE size_t isLeaf(PxU32 i) const { return mBVData[i].mData&1; } PX_FORCE_INLINE PxU32 getPrimitive(PxU32 i) const { return PxU32(mBVData[i].mData>>1); } PX_FORCE_INLINE const BV4Node* getChild(PxU32 i) const { return reinterpret_cast(mBVData[i].mData); } PxU32 getType() const { PxU32 Nb=0; for(PxU32 i=0;i<4;i++) { if(mBVData[i].mData!=PX_INVALID_U32) Nb++; } return Nb; } PxU32 getSize() const { const PxU32 type = getType(); return sizeof(BVData)*type; } }; #define NB_NODES_PER_SLAB 256 struct BV4BuildParams { PX_FORCE_INLINE BV4BuildParams(float epsilon) : mEpsilon(epsilon) #ifdef GU_BV4_USE_NODE_POOLS ,mTop(NULL) #endif {} ~BV4BuildParams(); // Stats PxU32 mNbNodes; PxU32 mStats[4]; // float mEpsilon; #ifdef GU_BV4_USE_NODE_POOLS // struct Slab : public physx::shdfnd::UserAllocated { BV4Node mNodes[NB_NODES_PER_SLAB]; PxU32 mNbUsedNodes; Slab* mNext; }; Slab* mTop; BV4Node* allocateNode(); void releaseNodes(); #endif }; BV4BuildParams::~BV4BuildParams() { #ifdef GU_BV4_USE_NODE_POOLS releaseNodes(); #endif } #ifdef GU_BV4_USE_NODE_POOLS BV4Node* BV4BuildParams::allocateNode() { if(!mTop || mTop->mNbUsedNodes==NB_NODES_PER_SLAB) { Slab* newSlab = PX_NEW(Slab); newSlab->mNbUsedNodes = 0; newSlab->mNext = mTop; mTop = newSlab; } return 
&mTop->mNodes[mTop->mNbUsedNodes++]; } void BV4BuildParams::releaseNodes() { Slab* current = mTop; while(current) { Slab* next = current->mNext; PX_DELETE(current); current = next; } mTop = NULL; } #endif static void setPrimitive(const AABBTree& source, BV4Node* node4, PxU32 i, const AABBTreeNode* node, float epsilon) { const PxU32 nbPrims = node->getNbPrimitives(); PX_ASSERT(nbPrims<16); const PxU32* indexBase = source.getIndices(); const PxU32* prims = node->getPrimitives(); const PxU32 offset = PxU32(prims - indexBase); for(PxU32 j=0;jmBVData[i].mAABB = node->getAABB(); if(epsilon!=0.0f) node4->mBVData[i].mAABB.mExtents += PxVec3(epsilon, epsilon, epsilon); node4->mBVData[i].mData = (primitiveIndex<<1)|1; } static BV4Node* setNode(const AABBTree& source, BV4Node* node4, PxU32 i, const AABBTreeNode* node, BV4BuildParams& params) { BV4Node* child = NULL; if(node->isLeaf()) { setPrimitive(source, node4, i, node, params.mEpsilon); } else { node4->mBVData[i].mAABB = node->getAABB(); if(params.mEpsilon!=0.0f) node4->mBVData[i].mAABB.mExtents += PxVec3(params.mEpsilon); params.mNbNodes++; #ifdef GU_BV4_USE_NODE_POOLS child = params.allocateNode(); #else child = PX_NEW(BV4Node); #endif node4->mBVData[i].mData = size_t(child); } return child; } static void _BuildBV4(const AABBTree& source, BV4Node* tmp, const AABBTreeNode* current_node, BV4BuildParams& params) { PX_ASSERT(!current_node->isLeaf()); // In the regular tree we have current node A, and: // ____A____ // P N // __|__ __|__ // PP PN NP NN // // For PNS we have: // bit0 to sort P|N // bit1 to sort PP|PN // bit2 to sort NP|NN // // As much as possible we need to preserve the original order in BV4, if we want to reuse the same PNS bits. 
// // bit0|bit1|bit2 Order 8bits code // 0 0 0 PP PN NP NN 0 1 2 3 // 0 0 1 PP PN NN NP 0 1 3 2 // 0 1 0 PN PP NP NN 1 0 2 3 // 0 1 1 PN PP NN NP 1 0 3 2 // 1 0 0 NP NN PP PN 2 3 0 1 // 1 0 1 NN NP PP PN 3 2 0 1 // 1 1 0 NP NN PN PP 2 3 1 0 // 1 1 1 NN NP PN PP 3 2 1 0 // // So we can fetch/compute the sequence from the bits, combine it with limitations from the node type, and process the nodes in order. In theory. // 8*8bits => the whole thing fits in a single 64bit register, so we could potentially use a "register LUT" here. const AABBTreeNode* P = current_node->getPos(); const AABBTreeNode* N = current_node->getNeg(); const bool PLeaf = P->isLeaf(); const bool NLeaf = N->isLeaf(); if(PLeaf) { if(NLeaf) { // Case 1: P and N are both leaves: // ____A____ // P N // => store as (P,N) and keep bit0 params.mStats[0]++; // PN leaves => store 2 triangle pointers, lose 50% of node space setPrimitive(source, tmp, 0, P, params.mEpsilon); setPrimitive(source, tmp, 1, N, params.mEpsilon); #ifdef GU_BV4_PRECOMPUTED_NODE_SORT tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV); #endif } else { // Case 2: P leaf, N no leaf // ____A____ // P N // __|__ // NP NN // => store as (P,NP,NN), keep bit0 and bit2 params.mStats[1]++; // P leaf => store 1 triangle pointers and 2 node pointers // => 3 slots used, 25% wasted setPrimitive(source, tmp, 0, P, params.mEpsilon); // const AABBTreeNode* NP = N->getPos(); const AABBTreeNode* NN = N->getNeg(); //#define NODE_FUSION #ifdef NODE_FUSION PxU32 c=0; BV4Node* ChildNP; if(!NP->isLeaf() && NP->getPos()->isLeaf() && NP->getNeg()->isLeaf()) { // Drag the terminal leaves directly into this BV4 node, drop internal node NP setPrimitive(source, tmp, 1, NP->getPos(), params.mEpsilon); setPrimitive(source, tmp, 2, NP->getNeg(), params.mEpsilon); ChildNP = NULL; params.mStats[1]--; params.mStats[3]++; c=1; } else { ChildNP = setNode(source, tmp, 1, NP, params); } BV4Node* ChildNN; if(c==0 && !NN->isLeaf() && NN->getPos()->isLeaf() && 
NN->getNeg()->isLeaf()) { // Drag the terminal leaves directly into this BV4 node, drop internal node NN setPrimitive(source, tmp, 2, NN->getPos(), params.mEpsilon); setPrimitive(source, tmp, 3, NN->getNeg(), params.mEpsilon); ChildNN = NULL; params.mStats[1]--; params.mStats[3]++; } else { ChildNN = setNode(source, tmp, 2+c, NN, params); } //BV4Node* ChildNN = setNode(tmp, 2+c, NN, epsilon, params); #else BV4Node* ChildNP = setNode(source, tmp, 1, NP, params); BV4Node* ChildNN = setNode(source, tmp, 2, NN, params); #endif #ifdef GU_BV4_PRECOMPUTED_NODE_SORT tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV); tmp->mBVData[2].mTempPNS = precomputeNodeSorting(NP->mBV, NN->mBV); #endif if(ChildNP) _BuildBV4(source, ChildNP, NP, params); if(ChildNN) _BuildBV4(source, ChildNN, NN, params); } } else { if(NLeaf) { // Case 3: P no leaf, N leaf // ____A____ // P N // __|__ // PP PN // => store as (PP,PN,N), keep bit0 and bit1 params.mStats[2]++; // N leaf => store 1 triangle pointers and 2 node pointers // => 3 slots used, 25% wasted setPrimitive(source, tmp, 2, N, params.mEpsilon); // const AABBTreeNode* PP = P->getPos(); const AABBTreeNode* PN = P->getNeg(); BV4Node* ChildPP = setNode(source, tmp, 0, PP, params); BV4Node* ChildPN = setNode(source, tmp, 1, PN, params); #ifdef GU_BV4_PRECOMPUTED_NODE_SORT tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV); tmp->mBVData[1].mTempPNS = precomputeNodeSorting(PP->mBV, PN->mBV); #endif if(ChildPP) _BuildBV4(source, ChildPP, PP, params); if(ChildPN) _BuildBV4(source, ChildPN, PN, params); } else { // Case 4: P and N are no leaves: // => store as (PP,PN,NP,NN), keep bit0/bit1/bit2 params.mStats[3]++; // No leaves => store 4 node pointers const AABBTreeNode* PP = P->getPos(); const AABBTreeNode* PN = P->getNeg(); const AABBTreeNode* NP = N->getPos(); const AABBTreeNode* NN = N->getNeg(); BV4Node* ChildPP = setNode(source, tmp, 0, PP, params); BV4Node* ChildPN = setNode(source, tmp, 1, PN, params); 
BV4Node* ChildNP = setNode(source, tmp, 2, NP, params); BV4Node* ChildNN = setNode(source, tmp, 3, NN, params); #ifdef GU_BV4_PRECOMPUTED_NODE_SORT tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV); tmp->mBVData[1].mTempPNS = precomputeNodeSorting(PP->mBV, PN->mBV); tmp->mBVData[2].mTempPNS = precomputeNodeSorting(NP->mBV, NN->mBV); #endif if(ChildPP) _BuildBV4(source, ChildPP, PP, params); if(ChildPN) _BuildBV4(source, ChildPN, PN, params); if(ChildNP) _BuildBV4(source, ChildNP, NP, params); if(ChildNN) _BuildBV4(source, ChildNN, NN, params); } } } static bool BuildBV4Internal(BV4Tree& tree, const AABBTree& Source, SourceMesh* mesh, float epsilon) { if(mesh->getNbTriangles()<=4) return tree.init(mesh, Source.getBV()); { struct Local { static void _CheckMD(const AABBTreeNode* current_node, PxU32& md, PxU32& cd) { cd++; md = PxMax(md, cd); if(current_node->getPos()) { _CheckMD(current_node->getPos(), md, cd); cd--; } if(current_node->getNeg()) { _CheckMD(current_node->getNeg(), md, cd); cd--; } } static void _Check(AABBTreeNode* current_node) { if(current_node->isLeaf()) return; AABBTreeNode* P = const_cast(current_node->getPos()); AABBTreeNode* N = const_cast(current_node->getNeg()); { PxU32 MDP = 0; PxU32 CDP = 0; _CheckMD(P, MDP, CDP); PxU32 MDN = 0; PxU32 CDN = 0; _CheckMD(N, MDN, CDN); if(MDP>MDN) // if(MDP(Source.getNodes())); } BV4BuildParams Params(epsilon); Params.mNbNodes=1; // Root node Params.mStats[0]=0; Params.mStats[1]=0; Params.mStats[2]=0; Params.mStats[3]=0; #ifdef GU_BV4_USE_NODE_POOLS BV4Node* Root = Params.allocateNode(); #else BV4Node* Root = PX_NEW(BV4Node); #endif _BuildBV4(Source, Root, Source.getNodes(), Params); if(!tree.init(mesh, Source.getBV())) return false; BV4Tree* T = &tree; // Version with variable-sized nodes in single stream { struct Local { #ifdef GU_BV4_QUANTIZED_TREE #ifdef GU_BV4_USE_SLABS static void _ComputeMaxValues(const BV4Node* current, PxVec3& MinMax, PxVec3& MaxMax) { for(PxU32 i=0;i<4;i++) { 
if(current->mBVData[i].mData!=PX_INVALID_U32) { const CenterExtents& Box = current->mBVData[i].mAABB; const PxVec3 Min = Box.mCenter - Box.mExtents; const PxVec3 Max = Box.mCenter + Box.mExtents; if(fabsf(Min.x)>MinMax.x) MinMax.x = fabsf(Min.x); if(fabsf(Min.y)>MinMax.y) MinMax.y = fabsf(Min.y); if(fabsf(Min.z)>MinMax.z) MinMax.z = fabsf(Min.z); if(fabsf(Max.x)>MaxMax.x) MaxMax.x = fabsf(Max.x); if(fabsf(Max.y)>MaxMax.y) MaxMax.y = fabsf(Max.y); if(fabsf(Max.z)>MaxMax.z) MaxMax.z = fabsf(Max.z); if(!current->isLeaf(i)) { const BV4Node* ChildNode = current->getChild(i); _ComputeMaxValues(ChildNode, MinMax, MaxMax); } } } } #else static void _ComputeMaxValues(const BV4Node* current, PxVec3& CMax, PxVec3& EMax) { for(PxU32 i=0;i<4;i++) { if(current->mBVData[i].mData!=PX_INVALID_U32) { const CenterExtents& Box = current->mBVData[i].mAABB; if(fabsf(Box.mCenter.x)>CMax.x) CMax.x = fabsf(Box.mCenter.x); if(fabsf(Box.mCenter.y)>CMax.y) CMax.y = fabsf(Box.mCenter.y); if(fabsf(Box.mCenter.z)>CMax.z) CMax.z = fabsf(Box.mCenter.z); if(fabsf(Box.mExtents.x)>EMax.x) EMax.x = fabsf(Box.mExtents.x); if(fabsf(Box.mExtents.y)>EMax.y) EMax.y = fabsf(Box.mExtents.y); if(fabsf(Box.mExtents.z)>EMax.z) EMax.z = fabsf(Box.mExtents.z); if(!current->isLeaf(i)) { const BV4Node* ChildNode = current->getChild(i); _ComputeMaxValues(ChildNode, CMax, EMax); } } } } #endif #endif static void _Flatten(BVDataPacked* const dest, const PxU32 box_id, PxU32& current_id, const BV4Node* current, PxU32& max_depth, PxU32& current_depth #ifdef GU_BV4_QUANTIZED_TREE , const PxVec3& CQuantCoeff, const PxVec3& EQuantCoeff, const PxVec3& mCenterCoeff, const PxVec3& mExtentsCoeff #endif ) { // Entering a new node => increase depth current_depth++; // Keep track of max depth if(current_depth>max_depth) max_depth = current_depth; // dest[box_id] = *current; const PxU32 CurrentType = current->getType(); for(PxU32 i=0;imBVData[i].mAABB; #ifdef GU_BV4_USE_SLABS const PxVec3 m = Box.mCenter - Box.mExtents; const 
PxVec3 M = Box.mCenter + Box.mExtents; dest[box_id+i].mAABB.mData[0].mCenter = PxI16(m.x * CQuantCoeff.x); dest[box_id+i].mAABB.mData[1].mCenter = PxI16(m.y * CQuantCoeff.y); dest[box_id+i].mAABB.mData[2].mCenter = PxI16(m.z * CQuantCoeff.z); dest[box_id+i].mAABB.mData[0].mExtents = PxU16(PxI16(M.x * EQuantCoeff.x)); dest[box_id+i].mAABB.mData[1].mExtents = PxU16(PxI16(M.y * EQuantCoeff.y)); dest[box_id+i].mAABB.mData[2].mExtents = PxU16(PxI16(M.z * EQuantCoeff.z)); if(1) { for(PxU32 j=0;j<3;j++) { // Dequantize the min/max // const float qmin = float(dest[box_id+i].mAABB.mData[j].mCenter) * mCenterCoeff[j]; // const float qmax = float(PxI16(dest[box_id+i].mAABB.mData[j].mExtents)) * mExtentsCoeff[j]; // Compare real & dequantized values /* if(qmaxm[j]) { int stop=1; }*/ bool CanLeave; do { CanLeave=true; const float qmin = float(dest[box_id+i].mAABB.mData[j].mCenter) * mCenterCoeff[j]; const float qmax = float(PxI16(dest[box_id+i].mAABB.mData[j].mExtents)) * mExtentsCoeff[j]; if(qmaxm[j]) { if(dest[box_id+i].mAABB.mData[j].mCenter) { dest[box_id+i].mAABB.mData[j].mCenter--; CanLeave = false; } } }while(!CanLeave); } } #else dest[box_id+i].mAABB.mData[0].mCenter = PxI16(Box.mCenter.x * CQuantCoeff.x); dest[box_id+i].mAABB.mData[1].mCenter = PxI16(Box.mCenter.y * CQuantCoeff.y); dest[box_id+i].mAABB.mData[2].mCenter = PxI16(Box.mCenter.z * CQuantCoeff.z); dest[box_id+i].mAABB.mData[0].mExtents = PxU16(Box.mExtents.x * EQuantCoeff.x); dest[box_id+i].mAABB.mData[1].mExtents = PxU16(Box.mExtents.y * EQuantCoeff.y); dest[box_id+i].mAABB.mData[2].mExtents = PxU16(Box.mExtents.z * EQuantCoeff.z); // Fix quantized boxes if(1) { // Make sure the quantized box is still valid const PxVec3 Max = Box.mCenter + Box.mExtents; const PxVec3 Min = Box.mCenter - Box.mExtents; // For each axis for(PxU32 j=0;j<3;j++) { // Dequantize the box center const float qc = float(dest[box_id+i].mAABB.mData[j].mCenter) * mCenterCoeff[j]; bool FixMe=true; do { // Dequantize the box extent const 
float qe = float(dest[box_id+i].mAABB.mData[j].mExtents) * mExtentsCoeff[j]; // Compare real & dequantized values if(qc+qeMin[j]) dest[box_id+i].mAABB.mData[j].mExtents++; else FixMe=false; // Prevent wrapping if(!dest[box_id+i].mAABB.mData[j].mExtents) { dest[box_id+i].mAABB.mData[j].mExtents=0xffff; FixMe=false; } }while(FixMe); } } #endif #else #ifdef GU_BV4_USE_SLABS // Compute min & max right here. Store temp as Center/Extents = Min/Max const CenterExtents& Box = current->mBVData[i].mAABB; dest[box_id+i].mAABB.mCenter = Box.mCenter - Box.mExtents; dest[box_id+i].mAABB.mExtents = Box.mCenter + Box.mExtents; #else dest[box_id+i].mAABB = current->mBVData[i].mAABB; #endif #endif dest[box_id+i].mData = PxU32(current->mBVData[i].mData); // dest[box_id+i].encodePNS(current->mBVData[i].mTempPNS); } PxU32 NbToGo=0; PxU32 NextIDs[4] = {PX_INVALID_U32, PX_INVALID_U32, PX_INVALID_U32, PX_INVALID_U32}; const BV4Node* ChildNodes[4] = {NULL,NULL,NULL,NULL}; BVDataPacked* data = dest+box_id; for(PxU32 i=0;i<4;i++) { if(current->mBVData[i].mData!=PX_INVALID_U32 && !current->isLeaf(i)) { const BV4Node* ChildNode = current->getChild(i); const PxU32 NextID = current_id; #ifdef GU_BV4_USE_SLABS current_id += 4; #else const PxU32 ChildSize = ChildNode->getType(); current_id += ChildSize; #endif const PxU32 ChildType = (ChildNode->getType()-2)<<1; data[i].mData = size_t(ChildType+(NextID<mBVData[i].mTempPNS); #endif //#define DEPTH_FIRST #ifdef DEPTH_FIRST _Flatten(dest, NextID, current_id, ChildNode, max_depth, current_depth #ifdef GU_BV4_QUANTIZED_TREE , CQuantCoeff, EQuantCoeff, mCenterCoeff, mExtentsCoeff #endif ); current_depth--; #endif } #ifdef GU_BV4_USE_SLABS if(current->mBVData[i].mData==PX_INVALID_U32) { #ifdef GU_BV4_QUANTIZED_TREE data[i].mAABB.mData[0].mExtents = 0; data[i].mAABB.mData[1].mExtents = 0; data[i].mAABB.mData[2].mExtents = 0; data[i].mAABB.mData[0].mCenter = 0; data[i].mAABB.mData[1].mCenter = 0; data[i].mAABB.mData[2].mCenter = 0; #else 
data[i].mAABB.mCenter = PxVec3(0.0f); data[i].mAABB.mExtents = PxVec3(0.0f); #endif data[i].mData = PX_INVALID_U32; } #endif } #ifndef DEPTH_FIRST for(PxU32 i=0;igetType(); PxU32 InitData = PX_INVALID_U32; #ifdef GU_BV4_USE_SLABS PX_UNUSED(NbSingleNodes); const PxU32 NbNeeded = (Params.mStats[0]+Params.mStats[1]+Params.mStats[2]+Params.mStats[3])*4; BVDataPacked* Nodes = reinterpret_cast(PX_ALLOC(sizeof(BVDataPacked)*NbNeeded, "BV4 nodes")); // PT: PX_NEW breaks alignment here // BVDataPacked* Nodes = PX_NEW(BVDataPacked)[NbNeeded]; if(CurID==2) { InitData = 0; } else if(CurID==3) { InitData = 2; } else if(CurID==4) { InitData = 4; } CurID = 4; // PxU32 CurID = 4; // PxU32 InitData = 4; #else BVDataPacked* Nodes = PX_NEW(BVDataPacked)[NbSingleNodes]; if(CurID==2) { InitData = 0; } else if(CurID==3) { InitData = 2; } else if(CurID==4) { InitData = 4; } #endif T->mInitData = InitData; PxU32 MaxDepth = 0; PxU32 CurrentDepth = 0; #ifdef GU_BV4_QUANTIZED_TREE #ifdef GU_BV4_USE_SLABS PxVec3 MinQuantCoeff, MaxQuantCoeff; { // Get max values PxVec3 MinMax(-FLT_MAX); PxVec3 MaxMax(-FLT_MAX); Local::_ComputeMaxValues(Root, MinMax, MaxMax); const PxU32 nbm=15; // Compute quantization coeffs const float MinCoeff = float((1<mCenterOrMinCoeff.x = MinMax.x/MinCoeff; T->mCenterOrMinCoeff.y = MinMax.y/MinCoeff; T->mCenterOrMinCoeff.z = MinMax.z/MinCoeff; T->mExtentsOrMaxCoeff.x = MaxMax.x/MaxCoeff; T->mExtentsOrMaxCoeff.y = MaxMax.y/MaxCoeff; T->mExtentsOrMaxCoeff.z = MaxMax.z/MaxCoeff; } Local::_Flatten(Nodes, 0, CurID, Root, MaxDepth, CurrentDepth, MinQuantCoeff, MaxQuantCoeff, T->mCenterOrMinCoeff, T->mExtentsOrMaxCoeff); #else PxVec3 CQuantCoeff, EQuantCoeff; { // Get max values PxVec3 CMax(-FLT_MAX); PxVec3 EMax(-FLT_MAX); Local::_ComputeMaxValues(Root, CMax, EMax); const PxU32 nbc=15; const PxU32 nbe=16; // const PxU32 nbc=7; // const PxU32 nbe=8; const float UnitQuantError = 2.0f/65535.0f; EMax.x += CMax.x*UnitQuantError; EMax.y += CMax.y*UnitQuantError; EMax.z += 
CMax.z*UnitQuantError; // Compute quantization coeffs const float CCoeff = float((1<mCenterOrMinCoeff.x = CMax.x/CCoeff; T->mCenterOrMinCoeff.y = CMax.y/CCoeff; T->mCenterOrMinCoeff.z = CMax.z/CCoeff; T->mExtentsOrMaxCoeff.x = EMax.x/ECoeff; T->mExtentsOrMaxCoeff.y = EMax.y/ECoeff; T->mExtentsOrMaxCoeff.z = EMax.z/ECoeff; } Local::_Flatten(Nodes, 0, CurID, Root, MaxDepth, CurrentDepth, CQuantCoeff, EQuantCoeff, T->mCenterOrMinCoeff, T->mExtentsOrMaxCoeff); #endif #else Local::_Flatten(Nodes, 0, CurID, Root, MaxDepth, CurrentDepth); #endif #ifdef GU_BV4_USE_NODE_POOLS Params.releaseNodes(); #endif #ifdef GU_BV4_USE_SLABS { PX_ASSERT(sizeof(BVDataSwizzled)==sizeof(BVDataPacked)*4); BVDataPacked* Copy = PX_NEW(BVDataPacked)[NbNeeded]; PxMemCopy(Copy, Nodes, sizeof(BVDataPacked)*NbNeeded); for(PxU32 i=0;i(Nodes + i*4); for(PxU32 j=0;j<4;j++) { // We previously stored m/M within c/e so we just need to swizzle now #ifdef GU_BV4_QUANTIZED_TREE const QuantizedAABB& Box = Src[j].mAABB; Dst->mX[j].mMin = Box.mData[0].mCenter; Dst->mY[j].mMin = Box.mData[1].mCenter; Dst->mZ[j].mMin = Box.mData[2].mCenter; Dst->mX[j].mMax = PxI16(Box.mData[0].mExtents); Dst->mY[j].mMax = PxI16(Box.mData[1].mExtents); Dst->mZ[j].mMax = PxI16(Box.mData[2].mExtents); #else const CenterExtents& Box = Src[j].mAABB; Dst->mMinX[j] = Box.mCenter.x; Dst->mMinY[j] = Box.mCenter.y; Dst->mMinZ[j] = Box.mCenter.z; Dst->mMaxX[j] = Box.mExtents.x; Dst->mMaxY[j] = Box.mExtents.y; Dst->mMaxZ[j] = Box.mExtents.z; #endif Dst->mData[j] = Src[j].mData; } } DELETEARRAY(Copy); } T->mNbNodes = NbNeeded; #else PX_ASSERT(CurID==NbSingleNodes); T->mNbNodes = NbSingleNodes; #endif T->mNodes = Nodes; } return true; } ///// struct ReorderData { const SourceMesh* mMesh; PxU32* mOrder; PxU32 mNbTrisPerLeaf; PxU32 mIndex; PxU32 mNbTris; PxU32 mStats[16]; }; static bool gReorderCallback(const AABBTreeNode* current, PxU32 /*depth*/, void* userData) { ReorderData* Data = reinterpret_cast(userData); if(current->isLeaf()) { const 
PxU32 n = current->getNbPrimitives(); PX_ASSERT(n<=Data->mNbTrisPerLeaf); Data->mStats[n]++; PxU32* Prims = const_cast(current->getPrimitives()); for(PxU32 i=0;imNbTris); Data->mOrder[Data->mIndex] = Prims[i]; PX_ASSERT(Data->mIndexmNbTris); Prims[i] = Data->mIndex; Data->mIndex++; } } return true; } bool physx::Gu::BuildBV4Ex(BV4Tree& tree, SourceMesh& mesh, float epsilon, PxU32 nbTrisPerLeaf) { const PxU32 nbTris = mesh.mNbTris; AABBTree Source; if(!Source.buildFromMesh(mesh, nbTrisPerLeaf)) return false; { PxU32* order = reinterpret_cast(PX_ALLOC(sizeof(PxU32)*nbTris, "BV4")); ReorderData RD; RD.mMesh = &mesh; RD.mOrder = order; RD.mNbTrisPerLeaf = nbTrisPerLeaf; RD.mIndex = 0; RD.mNbTris = nbTris; for(PxU32 i=0;i<16;i++) RD.mStats[i] = 0; Source.walk(gReorderCallback, &RD); PX_ASSERT(RD.mIndex==nbTris); mesh.remapTopology(order); PX_FREE(order); // for(PxU32 i=0;i<16;i++) // printf("%d: %d\n", i, RD.mStats[i]); } if(mesh.getNbTriangles()<=nbTrisPerLeaf) return tree.init(&mesh, Source.getBV()); return BuildBV4Internal(tree, Source, &mesh, epsilon); }