aboutsummaryrefslogtreecommitdiff
path: root/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp
diff options
context:
space:
mode:
authorgit perforce import user <a@b>2016-10-25 12:29:14 -0600
committerSheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>2016-10-25 18:56:37 -0500
commit3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
treefa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp
downloadphysx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
Initial commit:
PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp')
-rw-r--r--PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp1294
1 files changed, 1294 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp
new file mode 100644
index 00000000..fbe97042
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp
@@ -0,0 +1,1294 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "foundation/PxVec4.h"
+#include "GuBV4Build.h"
+#include "GuBV4.h"
+#include "PxTriangle.h"
+#include "CmPhysXCommon.h"
+#include "PsBasicTemplates.h"
+#include "GuCenterExtents.h"
+
+using namespace physx;
+using namespace Gu;
+
+#include "PsVecMath.h"
+using namespace physx::shdfnd::aos;
+
+#define GU_BV4_USE_NODE_POOLS
+
+#define DELETESINGLE(x) if (x) { delete x; x = NULL; }
+#define DELETEARRAY(x) if (x) { delete []x; x = NULL; }
+
+static PX_FORCE_INLINE PxU32 largestAxis(const PxVec4& v)
+{
+ const float* Vals = &v.x;
+ PxU32 m = 0;
+ if(Vals[1] > Vals[m]) m = 1;
+ if(Vals[2] > Vals[m]) m = 2;
+ return m;
+}
+
// Constructs an empty tree: no primitive index buffer, no node pool, zero nodes.
AABBTree::AABBTree() : mIndices(NULL), mPool(NULL), mTotalNbNodes(0)
{
}
+
// Destructor: frees the node pool and the index buffer via release().
AABBTree::~AABBTree()
{
	release();
}
+
// Releases the tree's memory: the linear node pool (delete[]) and the primitive
// index permutation buffer (PX_FREE). Safe to call on an already-empty tree,
// since both macros null-check and reset their argument.
void AABBTree::release()
{
	DELETEARRAY(mPool);
	PX_FREE_AND_RESET(mIndices);
}
+
// Partitions the node's primitive indices in place along 'axis', using the
// center of the node's bounding box as the split value. After the call,
// primitives whose centroid lies on the positive side come first in
// mNodePrimitives. Returns the number of primitives in the positive half
// (0 or nb means the split failed to separate anything).
static PxU32 local_Split(const AABBTreeNode* PX_RESTRICT node, const PxBounds3* PX_RESTRICT /*Boxes*/, const PxVec3* PX_RESTRICT centers, PxU32 axis)
{
	const PxU32 nb = node->mNbPrimitives;
	PxU32* PX_RESTRICT prims = node->mNodePrimitives;

	// Get node split value
	const float splitValue = node->mBV.getCenter(axis);

	PxU32 nbPos = 0;
	// Loop through all node-related primitives. Their indices range from mNodePrimitives[0] to mNodePrimitives[mNbPrimitives-1].
	// Those indices map the global list in the tree builder.
	// Trick: offset the base pointer by 'axis' floats so that centersX[index].x
	// reads centers[index][axis] without a per-iteration component switch.
	const size_t ptrValue = size_t(centers) + axis*sizeof(float);
	const PxVec3* PX_RESTRICT centersX = reinterpret_cast<const PxVec3*>(ptrValue);

	for(PxU32 i=0;i<nb;i++)
	{
		// Get index in global list
		const PxU32 index = prims[i];

		// Test against the splitting value. The primitive value is tested against the enclosing-box center.
		// [We only need an approximate partition of the enclosing box here.]
		const float primitiveValue = centersX[index].x;

		// Reorganize the list of indices in this order: positive - negative.
		if(primitiveValue > splitValue)
		{
			// Swap entries
			prims[i] = prims[nbPos];
			prims[nbPos] = index;
			// Count primitives assigned to positive space
			nbPos++;
		}
	}
	return nbPos;
}
+
// Computes the node's bounds (and primitive centroid mean) in a single SIMD
// pass, then — if the node holds more than 'limit' primitives — splits its
// primitive range along the axis of largest centroid variance. The two
// children are taken from the pre-allocated linear node pool: 'node_base' plus
// the running node count tracked in 'stats'. Returns true if the node was
// subdivided (children created and initialized), false if it stays a leaf.
// NOTE: relies on the unaligned V4 loads being safe, i.e. the boxes/centers
// arrays were allocated with one extra padding element (see buildFromMesh).
static bool local_Subdivide(AABBTreeNode* PX_RESTRICT node, const PxBounds3* PX_RESTRICT boxes, const PxVec3* PX_RESTRICT centers, BuildStats& stats, const AABBTreeNode* const PX_RESTRICT node_base, PxU32 limit)
{
	const PxU32* PX_RESTRICT prims = node->mNodePrimitives;
	const PxU32 nb = node->mNbPrimitives;

	// Compute bv & means at the same time
	Vec4V meansV;
	{
		Vec4V minV = V4LoadU(&boxes[prims[0]].minimum.x);
		Vec4V maxV = V4LoadU(&boxes[prims[0]].maximum.x);
		meansV = V4LoadU(&centers[prims[0]].x);

		for(PxU32 i=1;i<nb;i++)
		{
			const PxU32 index = prims[i];
			minV = V4Min(minV, V4LoadU(&boxes[index].minimum.x));
			maxV = V4Max(maxV, V4LoadU(&boxes[index].maximum.x));
			meansV = V4Add(meansV, V4LoadU(&centers[index].x));
		}
		const float coeffNb = 1.0f/float(nb);
		meansV = V4Scale(meansV, FLoad(coeffNb));

//		BV4_ALIGN16(PxVec4 mergedMin);
//		BV4_ALIGN16(PxVec4 mergedMax);
		PX_ALIGN_PREFIX(16) PxVec4 mergedMin PX_ALIGN_SUFFIX(16);
		PX_ALIGN_PREFIX(16) PxVec4 mergedMax PX_ALIGN_SUFFIX(16);

		V4StoreA_Safe(minV, &mergedMin.x);
		V4StoreA_Safe(maxV, &mergedMax.x);
		node->mBV.minimum = PxVec3(mergedMin.x, mergedMin.y, mergedMin.z);
		node->mBV.maximum = PxVec3(mergedMax.x, mergedMax.y, mergedMax.z);
	}

//	// Stop subdividing if we reach a leaf node. This is always performed here,
//	// else we could end in trouble if user overrides this.
//	if(nb==1)
//		return false;
	// Leaf criterion: stop when the primitive count is small enough
	if(nb<=limit)
		return false;

	bool validSplit = true;
	PxU32 nbPos;
	{
		// Compute variances
		Vec4V varsV = V4Zero();
		for(PxU32 i=0;i<nb;i++)
		{
			const PxU32 index = prims[i];
			Vec4V centerV = V4LoadU(&centers[index].x);
			centerV = V4Sub(centerV, meansV);
			centerV = V4Mul(centerV, centerV);
			varsV = V4Add(varsV, centerV);
		}
		const float coeffNb1 = 1.0f/float(nb-1);
		varsV = V4Scale(varsV, FLoad(coeffNb1));

//		BV4_ALIGN16(PxVec4 vars);
		PX_ALIGN_PREFIX(16) PxVec4 vars PX_ALIGN_SUFFIX(16);
		V4StoreA_Safe(varsV, &vars.x);

		// Choose axis with greatest variance
		const PxU32 axis = largestAxis(vars);

		// Split along the axis
		nbPos = local_Split(node, boxes, centers, axis);

		// Check split validity
		if(!nbPos || nbPos==nb)
			validSplit = false;
	}

	// Check the subdivision has been successful
	if(!validSplit)
	{
		// Here, all boxes lie in the same sub-space. Two strategies:
		// - if the tree *must* be complete, make an arbitrary 50-50 split
		// - else stop subdividing
//		if(nb>limit)
		{
			nbPos = node->mNbPrimitives>>1;

			if(1)
			{
				// Test 3 axis, take the best
				// (the split whose ratio is closest to a 50-50 partition wins)
				float results[3];
				nbPos = local_Split(node, boxes, centers, 0);	results[0] = float(nbPos)/float(node->mNbPrimitives);
				nbPos = local_Split(node, boxes, centers, 1);	results[1] = float(nbPos)/float(node->mNbPrimitives);
				nbPos = local_Split(node, boxes, centers, 2);	results[2] = float(nbPos)/float(node->mNbPrimitives);
				results[0]-=0.5f;	results[0]*=results[0];
				results[1]-=0.5f;	results[1]*=results[1];
				results[2]-=0.5f;	results[2]*=results[2];
				PxU32 Min=0;
				if(results[1]<results[Min])	Min = 1;
				if(results[2]<results[Min])	Min = 2;

				// Split along the axis
				nbPos = local_Split(node, boxes, centers, Min);

				// Check split validity
				if(!nbPos || nbPos==node->mNbPrimitives)
					nbPos = node->mNbPrimitives>>1;	// Last resort: forced arbitrary 50-50 split
			}
		}
		//else return
	}

	// Now create children and assign their pointers.
	// We use a pre-allocated linear pool for complete trees [Opcode 1.3]
	const PxU32 count = stats.getCount();
	node->mPos = size_t(node_base + count);	// Children are the next two free nodes in the pool

	// Update stats
	stats.increaseCount(2);

	// Assign children
	AABBTreeNode* pos = const_cast<AABBTreeNode*>(node->getPos());
	AABBTreeNode* neg = const_cast<AABBTreeNode*>(node->getNeg());
	pos->mNodePrimitives = node->mNodePrimitives;
	pos->mNbPrimitives = nbPos;
	neg->mNodePrimitives = node->mNodePrimitives + nbPos;
	neg->mNbPrimitives = node->mNbPrimitives - nbPos;
	return true;
}
+
+static void local_BuildHierarchy(AABBTreeNode* PX_RESTRICT node, const PxBounds3* PX_RESTRICT Boxes, const PxVec3* PX_RESTRICT centers, BuildStats& stats, const AABBTreeNode* const PX_RESTRICT node_base, PxU32 limit)
+{
+ if(local_Subdivide(node, Boxes, centers, stats, node_base, limit))
+ {
+ AABBTreeNode* pos = const_cast<AABBTreeNode*>(node->getPos());
+ AABBTreeNode* neg = const_cast<AABBTreeNode*>(node->getNeg());
+ local_BuildHierarchy(pos, Boxes, centers, stats, node_base, limit);
+ local_BuildHierarchy(neg, Boxes, centers, stats, node_base, limit);
+ }
+}
+
// Builds the binary AABB tree over the triangles of 'mesh':
// - computes one bounding box and one centroid per triangle using SIMD
//   (the +1 in the allocations pads the arrays so the 4-wide loads/stores on
//   the last element never read/write out of bounds),
// - allocates the identity index permutation and a linear pool of 2*n-1 nodes
//   (the maximum for a binary tree with n leaves),
// - recursively subdivides, with at most 'limit' triangles per leaf.
// Returns false if the mesh has no triangles.
bool AABBTree::buildFromMesh(SourceMesh& mesh, PxU32 limit)
{
	const PxU32 nbBoxes = mesh.getNbTriangles();
	if(!nbBoxes)
		return false;
	PxBounds3* boxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*(nbBoxes+1), "BV4"));	// PT: +1 to safely V4Load/V4Store the last element
	PxVec3* centers = reinterpret_cast<PxVec3*>(PX_ALLOC(sizeof(PxVec3)*(nbBoxes+1), "BV4"));	// PT: +1 to safely V4Load/V4Store the last element
	const FloatV halfV = FLoad(0.5f);
	for(PxU32 i=0;i<nbBoxes;i++)
	{
		VertexPointers VP;
		mesh.getTriangle(VP, i);

		// Triangle bounds = min/max of its three vertices
		const Vec4V v0V = V4LoadU(&VP.Vertex[0]->x);
		const Vec4V v1V = V4LoadU(&VP.Vertex[1]->x);
		const Vec4V v2V = V4LoadU(&VP.Vertex[2]->x);
		Vec4V minV = V4Min(v0V, v1V);
		minV = V4Min(minV, v2V);
		Vec4V maxV = V4Max(v0V, v1V);
		maxV = V4Max(maxV, v2V);
		V4StoreU_Safe(minV, &boxes[i].minimum.x);	// PT: safe because 'maximum' follows 'minimum'
		V4StoreU_Safe(maxV, &boxes[i].maximum.x);	// PT: safe because we allocated one more box

		// Centroid = midpoint of the bounds
		const Vec4V centerV = V4Scale(V4Add(maxV, minV), halfV);
		V4StoreU_Safe(centerV, &centers[i].x);	// PT: safe because we allocated one more PxVec3
	}

	{
		// Release previous tree
		release();

		// Init stats
		BuildStats Stats;
		Stats.setCount(1);

		// Initialize indices. This list will be modified during build.
		mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbBoxes, "BV4 indices"));
		// Identity permutation
		for(PxU32 i=0;i<nbBoxes;i++)
			mIndices[i] = i;

		// Use a linear array for complete trees (since we can predict the final number of nodes) [Opcode 1.3]
		// Allocate a pool of nodes
		// PT: TODO: optimize memory here (TA34704)
		mPool = PX_NEW(AABBTreeNode)[nbBoxes*2 - 1];

		// Setup initial node. Here we have a complete permutation of the app's primitives.
		mPool->mNodePrimitives = mIndices;
		mPool->mNbPrimitives = nbBoxes;

		// Build the hierarchy
		local_BuildHierarchy(mPool, boxes, centers, Stats, mPool, limit);

		// Get back total number of nodes
		mTotalNbNodes = Stats.getCount();
	}

	// Scratch data is only needed during the build
	PX_FREE(centers);
	PX_FREE(boxes);
	return true;
}
+
// Depth-first traversal of the tree, invoking 'cb' for each visited node with
// the node, its depth, and 'userData'. If the callback returns false, that
// node's subtree is skipped. Returns the maximum depth reached. A null
// callback is allowed: the function then simply computes the tree's max depth.
PxU32 AABBTree::walk(WalkingCallback cb, void* userData) const
{
	// Call it without callback to compute max depth
	PxU32 maxDepth = 0;
	PxU32 currentDepth = 0;

	struct Local
	{
		static void _Walk(const AABBTreeNode* current_node, PxU32& max_depth, PxU32& current_depth, WalkingCallback callback, void* userData_)
		{
			// Checkings
			if(!current_node)
				return;
			// Entering a new node => increase depth
			current_depth++;
			// Keep track of max depth
			if(current_depth>max_depth)
				max_depth = current_depth;

			// Callback
			if(callback && !(callback)(current_node, current_depth, userData_))
				return;

			// Recurse
			// (the depth counter is decremented by the caller after each child
			// returns, so the increment above is always balanced)
			if(current_node->getPos())	{ _Walk(current_node->getPos(), max_depth, current_depth, callback, userData_);	current_depth--;	}
			if(current_node->getNeg())	{ _Walk(current_node->getNeg(), max_depth, current_depth, callback, userData_);	current_depth--;	}
		}
	};
	Local::_Walk(mPool, maxDepth, currentDepth, cb, userData);
	return maxDepth;
}
+
+
+
+#include "GuBV4_Internal.h"
+
+#ifdef GU_BV4_PRECOMPUTED_NODE_SORT
+// PT: see http://www.codercorner.com/blog/?p=734
+static PxU32 precomputeNodeSorting(const PxBounds3& box0, const PxBounds3& box1)
+{
+ const PxVec3 C0 = box0.getCenter();
+ const PxVec3 C1 = box1.getCenter();
+
+ PxVec3 dirPPP(1.0f, 1.0f, 1.0f); dirPPP.normalize();
+ PxVec3 dirPPN(1.0f, 1.0f, -1.0f); dirPPN.normalize();
+ PxVec3 dirPNP(1.0f, -1.0f, 1.0f); dirPNP.normalize();
+ PxVec3 dirPNN(1.0f, -1.0f, -1.0f); dirPNN.normalize();
+ PxVec3 dirNPP(-1.0f, 1.0f, 1.0f); dirNPP.normalize();
+ PxVec3 dirNPN(-1.0f, 1.0f, -1.0f); dirNPN.normalize();
+ PxVec3 dirNNP(-1.0f, -1.0f, 1.0f); dirNNP.normalize();
+ PxVec3 dirNNN(-1.0f, -1.0f, -1.0f); dirNNN.normalize();
+
+ const PxVec3 deltaC = C0 - C1;
+ const bool bPPP = deltaC.dot(dirPPP)<0.0f;
+ const bool bPPN = deltaC.dot(dirPPN)<0.0f;
+ const bool bPNP = deltaC.dot(dirPNP)<0.0f;
+ const bool bPNN = deltaC.dot(dirPNN)<0.0f;
+ const bool bNPP = deltaC.dot(dirNPP)<0.0f;
+ const bool bNPN = deltaC.dot(dirNPN)<0.0f;
+ const bool bNNP = deltaC.dot(dirNNP)<0.0f;
+ const bool bNNN = deltaC.dot(dirNNN)<0.0f;
+
+ PxU32 code = 0;
+ if(!bPPP)
+ code |= (1<<7); // Bit 0: PPP
+ if(!bPPN)
+ code |= (1<<6); // Bit 1: PPN
+ if(!bPNP)
+ code |= (1<<5); // Bit 2: PNP
+ if(!bPNN)
+ code |= (1<<4); // Bit 3: PNN
+ if(!bNPP)
+ code |= (1<<3); // Bit 4: NPP
+ if(!bNPN)
+ code |= (1<<2); // Bit 5: NPN
+ if(!bNNP)
+ code |= (1<<1); // Bit 6: NNP
+ if(!bNNN)
+ code |= (1<<0); // Bit 7: NNN
+ return code;
+}
+#endif
+
+#ifdef GU_BV4_USE_SLABS
+ #include "GuBV4_Common.h"
+#endif
+
+static void setEmpty(CenterExtents& box)
+{
+ box.mCenter = PxVec3(0.0f, 0.0f, 0.0f);
+ box.mExtents = PxVec3(-1.0f, -1.0f, -1.0f);
+}
+
+// Data:
+// 1 bit for leaf/no leaf
+// 2 bits for child-node type
+// 8 bits for PNS
+// => 32 - 1 - 2 - 8 = 21 bits left for encoding triangle index or node *offset*
+// => limited to 2.097.152 triangles
+// => and 2Mb-large trees (this one may not work out well in practice)
+// ==> lines marked with //* have been changed to address this. Now we don't store offsets in bytes directly
+// but in BVData indices. There's more work at runtime calculating addresses, but now the format can support
+// 2 million single nodes.
+//
+// That being said we only need 3*8 = 24 bits in total, so that could be only 6 bits in each BVData.
+// For type0: we have 2 nodes, we need 8 bits => 6 bits/node = 12 bits available, ok
+// For type1: we have 3 nodes, we need 8*2 = 16 bits => 6 bits/node = 18 bits available, ok
+// For type2: we have 4 nodes, we need 8*3 = 24 bits => 6 bits/node = 24 bits available, ok
+//#pragma pack(1)
// One child entry of a builder BV4 node: bounds plus a packed data word.
// mData holds either a raw pointer to a child BV4Node (internal entry) or
// ((primitiveIndex<<1)|1) for a leaf entry — see setPrimitive()/setNode().
// PX_INVALID_U32 marks an unused entry.
struct BVData : public physx::shdfnd::UserAllocated
{
	BVData();
	CenterExtents mAABB;	// Bounds of this child, in center/extents form
	size_t mData;			// Child pointer or packed leaf data; PX_INVALID_U32 if unused
#ifdef GU_BV4_PRECOMPUTED_NODE_SORT
	PxU32 mTempPNS;			// Temporary PNS code, encoded into the packed tree later
#endif
};
+//#pragma pack()
+
// Default state: unused entry (mData==PX_INVALID_U32), empty box, cleared PNS.
BVData::BVData() : mData(PX_INVALID_U32)
{
	setEmpty(mAABB);
#ifdef GU_BV4_PRECOMPUTED_NODE_SORT
	mTempPNS = 0;
#endif
}
+
// A 4-wide node of the builder's intermediate tree: up to 4 child entries.
// Unused entries keep the BVData default (mData==PX_INVALID_U32).
struct BV4Node : public physx::shdfnd::UserAllocated
{
	PX_FORCE_INLINE	BV4Node()	{}
	PX_FORCE_INLINE	~BV4Node()	{}

	BVData	mBVData[4];

	// Low bit of mData set => entry 'i' is a leaf (see setPrimitive)
	PX_FORCE_INLINE	size_t			isLeaf(PxU32 i)			const	{ return mBVData[i].mData&1;								}
	// Packed (offset<<4)|count primitive data of a leaf entry
	PX_FORCE_INLINE	PxU32			getPrimitive(PxU32 i)	const	{ return PxU32(mBVData[i].mData>>1);						}
	// Child pointer of an internal entry (mData stores the pointer directly)
	PX_FORCE_INLINE	const BV4Node*	getChild(PxU32 i)		const	{ return reinterpret_cast<BV4Node*>(mBVData[i].mData);		}

	// Number of used entries (2, 3 or 4 for nodes produced by _BuildBV4)
	PxU32 getType() const
	{
		PxU32 Nb=0;
		for(PxU32 i=0;i<4;i++)
		{
			if(mBVData[i].mData!=PX_INVALID_U32)
				Nb++;
		}
		return Nb;
	}

	// Size in bytes of the used portion of the node
	PxU32 getSize() const
	{
		const PxU32 type = getType();
		return sizeof(BVData)*type;
	}
};
+
+#define NB_NODES_PER_SLAB 256
// Shared state for the BV4 build recursion: per-layout node statistics, the
// bounds-inflation epsilon, and (optionally) a slab allocator for BV4 nodes.
struct BV4BuildParams
{
	PX_FORCE_INLINE	BV4BuildParams(float epsilon) : mEpsilon(epsilon)
#ifdef GU_BV4_USE_NODE_POOLS
		,mTop(NULL)
#endif
	{}
	~BV4BuildParams();

	// Stats
	PxU32	mNbNodes;	// Total number of BV4 nodes created (root included)
	PxU32	mStats[4];	// Nodes per case in _BuildBV4: [0]=both children leaves,
						// [1]=P leaf only, [2]=N leaf only, [3]=no leaf (4 children)

	//
	float	mEpsilon;	// Amount added to stored extents (0.0f => no inflation)

#ifdef GU_BV4_USE_NODE_POOLS
	// Simple slab allocator: nodes are handed out from fixed-size slabs chained
	// in a singly-linked list, and all freed at once by releaseNodes().
	struct Slab : public physx::shdfnd::UserAllocated
	{
		BV4Node	mNodes[NB_NODES_PER_SLAB];
		PxU32	mNbUsedNodes;	// Nodes already handed out from this slab
		Slab*	mNext;			// Next (older) slab in the chain
	};
	Slab*	mTop;	// Most recent slab, i.e. the one currently being consumed

	BV4Node*	allocateNode();
	void		releaseNodes();
#endif
};
+
// Destructor: returns all slab-allocated BV4 nodes in one pass.
BV4BuildParams::~BV4BuildParams()
{
#ifdef GU_BV4_USE_NODE_POOLS
	releaseNodes();
#endif
}
+
+#ifdef GU_BV4_USE_NODE_POOLS
+BV4Node* BV4BuildParams::allocateNode()
+{
+ if(!mTop || mTop->mNbUsedNodes==NB_NODES_PER_SLAB)
+ {
+ Slab* newSlab = PX_NEW(Slab);
+ newSlab->mNbUsedNodes = 0;
+ newSlab->mNext = mTop;
+ mTop = newSlab;
+ }
+ return &mTop->mNodes[mTop->mNbUsedNodes++];
+}
+
+void BV4BuildParams::releaseNodes()
+{
+ Slab* current = mTop;
+ while(current)
+ {
+ Slab* next = current->mNext;
+ PX_DELETE(current);
+ current = next;
+ }
+ mTop = NULL;
+}
+#endif
+
// Stores a leaf (a small run of triangles) in slot 'i' of 'node4'. The leaf's
// triangle indices are expected to be consecutive in the source tree's index
// buffer (asserted below), so only a start offset and a count are encoded:
//   mData = (((offset<<4)|nbPrims)<<1)|1    — low bit set marks a leaf.
// 'epsilon' optionally inflates the stored bounds.
static void setPrimitive(const AABBTree& source, BV4Node* node4, PxU32 i, const AABBTreeNode* node, float epsilon)
{
	const PxU32 nbPrims = node->getNbPrimitives();
	PX_ASSERT(nbPrims<16);	// The count is packed into 4 bits
	const PxU32* indexBase = source.getIndices();
	const PxU32* prims = node->getPrimitives();
	const PxU32 offset = PxU32(prims - indexBase);
	for(PxU32 j=0;j<nbPrims;j++)
	{
		// The offset/count encoding is only valid for consecutive indices
		PX_ASSERT(prims[j] == offset+j);
	}
	const PxU32 primitiveIndex = (offset<<4)|(nbPrims&15);

	node4->mBVData[i].mAABB = node->getAABB();
	if(epsilon!=0.0f)
		node4->mBVData[i].mAABB.mExtents += PxVec3(epsilon, epsilon, epsilon);
	node4->mBVData[i].mData = (primitiveIndex<<1)|1;	// Low bit set => leaf
}
+
// Fills slot 'i' of 'node4' from AABB-tree node 'node'. Leaves are encoded in
// place via setPrimitive(); internal nodes get their (optionally inflated)
// bounds stored and a fresh child BV4Node allocated, its pointer stashed
// directly in mData (node allocation keeps the low "leaf" bit clear).
// Returns the new child node, or NULL when 'node' was a leaf.
static BV4Node* setNode(const AABBTree& source, BV4Node* node4, PxU32 i, const AABBTreeNode* node, BV4BuildParams& params)
{
	BV4Node* child = NULL;
	if(node->isLeaf())
	{
		setPrimitive(source, node4, i, node, params.mEpsilon);
	}
	else
	{
		node4->mBVData[i].mAABB = node->getAABB();
		if(params.mEpsilon!=0.0f)
			node4->mBVData[i].mAABB.mExtents += PxVec3(params.mEpsilon);

		params.mNbNodes++;
#ifdef GU_BV4_USE_NODE_POOLS
		child = params.allocateNode();
#else
		child = PX_NEW(BV4Node);
#endif
		node4->mBVData[i].mData = size_t(child);
	}
	return child;
}
+
// Recursively collapses the binary AABB tree into 4-wide BV4 nodes, consuming
// two binary levels at a time. 'tmp' is the destination BV4 node for
// 'current_node', which must be an internal node. One of four layouts is
// produced depending on which of the children P/N are leaves; the layout
// counts are accumulated in params.mStats[0..3] (used later to size the
// flattened node array).
static void _BuildBV4(const AABBTree& source, BV4Node* tmp, const AABBTreeNode* current_node, BV4BuildParams& params)
{
	PX_ASSERT(!current_node->isLeaf());

	// In the regular tree we have current node A, and:
	//      ____A____
	//      P       N
	//    __|__   __|__
	//    PP  PN  NP  NN
	//
	// For PNS we have:
	// bit0 to sort P|N
	// bit1 to sort PP|PN
	// bit2 to sort NP|NN
	//
	// As much as possible we need to preserve the original order in BV4, if we want to reuse the same PNS bits.
	//
	// bit0|bit1|bit2     Order           8bits code
	//   0    0    0    PP PN NP NN       0 1 2 3
	//   0    0    1    PP PN NN NP       0 1 3 2
	//   0    1    0    PN PP NP NN       1 0 2 3
	//   0    1    1    PN PP NN NP       1 0 3 2
	//   1    0    0    NP NN PP PN       2 3 0 1
	//   1    0    1    NN NP PP PN       3 2 0 1
	//   1    1    0    NP NN PN PP       2 3 1 0
	//   1    1    1    NN NP PN PP       3 2 1 0
	//
	// So we can fetch/compute the sequence from the bits, combine it with limitations from the node type, and process the nodes in order. In theory.
	// 8*8bits => the whole thing fits in a single 64bit register, so we could potentially use a "register LUT" here.

	const AABBTreeNode* P = current_node->getPos();
	const AABBTreeNode* N = current_node->getNeg();

	const bool PLeaf = P->isLeaf();
	const bool NLeaf = N->isLeaf();

	if(PLeaf)
	{
		if(NLeaf)
		{
			// Case 1: P and N are both leaves:
			//      ____A____
			//      P       N
			// => store as (P,N) and keep bit0
			params.mStats[0]++;
			// PN leaves => store 2 triangle pointers, lose 50% of node space
			setPrimitive(source, tmp, 0, P, params.mEpsilon);
			setPrimitive(source, tmp, 1, N, params.mEpsilon);

#ifdef GU_BV4_PRECOMPUTED_NODE_SORT
			tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV);
#endif
		}
		else
		{
			// Case 2: P leaf, N no leaf
			//      ____A____
			//      P       N
			//            __|__
			//            NP  NN
			// => store as (P,NP,NN), keep bit0 and bit2
			params.mStats[1]++;
			// P leaf => store 1 triangle pointers and 2 node pointers
			// => 3 slots used, 25% wasted
			setPrimitive(source, tmp, 0, P, params.mEpsilon);

			//

			const AABBTreeNode* NP = N->getPos();
			const AABBTreeNode* NN = N->getNeg();

// Optional (disabled) optimization: pull grandchildren whose children are all
// leaves directly into this node, skipping one indirection level.
//#define NODE_FUSION
#ifdef NODE_FUSION
			PxU32 c=0;
			BV4Node* ChildNP;
			if(!NP->isLeaf() && NP->getPos()->isLeaf() && NP->getNeg()->isLeaf())
			{
				// Drag the terminal leaves directly into this BV4 node, drop internal node NP
				setPrimitive(source, tmp, 1, NP->getPos(), params.mEpsilon);
				setPrimitive(source, tmp, 2, NP->getNeg(), params.mEpsilon);
				ChildNP = NULL;
				params.mStats[1]--;
				params.mStats[3]++;
				c=1;
			}
			else
			{
				ChildNP = setNode(source, tmp, 1, NP, params);
			}

			BV4Node* ChildNN;
			if(c==0 && !NN->isLeaf() && NN->getPos()->isLeaf() && NN->getNeg()->isLeaf())
			{
				// Drag the terminal leaves directly into this BV4 node, drop internal node NN
				setPrimitive(source, tmp, 2, NN->getPos(), params.mEpsilon);
				setPrimitive(source, tmp, 3, NN->getNeg(), params.mEpsilon);
				ChildNN = NULL;
				params.mStats[1]--;
				params.mStats[3]++;
			}
			else
			{
				ChildNN = setNode(source, tmp, 2+c, NN, params);
			}

			//BV4Node* ChildNN = setNode(tmp, 2+c, NN, epsilon, params);
#else
			BV4Node* ChildNP = setNode(source, tmp, 1, NP, params);
			BV4Node* ChildNN = setNode(source, tmp, 2, NN, params);
#endif

#ifdef GU_BV4_PRECOMPUTED_NODE_SORT
			tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV);
			tmp->mBVData[2].mTempPNS = precomputeNodeSorting(NP->mBV, NN->mBV);
#endif
			// Recurse into the internal grandchildren (NULL => leaf, already stored)
			if(ChildNP)
				_BuildBV4(source, ChildNP, NP, params);
			if(ChildNN)
				_BuildBV4(source, ChildNN, NN, params);
		}
	}
	else
	{
		if(NLeaf)
		{
			// Case 3: P no leaf, N leaf
			//      ____A____
			//      P       N
			//    __|__
			//    PP  PN
			// => store as (PP,PN,N), keep bit0 and bit1
			params.mStats[2]++;

			// N leaf => store 1 triangle pointers and 2 node pointers
			// => 3 slots used, 25% wasted
			setPrimitive(source, tmp, 2, N, params.mEpsilon);

			//

			const AABBTreeNode* PP = P->getPos();
			const AABBTreeNode* PN = P->getNeg();

			BV4Node* ChildPP = setNode(source, tmp, 0, PP, params);
			BV4Node* ChildPN = setNode(source, tmp, 1, PN, params);

#ifdef GU_BV4_PRECOMPUTED_NODE_SORT
			tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV);
			tmp->mBVData[1].mTempPNS = precomputeNodeSorting(PP->mBV, PN->mBV);
#endif
			if(ChildPP)
				_BuildBV4(source, ChildPP, PP, params);
			if(ChildPN)
				_BuildBV4(source, ChildPN, PN, params);
		}
		else
		{
			// Case 4: P and N are no leaves:
			// => store as (PP,PN,NP,NN), keep bit0/bit1/bit2
			params.mStats[3]++;

			// No leaves => store 4 node pointers
			const AABBTreeNode* PP = P->getPos();
			const AABBTreeNode* PN = P->getNeg();
			const AABBTreeNode* NP = N->getPos();
			const AABBTreeNode* NN = N->getNeg();

			BV4Node* ChildPP = setNode(source, tmp, 0, PP, params);
			BV4Node* ChildPN = setNode(source, tmp, 1, PN, params);
			BV4Node* ChildNP = setNode(source, tmp, 2, NP, params);
			BV4Node* ChildNN = setNode(source, tmp, 3, NN, params);

#ifdef GU_BV4_PRECOMPUTED_NODE_SORT
			tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV);
			tmp->mBVData[1].mTempPNS = precomputeNodeSorting(PP->mBV, PN->mBV);
			tmp->mBVData[2].mTempPNS = precomputeNodeSorting(NP->mBV, NN->mBV);
#endif
			if(ChildPP)
				_BuildBV4(source, ChildPP, PP, params);
			if(ChildPN)
				_BuildBV4(source, ChildPN, PN, params);
			if(ChildNP)
				_BuildBV4(source, ChildNP, NP, params);
			if(ChildNN)
				_BuildBV4(source, ChildNN, NN, params);
		}
	}
}
+
+static bool BuildBV4Internal(BV4Tree& tree, const AABBTree& Source, SourceMesh* mesh, float epsilon)
+{
+ if(mesh->getNbTriangles()<=4)
+ return tree.init(mesh, Source.getBV());
+
+ {
+ struct Local
+ {
+ static void _CheckMD(const AABBTreeNode* current_node, PxU32& md, PxU32& cd)
+ {
+ cd++;
+ md = PxMax(md, cd);
+
+ if(current_node->getPos()) { _CheckMD(current_node->getPos(), md, cd); cd--; }
+ if(current_node->getNeg()) { _CheckMD(current_node->getNeg(), md, cd); cd--; }
+ }
+
+ static void _Check(AABBTreeNode* current_node)
+ {
+ if(current_node->isLeaf())
+ return;
+
+ AABBTreeNode* P = const_cast<AABBTreeNode*>(current_node->getPos());
+ AABBTreeNode* N = const_cast<AABBTreeNode*>(current_node->getNeg());
+ {
+ PxU32 MDP = 0; PxU32 CDP = 0; _CheckMD(P, MDP, CDP);
+ PxU32 MDN = 0; PxU32 CDN = 0; _CheckMD(N, MDN, CDN);
+
+ if(MDP>MDN)
+// if(MDP<MDN)
+ {
+ Ps::swap(*P, *N);
+ Ps::swap(P, N);
+ }
+ }
+ _Check(P);
+ _Check(N);
+ }
+ };
+ Local::_Check(const_cast<AABBTreeNode*>(Source.getNodes()));
+ }
+
+ BV4BuildParams Params(epsilon);
+ Params.mNbNodes=1; // Root node
+ Params.mStats[0]=0;
+ Params.mStats[1]=0;
+ Params.mStats[2]=0;
+ Params.mStats[3]=0;
+
+#ifdef GU_BV4_USE_NODE_POOLS
+ BV4Node* Root = Params.allocateNode();
+#else
+ BV4Node* Root = PX_NEW(BV4Node);
+#endif
+ _BuildBV4(Source, Root, Source.getNodes(), Params);
+
+ if(!tree.init(mesh, Source.getBV()))
+ return false;
+ BV4Tree* T = &tree;
+
+ // Version with variable-sized nodes in single stream
+ {
+ struct Local
+ {
+#ifdef GU_BV4_QUANTIZED_TREE
+ #ifdef GU_BV4_USE_SLABS
+ static void _ComputeMaxValues(const BV4Node* current, PxVec3& MinMax, PxVec3& MaxMax)
+ {
+ for(PxU32 i=0;i<4;i++)
+ {
+ if(current->mBVData[i].mData!=PX_INVALID_U32)
+ {
+ const CenterExtents& Box = current->mBVData[i].mAABB;
+ const PxVec3 Min = Box.mCenter - Box.mExtents;
+ const PxVec3 Max = Box.mCenter + Box.mExtents;
+ if(fabsf(Min.x)>MinMax.x) MinMax.x = fabsf(Min.x);
+ if(fabsf(Min.y)>MinMax.y) MinMax.y = fabsf(Min.y);
+ if(fabsf(Min.z)>MinMax.z) MinMax.z = fabsf(Min.z);
+ if(fabsf(Max.x)>MaxMax.x) MaxMax.x = fabsf(Max.x);
+ if(fabsf(Max.y)>MaxMax.y) MaxMax.y = fabsf(Max.y);
+ if(fabsf(Max.z)>MaxMax.z) MaxMax.z = fabsf(Max.z);
+ if(!current->isLeaf(i))
+ {
+ const BV4Node* ChildNode = current->getChild(i);
+ _ComputeMaxValues(ChildNode, MinMax, MaxMax);
+ }
+ }
+ }
+ }
+ #else
+ static void _ComputeMaxValues(const BV4Node* current, PxVec3& CMax, PxVec3& EMax)
+ {
+ for(PxU32 i=0;i<4;i++)
+ {
+ if(current->mBVData[i].mData!=PX_INVALID_U32)
+ {
+ const CenterExtents& Box = current->mBVData[i].mAABB;
+ if(fabsf(Box.mCenter.x)>CMax.x) CMax.x = fabsf(Box.mCenter.x);
+ if(fabsf(Box.mCenter.y)>CMax.y) CMax.y = fabsf(Box.mCenter.y);
+ if(fabsf(Box.mCenter.z)>CMax.z) CMax.z = fabsf(Box.mCenter.z);
+ if(fabsf(Box.mExtents.x)>EMax.x) EMax.x = fabsf(Box.mExtents.x);
+ if(fabsf(Box.mExtents.y)>EMax.y) EMax.y = fabsf(Box.mExtents.y);
+ if(fabsf(Box.mExtents.z)>EMax.z) EMax.z = fabsf(Box.mExtents.z);
+
+ if(!current->isLeaf(i))
+ {
+ const BV4Node* ChildNode = current->getChild(i);
+ _ComputeMaxValues(ChildNode, CMax, EMax);
+ }
+ }
+ }
+ }
+ #endif
+#endif
+
+ static void _Flatten(BVDataPacked* const dest, const PxU32 box_id, PxU32& current_id, const BV4Node* current, PxU32& max_depth, PxU32& current_depth
+#ifdef GU_BV4_QUANTIZED_TREE
+ , const PxVec3& CQuantCoeff, const PxVec3& EQuantCoeff,
+ const PxVec3& mCenterCoeff, const PxVec3& mExtentsCoeff
+#endif
+ )
+ {
+ // Entering a new node => increase depth
+ current_depth++;
+ // Keep track of max depth
+ if(current_depth>max_depth)
+ max_depth = current_depth;
+
+// dest[box_id] = *current;
+ const PxU32 CurrentType = current->getType();
+ for(PxU32 i=0;i<CurrentType;i++)
+ {
+#ifdef GU_BV4_QUANTIZED_TREE
+ const CenterExtents& Box = current->mBVData[i].mAABB;
+ #ifdef GU_BV4_USE_SLABS
+ const PxVec3 m = Box.mCenter - Box.mExtents;
+ const PxVec3 M = Box.mCenter + Box.mExtents;
+
+ dest[box_id+i].mAABB.mData[0].mCenter = PxI16(m.x * CQuantCoeff.x);
+ dest[box_id+i].mAABB.mData[1].mCenter = PxI16(m.y * CQuantCoeff.y);
+ dest[box_id+i].mAABB.mData[2].mCenter = PxI16(m.z * CQuantCoeff.z);
+ dest[box_id+i].mAABB.mData[0].mExtents = PxU16(PxI16(M.x * EQuantCoeff.x));
+ dest[box_id+i].mAABB.mData[1].mExtents = PxU16(PxI16(M.y * EQuantCoeff.y));
+ dest[box_id+i].mAABB.mData[2].mExtents = PxU16(PxI16(M.z * EQuantCoeff.z));
+
+ if(1)
+ {
+ for(PxU32 j=0;j<3;j++)
+ {
+ // Dequantize the min/max
+// const float qmin = float(dest[box_id+i].mAABB.mData[j].mCenter) * mCenterCoeff[j];
+// const float qmax = float(PxI16(dest[box_id+i].mAABB.mData[j].mExtents)) * mExtentsCoeff[j];
+ // Compare real & dequantized values
+/* if(qmax<M[j] || qmin>m[j])
+ {
+ int stop=1;
+ }*/
+ bool CanLeave;
+ do
+ {
+ CanLeave=true;
+ const float qmin = float(dest[box_id+i].mAABB.mData[j].mCenter) * mCenterCoeff[j];
+ const float qmax = float(PxI16(dest[box_id+i].mAABB.mData[j].mExtents)) * mExtentsCoeff[j];
+
+ if(qmax<M[j])
+ {
+// if(dest[box_id+i].mAABB.mData[j].mExtents!=0xffff)
+ if(dest[box_id+i].mAABB.mData[j].mExtents!=0x7fff)
+ {
+ dest[box_id+i].mAABB.mData[j].mExtents++;
+ CanLeave = false;
+ }
+ }
+ if(qmin>m[j])
+ {
+ if(dest[box_id+i].mAABB.mData[j].mCenter)
+ {
+ dest[box_id+i].mAABB.mData[j].mCenter--;
+ CanLeave = false;
+ }
+ }
+ }while(!CanLeave);
+ }
+ }
+ #else
+ dest[box_id+i].mAABB.mData[0].mCenter = PxI16(Box.mCenter.x * CQuantCoeff.x);
+ dest[box_id+i].mAABB.mData[1].mCenter = PxI16(Box.mCenter.y * CQuantCoeff.y);
+ dest[box_id+i].mAABB.mData[2].mCenter = PxI16(Box.mCenter.z * CQuantCoeff.z);
+ dest[box_id+i].mAABB.mData[0].mExtents = PxU16(Box.mExtents.x * EQuantCoeff.x);
+ dest[box_id+i].mAABB.mData[1].mExtents = PxU16(Box.mExtents.y * EQuantCoeff.y);
+ dest[box_id+i].mAABB.mData[2].mExtents = PxU16(Box.mExtents.z * EQuantCoeff.z);
+
+ // Fix quantized boxes
+ if(1)
+ {
+ // Make sure the quantized box is still valid
+ const PxVec3 Max = Box.mCenter + Box.mExtents;
+ const PxVec3 Min = Box.mCenter - Box.mExtents;
+ // For each axis
+ for(PxU32 j=0;j<3;j++)
+ { // Dequantize the box center
+ const float qc = float(dest[box_id+i].mAABB.mData[j].mCenter) * mCenterCoeff[j];
+ bool FixMe=true;
+ do
+ { // Dequantize the box extent
+ const float qe = float(dest[box_id+i].mAABB.mData[j].mExtents) * mExtentsCoeff[j];
+ // Compare real & dequantized values
+ if(qc+qe<Max[j] || qc-qe>Min[j]) dest[box_id+i].mAABB.mData[j].mExtents++;
+ else FixMe=false;
+ // Prevent wrapping
+ if(!dest[box_id+i].mAABB.mData[j].mExtents)
+ {
+ dest[box_id+i].mAABB.mData[j].mExtents=0xffff;
+ FixMe=false;
+ }
+ }while(FixMe);
+ }
+ }
+ #endif
+#else
+ #ifdef GU_BV4_USE_SLABS
+ // Compute min & max right here. Store temp as Center/Extents = Min/Max
+ const CenterExtents& Box = current->mBVData[i].mAABB;
+ dest[box_id+i].mAABB.mCenter = Box.mCenter - Box.mExtents;
+ dest[box_id+i].mAABB.mExtents = Box.mCenter + Box.mExtents;
+ #else
+ dest[box_id+i].mAABB = current->mBVData[i].mAABB;
+ #endif
+#endif
+ dest[box_id+i].mData = PxU32(current->mBVData[i].mData);
+// dest[box_id+i].encodePNS(current->mBVData[i].mTempPNS);
+ }
+
+ PxU32 NbToGo=0;
+ PxU32 NextIDs[4] = {PX_INVALID_U32, PX_INVALID_U32, PX_INVALID_U32, PX_INVALID_U32};
+ const BV4Node* ChildNodes[4] = {NULL,NULL,NULL,NULL};
+
+ BVDataPacked* data = dest+box_id;
+ for(PxU32 i=0;i<4;i++)
+ {
+ if(current->mBVData[i].mData!=PX_INVALID_U32 && !current->isLeaf(i))
+ {
+ const BV4Node* ChildNode = current->getChild(i);
+
+ const PxU32 NextID = current_id;
+#ifdef GU_BV4_USE_SLABS
+ current_id += 4;
+#else
+ const PxU32 ChildSize = ChildNode->getType();
+ current_id += ChildSize;
+#endif
+ const PxU32 ChildType = (ChildNode->getType()-2)<<1;
+ data[i].mData = size_t(ChildType+(NextID<<GU_BV4_CHILD_OFFSET_SHIFT_COUNT));
+ //PX_ASSERT(data[i].mData == size_t(ChildType+(NextID<<3)));
+
+ NextIDs[NbToGo] = NextID;
+ ChildNodes[NbToGo] = ChildNode;
+ NbToGo++;
+
+#ifdef GU_BV4_PRECOMPUTED_NODE_SORT
+ data[i].encodePNS(current->mBVData[i].mTempPNS);
+#endif
+//#define DEPTH_FIRST
+#ifdef DEPTH_FIRST
+ _Flatten(dest, NextID, current_id, ChildNode, max_depth, current_depth
+ #ifdef GU_BV4_QUANTIZED_TREE
+ , CQuantCoeff, EQuantCoeff, mCenterCoeff, mExtentsCoeff
+ #endif
+ );
+ current_depth--;
+#endif
+ }
+#ifdef GU_BV4_USE_SLABS
+ if(current->mBVData[i].mData==PX_INVALID_U32)
+ {
+ #ifdef GU_BV4_QUANTIZED_TREE
+ data[i].mAABB.mData[0].mExtents = 0;
+ data[i].mAABB.mData[1].mExtents = 0;
+ data[i].mAABB.mData[2].mExtents = 0;
+ data[i].mAABB.mData[0].mCenter = 0;
+ data[i].mAABB.mData[1].mCenter = 0;
+ data[i].mAABB.mData[2].mCenter = 0;
+ #else
+ data[i].mAABB.mCenter = PxVec3(0.0f);
+ data[i].mAABB.mExtents = PxVec3(0.0f);
+ #endif
+ data[i].mData = PX_INVALID_U32;
+ }
+#endif
+ }
+
+#ifndef DEPTH_FIRST
+ for(PxU32 i=0;i<NbToGo;i++)
+ {
+ _Flatten(dest, NextIDs[i], current_id, ChildNodes[i], max_depth, current_depth
+ #ifdef GU_BV4_QUANTIZED_TREE
+ , CQuantCoeff, EQuantCoeff, mCenterCoeff, mExtentsCoeff
+ #endif
+ );
+ current_depth--;
+ }
+#endif
+#ifndef GU_BV4_USE_NODE_POOLS
+ DELETESINGLE(current);
+#endif
+ }
+ };
+
+ const PxU32 NbSingleNodes = Params.mStats[0]*2+(Params.mStats[1]+Params.mStats[2])*3+Params.mStats[3]*4;
+
+ PxU32 CurID = Root->getType();
+ PxU32 InitData = PX_INVALID_U32;
+#ifdef GU_BV4_USE_SLABS
+ PX_UNUSED(NbSingleNodes);
+ const PxU32 NbNeeded = (Params.mStats[0]+Params.mStats[1]+Params.mStats[2]+Params.mStats[3])*4;
+ BVDataPacked* Nodes = reinterpret_cast<BVDataPacked*>(PX_ALLOC(sizeof(BVDataPacked)*NbNeeded, "BV4 nodes")); // PT: PX_NEW breaks alignment here
+// BVDataPacked* Nodes = PX_NEW(BVDataPacked)[NbNeeded];
+
+ if(CurID==2)
+ {
+ InitData = 0;
+ }
+ else if(CurID==3)
+ {
+ InitData = 2;
+ }
+ else if(CurID==4)
+ {
+ InitData = 4;
+ }
+
+ CurID = 4;
+// PxU32 CurID = 4;
+// PxU32 InitData = 4;
+#else
+ BVDataPacked* Nodes = PX_NEW(BVDataPacked)[NbSingleNodes];
+
+ if(CurID==2)
+ {
+ InitData = 0;
+ }
+ else if(CurID==3)
+ {
+ InitData = 2;
+ }
+ else if(CurID==4)
+ {
+ InitData = 4;
+ }
+#endif
+
+ T->mInitData = InitData;
+ PxU32 MaxDepth = 0;
+ PxU32 CurrentDepth = 0;
+#ifdef GU_BV4_QUANTIZED_TREE
+ #ifdef GU_BV4_USE_SLABS
+ PxVec3 MinQuantCoeff, MaxQuantCoeff;
+ {
+ // Get max values
+ PxVec3 MinMax(-FLT_MAX);
+ PxVec3 MaxMax(-FLT_MAX);
+ Local::_ComputeMaxValues(Root, MinMax, MaxMax);
+
+ const PxU32 nbm=15;
+
+ // Compute quantization coeffs
+ const float MinCoeff = float((1<<nbm)-1);
+ const float MaxCoeff = float((1<<nbm)-1);
+ MinQuantCoeff.x = MinMax.x!=0.0f ? MinCoeff/MinMax.x : 0.0f;
+ MinQuantCoeff.y = MinMax.y!=0.0f ? MinCoeff/MinMax.y : 0.0f;
+ MinQuantCoeff.z = MinMax.z!=0.0f ? MinCoeff/MinMax.z : 0.0f;
+ MaxQuantCoeff.x = MaxMax.x!=0.0f ? MaxCoeff/MaxMax.x : 0.0f;
+ MaxQuantCoeff.y = MaxMax.y!=0.0f ? MaxCoeff/MaxMax.y : 0.0f;
+ MaxQuantCoeff.z = MaxMax.z!=0.0f ? MaxCoeff/MaxMax.z : 0.0f;
+ // Compute and save dequantization coeffs
+ T->mCenterOrMinCoeff.x = MinMax.x/MinCoeff;
+ T->mCenterOrMinCoeff.y = MinMax.y/MinCoeff;
+ T->mCenterOrMinCoeff.z = MinMax.z/MinCoeff;
+ T->mExtentsOrMaxCoeff.x = MaxMax.x/MaxCoeff;
+ T->mExtentsOrMaxCoeff.y = MaxMax.y/MaxCoeff;
+ T->mExtentsOrMaxCoeff.z = MaxMax.z/MaxCoeff;
+ }
+ Local::_Flatten(Nodes, 0, CurID, Root, MaxDepth, CurrentDepth, MinQuantCoeff, MaxQuantCoeff, T->mCenterOrMinCoeff, T->mExtentsOrMaxCoeff);
+ #else
+ PxVec3 CQuantCoeff, EQuantCoeff;
+ {
+ // Get max values
+ PxVec3 CMax(-FLT_MAX);
+ PxVec3 EMax(-FLT_MAX);
+ Local::_ComputeMaxValues(Root, CMax, EMax);
+
+ const PxU32 nbc=15;
+ const PxU32 nbe=16;
+// const PxU32 nbc=7;
+// const PxU32 nbe=8;
+
+ const float UnitQuantError = 2.0f/65535.0f;
+ EMax.x += CMax.x*UnitQuantError;
+ EMax.y += CMax.y*UnitQuantError;
+ EMax.z += CMax.z*UnitQuantError;
+
+ // Compute quantization coeffs
+ const float CCoeff = float((1<<nbc)-1);
+ CQuantCoeff.x = CMax.x!=0.0f ? CCoeff/CMax.x : 0.0f;
+ CQuantCoeff.y = CMax.y!=0.0f ? CCoeff/CMax.y : 0.0f;
+ CQuantCoeff.z = CMax.z!=0.0f ? CCoeff/CMax.z : 0.0f;
+ const float ECoeff = float((1<<nbe)-32);
+ EQuantCoeff.x = EMax.x!=0.0f ? ECoeff/EMax.x : 0.0f;
+ EQuantCoeff.y = EMax.y!=0.0f ? ECoeff/EMax.y : 0.0f;
+ EQuantCoeff.z = EMax.z!=0.0f ? ECoeff/EMax.z : 0.0f;
+ // Compute and save dequantization coeffs
+ T->mCenterOrMinCoeff.x = CMax.x/CCoeff;
+ T->mCenterOrMinCoeff.y = CMax.y/CCoeff;
+ T->mCenterOrMinCoeff.z = CMax.z/CCoeff;
+ T->mExtentsOrMaxCoeff.x = EMax.x/ECoeff;
+ T->mExtentsOrMaxCoeff.y = EMax.y/ECoeff;
+ T->mExtentsOrMaxCoeff.z = EMax.z/ECoeff;
+ }
+ Local::_Flatten(Nodes, 0, CurID, Root, MaxDepth, CurrentDepth, CQuantCoeff, EQuantCoeff, T->mCenterOrMinCoeff, T->mExtentsOrMaxCoeff);
+ #endif
+#else
+ Local::_Flatten(Nodes, 0, CurID, Root, MaxDepth, CurrentDepth);
+#endif
+
+#ifdef GU_BV4_USE_NODE_POOLS
+ Params.releaseNodes();
+#endif
+
+#ifdef GU_BV4_USE_SLABS
+ {
+ PX_ASSERT(sizeof(BVDataSwizzled)==sizeof(BVDataPacked)*4);
+ BVDataPacked* Copy = PX_NEW(BVDataPacked)[NbNeeded];
+ memcpy(Copy, Nodes, sizeof(BVDataPacked)*NbNeeded);
+ for(PxU32 i=0;i<NbNeeded/4;i++)
+ {
+ const BVDataPacked* Src = Copy + i*4;
+ BVDataSwizzled* Dst = reinterpret_cast<BVDataSwizzled*>(Nodes + i*4);
+ for(PxU32 j=0;j<4;j++)
+ {
+ // We previously stored m/M within c/e so we just need to swizzle now
+ #ifdef GU_BV4_QUANTIZED_TREE
+ const QuantizedAABB& Box = Src[j].mAABB;
+ Dst->mX[j].mMin = Box.mData[0].mCenter;
+ Dst->mY[j].mMin = Box.mData[1].mCenter;
+ Dst->mZ[j].mMin = Box.mData[2].mCenter;
+ Dst->mX[j].mMax = PxI16(Box.mData[0].mExtents);
+ Dst->mY[j].mMax = PxI16(Box.mData[1].mExtents);
+ Dst->mZ[j].mMax = PxI16(Box.mData[2].mExtents);
+ #else
+ const CenterExtents& Box = Src[j].mAABB;
+ Dst->mMinX[j] = Box.mCenter.x;
+ Dst->mMinY[j] = Box.mCenter.y;
+ Dst->mMinZ[j] = Box.mCenter.z;
+ Dst->mMaxX[j] = Box.mExtents.x;
+ Dst->mMaxY[j] = Box.mExtents.y;
+ Dst->mMaxZ[j] = Box.mExtents.z;
+ #endif
+ Dst->mData[j] = Src[j].mData;
+ }
+ }
+ DELETEARRAY(Copy);
+ }
+ T->mNbNodes = NbNeeded;
+#else
+ PX_ASSERT(CurID==NbSingleNodes);
+ T->mNbNodes = NbSingleNodes;
+#endif
+ T->mNodes = Nodes;
+ }
+ return true;
+}
+
+/////
+
// User data for gReorderCallback: accumulates the leaf-order triangle
// permutation while walking the source AABB tree, plus per-leaf statistics.
struct ReorderData
{
	const SourceMesh*	mMesh;				// Mesh being reordered (not read by the callback itself)
	PxU32*				mOrder;				// Out: remap table, mOrder[newIndex] = original triangle index
	PxU32				mNbTrisPerLeaf;		// Max triangles allowed per leaf (debug validation only)
	PxU32				mIndex;				// Running write cursor into mOrder
	PxU32				mNbTris;			// Total triangle count (debug bounds check)
	PxU32				mStats[16];			// Histogram: mStats[n] = number of leaves containing n triangles
};
+static bool gReorderCallback(const AABBTreeNode* current, PxU32 /*depth*/, void* userData)
+{
+ ReorderData* Data = reinterpret_cast<ReorderData*>(userData);
+ if(current->isLeaf())
+ {
+ const PxU32 n = current->getNbPrimitives();
+ PX_ASSERT(n<=Data->mNbTrisPerLeaf);
+ Data->mStats[n]++;
+ PxU32* Prims = const_cast<PxU32*>(current->getPrimitives());
+
+ for(PxU32 i=0;i<n;i++)
+ {
+ PX_ASSERT(Prims[i]<Data->mNbTris);
+ Data->mOrder[Data->mIndex] = Prims[i];
+ PX_ASSERT(Data->mIndex<Data->mNbTris);
+ Prims[i] = Data->mIndex;
+ Data->mIndex++;
+ }
+ }
+ return true;
+}
+
+bool physx::Gu::BuildBV4Ex(BV4Tree& tree, SourceMesh& mesh, float epsilon, PxU32 nbTrisPerLeaf)
+{
+ const PxU32 nbTris = mesh.mNbTris;
+
+ AABBTree Source;
+ if(!Source.buildFromMesh(mesh, nbTrisPerLeaf))
+ return false;
+
+ {
+ PxU32* order = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbTris, "BV4"));
+ ReorderData RD;
+ RD.mMesh = &mesh;
+ RD.mOrder = order;
+ RD.mNbTrisPerLeaf = nbTrisPerLeaf;
+ RD.mIndex = 0;
+ RD.mNbTris = nbTris;
+ for(PxU32 i=0;i<16;i++)
+ RD.mStats[i] = 0;
+ Source.walk(gReorderCallback, &RD);
+ PX_ASSERT(RD.mIndex==nbTris);
+ mesh.remapTopology(order);
+ PX_FREE(order);
+// for(PxU32 i=0;i<16;i++)
+// printf("%d: %d\n", i, RD.mStats[i]);
+ }
+
+ if(mesh.getNbTriangles()<=nbTrisPerLeaf)
+ return tree.init(&mesh, Source.getBV());
+
+ return BuildBV4Internal(tree, Source, &mesh, epsilon);
+}