aboutsummaryrefslogtreecommitdiff
path: root/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'PhysX_3.4/Source/SceneQuery/src/SqAABBTree.cpp')
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqAABBTree.cpp1154
1 files changed, 1154 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.cpp b/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.cpp
new file mode 100644
index 00000000..191344fe
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.cpp
@@ -0,0 +1,1154 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "SqAABBTree.h"
+#include "SqAABBTreeUpdateMap.h"
+
+#include "PsMathUtils.h"
+#include "PsFoundation.h"
+#include "GuInternal.h"
+
+using namespace physx;
+using namespace Sq;
+
+#define INVALID_ID 0xffffffff
+
+// Progressive building
+// Queue of build nodes pending subdivision, used for progressive (incremental)
+// tree building. Despite being named a stack it behaves as a FIFO: push()
+// appends, pop() consumes entries in insertion order starting at mCurIndex.
+class Sq::FIFOStack : public Ps::UserAllocated
+{
+ public:
+ FIFOStack() : mStack(PX_DEBUG_EXP("SQFIFOStack")), mCurIndex(0) {}
+ ~FIFOStack() {}
+
+ PX_FORCE_INLINE PxU32 getNbEntries() const { return mStack.size(); }
+ PX_FORCE_INLINE void push(AABBTreeBuildNode* entry) { mStack.pushBack(entry); }
+ bool pop(AABBTreeBuildNode*& entry);
+ private:
+ Ps::Array<AABBTreeBuildNode*> mStack;
+ PxU32 mCurIndex; //!< Current read index within the container (next entry to pop)
+};
+
+// Pops the oldest pushed entry (FIFO order) into 'entry'.
+// Returns false when the queue is empty (nothing written to 'entry').
+// The backing array is only cleared once every pushed entry has been consumed,
+// so its memory is reused across push/pop cycles.
+bool Sq::FIFOStack::pop(AABBTreeBuildNode*& entry)
+{
+ const PxU32 NbEntries = mStack.size(); // Get current number of entries
+ if(!NbEntries)
+ return false; // Can be NULL when no value has been pushed. This is an invalid pop call.
+ entry = mStack[mCurIndex++]; // Get oldest entry, move to next one
+ if(mCurIndex==NbEntries)
+ {
+ // All values have been popped
+ mStack.clear();
+ mCurIndex=0;
+ }
+ return true;
+}
+//~Progressive building
+
+// Default ctor: no slabs allocated yet. init() must be called before getBiNode().
+NodeAllocator::NodeAllocator() : mPool(NULL), mCurrentSlabIndex(0), mTotalNbNodes(0)
+{
+}
+
+// Frees all node slabs owned by the allocator.
+NodeAllocator::~NodeAllocator()
+{
+ release();
+}
+
+// Deletes every slab's node pool and resets the allocator to its empty state.
+// Safe to call multiple times; also called from flatten() once the build nodes
+// have been copied out to runtime nodes.
+void NodeAllocator::release()
+{
+ const PxU32 nbSlabs = mSlabs.size();
+ for(PxU32 i=0;i<nbSlabs;i++)
+ {
+ Slab& s = mSlabs[i];
+ PX_DELETE_ARRAY(s.mPool);
+ }
+
+ mSlabs.reset();
+ mCurrentSlabIndex = 0;
+ mTotalNbNodes = 0;
+}
+
+// Allocates the first slab and sets up the root build node covering all primitives.
+// 'limit' is the max number of primitives per leaf; it scales down the initial
+// slab-size estimate for larger trees (extra slabs are allocated on demand).
+// NOTE(review): divides by 'limit' — callers must guarantee limit != 0; verify at call sites.
+void NodeAllocator::init(PxU32 nbPrimitives, PxU32 limit)
+{
+ const PxU32 maxSize = nbPrimitives*2 - 1; // PT: max possible #nodes for a complete tree
+ const PxU32 estimatedFinalSize = maxSize<=1024 ? maxSize : maxSize/limit;
+ mPool = PX_NEW(AABBTreeBuildNode)[estimatedFinalSize];
+ PxMemZero(mPool, sizeof(AABBTreeBuildNode)*estimatedFinalSize);
+
+ // Setup initial node. Here we have a complete permutation of the app's primitives.
+ mPool->mNodeIndex = 0;
+ mPool->mNbPrimitives = nbPrimitives;
+
+ // The root node counts as "used" in the first slab.
+ mSlabs.pushBack(Slab(mPool, 1, estimatedFinalSize));
+ mCurrentSlabIndex = 0;
+ mTotalNbNodes = 1;
+}
+
+// PT: TODO: inline this?
+// PT: TODO: inline this?
+// Returns a pointer to a pair of adjacent nodes (the positive child; the negative
+// child is the next node in memory). Allocates a fresh 1024-node slab when the
+// current slab cannot hold two more nodes.
+// NOTE(review): when a new slab is allocated, any single free node left in the
+// previous slab is abandoned (never handed out) — flatten() only walks
+// mNbUsedNodes per slab, so this wastes memory but is not incorrect.
+AABBTreeBuildNode* NodeAllocator::getBiNode()
+{
+ mTotalNbNodes += 2;
+ Slab& currentSlab = mSlabs[mCurrentSlabIndex];
+ if(currentSlab.mNbUsedNodes+2<=currentSlab.mMaxNbNodes)
+ {
+ AABBTreeBuildNode* biNode = currentSlab.mPool + currentSlab.mNbUsedNodes;
+ currentSlab.mNbUsedNodes += 2;
+ return biNode;
+ }
+ else
+ {
+ // Allocate new slab
+ const PxU32 size = 1024;
+ AABBTreeBuildNode* pool = PX_NEW(AABBTreeBuildNode)[size];
+ PxMemZero(pool, sizeof(AABBTreeBuildNode)*size);
+
+ // The new slab is appended at the end, so advancing the index keeps it current.
+ mSlabs.pushBack(Slab(pool, 2, size));
+ mCurrentSlabIndex++;
+ return pool;
+ }
+}
+
+void NodeAllocator::flatten(AABBTreeRuntimeNode* dest)
+{
+ // PT: gathers all build nodes allocated so far and flatten them to a linear destination array of smaller runtime nodes
+ // Runtime node data encoding (mData):
+ // leaf: (startIndexInIndices<<5) | ((nbPrims&15)<<1) | 1
+ // internal: (indexOfPosChild<<1), bit 0 clear
+ // Releases all build slabs when done.
+ PxU32 offset = 0;
+ const PxU32 nbSlabs = mSlabs.size();
+ for(PxU32 s=0;s<nbSlabs;s++)
+ {
+ const Slab& currentSlab = mSlabs[s];
+
+ AABBTreeBuildNode* pool = currentSlab.mPool;
+ for(PxU32 i=0;i<currentSlab.mNbUsedNodes;i++)
+ {
+ dest[offset].mBV = pool[i].mBV;
+ if(pool[i].isLeaf())
+ {
+ const PxU32 index = pool[i].mNodeIndex;
+
+ const PxU32 nbPrims = pool[i].getNbPrimitives();
+ PX_ASSERT(nbPrims<=16);
+
+ // NOTE(review): the assert allows 16 but the 4-bit mask wraps 16 to 0,
+ // which refit code treats as an empty leaf — confirm leaves never hold
+ // exactly 16 primitives (build limit is expected to be <=15).
+ dest[offset].mData = (index<<5)|((nbPrims&15)<<1)|1;
+ }
+ else
+ {
+ PX_ASSERT(pool[i].mPos);
+ // Convert the child pointer to a global node index by locating the slab
+ // that owns it and summing the used-node counts of the slabs before it.
+ PxU32 localNodeIndex = 0xffffffff;
+ PxU32 nodeBase = 0;
+ for(PxU32 j=0;j<nbSlabs;j++)
+ {
+ if(pool[i].mPos>=mSlabs[j].mPool && pool[i].mPos<mSlabs[j].mPool+mSlabs[j].mNbUsedNodes)
+ {
+ localNodeIndex = PxU32(pool[i].mPos - mSlabs[j].mPool);
+ break;
+ }
+ nodeBase += mSlabs[j].mNbUsedNodes;
+ }
+ const PxU32 nodeIndex = nodeBase + localNodeIndex;
+ PX_ASSERT(nodeIndex<mTotalNbNodes);
+ dest[offset].mData = nodeIndex<<1;
+ }
+ offset++;
+ }
+ }
+ PX_ASSERT(offset==mTotalNbNodes);
+ release();
+}
+
+// Returns the coordinate along 'axis' where the node's primitive set is split.
+static PX_FORCE_INLINE float getSplittingValue(const PxBounds3& global_box, PxU32 axis)
+{
+ // Default split value = middle of the axis (using only the box)
+ return global_box.getCenter(axis);
+}
+
+// Partitions 'prims' (in place) against the box center along 'axis':
+// primitives whose cached center exceeds the split value are moved to the front.
+// Returns the number of primitives in the "positive" half (0 or nb means the
+// split failed — all primitives landed on one side).
+static PxU32 split(const PxBounds3& box, PxU32 nb, PxU32* const PX_RESTRICT prims, PxU32 axis, const AABBTreeBuildParams& params)
+{
+ // Get node split value
+ const float splitValue = getSplittingValue(box, axis);
+
+ PxU32 nbPos = 0;
+ // Loop through all node-related primitives. Their indices range from "mNodePrimitives[0]" to "mNodePrimitives[mNbPrimitives-1]",
+ // with mNodePrimitives = mIndices + mNodeIndex (i.e. those indices map the global list in the tree params).
+
+ // PT: to avoid calling the unsafe [] operator
+ // Offsetting the base pointer by 'axis' floats lets us read the axis
+ // component of each cached center through '.x'.
+ const size_t ptrValue = size_t(params.mCache) + axis*sizeof(float);
+ const PxVec3* /*PX_RESTRICT*/ cache = reinterpret_cast<const PxVec3*>(ptrValue);
+
+ for(PxU32 i=0;i<nb;i++)
+ {
+ // Get index in global list
+ const PxU32 index = prims[i];
+
+ // Test against the splitting value. The primitive value is tested against the enclosing-box center.
+ // [We only need an approximate partition of the enclosing box here.]
+ const float primitiveValue = cache[index].x;
+ PX_ASSERT(primitiveValue==params.mCache[index][axis]);
+
+ // Reorganize the list of indices in this order: positive - negative.
+ if(primitiveValue > splitValue)
+ {
+ // Swap entries
+ prims[i] = prims[nbPos];
+ prims[nbPos] = index;
+ // Count primitives assigned to positive space
+ nbPos++;
+ }
+ }
+ return nbPos;
+}
+
+// Computes this node's bounds from its primitives and, unless the leaf limit is
+// reached, splits the primitive range along the axis of greatest variance and
+// allocates two children. On return, isLeaf() tells whether subdivision happened.
+// 'indices' is the global (mutable) permutation of primitive indices.
+void AABBTreeBuildNode::subdivide(const AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& allocator, PxU32* const indices)
+{
+ PxU32* const PX_RESTRICT primitives = indices + mNodeIndex;
+ const PxU32 nbPrims = mNbPrimitives;
+
+ // Compute global box & means for current node. The box is stored in mBV.
+ Vec4V meansV;
+ {
+ const PxBounds3* PX_RESTRICT boxes = params.mAABBArray;
+ PX_ASSERT(boxes);
+ PX_ASSERT(primitives);
+ PX_ASSERT(nbPrims);
+
+ Vec4V minV = V4LoadU(&boxes[primitives[0]].minimum.x);
+ Vec4V maxV = V4LoadU(&boxes[primitives[0]].maximum.x);
+
+ meansV = V4LoadU(&params.mCache[primitives[0]].x);
+
+ // Single pass: accumulate bounds and the sum of cached centers.
+ for(PxU32 i=1;i<nbPrims;i++)
+ {
+ const PxU32 index = primitives[i];
+ const Vec4V curMinV = V4LoadU(&boxes[index].minimum.x);
+ const Vec4V curMaxV = V4LoadU(&boxes[index].maximum.x);
+ meansV = V4Add(meansV, V4LoadU(&params.mCache[index].x));
+ minV = V4Min(minV, curMinV);
+ maxV = V4Max(maxV, curMaxV);
+ }
+
+ StoreBounds(mBV, minV, maxV);
+
+ const float coeff = 1.0f/float(nbPrims);
+ meansV = V4Scale(meansV, FLoad(coeff));
+ }
+
+ // Check the user-defined limit. Also ensures we stop subdividing if we reach a leaf node.
+ if(nbPrims<=params.mLimit)
+ return;
+
+ bool validSplit = true;
+ PxU32 nbPos;
+ {
+ // Compute variances
+ Vec4V varsV = V4Zero();
+ for(PxU32 i=0;i<nbPrims;i++)
+ {
+ const PxU32 index = primitives[i];
+ Vec4V centerV = V4LoadU(&params.mCache[index].x);
+ centerV = V4Sub(centerV, meansV);
+ centerV = V4Mul(centerV, centerV);
+ varsV = V4Add(varsV, centerV);
+ }
+ // Sample variance (divide by n-1); nbPrims>=2 here since nbPrims>mLimit>=1.
+ const float coeffNb1 = 1.0f/float(nbPrims-1);
+ varsV = V4Scale(varsV, FLoad(coeffNb1));
+ PX_ALIGN(16, PxVec4) vars;
+ V4StoreA(varsV, &vars.x);
+
+ // Choose axis with greatest variance
+ const PxU32 axis = Ps::largestAxis(PxVec3(vars.x, vars.y, vars.z));
+
+ // Split along the axis
+ nbPos = split(mBV, nbPrims, primitives, axis, params);
+
+ // Check split validity
+ if(!nbPos || nbPos==nbPrims)
+ validSplit = false;
+ }
+
+ // Check the subdivision has been successful
+ if(!validSplit)
+ {
+ // Here, all boxes lie in the same sub-space. Two strategies:
+ // - if we are over the split limit, make an arbitrary 50-50 split
+ // - else stop subdividing
+ if(nbPrims>params.mLimit)
+ {
+ nbPos = nbPrims>>1;
+ }
+ else return;
+ }
+
+ // Now create children and assign their pointers.
+ mPos = allocator.getBiNode();
+
+ stats.increaseCount(2);
+
+ // Assign children
+ PX_ASSERT(!isLeaf());
+ AABBTreeBuildNode* Pos = const_cast<AABBTreeBuildNode*>(mPos);
+ AABBTreeBuildNode* Neg = Pos + 1;
+ // Children share this node's primitive range: [mNodeIndex, mNodeIndex+nbPos)
+ // for Pos, the remainder for Neg.
+ Pos->mNodeIndex = mNodeIndex;
+ Pos->mNbPrimitives = nbPos;
+ Neg->mNodeIndex = mNodeIndex + nbPos;
+ Neg->mNbPrimitives = mNbPrimitives - nbPos;
+}
+
+// Recursively builds the subtree rooted at this node (depth-first), accumulating
+// the per-node primitive counts into stats.mTotalPrims.
+void AABBTreeBuildNode::_buildHierarchy(AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& nodeBase, PxU32* const indices)
+{
+ // Subdivide current node
+ subdivide(params, stats, nodeBase, indices);
+
+ // Recurse
+ if(!isLeaf())
+ {
+ AABBTreeBuildNode* Pos = const_cast<AABBTreeBuildNode*>(getPos());
+ PX_ASSERT(Pos);
+ // Negative child is always allocated right after the positive one.
+ AABBTreeBuildNode* Neg = Pos + 1;
+ Pos->_buildHierarchy(params, stats, nodeBase, indices);
+ Neg->_buildHierarchy(params, stats, nodeBase, indices);
+ }
+
+ stats.mTotalPrims += mNbPrimitives;
+}
+
+// Ctor: all arrays are lazily allocated by build/initTree; refit state starts empty.
+AABBTree::AABBTree() :
+ mIndices (NULL),
+ mNbIndices (0),
+ mRuntimePool (NULL),
+ mParentIndices (NULL),
+ mTotalNbNodes (0),
+ mTotalPrims (0)
+{
+// Progressive building
+ mStack = NULL;
+//~Progressive building
+
+// REFIT
+ mRefitHighestSetWord = 0;
+//~REFIT
+}
+
+// Dtor: frees all tree memory. The refit bitmask frees itself in its own dtor,
+// so release() is asked not to clear it here.
+AABBTree::~AABBTree()
+{
+ release(false);
+}
+
+// Frees all tree data (indices, runtime nodes, parent map, build allocator,
+// progressive-build stack). 'clearRefitMap' controls whether the refit bitmask
+// contents are also reset (its storage is kept for reuse).
+void AABBTree::release(bool clearRefitMap)
+{
+// Progressive building
+ PX_DELETE_AND_RESET(mStack);
+//~Progressive building
+ PX_FREE_AND_RESET(mParentIndices);
+ PX_DELETE_ARRAY(mRuntimePool);
+ mNodeAllocator.release();
+ PX_FREE_AND_RESET(mIndices);
+ mTotalNbNodes = 0;
+ mNbIndices = 0;
+
+// REFIT
+ if(clearRefitMap)
+ mRefitBitmask.clearAll();
+ mRefitHighestSetWord = 0;
+//~REFIT
+}
+
+// Initialize nodes/indices from the input tree merge data
+void AABBTree::initTree(const AABBTreeMergeData& tree)
+{
+ PX_ASSERT(mIndices == NULL);
+ PX_ASSERT(mRuntimePool == NULL);
+ PX_ASSERT(mParentIndices == NULL);
+
+ // allocate,copy indices
+ mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*tree.mNbIndices, "AABB tree indices"));
+ mNbIndices = tree.mNbIndices;
+ PxMemCopy(mIndices, tree.mIndices, sizeof(PxU32)*tree.mNbIndices);
+
+ // allocate,copy nodes
+ mRuntimePool = PX_NEW(AABBTreeRuntimeNode)[tree.mNbNodes];
+ mTotalNbNodes = tree.mNbNodes;
+ PxMemCopy(mRuntimePool, tree.mNodes, sizeof(AABBTreeRuntimeNode)*tree.mNbNodes);
+}
+
+// Shift indices of the tree by offset. Used for merged trees, when initial indices needs to be shifted to match indices in current pruning pool
+// Shift indices of the tree by offset. Used for merged trees, when initial indices needs to be shifted to match indices in current pruning pool
+void AABBTree::shiftIndices(PxU32 offset)
+{
+ for (PxU32 i = 0; i < mNbIndices; i++)
+ {
+ mIndices[i] += offset;
+ }
+}
+
+// First phase of a build: releases any previous tree, creates the identity
+// primitive permutation, initializes the node allocator, and caches all box
+// centers in params.mCache. Returns false if there is nothing to build.
+bool AABBTree::buildInit(AABBTreeBuildParams& params, BuildStats& stats)
+{
+ // Checkings
+ const PxU32 nbPrimitives = params.mNbPrimitives;
+ if(!nbPrimitives)
+ return false;
+
+ // Release previous tree
+ release();
+
+ // Init stats
+ stats.setCount(1);
+
+ // Initialize indices. This list will be modified during build.
+ mNbIndices = nbPrimitives;
+ mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbPrimitives, "AABB tree indices"));
+ // Identity permutation
+ for(PxU32 i=0;i<nbPrimitives;i++)
+ mIndices[i] = i;
+
+ // Allocate a pool of nodes
+ mNodeAllocator.init(nbPrimitives, params.mLimit);
+
+ // Compute box centers only once and cache them
+ // One extra PxVec3 is allocated so the 16-byte V4StoreU of the last center
+ // (which spills 4 bytes past the PxVec3) stays inside the buffer. Each store
+ // clobbers the next element's .x, which the next iteration rewrites.
+ params.mCache = reinterpret_cast<PxVec3*>(PX_ALLOC(sizeof(PxVec3)*(nbPrimitives+1), "cache"));
+ const float half = 0.5f;
+ const FloatV halfV = FLoad(half);
+ for(PxU32 i=0;i<nbPrimitives;i++)
+ {
+ const Vec4V curMinV = V4LoadU(&params.mAABBArray[i].minimum.x);
+ const Vec4V curMaxV = V4LoadU(&params.mAABBArray[i].maximum.x);
+ const Vec4V centerV = V4Scale(V4Add(curMaxV, curMinV), halfV);
+ V4StoreU(centerV, &params.mCache[i].x);
+ }
+ return true;
+}
+
+// Final phase of a build: frees the center cache, then flattens the build nodes
+// into the compact runtime node array (this also releases the build slabs).
+void AABBTree::buildEnd(AABBTreeBuildParams& params, BuildStats& stats)
+{
+ PX_FREE_AND_RESET(params.mCache);
+ // Get back total number of nodes
+ mTotalNbNodes = stats.getCount();
+ mTotalPrims = stats.mTotalPrims;
+
+ mRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes];
+ PX_ASSERT(mTotalNbNodes==mNodeAllocator.mTotalNbNodes);
+ mNodeAllocator.flatten(mRuntimePool);
+}
+
+// Synchronous (non-progressive) build: init, recursive hierarchy construction,
+// then flatten. Returns false when there are no primitives to build from.
+bool AABBTree::build(AABBTreeBuildParams& params)
+{
+ // Init stats
+ BuildStats stats;
+ if(!buildInit(params, stats))
+ return false;
+
+ // Build the hierarchy
+ mNodeAllocator.mPool->_buildHierarchy(params, stats, mNodeAllocator, mIndices);
+
+ buildEnd(params, stats);
+ return true;
+}
+
+// Translates every node's bounds by -shift (used when the scene origin moves).
+void AABBTree::shiftOrigin(const PxVec3& shift)
+{
+ AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
+ const PxU32 totalNbNodes = mTotalNbNodes;
+ for(PxU32 i=0; i<totalNbNodes; i++)
+ {
+ AABBTreeRuntimeNode& current = nodeBase[i];
+ if((i+1) < totalNbNodes)
+ Ps::prefetch(nodeBase + i + 1);
+
+ current.mBV.minimum -= shift;
+ current.mBV.maximum -= shift;
+ }
+}
+
+#if PX_DEBUG
+// Debug-only consistency check hook; currently a no-op placeholder.
+void AABBTree::validate() const
+{
+}
+#endif
+
+// Progressive building
+// Progressive building
+// Subdivides one node and enqueues its children (if any) for later processing.
+// Returns the node's primitive count, used by the caller to meter work per step.
+static PxU32 incrementalBuildHierarchy(FIFOStack& stack, AABBTreeBuildNode* node, AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& nodeBase, PxU32* const indices)
+{
+ node->subdivide(params, stats, nodeBase, indices);
+
+ if(!node->isLeaf())
+ {
+ AABBTreeBuildNode* pos = const_cast<AABBTreeBuildNode*>(node->getPos());
+ PX_ASSERT(pos);
+ AABBTreeBuildNode* neg = pos + 1;
+ stack.push(neg);
+ stack.push(pos);
+ }
+
+ stats.mTotalPrims += node->mNbPrimitives;
+ return node->mNbPrimitives;
+}
+
+// Time-sliced build. Phase 0 initializes and seeds the work queue; phase 1
+// subdivides nodes until roughly 'limit' primitives have been processed.
+// Returns the next phase value, 0 when the build is complete, or
+// PX_INVALID_U32 on error / unexpected phase.
+PxU32 AABBTree::progressiveBuild(AABBTreeBuildParams& params, BuildStats& stats, PxU32 progress, PxU32 limit)
+{
+ if(progress==0)
+ {
+ if(!buildInit(params, stats))
+ return PX_INVALID_U32;
+
+ mStack = PX_NEW(FIFOStack);
+ mStack->push(mNodeAllocator.mPool);
+ // NOTE(review): post-increment on a by-value parameter returns the
+ // PRE-increment value, i.e. 0 — which collides with the "done" return
+ // code below. Verify against callers; '++progress' (returning 1) looks
+ // like the intended behavior.
+ return progress++;
+ }
+ else if(progress==1)
+ {
+ PxU32 stackCount = mStack->getNbEntries();
+ if(stackCount)
+ {
+ PxU32 Total = 0;
+ const PxU32 Limit = limit;
+ while(Total<Limit)
+ {
+ AABBTreeBuildNode* Entry;
+ if(mStack->pop(Entry))
+ Total += incrementalBuildHierarchy(*mStack, Entry, params, stats, mNodeAllocator, mIndices);
+ else
+ break;
+ }
+ return progress;
+ }
+
+ buildEnd(params, stats);
+
+ PX_DELETE_AND_RESET(mStack);
+
+ return 0; // Done!
+ }
+ return PX_INVALID_U32;
+}
+//~Progressive building
+
+
+
+// Number of 32-bit words needed to store nb_bits bits (rounded up).
+static PX_FORCE_INLINE PxU32 BitsToDwords(PxU32 nb_bits)
+{
+ return (nb_bits>>5) + ((nb_bits&31) ? 1 : 0);
+}
+
+// (Re)allocates storage for nb_bits bits and clears them all.
+// Any previously held storage is freed first. Always returns true.
+bool Sq::BitArray::init(PxU32 nb_bits)
+{
+ mSize = BitsToDwords(nb_bits);
+ // Get ram for n bits
+ PX_FREE(mBits);
+ mBits = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mSize, "BitArray::mBits"));
+ // Set all bits to 0
+ clearAll();
+ return true;
+}
+
+// Grows the array so it can hold at least maxBitNumber bits, preserving
+// existing bits and zeroing the new tail. Shrink requests are ignored.
+// NOTE(review): when mSize==0, PxMemCopy is called with a NULL source and a
+// zero byte count — harmless in practice but technically UB; confirm mBits
+// is always initialized via init() before resize() in callers.
+void Sq::BitArray::resize(PxU32 maxBitNumber)
+{
+ const PxU32 newSize = BitsToDwords(maxBitNumber);
+ if (newSize <= mSize)
+ return;
+
+ PxU32* newBits = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*newSize, "BitArray::mBits"));
+ PxMemZero(newBits + mSize, (newSize - mSize) * sizeof(PxU32));
+ PxMemCopy(newBits, mBits, mSize*sizeof(PxU32));
+ PX_FREE(mBits);
+ mBits = newBits;
+ mSize = newSize;
+}
+
+// Decoders for the packed runtime-node mData word (see NodeAllocator::flatten):
+// bit 0 = leaf flag; for leaves, bits 1-4 = primitive count, bits 5+ = start
+// offset into the indices array; for internal nodes, bits 1+ = positive-child index.
+static PX_FORCE_INLINE PxU32 getNbPrimitives(PxU32 data) { return (data>>1)&15; }
+static PX_FORCE_INLINE const PxU32* getPrimitives(const PxU32* base, PxU32 data) { return base + (data>>5); }
+static PX_FORCE_INLINE const AABBTreeRuntimeNode* getPos(const AABBTreeRuntimeNode* base, PxU32 data) { return base + (data>>1); }
+static PX_FORCE_INLINE PxU32 isLeaf(PxU32 data) { return data&1; }
+
+// Recomputes a single node's bounds: from its primitives' boxes for leaves,
+// or from its two children's (already refitted) bounds for internal nodes.
+// Assumes callers process nodes bottom-up so children are up to date.
+static PX_FORCE_INLINE void refitNode(AABBTreeRuntimeNode* PX_RESTRICT current, const PxBounds3* PX_RESTRICT boxes, const PxU32* PX_RESTRICT indices, AABBTreeRuntimeNode* PX_RESTRICT const nodeBase)
+{
+ // PT: we can safely use V4 loads on both boxes and nodes here:
+ // - it's safe on boxes because we allocated one extra box in the pruning pool
+ // - it's safe on nodes because there's always some data within the node, after the BV
+
+ const PxU32 data = current->mData;
+
+ Vec4V resultMinV, resultMaxV;
+ if(isLeaf(data))
+ {
+ const PxU32 nbPrims = getNbPrimitives(data);
+ if(nbPrims)
+ {
+ const PxU32* primitives = getPrimitives(indices, data);
+ resultMinV = V4LoadU(&boxes[*primitives].minimum.x);
+ resultMaxV = V4LoadU(&boxes[*primitives].maximum.x);
+
+ if(nbPrims>1)
+ {
+ const PxU32* last = primitives + nbPrims;
+ primitives++;
+
+ while(primitives!=last)
+ {
+ resultMinV = V4Min(resultMinV, V4LoadU(&boxes[*primitives].minimum.x));
+ resultMaxV = V4Max(resultMaxV, V4LoadU(&boxes[*primitives].maximum.x));
+ primitives++;
+ }
+ }
+ }
+ else
+ {
+ // Might happen after a node has been invalidated
+ // Empty leaf: write an inverted (empty) box so it never intersects queries.
+ const float max = 0.25f * 1e33f; // ###
+ resultMinV = V4Load(max);
+ resultMaxV = V4Load(-max);
+ }
+ }
+ else
+ {
+ const AABBTreeRuntimeNode* pos = getPos(nodeBase, data);
+ const AABBTreeRuntimeNode* neg = pos+1;
+
+ const PxBounds3& posBox = pos->mBV;
+ const PxBounds3& negBox = neg->mBV;
+
+ resultMinV = V4Min(V4LoadU(&posBox.minimum.x), V4LoadU(&negBox.minimum.x));
+// resultMaxV = V4Max(V4LoadU(&posBox.maximum.x), V4LoadU(&negBox.maximum.x));
+
+#if PX_INTEL_FAMILY
+ // Load 4 floats starting at minimum.z => (min.z, max.x, max.y, max.z),
+ // then rotate so the register holds (max.x, max.y, max.z, min.z). This
+ // avoids reading one float past the last node for the root/last node.
+ Vec4V posMinV = V4LoadU(&posBox.minimum.z);
+ Vec4V negMinV = V4LoadU(&negBox.minimum.z);
+ posMinV = _mm_shuffle_ps(posMinV, posMinV, _MM_SHUFFLE(0, 3, 2, 1));
+ negMinV = _mm_shuffle_ps(negMinV, negMinV, _MM_SHUFFLE(0, 3, 2, 1));
+ resultMaxV = V4Max(posMinV, negMinV);
+#else
+ // PT: fixes the perf issue but not really convincing
+ resultMaxV = Vec4V_From_Vec3V(V3Max(V3LoadU(&posBox.maximum.x), V3LoadU(&negBox.maximum.x)));
+#endif
+ }
+
+ // PT: the V4 stores overwrite the data after the BV, but we just put it back afterwards
+ V4StoreU(resultMinV, &current->mBV.minimum.x);
+ V4StoreU(resultMaxV, &current->mBV.maximum.x);
+ current->mData = data;
+}
+
+// Refits every node from the given primitive boxes. Iterating the flat node
+// array backwards guarantees children are processed before their parents
+// (children always have higher indices than their parent).
+void AABBTree::fullRefit(const PxBounds3* boxes)
+{
+ PX_ASSERT(boxes);
+
+ const PxU32* indices = mIndices;
+ AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
+ PX_ASSERT(nodeBase);
+
+ // Bottom-up update
+ PxU32 index = mTotalNbNodes;
+ while(index--)
+ {
+ AABBTreeRuntimeNode* current = nodeBase + index;
+ if(index)
+ Ps::prefetch(current - 1);
+
+ refitNode(current, boxes, indices, nodeBase);
+ }
+}
+
+// Recursively fills parentIndices[node] = parent for the whole tree.
+// Called initially with parentNode == currentNode == root, so the root's
+// parent is itself (index 0) — markNodeForRefit relies on this to stop.
+static void _createParentArray(PxU32 totalNbNodes, PxU32* parentIndices, const AABBTreeRuntimeNode* parentNode, const AABBTreeRuntimeNode* currentNode, const AABBTreeRuntimeNode* root)
+{
+ const PxU32 parentIndex = PxU32(parentNode - root);
+ const PxU32 currentIndex = PxU32(currentNode - root);
+ PX_ASSERT(parentIndex<totalNbNodes);
+ PX_ASSERT(currentIndex<totalNbNodes);
+ PX_UNUSED(totalNbNodes);
+ parentIndices[currentIndex] = parentIndex;
+
+ if(!currentNode->isLeaf())
+ {
+ _createParentArray(totalNbNodes, parentIndices, currentNode, currentNode->getPos(root), root);
+ _createParentArray(totalNbNodes, parentIndices, currentNode, currentNode->getNeg(root), root);
+ }
+}
+
+// Marks a node and all its ancestors as needing a refit (consumed later by
+// refitMarkedNodes). Lazily creates both the refit bitmask and the parent
+// array on first use, so purely static trees pay nothing.
+void AABBTree::markNodeForRefit(TreeNodeIndex nodeIndex)
+{
+ if(!mRefitBitmask.getBits())
+ mRefitBitmask.init(mTotalNbNodes);
+
+ PX_ASSERT(nodeIndex<mTotalNbNodes);
+
+ // PT: lazy-create parent array. Memory is not wasted for purely static trees, or dynamic trees that only do "full refit".
+ if(!mParentIndices)
+ {
+ mParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mTotalNbNodes, "AABB parent indices"));
+ _createParentArray(mTotalNbNodes, mParentIndices, mRuntimePool, mRuntimePool, mRuntimePool);
+ }
+
+ // Walk up towards the root, setting bits; stop early if an ancestor is
+ // already marked (its own ancestors must then be marked too).
+ PxU32 currentIndex = nodeIndex;
+ while(1)
+ {
+ PX_ASSERT(currentIndex<mTotalNbNodes);
+ if(mRefitBitmask.isSet(currentIndex))
+ {
+ // We can early exit if we already visited the node!
+ return;
+ }
+ else
+ {
+ mRefitBitmask.setBit(currentIndex);
+ // Track the highest touched 32-bit word so refitMarkedNodes can skip the rest.
+ const PxU32 currentMarkedWord = currentIndex>>5;
+ mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord);
+
+ const PxU32 parentIndex = mParentIndices[currentIndex];
+ PX_ASSERT(parentIndex == 0 || parentIndex < currentIndex);
+ // The root's parent is itself — that's the termination condition.
+ if(currentIndex == parentIndex)
+ break;
+ currentIndex = parentIndex;
+ }
+ }
+}
+
+#define FIRST_VERSION
+#ifdef FIRST_VERSION
+// Refits only the nodes previously flagged via markNodeForRefit, scanning the
+// bitmask 32 nodes at a time from the highest touched word down. Processing
+// bits from high index to low index preserves the children-before-parents
+// order that refitNode requires. Consumed bits are cleared as we go.
+void AABBTree::refitMarkedNodes(const PxBounds3* boxes)
+{
+ if(!mRefitBitmask.getBits())
+ return; // No refit needed
+
+ {
+ /*const*/ PxU32* bits = const_cast<PxU32*>(mRefitBitmask.getBits());
+ PxU32 size = mRefitHighestSetWord+1;
+#ifdef _DEBUG
+ if(1)
+ {
+ // Sanity check: no bits should be set above the tracked highest word.
+ const PxU32 totalSize = mRefitBitmask.getSize();
+ for(PxU32 i=size;i<totalSize;i++)
+ {
+ PX_ASSERT(!bits[i]);
+ }
+ }
+ PxU32 nbRefit=0;
+#endif
+ const PxU32* indices = mIndices;
+ AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
+
+ while(size--)
+ {
+ // Test 32 bits at a time
+ const PxU32 currentBits = bits[size];
+ if(!currentBits)
+ continue;
+
+ // Walk the word's 32 bits from the highest node index down.
+ PxU32 index = (size+1)<<5;
+ PxU32 mask = PxU32(1<<((index-1)&31));
+ PxU32 _Count=32;
+ while(_Count--)
+ {
+ index--;
+ Ps::prefetch(nodeBase + index);
+
+ PX_ASSERT(size==index>>5);
+ PX_ASSERT(mask==PxU32(1<<(index&31)));
+ if(currentBits & mask)
+ {
+ refitNode(nodeBase + index, boxes, indices, nodeBase);
+#ifdef _DEBUG
+ nbRefit++;
+#endif
+ }
+ mask>>=1;
+ }
+ bits[size] = 0;
+ }
+
+ mRefitHighestSetWord = 0;
+// mRefitBitmask.clearAll();
+ }
+}
+#endif
+
+
+//#define SECOND_VERSION
+#ifdef SECOND_VERSION
+// NOTE(review): dead experimental code — SECOND_VERSION is never defined (the
+// #define above is commented out) so this block is compiled out. As written it
+// would NOT compile: 'size' and 'nbRefit' are undeclared, 'index' is redefined,
+// and the lowestSetBit loop was left unfinished and merged with the old loop.
+// Kept only as a reference for a find-lowest-set-bit based iteration.
+void AABBTree::refitMarkedNodes(const PxBounds3* boxes)
+{
+ /*const*/ PxU32* bits = const_cast<PxU32*>(mRefitBitmask.getBits());
+ if(!bits)
+ return; // No refit needed
+
+ const PxU32 lastSetBit = mRefitBitmask.findLast();
+
+ const PxU32* indices = mIndices;
+ AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
+
+ for(PxU32 w = 0; w <= lastSetBit >> 5; ++w)
+ {
+ for(PxU32 b = bits[w]; b; b &= b-1)
+ {
+ const PxU32 index = (PxU32)(w<<5|Ps::lowestSetBit(b));
+
+
+
+ while(size--)
+ {
+ // Test 32 bits at a time
+ const PxU32 currentBits = bits[size];
+ if(!currentBits)
+ continue;
+
+ PxU32 index = (size+1)<<5;
+ PxU32 mask = PxU32(1<<((index-1)&31));
+ PxU32 _Count=32;
+ while(_Count--)
+ {
+ index--;
+ Ps::prefetch(nodeBase + index);
+
+ PX_ASSERT(size==index>>5);
+ PX_ASSERT(mask==PxU32(1<<(index&31)));
+ if(currentBits & mask)
+ {
+ refitNode(nodeBase + index, boxes, indices, nodeBase);
+#ifdef _DEBUG
+ nbRefit++;
+#endif
+ }
+ mask>>=1;
+ }
+ bits[size] = 0;
+ }
+ mRefitHighestSetWord = 0;
+// mRefitBitmask.clearAll();
+ }
+}
+#endif
+
+// Re-encodes a leaf node's packed data word, shifting its primitive start
+// offset by indicesOffset (used when appending a merged tree's indices after
+// the current tree's indices).
+// NOTE(review): same 4-bit count caveat as flatten() — nbPrims==16 wraps to 0.
+PX_FORCE_INLINE static void setLeafData(PxU32& leafData, const AABBTreeRuntimeNode& node, const PxU32 indicesOffset)
+{
+ const PxU32 index = indicesOffset + (node.mData >> 5);
+ const PxU32 nbPrims = node.getNbPrimitives();
+ PX_ASSERT(nbPrims <= 16);
+ leafData = (index << 5) | ((nbPrims & 15) << 1) | 1;
+}
+
+// Copy the tree into nodes. Update node indices, leaf indices.
+void AABBTree::addRuntimeChilds(PxU32& nodeIndex, const AABBTreeMergeData& treeParams)
+{
+ PX_ASSERT(nodeIndex < mTotalNbNodes + treeParams.mNbNodes + 1);
+ const PxU32 baseNodeIndex = nodeIndex;
+
+ // copy the src tree into dest tree nodes, update its data
+ for (PxU32 i = 0; i < treeParams.mNbNodes; i++)
+ {
+ PX_ASSERT(nodeIndex < mTotalNbNodes + treeParams.mNbNodes + 1);
+ mRuntimePool[nodeIndex].mBV = treeParams.mNodes[i].mBV;
+ if (treeParams.mNodes[i].isLeaf())
+ {
+ setLeafData(mRuntimePool[nodeIndex].mData, treeParams.mNodes[i], mNbIndices);
+ }
+ else
+ {
+ const PxU32 srcNodeIndex = baseNodeIndex + (treeParams.mNodes[i].getPosIndex());
+ mRuntimePool[nodeIndex].mData = srcNodeIndex << 1;
+ mParentIndices[srcNodeIndex] = nodeIndex;
+ mParentIndices[srcNodeIndex + 1] = nodeIndex;
+ }
+ nodeIndex++;
+ }
+}
+
+// Merge tree into targetNode, where target node is a leaf
+// 1. Allocate new nodes/parent, copy all the nodes/parents
+// 2. Create new node at the end, copy the data from target node
+// 3. Copy the merge tree after the new node, create the parent map for them, update the leaf indices
+// Schematic view:
+// Target Nodes: ...Tn...
+// Input tree: R1->Rc0, Rc1...
+// Merged tree: ...Tnc->...->Nc0,R1->Rc0,Rc1...
+// where new node: Nc0==Tn and Tnc is not a leaf anymore and points to Nc0
+
+// Merges treeParams under 'targetNode' (a leaf at index targetMergeNodeIndex):
+// the target leaf becomes an internal node whose two children are (a) a new
+// node holding the old leaf's data and (b) the merged tree's root — both
+// appended at the end of a reallocated node array (old size + mNbNodes + 1).
+void AABBTree::mergeRuntimeLeaf(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 targetMergeNodeIndex)
+{
+ PX_ASSERT(mParentIndices);
+ PX_ASSERT(targetNode.isLeaf());
+
+ // 1. Allocate new nodes/parent, copy all the nodes/parents
+ // allocate new runtime pool with max combine number of nodes
+ // we allocate only 1 additional node each merge
+ AABBTreeRuntimeNode* newRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes + treeParams.mNbNodes + 1];
+ PxU32* newParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mTotalNbNodes + treeParams.mNbNodes + 1), "AABB parent indices"));
+
+ // copy the whole target nodes, we will add the new node at the end together with the merge tree
+ PxMemCopy(newRuntimePool, mRuntimePool, sizeof(AABBTreeRuntimeNode)*(mTotalNbNodes));
+ PxMemCopy(newParentIndices, mParentIndices, sizeof(PxU32)*(mTotalNbNodes));
+
+ // 2. Create new node at the end, copy the data from target node
+ PxU32 nodeIndex = mTotalNbNodes;
+ // copy the targetNode at the end of the new nodes
+ newRuntimePool[nodeIndex].mBV = targetNode.mBV;
+ newRuntimePool[nodeIndex].mData = targetNode.mData;
+ // update the parent information
+ newParentIndices[nodeIndex] = targetMergeNodeIndex;
+
+ // mark for refit
+ // If the old leaf was flagged for refit, carry the flag to its relocated copy.
+ if (mRefitBitmask.getBits() && mRefitBitmask.isSet(targetMergeNodeIndex))
+ {
+ mRefitBitmask.setBit(nodeIndex);
+ const PxU32 currentMarkedWord = nodeIndex >> 5;
+ mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord);
+ }
+
+ // swap pointers
+ PX_DELETE_ARRAY(mRuntimePool);
+ mRuntimePool = newRuntimePool;
+ PX_FREE(mParentIndices);
+ mParentIndices = newParentIndices;
+
+ // 3. Copy the merge tree after the new node, create the parent map for them, update the leaf indices
+ nodeIndex++;
+ addRuntimeChilds(nodeIndex, treeParams);
+ PX_ASSERT(nodeIndex == mTotalNbNodes + 1 + treeParams.mNbNodes);
+
+ // update the parent information for the input tree root node
+ mParentIndices[mTotalNbNodes + 1] = targetMergeNodeIndex;
+
+ // fix the child information for the target node, was a leaf before
+ // (pos child = relocated old leaf; neg child = merged tree's root right after it)
+ mRuntimePool[targetMergeNodeIndex].mData = mTotalNbNodes << 1;
+
+ // update the total number of nodes
+ mTotalNbNodes = mTotalNbNodes + 1 + treeParams.mNbNodes;
+}
+
+// Merge tree into targetNode, where target node is not a leaf
+// 1. Allocate new nodes/parent, copy the nodes/parents till targetNodePosIndex
+// 2. Create new node , copy the data from target node
+// 3. Copy the rest of the target tree nodes/parents at the end -> targetNodePosIndex + 1 + treeParams.mNbNodes
+// 4. Copy the merge tree after the new node, create the parent map for them, update the leaf indices
+// 5. Go through the nodes copied at the end and fix the parents/childs
+// Schematic view:
+// Target Nodes: ...Tn->...->Tc0,Tc1...
+// Input tree: R1->Rc0, Rc1...
+// Merged tree: ...Tn->...->Nc0,R1->Rc0,Rc1...,Tc0,Tc1...
+// where new node: Nc0->...->Tc0,Tc1
+// Merges treeParams under 'targetNode' (an internal node): a new node taking
+// over the target's old children is inserted at the target's pos-child slot,
+// followed by the merged tree; the displaced tail of the node array is shifted
+// by (1 + mNbNodes) and its parent/child links are patched afterwards.
+// See the schematic view in the comment block above.
+void AABBTree::mergeRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 targetMergeNodeIndex)
+{
+ PX_ASSERT(mParentIndices);
+ PX_ASSERT(!targetNode.isLeaf());
+
+ // Get the target node child pos, this is where we insert the new node and the input tree
+ const PxU32 targetNodePosIndex = targetNode.getPosIndex();
+
+ // 1. Allocate new nodes/parent, copy the nodes/parents till targetNodePosIndex
+ // allocate new runtime pool with max combine number of nodes
+ // we allocate only 1 additional node each merge
+ AABBTreeRuntimeNode* newRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes + treeParams.mNbNodes + 1];
+ PxU32* newParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mTotalNbNodes + treeParams.mNbNodes + 1), "AABB parent indices"));
+ // copy the untouched part of the nodes and parents
+ PxMemCopy(newRuntimePool, mRuntimePool, sizeof(AABBTreeRuntimeNode)*(targetNodePosIndex));
+ PxMemCopy(newParentIndices, mParentIndices, sizeof(PxU32)*(targetNodePosIndex));
+
+ PxU32 nodeIndex = targetNodePosIndex;
+ // 2. Create new node , copy the data from target node
+ // The new node inherits the target's children, which will live after the
+ // inserted block — hence the (+ 1 + mNbNodes) rebase of the child index.
+ newRuntimePool[nodeIndex].mBV = targetNode.mBV;
+ newRuntimePool[nodeIndex].mData = ((targetNode.mData >> 1) + 1 + treeParams.mNbNodes) << 1;
+ // update parent information
+ newParentIndices[nodeIndex] = targetMergeNodeIndex;
+
+ // handle mark for refit
+ if(mRefitBitmask.getBits() && mRefitBitmask.isSet(targetMergeNodeIndex))
+ {
+ mRefitBitmask.setBit(nodeIndex);
+ const PxU32 currentMarkedWord = nodeIndex >> 5;
+ mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord);
+ }
+
+ // 3. Copy the rest of the target tree nodes/parents at the end -> targetNodePosIndex + 1 + treeParams.mNbNodes
+ if(mTotalNbNodes - targetNodePosIndex)
+ {
+ PX_ASSERT(mTotalNbNodes - targetNodePosIndex > 0);
+ PxMemCopy(newRuntimePool + targetNodePosIndex + 1 + treeParams.mNbNodes, mRuntimePool + targetNodePosIndex, sizeof(AABBTreeRuntimeNode)*(mTotalNbNodes - targetNodePosIndex));
+ PxMemCopy(newParentIndices + targetNodePosIndex + 1 + treeParams.mNbNodes, mParentIndices + targetNodePosIndex, sizeof(PxU32)*(mTotalNbNodes - targetNodePosIndex));
+ }
+ // swap the pointers, release the old memory
+ PX_DELETE_ARRAY(mRuntimePool);
+ mRuntimePool = newRuntimePool;
+ PX_FREE(mParentIndices);
+ mParentIndices = newParentIndices;
+
+ // 4. Copy the merge tree after the new node, create the parent map for them, update the leaf indices
+ nodeIndex++;
+ addRuntimeChilds(nodeIndex, treeParams);
+ PX_ASSERT(nodeIndex == targetNodePosIndex + 1 + treeParams.mNbNodes);
+ // update the total number of nodes
+ mTotalNbNodes = mTotalNbNodes + 1 + treeParams.mNbNodes;
+
+ // update the parent information for the input tree root node
+ mParentIndices[targetNodePosIndex + 1] = targetMergeNodeIndex;
+
+ // 5. Go through the nodes copied at the end and fix the parents/childs
+ for (PxU32 i = targetNodePosIndex + 1 + treeParams.mNbNodes; i < mTotalNbNodes; i++)
+ {
+ // check if the parent is the targetNode, if yes update the parent to new node
+ if(mParentIndices[i] == targetMergeNodeIndex)
+ {
+ mParentIndices[i] = targetNodePosIndex;
+ }
+ else
+ {
+ // if parent node has been moved, update the parent node
+ if(mParentIndices[i] >= targetNodePosIndex)
+ {
+ mParentIndices[i] = mParentIndices[i] + 1 + treeParams.mNbNodes;
+ }
+ else
+ {
+ // if parent has not been moved, update its child information
+ const PxU32 parentIndex = mParentIndices[i];
+ // update the child information to point to Pos child
+ // NOTE(review): relies on pos children always sitting at odd indices
+ // (children are allocated in pairs after the root at index 0) — so
+ // only the pos child of each pair patches the parent's mData.
+ if(i % 2 != 0)
+ {
+ const PxU32 srcNodeIndex = mRuntimePool[parentIndex].getPosIndex();
+ // if child index points to a node that has been moved, update the child index
+ PX_ASSERT(!mRuntimePool[parentIndex].isLeaf());
+ PX_ASSERT(srcNodeIndex > targetNodePosIndex);
+ mRuntimePool[parentIndex].mData = (1 + treeParams.mNbNodes + srcNodeIndex) << 1;
+ }
+ }
+ }
+ if(!mRuntimePool[i].isLeaf())
+ {
+ // update the child node index
+ const PxU32 srcNodeIndex = 1 + treeParams.mNbNodes + mRuntimePool[i].getPosIndex();
+ mRuntimePool[i].mData = srcNodeIndex << 1;
+ }
+ }
+}
+
+// traverse the target node, the tree is inside the targetNode, and find the best place where merge the tree
+void AABBTree::traverseRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 nodeIndex)
+{
+ const AABBTreeRuntimeNode& srcNode = treeParams.getRootNode();
+ PX_ASSERT(srcNode.mBV.isInside(targetNode.mBV));
+
+ // Check if the srcNode(tree) can fit inside any of the target childs. If yes, traverse the target tree child
+ AABBTreeRuntimeNode& targetPosChild = *targetNode.getPos(mRuntimePool);
+ if(srcNode.mBV.isInside(targetPosChild.mBV))
+ {
+ return traverseRuntimeNode(targetPosChild, treeParams, targetNode.getPosIndex());
+ }
+
+ AABBTreeRuntimeNode& targetNegChild = *targetNode.getNeg(mRuntimePool);
+ if (srcNode.mBV.isInside(targetNegChild.mBV))
+ {
+ return traverseRuntimeNode(targetNegChild, treeParams, targetNode.getNegIndex());
+ }
+
+ // we cannot traverse target anymore, lets add the srcTree to current target node
+ if(targetNode.isLeaf())
+ mergeRuntimeLeaf(targetNode, treeParams, nodeIndex);
+ else
+ mergeRuntimeNode(targetNode, treeParams, nodeIndex);
+}
+
// Merge the input tree into the current tree.
// Traverse the tree and find the smallest node where the whole new tree fits. When we find the node
// we create one new node pointing to the original children and to the input tree root.
void AABBTree::mergeTree(const AABBTreeMergeData& treeParams)
{
	// allocate new indices buffer large enough for both trees' primitive indices and
	// copy the existing indices over before releasing the old buffer
	PxU32* newIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mNbIndices + treeParams.mNbIndices), "AABB tree indices"));
	PxMemCopy(newIndices, mIndices, sizeof(PxU32)*mNbIndices);
	PX_FREE(mIndices);
	mIndices = newIndices;
	mTotalPrims += treeParams.mNbIndices;

	// copy the new indices, re-index using the provided indicesOffset. Note that indicesOffset
	// must be provided, as original mNbIndices can be different than indicesOffset due to object releases.
	for (PxU32 i = 0; i < treeParams.mNbIndices; i++)
	{
		mIndices[mNbIndices + i] = treeParams.mIndicesOffset + treeParams.mIndices[i];
	}

	// grow the refit bitmask so it can mark every node of the merged tree
	// (the +1 accounts for the one extra node the merge inserts at the attachment point)
	mRefitBitmask.resize(mTotalNbNodes + treeParams.mNbNodes + 1);

	// lazily create the parent index array; the merge code below needs it to fix up
	// parent/child links of nodes that get relocated
	if(!mParentIndices)
	{
		mParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mTotalNbNodes, "AABB parent indices"));
		_createParentArray(mTotalNbNodes, mParentIndices, mRuntimePool, mRuntimePool, mRuntimePool);
	}

	// if new tree is inside the root AABB we will traverse the tree to find a better node where to attach the tree subnodes;
	// if the root is a leaf we merge with the root.
	if(treeParams.getRootNode().mBV.isInside(mRuntimePool[0].mBV) && !mRuntimePool[0].isLeaf())
	{
		traverseRuntimeNode(mRuntimePool[0], treeParams, 0);
	}
	else
	{
		if(mRuntimePool[0].isLeaf())
		{
			mergeRuntimeLeaf(mRuntimePool[0], treeParams, 0);
		}
		else
		{
			mergeRuntimeNode(mRuntimePool[0], treeParams, 0);
		}

		// increase the tree root AABB so it bounds the newly attached tree
		mRuntimePool[0].mBV.include(treeParams.getRootNode().mBV);
	}

#ifdef _DEBUG
	// verify parent indices: every non-root node must be one of its parent's two children,
	// and every internal node's children must point back to it
	for (PxU32 i = 0; i < mTotalNbNodes; i++)
	{
		if (i)
		{
			PX_ASSERT(mRuntimePool[mParentIndices[i]].getPosIndex() == i || mRuntimePool[mParentIndices[i]].getNegIndex() == i);
		}
		if (!mRuntimePool[i].isLeaf())
		{
			PX_ASSERT(mParentIndices[mRuntimePool[i].getPosIndex()] == i);
			PX_ASSERT(mParentIndices[mRuntimePool[i].getNegIndex()] == i);
		}
	}

	// verify the tree nodes, leafs
	for (PxU32 i = 0; i < mTotalNbNodes; i++)
	{
		if (mRuntimePool[i].isLeaf())
		{
			// NOTE(review): leaf primitive start appears to be packed in mData >> 5 — verify
			// against the AABBTreeRuntimeNode bit layout. The range must stay within the
			// merged indices buffer (mNbIndices still holds the pre-merge count here).
			const PxU32 index = mRuntimePool[i].mData >> 5;
			const PxU32 nbPrim = mRuntimePool[i].getNbPrimitives();
			PX_ASSERT(index + nbPrim <= mNbIndices + treeParams.mNbIndices);
		}
		else
		{
			const PxU32 nodeIndex = (mRuntimePool[i].getPosIndex());
			PX_ASSERT(nodeIndex < mTotalNbNodes);
		}
	}
#endif // _DEBUG

	// commit the new primitive count last — the verification above deliberately uses the
	// old value plus the number of added indices
	mNbIndices += treeParams.mNbIndices;
}
+
+
+