aboutsummaryrefslogtreecommitdiff
path: root/PhysX_3.4/Source/SceneQuery/src
diff options
context:
space:
mode:
authorgit perforce import user <a@b>2016-10-25 12:29:14 -0600
committerSheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>2016-10-25 18:56:37 -0500
commit3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
treefa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/SceneQuery/src
downloadphysx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
Initial commit:
PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/SceneQuery/src')
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.cpp816
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.h268
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqAABBTree.cpp1154
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqAABBTree.h364
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqAABBTreeQuery.h234
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.cpp197
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.h82
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqBounds.cpp75
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqBounds.h70
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.cpp2601
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.h279
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.cpp887
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.h176
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqMetaData.cpp57
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h258
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqPruningPool.cpp182
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqPruningPool.h120
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqPruningStructure.cpp427
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqSceneQueryManager.cpp500
-rw-r--r--PhysX_3.4/Source/SceneQuery/src/SqTypedef.h47
20 files changed, 8794 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.cpp b/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.cpp
new file mode 100644
index 00000000..895c5776
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.cpp
@@ -0,0 +1,816 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "foundation/PxProfiler.h"
+#include "PsIntrinsics.h"
+#include "PsUserAllocated.h"
+#include "PsBitUtils.h"
+#include "PsFoundation.h"
+#include "SqAABBPruner.h"
+#include "SqAABBTree.h"
+#include "SqPrunerMergeData.h"
+#include "GuSphere.h"
+#include "GuBox.h"
+#include "GuCapsule.h"
+#include "SqAABBTreeQuery.h"
+#include "GuBounds.h"
+
+using namespace physx;
+using namespace Gu;
+using namespace Sq;
+using namespace Cm;
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Factory for the incremental AABB pruner used by the scene-query system.
+// NOTE(review): the profiler context ID is hardcoded to 0 here, so zones from
+// pruners created through this factory are not correlated to a scene context.
+IncrementalPruner* physx::Sq::createAABBPruner(bool incrementalRebuild)
+{
+ return PX_NEW(Sq::AABBPruner)(incrementalRebuild, 0);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// PT: currently limited to 15 max
+#define NB_OBJECTS_PER_NODE 4
+
+// Constructor. 'incrementalRebuild' selects the dynamic-pruner mode where the
+// main tree is rebuilt progressively across frames (see buildStep()) while a
+// bucket pruner covers objects not yet in the tree. 'contextID' tags profile zones.
+AABBPruner::AABBPruner(bool incrementalRebuild, PxU64 contextID) :
+ mAABBTree (NULL),
+ mNewTree (NULL),
+ mCachedBoxes (NULL),
+ mNbCachedBoxes (0),
+ mNbCalls (0),
+ mTimeStamp (0),
+ mBucketPruner (&mPool),
+ mProgress (BUILD_NOT_STARTED),
+ mRebuildRateHint (100), // default: spread a rebuild over ~100 buildStep() calls
+ mAdaptiveRebuildTerm(0),
+ mIncrementalRebuild (incrementalRebuild),
+ mUncommittedChanges (false),
+ mNeedsNewTree (false),
+ mNewTreeFixups (PX_DEBUG_EXP("AABBPruner::mNewTreeFixups")),
+ mContextID (contextID)
+{
+}
+
+// Destructor: release() frees both trees, the cached boxes and the maps.
+AABBPruner::~AABBPruner()
+{
+ release();
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/**
+ * Add, Remove, Update methods
+ */
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Adds 'count' objects to the pruning pool. Returns true if all of them were
+// accepted by the pool. In incremental mode, newly added objects go to the
+// bucket pruner until the next tree rebuild picks them up — unless a pruning
+// structure is provided, in which case merge() will integrate them instead.
+bool AABBPruner::addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count, bool hasPruningStructure)
+{
+ PX_PROFILE_ZONE("SceneQuery.prunerAddObjects", mContextID);
+
+ if(!count)
+ return true;
+
+ // no need to do refitMarked for added objects since they are not in the tree
+
+ // if we have provided pruning structure, we will merge it, the changes will be applied after the objects have been added
+ if(!hasPruningStructure || !mAABBTree)
+ mUncommittedChanges = true;
+
+ // PT: TODO: 'addObjects' for bucket pruner too. Not urgent since we always call the function with count=1 at the moment
+ const PxU32 valid = mPool.addObjects(results, bounds, payload, count);
+
+ // Bucket pruner is only used while the dynamic pruner is rebuilding
+ // For the static pruner a full rebuild will happen in commit() every time we modify something, this is not true if
+ // pruning structure was provided. The objects tree will be merged directly into the static tree. No rebuild will be triggered.
+ if(mIncrementalRebuild && mAABBTree)
+ {
+ mNeedsNewTree = true; // each add forces a tree rebuild
+
+ // if a pruner structure is provided, we dont move the new objects into bucket pruner
+ // the pruning structure will be merged into the bucket pruner
+ if(!hasPruningStructure)
+ {
+ for(PxU32 i=0;i<valid;i++)
+ mBucketPruner.addObject(payload[i], bounds[i], mTimeStamp);
+ }
+ }
+ return valid==count;
+}
+
+// Updates the bounds of 'count' objects (newBounds indexed the same as handles;
+// may be NULL if the pool was already updated by the caller). In incremental
+// mode, objects still in the current tree are marked for refit; objects that
+// were evicted to the bucket pruner are updated there instead.
+void AABBPruner::updateObjects(const PrunerHandle* handles, const PxBounds3* newBounds, PxU32 count)
+{
+ PX_PROFILE_ZONE("SceneQuery.prunerUpdateObjects", mContextID);
+
+ if(!count)
+ return;
+
+ mUncommittedChanges = true;
+
+ if(newBounds)
+ {
+ for(PxU32 i=0; i<count; i++)
+ mPool.setWorldAABB(handles[i], newBounds[i]); // only updates the bounds
+ }
+
+ if(mIncrementalRebuild && mAABBTree)
+ {
+ mNeedsNewTree = true; // each update forces a tree rebuild
+ // reuse the pool's current boxes from here on, so the code below works even when the caller passed NULL
+ newBounds = mPool.getCurrentWorldBoxes();
+ PrunerPayload* payloads = mPool.getObjects();
+ for(PxU32 i=0; i<count; i++)
+ {
+ const PoolIndex poolIndex = mPool.getIndex(handles[i]);
+ const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex];
+ if(treeNodeIndex!=INVALID_NODE_ID) // this means it's in the current tree still and hasn't been removed
+ mAABBTree->markNodeForRefit(treeNodeIndex);
+ else // otherwise it means it should be in the bucket pruner
+ {
+ bool found = mBucketPruner.updateObject(newBounds[poolIndex], payloads[poolIndex]);
+ PX_UNUSED(found); PX_ASSERT(found);
+ }
+
+ // if a background rebuild is past the mapping stage, remember this object so the new tree gets refit too
+ if(mProgress==BUILD_NEW_MAPPING || mProgress==BUILD_FULL_REFIT)
+ mToRefit.pushBack(poolIndex);
+ }
+ }
+}
+
+// Updates 'count' objects whose new bounds live at newBounds[indices[i]].
+// Mirrors the non-indexed overload above: tree nodes are marked for refit,
+// bucket-pruner residents are updated in place, and in-flight rebuilds record
+// the pool indices for a post-swap refit.
+void AABBPruner::updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count)
+{
+ PX_PROFILE_ZONE("SceneQuery.prunerUpdateObjects", mContextID);
+
+ // PT: fix: early-out on empty batches like the other updateObjects() overload,
+ // so an empty call does not set mUncommittedChanges and force a spurious commit.
+ if(!count)
+ return;
+
+ mUncommittedChanges = true;
+
+ mPool.updateObjects(handles, indices, newBounds, count);
+
+ if (mIncrementalRebuild && mAABBTree)
+ {
+ mNeedsNewTree = true; // each update forces a tree rebuild
+ for (PxU32 i = 0; i<count; i++)
+ {
+ const PoolIndex poolIndex = mPool.getIndex(handles[i]);
+ const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex];
+ if (treeNodeIndex != INVALID_NODE_ID) // this means it's in the current tree still and hasn't been removed
+ mAABBTree->markNodeForRefit(treeNodeIndex);
+ else // otherwise it means it should be in the bucket pruner
+ {
+ bool found = mBucketPruner.updateObject(newBounds[indices[i]], mPool.getPayload(handles[i]));
+ PX_UNUSED(found); PX_ASSERT(found);
+ }
+
+ // if a background rebuild is past the mapping stage, remember this object so the new tree gets refit too
+ if (mProgress == BUILD_NEW_MAPPING || mProgress == BUILD_FULL_REFIT)
+ mToRefit.pushBack(poolIndex);
+ }
+ }
+}
+
+// Removes 'count' objects. The pool removal swaps the last object into the
+// freed slot, so in incremental mode both the tree map and the bucket pruner
+// must be told about that index relocation; the removal is also recorded in
+// mNewTreeFixups so an in-flight background tree can be patched after it lands.
+void AABBPruner::removeObjects(const PrunerHandle* handles, PxU32 count)
+{
+ PX_PROFILE_ZONE("SceneQuery.prunerRemoveObjects", mContextID);
+
+ if(!count)
+ return;
+
+ mUncommittedChanges = true;
+
+ for(PxU32 i=0; i<count; i++)
+ {
+ const PrunerHandle h = handles[i];
+ // copy the payload before removing it since we need to know the payload to remove it from the bucket pruner
+ const PrunerPayload removedPayload = mPool.getPayload(h);
+ const PoolIndex poolIndex = mPool.getIndex(h); // save the pool index for removed object
+ const PoolIndex poolRelocatedLastIndex = mPool.removeObject(h); // save the lastIndex returned by removeObject
+ if(mIncrementalRebuild && mAABBTree)
+ {
+ mNeedsNewTree = true;
+
+ const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex]; // already removed from pool but still in tree map
+ const PrunerPayload swappedPayload = mPool.getObjects()[poolIndex];
+ if(treeNodeIndex!=INVALID_NODE_ID) // can be invalid if removed
+ {
+ mAABBTree->markNodeForRefit(treeNodeIndex); // mark the spot as blank
+ mBucketPruner.swapIndex(poolIndex, swappedPayload, poolRelocatedLastIndex); // if swapped index is in bucket pruner
+ }
+ else
+ {
+ // NOTE(review): this compares a TreeNodeIndex against the PrunerHandle
+ // sentinel — presumably both sentinels share the same bit pattern; confirm
+ // against SqTypedef.h before changing to INVALID_NODE_ID.
+ PX_ASSERT(treeNodeIndex==INVALID_PRUNERHANDLE);
+ PxU32 timeStamp;
+ bool status = mBucketPruner.removeObject(removedPayload, poolIndex, swappedPayload, poolRelocatedLastIndex, timeStamp);
+ PX_ASSERT(status);
+ PX_UNUSED(status);
+ }
+
+ mTreeMap.invalidate(poolIndex, poolRelocatedLastIndex, *mAABBTree);
+ if(mNewTree)
+ mNewTreeFixups.pushBack(NewTreeFixup(poolIndex, poolRelocatedLastIndex));
+ }
+ }
+
+ if (mPool.getNbActiveObjects()==0)
+ {
+ // this is just to make sure we release all the internal data once all the objects are out of the pruner
+ // since this is the only place we know that and we don't want to keep memory reserved
+ release();
+
+ // Pruner API requires a commit before the next query, even if we ended up removing the entire tree here. This
+ // forces that to happen.
+ mUncommittedChanges = true;
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/**
+ * Query Implementation
+ */
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Overlap query: tests 'queryVolume' against the main tree, then (in incremental
+// mode) against the bucket pruner holding not-yet-integrated objects. Returns
+// the callback's 'again' flag; a false from the tree pass short-circuits the
+// bucket pass. Requires commit() to have been called since the last change.
+PxAgain AABBPruner::overlap(const ShapeData& queryVolume, PrunerCallback& pcb) const
+{
+ PX_ASSERT(!mUncommittedChanges);
+
+ PxAgain again = true;
+
+ if(mAABBTree)
+ {
+ // select the cheapest volume-vs-AABB test for the query geometry
+ switch(queryVolume.getType())
+ {
+ case PxGeometryType::eBOX:
+ {
+ if(queryVolume.isOBB())
+ {
+ const Gu::OBBAABBTest test(queryVolume.getPrunerWorldPos(), queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerBoxGeomExtentsInflated());
+ again = AABBTreeOverlap<Gu::OBBAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb);
+ }
+ else
+ {
+ const Gu::AABBAABBTest test(queryVolume.getPrunerInflatedWorldAABB());
+ again = AABBTreeOverlap<Gu::AABBAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb);
+ }
+ }
+ break;
+ case PxGeometryType::eCAPSULE:
+ {
+ const Gu::Capsule& capsule = queryVolume.getGuCapsule();
+ const Gu::CapsuleAABBTest test( capsule.p1, queryVolume.getPrunerWorldRot33().column0,
+ queryVolume.getCapsuleHalfHeight()*2.0f, PxVec3(capsule.radius*SQ_PRUNER_INFLATION));
+ again = AABBTreeOverlap<Gu::CapsuleAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb);
+ }
+ break;
+ case PxGeometryType::eSPHERE:
+ {
+ const Gu::Sphere& sphere = queryVolume.getGuSphere();
+ Gu::SphereAABBTest test(sphere.center, sphere.radius);
+ again = AABBTreeOverlap<Gu::SphereAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb);
+ }
+ break;
+ case PxGeometryType::eCONVEXMESH:
+ {
+ // convex meshes are conservatively tested via their oriented bounding box
+ const Gu::OBBAABBTest test(queryVolume.getPrunerWorldPos(), queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerBoxGeomExtentsInflated());
+ again = AABBTreeOverlap<Gu::OBBAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb);
+ }
+ break;
+ case PxGeometryType::ePLANE:
+ case PxGeometryType::eTRIANGLEMESH:
+ case PxGeometryType::eHEIGHTFIELD:
+ case PxGeometryType::eGEOMETRY_COUNT:
+ case PxGeometryType::eINVALID:
+ PX_ALWAYS_ASSERT_MESSAGE("unsupported overlap query volume geometry type");
+ }
+ }
+
+ if(again && mIncrementalRebuild && mBucketPruner.getNbObjects())
+ again = mBucketPruner.overlap(queryVolume, pcb);
+
+ return again;
+}
+
+// Sweep query: swept-AABB traversal of the main tree (the query volume's
+// inflated world AABB is swept along unitDir), then the bucket pruner.
+// inOutDistance is both the max sweep distance and the clipped hit distance.
+PxAgain AABBPruner::sweep(const ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const
+{
+ PX_ASSERT(!mUncommittedChanges);
+
+ PxAgain again = true;
+
+ if(mAABBTree)
+ {
+ const PxBounds3& aabb = queryVolume.getPrunerInflatedWorldAABB();
+ const PxVec3 extents = aabb.getExtents();
+ again = AABBTreeRaycast<true>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, aabb.getCenter(), unitDir, inOutDistance, extents, pcb);
+ }
+
+ if(again && mIncrementalRebuild && mBucketPruner.getNbObjects())
+ again = mBucketPruner.sweep(queryVolume, unitDir, inOutDistance, pcb);
+
+ return again;
+}
+
+// Raycast query: same traversal as sweep() but with zero extents (a pure ray).
+PxAgain AABBPruner::raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const
+{
+ PX_ASSERT(!mUncommittedChanges);
+
+ PxAgain again = true;
+
+ if(mAABBTree)
+ again = AABBTreeRaycast<false>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, origin, unitDir, inOutDistance, PxVec3(0.0f), pcb);
+
+ if(again && mIncrementalRebuild && mBucketPruner.getNbObjects())
+ again = mBucketPruner.raycast(origin, unitDir, inOutDistance, pcb);
+
+ return again;
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/**
+ * Other methods of Pruner Interface
+ */
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// This isn't part of the pruner virtual interface, but it is part of the public interface
+// of AABBPruner - it gets called by SqManager to force a rebuild, and requires a commit() before
+// queries can take place
+
+// Drops all internal structures (trees, maps, cached boxes) but keeps the pool.
+// A commit() is then mandatory before the next query.
+void AABBPruner::purge()
+{
+ release();
+ mUncommittedChanges = true; // this ensures a commit() must happen before any query
+}
+
+// Sets how many buildStep() calls a full background rebuild should be spread
+// over. Must be > 3: three steps are consumed by the rebuild pipeline stages
+// (init / mapping / refit), hence the subtraction below.
+void AABBPruner::setRebuildRateHint(PxU32 nbStepsForRebuild)
+{
+ PX_ASSERT(nbStepsForRebuild > 3);
+ mRebuildRateHint = (nbStepsForRebuild-3); // looks like a magic number to account for the rebuild pipeline latency
+ mAdaptiveRebuildTerm = 0;
+}
+
+// Commit either performs a refit if background rebuild is not yet finished
+// or swaps the current tree for the second tree rebuilt in the background
+void AABBPruner::commit()
+{
+ PX_PROFILE_ZONE("SceneQuery.prunerCommit", mContextID);
+
+ if(!mUncommittedChanges)
+ // Q: seems like this is both for refit and finalization so is this is correct?
+ // i.e. in a situation when we started rebuilding a tree and didn't add anything since
+ // who is going to set mUncommittedChanges to true?
+ // A: it's set in buildStep at final stage, so that finalization is forced.
+ // Seems a bit difficult to follow and verify correctness.
+ return;
+
+ mUncommittedChanges = false;
+
+ if(!mAABBTree || !mIncrementalRebuild)
+ {
+#if PX_CHECKED
+ if(!mIncrementalRebuild && mAABBTree)
+ Ps::getFoundation().error(PxErrorCode::ePERF_WARNING, __FILE__, __LINE__, "SceneQuery static AABB Tree rebuilt, because a shape attached to a static actor was added, removed or moved, and PxSceneDesc::staticStructure is set to eSTATIC_AABB_TREE.");
+#endif
+ fullRebuildAABBTree();
+ return;
+ }
+
+ // Note: it is not safe to call AABBPruner::build() here
+ // because the first thread will perform one step of the incremental update,
+ // continue raycasting, while the second thread performs the next step in
+ // the incremental update
+
+ // Calling Refit() below is safe. It will call
+ // StaticPruner::build() when necessary. Both will early
+ // exit if the tree is already up to date, if it is not already, then we
+ // must be the first thread performing raycasts on a dirty tree and other
+ // scene query threads will be locked out by the write lock in
+ // SceneQueryManager::flushUpdates()
+
+
+ if (mProgress != BUILD_FINISHED)
+ {
+ // Calling refit because the second tree is not ready to be swapped in (mProgress != BUILD_FINISHED)
+ // Generally speaking as long as things keep moving the second build will never catch up with true state
+ refitUpdatedAndRemoved();
+ }
+ else
+ {
+ PX_PROFILE_ZONE("SceneQuery.prunerNewTreeFinalize", mContextID);
+
+ {
+ PX_PROFILE_ZONE("SceneQuery.prunerNewTreeSwitch", mContextID);
+
+ PX_DELETE(mAABBTree); // delete the old tree
+ PX_FREE_AND_RESET(mCachedBoxes);
+ mProgress = BUILD_NOT_STARTED; // reset the build state to initial
+
+ // Adjust adaptive term to get closer to specified rebuild rate.
+ // perform an even division correction to make sure the rebuild rate adds up
+ if (mNbCalls > mRebuildRateHint)
+ mAdaptiveRebuildTerm++;
+ else if (mNbCalls < mRebuildRateHint)
+ mAdaptiveRebuildTerm--;
+
+ // Switch trees
+#if PX_DEBUG
+ mNewTree->validate();
+#endif
+ mAABBTree = mNewTree; // set current tree to progressively rebuilt tree
+ mNewTree = NULL; // clear out the progressively rebuild tree pointer
+ }
+
+ {
+ PX_PROFILE_ZONE("SceneQuery.prunerNewTreeMapping", mContextID);
+
+ // rebuild the tree map to match the current (newly built) tree
+ mTreeMap.initMap(PxMax(mPool.getNbActiveObjects(), mNbCachedBoxes), *mAABBTree);
+
+ // The new mapping has been computed using only indices stored in the new tree. Those indices map the pruning pool
+ // we had when starting to build the tree. We need to re-apply recorded moves to fix the tree that finished rebuilding.
+ // AP: the problem here is while we are rebuilding the tree there are ongoing modifications to the current tree
+ // but the background build has a cached copy of all the AABBs at the time it was started
+ // (and will produce indices referencing those)
+ // Things that can happen in the meantime: update, remove, add, commit
+ for(NewTreeFixup* r = mNewTreeFixups.begin(); r < mNewTreeFixups.end(); r++)
+ {
+ // PT: we're not doing a full refit after this point anymore, so the remaining deleted objects must be manually marked for
+ // refit (otherwise their AABB in the tree would remain valid, leading to crashes when the corresponding index is 0xffffffff).
+ // We must do this before invalidating the corresponding tree nodes in the map, obviously (otherwise we'd be reading node
+ // indices that we already invalidated).
+ const PoolIndex poolIndex = r->removedIndex;
+ const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex];
+ if(treeNodeIndex!=INVALID_NODE_ID)
+ mAABBTree->markNodeForRefit(treeNodeIndex);
+
+ mTreeMap.invalidate(r->removedIndex, r->relocatedLastIndex, *mAABBTree);
+ }
+ mNewTreeFixups.clear(); // clear out the fixups since we just applied them all
+ }
+
+ {
+ PX_PROFILE_ZONE("SceneQuery.prunerNewTreeFinalRefit", mContextID);
+
+ const PxU32 size = mToRefit.size();
+ for(PxU32 i=0;i<size;i++)
+ {
+ const PoolIndex poolIndex = mToRefit[i];
+ const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex];
+ if(treeNodeIndex!=INVALID_NODE_ID)
+ mAABBTree->markNodeForRefit(treeNodeIndex);
+ }
+ mToRefit.clear();
+ refitUpdatedAndRemoved();
+ }
+
+ {
+ PX_PROFILE_ZONE("SceneQuery.prunerNewTreeRemoveObjects", mContextID);
+
+ PxU32 nbRemovedPairs = mBucketPruner.removeMarkedObjects(mTimeStamp-1);
+ PX_UNUSED(nbRemovedPairs);
+
+ mNeedsNewTree = mBucketPruner.getNbObjects()>0;
+ }
+ }
+
+ updateBucketPruner();
+}
+
+
+// Shifts the scene origin: all stored bounds (pool, both trees, bucket pruner)
+// are translated by 'shift' so queries keep working in the new frame.
+void AABBPruner::shiftOrigin(const PxVec3& shift)
+{
+ mPool.shiftOrigin(shift);
+
+ if(mAABBTree)
+ mAABBTree->shiftOrigin(shift);
+
+ if(mIncrementalRebuild)
+ mBucketPruner.shiftOrigin(shift);
+
+ if(mNewTree)
+ mNewTree->shiftOrigin(shift);
+}
+
+#include "CmRenderOutput.h"
+// Debug visualization: recursively draws every node AABB of the main tree in
+// 'color', then lets the bucket pruner draw the objects not yet in the tree.
+void AABBPruner::visualize(Cm::RenderOutput& out, PxU32 color) const
+{
+ // getAABBTree() asserts when pruner is dirty. NpScene::visualization() does not enforce flushUpdate. see DE7834
+ const AABBTree* tree = mAABBTree;
+
+ if(tree)
+ {
+ struct Local
+ {
+ // depth-first draw of the whole subtree under 'node'
+ static void _Draw(const AABBTreeRuntimeNode* root, const AABBTreeRuntimeNode* node, Cm::RenderOutput& out_)
+ {
+ out_ << Cm::DebugBox(node->mBV, true);
+ if (node->isLeaf())
+ return;
+ _Draw(root, node->getPos(root), out_);
+ _Draw(root, node->getNeg(root), out_);
+ }
+ };
+ out << PxTransform(PxIdentity);
+ out << color;
+ Local::_Draw(tree->getNodes(), tree->getNodes(), out);
+ }
+
+ // Render added objects not yet in the tree
+ out << PxTransform(PxIdentity);
+ out << PxU32(PxDebugColor::eARGB_WHITE);
+
+ if(mIncrementalRebuild && mBucketPruner.getNbObjects())
+ mBucketPruner.visualize(out, color);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/**
+ * Internal methods
+ */
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// One step of the background tree rebuild state machine:
+// BUILD_NOT_STARTED -> BUILD_INIT -> BUILD_IN_PROGRESS -> BUILD_NEW_MAPPING
+// -> BUILD_FULL_REFIT -> BUILD_LAST_FRAME -> BUILD_FINISHED.
+// Returns true when no rebuild is needed or the new tree is ready for commit().
+bool AABBPruner::buildStep()
+{
+ PX_PROFILE_ZONE("SceneQuery.prunerBuildStep", mContextID);
+
+ PX_ASSERT(mIncrementalRebuild);
+ if(mNeedsNewTree)
+ {
+ if(mProgress==BUILD_NOT_STARTED)
+ {
+ const PxU32 nbObjects = mPool.getNbActiveObjects();
+ if(!nbObjects)
+ return true;
+
+ PX_DELETE(mNewTree);
+ mNewTree = PX_NEW(AABBTree);
+
+ // snapshot the current bounds; the build works on this copy while the live pool keeps changing
+ mNbCachedBoxes = nbObjects;
+ // PT: we always allocate one extra box, to make sure we can safely use V4 loads on the array
+ mCachedBoxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*(nbObjects+1), "PxBound3"));
+
+ PxMemCopy(mCachedBoxes, mPool.getCurrentWorldBoxes(), nbObjects*sizeof(PxBounds3));
+
+ // PT: objects currently in the bucket pruner will be in the new tree. They are marked with the
+ // current timestamp (mTimeStamp). However more objects can get added while we compute the new tree,
+ // and those ones will not be part of it. These new objects will be marked with the new timestamp
+ // value (mTimeStamp+1), and we can use these different values to remove the proper objects from
+ // the bucket pruner (when switching to the new tree).
+ mTimeStamp++;
+ mBuilder.reset();
+ mBuilder.mNbPrimitives = mNbCachedBoxes;
+ mBuilder.mAABBArray = mCachedBoxes;
+ mBuilder.mLimit = NB_OBJECTS_PER_NODE;
+
+ mBuildStats.reset();
+
+ // start recording modifications to the tree made during rebuild to reapply (fix the new tree) eventually
+ PX_ASSERT(mNewTreeFixups.size()==0);
+
+ mProgress = BUILD_INIT;
+ }
+ else if(mProgress==BUILD_INIT)
+ {
+ mNewTree->progressiveBuild(mBuilder, mBuildStats, 0, 0);
+ mProgress = BUILD_IN_PROGRESS;
+ mNbCalls = 0;
+
+ // Use a heuristic to estimate the number of work units needed for rebuilding the tree.
+ // The general idea is to use the number of work units of the previous tree to build the new tree.
+ // This works fine as long as the number of leaves remains more or less the same for the old and the
+ // new tree. If that is not the case, this estimate can be way off and the work units per step will
+ // be either much too small or too large. Hence, in that case we will try to estimate the number of work
+ // units based on the number of leaves of the new tree as follows:
+ //
+ // - Assume new tree with n leaves is perfectly-balanced
+ // - Compute the depth of perfectly-balanced tree with n leaves
+ // - Estimate number of working units for the new tree
+
+ const PxU32 depth = Ps::ilog2(mBuilder.mNbPrimitives); // Note: This is the depth without counting the leaf layer
+ const PxU32 estimatedNbWorkUnits = depth * mBuilder.mNbPrimitives; // Estimated number of work units for new tree
+ const PxU32 estimatedNbWorkUnitsOld = mAABBTree->getTotalPrims();
+ if ((estimatedNbWorkUnits <= (estimatedNbWorkUnitsOld << 1)) && (estimatedNbWorkUnits >= (estimatedNbWorkUnitsOld >> 1)))
+ // The two estimates do not differ by more than a factor 2
+ mTotalWorkUnits = estimatedNbWorkUnitsOld;
+ else
+ {
+ mAdaptiveRebuildTerm = 0;
+ mTotalWorkUnits = estimatedNbWorkUnits;
+ }
+
+ // apply the adaptive correction accumulated in commit(), clamped to stay non-negative
+ const PxI32 totalWorkUnits = PxI32(mTotalWorkUnits + (mAdaptiveRebuildTerm * mBuilder.mNbPrimitives));
+ mTotalWorkUnits = PxU32(PxMax(totalWorkUnits, 0));
+ }
+ else if(mProgress==BUILD_IN_PROGRESS)
+ {
+ mNbCalls++;
+ const PxU32 Limit = 1 + (mTotalWorkUnits / mRebuildRateHint);
+ // looks like progressiveRebuild returns 0 when finished
+ if (!mNewTree->progressiveBuild(mBuilder, mBuildStats, 1, Limit))
+ {
+ // Done
+ mProgress = BUILD_NEW_MAPPING;
+#if PX_DEBUG
+ mNewTree->validate();
+#endif
+ }
+ }
+ else if(mProgress==BUILD_NEW_MAPPING)
+ {
+ mNbCalls++;
+ mProgress = BUILD_FULL_REFIT;
+
+ // PT: we can't call fullRefit without creating the new mapping first: the refit function will fetch boxes from
+ // the pool using "primitive indices" captured in the tree. But some of these indices may have been invalidated
+ // if objects got removed while the tree was built. So we need to invalidate the corresponding nodes before refit,
+ // that way the #prims will be zero and the code won't fetch a wrong box (which may now belong to a different object).
+ {
+ PX_PROFILE_ZONE("SceneQuery.prunerNewTreeMapping", mContextID);
+
+ if(mNewTreeFixups.size())
+ {
+ mNewTreeMap.initMap(PxMax(mPool.getNbActiveObjects(), mNbCachedBoxes), *mNewTree);
+
+ // The new mapping has been computed using only indices stored in the new tree. Those indices map the pruning pool
+ // we had when starting to build the tree. We need to re-apply recorded moves to fix the tree.
+ for(NewTreeFixup* r = mNewTreeFixups.begin(); r < mNewTreeFixups.end(); r++)
+ mNewTreeMap.invalidate(r->removedIndex, r->relocatedLastIndex, *mNewTree);
+
+ mNewTreeFixups.clear();
+#if PX_DEBUG
+ mNewTree->validate();
+#endif
+ }
+ }
+ }
+ else if(mProgress==BUILD_FULL_REFIT)
+ {
+ mNbCalls++;
+ mProgress = BUILD_LAST_FRAME;
+
+ {
+ PX_PROFILE_ZONE("SceneQuery.prunerNewTreeFullRefit", mContextID);
+
+ // We need to refit the new tree because objects may have moved while we were building it.
+ mNewTree->fullRefit(mPool.getCurrentWorldBoxes());
+ }
+ }
+ else if(mProgress==BUILD_LAST_FRAME)
+ {
+ mProgress = BUILD_FINISHED;
+ }
+
+ // This is required to be set because commit handles both refit and a portion of build finalization (why?)
+ // This is overly conservative also only necessary in case there were no updates at all to the tree since the last tree swap
+ // It also overly conservative in a sense that it could be set only if mProgress was just set to BUILD_FINISHED
+ mUncommittedChanges = true;
+
+ return mProgress==BUILD_FINISHED;
+ }
+
+ return true;
+}
+
+
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+/**
+ * Builds an AABB-tree for objects in the pruning pool.
+ * \return true if success
+ */
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Synchronously rebuilds the main AABB tree from the pool's current bounds.
+// Used by the static pruner path of commit(), and on the first commit.
+// Returns true on success (or when there is nothing to build).
+bool AABBPruner::fullRebuildAABBTree()
+{
+ PX_PROFILE_ZONE("SceneQuery.prunerFullRebuildAABBTree", mContextID);
+
+ // Release possibly already existing tree
+ PX_DELETE_AND_RESET(mAABBTree);
+
+ // Don't bother building an AABB-tree if there isn't a single static object
+ const PxU32 nbObjects = mPool.getNbActiveObjects();
+ if(!nbObjects)
+ return true;
+
+ bool Status;
+ {
+ // Create a new tree
+ mAABBTree = PX_NEW(AABBTree);
+
+ AABBTreeBuildParams TB;
+ TB.mNbPrimitives = nbObjects;
+ TB.mAABBArray = mPool.getCurrentWorldBoxes();
+ TB.mLimit = NB_OBJECTS_PER_NODE;
+ Status = mAABBTree->build(TB);
+ }
+
+ // No need for the tree map for static pruner
+ if(mIncrementalRebuild)
+ mTreeMap.initMap(PxMax(nbObjects,mNbCachedBoxes),*mAABBTree);
+
+ return Status;
+}
+
+// called in the end of commit(), but only if mIncrementalRebuild is true
+// called in the end of commit(), but only if mIncrementalRebuild is true
+// Rebuilds the bucket pruner's internal structures after this frame's changes.
+void AABBPruner::updateBucketPruner()
+{
+ PX_PROFILE_ZONE("SceneQuery.prunerUpdateBucketPruner", mContextID);
+
+ PX_ASSERT(mIncrementalRebuild);
+ mBucketPruner.build();
+}
+
+// Returns the current world-space AABB stored in the pool for 'handle'.
+PxBounds3 AABBPruner::getAABB(PrunerHandle handle)
+{
+ return mPool.getWorldAABB(handle);
+}
+
+// Frees both trees, the cached boxes, the maps and the bucket pruner, and
+// resets the rebuild state machine. Objects in mPool are NOT released here.
+void AABBPruner::release() // this can be called from purge()
+{
+ mBucketPruner.release();
+
+ mTimeStamp = 0;
+
+ mTreeMap.release();
+ mNewTreeMap.release();
+
+ PX_FREE_AND_RESET(mCachedBoxes);
+ mBuilder.reset();
+ PX_DELETE_AND_RESET(mNewTree);
+ PX_DELETE_AND_RESET(mAABBTree);
+
+ mNbCachedBoxes = 0;
+ mProgress = BUILD_NOT_STARTED;
+ mNewTreeFixups.clear();
+ mUncommittedChanges = false;
+}
+
+// Refit current tree
+// Refit current tree: recomputes the AABBs of nodes previously flagged via
+// markNodeForRefit() (moved or removed objects), in both the bucket pruner
+// and the main tree.
+void AABBPruner::refitUpdatedAndRemoved()
+{
+ PX_PROFILE_ZONE("SceneQuery.prunerRefitUpdatedAndRemoved", mContextID);
+
+ PX_ASSERT(mIncrementalRebuild);
+ AABBTree* tree = getAABBTree();
+ if(!tree)
+ return;
+
+#if PX_DEBUG
+ tree->validate();
+#endif
+
+ //### missing a way to skip work if not needed
+
+ const PxU32 nbObjects = mPool.getNbActiveObjects();
+ // At this point there still can be objects in the tree that are blanked out so it's an optimization shortcut (not required)
+ if(!nbObjects)
+ return;
+
+ mBucketPruner.refitMarkedNodes(mPool.getCurrentWorldBoxes());
+ tree->refitMarkedNodes(mPool.getCurrentWorldBoxes());
+}
+
+// Merges a precomputed pruning structure (see PxPruningStructure) into this
+// pruner. The structure's objects must already have been added to mPool via
+// addObjects(), so their pool indices start at nbActive - nbObjects.
+// Static pruner: merged straight into the main tree. Incremental pruner: fed
+// to the bucket pruner, to be folded in by the next background rebuild.
+// NOTE(review): silently does nothing when mAABBTree is NULL — presumably the
+// caller guarantees a prior commit(); confirm against SqSceneQueryManager.
+void AABBPruner::merge(const void* mergeParams)
+{
+ const AABBPrunerMergeData& pruningStructure = *reinterpret_cast<const AABBPrunerMergeData*> (mergeParams);
+
+ if(mAABBTree)
+ {
+ // index in pruning pool, where new objects were added
+ const PxU32 pruningPoolIndex = mPool.getNbActiveObjects() - pruningStructure.mNbObjects;
+
+ // create tree from given nodes and indices
+ AABBTreeMergeData aabbTreeMergeParams(pruningStructure.mNbNodes, pruningStructure.mAABBTreeNodes,
+ pruningStructure.mNbObjects, pruningStructure.mAABBTreeIndices, pruningPoolIndex);
+
+ if (!mIncrementalRebuild)
+ {
+ // merge tree directly
+ mAABBTree->mergeTree(aabbTreeMergeParams);
+ }
+ else
+ {
+ mBucketPruner.addTree(aabbTreeMergeParams, mTimeStamp);
+ }
+ }
+}
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.h b/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.h
new file mode 100644
index 00000000..c5e96aa6
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.h
@@ -0,0 +1,268 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef SQ_AABB_PRUNER_H
+#define SQ_AABB_PRUNER_H
+
+#include "SqPruningPool.h"
+#include "SqExtendedBucketPruner.h"
+#include "SqAABBTreeUpdateMap.h"
+#include "SqAABBTree.h"
+
+namespace physx
+{
+
+namespace Sq
+{
+ // PT: we build the new tree over a number of frames/states, in order to limit perf spikes in 'updatePruningTrees'.
+ // The states are as follows:
+ //
+ // BUILD_NOT_STARTED (1 frame, AABBPruner):
+ //
+ // This is the initial state, before the new (AABBTree) build even starts. In this frame/state, we perform the AABBPruner-related
+ // memory allocations:
+ // - the new AABB tree is allocated
+ // - the array of cached bounding boxes is allocated and filled
+ //
+ // BUILD_INIT (1 frame, AABBTree):
+ //
+ // This is the first frame in which the new tree gets built. It deserves its own special state since various things happen in the
+ // first frame, that do not happen in subsequent frames. Basically most initial AABBTree-related allocations happen here (but no
+ // build step per se).
+ //
+ // BUILD_IN_PROGRESS (N frames, AABBTree):
+ //
+ // This is the core build function, actually building the tree. This should be mostly allocation-free, except here and there when
+ // building non-complete trees, and during the last call when the tree is finally built.
+ //
+ // BUILD_NEW_MAPPING (1 frame, AABBPruner):
+ //
+ // After the new AABBTree is built, we recreate an AABBTreeUpdateMap for the new tree, and use it to invalidate nodes whose objects
+ // have been removed during the build.
+ //
+ // We need to do that before doing a full refit in the next stage/frame. If we don't do that, the refit code will fetch a wrong box,
+ // that may very well belong to an entirely new object.
+ //
+ // Note that this mapping/update map (mNewTreeMap) is temporary, and only needed for the next stage.
+ //
+ // BUILD_FULL_REFIT (1 frame, AABBPruner):
+ //
+ // Once the new update map is available, we fully refit the new tree. AABBs of moved objects get updated. AABBs of removed objects
+ // become empty.
+ //
+ // BUILD_LAST_FRAME (1 frame, AABBPruner):
+ //
+ // This is an artificial frame used to delay the tree switching code. The switch happens as soon as we reach the BUILD_FINISHED
+ // state, but we don't want to execute BUILD_FULL_REFIT and the switch in the same frame. This extra BUILD_LAST_FRAME stage buys
+ // us one frame, i.e. we have one frame in which we do BUILD_FULL_REFIT, and in the next frame we'll do both BUILD_LAST_FRAME /
+ // BUILD_FINISHED / the switch.
+ //
+ // BUILD_FINISHED (1 frame, AABBPruner):
+ //
+ // Several things happen in this 'finalization' frame/stage:
+ // - We switch the trees (old one is deleted, cached boxes are deleted, new tree pointer is setup)
+ // - A new (final) update map is created (mTreeMap). The map is used to invalidate objects that may have been removed during
+ // the BUILD_NEW_MAPPING and BUILD_FULL_REFIT frames. The nodes containing these removed objects are marked for refit.
+ // - Nodes containing objects that have moved during the BUILD_NEW_MAPPING and BUILD_FULL_REFIT frames are marked for refit.
+ // - We do a partial refit on the new tree, to take these final changes into account. This small partial refit is usually much
+ // cheaper than the full refit we previously performed here.
+ // - We remove old objects from the bucket pruner
+ //
+ // Stages of the progressive tree build; see the stage-by-stage description above.
+ enum BuildStatus
+ {
+ BUILD_NOT_STARTED,
+ BUILD_INIT,
+ BUILD_IN_PROGRESS,
+ BUILD_NEW_MAPPING,
+ BUILD_FULL_REFIT,
+ BUILD_LAST_FRAME,
+ BUILD_FINISHED,
+
+ BUILD_FORCE_DWORD = 0xffffffff // forces the enum to be stored in 32 bits
+ };
+
+ // This class implements the Pruner interface for internal SQ use with some additional specialized functions
+ // The underlying data structure is a binary AABB tree
+ // AABBPruner supports insertions, removals and updates for dynamic objects
+ // The tree is either entirely rebuilt in a single frame (static pruner) or progressively rebuilt over multiple frames (dynamic pruner)
+ // The rebuild happens on a copy of the tree
+ // the copy is then swapped with current tree at the time commit() is called (only if mBuildState is BUILD_FINISHED),
+ // otherwise commit() will perform a refit operation applying any pending changes to the current tree
+ // While the tree is being rebuilt a temporary data structure (BucketPruner) is also kept in sync and used to speed up
+ // queries on updated objects that are not yet in either old or new tree.
+ // The requirements on the order of calls:
+ // commit() is required to be called before any queries to apply modifications
+ // queries can be issued on multiple threads after commit is called
+ // commit, buildStep, add/remove/update have to be called from the same thread or otherwise strictly serialized by external code
+ // and cannot be issued while a query is running
+ class AABBPruner : public IncrementalPruner
+ {
+ public:
+ AABBPruner(bool incrementalRebuild, PxU64 contextID); // true is equivalent to former dynamic pruner
+ virtual ~AABBPruner();
+
+ // Pruner
+ virtual bool addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* userData, PxU32 count = 1, bool hasPruningStructure = false);
+ virtual void removeObjects(const PrunerHandle* handles, PxU32 count = 1);
+ virtual void updateObjects(const PrunerHandle* handles, const PxBounds3* newBounds, PxU32 count = 1);
+ virtual void updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count = 1);
+ virtual void commit();
+ virtual PxAgain raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
+ virtual PxAgain overlap(const Gu::ShapeData& queryVolume, PrunerCallback&) const;
+ virtual PxAgain sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
+ virtual const PrunerPayload& getPayload(PrunerHandle handle) const { return mPool.getPayload(handle); }
+ virtual const PrunerPayload& getPayload(PrunerHandle handle, PxBounds3*& bounds) const { return mPool.getPayload(handle, bounds); }
+ virtual void preallocate(PxU32 entries) { mPool.preallocate(entries); }
+ virtual void shiftOrigin(const PxVec3& shift);
+ virtual void visualize(Cm::RenderOutput& out, PxU32 color) const;
+ virtual void merge(const void* mergeParams);
+ //~Pruner
+
+ // IncrementalPruner
+ virtual void purge(); // gets rid of internal accel struct
+ virtual void setRebuildRateHint(PxU32 nbStepsForRebuild); // Besides the actual rebuild steps, 3 additional steps are needed.
+ virtual bool buildStep(); // returns true if finished
+ //~IncrementalPruner
+
+ // direct access for test code
+ // NOTE: the tree getters assert that commit() has been called (no uncommitted changes).
+
+ PX_FORCE_INLINE PxU32 getNbAddedObjects() const { return mBucketPruner.getNbObjects(); }
+ PX_FORCE_INLINE const Sq::AABBTree* getAABBTree() const { PX_ASSERT(!mUncommittedChanges); return mAABBTree; }
+ PX_FORCE_INLINE Sq::AABBTree* getAABBTree() { PX_ASSERT(!mUncommittedChanges); return mAABBTree; }
+ PX_FORCE_INLINE void setAABBTree(Sq::AABBTree* tree) { mAABBTree = tree; }
+ PX_FORCE_INLINE const Sq::AABBTree* hasAABBTree() const { return mAABBTree; }
+ PX_FORCE_INLINE BuildStatus getBuildStatus() const { return mProgress; }
+
+ // local functions
+// private:
+ Sq::AABBTree* mAABBTree; // current active tree
+ Sq::AABBTreeBuildParams mBuilder; // this class deals with the details of the actual tree building
+ BuildStats mBuildStats; // node/primitive counters gathered during the build
+
+ // tree with build in progress, assigned to mAABBTree in commit, when mProgress is BUILD_FINISHED
+ // created in buildStep(), BUILD_NOT_STARTED
+ // This is non-null when there is a tree rebuild going on in progress
+ // and thus also indicates that we have to start saving the fixups
+ Sq::AABBTree* mNewTree;
+
+ // during rebuild the pool might change so we need a copy of boxes for the tree build
+ PxBounds3* mCachedBoxes;
+ PxU32 mNbCachedBoxes;
+
+ // incremented in commit(), serves as a progress counter for rebuild
+ PxU32 mNbCalls;
+
+ // PT: incremented each time we start building a new tree (i.e. effectively identifies a given tree)
+ // Timestamp is passed to bucket pruner to mark objects added there, linking them to a specific tree.
+ // When switching to the new tree, timestamp is used to remove old objects (now in the new tree) from
+ // the bucket pruner.
+ PxU32 mTimeStamp;
+
+ // this pruner is used for queries on objects that are not in the current tree yet
+ // includes both the objects in the tree being rebuilt and all the objects added later
+ ExtendedBucketPruner mBucketPruner;
+
+ BuildStatus mProgress; // current state of second tree build progress
+
+ // Fraction (as in 1/Nth) of the total number of primitives
+ // that should be processed per step by the AABB builder
+ // so if this value is 1, all primitives will be rebuilt, 2 => 1/2 of primitives per step etc.
+ // see also mNbCalls, mNbCalls varies from 0 to mRebuildRateHint-1
+ PxU32 mRebuildRateHint;
+
+ // Estimate for how much work has to be done to rebuild the tree.
+ PxU32 mTotalWorkUnits;
+
+ // Term to correct the work unit estimate if the rebuild rate is not matched
+ PxI32 mAdaptiveRebuildTerm;
+
+ PruningPool mPool; // Pool of AABBs
+
+ // maps pruning pool indices to aabb tree indices
+ // maps to INVALID_NODE_ID if the pool entry was removed or "pool index is outside input domain"
+ // The map is the inverse of the tree mapping: (node[map[poolID]].primitive == poolID)
+ // So:
+ // treeNodeIndex = mTreeMap.operator[](poolIndex)
+ // aabbTree->treeNodes[treeNodeIndex].primitives[0] == poolIndex
+ AABBTreeUpdateMap mTreeMap;
+ // Temporary update map, see BuildStatus notes above for details
+ AABBTreeUpdateMap mNewTreeMap;
+
+ // This is only set once in the constructor and is equivalent to isDynamicTree
+ // if it set to false then a 1-shot rebuild is performed in commit()
+ // bucket pruner is only used with incremental rebuild
+ bool mIncrementalRebuild;
+
+ // A rebuild can be triggered even when the Pruner is not dirty
+ // mUncommittedChanges is set to true in add, remove, update and buildStep
+ // mUncommittedChanges is set to false in commit
+ // mUncommittedChanges has to be false (commit() has to be called) in order to run a query as defined by the
+ // mUncommittedChanges is not set to true in add, when pruning structure is provided. Scene query shapes
+ // are merged to current AABB tree directly
+ // Pruner higher level API
+ bool mUncommittedChanges;
+
+ // A new AABB tree is built if an object was added, removed or updated
+ // Changing objects during a build will trigger another rebuild right afterwards
+ // this is set to true if a new tree has to be created again after the current rebuild is done
+ bool mNeedsNewTree;
+
+ // This struct is used to record modifications made to the pruner state
+ // while a tree is building in the background
+ // this is so we can apply the modifications to the tree at the time of completion
+ // the recorded fixup information is: removedIndex (in ::remove()) and
+ // lastIndexMoved which is the last index in the pruner array
+ // (since the way we remove from PruningPool is by swapping last into removed slot,
+ // we need to apply a fixup so that it syncs up that operation in the new tree)
+ struct NewTreeFixup
+ {
+ PX_FORCE_INLINE NewTreeFixup(PxU32 removedIndex_, PxU32 relocatedLastIndex_)
+ : removedIndex(removedIndex_), relocatedLastIndex(relocatedLastIndex_) {}
+ PxU32 removedIndex;
+ PxU32 relocatedLastIndex;
+ };
+ Ps::Array<NewTreeFixup> mNewTreeFixups;
+
+ // Pool indices queued for refit — presumably consumed by the commit/refit path; confirm in SqAABBPruner.cpp
+ Ps::Array<PoolIndex> mToRefit;
+
+ PxU64 mContextID; // profiling context, forwarded to PX_PROFILE_ZONE
+
+ // Internal methods
+ bool fullRebuildAABBTree(); // full rebuild function, used with static pruner mode
+ void release();
+ void refitUpdatedAndRemoved();
+ void updateBucketPruner();
+ PxBounds3 getAABB(PrunerHandle h);
+ };
+
+} // namespace Sq
+
+}
+
+#endif // SQ_AABB_PRUNER_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.cpp b/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.cpp
new file mode 100644
index 00000000..191344fe
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.cpp
@@ -0,0 +1,1154 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "SqAABBTree.h"
+#include "SqAABBTreeUpdateMap.h"
+
+#include "PsMathUtils.h"
+#include "PsFoundation.h"
+#include "GuInternal.h"
+
+using namespace physx;
+using namespace Sq;
+
+#define INVALID_ID 0xffffffff
+
+// Progressive building
+// Despite the name, this is a FIFO queue of build nodes awaiting subdivision: push() appends,
+// pop() returns entries in insertion order. Used to spread the tree build over multiple frames.
+class Sq::FIFOStack : public Ps::UserAllocated
+{
+ public:
+ FIFOStack() : mStack(PX_DEBUG_EXP("SQFIFOStack")), mCurIndex(0) {}
+ ~FIFOStack() {}
+
+ PX_FORCE_INLINE PxU32 getNbEntries() const { return mStack.size(); }
+ PX_FORCE_INLINE void push(AABBTreeBuildNode* entry) { mStack.pushBack(entry); }
+ bool pop(AABBTreeBuildNode*& entry);
+ private:
+ Ps::Array<AABBTreeBuildNode*> mStack;
+ PxU32 mCurIndex; //!< Current index within the container
+};
+
+// Pops the oldest entry (FIFO order). Returns false when the queue is empty.
+// The backing array is only cleared once all pushed entries have been consumed.
+bool Sq::FIFOStack::pop(AABBTreeBuildNode*& entry)
+{
+ const PxU32 NbEntries = mStack.size(); // Get current number of entries
+ if(!NbEntries)
+ return false; // Can be NULL when no value has been pushed. This is an invalid pop call.
+ entry = mStack[mCurIndex++]; // Get oldest entry, move to next one
+ if(mCurIndex==NbEntries)
+ {
+ // All values have been popped
+ mStack.clear();
+ mCurIndex=0;
+ }
+ return true;
+}
+//~Progressive building
+
+// Starts with no slabs allocated; init() creates the first slab.
+NodeAllocator::NodeAllocator() : mPool(NULL), mCurrentSlabIndex(0), mTotalNbNodes(0)
+{
+}
+
+// Frees all slabs (see release()).
+NodeAllocator::~NodeAllocator()
+{
+ release();
+}
+
+// Frees every slab of build nodes and resets the allocator counters.
+void NodeAllocator::release()
+{
+ const PxU32 nbSlabs = mSlabs.size();
+ for(PxU32 i=0;i<nbSlabs;i++)
+ {
+ Slab& s = mSlabs[i];
+ PX_DELETE_ARRAY(s.mPool);
+ }
+
+ mSlabs.reset();
+ mCurrentSlabIndex = 0;
+ mTotalNbNodes = 0;
+}
+
+// Allocates the first slab of build nodes and sets up the root node covering all primitives.
+// 'limit' is the leaf primitive limit; it shrinks the initial slab estimate since fewer
+// nodes are needed when leaves hold multiple primitives (extra slabs are added on demand).
+void NodeAllocator::init(PxU32 nbPrimitives, PxU32 limit)
+{
+ const PxU32 maxSize = nbPrimitives*2 - 1; // PT: max possible #nodes for a complete tree
+ const PxU32 estimatedFinalSize = maxSize<=1024 ? maxSize : maxSize/limit;
+ mPool = PX_NEW(AABBTreeBuildNode)[estimatedFinalSize];
+ PxMemZero(mPool, sizeof(AABBTreeBuildNode)*estimatedFinalSize);
+
+ // Setup initial node. Here we have a complete permutation of the app's primitives.
+ mPool->mNodeIndex = 0;
+ mPool->mNbPrimitives = nbPrimitives;
+
+ // First slab already holds the root node, hence the used-count of 1.
+ mSlabs.pushBack(Slab(mPool, 1, estimatedFinalSize));
+ mCurrentSlabIndex = 0;
+ mTotalNbNodes = 1;
+}
+
+// PT: TODO: inline this?
+// Allocates a pair of adjacent build nodes (positive/negative children) from the current slab,
+// creating a new 1024-node slab when the current one cannot hold two more nodes.
+AABBTreeBuildNode* NodeAllocator::getBiNode()
+{
+ mTotalNbNodes += 2;
+ Slab& currentSlab = mSlabs[mCurrentSlabIndex];
+ if(currentSlab.mNbUsedNodes+2<=currentSlab.mMaxNbNodes)
+ {
+ AABBTreeBuildNode* biNode = currentSlab.mPool + currentSlab.mNbUsedNodes;
+ currentSlab.mNbUsedNodes += 2;
+ return biNode;
+ }
+ else
+ {
+ // Allocate new slab
+ const PxU32 size = 1024;
+ AABBTreeBuildNode* pool = PX_NEW(AABBTreeBuildNode)[size];
+ PxMemZero(pool, sizeof(AABBTreeBuildNode)*size);
+
+ // The new pair occupies the first two nodes of the fresh slab.
+ mSlabs.pushBack(Slab(pool, 2, size));
+ mCurrentSlabIndex++;
+ return pool;
+ }
+}
+
+// Converts the slab-allocated build nodes into the compact runtime representation and
+// releases the build-time slabs. Runtime node data layout (mData):
+//   leaf:     bits[31:5] = start index into the indices array, bits[4:1] = primitive count, bit 0 = 1
+//   internal: bits[31:1] = index of the positive child (negative child is pos+1), bit 0 = 0
+void NodeAllocator::flatten(AABBTreeRuntimeNode* dest)
+{
+ // PT: gathers all build nodes allocated so far and flatten them to a linear destination array of smaller runtime nodes
+ PxU32 offset = 0;
+ const PxU32 nbSlabs = mSlabs.size();
+ for(PxU32 s=0;s<nbSlabs;s++)
+ {
+ const Slab& currentSlab = mSlabs[s];
+
+ AABBTreeBuildNode* pool = currentSlab.mPool;
+ for(PxU32 i=0;i<currentSlab.mNbUsedNodes;i++)
+ {
+ dest[offset].mBV = pool[i].mBV;
+ if(pool[i].isLeaf())
+ {
+ const PxU32 index = pool[i].mNodeIndex;
+
+ const PxU32 nbPrims = pool[i].getNbPrimitives();
+ PX_ASSERT(nbPrims<=16);
+
+ dest[offset].mData = (index<<5)|((nbPrims&15)<<1)|1;
+ }
+ else
+ {
+ PX_ASSERT(pool[i].mPos);
+ // Child pointers are slab-relative; translate the raw pointer into a global
+ // node index by locating the slab that owns it and summing preceding slab sizes.
+ PxU32 localNodeIndex = 0xffffffff;
+ PxU32 nodeBase = 0;
+ for(PxU32 j=0;j<nbSlabs;j++)
+ {
+ if(pool[i].mPos>=mSlabs[j].mPool && pool[i].mPos<mSlabs[j].mPool+mSlabs[j].mNbUsedNodes)
+ {
+ localNodeIndex = PxU32(pool[i].mPos - mSlabs[j].mPool);
+ break;
+ }
+ nodeBase += mSlabs[j].mNbUsedNodes;
+ }
+ const PxU32 nodeIndex = nodeBase + localNodeIndex;
+ PX_ASSERT(nodeIndex<mTotalNbNodes);
+ dest[offset].mData = nodeIndex<<1;
+ }
+ offset++;
+ }
+ }
+ PX_ASSERT(offset==mTotalNbNodes);
+ // Build nodes are no longer needed once flattened.
+ release();
+}
+
+// Returns the coordinate used to partition primitives along 'axis'.
+static PX_FORCE_INLINE float getSplittingValue(const PxBounds3& global_box, PxU32 axis)
+{
+ // Default split value = middle of the axis (using only the box)
+ return global_box.getCenter(axis);
+}
+
+// Partitions 'prims' in place around the box center along 'axis': primitives whose cached
+// center is greater than the split value are moved to the front. Returns the number of
+// primitives in the positive (front) partition; 0 or nb means the split failed.
+static PxU32 split(const PxBounds3& box, PxU32 nb, PxU32* const PX_RESTRICT prims, PxU32 axis, const AABBTreeBuildParams& params)
+{
+ // Get node split value
+ const float splitValue = getSplittingValue(box, axis);
+
+ PxU32 nbPos = 0;
+ // Loop through all node-related primitives. Their indices range from "mNodePrimitives[0]" to "mNodePrimitives[mNbPrimitives-1]",
+ // with mNodePrimitives = mIndices + mNodeIndex (i.e. those indices map the global list in the tree params).
+
+ // PT: to avoid calling the unsafe [] operator
+ // (offsets the PxVec3 cache pointer by 'axis' floats so .x below reads the wanted component)
+ const size_t ptrValue = size_t(params.mCache) + axis*sizeof(float);
+ const PxVec3* /*PX_RESTRICT*/ cache = reinterpret_cast<const PxVec3*>(ptrValue);
+
+ for(PxU32 i=0;i<nb;i++)
+ {
+ // Get index in global list
+ const PxU32 index = prims[i];
+
+ // Test against the splitting value. The primitive value is tested against the enclosing-box center.
+ // [We only need an approximate partition of the enclosing box here.]
+ const float primitiveValue = cache[index].x;
+ PX_ASSERT(primitiveValue==params.mCache[index][axis]);
+
+ // Reorganize the list of indices in this order: positive - negative.
+ if(primitiveValue > splitValue)
+ {
+ // Swap entries
+ prims[i] = prims[nbPos];
+ prims[nbPos] = index;
+ // Count primitives assigned to positive space
+ nbPos++;
+ }
+ }
+ return nbPos;
+}
+
+// Computes this node's bounds from its primitives and, if above the leaf limit, splits it
+// into two children along the axis of greatest centroid variance. On success mPos points
+// to the newly allocated child pair; otherwise the node stays a leaf.
+void AABBTreeBuildNode::subdivide(const AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& allocator, PxU32* const indices)
+{
+ PxU32* const PX_RESTRICT primitives = indices + mNodeIndex;
+ const PxU32 nbPrims = mNbPrimitives;
+
+ // Compute global box & means for current node. The box is stored in mBV.
+ Vec4V meansV;
+ {
+ const PxBounds3* PX_RESTRICT boxes = params.mAABBArray;
+ PX_ASSERT(boxes);
+ PX_ASSERT(primitives);
+ PX_ASSERT(nbPrims);
+
+ Vec4V minV = V4LoadU(&boxes[primitives[0]].minimum.x);
+ Vec4V maxV = V4LoadU(&boxes[primitives[0]].maximum.x);
+
+ meansV = V4LoadU(&params.mCache[primitives[0]].x);
+
+ // Single pass accumulates both the enclosing box and the centroid sum.
+ for(PxU32 i=1;i<nbPrims;i++)
+ {
+ const PxU32 index = primitives[i];
+ const Vec4V curMinV = V4LoadU(&boxes[index].minimum.x);
+ const Vec4V curMaxV = V4LoadU(&boxes[index].maximum.x);
+ meansV = V4Add(meansV, V4LoadU(&params.mCache[index].x));
+ minV = V4Min(minV, curMinV);
+ maxV = V4Max(maxV, curMaxV);
+ }
+
+ StoreBounds(mBV, minV, maxV);
+
+ const float coeff = 1.0f/float(nbPrims);
+ meansV = V4Scale(meansV, FLoad(coeff));
+ }
+
+ // Check the user-defined limit. Also ensures we stop subdividing if we reach a leaf node.
+ if(nbPrims<=params.mLimit)
+ return;
+
+ bool validSplit = true;
+ PxU32 nbPos;
+ {
+ // Compute variances
+ Vec4V varsV = V4Zero();
+ for(PxU32 i=0;i<nbPrims;i++)
+ {
+ const PxU32 index = primitives[i];
+ Vec4V centerV = V4LoadU(&params.mCache[index].x);
+ centerV = V4Sub(centerV, meansV);
+ centerV = V4Mul(centerV, centerV);
+ varsV = V4Add(varsV, centerV);
+ }
+ // Sample variance (n-1 denominator); requires nbPrims >= 2, guaranteed by the limit check above.
+ const float coeffNb1 = 1.0f/float(nbPrims-1);
+ varsV = V4Scale(varsV, FLoad(coeffNb1));
+ PX_ALIGN(16, PxVec4) vars;
+ V4StoreA(varsV, &vars.x);
+
+ // Choose axis with greatest variance
+ const PxU32 axis = Ps::largestAxis(PxVec3(vars.x, vars.y, vars.z));
+
+ // Split along the axis
+ nbPos = split(mBV, nbPrims, primitives, axis, params);
+
+ // Check split validity
+ if(!nbPos || nbPos==nbPrims)
+ validSplit = false;
+ }
+
+ // Check the subdivision has been successful
+ if(!validSplit)
+ {
+ // Here, all boxes lie in the same sub-space. Two strategies:
+ // - if we are over the split limit, make an arbitrary 50-50 split
+ // - else stop subdividing
+ if(nbPrims>params.mLimit)
+ {
+ nbPos = nbPrims>>1;
+ }
+ else return;
+ }
+
+ // Now create children and assign their pointers.
+ mPos = allocator.getBiNode();
+
+ stats.increaseCount(2);
+
+ // Assign children
+ PX_ASSERT(!isLeaf());
+ AABBTreeBuildNode* Pos = const_cast<AABBTreeBuildNode*>(mPos);
+ AABBTreeBuildNode* Neg = Pos + 1;
+ // Children share this node's index range: [mNodeIndex, mNodeIndex+nbPos) and the rest.
+ Pos->mNodeIndex = mNodeIndex;
+ Pos->mNbPrimitives = nbPos;
+ Neg->mNodeIndex = mNodeIndex + nbPos;
+ Neg->mNbPrimitives = mNbPrimitives - nbPos;
+}
+
+// Recursively builds the full subtree below this node in one shot (non-progressive path).
+// Also accumulates the per-node primitive counts into stats.mTotalPrims.
+void AABBTreeBuildNode::_buildHierarchy(AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& nodeBase, PxU32* const indices)
+{
+ // Subdivide current node
+ subdivide(params, stats, nodeBase, indices);
+
+ // Recurse
+ if(!isLeaf())
+ {
+ AABBTreeBuildNode* Pos = const_cast<AABBTreeBuildNode*>(getPos());
+ PX_ASSERT(Pos);
+ AABBTreeBuildNode* Neg = Pos + 1;
+ Pos->_buildHierarchy(params, stats, nodeBase, indices);
+ Neg->_buildHierarchy(params, stats, nodeBase, indices);
+ }
+
+ stats.mTotalPrims += mNbPrimitives;
+}
+
+// Constructs an empty tree; all buffers are allocated lazily by build/initTree.
+AABBTree::AABBTree() :
+ mIndices (NULL),
+ mNbIndices (0),
+ mRuntimePool (NULL),
+ mParentIndices (NULL),
+ mTotalNbNodes (0),
+ mTotalPrims (0)
+{
+// Progressive building
+ mStack = NULL;
+//~Progressive building
+
+// REFIT
+ mRefitHighestSetWord = 0;
+//~REFIT
+}
+
+// Frees all buffers; the refit bitmask is left untouched (release(false)).
+AABBTree::~AABBTree()
+{
+ release(false);
+}
+
+// Frees the progressive-build stack, parent indices, runtime nodes, build-node slabs and the
+// primitive index array. Optionally clears the refit bitmask as well.
+void AABBTree::release(bool clearRefitMap)
+{
+// Progressive building
+ PX_DELETE_AND_RESET(mStack);
+//~Progressive building
+ PX_FREE_AND_RESET(mParentIndices);
+ PX_DELETE_ARRAY(mRuntimePool);
+ mNodeAllocator.release();
+ PX_FREE_AND_RESET(mIndices);
+ mTotalNbNodes = 0;
+ mNbIndices = 0;
+
+// REFIT
+ if(clearRefitMap)
+ mRefitBitmask.clearAll();
+ mRefitHighestSetWord = 0;
+//~REFIT
+}
+
+// Initialize nodes/indices from the input tree merge data
+// Deep-copies the merge data's index and node arrays; the tree must be empty beforehand.
+void AABBTree::initTree(const AABBTreeMergeData& tree)
+{
+ PX_ASSERT(mIndices == NULL);
+ PX_ASSERT(mRuntimePool == NULL);
+ PX_ASSERT(mParentIndices == NULL);
+
+ // allocate,copy indices
+ mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*tree.mNbIndices, "AABB tree indices"));
+ mNbIndices = tree.mNbIndices;
+ PxMemCopy(mIndices, tree.mIndices, sizeof(PxU32)*tree.mNbIndices);
+
+ // allocate,copy nodes
+ mRuntimePool = PX_NEW(AABBTreeRuntimeNode)[tree.mNbNodes];
+ mTotalNbNodes = tree.mNbNodes;
+ PxMemCopy(mRuntimePool, tree.mNodes, sizeof(AABBTreeRuntimeNode)*tree.mNbNodes);
+}
+
+// Shift indices of the tree by offset. Used for merged trees, when initial indices needs to be shifted to match indices in current pruning pool
+void AABBTree::shiftIndices(PxU32 offset)
+{
+ for (PxU32 i = 0; i < mNbIndices; i++)
+ {
+ mIndices[i] += offset;
+ }
+}
+
+// First phase of a build: releases any previous tree, sets up the identity primitive
+// permutation, allocates the build-node pool and precomputes/caches all box centers.
+// Returns false when there is nothing to build (zero primitives).
+bool AABBTree::buildInit(AABBTreeBuildParams& params, BuildStats& stats)
+{
+ // Sanity checks
+ const PxU32 nbPrimitives = params.mNbPrimitives;
+ if(!nbPrimitives)
+ return false;
+
+ // Release previous tree
+ release();
+
+ // Init stats
+ stats.setCount(1);
+
+ // Initialize indices. This list will be modified during build.
+ mNbIndices = nbPrimitives;
+ mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbPrimitives, "AABB tree indices"));
+ // Identity permutation
+ for(PxU32 i=0;i<nbPrimitives;i++)
+ mIndices[i] = i;
+
+ // Allocate a pool of nodes
+ mNodeAllocator.init(nbPrimitives, params.mLimit);
+
+ // Compute box centers only once and cache them
+ // NOTE(review): one extra PxVec3 is allocated — presumably padding so unaligned V4 loads
+ // of the last entry stay in bounds; confirm against the V4LoadU usage below.
+ params.mCache = reinterpret_cast<PxVec3*>(PX_ALLOC(sizeof(PxVec3)*(nbPrimitives+1), "cache"));
+ const float half = 0.5f;
+ const FloatV halfV = FLoad(half);
+ for(PxU32 i=0;i<nbPrimitives;i++)
+ {
+ const Vec4V curMinV = V4LoadU(&params.mAABBArray[i].minimum.x);
+ const Vec4V curMaxV = V4LoadU(&params.mAABBArray[i].maximum.x);
+ const Vec4V centerV = V4Scale(V4Add(curMaxV, curMinV), halfV);
+ V4StoreU(centerV, &params.mCache[i].x);
+ }
+ return true;
+}
+
+// Final phase of a build: frees the center cache, records the final node/primitive counts and
+// flattens the build nodes into the compact runtime node array (which also frees the slabs).
+void AABBTree::buildEnd(AABBTreeBuildParams& params, BuildStats& stats)
+{
+ PX_FREE_AND_RESET(params.mCache);
+ // Get back total number of nodes
+ mTotalNbNodes = stats.getCount();
+ mTotalPrims = stats.mTotalPrims;
+
+ mRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes];
+ PX_ASSERT(mTotalNbNodes==mNodeAllocator.mTotalNbNodes);
+ mNodeAllocator.flatten(mRuntimePool);
+}
+
+// Single-shot (non-progressive) build: init, recursive hierarchy build, finalize.
+// Returns false when there are no primitives to build from.
+bool AABBTree::build(AABBTreeBuildParams& params)
+{
+ // Init stats
+ BuildStats stats;
+ if(!buildInit(params, stats))
+ return false;
+
+ // Build the hierarchy
+ mNodeAllocator.mPool->_buildHierarchy(params, stats, mNodeAllocator, mIndices);
+
+ buildEnd(params, stats);
+ return true;
+}
+
+// Translates every node's bounds by -shift (used when the scene origin is shifted).
+void AABBTree::shiftOrigin(const PxVec3& shift)
+{
+ AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
+ const PxU32 totalNbNodes = mTotalNbNodes;
+ for(PxU32 i=0; i<totalNbNodes; i++)
+ {
+ AABBTreeRuntimeNode& current = nodeBase[i];
+ // Prefetch the next node to hide memory latency on the linear sweep.
+ if((i+1) < totalNbNodes)
+ Ps::prefetch(nodeBase + i + 1);
+
+ current.mBV.minimum -= shift;
+ current.mBV.maximum -= shift;
+ }
+}
+
+#if PX_DEBUG
+// Debug-only consistency check; intentionally empty in this version (placeholder hook).
+void AABBTree::validate() const
+{
+}
+#endif
+
+// Progressive building
+// Subdivides one node and, if it became internal, queues its two children for later
+// processing. Returns the node's primitive count, used as the per-step work measure.
+static PxU32 incrementalBuildHierarchy(FIFOStack& stack, AABBTreeBuildNode* node, AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& nodeBase, PxU32* const indices)
+{
+ node->subdivide(params, stats, nodeBase, indices);
+
+ if(!node->isLeaf())
+ {
+ AABBTreeBuildNode* pos = const_cast<AABBTreeBuildNode*>(node->getPos());
+ PX_ASSERT(pos);
+ AABBTreeBuildNode* neg = pos + 1;
+ stack.push(neg);
+ stack.push(pos);
+ }
+
+ stats.mTotalPrims += node->mNbPrimitives;
+ return node->mNbPrimitives;
+}
+
+// One step of the multi-frame build. Callers drive it with progress==0 once (init), then
+// progress==1 repeatedly; 'limit' caps the number of primitives processed this step.
+// Returns 0 when the build is complete, PX_INVALID_U32 on failure, the progress value otherwise.
+PxU32 AABBTree::progressiveBuild(AABBTreeBuildParams& params, BuildStats& stats, PxU32 progress, PxU32 limit)
+{
+ if(progress==0)
+ {
+ if(!buildInit(params, stats))
+ return PX_INVALID_U32;
+
+ mStack = PX_NEW(FIFOStack);
+ mStack->push(mNodeAllocator.mPool);
+ // NOTE(review): post-increment returns the pre-increment value, i.e. 0 — the same value
+ // as the "done" return below. Callers appear to ignore the init-step return and pass a
+ // hard-coded progress value on subsequent calls; confirm against SqAABBPruner::buildStep.
+ return progress++;
+ }
+ else if(progress==1)
+ {
+ PxU32 stackCount = mStack->getNbEntries();
+ if(stackCount)
+ {
+ // Process queued nodes until roughly 'limit' primitives have been touched.
+ PxU32 Total = 0;
+ const PxU32 Limit = limit;
+ while(Total<Limit)
+ {
+ AABBTreeBuildNode* Entry;
+ if(mStack->pop(Entry))
+ Total += incrementalBuildHierarchy(*mStack, Entry, params, stats, mNodeAllocator, mIndices);
+ else
+ break;
+ }
+ return progress;
+ }
+
+ // Queue drained: finalize the tree and free the work queue.
+ buildEnd(params, stats);
+
+ PX_DELETE_AND_RESET(mStack);
+
+ return 0; // Done!
+ }
+ return PX_INVALID_U32;
+}
+//~Progressive building
+
+
+
+// Number of 32-bit words needed to store nb_bits bits (rounded up).
+static PX_FORCE_INLINE PxU32 BitsToDwords(PxU32 nb_bits)
+{
+ return (nb_bits>>5) + ((nb_bits&31) ? 1 : 0);
+}
+
+// (Re)allocates storage for nb_bits bits and clears them all. Any previous storage is freed.
+bool Sq::BitArray::init(PxU32 nb_bits)
+{
+ mSize = BitsToDwords(nb_bits);
+ // Get ram for n bits
+ PX_FREE(mBits);
+ mBits = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mSize, "BitArray::mBits"));
+ // Set all bits to 0
+ clearAll();
+ return true;
+}
+
+// Grows the array so it can hold at least maxBitNumber bits, preserving existing bits and
+// zeroing the newly added words. Never shrinks.
+void Sq::BitArray::resize(PxU32 maxBitNumber)
+{
+ const PxU32 newSize = BitsToDwords(maxBitNumber);
+ if (newSize <= mSize)
+ return;
+
+ PxU32* newBits = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*newSize, "BitArray::mBits"));
+ // Zero the tail first, then copy the old words over the head.
+ PxMemZero(newBits + mSize, (newSize - mSize) * sizeof(PxU32));
+ PxMemCopy(newBits, mBits, mSize*sizeof(PxU32));
+ PX_FREE(mBits);
+ mBits = newBits;
+ mSize = newSize;
+}
+
+// Decoders for the packed runtime node word (see NodeAllocator::flatten for the encoder):
+// bit 0 = leaf flag; leaf: bits[4:1] = primitive count, bits[31:5] = start into the index
+// array; internal: bits[31:1] = index of the positive child (negative child is pos+1).
+static PX_FORCE_INLINE PxU32 getNbPrimitives(PxU32 data) { return (data>>1)&15; }
+static PX_FORCE_INLINE const PxU32* getPrimitives(const PxU32* base, PxU32 data) { return base + (data>>5); }
+static PX_FORCE_INLINE const AABBTreeRuntimeNode* getPos(const AABBTreeRuntimeNode* base, PxU32 data) { return base + (data>>1); }
+static PX_FORCE_INLINE PxU32 isLeaf(PxU32 data) { return data&1; }
+
+// Recomputes one node's bounds: a leaf is refit from its primitives' boxes, an internal node
+// from its two children. An empty leaf (0 primitives, i.e. invalidated) gets an inverted
+// "impossible" box so it never matches any query volume.
+static PX_FORCE_INLINE void refitNode(AABBTreeRuntimeNode* PX_RESTRICT current, const PxBounds3* PX_RESTRICT boxes, const PxU32* PX_RESTRICT indices, AABBTreeRuntimeNode* PX_RESTRICT const nodeBase)
+{
+ // PT: we can safely use V4 loads on both boxes and nodes here:
+ // - it's safe on boxes because we allocated one extra box in the pruning pool
+ // - it's safe on nodes because there's always some data within the node, after the BV
+
+ const PxU32 data = current->mData;
+
+ Vec4V resultMinV, resultMaxV;
+ if(isLeaf(data))
+ {
+ const PxU32 nbPrims = getNbPrimitives(data);
+ if(nbPrims)
+ {
+ const PxU32* primitives = getPrimitives(indices, data);
+ resultMinV = V4LoadU(&boxes[*primitives].minimum.x);
+ resultMaxV = V4LoadU(&boxes[*primitives].maximum.x);
+
+ if(nbPrims>1)
+ {
+ const PxU32* last = primitives + nbPrims;
+ primitives++;
+
+ while(primitives!=last)
+ {
+ resultMinV = V4Min(resultMinV, V4LoadU(&boxes[*primitives].minimum.x));
+ resultMaxV = V4Max(resultMaxV, V4LoadU(&boxes[*primitives].maximum.x));
+ primitives++;
+ }
+ }
+ }
+ else
+ {
+ // Might happen after a node has been invalidated
+ const float max = 0.25f * 1e33f; // ###
+ resultMinV = V4Load(max);
+ resultMaxV = V4Load(-max);
+ }
+ }
+ else
+ {
+ const AABBTreeRuntimeNode* pos = getPos(nodeBase, data);
+ const AABBTreeRuntimeNode* neg = pos+1;
+
+ const PxBounds3& posBox = pos->mBV;
+ const PxBounds3& negBox = neg->mBV;
+
+ resultMinV = V4Min(V4LoadU(&posBox.minimum.x), V4LoadU(&negBox.minimum.x));
+// resultMaxV = V4Max(V4LoadU(&posBox.maximum.x), V4LoadU(&negBox.maximum.x));
+
+#if PX_INTEL_FAMILY
+ // Loading from minimum.z yields (min.z, max.x, max.y, max.z); the shuffle rotates this
+ // to (max.x, max.y, max.z, min.z) so the max can be computed without reading past the node.
+ Vec4V posMinV = V4LoadU(&posBox.minimum.z);
+ Vec4V negMinV = V4LoadU(&negBox.minimum.z);
+ posMinV = _mm_shuffle_ps(posMinV, posMinV, _MM_SHUFFLE(0, 3, 2, 1));
+ negMinV = _mm_shuffle_ps(negMinV, negMinV, _MM_SHUFFLE(0, 3, 2, 1));
+ resultMaxV = V4Max(posMinV, negMinV);
+#else
+ // PT: fixes the perf issue but not really convincing
+ resultMaxV = Vec4V_From_Vec3V(V3Max(V3LoadU(&posBox.maximum.x), V3LoadU(&negBox.maximum.x)));
+#endif
+ }
+
+ // PT: the V4 stores overwrite the data after the BV, but we just put it back afterwards
+ V4StoreU(resultMinV, &current->mBV.minimum.x);
+ V4StoreU(resultMaxV, &current->mBV.maximum.x);
+ current->mData = data;
+}
+
+// Recompute the bounds of every node from the current primitive boxes.
+// A single backwards sweep suffices: children are stored after their parent,
+// so each child is refit before its parent reads its bounds.
+void AABBTree::fullRefit(const PxBounds3* boxes)
+{
+    PX_ASSERT(boxes);
+    PX_ASSERT(mRuntimePool);
+
+    const PxU32* indices = mIndices;
+    AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
+
+    // Bottom-up update: last node down to the root.
+    for(PxU32 i = mTotalNbNodes; i > 0; )
+    {
+        i--;
+        AABBTreeRuntimeNode* node = nodeBase + i;
+        if(i)
+            Ps::prefetch(node - 1);    // warm the next (lower) node while this one refits
+
+        refitNode(node, boxes, indices, nodeBase);
+    }
+}
+
+// Recursively fills parentIndices so that parentIndices[i] is the pool index
+// of node i's parent. The root is passed as its own parent on the first call,
+// so parentIndices[root] == root index.
+static void _createParentArray(PxU32 totalNbNodes, PxU32* parentIndices, const AABBTreeRuntimeNode* parentNode, const AABBTreeRuntimeNode* currentNode, const AABBTreeRuntimeNode* root)
+{
+    PX_UNUSED(totalNbNodes);
+
+    const PxU32 parent = PxU32(parentNode - root);
+    const PxU32 current = PxU32(currentNode - root);
+    PX_ASSERT(parent<totalNbNodes);
+    PX_ASSERT(current<totalNbNodes);
+
+    parentIndices[current] = parent;
+
+    if(currentNode->isLeaf())
+        return;
+
+    // Children are stored pairwise: "pos" first, "neg" immediately after.
+    _createParentArray(totalNbNodes, parentIndices, currentNode, currentNode->getPos(root), root);
+    _createParentArray(totalNbNodes, parentIndices, currentNode, currentNode->getNeg(root), root);
+}
+
+// Flag nodeIndex and all of its ancestors for the next refitMarkedNodes()
+// pass. Stops as soon as an already-marked node is reached: a previous call
+// must have marked that node's ancestors too.
+void AABBTree::markNodeForRefit(TreeNodeIndex nodeIndex)
+{
+    PX_ASSERT(nodeIndex<mTotalNbNodes);
+
+    // Lazily allocate the refit bitmask on first use.
+    if(!mRefitBitmask.getBits())
+        mRefitBitmask.init(mTotalNbNodes);
+
+    // PT: lazy-create parent array. Memory is not wasted for purely static trees, or dynamic trees that only do "full refit".
+    if(!mParentIndices)
+    {
+        mParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mTotalNbNodes, "AABB parent indices"));
+        _createParentArray(mTotalNbNodes, mParentIndices, mRuntimePool, mRuntimePool, mRuntimePool);
+    }
+
+    // Walk up towards the root, marking as we go.
+    PxU32 current = nodeIndex;
+    while(!mRefitBitmask.isSet(current))
+    {
+        mRefitBitmask.setBit(current);
+        // Remember the highest bitmask word touched so refitMarkedNodes() can bound its scan.
+        mRefitHighestSetWord = PxMax(mRefitHighestSetWord, current>>5);
+
+        const PxU32 parent = mParentIndices[current];
+        PX_ASSERT(parent == 0 || parent < current);
+        if(parent == current)
+            break;    // the root is its own parent
+        current = parent;
+    }
+}
+
+#define FIRST_VERSION
+#ifdef FIRST_VERSION
+// Refit only the nodes flagged by markNodeForRefit(), clearing each bitmask
+// word once processed. Nodes are visited from the highest index downwards,
+// which refits children before their parents (children are stored after their
+// parent - see the bottom-up sweep in fullRefit()).
+void AABBTree::refitMarkedNodes(const PxBounds3* boxes)
+{
+    if(!mRefitBitmask.getBits())
+        return; // No refit needed
+
+    {
+        /*const*/ PxU32* bits = const_cast<PxU32*>(mRefitBitmask.getBits());
+        // Scan only up to the highest word markNodeForRefit() ever touched.
+        PxU32 size = mRefitHighestSetWord+1;
+#ifdef _DEBUG
+        // Sanity check: no bit may be set beyond mRefitHighestSetWord.
+        if(1)
+        {
+            const PxU32 totalSize = mRefitBitmask.getSize();
+            for(PxU32 i=size;i<totalSize;i++)
+            {
+                PX_ASSERT(!bits[i]);
+            }
+        }
+        PxU32 nbRefit=0;
+#endif
+        const PxU32* indices = mIndices;
+        AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
+
+        while(size--)
+        {
+            // Test 32 bits at a time
+            const PxU32 currentBits = bits[size];
+            if(!currentBits)
+                continue;
+
+            // Walk the word's bits from its highest node index down to its lowest.
+            PxU32 index = (size+1)<<5;
+            PxU32 mask = PxU32(1<<((index-1)&31));
+            PxU32 _Count=32;
+            while(_Count--)
+            {
+                index--;
+                Ps::prefetch(nodeBase + index);
+
+                PX_ASSERT(size==index>>5);
+                PX_ASSERT(mask==PxU32(1<<(index&31)));
+                if(currentBits & mask)
+                {
+                    refitNode(nodeBase + index, boxes, indices, nodeBase);
+#ifdef _DEBUG
+                    nbRefit++;
+#endif
+                }
+                mask>>=1;
+            }
+            bits[size] = 0;    // word fully processed - clear it for the next marking round
+        }
+
+        mRefitHighestSetWord = 0;
+//        mRefitBitmask.clearAll();
+    }
+}
+#endif
+
+
+//#define SECOND_VERSION
+#ifdef SECOND_VERSION
+// NOTE(review): experimental alternative to the FIRST_VERSION above, driven by
+// Ps::lowestSetBit() instead of a per-bit mask walk. It is compiled out
+// (SECOND_VERSION is never defined) and is incomplete: the inner loop still
+// references `size` and `nbRefit`, which are not declared in this function, so
+// it would not compile if enabled. Kept for reference only.
+void AABBTree::refitMarkedNodes(const PxBounds3* boxes)
+{
+    /*const*/ PxU32* bits = const_cast<PxU32*>(mRefitBitmask.getBits());
+    if(!bits)
+        return; // No refit needed
+
+    const PxU32 lastSetBit = mRefitBitmask.findLast();
+
+    const PxU32* indices = mIndices;
+    AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
+
+    for(PxU32 w = 0; w <= lastSetBit >> 5; ++w)
+    {
+        for(PxU32 b = bits[w]; b; b &= b-1)
+        {
+            const PxU32 index = (PxU32)(w<<5|Ps::lowestSetBit(b));
+
+
+
+            while(size--)
+            {
+                // Test 32 bits at a time
+                const PxU32 currentBits = bits[size];
+                if(!currentBits)
+                    continue;
+
+                PxU32 index = (size+1)<<5;
+                PxU32 mask = PxU32(1<<((index-1)&31));
+                PxU32 _Count=32;
+                while(_Count--)
+                {
+                    index--;
+                    Ps::prefetch(nodeBase + index);
+
+                    PX_ASSERT(size==index>>5);
+                    PX_ASSERT(mask==PxU32(1<<(index&31)));
+                    if(currentBits & mask)
+                    {
+                        refitNode(nodeBase + index, boxes, indices, nodeBase);
+#ifdef _DEBUG
+                        nbRefit++;
+#endif
+                    }
+                    mask>>=1;
+                }
+                bits[size] = 0;
+            }
+            mRefitHighestSetWord = 0;
+//            mRefitBitmask.clearAll();
+    }
+}
+#endif
+
+// Re-encode a source leaf's mData for the destination tree: shift its
+// primitive start by indicesOffset and repack as (index<<5 | count<<1 | 1).
+PX_FORCE_INLINE static void setLeafData(PxU32& leafData, const AABBTreeRuntimeNode& node, const PxU32 indicesOffset)
+{
+    const PxU32 primStart = indicesOffset + (node.mData >> 5);
+    const PxU32 primCount = node.getNbPrimitives();
+    // primCount comes from a 4-bit field, so it is at most 15; the assert is conservative.
+    PX_ASSERT(primCount <= 16);
+    leafData = 1 | ((primCount & 15) << 1) | (primStart << 5);
+}
+
+// Append a copy of the merge tree's nodes to mRuntimePool starting at
+// nodeIndex (which is advanced past the copied range). Leaf nodes get their
+// primitive start re-based by mNbIndices (merged indices are appended after
+// the current ones); internal nodes get their child offsets rebased to
+// absolute pool indices, and the parent map is filled in for each child pair.
+void AABBTree::addRuntimeChilds(PxU32& nodeIndex, const AABBTreeMergeData& treeParams)
+{
+    PX_ASSERT(nodeIndex < mTotalNbNodes + treeParams.mNbNodes + 1);
+    const PxU32 destBase = nodeIndex;
+
+    // copy the src tree into dest tree nodes, update its data
+    for (PxU32 srcIndex = 0; srcIndex < treeParams.mNbNodes; srcIndex++, nodeIndex++)
+    {
+        PX_ASSERT(nodeIndex < mTotalNbNodes + treeParams.mNbNodes + 1);
+        const AABBTreeRuntimeNode& srcNode = treeParams.mNodes[srcIndex];
+
+        mRuntimePool[nodeIndex].mBV = srcNode.mBV;
+        if (srcNode.isLeaf())
+        {
+            // Leaf: shift its primitive start past the existing indices.
+            setLeafData(mRuntimePool[nodeIndex].mData, srcNode, mNbIndices);
+        }
+        else
+        {
+            // Internal node: child offsets were relative to the source tree,
+            // rebase them onto this pool and record the children's parent.
+            const PxU32 childIndex = destBase + srcNode.getPosIndex();
+            mRuntimePool[nodeIndex].mData = childIndex << 1;
+            mParentIndices[childIndex] = nodeIndex;
+            mParentIndices[childIndex + 1] = nodeIndex;
+        }
+    }
+}
+
+// Merge tree into targetNode, where target node is a leaf
+// 1. Allocate new nodes/parent, copy all the nodes/parents
+// 2. Create new node at the end, copy the data from target node
+// 3. Copy the merge tree after the new node, create the parent map for them, update the leaf indices
+// Schematic view:
+// Target Nodes: ...Tn...
+// Input tree: R1->Rc0, Rc1...
+// Merged tree: ...Tnc->...->Nc0,R1->Rc0,Rc1...
+// where new node: Nc0==Tn and Tnc is not a leaf anymore and points to Nc0
+
+// Merge the input tree under a target node that is a LEAF (see the schematic
+// above). The leaf's payload is cloned into a new node Nc0 appended at the
+// end of the pool, the merge tree is copied right after it, and the original
+// leaf is turned into an internal node whose children are (Nc0, merge root).
+void AABBTree::mergeRuntimeLeaf(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 targetMergeNodeIndex)
+{
+    PX_ASSERT(mParentIndices);
+    PX_ASSERT(targetNode.isLeaf());
+
+    // 1. Allocate new nodes/parent, copy all the nodes/parents
+    // allocate new runtime pool with max combine number of nodes
+    // we allocate only 1 additional node each merge
+    AABBTreeRuntimeNode* newRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes + treeParams.mNbNodes + 1];
+    PxU32* newParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mTotalNbNodes + treeParams.mNbNodes + 1), "AABB parent indices"));
+
+    // copy the whole target nodes, we will add the new node at the end together with the merge tree
+    PxMemCopy(newRuntimePool, mRuntimePool, sizeof(AABBTreeRuntimeNode)*(mTotalNbNodes));
+    PxMemCopy(newParentIndices, mParentIndices, sizeof(PxU32)*(mTotalNbNodes));
+
+    // 2. Create new node at the end, copy the data from target node
+    // The clone keeps the leaf payload; the target itself becomes an internal
+    // node at the end of this function.
+    PxU32 nodeIndex = mTotalNbNodes;
+    // copy the targetNode at the end of the new nodes
+    newRuntimePool[nodeIndex].mBV = targetNode.mBV;
+    newRuntimePool[nodeIndex].mData = targetNode.mData;
+    // update the parent information
+    newParentIndices[nodeIndex] = targetMergeNodeIndex;
+
+    // mark for refit
+    // If the target leaf was already queued for refit, its clone inherits the mark.
+    if (mRefitBitmask.getBits() && mRefitBitmask.isSet(targetMergeNodeIndex))
+    {
+        mRefitBitmask.setBit(nodeIndex);
+        const PxU32 currentMarkedWord = nodeIndex >> 5;
+        mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord);
+    }
+
+    // swap pointers
+    PX_DELETE_ARRAY(mRuntimePool);
+    mRuntimePool = newRuntimePool;
+    PX_FREE(mParentIndices);
+    mParentIndices = newParentIndices;
+
+    // 3. Copy the merge tree after the new node, create the parent map for them, update the leaf indices
+    nodeIndex++;
+    addRuntimeChilds(nodeIndex, treeParams);
+    PX_ASSERT(nodeIndex == mTotalNbNodes + 1 + treeParams.mNbNodes);
+
+    // update the parent information for the input tree root node
+    // (the merge tree's root was copied to slot mTotalNbNodes + 1)
+    mParentIndices[mTotalNbNodes + 1] = targetMergeNodeIndex;
+
+    // fix the child information for the target node, was a leaf before
+    // target now points at the (clone, merge root) pair at mTotalNbNodes / mTotalNbNodes+1
+    mRuntimePool[targetMergeNodeIndex].mData = mTotalNbNodes << 1;
+
+    // update the total number of nodes
+    mTotalNbNodes = mTotalNbNodes + 1 + treeParams.mNbNodes;
+}
+
+// Merge tree into targetNode, where target node is not a leaf
+// 1. Allocate new nodes/parent, copy the nodes/parents till targetNodePosIndex
+// 2. Create new node , copy the data from target node
+// 3. Copy the rest of the target tree nodes/parents at the end -> targetNodePosIndex + 1 + treeParams.mNbNodes
+// 4. Copy the merge tree after the new node, create the parent map for them, update the leaf indices
+// 5. Go through the nodes copied at the end and fix the parents/childs
+// Schematic view:
+// Target Nodes: ...Tn->...->Tc0,Tc1...
+// Input tree: R1->Rc0, Rc1...
+// Merged tree: ...Tn->...->Nc0,R1->Rc0,Rc1...,Tc0,Tc1...
+// where new node: Nc0->...->Tc0,Tc1
+// Merge the input tree under a target node that is NOT a leaf (see schematic
+// above): a new node Nc0 is inserted at the target's child position, the
+// merge tree follows it, and everything previously at or after that position
+// is shifted by (1 + treeParams.mNbNodes) slots.
+void AABBTree::mergeRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 targetMergeNodeIndex)
+{
+    PX_ASSERT(mParentIndices);
+    PX_ASSERT(!targetNode.isLeaf());
+
+    // Get the target node child pos, this is where we insert the new node and the input tree
+    const PxU32 targetNodePosIndex = targetNode.getPosIndex();
+
+    // 1. Allocate new nodes/parent, copy the nodes/parents till targetNodePosIndex
+    // allocate new runtime pool with max combine number of nodes
+    // we allocate only 1 additional node each merge
+    AABBTreeRuntimeNode* newRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes + treeParams.mNbNodes + 1];
+    PxU32* newParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mTotalNbNodes + treeParams.mNbNodes + 1), "AABB parent indices"));
+    // copy the untouched part of the nodes and parents
+    PxMemCopy(newRuntimePool, mRuntimePool, sizeof(AABBTreeRuntimeNode)*(targetNodePosIndex));
+    PxMemCopy(newParentIndices, mParentIndices, sizeof(PxU32)*(targetNodePosIndex));
+
+    PxU32 nodeIndex = targetNodePosIndex;
+    // 2. Create new node , copy the data from target node
+    // Nc0 inherits the target's bounds and its (shifted) child offset.
+    newRuntimePool[nodeIndex].mBV = targetNode.mBV;
+    newRuntimePool[nodeIndex].mData = ((targetNode.mData >> 1) + 1 + treeParams.mNbNodes) << 1;
+    // update parent information
+    newParentIndices[nodeIndex] = targetMergeNodeIndex;
+
+    // handle mark for refit
+    // If the target was already queued for refit, the new node inherits the mark.
+    if(mRefitBitmask.getBits() && mRefitBitmask.isSet(targetMergeNodeIndex))
+    {
+        mRefitBitmask.setBit(nodeIndex);
+        const PxU32 currentMarkedWord = nodeIndex >> 5;
+        mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord);
+    }
+
+    // 3. Copy the rest of the target tree nodes/parents at the end -> targetNodePosIndex + 1 + treeParams.mNbNodes
+    if(mTotalNbNodes - targetNodePosIndex)
+    {
+        PX_ASSERT(mTotalNbNodes - targetNodePosIndex > 0);
+        PxMemCopy(newRuntimePool + targetNodePosIndex + 1 + treeParams.mNbNodes, mRuntimePool + targetNodePosIndex, sizeof(AABBTreeRuntimeNode)*(mTotalNbNodes - targetNodePosIndex));
+        PxMemCopy(newParentIndices + targetNodePosIndex + 1 + treeParams.mNbNodes, mParentIndices + targetNodePosIndex, sizeof(PxU32)*(mTotalNbNodes - targetNodePosIndex));
+    }
+    // swap the pointers, release the old memory
+    PX_DELETE_ARRAY(mRuntimePool);
+    mRuntimePool = newRuntimePool;
+    PX_FREE(mParentIndices);
+    mParentIndices = newParentIndices;
+
+    // 4. Copy the merge tree after the new node, create the parent map for them, update the leaf indices
+    nodeIndex++;
+    addRuntimeChilds(nodeIndex, treeParams);
+    PX_ASSERT(nodeIndex == targetNodePosIndex + 1 + treeParams.mNbNodes);
+    // update the total number of nodes
+    mTotalNbNodes = mTotalNbNodes + 1 + treeParams.mNbNodes;
+
+    // update the parent information for the input tree root node
+    // (the merge tree's root was copied to slot targetNodePosIndex + 1)
+    mParentIndices[targetNodePosIndex + 1] = targetMergeNodeIndex;
+
+    // 5. Go through the nodes copied at the end and fix the parents/childs
+    for (PxU32 i = targetNodePosIndex + 1 + treeParams.mNbNodes; i < mTotalNbNodes; i++)
+    {
+        // check if the parent is the targetNode, if yes update the parent to new node
+        if(mParentIndices[i] == targetMergeNodeIndex)
+        {
+            mParentIndices[i] = targetNodePosIndex;
+        }
+        else
+        {
+            // if parent node has been moved, update the parent node
+            if(mParentIndices[i] >= targetNodePosIndex)
+            {
+                mParentIndices[i] = mParentIndices[i] + 1 + treeParams.mNbNodes;
+            }
+            else
+            {
+                // if parent has not been moved, update its child information
+                const PxU32 parentIndex = mParentIndices[i];
+                // update the child information to point to Pos child
+                // NOTE(review): this relies on pos children sitting at odd
+                // indices (child pairs are allocated after the root at 0);
+                // only the pos child's slot carries the pair offset in mData.
+                if(i % 2 != 0)
+                {
+                    const PxU32 srcNodeIndex = mRuntimePool[parentIndex].getPosIndex();
+                    // if child index points to a node that has been moved, update the child index
+                    PX_ASSERT(!mRuntimePool[parentIndex].isLeaf());
+                    PX_ASSERT(srcNodeIndex > targetNodePosIndex);
+                    mRuntimePool[parentIndex].mData = (1 + treeParams.mNbNodes + srcNodeIndex) << 1;
+                }
+            }
+        }
+        if(!mRuntimePool[i].isLeaf())
+        {
+            // update the child node index
+            // (its children were in the shifted region too)
+            const PxU32 srcNodeIndex = 1 + treeParams.mNbNodes + mRuntimePool[i].getPosIndex();
+            mRuntimePool[i].mData = srcNodeIndex << 1;
+        }
+    }
+}
+
+// traverse the target node, the tree is inside the targetNode, and find the best place where merge the tree
+// Descends while the merge tree's root AABB fits entirely inside a child,
+// then merges at the smallest enclosing node.
+void AABBTree::traverseRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 nodeIndex)
+{
+    const AABBTreeRuntimeNode& srcNode = treeParams.getRootNode();
+    PX_ASSERT(srcNode.mBV.isInside(targetNode.mBV));
+
+    // Handle the leaf case BEFORE touching children: getPos()/getNeg() decode
+    // mData as a child offset, but a leaf's mData packs a primitive index and
+    // count instead (index<<5 | count<<1 | 1), so calling them on a leaf would
+    // dereference an out-of-bounds "child" node. (The previous version read
+    // both children's bounds first and only tested isLeaf() at the end.)
+    if(targetNode.isLeaf())
+    {
+        mergeRuntimeLeaf(targetNode, treeParams, nodeIndex);
+        return;
+    }
+
+    // Check if the srcNode(tree) can fit inside any of the target childs. If yes, traverse the target tree child
+    AABBTreeRuntimeNode& targetPosChild = *targetNode.getPos(mRuntimePool);
+    if(srcNode.mBV.isInside(targetPosChild.mBV))
+    {
+        return traverseRuntimeNode(targetPosChild, treeParams, targetNode.getPosIndex());
+    }
+
+    AABBTreeRuntimeNode& targetNegChild = *targetNode.getNeg(mRuntimePool);
+    if (srcNode.mBV.isInside(targetNegChild.mBV))
+    {
+        return traverseRuntimeNode(targetNegChild, treeParams, targetNode.getNegIndex());
+    }
+
+    // we cannot traverse target anymore, lets add the srcTree to current target node
+    mergeRuntimeNode(targetNode, treeParams, nodeIndex);
+}
+
+// Merge the input tree into current tree.
+// Traverse the tree and find the smallest node, where the whole new tree fits. When we find the node
+// we create one new node pointing to the original children and the to the input tree root.
+void AABBTree::mergeTree(const AABBTreeMergeData& treeParams)
+{
+    // allocate new indices buffer
+    // (grown copy of mIndices; the merged tree's indices are appended at the end)
+    PxU32* newIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mNbIndices + treeParams.mNbIndices), "AABB tree indices"));
+    PxMemCopy(newIndices, mIndices, sizeof(PxU32)*mNbIndices);
+    PX_FREE(mIndices);
+    mIndices = newIndices;
+    mTotalPrims += treeParams.mNbIndices;
+
+    // copy the new indices, re-index using the provided indicesOffset. Note that indicesOffset
+    // must be provided, as original mNbIndices can be different than indicesOffset dues to object releases.
+    for (PxU32 i = 0; i < treeParams.mNbIndices; i++)
+    {
+        mIndices[mNbIndices + i] = treeParams.mIndicesOffset + treeParams.mIndices[i];
+    }
+
+    // check the mRefitBitmask if we fit all the new nodes
+    // (the merge adds at most treeParams.mNbNodes + 1 nodes)
+    mRefitBitmask.resize(mTotalNbNodes + treeParams.mNbNodes + 1);
+
+    // create the parent information so we can update it
+    // (lazily built, same as in markNodeForRefit)
+    if(!mParentIndices)
+    {
+        mParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mTotalNbNodes, "AABB parent indices"));
+        _createParentArray(mTotalNbNodes, mParentIndices, mRuntimePool, mRuntimePool, mRuntimePool);
+    }
+
+    // if new tree is inside the root AABB we will traverse the tree to find better node where to attach the tree subnodes
+    // if the root is a leaf we merge with the root.
+    if(treeParams.getRootNode().mBV.isInside(mRuntimePool[0].mBV) && !mRuntimePool[0].isLeaf())
+    {
+        traverseRuntimeNode(mRuntimePool[0], treeParams, 0);
+    }
+    else
+    {
+        if(mRuntimePool[0].isLeaf())
+        {
+            mergeRuntimeLeaf(mRuntimePool[0], treeParams, 0);
+        }
+        else
+        {
+            mergeRuntimeNode(mRuntimePool[0], treeParams, 0);
+        }
+
+        // increase the tree root AABB
+        // (only needed here: in the traverse path the input tree already fits inside the root)
+        mRuntimePool[0].mBV.include(treeParams.getRootNode().mBV);
+    }
+
+#ifdef _DEBUG
+    //verify parent indices
+    for (PxU32 i = 0; i < mTotalNbNodes; i++)
+    {
+        if (i)
+        {
+            PX_ASSERT(mRuntimePool[mParentIndices[i]].getPosIndex() == i || mRuntimePool[mParentIndices[i]].getNegIndex() == i);
+        }
+        if (!mRuntimePool[i].isLeaf())
+        {
+            PX_ASSERT(mParentIndices[mRuntimePool[i].getPosIndex()] == i);
+            PX_ASSERT(mParentIndices[mRuntimePool[i].getNegIndex()] == i);
+        }
+    }
+
+    // verify the tree nodes, leafs
+    for (PxU32 i = 0; i < mTotalNbNodes; i++)
+    {
+        if (mRuntimePool[i].isLeaf())
+        {
+            const PxU32 index = mRuntimePool[i].mData >> 5;
+            const PxU32 nbPrim = mRuntimePool[i].getNbPrimitives();
+            PX_ASSERT(index + nbPrim <= mNbIndices + treeParams.mNbIndices);
+        }
+        else
+        {
+            const PxU32 nodeIndex = (mRuntimePool[i].getPosIndex());
+            PX_ASSERT(nodeIndex < mTotalNbNodes);
+        }
+    }
+#endif // _DEBUG
+
+    mNbIndices += treeParams.mNbIndices;
+}
+
+
+
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.h b/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.h
new file mode 100644
index 00000000..0962747b
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.h
@@ -0,0 +1,364 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef SQ_AABBTREE_H
+#define SQ_AABBTREE_H
+
+#include "foundation/PxMemory.h"
+#include "foundation/PxBounds3.h"
+#include "PsUserAllocated.h"
+#include "PsVecMath.h"
+#include "SqTypedef.h"
+#include "PsArray.h"
+
+namespace physx
+{
+
+using namespace shdfnd::aos;
+
+namespace Sq
+{
+ class AABBTreeUpdateMap;
+
+ typedef Ps::Pair<PxU32, PxU32> TreeMergePair;
+ typedef Ps::Array<TreeMergePair > TreeMergeMap;
+
+    // Heap-allocated bit set used to flag tree nodes for refit (see
+    // AABBTree::markNodeForRefit). mSize counts 32-bit words, not bits.
+    class BitArray
+    {
+        public:
+                                BitArray() : mBits(NULL), mSize(0)    {}
+                                // Members are zero-initialized before init() runs so init()
+                                // never sees garbage (NOTE(review): assumes init() may inspect
+                                // or free an existing buffer - confirm against its definition).
+                                BitArray(PxU32 nb_bits) : mBits(NULL), mSize(0)    { init(nb_bits); }
+                                ~BitArray()    { PX_FREE_AND_RESET(mBits); }
+
+        bool                    init(PxU32 nb_bits);
+
+        // Data management
+        // PxU32(1) instead of the literal 1: left-shifting a signed int into
+        // the sign bit (bit 31) is undefined behaviour; the unsigned shift is
+        // well-defined.
+        PX_FORCE_INLINE    void    setBit(PxU32 bit_number)
+                                {
+                                    mBits[bit_number>>5] |= PxU32(1)<<(bit_number&31);
+                                }
+        PX_FORCE_INLINE    void    clearBit(PxU32 bit_number)
+                                {
+                                    mBits[bit_number>>5] &= ~(PxU32(1)<<(bit_number&31));
+                                }
+        PX_FORCE_INLINE    void    toggleBit(PxU32 bit_number)
+                                {
+                                    mBits[bit_number>>5] ^= PxU32(1)<<(bit_number&31);
+                                }
+
+        PX_FORCE_INLINE    void    clearAll()    { PxMemZero(mBits, mSize*sizeof(PxU32)); }
+        PX_FORCE_INLINE    void    setAll()    { PxMemSet(mBits, 0xff, mSize*sizeof(PxU32)); }
+
+        // Grows the storage; a no-op when already large enough.
+        // NOTE(review): despite the parameter name, the implementation compares
+        // and allocates in 32-bit words - confirm callers pass the intended unit.
+        void                    resize(PxU32 maxBitNumber);
+
+        // Data access
+        PX_FORCE_INLINE    Ps::IntBool    isSet(PxU32 bit_number)    const
+                                {
+                                    return Ps::IntBool(mBits[bit_number>>5] & (PxU32(1)<<(bit_number&31)));
+                                }
+
+        PX_FORCE_INLINE    const PxU32*    getBits()    const    { return mBits;    }
+        PX_FORCE_INLINE    PxU32            getSize()    const    { return mSize;    }
+
+        protected:
+                        PxU32*    mBits;        //!< Array of bits
+                        PxU32    mSize;        //!< Size of the array in dwords
+    };
+
+    //! Contains AABB-tree build statistics
+    struct BuildStats
+    {
+                        BuildStats() : mCount(0), mTotalPrims(0)    {}
+
+                PxU32    mCount;            //!< Number of nodes created
+                PxU32    mTotalPrims;    //!< Total accumulated number of primitives. Should be much higher than the source
+                                        //!< number of prims, since it accumulates all prims covered by each node (i.e. internal
+                                        //!< nodes too, not just leaf ones)
+
+        // Zeroes both counters so the instance can be reused for another build.
+        PX_FORCE_INLINE    void    reset()                { mCount = mTotalPrims = 0;    }
+
+        // Accessors/mutators for the node counter only.
+        PX_FORCE_INLINE    void    setCount(PxU32 nb)        { mCount=nb;    }
+        PX_FORCE_INLINE    void    increaseCount(PxU32 nb)    { mCount+=nb;    }
+        PX_FORCE_INLINE    PxU32    getCount()        const    { return mCount;    }
+    };
+
+    //! Contains AABB-tree build parameters
+    class AABBTreeBuildParams : public Ps::UserAllocated
+    {
+        public:
+        AABBTreeBuildParams(PxU32 limit=1, PxU32 nb_prims=0, const PxBounds3* boxes=NULL) :
+            mLimit(limit), mNbPrimitives(nb_prims), mAABBArray(boxes), mCache(NULL)    {}
+        // The destructor releases mCache via reset(); mAABBArray is app-owned
+        // ("app-controlled" per the field comment) and is only cleared, not freed.
+        ~AABBTreeBuildParams()
+        {
+            reset();
+        }
+
+        // Clears all parameters and frees the center cache (mCache is owned here).
+        PX_FORCE_INLINE    void    reset()
+        {
+            mLimit = mNbPrimitives = 0;
+            mAABBArray = NULL;
+            PX_FREE_AND_RESET(mCache);
+        }
+
+        PxU32            mLimit;            //!< Limit number of primitives / node. If limit is 1, build a complete tree (2*N-1 nodes)
+        PxU32            mNbPrimitives;    //!< Number of (source) primitives.
+        const PxBounds3*    mAABBArray;        //!< Shortcut to an app-controlled array of AABBs.
+        PxVec3*            mCache;            //!< Cache for AABB centers - managed by build code.
+    };
+
+ class NodeAllocator;
+
+    //! AABB tree node used for building
+    class AABBTreeBuildNode : public Ps::UserAllocated
+    {
+        public:
+        PX_FORCE_INLINE    AABBTreeBuildNode()    {}
+        PX_FORCE_INLINE    ~AABBTreeBuildNode()    {}
+
+        PX_FORCE_INLINE    const PxBounds3&    getAABB()    const    { return mBV;    }
+        PX_FORCE_INLINE    const AABBTreeBuildNode*    getPos()    const    { return mPos;    }
+        // Children are allocated pairwise, so the "neg" child always sits right after "pos".
+        PX_FORCE_INLINE    const AABBTreeBuildNode*    getNeg()    const    { const AABBTreeBuildNode* P = mPos; return P ? P+1 : NULL;    }
+
+        // A build node is a leaf exactly when it has no children.
+        PX_FORCE_INLINE    bool    isLeaf()    const    { return !getPos();    }
+
+        PxBounds3    mBV;    //!< Global bounding-volume enclosing all the node-related primitives
+        const AABBTreeBuildNode*    mPos;    //!< "Positive" & "Negative" children
+
+        PxU32    mNodeIndex;        //!< Index of node-related primitives (in the tree's mIndices array)
+        PxU32    mNbPrimitives;    //!< Number of primitives for this node
+
+        // Data access
+        PX_FORCE_INLINE    PxU32    getNbPrimitives()    const    { return mNbPrimitives;    }
+
+        // Build-time nodes keep runtime and build primitive counts in the same field.
+        PX_FORCE_INLINE    PxU32    getNbRuntimePrimitives()    const    { return mNbPrimitives;    }
+        PX_FORCE_INLINE    void    setNbRunTimePrimitives(PxU32 val)    { mNbPrimitives = val;    }
+        PX_FORCE_INLINE    const PxU32*    getPrimitives(const PxU32* base)    const    { return base+mNodeIndex;    }
+        PX_FORCE_INLINE    PxU32*    getPrimitives(PxU32* base)    { return base+mNodeIndex;    }
+
+        // Internal methods
+        // subdivide: split this node's primitives into two children (or stop at a leaf).
+        // _buildHierarchy: recursively subdivide until the whole subtree is built.
+        void    subdivide(const AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& allocator, PxU32* const indices);
+        void    _buildHierarchy(AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& allocator, PxU32* const indices);
+    };
+
+    //! AABB tree node used for runtime (smaller than for build)
+    //! mData packs everything in one word (see field comment below):
+    //!   leaf:     27-bit start into the tree's indices array | 4-bit #prims | 1
+    //!   internal: 31-bit index of the "pos" child ("neg" is pos+1)        | 0
+    //! getPos/getNeg/getPosIndex are therefore only meaningful when !isLeaf(),
+    //! and getPrimitives/getNbPrimitives only when isLeaf().
+    class AABBTreeRuntimeNode : public Ps::UserAllocated
+    {
+        public:
+        PX_FORCE_INLINE    AABBTreeRuntimeNode()    {}
+        PX_FORCE_INLINE    ~AABBTreeRuntimeNode()    {}
+
+        PX_FORCE_INLINE    PxU32    isLeaf()    const    { return mData&1;    }
+
+        // Leaf-only accessors: start pointer into the indices array and primitive count.
+        PX_FORCE_INLINE    const PxU32*    getPrimitives(const PxU32* base)    const    { return base + (mData>>5);    }
+        PX_FORCE_INLINE    PxU32*    getPrimitives(PxU32* base)    { return base + (mData>>5);    }
+        PX_FORCE_INLINE    PxU32    getNbPrimitives()    const    { return (mData>>1)&15;    }
+
+        // Internal-node-only accessors: children are stored pairwise ("pos" then "neg").
+        PX_FORCE_INLINE    PxU32    getPosIndex()    const    { return mData>>1;    }
+        PX_FORCE_INLINE    PxU32    getNegIndex()    const    { return (mData>>1) + 1;    }
+        PX_FORCE_INLINE    const AABBTreeRuntimeNode*    getPos(const AABBTreeRuntimeNode* base)    const    { return base + (mData>>1);    }
+        PX_FORCE_INLINE    const AABBTreeRuntimeNode*    getNeg(const AABBTreeRuntimeNode* base)    const    { const AABBTreeRuntimeNode* P = getPos(base); return P ? P+1 : NULL;}
+
+        PX_FORCE_INLINE    AABBTreeRuntimeNode*    getPos(AABBTreeRuntimeNode* base)    { return base + (mData >> 1); }
+        PX_FORCE_INLINE    AABBTreeRuntimeNode*    getNeg(AABBTreeRuntimeNode* base)    { AABBTreeRuntimeNode* P = getPos(base); return P ? P + 1 : NULL; }
+
+        PX_FORCE_INLINE    PxU32    getNbRuntimePrimitives()    const    { return (mData>>1)&15;    }
+        // Rewrites only the 4-bit primitive-count field, preserving the rest of mData.
+        PX_FORCE_INLINE    void    setNbRunTimePrimitives(PxU32 val)
+                                {
+                                    PX_ASSERT(val<16);
+                                    PxU32 data = mData & ~(15<<1);
+                                    data |= val<<1;
+                                    mData = data;
+                                }
+
+        // Center/half-extents of mBV. The unaligned V4 loads read one float past
+        // mBV (into mData), which is discarded by the Vec3V conversion.
+        PX_FORCE_INLINE    void    getAABBCenterExtentsV(Vec3V* center, Vec3V* extents)    const
+        {
+            const Vec4V minV = V4LoadU(&mBV.minimum.x);
+            const Vec4V maxV = V4LoadU(&mBV.maximum.x);
+
+            const float half = 0.5f;
+            const FloatV halfV = FLoad(half);
+
+            *extents = Vec3V_From_Vec4V(V4Scale(V4Sub(maxV, minV), halfV));
+            *center = Vec3V_From_Vec4V(V4Scale(V4Add(maxV, minV), halfV));
+        }
+
+        // Same as above but without the 0.5 scale: returns 2*center and 2*extents.
+        PX_FORCE_INLINE    void    getAABBCenterExtentsV2(Vec3V* center, Vec3V* extents)    const
+        {
+            const Vec4V minV = V4LoadU(&mBV.minimum.x);
+            const Vec4V maxV = V4LoadU(&mBV.maximum.x);
+
+            *extents = Vec3V_From_Vec4V(V4Sub(maxV, minV));
+            *center = Vec3V_From_Vec4V(V4Add(maxV, minV));
+        }
+
+        PX_FORCE_INLINE    void    getAABBMinMaxV(Vec4V* minV, Vec4V* maxV)    const
+        {
+            *minV = V4LoadU(&mBV.minimum.x);
+            *maxV = V4LoadU(&mBV.maximum.x);
+        }
+
+        PxBounds3    mBV;    // Global bounding-volume enclosing all the node-related primitives
+        PxU32        mData;    // 27 bits node or prim index|4 bits #prims|1 bit leaf
+    };
+
+    //! Contains AABB-tree merge parameters
+    //! Plain view of the tree to merge: this class does not allocate or free
+    //! mNodes/mIndices (presumably owned by the source tree - confirm at call sites).
+    class AABBTreeMergeData
+    {
+    public:
+        AABBTreeMergeData(PxU32 nbNodes, const AABBTreeRuntimeNode* nodes, PxU32 nbIndices, const PxU32* indices, PxU32 indicesOffset) :
+            mNbNodes(nbNodes), mNodes(nodes), mNbIndices(nbIndices), mIndices(indices), mIndicesOffset(indicesOffset)
+        {
+        }
+
+        ~AABBTreeMergeData() {}
+
+        // The merge tree's root is always the first node of the array.
+        PX_FORCE_INLINE const AABBTreeRuntimeNode& getRootNode() const { return mNodes[0]; }
+
+    public:
+        PxU32        mNbNodes;    //!< Number of nodes of AABB tree merge
+        const AABBTreeRuntimeNode*    mNodes;    //!< Nodes of AABB tree merge
+
+        PxU32        mNbIndices;    //!< Number of indices of AABB tree merge
+        const PxU32*    mIndices;    //!< Indices of AABB tree merge
+
+        PxU32        mIndicesOffset;    //!< Indices offset from pruning pool
+    };
+
+ // Progressive building
+ class FIFOStack;
+ //~Progressive building
+
+ //! For complete trees we can predict the final number of nodes and preallocate them. For incomplete trees we can't.
+ //! But we don't want to allocate nodes one by one (which would be quite slow), so we use this helper class to
+ //! allocate N nodes at once, while minimizing the amount of nodes allocated for nothing. An initial amount of
+ //! nodes is estimated using the max number for a complete tree, and the user-defined number of primitives per leaf.
+ //! In ideal cases this estimated number will be quite close to the final number of nodes. When that number is not
+ //! enough though, slabs of N=1024 extra nodes are allocated until the build is complete.
+    class NodeAllocator : public Ps::UserAllocated
+    {
+    public:
+        NodeAllocator();
+        ~NodeAllocator();
+
+        // Frees all slabs.
+        void    release();
+        // Pre-sizes the first slab from the primitive count and per-leaf limit.
+        void    init(PxU32 nbPrimitives, PxU32 limit);
+        // Copies the built nodes into the compact runtime-node array.
+        void    flatten(AABBTreeRuntimeNode* dest);
+        // Returns a pair of adjacent build nodes (children are allocated pairwise),
+        // growing a new slab when the current one is exhausted.
+        AABBTreeBuildNode*    getBiNode();
+
+        AABBTreeBuildNode*    mPool;    // current slab's node array
+
+        // One contiguous allocation of build nodes plus its fill state.
+        struct Slab
+        {
+            PX_FORCE_INLINE    Slab()    {}
+            PX_FORCE_INLINE    Slab(AABBTreeBuildNode* pool, PxU32 nbUsedNodes, PxU32 maxNbNodes) : mPool(pool), mNbUsedNodes(nbUsedNodes), mMaxNbNodes(maxNbNodes)    {}
+            AABBTreeBuildNode*    mPool;            // slab storage
+            PxU32                mNbUsedNodes;    // nodes handed out so far
+            PxU32                mMaxNbNodes;    // slab capacity
+        };
+        Ps::Array<Slab>    mSlabs;            // all slabs allocated during the build
+        PxU32            mCurrentSlabIndex;    // slab currently being filled
+        PxU32            mTotalNbNodes;        // total nodes handed out across all slabs
+    };
+
+    //! AABB-tree, N primitives/leaf
+    //! Owns a flat array of AABBTreeRuntimeNode plus a permuted index array;
+    //! leaves reference contiguous runs of mIndices. Supports full or
+    //! incremental (marked-node) refit and merging another tree in place.
+    class AABBTree : public Ps::UserAllocated
+    {
+        public:
+                                        AABBTree();
+                                        ~AABBTree();
+        // Build
+        // Builds the whole tree in one call; returns false on failure.
+        bool                            build(AABBTreeBuildParams& params);
+        // Progressive building
+        // Advances an incremental build by `limit` units; returns the new progress value.
+        PxU32                            progressiveBuild(AABBTreeBuildParams& params, BuildStats& stats, PxU32 progress, PxU32 limit);
+        //~Progressive building
+        void                            release(bool clearRefitMap=true);
+
+        // Merge tree with another one
+        void                            mergeTree(const AABBTreeMergeData& tree);
+        // Initialize tree from given merge data
+        void                            initTree(const AABBTreeMergeData& tree);
+
+        // Data access
+        PX_FORCE_INLINE    const PxU32*    getIndices()    const    { return mIndices;    }
+        PX_FORCE_INLINE    PxU32*            getIndices()    { return mIndices;    }
+        PX_FORCE_INLINE    void            setIndices(PxU32* indices)    { mIndices = indices;    }
+        PX_FORCE_INLINE    PxU32            getNbNodes()    const    { return mTotalNbNodes;    }
+        PX_FORCE_INLINE    const AABBTreeRuntimeNode*    getNodes()    const    { return mRuntimePool;    }
+        PX_FORCE_INLINE    AABBTreeRuntimeNode*    getNodes()    { return mRuntimePool;    }
+        PX_FORCE_INLINE    void            setNodes(AABBTreeRuntimeNode* nodes)    { mRuntimePool = nodes;    }
+        PX_FORCE_INLINE    PxU32            getTotalPrims()    const    { return mTotalPrims;    }
+
+#if PX_DEBUG
+        void                            validate() const;
+#endif
+        // Translates every node's bounds by -shift (origin shift of the scene).
+        void                            shiftOrigin(const PxVec3& shift);
+
+        // Shift indices of the tree by offset. Used for merged trees, when initial indices needs to be shifted to match indices in current pruning pool
+        void                            shiftIndices(PxU32 offset);
+
+        private:
+                PxU32*                    mIndices;    //!< Indices in the app list. Indices are reorganized during build (permutation).
+                PxU32                    mNbIndices;    //!< Nb indices
+                AABBTreeRuntimeNode*    mRuntimePool;    //!< Linear pool of nodes.
+                NodeAllocator            mNodeAllocator;
+                PxU32*                    mParentIndices;    //!< PT: hot/cold split, keep parent data in separate array
+        // Stats
+                PxU32                    mTotalNbNodes;    //!< Number of nodes in the tree.
+                PxU32                    mTotalPrims;    //!< Copy of final BuildStats::mTotalPrims
+
+        // Progressive building
+                FIFOStack*                mStack;
+        //~Progressive building
+        bool                            buildInit(AABBTreeBuildParams& params, BuildStats& stats);
+        void                            buildEnd(AABBTreeBuildParams& params, BuildStats& stats);
+
+        // tree merge
+        void                            mergeRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& tree, PxU32 targetNodeIndex);
+        void                            mergeRuntimeLeaf(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& tree, PxU32 targetNodeIndex);
+        void                            addRuntimeChilds(PxU32& nodeIndex, const AABBTreeMergeData& tree);
+        void                            traverseRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& tree, PxU32 nodeIndex);
+        // REFIT
+        public:
+        void                            fullRefit(const PxBounds3* boxes);
+
+        // adds node[index] to a list of nodes to refit when refitMarkedNodes is called
+        // Note that this includes updating the hierarchy up the chain
+        void                            markNodeForRefit(TreeNodeIndex nodeIndex);
+        void                            refitMarkedNodes(const PxBounds3* boxes);
+        private:
+                BitArray                mRefitBitmask;    //!< bit is set for each node index in markForRefit
+                PxU32                    mRefitHighestSetWord;    //!< highest bitmask word with any bit set (scan bound for refitMarkedNodes)
+        //~REFIT
+    };
+
+} // namespace Sq
+
+}
+
+#endif // SQ_AABBTREE_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeQuery.h b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeQuery.h
new file mode 100644
index 00000000..299d8993
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeQuery.h
@@ -0,0 +1,234 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef SQ_AABBTREEQUERY_H
+#define SQ_AABBTREEQUERY_H
+
+#include "SqAABBTree.h"
+#include "SqPrunerTestsSIMD.h"
+
+namespace physx
+{
+ namespace Sq
+ {
+ #define RAW_TRAVERSAL_STACK_SIZE 256
+
+ //////////////////////////////////////////////////////////////////////////
+
+	// Loads the AABB of pool entry 'poolIndex' and returns it in doubled form:
+	// center = min+max (2x the true center), extents = max-min (2x the true half-extents).
+	// Callers compensate for the factor of two (see the V4Scale-by-0.5 in AABBTreeOverlap,
+	// and the pre-doubled RayAABBTest setup in AABBTreeRaycast).
+	static PX_FORCE_INLINE void getBoundsTimesTwo(Vec4V& center, Vec4V& extents, const PxBounds3* boxes, PoolIndex poolIndex)
+	{
+		const PxBounds3* bounds = boxes + poolIndex;
+
+		const Vec4V loV = V4LoadU(&bounds->minimum.x);
+		const Vec4V hiV = V4LoadU(&bounds->maximum.x);
+
+		extents = V4Sub(hiV, loV);
+		center = V4Add(hiV, loV);
+	}
+
+ //////////////////////////////////////////////////////////////////////////
+
+	// Generic overlap query over an AABBTree.
+	// Test is a SIMD volume-vs-box functor (see SqPrunerTestsSIMD.h); visitor.invoke()
+	// is called for each primitive whose bounds pass the test.
+	// Returns false iff the visitor aborted the query (invoke returned false), true otherwise.
+	template<typename Test>
+	class AABBTreeOverlap
+	{
+	public:
+		bool operator()(const PrunerPayload* objects, const PxBounds3* boxes, const AABBTree& tree, const Test& test, PrunerCallback& visitor)
+		{
+			using namespace Cm;
+
+			// Explicit traversal stack of node pointers, seeded with the root.
+			const AABBTreeRuntimeNode* stack[RAW_TRAVERSAL_STACK_SIZE];
+			const AABBTreeRuntimeNode* const nodeBase = tree.getNodes();
+			stack[0] = nodeBase;
+			PxU32 stackIndex = 1;
+
+			while (stackIndex > 0)
+			{
+				const AABBTreeRuntimeNode* node = stack[--stackIndex];
+				Vec3V center, extents;
+				node->getAABBCenterExtentsV(&center, &extents);
+				// Descend as long as the current node's box overlaps the query volume.
+				while (test(center, extents))
+				{
+					if (node->isLeaf())
+					{
+						PxU32 nbPrims = node->getNbPrimitives();
+						// With a single primitive the leaf box is the primitive box,
+						// which was already tested above - skip the per-primitive test then.
+						const bool doBoxTest = nbPrims > 1;
+						const PxU32* prims = node->getPrimitives(tree.getIndices());
+						while (nbPrims--)
+						{
+							const PxU32* prunableIndex = prims;
+							prims++;
+
+							const PoolIndex poolIndex = *prunableIndex;
+							if (doBoxTest)
+							{
+								// getBoundsTimesTwo returns 2*center / 2*extents,
+								// so rescale by 0.5 before running the real test.
+								Vec4V center2, extents2;
+								getBoundsTimesTwo(center2, extents2, boxes, poolIndex);
+
+								const float half = 0.5f;
+								const FloatV halfV = FLoad(half);
+
+								const Vec4V extents_ = V4Scale(extents2, halfV);
+								const Vec4V center_ = V4Scale(center2, halfV);
+
+								if (!test(Vec3V_From_Vec4V(center_), Vec3V_From_Vec4V(extents_)))
+									continue;
+							}
+
+							// Distance is meaningless for overlaps; the callback ignores it.
+							PxReal unusedDistance;
+							if (!visitor.invoke(unusedDistance, objects[poolIndex]))
+								return false;
+						}
+						break;	// leaf processed - pop the next node from the stack
+					}
+
+					// Internal node: children are stored as a contiguous pair.
+					const AABBTreeRuntimeNode* children = node->getPos(nodeBase);
+
+					// Continue with child 0, defer child 1 to the stack.
+					node = children;
+					stack[stackIndex++] = children + 1;
+					PX_ASSERT(stackIndex < RAW_TRAVERSAL_STACK_SIZE);
+					node->getAABBCenterExtentsV(&center, &extents);
+				}
+			}
+			return true;
+		}
+	};
+
+ //////////////////////////////////////////////////////////////////////////
+
+	// Processes one leaf node during a raycast/sweep traversal.
+	// For each primitive in the leaf (culled by a per-primitive ray-vs-box test when the
+	// leaf holds more than one), invokes the pruner callback. 'md' is the in/out hit
+	// distance the callback may shorten; when it drops below 'oldMaxDist' both 'maxDist'
+	// and the ray test are clipped to it, so later nodes/primitives are culled earlier.
+	// Returns false iff the callback aborted the query.
+	template <bool tInflate> // use inflate=true for sweeps, inflate=false for raycasts
+	static PX_FORCE_INLINE bool doLeafTest(const AABBTreeRuntimeNode* node, Gu::RayAABBTest& test, PxReal& md, PxReal oldMaxDist,
+		const PrunerPayload* objects, const PxBounds3* boxes, const AABBTree& tree,
+		PxReal& maxDist, PrunerCallback& pcb)
+	{
+		PxU32 nbPrims = node->getNbPrimitives();
+		// Single-primitive leaves were already accepted via the node box test.
+		const bool doBoxTest = nbPrims > 1;
+		const PxU32* prims = node->getPrimitives(tree.getIndices());
+		while (nbPrims--)
+		{
+			const PxU32* prunableIndex = prims;
+			prims++;
+
+			const PoolIndex poolIndex = *prunableIndex;
+			if (doBoxTest)
+			{
+				// Doubled center/extents match the test, which was initialized with
+				// origin/dir/inflation pre-multiplied by 2 (see AABBTreeRaycast).
+				Vec4V center_, extents_;
+				getBoundsTimesTwo(center_, extents_, boxes, poolIndex);
+
+				if (!test.check<tInflate>(Vec3V_From_Vec4V(center_), Vec3V_From_Vec4V(extents_)))
+					continue;
+			}
+
+			if (!pcb.invoke(md, objects[poolIndex]))
+				return false;
+
+			// The callback shortened the ray: propagate to the caller and the box test.
+			if (md < oldMaxDist)
+			{
+				maxDist = md;
+				test.setDistance(md);
+			}
+		}
+		return true;
+	}
+
+ //////////////////////////////////////////////////////////////////////////
+
+	// Generic raycast/sweep query over an AABBTree, ordered front-to-back.
+	// maxDist is in/out: the callback can shorten it, which also shortens the ray test.
+	// Returns false iff the pruner callback aborted the query.
+	template <bool tInflate> // use inflate=true for sweeps, inflate=false for raycasts
+	class AABBTreeRaycast
+	{
+	public:
+		bool operator()(
+			const PrunerPayload* objects, const PxBounds3* boxes, const AABBTree& tree,
+			const PxVec3& origin, const PxVec3& unitDir, PxReal& maxDist, const PxVec3& inflation,
+			PrunerCallback& pcb)
+		{
+			using namespace Cm;
+
+			// PT: we will pass center*2 and extents*2 to the ray-box code, to save some work per-box
+			// So we initialize the test with values multiplied by 2 as well, to get correct results
+			Gu::RayAABBTest test(origin*2.0f, unitDir*2.0f, maxDist, inflation*2.0f);
+
+			// Explicit traversal stack, seeded with the root node.
+			const AABBTreeRuntimeNode* stack[RAW_TRAVERSAL_STACK_SIZE]; // stack always contains PPU addresses
+			const AABBTreeRuntimeNode* const nodeBase = tree.getNodes();
+			stack[0] = nodeBase;
+			PxU32 stackIndex = 1;
+
+			PxReal oldMaxDist;	// assigned before each leaf test below
+			while (stackIndex--)
+			{
+				const AABBTreeRuntimeNode* node = stack[stackIndex];
+				Vec3V center, extents;
+				node->getAABBCenterExtentsV2(&center, &extents);	// V2 = doubled center/extents, matching the doubled test
+				if (test.check<tInflate>(center, extents)) // TODO: try timestamp ray shortening to skip this
+				{
+					PxReal md = maxDist; // has to be before the goto below to avoid compile error
+					// Descend to a leaf, visiting the nearer child first.
+					while (!node->isLeaf())
+					{
+						const AABBTreeRuntimeNode* children = node->getPos(nodeBase);
+
+						Vec3V c0, e0;
+						children[0].getAABBCenterExtentsV2(&c0, &e0);
+						const PxU32 b0 = test.check<tInflate>(c0, e0);
+
+						Vec3V c1, e1;
+						children[1].getAABBCenterExtentsV2(&c1, &e1);
+						const PxU32 b1 = test.check<tInflate>(c1, e1);
+
+						if (b0 && b1)	// if both intersect, push the one with the further center on the stack for later
+						{
+							// & 1 because FAllGrtr behavior differs across platforms
+							const PxU32 bit = FAllGrtr(V3Dot(V3Sub(c1, c0), test.mDir), FZero()) & 1;
+							stack[stackIndex++] = children + bit;
+							node = children + (1 - bit);
+							PX_ASSERT(stackIndex < RAW_TRAVERSAL_STACK_SIZE);
+						}
+						else if (b0)
+							node = children;
+						else if (b1)
+							node = children + 1;
+						else
+							goto skip_leaf_code;	// neither child hit - pop the next stack entry
+					}
+
+					oldMaxDist = maxDist; // we copy since maxDist can be updated in the callback and md<maxDist test below can fail
+
+					if (!doLeafTest<tInflate>(node, test, md, oldMaxDist,
+						objects, boxes, tree,
+						maxDist,
+						pcb))
+						return false;
+				skip_leaf_code:;
+				}
+			}
+			return true;
+		}
+	};
+ }
+}
+
+#endif // SQ_AABBTREEQUERY_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.cpp b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.cpp
new file mode 100644
index 00000000..807de9d1
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.cpp
@@ -0,0 +1,197 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "SqAABBTreeUpdateMap.h"
+#include "SqAABBTree.h"
+
+using namespace physx;
+using namespace Sq;
+
+// Only shrink the mapping array when doing so saves more than this many entries,
+// to avoid reallocation churn on small size fluctuations.
+static const PxU32 SHRINK_THRESHOLD = 1024;
+
+// (Re)builds the pool-index -> leaf-node-index mapping from the given tree.
+// nbObjects is the number of pruning-pool entries the map must span; entries
+// not referenced by any leaf remain INVALID_NODE_ID. Passing 0 releases the map.
+void AABBTreeUpdateMap::initMap(PxU32 nbObjects, const AABBTree& tree)
+{
+	if(!nbObjects)
+	{
+		release();
+		return;
+	}
+
+	// Memory management
+	{
+		const PxU32 mapSize = nbObjects;
+		// Over-allocate by 25% so moderate growth doesn't force a realloc next time.
+		const PxU32 targetCapacity = mapSize + (mapSize>>2);
+
+		PxU32 currentCapacity = mMapping.capacity();
+		if( ( targetCapacity < (currentCapacity>>1) ) && ( (currentCapacity-targetCapacity) > SHRINK_THRESHOLD ) )
+		{
+			// trigger reallocation of a smaller array, there is enough memory to save
+			currentCapacity = 0;
+		}
+
+		if(mapSize > currentCapacity)
+		{
+			// the mapping values are invalid and reset below in any case
+			// so there is no need to copy the values at all
+			mMapping.reset();
+			mMapping.reserve(targetCapacity); // since size is 0, reserve will also just allocate
+		}
+
+		mMapping.forceSize_Unsafe(mapSize);
+
+		// Start from a clean slate: every pool index is unmapped until a leaf claims it.
+		for(PxU32 i=0;i<mapSize;i++)
+			mMapping[i] = INVALID_NODE_ID;
+	}
+
+	// Walk all leaves and record, for each primitive, which leaf node holds it.
+	const PxU32 nbNodes = tree.getNbNodes();
+	const AABBTreeRuntimeNode* nodes = tree.getNodes();
+	const PxU32* indices = tree.getIndices();
+	for(TreeNodeIndex i=0;i<nbNodes;i++)
+	{
+		if(nodes[i].isLeaf())
+		{
+			const PxU32 nbPrims = nodes[i].getNbRuntimePrimitives();
+			// PT: with multiple primitives per node, several mapping entries will point to the same node.
+			PX_ASSERT(nbPrims<=16);
+			for(PxU32 j=0;j<nbPrims;j++)
+			{
+				const PxU32 index = nodes[i].getPrimitives(indices)[j];
+				PX_ASSERT(index<nbObjects);
+				mMapping[index] = i;
+			}
+		}
+	}
+}
+
+// Keeps the map and the tree's leaf primitive lists consistent after a swap-remove
+// in the pruning pool: the object at prunerIndex0 was removed, and the object that
+// was at prunerIndex1 (the pool's last slot) has been moved into prunerIndex0.
+void AABBTreeUpdateMap::invalidate(PoolIndex prunerIndex0, PoolIndex prunerIndex1, AABBTree& tree)
+{
+	// prunerIndex0 and prunerIndex1 are both indices into the pool, not handles
+	// prunerIndex0 is the index in the pruning pool for the node that was just removed
+	// prunerIndex1 is the index in the pruning pool for the node
+	const TreeNodeIndex nodeIndex0 = prunerIndex0<mMapping.size() ? mMapping[prunerIndex0] : INVALID_NODE_ID;
+	const TreeNodeIndex nodeIndex1 = prunerIndex1<mMapping.size() ? mMapping[prunerIndex1] : INVALID_NODE_ID;
+
+	//printf("map invalidate pi0:%x ni0:%x\t",prunerIndex0,nodeIndex0);
+	//printf(" replace with pi1:%x ni1:%x\n",prunerIndex1,nodeIndex1);
+
+	// if nodeIndex0 exists:
+	//		invalidate node 0
+	//		invalidate map prunerIndex0
+	// if nodeIndex1 exists:
+	//		point node 1 to prunerIndex0
+	//		map prunerIndex0 to node 1
+	//		invalidate map prunerIndex1
+
+	// eventually:
+	// - node 0 is invalid
+	// - prunerIndex0 is mapped to node 1 or
+	//		is not mapped if prunerIndex1 is not mapped
+	//		is not mapped if prunerIndex0==prunerIndex1
+	// - node 1 points to prunerIndex0 or
+	//		is invalid if prunerIndex1 is not mapped
+	//		is invalid if prunerIndex0==prunerIndex1
+	// - prunerIndex1 is not mapped
+
+	AABBTreeRuntimeNode* nodes = tree.getNodes();
+
+	// Step 1: remove the deleted object's entry from its leaf node (swap-remove within the leaf).
+	if(nodeIndex0!=INVALID_NODE_ID)
+	{
+		PX_ASSERT(nodeIndex0 < tree.getNbNodes());
+		PX_ASSERT(nodes[nodeIndex0].isLeaf());
+		AABBTreeRuntimeNode* node0 = nodes + nodeIndex0;
+		const PxU32 nbPrims = node0->getNbRuntimePrimitives();
+		PX_ASSERT(nbPrims <= 16);
+
+		// retrieve the primitives pointer
+		PxU32* primitives = node0->getPrimitives(tree.getIndices());
+		PX_ASSERT(primitives);
+
+		// PT: look for desired pool index in the leaf
+		bool foundIt = false;
+		for(PxU32 i=0;i<nbPrims;i++)
+		{
+			PX_ASSERT(mMapping[primitives[i]] == nodeIndex0);	// PT: all primitives should point to the same leaf node
+
+			if(prunerIndex0 == primitives[i])
+			{
+				foundIt = true;
+				const PxU32 last = nbPrims-1;
+				node0->setNbRunTimePrimitives(last);
+				primitives[i] = INVALID_POOL_ID;			// Mark primitive index as invalid in the node
+				mMapping[prunerIndex0] = INVALID_NODE_ID;	// invalidate the node index for pool 0
+
+				// PT: swap within the leaf node. No need to update the mapping since they should all point
+				// to the same tree node anyway.
+				if(last!=i)
+					Ps::swap(primitives[i], primitives[last]);
+				break;
+			}
+		}
+		PX_ASSERT(foundIt);
+		PX_UNUSED(foundIt);
+	}
+
+	// Step 2: redirect the moved object's leaf entry from its old pool index to its new one.
+	if (nodeIndex1!=INVALID_NODE_ID)
+	{
+		// PT: with multiple primitives per leaf, tree nodes may very well be the same for different pool indices.
+		// However the pool indices may be the same when a swap has been skipped in the pruning pool, in which
+		// case there is nothing to do.
+		if(prunerIndex0!=prunerIndex1)
+		{
+			PX_ASSERT(nodeIndex1 < tree.getNbNodes());
+			PX_ASSERT(nodes[nodeIndex1].isLeaf());
+			AABBTreeRuntimeNode* node1 = nodes + nodeIndex1;
+			const PxU32 nbPrims = node1->getNbRuntimePrimitives();
+			PX_ASSERT(nbPrims <= 16);
+
+			// retrieve the primitives pointer
+			PxU32* primitives = node1->getPrimitives(tree.getIndices());
+			PX_ASSERT(primitives);
+
+			// PT: look for desired pool index in the leaf
+			bool foundIt = false;
+			for(PxU32 i=0;i<nbPrims;i++)
+			{
+				PX_ASSERT(mMapping[primitives[i]] == nodeIndex1);	// PT: all primitives should point to the same leaf node
+
+				if(prunerIndex1 == primitives[i])
+				{
+					foundIt = true;
+					primitives[i] = prunerIndex0;				// point node 1 to the pool object moved to ID 0
+					mMapping[prunerIndex0] = nodeIndex1;		// pool 0 is pointed at by node 1 now
+					mMapping[prunerIndex1] = INVALID_NODE_ID;	// pool 1 is no longer stored in the tree
+					break;
+				}
+			}
+			PX_ASSERT(foundIt);
+			PX_UNUSED(foundIt);
+		}
+	}
+}
+
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.h b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.h
new file mode 100644
index 00000000..58418b03
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.h
@@ -0,0 +1,82 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef SQ_PRUNERTREEMAP_H
+#define SQ_PRUNERTREEMAP_H
+
+#include "SqTypedef.h"
+#include "PsArray.h"
+
+namespace physx
+{
+namespace Sq
+{
+ static const PxU32 INVALID_NODE_ID = 0xFFffFFff;
+ static const PxU32 INVALID_POOL_ID = 0xFFffFFff;
+
+ // Maps pruning pool indices to AABB-tree indices (i.e. locates the object's box in the aabb-tree nodes pool)
+ //
+ // The map spans pool indices from 0..N-1, where N is the number of pool entries when the map was created from a tree.
+ //
+ // It maps:
+ // to node indices in the range 0..M-1, where M is the number of nodes in the tree the map was created from,
+ // or to INVALID_NODE_ID if the pool entry was removed or pool index is outside input domain.
+ //
+ // The map is the inverse of the tree mapping: (node[map[poolID]].primitive == poolID) is true at all times.
+
+	class AABBTreeUpdateMap
+	{
+	public:
+		AABBTreeUpdateMap() {}
+		~AABBTreeUpdateMap() {}
+
+		// Frees the mapping array; operator[] then returns INVALID_NODE_ID for all indices.
+		void release()
+		{
+			mMapping.reset();
+		}
+
+		// Rebuilds the map from the tree's leaves; see AABBTreeUpdateMap::initMap.
+		// indices offset used when indices are shifted from objects (used for merged trees)
+		void initMap(PxU32 numPoolObjects, const Sq::AABBTree& tree);
+
+		// Updates map and tree after a swap-remove in the pruning pool
+		// (poolIndex removed, replacementPoolIndex moved into its slot).
+		void invalidate(PoolIndex poolIndex, PoolIndex replacementPoolIndex, Sq::AABBTree& tree);
+
+		// Returns the leaf node index holding this pool entry,
+		// or INVALID_NODE_ID if unmapped or out of range.
+		PX_FORCE_INLINE TreeNodeIndex operator[](PxU32 poolIndex) const
+		{
+			return poolIndex < mMapping.size() ? mMapping[poolIndex] : INVALID_NODE_ID;
+		}
+	private:
+		// maps from prunerIndex (index in the PruningPool) to treeNode index
+		// this will only map to leaf tree nodes
+		Ps::Array<TreeNodeIndex> mMapping;
+	};
+
+}
+}
+
+#endif
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqBounds.cpp b/PhysX_3.4/Source/SceneQuery/src/SqBounds.cpp
new file mode 100644
index 00000000..3bae047d
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqBounds.cpp
@@ -0,0 +1,75 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "foundation/PxTransform.h"
+#include "SqBounds.h"
+#include "CmTransformUtils.h"
+#include "SqPruner.h"
+#include "ScbShape.h"
+#include "ScbActor.h"
+#include "ScbRigidStatic.h"
+#include "ScbBody.h"
+#include "PsAllocator.h"
+#include "GuBounds.h"
+
+using namespace physx;
+using namespace Sq;
+
+// Computes the world-space, pruner-inflated AABB of a shape attached to a static actor.
+void Sq::computeStaticWorldAABB(PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor)
+{
+	const PxTransform& shape2Actor = scbShape.getShape2Actor();
+
+	PX_ALIGN(16, PxTransform) globalPose;	// aligned for the SIMD pose-composition helper
+
+	Cm::getStaticGlobalPoseAligned(static_cast<const Scb::RigidStatic&>(scbActor).getActor2World(), shape2Actor, globalPose);
+	// SQ_PRUNER_INFLATION scales the bounds so scene-query structures stay conservative.
+	Gu::computeBounds(bounds, scbShape.getGeometry(), globalPose, 0.0f, NULL, SQ_PRUNER_INFLATION, false);
+}
+
+// Computes the world-space, pruner-inflated AABB of a shape attached to a dynamic body.
+// For kinematics flagged with eUSE_KINEMATIC_TARGET_FOR_SCENE_QUERIES, the bounds are
+// computed at the kinematic target pose (if one is set) rather than the current pose.
+void Sq::computeDynamicWorldAABB(PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor)
+{
+	const PxTransform& shape2Actor = scbShape.getShape2Actor();
+
+	PX_ALIGN(16, PxTransform) globalPose;	// aligned for the SIMD pose-composition helper
+	{
+		const Scb::Body& body = static_cast<const Scb::Body&>(scbActor);
+		PX_ALIGN(16, PxTransform) kinematicTarget;
+		// Both flags must be set for the target pose to be used for scene queries.
+		const PxU16 sqktFlags = PxRigidBodyFlag::eKINEMATIC | PxRigidBodyFlag::eUSE_KINEMATIC_TARGET_FOR_SCENE_QUERIES;
+		const bool useTarget = (PxU16(body.getFlags()) & sqktFlags) == sqktFlags;
+		const PxTransform& body2World = (useTarget && body.getKinematicTarget(kinematicTarget)) ? kinematicTarget : body.getBody2World();
+		Cm::getDynamicGlobalPoseAligned(body2World, shape2Actor, body.getBody2Actor(), globalPose);
+	}
+
+	Gu::computeBounds(bounds, scbShape.getGeometry(), globalPose, 0.0f, NULL, SQ_PRUNER_INFLATION, false);
+}
+
+// Bounds-computation dispatch table, indexed by actor mobility: [0]=static, [1]=dynamic.
+const ComputeBoundsFunc Sq::gComputeBoundsTable[2] =
+{
+	computeStaticWorldAABB,
+	computeDynamicWorldAABB
+};
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqBounds.h b/PhysX_3.4/Source/SceneQuery/src/SqBounds.h
new file mode 100644
index 00000000..60c6ad6f
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqBounds.h
@@ -0,0 +1,70 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef SQ_BOUNDS_H
+#define SQ_BOUNDS_H
+
+#include "CmPhysXCommon.h"
+#include "foundation/PxBounds3.h"
+#include "PsVecMath.h"
+
+namespace physx
+{
+ namespace Scb
+ {
+ class Shape;
+ class Actor;
+ }
+
+namespace Sq
+{
+ void computeStaticWorldAABB(PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor);
+ void computeDynamicWorldAABB(PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor);
+
+ typedef void(*ComputeBoundsFunc) (PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor);
+
+ extern const ComputeBoundsFunc gComputeBoundsTable[2];
+
+	// Inflates 'src' by 1% of its extents on each side (0.5% of the full size per axis)
+	// and writes the result to 'dst'. 'dst' and 'src' may alias: both are loaded before
+	// any store.
+	PX_FORCE_INLINE void inflateBounds(PxBounds3& dst, const PxBounds3& src)
+	{
+		using namespace physx::shdfnd::aos;
+
+		const Vec4V minV = V4LoadU(&src.minimum.x);
+		const Vec4V maxV = V4LoadU(&src.maximum.x);
+		const Vec4V eV = V4Scale(V4Sub(maxV, minV), FLoad(0.5f* 0.01f));
+
+		// NOTE: this 4-float store writes min.x/y/z and spills into dst.maximum.x,
+		// which is why dst.maximum must be (and is) written afterwards.
+		V4StoreU(V4Sub(minV, eV), &dst.minimum.x);
+		PX_ALIGN(16, PxVec4) max4;
+		V4StoreA(V4Add(maxV, eV), &max4.x);
+		dst.maximum = PxVec3(max4.x, max4.y, max4.z);
+	}
+}
+}
+
+#endif // SQ_BOUNDS_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.cpp b/PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.cpp
new file mode 100644
index 00000000..35a5ca13
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.cpp
@@ -0,0 +1,2601 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "foundation/PxMemory.h"
+#include "SqBucketPruner.h"
+#include "GuIntersectionBoxBox.h"
+#include "GuInternal.h"
+#include "PsVecMath.h"
+#include "foundation/PxUnionCast.h"
+#include "CmRadixSortBuffered.h"
+#include "CmRenderOutput.h"
+#include "PsFPU.h"
+#include "PsBitUtils.h"
+#include "PsIntrinsics.h"
+#include "GuBounds.h"
+
+using namespace physx::shdfnd::aos;
+
+using namespace physx;
+using namespace Sq;
+using namespace Gu;
+using namespace Ps;
+
+#define INVALID_HANDLE 0xffffffff
+
+/*
+TODO:
+- if Core is always available, mSortedObjects could be replaced with just indices to mCoreObjects => less memory.
+- UTS:
+ - test that queries against empty boxes all return false
+- invalidate after 16 removes
+- check shiftOrigin stuff (esp what happens to emptied boxes)
+ - isn't there a very hard-to-find bug waiting to happen in there,
+ when the shift touches the empty box and overrides mdata0/mdata1 with "wrong" values that break the sort?
+- revisit updateObject/removeObject
+- optimize/cache computation of free global bounds before clipRay
+
+- remove temp memory buffers (sorted arrays)
+- take care of code duplication
+- better code to generate SIMD 0x7fffffff
+- refactor SIMD tests
+- optimize:
+ - better split values
+ - optimize update (bitmap, less data copy, etc)
+ - use ray limits in traversal code too?
+ - the SIMD XBOX code operates on Min/Max rather than C/E. Change format?
+ - or just try the alternative ray-box code (as on PC) ==> pretty much exactly the same speed
+*/
+
+//#define VERIFY_SORT
+//#define BRUTE_FORCE_LIMIT 32
+#define LOCAL_SIZE 256 // Size of various local arrays. Dynamic allocations occur if exceeded.
+#define USE_SIMD // Use SIMD code or not (sanity performance check)
+#define NODE_SORT // Enable/disable node sorting
+#define NODE_SORT_MIN_COUNT 16 // Limit above which node sorting is performed
+#if PX_INTEL_FAMILY
+ #if COMPILE_VECTOR_INTRINSICS
+ #define CAN_USE_MOVEMASK
+ #endif
+#endif
+
+#define ALIGN16(size) ((unsigned(size)+15) & unsigned(~15))
+
+#ifdef _DEBUG
+ #define AlignedLoad V4LoadU
+ #define AlignedStore V4StoreU
+#else
+ #define AlignedLoad V4LoadA
+ #define AlignedStore V4StoreA
+#endif
+
+// SAT-based ray-box overlap test has accuracy issues for long rays, so we clip them against the global AABB to limit these issues.
+// SAT-based ray-box overlap test has accuracy issues for long rays, so we clip them against the global AABB to limit these issues.
+// Conservatively shortens 'maxDist' so the ray still spans the whole box (boxMin/boxMax)
+// from the origin's side: distance from the origin to the nearest box face along the ray
+// direction, plus twice the box length as margin. Projections onto rayDir use the box's
+// bounding-sphere radius (extents magnitude), so the result is an over-estimate, never
+// a cut-off of potential hits inside the box.
+static void clipRay(const PxVec3& rayOrig, const PxVec3& rayDir, float& maxDist, const PxVec3& boxMin, const PxVec3& boxMax)
+{
+	const PxVec3 boxCenter = (boxMax + boxMin)*0.5f;
+	const PxVec3 boxExtents = (boxMax - boxMin)*0.5f;
+	const float dpc = boxCenter.dot(rayDir);			// projection of the box center on the ray direction
+	const float extentsMagnitude = boxExtents.magnitude();
+	const float dpMin = dpc - extentsMagnitude;			// conservative projected interval of the box
+	const float dpMax = dpc + extentsMagnitude;
+	const float dpO = rayOrig.dot(rayDir);				// projection of the ray origin
+	const float boxLength = extentsMagnitude * 2.0f;
+	const float distToBox = PxMin(PxAbs(dpMin - dpO), PxAbs(dpMax - dpO));
+	maxDist = distToBox + boxLength * 2.0f;
+}
+
+// Default ctor: marks all 5 bucket bounds (4 quadrants + 1 cross bucket) as empty.
+BucketPrunerNode::BucketPrunerNode()
+{
+	for(PxU32 i=0;i<5;i++)
+		mBucketBox[i].setEmpty();
+}
+
+static const PxU8 gCodes[] = { 4, 4, 4, 4, 4, 3, 2, 2,
+ 4, 1, 0, 0, 4, 1, 0, 0,
+ 4, 1, 0, 0, 2, 1, 0, 0,
+ 3, 1, 0, 0, 2, 1, 0, 0};
+
+#ifdef CAN_USE_MOVEMASK
+/*static PX_FORCE_INLINE PxU32 classifyBox_x86(const BucketBox& box, const PxVec4& limits, const bool useY, const bool isCrossBucket)
+{
+ const Vec4V extents = AlignedLoad(&box.mExtents.x);
+ const Vec4V center = AlignedLoad(&box.mCenter.x);
+ const Vec4V plus = V4Add(extents, center);
+ const Vec4V minus = V4Sub(extents, center);
+
+ Vec4V tmp;
+ if(useY) // PT: this is a constant so branch prediction works here
+ tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,1,0,1));
+ else
+ tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,2,0,2));
+
+ const Vec4V comp = _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(0,2,1,3)); // oh well, nm
+
+ const PxU32 Code = (PxU32)_mm_movemask_ps(V4IsGrtr(V4LoadA(&limits.x), comp));
+ return gCodes[Code | PxU32(isCrossBucket)<<4];
+}*/
+
+// SSE variant of classifyBox(): computes the same 4-bit right/left/lower/upper mask
+// with one _mm_movemask_ps instead of four scalar comparisons, then maps it to a
+// bucket index [0..4] through the shared gCodes table (bit 4 selects the
+// cross-bucket half of the table). 'limits' must be laid out as
+// (-limitX, limitX, -limitYZ, limitYZ) — see the caller — because the shuffles
+// combine boxMax and -boxMin lanes. The result is asserted against the scalar
+// classifyBox() in PX_DEBUG builds by the caller.
+static PX_FORCE_INLINE PxU32 classifyBox_x86(const Vec4V boxMin, const Vec4V boxMax, const PxVec4& limits, const bool useY, const bool isCrossBucket)
+{
+	const Vec4V plus = boxMax;
+	const Vec4V minus = V4Neg(boxMin);	// negated min, so all four tests become "limit > value"
+
+	Vec4V tmp;
+	if(useY)	// PT: this is a constant so branch prediction works here
+		tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,1,0,1));
+	else
+		tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,2,0,2));
+
+	const Vec4V comp = _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(0,2,1,3));	// oh well, nm
+
+	const PxU32 Code = PxU32(_mm_movemask_ps(V4IsGrtr(V4LoadA(&limits.x), comp)));
+	return gCodes[Code | PxU32(isCrossBucket)<<4];
+}
+#endif
+
+#ifdef CAN_USE_MOVEMASK
+ #if PX_DEBUG
+ #define USE_CLASSIFY_BOX
+ #endif
+#else
+ #define USE_CLASSIFY_BOX
+#endif
+
+#ifdef USE_CLASSIFY_BOX
+// Scalar box classification: decides which of the 5 child buckets a box falls into.
+// The node's space is split at limitX along X and at limitYZ along the secondary axis
+// ('yz' is 1 or 2). Boxes fully inside one quadrant get that quadrant's index (0..3);
+// boxes straddling a split line go to the "cross" bucket (4). The 4 strict
+// inequalities form a 4-bit code looked up in gCodes; isCrossBucket selects the
+// second half of the table (different mapping for a node that is itself bucket 4).
+static PX_FORCE_INLINE PxU32 classifyBox(const BucketBox& box, const float limitX, const float limitYZ, const PxU32 yz, const bool isCrossBucket)
+{
+	const bool upperPart = (box.mCenter[yz] + box.mExtents[yz])<limitYZ;
+	const bool lowerPart = (box.mCenter[yz] - box.mExtents[yz])>limitYZ;
+	const bool leftPart = (box.mCenter.x + box.mExtents.x)<limitX;
+	const bool rightPart = (box.mCenter.x - box.mExtents.x)>limitX;
+
+	// Table-based box classification avoids many branches
+	const PxU32 Code = PxU32(rightPart)|(PxU32(leftPart)<<1)|(PxU32(lowerPart)<<2)|(PxU32(upperPart)<<3);
+	return gCodes[Code + (isCrossBucket ? 16 : 0)];
+}
+#endif
+
+// Distributes 'nb' input boxes/payloads into this node's 5 buckets.
+// Inputs must already be sorted along 'sortAxis' (checked in _DEBUG via mDebugMin);
+// the counting-sort below is stable, so each output bucket stays sorted too.
+// Outputs: mCounters/mOffsets (bucket sizes and start offsets into the sorted
+// arrays), mBucketBox[5] (merged bounds per bucket, center/extents form), and the
+// reordered copies in sortedBoxes/sortedObjects. Note: boxes[i].mData0 is used as
+// scratch to hold each box's bucket index between the two passes.
+void BucketPrunerNode::classifyBoxes(	float limitX, float limitYZ,
+										PxU32 nb, BucketBox* PX_RESTRICT boxes, const PrunerPayload* PX_RESTRICT objects,
+										BucketBox* PX_RESTRICT sortedBoxes, PrunerPayload* PX_RESTRICT sortedObjects,
+										bool isCrossBucket, PxU32 sortAxis)
+{
+	const PxU32 yz = PxU32(sortAxis == 1 ? 2 : 1);	// secondary split axis: the one not used for sorting (among Y/Z)
+
+	#ifdef _DEBUG
+	{
+		// Verify the precondition: input boxes sorted by min along the sort axis.
+		float prev = boxes[0].mDebugMin;
+		for(PxU32 i=1;i<nb;i++)
+		{
+			const float current = boxes[i].mDebugMin;
+			PX_ASSERT(current>=prev);
+			prev = current;
+		}
+	}
+	#endif
+
+	// Local (stack-based) min/max bucket bounds
+	PX_ALIGN(16, PxVec4) bucketBoxMin[5];
+	PX_ALIGN(16, PxVec4) bucketBoxMax[5];
+	{
+		const PxBounds3 empty = PxBounds3::empty();
+		for(PxU32 i=0;i<5;i++)
+		{
+			mCounters[i] = 0;
+			bucketBoxMin[i] = PxVec4(empty.minimum, 0.0f);
+			bucketBoxMax[i] = PxVec4(empty.maximum, 0.0f);
+		}
+	}
+
+	{
+#ifdef CAN_USE_MOVEMASK
+		// DS: order doesn't play nice with x86 shuffles :-|
+		PX_ALIGN(16, PxVec4) limits(-limitX, limitX, -limitYZ, limitYZ);
+		const bool useY = yz==1;
+#endif
+		// Determine in which bucket each object falls, update bucket bounds
+		for(PxU32 i=0;i<nb;i++)
+		{
+			const Vec4V boxCenterV = AlignedLoad(&boxes[i].mCenter.x);
+			const Vec4V boxExtentsV = AlignedLoad(&boxes[i].mExtents.x);
+			const Vec4V boxMinV = V4Sub(boxCenterV, boxExtentsV);
+			const Vec4V boxMaxV = V4Add(boxCenterV, boxExtentsV);
+
+#ifdef CAN_USE_MOVEMASK
+//			const PxU32 index = classifyBox_x86(boxes[i], limits, useY, isCrossBucket);
+			const PxU32 index = classifyBox_x86(boxMinV, boxMaxV, limits, useY, isCrossBucket);
+	#if PX_DEBUG
+			// Cross-check the SIMD classification against the scalar reference version.
+			const PxU32 index_ = classifyBox(boxes[i], limitX, limitYZ, yz, isCrossBucket);
+			PX_ASSERT(index == index_);
+	#endif
+#else
+			const PxU32 index = classifyBox(boxes[i], limitX, limitYZ, yz, isCrossBucket);
+#endif
+			// Merge boxes
+			{
+				const Vec4V mergedMinV = V4Min(V4LoadA(&bucketBoxMin[index].x), boxMinV);
+				const Vec4V mergedMaxV = V4Max(V4LoadA(&bucketBoxMax[index].x), boxMaxV);
+				V4StoreA(mergedMinV, &bucketBoxMin[index].x);
+				V4StoreA(mergedMaxV, &bucketBoxMax[index].x);
+			}
+			boxes[i].mData0 = index;	// Store bucket index for current box in this temporary location
+			mCounters[index]++;
+		}
+	}
+
+	{
+		// Regenerate offsets
+		mOffsets[0]=0;
+		for(PxU32 i=0;i<4;i++)
+			mOffsets[i+1] = mOffsets[i] + mCounters[i];
+	}
+
+	{
+		// Group boxes with same bucket index together
+		// (stable counting-sort scatter pass; mOffsets are consumed/incremented here
+		// and rebuilt just below)
+		for(PxU32 i=0;i<nb;i++)
+		{
+			const PxU32 bucketOffset = mOffsets[boxes[i].mData0]++;	// Bucket index for current box was stored in mData0 by previous loop
+			// The 2 following lines are the same as:
+			// sortedBoxes[bucketOffset] = boxes[i];
+			AlignedStore(AlignedLoad(&boxes[i].mCenter.x), &sortedBoxes[bucketOffset].mCenter.x);
+			AlignedStore(AlignedLoad(&boxes[i].mExtents.x), &sortedBoxes[bucketOffset].mExtents.x);
+
+			#ifdef _DEBUG
+			sortedBoxes[bucketOffset].mDebugMin = boxes[i].mDebugMin;
+			#endif
+			sortedObjects[bucketOffset] = objects[i];
+		}
+	}
+
+	{
+		// Regenerate offsets
+		mOffsets[0]=0;
+		for(PxU32 i=0;i<4;i++)
+			mOffsets[i+1] = mOffsets[i] + mCounters[i];
+	}
+
+	{
+		// Convert local (stack-based) min/max bucket bounds to persistent center/extents format
+		const float Half = 0.5f;
+		const FloatV HalfV = FLoad(Half);
+		PX_ALIGN(16, PxVec4) bucketCenter;
+		PX_ALIGN(16, PxVec4) bucketExtents;
+		for(PxU32 i=0;i<5;i++)
+		{
+			// The following lines are the same as:
+			// mBucketBox[i].mCenter = bucketBox[i].getCenter();
+			// mBucketBox[i].mExtents = bucketBox[i].getExtents();
+			const Vec4V bucketBoxMinV = V4LoadA(&bucketBoxMin[i].x);
+			const Vec4V bucketBoxMaxV = V4LoadA(&bucketBoxMax[i].x);
+			const Vec4V bucketBoxCenterV = V4Scale(V4Add(bucketBoxMaxV, bucketBoxMinV), HalfV);
+			const Vec4V bucketBoxExtentsV = V4Scale(V4Sub(bucketBoxMaxV, bucketBoxMinV), HalfV);
+			V4StoreA(bucketBoxCenterV, &bucketCenter.x);
+			V4StoreA(bucketBoxExtentsV, &bucketExtents.x);
+			mBucketBox[i].mCenter = PxVec3(bucketCenter.x, bucketCenter.y, bucketCenter.z);
+			mBucketBox[i].mExtents = PxVec3(bucketExtents.x, bucketExtents.y, bucketExtents.z);
+		}
+	}
+
+	#ifdef _DEBUG
+	// Postcondition: each bucket's sub-array is still sorted along the sort axis.
+	for(PxU32 j=0;j<5;j++)
+	{
+		const PxU32 count = mCounters[j];
+		if(count)
+		{
+			const BucketBox* base = sortedBoxes + mOffsets[j];
+			float prev = base[0].mDebugMin;
+			for(PxU32 i=1;i<count;i++)
+			{
+				const float current = base[i].mDebugMin;
+				PX_ASSERT(current>=prev);
+				prev = current;
+			}
+		}
+	}
+	#endif
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Builds one deeper level of the bucket hierarchy: for each of 'bucket's 5 child
+// ranges, classifies that range's boxes into childBucket[i]'s own 5 sub-buckets.
+// sortedBoxesInBucket/sortedObjectsInBucket are scratch buffers (capacity
+// 'nbAllocated', asserted below); the classified order is copied back in place
+// into the base arrays so the global layout stays contiguous and sorted.
+static void processChildBuckets(PxU32 nbAllocated,
+								BucketBox* sortedBoxesInBucket, PrunerPayload* sortedObjectsInBucket,
+								const BucketPrunerNode& bucket, BucketPrunerNode* PX_RESTRICT childBucket,
+								BucketBox* PX_RESTRICT baseBucketsBoxes, PrunerPayload* PX_RESTRICT baseBucketsObjects,
+								PxU32 sortAxis)
+{
+	PX_UNUSED(nbAllocated);
+
+	const PxU32 yz = PxU32(sortAxis == 1 ? 2 : 1);
+	for(PxU32 i=0;i<5;i++)
+	{
+		const PxU32 nbInBucket = bucket.mCounters[i];
+		if(!nbInBucket)
+		{
+			// Empty child range: just reset its counters so queries skip it.
+			childBucket[i].initCounters();
+			continue;
+		}
+		BucketBox* bucketsBoxes = baseBucketsBoxes + bucket.mOffsets[i];
+		PrunerPayload* bucketsObjects = baseBucketsObjects + bucket.mOffsets[i];
+		PX_ASSERT(nbInBucket<=nbAllocated);
+
+		// Split the child at the center of the parent bucket's merged bounds.
+		const float limitX = bucket.mBucketBox[i].mCenter.x;
+		const float limitYZ = bucket.mBucketBox[i].mCenter[yz];
+		const bool isCrossBucket = i==4;
+		childBucket[i].classifyBoxes(limitX, limitYZ, nbInBucket, bucketsBoxes, bucketsObjects,
+			sortedBoxesInBucket, sortedObjectsInBucket,
+			isCrossBucket, sortAxis);
+
+		// Copy the reordered range back over the original storage.
+		PxMemCopy(bucketsBoxes, sortedBoxesInBucket, sizeof(BucketBox)*nbInBucket);
+		PxMemCopy(bucketsObjects, sortedObjectsInBucket, sizeof(PrunerPayload)*nbInBucket);
+	}
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Maps an IEEE-754 float's bit pattern to an unsigned integer whose unsigned
+// ordering matches the float ordering: negative floats have their bits inverted
+// (so more-negative sorts lower), non-negative floats get the sign bit set
+// (so they sort above all negatives). Used to compare/sort box bounds with
+// integer comparisons (see encodeBoxMinMax).
+static PX_FORCE_INLINE PxU32 encodeFloat(PxU32 newPos)
+{
+	//we may need to check on -0 and 0
+	//But it should make no practical difference.
+	if(newPos & PX_SIGN_BITMASK) //negative?
+		return ~newPos;//reverse sequence of negative numbers
+	else
+		return newPos | PX_SIGN_BITMASK; // flip sign
+}
+
+// Computes the [rayMin, rayMax] interval spanned by the ray segment
+// (rayOrig, rayOrig + rayDir*maxDist) when projected on the 'sortAxis' coordinate.
+// Used to early-out against boxes sorted along that axis.
+static PX_FORCE_INLINE void computeRayLimits(float& rayMin, float& rayMax, const PxVec3& rayOrig, const PxVec3& rayDir, float maxDist, PxU32 sortAxis)
+{
+	const float rayOrigValue = rayOrig[sortAxis];
+	const float rayDirValue = rayDir[sortAxis] * maxDist;
+	rayMin = PxMin(rayOrigValue, rayOrigValue + rayDirValue);
+	rayMax = PxMax(rayOrigValue, rayOrigValue + rayDirValue);
+}
+
+// Overload for swept/inflated queries: same projected interval as above, widened on
+// both ends by the 'inflate' half-extent along the sort axis.
+static PX_FORCE_INLINE void computeRayLimits(float& rayMin, float& rayMax, const PxVec3& rayOrig, const PxVec3& rayDir, float maxDist, const PxVec3& inflate, PxU32 sortAxis)
+{
+	const float inflateValue = inflate[sortAxis];
+	const float rayOrigValue = rayOrig[sortAxis];
+	const float rayDirValue = rayDir[sortAxis] * maxDist;
+	rayMin = PxMin(rayOrigValue, rayOrigValue + rayDirValue) - inflateValue;
+	rayMax = PxMax(rayOrigValue, rayOrigValue + rayDirValue) + inflateValue;
+}
+
+// Caches the box's min/max along 'axis' into mData0/mData1 as sortable unsigned
+// integers (see encodeFloat), so traversal can do ordering tests with integer
+// compares and the sort order survives even if the box is later emptied
+// (see removeObject).
+static PX_FORCE_INLINE void encodeBoxMinMax(BucketBox& box, const PxU32 axis)
+{
+	const float min = box.mCenter[axis] - box.mExtents[axis];
+	const float max = box.mCenter[axis] + box.mExtents[axis];
+
+	const PxU32* binaryMin = reinterpret_cast<const PxU32*>(&min);
+	const PxU32* binaryMax = reinterpret_cast<const PxU32*>(&max);
+	box.mData0 = encodeFloat(binaryMin[0]);
+	box.mData1 = encodeFloat(binaryMax[0]);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Ctor. With externalMemory=true the core boxes/objects arrays are supplied later
+// via setExternalMemory() and are not owned (nor freed) by this object. The
+// structure starts dirty (no sorted arrays yet) with an empty global box and all
+// three hierarchy levels' counters reset.
+BucketPrunerCore::BucketPrunerCore(bool externalMemory) :
+	mCoreNbObjects		(0),
+	mCoreCapacity		(0),
+	mCoreBoxes			(NULL),
+	mCoreObjects		(NULL),
+	mCoreRemap			(NULL),
+	mSortedWorldBoxes	(NULL),
+	mSortedObjects		(NULL),
+	mNbFree				(0),
+	mSortedNb			(0),
+	mSortedCapacity		(0),
+	mSortAxis			(0),
+	mDirty				(true),
+	mOwnMemory			(!externalMemory)
+{
+	mGlobalBox.setEmpty();
+
+	mLevel1.initCounters();
+
+	for(PxU32 i=0;i<5;i++)
+		mLevel2[i].initCounters();
+	for(PxU32 j=0;j<5;j++)
+		for(PxU32 i=0;i<5;i++)
+			mLevel3[j][i].initCounters();
+}
+
+// Dtor: delegates to release(), which frees owned buffers and clears the map.
+BucketPrunerCore::~BucketPrunerCore()
+{
+	release();
+}
+
+// Releases all storage and returns the pruner to its empty, dirty state.
+// Core arrays are freed only when owned (mOwnMemory); externally-supplied arrays
+// are left to their owner. Sorted arrays and the payload->index map are always
+// released. Safe to call multiple times.
+void BucketPrunerCore::release()
+{
+	mDirty			= true;
+	mCoreNbObjects	= 0;
+
+	mCoreCapacity	= 0;
+	if(mOwnMemory)
+	{
+		PX_FREE_AND_RESET(mCoreBoxes);
+		PX_FREE_AND_RESET(mCoreObjects);
+		PX_FREE_AND_RESET(mCoreRemap);
+	}
+
+	PX_FREE_AND_RESET(mSortedWorldBoxes);
+	PX_FREE_AND_RESET(mSortedObjects);
+	mSortedNb = 0;
+	mSortedCapacity = 0;
+
+	mNbFree = 0;
+#ifdef USE_REGULAR_HASH_MAP
+	mMap.clear();
+#else
+	mMap.purge();
+#endif
+}
+
+// Adopts caller-owned core arrays (valid only when constructed with
+// externalMemory=true). mCoreRemap is left NULL in this mode — note that the
+// incremental add/remove paths dereference mCoreRemap, so external-memory users
+// are presumably expected to rebuild rather than mutate; TODO(review): confirm
+// against callers.
+void BucketPrunerCore::setExternalMemory(PxU32 nbObjects, PxBounds3* boxes, PrunerPayload* objects)
+{
+	PX_ASSERT(!mOwnMemory);
+	mCoreNbObjects	= nbObjects;
+	mCoreBoxes		= boxes;
+	mCoreObjects	= objects;
+	mCoreRemap		= NULL;
+}
+
+// Ensures the sorted arrays can hold 'nb' entries. Reuses the current buffers when
+// nb fits within [capacity/2, capacity] (hysteresis avoids realloc churn);
+// otherwise reallocates to the next power of two. Previous contents are NOT
+// preserved — callers rebuild the sorted arrays from scratch afterwards.
+void BucketPrunerCore::allocateSortedMemory(PxU32 nb)
+{
+	mSortedNb = nb;
+	if(nb<=mSortedCapacity && (nb>=mSortedCapacity/2))
+		return;
+
+	const PxU32 capacity = Ps::nextPowerOfTwo(nb);
+	mSortedCapacity = capacity;
+
+	PxU32 bytesNeededForBoxes = capacity*sizeof(BucketBox);
+	bytesNeededForBoxes = ALIGN16(bytesNeededForBoxes);
+
+	PxU32 bytesNeededForObjects = capacity*sizeof(PrunerPayload);
+	bytesNeededForObjects = ALIGN16(bytesNeededForObjects);
+
+	PX_FREE(mSortedObjects);
+	PX_FREE(mSortedWorldBoxes);
+	mSortedWorldBoxes = reinterpret_cast<BucketBox*>(PX_ALLOC(bytesNeededForBoxes, "BucketPruner"));
+	mSortedObjects = reinterpret_cast<PrunerPayload*>(PX_ALLOC(bytesNeededForObjects, "BucketPruner"));
+	// SIMD code paths rely on 16-byte alignment of these buffers.
+	PX_ASSERT(!(size_t(mSortedWorldBoxes)&15));
+	PX_ASSERT(!(size_t(mSortedObjects)&15));
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Grows the three core arrays (boxes, payloads, remap) geometrically:
+// capacity doubles, starting at 32. Existing entries are copied over and the old
+// buffers freed. Only used on the owned-memory path (addObjectInternal).
+void BucketPrunerCore::resizeCore()
+{
+	const PxU32 capacity = mCoreCapacity ? mCoreCapacity*2 : 32;
+	mCoreCapacity = capacity;
+
+	const PxU32 bytesNeededForBoxes = capacity*sizeof(PxBounds3);
+	const PxU32 bytesNeededForObjects = capacity*sizeof(PrunerPayload);
+	const PxU32 bytesNeededForRemap = capacity*sizeof(PxU32);
+
+	PxBounds3* newCoreBoxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(bytesNeededForBoxes, "BucketPruner"));
+	PrunerPayload* newCoreObjects = reinterpret_cast<PrunerPayload*>(PX_ALLOC(bytesNeededForObjects, "BucketPruner"));
+	PxU32* newCoreRemap = reinterpret_cast<PxU32*>(PX_ALLOC(bytesNeededForRemap, "BucketPruner"));
+	if(mCoreBoxes)
+	{
+		PxMemCopy(newCoreBoxes, mCoreBoxes, mCoreNbObjects*sizeof(PxBounds3));
+		PX_FREE(mCoreBoxes);
+	}
+	if(mCoreObjects)
+	{
+		PxMemCopy(newCoreObjects, mCoreObjects, mCoreNbObjects*sizeof(PrunerPayload));
+		PX_FREE(mCoreObjects);
+	}
+	if(mCoreRemap)
+	{
+		PxMemCopy(newCoreRemap, mCoreRemap, mCoreNbObjects*sizeof(PxU32));
+		PX_FREE(mCoreRemap);
+	}
+	mCoreBoxes = newCoreBoxes;
+	mCoreObjects = newCoreObjects;
+	mCoreRemap = newCoreRemap;
+}
+
+// Appends one object to the core arrays (growing them if full) and registers it in
+// the payload->index map with the given timestamp. The remap entry is set to
+// 0xffffffff (no sorted-array slot yet) — it becomes valid on the next build.
+// Precondition (enforced by callers): the structure is dirty, or about to be.
+PX_FORCE_INLINE void BucketPrunerCore::addObjectInternal(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp)
+{
+	if(mCoreNbObjects==mCoreCapacity)
+		resizeCore();
+
+	const PxU32 index = mCoreNbObjects++;
+	mCoreObjects[index] = object;
+	mCoreBoxes[index] = worldAABB;	// PT: TODO: check assembly here
+	mCoreRemap[index] = 0xffffffff;
+
+	// Objects are only inserted into the map once they're part of the main/core arrays.
+#ifdef USE_REGULAR_HASH_MAP
+	bool ok = mMap.insert(object, BucketPrunerPair(index, timeStamp));
+#else
+	BucketPrunerPair* ok = mMap.addPair(object, index, timeStamp);
+#endif
+	PX_UNUSED(ok);
+	PX_ASSERT(ok);
+}
+
+// Adds an object without necessarily invalidating the built structure:
+// while the structure is valid (!mDirty), up to FREE_PRUNER_SIZE new objects are
+// parked in a small brute-force "free array" that queries scan linearly. Once that
+// array overflows, its contents are flushed into the core arrays, the structure is
+// marked dirty (rebuild needed), and the new object follows the normal core path.
+// Always returns true. Requires owned memory.
+bool BucketPrunerCore::addObject(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp)
+{
+/*
+	We should probably use a bigger Payload struct here, which would also contains the external handle.
+	(EDIT: we can't even do that, because of the setExternalMemory function)
+	When asked to update/remove an object it would be O(n) to find the proper object in the mSortedObjects array.
+
+	-
+
+	For removing it we can simply empty the corresponding box, and the object will never be returned from queries.
+	Maybe this isn't even true, since boxes are sorted along one axis. So marking a box as empty could break the code relying on a sorted order.
+	An alternative is to mark the external handle as invalid, and ignore the object when a hit is found.
+
+	(EDIT: the sorting is now tested via data0/data1 anyway so we could mark the box as empty without breaking this)
+
+	-
+
+	For updating an object we would need to keep the (sub) array sorted (not the whole thing, only the array within a bucket).
+	We don't know the range (what part of the array maps to our bucket) but we may have the bucket ID somewhere? If we'd have this
+	we could parse the array left/right and resort just the right boxes. If we don't have this we may be able to "quickly" find the
+	range by traversing the tree, looking for the proper bucket. In any case I don't think there's a mapping to update within a bucket,
+	unlike in SAP or MBP. So we should be able to shuffle a bucket without having to update anything. For example there's no mapping
+	between the Core array and the Sorted array. It's a shame in a way because we'd need one, but it's not there - and in fact I think
+	we can free the Core array once Sorted is created, we don't need it at all.
+
+	If we don't want to re-sort the full bucket we can just mark it as dirty and ignore the sort-based early exits in the queries. Then we
+	can incrementally resort it over N frames or something.
+
+	This only works if the updated object remains in the same bucket though. If it moves to another bucket it becomes tempting to just remove
+	the object and re-insert it.
+
+	-
+
+	Now for adding an object, we can first have a "free pruner" and do the 16 next entries brute-force. Rebuilding every 16 objects might
+	give a good speedup already. Otherwise we need to do something more complicated.
+*/
+
+	PX_ASSERT(mOwnMemory);
+	PX_ASSERT(!mDirty || !mNbFree);
+	if(!mDirty)
+	{
+		// In this path the structure is marked as valid. We do not want to invalidate it for each new object...
+		if(mNbFree<FREE_PRUNER_SIZE)
+		{
+			// ...so as long as there is space in the "free array", we store the newly added object there and
+			// return immediately. Subsequent queries will parse the free array as if it was a free pruner.
+			const PxU32 index = mNbFree++;
+			mFreeObjects[index] = object;
+			mFreeBounds[index] = worldAABB;
+			mFreeStamps[index] = timeStamp;
+			return true;
+		}
+
+		// If we reach this place, the free array is full. We must transfer the objects from the free array to
+		// the main (core) arrays, mark the structure as invalid, and still deal with the incoming object.
+
+		// First we transfer free objects, reset the number of free objects, and mark the structure as
+		// invalid/dirty (the core arrays will need rebuilding).
+		for(PxU32 i=0;i<mNbFree;i++)
+			addObjectInternal(mFreeObjects[i], mFreeBounds[i], mFreeStamps[i]);
+
+		mNbFree = 0;
+		mDirty = true;
+//		mSortedNb = 0;	// PT: TODO: investigate if this should be done here
+
+		// After that we still need to deal with the new incoming object (so far we only
+		// transferred the already existing objects from the full free array). This will
+		// happen automatically by letting the code continue to the regular codepath below.
+	}
+
+	// If we reach this place, the structure must be invalid and the incoming object
+	// must be added to the main arrays.
+	PX_ASSERT(mDirty);
+
+	addObjectInternal(object, worldAABB, timeStamp);
+	return true;
+}
+
+// Removes an object, reporting its insertion timestamp through 'timeStamp'.
+// The object lives either in the core arrays (then it is found via the map, the
+// core arrays are compacted with a swap-with-last, and — if the structure is still
+// valid — its sorted-array box is emptied in place so queries skip it without
+// disturbing the sorted order) or in the small free array (plain swap-with-last).
+// Returns false if the object is in neither place (e.g. double remove).
+bool BucketPrunerCore::removeObject(const PrunerPayload& object, PxU32& timeStamp)
+{
+	// Even if the structure is already marked as dirty, we still need to update the
+	// core arrays and the map.
+
+	// The map only contains core objects, so we can use it to determine if the object
+	// exists in the core arrays or in the free array.
+#ifdef USE_REGULAR_HASH_MAP
+/*	BucketPrunerPair entry;
+	if(mMap.findAndErase(object, entry))
+	{
+		PxU32 coreIndex = entry.mCoreIndex;
+		timeStamp = entry.mTimeStamp;*/
+	const BucketPrunerMap::Entry* removedEntry = mMap.find(object);
+	if(removedEntry)
+	{
+		PxU32 coreIndex = removedEntry->second.mCoreIndex;
+		timeStamp = removedEntry->second.mTimeStamp;
+#else
+	PxU32 coreIndex;	// This is the object's index in the core arrays.
+	if(mMap.removePair(object, coreIndex, timeStamp))
+	{
+#endif
+		// In this codepath, the object we want to remove exists in the core arrays.
+
+		// We will need to remove it from both the core arrays & the sorted arrays.
+		const PxU32 sortedIndex = mCoreRemap[coreIndex];	// This is the object's index in the sorted arrays.
+
+#ifdef USE_REGULAR_HASH_MAP
+		// Must erase before the swap-with-last below so the moved object's map entry
+		// can be re-fetched and patched.
+		bool status = mMap.erase(object);
+		PX_ASSERT(status);
+		PX_UNUSED(status);
+#endif
+
+		// First let's deal with the core arrays
+		mCoreNbObjects--;
+		if(coreIndex!=mCoreNbObjects)
+		{
+			// If it wasn't the last object in the array, close the gaps as usual
+			const PrunerPayload& movedObject = mCoreObjects[mCoreNbObjects];
+			mCoreBoxes[coreIndex] = mCoreBoxes[mCoreNbObjects];
+			mCoreObjects[coreIndex] = movedObject;
+			mCoreRemap[coreIndex] = mCoreRemap[mCoreNbObjects];
+
+			// Since we just moved the last object, its index in the core arrays has changed.
+			// We must reflect this change in the map.
+#ifdef USE_REGULAR_HASH_MAP
+			BucketPrunerMap::Entry* movedEntry = const_cast<BucketPrunerMap::Entry*>(mMap.find(movedObject));
+			PX_ASSERT(movedEntry->second.mCoreIndex==mCoreNbObjects);
+			movedEntry->second.mCoreIndex = coreIndex;
+#else
+			BucketPrunerPair* movedEntry = const_cast<BucketPrunerPair*>(mMap.findPair(movedObject));
+			PX_ASSERT(movedEntry->mCoreIndex==mCoreNbObjects);
+			movedEntry->mCoreIndex = coreIndex;
+#endif
+		}
+
+		// Now, let's deal with the sorted arrays.
+		// If the structure is dirty, the sorted arrays will be rebuilt from scratch so there's no need to
+		// update them right now.
+		if(!mDirty)
+		{
+			// If the structure is valid, we want to keep it this way to avoid rebuilding sorted arrays after
+			// each removal. We can't "close the gaps" easily here because order of objects in the arrays matters.
+
+			// Instead we just invalidate the object by setting its bounding box as empty.
+			// Queries against empty boxes will never return a hit, so this effectively "removes" the object
+			// from any subsequent query results. Sorted arrays now contain a "disabled" object, until next build.
+
+			// Invalidating the box does not invalidate the sorting, since it's now captured in mData0/mData1.
+			// That is, mData0/mData1 keep their previous integer-encoded values, as if the box/object was still here.
+			PxBounds3 empty;
+			empty.setEmpty();
+			mSortedWorldBoxes[sortedIndex].mCenter = empty.getCenter();
+			mSortedWorldBoxes[sortedIndex].mExtents = empty.getExtents();
+			// Note that we don't touch mSortedObjects here. We could, but this is not necessary.
+		}
+		return true;
+	}
+
+	// Here, the object we want to remove exists in the free array. So we just parse it.
+	for(PxU32 i=0;i<mNbFree;i++)
+	{
+		if(mFreeObjects[i]==object)
+		{
+			// We found the object we want to remove. Close the gap as usual.
+			timeStamp = mFreeStamps[i];
+			mNbFree--;
+			mFreeBounds[i] = mFreeBounds[mNbFree];
+			mFreeObjects[i] = mFreeObjects[mNbFree];
+			mFreeStamps[i] = mFreeStamps[mNbFree];
+			return true;
+		}
+	}
+	// We didn't find the object. Can happen with a double remove. PX_ASSERT might be an option here.
+	return false;
+}
+
+// Updates an object's bounds as remove + re-add, preserving its original
+// timestamp. Returns false if the object was not found (nothing re-added).
+bool BucketPrunerCore::updateObject(const PxBounds3& worldAABB, const PrunerPayload& object)
+{
+	PxU32 timeStamp;
+	if(!removeObject(object, timeStamp))
+		return false;
+
+	return addObject(object, worldAABB, timeStamp);
+}
+
+// Removes every object whose insertion timestamp equals 'timeStamp' and returns how
+// many were removed. Mirrors removeObject()'s logic, but in bulk: first a linear
+// sweep over the (coalesced) payload->index map — emptying the sorted box when the
+// structure is valid and compacting the core arrays with swap-with-last — then a
+// sweep over the small free array.
+PxU32 BucketPrunerCore::removeMarkedObjects(PxU32 timeStamp)
+{
+	PxU32 nbRemoved=0;
+	// PT: objects can be either in the hash-map, or in the 'free' array. First we look in the hash-map...
+#ifdef USE_REGULAR_HASH_MAP
+	if(mMap.size())
+#else
+	if(mMap.mNbActivePairs)
+#endif
+	{
+		PxBounds3 empty;
+		empty.setEmpty();
+		const PxVec3 emptyCenter = empty.getCenter();
+		const PxVec3 emptyExtents = empty.getExtents();
+
+		// PT: hash-map is coalesced so we just parse it in linear order, no holes
+		PxU32 i=0;
+#ifdef USE_REGULAR_HASH_MAP
+		PxU32 nbActivePairs = mMap.size();
+		const BucketPrunerMap::Entry* entries = mMap.mBase.getEntries();
+#else
+		PxU32 nbActivePairs = mMap.mNbActivePairs;
+#endif
+		PxU32 coreNbObjects = mCoreNbObjects;	// PT: to avoid LHS
+		// 'i' only advances when entry i is kept: removal moves another entry into
+		// slot i, which must be examined in turn.
+		while(i<nbActivePairs)
+		{
+#ifdef USE_REGULAR_HASH_MAP
+			const BucketPrunerMap::Entry& p = entries[i];
+			if(p.second.mTimeStamp==timeStamp)
+#else
+			const BucketPrunerPair& p = mMap.mActivePairs[i];
+			if(p.mTimeStamp==timeStamp)
+#endif
+			{
+				// PT: timestamps match. We must remove this object.
+				// PT: we replicate here what we do in BucketPrunerCore::removeObject(). See that function for details.
+
+#ifdef USE_REGULAR_HASH_MAP
+				const PxU32 coreIndex = p.second.mCoreIndex;
+#else
+				const PxU32 coreIndex = p.mCoreIndex;
+#endif
+				if(!mDirty)
+				{
+					// PT: invalidating the box does not invalidate the sorting, since it's now captured in mData0/mData1
+					const PxU32 sortedIndex = mCoreRemap[coreIndex];
+					mSortedWorldBoxes[sortedIndex].mCenter = emptyCenter;
+					mSortedWorldBoxes[sortedIndex].mExtents = emptyExtents;
+				}
+
+				coreNbObjects--;
+				if(coreIndex!=coreNbObjects)
+				{
+					// Swap-with-last compaction, then patch the moved object's map entry.
+					const PrunerPayload& movedObject = mCoreObjects[coreNbObjects];
+					mCoreBoxes[coreIndex] = mCoreBoxes[coreNbObjects];
+					mCoreObjects[coreIndex] = movedObject;
+					mCoreRemap[coreIndex] = mCoreRemap[coreNbObjects];
+
+#ifdef USE_REGULAR_HASH_MAP
+					BucketPrunerMap::Entry* movedEntry = const_cast<BucketPrunerMap::Entry*>(mMap.find(movedObject));
+					PX_ASSERT(movedEntry->second.mCoreIndex==coreNbObjects);
+					movedEntry->second.mCoreIndex = coreIndex;
+#else
+					BucketPrunerPair* movedEntry = const_cast<BucketPrunerPair*>(mMap.findPair(movedObject));
+					PX_ASSERT(movedEntry->mCoreIndex==coreNbObjects);
+					movedEntry->mCoreIndex = coreIndex;
+#endif
+				}
+
+				nbRemoved++;
+#ifdef USE_REGULAR_HASH_MAP
+				bool status = mMap.erase(p.first);
+				PX_ASSERT(status);
+				PX_UNUSED(status);
+#else
+				const PxU32 hashValue = hash(p.mPayload) & mMap.mMask;
+				mMap.removePairInternal(p.mPayload, hashValue, i);
+#endif
+				nbActivePairs--;
+			}
+			else i++;
+		}
+		mCoreNbObjects = coreNbObjects;
+
+#ifdef USE_REGULAR_HASH_MAP
+#else
+		mMap.shrinkMemory();
+#endif
+	}
+
+	// PT: ...then we look in the 'free' array
+	PxU32 i=0;
+	while(i<mNbFree)
+	{
+		if(mFreeStamps[i]==timeStamp)
+		{
+			nbRemoved++;
+			mNbFree--;
+			mFreeBounds[i] = mFreeBounds[mNbFree];
+			mFreeObjects[i] = mFreeObjects[mNbFree];
+			mFreeStamps[i] = mFreeStamps[mNbFree];
+		}
+		else i++;
+	}
+	return nbRemoved;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Computes the merged global bounds (center/extents into _globalBox), picks the sort
+// axis (1=Y or 2=Z, whichever global extent is larger), then radix-sorts all boxes
+// by their min along that axis, writing center/extents-form boxes and reordered
+// payloads into sortedBoxes/sortedObjects. Returns the chosen sort axis.
+// Note: the radix keys are written into the sortedObjects buffer, which is only
+// safe because each sortedObjects[i] is written after its key has been consumed.
+static PxU32 sortBoxes(	PxU32 nb, const PxBounds3* PX_RESTRICT boxes, const PrunerPayload* PX_RESTRICT objects,
+						BucketBox& _globalBox, BucketBox* PX_RESTRICT sortedBoxes, PrunerPayload* PX_RESTRICT sortedObjects)
+{
+	// Compute global box & sort axis
+	PxU32 sortAxis;
+	{
+		PX_ASSERT(nb>0);
+		// Seed the merge with the last box (V3LoadU for maximum: it is the last
+		// member of the last PxBounds3, so a 4-float unaligned read could run past
+		// the end of the array).
+		Vec4V mergedMinV = V4LoadU(&boxes[nb-1].minimum.x);
+		Vec4V mergedMaxV = Vec4V_From_Vec3V(V3LoadU(&boxes[nb-1].maximum.x));
+		for(PxU32 i=0;i<nb-1;i++)
+		{
+			mergedMinV = V4Min(mergedMinV, V4LoadU(&boxes[i].minimum.x));
+			mergedMaxV = V4Max(mergedMaxV, V4LoadU(&boxes[i].maximum.x));
+		}
+
+/*		PX_ALIGN(16, PxVec4) mergedMin;
+		PX_ALIGN(16, PxVec4) mergedMax;
+		V4StoreA(mergedMinV, &mergedMin.x);
+		V4StoreA(mergedMaxV, &mergedMax.x);
+
+		_globalBox.mCenter.x = (mergedMax.x + mergedMin.x)*0.5f;
+		_globalBox.mCenter.y = (mergedMax.y + mergedMin.y)*0.5f;
+		_globalBox.mCenter.z = (mergedMax.z + mergedMin.z)*0.5f;
+		_globalBox.mExtents.x = (mergedMax.x - mergedMin.x)*0.5f;
+		_globalBox.mExtents.y = (mergedMax.y - mergedMin.y)*0.5f;
+		_globalBox.mExtents.z = (mergedMax.z - mergedMin.z)*0.5f;*/
+
+		const float Half = 0.5f;
+		const FloatV HalfV = FLoad(Half);
+		PX_ALIGN(16, PxVec4) mergedCenter;
+		PX_ALIGN(16, PxVec4) mergedExtents;
+
+		const Vec4V mergedCenterV = V4Scale(V4Add(mergedMaxV, mergedMinV), HalfV);
+		const Vec4V mergedExtentsV = V4Scale(V4Sub(mergedMaxV, mergedMinV), HalfV);
+		V4StoreA(mergedCenterV, &mergedCenter.x);
+		V4StoreA(mergedExtentsV, &mergedExtents.x);
+		_globalBox.mCenter = PxVec3(mergedCenter.x, mergedCenter.y, mergedCenter.z);
+		_globalBox.mExtents = PxVec3(mergedExtents.x, mergedExtents.y, mergedExtents.z);
+
+		// Sort along whichever of Y/Z has the larger global extent (X is always a split axis).
+		const PxF32 absY = PxAbs(_globalBox.mExtents.y);
+		const PxF32 absZ = PxAbs(_globalBox.mExtents.z);
+		sortAxis = PxU32(absY < absZ ? 1 : 2);
+//		printf("Sort axis: %d\n", sortAxis);
+	}
+
+	float* keys = reinterpret_cast<float*>(sortedObjects);
+	for(PxU32 i=0;i<nb;i++)
+		keys[i] = boxes[i].minimum[sortAxis];
+
+	Cm::RadixSortBuffered rs;	// ###TODO: some allocs here, remove
+	const PxU32* ranks = rs.Sort(keys, nb).GetRanks();
+
+	const float Half = 0.5f;
+	const FloatV HalfV = FLoad(Half);
+	for(PxU32 i=0;i<nb;i++)
+	{
+		const PxU32 index = *ranks++;
+//const PxU32 index = local[i].index;
+//		sortedBoxes[i].mCenter = boxes[index].getCenter();
+//		sortedBoxes[i].mExtents = boxes[index].getExtents();
+
+		const Vec4V bucketBoxMinV = V4LoadU(&boxes[index].minimum.x);
+		const Vec4V bucketBoxMaxV = Vec4V_From_Vec3V(V3LoadU(&boxes[index].maximum.x));
+		const Vec4V bucketBoxCenterV = V4Scale(V4Add(bucketBoxMaxV, bucketBoxMinV), HalfV);
+		const Vec4V bucketBoxExtentsV = V4Scale(V4Sub(bucketBoxMaxV, bucketBoxMinV), HalfV);
+		// We don't need to preserve data0/data1 here
+		AlignedStore(bucketBoxCenterV, &sortedBoxes[i].mCenter.x);
+		AlignedStore(bucketBoxExtentsV, &sortedBoxes[i].mExtents.x);
+
+		#ifdef _DEBUG
+		sortedBoxes[i].mDebugMin = boxes[index].minimum[sortAxis];
+		#endif
+		sortedObjects[i] = objects[index];
+	}
+
+	return sortAxis;
+}
+
+#ifdef NODE_SORT
+ template<class T>
+ PX_CUDA_CALLABLE PX_FORCE_INLINE void tswap(T& x, T& y)
+ {
+ T tmp = x;
+ x = y;
+ y = tmp;
+ }
+
+/* PX_FORCE_INLINE __m128 DotV(const __m128 a, const __m128 b)
+ {
+ const __m128 dot1 = _mm_mul_ps(a, b);
+ const __m128 shuf1 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dot1), _MM_SHUFFLE(0,0,0,0)));
+ const __m128 shuf2 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dot1), _MM_SHUFFLE(1,1,1,1)));
+ const __m128 shuf3 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dot1), _MM_SHUFFLE(2,2,2,2)));
+ return _mm_add_ps(_mm_add_ps(shuf1, shuf2), shuf3);
+ }*/
+
// PT: hmmm, by construction, isn't the order always the same for all bucket pruners?
// => maybe not because the bucket boxes are still around the merged aabbs, not around the bucket
// Still we could do something here
//
// Returns the traversal order of the 5 child buckets of 'parent' for a query
// travelling along 'rayDir', packed as five 3-bit bucket indices (bits 0..14,
// front-to-back). Buckets are keyed by the projection of their box center onto
// 'rayDir'; empty buckets get PX_MAX_F32 so they sort last. The sort runs on the
// raw IEEE-754 bits of the keys: 'mask' below clears the sign bit (integer abs)
// and the 3 lowest mantissa bits, which are reused to carry each bucket's index
// through the sorting network so keys and indices travel together.
static /*PX_FORCE_INLINE*/ PxU32 sort(const BucketPrunerNode& parent, const PxVec3& rayDir)
{
	// Sorting only pays off for reasonably populated nodes: below the threshold,
	// return the identity order 0,1,2,3,4.
	const PxU32 totalCount = parent.mCounters[0]+parent.mCounters[1]+parent.mCounters[2]+parent.mCounters[3]+parent.mCounters[4];
	if(totalCount<NODE_SORT_MIN_COUNT)
		return 0|(1<<3)|(2<<6)|(3<<9)|(4<<12);

	float dp[5];
/*	const __m128 rayDirV = _mm_loadu_ps(&rayDir.x);
	__m128 dp0V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[0].mCenter.x));	_mm_store_ss(&dp[0], dp0V);
	__m128 dp1V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[1].mCenter.x));	_mm_store_ss(&dp[1], dp1V);
	__m128 dp2V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[2].mCenter.x));	_mm_store_ss(&dp[2], dp2V);
	__m128 dp3V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[3].mCenter.x));	_mm_store_ss(&dp[3], dp3V);
	__m128 dp4V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[4].mCenter.x));	_mm_store_ss(&dp[4], dp4V);
*/

#ifdef VERIFY_SORT
	// Reference implementation (bubble sort on fabsf'ed keys) used to cross-check
	// the bit-trick version below; result compared in the PX_ASSERT at the end.
	PxU32 code;
	{
		dp[0] = parent.mCounters[0] ? PxAbs(parent.mBucketBox[0].mCenter.dot(rayDir)) : PX_MAX_F32;
		dp[1] = parent.mCounters[1] ? PxAbs(parent.mBucketBox[1].mCenter.dot(rayDir)) : PX_MAX_F32;
		dp[2] = parent.mCounters[2] ? PxAbs(parent.mBucketBox[2].mCenter.dot(rayDir)) : PX_MAX_F32;
		dp[3] = parent.mCounters[3] ? PxAbs(parent.mBucketBox[3].mCenter.dot(rayDir)) : PX_MAX_F32;
		dp[4] = parent.mCounters[4] ? PxAbs(parent.mBucketBox[4].mCenter.dot(rayDir)) : PX_MAX_F32;

		PxU32 ii0 = 0;
		PxU32 ii1 = 1;
		PxU32 ii2 = 2;
		PxU32 ii3 = 3;
		PxU32 ii4 = 4;

		// PT: using integer cmps since we used fabsf above
		// const PxU32* values = reinterpret_cast<const PxU32*>(dp);
		const PxU32* values = PxUnionCast<PxU32*, PxF32*>(dp);

		PxU32 value0 = values[0];
		PxU32 value1 = values[1];
		PxU32 value2 = values[2];
		PxU32 value3 = values[3];
		PxU32 value4 = values[4];

		for(PxU32 j=0;j<5-1;j++)
		{
			if(value1<value0)
			{
				tswap(value0, value1);
				tswap(ii0, ii1);
			}
			if(value2<value1)
			{
				tswap(value1, value2);
				tswap(ii1, ii2);
			}
			if(value3<value2)
			{
				tswap(value2, value3);
				tswap(ii2, ii3);
			}
			if(value4<value3)
			{
				tswap(value3, value4);
				tswap(ii3, ii4);
			}
		}
		//return ii0|(ii1<<3)|(ii2<<6)|(ii3<<9)|(ii4<<12);
		code = ii0|(ii1<<3)|(ii2<<6)|(ii3<<9)|(ii4<<12);
	}
#endif

	// Keys: projection of each non-empty bucket's center onto the ray direction;
	// empty buckets are pushed to the back with PX_MAX_F32.
	dp[0] = parent.mCounters[0] ? parent.mBucketBox[0].mCenter.dot(rayDir) : PX_MAX_F32;
	dp[1] = parent.mCounters[1] ? parent.mBucketBox[1].mCenter.dot(rayDir) : PX_MAX_F32;
	dp[2] = parent.mCounters[2] ? parent.mBucketBox[2].mCenter.dot(rayDir) : PX_MAX_F32;
	dp[3] = parent.mCounters[3] ? parent.mBucketBox[3].mCenter.dot(rayDir) : PX_MAX_F32;
	dp[4] = parent.mCounters[4] ? parent.mBucketBox[4].mCenter.dot(rayDir) : PX_MAX_F32;

	const PxU32* values = PxUnionCast<PxU32*, PxF32*>(dp);

	// Clear the sign bit (integer 'abs', matching the fabsf of the reference
	// version) and the 3 low bits, then smuggle each bucket index into those
	// low bits so a single integer compare moves key and index together.
//	const PxU32 mask = ~7U;
	const PxU32 mask = 0x7ffffff8;
	PxU32 value0 = (values[0]&mask);
	PxU32 value1 = (values[1]&mask)|1;
	PxU32 value2 = (values[2]&mask)|2;
	PxU32 value3 = (values[3]&mask)|3;
	PxU32 value4 = (values[4]&mask)|4;

	// 4 passes of a 4-comparator bubble network: fully sorts 5 values.
#define SORT_BLOCK	\
	if(value1<value0)	tswap(value0, value1);	\
	if(value2<value1)	tswap(value1, value2);	\
	if(value3<value2)	tswap(value2, value3);	\
	if(value4<value3)	tswap(value3, value4);
	SORT_BLOCK
	SORT_BLOCK
	SORT_BLOCK
	SORT_BLOCK

	// Unpack the bucket indices back out of the low bits and pack the order code.
	const PxU32 ii0 = value0&7;
	const PxU32 ii1 = value1&7;
	const PxU32 ii2 = value2&7;
	const PxU32 ii3 = value3&7;
	const PxU32 ii4 = value4&7;
	const PxU32 code2 = ii0|(ii1<<3)|(ii2<<6)|(ii3<<9)|(ii4<<12);
#ifdef VERIFY_SORT
	PX_ASSERT(code2==code);
#endif
	return code2;
}
+
+static void gPrecomputeSort(BucketPrunerNode& node, const PxVec3* PX_RESTRICT dirs)
+{
+ for(int i=0;i<8;i++)
+ node.mOrder[i] = Ps::to16(sort(node, dirs[i]));
+}
+#endif
+
// Rebuilds the pruner's sorted/bucketed acceleration data from the "core" object
// arrays. No-op unless the structure was flagged dirty. Produces:
// - mGlobalBox / mSortAxis (via sortBoxes),
// - mSortedWorldBoxes / mSortedObjects, partitioned into the 3-level (5x5x5)
//   bucket hierarchy mLevel1 / mLevel2 / mLevel3,
// - mCoreRemap (core index -> sorted index), when present.
void BucketPrunerCore::classifyBoxes()
{
	if(!mDirty)
		return;

	mDirty = false;

	const PxU32 nb = mCoreNbObjects;
	if(!nb)
	{
		mSortedNb=0;
		return;
	}

	PX_ASSERT(!mNbFree);

#ifdef BRUTE_FORCE_LIMIT
	// Tiny sets: skip the bucket hierarchy entirely, just mirror the core boxes
	// (as center/extents) into the sorted arrays.
	if(nb<=BRUTE_FORCE_LIMIT)
	{
		allocateSortedMemory(nb);
		BucketBox* sortedBoxes = mSortedWorldBoxes;
		PrunerPayload* sortedObjects = mSortedObjects;

		const float Half = 0.5f;
		const __m128 HalfV = _mm_load1_ps(&Half);
		PX_ALIGN(16, PxVec4) bucketCenter;
		PX_ALIGN(16, PxVec4) bucketExtents;
		for(PxU32 i=0;i<nb;i++)
		{
			const __m128 bucketBoxMinV = _mm_loadu_ps(&mCoreBoxes[i].minimum.x);
			const __m128 bucketBoxMaxV = _mm_loadu_ps(&mCoreBoxes[i].maximum.x);
			const __m128 bucketBoxCenterV = _mm_mul_ps(_mm_add_ps(bucketBoxMaxV, bucketBoxMinV), HalfV);
			const __m128 bucketBoxExtentsV = _mm_mul_ps(_mm_sub_ps(bucketBoxMaxV, bucketBoxMinV), HalfV);
			_mm_store_ps(&bucketCenter.x, bucketBoxCenterV);
			_mm_store_ps(&bucketExtents.x, bucketBoxExtentsV);
			sortedBoxes[i].mCenter = PxVec3(bucketCenter.x, bucketCenter.y, bucketCenter.z);
			sortedBoxes[i].mExtents = PxVec3(bucketExtents.x, bucketExtents.y, bucketExtents.z);

			sortedObjects[i] = mCoreObjects[i];
		}
		return;
	}
#endif

// Temporarily hijack each payload's data[0] to store its core index, so that
// after the (order-scrambling) classification below we can recover which core
// object ended up at which sorted slot. The original data[0] values are saved
// in 'remap' and restored afterwards.
size_t* remap = reinterpret_cast<size_t*>(PX_ALLOC(nb*sizeof(size_t), ""));
for(PxU32 i=0;i<nb;i++)
{
	remap[i] = mCoreObjects[i].data[0];
	mCoreObjects[i].data[0] = i;
}

//	printf("Nb objects: %d\n", nb);

	// Scratch buffers for the classification passes; stack-allocated for small
	// object counts, heap-allocated otherwise.
	PrunerPayload localTempObjects[LOCAL_SIZE];
	BucketBox localTempBoxes[LOCAL_SIZE];
	PrunerPayload* tempObjects;
	BucketBox* tempBoxes;
	if(nb>LOCAL_SIZE)
	{
		tempObjects = reinterpret_cast<PrunerPayload*>(PX_ALLOC(sizeof(PrunerPayload)*nb, "BucketPruner"));
		tempBoxes = reinterpret_cast<BucketBox*>(PX_ALLOC(nb*sizeof(BucketBox), "BucketPruner"));
	}
	else
	{
		tempObjects = localTempObjects;
		tempBoxes = localTempBoxes;
	}

	// Compute the merged global box, pick the sort axis (1 or 2) and sort
	// boxes/objects along it into the temp buffers.
	mSortAxis = sortBoxes(nb, mCoreBoxes, mCoreObjects, mGlobalBox, tempBoxes, tempObjects);

	PX_ASSERT(mSortAxis);

	allocateSortedMemory(nb);
	BucketBox* sortedBoxes = mSortedWorldBoxes;
	PrunerPayload* sortedObjects = mSortedObjects;

	// Level 1: split around the global box center, along X and the remaining
	// (non-sort) Y/Z axis.
	const PxU32 yz = PxU32(mSortAxis == 1 ? 2 : 1);
	const float limitX = mGlobalBox.mCenter.x;
	const float limitYZ = mGlobalBox.mCenter[yz];
	mLevel1.classifyBoxes(limitX, limitYZ, nb, tempBoxes, tempObjects,
							sortedBoxes, sortedObjects,
							false, mSortAxis);

	// Level 2: re-classify each of the 5 level-1 buckets.
	processChildBuckets(nb, tempBoxes, tempObjects,
						mLevel1, mLevel2, mSortedWorldBoxes, mSortedObjects,
						mSortAxis);

	// Level 3: re-classify each of the 5x5 level-2 buckets.
	for(PxU32 j=0;j<5;j++)
		processChildBuckets(nb, tempBoxes, tempObjects,
							mLevel2[j], mLevel3[j], mSortedWorldBoxes + mLevel1.mOffsets[j], mSortedObjects + mLevel1.mOffsets[j],
							mSortAxis);

	{
		// Encode each sorted box's min/max along the sort axis into integer keys
		// (mData0/mData1) used for the early-out tests during queries.
		for(PxU32 i=0;i<nb;i++)
		{
			encodeBoxMinMax(mSortedWorldBoxes[i], mSortAxis);
		}
	}

	if(nb>LOCAL_SIZE)
	{
		PX_FREE(tempBoxes);
		PX_FREE(tempObjects);
	}

// Undo the data[0] hijack: restore the saved payload data in both the sorted
// and the core arrays, and record the core->sorted index mapping.
for(PxU32 i=0;i<nb;i++)
{
	const PxU32 coreIndex = PxU32(mSortedObjects[i].data[0]);
	const size_t saved = remap[coreIndex];
	mSortedObjects[i].data[0] = saved;
	mCoreObjects[coreIndex].data[0] = saved;
	if(mCoreRemap)
		mCoreRemap[coreIndex] = i;
//	remap[i] = mCoreObjects[i].data[0];
//	mCoreObjects[i].data[0] = i;
}
PX_FREE(remap);

/*	if(mOwnMemory)
	{
		PX_FREE_AND_RESET(mCoreBoxes);
		PX_FREE_AND_RESET(mCoreObjects);
	}*/


#ifdef NODE_SORT
	{
		// Precompute per-node bucket traversal orders for the 8 octant
		// directions, used by raycasts/sweeps to visit buckets front-to-back.
		PxVec3 dirs[8];
		dirs[0] = PxVec3(1.0f, 1.0f, 1.0f);
		dirs[1] = PxVec3(1.0f, 1.0f, -1.0f);
		dirs[2] = PxVec3(1.0f, -1.0f, 1.0f);
		dirs[3] = PxVec3(1.0f, -1.0f, -1.0f);
		dirs[4] = PxVec3(-1.0f, 1.0f, 1.0f);
		dirs[5] = PxVec3(-1.0f, 1.0f, -1.0f);
		dirs[6] = PxVec3(-1.0f, -1.0f, 1.0f);
		dirs[7] = PxVec3(-1.0f, -1.0f, -1.0f);
		for(int i=0;i<8;i++)
			dirs[i].normalize();

		gPrecomputeSort(mLevel1, dirs);

		for(PxU32 i=0;i<5;i++)
			gPrecomputeSort(mLevel2[i], dirs);

		for(PxU32 j=0;j<5;j++)
		{
			for(PxU32 i=0;i<5;i++)
				gPrecomputeSort(mLevel3[j][i], dirs);
		}
	}
#endif
}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#ifdef CAN_USE_MOVEMASK
	// Precomputed ray/segment data for the movemask-based segment-vs-AABB test.
	// Each PxVec3 is 16-byte aligned and padded to a full float4 so the fields
	// can be read/written with aligned SIMD loads/stores.
	struct RayParams
	{
		PX_ALIGN(16, PxVec3	mData2);	float	padding0;	// segment center point: rayOrig + mData (see precomputeRayData)
		PX_ALIGN(16, PxVec3	mFDir);		float	padding1;	// componentwise abs of mData
		PX_ALIGN(16, PxVec3	mData);		float	padding2;	// half segment vector: 0.5 * rayDir * maxDist
		PX_ALIGN(16, PxVec3	mInflate);	float	padding3;	// box inflation extents (used by the inflateT variants)
	};
+
	// Fills 'rayParams' with the segment representation used by _segmentAABB:
	// mData = half segment vector, mData2 = segment midpoint, mFDir = abs(mData).
	// Must be re-run whenever maxDist shrinks (see processBucket).
	static PX_FORCE_INLINE void precomputeRayData(RayParams* PX_RESTRICT rayParams, const PxVec3& rayOrig, const PxVec3& rayDir, float maxDist)
	{
	#ifdef USE_SIMD
		const float Half = 0.5f * maxDist;
		const __m128 HalfV = _mm_load1_ps(&Half);
		const __m128 DataV = _mm_mul_ps(_mm_loadu_ps(&rayDir.x), HalfV);
		const __m128 Data2V = _mm_add_ps(_mm_loadu_ps(&rayOrig.x), DataV);
		// Clearing the sign bit of each lane yields the componentwise abs.
		const PxU32 MaskI = 0x7fffffff;
		const __m128 FDirV = _mm_and_ps(_mm_load1_ps(reinterpret_cast<const float*>(&MaskI)), DataV);
		_mm_store_ps(&rayParams->mData.x, DataV);
		_mm_store_ps(&rayParams->mData2.x, Data2V);
		_mm_store_ps(&rayParams->mFDir.x, FDirV);
	#else
		const PxVec3 data = 0.5f * rayDir * maxDist;
		rayParams->mData = data;
		rayParams->mData2 = rayOrig + data;
		rayParams->mFDir.x = PxAbs(data.x);
		rayParams->mFDir.y = PxAbs(data.y);
		rayParams->mFDir.z = PxAbs(data.z);
	#endif
	}
+
	// Segment-vs-AABB overlap test (separating axis): first the three box axes,
	// then the three segment/box-edge cross-product axes. 'inflateT' selects at
	// compile time whether the box is inflated by params->mInflate (sweeps).
	// Returns 1 on overlap, 0 if a separating axis exists.
	template <int inflateT>
	static PX_FORCE_INLINE IntBool _segmentAABB(const BucketBox& box, const RayParams* PX_RESTRICT params)
	{
	#ifdef USE_SIMD
		const PxU32 maskI = 0x7fffffff;
		const __m128 fdirV = _mm_load_ps(&params->mFDir.x);
//	#ifdef _DEBUG
		const __m128 extentsV = inflateT ? _mm_add_ps(_mm_loadu_ps(&box.mExtents.x), _mm_load_ps(&params->mInflate.x)) : _mm_loadu_ps(&box.mExtents.x);
		const __m128 DV = _mm_sub_ps(_mm_load_ps(&params->mData2.x), _mm_loadu_ps(&box.mCenter.x));
/*	#else
		const __m128 extentsV = inflateT ? _mm_add_ps(_mm_load_ps(&box.mExtents.x), _mm_load_ps(&params->mInflate.x)) : _mm_load_ps(&box.mExtents.x);
		const __m128 DV = _mm_sub_ps(_mm_load_ps(&params->mData2.x), _mm_load_ps(&box.mCenter.x));
	#endif*/
		// Box-axis tests: |D| > extents + |dir| on any of x/y/z => separated.
		__m128 absDV = _mm_and_ps(DV, _mm_load1_ps(reinterpret_cast<const float*>(&maskI)));
		absDV = _mm_cmpgt_ps(absDV, _mm_add_ps(extentsV, fdirV));
		const PxU32 test = PxU32(_mm_movemask_ps(absDV));
		if(test&7)
			return 0;

		// Cross-axis tests: compute the three cross terms in parallel via
		// shuffled operands (the scalar version below spells them out).
		const __m128 dataZYX_V = _mm_load_ps(&params->mData.x);
		const __m128 dataXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dataZYX_V), _MM_SHUFFLE(3,0,2,1)));
		const __m128 DXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(DV), _MM_SHUFFLE(3,0,2,1)));
		const __m128 fV = _mm_sub_ps(_mm_mul_ps(dataZYX_V, DXZY_V), _mm_mul_ps(dataXZY_V, DV));

		const __m128 fdirZYX_V = _mm_load_ps(&params->mFDir.x);
		const __m128 fdirXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(fdirZYX_V), _MM_SHUFFLE(3,0,2,1)));
		const __m128 extentsXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1)));
		const __m128 fg = _mm_add_ps(_mm_mul_ps(extentsV, fdirXZY_V), _mm_mul_ps(extentsXZY_V, fdirZYX_V));

		__m128 absfV = _mm_and_ps(fV, _mm_load1_ps(reinterpret_cast<const float*>(&maskI)));
		absfV = _mm_cmpgt_ps(absfV, fg);
		const PxU32 test2 = PxU32(_mm_movemask_ps(absfV));
		if(test2&7)
			return 0;
		return 1;
	#else
		// Scalar reference: same separating-axis tests, spelled out per axis.
		const float boxExtentsx = inflateT ? box.mExtents.x + params->mInflate.x : box.mExtents.x;
		const float Dx = params->mData2.x - box.mCenter.x;	if(fabsf(Dx) > boxExtentsx + params->mFDir.x)	return IntFalse;

		const float boxExtentsz = inflateT ? box.mExtents.z + params->mInflate.z : box.mExtents.z;
		const float Dz = params->mData2.z - box.mCenter.z;	if(fabsf(Dz) > boxExtentsz + params->mFDir.z)	return IntFalse;

		const float boxExtentsy = inflateT ? box.mExtents.y + params->mInflate.y : box.mExtents.y;
		const float Dy = params->mData2.y - box.mCenter.y;	if(fabsf(Dy) > boxExtentsy + params->mFDir.y)	return IntFalse;

		float f;
		f = params->mData.y * Dz - params->mData.z * Dy;	if(fabsf(f) > boxExtentsy*params->mFDir.z + boxExtentsz*params->mFDir.y)	return IntFalse;
		f = params->mData.z * Dx - params->mData.x * Dz;	if(fabsf(f) > boxExtentsx*params->mFDir.z + boxExtentsz*params->mFDir.x)	return IntFalse;
		f = params->mData.x * Dy - params->mData.y * Dx;	if(fabsf(f) > boxExtentsx*params->mFDir.y + boxExtentsy*params->mFDir.x)	return IntFalse;
		return IntTrue;
	#endif
	}
+#else
+ #include "SqPrunerTestsSIMD.h"
+
+ typedef RayAABBTest BPRayAABBTest;
+
+template <int inflateT>
+static PX_FORCE_INLINE IntBool _segmentAABB(const BucketBox& box, const BPRayAABBTest& test)
+{
+ return static_cast<IntBool>(test.check<inflateT>(V3LoadU(box.mCenter), V3LoadU(box.mExtents)));
+}
+
+/*static PX_FORCE_INLINE IntBool _segmentAABB(const BucketBox& box, const BPRayAABBTest& test, PxU32 rayMinLimitX, PxU32 rayMaxLimitX)
+{
+ if(rayMinLimitX>box.mData1)
+ return 0;
+ if(rayMaxLimitX<box.mData0)
+ return 0;
+
+ return test(Vec3V_From_PxVec3(box.mCenter), Vec3V_From_PxVec3(box.mExtents));
+}*/
+#endif
+
// Raycast/sweep through one leaf bucket: walks its boxes in sort-axis order,
// using the encoded integer min/max keys (mData0/mData1) against the encoded
// query window [_rayMinLimitInt, _rayMaxLimitInt] to skip boxes before the
// window and early-out past it. Calls pcb.invoke on each SAT hit; when the
// callback shrinks maxDist, the window and ray data are recomputed so later
// boxes are culled more aggressively. Returns false if the callback aborts.
template <int inflateT>
static PxAgain processBucket(
	PxU32 nb, const BucketBox* PX_RESTRICT baseBoxes, PrunerPayload* PX_RESTRICT baseObjects,
	PxU32 offset, PxU32 totalAllocated,
	const PxVec3& rayOrig, const PxVec3& rayDir, float& maxDist,
#ifdef CAN_USE_MOVEMASK
	RayParams* PX_RESTRICT rayParams,
#else
	BPRayAABBTest& test, const PxVec3& inflate,
#endif
	PrunerCallback& pcb, PxU32& _rayMinLimitInt, PxU32& _rayMaxLimitInt, PxU32 sortAxis)
{
	PX_UNUSED(totalAllocated);

	const BucketBox* PX_RESTRICT _boxes = baseBoxes + offset;
	PrunerPayload* PX_RESTRICT _objects = baseObjects + offset;

	PxU32 rayMinLimitInt = _rayMinLimitInt;
	PxU32 rayMaxLimitInt = _rayMaxLimitInt;

	const BucketBox* last = _boxes + nb;

	while(_boxes!=last)
	{
		const BucketBox& currentBox = *_boxes++;
		PrunerPayload* currentObject = _objects++;

		// Box ends before the query window on the sort axis: skip.
		if(currentBox.mData1<rayMinLimitInt)
			continue;

		// Box starts after the window: boxes are sorted, nothing further can hit.
		if(currentBox.mData0>rayMaxLimitInt)
			goto Exit;

#ifdef CAN_USE_MOVEMASK
		if(!_segmentAABB<inflateT>(currentBox, rayParams))
			continue;
#else
		if(!_segmentAABB<inflateT>(currentBox, test))
			continue;
#endif

		const float MaxDist = maxDist;
		const PxAgain again = pcb.invoke(maxDist, *currentObject);
		if(!again)
			return false;
		// Callback shrank the ray: recompute the encoded window and the
		// precomputed segment data for the shorter ray.
		if(maxDist < MaxDist)
		{
			float rayMinLimit, rayMaxLimit;
#ifdef CAN_USE_MOVEMASK
			if(inflateT)
				computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, rayParams->mInflate, sortAxis);
			else
				computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, sortAxis);

			precomputeRayData(rayParams, rayOrig, rayDir, maxDist);
#else
			if(inflateT)
				computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, inflate, sortAxis);
			else
				computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, sortAxis);

			test.setDistance(maxDist);
#endif
			const PxU32* binaryMinLimit = reinterpret_cast<const PxU32*>(&rayMinLimit);
			const PxU32* binaryMaxLimit = reinterpret_cast<const PxU32*>(&rayMaxLimit);
			rayMinLimitInt = encodeFloat(binaryMinLimit[0]);
			rayMaxLimitInt = encodeFloat(binaryMaxLimit[0]);
		}
	}
Exit:

	// Propagate the (possibly tightened) window back to the caller so sibling
	// buckets benefit from it too.
	_rayMinLimitInt = rayMinLimitInt;
	_rayMaxLimitInt = rayMaxLimitInt;
	return true;
}
+
+#ifdef NODE_SORT
+static PxU32 computeDirMask(const PxVec3& dir)
+{
+ const PxU32* binary = reinterpret_cast<const PxU32*>(&dir.x);
+ const PxU32 X = (binary[0])>>31;
+ const PxU32 Y = (binary[1])>>31;
+ const PxU32 Z = (binary[2])>>31;
+ return Z|(Y<<1)|(X<<2);
+}
+#endif
+
// Shared raycast (inflateT=0) / sweep (inflateT=1) kernel over the bucket
// pruner: tests the "free" (not-yet-classified) objects first, then descends
// the 3-level bucket hierarchy, early-rejecting buckets with _segmentAABB and
// handing surviving leaf buckets to processBucket. Returns false if the
// callback aborted; otherwise true, with maxDist updated to the closest hit.
template <int inflateT>
static PxAgain stab(const BucketPrunerCore& core, PrunerCallback& pcb, const PxVec3& rayOrig, const PxVec3& rayDir, float& maxDist, const PxVec3 inflate)
{
	const PxU32 nb = core.mSortedNb;
	if(!nb && !core.mNbFree)
		return true;

	// Unbounded rays: clip against the merged bounds (global box plus free
	// objects' bounds, inflated) to get a finite segment for the tests below.
	if(maxDist==PX_MAX_F32)
	{
		/*const*/ PxVec3 boxMin = core.mGlobalBox.getMin() - inflate;
		/*const*/ PxVec3 boxMax = core.mGlobalBox.getMax() + inflate;

		if(core.mNbFree)
		{
			// TODO: optimize this
			PxBounds3 freeGlobalBounds;
			freeGlobalBounds.setEmpty();
			for(PxU32 i=0;i<core.mNbFree;i++)
				freeGlobalBounds.include(core.mFreeBounds[i]);
			freeGlobalBounds.minimum -= inflate;
			freeGlobalBounds.maximum += inflate;
			boxMin = boxMin.minimum(freeGlobalBounds.minimum);
			boxMax = boxMax.maximum(freeGlobalBounds.maximum);
		}

		clipRay(rayOrig, rayDir, maxDist, boxMin, boxMax);
	}

	// Precompute the segment-vs-AABB test data (movemask path) or test object.
#ifdef CAN_USE_MOVEMASK
	RayParams rayParams;
	#ifdef USE_SIMD
	rayParams.padding0 = rayParams.padding1 = rayParams.padding2 = rayParams.padding3 = 0.0f;
	#endif
	if(inflateT)
		rayParams.mInflate = inflate;

	precomputeRayData(&rayParams, rayOrig, rayDir, maxDist);
#else
	BPRayAABBTest test(rayOrig, rayDir, maxDist, inflateT ? inflate : PxVec3(0.0f));
#endif

	// Free objects are not in the sorted structure: brute-force them first.
	for(PxU32 i=0;i<core.mNbFree;i++)
	{
		BucketBox tmp;
		tmp.mCenter = core.mFreeBounds[i].getCenter();
		tmp.mExtents = core.mFreeBounds[i].getExtents();

#ifdef CAN_USE_MOVEMASK
		if(_segmentAABB<inflateT>(tmp, &rayParams))
#else
		if(_segmentAABB<inflateT>(tmp, test))
#endif
		{
			if(!pcb.invoke(maxDist, core.mFreeObjects[i]))
				return false;
		}
	}

	if(!nb)
		return true;

	// Whole-structure cull.
#ifdef CAN_USE_MOVEMASK
	if(!_segmentAABB<inflateT>(core.mGlobalBox, &rayParams))
		return true;
#else
	if(!_segmentAABB<inflateT>(core.mGlobalBox, test))
		return true;
#endif

	// Encode the query window along the sort axis for the integer early-outs
	// in processBucket.
	const PxU32 sortAxis = core.mSortAxis;
	float rayMinLimit, rayMaxLimit;
	if(inflateT)
		computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, inflate, sortAxis);
	else
		computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, sortAxis);

	const PxU32* binaryMinLimit = reinterpret_cast<const PxU32*>(&rayMinLimit);
	const PxU32* binaryMaxLimit = reinterpret_cast<const PxU32*>(&rayMaxLimit);
	PxU32 rayMinLimitInt = encodeFloat(binaryMinLimit[0]);
	PxU32 rayMaxLimitInt = encodeFloat(binaryMaxLimit[0]);
/*
float rayMinLimitX, rayMaxLimitX;
if(inflateT)
	computeRayLimits(rayMinLimitX, rayMaxLimitX, rayOrig, rayDir, maxDist, inflate, 0);
else
	computeRayLimits(rayMinLimitX, rayMaxLimitX, rayOrig, rayDir, maxDist, 0);

PxU32 rayMinLimitIntX = encodeFloat(PX_IR(rayMinLimitX));
PxU32 rayMaxLimitIntX = encodeFloat(PX_IR(rayMaxLimitX));
*/

	float currentDist = maxDist;

	// 3-level bucket traversal. With NODE_SORT, each node's 5 children are
	// visited in the precomputed front-to-back order for the ray's octant;
	// otherwise in plain index order.
#ifdef NODE_SORT
	const PxU32 dirIndex = computeDirMask(rayDir);
	PxU32 orderi = core.mLevel1.mOrder[dirIndex];
//	PxU32 orderi = sort(core.mLevel1, rayDir);

	for(PxU32 i_=0;i_<5;i_++)
	{
		const PxU32 i = orderi&7;	orderi>>=3;
#else
	for(PxU32 i=0;i<5;i++)
	{
#endif

#ifdef CAN_USE_MOVEMASK
		if(core.mLevel1.mCounters[i] && _segmentAABB<inflateT>(core.mLevel1.mBucketBox[i], &rayParams))
#else
		if(core.mLevel1.mCounters[i] && _segmentAABB<inflateT>(core.mLevel1.mBucketBox[i], test))
//		if(core.mLevel1.mCounters[i] && _segmentAABB<inflateT>(core.mLevel1.mBucketBox[i], test, rayMinLimitIntX, rayMaxLimitIntX))
#endif
		{

#ifdef NODE_SORT
			PxU32 orderj = core.mLevel2[i].mOrder[dirIndex];
//			PxU32 orderj = sort(core.mLevel2[i], rayDir);

			for(PxU32 j_=0;j_<5;j_++)
			{
				const PxU32 j = orderj&7;	orderj>>=3;
#else
			for(PxU32 j=0;j<5;j++)
			{
#endif

#ifdef CAN_USE_MOVEMASK
				if(core.mLevel2[i].mCounters[j] && _segmentAABB<inflateT>(core.mLevel2[i].mBucketBox[j], &rayParams))
#else
				if(core.mLevel2[i].mCounters[j] && _segmentAABB<inflateT>(core.mLevel2[i].mBucketBox[j], test))
//				if(core.mLevel2[i].mCounters[j] && _segmentAABB<inflateT>(core.mLevel2[i].mBucketBox[j], test, rayMinLimitIntX, rayMaxLimitIntX))
#endif
				{
					const BucketPrunerNode& parent = core.mLevel3[i][j];
					const PxU32 parentOffset = core.mLevel1.mOffsets[i] + core.mLevel2[i].mOffsets[j];

#ifdef NODE_SORT
					PxU32 orderk = parent.mOrder[dirIndex];
//					PxU32 orderk = sort(parent, rayDir);

					for(PxU32 k_=0;k_<5;k_++)
					{
						const PxU32 k = orderk&7;	orderk>>=3;
#else
					for(PxU32 k=0;k<5;k++)
					{
#endif
						const PxU32 nbInBucket = parent.mCounters[k];
#ifdef CAN_USE_MOVEMASK
						if(nbInBucket && _segmentAABB<inflateT>(parent.mBucketBox[k], &rayParams))
#else
						if(nbInBucket && _segmentAABB<inflateT>(parent.mBucketBox[k], test))
//						if(nbInBucket && _segmentAABB<inflateT>(parent.mBucketBox[k], test, rayMinLimitIntX, rayMaxLimitIntX))
#endif
						{
							// Leaf bucket: test its boxes and invoke the callback.
							const PxU32 offset = parentOffset + parent.mOffsets[k];
							const PxAgain again = processBucket<inflateT>(	nbInBucket, core.mSortedWorldBoxes, core.mSortedObjects,
																			offset, core.mSortedNb,
																			rayOrig, rayDir, currentDist,
#ifdef CAN_USE_MOVEMASK
																			&rayParams,
#else
																			test, inflate,
#endif
																			pcb,
																			rayMinLimitInt, rayMaxLimitInt,
																			sortAxis);
							if(!again)
								return false;
						}
					}
				}
			}
		}
	}

	maxDist = currentDist;
	return true;
}
+
+PxAgain BucketPrunerCore::raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const
+{
+ return ::stab<0>(*this, pcb, origin, unitDir, inOutDistance, PxVec3(0.0f));
+}
+
+PxAgain BucketPrunerCore::sweep(const ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const
+{
+ const PxVec3 extents = queryVolume.getPrunerInflatedWorldAABB().getExtents();
+ return ::stab<1>(*this, pcb, queryVolume.getPrunerInflatedWorldAABB().getCenter(), unitDir, inOutDistance, extents);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+template<bool doAssert, typename Test>
+static PX_FORCE_INLINE bool processBucket( PxU32 nb, const BucketBox* PX_RESTRICT baseBoxes, PrunerPayload* PX_RESTRICT baseObjects,
+ PxU32 offset, PxU32 totalAllocated,
+ const Test& test, PrunerCallback& pcb,
+ PxU32 minLimitInt, PxU32 maxLimitInt)
+{
+ PX_UNUSED(totalAllocated);
+
+ const BucketBox* PX_RESTRICT boxes = baseBoxes + offset;
+ PrunerPayload* PX_RESTRICT objects = baseObjects + offset;
+
+ while(nb--)
+ {
+ const BucketBox& currentBox = *boxes++;
+ PrunerPayload* currentObject = objects++;
+
+ if(currentBox.mData1<minLimitInt)
+ {
+ if(doAssert)
+ PX_ASSERT(!test(currentBox));
+ continue;
+ }
+
+ if(currentBox.mData0>maxLimitInt)
+ {
+ if(doAssert)
+ PX_ASSERT(!test(currentBox));
+ return true;
+ }
+
+ if(test(currentBox))
+ {
+ PxReal dist = -1.0f; // no distance for overlaps
+ if(!pcb.invoke(dist, *currentObject))
+ return false;
+ }
+ }
+ return true;
+}
+
// Generic overlap traversal over the bucket pruner: 'Test' is a shape-vs-AABB
// functor (sphere/AABB/OBB tests below), 'isPrecise' enables the doAssert
// verification inside processBucket. Visits free objects first, then descends
// the 3-level (5x5x5) bucket hierarchy, culling buckets with 'test' and using
// the cull box's encoded sort-axis window for the per-box early-outs.
template<typename Test, bool isPrecise>
class BucketPrunerOverlapTraversal
{
public:
	PX_FORCE_INLINE BucketPrunerOverlapTraversal()	{}

	/*PX_FORCE_INLINE*/ bool operator()(const BucketPrunerCore& core, const Test& test, PrunerCallback& pcb, const PxBounds3& cullBox) const
	{
		// Free (not-yet-classified) objects: brute-force.
		for(PxU32 i=0;i<core.mNbFree;i++)
		{
			if(test(core.mFreeBounds[i]))
			{
				PxReal dist = -1.0f;	// no distance for overlaps
				if(!pcb.invoke(dist, core.mFreeObjects[i]))
					return false;
			}
		}

		const PxU32 nb = core.mSortedNb;
		if(!nb)
			return true;

#ifdef BRUTE_FORCE_LIMIT
		// Tiny sets: test every sorted box directly.
		if(nb<=BRUTE_FORCE_LIMIT)
		{
			for(PxU32 i=0;i<nb;i++)
			{
				if(test(core.mSortedWorldBoxes[i]))
				{
					PxReal dist = -1.0f;	// no distance for overlaps
					if(!pcb.invoke(dist, core.mSortedObjects[i]))
						return false;
				}
			}
			return true;
		}
#endif

		// Whole-structure cull.
		if(!test(core.mGlobalBox))
			return true;

		// Encode the cull box's window along the sort axis for the integer
		// early-outs in processBucket.
		const PxU32 sortAxis = core.mSortAxis;
		const float boxMinLimit = cullBox.minimum[sortAxis];
		const float boxMaxLimit = cullBox.maximum[sortAxis];

		const PxU32* binaryMinLimit = reinterpret_cast<const PxU32*>(&boxMinLimit);
		const PxU32* binaryMaxLimit = reinterpret_cast<const PxU32*>(&boxMaxLimit);
		const PxU32 rayMinLimitInt = encodeFloat(binaryMinLimit[0]);
		const PxU32 rayMaxLimitInt = encodeFloat(binaryMaxLimit[0]);

		// 3-level descent; empty buckets (zero counter) are skipped before
		// running the shape test.
		for(PxU32 i=0;i<5;i++)
		{
			if(core.mLevel1.mCounters[i] && test(core.mLevel1.mBucketBox[i]))
			{
				for(PxU32 j=0;j<5;j++)
				{
					if(core.mLevel2[i].mCounters[j] && test(core.mLevel2[i].mBucketBox[j]))
					{
						for(PxU32 k=0;k<5;k++)
						{
							const PxU32 nbInBucket = core.mLevel3[i][j].mCounters[k];
							if(nbInBucket && test(core.mLevel3[i][j].mBucketBox[k]))
							{
								const PxU32 offset = core.mLevel1.mOffsets[i] + core.mLevel2[i].mOffsets[j] + core.mLevel3[i][j].mOffsets[k];
								if(!processBucket<isPrecise>(nbInBucket, core.mSortedWorldBoxes, core.mSortedObjects,
									offset, core.mSortedNb, test, pcb, rayMinLimitInt, rayMaxLimitInt))
									return false;
							}
						}
					}
				}
			}
		}
		return true;
	}
};
+
+///////////////////////////////////////////////////////////////////////////////
+
+#ifdef CAN_USE_MOVEMASK
+PX_FORCE_INLINE PxU32 BAllTrue3_R(const BoolV a)
+{
+ const PxI32 moveMask = _mm_movemask_ps(a);
+ return PxU32((moveMask & 0x7) == (0x7));
+}
+#endif
+
+#ifdef USE_SIMD
// SIMD sphere-vs-AABB overlap test: clamps the sphere center into the box to
// find the closest point, then compares squared distance against squared
// radius. Works on BucketBox (sorted boxes) and raw PxBounds3 (free objects).
struct SphereAABBTest_SIMD
{
	PX_FORCE_INLINE SphereAABBTest_SIMD(const Gu::Sphere& sphere) :
	#ifdef CAN_USE_MOVEMASK
		mCenter	(V4LoadU(&sphere.center.x)),
	#else
		mCenter	(V3LoadU(sphere.center)),
	#endif
		mRadius2(FLoad(sphere.radius * sphere.radius))
	{}

	PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const
	{
	#ifdef CAN_USE_MOVEMASK
		const Vec4V boxCenter = AlignedLoad(&box.mCenter.x);
		const Vec4V boxExtents = AlignedLoad(&box.mExtents.x);
		//
		// Closest point on the box to the sphere center, via clamping the
		// center-to-center offset to the box extents.
		const Vec4V offset = V4Sub(mCenter, boxCenter);
		const Vec4V closest = V4Clamp(offset, V4Neg(boxExtents), boxExtents);
		const Vec4V d = V4Sub(offset, closest);

		// Overlap iff squared distance to the closest point <= radius^2
		// (only the x/y/z lanes are checked).
		const FloatV dot = V4Dot3(d,d);
		return Ps::IntBool(BAllTrue3_R(FIsGrtrOrEq(mRadius2, dot)));
	#else
		const Vec3V boxCenter = V3LoadU(box.mCenter);
		const Vec3V boxExtents = V3LoadU(box.mExtents);
		//
		const Vec3V offset = V3Sub(mCenter, boxCenter);
		const Vec3V closest = V3Clamp(offset, V3Neg(boxExtents), boxExtents);
		const Vec3V d = V3Sub(offset, closest);
		return Ps::IntBool(BAllEqTTTT(FIsGrtrOrEq(mRadius2, V3Dot(d, d))));
	#endif
	}

	// PxBounds3 variant: convert to center/extents and reuse the BucketBox test.
	PX_FORCE_INLINE Ps::IntBool operator()(const PxBounds3& bounds) const
	{
		BucketBox tmp;
		tmp.mCenter = bounds.getCenter();
		tmp.mExtents = bounds.getExtents();
		return (*this)(tmp);
	}

private:
	SphereAABBTest_SIMD& operator=(const SphereAABBTest_SIMD&);
	#ifdef CAN_USE_MOVEMASK
	const Vec4V		mCenter;
	#else
	const Vec3V		mCenter;
	#endif
	const FloatV	mRadius2;
};
+#else
+struct SphereAABBTest_Scalar
+{
+ PX_FORCE_INLINE SphereAABBTest_Scalar(const Gu::Sphere& sphere) :
+ mCenter (sphere.center),
+ mRadius2(sphere.radius * sphere.radius)
+ {}
+
+ PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const
+ {
+ const PxVec3 minimum = box.getMin();
+ const PxVec3 maximum = box.getMax();
+
+ float d = 0.0f;
+
+ //find the square of the distance
+ //from the sphere to the box
+ for(PxU32 i=0;i<3;i++)
+ {
+ if(mCenter[i]<minimum[i])
+ {
+ const float s = mCenter[i] - minimum[i];
+ d += s*s;
+ }
+ else if(mCenter[i]>maximum[i])
+ {
+ const float s = mCenter[i] - maximum[i];
+ d += s*s;
+ }
+ }
+ return d <= mRadius2;
+ }
+
+private:
+ SphereAABBTest_Scalar& operator=(const SphereAABBTest_Scalar&);
+ const PxVec3 mCenter;
+ float mRadius2;
+};
+#endif
+
// Select the sphere-vs-AABB test implementation matching the build's SIMD setting.
#ifdef USE_SIMD
typedef SphereAABBTest_SIMD BucketPrunerSphereAABBTest;
#else
typedef SphereAABBTest_Scalar BucketPrunerSphereAABBTest;
#endif
+
+///////////////////////////////////////////////////////////////////////////////
+
// AABB-vs-AABB overlap test used by the overlap traversal. The comparisons are
// written in negated "any axis separated" form on purpose (see the comments
// below) so the pruner's special empty boxes are handled without asserting.
struct BucketPrunerAABBAABBTest
{
	PX_FORCE_INLINE BucketPrunerAABBAABBTest(const PxBounds3& queryBox) : mBox(queryBox)	{}

	PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const
	{
		// PT: we don't use PxBounds3::intersects() because isValid() asserts on our empty boxes!
		const PxVec3 bucketMin = box.getMin();
		const PxVec3 bucketMax = box.getMax();
		return !(mBox.minimum.x > bucketMax.x || bucketMin.x > mBox.maximum.x ||
				mBox.minimum.y > bucketMax.y || bucketMin.y > mBox.maximum.y ||
				mBox.minimum.z > bucketMax.z || bucketMin.z > mBox.maximum.z);
	}

	// Same test for raw PxBounds3 (free objects' bounds).
	PX_FORCE_INLINE Ps::IntBool operator()(const PxBounds3& bounds) const
	{
		// PT: we don't use PxBounds3::intersects() because isValid() asserts on our empty boxes!
		const PxVec3& bucketMin = bounds.minimum;
		const PxVec3& bucketMax = bounds.maximum;
		return !(mBox.minimum.x > bucketMax.x || bucketMin.x > mBox.maximum.x ||
				mBox.minimum.y > bucketMax.y || bucketMin.y > mBox.maximum.y ||
				mBox.minimum.z > bucketMax.z || bucketMin.z > mBox.maximum.z);
	}
private:
	BucketPrunerAABBAABBTest& operator=(const BucketPrunerAABBAABBTest&);
	const PxBounds3	mBox;
};
+
+/*struct BucketPrunerAABBAABBTest_SIMD
+{
+ PX_FORCE_INLINE BucketPrunerAABBAABBTest_SIMD(const PxBounds3& b)
+ : mCenter(V3LoadU(b.getCenter()))
+ , mExtents(V3LoadU(b.getExtents()))
+ {}
+
+ PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const
+ {
+ return V3AllGrtrOrEq(V3Add(mExtents, AlignedLoad(&box.mExtents.x)), V3Abs(V3Sub(AlignedLoad(&box.mCenter.x), mCenter)));
+ }
+private:
+ BucketPrunerAABBAABBTest_SIMD& operator=(const BucketPrunerAABBAABBTest_SIMD&);
+ const Vec3V mCenter, mExtents;
+};*/
+
+///////////////////////////////////////////////////////////////////////////////
+
+#ifdef USE_SIMD
// SIMD OBB-vs-AABB overlap test (separating axis theorem). Only the "class I"
// (AABB face) and "class II" (OBB face) axes are tested; the nine "class III"
// edge-cross axes are kept commented out below, making the test conservative
// (may report overlap where a full SAT would separate).
struct OBBAABBTest_SIMD
{
	OBBAABBTest_SIMD(const PxMat33& rotation, const PxVec3& translation, const PxVec3& extents)
	{
		const Vec3V eps = V3Load(1e-6f);

		mT = V3LoadU(translation);
		mExtents = V3LoadU(extents);

		// storing the transpose matrices yields a simpler SIMD test
		mRT = Mat33V_From_PxMat33(rotation.getTranspose());
		mART = Mat33V(V3Add(V3Abs(mRT.col0), eps), V3Add(V3Abs(mRT.col1), eps), V3Add(V3Abs(mRT.col2), eps));
		mBB_xyz = M33TrnspsMulV3(mART, mExtents);

/*		if(fullTest)
		{
			const Vec3V eYZX = V3PermYZX(mExtents), eZXY = V3PermZXY(mExtents);

			mBB_123 = V3MulAdd(eYZX, V3PermZXY(mART.col0), V3Mul(eZXY, V3PermYZX(mART.col0)));
			mBB_456 = V3MulAdd(eYZX, V3PermZXY(mART.col1), V3Mul(eZXY, V3PermYZX(mART.col1)));
			mBB_789 = V3MulAdd(eYZX, V3PermZXY(mART.col2), V3Mul(eZXY, V3PermYZX(mART.col2)));
		}*/
	}

	PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const
	{
		const Vec3V extentsV = V3LoadU(box.mExtents);

		const Vec3V t = V3Sub(mT, V3LoadU(box.mCenter));

		// class I - axes of AABB
		if(V3OutOfBounds(t, V3Add(extentsV, mBB_xyz)))
			return Ps::IntFalse;

		const Vec3V rX = mRT.col0, rY = mRT.col1, rZ = mRT.col2;
		const Vec3V arX = mART.col0, arY = mART.col1, arZ = mART.col2;

		const FloatV eX = V3GetX(extentsV), eY = V3GetY(extentsV), eZ = V3GetZ(extentsV);
		const FloatV tX = V3GetX(t), tY = V3GetY(t), tZ = V3GetZ(t);

		// class II - axes of OBB
		{
			const Vec3V v = V3ScaleAdd(rZ, tZ, V3ScaleAdd(rY, tY, V3Scale(rX, tX)));
			const Vec3V v2 = V3ScaleAdd(arZ, eZ, V3ScaleAdd(arY, eY, V3ScaleAdd(arX, eX, mExtents)));
			if(V3OutOfBounds(v, v2))
				return Ps::IntFalse;
		}

//		if(!fullTest)
			return Ps::IntTrue;

/*		// class III - edge cross products. Almost all OBB tests early-out with type I or type II,
		// so early-outs here probably aren't useful (TODO: profile)

		const Vec3V va = V3NegScaleSub(rZ, tY, V3Scale(rY, tZ));
		const Vec3V va2 = V3ScaleAdd(arY, eZ, V3ScaleAdd(arZ, eY, mBB_123));
		const BoolV ba = BOr(V3IsGrtr(va, va2), V3IsGrtr(V3Neg(va2), va));

		const Vec3V vb = V3NegScaleSub(rX, tZ, V3Scale(rZ, tX));
		const Vec3V vb2 = V3ScaleAdd(arX, eZ, V3ScaleAdd(arZ, eX, mBB_456));
		const BoolV bb = BOr(V3IsGrtr(vb, vb2), V3IsGrtr(V3Neg(vb2), vb));

		const Vec3V vc = V3NegScaleSub(rY, tX, V3Scale(rX, tY));
		const Vec3V vc2 = V3ScaleAdd(arX, eY, V3ScaleAdd(arY, eX, mBB_789));
		const BoolV bc = BOr(V3IsGrtr(vc, vc2), V3IsGrtr(V3Neg(vc2), vc));

		return BAllEq(BOr(ba, BOr(bb,bc)), BFFFF());*/
	}

	// PxBounds3 variant: convert to center/extents and reuse the BucketBox test.
	PX_FORCE_INLINE Ps::IntBool operator()(const PxBounds3& bounds) const
	{
		BucketBox tmp;
		tmp.mCenter = bounds.getCenter();
		tmp.mExtents = bounds.getExtents();
		return (*this)(tmp);
	}

	Vec3V	mExtents;	// extents of OBB
	Vec3V	mT;			// translation of OBB
	Mat33V	mRT;		// transpose of rotation matrix of OBB
	Mat33V	mART;		// transpose of mRT, padded by epsilon
	Vec3V	mBB_xyz;	// extents of OBB along coordinate axes

/*	Vec3V	mBB_123;	// projections of extents onto edge-cross axes
	Vec3V	mBB_456;
	Vec3V	mBB_789;*/
};
+#else
+// SAT-based OBB-vs-AABB overlap test, scalar fallback (used when USE_SIMD is
+// not defined). Mirrors OBBAABBTest_SIMD: only class I and class II axes are
+// tested; the class III edge-cross-product tests are disabled via if(0).
+struct OBBAABBTest_Scalar
+{
+	// rotation/translation: world pose of the OBB; extents: OBB half-extents.
+	OBBAABBTest_Scalar(const PxMat33& rotation, const PxVec3& translation, const PxVec3& extents)
+	{
+		mR = rotation;
+		mT = translation;
+		mExtents = extents;
+
+		const PxVec3 eps(1e-6f);
+		mAR = PxMat33(mR[0].abs() + eps, mR[1].abs() + eps, mR[2].abs() + eps); // Epsilon prevents floating-point inaccuracies (strategy borrowed from RAPID)
+		mBB_xyz = mAR.transform(mExtents); // Precompute box-box data - Courtesy of Erwin de Vries
+
+/*		PxReal ex = mExtents.x, ey = mExtents.y, ez = mExtents.z;
+		mBB_1 = ey*mAR[2].x + ez*mAR[1].x;	mBB_2 = ez*mAR[0].x + ex*mAR[2].x;	mBB_3 = ex*mAR[1].x + ey*mAR[0].x;
+		mBB_4 = ey*mAR[2].y + ez*mAR[1].y;	mBB_5 = ez*mAR[0].y + ex*mAR[2].y;	mBB_6 = ex*mAR[1].y + ey*mAR[0].y;
+		mBB_7 = ey*mAR[2].z + ez*mAR[1].z;	mBB_8 = ez*mAR[0].z + ex*mAR[2].z;	mBB_9 = ex*mAR[1].z + ey*mAR[0].z;*/
+	}
+
+	// Returns IntTrue if the OBB overlaps the given bucket box.
+	PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const
+	{
+		const PxVec3& c = box.mCenter;
+		const PxVec3& e = box.mExtents;
+
+		const PxVec3 T = mT - c;
+		// Class I : A's basis vectors
+		if(PxAbs(T.x) > e.x + mBB_xyz.x) return Ps::IntFalse;
+		if(PxAbs(T.y) > e.y + mBB_xyz.y) return Ps::IntFalse;
+		if(PxAbs(T.z) > e.z + mBB_xyz.z) return Ps::IntFalse;
+
+		// Class II : B's basis vectors
+		if(PxAbs(T.dot(mR[0])) > e.dot(mAR[0]) + mExtents.x) return Ps::IntFalse;
+		if(PxAbs(T.dot(mR[1])) > e.dot(mAR[1]) + mExtents.y) return Ps::IntFalse;
+		if(PxAbs(T.dot(mR[2])) > e.dot(mAR[2]) + mExtents.z) return Ps::IntFalse;
+
+		// Class III : 9 cross products
+		// NOTE(review): this branch is compiled out via if(0). If it is ever
+		// re-enabled, mBB_1..mBB_9 must be initialized first — their setup in
+		// the constructor is commented out, so they are currently never written.
+		if(0)
+		{
+			if(PxAbs(T.z*mR[0].y - T.y*mR[0].z) > e.y*mAR[0].z + e.z*mAR[0].y + mBB_1) return Ps::IntFalse; // L = A0 x B0
+			if(PxAbs(T.z*mR[1].y - T.y*mR[1].z) > e.y*mAR[1].z + e.z*mAR[1].y + mBB_2) return Ps::IntFalse; // L = A0 x B1
+			if(PxAbs(T.z*mR[2].y - T.y*mR[2].z) > e.y*mAR[2].z + e.z*mAR[2].y + mBB_3) return Ps::IntFalse; // L = A0 x B2
+
+			if(PxAbs(T.x*mR[0].z - T.z*mR[0].x) > e.x*mAR[0].z + e.z*mAR[0].x + mBB_4) return Ps::IntFalse; // L = A1 x B0
+			if(PxAbs(T.x*mR[1].z - T.z*mR[1].x) > e.x*mAR[1].z + e.z*mAR[1].x + mBB_5) return Ps::IntFalse; // L = A1 x B1
+			if(PxAbs(T.x*mR[2].z - T.z*mR[2].x) > e.x*mAR[2].z + e.z*mAR[2].x + mBB_6) return Ps::IntFalse; // L = A1 x B2
+
+			if(PxAbs(T.y*mR[0].x - T.x*mR[0].y) > e.x*mAR[0].y + e.y*mAR[0].x + mBB_7) return Ps::IntFalse; // L = A2 x B0
+			if(PxAbs(T.y*mR[1].x - T.x*mR[1].y) > e.x*mAR[1].y + e.y*mAR[1].x + mBB_8) return Ps::IntFalse; // L = A2 x B1
+			if(PxAbs(T.y*mR[2].x - T.x*mR[2].y) > e.x*mAR[2].y + e.y*mAR[2].x + mBB_9) return Ps::IntFalse; // L = A2 x B2
+		}
+		return Ps::IntTrue;
+	}
+
+private:
+	PxMat33 mR; // rotation matrix
+	PxMat33 mAR; // absolute rotation matrix
+	PxVec3 mT; // translation from obb space to model space
+	PxVec3 mExtents;
+
+	PxVec3 mBB_xyz;
+
+	// Only referenced from the disabled Class III code above; never initialized.
+	float mBB_1, mBB_2, mBB_3;
+	float mBB_4, mBB_5, mBB_6;
+	float mBB_7, mBB_8, mBB_9;
+};
+#endif
+
+#ifdef USE_SIMD
+typedef OBBAABBTest_SIMD BucketPrunerOBBAABBTest;
+#else
+typedef OBBAABBTest_Scalar BucketPrunerOBBAABBTest;
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Dispatches an overlap query against the bucket pruner structure, choosing
+// the cheapest culling test for the query volume's geometry type:
+//  - axis-aligned boxes use an AABB-AABB test,
+//  - oriented boxes, capsules and convexes use the OBB-AABB test on their
+//    inflated pruner OBB,
+//  - spheres use a sphere-AABB test.
+// Returns the PxAgain flag produced by the traversal (false = caller asked to
+// stop). The structure must be up to date (mDirty == false).
+// Fix: removed the unused local 'sphereExtents' in the eSPHERE case (dead
+// store, triggers unused-variable warnings).
+PxAgain BucketPrunerCore::overlap(const ShapeData& queryVolume, PrunerCallback& pcb) const
+{
+	PX_ASSERT(!mDirty);
+	PxAgain again = true;
+
+	const PxBounds3& cullBox = queryVolume.getPrunerInflatedWorldAABB();
+
+	switch(queryVolume.getType())
+	{
+		case PxGeometryType::eBOX:
+		{
+			if(queryVolume.isOBB())
+			{
+				const BucketPrunerOverlapTraversal<BucketPrunerOBBAABBTest, false> overlap;
+				again = overlap(*this,
+					BucketPrunerOBBAABBTest(
+						queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerWorldPos(),
+						queryVolume.getPrunerBoxGeomExtentsInflated()),
+					pcb, cullBox);
+			}
+			else
+			{
+				const BucketPrunerOverlapTraversal<BucketPrunerAABBAABBTest, true> overlap;
+				again = overlap(*this, BucketPrunerAABBAABBTest(cullBox), pcb, cullBox);
+			}
+		}
+		break;
+
+		case PxGeometryType::eCAPSULE:
+		{
+			const BucketPrunerOverlapTraversal<BucketPrunerOBBAABBTest, false> overlap;
+			again = overlap(*this,
+				BucketPrunerOBBAABBTest(
+					queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerWorldPos(),
+					queryVolume.getPrunerBoxGeomExtentsInflated()),
+				pcb, cullBox);
+		}
+		break;
+
+		case PxGeometryType::eSPHERE:
+		{
+			const Sphere& sphere = queryVolume.getGuSphere();
+			const BucketPrunerOverlapTraversal<BucketPrunerSphereAABBTest, true> overlap;
+			again = overlap(*this, BucketPrunerSphereAABBTest(sphere), pcb, cullBox);
+		}
+		break;
+
+		case PxGeometryType::eCONVEXMESH:
+		{
+			const BucketPrunerOverlapTraversal<BucketPrunerOBBAABBTest, false> overlap;
+			again = overlap(*this,
+				BucketPrunerOBBAABBTest(
+					queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerWorldPos(),
+					queryVolume.getPrunerBoxGeomExtentsInflated()),
+				pcb, cullBox);
+		}
+		break;
+
+		// Planes, meshes and heightfields are not valid overlap query volumes.
+		case PxGeometryType::ePLANE:
+		case PxGeometryType::eTRIANGLEMESH:
+		case PxGeometryType::eHEIGHTFIELD:
+		case PxGeometryType::eGEOMETRY_COUNT:
+		case PxGeometryType::eINVALID:
+			PX_ALWAYS_ASSERT_MESSAGE("unsupported overlap query volume geometry type");
+	}
+	return again;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Translates every stored bound in the structure by -shift (origin shift):
+// the free-array bounds, the core bounds, the sorted boxes, the global box,
+// and the bucket boxes of all three hierarchy levels. Integer-encoded min/max
+// values along the sort axis are re-encoded after the move.
+void BucketPrunerCore::shiftOrigin(const PxVec3& shift)
+{
+	// Objects in the small "free" array are stored as raw PxBounds3.
+	for(PxU32 i=0;i<mNbFree;i++)
+	{
+		mFreeBounds[i].minimum -= shift;
+		mFreeBounds[i].maximum -= shift;
+	}
+
+	const PxU32 nb = mCoreNbObjects;
+	//if (nb)
+	{
+		mGlobalBox.mCenter -= shift;
+
+		#ifdef _DEBUG
+		mGlobalBox.mDebugMin -= shift[mSortAxis];
+		#endif
+
+		// Re-encode the integer min/max after moving the center.
+		encodeBoxMinMax(mGlobalBox, mSortAxis);
+
+		for(PxU32 i=0; i < nb; i++)
+		{
+			mCoreBoxes[i].minimum -= shift;
+			mCoreBoxes[i].maximum -= shift;
+		}
+
+		for(PxU32 i=0; i < mSortedNb; i++)
+		{
+			mSortedWorldBoxes[i].mCenter -= shift;
+
+			#ifdef _DEBUG
+			mSortedWorldBoxes[i].mDebugMin -= shift[mSortAxis];
+			#endif
+			encodeBoxMinMax(mSortedWorldBoxes[i], mSortAxis);
+		}
+
+		// Shift the bucket boxes of all 5 / 5x5 / 5x5x5 hierarchy nodes.
+		for(PxU32 i=0; i < 5; i++)
+			mLevel1.mBucketBox[i].mCenter -= shift;
+
+		for(PxU32 i=0; i < 5; i++)
+			for(PxU32 j=0; j < 5; j++)
+				mLevel2[i].mBucketBox[j].mCenter -= shift;
+
+		for(PxU32 i=0; i < 5; i++)
+			for(PxU32 j=0; j < 5; j++)
+				for(PxU32 k=0; k < 5; k++)
+					mLevel3[i][j].mBucketBox[k].mCenter -= shift;
+	}
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Debug-renders a bucket box as a wireframe AABB.
+static void visualize(Cm::RenderOutput& out, const BucketBox& bounds)
+{
+	out << Cm::DebugBox(PxBounds3(bounds.getMin(), bounds.getMax()), true);
+}
+
+// Debug-renders the global box and every non-empty bucket box of the three
+// hierarchy levels in the given color, in world space (identity transform).
+void BucketPrunerCore::visualize(Cm::RenderOutput& out, PxU32 color) const
+{
+	const PxTransform idt = PxTransform(PxIdentity);
+	out << idt;
+	out << color;
+
+	::visualize(out, mGlobalBox);
+
+	for(PxU32 i=0;i<5;i++)
+	{
+		// Skip buckets that contain no objects.
+		if(!mLevel1.mCounters[i])
+			continue;
+
+		::visualize(out, mLevel1.mBucketBox[i]);
+
+		for(PxU32 j=0;j<5;j++)
+		{
+			if(!mLevel2[i].mCounters[j])
+				continue;
+
+			::visualize(out, mLevel2[i].mBucketBox[j]);
+
+			for(PxU32 k=0;k<5;k++)
+			{
+				if(!mLevel3[i][j].mCounters[k])
+					continue;
+
+				::visualize(out, mLevel3[i][j].mBucketBox[k]);
+			}
+		}
+	}
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Default constructor — members (mCore, mPool) are default-constructed.
+BucketPruner::BucketPruner()
+{
+}
+
+// Destructor — member destructors release all resources.
+BucketPruner::~BucketPruner()
+{
+}
+
+// Adds 'count' objects to the pruning pool, marks the core structure dirty
+// (it will be rebuilt on the next commit()) and re-points the core at the
+// pool's arrays, which may have been reallocated by the insertion.
+// Returns true if all objects were added.
+bool BucketPruner::addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count, bool)
+{
+	if(!count)
+		return true;
+
+	const PxU32 valid = mPool.addObjects(results, bounds, payload, count);
+	mCore.mDirty = true;
+
+	// The pool arrays may have moved; refresh the core's external pointers.
+	mCore.setExternalMemory(mPool.getNbActiveObjects(), mPool.getCurrentWorldBoxes(), mPool.getObjects());
+
+	return valid == count;
+}
+
+// Removes 'count' objects from the pool, refreshes the core's external
+// memory pointers and marks the core dirty for the next commit().
+void BucketPruner::removeObjects(const PrunerHandle* handles, PxU32 count)
+{
+	if(!count)
+		return;
+
+	for(PxU32 i=0;i<count;i++)
+		mPool.removeObject(handles[i]);
+
+	mCore.setExternalMemory(mPool.getNbActiveObjects(), mPool.getCurrentWorldBoxes(), mPool.getObjects());
+	mCore.mDirty = true;
+}
+
+// Updates the world bounds of 'count' objects (newBounds may be NULL, in
+// which case only the dirty flag / external memory refresh happens) and
+// marks the core dirty for the next commit().
+void BucketPruner::updateObjects(const PrunerHandle* handles, const PxBounds3* newBounds, PxU32 count)
+{
+	if(!count)
+		return;
+
+	if(newBounds)
+	{
+		for(PxU32 i=0;i<count;i++)
+			mPool.setWorldAABB(handles[i], newBounds[i]);
+	}
+
+	mCore.setExternalMemory(mPool.getNbActiveObjects(), mPool.getCurrentWorldBoxes(), mPool.getObjects());
+	mCore.mDirty = true;
+}
+
+// Indexed-bounds variant: delegates the bounds update to the pool, then
+// refreshes the core's external pointers and marks it dirty.
+void BucketPruner::updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count)
+{
+	mPool.updateObjects(handles, indices, newBounds, count);
+	mCore.setExternalMemory(mPool.getNbActiveObjects(), mPool.getCurrentWorldBoxes(), mPool.getObjects());
+	mCore.mDirty = true;
+}
+
+// Rebuilds the core bucket structure (clears the dirty state).
+void BucketPruner::commit()
+{
+	mCore.build();
+}
+
+// Forwards the origin shift to the core structure.
+void BucketPruner::shiftOrigin(const PxVec3& shift)
+{
+	mCore.shiftOrigin(shift);
+}
+
+// Sweep query. Querying a dirty (uncommitted) structure is unsafe, so in
+// release builds the call bails out early and reports "again" (true).
+PxAgain BucketPruner::sweep(const ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const
+{
+	PX_ASSERT(!mCore.mDirty);
+	if(mCore.mDirty)
+		return true; // it may crash otherwise
+	return mCore.sweep(queryVolume, unitDir, inOutDistance, pcb);
+}
+
+// Overlap query. Same dirty-state guard as sweep(): querying an
+// uncommitted structure is unsafe, so bail out in release builds.
+PxAgain BucketPruner::overlap(const ShapeData& queryVolume, PrunerCallback& pcb) const
+{
+	PX_ASSERT(!mCore.mDirty);
+	if(mCore.mDirty)
+		return true; // it may crash otherwise
+	return mCore.overlap(queryVolume, pcb);
+}
+
+// Raycast query. Same dirty-state guard as sweep()/overlap().
+PxAgain BucketPruner::raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const
+{
+	PX_ASSERT(!mCore.mDirty);
+	if(mCore.mDirty)
+		return true; // it may crash otherwise
+	return mCore.raycast(origin, unitDir, inOutDistance, pcb);
+}
+
+// Forwards debug visualization to the core structure.
+void BucketPruner::visualize(Cm::RenderOutput& out, PxU32 color) const
+{
+	mCore.visualize(out, color);
+}
+
+
+#define MBP_ALLOC(x) PX_ALLOC(x, "BucketPruner")
+#define MBP_ALLOC_TMP(x) PX_ALLOC_TEMP(x, "BucketPruner")
+#define MBP_FREE(x) if(x) PX_FREE_AND_RESET(x)
+#define DELETESINGLE(x) if (x) { delete x; x = NULL; }
+#define DELETEARRAY(x) if (x) { delete []x; x = NULL; }
+#define INVALID_ID 0xffffffff
+
+#ifndef USE_REGULAR_HASH_MAP
+// Returns true when the pair's stored payload differs from the queried one
+// (used as the "keep walking" predicate of the hash-collision chains).
+static PX_FORCE_INLINE bool differentPair(const BucketPrunerPair& p, const PrunerPayload& payload)
+{
+	return !(p.mPayload == payload);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Constructs an empty map: no arrays allocated, zero capacity/size.
+BucketPrunerMap::BucketPrunerMap() :
+	mHashSize		(0),
+	mMask			(0),
+	mNbActivePairs	(0),
+	mHashTable		(NULL),
+	mNext			(NULL),
+	mActivePairs	(NULL),
+	mReservedMemory (0)
+{
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Releases all allocations via purge().
+BucketPrunerMap::~BucketPrunerMap()
+{
+	purge();
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Frees the three internal arrays and resets the map to its empty state.
+// Note: mReservedMemory is deliberately left untouched.
+void BucketPrunerMap::purge()
+{
+	MBP_FREE(mNext);
+	MBP_FREE(mActivePairs);
+	MBP_FREE(mHashTable);
+	mHashSize		= 0;
+	mMask			= 0;
+	mNbActivePairs	= 0;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Looks up the pair for the given payload. Returns NULL when the map is
+// empty or the payload is not present; otherwise a pointer into
+// mActivePairs (invalidated by any add/remove/realloc).
+const BucketPrunerPair* BucketPrunerMap::findPair(const PrunerPayload& payload) const
+{
+	if(!mHashTable)
+		return NULL;	// Nothing has been allocated yet
+
+	// Compute hash value for this pair
+	const PxU32 hashValue = hash(payload) & mMask;
+
+	const BucketPrunerPair* PX_RESTRICT activePairs = mActivePairs;
+	const PxU32* PX_RESTRICT next = mNext;
+
+	// Look for it in the table: walk the collision chain via mNext.
+	PxU32 offset = mHashTable[hashValue];
+	while(offset!=INVALID_ID && differentPair(activePairs[offset], payload))
+	{
+		offset = next[offset];	// Better to have a separate array for this
+	}
+	if(offset==INVALID_ID)
+		return NULL;
+	PX_ASSERT(offset<mNbActivePairs);
+	// Match mActivePairs[offset] => the pair is persistent
+	return &activePairs[offset];
+}
+
+// Internal version saving hash computation
+// Internal version saving hash computation: identical to the public
+// findPair() but takes the already-masked hash value from the caller.
+PX_FORCE_INLINE BucketPrunerPair* BucketPrunerMap::findPair(const PrunerPayload& payload, PxU32 hashValue) const
+{
+	if(!mHashTable)
+		return NULL;	// Nothing has been allocated yet
+
+	BucketPrunerPair* PX_RESTRICT activePairs = mActivePairs;
+	const PxU32* PX_RESTRICT next = mNext;
+
+	// Look for it in the table: walk the collision chain via mNext.
+	PxU32 offset = mHashTable[hashValue];
+	while(offset!=INVALID_ID && differentPair(activePairs[offset], payload))
+	{
+		offset = next[offset];	// Better to have a separate array for this
+	}
+	if(offset==INVALID_ID)
+		return NULL;
+	PX_ASSERT(offset<mNbActivePairs);
+	// Match mActivePairs[offset] => the pair is persistent
+	return &activePairs[offset];
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Adds (payload -> coreIndex, timeStamp) to the map, growing the hash table
+// to the next power of two when full. If the payload is already present the
+// existing entry is returned unchanged (it must carry the same index/stamp).
+// The returned pointer is invalidated by any subsequent add/remove/realloc.
+BucketPrunerPair* BucketPrunerMap::addPair(const PrunerPayload& payload, PxU32 coreIndex, PxU32 timeStamp)
+{
+	PxU32 hashValue = hash(payload) & mMask;
+
+	{
+		BucketPrunerPair* PX_RESTRICT p = findPair(payload, hashValue);
+		if(p)
+		{
+			PX_ASSERT(p->mCoreIndex==coreIndex);
+			PX_ASSERT(p->mTimeStamp==timeStamp);
+			return p;	// Persistent pair
+		}
+	}
+
+	// This is a new pair
+	if(mNbActivePairs >= mHashSize)
+	{
+		// Get more entries
+		mHashSize = Ps::nextPowerOfTwo(mNbActivePairs+1);
+		mMask = mHashSize-1;
+
+		reallocPairs();
+
+		// Recompute hash value with new hash size
+		hashValue = hash(payload) & mMask;	// ### redundant hash computation here?
+	}
+
+	// Append at the end of the dense pair array and link it as the new head
+	// of its hash bucket's collision chain.
+	BucketPrunerPair* PX_RESTRICT p = &mActivePairs[mNbActivePairs];
+	p->mPayload		= payload;
+	p->mCoreIndex	= coreIndex;
+	p->mTimeStamp	= timeStamp;
+	mNext[mNbActivePairs] = mHashTable[hashValue];
+	mHashTable[hashValue] = mNbActivePairs++;
+	return p;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Removes the pair at 'pairIndex' (whose bucket is 'hashValue') and keeps
+// mActivePairs dense by moving the last pair into the freed slot, re-linking
+// both collision chains accordingly. Order of operations matters: the chain
+// containing pairIndex is unlinked first, then the last pair is unlinked
+// from its own chain, copied into the hole, and re-inserted at the head of
+// its (original) bucket.
+void BucketPrunerMap::removePairInternal(const PrunerPayload& /*payload*/, PxU32 hashValue, PxU32 pairIndex)
+{
+	// Walk the hash table to fix mNext
+	{
+		PxU32 offset = mHashTable[hashValue];
+		PX_ASSERT(offset!=INVALID_ID);
+
+		PxU32 previous=INVALID_ID;
+		while(offset!=pairIndex)
+		{
+			previous = offset;
+			offset = mNext[offset];
+		}
+
+		// Let us go/jump us
+		if(previous!=INVALID_ID)
+		{
+			PX_ASSERT(mNext[previous]==pairIndex);
+			mNext[previous] = mNext[pairIndex];
+		}
+		// else we were the first
+		else mHashTable[hashValue] = mNext[pairIndex];
+		// we're now free to reuse mNext[pairIndex] without breaking the list
+	}
+#if PX_DEBUG
+	mNext[pairIndex]=INVALID_ID;
+#endif
+	// Invalidate entry
+
+	// Fill holes
+	if(1)
+	{
+		// 1) Remove last pair
+		const PxU32 lastPairIndex = mNbActivePairs-1;
+		if(lastPairIndex==pairIndex)
+		{
+			// The removed pair was already the last one: just shrink.
+			mNbActivePairs--;
+		}
+		else
+		{
+			const BucketPrunerPair* last = &mActivePairs[lastPairIndex];
+			const PxU32 lastHashValue = hash(last->mPayload) & mMask;
+
+			// Walk the hash table to fix mNext
+			PxU32 offset = mHashTable[lastHashValue];
+			PX_ASSERT(offset!=INVALID_ID);
+
+			PxU32 previous=INVALID_ID;
+			while(offset!=lastPairIndex)
+			{
+				previous = offset;
+				offset = mNext[offset];
+			}
+
+			// Let us go/jump us
+			if(previous!=INVALID_ID)
+			{
+				PX_ASSERT(mNext[previous]==lastPairIndex);
+				mNext[previous] = mNext[lastPairIndex];
+			}
+			// else we were the first
+			else mHashTable[lastHashValue] = mNext[lastPairIndex];
+			// we're now free to reuse mNext[lastPairIndex] without breaking the list
+
+#if PX_DEBUG
+			mNext[lastPairIndex]=INVALID_ID;
+#endif
+
+			// Don't invalidate entry since we're going to shrink the array
+
+			// 2) Re-insert in free slot
+			mActivePairs[pairIndex] = mActivePairs[lastPairIndex];
+#if PX_DEBUG
+			PX_ASSERT(mNext[pairIndex]==INVALID_ID);
+#endif
+			mNext[pairIndex] = mHashTable[lastHashValue];
+			mHashTable[lastHashValue] = pairIndex;
+
+			mNbActivePairs--;
+		}
+	}
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Removes the pair for 'payload'. On success returns true and writes the
+// removed entry's core index and timestamp to the out-parameters; returns
+// false (outputs untouched) when the payload is not in the map. May shrink
+// the hash table afterwards (down to the reserved size at most).
+bool BucketPrunerMap::removePair(const PrunerPayload& payload, PxU32& coreIndex, PxU32& timeStamp)
+{
+	const PxU32 hashValue = hash(payload) & mMask;
+	const BucketPrunerPair* p = findPair(payload, hashValue);
+	if(!p)
+		return false;
+	PX_ASSERT(p->mPayload==payload);
+
+	// Capture the outputs before the entry is overwritten by the removal.
+	coreIndex = p->mCoreIndex;
+	timeStamp = p->mTimeStamp;
+
+	removePairInternal(payload, hashValue, getPairIndex(p));
+
+	shrinkMemory();
+	return true;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Shrinks the hash table to the smallest power of two that holds the
+// current number of pairs, unless it is already that size or the shrink
+// would go below the size requested via reserveMemory().
+void BucketPrunerMap::shrinkMemory()
+{
+	// Check correct memory against actually used memory
+	const PxU32 correctHashSize = Ps::nextPowerOfTwo(mNbActivePairs);
+	if(mHashSize==correctHashSize)
+		return;
+
+	if(mReservedMemory && correctHashSize < mReservedMemory)
+		return;
+
+	// Reduce memory used
+	mHashSize = correctHashSize;
+	mMask = mHashSize-1;
+
+	reallocPairs();
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+	// Fills 'nb' consecutive dwords starting at 'dest' with 'value'.
+	static PX_FORCE_INLINE void storeDwords(PxU32* dest, PxU32 nb, PxU32 value)
+	{
+		for(PxU32 i=0; i<nb; i++)
+			dest[i] = value;
+	}
+
+// Reallocates all three arrays to the current mHashSize, copies the dense
+// pair array over, and rebuilds every hash bucket / collision chain from
+// scratch (the mask changed, so old hash values are stale).
+void BucketPrunerMap::reallocPairs()
+{
+	MBP_FREE(mHashTable);
+	mHashTable = reinterpret_cast<PxU32*>(MBP_ALLOC(mHashSize*sizeof(PxU32)));
+	storeDwords(mHashTable, mHashSize, INVALID_ID);
+
+	// Get some bytes for new entries
+	BucketPrunerPair* newPairs = reinterpret_cast<BucketPrunerPair*>(MBP_ALLOC(mHashSize * sizeof(BucketPrunerPair)));
+	PX_ASSERT(newPairs);
+
+	PxU32* newNext = reinterpret_cast<PxU32*>(MBP_ALLOC(mHashSize * sizeof(PxU32)));
+	PX_ASSERT(newNext);
+
+	// Copy old data if needed
+	if(mNbActivePairs)
+		PxMemCopy(newPairs, mActivePairs, mNbActivePairs*sizeof(BucketPrunerPair));
+	// ### check it's actually needed... probably only for pairs whose hash value was cut by the and
+	// yeah, since hash(id0, id1) is a constant
+	// However it might not be needed to recompute them => only less efficient but still ok
+	for(PxU32 i=0;i<mNbActivePairs;i++)
+	{
+		const PxU32 hashValue = hash(mActivePairs[i].mPayload) & mMask;	// New hash value with new mask
+		newNext[i] = mHashTable[hashValue];
+		mHashTable[hashValue] = i;
+	}
+
+	// Delete old data
+	MBP_FREE(mNext);
+	MBP_FREE(mActivePairs);
+
+	// Assign new pointer
+	mActivePairs = newPairs;
+	mNext = newNext;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Pre-sizes the hash table to at least 'memSize' entries (rounded up to a
+// power of two) and records that size so shrinkMemory() never goes below it.
+// A zero argument is a no-op.
+void BucketPrunerMap::reserveMemory(PxU32 memSize)
+{
+	if(!memSize)
+		return;
+
+	if(!Ps::isPowerOfTwo(memSize))
+		memSize = Ps::nextPowerOfTwo(memSize);
+
+	mHashSize = memSize;
+	mMask = mHashSize-1;
+
+	mReservedMemory = memSize;
+
+	reallocPairs();
+}
+
+///////////////////////////////////////////////////////////////////////////////
+#endif
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.h b/PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.h
new file mode 100644
index 00000000..dec62ccd
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.h
@@ -0,0 +1,279 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef SQ_BUCKETPRUNER_H
+#define SQ_BUCKETPRUNER_H
+
+#include "SqTypedef.h"
+#include "SqPruningPool.h"
+#include "PsHash.h"
+
+#define FREE_PRUNER_SIZE 16
+//#define USE_REGULAR_HASH_MAP
+#ifdef USE_REGULAR_HASH_MAP
+ #include "PsHashMap.h"
+#endif
+
+namespace physx
+{
+namespace Sq
+{
+ typedef PxU32 BucketWord;
+
+#if PX_VC
+ #pragma warning(push)
+ #pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value.
+#endif
+
+	// 16-byte-aligned AABB in center/extents form. The two padding dwords
+	// carry integer-encoded min/max values along the current sorting axis,
+	// so the whole box fits in two 16-byte SIMD loads.
+	PX_ALIGN_PREFIX(16) struct BucketBox
+	{
+		PxVec3	mCenter;
+		PxU32	mData0;	// Integer-encoded min value along sorting axis
+		PxVec3	mExtents;
+		PxU32	mData1;	// Integer-encoded max value along sorting axis
+
+	#ifdef _DEBUG
+		// PT: we need the original min value for debug checks. Using the center/extents version
+		// fails because recomputing the min from them introduces FPU accuracy errors in the values.
+		float	mDebugMin;
+	#endif
+
+		PX_FORCE_INLINE	PxVec3	getMin()	const
+		{
+			return mCenter - mExtents;
+		}
+
+		PX_FORCE_INLINE	PxVec3	getMax()	const
+		{
+			return mCenter + mExtents;
+		}
+
+		// "Empty" box convention: negative extents, so any overlap test fails.
+		PX_FORCE_INLINE void	setEmpty()
+		{
+			mCenter = PxVec3(0.0f);
+			mExtents = PxVec3(-PX_MAX_BOUNDS_EXTENTS);
+
+	#ifdef _DEBUG
+			mDebugMin = PX_MAX_BOUNDS_EXTENTS;
+	#endif
+		}
+	}PX_ALIGN_SUFFIX(16);
+
+	// One node of the bucket hierarchy: 5 child buckets, each described by a
+	// counter, a start offset into the sorted arrays, and a bounding box.
+	PX_ALIGN_PREFIX(16) struct BucketPrunerNode
+	{
+				BucketPrunerNode();
+
+		// Distributes 'nb' boxes/objects into the 5 child buckets using the
+		// given split limits, writing them out in bucket order.
+		void	classifyBoxes(	float limitX, float limitZ,
+								PxU32 nb,
+								BucketBox* PX_RESTRICT boxes,
+								const PrunerPayload* PX_RESTRICT objects,
+								BucketBox* PX_RESTRICT sortedBoxes,
+								PrunerPayload* PX_RESTRICT sortedObjects,
+								bool isCrossBucket, PxU32 sortAxis);
+
+		PX_FORCE_INLINE	void	initCounters()
+		{
+			for(PxU32 i=0;i<5;i++)
+				mCounters[i] = 0;
+			for(PxU32 i=0;i<5;i++)
+				mOffsets[i] = 0;
+		}
+
+		BucketWord	mCounters[5];	// Number of objects in each of the 5 children
+		BucketWord	mOffsets[5];	// Start index of objects for each of the 5 children
+		BucketBox	mBucketBox[5];	// AABBs around objects for each of the 5 children
+		PxU16		mOrder[8];		// PNS: 5 children => 3 bits/index => 3*5=15 bits total, for each of the 8 canonical directions
+	}PX_ALIGN_SUFFIX(16);
+
+	// Hashes a payload by folding its two data words into one 64-bit value.
+	// On 64-bit platforms only the low 32 bits of each word are used before
+	// hashing — presumably sufficient for pointer-derived payloads; the
+	// alternative (hashing each full pointer) is left commented out.
+	PX_FORCE_INLINE PxU32 hash(const PrunerPayload& payload)
+	{
+#if PX_P64_FAMILY
+//		const PxU32 h0 = Ps::hash((const void*)payload.data[0]);
+//		const PxU32 h1 = Ps::hash((const void*)payload.data[1]);
+		const PxU32 h0 = PxU32(PX_MAX_U32 & payload.data[0]);
+		const PxU32 h1 = PxU32(PX_MAX_U32 & payload.data[1]);
+		return Ps::hash(PxU64(h0)|(PxU64(h1)<<32));
+#else
+		return Ps::hash(PxU64(payload.data[0])|(PxU64(payload.data[1])<<32));
+#endif
+	}
+
+#ifdef USE_REGULAR_HASH_MAP
+	// Map value type for the Ps::HashMap-backed variant (USE_REGULAR_HASH_MAP):
+	// the payload itself is the hash-map key, so only index and stamp are stored.
+	struct BucketPrunerPair : public Ps::UserAllocated
+	{
+		PX_FORCE_INLINE	BucketPrunerPair()	{}
+		PX_FORCE_INLINE	BucketPrunerPair(PxU32 index, PxU32 stamp) : mCoreIndex(index), mTimeStamp(stamp)	{}
+		PxU32	mCoreIndex;	// index in mCoreObjects
+		PxU32	mTimeStamp;
+	};
+	typedef Ps::HashMap<PrunerPayload, BucketPrunerPair> BucketPrunerMap;
+#else
+	// Map entry for the custom hash map: the payload key is stored inline
+	// together with its core-array index and timestamp.
+	struct BucketPrunerPair : public Ps::UserAllocated
+	{
+		PrunerPayload	mPayload;
+		PxU32			mCoreIndex;	// index in mCoreObjects
+		PxU32			mTimeStamp;
+	};
+
+	// Custom hash-map - currently faster than the regular hash-map (Ps::HashMap), in particular for 'find-and-erase' operations.
+	// Open-hashing layout: mHashTable maps a masked hash to the head of a
+	// collision chain threaded through mNext; pairs live densely in mActivePairs.
+	class BucketPrunerMap : public Ps::UserAllocated
+	{
+		public:
+											BucketPrunerMap();
+											~BucketPrunerMap();
+
+						void				purge();
+						void				shrinkMemory();
+
+						BucketPrunerPair*	addPair				(const PrunerPayload& payload, PxU32 coreIndex, PxU32 timeStamp);
+						bool				removePair			(const PrunerPayload& payload, PxU32& coreIndex, PxU32& timeStamp);
+						const BucketPrunerPair*	findPair		(const PrunerPayload& payload)	const;
+		// Converts a pointer into mActivePairs back to its dense index.
+		PX_FORCE_INLINE	PxU32				getPairIndex(const BucketPrunerPair* pair)	const
+											{
+												return (PxU32((size_t(pair) - size_t(mActivePairs)))/sizeof(BucketPrunerPair));
+											}
+
+						PxU32				mHashSize;
+						PxU32				mMask;
+						PxU32				mNbActivePairs;
+						PxU32*				mHashTable;
+						PxU32*				mNext;
+						BucketPrunerPair*	mActivePairs;
+						PxU32				mReservedMemory;
+
+		PX_FORCE_INLINE	BucketPrunerPair*	findPair(const PrunerPayload& payload, PxU32 hashValue) const;
+						void				removePairInternal(const PrunerPayload& payload, PxU32 hashValue, PxU32 pairIndex);
+						void				reallocPairs();
+						void				reserveMemory(PxU32 memSize);
+	};
+#endif
+
+	// Core bucket-pruner data structure: a 3-level hierarchy of 5-way bucket
+	// nodes over a sorted copy of the object bounds, plus a small linear
+	// "free" array for recently added objects and a payload->index map.
+	class BucketPrunerCore : public Ps::UserAllocated
+	{
+		public:
+										BucketPrunerCore(bool externalMemory=true);
+										~BucketPrunerCore();
+
+						void			release();
+
+						void			setExternalMemory(PxU32 nbObjects, PxBounds3* boxes, PrunerPayload* objects);
+
+						bool			addObject(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp=0);
+						bool			removeObject(const PrunerPayload& object, PxU32& timeStamp);
+						bool			updateObject(const PxBounds3& worldAABB, const PrunerPayload& object);
+
+		// PT: look for objects marked with input timestamp everywhere in the structure, and remove them. This is the same
+		// as calling 'removeObject' individually for all these objects, but much more efficient. Returns number of removed objects.
+						PxU32			removeMarkedObjects(PxU32 timeStamp);
+
+						PxAgain			raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
+						PxAgain			overlap(const Gu::ShapeData& queryVolume, PrunerCallback&) const;
+						PxAgain			sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
+
+						void			shiftOrigin(const PxVec3& shift);
+
+						void			visualize(Cm::RenderOutput& out, PxU32 color) const;
+
+		// Rebuilds the bucket structure (clears mDirty via classifyBoxes).
+		PX_FORCE_INLINE	void			build()	{ classifyBoxes();	}
+
+		PX_FORCE_INLINE	PxU32			getNbObjects()	const	{ return mNbFree + mCoreNbObjects;	}
+
+//		private:
+						PxU32			mCoreNbObjects;		// Current number of objects in core arrays
+						PxU32			mCoreCapacity;		// Capacity of core arrays
+						PxBounds3*		mCoreBoxes;			// Core array
+						PrunerPayload*	mCoreObjects;		// Core array
+						PxU32*			mCoreRemap;			// Remaps core index to sorted index, i.e. sortedIndex = mCoreRemap[coreIndex]
+
+						BucketBox*		mSortedWorldBoxes;	// Sorted array
+						PrunerPayload*	mSortedObjects;		// Sorted array
+
+						PxU32			mNbFree;							// Current number of objects in the "free array" (mFreeObjects/mFreeBounds)
+						PrunerPayload	mFreeObjects[FREE_PRUNER_SIZE];		// mNbFree objects are stored here
+						PxBounds3		mFreeBounds[FREE_PRUNER_SIZE];		// mNbFree object bounds are stored here
+						PxU32			mFreeStamps[FREE_PRUNER_SIZE];
+
+						BucketPrunerMap	mMap;				// Maps (PrunerPayload) object to corresponding index in core array.
+															// Objects in the free array do not appear in this map.
+						PxU32			mSortedNb;
+						PxU32			mSortedCapacity;
+						PxU32			mSortAxis;
+
+						BucketBox		mGlobalBox;			// Global bounds around all objects in the structure (except the ones in the "free" array)
+						BucketPrunerNode	mLevel1;
+						BucketPrunerNode	mLevel2[5];
+						BucketPrunerNode	mLevel3[5][5];
+
+						bool			mDirty;				// True while a rebuild (build()) is pending
+						bool			mOwnMemory;
+		private:
+						void			classifyBoxes();
+						void			allocateSortedMemory(PxU32 nb);
+						void			resizeCore();
+		PX_FORCE_INLINE	void			addObjectInternal(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp);
+	};
+
+#if PX_VC
+ #pragma warning(pop)
+#endif
+
+	// Pruner-interface adapter around BucketPrunerCore: a PruningPool owns
+	// the per-object storage and handles, while the core runs the queries
+	// on the pool's arrays (see setExternalMemory calls in the .cpp).
+	class BucketPruner : public Pruner
+	{
+		public:
+										BucketPruner();
+		virtual							~BucketPruner();
+
+		// Pruner
+		virtual	bool					addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count, bool);
+		virtual	void					removeObjects(const PrunerHandle* handles, PxU32 count);
+		virtual	void					updateObjects(const PrunerHandle* handles, const PxBounds3* newBounds, PxU32 count);
+		virtual	void					updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count = 1);
+		virtual	void					commit();
+		virtual	PxAgain					raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
+		virtual	PxAgain					overlap(const Gu::ShapeData& queryVolume, PrunerCallback&) const;
+		virtual	PxAgain					sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
+		virtual	const PrunerPayload&	getPayload(PrunerHandle handle) const { return mPool.getPayload(handle); }
+		virtual	const PrunerPayload&	getPayload(PrunerHandle handle, PxBounds3*& bounds) const { return mPool.getPayload(handle, bounds); }
+		virtual	void					preallocate(PxU32 entries) { mPool.preallocate(entries); }
+		virtual	void					shiftOrigin(const PxVec3& shift);
+		virtual	void					visualize(Cm::RenderOutput& out, PxU32 color) const;
+		// merge not implemented for bucket pruner
+		virtual	void					merge(const void* ) {}
+		//~Pruner
+
+		private:
+				BucketPrunerCore		mCore;
+				PruningPool				mPool;
+	};
+
+} // namespace Sq
+
+}
+
+#endif // SQ_BUCKETPRUNER_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.cpp b/PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.cpp
new file mode 100644
index 00000000..748817cb
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.cpp
@@ -0,0 +1,887 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "SqExtendedBucketPruner.h"
+#include "SqAABBTree.h"
+#include "SqPrunerMergeData.h"
+#include "SqAABBTreeQuery.h"
+#include "GuBounds.h"
+#include "CmBitMap.h"
+
+using namespace physx;
+using namespace Sq;
+using namespace Gu;
+using namespace Ps;
+
+#define NB_OBJECTS_PER_NODE 4
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Constructor, preallocate trees, bounds
+// Constructor: preallocates the merged-tree slots, their bounds array and the hash map.
+// NOTE(review): mBucketCore is constructed with 'false' - presumably a memory-ownership
+// flag of BucketPrunerCore; confirm against the BucketPrunerCore constructor.
+ExtendedBucketPruner::ExtendedBucketPruner(const PruningPool* pool)
+	: mBucketCore(false), mPruningPool(pool), mMainTree(NULL), mBounds(NULL), mMergedTrees(NULL),
+	mCurrentTreeIndex(0), mTreesDirty(false)
+{
+	// preallocated size for bounds, trees
+	mCurrentTreeCapacity = 32;
+
+	// one world bound and one merged-tree slot per capacity entry
+	mBounds = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*mCurrentTreeCapacity, "Bounds"));
+	mMergedTrees = reinterpret_cast<MergedTree*>(PX_ALLOC(sizeof(MergedTree)*mCurrentTreeCapacity, "AABB trees"));
+	mExtendedBucketPrunerMap.reserve(mCurrentTreeCapacity);
+
+	// create empty main tree
+	mMainTree = PX_NEW(AABBTree);
+
+	// create empty merge trees
+	// (trees are preallocated up-front and reused; timestamp 0 marks an unused slot)
+	for (PxU32 i = 0; i < mCurrentTreeCapacity; i++)
+	{
+		mMergedTrees[i].mTimeStamp = 0;
+		mMergedTrees[i].mTree = PX_NEW(AABBTree);
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Destructor: tears down the main tree, every preallocated merged tree
+// and the raw bounds/tree arrays.
+ExtendedBucketPruner::~ExtendedBucketPruner()
+{
+	// destroy the main AABB tree (if it was ever created)
+	if (mMainTree)
+	{
+		PX_DELETE_AND_RESET(mMainTree);
+	}
+
+	// destroy every preallocated merged tree (capacity-sized array, not just active ones)
+	PxU32 i = mCurrentTreeCapacity;
+	while (i--)
+	{
+		AABBTree* mergedTree = mMergedTrees[i].mTree;
+		PX_DELETE(mergedTree);
+	}
+
+	// free the raw arrays
+	PX_FREE(mMergedTrees);
+	PX_FREE(mBounds);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// release all objects in bucket pruner
+// Releases every object held by the extended bucket pruner. Preallocated trees and
+// arrays are kept for reuse; only their contents are dropped.
+void ExtendedBucketPruner::release()
+{
+	// empty the object map and both update maps
+	mExtendedBucketPrunerMap.clear();
+	mMainTreeUpdateMap.release();
+	mMergeTreeUpdateMap.release();
+
+	// reset every merged-tree slot (timestamp 0 == unused slot)
+	PxU32 i = mCurrentTreeCapacity;
+	while (i--)
+	{
+		mMergedTrees[i].mTree->release();
+		mMergedTrees[i].mTimeStamp = 0;
+	}
+
+	// no active merged trees remain
+	mCurrentTreeIndex = 0;
+
+	// finally release the core bucket pruner
+	mBucketCore.release();
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Add a tree from a pruning structure
+// 1. get new tree index
+// 2. initialize merged tree, bounds
+// 3. create update map for the merged tree
+// 4. build new tree of trees from given trees bounds
+// 5. add new objects into extended bucket pruner map
+// 6. shift indices in the merged tree
+void ExtendedBucketPruner::addTree(const AABBTreeMergeData& mergeData, PxU32 timeStamp)
+{
+	// check if we have to resize
+	if(mCurrentTreeIndex == mCurrentTreeCapacity)
+	{
+		resize(mCurrentTreeCapacity*2);
+	}
+
+	// get current merge tree index
+	const PxU32 mergeTreeIndex = mCurrentTreeIndex++;
+
+	// get payloads pointers - the pointers start at mIndicesOffset, thats where all
+	// objects were added before merge was called
+	const PrunerPayload* payloads = &mPruningPool->getObjects()[mergeData.mIndicesOffset];
+
+	// setup merged tree with the merge data and timestamp
+	mMergedTrees[mergeTreeIndex].mTimeStamp = timeStamp;
+	AABBTree& mergedTree = *mMergedTrees[mergeTreeIndex].mTree;
+	mergedTree.initTree(mergeData);
+	// set bounds (the merged tree's root bounds become this tree's primitive bounds in the main tree)
+	mBounds[mergeTreeIndex] = mergeData.getRootNode().mBV;
+
+	// update temporary update map for the current merge tree, map is used to setup the base extended bucket pruner map
+	mMergeTreeUpdateMap.initMap(mergeData.mNbIndices, mergedTree);
+
+	// create new base tree of trees
+	buildMainAABBTree();
+
+	// Add each object into extended bucket pruner hash map
+	// (key = payload, value = merge-tree slot + leaf node inside that tree + timestamp)
+	for (PxU32 i = 0; i < mergeData.mNbIndices; i++)
+	{
+		ExtendedBucketPrunerData mapData;
+		mapData.mMergeIndex = mergeTreeIndex;
+		mapData.mTimeStamp = timeStamp;
+		PX_ASSERT(mMergeTreeUpdateMap[i] < mergedTree.getNbNodes());
+		// get node information from the merge tree update map
+		mapData.mSubTreeNode = mMergeTreeUpdateMap[i];
+		mExtendedBucketPrunerMap.insert(payloads[i], mapData);
+	}
+	// merged tree indices needs to be shifted now, we cannot shift it in init - the update map
+	// could not be constructed otherwise, as the indices wont start from 0. The indices
+	// needs to be shifted by offset from the pruning pool, where the new objects were added into the pruning pool.
+	mergedTree.shiftIndices(mergeData.mIndicesOffset);
+
+#if PX_DEBUG
+	checkValidity();
+#endif // PX_DEBUG
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Builds the new main AABB tree with given current active merged trees and its bounds
+// Builds a fresh main AABB tree over the bounds of the currently active merged trees
+// and refreshes the merge-index -> main-tree-node update map.
+void ExtendedBucketPruner::buildMainAABBTree()
+{
+	// describe the build input: one primitive per active merged tree
+	AABBTreeBuildParams buildParams;
+	buildParams.mLimit = NB_OBJECTS_PER_NODE;
+	buildParams.mAABBArray = mBounds;
+	buildParams.mNbPrimitives = mCurrentTreeIndex;
+
+	const bool status = mMainTree->build(buildParams);
+	PX_ASSERT(status);
+	PX_UNUSED(status);
+
+	// Init main tree update map for the new main tree
+	mMainTreeUpdateMap.initMap(mCurrentTreeIndex, *mMainTree);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// resize internal memory, buffers
+// Grows the bounds array and the merged-tree slot array to 'size' entries.
+// Existing slots are moved with a shallow memcopy, which is safe because
+// MergedTree holds only a raw pointer and a timestamp (see the copies below).
+void ExtendedBucketPruner::resize(PxU32 size)
+{
+	PX_ASSERT(size > mCurrentTreeCapacity);
+	// allocate new bounds
+	PxBounds3* newBounds = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*size, "Bounds"));
+	// copy previous bounds
+	PxMemCopy(newBounds, mBounds, sizeof(PxBounds3)*mCurrentTreeCapacity);
+	PX_FREE(mBounds);
+	mBounds = newBounds;
+
+	// allocate new merged trees
+	MergedTree* newMergeTrees = reinterpret_cast<MergedTree*>(PX_ALLOC(sizeof(MergedTree)*size, "AABB trees"));
+	// copy previous merged trees (tree pointers transfer ownership to the new array)
+	PxMemCopy(newMergeTrees, mMergedTrees, sizeof(MergedTree)*mCurrentTreeCapacity);
+	PX_FREE(mMergedTrees);
+	mMergedTrees = newMergeTrees;
+	// allocate new trees for merged trees (keep the invariant: every slot owns an empty tree)
+	for (PxU32 i = mCurrentTreeCapacity; i < size; i++)
+	{
+		mMergedTrees[i].mTimeStamp = 0;
+		mMergedTrees[i].mTree = PX_NEW(AABBTree);
+	}
+
+	mCurrentTreeCapacity = size;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Update object
+// Updates an object's bounds. Objects in the tree-of-trees are only marked for a
+// deferred refit (performed later in refitMarkedNodes); objects not in the map are
+// forwarded to the core bucket pruner, which updates them directly.
+bool ExtendedBucketPruner::updateObject(const PxBounds3& worldAABB, const PrunerPayload& object)
+{
+	const ExtendedBucketPrunerMap::Entry* extendedPrunerEntry = mExtendedBucketPrunerMap.find(object);
+
+	// if object is not in tree of trees, it is in bucket pruner core
+	if(!extendedPrunerEntry)
+	{
+		return mBucketCore.updateObject(worldAABB, object);
+	}
+	else
+	{
+		const ExtendedBucketPrunerData& data = extendedPrunerEntry->second;
+
+		PX_ASSERT(data.mMergeIndex < mCurrentTreeIndex);
+
+		// update tree where objects belongs to
+		AABBTree& tree = *mMergedTrees[data.mMergeIndex].mTree;
+		PX_ASSERT(data.mSubTreeNode < tree.getNbNodes());
+		// mark for refit node in merged tree
+		tree.markNodeForRefit(data.mSubTreeNode);
+		PX_ASSERT(mMainTreeUpdateMap[data.mMergeIndex] < mMainTree->getNbNodes());
+		// mark for refit node in main aabb tree
+		mMainTree->markNodeForRefit(mMainTreeUpdateMap[data.mMergeIndex]);
+		// remember that refitMarkedNodes has pending work
+		mTreesDirty = true;
+	}
+	return true;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// refit merged nodes
+// 1. refit nodes in merged trees
+// 2. check if after refit root node is valid - might happen edge case
+// where all objects were released - the root node is then invalid
+// in this edge case we need to compact the merged trees array
+// and create new main AABB tree
+// 3. If all merged trees bounds are valid - refit main tree
+// 4. If bounds are invalid create new main AABB tree
+// Refits all nodes previously marked by updateObject/removeObject.
+// 1. refit nodes in merged trees
+// 2. check whether each refitted root bound is still valid - if every object of a
+//    tree was released the root bound becomes invalid and the tree must be dropped
+// 3. if all merged trees are valid, simply refit the main tree
+// 4. otherwise compact the merged-tree array (no holes), rebuild the main tree and
+//    remap the mMergeIndex stored in every surviving hash-map entry
+// @param boxes current world bounds array of the pruning pool, indexed by pool index
+void ExtendedBucketPruner::refitMarkedNodes(const PxBounds3* boxes)
+{
+	// if no tree needs update early exit
+	if(!mTreesDirty)
+		return;
+
+	// refit trees and update bounds for main tree
+	PxU32 nbValidTrees = 0;
+	for (PxU32 i = mCurrentTreeIndex; i--; )
+	{
+		AABBTree& tree = *mMergedTrees[i].mTree;
+		tree.refitMarkedNodes(boxes);
+		const PxBounds3& bounds = tree.getNodes()[0].mBV;
+		// check if bounds are valid, if all objects of the tree were released, the bounds
+		// will be invalid, in that case we cannot use this tree anymore.
+		if(bounds.isValid())
+		{
+			nbValidTrees++;
+		}
+		mBounds[i] = bounds;
+	}
+
+	if(nbValidTrees == mCurrentTreeIndex)
+	{
+		// no tree has been removed refit main tree
+		mMainTree->refitMarkedNodes(mBounds);
+	}
+	else
+	{
+		// edge case path, tree does not have a valid root node bounds - all objects from the tree were released
+		// we might even fire perf warning
+		// compact the tree array - no holes in the array, remember the swap position
+		// swapMap[oldIndex] == newIndex for every surviving tree; entries of dropped
+		// trees are never read because no hash-map entry can still reference them
+		PxU32* swapMap = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mCurrentTreeIndex, "Swap Map"));
+		PxU32 writeIndex = 0;
+		for (PxU32 i = 0; i < mCurrentTreeIndex; i++)
+		{
+			AABBTree& tree = *mMergedTrees[i].mTree;
+			if(tree.getNodes()[0].mBV.isValid())
+			{
+				// we have to store the tree into an empty location
+				if(i != writeIndex)
+				{
+					PX_ASSERT(writeIndex < i);
+					// swap the tree pointers so the released slot keeps owning a tree object
+					AABBTree* ptr = mMergedTrees[writeIndex].mTree;
+					mMergedTrees[writeIndex] = mMergedTrees[i];
+					mMergedTrees[i].mTree = ptr;
+					mBounds[writeIndex] = mBounds[i];
+				}
+				// remember the swap location
+				swapMap[i] = writeIndex;
+				writeIndex++;
+			}
+			else
+			{
+				// tree is not valid, release it
+				tree.release();
+				mMergedTrees[i].mTimeStamp = 0;
+			}
+			// BUGFIX: removed "swapMap[mCurrentTreeIndex] = i;" - swapMap has only
+			// mCurrentTreeIndex entries, so that statement wrote one element past the
+			// end of the allocation on every iteration (heap buffer overflow), and the
+			// stored value was never read (only swapMap[data.mMergeIndex] with
+			// mMergeIndex < mCurrentTreeIndex is read below).
+		}
+
+		PX_ASSERT(writeIndex == nbValidTrees);
+
+		// new merged trees size
+		mCurrentTreeIndex = nbValidTrees;
+
+		// trees have changed, we need to rebuild the main tree
+		buildMainAABBTree();
+
+		// fixup the object entries, the merge index has changed
+		for (ExtendedBucketPrunerMap::Iterator iter = mExtendedBucketPrunerMap.getIterator(); !iter.done(); ++iter)
+		{
+			ExtendedBucketPrunerData& data = iter->second;
+			PX_ASSERT(swapMap[data.mMergeIndex] < nbValidTrees);
+			data.mMergeIndex = swapMap[data.mMergeIndex];
+		}
+		PX_FREE(swapMap);
+	}
+#if PX_DEBUG
+	checkValidity();
+#endif
+	mTreesDirty = false;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// remove object
+// Removes an object. The pruning pool removes with a swap-with-last, so the caller
+// passes both the removed object's pool index and the object/index that was moved
+// into its place; swapIndex fixes up the merged-tree primitive that referenced the
+// moved object. Returns true on success; timeStamp is written by the core pruner path.
+bool ExtendedBucketPruner::removeObject(const PrunerPayload& object, PxU32 objectIndex, const PrunerPayload& swapObject,
+	PxU32 swapObjectIndex, PxU32& timeStamp)
+{
+	ExtendedBucketPrunerMap::Entry dataEntry;
+
+	// if object is not in tree of trees, it is in bucket pruner core
+	if (!mExtendedBucketPrunerMap.erase(object, dataEntry))
+	{
+		// we need to call invalidateObjects, it might happen that the swapped object
+		// does belong to the extended bucket pruner, in that case the objects index
+		// needs to be swapped.
+		swapIndex(objectIndex, swapObject, swapObjectIndex);
+		return mBucketCore.removeObject(object, timeStamp);
+	}
+	else
+	{
+		const ExtendedBucketPrunerData& data = dataEntry.second;
+
+		// mark tree nodes where objects belongs to
+		AABBTree& tree = *mMergedTrees[data.mMergeIndex].mTree;
+		PX_ASSERT(data.mSubTreeNode < tree.getNbNodes());
+		// mark the merged tree for refit
+		tree.markNodeForRefit(data.mSubTreeNode);
+		PX_ASSERT(mMainTreeUpdateMap[data.mMergeIndex] < mMainTree->getNbNodes());
+		// mark the main tree for refit
+		mMainTree->markNodeForRefit(mMainTreeUpdateMap[data.mMergeIndex]);
+
+		// call invalidate object to swap the object indices in the merged trees
+		invalidateObject(data, objectIndex, swapObject, swapObjectIndex);
+
+		// actual refit happens later in refitMarkedNodes
+		mTreesDirty = true;
+	}
+#if PX_DEBUG
+	checkValidity();
+#endif // PX_DEBUG
+	return true;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// invalidate object
+// remove the objectIndex from the merged tree
+// Removes objectIndex from its merged-tree leaf: the leaf's runtime primitive count
+// is decremented and the removed slot is swapped with the leaf's last primitive.
+// Finally delegates to swapIndex to patch the pool-index swap done by the caller.
+void ExtendedBucketPruner::invalidateObject(const ExtendedBucketPrunerData& data, PxU32 objectIndex, const PrunerPayload& swapObject,
+	PxU32 swapObjectIndex)
+{
+	// get the merged tree
+	AABBTree& tree = *mMergedTrees[data.mMergeIndex].mTree;
+	PX_ASSERT(data.mSubTreeNode < tree.getNbNodes());
+	PX_ASSERT(tree.getNodes()[data.mSubTreeNode].isLeaf());
+	// get merged tree node
+	AABBTreeRuntimeNode& node0 = tree.getNodes()[data.mSubTreeNode];
+	const PxU32 nbPrims = node0.getNbRuntimePrimitives();
+	PX_ASSERT(nbPrims <= NB_OBJECTS_PER_NODE);
+
+	// retrieve the primitives pointer
+	PxU32* primitives = node0.getPrimitives(tree.getIndices());
+	PX_ASSERT(primitives);
+
+	// Look for desired pool index in the leaf
+	bool foundIt = false;
+	for (PxU32 i = 0; i < nbPrims; i++)
+	{
+		if (objectIndex == primitives[i])
+		{
+			foundIt = true;
+			const PxU32 last = nbPrims - 1;
+			node0.setNbRunTimePrimitives(last);
+			primitives[i] = INVALID_POOL_ID; // Mark primitive index as invalid in the node
+
+			// Swap within the leaf node. No need to update the mapping since they should all point
+			// to the same tree node anyway.
+			if (last != i)
+				Ps::swap(primitives[i], primitives[last]);
+			break;
+		}
+	}
+	PX_ASSERT(foundIt);
+	PX_UNUSED(foundIt);
+
+	// the pool swapped the removed object with the last pool object; fix references to it
+	swapIndex(objectIndex, swapObject, swapObjectIndex);
+}
+
+// Swap object index
+// if swapObject is in a merged tree its index needs to be swapped with objectIndex
+// After the pruning pool moved swapObject from swapObjectIndex to objectIndex,
+// patch the merged-tree leaf primitive that still stores the old pool index.
+// No-op when no swap happened (objectIndex == swapObjectIndex) or when swapObject
+// lives in the core bucket pruner rather than in a merged tree.
+void ExtendedBucketPruner::swapIndex(PxU32 objectIndex, const PrunerPayload& swapObject, PxU32 swapObjectIndex)
+{
+	if (objectIndex == swapObjectIndex)
+		return;
+
+	const ExtendedBucketPrunerMap::Entry* extendedPrunerSwapEntry = mExtendedBucketPrunerMap.find(swapObject);
+
+	// if swapped object index is in extended pruner, we have to fix the primitives index
+	if (extendedPrunerSwapEntry)
+	{
+		const ExtendedBucketPrunerData& swapData = extendedPrunerSwapEntry->second;
+		AABBTree& swapTree = *mMergedTrees[swapData.mMergeIndex].mTree;
+		// With multiple primitives per leaf, tree nodes may very well be the same for different pool indices.
+		// However the pool indices may be the same when a swap has been skipped in the pruning pool, in which
+		// case there is nothing to do.
+		PX_ASSERT(swapData.mSubTreeNode < swapTree.getNbNodes());
+		PX_ASSERT(swapTree.getNodes()[swapData.mSubTreeNode].isLeaf());
+		AABBTreeRuntimeNode* node1 = swapTree.getNodes() + swapData.mSubTreeNode;
+		const PxU32 nbPrims = node1->getNbRuntimePrimitives();
+		PX_ASSERT(nbPrims <= NB_OBJECTS_PER_NODE);
+
+		// retrieve the primitives pointer
+		PxU32* primitives = node1->getPrimitives(swapTree.getIndices());
+		PX_ASSERT(primitives);
+
+		// look for desired pool index in the leaf
+		bool foundIt = false;
+		for (PxU32 i = 0; i < nbPrims; i++)
+		{
+			if (swapObjectIndex == primitives[i])
+			{
+				foundIt = true;
+				primitives[i] = objectIndex; // point node to the pool object moved to
+				break;
+			}
+		}
+		PX_ASSERT(foundIt);
+		PX_UNUSED(foundIt);
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Optimized removal of timestamped objects from the extended bucket pruner
+// Removes every object carrying the given timestamp. Exploits the fact that the
+// merged-tree array is time ordered and never shifted: all trees with this
+// timestamp form a prefix of the array. Returns the number of removed objects.
+PxU32 ExtendedBucketPruner::removeMarkedObjects(PxU32 timeStamp)
+{
+	// remove objects from the core bucket pruner
+	PxU32 retVal = mBucketCore.removeMarkedObjects(timeStamp);
+
+	// nothing to be removed
+	if(!mCurrentTreeIndex)
+		return retVal;
+
+	// if last merged tree is the timeStamp to remove, we can clear all
+	// this is safe as the merged trees array is time ordered, never shifted
+	if(mMergedTrees[mCurrentTreeIndex - 1].mTimeStamp == timeStamp)
+	{
+		retVal += mExtendedBucketPrunerMap.size();
+		cleanTrees();
+		return retVal;
+	}
+
+	// get the highest index in the merged trees array, where timeStamp match
+	// we release than all trees till the index
+	PxU32 highestTreeIndex = 0xFFFFFFFF;
+	for (PxU32 i = 0; i < mCurrentTreeIndex; i++)
+	{
+		if(mMergedTrees[i].mTimeStamp == timeStamp)
+			highestTreeIndex = i;
+		else
+			break;	// time ordering: once the timestamp differs, no later tree can match
+	}
+
+	// if no timestamp found early exit
+	if(highestTreeIndex == 0xFFFFFFFF)
+	{
+		return retVal;
+	}
+
+	PX_ASSERT(highestTreeIndex < mCurrentTreeIndex);
+	// get offset, where valid trees start
+	const PxU32 mergeTreeOffset = highestTreeIndex + 1;
+
+	// shrink the array to merged trees with a valid timeStamp
+	mCurrentTreeIndex = mCurrentTreeIndex - mergeTreeOffset;
+	// go over trees and swap released trees with valid trees from the back (valid trees are at the back)
+	for (PxU32 i = 0; i < mCurrentTreeIndex; i++)
+	{
+		// store bounds, timestamp
+		mBounds[i] = mMergedTrees[mergeTreeOffset + i].mTree->getNodes()[0].mBV;
+		mMergedTrees[i].mTimeStamp = mMergedTrees[mergeTreeOffset + i].mTimeStamp;
+
+		// release the tree with timestamp
+		AABBTree* ptr = mMergedTrees[i].mTree;
+		ptr->release();
+
+		// store the valid tree
+		mMergedTrees[i].mTree = mMergedTrees[mergeTreeOffset + i].mTree;
+		// store the release tree at the offset (slot keeps owning an empty tree for reuse)
+		mMergedTrees[mergeTreeOffset + i].mTree = ptr;
+		mMergedTrees[mergeTreeOffset + i].mTimeStamp = 0;
+	}
+	// release the rest of the trees with not valid timestamp
+	for (PxU32 i = mCurrentTreeIndex; i <= highestTreeIndex; i++)
+	{
+		mMergedTrees[i].mTree->release();
+		mMergedTrees[i].mTimeStamp = 0;
+	}
+
+	// build new main AABB tree with only trees with valid timeStamp
+	buildMainAABBTree();
+
+	// remove all unnecessary trees and map entries
+	bool removeEntry = false;
+	PxU32 numRemovedEntries = 0;
+	ExtendedBucketPrunerMap::EraseIterator eraseIterator = mExtendedBucketPrunerMap.getEraseIterator();
+	ExtendedBucketPrunerMap::Entry* entry = eraseIterator.eraseCurrentGetNext(removeEntry);
+	while (entry)
+	{
+		ExtendedBucketPrunerData& data = entry->second;
+		// data to be removed
+		if (data.mTimeStamp == timeStamp)
+		{
+			removeEntry = true;
+			numRemovedEntries++;
+		}
+		else
+		{
+			// update the merge index (the main tree node mapping was already rebuilt above by buildMainAABBTree)
+			PX_ASSERT(highestTreeIndex < data.mMergeIndex);
+			data.mMergeIndex -= mergeTreeOffset;
+			removeEntry = false;
+		}
+		entry = eraseIterator.eraseCurrentGetNext(removeEntry);
+	}
+
+#if PX_DEBUG
+	checkValidity();
+#endif // PX_DEBUG
+	// return the number of removed objects
+	return retVal + numRemovedEntries;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// clean all trees, all objects have been released
+// Every object has been released: reset all active merged trees, the object map
+// and the main tree. Tree objects stay allocated for reuse.
+void ExtendedBucketPruner::cleanTrees()
+{
+	PxU32 i = mCurrentTreeIndex;
+	while (i--)
+	{
+		mMergedTrees[i].mTimeStamp = 0;
+		mMergedTrees[i].mTree->release();
+	}
+	mExtendedBucketPrunerMap.clear();
+	mMainTree->release();
+	mCurrentTreeIndex = 0;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// shift origin
+// Propagates a scene origin shift to every spatial structure held here:
+// the core pruner, each active merged tree and the main tree.
+void ExtendedBucketPruner::shiftOrigin(const PxVec3& shift)
+{
+	mBucketCore.shiftOrigin(shift);
+
+	for (PxU32 i = 0; i < mCurrentTreeIndex; i++)
+		mMergedTrees[i].mTree->shiftOrigin(shift);
+
+	mMainTree->shiftOrigin(shift);
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Queries implementation
+//////////////////////////////////////////////////////////////////////////
+// Raycast/sweeps callback for main AABB tree
+// Raycast/sweep callback invoked for every merged tree hit while traversing the
+// main tree; it continues the same ray into the hit merged tree.
+// NOTE(review): the payload is a MergedTree reinterpreted as PrunerPayload, so
+// payload.data[0] is assumed to alias MergedTree::mTree - confirm the member order
+// in SqExtendedBucketPruner.h.
+template<bool tInflate>
+struct MainTreeRaycastPrunerCallback: public PrunerCallback
+{
+	MainTreeRaycastPrunerCallback(const PxVec3& origin, const PxVec3& unitDir, const PxVec3& extent, PrunerCallback& prunerCallback, const PruningPool* pool)
+		: mOrigin(origin), mUnitDir(unitDir), mExtent(extent), mPrunerCallback(prunerCallback), mPruningPool(pool)
+	{
+	}
+
+	virtual PxAgain invoke(PxReal& distance, const PrunerPayload& payload)
+	{
+		// payload data match merged tree data MergedTree, we can cast it
+		const AABBTree* aabbTree = reinterpret_cast<const AABBTree*> (payload.data[0]);
+		// raycast the merged tree, forwarding actual object hits to mPrunerCallback
+		return AABBTreeRaycast<tInflate>()(mPruningPool->getObjects(), mPruningPool->getCurrentWorldBoxes(), *aabbTree, mOrigin, mUnitDir, distance, mExtent, mPrunerCallback);
+	}
+
+	PX_NOCOPY(MainTreeRaycastPrunerCallback)
+
+private:
+	const PxVec3&		mOrigin;		// ray origin (or swept box center)
+	const PxVec3&		mUnitDir;		// normalized ray direction
+	const PxVec3&		mExtent;		// inflation extents (zero for plain raycasts)
+	PrunerCallback&		mPrunerCallback;	// user callback receiving object hits
+	const PruningPool*	mPruningPool;	// pool providing payloads and world boxes
+};
+
+//////////////////////////////////////////////////////////////////////////
+// raycast against the extended bucket pruner
+// Raycast against the extended bucket pruner: core pruner first, then the
+// tree-of-trees. Returns false as soon as a callback asks to stop.
+PxAgain ExtendedBucketPruner::raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& prunerCallback) const
+{
+	PxAgain again = true;
+
+	// search the bucket pruner first
+	if (mBucketCore.getNbObjects())
+		again = mBucketCore.raycast(origin, unitDir, inOutDistance, prunerCallback);
+
+	if (again && mExtendedBucketPrunerMap.size())
+	{
+		// zero extent: a plain (non-inflated) ray
+		const PxVec3 extent(0.0f);
+		// main tree callback - descends into each merged tree the ray touches
+		MainTreeRaycastPrunerCallback<false> pcb(origin, unitDir, extent, prunerCallback, mPruningPool);
+		// traverse the main tree
+		again = AABBTreeRaycast<false>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, origin, unitDir, inOutDistance, extent, pcb);
+	}
+
+	return again;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// overlap main tree callback
+// Overlap callback invoked for every merged tree hit while traversing the main
+// tree; it re-runs the same overlap test inside the hit merged tree.
+// NOTE(review): payload.data[0] is assumed to alias MergedTree::mTree (see the
+// reinterpret_cast of mMergedTrees at the call sites) - confirm the member order.
+template<typename Test>
+struct MainTreeOverlapPrunerCallback : public PrunerCallback
+{
+	MainTreeOverlapPrunerCallback(const Test& test, PrunerCallback& prunerCallback, const PruningPool* pool)
+		: mTest(test), mPrunerCallback(prunerCallback), mPruningPool(pool)
+	{
+	}
+
+	virtual PxAgain invoke(PxReal& , const PrunerPayload& payload)
+	{
+		// payload data match merged tree data MergedTree, we can cast it
+		const AABBTree* aabbTree = reinterpret_cast<const AABBTree*> (payload.data[0]);
+		// overlap the merged tree, forwarding actual object hits to mPrunerCallback
+		return AABBTreeOverlap<Test>()(mPruningPool->getObjects(), mPruningPool->getCurrentWorldBoxes(), *aabbTree, mTest, mPrunerCallback);
+	}
+
+	PX_NOCOPY(MainTreeOverlapPrunerCallback)
+
+private:
+	const Test&			mTest;			// volume-vs-AABB test functor
+	PrunerCallback&		mPrunerCallback;	// user callback receiving object hits
+	const PruningPool*	mPruningPool;	// pool providing payloads and world boxes
+};
+
+//////////////////////////////////////////////////////////////////////////
+// overlap implementation
+// Overlap query: core pruner first, then the tree-of-trees with a per-geometry
+// volume-vs-AABB test. Returns false as soon as a callback asks to stop.
+PxAgain ExtendedBucketPruner::overlap(const Gu::ShapeData& queryVolume, PrunerCallback& prunerCallback) const
+{
+	PxAgain again = true;
+
+	// core bucket pruner overlap
+	if (mBucketCore.getNbObjects())
+		again = mBucketCore.overlap(queryVolume, prunerCallback);
+
+	if(again && mExtendedBucketPrunerMap.size())
+	{
+		switch (queryVolume.getType())
+		{
+		case PxGeometryType::eBOX:
+		{
+			// oriented boxes need the OBB test, axis-aligned ones the cheaper AABB test
+			if (queryVolume.isOBB())
+			{
+				const Gu::OBBAABBTest test(queryVolume.getPrunerWorldPos(), queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerBoxGeomExtentsInflated());
+				MainTreeOverlapPrunerCallback<Gu::OBBAABBTest> pcb(test, prunerCallback, mPruningPool);
+				again = AABBTreeOverlap<Gu::OBBAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
+			}
+			else
+			{
+				const Gu::AABBAABBTest test(queryVolume.getPrunerInflatedWorldAABB());
+				MainTreeOverlapPrunerCallback<Gu::AABBAABBTest> pcb(test, prunerCallback, mPruningPool);
+				again = AABBTreeOverlap<Gu::AABBAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
+			}
+		}
+		break;
+		case PxGeometryType::eCAPSULE:
+		{
+			const Gu::Capsule& capsule = queryVolume.getGuCapsule();
+			const Gu::CapsuleAABBTest test(capsule.p1, queryVolume.getPrunerWorldRot33().column0,
+				queryVolume.getCapsuleHalfHeight()*2.0f, PxVec3(capsule.radius*SQ_PRUNER_INFLATION));
+			MainTreeOverlapPrunerCallback<Gu::CapsuleAABBTest> pcb(test, prunerCallback, mPruningPool);
+			again = AABBTreeOverlap<Gu::CapsuleAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
+		}
+		break;
+		case PxGeometryType::eSPHERE:
+		{
+			const Gu::Sphere& sphere = queryVolume.getGuSphere();
+			Gu::SphereAABBTest test(sphere.center, sphere.radius);
+			MainTreeOverlapPrunerCallback<Gu::SphereAABBTest> pcb(test, prunerCallback, mPruningPool);
+			again = AABBTreeOverlap<Gu::SphereAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
+		}
+		break;
+		case PxGeometryType::eCONVEXMESH:
+		{
+			// convexes are tested through their oriented bounding box
+			const Gu::OBBAABBTest test(queryVolume.getPrunerWorldPos(), queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerBoxGeomExtentsInflated());
+			MainTreeOverlapPrunerCallback<Gu::OBBAABBTest> pcb(test, prunerCallback, mPruningPool);
+			again = AABBTreeOverlap<Gu::OBBAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
+		}
+		break;
+		case PxGeometryType::ePLANE:
+		case PxGeometryType::eTRIANGLEMESH:
+		case PxGeometryType::eHEIGHTFIELD:
+		case PxGeometryType::eGEOMETRY_COUNT:
+		case PxGeometryType::eINVALID:
+			PX_ALWAYS_ASSERT_MESSAGE("unsupported overlap query volume geometry type");
+		}
+	}
+
+	return again;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// sweep implementation
+// Sweep query: core pruner first, then the tree-of-trees as an inflated raycast
+// (ray = swept AABB center, inflation = swept AABB half-extents).
+PxAgain ExtendedBucketPruner::sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& prunerCallback) const
+{
+	// core bucket pruner sweep
+	PxAgain again = mBucketCore.getNbObjects() ? mBucketCore.sweep(queryVolume, unitDir, inOutDistance, prunerCallback) : true;
+
+	if(again && mExtendedBucketPrunerMap.size())
+	{
+		const PxBounds3& inflatedAABB = queryVolume.getPrunerInflatedWorldAABB();
+		const PxVec3 halfExtents = inflatedAABB.getExtents();
+		const PxVec3 rayOrigin = inflatedAABB.getCenter();
+		// descend into each merged tree touched by the swept volume
+		MainTreeRaycastPrunerCallback<true> mainTreeCallback(rayOrigin, unitDir, halfExtents, prunerCallback, mPruningPool);
+		again = AABBTreeRaycast<true>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, rayOrigin, unitDir, inOutDistance, halfExtents, mainTreeCallback);
+	}
+	return again;
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+#include "CmRenderOutput.h"
+
+// visualization
+// Debug-draws every node bound of the given tree (no-op for a NULL tree).
+void visualizeTree(Cm::RenderOutput& out, PxU32 color, AABBTree* tree)
+{
+	if (!tree)
+		return;
+
+	struct RecursiveDraw
+	{
+		static void draw(const AABBTreeRuntimeNode* root, const AABBTreeRuntimeNode* node, Cm::RenderOutput& output)
+		{
+			// draw this node's bounds, then recurse into both children
+			output << Cm::DebugBox(node->mBV, true);
+			if (!node->isLeaf())
+			{
+				draw(root, node->getPos(root), output);
+				draw(root, node->getNeg(root), output);
+			}
+		}
+	};
+
+	// boxes are emitted in world space
+	out << PxTransform(PxIdentity);
+	out << color;
+	RecursiveDraw::draw(tree->getNodes(), tree->getNodes(), out);
+}
+
+// Debug visualization: main tree, then each active merged tree, then the core pruner.
+void ExtendedBucketPruner::visualize(Cm::RenderOutput& out, PxU32 color) const
+{
+	visualizeTree(out, color, mMainTree);
+
+	for (PxU32 treeIndex = 0; treeIndex < mCurrentTreeIndex; treeIndex++)
+		visualizeTree(out, color, mMergedTrees[treeIndex].mTree);
+
+	mBucketCore.visualize(out, color);
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+#if PX_DEBUG
+// extended bucket pruner validity check
+// Debug-only consistency check:
+// - every active merged tree appears exactly once as a main-tree primitive
+// - cached bounds match each merged tree's root bounds
+// - every pool index appears exactly once across all merged-tree leaves, and its
+//   hash-map entry points back to the correct tree slot and leaf node
+// - inactive slots hold empty trees; all map entries reference valid nodes
+bool ExtendedBucketPruner::checkValidity()
+{
+	Cm::BitMap testBitmap;
+	testBitmap.resizeAndClear(mCurrentTreeIndex);
+	for (PxU32 i = 0; i < mMainTree->getNbNodes(); i++)
+	{
+		const AABBTreeRuntimeNode& node = mMainTree->getNodes()[i];
+		if(node.isLeaf())
+		{
+			const PxU32 nbPrims = node.getNbRuntimePrimitives();
+			PX_ASSERT(nbPrims <= NB_OBJECTS_PER_NODE);
+
+			const PxU32* primitives = node.getPrimitives(mMainTree->getIndices());
+			for (PxU32 j = 0; j < nbPrims; j++)
+			{
+				const PxU32 index = primitives[j];
+				// check if index is correct
+				PX_ASSERT(index < mCurrentTreeIndex);
+				// mark the index in the test bitmap, must be once set only, all merged trees must be in the main tree
+				PX_ASSERT(testBitmap.test(index) == IntFalse);
+				testBitmap.set(index);
+			}
+		}
+	}
+
+	Cm::BitMap mergeTreeTestBitmap;
+	mergeTreeTestBitmap.resizeAndClear(mPruningPool->getNbActiveObjects());
+	for (PxU32 i = 0; i < mCurrentTreeIndex; i++)
+	{
+		// check if bounds are the same as the merged tree root bounds
+		PX_ASSERT(mBounds[i].maximum.x == mMergedTrees[i].mTree->getNodes()[0].mBV.maximum.x);
+		PX_ASSERT(mBounds[i].maximum.y == mMergedTrees[i].mTree->getNodes()[0].mBV.maximum.y);
+		PX_ASSERT(mBounds[i].maximum.z == mMergedTrees[i].mTree->getNodes()[0].mBV.maximum.z);
+		PX_ASSERT(mBounds[i].minimum.x == mMergedTrees[i].mTree->getNodes()[0].mBV.minimum.x);
+		PX_ASSERT(mBounds[i].minimum.y == mMergedTrees[i].mTree->getNodes()[0].mBV.minimum.y);
+		PX_ASSERT(mBounds[i].minimum.z == mMergedTrees[i].mTree->getNodes()[0].mBV.minimum.z);
+
+		// check each tree
+		const AABBTree& mergedTree = *mMergedTrees[i].mTree;
+		for (PxU32 j = 0; j < mergedTree.getNbNodes(); j++)
+		{
+			const AABBTreeRuntimeNode& node = mergedTree.getNodes()[j];
+			if (node.isLeaf())
+			{
+				const PxU32 nbPrims = node.getNbRuntimePrimitives();
+				PX_ASSERT(nbPrims <= NB_OBJECTS_PER_NODE);
+
+				const PxU32* primitives = node.getPrimitives(mergedTree.getIndices());
+				for (PxU32 k = 0; k < nbPrims; k++)
+				{
+					const PxU32 index = primitives[k];
+					// check if index is correct
+					PX_ASSERT(index < mPruningPool->getNbActiveObjects());
+					// mark the index in the test bitmap, each pool index must appear exactly once across all merged trees
+					PX_ASSERT(mergeTreeTestBitmap.test(index) == IntFalse);
+					mergeTreeTestBitmap.set(index);
+
+					// the map entry for this payload must point back to this tree slot and leaf node
+					const PrunerPayload& payload = mPruningPool->getObjects()[index];
+					const ExtendedBucketPrunerMap::Entry* extendedPrunerSwapEntry = mExtendedBucketPrunerMap.find(payload);
+					PX_ASSERT(extendedPrunerSwapEntry);
+
+					const ExtendedBucketPrunerData& data = extendedPrunerSwapEntry->second;
+					PX_ASSERT(data.mMergeIndex == i);
+					PX_ASSERT(data.mSubTreeNode == j);
+				}
+			}
+		}
+	}
+	// inactive slots must hold released (empty) trees
+	for (PxU32 i = mCurrentTreeIndex; i < mCurrentTreeCapacity; i++)
+	{
+		PX_ASSERT(mMergedTrees[i].mTree->getIndices() == NULL);
+		PX_ASSERT(mMergedTrees[i].mTree->getNodes() == NULL);
+	}
+	// every map entry must reference an active tree slot and valid nodes
+	for (ExtendedBucketPrunerMap::Iterator iter = mExtendedBucketPrunerMap.getIterator(); !iter.done(); ++iter)
+	{
+		const ExtendedBucketPrunerData& data = iter->second;
+		PX_ASSERT(mMainTreeUpdateMap[data.mMergeIndex] < mMainTree->getNbNodes());
+		PX_ASSERT(data.mMergeIndex < mCurrentTreeIndex);
+		PX_ASSERT(data.mSubTreeNode < mMergedTrees[data.mMergeIndex].mTree->getNbNodes());
+	}
+	return true;
+}
+#endif
+
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.h b/PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.h
new file mode 100644
index 00000000..ad360e10
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.h
@@ -0,0 +1,176 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef SQ_EXTENDEDBUCKETPRUNER_H
+#define SQ_EXTENDEDBUCKETPRUNER_H
+
+#include "SqTypedef.h"
+#include "SqBucketPruner.h"
+#include "SqAABBTreeUpdateMap.h"
+#include "PsHashMap.h"
+
+namespace physx
+{
+namespace Sq
+{
+ struct AABBPrunerMergeData;
+ class AABBTreeMergeData;
+
+ // Extended bucket pruner data, if an object belongs to the tree of trees, we need to
+ // remember node for the sub tree, the tree it belongs to and the main tree node
+ struct ExtendedBucketPrunerData
+ {
+ PxU32 mTimeStamp; // timestamp of the batch this object was added with (used for batched removal)
+ TreeNodeIndex mSubTreeNode; // leaf node index within the merged (sub) tree that holds this object
+ PxU32 mMergeIndex; // index into the bounds and merged-trees arrays (identifies the owning merged tree)
+ };
+
+ // Merged tree structure, holds the tree and its timestamp; released when no objects remain
+ // in the tree or when its timestamped objects are released
+ struct MergedTree
+ {
+ AABBTree* mTree; // AABB tree
+ size_t mTimeStamp; // needs to be size_t to match PrunerPayload size
+ };
+ // needs to be size_t to match PrunerPayload size, pointer used for AABB tree query callbacks
+ PX_COMPILE_TIME_ASSERT(sizeof(MergedTree) == sizeof(PrunerPayload));
+
+ // hashing function for PrunerPayload key
+ struct ExtendedBucketPrunerHash
+ {
+ PX_FORCE_INLINE uint32_t operator()(const PrunerPayload& payload) const
+ {
+#if PX_P64_FAMILY
+ // const PxU32 h0 = Ps::hash((const void*)payload.data[0]);
+ // const PxU32 h1 = Ps::hash((const void*)payload.data[1]);
+ // NOTE: on 64-bit platforms only the low 32 bits of each payload word take part in
+ // the hash; the two halves are packed into a single 64-bit value before hashing.
+ const PxU32 h0 = PxU32(PX_MAX_U32 & payload.data[0]);
+ const PxU32 h1 = PxU32(PX_MAX_U32 & payload.data[1]);
+ return Ps::hash(PxU64(h0) | (PxU64(h1) << 32));
+#else
+ return Ps::hash(PxU64(payload.data[0]) | (PxU64(payload.data[1]) << 32));
+#endif
+ }
+ // keys are equal iff both payload words match exactly
+ PX_FORCE_INLINE bool equal(const PrunerPayload& k0, const PrunerPayload& k1) const
+ {
+ return (k0.data[0] == k1.data[0]) && (k0.data[1] == k1.data[1]);
+ }
+ };
+
+ // A.B. replace, this is useless, need to be able to traverse the map and release while traversing, also eraseAt failed
+ typedef Ps::HashMap<PrunerPayload, ExtendedBucketPrunerData, ExtendedBucketPrunerHash> ExtendedBucketPrunerMap;
+
+ // Extended bucket pruner holds single objects in a bucket pruner and AABBtrees in a tree of trees.
+ // Base usage of ExtendedBucketPruner is for dynamic AABBPruner new objects, that did not make it
+ // into new tree. Single objects go directly into a bucket pruner, while merged AABBtrees
+ // go into a tree of trees.
+ class ExtendedBucketPruner
+ {
+ public:
+ ExtendedBucketPruner(const PruningPool* pool);
+ virtual ~ExtendedBucketPruner();
+
+ // release internal trees, maps and the bucket pruner core
+ void release();
+
+ // add single object into a bucket pruner directly
+ PX_FORCE_INLINE bool addObject(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp)
+ {
+ return mBucketCore.addObject(object, worldAABB, timeStamp);
+ }
+
+ // add AABB tree from pruning structure - adds new primitive into main AABB tree
+ void addTree(const AABBTreeMergeData& mergeData, PxU32 timeStamp);
+
+ // update object bounds; object may live either in the bucket pruner or in the tree of trees
+ bool updateObject(const PxBounds3& worldAABB, const PrunerPayload& object);
+
+ // remove object, removed object is replaced in pruning pool by swapped object, indices needs to be updated
+ bool removeObject(const PrunerPayload& object, PxU32 objectIndex, const PrunerPayload& swapObject,
+ PxU32 swapObjectIndex, PxU32& timeStamp);
+
+ // separate call for indices invalidation, object can be either in AABBPruner or Bucket pruner, but the swapped object can be
+ // in the tree of trees
+ void invalidateObject(const ExtendedBucketPrunerData& object, PxU32 objectIndex, const PrunerPayload& swapObject,
+ PxU32 swapObjectIndex);
+
+ // swap object index, the object index can be in bucket pruner or tree of trees
+ void swapIndex(PxU32 objectIndex, const PrunerPayload& swapObject, PxU32 swapObjectIndex);
+
+ // refit marked nodes in tree of trees
+ void refitMarkedNodes(const PxBounds3* boxes);
+
+
+ // look for objects marked with input timestamp everywhere in the structure, and remove them. This is the same
+ // as calling 'removeObject' individually for all these objects, but much more efficient. Returns number of removed objects.
+ PxU32 removeMarkedObjects(PxU32 timeStamp);
+
+ // queries against the pruner (bucket pruner core plus the tree of trees)
+ PxAgain raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
+ PxAgain overlap(const Gu::ShapeData& queryVolume, PrunerCallback&) const;
+ PxAgain sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
+
+ // origin shift
+ void shiftOrigin(const PxVec3& shift);
+
+ // debug visualize
+ void visualize(Cm::RenderOutput& out, PxU32 color) const;
+
+ PX_FORCE_INLINE void build() { mBucketCore.build(); }
+
+ // total object count: bucket-pruner objects plus objects tracked in the tree-of-trees map
+ PX_FORCE_INLINE PxU32 getNbObjects() const { return mBucketCore.getNbObjects() + mExtendedBucketPrunerMap.size(); }
+
+ private:
+ void resize(PxU32 size);
+ void buildMainAABBTree();
+ void copyTree(AABBTree& destTree, const AABBPrunerMergeData& inputData);
+ void cleanTrees();
+
+#if PX_DEBUG
+ // Extended bucket pruner validity check (debug builds only)
+ bool checkValidity();
+#endif
+ private:
+ BucketPrunerCore mBucketCore; // Bucket pruner for single objects
+ const PruningPool* mPruningPool; // Pruning pool from AABB pruner
+ ExtendedBucketPrunerMap mExtendedBucketPrunerMap; // Map holding objects from tree merge - objects in tree of trees
+ AABBTree* mMainTree; // Main tree holding merged trees
+ AABBTreeUpdateMap mMainTreeUpdateMap; // Main tree updated map - merged trees index to nodes
+ AABBTreeUpdateMap mMergeTreeUpdateMap; // Merged tree update map used while tree is merged
+ PxBounds3* mBounds; // Merged trees bounds used for main tree building
+ MergedTree* mMergedTrees; // Merged trees
+ PxU32 mCurrentTreeIndex; // Current trees index
+ PxU32 mCurrentTreeCapacity; // Current trees capacity
+ bool mTreesDirty; // Dirty marker
+ };
+
+} // namespace Sq
+
+}
+
+#endif // SQ_EXTENDEDBUCKETPRUNER_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqMetaData.cpp b/PhysX_3.4/Source/SceneQuery/src/SqMetaData.cpp
new file mode 100644
index 00000000..86ba1d67
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqMetaData.cpp
@@ -0,0 +1,57 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PxMetaData.h"
+
+#include "SqPruningStructure.h"
+
+using namespace physx;
+using namespace Sq;
+
+///////////////////////////////////////////////////////////////////////////////
+
+ // Registers binary-serialization metadata for PruningStructure: one entry per member,
+ // in declaration order, so the binary serializer can save and restore instances.
+ // NOTE(review): PxMetaDataFlag::ePTR appears to mark pointer-sized fields that are
+ // re-linked on deserialization — confirm against the PhysX serialization docs.
+ void PruningStructure::getBinaryMetaData(PxOutputStream& stream)
+ {
+ PX_DEF_BIN_METADATA_VCLASS(stream, PruningStructure)
+ PX_DEF_BIN_METADATA_BASE_CLASS(stream, PruningStructure, PxBase)
+
+ PX_DEF_BIN_METADATA_ITEM(stream, PruningStructure, PxU32, mNbNodes[0], 0)
+ PX_DEF_BIN_METADATA_ITEM(stream, PruningStructure, PxU32, mNbNodes[1], 0)
+ PX_DEF_BIN_METADATA_ITEM(stream, PruningStructure, AABBTreeRuntimeNode, mAABBTreeNodes[0], PxMetaDataFlag::ePTR)
+ PX_DEF_BIN_METADATA_ITEM(stream, PruningStructure, AABBTreeRuntimeNode, mAABBTreeNodes[1], PxMetaDataFlag::ePTR)
+ PX_DEF_BIN_METADATA_ITEM(stream, PruningStructure, PxU32, mNbObjects[0], 0)
+ PX_DEF_BIN_METADATA_ITEM(stream, PruningStructure, PxU32, mNbObjects[1], 0)
+ PX_DEF_BIN_METADATA_ITEM(stream, PruningStructure, PxU32, mAABBTreeIndices[0], PxMetaDataFlag::ePTR)
+ PX_DEF_BIN_METADATA_ITEM(stream, PruningStructure, PxU32, mAABBTreeIndices[1], PxMetaDataFlag::ePTR)
+ PX_DEF_BIN_METADATA_ITEM(stream, PruningStructure, PxU32, mNbActors, 0)
+ PX_DEF_BIN_METADATA_ITEM(stream, PruningStructure, PxActor*, mActors, PxMetaDataFlag::ePTR)
+ PX_DEF_BIN_METADATA_ITEM(stream, PruningStructure, bool, mValid, 0)
+ }
+
+
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h b/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h
new file mode 100644
index 00000000..9ded6d26
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h
@@ -0,0 +1,258 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef GU_RAWQUERY_TESTS_SIMD_H
+#define GU_RAWQUERY_TESTS_SIMD_H
+
+#include "foundation/PxTransform.h"
+#include "foundation/PxBounds3.h"
+#include "CmPhysXCommon.h"
+#include "PxBoxGeometry.h"
+#include "PxSphereGeometry.h"
+#include "PxCapsuleGeometry.h"
+#include "PsVecMath.h"
+
+namespace physx
+{
+namespace Gu
+{
+
+struct RayAABBTest
+{
+ PX_FORCE_INLINE RayAABBTest(const PxVec3& origin_, const PxVec3& unitDir_, const PxReal maxDist, const PxVec3& inflation_)
+ : mOrigin(V3LoadU(origin_))
+ , mDir(V3LoadU(unitDir_))
+ , mDirYZX(V3PermYZX(mDir))
+ , mInflation(V3LoadU(inflation_))
+ , mAbsDir(V3Abs(mDir))
+ , mAbsDirYZX(V3PermYZX(mAbsDir))
+ {
+ const PxVec3 ext = maxDist >= PX_MAX_F32 ? PxVec3( unitDir_.x == 0 ? origin_.x : PxSign(unitDir_.x)*PX_MAX_F32,
+ unitDir_.y == 0 ? origin_.y : PxSign(unitDir_.y)*PX_MAX_F32,
+ unitDir_.z == 0 ? origin_.z : PxSign(unitDir_.z)*PX_MAX_F32)
+ : origin_ + unitDir_ * maxDist;
+ mRayMin = V3Min(mOrigin, V3LoadU(ext));
+ mRayMax = V3Max(mOrigin, V3LoadU(ext));
+ }
+
+ PX_FORCE_INLINE void setDistance(PxReal distance)
+ {
+ const Vec3V ext = V3ScaleAdd(mDir, FLoad(distance), mOrigin);
+ mRayMin = V3Min(mOrigin, ext);
+ mRayMax = V3Max(mOrigin, ext);
+ }
+
+ template<bool TInflate>
+ PX_FORCE_INLINE PxU32 check(const Vec3V center, const Vec3V extents) const
+ {
+ const Vec3V iExt = TInflate ? V3Add(extents, mInflation) : extents;
+
+ // coordinate axes
+ const Vec3V nodeMax = V3Add(center, iExt);
+ const Vec3V nodeMin = V3Sub(center, iExt);
+
+ // cross axes
+ const Vec3V offset = V3Sub(mOrigin, center);
+ const Vec3V offsetYZX = V3PermYZX(offset);
+ const Vec3V iExtYZX = V3PermYZX(iExt);
+
+ const Vec3V f = V3NegMulSub(mDirYZX, offset, V3Mul(mDir, offsetYZX));
+ const Vec3V g = V3MulAdd(iExt, mAbsDirYZX, V3Mul(iExtYZX, mAbsDir));
+
+ const BoolV
+ maskA = V3IsGrtrOrEq(nodeMax, mRayMin),
+ maskB = V3IsGrtrOrEq(mRayMax, nodeMin),
+ maskC = V3IsGrtrOrEq(g, V3Abs(f));
+ const BoolV andABCMasks = BAnd(BAnd(maskA, maskB), maskC);
+
+ return BAllEqTTTT(andABCMasks);
+ }
+
+ const Vec3V mOrigin, mDir, mDirYZX, mInflation, mAbsDir, mAbsDirYZX;
+ Vec3V mRayMin, mRayMax;
+protected:
+ RayAABBTest& operator=(const RayAABBTest&);
+};
+
+// probably not worth having a SIMD version of this unless the traversal passes Vec3Vs
+struct AABBAABBTest
+{
+ PX_FORCE_INLINE AABBAABBTest(const PxTransform&t, const PxBoxGeometry&b)
+ : mCenter(V3LoadU(t.p))
+ , mExtents(V3LoadU(b.halfExtents))
+ { }
+
+ PX_FORCE_INLINE AABBAABBTest(const PxBounds3& b)
+ : mCenter(V3LoadU(b.getCenter()))
+ , mExtents(V3LoadU(b.getExtents()))
+ { }
+
+ PX_FORCE_INLINE Ps::IntBool operator()(const Vec3V center, const Vec3V extents) const
+ {
+ //PxVec3 c; PxVec3_From_Vec3V(center, c);
+ //PxVec3 e; PxVec3_From_Vec3V(extents, e);
+ //if(PxAbs(c.x - mCenter.x) > mExtents.x + e.x) return Ps::IntFalse;
+ //if(PxAbs(c.y - mCenter.y) > mExtents.y + e.y) return Ps::IntFalse;
+ //if(PxAbs(c.z - mCenter.z) > mExtents.z + e.z) return Ps::IntFalse;
+ //return Ps::IntTrue;
+ return Ps::IntBool(V3AllGrtrOrEq(V3Add(mExtents, extents), V3Abs(V3Sub(center, mCenter))));
+ }
+
+private:
+ AABBAABBTest& operator=(const AABBAABBTest&);
+ const Vec3V mCenter, mExtents;
+};
+
+struct SphereAABBTest
+{
+ PX_FORCE_INLINE SphereAABBTest(const PxTransform& t, const PxSphereGeometry& s)
+ : mCenter(V3LoadU(t.p))
+ , mRadius2(FLoad(s.radius * s.radius))
+ {}
+
+ PX_FORCE_INLINE SphereAABBTest(const PxVec3& center, PxF32 radius)
+ : mCenter(V3LoadU(center))
+ , mRadius2(FLoad(radius * radius))
+ {}
+
+ PX_FORCE_INLINE Ps::IntBool operator()(const Vec3V boxCenter, const Vec3V boxExtents) const
+ {
+ const Vec3V offset = V3Sub(mCenter, boxCenter);
+ const Vec3V closest = V3Clamp(offset, V3Neg(boxExtents), boxExtents);
+ const Vec3V d = V3Sub(offset, closest);
+ return Ps::IntBool(BAllEqTTTT(FIsGrtrOrEq(mRadius2, V3Dot(d, d))));
+ }
+
+private:
+ SphereAABBTest& operator=(const SphereAABBTest&);
+ const Vec3V mCenter;
+ const FloatV mRadius2;
+};
+
+ // The Opcode capsule-AABB traversal test seems to be *exactly* the same as the ray-box test inflated by the capsule radius (so not a true capsule/box test)
+ // and the code for the ray-box test is better. TODO: check the zero length case and use the sphere traversal if this one fails.
+ // (OTOH it's not that hard to adapt the Ray-AABB test to a capsule test)
+
+ struct CapsuleAABBTest: private RayAABBTest
+ {
+ // origin/unitDir/length describe the capsule segment; inflation carries the radius per axis
+ PX_FORCE_INLINE CapsuleAABBTest(const PxVec3& origin, const PxVec3& unitDir, const PxReal length, const PxVec3& inflation)
+ : RayAABBTest(origin, unitDir, length, inflation)
+ {}
+
+ // delegates to the inflated (TInflate=true) ray-vs-AABB check
+ PX_FORCE_INLINE Ps::IntBool operator()(const Vec3VArg center, const Vec3VArg extents) const
+ {
+ return Ps::IntBool(RayAABBTest::check<true>(center, extents));
+ }
+ };
+
+ // OBB-vs-AABB separating-axis test. With fullTest=false only the 6 face axes are
+ // tested (classes I and II below), which is conservative: it can report overlap for
+ // some non-overlapping pairs but never misses a true overlap.
+ template<bool fullTest>
+ struct OBBAABBTests
+ {
+ OBBAABBTests(const PxVec3& pos, const PxMat33& rot, const PxVec3& halfExtentsInflated)
+ {
+ // epsilon padding on |R| guards against near-parallel edge axes (degenerate cross products)
+ const Vec3V eps = V3Load(1e-6f);
+
+ mT = V3LoadU(pos);
+ mExtents = V3LoadU(halfExtentsInflated);
+
+ // storing the transpose matrices yields a simpler SIMD test
+ mRT = Mat33V_From_PxMat33(rot.getTranspose());
+ mART = Mat33V(V3Add(V3Abs(mRT.col0), eps), V3Add(V3Abs(mRT.col1), eps), V3Add(V3Abs(mRT.col2), eps));
+ mBB_xyz = M33TrnspsMulV3(mART, mExtents);
+
+ if(fullTest)
+ {
+ const Vec3V eYZX = V3PermYZX(mExtents), eZXY = V3PermZXY(mExtents);
+
+ mBB_123 = V3MulAdd(eYZX, V3PermZXY(mART.col0), V3Mul(eZXY, V3PermYZX(mART.col0)));
+ mBB_456 = V3MulAdd(eYZX, V3PermZXY(mART.col1), V3Mul(eZXY, V3PermYZX(mART.col1)));
+ mBB_789 = V3MulAdd(eYZX, V3PermZXY(mART.col2), V3Mul(eZXY, V3PermYZX(mART.col2)));
+ }
+ }
+
+ // TODO: force inline it?
+ // Returns IntTrue when the OBB overlaps the AABB given by center/extents.
+ Ps::IntBool operator()(const Vec3V center, const Vec3V extents) const
+ {
+ const Vec3V t = V3Sub(mT, center);
+
+ // class I - axes of AABB
+ if(V3OutOfBounds(t, V3Add(extents, mBB_xyz)))
+ return Ps::IntFalse;
+
+ const Vec3V rX = mRT.col0, rY = mRT.col1, rZ = mRT.col2;
+ const Vec3V arX = mART.col0, arY = mART.col1, arZ = mART.col2;
+
+ const FloatV eX = V3GetX(extents), eY = V3GetY(extents), eZ = V3GetZ(extents);
+ const FloatV tX = V3GetX(t), tY = V3GetY(t), tZ = V3GetZ(t);
+
+ // class II - axes of OBB
+ {
+ const Vec3V v = V3ScaleAdd(rZ, tZ, V3ScaleAdd(rY, tY, V3Scale(rX, tX)));
+ const Vec3V v2 = V3ScaleAdd(arZ, eZ, V3ScaleAdd(arY, eY, V3ScaleAdd(arX, eX, mExtents)));
+ if(V3OutOfBounds(v, v2))
+ return Ps::IntFalse;
+ }
+
+ if(!fullTest)
+ return Ps::IntTrue;
+
+ // class III - edge cross products. Almost all OBB tests early-out with type I or type II,
+ // so early-outs here probably aren't useful (TODO: profile)
+
+ const Vec3V va = V3NegScaleSub(rZ, tY, V3Scale(rY, tZ));
+ const Vec3V va2 = V3ScaleAdd(arY, eZ, V3ScaleAdd(arZ, eY, mBB_123));
+ const BoolV ba = BOr(V3IsGrtr(va, va2), V3IsGrtr(V3Neg(va2), va));
+
+ const Vec3V vb = V3NegScaleSub(rX, tZ, V3Scale(rZ, tX));
+ const Vec3V vb2 = V3ScaleAdd(arX, eZ, V3ScaleAdd(arZ, eX, mBB_456));
+ const BoolV bb = BOr(V3IsGrtr(vb, vb2), V3IsGrtr(V3Neg(vb2), vb));
+
+ const Vec3V vc = V3NegScaleSub(rY, tX, V3Scale(rX, tY));
+ const Vec3V vc2 = V3ScaleAdd(arX, eY, V3ScaleAdd(arY, eX, mBB_789));
+ const BoolV bc = BOr(V3IsGrtr(vc, vc2), V3IsGrtr(V3Neg(vc2), vc));
+
+ // overlap iff no separating axis was found among the 9 edge-cross axes
+ return Ps::IntBool(BAllEqFFFF(BOr(ba, BOr(bb,bc))));
+ }
+
+ Vec3V mExtents; // extents of OBB
+ Vec3V mT; // translation of OBB
+ Mat33V mRT; // transpose of rotation matrix of OBB
+ Mat33V mART; // transpose of mRT, padded by epsilon
+
+ Vec3V mBB_xyz; // extents of OBB along coordinate axes
+ Vec3V mBB_123; // projections of extents onto edge-cross axes
+ Vec3V mBB_456;
+ Vec3V mBB_789;
+ };
+
+ typedef OBBAABBTests<true> OBBAABBTest;
+
+}
+}
+#endif
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.cpp b/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.cpp
new file mode 100644
index 00000000..8a90a1d3
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.cpp
@@ -0,0 +1,182 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#include "foundation/PxMemory.h"
+#include "SqPruningPool.h"
+
+using namespace physx;
+using namespace Sq;
+using namespace Cm;
+
+ // Initializes an empty pool; all arrays are allocated lazily on the first resize()
+ PruningPool::PruningPool() :
+ mNbObjects (0),
+ mMaxNbObjects (0),
+ mWorldBoxes (NULL),
+ mObjects (NULL),
+ mHandleToIndex (NULL),
+ mIndexToHandle (NULL),
+ mFirstRecycledHandle(INVALID_PRUNERHANDLE)
+ {
+ }
+
+ // Frees all four parallel arrays (the macros also reset the pointers)
+ PruningPool::~PruningPool()
+ {
+ PX_FREE_AND_RESET(mWorldBoxes);
+ PX_FREE_AND_RESET(mObjects);
+ PX_FREE_AND_RESET(mHandleToIndex);
+ PX_FREE_AND_RESET(mIndexToHandle);
+ }
+
+ // Grows (or shrinks) all four parallel arrays to newCapacity.
+ // Returns false (leaving the pool untouched) if any allocation fails.
+ bool PruningPool::resize(PxU32 newCapacity)
+ {
+ // PT: we always allocate one extra box, to make sure we can safely use V4 loads on the array
+ PxBounds3* newBoxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*(newCapacity+1), "PxBounds3"));
+ PrunerPayload* newData = reinterpret_cast<PrunerPayload*>(PX_ALLOC(sizeof(PrunerPayload)*newCapacity, "PrunerPayload*"));
+ PrunerHandle* newIndexToHandle = reinterpret_cast<PrunerHandle*>(PX_ALLOC(sizeof(PrunerHandle)*newCapacity, "Pruner Index Mapping"));
+ PoolIndex* newHandleToIndex = reinterpret_cast<PoolIndex*>(PX_ALLOC(sizeof(PoolIndex)*newCapacity, "Pruner Index Mapping"));
+ if( (NULL==newBoxes) || (NULL==newData) || (NULL==newIndexToHandle) || (NULL==newHandleToIndex)
+ )
+ {
+ // release whichever of the new buffers did get allocated; the old arrays stay valid
+ PX_FREE_AND_RESET(newBoxes);
+ PX_FREE_AND_RESET(newData);
+ PX_FREE_AND_RESET(newIndexToHandle);
+ PX_FREE_AND_RESET(newHandleToIndex);
+ return false;
+ }
+
+ // copy the live entries; note mHandleToIndex copies mMaxNbObjects entries (not mNbObjects)
+ // because the recycled-handle freelist is stored in the holes of that array
+ if(mWorldBoxes) PxMemCopy(newBoxes, mWorldBoxes, mNbObjects*sizeof(PxBounds3));
+ if(mObjects) PxMemCopy(newData, mObjects, mNbObjects*sizeof(PrunerPayload));
+ if(mIndexToHandle) PxMemCopy(newIndexToHandle, mIndexToHandle, mNbObjects*sizeof(PrunerHandle));
+ if(mHandleToIndex) PxMemCopy(newHandleToIndex, mHandleToIndex, mMaxNbObjects*sizeof(PoolIndex));
+ mMaxNbObjects = newCapacity;
+
+ PX_FREE_AND_RESET(mWorldBoxes);
+ PX_FREE_AND_RESET(mObjects);
+ PX_FREE_AND_RESET(mHandleToIndex);
+ PX_FREE_AND_RESET(mIndexToHandle);
+ mWorldBoxes = newBoxes;
+ mObjects = newData;
+ mHandleToIndex = newHandleToIndex;
+ mIndexToHandle = newIndexToHandle;
+
+ return true;
+ }
+
+void PruningPool::preallocate(PxU32 newCapacity)
+{
+ if(newCapacity>mMaxNbObjects)
+ resize(newCapacity);
+}
+
+ // Adds 'count' (bounds, payload) pairs to the pool, writing one handle per object into
+ // 'results'. Returns the number of objects actually added - ideally 'count', but fewer
+ // if growing the pool runs out of memory.
+ PxU32 PruningPool::addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count)
+ {
+ for(PxU32 i=0;i<count;i++)
+ {
+ if(mNbObjects==mMaxNbObjects) // increase the capacity on overflow
+ {
+ if(!resize(PxMax<PxU32>(mMaxNbObjects*2, 64)))
+ {
+ // pool can return an invalid handle if memory alloc fails
+ // should probably have an error here or not handle this
+ results[i] = INVALID_PRUNERHANDLE; // PT: we need to write the potentially invalid handle to let users know which object failed first
+ return i;
+ }
+ }
+ PX_ASSERT(mNbObjects!=mMaxNbObjects);
+
+ const PoolIndex index = mNbObjects++;
+
+ // update mHandleToIndex and mIndexToHandle mappings
+ PrunerHandle handle;
+ if(mFirstRecycledHandle != INVALID_PRUNERHANDLE)
+ {
+ // mFirstRecycledHandle is an entry into a freelist for removed slots
+ // this path is only taken if we have any removed slots
+ handle = mFirstRecycledHandle;
+ mFirstRecycledHandle = mHandleToIndex[handle]; // pop the freelist head (next link lives in the hole)
+ }
+ else
+ {
+ // no holes: handles and indices grow in lockstep
+ handle = index;
+ }
+
+ // PT: TODO: investigate why we added mIndexToHandle/mHandleToIndex. The initial design with 'Prunable' objects didn't need these arrays.
+
+ // PT: these 3 arrays are "parallel"
+ mWorldBoxes [index] = bounds[i]; // store the payload and AABB in parallel arrays
+ mObjects [index] = payload[i];
+ mIndexToHandle [index] = handle;
+
+ mHandleToIndex[handle] = index;
+ results[i] = handle;
+ }
+ return count;
+ }
+
+ // Removes the object referenced by handle h using swap-with-last, then pushes h onto the
+ // recycled-handle freelist. Returns the pool index the last object used to occupy, so
+ // callers can patch any external structure that referenced it by index.
+ PoolIndex PruningPool::removeObject(PrunerHandle h)
+ {
+ PX_ASSERT(mNbObjects);
+
+ // remove the object and its AABB by provided PrunerHandle and update mHandleToIndex and mIndexToHandle mappings
+ const PoolIndex indexOfRemovedObject = mHandleToIndex[h]; // retrieve object's index from handle
+
+ const PoolIndex indexOfLastObject = --mNbObjects; // swap the object at last index with index
+ if(indexOfLastObject!=indexOfRemovedObject)
+ {
+ // PT: move last object's data to recycled spot (from removed object)
+
+ // PT: the last object has moved so we need to handle the mappings for this object
+ // PT: TODO: investigate where this double-mapping comes from. Should not be needed...
+
+ // PT: these 3 arrays are "parallel"
+ const PrunerHandle handleOfLastObject = mIndexToHandle[indexOfLastObject];
+ mWorldBoxes [indexOfRemovedObject] = mWorldBoxes [indexOfLastObject];
+ mObjects [indexOfRemovedObject] = mObjects [indexOfLastObject];
+ mIndexToHandle [indexOfRemovedObject] = handleOfLastObject;
+
+ mHandleToIndex[handleOfLastObject] = indexOfRemovedObject;
+ }
+
+ // mHandleToIndex also stores the freelist for removed handles (in place of holes formed by removed handles)
+ mHandleToIndex[h] = mFirstRecycledHandle; // update linked list of available recycled handles
+ mFirstRecycledHandle = h; // update the list head
+
+ return indexOfLastObject;
+ }
+
+void PruningPool::shiftOrigin(const PxVec3& shift)
+{
+ for(PxU32 i=0; i < mNbObjects; i++)
+ {
+ mWorldBoxes[i].minimum -= shift;
+ mWorldBoxes[i].maximum -= shift;
+ }
+}
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.h b/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.h
new file mode 100644
index 00000000..229ea340
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.h
@@ -0,0 +1,120 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef SQ_PRUNINGPOOL_H
+#define SQ_PRUNINGPOOL_H
+
+#include "SqPruner.h"
+#include "SqTypedef.h"
+#include "SqBounds.h"
+
+namespace physx
+{
+namespace Sq
+{
+ // This class is designed to maintain a two way mapping between pair(PrunerPayload,AABB) and PrunerHandle
+ // Internally there's also an index for handles (AP: can be simplified?)
+ // This class effectively stores bounded pruner payloads, returns a PrunerHandle and allows O(1)
+ // access to them using a PrunerHandle
+ // Supported operations are add, remove, update bounds
+ class PruningPool
+ {
+ public:
+ PruningPool();
+ ~PruningPool();
+
+ // O(1) payload lookup by handle
+ PX_FORCE_INLINE const PrunerPayload& getPayload(PrunerHandle handle) const { return mObjects[getIndex(handle)]; }
+
+ // payload lookup that also returns a pointer to the object's world bounds
+ PX_FORCE_INLINE const PrunerPayload& getPayload(PrunerHandle handle, PxBounds3*& bounds) const
+ {
+ const PoolIndex index = getIndex(handle);
+ bounds = mWorldBoxes + index;
+ return mObjects[index];
+ }
+
+ void shiftOrigin(const PxVec3& shift);
+
+ // PT: adds 'count' objects to the pool. Needs 'count' bounds and 'count' payloads passed as input. Writes out 'count' handles
+ // in 'results' array. Function returns number of successfully added objects, ideally 'count' but can be less in case we run
+ // out of memory.
+ PxU32 addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count);
+
+ // this function will swap the last object with the hole formed by removed PrunerHandle object
+ // and return the removed last object's index in the pool
+ PoolIndex removeObject(PrunerHandle h);
+
+ // Data access
+ PX_FORCE_INLINE PoolIndex getIndex(PrunerHandle h)const { return mHandleToIndex[h]; }
+ PX_FORCE_INLINE PrunerPayload* getObjects() const { return mObjects; }
+ PX_FORCE_INLINE PxU32 getNbActiveObjects() const { return mNbObjects; }
+ PX_FORCE_INLINE const PxBounds3* getCurrentWorldBoxes() const { return mWorldBoxes; }
+ PX_FORCE_INLINE PxBounds3* getCurrentWorldBoxes() { return mWorldBoxes; }
+
+ PX_FORCE_INLINE void setWorldAABB(PrunerHandle h, const PxBounds3& worldAABB)
+ {
+ mWorldBoxes[getIndex(h)] = worldAABB;
+ }
+
+ PX_FORCE_INLINE const PxBounds3& getWorldAABB(PrunerHandle h) const
+ {
+ return mWorldBoxes[getIndex(h)];
+ }
+
+ // batch bounds update: newBounds is indexed indirectly through 'indices'; bounds are inflated on write
+ PX_FORCE_INLINE void updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count)
+ {
+ for(PxU32 i=0; i<count; i++)
+ Sq::inflateBounds(mWorldBoxes[getIndex(handles[i])], newBounds[indices[i]]);
+ }
+
+ void preallocate(PxU32 entries);
+// protected:
+
+ PxU32 mNbObjects; //!< Current number of objects
+ PxU32 mMaxNbObjects; //!< Max. number of objects (capacity for mWorldBoxes, mObjects)
+
+ //!< these arrays are parallel
+ PxBounds3* mWorldBoxes; //!< List of world boxes, stores mNbObjects, capacity=mMaxNbObjects
+ PrunerPayload* mObjects; //!< List of objects, stores mNbObjects, capacity=mMaxNbObjects
+// private:
+ PoolIndex* mHandleToIndex; //!< Maps from PrunerHandle to internal index (payload index in mObjects)
+ PrunerHandle* mIndexToHandle; //!< Inverse map from objectIndex to PrunerHandle
+
+ // this is the head of a list of holes formed in mHandleToIndex
+ // by removed handles
+ // the rest of the list is stored in holes in mHandleToIndex (in place)
+ PrunerHandle mFirstRecycledHandle;
+
+ bool resize(PxU32 newCapacity);
+ };
+
+} // namespace Sq
+
+}
+
+#endif // SQ_PRUNINGPOOL_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPruningStructure.cpp b/PhysX_3.4/Source/SceneQuery/src/SqPruningStructure.cpp
new file mode 100644
index 00000000..d785abb2
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqPruningStructure.cpp
@@ -0,0 +1,427 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "SqPruningStructure.h"
+#include "SqAABBPruner.h"
+#include "SqAABBTree.h"
+#include "SqBounds.h"
+
+#include "NpRigidDynamic.h"
+#include "NpRigidStatic.h"
+#include "NpShape.h"
+
+#include "GuBounds.h"
+
+#include "CmTransformUtils.h"
+#include "CmUtils.h"
+
+#include "ScbShape.h"
+
+using namespace physx;
+using namespace Sq;
+
+//////////////////////////////////////////////////////////////////////////
+
+#define NB_OBJECTS_PER_NODE 4
+
+//////////////////////////////////////////////////////////////////////////
+// Deserialization constructor: only forwards the base flags. Tree/actor data is
+// filled in afterwards by importExtraData()/resolveReferences() (see createObject).
+PruningStructure::PruningStructure(PxBaseFlags baseFlags)
+	: PxPruningStructure(baseFlags)
+{
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Default constructor: creates an owning, releasable pruning structure with no
+// actors and empty static/dynamic tree data.
+PruningStructure::PruningStructure()
+	: PxPruningStructure(PxConcreteType::ePRUNING_STRUCTURE, PxBaseFlag::eOWNS_MEMORY | PxBaseFlag::eIS_RELEASABLE),
+	mNbActors(0), mActors(0), mValid(true)
+{
+	// index 0 = static tree, index 1 = dynamic tree (matches PruningIndex)
+	for (PxU32 i = 0; i < 2; i++)
+	{
+		mNbNodes[i] = 0;
+		mNbObjects[i] = 0;
+		mAABBTreeIndices[i] = NULL;
+		mAABBTreeNodes[i] = NULL;
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Destructor: frees the tree node/index buffers and the actor list, but only
+// when this instance owns them; deserialized instances point into extra data
+// owned by the serialization buffer (see importExtraData) and must not free it.
+PruningStructure::~PruningStructure()
+{
+	if(getBaseFlags() & PxBaseFlag::eOWNS_MEMORY)
+	{
+		for (PxU32 i = 0; i < 2; i++)
+		{
+			if(mAABBTreeIndices[i])
+			{
+				PX_FREE(mAABBTreeIndices[i]);
+			}
+			if (mAABBTreeNodes[i])
+			{
+				PX_FREE(mAABBTreeNodes[i]);
+			}
+		}
+
+		if(mActors)
+		{
+			PX_FREE(mActors);
+		}
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Releases the pruning structure. First detaches it from every registered actor
+// (so their shape managers no longer reference a dead pointer), then either
+// deletes the object (owned memory) or runs the destructor in place (serialized
+// in a user-owned buffer).
+void PruningStructure::release()
+{
+	// if we release the pruning structure we set each actor's pruning structure pointer to NULL
+	for (PxU32 i = 0; i < mNbActors; i++)
+	{
+		PX_ASSERT(mActors[i]);
+
+		PxType type = mActors[i]->getConcreteType();
+		if (type == PxConcreteType::eRIGID_STATIC)
+		{
+			static_cast<NpRigidStatic*>(mActors[i])->getShapeManager().setPruningStructure(NULL);
+		}
+		else if (type == PxConcreteType::eRIGID_DYNAMIC)
+		{
+			static_cast<NpRigidDynamic*>(mActors[i])->getShapeManager().setPruningStructure(NULL);
+		}
+	}
+
+	if(getBaseFlags() & PxBaseFlag::eOWNS_MEMORY)
+	{
+		delete this;
+	}
+	else
+	{
+		// in-place (deserialized) instance: destroy without freeing the buffer
+		this->~PruningStructure();
+	}
+}
+
+// Gathers world-space bounds for every scene-query shape of 'actor'.
+// 'bounds' references the first free slot of the caller's bounds array; one
+// entry is written per scene-query shape and 'numShapes' is advanced by the
+// number of entries written.
+// BUG FIX: the original wrote every shape's bounds into the same slot (the
+// single 'bounds' reference) while still advancing 'numShapes', so for actors
+// with more than one scene-query shape all slots after the first were left
+// uninitialized. We now advance a destination pointer derived from '&bounds'.
+template <typename ActorType>
+static void getShapeBounds(PxRigidActor* actor, bool dynamic, PxBounds3& bounds, PxU32& numShapes)
+{
+	PruningIndex::Enum treeStructure = dynamic ? PruningIndex::eDYNAMIC : PruningIndex::eSTATIC;
+	ActorType& a = *static_cast<ActorType*>(actor);
+	PxBounds3* dst = &bounds;	// next free slot in the caller's array
+	const PxU32 nbShapes = a.getNbShapes();
+	for (PxU32 iShape = 0; iShape < nbShapes; iShape++)
+	{
+		NpShape* shape = a.getShapeManager().getShapes()[iShape];
+		if (shape->getFlags() & PxShapeFlag::eSCENE_QUERY_SHAPE)
+		{
+			const Scb::Shape& scbShape = shape->getScbShape();
+			const Scb::Actor& scbActor = a.getScbActorFast();
+
+			(gComputeBoundsTable[treeStructure])(*dst++, scbShape, scbActor);
+			numShapes++;
+		}
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Builds the pruning structure from the given actors in three passes:
+//   1. validate actors (not in a scene, at least one query shape, no other
+//      pruning structure) and count query shapes per tree (static/dynamic);
+//      registers 'this' on each actor's shape manager as a side effect,
+//   2. gather world bounds per shape,
+//   3. build one AABB tree per tree type and copy out its nodes/indices.
+// Returns false (with an error report) on any validation failure.
+bool PruningStructure::build(PxRigidActor*const* actors, PxU32 nbActors)
+{
+	PX_ASSERT(actors);
+	PX_ASSERT(nbActors > 0);
+
+	PxU32 numShapes[2] = { 0, 0 };
+	// parse the actors first to get the shapes size
+	for (PxU32 actorsDone = 0; actorsDone < nbActors; actorsDone++)
+	{
+		if (actorsDone + 1 < nbActors)
+			Ps::prefetch(actors[actorsDone + 1], sizeof(NpRigidDynamic));	// worst case: PxRigidStatic is smaller
+
+		PxType type = actors[actorsDone]->getConcreteType();
+		const PxRigidActor& actor = *(actors[actorsDone]);
+
+		// the actor must not already belong to a scene (or be mid-removal)
+		Scb::ControlState::Enum cs = NpActor::getScbFromPxActor(actor).getControlState();
+		if (!((cs == Scb::ControlState::eNOT_IN_SCENE) || ((cs == Scb::ControlState::eREMOVE_PENDING))))
+		{
+			Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Actor already assigned to a scene!");
+			return false;
+		}
+
+		const PxU32 nbShapes = actor.getNbShapes();
+		bool hasQueryShape = false;
+		for (PxU32 iShape = 0; iShape < nbShapes; iShape++)
+		{
+			PxShape* shape;
+			actor.getShapes(&shape, 1, iShape);
+			if(shape->getFlags() & PxShapeFlag::eSCENE_QUERY_SHAPE)
+			{
+				hasQueryShape = true;
+				if (type == PxConcreteType::eRIGID_STATIC)
+					numShapes[PruningIndex::eSTATIC]++;
+				else
+					numShapes[PruningIndex::eDYNAMIC]++;
+			}
+		}
+
+		// each provided actor must have a query shape
+		if(!hasQueryShape)
+		{
+			Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Provided actor has no scene query shape!");
+			return false;
+		}
+
+		if (type == PxConcreteType::eRIGID_STATIC)
+		{
+			NpRigidStatic* rs = static_cast<NpRigidStatic*>(actors[actorsDone]);
+			if(rs->getShapeManager().getPruningStructure())
+			{
+				Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Provided actor has already a pruning structure!");
+				return false;
+			}
+			rs->getShapeManager().setPruningStructure(this);
+		}
+		else if (type == PxConcreteType::eRIGID_DYNAMIC)
+		{
+			NpRigidDynamic* rd = static_cast<NpRigidDynamic*>(actors[actorsDone]);
+			if (rd->getShapeManager().getPruningStructure())
+			{
+				Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Provided actor has already a pruning structure!");
+				return false;
+			}
+			rd->getShapeManager().setPruningStructure(this);
+		}
+		else
+		{
+			Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Provided actor is not a rigid actor!");
+			return false;
+		}
+	}
+
+	// temporary per-tree bounds arrays, sized from pass 1 counts
+	PxBounds3* bounds[2] = { NULL, NULL };
+
+	for (PxU32 i = 0; i < 2; i++)
+	{
+		if(numShapes[i])
+		{
+			bounds[i] = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*numShapes[i], "Pruner bounds"));
+		}
+	}
+
+	// now I go again and gather bounds and payload
+	numShapes[PruningIndex::eSTATIC] = 0;
+	numShapes[PruningIndex::eDYNAMIC] = 0;
+	for (PxU32 actorsDone = 0; actorsDone < nbActors; actorsDone++)
+	{
+		PxType type = actors[actorsDone]->getConcreteType();
+		if (type == PxConcreteType::eRIGID_STATIC)
+		{
+			getShapeBounds<NpRigidStatic>(actors[actorsDone], false,
+				bounds[PruningIndex::eSTATIC][numShapes[PruningIndex::eSTATIC]], numShapes[PruningIndex::eSTATIC]);
+		}
+		else if (type == PxConcreteType::eRIGID_DYNAMIC)
+		{
+			getShapeBounds<NpRigidDynamic>(actors[actorsDone], true,
+				bounds[PruningIndex::eDYNAMIC][numShapes[PruningIndex::eDYNAMIC]], numShapes[PruningIndex::eDYNAMIC]);
+		}
+	}
+
+	AABBTree aabbTrees[2];
+	for (PxU32 i = 0; i < 2; i++)
+	{
+		mNbObjects[i] = numShapes[i];
+		if (numShapes[i])
+		{
+			// create the AABB tree
+			AABBTreeBuildParams sTB;
+			sTB.mNbPrimitives = numShapes[i];
+			sTB.mAABBArray = bounds[i];
+			sTB.mLimit = NB_OBJECTS_PER_NODE;
+			bool status = aabbTrees[i].build(sTB);
+
+			PX_UNUSED(status);
+			PX_ASSERT(status);
+
+			// store the tree nodes (copied out so the temporary tree can die)
+			mNbNodes[i] = aabbTrees[i].getNbNodes();
+			mAABBTreeNodes[i] = reinterpret_cast<AABBTreeRuntimeNode*>(PX_ALLOC(sizeof(AABBTreeRuntimeNode)*mNbNodes[i], "AABBTreeRuntimeNode"));
+			PxMemCopy(mAABBTreeNodes[i], aabbTrees[i].getNodes(), sizeof(AABBTreeRuntimeNode)*mNbNodes[i]);
+			mAABBTreeIndices[i] = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mNbObjects[i], "PxU32"));
+			PxMemCopy(mAABBTreeIndices[i], aabbTrees[i].getIndices(), sizeof(PxU32)*mNbObjects[i]);
+
+			// discard the data
+			PX_FREE(bounds[i]);
+		}
+	}
+
+	// store the actors for verification and serialization
+	mNbActors = nbActors;
+	mActors = reinterpret_cast<PxActor**>(PX_ALLOC(sizeof(PxActor*)*mNbActors, "PxActor*"));
+	PxMemCopy(mActors, actors, sizeof(PxActor*)*mNbActors);
+
+	return true;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Deserialization factory: placement-news a PruningStructure at 'address',
+// advances the cursor, then patches extra data and actor references in place.
+PruningStructure* PruningStructure::createObject(PxU8*& address, PxDeserializationContext& context)
+{
+	PruningStructure* obj = new (address)PruningStructure(PxBaseFlag::eIS_RELEASABLE);
+	address += sizeof(PruningStructure);
+	obj->importExtraData(context);
+	obj->resolveReferences(context);
+	return obj;
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Translates the serialized actor pointers to their deserialized addresses.
+// Skipped for invalid structures (actor list may be inconsistent).
+void PruningStructure::resolveReferences(PxDeserializationContext& context)
+{
+	if (!isValid())
+		return;
+
+	for (PxU32 i = 0; i < mNbActors; i++)
+	{
+		context.translatePxBase(mActors[i]);
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Reports every actor this structure depends on to the serialization callback,
+// so dependent objects are included in the same collection.
+// NOTE(review): 'requires' is a reserved keyword from C++20 on - fine for this
+// codebase's language level, but would need renaming for newer standards.
+void PruningStructure::requires(PxProcessPxBaseCallback& c)
+{
+	if (!isValid())
+		return;
+
+	for (PxU32 i = 0; i < mNbActors; i++)
+	{
+		c.process(*mActors[i]);
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Serializes the variable-sized payload (tree nodes, tree indices, actor
+// pointer array) after the fixed-size object. Write order must match the read
+// order in importExtraData().
+void PruningStructure::exportExtraData(PxSerializationContext& stream)
+{
+	if (!isValid())
+	{
+		Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "PrunerStructure::exportExtraData: Pruning structure is invalid!");
+		return;
+	}
+
+	for (PxU32 i = 0; i < 2; i++)
+	{
+		if (mAABBTreeNodes[i])
+		{
+			// store nodes
+			stream.alignData(PX_SERIAL_ALIGN);
+			stream.writeData(mAABBTreeNodes[i], mNbNodes[i] * sizeof(AABBTreeRuntimeNode));
+		}
+
+		if(mAABBTreeIndices[i])
+		{
+			// store indices
+			stream.alignData(PX_SERIAL_ALIGN);
+			stream.writeData(mAABBTreeIndices[i], mNbObjects[i] * sizeof(PxU32));
+		}
+	}
+
+	if(mActors)
+	{
+		// store actor pointers
+		stream.alignData(PX_SERIAL_ALIGN);
+		stream.writeData(mActors, mNbActors * sizeof(PxActor*));
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Re-points the tree node/index/actor arrays into the deserialization buffer.
+// The old (serialized) pointer values only act as non-NULL markers telling us
+// which arrays were written; read order must match exportExtraData().
+void PruningStructure::importExtraData(PxDeserializationContext& context)
+{
+	if (!isValid())
+	{
+		Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "PrunerStructure::importExtraData: Pruning structure is invalid!");
+		return;
+	}
+
+	for (PxU32 i = 0; i < 2; i++)
+	{
+		if (mAABBTreeNodes[i])
+		{
+			mAABBTreeNodes[i] = context.readExtraData<Sq::AABBTreeRuntimeNode, PX_SERIAL_ALIGN>(mNbNodes[i]);
+		}
+		if(mAABBTreeIndices[i])
+		{
+			mAABBTreeIndices[i] = context.readExtraData<PxU32, PX_SERIAL_ALIGN>(mNbObjects[i]);
+		}
+	}
+
+	if (mActors)
+	{
+		// read actor pointers
+		mActors = context.readExtraData<PxActor*, PX_SERIAL_ALIGN>(mNbActors);
+	}
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Copies up to bufferSize actor pointers, starting at startIndex, into
+// userBuffer. Returns the number written; 0 (with a warning) when invalid.
+PxU32 PruningStructure::getRigidActors(PxRigidActor** userBuffer, PxU32 bufferSize, PxU32 startIndex/* =0 */) const
+{
+	if(!isValid())
+	{
+		Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "PrunerStructure::getRigidActors: Pruning structure is invalid!");
+		return 0;
+	}
+
+	return Cm::getArrayOfPointers(userBuffer, bufferSize, startIndex, mActors, mNbActors);
+}
+
+//////////////////////////////////////////////////////////////////////////
+
+// Marks the structure invalid and unregisters 'actor' from it: clears the
+// actor's back-pointer to this structure and removes it from mActors via a
+// swap-with-last. Linear search is acceptable: this path only runs alongside
+// an error report about invalid user behavior.
+void PruningStructure::invalidate(PxActor* actor)
+{
+	PX_ASSERT(actor);
+
+	// remove actor from the actor list to avoid mem corruption
+	// this is slow, but should be called only with error msg sent to user about invalid behavior
+	for (PxU32 i = 0; i < mNbActors; i++)
+	{
+		if(mActors[i] == actor)
+		{
+			// set pruning structure to NULL and remove the actor from the list
+			PxType type = mActors[i]->getConcreteType();
+			if (type == PxConcreteType::eRIGID_STATIC)
+			{
+				static_cast<NpRigidStatic*>(mActors[i])->getShapeManager().setPruningStructure(NULL);
+			}
+			else if (type == PxConcreteType::eRIGID_DYNAMIC)
+			{
+				static_cast<NpRigidDynamic*>(mActors[i])->getShapeManager().setPruningStructure(NULL);
+			}
+
+			// BUG FIX: the original 'mActors[mNbActors--]' read mActors[mNbActors],
+			// one element past the end of the array. Pre-decrement first so the
+			// last valid entry is moved into the vacated slot (self-assignment
+			// when i is already the last entry is harmless).
+			mActors[i] = mActors[--mNbActors];
+			break;
+		}
+	}
+
+	mValid = false;
+}
+
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqSceneQueryManager.cpp b/PhysX_3.4/Source/SceneQuery/src/SqSceneQueryManager.cpp
new file mode 100644
index 00000000..cd3e25eb
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqSceneQueryManager.cpp
@@ -0,0 +1,500 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "SqSceneQueryManager.h"
+#include "SqAABBPruner.h"
+#include "SqBucketPruner.h"
+#include "SqBounds.h"
+#include "NpBatchQuery.h"
+#include "PxFiltering.h"
+#include "NpRigidDynamic.h"
+#include "NpRigidStatic.h"
+#include "NpArticulationLink.h"
+#include "CmTransformUtils.h"
+#include "PsAllocator.h"
+#include "PxSceneDesc.h"
+#include "ScBodyCore.h"
+#include "SqPruner.h"
+#include "GuBounds.h"
+#include "NpShape.h"
+
+using namespace physx;
+using namespace Sq;
+using namespace Sc;
+
+namespace physx
+{
+ namespace Sq
+ {
+ OffsetTable gOffsetTable;
+ }
+}
+
+// Default-constructs an empty pruner slot; the real pruner is created by init().
+// mTimestamp starts at 0xffffffff so any cached timestamp comparison fails
+// until init() resets it to 0.
+PrunerExt::PrunerExt() :
+	mPruner		(NULL),
+	mDirtyList	(PX_DEBUG_EXP("SQmDirtyList")),
+	mPrunerType	(PxPruningStructureType::eLAST),
+	mTimestamp	(0xffffffff)
+{
+}
+
+// Destroys the owned pruner (NULL-safe) and resets the pointer.
+PrunerExt::~PrunerExt()
+{
+	PX_DELETE_AND_RESET(mPruner);
+}
+
+// Creates the concrete pruner for the requested structure type:
+//   eNONE              -> BucketPruner (no incremental tree)
+//   eDYNAMIC_AABB_TREE -> AABBPruner with incremental rebuild enabled
+//   eSTATIC_AABB_TREE  -> AABBPruner without incremental rebuild
+// eLAST leaves mPruner NULL.
+void PrunerExt::init(PxPruningStructureType::Enum type, PxU64 contextID)
+{
+	mPrunerType = type;
+	mTimestamp = 0;
+	Pruner* pruner = NULL;
+	switch(type)
+	{
+		case PxPruningStructureType::eNONE:					{ pruner = PX_NEW(BucketPruner);					break;	}
+		case PxPruningStructureType::eDYNAMIC_AABB_TREE:	{ pruner = PX_NEW(AABBPruner)(true, contextID);		break;	}
+		case PxPruningStructureType::eSTATIC_AABB_TREE:		{ pruner = PX_NEW(AABBPruner)(false, contextID);	break;	}
+		case PxPruningStructureType::eLAST:					break;
+	}
+	mPruner = pruner;
+}
+
+// Reserves capacity for nbShapes objects in both the dirty bitmap and the
+// underlying pruner (grow-only: never shrinks the bitmap).
+void PrunerExt::preallocate(PxU32 nbShapes)
+{
+	if(nbShapes > mDirtyMap.size())
+		mDirtyMap.resize(nbShapes);
+
+	if(mPruner)
+		mPruner->preallocate(nbShapes);
+}
+
+// Releases transient memory. The dirty list buffer is only freed when the list
+// is empty (size()==0) - pending entries must survive until flushShapes().
+void PrunerExt::flushMemory()
+{
+	if(!mDirtyList.size())
+		mDirtyList.reset();
+
+	// PT: TODO: flush bitmap here
+
+	// PT: TODO: flush pruner here?
+}
+
+// Recomputes bounds for every dirty shape and pushes them into the pruner.
+// 'index' selects the static/dynamic bounds-computation function from
+// gComputeBoundsTable. Bounds are written straight into the pruner's storage
+// (via getPayload) and committed in one batched updateObjects() call.
+void PrunerExt::flushShapes(PxU32 index)
+{
+	const PxU32 numDirtyList = mDirtyList.size();
+	if(!numDirtyList)
+		return;
+	const PrunerHandle* const prunerHandles = mDirtyList.begin();
+
+	const ComputeBoundsFunc func = gComputeBoundsTable[index];
+
+	for(PxU32 i=0; i<numDirtyList; i++)
+	{
+		const PrunerHandle handle = prunerHandles[i];
+		mDirtyMap.reset(handle);
+
+		// PT: we compute the new bounds and store them directly in the pruner structure to avoid copies. We delay the updateObjects() call
+		// to take advantage of batching.
+		PxBounds3* bounds;
+		const PrunerPayload& pp = mPruner->getPayload(handle, bounds);
+		(func)(*bounds, *(reinterpret_cast<Scb::Shape*>(pp.data[0])), *(reinterpret_cast<Scb::Actor*>(pp.data[1])));
+	}
+	// PT: batch update happens after the loop instead of once per loop iteration
+	mPruner->updateObjects(prunerHandles, NULL, numDirtyList);
+	mTimestamp += numDirtyList;
+	mDirtyList.clear();
+}
+
+// PT: TODO: re-inline this
+void PrunerExt::addToDirtyList(PrunerHandle handle)
+{
+ Cm::BitMap& dirtyMap = mDirtyMap;
+ if(!dirtyMap.test(handle))
+ {
+ dirtyMap.set(handle);
+ mDirtyList.pushBack(handle);
+ mTimestamp++;
+ }
+}
+
+// PT: TODO: re-inline this
+Ps::IntBool PrunerExt::isDirty(PrunerHandle handle) const
+{
+ return mDirtyMap.test(handle);
+}
+
+// PT: TODO: re-inline this
+void PrunerExt::removeFromDirtyList(PrunerHandle handle)
+{
+ Cm::BitMap& dirtyMap = mDirtyMap;
+ if(dirtyMap.test(handle))
+ {
+ dirtyMap.reset(handle);
+ mDirtyList.findAndReplaceWithLast(handle);
+ }
+}
+
+// PT: TODO: re-inline this
+void PrunerExt::growDirtyList(PrunerHandle handle)
+{
+ // pruners must either provide indices in order or reuse existing indices, so this 'if' is enough to ensure we have space for the new handle
+ // PT: TODO: fix this. There is just no need for any of it. The pruning pool itself could support the feature for free, similar to what we do
+ // in MBP. There would be no need for the bitmap or the dirty list array. However doing this through the virtual interface would be clumsy,
+ // adding the cost of virtual calls for very cheap & simple operations. It would be a lot easier to drop it and go back to what we had before.
+
+ Cm::BitMap& dirtyMap = mDirtyMap;
+ if(dirtyMap.size() <= handle)
+ dirtyMap.resize(PxMax<PxU32>(dirtyMap.size() * 2, 1024));
+ PX_ASSERT(handle<dirtyMap.size());
+ dirtyMap.reset(handle);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Creates the static and dynamic pruners according to the scene descriptor,
+// applies the rebuild-rate hint and shape-count preallocation hints, and wires
+// the dynamic-bounds sync helper to the dynamic pruner and its timestamp.
+SceneQueryManager::SceneQueryManager(	Scb::Scene& scene, PxPruningStructureType::Enum staticStructure,
+										PxPruningStructureType::Enum dynamicStructure, PxU32 dynamicTreeRebuildRateHint,
+										const PxSceneLimits& limits) :
+	mScene		(scene)
+{
+	mPrunerExt[PruningIndex::eSTATIC].init(staticStructure, scene.getContextId());
+	mPrunerExt[PruningIndex::eDYNAMIC].init(dynamicStructure, scene.getContextId());
+
+	setDynamicTreeRebuildRateHint(dynamicTreeRebuildRateHint);
+
+	preallocate(limits.maxNbStaticShapes, limits.maxNbDynamicShapes);
+
+	mDynamicBoundsSync.mPruner = mPrunerExt[PruningIndex::eDYNAMIC].pruner();
+	mDynamicBoundsSync.mTimestamp = &mPrunerExt[PruningIndex::eDYNAMIC].mTimestamp;
+}
+
+// Nothing to do: the PrunerExt members own and destroy their pruners.
+SceneQueryManager::~SceneQueryManager()
+{
+}
+
+// Releases transient memory held by both pruner slots (static and dynamic).
+void SceneQueryManager::flushMemory()
+{
+	PxU32 i = PruningIndex::eCOUNT;
+	while(i--)
+		mPrunerExt[i].flushMemory();
+}
+
+// Queues the shape identified by 'data' for a lazy bounds update: the packed
+// PrunerData is decoded into a pruner index (static/dynamic) and a handle,
+// and the handle is added to that pruner's dirty list.
+void SceneQueryManager::markForUpdate(PrunerData data)
+{
+	mPrunerExt[getPrunerIndex(data)].addToDirtyList(getPrunerHandle(data));
+}
+
+// Forwards the scene-limit shape counts to the matching pruner slots.
+void SceneQueryManager::preallocate(PxU32 staticShapes, PxU32 dynamicShapes)
+{
+	mPrunerExt[PruningIndex::eSTATIC].preallocate(staticShapes);
+	mPrunerExt[PruningIndex::eDYNAMIC].preallocate(dynamicShapes);
+}
+
+// Registers a shape with the static (dynamic==false) or dynamic pruner.
+// The payload stores the Scb shape/actor pointers as integers; bounds are
+// taken from 'bounds' (inflated) when supplied, otherwise computed here.
+// Returns the PrunerData encoding (pruner index, handle) for later lookups.
+PrunerData SceneQueryManager::addPrunerShape(const NpShape& shape, const PxRigidActor& actor, bool dynamic, const PxBounds3* bounds, bool hasPrunerStructure)
+{
+	PrunerPayload pp;
+	const Scb::Shape& scbShape = shape.getScbShape();
+	const Scb::Actor& scbActor = gOffsetTable.convertPxActor2Scb(actor);
+	pp.data[0] = size_t(&scbShape);
+	pp.data[1] = size_t(&scbActor);
+
+	PxBounds3 b;
+	if(bounds)
+		inflateBounds(b, *bounds);
+	else
+		(gComputeBoundsTable[dynamic])(b, scbShape, scbActor);
+
+	// dynamic flag doubles as the pruner index (eSTATIC=0, eDYNAMIC=1)
+	const PxU32 index = PxU32(dynamic);
+	PrunerHandle handle;
+	PX_ASSERT(mPrunerExt[index].pruner());
+	mPrunerExt[index].pruner()->addObjects(&handle, &b, &pp, 1, hasPrunerStructure);
+	mPrunerExt[index].invalidateTimestamp();
+
+	// make sure the dirty bitmap can address the new handle
+	mPrunerExt[index].growDirtyList(handle);
+
+	return createPrunerData(index, handle);
+}
+
+// Returns the payload (Scb shape/actor pointer pair) registered for 'data',
+// decoding the packed pruner index and handle.
+const PrunerPayload& SceneQueryManager::getPayload(PrunerData data) const
+{
+	const PxU32 prunerIndex = getPrunerIndex(data);
+	return mPrunerExt[prunerIndex].pruner()->getPayload(getPrunerHandle(data));
+}
+
+// Unregisters the shape identified by 'data': cancels any pending dirty update
+// first (its handle is about to be recycled), invalidates the timestamp, then
+// removes the object from the pruner.
+void SceneQueryManager::removePrunerShape(PrunerData data)
+{
+	const PxU32 index = getPrunerIndex(data);
+	const PrunerHandle handle = getPrunerHandle(data);
+
+	PX_ASSERT(mPrunerExt[index].pruner());
+
+	mPrunerExt[index].removeFromDirtyList(handle);
+
+	mPrunerExt[index].invalidateTimestamp();
+	mPrunerExt[index].pruner()->removeObjects(&handle);
+}
+
+// Stores the rebuild-rate hint and forwards it to every pruner that is an
+// incremental AABB tree (only eDYNAMIC_AABB_TREE pruners rebuild over time).
+void SceneQueryManager::setDynamicTreeRebuildRateHint(PxU32 rebuildRateHint)
+{
+	mRebuildRateHint = rebuildRateHint;
+
+	for(PxU32 i=0;i<PruningIndex::eCOUNT;i++)
+	{
+		if(mPrunerExt[i].pruner() && mPrunerExt[i].type() == PxPruningStructureType::eDYNAMIC_AABB_TREE)
+			static_cast<AABBPruner*>(mPrunerExt[i].pruner())->setRebuildRateHint(rebuildRateHint);
+	}
+}
+
+
+// Computes the inflated world-space AABB of a shape attached to a dynamic body.
+// When the body is kinematic and flagged eUSE_KINEMATIC_TARGET_FOR_SCENE_QUERIES
+// and a kinematic target is set, the target pose is used instead of the current
+// body pose (so queries see where the body will be, not where it is).
+static PxBounds3 computeWorldAABB(const Scb::Shape& scbShape, const Sc::BodyCore& bodyCore)
+{
+	const Gu::GeometryUnion& geom = scbShape.getGeometryUnion();
+	const PxTransform& shape2Actor = scbShape.getShape2Actor();
+
+	PX_ALIGN(16, PxTransform) globalPose;
+
+	PX_ALIGN(16, PxTransform) kinematicTarget;
+	PxU16 sqktFlags = PxRigidBodyFlag::eKINEMATIC | PxRigidBodyFlag::eUSE_KINEMATIC_TARGET_FOR_SCENE_QUERIES;
+	bool useTarget = (PxU16(bodyCore.getFlags()) & sqktFlags) == sqktFlags;
+
+	const PxTransform& body2World = (useTarget && bodyCore.getKinematicTarget(kinematicTarget)) ? kinematicTarget : bodyCore.getBody2World();
+	Cm::getDynamicGlobalPoseAligned(body2World, shape2Actor, bodyCore.getBody2Actor(), globalPose);
+
+	PxBounds3 tmp;
+	inflateBounds(tmp, Gu::computeBounds(geom.getGeometry(), globalPose, false));
+	return tmp;
+}
+
+
+// Debug validation: for every non-frozen active body, recomputes each query
+// shape's world AABB and asserts it matches the AABB stored in the dynamic
+// pruner (within a tolerance-scaled epsilon). Only meaningful when the dynamic
+// pruner is an AABB tree; effectively a no-op in release builds (asserts only).
+void SceneQueryManager::validateSimUpdates()
+{
+	if (mPrunerExt[1].type() != PxPruningStructureType::eDYNAMIC_AABB_TREE)
+		return;
+
+
+	Sc::BodyCore*const* activeBodies = mScene.getActiveBodiesArray();
+	const PxU32 nbActiveBodies = mScene.getNumActiveBodies();
+
+	for (PxU32 i = 0; i < nbActiveBodies; ++i)
+	{
+		const Sc::BodyCore* bCore = activeBodies[i];
+
+		if (bCore->isFrozen())
+			continue;
+
+		PxRigidBody* pxBody = static_cast<PxRigidBody*>(bCore->getPxActor());
+
+		PX_ASSERT(pxBody->getConcreteType() == PxConcreteType::eRIGID_DYNAMIC || pxBody->getConcreteType() == PxConcreteType::eARTICULATION_LINK);
+
+		NpShapeManager& shapeManager = *NpActor::getShapeManager(*pxBody);
+		const PxU32 nbShapes = shapeManager.getNbShapes();
+		NpShape* const* shape = shapeManager.getShapes();
+
+
+		for (PxU32 j = 0; j<nbShapes; j++)
+		{
+			PrunerData prunerData = shapeManager.getPrunerData(j);
+			// NOTE(review): compares against INVALID_PRUNERHANDLE while
+			// processSimUpdates uses SQ_INVALID_PRUNER_DATA for the same check -
+			// presumably the same sentinel; verify they are identical.
+			if (prunerData != INVALID_PRUNERHANDLE)
+			{
+				const PrunerHandle handle = getPrunerHandle(prunerData);
+				const PxBounds3 worldAABB = computeWorldAABB(shape[j]->getScbShape(), *bCore);
+				PxBounds3 prunerAABB = static_cast<AABBPruner*>(mPrunerExt[1].pruner())->getAABB(handle);
+				PX_ASSERT((worldAABB.minimum - prunerAABB.minimum).magnitudeSquared() < 0.005f*mScene.getPxScene()->getPhysics().getTolerancesScale().length);
+				PX_ASSERT((worldAABB.maximum - prunerAABB.maximum).magnitudeSquared() < 0.005f*mScene.getPxScene()->getPhysics().getTolerancesScale().length);
+				PX_UNUSED(worldAABB);
+				PX_UNUSED(prunerAABB);
+			}
+		}
+	}
+}
+
+// End-of-simulation update of the pruning trees:
+//   1. recompute bounds for every shape of every non-frozen active body and
+//      push them to the dynamic pruner in batches of NB_BATCHED_OBJECTS
+//      (shapes already on the dirty list are skipped - flushShapes handles them),
+//   2. flush user-modified (dirty) shapes,
+//   3. advance incremental AABB-tree rebuilds and commit both pruners.
+void SceneQueryManager::processSimUpdates()
+{
+	PX_PROFILE_ZONE("Sim.updatePruningTrees", mScene.getContextId());
+
+	{
+		PX_PROFILE_ZONE("SceneQuery.processActiveShapes", mScene.getContextId());
+
+		// update all active objects
+		BodyCore*const* activeBodies = mScene.getScScene().getActiveBodiesArray();
+		PxU32 nbActiveBodies = mScene.getScScene().getNumActiveBodies();
+
+#define NB_BATCHED_OBJECTS	128
+		PrunerHandle batchedHandles[NB_BATCHED_OBJECTS];
+		PxU32 nbBatchedObjects = 0;
+		Pruner* pruner = mPrunerExt[PruningIndex::eDYNAMIC].pruner();
+
+		while(nbActiveBodies--)
+		{
+			// PT: TODO: don't put frozen objects in "active bodies" array? After all they
+			// are also not included in the 'active transforms' or 'active actors' arrays.
+			BodyCore* currentBody = *activeBodies++;
+			if(currentBody->isFrozen())
+				continue;
+
+			PxActorType::Enum type;
+			PxRigidBody* pxBody = static_cast<PxRigidBody*>(getPxActorFromBodyCore(currentBody, type));
+			PX_ASSERT(pxBody->getConcreteType()==PxConcreteType::eRIGID_DYNAMIC || pxBody->getConcreteType()==PxConcreteType::eARTICULATION_LINK);
+
+			NpShapeManager* shapeManager;
+			if(type==PxActorType::eRIGID_DYNAMIC)
+			{
+				NpRigidDynamic* rigidDynamic = static_cast<NpRigidDynamic*>(pxBody);
+				shapeManager = &rigidDynamic->getShapeManager();
+			}
+			else
+			{
+				NpArticulationLink* articulationLink = static_cast<NpArticulationLink*>(pxBody);
+				shapeManager = &articulationLink->getShapeManager();
+			}
+
+			const PxU32 nbShapes = shapeManager->getNbShapes();
+			for(PxU32 i=0; i<nbShapes; i++)
+			{
+				const PrunerData data = shapeManager->getPrunerData(i);
+				if(data!=SQ_INVALID_PRUNER_DATA)
+				{
+					// PT: index can't be zero here!
+					PX_ASSERT(getPrunerIndex(data)==PruningIndex::eDYNAMIC);
+
+					const PrunerHandle handle = getPrunerHandle(data);
+
+					if(!mPrunerExt[PruningIndex::eDYNAMIC].isDirty(handle))	// PT: if dirty, will be updated in "flushShapes"
+					{
+						batchedHandles[nbBatchedObjects] = handle;
+
+						// write new bounds directly into the pruner's storage, commit later in batch
+						PxBounds3* bounds;
+						const PrunerPayload& pp = pruner->getPayload(handle, bounds);
+						computeDynamicWorldAABB(*bounds, *(reinterpret_cast<Scb::Shape*>(pp.data[0])), *(reinterpret_cast<Scb::Actor*>(pp.data[1])));
+						nbBatchedObjects++;
+
+						if(nbBatchedObjects==NB_BATCHED_OBJECTS)
+						{
+							mPrunerExt[PruningIndex::eDYNAMIC].invalidateTimestamp();
+							pruner->updateObjects(batchedHandles, NULL, nbBatchedObjects);
+							nbBatchedObjects = 0;
+						}
+					}
+				}
+			}
+		}
+		// flush the final partial batch
+		if(nbBatchedObjects)
+		{
+			mPrunerExt[PruningIndex::eDYNAMIC].invalidateTimestamp();
+			pruner->updateObjects(batchedHandles, NULL, nbBatchedObjects);
+		}
+	}
+
+	// flush user modified objects
+	flushShapes();
+
+	for(PxU32 i=0;i<PruningIndex::eCOUNT;i++)
+	{
+		if(mPrunerExt[i].pruner() && mPrunerExt[i].type() == PxPruningStructureType::eDYNAMIC_AABB_TREE)
+			static_cast<AABBPruner*>(mPrunerExt[i].pruner())->buildStep();
+
+		mPrunerExt[i].pruner()->commit();
+	}
+}
+
+// Scene-query build step after a (manual) sync: flushes dirty shapes, advances
+// incremental AABB-tree rebuilds, and optionally commits the pruners ('commit'
+// is false when the caller wants to defer the commit to a later flush).
+void SceneQueryManager::afterSync(bool commit)
+{
+	PX_PROFILE_ZONE("Sim.sceneQueryBuildStep", mScene.getContextId());
+
+	// flush user modified objects
+	flushShapes();
+
+	// CONSISTENCY: use PruningIndex::eCOUNT like every other loop over the
+	// pruners in this file, instead of the magic literal '2'.
+	for (PxU32 i = 0; i<PruningIndex::eCOUNT; i++)
+	{
+		if (mPrunerExt[i].pruner() && mPrunerExt[i].type() == PxPruningStructureType::eDYNAMIC_AABB_TREE)
+			static_cast<AABBPruner*>(mPrunerExt[i].pruner())->buildStep();
+
+		if (commit)
+			mPrunerExt[i].pruner()->commit();
+	}
+}
+
+// Flushes the dirty lists of both pruners; the loop index doubles as the
+// selector into gComputeBoundsTable (static vs dynamic bounds computation).
+void SceneQueryManager::flushShapes()
+{
+	PX_PROFILE_ZONE("SceneQuery.flushShapes", mScene.getContextId());
+
+	// must already have acquired writer lock here
+
+	for(PxU32 i=0; i<PruningIndex::eCOUNT; i++)
+		mPrunerExt[i].flushShapes(i);
+}
+
+// Flushes pending shape updates and commits both pruners under the scene-query
+// lock. Uses an RAII scoped lock (consistent with forceDynamicTreeRebuild) so
+// the mutex is released on every exit path, instead of the original unbalanced
+// lock()/unlock() pair which would leave the mutex held if a commit threw.
+void SceneQueryManager::flushUpdates()
+{
+	PX_PROFILE_ZONE("SceneQuery.flushUpdates", mScene.getContextId());
+
+	// no need to take lock if manual sq update is enabled
+	// as flushUpdates will only be called from NpScene::flushQueryUpdates()
+	Ps::Mutex::ScopedLock lock(mSceneQueryLock);
+
+	flushShapes();
+
+	for(PxU32 i=0;i<PruningIndex::eCOUNT;i++)
+		if(mPrunerExt[i].pruner())
+			mPrunerExt[i].pruner()->commit();
+}
+
+// Forces a full, immediate rebuild of the selected pruners (purge + commit).
+// Only applies to eDYNAMIC_AABB_TREE pruners; runs under the scene-query lock.
+void SceneQueryManager::forceDynamicTreeRebuild(bool rebuildStaticStructure, bool rebuildDynamicStructure)
+{
+	PX_PROFILE_ZONE("SceneQuery.forceDynamicTreeRebuild", mScene.getContextId());
+
+	const bool rebuild[PruningIndex::eCOUNT] = { rebuildStaticStructure, rebuildDynamicStructure };
+
+	Ps::Mutex::ScopedLock lock(mSceneQueryLock);
+	for(PxU32 i=0; i<PruningIndex::eCOUNT; i++)
+	{
+		if(rebuild[i] && mPrunerExt[i].pruner() && mPrunerExt[i].type() == PxPruningStructureType::eDYNAMIC_AABB_TREE)
+		{
+			static_cast<AABBPruner*>(mPrunerExt[i].pruner())->purge();
+			static_cast<AABBPruner*>(mPrunerExt[i].pruner())->commit();
+		}
+	}
+}
+
+// Propagates a scene origin shift to both pruners (all stored bounds move by -shift).
+void SceneQueryManager::shiftOrigin(const PxVec3& shift)
+{
+	for(PxU32 i=0; i<PruningIndex::eCOUNT; i++)
+		mPrunerExt[i].pruner()->shiftOrigin(shift);
+}
+
+// Pushes simulation-produced bounds into the dynamic pruner (sqRefs are the
+// pruner handles, indices map each handle to its slot in 'bounds') and bumps
+// the timestamp once per non-empty batch to invalidate cached query state.
+void DynamicBoundsSync::sync(const PxU32* sqRefs, const PxU32* indices, const PxBounds3* bounds, PxU32 count)
+{
+	mPruner->updateObjects(sqRefs, indices, bounds, count);
+
+	if (count)
+		(*mTimestamp)++;
+}
+
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqTypedef.h b/PhysX_3.4/Source/SceneQuery/src/SqTypedef.h
new file mode 100644
index 00000000..48b77592
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqTypedef.h
@@ -0,0 +1,47 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef SQ_TYPEDEF_H
+#define SQ_TYPEDEF_H
+
+#include "CmPhysXCommon.h"
+
+namespace physx
+{
+namespace Sq
+{
+ // Shared scene-query index typedefs. Both are plain PxU32 handles;
+ // the distinct names document which table an index refers to.
+ // Presumably: PoolIndex indexes into the pruning pool, TreeNodeIndex
+ // indexes AABB-tree nodes — confirm against SqPruningPool/SqAABBTree.
+ typedef PxU32 PoolIndex;
+ typedef PxU32 TreeNodeIndex;
+
+ // Forward declarations so headers can hold pointers/references to
+ // these types without pulling in SqAABBTree.h.
+ class AABBTree;
+ class AABBTreeBuildParams;
+}
+}
+
+#endif // SQ_TYPEDEF_H