diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/SceneQuery/src | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/SceneQuery/src')
20 files changed, 8794 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.cpp b/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.cpp new file mode 100644 index 00000000..895c5776 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.cpp @@ -0,0 +1,816 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. 
All rights reserved. + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "foundation/PxProfiler.h" +#include "PsIntrinsics.h" +#include "PsUserAllocated.h" +#include "PsBitUtils.h" +#include "PsFoundation.h" +#include "SqAABBPruner.h" +#include "SqAABBTree.h" +#include "SqPrunerMergeData.h" +#include "GuSphere.h" +#include "GuBox.h" +#include "GuCapsule.h" +#include "SqAABBTreeQuery.h" +#include "GuBounds.h" + +using namespace physx; +using namespace Gu; +using namespace Sq; +using namespace Cm; + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +IncrementalPruner* physx::Sq::createAABBPruner(bool incrementalRebuild) +{ + return PX_NEW(Sq::AABBPruner)(incrementalRebuild, 0); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// PT: currently limited to 15 max +#define NB_OBJECTS_PER_NODE 4 + +AABBPruner::AABBPruner(bool incrementalRebuild, PxU64 contextID) : + mAABBTree (NULL), + mNewTree (NULL), + mCachedBoxes (NULL), + mNbCachedBoxes (0), + mNbCalls (0), + mTimeStamp (0), + mBucketPruner (&mPool), + mProgress (BUILD_NOT_STARTED), + mRebuildRateHint (100), + mAdaptiveRebuildTerm(0), + mIncrementalRebuild (incrementalRebuild), + mUncommittedChanges (false), + mNeedsNewTree (false), + mNewTreeFixups (PX_DEBUG_EXP("AABBPruner::mNewTreeFixups")), + mContextID (contextID) +{ +} + +AABBPruner::~AABBPruner() +{ + release(); +} + 
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Add, Remove, Update methods + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool AABBPruner::addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count, bool hasPruningStructure) +{ + PX_PROFILE_ZONE("SceneQuery.prunerAddObjects", mContextID); + + if(!count) + return true; + + // no need to do refitMarked for added objects since they are not in the tree + + // if we have provided pruning structure, we will merge it, the changes will be applied after the objects has been addded + if(!hasPruningStructure || !mAABBTree) + mUncommittedChanges = true; + + // PT: TODO: 'addObjects' for bucket pruner too. Not urgent since we always call the function with count=1 at the moment + const PxU32 valid = mPool.addObjects(results, bounds, payload, count); + + // Bucket pruner is only used while the dynamic pruner is rebuilding + // For the static pruner a full rebuild will happen in commit() every time we modify something, this is not true if + // pruning structure was provided. The objects tree will be merged directly into the static tree. No rebuild will be triggered. 
+ if(mIncrementalRebuild && mAABBTree) + { + mNeedsNewTree = true; // each add forces a tree rebuild + + // if a pruner structure is provided, we dont move the new objects into bucket pruner + // the pruning structure will be merged into the bucket pruner + if(!hasPruningStructure) + { + for(PxU32 i=0;i<valid;i++) + mBucketPruner.addObject(payload[i], bounds[i], mTimeStamp); + } + } + return valid==count; +} + +void AABBPruner::updateObjects(const PrunerHandle* handles, const PxBounds3* newBounds, PxU32 count) +{ + PX_PROFILE_ZONE("SceneQuery.prunerUpdateObjects", mContextID); + + if(!count) + return; + + mUncommittedChanges = true; + + if(newBounds) + { + for(PxU32 i=0; i<count; i++) + mPool.setWorldAABB(handles[i], newBounds[i]); // only updates the bounds + } + + if(mIncrementalRebuild && mAABBTree) + { + mNeedsNewTree = true; // each update forces a tree rebuild + newBounds = mPool.getCurrentWorldBoxes(); + PrunerPayload* payloads = mPool.getObjects(); + for(PxU32 i=0; i<count; i++) + { + const PoolIndex poolIndex = mPool.getIndex(handles[i]); + const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex]; + if(treeNodeIndex!=INVALID_NODE_ID) // this means it's in the current tree still and hasn't been removed + mAABBTree->markNodeForRefit(treeNodeIndex); + else // otherwise it means it should be in the bucket pruner + { + bool found = mBucketPruner.updateObject(newBounds[poolIndex], payloads[poolIndex]); + PX_UNUSED(found); PX_ASSERT(found); + } + + if(mProgress==BUILD_NEW_MAPPING || mProgress==BUILD_FULL_REFIT) + mToRefit.pushBack(poolIndex); + } + } +} + +void AABBPruner::updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count) +{ + PX_PROFILE_ZONE("SceneQuery.prunerUpdateObjects", mContextID); + + mUncommittedChanges = true; + + mPool.updateObjects(handles, indices, newBounds, count); + + if (mIncrementalRebuild && mAABBTree) + { + mNeedsNewTree = true; // each update forces a tree rebuild + for (PxU32 i = 0; 
i<count; i++) + { + const PoolIndex poolIndex = mPool.getIndex(handles[i]); + const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex]; + if (treeNodeIndex != INVALID_NODE_ID) // this means it's in the current tree still and hasn't been removed + mAABBTree->markNodeForRefit(treeNodeIndex); + else // otherwise it means it should be in the bucket pruner + { + bool found = mBucketPruner.updateObject(newBounds[indices[i]], mPool.getPayload(handles[i])); + PX_UNUSED(found); PX_ASSERT(found); + } + + if (mProgress == BUILD_NEW_MAPPING || mProgress == BUILD_FULL_REFIT) + mToRefit.pushBack(poolIndex); + } + } +} + +void AABBPruner::removeObjects(const PrunerHandle* handles, PxU32 count) +{ + PX_PROFILE_ZONE("SceneQuery.prunerRemoveObjects", mContextID); + + if(!count) + return; + + mUncommittedChanges = true; + + for(PxU32 i=0; i<count; i++) + { + const PrunerHandle h = handles[i]; + // copy the payload before removing it since we need to know the payload to remove it from the bucket pruner + const PrunerPayload removedPayload = mPool.getPayload(h); + const PoolIndex poolIndex = mPool.getIndex(h); // save the pool index for removed object + const PoolIndex poolRelocatedLastIndex = mPool.removeObject(h); // save the lastIndex returned by removeObject + if(mIncrementalRebuild && mAABBTree) + { + mNeedsNewTree = true; + + const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex]; // already removed from pool but still in tree map + const PrunerPayload swappedPayload = mPool.getObjects()[poolIndex]; + if(treeNodeIndex!=INVALID_NODE_ID) // can be invalid if removed + { + mAABBTree->markNodeForRefit(treeNodeIndex); // mark the spot as blank + mBucketPruner.swapIndex(poolIndex, swappedPayload, poolRelocatedLastIndex); // if swapped index is in bucket pruner + } + else + { + PX_ASSERT(treeNodeIndex==INVALID_PRUNERHANDLE); + PxU32 timeStamp; + bool status = mBucketPruner.removeObject(removedPayload, poolIndex, swappedPayload, poolRelocatedLastIndex, timeStamp); + PX_ASSERT(status); + 
PX_UNUSED(status); + } + + mTreeMap.invalidate(poolIndex, poolRelocatedLastIndex, *mAABBTree); + if(mNewTree) + mNewTreeFixups.pushBack(NewTreeFixup(poolIndex, poolRelocatedLastIndex)); + } + } + + if (mPool.getNbActiveObjects()==0) + { + // this is just to make sure we release all the internal data once all the objects are out of the pruner + // since this is the only place we know that and we don't want to keep memory reserved + release(); + + // Pruner API requires a commit before the next query, even if we ended up removing the entire tree here. This + // forces that to happen. + mUncommittedChanges = true; + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Query Implementation + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +PxAgain AABBPruner::overlap(const ShapeData& queryVolume, PrunerCallback& pcb) const +{ + PX_ASSERT(!mUncommittedChanges); + + PxAgain again = true; + + if(mAABBTree) + { + switch(queryVolume.getType()) + { + case PxGeometryType::eBOX: + { + if(queryVolume.isOBB()) + { + const Gu::OBBAABBTest test(queryVolume.getPrunerWorldPos(), queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerBoxGeomExtentsInflated()); + again = AABBTreeOverlap<Gu::OBBAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb); + } + else + { + const Gu::AABBAABBTest test(queryVolume.getPrunerInflatedWorldAABB()); + again = AABBTreeOverlap<Gu::AABBAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb); + } + } + break; + case PxGeometryType::eCAPSULE: + { + const Gu::Capsule& capsule = queryVolume.getGuCapsule(); + const Gu::CapsuleAABBTest test( capsule.p1, 
queryVolume.getPrunerWorldRot33().column0, + queryVolume.getCapsuleHalfHeight()*2.0f, PxVec3(capsule.radius*SQ_PRUNER_INFLATION)); + again = AABBTreeOverlap<Gu::CapsuleAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb); + } + break; + case PxGeometryType::eSPHERE: + { + const Gu::Sphere& sphere = queryVolume.getGuSphere(); + Gu::SphereAABBTest test(sphere.center, sphere.radius); + again = AABBTreeOverlap<Gu::SphereAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb); + } + break; + case PxGeometryType::eCONVEXMESH: + { + const Gu::OBBAABBTest test(queryVolume.getPrunerWorldPos(), queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerBoxGeomExtentsInflated()); + again = AABBTreeOverlap<Gu::OBBAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb); + } + break; + case PxGeometryType::ePLANE: + case PxGeometryType::eTRIANGLEMESH: + case PxGeometryType::eHEIGHTFIELD: + case PxGeometryType::eGEOMETRY_COUNT: + case PxGeometryType::eINVALID: + PX_ALWAYS_ASSERT_MESSAGE("unsupported overlap query volume geometry type"); + } + } + + if(again && mIncrementalRebuild && mBucketPruner.getNbObjects()) + again = mBucketPruner.overlap(queryVolume, pcb); + + return again; +} + +PxAgain AABBPruner::sweep(const ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const +{ + PX_ASSERT(!mUncommittedChanges); + + PxAgain again = true; + + if(mAABBTree) + { + const PxBounds3& aabb = queryVolume.getPrunerInflatedWorldAABB(); + const PxVec3 extents = aabb.getExtents(); + again = AABBTreeRaycast<true>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, aabb.getCenter(), unitDir, inOutDistance, extents, pcb); + } + + if(again && mIncrementalRebuild && mBucketPruner.getNbObjects()) + again = mBucketPruner.sweep(queryVolume, unitDir, inOutDistance, pcb); + + return again; +} + +PxAgain AABBPruner::raycast(const PxVec3& origin, const PxVec3& unitDir, 
PxReal& inOutDistance, PrunerCallback& pcb) const +{ + PX_ASSERT(!mUncommittedChanges); + + PxAgain again = true; + + if(mAABBTree) + again = AABBTreeRaycast<false>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, origin, unitDir, inOutDistance, PxVec3(0.0f), pcb); + + if(again && mIncrementalRebuild && mBucketPruner.getNbObjects()) + again = mBucketPruner.raycast(origin, unitDir, inOutDistance, pcb); + + return again; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Other methods of Pruner Interface + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// This isn't part of the pruner virtual interface, but it is part of the public interface +// of AABBPruner - it gets called by SqManager to force a rebuild, and requires a commit() before +// queries can take place + +void AABBPruner::purge() +{ + release(); + mUncommittedChanges = true; // this ensures a commit() must happen before any query +} + +void AABBPruner::setRebuildRateHint(PxU32 nbStepsForRebuild) +{ + PX_ASSERT(nbStepsForRebuild > 3); + mRebuildRateHint = (nbStepsForRebuild-3); // looks like a magic number to account for the rebuild pipeline latency + mAdaptiveRebuildTerm = 0; +} + +// Commit either performs a refit if background rebuild is not yet finished +// or swaps the current tree for the second tree rebuilt in the background +void AABBPruner::commit() +{ + PX_PROFILE_ZONE("SceneQuery.prunerCommit", mContextID); + + if(!mUncommittedChanges) + // Q: seems like this is both for refit and finalization so is this is correct? + // i.e. in a situation when we started rebuilding a tree and didn't add anything since + // who is going to set mUncommittedChanges to true? 
+ // A: it's set in buildStep at final stage, so that finalization is forced. + // Seems a bit difficult to follow and verify correctness. + return; + + mUncommittedChanges = false; + + if(!mAABBTree || !mIncrementalRebuild) + { +#if PX_CHECKED + if(!mIncrementalRebuild && mAABBTree) + Ps::getFoundation().error(PxErrorCode::ePERF_WARNING, __FILE__, __LINE__, "SceneQuery static AABB Tree rebuilt, because a shape attached to a static actor was added, removed or moved, and PxSceneDesc::staticStructure is set to eSTATIC_AABB_TREE."); +#endif + fullRebuildAABBTree(); + return; + } + + // Note: it is not safe to call AABBPruner::build() here + // because the first thread will perform one step of the incremental update, + // continue raycasting, while the second thread performs the next step in + // the incremental update + + // Calling Refit() below is safe. It will call + // StaticPruner::build() when necessary. Both will early + // exit if the tree is already up to date, if it is not already, then we + // must be the first thread performing raycasts on a dirty tree and other + // scene query threads will be locked out by the write lock in + // SceneQueryManager::flushUpdates() + + + if (mProgress != BUILD_FINISHED) + { + // Calling refit because the second tree is not ready to be swapped in (mProgress != BUILD_FINISHED) + // Generally speaking as long as things keep moving the second build will never catch up with true state + refitUpdatedAndRemoved(); + } + else + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeFinalize", mContextID); + + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeSwitch", mContextID); + + PX_DELETE(mAABBTree); // delete the old tree + PX_FREE_AND_RESET(mCachedBoxes); + mProgress = BUILD_NOT_STARTED; // reset the build state to initial + + // Adjust adaptive term to get closer to specified rebuild rate. 
+ // perform an even division correction to make sure the rebuild rate adds up + if (mNbCalls > mRebuildRateHint) + mAdaptiveRebuildTerm++; + else if (mNbCalls < mRebuildRateHint) + mAdaptiveRebuildTerm--; + + // Switch trees +#if PX_DEBUG + mNewTree->validate(); +#endif + mAABBTree = mNewTree; // set current tree to progressively rebuilt tree + mNewTree = NULL; // clear out the progressively rebuild tree pointer + } + + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeMapping", mContextID); + + // rebuild the tree map to match the current (newly built) tree + mTreeMap.initMap(PxMax(mPool.getNbActiveObjects(), mNbCachedBoxes), *mAABBTree); + + // The new mapping has been computed using only indices stored in the new tree. Those indices map the pruning pool + // we had when starting to build the tree. We need to re-apply recorded moves to fix the tree that finished rebuilding. + // AP: the problem here is while we are rebuilding the tree there are ongoing modifications to the current tree + // but the background build has a cached copy of all the AABBs at the time it was started + // (and will produce indices referencing those) + // Things that can happen in the meantime: update, remove, add, commit + for(NewTreeFixup* r = mNewTreeFixups.begin(); r < mNewTreeFixups.end(); r++) + { + // PT: we're not doing a full refit after this point anymore, so the remaining deleted objects must be manually marked for + // refit (otherwise their AABB in the tree would remain valid, leading to crashes when the corresponding index is 0xffffffff). + // We must do this before invalidating the corresponding tree nodes in the map, obviously (otherwise we'd be reading node + // indices that we already invalidated). 
+ const PoolIndex poolIndex = r->removedIndex; + const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex]; + if(treeNodeIndex!=INVALID_NODE_ID) + mAABBTree->markNodeForRefit(treeNodeIndex); + + mTreeMap.invalidate(r->removedIndex, r->relocatedLastIndex, *mAABBTree); + } + mNewTreeFixups.clear(); // clear out the fixups since we just applied them all + } + + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeFinalRefit", mContextID); + + const PxU32 size = mToRefit.size(); + for(PxU32 i=0;i<size;i++) + { + const PoolIndex poolIndex = mToRefit[i]; + const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex]; + if(treeNodeIndex!=INVALID_NODE_ID) + mAABBTree->markNodeForRefit(treeNodeIndex); + } + mToRefit.clear(); + refitUpdatedAndRemoved(); + } + + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeRemoveObjects", mContextID); + + PxU32 nbRemovedPairs = mBucketPruner.removeMarkedObjects(mTimeStamp-1); + PX_UNUSED(nbRemovedPairs); + + mNeedsNewTree = mBucketPruner.getNbObjects()>0; + } + } + + updateBucketPruner(); +} + + +void AABBPruner::shiftOrigin(const PxVec3& shift) +{ + mPool.shiftOrigin(shift); + + if(mAABBTree) + mAABBTree->shiftOrigin(shift); + + if(mIncrementalRebuild) + mBucketPruner.shiftOrigin(shift); + + if(mNewTree) + mNewTree->shiftOrigin(shift); +} + +#include "CmRenderOutput.h" +void AABBPruner::visualize(Cm::RenderOutput& out, PxU32 color) const +{ + // getAABBTree() asserts when pruner is dirty. NpScene::visualization() does not enforce flushUpdate. 
see DE7834 + const AABBTree* tree = mAABBTree; + + if(tree) + { + struct Local + { + static void _Draw(const AABBTreeRuntimeNode* root, const AABBTreeRuntimeNode* node, Cm::RenderOutput& out_) + { + out_ << Cm::DebugBox(node->mBV, true); + if (node->isLeaf()) + return; + _Draw(root, node->getPos(root), out_); + _Draw(root, node->getNeg(root), out_); + } + }; + out << PxTransform(PxIdentity); + out << color; + Local::_Draw(tree->getNodes(), tree->getNodes(), out); + } + + // Render added objects not yet in the tree + out << PxTransform(PxIdentity); + out << PxU32(PxDebugColor::eARGB_WHITE); + + if(mIncrementalRebuild && mBucketPruner.getNbObjects()) + mBucketPruner.visualize(out, color); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Internal methods + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool AABBPruner::buildStep() +{ + PX_PROFILE_ZONE("SceneQuery.prunerBuildStep", mContextID); + + PX_ASSERT(mIncrementalRebuild); + if(mNeedsNewTree) + { + if(mProgress==BUILD_NOT_STARTED) + { + const PxU32 nbObjects = mPool.getNbActiveObjects(); + if(!nbObjects) + return true; + + PX_DELETE(mNewTree); + mNewTree = PX_NEW(AABBTree); + + mNbCachedBoxes = nbObjects; + // PT: we always allocate one extra box, to make sure we can safely use V4 loads on the array + mCachedBoxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*(nbObjects+1), "PxBound3")); + + PxMemCopy(mCachedBoxes, mPool.getCurrentWorldBoxes(), nbObjects*sizeof(PxBounds3)); + + // PT: objects currently in the bucket pruner will be in the new tree. They are marked with the + // current timestamp (mTimeStamp). 
However more objects can get added while we compute the new tree, + // and those ones will not be part of it. These new objects will be marked with the new timestamp + // value (mTimeStamp+1), and we can use these different values to remove the proper objects from + // the bucket pruner (when switching to the new tree). + mTimeStamp++; + mBuilder.reset(); + mBuilder.mNbPrimitives = mNbCachedBoxes; + mBuilder.mAABBArray = mCachedBoxes; + mBuilder.mLimit = NB_OBJECTS_PER_NODE; + + mBuildStats.reset(); + + // start recording modifications to the tree made during rebuild to reapply (fix the new tree) eventually + PX_ASSERT(mNewTreeFixups.size()==0); + + mProgress = BUILD_INIT; + } + else if(mProgress==BUILD_INIT) + { + mNewTree->progressiveBuild(mBuilder, mBuildStats, 0, 0); + mProgress = BUILD_IN_PROGRESS; + mNbCalls = 0; + + // Use a heuristic to estimate the number of work units needed for rebuilding the tree. + // The general idea is to use the number of work units of the previous tree to build the new tree. + // This works fine as long as the number of leaves remains more or less the same for the old and the + // new tree. If that is not the case, this estimate can be way off and the work units per step will + // be either much too small or too large. 
Hence, in that case we will try to estimate the number of work + // units based on the number of leaves of the new tree as follows: + // + // - Assume new tree with n leaves is perfectly-balanced + // - Compute the depth of perfectly-balanced tree with n leaves + // - Estimate number of working units for the new tree + + const PxU32 depth = Ps::ilog2(mBuilder.mNbPrimitives); // Note: This is the depth without counting the leaf layer + const PxU32 estimatedNbWorkUnits = depth * mBuilder.mNbPrimitives; // Estimated number of work units for new tree + const PxU32 estimatedNbWorkUnitsOld = mAABBTree->getTotalPrims(); + if ((estimatedNbWorkUnits <= (estimatedNbWorkUnitsOld << 1)) && (estimatedNbWorkUnits >= (estimatedNbWorkUnitsOld >> 1))) + // The two estimates do not differ by more than a factor 2 + mTotalWorkUnits = estimatedNbWorkUnitsOld; + else + { + mAdaptiveRebuildTerm = 0; + mTotalWorkUnits = estimatedNbWorkUnits; + } + + const PxI32 totalWorkUnits = PxI32(mTotalWorkUnits + (mAdaptiveRebuildTerm * mBuilder.mNbPrimitives)); + mTotalWorkUnits = PxU32(PxMax(totalWorkUnits, 0)); + } + else if(mProgress==BUILD_IN_PROGRESS) + { + mNbCalls++; + const PxU32 Limit = 1 + (mTotalWorkUnits / mRebuildRateHint); + // looks like progressiveRebuild returns 0 when finished + if (!mNewTree->progressiveBuild(mBuilder, mBuildStats, 1, Limit)) + { + // Done + mProgress = BUILD_NEW_MAPPING; +#if PX_DEBUG + mNewTree->validate(); +#endif + } + } + else if(mProgress==BUILD_NEW_MAPPING) + { + mNbCalls++; + mProgress = BUILD_FULL_REFIT; + + // PT: we can't call fullRefit without creating the new mapping first: the refit function will fetch boxes from + // the pool using "primitive indices" captured in the tree. But some of these indices may have been invalidated + // if objects got removed while the tree was built. 
So we need to invalidate the corresponding nodes before refit, + // that way the #prims will be zero and the code won't fetch a wrong box (which may now below to a different object). + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeMapping", mContextID); + + if(mNewTreeFixups.size()) + { + mNewTreeMap.initMap(PxMax(mPool.getNbActiveObjects(), mNbCachedBoxes), *mNewTree); + + // The new mapping has been computed using only indices stored in the new tree. Those indices map the pruning pool + // we had when starting to build the tree. We need to re-apply recorded moves to fix the tree. + for(NewTreeFixup* r = mNewTreeFixups.begin(); r < mNewTreeFixups.end(); r++) + mNewTreeMap.invalidate(r->removedIndex, r->relocatedLastIndex, *mNewTree); + + mNewTreeFixups.clear(); +#if PX_DEBUG + mNewTree->validate(); +#endif + } + } + } + else if(mProgress==BUILD_FULL_REFIT) + { + mNbCalls++; + mProgress = BUILD_LAST_FRAME; + + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeFullRefit", mContextID); + + // We need to refit the new tree because objects may have moved while we were building it. + mNewTree->fullRefit(mPool.getCurrentWorldBoxes()); + } + } + else if(mProgress==BUILD_LAST_FRAME) + { + mProgress = BUILD_FINISHED; + } + + // This is required to be set because commit handles both refit and a portion of build finalization (why?) + // This is overly conservative also only necessary in case there were no updates at all to the tree since the last tree swap + // It also overly conservative in a sense that it could be set only if mProgress was just set to BUILD_FINISHED + mUncommittedChanges = true; + + return mProgress==BUILD_FINISHED; + } + + return true; +} + + + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Builds an AABB-tree for objects in the pruning pool. 
+ * \return true if success + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool AABBPruner::fullRebuildAABBTree() +{ + PX_PROFILE_ZONE("SceneQuery.prunerFullRebuildAABBTree", mContextID); + + // Release possibly already existing tree + PX_DELETE_AND_RESET(mAABBTree); + + // Don't bother building an AABB-tree if there isn't a single static object + const PxU32 nbObjects = mPool.getNbActiveObjects(); + if(!nbObjects) + return true; + + bool Status; + { + // Create a new tree + mAABBTree = PX_NEW(AABBTree); + + AABBTreeBuildParams TB; + TB.mNbPrimitives = nbObjects; + TB.mAABBArray = mPool.getCurrentWorldBoxes(); + TB.mLimit = NB_OBJECTS_PER_NODE; + Status = mAABBTree->build(TB); + } + + // No need for the tree map for static pruner + if(mIncrementalRebuild) + mTreeMap.initMap(PxMax(nbObjects,mNbCachedBoxes),*mAABBTree); + + return Status; +} + +// called in the end of commit(), but only if mIncrementalRebuild is true +void AABBPruner::updateBucketPruner() +{ + PX_PROFILE_ZONE("SceneQuery.prunerUpdateBucketPruner", mContextID); + + PX_ASSERT(mIncrementalRebuild); + mBucketPruner.build(); +} + +PxBounds3 AABBPruner::getAABB(PrunerHandle handle) +{ + return mPool.getWorldAABB(handle); +} + +void AABBPruner::release() // this can be called from purge() +{ + mBucketPruner.release(); + + mTimeStamp = 0; + + mTreeMap.release(); + mNewTreeMap.release(); + + PX_FREE_AND_RESET(mCachedBoxes); + mBuilder.reset(); + PX_DELETE_AND_RESET(mNewTree); + PX_DELETE_AND_RESET(mAABBTree); + + mNbCachedBoxes = 0; + mProgress = BUILD_NOT_STARTED; + mNewTreeFixups.clear(); + mUncommittedChanges = false; +} + +// Refit current tree +void AABBPruner::refitUpdatedAndRemoved() +{ + PX_PROFILE_ZONE("SceneQuery.prunerRefitUpdatedAndRemoved", mContextID); + + PX_ASSERT(mIncrementalRebuild); + AABBTree* tree = getAABBTree(); + if(!tree) + 
return; + +#if PX_DEBUG + tree->validate(); +#endif + + //### missing a way to skip work if not needed + + const PxU32 nbObjects = mPool.getNbActiveObjects(); + // At this point there still can be objects in the tree that are blanked out so it's an optimization shortcut (not required) + if(!nbObjects) + return; + + mBucketPruner.refitMarkedNodes(mPool.getCurrentWorldBoxes()); + tree->refitMarkedNodes(mPool.getCurrentWorldBoxes()); +} + +void AABBPruner::merge(const void* mergeParams) +{ + const AABBPrunerMergeData& pruningStructure = *reinterpret_cast<const AABBPrunerMergeData*> (mergeParams); + + if(mAABBTree) + { + // index in pruning pool, where new objects were added + const PxU32 pruningPoolIndex = mPool.getNbActiveObjects() - pruningStructure.mNbObjects; + + // create tree from given nodes and indices + AABBTreeMergeData aabbTreeMergeParams(pruningStructure.mNbNodes, pruningStructure.mAABBTreeNodes, + pruningStructure.mNbObjects, pruningStructure.mAABBTreeIndices, pruningPoolIndex); + + if (!mIncrementalRebuild) + { + // merge tree directly + mAABBTree->mergeTree(aabbTreeMergeParams); + } + else + { + mBucketPruner.addTree(aabbTreeMergeParams, mTimeStamp); + } + } +} diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.h b/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.h new file mode 100644 index 00000000..c5e96aa6 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.h @@ -0,0 +1,268 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef SQ_AABB_PRUNER_H +#define SQ_AABB_PRUNER_H + +#include "SqPruningPool.h" +#include "SqExtendedBucketPruner.h" +#include "SqAABBTreeUpdateMap.h" +#include "SqAABBTree.h" + +namespace physx +{ + +namespace Sq +{ + // PT: we build the new tree over a number of frames/states, in order to limit perf spikes in 'updatePruningTrees'. + // The states are as follows: + // + // BUILD_NOT_STARTED (1 frame, AABBPruner): + // + // This is the initial state, before the new (AABBTree) build even starts. 
In this frame/state, we perform the AABBPruner-related + // memory allocations: + // - the new AABB tree is allocated + // - the array of cached bounding boxes is allocated and filled + // + // BUILD_INIT (1 frame, AABBTree): + // + // This is the first frame in which the new tree gets built. It deserves its own special state since various things happen in the + // first frame, that do no happen in subsequent frames. Basically most initial AABBTree-related allocations happen here (but no + // build step per se). + // + // BUILD_IN_PROGRESS (N frames, AABBTree): + // + // This is the core build function, actually building the tree. This should be mostly allocation-free, except here and there when + // building non-complete trees, and during the last call when the tree is finally built. + // + // BUILD_NEW_MAPPING (1 frame, AABBPruner): + // + // After the new AABBTree is built, we recreate an AABBTreeUpdateMap for the new tree, and use it to invalidate nodes whose objects + // have been removed during the build. + // + // We need to do that before doing a full refit in the next stage/frame. If we don't do that, the refit code will fetch a wrong box, + // that may very well belong to an entirely new object. + // + // Note that this mapping/update map (mNewTreeMap) is temporary, and only needed for the next stage. + // + // BUILD_FULL_REFIT (1 frame, AABBPruner): + // + // Once the new update map is available, we fully refit the new tree. AABBs of moved objects get updated. AABBs of removed objects + // become empty. + // + // BUILD_LAST_FRAME (1 frame, AABBPruner): + // + // This is an artificial frame used to delay the tree switching code. The switch happens as soon as we reach the BUILD_FINISHED + // state, but we don't want to execute BUILD_FULL_REFIT and the switch in the same frame. This extra BUILD_LAST_FRAME stage buys + // us one frame, i.e. 
	// we have one frame in which we do BUILD_FULL_REFIT, and in the next frame we'll do both BUILD_LAST_FRAME /
	// BUILD_FINISHED / the switch.
	//
	// BUILD_FINISHED (1 frame, AABBPruner):
	//
	// Several things happen in this 'finalization' frame/stage:
	// - We switch the trees (old one is deleted, cached boxes are deleted, new tree pointer is setup)
	// - A new (final) update map is created (mTreeMap). The map is used to invalidate objects that may have been removed during
	//   the BUILD_NEW_MAPPING and BUILD_FULL_REFIT frames. The nodes containing these removed objects are marked for refit.
	// - Nodes containing objects that have moved during the BUILD_NEW_MAPPING and BUILD_FULL_REFIT frames are marked for refit.
	// - We do a partial refit on the new tree, to take these final changes into account. This small partial refit is usually much
	//   cheaper than the full refit we previously performed here.
	// - We remove old objects from the bucket pruner
	//
	// State of the incremental multi-frame rebuild of the dynamic pruner's AABB tree.
	// See the state-machine description above for what happens in each state.
	enum BuildStatus
	{
		BUILD_NOT_STARTED,				// no rebuild in flight; AABBPruner-side allocations happen when one starts
		BUILD_INIT,						// first build frame: AABBTree-side allocations, no build step per se
		BUILD_IN_PROGRESS,				// core tree build, spread over N frames
		BUILD_NEW_MAPPING,				// new AABBTreeUpdateMap created; nodes of removed objects invalidated
		BUILD_FULL_REFIT,				// full refit of the freshly built tree
		BUILD_LAST_FRAME,				// artificial frame delaying the tree switch by one frame
		BUILD_FINISHED,					// trees switched, final partial refit, bucket pruner cleanup

		BUILD_FORCE_DWORD = 0xffffffff	// forces the enum to occupy 32 bits
	};

	// This class implements the Pruner interface for internal SQ use with some additional specialized functions
	// The underlying data structure is a binary AABB tree
	// AABBPruner supports insertions, removals and updates for dynamic objects
	// The tree is either entirely rebuilt in a single frame (static pruner) or progressively rebuilt over multiple frames (dynamic pruner)
	// The rebuild happens on a copy of the tree
	// the copy is then swapped with current tree at the time commit() is called (only if mBuildState is BUILD_FINISHED),
	// otherwise commit() will perform a refit operation applying any pending changes to the current tree
	// While the tree is being rebuilt a temporary data structure (BucketPruner) is also kept in sync and used to speed up
	// queries on updated objects that are
	// not yet in either old or new tree.
	// The requirements on the order of calls:
	// commit() is required to be called before any queries to apply modifications
	// queries can be issued on multiple threads after commit is called
	// commit, buildStep, add/remove/update have to be called from the same thread or otherwise strictly serialized by external code
	// and cannot be issued while a query is running
	class AABBPruner : public IncrementalPruner
	{
	public:
										AABBPruner(bool incrementalRebuild, PxU64 contextID);	// true is equivalent to former dynamic pruner
		virtual							~AABBPruner();

		// Pruner
		virtual bool					addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* userData, PxU32 count = 1, bool hasPruningStructure = false);
		virtual void					removeObjects(const PrunerHandle* handles, PxU32 count = 1);
		virtual void					updateObjects(const PrunerHandle* handles, const PxBounds3* newBounds, PxU32 count = 1);
		virtual void					updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count = 1);
		virtual void					commit();
		virtual PxAgain					raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
		virtual PxAgain					overlap(const Gu::ShapeData& queryVolume, PrunerCallback&) const;
		virtual PxAgain					sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
		virtual const PrunerPayload&	getPayload(PrunerHandle handle) const { return mPool.getPayload(handle); }
		virtual const PrunerPayload&	getPayload(PrunerHandle handle, PxBounds3*& bounds) const { return mPool.getPayload(handle, bounds); }
		virtual void					preallocate(PxU32 entries) { mPool.preallocate(entries); }
		virtual void					shiftOrigin(const PxVec3& shift);
		virtual void					visualize(Cm::RenderOutput& out, PxU32 color) const;
		virtual void					merge(const void* mergeParams);
		//~Pruner

		// IncrementalPruner
		virtual void					purge();									// gets rid of internal accel struct
		virtual void					setRebuildRateHint(PxU32 nbStepsForRebuild);	// Besides the actual rebuild steps, 3 additional steps are needed.
		virtual bool					buildStep();								// returns true if finished
		//~IncrementalPruner

		// direct access for test code

		PX_FORCE_INLINE	PxU32					getNbAddedObjects()	const			{ return mBucketPruner.getNbObjects(); }
		PX_FORCE_INLINE	const Sq::AABBTree*		getAABBTree()		const			{ PX_ASSERT(!mUncommittedChanges); return mAABBTree; }
		PX_FORCE_INLINE	Sq::AABBTree*			getAABBTree()						{ PX_ASSERT(!mUncommittedChanges); return mAABBTree; }
		PX_FORCE_INLINE	void					setAABBTree(Sq::AABBTree* tree)		{ mAABBTree = tree; }
		PX_FORCE_INLINE	const Sq::AABBTree*		hasAABBTree()		const			{ return mAABBTree; }
		PX_FORCE_INLINE	BuildStatus				getBuildStatus()	const			{ return mProgress; }

		// local functions
//	private:
		Sq::AABBTree*				mAABBTree;			// current active tree
		Sq::AABBTreeBuildParams		mBuilder;			// this class deals with the details of the actual tree building
		BuildStats					mBuildStats;

		// tree with build in progress, assigned to mAABBTree in commit, when mProgress is BUILD_FINISHED
		// created in buildStep(), BUILD_NOT_STARTED
		// This is non-null when there is a tree rebuild going on in progress
		// and thus also indicates that we have to start saving the fixups
		Sq::AABBTree*				mNewTree;

		// during rebuild the pool might change so we need a copy of boxes for the tree build
		PxBounds3*					mCachedBoxes;
		PxU32						mNbCachedBoxes;

		// incremented in commit(), serves as a progress counter for rebuild
		PxU32						mNbCalls;

		// PT: incremented each time we start building a new tree (i.e. effectively identifies a given tree)
		// Timestamp is passed to bucket pruner to mark objects added there, linking them to a specific tree.
		// When switching to the new tree, timestamp is used to remove old objects (now in the new tree) from
		// the bucket pruner.
		PxU32						mTimeStamp;

		// this pruner is used for queries on objects that are not in the current tree yet
		// includes both the objects in the tree being rebuilt and all the objects added later
		ExtendedBucketPruner		mBucketPruner;

		BuildStatus					mProgress;			// current state of second tree build progress

		// Fraction (as in 1/Nth) of the total number of primitives
		// that should be processed per step by the AABB builder
		// so if this value is 1, all primitives will be rebuilt, 2 => 1/2 of primitives per step etc.
		// see also mNbCalls, mNbCalls varies from 0 to mRebuildRateHint-1
		PxU32						mRebuildRateHint;

		// Estimate for how much work has to be done to rebuild the tree.
		PxU32						mTotalWorkUnits;

		// Term to correct the work unit estimate if the rebuild rate is not matched
		PxI32						mAdaptiveRebuildTerm;

		PruningPool					mPool;				// Pool of AABBs

		// maps pruning pool indices to aabb tree indices
		// maps to INVALID_NODE_ID if the pool entry was removed or "pool index is outside input domain"
		// The map is the inverse of the tree mapping: (node[map[poolID]].primitive == poolID)
		// So:
		// treeNodeIndex = mTreeMap.operator[](poolIndex)
		// aabbTree->treeNodes[treeNodeIndex].primitives[0] == poolIndex
		AABBTreeUpdateMap			mTreeMap;
		// Temporary update map, see BuildStatus notes above for details
		AABBTreeUpdateMap			mNewTreeMap;

		// This is only set once in the constructor and is equivalent to isDynamicTree
		// if it set to false then a 1-shot rebuild is performed in commit()
		// bucket pruner is only used with incremental rebuild
		bool						mIncrementalRebuild;

		// A rebuild can be triggered even when the Pruner is not dirty
		// mUncommittedChanges is set to true in add, remove, update and buildStep
		// mUncommittedChanges is set to false in commit
		// mUncommittedChanges has to be false (commit() has to be called) in order to run a query as defined by the
		// Pruner higher level API
		// mUncommittedChanges is not set to true in add, when pruning structure is provided. Scene query shapes
		// are merged to current AABB tree directly
		bool						mUncommittedChanges;

		// A new AABB tree is built if an object was added, removed or updated
		// Changing objects during a build will trigger another rebuild right afterwards
		// this is set to true if a new tree has to be created again after the current rebuild is done
		bool						mNeedsNewTree;

		// This struct is used to record modifications made to the pruner state
		// while a tree is building in the background
		// this is so we can apply the modifications to the tree at the time of completion
		// the recorded fixup information is: removedIndex (in ::remove()) and
		// lastIndexMoved which is the last index in the pruner array
		// (since the way we remove from PruningPool is by swapping last into removed slot,
		// we need to apply a fixup so that it syncs up that operation in the new tree)
		struct NewTreeFixup
		{
			PX_FORCE_INLINE NewTreeFixup(PxU32 removedIndex_, PxU32 relocatedLastIndex_)
				: removedIndex(removedIndex_), relocatedLastIndex(relocatedLastIndex_) {}
			PxU32 removedIndex;
			PxU32 relocatedLastIndex;
		};
		Ps::Array<NewTreeFixup>		mNewTreeFixups;

		// pool indices whose tree nodes need a refit pass at commit time
		Ps::Array<PoolIndex>		mToRefit;

		// context identifier forwarded for profiling/diagnostics
		PxU64						mContextID;

		// Internal methods
		bool						fullRebuildAABBTree();	// full rebuild function, used with static pruner mode
		void						release();
		void						refitUpdatedAndRemoved();
		void						updateBucketPruner();
		PxBounds3					getAABB(PrunerHandle h);
	};

} // namespace Sq

}

#endif // SQ_AABB_PRUNER_H
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 

#include "SqAABBTree.h"
#include "SqAABBTreeUpdateMap.h"

#include "PsMathUtils.h"
#include "PsFoundation.h"
#include "GuInternal.h"

using namespace physx;
using namespace Sq;

#define INVALID_ID 0xffffffff

// Progressive building
// FIFO queue of build nodes still to be subdivided; lets the tree build be spread
// over several frames by popping a bounded amount of work per call.
class Sq::FIFOStack : public Ps::UserAllocated
{
public:
						FIFOStack() : mStack(PX_DEBUG_EXP("SQFIFOStack")), mCurIndex(0) {}
						~FIFOStack() {}

	PX_FORCE_INLINE	PxU32	getNbEntries() const	{ return mStack.size(); }
	PX_FORCE_INLINE	void	push(AABBTreeBuildNode* entry)	{ mStack.pushBack(entry); }
					bool	pop(AABBTreeBuildNode*& entry);
private:
	Ps::Array<AABBTreeBuildNode*>	mStack;
	PxU32							mCurIndex;	//!< Current index within the container
};

// Pops the oldest pushed entry (FIFO order). Returns false when the queue is empty.
// Entries are not erased individually; the array is cleared in one go once the read
// cursor catches up with the write end.
bool Sq::FIFOStack::pop(AABBTreeBuildNode*& entry)
{
	const PxU32 NbEntries = mStack.size(); // Get current number of entries
	if(!NbEntries)
		return false; // Can be NULL when no value has been pushed. This is an invalid pop call.
	entry = mStack[mCurIndex++]; // Get oldest entry, move to next one
	if(mCurIndex==NbEntries)
	{
		// All values have been popped
		mStack.clear();
		mCurIndex=0;
	}
	return true;
}
//~Progressive building

NodeAllocator::NodeAllocator() : mPool(NULL), mCurrentSlabIndex(0), mTotalNbNodes(0)
{
}

NodeAllocator::~NodeAllocator()
{
	release();
}

// Frees all node slabs and resets the allocator to its freshly-constructed state.
void NodeAllocator::release()
{
	const PxU32 nbSlabs = mSlabs.size();
	for(PxU32 i=0;i<nbSlabs;i++)
	{
		Slab& s = mSlabs[i];
		PX_DELETE_ARRAY(s.mPool);
	}

	mSlabs.reset();
	mCurrentSlabIndex = 0;
	mTotalNbNodes = 0;
}

// Allocates the first slab of build nodes, sized from an estimate of the final node
// count, and sets up the root node covering all nbPrimitives primitives.
void NodeAllocator::init(PxU32 nbPrimitives, PxU32 limit)
{
	const PxU32 maxSize = nbPrimitives*2 - 1;	// PT: max possible #nodes for a complete tree
	// Heuristic: for small trees allocate the worst case, otherwise scale down by the leaf limit.
	const PxU32 estimatedFinalSize = maxSize<=1024 ? maxSize : maxSize/limit;
	mPool = PX_NEW(AABBTreeBuildNode)[estimatedFinalSize];
	PxMemZero(mPool, sizeof(AABBTreeBuildNode)*estimatedFinalSize);

	// Setup initial node. Here we have a complete permutation of the app's primitives.
	mPool->mNodeIndex = 0;
	mPool->mNbPrimitives = nbPrimitives;

	mSlabs.pushBack(Slab(mPool, 1, estimatedFinalSize));
	mCurrentSlabIndex = 0;
	mTotalNbNodes = 1;
}

// PT: TODO: inline this?
// Returns a pointer to two consecutive build nodes (positive/negative children).
// Allocates a new 1024-node slab when the current slab cannot hold two more nodes.
AABBTreeBuildNode* NodeAllocator::getBiNode()
{
	mTotalNbNodes += 2;
	Slab& currentSlab = mSlabs[mCurrentSlabIndex];
	if(currentSlab.mNbUsedNodes+2<=currentSlab.mMaxNbNodes)
	{
		AABBTreeBuildNode* biNode = currentSlab.mPool + currentSlab.mNbUsedNodes;
		currentSlab.mNbUsedNodes += 2;
		return biNode;
	}
	else
	{
		// Allocate new slab
		const PxU32 size = 1024;
		AABBTreeBuildNode* pool = PX_NEW(AABBTreeBuildNode)[size];
		PxMemZero(pool, sizeof(AABBTreeBuildNode)*size);

		// The new pair occupies the first two nodes of the fresh slab.
		mSlabs.pushBack(Slab(pool, 2, size));
		mCurrentSlabIndex++;
		return pool;
	}
}

// Converts the slab-allocated build nodes into one contiguous array of compact runtime
// nodes. Leaf nodes pack (primitive-start-index << 5 | count << 1 | 1) into mData;
// internal nodes pack (child-node-index << 1). Releases the build-time slabs when done.
void NodeAllocator::flatten(AABBTreeRuntimeNode* dest)
{
	// PT: gathers all build nodes allocated so far and flatten them to a linear destination array of smaller runtime nodes
	PxU32 offset = 0;
	const PxU32 nbSlabs = mSlabs.size();
	for(PxU32 s=0;s<nbSlabs;s++)
	{
		const Slab& currentSlab = mSlabs[s];

		AABBTreeBuildNode* pool = currentSlab.mPool;
		for(PxU32 i=0;i<currentSlab.mNbUsedNodes;i++)
		{
			dest[offset].mBV = pool[i].mBV;
			if(pool[i].isLeaf())
			{
				const PxU32 index = pool[i].mNodeIndex;

				const PxU32 nbPrims = pool[i].getNbPrimitives();
				// NOTE(review): the count field is 4 bits, so nbPrims==16 would wrap to 0
				// under the &15 mask while the assert still permits 16 — verify the build
				// limit guarantees leaves hold fewer than 16 primitives.
				PX_ASSERT(nbPrims<=16);

				dest[offset].mData = (index<<5)|((nbPrims&15)<<1)|1;
			}
			else
			{
				PX_ASSERT(pool[i].mPos);
				// Child pointers are slab-relative; translate the pointer back to a
				// global (flattened) node index by locating its owning slab.
				PxU32 localNodeIndex = 0xffffffff;
				PxU32 nodeBase = 0;
				for(PxU32 j=0;j<nbSlabs;j++)
				{
					if(pool[i].mPos>=mSlabs[j].mPool && pool[i].mPos<mSlabs[j].mPool+mSlabs[j].mNbUsedNodes)
					{
						localNodeIndex = PxU32(pool[i].mPos - mSlabs[j].mPool);
						break;
					}
					nodeBase += mSlabs[j].mNbUsedNodes;
				}
				const PxU32 nodeIndex = nodeBase + localNodeIndex;
				PX_ASSERT(nodeIndex<mTotalNbNodes);
				dest[offset].mData = nodeIndex<<1;
			}
			offset++;
		}
	}
	PX_ASSERT(offset==mTotalNbNodes);
	release();
}

static
PX_FORCE_INLINE float getSplittingValue(const PxBounds3& global_box, PxU32 axis)
{
	// Default split value = middle of the axis (using only the box)
	return global_box.getCenter(axis);
}

// Partitions the prims array in place around the center of 'box' along 'axis'.
// Returns the number of primitives assigned to the positive half-space; those end up
// at the front of the array.
static PxU32 split(const PxBounds3& box, PxU32 nb, PxU32* const PX_RESTRICT prims, PxU32 axis, const AABBTreeBuildParams& params)
{
	// Get node split value
	const float splitValue = getSplittingValue(box, axis);

	PxU32 nbPos = 0;
	// Loop through all node-related primitives. Their indices range from "mNodePrimitives[0]" to "mNodePrimitives[mNbPrimitives-1]",
	// with mNodePrimitives = mIndices + mNodeIndex (i.e. those indices map the global list in the tree params).

	// PT: to avoid calling the unsafe [] operator
	// Offset the cache pointer by 'axis' floats so cache[index].x reads component 'axis'
	// of the cached box center.
	const size_t ptrValue = size_t(params.mCache) + axis*sizeof(float);
	const PxVec3* /*PX_RESTRICT*/ cache = reinterpret_cast<const PxVec3*>(ptrValue);

	for(PxU32 i=0;i<nb;i++)
	{
		// Get index in global list
		const PxU32 index = prims[i];

		// Test against the splitting value. The primitive value is tested against the enclosing-box center.
		// [We only need an approximate partition of the enclosing box here.]
		const float primitiveValue = cache[index].x;
		PX_ASSERT(primitiveValue==params.mCache[index][axis]);

		// Reorganize the list of indices in this order: positive - negative.
		if(primitiveValue > splitValue)
		{
			// Swap entries
			prims[i] = prims[nbPos];
			prims[nbPos] = index;
			// Count primitives assigned to positive space
			nbPos++;
		}
	}
	return nbPos;
}

// Computes this node's bounds, then splits its primitive range along the axis of
// greatest variance of the cached box centers, creating two children via the allocator.
// Stops (stays a leaf) when at or below params.mLimit primitives.
void AABBTreeBuildNode::subdivide(const AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& allocator, PxU32* const indices)
{
	PxU32* const PX_RESTRICT primitives = indices + mNodeIndex;
	const PxU32 nbPrims = mNbPrimitives;

	// Compute global box & means for current node. The box is stored in mBV.
	Vec4V meansV;
	{
		const PxBounds3* PX_RESTRICT boxes = params.mAABBArray;
		PX_ASSERT(boxes);
		PX_ASSERT(primitives);
		PX_ASSERT(nbPrims);

		Vec4V minV = V4LoadU(&boxes[primitives[0]].minimum.x);
		Vec4V maxV = V4LoadU(&boxes[primitives[0]].maximum.x);

		meansV = V4LoadU(&params.mCache[primitives[0]].x);

		for(PxU32 i=1;i<nbPrims;i++)
		{
			const PxU32 index = primitives[i];
			const Vec4V curMinV = V4LoadU(&boxes[index].minimum.x);
			const Vec4V curMaxV = V4LoadU(&boxes[index].maximum.x);
			meansV = V4Add(meansV, V4LoadU(&params.mCache[index].x));
			minV = V4Min(minV, curMinV);
			maxV = V4Max(maxV, curMaxV);
		}

		StoreBounds(mBV, minV, maxV);

		const float coeff = 1.0f/float(nbPrims);
		meansV = V4Scale(meansV, FLoad(coeff));
	}

	// Check the user-defined limit. Also ensures we stop subdividing if we reach a leaf node.
	if(nbPrims<=params.mLimit)
		return;

	bool validSplit = true;
	PxU32 nbPos;
	{
		// Compute variances
		Vec4V varsV = V4Zero();
		for(PxU32 i=0;i<nbPrims;i++)
		{
			const PxU32 index = primitives[i];
			Vec4V centerV = V4LoadU(&params.mCache[index].x);
			centerV = V4Sub(centerV, meansV);
			centerV = V4Mul(centerV, centerV);
			varsV = V4Add(varsV, centerV);
		}
		const float coeffNb1 = 1.0f/float(nbPrims-1);
		varsV = V4Scale(varsV, FLoad(coeffNb1));
		PX_ALIGN(16, PxVec4) vars;
		V4StoreA(varsV, &vars.x);

		// Choose axis with greatest variance
		const PxU32 axis = Ps::largestAxis(PxVec3(vars.x, vars.y, vars.z));

		// Split along the axis
		nbPos = split(mBV, nbPrims, primitives, axis, params);

		// Check split validity
		if(!nbPos || nbPos==nbPrims)
			validSplit = false;
	}

	// Check the subdivision has been successful
	if(!validSplit)
	{
		// Here, all boxes lie in the same sub-space. Two strategies:
		// - if we are over the split limit, make an arbitrary 50-50 split
		// - else stop subdividing
		if(nbPrims>params.mLimit)
		{
			nbPos = nbPrims>>1;
		}
		else return;
	}

	// Now create children and assign their pointers.
	mPos = allocator.getBiNode();

	stats.increaseCount(2);

	// Assign children
	PX_ASSERT(!isLeaf());
	AABBTreeBuildNode* Pos = const_cast<AABBTreeBuildNode*>(mPos);
	AABBTreeBuildNode* Neg = Pos + 1;
	// Children partition this node's index range: [mNodeIndex, mNodeIndex+nbPos) and the rest.
	Pos->mNodeIndex = mNodeIndex;
	Pos->mNbPrimitives = nbPos;
	Neg->mNodeIndex = mNodeIndex + nbPos;
	Neg->mNbPrimitives = mNbPrimitives - nbPos;
}

// Recursively subdivides this node and its children (single-frame, depth-first build).
void AABBTreeBuildNode::_buildHierarchy(AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& nodeBase, PxU32* const indices)
{
	// Subdivide current node
	subdivide(params, stats, nodeBase, indices);

	// Recurse
	if(!isLeaf())
	{
		AABBTreeBuildNode* Pos = const_cast<AABBTreeBuildNode*>(getPos());
		PX_ASSERT(Pos);
		AABBTreeBuildNode* Neg = Pos + 1;
		Pos->_buildHierarchy(params, stats, nodeBase, indices);
		Neg->_buildHierarchy(params, stats, nodeBase, indices);
	}

	stats.mTotalPrims += mNbPrimitives;
}

AABBTree::AABBTree() :
	mIndices		(NULL),
	mNbIndices		(0),
	mRuntimePool	(NULL),
	mParentIndices	(NULL),
	mTotalNbNodes	(0),
	mTotalPrims		(0)
{
// Progressive building
	mStack = NULL;
//~Progressive building

// REFIT
	mRefitHighestSetWord = 0;
//~REFIT
}

AABBTree::~AABBTree()
{
	// Keep the refit bitmask memory on destruction; it is owned by the bitmask itself.
	release(false);
}

// Releases all tree memory (build stack, parent indices, runtime nodes, indices).
// clearRefitMap controls whether pending refit marks are also wiped.
void AABBTree::release(bool clearRefitMap)
{
// Progressive building
	PX_DELETE_AND_RESET(mStack);
//~Progressive building
	PX_FREE_AND_RESET(mParentIndices);
	PX_DELETE_ARRAY(mRuntimePool);
	mNodeAllocator.release();
	PX_FREE_AND_RESET(mIndices);
	mTotalNbNodes = 0;
	mNbIndices = 0;

// REFIT
	if(clearRefitMap)
		mRefitBitmask.clearAll();
	mRefitHighestSetWord = 0;
//~REFIT
}

// Initialize nodes/indices from the input tree merge data
void AABBTree::initTree(const AABBTreeMergeData& tree)
{
	PX_ASSERT(mIndices == NULL);
	PX_ASSERT(mRuntimePool == NULL);
	PX_ASSERT(mParentIndices == NULL);

	// allocate,copy indices
	mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*tree.mNbIndices, "AABB tree indices"));
	mNbIndices = tree.mNbIndices;
	PxMemCopy(mIndices, tree.mIndices, sizeof(PxU32)*tree.mNbIndices);

	// allocate,copy nodes
	mRuntimePool = PX_NEW(AABBTreeRuntimeNode)[tree.mNbNodes];
	mTotalNbNodes = tree.mNbNodes;
	PxMemCopy(mRuntimePool, tree.mNodes, sizeof(AABBTreeRuntimeNode)*tree.mNbNodes);
}

// Shift indices of the tree by offset. Used for merged trees, when initial indices needs to be shifted to match indices in current pruning pool
void AABBTree::shiftIndices(PxU32 offset)
{
	for (PxU32 i = 0; i < mNbIndices; i++)
	{
		mIndices[i] += offset;
	}
}

// First stage of a (full or progressive) build: releases any previous tree, creates the
// identity primitive permutation, allocates the build-node pool and caches all box centers.
// Returns false when there is nothing to build (zero primitives).
bool AABBTree::buildInit(AABBTreeBuildParams& params, BuildStats& stats)
{
	// Checkings
	const PxU32 nbPrimitives = params.mNbPrimitives;
	if(!nbPrimitives)
		return false;

	// Release previous tree
	release();

	// Init stats
	stats.setCount(1);

	// Initialize indices. This list will be modified during build.
	mNbIndices = nbPrimitives;
	mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbPrimitives, "AABB tree indices"));
	// Identity permutation
	for(PxU32 i=0;i<nbPrimitives;i++)
		mIndices[i] = i;

	// Allocate a pool of nodes
	mNodeAllocator.init(nbPrimitives, params.mLimit);

	// Compute box centers only once and cache them
	// (one extra PxVec3 is allocated so the unaligned V4 stores below stay in bounds)
	params.mCache = reinterpret_cast<PxVec3*>(PX_ALLOC(sizeof(PxVec3)*(nbPrimitives+1), "cache"));
	const float half = 0.5f;
	const FloatV halfV = FLoad(half);
	for(PxU32 i=0;i<nbPrimitives;i++)
	{
		const Vec4V curMinV = V4LoadU(&params.mAABBArray[i].minimum.x);
		const Vec4V curMaxV = V4LoadU(&params.mAABBArray[i].maximum.x);
		const Vec4V centerV = V4Scale(V4Add(curMaxV, curMinV), halfV);
		V4StoreU(centerV, &params.mCache[i].x);
	}
	return true;
}

// Final stage of a build: frees the center cache and flattens the build nodes into the
// compact runtime-node array.
void AABBTree::buildEnd(AABBTreeBuildParams& params, BuildStats& stats)
{
	PX_FREE_AND_RESET(params.mCache);
	// Get back total number of nodes
	mTotalNbNodes	= stats.getCount();
	mTotalPrims		= stats.mTotalPrims;

	mRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes];
	PX_ASSERT(mTotalNbNodes==mNodeAllocator.mTotalNbNodes);
	mNodeAllocator.flatten(mRuntimePool);
}

// Single-frame (non-progressive) build of the whole tree. Returns false when there are
// no primitives.
bool AABBTree::build(AABBTreeBuildParams& params)
{
	// Init stats
	BuildStats stats;
	if(!buildInit(params, stats))
		return false;

	// Build the hierarchy
	mNodeAllocator.mPool->_buildHierarchy(params, stats, mNodeAllocator, mIndices);

	buildEnd(params, stats);
	return true;
}

// Translates every node's bounds by -shift (world origin shift support).
void AABBTree::shiftOrigin(const PxVec3& shift)
{
	AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
	const PxU32 totalNbNodes = mTotalNbNodes;
	for(PxU32 i=0; i<totalNbNodes; i++)
	{
		AABBTreeRuntimeNode& current = nodeBase[i];
		if((i+1) < totalNbNodes)
			Ps::prefetch(nodeBase + i + 1);

		current.mBV.minimum -= shift;
		current.mBV.maximum -= shift;
	}
}

#if PX_DEBUG
// Debug-only hook; currently a no-op.
void AABBTree::validate() const
{
}
#endif

// Progressive building
static PxU32 incrementalBuildHierarchy(FIFOStack& stack,
AABBTreeBuildNode* node, AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& nodeBase, PxU32* const indices)
{
	// Subdivide one node and queue its children (breadth-first via the FIFO stack)
	// instead of recursing, so the build can be interrupted between nodes.
	node->subdivide(params, stats, nodeBase, indices);

	if(!node->isLeaf())
	{
		AABBTreeBuildNode* pos = const_cast<AABBTreeBuildNode*>(node->getPos());
		PX_ASSERT(pos);
		AABBTreeBuildNode* neg = pos + 1;
		stack.push(neg);
		stack.push(pos);
	}

	stats.mTotalPrims += node->mNbPrimitives;
	// Return the amount of work done, measured in primitives touched.
	return node->mNbPrimitives;
}

// One step of the multi-frame tree build.
// progress==0: init stage (allocations, root pushed on the work stack).
// progress==1: processes queued nodes until at least 'limit' primitives were handled;
//              returns 0 once the stack is empty and the tree is finalized.
// Returns PX_INVALID_U32 on error (no primitives, or unknown stage).
PxU32 AABBTree::progressiveBuild(AABBTreeBuildParams& params, BuildStats& stats, PxU32 progress, PxU32 limit)
{
	if(progress==0)
	{
		if(!buildInit(params, stats))
			return PX_INVALID_U32;

		mStack = PX_NEW(FIFOStack);
		mStack->push(mNodeAllocator.mPool);
		// NOTE(review): post-increment returns the pre-increment value (0) here;
		// presumably the caller passes explicit stage constants and ignores this
		// stage's return value — confirm against AABBPruner::buildStep.
		return progress++;
	}
	else if(progress==1)
	{
		PxU32 stackCount = mStack->getNbEntries();
		if(stackCount)
		{
			// Process nodes until the per-step work budget ('limit' primitives) is spent.
			PxU32 Total = 0;
			const PxU32 Limit = limit;
			while(Total<Limit)
			{
				AABBTreeBuildNode* Entry;
				if(mStack->pop(Entry))
					Total += incrementalBuildHierarchy(*mStack, Entry, params, stats, mNodeAllocator, mIndices);
				else
					break;
			}
			return progress;
		}

		buildEnd(params, stats);

		PX_DELETE_AND_RESET(mStack);

		return 0;	// Done!
	}
	return PX_INVALID_U32;
}
//~Progressive building



// Number of 32-bit words needed to hold nb_bits bits (rounded up).
static PX_FORCE_INLINE PxU32 BitsToDwords(PxU32 nb_bits)
{
	return (nb_bits>>5) + ((nb_bits&31) ?
1 : 0);
}

// (Re)allocates the bit storage for nb_bits bits and clears it. Any previous storage
// is freed first.
bool Sq::BitArray::init(PxU32 nb_bits)
{
	mSize = BitsToDwords(nb_bits);
	// Get ram for n bits
	PX_FREE(mBits);
	mBits = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mSize, "BitArray::mBits"));
	// Set all bits to 0
	clearAll();
	return true;
}

// Grows the bit storage to hold maxBitNumber bits, preserving existing bits and
// zeroing the newly added words. No-op if already large enough.
void Sq::BitArray::resize(PxU32 maxBitNumber)
{
	const PxU32 newSize = BitsToDwords(maxBitNumber);
	if (newSize <= mSize)
		return;

	PxU32* newBits = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*newSize, "BitArray::mBits"));
	PxMemZero(newBits + mSize, (newSize - mSize) * sizeof(PxU32));
	PxMemCopy(newBits, mBits, mSize*sizeof(PxU32));
	PX_FREE(mBits);
	mBits = newBits;
	mSize = newSize;
}

// Decoders for the packed runtime-node mData word (see NodeAllocator::flatten):
// leaf:     bit 0 set, bits 1-4 = primitive count, bits 5+ = start index into mIndices
// internal: bit 0 clear, bits 1+ = index of the positive child node
static PX_FORCE_INLINE PxU32 getNbPrimitives(PxU32 data)									{ return (data>>1)&15; }
static PX_FORCE_INLINE const PxU32* getPrimitives(const PxU32* base, PxU32 data)			{ return base + (data>>5); }
static PX_FORCE_INLINE const AABBTreeRuntimeNode* getPos(const AABBTreeRuntimeNode* base, PxU32 data)	{ return base + (data>>1); }
static PX_FORCE_INLINE PxU32 isLeaf(PxU32 data)												{ return data&1; }

// Recomputes one node's bounds: from its primitives' boxes for a leaf (an inverted,
// effectively empty box if the leaf has no primitives left), or from its two children
// for an internal node. Assumes children were refit first (bottom-up traversal).
static PX_FORCE_INLINE void refitNode(AABBTreeRuntimeNode* PX_RESTRICT current, const PxBounds3* PX_RESTRICT boxes, const PxU32* PX_RESTRICT indices, AABBTreeRuntimeNode* PX_RESTRICT const nodeBase)
{
	// PT: we can safely use V4 loads on both boxes and nodes here:
	// - it's safe on boxes because we allocated one extra box in the pruning pool
	// - it's safe on nodes because there's always some data within the node, after the BV

	const PxU32 data = current->mData;

	Vec4V resultMinV, resultMaxV;
	if(isLeaf(data))
	{
		const PxU32 nbPrims = getNbPrimitives(data);
		if(nbPrims)
		{
			const PxU32* primitives = getPrimitives(indices, data);
			resultMinV = V4LoadU(&boxes[*primitives].minimum.x);
			resultMaxV = V4LoadU(&boxes[*primitives].maximum.x);

			if(nbPrims>1)
			{
				const PxU32* last = primitives + nbPrims;
				primitives++;

				while(primitives!=last)
				{
					resultMinV = V4Min(resultMinV, V4LoadU(&boxes[*primitives].minimum.x));
					resultMaxV = V4Max(resultMaxV, V4LoadU(&boxes[*primitives].maximum.x));
					primitives++;
				}
			}
		}
		else
		{
			// Might happen after a node has been invalidated
			const float max = 0.25f * 1e33f;	// ###
			resultMinV = V4Load(max);
			resultMaxV = V4Load(-max);
		}
	}
	else
	{
		const AABBTreeRuntimeNode* pos = getPos(nodeBase, data);
		const AABBTreeRuntimeNode* neg = pos+1;

		const PxBounds3& posBox = pos->mBV;
		const PxBounds3& negBox = neg->mBV;

		resultMinV = V4Min(V4LoadU(&posBox.minimum.x), V4LoadU(&negBox.minimum.x));
//		resultMaxV = V4Max(V4LoadU(&posBox.maximum.x), V4LoadU(&negBox.maximum.x));

#if PX_INTEL_FAMILY
		// Load (min.z, max.x, max.y, max.z) then rotate to put max.xyz in lanes 0-2,
		// avoiding an out-of-bounds read past the last node's maximum.
		Vec4V posMinV = V4LoadU(&posBox.minimum.z);
		Vec4V negMinV = V4LoadU(&negBox.minimum.z);
		posMinV = _mm_shuffle_ps(posMinV, posMinV, _MM_SHUFFLE(0, 3, 2, 1));
		negMinV = _mm_shuffle_ps(negMinV, negMinV, _MM_SHUFFLE(0, 3, 2, 1));
		resultMaxV = V4Max(posMinV, negMinV);
#else
		// PT: fixes the perf issue but not really convincing
		resultMaxV = Vec4V_From_Vec3V(V3Max(V3LoadU(&posBox.maximum.x), V3LoadU(&negBox.maximum.x)));
#endif
	}

	// PT: the V4 stores overwrite the data after the BV, but we just put it back afterwards
	V4StoreU(resultMinV, &current->mBV.minimum.x);
	V4StoreU(resultMaxV, &current->mBV.maximum.x);
	current->mData = data;
}

// Refits every node in the tree from the current boxes. Iterates from the last node to
// the first: children always have higher indices than their parent, so this is a valid
// bottom-up pass.
void AABBTree::fullRefit(const PxBounds3* boxes)
{
	PX_ASSERT(boxes);

	const PxU32* indices = mIndices;
	AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
	PX_ASSERT(nodeBase);

	// Bottom-up update
	PxU32 index = mTotalNbNodes;
	while(index--)
	{
		AABBTreeRuntimeNode* current = nodeBase + index;
		if(index)
			Ps::prefetch(current - 1);

		refitNode(current, boxes, indices, nodeBase);
	}
}

// Recursively records each node's parent index. The root is passed as its own parent,
// so parentIndices[0] == 0 (used as the termination condition in markNodeForRefit).
static void _createParentArray(PxU32 totalNbNodes, PxU32* parentIndices, const AABBTreeRuntimeNode* parentNode, const AABBTreeRuntimeNode* currentNode, const AABBTreeRuntimeNode* root)
{
	const PxU32 parentIndex = PxU32(parentNode - root);
	const PxU32 currentIndex = PxU32(currentNode - root);
	PX_ASSERT(parentIndex<totalNbNodes);
	PX_ASSERT(currentIndex<totalNbNodes);
	PX_UNUSED(totalNbNodes);
	parentIndices[currentIndex] = parentIndex;

	if(!currentNode->isLeaf())
	{
		_createParentArray(totalNbNodes, parentIndices, currentNode, currentNode->getPos(root), root);
		_createParentArray(totalNbNodes, parentIndices, currentNode, currentNode->getNeg(root), root);
	}
}

// Marks a node and all its ancestors as needing a refit (see refitMarkedNodes).
// Walks up via the lazily-created parent array, stopping early when an already-marked
// ancestor is found (its own ancestors must already be marked too).
void AABBTree::markNodeForRefit(TreeNodeIndex nodeIndex)
{
	if(!mRefitBitmask.getBits())
		mRefitBitmask.init(mTotalNbNodes);

	PX_ASSERT(nodeIndex<mTotalNbNodes);

	// PT: lazy-create parent array. Memory is not wasted for purely static trees, or dynamic trees that only do "full refit".
	if(!mParentIndices)
	{
		mParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mTotalNbNodes, "AABB parent indices"));
		_createParentArray(mTotalNbNodes, mParentIndices, mRuntimePool, mRuntimePool, mRuntimePool);
	}

	PxU32 currentIndex = nodeIndex;
	while(1)
	{
		PX_ASSERT(currentIndex<mTotalNbNodes);
		if(mRefitBitmask.isSet(currentIndex))
		{
			// We can early exit if we already visited the node!
			return;
		}
		else
		{
			mRefitBitmask.setBit(currentIndex);
			const PxU32 currentMarkedWord = currentIndex>>5;
			mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord);

			const PxU32 parentIndex = mParentIndices[currentIndex];
			PX_ASSERT(parentIndex == 0 || parentIndex < currentIndex);
			// The root is its own parent — stop there.
			if(currentIndex == parentIndex)
				break;
			currentIndex = parentIndex;
		}
	}
}

#define FIRST_VERSION
#ifdef FIRST_VERSION
// Refits only the nodes marked via markNodeForRefit, scanning the mark bitmask from the
// highest word down. Within each word, nodes are visited from the highest bit down, so
// children (higher indices) are refit before their parents — a valid bottom-up pass.
// Consumes the marks: the bitmask words are zeroed as they are processed.
void AABBTree::refitMarkedNodes(const PxBounds3* boxes)
{
	if(!mRefitBitmask.getBits())
		return;	// No refit needed

	{
		/*const*/ PxU32* bits = const_cast<PxU32*>(mRefitBitmask.getBits());
		PxU32 size = mRefitHighestSetWord+1;
#ifdef _DEBUG
		if(1)
		{
			const PxU32 totalSize = mRefitBitmask.getSize();
			for(PxU32 i=size;i<totalSize;i++)
			{
				PX_ASSERT(!bits[i]);
			}
		}
		PxU32 nbRefit=0;
#endif
		const PxU32* indices = mIndices;
		AABBTreeRuntimeNode* const nodeBase = mRuntimePool;

		while(size--)
		{
			// Test 32 bits at a time
			const PxU32 currentBits = bits[size];
			if(!currentBits)
				continue;

			PxU32 index = (size+1)<<5;
			PxU32 mask = PxU32(1<<((index-1)&31));
			PxU32 _Count=32;
			while(_Count--)
			{
				index--;
				Ps::prefetch(nodeBase + index);

				PX_ASSERT(size==index>>5);
				PX_ASSERT(mask==PxU32(1<<(index&31)));
				if(currentBits & mask)
				{
					refitNode(nodeBase + index, boxes, indices, nodeBase);
#ifdef _DEBUG
					nbRefit++;
#endif
				}
				mask>>=1;
			}
			bits[size] = 0;
		}

		mRefitHighestSetWord = 0;
//		mRefitBitmask.clearAll();
	}
}
#endif


//#define SECOND_VERSION
#ifdef SECOND_VERSION
void AABBTree::refitMarkedNodes(const PxBounds3* boxes)
{
	/*const*/ PxU32* bits = const_cast<PxU32*>(mRefitBitmask.getBits());
	if(!bits)
		return;	// No refit needed

	const PxU32 lastSetBit = mRefitBitmask.findLast();

	const PxU32* indices = mIndices;
	AABBTreeRuntimeNode* const nodeBase = mRuntimePool;

	for(PxU32 w = 0; w <= lastSetBit >> 5; ++w)
	{
		for(PxU32 b = bits[w]; b; b &= b-1)
		{
const PxU32 index = (PxU32)(w<<5|Ps::lowestSetBit(b)); + + + + while(size--) + { + // Test 32 bits at a time + const PxU32 currentBits = bits[size]; + if(!currentBits) + continue; + + PxU32 index = (size+1)<<5; + PxU32 mask = PxU32(1<<((index-1)&31)); + PxU32 _Count=32; + while(_Count--) + { + index--; + Ps::prefetch(nodeBase + index); + + PX_ASSERT(size==index>>5); + PX_ASSERT(mask==PxU32(1<<(index&31))); + if(currentBits & mask) + { + refitNode(nodeBase + index, boxes, indices, nodeBase); +#ifdef _DEBUG + nbRefit++; +#endif + } + mask>>=1; + } + bits[size] = 0; + } + mRefitHighestSetWord = 0; +// mRefitBitmask.clearAll(); + } +} +#endif + +PX_FORCE_INLINE static void setLeafData(PxU32& leafData, const AABBTreeRuntimeNode& node, const PxU32 indicesOffset) +{ + const PxU32 index = indicesOffset + (node.mData >> 5); + const PxU32 nbPrims = node.getNbPrimitives(); + PX_ASSERT(nbPrims <= 16); + leafData = (index << 5) | ((nbPrims & 15) << 1) | 1; +} + +// Copy the tree into nodes. Update node indices, leaf indices. +void AABBTree::addRuntimeChilds(PxU32& nodeIndex, const AABBTreeMergeData& treeParams) +{ + PX_ASSERT(nodeIndex < mTotalNbNodes + treeParams.mNbNodes + 1); + const PxU32 baseNodeIndex = nodeIndex; + + // copy the src tree into dest tree nodes, update its data + for (PxU32 i = 0; i < treeParams.mNbNodes; i++) + { + PX_ASSERT(nodeIndex < mTotalNbNodes + treeParams.mNbNodes + 1); + mRuntimePool[nodeIndex].mBV = treeParams.mNodes[i].mBV; + if (treeParams.mNodes[i].isLeaf()) + { + setLeafData(mRuntimePool[nodeIndex].mData, treeParams.mNodes[i], mNbIndices); + } + else + { + const PxU32 srcNodeIndex = baseNodeIndex + (treeParams.mNodes[i].getPosIndex()); + mRuntimePool[nodeIndex].mData = srcNodeIndex << 1; + mParentIndices[srcNodeIndex] = nodeIndex; + mParentIndices[srcNodeIndex + 1] = nodeIndex; + } + nodeIndex++; + } +} + +// Merge tree into targetNode, where target node is a leaf +// 1. Allocate new nodes/parent, copy all the nodes/parents +// 2. 
Create new node at the end, copy the data from target node +// 3. Copy the merge tree after the new node, create the parent map for them, update the leaf indices +// Schematic view: +// Target Nodes: ...Tn... +// Input tree: R1->Rc0, Rc1... +// Merged tree: ...Tnc->...->Nc0,R1->Rc0,Rc1... +// where new node: Nc0==Tn and Tnc is not a leaf anymore and points to Nc0 + +void AABBTree::mergeRuntimeLeaf(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 targetMergeNodeIndex) +{ + PX_ASSERT(mParentIndices); + PX_ASSERT(targetNode.isLeaf()); + + // 1. Allocate new nodes/parent, copy all the nodes/parents + // allocate new runtime pool with max combine number of nodes + // we allocate only 1 additional node each merge + AABBTreeRuntimeNode* newRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes + treeParams.mNbNodes + 1]; + PxU32* newParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mTotalNbNodes + treeParams.mNbNodes + 1), "AABB parent indices")); + + // copy the whole target nodes, we will add the new node at the end together with the merge tree + PxMemCopy(newRuntimePool, mRuntimePool, sizeof(AABBTreeRuntimeNode)*(mTotalNbNodes)); + PxMemCopy(newParentIndices, mParentIndices, sizeof(PxU32)*(mTotalNbNodes)); + + // 2. 
Create new node at the end, copy the data from target node + PxU32 nodeIndex = mTotalNbNodes; + // copy the targetNode at the end of the new nodes + newRuntimePool[nodeIndex].mBV = targetNode.mBV; + newRuntimePool[nodeIndex].mData = targetNode.mData; + // update the parent information + newParentIndices[nodeIndex] = targetMergeNodeIndex; + + // mark for refit + if (mRefitBitmask.getBits() && mRefitBitmask.isSet(targetMergeNodeIndex)) + { + mRefitBitmask.setBit(nodeIndex); + const PxU32 currentMarkedWord = nodeIndex >> 5; + mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord); + } + + // swap pointers + PX_DELETE_ARRAY(mRuntimePool); + mRuntimePool = newRuntimePool; + PX_FREE(mParentIndices); + mParentIndices = newParentIndices; + + // 3. Copy the merge tree after the new node, create the parent map for them, update the leaf indices + nodeIndex++; + addRuntimeChilds(nodeIndex, treeParams); + PX_ASSERT(nodeIndex == mTotalNbNodes + 1 + treeParams.mNbNodes); + + // update the parent information for the input tree root node + mParentIndices[mTotalNbNodes + 1] = targetMergeNodeIndex; + + // fix the child information for the target node, was a leaf before + mRuntimePool[targetMergeNodeIndex].mData = mTotalNbNodes << 1; + + // update the total number of nodes + mTotalNbNodes = mTotalNbNodes + 1 + treeParams.mNbNodes; +} + +// Merge tree into targetNode, where target node is not a leaf +// 1. Allocate new nodes/parent, copy the nodes/parents till targetNodePosIndex +// 2. Create new node , copy the data from target node +// 3. Copy the rest of the target tree nodes/parents at the end -> targetNodePosIndex + 1 + treeParams.mNbNodes +// 4. Copy the merge tree after the new node, create the parent map for them, update the leaf indices +// 5. Go through the nodes copied at the end and fix the parents/childs +// Schematic view: +// Target Nodes: ...Tn->...->Tc0,Tc1... +// Input tree: R1->Rc0, Rc1... +// Merged tree: ...Tn->...->Nc0,R1->Rc0,Rc1...,Tc0,Tc1... 
+// where new node: Nc0->...->Tc0,Tc1 +void AABBTree::mergeRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 targetMergeNodeIndex) +{ + PX_ASSERT(mParentIndices); + PX_ASSERT(!targetNode.isLeaf()); + + // Get the target node child pos, this is where we insert the new node and the input tree + const PxU32 targetNodePosIndex = targetNode.getPosIndex(); + + // 1. Allocate new nodes/parent, copy the nodes/parents till targetNodePosIndex + // allocate new runtime pool with max combine number of nodes + // we allocate only 1 additional node each merge + AABBTreeRuntimeNode* newRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes + treeParams.mNbNodes + 1]; + PxU32* newParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mTotalNbNodes + treeParams.mNbNodes + 1), "AABB parent indices")); + // copy the untouched part of the nodes and parents + PxMemCopy(newRuntimePool, mRuntimePool, sizeof(AABBTreeRuntimeNode)*(targetNodePosIndex)); + PxMemCopy(newParentIndices, mParentIndices, sizeof(PxU32)*(targetNodePosIndex)); + + PxU32 nodeIndex = targetNodePosIndex; + // 2. Create new node , copy the data from target node + newRuntimePool[nodeIndex].mBV = targetNode.mBV; + newRuntimePool[nodeIndex].mData = ((targetNode.mData >> 1) + 1 + treeParams.mNbNodes) << 1; + // update parent information + newParentIndices[nodeIndex] = targetMergeNodeIndex; + + // handle mark for refit + if(mRefitBitmask.getBits() && mRefitBitmask.isSet(targetMergeNodeIndex)) + { + mRefitBitmask.setBit(nodeIndex); + const PxU32 currentMarkedWord = nodeIndex >> 5; + mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord); + } + + // 3. 
Copy the rest of the target tree nodes/parents at the end -> targetNodePosIndex + 1 + treeParams.mNbNodes + if(mTotalNbNodes - targetNodePosIndex) + { + PX_ASSERT(mTotalNbNodes - targetNodePosIndex > 0); + PxMemCopy(newRuntimePool + targetNodePosIndex + 1 + treeParams.mNbNodes, mRuntimePool + targetNodePosIndex, sizeof(AABBTreeRuntimeNode)*(mTotalNbNodes - targetNodePosIndex)); + PxMemCopy(newParentIndices + targetNodePosIndex + 1 + treeParams.mNbNodes, mParentIndices + targetNodePosIndex, sizeof(PxU32)*(mTotalNbNodes - targetNodePosIndex)); + } + // swap the pointers, release the old memory + PX_DELETE_ARRAY(mRuntimePool); + mRuntimePool = newRuntimePool; + PX_FREE(mParentIndices); + mParentIndices = newParentIndices; + + // 4. Copy the merge tree after the new node, create the parent map for them, update the leaf indices + nodeIndex++; + addRuntimeChilds(nodeIndex, treeParams); + PX_ASSERT(nodeIndex == targetNodePosIndex + 1 + treeParams.mNbNodes); + // update the total number of nodes + mTotalNbNodes = mTotalNbNodes + 1 + treeParams.mNbNodes; + + // update the parent information for the input tree root node + mParentIndices[targetNodePosIndex + 1] = targetMergeNodeIndex; + + // 5. 
Go through the nodes copied at the end and fix the parents/childs + for (PxU32 i = targetNodePosIndex + 1 + treeParams.mNbNodes; i < mTotalNbNodes; i++) + { + // check if the parent is the targetNode, if yes update the parent to new node + if(mParentIndices[i] == targetMergeNodeIndex) + { + mParentIndices[i] = targetNodePosIndex; + } + else + { + // if parent node has been moved, update the parent node + if(mParentIndices[i] >= targetNodePosIndex) + { + mParentIndices[i] = mParentIndices[i] + 1 + treeParams.mNbNodes; + } + else + { + // if parent has not been moved, update its child information + const PxU32 parentIndex = mParentIndices[i]; + // update the child information to point to Pos child + if(i % 2 != 0) + { + const PxU32 srcNodeIndex = mRuntimePool[parentIndex].getPosIndex(); + // if child index points to a node that has been moved, update the child index + PX_ASSERT(!mRuntimePool[parentIndex].isLeaf()); + PX_ASSERT(srcNodeIndex > targetNodePosIndex); + mRuntimePool[parentIndex].mData = (1 + treeParams.mNbNodes + srcNodeIndex) << 1; + } + } + } + if(!mRuntimePool[i].isLeaf()) + { + // update the child node index + const PxU32 srcNodeIndex = 1 + treeParams.mNbNodes + mRuntimePool[i].getPosIndex(); + mRuntimePool[i].mData = srcNodeIndex << 1; + } + } +} + +// traverse the target node, the tree is inside the targetNode, and find the best place where merge the tree +void AABBTree::traverseRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 nodeIndex) +{ + const AABBTreeRuntimeNode& srcNode = treeParams.getRootNode(); + PX_ASSERT(srcNode.mBV.isInside(targetNode.mBV)); + + // Check if the srcNode(tree) can fit inside any of the target childs. 
If yes, traverse the target tree child + AABBTreeRuntimeNode& targetPosChild = *targetNode.getPos(mRuntimePool); + if(srcNode.mBV.isInside(targetPosChild.mBV)) + { + return traverseRuntimeNode(targetPosChild, treeParams, targetNode.getPosIndex()); + } + + AABBTreeRuntimeNode& targetNegChild = *targetNode.getNeg(mRuntimePool); + if (srcNode.mBV.isInside(targetNegChild.mBV)) + { + return traverseRuntimeNode(targetNegChild, treeParams, targetNode.getNegIndex()); + } + + // we cannot traverse target anymore, lets add the srcTree to current target node + if(targetNode.isLeaf()) + mergeRuntimeLeaf(targetNode, treeParams, nodeIndex); + else + mergeRuntimeNode(targetNode, treeParams, nodeIndex); +} + +// Merge the input tree into current tree. +// Traverse the tree and find the smallest node, where the whole new tree fits. When we find the node +// we create one new node pointing to the original children and the to the input tree root. +void AABBTree::mergeTree(const AABBTreeMergeData& treeParams) +{ + // allocate new indices buffer + PxU32* newIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mNbIndices + treeParams.mNbIndices), "AABB tree indices")); + PxMemCopy(newIndices, mIndices, sizeof(PxU32)*mNbIndices); + PX_FREE(mIndices); + mIndices = newIndices; + mTotalPrims += treeParams.mNbIndices; + + // copy the new indices, re-index using the provided indicesOffset. Note that indicesOffset + // must be provided, as original mNbIndices can be different than indicesOffset dues to object releases. 
+ for (PxU32 i = 0; i < treeParams.mNbIndices; i++) + { + mIndices[mNbIndices + i] = treeParams.mIndicesOffset + treeParams.mIndices[i]; + } + + // check the mRefitBitmask if we fit all the new nodes + mRefitBitmask.resize(mTotalNbNodes + treeParams.mNbNodes + 1); + + // create the parent information so we can update it + if(!mParentIndices) + { + mParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mTotalNbNodes, "AABB parent indices")); + _createParentArray(mTotalNbNodes, mParentIndices, mRuntimePool, mRuntimePool, mRuntimePool); + } + + // if new tree is inside the root AABB we will traverse the tree to find better node where to attach the tree subnodes + // if the root is a leaf we merge with the root. + if(treeParams.getRootNode().mBV.isInside(mRuntimePool[0].mBV) && !mRuntimePool[0].isLeaf()) + { + traverseRuntimeNode(mRuntimePool[0], treeParams, 0); + } + else + { + if(mRuntimePool[0].isLeaf()) + { + mergeRuntimeLeaf(mRuntimePool[0], treeParams, 0); + } + else + { + mergeRuntimeNode(mRuntimePool[0], treeParams, 0); + } + + // increase the tree root AABB + mRuntimePool[0].mBV.include(treeParams.getRootNode().mBV); + } + +#ifdef _DEBUG + //verify parent indices + for (PxU32 i = 0; i < mTotalNbNodes; i++) + { + if (i) + { + PX_ASSERT(mRuntimePool[mParentIndices[i]].getPosIndex() == i || mRuntimePool[mParentIndices[i]].getNegIndex() == i); + } + if (!mRuntimePool[i].isLeaf()) + { + PX_ASSERT(mParentIndices[mRuntimePool[i].getPosIndex()] == i); + PX_ASSERT(mParentIndices[mRuntimePool[i].getNegIndex()] == i); + } + } + + // verify the tree nodes, leafs + for (PxU32 i = 0; i < mTotalNbNodes; i++) + { + if (mRuntimePool[i].isLeaf()) + { + const PxU32 index = mRuntimePool[i].mData >> 5; + const PxU32 nbPrim = mRuntimePool[i].getNbPrimitives(); + PX_ASSERT(index + nbPrim <= mNbIndices + treeParams.mNbIndices); + } + else + { + const PxU32 nodeIndex = (mRuntimePool[i].getPosIndex()); + PX_ASSERT(nodeIndex < mTotalNbNodes); + } + } +#endif // _DEBUG + + 
mNbIndices += treeParams.mNbIndices; +} + + + diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.h b/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.h new file mode 100644 index 00000000..0962747b --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.h @@ -0,0 +1,364 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef SQ_AABBTREE_H +#define SQ_AABBTREE_H + +#include "foundation/PxMemory.h" +#include "foundation/PxBounds3.h" +#include "PsUserAllocated.h" +#include "PsVecMath.h" +#include "SqTypedef.h" +#include "PsArray.h" + +namespace physx +{ + +using namespace shdfnd::aos; + +namespace Sq +{ + class AABBTreeUpdateMap; + + typedef Ps::Pair<PxU32, PxU32> TreeMergePair; + typedef Ps::Array<TreeMergePair > TreeMergeMap; + + class BitArray + { + public: + BitArray() : mBits(NULL), mSize(0) {} + BitArray(PxU32 nb_bits) { init(nb_bits); } + ~BitArray() { PX_FREE_AND_RESET(mBits); mBits = NULL; } + + bool init(PxU32 nb_bits); + + // Data management + PX_FORCE_INLINE void setBit(PxU32 bit_number) + { + mBits[bit_number>>5] |= 1<<(bit_number&31); + } + PX_FORCE_INLINE void clearBit(PxU32 bit_number) + { + mBits[bit_number>>5] &= ~(1<<(bit_number&31)); + } + PX_FORCE_INLINE void toggleBit(PxU32 bit_number) + { + mBits[bit_number>>5] ^= 1<<(bit_number&31); + } + + PX_FORCE_INLINE void clearAll() { PxMemZero(mBits, mSize*4); } + PX_FORCE_INLINE void setAll() { PxMemSet(mBits, 0xff, mSize*4); } + + void resize(PxU32 maxBitNumber); + + // Data access + PX_FORCE_INLINE Ps::IntBool isSet(PxU32 bit_number) const + { + return Ps::IntBool(mBits[bit_number>>5] & (1<<(bit_number&31))); + } + + PX_FORCE_INLINE const PxU32* getBits() const { return mBits; } + PX_FORCE_INLINE PxU32 getSize() const { return mSize; } + + protected: + PxU32* mBits; //!< Array of bits + PxU32 mSize; //!< Size of the array in dwords + }; + + //! Contains AABB-tree build statistics + struct BuildStats + { + BuildStats() : mCount(0), mTotalPrims(0) {} + + PxU32 mCount; //!< Number of nodes created + PxU32 mTotalPrims; //!< Total accumulated number of primitives. Should be much higher than the source + //!< number of prims, since it accumulates all prims covered by each node (i.e. 
internal + //!< nodes too, not just leaf ones) + + PX_FORCE_INLINE void reset() { mCount = mTotalPrims = 0; } + + PX_FORCE_INLINE void setCount(PxU32 nb) { mCount=nb; } + PX_FORCE_INLINE void increaseCount(PxU32 nb) { mCount+=nb; } + PX_FORCE_INLINE PxU32 getCount() const { return mCount; } + }; + + //! Contains AABB-tree build parameters + class AABBTreeBuildParams : public Ps::UserAllocated + { + public: + AABBTreeBuildParams(PxU32 limit=1, PxU32 nb_prims=0, const PxBounds3* boxes=NULL) : + mLimit(limit), mNbPrimitives(nb_prims), mAABBArray(boxes), mCache(NULL) {} + ~AABBTreeBuildParams() + { + reset(); + } + + PX_FORCE_INLINE void reset() + { + mLimit = mNbPrimitives = 0; + mAABBArray = NULL; + PX_FREE_AND_RESET(mCache); + } + + PxU32 mLimit; //!< Limit number of primitives / node. If limit is 1, build a complete tree (2*N-1 nodes) + PxU32 mNbPrimitives; //!< Number of (source) primitives. + const PxBounds3* mAABBArray; //!< Shortcut to an app-controlled array of AABBs. + PxVec3* mCache; //!< Cache for AABB centers - managed by build code. + }; + + class NodeAllocator; + + //! AABB tree node used for building + class AABBTreeBuildNode : public Ps::UserAllocated + { + public: + PX_FORCE_INLINE AABBTreeBuildNode() {} + PX_FORCE_INLINE ~AABBTreeBuildNode() {} + + PX_FORCE_INLINE const PxBounds3& getAABB() const { return mBV; } + PX_FORCE_INLINE const AABBTreeBuildNode* getPos() const { return mPos; } + PX_FORCE_INLINE const AABBTreeBuildNode* getNeg() const { const AABBTreeBuildNode* P = mPos; return P ? 
P+1 : NULL; } + + PX_FORCE_INLINE bool isLeaf() const { return !getPos(); } + + PxBounds3 mBV; //!< Global bounding-volume enclosing all the node-related primitives + const AABBTreeBuildNode* mPos; //!< "Positive" & "Negative" children + + PxU32 mNodeIndex; //!< Index of node-related primitives (in the tree's mIndices array) + PxU32 mNbPrimitives; //!< Number of primitives for this node + + // Data access + PX_FORCE_INLINE PxU32 getNbPrimitives() const { return mNbPrimitives; } + + PX_FORCE_INLINE PxU32 getNbRuntimePrimitives() const { return mNbPrimitives; } + PX_FORCE_INLINE void setNbRunTimePrimitives(PxU32 val) { mNbPrimitives = val; } + PX_FORCE_INLINE const PxU32* getPrimitives(const PxU32* base) const { return base+mNodeIndex; } + PX_FORCE_INLINE PxU32* getPrimitives(PxU32* base) { return base+mNodeIndex; } + + // Internal methods + void subdivide(const AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& allocator, PxU32* const indices); + void _buildHierarchy(AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& allocator, PxU32* const indices); + }; + + //! 
AABB tree node used for runtime (smaller than for build) + class AABBTreeRuntimeNode : public Ps::UserAllocated + { + public: + PX_FORCE_INLINE AABBTreeRuntimeNode() {} + PX_FORCE_INLINE ~AABBTreeRuntimeNode() {} + + PX_FORCE_INLINE PxU32 isLeaf() const { return mData&1; } + + PX_FORCE_INLINE const PxU32* getPrimitives(const PxU32* base) const { return base + (mData>>5); } + PX_FORCE_INLINE PxU32* getPrimitives(PxU32* base) { return base + (mData>>5); } + PX_FORCE_INLINE PxU32 getNbPrimitives() const { return (mData>>1)&15; } + + PX_FORCE_INLINE PxU32 getPosIndex() const { return mData>>1; } + PX_FORCE_INLINE PxU32 getNegIndex() const { return (mData>>1) + 1; } + PX_FORCE_INLINE const AABBTreeRuntimeNode* getPos(const AABBTreeRuntimeNode* base) const { return base + (mData>>1); } + PX_FORCE_INLINE const AABBTreeRuntimeNode* getNeg(const AABBTreeRuntimeNode* base) const { const AABBTreeRuntimeNode* P = getPos(base); return P ? P+1 : NULL;} + + PX_FORCE_INLINE AABBTreeRuntimeNode* getPos(AABBTreeRuntimeNode* base) { return base + (mData >> 1); } + PX_FORCE_INLINE AABBTreeRuntimeNode* getNeg(AABBTreeRuntimeNode* base) { AABBTreeRuntimeNode* P = getPos(base); return P ? 
P + 1 : NULL; } + + PX_FORCE_INLINE PxU32 getNbRuntimePrimitives() const { return (mData>>1)&15; } + PX_FORCE_INLINE void setNbRunTimePrimitives(PxU32 val) + { + PX_ASSERT(val<16); + PxU32 data = mData & ~(15<<1); + data |= val<<1; + mData = data; + } + + PX_FORCE_INLINE void getAABBCenterExtentsV(Vec3V* center, Vec3V* extents) const + { + const Vec4V minV = V4LoadU(&mBV.minimum.x); + const Vec4V maxV = V4LoadU(&mBV.maximum.x); + + const float half = 0.5f; + const FloatV halfV = FLoad(half); + + *extents = Vec3V_From_Vec4V(V4Scale(V4Sub(maxV, minV), halfV)); + *center = Vec3V_From_Vec4V(V4Scale(V4Add(maxV, minV), halfV)); + } + + PX_FORCE_INLINE void getAABBCenterExtentsV2(Vec3V* center, Vec3V* extents) const + { + const Vec4V minV = V4LoadU(&mBV.minimum.x); + const Vec4V maxV = V4LoadU(&mBV.maximum.x); + + *extents = Vec3V_From_Vec4V(V4Sub(maxV, minV)); + *center = Vec3V_From_Vec4V(V4Add(maxV, minV)); + } + + PX_FORCE_INLINE void getAABBMinMaxV(Vec4V* minV, Vec4V* maxV) const + { + *minV = V4LoadU(&mBV.minimum.x); + *maxV = V4LoadU(&mBV.maximum.x); + } + + PxBounds3 mBV; // Global bounding-volume enclosing all the node-related primitives + PxU32 mData; // 27 bits node or prim index|4 bits #prims|1 bit leaf + }; + + //! 
Contains AABB-tree merge parameters + class AABBTreeMergeData + { + public: + AABBTreeMergeData(PxU32 nbNodes, const AABBTreeRuntimeNode* nodes, PxU32 nbIndices, const PxU32* indices, PxU32 indicesOffset) : + mNbNodes(nbNodes), mNodes(nodes), mNbIndices(nbIndices), mIndices(indices), mIndicesOffset(indicesOffset) + { + } + + ~AABBTreeMergeData() {} + + PX_FORCE_INLINE const AABBTreeRuntimeNode& getRootNode() const { return mNodes[0]; } + + public: + PxU32 mNbNodes; //!< Number of nodes of AABB tree merge + const AABBTreeRuntimeNode* mNodes; //!< Nodes of AABB tree merge + + PxU32 mNbIndices; //!< Number of indices of AABB tree merge + const PxU32* mIndices; //!< Indices of AABB tree merge + + PxU32 mIndicesOffset; //!< Indices offset from pruning pool + }; + + // Progressive building + class FIFOStack; + //~Progressive building + + //! For complete trees we can predict the final number of nodes and preallocate them. For incomplete trees we can't. + //! But we don't want to allocate nodes one by one (which would be quite slow), so we use this helper class to + //! allocate N nodes at once, while minimizing the amount of nodes allocated for nothing. An initial amount of + //! nodes is estimated using the max number for a complete tree, and the user-defined number of primitives per leaf. + //! In ideal cases this estimated number will be quite close to the final number of nodes. When that number is not + //! enough though, slabs of N=1024 extra nodes are allocated until the build is complete. 
+ class NodeAllocator : public Ps::UserAllocated + { + public: + NodeAllocator(); + ~NodeAllocator(); + + void release(); + void init(PxU32 nbPrimitives, PxU32 limit); + void flatten(AABBTreeRuntimeNode* dest); + AABBTreeBuildNode* getBiNode(); + + AABBTreeBuildNode* mPool; + + struct Slab + { + PX_FORCE_INLINE Slab() {} + PX_FORCE_INLINE Slab(AABBTreeBuildNode* pool, PxU32 nbUsedNodes, PxU32 maxNbNodes) : mPool(pool), mNbUsedNodes(nbUsedNodes), mMaxNbNodes(maxNbNodes) {} + AABBTreeBuildNode* mPool; + PxU32 mNbUsedNodes; + PxU32 mMaxNbNodes; + }; + Ps::Array<Slab> mSlabs; + PxU32 mCurrentSlabIndex; + PxU32 mTotalNbNodes; + }; + + //! AABB-tree, N primitives/leaf + class AABBTree : public Ps::UserAllocated + { + public: + AABBTree(); + ~AABBTree(); + // Build + bool build(AABBTreeBuildParams& params); + // Progressive building + PxU32 progressiveBuild(AABBTreeBuildParams& params, BuildStats& stats, PxU32 progress, PxU32 limit); + //~Progressive building + void release(bool clearRefitMap=true); + + // Merge tree with another one + void mergeTree(const AABBTreeMergeData& tree); + // Initialize tree from given merge data + void initTree(const AABBTreeMergeData& tree); + + // Data access + PX_FORCE_INLINE const PxU32* getIndices() const { return mIndices; } + PX_FORCE_INLINE PxU32* getIndices() { return mIndices; } + PX_FORCE_INLINE void setIndices(PxU32* indices) { mIndices = indices; } + PX_FORCE_INLINE PxU32 getNbNodes() const { return mTotalNbNodes; } + PX_FORCE_INLINE const AABBTreeRuntimeNode* getNodes() const { return mRuntimePool; } + PX_FORCE_INLINE AABBTreeRuntimeNode* getNodes() { return mRuntimePool; } + PX_FORCE_INLINE void setNodes(AABBTreeRuntimeNode* nodes) { mRuntimePool = nodes; } + PX_FORCE_INLINE PxU32 getTotalPrims() const { return mTotalPrims; } + +#if PX_DEBUG + void validate() const; +#endif + void shiftOrigin(const PxVec3& shift); + + // Shift indices of the tree by offset. 
Used for merged trees, when initial indices needs to be shifted to match indices in current pruning pool + void shiftIndices(PxU32 offset); + + private: + PxU32* mIndices; //!< Indices in the app list. Indices are reorganized during build (permutation). + PxU32 mNbIndices; //!< Nb indices + AABBTreeRuntimeNode* mRuntimePool; //!< Linear pool of nodes. + NodeAllocator mNodeAllocator; + PxU32* mParentIndices; //!< PT: hot/cold split, keep parent data in separate array + // Stats + PxU32 mTotalNbNodes; //!< Number of nodes in the tree. + PxU32 mTotalPrims; //!< Copy of final BuildStats::mTotalPrims + + // Progressive building + FIFOStack* mStack; + //~Progressive building + bool buildInit(AABBTreeBuildParams& params, BuildStats& stats); + void buildEnd(AABBTreeBuildParams& params, BuildStats& stats); + + // tree merge + void mergeRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& tree, PxU32 targetNodeIndex); + void mergeRuntimeLeaf(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& tree, PxU32 targetNodeIndex); + void addRuntimeChilds(PxU32& nodeIndex, const AABBTreeMergeData& tree); + void traverseRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& tree, PxU32 nodeIndex); + // REFIT + public: + void fullRefit(const PxBounds3* boxes); + + // adds node[index] to a list of nodes to refit when refitMarkedNodes is called + // Note that this includes updating the hierarchy up the chain + void markNodeForRefit(TreeNodeIndex nodeIndex); + void refitMarkedNodes(const PxBounds3* boxes); + private: + BitArray mRefitBitmask; //!< bit is set for each node index in markForRefit + PxU32 mRefitHighestSetWord; + //~REFIT + }; + +} // namespace Sq + +} + +#endif // SQ_AABBTREE_H diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeQuery.h b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeQuery.h new file mode 100644 index 00000000..299d8993 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeQuery.h @@ -0,0 +1,234 @@ +// This code 
contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef SQ_AABBTREEQUERY_H
#define SQ_AABBTREEQUERY_H

#include "SqAABBTree.h"
#include "SqPrunerTestsSIMD.h"

namespace physx
{
namespace Sq
{
	// Fixed-size traversal stack for the non-recursive tree walks below.
	// PX_ASSERTs at each push guard against overflow.
	#define RAW_TRAVERSAL_STACK_SIZE 256

	//////////////////////////////////////////////////////////////////////////

	// Loads the bounds of pool entry 'poolIndex' and returns them as doubled
	// values: center*2 = (max+min), extents*2 = (max-min). Callers either
	// rescale by 0.5 (overlap path) or feed tests pre-scaled for *2 values
	// (raycast path) — this saves the halving per box.
	static PX_FORCE_INLINE void getBoundsTimesTwo(Vec4V& center, Vec4V& extents, const PxBounds3* boxes, PoolIndex poolIndex)
	{
		const PxBounds3* objectBounds = boxes + poolIndex;

		// NOTE(review): V4LoadU reads 4 floats from &minimum.x, i.e. it also
		// touches maximum.x — presumably safe given PxBounds3 stores the two
		// PxVec3 members contiguously; confirm against PxBounds3's layout.
		const Vec4V minV = V4LoadU(&objectBounds->minimum.x);
		const Vec4V maxV = V4LoadU(&objectBounds->maximum.x);

		center = V4Add(maxV, minV);
		extents = V4Sub(maxV, minV);
	}

	//////////////////////////////////////////////////////////////////////////

	// Generic overlap query over an AABBTree. 'Test' is a SIMD volume test
	// (see SqPrunerTestsSIMD.h) invoked on node bounds as (center, extents).
	// Returns false if the visitor aborted the query, true otherwise.
	template<typename Test>
	class AABBTreeOverlap
	{
	public:
		bool operator()(const PrunerPayload* objects, const PxBounds3* boxes, const AABBTree& tree, const Test& test, PrunerCallback& visitor)
		{
			using namespace Cm;

			const AABBTreeRuntimeNode* stack[RAW_TRAVERSAL_STACK_SIZE];
			const AABBTreeRuntimeNode* const nodeBase = tree.getNodes();
			stack[0] = nodeBase;
			PxU32 stackIndex = 1;

			while (stackIndex > 0)
			{
				const AABBTreeRuntimeNode* node = stack[--stackIndex];
				Vec3V center, extents;
				node->getAABBCenterExtentsV(&center, &extents);
				// Descend as long as the current node's bounds pass the test;
				// siblings are pushed, the first child is followed in-place.
				while (test(center, extents))
				{
					if (node->isLeaf())
					{
						PxU32 nbPrims = node->getNbPrimitives();
						// With a single primitive the leaf bounds are the
						// primitive bounds, so the per-object test is redundant.
						const bool doBoxTest = nbPrims > 1;
						const PxU32* prims = node->getPrimitives(tree.getIndices());
						while (nbPrims--)
						{
							const PxU32* prunableIndex = prims;
							prims++;

							const PoolIndex poolIndex = *prunableIndex;
							if (doBoxTest)
							{
								Vec4V center2, extents2;
								getBoundsTimesTwo(center2, extents2, boxes, poolIndex);

								// Rescale the doubled bounds back to actual size
								// for the per-object test.
								const float half = 0.5f;
								const FloatV halfV = FLoad(half);

								const Vec4V extents_ = V4Scale(extents2, halfV);
								const Vec4V center_ = V4Scale(center2, halfV);

								if (!test(Vec3V_From_Vec4V(center_), Vec3V_From_Vec4V(extents_)))
									continue;
							}

							PxReal unusedDistance;
							// Visitor returning false aborts the whole query.
							if (!visitor.invoke(unusedDistance, objects[poolIndex]))
								return false;
						}
						break;
					}

					const AABBTreeRuntimeNode* children = node->getPos(nodeBase);

					node = children;
					stack[stackIndex++] = children + 1;
					PX_ASSERT(stackIndex < RAW_TRAVERSAL_STACK_SIZE);
					node->getAABBCenterExtentsV(&center, &extents);
				}
			}
			return true;
		}
	};

	//////////////////////////////////////////////////////////////////////////

	// Tests every primitive of a leaf node against the ray/sweep test and
	// invokes the callback on hits. On a closer hit (md < oldMaxDist) the ray
	// is shortened for the remainder of the traversal. Returns false if the
	// callback aborted the query.
	template <bool tInflate> // use inflate=true for sweeps, inflate=false for raycasts
	static PX_FORCE_INLINE bool doLeafTest(	const AABBTreeRuntimeNode* node, Gu::RayAABBTest& test, PxReal& md, PxReal oldMaxDist,
											const PrunerPayload* objects, const PxBounds3* boxes, const AABBTree& tree,
											PxReal& maxDist, PrunerCallback& pcb)
	{
		PxU32 nbPrims = node->getNbPrimitives();
		const bool doBoxTest = nbPrims > 1;
		const PxU32* prims = node->getPrimitives(tree.getIndices());
		while (nbPrims--)
		{
			const PxU32* prunableIndex = prims;
			prims++;

			const PoolIndex poolIndex = *prunableIndex;
			if (doBoxTest)
			{
				// RayAABBTest is initialized with doubled values (see below),
				// so the doubled per-object bounds can be tested directly.
				Vec4V center_, extents_;
				getBoundsTimesTwo(center_, extents_, boxes, poolIndex);

				if (!test.check<tInflate>(Vec3V_From_Vec4V(center_), Vec3V_From_Vec4V(extents_)))
					continue;
			}

			if (!pcb.invoke(md, objects[poolIndex]))
				return false;

			if (md < oldMaxDist)
			{
				// Closer hit found: clip the ray so later nodes can be culled.
				maxDist = md;
				test.setDistance(md);
			}
		}
		return true;
	}

	//////////////////////////////////////////////////////////////////////////

	// Ordered raycast/sweep traversal: when both children are hit, the child
	// whose center is nearer along the ray is visited first, the other is
	// pushed for later. Returns false if the callback aborted the query.
	template <bool tInflate> // use inflate=true for sweeps, inflate=false for raycasts
	class AABBTreeRaycast
	{
	public:
		bool operator()(
			const PrunerPayload* objects, const PxBounds3* boxes, const AABBTree& tree,
			const PxVec3& origin, const PxVec3& unitDir, PxReal& maxDist, const PxVec3& inflation,
			PrunerCallback& pcb)
		{
			using namespace Cm;

			// PT: we will pass center*2 and extents*2 to the ray-box code, to save some work per-box
			// So we initialize the test with values multiplied by 2 as well, to get correct results
			Gu::RayAABBTest test(origin*2.0f, unitDir*2.0f, maxDist, inflation*2.0f);

			const AABBTreeRuntimeNode* stack[RAW_TRAVERSAL_STACK_SIZE];	// stack always contains PPU addresses
			const AABBTreeRuntimeNode* const nodeBase = tree.getNodes();
			stack[0] = nodeBase;
			PxU32 stackIndex = 1;

			PxReal oldMaxDist;
			while (stackIndex--)
			{
				const AABBTreeRuntimeNode* node = stack[stackIndex];
				Vec3V center, extents;
				node->getAABBCenterExtentsV2(&center, &extents);
				if (test.check<tInflate>(center, extents))	// TODO: try timestamp ray shortening to skip this
				{
					PxReal md = maxDist;	// has to be before the goto below to avoid compile error
					while (!node->isLeaf())
					{
						const AABBTreeRuntimeNode* children = node->getPos(nodeBase);

						Vec3V c0, e0;
						children[0].getAABBCenterExtentsV2(&c0, &e0);
						const PxU32 b0 = test.check<tInflate>(c0, e0);

						Vec3V c1, e1;
						children[1].getAABBCenterExtentsV2(&c1, &e1);
						const PxU32 b1 = test.check<tInflate>(c1, e1);

						if (b0 && b1)	// if both intersect, push the one with the further center on the stack for later
						{
							// & 1 because FAllGrtr behavior differs across platforms
							const PxU32 bit = FAllGrtr(V3Dot(V3Sub(c1, c0), test.mDir), FZero()) & 1;
							stack[stackIndex++] = children + bit;
							node = children + (1 - bit);
							PX_ASSERT(stackIndex < RAW_TRAVERSAL_STACK_SIZE);
						}
						else if (b0)
							node = children;
						else if (b1)
							node = children + 1;
						else
							goto skip_leaf_code;	// neither child hit: abandon this subtree
					}

					oldMaxDist = maxDist;	// we copy since maxDist can be updated in the callback and md<maxDist test below can fail

					if (!doLeafTest<tInflate>(node, test, md, oldMaxDist,
						objects, boxes, tree,
						maxDist,
						pcb))
						return false;
				skip_leaf_code:;
				}
			}
			return true;
		}
	};
}
}

#endif // SQ_AABBTREEQUERY_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.cpp b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.cpp new file mode 100644 index 00000000..807de9d1 --- /dev/null +++
b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.cpp @@ -0,0 +1,197 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "SqAABBTreeUpdateMap.h"
#include "SqAABBTree.h"

using namespace physx;
using namespace Sq;

// Capacity slack (in entries) below which we don't bother shrinking the map.
static const PxU32 SHRINK_THRESHOLD = 1024;

// Rebuilds the pool-index -> tree-node-index mapping from scratch for the
// given tree. 'nbObjects' is the number of pruning-pool entries the map must
// span; entries not referenced by any leaf stay INVALID_NODE_ID.
void AABBTreeUpdateMap::initMap(PxU32 nbObjects, const AABBTree& tree)
{
	if(!nbObjects)
	{
		release();
		return;
	}

	// Memory management
	{
		const PxU32 mapSize = nbObjects;
		// Reserve 25% slack so repeated small growths don't each reallocate.
		const PxU32 targetCapacity = mapSize + (mapSize>>2);

		PxU32 currentCapacity = mMapping.capacity();
		if( ( targetCapacity < (currentCapacity>>1) ) && ( (currentCapacity-targetCapacity) > SHRINK_THRESHOLD ) )
		{
			// trigger reallocation of a smaller array, there is enough memory to save
			currentCapacity = 0;
		}

		if(mapSize > currentCapacity)
		{
			// the mapping values are invalid and reset below in any case
			// so there is no need to copy the values at all
			mMapping.reset();
			mMapping.reserve(targetCapacity);	// since size is 0, reserve will also just allocate
		}

		mMapping.forceSize_Unsafe(mapSize);

		for(PxU32 i=0;i<mapSize;i++)
			mMapping[i] = INVALID_NODE_ID;
	}

	// Walk all leaves and record, for every primitive they hold, which node
	// owns it. This is the inverse of the tree's primitive-index arrays.
	const PxU32 nbNodes = tree.getNbNodes();
	const AABBTreeRuntimeNode* nodes = tree.getNodes();
	const PxU32* indices = tree.getIndices();
	for(TreeNodeIndex i=0;i<nbNodes;i++)
	{
		if(nodes[i].isLeaf())
		{
			const PxU32 nbPrims = nodes[i].getNbRuntimePrimitives();
			// PT: with multiple primitives per node, several mapping entries will point to the same node.
			PX_ASSERT(nbPrims<=16);
			for(PxU32 j=0;j<nbPrims;j++)
			{
				const PxU32 index = nodes[i].getPrimitives(indices)[j];
				PX_ASSERT(index<nbObjects);
				mMapping[index] = i;
			}
		}
	}
}

// Keeps the map and the tree's leaf primitive lists consistent after the
// pruning pool removed entry 'prunerIndex0' with a swap-with-last: the pool
// object previously at 'prunerIndex1' now lives at 'prunerIndex0'.
void AABBTreeUpdateMap::invalidate(PoolIndex prunerIndex0, PoolIndex prunerIndex1, AABBTree& tree)
{
	// prunerIndex0 and prunerIndex1 are both indices into the pool, not handles
	// prunerIndex0 is the index in the pruning pool for the node that was just removed
	// prunerIndex1 is the index in the pruning pool for the node
	const TreeNodeIndex nodeIndex0 = prunerIndex0<mMapping.size() ? mMapping[prunerIndex0] : INVALID_NODE_ID;
	const TreeNodeIndex nodeIndex1 = prunerIndex1<mMapping.size() ? mMapping[prunerIndex1] : INVALID_NODE_ID;

	//printf("map invalidate pi0:%x ni0:%x\t",prunerIndex0,nodeIndex0);
	//printf(" replace with pi1:%x ni1:%x\n",prunerIndex1,nodeIndex1);

	// if nodeIndex0 exists:
	//		invalidate node 0
	//		invalidate map prunerIndex0
	// if nodeIndex1 exists:
	//		point node 1 to prunerIndex0
	//		map prunerIndex0 to node 1
	//		invalidate map prunerIndex1

	// eventually:
	// - node 0 is invalid
	// - prunerIndex0 is mapped to node 1 or
	//		is not mapped if prunerIndex1 is not mapped
	//		is not mapped if prunerIndex0==prunerIndex1
	// - node 1 points to prunerIndex0 or
	//		is invalid if prunerIndex1 is not mapped
	//		is invalid if prunerIndex0==prunerIndex1
	// - prunerIndex1 is not mapped

	AABBTreeRuntimeNode* nodes = tree.getNodes();

	if(nodeIndex0!=INVALID_NODE_ID)
	{
		PX_ASSERT(nodeIndex0 < tree.getNbNodes());
		PX_ASSERT(nodes[nodeIndex0].isLeaf());
		AABBTreeRuntimeNode* node0 = nodes + nodeIndex0;
		const PxU32 nbPrims = node0->getNbRuntimePrimitives();
		PX_ASSERT(nbPrims <= 16);

		// retrieve the primitives pointer
		PxU32* primitives = node0->getPrimitives(tree.getIndices());
		PX_ASSERT(primitives);

		// PT: look for desired pool index in the leaf
		bool foundIt = false;
		for(PxU32 i=0;i<nbPrims;i++)
		{
			PX_ASSERT(mMapping[primitives[i]] == nodeIndex0);	// PT: all primitives should point to the same leaf node

			if(prunerIndex0 == primitives[i])
			{
				foundIt = true;
				const PxU32 last = nbPrims-1;
				node0->setNbRunTimePrimitives(last);
				primitives[i] = INVALID_POOL_ID;			// Mark primitive index as invalid in the node
				mMapping[prunerIndex0] = INVALID_NODE_ID;	// invalidate the node index for pool 0

				// PT: swap within the leaf node. No need to update the mapping since they should all point
				// to the same tree node anyway.
				if(last!=i)
					Ps::swap(primitives[i], primitives[last]);
				break;
			}
		}
		PX_ASSERT(foundIt);
		PX_UNUSED(foundIt);
	}

	if (nodeIndex1!=INVALID_NODE_ID)
	{
		// PT: with multiple primitives per leaf, tree nodes may very well be the same for different pool indices.
		// However the pool indices may be the same when a swap has been skipped in the pruning pool, in which
		// case there is nothing to do.
		if(prunerIndex0!=prunerIndex1)
		{
			PX_ASSERT(nodeIndex1 < tree.getNbNodes());
			PX_ASSERT(nodes[nodeIndex1].isLeaf());
			AABBTreeRuntimeNode* node1 = nodes + nodeIndex1;
			const PxU32 nbPrims = node1->getNbRuntimePrimitives();
			PX_ASSERT(nbPrims <= 16);

			// retrieve the primitives pointer
			PxU32* primitives = node1->getPrimitives(tree.getIndices());
			PX_ASSERT(primitives);

			// PT: look for desired pool index in the leaf
			bool foundIt = false;
			for(PxU32 i=0;i<nbPrims;i++)
			{
				PX_ASSERT(mMapping[primitives[i]] == nodeIndex1);	// PT: all primitives should point to the same leaf node

				if(prunerIndex1 == primitives[i])
				{
					foundIt = true;
					primitives[i] = prunerIndex0;				// point node 1 to the pool object moved to ID 0
					mMapping[prunerIndex0] = nodeIndex1;		// pool 0 is pointed at by node 1 now
					mMapping[prunerIndex1] = INVALID_NODE_ID;	// pool 1 is no longer stored in the tree
					break;
				}
			}
			PX_ASSERT(foundIt);
			PX_UNUSED(foundIt);
		}
	}
}

diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.h b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.h new file mode 100644 index 00000000..58418b03 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.h @@ -0,0 +1,82 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef SQ_PRUNERTREEMAP_H +#define SQ_PRUNERTREEMAP_H + +#include "SqTypedef.h" +#include "PsArray.h" + +namespace physx +{ +namespace Sq +{ + static const PxU32 INVALID_NODE_ID = 0xFFffFFff; + static const PxU32 INVALID_POOL_ID = 0xFFffFFff; + + // Maps pruning pool indices to AABB-tree indices (i.e. 
locates the object's box in the aabb-tree nodes pool) + // + // The map spans pool indices from 0..N-1, where N is the number of pool entries when the map was created from a tree. + // + // It maps: + // to node indices in the range 0..M-1, where M is the number of nodes in the tree the map was created from, + // or to INVALID_NODE_ID if the pool entry was removed or pool index is outside input domain. + // + // The map is the inverse of the tree mapping: (node[map[poolID]].primitive == poolID) is true at all times. + + class AABBTreeUpdateMap + { + public: + AABBTreeUpdateMap() {} + ~AABBTreeUpdateMap() {} + + void release() + { + mMapping.reset(); + } + + // indices offset used when indices are shifted from objects (used for merged trees) + void initMap(PxU32 numPoolObjects, const Sq::AABBTree& tree); + + void invalidate(PoolIndex poolIndex, PoolIndex replacementPoolIndex, Sq::AABBTree& tree); + + PX_FORCE_INLINE TreeNodeIndex operator[](PxU32 poolIndex) const + { + return poolIndex < mMapping.size() ? mMapping[poolIndex] : INVALID_NODE_ID; + } + private: + // maps from prunerIndex (index in the PruningPool) to treeNode index + // this will only map to leaf tree nodes + Ps::Array<TreeNodeIndex> mMapping; + }; + +} +} + +#endif diff --git a/PhysX_3.4/Source/SceneQuery/src/SqBounds.cpp b/PhysX_3.4/Source/SceneQuery/src/SqBounds.cpp new file mode 100644 index 00000000..3bae047d --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqBounds.cpp @@ -0,0 +1,75 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxTransform.h" +#include "SqBounds.h" +#include "CmTransformUtils.h" +#include "SqPruner.h" +#include "ScbShape.h" +#include "ScbActor.h" +#include "ScbRigidStatic.h" +#include "ScbBody.h" +#include "PsAllocator.h" +#include "GuBounds.h" + +using namespace physx; +using namespace Sq; + +void Sq::computeStaticWorldAABB(PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor) +{ + const PxTransform& shape2Actor = scbShape.getShape2Actor(); + + PX_ALIGN(16, PxTransform) globalPose; + + Cm::getStaticGlobalPoseAligned(static_cast<const Scb::RigidStatic&>(scbActor).getActor2World(), shape2Actor, globalPose); + Gu::computeBounds(bounds, scbShape.getGeometry(), globalPose, 0.0f, NULL, SQ_PRUNER_INFLATION, false); +} + +void Sq::computeDynamicWorldAABB(PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor) +{ + const PxTransform& shape2Actor = scbShape.getShape2Actor(); + + PX_ALIGN(16, PxTransform) globalPose; + { + const Scb::Body& body = static_cast<const Scb::Body&>(scbActor); + PX_ALIGN(16, PxTransform) kinematicTarget; + const PxU16 sqktFlags = PxRigidBodyFlag::eKINEMATIC | PxRigidBodyFlag::eUSE_KINEMATIC_TARGET_FOR_SCENE_QUERIES; + const bool useTarget = (PxU16(body.getFlags()) & sqktFlags) == sqktFlags; + const PxTransform& body2World = (useTarget && body.getKinematicTarget(kinematicTarget)) ? 
kinematicTarget : body.getBody2World(); + Cm::getDynamicGlobalPoseAligned(body2World, shape2Actor, body.getBody2Actor(), globalPose); + } + + Gu::computeBounds(bounds, scbShape.getGeometry(), globalPose, 0.0f, NULL, SQ_PRUNER_INFLATION, false); +} + +const ComputeBoundsFunc Sq::gComputeBoundsTable[2] = +{ + computeStaticWorldAABB, + computeDynamicWorldAABB +}; diff --git a/PhysX_3.4/Source/SceneQuery/src/SqBounds.h b/PhysX_3.4/Source/SceneQuery/src/SqBounds.h new file mode 100644 index 00000000..60c6ad6f --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqBounds.h @@ -0,0 +1,70 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef SQ_BOUNDS_H
#define SQ_BOUNDS_H

#include "CmPhysXCommon.h"
#include "foundation/PxBounds3.h"
#include "PsVecMath.h"

namespace physx
{
	namespace Scb
	{
		class Shape;
		class Actor;
	}

namespace Sq
{
	// Compute the world-space AABB of a shape on a static / dynamic actor
	// (implemented in SqBounds.cpp).
	void computeStaticWorldAABB(PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor);
	void computeDynamicWorldAABB(PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor);

	typedef void(*ComputeBoundsFunc) (PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor);

	// Dispatch table: [0] = static, [1] = dynamic.
	extern const ComputeBoundsFunc gComputeBoundsTable[2];

	// Symmetrically inflates 'src' by 0.5% of its size on each side
	// (i.e. the result is 1% larger overall) and writes it to 'dst'.
	// 'dst' may alias 'src'.
	PX_FORCE_INLINE void inflateBounds(PxBounds3& dst, const PxBounds3& src)
	{
		using namespace physx::shdfnd::aos;

		// NOTE(review): these unaligned 4-float loads read one float past each
		// PxVec3 member — presumably safe because minimum/maximum are stored
		// contiguously in PxBounds3; confirm against PxBounds3's layout.
		const Vec4V minV = V4LoadU(&src.minimum.x);
		const Vec4V maxV = V4LoadU(&src.maximum.x);
		const Vec4V eV = V4Scale(V4Sub(maxV, minV), FLoad(0.5f* 0.01f));

		// The 4-float store to minimum also overwrites dst.maximum.x; the
		// maximum is written afterwards so the final value is correct.
		V4StoreU(V4Sub(minV, eV), &dst.minimum.x);
		PX_ALIGN(16, PxVec4) max4;
		V4StoreA(V4Add(maxV, eV), &max4.x);
		dst.maximum = PxVec3(max4.x, max4.y, max4.z);
	}
}
}

#endif // SQ_BOUNDS_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.cpp b/PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.cpp new file mode 100644 index 00000000..35a5ca13 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.cpp @@ -0,0 +1,2601 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "foundation/PxMemory.h"
#include "SqBucketPruner.h"
#include "GuIntersectionBoxBox.h"
#include "GuInternal.h"
#include "PsVecMath.h"
#include "foundation/PxUnionCast.h"
#include "CmRadixSortBuffered.h"
#include "CmRenderOutput.h"
#include "PsFPU.h"
#include "PsBitUtils.h"
#include "PsIntrinsics.h"
#include "GuBounds.h"

using namespace physx::shdfnd::aos;

using namespace physx;
using namespace Sq;
using namespace Gu;
using namespace Ps;

#define INVALID_HANDLE	0xffffffff

/*
TODO:
- if Core is always available, mSortedObjects could be replaced with just indices to mCoreObjects => less memory.
- UTS:
	- test that queries against empty boxes all return false
- invalidate after 16 removes
- check shiftOrigin stuff (esp what happens to emptied boxes)
	- isn't there a very hard-to-find bug waiting to happen in there,
	when the shift touches the empty box and overrides mdata0/mdata1 with "wrong" values that break the sort?
- revisit updateObject/removeObject
- optimize/cache computation of free global bounds before clipRay

- remove temp memory buffers (sorted arrays)
- take care of code duplication
- better code to generate SIMD 0x7fffffff
- refactor SIMD tests
- optimize:
	- better split values
	- optimize update (bitmap, less data copy, etc)
	- use ray limits in traversal code too?
	- the SIMD XBOX code operates on Min/Max rather than C/E. Change format?
	- or just try the alternative ray-box code (as on PC) ==> pretty much exactly the same speed
*/

//#define VERIFY_SORT
//#define BRUTE_FORCE_LIMIT	32
#define LOCAL_SIZE	256			// Size of various local arrays. Dynamic allocations occur if exceeded.
#define USE_SIMD				// Use SIMD code or not (sanity performance check)
#define NODE_SORT				// Enable/disable node sorting
#define NODE_SORT_MIN_COUNT	16	// Limit above which node sorting is performed
#if PX_INTEL_FAMILY
	#if COMPILE_VECTOR_INTRINSICS
		#define CAN_USE_MOVEMASK
	#endif
#endif

// Rounds 'size' up to the next multiple of 16 bytes.
#define ALIGN16(size) ((unsigned(size)+15) & unsigned(~15))

// In debug, buffers may not be 16-byte aligned, so fall back to unaligned ops.
#ifdef _DEBUG
	#define AlignedLoad		V4LoadU
	#define AlignedStore	V4StoreU
#else
	#define AlignedLoad		V4LoadA
	#define AlignedStore	V4StoreA
#endif

// SAT-based ray-box overlap test has accuracy issues for long rays, so we clip them against the global AABB to limit these issues.
// Shortens 'maxDist' to a conservative value that still covers the whole of
// the box (boxMin, boxMax) as seen from rayOrig along rayDir.
static void clipRay(const PxVec3& rayOrig, const PxVec3& rayDir, float& maxDist, const PxVec3& boxMin, const PxVec3& boxMax)
{
	const PxVec3 boxCenter = (boxMax + boxMin)*0.5f;
	const PxVec3 boxExtents = (boxMax - boxMin)*0.5f;
	const float dpc = boxCenter.dot(rayDir);
	const float extentsMagnitude = boxExtents.magnitude();
	const float dpMin = dpc - extentsMagnitude;
	const float dpMax = dpc + extentsMagnitude;
	const float dpO = rayOrig.dot(rayDir);
	const float boxLength = extentsMagnitude * 2.0f;
	const float distToBox = PxMin(PxAbs(dpMin - dpO), PxAbs(dpMax - dpO));
	maxDist = distToBox + boxLength * 2.0f;
}

// All 5 bucket boxes (4 quadrants + cross bucket) start out empty.
BucketPrunerNode::BucketPrunerNode()
{
	for(PxU32 i=0;i<5;i++)
		mBucketBox[i].setEmpty();
}

// Lookup table translating the 4-bit quadrant classification (plus bit 4 =
// "cross bucket") into a bucket index 0..4 (4 = cross/overlap bucket).
static const PxU8 gCodes[] = {	4, 4, 4, 4, 4, 3, 2, 2,
								4, 1, 0, 0, 4, 1, 0, 0,
								4, 1, 0, 0, 2, 1, 0, 0,
								3, 1, 0, 0, 2, 1, 0, 0};

#ifdef CAN_USE_MOVEMASK
/*static PX_FORCE_INLINE PxU32 classifyBox_x86(const BucketBox& box, const PxVec4& limits, const bool useY, const bool isCrossBucket)
{
	const Vec4V extents = AlignedLoad(&box.mExtents.x);
	const Vec4V center = AlignedLoad(&box.mCenter.x);
	const Vec4V plus = V4Add(extents, center);
	const Vec4V minus = V4Sub(extents, center);

	Vec4V tmp;
	if(useY)	// PT: this is a constant so branch prediction works here
		tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,1,0,1));
	else
		tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,2,0,2));

	const Vec4V comp = _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(0,2,1,3));	// oh well, nm

	const PxU32 Code = (PxU32)_mm_movemask_ps(V4IsGrtr(V4LoadA(&limits.x), comp));
	return gCodes[Code | PxU32(isCrossBucket)<<4];
}*/

// SSE variant of classifyBox: builds the 4 comparison results in one vector
// and extracts them with movemask instead of 4 scalar compares.
static PX_FORCE_INLINE PxU32 classifyBox_x86(const Vec4V boxMin, const Vec4V boxMax, const PxVec4& limits, const bool useY, const bool isCrossBucket)
{
	const Vec4V plus = boxMax;
	const Vec4V minus = V4Neg(boxMin);

	Vec4V tmp;
	if(useY)	// PT: this is a constant so branch prediction works here
		tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,1,0,1));
	else
		tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,2,0,2));

	const Vec4V comp = _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(0,2,1,3));	// oh well, nm

	const PxU32 Code = PxU32(_mm_movemask_ps(V4IsGrtr(V4LoadA(&limits.x), comp)));
	return gCodes[Code | PxU32(isCrossBucket)<<4];
}
#endif

// Scalar classifyBox is used when movemask is unavailable, and additionally in
// debug builds to cross-check the SSE version.
#ifdef CAN_USE_MOVEMASK
	#if PX_DEBUG
		#define USE_CLASSIFY_BOX
	#endif
#else
	#define USE_CLASSIFY_BOX
#endif

#ifdef USE_CLASSIFY_BOX
// Classifies 'box' against the two split planes (x = limitX, yz = limitYZ)
// and returns the bucket index 0..4 via the gCodes table.
static PX_FORCE_INLINE PxU32 classifyBox(const BucketBox& box, const float limitX, const float limitYZ, const PxU32 yz, const bool isCrossBucket)
{
	const bool upperPart = (box.mCenter[yz] + box.mExtents[yz])<limitYZ;
	const bool lowerPart = (box.mCenter[yz] - box.mExtents[yz])>limitYZ;
	const bool leftPart = (box.mCenter.x + box.mExtents.x)<limitX;
	const bool rightPart = (box.mCenter.x - box.mExtents.x)>limitX;

	// Table-based box classification avoids many branches
	const PxU32 Code = PxU32(rightPart)|(PxU32(leftPart)<<1)|(PxU32(lowerPart)<<2)|(PxU32(upperPart)<<3);
	return gCodes[Code + (isCrossBucket ? 16 : 0)];
}
#endif

// Distributes 'nb' boxes/objects into this node's 5 buckets (counting sort on
// the bucket index), writing the grouped results to sortedBoxes/sortedObjects
// and updating per-bucket bounds, counters and offsets. Input is expected to
// be sorted along 'sortAxis' (checked in debug via mDebugMin) and the grouping
// is stable, so each bucket remains sorted.
void BucketPrunerNode::classifyBoxes(	float limitX, float limitYZ,
										PxU32 nb, BucketBox* PX_RESTRICT boxes, const PrunerPayload* PX_RESTRICT objects,
										BucketBox* PX_RESTRICT sortedBoxes, PrunerPayload* PX_RESTRICT sortedObjects,
										bool isCrossBucket, PxU32 sortAxis)
{
	const PxU32 yz = PxU32(sortAxis == 1 ? 2 : 1);

	#ifdef _DEBUG
	{
		// Verify the input is sorted along the sort axis.
		float prev = boxes[0].mDebugMin;
		for(PxU32 i=1;i<nb;i++)
		{
			const float current = boxes[i].mDebugMin;
			PX_ASSERT(current>=prev);
			prev = current;
		}
	}
	#endif

	// Local (stack-based) min/max bucket bounds
	PX_ALIGN(16, PxVec4) bucketBoxMin[5];
	PX_ALIGN(16, PxVec4) bucketBoxMax[5];
	{
		const PxBounds3 empty = PxBounds3::empty();
		for(PxU32 i=0;i<5;i++)
		{
			mCounters[i] = 0;
			bucketBoxMin[i] = PxVec4(empty.minimum, 0.0f);
			bucketBoxMax[i] = PxVec4(empty.maximum, 0.0f);
		}
	}

	{
#ifdef CAN_USE_MOVEMASK
		// DS: order doesn't play nice with x86 shuffles :-|
		PX_ALIGN(16, PxVec4) limits(-limitX, limitX, -limitYZ, limitYZ);
		const bool useY = yz==1;
#endif
		// Determine in which bucket each object falls, update bucket bounds
		for(PxU32 i=0;i<nb;i++)
		{
			const Vec4V boxCenterV = AlignedLoad(&boxes[i].mCenter.x);
			const Vec4V boxExtentsV = AlignedLoad(&boxes[i].mExtents.x);
			const Vec4V boxMinV = V4Sub(boxCenterV, boxExtentsV);
			const Vec4V boxMaxV = V4Add(boxCenterV, boxExtentsV);

#ifdef CAN_USE_MOVEMASK
//			const PxU32 index = classifyBox_x86(boxes[i], limits, useY, isCrossBucket);
			const PxU32 index = classifyBox_x86(boxMinV, boxMaxV, limits, useY, isCrossBucket);
	#if PX_DEBUG
			const PxU32 index_ = classifyBox(boxes[i], limitX, limitYZ, yz, isCrossBucket);
			PX_ASSERT(index == index_);
	#endif
#else
			const PxU32 index = classifyBox(boxes[i], limitX, limitYZ, yz, isCrossBucket);
#endif
			// Merge boxes
			{
				const Vec4V mergedMinV = V4Min(V4LoadA(&bucketBoxMin[index].x), boxMinV);
				const Vec4V mergedMaxV = V4Max(V4LoadA(&bucketBoxMax[index].x), boxMaxV);
				V4StoreA(mergedMinV, &bucketBoxMin[index].x);
				V4StoreA(mergedMaxV, &bucketBoxMax[index].x);
			}
			boxes[i].mData0 = index;	// Store bucket index for current box in this temporary location
			mCounters[index]++;
		}
	}

	{
		// Regenerate offsets
		mOffsets[0]=0;
		for(PxU32 i=0;i<4;i++)
			mOffsets[i+1] = mOffsets[i] + mCounters[i];
	}

	{
		// Group boxes with same bucket index together
		for(PxU32 i=0;i<nb;i++)
		{
			const PxU32 bucketOffset = mOffsets[boxes[i].mData0]++;	// Bucket index for current box was stored in mData0 by previous loop
			// The 2 following lines are the same as:
			// sortedBoxes[bucketOffset] = boxes[i];
			AlignedStore(AlignedLoad(&boxes[i].mCenter.x), &sortedBoxes[bucketOffset].mCenter.x);
			AlignedStore(AlignedLoad(&boxes[i].mExtents.x), &sortedBoxes[bucketOffset].mExtents.x);

			#ifdef _DEBUG
			sortedBoxes[bucketOffset].mDebugMin = boxes[i].mDebugMin;
			#endif
			sortedObjects[bucketOffset] = objects[i];
		}
	}

	{
		// Regenerate offsets (the grouping loop above advanced them past each bucket)
		mOffsets[0]=0;
		for(PxU32 i=0;i<4;i++)
			mOffsets[i+1] = mOffsets[i] + mCounters[i];
	}

	{
		// Convert local (stack-based) min/max bucket bounds to persistent center/extents format
		const float Half = 0.5f;
		const FloatV HalfV = FLoad(Half);
		PX_ALIGN(16, PxVec4) bucketCenter;
		PX_ALIGN(16, PxVec4) bucketExtents;
		for(PxU32 i=0;i<5;i++)
		{
			// The following lines are the same as:
			// mBucketBox[i].mCenter = bucketBox[i].getCenter();
			// mBucketBox[i].mExtents = bucketBox[i].getExtents();
			const Vec4V bucketBoxMinV = V4LoadA(&bucketBoxMin[i].x);
			const Vec4V bucketBoxMaxV = V4LoadA(&bucketBoxMax[i].x);
			const Vec4V bucketBoxCenterV = V4Scale(V4Add(bucketBoxMaxV, bucketBoxMinV), HalfV);
			const Vec4V bucketBoxExtentsV = V4Scale(V4Sub(bucketBoxMaxV, bucketBoxMinV), HalfV);
			V4StoreA(bucketBoxCenterV, &bucketCenter.x);
			V4StoreA(bucketBoxExtentsV, &bucketExtents.x);
			mBucketBox[i].mCenter = PxVec3(bucketCenter.x, bucketCenter.y, bucketCenter.z);
			mBucketBox[i].mExtents = PxVec3(bucketExtents.x, bucketExtents.y, bucketExtents.z);
		}
	}

	#ifdef _DEBUG
	// Verify that, within each bucket, entries are still sorted (stable grouping).
	for(PxU32 j=0;j<5;j++)
	{
		const PxU32 count = mCounters[j];
		if(count)
		{
			const BucketBox* base = sortedBoxes + mOffsets[j];
			float prev = base[0].mDebugMin;
			for(PxU32 i=1;i<count;i++)
			{
				const float current = base[i].mDebugMin;
				PX_ASSERT(current>=prev);
				prev = current;
			}
		}
	}
	#endif
}

///////////////////////////////////////////////////////////////////////////////

// Recursively subdivides each of 'bucket's 5 child ranges into its own 5
// buckets, classifying in the scratch buffers and copying the grouped results
// back in place.
static void processChildBuckets(PxU32 nbAllocated,
								BucketBox* sortedBoxesInBucket, PrunerPayload* sortedObjectsInBucket,
								const BucketPrunerNode& bucket, BucketPrunerNode* PX_RESTRICT childBucket,
								BucketBox* PX_RESTRICT baseBucketsBoxes, PrunerPayload* PX_RESTRICT baseBucketsObjects,
								PxU32 sortAxis)
{
	PX_UNUSED(nbAllocated);

	const PxU32 yz = PxU32(sortAxis == 1 ? 2 : 1);
	for(PxU32 i=0;i<5;i++)
	{
		const PxU32 nbInBucket = bucket.mCounters[i];
		if(!nbInBucket)
		{
			childBucket[i].initCounters();
			continue;
		}
		BucketBox* bucketsBoxes = baseBucketsBoxes + bucket.mOffsets[i];
		PrunerPayload* bucketsObjects = baseBucketsObjects + bucket.mOffsets[i];
		PX_ASSERT(nbInBucket<=nbAllocated);

		// Split each child bucket around its own bounds' center.
		const float limitX = bucket.mBucketBox[i].mCenter.x;
		const float limitYZ = bucket.mBucketBox[i].mCenter[yz];
		const bool isCrossBucket = i==4;
		childBucket[i].classifyBoxes(limitX, limitYZ, nbInBucket, bucketsBoxes, bucketsObjects,
			sortedBoxesInBucket, sortedObjectsInBucket,
			isCrossBucket, sortAxis);

		PxMemCopy(bucketsBoxes, sortedBoxesInBucket, sizeof(BucketBox)*nbInBucket);
		PxMemCopy(bucketsObjects, sortedObjectsInBucket, sizeof(PrunerPayload)*nbInBucket);
	}
}

///////////////////////////////////////////////////////////////////////////////

static PX_FORCE_INLINE PxU32 encodeFloat(PxU32 newPos)
{
	//we may need to check on -0 and 0
	//But it should make no practical difference.
	if(newPos & PX_SIGN_BITMASK) //negative?
+ return ~newPos;//reverse sequence of negative numbers + else + return newPos | PX_SIGN_BITMASK; // flip sign +} + +static PX_FORCE_INLINE void computeRayLimits(float& rayMin, float& rayMax, const PxVec3& rayOrig, const PxVec3& rayDir, float maxDist, PxU32 sortAxis) +{ + const float rayOrigValue = rayOrig[sortAxis]; + const float rayDirValue = rayDir[sortAxis] * maxDist; + rayMin = PxMin(rayOrigValue, rayOrigValue + rayDirValue); + rayMax = PxMax(rayOrigValue, rayOrigValue + rayDirValue); +} + +static PX_FORCE_INLINE void computeRayLimits(float& rayMin, float& rayMax, const PxVec3& rayOrig, const PxVec3& rayDir, float maxDist, const PxVec3& inflate, PxU32 sortAxis) +{ + const float inflateValue = inflate[sortAxis]; + const float rayOrigValue = rayOrig[sortAxis]; + const float rayDirValue = rayDir[sortAxis] * maxDist; + rayMin = PxMin(rayOrigValue, rayOrigValue + rayDirValue) - inflateValue; + rayMax = PxMax(rayOrigValue, rayOrigValue + rayDirValue) + inflateValue; +} + +static PX_FORCE_INLINE void encodeBoxMinMax(BucketBox& box, const PxU32 axis) +{ + const float min = box.mCenter[axis] - box.mExtents[axis]; + const float max = box.mCenter[axis] + box.mExtents[axis]; + + const PxU32* binaryMin = reinterpret_cast<const PxU32*>(&min); + const PxU32* binaryMax = reinterpret_cast<const PxU32*>(&max); + box.mData0 = encodeFloat(binaryMin[0]); + box.mData1 = encodeFloat(binaryMax[0]); +} + +/////////////////////////////////////////////////////////////////////////////// + +BucketPrunerCore::BucketPrunerCore(bool externalMemory) : + mCoreNbObjects (0), + mCoreCapacity (0), + mCoreBoxes (NULL), + mCoreObjects (NULL), + mCoreRemap (NULL), + mSortedWorldBoxes (NULL), + mSortedObjects (NULL), + mNbFree (0), + mSortedNb (0), + mSortedCapacity (0), + mSortAxis (0), + mDirty (true), + mOwnMemory (!externalMemory) +{ + mGlobalBox.setEmpty(); + + mLevel1.initCounters(); + + for(PxU32 i=0;i<5;i++) + mLevel2[i].initCounters(); + for(PxU32 j=0;j<5;j++) + for(PxU32 i=0;i<5;i++) + 
mLevel3[j][i].initCounters(); +} + +BucketPrunerCore::~BucketPrunerCore() +{ + release(); +} + +void BucketPrunerCore::release() +{ + mDirty = true; + mCoreNbObjects = 0; + + mCoreCapacity = 0; + if(mOwnMemory) + { + PX_FREE_AND_RESET(mCoreBoxes); + PX_FREE_AND_RESET(mCoreObjects); + PX_FREE_AND_RESET(mCoreRemap); + } + + PX_FREE_AND_RESET(mSortedWorldBoxes); + PX_FREE_AND_RESET(mSortedObjects); + mSortedNb = 0; + mSortedCapacity = 0; + + mNbFree = 0; +#ifdef USE_REGULAR_HASH_MAP + mMap.clear(); +#else + mMap.purge(); +#endif +} + +void BucketPrunerCore::setExternalMemory(PxU32 nbObjects, PxBounds3* boxes, PrunerPayload* objects) +{ + PX_ASSERT(!mOwnMemory); + mCoreNbObjects = nbObjects; + mCoreBoxes = boxes; + mCoreObjects = objects; + mCoreRemap = NULL; +} + +void BucketPrunerCore::allocateSortedMemory(PxU32 nb) +{ + mSortedNb = nb; + if(nb<=mSortedCapacity && (nb>=mSortedCapacity/2)) + return; + + const PxU32 capacity = Ps::nextPowerOfTwo(nb); + mSortedCapacity = capacity; + + PxU32 bytesNeededForBoxes = capacity*sizeof(BucketBox); + bytesNeededForBoxes = ALIGN16(bytesNeededForBoxes); + + PxU32 bytesNeededForObjects = capacity*sizeof(PrunerPayload); + bytesNeededForObjects = ALIGN16(bytesNeededForObjects); + + PX_FREE(mSortedObjects); + PX_FREE(mSortedWorldBoxes); + mSortedWorldBoxes = reinterpret_cast<BucketBox*>(PX_ALLOC(bytesNeededForBoxes, "BucketPruner")); + mSortedObjects = reinterpret_cast<PrunerPayload*>(PX_ALLOC(bytesNeededForObjects, "BucketPruner")); + PX_ASSERT(!(size_t(mSortedWorldBoxes)&15)); + PX_ASSERT(!(size_t(mSortedObjects)&15)); +} + +/////////////////////////////////////////////////////////////////////////////// + +void BucketPrunerCore::resizeCore() +{ + const PxU32 capacity = mCoreCapacity ? 
mCoreCapacity*2 : 32; + mCoreCapacity = capacity; + + const PxU32 bytesNeededForBoxes = capacity*sizeof(PxBounds3); + const PxU32 bytesNeededForObjects = capacity*sizeof(PrunerPayload); + const PxU32 bytesNeededForRemap = capacity*sizeof(PxU32); + + PxBounds3* newCoreBoxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(bytesNeededForBoxes, "BucketPruner")); + PrunerPayload* newCoreObjects = reinterpret_cast<PrunerPayload*>(PX_ALLOC(bytesNeededForObjects, "BucketPruner")); + PxU32* newCoreRemap = reinterpret_cast<PxU32*>(PX_ALLOC(bytesNeededForRemap, "BucketPruner")); + if(mCoreBoxes) + { + PxMemCopy(newCoreBoxes, mCoreBoxes, mCoreNbObjects*sizeof(PxBounds3)); + PX_FREE(mCoreBoxes); + } + if(mCoreObjects) + { + PxMemCopy(newCoreObjects, mCoreObjects, mCoreNbObjects*sizeof(PrunerPayload)); + PX_FREE(mCoreObjects); + } + if(mCoreRemap) + { + PxMemCopy(newCoreRemap, mCoreRemap, mCoreNbObjects*sizeof(PxU32)); + PX_FREE(mCoreRemap); + } + mCoreBoxes = newCoreBoxes; + mCoreObjects = newCoreObjects; + mCoreRemap = newCoreRemap; +} + +PX_FORCE_INLINE void BucketPrunerCore::addObjectInternal(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp) +{ + if(mCoreNbObjects==mCoreCapacity) + resizeCore(); + + const PxU32 index = mCoreNbObjects++; + mCoreObjects[index] = object; + mCoreBoxes[index] = worldAABB; // PT: TODO: check assembly here + mCoreRemap[index] = 0xffffffff; + + // Objects are only inserted into the map once they're part of the main/core arrays. +#ifdef USE_REGULAR_HASH_MAP + bool ok = mMap.insert(object, BucketPrunerPair(index, timeStamp)); +#else + BucketPrunerPair* ok = mMap.addPair(object, index, timeStamp); +#endif + PX_UNUSED(ok); + PX_ASSERT(ok); +} + +bool BucketPrunerCore::addObject(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp) +{ +/* + We should probably use a bigger Payload struct here, which would also contains the external handle. 
+ (EDIT: we can't even do that, because of the setExternalMemory function) + When asked to update/remove an object it would be O(n) to find the proper object in the mSortedObjects array. + + - + + For removing it we can simply empty the corresponding box, and the object will never be returned from queries. + Maybe this isn't even true, since boxes are sorted along one axis. So marking a box as empty could break the code relying on a sorted order. + An alternative is to mark the external handle as invalid, and ignore the object when a hit is found. + + (EDIT: the sorting is now tested via data0/data1 anyway so we could mark the box as empty without breaking this) + + - + + For updating an object we would need to keep the (sub) array sorted (not the whole thing, only the array within a bucket). + We don't know the range (what part of the array maps to our bucket) but we may have the bucket ID somewhere? If we'd have this + we could parse the array left/right and resort just the right boxes. If we don't have this we may be able to "quickly" find the + range by traversing the tree, looking for the proper bucket. In any case I don't think there's a mapping to update within a bucket, + unlike in SAP or MBP. So we should be able to shuffle a bucket without having to update anything. For example there's no mapping + between the Core array and the Sorted array. It's a shame in a way because we'd need one, but it's not there - and in fact I think + we can free the Core array once Sorted is created, we don't need it at all. + + If we don't want to re-sort the full bucket we can just mark it as dirty and ignore the sort-based early exits in the queries. Then we + can incrementally resort it over N frames or something. + + This only works if the updated object remains in the same bucket though. If it moves to another bucket it becomes tempting to just remove + the object and re-insert it. 
+ + - + + Now for adding an object, we can first have a "free pruner" and do the 16 next entries brute-force. Rebuilding every 16 objects might + give a good speedup already. Otherwise we need to do something more complicated. +*/ + + PX_ASSERT(mOwnMemory); + PX_ASSERT(!mDirty || !mNbFree); + if(!mDirty) + { + // In this path the structure is marked as valid. We do not want to invalidate it for each new object... + if(mNbFree<FREE_PRUNER_SIZE) + { + // ...so as long as there is space in the "free array", we store the newly added object there and + // return immediately. Subsequent queries will parse the free array as if it was a free pruner. + const PxU32 index = mNbFree++; + mFreeObjects[index] = object; + mFreeBounds[index] = worldAABB; + mFreeStamps[index] = timeStamp; + return true; + } + + // If we reach this place, the free array is full. We must transfer the objects from the free array to + // the main (core) arrays, mark the structure as invalid, and still deal with the incoming object. + + // First we transfer free objects, reset the number of free objects, and mark the structure as + // invalid/dirty (the core arrays will need rebuilding). + for(PxU32 i=0;i<mNbFree;i++) + addObjectInternal(mFreeObjects[i], mFreeBounds[i], mFreeStamps[i]); + + mNbFree = 0; + mDirty = true; +// mSortedNb = 0; // PT: TODO: investigate if this should be done here + + // After that we still need to deal with the new incoming object (so far we only + // transferred the already existing objects from the full free array). This will + // happen automatically by letting the code continue to the regular codepath below. + } + + // If we reach this place, the structure must be invalid and the incoming object + // must be added to the main arrays. 
+ PX_ASSERT(mDirty); + + addObjectInternal(object, worldAABB, timeStamp); + return true; +} + +bool BucketPrunerCore::removeObject(const PrunerPayload& object, PxU32& timeStamp) +{ + // Even if the structure is already marked as dirty, we still need to update the + // core arrays and the map. + + // The map only contains core objects, so we can use it to determine if the object + // exists in the core arrays or in the free array. +#ifdef USE_REGULAR_HASH_MAP +/* BucketPrunerPair entry; + if(mMap.findAndErase(object, entry)) + { + PxU32 coreIndex = entry.mCoreIndex; + timeStamp = entry.mTimeStamp;*/ + const BucketPrunerMap::Entry* removedEntry = mMap.find(object); + if(removedEntry) + { + PxU32 coreIndex = removedEntry->second.mCoreIndex; + timeStamp = removedEntry->second.mTimeStamp; +#else + PxU32 coreIndex; // This is the object's index in the core arrays. + if(mMap.removePair(object, coreIndex, timeStamp)) + { +#endif + // In this codepath, the object we want to remove exists in the core arrays. + + // We will need to remove it from both the core arrays & the sorted arrays. + const PxU32 sortedIndex = mCoreRemap[coreIndex]; // This is the object's index in the sorted arrays. + +#ifdef USE_REGULAR_HASH_MAP + bool status = mMap.erase(object); + PX_ASSERT(status); + PX_UNUSED(status); +#endif + + // First let's deal with the core arrays + mCoreNbObjects--; + if(coreIndex!=mCoreNbObjects) + { + // If it wasn't the last object in the array, close the gaps as usual + const PrunerPayload& movedObject = mCoreObjects[mCoreNbObjects]; + mCoreBoxes[coreIndex] = mCoreBoxes[mCoreNbObjects]; + mCoreObjects[coreIndex] = movedObject; + mCoreRemap[coreIndex] = mCoreRemap[mCoreNbObjects]; + + // Since we just moved the last object, its index in the core arrays has changed. + // We must reflect this change in the map. 
+#ifdef USE_REGULAR_HASH_MAP + BucketPrunerMap::Entry* movedEntry = const_cast<BucketPrunerMap::Entry*>(mMap.find(movedObject)); + PX_ASSERT(movedEntry->second.mCoreIndex==mCoreNbObjects); + movedEntry->second.mCoreIndex = coreIndex; +#else + BucketPrunerPair* movedEntry = const_cast<BucketPrunerPair*>(mMap.findPair(movedObject)); + PX_ASSERT(movedEntry->mCoreIndex==mCoreNbObjects); + movedEntry->mCoreIndex = coreIndex; +#endif + } + + // Now, let's deal with the sorted arrays. + // If the structure is dirty, the sorted arrays will be rebuilt from scratch so there's no need to + // update them right now. + if(!mDirty) + { + // If the structure is valid, we want to keep it this way to avoid rebuilding sorted arrays after + // each removal. We can't "close the gaps" easily here because order of objects in the arrays matters. + + // Instead we just invalidate the object by setting its bounding box as empty. + // Queries against empty boxes will never return a hit, so this effectively "removes" the object + // from any subsequent query results. Sorted arrays now contain a "disabled" object, until next build. + + // Invalidating the box does not invalidate the sorting, since it's now captured in mData0/mData1. + // That is, mData0/mData1 keep their previous integer-encoded values, as if the box/object was still here. + PxBounds3 empty; + empty.setEmpty(); + mSortedWorldBoxes[sortedIndex].mCenter = empty.getCenter(); + mSortedWorldBoxes[sortedIndex].mExtents = empty.getExtents(); + // Note that we don't touch mSortedObjects here. We could, but this is not necessary. + } + return true; + } + + // Here, the object we want to remove exists in the free array. So we just parse it. + for(PxU32 i=0;i<mNbFree;i++) + { + if(mFreeObjects[i]==object) + { + // We found the object we want to remove. Close the gap as usual. 
+ timeStamp = mFreeStamps[i]; + mNbFree--; + mFreeBounds[i] = mFreeBounds[mNbFree]; + mFreeObjects[i] = mFreeObjects[mNbFree]; + mFreeStamps[i] = mFreeStamps[mNbFree]; + return true; + } + } + // We didn't find the object. Can happen with a double remove. PX_ASSERT might be an option here. + return false; +} + +bool BucketPrunerCore::updateObject(const PxBounds3& worldAABB, const PrunerPayload& object) +{ + PxU32 timeStamp; + if(!removeObject(object, timeStamp)) + return false; + + return addObject(object, worldAABB, timeStamp); +} + +PxU32 BucketPrunerCore::removeMarkedObjects(PxU32 timeStamp) +{ + PxU32 nbRemoved=0; + // PT: objects can be either in the hash-map, or in the 'free' array. First we look in the hash-map... +#ifdef USE_REGULAR_HASH_MAP + if(mMap.size()) +#else + if(mMap.mNbActivePairs) +#endif + { + PxBounds3 empty; + empty.setEmpty(); + const PxVec3 emptyCenter = empty.getCenter(); + const PxVec3 emptyExtents = empty.getExtents(); + + // PT: hash-map is coalesced so we just parse it in linear order, no holes + PxU32 i=0; +#ifdef USE_REGULAR_HASH_MAP + PxU32 nbActivePairs = mMap.size(); + const BucketPrunerMap::Entry* entries = mMap.mBase.getEntries(); +#else + PxU32 nbActivePairs = mMap.mNbActivePairs; +#endif + PxU32 coreNbObjects = mCoreNbObjects; // PT: to avoid LHS + while(i<nbActivePairs) + { +#ifdef USE_REGULAR_HASH_MAP + const BucketPrunerMap::Entry& p = entries[i]; + if(p.second.mTimeStamp==timeStamp) +#else + const BucketPrunerPair& p = mMap.mActivePairs[i]; + if(p.mTimeStamp==timeStamp) +#endif + { + // PT: timestamps match. We must remove this object. + // PT: we replicate here what we do in BucketPrunerCore::removeObject(). See that function for details. 
+ +#ifdef USE_REGULAR_HASH_MAP + const PxU32 coreIndex = p.second.mCoreIndex; +#else + const PxU32 coreIndex = p.mCoreIndex; +#endif + if(!mDirty) + { + // PT: invalidating the box does not invalidate the sorting, since it's now captured in mData0/mData1 + const PxU32 sortedIndex = mCoreRemap[coreIndex]; + mSortedWorldBoxes[sortedIndex].mCenter = emptyCenter; + mSortedWorldBoxes[sortedIndex].mExtents = emptyExtents; + } + + coreNbObjects--; + if(coreIndex!=coreNbObjects) + { + const PrunerPayload& movedObject = mCoreObjects[coreNbObjects]; + mCoreBoxes[coreIndex] = mCoreBoxes[coreNbObjects]; + mCoreObjects[coreIndex] = movedObject; + mCoreRemap[coreIndex] = mCoreRemap[coreNbObjects]; + +#ifdef USE_REGULAR_HASH_MAP + BucketPrunerMap::Entry* movedEntry = const_cast<BucketPrunerMap::Entry*>(mMap.find(movedObject)); + PX_ASSERT(movedEntry->second.mCoreIndex==coreNbObjects); + movedEntry->second.mCoreIndex = coreIndex; +#else + BucketPrunerPair* movedEntry = const_cast<BucketPrunerPair*>(mMap.findPair(movedObject)); + PX_ASSERT(movedEntry->mCoreIndex==coreNbObjects); + movedEntry->mCoreIndex = coreIndex; +#endif + } + + nbRemoved++; +#ifdef USE_REGULAR_HASH_MAP + bool status = mMap.erase(p.first); + PX_ASSERT(status); + PX_UNUSED(status); +#else + const PxU32 hashValue = hash(p.mPayload) & mMap.mMask; + mMap.removePairInternal(p.mPayload, hashValue, i); +#endif + nbActivePairs--; + } + else i++; + } + mCoreNbObjects = coreNbObjects; + +#ifdef USE_REGULAR_HASH_MAP +#else + mMap.shrinkMemory(); +#endif + } + + // PT: ...then we look in the 'free' array + PxU32 i=0; + while(i<mNbFree) + { + if(mFreeStamps[i]==timeStamp) + { + nbRemoved++; + mNbFree--; + mFreeBounds[i] = mFreeBounds[mNbFree]; + mFreeObjects[i] = mFreeObjects[mNbFree]; + mFreeStamps[i] = mFreeStamps[mNbFree]; + } + else i++; + } + return nbRemoved; +} + +/////////////////////////////////////////////////////////////////////////////// + +static PxU32 sortBoxes( PxU32 nb, const PxBounds3* PX_RESTRICT boxes, 
const PrunerPayload* PX_RESTRICT objects, + BucketBox& _globalBox, BucketBox* PX_RESTRICT sortedBoxes, PrunerPayload* PX_RESTRICT sortedObjects) +{ + // Compute global box & sort axis + PxU32 sortAxis; + { + PX_ASSERT(nb>0); + Vec4V mergedMinV = V4LoadU(&boxes[nb-1].minimum.x); + Vec4V mergedMaxV = Vec4V_From_Vec3V(V3LoadU(&boxes[nb-1].maximum.x)); + for(PxU32 i=0;i<nb-1;i++) + { + mergedMinV = V4Min(mergedMinV, V4LoadU(&boxes[i].minimum.x)); + mergedMaxV = V4Max(mergedMaxV, V4LoadU(&boxes[i].maximum.x)); + } + +/* PX_ALIGN(16, PxVec4) mergedMin; + PX_ALIGN(16, PxVec4) mergedMax; + V4StoreA(mergedMinV, &mergedMin.x); + V4StoreA(mergedMaxV, &mergedMax.x); + + _globalBox.mCenter.x = (mergedMax.x + mergedMin.x)*0.5f; + _globalBox.mCenter.y = (mergedMax.y + mergedMin.y)*0.5f; + _globalBox.mCenter.z = (mergedMax.z + mergedMin.z)*0.5f; + _globalBox.mExtents.x = (mergedMax.x - mergedMin.x)*0.5f; + _globalBox.mExtents.y = (mergedMax.y - mergedMin.y)*0.5f; + _globalBox.mExtents.z = (mergedMax.z - mergedMin.z)*0.5f;*/ + + const float Half = 0.5f; + const FloatV HalfV = FLoad(Half); + PX_ALIGN(16, PxVec4) mergedCenter; + PX_ALIGN(16, PxVec4) mergedExtents; + + const Vec4V mergedCenterV = V4Scale(V4Add(mergedMaxV, mergedMinV), HalfV); + const Vec4V mergedExtentsV = V4Scale(V4Sub(mergedMaxV, mergedMinV), HalfV); + V4StoreA(mergedCenterV, &mergedCenter.x); + V4StoreA(mergedExtentsV, &mergedExtents.x); + _globalBox.mCenter = PxVec3(mergedCenter.x, mergedCenter.y, mergedCenter.z); + _globalBox.mExtents = PxVec3(mergedExtents.x, mergedExtents.y, mergedExtents.z); + + const PxF32 absY = PxAbs(_globalBox.mExtents.y); + const PxF32 absZ = PxAbs(_globalBox.mExtents.z); + sortAxis = PxU32(absY < absZ ? 
1 : 2); +// printf("Sort axis: %d\n", sortAxis); + } + + float* keys = reinterpret_cast<float*>(sortedObjects); + for(PxU32 i=0;i<nb;i++) + keys[i] = boxes[i].minimum[sortAxis]; + + Cm::RadixSortBuffered rs; // ###TODO: some allocs here, remove + const PxU32* ranks = rs.Sort(keys, nb).GetRanks(); + + const float Half = 0.5f; + const FloatV HalfV = FLoad(Half); + for(PxU32 i=0;i<nb;i++) + { + const PxU32 index = *ranks++; +//const PxU32 index = local[i].index; +// sortedBoxes[i].mCenter = boxes[index].getCenter(); +// sortedBoxes[i].mExtents = boxes[index].getExtents(); + + const Vec4V bucketBoxMinV = V4LoadU(&boxes[index].minimum.x); + const Vec4V bucketBoxMaxV = Vec4V_From_Vec3V(V3LoadU(&boxes[index].maximum.x)); + const Vec4V bucketBoxCenterV = V4Scale(V4Add(bucketBoxMaxV, bucketBoxMinV), HalfV); + const Vec4V bucketBoxExtentsV = V4Scale(V4Sub(bucketBoxMaxV, bucketBoxMinV), HalfV); + // We don't need to preserve data0/data1 here + AlignedStore(bucketBoxCenterV, &sortedBoxes[i].mCenter.x); + AlignedStore(bucketBoxExtentsV, &sortedBoxes[i].mExtents.x); + + #ifdef _DEBUG + sortedBoxes[i].mDebugMin = boxes[index].minimum[sortAxis]; + #endif + sortedObjects[i] = objects[index]; + } + + return sortAxis; +} + +#ifdef NODE_SORT + template<class T> + PX_CUDA_CALLABLE PX_FORCE_INLINE void tswap(T& x, T& y) + { + T tmp = x; + x = y; + y = tmp; + } + +/* PX_FORCE_INLINE __m128 DotV(const __m128 a, const __m128 b) + { + const __m128 dot1 = _mm_mul_ps(a, b); + const __m128 shuf1 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dot1), _MM_SHUFFLE(0,0,0,0))); + const __m128 shuf2 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dot1), _MM_SHUFFLE(1,1,1,1))); + const __m128 shuf3 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dot1), _MM_SHUFFLE(2,2,2,2))); + return _mm_add_ps(_mm_add_ps(shuf1, shuf2), shuf3); + }*/ + +// PT: hmmm, by construction, isn't the order always the same for all bucket pruners? 
+// => maybe not because the bucket boxes are still around the merged aabbs, not around the bucket +// Still we could do something here +static /*PX_FORCE_INLINE*/ PxU32 sort(const BucketPrunerNode& parent, const PxVec3& rayDir) +{ + const PxU32 totalCount = parent.mCounters[0]+parent.mCounters[1]+parent.mCounters[2]+parent.mCounters[3]+parent.mCounters[4]; + if(totalCount<NODE_SORT_MIN_COUNT) + return 0|(1<<3)|(2<<6)|(3<<9)|(4<<12); + + float dp[5]; +/* const __m128 rayDirV = _mm_loadu_ps(&rayDir.x); + __m128 dp0V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[0].mCenter.x)); _mm_store_ss(&dp[0], dp0V); + __m128 dp1V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[1].mCenter.x)); _mm_store_ss(&dp[1], dp1V); + __m128 dp2V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[2].mCenter.x)); _mm_store_ss(&dp[2], dp2V); + __m128 dp3V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[3].mCenter.x)); _mm_store_ss(&dp[3], dp3V); + __m128 dp4V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[4].mCenter.x)); _mm_store_ss(&dp[4], dp4V); +*/ + +#ifdef VERIFY_SORT + PxU32 code; + { + dp[0] = parent.mCounters[0] ? PxAbs(parent.mBucketBox[0].mCenter.dot(rayDir)) : PX_MAX_F32; + dp[1] = parent.mCounters[1] ? PxAbs(parent.mBucketBox[1].mCenter.dot(rayDir)) : PX_MAX_F32; + dp[2] = parent.mCounters[2] ? PxAbs(parent.mBucketBox[2].mCenter.dot(rayDir)) : PX_MAX_F32; + dp[3] = parent.mCounters[3] ? PxAbs(parent.mBucketBox[3].mCenter.dot(rayDir)) : PX_MAX_F32; + dp[4] = parent.mCounters[4] ? 
PxAbs(parent.mBucketBox[4].mCenter.dot(rayDir)) : PX_MAX_F32; + + PxU32 ii0 = 0; + PxU32 ii1 = 1; + PxU32 ii2 = 2; + PxU32 ii3 = 3; + PxU32 ii4 = 4; + + // PT: using integer cmps since we used fabsf above + // const PxU32* values = reinterpret_cast<const PxU32*>(dp); + const PxU32* values = PxUnionCast<PxU32*, PxF32*>(dp); + + PxU32 value0 = values[0]; + PxU32 value1 = values[1]; + PxU32 value2 = values[2]; + PxU32 value3 = values[3]; + PxU32 value4 = values[4]; + + for(PxU32 j=0;j<5-1;j++) + { + if(value1<value0) + { + tswap(value0, value1); + tswap(ii0, ii1); + } + if(value2<value1) + { + tswap(value1, value2); + tswap(ii1, ii2); + } + if(value3<value2) + { + tswap(value2, value3); + tswap(ii2, ii3); + } + if(value4<value3) + { + tswap(value3, value4); + tswap(ii3, ii4); + } + } + //return ii0|(ii1<<3)|(ii2<<6)|(ii3<<9)|(ii4<<12); + code = ii0|(ii1<<3)|(ii2<<6)|(ii3<<9)|(ii4<<12); + } +#endif + + dp[0] = parent.mCounters[0] ? parent.mBucketBox[0].mCenter.dot(rayDir) : PX_MAX_F32; + dp[1] = parent.mCounters[1] ? parent.mBucketBox[1].mCenter.dot(rayDir) : PX_MAX_F32; + dp[2] = parent.mCounters[2] ? parent.mBucketBox[2].mCenter.dot(rayDir) : PX_MAX_F32; + dp[3] = parent.mCounters[3] ? parent.mBucketBox[3].mCenter.dot(rayDir) : PX_MAX_F32; + dp[4] = parent.mCounters[4] ? 
parent.mBucketBox[4].mCenter.dot(rayDir) : PX_MAX_F32; + + const PxU32* values = PxUnionCast<PxU32*, PxF32*>(dp); + +// const PxU32 mask = ~7U; + const PxU32 mask = 0x7ffffff8; + PxU32 value0 = (values[0]&mask); + PxU32 value1 = (values[1]&mask)|1; + PxU32 value2 = (values[2]&mask)|2; + PxU32 value3 = (values[3]&mask)|3; + PxU32 value4 = (values[4]&mask)|4; + +#define SORT_BLOCK \ + if(value1<value0) tswap(value0, value1); \ + if(value2<value1) tswap(value1, value2); \ + if(value3<value2) tswap(value2, value3); \ + if(value4<value3) tswap(value3, value4); + SORT_BLOCK + SORT_BLOCK + SORT_BLOCK + SORT_BLOCK + + const PxU32 ii0 = value0&7; + const PxU32 ii1 = value1&7; + const PxU32 ii2 = value2&7; + const PxU32 ii3 = value3&7; + const PxU32 ii4 = value4&7; + const PxU32 code2 = ii0|(ii1<<3)|(ii2<<6)|(ii3<<9)|(ii4<<12); +#ifdef VERIFY_SORT + PX_ASSERT(code2==code); +#endif + return code2; +} + +static void gPrecomputeSort(BucketPrunerNode& node, const PxVec3* PX_RESTRICT dirs) +{ + for(int i=0;i<8;i++) + node.mOrder[i] = Ps::to16(sort(node, dirs[i])); +} +#endif + +void BucketPrunerCore::classifyBoxes() +{ + if(!mDirty) + return; + + mDirty = false; + + const PxU32 nb = mCoreNbObjects; + if(!nb) + { + mSortedNb=0; + return; + } + + PX_ASSERT(!mNbFree); + +#ifdef BRUTE_FORCE_LIMIT + if(nb<=BRUTE_FORCE_LIMIT) + { + allocateSortedMemory(nb); + BucketBox* sortedBoxes = mSortedWorldBoxes; + PrunerPayload* sortedObjects = mSortedObjects; + + const float Half = 0.5f; + const __m128 HalfV = _mm_load1_ps(&Half); + PX_ALIGN(16, PxVec4) bucketCenter; + PX_ALIGN(16, PxVec4) bucketExtents; + for(PxU32 i=0;i<nb;i++) + { + const __m128 bucketBoxMinV = _mm_loadu_ps(&mCoreBoxes[i].minimum.x); + const __m128 bucketBoxMaxV = _mm_loadu_ps(&mCoreBoxes[i].maximum.x); + const __m128 bucketBoxCenterV = _mm_mul_ps(_mm_add_ps(bucketBoxMaxV, bucketBoxMinV), HalfV); + const __m128 bucketBoxExtentsV = _mm_mul_ps(_mm_sub_ps(bucketBoxMaxV, bucketBoxMinV), HalfV); + _mm_store_ps(&bucketCenter.x, 
bucketBoxCenterV); + _mm_store_ps(&bucketExtents.x, bucketBoxExtentsV); + sortedBoxes[i].mCenter = PxVec3(bucketCenter.x, bucketCenter.y, bucketCenter.z); + sortedBoxes[i].mExtents = PxVec3(bucketExtents.x, bucketExtents.y, bucketExtents.z); + + sortedObjects[i] = mCoreObjects[i]; + } + return; + } +#endif + + +size_t* remap = reinterpret_cast<size_t*>(PX_ALLOC(nb*sizeof(size_t), "")); +for(PxU32 i=0;i<nb;i++) +{ + remap[i] = mCoreObjects[i].data[0]; + mCoreObjects[i].data[0] = i; +} + +// printf("Nb objects: %d\n", nb); + + PrunerPayload localTempObjects[LOCAL_SIZE]; + BucketBox localTempBoxes[LOCAL_SIZE]; + PrunerPayload* tempObjects; + BucketBox* tempBoxes; + if(nb>LOCAL_SIZE) + { + tempObjects = reinterpret_cast<PrunerPayload*>(PX_ALLOC(sizeof(PrunerPayload)*nb, "BucketPruner")); + tempBoxes = reinterpret_cast<BucketBox*>(PX_ALLOC(nb*sizeof(BucketBox), "BucketPruner")); + } + else + { + tempObjects = localTempObjects; + tempBoxes = localTempBoxes; + } + + mSortAxis = sortBoxes(nb, mCoreBoxes, mCoreObjects, mGlobalBox, tempBoxes, tempObjects); + + PX_ASSERT(mSortAxis); + + allocateSortedMemory(nb); + BucketBox* sortedBoxes = mSortedWorldBoxes; + PrunerPayload* sortedObjects = mSortedObjects; + + const PxU32 yz = PxU32(mSortAxis == 1 ? 
2 : 1); + const float limitX = mGlobalBox.mCenter.x; + const float limitYZ = mGlobalBox.mCenter[yz]; + mLevel1.classifyBoxes(limitX, limitYZ, nb, tempBoxes, tempObjects, + sortedBoxes, sortedObjects, + false, mSortAxis); + + processChildBuckets(nb, tempBoxes, tempObjects, + mLevel1, mLevel2, mSortedWorldBoxes, mSortedObjects, + mSortAxis); + + for(PxU32 j=0;j<5;j++) + processChildBuckets(nb, tempBoxes, tempObjects, + mLevel2[j], mLevel3[j], mSortedWorldBoxes + mLevel1.mOffsets[j], mSortedObjects + mLevel1.mOffsets[j], + mSortAxis); + + { + for(PxU32 i=0;i<nb;i++) + { + encodeBoxMinMax(mSortedWorldBoxes[i], mSortAxis); + } + } + + if(nb>LOCAL_SIZE) + { + PX_FREE(tempBoxes); + PX_FREE(tempObjects); + } + +for(PxU32 i=0;i<nb;i++) +{ + const PxU32 coreIndex = PxU32(mSortedObjects[i].data[0]); + const size_t saved = remap[coreIndex]; + mSortedObjects[i].data[0] = saved; + mCoreObjects[coreIndex].data[0] = saved; + if(mCoreRemap) + mCoreRemap[coreIndex] = i; +// remap[i] = mCoreObjects[i].data[0]; +// mCoreObjects[i].data[0] = i; +} +PX_FREE(remap); + +/* if(mOwnMemory) + { + PX_FREE_AND_RESET(mCoreBoxes); + PX_FREE_AND_RESET(mCoreObjects); + }*/ + + +#ifdef NODE_SORT + { + PxVec3 dirs[8]; + dirs[0] = PxVec3(1.0f, 1.0f, 1.0f); + dirs[1] = PxVec3(1.0f, 1.0f, -1.0f); + dirs[2] = PxVec3(1.0f, -1.0f, 1.0f); + dirs[3] = PxVec3(1.0f, -1.0f, -1.0f); + dirs[4] = PxVec3(-1.0f, 1.0f, 1.0f); + dirs[5] = PxVec3(-1.0f, 1.0f, -1.0f); + dirs[6] = PxVec3(-1.0f, -1.0f, 1.0f); + dirs[7] = PxVec3(-1.0f, -1.0f, -1.0f); + for(int i=0;i<8;i++) + dirs[i].normalize(); + + gPrecomputeSort(mLevel1, dirs); + + for(PxU32 i=0;i<5;i++) + gPrecomputeSort(mLevel2[i], dirs); + + for(PxU32 j=0;j<5;j++) + { + for(PxU32 i=0;i<5;i++) + gPrecomputeSort(mLevel3[j][i], dirs); + } + } +#endif +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef 
CAN_USE_MOVEMASK + struct RayParams + { + PX_ALIGN(16, PxVec3 mData2); float padding0; + PX_ALIGN(16, PxVec3 mFDir); float padding1; + PX_ALIGN(16, PxVec3 mData); float padding2; + PX_ALIGN(16, PxVec3 mInflate); float padding3; + }; + + static PX_FORCE_INLINE void precomputeRayData(RayParams* PX_RESTRICT rayParams, const PxVec3& rayOrig, const PxVec3& rayDir, float maxDist) + { + #ifdef USE_SIMD + const float Half = 0.5f * maxDist; + const __m128 HalfV = _mm_load1_ps(&Half); + const __m128 DataV = _mm_mul_ps(_mm_loadu_ps(&rayDir.x), HalfV); + const __m128 Data2V = _mm_add_ps(_mm_loadu_ps(&rayOrig.x), DataV); + const PxU32 MaskI = 0x7fffffff; + const __m128 FDirV = _mm_and_ps(_mm_load1_ps(reinterpret_cast<const float*>(&MaskI)), DataV); + _mm_store_ps(&rayParams->mData.x, DataV); + _mm_store_ps(&rayParams->mData2.x, Data2V); + _mm_store_ps(&rayParams->mFDir.x, FDirV); + #else + const PxVec3 data = 0.5f * rayDir * maxDist; + rayParams->mData = data; + rayParams->mData2 = rayOrig + data; + rayParams->mFDir.x = PxAbs(data.x); + rayParams->mFDir.y = PxAbs(data.y); + rayParams->mFDir.z = PxAbs(data.z); + #endif + } + + template <int inflateT> + static PX_FORCE_INLINE IntBool _segmentAABB(const BucketBox& box, const RayParams* PX_RESTRICT params) + { + #ifdef USE_SIMD + const PxU32 maskI = 0x7fffffff; + const __m128 fdirV = _mm_load_ps(¶ms->mFDir.x); +// #ifdef _DEBUG + const __m128 extentsV = inflateT ? _mm_add_ps(_mm_loadu_ps(&box.mExtents.x), _mm_load_ps(¶ms->mInflate.x)) : _mm_loadu_ps(&box.mExtents.x); + const __m128 DV = _mm_sub_ps(_mm_load_ps(¶ms->mData2.x), _mm_loadu_ps(&box.mCenter.x)); +/* #else + const __m128 extentsV = inflateT ? 
_mm_add_ps(_mm_load_ps(&box.mExtents.x), _mm_load_ps(¶ms->mInflate.x)) : _mm_load_ps(&box.mExtents.x); + const __m128 DV = _mm_sub_ps(_mm_load_ps(¶ms->mData2.x), _mm_load_ps(&box.mCenter.x)); + #endif*/ + __m128 absDV = _mm_and_ps(DV, _mm_load1_ps(reinterpret_cast<const float*>(&maskI))); + absDV = _mm_cmpgt_ps(absDV, _mm_add_ps(extentsV, fdirV)); + const PxU32 test = PxU32(_mm_movemask_ps(absDV)); + if(test&7) + return 0; + + const __m128 dataZYX_V = _mm_load_ps(¶ms->mData.x); + const __m128 dataXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dataZYX_V), _MM_SHUFFLE(3,0,2,1))); + const __m128 DXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(DV), _MM_SHUFFLE(3,0,2,1))); + const __m128 fV = _mm_sub_ps(_mm_mul_ps(dataZYX_V, DXZY_V), _mm_mul_ps(dataXZY_V, DV)); + + const __m128 fdirZYX_V = _mm_load_ps(¶ms->mFDir.x); + const __m128 fdirXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(fdirZYX_V), _MM_SHUFFLE(3,0,2,1))); + const __m128 extentsXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1))); + const __m128 fg = _mm_add_ps(_mm_mul_ps(extentsV, fdirXZY_V), _mm_mul_ps(extentsXZY_V, fdirZYX_V)); + + __m128 absfV = _mm_and_ps(fV, _mm_load1_ps(reinterpret_cast<const float*>(&maskI))); + absfV = _mm_cmpgt_ps(absfV, fg); + const PxU32 test2 = PxU32(_mm_movemask_ps(absfV)); + if(test2&7) + return 0; + return 1; + #else + const float boxExtentsx = inflateT ? box.mExtents.x + params->mInflate.x : box.mExtents.x; + const float Dx = params->mData2.x - box.mCenter.x; if(fabsf(Dx) > boxExtentsx + params->mFDir.x) return IntFalse; + + const float boxExtentsz = inflateT ? box.mExtents.z + params->mInflate.z : box.mExtents.z; + const float Dz = params->mData2.z - box.mCenter.z; if(fabsf(Dz) > boxExtentsz + params->mFDir.z) return IntFalse; + + const float boxExtentsy = inflateT ? 
box.mExtents.y + params->mInflate.y : box.mExtents.y; + const float Dy = params->mData2.y - box.mCenter.y; if(fabsf(Dy) > boxExtentsy + params->mFDir.y) return IntFalse; + + float f; + f = params->mData.y * Dz - params->mData.z * Dy; if(fabsf(f) > boxExtentsy*params->mFDir.z + boxExtentsz*params->mFDir.y) return IntFalse; + f = params->mData.z * Dx - params->mData.x * Dz; if(fabsf(f) > boxExtentsx*params->mFDir.z + boxExtentsz*params->mFDir.x) return IntFalse; + f = params->mData.x * Dy - params->mData.y * Dx; if(fabsf(f) > boxExtentsx*params->mFDir.y + boxExtentsy*params->mFDir.x) return IntFalse; + return IntTrue; + #endif + } +#else + #include "SqPrunerTestsSIMD.h" + + typedef RayAABBTest BPRayAABBTest; + +template <int inflateT> +static PX_FORCE_INLINE IntBool _segmentAABB(const BucketBox& box, const BPRayAABBTest& test) +{ + return static_cast<IntBool>(test.check<inflateT>(V3LoadU(box.mCenter), V3LoadU(box.mExtents))); +} + +/*static PX_FORCE_INLINE IntBool _segmentAABB(const BucketBox& box, const BPRayAABBTest& test, PxU32 rayMinLimitX, PxU32 rayMaxLimitX) +{ + if(rayMinLimitX>box.mData1) + return 0; + if(rayMaxLimitX<box.mData0) + return 0; + + return test(Vec3V_From_PxVec3(box.mCenter), Vec3V_From_PxVec3(box.mExtents)); +}*/ +#endif + +template <int inflateT> +static PxAgain processBucket( + PxU32 nb, const BucketBox* PX_RESTRICT baseBoxes, PrunerPayload* PX_RESTRICT baseObjects, + PxU32 offset, PxU32 totalAllocated, + const PxVec3& rayOrig, const PxVec3& rayDir, float& maxDist, +#ifdef CAN_USE_MOVEMASK + RayParams* PX_RESTRICT rayParams, +#else + BPRayAABBTest& test, const PxVec3& inflate, +#endif + PrunerCallback& pcb, PxU32& _rayMinLimitInt, PxU32& _rayMaxLimitInt, PxU32 sortAxis) +{ + PX_UNUSED(totalAllocated); + + const BucketBox* PX_RESTRICT _boxes = baseBoxes + offset; + PrunerPayload* PX_RESTRICT _objects = baseObjects + offset; + + PxU32 rayMinLimitInt = _rayMinLimitInt; + PxU32 rayMaxLimitInt = _rayMaxLimitInt; + + const BucketBox* last = _boxes + 
nb; + + while(_boxes!=last) + { + const BucketBox& currentBox = *_boxes++; + PrunerPayload* currentObject = _objects++; + + if(currentBox.mData1<rayMinLimitInt) + continue; + + if(currentBox.mData0>rayMaxLimitInt) + goto Exit; + +#ifdef CAN_USE_MOVEMASK + if(!_segmentAABB<inflateT>(currentBox, rayParams)) + continue; +#else + if(!_segmentAABB<inflateT>(currentBox, test)) + continue; +#endif + + const float MaxDist = maxDist; + const PxAgain again = pcb.invoke(maxDist, *currentObject); + if(!again) + return false; + if(maxDist < MaxDist) + { + float rayMinLimit, rayMaxLimit; +#ifdef CAN_USE_MOVEMASK + if(inflateT) + computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, rayParams->mInflate, sortAxis); + else + computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, sortAxis); + + precomputeRayData(rayParams, rayOrig, rayDir, maxDist); +#else + if(inflateT) + computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, inflate, sortAxis); + else + computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, sortAxis); + + test.setDistance(maxDist); +#endif + const PxU32* binaryMinLimit = reinterpret_cast<const PxU32*>(&rayMinLimit); + const PxU32* binaryMaxLimit = reinterpret_cast<const PxU32*>(&rayMaxLimit); + rayMinLimitInt = encodeFloat(binaryMinLimit[0]); + rayMaxLimitInt = encodeFloat(binaryMaxLimit[0]); + } + } +Exit: + + _rayMinLimitInt = rayMinLimitInt; + _rayMaxLimitInt = rayMaxLimitInt; + return true; +} + +#ifdef NODE_SORT +static PxU32 computeDirMask(const PxVec3& dir) +{ + const PxU32* binary = reinterpret_cast<const PxU32*>(&dir.x); + const PxU32 X = (binary[0])>>31; + const PxU32 Y = (binary[1])>>31; + const PxU32 Z = (binary[2])>>31; + return Z|(Y<<1)|(X<<2); +} +#endif + +template <int inflateT> +static PxAgain stab(const BucketPrunerCore& core, PrunerCallback& pcb, const PxVec3& rayOrig, const PxVec3& rayDir, float& maxDist, const PxVec3 inflate) +{ + const PxU32 nb = core.mSortedNb; + if(!nb && 
!core.mNbFree) + return true; + + if(maxDist==PX_MAX_F32) + { + /*const*/ PxVec3 boxMin = core.mGlobalBox.getMin() - inflate; + /*const*/ PxVec3 boxMax = core.mGlobalBox.getMax() + inflate; + + if(core.mNbFree) + { + // TODO: optimize this + PxBounds3 freeGlobalBounds; + freeGlobalBounds.setEmpty(); + for(PxU32 i=0;i<core.mNbFree;i++) + freeGlobalBounds.include(core.mFreeBounds[i]); + freeGlobalBounds.minimum -= inflate; + freeGlobalBounds.maximum += inflate; + boxMin = boxMin.minimum(freeGlobalBounds.minimum); + boxMax = boxMax.maximum(freeGlobalBounds.maximum); + } + + clipRay(rayOrig, rayDir, maxDist, boxMin, boxMax); + } + +#ifdef CAN_USE_MOVEMASK + RayParams rayParams; + #ifdef USE_SIMD + rayParams.padding0 = rayParams.padding1 = rayParams.padding2 = rayParams.padding3 = 0.0f; + #endif + if(inflateT) + rayParams.mInflate = inflate; + + precomputeRayData(&rayParams, rayOrig, rayDir, maxDist); +#else + BPRayAABBTest test(rayOrig, rayDir, maxDist, inflateT ? inflate : PxVec3(0.0f)); +#endif + + for(PxU32 i=0;i<core.mNbFree;i++) + { + BucketBox tmp; + tmp.mCenter = core.mFreeBounds[i].getCenter(); + tmp.mExtents = core.mFreeBounds[i].getExtents(); + +#ifdef CAN_USE_MOVEMASK + if(_segmentAABB<inflateT>(tmp, &rayParams)) +#else + if(_segmentAABB<inflateT>(tmp, test)) +#endif + { + if(!pcb.invoke(maxDist, core.mFreeObjects[i])) + return false; + } + } + + if(!nb) + return true; + +#ifdef CAN_USE_MOVEMASK + if(!_segmentAABB<inflateT>(core.mGlobalBox, &rayParams)) + return true; +#else + if(!_segmentAABB<inflateT>(core.mGlobalBox, test)) + return true; +#endif + + const PxU32 sortAxis = core.mSortAxis; + float rayMinLimit, rayMaxLimit; + if(inflateT) + computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, inflate, sortAxis); + else + computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, sortAxis); + + const PxU32* binaryMinLimit = reinterpret_cast<const PxU32*>(&rayMinLimit); + const PxU32* binaryMaxLimit = reinterpret_cast<const 
PxU32*>(&rayMaxLimit); + PxU32 rayMinLimitInt = encodeFloat(binaryMinLimit[0]); + PxU32 rayMaxLimitInt = encodeFloat(binaryMaxLimit[0]); +/* +float rayMinLimitX, rayMaxLimitX; +if(inflateT) + computeRayLimits(rayMinLimitX, rayMaxLimitX, rayOrig, rayDir, maxDist, inflate, 0); +else + computeRayLimits(rayMinLimitX, rayMaxLimitX, rayOrig, rayDir, maxDist, 0); + +PxU32 rayMinLimitIntX = encodeFloat(PX_IR(rayMinLimitX)); +PxU32 rayMaxLimitIntX = encodeFloat(PX_IR(rayMaxLimitX)); +*/ + + float currentDist = maxDist; + +#ifdef NODE_SORT + const PxU32 dirIndex = computeDirMask(rayDir); + PxU32 orderi = core.mLevel1.mOrder[dirIndex]; +// PxU32 orderi = sort(core.mLevel1, rayDir); + + for(PxU32 i_=0;i_<5;i_++) + { + const PxU32 i = orderi&7; orderi>>=3; +#else + for(PxU32 i=0;i<5;i++) + { +#endif + +#ifdef CAN_USE_MOVEMASK + if(core.mLevel1.mCounters[i] && _segmentAABB<inflateT>(core.mLevel1.mBucketBox[i], &rayParams)) +#else + if(core.mLevel1.mCounters[i] && _segmentAABB<inflateT>(core.mLevel1.mBucketBox[i], test)) +// if(core.mLevel1.mCounters[i] && _segmentAABB<inflateT>(core.mLevel1.mBucketBox[i], test, rayMinLimitIntX, rayMaxLimitIntX)) +#endif + { + +#ifdef NODE_SORT + PxU32 orderj = core.mLevel2[i].mOrder[dirIndex]; +// PxU32 orderj = sort(core.mLevel2[i], rayDir); + + for(PxU32 j_=0;j_<5;j_++) + { + const PxU32 j = orderj&7; orderj>>=3; +#else + for(PxU32 j=0;j<5;j++) + { +#endif + +#ifdef CAN_USE_MOVEMASK + if(core.mLevel2[i].mCounters[j] && _segmentAABB<inflateT>(core.mLevel2[i].mBucketBox[j], &rayParams)) +#else + if(core.mLevel2[i].mCounters[j] && _segmentAABB<inflateT>(core.mLevel2[i].mBucketBox[j], test)) +// if(core.mLevel2[i].mCounters[j] && _segmentAABB<inflateT>(core.mLevel2[i].mBucketBox[j], test, rayMinLimitIntX, rayMaxLimitIntX)) +#endif + { + const BucketPrunerNode& parent = core.mLevel3[i][j]; + const PxU32 parentOffset = core.mLevel1.mOffsets[i] + core.mLevel2[i].mOffsets[j]; + +#ifdef NODE_SORT + PxU32 orderk = parent.mOrder[dirIndex]; +// PxU32 
orderk = sort(parent, rayDir); + + for(PxU32 k_=0;k_<5;k_++) + { + const PxU32 k = orderk&7; orderk>>=3; +#else + for(PxU32 k=0;k<5;k++) + { +#endif + const PxU32 nbInBucket = parent.mCounters[k]; +#ifdef CAN_USE_MOVEMASK + if(nbInBucket && _segmentAABB<inflateT>(parent.mBucketBox[k], &rayParams)) +#else + if(nbInBucket && _segmentAABB<inflateT>(parent.mBucketBox[k], test)) +// if(nbInBucket && _segmentAABB<inflateT>(parent.mBucketBox[k], test, rayMinLimitIntX, rayMaxLimitIntX)) +#endif + { + const PxU32 offset = parentOffset + parent.mOffsets[k]; + const PxAgain again = processBucket<inflateT>( nbInBucket, core.mSortedWorldBoxes, core.mSortedObjects, + offset, core.mSortedNb, + rayOrig, rayDir, currentDist, +#ifdef CAN_USE_MOVEMASK + &rayParams, +#else + test, inflate, +#endif + pcb, + rayMinLimitInt, rayMaxLimitInt, + sortAxis); + if(!again) + return false; + } + } + } + } + } + } + + maxDist = currentDist; + return true; +} + +PxAgain BucketPrunerCore::raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const +{ + return ::stab<0>(*this, pcb, origin, unitDir, inOutDistance, PxVec3(0.0f)); +} + +PxAgain BucketPrunerCore::sweep(const ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const +{ + const PxVec3 extents = queryVolume.getPrunerInflatedWorldAABB().getExtents(); + return ::stab<1>(*this, pcb, queryVolume.getPrunerInflatedWorldAABB().getCenter(), unitDir, inOutDistance, extents); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +template<bool doAssert, typename Test> +static PX_FORCE_INLINE bool processBucket( PxU32 nb, const BucketBox* PX_RESTRICT baseBoxes, PrunerPayload* PX_RESTRICT baseObjects, + PxU32 offset, PxU32 totalAllocated, + const Test& test, PrunerCallback& pcb, + PxU32 minLimitInt, PxU32 maxLimitInt) 
+{ + PX_UNUSED(totalAllocated); + + const BucketBox* PX_RESTRICT boxes = baseBoxes + offset; + PrunerPayload* PX_RESTRICT objects = baseObjects + offset; + + while(nb--) + { + const BucketBox& currentBox = *boxes++; + PrunerPayload* currentObject = objects++; + + if(currentBox.mData1<minLimitInt) + { + if(doAssert) + PX_ASSERT(!test(currentBox)); + continue; + } + + if(currentBox.mData0>maxLimitInt) + { + if(doAssert) + PX_ASSERT(!test(currentBox)); + return true; + } + + if(test(currentBox)) + { + PxReal dist = -1.0f; // no distance for overlaps + if(!pcb.invoke(dist, *currentObject)) + return false; + } + } + return true; +} + +template<typename Test, bool isPrecise> +class BucketPrunerOverlapTraversal +{ +public: + PX_FORCE_INLINE BucketPrunerOverlapTraversal() {} + + /*PX_FORCE_INLINE*/ bool operator()(const BucketPrunerCore& core, const Test& test, PrunerCallback& pcb, const PxBounds3& cullBox) const + { + for(PxU32 i=0;i<core.mNbFree;i++) + { + if(test(core.mFreeBounds[i])) + { + PxReal dist = -1.0f; // no distance for overlaps + if(!pcb.invoke(dist, core.mFreeObjects[i])) + return false; + } + } + + const PxU32 nb = core.mSortedNb; + if(!nb) + return true; + +#ifdef BRUTE_FORCE_LIMIT + if(nb<=BRUTE_FORCE_LIMIT) + { + for(PxU32 i=0;i<nb;i++) + { + if(test(core.mSortedWorldBoxes[i])) + { + PxReal dist = -1.0f; // no distance for overlaps + if(!pcb.invoke(dist, core.mSortedObjects[i])) + return false; + } + } + return true; + } +#endif + + if(!test(core.mGlobalBox)) + return true; + + const PxU32 sortAxis = core.mSortAxis; + const float boxMinLimit = cullBox.minimum[sortAxis]; + const float boxMaxLimit = cullBox.maximum[sortAxis]; + + const PxU32* binaryMinLimit = reinterpret_cast<const PxU32*>(&boxMinLimit); + const PxU32* binaryMaxLimit = reinterpret_cast<const PxU32*>(&boxMaxLimit); + const PxU32 rayMinLimitInt = encodeFloat(binaryMinLimit[0]); + const PxU32 rayMaxLimitInt = encodeFloat(binaryMaxLimit[0]); + + for(PxU32 i=0;i<5;i++) + { + 
if(core.mLevel1.mCounters[i] && test(core.mLevel1.mBucketBox[i])) + { + for(PxU32 j=0;j<5;j++) + { + if(core.mLevel2[i].mCounters[j] && test(core.mLevel2[i].mBucketBox[j])) + { + for(PxU32 k=0;k<5;k++) + { + const PxU32 nbInBucket = core.mLevel3[i][j].mCounters[k]; + if(nbInBucket && test(core.mLevel3[i][j].mBucketBox[k])) + { + const PxU32 offset = core.mLevel1.mOffsets[i] + core.mLevel2[i].mOffsets[j] + core.mLevel3[i][j].mOffsets[k]; + if(!processBucket<isPrecise>(nbInBucket, core.mSortedWorldBoxes, core.mSortedObjects, + offset, core.mSortedNb, test, pcb, rayMinLimitInt, rayMaxLimitInt)) + return false; + } + } + } + } + } + } + return true; + } +}; + +/////////////////////////////////////////////////////////////////////////////// + +#ifdef CAN_USE_MOVEMASK +PX_FORCE_INLINE PxU32 BAllTrue3_R(const BoolV a) +{ + const PxI32 moveMask = _mm_movemask_ps(a); + return PxU32((moveMask & 0x7) == (0x7)); +} +#endif + +#ifdef USE_SIMD +struct SphereAABBTest_SIMD +{ + PX_FORCE_INLINE SphereAABBTest_SIMD(const Gu::Sphere& sphere) : + #ifdef CAN_USE_MOVEMASK + mCenter (V4LoadU(&sphere.center.x)), + #else + mCenter (V3LoadU(sphere.center)), + #endif + mRadius2(FLoad(sphere.radius * sphere.radius)) + {} + + PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const + { + #ifdef CAN_USE_MOVEMASK + const Vec4V boxCenter = AlignedLoad(&box.mCenter.x); + const Vec4V boxExtents = AlignedLoad(&box.mExtents.x); + // + const Vec4V offset = V4Sub(mCenter, boxCenter); + const Vec4V closest = V4Clamp(offset, V4Neg(boxExtents), boxExtents); + const Vec4V d = V4Sub(offset, closest); + + const FloatV dot = V4Dot3(d,d); + return Ps::IntBool(BAllTrue3_R(FIsGrtrOrEq(mRadius2, dot))); + #else + const Vec3V boxCenter = V3LoadU(box.mCenter); + const Vec3V boxExtents = V3LoadU(box.mExtents); + // + const Vec3V offset = V3Sub(mCenter, boxCenter); + const Vec3V closest = V3Clamp(offset, V3Neg(boxExtents), boxExtents); + const Vec3V d = V3Sub(offset, closest); + return 
Ps::IntBool(BAllEqTTTT(FIsGrtrOrEq(mRadius2, V3Dot(d, d)))); + #endif + } + + PX_FORCE_INLINE Ps::IntBool operator()(const PxBounds3& bounds) const + { + BucketBox tmp; + tmp.mCenter = bounds.getCenter(); + tmp.mExtents = bounds.getExtents(); + return (*this)(tmp); + } + +private: + SphereAABBTest_SIMD& operator=(const SphereAABBTest_SIMD&); + #ifdef CAN_USE_MOVEMASK + const Vec4V mCenter; + #else + const Vec3V mCenter; + #endif + const FloatV mRadius2; +}; +#else +struct SphereAABBTest_Scalar +{ + PX_FORCE_INLINE SphereAABBTest_Scalar(const Gu::Sphere& sphere) : + mCenter (sphere.center), + mRadius2(sphere.radius * sphere.radius) + {} + + PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const + { + const PxVec3 minimum = box.getMin(); + const PxVec3 maximum = box.getMax(); + + float d = 0.0f; + + //find the square of the distance + //from the sphere to the box + for(PxU32 i=0;i<3;i++) + { + if(mCenter[i]<minimum[i]) + { + const float s = mCenter[i] - minimum[i]; + d += s*s; + } + else if(mCenter[i]>maximum[i]) + { + const float s = mCenter[i] - maximum[i]; + d += s*s; + } + } + return d <= mRadius2; + } + +private: + SphereAABBTest_Scalar& operator=(const SphereAABBTest_Scalar&); + const PxVec3 mCenter; + float mRadius2; +}; +#endif + +#ifdef USE_SIMD +typedef SphereAABBTest_SIMD BucketPrunerSphereAABBTest; +#else +typedef SphereAABBTest_Scalar BucketPrunerSphereAABBTest; +#endif + +/////////////////////////////////////////////////////////////////////////////// + +struct BucketPrunerAABBAABBTest +{ + PX_FORCE_INLINE BucketPrunerAABBAABBTest(const PxBounds3& queryBox) : mBox(queryBox) {} + + PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const + { + // PT: we don't use PxBounds3::intersects() because isValid() asserts on our empty boxes! 
+ const PxVec3 bucketMin = box.getMin(); + const PxVec3 bucketMax = box.getMax(); + return !(mBox.minimum.x > bucketMax.x || bucketMin.x > mBox.maximum.x || + mBox.minimum.y > bucketMax.y || bucketMin.y > mBox.maximum.y || + mBox.minimum.z > bucketMax.z || bucketMin.z > mBox.maximum.z); + } + + PX_FORCE_INLINE Ps::IntBool operator()(const PxBounds3& bounds) const + { + // PT: we don't use PxBounds3::intersects() because isValid() asserts on our empty boxes! + const PxVec3& bucketMin = bounds.minimum; + const PxVec3& bucketMax = bounds.maximum; + return !(mBox.minimum.x > bucketMax.x || bucketMin.x > mBox.maximum.x || + mBox.minimum.y > bucketMax.y || bucketMin.y > mBox.maximum.y || + mBox.minimum.z > bucketMax.z || bucketMin.z > mBox.maximum.z); + } +private: + BucketPrunerAABBAABBTest& operator=(const BucketPrunerAABBAABBTest&); + const PxBounds3 mBox; +}; + +/*struct BucketPrunerAABBAABBTest_SIMD +{ + PX_FORCE_INLINE BucketPrunerAABBAABBTest_SIMD(const PxBounds3& b) + : mCenter(V3LoadU(b.getCenter())) + , mExtents(V3LoadU(b.getExtents())) + {} + + PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const + { + return V3AllGrtrOrEq(V3Add(mExtents, AlignedLoad(&box.mExtents.x)), V3Abs(V3Sub(AlignedLoad(&box.mCenter.x), mCenter))); + } +private: + BucketPrunerAABBAABBTest_SIMD& operator=(const BucketPrunerAABBAABBTest_SIMD&); + const Vec3V mCenter, mExtents; +};*/ + +/////////////////////////////////////////////////////////////////////////////// + +#ifdef USE_SIMD +struct OBBAABBTest_SIMD +{ + OBBAABBTest_SIMD(const PxMat33& rotation, const PxVec3& translation, const PxVec3& extents) + { + const Vec3V eps = V3Load(1e-6f); + + mT = V3LoadU(translation); + mExtents = V3LoadU(extents); + + // storing the transpose matrices yields a simpler SIMD test + mRT = Mat33V_From_PxMat33(rotation.getTranspose()); + mART = Mat33V(V3Add(V3Abs(mRT.col0), eps), V3Add(V3Abs(mRT.col1), eps), V3Add(V3Abs(mRT.col2), eps)); + mBB_xyz = M33TrnspsMulV3(mART, mExtents); + +/* 
if(fullTest) + { + const Vec3V eYZX = V3PermYZX(mExtents), eZXY = V3PermZXY(mExtents); + + mBB_123 = V3MulAdd(eYZX, V3PermZXY(mART.col0), V3Mul(eZXY, V3PermYZX(mART.col0))); + mBB_456 = V3MulAdd(eYZX, V3PermZXY(mART.col1), V3Mul(eZXY, V3PermYZX(mART.col1))); + mBB_789 = V3MulAdd(eYZX, V3PermZXY(mART.col2), V3Mul(eZXY, V3PermYZX(mART.col2))); + }*/ + } + + PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const + { + const Vec3V extentsV = V3LoadU(box.mExtents); + + const Vec3V t = V3Sub(mT, V3LoadU(box.mCenter)); + + // class I - axes of AABB + if(V3OutOfBounds(t, V3Add(extentsV, mBB_xyz))) + return Ps::IntFalse; + + const Vec3V rX = mRT.col0, rY = mRT.col1, rZ = mRT.col2; + const Vec3V arX = mART.col0, arY = mART.col1, arZ = mART.col2; + + const FloatV eX = V3GetX(extentsV), eY = V3GetY(extentsV), eZ = V3GetZ(extentsV); + const FloatV tX = V3GetX(t), tY = V3GetY(t), tZ = V3GetZ(t); + + // class II - axes of OBB + { + const Vec3V v = V3ScaleAdd(rZ, tZ, V3ScaleAdd(rY, tY, V3Scale(rX, tX))); + const Vec3V v2 = V3ScaleAdd(arZ, eZ, V3ScaleAdd(arY, eY, V3ScaleAdd(arX, eX, mExtents))); + if(V3OutOfBounds(v, v2)) + return Ps::IntFalse; + } + +// if(!fullTest) + return Ps::IntTrue; + +/* // class III - edge cross products. 
Almost all OBB tests early-out with type I or type II, + // so early-outs here probably aren't useful (TODO: profile) + + const Vec3V va = V3NegScaleSub(rZ, tY, V3Scale(rY, tZ)); + const Vec3V va2 = V3ScaleAdd(arY, eZ, V3ScaleAdd(arZ, eY, mBB_123)); + const BoolV ba = BOr(V3IsGrtr(va, va2), V3IsGrtr(V3Neg(va2), va)); + + const Vec3V vb = V3NegScaleSub(rX, tZ, V3Scale(rZ, tX)); + const Vec3V vb2 = V3ScaleAdd(arX, eZ, V3ScaleAdd(arZ, eX, mBB_456)); + const BoolV bb = BOr(V3IsGrtr(vb, vb2), V3IsGrtr(V3Neg(vb2), vb)); + + const Vec3V vc = V3NegScaleSub(rY, tX, V3Scale(rX, tY)); + const Vec3V vc2 = V3ScaleAdd(arX, eY, V3ScaleAdd(arY, eX, mBB_789)); + const BoolV bc = BOr(V3IsGrtr(vc, vc2), V3IsGrtr(V3Neg(vc2), vc)); + + return BAllEq(BOr(ba, BOr(bb,bc)), BFFFF());*/ + } + + PX_FORCE_INLINE Ps::IntBool operator()(const PxBounds3& bounds) const + { + BucketBox tmp; + tmp.mCenter = bounds.getCenter(); + tmp.mExtents = bounds.getExtents(); + return (*this)(tmp); + } + + Vec3V mExtents; // extents of OBB + Vec3V mT; // translation of OBB + Mat33V mRT; // transpose of rotation matrix of OBB + Mat33V mART; // transpose of mRT, padded by epsilon + Vec3V mBB_xyz; // extents of OBB along coordinate axes + +/* Vec3V mBB_123; // projections of extents onto edge-cross axes + Vec3V mBB_456; + Vec3V mBB_789;*/ +}; +#else +struct OBBAABBTest_Scalar +{ + OBBAABBTest_Scalar(const PxMat33& rotation, const PxVec3& translation, const PxVec3& extents) + { + mR = rotation; + mT = translation; + mExtents = extents; + + const PxVec3 eps(1e-6f); + mAR = PxMat33(mR[0].abs() + eps, mR[1].abs() + eps, mR[2].abs() + eps); // Epsilon prevents floating-point inaccuracies (strategy borrowed from RAPID) + mBB_xyz = mAR.transform(mExtents); // Precompute box-box data - Courtesy of Erwin de Vries + +/* PxReal ex = mExtents.x, ey = mExtents.y, ez = mExtents.z; + mBB_1 = ey*mAR[2].x + ez*mAR[1].x; mBB_2 = ez*mAR[0].x + ex*mAR[2].x; mBB_3 = ex*mAR[1].x + ey*mAR[0].x; + mBB_4 = ey*mAR[2].y + ez*mAR[1].y; 
mBB_5 = ez*mAR[0].y + ex*mAR[2].y; mBB_6 = ex*mAR[1].y + ey*mAR[0].y; + mBB_7 = ey*mAR[2].z + ez*mAR[1].z; mBB_8 = ez*mAR[0].z + ex*mAR[2].z; mBB_9 = ex*mAR[1].z + ey*mAR[0].z;*/ + } + + PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const + { + const PxVec3& c = box.mCenter; + const PxVec3& e = box.mExtents; + + const PxVec3 T = mT - c; + // Class I : A's basis vectors + if(PxAbs(T.x) > e.x + mBB_xyz.x) return Ps::IntFalse; + if(PxAbs(T.y) > e.y + mBB_xyz.y) return Ps::IntFalse; + if(PxAbs(T.z) > e.z + mBB_xyz.z) return Ps::IntFalse; + + // Class II : B's basis vectors + if(PxAbs(T.dot(mR[0])) > e.dot(mAR[0]) + mExtents.x) return Ps::IntFalse; + if(PxAbs(T.dot(mR[1])) > e.dot(mAR[1]) + mExtents.y) return Ps::IntFalse; + if(PxAbs(T.dot(mR[2])) > e.dot(mAR[2]) + mExtents.z) return Ps::IntFalse; + + // Class III : 9 cross products + if(0) + { + if(PxAbs(T.z*mR[0].y - T.y*mR[0].z) > e.y*mAR[0].z + e.z*mAR[0].y + mBB_1) return Ps::IntFalse; // L = A0 x B0 + if(PxAbs(T.z*mR[1].y - T.y*mR[1].z) > e.y*mAR[1].z + e.z*mAR[1].y + mBB_2) return Ps::IntFalse; // L = A0 x B1 + if(PxAbs(T.z*mR[2].y - T.y*mR[2].z) > e.y*mAR[2].z + e.z*mAR[2].y + mBB_3) return Ps::IntFalse; // L = A0 x B2 + + if(PxAbs(T.x*mR[0].z - T.z*mR[0].x) > e.x*mAR[0].z + e.z*mAR[0].x + mBB_4) return Ps::IntFalse; // L = A1 x B0 + if(PxAbs(T.x*mR[1].z - T.z*mR[1].x) > e.x*mAR[1].z + e.z*mAR[1].x + mBB_5) return Ps::IntFalse; // L = A1 x B1 + if(PxAbs(T.x*mR[2].z - T.z*mR[2].x) > e.x*mAR[2].z + e.z*mAR[2].x + mBB_6) return Ps::IntFalse; // L = A1 x B2 + + if(PxAbs(T.y*mR[0].x - T.x*mR[0].y) > e.x*mAR[0].y + e.y*mAR[0].x + mBB_7) return Ps::IntFalse; // L = A2 x B0 + if(PxAbs(T.y*mR[1].x - T.x*mR[1].y) > e.x*mAR[1].y + e.y*mAR[1].x + mBB_8) return Ps::IntFalse; // L = A2 x B1 + if(PxAbs(T.y*mR[2].x - T.x*mR[2].y) > e.x*mAR[2].y + e.y*mAR[2].x + mBB_9) return Ps::IntFalse; // L = A2 x B2 + } + return Ps::IntTrue; + } + +private: + PxMat33 mR; // rotation matrix + PxMat33 mAR; // absolute rotation 
matrix + PxVec3 mT; // translation from obb space to model space + PxVec3 mExtents; + + PxVec3 mBB_xyz; + + float mBB_1, mBB_2, mBB_3; + float mBB_4, mBB_5, mBB_6; + float mBB_7, mBB_8, mBB_9; +}; +#endif + +#ifdef USE_SIMD +typedef OBBAABBTest_SIMD BucketPrunerOBBAABBTest; +#else +typedef OBBAABBTest_Scalar BucketPrunerOBBAABBTest; +#endif + +/////////////////////////////////////////////////////////////////////////////// + +PxAgain BucketPrunerCore::overlap(const ShapeData& queryVolume, PrunerCallback& pcb) const +{ + PX_ASSERT(!mDirty); + PxAgain again = true; + + const PxBounds3& cullBox = queryVolume.getPrunerInflatedWorldAABB(); + + switch(queryVolume.getType()) + { + case PxGeometryType::eBOX: + { + if(queryVolume.isOBB()) + { + const BucketPrunerOverlapTraversal<BucketPrunerOBBAABBTest, false> overlap; + again = overlap(*this, + BucketPrunerOBBAABBTest( + queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerWorldPos(), + queryVolume.getPrunerBoxGeomExtentsInflated()), + pcb, cullBox); + } + else + { + const BucketPrunerOverlapTraversal<BucketPrunerAABBAABBTest, true> overlap; + again = overlap(*this, BucketPrunerAABBAABBTest(cullBox), pcb, cullBox); + } + } + break; + + case PxGeometryType::eCAPSULE: + { + const BucketPrunerOverlapTraversal<BucketPrunerOBBAABBTest, false> overlap; + again = overlap(*this, + BucketPrunerOBBAABBTest( + queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerWorldPos(), + queryVolume.getPrunerBoxGeomExtentsInflated()), + pcb, cullBox); + } + break; + + case PxGeometryType::eSPHERE: + { + const Sphere& sphere = queryVolume.getGuSphere(); + const PxVec3 sphereExtents(sphere.radius); + const BucketPrunerOverlapTraversal<BucketPrunerSphereAABBTest, true> overlap; + again = overlap(*this, BucketPrunerSphereAABBTest(sphere), pcb, cullBox); + } + break; + + case PxGeometryType::eCONVEXMESH: + { + const BucketPrunerOverlapTraversal<BucketPrunerOBBAABBTest, false> overlap; + again = overlap(*this, + BucketPrunerOBBAABBTest( + 
queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerWorldPos(), + queryVolume.getPrunerBoxGeomExtentsInflated()), + pcb, cullBox); + } + break; + + case PxGeometryType::ePLANE: + case PxGeometryType::eTRIANGLEMESH: + case PxGeometryType::eHEIGHTFIELD: + case PxGeometryType::eGEOMETRY_COUNT: + case PxGeometryType::eINVALID: + PX_ALWAYS_ASSERT_MESSAGE("unsupported overlap query volume geometry type"); + } + return again; +} + +/////////////////////////////////////////////////////////////////////////////// + +void BucketPrunerCore::shiftOrigin(const PxVec3& shift) +{ + for(PxU32 i=0;i<mNbFree;i++) + { + mFreeBounds[i].minimum -= shift; + mFreeBounds[i].maximum -= shift; + } + + const PxU32 nb = mCoreNbObjects; + //if (nb) + { + mGlobalBox.mCenter -= shift; + + #ifdef _DEBUG + mGlobalBox.mDebugMin -= shift[mSortAxis]; + #endif + + encodeBoxMinMax(mGlobalBox, mSortAxis); + + for(PxU32 i=0; i < nb; i++) + { + mCoreBoxes[i].minimum -= shift; + mCoreBoxes[i].maximum -= shift; + } + + for(PxU32 i=0; i < mSortedNb; i++) + { + mSortedWorldBoxes[i].mCenter -= shift; + + #ifdef _DEBUG + mSortedWorldBoxes[i].mDebugMin -= shift[mSortAxis]; + #endif + encodeBoxMinMax(mSortedWorldBoxes[i], mSortAxis); + } + + for(PxU32 i=0; i < 5; i++) + mLevel1.mBucketBox[i].mCenter -= shift; + + for(PxU32 i=0; i < 5; i++) + for(PxU32 j=0; j < 5; j++) + mLevel2[i].mBucketBox[j].mCenter -= shift; + + for(PxU32 i=0; i < 5; i++) + for(PxU32 j=0; j < 5; j++) + for(PxU32 k=0; k < 5; k++) + mLevel3[i][j].mBucketBox[k].mCenter -= shift; + } +} + +/////////////////////////////////////////////////////////////////////////////// + +static void visualize(Cm::RenderOutput& out, const BucketBox& bounds) +{ + out << Cm::DebugBox(PxBounds3(bounds.getMin(), bounds.getMax()), true); +} + +void BucketPrunerCore::visualize(Cm::RenderOutput& out, PxU32 color) const +{ + const PxTransform idt = PxTransform(PxIdentity); + out << idt; + out << color; + + ::visualize(out, mGlobalBox); + + for(PxU32 i=0;i<5;i++) + { + 
if(!mLevel1.mCounters[i]) + continue; + + ::visualize(out, mLevel1.mBucketBox[i]); + + for(PxU32 j=0;j<5;j++) + { + if(!mLevel2[i].mCounters[j]) + continue; + + ::visualize(out, mLevel2[i].mBucketBox[j]); + + for(PxU32 k=0;k<5;k++) + { + if(!mLevel3[i][j].mCounters[k]) + continue; + + ::visualize(out, mLevel3[i][j].mBucketBox[k]); + } + } + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +BucketPruner::BucketPruner() +{ +} + +BucketPruner::~BucketPruner() +{ +} + +bool BucketPruner::addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count, bool) +{ + if(!count) + return true; + + const PxU32 valid = mPool.addObjects(results, bounds, payload, count); + mCore.mDirty = true; + + mCore.setExternalMemory(mPool.getNbActiveObjects(), mPool.getCurrentWorldBoxes(), mPool.getObjects()); + + return valid == count; +} + +void BucketPruner::removeObjects(const PrunerHandle* handles, PxU32 count) +{ + if(!count) + return; + + for(PxU32 i=0;i<count;i++) + mPool.removeObject(handles[i]); + + mCore.setExternalMemory(mPool.getNbActiveObjects(), mPool.getCurrentWorldBoxes(), mPool.getObjects()); + mCore.mDirty = true; +} + +void BucketPruner::updateObjects(const PrunerHandle* handles, const PxBounds3* newBounds, PxU32 count) +{ + if(!count) + return; + + if(newBounds) + { + for(PxU32 i=0;i<count;i++) + mPool.setWorldAABB(handles[i], newBounds[i]); + } + + mCore.setExternalMemory(mPool.getNbActiveObjects(), mPool.getCurrentWorldBoxes(), mPool.getObjects()); + mCore.mDirty = true; +} + +void BucketPruner::updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count) +{ + mPool.updateObjects(handles, indices, newBounds, count); + mCore.setExternalMemory(mPool.getNbActiveObjects(), mPool.getCurrentWorldBoxes(), mPool.getObjects()); + 
mCore.mDirty = true; +} + +void BucketPruner::commit() +{ + mCore.build(); +} + +void BucketPruner::shiftOrigin(const PxVec3& shift) +{ + mCore.shiftOrigin(shift); +} + +PxAgain BucketPruner::sweep(const ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const +{ + PX_ASSERT(!mCore.mDirty); + if(mCore.mDirty) + return true; // it may crash otherwise + return mCore.sweep(queryVolume, unitDir, inOutDistance, pcb); +} + +PxAgain BucketPruner::overlap(const ShapeData& queryVolume, PrunerCallback& pcb) const +{ + PX_ASSERT(!mCore.mDirty); + if(mCore.mDirty) + return true; // it may crash otherwise + return mCore.overlap(queryVolume, pcb); +} + +PxAgain BucketPruner::raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const +{ + PX_ASSERT(!mCore.mDirty); + if(mCore.mDirty) + return true; // it may crash otherwise + return mCore.raycast(origin, unitDir, inOutDistance, pcb); +} + +void BucketPruner::visualize(Cm::RenderOutput& out, PxU32 color) const +{ + mCore.visualize(out, color); +} + + +#define MBP_ALLOC(x) PX_ALLOC(x, "BucketPruner") +#define MBP_ALLOC_TMP(x) PX_ALLOC_TEMP(x, "BucketPruner") +#define MBP_FREE(x) if(x) PX_FREE_AND_RESET(x) +#define DELETESINGLE(x) if (x) { delete x; x = NULL; } +#define DELETEARRAY(x) if (x) { delete []x; x = NULL; } +#define INVALID_ID 0xffffffff + +#ifndef USE_REGULAR_HASH_MAP +static PX_FORCE_INLINE bool differentPair(const BucketPrunerPair& p, const PrunerPayload& payload) +{ + const bool same = p.mPayload == payload; + return !same; +} + +/////////////////////////////////////////////////////////////////////////////// + +BucketPrunerMap::BucketPrunerMap() : + mHashSize (0), + mMask (0), + mNbActivePairs (0), + mHashTable (NULL), + mNext (NULL), + mActivePairs (NULL), + mReservedMemory (0) +{ +} + +/////////////////////////////////////////////////////////////////////////////// + +BucketPrunerMap::~BucketPrunerMap() +{ + purge(); +} + 
///////////////////////////////////////////////////////////////////////////////

// Frees the hash table, the collision-chain array and the pair array, and
// resets the map to its freshly-constructed (empty, unallocated) state.
// NOTE(review): mReservedMemory is not reset here — reserveMemory() state
// survives a purge; confirm this is intentional.
void BucketPrunerMap::purge()
{
	MBP_FREE(mNext);
	MBP_FREE(mActivePairs);
	MBP_FREE(mHashTable);
	mHashSize		= 0;
	mMask			= 0;
	mNbActivePairs	= 0;
}

///////////////////////////////////////////////////////////////////////////////

// Looks up the entry stored for 'payload'.
// Returns NULL when the map is still unallocated or the payload is absent;
// otherwise returns a pointer into mActivePairs (valid until the next
// add/remove, which may reallocate or compact the array).
const BucketPrunerPair* BucketPrunerMap::findPair(const PrunerPayload& payload) const
{
	if(!mHashTable)
		return NULL;	// Nothing has been allocated yet

	// Compute hash value for this pair
	const PxU32 hashValue = hash(payload) & mMask;

	const BucketPrunerPair* PX_RESTRICT activePairs = mActivePairs;
	const PxU32* PX_RESTRICT next = mNext;

	// Look for it in the table
	PxU32 offset = mHashTable[hashValue];
	while(offset!=INVALID_ID && differentPair(activePairs[offset], payload))
	{
		offset = next[offset];	// Better to have a separate array for this
	}
	if(offset==INVALID_ID)
		return NULL;
	PX_ASSERT(offset<mNbActivePairs);
	// Match mActivePairs[offset] => the pair is persistent
	return &activePairs[offset];
}

// Internal version saving hash computation: identical to the public findPair()
// except the caller supplies the already-masked hash value.
PX_FORCE_INLINE BucketPrunerPair* BucketPrunerMap::findPair(const PrunerPayload& payload, PxU32 hashValue) const
{
	if(!mHashTable)
		return NULL;	// Nothing has been allocated yet

	BucketPrunerPair* PX_RESTRICT activePairs = mActivePairs;
	const PxU32* PX_RESTRICT next = mNext;

	// Look for it in the table
	PxU32 offset = mHashTable[hashValue];
	while(offset!=INVALID_ID && differentPair(activePairs[offset], payload))
	{
		offset = next[offset];	// Better to have a separate array for this
	}
	if(offset==INVALID_ID)
		return NULL;
	PX_ASSERT(offset<mNbActivePairs);
	// Match mActivePairs[offset] => the pair is persistent
	return &activePairs[offset];
}

///////////////////////////////////////////////////////////////////////////////

// Inserts (payload -> coreIndex, timeStamp). If the payload is already mapped,
// the existing entry is returned unchanged (asserting the stored index/stamp
// match). Grows the table to the next power of two when full, then links the
// new pair at the head of its hash bucket's chain.
BucketPrunerPair* BucketPrunerMap::addPair(const PrunerPayload& payload, PxU32 coreIndex, PxU32 timeStamp)
{
	PxU32 hashValue = hash(payload) & mMask;

	{
		BucketPrunerPair* PX_RESTRICT p = findPair(payload, hashValue);
		if(p)
		{
			PX_ASSERT(p->mCoreIndex==coreIndex);
			PX_ASSERT(p->mTimeStamp==timeStamp);
			return p;	// Persistent pair
		}
	}

	// This is a new pair
	if(mNbActivePairs >= mHashSize)
	{
		// Get more entries
		mHashSize = Ps::nextPowerOfTwo(mNbActivePairs+1);
		mMask = mHashSize-1;

		reallocPairs();

		// Recompute hash value with new hash size
		hashValue = hash(payload) & mMask;	// ### redundant hash computation here?
	}

	BucketPrunerPair* PX_RESTRICT p = &mActivePairs[mNbActivePairs];
	p->mPayload		= payload;
	p->mCoreIndex	= coreIndex;
	p->mTimeStamp	= timeStamp;
	mNext[mNbActivePairs] = mHashTable[hashValue];
	mHashTable[hashValue] = mNbActivePairs++;
	return p;
}

///////////////////////////////////////////////////////////////////////////////

// Unlinks the pair at 'pairIndex' from the chain of bucket 'hashValue', then
// keeps mActivePairs dense by moving the last pair into the freed slot and
// re-linking that moved pair into its own bucket's chain.
void BucketPrunerMap::removePairInternal(const PrunerPayload& /*payload*/, PxU32 hashValue, PxU32 pairIndex)
{
	// Walk the hash table to fix mNext
	{
		PxU32 offset = mHashTable[hashValue];
		PX_ASSERT(offset!=INVALID_ID);

		PxU32 previous=INVALID_ID;
		while(offset!=pairIndex)
		{
			previous = offset;
			offset = mNext[offset];
		}

		// Let us go/jump us
		if(previous!=INVALID_ID)
		{
			PX_ASSERT(mNext[previous]==pairIndex);
			mNext[previous] = mNext[pairIndex];
		}
		// else we were the first
		else mHashTable[hashValue] = mNext[pairIndex];
		// we're now free to reuse mNext[pairIndex] without breaking the list
	}
#if PX_DEBUG
	mNext[pairIndex]=INVALID_ID;
#endif
	// Invalidate entry

	// Fill holes
	if(1)
	{
		// 1) Remove last pair
		const PxU32 lastPairIndex = mNbActivePairs-1;
		if(lastPairIndex==pairIndex)
		{
			mNbActivePairs--;
		}
		else
		{
			const BucketPrunerPair* last = &mActivePairs[lastPairIndex];
			const PxU32 lastHashValue = hash(last->mPayload) & mMask;

			// Walk the hash table to fix mNext
			PxU32 offset = mHashTable[lastHashValue];
			PX_ASSERT(offset!=INVALID_ID);

			PxU32 previous=INVALID_ID;
			while(offset!=lastPairIndex)
			{
				previous = offset;
				offset = mNext[offset];
			}

			// Let us go/jump us
			if(previous!=INVALID_ID)
			{
				PX_ASSERT(mNext[previous]==lastPairIndex);
				mNext[previous] = mNext[lastPairIndex];
			}
			// else we were the first
			else mHashTable[lastHashValue] = mNext[lastPairIndex];
			// we're now free to reuse mNext[lastPairIndex] without breaking the list

#if PX_DEBUG
			mNext[lastPairIndex]=INVALID_ID;
#endif

			// Don't invalidate entry since we're going to shrink the array

			// 2) Re-insert in free slot
			mActivePairs[pairIndex] = mActivePairs[lastPairIndex];
#if PX_DEBUG
			PX_ASSERT(mNext[pairIndex]==INVALID_ID);
#endif
			mNext[pairIndex] = mHashTable[lastHashValue];
			mHashTable[lastHashValue] = pairIndex;

			mNbActivePairs--;
		}
	}
}

///////////////////////////////////////////////////////////////////////////////

// Removes the entry for 'payload'. On success returns true and writes the
// stored core index and timestamp to the out-parameters; returns false if the
// payload was never mapped. May shrink the table afterwards (see shrinkMemory).
bool BucketPrunerMap::removePair(const PrunerPayload& payload, PxU32& coreIndex, PxU32& timeStamp)
{
	const PxU32 hashValue = hash(payload) & mMask;
	const BucketPrunerPair* p = findPair(payload, hashValue);
	if(!p)
		return false;
	PX_ASSERT(p->mPayload==payload);

	coreIndex = p->mCoreIndex;
	timeStamp = p->mTimeStamp;

	removePairInternal(payload, hashValue, getPairIndex(p));

	shrinkMemory();
	return true;
}

///////////////////////////////////////////////////////////////////////////////

// Shrinks the table to the smallest power of two holding the active pairs,
// unless that would drop below the size pinned by reserveMemory().
void BucketPrunerMap::shrinkMemory()
{
	// Check correct memory against actually used memory
	const PxU32 correctHashSize = Ps::nextPowerOfTwo(mNbActivePairs);
	if(mHashSize==correctHashSize)
		return;

	if(mReservedMemory && correctHashSize < mReservedMemory)
		return;

	// Reduce memory used
	mHashSize = correctHashSize;
	mMask = mHashSize-1;

	reallocPairs();
}

///////////////////////////////////////////////////////////////////////////////

// Fills 'nb' consecutive dwords at 'dest' with 'value' (used to reset the
// hash table to INVALID_ID).
static PX_FORCE_INLINE void storeDwords(PxU32* dest, PxU32 nb, PxU32 value)
{
	while(nb--)
		*dest++ = value;
}

// Reallocates all three arrays at the current mHashSize and re-links every
// active pair into the (re-masked) hash buckets. Existing pair data is copied
// over; the old arrays are freed afterwards.
void BucketPrunerMap::reallocPairs()
{
	MBP_FREE(mHashTable);
	mHashTable = reinterpret_cast<PxU32*>(MBP_ALLOC(mHashSize*sizeof(PxU32)));
	storeDwords(mHashTable, mHashSize, INVALID_ID);

	// Get some bytes for new entries
	BucketPrunerPair* newPairs = reinterpret_cast<BucketPrunerPair*>(MBP_ALLOC(mHashSize * sizeof(BucketPrunerPair)));
	PX_ASSERT(newPairs);

	PxU32* newNext = reinterpret_cast<PxU32*>(MBP_ALLOC(mHashSize * sizeof(PxU32)));
	PX_ASSERT(newNext);

	// Copy old data if needed
	if(mNbActivePairs)
		PxMemCopy(newPairs, mActivePairs, mNbActivePairs*sizeof(BucketPrunerPair));
	// ### check it's actually needed... probably only for pairs whose hash value was cut by the and
	// yeah, since hash(id0, id1) is a constant
	// However it might not be needed to recompute them => only less efficient but still ok
	for(PxU32 i=0;i<mNbActivePairs;i++)
	{
		const PxU32 hashValue = hash(mActivePairs[i].mPayload) & mMask;	// New hash value with new mask
		newNext[i] = mHashTable[hashValue];
		mHashTable[hashValue] = i;
	}

	// Delete old data
	MBP_FREE(mNext);
	MBP_FREE(mActivePairs);

	// Assign new pointer
	mActivePairs = newPairs;
	mNext = newNext;
}

///////////////////////////////////////////////////////////////////////////////

// Pre-sizes the table to at least 'memSize' entries (rounded up to a power of
// two) and pins that size so shrinkMemory() will not go below it.
void BucketPrunerMap::reserveMemory(PxU32 memSize)
{
	if(!memSize)
		return;

	if(!Ps::isPowerOfTwo(memSize))
		memSize = Ps::nextPowerOfTwo(memSize);

	mHashSize = memSize;
	mMask = mHashSize-1;

	mReservedMemory = memSize;

	reallocPairs();
}

///////////////////////////////////////////////////////////////////////////////
#endif	// !USE_REGULAR_HASH_MAP

// --- end of SqBucketPruner.cpp in this dump; SqBucketPruner.h follows ---
to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 

#ifndef SQ_BUCKETPRUNER_H
#define SQ_BUCKETPRUNER_H

#include "SqTypedef.h"
#include "SqPruningPool.h"
#include "PsHash.h"

// Number of objects kept in the small linear "free array" before they are
// pushed into the bucketed structure (see BucketPrunerCore::mFreeObjects).
#define FREE_PRUNER_SIZE	16

//#define USE_REGULAR_HASH_MAP
#ifdef USE_REGULAR_HASH_MAP
	#include "PsHashMap.h"
#endif

namespace physx
{
namespace Sq
{
	typedef PxU32	BucketWord;

#if PX_VC
	#pragma warning(push)
	#pragma warning( disable : 4324 )	// Padding was added at the end of a structure because of a __declspec(align) value.
#endif

	// 16-byte aligned center/extents AABB. mData0/mData1 carry integer-encoded
	// min/max values along the current sorting axis (see encodeBoxMinMax usage
	// in SqBucketPruner.cpp).
	PX_ALIGN_PREFIX(16) struct BucketBox
	{
		PxVec3	mCenter;
		PxU32	mData0;		// Integer-encoded min value along sorting axis
		PxVec3	mExtents;
		PxU32	mData1;		// Integer-encoded max value along sorting axis

#ifdef _DEBUG
		// PT: we need the original min value for debug checks. Using the center/extents version
		// fails because recomputing the min from them introduces FPU accuracy errors in the values.
		float	mDebugMin;
#endif

		// Min corner reconstructed from center/extents.
		PX_FORCE_INLINE PxVec3 getMin() const
		{
			return mCenter - mExtents;
		}

		// Max corner reconstructed from center/extents.
		PX_FORCE_INLINE PxVec3 getMax() const
		{
			return mCenter + mExtents;
		}

		// Resets to the canonical "empty" box (negative extents).
		PX_FORCE_INLINE void setEmpty()
		{
			mCenter = PxVec3(0.0f);
			mExtents = PxVec3(-PX_MAX_BOUNDS_EXTENTS);

#ifdef _DEBUG
			mDebugMin = PX_MAX_BOUNDS_EXTENTS;
#endif
		}
	}PX_ALIGN_SUFFIX(16);

	// One level of the 5-way bucket hierarchy: per-child object counts,
	// start offsets into the sorted arrays, child bounds, and precomputed
	// traversal orders.
	PX_ALIGN_PREFIX(16) struct BucketPrunerNode
	{
				BucketPrunerNode();

		void	classifyBoxes(	float limitX, float limitZ,
								PxU32 nb,
								BucketBox* PX_RESTRICT boxes,
								const PrunerPayload* PX_RESTRICT objects,
								BucketBox* PX_RESTRICT sortedBoxes,
								PrunerPayload* PX_RESTRICT sortedObjects,
								bool isCrossBucket, PxU32 sortAxis);

		// Zeroes all child counters and offsets.
		PX_FORCE_INLINE void initCounters()
		{
			for(PxU32 i=0;i<5;i++)
				mCounters[i] = 0;
			for(PxU32 i=0;i<5;i++)
				mOffsets[i] = 0;
		}

		BucketWord	mCounters[5];	// Number of objects in each of the 5 children
		BucketWord	mOffsets[5];	// Start index of objects for each of the 5 children
		BucketBox	mBucketBox[5];	// AABBs around objects for each of the 5 children
		PxU16		mOrder[8];		// PNS: 5 children => 3 bits/index => 3*5=15 bits total, for each of the 8 canonical directions
	}PX_ALIGN_SUFFIX(16);

	// Hashes a payload by folding its two pointer-sized data words into a
	// 64-bit value (on 64-bit targets only the low 32 bits of each word are
	// used — see commented-out alternative).
	PX_FORCE_INLINE PxU32 hash(const PrunerPayload& payload)
	{
#if PX_P64_FAMILY
//		const PxU32 h0 = Ps::hash((const void*)payload.data[0]);
//		const PxU32 h1 = Ps::hash((const void*)payload.data[1]);
		const PxU32 h0 = PxU32(PX_MAX_U32 & payload.data[0]);
		const PxU32 h1 = PxU32(PX_MAX_U32 & payload.data[1]);
		return Ps::hash(PxU64(h0)|(PxU64(h1)<<32));
#else
		return Ps::hash(PxU64(payload.data[0])|(PxU64(payload.data[1])<<32));
#endif
	}

#ifdef USE_REGULAR_HASH_MAP
	// Value type for the regular Ps::HashMap variant.
	struct BucketPrunerPair : public Ps::UserAllocated
	{
		PX_FORCE_INLINE	BucketPrunerPair()	{}
		PX_FORCE_INLINE	BucketPrunerPair(PxU32 index, PxU32 stamp) : mCoreIndex(index), mTimeStamp(stamp)	{}
		PxU32	mCoreIndex;	// index in mCoreObjects
		PxU32	mTimeStamp;
	};
	typedef Ps::HashMap<PrunerPayload, BucketPrunerPair> BucketPrunerMap;
#else
	// Entry of the custom map below: payload plus its core-array index and
	// insertion timestamp.
	struct BucketPrunerPair : public Ps::UserAllocated
	{
		PrunerPayload	mPayload;
		PxU32			mCoreIndex;	// index in mCoreObjects
		PxU32			mTimeStamp;
	};

	// Custom hash-map - currently faster than the regular hash-map (Ps::HashMap), in particular for 'find-and-erase' operations.
	// Open-addressed pair map: dense mActivePairs array plus a bucketed
	// hash table (mHashTable) with per-entry collision chains (mNext).
	// Implementation lives in SqBucketPruner.cpp.
	class BucketPrunerMap : public Ps::UserAllocated
	{
		public:
							BucketPrunerMap();
							~BucketPrunerMap();

			// Frees everything and resets to the empty state.
			void			purge();
			// Shrinks the table when possible (respects reserveMemory()).
			void			shrinkMemory();

			BucketPrunerPair*		addPair		(const PrunerPayload& payload, PxU32 coreIndex, PxU32 timeStamp);
			bool					removePair	(const PrunerPayload& payload, PxU32& coreIndex, PxU32& timeStamp);
			const BucketPrunerPair*	findPair	(const PrunerPayload& payload)	const;

			// Index of 'pair' inside the dense mActivePairs array.
			PX_FORCE_INLINE	PxU32	getPairIndex(const BucketPrunerPair* pair)	const
			{
				return (PxU32((size_t(pair) - size_t(mActivePairs)))/sizeof(BucketPrunerPair));
			}

			PxU32				mHashSize;			// Current table size (power of two)
			PxU32				mMask;				// mHashSize-1, used to mask hash values
			PxU32				mNbActivePairs;		// Number of valid entries in mActivePairs
			PxU32*				mHashTable;			// Bucket heads (INVALID_ID terminated chains)
			PxU32*				mNext;				// Per-entry next index in the bucket chain
			BucketPrunerPair*	mActivePairs;		// Dense pair storage
			PxU32				mReservedMemory;	// Lower bound pinned by reserveMemory()

			PX_FORCE_INLINE	BucketPrunerPair*	findPair(const PrunerPayload& payload, PxU32 hashValue) const;
			void				removePairInternal(const PrunerPayload& payload, PxU32 hashValue, PxU32 pairIndex);
			void				reallocPairs();
			void				reserveMemory(PxU32 memSize);
	};
#endif

	// Core bucket pruner: a small "free array" for recent objects, a set of
	// core arrays (bounds/payloads) and a 3-level, 5-way bucket hierarchy
	// rebuilt by classifyBoxes() when mDirty is set.
	class BucketPrunerCore : public Ps::UserAllocated
	{
		public:
						BucketPrunerCore(bool externalMemory=true);
						~BucketPrunerCore();

				void	release();

				// Points the core arrays at externally-owned memory.
				void	setExternalMemory(PxU32 nbObjects, PxBounds3* boxes, PrunerPayload* objects);

				bool	addObject(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp=0);
				bool	removeObject(const PrunerPayload& object, PxU32& timeStamp);
				bool	updateObject(const PxBounds3& worldAABB, const PrunerPayload& object);

				// PT: look for objects marked with input timestamp everywhere in the structure, and remove them. This is the same
				// as calling 'removeObject' individually for all these objects, but much more efficient. Returns number of removed objects.
				PxU32	removeMarkedObjects(PxU32 timeStamp);

				PxAgain	raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
				PxAgain	overlap(const Gu::ShapeData& queryVolume, PrunerCallback&) const;
				PxAgain	sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;

				void	shiftOrigin(const PxVec3& shift);

				void	visualize(Cm::RenderOutput& out, PxU32 color) const;

		// Rebuilds the bucket hierarchy from the current core arrays.
		PX_FORCE_INLINE	void	build()			{ classifyBoxes();					}

		PX_FORCE_INLINE	PxU32	getNbObjects()	const	{ return mNbFree + mCoreNbObjects;	}

//	private:
				PxU32			mCoreNbObjects;		// Current number of objects in core arrays
				PxU32			mCoreCapacity;		// Capacity of core arrays
				PxBounds3*		mCoreBoxes;			// Core array
				PrunerPayload*	mCoreObjects;		// Core array
				PxU32*			mCoreRemap;			// Remaps core index to sorted index, i.e. sortedIndex = mCoreRemap[coreIndex]

				BucketBox*		mSortedWorldBoxes;	// Sorted array
				PrunerPayload*	mSortedObjects;		// Sorted array

				PxU32			mNbFree;							// Current number of objects in the "free array" (mFreeObjects/mFreeBounds)
				PrunerPayload	mFreeObjects[FREE_PRUNER_SIZE];		// mNbFree objects are stored here
				PxBounds3		mFreeBounds[FREE_PRUNER_SIZE];		// mNbFree object bounds are stored here
				PxU32			mFreeStamps[FREE_PRUNER_SIZE];

				BucketPrunerMap	mMap;				// Maps (PrunerPayload) object to corresponding index in core array.
													// Objects in the free array do not appear in this map.

				PxU32			mSortedNb;
				PxU32			mSortedCapacity;
				PxU32			mSortAxis;

				BucketBox		mGlobalBox;			// Global bounds around all objects in the structure (except the ones in the "free" array)
				BucketPrunerNode	mLevel1;
				BucketPrunerNode	mLevel2[5];
				BucketPrunerNode	mLevel3[5][5];

				bool			mDirty;				// Set on any add/remove/update; queries assert it is cleared by build()
				bool			mOwnMemory;
		private:
				void			classifyBoxes();
				void			allocateSortedMemory(PxU32 nb);
				void			resizeCore();
		PX_FORCE_INLINE	void	addObjectInternal(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp);
	};

#if PX_VC
	#pragma warning(pop)
#endif

	// Pruner-interface adapter: pairs a PruningPool (handle management,
	// bounds storage) with a BucketPrunerCore working on the pool's memory.
	class BucketPruner : public Pruner
	{
		public:
									BucketPruner();
		virtual						~BucketPruner();

		// Pruner
		virtual bool				addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count, bool);
		virtual void				removeObjects(const PrunerHandle* handles, PxU32 count);
		virtual void				updateObjects(const PrunerHandle* handles, const PxBounds3* newBounds, PxU32 count);
		virtual void				updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count = 1);
		virtual void				commit();
		virtual PxAgain				raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
		virtual PxAgain				overlap(const Gu::ShapeData& queryVolume, PrunerCallback&) const;
		virtual PxAgain				sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
		virtual const PrunerPayload& getPayload(PrunerHandle handle) const { return mPool.getPayload(handle); }
		virtual const PrunerPayload& getPayload(PrunerHandle handle, PxBounds3*& bounds) const { return mPool.getPayload(handle, bounds); }
		virtual void				preallocate(PxU32 entries) { mPool.preallocate(entries); }
		virtual void				shiftOrigin(const PxVec3& shift);
		virtual void				visualize(Cm::RenderOutput& out, PxU32 color) const;
		// merge not implemented for bucket pruner
		virtual void				merge(const void* ) {}
		//~Pruner

		private:
				BucketPrunerCore	mCore;
				PruningPool			mPool;
	};

} // namespace Sq

}

#endif // SQ_BUCKETPRUNER_H

// --- next file in this dump: SqExtendedBucketPruner.cpp (license header follows) ---
// All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.


#include "SqExtendedBucketPruner.h"
#include "SqAABBTree.h"
#include "SqPrunerMergeData.h"
#include "SqAABBTreeQuery.h"
#include "GuBounds.h"
#include "CmBitMap.h"

using namespace physx;
using namespace Sq;
using namespace Gu;
using namespace Ps;

// Leaf capacity used when building the main tree-of-trees.
#define NB_OBJECTS_PER_NODE	4

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Constructor, preallocate trees, bounds
ExtendedBucketPruner::ExtendedBucketPruner(const PruningPool* pool)
	: mBucketCore(false), mPruningPool(pool), mMainTree(NULL), mBounds(NULL), mMergedTrees(NULL),
	mCurrentTreeIndex(0), mTreesDirty(false)
{
	// preallocated size for bounds, trees
	mCurrentTreeCapacity = 32;

	mBounds = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*mCurrentTreeCapacity, "Bounds"));
	mMergedTrees = reinterpret_cast<MergedTree*>(PX_ALLOC(sizeof(MergedTree)*mCurrentTreeCapacity, "AABB trees"));
	mExtendedBucketPrunerMap.reserve(mCurrentTreeCapacity);

	// create empty main tree
	mMainTree = PX_NEW(AABBTree);

	// create empty merge trees
	for (PxU32 i = 0; i < mCurrentTreeCapacity; i++)
	{
		mMergedTrees[i].mTimeStamp = 0;
		mMergedTrees[i].mTree = PX_NEW(AABBTree);
	}
}

//////////////////////////////////////////////////////////////////////////
// Destructor: deletes the main tree and every preallocated merged tree,
// then frees the raw bounds/tree arrays.
ExtendedBucketPruner::~ExtendedBucketPruner()
{
	// release main tree
	if (mMainTree)
	{
		PX_DELETE_AND_RESET(mMainTree);
	}

	// release merged trees
	for (PxU32 i = 0; i < mCurrentTreeCapacity; i++)
	{
		AABBTree* aabbTree = mMergedTrees[i].mTree;
		PX_DELETE(aabbTree);
	}

	PX_FREE(mBounds);
	PX_FREE(mMergedTrees);
}

//////////////////////////////////////////////////////////////////////////
// release all objects in bucket pruner
// (keeps the preallocated tree objects alive for reuse — only their
// contents are released)
void ExtendedBucketPruner::release()
{
	// release core bucket pruner
	mBucketCore.release();

	mMainTreeUpdateMap.release();
	mMergeTreeUpdateMap.release();

	// release all objecs from the map
	mExtendedBucketPrunerMap.clear();

	// release all merged trees
	for (PxU32 i = 0; i < mCurrentTreeCapacity; i++)
	{
		mMergedTrees[i].mTimeStamp = 0;
		mMergedTrees[i].mTree->release();
	}

	// reset current tree index
	mCurrentTreeIndex = 0;
}

//////////////////////////////////////////////////////////////////////////
// Add a tree from a pruning structure
// 1. get new tree index
// 2. initialize merged tree, bounds
// 3. create update map for the merged tree
// 4. build new tree of trees from given trees bounds
// 5. add new objects into extended bucket pruner map
// 6. shift indices in the merged tree
void ExtendedBucketPruner::addTree(const AABBTreeMergeData& mergeData, PxU32 timeStamp)
{
	// check if we have to resize
	if(mCurrentTreeIndex == mCurrentTreeCapacity)
	{
		resize(mCurrentTreeCapacity*2);
	}

	// get current merge tree index
	const PxU32 mergeTreeIndex = mCurrentTreeIndex++;

	// get payloads pointers - the pointers start at mIndicesOffset, thats where all
	// objects were added before merge was called
	const PrunerPayload* payloads = &mPruningPool->getObjects()[mergeData.mIndicesOffset];

	// setup merged tree with the merge data and timestamp
	mMergedTrees[mergeTreeIndex].mTimeStamp = timeStamp;
	AABBTree& mergedTree = *mMergedTrees[mergeTreeIndex].mTree;
	mergedTree.initTree(mergeData);
	// set bounds
	mBounds[mergeTreeIndex] = mergeData.getRootNode().mBV;

	// update temporally update map for the current merge tree, map is used to setup the base extended bucket pruner map
	mMergeTreeUpdateMap.initMap(mergeData.mNbIndices, mergedTree);

	// create new base tree of trees
	buildMainAABBTree();

	// Add each object into extended bucket pruner hash map
	for (PxU32 i = 0; i < mergeData.mNbIndices; i++)
	{
		ExtendedBucketPrunerData mapData;
		mapData.mMergeIndex = mergeTreeIndex;
		mapData.mTimeStamp = timeStamp;
		PX_ASSERT(mMergeTreeUpdateMap[i] < mergedTree.getNbNodes());
		// get node information from the merge tree update map
		mapData.mSubTreeNode = mMergeTreeUpdateMap[i];
		mExtendedBucketPrunerMap.insert(payloads[i], mapData);
	}
	// merged tree indices needs to be shifted now, we cannot shift it in init - the update map
	// could not be constructed otherwise, as the indices wont start from 0. The indices
	// needs to be shifted by offset from the pruning pool, where the new objects were added into the pruning pool.
	mergedTree.shiftIndices(mergeData.mIndicesOffset);

#if PX_DEBUG
	checkValidity();
#endif // PX_DEBUG
}

//////////////////////////////////////////////////////////////////////////
// Builds the new main AABB tree with given current active merged trees and its bounds
void ExtendedBucketPruner::buildMainAABBTree()
{
	// create the AABB tree from given merged trees bounds
	AABBTreeBuildParams sTB;
	sTB.mNbPrimitives = mCurrentTreeIndex;
	sTB.mAABBArray = mBounds;
	sTB.mLimit = NB_OBJECTS_PER_NODE;
	bool status = mMainTree->build(sTB);

	PX_UNUSED(status);
	PX_ASSERT(status);

	// Init main tree update map for the new main tree
	mMainTreeUpdateMap.initMap(mCurrentTreeIndex, *mMainTree);
}

//////////////////////////////////////////////////////////////////////////
// resize internal memory, buffers
// Grows mBounds and mMergedTrees to 'size' entries (size must exceed the
// current capacity); newly added slots get fresh empty AABBTree objects.
void ExtendedBucketPruner::resize(PxU32 size)
{
	PX_ASSERT(size > mCurrentTreeCapacity);
	// allocate new bounds
	PxBounds3* newBounds = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*size, "Bounds"));
	// copy previous bounds
	PxMemCopy(newBounds, mBounds, sizeof(PxBounds3)*mCurrentTreeCapacity);
	PX_FREE(mBounds);
	mBounds = newBounds;

	// allocate new merged trees
	MergedTree* newMergeTrees = reinterpret_cast<MergedTree*>(PX_ALLOC(sizeof(MergedTree)*size, "AABB trees"));
	// copy previous merged trees
	PxMemCopy(newMergeTrees, mMergedTrees, sizeof(MergedTree)*mCurrentTreeCapacity);
	PX_FREE(mMergedTrees);
	mMergedTrees = newMergeTrees;
	// allocate new trees for merged trees
	for (PxU32 i = mCurrentTreeCapacity; i < size; i++)
	{
		mMergedTrees[i].mTimeStamp = 0;
		mMergedTrees[i].mTree = PX_NEW(AABBTree);
	}

	mCurrentTreeCapacity = size;
}

//////////////////////////////////////////////////////////////////////////
// Update object
// Objects not found in the extended map are forwarded to the core bucket
// pruner; otherwise the owning merged-tree node and the corresponding main
// tree node are marked for refit (applied later in refitMarkedNodes).
bool ExtendedBucketPruner::updateObject(const PxBounds3& worldAABB, const PrunerPayload& object)
{
	const ExtendedBucketPrunerMap::Entry* extendedPrunerEntry = mExtendedBucketPrunerMap.find(object);

	// if object is not in tree of trees, it is in bucket pruner core
	if(!extendedPrunerEntry)
	{
		return mBucketCore.updateObject(worldAABB, object);
	}
	else
	{
		const ExtendedBucketPrunerData& data = extendedPrunerEntry->second;

		PX_ASSERT(data.mMergeIndex < mCurrentTreeIndex);

		// update tree where objects belongs to
		AABBTree& tree = *mMergedTrees[data.mMergeIndex].mTree;
		PX_ASSERT(data.mSubTreeNode < tree.getNbNodes());
		// mark for refit node in merged tree
		tree.markNodeForRefit(data.mSubTreeNode);
		PX_ASSERT(mMainTreeUpdateMap[data.mMergeIndex] < mMainTree->getNbNodes());
		// mark for refit node in main aabb tree
		mMainTree->markNodeForRefit(mMainTreeUpdateMap[data.mMergeIndex]);
		mTreesDirty = true;
	}
	return true;
}

//////////////////////////////////////////////////////////////////////////
// refit merged nodes
// 1. refit nodes in merged trees
// 2. check if after refit root node is valid - might happen edge case
//    where all objects were released - the root node is then invalid
//    in this edge case we need to compact the merged trees array
//    and create new main AABB tree
// 3. If all merged trees bounds are valid - refit main tree
// 4.
If bounds are invalid create new main AABB tree +void ExtendedBucketPruner::refitMarkedNodes(const PxBounds3* boxes) +{ + // if no tree needs update early exit + if(!mTreesDirty) + return; + + // refit trees and update bounds for main tree + PxU32 nbValidTrees = 0; + for (PxU32 i = mCurrentTreeIndex; i--; ) + { + AABBTree& tree = *mMergedTrees[i].mTree; + tree.refitMarkedNodes(boxes); + const PxBounds3& bounds = tree.getNodes()[0].mBV; + // check if bounds are valid, if all objects of the tree were released, the bounds + // will be invalid, in that case we cannot use this tree anymore. + if(bounds.isValid()) + { + nbValidTrees++; + } + mBounds[i] = bounds; + } + + if(nbValidTrees == mCurrentTreeIndex) + { + // no tree has been removed refit main tree + mMainTree->refitMarkedNodes(mBounds); + } + else + { + // edge case path, tree does not have a valid root node bounds - all objects from the tree were released + // we might even fire perf warning + // compact the tree array - no holes in the array, remember the swap position + PxU32* swapMap = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mCurrentTreeIndex, "Swap Map")); + PxU32 writeIndex = 0; + for (PxU32 i = 0; i < mCurrentTreeIndex; i++) + { + AABBTree& tree = *mMergedTrees[i].mTree; + if(tree.getNodes()[0].mBV.isValid()) + { + // we have to store the tree into an empty location + if(i != writeIndex) + { + PX_ASSERT(writeIndex < i); + AABBTree* ptr = mMergedTrees[writeIndex].mTree; + mMergedTrees[writeIndex] = mMergedTrees[i]; + mMergedTrees[i].mTree = ptr; + mBounds[writeIndex] = mBounds[i]; + } + // remember the swap location + swapMap[i] = writeIndex; + writeIndex++; + } + else + { + // tree is not valid, release it + tree.release(); + mMergedTrees[i].mTimeStamp = 0; + } + + // remember the swap + swapMap[mCurrentTreeIndex] = i; + } + + PX_ASSERT(writeIndex == nbValidTrees); + + // new merged trees size + mCurrentTreeIndex = nbValidTrees; + + // trees have changed, we need to rebuild the main tree + 
// (tail of the preceding function — its head is above this chunk; kept verbatim)
	buildMainAABBTree();

	// fixup the object entries, the merge index has changed
	for (ExtendedBucketPrunerMap::Iterator iter = mExtendedBucketPrunerMap.getIterator(); !iter.done(); ++iter)
	{
		ExtendedBucketPrunerData& data = iter->second;
		PX_ASSERT(swapMap[data.mMergeIndex] < nbValidTrees);
		data.mMergeIndex = swapMap[data.mMergeIndex];
	}
	PX_FREE(swapMap);
	}
#if PX_DEBUG
	checkValidity();
#endif
	mTreesDirty = false;
}

//////////////////////////////////////////////////////////////////////////
// remove object
// The pruning pool fills the hole left by the removed object with the pool's
// last object ("swapObject"), so that object's cached index must be fixed up
// wherever it is referenced (bucket pruner core or a merged tree).
bool ExtendedBucketPruner::removeObject(const PrunerPayload& object, PxU32 objectIndex, const PrunerPayload& swapObject,
	PxU32 swapObjectIndex, PxU32& timeStamp)
{
	ExtendedBucketPrunerMap::Entry dataEntry;

	// if object is not in tree of trees, it is in bucket pruner core
	if (!mExtendedBucketPrunerMap.erase(object, dataEntry))
	{
		// we need to call swapIndex: it might happen that the swapped object
		// does belong to the extended bucket pruner, in which case that object's
		// index needs to be swapped.
		swapIndex(objectIndex, swapObject, swapObjectIndex);
		return mBucketCore.removeObject(object, timeStamp);
	}
	else
	{
		const ExtendedBucketPrunerData& data = dataEntry.second;

		// mark tree nodes where the object belongs to
		AABBTree& tree = *mMergedTrees[data.mMergeIndex].mTree;
		PX_ASSERT(data.mSubTreeNode < tree.getNbNodes());
		// mark the merged tree for refit
		tree.markNodeForRefit(data.mSubTreeNode);
		PX_ASSERT(mMainTreeUpdateMap[data.mMergeIndex] < mMainTree->getNbNodes());
		// mark the main tree for refit
		mMainTree->markNodeForRefit(mMainTreeUpdateMap[data.mMergeIndex]);

		// call invalidate object to swap the object indices in the merged trees
		invalidateObject(data, objectIndex, swapObject, swapObjectIndex);

		mTreesDirty = true;
	}
#if PX_DEBUG
	checkValidity();
#endif // PX_DEBUG
	return true;
}

//////////////////////////////////////////////////////////////////////////
// invalidate object
// remove the objectIndex from the merged tree's leaf, then fix up the
// swapped object's index via swapIndex()
void ExtendedBucketPruner::invalidateObject(const ExtendedBucketPrunerData& data, PxU32 objectIndex, const PrunerPayload& swapObject,
	PxU32 swapObjectIndex)
{
	// get the merged tree
	AABBTree& tree = *mMergedTrees[data.mMergeIndex].mTree;
	PX_ASSERT(data.mSubTreeNode < tree.getNbNodes());
	PX_ASSERT(tree.getNodes()[data.mSubTreeNode].isLeaf());
	// get merged tree node
	AABBTreeRuntimeNode& node0 = tree.getNodes()[data.mSubTreeNode];
	const PxU32 nbPrims = node0.getNbRuntimePrimitives();
	PX_ASSERT(nbPrims <= NB_OBJECTS_PER_NODE);

	// retrieve the primitives pointer
	PxU32* primitives = node0.getPrimitives(tree.getIndices());
	PX_ASSERT(primitives);

	// Look for desired pool index in the leaf
	bool foundIt = false;
	for (PxU32 i = 0; i < nbPrims; i++)
	{
		if (objectIndex == primitives[i])
		{
			foundIt = true;
			const PxU32 last = nbPrims - 1;
			node0.setNbRunTimePrimitives(last);
			primitives[i] = INVALID_POOL_ID;	// Mark primitive index as invalid in the node

			// Swap within the leaf node. No need to update the mapping since they should all point
			// to the same tree node anyway.
			if (last != i)
				Ps::swap(primitives[i], primitives[last]);
			break;
		}
	}
	PX_ASSERT(foundIt);
	PX_UNUSED(foundIt);

	swapIndex(objectIndex, swapObject, swapObjectIndex);
}

// Swap object index
// if swapObject is in a merged tree its index needs to be swapped with objectIndex
void ExtendedBucketPruner::swapIndex(PxU32 objectIndex, const PrunerPayload& swapObject, PxU32 swapObjectIndex)
{
	// nothing to do if the pool did not actually move an object
	if (objectIndex == swapObjectIndex)
		return;

	const ExtendedBucketPrunerMap::Entry* extendedPrunerSwapEntry = mExtendedBucketPrunerMap.find(swapObject);

	// if swapped object index is in extended pruner, we have to fix the primitives index
	if (extendedPrunerSwapEntry)
	{
		const ExtendedBucketPrunerData& swapData = extendedPrunerSwapEntry->second;
		AABBTree& swapTree = *mMergedTrees[swapData.mMergeIndex].mTree;
		// With multiple primitives per leaf, tree nodes may very well be the same for different pool indices.
		// However the pool indices may be the same when a swap has been skipped in the pruning pool, in which
		// case there is nothing to do.
		PX_ASSERT(swapData.mSubTreeNode < swapTree.getNbNodes());
		PX_ASSERT(swapTree.getNodes()[swapData.mSubTreeNode].isLeaf());
		AABBTreeRuntimeNode* node1 = swapTree.getNodes() + swapData.mSubTreeNode;
		const PxU32 nbPrims = node1->getNbRuntimePrimitives();
		PX_ASSERT(nbPrims <= NB_OBJECTS_PER_NODE);

		// retrieve the primitives pointer
		PxU32* primitives = node1->getPrimitives(swapTree.getIndices());
		PX_ASSERT(primitives);

		// look for desired pool index in the leaf
		bool foundIt = false;
		for (PxU32 i = 0; i < nbPrims; i++)
		{
			if (swapObjectIndex == primitives[i])
			{
				foundIt = true;
				primitives[i] = objectIndex;	// point node to the pool object moved to
				break;
			}
		}
		PX_ASSERT(foundIt);
		PX_UNUSED(foundIt);
	}
}

//////////////////////////////////////////////////////////////////////////
// Optimized removal of timestamped objects from the extended bucket pruner
// NOTE(review): relies on the merged-trees array being time ordered and never
// shifted — newest trees are appended at the back.
PxU32 ExtendedBucketPruner::removeMarkedObjects(PxU32 timeStamp)
{
	// remove objects from the core bucket pruner
	PxU32 retVal = mBucketCore.removeMarkedObjects(timeStamp);

	// nothing to be removed
	if(!mCurrentTreeIndex)
		return retVal;

	// if the last merged tree has the timeStamp to remove, we can clear all
	// this is safe as the merged trees array is time ordered, never shifted
	if(mMergedTrees[mCurrentTreeIndex - 1].mTimeStamp == timeStamp)
	{
		retVal += mExtendedBucketPrunerMap.size();
		cleanTrees();
		return retVal;
	}

	// get the highest index in the merged trees array where the timeStamp matches;
	// we then release all trees up to and including that index
	PxU32 highestTreeIndex = 0xFFFFFFFF;
	for (PxU32 i = 0; i < mCurrentTreeIndex; i++)
	{
		if(mMergedTrees[i].mTimeStamp == timeStamp)
			highestTreeIndex = i;
		else
			break;
	}

	// if no timestamp found early exit
	if(highestTreeIndex == 0xFFFFFFFF)
	{
		return retVal;
	}

	PX_ASSERT(highestTreeIndex < mCurrentTreeIndex);
	// get offset, where valid trees start
	const PxU32 mergeTreeOffset = highestTreeIndex + 1;

	// shrink the array to merged trees with a valid timeStamp
	mCurrentTreeIndex = mCurrentTreeIndex - mergeTreeOffset;
	// go over trees and swap released trees with valid trees from the back (valid trees are at the back)
	for (PxU32 i = 0; i < mCurrentTreeIndex; i++)
	{
		// store bounds, timestamp of the valid tree at its new (compacted) slot
		mBounds[i] = mMergedTrees[mergeTreeOffset + i].mTree->getNodes()[0].mBV;
		mMergedTrees[i].mTimeStamp = mMergedTrees[mergeTreeOffset + i].mTimeStamp;

		// release the tree with the removed timestamp
		AABBTree* ptr = mMergedTrees[i].mTree;
		ptr->release();

		// store the valid tree
		mMergedTrees[i].mTree = mMergedTrees[mergeTreeOffset + i].mTree;
		// store the released tree at the offset (slot reuse, timestamp cleared)
		mMergedTrees[mergeTreeOffset + i].mTree = ptr;
		mMergedTrees[mergeTreeOffset + i].mTimeStamp = 0;
	}
	// release the rest of the trees with the removed timestamp
	for (PxU32 i = mCurrentTreeIndex; i <= highestTreeIndex; i++)
	{
		mMergedTrees[i].mTree->release();
		mMergedTrees[i].mTimeStamp = 0;
	}

	// build new main AABB tree with only trees with a valid timeStamp
	buildMainAABBTree();

	// remove all unnecessary trees and map entries
	bool removeEntry = false;
	PxU32 numRemovedEntries = 0;
	ExtendedBucketPrunerMap::EraseIterator eraseIterator = mExtendedBucketPrunerMap.getEraseIterator();
	ExtendedBucketPrunerMap::Entry* entry = eraseIterator.eraseCurrentGetNext(removeEntry);
	while (entry)
	{
		ExtendedBucketPrunerData& data = entry->second;
		// data to be removed
		if (data.mTimeStamp == timeStamp)
		{
			removeEntry = true;
			numRemovedEntries++;
		}
		else
		{
			// update the merge index and main tree node index
			PX_ASSERT(highestTreeIndex < data.mMergeIndex);
			data.mMergeIndex -= mergeTreeOffset;
			removeEntry = false;
		}
		entry = eraseIterator.eraseCurrentGetNext(removeEntry);
	}

#if PX_DEBUG
	checkValidity();
#endif // PX_DEBUG
	// return the number of removed objects
	return retVal + numRemovedEntries;
}
+////////////////////////////////////////////////////////////////////////// +// clean all trees, all objects have been released +void ExtendedBucketPruner::cleanTrees() +{ + for (PxU32 i = 0; i < mCurrentTreeIndex; i++) + { + mMergedTrees[i].mTree->release(); + mMergedTrees[i].mTimeStamp = 0; + } + mExtendedBucketPrunerMap.clear(); + mCurrentTreeIndex = 0; + mMainTree->release(); +} + +////////////////////////////////////////////////////////////////////////// +// shift origin +void ExtendedBucketPruner::shiftOrigin(const PxVec3& shift) +{ + mMainTree->shiftOrigin(shift); + + for (PxU32 i = 0; i < mCurrentTreeIndex; i++) + { + mMergedTrees[i].mTree->shiftOrigin(shift); + } + + mBucketCore.shiftOrigin(shift); +} + +////////////////////////////////////////////////////////////////////////// +// Queries implementation +////////////////////////////////////////////////////////////////////////// +// Raycast/sweeps callback for main AABB tree +template<bool tInflate> +struct MainTreeRaycastPrunerCallback: public PrunerCallback +{ + MainTreeRaycastPrunerCallback(const PxVec3& origin, const PxVec3& unitDir, const PxVec3& extent, PrunerCallback& prunerCallback, const PruningPool* pool) + : mOrigin(origin), mUnitDir(unitDir), mExtent(extent), mPrunerCallback(prunerCallback), mPruningPool(pool) + { + } + + virtual PxAgain invoke(PxReal& distance, const PrunerPayload& payload) + { + // payload data match merged tree data MergedTree, we can cast it + const AABBTree* aabbTree = reinterpret_cast<const AABBTree*> (payload.data[0]); + // raycast the merged tree + return AABBTreeRaycast<tInflate>()(mPruningPool->getObjects(), mPruningPool->getCurrentWorldBoxes(), *aabbTree, mOrigin, mUnitDir, distance, mExtent, mPrunerCallback); + } + + PX_NOCOPY(MainTreeRaycastPrunerCallback) + +private: + const PxVec3& mOrigin; + const PxVec3& mUnitDir; + const PxVec3& mExtent; + PrunerCallback& mPrunerCallback; + const PruningPool* mPruningPool; +}; + 
//////////////////////////////////////////////////////////////////////////
// raycast against the extended bucket pruner
PxAgain ExtendedBucketPruner::raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& prunerCallback) const
{
	PxAgain again = true;

	// search the bucket pruner first
	if (mBucketCore.getNbObjects())
		again = mBucketCore.raycast(origin, unitDir, inOutDistance, prunerCallback);

	if (again && mExtendedBucketPrunerMap.size())
	{
		const PxVec3 extent(0.0f);
		// main tree callback
		MainTreeRaycastPrunerCallback<false> pcb(origin, unitDir, extent, prunerCallback, mPruningPool);
		// traverse the main tree; MergedTree entries are reinterpreted as payloads
		// (layout equality is compile-time asserted in the header)
		again = AABBTreeRaycast<false>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, origin, unitDir, inOutDistance, extent, pcb);
	}

	return again;
}

//////////////////////////////////////////////////////////////////////////
// overlap main tree callback: forwards each main-tree hit (a merged tree)
// into an overlap query against that merged tree
template<typename Test>
struct MainTreeOverlapPrunerCallback : public PrunerCallback
{
	MainTreeOverlapPrunerCallback(const Test& test, PrunerCallback& prunerCallback, const PruningPool* pool)
		: mTest(test), mPrunerCallback(prunerCallback), mPruningPool(pool)
	{
	}

	virtual PxAgain invoke(PxReal& , const PrunerPayload& payload)
	{
		// payload data match merged tree data MergedTree, we can cast it
		const AABBTree* aabbTree = reinterpret_cast<const AABBTree*> (payload.data[0]);
		// overlap the merged tree
		return AABBTreeOverlap<Test>()(mPruningPool->getObjects(), mPruningPool->getCurrentWorldBoxes(), *aabbTree, mTest, mPrunerCallback);
	}

	PX_NOCOPY(MainTreeOverlapPrunerCallback)

private:
	const Test&			mTest;
	PrunerCallback&		mPrunerCallback;
	const PruningPool*	mPruningPool;
};

//////////////////////////////////////////////////////////////////////////
// overlap implementation; dispatches on the query volume geometry type
PxAgain ExtendedBucketPruner::overlap(const Gu::ShapeData& queryVolume, PrunerCallback& prunerCallback) const
{
	PxAgain again = true;

	// core bucket pruner overlap
	if (mBucketCore.getNbObjects())
		again = mBucketCore.overlap(queryVolume, prunerCallback);

	if(again && mExtendedBucketPrunerMap.size())
	{
		switch (queryVolume.getType())
		{
		case PxGeometryType::eBOX:
		{
			if (queryVolume.isOBB())
			{
				const Gu::OBBAABBTest test(queryVolume.getPrunerWorldPos(), queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerBoxGeomExtentsInflated());
				MainTreeOverlapPrunerCallback<Gu::OBBAABBTest> pcb(test, prunerCallback, mPruningPool);
				again = AABBTreeOverlap<Gu::OBBAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
			}
			else
			{
				const Gu::AABBAABBTest test(queryVolume.getPrunerInflatedWorldAABB());
				MainTreeOverlapPrunerCallback<Gu::AABBAABBTest> pcb(test, prunerCallback, mPruningPool);
				again = AABBTreeOverlap<Gu::AABBAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
			}
		}
		break;
		case PxGeometryType::eCAPSULE:
		{
			const Gu::Capsule& capsule = queryVolume.getGuCapsule();
			const Gu::CapsuleAABBTest test(capsule.p1, queryVolume.getPrunerWorldRot33().column0,
				queryVolume.getCapsuleHalfHeight()*2.0f, PxVec3(capsule.radius*SQ_PRUNER_INFLATION));
			MainTreeOverlapPrunerCallback<Gu::CapsuleAABBTest> pcb(test, prunerCallback, mPruningPool);
			again = AABBTreeOverlap<Gu::CapsuleAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
		}
		break;
		case PxGeometryType::eSPHERE:
		{
			const Gu::Sphere& sphere = queryVolume.getGuSphere();
			Gu::SphereAABBTest test(sphere.center, sphere.radius);
			MainTreeOverlapPrunerCallback<Gu::SphereAABBTest> pcb(test, prunerCallback, mPruningPool);
			again = AABBTreeOverlap<Gu::SphereAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
		}
		break;
		case PxGeometryType::eCONVEXMESH:
		{
			// convex meshes are tested against their (oriented) bounding box
			const Gu::OBBAABBTest test(queryVolume.getPrunerWorldPos(), queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerBoxGeomExtentsInflated());
			MainTreeOverlapPrunerCallback<Gu::OBBAABBTest> pcb(test, prunerCallback, mPruningPool);
			again = AABBTreeOverlap<Gu::OBBAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
		}
		break;
		case PxGeometryType::ePLANE:
		case PxGeometryType::eTRIANGLEMESH:
		case PxGeometryType::eHEIGHTFIELD:
		case PxGeometryType::eGEOMETRY_COUNT:
		case PxGeometryType::eINVALID:
			PX_ALWAYS_ASSERT_MESSAGE("unsupported overlap query volume geometry type");
		}
	}

	return again;
}

//////////////////////////////////////////////////////////////////////////
// sweep implementation: a sweep is a raycast of the volume's AABB center,
// inflated by the AABB extents
PxAgain ExtendedBucketPruner::sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& prunerCallback) const
{
	PxAgain again = true;

	// core bucket pruner sweep
	if (mBucketCore.getNbObjects())
		again = mBucketCore.sweep(queryVolume, unitDir, inOutDistance, prunerCallback);

	if(again && mExtendedBucketPrunerMap.size())
	{
		const PxBounds3& aabb = queryVolume.getPrunerInflatedWorldAABB();
		const PxVec3 extents = aabb.getExtents();
		const PxVec3 center = aabb.getCenter();
		MainTreeRaycastPrunerCallback<true> pcb(center, unitDir, extents, prunerCallback, mPruningPool);
		again = AABBTreeRaycast<true>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, center, unitDir, inOutDistance, extents, pcb);
	}
	return again;
}


//////////////////////////////////////////////////////////////////////////
#include "CmRenderOutput.h"

// visualization: recursively draw every node AABB of the given tree
void visualizeTree(Cm::RenderOutput& out, PxU32 color, AABBTree* tree)
{
	if (tree)
	{
		struct Local
		{
			static void _Draw(const AABBTreeRuntimeNode* root, const AABBTreeRuntimeNode* node, Cm::RenderOutput& out_)
			{
				out_ << Cm::DebugBox(node->mBV, true);
				if (node->isLeaf())
					return;
				_Draw(root, node->getPos(root), out_);
				_Draw(root, node->getNeg(root), out_);
			}
		};
		out << PxTransform(PxIdentity);
		out << color;
		Local::_Draw(tree->getNodes(), tree->getNodes(), out);
	}
}

// draw the main tree, all merged trees and the core bucket pruner
void ExtendedBucketPruner::visualize(Cm::RenderOutput& out, PxU32 color) const
{
	visualizeTree(out, color, mMainTree);

	for(PxU32 i = 0; i < mCurrentTreeIndex; i++)
	{
		visualizeTree(out, color, mMergedTrees[i].mTree);
	}

	mBucketCore.visualize(out, color);
}

//////////////////////////////////////////////////////////////////////////

#if PX_DEBUG
// extended bucket pruner validity check: cross-checks main tree leaves,
// merged tree leaves, the pruning pool and the object map against each other
bool ExtendedBucketPruner::checkValidity()
{
	Cm::BitMap testBitmap;
	testBitmap.resizeAndClear(mCurrentTreeIndex);
	for (PxU32 i = 0; i < mMainTree->getNbNodes(); i++)
	{
		const AABBTreeRuntimeNode& node = mMainTree->getNodes()[i];
		if(node.isLeaf())
		{
			const PxU32 nbPrims = node.getNbRuntimePrimitives();
			PX_ASSERT(nbPrims <= NB_OBJECTS_PER_NODE);

			const PxU32* primitives = node.getPrimitives(mMainTree->getIndices());
			for (PxU32 j = 0; j < nbPrims; j++)
			{
				const PxU32 index = primitives[j];
				// check if index is correct
				PX_ASSERT(index < mCurrentTreeIndex);
				// mark the index in the test bitmap, must be set only once; all merged trees must be in the main tree
				PX_ASSERT(testBitmap.test(index) == IntFalse);
				testBitmap.set(index);
			}
		}
	}

	Cm::BitMap mergeTreeTestBitmap;
	mergeTreeTestBitmap.resizeAndClear(mPruningPool->getNbActiveObjects());
	for (PxU32 i = 0; i < mCurrentTreeIndex; i++)
	{
		// check if bounds are the same as the merged tree root bounds
		PX_ASSERT(mBounds[i].maximum.x == mMergedTrees[i].mTree->getNodes()[0].mBV.maximum.x);
		PX_ASSERT(mBounds[i].maximum.y == mMergedTrees[i].mTree->getNodes()[0].mBV.maximum.y);
		PX_ASSERT(mBounds[i].maximum.z == mMergedTrees[i].mTree->getNodes()[0].mBV.maximum.z);
		PX_ASSERT(mBounds[i].minimum.x == mMergedTrees[i].mTree->getNodes()[0].mBV.minimum.x);
		PX_ASSERT(mBounds[i].minimum.y == mMergedTrees[i].mTree->getNodes()[0].mBV.minimum.y);
		PX_ASSERT(mBounds[i].minimum.z == mMergedTrees[i].mTree->getNodes()[0].mBV.minimum.z);

		// check each tree
		const AABBTree& mergedTree = *mMergedTrees[i].mTree;
		for (PxU32 j = 0; j < mergedTree.getNbNodes(); j++)
		{
			const AABBTreeRuntimeNode& node = mergedTree.getNodes()[j];
			if (node.isLeaf())
			{
				const PxU32 nbPrims = node.getNbRuntimePrimitives();
				PX_ASSERT(nbPrims <= NB_OBJECTS_PER_NODE);

				const PxU32* primitives = node.getPrimitives(mergedTree.getIndices());
				for (PxU32 k = 0; k < nbPrims; k++)
				{
					const PxU32 index = primitives[k];
					// check if index is correct
					PX_ASSERT(index < mPruningPool->getNbActiveObjects());
					// mark the index in the test bitmap, must be set only once; each pool object lives in exactly one merged tree leaf
					PX_ASSERT(mergeTreeTestBitmap.test(index) == IntFalse);
					mergeTreeTestBitmap.set(index);

					const PrunerPayload& payload = mPruningPool->getObjects()[index];
					const ExtendedBucketPrunerMap::Entry* extendedPrunerSwapEntry = mExtendedBucketPrunerMap.find(payload);
					PX_ASSERT(extendedPrunerSwapEntry);

					const ExtendedBucketPrunerData& data = extendedPrunerSwapEntry->second;
					PX_ASSERT(data.mMergeIndex == i);
					PX_ASSERT(data.mSubTreeNode == j);
				}
			}
		}
	}
	// unused capacity slots must hold empty trees
	for (PxU32 i = mCurrentTreeIndex; i < mCurrentTreeCapacity; i++)
	{
		PX_ASSERT(mMergedTrees[i].mTree->getIndices() == NULL);
		PX_ASSERT(mMergedTrees[i].mTree->getNodes() == NULL);
	}
	// every map entry must reference valid merged tree / main tree nodes
	for (ExtendedBucketPrunerMap::Iterator iter = mExtendedBucketPrunerMap.getIterator(); !iter.done(); ++iter)
	{
		const ExtendedBucketPrunerData& data = iter->second;
		PX_ASSERT(mMainTreeUpdateMap[data.mMergeIndex] < mMainTree->getNbNodes());
		PX_ASSERT(data.mMergeIndex < mCurrentTreeIndex);
		PX_ASSERT(data.mSubTreeNode < mMergedTrees[data.mMergeIndex].mTree->getNbNodes());
	}
	return true;
}
#endif

diff --git a/PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.h
b/PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.h new file mode 100644 index 00000000..ad360e10 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.h @@ -0,0 +1,176 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 

#ifndef SQ_EXTENDEDBUCKETPRUNER_H
#define SQ_EXTENDEDBUCKETPRUNER_H

#include "SqTypedef.h"
#include "SqBucketPruner.h"
#include "SqAABBTreeUpdateMap.h"
#include "PsHashMap.h"

namespace physx
{
namespace Sq
{
	struct AABBPrunerMergeData;
	class AABBTreeMergeData;

	// Extended bucket pruner data: if an object belongs to the tree of trees, we need to
	// remember the node of the sub tree, the tree it belongs to and the main tree node
	struct ExtendedBucketPrunerData
	{
		PxU32			mTimeStamp;		// timestamp
		TreeNodeIndex	mSubTreeNode;	// sub tree node index
		PxU32			mMergeIndex;	// index in bounds and merged trees array
	};

	// Merged tree structure, holds a tree and its timeStamp, released when no objects are in the tree
	// or the timestamped objects are released
	struct MergedTree
	{
		AABBTree*	mTree;			// AABB tree
		size_t		mTimeStamp;		// needs to be size_t to match PrunerPayload size
	};
	// needs to be size_t to match PrunerPayload size, pointer used for AABB tree query callbacks
	PX_COMPILE_TIME_ASSERT(sizeof(MergedTree) == sizeof(PrunerPayload));

	// hashing function for PrunerPayload key
	struct ExtendedBucketPrunerHash
	{
		PX_FORCE_INLINE uint32_t operator()(const PrunerPayload& payload) const
		{
#if PX_P64_FAMILY
			// const PxU32 h0 = Ps::hash((const void*)payload.data[0]);
			// const PxU32 h1 = Ps::hash((const void*)payload.data[1]);
			const PxU32 h0 = PxU32(PX_MAX_U32 & payload.data[0]);
			const PxU32 h1 = PxU32(PX_MAX_U32 & payload.data[1]);
			return Ps::hash(PxU64(h0) | (PxU64(h1) << 32));
#else
			return Ps::hash(PxU64(payload.data[0]) | (PxU64(payload.data[1]) << 32));
#endif
		}
		PX_FORCE_INLINE bool equal(const PrunerPayload& k0, const PrunerPayload& k1) const
		{
			return (k0.data[0] == k1.data[0]) && (k0.data[1] == k1.data[1]);
		}
	};

	// A.B. replace; this is not ideal: we need to be able to traverse the map and release while traversing, and eraseAt failed
	typedef Ps::HashMap<PrunerPayload, ExtendedBucketPrunerData, ExtendedBucketPrunerHash> ExtendedBucketPrunerMap;

	// The extended bucket pruner holds single objects in a bucket pruner and AABB trees in a tree of trees.
	// Base usage of ExtendedBucketPruner is for dynamic AABBPruner new objects that did not make it
	// into the new tree. Single objects go directly into a bucket pruner, while merged AABB trees
	// go into a tree of trees.
	class ExtendedBucketPruner
	{
	public:
								ExtendedBucketPruner(const PruningPool* pool);
		virtual					~ExtendedBucketPruner();

		// release
				void			release();

		// add single object into a bucket pruner directly
		PX_FORCE_INLINE bool	addObject(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp)
		{
			return mBucketCore.addObject(object, worldAABB, timeStamp);
		}

		// add AABB tree from pruning structure - adds new primitive into main AABB tree
				void			addTree(const AABBTreeMergeData& mergeData, PxU32 timeStamp);

		// update object
				bool			updateObject(const PxBounds3& worldAABB, const PrunerPayload& object);

		// remove object; the removed object is replaced in the pruning pool by the swapped object, indices need to be updated
				bool			removeObject(const PrunerPayload& object, PxU32 objectIndex, const PrunerPayload& swapObject,
									PxU32 swapObjectIndex, PxU32& timeStamp);

		// separate call for index invalidation; the object can be either in AABBPruner or bucket pruner, but the swapped object can be
		// in the tree of trees
				void			invalidateObject(const ExtendedBucketPrunerData& object, PxU32 objectIndex, const PrunerPayload& swapObject,
									PxU32 swapObjectIndex);

		// swap object index; the object index can be in the bucket pruner or the tree of trees
				void			swapIndex(PxU32 objectIndex, const PrunerPayload& swapObject, PxU32 swapObjectIndex);

		// refit marked nodes in tree of trees
				void			refitMarkedNodes(const PxBounds3* boxes);


		// look for objects marked with input timestamp everywhere in the structure, and remove them. This is the same
		// as calling 'removeObject' individually for all these objects, but much more efficient. Returns number of removed objects.
				PxU32			removeMarkedObjects(PxU32 timeStamp);

		// queries against the pruner
				PxAgain			raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
				PxAgain			overlap(const Gu::ShapeData& queryVolume, PrunerCallback&) const;
				PxAgain			sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;

		// origin shift
				void			shiftOrigin(const PxVec3& shift);

		// debug visualize
				void			visualize(Cm::RenderOutput& out, PxU32 color) const;

		PX_FORCE_INLINE	void	build()				{ mBucketCore.build();	}

		PX_FORCE_INLINE	PxU32	getNbObjects()	const	{ return mBucketCore.getNbObjects() + mExtendedBucketPrunerMap.size(); }

	private:
				void			resize(PxU32 size);
				void			buildMainAABBTree();
				void			copyTree(AABBTree& destTree, const AABBPrunerMergeData& inputData);
				void			cleanTrees();

#if PX_DEBUG
		// Extended bucket pruner validity check
				bool			checkValidity();
#endif
	private:
		BucketPrunerCore		mBucketCore;				// Bucket pruner for single objects
		const PruningPool*		mPruningPool;				// Pruning pool from AABB pruner
		ExtendedBucketPrunerMap	mExtendedBucketPrunerMap;	// Map holding objects from tree merge - objects in tree of trees
		AABBTree*				mMainTree;					// Main tree holding merged trees
		AABBTreeUpdateMap		mMainTreeUpdateMap;			// Main tree update map - merged trees index to nodes
		AABBTreeUpdateMap		mMergeTreeUpdateMap;		// Merged tree update map used while tree is merged
		PxBounds3*				mBounds;					// Merged trees bounds used for main tree building
		MergedTree*				mMergedTrees;				// Merged trees
		PxU32					mCurrentTreeIndex;			// Current trees index
		PxU32					mCurrentTreeCapacity;		// Current trees capacity
		bool
mTreesDirty; // Dirty marker + }; + +} // namespace Sq + +} + +#endif // SQ_EXTENDEDBUCKETPRUNER_H diff --git a/PhysX_3.4/Source/SceneQuery/src/SqMetaData.cpp b/PhysX_3.4/Source/SceneQuery/src/SqMetaData.cpp new file mode 100644 index 00000000..86ba1d67 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqMetaData.cpp @@ -0,0 +1,57 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. 
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#include "PxMetaData.h"

#include "SqPruningStructure.h"

using namespace physx;
using namespace Sq;

///////////////////////////////////////////////////////////////////////////////

// Describe PruningStructure's binary layout for serialization.
// NOTE(review): the entry order/flags must match the member layout declared in
// SqPruningStructure.h — verify against that header when fields change.
void PruningStructure::getBinaryMetaData(PxOutputStream& stream)
{
	PX_DEF_BIN_METADATA_VCLASS(stream,		PruningStructure)
	PX_DEF_BIN_METADATA_BASE_CLASS(stream,	PruningStructure, PxBase)

	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mNbNodes[0],		0)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mNbNodes[1],		0)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, AABBTreeRuntimeNode,	mAABBTreeNodes[0],	PxMetaDataFlag::ePTR)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, AABBTreeRuntimeNode,	mAABBTreeNodes[1],	PxMetaDataFlag::ePTR)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mNbObjects[0],		0)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mNbObjects[1],		0)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mAABBTreeIndices[0],	PxMetaDataFlag::ePTR)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mAABBTreeIndices[1],	PxMetaDataFlag::ePTR)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mNbActors,			0)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxActor*,				mActors,			PxMetaDataFlag::ePTR)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, bool,					mValid,				0)
}


diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h b/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h new file mode 100644 index 00000000..9ded6d26 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h @@ -0,0 +1,258 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 

#ifndef GU_RAWQUERY_TESTS_SIMD_H
#define GU_RAWQUERY_TESTS_SIMD_H

#include "foundation/PxTransform.h"
#include "foundation/PxBounds3.h"
#include "CmPhysXCommon.h"
#include "PxBoxGeometry.h"
#include "PxSphereGeometry.h"
#include "PxCapsuleGeometry.h"
#include "PsVecMath.h"

namespace physx
{
namespace Gu
{

// SIMD segment-vs-AABB rejection test (slab/cross-axis style) used during
// pruner tree traversal. With TInflate the node extents are grown by
// mInflation, which turns the ray test into a swept-AABB test.
struct RayAABBTest
{
	PX_FORCE_INLINE RayAABBTest(const PxVec3& origin_, const PxVec3& unitDir_, const PxReal maxDist, const PxVec3& inflation_)
		: mOrigin(V3LoadU(origin_))
		, mDir(V3LoadU(unitDir_))
		, mDirYZX(V3PermYZX(mDir))
		, mInflation(V3LoadU(inflation_))
		, mAbsDir(V3Abs(mDir))
		, mAbsDirYZX(V3PermYZX(mAbsDir))
	{
		// for an unbounded ray, clamp the segment end per axis so the min/max
		// box below stays finite (zero direction components keep the origin)
		const PxVec3 ext = maxDist >= PX_MAX_F32 ? PxVec3(	unitDir_.x == 0 ? origin_.x : PxSign(unitDir_.x)*PX_MAX_F32,
															unitDir_.y == 0 ? origin_.y : PxSign(unitDir_.y)*PX_MAX_F32,
															unitDir_.z == 0 ? origin_.z : PxSign(unitDir_.z)*PX_MAX_F32)
												 : origin_ + unitDir_ * maxDist;
		mRayMin = V3Min(mOrigin, V3LoadU(ext));
		mRayMax = V3Max(mOrigin, V3LoadU(ext));
	}

	// shrink the segment when a closer hit has been found during traversal
	PX_FORCE_INLINE void setDistance(PxReal distance)
	{
		const Vec3V ext = V3ScaleAdd(mDir, FLoad(distance), mOrigin);
		mRayMin = V3Min(mOrigin, ext);
		mRayMax = V3Max(mOrigin, ext);
	}

	template<bool TInflate>
	PX_FORCE_INLINE PxU32 check(const Vec3V center, const Vec3V extents) const
	{
		const Vec3V iExt = TInflate ? V3Add(extents, mInflation) : extents;

		// coordinate axes
		const Vec3V nodeMax = V3Add(center, iExt);
		const Vec3V nodeMin = V3Sub(center, iExt);

		// cross axes
		const Vec3V offset = V3Sub(mOrigin, center);
		const Vec3V offsetYZX = V3PermYZX(offset);
		const Vec3V iExtYZX = V3PermYZX(iExt);

		const Vec3V f = V3NegMulSub(mDirYZX, offset, V3Mul(mDir, offsetYZX));
		const Vec3V g = V3MulAdd(iExt, mAbsDirYZX, V3Mul(iExtYZX, mAbsDir));

		const BoolV
			maskA = V3IsGrtrOrEq(nodeMax, mRayMin),
			maskB = V3IsGrtrOrEq(mRayMax, nodeMin),
			maskC = V3IsGrtrOrEq(g, V3Abs(f));
		const BoolV andABCMasks = BAnd(BAnd(maskA, maskB), maskC);

		return BAllEqTTTT(andABCMasks);
	}

	const Vec3V mOrigin, mDir, mDirYZX, mInflation, mAbsDir, mAbsDirYZX;
	Vec3V mRayMin, mRayMax;
protected:
	RayAABBTest& operator=(const RayAABBTest&);
};

// probably not worth having a SIMD version of this unless the traversal passes Vec3Vs
struct AABBAABBTest
{
	PX_FORCE_INLINE AABBAABBTest(const PxTransform&t, const PxBoxGeometry&b)
		: mCenter(V3LoadU(t.p))
		, mExtents(V3LoadU(b.halfExtents))
	{ }

	PX_FORCE_INLINE AABBAABBTest(const PxBounds3& b)
		: mCenter(V3LoadU(b.getCenter()))
		, mExtents(V3LoadU(b.getExtents()))
	{ }

	// separating-axis overlap: |c - mCenter| <= mExtents + e on all three axes
	PX_FORCE_INLINE Ps::IntBool operator()(const Vec3V center, const Vec3V extents) const
	{
		//PxVec3 c; PxVec3_From_Vec3V(center, c);
		//PxVec3 e; PxVec3_From_Vec3V(extents, e);
		//if(PxAbs(c.x - mCenter.x) > mExtents.x + e.x) return Ps::IntFalse;
		//if(PxAbs(c.y - mCenter.y) > mExtents.y + e.y) return Ps::IntFalse;
		//if(PxAbs(c.z - mCenter.z) > mExtents.z + e.z) return Ps::IntFalse;
		//return Ps::IntTrue;
		return Ps::IntBool(V3AllGrtrOrEq(V3Add(mExtents, extents), V3Abs(V3Sub(center, mCenter))));
	}

private:
	AABBAABBTest& operator=(const AABBAABBTest&);
	const Vec3V mCenter, mExtents;
};

// sphere-vs-AABB: closest point on the box compared against squared radius
struct SphereAABBTest
{
	PX_FORCE_INLINE SphereAABBTest(const PxTransform& t, const PxSphereGeometry& s)
		: mCenter(V3LoadU(t.p))
		, mRadius2(FLoad(s.radius * s.radius))
	{}

	PX_FORCE_INLINE SphereAABBTest(const PxVec3& center, PxF32 radius)
		: mCenter(V3LoadU(center))
		, mRadius2(FLoad(radius * radius))
	{}

	PX_FORCE_INLINE Ps::IntBool operator()(const Vec3V boxCenter, const Vec3V boxExtents) const
	{
		const Vec3V offset = V3Sub(mCenter, boxCenter);
		// closest point on the (centered) box to the sphere center
		const Vec3V closest = V3Clamp(offset, V3Neg(boxExtents), boxExtents);
		const Vec3V d = V3Sub(offset, closest);
		return Ps::IntBool(BAllEqTTTT(FIsGrtrOrEq(mRadius2, V3Dot(d, d))));
	}

private:
	SphereAABBTest& operator=(const SphereAABBTest&);
	const Vec3V mCenter;
	const FloatV mRadius2;
};

// The Opcode capsule-AABB traversal test seems to be *exactly* the same as the ray-box test inflated by the capsule radius (so not a true capsule/box test)
// and the code for the ray-box test is better. TODO: check the zero length case and use the sphere traversal if this one fails.
// (OTOH it's not that hard to adapt the Ray-AABB test to a capsule test)

struct CapsuleAABBTest: private RayAABBTest
{
	PX_FORCE_INLINE CapsuleAABBTest(const PxVec3& origin, const PxVec3& unitDir, const PxReal length, const PxVec3& inflation)
		: RayAABBTest(origin, unitDir, length, inflation)
	{}

	PX_FORCE_INLINE Ps::IntBool operator()(const Vec3VArg center, const Vec3VArg extents) const
	{
		// inflated ray test == swept/inflated box rejection for the capsule
		return Ps::IntBool(RayAABBTest::check<true>(center, extents));
	}
};

// OBB-vs-AABB separating-axis test; fullTest additionally checks the nine
// edge-cross-product axes (definition continues below this chunk)
template<bool fullTest>
struct OBBAABBTests
{
	OBBAABBTests(const PxVec3& pos, const PxMat33& rot, const PxVec3& halfExtentsInflated)
	{
		const Vec3V eps = V3Load(1e-6f);

		mT = V3LoadU(pos);
		mExtents = V3LoadU(halfExtentsInflated);

		// storing the transpose matrices yields a simpler SIMD test
		mRT = Mat33V_From_PxMat33(rot.getTranspose());
		mART = Mat33V(V3Add(V3Abs(mRT.col0), eps), V3Add(V3Abs(mRT.col1), eps), V3Add(V3Abs(mRT.col2), eps));
		mBB_xyz = M33TrnspsMulV3(mART, mExtents);

		if(fullTest)
		{
			const Vec3V eYZX = V3PermYZX(mExtents), eZXY =
V3PermZXY(mExtents); + + mBB_123 = V3MulAdd(eYZX, V3PermZXY(mART.col0), V3Mul(eZXY, V3PermYZX(mART.col0))); + mBB_456 = V3MulAdd(eYZX, V3PermZXY(mART.col1), V3Mul(eZXY, V3PermYZX(mART.col1))); + mBB_789 = V3MulAdd(eYZX, V3PermZXY(mART.col2), V3Mul(eZXY, V3PermYZX(mART.col2))); + } + } + + // TODO: force inline it? + Ps::IntBool operator()(const Vec3V center, const Vec3V extents) const + { + const Vec3V t = V3Sub(mT, center); + + // class I - axes of AABB + if(V3OutOfBounds(t, V3Add(extents, mBB_xyz))) + return Ps::IntFalse; + + const Vec3V rX = mRT.col0, rY = mRT.col1, rZ = mRT.col2; + const Vec3V arX = mART.col0, arY = mART.col1, arZ = mART.col2; + + const FloatV eX = V3GetX(extents), eY = V3GetY(extents), eZ = V3GetZ(extents); + const FloatV tX = V3GetX(t), tY = V3GetY(t), tZ = V3GetZ(t); + + // class II - axes of OBB + { + const Vec3V v = V3ScaleAdd(rZ, tZ, V3ScaleAdd(rY, tY, V3Scale(rX, tX))); + const Vec3V v2 = V3ScaleAdd(arZ, eZ, V3ScaleAdd(arY, eY, V3ScaleAdd(arX, eX, mExtents))); + if(V3OutOfBounds(v, v2)) + return Ps::IntFalse; + } + + if(!fullTest) + return Ps::IntTrue; + + // class III - edge cross products. 
Almost all OBB tests early-out with type I or type II, + // so early-outs here probably aren't useful (TODO: profile) + + const Vec3V va = V3NegScaleSub(rZ, tY, V3Scale(rY, tZ)); + const Vec3V va2 = V3ScaleAdd(arY, eZ, V3ScaleAdd(arZ, eY, mBB_123)); + const BoolV ba = BOr(V3IsGrtr(va, va2), V3IsGrtr(V3Neg(va2), va)); + + const Vec3V vb = V3NegScaleSub(rX, tZ, V3Scale(rZ, tX)); + const Vec3V vb2 = V3ScaleAdd(arX, eZ, V3ScaleAdd(arZ, eX, mBB_456)); + const BoolV bb = BOr(V3IsGrtr(vb, vb2), V3IsGrtr(V3Neg(vb2), vb)); + + const Vec3V vc = V3NegScaleSub(rY, tX, V3Scale(rX, tY)); + const Vec3V vc2 = V3ScaleAdd(arX, eY, V3ScaleAdd(arY, eX, mBB_789)); + const BoolV bc = BOr(V3IsGrtr(vc, vc2), V3IsGrtr(V3Neg(vc2), vc)); + + return Ps::IntBool(BAllEqFFFF(BOr(ba, BOr(bb,bc)))); + } + + Vec3V mExtents; // extents of OBB + Vec3V mT; // translation of OBB + Mat33V mRT; // transpose of rotation matrix of OBB + Mat33V mART; // transpose of mRT, padded by epsilon + + Vec3V mBB_xyz; // extents of OBB along coordinate axes + Vec3V mBB_123; // projections of extents onto edge-cross axes + Vec3V mBB_456; + Vec3V mBB_789; +}; + +typedef OBBAABBTests<true> OBBAABBTest; + +} +} +#endif diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.cpp b/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.cpp new file mode 100644 index 00000000..8a90a1d3 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.cpp @@ -0,0 +1,182 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#include "foundation/PxMemory.h" +#include "SqPruningPool.h" + +using namespace physx; +using namespace Sq; +using namespace Cm; + +PruningPool::PruningPool() : + mNbObjects (0), + mMaxNbObjects (0), + mWorldBoxes (NULL), + mObjects (NULL), + mHandleToIndex (NULL), + mIndexToHandle (NULL), + mFirstRecycledHandle(INVALID_PRUNERHANDLE) +{ +} + +PruningPool::~PruningPool() +{ + PX_FREE_AND_RESET(mWorldBoxes); + PX_FREE_AND_RESET(mObjects); + PX_FREE_AND_RESET(mHandleToIndex); + PX_FREE_AND_RESET(mIndexToHandle); +} + +bool PruningPool::resize(PxU32 newCapacity) +{ + // PT: we always allocate one extra box, to make sure we can safely use V4 loads on the array + PxBounds3* newBoxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*(newCapacity+1), "PxBounds3")); + PrunerPayload* newData = reinterpret_cast<PrunerPayload*>(PX_ALLOC(sizeof(PrunerPayload)*newCapacity, "PrunerPayload*")); + PrunerHandle* newIndexToHandle = reinterpret_cast<PrunerHandle*>(PX_ALLOC(sizeof(PrunerHandle)*newCapacity, "Pruner Index Mapping")); + PoolIndex* newHandleToIndex = reinterpret_cast<PoolIndex*>(PX_ALLOC(sizeof(PoolIndex)*newCapacity, "Pruner Index Mapping")); + if( (NULL==newBoxes) || (NULL==newData) || (NULL==newIndexToHandle) || (NULL==newHandleToIndex) + ) + { + PX_FREE_AND_RESET(newBoxes); + PX_FREE_AND_RESET(newData); + PX_FREE_AND_RESET(newIndexToHandle); + PX_FREE_AND_RESET(newHandleToIndex); + return false; + } + + if(mWorldBoxes) PxMemCopy(newBoxes, mWorldBoxes, mNbObjects*sizeof(PxBounds3)); + if(mObjects) PxMemCopy(newData, mObjects, mNbObjects*sizeof(PrunerPayload)); + if(mIndexToHandle) PxMemCopy(newIndexToHandle, mIndexToHandle, mNbObjects*sizeof(PrunerHandle)); + if(mHandleToIndex) PxMemCopy(newHandleToIndex, mHandleToIndex, mMaxNbObjects*sizeof(PoolIndex)); + 
mMaxNbObjects = newCapacity; + + PX_FREE_AND_RESET(mWorldBoxes); + PX_FREE_AND_RESET(mObjects); + PX_FREE_AND_RESET(mHandleToIndex); + PX_FREE_AND_RESET(mIndexToHandle); + mWorldBoxes = newBoxes; + mObjects = newData; + mHandleToIndex = newHandleToIndex; + mIndexToHandle = newIndexToHandle; + + return true; +} + +void PruningPool::preallocate(PxU32 newCapacity) +{ + if(newCapacity>mMaxNbObjects) + resize(newCapacity); +} + +PxU32 PruningPool::addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count) +{ + for(PxU32 i=0;i<count;i++) + { + if(mNbObjects==mMaxNbObjects) // increase the capacity on overflow + { + if(!resize(PxMax<PxU32>(mMaxNbObjects*2, 64))) + { + // pool can return an invalid handle if memory alloc fails + // should probably have an error here or not handle this + results[i] = INVALID_PRUNERHANDLE; // PT: we need to write the potentially invalid handle to let users know which object failed first + return i; + } + } + PX_ASSERT(mNbObjects!=mMaxNbObjects); + + const PoolIndex index = mNbObjects++; + + // update mHandleToIndex and mIndexToHandle mappings + PrunerHandle handle; + if(mFirstRecycledHandle != INVALID_PRUNERHANDLE) + { + // mFirstRecycledHandle is an entry into a freelist for removed slots + // this path is only taken if we have any removed slots + handle = mFirstRecycledHandle; + mFirstRecycledHandle = mHandleToIndex[handle]; + } + else + { + handle = index; + } + + // PT: TODO: investigate why we added mIndexToHandle/mHandleToIndex. The initial design with 'Prunable' objects didn't need these arrays. 
+ + // PT: these 3 arrays are "parallel" + mWorldBoxes [index] = bounds[i]; // store the payload and AABB in parallel arrays + mObjects [index] = payload[i]; + mIndexToHandle [index] = handle; + + mHandleToIndex[handle] = index; + results[i] = handle; + } + return count; +} + +PoolIndex PruningPool::removeObject(PrunerHandle h) +{ + PX_ASSERT(mNbObjects); + + // remove the object and its AABB by provided PrunerHandle and update mHandleToIndex and mIndexToHandle mappings + const PoolIndex indexOfRemovedObject = mHandleToIndex[h]; // retrieve object's index from handle + + const PoolIndex indexOfLastObject = --mNbObjects; // swap the object at last index with index + if(indexOfLastObject!=indexOfRemovedObject) + { + // PT: move last object's data to recycled spot (from removed object) + + // PT: the last object has moved so we need to handle the mappings for this object + // PT: TODO: investigate where this double-mapping comes from. Should not be needed... + + // PT: these 3 arrays are "parallel" + const PrunerHandle handleOfLastObject = mIndexToHandle[indexOfLastObject]; + mWorldBoxes [indexOfRemovedObject] = mWorldBoxes [indexOfLastObject]; + mObjects [indexOfRemovedObject] = mObjects [indexOfLastObject]; + mIndexToHandle [indexOfRemovedObject] = handleOfLastObject; + + mHandleToIndex[handleOfLastObject] = indexOfRemovedObject; + } + + // mHandleToIndex also stores the freelist for removed handles (in place of holes formed by removed handles) + mHandleToIndex[h] = mFirstRecycledHandle; // update linked list of available recycled handles + mFirstRecycledHandle = h; // update the list head + + return indexOfLastObject; +} + +void PruningPool::shiftOrigin(const PxVec3& shift) +{ + for(PxU32 i=0; i < mNbObjects; i++) + { + mWorldBoxes[i].minimum -= shift; + mWorldBoxes[i].maximum -= shift; + } +} diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.h b/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.h new file mode 100644 index 00000000..229ea340 --- /dev/null +++ 
b/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.h @@ -0,0 +1,120 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 

#ifndef SQ_PRUNINGPOOL_H
#define SQ_PRUNINGPOOL_H

#include "SqPruner.h"
#include "SqTypedef.h"
#include "SqBounds.h"

namespace physx
{
namespace Sq
{
	// This class is designed to maintain a two way mapping between pair(PrunerPayload,AABB) and PrunerHandle
	// Internally there's also an index for handles (AP: can be simplified?)
	// This class effectively stores bounded pruner payloads, returns a PrunerHandle and allows O(1)
	// access to them using a PrunerHandle
	// Supported operations are add, remove, update bounds
	class PruningPool
	{
		public:
												PruningPool();
												~PruningPool();

		// O(1) payload lookup by handle
		PX_FORCE_INLINE	const PrunerPayload&	getPayload(PrunerHandle handle) const	{ return mObjects[getIndex(handle)]; }

		// O(1) payload lookup by handle; also returns a pointer to the object's
		// world AABB so callers can update it in place
		PX_FORCE_INLINE	const PrunerPayload&	getPayload(PrunerHandle handle, PxBounds3*& bounds) const
												{
													const PoolIndex index = getIndex(handle);
													bounds = mWorldBoxes + index;
													return mObjects[index];
												}

		// translates all stored AABBs by -shift (world origin shift)
						void					shiftOrigin(const PxVec3& shift);

		// PT: adds 'count' objects to the pool. Needs 'count' bounds and 'count' payloads passed as input. Writes out 'count' handles
		// in 'results' array. Function returns number of successfully added objects, ideally 'count' but can be less in case we run
		// out of memory.
						PxU32					addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count);

		// this function will swap the last object with the hole formed by removed PrunerHandle object
		// and return the removed last object's index in the pool
						PoolIndex				removeObject(PrunerHandle h);

		// Data access
		PX_FORCE_INLINE	PoolIndex				getIndex(PrunerHandle h)		const	{ return mHandleToIndex[h]; }
		PX_FORCE_INLINE	PrunerPayload*			getObjects()					const	{ return mObjects; }
		PX_FORCE_INLINE	PxU32					getNbActiveObjects()			const	{ return mNbObjects; }
		PX_FORCE_INLINE	const PxBounds3*		getCurrentWorldBoxes()			const	{ return mWorldBoxes; }
		PX_FORCE_INLINE	PxBounds3*				getCurrentWorldBoxes()					{ return mWorldBoxes; }

		PX_FORCE_INLINE	void					setWorldAABB(PrunerHandle h, const PxBounds3& worldAABB)
												{
													mWorldBoxes[getIndex(h)] = worldAABB;
												}

		PX_FORCE_INLINE	const PxBounds3&		getWorldAABB(PrunerHandle h) const
												{
													return mWorldBoxes[getIndex(h)];
												}

		// writes inflated copies of newBounds[indices[i]] into the stored AABB
		// of each handles[i] (see Sq::inflateBounds)
		PX_FORCE_INLINE	void					updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count)
												{
													for(PxU32 i=0; i<count; i++)
														Sq::inflateBounds(mWorldBoxes[getIndex(handles[i])], newBounds[indices[i]]);
												}

		// ensures capacity for at least 'entries' objects
						void					preallocate(PxU32 entries);
//	protected:

		PxU32			mNbObjects;		//!< Current number of objects
		PxU32			mMaxNbObjects;	//!< Max. number of objects (capacity for mWorldBoxes, mObjects)

		//!< these arrays are parallel
		PxBounds3*		mWorldBoxes;	//!< List of world boxes, stores mNbObjects, capacity=mMaxNbObjects
		PrunerPayload*	mObjects;		//!< List of objects, stores mNbObjects, capacity=mMaxNbObjects
//	private:
		PoolIndex*		mHandleToIndex;	//!< Maps from PrunerHandle to internal index (payload index in mObjects)
		PrunerHandle*	mIndexToHandle;	//!< Inverse map from objectIndex to PrunerHandle

		// this is the head of a list of holes formed in mHandleToIndex
		// by removed handles
		// the rest of the list is stored in holes in mHandleToIndex (in place)
		PrunerHandle	mFirstRecycledHandle;

		// grows all arrays to newCapacity; false on allocation failure
		bool			resize(PxU32 newCapacity);
	};

} // namespace Sq

}

#endif // SQ_PRUNINGPOOL_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPruningStructure.cpp b/PhysX_3.4/Source/SceneQuery/src/SqPruningStructure.cpp
new file mode 100644
index 00000000..d785abb2
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqPruningStructure.cpp
@@ -0,0 +1,427 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "SqPruningStructure.h" +#include "SqAABBPruner.h" +#include "SqAABBTree.h" +#include "SqBounds.h" + +#include "NpRigidDynamic.h" +#include "NpRigidStatic.h" +#include "NpShape.h" + +#include "GuBounds.h" + +#include "CmTransformUtils.h" +#include "CmUtils.h" + +#include "ScbShape.h" + +using namespace physx; +using namespace Sq; + +////////////////////////////////////////////////////////////////////////// + +#define NB_OBJECTS_PER_NODE 4 + +////////////////////////////////////////////////////////////////////////// +PruningStructure::PruningStructure(PxBaseFlags baseFlags) + : PxPruningStructure(baseFlags) +{ +} + +////////////////////////////////////////////////////////////////////////// +PruningStructure::PruningStructure() + : PxPruningStructure(PxConcreteType::ePRUNING_STRUCTURE, PxBaseFlag::eOWNS_MEMORY | PxBaseFlag::eIS_RELEASABLE), + mNbActors(0), mActors(0), mValid(true) +{ + for (PxU32 i = 0; i < 2; i++) + { + mNbNodes[i] = 0; + mNbObjects[i] = 0; + mAABBTreeIndices[i] = NULL; + mAABBTreeNodes[i] = NULL; + } +} + +////////////////////////////////////////////////////////////////////////// +PruningStructure::~PruningStructure() +{ 
+ if(getBaseFlags() & PxBaseFlag::eOWNS_MEMORY) + { + for (PxU32 i = 0; i < 2; i++) + { + if(mAABBTreeIndices[i]) + { + PX_FREE(mAABBTreeIndices[i]); + } + if (mAABBTreeNodes[i]) + { + PX_FREE(mAABBTreeNodes[i]); + } + } + + if(mActors) + { + PX_FREE(mActors); + } + } +} + +////////////////////////////////////////////////////////////////////////// +void PruningStructure::release() +{ + // if we release the pruning structure we set the pruner structure to NUUL + for (PxU32 i = 0; i < mNbActors; i++) + { + PX_ASSERT(mActors[i]); + + PxType type = mActors[i]->getConcreteType(); + if (type == PxConcreteType::eRIGID_STATIC) + { + static_cast<NpRigidStatic*>(mActors[i])->getShapeManager().setPruningStructure(NULL); + } + else if (type == PxConcreteType::eRIGID_DYNAMIC) + { + static_cast<NpRigidDynamic*>(mActors[i])->getShapeManager().setPruningStructure(NULL); + } + } + + if(getBaseFlags() & PxBaseFlag::eOWNS_MEMORY) + { + delete this; + } + else + { + this->~PruningStructure(); + } +} + +template <typename ActorType> +static void getShapeBounds(PxRigidActor* actor, bool dynamic, PxBounds3& bounds, PxU32& numShapes) +{ + PruningIndex::Enum treeStructure = dynamic ? 
PruningIndex::eDYNAMIC : PruningIndex::eSTATIC; + ActorType& a = *static_cast<ActorType*>(actor); + const PxU32 nbShapes = a.getNbShapes(); + for (PxU32 iShape = 0; iShape < nbShapes; iShape++) + { + NpShape* shape = a.getShapeManager().getShapes()[iShape]; + if (shape->getFlags() & PxShapeFlag::eSCENE_QUERY_SHAPE) + { + const Scb::Shape& scbShape = shape->getScbShape(); + const Scb::Actor& scbActor = a.getScbActorFast(); + + (gComputeBoundsTable[treeStructure])(bounds, scbShape, scbActor); + numShapes++; + } + } +} + +////////////////////////////////////////////////////////////////////////// +bool PruningStructure::build(PxRigidActor*const* actors, PxU32 nbActors) +{ + PX_ASSERT(actors); + PX_ASSERT(nbActors > 0); + + PxU32 numShapes[2] = { 0, 0 }; + // parse the actors first to get the shapes size + for (PxU32 actorsDone = 0; actorsDone < nbActors; actorsDone++) + { + if (actorsDone + 1 < nbActors) + Ps::prefetch(actors[actorsDone + 1], sizeof(NpRigidDynamic)); // worst case: PxRigidStatic is smaller + + PxType type = actors[actorsDone]->getConcreteType(); + const PxRigidActor& actor = *(actors[actorsDone]); + + Scb::ControlState::Enum cs = NpActor::getScbFromPxActor(actor).getControlState(); + if (!((cs == Scb::ControlState::eNOT_IN_SCENE) || ((cs == Scb::ControlState::eREMOVE_PENDING)))) + { + Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Actor already assigned to a scene!"); + return false; + } + + const PxU32 nbShapes = actor.getNbShapes(); + bool hasQueryShape = false; + for (PxU32 iShape = 0; iShape < nbShapes; iShape++) + { + PxShape* shape; + actor.getShapes(&shape, 1, iShape); + if(shape->getFlags() & PxShapeFlag::eSCENE_QUERY_SHAPE) + { + hasQueryShape = true; + if (type == PxConcreteType::eRIGID_STATIC) + numShapes[PruningIndex::eSTATIC]++; + else + numShapes[PruningIndex::eDYNAMIC]++; + } + } + + // each provided actor must have a query shape + if(!hasQueryShape) + { + 
Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Provided actor has no scene query shape!"); + return false; + } + + if (type == PxConcreteType::eRIGID_STATIC) + { + NpRigidStatic* rs = static_cast<NpRigidStatic*>(actors[actorsDone]); + if(rs->getShapeManager().getPruningStructure()) + { + Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Provided actor has already a pruning structure!"); + return false; + } + rs->getShapeManager().setPruningStructure(this); + } + else if (type == PxConcreteType::eRIGID_DYNAMIC) + { + NpRigidDynamic* rd = static_cast<NpRigidDynamic*>(actors[actorsDone]); + if (rd->getShapeManager().getPruningStructure()) + { + Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Provided actor has already a pruning structure!"); + return false; + } + rd->getShapeManager().setPruningStructure(this); + } + else + { + Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Provided actor is not a rigid actor!"); + return false; + } + } + + PxBounds3* bounds[2] = { NULL, NULL }; + + for (PxU32 i = 0; i < 2; i++) + { + if(numShapes[i]) + { + bounds[i] = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*numShapes[i], "Pruner bounds")); + } + } + + // now I go again and gather bounds and payload + numShapes[PruningIndex::eSTATIC] = 0; + numShapes[PruningIndex::eDYNAMIC] = 0; + for (PxU32 actorsDone = 0; actorsDone < nbActors; actorsDone++) + { + PxType type = actors[actorsDone]->getConcreteType(); + if (type == PxConcreteType::eRIGID_STATIC) + { + getShapeBounds<NpRigidStatic>(actors[actorsDone], false, + bounds[PruningIndex::eSTATIC][numShapes[PruningIndex::eSTATIC]], numShapes[PruningIndex::eSTATIC]); + } + else if (type == PxConcreteType::eRIGID_DYNAMIC) + { + getShapeBounds<NpRigidDynamic>(actors[actorsDone], true, + 
bounds[PruningIndex::eDYNAMIC][numShapes[PruningIndex::eDYNAMIC]], numShapes[PruningIndex::eDYNAMIC]); + } + } + + AABBTree aabbTrees[2]; + for (PxU32 i = 0; i < 2; i++) + { + mNbObjects[i] = numShapes[i]; + if (numShapes[i]) + { + // create the AABB tree + AABBTreeBuildParams sTB; + sTB.mNbPrimitives = numShapes[i]; + sTB.mAABBArray = bounds[i]; + sTB.mLimit = NB_OBJECTS_PER_NODE; + bool status = aabbTrees[i].build(sTB); + + PX_UNUSED(status); + PX_ASSERT(status); + + // store the tree nodes + mNbNodes[i] = aabbTrees[i].getNbNodes(); + mAABBTreeNodes[i] = reinterpret_cast<AABBTreeRuntimeNode*>(PX_ALLOC(sizeof(AABBTreeRuntimeNode)*mNbNodes[i], "AABBTreeRuntimeNode")); + PxMemCopy(mAABBTreeNodes[i], aabbTrees[i].getNodes(), sizeof(AABBTreeRuntimeNode)*mNbNodes[i]); + mAABBTreeIndices[i] = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mNbObjects[i], "PxU32")); + PxMemCopy(mAABBTreeIndices[i], aabbTrees[i].getIndices(), sizeof(PxU32)*mNbObjects[i]); + + // discard the data + PX_FREE(bounds[i]); + } + } + + // store the actors for verification and serialization + mNbActors = nbActors; + mActors = reinterpret_cast<PxActor**>(PX_ALLOC(sizeof(PxActor*)*mNbActors, "PxActor*")); + PxMemCopy(mActors, actors, sizeof(PxActor*)*mNbActors); + + return true; +} + +////////////////////////////////////////////////////////////////////////// + +PruningStructure* PruningStructure::createObject(PxU8*& address, PxDeserializationContext& context) +{ + PruningStructure* obj = new (address)PruningStructure(PxBaseFlag::eIS_RELEASABLE); + address += sizeof(PruningStructure); + obj->importExtraData(context); + obj->resolveReferences(context); + return obj; +} + +////////////////////////////////////////////////////////////////////////// + +void PruningStructure::resolveReferences(PxDeserializationContext& context) +{ + if (!isValid()) + return; + + for (PxU32 i = 0; i < mNbActors; i++) + { + context.translatePxBase(mActors[i]); + } +} + 
+////////////////////////////////////////////////////////////////////////// + +void PruningStructure::requires(PxProcessPxBaseCallback& c) +{ + if (!isValid()) + return; + + for (PxU32 i = 0; i < mNbActors; i++) + { + c.process(*mActors[i]); + } +} + +////////////////////////////////////////////////////////////////////////// + +void PruningStructure::exportExtraData(PxSerializationContext& stream) +{ + if (!isValid()) + { + Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "PrunerStructure::exportExtraData: Pruning structure is invalid!"); + return; + } + + for (PxU32 i = 0; i < 2; i++) + { + if (mAABBTreeNodes[i]) + { + // store nodes + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mAABBTreeNodes[i], mNbNodes[i] * sizeof(AABBTreeRuntimeNode)); + } + + if(mAABBTreeIndices[i]) + { + // store indices + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mAABBTreeIndices[i], mNbObjects[i] * sizeof(PxU32)); + } + } + + if(mActors) + { + // store actor pointers + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mActors, mNbActors * sizeof(PxActor*)); + } +} + +////////////////////////////////////////////////////////////////////////// + +void PruningStructure::importExtraData(PxDeserializationContext& context) +{ + if (!isValid()) + { + Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "PrunerStructure::importExtraData: Pruning structure is invalid!"); + return; + } + + for (PxU32 i = 0; i < 2; i++) + { + if (mAABBTreeNodes[i]) + { + mAABBTreeNodes[i] = context.readExtraData<Sq::AABBTreeRuntimeNode, PX_SERIAL_ALIGN>(mNbNodes[i]); + } + if(mAABBTreeIndices[i]) + { + mAABBTreeIndices[i] = context.readExtraData<PxU32, PX_SERIAL_ALIGN>(mNbObjects[i]); + } + } + + if (mActors) + { + // read actor pointers + mActors = context.readExtraData<PxActor*, PX_SERIAL_ALIGN>(mNbActors); + } +} + +////////////////////////////////////////////////////////////////////////// + +PxU32 
PruningStructure::getRigidActors(PxRigidActor** userBuffer, PxU32 bufferSize, PxU32 startIndex/* =0 */) const +{ + if(!isValid()) + { + Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "PrunerStructure::getRigidActors: Pruning structure is invalid!"); + return 0; + } + + return Cm::getArrayOfPointers(userBuffer, bufferSize, startIndex, mActors, mNbActors); +} + +////////////////////////////////////////////////////////////////////////// + +void PruningStructure::invalidate(PxActor* actor) +{ + PX_ASSERT(actor); + + // remove actor from the actor list to avoid mem corruption + // this slow, but should be called only with error msg send to user about invalid behavior + for (PxU32 i = 0; i < mNbActors; i++) + { + if(mActors[i] == actor) + { + // set pruning structure to NULL and remove the actor from the list + PxType type = mActors[i]->getConcreteType(); + if (type == PxConcreteType::eRIGID_STATIC) + { + static_cast<NpRigidStatic*>(mActors[i])->getShapeManager().setPruningStructure(NULL); + } + else if (type == PxConcreteType::eRIGID_DYNAMIC) + { + static_cast<NpRigidDynamic*>(mActors[i])->getShapeManager().setPruningStructure(NULL); + } + + mActors[i] = mActors[mNbActors--]; + break; + } + } + + mValid = false; +} + diff --git a/PhysX_3.4/Source/SceneQuery/src/SqSceneQueryManager.cpp b/PhysX_3.4/Source/SceneQuery/src/SqSceneQueryManager.cpp new file mode 100644 index 00000000..cd3e25eb --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqSceneQueryManager.cpp @@ -0,0 +1,500 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "SqSceneQueryManager.h" +#include "SqAABBPruner.h" +#include "SqBucketPruner.h" +#include "SqBounds.h" +#include "NpBatchQuery.h" +#include "PxFiltering.h" +#include "NpRigidDynamic.h" +#include "NpRigidStatic.h" +#include "NpArticulationLink.h" +#include "CmTransformUtils.h" +#include "PsAllocator.h" +#include "PxSceneDesc.h" +#include "ScBodyCore.h" +#include "SqPruner.h" +#include "GuBounds.h" +#include "NpShape.h" + +using namespace physx; +using namespace Sq; +using namespace Sc; + +namespace physx +{ + namespace Sq + { + OffsetTable gOffsetTable; + } +} + +PrunerExt::PrunerExt() : + mPruner (NULL), + mDirtyList (PX_DEBUG_EXP("SQmDirtyList")), + mPrunerType (PxPruningStructureType::eLAST), + mTimestamp (0xffffffff) +{ +} + +PrunerExt::~PrunerExt() +{ + PX_DELETE_AND_RESET(mPruner); +} + +void PrunerExt::init(PxPruningStructureType::Enum type, PxU64 contextID) +{ + mPrunerType = type; + mTimestamp = 0; + Pruner* pruner = NULL; + switch(type) + { + case PxPruningStructureType::eNONE: { pruner = PX_NEW(BucketPruner); break; } + case PxPruningStructureType::eDYNAMIC_AABB_TREE: { pruner = PX_NEW(AABBPruner)(true, contextID); break; } + case PxPruningStructureType::eSTATIC_AABB_TREE: { pruner = PX_NEW(AABBPruner)(false, contextID); break; } + case PxPruningStructureType::eLAST: break; + } + mPruner = pruner; +} + +void PrunerExt::preallocate(PxU32 nbShapes) +{ + if(nbShapes > mDirtyMap.size()) + mDirtyMap.resize(nbShapes); + + if(mPruner) + mPruner->preallocate(nbShapes); +} + +void PrunerExt::flushMemory() +{ + if(!mDirtyList.size()) + mDirtyList.reset(); + + // PT: TODO: flush bitmap here + + // PT: TODO: flush pruner here? 
+} + +void PrunerExt::flushShapes(PxU32 index) +{ + const PxU32 numDirtyList = mDirtyList.size(); + if(!numDirtyList) + return; + const PrunerHandle* const prunerHandles = mDirtyList.begin(); + + const ComputeBoundsFunc func = gComputeBoundsTable[index]; + + for(PxU32 i=0; i<numDirtyList; i++) + { + const PrunerHandle handle = prunerHandles[i]; + mDirtyMap.reset(handle); + + // PT: we compute the new bounds and store them directly in the pruner structure to avoid copies. We delay the updateObjects() call + // to take advantage of batching. + PxBounds3* bounds; + const PrunerPayload& pp = mPruner->getPayload(handle, bounds); + (func)(*bounds, *(reinterpret_cast<Scb::Shape*>(pp.data[0])), *(reinterpret_cast<Scb::Actor*>(pp.data[1]))); + } + // PT: batch update happens after the loop instead of once per loop iteration + mPruner->updateObjects(prunerHandles, NULL, numDirtyList); + mTimestamp += numDirtyList; + mDirtyList.clear(); +} + +// PT: TODO: re-inline this +void PrunerExt::addToDirtyList(PrunerHandle handle) +{ + Cm::BitMap& dirtyMap = mDirtyMap; + if(!dirtyMap.test(handle)) + { + dirtyMap.set(handle); + mDirtyList.pushBack(handle); + mTimestamp++; + } +} + +// PT: TODO: re-inline this +Ps::IntBool PrunerExt::isDirty(PrunerHandle handle) const +{ + return mDirtyMap.test(handle); +} + +// PT: TODO: re-inline this +void PrunerExt::removeFromDirtyList(PrunerHandle handle) +{ + Cm::BitMap& dirtyMap = mDirtyMap; + if(dirtyMap.test(handle)) + { + dirtyMap.reset(handle); + mDirtyList.findAndReplaceWithLast(handle); + } +} + +// PT: TODO: re-inline this +void PrunerExt::growDirtyList(PrunerHandle handle) +{ + // pruners must either provide indices in order or reuse existing indices, so this 'if' is enough to ensure we have space for the new handle + // PT: TODO: fix this. There is just no need for any of it. The pruning pool itself could support the feature for free, similar to what we do + // in MBP. There would be no need for the bitmap or the dirty list array. 
However doing this through the virtual interface would be clumsy, + // adding the cost of virtual calls for very cheap & simple operations. It would be a lot easier to drop it and go back to what we had before. + + Cm::BitMap& dirtyMap = mDirtyMap; + if(dirtyMap.size() <= handle) + dirtyMap.resize(PxMax<PxU32>(dirtyMap.size() * 2, 1024)); + PX_ASSERT(handle<dirtyMap.size()); + dirtyMap.reset(handle); +} + +/////////////////////////////////////////////////////////////////////////////// + +SceneQueryManager::SceneQueryManager( Scb::Scene& scene, PxPruningStructureType::Enum staticStructure, + PxPruningStructureType::Enum dynamicStructure, PxU32 dynamicTreeRebuildRateHint, + const PxSceneLimits& limits) : + mScene (scene) +{ + mPrunerExt[PruningIndex::eSTATIC].init(staticStructure, scene.getContextId()); + mPrunerExt[PruningIndex::eDYNAMIC].init(dynamicStructure, scene.getContextId()); + + setDynamicTreeRebuildRateHint(dynamicTreeRebuildRateHint); + + preallocate(limits.maxNbStaticShapes, limits.maxNbDynamicShapes); + + mDynamicBoundsSync.mPruner = mPrunerExt[PruningIndex::eDYNAMIC].pruner(); + mDynamicBoundsSync.mTimestamp = &mPrunerExt[PruningIndex::eDYNAMIC].mTimestamp; +} + +SceneQueryManager::~SceneQueryManager() +{ +} + +void SceneQueryManager::flushMemory() +{ + for(PxU32 i=0;i<PruningIndex::eCOUNT;i++) + mPrunerExt[i].flushMemory(); +} + +void SceneQueryManager::markForUpdate(PrunerData data) +{ + const PxU32 index = getPrunerIndex(data); + const PrunerHandle handle = getPrunerHandle(data); + + mPrunerExt[index].addToDirtyList(handle); +} + +void SceneQueryManager::preallocate(PxU32 staticShapes, PxU32 dynamicShapes) +{ + mPrunerExt[PruningIndex::eSTATIC].preallocate(staticShapes); + mPrunerExt[PruningIndex::eDYNAMIC].preallocate(dynamicShapes); +} + +PrunerData SceneQueryManager::addPrunerShape(const NpShape& shape, const PxRigidActor& actor, bool dynamic, const PxBounds3* bounds, bool hasPrunerStructure) +{ + PrunerPayload pp; + const Scb::Shape& scbShape = 
shape.getScbShape(); + const Scb::Actor& scbActor = gOffsetTable.convertPxActor2Scb(actor); + pp.data[0] = size_t(&scbShape); + pp.data[1] = size_t(&scbActor); + + PxBounds3 b; + if(bounds) + inflateBounds(b, *bounds); + else + (gComputeBoundsTable[dynamic])(b, scbShape, scbActor); + + const PxU32 index = PxU32(dynamic); + PrunerHandle handle; + PX_ASSERT(mPrunerExt[index].pruner()); + mPrunerExt[index].pruner()->addObjects(&handle, &b, &pp, 1, hasPrunerStructure); + mPrunerExt[index].invalidateTimestamp(); + + mPrunerExt[index].growDirtyList(handle); + + return createPrunerData(index, handle); +} + +const PrunerPayload& SceneQueryManager::getPayload(PrunerData data) const +{ + const PxU32 index = getPrunerIndex(data); + const PrunerHandle handle = getPrunerHandle(data); + return mPrunerExt[index].pruner()->getPayload(handle); +} + +void SceneQueryManager::removePrunerShape(PrunerData data) +{ + const PxU32 index = getPrunerIndex(data); + const PrunerHandle handle = getPrunerHandle(data); + + PX_ASSERT(mPrunerExt[index].pruner()); + + mPrunerExt[index].removeFromDirtyList(handle); + + mPrunerExt[index].invalidateTimestamp(); + mPrunerExt[index].pruner()->removeObjects(&handle); +} + +void SceneQueryManager::setDynamicTreeRebuildRateHint(PxU32 rebuildRateHint) +{ + mRebuildRateHint = rebuildRateHint; + + for(PxU32 i=0;i<PruningIndex::eCOUNT;i++) + { + if(mPrunerExt[i].pruner() && mPrunerExt[i].type() == PxPruningStructureType::eDYNAMIC_AABB_TREE) + static_cast<AABBPruner*>(mPrunerExt[i].pruner())->setRebuildRateHint(rebuildRateHint); + } +} + + +static PxBounds3 computeWorldAABB(const Scb::Shape& scbShape, const Sc::BodyCore& bodyCore) +{ + const Gu::GeometryUnion& geom = scbShape.getGeometryUnion(); + const PxTransform& shape2Actor = scbShape.getShape2Actor(); + + PX_ALIGN(16, PxTransform) globalPose; + + PX_ALIGN(16, PxTransform) kinematicTarget; + PxU16 sqktFlags = PxRigidBodyFlag::eKINEMATIC | PxRigidBodyFlag::eUSE_KINEMATIC_TARGET_FOR_SCENE_QUERIES; + bool 
useTarget = (PxU16(bodyCore.getFlags()) & sqktFlags) == sqktFlags; + + const PxTransform& body2World = (useTarget && bodyCore.getKinematicTarget(kinematicTarget)) ? kinematicTarget : bodyCore.getBody2World(); + Cm::getDynamicGlobalPoseAligned(body2World, shape2Actor, bodyCore.getBody2Actor(), globalPose); + + PxBounds3 tmp; + inflateBounds(tmp, Gu::computeBounds(geom.getGeometry(), globalPose, false)); + return tmp; +} + + +void SceneQueryManager::validateSimUpdates() +{ + if (mPrunerExt[1].type() != PxPruningStructureType::eDYNAMIC_AABB_TREE) + return; + + + Sc::BodyCore*const* activeBodies = mScene.getActiveBodiesArray(); + const PxU32 nbActiveBodies = mScene.getNumActiveBodies(); + + for (PxU32 i = 0; i < nbActiveBodies; ++i) + { + const Sc::BodyCore* bCore = activeBodies[i]; + + if (bCore->isFrozen()) + continue; + + PxRigidBody* pxBody = static_cast<PxRigidBody*>(bCore->getPxActor()); + + PX_ASSERT(pxBody->getConcreteType() == PxConcreteType::eRIGID_DYNAMIC || pxBody->getConcreteType() == PxConcreteType::eARTICULATION_LINK); + + NpShapeManager& shapeManager = *NpActor::getShapeManager(*pxBody); + const PxU32 nbShapes = shapeManager.getNbShapes(); + NpShape* const* shape = shapeManager.getShapes(); + + + for (PxU32 j = 0; j<nbShapes; j++) + { + PrunerData prunerData = shapeManager.getPrunerData(j); + if (prunerData != INVALID_PRUNERHANDLE) + { + const PrunerHandle handle = getPrunerHandle(prunerData); + const PxBounds3 worldAABB = computeWorldAABB(shape[j]->getScbShape(), *bCore); + PxBounds3 prunerAABB = static_cast<AABBPruner*>(mPrunerExt[1].pruner())->getAABB(handle); + PX_ASSERT((worldAABB.minimum - prunerAABB.minimum).magnitudeSquared() < 0.005f*mScene.getPxScene()->getPhysics().getTolerancesScale().length); + PX_ASSERT((worldAABB.maximum - prunerAABB.maximum).magnitudeSquared() < 0.005f*mScene.getPxScene()->getPhysics().getTolerancesScale().length); + PX_UNUSED(worldAABB); + PX_UNUSED(prunerAABB); + } + } + } +} + +void 
SceneQueryManager::processSimUpdates() +{ + PX_PROFILE_ZONE("Sim.updatePruningTrees", mScene.getContextId()); + + { + PX_PROFILE_ZONE("SceneQuery.processActiveShapes", mScene.getContextId()); + + // update all active objects + BodyCore*const* activeBodies = mScene.getScScene().getActiveBodiesArray(); + PxU32 nbActiveBodies = mScene.getScScene().getNumActiveBodies(); + +#define NB_BATCHED_OBJECTS 128 + PrunerHandle batchedHandles[NB_BATCHED_OBJECTS]; + PxU32 nbBatchedObjects = 0; + Pruner* pruner = mPrunerExt[PruningIndex::eDYNAMIC].pruner(); + + while(nbActiveBodies--) + { + // PT: TODO: don't put frozen objects in "active bodies" array? After all they + // are also not included in the 'active transforms' or 'active actors' arrays. + BodyCore* currentBody = *activeBodies++; + if(currentBody->isFrozen()) + continue; + + PxActorType::Enum type; + PxRigidBody* pxBody = static_cast<PxRigidBody*>(getPxActorFromBodyCore(currentBody, type)); + PX_ASSERT(pxBody->getConcreteType()==PxConcreteType::eRIGID_DYNAMIC || pxBody->getConcreteType()==PxConcreteType::eARTICULATION_LINK); + + NpShapeManager* shapeManager; + if(type==PxActorType::eRIGID_DYNAMIC) + { + NpRigidDynamic* rigidDynamic = static_cast<NpRigidDynamic*>(pxBody); + shapeManager = &rigidDynamic->getShapeManager(); + } + else + { + NpArticulationLink* articulationLink = static_cast<NpArticulationLink*>(pxBody); + shapeManager = &articulationLink->getShapeManager(); + } + + const PxU32 nbShapes = shapeManager->getNbShapes(); + for(PxU32 i=0; i<nbShapes; i++) + { + const PrunerData data = shapeManager->getPrunerData(i); + if(data!=SQ_INVALID_PRUNER_DATA) + { + // PT: index can't be zero here! 
+ PX_ASSERT(getPrunerIndex(data)==PruningIndex::eDYNAMIC); + + const PrunerHandle handle = getPrunerHandle(data); + + if(!mPrunerExt[PruningIndex::eDYNAMIC].isDirty(handle)) // PT: if dirty, will be updated in "flushShapes" + { + batchedHandles[nbBatchedObjects] = handle; + + PxBounds3* bounds; + const PrunerPayload& pp = pruner->getPayload(handle, bounds); + computeDynamicWorldAABB(*bounds, *(reinterpret_cast<Scb::Shape*>(pp.data[0])), *(reinterpret_cast<Scb::Actor*>(pp.data[1]))); + nbBatchedObjects++; + + if(nbBatchedObjects==NB_BATCHED_OBJECTS) + { + mPrunerExt[PruningIndex::eDYNAMIC].invalidateTimestamp(); + pruner->updateObjects(batchedHandles, NULL, nbBatchedObjects); + nbBatchedObjects = 0; + } + } + } + } + } + if(nbBatchedObjects) + { + mPrunerExt[PruningIndex::eDYNAMIC].invalidateTimestamp(); + pruner->updateObjects(batchedHandles, NULL, nbBatchedObjects); + } + } + + // flush user modified objects + flushShapes(); + + for(PxU32 i=0;i<PruningIndex::eCOUNT;i++) + { + if(mPrunerExt[i].pruner() && mPrunerExt[i].type() == PxPruningStructureType::eDYNAMIC_AABB_TREE) + static_cast<AABBPruner*>(mPrunerExt[i].pruner())->buildStep(); + + mPrunerExt[i].pruner()->commit(); + } +} + +void SceneQueryManager::afterSync(bool commit) +{ + PX_PROFILE_ZONE("Sim.sceneQueryBuildStep", mScene.getContextId()); + + // flush user modified objects + flushShapes(); + + for (PxU32 i = 0; i<2; i++) + { + if (mPrunerExt[i].pruner() && mPrunerExt[i].type() == PxPruningStructureType::eDYNAMIC_AABB_TREE) + static_cast<AABBPruner*>(mPrunerExt[i].pruner())->buildStep(); + + if (commit) + mPrunerExt[i].pruner()->commit(); + } +} + +void SceneQueryManager::flushShapes() +{ + PX_PROFILE_ZONE("SceneQuery.flushShapes", mScene.getContextId()); + + // must already have acquired writer lock here + + for(PxU32 i=0; i<PruningIndex::eCOUNT; i++) + mPrunerExt[i].flushShapes(i); +} + +void SceneQueryManager::flushUpdates() +{ + PX_PROFILE_ZONE("SceneQuery.flushUpdates", mScene.getContextId()); + + // 
no need to take lock if manual sq update is enabled + // as flushUpdates will only be called from NpScene::flushQueryUpdates() + mSceneQueryLock.lock(); + + flushShapes(); + + for(PxU32 i=0;i<PruningIndex::eCOUNT;i++) + if(mPrunerExt[i].pruner()) + mPrunerExt[i].pruner()->commit(); + + mSceneQueryLock.unlock(); +} + +void SceneQueryManager::forceDynamicTreeRebuild(bool rebuildStaticStructure, bool rebuildDynamicStructure) +{ + PX_PROFILE_ZONE("SceneQuery.forceDynamicTreeRebuild", mScene.getContextId()); + + const bool rebuild[PruningIndex::eCOUNT] = { rebuildStaticStructure, rebuildDynamicStructure }; + + Ps::Mutex::ScopedLock lock(mSceneQueryLock); + for(PxU32 i=0; i<PruningIndex::eCOUNT; i++) + { + if(rebuild[i] && mPrunerExt[i].pruner() && mPrunerExt[i].type() == PxPruningStructureType::eDYNAMIC_AABB_TREE) + { + static_cast<AABBPruner*>(mPrunerExt[i].pruner())->purge(); + static_cast<AABBPruner*>(mPrunerExt[i].pruner())->commit(); + } + } +} + +void SceneQueryManager::shiftOrigin(const PxVec3& shift) +{ + for(PxU32 i=0; i<PruningIndex::eCOUNT; i++) + mPrunerExt[i].pruner()->shiftOrigin(shift); +} + +void DynamicBoundsSync::sync(const PxU32* sqRefs, const PxU32* indices, const PxBounds3* bounds, PxU32 count) +{ + mPruner->updateObjects(sqRefs, indices, bounds, count); + + if (count) + (*mTimestamp)++; +} + diff --git a/PhysX_3.4/Source/SceneQuery/src/SqTypedef.h b/PhysX_3.4/Source/SceneQuery/src/SqTypedef.h new file mode 100644 index 00000000..48b77592 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqTypedef.h @@ -0,0 +1,47 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef SQ_TYPEDEF_H
#define SQ_TYPEDEF_H

#include "CmPhysXCommon.h"

namespace physx
{
namespace Sq
{
	// 32-bit index aliases used by the scene-query pruning code.
	typedef PxU32 PoolIndex;
	typedef PxU32 TreeNodeIndex;

	// Forward declarations — definitions live elsewhere in the SceneQuery module.
	class AABBTree;
	class AABBTreeBuildParams;
}
}

#endif // SQ_TYPEDEF_H