diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/SceneQuery/src | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/SceneQuery/src')
20 files changed, 8794 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.cpp b/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.cpp new file mode 100644 index 00000000..895c5776 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.cpp @@ -0,0 +1,816 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. 
All rights reserved. + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "foundation/PxProfiler.h" +#include "PsIntrinsics.h" +#include "PsUserAllocated.h" +#include "PsBitUtils.h" +#include "PsFoundation.h" +#include "SqAABBPruner.h" +#include "SqAABBTree.h" +#include "SqPrunerMergeData.h" +#include "GuSphere.h" +#include "GuBox.h" +#include "GuCapsule.h" +#include "SqAABBTreeQuery.h" +#include "GuBounds.h" + +using namespace physx; +using namespace Gu; +using namespace Sq; +using namespace Cm; + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +IncrementalPruner* physx::Sq::createAABBPruner(bool incrementalRebuild) +{ + return PX_NEW(Sq::AABBPruner)(incrementalRebuild, 0); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// PT: currently limited to 15 max +#define NB_OBJECTS_PER_NODE 4 + +AABBPruner::AABBPruner(bool incrementalRebuild, PxU64 contextID) : + mAABBTree (NULL), + mNewTree (NULL), + mCachedBoxes (NULL), + mNbCachedBoxes (0), + mNbCalls (0), + mTimeStamp (0), + mBucketPruner (&mPool), + mProgress (BUILD_NOT_STARTED), + mRebuildRateHint (100), + mAdaptiveRebuildTerm(0), + mIncrementalRebuild (incrementalRebuild), + mUncommittedChanges (false), + mNeedsNewTree (false), + mNewTreeFixups (PX_DEBUG_EXP("AABBPruner::mNewTreeFixups")), + mContextID (contextID) +{ +} + +AABBPruner::~AABBPruner() +{ + release(); +} + 
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Add, Remove, Update methods + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool AABBPruner::addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count, bool hasPruningStructure) +{ + PX_PROFILE_ZONE("SceneQuery.prunerAddObjects", mContextID); + + if(!count) + return true; + + // no need to do refitMarked for added objects since they are not in the tree + + // if we have provided pruning structure, we will merge it, the changes will be applied after the objects has been addded + if(!hasPruningStructure || !mAABBTree) + mUncommittedChanges = true; + + // PT: TODO: 'addObjects' for bucket pruner too. Not urgent since we always call the function with count=1 at the moment + const PxU32 valid = mPool.addObjects(results, bounds, payload, count); + + // Bucket pruner is only used while the dynamic pruner is rebuilding + // For the static pruner a full rebuild will happen in commit() every time we modify something, this is not true if + // pruning structure was provided. The objects tree will be merged directly into the static tree. No rebuild will be triggered. 
+ if(mIncrementalRebuild && mAABBTree) + { + mNeedsNewTree = true; // each add forces a tree rebuild + + // if a pruner structure is provided, we dont move the new objects into bucket pruner + // the pruning structure will be merged into the bucket pruner + if(!hasPruningStructure) + { + for(PxU32 i=0;i<valid;i++) + mBucketPruner.addObject(payload[i], bounds[i], mTimeStamp); + } + } + return valid==count; +} + +void AABBPruner::updateObjects(const PrunerHandle* handles, const PxBounds3* newBounds, PxU32 count) +{ + PX_PROFILE_ZONE("SceneQuery.prunerUpdateObjects", mContextID); + + if(!count) + return; + + mUncommittedChanges = true; + + if(newBounds) + { + for(PxU32 i=0; i<count; i++) + mPool.setWorldAABB(handles[i], newBounds[i]); // only updates the bounds + } + + if(mIncrementalRebuild && mAABBTree) + { + mNeedsNewTree = true; // each update forces a tree rebuild + newBounds = mPool.getCurrentWorldBoxes(); + PrunerPayload* payloads = mPool.getObjects(); + for(PxU32 i=0; i<count; i++) + { + const PoolIndex poolIndex = mPool.getIndex(handles[i]); + const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex]; + if(treeNodeIndex!=INVALID_NODE_ID) // this means it's in the current tree still and hasn't been removed + mAABBTree->markNodeForRefit(treeNodeIndex); + else // otherwise it means it should be in the bucket pruner + { + bool found = mBucketPruner.updateObject(newBounds[poolIndex], payloads[poolIndex]); + PX_UNUSED(found); PX_ASSERT(found); + } + + if(mProgress==BUILD_NEW_MAPPING || mProgress==BUILD_FULL_REFIT) + mToRefit.pushBack(poolIndex); + } + } +} + +void AABBPruner::updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count) +{ + PX_PROFILE_ZONE("SceneQuery.prunerUpdateObjects", mContextID); + + mUncommittedChanges = true; + + mPool.updateObjects(handles, indices, newBounds, count); + + if (mIncrementalRebuild && mAABBTree) + { + mNeedsNewTree = true; // each update forces a tree rebuild + for (PxU32 i = 0; 
i<count; i++) + { + const PoolIndex poolIndex = mPool.getIndex(handles[i]); + const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex]; + if (treeNodeIndex != INVALID_NODE_ID) // this means it's in the current tree still and hasn't been removed + mAABBTree->markNodeForRefit(treeNodeIndex); + else // otherwise it means it should be in the bucket pruner + { + bool found = mBucketPruner.updateObject(newBounds[indices[i]], mPool.getPayload(handles[i])); + PX_UNUSED(found); PX_ASSERT(found); + } + + if (mProgress == BUILD_NEW_MAPPING || mProgress == BUILD_FULL_REFIT) + mToRefit.pushBack(poolIndex); + } + } +} + +void AABBPruner::removeObjects(const PrunerHandle* handles, PxU32 count) +{ + PX_PROFILE_ZONE("SceneQuery.prunerRemoveObjects", mContextID); + + if(!count) + return; + + mUncommittedChanges = true; + + for(PxU32 i=0; i<count; i++) + { + const PrunerHandle h = handles[i]; + // copy the payload before removing it since we need to know the payload to remove it from the bucket pruner + const PrunerPayload removedPayload = mPool.getPayload(h); + const PoolIndex poolIndex = mPool.getIndex(h); // save the pool index for removed object + const PoolIndex poolRelocatedLastIndex = mPool.removeObject(h); // save the lastIndex returned by removeObject + if(mIncrementalRebuild && mAABBTree) + { + mNeedsNewTree = true; + + const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex]; // already removed from pool but still in tree map + const PrunerPayload swappedPayload = mPool.getObjects()[poolIndex]; + if(treeNodeIndex!=INVALID_NODE_ID) // can be invalid if removed + { + mAABBTree->markNodeForRefit(treeNodeIndex); // mark the spot as blank + mBucketPruner.swapIndex(poolIndex, swappedPayload, poolRelocatedLastIndex); // if swapped index is in bucket pruner + } + else + { + PX_ASSERT(treeNodeIndex==INVALID_PRUNERHANDLE); + PxU32 timeStamp; + bool status = mBucketPruner.removeObject(removedPayload, poolIndex, swappedPayload, poolRelocatedLastIndex, timeStamp); + PX_ASSERT(status); + 
PX_UNUSED(status); + } + + mTreeMap.invalidate(poolIndex, poolRelocatedLastIndex, *mAABBTree); + if(mNewTree) + mNewTreeFixups.pushBack(NewTreeFixup(poolIndex, poolRelocatedLastIndex)); + } + } + + if (mPool.getNbActiveObjects()==0) + { + // this is just to make sure we release all the internal data once all the objects are out of the pruner + // since this is the only place we know that and we don't want to keep memory reserved + release(); + + // Pruner API requires a commit before the next query, even if we ended up removing the entire tree here. This + // forces that to happen. + mUncommittedChanges = true; + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Query Implementation + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +PxAgain AABBPruner::overlap(const ShapeData& queryVolume, PrunerCallback& pcb) const +{ + PX_ASSERT(!mUncommittedChanges); + + PxAgain again = true; + + if(mAABBTree) + { + switch(queryVolume.getType()) + { + case PxGeometryType::eBOX: + { + if(queryVolume.isOBB()) + { + const Gu::OBBAABBTest test(queryVolume.getPrunerWorldPos(), queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerBoxGeomExtentsInflated()); + again = AABBTreeOverlap<Gu::OBBAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb); + } + else + { + const Gu::AABBAABBTest test(queryVolume.getPrunerInflatedWorldAABB()); + again = AABBTreeOverlap<Gu::AABBAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb); + } + } + break; + case PxGeometryType::eCAPSULE: + { + const Gu::Capsule& capsule = queryVolume.getGuCapsule(); + const Gu::CapsuleAABBTest test( capsule.p1, 
queryVolume.getPrunerWorldRot33().column0, + queryVolume.getCapsuleHalfHeight()*2.0f, PxVec3(capsule.radius*SQ_PRUNER_INFLATION)); + again = AABBTreeOverlap<Gu::CapsuleAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb); + } + break; + case PxGeometryType::eSPHERE: + { + const Gu::Sphere& sphere = queryVolume.getGuSphere(); + Gu::SphereAABBTest test(sphere.center, sphere.radius); + again = AABBTreeOverlap<Gu::SphereAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb); + } + break; + case PxGeometryType::eCONVEXMESH: + { + const Gu::OBBAABBTest test(queryVolume.getPrunerWorldPos(), queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerBoxGeomExtentsInflated()); + again = AABBTreeOverlap<Gu::OBBAABBTest>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, test, pcb); + } + break; + case PxGeometryType::ePLANE: + case PxGeometryType::eTRIANGLEMESH: + case PxGeometryType::eHEIGHTFIELD: + case PxGeometryType::eGEOMETRY_COUNT: + case PxGeometryType::eINVALID: + PX_ALWAYS_ASSERT_MESSAGE("unsupported overlap query volume geometry type"); + } + } + + if(again && mIncrementalRebuild && mBucketPruner.getNbObjects()) + again = mBucketPruner.overlap(queryVolume, pcb); + + return again; +} + +PxAgain AABBPruner::sweep(const ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const +{ + PX_ASSERT(!mUncommittedChanges); + + PxAgain again = true; + + if(mAABBTree) + { + const PxBounds3& aabb = queryVolume.getPrunerInflatedWorldAABB(); + const PxVec3 extents = aabb.getExtents(); + again = AABBTreeRaycast<true>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, aabb.getCenter(), unitDir, inOutDistance, extents, pcb); + } + + if(again && mIncrementalRebuild && mBucketPruner.getNbObjects()) + again = mBucketPruner.sweep(queryVolume, unitDir, inOutDistance, pcb); + + return again; +} + +PxAgain AABBPruner::raycast(const PxVec3& origin, const PxVec3& unitDir, 
PxReal& inOutDistance, PrunerCallback& pcb) const +{ + PX_ASSERT(!mUncommittedChanges); + + PxAgain again = true; + + if(mAABBTree) + again = AABBTreeRaycast<false>()(mPool.getObjects(), mPool.getCurrentWorldBoxes(), *mAABBTree, origin, unitDir, inOutDistance, PxVec3(0.0f), pcb); + + if(again && mIncrementalRebuild && mBucketPruner.getNbObjects()) + again = mBucketPruner.raycast(origin, unitDir, inOutDistance, pcb); + + return again; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Other methods of Pruner Interface + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// This isn't part of the pruner virtual interface, but it is part of the public interface +// of AABBPruner - it gets called by SqManager to force a rebuild, and requires a commit() before +// queries can take place + +void AABBPruner::purge() +{ + release(); + mUncommittedChanges = true; // this ensures a commit() must happen before any query +} + +void AABBPruner::setRebuildRateHint(PxU32 nbStepsForRebuild) +{ + PX_ASSERT(nbStepsForRebuild > 3); + mRebuildRateHint = (nbStepsForRebuild-3); // looks like a magic number to account for the rebuild pipeline latency + mAdaptiveRebuildTerm = 0; +} + +// Commit either performs a refit if background rebuild is not yet finished +// or swaps the current tree for the second tree rebuilt in the background +void AABBPruner::commit() +{ + PX_PROFILE_ZONE("SceneQuery.prunerCommit", mContextID); + + if(!mUncommittedChanges) + // Q: seems like this is both for refit and finalization so is this is correct? + // i.e. in a situation when we started rebuilding a tree and didn't add anything since + // who is going to set mUncommittedChanges to true? 
+ // A: it's set in buildStep at final stage, so that finalization is forced. + // Seems a bit difficult to follow and verify correctness. + return; + + mUncommittedChanges = false; + + if(!mAABBTree || !mIncrementalRebuild) + { +#if PX_CHECKED + if(!mIncrementalRebuild && mAABBTree) + Ps::getFoundation().error(PxErrorCode::ePERF_WARNING, __FILE__, __LINE__, "SceneQuery static AABB Tree rebuilt, because a shape attached to a static actor was added, removed or moved, and PxSceneDesc::staticStructure is set to eSTATIC_AABB_TREE."); +#endif + fullRebuildAABBTree(); + return; + } + + // Note: it is not safe to call AABBPruner::build() here + // because the first thread will perform one step of the incremental update, + // continue raycasting, while the second thread performs the next step in + // the incremental update + + // Calling Refit() below is safe. It will call + // StaticPruner::build() when necessary. Both will early + // exit if the tree is already up to date, if it is not already, then we + // must be the first thread performing raycasts on a dirty tree and other + // scene query threads will be locked out by the write lock in + // SceneQueryManager::flushUpdates() + + + if (mProgress != BUILD_FINISHED) + { + // Calling refit because the second tree is not ready to be swapped in (mProgress != BUILD_FINISHED) + // Generally speaking as long as things keep moving the second build will never catch up with true state + refitUpdatedAndRemoved(); + } + else + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeFinalize", mContextID); + + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeSwitch", mContextID); + + PX_DELETE(mAABBTree); // delete the old tree + PX_FREE_AND_RESET(mCachedBoxes); + mProgress = BUILD_NOT_STARTED; // reset the build state to initial + + // Adjust adaptive term to get closer to specified rebuild rate. 
+ // perform an even division correction to make sure the rebuild rate adds up + if (mNbCalls > mRebuildRateHint) + mAdaptiveRebuildTerm++; + else if (mNbCalls < mRebuildRateHint) + mAdaptiveRebuildTerm--; + + // Switch trees +#if PX_DEBUG + mNewTree->validate(); +#endif + mAABBTree = mNewTree; // set current tree to progressively rebuilt tree + mNewTree = NULL; // clear out the progressively rebuild tree pointer + } + + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeMapping", mContextID); + + // rebuild the tree map to match the current (newly built) tree + mTreeMap.initMap(PxMax(mPool.getNbActiveObjects(), mNbCachedBoxes), *mAABBTree); + + // The new mapping has been computed using only indices stored in the new tree. Those indices map the pruning pool + // we had when starting to build the tree. We need to re-apply recorded moves to fix the tree that finished rebuilding. + // AP: the problem here is while we are rebuilding the tree there are ongoing modifications to the current tree + // but the background build has a cached copy of all the AABBs at the time it was started + // (and will produce indices referencing those) + // Things that can happen in the meantime: update, remove, add, commit + for(NewTreeFixup* r = mNewTreeFixups.begin(); r < mNewTreeFixups.end(); r++) + { + // PT: we're not doing a full refit after this point anymore, so the remaining deleted objects must be manually marked for + // refit (otherwise their AABB in the tree would remain valid, leading to crashes when the corresponding index is 0xffffffff). + // We must do this before invalidating the corresponding tree nodes in the map, obviously (otherwise we'd be reading node + // indices that we already invalidated). 
+ const PoolIndex poolIndex = r->removedIndex; + const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex]; + if(treeNodeIndex!=INVALID_NODE_ID) + mAABBTree->markNodeForRefit(treeNodeIndex); + + mTreeMap.invalidate(r->removedIndex, r->relocatedLastIndex, *mAABBTree); + } + mNewTreeFixups.clear(); // clear out the fixups since we just applied them all + } + + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeFinalRefit", mContextID); + + const PxU32 size = mToRefit.size(); + for(PxU32 i=0;i<size;i++) + { + const PoolIndex poolIndex = mToRefit[i]; + const TreeNodeIndex treeNodeIndex = mTreeMap[poolIndex]; + if(treeNodeIndex!=INVALID_NODE_ID) + mAABBTree->markNodeForRefit(treeNodeIndex); + } + mToRefit.clear(); + refitUpdatedAndRemoved(); + } + + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeRemoveObjects", mContextID); + + PxU32 nbRemovedPairs = mBucketPruner.removeMarkedObjects(mTimeStamp-1); + PX_UNUSED(nbRemovedPairs); + + mNeedsNewTree = mBucketPruner.getNbObjects()>0; + } + } + + updateBucketPruner(); +} + + +void AABBPruner::shiftOrigin(const PxVec3& shift) +{ + mPool.shiftOrigin(shift); + + if(mAABBTree) + mAABBTree->shiftOrigin(shift); + + if(mIncrementalRebuild) + mBucketPruner.shiftOrigin(shift); + + if(mNewTree) + mNewTree->shiftOrigin(shift); +} + +#include "CmRenderOutput.h" +void AABBPruner::visualize(Cm::RenderOutput& out, PxU32 color) const +{ + // getAABBTree() asserts when pruner is dirty. NpScene::visualization() does not enforce flushUpdate. 
see DE7834 + const AABBTree* tree = mAABBTree; + + if(tree) + { + struct Local + { + static void _Draw(const AABBTreeRuntimeNode* root, const AABBTreeRuntimeNode* node, Cm::RenderOutput& out_) + { + out_ << Cm::DebugBox(node->mBV, true); + if (node->isLeaf()) + return; + _Draw(root, node->getPos(root), out_); + _Draw(root, node->getNeg(root), out_); + } + }; + out << PxTransform(PxIdentity); + out << color; + Local::_Draw(tree->getNodes(), tree->getNodes(), out); + } + + // Render added objects not yet in the tree + out << PxTransform(PxIdentity); + out << PxU32(PxDebugColor::eARGB_WHITE); + + if(mIncrementalRebuild && mBucketPruner.getNbObjects()) + mBucketPruner.visualize(out, color); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Internal methods + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +bool AABBPruner::buildStep() +{ + PX_PROFILE_ZONE("SceneQuery.prunerBuildStep", mContextID); + + PX_ASSERT(mIncrementalRebuild); + if(mNeedsNewTree) + { + if(mProgress==BUILD_NOT_STARTED) + { + const PxU32 nbObjects = mPool.getNbActiveObjects(); + if(!nbObjects) + return true; + + PX_DELETE(mNewTree); + mNewTree = PX_NEW(AABBTree); + + mNbCachedBoxes = nbObjects; + // PT: we always allocate one extra box, to make sure we can safely use V4 loads on the array + mCachedBoxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*(nbObjects+1), "PxBound3")); + + PxMemCopy(mCachedBoxes, mPool.getCurrentWorldBoxes(), nbObjects*sizeof(PxBounds3)); + + // PT: objects currently in the bucket pruner will be in the new tree. They are marked with the + // current timestamp (mTimeStamp). 
However more objects can get added while we compute the new tree, + // and those ones will not be part of it. These new objects will be marked with the new timestamp + // value (mTimeStamp+1), and we can use these different values to remove the proper objects from + // the bucket pruner (when switching to the new tree). + mTimeStamp++; + mBuilder.reset(); + mBuilder.mNbPrimitives = mNbCachedBoxes; + mBuilder.mAABBArray = mCachedBoxes; + mBuilder.mLimit = NB_OBJECTS_PER_NODE; + + mBuildStats.reset(); + + // start recording modifications to the tree made during rebuild to reapply (fix the new tree) eventually + PX_ASSERT(mNewTreeFixups.size()==0); + + mProgress = BUILD_INIT; + } + else if(mProgress==BUILD_INIT) + { + mNewTree->progressiveBuild(mBuilder, mBuildStats, 0, 0); + mProgress = BUILD_IN_PROGRESS; + mNbCalls = 0; + + // Use a heuristic to estimate the number of work units needed for rebuilding the tree. + // The general idea is to use the number of work units of the previous tree to build the new tree. + // This works fine as long as the number of leaves remains more or less the same for the old and the + // new tree. If that is not the case, this estimate can be way off and the work units per step will + // be either much too small or too large. 
Hence, in that case we will try to estimate the number of work + // units based on the number of leaves of the new tree as follows: + // + // - Assume new tree with n leaves is perfectly-balanced + // - Compute the depth of perfectly-balanced tree with n leaves + // - Estimate number of working units for the new tree + + const PxU32 depth = Ps::ilog2(mBuilder.mNbPrimitives); // Note: This is the depth without counting the leaf layer + const PxU32 estimatedNbWorkUnits = depth * mBuilder.mNbPrimitives; // Estimated number of work units for new tree + const PxU32 estimatedNbWorkUnitsOld = mAABBTree->getTotalPrims(); + if ((estimatedNbWorkUnits <= (estimatedNbWorkUnitsOld << 1)) && (estimatedNbWorkUnits >= (estimatedNbWorkUnitsOld >> 1))) + // The two estimates do not differ by more than a factor 2 + mTotalWorkUnits = estimatedNbWorkUnitsOld; + else + { + mAdaptiveRebuildTerm = 0; + mTotalWorkUnits = estimatedNbWorkUnits; + } + + const PxI32 totalWorkUnits = PxI32(mTotalWorkUnits + (mAdaptiveRebuildTerm * mBuilder.mNbPrimitives)); + mTotalWorkUnits = PxU32(PxMax(totalWorkUnits, 0)); + } + else if(mProgress==BUILD_IN_PROGRESS) + { + mNbCalls++; + const PxU32 Limit = 1 + (mTotalWorkUnits / mRebuildRateHint); + // looks like progressiveRebuild returns 0 when finished + if (!mNewTree->progressiveBuild(mBuilder, mBuildStats, 1, Limit)) + { + // Done + mProgress = BUILD_NEW_MAPPING; +#if PX_DEBUG + mNewTree->validate(); +#endif + } + } + else if(mProgress==BUILD_NEW_MAPPING) + { + mNbCalls++; + mProgress = BUILD_FULL_REFIT; + + // PT: we can't call fullRefit without creating the new mapping first: the refit function will fetch boxes from + // the pool using "primitive indices" captured in the tree. But some of these indices may have been invalidated + // if objects got removed while the tree was built. 
So we need to invalidate the corresponding nodes before refit, + // that way the #prims will be zero and the code won't fetch a wrong box (which may now below to a different object). + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeMapping", mContextID); + + if(mNewTreeFixups.size()) + { + mNewTreeMap.initMap(PxMax(mPool.getNbActiveObjects(), mNbCachedBoxes), *mNewTree); + + // The new mapping has been computed using only indices stored in the new tree. Those indices map the pruning pool + // we had when starting to build the tree. We need to re-apply recorded moves to fix the tree. + for(NewTreeFixup* r = mNewTreeFixups.begin(); r < mNewTreeFixups.end(); r++) + mNewTreeMap.invalidate(r->removedIndex, r->relocatedLastIndex, *mNewTree); + + mNewTreeFixups.clear(); +#if PX_DEBUG + mNewTree->validate(); +#endif + } + } + } + else if(mProgress==BUILD_FULL_REFIT) + { + mNbCalls++; + mProgress = BUILD_LAST_FRAME; + + { + PX_PROFILE_ZONE("SceneQuery.prunerNewTreeFullRefit", mContextID); + + // We need to refit the new tree because objects may have moved while we were building it. + mNewTree->fullRefit(mPool.getCurrentWorldBoxes()); + } + } + else if(mProgress==BUILD_LAST_FRAME) + { + mProgress = BUILD_FINISHED; + } + + // This is required to be set because commit handles both refit and a portion of build finalization (why?) + // This is overly conservative also only necessary in case there were no updates at all to the tree since the last tree swap + // It also overly conservative in a sense that it could be set only if mProgress was just set to BUILD_FINISHED + mUncommittedChanges = true; + + return mProgress==BUILD_FINISHED; + } + + return true; +} + + + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/** + * Builds an AABB-tree for objects in the pruning pool. 
+ * \return true if success + */ +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +bool AABBPruner::fullRebuildAABBTree() +{ + PX_PROFILE_ZONE("SceneQuery.prunerFullRebuildAABBTree", mContextID); + + // Release possibly already existing tree + PX_DELETE_AND_RESET(mAABBTree); + + // Don't bother building an AABB-tree if there isn't a single static object + const PxU32 nbObjects = mPool.getNbActiveObjects(); + if(!nbObjects) + return true; + + bool Status; + { + // Create a new tree + mAABBTree = PX_NEW(AABBTree); + + AABBTreeBuildParams TB; + TB.mNbPrimitives = nbObjects; + TB.mAABBArray = mPool.getCurrentWorldBoxes(); + TB.mLimit = NB_OBJECTS_PER_NODE; + Status = mAABBTree->build(TB); + } + + // No need for the tree map for static pruner + if(mIncrementalRebuild) + mTreeMap.initMap(PxMax(nbObjects,mNbCachedBoxes),*mAABBTree); + + return Status; +} + +// called in the end of commit(), but only if mIncrementalRebuild is true +void AABBPruner::updateBucketPruner() +{ + PX_PROFILE_ZONE("SceneQuery.prunerUpdateBucketPruner", mContextID); + + PX_ASSERT(mIncrementalRebuild); + mBucketPruner.build(); +} + +PxBounds3 AABBPruner::getAABB(PrunerHandle handle) +{ + return mPool.getWorldAABB(handle); +} + +void AABBPruner::release() // this can be called from purge() +{ + mBucketPruner.release(); + + mTimeStamp = 0; + + mTreeMap.release(); + mNewTreeMap.release(); + + PX_FREE_AND_RESET(mCachedBoxes); + mBuilder.reset(); + PX_DELETE_AND_RESET(mNewTree); + PX_DELETE_AND_RESET(mAABBTree); + + mNbCachedBoxes = 0; + mProgress = BUILD_NOT_STARTED; + mNewTreeFixups.clear(); + mUncommittedChanges = false; +} + +// Refit current tree +void AABBPruner::refitUpdatedAndRemoved() +{ + PX_PROFILE_ZONE("SceneQuery.prunerRefitUpdatedAndRemoved", mContextID); + + PX_ASSERT(mIncrementalRebuild); + AABBTree* tree = getAABBTree(); + if(!tree) + 
return; + +#if PX_DEBUG + tree->validate(); +#endif + + //### missing a way to skip work if not needed + + const PxU32 nbObjects = mPool.getNbActiveObjects(); + // At this point there still can be objects in the tree that are blanked out so it's an optimization shortcut (not required) + if(!nbObjects) + return; + + mBucketPruner.refitMarkedNodes(mPool.getCurrentWorldBoxes()); + tree->refitMarkedNodes(mPool.getCurrentWorldBoxes()); +} + +void AABBPruner::merge(const void* mergeParams) +{ + const AABBPrunerMergeData& pruningStructure = *reinterpret_cast<const AABBPrunerMergeData*> (mergeParams); + + if(mAABBTree) + { + // index in pruning pool, where new objects were added + const PxU32 pruningPoolIndex = mPool.getNbActiveObjects() - pruningStructure.mNbObjects; + + // create tree from given nodes and indices + AABBTreeMergeData aabbTreeMergeParams(pruningStructure.mNbNodes, pruningStructure.mAABBTreeNodes, + pruningStructure.mNbObjects, pruningStructure.mAABBTreeIndices, pruningPoolIndex); + + if (!mIncrementalRebuild) + { + // merge tree directly + mAABBTree->mergeTree(aabbTreeMergeParams); + } + else + { + mBucketPruner.addTree(aabbTreeMergeParams, mTimeStamp); + } + } +} diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.h b/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.h new file mode 100644 index 00000000..c5e96aa6 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBPruner.h @@ -0,0 +1,268 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef SQ_AABB_PRUNER_H +#define SQ_AABB_PRUNER_H + +#include "SqPruningPool.h" +#include "SqExtendedBucketPruner.h" +#include "SqAABBTreeUpdateMap.h" +#include "SqAABBTree.h" + +namespace physx +{ + +namespace Sq +{ + // PT: we build the new tree over a number of frames/states, in order to limit perf spikes in 'updatePruningTrees'. + // The states are as follows: + // + // BUILD_NOT_STARTED (1 frame, AABBPruner): + // + // This is the initial state, before the new (AABBTree) build even starts. 
In this frame/state, we perform the AABBPruner-related + // memory allocations: + // - the new AABB tree is allocated + // - the array of cached bounding boxes is allocated and filled + // + // BUILD_INIT (1 frame, AABBTree): + // + // This is the first frame in which the new tree gets built. It deserves its own special state since various things happen in the + // first frame, that do no happen in subsequent frames. Basically most initial AABBTree-related allocations happen here (but no + // build step per se). + // + // BUILD_IN_PROGRESS (N frames, AABBTree): + // + // This is the core build function, actually building the tree. This should be mostly allocation-free, except here and there when + // building non-complete trees, and during the last call when the tree is finally built. + // + // BUILD_NEW_MAPPING (1 frame, AABBPruner): + // + // After the new AABBTree is built, we recreate an AABBTreeUpdateMap for the new tree, and use it to invalidate nodes whose objects + // have been removed during the build. + // + // We need to do that before doing a full refit in the next stage/frame. If we don't do that, the refit code will fetch a wrong box, + // that may very well belong to an entirely new object. + // + // Note that this mapping/update map (mNewTreeMap) is temporary, and only needed for the next stage. + // + // BUILD_FULL_REFIT (1 frame, AABBPruner): + // + // Once the new update map is available, we fully refit the new tree. AABBs of moved objects get updated. AABBs of removed objects + // become empty. + // + // BUILD_LAST_FRAME (1 frame, AABBPruner): + // + // This is an artificial frame used to delay the tree switching code. The switch happens as soon as we reach the BUILD_FINISHED + // state, but we don't want to execute BUILD_FULL_REFIT and the switch in the same frame. This extra BUILD_LAST_FRAME stage buys + // us one frame, i.e. 
	// we have one frame in which we do BUILD_FULL_REFIT, and in the next frame we'll do both BUILD_LAST_FRAME /
	// BUILD_FINISHED / the switch.
	//
	// BUILD_FINISHED (1 frame, AABBPruner):
	//
	// Several things happen in this 'finalization' frame/stage:
	// - We switch the trees (old one is deleted, cached boxes are deleted, new tree pointer is setup)
	// - A new (final) update map is created (mTreeMap). The map is used to invalidate objects that may have been removed during
	//   the BUILD_NEW_MAPPING and BUILD_FULL_REFIT frames. The nodes containing these removed objects are marked for refit.
	// - Nodes containing objects that have moved during the BUILD_NEW_MAPPING and BUILD_FULL_REFIT frames are marked for refit.
	// - We do a partial refit on the new tree, to take these final changes into account. This small partial refit is usually much
	//   cheaper than the full refit we previously performed here.
	// - We remove old objects from the bucket pruner
	//
	// State of the incremental multi-frame rebuild of the dynamic pruner's AABB tree.
	// See the state-machine description above for what happens in each state.
	enum BuildStatus
	{
		BUILD_NOT_STARTED,				// no rebuild in flight; AABBPruner-side allocations happen when one starts
		BUILD_INIT,						// first build frame: AABBTree-side allocations, no build step per se
		BUILD_IN_PROGRESS,				// core tree build, spread over N frames
		BUILD_NEW_MAPPING,				// new AABBTreeUpdateMap created; nodes of removed objects invalidated
		BUILD_FULL_REFIT,				// full refit of the freshly built tree
		BUILD_LAST_FRAME,				// artificial frame delaying the tree switch by one frame
		BUILD_FINISHED,					// trees switched, final partial refit, bucket pruner cleanup

		BUILD_FORCE_DWORD = 0xffffffff	// forces the enum to occupy 32 bits
	};

	// This class implements the Pruner interface for internal SQ use with some additional specialized functions
	// The underlying data structure is a binary AABB tree
	// AABBPruner supports insertions, removals and updates for dynamic objects
	// The tree is either entirely rebuilt in a single frame (static pruner) or progressively rebuilt over multiple frames (dynamic pruner)
	// The rebuild happens on a copy of the tree
	// the copy is then swapped with current tree at the time commit() is called (only if mBuildState is BUILD_FINISHED),
	// otherwise commit() will perform a refit operation applying any pending changes to the current tree
	// While the tree is being rebuilt a temporary data structure (BucketPruner) is also kept in sync and used to speed up
	// queries on updated objects that are
	// not yet in either old or new tree.
	// The requirements on the order of calls:
	// commit() is required to be called before any queries to apply modifications
	// queries can be issued on multiple threads after commit is called
	// commit, buildStep, add/remove/update have to be called from the same thread or otherwise strictly serialized by external code
	// and cannot be issued while a query is running
	class AABBPruner : public IncrementalPruner
	{
	public:
										AABBPruner(bool incrementalRebuild, PxU64 contextID);	// true is equivalent to former dynamic pruner
		virtual							~AABBPruner();

		// Pruner
		virtual bool					addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* userData, PxU32 count = 1, bool hasPruningStructure = false);
		virtual void					removeObjects(const PrunerHandle* handles, PxU32 count = 1);
		virtual void					updateObjects(const PrunerHandle* handles, const PxBounds3* newBounds, PxU32 count = 1);
		virtual void					updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count = 1);
		virtual void					commit();
		virtual PxAgain					raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
		virtual PxAgain					overlap(const Gu::ShapeData& queryVolume, PrunerCallback&) const;
		virtual PxAgain					sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
		virtual const PrunerPayload&	getPayload(PrunerHandle handle) const { return mPool.getPayload(handle); }
		virtual const PrunerPayload&	getPayload(PrunerHandle handle, PxBounds3*& bounds) const { return mPool.getPayload(handle, bounds); }
		virtual void					preallocate(PxU32 entries) { mPool.preallocate(entries); }
		virtual void					shiftOrigin(const PxVec3& shift);
		virtual void					visualize(Cm::RenderOutput& out, PxU32 color) const;
		virtual void					merge(const void* mergeParams);
		//~Pruner

		// IncrementalPruner
		virtual void					purge();									// gets rid of internal accel struct
		virtual void					setRebuildRateHint(PxU32 nbStepsForRebuild);	// Besides the actual rebuild steps, 3 additional steps are needed.
		virtual bool					buildStep();								// returns true if finished
		//~IncrementalPruner

		// direct access for test code

		PX_FORCE_INLINE	PxU32					getNbAddedObjects()	const			{ return mBucketPruner.getNbObjects(); }
		PX_FORCE_INLINE	const Sq::AABBTree*		getAABBTree()		const			{ PX_ASSERT(!mUncommittedChanges); return mAABBTree; }
		PX_FORCE_INLINE	Sq::AABBTree*			getAABBTree()						{ PX_ASSERT(!mUncommittedChanges); return mAABBTree; }
		PX_FORCE_INLINE	void					setAABBTree(Sq::AABBTree* tree)		{ mAABBTree = tree; }
		PX_FORCE_INLINE	const Sq::AABBTree*		hasAABBTree()		const			{ return mAABBTree; }
		PX_FORCE_INLINE	BuildStatus				getBuildStatus()	const			{ return mProgress; }

		// local functions
//	private:
		Sq::AABBTree*				mAABBTree;			// current active tree
		Sq::AABBTreeBuildParams		mBuilder;			// this class deals with the details of the actual tree building
		BuildStats					mBuildStats;

		// tree with build in progress, assigned to mAABBTree in commit, when mProgress is BUILD_FINISHED
		// created in buildStep(), BUILD_NOT_STARTED
		// This is non-null when there is a tree rebuild going on in progress
		// and thus also indicates that we have to start saving the fixups
		Sq::AABBTree*				mNewTree;

		// during rebuild the pool might change so we need a copy of boxes for the tree build
		PxBounds3*					mCachedBoxes;
		PxU32						mNbCachedBoxes;

		// incremented in commit(), serves as a progress counter for rebuild
		PxU32						mNbCalls;

		// PT: incremented each time we start building a new tree (i.e. effectively identifies a given tree)
		// Timestamp is passed to bucket pruner to mark objects added there, linking them to a specific tree.
		// When switching to the new tree, timestamp is used to remove old objects (now in the new tree) from
		// the bucket pruner.
		PxU32						mTimeStamp;

		// this pruner is used for queries on objects that are not in the current tree yet
		// includes both the objects in the tree being rebuilt and all the objects added later
		ExtendedBucketPruner		mBucketPruner;

		BuildStatus					mProgress;			// current state of second tree build progress

		// Fraction (as in 1/Nth) of the total number of primitives
		// that should be processed per step by the AABB builder
		// so if this value is 1, all primitives will be rebuilt, 2 => 1/2 of primitives per step etc.
		// see also mNbCalls, mNbCalls varies from 0 to mRebuildRateHint-1
		PxU32						mRebuildRateHint;

		// Estimate for how much work has to be done to rebuild the tree.
		PxU32						mTotalWorkUnits;

		// Term to correct the work unit estimate if the rebuild rate is not matched
		PxI32						mAdaptiveRebuildTerm;

		PruningPool					mPool;				// Pool of AABBs

		// maps pruning pool indices to aabb tree indices
		// maps to INVALID_NODE_ID if the pool entry was removed or "pool index is outside input domain"
		// The map is the inverse of the tree mapping: (node[map[poolID]].primitive == poolID)
		// So:
		// treeNodeIndex = mTreeMap.operator[](poolIndex)
		// aabbTree->treeNodes[treeNodeIndex].primitives[0] == poolIndex
		AABBTreeUpdateMap			mTreeMap;
		// Temporary update map, see BuildStatus notes above for details
		AABBTreeUpdateMap			mNewTreeMap;

		// This is only set once in the constructor and is equivalent to isDynamicTree
		// if it set to false then a 1-shot rebuild is performed in commit()
		// bucket pruner is only used with incremental rebuild
		bool						mIncrementalRebuild;

		// A rebuild can be triggered even when the Pruner is not dirty
		// mUncommittedChanges is set to true in add, remove, update and buildStep
		// mUncommittedChanges is set to false in commit
		// mUncommittedChanges has to be false (commit() has to be called) in order to run a query as defined by the
		// Pruner higher level API
		// mUncommittedChanges is not set to true in add, when pruning structure is provided. Scene query shapes
		// are merged to current AABB tree directly
		bool						mUncommittedChanges;

		// A new AABB tree is built if an object was added, removed or updated
		// Changing objects during a build will trigger another rebuild right afterwards
		// this is set to true if a new tree has to be created again after the current rebuild is done
		bool						mNeedsNewTree;

		// This struct is used to record modifications made to the pruner state
		// while a tree is building in the background
		// this is so we can apply the modifications to the tree at the time of completion
		// the recorded fixup information is: removedIndex (in ::remove()) and
		// lastIndexMoved which is the last index in the pruner array
		// (since the way we remove from PruningPool is by swapping last into removed slot,
		// we need to apply a fixup so that it syncs up that operation in the new tree)
		struct NewTreeFixup
		{
			PX_FORCE_INLINE NewTreeFixup(PxU32 removedIndex_, PxU32 relocatedLastIndex_)
				: removedIndex(removedIndex_), relocatedLastIndex(relocatedLastIndex_) {}
			PxU32 removedIndex;
			PxU32 relocatedLastIndex;
		};
		Ps::Array<NewTreeFixup>		mNewTreeFixups;

		// pool indices whose tree nodes need a refit pass at commit time
		Ps::Array<PoolIndex>		mToRefit;

		// context identifier forwarded for profiling/diagnostics
		PxU64						mContextID;

		// Internal methods
		bool						fullRebuildAABBTree();	// full rebuild function, used with static pruner mode
		void						release();
		void						refitUpdatedAndRemoved();
		void						updateBucketPruner();
		PxBounds3					getAABB(PrunerHandle h);
	};

} // namespace Sq

}

#endif // SQ_AABB_PRUNER_H
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 

#include "SqAABBTree.h"
#include "SqAABBTreeUpdateMap.h"

#include "PsMathUtils.h"
#include "PsFoundation.h"
#include "GuInternal.h"

using namespace physx;
using namespace Sq;

#define INVALID_ID 0xffffffff

// Progressive building
// FIFO queue of build nodes still to be subdivided; lets the tree build be spread
// over several frames by popping a bounded amount of work per call.
class Sq::FIFOStack : public Ps::UserAllocated
{
public:
						FIFOStack() : mStack(PX_DEBUG_EXP("SQFIFOStack")), mCurIndex(0) {}
						~FIFOStack() {}

	PX_FORCE_INLINE	PxU32	getNbEntries() const	{ return mStack.size(); }
	PX_FORCE_INLINE	void	push(AABBTreeBuildNode* entry)	{ mStack.pushBack(entry); }
					bool	pop(AABBTreeBuildNode*& entry);
private:
	Ps::Array<AABBTreeBuildNode*>	mStack;
	PxU32							mCurIndex;	//!< Current index within the container
};

// Pops the oldest pushed entry (FIFO order). Returns false when the queue is empty.
// Entries are not erased individually; the array is cleared in one go once the read
// cursor catches up with the write end.
bool Sq::FIFOStack::pop(AABBTreeBuildNode*& entry)
{
	const PxU32 NbEntries = mStack.size(); // Get current number of entries
	if(!NbEntries)
		return false; // Can be NULL when no value has been pushed. This is an invalid pop call.
	entry = mStack[mCurIndex++]; // Get oldest entry, move to next one
	if(mCurIndex==NbEntries)
	{
		// All values have been popped
		mStack.clear();
		mCurIndex=0;
	}
	return true;
}
//~Progressive building

NodeAllocator::NodeAllocator() : mPool(NULL), mCurrentSlabIndex(0), mTotalNbNodes(0)
{
}

NodeAllocator::~NodeAllocator()
{
	release();
}

// Frees all node slabs and resets the allocator to its freshly-constructed state.
void NodeAllocator::release()
{
	const PxU32 nbSlabs = mSlabs.size();
	for(PxU32 i=0;i<nbSlabs;i++)
	{
		Slab& s = mSlabs[i];
		PX_DELETE_ARRAY(s.mPool);
	}

	mSlabs.reset();
	mCurrentSlabIndex = 0;
	mTotalNbNodes = 0;
}

// Allocates the first slab of build nodes, sized from an estimate of the final node
// count, and sets up the root node covering all nbPrimitives primitives.
void NodeAllocator::init(PxU32 nbPrimitives, PxU32 limit)
{
	const PxU32 maxSize = nbPrimitives*2 - 1;	// PT: max possible #nodes for a complete tree
	// Heuristic: for small trees allocate the worst case, otherwise scale down by the leaf limit.
	const PxU32 estimatedFinalSize = maxSize<=1024 ? maxSize : maxSize/limit;
	mPool = PX_NEW(AABBTreeBuildNode)[estimatedFinalSize];
	PxMemZero(mPool, sizeof(AABBTreeBuildNode)*estimatedFinalSize);

	// Setup initial node. Here we have a complete permutation of the app's primitives.
	mPool->mNodeIndex = 0;
	mPool->mNbPrimitives = nbPrimitives;

	mSlabs.pushBack(Slab(mPool, 1, estimatedFinalSize));
	mCurrentSlabIndex = 0;
	mTotalNbNodes = 1;
}

// PT: TODO: inline this?
// Returns a pointer to two consecutive build nodes (positive/negative children).
// Allocates a new 1024-node slab when the current slab cannot hold two more nodes.
AABBTreeBuildNode* NodeAllocator::getBiNode()
{
	mTotalNbNodes += 2;
	Slab& currentSlab = mSlabs[mCurrentSlabIndex];
	if(currentSlab.mNbUsedNodes+2<=currentSlab.mMaxNbNodes)
	{
		AABBTreeBuildNode* biNode = currentSlab.mPool + currentSlab.mNbUsedNodes;
		currentSlab.mNbUsedNodes += 2;
		return biNode;
	}
	else
	{
		// Allocate new slab
		const PxU32 size = 1024;
		AABBTreeBuildNode* pool = PX_NEW(AABBTreeBuildNode)[size];
		PxMemZero(pool, sizeof(AABBTreeBuildNode)*size);

		// The new pair occupies the first two nodes of the fresh slab.
		mSlabs.pushBack(Slab(pool, 2, size));
		mCurrentSlabIndex++;
		return pool;
	}
}

// Converts the slab-allocated build nodes into one contiguous array of compact runtime
// nodes. Leaf nodes pack (primitive-start-index << 5 | count << 1 | 1) into mData;
// internal nodes pack (child-node-index << 1). Releases the build-time slabs when done.
void NodeAllocator::flatten(AABBTreeRuntimeNode* dest)
{
	// PT: gathers all build nodes allocated so far and flatten them to a linear destination array of smaller runtime nodes
	PxU32 offset = 0;
	const PxU32 nbSlabs = mSlabs.size();
	for(PxU32 s=0;s<nbSlabs;s++)
	{
		const Slab& currentSlab = mSlabs[s];

		AABBTreeBuildNode* pool = currentSlab.mPool;
		for(PxU32 i=0;i<currentSlab.mNbUsedNodes;i++)
		{
			dest[offset].mBV = pool[i].mBV;
			if(pool[i].isLeaf())
			{
				const PxU32 index = pool[i].mNodeIndex;

				const PxU32 nbPrims = pool[i].getNbPrimitives();
				// NOTE(review): the count field is 4 bits, so nbPrims==16 would wrap to 0
				// under the &15 mask while the assert still permits 16 — verify the build
				// limit guarantees leaves hold fewer than 16 primitives.
				PX_ASSERT(nbPrims<=16);

				dest[offset].mData = (index<<5)|((nbPrims&15)<<1)|1;
			}
			else
			{
				PX_ASSERT(pool[i].mPos);
				// Child pointers are slab-relative; translate the pointer back to a
				// global (flattened) node index by locating its owning slab.
				PxU32 localNodeIndex = 0xffffffff;
				PxU32 nodeBase = 0;
				for(PxU32 j=0;j<nbSlabs;j++)
				{
					if(pool[i].mPos>=mSlabs[j].mPool && pool[i].mPos<mSlabs[j].mPool+mSlabs[j].mNbUsedNodes)
					{
						localNodeIndex = PxU32(pool[i].mPos - mSlabs[j].mPool);
						break;
					}
					nodeBase += mSlabs[j].mNbUsedNodes;
				}
				const PxU32 nodeIndex = nodeBase + localNodeIndex;
				PX_ASSERT(nodeIndex<mTotalNbNodes);
				dest[offset].mData = nodeIndex<<1;
			}
			offset++;
		}
	}
	PX_ASSERT(offset==mTotalNbNodes);
	release();
}

static
PX_FORCE_INLINE float getSplittingValue(const PxBounds3& global_box, PxU32 axis)
{
	// Default split value = middle of the axis (using only the box)
	return global_box.getCenter(axis);
}

// Partitions the prims array in place around the center of 'box' along 'axis'.
// Returns the number of primitives assigned to the positive half-space; those end up
// at the front of the array.
static PxU32 split(const PxBounds3& box, PxU32 nb, PxU32* const PX_RESTRICT prims, PxU32 axis, const AABBTreeBuildParams& params)
{
	// Get node split value
	const float splitValue = getSplittingValue(box, axis);

	PxU32 nbPos = 0;
	// Loop through all node-related primitives. Their indices range from "mNodePrimitives[0]" to "mNodePrimitives[mNbPrimitives-1]",
	// with mNodePrimitives = mIndices + mNodeIndex (i.e. those indices map the global list in the tree params).

	// PT: to avoid calling the unsafe [] operator
	// Offset the cache pointer by 'axis' floats so cache[index].x reads component 'axis'
	// of the cached box center.
	const size_t ptrValue = size_t(params.mCache) + axis*sizeof(float);
	const PxVec3* /*PX_RESTRICT*/ cache = reinterpret_cast<const PxVec3*>(ptrValue);

	for(PxU32 i=0;i<nb;i++)
	{
		// Get index in global list
		const PxU32 index = prims[i];

		// Test against the splitting value. The primitive value is tested against the enclosing-box center.
		// [We only need an approximate partition of the enclosing box here.]
		const float primitiveValue = cache[index].x;
		PX_ASSERT(primitiveValue==params.mCache[index][axis]);

		// Reorganize the list of indices in this order: positive - negative.
		if(primitiveValue > splitValue)
		{
			// Swap entries
			prims[i] = prims[nbPos];
			prims[nbPos] = index;
			// Count primitives assigned to positive space
			nbPos++;
		}
	}
	return nbPos;
}

// Computes this node's bounds, then splits its primitive range along the axis of
// greatest variance of the cached box centers, creating two children via the allocator.
// Stops (stays a leaf) when at or below params.mLimit primitives.
void AABBTreeBuildNode::subdivide(const AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& allocator, PxU32* const indices)
{
	PxU32* const PX_RESTRICT primitives = indices + mNodeIndex;
	const PxU32 nbPrims = mNbPrimitives;

	// Compute global box & means for current node. The box is stored in mBV.
	Vec4V meansV;
	{
		const PxBounds3* PX_RESTRICT boxes = params.mAABBArray;
		PX_ASSERT(boxes);
		PX_ASSERT(primitives);
		PX_ASSERT(nbPrims);

		Vec4V minV = V4LoadU(&boxes[primitives[0]].minimum.x);
		Vec4V maxV = V4LoadU(&boxes[primitives[0]].maximum.x);

		meansV = V4LoadU(&params.mCache[primitives[0]].x);

		for(PxU32 i=1;i<nbPrims;i++)
		{
			const PxU32 index = primitives[i];
			const Vec4V curMinV = V4LoadU(&boxes[index].minimum.x);
			const Vec4V curMaxV = V4LoadU(&boxes[index].maximum.x);
			meansV = V4Add(meansV, V4LoadU(&params.mCache[index].x));
			minV = V4Min(minV, curMinV);
			maxV = V4Max(maxV, curMaxV);
		}

		StoreBounds(mBV, minV, maxV);

		const float coeff = 1.0f/float(nbPrims);
		meansV = V4Scale(meansV, FLoad(coeff));
	}

	// Check the user-defined limit. Also ensures we stop subdividing if we reach a leaf node.
	if(nbPrims<=params.mLimit)
		return;

	bool validSplit = true;
	PxU32 nbPos;
	{
		// Compute variances
		Vec4V varsV = V4Zero();
		for(PxU32 i=0;i<nbPrims;i++)
		{
			const PxU32 index = primitives[i];
			Vec4V centerV = V4LoadU(&params.mCache[index].x);
			centerV = V4Sub(centerV, meansV);
			centerV = V4Mul(centerV, centerV);
			varsV = V4Add(varsV, centerV);
		}
		const float coeffNb1 = 1.0f/float(nbPrims-1);
		varsV = V4Scale(varsV, FLoad(coeffNb1));
		PX_ALIGN(16, PxVec4) vars;
		V4StoreA(varsV, &vars.x);

		// Choose axis with greatest variance
		const PxU32 axis = Ps::largestAxis(PxVec3(vars.x, vars.y, vars.z));

		// Split along the axis
		nbPos = split(mBV, nbPrims, primitives, axis, params);

		// Check split validity
		if(!nbPos || nbPos==nbPrims)
			validSplit = false;
	}

	// Check the subdivision has been successful
	if(!validSplit)
	{
		// Here, all boxes lie in the same sub-space. Two strategies:
		// - if we are over the split limit, make an arbitrary 50-50 split
		// - else stop subdividing
		if(nbPrims>params.mLimit)
		{
			nbPos = nbPrims>>1;
		}
		else return;
	}

	// Now create children and assign their pointers.
	mPos = allocator.getBiNode();

	stats.increaseCount(2);

	// Assign children
	PX_ASSERT(!isLeaf());
	AABBTreeBuildNode* Pos = const_cast<AABBTreeBuildNode*>(mPos);
	AABBTreeBuildNode* Neg = Pos + 1;
	// Children partition this node's index range: [mNodeIndex, mNodeIndex+nbPos) and the rest.
	Pos->mNodeIndex = mNodeIndex;
	Pos->mNbPrimitives = nbPos;
	Neg->mNodeIndex = mNodeIndex + nbPos;
	Neg->mNbPrimitives = mNbPrimitives - nbPos;
}

// Recursively subdivides this node and its children (single-frame, depth-first build).
void AABBTreeBuildNode::_buildHierarchy(AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& nodeBase, PxU32* const indices)
{
	// Subdivide current node
	subdivide(params, stats, nodeBase, indices);

	// Recurse
	if(!isLeaf())
	{
		AABBTreeBuildNode* Pos = const_cast<AABBTreeBuildNode*>(getPos());
		PX_ASSERT(Pos);
		AABBTreeBuildNode* Neg = Pos + 1;
		Pos->_buildHierarchy(params, stats, nodeBase, indices);
		Neg->_buildHierarchy(params, stats, nodeBase, indices);
	}

	stats.mTotalPrims += mNbPrimitives;
}

AABBTree::AABBTree() :
	mIndices		(NULL),
	mNbIndices		(0),
	mRuntimePool	(NULL),
	mParentIndices	(NULL),
	mTotalNbNodes	(0),
	mTotalPrims		(0)
{
// Progressive building
	mStack = NULL;
//~Progressive building

// REFIT
	mRefitHighestSetWord = 0;
//~REFIT
}

AABBTree::~AABBTree()
{
	// Keep the refit bitmask memory on destruction; it is owned by the bitmask itself.
	release(false);
}

// Releases all tree memory (build stack, parent indices, runtime nodes, indices).
// clearRefitMap controls whether pending refit marks are also wiped.
void AABBTree::release(bool clearRefitMap)
{
// Progressive building
	PX_DELETE_AND_RESET(mStack);
//~Progressive building
	PX_FREE_AND_RESET(mParentIndices);
	PX_DELETE_ARRAY(mRuntimePool);
	mNodeAllocator.release();
	PX_FREE_AND_RESET(mIndices);
	mTotalNbNodes = 0;
	mNbIndices = 0;

// REFIT
	if(clearRefitMap)
		mRefitBitmask.clearAll();
	mRefitHighestSetWord = 0;
//~REFIT
}

// Initialize nodes/indices from the input tree merge data
void AABBTree::initTree(const AABBTreeMergeData& tree)
{
	PX_ASSERT(mIndices == NULL);
	PX_ASSERT(mRuntimePool == NULL);
	PX_ASSERT(mParentIndices == NULL);

	// allocate,copy indices
	mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*tree.mNbIndices, "AABB tree indices"));
	mNbIndices = tree.mNbIndices;
	PxMemCopy(mIndices, tree.mIndices, sizeof(PxU32)*tree.mNbIndices);

	// allocate,copy nodes
	mRuntimePool = PX_NEW(AABBTreeRuntimeNode)[tree.mNbNodes];
	mTotalNbNodes = tree.mNbNodes;
	PxMemCopy(mRuntimePool, tree.mNodes, sizeof(AABBTreeRuntimeNode)*tree.mNbNodes);
}

// Shift indices of the tree by offset. Used for merged trees, when initial indices needs to be shifted to match indices in current pruning pool
void AABBTree::shiftIndices(PxU32 offset)
{
	for (PxU32 i = 0; i < mNbIndices; i++)
	{
		mIndices[i] += offset;
	}
}

// First stage of a (full or progressive) build: releases any previous tree, creates the
// identity primitive permutation, allocates the build-node pool and caches all box centers.
// Returns false when there is nothing to build (zero primitives).
bool AABBTree::buildInit(AABBTreeBuildParams& params, BuildStats& stats)
{
	// Checkings
	const PxU32 nbPrimitives = params.mNbPrimitives;
	if(!nbPrimitives)
		return false;

	// Release previous tree
	release();

	// Init stats
	stats.setCount(1);

	// Initialize indices. This list will be modified during build.
	mNbIndices = nbPrimitives;
	mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbPrimitives, "AABB tree indices"));
	// Identity permutation
	for(PxU32 i=0;i<nbPrimitives;i++)
		mIndices[i] = i;

	// Allocate a pool of nodes
	mNodeAllocator.init(nbPrimitives, params.mLimit);

	// Compute box centers only once and cache them
	// (one extra PxVec3 is allocated so the unaligned V4 stores below stay in bounds)
	params.mCache = reinterpret_cast<PxVec3*>(PX_ALLOC(sizeof(PxVec3)*(nbPrimitives+1), "cache"));
	const float half = 0.5f;
	const FloatV halfV = FLoad(half);
	for(PxU32 i=0;i<nbPrimitives;i++)
	{
		const Vec4V curMinV = V4LoadU(&params.mAABBArray[i].minimum.x);
		const Vec4V curMaxV = V4LoadU(&params.mAABBArray[i].maximum.x);
		const Vec4V centerV = V4Scale(V4Add(curMaxV, curMinV), halfV);
		V4StoreU(centerV, &params.mCache[i].x);
	}
	return true;
}

// Final stage of a build: frees the center cache and flattens the build nodes into the
// compact runtime-node array.
void AABBTree::buildEnd(AABBTreeBuildParams& params, BuildStats& stats)
{
	PX_FREE_AND_RESET(params.mCache);
	// Get back total number of nodes
	mTotalNbNodes	= stats.getCount();
	mTotalPrims		= stats.mTotalPrims;

	mRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes];
	PX_ASSERT(mTotalNbNodes==mNodeAllocator.mTotalNbNodes);
	mNodeAllocator.flatten(mRuntimePool);
}

// Single-frame (non-progressive) build of the whole tree. Returns false when there are
// no primitives.
bool AABBTree::build(AABBTreeBuildParams& params)
{
	// Init stats
	BuildStats stats;
	if(!buildInit(params, stats))
		return false;

	// Build the hierarchy
	mNodeAllocator.mPool->_buildHierarchy(params, stats, mNodeAllocator, mIndices);

	buildEnd(params, stats);
	return true;
}

// Translates every node's bounds by -shift (world origin shift support).
void AABBTree::shiftOrigin(const PxVec3& shift)
{
	AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
	const PxU32 totalNbNodes = mTotalNbNodes;
	for(PxU32 i=0; i<totalNbNodes; i++)
	{
		AABBTreeRuntimeNode& current = nodeBase[i];
		if((i+1) < totalNbNodes)
			Ps::prefetch(nodeBase + i + 1);

		current.mBV.minimum -= shift;
		current.mBV.maximum -= shift;
	}
}

#if PX_DEBUG
// Debug-only hook; currently a no-op.
void AABBTree::validate() const
{
}
#endif

// Progressive building
static PxU32 incrementalBuildHierarchy(FIFOStack& stack,
AABBTreeBuildNode* node, AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& nodeBase, PxU32* const indices)
{
	// Subdivide one node and queue its children (breadth-first via the FIFO stack)
	// instead of recursing, so the build can be interrupted between nodes.
	node->subdivide(params, stats, nodeBase, indices);

	if(!node->isLeaf())
	{
		AABBTreeBuildNode* pos = const_cast<AABBTreeBuildNode*>(node->getPos());
		PX_ASSERT(pos);
		AABBTreeBuildNode* neg = pos + 1;
		stack.push(neg);
		stack.push(pos);
	}

	stats.mTotalPrims += node->mNbPrimitives;
	// Return the amount of work done, measured in primitives touched.
	return node->mNbPrimitives;
}

// One step of the multi-frame tree build.
// progress==0: init stage (allocations, root pushed on the work stack).
// progress==1: processes queued nodes until at least 'limit' primitives were handled;
//              returns 0 once the stack is empty and the tree is finalized.
// Returns PX_INVALID_U32 on error (no primitives, or unknown stage).
PxU32 AABBTree::progressiveBuild(AABBTreeBuildParams& params, BuildStats& stats, PxU32 progress, PxU32 limit)
{
	if(progress==0)
	{
		if(!buildInit(params, stats))
			return PX_INVALID_U32;

		mStack = PX_NEW(FIFOStack);
		mStack->push(mNodeAllocator.mPool);
		// NOTE(review): post-increment returns the pre-increment value (0) here;
		// presumably the caller passes explicit stage constants and ignores this
		// stage's return value — confirm against AABBPruner::buildStep.
		return progress++;
	}
	else if(progress==1)
	{
		PxU32 stackCount = mStack->getNbEntries();
		if(stackCount)
		{
			// Process nodes until the per-step work budget ('limit' primitives) is spent.
			PxU32 Total = 0;
			const PxU32 Limit = limit;
			while(Total<Limit)
			{
				AABBTreeBuildNode* Entry;
				if(mStack->pop(Entry))
					Total += incrementalBuildHierarchy(*mStack, Entry, params, stats, mNodeAllocator, mIndices);
				else
					break;
			}
			return progress;
		}

		buildEnd(params, stats);

		PX_DELETE_AND_RESET(mStack);

		return 0;	// Done!
	}
	return PX_INVALID_U32;
}
//~Progressive building



// Number of 32-bit words needed to hold nb_bits bits (rounded up).
static PX_FORCE_INLINE PxU32 BitsToDwords(PxU32 nb_bits)
{
	return (nb_bits>>5) + ((nb_bits&31) ?
1 : 0);
}

// (Re)allocates the bit storage for nb_bits bits and clears it. Any previous storage
// is freed first.
bool Sq::BitArray::init(PxU32 nb_bits)
{
	mSize = BitsToDwords(nb_bits);
	// Get ram for n bits
	PX_FREE(mBits);
	mBits = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mSize, "BitArray::mBits"));
	// Set all bits to 0
	clearAll();
	return true;
}

// Grows the bit storage to hold maxBitNumber bits, preserving existing bits and
// zeroing the newly added words. No-op if already large enough.
void Sq::BitArray::resize(PxU32 maxBitNumber)
{
	const PxU32 newSize = BitsToDwords(maxBitNumber);
	if (newSize <= mSize)
		return;

	PxU32* newBits = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*newSize, "BitArray::mBits"));
	PxMemZero(newBits + mSize, (newSize - mSize) * sizeof(PxU32));
	PxMemCopy(newBits, mBits, mSize*sizeof(PxU32));
	PX_FREE(mBits);
	mBits = newBits;
	mSize = newSize;
}

// Decoders for the packed runtime-node mData word (see NodeAllocator::flatten):
// leaf:     bit 0 set, bits 1-4 = primitive count, bits 5+ = start index into mIndices
// internal: bit 0 clear, bits 1+ = index of the positive child node
static PX_FORCE_INLINE PxU32 getNbPrimitives(PxU32 data)									{ return (data>>1)&15; }
static PX_FORCE_INLINE const PxU32* getPrimitives(const PxU32* base, PxU32 data)			{ return base + (data>>5); }
static PX_FORCE_INLINE const AABBTreeRuntimeNode* getPos(const AABBTreeRuntimeNode* base, PxU32 data)	{ return base + (data>>1); }
static PX_FORCE_INLINE PxU32 isLeaf(PxU32 data)												{ return data&1; }

// Recomputes one node's bounds: from its primitives' boxes for a leaf (an inverted,
// effectively empty box if the leaf has no primitives left), or from its two children
// for an internal node. Assumes children were refit first (bottom-up traversal).
static PX_FORCE_INLINE void refitNode(AABBTreeRuntimeNode* PX_RESTRICT current, const PxBounds3* PX_RESTRICT boxes, const PxU32* PX_RESTRICT indices, AABBTreeRuntimeNode* PX_RESTRICT const nodeBase)
{
	// PT: we can safely use V4 loads on both boxes and nodes here:
	// - it's safe on boxes because we allocated one extra box in the pruning pool
	// - it's safe on nodes because there's always some data within the node, after the BV

	const PxU32 data = current->mData;

	Vec4V resultMinV, resultMaxV;
	if(isLeaf(data))
	{
		const PxU32 nbPrims = getNbPrimitives(data);
		if(nbPrims)
		{
			const PxU32* primitives = getPrimitives(indices, data);
			resultMinV = V4LoadU(&boxes[*primitives].minimum.x);
			resultMaxV = V4LoadU(&boxes[*primitives].maximum.x);

			if(nbPrims>1)
			{
				const PxU32* last = primitives + nbPrims;
				primitives++;

				while(primitives!=last)
				{
					resultMinV = V4Min(resultMinV, V4LoadU(&boxes[*primitives].minimum.x));
					resultMaxV = V4Max(resultMaxV, V4LoadU(&boxes[*primitives].maximum.x));
					primitives++;
				}
			}
		}
		else
		{
			// Might happen after a node has been invalidated
			const float max = 0.25f * 1e33f;	// ###
			resultMinV = V4Load(max);
			resultMaxV = V4Load(-max);
		}
	}
	else
	{
		const AABBTreeRuntimeNode* pos = getPos(nodeBase, data);
		const AABBTreeRuntimeNode* neg = pos+1;

		const PxBounds3& posBox = pos->mBV;
		const PxBounds3& negBox = neg->mBV;

		resultMinV = V4Min(V4LoadU(&posBox.minimum.x), V4LoadU(&negBox.minimum.x));
//		resultMaxV = V4Max(V4LoadU(&posBox.maximum.x), V4LoadU(&negBox.maximum.x));

#if PX_INTEL_FAMILY
		// Load (min.z, max.x, max.y, max.z) then rotate to put max.xyz in lanes 0-2,
		// avoiding an out-of-bounds read past the last node's maximum.
		Vec4V posMinV = V4LoadU(&posBox.minimum.z);
		Vec4V negMinV = V4LoadU(&negBox.minimum.z);
		posMinV = _mm_shuffle_ps(posMinV, posMinV, _MM_SHUFFLE(0, 3, 2, 1));
		negMinV = _mm_shuffle_ps(negMinV, negMinV, _MM_SHUFFLE(0, 3, 2, 1));
		resultMaxV = V4Max(posMinV, negMinV);
#else
		// PT: fixes the perf issue but not really convincing
		resultMaxV = Vec4V_From_Vec3V(V3Max(V3LoadU(&posBox.maximum.x), V3LoadU(&negBox.maximum.x)));
#endif
	}

	// PT: the V4 stores overwrite the data after the BV, but we just put it back afterwards
	V4StoreU(resultMinV, &current->mBV.minimum.x);
	V4StoreU(resultMaxV, &current->mBV.maximum.x);
	current->mData = data;
}

// Refits every node in the tree from the current boxes. Iterates from the last node to
// the first: children always have higher indices than their parent, so this is a valid
// bottom-up pass.
void AABBTree::fullRefit(const PxBounds3* boxes)
{
	PX_ASSERT(boxes);

	const PxU32* indices = mIndices;
	AABBTreeRuntimeNode* const nodeBase = mRuntimePool;
	PX_ASSERT(nodeBase);

	// Bottom-up update
	PxU32 index = mTotalNbNodes;
	while(index--)
	{
		AABBTreeRuntimeNode* current = nodeBase + index;
		if(index)
			Ps::prefetch(current - 1);

		refitNode(current, boxes, indices, nodeBase);
	}
}

// Recursively records each node's parent index. The root is passed as its own parent,
// so parentIndices[0] == 0 (used as the termination condition in markNodeForRefit).
static void _createParentArray(PxU32 totalNbNodes, PxU32* parentIndices, const AABBTreeRuntimeNode* parentNode, const AABBTreeRuntimeNode* currentNode, const AABBTreeRuntimeNode* root)
{
	const PxU32 parentIndex = PxU32(parentNode - root);
	const PxU32 currentIndex = PxU32(currentNode - root);
	PX_ASSERT(parentIndex<totalNbNodes);
	PX_ASSERT(currentIndex<totalNbNodes);
	PX_UNUSED(totalNbNodes);
	parentIndices[currentIndex] = parentIndex;

	if(!currentNode->isLeaf())
	{
		_createParentArray(totalNbNodes, parentIndices, currentNode, currentNode->getPos(root), root);
		_createParentArray(totalNbNodes, parentIndices, currentNode, currentNode->getNeg(root), root);
	}
}

// Marks a node and all its ancestors as needing a refit (see refitMarkedNodes).
// Walks up via the lazily-created parent array, stopping early when an already-marked
// ancestor is found (its own ancestors must already be marked too).
void AABBTree::markNodeForRefit(TreeNodeIndex nodeIndex)
{
	if(!mRefitBitmask.getBits())
		mRefitBitmask.init(mTotalNbNodes);

	PX_ASSERT(nodeIndex<mTotalNbNodes);

	// PT: lazy-create parent array. Memory is not wasted for purely static trees, or dynamic trees that only do "full refit".
	if(!mParentIndices)
	{
		mParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mTotalNbNodes, "AABB parent indices"));
		_createParentArray(mTotalNbNodes, mParentIndices, mRuntimePool, mRuntimePool, mRuntimePool);
	}

	PxU32 currentIndex = nodeIndex;
	while(1)
	{
		PX_ASSERT(currentIndex<mTotalNbNodes);
		if(mRefitBitmask.isSet(currentIndex))
		{
			// We can early exit if we already visited the node!
			return;
		}
		else
		{
			mRefitBitmask.setBit(currentIndex);
			const PxU32 currentMarkedWord = currentIndex>>5;
			mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord);

			const PxU32 parentIndex = mParentIndices[currentIndex];
			PX_ASSERT(parentIndex == 0 || parentIndex < currentIndex);
			// The root is its own parent — stop there.
			if(currentIndex == parentIndex)
				break;
			currentIndex = parentIndex;
		}
	}
}

#define FIRST_VERSION
#ifdef FIRST_VERSION
// Refits only the nodes marked via markNodeForRefit, scanning the mark bitmask from the
// highest word down. Within each word, nodes are visited from the highest bit down, so
// children (higher indices) are refit before their parents — a valid bottom-up pass.
// Consumes the marks: the bitmask words are zeroed as they are processed.
void AABBTree::refitMarkedNodes(const PxBounds3* boxes)
{
	if(!mRefitBitmask.getBits())
		return;	// No refit needed

	{
		/*const*/ PxU32* bits = const_cast<PxU32*>(mRefitBitmask.getBits());
		PxU32 size = mRefitHighestSetWord+1;
#ifdef _DEBUG
		if(1)
		{
			const PxU32 totalSize = mRefitBitmask.getSize();
			for(PxU32 i=size;i<totalSize;i++)
			{
				PX_ASSERT(!bits[i]);
			}
		}
		PxU32 nbRefit=0;
#endif
		const PxU32* indices = mIndices;
		AABBTreeRuntimeNode* const nodeBase = mRuntimePool;

		while(size--)
		{
			// Test 32 bits at a time
			const PxU32 currentBits = bits[size];
			if(!currentBits)
				continue;

			PxU32 index = (size+1)<<5;
			PxU32 mask = PxU32(1<<((index-1)&31));
			PxU32 _Count=32;
			while(_Count--)
			{
				index--;
				Ps::prefetch(nodeBase + index);

				PX_ASSERT(size==index>>5);
				PX_ASSERT(mask==PxU32(1<<(index&31)));
				if(currentBits & mask)
				{
					refitNode(nodeBase + index, boxes, indices, nodeBase);
#ifdef _DEBUG
					nbRefit++;
#endif
				}
				mask>>=1;
			}
			bits[size] = 0;
		}

		mRefitHighestSetWord = 0;
//		mRefitBitmask.clearAll();
	}
}
#endif


//#define SECOND_VERSION
#ifdef SECOND_VERSION
void AABBTree::refitMarkedNodes(const PxBounds3* boxes)
{
	/*const*/ PxU32* bits = const_cast<PxU32*>(mRefitBitmask.getBits());
	if(!bits)
		return;	// No refit needed

	const PxU32 lastSetBit = mRefitBitmask.findLast();

	const PxU32* indices = mIndices;
	AABBTreeRuntimeNode* const nodeBase = mRuntimePool;

	for(PxU32 w = 0; w <= lastSetBit >> 5; ++w)
	{
		for(PxU32 b = bits[w]; b; b &= b-1)
		{
const PxU32 index = (PxU32)(w<<5|Ps::lowestSetBit(b)); + + + + while(size--) + { + // Test 32 bits at a time + const PxU32 currentBits = bits[size]; + if(!currentBits) + continue; + + PxU32 index = (size+1)<<5; + PxU32 mask = PxU32(1<<((index-1)&31)); + PxU32 _Count=32; + while(_Count--) + { + index--; + Ps::prefetch(nodeBase + index); + + PX_ASSERT(size==index>>5); + PX_ASSERT(mask==PxU32(1<<(index&31))); + if(currentBits & mask) + { + refitNode(nodeBase + index, boxes, indices, nodeBase); +#ifdef _DEBUG + nbRefit++; +#endif + } + mask>>=1; + } + bits[size] = 0; + } + mRefitHighestSetWord = 0; +// mRefitBitmask.clearAll(); + } +} +#endif + +PX_FORCE_INLINE static void setLeafData(PxU32& leafData, const AABBTreeRuntimeNode& node, const PxU32 indicesOffset) +{ + const PxU32 index = indicesOffset + (node.mData >> 5); + const PxU32 nbPrims = node.getNbPrimitives(); + PX_ASSERT(nbPrims <= 16); + leafData = (index << 5) | ((nbPrims & 15) << 1) | 1; +} + +// Copy the tree into nodes. Update node indices, leaf indices. +void AABBTree::addRuntimeChilds(PxU32& nodeIndex, const AABBTreeMergeData& treeParams) +{ + PX_ASSERT(nodeIndex < mTotalNbNodes + treeParams.mNbNodes + 1); + const PxU32 baseNodeIndex = nodeIndex; + + // copy the src tree into dest tree nodes, update its data + for (PxU32 i = 0; i < treeParams.mNbNodes; i++) + { + PX_ASSERT(nodeIndex < mTotalNbNodes + treeParams.mNbNodes + 1); + mRuntimePool[nodeIndex].mBV = treeParams.mNodes[i].mBV; + if (treeParams.mNodes[i].isLeaf()) + { + setLeafData(mRuntimePool[nodeIndex].mData, treeParams.mNodes[i], mNbIndices); + } + else + { + const PxU32 srcNodeIndex = baseNodeIndex + (treeParams.mNodes[i].getPosIndex()); + mRuntimePool[nodeIndex].mData = srcNodeIndex << 1; + mParentIndices[srcNodeIndex] = nodeIndex; + mParentIndices[srcNodeIndex + 1] = nodeIndex; + } + nodeIndex++; + } +} + +// Merge tree into targetNode, where target node is a leaf +// 1. Allocate new nodes/parent, copy all the nodes/parents +// 2. 
Create new node at the end, copy the data from target node +// 3. Copy the merge tree after the new node, create the parent map for them, update the leaf indices +// Schematic view: +// Target Nodes: ...Tn... +// Input tree: R1->Rc0, Rc1... +// Merged tree: ...Tnc->...->Nc0,R1->Rc0,Rc1... +// where new node: Nc0==Tn and Tnc is not a leaf anymore and points to Nc0 + +void AABBTree::mergeRuntimeLeaf(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 targetMergeNodeIndex) +{ + PX_ASSERT(mParentIndices); + PX_ASSERT(targetNode.isLeaf()); + + // 1. Allocate new nodes/parent, copy all the nodes/parents + // allocate new runtime pool with max combine number of nodes + // we allocate only 1 additional node each merge + AABBTreeRuntimeNode* newRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes + treeParams.mNbNodes + 1]; + PxU32* newParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mTotalNbNodes + treeParams.mNbNodes + 1), "AABB parent indices")); + + // copy the whole target nodes, we will add the new node at the end together with the merge tree + PxMemCopy(newRuntimePool, mRuntimePool, sizeof(AABBTreeRuntimeNode)*(mTotalNbNodes)); + PxMemCopy(newParentIndices, mParentIndices, sizeof(PxU32)*(mTotalNbNodes)); + + // 2. 
Create new node at the end, copy the data from target node + PxU32 nodeIndex = mTotalNbNodes; + // copy the targetNode at the end of the new nodes + newRuntimePool[nodeIndex].mBV = targetNode.mBV; + newRuntimePool[nodeIndex].mData = targetNode.mData; + // update the parent information + newParentIndices[nodeIndex] = targetMergeNodeIndex; + + // mark for refit + if (mRefitBitmask.getBits() && mRefitBitmask.isSet(targetMergeNodeIndex)) + { + mRefitBitmask.setBit(nodeIndex); + const PxU32 currentMarkedWord = nodeIndex >> 5; + mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord); + } + + // swap pointers + PX_DELETE_ARRAY(mRuntimePool); + mRuntimePool = newRuntimePool; + PX_FREE(mParentIndices); + mParentIndices = newParentIndices; + + // 3. Copy the merge tree after the new node, create the parent map for them, update the leaf indices + nodeIndex++; + addRuntimeChilds(nodeIndex, treeParams); + PX_ASSERT(nodeIndex == mTotalNbNodes + 1 + treeParams.mNbNodes); + + // update the parent information for the input tree root node + mParentIndices[mTotalNbNodes + 1] = targetMergeNodeIndex; + + // fix the child information for the target node, was a leaf before + mRuntimePool[targetMergeNodeIndex].mData = mTotalNbNodes << 1; + + // update the total number of nodes + mTotalNbNodes = mTotalNbNodes + 1 + treeParams.mNbNodes; +} + +// Merge tree into targetNode, where target node is not a leaf +// 1. Allocate new nodes/parent, copy the nodes/parents till targetNodePosIndex +// 2. Create new node , copy the data from target node +// 3. Copy the rest of the target tree nodes/parents at the end -> targetNodePosIndex + 1 + treeParams.mNbNodes +// 4. Copy the merge tree after the new node, create the parent map for them, update the leaf indices +// 5. Go through the nodes copied at the end and fix the parents/childs +// Schematic view: +// Target Nodes: ...Tn->...->Tc0,Tc1... +// Input tree: R1->Rc0, Rc1... +// Merged tree: ...Tn->...->Nc0,R1->Rc0,Rc1...,Tc0,Tc1... 
+// where new node: Nc0->...->Tc0,Tc1 +void AABBTree::mergeRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 targetMergeNodeIndex) +{ + PX_ASSERT(mParentIndices); + PX_ASSERT(!targetNode.isLeaf()); + + // Get the target node child pos, this is where we insert the new node and the input tree + const PxU32 targetNodePosIndex = targetNode.getPosIndex(); + + // 1. Allocate new nodes/parent, copy the nodes/parents till targetNodePosIndex + // allocate new runtime pool with max combine number of nodes + // we allocate only 1 additional node each merge + AABBTreeRuntimeNode* newRuntimePool = PX_NEW(AABBTreeRuntimeNode)[mTotalNbNodes + treeParams.mNbNodes + 1]; + PxU32* newParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mTotalNbNodes + treeParams.mNbNodes + 1), "AABB parent indices")); + // copy the untouched part of the nodes and parents + PxMemCopy(newRuntimePool, mRuntimePool, sizeof(AABBTreeRuntimeNode)*(targetNodePosIndex)); + PxMemCopy(newParentIndices, mParentIndices, sizeof(PxU32)*(targetNodePosIndex)); + + PxU32 nodeIndex = targetNodePosIndex; + // 2. Create new node , copy the data from target node + newRuntimePool[nodeIndex].mBV = targetNode.mBV; + newRuntimePool[nodeIndex].mData = ((targetNode.mData >> 1) + 1 + treeParams.mNbNodes) << 1; + // update parent information + newParentIndices[nodeIndex] = targetMergeNodeIndex; + + // handle mark for refit + if(mRefitBitmask.getBits() && mRefitBitmask.isSet(targetMergeNodeIndex)) + { + mRefitBitmask.setBit(nodeIndex); + const PxU32 currentMarkedWord = nodeIndex >> 5; + mRefitHighestSetWord = PxMax(mRefitHighestSetWord, currentMarkedWord); + } + + // 3. 
Copy the rest of the target tree nodes/parents at the end -> targetNodePosIndex + 1 + treeParams.mNbNodes + if(mTotalNbNodes - targetNodePosIndex) + { + PX_ASSERT(mTotalNbNodes - targetNodePosIndex > 0); + PxMemCopy(newRuntimePool + targetNodePosIndex + 1 + treeParams.mNbNodes, mRuntimePool + targetNodePosIndex, sizeof(AABBTreeRuntimeNode)*(mTotalNbNodes - targetNodePosIndex)); + PxMemCopy(newParentIndices + targetNodePosIndex + 1 + treeParams.mNbNodes, mParentIndices + targetNodePosIndex, sizeof(PxU32)*(mTotalNbNodes - targetNodePosIndex)); + } + // swap the pointers, release the old memory + PX_DELETE_ARRAY(mRuntimePool); + mRuntimePool = newRuntimePool; + PX_FREE(mParentIndices); + mParentIndices = newParentIndices; + + // 4. Copy the merge tree after the new node, create the parent map for them, update the leaf indices + nodeIndex++; + addRuntimeChilds(nodeIndex, treeParams); + PX_ASSERT(nodeIndex == targetNodePosIndex + 1 + treeParams.mNbNodes); + // update the total number of nodes + mTotalNbNodes = mTotalNbNodes + 1 + treeParams.mNbNodes; + + // update the parent information for the input tree root node + mParentIndices[targetNodePosIndex + 1] = targetMergeNodeIndex; + + // 5. 
Go through the nodes copied at the end and fix the parents/childs + for (PxU32 i = targetNodePosIndex + 1 + treeParams.mNbNodes; i < mTotalNbNodes; i++) + { + // check if the parent is the targetNode, if yes update the parent to new node + if(mParentIndices[i] == targetMergeNodeIndex) + { + mParentIndices[i] = targetNodePosIndex; + } + else + { + // if parent node has been moved, update the parent node + if(mParentIndices[i] >= targetNodePosIndex) + { + mParentIndices[i] = mParentIndices[i] + 1 + treeParams.mNbNodes; + } + else + { + // if parent has not been moved, update its child information + const PxU32 parentIndex = mParentIndices[i]; + // update the child information to point to Pos child + if(i % 2 != 0) + { + const PxU32 srcNodeIndex = mRuntimePool[parentIndex].getPosIndex(); + // if child index points to a node that has been moved, update the child index + PX_ASSERT(!mRuntimePool[parentIndex].isLeaf()); + PX_ASSERT(srcNodeIndex > targetNodePosIndex); + mRuntimePool[parentIndex].mData = (1 + treeParams.mNbNodes + srcNodeIndex) << 1; + } + } + } + if(!mRuntimePool[i].isLeaf()) + { + // update the child node index + const PxU32 srcNodeIndex = 1 + treeParams.mNbNodes + mRuntimePool[i].getPosIndex(); + mRuntimePool[i].mData = srcNodeIndex << 1; + } + } +} + +// traverse the target node, the tree is inside the targetNode, and find the best place where merge the tree +void AABBTree::traverseRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& treeParams, PxU32 nodeIndex) +{ + const AABBTreeRuntimeNode& srcNode = treeParams.getRootNode(); + PX_ASSERT(srcNode.mBV.isInside(targetNode.mBV)); + + // Check if the srcNode(tree) can fit inside any of the target childs. 
If yes, traverse the target tree child + AABBTreeRuntimeNode& targetPosChild = *targetNode.getPos(mRuntimePool); + if(srcNode.mBV.isInside(targetPosChild.mBV)) + { + return traverseRuntimeNode(targetPosChild, treeParams, targetNode.getPosIndex()); + } + + AABBTreeRuntimeNode& targetNegChild = *targetNode.getNeg(mRuntimePool); + if (srcNode.mBV.isInside(targetNegChild.mBV)) + { + return traverseRuntimeNode(targetNegChild, treeParams, targetNode.getNegIndex()); + } + + // we cannot traverse target anymore, lets add the srcTree to current target node + if(targetNode.isLeaf()) + mergeRuntimeLeaf(targetNode, treeParams, nodeIndex); + else + mergeRuntimeNode(targetNode, treeParams, nodeIndex); +} + +// Merge the input tree into current tree. +// Traverse the tree and find the smallest node, where the whole new tree fits. When we find the node +// we create one new node pointing to the original children and the to the input tree root. +void AABBTree::mergeTree(const AABBTreeMergeData& treeParams) +{ + // allocate new indices buffer + PxU32* newIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*(mNbIndices + treeParams.mNbIndices), "AABB tree indices")); + PxMemCopy(newIndices, mIndices, sizeof(PxU32)*mNbIndices); + PX_FREE(mIndices); + mIndices = newIndices; + mTotalPrims += treeParams.mNbIndices; + + // copy the new indices, re-index using the provided indicesOffset. Note that indicesOffset + // must be provided, as original mNbIndices can be different than indicesOffset dues to object releases. 
+ for (PxU32 i = 0; i < treeParams.mNbIndices; i++) + { + mIndices[mNbIndices + i] = treeParams.mIndicesOffset + treeParams.mIndices[i]; + } + + // check the mRefitBitmask if we fit all the new nodes + mRefitBitmask.resize(mTotalNbNodes + treeParams.mNbNodes + 1); + + // create the parent information so we can update it + if(!mParentIndices) + { + mParentIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mTotalNbNodes, "AABB parent indices")); + _createParentArray(mTotalNbNodes, mParentIndices, mRuntimePool, mRuntimePool, mRuntimePool); + } + + // if new tree is inside the root AABB we will traverse the tree to find better node where to attach the tree subnodes + // if the root is a leaf we merge with the root. + if(treeParams.getRootNode().mBV.isInside(mRuntimePool[0].mBV) && !mRuntimePool[0].isLeaf()) + { + traverseRuntimeNode(mRuntimePool[0], treeParams, 0); + } + else + { + if(mRuntimePool[0].isLeaf()) + { + mergeRuntimeLeaf(mRuntimePool[0], treeParams, 0); + } + else + { + mergeRuntimeNode(mRuntimePool[0], treeParams, 0); + } + + // increase the tree root AABB + mRuntimePool[0].mBV.include(treeParams.getRootNode().mBV); + } + +#ifdef _DEBUG + //verify parent indices + for (PxU32 i = 0; i < mTotalNbNodes; i++) + { + if (i) + { + PX_ASSERT(mRuntimePool[mParentIndices[i]].getPosIndex() == i || mRuntimePool[mParentIndices[i]].getNegIndex() == i); + } + if (!mRuntimePool[i].isLeaf()) + { + PX_ASSERT(mParentIndices[mRuntimePool[i].getPosIndex()] == i); + PX_ASSERT(mParentIndices[mRuntimePool[i].getNegIndex()] == i); + } + } + + // verify the tree nodes, leafs + for (PxU32 i = 0; i < mTotalNbNodes; i++) + { + if (mRuntimePool[i].isLeaf()) + { + const PxU32 index = mRuntimePool[i].mData >> 5; + const PxU32 nbPrim = mRuntimePool[i].getNbPrimitives(); + PX_ASSERT(index + nbPrim <= mNbIndices + treeParams.mNbIndices); + } + else + { + const PxU32 nodeIndex = (mRuntimePool[i].getPosIndex()); + PX_ASSERT(nodeIndex < mTotalNbNodes); + } + } +#endif // _DEBUG + + 
mNbIndices += treeParams.mNbIndices; +} + + + diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.h b/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.h new file mode 100644 index 00000000..0962747b --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBTree.h @@ -0,0 +1,364 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef SQ_AABBTREE_H +#define SQ_AABBTREE_H + +#include "foundation/PxMemory.h" +#include "foundation/PxBounds3.h" +#include "PsUserAllocated.h" +#include "PsVecMath.h" +#include "SqTypedef.h" +#include "PsArray.h" + +namespace physx +{ + +using namespace shdfnd::aos; + +namespace Sq +{ + class AABBTreeUpdateMap; + + typedef Ps::Pair<PxU32, PxU32> TreeMergePair; + typedef Ps::Array<TreeMergePair > TreeMergeMap; + + class BitArray + { + public: + BitArray() : mBits(NULL), mSize(0) {} + BitArray(PxU32 nb_bits) { init(nb_bits); } + ~BitArray() { PX_FREE_AND_RESET(mBits); mBits = NULL; } + + bool init(PxU32 nb_bits); + + // Data management + PX_FORCE_INLINE void setBit(PxU32 bit_number) + { + mBits[bit_number>>5] |= 1<<(bit_number&31); + } + PX_FORCE_INLINE void clearBit(PxU32 bit_number) + { + mBits[bit_number>>5] &= ~(1<<(bit_number&31)); + } + PX_FORCE_INLINE void toggleBit(PxU32 bit_number) + { + mBits[bit_number>>5] ^= 1<<(bit_number&31); + } + + PX_FORCE_INLINE void clearAll() { PxMemZero(mBits, mSize*4); } + PX_FORCE_INLINE void setAll() { PxMemSet(mBits, 0xff, mSize*4); } + + void resize(PxU32 maxBitNumber); + + // Data access + PX_FORCE_INLINE Ps::IntBool isSet(PxU32 bit_number) const + { + return Ps::IntBool(mBits[bit_number>>5] & (1<<(bit_number&31))); + } + + PX_FORCE_INLINE const PxU32* getBits() const { return mBits; } + PX_FORCE_INLINE PxU32 getSize() const { return mSize; } + + protected: + PxU32* mBits; //!< Array of bits + PxU32 mSize; //!< Size of the array in dwords + }; + + //! Contains AABB-tree build statistics + struct BuildStats + { + BuildStats() : mCount(0), mTotalPrims(0) {} + + PxU32 mCount; //!< Number of nodes created + PxU32 mTotalPrims; //!< Total accumulated number of primitives. Should be much higher than the source + //!< number of prims, since it accumulates all prims covered by each node (i.e. 
internal + //!< nodes too, not just leaf ones) + + PX_FORCE_INLINE void reset() { mCount = mTotalPrims = 0; } + + PX_FORCE_INLINE void setCount(PxU32 nb) { mCount=nb; } + PX_FORCE_INLINE void increaseCount(PxU32 nb) { mCount+=nb; } + PX_FORCE_INLINE PxU32 getCount() const { return mCount; } + }; + + //! Contains AABB-tree build parameters + class AABBTreeBuildParams : public Ps::UserAllocated + { + public: + AABBTreeBuildParams(PxU32 limit=1, PxU32 nb_prims=0, const PxBounds3* boxes=NULL) : + mLimit(limit), mNbPrimitives(nb_prims), mAABBArray(boxes), mCache(NULL) {} + ~AABBTreeBuildParams() + { + reset(); + } + + PX_FORCE_INLINE void reset() + { + mLimit = mNbPrimitives = 0; + mAABBArray = NULL; + PX_FREE_AND_RESET(mCache); + } + + PxU32 mLimit; //!< Limit number of primitives / node. If limit is 1, build a complete tree (2*N-1 nodes) + PxU32 mNbPrimitives; //!< Number of (source) primitives. + const PxBounds3* mAABBArray; //!< Shortcut to an app-controlled array of AABBs. + PxVec3* mCache; //!< Cache for AABB centers - managed by build code. + }; + + class NodeAllocator; + + //! AABB tree node used for building + class AABBTreeBuildNode : public Ps::UserAllocated + { + public: + PX_FORCE_INLINE AABBTreeBuildNode() {} + PX_FORCE_INLINE ~AABBTreeBuildNode() {} + + PX_FORCE_INLINE const PxBounds3& getAABB() const { return mBV; } + PX_FORCE_INLINE const AABBTreeBuildNode* getPos() const { return mPos; } + PX_FORCE_INLINE const AABBTreeBuildNode* getNeg() const { const AABBTreeBuildNode* P = mPos; return P ? 
P+1 : NULL; } + + PX_FORCE_INLINE bool isLeaf() const { return !getPos(); } + + PxBounds3 mBV; //!< Global bounding-volume enclosing all the node-related primitives + const AABBTreeBuildNode* mPos; //!< "Positive" & "Negative" children + + PxU32 mNodeIndex; //!< Index of node-related primitives (in the tree's mIndices array) + PxU32 mNbPrimitives; //!< Number of primitives for this node + + // Data access + PX_FORCE_INLINE PxU32 getNbPrimitives() const { return mNbPrimitives; } + + PX_FORCE_INLINE PxU32 getNbRuntimePrimitives() const { return mNbPrimitives; } + PX_FORCE_INLINE void setNbRunTimePrimitives(PxU32 val) { mNbPrimitives = val; } + PX_FORCE_INLINE const PxU32* getPrimitives(const PxU32* base) const { return base+mNodeIndex; } + PX_FORCE_INLINE PxU32* getPrimitives(PxU32* base) { return base+mNodeIndex; } + + // Internal methods + void subdivide(const AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& allocator, PxU32* const indices); + void _buildHierarchy(AABBTreeBuildParams& params, BuildStats& stats, NodeAllocator& allocator, PxU32* const indices); + }; + + //! 
AABB tree node used for runtime (smaller than for build) + class AABBTreeRuntimeNode : public Ps::UserAllocated + { + public: + PX_FORCE_INLINE AABBTreeRuntimeNode() {} + PX_FORCE_INLINE ~AABBTreeRuntimeNode() {} + + PX_FORCE_INLINE PxU32 isLeaf() const { return mData&1; } + + PX_FORCE_INLINE const PxU32* getPrimitives(const PxU32* base) const { return base + (mData>>5); } + PX_FORCE_INLINE PxU32* getPrimitives(PxU32* base) { return base + (mData>>5); } + PX_FORCE_INLINE PxU32 getNbPrimitives() const { return (mData>>1)&15; } + + PX_FORCE_INLINE PxU32 getPosIndex() const { return mData>>1; } + PX_FORCE_INLINE PxU32 getNegIndex() const { return (mData>>1) + 1; } + PX_FORCE_INLINE const AABBTreeRuntimeNode* getPos(const AABBTreeRuntimeNode* base) const { return base + (mData>>1); } + PX_FORCE_INLINE const AABBTreeRuntimeNode* getNeg(const AABBTreeRuntimeNode* base) const { const AABBTreeRuntimeNode* P = getPos(base); return P ? P+1 : NULL;} + + PX_FORCE_INLINE AABBTreeRuntimeNode* getPos(AABBTreeRuntimeNode* base) { return base + (mData >> 1); } + PX_FORCE_INLINE AABBTreeRuntimeNode* getNeg(AABBTreeRuntimeNode* base) { AABBTreeRuntimeNode* P = getPos(base); return P ? 
P + 1 : NULL; } + + PX_FORCE_INLINE PxU32 getNbRuntimePrimitives() const { return (mData>>1)&15; } + PX_FORCE_INLINE void setNbRunTimePrimitives(PxU32 val) + { + PX_ASSERT(val<16); + PxU32 data = mData & ~(15<<1); + data |= val<<1; + mData = data; + } + + PX_FORCE_INLINE void getAABBCenterExtentsV(Vec3V* center, Vec3V* extents) const + { + const Vec4V minV = V4LoadU(&mBV.minimum.x); + const Vec4V maxV = V4LoadU(&mBV.maximum.x); + + const float half = 0.5f; + const FloatV halfV = FLoad(half); + + *extents = Vec3V_From_Vec4V(V4Scale(V4Sub(maxV, minV), halfV)); + *center = Vec3V_From_Vec4V(V4Scale(V4Add(maxV, minV), halfV)); + } + + PX_FORCE_INLINE void getAABBCenterExtentsV2(Vec3V* center, Vec3V* extents) const + { + const Vec4V minV = V4LoadU(&mBV.minimum.x); + const Vec4V maxV = V4LoadU(&mBV.maximum.x); + + *extents = Vec3V_From_Vec4V(V4Sub(maxV, minV)); + *center = Vec3V_From_Vec4V(V4Add(maxV, minV)); + } + + PX_FORCE_INLINE void getAABBMinMaxV(Vec4V* minV, Vec4V* maxV) const + { + *minV = V4LoadU(&mBV.minimum.x); + *maxV = V4LoadU(&mBV.maximum.x); + } + + PxBounds3 mBV; // Global bounding-volume enclosing all the node-related primitives + PxU32 mData; // 27 bits node or prim index|4 bits #prims|1 bit leaf + }; + + //! 
Contains AABB-tree merge parameters + class AABBTreeMergeData + { + public: + AABBTreeMergeData(PxU32 nbNodes, const AABBTreeRuntimeNode* nodes, PxU32 nbIndices, const PxU32* indices, PxU32 indicesOffset) : + mNbNodes(nbNodes), mNodes(nodes), mNbIndices(nbIndices), mIndices(indices), mIndicesOffset(indicesOffset) + { + } + + ~AABBTreeMergeData() {} + + PX_FORCE_INLINE const AABBTreeRuntimeNode& getRootNode() const { return mNodes[0]; } + + public: + PxU32 mNbNodes; //!< Number of nodes of AABB tree merge + const AABBTreeRuntimeNode* mNodes; //!< Nodes of AABB tree merge + + PxU32 mNbIndices; //!< Number of indices of AABB tree merge + const PxU32* mIndices; //!< Indices of AABB tree merge + + PxU32 mIndicesOffset; //!< Indices offset from pruning pool + }; + + // Progressive building + class FIFOStack; + //~Progressive building + + //! For complete trees we can predict the final number of nodes and preallocate them. For incomplete trees we can't. + //! But we don't want to allocate nodes one by one (which would be quite slow), so we use this helper class to + //! allocate N nodes at once, while minimizing the amount of nodes allocated for nothing. An initial amount of + //! nodes is estimated using the max number for a complete tree, and the user-defined number of primitives per leaf. + //! In ideal cases this estimated number will be quite close to the final number of nodes. When that number is not + //! enough though, slabs of N=1024 extra nodes are allocated until the build is complete. 
+ class NodeAllocator : public Ps::UserAllocated + { + public: + NodeAllocator(); + ~NodeAllocator(); + + void release(); + void init(PxU32 nbPrimitives, PxU32 limit); + void flatten(AABBTreeRuntimeNode* dest); + AABBTreeBuildNode* getBiNode(); + + AABBTreeBuildNode* mPool; + + struct Slab + { + PX_FORCE_INLINE Slab() {} + PX_FORCE_INLINE Slab(AABBTreeBuildNode* pool, PxU32 nbUsedNodes, PxU32 maxNbNodes) : mPool(pool), mNbUsedNodes(nbUsedNodes), mMaxNbNodes(maxNbNodes) {} + AABBTreeBuildNode* mPool; + PxU32 mNbUsedNodes; + PxU32 mMaxNbNodes; + }; + Ps::Array<Slab> mSlabs; + PxU32 mCurrentSlabIndex; + PxU32 mTotalNbNodes; + }; + + //! AABB-tree, N primitives/leaf + class AABBTree : public Ps::UserAllocated + { + public: + AABBTree(); + ~AABBTree(); + // Build + bool build(AABBTreeBuildParams& params); + // Progressive building + PxU32 progressiveBuild(AABBTreeBuildParams& params, BuildStats& stats, PxU32 progress, PxU32 limit); + //~Progressive building + void release(bool clearRefitMap=true); + + // Merge tree with another one + void mergeTree(const AABBTreeMergeData& tree); + // Initialize tree from given merge data + void initTree(const AABBTreeMergeData& tree); + + // Data access + PX_FORCE_INLINE const PxU32* getIndices() const { return mIndices; } + PX_FORCE_INLINE PxU32* getIndices() { return mIndices; } + PX_FORCE_INLINE void setIndices(PxU32* indices) { mIndices = indices; } + PX_FORCE_INLINE PxU32 getNbNodes() const { return mTotalNbNodes; } + PX_FORCE_INLINE const AABBTreeRuntimeNode* getNodes() const { return mRuntimePool; } + PX_FORCE_INLINE AABBTreeRuntimeNode* getNodes() { return mRuntimePool; } + PX_FORCE_INLINE void setNodes(AABBTreeRuntimeNode* nodes) { mRuntimePool = nodes; } + PX_FORCE_INLINE PxU32 getTotalPrims() const { return mTotalPrims; } + +#if PX_DEBUG + void validate() const; +#endif + void shiftOrigin(const PxVec3& shift); + + // Shift indices of the tree by offset. 
Used for merged trees, when initial indices needs to be shifted to match indices in current pruning pool + void shiftIndices(PxU32 offset); + + private: + PxU32* mIndices; //!< Indices in the app list. Indices are reorganized during build (permutation). + PxU32 mNbIndices; //!< Nb indices + AABBTreeRuntimeNode* mRuntimePool; //!< Linear pool of nodes. + NodeAllocator mNodeAllocator; + PxU32* mParentIndices; //!< PT: hot/cold split, keep parent data in separate array + // Stats + PxU32 mTotalNbNodes; //!< Number of nodes in the tree. + PxU32 mTotalPrims; //!< Copy of final BuildStats::mTotalPrims + + // Progressive building + FIFOStack* mStack; + //~Progressive building + bool buildInit(AABBTreeBuildParams& params, BuildStats& stats); + void buildEnd(AABBTreeBuildParams& params, BuildStats& stats); + + // tree merge + void mergeRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& tree, PxU32 targetNodeIndex); + void mergeRuntimeLeaf(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& tree, PxU32 targetNodeIndex); + void addRuntimeChilds(PxU32& nodeIndex, const AABBTreeMergeData& tree); + void traverseRuntimeNode(AABBTreeRuntimeNode& targetNode, const AABBTreeMergeData& tree, PxU32 nodeIndex); + // REFIT + public: + void fullRefit(const PxBounds3* boxes); + + // adds node[index] to a list of nodes to refit when refitMarkedNodes is called + // Note that this includes updating the hierarchy up the chain + void markNodeForRefit(TreeNodeIndex nodeIndex); + void refitMarkedNodes(const PxBounds3* boxes); + private: + BitArray mRefitBitmask; //!< bit is set for each node index in markForRefit + PxU32 mRefitHighestSetWord; + //~REFIT + }; + +} // namespace Sq + +} + +#endif // SQ_AABBTREE_H diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeQuery.h b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeQuery.h new file mode 100644 index 00000000..299d8993 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeQuery.h @@ -0,0 +1,234 @@ +// This code 
contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef SQ_AABBTREEQUERY_H
#define SQ_AABBTREEQUERY_H

#include "SqAABBTree.h"
#include "SqPrunerTestsSIMD.h"

namespace physx
{
namespace Sq
{
	// Fixed-size traversal stack for the non-recursive tree walks below.
	// PX_ASSERTs at each push guard against overflow.
	#define RAW_TRAVERSAL_STACK_SIZE 256

	//////////////////////////////////////////////////////////////////////////

	// Loads the bounds of pool entry 'poolIndex' and returns them as doubled
	// values: center*2 = (max+min), extents*2 = (max-min). Callers either
	// rescale by 0.5 (overlap path) or feed tests pre-scaled for *2 values
	// (raycast path) — this saves the halving per box.
	static PX_FORCE_INLINE void getBoundsTimesTwo(Vec4V& center, Vec4V& extents, const PxBounds3* boxes, PoolIndex poolIndex)
	{
		const PxBounds3* objectBounds = boxes + poolIndex;

		// NOTE(review): V4LoadU reads 4 floats from &minimum.x, i.e. it also
		// touches maximum.x — presumably safe given PxBounds3 stores the two
		// PxVec3 members contiguously; confirm against PxBounds3's layout.
		const Vec4V minV = V4LoadU(&objectBounds->minimum.x);
		const Vec4V maxV = V4LoadU(&objectBounds->maximum.x);

		center = V4Add(maxV, minV);
		extents = V4Sub(maxV, minV);
	}

	//////////////////////////////////////////////////////////////////////////

	// Generic overlap query over an AABBTree. 'Test' is a SIMD volume test
	// (see SqPrunerTestsSIMD.h) invoked on node bounds as (center, extents).
	// Returns false if the visitor aborted the query, true otherwise.
	template<typename Test>
	class AABBTreeOverlap
	{
	public:
		bool operator()(const PrunerPayload* objects, const PxBounds3* boxes, const AABBTree& tree, const Test& test, PrunerCallback& visitor)
		{
			using namespace Cm;

			const AABBTreeRuntimeNode* stack[RAW_TRAVERSAL_STACK_SIZE];
			const AABBTreeRuntimeNode* const nodeBase = tree.getNodes();
			stack[0] = nodeBase;
			PxU32 stackIndex = 1;

			while (stackIndex > 0)
			{
				const AABBTreeRuntimeNode* node = stack[--stackIndex];
				Vec3V center, extents;
				node->getAABBCenterExtentsV(&center, &extents);
				// Descend as long as the current node's bounds pass the test;
				// siblings are pushed, the first child is followed in-place.
				while (test(center, extents))
				{
					if (node->isLeaf())
					{
						PxU32 nbPrims = node->getNbPrimitives();
						// With a single primitive the leaf bounds are the
						// primitive bounds, so the per-object test is redundant.
						const bool doBoxTest = nbPrims > 1;
						const PxU32* prims = node->getPrimitives(tree.getIndices());
						while (nbPrims--)
						{
							const PxU32* prunableIndex = prims;
							prims++;

							const PoolIndex poolIndex = *prunableIndex;
							if (doBoxTest)
							{
								Vec4V center2, extents2;
								getBoundsTimesTwo(center2, extents2, boxes, poolIndex);

								// Rescale the doubled bounds back to actual size
								// for the per-object test.
								const float half = 0.5f;
								const FloatV halfV = FLoad(half);

								const Vec4V extents_ = V4Scale(extents2, halfV);
								const Vec4V center_ = V4Scale(center2, halfV);

								if (!test(Vec3V_From_Vec4V(center_), Vec3V_From_Vec4V(extents_)))
									continue;
							}

							PxReal unusedDistance;
							// Visitor returning false aborts the whole query.
							if (!visitor.invoke(unusedDistance, objects[poolIndex]))
								return false;
						}
						break;
					}

					const AABBTreeRuntimeNode* children = node->getPos(nodeBase);

					node = children;
					stack[stackIndex++] = children + 1;
					PX_ASSERT(stackIndex < RAW_TRAVERSAL_STACK_SIZE);
					node->getAABBCenterExtentsV(&center, &extents);
				}
			}
			return true;
		}
	};

	//////////////////////////////////////////////////////////////////////////

	// Tests every primitive of a leaf node against the ray/sweep test and
	// invokes the callback on hits. On a closer hit (md < oldMaxDist) the ray
	// is shortened for the remainder of the traversal. Returns false if the
	// callback aborted the query.
	template <bool tInflate> // use inflate=true for sweeps, inflate=false for raycasts
	static PX_FORCE_INLINE bool doLeafTest(	const AABBTreeRuntimeNode* node, Gu::RayAABBTest& test, PxReal& md, PxReal oldMaxDist,
											const PrunerPayload* objects, const PxBounds3* boxes, const AABBTree& tree,
											PxReal& maxDist, PrunerCallback& pcb)
	{
		PxU32 nbPrims = node->getNbPrimitives();
		const bool doBoxTest = nbPrims > 1;
		const PxU32* prims = node->getPrimitives(tree.getIndices());
		while (nbPrims--)
		{
			const PxU32* prunableIndex = prims;
			prims++;

			const PoolIndex poolIndex = *prunableIndex;
			if (doBoxTest)
			{
				// RayAABBTest is initialized with doubled values (see below),
				// so the doubled per-object bounds can be tested directly.
				Vec4V center_, extents_;
				getBoundsTimesTwo(center_, extents_, boxes, poolIndex);

				if (!test.check<tInflate>(Vec3V_From_Vec4V(center_), Vec3V_From_Vec4V(extents_)))
					continue;
			}

			if (!pcb.invoke(md, objects[poolIndex]))
				return false;

			if (md < oldMaxDist)
			{
				// Closer hit found: clip the ray so later nodes can be culled.
				maxDist = md;
				test.setDistance(md);
			}
		}
		return true;
	}

	//////////////////////////////////////////////////////////////////////////

	// Ordered raycast/sweep traversal: when both children are hit, the child
	// whose center is nearer along the ray is visited first, the other is
	// pushed for later. Returns false if the callback aborted the query.
	template <bool tInflate> // use inflate=true for sweeps, inflate=false for raycasts
	class AABBTreeRaycast
	{
	public:
		bool operator()(
			const PrunerPayload* objects, const PxBounds3* boxes, const AABBTree& tree,
			const PxVec3& origin, const PxVec3& unitDir, PxReal& maxDist, const PxVec3& inflation,
			PrunerCallback& pcb)
		{
			using namespace Cm;

			// PT: we will pass center*2 and extents*2 to the ray-box code, to save some work per-box
			// So we initialize the test with values multiplied by 2 as well, to get correct results
			Gu::RayAABBTest test(origin*2.0f, unitDir*2.0f, maxDist, inflation*2.0f);

			const AABBTreeRuntimeNode* stack[RAW_TRAVERSAL_STACK_SIZE];	// stack always contains PPU addresses
			const AABBTreeRuntimeNode* const nodeBase = tree.getNodes();
			stack[0] = nodeBase;
			PxU32 stackIndex = 1;

			PxReal oldMaxDist;
			while (stackIndex--)
			{
				const AABBTreeRuntimeNode* node = stack[stackIndex];
				Vec3V center, extents;
				node->getAABBCenterExtentsV2(&center, &extents);
				if (test.check<tInflate>(center, extents))	// TODO: try timestamp ray shortening to skip this
				{
					PxReal md = maxDist;	// has to be before the goto below to avoid compile error
					while (!node->isLeaf())
					{
						const AABBTreeRuntimeNode* children = node->getPos(nodeBase);

						Vec3V c0, e0;
						children[0].getAABBCenterExtentsV2(&c0, &e0);
						const PxU32 b0 = test.check<tInflate>(c0, e0);

						Vec3V c1, e1;
						children[1].getAABBCenterExtentsV2(&c1, &e1);
						const PxU32 b1 = test.check<tInflate>(c1, e1);

						if (b0 && b1)	// if both intersect, push the one with the further center on the stack for later
						{
							// & 1 because FAllGrtr behavior differs across platforms
							const PxU32 bit = FAllGrtr(V3Dot(V3Sub(c1, c0), test.mDir), FZero()) & 1;
							stack[stackIndex++] = children + bit;
							node = children + (1 - bit);
							PX_ASSERT(stackIndex < RAW_TRAVERSAL_STACK_SIZE);
						}
						else if (b0)
							node = children;
						else if (b1)
							node = children + 1;
						else
							goto skip_leaf_code;	// neither child hit: abandon this subtree
					}

					oldMaxDist = maxDist;	// we copy since maxDist can be updated in the callback and md<maxDist test below can fail

					if (!doLeafTest<tInflate>(node, test, md, oldMaxDist,
						objects, boxes, tree,
						maxDist,
						pcb))
						return false;
				skip_leaf_code:;
				}
			}
			return true;
		}
	};
}
}

#endif // SQ_AABBTREEQUERY_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.cpp b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.cpp new file mode 100644 index 00000000..807de9d1 --- /dev/null +++
b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.cpp @@ -0,0 +1,197 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "SqAABBTreeUpdateMap.h"
#include "SqAABBTree.h"

using namespace physx;
using namespace Sq;

// Capacity slack (in entries) below which we don't bother shrinking the map.
static const PxU32 SHRINK_THRESHOLD = 1024;

// Rebuilds the pool-index -> tree-node-index mapping from scratch for the
// given tree. 'nbObjects' is the number of pruning-pool entries the map must
// span; entries not referenced by any leaf stay INVALID_NODE_ID.
void AABBTreeUpdateMap::initMap(PxU32 nbObjects, const AABBTree& tree)
{
	if(!nbObjects)
	{
		release();
		return;
	}

	// Memory management
	{
		const PxU32 mapSize = nbObjects;
		// Reserve 25% slack so repeated small growths don't each reallocate.
		const PxU32 targetCapacity = mapSize + (mapSize>>2);

		PxU32 currentCapacity = mMapping.capacity();
		if( ( targetCapacity < (currentCapacity>>1) ) && ( (currentCapacity-targetCapacity) > SHRINK_THRESHOLD ) )
		{
			// trigger reallocation of a smaller array, there is enough memory to save
			currentCapacity = 0;
		}

		if(mapSize > currentCapacity)
		{
			// the mapping values are invalid and reset below in any case
			// so there is no need to copy the values at all
			mMapping.reset();
			mMapping.reserve(targetCapacity);	// since size is 0, reserve will also just allocate
		}

		mMapping.forceSize_Unsafe(mapSize);

		for(PxU32 i=0;i<mapSize;i++)
			mMapping[i] = INVALID_NODE_ID;
	}

	// Walk all leaves and record, for every primitive they hold, which node
	// owns it. This is the inverse of the tree's primitive-index arrays.
	const PxU32 nbNodes = tree.getNbNodes();
	const AABBTreeRuntimeNode* nodes = tree.getNodes();
	const PxU32* indices = tree.getIndices();
	for(TreeNodeIndex i=0;i<nbNodes;i++)
	{
		if(nodes[i].isLeaf())
		{
			const PxU32 nbPrims = nodes[i].getNbRuntimePrimitives();
			// PT: with multiple primitives per node, several mapping entries will point to the same node.
			PX_ASSERT(nbPrims<=16);
			for(PxU32 j=0;j<nbPrims;j++)
			{
				const PxU32 index = nodes[i].getPrimitives(indices)[j];
				PX_ASSERT(index<nbObjects);
				mMapping[index] = i;
			}
		}
	}
}

// Keeps the map and the tree's leaf primitive lists consistent after the
// pruning pool removed entry 'prunerIndex0' with a swap-with-last: the pool
// object previously at 'prunerIndex1' now lives at 'prunerIndex0'.
void AABBTreeUpdateMap::invalidate(PoolIndex prunerIndex0, PoolIndex prunerIndex1, AABBTree& tree)
{
	// prunerIndex0 and prunerIndex1 are both indices into the pool, not handles
	// prunerIndex0 is the index in the pruning pool for the node that was just removed
	// prunerIndex1 is the index in the pruning pool for the node
	const TreeNodeIndex nodeIndex0 = prunerIndex0<mMapping.size() ? mMapping[prunerIndex0] : INVALID_NODE_ID;
	const TreeNodeIndex nodeIndex1 = prunerIndex1<mMapping.size() ? mMapping[prunerIndex1] : INVALID_NODE_ID;

	//printf("map invalidate pi0:%x ni0:%x\t",prunerIndex0,nodeIndex0);
	//printf(" replace with pi1:%x ni1:%x\n",prunerIndex1,nodeIndex1);

	// if nodeIndex0 exists:
	//		invalidate node 0
	//		invalidate map prunerIndex0
	// if nodeIndex1 exists:
	//		point node 1 to prunerIndex0
	//		map prunerIndex0 to node 1
	//		invalidate map prunerIndex1

	// eventually:
	// - node 0 is invalid
	// - prunerIndex0 is mapped to node 1 or
	//		is not mapped if prunerIndex1 is not mapped
	//		is not mapped if prunerIndex0==prunerIndex1
	// - node 1 points to prunerIndex0 or
	//		is invalid if prunerIndex1 is not mapped
	//		is invalid if prunerIndex0==prunerIndex1
	// - prunerIndex1 is not mapped

	AABBTreeRuntimeNode* nodes = tree.getNodes();

	if(nodeIndex0!=INVALID_NODE_ID)
	{
		PX_ASSERT(nodeIndex0 < tree.getNbNodes());
		PX_ASSERT(nodes[nodeIndex0].isLeaf());
		AABBTreeRuntimeNode* node0 = nodes + nodeIndex0;
		const PxU32 nbPrims = node0->getNbRuntimePrimitives();
		PX_ASSERT(nbPrims <= 16);

		// retrieve the primitives pointer
		PxU32* primitives = node0->getPrimitives(tree.getIndices());
		PX_ASSERT(primitives);

		// PT: look for desired pool index in the leaf
		bool foundIt = false;
		for(PxU32 i=0;i<nbPrims;i++)
		{
			PX_ASSERT(mMapping[primitives[i]] == nodeIndex0);	// PT: all primitives should point to the same leaf node

			if(prunerIndex0 == primitives[i])
			{
				foundIt = true;
				const PxU32 last = nbPrims-1;
				node0->setNbRunTimePrimitives(last);
				primitives[i] = INVALID_POOL_ID;			// Mark primitive index as invalid in the node
				mMapping[prunerIndex0] = INVALID_NODE_ID;	// invalidate the node index for pool 0

				// PT: swap within the leaf node. No need to update the mapping since they should all point
				// to the same tree node anyway.
				if(last!=i)
					Ps::swap(primitives[i], primitives[last]);
				break;
			}
		}
		PX_ASSERT(foundIt);
		PX_UNUSED(foundIt);
	}

	if (nodeIndex1!=INVALID_NODE_ID)
	{
		// PT: with multiple primitives per leaf, tree nodes may very well be the same for different pool indices.
		// However the pool indices may be the same when a swap has been skipped in the pruning pool, in which
		// case there is nothing to do.
		if(prunerIndex0!=prunerIndex1)
		{
			PX_ASSERT(nodeIndex1 < tree.getNbNodes());
			PX_ASSERT(nodes[nodeIndex1].isLeaf());
			AABBTreeRuntimeNode* node1 = nodes + nodeIndex1;
			const PxU32 nbPrims = node1->getNbRuntimePrimitives();
			PX_ASSERT(nbPrims <= 16);

			// retrieve the primitives pointer
			PxU32* primitives = node1->getPrimitives(tree.getIndices());
			PX_ASSERT(primitives);

			// PT: look for desired pool index in the leaf
			bool foundIt = false;
			for(PxU32 i=0;i<nbPrims;i++)
			{
				PX_ASSERT(mMapping[primitives[i]] == nodeIndex1);	// PT: all primitives should point to the same leaf node

				if(prunerIndex1 == primitives[i])
				{
					foundIt = true;
					primitives[i] = prunerIndex0;				// point node 1 to the pool object moved to ID 0
					mMapping[prunerIndex0] = nodeIndex1;		// pool 0 is pointed at by node 1 now
					mMapping[prunerIndex1] = INVALID_NODE_ID;	// pool 1 is no longer stored in the tree
					break;
				}
			}
			PX_ASSERT(foundIt);
			PX_UNUSED(foundIt);
		}
	}
}

diff --git a/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.h b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.h new file mode 100644 index 00000000..58418b03 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqAABBTreeUpdateMap.h @@ -0,0 +1,82 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef SQ_PRUNERTREEMAP_H +#define SQ_PRUNERTREEMAP_H + +#include "SqTypedef.h" +#include "PsArray.h" + +namespace physx +{ +namespace Sq +{ + static const PxU32 INVALID_NODE_ID = 0xFFffFFff; + static const PxU32 INVALID_POOL_ID = 0xFFffFFff; + + // Maps pruning pool indices to AABB-tree indices (i.e. 
locates the object's box in the aabb-tree nodes pool) + // + // The map spans pool indices from 0..N-1, where N is the number of pool entries when the map was created from a tree. + // + // It maps: + // to node indices in the range 0..M-1, where M is the number of nodes in the tree the map was created from, + // or to INVALID_NODE_ID if the pool entry was removed or pool index is outside input domain. + // + // The map is the inverse of the tree mapping: (node[map[poolID]].primitive == poolID) is true at all times. + + class AABBTreeUpdateMap + { + public: + AABBTreeUpdateMap() {} + ~AABBTreeUpdateMap() {} + + void release() + { + mMapping.reset(); + } + + // indices offset used when indices are shifted from objects (used for merged trees) + void initMap(PxU32 numPoolObjects, const Sq::AABBTree& tree); + + void invalidate(PoolIndex poolIndex, PoolIndex replacementPoolIndex, Sq::AABBTree& tree); + + PX_FORCE_INLINE TreeNodeIndex operator[](PxU32 poolIndex) const + { + return poolIndex < mMapping.size() ? mMapping[poolIndex] : INVALID_NODE_ID; + } + private: + // maps from prunerIndex (index in the PruningPool) to treeNode index + // this will only map to leaf tree nodes + Ps::Array<TreeNodeIndex> mMapping; + }; + +} +} + +#endif diff --git a/PhysX_3.4/Source/SceneQuery/src/SqBounds.cpp b/PhysX_3.4/Source/SceneQuery/src/SqBounds.cpp new file mode 100644 index 00000000..3bae047d --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqBounds.cpp @@ -0,0 +1,75 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxTransform.h" +#include "SqBounds.h" +#include "CmTransformUtils.h" +#include "SqPruner.h" +#include "ScbShape.h" +#include "ScbActor.h" +#include "ScbRigidStatic.h" +#include "ScbBody.h" +#include "PsAllocator.h" +#include "GuBounds.h" + +using namespace physx; +using namespace Sq; + +void Sq::computeStaticWorldAABB(PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor) +{ + const PxTransform& shape2Actor = scbShape.getShape2Actor(); + + PX_ALIGN(16, PxTransform) globalPose; + + Cm::getStaticGlobalPoseAligned(static_cast<const Scb::RigidStatic&>(scbActor).getActor2World(), shape2Actor, globalPose); + Gu::computeBounds(bounds, scbShape.getGeometry(), globalPose, 0.0f, NULL, SQ_PRUNER_INFLATION, false); +} + +void Sq::computeDynamicWorldAABB(PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor) +{ + const PxTransform& shape2Actor = scbShape.getShape2Actor(); + + PX_ALIGN(16, PxTransform) globalPose; + { + const Scb::Body& body = static_cast<const Scb::Body&>(scbActor); + PX_ALIGN(16, PxTransform) kinematicTarget; + const PxU16 sqktFlags = PxRigidBodyFlag::eKINEMATIC | PxRigidBodyFlag::eUSE_KINEMATIC_TARGET_FOR_SCENE_QUERIES; + const bool useTarget = (PxU16(body.getFlags()) & sqktFlags) == sqktFlags; + const PxTransform& body2World = (useTarget && body.getKinematicTarget(kinematicTarget)) ? 
kinematicTarget : body.getBody2World(); + Cm::getDynamicGlobalPoseAligned(body2World, shape2Actor, body.getBody2Actor(), globalPose); + } + + Gu::computeBounds(bounds, scbShape.getGeometry(), globalPose, 0.0f, NULL, SQ_PRUNER_INFLATION, false); +} + +const ComputeBoundsFunc Sq::gComputeBoundsTable[2] = +{ + computeStaticWorldAABB, + computeDynamicWorldAABB +}; diff --git a/PhysX_3.4/Source/SceneQuery/src/SqBounds.h b/PhysX_3.4/Source/SceneQuery/src/SqBounds.h new file mode 100644 index 00000000..60c6ad6f --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqBounds.h @@ -0,0 +1,70 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef SQ_BOUNDS_H
#define SQ_BOUNDS_H

#include "CmPhysXCommon.h"
#include "foundation/PxBounds3.h"
#include "PsVecMath.h"

namespace physx
{
	namespace Scb
	{
		class Shape;
		class Actor;
	}

namespace Sq
{
	// Compute the world-space AABB of a shape on a static / dynamic actor
	// (implemented in SqBounds.cpp).
	void computeStaticWorldAABB(PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor);
	void computeDynamicWorldAABB(PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor);

	typedef void(*ComputeBoundsFunc) (PxBounds3& bounds, const Scb::Shape& scbShape, const Scb::Actor& scbActor);

	// Dispatch table: [0] = static, [1] = dynamic.
	extern const ComputeBoundsFunc gComputeBoundsTable[2];

	// Symmetrically inflates 'src' by 0.5% of its size on each side
	// (i.e. the result is 1% larger overall) and writes it to 'dst'.
	// 'dst' may alias 'src'.
	PX_FORCE_INLINE void inflateBounds(PxBounds3& dst, const PxBounds3& src)
	{
		using namespace physx::shdfnd::aos;

		// NOTE(review): these unaligned 4-float loads read one float past each
		// PxVec3 member — presumably safe because minimum/maximum are stored
		// contiguously in PxBounds3; confirm against PxBounds3's layout.
		const Vec4V minV = V4LoadU(&src.minimum.x);
		const Vec4V maxV = V4LoadU(&src.maximum.x);
		const Vec4V eV = V4Scale(V4Sub(maxV, minV), FLoad(0.5f* 0.01f));

		// The 4-float store to minimum also overwrites dst.maximum.x; the
		// maximum is written afterwards so the final value is correct.
		V4StoreU(V4Sub(minV, eV), &dst.minimum.x);
		PX_ALIGN(16, PxVec4) max4;
		V4StoreA(V4Add(maxV, eV), &max4.x);
		dst.maximum = PxVec3(max4.x, max4.y, max4.z);
	}
}
}

#endif // SQ_BOUNDS_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.cpp b/PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.cpp new file mode 100644 index 00000000..35a5ca13 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqBucketPruner.cpp @@ -0,0 +1,2601 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "foundation/PxMemory.h"
#include "SqBucketPruner.h"
#include "GuIntersectionBoxBox.h"
#include "GuInternal.h"
#include "PsVecMath.h"
#include "foundation/PxUnionCast.h"
#include "CmRadixSortBuffered.h"
#include "CmRenderOutput.h"
#include "PsFPU.h"
#include "PsBitUtils.h"
#include "PsIntrinsics.h"
#include "GuBounds.h"

using namespace physx::shdfnd::aos;

using namespace physx;
using namespace Sq;
using namespace Gu;
using namespace Ps;

#define INVALID_HANDLE	0xffffffff

/*
TODO:
- if Core is always available, mSortedObjects could be replaced with just indices to mCoreObjects => less memory.
- UTS:
	- test that queries against empty boxes all return false
- invalidate after 16 removes
- check shiftOrigin stuff (esp what happens to emptied boxes)
	- isn't there a very hard-to-find bug waiting to happen in there,
	when the shift touches the empty box and overrides mdata0/mdata1 with "wrong" values that break the sort?
- revisit updateObject/removeObject
- optimize/cache computation of free global bounds before clipRay

- remove temp memory buffers (sorted arrays)
- take care of code duplication
- better code to generate SIMD 0x7fffffff
- refactor SIMD tests
- optimize:
	- better split values
	- optimize update (bitmap, less data copy, etc)
	- use ray limits in traversal code too?
	- the SIMD XBOX code operates on Min/Max rather than C/E. Change format?
	- or just try the alternative ray-box code (as on PC) ==> pretty much exactly the same speed
*/

//#define VERIFY_SORT
//#define BRUTE_FORCE_LIMIT	32
#define LOCAL_SIZE	256			// Size of various local arrays. Dynamic allocations occur if exceeded.
#define USE_SIMD				// Use SIMD code or not (sanity performance check)
#define NODE_SORT				// Enable/disable node sorting
#define NODE_SORT_MIN_COUNT	16	// Limit above which node sorting is performed
#if PX_INTEL_FAMILY
	#if COMPILE_VECTOR_INTRINSICS
		#define CAN_USE_MOVEMASK
	#endif
#endif

// Rounds 'size' up to the next multiple of 16 bytes.
#define ALIGN16(size) ((unsigned(size)+15) & unsigned(~15))

// In debug, buffers may not be 16-byte aligned, so fall back to unaligned ops.
#ifdef _DEBUG
	#define AlignedLoad		V4LoadU
	#define AlignedStore	V4StoreU
#else
	#define AlignedLoad		V4LoadA
	#define AlignedStore	V4StoreA
#endif

// SAT-based ray-box overlap test has accuracy issues for long rays, so we clip them against the global AABB to limit these issues.
// Shortens 'maxDist' to a conservative value that still covers the whole of
// the box (boxMin, boxMax) as seen from rayOrig along rayDir.
static void clipRay(const PxVec3& rayOrig, const PxVec3& rayDir, float& maxDist, const PxVec3& boxMin, const PxVec3& boxMax)
{
	const PxVec3 boxCenter = (boxMax + boxMin)*0.5f;
	const PxVec3 boxExtents = (boxMax - boxMin)*0.5f;
	const float dpc = boxCenter.dot(rayDir);
	const float extentsMagnitude = boxExtents.magnitude();
	const float dpMin = dpc - extentsMagnitude;
	const float dpMax = dpc + extentsMagnitude;
	const float dpO = rayOrig.dot(rayDir);
	const float boxLength = extentsMagnitude * 2.0f;
	const float distToBox = PxMin(PxAbs(dpMin - dpO), PxAbs(dpMax - dpO));
	maxDist = distToBox + boxLength * 2.0f;
}

// All 5 bucket boxes (4 quadrants + cross bucket) start out empty.
BucketPrunerNode::BucketPrunerNode()
{
	for(PxU32 i=0;i<5;i++)
		mBucketBox[i].setEmpty();
}

// Lookup table translating the 4-bit quadrant classification (plus bit 4 =
// "cross bucket") into a bucket index 0..4 (4 = cross/overlap bucket).
static const PxU8 gCodes[] = {	4, 4, 4, 4, 4, 3, 2, 2,
								4, 1, 0, 0, 4, 1, 0, 0,
								4, 1, 0, 0, 2, 1, 0, 0,
								3, 1, 0, 0, 2, 1, 0, 0};

#ifdef CAN_USE_MOVEMASK
/*static PX_FORCE_INLINE PxU32 classifyBox_x86(const BucketBox& box, const PxVec4& limits, const bool useY, const bool isCrossBucket)
{
	const Vec4V extents = AlignedLoad(&box.mExtents.x);
	const Vec4V center = AlignedLoad(&box.mCenter.x);
	const Vec4V plus = V4Add(extents, center);
	const Vec4V minus = V4Sub(extents, center);

	Vec4V tmp;
	if(useY)	// PT: this is a constant so branch prediction works here
		tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,1,0,1));
	else
		tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,2,0,2));

	const Vec4V comp = _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(0,2,1,3));	// oh well, nm

	const PxU32 Code = (PxU32)_mm_movemask_ps(V4IsGrtr(V4LoadA(&limits.x), comp));
	return gCodes[Code | PxU32(isCrossBucket)<<4];
}*/

// SSE variant of classifyBox: builds the 4 comparison results in one vector
// and extracts them with movemask instead of 4 scalar compares.
static PX_FORCE_INLINE PxU32 classifyBox_x86(const Vec4V boxMin, const Vec4V boxMax, const PxVec4& limits, const bool useY, const bool isCrossBucket)
{
	const Vec4V plus = boxMax;
	const Vec4V minus = V4Neg(boxMin);

	Vec4V tmp;
	if(useY)	// PT: this is a constant so branch prediction works here
		tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,1,0,1));
	else
		tmp = _mm_shuffle_ps(plus, minus, _MM_SHUFFLE(0,2,0,2));

	const Vec4V comp = _mm_shuffle_ps(tmp, tmp, _MM_SHUFFLE(0,2,1,3));	// oh well, nm

	const PxU32 Code = PxU32(_mm_movemask_ps(V4IsGrtr(V4LoadA(&limits.x), comp)));
	return gCodes[Code | PxU32(isCrossBucket)<<4];
}
#endif

// Scalar classifyBox is used when movemask is unavailable, and additionally in
// debug builds to cross-check the SSE version.
#ifdef CAN_USE_MOVEMASK
	#if PX_DEBUG
		#define USE_CLASSIFY_BOX
	#endif
#else
	#define USE_CLASSIFY_BOX
#endif

#ifdef USE_CLASSIFY_BOX
// Classifies 'box' against the two split planes (x = limitX, yz = limitYZ)
// and returns the bucket index 0..4 via the gCodes table.
static PX_FORCE_INLINE PxU32 classifyBox(const BucketBox& box, const float limitX, const float limitYZ, const PxU32 yz, const bool isCrossBucket)
{
	const bool upperPart = (box.mCenter[yz] + box.mExtents[yz])<limitYZ;
	const bool lowerPart = (box.mCenter[yz] - box.mExtents[yz])>limitYZ;
	const bool leftPart = (box.mCenter.x + box.mExtents.x)<limitX;
	const bool rightPart = (box.mCenter.x - box.mExtents.x)>limitX;

	// Table-based box classification avoids many branches
	const PxU32 Code = PxU32(rightPart)|(PxU32(leftPart)<<1)|(PxU32(lowerPart)<<2)|(PxU32(upperPart)<<3);
	return gCodes[Code + (isCrossBucket ? 16 : 0)];
}
#endif

// Distributes 'nb' boxes/objects into this node's 5 buckets (counting sort on
// the bucket index), writing the grouped results to sortedBoxes/sortedObjects
// and updating per-bucket bounds, counters and offsets. Input is expected to
// be sorted along 'sortAxis' (checked in debug via mDebugMin) and the grouping
// is stable, so each bucket remains sorted.
void BucketPrunerNode::classifyBoxes(	float limitX, float limitYZ,
										PxU32 nb, BucketBox* PX_RESTRICT boxes, const PrunerPayload* PX_RESTRICT objects,
										BucketBox* PX_RESTRICT sortedBoxes, PrunerPayload* PX_RESTRICT sortedObjects,
										bool isCrossBucket, PxU32 sortAxis)
{
	const PxU32 yz = PxU32(sortAxis == 1 ? 2 : 1);

	#ifdef _DEBUG
	{
		// Verify the input is sorted along the sort axis.
		float prev = boxes[0].mDebugMin;
		for(PxU32 i=1;i<nb;i++)
		{
			const float current = boxes[i].mDebugMin;
			PX_ASSERT(current>=prev);
			prev = current;
		}
	}
	#endif

	// Local (stack-based) min/max bucket bounds
	PX_ALIGN(16, PxVec4) bucketBoxMin[5];
	PX_ALIGN(16, PxVec4) bucketBoxMax[5];
	{
		const PxBounds3 empty = PxBounds3::empty();
		for(PxU32 i=0;i<5;i++)
		{
			mCounters[i] = 0;
			bucketBoxMin[i] = PxVec4(empty.minimum, 0.0f);
			bucketBoxMax[i] = PxVec4(empty.maximum, 0.0f);
		}
	}

	{
#ifdef CAN_USE_MOVEMASK
		// DS: order doesn't play nice with x86 shuffles :-|
		PX_ALIGN(16, PxVec4) limits(-limitX, limitX, -limitYZ, limitYZ);
		const bool useY = yz==1;
#endif
		// Determine in which bucket each object falls, update bucket bounds
		for(PxU32 i=0;i<nb;i++)
		{
			const Vec4V boxCenterV = AlignedLoad(&boxes[i].mCenter.x);
			const Vec4V boxExtentsV = AlignedLoad(&boxes[i].mExtents.x);
			const Vec4V boxMinV = V4Sub(boxCenterV, boxExtentsV);
			const Vec4V boxMaxV = V4Add(boxCenterV, boxExtentsV);

#ifdef CAN_USE_MOVEMASK
//			const PxU32 index = classifyBox_x86(boxes[i], limits, useY, isCrossBucket);
			const PxU32 index = classifyBox_x86(boxMinV, boxMaxV, limits, useY, isCrossBucket);
	#if PX_DEBUG
			const PxU32 index_ = classifyBox(boxes[i], limitX, limitYZ, yz, isCrossBucket);
			PX_ASSERT(index == index_);
	#endif
#else
			const PxU32 index = classifyBox(boxes[i], limitX, limitYZ, yz, isCrossBucket);
#endif
			// Merge boxes
			{
				const Vec4V mergedMinV = V4Min(V4LoadA(&bucketBoxMin[index].x), boxMinV);
				const Vec4V mergedMaxV = V4Max(V4LoadA(&bucketBoxMax[index].x), boxMaxV);
				V4StoreA(mergedMinV, &bucketBoxMin[index].x);
				V4StoreA(mergedMaxV, &bucketBoxMax[index].x);
			}
			boxes[i].mData0 = index;	// Store bucket index for current box in this temporary location
			mCounters[index]++;
		}
	}

	{
		// Regenerate offsets
		mOffsets[0]=0;
		for(PxU32 i=0;i<4;i++)
			mOffsets[i+1] = mOffsets[i] + mCounters[i];
	}

	{
		// Group boxes with same bucket index together
		for(PxU32 i=0;i<nb;i++)
		{
			const PxU32 bucketOffset = mOffsets[boxes[i].mData0]++;	// Bucket index for current box was stored in mData0 by previous loop
			// The 2 following lines are the same as:
			// sortedBoxes[bucketOffset] = boxes[i];
			AlignedStore(AlignedLoad(&boxes[i].mCenter.x), &sortedBoxes[bucketOffset].mCenter.x);
			AlignedStore(AlignedLoad(&boxes[i].mExtents.x), &sortedBoxes[bucketOffset].mExtents.x);

			#ifdef _DEBUG
			sortedBoxes[bucketOffset].mDebugMin = boxes[i].mDebugMin;
			#endif
			sortedObjects[bucketOffset] = objects[i];
		}
	}

	{
		// Regenerate offsets (the grouping loop above advanced them past each bucket)
		mOffsets[0]=0;
		for(PxU32 i=0;i<4;i++)
			mOffsets[i+1] = mOffsets[i] + mCounters[i];
	}

	{
		// Convert local (stack-based) min/max bucket bounds to persistent center/extents format
		const float Half = 0.5f;
		const FloatV HalfV = FLoad(Half);
		PX_ALIGN(16, PxVec4) bucketCenter;
		PX_ALIGN(16, PxVec4) bucketExtents;
		for(PxU32 i=0;i<5;i++)
		{
			// The following lines are the same as:
			// mBucketBox[i].mCenter = bucketBox[i].getCenter();
			// mBucketBox[i].mExtents = bucketBox[i].getExtents();
			const Vec4V bucketBoxMinV = V4LoadA(&bucketBoxMin[i].x);
			const Vec4V bucketBoxMaxV = V4LoadA(&bucketBoxMax[i].x);
			const Vec4V bucketBoxCenterV = V4Scale(V4Add(bucketBoxMaxV, bucketBoxMinV), HalfV);
			const Vec4V bucketBoxExtentsV = V4Scale(V4Sub(bucketBoxMaxV, bucketBoxMinV), HalfV);
			V4StoreA(bucketBoxCenterV, &bucketCenter.x);
			V4StoreA(bucketBoxExtentsV, &bucketExtents.x);
			mBucketBox[i].mCenter = PxVec3(bucketCenter.x, bucketCenter.y, bucketCenter.z);
			mBucketBox[i].mExtents = PxVec3(bucketExtents.x, bucketExtents.y, bucketExtents.z);
		}
	}

	#ifdef _DEBUG
	// Verify that, within each bucket, entries are still sorted (stable grouping).
	for(PxU32 j=0;j<5;j++)
	{
		const PxU32 count = mCounters[j];
		if(count)
		{
			const BucketBox* base = sortedBoxes + mOffsets[j];
			float prev = base[0].mDebugMin;
			for(PxU32 i=1;i<count;i++)
			{
				const float current = base[i].mDebugMin;
				PX_ASSERT(current>=prev);
				prev = current;
			}
		}
	}
	#endif
}

///////////////////////////////////////////////////////////////////////////////

// Recursively subdivides each of 'bucket's 5 child ranges into its own 5
// buckets, classifying in the scratch buffers and copying the grouped results
// back in place.
static void processChildBuckets(PxU32 nbAllocated,
								BucketBox* sortedBoxesInBucket, PrunerPayload* sortedObjectsInBucket,
								const BucketPrunerNode& bucket, BucketPrunerNode* PX_RESTRICT childBucket,
								BucketBox* PX_RESTRICT baseBucketsBoxes, PrunerPayload* PX_RESTRICT baseBucketsObjects,
								PxU32 sortAxis)
{
	PX_UNUSED(nbAllocated);

	const PxU32 yz = PxU32(sortAxis == 1 ? 2 : 1);
	for(PxU32 i=0;i<5;i++)
	{
		const PxU32 nbInBucket = bucket.mCounters[i];
		if(!nbInBucket)
		{
			childBucket[i].initCounters();
			continue;
		}
		BucketBox* bucketsBoxes = baseBucketsBoxes + bucket.mOffsets[i];
		PrunerPayload* bucketsObjects = baseBucketsObjects + bucket.mOffsets[i];
		PX_ASSERT(nbInBucket<=nbAllocated);

		// Split each child bucket around its own bounds' center.
		const float limitX = bucket.mBucketBox[i].mCenter.x;
		const float limitYZ = bucket.mBucketBox[i].mCenter[yz];
		const bool isCrossBucket = i==4;
		childBucket[i].classifyBoxes(limitX, limitYZ, nbInBucket, bucketsBoxes, bucketsObjects,
			sortedBoxesInBucket, sortedObjectsInBucket,
			isCrossBucket, sortAxis);

		PxMemCopy(bucketsBoxes, sortedBoxesInBucket, sizeof(BucketBox)*nbInBucket);
		PxMemCopy(bucketsObjects, sortedObjectsInBucket, sizeof(PrunerPayload)*nbInBucket);
	}
}

///////////////////////////////////////////////////////////////////////////////

static PX_FORCE_INLINE PxU32 encodeFloat(PxU32 newPos)
{
	//we may need to check on -0 and 0
	//But it should make no practical difference.
	if(newPos & PX_SIGN_BITMASK) //negative?
+ return ~newPos;//reverse sequence of negative numbers + else + return newPos | PX_SIGN_BITMASK; // flip sign +} + +static PX_FORCE_INLINE void computeRayLimits(float& rayMin, float& rayMax, const PxVec3& rayOrig, const PxVec3& rayDir, float maxDist, PxU32 sortAxis) +{ + const float rayOrigValue = rayOrig[sortAxis]; + const float rayDirValue = rayDir[sortAxis] * maxDist; + rayMin = PxMin(rayOrigValue, rayOrigValue + rayDirValue); + rayMax = PxMax(rayOrigValue, rayOrigValue + rayDirValue); +} + +static PX_FORCE_INLINE void computeRayLimits(float& rayMin, float& rayMax, const PxVec3& rayOrig, const PxVec3& rayDir, float maxDist, const PxVec3& inflate, PxU32 sortAxis) +{ + const float inflateValue = inflate[sortAxis]; + const float rayOrigValue = rayOrig[sortAxis]; + const float rayDirValue = rayDir[sortAxis] * maxDist; + rayMin = PxMin(rayOrigValue, rayOrigValue + rayDirValue) - inflateValue; + rayMax = PxMax(rayOrigValue, rayOrigValue + rayDirValue) + inflateValue; +} + +static PX_FORCE_INLINE void encodeBoxMinMax(BucketBox& box, const PxU32 axis) +{ + const float min = box.mCenter[axis] - box.mExtents[axis]; + const float max = box.mCenter[axis] + box.mExtents[axis]; + + const PxU32* binaryMin = reinterpret_cast<const PxU32*>(&min); + const PxU32* binaryMax = reinterpret_cast<const PxU32*>(&max); + box.mData0 = encodeFloat(binaryMin[0]); + box.mData1 = encodeFloat(binaryMax[0]); +} + +/////////////////////////////////////////////////////////////////////////////// + +BucketPrunerCore::BucketPrunerCore(bool externalMemory) : + mCoreNbObjects (0), + mCoreCapacity (0), + mCoreBoxes (NULL), + mCoreObjects (NULL), + mCoreRemap (NULL), + mSortedWorldBoxes (NULL), + mSortedObjects (NULL), + mNbFree (0), + mSortedNb (0), + mSortedCapacity (0), + mSortAxis (0), + mDirty (true), + mOwnMemory (!externalMemory) +{ + mGlobalBox.setEmpty(); + + mLevel1.initCounters(); + + for(PxU32 i=0;i<5;i++) + mLevel2[i].initCounters(); + for(PxU32 j=0;j<5;j++) + for(PxU32 i=0;i<5;i++) + 
mLevel3[j][i].initCounters(); +} + +BucketPrunerCore::~BucketPrunerCore() +{ + release(); +} + +void BucketPrunerCore::release() +{ + mDirty = true; + mCoreNbObjects = 0; + + mCoreCapacity = 0; + if(mOwnMemory) + { + PX_FREE_AND_RESET(mCoreBoxes); + PX_FREE_AND_RESET(mCoreObjects); + PX_FREE_AND_RESET(mCoreRemap); + } + + PX_FREE_AND_RESET(mSortedWorldBoxes); + PX_FREE_AND_RESET(mSortedObjects); + mSortedNb = 0; + mSortedCapacity = 0; + + mNbFree = 0; +#ifdef USE_REGULAR_HASH_MAP + mMap.clear(); +#else + mMap.purge(); +#endif +} + +void BucketPrunerCore::setExternalMemory(PxU32 nbObjects, PxBounds3* boxes, PrunerPayload* objects) +{ + PX_ASSERT(!mOwnMemory); + mCoreNbObjects = nbObjects; + mCoreBoxes = boxes; + mCoreObjects = objects; + mCoreRemap = NULL; +} + +void BucketPrunerCore::allocateSortedMemory(PxU32 nb) +{ + mSortedNb = nb; + if(nb<=mSortedCapacity && (nb>=mSortedCapacity/2)) + return; + + const PxU32 capacity = Ps::nextPowerOfTwo(nb); + mSortedCapacity = capacity; + + PxU32 bytesNeededForBoxes = capacity*sizeof(BucketBox); + bytesNeededForBoxes = ALIGN16(bytesNeededForBoxes); + + PxU32 bytesNeededForObjects = capacity*sizeof(PrunerPayload); + bytesNeededForObjects = ALIGN16(bytesNeededForObjects); + + PX_FREE(mSortedObjects); + PX_FREE(mSortedWorldBoxes); + mSortedWorldBoxes = reinterpret_cast<BucketBox*>(PX_ALLOC(bytesNeededForBoxes, "BucketPruner")); + mSortedObjects = reinterpret_cast<PrunerPayload*>(PX_ALLOC(bytesNeededForObjects, "BucketPruner")); + PX_ASSERT(!(size_t(mSortedWorldBoxes)&15)); + PX_ASSERT(!(size_t(mSortedObjects)&15)); +} + +/////////////////////////////////////////////////////////////////////////////// + +void BucketPrunerCore::resizeCore() +{ + const PxU32 capacity = mCoreCapacity ? 
mCoreCapacity*2 : 32; + mCoreCapacity = capacity; + + const PxU32 bytesNeededForBoxes = capacity*sizeof(PxBounds3); + const PxU32 bytesNeededForObjects = capacity*sizeof(PrunerPayload); + const PxU32 bytesNeededForRemap = capacity*sizeof(PxU32); + + PxBounds3* newCoreBoxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(bytesNeededForBoxes, "BucketPruner")); + PrunerPayload* newCoreObjects = reinterpret_cast<PrunerPayload*>(PX_ALLOC(bytesNeededForObjects, "BucketPruner")); + PxU32* newCoreRemap = reinterpret_cast<PxU32*>(PX_ALLOC(bytesNeededForRemap, "BucketPruner")); + if(mCoreBoxes) + { + PxMemCopy(newCoreBoxes, mCoreBoxes, mCoreNbObjects*sizeof(PxBounds3)); + PX_FREE(mCoreBoxes); + } + if(mCoreObjects) + { + PxMemCopy(newCoreObjects, mCoreObjects, mCoreNbObjects*sizeof(PrunerPayload)); + PX_FREE(mCoreObjects); + } + if(mCoreRemap) + { + PxMemCopy(newCoreRemap, mCoreRemap, mCoreNbObjects*sizeof(PxU32)); + PX_FREE(mCoreRemap); + } + mCoreBoxes = newCoreBoxes; + mCoreObjects = newCoreObjects; + mCoreRemap = newCoreRemap; +} + +PX_FORCE_INLINE void BucketPrunerCore::addObjectInternal(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp) +{ + if(mCoreNbObjects==mCoreCapacity) + resizeCore(); + + const PxU32 index = mCoreNbObjects++; + mCoreObjects[index] = object; + mCoreBoxes[index] = worldAABB; // PT: TODO: check assembly here + mCoreRemap[index] = 0xffffffff; + + // Objects are only inserted into the map once they're part of the main/core arrays. +#ifdef USE_REGULAR_HASH_MAP + bool ok = mMap.insert(object, BucketPrunerPair(index, timeStamp)); +#else + BucketPrunerPair* ok = mMap.addPair(object, index, timeStamp); +#endif + PX_UNUSED(ok); + PX_ASSERT(ok); +} + +bool BucketPrunerCore::addObject(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp) +{ +/* + We should probably use a bigger Payload struct here, which would also contains the external handle. 
+ (EDIT: we can't even do that, because of the setExternalMemory function) + When asked to update/remove an object it would be O(n) to find the proper object in the mSortedObjects array. + + - + + For removing it we can simply empty the corresponding box, and the object will never be returned from queries. + Maybe this isn't even true, since boxes are sorted along one axis. So marking a box as empty could break the code relying on a sorted order. + An alternative is to mark the external handle as invalid, and ignore the object when a hit is found. + + (EDIT: the sorting is now tested via data0/data1 anyway so we could mark the box as empty without breaking this) + + - + + For updating an object we would need to keep the (sub) array sorted (not the whole thing, only the array within a bucket). + We don't know the range (what part of the array maps to our bucket) but we may have the bucket ID somewhere? If we'd have this + we could parse the array left/right and resort just the right boxes. If we don't have this we may be able to "quickly" find the + range by traversing the tree, looking for the proper bucket. In any case I don't think there's a mapping to update within a bucket, + unlike in SAP or MBP. So we should be able to shuffle a bucket without having to update anything. For example there's no mapping + between the Core array and the Sorted array. It's a shame in a way because we'd need one, but it's not there - and in fact I think + we can free the Core array once Sorted is created, we don't need it at all. + + If we don't want to re-sort the full bucket we can just mark it as dirty and ignore the sort-based early exits in the queries. Then we + can incrementally resort it over N frames or something. + + This only works if the updated object remains in the same bucket though. If it moves to another bucket it becomes tempting to just remove + the object and re-insert it. 
+ + - + + Now for adding an object, we can first have a "free pruner" and do the 16 next entries brute-force. Rebuilding every 16 objects might + give a good speedup already. Otherwise we need to do something more complicated. +*/ + + PX_ASSERT(mOwnMemory); + PX_ASSERT(!mDirty || !mNbFree); + if(!mDirty) + { + // In this path the structure is marked as valid. We do not want to invalidate it for each new object... + if(mNbFree<FREE_PRUNER_SIZE) + { + // ...so as long as there is space in the "free array", we store the newly added object there and + // return immediately. Subsequent queries will parse the free array as if it was a free pruner. + const PxU32 index = mNbFree++; + mFreeObjects[index] = object; + mFreeBounds[index] = worldAABB; + mFreeStamps[index] = timeStamp; + return true; + } + + // If we reach this place, the free array is full. We must transfer the objects from the free array to + // the main (core) arrays, mark the structure as invalid, and still deal with the incoming object. + + // First we transfer free objects, reset the number of free objects, and mark the structure as + // invalid/dirty (the core arrays will need rebuilding). + for(PxU32 i=0;i<mNbFree;i++) + addObjectInternal(mFreeObjects[i], mFreeBounds[i], mFreeStamps[i]); + + mNbFree = 0; + mDirty = true; +// mSortedNb = 0; // PT: TODO: investigate if this should be done here + + // After that we still need to deal with the new incoming object (so far we only + // transferred the already existing objects from the full free array). This will + // happen automatically by letting the code continue to the regular codepath below. + } + + // If we reach this place, the structure must be invalid and the incoming object + // must be added to the main arrays. 
+ PX_ASSERT(mDirty); + + addObjectInternal(object, worldAABB, timeStamp); + return true; +} + +bool BucketPrunerCore::removeObject(const PrunerPayload& object, PxU32& timeStamp) +{ + // Even if the structure is already marked as dirty, we still need to update the + // core arrays and the map. + + // The map only contains core objects, so we can use it to determine if the object + // exists in the core arrays or in the free array. +#ifdef USE_REGULAR_HASH_MAP +/* BucketPrunerPair entry; + if(mMap.findAndErase(object, entry)) + { + PxU32 coreIndex = entry.mCoreIndex; + timeStamp = entry.mTimeStamp;*/ + const BucketPrunerMap::Entry* removedEntry = mMap.find(object); + if(removedEntry) + { + PxU32 coreIndex = removedEntry->second.mCoreIndex; + timeStamp = removedEntry->second.mTimeStamp; +#else + PxU32 coreIndex; // This is the object's index in the core arrays. + if(mMap.removePair(object, coreIndex, timeStamp)) + { +#endif + // In this codepath, the object we want to remove exists in the core arrays. + + // We will need to remove it from both the core arrays & the sorted arrays. + const PxU32 sortedIndex = mCoreRemap[coreIndex]; // This is the object's index in the sorted arrays. + +#ifdef USE_REGULAR_HASH_MAP + bool status = mMap.erase(object); + PX_ASSERT(status); + PX_UNUSED(status); +#endif + + // First let's deal with the core arrays + mCoreNbObjects--; + if(coreIndex!=mCoreNbObjects) + { + // If it wasn't the last object in the array, close the gaps as usual + const PrunerPayload& movedObject = mCoreObjects[mCoreNbObjects]; + mCoreBoxes[coreIndex] = mCoreBoxes[mCoreNbObjects]; + mCoreObjects[coreIndex] = movedObject; + mCoreRemap[coreIndex] = mCoreRemap[mCoreNbObjects]; + + // Since we just moved the last object, its index in the core arrays has changed. + // We must reflect this change in the map. 
+#ifdef USE_REGULAR_HASH_MAP + BucketPrunerMap::Entry* movedEntry = const_cast<BucketPrunerMap::Entry*>(mMap.find(movedObject)); + PX_ASSERT(movedEntry->second.mCoreIndex==mCoreNbObjects); + movedEntry->second.mCoreIndex = coreIndex; +#else + BucketPrunerPair* movedEntry = const_cast<BucketPrunerPair*>(mMap.findPair(movedObject)); + PX_ASSERT(movedEntry->mCoreIndex==mCoreNbObjects); + movedEntry->mCoreIndex = coreIndex; +#endif + } + + // Now, let's deal with the sorted arrays. + // If the structure is dirty, the sorted arrays will be rebuilt from scratch so there's no need to + // update them right now. + if(!mDirty) + { + // If the structure is valid, we want to keep it this way to avoid rebuilding sorted arrays after + // each removal. We can't "close the gaps" easily here because order of objects in the arrays matters. + + // Instead we just invalidate the object by setting its bounding box as empty. + // Queries against empty boxes will never return a hit, so this effectively "removes" the object + // from any subsequent query results. Sorted arrays now contain a "disabled" object, until next build. + + // Invalidating the box does not invalidate the sorting, since it's now captured in mData0/mData1. + // That is, mData0/mData1 keep their previous integer-encoded values, as if the box/object was still here. + PxBounds3 empty; + empty.setEmpty(); + mSortedWorldBoxes[sortedIndex].mCenter = empty.getCenter(); + mSortedWorldBoxes[sortedIndex].mExtents = empty.getExtents(); + // Note that we don't touch mSortedObjects here. We could, but this is not necessary. + } + return true; + } + + // Here, the object we want to remove exists in the free array. So we just parse it. + for(PxU32 i=0;i<mNbFree;i++) + { + if(mFreeObjects[i]==object) + { + // We found the object we want to remove. Close the gap as usual. 
+ timeStamp = mFreeStamps[i]; + mNbFree--; + mFreeBounds[i] = mFreeBounds[mNbFree]; + mFreeObjects[i] = mFreeObjects[mNbFree]; + mFreeStamps[i] = mFreeStamps[mNbFree]; + return true; + } + } + // We didn't find the object. Can happen with a double remove. PX_ASSERT might be an option here. + return false; +} + +bool BucketPrunerCore::updateObject(const PxBounds3& worldAABB, const PrunerPayload& object) +{ + PxU32 timeStamp; + if(!removeObject(object, timeStamp)) + return false; + + return addObject(object, worldAABB, timeStamp); +} + +PxU32 BucketPrunerCore::removeMarkedObjects(PxU32 timeStamp) +{ + PxU32 nbRemoved=0; + // PT: objects can be either in the hash-map, or in the 'free' array. First we look in the hash-map... +#ifdef USE_REGULAR_HASH_MAP + if(mMap.size()) +#else + if(mMap.mNbActivePairs) +#endif + { + PxBounds3 empty; + empty.setEmpty(); + const PxVec3 emptyCenter = empty.getCenter(); + const PxVec3 emptyExtents = empty.getExtents(); + + // PT: hash-map is coalesced so we just parse it in linear order, no holes + PxU32 i=0; +#ifdef USE_REGULAR_HASH_MAP + PxU32 nbActivePairs = mMap.size(); + const BucketPrunerMap::Entry* entries = mMap.mBase.getEntries(); +#else + PxU32 nbActivePairs = mMap.mNbActivePairs; +#endif + PxU32 coreNbObjects = mCoreNbObjects; // PT: to avoid LHS + while(i<nbActivePairs) + { +#ifdef USE_REGULAR_HASH_MAP + const BucketPrunerMap::Entry& p = entries[i]; + if(p.second.mTimeStamp==timeStamp) +#else + const BucketPrunerPair& p = mMap.mActivePairs[i]; + if(p.mTimeStamp==timeStamp) +#endif + { + // PT: timestamps match. We must remove this object. + // PT: we replicate here what we do in BucketPrunerCore::removeObject(). See that function for details. 
+ +#ifdef USE_REGULAR_HASH_MAP + const PxU32 coreIndex = p.second.mCoreIndex; +#else + const PxU32 coreIndex = p.mCoreIndex; +#endif + if(!mDirty) + { + // PT: invalidating the box does not invalidate the sorting, since it's now captured in mData0/mData1 + const PxU32 sortedIndex = mCoreRemap[coreIndex]; + mSortedWorldBoxes[sortedIndex].mCenter = emptyCenter; + mSortedWorldBoxes[sortedIndex].mExtents = emptyExtents; + } + + coreNbObjects--; + if(coreIndex!=coreNbObjects) + { + const PrunerPayload& movedObject = mCoreObjects[coreNbObjects]; + mCoreBoxes[coreIndex] = mCoreBoxes[coreNbObjects]; + mCoreObjects[coreIndex] = movedObject; + mCoreRemap[coreIndex] = mCoreRemap[coreNbObjects]; + +#ifdef USE_REGULAR_HASH_MAP + BucketPrunerMap::Entry* movedEntry = const_cast<BucketPrunerMap::Entry*>(mMap.find(movedObject)); + PX_ASSERT(movedEntry->second.mCoreIndex==coreNbObjects); + movedEntry->second.mCoreIndex = coreIndex; +#else + BucketPrunerPair* movedEntry = const_cast<BucketPrunerPair*>(mMap.findPair(movedObject)); + PX_ASSERT(movedEntry->mCoreIndex==coreNbObjects); + movedEntry->mCoreIndex = coreIndex; +#endif + } + + nbRemoved++; +#ifdef USE_REGULAR_HASH_MAP + bool status = mMap.erase(p.first); + PX_ASSERT(status); + PX_UNUSED(status); +#else + const PxU32 hashValue = hash(p.mPayload) & mMap.mMask; + mMap.removePairInternal(p.mPayload, hashValue, i); +#endif + nbActivePairs--; + } + else i++; + } + mCoreNbObjects = coreNbObjects; + +#ifdef USE_REGULAR_HASH_MAP +#else + mMap.shrinkMemory(); +#endif + } + + // PT: ...then we look in the 'free' array + PxU32 i=0; + while(i<mNbFree) + { + if(mFreeStamps[i]==timeStamp) + { + nbRemoved++; + mNbFree--; + mFreeBounds[i] = mFreeBounds[mNbFree]; + mFreeObjects[i] = mFreeObjects[mNbFree]; + mFreeStamps[i] = mFreeStamps[mNbFree]; + } + else i++; + } + return nbRemoved; +} + +/////////////////////////////////////////////////////////////////////////////// + +static PxU32 sortBoxes( PxU32 nb, const PxBounds3* PX_RESTRICT boxes, 
const PrunerPayload* PX_RESTRICT objects, + BucketBox& _globalBox, BucketBox* PX_RESTRICT sortedBoxes, PrunerPayload* PX_RESTRICT sortedObjects) +{ + // Compute global box & sort axis + PxU32 sortAxis; + { + PX_ASSERT(nb>0); + Vec4V mergedMinV = V4LoadU(&boxes[nb-1].minimum.x); + Vec4V mergedMaxV = Vec4V_From_Vec3V(V3LoadU(&boxes[nb-1].maximum.x)); + for(PxU32 i=0;i<nb-1;i++) + { + mergedMinV = V4Min(mergedMinV, V4LoadU(&boxes[i].minimum.x)); + mergedMaxV = V4Max(mergedMaxV, V4LoadU(&boxes[i].maximum.x)); + } + +/* PX_ALIGN(16, PxVec4) mergedMin; + PX_ALIGN(16, PxVec4) mergedMax; + V4StoreA(mergedMinV, &mergedMin.x); + V4StoreA(mergedMaxV, &mergedMax.x); + + _globalBox.mCenter.x = (mergedMax.x + mergedMin.x)*0.5f; + _globalBox.mCenter.y = (mergedMax.y + mergedMin.y)*0.5f; + _globalBox.mCenter.z = (mergedMax.z + mergedMin.z)*0.5f; + _globalBox.mExtents.x = (mergedMax.x - mergedMin.x)*0.5f; + _globalBox.mExtents.y = (mergedMax.y - mergedMin.y)*0.5f; + _globalBox.mExtents.z = (mergedMax.z - mergedMin.z)*0.5f;*/ + + const float Half = 0.5f; + const FloatV HalfV = FLoad(Half); + PX_ALIGN(16, PxVec4) mergedCenter; + PX_ALIGN(16, PxVec4) mergedExtents; + + const Vec4V mergedCenterV = V4Scale(V4Add(mergedMaxV, mergedMinV), HalfV); + const Vec4V mergedExtentsV = V4Scale(V4Sub(mergedMaxV, mergedMinV), HalfV); + V4StoreA(mergedCenterV, &mergedCenter.x); + V4StoreA(mergedExtentsV, &mergedExtents.x); + _globalBox.mCenter = PxVec3(mergedCenter.x, mergedCenter.y, mergedCenter.z); + _globalBox.mExtents = PxVec3(mergedExtents.x, mergedExtents.y, mergedExtents.z); + + const PxF32 absY = PxAbs(_globalBox.mExtents.y); + const PxF32 absZ = PxAbs(_globalBox.mExtents.z); + sortAxis = PxU32(absY < absZ ? 
1 : 2); +// printf("Sort axis: %d\n", sortAxis); + } + + float* keys = reinterpret_cast<float*>(sortedObjects); + for(PxU32 i=0;i<nb;i++) + keys[i] = boxes[i].minimum[sortAxis]; + + Cm::RadixSortBuffered rs; // ###TODO: some allocs here, remove + const PxU32* ranks = rs.Sort(keys, nb).GetRanks(); + + const float Half = 0.5f; + const FloatV HalfV = FLoad(Half); + for(PxU32 i=0;i<nb;i++) + { + const PxU32 index = *ranks++; +//const PxU32 index = local[i].index; +// sortedBoxes[i].mCenter = boxes[index].getCenter(); +// sortedBoxes[i].mExtents = boxes[index].getExtents(); + + const Vec4V bucketBoxMinV = V4LoadU(&boxes[index].minimum.x); + const Vec4V bucketBoxMaxV = Vec4V_From_Vec3V(V3LoadU(&boxes[index].maximum.x)); + const Vec4V bucketBoxCenterV = V4Scale(V4Add(bucketBoxMaxV, bucketBoxMinV), HalfV); + const Vec4V bucketBoxExtentsV = V4Scale(V4Sub(bucketBoxMaxV, bucketBoxMinV), HalfV); + // We don't need to preserve data0/data1 here + AlignedStore(bucketBoxCenterV, &sortedBoxes[i].mCenter.x); + AlignedStore(bucketBoxExtentsV, &sortedBoxes[i].mExtents.x); + + #ifdef _DEBUG + sortedBoxes[i].mDebugMin = boxes[index].minimum[sortAxis]; + #endif + sortedObjects[i] = objects[index]; + } + + return sortAxis; +} + +#ifdef NODE_SORT + template<class T> + PX_CUDA_CALLABLE PX_FORCE_INLINE void tswap(T& x, T& y) + { + T tmp = x; + x = y; + y = tmp; + } + +/* PX_FORCE_INLINE __m128 DotV(const __m128 a, const __m128 b) + { + const __m128 dot1 = _mm_mul_ps(a, b); + const __m128 shuf1 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dot1), _MM_SHUFFLE(0,0,0,0))); + const __m128 shuf2 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dot1), _MM_SHUFFLE(1,1,1,1))); + const __m128 shuf3 = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dot1), _MM_SHUFFLE(2,2,2,2))); + return _mm_add_ps(_mm_add_ps(shuf1, shuf2), shuf3); + }*/ + +// PT: hmmm, by construction, isn't the order always the same for all bucket pruners? 
+// => maybe not because the bucket boxes are still around the merged aabbs, not around the bucket +// Still we could do something here +static /*PX_FORCE_INLINE*/ PxU32 sort(const BucketPrunerNode& parent, const PxVec3& rayDir) +{ + const PxU32 totalCount = parent.mCounters[0]+parent.mCounters[1]+parent.mCounters[2]+parent.mCounters[3]+parent.mCounters[4]; + if(totalCount<NODE_SORT_MIN_COUNT) + return 0|(1<<3)|(2<<6)|(3<<9)|(4<<12); + + float dp[5]; +/* const __m128 rayDirV = _mm_loadu_ps(&rayDir.x); + __m128 dp0V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[0].mCenter.x)); _mm_store_ss(&dp[0], dp0V); + __m128 dp1V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[1].mCenter.x)); _mm_store_ss(&dp[1], dp1V); + __m128 dp2V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[2].mCenter.x)); _mm_store_ss(&dp[2], dp2V); + __m128 dp3V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[3].mCenter.x)); _mm_store_ss(&dp[3], dp3V); + __m128 dp4V = DotV(rayDirV, _mm_loadu_ps(&parent.mBucketBox[4].mCenter.x)); _mm_store_ss(&dp[4], dp4V); +*/ + +#ifdef VERIFY_SORT + PxU32 code; + { + dp[0] = parent.mCounters[0] ? PxAbs(parent.mBucketBox[0].mCenter.dot(rayDir)) : PX_MAX_F32; + dp[1] = parent.mCounters[1] ? PxAbs(parent.mBucketBox[1].mCenter.dot(rayDir)) : PX_MAX_F32; + dp[2] = parent.mCounters[2] ? PxAbs(parent.mBucketBox[2].mCenter.dot(rayDir)) : PX_MAX_F32; + dp[3] = parent.mCounters[3] ? PxAbs(parent.mBucketBox[3].mCenter.dot(rayDir)) : PX_MAX_F32; + dp[4] = parent.mCounters[4] ? 
PxAbs(parent.mBucketBox[4].mCenter.dot(rayDir)) : PX_MAX_F32; + + PxU32 ii0 = 0; + PxU32 ii1 = 1; + PxU32 ii2 = 2; + PxU32 ii3 = 3; + PxU32 ii4 = 4; + + // PT: using integer cmps since we used fabsf above + // const PxU32* values = reinterpret_cast<const PxU32*>(dp); + const PxU32* values = PxUnionCast<PxU32*, PxF32*>(dp); + + PxU32 value0 = values[0]; + PxU32 value1 = values[1]; + PxU32 value2 = values[2]; + PxU32 value3 = values[3]; + PxU32 value4 = values[4]; + + for(PxU32 j=0;j<5-1;j++) + { + if(value1<value0) + { + tswap(value0, value1); + tswap(ii0, ii1); + } + if(value2<value1) + { + tswap(value1, value2); + tswap(ii1, ii2); + } + if(value3<value2) + { + tswap(value2, value3); + tswap(ii2, ii3); + } + if(value4<value3) + { + tswap(value3, value4); + tswap(ii3, ii4); + } + } + //return ii0|(ii1<<3)|(ii2<<6)|(ii3<<9)|(ii4<<12); + code = ii0|(ii1<<3)|(ii2<<6)|(ii3<<9)|(ii4<<12); + } +#endif + + dp[0] = parent.mCounters[0] ? parent.mBucketBox[0].mCenter.dot(rayDir) : PX_MAX_F32; + dp[1] = parent.mCounters[1] ? parent.mBucketBox[1].mCenter.dot(rayDir) : PX_MAX_F32; + dp[2] = parent.mCounters[2] ? parent.mBucketBox[2].mCenter.dot(rayDir) : PX_MAX_F32; + dp[3] = parent.mCounters[3] ? parent.mBucketBox[3].mCenter.dot(rayDir) : PX_MAX_F32; + dp[4] = parent.mCounters[4] ? 
parent.mBucketBox[4].mCenter.dot(rayDir) : PX_MAX_F32; + + const PxU32* values = PxUnionCast<PxU32*, PxF32*>(dp); + +// const PxU32 mask = ~7U; + const PxU32 mask = 0x7ffffff8; + PxU32 value0 = (values[0]&mask); + PxU32 value1 = (values[1]&mask)|1; + PxU32 value2 = (values[2]&mask)|2; + PxU32 value3 = (values[3]&mask)|3; + PxU32 value4 = (values[4]&mask)|4; + +#define SORT_BLOCK \ + if(value1<value0) tswap(value0, value1); \ + if(value2<value1) tswap(value1, value2); \ + if(value3<value2) tswap(value2, value3); \ + if(value4<value3) tswap(value3, value4); + SORT_BLOCK + SORT_BLOCK + SORT_BLOCK + SORT_BLOCK + + const PxU32 ii0 = value0&7; + const PxU32 ii1 = value1&7; + const PxU32 ii2 = value2&7; + const PxU32 ii3 = value3&7; + const PxU32 ii4 = value4&7; + const PxU32 code2 = ii0|(ii1<<3)|(ii2<<6)|(ii3<<9)|(ii4<<12); +#ifdef VERIFY_SORT + PX_ASSERT(code2==code); +#endif + return code2; +} + +static void gPrecomputeSort(BucketPrunerNode& node, const PxVec3* PX_RESTRICT dirs) +{ + for(int i=0;i<8;i++) + node.mOrder[i] = Ps::to16(sort(node, dirs[i])); +} +#endif + +void BucketPrunerCore::classifyBoxes() +{ + if(!mDirty) + return; + + mDirty = false; + + const PxU32 nb = mCoreNbObjects; + if(!nb) + { + mSortedNb=0; + return; + } + + PX_ASSERT(!mNbFree); + +#ifdef BRUTE_FORCE_LIMIT + if(nb<=BRUTE_FORCE_LIMIT) + { + allocateSortedMemory(nb); + BucketBox* sortedBoxes = mSortedWorldBoxes; + PrunerPayload* sortedObjects = mSortedObjects; + + const float Half = 0.5f; + const __m128 HalfV = _mm_load1_ps(&Half); + PX_ALIGN(16, PxVec4) bucketCenter; + PX_ALIGN(16, PxVec4) bucketExtents; + for(PxU32 i=0;i<nb;i++) + { + const __m128 bucketBoxMinV = _mm_loadu_ps(&mCoreBoxes[i].minimum.x); + const __m128 bucketBoxMaxV = _mm_loadu_ps(&mCoreBoxes[i].maximum.x); + const __m128 bucketBoxCenterV = _mm_mul_ps(_mm_add_ps(bucketBoxMaxV, bucketBoxMinV), HalfV); + const __m128 bucketBoxExtentsV = _mm_mul_ps(_mm_sub_ps(bucketBoxMaxV, bucketBoxMinV), HalfV); + _mm_store_ps(&bucketCenter.x, 
bucketBoxCenterV); + _mm_store_ps(&bucketExtents.x, bucketBoxExtentsV); + sortedBoxes[i].mCenter = PxVec3(bucketCenter.x, bucketCenter.y, bucketCenter.z); + sortedBoxes[i].mExtents = PxVec3(bucketExtents.x, bucketExtents.y, bucketExtents.z); + + sortedObjects[i] = mCoreObjects[i]; + } + return; + } +#endif + + +size_t* remap = reinterpret_cast<size_t*>(PX_ALLOC(nb*sizeof(size_t), "")); +for(PxU32 i=0;i<nb;i++) +{ + remap[i] = mCoreObjects[i].data[0]; + mCoreObjects[i].data[0] = i; +} + +// printf("Nb objects: %d\n", nb); + + PrunerPayload localTempObjects[LOCAL_SIZE]; + BucketBox localTempBoxes[LOCAL_SIZE]; + PrunerPayload* tempObjects; + BucketBox* tempBoxes; + if(nb>LOCAL_SIZE) + { + tempObjects = reinterpret_cast<PrunerPayload*>(PX_ALLOC(sizeof(PrunerPayload)*nb, "BucketPruner")); + tempBoxes = reinterpret_cast<BucketBox*>(PX_ALLOC(nb*sizeof(BucketBox), "BucketPruner")); + } + else + { + tempObjects = localTempObjects; + tempBoxes = localTempBoxes; + } + + mSortAxis = sortBoxes(nb, mCoreBoxes, mCoreObjects, mGlobalBox, tempBoxes, tempObjects); + + PX_ASSERT(mSortAxis); + + allocateSortedMemory(nb); + BucketBox* sortedBoxes = mSortedWorldBoxes; + PrunerPayload* sortedObjects = mSortedObjects; + + const PxU32 yz = PxU32(mSortAxis == 1 ? 
2 : 1); + const float limitX = mGlobalBox.mCenter.x; + const float limitYZ = mGlobalBox.mCenter[yz]; + mLevel1.classifyBoxes(limitX, limitYZ, nb, tempBoxes, tempObjects, + sortedBoxes, sortedObjects, + false, mSortAxis); + + processChildBuckets(nb, tempBoxes, tempObjects, + mLevel1, mLevel2, mSortedWorldBoxes, mSortedObjects, + mSortAxis); + + for(PxU32 j=0;j<5;j++) + processChildBuckets(nb, tempBoxes, tempObjects, + mLevel2[j], mLevel3[j], mSortedWorldBoxes + mLevel1.mOffsets[j], mSortedObjects + mLevel1.mOffsets[j], + mSortAxis); + + { + for(PxU32 i=0;i<nb;i++) + { + encodeBoxMinMax(mSortedWorldBoxes[i], mSortAxis); + } + } + + if(nb>LOCAL_SIZE) + { + PX_FREE(tempBoxes); + PX_FREE(tempObjects); + } + +for(PxU32 i=0;i<nb;i++) +{ + const PxU32 coreIndex = PxU32(mSortedObjects[i].data[0]); + const size_t saved = remap[coreIndex]; + mSortedObjects[i].data[0] = saved; + mCoreObjects[coreIndex].data[0] = saved; + if(mCoreRemap) + mCoreRemap[coreIndex] = i; +// remap[i] = mCoreObjects[i].data[0]; +// mCoreObjects[i].data[0] = i; +} +PX_FREE(remap); + +/* if(mOwnMemory) + { + PX_FREE_AND_RESET(mCoreBoxes); + PX_FREE_AND_RESET(mCoreObjects); + }*/ + + +#ifdef NODE_SORT + { + PxVec3 dirs[8]; + dirs[0] = PxVec3(1.0f, 1.0f, 1.0f); + dirs[1] = PxVec3(1.0f, 1.0f, -1.0f); + dirs[2] = PxVec3(1.0f, -1.0f, 1.0f); + dirs[3] = PxVec3(1.0f, -1.0f, -1.0f); + dirs[4] = PxVec3(-1.0f, 1.0f, 1.0f); + dirs[5] = PxVec3(-1.0f, 1.0f, -1.0f); + dirs[6] = PxVec3(-1.0f, -1.0f, 1.0f); + dirs[7] = PxVec3(-1.0f, -1.0f, -1.0f); + for(int i=0;i<8;i++) + dirs[i].normalize(); + + gPrecomputeSort(mLevel1, dirs); + + for(PxU32 i=0;i<5;i++) + gPrecomputeSort(mLevel2[i], dirs); + + for(PxU32 j=0;j<5;j++) + { + for(PxU32 i=0;i<5;i++) + gPrecomputeSort(mLevel3[j][i], dirs); + } + } +#endif +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifdef 
CAN_USE_MOVEMASK + struct RayParams + { + PX_ALIGN(16, PxVec3 mData2); float padding0; + PX_ALIGN(16, PxVec3 mFDir); float padding1; + PX_ALIGN(16, PxVec3 mData); float padding2; + PX_ALIGN(16, PxVec3 mInflate); float padding3; + }; + + static PX_FORCE_INLINE void precomputeRayData(RayParams* PX_RESTRICT rayParams, const PxVec3& rayOrig, const PxVec3& rayDir, float maxDist) + { + #ifdef USE_SIMD + const float Half = 0.5f * maxDist; + const __m128 HalfV = _mm_load1_ps(&Half); + const __m128 DataV = _mm_mul_ps(_mm_loadu_ps(&rayDir.x), HalfV); + const __m128 Data2V = _mm_add_ps(_mm_loadu_ps(&rayOrig.x), DataV); + const PxU32 MaskI = 0x7fffffff; + const __m128 FDirV = _mm_and_ps(_mm_load1_ps(reinterpret_cast<const float*>(&MaskI)), DataV); + _mm_store_ps(&rayParams->mData.x, DataV); + _mm_store_ps(&rayParams->mData2.x, Data2V); + _mm_store_ps(&rayParams->mFDir.x, FDirV); + #else + const PxVec3 data = 0.5f * rayDir * maxDist; + rayParams->mData = data; + rayParams->mData2 = rayOrig + data; + rayParams->mFDir.x = PxAbs(data.x); + rayParams->mFDir.y = PxAbs(data.y); + rayParams->mFDir.z = PxAbs(data.z); + #endif + } + + template <int inflateT> + static PX_FORCE_INLINE IntBool _segmentAABB(const BucketBox& box, const RayParams* PX_RESTRICT params) + { + #ifdef USE_SIMD + const PxU32 maskI = 0x7fffffff; + const __m128 fdirV = _mm_load_ps(¶ms->mFDir.x); +// #ifdef _DEBUG + const __m128 extentsV = inflateT ? _mm_add_ps(_mm_loadu_ps(&box.mExtents.x), _mm_load_ps(¶ms->mInflate.x)) : _mm_loadu_ps(&box.mExtents.x); + const __m128 DV = _mm_sub_ps(_mm_load_ps(¶ms->mData2.x), _mm_loadu_ps(&box.mCenter.x)); +/* #else + const __m128 extentsV = inflateT ? 
_mm_add_ps(_mm_load_ps(&box.mExtents.x), _mm_load_ps(¶ms->mInflate.x)) : _mm_load_ps(&box.mExtents.x); + const __m128 DV = _mm_sub_ps(_mm_load_ps(¶ms->mData2.x), _mm_load_ps(&box.mCenter.x)); + #endif*/ + __m128 absDV = _mm_and_ps(DV, _mm_load1_ps(reinterpret_cast<const float*>(&maskI))); + absDV = _mm_cmpgt_ps(absDV, _mm_add_ps(extentsV, fdirV)); + const PxU32 test = PxU32(_mm_movemask_ps(absDV)); + if(test&7) + return 0; + + const __m128 dataZYX_V = _mm_load_ps(¶ms->mData.x); + const __m128 dataXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dataZYX_V), _MM_SHUFFLE(3,0,2,1))); + const __m128 DXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(DV), _MM_SHUFFLE(3,0,2,1))); + const __m128 fV = _mm_sub_ps(_mm_mul_ps(dataZYX_V, DXZY_V), _mm_mul_ps(dataXZY_V, DV)); + + const __m128 fdirZYX_V = _mm_load_ps(¶ms->mFDir.x); + const __m128 fdirXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(fdirZYX_V), _MM_SHUFFLE(3,0,2,1))); + const __m128 extentsXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1))); + const __m128 fg = _mm_add_ps(_mm_mul_ps(extentsV, fdirXZY_V), _mm_mul_ps(extentsXZY_V, fdirZYX_V)); + + __m128 absfV = _mm_and_ps(fV, _mm_load1_ps(reinterpret_cast<const float*>(&maskI))); + absfV = _mm_cmpgt_ps(absfV, fg); + const PxU32 test2 = PxU32(_mm_movemask_ps(absfV)); + if(test2&7) + return 0; + return 1; + #else + const float boxExtentsx = inflateT ? box.mExtents.x + params->mInflate.x : box.mExtents.x; + const float Dx = params->mData2.x - box.mCenter.x; if(fabsf(Dx) > boxExtentsx + params->mFDir.x) return IntFalse; + + const float boxExtentsz = inflateT ? box.mExtents.z + params->mInflate.z : box.mExtents.z; + const float Dz = params->mData2.z - box.mCenter.z; if(fabsf(Dz) > boxExtentsz + params->mFDir.z) return IntFalse; + + const float boxExtentsy = inflateT ? 
box.mExtents.y + params->mInflate.y : box.mExtents.y; + const float Dy = params->mData2.y - box.mCenter.y; if(fabsf(Dy) > boxExtentsy + params->mFDir.y) return IntFalse; + + float f; + f = params->mData.y * Dz - params->mData.z * Dy; if(fabsf(f) > boxExtentsy*params->mFDir.z + boxExtentsz*params->mFDir.y) return IntFalse; + f = params->mData.z * Dx - params->mData.x * Dz; if(fabsf(f) > boxExtentsx*params->mFDir.z + boxExtentsz*params->mFDir.x) return IntFalse; + f = params->mData.x * Dy - params->mData.y * Dx; if(fabsf(f) > boxExtentsx*params->mFDir.y + boxExtentsy*params->mFDir.x) return IntFalse; + return IntTrue; + #endif + } +#else + #include "SqPrunerTestsSIMD.h" + + typedef RayAABBTest BPRayAABBTest; + +template <int inflateT> +static PX_FORCE_INLINE IntBool _segmentAABB(const BucketBox& box, const BPRayAABBTest& test) +{ + return static_cast<IntBool>(test.check<inflateT>(V3LoadU(box.mCenter), V3LoadU(box.mExtents))); +} + +/*static PX_FORCE_INLINE IntBool _segmentAABB(const BucketBox& box, const BPRayAABBTest& test, PxU32 rayMinLimitX, PxU32 rayMaxLimitX) +{ + if(rayMinLimitX>box.mData1) + return 0; + if(rayMaxLimitX<box.mData0) + return 0; + + return test(Vec3V_From_PxVec3(box.mCenter), Vec3V_From_PxVec3(box.mExtents)); +}*/ +#endif + +template <int inflateT> +static PxAgain processBucket( + PxU32 nb, const BucketBox* PX_RESTRICT baseBoxes, PrunerPayload* PX_RESTRICT baseObjects, + PxU32 offset, PxU32 totalAllocated, + const PxVec3& rayOrig, const PxVec3& rayDir, float& maxDist, +#ifdef CAN_USE_MOVEMASK + RayParams* PX_RESTRICT rayParams, +#else + BPRayAABBTest& test, const PxVec3& inflate, +#endif + PrunerCallback& pcb, PxU32& _rayMinLimitInt, PxU32& _rayMaxLimitInt, PxU32 sortAxis) +{ + PX_UNUSED(totalAllocated); + + const BucketBox* PX_RESTRICT _boxes = baseBoxes + offset; + PrunerPayload* PX_RESTRICT _objects = baseObjects + offset; + + PxU32 rayMinLimitInt = _rayMinLimitInt; + PxU32 rayMaxLimitInt = _rayMaxLimitInt; + + const BucketBox* last = _boxes + 
nb; + + while(_boxes!=last) + { + const BucketBox& currentBox = *_boxes++; + PrunerPayload* currentObject = _objects++; + + if(currentBox.mData1<rayMinLimitInt) + continue; + + if(currentBox.mData0>rayMaxLimitInt) + goto Exit; + +#ifdef CAN_USE_MOVEMASK + if(!_segmentAABB<inflateT>(currentBox, rayParams)) + continue; +#else + if(!_segmentAABB<inflateT>(currentBox, test)) + continue; +#endif + + const float MaxDist = maxDist; + const PxAgain again = pcb.invoke(maxDist, *currentObject); + if(!again) + return false; + if(maxDist < MaxDist) + { + float rayMinLimit, rayMaxLimit; +#ifdef CAN_USE_MOVEMASK + if(inflateT) + computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, rayParams->mInflate, sortAxis); + else + computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, sortAxis); + + precomputeRayData(rayParams, rayOrig, rayDir, maxDist); +#else + if(inflateT) + computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, inflate, sortAxis); + else + computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, sortAxis); + + test.setDistance(maxDist); +#endif + const PxU32* binaryMinLimit = reinterpret_cast<const PxU32*>(&rayMinLimit); + const PxU32* binaryMaxLimit = reinterpret_cast<const PxU32*>(&rayMaxLimit); + rayMinLimitInt = encodeFloat(binaryMinLimit[0]); + rayMaxLimitInt = encodeFloat(binaryMaxLimit[0]); + } + } +Exit: + + _rayMinLimitInt = rayMinLimitInt; + _rayMaxLimitInt = rayMaxLimitInt; + return true; +} + +#ifdef NODE_SORT +static PxU32 computeDirMask(const PxVec3& dir) +{ + const PxU32* binary = reinterpret_cast<const PxU32*>(&dir.x); + const PxU32 X = (binary[0])>>31; + const PxU32 Y = (binary[1])>>31; + const PxU32 Z = (binary[2])>>31; + return Z|(Y<<1)|(X<<2); +} +#endif + +template <int inflateT> +static PxAgain stab(const BucketPrunerCore& core, PrunerCallback& pcb, const PxVec3& rayOrig, const PxVec3& rayDir, float& maxDist, const PxVec3 inflate) +{ + const PxU32 nb = core.mSortedNb; + if(!nb && 
!core.mNbFree) + return true; + + if(maxDist==PX_MAX_F32) + { + /*const*/ PxVec3 boxMin = core.mGlobalBox.getMin() - inflate; + /*const*/ PxVec3 boxMax = core.mGlobalBox.getMax() + inflate; + + if(core.mNbFree) + { + // TODO: optimize this + PxBounds3 freeGlobalBounds; + freeGlobalBounds.setEmpty(); + for(PxU32 i=0;i<core.mNbFree;i++) + freeGlobalBounds.include(core.mFreeBounds[i]); + freeGlobalBounds.minimum -= inflate; + freeGlobalBounds.maximum += inflate; + boxMin = boxMin.minimum(freeGlobalBounds.minimum); + boxMax = boxMax.maximum(freeGlobalBounds.maximum); + } + + clipRay(rayOrig, rayDir, maxDist, boxMin, boxMax); + } + +#ifdef CAN_USE_MOVEMASK + RayParams rayParams; + #ifdef USE_SIMD + rayParams.padding0 = rayParams.padding1 = rayParams.padding2 = rayParams.padding3 = 0.0f; + #endif + if(inflateT) + rayParams.mInflate = inflate; + + precomputeRayData(&rayParams, rayOrig, rayDir, maxDist); +#else + BPRayAABBTest test(rayOrig, rayDir, maxDist, inflateT ? inflate : PxVec3(0.0f)); +#endif + + for(PxU32 i=0;i<core.mNbFree;i++) + { + BucketBox tmp; + tmp.mCenter = core.mFreeBounds[i].getCenter(); + tmp.mExtents = core.mFreeBounds[i].getExtents(); + +#ifdef CAN_USE_MOVEMASK + if(_segmentAABB<inflateT>(tmp, &rayParams)) +#else + if(_segmentAABB<inflateT>(tmp, test)) +#endif + { + if(!pcb.invoke(maxDist, core.mFreeObjects[i])) + return false; + } + } + + if(!nb) + return true; + +#ifdef CAN_USE_MOVEMASK + if(!_segmentAABB<inflateT>(core.mGlobalBox, &rayParams)) + return true; +#else + if(!_segmentAABB<inflateT>(core.mGlobalBox, test)) + return true; +#endif + + const PxU32 sortAxis = core.mSortAxis; + float rayMinLimit, rayMaxLimit; + if(inflateT) + computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, inflate, sortAxis); + else + computeRayLimits(rayMinLimit, rayMaxLimit, rayOrig, rayDir, maxDist, sortAxis); + + const PxU32* binaryMinLimit = reinterpret_cast<const PxU32*>(&rayMinLimit); + const PxU32* binaryMaxLimit = reinterpret_cast<const 
PxU32*>(&rayMaxLimit); + PxU32 rayMinLimitInt = encodeFloat(binaryMinLimit[0]); + PxU32 rayMaxLimitInt = encodeFloat(binaryMaxLimit[0]); +/* +float rayMinLimitX, rayMaxLimitX; +if(inflateT) + computeRayLimits(rayMinLimitX, rayMaxLimitX, rayOrig, rayDir, maxDist, inflate, 0); +else + computeRayLimits(rayMinLimitX, rayMaxLimitX, rayOrig, rayDir, maxDist, 0); + +PxU32 rayMinLimitIntX = encodeFloat(PX_IR(rayMinLimitX)); +PxU32 rayMaxLimitIntX = encodeFloat(PX_IR(rayMaxLimitX)); +*/ + + float currentDist = maxDist; + +#ifdef NODE_SORT + const PxU32 dirIndex = computeDirMask(rayDir); + PxU32 orderi = core.mLevel1.mOrder[dirIndex]; +// PxU32 orderi = sort(core.mLevel1, rayDir); + + for(PxU32 i_=0;i_<5;i_++) + { + const PxU32 i = orderi&7; orderi>>=3; +#else + for(PxU32 i=0;i<5;i++) + { +#endif + +#ifdef CAN_USE_MOVEMASK + if(core.mLevel1.mCounters[i] && _segmentAABB<inflateT>(core.mLevel1.mBucketBox[i], &rayParams)) +#else + if(core.mLevel1.mCounters[i] && _segmentAABB<inflateT>(core.mLevel1.mBucketBox[i], test)) +// if(core.mLevel1.mCounters[i] && _segmentAABB<inflateT>(core.mLevel1.mBucketBox[i], test, rayMinLimitIntX, rayMaxLimitIntX)) +#endif + { + +#ifdef NODE_SORT + PxU32 orderj = core.mLevel2[i].mOrder[dirIndex]; +// PxU32 orderj = sort(core.mLevel2[i], rayDir); + + for(PxU32 j_=0;j_<5;j_++) + { + const PxU32 j = orderj&7; orderj>>=3; +#else + for(PxU32 j=0;j<5;j++) + { +#endif + +#ifdef CAN_USE_MOVEMASK + if(core.mLevel2[i].mCounters[j] && _segmentAABB<inflateT>(core.mLevel2[i].mBucketBox[j], &rayParams)) +#else + if(core.mLevel2[i].mCounters[j] && _segmentAABB<inflateT>(core.mLevel2[i].mBucketBox[j], test)) +// if(core.mLevel2[i].mCounters[j] && _segmentAABB<inflateT>(core.mLevel2[i].mBucketBox[j], test, rayMinLimitIntX, rayMaxLimitIntX)) +#endif + { + const BucketPrunerNode& parent = core.mLevel3[i][j]; + const PxU32 parentOffset = core.mLevel1.mOffsets[i] + core.mLevel2[i].mOffsets[j]; + +#ifdef NODE_SORT + PxU32 orderk = parent.mOrder[dirIndex]; +// PxU32 
orderk = sort(parent, rayDir); + + for(PxU32 k_=0;k_<5;k_++) + { + const PxU32 k = orderk&7; orderk>>=3; +#else + for(PxU32 k=0;k<5;k++) + { +#endif + const PxU32 nbInBucket = parent.mCounters[k]; +#ifdef CAN_USE_MOVEMASK + if(nbInBucket && _segmentAABB<inflateT>(parent.mBucketBox[k], &rayParams)) +#else + if(nbInBucket && _segmentAABB<inflateT>(parent.mBucketBox[k], test)) +// if(nbInBucket && _segmentAABB<inflateT>(parent.mBucketBox[k], test, rayMinLimitIntX, rayMaxLimitIntX)) +#endif + { + const PxU32 offset = parentOffset + parent.mOffsets[k]; + const PxAgain again = processBucket<inflateT>( nbInBucket, core.mSortedWorldBoxes, core.mSortedObjects, + offset, core.mSortedNb, + rayOrig, rayDir, currentDist, +#ifdef CAN_USE_MOVEMASK + &rayParams, +#else + test, inflate, +#endif + pcb, + rayMinLimitInt, rayMaxLimitInt, + sortAxis); + if(!again) + return false; + } + } + } + } + } + } + + maxDist = currentDist; + return true; +} + +PxAgain BucketPrunerCore::raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const +{ + return ::stab<0>(*this, pcb, origin, unitDir, inOutDistance, PxVec3(0.0f)); +} + +PxAgain BucketPrunerCore::sweep(const ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const +{ + const PxVec3 extents = queryVolume.getPrunerInflatedWorldAABB().getExtents(); + return ::stab<1>(*this, pcb, queryVolume.getPrunerInflatedWorldAABB().getCenter(), unitDir, inOutDistance, extents); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +template<bool doAssert, typename Test> +static PX_FORCE_INLINE bool processBucket( PxU32 nb, const BucketBox* PX_RESTRICT baseBoxes, PrunerPayload* PX_RESTRICT baseObjects, + PxU32 offset, PxU32 totalAllocated, + const Test& test, PrunerCallback& pcb, + PxU32 minLimitInt, PxU32 maxLimitInt) 
+{ + PX_UNUSED(totalAllocated); + + const BucketBox* PX_RESTRICT boxes = baseBoxes + offset; + PrunerPayload* PX_RESTRICT objects = baseObjects + offset; + + while(nb--) + { + const BucketBox& currentBox = *boxes++; + PrunerPayload* currentObject = objects++; + + if(currentBox.mData1<minLimitInt) + { + if(doAssert) + PX_ASSERT(!test(currentBox)); + continue; + } + + if(currentBox.mData0>maxLimitInt) + { + if(doAssert) + PX_ASSERT(!test(currentBox)); + return true; + } + + if(test(currentBox)) + { + PxReal dist = -1.0f; // no distance for overlaps + if(!pcb.invoke(dist, *currentObject)) + return false; + } + } + return true; +} + +template<typename Test, bool isPrecise> +class BucketPrunerOverlapTraversal +{ +public: + PX_FORCE_INLINE BucketPrunerOverlapTraversal() {} + + /*PX_FORCE_INLINE*/ bool operator()(const BucketPrunerCore& core, const Test& test, PrunerCallback& pcb, const PxBounds3& cullBox) const + { + for(PxU32 i=0;i<core.mNbFree;i++) + { + if(test(core.mFreeBounds[i])) + { + PxReal dist = -1.0f; // no distance for overlaps + if(!pcb.invoke(dist, core.mFreeObjects[i])) + return false; + } + } + + const PxU32 nb = core.mSortedNb; + if(!nb) + return true; + +#ifdef BRUTE_FORCE_LIMIT + if(nb<=BRUTE_FORCE_LIMIT) + { + for(PxU32 i=0;i<nb;i++) + { + if(test(core.mSortedWorldBoxes[i])) + { + PxReal dist = -1.0f; // no distance for overlaps + if(!pcb.invoke(dist, core.mSortedObjects[i])) + return false; + } + } + return true; + } +#endif + + if(!test(core.mGlobalBox)) + return true; + + const PxU32 sortAxis = core.mSortAxis; + const float boxMinLimit = cullBox.minimum[sortAxis]; + const float boxMaxLimit = cullBox.maximum[sortAxis]; + + const PxU32* binaryMinLimit = reinterpret_cast<const PxU32*>(&boxMinLimit); + const PxU32* binaryMaxLimit = reinterpret_cast<const PxU32*>(&boxMaxLimit); + const PxU32 rayMinLimitInt = encodeFloat(binaryMinLimit[0]); + const PxU32 rayMaxLimitInt = encodeFloat(binaryMaxLimit[0]); + + for(PxU32 i=0;i<5;i++) + { + 
if(core.mLevel1.mCounters[i] && test(core.mLevel1.mBucketBox[i])) + { + for(PxU32 j=0;j<5;j++) + { + if(core.mLevel2[i].mCounters[j] && test(core.mLevel2[i].mBucketBox[j])) + { + for(PxU32 k=0;k<5;k++) + { + const PxU32 nbInBucket = core.mLevel3[i][j].mCounters[k]; + if(nbInBucket && test(core.mLevel3[i][j].mBucketBox[k])) + { + const PxU32 offset = core.mLevel1.mOffsets[i] + core.mLevel2[i].mOffsets[j] + core.mLevel3[i][j].mOffsets[k]; + if(!processBucket<isPrecise>(nbInBucket, core.mSortedWorldBoxes, core.mSortedObjects, + offset, core.mSortedNb, test, pcb, rayMinLimitInt, rayMaxLimitInt)) + return false; + } + } + } + } + } + } + return true; + } +}; + +/////////////////////////////////////////////////////////////////////////////// + +#ifdef CAN_USE_MOVEMASK +PX_FORCE_INLINE PxU32 BAllTrue3_R(const BoolV a) +{ + const PxI32 moveMask = _mm_movemask_ps(a); + return PxU32((moveMask & 0x7) == (0x7)); +} +#endif + +#ifdef USE_SIMD +struct SphereAABBTest_SIMD +{ + PX_FORCE_INLINE SphereAABBTest_SIMD(const Gu::Sphere& sphere) : + #ifdef CAN_USE_MOVEMASK + mCenter (V4LoadU(&sphere.center.x)), + #else + mCenter (V3LoadU(sphere.center)), + #endif + mRadius2(FLoad(sphere.radius * sphere.radius)) + {} + + PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const + { + #ifdef CAN_USE_MOVEMASK + const Vec4V boxCenter = AlignedLoad(&box.mCenter.x); + const Vec4V boxExtents = AlignedLoad(&box.mExtents.x); + // + const Vec4V offset = V4Sub(mCenter, boxCenter); + const Vec4V closest = V4Clamp(offset, V4Neg(boxExtents), boxExtents); + const Vec4V d = V4Sub(offset, closest); + + const FloatV dot = V4Dot3(d,d); + return Ps::IntBool(BAllTrue3_R(FIsGrtrOrEq(mRadius2, dot))); + #else + const Vec3V boxCenter = V3LoadU(box.mCenter); + const Vec3V boxExtents = V3LoadU(box.mExtents); + // + const Vec3V offset = V3Sub(mCenter, boxCenter); + const Vec3V closest = V3Clamp(offset, V3Neg(boxExtents), boxExtents); + const Vec3V d = V3Sub(offset, closest); + return 
Ps::IntBool(BAllEqTTTT(FIsGrtrOrEq(mRadius2, V3Dot(d, d)))); + #endif + } + + PX_FORCE_INLINE Ps::IntBool operator()(const PxBounds3& bounds) const + { + BucketBox tmp; + tmp.mCenter = bounds.getCenter(); + tmp.mExtents = bounds.getExtents(); + return (*this)(tmp); + } + +private: + SphereAABBTest_SIMD& operator=(const SphereAABBTest_SIMD&); + #ifdef CAN_USE_MOVEMASK + const Vec4V mCenter; + #else + const Vec3V mCenter; + #endif + const FloatV mRadius2; +}; +#else +struct SphereAABBTest_Scalar +{ + PX_FORCE_INLINE SphereAABBTest_Scalar(const Gu::Sphere& sphere) : + mCenter (sphere.center), + mRadius2(sphere.radius * sphere.radius) + {} + + PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const + { + const PxVec3 minimum = box.getMin(); + const PxVec3 maximum = box.getMax(); + + float d = 0.0f; + + //find the square of the distance + //from the sphere to the box + for(PxU32 i=0;i<3;i++) + { + if(mCenter[i]<minimum[i]) + { + const float s = mCenter[i] - minimum[i]; + d += s*s; + } + else if(mCenter[i]>maximum[i]) + { + const float s = mCenter[i] - maximum[i]; + d += s*s; + } + } + return d <= mRadius2; + } + +private: + SphereAABBTest_Scalar& operator=(const SphereAABBTest_Scalar&); + const PxVec3 mCenter; + float mRadius2; +}; +#endif + +#ifdef USE_SIMD +typedef SphereAABBTest_SIMD BucketPrunerSphereAABBTest; +#else +typedef SphereAABBTest_Scalar BucketPrunerSphereAABBTest; +#endif + +/////////////////////////////////////////////////////////////////////////////// + +struct BucketPrunerAABBAABBTest +{ + PX_FORCE_INLINE BucketPrunerAABBAABBTest(const PxBounds3& queryBox) : mBox(queryBox) {} + + PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const + { + // PT: we don't use PxBounds3::intersects() because isValid() asserts on our empty boxes! 
+ const PxVec3 bucketMin = box.getMin(); + const PxVec3 bucketMax = box.getMax(); + return !(mBox.minimum.x > bucketMax.x || bucketMin.x > mBox.maximum.x || + mBox.minimum.y > bucketMax.y || bucketMin.y > mBox.maximum.y || + mBox.minimum.z > bucketMax.z || bucketMin.z > mBox.maximum.z); + } + + PX_FORCE_INLINE Ps::IntBool operator()(const PxBounds3& bounds) const + { + // PT: we don't use PxBounds3::intersects() because isValid() asserts on our empty boxes! + const PxVec3& bucketMin = bounds.minimum; + const PxVec3& bucketMax = bounds.maximum; + return !(mBox.minimum.x > bucketMax.x || bucketMin.x > mBox.maximum.x || + mBox.minimum.y > bucketMax.y || bucketMin.y > mBox.maximum.y || + mBox.minimum.z > bucketMax.z || bucketMin.z > mBox.maximum.z); + } +private: + BucketPrunerAABBAABBTest& operator=(const BucketPrunerAABBAABBTest&); + const PxBounds3 mBox; +}; + +/*struct BucketPrunerAABBAABBTest_SIMD +{ + PX_FORCE_INLINE BucketPrunerAABBAABBTest_SIMD(const PxBounds3& b) + : mCenter(V3LoadU(b.getCenter())) + , mExtents(V3LoadU(b.getExtents())) + {} + + PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const + { + return V3AllGrtrOrEq(V3Add(mExtents, AlignedLoad(&box.mExtents.x)), V3Abs(V3Sub(AlignedLoad(&box.mCenter.x), mCenter))); + } +private: + BucketPrunerAABBAABBTest_SIMD& operator=(const BucketPrunerAABBAABBTest_SIMD&); + const Vec3V mCenter, mExtents; +};*/ + +/////////////////////////////////////////////////////////////////////////////// + +#ifdef USE_SIMD +struct OBBAABBTest_SIMD +{ + OBBAABBTest_SIMD(const PxMat33& rotation, const PxVec3& translation, const PxVec3& extents) + { + const Vec3V eps = V3Load(1e-6f); + + mT = V3LoadU(translation); + mExtents = V3LoadU(extents); + + // storing the transpose matrices yields a simpler SIMD test + mRT = Mat33V_From_PxMat33(rotation.getTranspose()); + mART = Mat33V(V3Add(V3Abs(mRT.col0), eps), V3Add(V3Abs(mRT.col1), eps), V3Add(V3Abs(mRT.col2), eps)); + mBB_xyz = M33TrnspsMulV3(mART, mExtents); + +/* 
if(fullTest) + { + const Vec3V eYZX = V3PermYZX(mExtents), eZXY = V3PermZXY(mExtents); + + mBB_123 = V3MulAdd(eYZX, V3PermZXY(mART.col0), V3Mul(eZXY, V3PermYZX(mART.col0))); + mBB_456 = V3MulAdd(eYZX, V3PermZXY(mART.col1), V3Mul(eZXY, V3PermYZX(mART.col1))); + mBB_789 = V3MulAdd(eYZX, V3PermZXY(mART.col2), V3Mul(eZXY, V3PermYZX(mART.col2))); + }*/ + } + + PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const + { + const Vec3V extentsV = V3LoadU(box.mExtents); + + const Vec3V t = V3Sub(mT, V3LoadU(box.mCenter)); + + // class I - axes of AABB + if(V3OutOfBounds(t, V3Add(extentsV, mBB_xyz))) + return Ps::IntFalse; + + const Vec3V rX = mRT.col0, rY = mRT.col1, rZ = mRT.col2; + const Vec3V arX = mART.col0, arY = mART.col1, arZ = mART.col2; + + const FloatV eX = V3GetX(extentsV), eY = V3GetY(extentsV), eZ = V3GetZ(extentsV); + const FloatV tX = V3GetX(t), tY = V3GetY(t), tZ = V3GetZ(t); + + // class II - axes of OBB + { + const Vec3V v = V3ScaleAdd(rZ, tZ, V3ScaleAdd(rY, tY, V3Scale(rX, tX))); + const Vec3V v2 = V3ScaleAdd(arZ, eZ, V3ScaleAdd(arY, eY, V3ScaleAdd(arX, eX, mExtents))); + if(V3OutOfBounds(v, v2)) + return Ps::IntFalse; + } + +// if(!fullTest) + return Ps::IntTrue; + +/* // class III - edge cross products. 
Almost all OBB tests early-out with type I or type II, + // so early-outs here probably aren't useful (TODO: profile) + + const Vec3V va = V3NegScaleSub(rZ, tY, V3Scale(rY, tZ)); + const Vec3V va2 = V3ScaleAdd(arY, eZ, V3ScaleAdd(arZ, eY, mBB_123)); + const BoolV ba = BOr(V3IsGrtr(va, va2), V3IsGrtr(V3Neg(va2), va)); + + const Vec3V vb = V3NegScaleSub(rX, tZ, V3Scale(rZ, tX)); + const Vec3V vb2 = V3ScaleAdd(arX, eZ, V3ScaleAdd(arZ, eX, mBB_456)); + const BoolV bb = BOr(V3IsGrtr(vb, vb2), V3IsGrtr(V3Neg(vb2), vb)); + + const Vec3V vc = V3NegScaleSub(rY, tX, V3Scale(rX, tY)); + const Vec3V vc2 = V3ScaleAdd(arX, eY, V3ScaleAdd(arY, eX, mBB_789)); + const BoolV bc = BOr(V3IsGrtr(vc, vc2), V3IsGrtr(V3Neg(vc2), vc)); + + return BAllEq(BOr(ba, BOr(bb,bc)), BFFFF());*/ + } + + PX_FORCE_INLINE Ps::IntBool operator()(const PxBounds3& bounds) const + { + BucketBox tmp; + tmp.mCenter = bounds.getCenter(); + tmp.mExtents = bounds.getExtents(); + return (*this)(tmp); + } + + Vec3V mExtents; // extents of OBB + Vec3V mT; // translation of OBB + Mat33V mRT; // transpose of rotation matrix of OBB + Mat33V mART; // transpose of mRT, padded by epsilon + Vec3V mBB_xyz; // extents of OBB along coordinate axes + +/* Vec3V mBB_123; // projections of extents onto edge-cross axes + Vec3V mBB_456; + Vec3V mBB_789;*/ +}; +#else +struct OBBAABBTest_Scalar +{ + OBBAABBTest_Scalar(const PxMat33& rotation, const PxVec3& translation, const PxVec3& extents) + { + mR = rotation; + mT = translation; + mExtents = extents; + + const PxVec3 eps(1e-6f); + mAR = PxMat33(mR[0].abs() + eps, mR[1].abs() + eps, mR[2].abs() + eps); // Epsilon prevents floating-point inaccuracies (strategy borrowed from RAPID) + mBB_xyz = mAR.transform(mExtents); // Precompute box-box data - Courtesy of Erwin de Vries + +/* PxReal ex = mExtents.x, ey = mExtents.y, ez = mExtents.z; + mBB_1 = ey*mAR[2].x + ez*mAR[1].x; mBB_2 = ez*mAR[0].x + ex*mAR[2].x; mBB_3 = ex*mAR[1].x + ey*mAR[0].x; + mBB_4 = ey*mAR[2].y + ez*mAR[1].y; 
mBB_5 = ez*mAR[0].y + ex*mAR[2].y; mBB_6 = ex*mAR[1].y + ey*mAR[0].y; + mBB_7 = ey*mAR[2].z + ez*mAR[1].z; mBB_8 = ez*mAR[0].z + ex*mAR[2].z; mBB_9 = ex*mAR[1].z + ey*mAR[0].z;*/ + } + + PX_FORCE_INLINE Ps::IntBool operator()(const BucketBox& box) const + { + const PxVec3& c = box.mCenter; + const PxVec3& e = box.mExtents; + + const PxVec3 T = mT - c; + // Class I : A's basis vectors + if(PxAbs(T.x) > e.x + mBB_xyz.x) return Ps::IntFalse; + if(PxAbs(T.y) > e.y + mBB_xyz.y) return Ps::IntFalse; + if(PxAbs(T.z) > e.z + mBB_xyz.z) return Ps::IntFalse; + + // Class II : B's basis vectors + if(PxAbs(T.dot(mR[0])) > e.dot(mAR[0]) + mExtents.x) return Ps::IntFalse; + if(PxAbs(T.dot(mR[1])) > e.dot(mAR[1]) + mExtents.y) return Ps::IntFalse; + if(PxAbs(T.dot(mR[2])) > e.dot(mAR[2]) + mExtents.z) return Ps::IntFalse; + + // Class III : 9 cross products + if(0) + { + if(PxAbs(T.z*mR[0].y - T.y*mR[0].z) > e.y*mAR[0].z + e.z*mAR[0].y + mBB_1) return Ps::IntFalse; // L = A0 x B0 + if(PxAbs(T.z*mR[1].y - T.y*mR[1].z) > e.y*mAR[1].z + e.z*mAR[1].y + mBB_2) return Ps::IntFalse; // L = A0 x B1 + if(PxAbs(T.z*mR[2].y - T.y*mR[2].z) > e.y*mAR[2].z + e.z*mAR[2].y + mBB_3) return Ps::IntFalse; // L = A0 x B2 + + if(PxAbs(T.x*mR[0].z - T.z*mR[0].x) > e.x*mAR[0].z + e.z*mAR[0].x + mBB_4) return Ps::IntFalse; // L = A1 x B0 + if(PxAbs(T.x*mR[1].z - T.z*mR[1].x) > e.x*mAR[1].z + e.z*mAR[1].x + mBB_5) return Ps::IntFalse; // L = A1 x B1 + if(PxAbs(T.x*mR[2].z - T.z*mR[2].x) > e.x*mAR[2].z + e.z*mAR[2].x + mBB_6) return Ps::IntFalse; // L = A1 x B2 + + if(PxAbs(T.y*mR[0].x - T.x*mR[0].y) > e.x*mAR[0].y + e.y*mAR[0].x + mBB_7) return Ps::IntFalse; // L = A2 x B0 + if(PxAbs(T.y*mR[1].x - T.x*mR[1].y) > e.x*mAR[1].y + e.y*mAR[1].x + mBB_8) return Ps::IntFalse; // L = A2 x B1 + if(PxAbs(T.y*mR[2].x - T.x*mR[2].y) > e.x*mAR[2].y + e.y*mAR[2].x + mBB_9) return Ps::IntFalse; // L = A2 x B2 + } + return Ps::IntTrue; + } + +private: + PxMat33 mR; // rotation matrix + PxMat33 mAR; // absolute rotation 
matrix + PxVec3 mT; // translation from obb space to model space + PxVec3 mExtents; + + PxVec3 mBB_xyz; + + float mBB_1, mBB_2, mBB_3; + float mBB_4, mBB_5, mBB_6; + float mBB_7, mBB_8, mBB_9; +}; +#endif + +#ifdef USE_SIMD +typedef OBBAABBTest_SIMD BucketPrunerOBBAABBTest; +#else +typedef OBBAABBTest_Scalar BucketPrunerOBBAABBTest; +#endif + +/////////////////////////////////////////////////////////////////////////////// + +PxAgain BucketPrunerCore::overlap(const ShapeData& queryVolume, PrunerCallback& pcb) const +{ + PX_ASSERT(!mDirty); + PxAgain again = true; + + const PxBounds3& cullBox = queryVolume.getPrunerInflatedWorldAABB(); + + switch(queryVolume.getType()) + { + case PxGeometryType::eBOX: + { + if(queryVolume.isOBB()) + { + const BucketPrunerOverlapTraversal<BucketPrunerOBBAABBTest, false> overlap; + again = overlap(*this, + BucketPrunerOBBAABBTest( + queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerWorldPos(), + queryVolume.getPrunerBoxGeomExtentsInflated()), + pcb, cullBox); + } + else + { + const BucketPrunerOverlapTraversal<BucketPrunerAABBAABBTest, true> overlap; + again = overlap(*this, BucketPrunerAABBAABBTest(cullBox), pcb, cullBox); + } + } + break; + + case PxGeometryType::eCAPSULE: + { + const BucketPrunerOverlapTraversal<BucketPrunerOBBAABBTest, false> overlap; + again = overlap(*this, + BucketPrunerOBBAABBTest( + queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerWorldPos(), + queryVolume.getPrunerBoxGeomExtentsInflated()), + pcb, cullBox); + } + break; + + case PxGeometryType::eSPHERE: + { + const Sphere& sphere = queryVolume.getGuSphere(); + const PxVec3 sphereExtents(sphere.radius); + const BucketPrunerOverlapTraversal<BucketPrunerSphereAABBTest, true> overlap; + again = overlap(*this, BucketPrunerSphereAABBTest(sphere), pcb, cullBox); + } + break; + + case PxGeometryType::eCONVEXMESH: + { + const BucketPrunerOverlapTraversal<BucketPrunerOBBAABBTest, false> overlap; + again = overlap(*this, + BucketPrunerOBBAABBTest( + 
queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerWorldPos(), + queryVolume.getPrunerBoxGeomExtentsInflated()), + pcb, cullBox); + } + break; + + case PxGeometryType::ePLANE: + case PxGeometryType::eTRIANGLEMESH: + case PxGeometryType::eHEIGHTFIELD: + case PxGeometryType::eGEOMETRY_COUNT: + case PxGeometryType::eINVALID: + PX_ALWAYS_ASSERT_MESSAGE("unsupported overlap query volume geometry type"); + } + return again; +} + +/////////////////////////////////////////////////////////////////////////////// + +void BucketPrunerCore::shiftOrigin(const PxVec3& shift) +{ + for(PxU32 i=0;i<mNbFree;i++) + { + mFreeBounds[i].minimum -= shift; + mFreeBounds[i].maximum -= shift; + } + + const PxU32 nb = mCoreNbObjects; + //if (nb) + { + mGlobalBox.mCenter -= shift; + + #ifdef _DEBUG + mGlobalBox.mDebugMin -= shift[mSortAxis]; + #endif + + encodeBoxMinMax(mGlobalBox, mSortAxis); + + for(PxU32 i=0; i < nb; i++) + { + mCoreBoxes[i].minimum -= shift; + mCoreBoxes[i].maximum -= shift; + } + + for(PxU32 i=0; i < mSortedNb; i++) + { + mSortedWorldBoxes[i].mCenter -= shift; + + #ifdef _DEBUG + mSortedWorldBoxes[i].mDebugMin -= shift[mSortAxis]; + #endif + encodeBoxMinMax(mSortedWorldBoxes[i], mSortAxis); + } + + for(PxU32 i=0; i < 5; i++) + mLevel1.mBucketBox[i].mCenter -= shift; + + for(PxU32 i=0; i < 5; i++) + for(PxU32 j=0; j < 5; j++) + mLevel2[i].mBucketBox[j].mCenter -= shift; + + for(PxU32 i=0; i < 5; i++) + for(PxU32 j=0; j < 5; j++) + for(PxU32 k=0; k < 5; k++) + mLevel3[i][j].mBucketBox[k].mCenter -= shift; + } +} + +/////////////////////////////////////////////////////////////////////////////// + +static void visualize(Cm::RenderOutput& out, const BucketBox& bounds) +{ + out << Cm::DebugBox(PxBounds3(bounds.getMin(), bounds.getMax()), true); +} + +void BucketPrunerCore::visualize(Cm::RenderOutput& out, PxU32 color) const +{ + const PxTransform idt = PxTransform(PxIdentity); + out << idt; + out << color; + + ::visualize(out, mGlobalBox); + + for(PxU32 i=0;i<5;i++) + { + 
if(!mLevel1.mCounters[i]) + continue; + + ::visualize(out, mLevel1.mBucketBox[i]); + + for(PxU32 j=0;j<5;j++) + { + if(!mLevel2[i].mCounters[j]) + continue; + + ::visualize(out, mLevel2[i].mBucketBox[j]); + + for(PxU32 k=0;k<5;k++) + { + if(!mLevel3[i][j].mCounters[k]) + continue; + + ::visualize(out, mLevel3[i][j].mBucketBox[k]); + } + } + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +BucketPruner::BucketPruner() +{ +} + +BucketPruner::~BucketPruner() +{ +} + +bool BucketPruner::addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count, bool) +{ + if(!count) + return true; + + const PxU32 valid = mPool.addObjects(results, bounds, payload, count); + mCore.mDirty = true; + + mCore.setExternalMemory(mPool.getNbActiveObjects(), mPool.getCurrentWorldBoxes(), mPool.getObjects()); + + return valid == count; +} + +void BucketPruner::removeObjects(const PrunerHandle* handles, PxU32 count) +{ + if(!count) + return; + + for(PxU32 i=0;i<count;i++) + mPool.removeObject(handles[i]); + + mCore.setExternalMemory(mPool.getNbActiveObjects(), mPool.getCurrentWorldBoxes(), mPool.getObjects()); + mCore.mDirty = true; +} + +void BucketPruner::updateObjects(const PrunerHandle* handles, const PxBounds3* newBounds, PxU32 count) +{ + if(!count) + return; + + if(newBounds) + { + for(PxU32 i=0;i<count;i++) + mPool.setWorldAABB(handles[i], newBounds[i]); + } + + mCore.setExternalMemory(mPool.getNbActiveObjects(), mPool.getCurrentWorldBoxes(), mPool.getObjects()); + mCore.mDirty = true; +} + +void BucketPruner::updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count) +{ + mPool.updateObjects(handles, indices, newBounds, count); + mCore.setExternalMemory(mPool.getNbActiveObjects(), mPool.getCurrentWorldBoxes(), mPool.getObjects()); + 
mCore.mDirty = true; +} + +void BucketPruner::commit() +{ + mCore.build(); +} + +void BucketPruner::shiftOrigin(const PxVec3& shift) +{ + mCore.shiftOrigin(shift); +} + +PxAgain BucketPruner::sweep(const ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const +{ + PX_ASSERT(!mCore.mDirty); + if(mCore.mDirty) + return true; // it may crash otherwise + return mCore.sweep(queryVolume, unitDir, inOutDistance, pcb); +} + +PxAgain BucketPruner::overlap(const ShapeData& queryVolume, PrunerCallback& pcb) const +{ + PX_ASSERT(!mCore.mDirty); + if(mCore.mDirty) + return true; // it may crash otherwise + return mCore.overlap(queryVolume, pcb); +} + +PxAgain BucketPruner::raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& pcb) const +{ + PX_ASSERT(!mCore.mDirty); + if(mCore.mDirty) + return true; // it may crash otherwise + return mCore.raycast(origin, unitDir, inOutDistance, pcb); +} + +void BucketPruner::visualize(Cm::RenderOutput& out, PxU32 color) const +{ + mCore.visualize(out, color); +} + + +#define MBP_ALLOC(x) PX_ALLOC(x, "BucketPruner") +#define MBP_ALLOC_TMP(x) PX_ALLOC_TEMP(x, "BucketPruner") +#define MBP_FREE(x) if(x) PX_FREE_AND_RESET(x) +#define DELETESINGLE(x) if (x) { delete x; x = NULL; } +#define DELETEARRAY(x) if (x) { delete []x; x = NULL; } +#define INVALID_ID 0xffffffff + +#ifndef USE_REGULAR_HASH_MAP +static PX_FORCE_INLINE bool differentPair(const BucketPrunerPair& p, const PrunerPayload& payload) +{ + const bool same = p.mPayload == payload; + return !same; +} + +/////////////////////////////////////////////////////////////////////////////// + +BucketPrunerMap::BucketPrunerMap() : + mHashSize (0), + mMask (0), + mNbActivePairs (0), + mHashTable (NULL), + mNext (NULL), + mActivePairs (NULL), + mReservedMemory (0) +{ +} + +/////////////////////////////////////////////////////////////////////////////// + +BucketPrunerMap::~BucketPrunerMap() +{ + purge(); +} + 
///////////////////////////////////////////////////////////////////////////////

// Frees the hash table, the collision-chain array and the pair array, and
// resets the map to its freshly-constructed (empty, unallocated) state.
// NOTE(review): mReservedMemory is not reset here — reserveMemory() state
// survives a purge; confirm this is intentional.
void BucketPrunerMap::purge()
{
	MBP_FREE(mNext);
	MBP_FREE(mActivePairs);
	MBP_FREE(mHashTable);
	mHashSize		= 0;
	mMask			= 0;
	mNbActivePairs	= 0;
}

///////////////////////////////////////////////////////////////////////////////

// Looks up the entry stored for 'payload'.
// Returns NULL when the map is still unallocated or the payload is absent;
// otherwise returns a pointer into mActivePairs (valid until the next
// add/remove, which may reallocate or compact the array).
const BucketPrunerPair* BucketPrunerMap::findPair(const PrunerPayload& payload) const
{
	if(!mHashTable)
		return NULL;	// Nothing has been allocated yet

	// Compute hash value for this pair
	const PxU32 hashValue = hash(payload) & mMask;

	const BucketPrunerPair* PX_RESTRICT activePairs = mActivePairs;
	const PxU32* PX_RESTRICT next = mNext;

	// Look for it in the table
	PxU32 offset = mHashTable[hashValue];
	while(offset!=INVALID_ID && differentPair(activePairs[offset], payload))
	{
		offset = next[offset];	// Better to have a separate array for this
	}
	if(offset==INVALID_ID)
		return NULL;
	PX_ASSERT(offset<mNbActivePairs);
	// Match mActivePairs[offset] => the pair is persistent
	return &activePairs[offset];
}

// Internal version saving hash computation: identical to the public findPair()
// except the caller supplies the already-masked hash value.
PX_FORCE_INLINE BucketPrunerPair* BucketPrunerMap::findPair(const PrunerPayload& payload, PxU32 hashValue) const
{
	if(!mHashTable)
		return NULL;	// Nothing has been allocated yet

	BucketPrunerPair* PX_RESTRICT activePairs = mActivePairs;
	const PxU32* PX_RESTRICT next = mNext;

	// Look for it in the table
	PxU32 offset = mHashTable[hashValue];
	while(offset!=INVALID_ID && differentPair(activePairs[offset], payload))
	{
		offset = next[offset];	// Better to have a separate array for this
	}
	if(offset==INVALID_ID)
		return NULL;
	PX_ASSERT(offset<mNbActivePairs);
	// Match mActivePairs[offset] => the pair is persistent
	return &activePairs[offset];
}

///////////////////////////////////////////////////////////////////////////////

// Inserts (payload -> coreIndex, timeStamp). If the payload is already mapped,
// the existing entry is returned unchanged (asserting the stored index/stamp
// match). Grows the table to the next power of two when full, then links the
// new pair at the head of its hash bucket's chain.
BucketPrunerPair* BucketPrunerMap::addPair(const PrunerPayload& payload, PxU32 coreIndex, PxU32 timeStamp)
{
	PxU32 hashValue = hash(payload) & mMask;

	{
		BucketPrunerPair* PX_RESTRICT p = findPair(payload, hashValue);
		if(p)
		{
			PX_ASSERT(p->mCoreIndex==coreIndex);
			PX_ASSERT(p->mTimeStamp==timeStamp);
			return p;	// Persistent pair
		}
	}

	// This is a new pair
	if(mNbActivePairs >= mHashSize)
	{
		// Get more entries
		mHashSize = Ps::nextPowerOfTwo(mNbActivePairs+1);
		mMask = mHashSize-1;

		reallocPairs();

		// Recompute hash value with new hash size
		hashValue = hash(payload) & mMask;	// ### redundant hash computation here?
	}

	BucketPrunerPair* PX_RESTRICT p = &mActivePairs[mNbActivePairs];
	p->mPayload		= payload;
	p->mCoreIndex	= coreIndex;
	p->mTimeStamp	= timeStamp;
	mNext[mNbActivePairs] = mHashTable[hashValue];
	mHashTable[hashValue] = mNbActivePairs++;
	return p;
}

///////////////////////////////////////////////////////////////////////////////

// Unlinks the pair at 'pairIndex' from the chain of bucket 'hashValue', then
// keeps mActivePairs dense by moving the last pair into the freed slot and
// re-linking that moved pair into its own bucket's chain.
void BucketPrunerMap::removePairInternal(const PrunerPayload& /*payload*/, PxU32 hashValue, PxU32 pairIndex)
{
	// Walk the hash table to fix mNext
	{
		PxU32 offset = mHashTable[hashValue];
		PX_ASSERT(offset!=INVALID_ID);

		PxU32 previous=INVALID_ID;
		while(offset!=pairIndex)
		{
			previous = offset;
			offset = mNext[offset];
		}

		// Let us go/jump us
		if(previous!=INVALID_ID)
		{
			PX_ASSERT(mNext[previous]==pairIndex);
			mNext[previous] = mNext[pairIndex];
		}
		// else we were the first
		else mHashTable[hashValue] = mNext[pairIndex];
		// we're now free to reuse mNext[pairIndex] without breaking the list
	}
#if PX_DEBUG
	mNext[pairIndex]=INVALID_ID;
#endif
	// Invalidate entry

	// Fill holes
	if(1)
	{
		// 1) Remove last pair
		const PxU32 lastPairIndex = mNbActivePairs-1;
		if(lastPairIndex==pairIndex)
		{
			mNbActivePairs--;
		}
		else
		{
			const BucketPrunerPair* last = &mActivePairs[lastPairIndex];
			const PxU32 lastHashValue = hash(last->mPayload) & mMask;

			// Walk the hash table to fix mNext
			PxU32 offset = mHashTable[lastHashValue];
			PX_ASSERT(offset!=INVALID_ID);

			PxU32 previous=INVALID_ID;
			while(offset!=lastPairIndex)
			{
				previous = offset;
				offset = mNext[offset];
			}

			// Let us go/jump us
			if(previous!=INVALID_ID)
			{
				PX_ASSERT(mNext[previous]==lastPairIndex);
				mNext[previous] = mNext[lastPairIndex];
			}
			// else we were the first
			else mHashTable[lastHashValue] = mNext[lastPairIndex];
			// we're now free to reuse mNext[lastPairIndex] without breaking the list

#if PX_DEBUG
			mNext[lastPairIndex]=INVALID_ID;
#endif

			// Don't invalidate entry since we're going to shrink the array

			// 2) Re-insert in free slot
			mActivePairs[pairIndex] = mActivePairs[lastPairIndex];
#if PX_DEBUG
			PX_ASSERT(mNext[pairIndex]==INVALID_ID);
#endif
			mNext[pairIndex] = mHashTable[lastHashValue];
			mHashTable[lastHashValue] = pairIndex;

			mNbActivePairs--;
		}
	}
}

///////////////////////////////////////////////////////////////////////////////

// Removes the entry for 'payload'. On success returns true and writes the
// stored core index and timestamp to the out-parameters; returns false if the
// payload was never mapped. May shrink the table afterwards (see shrinkMemory).
bool BucketPrunerMap::removePair(const PrunerPayload& payload, PxU32& coreIndex, PxU32& timeStamp)
{
	const PxU32 hashValue = hash(payload) & mMask;
	const BucketPrunerPair* p = findPair(payload, hashValue);
	if(!p)
		return false;
	PX_ASSERT(p->mPayload==payload);

	coreIndex = p->mCoreIndex;
	timeStamp = p->mTimeStamp;

	removePairInternal(payload, hashValue, getPairIndex(p));

	shrinkMemory();
	return true;
}

///////////////////////////////////////////////////////////////////////////////

// Shrinks the table to the smallest power of two holding the active pairs,
// unless that would drop below the size pinned by reserveMemory().
void BucketPrunerMap::shrinkMemory()
{
	// Check correct memory against actually used memory
	const PxU32 correctHashSize = Ps::nextPowerOfTwo(mNbActivePairs);
	if(mHashSize==correctHashSize)
		return;

	if(mReservedMemory && correctHashSize < mReservedMemory)
		return;

	// Reduce memory used
	mHashSize = correctHashSize;
	mMask = mHashSize-1;

	reallocPairs();
}

///////////////////////////////////////////////////////////////////////////////

// Fills 'nb' consecutive dwords at 'dest' with 'value' (used to reset the
// hash table to INVALID_ID).
static PX_FORCE_INLINE void storeDwords(PxU32* dest, PxU32 nb, PxU32 value)
{
	while(nb--)
		*dest++ = value;
}

// Reallocates all three arrays at the current mHashSize and re-links every
// active pair into the (re-masked) hash buckets. Existing pair data is copied
// over; the old arrays are freed afterwards.
void BucketPrunerMap::reallocPairs()
{
	MBP_FREE(mHashTable);
	mHashTable = reinterpret_cast<PxU32*>(MBP_ALLOC(mHashSize*sizeof(PxU32)));
	storeDwords(mHashTable, mHashSize, INVALID_ID);

	// Get some bytes for new entries
	BucketPrunerPair* newPairs = reinterpret_cast<BucketPrunerPair*>(MBP_ALLOC(mHashSize * sizeof(BucketPrunerPair)));
	PX_ASSERT(newPairs);

	PxU32* newNext = reinterpret_cast<PxU32*>(MBP_ALLOC(mHashSize * sizeof(PxU32)));
	PX_ASSERT(newNext);

	// Copy old data if needed
	if(mNbActivePairs)
		PxMemCopy(newPairs, mActivePairs, mNbActivePairs*sizeof(BucketPrunerPair));
	// ### check it's actually needed... probably only for pairs whose hash value was cut by the and
	// yeah, since hash(id0, id1) is a constant
	// However it might not be needed to recompute them => only less efficient but still ok
	for(PxU32 i=0;i<mNbActivePairs;i++)
	{
		const PxU32 hashValue = hash(mActivePairs[i].mPayload) & mMask;	// New hash value with new mask
		newNext[i] = mHashTable[hashValue];
		mHashTable[hashValue] = i;
	}

	// Delete old data
	MBP_FREE(mNext);
	MBP_FREE(mActivePairs);

	// Assign new pointer
	mActivePairs = newPairs;
	mNext = newNext;
}

///////////////////////////////////////////////////////////////////////////////

// Pre-sizes the table to at least 'memSize' entries (rounded up to a power of
// two) and pins that size so shrinkMemory() will not go below it.
void BucketPrunerMap::reserveMemory(PxU32 memSize)
{
	if(!memSize)
		return;

	if(!Ps::isPowerOfTwo(memSize))
		memSize = Ps::nextPowerOfTwo(memSize);

	mHashSize = memSize;
	mMask = mHashSize-1;

	mReservedMemory = memSize;

	reallocPairs();
}

///////////////////////////////////////////////////////////////////////////////
#endif	// !USE_REGULAR_HASH_MAP

// --- end of SqBucketPruner.cpp in this dump; SqBucketPruner.h follows ---
to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 

#ifndef SQ_BUCKETPRUNER_H
#define SQ_BUCKETPRUNER_H

#include "SqTypedef.h"
#include "SqPruningPool.h"
#include "PsHash.h"

// Number of objects kept in the small linear "free array" before they are
// pushed into the bucketed structure (see BucketPrunerCore::mFreeObjects).
#define FREE_PRUNER_SIZE	16

//#define USE_REGULAR_HASH_MAP
#ifdef USE_REGULAR_HASH_MAP
	#include "PsHashMap.h"
#endif

namespace physx
{
namespace Sq
{
	typedef PxU32	BucketWord;

#if PX_VC
	#pragma warning(push)
	#pragma warning( disable : 4324 )	// Padding was added at the end of a structure because of a __declspec(align) value.
#endif

	// 16-byte aligned center/extents AABB. mData0/mData1 carry integer-encoded
	// min/max values along the current sorting axis (see encodeBoxMinMax usage
	// in SqBucketPruner.cpp).
	PX_ALIGN_PREFIX(16) struct BucketBox
	{
		PxVec3	mCenter;
		PxU32	mData0;		// Integer-encoded min value along sorting axis
		PxVec3	mExtents;
		PxU32	mData1;		// Integer-encoded max value along sorting axis

#ifdef _DEBUG
		// PT: we need the original min value for debug checks. Using the center/extents version
		// fails because recomputing the min from them introduces FPU accuracy errors in the values.
		float	mDebugMin;
#endif

		// Min corner reconstructed from center/extents.
		PX_FORCE_INLINE PxVec3 getMin() const
		{
			return mCenter - mExtents;
		}

		// Max corner reconstructed from center/extents.
		PX_FORCE_INLINE PxVec3 getMax() const
		{
			return mCenter + mExtents;
		}

		// Resets to the canonical "empty" box (negative extents).
		PX_FORCE_INLINE void setEmpty()
		{
			mCenter = PxVec3(0.0f);
			mExtents = PxVec3(-PX_MAX_BOUNDS_EXTENTS);

#ifdef _DEBUG
			mDebugMin = PX_MAX_BOUNDS_EXTENTS;
#endif
		}
	}PX_ALIGN_SUFFIX(16);

	// One level of the 5-way bucket hierarchy: per-child object counts,
	// start offsets into the sorted arrays, child bounds, and precomputed
	// traversal orders.
	PX_ALIGN_PREFIX(16) struct BucketPrunerNode
	{
				BucketPrunerNode();

		void	classifyBoxes(	float limitX, float limitZ,
								PxU32 nb,
								BucketBox* PX_RESTRICT boxes,
								const PrunerPayload* PX_RESTRICT objects,
								BucketBox* PX_RESTRICT sortedBoxes,
								PrunerPayload* PX_RESTRICT sortedObjects,
								bool isCrossBucket, PxU32 sortAxis);

		// Zeroes all child counters and offsets.
		PX_FORCE_INLINE void initCounters()
		{
			for(PxU32 i=0;i<5;i++)
				mCounters[i] = 0;
			for(PxU32 i=0;i<5;i++)
				mOffsets[i] = 0;
		}

		BucketWord	mCounters[5];	// Number of objects in each of the 5 children
		BucketWord	mOffsets[5];	// Start index of objects for each of the 5 children
		BucketBox	mBucketBox[5];	// AABBs around objects for each of the 5 children
		PxU16		mOrder[8];		// PNS: 5 children => 3 bits/index => 3*5=15 bits total, for each of the 8 canonical directions
	}PX_ALIGN_SUFFIX(16);

	// Hashes a payload by folding its two pointer-sized data words into a
	// 64-bit value (on 64-bit targets only the low 32 bits of each word are
	// used — see commented-out alternative).
	PX_FORCE_INLINE PxU32 hash(const PrunerPayload& payload)
	{
#if PX_P64_FAMILY
//		const PxU32 h0 = Ps::hash((const void*)payload.data[0]);
//		const PxU32 h1 = Ps::hash((const void*)payload.data[1]);
		const PxU32 h0 = PxU32(PX_MAX_U32 & payload.data[0]);
		const PxU32 h1 = PxU32(PX_MAX_U32 & payload.data[1]);
		return Ps::hash(PxU64(h0)|(PxU64(h1)<<32));
#else
		return Ps::hash(PxU64(payload.data[0])|(PxU64(payload.data[1])<<32));
#endif
	}

#ifdef USE_REGULAR_HASH_MAP
	// Value type for the regular Ps::HashMap variant.
	struct BucketPrunerPair : public Ps::UserAllocated
	{
		PX_FORCE_INLINE	BucketPrunerPair()	{}
		PX_FORCE_INLINE	BucketPrunerPair(PxU32 index, PxU32 stamp) : mCoreIndex(index), mTimeStamp(stamp)	{}
		PxU32	mCoreIndex;	// index in mCoreObjects
		PxU32	mTimeStamp;
	};
	typedef Ps::HashMap<PrunerPayload, BucketPrunerPair> BucketPrunerMap;
#else
	// Entry of the custom map below: payload plus its core-array index and
	// insertion timestamp.
	struct BucketPrunerPair : public Ps::UserAllocated
	{
		PrunerPayload	mPayload;
		PxU32			mCoreIndex;	// index in mCoreObjects
		PxU32			mTimeStamp;
	};

	// Custom hash-map - currently faster than the regular hash-map (Ps::HashMap), in particular for 'find-and-erase' operations.
	// Open-addressed pair map: dense mActivePairs array plus a bucketed
	// hash table (mHashTable) with per-entry collision chains (mNext).
	// Implementation lives in SqBucketPruner.cpp.
	class BucketPrunerMap : public Ps::UserAllocated
	{
		public:
							BucketPrunerMap();
							~BucketPrunerMap();

			// Frees everything and resets to the empty state.
			void			purge();
			// Shrinks the table when possible (respects reserveMemory()).
			void			shrinkMemory();

			BucketPrunerPair*		addPair		(const PrunerPayload& payload, PxU32 coreIndex, PxU32 timeStamp);
			bool					removePair	(const PrunerPayload& payload, PxU32& coreIndex, PxU32& timeStamp);
			const BucketPrunerPair*	findPair	(const PrunerPayload& payload)	const;

			// Index of 'pair' inside the dense mActivePairs array.
			PX_FORCE_INLINE	PxU32	getPairIndex(const BucketPrunerPair* pair)	const
			{
				return (PxU32((size_t(pair) - size_t(mActivePairs)))/sizeof(BucketPrunerPair));
			}

			PxU32				mHashSize;			// Current table size (power of two)
			PxU32				mMask;				// mHashSize-1, used to mask hash values
			PxU32				mNbActivePairs;		// Number of valid entries in mActivePairs
			PxU32*				mHashTable;			// Bucket heads (INVALID_ID terminated chains)
			PxU32*				mNext;				// Per-entry next index in the bucket chain
			BucketPrunerPair*	mActivePairs;		// Dense pair storage
			PxU32				mReservedMemory;	// Lower bound pinned by reserveMemory()

			PX_FORCE_INLINE	BucketPrunerPair*	findPair(const PrunerPayload& payload, PxU32 hashValue) const;
			void				removePairInternal(const PrunerPayload& payload, PxU32 hashValue, PxU32 pairIndex);
			void				reallocPairs();
			void				reserveMemory(PxU32 memSize);
	};
#endif

	// Core bucket pruner: a small "free array" for recent objects, a set of
	// core arrays (bounds/payloads) and a 3-level, 5-way bucket hierarchy
	// rebuilt by classifyBoxes() when mDirty is set.
	class BucketPrunerCore : public Ps::UserAllocated
	{
		public:
						BucketPrunerCore(bool externalMemory=true);
						~BucketPrunerCore();

				void	release();

				// Points the core arrays at externally-owned memory.
				void	setExternalMemory(PxU32 nbObjects, PxBounds3* boxes, PrunerPayload* objects);

				bool	addObject(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp=0);
				bool	removeObject(const PrunerPayload& object, PxU32& timeStamp);
				bool	updateObject(const PxBounds3& worldAABB, const PrunerPayload& object);

				// PT: look for objects marked with input timestamp everywhere in the structure, and remove them. This is the same
				// as calling 'removeObject' individually for all these objects, but much more efficient. Returns number of removed objects.
				PxU32	removeMarkedObjects(PxU32 timeStamp);

				PxAgain	raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
				PxAgain	overlap(const Gu::ShapeData& queryVolume, PrunerCallback&) const;
				PxAgain	sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;

				void	shiftOrigin(const PxVec3& shift);

				void	visualize(Cm::RenderOutput& out, PxU32 color) const;

		// Rebuilds the bucket hierarchy from the current core arrays.
		PX_FORCE_INLINE	void	build()			{ classifyBoxes();					}

		PX_FORCE_INLINE	PxU32	getNbObjects()	const	{ return mNbFree + mCoreNbObjects;	}

//	private:
				PxU32			mCoreNbObjects;		// Current number of objects in core arrays
				PxU32			mCoreCapacity;		// Capacity of core arrays
				PxBounds3*		mCoreBoxes;			// Core array
				PrunerPayload*	mCoreObjects;		// Core array
				PxU32*			mCoreRemap;			// Remaps core index to sorted index, i.e. sortedIndex = mCoreRemap[coreIndex]

				BucketBox*		mSortedWorldBoxes;	// Sorted array
				PrunerPayload*	mSortedObjects;		// Sorted array

				PxU32			mNbFree;							// Current number of objects in the "free array" (mFreeObjects/mFreeBounds)
				PrunerPayload	mFreeObjects[FREE_PRUNER_SIZE];		// mNbFree objects are stored here
				PxBounds3		mFreeBounds[FREE_PRUNER_SIZE];		// mNbFree object bounds are stored here
				PxU32			mFreeStamps[FREE_PRUNER_SIZE];

				BucketPrunerMap	mMap;				// Maps (PrunerPayload) object to corresponding index in core array.
													// Objects in the free array do not appear in this map.

				PxU32			mSortedNb;
				PxU32			mSortedCapacity;
				PxU32			mSortAxis;

				BucketBox		mGlobalBox;			// Global bounds around all objects in the structure (except the ones in the "free" array)
				BucketPrunerNode	mLevel1;
				BucketPrunerNode	mLevel2[5];
				BucketPrunerNode	mLevel3[5][5];

				bool			mDirty;				// Set on any add/remove/update; queries assert it is cleared by build()
				bool			mOwnMemory;
		private:
				void			classifyBoxes();
				void			allocateSortedMemory(PxU32 nb);
				void			resizeCore();
		PX_FORCE_INLINE	void	addObjectInternal(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp);
	};

#if PX_VC
	#pragma warning(pop)
#endif

	// Pruner-interface adapter: pairs a PruningPool (handle management,
	// bounds storage) with a BucketPrunerCore working on the pool's memory.
	class BucketPruner : public Pruner
	{
		public:
									BucketPruner();
		virtual						~BucketPruner();

		// Pruner
		virtual bool				addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count, bool);
		virtual void				removeObjects(const PrunerHandle* handles, PxU32 count);
		virtual void				updateObjects(const PrunerHandle* handles, const PxBounds3* newBounds, PxU32 count);
		virtual void				updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count = 1);
		virtual void				commit();
		virtual PxAgain				raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
		virtual PxAgain				overlap(const Gu::ShapeData& queryVolume, PrunerCallback&) const;
		virtual PxAgain				sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
		virtual const PrunerPayload& getPayload(PrunerHandle handle) const { return mPool.getPayload(handle); }
		virtual const PrunerPayload& getPayload(PrunerHandle handle, PxBounds3*& bounds) const { return mPool.getPayload(handle, bounds); }
		virtual void				preallocate(PxU32 entries) { mPool.preallocate(entries); }
		virtual void				shiftOrigin(const PxVec3& shift);
		virtual void				visualize(Cm::RenderOutput& out, PxU32 color) const;
		// merge not implemented for bucket pruner
		virtual void				merge(const void* ) {}
		//~Pruner

		private:
				BucketPrunerCore	mCore;
				PruningPool			mPool;
	};

} // namespace Sq

}

#endif // SQ_BUCKETPRUNER_H

// --- next file in this dump: SqExtendedBucketPruner.cpp (license header follows) ---
// All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.


#include "SqExtendedBucketPruner.h"
#include "SqAABBTree.h"
#include "SqPrunerMergeData.h"
#include "SqAABBTreeQuery.h"
#include "GuBounds.h"
#include "CmBitMap.h"

using namespace physx;
using namespace Sq;
using namespace Gu;
using namespace Ps;

// Leaf capacity used when building the main tree-of-trees.
#define NB_OBJECTS_PER_NODE	4

///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Constructor, preallocate trees, bounds
ExtendedBucketPruner::ExtendedBucketPruner(const PruningPool* pool)
	: mBucketCore(false), mPruningPool(pool), mMainTree(NULL), mBounds(NULL), mMergedTrees(NULL),
	mCurrentTreeIndex(0), mTreesDirty(false)
{
	// preallocated size for bounds, trees
	mCurrentTreeCapacity = 32;

	mBounds = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*mCurrentTreeCapacity, "Bounds"));
	mMergedTrees = reinterpret_cast<MergedTree*>(PX_ALLOC(sizeof(MergedTree)*mCurrentTreeCapacity, "AABB trees"));
	mExtendedBucketPrunerMap.reserve(mCurrentTreeCapacity);

	// create empty main tree
	mMainTree = PX_NEW(AABBTree);

	// create empty merge trees
	for (PxU32 i = 0; i < mCurrentTreeCapacity; i++)
	{
		mMergedTrees[i].mTimeStamp = 0;
		mMergedTrees[i].mTree = PX_NEW(AABBTree);
	}
}

//////////////////////////////////////////////////////////////////////////
// Destructor: deletes the main tree and every preallocated merged tree,
// then frees the raw bounds/tree arrays.
ExtendedBucketPruner::~ExtendedBucketPruner()
{
	// release main tree
	if (mMainTree)
	{
		PX_DELETE_AND_RESET(mMainTree);
	}

	// release merged trees
	for (PxU32 i = 0; i < mCurrentTreeCapacity; i++)
	{
		AABBTree* aabbTree = mMergedTrees[i].mTree;
		PX_DELETE(aabbTree);
	}

	PX_FREE(mBounds);
	PX_FREE(mMergedTrees);
}

//////////////////////////////////////////////////////////////////////////
// release all objects in bucket pruner
// (keeps the preallocated tree objects alive for reuse — only their
// contents are released)
void ExtendedBucketPruner::release()
{
	// release core bucket pruner
	mBucketCore.release();

	mMainTreeUpdateMap.release();
	mMergeTreeUpdateMap.release();

	// release all objecs from the map
	mExtendedBucketPrunerMap.clear();

	// release all merged trees
	for (PxU32 i = 0; i < mCurrentTreeCapacity; i++)
	{
		mMergedTrees[i].mTimeStamp = 0;
		mMergedTrees[i].mTree->release();
	}

	// reset current tree index
	mCurrentTreeIndex = 0;
}

//////////////////////////////////////////////////////////////////////////
// Add a tree from a pruning structure
// 1. get new tree index
// 2. initialize merged tree, bounds
// 3. create update map for the merged tree
// 4. build new tree of trees from given trees bounds
// 5. add new objects into extended bucket pruner map
// 6. shift indices in the merged tree
void ExtendedBucketPruner::addTree(const AABBTreeMergeData& mergeData, PxU32 timeStamp)
{
	// check if we have to resize
	if(mCurrentTreeIndex == mCurrentTreeCapacity)
	{
		resize(mCurrentTreeCapacity*2);
	}

	// get current merge tree index
	const PxU32 mergeTreeIndex = mCurrentTreeIndex++;

	// get payloads pointers - the pointers start at mIndicesOffset, thats where all
	// objects were added before merge was called
	const PrunerPayload* payloads = &mPruningPool->getObjects()[mergeData.mIndicesOffset];

	// setup merged tree with the merge data and timestamp
	mMergedTrees[mergeTreeIndex].mTimeStamp = timeStamp;
	AABBTree& mergedTree = *mMergedTrees[mergeTreeIndex].mTree;
	mergedTree.initTree(mergeData);
	// set bounds
	mBounds[mergeTreeIndex] = mergeData.getRootNode().mBV;

	// update temporally update map for the current merge tree, map is used to setup the base extended bucket pruner map
	mMergeTreeUpdateMap.initMap(mergeData.mNbIndices, mergedTree);

	// create new base tree of trees
	buildMainAABBTree();

	// Add each object into extended bucket pruner hash map
	for (PxU32 i = 0; i < mergeData.mNbIndices; i++)
	{
		ExtendedBucketPrunerData mapData;
		mapData.mMergeIndex = mergeTreeIndex;
		mapData.mTimeStamp = timeStamp;
		PX_ASSERT(mMergeTreeUpdateMap[i] < mergedTree.getNbNodes());
		// get node information from the merge tree update map
		mapData.mSubTreeNode = mMergeTreeUpdateMap[i];
		mExtendedBucketPrunerMap.insert(payloads[i], mapData);
	}
	// merged tree indices needs to be shifted now, we cannot shift it in init - the update map
	// could not be constructed otherwise, as the indices wont start from 0. The indices
	// needs to be shifted by offset from the pruning pool, where the new objects were added into the pruning pool.
	mergedTree.shiftIndices(mergeData.mIndicesOffset);

#if PX_DEBUG
	checkValidity();
#endif // PX_DEBUG
}

//////////////////////////////////////////////////////////////////////////
// Builds the new main AABB tree with given current active merged trees and its bounds
void ExtendedBucketPruner::buildMainAABBTree()
{
	// create the AABB tree from given merged trees bounds
	AABBTreeBuildParams sTB;
	sTB.mNbPrimitives = mCurrentTreeIndex;
	sTB.mAABBArray = mBounds;
	sTB.mLimit = NB_OBJECTS_PER_NODE;
	bool status = mMainTree->build(sTB);

	PX_UNUSED(status);
	PX_ASSERT(status);

	// Init main tree update map for the new main tree
	mMainTreeUpdateMap.initMap(mCurrentTreeIndex, *mMainTree);
}

//////////////////////////////////////////////////////////////////////////
// resize internal memory, buffers
// Grows mBounds and mMergedTrees to 'size' entries (size must exceed the
// current capacity); newly added slots get fresh empty AABBTree objects.
void ExtendedBucketPruner::resize(PxU32 size)
{
	PX_ASSERT(size > mCurrentTreeCapacity);
	// allocate new bounds
	PxBounds3* newBounds = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*size, "Bounds"));
	// copy previous bounds
	PxMemCopy(newBounds, mBounds, sizeof(PxBounds3)*mCurrentTreeCapacity);
	PX_FREE(mBounds);
	mBounds = newBounds;

	// allocate new merged trees
	MergedTree* newMergeTrees = reinterpret_cast<MergedTree*>(PX_ALLOC(sizeof(MergedTree)*size, "AABB trees"));
	// copy previous merged trees
	PxMemCopy(newMergeTrees, mMergedTrees, sizeof(MergedTree)*mCurrentTreeCapacity);
	PX_FREE(mMergedTrees);
	mMergedTrees = newMergeTrees;
	// allocate new trees for merged trees
	for (PxU32 i = mCurrentTreeCapacity; i < size; i++)
	{
		mMergedTrees[i].mTimeStamp = 0;
		mMergedTrees[i].mTree = PX_NEW(AABBTree);
	}

	mCurrentTreeCapacity = size;
}

//////////////////////////////////////////////////////////////////////////
// Update object
// Objects not found in the extended map are forwarded to the core bucket
// pruner; otherwise the owning merged-tree node and the corresponding main
// tree node are marked for refit (applied later in refitMarkedNodes).
bool ExtendedBucketPruner::updateObject(const PxBounds3& worldAABB, const PrunerPayload& object)
{
	const ExtendedBucketPrunerMap::Entry* extendedPrunerEntry = mExtendedBucketPrunerMap.find(object);

	// if object is not in tree of trees, it is in bucket pruner core
	if(!extendedPrunerEntry)
	{
		return mBucketCore.updateObject(worldAABB, object);
	}
	else
	{
		const ExtendedBucketPrunerData& data = extendedPrunerEntry->second;

		PX_ASSERT(data.mMergeIndex < mCurrentTreeIndex);

		// update tree where objects belongs to
		AABBTree& tree = *mMergedTrees[data.mMergeIndex].mTree;
		PX_ASSERT(data.mSubTreeNode < tree.getNbNodes());
		// mark for refit node in merged tree
		tree.markNodeForRefit(data.mSubTreeNode);
		PX_ASSERT(mMainTreeUpdateMap[data.mMergeIndex] < mMainTree->getNbNodes());
		// mark for refit node in main aabb tree
		mMainTree->markNodeForRefit(mMainTreeUpdateMap[data.mMergeIndex]);
		mTreesDirty = true;
	}
	return true;
}

//////////////////////////////////////////////////////////////////////////
// refit merged nodes
// 1. refit nodes in merged trees
// 2. check if after refit root node is valid - might happen edge case
//    where all objects were released - the root node is then invalid
//    in this edge case we need to compact the merged trees array
//    and create new main AABB tree
// 3. If all merged trees bounds are valid - refit main tree
// 4.
If bounds are invalid create new main AABB tree +void ExtendedBucketPruner::refitMarkedNodes(const PxBounds3* boxes) +{ + // if no tree needs update early exit + if(!mTreesDirty) + return; + + // refit trees and update bounds for main tree + PxU32 nbValidTrees = 0; + for (PxU32 i = mCurrentTreeIndex; i--; ) + { + AABBTree& tree = *mMergedTrees[i].mTree; + tree.refitMarkedNodes(boxes); + const PxBounds3& bounds = tree.getNodes()[0].mBV; + // check if bounds are valid, if all objects of the tree were released, the bounds + // will be invalid, in that case we cannot use this tree anymore. + if(bounds.isValid()) + { + nbValidTrees++; + } + mBounds[i] = bounds; + } + + if(nbValidTrees == mCurrentTreeIndex) + { + // no tree has been removed refit main tree + mMainTree->refitMarkedNodes(mBounds); + } + else + { + // edge case path, tree does not have a valid root node bounds - all objects from the tree were released + // we might even fire perf warning + // compact the tree array - no holes in the array, remember the swap position + PxU32* swapMap = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mCurrentTreeIndex, "Swap Map")); + PxU32 writeIndex = 0; + for (PxU32 i = 0; i < mCurrentTreeIndex; i++) + { + AABBTree& tree = *mMergedTrees[i].mTree; + if(tree.getNodes()[0].mBV.isValid()) + { + // we have to store the tree into an empty location + if(i != writeIndex) + { + PX_ASSERT(writeIndex < i); + AABBTree* ptr = mMergedTrees[writeIndex].mTree; + mMergedTrees[writeIndex] = mMergedTrees[i]; + mMergedTrees[i].mTree = ptr; + mBounds[writeIndex] = mBounds[i]; + } + // remember the swap location + swapMap[i] = writeIndex; + writeIndex++; + } + else + { + // tree is not valid, release it + tree.release(); + mMergedTrees[i].mTimeStamp = 0; + } + + // remember the swap + swapMap[mCurrentTreeIndex] = i; + } + + PX_ASSERT(writeIndex == nbValidTrees); + + // new merged trees size + mCurrentTreeIndex = nbValidTrees; + + // trees have changed, we need to rebuild the main tree + 
// (tail of the preceding function — its head is above this chunk; kept verbatim)
	buildMainAABBTree();

	// fixup the object entries, the merge index has changed
	for (ExtendedBucketPrunerMap::Iterator iter = mExtendedBucketPrunerMap.getIterator(); !iter.done(); ++iter)
	{
		ExtendedBucketPrunerData& data = iter->second;
		PX_ASSERT(swapMap[data.mMergeIndex] < nbValidTrees);
		data.mMergeIndex = swapMap[data.mMergeIndex];
	}
	PX_FREE(swapMap);
	}
#if PX_DEBUG
	checkValidity();
#endif
	mTreesDirty = false;
}

//////////////////////////////////////////////////////////////////////////
// remove object
// The pruning pool fills the hole left by the removed object with the pool's
// last object ("swapObject"), so that object's cached index must be fixed up
// wherever it is referenced (bucket pruner core or a merged tree).
bool ExtendedBucketPruner::removeObject(const PrunerPayload& object, PxU32 objectIndex, const PrunerPayload& swapObject,
	PxU32 swapObjectIndex, PxU32& timeStamp)
{
	ExtendedBucketPrunerMap::Entry dataEntry;

	// if object is not in tree of trees, it is in bucket pruner core
	if (!mExtendedBucketPrunerMap.erase(object, dataEntry))
	{
		// we need to call swapIndex: it might happen that the swapped object
		// does belong to the extended bucket pruner, in which case that object's
		// index needs to be swapped.
		swapIndex(objectIndex, swapObject, swapObjectIndex);
		return mBucketCore.removeObject(object, timeStamp);
	}
	else
	{
		const ExtendedBucketPrunerData& data = dataEntry.second;

		// mark tree nodes where the object belongs to
		AABBTree& tree = *mMergedTrees[data.mMergeIndex].mTree;
		PX_ASSERT(data.mSubTreeNode < tree.getNbNodes());
		// mark the merged tree for refit
		tree.markNodeForRefit(data.mSubTreeNode);
		PX_ASSERT(mMainTreeUpdateMap[data.mMergeIndex] < mMainTree->getNbNodes());
		// mark the main tree for refit
		mMainTree->markNodeForRefit(mMainTreeUpdateMap[data.mMergeIndex]);

		// call invalidate object to swap the object indices in the merged trees
		invalidateObject(data, objectIndex, swapObject, swapObjectIndex);

		mTreesDirty = true;
	}
#if PX_DEBUG
	checkValidity();
#endif // PX_DEBUG
	return true;
}

//////////////////////////////////////////////////////////////////////////
// invalidate object
// remove the objectIndex from the merged tree's leaf, then fix up the
// swapped object's index via swapIndex()
void ExtendedBucketPruner::invalidateObject(const ExtendedBucketPrunerData& data, PxU32 objectIndex, const PrunerPayload& swapObject,
	PxU32 swapObjectIndex)
{
	// get the merged tree
	AABBTree& tree = *mMergedTrees[data.mMergeIndex].mTree;
	PX_ASSERT(data.mSubTreeNode < tree.getNbNodes());
	PX_ASSERT(tree.getNodes()[data.mSubTreeNode].isLeaf());
	// get merged tree node
	AABBTreeRuntimeNode& node0 = tree.getNodes()[data.mSubTreeNode];
	const PxU32 nbPrims = node0.getNbRuntimePrimitives();
	PX_ASSERT(nbPrims <= NB_OBJECTS_PER_NODE);

	// retrieve the primitives pointer
	PxU32* primitives = node0.getPrimitives(tree.getIndices());
	PX_ASSERT(primitives);

	// Look for desired pool index in the leaf
	bool foundIt = false;
	for (PxU32 i = 0; i < nbPrims; i++)
	{
		if (objectIndex == primitives[i])
		{
			foundIt = true;
			const PxU32 last = nbPrims - 1;
			node0.setNbRunTimePrimitives(last);
			primitives[i] = INVALID_POOL_ID;	// Mark primitive index as invalid in the node

			// Swap within the leaf node. No need to update the mapping since they should all point
			// to the same tree node anyway.
			if (last != i)
				Ps::swap(primitives[i], primitives[last]);
			break;
		}
	}
	PX_ASSERT(foundIt);
	PX_UNUSED(foundIt);

	swapIndex(objectIndex, swapObject, swapObjectIndex);
}

// Swap object index
// if swapObject is in a merged tree its index needs to be swapped with objectIndex
void ExtendedBucketPruner::swapIndex(PxU32 objectIndex, const PrunerPayload& swapObject, PxU32 swapObjectIndex)
{
	// nothing to do if the pool did not actually move an object
	if (objectIndex == swapObjectIndex)
		return;

	const ExtendedBucketPrunerMap::Entry* extendedPrunerSwapEntry = mExtendedBucketPrunerMap.find(swapObject);

	// if swapped object index is in extended pruner, we have to fix the primitives index
	if (extendedPrunerSwapEntry)
	{
		const ExtendedBucketPrunerData& swapData = extendedPrunerSwapEntry->second;
		AABBTree& swapTree = *mMergedTrees[swapData.mMergeIndex].mTree;
		// With multiple primitives per leaf, tree nodes may very well be the same for different pool indices.
		// However the pool indices may be the same when a swap has been skipped in the pruning pool, in which
		// case there is nothing to do.
		PX_ASSERT(swapData.mSubTreeNode < swapTree.getNbNodes());
		PX_ASSERT(swapTree.getNodes()[swapData.mSubTreeNode].isLeaf());
		AABBTreeRuntimeNode* node1 = swapTree.getNodes() + swapData.mSubTreeNode;
		const PxU32 nbPrims = node1->getNbRuntimePrimitives();
		PX_ASSERT(nbPrims <= NB_OBJECTS_PER_NODE);

		// retrieve the primitives pointer
		PxU32* primitives = node1->getPrimitives(swapTree.getIndices());
		PX_ASSERT(primitives);

		// look for desired pool index in the leaf
		bool foundIt = false;
		for (PxU32 i = 0; i < nbPrims; i++)
		{
			if (swapObjectIndex == primitives[i])
			{
				foundIt = true;
				primitives[i] = objectIndex;	// point node to the pool object moved to
				break;
			}
		}
		PX_ASSERT(foundIt);
		PX_UNUSED(foundIt);
	}
}

//////////////////////////////////////////////////////////////////////////
// Optimized removal of timestamped objects from the extended bucket pruner
// NOTE(review): relies on the merged-trees array being time ordered and never
// shifted — newest trees are appended at the back.
PxU32 ExtendedBucketPruner::removeMarkedObjects(PxU32 timeStamp)
{
	// remove objects from the core bucket pruner
	PxU32 retVal = mBucketCore.removeMarkedObjects(timeStamp);

	// nothing to be removed
	if(!mCurrentTreeIndex)
		return retVal;

	// if the last merged tree has the timeStamp to remove, we can clear all
	// this is safe as the merged trees array is time ordered, never shifted
	if(mMergedTrees[mCurrentTreeIndex - 1].mTimeStamp == timeStamp)
	{
		retVal += mExtendedBucketPrunerMap.size();
		cleanTrees();
		return retVal;
	}

	// get the highest index in the merged trees array where the timeStamp matches;
	// we then release all trees up to and including that index
	PxU32 highestTreeIndex = 0xFFFFFFFF;
	for (PxU32 i = 0; i < mCurrentTreeIndex; i++)
	{
		if(mMergedTrees[i].mTimeStamp == timeStamp)
			highestTreeIndex = i;
		else
			break;
	}

	// if no timestamp found early exit
	if(highestTreeIndex == 0xFFFFFFFF)
	{
		return retVal;
	}

	PX_ASSERT(highestTreeIndex < mCurrentTreeIndex);
	// get offset, where valid trees start
	const PxU32 mergeTreeOffset = highestTreeIndex + 1;

	// shrink the array to merged trees with a valid timeStamp
	mCurrentTreeIndex = mCurrentTreeIndex - mergeTreeOffset;
	// go over trees and swap released trees with valid trees from the back (valid trees are at the back)
	for (PxU32 i = 0; i < mCurrentTreeIndex; i++)
	{
		// store bounds, timestamp of the valid tree at its new (compacted) slot
		mBounds[i] = mMergedTrees[mergeTreeOffset + i].mTree->getNodes()[0].mBV;
		mMergedTrees[i].mTimeStamp = mMergedTrees[mergeTreeOffset + i].mTimeStamp;

		// release the tree with the removed timestamp
		AABBTree* ptr = mMergedTrees[i].mTree;
		ptr->release();

		// store the valid tree
		mMergedTrees[i].mTree = mMergedTrees[mergeTreeOffset + i].mTree;
		// store the released tree at the offset (slot reuse, timestamp cleared)
		mMergedTrees[mergeTreeOffset + i].mTree = ptr;
		mMergedTrees[mergeTreeOffset + i].mTimeStamp = 0;
	}
	// release the rest of the trees with the removed timestamp
	for (PxU32 i = mCurrentTreeIndex; i <= highestTreeIndex; i++)
	{
		mMergedTrees[i].mTree->release();
		mMergedTrees[i].mTimeStamp = 0;
	}

	// build new main AABB tree with only trees with a valid timeStamp
	buildMainAABBTree();

	// remove all unnecessary trees and map entries
	bool removeEntry = false;
	PxU32 numRemovedEntries = 0;
	ExtendedBucketPrunerMap::EraseIterator eraseIterator = mExtendedBucketPrunerMap.getEraseIterator();
	ExtendedBucketPrunerMap::Entry* entry = eraseIterator.eraseCurrentGetNext(removeEntry);
	while (entry)
	{
		ExtendedBucketPrunerData& data = entry->second;
		// data to be removed
		if (data.mTimeStamp == timeStamp)
		{
			removeEntry = true;
			numRemovedEntries++;
		}
		else
		{
			// update the merge index and main tree node index
			PX_ASSERT(highestTreeIndex < data.mMergeIndex);
			data.mMergeIndex -= mergeTreeOffset;
			removeEntry = false;
		}
		entry = eraseIterator.eraseCurrentGetNext(removeEntry);
	}

#if PX_DEBUG
	checkValidity();
#endif // PX_DEBUG
	// return the number of removed objects
	return retVal + numRemovedEntries;
}
+////////////////////////////////////////////////////////////////////////// +// clean all trees, all objects have been released +void ExtendedBucketPruner::cleanTrees() +{ + for (PxU32 i = 0; i < mCurrentTreeIndex; i++) + { + mMergedTrees[i].mTree->release(); + mMergedTrees[i].mTimeStamp = 0; + } + mExtendedBucketPrunerMap.clear(); + mCurrentTreeIndex = 0; + mMainTree->release(); +} + +////////////////////////////////////////////////////////////////////////// +// shift origin +void ExtendedBucketPruner::shiftOrigin(const PxVec3& shift) +{ + mMainTree->shiftOrigin(shift); + + for (PxU32 i = 0; i < mCurrentTreeIndex; i++) + { + mMergedTrees[i].mTree->shiftOrigin(shift); + } + + mBucketCore.shiftOrigin(shift); +} + +////////////////////////////////////////////////////////////////////////// +// Queries implementation +////////////////////////////////////////////////////////////////////////// +// Raycast/sweeps callback for main AABB tree +template<bool tInflate> +struct MainTreeRaycastPrunerCallback: public PrunerCallback +{ + MainTreeRaycastPrunerCallback(const PxVec3& origin, const PxVec3& unitDir, const PxVec3& extent, PrunerCallback& prunerCallback, const PruningPool* pool) + : mOrigin(origin), mUnitDir(unitDir), mExtent(extent), mPrunerCallback(prunerCallback), mPruningPool(pool) + { + } + + virtual PxAgain invoke(PxReal& distance, const PrunerPayload& payload) + { + // payload data match merged tree data MergedTree, we can cast it + const AABBTree* aabbTree = reinterpret_cast<const AABBTree*> (payload.data[0]); + // raycast the merged tree + return AABBTreeRaycast<tInflate>()(mPruningPool->getObjects(), mPruningPool->getCurrentWorldBoxes(), *aabbTree, mOrigin, mUnitDir, distance, mExtent, mPrunerCallback); + } + + PX_NOCOPY(MainTreeRaycastPrunerCallback) + +private: + const PxVec3& mOrigin; + const PxVec3& mUnitDir; + const PxVec3& mExtent; + PrunerCallback& mPrunerCallback; + const PruningPool* mPruningPool; +}; + 
//////////////////////////////////////////////////////////////////////////
// raycast against the extended bucket pruner
PxAgain ExtendedBucketPruner::raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& prunerCallback) const
{
	PxAgain again = true;

	// search the bucket pruner first
	if (mBucketCore.getNbObjects())
		again = mBucketCore.raycast(origin, unitDir, inOutDistance, prunerCallback);

	if (again && mExtendedBucketPrunerMap.size())
	{
		const PxVec3 extent(0.0f);
		// main tree callback
		MainTreeRaycastPrunerCallback<false> pcb(origin, unitDir, extent, prunerCallback, mPruningPool);
		// traverse the main tree; MergedTree entries are reinterpreted as payloads
		// (layout equality is compile-time asserted in the header)
		again = AABBTreeRaycast<false>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, origin, unitDir, inOutDistance, extent, pcb);
	}

	return again;
}

//////////////////////////////////////////////////////////////////////////
// overlap main tree callback: forwards each main-tree hit (a merged tree)
// into an overlap query against that merged tree
template<typename Test>
struct MainTreeOverlapPrunerCallback : public PrunerCallback
{
	MainTreeOverlapPrunerCallback(const Test& test, PrunerCallback& prunerCallback, const PruningPool* pool)
		: mTest(test), mPrunerCallback(prunerCallback), mPruningPool(pool)
	{
	}

	virtual PxAgain invoke(PxReal& , const PrunerPayload& payload)
	{
		// payload data match merged tree data MergedTree, we can cast it
		const AABBTree* aabbTree = reinterpret_cast<const AABBTree*> (payload.data[0]);
		// overlap the merged tree
		return AABBTreeOverlap<Test>()(mPruningPool->getObjects(), mPruningPool->getCurrentWorldBoxes(), *aabbTree, mTest, mPrunerCallback);
	}

	PX_NOCOPY(MainTreeOverlapPrunerCallback)

private:
	const Test&			mTest;
	PrunerCallback&		mPrunerCallback;
	const PruningPool*	mPruningPool;
};

//////////////////////////////////////////////////////////////////////////
// overlap implementation; dispatches on the query volume geometry type
PxAgain ExtendedBucketPruner::overlap(const Gu::ShapeData& queryVolume, PrunerCallback& prunerCallback) const
{
	PxAgain again = true;

	// core bucket pruner overlap
	if (mBucketCore.getNbObjects())
		again = mBucketCore.overlap(queryVolume, prunerCallback);

	if(again && mExtendedBucketPrunerMap.size())
	{
		switch (queryVolume.getType())
		{
		case PxGeometryType::eBOX:
		{
			if (queryVolume.isOBB())
			{
				const Gu::OBBAABBTest test(queryVolume.getPrunerWorldPos(), queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerBoxGeomExtentsInflated());
				MainTreeOverlapPrunerCallback<Gu::OBBAABBTest> pcb(test, prunerCallback, mPruningPool);
				again = AABBTreeOverlap<Gu::OBBAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
			}
			else
			{
				const Gu::AABBAABBTest test(queryVolume.getPrunerInflatedWorldAABB());
				MainTreeOverlapPrunerCallback<Gu::AABBAABBTest> pcb(test, prunerCallback, mPruningPool);
				again = AABBTreeOverlap<Gu::AABBAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
			}
		}
		break;
		case PxGeometryType::eCAPSULE:
		{
			const Gu::Capsule& capsule = queryVolume.getGuCapsule();
			const Gu::CapsuleAABBTest test(capsule.p1, queryVolume.getPrunerWorldRot33().column0,
				queryVolume.getCapsuleHalfHeight()*2.0f, PxVec3(capsule.radius*SQ_PRUNER_INFLATION));
			MainTreeOverlapPrunerCallback<Gu::CapsuleAABBTest> pcb(test, prunerCallback, mPruningPool);
			again = AABBTreeOverlap<Gu::CapsuleAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
		}
		break;
		case PxGeometryType::eSPHERE:
		{
			const Gu::Sphere& sphere = queryVolume.getGuSphere();
			Gu::SphereAABBTest test(sphere.center, sphere.radius);
			MainTreeOverlapPrunerCallback<Gu::SphereAABBTest> pcb(test, prunerCallback, mPruningPool);
			again = AABBTreeOverlap<Gu::SphereAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
		}
		break;
		case PxGeometryType::eCONVEXMESH:
		{
			// convex meshes are tested against their (oriented) bounding box
			const Gu::OBBAABBTest test(queryVolume.getPrunerWorldPos(), queryVolume.getPrunerWorldRot33(), queryVolume.getPrunerBoxGeomExtentsInflated());
			MainTreeOverlapPrunerCallback<Gu::OBBAABBTest> pcb(test, prunerCallback, mPruningPool);
			again = AABBTreeOverlap<Gu::OBBAABBTest>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, test, pcb);
		}
		break;
		case PxGeometryType::ePLANE:
		case PxGeometryType::eTRIANGLEMESH:
		case PxGeometryType::eHEIGHTFIELD:
		case PxGeometryType::eGEOMETRY_COUNT:
		case PxGeometryType::eINVALID:
			PX_ALWAYS_ASSERT_MESSAGE("unsupported overlap query volume geometry type");
		}
	}

	return again;
}

//////////////////////////////////////////////////////////////////////////
// sweep implementation: a sweep is a raycast of the volume's AABB center,
// inflated by the AABB extents
PxAgain ExtendedBucketPruner::sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback& prunerCallback) const
{
	PxAgain again = true;

	// core bucket pruner sweep
	if (mBucketCore.getNbObjects())
		again = mBucketCore.sweep(queryVolume, unitDir, inOutDistance, prunerCallback);

	if(again && mExtendedBucketPrunerMap.size())
	{
		const PxBounds3& aabb = queryVolume.getPrunerInflatedWorldAABB();
		const PxVec3 extents = aabb.getExtents();
		const PxVec3 center = aabb.getCenter();
		MainTreeRaycastPrunerCallback<true> pcb(center, unitDir, extents, prunerCallback, mPruningPool);
		again = AABBTreeRaycast<true>()(reinterpret_cast<const PrunerPayload*>(mMergedTrees), mBounds, *mMainTree, center, unitDir, inOutDistance, extents, pcb);
	}
	return again;
}


//////////////////////////////////////////////////////////////////////////
#include "CmRenderOutput.h"

// visualization: recursively draw every node AABB of the given tree
void visualizeTree(Cm::RenderOutput& out, PxU32 color, AABBTree* tree)
{
	if (tree)
	{
		struct Local
		{
			static void _Draw(const AABBTreeRuntimeNode* root, const AABBTreeRuntimeNode* node, Cm::RenderOutput& out_)
			{
				out_ << Cm::DebugBox(node->mBV, true);
				if (node->isLeaf())
					return;
				_Draw(root, node->getPos(root), out_);
				_Draw(root, node->getNeg(root), out_);
			}
		};
		out << PxTransform(PxIdentity);
		out << color;
		Local::_Draw(tree->getNodes(), tree->getNodes(), out);
	}
}

// draw the main tree, all merged trees and the core bucket pruner
void ExtendedBucketPruner::visualize(Cm::RenderOutput& out, PxU32 color) const
{
	visualizeTree(out, color, mMainTree);

	for(PxU32 i = 0; i < mCurrentTreeIndex; i++)
	{
		visualizeTree(out, color, mMergedTrees[i].mTree);
	}

	mBucketCore.visualize(out, color);
}

//////////////////////////////////////////////////////////////////////////

#if PX_DEBUG
// extended bucket pruner validity check: cross-checks main tree leaves,
// merged tree leaves, the pruning pool and the object map against each other
bool ExtendedBucketPruner::checkValidity()
{
	Cm::BitMap testBitmap;
	testBitmap.resizeAndClear(mCurrentTreeIndex);
	for (PxU32 i = 0; i < mMainTree->getNbNodes(); i++)
	{
		const AABBTreeRuntimeNode& node = mMainTree->getNodes()[i];
		if(node.isLeaf())
		{
			const PxU32 nbPrims = node.getNbRuntimePrimitives();
			PX_ASSERT(nbPrims <= NB_OBJECTS_PER_NODE);

			const PxU32* primitives = node.getPrimitives(mMainTree->getIndices());
			for (PxU32 j = 0; j < nbPrims; j++)
			{
				const PxU32 index = primitives[j];
				// check if index is correct
				PX_ASSERT(index < mCurrentTreeIndex);
				// mark the index in the test bitmap, must be set only once; all merged trees must be in the main tree
				PX_ASSERT(testBitmap.test(index) == IntFalse);
				testBitmap.set(index);
			}
		}
	}

	Cm::BitMap mergeTreeTestBitmap;
	mergeTreeTestBitmap.resizeAndClear(mPruningPool->getNbActiveObjects());
	for (PxU32 i = 0; i < mCurrentTreeIndex; i++)
	{
		// check if bounds are the same as the merged tree root bounds
		PX_ASSERT(mBounds[i].maximum.x == mMergedTrees[i].mTree->getNodes()[0].mBV.maximum.x);
		PX_ASSERT(mBounds[i].maximum.y == mMergedTrees[i].mTree->getNodes()[0].mBV.maximum.y);
		PX_ASSERT(mBounds[i].maximum.z == mMergedTrees[i].mTree->getNodes()[0].mBV.maximum.z);
		PX_ASSERT(mBounds[i].minimum.x == mMergedTrees[i].mTree->getNodes()[0].mBV.minimum.x);
		PX_ASSERT(mBounds[i].minimum.y == mMergedTrees[i].mTree->getNodes()[0].mBV.minimum.y);
		PX_ASSERT(mBounds[i].minimum.z == mMergedTrees[i].mTree->getNodes()[0].mBV.minimum.z);

		// check each tree
		const AABBTree& mergedTree = *mMergedTrees[i].mTree;
		for (PxU32 j = 0; j < mergedTree.getNbNodes(); j++)
		{
			const AABBTreeRuntimeNode& node = mergedTree.getNodes()[j];
			if (node.isLeaf())
			{
				const PxU32 nbPrims = node.getNbRuntimePrimitives();
				PX_ASSERT(nbPrims <= NB_OBJECTS_PER_NODE);

				const PxU32* primitives = node.getPrimitives(mergedTree.getIndices());
				for (PxU32 k = 0; k < nbPrims; k++)
				{
					const PxU32 index = primitives[k];
					// check if index is correct
					PX_ASSERT(index < mPruningPool->getNbActiveObjects());
					// mark the index in the test bitmap, must be set only once; each pool object lives in exactly one merged tree leaf
					PX_ASSERT(mergeTreeTestBitmap.test(index) == IntFalse);
					mergeTreeTestBitmap.set(index);

					const PrunerPayload& payload = mPruningPool->getObjects()[index];
					const ExtendedBucketPrunerMap::Entry* extendedPrunerSwapEntry = mExtendedBucketPrunerMap.find(payload);
					PX_ASSERT(extendedPrunerSwapEntry);

					const ExtendedBucketPrunerData& data = extendedPrunerSwapEntry->second;
					PX_ASSERT(data.mMergeIndex == i);
					PX_ASSERT(data.mSubTreeNode == j);
				}
			}
		}
	}
	// unused capacity slots must hold empty trees
	for (PxU32 i = mCurrentTreeIndex; i < mCurrentTreeCapacity; i++)
	{
		PX_ASSERT(mMergedTrees[i].mTree->getIndices() == NULL);
		PX_ASSERT(mMergedTrees[i].mTree->getNodes() == NULL);
	}
	// every map entry must reference valid merged tree / main tree nodes
	for (ExtendedBucketPrunerMap::Iterator iter = mExtendedBucketPrunerMap.getIterator(); !iter.done(); ++iter)
	{
		const ExtendedBucketPrunerData& data = iter->second;
		PX_ASSERT(mMainTreeUpdateMap[data.mMergeIndex] < mMainTree->getNbNodes());
		PX_ASSERT(data.mMergeIndex < mCurrentTreeIndex);
		PX_ASSERT(data.mSubTreeNode < mMergedTrees[data.mMergeIndex].mTree->getNbNodes());
	}
	return true;
}
#endif

diff --git a/PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.h
b/PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.h new file mode 100644 index 00000000..ad360e10 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqExtendedBucketPruner.h @@ -0,0 +1,176 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 

#ifndef SQ_EXTENDEDBUCKETPRUNER_H
#define SQ_EXTENDEDBUCKETPRUNER_H

#include "SqTypedef.h"
#include "SqBucketPruner.h"
#include "SqAABBTreeUpdateMap.h"
#include "PsHashMap.h"

namespace physx
{
namespace Sq
{
	struct AABBPrunerMergeData;
	class AABBTreeMergeData;

	// Extended bucket pruner data: if an object belongs to the tree of trees, we need to
	// remember the node of the sub tree, the tree it belongs to and the main tree node
	struct ExtendedBucketPrunerData
	{
		PxU32			mTimeStamp;		// timestamp
		TreeNodeIndex	mSubTreeNode;	// sub tree node index
		PxU32			mMergeIndex;	// index in bounds and merged trees array
	};

	// Merged tree structure, holds a tree and its timeStamp, released when no objects are in the tree
	// or the timestamped objects are released
	struct MergedTree
	{
		AABBTree*	mTree;			// AABB tree
		size_t		mTimeStamp;		// needs to be size_t to match PrunerPayload size
	};
	// needs to be size_t to match PrunerPayload size, pointer used for AABB tree query callbacks
	PX_COMPILE_TIME_ASSERT(sizeof(MergedTree) == sizeof(PrunerPayload));

	// hashing function for PrunerPayload key
	struct ExtendedBucketPrunerHash
	{
		PX_FORCE_INLINE uint32_t operator()(const PrunerPayload& payload) const
		{
#if PX_P64_FAMILY
			// const PxU32 h0 = Ps::hash((const void*)payload.data[0]);
			// const PxU32 h1 = Ps::hash((const void*)payload.data[1]);
			const PxU32 h0 = PxU32(PX_MAX_U32 & payload.data[0]);
			const PxU32 h1 = PxU32(PX_MAX_U32 & payload.data[1]);
			return Ps::hash(PxU64(h0) | (PxU64(h1) << 32));
#else
			return Ps::hash(PxU64(payload.data[0]) | (PxU64(payload.data[1]) << 32));
#endif
		}
		PX_FORCE_INLINE bool equal(const PrunerPayload& k0, const PrunerPayload& k1) const
		{
			return (k0.data[0] == k1.data[0]) && (k0.data[1] == k1.data[1]);
		}
	};

	// A.B. replace; this is not ideal: we need to be able to traverse the map and release while traversing, and eraseAt failed
	typedef Ps::HashMap<PrunerPayload, ExtendedBucketPrunerData, ExtendedBucketPrunerHash> ExtendedBucketPrunerMap;

	// The extended bucket pruner holds single objects in a bucket pruner and AABB trees in a tree of trees.
	// Base usage of ExtendedBucketPruner is for dynamic AABBPruner new objects that did not make it
	// into the new tree. Single objects go directly into a bucket pruner, while merged AABB trees
	// go into a tree of trees.
	class ExtendedBucketPruner
	{
	public:
								ExtendedBucketPruner(const PruningPool* pool);
		virtual					~ExtendedBucketPruner();

		// release
				void			release();

		// add single object into a bucket pruner directly
		PX_FORCE_INLINE bool	addObject(const PrunerPayload& object, const PxBounds3& worldAABB, PxU32 timeStamp)
		{
			return mBucketCore.addObject(object, worldAABB, timeStamp);
		}

		// add AABB tree from pruning structure - adds new primitive into main AABB tree
				void			addTree(const AABBTreeMergeData& mergeData, PxU32 timeStamp);

		// update object
				bool			updateObject(const PxBounds3& worldAABB, const PrunerPayload& object);

		// remove object; the removed object is replaced in the pruning pool by the swapped object, indices need to be updated
				bool			removeObject(const PrunerPayload& object, PxU32 objectIndex, const PrunerPayload& swapObject,
									PxU32 swapObjectIndex, PxU32& timeStamp);

		// separate call for index invalidation; the object can be either in AABBPruner or bucket pruner, but the swapped object can be
		// in the tree of trees
				void			invalidateObject(const ExtendedBucketPrunerData& object, PxU32 objectIndex, const PrunerPayload& swapObject,
									PxU32 swapObjectIndex);

		// swap object index; the object index can be in the bucket pruner or the tree of trees
				void			swapIndex(PxU32 objectIndex, const PrunerPayload& swapObject, PxU32 swapObjectIndex);

		// refit marked nodes in tree of trees
				void			refitMarkedNodes(const PxBounds3* boxes);


		// look for objects marked with input timestamp everywhere in the structure, and remove them. This is the same
		// as calling 'removeObject' individually for all these objects, but much more efficient. Returns number of removed objects.
				PxU32			removeMarkedObjects(PxU32 timeStamp);

		// queries against the pruner
				PxAgain			raycast(const PxVec3& origin, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;
				PxAgain			overlap(const Gu::ShapeData& queryVolume, PrunerCallback&) const;
				PxAgain			sweep(const Gu::ShapeData& queryVolume, const PxVec3& unitDir, PxReal& inOutDistance, PrunerCallback&) const;

		// origin shift
				void			shiftOrigin(const PxVec3& shift);

		// debug visualize
				void			visualize(Cm::RenderOutput& out, PxU32 color) const;

		PX_FORCE_INLINE	void	build()				{ mBucketCore.build();	}

		PX_FORCE_INLINE	PxU32	getNbObjects()	const	{ return mBucketCore.getNbObjects() + mExtendedBucketPrunerMap.size(); }

	private:
				void			resize(PxU32 size);
				void			buildMainAABBTree();
				void			copyTree(AABBTree& destTree, const AABBPrunerMergeData& inputData);
				void			cleanTrees();

#if PX_DEBUG
		// Extended bucket pruner validity check
				bool			checkValidity();
#endif
	private:
		BucketPrunerCore		mBucketCore;				// Bucket pruner for single objects
		const PruningPool*		mPruningPool;				// Pruning pool from AABB pruner
		ExtendedBucketPrunerMap	mExtendedBucketPrunerMap;	// Map holding objects from tree merge - objects in tree of trees
		AABBTree*				mMainTree;					// Main tree holding merged trees
		AABBTreeUpdateMap		mMainTreeUpdateMap;			// Main tree update map - merged trees index to nodes
		AABBTreeUpdateMap		mMergeTreeUpdateMap;		// Merged tree update map used while tree is merged
		PxBounds3*				mBounds;					// Merged trees bounds used for main tree building
		MergedTree*				mMergedTrees;				// Merged trees
		PxU32					mCurrentTreeIndex;			// Current trees index
		PxU32					mCurrentTreeCapacity;		// Current trees capacity
		bool
mTreesDirty; // Dirty marker + }; + +} // namespace Sq + +} + +#endif // SQ_EXTENDEDBUCKETPRUNER_H diff --git a/PhysX_3.4/Source/SceneQuery/src/SqMetaData.cpp b/PhysX_3.4/Source/SceneQuery/src/SqMetaData.cpp new file mode 100644 index 00000000..86ba1d67 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqMetaData.cpp @@ -0,0 +1,57 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. 
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#include "PxMetaData.h"

#include "SqPruningStructure.h"

using namespace physx;
using namespace Sq;

///////////////////////////////////////////////////////////////////////////////

// Describe PruningStructure's binary layout for serialization.
// NOTE(review): the entry order/flags must match the member layout declared in
// SqPruningStructure.h — verify against that header when fields change.
void PruningStructure::getBinaryMetaData(PxOutputStream& stream)
{
	PX_DEF_BIN_METADATA_VCLASS(stream,		PruningStructure)
	PX_DEF_BIN_METADATA_BASE_CLASS(stream,	PruningStructure, PxBase)

	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mNbNodes[0],		0)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mNbNodes[1],		0)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, AABBTreeRuntimeNode,	mAABBTreeNodes[0],	PxMetaDataFlag::ePTR)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, AABBTreeRuntimeNode,	mAABBTreeNodes[1],	PxMetaDataFlag::ePTR)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mNbObjects[0],		0)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mNbObjects[1],		0)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mAABBTreeIndices[0],	PxMetaDataFlag::ePTR)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mAABBTreeIndices[1],	PxMetaDataFlag::ePTR)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxU32,				mNbActors,			0)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, PxActor*,				mActors,			PxMetaDataFlag::ePTR)
	PX_DEF_BIN_METADATA_ITEM(stream,	PruningStructure, bool,					mValid,				0)
}


diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h b/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h new file mode 100644 index 00000000..9ded6d26 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h @@ -0,0 +1,258 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 

#ifndef GU_RAWQUERY_TESTS_SIMD_H
#define GU_RAWQUERY_TESTS_SIMD_H

#include "foundation/PxTransform.h"
#include "foundation/PxBounds3.h"
#include "CmPhysXCommon.h"
#include "PxBoxGeometry.h"
#include "PxSphereGeometry.h"
#include "PxCapsuleGeometry.h"
#include "PsVecMath.h"

namespace physx
{
namespace Gu
{

// SIMD segment-vs-AABB rejection test (slab/cross-axis style) used during
// pruner tree traversal. With TInflate the node extents are grown by
// mInflation, which turns the ray test into a swept-AABB test.
struct RayAABBTest
{
	PX_FORCE_INLINE RayAABBTest(const PxVec3& origin_, const PxVec3& unitDir_, const PxReal maxDist, const PxVec3& inflation_)
		: mOrigin(V3LoadU(origin_))
		, mDir(V3LoadU(unitDir_))
		, mDirYZX(V3PermYZX(mDir))
		, mInflation(V3LoadU(inflation_))
		, mAbsDir(V3Abs(mDir))
		, mAbsDirYZX(V3PermYZX(mAbsDir))
	{
		// for an unbounded ray, clamp the segment end per axis so the min/max
		// box below stays finite (zero direction components keep the origin)
		const PxVec3 ext = maxDist >= PX_MAX_F32 ? PxVec3(	unitDir_.x == 0 ? origin_.x : PxSign(unitDir_.x)*PX_MAX_F32,
															unitDir_.y == 0 ? origin_.y : PxSign(unitDir_.y)*PX_MAX_F32,
															unitDir_.z == 0 ? origin_.z : PxSign(unitDir_.z)*PX_MAX_F32)
												 : origin_ + unitDir_ * maxDist;
		mRayMin = V3Min(mOrigin, V3LoadU(ext));
		mRayMax = V3Max(mOrigin, V3LoadU(ext));
	}

	// shrink the segment when a closer hit has been found during traversal
	PX_FORCE_INLINE void setDistance(PxReal distance)
	{
		const Vec3V ext = V3ScaleAdd(mDir, FLoad(distance), mOrigin);
		mRayMin = V3Min(mOrigin, ext);
		mRayMax = V3Max(mOrigin, ext);
	}

	template<bool TInflate>
	PX_FORCE_INLINE PxU32 check(const Vec3V center, const Vec3V extents) const
	{
		const Vec3V iExt = TInflate ? V3Add(extents, mInflation) : extents;

		// coordinate axes
		const Vec3V nodeMax = V3Add(center, iExt);
		const Vec3V nodeMin = V3Sub(center, iExt);

		// cross axes
		const Vec3V offset = V3Sub(mOrigin, center);
		const Vec3V offsetYZX = V3PermYZX(offset);
		const Vec3V iExtYZX = V3PermYZX(iExt);

		const Vec3V f = V3NegMulSub(mDirYZX, offset, V3Mul(mDir, offsetYZX));
		const Vec3V g = V3MulAdd(iExt, mAbsDirYZX, V3Mul(iExtYZX, mAbsDir));

		const BoolV
			maskA = V3IsGrtrOrEq(nodeMax, mRayMin),
			maskB = V3IsGrtrOrEq(mRayMax, nodeMin),
			maskC = V3IsGrtrOrEq(g, V3Abs(f));
		const BoolV andABCMasks = BAnd(BAnd(maskA, maskB), maskC);

		return BAllEqTTTT(andABCMasks);
	}

	const Vec3V mOrigin, mDir, mDirYZX, mInflation, mAbsDir, mAbsDirYZX;
	Vec3V mRayMin, mRayMax;
protected:
	RayAABBTest& operator=(const RayAABBTest&);
};

// probably not worth having a SIMD version of this unless the traversal passes Vec3Vs
struct AABBAABBTest
{
	PX_FORCE_INLINE AABBAABBTest(const PxTransform&t, const PxBoxGeometry&b)
		: mCenter(V3LoadU(t.p))
		, mExtents(V3LoadU(b.halfExtents))
	{ }

	PX_FORCE_INLINE AABBAABBTest(const PxBounds3& b)
		: mCenter(V3LoadU(b.getCenter()))
		, mExtents(V3LoadU(b.getExtents()))
	{ }

	// separating-axis overlap: |c - mCenter| <= mExtents + e on all three axes
	PX_FORCE_INLINE Ps::IntBool operator()(const Vec3V center, const Vec3V extents) const
	{
		//PxVec3 c; PxVec3_From_Vec3V(center, c);
		//PxVec3 e; PxVec3_From_Vec3V(extents, e);
		//if(PxAbs(c.x - mCenter.x) > mExtents.x + e.x) return Ps::IntFalse;
		//if(PxAbs(c.y - mCenter.y) > mExtents.y + e.y) return Ps::IntFalse;
		//if(PxAbs(c.z - mCenter.z) > mExtents.z + e.z) return Ps::IntFalse;
		//return Ps::IntTrue;
		return Ps::IntBool(V3AllGrtrOrEq(V3Add(mExtents, extents), V3Abs(V3Sub(center, mCenter))));
	}

private:
	AABBAABBTest& operator=(const AABBAABBTest&);
	const Vec3V mCenter, mExtents;
};

// sphere-vs-AABB: closest point on the box compared against squared radius
struct SphereAABBTest
{
	PX_FORCE_INLINE SphereAABBTest(const PxTransform& t, const PxSphereGeometry& s)
		: mCenter(V3LoadU(t.p))
		, mRadius2(FLoad(s.radius * s.radius))
	{}

	PX_FORCE_INLINE SphereAABBTest(const PxVec3& center, PxF32 radius)
		: mCenter(V3LoadU(center))
		, mRadius2(FLoad(radius * radius))
	{}

	PX_FORCE_INLINE Ps::IntBool operator()(const Vec3V boxCenter, const Vec3V boxExtents) const
	{
		const Vec3V offset = V3Sub(mCenter, boxCenter);
		// closest point on the (centered) box to the sphere center
		const Vec3V closest = V3Clamp(offset, V3Neg(boxExtents), boxExtents);
		const Vec3V d = V3Sub(offset, closest);
		return Ps::IntBool(BAllEqTTTT(FIsGrtrOrEq(mRadius2, V3Dot(d, d))));
	}

private:
	SphereAABBTest& operator=(const SphereAABBTest&);
	const Vec3V mCenter;
	const FloatV mRadius2;
};

// The Opcode capsule-AABB traversal test seems to be *exactly* the same as the ray-box test inflated by the capsule radius (so not a true capsule/box test)
// and the code for the ray-box test is better. TODO: check the zero length case and use the sphere traversal if this one fails.
// (OTOH it's not that hard to adapt the Ray-AABB test to a capsule test)

struct CapsuleAABBTest: private RayAABBTest
{
	PX_FORCE_INLINE CapsuleAABBTest(const PxVec3& origin, const PxVec3& unitDir, const PxReal length, const PxVec3& inflation)
		: RayAABBTest(origin, unitDir, length, inflation)
	{}

	PX_FORCE_INLINE Ps::IntBool operator()(const Vec3VArg center, const Vec3VArg extents) const
	{
		// inflated ray test == swept/inflated box rejection for the capsule
		return Ps::IntBool(RayAABBTest::check<true>(center, extents));
	}
};

// OBB-vs-AABB separating-axis test; fullTest additionally checks the nine
// edge-cross-product axes (definition continues below this chunk)
template<bool fullTest>
struct OBBAABBTests
{
	OBBAABBTests(const PxVec3& pos, const PxMat33& rot, const PxVec3& halfExtentsInflated)
	{
		const Vec3V eps = V3Load(1e-6f);

		mT = V3LoadU(pos);
		mExtents = V3LoadU(halfExtentsInflated);

		// storing the transpose matrices yields a simpler SIMD test
		mRT = Mat33V_From_PxMat33(rot.getTranspose());
		mART = Mat33V(V3Add(V3Abs(mRT.col0), eps), V3Add(V3Abs(mRT.col1), eps), V3Add(V3Abs(mRT.col2), eps));
		mBB_xyz = M33TrnspsMulV3(mART, mExtents);

		if(fullTest)
		{
			const Vec3V eYZX = V3PermYZX(mExtents), eZXY =
V3PermZXY(mExtents); + + mBB_123 = V3MulAdd(eYZX, V3PermZXY(mART.col0), V3Mul(eZXY, V3PermYZX(mART.col0))); + mBB_456 = V3MulAdd(eYZX, V3PermZXY(mART.col1), V3Mul(eZXY, V3PermYZX(mART.col1))); + mBB_789 = V3MulAdd(eYZX, V3PermZXY(mART.col2), V3Mul(eZXY, V3PermYZX(mART.col2))); + } + } + + // TODO: force inline it? + Ps::IntBool operator()(const Vec3V center, const Vec3V extents) const + { + const Vec3V t = V3Sub(mT, center); + + // class I - axes of AABB + if(V3OutOfBounds(t, V3Add(extents, mBB_xyz))) + return Ps::IntFalse; + + const Vec3V rX = mRT.col0, rY = mRT.col1, rZ = mRT.col2; + const Vec3V arX = mART.col0, arY = mART.col1, arZ = mART.col2; + + const FloatV eX = V3GetX(extents), eY = V3GetY(extents), eZ = V3GetZ(extents); + const FloatV tX = V3GetX(t), tY = V3GetY(t), tZ = V3GetZ(t); + + // class II - axes of OBB + { + const Vec3V v = V3ScaleAdd(rZ, tZ, V3ScaleAdd(rY, tY, V3Scale(rX, tX))); + const Vec3V v2 = V3ScaleAdd(arZ, eZ, V3ScaleAdd(arY, eY, V3ScaleAdd(arX, eX, mExtents))); + if(V3OutOfBounds(v, v2)) + return Ps::IntFalse; + } + + if(!fullTest) + return Ps::IntTrue; + + // class III - edge cross products. 
Almost all OBB tests early-out with type I or type II, + // so early-outs here probably aren't useful (TODO: profile) + + const Vec3V va = V3NegScaleSub(rZ, tY, V3Scale(rY, tZ)); + const Vec3V va2 = V3ScaleAdd(arY, eZ, V3ScaleAdd(arZ, eY, mBB_123)); + const BoolV ba = BOr(V3IsGrtr(va, va2), V3IsGrtr(V3Neg(va2), va)); + + const Vec3V vb = V3NegScaleSub(rX, tZ, V3Scale(rZ, tX)); + const Vec3V vb2 = V3ScaleAdd(arX, eZ, V3ScaleAdd(arZ, eX, mBB_456)); + const BoolV bb = BOr(V3IsGrtr(vb, vb2), V3IsGrtr(V3Neg(vb2), vb)); + + const Vec3V vc = V3NegScaleSub(rY, tX, V3Scale(rX, tY)); + const Vec3V vc2 = V3ScaleAdd(arX, eY, V3ScaleAdd(arY, eX, mBB_789)); + const BoolV bc = BOr(V3IsGrtr(vc, vc2), V3IsGrtr(V3Neg(vc2), vc)); + + return Ps::IntBool(BAllEqFFFF(BOr(ba, BOr(bb,bc)))); + } + + Vec3V mExtents; // extents of OBB + Vec3V mT; // translation of OBB + Mat33V mRT; // transpose of rotation matrix of OBB + Mat33V mART; // transpose of mRT, padded by epsilon + + Vec3V mBB_xyz; // extents of OBB along coordinate axes + Vec3V mBB_123; // projections of extents onto edge-cross axes + Vec3V mBB_456; + Vec3V mBB_789; +}; + +typedef OBBAABBTests<true> OBBAABBTest; + +} +} +#endif diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.cpp b/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.cpp new file mode 100644 index 00000000..8a90a1d3 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.cpp @@ -0,0 +1,182 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#include "foundation/PxMemory.h" +#include "SqPruningPool.h" + +using namespace physx; +using namespace Sq; +using namespace Cm; + +PruningPool::PruningPool() : + mNbObjects (0), + mMaxNbObjects (0), + mWorldBoxes (NULL), + mObjects (NULL), + mHandleToIndex (NULL), + mIndexToHandle (NULL), + mFirstRecycledHandle(INVALID_PRUNERHANDLE) +{ +} + +PruningPool::~PruningPool() +{ + PX_FREE_AND_RESET(mWorldBoxes); + PX_FREE_AND_RESET(mObjects); + PX_FREE_AND_RESET(mHandleToIndex); + PX_FREE_AND_RESET(mIndexToHandle); +} + +bool PruningPool::resize(PxU32 newCapacity) +{ + // PT: we always allocate one extra box, to make sure we can safely use V4 loads on the array + PxBounds3* newBoxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*(newCapacity+1), "PxBounds3")); + PrunerPayload* newData = reinterpret_cast<PrunerPayload*>(PX_ALLOC(sizeof(PrunerPayload)*newCapacity, "PrunerPayload*")); + PrunerHandle* newIndexToHandle = reinterpret_cast<PrunerHandle*>(PX_ALLOC(sizeof(PrunerHandle)*newCapacity, "Pruner Index Mapping")); + PoolIndex* newHandleToIndex = reinterpret_cast<PoolIndex*>(PX_ALLOC(sizeof(PoolIndex)*newCapacity, "Pruner Index Mapping")); + if( (NULL==newBoxes) || (NULL==newData) || (NULL==newIndexToHandle) || (NULL==newHandleToIndex) + ) + { + PX_FREE_AND_RESET(newBoxes); + PX_FREE_AND_RESET(newData); + PX_FREE_AND_RESET(newIndexToHandle); + PX_FREE_AND_RESET(newHandleToIndex); + return false; + } + + if(mWorldBoxes) PxMemCopy(newBoxes, mWorldBoxes, mNbObjects*sizeof(PxBounds3)); + if(mObjects) PxMemCopy(newData, mObjects, mNbObjects*sizeof(PrunerPayload)); + if(mIndexToHandle) PxMemCopy(newIndexToHandle, mIndexToHandle, mNbObjects*sizeof(PrunerHandle)); + if(mHandleToIndex) PxMemCopy(newHandleToIndex, mHandleToIndex, mMaxNbObjects*sizeof(PoolIndex)); + 
mMaxNbObjects = newCapacity; + + PX_FREE_AND_RESET(mWorldBoxes); + PX_FREE_AND_RESET(mObjects); + PX_FREE_AND_RESET(mHandleToIndex); + PX_FREE_AND_RESET(mIndexToHandle); + mWorldBoxes = newBoxes; + mObjects = newData; + mHandleToIndex = newHandleToIndex; + mIndexToHandle = newIndexToHandle; + + return true; +} + +void PruningPool::preallocate(PxU32 newCapacity) +{ + if(newCapacity>mMaxNbObjects) + resize(newCapacity); +} + +PxU32 PruningPool::addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count) +{ + for(PxU32 i=0;i<count;i++) + { + if(mNbObjects==mMaxNbObjects) // increase the capacity on overflow + { + if(!resize(PxMax<PxU32>(mMaxNbObjects*2, 64))) + { + // pool can return an invalid handle if memory alloc fails + // should probably have an error here or not handle this + results[i] = INVALID_PRUNERHANDLE; // PT: we need to write the potentially invalid handle to let users know which object failed first + return i; + } + } + PX_ASSERT(mNbObjects!=mMaxNbObjects); + + const PoolIndex index = mNbObjects++; + + // update mHandleToIndex and mIndexToHandle mappings + PrunerHandle handle; + if(mFirstRecycledHandle != INVALID_PRUNERHANDLE) + { + // mFirstRecycledHandle is an entry into a freelist for removed slots + // this path is only taken if we have any removed slots + handle = mFirstRecycledHandle; + mFirstRecycledHandle = mHandleToIndex[handle]; + } + else + { + handle = index; + } + + // PT: TODO: investigate why we added mIndexToHandle/mHandleToIndex. The initial design with 'Prunable' objects didn't need these arrays. 
+ + // PT: these 3 arrays are "parallel" + mWorldBoxes [index] = bounds[i]; // store the payload and AABB in parallel arrays + mObjects [index] = payload[i]; + mIndexToHandle [index] = handle; + + mHandleToIndex[handle] = index; + results[i] = handle; + } + return count; +} + +PoolIndex PruningPool::removeObject(PrunerHandle h) +{ + PX_ASSERT(mNbObjects); + + // remove the object and its AABB by provided PrunerHandle and update mHandleToIndex and mIndexToHandle mappings + const PoolIndex indexOfRemovedObject = mHandleToIndex[h]; // retrieve object's index from handle + + const PoolIndex indexOfLastObject = --mNbObjects; // swap the object at last index with index + if(indexOfLastObject!=indexOfRemovedObject) + { + // PT: move last object's data to recycled spot (from removed object) + + // PT: the last object has moved so we need to handle the mappings for this object + // PT: TODO: investigate where this double-mapping comes from. Should not be needed... + + // PT: these 3 arrays are "parallel" + const PrunerHandle handleOfLastObject = mIndexToHandle[indexOfLastObject]; + mWorldBoxes [indexOfRemovedObject] = mWorldBoxes [indexOfLastObject]; + mObjects [indexOfRemovedObject] = mObjects [indexOfLastObject]; + mIndexToHandle [indexOfRemovedObject] = handleOfLastObject; + + mHandleToIndex[handleOfLastObject] = indexOfRemovedObject; + } + + // mHandleToIndex also stores the freelist for removed handles (in place of holes formed by removed handles) + mHandleToIndex[h] = mFirstRecycledHandle; // update linked list of available recycled handles + mFirstRecycledHandle = h; // update the list head + + return indexOfLastObject; +} + +void PruningPool::shiftOrigin(const PxVec3& shift) +{ + for(PxU32 i=0; i < mNbObjects; i++) + { + mWorldBoxes[i].minimum -= shift; + mWorldBoxes[i].maximum -= shift; + } +} diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.h b/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.h new file mode 100644 index 00000000..229ea340 --- /dev/null +++ 
b/PhysX_3.4/Source/SceneQuery/src/SqPruningPool.h @@ -0,0 +1,120 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 

#ifndef SQ_PRUNINGPOOL_H
#define SQ_PRUNINGPOOL_H

#include "SqPruner.h"
#include "SqTypedef.h"
#include "SqBounds.h"

namespace physx
{
namespace Sq
{
	// This class is designed to maintain a two way mapping between pair(PrunerPayload,AABB) and PrunerHandle
	// Internally there's also an index for handles (AP: can be simplified?)
	// This class effectively stores bounded pruner payloads, returns a PrunerHandle and allows O(1)
	// access to them using a PrunerHandle
	// Supported operations are add, remove, update bounds
	class PruningPool
	{
		public:
												PruningPool();
												~PruningPool();

		// O(1) payload lookup by handle
		PX_FORCE_INLINE	const PrunerPayload&	getPayload(PrunerHandle handle) const	{ return mObjects[getIndex(handle)]; }

		// O(1) payload lookup by handle; also returns a pointer to the object's
		// world AABB so callers can update it in place
		PX_FORCE_INLINE	const PrunerPayload&	getPayload(PrunerHandle handle, PxBounds3*& bounds) const
												{
													const PoolIndex index = getIndex(handle);
													bounds = mWorldBoxes + index;
													return mObjects[index];
												}

		// translates all stored AABBs by -shift (world origin shift)
						void					shiftOrigin(const PxVec3& shift);

		// PT: adds 'count' objects to the pool. Needs 'count' bounds and 'count' payloads passed as input. Writes out 'count' handles
		// in 'results' array. Function returns number of successfully added objects, ideally 'count' but can be less in case we run
		// out of memory.
						PxU32					addObjects(PrunerHandle* results, const PxBounds3* bounds, const PrunerPayload* payload, PxU32 count);

		// this function will swap the last object with the hole formed by removed PrunerHandle object
		// and return the removed last object's index in the pool
						PoolIndex				removeObject(PrunerHandle h);

		// Data access
		PX_FORCE_INLINE	PoolIndex				getIndex(PrunerHandle h)		const	{ return mHandleToIndex[h]; }
		PX_FORCE_INLINE	PrunerPayload*			getObjects()					const	{ return mObjects; }
		PX_FORCE_INLINE	PxU32					getNbActiveObjects()			const	{ return mNbObjects; }
		PX_FORCE_INLINE	const PxBounds3*		getCurrentWorldBoxes()			const	{ return mWorldBoxes; }
		PX_FORCE_INLINE	PxBounds3*				getCurrentWorldBoxes()					{ return mWorldBoxes; }

		PX_FORCE_INLINE	void					setWorldAABB(PrunerHandle h, const PxBounds3& worldAABB)
												{
													mWorldBoxes[getIndex(h)] = worldAABB;
												}

		PX_FORCE_INLINE	const PxBounds3&		getWorldAABB(PrunerHandle h) const
												{
													return mWorldBoxes[getIndex(h)];
												}

		// writes inflated copies of newBounds[indices[i]] into the stored AABB
		// of each handles[i] (see Sq::inflateBounds)
		PX_FORCE_INLINE	void					updateObjects(const PrunerHandle* handles, const PxU32* indices, const PxBounds3* newBounds, PxU32 count)
												{
													for(PxU32 i=0; i<count; i++)
														Sq::inflateBounds(mWorldBoxes[getIndex(handles[i])], newBounds[indices[i]]);
												}

		// ensures capacity for at least 'entries' objects
						void					preallocate(PxU32 entries);
//	protected:

		PxU32			mNbObjects;		//!< Current number of objects
		PxU32			mMaxNbObjects;	//!< Max. number of objects (capacity for mWorldBoxes, mObjects)

		//!< these arrays are parallel
		PxBounds3*		mWorldBoxes;	//!< List of world boxes, stores mNbObjects, capacity=mMaxNbObjects
		PrunerPayload*	mObjects;		//!< List of objects, stores mNbObjects, capacity=mMaxNbObjects
//	private:
		PoolIndex*		mHandleToIndex;	//!< Maps from PrunerHandle to internal index (payload index in mObjects)
		PrunerHandle*	mIndexToHandle;	//!< Inverse map from objectIndex to PrunerHandle

		// this is the head of a list of holes formed in mHandleToIndex
		// by removed handles
		// the rest of the list is stored in holes in mHandleToIndex (in place)
		PrunerHandle	mFirstRecycledHandle;

		// grows all arrays to newCapacity; false on allocation failure
		bool			resize(PxU32 newCapacity);
	};

} // namespace Sq

}

#endif // SQ_PRUNINGPOOL_H
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPruningStructure.cpp b/PhysX_3.4/Source/SceneQuery/src/SqPruningStructure.cpp
new file mode 100644
index 00000000..d785abb2
--- /dev/null
+++ b/PhysX_3.4/Source/SceneQuery/src/SqPruningStructure.cpp
@@ -0,0 +1,427 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "SqPruningStructure.h" +#include "SqAABBPruner.h" +#include "SqAABBTree.h" +#include "SqBounds.h" + +#include "NpRigidDynamic.h" +#include "NpRigidStatic.h" +#include "NpShape.h" + +#include "GuBounds.h" + +#include "CmTransformUtils.h" +#include "CmUtils.h" + +#include "ScbShape.h" + +using namespace physx; +using namespace Sq; + +////////////////////////////////////////////////////////////////////////// + +#define NB_OBJECTS_PER_NODE 4 + +////////////////////////////////////////////////////////////////////////// +PruningStructure::PruningStructure(PxBaseFlags baseFlags) + : PxPruningStructure(baseFlags) +{ +} + +////////////////////////////////////////////////////////////////////////// +PruningStructure::PruningStructure() + : PxPruningStructure(PxConcreteType::ePRUNING_STRUCTURE, PxBaseFlag::eOWNS_MEMORY | PxBaseFlag::eIS_RELEASABLE), + mNbActors(0), mActors(0), mValid(true) +{ + for (PxU32 i = 0; i < 2; i++) + { + mNbNodes[i] = 0; + mNbObjects[i] = 0; + mAABBTreeIndices[i] = NULL; + mAABBTreeNodes[i] = NULL; + } +} + +////////////////////////////////////////////////////////////////////////// +PruningStructure::~PruningStructure() +{ 
+ if(getBaseFlags() & PxBaseFlag::eOWNS_MEMORY) + { + for (PxU32 i = 0; i < 2; i++) + { + if(mAABBTreeIndices[i]) + { + PX_FREE(mAABBTreeIndices[i]); + } + if (mAABBTreeNodes[i]) + { + PX_FREE(mAABBTreeNodes[i]); + } + } + + if(mActors) + { + PX_FREE(mActors); + } + } +} + +////////////////////////////////////////////////////////////////////////// +void PruningStructure::release() +{ + // if we release the pruning structure we set the pruner structure to NUUL + for (PxU32 i = 0; i < mNbActors; i++) + { + PX_ASSERT(mActors[i]); + + PxType type = mActors[i]->getConcreteType(); + if (type == PxConcreteType::eRIGID_STATIC) + { + static_cast<NpRigidStatic*>(mActors[i])->getShapeManager().setPruningStructure(NULL); + } + else if (type == PxConcreteType::eRIGID_DYNAMIC) + { + static_cast<NpRigidDynamic*>(mActors[i])->getShapeManager().setPruningStructure(NULL); + } + } + + if(getBaseFlags() & PxBaseFlag::eOWNS_MEMORY) + { + delete this; + } + else + { + this->~PruningStructure(); + } +} + +template <typename ActorType> +static void getShapeBounds(PxRigidActor* actor, bool dynamic, PxBounds3& bounds, PxU32& numShapes) +{ + PruningIndex::Enum treeStructure = dynamic ? 
PruningIndex::eDYNAMIC : PruningIndex::eSTATIC; + ActorType& a = *static_cast<ActorType*>(actor); + const PxU32 nbShapes = a.getNbShapes(); + for (PxU32 iShape = 0; iShape < nbShapes; iShape++) + { + NpShape* shape = a.getShapeManager().getShapes()[iShape]; + if (shape->getFlags() & PxShapeFlag::eSCENE_QUERY_SHAPE) + { + const Scb::Shape& scbShape = shape->getScbShape(); + const Scb::Actor& scbActor = a.getScbActorFast(); + + (gComputeBoundsTable[treeStructure])(bounds, scbShape, scbActor); + numShapes++; + } + } +} + +////////////////////////////////////////////////////////////////////////// +bool PruningStructure::build(PxRigidActor*const* actors, PxU32 nbActors) +{ + PX_ASSERT(actors); + PX_ASSERT(nbActors > 0); + + PxU32 numShapes[2] = { 0, 0 }; + // parse the actors first to get the shapes size + for (PxU32 actorsDone = 0; actorsDone < nbActors; actorsDone++) + { + if (actorsDone + 1 < nbActors) + Ps::prefetch(actors[actorsDone + 1], sizeof(NpRigidDynamic)); // worst case: PxRigidStatic is smaller + + PxType type = actors[actorsDone]->getConcreteType(); + const PxRigidActor& actor = *(actors[actorsDone]); + + Scb::ControlState::Enum cs = NpActor::getScbFromPxActor(actor).getControlState(); + if (!((cs == Scb::ControlState::eNOT_IN_SCENE) || ((cs == Scb::ControlState::eREMOVE_PENDING)))) + { + Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Actor already assigned to a scene!"); + return false; + } + + const PxU32 nbShapes = actor.getNbShapes(); + bool hasQueryShape = false; + for (PxU32 iShape = 0; iShape < nbShapes; iShape++) + { + PxShape* shape; + actor.getShapes(&shape, 1, iShape); + if(shape->getFlags() & PxShapeFlag::eSCENE_QUERY_SHAPE) + { + hasQueryShape = true; + if (type == PxConcreteType::eRIGID_STATIC) + numShapes[PruningIndex::eSTATIC]++; + else + numShapes[PruningIndex::eDYNAMIC]++; + } + } + + // each provided actor must have a query shape + if(!hasQueryShape) + { + 
Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Provided actor has no scene query shape!"); + return false; + } + + if (type == PxConcreteType::eRIGID_STATIC) + { + NpRigidStatic* rs = static_cast<NpRigidStatic*>(actors[actorsDone]); + if(rs->getShapeManager().getPruningStructure()) + { + Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Provided actor has already a pruning structure!"); + return false; + } + rs->getShapeManager().setPruningStructure(this); + } + else if (type == PxConcreteType::eRIGID_DYNAMIC) + { + NpRigidDynamic* rd = static_cast<NpRigidDynamic*>(actors[actorsDone]); + if (rd->getShapeManager().getPruningStructure()) + { + Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Provided actor has already a pruning structure!"); + return false; + } + rd->getShapeManager().setPruningStructure(this); + } + else + { + Ps::getFoundation().error(PxErrorCode::eINVALID_PARAMETER, __FILE__, __LINE__, "PrunerStructure::build: Provided actor is not a rigid actor!"); + return false; + } + } + + PxBounds3* bounds[2] = { NULL, NULL }; + + for (PxU32 i = 0; i < 2; i++) + { + if(numShapes[i]) + { + bounds[i] = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*numShapes[i], "Pruner bounds")); + } + } + + // now I go again and gather bounds and payload + numShapes[PruningIndex::eSTATIC] = 0; + numShapes[PruningIndex::eDYNAMIC] = 0; + for (PxU32 actorsDone = 0; actorsDone < nbActors; actorsDone++) + { + PxType type = actors[actorsDone]->getConcreteType(); + if (type == PxConcreteType::eRIGID_STATIC) + { + getShapeBounds<NpRigidStatic>(actors[actorsDone], false, + bounds[PruningIndex::eSTATIC][numShapes[PruningIndex::eSTATIC]], numShapes[PruningIndex::eSTATIC]); + } + else if (type == PxConcreteType::eRIGID_DYNAMIC) + { + getShapeBounds<NpRigidDynamic>(actors[actorsDone], true, + 
bounds[PruningIndex::eDYNAMIC][numShapes[PruningIndex::eDYNAMIC]], numShapes[PruningIndex::eDYNAMIC]); + } + } + + AABBTree aabbTrees[2]; + for (PxU32 i = 0; i < 2; i++) + { + mNbObjects[i] = numShapes[i]; + if (numShapes[i]) + { + // create the AABB tree + AABBTreeBuildParams sTB; + sTB.mNbPrimitives = numShapes[i]; + sTB.mAABBArray = bounds[i]; + sTB.mLimit = NB_OBJECTS_PER_NODE; + bool status = aabbTrees[i].build(sTB); + + PX_UNUSED(status); + PX_ASSERT(status); + + // store the tree nodes + mNbNodes[i] = aabbTrees[i].getNbNodes(); + mAABBTreeNodes[i] = reinterpret_cast<AABBTreeRuntimeNode*>(PX_ALLOC(sizeof(AABBTreeRuntimeNode)*mNbNodes[i], "AABBTreeRuntimeNode")); + PxMemCopy(mAABBTreeNodes[i], aabbTrees[i].getNodes(), sizeof(AABBTreeRuntimeNode)*mNbNodes[i]); + mAABBTreeIndices[i] = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mNbObjects[i], "PxU32")); + PxMemCopy(mAABBTreeIndices[i], aabbTrees[i].getIndices(), sizeof(PxU32)*mNbObjects[i]); + + // discard the data + PX_FREE(bounds[i]); + } + } + + // store the actors for verification and serialization + mNbActors = nbActors; + mActors = reinterpret_cast<PxActor**>(PX_ALLOC(sizeof(PxActor*)*mNbActors, "PxActor*")); + PxMemCopy(mActors, actors, sizeof(PxActor*)*mNbActors); + + return true; +} + +////////////////////////////////////////////////////////////////////////// + +PruningStructure* PruningStructure::createObject(PxU8*& address, PxDeserializationContext& context) +{ + PruningStructure* obj = new (address)PruningStructure(PxBaseFlag::eIS_RELEASABLE); + address += sizeof(PruningStructure); + obj->importExtraData(context); + obj->resolveReferences(context); + return obj; +} + +////////////////////////////////////////////////////////////////////////// + +void PruningStructure::resolveReferences(PxDeserializationContext& context) +{ + if (!isValid()) + return; + + for (PxU32 i = 0; i < mNbActors; i++) + { + context.translatePxBase(mActors[i]); + } +} + 
+////////////////////////////////////////////////////////////////////////// + +void PruningStructure::requires(PxProcessPxBaseCallback& c) +{ + if (!isValid()) + return; + + for (PxU32 i = 0; i < mNbActors; i++) + { + c.process(*mActors[i]); + } +} + +////////////////////////////////////////////////////////////////////////// + +void PruningStructure::exportExtraData(PxSerializationContext& stream) +{ + if (!isValid()) + { + Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "PrunerStructure::exportExtraData: Pruning structure is invalid!"); + return; + } + + for (PxU32 i = 0; i < 2; i++) + { + if (mAABBTreeNodes[i]) + { + // store nodes + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mAABBTreeNodes[i], mNbNodes[i] * sizeof(AABBTreeRuntimeNode)); + } + + if(mAABBTreeIndices[i]) + { + // store indices + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mAABBTreeIndices[i], mNbObjects[i] * sizeof(PxU32)); + } + } + + if(mActors) + { + // store actor pointers + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mActors, mNbActors * sizeof(PxActor*)); + } +} + +////////////////////////////////////////////////////////////////////////// + +void PruningStructure::importExtraData(PxDeserializationContext& context) +{ + if (!isValid()) + { + Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "PrunerStructure::importExtraData: Pruning structure is invalid!"); + return; + } + + for (PxU32 i = 0; i < 2; i++) + { + if (mAABBTreeNodes[i]) + { + mAABBTreeNodes[i] = context.readExtraData<Sq::AABBTreeRuntimeNode, PX_SERIAL_ALIGN>(mNbNodes[i]); + } + if(mAABBTreeIndices[i]) + { + mAABBTreeIndices[i] = context.readExtraData<PxU32, PX_SERIAL_ALIGN>(mNbObjects[i]); + } + } + + if (mActors) + { + // read actor pointers + mActors = context.readExtraData<PxActor*, PX_SERIAL_ALIGN>(mNbActors); + } +} + +////////////////////////////////////////////////////////////////////////// + +PxU32 
PruningStructure::getRigidActors(PxRigidActor** userBuffer, PxU32 bufferSize, PxU32 startIndex/* =0 */) const +{ + if(!isValid()) + { + Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "PrunerStructure::getRigidActors: Pruning structure is invalid!"); + return 0; + } + + return Cm::getArrayOfPointers(userBuffer, bufferSize, startIndex, mActors, mNbActors); +} + +////////////////////////////////////////////////////////////////////////// + +void PruningStructure::invalidate(PxActor* actor) +{ + PX_ASSERT(actor); + + // remove actor from the actor list to avoid mem corruption + // this slow, but should be called only with error msg send to user about invalid behavior + for (PxU32 i = 0; i < mNbActors; i++) + { + if(mActors[i] == actor) + { + // set pruning structure to NULL and remove the actor from the list + PxType type = mActors[i]->getConcreteType(); + if (type == PxConcreteType::eRIGID_STATIC) + { + static_cast<NpRigidStatic*>(mActors[i])->getShapeManager().setPruningStructure(NULL); + } + else if (type == PxConcreteType::eRIGID_DYNAMIC) + { + static_cast<NpRigidDynamic*>(mActors[i])->getShapeManager().setPruningStructure(NULL); + } + + mActors[i] = mActors[mNbActors--]; + break; + } + } + + mValid = false; +} + diff --git a/PhysX_3.4/Source/SceneQuery/src/SqSceneQueryManager.cpp b/PhysX_3.4/Source/SceneQuery/src/SqSceneQueryManager.cpp new file mode 100644 index 00000000..cd3e25eb --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqSceneQueryManager.cpp @@ -0,0 +1,500 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "SqSceneQueryManager.h" +#include "SqAABBPruner.h" +#include "SqBucketPruner.h" +#include "SqBounds.h" +#include "NpBatchQuery.h" +#include "PxFiltering.h" +#include "NpRigidDynamic.h" +#include "NpRigidStatic.h" +#include "NpArticulationLink.h" +#include "CmTransformUtils.h" +#include "PsAllocator.h" +#include "PxSceneDesc.h" +#include "ScBodyCore.h" +#include "SqPruner.h" +#include "GuBounds.h" +#include "NpShape.h" + +using namespace physx; +using namespace Sq; +using namespace Sc; + +namespace physx +{ + namespace Sq + { + OffsetTable gOffsetTable; + } +} + +PrunerExt::PrunerExt() : + mPruner (NULL), + mDirtyList (PX_DEBUG_EXP("SQmDirtyList")), + mPrunerType (PxPruningStructureType::eLAST), + mTimestamp (0xffffffff) +{ +} + +PrunerExt::~PrunerExt() +{ + PX_DELETE_AND_RESET(mPruner); +} + +void PrunerExt::init(PxPruningStructureType::Enum type, PxU64 contextID) +{ + mPrunerType = type; + mTimestamp = 0; + Pruner* pruner = NULL; + switch(type) + { + case PxPruningStructureType::eNONE: { pruner = PX_NEW(BucketPruner); break; } + case PxPruningStructureType::eDYNAMIC_AABB_TREE: { pruner = PX_NEW(AABBPruner)(true, contextID); break; } + case PxPruningStructureType::eSTATIC_AABB_TREE: { pruner = PX_NEW(AABBPruner)(false, contextID); break; } + case PxPruningStructureType::eLAST: break; + } + mPruner = pruner; +} + +void PrunerExt::preallocate(PxU32 nbShapes) +{ + if(nbShapes > mDirtyMap.size()) + mDirtyMap.resize(nbShapes); + + if(mPruner) + mPruner->preallocate(nbShapes); +} + +void PrunerExt::flushMemory() +{ + if(!mDirtyList.size()) + mDirtyList.reset(); + + // PT: TODO: flush bitmap here + + // PT: TODO: flush pruner here? 
+} + +void PrunerExt::flushShapes(PxU32 index) +{ + const PxU32 numDirtyList = mDirtyList.size(); + if(!numDirtyList) + return; + const PrunerHandle* const prunerHandles = mDirtyList.begin(); + + const ComputeBoundsFunc func = gComputeBoundsTable[index]; + + for(PxU32 i=0; i<numDirtyList; i++) + { + const PrunerHandle handle = prunerHandles[i]; + mDirtyMap.reset(handle); + + // PT: we compute the new bounds and store them directly in the pruner structure to avoid copies. We delay the updateObjects() call + // to take advantage of batching. + PxBounds3* bounds; + const PrunerPayload& pp = mPruner->getPayload(handle, bounds); + (func)(*bounds, *(reinterpret_cast<Scb::Shape*>(pp.data[0])), *(reinterpret_cast<Scb::Actor*>(pp.data[1]))); + } + // PT: batch update happens after the loop instead of once per loop iteration + mPruner->updateObjects(prunerHandles, NULL, numDirtyList); + mTimestamp += numDirtyList; + mDirtyList.clear(); +} + +// PT: TODO: re-inline this +void PrunerExt::addToDirtyList(PrunerHandle handle) +{ + Cm::BitMap& dirtyMap = mDirtyMap; + if(!dirtyMap.test(handle)) + { + dirtyMap.set(handle); + mDirtyList.pushBack(handle); + mTimestamp++; + } +} + +// PT: TODO: re-inline this +Ps::IntBool PrunerExt::isDirty(PrunerHandle handle) const +{ + return mDirtyMap.test(handle); +} + +// PT: TODO: re-inline this +void PrunerExt::removeFromDirtyList(PrunerHandle handle) +{ + Cm::BitMap& dirtyMap = mDirtyMap; + if(dirtyMap.test(handle)) + { + dirtyMap.reset(handle); + mDirtyList.findAndReplaceWithLast(handle); + } +} + +// PT: TODO: re-inline this +void PrunerExt::growDirtyList(PrunerHandle handle) +{ + // pruners must either provide indices in order or reuse existing indices, so this 'if' is enough to ensure we have space for the new handle + // PT: TODO: fix this. There is just no need for any of it. The pruning pool itself could support the feature for free, similar to what we do + // in MBP. There would be no need for the bitmap or the dirty list array. 
However doing this through the virtual interface would be clumsy, + // adding the cost of virtual calls for very cheap & simple operations. It would be a lot easier to drop it and go back to what we had before. + + Cm::BitMap& dirtyMap = mDirtyMap; + if(dirtyMap.size() <= handle) + dirtyMap.resize(PxMax<PxU32>(dirtyMap.size() * 2, 1024)); + PX_ASSERT(handle<dirtyMap.size()); + dirtyMap.reset(handle); +} + +/////////////////////////////////////////////////////////////////////////////// + +SceneQueryManager::SceneQueryManager( Scb::Scene& scene, PxPruningStructureType::Enum staticStructure, + PxPruningStructureType::Enum dynamicStructure, PxU32 dynamicTreeRebuildRateHint, + const PxSceneLimits& limits) : + mScene (scene) +{ + mPrunerExt[PruningIndex::eSTATIC].init(staticStructure, scene.getContextId()); + mPrunerExt[PruningIndex::eDYNAMIC].init(dynamicStructure, scene.getContextId()); + + setDynamicTreeRebuildRateHint(dynamicTreeRebuildRateHint); + + preallocate(limits.maxNbStaticShapes, limits.maxNbDynamicShapes); + + mDynamicBoundsSync.mPruner = mPrunerExt[PruningIndex::eDYNAMIC].pruner(); + mDynamicBoundsSync.mTimestamp = &mPrunerExt[PruningIndex::eDYNAMIC].mTimestamp; +} + +SceneQueryManager::~SceneQueryManager() +{ +} + +void SceneQueryManager::flushMemory() +{ + for(PxU32 i=0;i<PruningIndex::eCOUNT;i++) + mPrunerExt[i].flushMemory(); +} + +void SceneQueryManager::markForUpdate(PrunerData data) +{ + const PxU32 index = getPrunerIndex(data); + const PrunerHandle handle = getPrunerHandle(data); + + mPrunerExt[index].addToDirtyList(handle); +} + +void SceneQueryManager::preallocate(PxU32 staticShapes, PxU32 dynamicShapes) +{ + mPrunerExt[PruningIndex::eSTATIC].preallocate(staticShapes); + mPrunerExt[PruningIndex::eDYNAMIC].preallocate(dynamicShapes); +} + +PrunerData SceneQueryManager::addPrunerShape(const NpShape& shape, const PxRigidActor& actor, bool dynamic, const PxBounds3* bounds, bool hasPrunerStructure) +{ + PrunerPayload pp; + const Scb::Shape& scbShape = 
shape.getScbShape(); + const Scb::Actor& scbActor = gOffsetTable.convertPxActor2Scb(actor); + pp.data[0] = size_t(&scbShape); + pp.data[1] = size_t(&scbActor); + + PxBounds3 b; + if(bounds) + inflateBounds(b, *bounds); + else + (gComputeBoundsTable[dynamic])(b, scbShape, scbActor); + + const PxU32 index = PxU32(dynamic); + PrunerHandle handle; + PX_ASSERT(mPrunerExt[index].pruner()); + mPrunerExt[index].pruner()->addObjects(&handle, &b, &pp, 1, hasPrunerStructure); + mPrunerExt[index].invalidateTimestamp(); + + mPrunerExt[index].growDirtyList(handle); + + return createPrunerData(index, handle); +} + +const PrunerPayload& SceneQueryManager::getPayload(PrunerData data) const +{ + const PxU32 index = getPrunerIndex(data); + const PrunerHandle handle = getPrunerHandle(data); + return mPrunerExt[index].pruner()->getPayload(handle); +} + +void SceneQueryManager::removePrunerShape(PrunerData data) +{ + const PxU32 index = getPrunerIndex(data); + const PrunerHandle handle = getPrunerHandle(data); + + PX_ASSERT(mPrunerExt[index].pruner()); + + mPrunerExt[index].removeFromDirtyList(handle); + + mPrunerExt[index].invalidateTimestamp(); + mPrunerExt[index].pruner()->removeObjects(&handle); +} + +void SceneQueryManager::setDynamicTreeRebuildRateHint(PxU32 rebuildRateHint) +{ + mRebuildRateHint = rebuildRateHint; + + for(PxU32 i=0;i<PruningIndex::eCOUNT;i++) + { + if(mPrunerExt[i].pruner() && mPrunerExt[i].type() == PxPruningStructureType::eDYNAMIC_AABB_TREE) + static_cast<AABBPruner*>(mPrunerExt[i].pruner())->setRebuildRateHint(rebuildRateHint); + } +} + + +static PxBounds3 computeWorldAABB(const Scb::Shape& scbShape, const Sc::BodyCore& bodyCore) +{ + const Gu::GeometryUnion& geom = scbShape.getGeometryUnion(); + const PxTransform& shape2Actor = scbShape.getShape2Actor(); + + PX_ALIGN(16, PxTransform) globalPose; + + PX_ALIGN(16, PxTransform) kinematicTarget; + PxU16 sqktFlags = PxRigidBodyFlag::eKINEMATIC | PxRigidBodyFlag::eUSE_KINEMATIC_TARGET_FOR_SCENE_QUERIES; + bool 
useTarget = (PxU16(bodyCore.getFlags()) & sqktFlags) == sqktFlags; + + const PxTransform& body2World = (useTarget && bodyCore.getKinematicTarget(kinematicTarget)) ? kinematicTarget : bodyCore.getBody2World(); + Cm::getDynamicGlobalPoseAligned(body2World, shape2Actor, bodyCore.getBody2Actor(), globalPose); + + PxBounds3 tmp; + inflateBounds(tmp, Gu::computeBounds(geom.getGeometry(), globalPose, false)); + return tmp; +} + + +void SceneQueryManager::validateSimUpdates() +{ + if (mPrunerExt[1].type() != PxPruningStructureType::eDYNAMIC_AABB_TREE) + return; + + + Sc::BodyCore*const* activeBodies = mScene.getActiveBodiesArray(); + const PxU32 nbActiveBodies = mScene.getNumActiveBodies(); + + for (PxU32 i = 0; i < nbActiveBodies; ++i) + { + const Sc::BodyCore* bCore = activeBodies[i]; + + if (bCore->isFrozen()) + continue; + + PxRigidBody* pxBody = static_cast<PxRigidBody*>(bCore->getPxActor()); + + PX_ASSERT(pxBody->getConcreteType() == PxConcreteType::eRIGID_DYNAMIC || pxBody->getConcreteType() == PxConcreteType::eARTICULATION_LINK); + + NpShapeManager& shapeManager = *NpActor::getShapeManager(*pxBody); + const PxU32 nbShapes = shapeManager.getNbShapes(); + NpShape* const* shape = shapeManager.getShapes(); + + + for (PxU32 j = 0; j<nbShapes; j++) + { + PrunerData prunerData = shapeManager.getPrunerData(j); + if (prunerData != INVALID_PRUNERHANDLE) + { + const PrunerHandle handle = getPrunerHandle(prunerData); + const PxBounds3 worldAABB = computeWorldAABB(shape[j]->getScbShape(), *bCore); + PxBounds3 prunerAABB = static_cast<AABBPruner*>(mPrunerExt[1].pruner())->getAABB(handle); + PX_ASSERT((worldAABB.minimum - prunerAABB.minimum).magnitudeSquared() < 0.005f*mScene.getPxScene()->getPhysics().getTolerancesScale().length); + PX_ASSERT((worldAABB.maximum - prunerAABB.maximum).magnitudeSquared() < 0.005f*mScene.getPxScene()->getPhysics().getTolerancesScale().length); + PX_UNUSED(worldAABB); + PX_UNUSED(prunerAABB); + } + } + } +} + +void 
SceneQueryManager::processSimUpdates() +{ + PX_PROFILE_ZONE("Sim.updatePruningTrees", mScene.getContextId()); + + { + PX_PROFILE_ZONE("SceneQuery.processActiveShapes", mScene.getContextId()); + + // update all active objects + BodyCore*const* activeBodies = mScene.getScScene().getActiveBodiesArray(); + PxU32 nbActiveBodies = mScene.getScScene().getNumActiveBodies(); + +#define NB_BATCHED_OBJECTS 128 + PrunerHandle batchedHandles[NB_BATCHED_OBJECTS]; + PxU32 nbBatchedObjects = 0; + Pruner* pruner = mPrunerExt[PruningIndex::eDYNAMIC].pruner(); + + while(nbActiveBodies--) + { + // PT: TODO: don't put frozen objects in "active bodies" array? After all they + // are also not included in the 'active transforms' or 'active actors' arrays. + BodyCore* currentBody = *activeBodies++; + if(currentBody->isFrozen()) + continue; + + PxActorType::Enum type; + PxRigidBody* pxBody = static_cast<PxRigidBody*>(getPxActorFromBodyCore(currentBody, type)); + PX_ASSERT(pxBody->getConcreteType()==PxConcreteType::eRIGID_DYNAMIC || pxBody->getConcreteType()==PxConcreteType::eARTICULATION_LINK); + + NpShapeManager* shapeManager; + if(type==PxActorType::eRIGID_DYNAMIC) + { + NpRigidDynamic* rigidDynamic = static_cast<NpRigidDynamic*>(pxBody); + shapeManager = &rigidDynamic->getShapeManager(); + } + else + { + NpArticulationLink* articulationLink = static_cast<NpArticulationLink*>(pxBody); + shapeManager = &articulationLink->getShapeManager(); + } + + const PxU32 nbShapes = shapeManager->getNbShapes(); + for(PxU32 i=0; i<nbShapes; i++) + { + const PrunerData data = shapeManager->getPrunerData(i); + if(data!=SQ_INVALID_PRUNER_DATA) + { + // PT: index can't be zero here! 
+ PX_ASSERT(getPrunerIndex(data)==PruningIndex::eDYNAMIC); + + const PrunerHandle handle = getPrunerHandle(data); + + if(!mPrunerExt[PruningIndex::eDYNAMIC].isDirty(handle)) // PT: if dirty, will be updated in "flushShapes" + { + batchedHandles[nbBatchedObjects] = handle; + + PxBounds3* bounds; + const PrunerPayload& pp = pruner->getPayload(handle, bounds); + computeDynamicWorldAABB(*bounds, *(reinterpret_cast<Scb::Shape*>(pp.data[0])), *(reinterpret_cast<Scb::Actor*>(pp.data[1]))); + nbBatchedObjects++; + + if(nbBatchedObjects==NB_BATCHED_OBJECTS) + { + mPrunerExt[PruningIndex::eDYNAMIC].invalidateTimestamp(); + pruner->updateObjects(batchedHandles, NULL, nbBatchedObjects); + nbBatchedObjects = 0; + } + } + } + } + } + if(nbBatchedObjects) + { + mPrunerExt[PruningIndex::eDYNAMIC].invalidateTimestamp(); + pruner->updateObjects(batchedHandles, NULL, nbBatchedObjects); + } + } + + // flush user modified objects + flushShapes(); + + for(PxU32 i=0;i<PruningIndex::eCOUNT;i++) + { + if(mPrunerExt[i].pruner() && mPrunerExt[i].type() == PxPruningStructureType::eDYNAMIC_AABB_TREE) + static_cast<AABBPruner*>(mPrunerExt[i].pruner())->buildStep(); + + mPrunerExt[i].pruner()->commit(); + } +} + +void SceneQueryManager::afterSync(bool commit) +{ + PX_PROFILE_ZONE("Sim.sceneQueryBuildStep", mScene.getContextId()); + + // flush user modified objects + flushShapes(); + + for (PxU32 i = 0; i<2; i++) + { + if (mPrunerExt[i].pruner() && mPrunerExt[i].type() == PxPruningStructureType::eDYNAMIC_AABB_TREE) + static_cast<AABBPruner*>(mPrunerExt[i].pruner())->buildStep(); + + if (commit) + mPrunerExt[i].pruner()->commit(); + } +} + +void SceneQueryManager::flushShapes() +{ + PX_PROFILE_ZONE("SceneQuery.flushShapes", mScene.getContextId()); + + // must already have acquired writer lock here + + for(PxU32 i=0; i<PruningIndex::eCOUNT; i++) + mPrunerExt[i].flushShapes(i); +} + +void SceneQueryManager::flushUpdates() +{ + PX_PROFILE_ZONE("SceneQuery.flushUpdates", mScene.getContextId()); + + // 
no need to take lock if manual sq update is enabled + // as flushUpdates will only be called from NpScene::flushQueryUpdates() + mSceneQueryLock.lock(); + + flushShapes(); + + for(PxU32 i=0;i<PruningIndex::eCOUNT;i++) + if(mPrunerExt[i].pruner()) + mPrunerExt[i].pruner()->commit(); + + mSceneQueryLock.unlock(); +} + +void SceneQueryManager::forceDynamicTreeRebuild(bool rebuildStaticStructure, bool rebuildDynamicStructure) +{ + PX_PROFILE_ZONE("SceneQuery.forceDynamicTreeRebuild", mScene.getContextId()); + + const bool rebuild[PruningIndex::eCOUNT] = { rebuildStaticStructure, rebuildDynamicStructure }; + + Ps::Mutex::ScopedLock lock(mSceneQueryLock); + for(PxU32 i=0; i<PruningIndex::eCOUNT; i++) + { + if(rebuild[i] && mPrunerExt[i].pruner() && mPrunerExt[i].type() == PxPruningStructureType::eDYNAMIC_AABB_TREE) + { + static_cast<AABBPruner*>(mPrunerExt[i].pruner())->purge(); + static_cast<AABBPruner*>(mPrunerExt[i].pruner())->commit(); + } + } +} + +void SceneQueryManager::shiftOrigin(const PxVec3& shift) +{ + for(PxU32 i=0; i<PruningIndex::eCOUNT; i++) + mPrunerExt[i].pruner()->shiftOrigin(shift); +} + +void DynamicBoundsSync::sync(const PxU32* sqRefs, const PxU32* indices, const PxBounds3* bounds, PxU32 count) +{ + mPruner->updateObjects(sqRefs, indices, bounds, count); + + if (count) + (*mTimestamp)++; +} + diff --git a/PhysX_3.4/Source/SceneQuery/src/SqTypedef.h b/PhysX_3.4/Source/SceneQuery/src/SqTypedef.h new file mode 100644 index 00000000..48b77592 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqTypedef.h @@ -0,0 +1,47 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef SQ_TYPEDEF_H
#define SQ_TYPEDEF_H

#include "CmPhysXCommon.h"

namespace physx
{
namespace Sq
{
	// 32-bit index aliases used by the scene-query pruning code.
	typedef PxU32 PoolIndex;
	typedef PxU32 TreeNodeIndex;

	// Forward declarations — definitions live elsewhere in the SceneQuery module.
	class AABBTree;
	class AABBTreeBuildParams;
}
}

#endif // SQ_TYPEDEF_H