aboutsummaryrefslogtreecommitdiff
path: root/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp
diff options
context:
space:
mode:
authorgit perforce import user <a@b>2016-10-25 12:29:14 -0600
committerSheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>2016-10-25 18:56:37 -0500
commit3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
treefa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp
downloadphysx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
Initial commit:
PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp')
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp755
1 files changed, 755 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp
new file mode 100644
index 00000000..1858da15
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp
@@ -0,0 +1,755 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "foundation/PxPreprocessor.h"
+#include "PsAllocator.h"
+#include <new>
+#include <stdio.h>
+#include "CmPhysXCommon.h"
+#include "DySolverBody.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverContact.h"
+#include "DyThresholdTable.h"
+#include "DySolverControl.h"
+#include "DyArticulationHelper.h"
+#include "PsAtomic.h"
+#include "PsIntrinsics.h"
+#include "DyArticulationPImpl.h"
+#include "PsThread.h"
+#include "DySolverConstraintDesc.h"
+#include "DySolverContext.h"
+#include "DySolverControlPF.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+// Forward declarations of the block solve routines that the dispatch tables below refer to; none of them is defined in this part of the file.
+// 1D constraint routines for the regular solve table (rigid-body, Ext and block "1D4" flavours).
+void solve1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExt1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solve1D4_Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+// 1D constraint routines for the conclude table.
+void solve1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExt1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solve1D4Block_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+// 1D constraint routines for the write-back solve table.
+void solve1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExt1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solve1D4Block_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+// 1D write-back routines that none of the dispatch tables in this file reference.
+void writeBack1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void ext1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void writeBack1D4Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+// Coulomb contact and friction routines for the regular solve table (the friction ones are reused by the conclude table).
+void solveFrictionBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveFriction_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtFrictionBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulombBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtContactCoulombBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulomb_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+// Coulomb contact routines for the conclude table.
+void solveContactCoulombConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtContactCoulombConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulomb_BStaticConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+// Coulomb contact and friction routines for the write-back solve table.
+void solveContactCoulombBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtContactCoulombBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulomb_BStaticBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveFrictionBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveFriction_BStaticBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtFrictionBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+// Pre-block 1d/2d friction stuff: these fill the DY_SC_TYPE_BLOCK_* slots of the tables below.
+
+void solveContactCoulombPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulombPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulombPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulombPreBlock_ConcludeStatic (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulombPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveFrictionCoulombPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+void solveFrictionCoulombPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveFrictionCoulombPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveFrictionCoulombPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+void solveFrictionCoulombPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+void solveFrictionCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+
+// PX_UNUSED_ATTRIBUTE tags a definition as possibly unused. Could move this to PxPreprocessor.h but
+// no implementation is available for MSVC, so outside the GCC family it expands to nothing.
+#if PX_GCC_FAMILY
+#define PX_UNUSED_ATTRIBUTE __attribute__((unused))
+#else
+#define PX_UNUSED_ATTRIBUTE
+#endif
+// Always expands to 0: table slots wrapped in it start out null and are filled in by SolverCoreRegisterArticulationFnsCoulomb() below.
+#define DYNAMIC_ARTICULATION_REGISTRATION(x) 0
+
+// Dispatch table for regular solve iterations, indexed by constraint type (slot names are in the trailing comments).
+static SolveBlockMethod gVTableSolveBlockCoulomb[] PX_UNUSED_ATTRIBUTE =
+{
+ 0, // index 0 has no routine
+ solveContactCoulombBlock, // DY_SC_TYPE_RB_CONTACT
+ solve1DBlock, // DY_SC_TYPE_RB_1D
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombBlock), // DY_SC_TYPE_EXT_CONTACT
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlock), // DY_SC_TYPE_EXT_1D
+ solveContactCoulomb_BStaticBlock, // DY_SC_TYPE_STATIC_CONTACT
+ solveContactCoulombBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
+ solveContactCoulombPreBlock, // DY_SC_TYPE_BLOCK_RB_CONTACT
+ solveContactCoulombPreBlock_Static, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
+ solve1D4_Block, // DY_SC_TYPE_BLOCK_1D,
+ solveFrictionBlock, // DY_SC_TYPE_FRICTION_CONSTRAINT
+ solveFriction_BStaticBlock, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlock), // DY_SC_TYPE_EXT_FRICTION (the enumerator the registration function uses for this slot)
+ solveFrictionCoulombPreBlock, // DY_SC_TYPE_BLOCK_FRICTION
+ solveFrictionCoulombPreBlock_Static // DY_SC_TYPE_BLOCK_STATIC_FRICTION
+};
+// Dispatch table for the write-back pass (the final velocity iteration of the solve functions in this file), indexed like the table above.
+static SolveWriteBackBlockMethod gVTableSolveWriteBackBlockCoulomb[] PX_UNUSED_ATTRIBUTE =
+{
+ 0, // index 0 has no routine
+ solveContactCoulombBlockWriteBack, // DY_SC_TYPE_RB_CONTACT
+ solve1DBlockWriteBack, // DY_SC_TYPE_RB_1D
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombBlockWriteBack), // DY_SC_TYPE_EXT_CONTACT
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlockWriteBack), // DY_SC_TYPE_EXT_1D
+ solveContactCoulomb_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_CONTACT
+ solveContactCoulombBlockWriteBack, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
+ solveContactCoulombPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_RB_CONTACT
+ solveContactCoulombPreBlock_WriteBackStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
+ solve1D4Block_WriteBack, // DY_SC_TYPE_BLOCK_1D,
+ solveFrictionBlockWriteBack, // DY_SC_TYPE_FRICTION_CONSTRAINT
+ solveFriction_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlockWriteBack), // DY_SC_TYPE_EXT_FRICTION (the enumerator the registration function uses for this slot)
+ solveFrictionCoulombPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_FRICTION
+ solveFrictionCoulombPreBlock_WriteBackStatic // DY_SC_TYPE_BLOCK_STATIC_FRICTION
+};
+
+// Dispatch table for the concluding (last) position iteration; the non-block friction slots reuse the regular solve routines.
+static SolveBlockMethod gVTableSolveConcludeBlockCoulomb[] PX_UNUSED_ATTRIBUTE =
+{
+ 0, // index 0 has no routine
+ solveContactCoulombConcludeBlock, // DY_SC_TYPE_RB_CONTACT
+ solve1DConcludeBlock, // DY_SC_TYPE_RB_1D
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombConcludeBlock), // DY_SC_TYPE_EXT_CONTACT
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DConcludeBlock), // DY_SC_TYPE_EXT_1D
+ solveContactCoulomb_BStaticConcludeBlock, // DY_SC_TYPE_STATIC_CONTACT
+ solveContactCoulombConcludeBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
+ solveContactCoulombPreBlock_Conclude, // DY_SC_TYPE_BLOCK_RB_CONTACT
+ solveContactCoulombPreBlock_ConcludeStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
+ solve1D4Block_Conclude, // DY_SC_TYPE_BLOCK_1D,
+ solveFrictionBlock, // DY_SC_TYPE_FRICTION_CONSTRAINT
+ solveFriction_BStaticBlock, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlock), // DY_SC_TYPE_EXT_FRICTION (the enumerator the registration function uses for this slot)
+ solveFrictionCoulombPreBlock_Conclude, // DY_SC_TYPE_BLOCK_FRICTION
+ solveFrictionCoulombPreBlock_ConcludeStatic // DY_SC_TYPE_BLOCK_STATIC_FRICTION
+};
+
+
+// Installs the Ext contact, Ext 1D and Ext friction routines into the three Coulomb
+// dispatch tables. Those slots are initialised to 0 through
+// DYNAMIC_ARTICULATION_REGISTRATION, so they stay empty until this function has run.
+void SolverCoreRegisterArticulationFnsCoulomb()
+{
+    // Regular solve iterations.
+    gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_CONTACT]  = solveExtContactCoulombBlock;
+    gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_1D]       = solveExt1DBlock;
+    gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlock;
+
+    // Write-back pass.
+    gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_CONTACT]  = solveExtContactCoulombBlockWriteBack;
+    gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_1D]       = solveExt1DBlockWriteBack;
+    gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlockWriteBack;
+
+    // Conclude pass; Ext friction reuses the regular solve routine here.
+    gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_CONTACT]  = solveExtContactCoulombConcludeBlock;
+    gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_1D]       = solveExt1DConcludeBlock;
+    gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlock;
+}
+
+// Allocates storage with PX_ALLOC and constructs a SolverCoreGeneralPF in it.
+// Returns the new object, or a null pointer when the allocation yields null.
+// Objects created here are released through destroyV().
+SolverCoreGeneralPF* SolverCoreGeneralPF::create()
+{
+    void* storage = PX_ALLOC(sizeof(SolverCoreGeneralPF), "SolverCoreGeneral");
+
+    // Nothing to construct if the allocator handed back null.
+    if(!storage)
+        return NULL;
+
+    return new (storage) SolverCoreGeneralPF;
+}
+
+// Counterpart of create(): runs the destructor explicitly, then hands the storage
+// back through PX_FREE. The object must not be touched after this call.
+void SolverCoreGeneralPF::destroyV()
+{
+    SolverCoreGeneralPF* const self = this;
+
+    self->~SolverCoreGeneralPF();
+    PX_FREE(self);
+}
+
+// Serial island solve driven by the Coulomb dispatch tables.
+//
+// Sequence, as implemented below:
+//  1. With no constraint batch headers, only copy the body velocities into the
+//     motion-velocity array, save the articulation velocities and return.
+//  2. Run params.positionIterations contact passes; the last one uses the conclude table.
+//  3. If there are friction batches, run 2 * positionIterations friction passes, the last
+//     one with the conclude table.
+//  4. Copy the body velocities into the motion-velocity array and save the articulation
+//     velocities.
+//  5. Run velocityIterations - 1 contact (+ friction) passes with the regular table.
+//  6. Run the final pass with the write-back table, with the shared threshold stream
+//     attached to the solver context.
+//  7. Flush whatever is left in the local threshold buffer into the shared stream.
+//
+// params supplies every list, count and output buffer used here. Nothing is returned.
+void SolverCoreGeneralPF::solveV_Blocks(SolverIslandParams& params) const
+{
+    // Local staging buffer handed to the solver context for threshold stream elements.
+    const PxI32 TempThresholdStreamSize = 32;
+    ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize];
+
+    SolverContext cache;
+    cache.solverBodyArray = params.bodyDataList;
+    cache.mThresholdStream = tempThresholdStream;
+    cache.mThresholdStreamLength = TempThresholdStreamSize;
+    cache.mThresholdStreamIndex = 0;
+    cache.writeBackIteration = false;
+
+    PxI32 batchCount = PxI32(params.numConstraintHeaders);
+
+    PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart;
+    const PxU32 bodyListSize = params.bodyListSize;
+
+    Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
+
+    const PxU32 velocityIterations = params.velocityIterations;
+    const PxU32 positionIterations = params.positionIterations;
+
+    const PxU32 numConstraintHeaders = params.numConstraintHeaders;
+    const PxU32 articulationListSize = params.articulationListSize;
+
+    ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
+
+    PX_ASSERT(velocityIterations >= 1);
+    PX_ASSERT(positionIterations >= 1);
+
+    // Nothing to solve: just publish the current velocities.
+    if(numConstraintHeaders == 0)
+    {
+        for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++)
+        {
+            Cm::SpatialVector& motionVel = motionVelocityArray[baIdx];
+            PxSolverBody& atom = bodyListStart[baIdx];
+            motionVel.linear = atom.linearVelocity;
+            motionVel.angular = atom.angularState;
+        }
+
+        for (PxU32 i = 0; i < articulationListSize; i++)
+            ArticulationPImpl::saveVelocity(articulationListStart[i]);
+
+        return;
+    }
+
+    BatchIterator contactIterator(params.constraintBatchHeaders, params.numConstraintHeaders);
+    BatchIterator frictionIterator(params.frictionConstraintBatches, params.numFrictionConstraintHeaders);
+
+    PxI32 frictionBatchCount = PxI32(params.numFrictionConstraintHeaders);
+
+    PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList;
+    PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList = params.frictionConstraintList;
+
+    // Running pass counters. Each call passes (counter * batch count) as the start index
+    // together with the counter itself as the last argument.
+    PxI32 normalIter = 0;
+    PxI32 frictionIter = 0;
+
+    // Position iterations over the contact list; the last one (iteration == 1) concludes.
+    for (PxU32 iteration = positionIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters
+    {
+        SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount,
+            cache, contactIterator, iteration == 1 ? gVTableSolveConcludeBlockCoulomb : gVTableSolveBlockCoulomb, normalIter, frictionIter, normalIter);
+        ++normalIter;
+    }
+
+    // Position iterations over the friction list: twice as many passes as for contacts.
+    if(frictionBatchCount>0)
+    {
+        const PxU32 numIterations = positionIterations * 2;
+        for (PxU32 iteration = numIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters
+        {
+            SolveBlockParallel<false>(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount,
+                cache, frictionIterator, iteration == 1 ? gVTableSolveConcludeBlockCoulomb : gVTableSolveBlockCoulomb, normalIter, frictionIter, frictionIter);
+            ++frictionIter;
+        }
+    }
+
+    // Publish the velocities reached after the position iterations.
+    for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++)
+    {
+        const PxSolverBody& atom = bodyListStart[baIdx];
+        Cm::SpatialVector& motionVel = motionVelocityArray[baIdx];
+        motionVel.linear = atom.linearVelocity;
+        motionVel.angular = atom.angularState;
+    }
+
+    for (PxU32 i = 0; i < articulationListSize; i++)
+        ArticulationPImpl::saveVelocity(articulationListStart[i]);
+
+    // All velocity iterations but the last use the regular table.
+    const PxU32 velItersMinOne = velocityIterations - 1;
+
+    PxU32 iteration = 0;
+
+    for(; iteration < velItersMinOne; ++iteration)
+    {
+        SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount,
+            cache, contactIterator, gVTableSolveBlockCoulomb, normalIter, frictionIter, normalIter);
+        ++normalIter;
+
+        if(frictionBatchCount > 0)
+        {
+            SolveBlockParallel<false>(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount,
+                cache, frictionIterator, gVTableSolveBlockCoulomb, normalIter, frictionIter, frictionIter);
+            ++frictionIter;
+        }
+    }
+
+    PxI32* outThresholdPairs = params.outThresholdPairs;
+    ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream;
+    PxU32 thresholdStreamLength = params.thresholdStreamLength;
+
+    // The final velocity iteration writes back, so the context also gets the shared
+    // threshold stream and its shared pair counter.
+    cache.writeBackIteration = true;
+
+    cache.mSharedOutThresholdPairs = outThresholdPairs;
+    cache.mSharedThresholdStreamLength = thresholdStreamLength;
+    cache.mSharedThresholdStream = thresholdStream;
+
+    for(; iteration < velocityIterations; ++iteration)
+    {
+        SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount,
+            cache, contactIterator, gVTableSolveWriteBackBlockCoulomb, normalIter, frictionIter, normalIter);
+        ++normalIter;
+
+        if(frictionBatchCount > 0)
+        {
+            SolveBlockParallel<false>(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount,
+                cache, frictionIterator, gVTableSolveWriteBackBlockCoulomb, normalIter, frictionIter, frictionIter);
+            ++frictionIter;
+        }
+    }
+
+    //Write back remaining threshold streams
+    if(cache.mThresholdStreamIndex > 0)
+    {
+        //Write back to global buffer
+        // Reserve a range in the shared stream by bumping the shared pair counter itself.
+        // outThresholdPairs already is the pointer to that counter; the previous code passed
+        // the address of this local pointer variable instead, which left the shared counter
+        // untouched and turned bits of the pointer value into the destination index.
+        // Subtracting the count again yields the start of the reserved range (this treats
+        // atomicAdd as returning the updated value, as every other call site in this file does).
+        const PxI32 nbLocalPairs = PxI32(cache.mThresholdStreamIndex);
+        PxI32 threshIndex = physx::shdfnd::atomicAdd(outThresholdPairs, nbLocalPairs) - nbLocalPairs;
+        for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b)
+        {
+            thresholdStream[b + threshIndex] = cache.mThresholdStream[b];
+        }
+        cache.mThresholdStreamIndex = 0;
+    }
+}
+
+// Work-claiming variant of the island solve, driven by the Coulomb dispatch tables.
+//
+// Work is claimed in chunks by atomically advancing counters stored in params
+// (constraintIndex, frictionConstraintIndex, bodyListIndex). Completed work is reported
+// through constraintIndex2 / bodyListIndex2, and WAIT_FOR_PROGRESS is used on those before
+// moving to the next partition or phase. Presumably several threads run this function on
+// the same params at once - confirm against the caller.
+//
+// Phases, as implemented below:
+//  1. Position iterations over the contact partitions (the last one with the conclude table).
+//  2. Position iterations over the friction partitions (the last one with the conclude table).
+//  3. Save articulation velocities and copy body velocities into the motion-velocity array.
+//  4. velocityIterations - 1 contact + friction passes with the regular table.
+//  5. One final contact + friction pass with the write-back table, followed by a flush of the
+//     local threshold buffer into the shared threshold stream.
+//
+// Returns normalIteration * batchCount + frictionIteration * frictionBatchCount, i.e. the
+// number of batch headers covered by all contact and friction passes.
+PxI32 SolverCoreGeneralPF::solveVParallelAndWriteBack(SolverIslandParams& params) const
+{
+    SolverContext cache;
+    cache.solverBodyArray = params.bodyDataList;
+    // NOTE(review): solveV_Blocks sets cache.writeBackIteration (false, then true before the
+    // write-back pass); this function never assigns it. Confirm SolverContext initialises it.
+
+    // Chunk sizes claimed per atomic increment: constraints and body/articulation saves.
+    const PxI32 UnrollCount = PxI32(params.batchSize);
+    const PxI32 SaveUnrollCount = 64;
+
+    // Local staging buffer handed to the solver context for threshold stream elements.
+    const PxI32 TempThresholdStreamSize = 32;
+    ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize];
+
+    const PxI32 batchCount = PxI32(params.numConstraintHeaders);
+    const PxI32 frictionBatchCount = PxI32(params.numFrictionConstraintHeaders);//frictionConstraintBatches.size();
+    cache.mThresholdStream = tempThresholdStream;
+    cache.mThresholdStreamLength = TempThresholdStreamSize;
+    cache.mThresholdStreamIndex = 0;
+
+    const PxI32 positionIterations = PxI32(params.positionIterations);
+    const PxU32 velocityIterations = params.velocityIterations;
+
+    const PxI32 bodyListSize = PxI32(params.bodyListSize);
+    const PxI32 articulationListSize = PxI32(params.articulationListSize);
+
+    PX_ASSERT(velocityIterations >= 1);
+    PX_ASSERT(positionIterations >= 1);
+
+    // constraintIndex / frictionConstraintIndex: claim counters.
+    // constraintIndex2: progress counter shared by the contact and friction phases.
+    PxI32* constraintIndex = &params.constraintIndex;
+    PxI32* constraintIndex2 = &params.constraintIndex2;
+    PxI32* frictionConstraintIndex = &params.frictionConstraintIndex;
+
+    // Claim the first chunk of each list up front.
+    PxI32 endIndexCount = UnrollCount;
+    PxI32 index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+    PxI32 frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount;
+
+    BatchIterator contactIter(params.constraintBatchHeaders, params.numConstraintHeaders);
+    BatchIterator frictionIter(params.frictionConstraintBatches, params.numFrictionConstraintHeaders);
+
+    PxU32* headersPerPartition = params.headersPerPartition;
+    PxU32 nbPartitions = params.nbPartitions;
+
+    PxU32* frictionHeadersPerPartition = params.frictionHeadersPerPartition;
+    PxU32 nbFrictionPartitions = params.nbFrictionPartitions;
+
+    PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList;
+    PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList = params.frictionConstraintList;
+
+    // maxNormalIndex / maxFrictionIndex: end (exclusive) of the partition currently being
+    // solved, in the running index space of the respective claim counter.
+    // maxProgress: value constraintIndex2 reaches once everything up to here is solved.
+    PxI32 maxNormalIndex = 0;
+    PxI32 maxProgress = 0;
+    PxI32 frictionEndIndexCount = UnrollCount;
+    PxI32 maxFrictionIndex = 0;
+
+    PxI32 normalIteration = 0;
+    PxI32 frictionIteration = 0;
+    PxU32 a = 0;
+
+    // Phase 1: contact position iterations. i == 0 runs all but the last pass with the
+    // regular table, i == 1 runs the last pass with the conclude table.
+    for(PxU32 i = 0; i < 2; ++i)
+    {
+        SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlockCoulomb : gVTableSolveConcludeBlockCoulomb;
+        for(; a < positionIterations - 1 + i; ++a)
+        {
+            for(PxU32 b = 0; b < nbPartitions; ++b)
+            {
+                WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+                maxNormalIndex += headersPerPartition[b];
+                maxProgress += headersPerPartition[b];
+                PxI32 nbSolved = 0;
+                while(index < maxNormalIndex)
+                {
+                    const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
+                    SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, solveTable,
+                        normalIteration, frictionIteration, normalIteration);
+                    index += remainder;
+                    endIndexCount -= remainder;
+                    nbSolved += remainder;
+                    if(endIndexCount == 0)
+                    {
+                        // Current chunk used up: claim the next one.
+                        endIndexCount = UnrollCount;
+                        index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+                    }
+                }
+                if(nbSolved)
+                {
+                    Ps::memoryBarrier();
+                    Ps::atomicAdd(constraintIndex2, nbSolved);
+                }
+            }
+            ++normalIteration;
+        }
+    }
+
+    // Phase 2: friction position iterations, same two-step table selection.
+    // NOTE(review): 'a' is not reset after phase 1, so this runs positionIterations friction
+    // passes, whereas solveV_Blocks runs 2 * positionIterations. Left unchanged; confirm which
+    // count is intended.
+    for(PxU32 i = 0; i < 2; ++i)
+    {
+        SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlockCoulomb : gVTableSolveConcludeBlockCoulomb;
+        const PxI32 numIterations = positionIterations *2;
+        for(; a < numIterations - 1 + i; ++a)
+        {
+            for(PxU32 b = 0; b < nbFrictionPartitions; ++b)
+            {
+                WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+                maxProgress += frictionHeadersPerPartition[b];
+                maxFrictionIndex += frictionHeadersPerPartition[b];
+                PxI32 nbSolved = 0;
+                while(frictionIndex < maxFrictionIndex)
+                {
+                    const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount);
+                    SolveBlockParallel<false>(frictionConstraintList, remainder, frictionIndex, frictionBatchCount, cache, frictionIter,
+                        solveTable, normalIteration, frictionIteration, frictionIteration);
+                    frictionIndex += remainder;
+                    frictionEndIndexCount -= remainder;
+                    nbSolved += remainder;
+                    if(frictionEndIndexCount == 0)
+                    {
+                        frictionEndIndexCount = UnrollCount;
+                        frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount;
+                    }
+                }
+                if(nbSolved)
+                {
+                    Ps::memoryBarrier();
+                    Ps::atomicAdd(constraintIndex2, nbSolved);
+                }
+            }
+            ++frictionIteration;
+        }
+    }
+
+    // All position work must be finished before velocities are saved.
+    WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+
+    PxI32* bodyListIndex = &params.bodyListIndex;
+
+    ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
+
+    PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart;
+
+    Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
+
+    PxI32* bodyListIndex2 = &params.bodyListIndex2;
+
+    // Phase 3: save velocities. bodyListIndex counts articulations first and bodies after
+    // them, so one claim counter covers both lists.
+    PxI32 endIndexCount2 = SaveUnrollCount;
+    PxI32 index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount;
+    {
+        PxI32 nbConcluded = 0;
+        while(index2 < articulationListSize)
+        {
+            const PxI32 remainder = PxMin(SaveUnrollCount, (articulationListSize - index2));
+            endIndexCount2 -= remainder;
+            for(PxI32 b = 0; b < remainder; ++b, ++index2)
+            {
+                ArticulationPImpl::saveVelocity(articulationListStart[index2]);
+            }
+            if(endIndexCount2 == 0)
+            {
+                index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount;
+                endIndexCount2 = SaveUnrollCount;
+            }
+            // Count each saved articulation exactly once. It used to be added twice per
+            // chunk, which pushed bodyListIndex2 past the number of entries really saved
+            // and could satisfy the wait below before all velocities were written.
+            nbConcluded += remainder;
+        }
+
+        // Switch from the combined index space to an index into the body list.
+        index2 -= articulationListSize;
+
+        //save velocity
+        while(index2 < bodyListSize)
+        {
+            const PxI32 remainder = PxMin(endIndexCount2, (bodyListSize - index2));
+            endIndexCount2 -= remainder;
+            for(PxI32 b = 0; b < remainder; ++b, ++index2)
+            {
+                Ps::prefetchLine(&bodyListStart[index2 + 8]);
+                Ps::prefetchLine(&motionVelocityArray[index2 + 8]);
+                PxSolverBody& body = bodyListStart[index2];
+                Cm::SpatialVector& motionVel = motionVelocityArray[index2];
+                motionVel.linear = body.linearVelocity;
+                motionVel.angular = body.angularState;
+                PX_ASSERT(motionVel.linear.isFinite());
+                PX_ASSERT(motionVel.angular.isFinite());
+            }
+
+            nbConcluded += remainder;
+
+            //Branch not required because this is the last time we use this atomic variable
+            //if(index2 < articulationListSizePlusbodyListSize)
+            {
+                index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount - articulationListSize;
+                endIndexCount2 = SaveUnrollCount;
+            }
+        }
+
+        if(nbConcluded)
+        {
+            Ps::memoryBarrier();
+            physx::shdfnd::atomicAdd(bodyListIndex2, nbConcluded);
+        }
+    }
+
+    // Every articulation and body must be saved before the velocity iterations start.
+    WAIT_FOR_PROGRESS(bodyListIndex2, (bodyListSize + articulationListSize));
+
+    // Phase 4: all velocity iterations but the last, with the regular table.
+    a = 0;
+    for(; a < velocityIterations-1; ++a)
+    {
+        for(PxU32 b = 0; b < nbPartitions; ++b)
+        {
+            WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+            maxNormalIndex += headersPerPartition[b];
+            maxProgress += headersPerPartition[b];
+
+            PxI32 nbSolved = 0;
+            while(index < maxNormalIndex)
+            {
+                const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
+                SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveBlockCoulomb,
+                    normalIteration, frictionIteration, normalIteration);
+                index += remainder;
+                endIndexCount -= remainder;
+                nbSolved += remainder;
+                if(endIndexCount == 0)
+                {
+                    endIndexCount = UnrollCount;
+                    index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+                }
+            }
+            if(nbSolved)
+            {
+                Ps::memoryBarrier();
+                Ps::atomicAdd(constraintIndex2, nbSolved);
+            }
+        }
+        ++normalIteration;
+
+        for(PxU32 b = 0; b < nbFrictionPartitions; ++b)
+        {
+            WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+            maxFrictionIndex += frictionHeadersPerPartition[b];
+            maxProgress += frictionHeadersPerPartition[b];
+
+            PxI32 nbSolved = 0;
+            while(frictionIndex < maxFrictionIndex)
+            {
+                const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount);
+                // Solve the claimed *friction* chunk. This call used to pass the contact
+                // list, contact index, contact header count and contact iterator, so friction
+                // was never solved here and a stale contact index was dereferenced instead.
+                SolveBlockParallel<false>(frictionConstraintList, remainder, frictionIndex, frictionBatchCount, cache, frictionIter,
+                    gVTableSolveBlockCoulomb, normalIteration, frictionIteration, frictionIteration);
+
+                frictionIndex += remainder;
+                frictionEndIndexCount -= remainder;
+                nbSolved += remainder;
+                if(frictionEndIndexCount == 0)
+                {
+                    frictionEndIndexCount = UnrollCount;
+                    frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount;
+                }
+            }
+            if(nbSolved)
+            {
+                Ps::memoryBarrier();
+                Ps::atomicAdd(constraintIndex2, nbSolved);
+            }
+        }
+
+        ++frictionIteration;
+    }
+
+    // Phase 5: final pass with the write-back table. The context gets the shared threshold
+    // stream and its shared pair counter for this pass.
+    ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream;
+    const PxU32 thresholdStreamLength = params.thresholdStreamLength;
+    PxI32* outThresholdPairs = params.outThresholdPairs;
+
+    cache.mSharedThresholdStream = thresholdStream;
+    cache.mSharedOutThresholdPairs = outThresholdPairs;
+    cache.mSharedThresholdStreamLength = thresholdStreamLength;
+
+    {
+        for(PxU32 b = 0; b < nbPartitions; ++b)
+        {
+            WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+            maxNormalIndex += headersPerPartition[b];
+            maxProgress += headersPerPartition[b];
+
+            PxI32 nbSolved = 0;
+            while(index < maxNormalIndex)
+            {
+                const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
+
+                // Start at the claimed index, like every other pass. This call used to pass
+                // normalIteration * batchCount, which made each caller process the same
+                // leading range instead of the chunk it had claimed.
+                SolveBlockParallel<false>(constraintList, remainder, index, batchCount,
+                    cache, contactIter, gVTableSolveWriteBackBlockCoulomb, normalIteration, frictionIteration, normalIteration);
+
+                index += remainder;
+                endIndexCount -= remainder;
+                nbSolved += remainder;
+                if(endIndexCount == 0)
+                {
+                    endIndexCount = UnrollCount;
+                    index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+                }
+            }
+            if(nbSolved)
+            {
+                Ps::memoryBarrier();
+                Ps::atomicAdd(constraintIndex2, nbSolved);
+            }
+        }
+
+        ++normalIteration;
+
+        for(PxU32 b = 0; b < nbFrictionPartitions; ++b)
+        {
+            WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+            maxFrictionIndex += frictionHeadersPerPartition[b];
+            maxProgress += frictionHeadersPerPartition[b];
+
+            PxI32 nbSolved = 0;
+            while(frictionIndex < maxFrictionIndex)
+            {
+                const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount);
+
+                SolveBlockParallel<false>(frictionConstraintList, remainder, frictionIndex, frictionBatchCount, cache, frictionIter,
+                    gVTableSolveWriteBackBlockCoulomb, normalIteration, frictionIteration, frictionIteration);
+
+                frictionIndex += remainder;
+                frictionEndIndexCount -= remainder;
+                nbSolved += remainder;
+                if(frictionEndIndexCount == 0)
+                {
+                    frictionEndIndexCount = UnrollCount;
+                    frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount;
+                }
+            }
+            if(nbSolved)
+            {
+                Ps::memoryBarrier();
+                Ps::atomicAdd(constraintIndex2, nbSolved);
+            }
+        }
+
+        if(cache.mThresholdStreamIndex > 0)
+        {
+            //Write back to global buffer
+            // Reserve a range in the shared stream by bumping the shared pair counter, then
+            // copy the locally buffered elements into that range.
+            PxI32 threshIndex = physx::shdfnd::atomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+            for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b)
+            {
+                thresholdStream[b + threshIndex] = cache.mThresholdStream[b];
+            }
+            cache.mThresholdStreamIndex = 0;
+        }
+
+        ++frictionIteration;
+    }
+
+    return normalIteration * batchCount + frictionIteration * frictionBatchCount;
+}
+
+
+// Runs a write-back routine over every constraint batch.
+//
+// For each of the numBatches headers, the type byte of the batch's first constraint selects
+// the routine from writeBackTable, which is then called with the batch's first descriptor,
+// the header's mStride and a solver context built from atomListData and the threshold
+// stream arguments. The constraint list size argument is unused.
+//
+// NOTE(review): outThresholdPairs is always set to 0 here, regardless of what the routines
+// put into the context's threshold stream. That behaviour is kept as is; confirm whether
+// cache.mThresholdStreamIndex was meant to be reported instead.
+void SolverCoreGeneralPF::writeBackV
+(const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 /*constraintListSize*/, PxConstraintBatchHeader* batchHeaders, const PxU32 numBatches,
+ ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs,
+ PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const
+{
+    SolverContext cache;
+    cache.solverBodyArray = atomListData;
+    cache.mThresholdStream = thresholdStream;
+    cache.mThresholdStreamLength = thresholdStreamLength;
+    cache.mThresholdStreamIndex = 0;
+
+    for(PxU32 batch = 0; batch < numBatches; ++batch)
+    {
+        const PxConstraintBatchHeader& header = batchHeaders[batch];
+        const PxSolverConstraintDesc* PX_RESTRICT firstDesc = constraintList + header.mStartIndex;
+
+        // The first byte of the constraint data is the index into the routine table.
+        const PxU8 type = *firstDesc->constraint;
+        writeBackTable[type](firstDesc, header.mStride, cache);
+    }
+
+    outThresholdPairs = PxU32(0);
+}
+
+}
+
+}
+
+
+//#endif