From 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 Mon Sep 17 00:00:00 2001 From: git perforce import user Date: Tue, 25 Oct 2016 12:29:14 -0600 Subject: Initial commit: PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167] --- .../LowLevelDynamics/src/DySolverControlPF.cpp | 755 +++++++++++++++++++++ 1 file changed, 755 insertions(+) create mode 100644 PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp (limited to 'PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp') diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp new file mode 100644 index 00000000..1858da15 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp @@ -0,0 +1,755 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxPreprocessor.h" +#include "PsAllocator.h" +#include +#include +#include "CmPhysXCommon.h" +#include "DySolverBody.h" +#include "DySolverConstraint1D.h" +#include "DySolverContact.h" +#include "DyThresholdTable.h" +#include "DySolverControl.h" +#include "DyArticulationHelper.h" +#include "PsAtomic.h" +#include "PsIntrinsics.h" +#include "DyArticulationPImpl.h" +#include "PsThread.h" +#include "DySolverConstraintDesc.h" +#include "DySolverContext.h" +#include "DySolverControlPF.h" + +namespace physx +{ + +namespace Dy +{ +//----------------------------------- + +void solve1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExt1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solve1D4_Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + + +void solve1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExt1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solve1D4Block_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void solve1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExt1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solve1D4Block_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void writeBack1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void ext1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void writeBack1D4Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + + +void solveFrictionBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveFriction_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtFrictionBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulombBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtContactCoulombBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulomb_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + + +void solveContactCoulombConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtContactCoulombConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulomb_BStaticConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void solveContactCoulombBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtContactCoulombBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulomb_BStaticBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveFrictionBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveFriction_BStaticBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtFrictionBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +//Pre-block 1d/2d friction stuff... + +void solveContactCoulombPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulombPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulombPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulombPreBlock_ConcludeStatic (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulombPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveFrictionCoulombPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void solveFrictionCoulombPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveFrictionCoulombPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveFrictionCoulombPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void solveFrictionCoulombPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void solveFrictionCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + + +// could move this to PxPreprocessor.h but +// no implementation available for MSVC +#if PX_GCC_FAMILY +#define PX_UNUSED_ATTRIBUTE __attribute__((unused)) +#else +#define PX_UNUSED_ATTRIBUTE +#endif + +#define DYNAMIC_ARTICULATION_REGISTRATION(x) 0 + + +static SolveBlockMethod gVTableSolveBlockCoulomb[] PX_UNUSED_ATTRIBUTE = +{ + 0, + solveContactCoulombBlock, // DY_SC_TYPE_RB_CONTACT + solve1DBlock, // DY_SC_TYPE_RB_1D + DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombBlock), // DY_SC_TYPE_EXT_CONTACT + DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlock), // DY_SC_TYPE_EXT_1D + solveContactCoulomb_BStaticBlock, // DY_SC_TYPE_STATIC_CONTACT + solveContactCoulombBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT + solveContactCoulombPreBlock, // DY_SC_TYPE_BLOCK_RB_CONTACT + solveContactCoulombPreBlock_Static, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT + solve1D4_Block, // DY_SC_TYPE_BLOCK_1D, + solveFrictionBlock, // DY_SC_TYPE_FRICTION_CONSTRAINT + solveFriction_BStaticBlock, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT + DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlock), // DY_SC_TYPE_EXT_FRICTION_CONSTRAINT + solveFrictionCoulombPreBlock, // DY_SC_TYPE_BLOCK_FRICTION + solveFrictionCoulombPreBlock_Static // DY_SC_TYPE_BLOCK_STATIC_FRICTION +}; + +static SolveWriteBackBlockMethod gVTableSolveWriteBackBlockCoulomb[] PX_UNUSED_ATTRIBUTE = +{ + 0, + solveContactCoulombBlockWriteBack, // DY_SC_TYPE_RB_CONTACT + solve1DBlockWriteBack, // DY_SC_TYPE_RB_1D + DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombBlockWriteBack), // DY_SC_TYPE_EXT_CONTACT + DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlockWriteBack), // DY_SC_TYPE_EXT_1D + solveContactCoulomb_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_CONTACT + solveContactCoulombBlockWriteBack, // DY_SC_TYPE_NOFRICTION_RB_CONTACT + solveContactCoulombPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_RB_CONTACT + solveContactCoulombPreBlock_WriteBackStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT + solve1D4Block_WriteBack, // DY_SC_TYPE_BLOCK_1D, + solveFrictionBlockWriteBack, // DY_SC_TYPE_FRICTION_CONSTRAINT + solveFriction_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT + DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlockWriteBack), // DY_SC_TYPE_EXT_FRICTION_CONSTRAINT + solveFrictionCoulombPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_FRICTION + solveFrictionCoulombPreBlock_WriteBackStatic // DY_SC_TYPE_BLOCK_STATIC_FRICTION +}; + + +static SolveBlockMethod gVTableSolveConcludeBlockCoulomb[] PX_UNUSED_ATTRIBUTE = +{ + 0, + solveContactCoulombConcludeBlock, // DY_SC_TYPE_RB_CONTACT + solve1DConcludeBlock, // DY_SC_TYPE_RB_1D + DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombConcludeBlock), // DY_SC_TYPE_EXT_CONTACT + DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DConcludeBlock), // DY_SC_TYPE_EXT_1D + solveContactCoulomb_BStaticConcludeBlock, // DY_SC_TYPE_STATIC_CONTACT + solveContactCoulombConcludeBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT + solveContactCoulombPreBlock_Conclude, // DY_SC_TYPE_BLOCK_RB_CONTACT + solveContactCoulombPreBlock_ConcludeStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT + solve1D4Block_Conclude, // DY_SC_TYPE_BLOCK_1D, + solveFrictionBlock, // DY_SC_TYPE_FRICTION_CONSTRAINT + solveFriction_BStaticBlock, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT + DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlock), // DY_SC_TYPE_EXT_FRICTION_CONSTRAINT + solveFrictionCoulombPreBlock_Conclude, // DY_SC_TYPE_BLOCK_FRICTION + solveFrictionCoulombPreBlock_ConcludeStatic // DY_SC_TYPE_BLOCK_STATIC_FRICTION +}; + + +void SolverCoreRegisterArticulationFnsCoulomb() +{ + gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_CONTACT] = solveExtContactCoulombBlock; + gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_1D] = solveExt1DBlock; + + gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_CONTACT] = solveExtContactCoulombBlockWriteBack; + gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_1D] = solveExt1DBlockWriteBack; + gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_CONTACT] = solveExtContactCoulombConcludeBlock; + gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_1D] = solveExt1DConcludeBlock; + + gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlock; + gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlockWriteBack; + gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlock; +} + +SolverCoreGeneralPF* SolverCoreGeneralPF::create() +{ + SolverCoreGeneralPF* scg = reinterpret_cast( + PX_ALLOC(sizeof(SolverCoreGeneralPF), "SolverCoreGeneral")); + + if(scg) + new (scg) SolverCoreGeneralPF; + + return scg; +} + +void SolverCoreGeneralPF::destroyV() +{ + this->~SolverCoreGeneralPF(); + PX_FREE(this); +} + +void SolverCoreGeneralPF::solveV_Blocks(SolverIslandParams& params) const +{ + const PxI32 TempThresholdStreamSize = 32; + ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize]; + + SolverContext cache; + cache.solverBodyArray = params.bodyDataList; + cache.mThresholdStream = tempThresholdStream; + cache.mThresholdStreamLength = TempThresholdStreamSize; + cache.mThresholdStreamIndex = 0; + cache.writeBackIteration = false; + + PxI32 batchCount = PxI32(params.numConstraintHeaders); + + PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart; + const PxU32 bodyListSize = params.bodyListSize; + + Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray; + + const PxU32 velocityIterations = params.velocityIterations; + const PxU32 positionIterations = params.positionIterations; + + const PxU32 numConstraintHeaders = params.numConstraintHeaders; + const PxU32 articulationListSize = params.articulationListSize; + + ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart; + + + PX_ASSERT(velocityIterations >= 1); + PX_ASSERT(positionIterations >= 1); + + if(numConstraintHeaders == 0) + { + for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++) + { + Cm::SpatialVector& motionVel = motionVelocityArray[baIdx]; + PxSolverBody& atom = bodyListStart[baIdx]; + motionVel.linear = atom.linearVelocity; + motionVel.angular = atom.angularState; + } + + for (PxU32 i = 0; i < articulationListSize; i++) + ArticulationPImpl::saveVelocity(articulationListStart[i]); + + return; + } + + BatchIterator contactIterator(params.constraintBatchHeaders, params.numConstraintHeaders); + BatchIterator frictionIterator(params.frictionConstraintBatches, params.numFrictionConstraintHeaders); + + + PxI32 frictionBatchCount = PxI32(params.numFrictionConstraintHeaders); + + PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList; + + PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList = params.frictionConstraintList; + + + //0-(n-1) iterations + PxI32 normalIter = 0; + PxI32 frictionIter = 0; + for (PxU32 iteration = positionIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters + { + + SolveBlockParallel(constraintList, batchCount, normalIter * batchCount, batchCount, + cache, contactIterator, iteration == 1 ? gVTableSolveConcludeBlockCoulomb : gVTableSolveBlockCoulomb, normalIter, frictionIter, normalIter); + ++normalIter; + + } + + if(frictionBatchCount>0) + { + const PxU32 numIterations = positionIterations * 2; + for (PxU32 iteration = numIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters + { + SolveBlockParallel(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount, + cache, frictionIterator, iteration == 1 ? gVTableSolveConcludeBlockCoulomb : gVTableSolveBlockCoulomb, normalIter, frictionIter, frictionIter); + ++frictionIter; + } + } + + for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++) + { + const PxSolverBody& atom = bodyListStart[baIdx]; + Cm::SpatialVector& motionVel = motionVelocityArray[baIdx]; + motionVel.linear = atom.linearVelocity; + motionVel.angular = atom.angularState; + } + + + for (PxU32 i = 0; i < articulationListSize; i++) + ArticulationPImpl::saveVelocity(articulationListStart[i]); + + + const PxU32 velItersMinOne = velocityIterations - 1; + + PxU32 iteration = 0; + + for(; iteration < velItersMinOne; ++iteration) + { + + SolveBlockParallel(constraintList, batchCount, normalIter * batchCount, batchCount, + cache, contactIterator, gVTableSolveBlockCoulomb, normalIter, frictionIter, normalIter); + ++normalIter; + + if(frictionBatchCount > 0) + { + SolveBlockParallel(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount, + cache, frictionIterator, gVTableSolveBlockCoulomb, normalIter, frictionIter, frictionIter); + ++frictionIter; + } + } + + PxI32* outThresholdPairs = params.outThresholdPairs; + ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream; + PxU32 thresholdStreamLength = params.thresholdStreamLength; + + cache.writeBackIteration = true; + + cache.mSharedOutThresholdPairs = outThresholdPairs; + cache.mSharedThresholdStreamLength = thresholdStreamLength; + cache.mSharedThresholdStream = thresholdStream; + + for(; iteration < velocityIterations; ++iteration) + { + SolveBlockParallel(constraintList, batchCount, normalIter * batchCount, batchCount, + cache, contactIterator, gVTableSolveWriteBackBlockCoulomb, normalIter, frictionIter, normalIter); + ++normalIter; + + if(frictionBatchCount > 0) + { + SolveBlockParallel(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount, + cache, frictionIterator, gVTableSolveWriteBackBlockCoulomb, normalIter, frictionIter, frictionIter); + ++frictionIter; + } + } + + //Write back remaining threshold streams + if(cache.mThresholdStreamIndex > 0) + { + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(reinterpret_cast(&outThresholdPairs), PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b) + { + thresholdStream[b + threshIndex] = cache.mThresholdStream[b]; + } + cache.mThresholdStreamIndex = 0; + } + +} + +PxI32 SolverCoreGeneralPF::solveVParallelAndWriteBack(SolverIslandParams& params) const +{ + SolverContext cache; + cache.solverBodyArray = params.bodyDataList; + + const PxI32 UnrollCount = PxI32(params.batchSize); + const PxI32 SaveUnrollCount = 64; + + const PxI32 TempThresholdStreamSize = 32; + ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize]; + + + const PxI32 batchCount = PxI32(params.numConstraintHeaders); + const PxI32 frictionBatchCount = PxI32(params.numFrictionConstraintHeaders);//frictionConstraintBatches.size(); + cache.mThresholdStream = tempThresholdStream; + cache.mThresholdStreamLength = TempThresholdStreamSize; + cache.mThresholdStreamIndex = 0; + + const PxI32 positionIterations = PxI32(params.positionIterations); + const PxU32 velocityIterations = params.velocityIterations; + + const PxI32 bodyListSize = PxI32(params.bodyListSize); + const PxI32 articulationListSize = PxI32(params.articulationListSize); + + PX_ASSERT(velocityIterations >= 1); + PX_ASSERT(positionIterations >= 1); + + PxI32* constraintIndex = ¶ms.constraintIndex; + PxI32* constraintIndex2 = ¶ms.constraintIndex2; + PxI32* frictionConstraintIndex = ¶ms.frictionConstraintIndex; + + PxI32 endIndexCount = UnrollCount; + PxI32 index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount; + PxI32 frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount; + + + BatchIterator contactIter(params.constraintBatchHeaders, params.numConstraintHeaders); + BatchIterator frictionIter(params.frictionConstraintBatches, params.numFrictionConstraintHeaders); + + PxU32* headersPerPartition = params.headersPerPartition; + PxU32 nbPartitions = params.nbPartitions; + + PxU32* frictionHeadersPerPartition = params.frictionHeadersPerPartition; + PxU32 nbFrictionPartitions = params.nbFrictionPartitions; + + PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList; + PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList = params.frictionConstraintList; + + + PxI32 maxNormalIndex = 0; + PxI32 maxProgress = 0; + PxI32 frictionEndIndexCount = UnrollCount; + PxI32 maxFrictionIndex = 0; + + PxI32 normalIteration = 0; + PxI32 frictionIteration = 0; + PxU32 a = 0; + for(PxU32 i = 0; i < 2; ++i) + { + SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlockCoulomb : gVTableSolveConcludeBlockCoulomb; + for(; a < positionIterations - 1 + i; ++a) + { + for(PxU32 b = 0; b < nbPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + maxNormalIndex += headersPerPartition[b]; + maxProgress += headersPerPartition[b]; + PxI32 nbSolved = 0; + while(index < maxNormalIndex) + { + const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount); + SolveBlockParallel(constraintList, remainder, index, batchCount, cache, contactIter, solveTable, + normalIteration, frictionIteration, normalIteration); + index += remainder; + endIndexCount -= remainder; + nbSolved += remainder; + if(endIndexCount == 0) + { + endIndexCount = UnrollCount; + index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + Ps::atomicAdd(constraintIndex2, nbSolved); + } + } + ++normalIteration; + } + + } + + + for(PxU32 i = 0; i < 2; ++i) + { + SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlockCoulomb : gVTableSolveConcludeBlockCoulomb; + const PxI32 numIterations = positionIterations *2; + for(; a < numIterations - 1 + i; ++a) + { + for(PxU32 b = 0; b < nbFrictionPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + maxProgress += frictionHeadersPerPartition[b]; + maxFrictionIndex += frictionHeadersPerPartition[b]; + PxI32 nbSolved = 0; + while(frictionIndex < maxFrictionIndex) + { + const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount); + SolveBlockParallel(frictionConstraintList, remainder, frictionIndex, frictionBatchCount, cache, frictionIter, + solveTable, normalIteration, frictionIteration, frictionIteration); + frictionIndex += remainder; + frictionEndIndexCount -= remainder; + nbSolved += remainder; + if(frictionEndIndexCount == 0) + { + frictionEndIndexCount = UnrollCount; + frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + Ps::atomicAdd(constraintIndex2, nbSolved); + } + } + ++frictionIteration; + + } + + } + + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + + + PxI32* bodyListIndex = ¶ms.bodyListIndex; + + ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart; + + PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart; + + Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray; + + PxI32* bodyListIndex2 = ¶ms.bodyListIndex2; + + PxI32 endIndexCount2 = SaveUnrollCount; + PxI32 index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount; + { + PxI32 nbConcluded = 0; + while(index2 < articulationListSize) + { + const PxI32 remainder = PxMin(SaveUnrollCount, (articulationListSize - index2)); + endIndexCount2 -= remainder; + for(PxI32 b = 0; b < remainder; ++b, ++index2) + { + ArticulationPImpl::saveVelocity(articulationListStart[index2]); + } + nbConcluded += remainder; + if(endIndexCount2 == 0) + { + index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount; + endIndexCount2 = SaveUnrollCount; + } + nbConcluded += remainder; + } + + index2 -= articulationListSize; + + //save velocity + + + while(index2 < bodyListSize) + { + const PxI32 remainder = PxMin(endIndexCount2, (bodyListSize - index2)); + endIndexCount2 -= remainder; + for(PxI32 b = 0; b < remainder; ++b, ++index2) + { + Ps::prefetchLine(&bodyListStart[index2 + 8]); + Ps::prefetchLine(&motionVelocityArray[index2 + 8]); + PxSolverBody& body = bodyListStart[index2]; + Cm::SpatialVector& motionVel = motionVelocityArray[index2]; + motionVel.linear = body.linearVelocity; + motionVel.angular = body.angularState; + PX_ASSERT(motionVel.linear.isFinite()); + PX_ASSERT(motionVel.angular.isFinite()); + } + + nbConcluded += remainder; + + //Branch not required because this is the last time we use this atomic variable + //if(index2 < articulationListSizePlusbodyListSize) + { + index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount - articulationListSize; + endIndexCount2 = SaveUnrollCount; + } + } + + if(nbConcluded) + { + Ps::memoryBarrier(); + physx::shdfnd::atomicAdd(bodyListIndex2, nbConcluded); + } + } + + + WAIT_FOR_PROGRESS(bodyListIndex2, (bodyListSize + articulationListSize)); + + a = 0; + for(; a < velocityIterations-1; ++a) + { + for(PxU32 b = 0; b < nbPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + maxNormalIndex += headersPerPartition[b]; + maxProgress += headersPerPartition[b]; + + PxI32 nbSolved = 0; + while(index < maxNormalIndex) + { + const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount); + SolveBlockParallel(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveBlockCoulomb, normalIteration, frictionIteration, normalIteration); + index += remainder; + endIndexCount -= remainder; + nbSolved += remainder; + if(endIndexCount == 0) + { + endIndexCount = UnrollCount; + index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + Ps::atomicAdd(constraintIndex2, nbSolved); + } + } + ++normalIteration; + + for(PxU32 b = 0; b < nbFrictionPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + maxFrictionIndex += frictionHeadersPerPartition[b]; + maxProgress += frictionHeadersPerPartition[b]; + + PxI32 nbSolved = 0; + while(frictionIndex < maxFrictionIndex) + { + const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount); + SolveBlockParallel(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveBlockCoulomb, + normalIteration, frictionIteration, normalIteration); + + frictionIndex += remainder; + frictionEndIndexCount -= remainder; + nbSolved += remainder; + if(frictionEndIndexCount == 0) + { + frictionEndIndexCount = UnrollCount; + frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + Ps::atomicAdd(constraintIndex2, nbSolved); + } + } + + ++frictionIteration; + } + + ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream; + const PxU32 thresholdStreamLength = params.thresholdStreamLength; + PxI32* outThresholdPairs = params.outThresholdPairs; + + cache.mSharedThresholdStream = thresholdStream; + cache.mSharedOutThresholdPairs = outThresholdPairs; + cache.mSharedThresholdStreamLength = thresholdStreamLength; + + { + for(PxU32 b = 0; b < nbPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + maxNormalIndex += headersPerPartition[b]; + maxProgress += headersPerPartition[b]; + + PxI32 nbSolved = 0; + while(index < maxNormalIndex) + { + const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount); + + SolveBlockParallel(constraintList, remainder, normalIteration * batchCount, batchCount, + cache, contactIter, gVTableSolveWriteBackBlockCoulomb, normalIteration, frictionIteration, normalIteration); + + index += remainder; + endIndexCount -= remainder; + nbSolved += remainder; + if(endIndexCount == 0) + { + endIndexCount = UnrollCount; + index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + Ps::atomicAdd(constraintIndex2, nbSolved); + } + } + + ++normalIteration; + + cache.mSharedOutThresholdPairs = outThresholdPairs; + cache.mSharedThresholdStream = thresholdStream; + cache.mSharedThresholdStreamLength = thresholdStreamLength; + + for(PxU32 b = 0; b < nbFrictionPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + maxFrictionIndex += frictionHeadersPerPartition[b]; + maxProgress += frictionHeadersPerPartition[b]; + + PxI32 nbSolved = 0; + while(frictionIndex < maxFrictionIndex) + { + const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount); + + SolveBlockParallel(frictionConstraintList, remainder, frictionIndex, frictionBatchCount, cache, frictionIter, + gVTableSolveWriteBackBlockCoulomb, normalIteration, frictionIteration, frictionIteration); + + frictionIndex += remainder; + frictionEndIndexCount -= remainder; + nbSolved += remainder; + if(frictionEndIndexCount == 0) + { + frictionEndIndexCount = UnrollCount; + frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + Ps::atomicAdd(constraintIndex2, nbSolved); + } + } + + if(cache.mThresholdStreamIndex > 0) + { + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b) + { + thresholdStream[b + threshIndex] = cache.mThresholdStream[b]; + } + cache.mThresholdStreamIndex = 0; + } + + ++frictionIteration; + } + + return normalIteration * batchCount + frictionIteration * frictionBatchCount; +} + + +void SolverCoreGeneralPF::writeBackV +(const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 /*constraintListSize*/, PxConstraintBatchHeader* batchHeaders, const PxU32 numBatches, + ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs, + PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const +{ + SolverContext cache; + cache.solverBodyArray = atomListData; + cache.mThresholdStream = thresholdStream; + cache.mThresholdStreamLength = thresholdStreamLength; + cache.mThresholdStreamIndex = 0; + + PxI32 outThreshIndex = 0; + for(PxU32 j = 0; j < numBatches; ++j) + { + PxU8 type = *constraintList[batchHeaders[j].mStartIndex].constraint; + writeBackTable[type](constraintList + batchHeaders[j].mStartIndex, + batchHeaders[j].mStride, cache); + } + + outThresholdPairs = PxU32(outThreshIndex); +} + +} + +} + + +//#endif -- cgit v1.2.3