aboutsummaryrefslogtreecommitdiff
path: root/PhysX_3.4/Source/LowLevelDynamics/src
diff options
context:
space:
mode:
authorgit perforce import user <a@b>2016-10-25 12:29:14 -0600
committerSheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>2016-10-25 18:56:37 -0500
commit3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
treefa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelDynamics/src
downloadphysx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
Initial commit:
PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/LowLevelDynamics/src')
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulation.cpp241
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.cpp408
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.h95
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrepPF.cpp305
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsDebug.h262
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsScalar.h397
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsSimd.h438
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp1344
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.h192
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationPImpl.h108
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationReference.h92
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationSIMD.cpp306
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.cpp575
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.h101
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationUtils.h317
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyBodyCoreIntegrator.h405
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPartition.cpp712
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPartition.h79
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPrep.h92
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp594
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetupBlock.cpp535
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.cpp725
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.h168
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4.cpp1478
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4PF.cpp1017
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepPF.cpp650
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepShared.h301
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyContactReduction.h409
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyCorrelationBuffer.h104
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.cpp2950
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.h483
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionCorrelation.cpp276
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatch.h81
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatchStreamPair.h128
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyRigidBodyToSolverBody.cpp107
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverBody.h60
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D.h203
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D4.h106
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintDesc.h141
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintExtShared.h116
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintTypes.h67
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp1121
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsBlock.cpp1230
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsShared.h221
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact.h228
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact4.h179
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF.h123
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF4.h155
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverContext.h64
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.cpp622
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.h218
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp755
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.h71
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverCore.h242
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverExt.h85
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraints.cpp868
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraintsBlock.cpp985
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySpatial.h142
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.cpp110
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.h203
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyThresholdTable.cpp68
61 files changed, 24858 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulation.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulation.cpp
new file mode 100644
index 00000000..347aecb8
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulation.cpp
@@ -0,0 +1,241 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "PsMathUtils.h"
+#include "CmConeLimitHelper.h"
+#include "DySolverConstraint1D.h"
+#include "DyArticulation.h"
+#include "DyArticulationHelper.h"
+#include "PxsRigidBody.h"
+#include "PxcConstraintBlockStream.h"
+#include "DyArticulationContactPrep.h"
+#include "DyDynamics.h"
+#include "DyArticulationReference.h"
+#include "DyArticulationPImpl.h"
+#include <stdio.h>
+
+using namespace physx;
+
+// we encode articulation link handles in the lower bits of the pointer, so the
+// articulation has to be aligned, which in an aligned pool means we need to size it
+// appropriately
+
+namespace physx
+{
+ namespace Dy
+ {
+ void SolverCoreRegisterArticulationFns();
+
+ void SolverCoreRegisterArticulationFnsCoulomb();
+
+
+PX_COMPILE_TIME_ASSERT((sizeof(Articulation)&(DY_ARTICULATION_MAX_SIZE-1))==0);
+
+// Records the simulation-side object in mArticulationSim; no solver description is
+// attached yet, so mSolverDesc starts out NULL.
+Articulation::Articulation(Sc::ArticulationSim* sim)
+    : mSolverDesc(NULL)
+    , mArticulationSim(sim)
+{
+    // Link handles are encoded in the low bits of the articulation's address, so the
+    // object has to sit on a DY_ARTICULATION_MAX_SIZE boundary.
+    PX_ASSERT(0 == (reinterpret_cast<size_t>(this) & (DY_ARTICULATION_MAX_SIZE - 1)));
+}
+
+// The destructor body performs no work of its own.
+Articulation::~Articulation() {}
+
+
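+/* NOTE(review): orphaned comment - it reads "computes the implicit impulse and the drive scale at the joint, in joint coords", but no such routine follows here; the next definition is getLinkIndex(). */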
+
+// Returns the link index held in the bits of `handle` selected by DY_ARTICULATION_IDMASK.
+PxU32 Articulation::getLinkIndex(ArticulationLinkHandle handle) const
+{
+    return static_cast<PxU32>(handle & DY_ARTICULATION_IDMASK);
+}
+
+#if DY_DEBUG_ARTICULATION
+
+// Debug-only (DY_DEBUG_ARTICULATION): for every non-root link, translates the link's and
+// its parent's velocity to the joint position (cB2w.p) and takes the difference. The squared
+// linear parts are summed as "Error", the squared angular parts as "Energy", and both
+// totals are printed.
+//   v               : per-link spatial velocities, indexed by link
+//   jointTransforms : per-link joint frames, indexed by link
+//   (third argument): unused; the result is always printed
+void Articulation::computeResiduals(const Cm::SpatialVector *v,
+                                    const ArticulationJointTransforms* jointTransforms,
+                                    bool /*dump*/) const
+{
+    typedef ArticulationFnsScalar Fns;
+
+    PxReal linearSum = 0;
+    PxReal angularSum = 0;
+
+    // link 0 is skipped: the loop relates each link to its parent
+    for(PxU32 link = 1; link < mSolverDesc->linkCount; ++link)
+    {
+        const ArticulationJointTransforms& frames = jointTransforms[link];
+        const PxU32 parentLink = mSolverDesc->links[link].parent;
+
+        const Cm::SpatialVector mismatch =
+              Fns::translateMotion(mSolverDesc->poses[link].p - frames.cB2w.p, v[link])
+            - Fns::translateMotion(mSolverDesc->poses[parentLink].p - frames.cB2w.p, v[parentLink]);
+
+        linearSum += mismatch.linear.magnitudeSquared();
+        angularSum += mismatch.angular.magnitudeSquared();
+    }
+
+    printf("Energy %f, Error %f\n", angularSum, linearSum);
+}
+
+
+// Debug-only (DY_DEBUG_ARTICULATION): sums, over all links, the product of the link's
+// inertia and velocity, translated by the link's offset from link 0.
+//   inertia : per-link inertias, indexed by link
+// Returns the accumulated spatial vector.
+Cm::SpatialVector Articulation::computeMomentum(const FsInertia *inertia) const
+{
+    typedef ArticulationFnsScalar Fns;
+
+    // the solver data's velocity array is reinterpreted as scalar spatial vectors
+    Cm::SpatialVector* linkVelocity = reinterpret_cast<Cm::SpatialVector*>(getVelocity(*mSolverDesc->fsData));
+
+    Cm::SpatialVector total = Cm::SpatialVector::zero();
+    for(PxU32 link = 0; link < mSolverDesc->linkCount; ++link)
+    {
+        total += Fns::translateForce(mSolverDesc->poses[link].p - mSolverDesc->poses[0].p,
+                                     Fns::multiply(inertia[link], linkVelocity[link]));
+    }
+    return total;
+}
+
+
+
+// Debug-only (DY_DEBUG_ARTICULATION): for every non-root link, computes the child joint
+// frame relative to the parent joint frame and, when the joint is swing limited, prints a
+// line for each joint whose cone-limit helper reports a limit. A blank line is printed at
+// the end. The twist-limit check is disabled (kept below as commented-out code).
+void Articulation::checkLimits() const
+{
+    for(PxU32 link = 1; link < mSolverDesc->linkCount; ++link)
+    {
+        const ArticulationJointCore& joint = *mSolverDesc->links[link].inboundJoint;
+
+        const PxTransform parentFrameWorld = mSolverDesc->poses[mSolverDesc->links[link].parent].transform(mSolverDesc->links[link].inboundJoint->parentPose);
+        const PxTransform childFrameWorld = mSolverDesc->poses[link].transform(mSolverDesc->links[link].inboundJoint->childPose);
+
+        PxTransform childInParent = parentFrameWorld.transformInv(childFrameWorld);
+
+        // the relative quat must be the short way round for limits to work...
+        if(childInParent.q.w < 0)
+            childInParent.q = -childInParent.q;
+
+        PxQuat swing, twist;
+        if(joint.twistLimited || joint.swingLimited)
+            Ps::separateSwingTwist(childInParent.q, swing, twist);
+
+        if(joint.swingLimited)
+        {
+            const PxReal contactDistance = PxMin(joint.swingYLimit, joint.swingZLimit)/4;
+
+            Cm::ConeLimitHelper cone(PxTan(joint.swingYLimit/4),
+                                     PxTan(joint.swingZLimit/4),
+                                     PxTan(contactDistance/4));
+
+            PxVec3 limitAxis;
+            PxReal limitError = 0.0f;
+            if(cone.getLimit(swing, limitAxis, limitError))
+            {
+                printf("%u, (%f, %f), %f, (%f, %f, %f), %f\n", link, joint.swingYLimit, joint.swingZLimit, contactDistance, limitAxis.x, limitAxis.y, limitAxis.z, limitError);
+            }
+        }
+
+//      if(j.twistLimited)
+//      {
+//          PxReal tqTwistHigh = PxTan(j.twistLimitHigh/4),
+//                 tqTwistLow  = PxTan(j.twistLimitLow/4),
+//                 twistPad = (tqTwistHigh - tqTwistLow)*0.25f;
+//                 //twistPad = j.twistLimitContactDistance;
+//
+//          PxVec3 axis = jointTransforms[i].cB2w.rotate(PxVec3(1,0,0));
+//          PxReal tqPhi = Ps::tanHalf(twist.x, twist.w);
+//
+//          if(tqPhi < tqTwistLow + twistPad)
+//              constraintData.pushBack(ConstraintData(-axis, -(tqTwistLow - tqPhi)*4));
+//
+//          if(tqPhi > tqTwistHigh - twistPad)
+//              constraintData.pushBack(ConstraintData(axis, (tqTwistHigh - tqPhi)*4));
+//      }
+    }
+    puts("");
+}
+
+#endif
+
+// Installs the ArticulationHelper routines into the ArticulationPImpl function-pointer
+// slots, then registers the articulation solver functions for both solver cores.
+void PxvRegisterArticulations()
+{
+    // ArticulationPImpl dispatch slots
+    ArticulationPImpl::sSaveVelocity                    = &ArticulationHelper::saveVelocity;
+    ArticulationPImpl::sUpdateBodies                    = &ArticulationHelper::updateBodies;
+    ArticulationPImpl::sComputeUnconstrainedVelocities  = &ArticulationHelper::computeUnconstrainedVelocities;
+
+    // solver-core registration (default first, then the Coulomb variant)
+    SolverCoreRegisterArticulationFns();
+    SolverCoreRegisterArticulationFnsCoulomb();
+}
+
+// Reports the byte sizes of the buffers needed for an articulation with `linkCount` links.
+//   solverDataSize : FsData header, the per-link solver arrays, the padded maxPenBias array
+//                    and one FsInertia
+//   totalSize      : solverDataSize plus the per-link LtbRow, reference-velocity and
+//                    FsRowAux arrays
+//   scratchSize    : temporary working memory, proportional to linkCount
+void Articulation::getDataSizes(PxU32 linkCount, PxU32 &solverDataSize, PxU32& totalSize, PxU32& scratchSize)
+{
+    // the maxPenBias array is sized for the link count rounded up to a multiple of 16
+    const PxU32 paddedLinkCount = (linkCount + 15) & 0xFFFFFFF0;
+
+    const size_t solverBytesPerLink =
+          sizeof(Cm::SpatialVectorV)        // velocity
+        + sizeof(Cm::SpatialVectorV)        // deferredVelocity
+        + sizeof(Vec3V)                     // deferredSZ
+        + sizeof(FsJointVectors)            // joint offsets
+        + sizeof(FsRow);                    // featherstone matrix rows
+
+    solverDataSize = sizeof(FsData)                 // header
+        + solverBytesPerLink * linkCount
+        + sizeof(PxReal) * paddedLinkCount          // The maxPenBias values
+        + sizeof(FsInertia);                        // featherstone root inverse inertia
+
+    const size_t extraBytesPerLink =
+          sizeof(LtbRow)                    // lagrange matrix rows
+        + sizeof(Cm::SpatialVectorV)        // ref velocity
+        + sizeof(FsRowAux);
+
+    totalSize = solverDataSize + extraBytesPerLink * linkCount;
+
+    // ArticulationJointTransforms entries are rounded up to a multiple of 16 bytes
+    const size_t alignedJointTransformBytes = (sizeof(ArticulationJointTransforms) + 15) & ~15;
+
+    const size_t scratchBytesPerLink =
+          sizeof(FsInertia) * 3
+        + alignedJointTransformBytes
+        + sizeof(Mat33V)
+        + alignedJointTransformBytes;
+
+    scratchSize = PxU32(scratchBytesPerLink * linkCount);
+}
+
+
+// Forwards every argument unchanged to ArticulationHelper::initializeDriveCache.
+void PxvArticulationDriveCache::initialize(FsData &cache,
+                                           PxU16 linkCount,
+                                           const ArticulationLink* links,
+                                           PxReal compliance,
+                                           PxU32 iterations,
+                                           char* scratchMemory,
+                                           PxU32 scratchMemorySize)
+{
+    ArticulationHelper::initializeDriveCache(
+        cache,
+        linkCount, links,
+        compliance, iterations,
+        scratchMemory, scratchMemorySize);
+}
+
+// Returns the linkCount field stored in the drive cache.
+PxU32 PxvArticulationDriveCache::getLinkCount(const FsData& cache)
+{
+    const PxU32 links = cache.linkCount;
+    return links;
+}
+
+// Forwards to ArticulationHelper::applyImpulses with the cache and the Z / V arrays.
+void PxvArticulationDriveCache::applyImpulses(const FsData& cache,
+                                              Cm::SpatialVectorV* Z,
+                                              Cm::SpatialVectorV* V)
+{
+    ArticulationHelper::applyImpulses(
+        cache,
+        Z,
+        V);
+}
+
+// Forwards to ArticulationHelper::getImpulseResponse: `impulse` applied at link `linkID`
+// is passed in and the result is written to `deltaV`.
+void PxvArticulationDriveCache::getImpulseResponse(const FsData& cache,
+                                                   PxU32 linkID,
+                                                   const Cm::SpatialVectorV& impulse,
+                                                   Cm::SpatialVectorV& deltaV)
+{
+    ArticulationHelper::getImpulseResponse(
+        cache,
+        linkID,
+        impulse,
+        deltaV);
+}
+
+}
+}
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.cpp
new file mode 100644
index 00000000..2adc84ea
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.cpp
@@ -0,0 +1,408 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxPreprocessor.h"
+#include "PsVecMath.h"
+#include "DyArticulationContactPrep.h"
+#include "DySolverConstraintDesc.h"
+#include "DySolverConstraint1D.h"
+#include "DyArticulationHelper.h"
+#include "PxcNpWorkUnit.h"
+#include "PxsMaterialManager.h"
+#include "PxsMaterialCombiner.h"
+#include "DyCorrelationBuffer.h"
+#include "DySolverConstraintExtShared.h"
+
+using namespace physx::Gu;
+
+namespace physx
+{
+
+namespace Dy
+{
+
+// constraint-gen only, since these use getVelocity methods
+// which aren't valid during the solver phase
+
+// Builds two friction directions for a contact with normal `unitNormal`.
+//   vrel       : relative velocity used to pick the first tangent
+//   unitNormal : contact normal (asserted to be unit length within 1e-3)
+//   t0, t1     : outputs. t0 is vrel with its normal component removed, normalised;
+//                t1 = unitNormal x t0. If the squared length of that projection is not
+//                above 0.1, both are produced by Ps::normalToTangents instead.
+PX_INLINE void computeFrictionTangents(const PxVec3& vrel,const PxVec3& unitNormal, PxVec3& t0, PxVec3& t1)
+{
+    PX_ASSERT(PxAbs(unitNormal.magnitude()-1)<1e-3f);
+
+    // strip the normal component from the relative velocity
+    t0 = vrel - unitNormal * unitNormal.dot(vrel);
+    const PxReal tangentialLenSq = t0.magnitudeSquared();
+
+    if(!(tangentialLenSq > 0.1f))  // threshold can be set as low as 0.
+    {
+        // fallback when the projection is too short
+        Ps::normalToTangents(unitNormal, t0, t1);
+        return;
+    }
+
+    t0 *= PxRecipSqrt(tangentialLenSq);
+    t1 = unitNormal.cross(t0);
+}
+
+// Projects this body's velocity onto the direction pair (linear, angular).
+// When the body is not an articulation link (NO_LINK) the rigid-body data performs the
+// projection; otherwise the link's velocity from the articulation solver data is dotted
+// with the spatial vector (linear, angular).
+PxReal SolverExtBody::projectVelocity(const PxVec3& linear, const PxVec3& angular) const
+{
+    if(mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+        return mBodyData->projectVelocity(linear, angular);
+
+    PxF32 projected;
+    FStore(getVelocity(*mFsData)[mLinkIndex].dot(Cm::SpatialVector(linear, angular)), &projected);
+    return projected;
+}
+
+// Returns the linear velocity: from the rigid-body data when this is not an articulation
+// link, otherwise from the link's entry in the articulation velocity array.
+PxVec3 SolverExtBody::getLinVel() const
+{
+    if(mLinkIndex != PxSolverConstraintDesc::NO_LINK)
+    {
+        PxVec3 linkLinear;
+        V3StoreU(getVelocity(*mFsData)[mLinkIndex].linear, linkLinear);
+        return linkLinear;
+    }
+
+    return mBodyData->linearVelocity;
+}
+
+
+// Returns the angular velocity: from the rigid-body data when this is not an articulation
+// link, otherwise from the link's entry in the articulation velocity array.
+PxVec3 SolverExtBody::getAngVel() const
+{
+    if(mLinkIndex != PxSolverConstraintDesc::NO_LINK)
+    {
+        PxVec3 linkAngular;
+        V3StoreU(getVelocity(*mFsData)[mLinkIndex].angular, linkAngular);
+        return linkAngular;
+    }
+
+    return mBodyData->angularVelocity;
+}
+
+// Packs (linear, angular) into a spatial vector for use with getImpulseResponse.
+// For an articulation link the inputs are used as given; for a rigid body (NO_LINK) the
+// angular part is first multiplied by the body's sqrtInvInertia.
+Cm::SpatialVector createImpulseResponseVector(const PxVec3& linear, const PxVec3& angular, const SolverExtBody& body)
+{
+    const bool isArticulationLink = (body.mLinkIndex != PxSolverConstraintDesc::NO_LINK);
+    if(isArticulationLink)
+        return Cm::SpatialVector(linear, angular);
+
+    return Cm::SpatialVector(linear, body.mBodyData->sqrtInvInertia * angular);
+}
+
+// Computes the velocity change each body undergoes for the given impulses and returns
+// impulse0.dot(deltaV0) + impulse1.dot(deltaV1).
+//   b0 / b1           : the two bodies (rigid body when mLinkIndex == NO_LINK, else a link)
+//   impulse0/impulse1 : impulse applied to each body
+//   deltaV0/deltaV1   : outputs, the resulting velocity change of each body
+//   dom0/dom1         : scale applied to the linear part (with invMass for rigid bodies)
+//   angDom0/angDom1   : scale applied to the angular part
+//   last argument     : unused; the self-collision path is disabled (see below)
+PxReal getImpulseResponse(const SolverExtBody& b0, const Cm::SpatialVector& impulse0, Cm::SpatialVector& deltaV0, PxReal dom0, PxReal angDom0,
+                          const SolverExtBody& b1, const Cm::SpatialVector& impulse1, Cm::SpatialVector& deltaV1, PxReal dom1, PxReal angDom1,
+                          bool /*allowSelfCollision*/)
+{
+    // allowSelfCollision = true;
+    // right now self-collision with contacts crashes the solver
+
+    //KS - knocked this out to save some space on SPU
+    //if(allowSelfCollision && b0.mLinkIndex!=PxSolverConstraintDesc::NO_LINK && b0.mFsData == b1.mFsData)
+    //{
+    //  ArticulationHelper::getImpulseSelfResponse(*b0.mFsData,b0.mLinkIndex, impulse0, deltaV0,
+    //                                             b1.mLinkIndex, impulse1, deltaV1);
+    //  //PxReal response = impulse0.dot(deltaV0*dom0) + impulse1.dot(deltaV1*dom1);
+    //  PX_ASSERT(PxAbs(impulse0.dot(deltaV0*dom0) + impulse1.dot(deltaV1*dom1))>0);
+    //}
+    //else
+
+    // ---- body 0 ----
+    if(b0.mLinkIndex != PxSolverConstraintDesc::NO_LINK)
+    {
+        ArticulationHelper::getImpulseResponse(*b0.mFsData, b0.mLinkIndex, impulse0.scale(dom0, angDom0), deltaV0);
+    }
+    else
+    {
+        deltaV0.linear = impulse0.linear * b0.mBodyData->invMass * dom0;
+        deltaV0.angular = impulse0.angular * angDom0;
+    }
+    const PxReal response0 = impulse0.dot(deltaV0);
+
+    // ---- body 1 ----
+    if(b1.mLinkIndex != PxSolverConstraintDesc::NO_LINK)
+    {
+        ArticulationHelper::getImpulseResponse(*b1.mFsData, b1.mLinkIndex, impulse1.scale(dom1, angDom1), deltaV1);
+    }
+    else
+    {
+        deltaV1.linear = impulse1.linear * b1.mBodyData->invMass * dom1;
+        deltaV1.angular = impulse1.angular * angDom1;
+    }
+
+    return response0 + impulse1.dot(deltaV1);
+}
+
+
+// Writes the solver constraint stream for a contact pair whose bodies are SolverExtBody
+// objects (each either a rigid body, mLinkIndex == NO_LINK, or an articulation link).
+//
+// For every friction patch that has contacts, the following is appended to `workspace`:
+//   1. a SolverContactHeader (type DY_SC_TYPE_EXT_CONTACT),
+//   2. one SolverContactPointExt per contact, filled in by setupExtSolverContact,
+//   3. a zeroed PxF32 force buffer, padded to a multiple of 4 entries,
+//   4. if friction is enabled for the patch, two SolverContactFrictionExt rows per anchor
+//      (one along each tangent t0 / t1).
+//
+//   buffer                    : contact points referenced by the correlation buffer
+//   c                         : correlation buffer (friction patches, contact patches, list heads)
+//   bodyFrame0 / bodyFrame1   : poses used to bring the friction anchors into world space
+//   workspace                 : output memory for the constraint stream
+//   b0 / b1                   : the two bodies
+//   invDtF32                  : inverse time step
+//   bounceThresholdF32        : forwarded to setupExtSolverContact
+//   invMassScale* / invInertiaScale* : per-body linear / angular scale factors
+//   restDist                  : forwarded to setupExtSolverContact
+//   frictionDataPtr           : base address of the FrictionPatch array used for the
+//                               "broken" write-back pointer stored in each header
+//   ccdMaxContactDist         : forwarded to setupExtSolverContact
+void setupFinalizeExtSolverContacts(
+    const ContactPoint* buffer,
+    const CorrelationBuffer& c,
+    const PxTransform& bodyFrame0,
+    const PxTransform& bodyFrame1,
+    PxU8* workspace,
+    const SolverExtBody& b0,
+    const SolverExtBody& b1,
+    const PxReal invDtF32,
+    PxReal bounceThresholdF32,
+    PxReal invMassScale0, PxReal invInertiaScale0,
+    PxReal invMassScale1, PxReal invInertiaScale1,
+    const PxReal restDist,
+    PxU8* frictionDataPtr,
+    PxReal ccdMaxContactDist)
+{
+    // NOTE II: the friction patches are sparse (some of them have no contact patches, and
+    // therefore did not get written back to the cache) but the patch addresses are dense,
+    // corresponding to valid patches
+
+    /*const bool haveFriction = PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;*/
+
+    const FloatV ccdMaxSeparation = FLoad(ccdMaxContactDist);
+
+    PxU8* PX_RESTRICT ptr = workspace;
+
+    const FloatV zero=FZero();
+
+    //KS - TODO - this should all be done in SIMD to avoid LHS
+    const PxF32 maxPenBias0 = b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b0.mBodyData->penBiasClamp : getMaxPenBias(*b0.mFsData)[b0.mLinkIndex];
+    const PxF32 maxPenBias1 = b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b1.mBodyData->penBiasClamp : getMaxPenBias(*b1.mFsData)[b1.mLinkIndex];
+
+    const FloatV maxPenBias = FLoad(PxMax(maxPenBias0, maxPenBias1));
+
+    const PxReal d0 = invMassScale0;
+    const PxReal d1 = invMassScale1;
+
+    const PxReal angD0 = invInertiaScale0;
+    const PxReal angD1 = invInertiaScale1;
+
+    // z/w hold the two linear scale factors for the whole pair; x/y are filled per patch
+    Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4Zero();
+    staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(d0));
+    staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(d1));
+
+    const FloatV restDistance = FLoad(restDist);
+
+    // counts only patches that have contacts, so write-back addresses stay dense (see NOTE II)
+    PxU32 frictionPatchWritebackAddrIndex = 0;
+
+    Ps::prefetchLine(c.contactID);
+    Ps::prefetchLine(c.contactID, 128);
+
+    const FloatV invDt = FLoad(invDtF32);
+    const FloatV p8 = FLoad(0.8f);
+    const FloatV bounceThreshold = FLoad(bounceThresholdF32);
+
+    const FloatV invDtp8 = FMul(invDt, p8);
+
+    PxU8 flags = 0;
+
+    for(PxU32 i=0;i<c.frictionPatchCount;i++)
+    {
+        PxU32 contactCount = c.frictionPatchContactCounts[i];
+        if(contactCount == 0)
+            continue;
+
+        const FrictionPatch& frictionPatch = c.frictionPatches[i];
+        PX_ASSERT(frictionPatch.anchorCount <= 2); //0==anchorCount is allowed if all the contacts in the manifold have a large offset.
+
+        // material properties for the whole patch are read from its first contact
+        const Gu::ContactPoint* contactBase0 = buffer + c.contactPatches[c.correlationListHeads[i]].start;
+        const PxReal combinedRestitution = contactBase0->restitution;
+
+        const PxReal staticFriction = contactBase0->staticFriction;
+        const PxReal dynamicFriction = contactBase0->dynamicFriction;
+        const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
+        staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(staticFriction));
+        staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(dynamicFriction));
+
+        SolverContactHeader* PX_RESTRICT header = reinterpret_cast<SolverContactHeader*>(ptr);
+        ptr += sizeof(SolverContactHeader);
+
+        Ps::prefetchLine(ptr + 128);
+        Ps::prefetchLine(ptr + 256);
+        Ps::prefetchLine(ptr + 384);
+
+        const bool haveFriction = (disableStrongFriction == 0) ;//PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;
+        header->numNormalConstr     = Ps::to8(contactCount);
+        header->numFrictionConstr   = Ps::to8(haveFriction ? frictionPatch.anchorCount*2 : 0);
+
+        header->type                = Ps::to8(DY_SC_TYPE_EXT_CONTACT);
+
+        header->flags = flags;
+
+        const FloatV restitution = FLoad(combinedRestitution);
+
+        header->staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W;
+
+        header->angDom0 = angD0;
+        header->angDom1 = angD1;
+
+        const PxU32 pointStride = sizeof(SolverContactPointExt);
+        const PxU32 frictionStride = sizeof(SolverContactFrictionExt);
+
+        const Vec3V normal = V3LoadU(buffer[c.contactPatches[c.correlationListHeads[i]].start].normal);
+
+        header->normal = normal;
+
+        // walk the linked list of contact patches correlated with this friction patch
+        for(PxU32 patch=c.correlationListHeads[i];
+            patch!=CorrelationBuffer::LIST_END;
+            patch = c.contactPatches[patch].next)
+        {
+            const PxU32 count = c.contactPatches[patch].count;
+            const Gu::ContactPoint* contactBase = buffer + c.contactPatches[patch].start;
+
+            PxU8* p = ptr;
+            for(PxU32 j=0;j<count;j++)
+            {
+                const Gu::ContactPoint& contact = contactBase[j];
+
+                SolverContactPointExt* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPointExt*>(p);
+                p += pointStride;
+
+                setupExtSolverContact(b0, b1, d0, d1, angD0, angD1, bodyFrame0, bodyFrame1, normal, invDt, invDtp8, restDistance, maxPenBias, restitution,
+                    bounceThreshold, contact, *solverContact, ccdMaxSeparation);
+            }
+
+            ptr = p;
+        }
+
+        // per-contact force buffer: zero the used entries, reserve a multiple of 4 floats
+        PxF32* forceBuffer = reinterpret_cast<PxF32*>(ptr);
+        PxMemZero(forceBuffer, sizeof(PxF32) * contactCount);
+        ptr += sizeof(PxF32) * ((contactCount + 3) & (~3));
+
+        header->broken = 0;
+
+        if(haveFriction)
+        {
+            //const Vec3V normal = Vec3V_From_PxVec3(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal);
+            PxVec3 normalS = buffer[c.contactPatches[c.correlationListHeads[i]].start].normal;
+
+            PxVec3 t0, t1;
+            computeFrictionTangents(b0.getLinVel() - b1.getLinVel(), normalS, t0, t1);
+
+            Vec3V vT0 = V3LoadU(t0);
+            Vec3V vT1 = V3LoadU(t1);
+
+            //We want to set the writeBack ptr to point to the broken flag of the friction patch.
+            //On spu we have a slight problem here because the friction patch array is
+            //in local store rather than in main memory. The good news is that the address of the friction
+            //patch array in main memory is stored in the work unit. These two addresses will be equal
+            //except on spu where one is local store memory and the other is the effective address in main memory.
+            //Using the value stored in the work unit guarantees that the main memory address is used on all platforms.
+            PxU8* PX_RESTRICT writeback = frictionDataPtr + frictionPatchWritebackAddrIndex*sizeof(FrictionPatch);
+
+            header->frictionBrokenWritebackByte = writeback;
+
+            for(PxU32 j = 0; j < frictionPatch.anchorCount; j++)
+            {
+                // two consecutive rows per anchor: f0 acts along t0, f1 along t1
+                SolverContactFrictionExt* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionExt*>(ptr);
+                ptr += frictionStride;
+                SolverContactFrictionExt* PX_RESTRICT f1 = reinterpret_cast<SolverContactFrictionExt*>(ptr);
+                ptr += frictionStride;
+
+                PxVec3 ra = bodyFrame0.q.rotate(frictionPatch.body0Anchors[j]);
+                PxVec3 rb = bodyFrame1.q.rotate(frictionPatch.body1Anchors[j]);
+                PxVec3 error = (ra + bodyFrame0.p) - (rb + bodyFrame1.p);
+
+                // ---- friction row along t0 ----
+                {
+                    const PxVec3 raXn = ra.cross(t0);
+                    const PxVec3 rbXn = rb.cross(t0);
+
+                    Cm::SpatialVector deltaV0, deltaV1;
+
+                    // body 1 receives the opposite impulse along the SAME tangent (t0), matching
+                    // the angular part -rbXn = -(rb x t0)
+                    const Cm::SpatialVector resp0 = createImpulseResponseVector(t0, raXn, b0);
+                    const Cm::SpatialVector resp1 = createImpulseResponseVector(-t0, -rbXn, b1);
+                    FloatV resp = FLoad(getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
+                                                           b1, resp1, deltaV1, d1, angD1));
+
+                    const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FMul(p8, FRecip(resp)), zero);
+
+                    PxU32 index = c.contactPatches[c.correlationListHeads[i]].start;
+                    PxF32 targetVel = buffer[index].targetVel.dot(t0);
+
+                    // NOTE(review): only one body's velocity is folded into targetVel (b0 takes
+                    // precedence when both are rigid bodies) - confirm this is intended.
+                    if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+                        targetVel -= b0.projectVelocity(t0, raXn);
+                    else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+                        targetVel += b1.projectVelocity(t0, rbXn);
+
+                    f0->normalXYZ_appliedForceW = V4SetW(vT0, zero);
+                    f0->raXnXYZ_velMultiplierW = V4SetW(V4LoadA(&resp0.angular.x), velMultiplier);
+                    f0->rbXnXYZ_biasW = V4SetW(V4Neg(V4LoadA(&resp1.angular.x)), FLoad(t0.dot(error) * invDtF32));
+                    f0->linDeltaVA = V3LoadA(deltaV0.linear);
+                    f0->angDeltaVA = V3LoadA(deltaV0.angular);
+                    f0->linDeltaVB = V3LoadA(deltaV1.linear);
+                    f0->angDeltaVB = V3LoadA(deltaV1.angular);
+                    f0->targetVel = targetVel;
+                }
+
+                // ---- friction row along t1 ----
+                {
+                    const PxVec3 raXn = ra.cross(t1);
+                    const PxVec3 rbXn = rb.cross(t1);
+
+                    Cm::SpatialVector deltaV0, deltaV1;
+
+                    const Cm::SpatialVector resp0 = createImpulseResponseVector(t1, raXn, b0);
+                    const Cm::SpatialVector resp1 = createImpulseResponseVector(-t1, -rbXn, b1);
+
+                    FloatV resp = FLoad(getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
+                                                           b1, resp1, deltaV1, d1, angD1));
+
+                    const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FMul(p8, FRecip(resp)), zero);
+
+                    // the target velocity of this row is its component along t1, the row's own direction
+                    PxU32 index = c.contactPatches[c.correlationListHeads[i]].start;
+                    PxF32 targetVel = buffer[index].targetVel.dot(t1);
+
+                    // NOTE(review): same single-body handling as the t0 row - confirm intent.
+                    if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+                        targetVel -= b0.projectVelocity(t1, raXn);
+                    else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+                        targetVel += b1.projectVelocity(t1, rbXn);
+
+                    f1->normalXYZ_appliedForceW = V4SetW(vT1, zero);
+                    f1->raXnXYZ_velMultiplierW = V4SetW(V4LoadA(&resp0.angular.x), velMultiplier);
+                    f1->rbXnXYZ_biasW = V4SetW(V4Neg(V4LoadA(&resp1.angular.x)), FLoad(t1.dot(error) * invDtF32));
+                    f1->linDeltaVA = V3LoadA(deltaV0.linear);
+                    f1->angDeltaVA = V3LoadA(deltaV0.angular);
+                    f1->linDeltaVB = V3LoadA(deltaV1.linear);
+                    f1->angDeltaVB = V3LoadA(deltaV1.angular);
+                    f1->targetVel = targetVel;
+                }
+            }
+        }
+
+        frictionPatchWritebackAddrIndex++;
+    }
+}
+
+}
+
+
+}
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.h
new file mode 100644
index 00000000..4e927b10
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.h
@@ -0,0 +1,95 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_SOLVERCONSTRAINTEXT_H
+#define DY_SOLVERCONSTRAINTEXT_H
+
+#include "DySolverExt.h"
+
+namespace physx
+{
+
+struct PxcNpWorkUnit;
+
+
+namespace Gu
+{
+ class ContactBuffer;
+ struct ContactPoint;
+}
+
+namespace Dy
+{
+
+struct CorrelationBuffer;
+
+PxReal getImpulseResponse(const SolverExtBody& b0, const Cm::SpatialVector& impulse0, Cm::SpatialVector& deltaV0, PxReal dom0, PxReal angDom0, // body 0: impulse in, velocity change out (deltaV0), linear / angular scale factors
+ const SolverExtBody& b1, const Cm::SpatialVector& impulse1, Cm::SpatialVector& deltaV1, PxReal dom1, PxReal angDom1, // body 1: same layout as body 0
+ bool allowSelfCollision = false); // unused by the definition in DyArticulationContactPrep.cpp (name commented out there); the function returns impulse0.dot(deltaV0) + impulse1.dot(deltaV1)
+
+Cm::SpatialVector createImpulseResponseVector(const PxVec3& linear, const PxVec3& angular, const SolverExtBody& body); // packs (linear, angular); for a rigid body (NO_LINK) the angular part is first multiplied by sqrtInvInertia
+
+void setupFinalizeExtSolverContacts( // writes header, contact points, force buffer and friction rows for each friction patch that has contacts
+ const Gu::ContactPoint* buffer, // contact points indexed through the correlation buffer
+ const CorrelationBuffer& c, // friction patches, contact patches and their list heads
+ const PxTransform& bodyFrame0, // pose used to bring body 0's friction anchors into world space
+ const PxTransform& bodyFrame1, // pose used to bring body 1's friction anchors into world space
+ PxU8* workspace, // output memory for the constraint stream
+ const SolverExtBody& b0, // first body (rigid body or articulation link)
+ const SolverExtBody& b1, // second body (rigid body or articulation link)
+ const PxReal invDtF32, // inverse time step
+ PxReal bounceThresholdF32, // forwarded to setupExtSolverContact
+ PxReal invMassScale0, PxReal invInertiaScale0, // linear / angular scale factors for body 0
+ PxReal invMassScale1, PxReal invInertiaScale1, // linear / angular scale factors for body 1
+ PxReal restDistance, PxU8* frictionDataPtr, // rest distance; base address used for the friction-broken write-back pointer
+ PxReal ccdMaxContactDist); // forwarded to setupExtSolverContact as the CCD max separation
+
+
+bool setupFinalizeExtSolverContactsCoulomb( // NOTE(review): definition is not in this header; presumably the Coulomb-friction counterpart of setupFinalizeExtSolverContacts - confirm, including the meaning of the bool result
+ const Gu::ContactBuffer& buffer,
+ const CorrelationBuffer& c,
+ const PxTransform& bodyFrame0,
+ const PxTransform& bodyFrame1,
+ PxU8* workspace,
+ PxReal invDt,
+ PxReal bounceThreshold,
+ const SolverExtBody& b0,
+ const SolverExtBody& b1,
+ PxU32 frictionCountPerPoint,
+ PxReal invMassScale0, PxReal invInertiaScale0,
+ PxReal invMassScale1, PxReal invInertiaScale1,
+ PxReal restDist,
+ PxReal ccdMaxContactDist);
+
+}
+
+}
+
+#endif //DY_SOLVERCONSTRAINTEXT_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrepPF.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrepPF.cpp
new file mode 100644
index 00000000..8c954b71
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrepPF.cpp
@@ -0,0 +1,305 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxPreprocessor.h"
+#include "PsVecMath.h"
+#include "DyArticulationContactPrep.h"
+#include "DySolverConstraintDesc.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverContact.h"
+#include "DySolverContactPF.h"
+#include "DyArticulationHelper.h"
+#include "PxcNpWorkUnit.h"
+#include "PxsMaterialManager.h"
+#include "PxsMaterialCombiner.h"
+#include "DyCorrelationBuffer.h"
+#include "DySolverConstraintExtShared.h"
+
+using namespace physx;
+using namespace Gu;
+
+// constraint-gen only, since these use getVelocityFast methods
+// which aren't valid during the solver phase
+
+namespace physx
+{
+
+namespace Dy
+{
+
+
+// Builds the Coulomb-friction solver constraints for one contact pair of SolverExtBody bodies
+// and writes them into 'workspace'.
+//
+// The workspace is filled in two passes over the friction patches of 'c':
+//  1) for each non-empty patch: a SolverContactCoulombHeader followed by one
+//     SolverContactPointExt per contact point;
+//  2) for each non-empty patch: a SolverFrictionHeader, a zeroed applied-force buffer and, when
+//     friction is enabled for the patch, frictionCountPerPoint SolverContactFrictionExt rows per
+//     contact point. The second pass also revisits the headers written by pass 1 to store the
+//     byte offset from each contact header to its friction header.
+//
+//  buffer                 : contact points referenced by the correlation buffer.
+//  c                      : correlation buffer (friction patches -> linked contact patches).
+//  bodyFrame0/1           : body transforms; only .p is read directly here (lever arms).
+//  workspace              : destination memory; no size check is performed in this function.
+//  invDt                  : inverse time step.
+//  bounceThresholdF32, restDist, ccdMaxDistance : forwarded to setupExtSolverContact().
+//  frictionCountPerPoint  : number of friction rows generated per contact point.
+//  invMassScale*/invInertiaScale* : stored in the headers and passed to getImpulseResponse().
+//
+// Returns true if at least one non-empty patch has friction enabled.
+bool setupFinalizeExtSolverContactsCoulomb(
+ const ContactBuffer& buffer,
+ const CorrelationBuffer& c,
+ const PxTransform& bodyFrame0,
+ const PxTransform& bodyFrame1,
+ PxU8* workspace,
+ PxReal invDt,
+ PxReal bounceThresholdF32,
+ const SolverExtBody& b0,
+ const SolverExtBody& b1,
+ PxU32 frictionCountPerPoint,
+ PxReal invMassScale0, PxReal invInertiaScale0,
+ PxReal invMassScale1, PxReal invInertiaScale1,
+ PxReal restDist,
+ PxReal ccdMaxDistance)
+{
+ // NOTE II: the friction patches are sparse (some of them have no contact patches, and
+ // therefore did not get written back to the cache) but the patch addresses are dense,
+ // corresponding to valid patches
+
+ const FloatV ccdMaxSeparation = FLoad(ccdMaxDistance);
+
+ // Write cursor for the contact pass; the friction pass continues from where it ends.
+ PxU8* PX_RESTRICT ptr = workspace;
+
+ //KS - TODO - this should all be done in SIMD to avoid LHS
+ // A body whose link index is NO_LINK reads its clamp from mBodyData; otherwise the per-link
+ // value is looked up in the body's mFsData.
+ const PxF32 maxPenBias0 = b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b0.mBodyData->penBiasClamp : getMaxPenBias(*b0.mFsData)[b0.mLinkIndex];
+ const PxF32 maxPenBias1 = b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b1.mBodyData->penBiasClamp : getMaxPenBias(*b1.mFsData)[b1.mLinkIndex];
+
+ // Larger of the two clamps, divided by invDt (i.e. multiplied by the time step).
+ const FloatV maxPenBias = FLoad(PxMax(maxPenBias0, maxPenBias1)/invDt);
+
+ const FloatV restDistance = FLoad(restDist);
+ const FloatV bounceThreshold = FLoad(bounceThresholdF32);
+
+ const FloatV invDtV = FLoad(invDt);
+ const FloatV pt8 = FLoad(0.8f);
+
+ // 0.8 * invDt, passed to setupExtSolverContact() alongside the unscaled invDt.
+ const FloatV invDtp8 = FMul(invDtV, pt8);
+
+ Ps::prefetchLine(c.contactID);
+ Ps::prefetchLine(c.contactID, 128);
+
+ const PxU32 frictionPatchCount = c.frictionPatchCount;
+
+ const PxU32 pointStride = sizeof(SolverContactPointExt);
+ const PxU32 frictionStride = sizeof(SolverContactFrictionExt);
+ const PxU8 pointHeaderType = DY_SC_TYPE_EXT_CONTACT;
+ const PxU8 frictionHeaderType = DY_SC_TYPE_EXT_FRICTION;
+
+ PxReal d0 = invMassScale0;
+ PxReal d1 = invMassScale1;
+ PxReal angD0 = invInertiaScale0;
+ PxReal angD1 = invInertiaScale1;
+
+ // Header flags; always 0 in this function.
+ PxU8 flags = 0;
+
+ // ---- Pass 1: contact headers and contact points ----
+ for(PxU32 i=0;i< frictionPatchCount;i++)
+ {
+ const PxU32 contactCount = c.frictionPatchContactCounts[i];
+ if(contactCount == 0)
+ continue;
+
+ // The first contact of the first contact patch supplies the normal and restitution
+ // used for the whole friction patch.
+ const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start;
+
+ // NOTE(review): normalV and normal load the same value; one is stored in the header, the
+ // other is passed to setupExtSolverContact().
+ const Vec3V normalV = Ps::aos::V3LoadA(contactBase0->normal);
+ const Vec3V normal = V3LoadA(contactBase0->normal);
+
+ const PxReal combinedRestitution = contactBase0->restitution;
+
+
+ SolverContactCoulombHeader* PX_RESTRICT header = reinterpret_cast<SolverContactCoulombHeader*>(ptr);
+ ptr += sizeof(SolverContactCoulombHeader);
+
+ Ps::prefetchLine(ptr, 128);
+ Ps::prefetchLine(ptr, 256);
+ Ps::prefetchLine(ptr, 384);
+
+ const FloatV restitution = FLoad(combinedRestitution);
+
+
+ // Narrowing to PxU8: assumes a friction patch never holds more than 255 contacts.
+ header->numNormalConstr = PxU8(contactCount);
+ header->type = pointHeaderType;
+ //header->setRestitution(combinedRestitution);
+
+ header->setDominance0(d0);
+ header->setDominance1(d1);
+ header->angDom0 = angD0;
+ header->angDom1 = angD1;
+ header->flags = flags;
+
+ header->setNormal(normalV);
+
+ // Walk the linked list of contact patches belonging to this friction patch.
+ for(PxU32 patch=c.correlationListHeads[i];
+ patch!=CorrelationBuffer::LIST_END;
+ patch = c.contactPatches[patch].next)
+ {
+ const PxU32 count = c.contactPatches[patch].count;
+ const Gu::ContactPoint* contactBase = buffer.contacts + c.contactPatches[patch].start;
+
+ PxU8* p = ptr;
+ for(PxU32 j=0;j<count;j++)
+ {
+ const Gu::ContactPoint& contact = contactBase[j];
+
+ SolverContactPointExt* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPointExt*>(p);
+ p += pointStride;
+
+ setupExtSolverContact(b0, b1, d0, d1, angD0, angD1, bodyFrame0, bodyFrame1, normal, invDtV, invDtp8, restDistance, maxPenBias, restitution,
+ bounceThreshold, contact, *solverContact, ccdMaxSeparation);
+ }
+ ptr = p;
+ }
+ }
+
+ //construct all the frictions
+
+ // ---- Pass 2: friction headers and friction rows ----
+ // ptr2 re-walks the contact headers written above; ptr keeps appending after them.
+ PxU8* PX_RESTRICT ptr2 = workspace;
+
+ // 0.70710678 ~= 1/sqrt(2): selects which fallback tangent is built from the normal.
+ const PxF32 orthoThreshold = 0.70710678f;
+ // Minimum squared length of the tangential relative velocity for it to be used as a direction.
+ const PxF32 eps = 0.00001f;
+ bool hasFriction = false;
+
+ for(PxU32 i=0;i< frictionPatchCount;i++)
+ {
+ const PxU32 contactCount = c.frictionPatchContactCounts[i];
+ if(contactCount == 0)
+ continue;
+
+ // Byte distance from this contact header to the friction header about to be written.
+ // Narrowing to PxU16: assumes the distance fits in 16 bits.
+ SolverContactCoulombHeader* header = reinterpret_cast<SolverContactCoulombHeader*>(ptr2);
+ header->frictionOffset = PxU16(ptr - ptr2);
+ ptr2 += sizeof(SolverContactCoulombHeader) + header->numNormalConstr * pointStride;
+
+ const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start;
+
+ PxVec3 normal = contactBase0->normal;
+
+ const PxReal staticFriction = contactBase0->staticFriction;
+ const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
+ const bool haveFriction = (disableStrongFriction == 0);
+
+ // The friction header and the zeroed per-contact applied-force buffer are written even
+ // when friction is disabled for the patch (numFrictionConstr is 0 in that case).
+ SolverFrictionHeader* frictionHeader = reinterpret_cast<SolverFrictionHeader*>(ptr);
+ frictionHeader->numNormalConstr = Ps::to8(c.frictionPatchContactCounts[i]);
+ frictionHeader->numFrictionConstr = Ps::to8(haveFriction ? c.frictionPatchContactCounts[i] * frictionCountPerPoint : 0);
+ frictionHeader->flags = flags;
+ ptr += sizeof(SolverFrictionHeader);
+ PxF32* forceBuffer = reinterpret_cast<PxF32*>(ptr);
+ ptr += frictionHeader->getAppliedForcePaddingSize(c.frictionPatchContactCounts[i]);
+ PxMemZero(forceBuffer, sizeof(PxF32) * c.frictionPatchContactCounts[i]);
+ Ps::prefetchLine(ptr, 128);
+ Ps::prefetchLine(ptr, 256);
+ Ps::prefetchLine(ptr, 384);
+
+
+ // Tangent frame: the first direction is the relative linear velocity of the two bodies
+ // with its normal component removed; if that is (near) zero, a vector perpendicular to
+ // the normal is used instead. The second direction is tDir0 x normal.
+ const PxVec3 t0Fallback1(0.f, -normal.z, normal.y);
+ const PxVec3 t0Fallback2(-normal.y, normal.x, 0.f) ;
+ const PxVec3 tFallback1 = orthoThreshold > PxAbs(normal.x) ? t0Fallback1 : t0Fallback2;
+ const PxVec3 vrel = b0.getLinVel() - b1.getLinVel();
+ const PxVec3 t0_ = vrel - normal * (normal.dot(vrel));
+ const PxReal sqDist = t0_.dot(t0_);
+ const PxVec3 tDir0 = (sqDist > eps ? t0_: tFallback1).getNormalized();
+ const PxVec3 tDir1 = tDir0.cross(normal);
+ PxVec3 tFallback[2] = {tDir0, tDir1};
+
+ // Index of the tangent used for the next friction row; toggled after every row and not
+ // reset per contact point.
+ PxU32 ind = 0;
+
+ if(haveFriction)
+ {
+ hasFriction = true;
+ frictionHeader->setStaticFriction(staticFriction);
+ frictionHeader->invMass0D0 = d0;
+ frictionHeader->invMass1D1 = d1;
+ frictionHeader->angDom0 = angD0;
+ frictionHeader->angDom1 = angD1;
+ // NOTE(review): 'type' is only assigned on this path; when friction is disabled the
+ // header's type field is left as it was in the workspace.
+ frictionHeader->type = frictionHeaderType;
+
+ // NOTE(review): accumulated below but never read.
+ PxU32 totalPatchContactCount = 0;
+
+ for(PxU32 patch=c.correlationListHeads[i];
+ patch!=CorrelationBuffer::LIST_END;
+ patch = c.contactPatches[patch].next)
+ {
+ const PxU32 count = c.contactPatches[patch].count;
+ const PxU32 start = c.contactPatches[patch].start;
+ const Gu::ContactPoint* contactBase = buffer.contacts + start;
+
+ PxU8* p = ptr;
+
+ for(PxU32 j =0; j < count; j++)
+ {
+ const Gu::ContactPoint& contact = contactBase[j];
+ // Lever arms from each body frame origin to the contact point.
+ const PxVec3 ra = contact.point - bodyFrame0.p;
+ const PxVec3 rb = contact.point - bodyFrame1.p;
+
+ const PxVec3 targetVel = contact.targetVel;
+ // Velocity of each body at the contact point.
+ const PxVec3 pVRa = b0.getLinVel() + b0.getAngVel().cross(ra);
+ const PxVec3 pVRb = b1.getLinVel() + b1.getAngVel().cross(rb);
+ //const PxVec3 vrel = pVRa - pVRb;
+
+ for(PxU32 k = 0; k < frictionCountPerPoint; ++k)
+ {
+ SolverContactFrictionExt* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionExt*>(p);
+ p += frictionStride;
+
+ PxVec3 t0 = tFallback[ind];
+ ind = 1 - ind;
+ PxVec3 raXn = ra.cross(t0);
+ PxVec3 rbXn = rb.cross(t0);
+ Cm::SpatialVector deltaV0, deltaV1;
+
+ // Equal and opposite unit impulses along the tangent on the two bodies.
+ const Cm::SpatialVector resp0 = createImpulseResponseVector(t0, raXn, b0);
+ const Cm::SpatialVector resp1 = createImpulseResponseVector(-t0, -rbXn, b1);
+
+ PxReal unitResponse = getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
+ b1, resp1, deltaV1, d1, angD1);
+
+ // Target velocity along the tangent. If body 0 has no link index its contact-point
+ // velocity is added; otherwise, if body 1 has no link index, its contact-point
+ // velocity is subtracted.
+ PxReal tv = targetVel.dot(t0);
+ if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+ tv += pVRa.dot(t0);
+ else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+ tv -= pVRb.dot(t0);
+
+
+ // A non-positive response yields a zero multiplier instead of a division by zero.
+ f0->setVelMultiplier(FLoad(unitResponse>0.0f ? 1.f/unitResponse : 0.0f));
+ f0->setRaXn(resp0.angular);
+ f0->setRbXn(-resp1.angular);
+ f0->targetVel = tv;
+ f0->setNormal(t0);
+ f0->setAppliedForce(0.0f);
+ f0->linDeltaVA = V3LoadA(deltaV0.linear);
+ f0->angDeltaVA = V3LoadA(deltaV0.angular);
+ f0->linDeltaVB = V3LoadA(deltaV1.linear);
+ f0->angDeltaVB = V3LoadA(deltaV1.angular);
+ }
+ }
+
+ totalPatchContactCount += c.contactPatches[patch].count;
+
+ ptr = p;
+ }
+ }
+ }
+ //PX_ASSERT(ptr - workspace == n.solverConstraintSize);
+ return hasFriction;
+}
+
+
+}
+
+}
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsDebug.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsDebug.h
new file mode 100644
index 00000000..901eef93
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsDebug.h
@@ -0,0 +1,262 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef DY_ARTICULATION_DEBUG_FNS_H
+#define DY_ARTICULATION_DEBUG_FNS_H
+
+#include "DyArticulationFnsScalar.h"
+#include "DyArticulationFnsSimd.h"
+
+namespace physx
+{
+namespace Dy
+{
+#if 0
+ // Debug helper (compiled out by the surrounding #if 0).
+ // For each of the 'linkCount' links, multiplies inertia[i] by velocity[i], translates the
+ // result by (pose[i].p - pose[0].p) with translateForce(), sums the results and prints the
+ // linear and angular parts of the sum, labelled with 'id'.
+ void printMomentum(const char* id, PxTransform* pose, Cm::SpatialVector* velocity, FsInertia* inertia, PxU32 linkCount)
+ {
+ typedef ArticulationFnsScalar Fns;
+
+ Cm::SpatialVector m = Cm::SpatialVector::zero();
+ for(PxU32 i=0;i<linkCount;i++)
+ m += Fns::translateForce(pose[i].p - pose[0].p, Fns::multiply(inertia[i], velocity[i]));
+ printf("momentum (%20s): (%f, %f, %f), (%f, %f, %f)\n", id, m.linear.x, m.linear.y, m.linear.z, m.angular.x, m.angular.y, m.angular.z);
+ }
+#endif
+
+// Cross-checking implementation of the articulation math helpers.
+//
+// Most operations evaluate the scalar reference (ArticulationFnsScalar) first, then the SIMD
+// implementation, assert (PX_ASSERT) that both agree within a tolerance, and return the SIMD
+// result. addInertia/subtractInertia are computed directly without a cross-check.
+class ArticulationFnsDebug
+{
+	typedef ArticulationFnsSimdBase SimdBase;
+	typedef ArticulationFnsSimd<ArticulationFnsDebug> Simd;
+	typedef ArticulationFnsScalar Scalar;
+
+public:
+
+	// ---- conversions from SIMD types to their scalar counterparts ----
+
+	// Copies a SIMD 3x3 matrix into a 16-byte aligned PxMat33 and returns it by value.
+	static const PxMat33 unsimdify(const Mat33V &m)
+	{
+		PX_ALIGN(16, PxMat33) scalarMat;
+		PxMat33_From_Mat33V(m, scalarMat);
+		return scalarMat;
+	}
+
+	// Stores a SIMD float into a plain PxReal.
+	static PxReal unsimdify(const FloatV &m)
+	{
+		PxF32 value;
+		FStore(m, &value);
+		return value;
+	}
+
+	// Converts the three 3x3 blocks of a SIMD inertia into a scalar SpInertia.
+	static SpInertia unsimdify(const FsInertia &I)
+	{
+		return SpInertia(unsimdify(I.ll), unsimdify(I.la), unsimdify(I.aa));
+	}
+
+	// Reinterprets an array of SIMD spatial vectors as scalar spatial vectors (no copy).
+	static const Cm::SpatialVector* unsimdify(const Cm::SpatialVectorV *S)
+	{
+		return reinterpret_cast<const Cm::SpatialVector*>(S);
+	}
+
+	// ---- unchecked operations ----
+
+	// Block-wise sum of two inertias.
+	static PX_FORCE_INLINE FsInertia addInertia(const FsInertia& in1, const FsInertia& in2)
+	{
+		return FsInertia(M33Add(in1.ll, in2.ll),
+						 M33Add(in1.la, in2.la),
+						 M33Add(in1.aa, in2.aa));
+	}
+
+	// Block-wise difference of two inertias.
+	static PX_FORCE_INLINE FsInertia subtractInertia(const FsInertia& in1, const FsInertia& in2)
+	{
+		return FsInertia(M33Sub(in1.ll, in2.ll),
+						 M33Sub(in1.la, in2.la),
+						 M33Sub(in1.aa, in2.aa));
+	}
+
+	// ---- cross-checked operations (scalar reference first, then SIMD) ----
+
+	static Mat33V invertSym33(const Mat33V &m)
+	{
+		const PxMat33 reference = Scalar::invertSym33(unsimdify(m));
+		const Mat33V result = SimdBase::invertSym33(m);
+		compare33(reference, unsimdify(result));
+		return result;
+	}
+
+	static Mat33V invSqrt(const Mat33V &m)
+	{
+		const PxMat33 reference = Scalar::invSqrt(unsimdify(m));
+		const Mat33V result = SimdBase::invSqrt(m);
+		compare33(reference, unsimdify(result));
+		return result;
+	}
+
+	static FsInertia invertInertia(const FsInertia &I)
+	{
+		const SpInertia reference = Scalar::invertInertia(unsimdify(I));
+		const FsInertia result = SimdBase::invertInertia(I);
+		compareInertias(reference, unsimdify(result));
+		return result;
+	}
+
+	// The SIMD call also fills IS; the scalar check uses its own temporary.
+	static Mat33V computeSIS(const FsInertia &I, const Cm::SpatialVectorV S[3], Cm::SpatialVectorV*PX_RESTRICT IS)
+	{
+		Cm::SpatialVector referenceIS[3];
+		Scalar::multiply(referenceIS, unsimdify(I), unsimdify(&S[0]));
+		const PxMat33 reference = Scalar::multiplySym(referenceIS, unsimdify(&S[0]));
+
+		const Mat33V result = SimdBase::computeSIS(I, S, IS);
+
+		compare33(unsimdify(result), reference);
+		return result;
+	}
+
+	// The SIMD call also fills DSI; the scalar check uses its own temporary.
+	static FsInertia multiplySubtract(const FsInertia &I, const Mat33V &D, const Cm::SpatialVectorV IS[3], Cm::SpatialVectorV*PX_RESTRICT DSI)
+	{
+		Cm::SpatialVector referenceDSI[3];
+		Scalar::multiply(referenceDSI, unsimdify(IS), unsimdify(D));
+		const SpInertia reference = Scalar::multiplySubtract(unsimdify(I), referenceDSI, unsimdify(IS));
+
+		const FsInertia result = SimdBase::multiplySubtract(I, D, IS, DSI);
+
+		compareInertias(unsimdify(result), reference);
+		return result;
+	}
+
+	static FsInertia multiplySubtract(const FsInertia &I, const Cm::SpatialVectorV S[3])
+	{
+		const SpInertia reference = Scalar::multiplySubtract(unsimdify(I), unsimdify(S), unsimdify(S));
+		const FsInertia result = SimdBase::multiplySubtract(I, S);
+		compareInertias(unsimdify(result), reference);
+		return result;
+	}
+
+	static FsInertia translateInertia(Vec3V offset, const FsInertia &I)
+	{
+		PxVec3 scalarOffset;
+		V3StoreU(offset, scalarOffset);
+
+		const SpInertia reference = Scalar::translate(scalarOffset, unsimdify(I));
+		const FsInertia result = SimdBase::translateInertia(offset, I);
+		compareInertias(reference, unsimdify(result));
+		return result;
+	}
+
+	static PX_FORCE_INLINE FsInertia propagate(const FsInertia &I,
+											   const Cm::SpatialVectorV S[3],
+											   const Mat33V &load,
+											   const FloatV isf)
+	{
+		const SpInertia reference = Scalar::propagate(unsimdify(I), unsimdify(&S[0]), unsimdify(load), unsimdify(isf));
+		const FsInertia result = Simd::propagate(I, S, load, isf);
+		compareInertias(reference, unsimdify(result));
+		return result;
+	}
+
+	static PX_FORCE_INLINE Mat33V computeDriveInertia(const FsInertia &I0,
+													  const FsInertia &I1,
+													  const Cm::SpatialVectorV S[3])
+	{
+		const PxMat33 reference = Scalar::computeDriveInertia(unsimdify(I0), unsimdify(I1), unsimdify(&S[0]));
+		const Mat33V result = Simd::computeDriveInertia(I0, I1, S);
+		compare33(reference, unsimdify(result));
+		return result;
+	}
+
+private:
+
+	// Largest absolute component of a vector.
+	static PxReal absmax(const PxVec3& n)
+	{
+		const PxReal yz = PxMax(PxAbs(n.y), PxAbs(n.z));
+		return PxMax(PxAbs(n.x), yz);
+	}
+
+	// Largest absolute entry of a matrix.
+	static PxReal norm(const PxMat33& n)
+	{
+		const PxReal col12 = PxMax(absmax(n.column1), absmax(n.column2));
+		return PxMax(absmax(n.column0), col12);
+	}
+
+	// Asserts that 'n' matches 'ref' to within max(1e-3 * norm(ref), 1e-4).
+	static void compare33(const PxMat33& ref, const PxMat33& n)
+	{
+		const PxReal errNorm = norm(ref-n);
+		PX_UNUSED(errNorm);
+		PX_ASSERT(errNorm <= PxMax(norm(ref)*1e-3f, 1e-4f));
+	}
+
+	// Compares the three blocks of two inertias; 'a' is the reference for the tolerance.
+	static void compareInertias(const SpInertia& a, const SpInertia& b)
+	{
+		compare33(a.mLL, b.mLL);
+		compare33(a.mLA, b.mLA);
+		compare33(a.mAA, b.mAA);
+	}
+};
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+// Verification helpers, only compiled when DY_ARTICULATION_DEBUG_VERIFY is enabled.
+//
+// The scalar positive-definiteness tests are static members of ArticulationDiagnostics, so they
+// are called with explicit qualification here; an unqualified call would only see the SIMD
+// overloads declared in this block.
+
+// True if the SIMD 3x3 matrix is positive definite (checked on a 16-byte aligned scalar copy).
+static bool isPositiveDefinite(const Mat33V& m)
+{
+	PX_ALIGN_PREFIX(16) PxMat33 m1 PX_ALIGN_SUFFIX(16);
+	PxMat33_From_Mat33V(m, m1);
+	return ArticulationDiagnostics::isPositiveDefinite(m1);
+}
+
+
+// True if the SIMD spatial inertia is positive definite (checked on its scalar conversion).
+static bool isPositiveDefinite(const FsInertia& s)
+{
+	return ArticulationDiagnostics::isPositiveDefinite(ArticulationFnsDebug::unsimdify(s));
+}
+
+// Euclidean length of a spatial vector, treating linear and angular parts as one 6-vector.
+static PxReal magnitude(const Cm::SpatialVectorV &v)
+{
+	// FStore writes through a pointer rather than returning a value, so the FloatV results are
+	// converted with the existing unsimdify(FloatV) helper.
+	const PxReal linearSq = ArticulationFnsDebug::unsimdify(V3Dot(v.linear, v.linear));
+	const PxReal angularSq = ArticulationFnsDebug::unsimdify(V3Dot(v.angular, v.angular));
+	return PxSqrt(linearSq + angularSq);
+}
+
+// True if 'test' is within a relative 'tolerance' of 'ref' (measured against |ref|).
+static bool almostEqual(const Cm::SpatialVectorV &ref, const Cm::SpatialVectorV& test, PxReal tolerance)
+{
+	return magnitude(ref-test)<=tolerance*magnitude(ref);
+}
+#endif
+}
+}
+
+#endif //DY_ARTICULATION_DEBUG_FNS_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsScalar.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsScalar.h
new file mode 100644
index 00000000..1efb2708
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsScalar.h
@@ -0,0 +1,397 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef DY_ARTICULATION_SCALAR_FNS_H
+#define DY_ARTICULATION_SCALAR_FNS_H
+
+// Scalar helpers for articulations
+
+#include "DyArticulationUtils.h"
+#include "DyArticulationScalar.h"
+#include "DySpatial.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+/*
+namespace
+{
+ static void print(const PxMat33 &m)
+ {
+ printf("(%f, %f, %f)\n(%f, %f, %f)\n(%f, %f, %f)\n\n",
+ m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]);
+ }
+
+ static void print(const Cm::SpatialVector *v, PxU32 count)
+ {
+ for(PxU32 i=0;i<count;i++)
+ {
+ printf("(%f, %f, %f), (%f, %f, %f)\n",
+ v[i].linear.x, v[i].linear.y, v[i].linear.z,
+ v[i].angular.x, v[i].angular.y, v[i].angular.z);
+ }
+ }
+}
+*/
+
+// Diagnostic predicates (symmetry / positive definiteness) for 3x3 matrices and spatial
+// inertias. All members are static.
+class ArticulationDiagnostics
+{
+public:
+// Attempts a Cholesky-style factorisation of 'in' into 'out'.
+// Returns false as soon as a pivot is <= 0 ('out' is then left partially processed).
+// On success the entries out[1][0], out[2][0] and out[2][1] are cleared and true is returned.
+static bool cholesky(const PxMat33& in, PxMat33& out)
+{
+	out = in;
+
+	if(out[0][0]<=0)
+		return false;
+
+	out[0] /= PxSqrt(out[0][0]);
+	out[1] -= out[0][1]*out[0];
+	out[2] -= out[0][2]*out[0];
+
+	if(out[1][1]<=0)
+		return false;
+
+	out[1] /= PxSqrt(out[1][1]);
+
+	out[2] -= out[1][2]*out[1];
+	if(out[2][2]<=0)
+		return false;
+	out[2] /= PxSqrt(out[2][2]);
+
+	out[1][0] = out[2][0] = out[2][1] = 0;
+	return true;
+}
+
+// Exact (bitwise float equality) symmetry test of the off-diagonal entries.
+static bool isSymmetric(const PxMat33&a)
+{
+	return a[0][1] == a[1][0] && a[0][2] == a[2][0] && a[1][2] == a[2][1];
+}
+
+// Symmetry test for a SIMD matrix, performed on a scalar copy.
+static bool isSymmetric(const Mat33V&a)
+{
+	// The destination is 16-byte aligned, matching the other PxMat33_From_Mat33V call sites in
+	// the articulation debug helpers.
+	PX_ALIGN(16, PxMat33) m;
+	PxMat33_From_Mat33V(a,m);
+	return isSymmetric(m);
+}
+
+// A spatial inertia is considered symmetric when its LL and AA blocks are; the LA block is
+// not inspected.
+static bool isSymmetric(const SpInertia&a)
+{
+	return isSymmetric(a.mLL) && isSymmetric(a.mAA);
+}
+
+
+// True if the factorisation in cholesky() succeeds for 'm'.
+static bool isPositiveDefinite(const PxMat33& m)
+{
+	PxMat33 _;
+	return cholesky(m, _);
+}
+
+
+// Block test for a spatial inertia: factorise the LL block, then test the remainder
+// mAA - bt^T * bt, where bt = inverse(factor of LL) * mLA.
+static bool isPositiveDefinite(const SpInertia &s)
+{
+	// compute
+	// (a 0)
+	// (b c)
+
+	PxMat33 a;
+	if(!cholesky(s.mLL, a))
+		return false;
+
+	PxMat33 bt = a.getInverse() * s.mLA;
+	PxMat33 x = s.mAA - bt.getTranspose()*bt;
+	PxMat33 c;
+	return cholesky(x, c);
+}
+
+};
+
+// Scalar (non-SIMD) implementations of the spatial-algebra helpers used by the articulation
+// code. All members are static.
+class ArticulationFnsScalar
+{
+public:
+
+ // Returns (v.linear + p x v.angular, v.angular).
+ static PX_FORCE_INLINE Cm::SpatialVector translateMotion(const PxVec3& p, const Cm::SpatialVector& v)
+ {
+ return Cm::SpatialVector(v.linear + p.cross(v.angular), v.angular);
+ }
+
+ // translate a force resolved at position p to the origin
+
+ static PX_FORCE_INLINE Cm::SpatialVector translateForce(const PxVec3& p, const Cm::SpatialVector& v)
+ {
+ return Cm::SpatialVector(v.linear, v.angular + p.cross(v.linear));
+ }
+
+ // Inverse of a symmetric 3x3 matrix from column cross products divided by the determinant.
+ // The result is assembled so that it is exactly symmetric (off-diagonal entries are reused).
+ // Asserts that the determinant is non-zero.
+ static PX_FORCE_INLINE PxMat33 invertSym33(const PxMat33& in)
+ {
+ PxVec3 v0 = in[1].cross(in[2]),
+ v1 = in[2].cross(in[0]),
+ v2 = in[0].cross(in[1]);
+
+ PxReal det = v0.dot(in[0]);
+
+
+ PX_ASSERT(det!=0);
+ PxReal recipDet = 1.0f/det;
+
+ return PxMat33(v0 * recipDet,
+ PxVec3(v0.y, v1.y, v1.z) * recipDet,
+ PxVec3(v0.z, v1.z, v2.z) * recipDet);
+ }
+
+ // Returns I minus the three dyads formed from matching entries of in0 and in1.
+ static PX_FORCE_INLINE SpInertia multiplySubtract(const SpInertia& I, const Cm::SpatialVector in0[3], const Cm::SpatialVector in1[3])
+ {
+ return I - SpInertia::dyad(in0[0], in1[0])
+ - SpInertia::dyad(in0[1], in1[1])
+ - SpInertia::dyad(in0[2], in1[2]);
+ }
+
+ // Builds the 3x3 matrix of dot products IS[i].S[j]. Only the six entries with i <= j are
+ // computed and mirrored, so the result is symmetric by construction.
+ static PX_FORCE_INLINE PxMat33 multiplySym(const Cm::SpatialVector* IS, const Cm::SpatialVector* S)
+ {
+ // return PxMat33(axisDot(IS, S[0]), axisDot(IS, S[1]), axisDot(IS, S[2]));
+
+ PxReal a00 = IS[0].dot(S[0]), a01 = IS[0].dot(S[1]), a02 = IS[0].dot(S[2]),
+ a11 = IS[1].dot(S[1]), a12 = IS[1].dot(S[2]),
+ a22 = IS[2].dot(S[2]);
+
+ return PxMat33(PxVec3(a00, a01, a02),
+ PxVec3(a01, a11, a12),
+ PxVec3(a02, a12, a22));
+ }
+
+ // out[i] = I * in[i] for the three vectors.
+ static PX_FORCE_INLINE void multiply(Cm::SpatialVector out[3], const SpInertia& I, const Cm::SpatialVector in[3])
+ {
+ out[0] = I * in[0];
+ out[1] = I * in[1];
+ out[2] = I * in[2];
+ }
+
+ // out[i] = linear combination of in[0..2] weighted by column D[i].
+ static PX_FORCE_INLINE void multiply(Cm::SpatialVector out[3], const Cm::SpatialVector in[3], const PxMat33& D)
+ {
+ out[0] = axisMultiply(in, D[0]);
+ out[1] = axisMultiply(in, D[1]);
+ out[2] = axisMultiply(in, D[2]);
+ }
+
+ // Inverse of the Cholesky factor of 'm' (see the inline notes). No check is made that the
+ // arguments of the reciprocal square roots are positive.
+ static PxMat33 invSqrt(const PxMat33 &m)
+ {
+ // cholesky factor to
+ // (a 0 0)
+ // (b c 0)
+ // (d e f)
+ // except that a,c,f are the reciprocal sqrts rather than sqrts
+
+ PxVec3 v0 = m.column0, v1 = m.column1, v2 = m.column2;
+
+ PxReal a = PxRecipSqrt(v0.x);
+ PxReal b = v0.y*a;
+ PxReal c = PxRecipSqrt(v1.y - b*b);
+ PxReal d = v0.z*a;
+ PxReal e = (v1.z-d*b) * c;
+ PxReal f = PxRecipSqrt(v2.z - d*d - e*e);
+
+ // invert
+ PxReal x = -b*a*c, y = (-e*x-d*a)*f, z = -e*c*f;
+
+ PxMat33 r(PxVec3(a, 0, 0 ),
+ PxVec3(x, c, 0 ),
+ PxVec3(y, z, f));
+
+ return r;
+ }
+
+
+ // Returns the symmetric 3x3 matrix of dot products (I*S[i]).S[j].
+ static PX_FORCE_INLINE PxMat33 computeSIS(const Cm::SpatialVector S[3], const SpInertia& I)
+ {
+ Cm::SpatialVector IS[3];
+ multiply(IS, I, S);
+ return multiplySym(IS, S);
+ }
+
+ // translate from COM-centered world-aligned inertia matrix to a displaced frame
+ static PX_INLINE SpInertia translate(const PxVec3& p, const SpInertia& i)
+ {
+ PxMat33 S = Ps::star(p), ST = S.getTranspose();
+ PxMat33 sla = S * i.mLA, llst = i.mLL * ST;
+// return SpInertia(i.mLL, i.mLA + llst, i.mAA + sla + sla.getTranspose() + S * llst);
+
+ // this yields a symmetric result
+ PxMat33 t = sla+S*llst*0.5f;
+ return SpInertia(i.mLL, i.mLA + llst, i.mAA + (t+t.getTranspose())); }
+
+ // Returns a[0]*v[0] + a[1]*v[1] + a[2]*v[2].
+ static PX_FORCE_INLINE Cm::SpatialVector axisMultiply(const Cm::SpatialVector* a, const PxVec3& v)
+ {
+ return a[0]*v[0]+a[1]*v[1]+a[2]*v[2];
+ }
+
+ // Returns the vector of dot products (a[0].v, a[1].v, a[2].v).
+ static PX_FORCE_INLINE PxVec3 axisDot(const Cm::SpatialVector* a, const Cm::SpatialVector& v)
+ {
+ return PxVec3(a[0].dot(v), a[1].dot(v), a[2].dot(v));
+ }
+
+ // Block-wise inverse of a spatial inertia via the Schur complement of the AA block.
+ // The AA and LL blocks are symmetrised (averaged with their transposes) before use.
+ static PX_FORCE_INLINE SpInertia invertInertia(const SpInertia& I)
+ {
+ PxMat33 aa = I.mAA, ll = I.mLL, la = I.mLA;
+
+ aa = (aa + aa.getTranspose())*0.5f;
+ ll = (ll + ll.getTranspose())*0.5f;
+
+ PxMat33 AAInv = invertSym33(aa);
+
+ PxMat33 z = -la * AAInv;
+ PxMat33 S = ll + z * la.getTranspose(); // Schur complement of mAA
+
+ PxMat33 LL = invertSym33(S);
+
+ PxMat33 LA = LL * z;
+ PxMat33 AA = AAInv + z.getTranspose() * LA;
+
+ SpInertia result(LL, LA, AA);
+
+ return result;
+ }
+
+ // Returns I - ISD*ISD^T (as a sum of dyads), where ISD = (I*S) * invSqrt(S^T*I*S + load*isf).
+ static SpInertia propagate(const SpInertia& I,
+ const Cm::SpatialVector S[3],
+ const PxMat33& load,
+ PxReal isf)
+ {
+ Cm::SpatialVector IS[3], ISD[3];
+ multiply(IS, I, S);
+
+ // Arguments are passed as (S, IS); the dot products are the same as for (IS, S).
+ PxMat33 SIS = multiplySym(S, IS);
+
+ // yields a symmetric result
+ PxMat33 D = invSqrt(SIS+load*isf);
+ multiply(ISD, IS, D);
+ return multiplySubtract(I, ISD, ISD);
+ }
+
+ // Computes the 3x3 "drive inertia" for the joint axes S from the inertias I0 and I1.
+ // Asserts that the result is finite, symmetric and positive definite.
+ static PxMat33 computeDriveInertia(const SpInertia& I0,
+ const SpInertia& I1,
+ const Cm::SpatialVector S[3])
+ {
+ // this could be a lot more efficient, especially since it can be combined with
+ // the inertia accumulation. Also it turns out to be symmetric in I0 and I1, which
+ // isn't obvious from the formulation, so it's likely there's a more efficient formulation
+
+ PxMat33 D = invertSym33(computeSIS(S,I0));
+ Cm::SpatialVector IS[3], ISD[3];
+
+ multiply(IS,I0,S);
+ multiply(ISD, IS, D);
+
+ SpInertia tot = multiplySubtract(I0+I1,ISD,IS);
+ SpInertia invTot = invertInertia(tot);
+
+ PxMat33 E = computeSIS(ISD,invTot);
+
+ PxMat33 load = invertSym33(E+D);
+
+ PX_ASSERT(load[0].isFinite() && load[1].isFinite() && load[2].isFinite());
+ PX_ASSERT(ArticulationDiagnostics::isSymmetric(load) && ArticulationDiagnostics::isPositiveDefinite(load));
+ return load;
+ }
+
+ // Propagates the spatial impulse Z across a joint row.
+ // SZ (output) receives Z.angular + Z.linear x jointOffset; the return value is
+ // Z - DSI*SZ translated by the parent offset. 'aux' is only used by the optional
+ // DY_ARTICULATION_DEBUG_VERIFY cross-check against ArticulationRef.
+ static PX_INLINE Cm::SpatialVector propagateImpulse(const FsRow& row,
+ const FsJointVectors& jv,
+ PxVec3& SZ,
+ const Cm::SpatialVector& Z,
+ const FsRowAux& aux)
+ {
+ PX_UNUSED(aux);
+ SZ = Z.angular + Z.linear.cross(getJointOffset(jv));
+ Cm::SpatialVector result = translateForce(getParentOffset(jv), Z - axisMultiply(getDSI(row), SZ));
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ PxVec3 SZcheck;
+ Cm::SpatialVector check = ArticulationRef::propagateImpulse(row, jv, SZcheck, Z, aux);
+ PX_ASSERT((result-check).magnitude()<1e-5*PxMax(check.magnitude(), 1.0f));
+ PX_ASSERT((SZ-SZcheck).magnitude()<1e-5*PxMax(SZcheck.magnitude(), 1.0f));
+#endif
+ return result;
+ }
+
+ // Propagates the spatial velocity v across a joint row, using the SZ value produced by
+ // propagateImpulse(). 'aux' is only used by the optional DY_ARTICULATION_DEBUG_VERIFY
+ // cross-check against ArticulationRef.
+ static PX_INLINE Cm::SpatialVector propagateVelocity(const FsRow& row,
+ const FsJointVectors& jv,
+ const PxVec3& SZ,
+ const Cm::SpatialVector& v,
+ const FsRowAux& aux)
+ {
+ PX_UNUSED(aux);
+
+ Cm::SpatialVector w = translateMotion(-getParentOffset(jv), v);
+ PxVec3 DSZ = multiply(row.D, SZ);
+
+ PxVec3 n = axisDot(getDSI(row), w) + DSZ;
+ Cm::SpatialVector result = w - Cm::SpatialVector(getJointOffset(jv).cross(n),n);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ Cm::SpatialVector check = ArticulationRef::propagateVelocity(row, jv, SZ, v, aux);
+ PX_ASSERT((result-check).magnitude()<1e-5*PxMax(check.magnitude(), 1.0f));
+#endif
+ return result;
+ }
+
+
+ // m * v, reading the SIMD columns in place as PxVec3 (relies on the first three floats of
+ // each column being x, y, z).
+ static PX_FORCE_INLINE PxVec3 multiply(const Mat33V& m, const PxVec3& v)
+ {
+ return reinterpret_cast<const PxVec3&>(m.col0) * v.x
+ + reinterpret_cast<const PxVec3&>(m.col1) * v.y
+ + reinterpret_cast<const PxVec3&>(m.col2) * v.z;
+ }
+
+ // transpose(m) * v, using the same in-place column reinterpretation as multiply().
+ static PX_FORCE_INLINE PxVec3 multiplyTranspose(const Mat33V& m, const PxVec3& v)
+ {
+ return PxVec3(v.dot(reinterpret_cast<const PxVec3&>(m.col0)),
+ v.dot(reinterpret_cast<const PxVec3&>(m.col1)),
+ v.dot(reinterpret_cast<const PxVec3&>(m.col2)));
+ }
+
+ // Applies a SIMD spatial inertia to a scalar spatial vector:
+ // (ll*lin + la*ang, la^T*lin + aa*ang).
+ static Cm::SpatialVector multiply(const FsInertia& m, const Cm::SpatialVector& v)
+ {
+ return Cm::SpatialVector(multiply(m.ll,v.linear) + multiply(m.la,v.angular),
+ multiplyTranspose(m.la, v.linear) + multiply(m.aa, v.angular));
+ }
+
+ // Velocity change of the root for impulse Z: root inverse inertia applied to Z.
+ static PX_FORCE_INLINE Cm::SpatialVector getRootDeltaV(const FsData& matrix, const Cm::SpatialVector& Z)
+ {
+ return multiply(getRootInverseInertia(matrix), Z);
+ }
+};
+
+}
+
+}
+
+#endif //DY_ARTICULATION_SCALAR_FNS_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsSimd.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsSimd.h
new file mode 100644
index 00000000..182abc66
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsSimd.h
@@ -0,0 +1,438 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef DY_ARTICULATION_SIMD_FNS_H
+#define DY_ARTICULATION_SIMD_FNS_H
+
+#include "DyArticulationUtils.h"
+
+namespace physx
+{
+namespace Dy
+{
+
+// Raw byte storage sized for `count` objects of type T, exposed as a T* through an
+// implicit conversion. The bytes are reinterpreted only; nothing here constructs a T.
+template <typename T, PxU32 count>
+class PodULike
+{
+    PxU8 mBytes[sizeof(T)*count];
+
+public:
+    PX_FORCE_INLINE operator T*()
+    {
+        return reinterpret_cast<T*>(mBytes);
+    }
+};
+
+// Declares PodULike<_T, _count> variables wrapped in the PX_ALIGN_PREFIX/PX_ALIGN_SUFFIX pair for _alignment.
+#define POD_U_LIKE(_T, _count, _alignment) PX_ALIGN_PREFIX(_alignment) PodULike<_T, _count> PX_ALIGN_SUFFIX(_alignment)
+
+class ArticulationFnsSimdBase
+{
+public:
+
+ // Block-wise sum of two inertias: the ll, la and aa sub-matrices are added independently.
+ static PX_FORCE_INLINE FsInertia addInertia(const FsInertia& in1, const FsInertia& in2)
+ {
+     const Mat33V ll = M33Add(in1.ll, in2.ll);
+     const Mat33V la = M33Add(in1.la, in2.la);
+     const Mat33V aa = M33Add(in1.aa, in2.aa);
+     return FsInertia(ll, la, aa);
+ }
+
+ // Block-wise difference in1 - in2 of the ll, la and aa sub-matrices.
+ static PX_FORCE_INLINE FsInertia subtractInertia(const FsInertia& in1, const FsInertia& in2)
+ {
+     const Mat33V ll = M33Sub(in1.ll, in2.ll);
+     const Mat33V la = M33Sub(in1.la, in2.la);
+     const Mat33V aa = M33Sub(in1.aa, in2.aa);
+     return FsInertia(ll, la, aa);
+ }
+
+ // Returns the three spatial dot products (S[k].linear.v.linear + S[k].angular.v.angular), k = 0..2.
+ static PX_FORCE_INLINE Vec3V axisDot(const Cm::SpatialVectorV S[3], const Cm::SpatialVectorV &v)
+ {
+     const FloatV d0 = FAdd(V3Dot(S[0].linear,v.linear), V3Dot(S[0].angular,v.angular));
+     const FloatV d1 = FAdd(V3Dot(S[1].linear,v.linear), V3Dot(S[1].angular,v.angular));
+     const FloatV d2 = FAdd(V3Dot(S[2].linear,v.linear), V3Dot(S[2].angular,v.angular));
+     return V3Merge(d0, d1, d2);
+ }
+
+ // Linear combination S[0]*v.x + S[1]*v.y + S[2]*v.z, applied to the linear and angular parts alike.
+ static PX_FORCE_INLINE Cm::SpatialVectorV axisMultiply(const Cm::SpatialVectorV S[3], Vec3V v)
+ {
+     const FloatV x = V3GetX(v);
+     const FloatV y = V3GetY(v);
+     const FloatV z = V3GetZ(v);
+
+     const Vec3V lin = V3ScaleAdd(S[0].linear, x, V3ScaleAdd(S[1].linear, y, V3Scale(S[2].linear, z)));
+     const Vec3V ang = V3ScaleAdd(S[0].angular, x, V3ScaleAdd(S[1].angular, y, V3Scale(S[2].angular, z)));
+
+     return Cm::SpatialVectorV(lin, ang);
+ }
+
+
+ // Component-wise a - b on the linear and angular parts.
+ static PX_FORCE_INLINE Cm::SpatialVectorV subtract(const Cm::SpatialVectorV &a, const Cm::SpatialVectorV &b)
+ {
+     const Vec3V lin = V3Sub(a.linear, b.linear);
+     const Vec3V ang = V3Sub(a.angular, b.angular);
+     return Cm::SpatialVectorV(lin, ang);
+ }
+
+ // Component-wise a + b on the linear and angular parts.
+ static PX_FORCE_INLINE Cm::SpatialVectorV add(const Cm::SpatialVectorV &a, const Cm::SpatialVectorV &b)
+ {
+     const Vec3V lin = V3Add(a.linear, b.linear);
+     const Vec3V ang = V3Add(a.angular, b.angular);
+     return Cm::SpatialVectorV(lin, ang);
+ }
+
+
+ // Applies the block matrix [ll la; la' aa] stored in I to the spatial vector S.
+ static PX_FORCE_INLINE Cm::SpatialVectorV multiply(const FsInertia &I, const Cm::SpatialVectorV &S)
+ {
+     // linear  = ll*S.linear  + la*S.angular
+     const Vec3V lin = V3Add(M33MulV3(I.ll,S.linear), M33MulV3(I.la,S.angular));
+     // angular = la'*S.linear + aa*S.angular
+     const Vec3V ang = V3Add(M33TrnspsMulV3(I.la,S.linear), M33MulV3(I.aa,S.angular));
+
+     return Cm::SpatialVectorV(lin, ang);
+ }
+
+
+ // Translates a motion vector by p: linear becomes linear + p x angular, angular is unchanged.
+ static PX_FORCE_INLINE Cm::SpatialVectorV translateMotion(const Vec3V& p, const Cm::SpatialVectorV& v)
+ {
+     const Vec3V shiftedLinear = V3Add(v.linear, V3Cross(p, v.angular));
+     return Cm::SpatialVectorV(shiftedLinear, v.angular);
+ }
+
+ // Translate a force resolved at position p to the origin:
+ // linear is unchanged, angular becomes angular + p x linear.
+
+ static PX_FORCE_INLINE Cm::SpatialVectorV translateForce(const Vec3V& p, const Cm::SpatialVectorV& v)
+ {
+     const Vec3V shiftedAngular = V3Add(v.angular, V3Cross(p, v.linear));
+     return Cm::SpatialVectorV(v.linear, shiftedAngular);
+ }
+
+ // Inverse of a 3x3 matrix that is treated as symmetric, built from cross products of its
+ // columns and scaled by the reciprocal determinant. The determinant is not tested
+ // before FRecip is applied to it.
+ static PX_FORCE_INLINE Mat33V invertSym33(const Mat33V &m)
+ {
+ // pairwise cross products of the columns; det = a0 . col0
+ Vec3V a0 = V3Cross(m.col1, m.col2);
+ Vec3V a1 = V3Cross(m.col2, m.col0);
+ Vec3V a2 = V3Cross(m.col0, m.col1);
+ FloatV det = V3Dot(a0, m.col0);
+ FloatV recipDet = FRecip(det);
+
+ // copy the entries of earlier columns into the mirrored slots of later ones:
+ // a1.x <- a0.y, a2.x <- a0.z, a2.y <- a1.z
+ a1 = V3SetX(a1, V3GetY(a0));
+ a2 = V3Merge(V3GetZ(a0), V3GetZ(a1), V3GetZ(a2)); // make sure it's symmetric
+
+ return Mat33V(V3Scale(a0, recipDet),
+ V3Scale(a1, recipDet),
+ V3Scale(a2, recipDet));
+ }
+
+
+ // sqrt(max(0, 1/v)): the reciprocal is clamped at zero before the square root is taken.
+ static PX_FORCE_INLINE FloatV safeInvSqrt(FloatV v)
+ {
+     const FloatV recip = FRecip(v);
+     const FloatV clamped = FMax(FZero(), recip);
+     return FSqrt(clamped);
+ }
+ // Builds a triangular 3x3 factor of m from the Cholesky-style recurrence sketched below and
+ // then inverts that factor. Only m.col0, the y/z entries of m.col1 and the z entry of m.col2
+ // are read. Diagonal terms go through safeInvSqrt, so a non-positive pivot yields 0.
+ // The trailing "PxReal ..." comments show the scalar formula each SIMD line implements.
+ static PX_FORCE_INLINE Mat33V invSqrt(const Mat33V& m)
+ {
+ // cholesky factor to
+ // (a 0 0)
+ // (b c 0)
+ // (d e f)
+ // except that a,c,f are the reciprocal sqrts rather than sqrts
+
+ // PxVec3 v0 = m.column0, v1 = m.column1, v2 = m.column2;
+ Vec3V v0 = m.col0, v1 = m.col1, v2 = m.col2;
+
+ const FloatV x0 = V3GetX(v0), y1 = V3GetY(v1), z2 = V3GetZ(v2);
+
+ FloatV a = safeInvSqrt(x0); // PxReal a = PxRecipSqrt(v0.x);
+
+ Vec3V abd = V3Scale(v0, a); // PxReal b = v0.y*a;
+ FloatV b = V3GetY(abd);
+
+ FloatV c2 = FNegScaleSub(b, b, y1); // PxReal c = PxRecipSqrt(v1.y - b*b);
+ FloatV c = safeInvSqrt(c2);
+
+ FloatV d = V3GetZ(abd); // PxReal d = v0.z*a;
+
+ FloatV e = FMul(FNegScaleSub(b, d, V3GetZ(v1)), c); // PxReal e = (v1.z-d*b) * c;
+
+ FloatV f2 = FNegScaleSub(d, d, FNegScaleSub(e, e, z2)); // PxReal f = PxRecipSqrt(v2.z - d*d - e*e);
+ FloatV f = safeInvSqrt(f2);
+
+ // invert
+ // Note: the locals x and z hold b*a*c and e*c*f without the minus sign shown in the
+ // trailing comments; the sign is applied by the FNeg calls in the return statement.
+ // y is formed as (e*x - d*a)*f from that unsigned x.
+ FloatV x = FMul(FMul(b,a),c), // x = -b*a*c
+ y = FMul((FNegScaleSub(d,a, FMul(e,x))), f), // y = (-e*x-d*a)*f
+ z = FMul(e, FMul(c,f)); // z = -e*c*f
+
+ return Mat33V(V3Merge(a, FZero(), FZero()),
+ V3Merge(FNeg(x), c, FZero()),
+ V3Merge(y, FNeg(z), f));
+ }
+
+
+ // Block-wise inverse of the inertia [ll la; la' aa].
+ // aa and ll are first symmetrised as (M + M')/2, then:
+ //   AAInv = inverse(aa),  z = -la * AAInv,  S = ll + z * la'
+ //   LL = inverse(S),  LA = LL * z,  AA = AAInv + z' * LA
+ // Both 3x3 inversions use invertSym33.
+ static PX_FORCE_INLINE FsInertia invertInertia(const FsInertia &I)
+ {
+ Mat33V aa = M33Scale(M33Add(I.aa, M33Trnsps(I.aa)), FHalf());
+ Mat33V ll = M33Scale(M33Add(I.ll, M33Trnsps(I.ll)), FHalf());
+
+ Mat33V AAInv = invertSym33(aa);
+ Mat33V z = M33MulM33(M33Neg(I.la), AAInv);
+ Mat33V S = M33Add(ll, M33MulM33(z, M33Trnsps(I.la)));
+
+ Mat33V LL = invertSym33(S);
+ Mat33V LA = M33MulM33(LL, z);
+ Mat33V AA = M33Add(AAInv, M33MulM33(M33Trnsps(z), LA));
+
+ return FsInertia(LL, LA, AA);
+ }
+
+ // Computes IS[k] = I * S[k] for k = 0..2 and returns the 3x3 matrix whose (i,j) entry is the
+ // spatial dot product S[i] . IS[j]. Only the six entries a00..a22 with i <= j are computed;
+ // the returned matrix reuses them for the mirrored positions.
+ // S is copied into locals before IS is written, and IS is written only after all reads.
+ static PX_NOINLINE Mat33V computeSIS(const FsInertia &I, const Cm::SpatialVectorV S[3], Cm::SpatialVectorV IS[3])
+ {
+ Vec3V S0l = S[0].linear, S0a = S[0].angular;
+ Vec3V S1l = S[1].linear, S1a = S[1].angular;
+ Vec3V S2l = S[2].linear, S2a = S[2].angular;
+
+ // IS[k]: linear = ll*Sl + la*Sa, angular = la'*Sl + aa*Sa
+ Vec3V IS0l = V3Add(M33MulV3(I.ll,S0l), M33MulV3(I.la,S0a));
+ Vec3V IS0a = V3Add(M33TrnspsMulV3(I.la,S0l), M33MulV3(I.aa,S0a));
+ Vec3V IS1l = V3Add(M33MulV3(I.ll,S1l), M33MulV3(I.la,S1a));
+ Vec3V IS1a = V3Add(M33TrnspsMulV3(I.la,S1l), M33MulV3(I.aa,S1a));
+ Vec3V IS2l = V3Add(M33MulV3(I.ll,S2l), M33MulV3(I.la,S2a));
+ Vec3V IS2a = V3Add(M33TrnspsMulV3(I.la,S2l), M33MulV3(I.aa,S2a));
+
+ // compute SIS
+ FloatV a00 = FAdd(V3Dot(S0l, IS0l), V3Dot(S0a, IS0a));
+ FloatV a01 = FAdd(V3Dot(S0l, IS1l), V3Dot(S0a, IS1a));
+ FloatV a02 = FAdd(V3Dot(S0l, IS2l), V3Dot(S0a, IS2a));
+ FloatV a11 = FAdd(V3Dot(S1l, IS1l), V3Dot(S1a, IS1a));
+ FloatV a12 = FAdd(V3Dot(S1l, IS2l), V3Dot(S1a, IS2a));
+ FloatV a22 = FAdd(V3Dot(S2l, IS2l), V3Dot(S2a, IS2a));
+
+ // write IS, a useful side-effect
+ IS[0].linear = IS0l; IS[0].angular = IS0a;
+ IS[1].linear = IS1l; IS[1].angular = IS1a;
+ IS[2].linear = IS2l; IS[2].angular = IS2a;
+
+ return Mat33V(V3Merge(a00, a01, a02),
+ V3Merge(a01, a11, a12),
+ V3Merge(a02, a12, a22));
+ }
+
+
+ // Forms DSI[k] = IS[0]*D.colk.x + IS[1]*D.colk.y + IS[2]*D.colk.z, writes DSI to the output
+ // array, and returns I with one dyad per k subtracted from each of its ll, la and aa blocks.
+ // IS and D are copied into locals before DSI is written.
+ static PX_FORCE_INLINE FsInertia multiplySubtract(const FsInertia &I, const Mat33V &D, const Cm::SpatialVectorV IS[3], Cm::SpatialVectorV DSI[3])
+ {
+ // cut'n'paste, how I love ya, how I love ya
+
+ Vec3V IS0l = IS[0].linear, IS0a = IS[0].angular;
+ Vec3V IS1l = IS[1].linear, IS1a = IS[1].angular;
+ Vec3V IS2l = IS[2].linear, IS2a = IS[2].angular;
+
+ Vec3V D0 = D.col0, D1 = D.col1, D2 = D.col2;
+
+ // compute DSI: column k of D supplies the weights for combining IS[0..2]
+ Vec3V DSI0l = V3ScaleAdd(IS0l, V3GetX(D0), V3ScaleAdd(IS1l, V3GetY(D0), V3Scale(IS2l, V3GetZ(D0))));
+ Vec3V DSI1l = V3ScaleAdd(IS0l, V3GetX(D1), V3ScaleAdd(IS1l, V3GetY(D1), V3Scale(IS2l, V3GetZ(D1))));
+ Vec3V DSI2l = V3ScaleAdd(IS0l, V3GetX(D2), V3ScaleAdd(IS1l, V3GetY(D2), V3Scale(IS2l, V3GetZ(D2))));
+
+ Vec3V DSI0a = V3ScaleAdd(IS0a, V3GetX(D0), V3ScaleAdd(IS1a, V3GetY(D0), V3Scale(IS2a, V3GetZ(D0))));
+ Vec3V DSI1a = V3ScaleAdd(IS0a, V3GetX(D1), V3ScaleAdd(IS1a, V3GetY(D1), V3Scale(IS2a, V3GetZ(D1))));
+ Vec3V DSI2a = V3ScaleAdd(IS0a, V3GetX(D2), V3ScaleAdd(IS1a, V3GetY(D2), V3Scale(IS2a, V3GetZ(D2))));
+
+ // compute J = I - DSI' IS. Each row of DSI' IS generates an inertia dyad
+
+ Vec3V ll0 = I.ll.col0, ll1 = I.ll.col1, ll2 = I.ll.col2;
+ Vec3V la0 = I.la.col0, la1 = I.la.col1, la2 = I.la.col2;
+ Vec3V aa0 = I.aa.col0, aa1 = I.aa.col1, aa2 = I.aa.col2;
+
+ // SUBTRACT_DYAD(_a, _b): column j of ll/la/aa loses _b (linear or angular part) scaled by
+ // component j of _a (linear or angular part): ll uses (_bl, _al), la uses (_bl, _aa), aa uses (_ba, _aa).
+#define SUBTRACT_DYAD(_a, _b) \
+ ll0 = V3NegScaleSub(_b##l, V3GetX(_a##l), ll0); la0 = V3NegScaleSub(_b##l, V3GetX(_a##a), la0); aa0 = V3NegScaleSub(_b##a, V3GetX(_a##a), aa0); \
+ ll1 = V3NegScaleSub(_b##l, V3GetY(_a##l), ll1); la1 = V3NegScaleSub(_b##l, V3GetY(_a##a), la1); aa1 = V3NegScaleSub(_b##a, V3GetY(_a##a), aa1); \
+ ll2 = V3NegScaleSub(_b##l, V3GetZ(_a##l), ll2); la2 = V3NegScaleSub(_b##l, V3GetZ(_a##a), la2); aa2 = V3NegScaleSub(_b##a, V3GetZ(_a##a), aa2);
+
+ SUBTRACT_DYAD(IS0, DSI0);
+ SUBTRACT_DYAD(IS1, DSI1);
+ SUBTRACT_DYAD(IS2, DSI2);
+#undef SUBTRACT_DYAD
+
+ // publish DSI to the caller
+ DSI[0].linear = DSI0l; DSI[0].angular = DSI0a;
+ DSI[1].linear = DSI1l; DSI[1].angular = DSI1a;
+ DSI[2].linear = DSI2l; DSI[2].angular = DSI2a;
+
+ return FsInertia(Mat33V(ll0, ll1, ll2),
+ Mat33V(la0, la1, la2),
+ Mat33V(aa0, aa1, aa2));
+ }
+
+
+ // Returns I with the dyad of each S[k] with itself subtracted from the ll, la and aa blocks.
+ // Unlike the four-argument overload, both factors of every dyad are the same vector S[k]
+ // and nothing is written back to the caller.
+ static PX_FORCE_INLINE FsInertia multiplySubtract(const FsInertia &I, const Cm::SpatialVectorV S[3])
+ {
+ // cut'n'paste, how I love ya, how I love ya
+
+ const Vec3V S0l = S[0].linear, S0a = S[0].angular;
+ const Vec3V S1l = S[1].linear, S1a = S[1].angular;
+ const Vec3V S2l = S[2].linear, S2a = S[2].angular;
+
+ // compute J = I - sum over k of S[k] S[k]'. Each S[k] generates one inertia dyad
+
+ Vec3V ll0 = I.ll.col0, ll1 = I.ll.col1, ll2 = I.ll.col2;
+ Vec3V la0 = I.la.col0, la1 = I.la.col1, la2 = I.la.col2;
+ Vec3V aa0 = I.aa.col0, aa1 = I.aa.col1, aa2 = I.aa.col2;
+
+ // SUBTRACT_DYAD(_a, _b): column j of ll/la/aa loses _b (linear or angular part) scaled by
+ // component j of _a (linear or angular part); here _a and _b are always the same vector.
+#define SUBTRACT_DYAD(_a, _b) \
+ ll0 = V3NegScaleSub(_b##l, V3GetX(_a##l), ll0); la0 = V3NegScaleSub(_b##l, V3GetX(_a##a), la0); aa0 = V3NegScaleSub(_b##a, V3GetX(_a##a), aa0); \
+ ll1 = V3NegScaleSub(_b##l, V3GetY(_a##l), ll1); la1 = V3NegScaleSub(_b##l, V3GetY(_a##a), la1); aa1 = V3NegScaleSub(_b##a, V3GetY(_a##a), aa1); \
+ ll2 = V3NegScaleSub(_b##l, V3GetZ(_a##l), ll2); la2 = V3NegScaleSub(_b##l, V3GetZ(_a##a), la2); aa2 = V3NegScaleSub(_b##a, V3GetZ(_a##a), aa2);
+
+ SUBTRACT_DYAD(S0, S0);
+ SUBTRACT_DYAD(S1, S1);
+ SUBTRACT_DYAD(S2, S2);
+#undef SUBTRACT_DYAD
+
+ return FsInertia(Mat33V(ll0, ll1, ll2),
+ Mat33V(la0, la1, la2),
+ Mat33V(aa0, aa1, aa2));
+ }
+
+
+ // Returns the inertia `input` translated by the offset a.
+ // With s the star (cross-product) matrix of a and llst = ll * s':
+ //   ll is returned unchanged,
+ //   la becomes la + llst,
+ //   aa becomes aa + (t + t') where t = s*la + 0.5 * s*llst.
+ static PX_FORCE_INLINE FsInertia translateInertia(Vec3V a, const FsInertia &input)
+ {
+ // b = -a supplies the negated entries of the star matrix
+ Vec3V b = V3Neg(a);
+
+ Vec3V la0 = input.la.col0, la1 = input.la.col1, la2 = input.la.col2;
+ Vec3V ll0 = input.ll.col0, ll1 = input.ll.col1, ll2 = input.ll.col2;
+ Vec3V aa0 = input.aa.col0, aa1 = input.aa.col1, aa2 = input.aa.col2;
+
+ FloatV aX = V3GetX(a), aY = V3GetY(a), aZ = V3GetZ(a);
+ FloatV bX = V3GetX(b), bY = V3GetY(b), bZ = V3GetZ(b);
+ FloatV Z = FZero();
+
+ // s - star matrix of a
+ Vec3V s0 = V3Merge(Z, aZ, bY),
+ s1 = V3Merge(bZ, Z, aX),
+ s2 = V3Merge(aY, bX, Z);
+
+ // s * la
+ Vec3V sla0 = V3ScaleAdd(s0, V3GetX(la0), V3ScaleAdd(s1, V3GetY(la0), V3Scale(s2, V3GetZ(la0))));
+ Vec3V sla1 = V3ScaleAdd(s0, V3GetX(la1), V3ScaleAdd(s1, V3GetY(la1), V3Scale(s2, V3GetZ(la1))));
+ Vec3V sla2 = V3ScaleAdd(s0, V3GetX(la2), V3ScaleAdd(s1, V3GetY(la2), V3Scale(s2, V3GetZ(la2))));
+
+ // ll * s.transpose() (ll is symmetric)
+ Vec3V llst0 = V3ScaleAdd(ll2, aY, V3Scale(ll1, bZ)),
+ llst1 = V3ScaleAdd(ll0, aZ, V3Scale(ll2, bX)),
+ llst2 = V3ScaleAdd(ll1, aX, V3Scale(ll0, bY));
+
+ // t = sla+S*llst*0.5f;
+
+ // s * llst, column by column
+ Vec3V sllst0 = V3ScaleAdd(s2, V3GetZ(llst0), V3ScaleAdd(s1, V3GetY(llst0), V3Scale(s0, V3GetX(llst0))));
+ Vec3V sllst1 = V3ScaleAdd(s2, V3GetZ(llst1), V3ScaleAdd(s1, V3GetY(llst1), V3Scale(s0, V3GetX(llst1))));
+ Vec3V sllst2 = V3ScaleAdd(s2, V3GetZ(llst2), V3ScaleAdd(s1, V3GetY(llst2), V3Scale(s0, V3GetX(llst2))));
+
+ Vec3V t0 = V3ScaleAdd(sllst0, FHalf(), sla0);
+ Vec3V t1 = V3ScaleAdd(sllst1, FHalf(), sla1);
+ Vec3V t2 = V3ScaleAdd(sllst2, FHalf(), sla2);
+
+ // t+t.transpose()
+ Vec3V r0 = V3Add(t0, V3Merge(V3GetX(t0), V3GetX(t1), V3GetX(t2))),
+ r1 = V3Add(t1, V3Merge(V3GetY(t0), V3GetY(t1), V3GetY(t2))),
+ r2 = V3Add(t2, V3Merge(V3GetZ(t0), V3GetZ(t1), V3GetZ(t2)));
+
+ return FsInertia(Mat33V(ll0, ll1, ll2),
+
+ Mat33V(V3Add(la0, llst0),
+ V3Add(la1, llst1),
+ V3Add(la2, llst2)),
+
+ Mat33V(V3Add(aa0, r0),
+ V3Add(aa1, r1),
+ V3Add(aa2, r2)));
+ }
+
+};
+
+template<class Base>
+class ArticulationFnsSimd : public Base
+{
+ // ES[k] = sum over j <= k of S[j] * (component j of E.colk).
+ // Only E.col0.x, E.col1.x/y and all of E.col2 are read. S is copied into locals before ES is written.
+ static PX_FORCE_INLINE void axisMultiplyLowerTriangular(Cm::SpatialVectorV ES[3], const Mat33V&E, const Cm::SpatialVectorV S[3])
+ {
+     const Vec3V lin0 = S[0].linear, lin1 = S[1].linear, lin2 = S[2].linear;
+     const Vec3V ang0 = S[0].angular, ang1 = S[1].angular, ang2 = S[2].angular;
+
+     // column 0: a single term
+     ES[0] = Cm::SpatialVectorV(V3Scale(lin0, V3GetX(E.col0)),
+                                V3Scale(ang0, V3GetX(E.col0)));
+
+     // column 1: two terms
+     ES[1] = Cm::SpatialVectorV(V3ScaleAdd(lin0, V3GetX(E.col1), V3Scale(lin1, V3GetY(E.col1))),
+                                V3ScaleAdd(ang0, V3GetX(E.col1), V3Scale(ang1, V3GetY(E.col1))));
+
+     // column 2: three terms
+     ES[2] = Cm::SpatialVectorV(V3ScaleAdd(lin0, V3GetX(E.col2), V3ScaleAdd(lin1, V3GetY(E.col2), V3Scale(lin2, V3GetZ(E.col2)))),
+                                V3ScaleAdd(ang0, V3GetX(E.col2), V3ScaleAdd(ang1, V3GetY(E.col2), V3Scale(ang2, V3GetZ(E.col2)))));
+ }
+
+public:
+ // Returns I reduced by the dyads of ISE, where ISE = IS combined with invSqrt(S'IS + load*isf)
+ // through axisMultiplyLowerTriangular.
+ static PX_FORCE_INLINE FsInertia propagate(const FsInertia &I,
+                                            const Cm::SpatialVectorV S[3],
+                                            const Mat33V &load,
+                                            const FloatV isf)
+ {
+     Cm::SpatialVectorV IS[3], ISE[3];
+
+     // D = S'IS, then each column gains load.col * isf
+     Mat33V D = Base::computeSIS(I, S, IS);
+     D.col0 = V3ScaleAdd(load.col0, isf, D.col0);
+     D.col1 = V3ScaleAdd(load.col1, isf, D.col1);
+     D.col2 = V3ScaleAdd(load.col2, isf, D.col2);
+
+     const Mat33V E = Base::invSqrt(D);
+     axisMultiplyLowerTriangular(ISE, E, IS);
+
+     return Base::multiplySubtract(I, ISE);
+ }
+
+
+
+ // Writes SZ = Z.angular + Z.linear x jointOffset, then returns (Z - DSI*SZ) translated
+ // by parentOffset with translateForce.
+ static PX_INLINE Cm::SpatialVectorV propagateImpulse(const FsRow& row,
+                                                      const FsJointVectors& jv,
+                                                      Vec3V& SZ,
+                                                      const Cm::SpatialVectorV& Z,
+                                                      const FsRowAux& aux)
+ {
+     PX_UNUSED(aux);
+
+     // output parameter, consumed later by propagateVelocity
+     SZ = V3Add(Z.angular, V3Cross(Z.linear, jv.jointOffset));
+
+     const Cm::SpatialVectorV remainder = Z - Base::axisMultiply(row.DSI, SZ);
+     return Base::translateForce(jv.parentOffset, remainder);
+ }
+
+ // Translates v by -parentOffset, then subtracts (jointOffset x n, n)
+ // with n = DSI.w + D*SZ.
+ static PX_INLINE Cm::SpatialVectorV propagateVelocity(const FsRow& row,
+                                                       const FsJointVectors& jv,
+                                                       const Vec3V& SZ,
+                                                       const Cm::SpatialVectorV& v,
+                                                       const FsRowAux& aux)
+ {
+     PX_UNUSED(aux);
+
+     // incoming velocity translated by -parentOffset
+     Cm::SpatialVectorV shifted = Base::translateMotion(V3Neg(jv.parentOffset), v);
+
+     // n = DSI.shifted + D*SZ
+     Vec3V DtimesSZ = M33MulV3(row.D, SZ);
+     Vec3V n = V3Add(Base::axisDot(row.DSI, shifted), DtimesSZ);
+
+     return shifted - Cm::SpatialVectorV(V3Cross(jv.jointOffset, n), n);
+ }
+
+
+
+
+
+ // Returns inverse(DInv + E) where
+ //   D    = S' I0 S (IS receives I0*S as a side effect), DInv = inverse(D),
+ //   tmp  = (I0 + I1) reduced by multiplySubtract with DInv and IS (ISD receives its DSI output),
+ //   J    = inverse(tmp),
+ //   E    = ISD' J ISD.
+ // IS, ISD and dummy are 16-byte-aligned raw buffers for three SpatialVectorV each;
+ // dummy only absorbs the IS output of the second computeSIS call.
+ static PX_FORCE_INLINE Mat33V computeDriveInertia(const FsInertia &I0,
+ const FsInertia &I1,
+ const Cm::SpatialVectorV S[3])
+ {
+ POD_U_LIKE(Cm::SpatialVectorV, 3, 16) IS, ISD, dummy;
+ Mat33V D = Base::computeSIS(I0, S, IS);
+ Mat33V DInv = Base::invertSym33(D);
+
+ FsInertia tmp = Base::addInertia(I0, I1);
+ tmp = Base::multiplySubtract(tmp, DInv, IS, ISD);
+ FsInertia J = Base::invertInertia(tmp);
+
+ Mat33V E = Base::computeSIS(J, ISD, dummy);
+ return Base::invertSym33(M33Add(DInv,E));
+
+ }
+};
+
+}
+}
+
+#endif //DY_ARTICULATION_SIMD_FNS_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp
new file mode 100644
index 00000000..ea9ccb8d
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp
@@ -0,0 +1,1344 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "foundation/PxVec3.h"
+#include "foundation/PxMath.h"
+#include "foundation/PxMemory.h"
+#include "foundation/PxProfiler.h"
+
+#include "PsUtilities.h"
+#include "CmSpatialVector.h"
+#include "DyArticulationHelper.h"
+#include "DyArticulationReference.h"
+#include "DyArticulationFnsSimd.h"
+#include "DyArticulationFnsScalar.h"
+#include "DyArticulationFnsDebug.h"
+#include "DySolverConstraintDesc.h"
+#include "PxvDynamics.h"
+#include "DyArticulation.h"
+#include "PxcRigidBody.h"
+#include "CmConeLimitHelper.h"
+#include "DySolverConstraint1D.h"
+#include "PxcConstraintBlockStream.h"
+#include "DySolverConstraint1D.h"
+#include "DyArticulationPImpl.h"
+#include "PsFoundation.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+void PxcFsFlushVelocity(FsData& matrix);
+
+// We pass this allocator around by value so that when a function returns, the caller's copy (and hence the
+// amount taken) is unaltered. That means we don't preserve state across functions - even though it could be
+// handy to preserve baseInertia and jointTransforms across the solver, so that they don't get recomputed if
+// we need to run position projection.
+
+// Bump allocator over a caller-supplied buffer. `taken` only ever grows; there is no free.
+// Exceeding `size` is caught by PX_ASSERT only.
+struct PxcFsScratchAllocator
+{
+    char* base;     // start of the scratch buffer
+    size_t size;    // capacity of the buffer in bytes
+    size_t taken;   // bytes handed out so far
+
+    PxcFsScratchAllocator(char* p, size_t s)
+        : base(p), size(s), taken(0)
+    {
+    }
+
+    // sizeof(T) rounded up to the next multiple of 16
+    template<typename T>
+    static size_t sizeof16()
+    {
+        const size_t mask = 15;
+        return (sizeof(T) + mask) & ~mask;
+    }
+
+    // Reserves count * sizeof16<T>() bytes and returns the start of that region as T*.
+    // No constructors are run.
+    template<class T> T* alloc(PxU32 count)
+    {
+        const size_t bytes = sizeof16<T>() * count;
+        PX_ASSERT(taken + bytes <= size);
+
+        T* block = reinterpret_cast<T*>(base + taken);
+        taken += bytes;
+        return block;
+    }
+};
+
+// Factorises the LTB rows of m in place, visiting links from the highest index down to 1
+// and finishing with link 0. For each link i > 0 with parent p:
+//   - b.inertia is replaced by its inverse,
+//   - b.jResponse = inverse(-(j1' * inverse inertia * j1)),
+//   - the parent's inertia is reduced via multiplySubtract using jResponse and j0.
+// Finally rows[0].inertia is replaced by its inverse.
+void PxcLtbFactor(FsData& m)
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+ LtbRow* rows = getLtbRows(m);
+
+ for(PxU32 i=m.linkCount; --i>0;)
+ {
+ LtbRow& b = rows[i];
+ PxU32 p = m.parent[i];
+ const FsInertia inertia = Fns::invertInertia(b.inertia);
+ // b.j1 is passed as both the S input and the IS output of computeSIS, so after this call
+ // b.j1 holds (inverse inertia) * j1 rather than the original j1
+ const Mat33V jResponse = Fns::invertSym33(M33Neg(Fns::computeSIS(inertia, b.j1, b.j1)));
+ b.inertia = inertia;
+ // likewise b.j0 is both the IS input and the DSI output: it is overwritten with j0 * jResponse
+ rows[p].inertia = Fns::multiplySubtract(rows[p].inertia, jResponse, b.j0, b.j0);
+ b.jResponse = jResponse;
+
+ }
+ rows[0].inertia = Fns::invertInertia(rows[0].inertia);
+}
+
+// Solves with the factorisation stored in the LTB rows of m.
+// b is updated in place during the leaf-to-root sweep; y is zeroed and then receives one
+// spatial vector per link.
+void PxcLtbSolve(const FsData& m,
+                 Vec3V* b, // rhs error to solve for
+                 Cm::SpatialVectorV* y) // velocity delta output
+{
+    typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+    const LtbRow* rows = getLtbRows(m);
+    PxMemZero(y, m.linkCount*sizeof(Cm::SpatialVectorV));
+
+    // sweep from the last link down to link 1, accumulating into each parent
+    for(PxU32 next = m.linkCount; next > 1; )
+    {
+        const PxU32 link = --next;
+        const LtbRow& row = rows[link];
+        const PxU32 parent = m.parent[link];
+
+        const Vec3V residual = V3Sub(b[link], Fns::axisDot(row.j1, y[link]));
+        b[link] = residual;
+        y[parent] = Fns::subtract(y[parent], Fns::axisMultiply(row.j0, residual));
+    }
+
+    // root
+    y[0] = Fns::multiply(rows[0].inertia, y[0]);
+
+    // sweep from link 1 up to the last link, reading each parent's result
+    for(PxU32 link = 1; link < m.linkCount; ++link)
+    {
+        const LtbRow& row = rows[link];
+        const PxU32 parent = m.parent[link];
+
+        const Vec3V t = V3Sub(M33MulV3(row.jResponse, b[link]), Fns::axisDot(row.j0, y[parent]));
+        y[link] = Fns::subtract(Fns::multiply(row.inertia, y[link]), Fns::axisMultiply(row.j1, t));
+    }
+}
+
+// Runs PxcLtbSolve on b (which is modified by the solve) and subtracts the resulting
+// per-link delta from velocity.
+void PxcLtbProject(const FsData& m,
+                   Cm::SpatialVectorV* velocity,
+                   Vec3V* b)
+{
+    PX_ASSERT(m.linkCount<=DY_ARTICULATION_MAX_SIZE);
+
+    // stack scratch for the solve output, one entry per possible link
+    Cm::SpatialVectorV delta[DY_ARTICULATION_MAX_SIZE];
+    PxcLtbSolve(m, b, delta);
+
+    for(PxU32 link = 0; link < m.linkCount; ++link)
+    {
+        velocity[link] -= delta[link];
+    }
+}
+
+// Propagates inertia from the last link down to link 1, storing D and DSI in each FS row,
+// and finally stores the inverse of the accumulated root inertia in matrix.
+// Works on a scratch copy of baseInertia taken from `allocator`.
+void PxcFsPropagateDrivenInertiaSimd(FsData& matrix,
+                                     const FsInertia* baseInertia,
+                                     const PxReal* isf,
+                                     const Mat33V* load,
+                                     PxcFsScratchAllocator allocator)
+{
+    typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+    Cm::SpatialVectorV IS[3];
+
+    FsRow* rows = getFsRows(matrix);
+    const FsRowAux* aux = getAux(matrix);
+    const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+    // working copy so that baseInertia itself is left untouched
+    FsInertia* inertia = allocator.alloc<FsInertia>(matrix.linkCount);
+    PxMemCopy(inertia, baseInertia, matrix.linkCount*sizeof(FsInertia));
+
+    for(PxU32 link = matrix.linkCount - 1; link > 0; --link)
+    {
+        FsRow& row = rows[link];
+
+        // S'IS for this link; IS receives inertia * S
+        const Mat33V SIS = Fns::computeSIS(inertia[link], aux[link].S, IS);
+        const FloatV scale = FLoad(isf[link]);
+
+        // D = inverse(S'IS + load * isf)
+        const Mat33V D = Fns::invertSym33(Mat33V(V3ScaleAdd(load[link].col0, scale, SIS.col0),
+                                                 V3ScaleAdd(load[link].col1, scale, SIS.col1),
+                                                 V3ScaleAdd(load[link].col2, scale, SIS.col2)));
+        row.D = D;
+
+        // reduce this link's inertia (row.DSI is written here), translate it by parentOffset
+        // and add it to the parent
+        const FsInertia reduced = Fns::multiplySubtract(inertia[link], D, IS, row.DSI);
+        const PxU32 parent = matrix.parent[link];
+        inertia[parent] = Fns::addInertia(inertia[parent],
+                                          Fns::translateInertia(jointVectors[link].parentOffset, reduced));
+    }
+
+    getRootInverseInertia(matrix) = Fns::invertInertia(inertia[0]);
+}
+
+// Same shape as Fns::propagateImpulse, except that Q is subtracted from the S'Z term:
+// SZMinusQ = Z.angular + Z.linear x jointOffset - Q, which is also returned through the reference.
+PX_FORCE_INLINE Cm::SpatialVectorV propagateDrivenImpulse(const FsRow& row,
+                                                          const FsJointVectors& jv,
+                                                          Vec3V& SZMinusQ,
+                                                          const Cm::SpatialVectorV& Z,
+                                                          const Vec3V& Q)
+{
+    typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+    const Vec3V SZ = V3Add(Z.angular, V3Cross(Z.linear,jv.jointOffset));
+    SZMinusQ = V3Sub(SZ, Q);
+
+    const Cm::SpatialVectorV remainder = Z - Fns::axisMultiply(row.DSI, SZMinusQ);
+    return Fns::translateForce(jv.parentOffset, remainder);
+}
+
+// Starts from zero impulse on every link, propagates the drive terms Q from the last link
+// towards link 0, then propagates the resulting velocity change from link 0 outwards and
+// adds it to the velocities stored in matrix.
+void PxcFsApplyJointDrives(FsData& matrix,
+                           const Vec3V* Q)
+{
+    typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+    PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+
+    const FsRow* rows = getFsRows(matrix);
+    const FsRowAux* aux = getAux(matrix);
+    const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+    Cm::SpatialVectorV Z[DY_ARTICULATION_MAX_SIZE];
+    Cm::SpatialVectorV dV[DY_ARTICULATION_MAX_SIZE];
+    Vec3V SZminusQ[DY_ARTICULATION_MAX_SIZE];
+
+    PxMemZero(Z, matrix.linkCount*sizeof(Cm::SpatialVectorV));
+
+    // last link down to link 1: accumulate into the parent
+    for(PxU32 next = matrix.linkCount; next > 1; )
+    {
+        const PxU32 link = --next;
+        const Cm::SpatialVectorV toParent = propagateDrivenImpulse(rows[link], jointVectors[link], SZminusQ[link], Z[link], Q[link]);
+        Z[matrix.parent[link]] += toParent;
+    }
+
+    // root velocity change
+    dV[0] = Fns::multiply(getRootInverseInertia(matrix), -Z[0]);
+
+    // link 1 up to the last link: derive each link from its parent
+    for(PxU32 link = 1; link < matrix.linkCount; ++link)
+    {
+        dV[link] = Fns::propagateVelocity(rows[link], jointVectors[link], SZminusQ[link], dV[matrix.parent[link]], aux[link]);
+    }
+
+    Cm::SpatialVectorV* V = getVelocity(matrix);
+    for(PxU32 link = 0; link < matrix.linkCount; ++link)
+    {
+        V[link] += dV[link];
+    }
+}
+
+// Propagates the per-link values in Z from the last link towards link 0 (Z is modified in
+// place), computes the velocity change of every link and adds it to V.
+void ArticulationHelper::applyImpulses( const FsData& matrix,
+                                        Cm::SpatialVectorV* Z,
+                                        Cm::SpatialVectorV* V)
+{
+    // note: Z is the negated impulse
+
+    typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+    PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+    const FsRow* rows = getFsRows(matrix);
+    const FsRowAux* aux = getAux(matrix);
+    const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+    Cm::SpatialVectorV dV[DY_ARTICULATION_MAX_SIZE];
+    Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
+
+    // last link down to link 1: accumulate into the parent
+    for(PxU32 next = matrix.linkCount; next > 1; )
+    {
+        const PxU32 link = --next;
+        const Cm::SpatialVectorV toParent = Fns::propagateImpulse(rows[link], jointVectors[link], SZ[link], Z[link], aux[link]);
+        Z[matrix.parent[link]] += toParent;
+    }
+
+    // root velocity change
+    dV[0] = Fns::multiply(getRootInverseInertia(matrix), -Z[0]);
+
+    // link 1 up to the last link: derive each link from its parent
+    for(PxU32 link = 1; link < matrix.linkCount; ++link)
+    {
+        dV[link] = Fns::propagateVelocity(rows[link], jointVectors[link], SZ[link], dV[matrix.parent[link]], aux[link]);
+    }
+
+    for(PxU32 link = 0; link < matrix.linkCount; ++link)
+    {
+        V[link] += dV[link];
+    }
+}
+
+// General two-link impulse response: applies impulse0 at linkID0 and impulse1 at linkID1 and
+// returns the resulting velocity changes of those two links in deltaV0 / deltaV1.
+// The visited links are recorded in `stack` in three consecutive ranges:
+//   [0, i0)   links from linkID0 up to (excluding) the common ancestor,
+//   [i0, i1)  links from linkID1 up to (excluding) the common ancestor,
+//   [i1, ic)  links from the common ancestor up to (excluding) link 0.
+// SZ is indexed by link id and carries the propagateImpulse output to propagateVelocity.
+void getImpulseResponseSlow(const FsData& matrix,
+ PxU32 linkID0,
+ const Cm::SpatialVectorV& impulse0,
+ Cm::SpatialVectorV& deltaV0,
+ PxU32 linkID1,
+ const Cm::SpatialVectorV& impulse1,
+ Cm::SpatialVectorV& deltaV1)
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ const FsRow* rows = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+ PxU32 stack[DY_ARTICULATION_MAX_SIZE];
+ Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
+
+ PxU32 i0, i1, ic;
+
+ // repeatedly replace the larger of the two indices by its parent until they meet
+ // (presumably relies on a parent always having a smaller index than its child - confirm)
+ for(i0 = linkID0, i1 = linkID1; i0!=i1;) // find common path
+ {
+ if(i0<i1)
+ i1 = matrix.parent[i1];
+ else
+ i0 = matrix.parent[i0];
+ }
+
+ PxU32 common = i0;
+
+ // propagate each negated impulse from its own link up to the common ancestor
+ Cm::SpatialVectorV Z0 = -impulse0, Z1 = -impulse1;
+ for(i0 = 0; linkID0!=common; linkID0 = matrix.parent[linkID0])
+ {
+ Z0 = Fns::propagateImpulse(rows[linkID0], jointVectors[linkID0], SZ[linkID0], Z0, aux[linkID0]);
+ stack[i0++] = linkID0;
+ }
+
+ for(i1 = i0; linkID1!=common; linkID1 = matrix.parent[linkID1])
+ {
+ Z1 = Fns::propagateImpulse(rows[linkID1], jointVectors[linkID1], SZ[linkID1], Z1, aux[linkID1]);
+ stack[i1++] = linkID1;
+ }
+
+ // combined impulse continues from the common ancestor until link 0 is reached
+ Cm::SpatialVectorV Z = Z0 + Z1;
+ for(ic = i1; common; common = matrix.parent[common])
+ {
+ Z = Fns::propagateImpulse(rows[common], jointVectors[common], SZ[common], Z, aux[common]);
+ stack[ic++] = common;
+ }
+
+ // velocity change of link 0
+ Cm::SpatialVectorV v = Fns::multiply(getRootInverseInertia(matrix), -Z);
+
+ // walk back down the shared path; afterwards v is the velocity change of the common ancestor
+ for(PxU32 index = ic; index-->i1 ;)
+ v = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], v, aux[stack[index]]);
+
+ // branch towards linkID1
+ deltaV1 = v;
+ for(PxU32 index = i1; index-->i0 ;)
+ deltaV1 = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], deltaV1, aux[stack[index]]);
+
+ // branch towards linkID0
+ deltaV0 = v;
+ for(PxU32 index = i0; index-->0;)
+ deltaV0 = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], deltaV0, aux[stack[index]]);
+}
+
+// Velocity change of link `linkID` caused by `impulse` applied to that same link.
+// The negated impulse is propagated up to link 0, converted to a velocity change there,
+// and propagated back down along the links recorded in rows[linkID].pathToRoot.
+void PxcFsGetImpulseResponse(const FsData& matrix,
+                             PxU32 linkID,
+                             const Cm::SpatialVectorV& impulse,
+                             Cm::SpatialVectorV& deltaV)
+{
+    typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+    PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+
+    // indexed by link id: written on the way up, read on the way down
+    Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
+
+    const FsRow* rows = getFsRows(matrix);
+    const FsRowAux* aux = getAux(matrix);
+    const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+    Cm::SpatialVectorV Z = -impulse;
+
+    PxU32 link = linkID;
+    while(link)
+    {
+        Z = Fns::propagateImpulse(rows[link], jointVectors[link], SZ[link], Z, aux[link]);
+        link = matrix.parent[link];
+    }
+
+    deltaV = Fns::multiply(getRootInverseInertia(matrix), -Z);
+
+    PX_ASSERT(rows[linkID].pathToRoot&1);
+
+    // given the assert above, subtracting one clears bit 0; the remaining set bits are
+    // visited from lowest to highest
+    ArticulationBitField remaining = rows[linkID].pathToRoot-1;
+    while(remaining)
+    {
+        const PxU32 index = ArticulationLowestSetBit(remaining);
+        deltaV = Fns::propagateVelocity(rows[index], jointVectors[index], SZ[index], deltaV, aux[index]);
+        remaining &= (remaining-1);
+    }
+}
+
+// Velocity changes of two distinct links of the same articulation when impulse0 is applied
+// at linkID0 and impulse1 at linkID1.
+// If linkID0 is the parent of linkID1, the child's impulse is folded into the parent's and
+// the single-link response is used; every other configuration goes through
+// getImpulseResponseSlow.
+void PxcFsGetImpulseSelfResponse(const FsData& matrix,
+ PxU32 linkID0,
+ const Cm::SpatialVectorV& impulse0,
+ Cm::SpatialVectorV& deltaV0,
+ PxU32 linkID1,
+ const Cm::SpatialVectorV& impulse1,
+ Cm::SpatialVectorV& deltaV1)
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ PX_ASSERT(linkID0 != linkID1);
+
+ const FsRow* rows = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ // standard case: parent-child limit
+ if(matrix.parent[linkID1] == linkID0)
+ {
+ Vec3V SZ;
+ // propagate the child's negated impulse to the parent and combine it with impulse0
+ const Cm::SpatialVectorV Z = impulse0 - Fns::propagateImpulse(rows[linkID1], jointVectors[linkID1], SZ, -impulse1, aux[linkID1]);
+ PxcFsGetImpulseResponse(matrix, linkID0, Z, deltaV0);
+ // the child's response follows from the parent's through the same joint
+ deltaV1 = Fns::propagateVelocity(rows[linkID1], jointVectors[linkID1], SZ, deltaV0, aux[linkID1]);
+ }
+ else
+ getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ // NOTE(review): the reference results refV0/refV1 are computed here but are not compared
+ // against deltaV0/deltaV1 anywhere in this function - confirm whether a check is missing.
+ Cm::SpatialVector V[DY_ARTICULATION_MAX_SIZE];
+ for(PxU32 i=0;i<matrix.linkCount;i++) V[i] = Cm::SpatialVector::zero();
+ ArticulationRef::applyImpulse(matrix,V,linkID0, reinterpret_cast<const Cm::SpatialVector&>(impulse0));
+ ArticulationRef::applyImpulse(matrix,V,linkID1, reinterpret_cast<const Cm::SpatialVector&>(impulse1));
+
+ Cm::SpatialVector refV0 = V[linkID0];
+ Cm::SpatialVector refV1 = V[linkID1];
+#endif
+}
+
+namespace
+{
+
+ // Single-link impulse response with the impulse passed as separate linear (lZ) and angular
+ // (aZ) parts. The per-joint impulse and velocity steps are written out inline rather than
+ // calling Fns::propagateImpulse / Fns::propagateVelocity.
+ // SZ and indices are filled in visiting order (slot 0 = linkID, last slot = the link whose
+ // parent is link 0) and consumed in reverse.
+ PX_FORCE_INLINE Cm::SpatialVectorV getImpulseResponseSimd(const FsData& matrix, PxU32 linkID, Vec3V lZ, Vec3V aZ)
+ {
+ PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+ Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
+ PxU32 indices[DY_ARTICULATION_MAX_SIZE], iCount = 0;
+
+ const FsRow*PX_RESTRICT rows = getFsRows(matrix);
+ const FsRowAux*PX_RESTRICT aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ PX_UNUSED(aux);
+ PX_ASSERT(rows[linkID].pathToRoot&1);
+
+ // work with the negated impulse
+ lZ = V3Neg(lZ);
+ aZ = V3Neg(aZ);
+
+ // upward pass: linkID towards link 0
+ for(PxU32 i = linkID; i; i = matrix.parent[i])
+ {
+ const FsRow& r = rows[i];
+ const FsJointVectors& j = jointVectors[i];
+
+ // sz = aZ + lZ x jointOffset
+ Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset));
+ SZ[iCount] = sz;
+
+ // Z -= DSI[0]*sz.x + DSI[1]*sz.y + DSI[2]*sz.z
+ lZ = V3NegScaleSub(r.DSI[0].linear, V3GetX(sz), V3NegScaleSub(r.DSI[1].linear, V3GetY(sz), V3NegScaleSub(r.DSI[2].linear, V3GetZ(sz), lZ)));
+ aZ = V3NegScaleSub(r.DSI[0].angular, V3GetX(sz), V3NegScaleSub(r.DSI[1].angular, V3GetY(sz), V3NegScaleSub(r.DSI[2].angular, V3GetZ(sz), aZ)));
+
+ // translate by parentOffset: angular += parentOffset x linear
+ aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ));
+ indices[iCount++] = i;
+ }
+
+ const FsInertia& I = getRootInverseInertia(matrix);
+
+ // velocity change of link 0: -(root inverse inertia * Z)
+ Vec3V lV = V3Neg(V3Add(M33MulV3(I.ll, lZ), M33MulV3(I.la, aZ)));
+ Vec3V aV = V3Neg(V3Add(M33TrnspsMulV3(I.la, lZ), M33MulV3(I.aa, aZ)));
+
+ // downward pass: replay the recorded links in reverse order
+ while(iCount)
+ {
+ PxU32 i = indices[--iCount];
+ const FsRow& r = rows[i];
+ const FsJointVectors& j = jointVectors[i];
+
+ // translate by -parentOffset: linear -= parentOffset x angular
+ lV = V3Sub(lV, V3Cross(j.parentOffset, aV));
+
+ // n = DSI . V + D * SZ (SZ slot matches the one written for this link on the way up)
+ Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV), V3Dot(r.DSI[1].linear, lV), V3Dot(r.DSI[2].linear, lV)),
+ V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV)));
+
+ n = V3Add(n, M33MulV3(r.D, SZ[iCount]));
+ lV = V3Sub(lV, V3Cross(j.jointOffset, n));
+ aV = V3Sub(aV, n);
+ }
+
+ return Cm::SpatialVectorV(lV, aV);
+ }
+}
+
+// Velocity change of link `linkID` for `impulse` applied at that link, computed with the
+// inlined SIMD path. Under DY_ARTICULATION_DEBUG_VERIFY the result is compared with
+// PxcFsGetImpulseResponse.
+void ArticulationHelper::getImpulseResponse(const FsData& matrix,
+                                            PxU32 linkID,
+                                            const Cm::SpatialVectorV& impulse,
+                                            Cm::SpatialVectorV& deltaV)
+{
+    PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+
+    const Cm::SpatialVectorV response = getImpulseResponseSimd(matrix, linkID, impulse.linear, impulse.angular);
+    deltaV = response;
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+    Cm::SpatialVectorV deltaV_;
+    PxcFsGetImpulseResponse(matrix, linkID, impulse, deltaV_);
+    PX_ASSERT(almostEqual(deltaV_, deltaV,1e-3f));
+#endif
+}
+
+// Velocity changes of two distinct links of the same articulation when impulse0 is applied
+// at linkID0 and impulse1 at linkID1.
+// The parent-child case (linkID0 is the parent of linkID1) is handled with the joint steps
+// written out inline; every other configuration goes through getImpulseResponseSlow.
+// Under DY_ARTICULATION_DEBUG_VERIFY both results are compared with PxcFsGetImpulseSelfResponse.
+void ArticulationHelper::getImpulseSelfResponse(const FsData& matrix,
+ PxU32 linkID0,
+ const Cm::SpatialVectorV& impulse0,
+ Cm::SpatialVectorV& deltaV0,
+ PxU32 linkID1,
+ const Cm::SpatialVectorV& impulse1,
+ Cm::SpatialVectorV& deltaV1)
+{
+ PX_ASSERT(linkID0 != linkID1);
+
+ const FsRow* rows = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ PX_UNUSED(aux);
+
+ // local aliases for the two outputs
+ Cm::SpatialVectorV& dV0 = deltaV0,
+ & dV1 = deltaV1;
+
+ // standard case: parent-child limit
+ if(matrix.parent[linkID1] == linkID0)
+ {
+ const FsRow& r = rows[linkID1];
+ const FsJointVectors& j = jointVectors[linkID1];
+
+ // negated child impulse
+ Vec3V lZ = V3Neg(impulse1.linear),
+ aZ = V3Neg(impulse1.angular);
+
+ // sz = aZ + lZ x jointOffset
+ Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset));
+
+ // Z -= DSI[0]*sz.x + DSI[1]*sz.y + DSI[2]*sz.z
+ lZ = V3Sub(lZ, V3ScaleAdd(r.DSI[0].linear, V3GetX(sz), V3ScaleAdd(r.DSI[1].linear, V3GetY(sz), V3Scale(r.DSI[2].linear, V3GetZ(sz)))));
+ aZ = V3Sub(aZ, V3ScaleAdd(r.DSI[0].angular, V3GetX(sz), V3ScaleAdd(r.DSI[1].angular, V3GetY(sz), V3Scale(r.DSI[2].angular, V3GetZ(sz)))));
+
+ // translate by parentOffset: angular += parentOffset x linear
+ aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ));
+
+ // combined impulse on the parent: impulse0 minus the propagated value
+ lZ = V3Sub(impulse0.linear, lZ);
+ aZ = V3Sub(impulse0.angular, aZ);
+
+ dV0 = getImpulseResponseSimd(matrix, linkID0, lZ, aZ);
+
+ // derive the child's velocity change from the parent's across the joint
+ Vec3V aV = dV0.angular;
+ Vec3V lV = V3Sub(dV0.linear, V3Cross(j.parentOffset, aV));
+
+ // n = DSI . V + D * sz
+ Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV), V3Dot(r.DSI[1].linear, lV), V3Dot(r.DSI[2].linear, lV)),
+ V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV)));
+
+ n = V3Add(n, M33MulV3(r.D, sz));
+ lV = V3Sub(lV, V3Cross(j.jointOffset, n));
+ aV = V3Sub(aV, n);
+
+ dV1 = Cm::SpatialVectorV(lV, aV);
+ }
+ else
+ getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ Cm::SpatialVectorV dV0_, dV1_;
+ PxcFsGetImpulseSelfResponse(matrix, linkID0, impulse0, dV0_, linkID1, impulse1, dV1_);
+
+ PX_ASSERT(almostEqual(dV0_, dV0, 1e-3f));
+ PX_ASSERT(almostEqual(dV1_, dV1, 1e-3f));
+#endif
+}
+
+// Computes, for every non-root link i, the difference between the parent's and the link's
+// linear velocity, both evaluated at the same point (the link origin displaced by jointOffset):
+//   jv[i] = (vParent.linear + vParent.angular x (jointOffset + parentOffset))
+//         - (vLink.linear   + vLink.angular   x  jointOffset)
+//
+// jv       : output array with at least m.linkCount entries; jv[0] is not written.
+// m        : articulation data supplying linkCount, the parent table and the joint vectors.
+// velocity : per-link spatial velocities, indexed by link.
+void PxcLtbComputeJv(Vec3V* jv, const FsData& m, const Cm::SpatialVectorV* velocity)
+{
+	const FsJointVectors* jointVectors = getJointVectors(m);
+
+	for(PxU32 i=1;i<m.linkCount;i++)
+	{
+		const FsJointVectors& jointVector = jointVectors[i];
+
+		// copies, exactly as before, so that the reads are complete before jv[i] is stored
+		const Cm::SpatialVectorV pv = velocity[m.parent[i]], v = velocity[i];
+
+		// the same point expressed relative to the parent's origin
+		const Vec3V parentOffset = V3Add(jointVector.jointOffset, jointVector.parentOffset);
+
+		const Vec3V k0v = V3Add(pv.linear, V3Cross(pv.angular, parentOffset)),
+					k1v = V3Add(v.linear, V3Cross(v.angular, jointVector.jointOffset));
+		jv[i] = V3Sub(k0v, k1v);
+	}
+}
+
+// Flushes the pending velocity updates of the articulation, copies the resulting link
+// velocities into d.motionVelocity, and then projects the stored velocities once more
+// (PxcLtbComputeJv + PxcLtbProject) so that they are re-solved without bias.
+void ArticulationHelper::saveVelocity(const ArticulationSolverDesc& d)
+{
+	FsData& fsData = *d.fsData;
+
+	Cm::SpatialVectorV* velocity = getVelocity(fsData);
+	PxcFsFlushVelocity(fsData);
+
+	// snapshot the motion velocity before the projection below modifies 'velocity'
+	const PxU32 linkCount = fsData.linkCount;
+	for(PxU32 i=0; i<linkCount; ++i)
+	{
+		d.motionVelocity[i] = velocity[i];
+		PX_ASSERT(isFiniteVec3V(velocity[i].linear));
+		PX_ASSERT(isFiniteVec3V(velocity[i].angular));
+	}
+
+	// re-solve so the velocities kept in fsData are the unbiased ones
+	Vec3V jointVelocityError[DY_ARTICULATION_MAX_SIZE];
+	PxcLtbComputeJv(jointVelocityError, fsData, velocity);
+	PxcLtbProject(fsData, velocity, jointVelocityError);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+	for(PxU32 i=0; i<fsData.linkCount; ++i)
+		getRefVelocity(fsData)[i] = velocity[i];
+#endif
+}
+/*
+ * Iteratively refines the per-joint load matrices load[1..linkCount-1].
+ *
+ * Each of the maxIterations passes
+ *   (a) resets the working inertias to baseInertia,
+ *   (b) walks from the last link down to link 1, propagating each link's inertia through its
+ *       joint (using the current load[i] and the spring factor isf_[i]) and adding the translated
+ *       result to the parent's inertia, and
+ *   (c) walks from link 1 upwards, forming the inertia on the root side of each joint and
+ *       rebuilding load[i] from it with computeDriveInertia().
+ *
+ * Returns immediately, leaving load untouched, when maxIterations is 0. Two FsInertia arrays of
+ * linkCount entries are taken from the scratch allocator, which is received by value.
+ * isf_[0] is never read.
+ */
+void PxcFsComputeJointLoadsSimd(const FsData& matrix,
+ const FsInertia*PX_RESTRICT baseInertia,
+ Mat33V*PX_RESTRICT load,
+ const PxReal*PX_RESTRICT isf_,
+ PxU32 linkCount,
+ PxU32 maxIterations,
+ PxcFsScratchAllocator allocator)
+{
+ // dsequeira: this is really difficult to optimize on XBox: not inlining generates lots of LHSs,
+ // inlining generates lots of cache misses because the fn is so huge (almost 2000 instrs.)
+ // Timing says even for 1 iteration the cache misses are slightly preferable for a
+ // 20-bone articulation, for more iters it's *much* better to take the cache misses.
+ //
+ // about 400 instructions come from unnecessary and inexplicable branch checks
+
+ if(!maxIterations)
+ return;
+
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ FloatV isf[DY_ARTICULATION_MAX_SIZE]; // spring factors loaded into FloatV form; entry 0 stays uninitialised and unused
+
+ for(PxU32 i=1;i<linkCount;i++)
+ isf[i] = FLoad(isf_[i]);
+
+ FsInertia*PX_RESTRICT inertia = allocator.alloc<FsInertia>(linkCount);
+ FsInertia*PX_RESTRICT contribToParent = allocator.alloc<FsInertia>(linkCount);
+
+ const FsRow*PX_RESTRICT row = getFsRows(matrix);
+ const FsRowAux*PX_RESTRICT aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ PX_UNUSED(row); // row is fetched but never read in this function
+
+ // gets rid of about 200 LHSs, need to change the matrix format to make this part of it
+ PxU64 parent[DY_ARTICULATION_MAX_SIZE];
+ for(PxU32 i=0;i<linkCount;i++)
+ parent[i] = matrix.parent[i];
+
+ while(maxIterations--)
+ {
+ PxMemCopy(inertia, baseInertia, sizeof(FsInertia)*linkCount);
+
+ for(PxU32 i=linkCount;i-->1;) // links linkCount-1 down to 1; the root (0) has no inbound joint
+ {
+ const Cm::SpatialVectorV*PX_RESTRICT S = aux[i].S;
+
+ Ps::prefetch(&load[i-1]);
+ Ps::prefetch(&jointVectors[i-1]);
+ const FsInertia tmp = Fns::propagate(inertia[i], S, load[i], isf[i]);
+ inertia[parent[i]] = Fns::addInertia(inertia[parent[i]], Fns::translateInertia(jointVectors[i].parentOffset, tmp));
+ contribToParent[i] = tmp; // remembered so it can be removed from the parent's total in the second sweep
+ }
+
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ const Cm::SpatialVectorV*PX_RESTRICT S = aux[i].S;
+
+ const FsInertia rootwardInertia = Fns::subtractInertia(Fns::translateInertia(V3Neg(jointVectors[i].parentOffset), inertia[parent[i]]), contribToParent[i]); // parent inertia moved to this link, minus this link's own contribution
+ const FsInertia tmp = Fns::propagate(rootwardInertia, S, load[i], isf[i]);
+ load[i] = Fns::computeDriveInertia(inertia[i], rootwardInertia, S);
+ inertia[i] = Fns::addInertia(inertia[i], tmp);
+ }
+ }
+}
+
+// Byte size of the FS data section for linkCount links: one FsInertia followed by
+// one FsRow per link.
+PxU32 ArticulationHelper::getFsDataSize(PxU32 linkCount)
+{
+	return PxU32(sizeof(FsRow) * linkCount + sizeof(FsInertia));
+}
+
+// Byte size of the LTB data section for linkCount links: one LtbRow per link.
+PxU32 ArticulationHelper::getLtbDataSize(PxU32 linkCount)
+{
+	return PxU32(linkCount * sizeof(LtbRow));
+}
+
+// Initialises the articulation data block 'fsData' for linkCount links and loads the per-link
+// state from 'links'.
+//
+// The state section (FsData header, two SpatialVectorV arrays, one Vec3V array and a PxReal array
+// whose length is rounded up to a multiple of 16) is zeroed; the section offsets, link count,
+// parent table and deferredZ are then written. For every link the body pose, velocity, inertia
+// and maxPenBias are copied from its PxsBodyCore, and for every non-root link the joint transforms
+// and the joint/parent offset vectors are computed.
+//
+// poses, baseInertia, jointTransforms : caller-provided arrays with linkCount entries, filled here.
+// expectedSize                        : if non-zero, asserted to equal the computed total block size.
+void ArticulationHelper::prepareDataBlock(FsData& fsData,
+	const ArticulationLink* links,
+	PxU16 linkCount,
+	PxTransform* poses,
+	FsInertia* baseInertia,
+	ArticulationJointTransforms* jointTransforms,
+	PxU32 expectedSize)
+{
+	// the PxReal array is padded to a multiple of 16 entries
+	const PxU32 paddedLinkCount = (linkCount + 15) & 0xfffffff0;
+
+	const PxU32 stateSize = sizeof(FsData)
+		+ sizeof(Cm::SpatialVectorV) * linkCount
+		+ sizeof(Cm::SpatialVectorV) * linkCount
+		+ sizeof(Vec3V) * linkCount
+		+ sizeof(PxReal) * paddedLinkCount;
+
+	const PxU32 jointVectorSize = sizeof(FsJointVectors) * linkCount;
+	const PxU32 fsDataSize = getFsDataSize(linkCount);
+	const PxU32 ltbDataSize = getLtbDataSize(linkCount);
+
+	const PxU32 totalSize = stateSize + jointVectorSize + fsDataSize + ltbDataSize
+		+ sizeof(Cm::SpatialVectorV) * linkCount
+		+ sizeof(FsRowAux) * linkCount;
+
+	PX_UNUSED(totalSize);
+	PX_UNUSED(expectedSize);
+	PX_ASSERT(expectedSize == 0 || totalSize == expectedSize);
+
+	// header: sections are laid out back to back after the state section
+	PxMemZero(&fsData, stateSize);
+	const PxU32 jointVectorOffset = stateSize;
+	const PxU32 fsDataOffset = jointVectorOffset + jointVectorSize;
+	const PxU32 ltbDataOffset = fsDataOffset + fsDataSize;
+	fsData.jointVectorOffset = PxU16(jointVectorOffset);
+	fsData.fsDataOffset = PxU16(fsDataOffset);
+	fsData.ltbDataOffset = PxU16(ltbDataOffset);
+	fsData.linkCount = linkCount;
+
+	// parent[0] keeps the zero written by PxMemZero above
+	for(PxU32 link = 1; link < linkCount; ++link)
+		fsData.parent[link] = PxU8(links[link].parent);
+	fsData.deferredZ = Cm::SpatialVectorV(PxZero);
+
+	Cm::SpatialVector* velocity = reinterpret_cast<Cm::SpatialVector*>(getVelocity(fsData));
+
+	PxMemZero(baseInertia, sizeof(FsInertia)*linkCount);
+
+	PxReal* maxPenBias = getMaxPenBias(fsData);
+
+	for(PxU32 link = 0; link < linkCount; ++link)
+	{
+		// warm the cache for the link two steps ahead
+		const PxU32 lookAhead = link + 2;
+		if(lookAhead < linkCount)
+		{
+			Ps::prefetch(links[lookAhead].bodyCore);
+			Ps::prefetch(links[lookAhead].inboundJoint);
+		}
+
+		PxsBodyCore& core = *links[link].bodyCore;
+		poses[link] = core.body2World;
+		velocity[link] = Cm::SpatialVector(core.linearVelocity, core.angularVelocity);
+		setInertia(baseInertia[link], core, core.body2World);
+		maxPenBias[link] = core.maxPenBias;
+
+		// the root has no inbound joint
+		if(link == 0)
+			continue;
+
+		setJointTransforms(jointTransforms[link], poses[links[link].parent], core.body2World, *links[link].inboundJoint);
+	}
+
+	FsJointVectors* jointVectors = getJointVectors(fsData);
+	for(PxU32 link = 1; link < linkCount; ++link)
+	{
+		// parentOffset: parent origin -> link origin; jointOffset: link origin -> child joint frame
+		PX_ALIGN(16, PxVec3) parentOffset = poses[link].p - poses[fsData.parent[link]].p;
+		PX_ALIGN(16, PxVec3) jointOffset = jointTransforms[link].cB2w.p - poses[link].p;
+		jointVectors[link].parentOffset = V3LoadA(parentOffset);
+		jointVectors[link].jointOffset = V3LoadA(jointOffset);
+	}
+}
+/*
+ * Prepares one articulation for the solver and returns the number of PxSolverConstraintDesc
+ * entries written to constraintDesc (the return value of setupSolverConstraints()).
+ *
+ * Stages, in order (each wrapped in a PX_PROFILE_ZONE of the same name):
+ *   prepareDataBlock      - lay out fsData and load poses, velocities and inertias from the links.
+ *   setupProject          - build and factor the LTB matrix, then project the link velocities
+ *                           using the joint velocity error plus the positional term rows[i].jC.
+ *   prepareFsData         - zero and fill the FS rows.
+ *   setupDrives           - compute the internal joint loads, propagate the driven inertia, compute
+ *                           and apply the joint drives; the external loads are only computed when
+ *                           they can differ from the internal ones.
+ *   applyExternalImpulses - apply the impulse -(inertia * acceleration) * dt to every link, where
+ *                           gravity is added for links without eDISABLE_GRAVITY.
+ *   setupConstraints      - emit the joint limit constraints.
+ *
+ * Between the last two stages the link velocities are copied to desc.motionVelocity and the
+ * deferred-update state and solver progress counters in fsData are reset.
+ *
+ * dt        : time step; 1/dt is formed, so it must be non-zero.
+ * acCount   : output, set by setupSolverConstraints().
+ * contextID : only used by the profile zones.
+ */
+PxU32 ArticulationHelper::computeUnconstrainedVelocities( const ArticulationSolverDesc& desc,
+ PxReal dt,
+ PxcConstraintBlockStream& stream,
+ PxSolverConstraintDesc* constraintDesc,
+ PxU32& acCount,
+ PxsConstraintBlockManager& constraintBlockManager,
+ const PxVec3& gravity, PxU64 contextID)
+{
+ PX_UNUSED(contextID);
+ const ArticulationLink* links = desc.links;
+ PxU16 linkCount = desc.linkCount;
+ FsData& fsData = *desc.fsData;
+ PxTransform* poses = desc.poses;
+
+ PxcFsScratchAllocator allocator(desc.scratchMemory, desc.scratchMemorySize);
+ FsInertia* PX_RESTRICT baseInertia = allocator.alloc<FsInertia>(desc.linkCount);
+ ArticulationJointTransforms* PX_RESTRICT jointTransforms = allocator.alloc<ArticulationJointTransforms>(desc.linkCount);
+
+ {
+ PX_PROFILE_ZONE("Articulations.prepareDataBlock", contextID);
+ prepareDataBlock(fsData, links, linkCount, poses, baseInertia, jointTransforms, desc.totalDataSize);
+ }
+
+ const PxReal recipDt = 1.0f/dt;
+
+ Cm::SpatialVectorV* velocity = getVelocity(fsData);
+
+ {
+
+ PX_PROFILE_ZONE("Articulations.setupProject", contextID);
+
+ PxMemZero(getLtbRows(fsData), getLtbDataSize(linkCount));
+ prepareLtbMatrix(fsData, baseInertia, poses, jointTransforms, recipDt);
+
+ PxcLtbFactor(fsData);
+
+ Vec3V b[DY_ARTICULATION_MAX_SIZE];
+ PxcLtbComputeJv(b, fsData, velocity);
+
+ LtbRow* rows = getLtbRows(fsData);
+ for(PxU32 i=1;i<linkCount;i++)
+ b[i] = V3Add(b[i], rows[i].jC); // add the positional term stored in jC by prepareLtbMatrix
+
+ PxcLtbProject(fsData, velocity, b);
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.prepareFsData", contextID);
+ PxMemZero(addAddr<void*>(&fsData,fsData.fsDataOffset), getFsDataSize(linkCount));
+ prepareFsData(fsData, links);
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.setupDrives", contextID);
+
+ if(!(desc.core->externalDriveIterations & 0x80000000)) // top bit clear: start from zeroed loads; set: keep what desc.externalLoads already holds
+ PxMemZero(desc.externalLoads, sizeof(Mat33V) * linkCount);
+
+ if(!(desc.core->internalDriveIterations & 0x80000000)) // same convention for the internal loads
+ PxMemZero(desc.internalLoads, sizeof(Mat33V) * linkCount);
+
+ PxReal isf[DY_ARTICULATION_MAX_SIZE], esf[DY_ARTICULATION_MAX_SIZE]; // spring factors
+ Vec3V drive[DY_ARTICULATION_MAX_SIZE];
+
+ bool externalEqualsInternalCompliance = (desc.core->internalDriveIterations&0xffff) == (desc.core->externalDriveIterations&0xffff); // low 16 bits hold the iteration count
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ const ArticulationJointCore& j = *links[i].inboundJoint;
+ isf[i] = (1 + j.damping * dt + j.spring * dt * dt) * getResistance(j.internalCompliance);
+ esf[i] = (1 + j.damping * dt + j.spring * dt * dt) * getResistance(j.externalCompliance);
+
+ externalEqualsInternalCompliance = externalEqualsInternalCompliance && j.internalCompliance == j.externalCompliance;
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.jointInternalLoads", contextID);
+ PxcFsComputeJointLoadsSimd(fsData, baseInertia, desc.internalLoads, isf, linkCount, desc.core->internalDriveIterations&0xffff, allocator);
+
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.propagateDrivenInertia", contextID);
+ PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, isf, desc.internalLoads, allocator);
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.computeJointDrives", contextID);
+ computeJointDrives(fsData, drive, links, poses, jointTransforms, desc.internalLoads, dt);
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.applyJointDrives", contextID);
+ PxcFsApplyJointDrives(fsData, drive);
+ }
+
+ if(!externalEqualsInternalCompliance) // skipped when iteration counts and every joint's compliances match
+ {
+ {
+ PX_PROFILE_ZONE("Articulations.jointExternalLoads", contextID);
+ PxcFsComputeJointLoadsSimd(fsData, baseInertia, desc.externalLoads, esf, linkCount, desc.core->externalDriveIterations&0xffff, allocator);
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.propagateDrivenInertia", contextID);
+ PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, esf, desc.externalLoads, allocator);
+ }
+ }
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.applyExternalImpulses", contextID);
+ Cm::SpatialVectorV Z[DY_ARTICULATION_MAX_SIZE];
+
+ FloatV h = FLoad(dt);
+
+ const Cm::SpatialVector* acceleration = desc.acceleration;
+
+ const Vec3V vGravity = V3LoadU(gravity);
+
+ for(PxU32 i=0;i<linkCount;i++)
+ {
+ Vec3V linearAccel = V3LoadA(acceleration[i].linear);
+
+ if (!(desc.links[i].body->mInternalFlags & PxcRigidBody::eDISABLE_GRAVITY))
+ linearAccel = V3Add(linearAccel, vGravity);
+ Cm::SpatialVectorV a(linearAccel, V3LoadA(acceleration[i].angular));
+ Z[i] = -ArticulationFnsSimd<ArticulationFnsSimdBase>::multiply(baseInertia[i], a) * h; // negated (inertia * acceleration) scaled by dt
+ }
+
+ applyImpulses(fsData, Z, getVelocity(fsData));
+ }
+
+ // save off the motion velocity in case there are no constraints with the articulation
+
+ PxMemCopy(desc.motionVelocity, velocity, linkCount*sizeof(Cm::SpatialVectorV));
+
+ // set up for deferred-update solve
+
+ fsData.dirty = 0;
+
+ // solver progress counters
+ fsData.maxSolverNormalProgress = 0;
+ fsData.maxSolverFrictionProgress = 0;
+ fsData.solverProgress = 0;
+
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ for(PxU32 i=0;i<linkCount;i++)
+ getRefVelocity(fsData)[i] = getVelocity(fsData)[i];
+#endif
+
+ {
+ PX_PROFILE_ZONE("Articulations.setupConstraints", contextID);
+ return setupSolverConstraints(fsData, desc.solverDataSize, stream, constraintDesc, links, jointTransforms, dt, acCount, constraintBlockManager);
+ }
+}
+
+// Fills 'fsData' as a drive cache for the given links: the data block and FS rows are prepared
+// from the current link state, then the joint loads are computed with a single uniform
+// 'compliance' for every joint and the driven inertia is propagated.
+//
+// iterations    : only the low 16 bits are used as the load iteration count.
+// scratchMemory : working memory for the inertias, joint transforms, poses and joint loads.
+void ArticulationHelper::initializeDriveCache(FsData& fsData,
+	PxU16 linkCount,
+	const ArticulationLink* links,
+	PxReal compliance,
+	PxU32 iterations,
+	char* scratchMemory,
+	PxU32 scratchMemorySize)
+{
+	PxcFsScratchAllocator allocator(scratchMemory, scratchMemorySize);
+
+	// allocation order is kept: it determines the scratch memory layout
+	FsInertia* PX_RESTRICT baseInertia = allocator.alloc<FsInertia>(linkCount);
+	ArticulationJointTransforms* PX_RESTRICT jointTransforms = allocator.alloc<ArticulationJointTransforms>(linkCount);
+	PxTransform* PX_RESTRICT poses = allocator.alloc<PxTransform>(linkCount);
+	Mat33V* PX_RESTRICT jointLoads = allocator.alloc<Mat33V>(linkCount);
+
+	// expectedSize of 0 disables the size check in prepareDataBlock
+	prepareDataBlock(fsData, links, linkCount, poses, baseInertia, jointTransforms, 0);
+
+	void* fsSection = addAddr<void*>(&fsData,fsData.fsDataOffset);
+	PxMemZero(fsSection, getFsDataSize(linkCount));
+	prepareFsData(fsData, links);
+
+	// the same resistance for every joint; the root entry is zero
+	PxReal springFactor[DY_ARTICULATION_MAX_SIZE];
+	springFactor[0] = 0.0f;
+	for(PxU32 link = 1; link < linkCount; ++link)
+		springFactor[link] = getResistance(compliance);
+
+	const PxU32 loadIterations = iterations&0xffff;
+	PxMemZero(jointLoads, sizeof(Mat33V)*linkCount);
+	PxcFsComputeJointLoadsSimd(fsData, baseInertia, jointLoads, springFactor, linkCount, loadIterations, allocator);
+	PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, springFactor, jointLoads, allocator);
+}
+/*
+ * Finishes the simulation step for one articulation: integrates the link poses over dt, pulls
+ * separated joints back together if necessary, and writes the final poses and velocities back
+ * to the links' body cores.
+ *
+ * Steps:
+ *   1. Flush pending velocity updates and project the velocities stored in fsData.
+ *   2. Integrate every pose with desc.motionVelocity, remembering the old poses.
+ *   3. Up to core.maxProjectionIterations times, while the largest distance between the parent
+ *      and child joint frame positions exceeds core.separationTolerance: rebuild the inertias and
+ *      LTB matrix at the new poses, solve for correcting velocities and integrate them.
+ *   4. If any projection pass ran, recompute desc.motionVelocity from the pose change over dt.
+ *
+ * Scratch memory from desc supplies the old poses, inertias and joint transforms.
+ * dt must be non-zero because 1/dt is formed.
+ */
+void ArticulationHelper::updateBodies(const ArticulationSolverDesc& desc, PxReal dt)
+{
+ FsData& fsData = *desc.fsData;
+ const ArticulationCore& core = *desc.core;
+ const ArticulationLink* links = desc.links;
+ PxTransform* poses = desc.poses;
+ Cm::SpatialVectorV* motionVelocity = desc.motionVelocity;
+
+ Vec3V b[DY_ARTICULATION_MAX_SIZE];
+
+ PxU32 linkCount = fsData.linkCount;
+
+ PxcFsFlushVelocity(fsData);
+ PxcLtbComputeJv(b, fsData, getVelocity(fsData));
+ PxcLtbProject(fsData, getVelocity(fsData), b);
+
+ // update positions
+ PxcFsScratchAllocator allocator(desc.scratchMemory, desc.scratchMemorySize);
+ PxTransform* PX_RESTRICT oldPose = allocator.alloc<PxTransform>(desc.linkCount);
+
+ for(PxU32 i=0;i<linkCount;i++)
+ {
+ const PxVec3& lv = reinterpret_cast<PxVec3&>(motionVelocity[i].linear);
+ const PxVec3& av = reinterpret_cast<PxVec3&>(motionVelocity[i].angular);
+ oldPose[i] = poses[i];
+ poses[i] = PxTransform(poses[i].p + lv * dt, Ps::exp(av*dt) * poses[i].q); // translate by lv*dt, pre-multiply the rotation by exp(av*dt)
+ }
+
+ bool projected = false;
+ const PxReal recipDt = 1.0f/dt;
+
+ FsInertia* PX_RESTRICT baseInertia = allocator.alloc<FsInertia>(desc.linkCount);
+ ArticulationJointTransforms* PX_RESTRICT jointTransforms = allocator.alloc<ArticulationJointTransforms>(desc.linkCount);
+
+ for(PxU32 iterations = 0; iterations < core.maxProjectionIterations; iterations++)
+ {
+ PxReal maxSeparation = -PX_MAX_F32;
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ const ArticulationJointCore& j = *links[i].inboundJoint;
+ maxSeparation = PxMax(maxSeparation,
+ (poses[links[i].parent].transform(j.parentPose).p -
+ poses[i].transform(j.childPose).p).magnitude());
+ }
+
+ if(maxSeparation<=core.separationTolerance)
+ break;
+
+ projected = true;
+
+ // we go around again, finding velocities which pull us back together - this
+ // form of projection is momentum-preserving but slow compared to hierarchical
+ // projection
+
+ PxMemZero(baseInertia, sizeof(FsInertia)*linkCount);
+
+ ArticulationHelper::setInertia(baseInertia[0], *links[0].bodyCore, poses[0]);
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ ArticulationHelper::setInertia(baseInertia[i], *links[i].bodyCore, poses[i]);
+ ArticulationHelper::setJointTransforms(jointTransforms[i], poses[links[i].parent], poses[i], *links[i].inboundJoint);
+ }
+
+ ArticulationHelper::prepareLtbMatrix(fsData, baseInertia, poses, jointTransforms, recipDt);
+ PxcLtbFactor(fsData);
+
+ LtbRow* rows = getLtbRows(fsData);
+
+ for(PxU32 i=1;i<linkCount;i++)
+ b[i] = rows[i].jC; // right-hand side is the positional term only
+
+ PxMemZero(motionVelocity, linkCount*sizeof(Cm::SpatialVectorV)); // start the projection solve from zero velocity
+
+ PxcLtbProject(fsData, motionVelocity, b);
+
+ for(PxU32 i=0;i<linkCount;i++)
+ {
+ const PxVec3& lv = reinterpret_cast<PxVec3&>(motionVelocity[i].linear);
+ const PxVec3& av = reinterpret_cast<PxVec3&>(motionVelocity[i].angular);
+ poses[i] = PxTransform(poses[i].p + lv * dt, Ps::exp(av*dt) * poses[i].q);
+ }
+ }
+
+ if(projected)
+ {
+ // recompute motion velocities.
+ for(PxU32 i=0;i<linkCount;i++)
+ {
+ motionVelocity[i].linear = V3LoadU((poses[i].p - oldPose[i].p) * recipDt);
+ motionVelocity[i].angular = V3LoadU(Ps::log(poses[i].q * oldPose[i].q.getConjugate()) * recipDt);
+ }
+ }
+
+ Cm::SpatialVectorV* velocity = getVelocity(fsData);
+ for(PxU32 i=0;i<linkCount;i++)
+ {
+ links[i].bodyCore->body2World = poses[i];
+
+ V3StoreA(velocity[i].linear, links[i].bodyCore->linearVelocity); // body cores receive the projected fsData velocities, not motionVelocity
+ V3StoreA(velocity[i].angular, links[i].bodyCore->angularVelocity);
+ }
+}
+
+// Writes the spatial inertia of 'body' at orientation pose.q into 'inertia':
+// the diagonal of inertia.ll receives the mass and inertia.aa the rotated, symmetrised
+// inertia tensor. Only those entries are written, so the caller must have zeroed the rest
+// (inertia.la and the off-diagonal elements of inertia.ll) beforehand.
+void ArticulationHelper::setInertia(FsInertia& inertia, const PxsBodyCore& body, const PxTransform& pose)
+{
+	// linear block: mass on the diagonal
+	const PxReal mass = 1.0f/body.inverseMass;
+	V3WriteX(inertia.ll.col0, mass);
+	V3WriteY(inertia.ll.col1, mass);
+	V3WriteZ(inertia.ll.col2, mass);
+
+	// angular block: R * diag(1/inverseInertia) * R^T
+	const PxVec3& invInertia = body.inverseInertia;
+	const PxMat33 rotation(pose.q);
+	const PxMat33 localInertia = PxMat33::createDiagonal(PxVec3(1.0f/invInertia.x, 1.0f/invInertia.y, 1.0f/invInertia.z));
+
+	PX_ALIGN_PREFIX(16) PxMat33 PX_ALIGN_SUFFIX(16) alignedInertia = rotation * localInertia * rotation.getTranspose();
+
+	// average with the transpose to make the result exactly symmetric
+	alignedInertia = (alignedInertia + alignedInertia.getTranspose())*0.5f;
+	inertia.aa = Mat33V_From_PxMat33(alignedInertia);
+}
+
+// Computes the world-space joint frames for one joint:
+//   cA2w  - joint frame on the parent (parentPose * joint.parentPose),
+//   cB2w  - joint frame on the child  (childPose * joint.childPose),
+//   cB2cA - child frame expressed in the parent frame.
+// The relative quaternion is kept on the short way round (w >= 0), which the limit code needs;
+// when it has to be flipped, cB2w.q is flipped with it.
+void ArticulationHelper::setJointTransforms(ArticulationJointTransforms& transforms,
+	const PxTransform& parentPose,
+	const PxTransform& childPose,
+	const ArticulationJointCore& joint)
+{
+	transforms.cA2w = parentPose.transform(joint.parentPose);
+	transforms.cB2w = childPose.transform(joint.childPose);
+	transforms.cB2cA = transforms.cA2w.transformInv(transforms.cB2w);
+
+	const bool longWayRound = transforms.cB2cA.q.w<0;
+	if(!longWayRound)
+		return;
+
+	transforms.cB2w.q = -transforms.cB2w.q;
+	transforms.cB2cA.q = -transforms.cB2cA.q;
+}
+
+// Fills the LTB rows of fsData: every row receives the link's base inertia, and every non-root
+// row additionally receives the three constraint Jacobians j0 (parent side) and j1 (child side)
+// along the world axes, plus the positional error term jC.
+//
+// The constraint acts at the child joint frame position cB2w.p. Unlike the 1D solver, which uses
+// j0.v0 - j1.v1 + c = 0, the homogeneous form j0.v0 + j1.v1 + c = 0 is used here, hence the
+// negated child Jacobian. The error is the parent-to-child joint frame offset scaled by 0.99
+// and divided by dt (recipDt).
+void ArticulationHelper::prepareLtbMatrix(FsData& fsData,
+	const FsInertia* baseInertia,
+	const PxTransform* poses,
+	const ArticulationJointTransforms* jointTransforms,
+	PxReal recipDt)
+{
+	const PxU32 linkCount = fsData.linkCount;
+	LtbRow* rows = getLtbRows(fsData);
+
+	// the root only carries an inertia
+	rows[0].inertia = baseInertia[0];
+
+	const PxVec3 axis[3] = { PxVec3(1.0f,0.0f,0.0f), PxVec3(0.0f,1.0f,0.0f), PxVec3(0.0f,0.0f,1.0f) };
+
+	for(PxU32 link = 1; link < linkCount; ++link)
+	{
+		LtbRow& row = rows[link];
+		const ArticulationJointTransforms& frames = jointTransforms[link];
+		const PxU32 parent = fsData.parent[link];
+
+		row.inertia = baseInertia[link];
+
+		// lever arms from the parent and child origins to the action point
+		const PxVec3 ra = frames.cB2w.p - poses[parent].p;
+		const PxVec3 rb = frames.cB2w.p - poses[link].p;
+
+		const PxVec3 error = (frames.cA2w.p - frames.cB2w.p) * 0.99f;
+
+		for(PxU32 k = 0; k < 3; ++k)
+		{
+			const PxVec3 n = axis[k];
+			row.j0[k] = Cm::SpatialVector(n, ra.cross(n));
+			row.j1[k] = Cm::SpatialVector(-n, -rb.cross(n));
+		}
+
+		row.jC = V3LoadU(error*recipDt);
+	}
+}
+
+// Fills the FS rows and auxiliary rows of fsData from the link topology.
+//
+// Every row receives the link's 'children' and 'pathToRoot' values (the root's pathToRoot is
+// set to 1). For every non-root link the three joint motion axes aux[i].S are set to the world
+// X, Y and Z rotation axes translated by the link's jointOffset, so the joint coordinates are
+// world oriented and located at the joint.
+//
+// fsData : must already contain linkCount and the joint vectors (see prepareDataBlock).
+// links  : array of fsData.linkCount links.
+void ArticulationHelper::prepareFsData(FsData& fsData, const ArticulationLink* links)
+{
+	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+	const PxU32 linkCount = fsData.linkCount;
+	FsRow* rows = getFsRows(fsData);
+	FsRowAux* aux = getAux(fsData);
+	const FsJointVectors* jointVectors = getJointVectors(fsData);
+
+	rows[0].children = links[0].children;
+	rows[0].pathToRoot = 1;
+
+	// world axes stored as aligned 4-float vectors so they can be read as Vec3V
+	PX_ALIGN_PREFIX(16) PxVec4 v[] PX_ALIGN_SUFFIX(16) = { PxVec4(1.f,0,0,0), PxVec4(0,1.f,0,0), PxVec4(0,0,1.f,0) } ;
+	const Vec3V* axes = reinterpret_cast<const Vec3V*>(v);
+
+	for(PxU32 i=1;i<linkCount;i++)
+	{
+		FsRow& r = rows[i];
+		FsRowAux& a = aux[i];
+
+		r.children = links[i].children;
+		r.pathToRoot = links[i].pathToRoot;
+
+		const Vec3V jointOffset = jointVectors[i].jointOffset;
+
+		// the joint coords are world oriented, located at the joint.
+		for(PxU32 k=0;k<3;k++)
+			a.S[k] = Fns::translateMotion(jointOffset, Cm::SpatialVectorV(V3Zero(), axes[k]));
+	}
+}
+
+// Converts a joint compliance into a resistance, i.e. its reciprocal.
+// The compliance is asserted to be strictly positive.
+PX_FORCE_INLINE PxReal ArticulationHelper::getResistance(PxReal compliance)
+{
+	PX_ASSERT(compliance>0);
+
+	const PxReal resistance = 1.0f/compliance;
+	return resistance;
+}
+
+// Sets up the one-sided angular constraint row 's' (impulse range [0, PX_MAX_F32]) about 'axis'
+// between link linkIndex and its parent.
+//
+// The unit response is obtained by applying opposite unit angular impulses about 'axis' to the
+// parent and the child. If the response is not positive the row is neutralised (all multipliers
+// derived from it become zero), and a debug warning is issued when it is negative.
+//
+// err     : limit error; the biased constant is -err / (response * dt), the unbiased constant
+//           only keeps it when err is positive.
+// recipDt : 1 / time step.
+void ArticulationHelper::createHardLimit(const FsData& fsData,
+	const ArticulationLink* links,
+	PxU32 linkIndex,
+	SolverConstraint1DExt& s,
+	const PxVec3& axis,
+	PxReal err,
+	PxReal recipDt)
+{
+	init(s, PxVec3(0), PxVec3(0), axis, axis, 0, PX_MAX_F32);
+
+	// equal and opposite unit angular impulses on parent and child
+	const Cm::SpatialVector parentImpulse(PxVec3(0), axis);
+	const Cm::SpatialVector childImpulse(PxVec3(0), -axis);
+	ArticulationHelper::getImpulseSelfResponse(fsData,
+		links[linkIndex].parent, parentImpulse, s.deltaVA,
+		linkIndex, childImpulse, s.deltaVB);
+
+	const PxReal responseA = axis.dot(reinterpret_cast<PxVec3&>(s.deltaVA.angular));
+	const PxReal responseB = axis.dot(reinterpret_cast<PxVec3&>(s.deltaVB.angular));
+	const PxReal unitResponse = responseA - responseB;
+
+	if(unitResponse<0.0f)
+		Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "Warning: articulation ill-conditioned or under severe stress, joint limit ignored");
+
+	PxReal recipResponse = 0.0f;
+	if(unitResponse>0.0f)
+		recipResponse = 1.0f/unitResponse;
+
+	s.constant = recipResponse * -err * recipDt;
+	if(err>0.0f)
+		s.unbiasedConstant = s.constant;
+	else
+		s.unbiasedConstant = 0.0f;
+	s.velMultiplier = -recipResponse;
+	s.impulseMultiplier = 1.0f;
+}
+
+// Sets up the two-sided angular spring row 's' (impulse range [-PX_MAX_F32, PX_MAX_F32]) about
+// 'axis' between link linkIndex and its parent.
+//
+// The unit response is obtained by applying opposite unit angular impulses about 'axis' to the
+// parent and the child. If the response is not positive the velocity multiplier becomes zero,
+// and a debug warning is issued when it is negative.
+//
+// stiffness, damping : spring parameters; with a = dt*dt*stiffness + dt*damping the row uses
+//                      velMultiplier = -a / ((1 + a) * response) and impulseMultiplier = a / (1 + a).
+void ArticulationHelper::createTangentialSpring(const FsData& fsData,
+	const ArticulationLink* links,
+	PxU32 linkIndex,
+	SolverConstraint1DExt& s,
+	const PxVec3& axis,
+	PxReal stiffness,
+	PxReal damping,
+	PxReal dt)
+{
+	init(s, PxVec3(0), PxVec3(0), axis, axis, -PX_MAX_F32, PX_MAX_F32);
+
+	// equal and opposite unit angular impulses on parent and child
+	const PxU32 parent = links[linkIndex].parent;
+	Cm::SpatialVector axis6(PxVec3(0), axis);
+	getImpulseSelfResponse(fsData, parent, axis6, s.deltaVA, linkIndex, -axis6, s.deltaVB);
+
+	const PxReal responseA = axis.dot(reinterpret_cast<PxVec3&>(s.deltaVA.angular));
+	const PxReal responseB = axis.dot(reinterpret_cast<PxVec3&>(s.deltaVB.angular));
+	const PxReal unitResponse = responseA - responseB;
+
+	if(unitResponse<0.0f)
+		Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "Warning: articulation ill-conditioned or under severe stress, tangential spring ignored");
+
+	PxReal recipResponse = 0.0f;
+	if(unitResponse>0.0F)
+		recipResponse = 1.0f/unitResponse;
+
+	// this is a specialization of the spring code in setSolverConstants() for acceleration springs.
+	// general case is b = dt * (c.mods.spring.damping * c.velocityTarget - c.mods.spring.stiffness * geomError);
+	// but geomError and velocityTarget are both zero
+
+	const PxReal a = dt * dt * stiffness + dt * damping;
+	const PxReal x = 1.0f/(1.0f+a);
+
+	s.unbiasedConstant = 0.0f;
+	s.constant = s.unbiasedConstant;
+	s.velMultiplier = -x * recipResponse * a;
+	s.impulseMultiplier = 1.0f - x;
+}
+/*
+ * Emits the joint-limit solver constraints of one articulation and returns the number of
+ * PxSolverConstraintDesc entries written to constraintDesc.
+ *
+ * For every non-root link whose inbound joint has a swing or twist limit enabled, the relative
+ * joint rotation cB2cA is split into swing and twist. A descriptor is written only if at least
+ * one limit is actually violated; it then contains a SolverConstraint1DHeader followed by one
+ * SolverConstraint1DExt row per active item, in this order: swing limit, tangential spring
+ * (only together with a swing limit and non-zero tangential stiffness or damping), upper twist
+ * limit, lower twist limit. Both ends of the descriptor refer to fsData (parent link and link i).
+ *
+ * acCount : output, reset to 0 and increased by the number of rows of every descriptor.
+ * dt      : time step; 1/dt is formed, so it must be non-zero.
+ */
+PxU32 ArticulationHelper::setupSolverConstraints( FsData& fsData, PxU32 solverDataSize,
+ PxcConstraintBlockStream& stream,
+ PxSolverConstraintDesc* constraintDesc,
+ const ArticulationLink* links,
+ const ArticulationJointTransforms* jointTransforms,
+ PxReal dt,
+ PxU32& acCount,
+ PxsConstraintBlockManager& constraintBlockManager)
+{
+ acCount = 0;
+
+ const PxU16 linkCount = fsData.linkCount;
+ PxU32 descCount = 0;
+ const PxReal recipDt = 1.0f/dt;
+
+ const PxConstraintInvMassScale ims(1.0f, 1.0f, 1.0f, 1.0f);
+
+ for(PxU16 i=1;i<linkCount;i++)
+ {
+ const ArticulationJointCore& j = *links[i].inboundJoint;
+
+ if(i+1<linkCount)
+ {
+ Ps::prefetch(links[i+1].inboundJoint, sizeof (ArticulationJointCore));
+ Ps::prefetch(&jointTransforms[i+1], sizeof(ArticulationJointTransforms));
+ }
+
+ if(!(j.twistLimited || j.swingLimited))
+ continue;
+
+ PxQuat swing, twist;
+ Ps::separateSwingTwist(jointTransforms[i].cB2cA.q, swing, twist);
+
+ Cm::ConeLimitHelper eh(j.tanQSwingY, j.tanQSwingZ, j.tanQSwingPad);
+ PxVec3 swingLimitAxis;
+ PxReal swingLimitError = 0.0f;
+
+ const bool swingLimited = j.swingLimited && eh.getLimit(swing, swingLimitAxis, swingLimitError);
+ const bool tangentialStiffness = swingLimited && (j.tangentialStiffness>0 || j.tangentialDamping>0);
+
+ const PxVec3 twistAxis = jointTransforms[i].cB2w.rotate(PxVec3(1.0f,0,0)); // twist is about the child joint frame's x axis, in world space
+ const PxReal tqTwistAngle = Ps::tanHalf(twist.x, twist.w);
+
+ const bool twistLowerLimited = j.twistLimited && tqTwistAngle < Cm::tanAdd(j.tanQTwistLow, j.tanQTwistPad); // limits become active within tanQTwistPad of the bound
+ const bool twistUpperLimited = j.twistLimited && tqTwistAngle > Cm::tanAdd(j.tanQTwistHigh, -j.tanQTwistPad);
+
+ const PxU8 constraintCount = PxU8(swingLimited + tangentialStiffness + twistUpperLimited + twistLowerLimited);
+ if(!constraintCount)
+ continue;
+
+ PxSolverConstraintDesc& desc = constraintDesc[descCount++];
+
+ desc.articulationA = &fsData;
+ desc.linkIndexA = Ps::to16(links[i].parent);
+ desc.articulationALength = Ps::to16(solverDataSize);
+
+ desc.articulationB = &fsData;
+ desc.linkIndexB = i;
+ desc.articulationBLength = Ps::to16(solverDataSize);
+
+ const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) +
+ sizeof(SolverConstraint1DExt) * constraintCount;
+
+ PX_ASSERT(0==(constraintLength & 0x0f));
+ desc.constraintLengthOver16 = Ps::to16(constraintLength/16);
+
+ desc.constraint = stream.reserve(constraintLength + 16u, constraintBlockManager); // NOTE(review): presumably the extra 16 bytes hold the terminator written below; the result is used without a NULL check - confirm reserve() cannot fail here
+
+ desc.writeBack = NULL;
+
+ SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
+ SolverConstraint1DExt* constraints = reinterpret_cast<SolverConstraint1DExt*>(desc.constraint + sizeof(SolverConstraint1DHeader));
+
+ init(*header, constraintCount, true, ims);
+
+ PxU32 cIndex = 0;
+
+ if(swingLimited)
+ {
+ const PxVec3 normal = jointTransforms[i].cA2w.rotate(swingLimitAxis);
+ createHardLimit(fsData, links, i, constraints[cIndex++], normal, swingLimitError, recipDt);
+ if(tangentialStiffness)
+ {
+ const PxVec3 tangent = twistAxis.cross(normal).getNormalized();
+ createTangentialSpring(fsData, links, i, constraints[cIndex++], tangent, j.tangentialStiffness, j.tangentialDamping, dt);
+ }
+ }
+
+ if(twistUpperLimited)
+ createHardLimit(fsData, links, i, constraints[cIndex++], twistAxis, (j.tanQTwistHigh - tqTwistAngle)*4, recipDt); // NOTE(review): the factor 4 presumably converts a tan-quarter-angle difference to an angle - confirm
+
+ if(twistLowerLimited)
+ createHardLimit(fsData, links, i, constraints[cIndex++], -twistAxis, -(j.tanQTwistLow - tqTwistAngle)*4, recipDt);
+
+ *(desc.constraint + getConstraintLength(desc)) = 0; // zero byte written directly after the constraint data
+
+ PX_ASSERT(cIndex == constraintCount);
+ acCount += constraintCount;
+ }
+
+ return descCount;
+}
+
+// Computes the drive term drives[i] of every non-root link i (drives[0] is not written).
+//
+// For each joint the angular position error (from the joint's target position) and the angular
+// velocity error (target velocity minus the current relative angular velocity of child and
+// parent) are combined as (spring * posError + damping * velError) * dt, scaled by the resistance
+// of the joint's internal compliance and multiplied by the joint load matrix loads[i].
+//
+// For eTARGET drives the position error is the rotation vector taking the current relative
+// rotation cB2cA to targetPosition; for every other drive type the imaginary part of
+// targetPosition is used directly.
+void ArticulationHelper::computeJointDrives(FsData& fsData,
+	Vec3V* drives,
+	const ArticulationLink* links,
+	const PxTransform* poses,
+	const ArticulationJointTransforms* transforms,
+	const Mat33V* loads,
+	PxReal dt)
+{
+	typedef ArticulationFnsScalar Fns;
+
+	const Cm::SpatialVector* velocity = reinterpret_cast<const Cm::SpatialVector*>(getVelocity(fsData));
+	const PxU32 linkCount = fsData.linkCount;
+
+	for(PxU32 link = 1; link < linkCount; ++link)
+	{
+		const ArticulationJointCore& joint = *links[link].inboundJoint;
+		const ArticulationJointTransforms& frames = transforms[link];
+		const PxU32 parent = links[link].parent;
+
+		// relative velocity of child and parent, both translated by (link origin - cA2w.p)
+		const Cm::SpatialVector currentVel = Fns::translateMotion(poses[link].p - frames.cA2w.p, velocity[link])
+										   - Fns::translateMotion(poses[parent].p - frames.cA2w.p, velocity[parent]);
+
+		// we want the quat such that q * cB2cA = targetPosition
+		PxVec3 rotVec;
+		if(joint.driveType != PxU8(PxArticulationJointDriveType::eTARGET))
+			rotVec = joint.targetPosition.getImaginaryPart();
+		else
+			rotVec = Ps::log(joint.targetPosition * frames.cB2cA.q.getConjugate());	// as a rotation vector
+
+		// NM's tests indicate behavior is better without the extra term
+		// (- currentVel.angular * 0.5f * dt) on posError, even though an implicit spring
+		// derivation suggests it should be there.
+		const PxVec3 posError = frames.cA2w.rotate(rotVec);
+		const PxVec3 velError = frames.cA2w.rotate(joint.targetVelocity) - currentVel.angular;
+
+		const PxVec3 driveError = joint.spring * posError + joint.damping * velError;
+		drives[link] = M33MulV3(loads[link], V3LoadU(driveError * dt * getResistance(joint.internalCompliance)));
+	}
+}
+// Definitions of the static members of ArticulationPImpl, all initialised to NULL. NOTE(review): presumably function pointers assigned elsewhere before use - not visible here, confirm.
+ArticulationPImpl::ComputeUnconstrainedVelocitiesFn ArticulationPImpl::sComputeUnconstrainedVelocities = NULL;
+ArticulationPImpl::UpdateBodiesFn ArticulationPImpl::sUpdateBodies = NULL;
+ArticulationPImpl::SaveVelocityFn ArticulationPImpl::sSaveVelocity = NULL;
+
+}
+}
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.h
new file mode 100644
index 00000000..1c2b28b7
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.h
@@ -0,0 +1,192 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_ARTICULATION_HELPER_H
+#define DY_ARTICULATION_HELPER_H
+
+
+#include "DyArticulation.h"
+
+namespace physx
+{
+struct PxsBodyCore;
+
+class PxcConstraintBlockStream;
+class PxcRigidBody;
+class PxsConstraintBlockManager;
+struct PxSolverConstraintDesc;
+
+namespace Dy
+{
+ struct FsInertia;
+ struct SolverConstraint1DExt;
+ struct ArticulationJointCore;
+ struct ArticulationSolverDesc;
+
+
+struct ArticulationJointTransforms // joint frame poses for one link; the helpers below take these as a per-link array
+{
+ PxTransform cA2w; // joint parent frame in world space
+ PxTransform cB2w; // joint child frame in world space
+ PxTransform cB2cA; // joint relative pose; NOTE(review): the name and its use in computeJointDrives ("q * cB2cA = targetPosition") suggest the child frame expressed in the parent frame rather than a world-space quantity - confirm in setJointTransforms
+};
+
+class ArticulationHelper // every member declared here is a static function; the class holds no data members
+{
+public: // computeUnconstrainedVelocities has the same signature as ArticulationPImpl::ComputeUnconstrainedVelocitiesFn
+ static PxU32 computeUnconstrainedVelocities(const ArticulationSolverDesc& desc,
+ PxReal dt,
+ PxcConstraintBlockStream& stream,
+ PxSolverConstraintDesc* constraintDesc,
+ PxU32& acCount,
+ PxsConstraintBlockManager& constraintBlockManager,
+ const PxVec3& gravity, PxU64 contextID);
+ // same signature as ArticulationPImpl::UpdateBodiesFn
+ static void updateBodies(const ArticulationSolverDesc& desc,
+ PxReal dt);
+
+ // SIMD-typed impulse response query: impulse in, deltaV out, for link linkID
+ static void getImpulseResponse(const FsData& matrix,
+ PxU32 linkID,
+ const Cm::SpatialVectorV& impulse,
+ Cm::SpatialVectorV& deltaV);
+
+ // scalar-typed convenience overload: reinterprets its arguments and forwards to the SIMD overload above
+ static PX_FORCE_INLINE
+ void getImpulseResponse(const FsData& matrix,
+ PxU32 linkID,
+ const Cm::SpatialVector& impulse,
+ Cm::SpatialVector& deltaV)
+ {
+ getImpulseResponse(matrix, linkID, reinterpret_cast<const Cm::SpatialVectorV&>(impulse), reinterpret_cast<Cm::SpatialVectorV&>(deltaV)); // NOTE(review): relies on Cm::SpatialVector and Cm::SpatialVectorV being layout- and alignment-compatible - confirm
+ }
+ // two-link variant: one impulse/deltaV pair per link ID
+ static void getImpulseSelfResponse(const FsData& matrix,
+ PxU32 linkID0,
+ const Cm::SpatialVectorV& impulse0,
+ Cm::SpatialVectorV& deltaV0,
+ PxU32 linkID1,
+ const Cm::SpatialVectorV& impulse1,
+ Cm::SpatialVectorV& deltaV1);
+
+ static void flushVelocity(FsData& matrix);
+ // same signature as ArticulationPImpl::SaveVelocityFn
+ static void saveVelocity(const ArticulationSolverDesc& m);
+ // reports three sizes for an articulation of linkCount links through the reference parameters
+ static void getDataSizes(PxU32 linkCount, PxU32 &solverDataSize, PxU32& totalSize, PxU32& scratchSize);
+ // caller supplies the scratch buffer and its size
+ static void initializeDriveCache(FsData &data,
+ PxU16 linkCount,
+ const ArticulationLink* links,
+ PxReal compliance,
+ PxU32 iterations,
+ char* scratchMemory,
+ PxU32 scratchMemorySize);
+
+ static PxU32 getDriveCacheLinkCount(const FsData& cache);
+ // Z and V are arrays of spatial vectors; NOTE(review): presumably one entry per link - confirm against the definition
+ static void applyImpulses(const FsData& matrix,
+ Cm::SpatialVectorV* Z,
+ Cm::SpatialVectorV* V);
+
+private:
+ static PxU32 getLtbDataSize(PxU32 linkCount);
+ static PxU32 getFsDataSize(PxU32 linkCount);
+ // setup helpers; their definitions are outside this header
+ static void prepareDataBlock(FsData& fsData,
+ const ArticulationLink* links,
+ PxU16 linkCount,
+ PxTransform* poses,
+ FsInertia *baseInertia,
+ ArticulationJointTransforms* jointTransforms,
+ PxU32 expectedSize);
+
+ static void setInertia(FsInertia& inertia,
+ const PxsBodyCore& body,
+ const PxTransform& pose);
+ // fills one ArticulationJointTransforms from the parent/child poses and the joint description
+ static void setJointTransforms(ArticulationJointTransforms& transforms,
+ const PxTransform& parentPose,
+ const PxTransform& childPose,
+ const ArticulationJointCore& joint);
+
+ static void prepareLtbMatrix(FsData& fsData,
+ const FsInertia* baseInertia,
+ const PxTransform* poses,
+ const ArticulationJointTransforms* jointTransforms,
+ PxReal recipDt);
+
+ static void prepareFsData(FsData& fsData,
+ const ArticulationLink* links);
+ // declared force-inline but not defined in this header; computeJointDrives scales its drive term by getResistance(j.internalCompliance)
+ static PX_FORCE_INLINE PxReal getResistance(PxReal compliance);
+
+ // constraint-row builders: each writes the SolverConstraint1DExt passed as s
+ static void createHardLimit(const FsData& fsData,
+ const ArticulationLink* links,
+ PxU32 linkIndex,
+ SolverConstraint1DExt& s,
+ const PxVec3& axis,
+ PxReal err,
+ PxReal recipDt);
+
+ static void createTangentialSpring(const FsData& fsData,
+ const ArticulationLink* links,
+ PxU32 linkIndex,
+ SolverConstraint1DExt& s,
+ const PxVec3& axis,
+ PxReal stiffness,
+ PxReal damping,
+ PxReal dt);
+
+ static PxU32 setupSolverConstraints(FsData& fsData, PxU32 solverDataSize,
+ PxcConstraintBlockStream& stream,
+ PxSolverConstraintDesc* constraintDesc,
+ const ArticulationLink* links,
+ const ArticulationJointTransforms* jointTransforms,
+ PxReal dt,
+ PxU32& acCount,
+ PxsConstraintBlockManager& constraintBlockManager);
+ // writes drives[i] for links 1..linkCount-1 from the joint spring/damping targets, scaled by loads[i]
+ static void computeJointDrives(FsData& fsData,
+ Ps::aos::Vec3V* drives,
+ const ArticulationLink* links,
+ const PxTransform* poses,
+ const ArticulationJointTransforms* transforms,
+ const Ps::aos::Mat33V* loads,
+ PxReal dt);
+
+};
+
+}
+
+}
+
+#endif //DY_ARTICULATION_HELPER_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationPImpl.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationPImpl.h
new file mode 100644
index 00000000..e73cc373
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationPImpl.h
@@ -0,0 +1,108 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef DY_ARTICULATION_INTERFACE_H
+#define DY_ARTICULATION_INTERFACE_H
+
+#include "DyArticulationUtils.h"
+
+namespace physx
+{
+
+class PxcConstraintBlockStream;
+class PxcScratchAllocator;
+class PxsConstraintBlockManager;
+struct PxSolverConstraintDesc;
+
+namespace Dy
+{
+
+ struct ArticulationSolverDesc;
+
+
+class ArticulationPImpl
+{
+public:
+ // Function-pointer types for the three articulation entry points that are bound at runtime.
+ typedef PxU32 (*ComputeUnconstrainedVelocitiesFn)(const ArticulationSolverDesc& desc,
+ PxReal dt,
+ PxcConstraintBlockStream& stream,
+ PxSolverConstraintDesc* constraintDesc,
+ PxU32& acCount,
+ PxsConstraintBlockManager& constraintBlockManager,
+ const PxVec3& gravity, PxU64 contextID);
+
+ typedef void (*UpdateBodiesFn)(const ArticulationSolverDesc& desc,
+ PxReal dt);
+
+ typedef void (*SaveVelocityFn)(const ArticulationSolverDesc &m);
+ // The bound implementations. Each forwarding wrapper below asserts on, and then tolerates, an unset pointer.
+ static ComputeUnconstrainedVelocitiesFn sComputeUnconstrainedVelocities;
+ static UpdateBodiesFn sUpdateBodies;
+ static SaveVelocityFn sSaveVelocity;
+ // Forwards to sComputeUnconstrainedVelocities without the scratch allocator argument; yields 0 when nothing is bound.
+ static PxU32 computeUnconstrainedVelocities(const ArticulationSolverDesc& desc,
+ PxReal dt,
+ PxcConstraintBlockStream& stream,
+ PxSolverConstraintDesc* constraintDesc,
+ PxU32& acCount,
+ PxcScratchAllocator&,
+ PxsConstraintBlockManager& constraintBlockManager,
+ const PxVec3& gravity, PxU64 contextID)
+ {
+ PX_ASSERT(sComputeUnconstrainedVelocities);
+ if(!sComputeUnconstrainedVelocities)
+ return 0;
+
+ return (*sComputeUnconstrainedVelocities)(desc, dt, stream, constraintDesc, acCount, constraintBlockManager, gravity, contextID);
+ }
+ // Forwards to sUpdateBodies; does nothing when it is unset.
+ static void updateBodies(const ArticulationSolverDesc& desc,
+ PxReal dt)
+ {
+ PX_ASSERT(sUpdateBodies);
+ const UpdateBodiesFn fn = sUpdateBodies;
+ if(fn) fn(desc, dt);
+ }
+ // Forwards to sSaveVelocity; does nothing when it is unset.
+ static void saveVelocity(const ArticulationSolverDesc& desc)
+ {
+ PX_ASSERT(sSaveVelocity);
+ const SaveVelocityFn fn = sSaveVelocity;
+ if(fn) fn(desc);
+ }
+};
+
+
+}
+}
+#endif //DY_ARTICULATION_INTERFACE_H
+
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationReference.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationReference.h
new file mode 100644
index 00000000..ff4d0d6e
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationReference.h
@@ -0,0 +1,92 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef DY_ARTICULATION_REFERENCE_H
+#define DY_ARTICULATION_REFERENCE_H
+
+// a per-row struct where we put extra data for debug and setup - ultimately this will move to be just
+// debug only
+
+
+
+#include "DyArticulationUtils.h"
+#include "DyArticulationScalar.h"
+#include "DyArticulationFnsScalar.h"
+#include "DySpatial.h"
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+
+namespace physx
+{
+
+PX_FORCE_INLINE Cm::SpatialVector propagateVelocity(const FsRow& row, // scalar velocity step for one link, self-checked against ArticulationRef::propagateVelocity
+ const FsJointVectors& jv,
+ const PxVec3& SZ,
+ const Cm::SpatialVector& v,
+ const FsRowAux& aux) // aux is only forwarded to the ArticulationRef check below
+{
+ typedef ArticulationFnsScalar Fns;
+ // w is v passed through translateMotion with the negated parent offset
+ Cm::SpatialVector w = Fns::translateMotion(-getParentOffset(jv), v);
+ PxVec3 DSZ = Fns::multiply(row.D, SZ);
+ // n = axisDot(DSI, w) + D*SZ; the result is w minus the spatial vector (jointOffset x n, n)
+ PxVec3 n = Fns::axisDot(getDSI(row), w) + DSZ;
+ Cm::SpatialVector result = w - Cm::SpatialVector(getJointOffset(jv).cross(n),n);
+#if DY_ARTICULATION_DEBUG_VERIFY // always true at this point: the function already sits inside the file-level #if DY_ARTICULATION_DEBUG_VERIFY
+ Cm::SpatialVector check = ArticulationRef::propagateVelocity(row, jv, SZ, v, aux); // NOTE(review): this function is in namespace physx while DyArticulationScalar.cpp defines ArticulationRef inside physx::Dy - confirm name lookup
+ PX_ASSERT((result-check).magnitude()<1e-5*PxMax(check.magnitude(), 1.0f)); // relative tolerance, with a floor of 1 on the reference magnitude
+#endif
+ return result;
+}
+
+PX_FORCE_INLINE Cm::SpatialVector propagateImpulse(const FsRow& row, // scalar impulse step for one link, self-checked against ArticulationRef::propagateImpulse
+ const FsJointVectors& jv,
+ PxVec3& SZ, // output: overwritten with the value computed below
+ const Cm::SpatialVector& Z,
+ const FsRowAux& aux) // aux is only forwarded to the ArticulationRef check below
+{
+ typedef ArticulationFnsScalar Fns;
+ // SZ is formed from the joint offset directly, whereas the ArticulationRef version uses axisDot with aux.S
+ SZ = Z.angular + Z.linear.cross(getJointOffset(jv));
+ Cm::SpatialVector result = Fns::translateForce(getParentOffset(jv), Z - Fns::axisMultiply(getDSI(row), SZ));
+#if DY_ARTICULATION_DEBUG_VERIFY // always true at this point: the function already sits inside the file-level #if DY_ARTICULATION_DEBUG_VERIFY
+ PxVec3 SZcheck;
+ Cm::SpatialVector check = ArticulationRef::propagateImpulse(row, jv, SZcheck, Z, aux);
+ PX_ASSERT((result-check).magnitude()<1e-5*PxMax(check.magnitude(), 1.0f)); // relative tolerance, with a floor of 1 on the reference magnitude
+ PX_ASSERT((SZ-SZcheck).magnitude()<1e-5*PxMax(SZcheck.magnitude(), 1.0f));
+#endif
+ return result;
+}
+
+}
+#endif
+
+#endif //DY_ARTICULATION_REFERENCE_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationSIMD.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationSIMD.cpp
new file mode 100644
index 00000000..e138c192
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationSIMD.cpp
@@ -0,0 +1,306 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxPreprocessor.h"
+#include "DySpatial.h"
+#include "DyArticulation.h"
+#include "DyArticulationScalar.h"
+#include "DyArticulationFnsScalar.h"
+#include "DyArticulationReference.h"
+#include "DyArticulationFnsSimd.h"
+
+
+namespace physx
+{
+namespace Dy
+{
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+namespace
+{
+ Cm::SpatialVector SpV(Vec3V linear, Vec3V angular)
+ { // debug-only helper: views each SIMD vector's storage as a PxVec3 and builds a scalar spatial vector from the pair
+ return Cm::SpatialVector(reinterpret_cast<PxVec3&>(linear), reinterpret_cast<PxVec3&>(angular));
+ }
+}
+#endif
+
+void PxcFsApplyImpulse(FsData &matrix, // records an impulse at link linkID in the deferred state (deferredSZ, deferredZ, dirty); the velocity array from getVelocity is not written here
+ PxU32 linkID,
+ Vec3V linear,
+ Vec3V angular)
+{
+#if DY_ARTICULATION_DEBUG_VERIFY
+ {
+ Cm::SpatialVectorV imp(linear, angular);
+ ArticulationRef::applyImpulse(matrix, reinterpret_cast<Cm::SpatialVector *>(getRefVelocity(matrix)), linkID, reinterpret_cast<Cm::SpatialVector&>(imp));
+ }
+#endif
+ // debug builds apply the same impulse to the reference velocities first, so the deferred path can later be compared with them
+ // the walk below carries the negated impulse, matching Z = -impulse in ArticulationRef::applyImpulse
+ Vec3V linZ = V3Neg(linear);
+ Vec3V angZ = V3Neg(angular);
+
+ const FsRow *rows = getFsRows(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ const FsRowAux *aux = getAux(matrix);
+#endif
+ Vec3V *deferredSZ = getDeferredSZ(matrix);
+ // climb from linkID through matrix.parent; index 0 (the root) ends the walk and is handled after the loop
+ for(PxU32 i = linkID; i!=0; i = matrix.parent[i])
+ {
+ const FsRow &row = rows[i];
+ const FsJointVectors& jv = jointVectors[i];
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ PxVec3 SZcheck;
+ Cm::SpatialVector Zcheck = ArticulationRef::propagateImpulse(row, jv, SZcheck, SpV(linZ, angZ), aux[i]);
+#endif
+ // SZ = angular + linear x jointOffset; then Z -= DSI[0]*SZ.x + DSI[1]*SZ.y + DSI[2]*SZ.z, separately for the linear and angular parts
+ Vec3V SZ = V3Add(angZ, V3Cross(linZ, jv.jointOffset));
+ Vec3V lrLinear = V3Sub(linZ, V3ScaleAdd(row.DSI[0].linear, V3GetX(SZ),
+ V3ScaleAdd(row.DSI[1].linear, V3GetY(SZ),
+ V3Scale(row.DSI[2].linear, V3GetZ(SZ)))));
+
+ Vec3V lrAngular = V3Sub(angZ, V3ScaleAdd(row.DSI[0].angular, V3GetX(SZ),
+ V3ScaleAdd(row.DSI[1].angular, V3GetY(SZ),
+ V3Scale(row.DSI[2].angular, V3GetZ(SZ)))));
+ // hand the reduced impulse to the parent: linear part unchanged, angular part gains parentOffset x linear
+ linZ = lrLinear;
+ angZ = V3Add(lrAngular, V3Cross(jv.parentOffset, lrLinear));
+ deferredSZ[i] = V3Add(deferredSZ[i], SZ); // accumulated, not overwritten: PxcFsGetVelocity / PxcFsFlushVelocity read this entry and reset it to zero
+
+ PX_ASSERT(Ps::aos::isFiniteVec3V(linZ));
+ PX_ASSERT(Ps::aos::isFiniteVec3V(angZ));
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ Cm::SpatialVector Z = SpV(linZ,angZ);
+ PX_ASSERT((Z - Zcheck).magnitude()<1e-4*PxMax(Zcheck.magnitude(), 1.0f));
+ PX_ASSERT(((PxVec3&)SZ-SZcheck).magnitude()<1e-4*PxMax(SZcheck.magnitude(), 1.0f));
+#endif
+ }
+ // whatever reaches the root is added to deferredZ
+ matrix.deferredZ.linear = V3Add(matrix.deferredZ.linear, linZ);
+ matrix.deferredZ.angular = V3Add(matrix.deferredZ.angular, angZ);
+ // flag every link in linkID's pathToRoot bitfield as dirty
+ matrix.dirty |= rows[linkID].pathToRoot;
+}
+
+Cm::SpatialVectorV PxcFsGetVelocity(FsData &matrix, // returns V[linkID] after first applying any deferred impulses that are dirty on linkID's path to the root
+ PxU32 linkID)
+{
+ const FsRow *rows = getFsRows(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ const FsRowAux *aux = getAux(matrix);
+#endif
+ Cm::SpatialVectorV* PX_RESTRICT V = getVelocity(matrix);
+
+ // find the dirty node on the path (including the root) with the lowest index
+ ArticulationBitField toUpdate = rows[linkID].pathToRoot & matrix.dirty;
+ // toUpdate == 0: nothing on the path is dirty, so V[linkID] is returned as it stands
+
+ if(toUpdate)
+ {
+ // store the dV elements densely and use an array map to decode - hopefully cache friendlier
+ PxU32 indexToStackLoc[DY_ARTICULATION_MAX_SIZE], count = 0;
+ Cm::SpatialVectorV dVStack[DY_ARTICULATION_MAX_SIZE];
+
+ ArticulationBitField ignoreNodes = (toUpdate & (0-toUpdate))-1; // toUpdate & (0-toUpdate) isolates the lowest set bit; minus one gives a mask of every bit below it
+ ArticulationBitField path = rows[linkID].pathToRoot & ~ignoreNodes, p = path; // path: the path bits at or above the lowest dirty one; p is the working copy consumed below
+ ArticulationBitField newDirty = 0;
+
+ Vec3V ldV = V3Zero(), adV = V3Zero();
+ Cm::SpatialVectorV* PX_RESTRICT defV = getDeferredVel(matrix);
+ Vec3V* PX_RESTRICT SZ = getDeferredSZ(matrix);
+ // bit 0 is the root: turn the accumulated deferredZ into a root velocity change through the root inverse inertia
+ if(p & 1)
+ {
+ const FsInertia &m = getRootInverseInertia(matrix);
+ Vec3V lZ = V3Neg(matrix.deferredZ.linear);
+ Vec3V aZ = V3Neg(matrix.deferredZ.angular);
+
+ ldV = V3Add(M33MulV3(m.ll,lZ), M33MulV3(m.la,aZ));
+ adV = V3Add(M33TrnspsMulV3(m.la,lZ), M33MulV3(m.aa,aZ));
+
+ V[0].linear = V3Add(V[0].linear, ldV);
+ V[0].angular = V3Add(V[0].angular, adV);
+
+ matrix.deferredZ.linear = V3Zero();
+ matrix.deferredZ.angular = V3Zero();
+
+ indexToStackLoc[0] = count;
+ Cm::SpatialVectorV &e = dVStack[count++];
+
+ e.linear = ldV;
+ e.angular = adV;
+
+ newDirty = rows[0].children;
+ p--; // bit 0 is known to be set here, so the decrement simply clears it
+ }
+
+ // remaining path links in increasing index order; ldV/adV carry the previous link's delta into the next one
+ while(p) // using "for(;p;p &= (p-1))" here generates LHSs from the ArticulationLowestSetBit
+ {
+ PxU32 i = ArticulationLowestSetBit(p);
+ const FsJointVectors& jv = jointVectors[i];
+
+ p &= (p-1);
+
+ const FsRow* PX_RESTRICT row = rows + i;
+ // add the velocity delta that was deferred to this link earlier
+ ldV = V3Add(ldV, defV[i].linear);
+ adV = V3Add(adV, defV[i].angular);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ Cm::SpatialVector dVcheck = ArticulationRef::propagateVelocity(*row, jv, (PxVec3&)SZ[i], SpV(ldV,adV), aux[i]);
+#endif
+
+ Vec3V DSZ = M33MulV3(row->D, SZ[i]);
+ // same arithmetic as propagateVelocitySIMD, written out inline
+ Vec3V lW = V3Add(ldV, V3Cross(adV,jv.parentOffset));
+ Vec3V aW = adV;
+
+ const Cm::SpatialVectorV*PX_RESTRICT DSI = row->DSI;
+ Vec3V lN = V3Merge(V3Dot(DSI[0].linear, lW), V3Dot(DSI[1].linear, lW), V3Dot(DSI[2].linear, lW));
+ Vec3V aN = V3Merge(V3Dot(DSI[0].angular, aW), V3Dot(DSI[1].angular, aW), V3Dot(DSI[2].angular, aW));
+
+ Vec3V n = V3Add(V3Add(lN, aN), DSZ);
+
+ ldV = V3Sub(lW, V3Cross(jv.jointOffset,n));
+ adV = V3Sub(aW, n);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ Cm::SpatialVector dV = SpV(ldV,adV);
+ PX_ASSERT((dV-dVcheck).magnitude()<1e-4*PxMax(dVcheck.magnitude(), 1.0f));
+#endif
+
+ V[i].linear = V3Add(V[i].linear, ldV);
+ V[i].angular = V3Add(V[i].angular, adV);
+ // this link's deferred inputs are now consumed
+ defV[i].linear = V3Zero();
+ defV[i].angular = V3Zero();
+ SZ[i] = V3Zero();
+
+ indexToStackLoc[i] = count;
+ Cm::SpatialVectorV &e = dVStack[count++];
+ newDirty |= rows[i].children;
+
+ e.linear = ldV;
+ e.angular = adV;
+ }
+ // children of updated links that are not themselves on the path get their parent's delta deferred to them
+ for(ArticulationBitField defer = newDirty&~path; defer; defer &= (defer-1))
+ {
+ PxU32 i = ArticulationLowestSetBit(defer);
+ PxU32 parent = indexToStackLoc[matrix.parent[i]]; // the parent was processed above, so its indexToStackLoc slot has been written
+
+ defV[i].linear = V3Add(defV[i].linear, dVStack[parent].linear);
+ defV[i].angular = V3Add(defV[i].angular, dVStack[parent].angular);
+ }
+ // the processed path becomes clean; the children that received deferred deltas become dirty
+ matrix.dirty = (matrix.dirty | newDirty)&~path;
+ }
+#if DY_ARTICULATION_DEBUG_VERIFY
+ Cm::SpatialVector v = reinterpret_cast<Cm::SpatialVector&>(V[linkID]);
+ Cm::SpatialVector rv = reinterpret_cast<Cm::SpatialVector&>(getRefVelocity(matrix)[linkID]);
+ PX_ASSERT((v-rv).magnitude()<1e-4f * PxMax(rv.magnitude(),1.0f));
+#endif
+
+ return V[linkID];
+}
+
+PX_FORCE_INLINE Cm::SpatialVectorV propagateVelocitySIMD(const FsRow& row, // SIMD velocity step for one link; debug builds compare it with ArticulationRef::propagateVelocity
+ const FsJointVectors& jv,
+ const Vec3V& SZ,
+ const Cm::SpatialVectorV& v,
+ const FsRowAux& aux)
+{
+ PX_UNUSED(aux); // aux is only read by the debug check at the end
+
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+ // w: linear part gains angular x parentOffset, angular part is passed through
+ Cm::SpatialVectorV w(V3Add(v.linear, V3Cross(v.angular, jv.parentOffset)), v.angular);
+ Vec3V DSZ = M33MulV3(row.D, SZ);
+ // n = axisDot(DSI, w) + D*SZ; the result is w minus the spatial vector (jointOffset x n, n)
+ Vec3V n = V3Add(Fns::axisDot(row.DSI, w), DSZ);
+ Cm::SpatialVectorV result = w - Cm::SpatialVectorV(V3Cross(jv.jointOffset,n), n);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ Cm::SpatialVector check = ArticulationRef::propagateVelocity(row, jv, reinterpret_cast<const PxVec3&>(SZ), reinterpret_cast<const Cm::SpatialVector&>(v), aux);
+ PX_ASSERT((reinterpret_cast<const Cm::SpatialVector&>(result)-check).magnitude()<1e-4*PxMax(check.magnitude(), 1.0f));
+#endif
+
+ return result;
+}
+
+void PxcFsFlushVelocity(FsData& matrix) // applies all deferred impulses to every link's velocity; clears deferredZ, the deferred velocity/SZ entries of links 1.. and the dirty mask
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ const FsRow* PX_RESTRICT rows = getFsRows(matrix);
+ const FsRowAux* PX_RESTRICT aux = getAux(matrix); // fetched unconditionally; propagateVelocitySIMD reads it only under DY_ARTICULATION_DEBUG_VERIFY
+ const FsJointVectors*PX_RESTRICT jointVectors = getJointVectors(matrix);
+ // root: velocity change = root inverse inertia applied to the negated deferred impulse
+ Cm::SpatialVectorV V0 = Fns::multiply(getRootInverseInertia(matrix), -matrix.deferredZ);
+ matrix.deferredZ = Cm::SpatialVectorV(PxZero);
+
+ getVelocity(matrix)[0] += V0;
+ for(ArticulationBitField defer = rows[0].children; defer; defer &= (defer-1))
+ getDeferredVel(matrix)[ArticulationLowestSetBit(defer)] += V0;
+ // forward sweep; NOTE(review): presumes every child has a larger index than its parent, so its deferred velocity is complete when it is reached - confirm
+ for(PxU32 i = 1; i<matrix.linkCount; i++)
+ {
+ Cm::SpatialVectorV V = propagateVelocitySIMD(rows[i], jointVectors[i], getDeferredSZ(matrix)[i], getDeferredVel(matrix)[i], aux[i]);
+ getDeferredVel(matrix)[i] = Cm::SpatialVectorV(PxZero);
+ getDeferredSZ(matrix)[i] = V3Zero();
+ getVelocity(matrix)[i] += V;
+ for(ArticulationBitField defer = rows[i].children; defer; defer &= (defer-1))
+ getDeferredVel(matrix)[ArticulationLowestSetBit(defer)] += V; // each child starts from its parent's velocity change
+ }
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ for(PxU32 i=0;i<matrix.linkCount;i++)
+ {
+ Cm::SpatialVector v = velocityRef(matrix,i), rv = reinterpret_cast<Cm::SpatialVector&>(getRefVelocity(matrix)[i]);
+ Cm::SpatialVector diff = v-rv;
+ PxReal m = rv.magnitude();
+ PX_UNUSED(m);
+ PX_ASSERT(diff.magnitude()<1e-4*PxMax(1.0f,m));
+ }
+#endif
+ // nothing is deferred any more
+ matrix.dirty = 0;
+}
+}
+}
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.cpp
new file mode 100644
index 00000000..af00a367
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.cpp
@@ -0,0 +1,575 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "DyArticulationUtils.h"
+#include "DyArticulationScalar.h"
+#include "DyArticulationReference.h"
+#include "DyArticulationFnsDebug.h"
+
+namespace physx
+{
+namespace Dy
+{
+namespace ArticulationRef
+{
+ Cm::SpatialVector propagateImpulse(const FsRow& row,
+ const FsJointVectors& j,
+ PxVec3& SZ,
+ const Cm::SpatialVector& Z,
+ const FsRowAux& aux)
+ { // reference impulse step for one link: writes SZ = axisDot(aux.S, Z), returns the impulse handed on via translateForce
+ typedef ArticulationFnsScalar Fns;
+ const Cm::SpatialVector* S = reinterpret_cast<const Cm::SpatialVector*>(aux.S);
+ SZ = Fns::axisDot(S, Z);
+ return Fns::translateForce(getParentOffset(j), Z - Fns::axisMultiply(getDSI(row), SZ));
+ }
+
+ Cm::SpatialVector propagateVelocity(const FsRow& row,
+ const FsJointVectors& j,
+ const PxVec3& SZ,
+ const Cm::SpatialVector& v,
+ const FsRowAux& aux)
+ { // reference velocity step for one link: translate v by the negated parent offset, then subtract axisMultiply(aux.S, n)
+ typedef ArticulationFnsScalar Fns;
+ const Cm::SpatialVector* S = reinterpret_cast<const Cm::SpatialVector*>(aux.S);
+ const Cm::SpatialVector w = Fns::translateMotion(-getParentOffset(j), v);
+ const PxVec3 DSZ = Fns::multiply(row.D, SZ);
+ const PxVec3 n = DSZ + Fns::axisDot(getDSI(row), w);
+ return w - Fns::axisMultiply(S, n);
+ }
+
+ void applyImpulse(const FsData& matrix,
+ Cm::SpatialVector* velocity,
+ PxU32 linkID,
+ const Cm::SpatialVector& impulse)
+ {
+ typedef ArticulationFnsScalar Fns;
+
+ PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsRow* rows = getFsRows(matrix);
+
+ PxVec3 SZ[DY_ARTICULATION_MAX_SIZE];
+ Cm::SpatialVector dV[DY_ARTICULATION_MAX_SIZE];
+ // links that are not on the path from linkID to the root keep a zero SZ term
+ for(PxU32 link=0; link<matrix.linkCount; ++link)
+ SZ[link] = PxVec3(0);
+ // rootward pass: carry the negated impulse from linkID up the parent chain, recording SZ per visited link
+ Cm::SpatialVector Z = -impulse;
+ for(PxU32 link=linkID; link!=0; link=matrix.parent[link])
+ Z = ArticulationRef::propagateImpulse(rows[link], jointVectors[link], SZ[link], Z, aux[link]);
+
+ // root response to what arrived there
+ dV[0] = Fns::getRootDeltaV(matrix,-Z);
+ // leafward pass: each link's delta is derived from its parent's, which has a lower index slot already filled
+ for(PxU32 child=1; child<matrix.linkCount; ++child)
+ dV[child] = ArticulationRef::propagateVelocity(rows[child], jointVectors[child], SZ[child], dV[matrix.parent[child]], aux[child]);
+ // finally fold the deltas into the caller's velocity array
+ for(PxU32 link=0; link<matrix.linkCount; ++link)
+ velocity[link] += dV[link];
+ }
+
+ void ltbFactor(FsData& m) // scalar reference factorisation of the LTB rows: rewrites each row's j0, j1, jResponse and inertia in place
+ {
+ typedef ArticulationFnsScalar Fns;
+ LtbRow* rows = getLtbRows(m);
+ // work on scalar copies of the row inertias; NOTE(review): unlike applyImpulse there is no assert that linkCount <= DY_ARTICULATION_MAX_SIZE
+ SpInertia inertia[DY_ARTICULATION_MAX_SIZE];
+ for(PxU32 i=0;i<m.linkCount;i++)
+ inertia[i] = ArticulationFnsDebug::unsimdify(rows[i].inertia);
+
+ Cm::SpatialVector j[3];
+ for(PxU32 i=m.linkCount; --i>0;) // visits linkCount-1 down to 1; NOTE(review): --i would wrap if linkCount were 0 - presumably never the case, confirm
+ {
+ LtbRow& b = rows[i];
+ inertia[i] = Fns::invertInertia(inertia[i]);
+ PxU32 p = m.parent[i];
+
+ Cm::SpatialVector* j0 = &reinterpret_cast<Cm::SpatialVector&>(*b.j0),
+ * j1 = &reinterpret_cast<Cm::SpatialVector&>(*b.j1);
+ // j is scratch: first the product of the inverted inertia with j1, later the product of j0 with jResponse
+ Fns::multiply(j, inertia[i], j1);
+ PxMat33 jResponse = Fns::invertSym33(-Fns::multiplySym(j, j1));
+ j1[0] = j[0]; j1[1] = j[1]; j1[2] = j[2]; // j1 is overwritten only after multiplySym has read the old values
+
+ b.jResponse = Mat33V_From_PxMat33(jResponse);
+ Fns::multiply(j, j0, jResponse);
+ inertia[p] = Fns::multiplySubtract(inertia[p], j, j0); // the parent's inertia is updated before the parent itself is visited
+ j0[0] = j[0]; j0[1] = j[1]; j0[2] = j[2]; // likewise j0 is overwritten only after multiplySubtract has used it
+ }
+ // write back: the root stores its inverted inertia, the other links store the inertia inverted in the loop above
+ rows[0].inertia = Fns::invertInertia(inertia[0]);
+ for(PxU32 i=1;i<m.linkCount;i++)
+ rows[i].inertia = inertia[i];
+ }
+
+
+}
+
+#if 0
+
+
+void ltbSolve(const FsData& m, // sits after the file-scope "#if 0" above, so it is not compiled as the file stands
+ Vec3V* c, // rhs error to solve for
+ Cm::SpatialVector* y) // velocity delta output
+{
+ typedef ArticulationFnsScalar Fns;
+ // b views the rhs array c as PxVec4 and is modified in place; y is cleared before accumulation
+ PxVec4* b = reinterpret_cast<PxVec4*>(c);
+ const LtbRow* rows = getLtbRows(m);
+ PxMemZero(y, m.linkCount*sizeof(Cm::SpatialVector));
+ // first sweep, last link down to link 1: reduce b[i] by axisDot(j1, y[i]) and push axisMultiply(j0, b[i]) into the parent's y
+ for(PxU32 i=m.linkCount;i-->1;)
+ {
+ PxU32 p = m.parent[i];
+ const LtbRow& r = rows[i];
+ b[i] -= PxVec4(Fns::axisDot(&static_cast<const Cm::SpatialVector&>(*r.j1), y[i]),0);
+ y[p] -= Fns::axisMultiply(&static_cast<const Cm::SpatialVector&>(*r.j0), b[i].getXYZ());
+ }
+ // root: multiply by rows[0].inertia (ltbFactor stores the inverted root inertia there)
+ y[0] = Fns::multiply(rows[0].inertia,y[0]);
+ // second sweep, link 1 upwards: each link's y uses its parent's already final y[p]
+ for(PxU32 i=1; i<m.linkCount; i++)
+ {
+ PxU32 p = m.parent[i];
+ const LtbRow& r = rows[i];
+ PxVec3 t = Fns::multiply(r.jResponse, b[i].getXYZ()) - Fns::axisDot(&static_cast<const Cm::SpatialVector&>(*r.j0), y[p]);
+ y[i] = Fns::multiply(r.inertia, y[i]) - Fns::axisMultiply(&static_cast<const Cm::SpatialVector&>(*r.j1), t);
+ }
+}
+
+void PxcFsPropagateDrivenInertiaScalar(FsData& matrix, // scalar version; sits after the file-scope "#if 0" above, so it is not compiled as the file stands
+ const FsInertia* baseInertia,
+ const PxReal* isf,
+ const Mat33V* load)
+{
+ typedef ArticulationFnsScalar Fns;
+ // per-link scratch, reused on every loop iteration
+ Cm::SpatialVector IS[3], DSI[3];
+ PxMat33 D;
+
+ FsRow* rows = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+ // start from scalar copies of the per-link base inertias
+ SpInertia inertia[DY_ARTICULATION_MAX_SIZE];
+ for(PxU32 i=0;i<matrix.linkCount;i++)
+ inertia[i] = ArticulationFnsDebug::unsimdify(baseInertia[i]);
+ // last link down to link 1: fill row i's D and DSI, then add this link's remaining inertia to its parent
+ for(PxU32 i=matrix.linkCount; --i>0;)
+ {
+ FsRow& r = rows[i];
+ const FsRowAux& a = aux[i];
+ const FsJointVectors& jv = jointVectors[i];
+
+ Fns::multiply(IS, inertia[i], &static_cast<const Cm::SpatialVector&>(*a.S));
+
+ PX_ALIGN(16, PxMat33) L;
+ PxMat33_From_Mat33V(load[i], L);
+ D = Fns::invertSym33(Fns::multiplySym(&static_cast<const Cm::SpatialVector&>(*a.S), IS) + L*isf[i]); // the joint load, scaled by isf[i], is added before the inversion
+
+ Fns::multiply(DSI, IS, D);
+
+ r.D = Mat33V_From_PxMat33(D);
+ static_cast<Cm::SpatialVector&>(r.DSI[0]) = DSI[0];
+ static_cast<Cm::SpatialVector&>(r.DSI[1]) = DSI[1];
+ static_cast<Cm::SpatialVector&>(r.DSI[2]) = DSI[2];
+
+ inertia[matrix.parent[i]] += Fns::translate(getParentOffset(jv), Fns::multiplySubtract(inertia[i], DSI, IS));
+ }
+ // the root's accumulated inertia is inverted and stored as the root inverse inertia
+ FsInertia& m = getRootInverseInertia(matrix);
+ m = FsInertia(Fns::invertInertia(inertia[0]));
+}
+
+// no need to compile this except for verification, and it consumes huge amounts of stack space
+void PxcFsComputeJointLoadsScalar(const FsData& matrix, // iteratively estimates the per-joint load matrices; writes load[1..linkCount-1], load[0] is left untouched
+ const FsInertia*PX_RESTRICT baseInertia,
+ Mat33V*PX_RESTRICT load,
+ const PxReal*PX_RESTRICT isf,
+ PxU32 linkCount,
+ PxU32 maxIterations)
+{
+ typedef ArticulationFnsScalar Fns;
+
+ // the childward S
+ SpInertia leafwardInertia[DY_ARTICULATION_MAX_SIZE];
+ SpInertia rootwardInertia[DY_ARTICULATION_MAX_SIZE];
+ SpInertia inertia[DY_ARTICULATION_MAX_SIZE];
+ SpInertia contribToParent[DY_ARTICULATION_MAX_SIZE];
+
+ // total articulated inertia assuming the articulation is rooted here
+
+ const FsRow* row = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ PX_UNUSED(row);
+ // NOTE(review): load_ is passed to Fns::propagate in the first iteration before anything in this function has written it - confirm whether it needs an initial value
+ PxMat33 load_[DY_ARTICULATION_MAX_SIZE];
+
+ for(PxU32 iter=0;iter<maxIterations;iter++)
+ {
+ for(PxU32 i=0;i<linkCount;i++)
+ inertia[i] = ArticulationFnsDebug::unsimdify(baseInertia[i]);
+ // leaf-to-root pass: remember each link's leafward inertia and add its contribution to the parent
+ for(PxU32 i=linkCount;i-->1;)
+ {
+ const FsJointVectors& j = jointVectors[i];
+
+ leafwardInertia[i] = inertia[i];
+ contribToParent[i] = Fns::propagate(inertia[i], &static_cast<const Cm::SpatialVector&>(*aux[i].S), load_[i], isf[i]);
+ inertia[matrix.parent[i]] += Fns::translate((PxVec3&)j.parentOffset, contribToParent[i]);
+ }
+ // root-to-leaf pass: the rootward inertia is the parent's total, translated to the link, minus the link's own contribution
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ rootwardInertia[i] = Fns::translate(-(PxVec3&)jointVectors[i].parentOffset, inertia[matrix.parent[i]]) - contribToParent[i];
+ inertia[i] += Fns::propagate(rootwardInertia[i], &static_cast<const Cm::SpatialVector&>(*aux[i].S), load_[i], isf[i]);
+ }
+ // refresh the load estimates for the next iteration
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ load_[i] = Fns::computeDriveInertia(leafwardInertia[i], rootwardInertia[i], &static_cast<const Cm::SpatialVector&>(*aux[i].S));
+ PX_ASSERT(load_[i][0].isFinite() && load_[i][1].isFinite() && load_[i][2].isFinite()); // all three columns of link i's matrix; the third test used to read load_[2][i]
+ }
+ }
+ for(PxU32 i=1;i<linkCount;i++)
+ load[i] = Mat33V_From_PxMat33(load_[i]);
+}
+
+
+// Applies an impulse at link linkID without updating velocities immediately.
+// The negated impulse is passed through propagateImpulse() for every link on
+// the way from linkID up to (but excluding) link 0; each link's SZ output is
+// added to its deferred SZ slot. What remains is added to the deferred root
+// impulse, and every link on linkID's path to the root is flagged dirty.
+// NOTE(review): `state` is not declared in the visible part of the file.
+void PxcFsApplyImpulse(const FsData& matrix,
+                       PxU32 linkID,
+                       const Cm::SpatialVector& impulse)
+{
+#if DY_ARTICULATION_DEBUG_VERIFY
+    PxcFsRefApplyImpulse(matrix, state.refVelocity, linkID, impulse);
+#endif
+
+    Cm::SpatialVector Z = -impulse;
+
+    PxU32 link = linkID;
+    while(link != 0)
+    {
+        PxVec3 sz;
+        Z = propagateImpulse(matrix.row[link], sz, Z, matrix.aux[link]);
+        deferredSZRef(state, link) += sz;
+        link = matrix.row[link].parent;
+    }
+
+    static_cast<Cm::SpatialVector &>(state.deferredZ) += Z;
+    state.dirty |= matrix.row[linkID].pathToRoot;
+}
+
+// Returns the velocity of link linkID, first resolving any deferred updates
+// that affect it. Only links on linkID's path to the root, starting at the
+// lowest-indexed dirty one, are updated; velocity changes for children that
+// are off the path are parked in their deferred-velocity slots and those
+// children are marked dirty.
+// NOTE(review): `state` is not declared in the visible part of the file.
+Cm::SpatialVector PxcFsGetVelocity(const FsData& matrix,
+ PxU32 linkID)
+{
+ // find the dirty node on the path (including the root) with the lowest index
+ ArticulationBitField toUpdate = matrix.row[linkID].pathToRoot & state.dirty;
+
+ if(toUpdate)
+ {
+ // (x & -x) isolates the lowest set bit; subtracting 1 yields a mask of every bit below it.
+ ArticulationBitField ignoreNodes = (toUpdate & (0-toUpdate))-1;
+ // path = nodes on the way to the root from the first dirty node onwards; p is consumed bit by bit.
+ ArticulationBitField path = matrix.row[linkID].pathToRoot & ~ignoreNodes, p = path;
+ ArticulationBitField newDirty = 0;
+
+ Cm::SpatialVector dV = Cm::SpatialVector::zero();
+ // Bit 0 is the root: convert the deferred root impulse into a velocity change.
+ if(p & 1)
+ {
+ dV = getRootDeltaV(matrix, -deferredZ(state));
+
+ velocityRef(state, 0) += dV;
+ // Children of the root that are not on the path receive dV as deferred velocity.
+ for(ArticulationBitField defer = matrix.row[0].children & ~path; defer; defer &= (defer-1))
+ deferredVelRef(state, ArticulationLowestSetBit(defer)) += dV;
+
+ deferredZRef(state) = Cm::SpatialVector::zero();
+ newDirty = matrix.row[0].children;
+ // Bit 0 is known to be set here, so the decrement just clears it.
+ p--;
+ }
+
+ // Walk the remaining path nodes in increasing index order; p &= (p-1) clears the lowest set bit.
+ for(; p; p &= (p-1))
+ {
+ PxU32 i = ArticulationLowestSetBit(p);
+
+ // dV carries the previous path node's velocity change into this link.
+ dV = propagateVelocity(matrix.row[i], deferredSZ(state,i), dV + state.deferredVel[i], matrix.aux[i]);
+
+ velocityRef(state,i) += dV;
+ for(ArticulationBitField defer = matrix.row[i].children & ~path; defer; defer &= (defer-1))
+ deferredVelRef(state,ArticulationLowestSetBit(defer)) += dV;
+
+ newDirty |= matrix.row[i].children;
+ deferredVelRef(state,i) = Cm::SpatialVector::zero();
+ deferredSZRef(state,i) = PxVec3(0);
+ }
+
+ // Children of updated nodes become dirty; the path nodes themselves are now clean.
+ state.dirty = (state.dirty | newDirty)&~path;
+ }
+#if DY_ARTICULATION_DEBUG_VERIFY
+ // Compare against the reference velocity with a relative tolerance of 1e-4.
+ Cm::SpatialVector v = state.velocity[linkID];
+ Cm::SpatialVector rv = state.refVelocity[linkID];
+ PX_ASSERT((v-rv).magnitude()<1e-4f * rv.magnitude());
+#endif
+
+ return state.velocity[linkID];
+}
+
+// Resolves every deferred update in one pass so that all link velocities are
+// current. The root is handled first (deferred impulse -> velocity change),
+// then links 1 .. linkCount-1 in index order. Each link's velocity change is
+// added to the deferred-velocity slot of all of its children, and the link's
+// own deferred velocity and deferred SZ are cleared. The dirty mask is zero
+// afterwards.
+// The two velocity deltas use distinct names; previously the per-link delta
+// shadowed the root delta.
+// NOTE(review): `state` is not declared in the visible part of the file.
+void PxcFsFlushVelocity(const FsData& matrix)
+{
+    const Cm::SpatialVector rootDeltaV = getRootDeltaV(matrix, -deferredZ(state));
+    deferredZRef(state) = Cm::SpatialVector::zero();
+    velocityRef(state,0) += rootDeltaV;
+    // defer &= (defer-1) clears the lowest set bit, visiting each child once.
+    for(ArticulationBitField defer = matrix.row[0].children; defer; defer &= (defer-1))
+        deferredVelRef(state,ArticulationLowestSetBit(defer)) += rootDeltaV;
+
+    for(PxU32 i = 1; i<matrix.linkCount; i++)
+    {
+        const Cm::SpatialVector linkDeltaV = propagateVelocity(matrix.row[i], deferredSZ(state,i), state.deferredVel[i], matrix.aux[i]);
+        deferredVelRef(state,i) = Cm::SpatialVector::zero();
+        deferredSZRef(state,i) = PxVec3(0);
+        velocityRef(state,i) += linkDeltaV;
+        for(ArticulationBitField defer = matrix.row[i].children; defer; defer &= (defer-1))
+            deferredVelRef(state,ArticulationLowestSetBit(defer)) += linkDeltaV;
+    }
+
+    state.dirty = 0;
+}
+
+// Propagates driven inertia using the SIMD function set, with the working
+// inertia array taken from the supplied scratch allocator instead of the stack.
+// Links are visited from linkCount-1 down to 1; for each one rows[i].D and
+// rows[i].DSI are written and the link's remaining inertia is added to its
+// parent. The inverse of the accumulated root inertia is stored last.
+//   matrix      - articulation data block; FsRow entries and root inverse inertia are written
+//   baseInertia - per-link input inertia (copied, not modified)
+//   isf         - per-link scale applied to load[i]
+//   load        - per-link 3x3 load matrix
+//   allocator   - scratch allocator providing linkCount FsInertia entries
+// The unused function-scope `PxMat33 D` was removed; it was shadowed by the
+// Mat33V D declared inside the loop.
+void PxcFsPropagateDrivenInertiaScalar(FsData& matrix,
+                                       const FsInertia* baseInertia,
+                                       const PxReal* isf,
+                                       const Mat33V* load,
+                                       PxcFsScratchAllocator allocator)
+{
+    typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+    Cm::SpatialVectorV IS[3];
+
+    FsRow* rows = getFsRows(matrix);
+    const FsRowAux* aux = getAux(matrix);
+    const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+    FsInertia *inertia = allocator.alloc<FsInertia>(matrix.linkCount);
+    PxMemCopy(inertia, baseInertia, matrix.linkCount*sizeof(FsInertia));
+
+    // Visits i = linkCount-1 .. 1; the root (index 0) is handled after the loop.
+    for(PxU32 i=matrix.linkCount; --i>0;)
+    {
+        FsRow& r = rows[i];
+        const FsRowAux& a = aux[i];
+        const FsJointVectors& jv = jointVectors[i];
+
+        Mat33V m = Fns::computeSIS(inertia[i], a.S, IS);
+        FloatV f = FLoad(isf[i]);
+
+        // D = inverse of (m + load[i] * isf[i]), built column by column.
+        Mat33V D = Fns::invertSym33(Mat33V(V3ScaleAdd(load[i].col0, f, m.col0),
+                                           V3ScaleAdd(load[i].col1, f, m.col1),
+                                           V3ScaleAdd(load[i].col2, f, m.col2)));
+        r.D = D;
+
+        // multiplySubtract also receives r.DSI; the result is shifted by the parent offset and added to the parent.
+        inertia[matrix.parent[i]] = Fns::addInertia(inertia[matrix.parent[i]],
+            Fns::translateInertia(jv.parentOffset, Fns::multiplySubtract(inertia[i], D, IS, r.DSI)));
+    }
+
+    getRootInverseInertia(matrix) = Fns::invertInertia(inertia[0]);
+}
+
+// Solves the LTB system for the articulation in two sweeps over the links:
+// a sweep from the last link down to link 1 that updates b and accumulates
+// into the parents' y, then a sweep from link 1 upwards that produces the
+// final y for every link.
+//   m - articulation data block (LtbRow array and parent table are read)
+//   c - in/out: right-hand side per link; modified in place during the first sweep
+//   y - out: per-link result, zeroed on entry
+void PxcLtbSolve(const FsData& m,
+ Vec3V* c, // rhs error to solve for
+ Cm::SpatialVector* y) // velocity delta output
+{
+ typedef ArticulationFnsScalar Fns;
+
+ // The Vec3V array is accessed through PxVec4 so that its xyz part can be read with getXYZ().
+ PxVec4* b = reinterpret_cast<PxVec4*>(c);
+ const LtbRow* rows = getLtbRows(m);
+ PxMemZero(y, m.linkCount*sizeof(Cm::SpatialVector));
+
+ // First sweep: i = linkCount-1 .. 1 (post-decrement test, so the body sees the decremented value).
+ for(PxU32 i=m.linkCount;i-->1;)
+ {
+ PxU32 p = m.parent[i];
+ const LtbRow& r = rows[i];
+ b[i] -= PxVec4(Fns::axisDot(&static_cast<const Cm::SpatialVector&>(*r.j1), y[i]),0);
+ y[p] -= Fns::axisMultiply(&static_cast<const Cm::SpatialVector&>(*r.j0), b[i].getXYZ());
+ }
+
+ y[0] = Fns::multiply(rows[0].inertia,y[0]);
+
+ // Second sweep: i = 1 .. linkCount-1, each link reading its parent's already final y[p].
+ for(PxU32 i=1; i<m.linkCount; i++)
+ {
+ PxU32 p = m.parent[i];
+ const LtbRow& r = rows[i];
+ PxVec3 t = Fns::multiply(r.jResponse, b[i].getXYZ()) - Fns::axisDot(&static_cast<const Cm::SpatialVector&>(*r.j0), y[p]);
+ y[i] = Fns::multiply(r.inertia, y[i]) - Fns::axisMultiply(&static_cast<const Cm::SpatialVector&>(*r.j1), t);
+ }
+}
+
+
+#endif
+
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+// Scalar factorisation of the LTB rows, compiled only when
+// DY_ARTICULATION_DEBUG_VERIFY is enabled. Works on a local copy of the row
+// inertias, visiting links from linkCount-1 down to 1. For each link it
+// inverts the link inertia, overwrites j1 and j0 in place with the products
+// computed below, stores jResponse, and subtracts the joint's contribution
+// from the parent's inertia. Afterwards the rows' inertia fields are
+// overwritten: row 0 with the inverse of the accumulated root inertia, the
+// others with their inverted link inertia.
+void PxcLtbFactorScalar(FsData& m)
+{
+ typedef ArticulationFnsScalar Fns;
+ LtbRow* rows = getLtbRows(m);
+
+ SpInertia inertia[DY_ARTICULATION_MAX_SIZE];
+ for(PxU32 i=0;i<m.linkCount;i++)
+ inertia[i] = ArticulationFnsDebug::unsimdify(rows[i].inertia);
+
+ Cm::SpatialVector j[3];
+ // Visits i = linkCount-1 .. 1; a linkCount of 0 would wrap the unsigned pre-decrement.
+ for(PxU32 i=m.linkCount; --i>0;)
+ {
+ LtbRow& b = rows[i];
+ inertia[i] = Fns::invertInertia(inertia[i]);
+ PxU32 p = m.parent[i];
+
+ // Scalar views of the SIMD jacobian storage; writes through them modify the row.
+ Cm::SpatialVector* j0 = &reinterpret_cast<Cm::SpatialVector&>(*b.j0),
+ * j1 = &reinterpret_cast<Cm::SpatialVector&>(*b.j1);
+
+ Fns::multiply(j, inertia[i], j1);
+ PxMat33 jResponse = Fns::invertSym33(-Fns::multiplySym(j, j1));
+ // j1 is replaced by (inverted inertia) * j1.
+ j1[0] = j[0]; j1[1] = j[1]; j1[2] = j[2];
+
+ b.jResponse = Mat33V_From_PxMat33(jResponse);
+ Fns::multiply(j, j0, jResponse);
+ inertia[p] = Fns::multiplySubtract(inertia[p], j, j0);
+ // j0 is replaced by j0 * jResponse (after the parent update, which needs the old j0).
+ j0[0] = j[0]; j0[1] = j[1]; j0[2] = j[2];
+ }
+
+ rows[0].inertia = Fns::invertInertia(inertia[0]);
+ for(PxU32 i=1;i<m.linkCount;i++)
+ rows[i].inertia = inertia[i];
+}
+
+// Scalar propagation of driven inertia (verification build).
+// Copies baseInertia into a local SpInertia array, then visits links from
+// linkCount-1 down to 1. For each link it fills rows[i].D and rows[i].DSI and
+// adds the link's remaining inertia (translated by the parent offset) to its
+// parent. Finally writes the inverse of the accumulated root inertia.
+//   matrix      - articulation data block; FsRow entries and root inverse inertia are written
+//   baseInertia - per-link input inertia, indexed by link
+//   isf         - per-link scale applied to load[i] before it is added to S^T*I*S
+//   load        - per-link 3x3 load matrix
+void PxcFsPropagateDrivenInertiaScalar(FsData& matrix,
+ const FsInertia* baseInertia,
+ const PxReal* isf,
+ const Mat33V* load)
+{
+ typedef ArticulationFnsScalar Fns;
+
+ Cm::SpatialVector IS[3], DSI[3];
+ PxMat33 D;
+
+ FsRow* rows = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ // Working copy of the inertias; each parent accumulates contributions from its children below.
+ SpInertia inertia[DY_ARTICULATION_MAX_SIZE];
+ for(PxU32 i=0;i<matrix.linkCount;i++)
+ inertia[i] = ArticulationFnsDebug::unsimdify(baseInertia[i]);
+
+ // Visits i = linkCount-1 .. 1 (the root, index 0, is handled after the loop).
+ // A linkCount of 0 would wrap the unsigned pre-decrement, so at least one link is assumed.
+ for(PxU32 i=matrix.linkCount; --i>0;)
+ {
+ FsRow& r = rows[i];
+ const FsRowAux& a = aux[i];
+ const FsJointVectors& jv = jointVectors[i];
+
+ // a.S is SIMD storage; it is read through a scalar Cm::SpatialVector view.
+ Fns::multiply(IS, inertia[i], &reinterpret_cast<const Cm::SpatialVector&>(*a.S));
+
+ PX_ALIGN(16, PxMat33) L;
+ PxMat33_From_Mat33V(load[i], L);
+ // D = inverse of (multiplySym(S, IS) + load * isf)
+ D = Fns::invertSym33(Fns::multiplySym(&reinterpret_cast<const Cm::SpatialVector&>(*a.S), IS) + L*isf[i]);
+
+ Fns::multiply(DSI, IS, D);
+
+ // Store the results for this link back into its row.
+ r.D = Mat33V_From_PxMat33(D);
+ reinterpret_cast<Cm::SpatialVector&>(r.DSI[0]) = DSI[0];
+ reinterpret_cast<Cm::SpatialVector&>(r.DSI[1]) = DSI[1];
+ reinterpret_cast<Cm::SpatialVector&>(r.DSI[2]) = DSI[2];
+
+ // Push what is left of this link's inertia up to its parent, shifted by the parent offset.
+ inertia[matrix.parent[i]] += Fns::translate(getParentOffset(jv), Fns::multiplySubtract(inertia[i], DSI, IS));
+ }
+
+ FsInertia& m = getRootInverseInertia(matrix);
+ m = FsInertia(Fns::invertInertia(inertia[0]));
+}
+
+// No need to compile this except for verification, and it consumes huge amounts of stack space
+// (four SpInertia arrays and one PxMat33 array of DY_ARTICULATION_MAX_SIZE entries).
+//
+// Scalar computation of the per-joint load matrices. Each of the maxIterations
+// passes:
+//   1. resets inertia[] from baseInertia,
+//   2. sweeps links linkCount-1 .. 1, recording the leafward inertia and adding
+//      each link's propagated contribution to its parent,
+//   3. sweeps links 1 .. linkCount-1, deriving the rootward inertia and adding
+//      its propagated contribution to the link,
+//   4. recomputes load_[i] from the leafward and rootward inertias.
+// After the last pass load_[1..linkCount-1] is converted into load[]; load[0]
+// is not written.
+//   matrix        - articulation data block (aux, joint vectors and parent table are read)
+//   baseInertia   - per-link input inertia
+//   load          - output per-link load matrices
+//   isf           - per-link scalar forwarded to Fns::propagate
+//   linkCount     - number of links to process
+//   maxIterations - number of refinement passes
+// NOTE(review): on the first pass load_[i] is handed to Fns::propagate before
+// this function has written it; confirm whether it should be seeded (e.g. from
+// `load`) or whether propagate ignores it initially.
+void PxcFsComputeJointLoadsScalar(const FsData& matrix,
+                                  const FsInertia*PX_RESTRICT baseInertia,
+                                  Mat33V*PX_RESTRICT load,
+                                  const PxReal*PX_RESTRICT isf,
+                                  PxU32 linkCount,
+                                  PxU32 maxIterations)
+{
+    typedef ArticulationFnsScalar Fns;
+
+    // the childward S
+    SpInertia leafwardInertia[DY_ARTICULATION_MAX_SIZE];
+    SpInertia rootwardInertia[DY_ARTICULATION_MAX_SIZE];
+    SpInertia inertia[DY_ARTICULATION_MAX_SIZE];
+    SpInertia contribToParent[DY_ARTICULATION_MAX_SIZE];
+
+    // total articulated inertia assuming the articulation is rooted here
+
+    const FsRow* row = getFsRows(matrix);
+    const FsRowAux* aux = getAux(matrix);
+    const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+    PX_UNUSED(row);
+
+    PxMat33 load_[DY_ARTICULATION_MAX_SIZE];
+
+    for(PxU32 iter=0;iter<maxIterations;iter++)
+    {
+        for(PxU32 i=0;i<linkCount;i++)
+            inertia[i] = ArticulationFnsDebug::unsimdify(baseInertia[i]);
+
+        // Leaf-to-root sweep: i = linkCount-1 .. 1.
+        for(PxU32 i=linkCount;i-->1;)
+        {
+            const FsJointVectors& j = jointVectors[i];
+
+            leafwardInertia[i] = inertia[i];
+            contribToParent[i] = Fns::propagate(inertia[i], &reinterpret_cast<const Cm::SpatialVector&>(*aux[i].S), load_[i], isf[i]);
+            inertia[matrix.parent[i]] += Fns::translate((PxVec3&)j.parentOffset, contribToParent[i]);
+        }
+
+        // Root-to-leaf sweep: remove the link's own contribution from its parent's total.
+        for(PxU32 i=1;i<linkCount;i++)
+        {
+            rootwardInertia[i] = Fns::translate(-(PxVec3&)jointVectors[i].parentOffset, inertia[matrix.parent[i]]) - contribToParent[i];
+            inertia[i] += Fns::propagate(rootwardInertia[i], &reinterpret_cast<const Cm::SpatialVector&>(*aux[i].S), load_[i], isf[i]);
+        }
+
+        for(PxU32 i=1;i<linkCount;i++)
+        {
+            load_[i] = Fns::computeDriveInertia(leafwardInertia[i], rootwardInertia[i], &reinterpret_cast<const Cm::SpatialVector&>(*aux[i].S));
+            // Check all three columns of this link's matrix (the third was previously indexed as load_[2][i]).
+            PX_ASSERT(load_[i][0].isFinite() && load_[i][1].isFinite() && load_[i][2].isFinite());
+        }
+    }
+    for(PxU32 i=1;i<linkCount;i++)
+        load[i] = Mat33V_From_PxMat33(load_[i]);
+}
+#endif
+
+}
+
+}
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.h
new file mode 100644
index 00000000..8d639de3
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.h
@@ -0,0 +1,101 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef DY_ARTICULATION_SCALAR_H
+#define DY_ARTICULATION_SCALAR_H
+
+// Scalar helpers for articulations
+
+#include "foundation/PxUnionCast.h"
+#include "DyArticulationUtils.h"
+#include "DySpatial.h"
+#include "PsFPU.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+// Link i's entry in the velocity array, viewed as a scalar Cm::SpatialVector.
+PX_FORCE_INLINE Cm::SpatialVector& velocityRef(FsData &m, PxU32 i)
+{
+    Cm::SpatialVectorV& simdVelocity = getVelocity(m)[i];
+    return reinterpret_cast<Cm::SpatialVector&>(simdVelocity);
+}
+
+// Link i's entry in the deferred-velocity array, viewed as a scalar Cm::SpatialVector.
+PX_FORCE_INLINE Cm::SpatialVector& deferredVelRef(FsData &m, PxU32 i)
+{
+    Cm::SpatialVectorV& simdDeferredVel = getDeferredVel(m)[i];
+    return reinterpret_cast<Cm::SpatialVector&>(simdDeferredVel);
+}
+
+// Mutable scalar (PxVec3) view of link i's deferred SZ entry.
+PX_FORCE_INLINE PxVec3& deferredSZRef(FsData &m, PxU32 i)
+{
+    Vec3V& simdSZ = getDeferredSZ(m)[i];
+    return reinterpret_cast<PxVec3 &>(simdSZ);
+}
+
+// Read-only scalar (PxVec3) view of link i's deferred SZ entry.
+PX_FORCE_INLINE const PxVec3& deferredSZ(const FsData &s, PxU32 i)
+{
+    const Vec3V& simdSZ = getDeferredSZ(s)[i];
+    return reinterpret_cast<const PxVec3 &>(simdSZ);
+}
+
+// Mutable scalar view of the articulation's deferredZ member.
+PX_FORCE_INLINE Cm::SpatialVector& deferredZRef(FsData &s)
+{
+    Cm::SpatialVectorV& simdZ = s.deferredZ;
+    return unsimdRef(simdZ);
+}
+
+
+// Read-only scalar view of the articulation's deferredZ member.
+PX_FORCE_INLINE const Cm::SpatialVector& deferredZ(const FsData &s)
+{
+    const Cm::SpatialVectorV& simdZ = s.deferredZ;
+    return unsimdRef(simdZ);
+}
+
+// Scalar (PxVec3) view of the jointOffset field of a joint-vector record.
+PX_FORCE_INLINE const PxVec3& getJointOffset(const FsJointVectors& j)
+{
+    const Vec3V& simdOffset = j.jointOffset;
+    return reinterpret_cast<const PxVec3& >(simdOffset);
+}
+
+// Scalar (PxVec3) view of the parentOffset field of a joint-vector record.
+PX_FORCE_INLINE const PxVec3& getParentOffset(const FsJointVectors& j)
+{
+    const Vec3V& simdOffset = j.parentOffset;
+    return reinterpret_cast<const PxVec3&>(simdOffset);
+}
+
+
+
+
+// Returns the row's DSI array as a pointer to scalar Cm::SpatialVector,
+// converting the pointer with PxUnionCast rather than reinterpret_cast.
+PX_FORCE_INLINE const Cm::SpatialVector* getDSI(const FsRow& row)
+{
+    const Cm::SpatialVectorV* const simdDSI = row.DSI;
+    return PxUnionCast<const Cm::SpatialVector*,const Cm::SpatialVectorV*>(simdDSI);
+}
+
+}
+
+}
+
+#endif //DY_ARTICULATION_SCALAR_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationUtils.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationUtils.h
new file mode 100644
index 00000000..67c4270d
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationUtils.h
@@ -0,0 +1,317 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef DY_ARTICULATION_H
+#define DY_ARTICULATION_H
+
+#include "PsVecMath.h"
+#include "CmSpatialVector.h"
+#include "DySpatial.h"
+#include "PsBitUtils.h"
+#include "DyArticulation.h"
+#include "DyArticulationHelper.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+ struct ArticulationCore;
+ struct ArticulationLink;
+ typedef size_t ArticulationLinkHandle;
+ class Articulation;
+
+#define DY_ARTICULATION_DEBUG_VERIFY 0
+
+// Returns the index (0..63) of the lowest set bit of a 64-bit articulation
+// bit field, computed without branching from the two 32-bit halves.
+// val must be non-zero (checked by the first assert).
+PX_FORCE_INLINE PxU32 ArticulationLowestSetBit(ArticulationBitField val)
+{
+ PxU32 low = PxU32(val&0xffffffff), high = PxU32(val>>32);
+ // mask is all ones when the low half is non-zero and zero when it is empty.
+ PxU32 mask = PxU32((!low)-1);
+ // Both halves are scanned unconditionally and the mask picks the relevant result.
+ // NOTE(review): this calls lowestSetBitUnsafe() on a zero half in some cases; presumably
+ // the discarded result is harmless - confirm against the intrinsic's contract.
+ PxU32 result = (mask&Ps::lowestSetBitUnsafe(low)) | ((~mask)&(Ps::lowestSetBitUnsafe(high)+32));
+ // The chosen bit is set, and no bit below it is set.
+ PX_ASSERT(val & (PxU64(1)<<result));
+ PX_ASSERT(!(val & ((PxU64(1)<<result)-1)));
+ return result;
+}
+
+using namespace Ps::aos;
+
+
+
+// Reinterprets a SIMD spatial vector as its scalar counterpart (mutable).
+PX_FORCE_INLINE Cm::SpatialVector& unsimdRef(Cm::SpatialVectorV& v)
+{
+    return reinterpret_cast<Cm::SpatialVector&>(v);
+}
+
+// Reinterprets a SIMD spatial vector as its scalar counterpart (read-only).
+PX_FORCE_INLINE const Cm::SpatialVector& unsimdRef(const Cm::SpatialVectorV& v)
+{
+    return reinterpret_cast<const Cm::SpatialVector&>(v);
+}
+
+
+// Per-link joint geometry, 16-byte aligned: two SIMD vectors per link.
+// Located inside the articulation block at FsData::jointVectorOffset.
+PX_ALIGN_PREFIX(16)
+struct FsJointVectors
+{
+ Vec3V parentOffset; // 16 bytes world-space offset from parent to child
+ Vec3V jointOffset; // 16 bytes world-space offset from child to joint
+}
+PX_ALIGN_SUFFIX(16);
+
+// Per-link row data, 16-byte aligned. D and DSI are written by the
+// driven-inertia propagation routines; children and pathToRoot are link bit
+// masks (bit i stands for link i).
+PX_ALIGN_PREFIX(16)
+struct FsRow
+{
+ Cm::SpatialVectorV DSI[3]; // 96 bytes
+ Mat33V D; // 48 bytes
+ ArticulationBitField children; // 8 bytes bitmap of children
+ ArticulationBitField pathToRoot; // 8 bytes bitmap of nodes to root, including self and root
+}
+PX_ALIGN_SUFFIX(16);
+
+// 96 + 48 + 8 + 8 bytes; fails to compile if the layout changes.
+PX_COMPILE_TIME_ASSERT(sizeof(FsRow)==160);
+
+
+
+// SIMD inertia made of three 3x3 blocks, 16-byte aligned. The blocks
+// correspond to SpInertia's mLL, mLA and mAA members (presumably the
+// linear-linear, linear-angular and angular-angular parts).
+PX_ALIGN_PREFIX(16)
+struct FsInertia
+{
+ Mat33V ll, la, aa;
+ // Builds the inertia directly from its three blocks.
+ PX_FORCE_INLINE FsInertia(const Mat33V& _ll, const Mat33V& _la, const Mat33V& _aa): ll(_ll), la(_la), aa(_aa) {}
+ // Converts a scalar SpInertia block by block.
+ PX_FORCE_INLINE FsInertia(const SpInertia& I)
+ : ll(Mat33V_From_PxMat33(I.mLL)), la(Mat33V_From_PxMat33(I.mLA)), aa(Mat33V_From_PxMat33(I.mAA)) {}
+ // Leaves the blocks as Mat33V's default constructor leaves them.
+ PX_FORCE_INLINE FsInertia() {}
+
+ // Column-by-column copy; returns void, so assignments cannot be chained.
+ PX_FORCE_INLINE void operator=(const FsInertia& other)
+ {
+ ll.col0 = other.ll.col0; ll.col1 = other.ll.col1; ll.col2 = other.ll.col2;
+ la.col0 = other.la.col0; la.col1 = other.la.col1; la.col2 = other.la.col2;
+ aa.col0 = other.aa.col0; aa.col1 = other.aa.col1; aa.col2 = other.aa.col2;
+ }
+
+ // Copy constructor performing the same column-by-column copy as operator=.
+ PX_FORCE_INLINE FsInertia(const FsInertia& other)
+ {
+ ll.col0 = other.ll.col0; ll.col1 = other.ll.col1; ll.col2 = other.ll.col2;
+ la.col0 = other.la.col0; la.col1 = other.la.col1; la.col2 = other.la.col2;
+ aa.col0 = other.aa.col0; aa.col1 = other.aa.col1; aa.col2 = other.aa.col2;
+ }
+
+}PX_ALIGN_SUFFIX(16);
+
+// Per-link row used by the LTB factor/solve routines, 16-byte aligned.
+// Rows are located inside the articulation block at FsData::ltbDataOffset.
+PX_ALIGN_PREFIX(16)
+struct LtbRow
+{
+ FsInertia inertia; // body inertia in world space
+ Cm::SpatialVectorV j0[3], j1[3]; // jacobians
+ Mat33V jResponse; // inverse response matrix of joint
+ Vec3V jC;
+} PX_ALIGN_SUFFIX(16);
+
+// Auxiliary per-link data, 16-byte aligned. The array follows the
+// reference-velocity array inside the articulation block (see getAux()).
+PX_ALIGN_PREFIX(16)
+struct FsRowAux
+{
+ Cm::SpatialVectorV S[3]; // motion subspace
+}PX_ALIGN_SUFFIX(16);
+
+
+// Header of an articulation data block. The per-link arrays are not members:
+// they live in the same allocation and are reached through the get*()
+// accessors, either directly after this header or at the byte offsets stored
+// here. The trailing number on each member is the running byte offset at the
+// END of that member.
+struct FsData
+{
+ Articulation* articulationX; //4
+
+#if !PX_P64_FAMILY
+ // Pads the 32-bit pointer so the following members sit at the same offsets as on 64-bit targets.
+ PxU32 pad0; //8
+#endif
+ PxU16 linkCount; // number of links //10
+ PxU16 jointVectorOffset; // offset of read-only data //12
+ PxU16 maxSolverNormalProgress; //14
+ PxU16 maxSolverFrictionProgress; //16
+
+ PxU64 dirty; //24
+ PxU16 ltbDataOffset; // offset of save-velocity data //26
+ PxU16 fsDataOffset; // offset of joint references //28
+ PxU32 solverProgress; //32
+
+
+ Cm::SpatialVectorV deferredZ; //64
+ PxU8 parent[DY_ARTICULATION_MAX_SIZE]; //128
+};
+
+// The header size must be a multiple of 16 bytes.
+PX_COMPILE_TIME_ASSERT(0 == (sizeof(FsData) & 0x0f));
+
+// Byte offsets matching FsData::solverProgress (starts at 28) and
+// FsData::maxSolverNormalProgress (starts at 12) in the layout above.
+#define SOLVER_BODY_SOLVER_PROGRESS_OFFSET 28
+#define SOLVER_BODY_MAX_SOLVER_PROGRESS_OFFSET 12
+
+namespace
+{
+    // Advances addr by `increment` bytes and reinterprets the result as T.
+    template<class T> PX_FORCE_INLINE T addAddr(void* addr, PxU32 increment)
+    {
+        char* const bytes = reinterpret_cast<char*>(addr);
+        return reinterpret_cast<T>(bytes + increment);
+    }
+
+    // Read-only overload: advances addr by `increment` bytes and reinterprets the result as T.
+    template<class T> PX_FORCE_INLINE T addAddr(const void* addr, PxU32 increment)
+    {
+        const char* const bytes = reinterpret_cast<const char*>(addr);
+        return reinterpret_cast<T>(bytes + increment);
+    }
+}
+
+// Per-link velocity array; it begins immediately after the FsData header.
+PX_FORCE_INLINE Cm::SpatialVectorV* getVelocity(FsData& matrix)
+{
+    void* const base = &matrix;
+    return addAddr<Cm::SpatialVectorV*>(base, sizeof(FsData));
+}
+
+// Read-only overload of getVelocity().
+PX_FORCE_INLINE const Cm::SpatialVectorV* getVelocity(const FsData& matrix)
+{
+    const void* const base = &matrix;
+    return addAddr<const Cm::SpatialVectorV*>(base, sizeof(FsData));
+}
+
+// Per-link deferred-velocity array; it follows the linkCount velocity entries.
+PX_FORCE_INLINE Cm::SpatialVectorV* getDeferredVel(FsData& matrix)
+{
+    Cm::SpatialVectorV* const velocities = getVelocity(matrix);
+    return addAddr<Cm::SpatialVectorV*>(velocities, sizeof(Cm::SpatialVectorV) * matrix.linkCount);
+}
+
+// Read-only overload of getDeferredVel().
+PX_FORCE_INLINE const Cm::SpatialVectorV* getDeferredVel(const FsData& matrix)
+{
+    const Cm::SpatialVectorV* const velocities = getVelocity(matrix);
+    return addAddr<const Cm::SpatialVectorV*>(velocities, sizeof(Cm::SpatialVectorV) * matrix.linkCount);
+}
+
+// Per-link deferred SZ array; it follows the linkCount deferred-velocity entries.
+PX_FORCE_INLINE Vec3V* getDeferredSZ(FsData& matrix)
+{
+    Cm::SpatialVectorV* const deferredVel = getDeferredVel(matrix);
+    return addAddr<Vec3V*>(deferredVel, sizeof(Cm::SpatialVectorV) * matrix.linkCount);
+}
+
+// Read-only overload of getDeferredSZ().
+PX_FORCE_INLINE const Vec3V* getDeferredSZ(const FsData& matrix)
+{
+    const Cm::SpatialVectorV* const deferredVel = getDeferredVel(matrix);
+    return addAddr<const Vec3V*>(deferredVel, sizeof(Cm::SpatialVectorV) * matrix.linkCount);
+}
+
+// Read-only max-penetration-bias array; it follows the linkCount deferred SZ entries.
+PX_FORCE_INLINE const PxReal* getMaxPenBias(const FsData& matrix)
+{
+    const Vec3V* const deferredSZ = getDeferredSZ(matrix);
+    return addAddr<const PxReal*>(deferredSZ, sizeof(Vec3V) * matrix.linkCount);
+}
+
+// Mutable overload of getMaxPenBias().
+PX_FORCE_INLINE PxReal* getMaxPenBias(FsData& matrix)
+{
+    Vec3V* const deferredSZ = getDeferredSZ(matrix);
+    return addAddr<PxReal*>(deferredSZ, sizeof(Vec3V) * matrix.linkCount);
+}
+
+
+// Joint-vector array, found jointVectorOffset bytes from the start of the block.
+PX_FORCE_INLINE FsJointVectors* getJointVectors(FsData& matrix)
+{
+    void* const base = &matrix;
+    return addAddr<FsJointVectors *>(base, matrix.jointVectorOffset);
+}
+
+// Read-only overload of getJointVectors().
+PX_FORCE_INLINE const FsJointVectors* getJointVectors(const FsData& matrix)
+{
+    const void* const base = &matrix;
+    return addAddr<const FsJointVectors *>(base, matrix.jointVectorOffset);
+}
+
+// Root inverse inertia, stored fsDataOffset bytes from the start of the block.
+PX_FORCE_INLINE FsInertia& getRootInverseInertia(FsData& matrix)
+{
+    FsInertia* const rootInertia = addAddr<FsInertia*>(&matrix, matrix.fsDataOffset);
+    return *rootInertia;
+}
+
+// Read-only overload of getRootInverseInertia().
+PX_FORCE_INLINE const FsInertia& getRootInverseInertia(const FsData& matrix)
+{
+    const FsInertia* const rootInertia = addAddr<const FsInertia*>(&matrix, matrix.fsDataOffset);
+    return *rootInertia;
+}
+
+// FsRow array; it begins directly after the root inverse inertia.
+PX_FORCE_INLINE FsRow* getFsRows(FsData& matrix)
+{
+    FsInertia* const rootInertia = &getRootInverseInertia(matrix);
+    return addAddr<FsRow*>(rootInertia, sizeof(FsInertia));
+}
+
+// Read-only overload of getFsRows().
+PX_FORCE_INLINE const FsRow* getFsRows(const FsData& matrix)
+{
+    const FsInertia* const rootInertia = &getRootInverseInertia(matrix);
+    return addAddr<const FsRow*>(rootInertia, sizeof(FsInertia));
+}
+
+
+// LtbRow array, found ltbDataOffset bytes from the start of the block.
+PX_FORCE_INLINE LtbRow* getLtbRows(FsData& matrix)
+{
+    void* const base = &matrix;
+    return addAddr<LtbRow*>(base, matrix.ltbDataOffset);
+}
+
+// Read-only overload of getLtbRows().
+PX_FORCE_INLINE const LtbRow* getLtbRows(const FsData& matrix)
+{
+    const void* const base = &matrix;
+    return addAddr<const LtbRow*>(base, matrix.ltbDataOffset);
+}
+
+
+// Reference-velocity array; it follows the linkCount LtbRow entries.
+PX_FORCE_INLINE Cm::SpatialVectorV* getRefVelocity(FsData& matrix)
+{
+    LtbRow* const ltbRows = getLtbRows(matrix);
+    return addAddr<Cm::SpatialVectorV*>(ltbRows, sizeof(LtbRow)*matrix.linkCount);
+}
+
+// Read-only overload of getRefVelocity().
+PX_FORCE_INLINE const Cm::SpatialVectorV* getRefVelocity(const FsData& matrix)
+{
+    const LtbRow* const ltbRows = getLtbRows(matrix);
+    return addAddr<const Cm::SpatialVectorV*>(ltbRows, sizeof(LtbRow)*matrix.linkCount);
+}
+
+// FsRowAux array; it follows the linkCount reference-velocity entries.
+PX_FORCE_INLINE FsRowAux* getAux(FsData& matrix)
+{
+    Cm::SpatialVectorV* const refVelocity = getRefVelocity(matrix);
+    return addAddr<FsRowAux*>(refVelocity, sizeof(Cm::SpatialVectorV)*matrix.linkCount);
+}
+
+// Read-only overload of getAux().
+PX_FORCE_INLINE const FsRowAux* getAux(const FsData& matrix)
+{
+    const Cm::SpatialVectorV* const refVelocity = getRefVelocity(matrix);
+    return addAddr<const FsRowAux*>(refVelocity, sizeof(Cm::SpatialVectorV)*matrix.linkCount);
+}
+
+// Applies an impulse, given as separate linear and angular SIMD vectors, to
+// link linkID of the articulation. Defined elsewhere.
+void PxcFsApplyImpulse(FsData& matrix,
+ PxU32 linkID,
+ Vec3V linear,
+ Vec3V angular);
+
+// Returns the velocity of link linkID as a SIMD spatial vector. Takes the
+// articulation by non-const reference, so it may modify it. Defined elsewhere.
+Cm::SpatialVectorV PxcFsGetVelocity(FsData& matrix,
+ PxU32 linkID);
+
+
+// Scalar reference routines, declared only in verification builds.
+// Presumably used to cross-check the optimised implementations - confirm
+// against the definitions.
+#if DY_ARTICULATION_DEBUG_VERIFY
+namespace ArticulationRef
+{
+ // Reference velocity propagation for one link; returns the resulting spatial vector.
+ Cm::SpatialVector propagateVelocity(const FsRow& row,
+ const FsJointVectors& jv,
+ const PxVec3& SZ,
+ const Cm::SpatialVector& v,
+ const FsRowAux& aux);
+
+ // Reference impulse propagation for one link; SZ is taken by non-const reference (output).
+ Cm::SpatialVector propagateImpulse(const FsRow& row,
+ const FsJointVectors& jv,
+ PxVec3& SZ,
+ const Cm::SpatialVector& Z,
+ const FsRowAux& aux);
+
+ // Reference impulse application operating on a caller-supplied velocity array.
+ void applyImpulse(const FsData& matrix,
+ Cm::SpatialVector* velocity,
+ PxU32 linkID,
+ const Cm::SpatialVector& impulse);
+
+}
+#endif
+
+}
+}
+
+#endif //DY_ARTICULATION_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyBodyCoreIntegrator.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyBodyCoreIntegrator.h
new file mode 100644
index 00000000..3e842341
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyBodyCoreIntegrator.h
@@ -0,0 +1,405 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_BODYCORE_INTEGRATOR_H
+#define DY_BODYCORE_INTEGRATOR_H
+
+#include "CmPhysXCommon.h"
+#include "PxvDynamics.h"
+#include "PsMathUtils.h"
+#include "PxsRigidBody.h"
+#include "DySolverBody.h"
+#include "DySleepingConfigulation.h"
+#include "PxsIslandSim.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+// Advances a body's linear and angular velocity by one step before constraints
+// are solved: adds gravity (unless disabled), applies damping, then clamps both
+// velocities to their maximum magnitudes.
+//   gravity, dt, accelScale        - gravity contribution is gravity*dt*accelScale
+//   linearDamping, angularDamping  - damping coefficients; the velocity factor is 1 - damping*dt
+//   maxLinearVelocitySq, maxAngularVelocitySq - squared speed limits
+//   inOutLinearVelocity, inOutAngularVelocity - read on entry, overwritten with the result
+//   disableGravity                 - when true the gravity term is skipped
+PX_FORCE_INLINE void bodyCoreComputeUnconstrainedVelocity
+(const PxVec3& gravity, const PxReal dt, const PxReal linearDamping, const PxReal angularDamping, const PxReal accelScale,
+const PxReal maxLinearVelocitySq, const PxReal maxAngularVelocitySq, PxVec3& inOutLinearVelocity, PxVec3& inOutAngularVelocity,
+bool disableGravity)
+{
+    PxVec3 linVel = inOutLinearVelocity;
+    PxVec3 angVel = inOutAngularVelocity;
+
+    //TODO context-global gravity
+    if (!disableGravity)
+        linVel += gravity*dt *accelScale;
+
+    // Damping factors; fsel() is given the factor and 0.0f as its two alternatives
+    // (presumably selecting 0 when the factor is negative).
+    const PxReal linKeep = 1.0f - linearDamping*dt;
+    const PxReal angKeep = 1.0f - angularDamping*dt;
+    linVel *= physx::intrinsics::fsel(linKeep, linKeep, 0.0f);
+    angVel *= physx::intrinsics::fsel(angKeep, angKeep, 0.0f);
+
+    // Rescale any velocity whose squared magnitude exceeds its limit.
+    const PxReal linSq = linVel.magnitudeSquared();
+    if (linSq > maxLinearVelocitySq)
+        linVel *= PxSqrt(maxLinearVelocitySq / linSq);
+
+    const PxReal angSq = angVel.magnitudeSquared();
+    if (angSq > maxAngularVelocitySq)
+        angVel *= PxSqrt(maxAngularVelocitySq / angSq);
+
+    inOutLinearVelocity = linVel;
+    inOutAngularVelocity = angVel;
+}
+
+
+// Integrates one body's pose over dt and writes the solver's velocity changes
+// back into the body data.
+//   motionLinearVelocity, motionAngularVelocity - in: solver motion-velocity deltas;
+//       out: the full motion velocities used for the integration
+//   solverBody     - solver-side velocity deltas; locked components are zeroed
+//   solverBodyData - body velocities and body2World pose; all are updated
+//   dt             - time step
+PX_FORCE_INLINE void integrateCore(PxVec3& motionLinearVelocity, PxVec3& motionAngularVelocity, PxSolverBody& solverBody, PxSolverBodyData& solverBodyData, const PxF32 dt)
+{
+ // Zero every locked axis in both the motion velocity and the solver delta.
+ PxU32 lockFlags = solverBodyData.lockFlags;
+ if (lockFlags)
+ {
+ if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_X)
+ {
+ motionLinearVelocity.x = 0.f;
+ solverBody.linearVelocity.x = 0.f;
+ }
+ if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Y)
+ {
+ motionLinearVelocity.y = 0.f;
+ solverBody.linearVelocity.y = 0.f;
+ }
+ if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Z)
+ {
+ motionLinearVelocity.z = 0.f;
+ solverBody.linearVelocity.z = 0.f;
+ }
+
+ //The angular velocity should be 0 because it is now impossible to make it rotate around that axis!
+ if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_X)
+ {
+ motionAngularVelocity.x = 0.f;
+ solverBody.angularState.x = 0.f;
+ }
+ if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Y)
+ {
+ motionAngularVelocity.y = 0.f;
+ solverBody.angularState.y = 0.f;
+ }
+ if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Z)
+ {
+ motionAngularVelocity.z = 0.f;
+ solverBody.angularState.z = 0.f;
+ }
+ }
+
+ // Integrate linear part
+ PxVec3 linearMotionVel = solverBodyData.linearVelocity + motionLinearVelocity;
+ PxVec3 delta = linearMotionVel * dt;
+ // The angular delta is transformed by sqrtInvInertia before being added to the body's angular velocity.
+ PxVec3 angularMotionVel = solverBodyData.angularVelocity + solverBodyData.sqrtInvInertia * motionAngularVelocity;
+ // w holds the squared angular speed here; the square root is taken below.
+ PxReal w = angularMotionVel.magnitudeSquared();
+ solverBodyData.body2World.p += delta;
+ PX_ASSERT(solverBodyData.body2World.p.isFinite());
+
+ //Store back the linear and angular velocities
+ //core.linearVelocity += solverBody.linearVelocity * solverBodyData.sqrtInvMass;
+ solverBodyData.linearVelocity += solverBody.linearVelocity;
+ solverBodyData.angularVelocity += solverBodyData.sqrtInvInertia * solverBody.angularState;
+
+ // Integrate the rotation using closed form quaternion integrator
+ // (skipped entirely when the angular motion velocity is exactly zero)
+ if (w != 0.0f)
+ {
+ w = PxSqrt(w);
+ // Perform a post-solver clamping
+ // TODO(dsequeira): ignore this for the moment
+ //just clamp motionVel to half float-range
+ const PxReal maxW = 1e+7f; //Should be about sqrt(PX_MAX_REAL/2) or smaller
+ if (w > maxW)
+ {
+ angularMotionVel = angularMotionVel.getNormalized() * maxW;
+ w = maxW;
+ }
+ // Half the rotation angle over this step.
+ const PxReal v = dt * w * 0.5f;
+ PxReal s, q;
+ Ps::sincos(v, s, q);
+ // Dividing by w lets angularMotionVel * s below act as (unit axis) * sin(half angle).
+ s /= w;
+
+ const PxVec3 pqr = angularMotionVel * s;
+ const PxQuat quatVel(pqr.x, pqr.y, pqr.z, 0);
+ // result = quatVel * q0 + q0 * cos(half angle), normalised before it is stored.
+ PxQuat result = quatVel * solverBodyData.body2World.q;
+
+ result += solverBodyData.body2World.q * q;
+
+ solverBodyData.body2World.q = result.getNormalized();
+ PX_ASSERT(solverBodyData.body2World.q.isSane());
+ PX_ASSERT(solverBodyData.body2World.q.isFinite());
+ }
+
+ // Report the velocities that were actually integrated (after any clamping) to the caller.
+ motionLinearVelocity = linearMotionVel;
+ motionAngularVelocity = angularMotionVel;
+}
+
+
+PX_FORCE_INLINE PxReal updateWakeCounter(PxsRigidBody* originalBody, PxReal dt, PxReal /*invDt*/, const bool enableStabilization, const bool useAdaptiveForce, Cm::SpatialVector& motionVelocity,
+ bool hasStaticTouch)
+{ // Updates the freeze/sleep bookkeeping of one body using the time step dt and returns the new wake counter (also stored in bodyCore.solverWakeCounter).
+ //KS - at most one of these features can be enabled at any time
+ PX_ASSERT(!useAdaptiveForce || !enableStabilization);
+ PxsBodyCore& bodyCore = originalBody->getCore();
+
+ // update the body's sleep state and wake counter
+ PxReal wakeCounterResetTime = 20.0f*0.02f; // = 0.4; NOTE(review): presumably 20 steps of 0.02s - confirm
+
+ PxReal wc = bodyCore.wakeCounter; // read only: results are written to bodyCore.solverWakeCounter, not back to wakeCounter
+
+ {
+ if (enableStabilization)
+ {
+ bool freeze = false;
+ const PxTransform& body2World = bodyCore.body2World;
+
+ // calculate normalized energy: kinetic energy divided by mass
+
+ const PxVec3 t = bodyCore.inverseInertia;
+ const PxVec3 inertia(t.x > 0.f ? 1.0f / t.x : 1.f, t.y > 0.f ? 1.0f / t.y : 1.f, t.z > 0.f ? 1.0f / t.z : 1.f);
+ // (inverse inertia components that are not > 0 are treated as an inertia of 1; a zero inverse mass is likewise replaced by 1 below)
+
+ PxVec3 sleepLinVelAcc = motionVelocity.linear;
+ PxVec3 sleepAngVelAcc = body2World.q.rotateInv(motionVelocity.angular);
+
+ // scale threshold by cluster factor (more contacts => higher sleep threshold)
+ //const PxReal clusterFactor = PxReal(1u + getNumUniqueInteractions());
+
+ PxReal invMass = bodyCore.inverseMass;
+ if (invMass == 0.f)
+ invMass = 1.f;
+
+ const PxReal angular = sleepAngVelAcc.multiply(sleepAngVelAcc).dot(inertia) * invMass;
+ const PxReal linear = sleepLinVelAcc.magnitudeSquared();
+ PxReal frameNormalizedEnergy = 0.5f * (angular + linear);
+
+ const PxReal cf = hasStaticTouch ? PxReal(PxMin(10u, bodyCore.numBodyInteractions)) : 0.f; // interaction count capped at 10, or 0 without a static touch
+ const PxReal freezeThresh = cf*bodyCore.freezeThreshold;
+ // with cf == 0 the threshold is 0, so any non-negative energy counts as "not settled" below
+ originalBody->freezeCount = PxMax(originalBody->freezeCount - dt, 0.0f); // count down towards 0
+ bool settled = true;
+ // accelScale grows by dt per call, capped at 1, and is forced to 1 when there is no static touch
+ PxReal accelScale = PxMin(1.f, originalBody->accelScale + dt);
+
+ if (!hasStaticTouch)
+ accelScale = 1.f;
+ // energy at or above the freeze threshold: not settled, restart the freeze countdown
+ if (frameNormalizedEnergy >= freezeThresh)
+ {
+ settled = false;
+ originalBody->freezeCount = PXD_FREEZE_INTERVAL;
+ }
+
+ if (settled)
+ {
+ //Dampen bodies that are just about to go to sleep
+ if (cf > 1.f)
+ {
+ const PxReal sleepDamping = PXD_SLEEP_DAMPING;
+ const PxReal sleepDampingTimesDT = sleepDamping*dt;
+ const PxReal d = 1.0f - sleepDampingTimesDT;
+ bodyCore.linearVelocity = bodyCore.linearVelocity * d;
+ bodyCore.angularVelocity = bodyCore.angularVelocity * d;
+ accelScale = PXD_FREEZE_SCALE;
+ }
+ freeze = originalBody->freezeCount == 0.f && frameNormalizedEnergy < (bodyCore.freezeThreshold * PXD_FREEZE_TOLERANCE); // countdown expired and energy below the scaled freeze threshold
+ }
+
+ originalBody->accelScale = accelScale;
+ // rebuild mInternalFlags: only eDISABLE_GRAVITY is carried over, plus the frozen state and its transition bits
+ if (freeze)
+ {
+ //current flag isn't frozen but freeze flag raise so we need to raise the frozen flag in this frame
+ bool wasNotFrozen = (originalBody->mInternalFlags & PxsRigidBody::eFROZEN) == 0;
+ PxU16 flags = PxU16((originalBody->mInternalFlags & PxsRigidBody::eDISABLE_GRAVITY) | PxsRigidBody::eFROZEN);
+ if (wasNotFrozen)
+ {
+ flags |= PxsRigidBody::eFREEZE_THIS_FRAME;
+ }
+ originalBody->mInternalFlags = flags;
+ bodyCore.body2World = originalBody->getLastCCDTransform(); // a frozen body's pose is overwritten with the transform returned by getLastCCDTransform()
+ }
+ else
+ {
+ PxU16 flags = PxU16(originalBody->mInternalFlags & PxsRigidBody::eDISABLE_GRAVITY);
+ bool wasFrozen = (originalBody->mInternalFlags & PxsRigidBody::eFROZEN) != 0;
+ if (wasFrozen)
+ {
+ flags |= PxsRigidBody::eUNFREEZE_THIS_FRAME;
+ }
+ originalBody->mInternalFlags = flags;
+ }
+
+ /*KS: New algorithm for sleeping when using stabilization:
+ * Energy *this frame* must be higher than sleep threshold and accumulated energy over previous frames
+ * must be higher than clusterFactor*energyThreshold.
+ */
+ if (wc < wakeCounterResetTime * 0.5f || wc < dt) // only evaluated once the counter is below half the reset time (or below one step)
+ {
+ //Accumulate energy
+ originalBody->sleepLinVelAcc += sleepLinVelAcc;
+ originalBody->sleepAngVelAcc += sleepAngVelAcc;
+
+ //If energy this frame is high
+ if (frameNormalizedEnergy >= bodyCore.sleepThreshold)
+ {
+ //Compute energy over sleep preparation time
+ const PxReal sleepAngular = originalBody->sleepAngVelAcc.multiply(originalBody->sleepAngVelAcc).dot(inertia) * invMass;
+ const PxReal sleepLinear = originalBody->sleepLinVelAcc.magnitudeSquared();
+ PxReal normalizedEnergy = 0.5f * (sleepAngular + sleepLinear);
+ const PxReal sleepClusterFactor = PxReal(1u + bodyCore.numCountedInteractions);
+ // scale threshold by cluster factor (more contacts => higher sleep threshold)
+ const PxReal threshold = sleepClusterFactor*bodyCore.sleepThreshold;
+
+ //If energy over sleep preparation time is high
+ if (normalizedEnergy >= threshold)
+ {
+ //Wake up
+ //PX_ASSERT(isActive());
+ originalBody->sleepAngVelAcc = PxVec3(0);
+ originalBody->sleepLinVelAcc = PxVec3(0);
+ // normalizedEnergy >= threshold here, so factor is in [1, 2]: wc becomes between half and the full reset time, plus dt per counted interaction
+ const float factor = bodyCore.sleepThreshold == 0.f ? 2.0f : PxMin(normalizedEnergy / threshold, 2.0f);
+ PxReal oldWc = wc;
+ wc = factor * 0.5f * wakeCounterResetTime + dt * (sleepClusterFactor - 1.0f);
+ bodyCore.solverWakeCounter = wc;
+ //if (oldWc == 0.0f) // for the case where a sleeping body got activated by the system (not the user) AND got processed by the solver as well
+ // notifyNotReadyForSleeping(bodyCore.nodeIndex);
+
+ if (oldWc == 0.0f)
+ originalBody->mInternalFlags |= PxsRigidBody::eACTIVATE_THIS_FRAME;
+ // early return: the countdown at the end of the function is skipped when the counter is reset
+ return wc;
+ }
+ }
+ }
+
+ }
+ else
+ {
+ if (useAdaptiveForce)
+ {
+ if (hasStaticTouch && bodyCore.numBodyInteractions > 1)
+ originalBody->accelScale = 1.f / PxReal(bodyCore.numBodyInteractions);
+ else
+ originalBody->accelScale = 1.f;
+ }
+ if (wc < wakeCounterResetTime * 0.5f || wc < dt)
+ {
+ const PxTransform& body2World = bodyCore.body2World;
+
+ // calculate normalized energy: kinetic energy divided by mass
+ const PxVec3 t = bodyCore.inverseInertia;
+ const PxVec3 inertia(t.x > 0.f ? 1.0f / t.x : 1.f, t.y > 0.f ? 1.0f / t.y : 1.f, t.z > 0.f ? 1.0f / t.z : 1.f);
+
+ PxVec3 sleepLinVelAcc = motionVelocity.linear;
+ PxVec3 sleepAngVelAcc = body2World.q.rotateInv(motionVelocity.angular);
+ // unlike the stabilization path, the threshold test below uses only the accumulated velocities
+ originalBody->sleepLinVelAcc += sleepLinVelAcc;
+ originalBody->sleepAngVelAcc += sleepAngVelAcc;
+
+ PxReal invMass = bodyCore.inverseMass;
+ if (invMass == 0.f)
+ invMass = 1.f;
+
+ const PxReal angular = originalBody->sleepAngVelAcc.multiply(originalBody->sleepAngVelAcc).dot(inertia) * invMass;
+ const PxReal linear = originalBody->sleepLinVelAcc.magnitudeSquared();
+ PxReal normalizedEnergy = 0.5f * (angular + linear);
+
+ // scale threshold by cluster factor (more contacts => higher sleep threshold)
+ const PxReal clusterFactor = PxReal(1 + bodyCore.numCountedInteractions);
+ const PxReal threshold = clusterFactor*bodyCore.sleepThreshold;
+
+ if (normalizedEnergy >= threshold)
+ {
+ //PX_ASSERT(isActive());
+ originalBody->sleepLinVelAcc = PxVec3(0);
+ originalBody->sleepAngVelAcc = PxVec3(0);
+ const float factor = threshold == 0.f ? 2.0f : PxMin(normalizedEnergy / threshold, 2.0f);
+ PxReal oldWc = wc;
+ wc = factor * 0.5f * wakeCounterResetTime + dt * (clusterFactor - 1.0f);
+ bodyCore.solverWakeCounter = wc;
+ PxU16 flags = PxU16(originalBody->mInternalFlags & PxsRigidBody::eDISABLE_GRAVITY); // this path rewrites mInternalFlags, keeping only eDISABLE_GRAVITY
+ if (oldWc == 0.0f) // for the case where a sleeping body got activated by the system (not the user) AND got processed by the solver as well
+ {
+ flags |= PxsRigidBody::eACTIVATE_THIS_FRAME;
+ //notifyNotReadyForSleeping(bodyCore.nodeIndex);
+ }
+
+ originalBody->mInternalFlags = flags;
+
+ return wc;
+ }
+ }
+ }
+ }
+ // no wake-up this call: count the wake counter down by dt, clamped at 0
+ wc = PxMax(wc - dt, 0.0f);
+ bodyCore.solverWakeCounter = wc;
+ return wc;
+}
+
+PX_FORCE_INLINE void sleepCheck(PxsRigidBody* originalBody, const PxReal dt, const PxReal intDt, const bool enableStabilization, bool useAdaptiveForce, Cm::SpatialVector& motionVelocity,
+ bool hasStaticTouch)
+{
+ // Runs updateWakeCounter and, when the returned counter is exactly 0, flags the body for deactivation and clears its sleep velocity accumulators.
+ PxReal wc = updateWakeCounter(originalBody, dt, intDt, enableStabilization, useAdaptiveForce, motionVelocity, hasStaticTouch); // intDt lands in updateWakeCounter's unnamed (unused) invDt parameter
+ bool wakeCounterZero = (wc == 0.0f);
+
+ if (wakeCounterZero)
+ {
+ //PxsBodyCore& bodyCore = originalBody->getCore();
+ originalBody->mInternalFlags |= PxsRigidBody::eDEACTIVATE_THIS_FRAME;
+ // notifyReadyForSleeping(bodyCore.nodeIndex);
+ originalBody->sleepLinVelAcc = PxVec3(0);
+ originalBody->sleepAngVelAcc = PxVec3(0);
+ }
+}
+
+}
+
+}
+
+#endif //DY_BODYCORE_INTEGRATOR_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPartition.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPartition.cpp
new file mode 100644
index 00000000..03751640
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPartition.cpp
@@ -0,0 +1,712 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+
+#include "DyConstraintPartition.h"
+#include "DyArticulationUtils.h"
+
+#define INTERLEAVE_SELF_CONSTRAINTS 1
+
+
+namespace physx
+{
+namespace Dy
+{
+
+namespace
+{
+
+PX_FORCE_INLINE PxU32 getArticulationIndex(const uintptr_t eaFsData, const uintptr_t* eas, const PxU32 numEas)
+{ // Linear search: returns the index of the first entry of eas[0..numEas) equal to eaFsData.
+ PxU32 index=0xffffffff; // sentinel meaning "not found"
+ for(PxU32 i=0;i<numEas;i++)
+ {
+ if(eas[i]==eaFsData)
+ {
+ index=i;
+ break;
+ }
+ }
+ PX_ASSERT(index!=0xffffffff); // a missing entry is treated as a programming error; the sentinel is still returned in that case
+ return index;
+}
+
+
+#define MAX_NUM_PARTITIONS 32
+
+static PxU32 bitTable[32] = // lookup table of single-bit masks: bitTable[i] == 1u << i
+{
+ 1u<<0, 1u<<1, 1u<<2, 1u<<3, 1u<<4, 1u<<5, 1u<<6, 1u<<7, 1u<<8, 1u<<9, 1u<<10, 1u<<11, 1u<<12, 1u<<13, 1u<<14, 1u<<15, 1u<<16, 1u<<17,
+ 1u<<18, 1u<<19, 1u<<20, 1u<<21, 1u<<22, 1u<<23, 1u<<24, 1u<<25, 1u<<26, 1u<<27, 1u<<28, 1u<<29, 1u<<30, 1u<<31
+};
+ // Returns bitTable[index], i.e. the mask with only bit 'index' set; index must be < 32 (asserted, not range-checked otherwise).
+PxU32 getBit(const PxU32 index)
+{
+ PX_ASSERT(index < 32);
+ return bitTable[index];
+}
+
+
+class RigidBodyClassification
+{ // Classification policy for constraints between plain solver bodies: a body is identified by its offset in the mBodies array.
+ PxSolverBody* PX_RESTRICT mBodies;
+ PxU32 mNumBodies;
+
+public:
+ RigidBodyClassification(PxSolverBody* PX_RESTRICT bodies, PxU32 numBodies) : mBodies(bodies), mNumBodies(numBodies)
+ {
+ }
+
+ //Returns true if it is a dynamic-dynamic constraint; false if it is a dynamic-static or dynamic-kinematic constraint
+ PX_FORCE_INLINE bool classifyConstraint(const PxSolverConstraintDesc& desc, uintptr_t& indexA, uintptr_t& indexB, bool& activeA, bool& activeB) const
+ {
+ indexA=uintptr_t(desc.bodyA - mBodies); // offset of the body from the start of mBodies, converted to unsigned
+ indexB=uintptr_t(desc.bodyB - mBodies);
+ activeA = indexA < mNumBodies; // "active" == the body lies inside the mBodies array; NOTE(review): presumably static/kinematic bodies are stored elsewhere - confirm
+ activeB = indexB < mNumBodies;
+ return activeA && activeB;
+ }
+ // Clears the solverProgress field of every body in the array.
+ PX_FORCE_INLINE void clearState()
+ {
+ for(PxU32 a = 0; a < mNumBodies; ++a)
+ mBodies[a].solverProgress = 0;
+ }
+ // For every body: clears solverProgress, grows the array to at least maxSolverNormalProgress + maxSolverFrictionProgress entries, and adds 1 to each of the maxSolverFrictionProgress entries starting at index maxSolverNormalProgress.
+ PX_FORCE_INLINE void reserveSpaceForStaticConstraints(Ps::Array<PxU32>& numConstraintsPerPartition)
+ {
+ for(PxU32 a = 0; a < mNumBodies; ++a)
+ {
+ mBodies[a].solverProgress = 0;
+
+ PxU32 requiredSize = PxU32(mBodies[a].maxSolverNormalProgress + mBodies[a].maxSolverFrictionProgress);
+ if(requiredSize > numConstraintsPerPartition.size())
+ {
+ numConstraintsPerPartition.resize(requiredSize);
+ }
+
+ for(PxU32 b = 0; b < mBodies[a].maxSolverFrictionProgress; ++b)
+ {
+ numConstraintsPerPartition[mBodies[a].maxSolverNormalProgress + b]++;
+ }
+ }
+ }
+};
+
+class ExtendedRigidBodyClassification
+{
+ // Classification policy that also handles articulations: an articulation is indexed as mNumBodies + its position in mFsDatas and is always reported as active.
+ PxSolverBody* PX_RESTRICT mBodies;
+ PxU32 mNumBodies;
+ uintptr_t* PX_RESTRICT mFsDatas; // FsData pointers stored as integers (they are cast back with reinterpret_cast below)
+ PxU32 mNumArticulations;
+
+public:
+
+ ExtendedRigidBodyClassification(PxSolverBody* PX_RESTRICT bodies, PxU32 numBodies, uintptr_t* PX_RESTRICT fsDatas, PxU32 numArticulations)
+ : mBodies(bodies), mNumBodies(numBodies), mFsDatas(fsDatas), mNumArticulations(numArticulations)
+ {
+ }
+
+ //Returns true if it is a dynamic-dynamic constraint; false if it is a dynamic-static or dynamic-kinematic constraint
+ PX_FORCE_INLINE bool classifyConstraint(const PxSolverConstraintDesc& desc, uintptr_t& indexA, uintptr_t& indexB, bool& activeA, bool& activeB) const
+ {
+ if(PxSolverConstraintDesc::NO_LINK == desc.linkIndexA) // NO_LINK: side A is a plain body, identified by its offset in mBodies
+ {
+ indexA=uintptr_t(desc.bodyA - mBodies);
+ activeA = indexA < mNumBodies;
+ }
+ else
+ {
+ indexA=mNumBodies+getArticulationIndex(uintptr_t(desc.articulationA),mFsDatas,mNumArticulations); // articulation indices follow the body indices
+ activeA = true;
+ }
+ if(PxSolverConstraintDesc::NO_LINK == desc.linkIndexB)
+ {
+ indexB=uintptr_t(desc.bodyB - mBodies);
+ activeB = indexB < mNumBodies;
+ }
+ else
+ {
+ indexB=mNumBodies+getArticulationIndex(uintptr_t(desc.articulationB),mFsDatas,mNumArticulations);
+ activeB = true;
+ }
+ return activeA && activeB;
+ }
+ // Clears solverProgress on every body and on every articulation's FsData.
+ PX_FORCE_INLINE void clearState()
+ {
+ for(PxU32 a = 0; a < mNumBodies; ++a)
+ mBodies[a].solverProgress = 0;
+
+ for(PxU32 a = 0; a < mNumArticulations; ++a)
+ (reinterpret_cast<FsData*>(mFsDatas[a]))->solverProgress = 0;
+ }
+ // Same accounting as done per body, repeated for each articulation: grow the array to maxSolverNormalProgress + maxSolverFrictionProgress and add 1 to each static slot.
+ PX_FORCE_INLINE void reserveSpaceForStaticConstraints(Ps::Array<PxU32>& numConstraintsPerPartition)
+ {
+ for(PxU32 a = 0; a < mNumBodies; ++a)
+ {
+ mBodies[a].solverProgress = 0;
+
+ PxU32 requiredSize = PxU32(mBodies[a].maxSolverNormalProgress + mBodies[a].maxSolverFrictionProgress);
+ if(requiredSize > numConstraintsPerPartition.size())
+ {
+ numConstraintsPerPartition.resize(requiredSize);
+ }
+
+ for(PxU32 b = 0; b < mBodies[a].maxSolverFrictionProgress; ++b)
+ {
+ numConstraintsPerPartition[mBodies[a].maxSolverNormalProgress + b]++;
+ }
+ }
+
+ for(PxU32 a = 0; a < mNumArticulations; ++a)
+ {
+ FsData* data = reinterpret_cast<FsData*>(mFsDatas[a]);
+ data->solverProgress = 0;
+
+ PxU32 requiredSize = PxU32(data->maxSolverNormalProgress + data->maxSolverFrictionProgress);
+ if(requiredSize > numConstraintsPerPartition.size())
+ {
+ numConstraintsPerPartition.resize(requiredSize);
+ }
+
+ for(PxU32 b = 0; b < data->maxSolverFrictionProgress; ++b)
+ {
+ numConstraintsPerPartition[data->maxSolverNormalProgress + b]++;
+ }
+ }
+ }
+
+};
+
+template <typename Classification>
+void classifyConstraintDesc(const PxSolverConstraintDesc* PX_RESTRICT descs, const PxU32 numConstraints, Classification& classification,
+ Ps::Array<PxU32>& numConstraintsPerPartition, PxSolverConstraintDesc* PX_RESTRICT eaTempConstraintDescriptors)
+{ // Counting pass: assigns every constraint to a partition and fills numConstraintsPerPartition with the number of constraints per partition; nothing is reordered here.
+ const PxSolverConstraintDesc* _desc = descs;
+ const PxU32 numConstraintsMin1 = numConstraints - 1; // wraps around when numConstraints == 0, but is only read inside the loop below
+
+ PxU32 numUnpartitionedConstraints = 0;
+
+ numConstraintsPerPartition.forceSize_Unsafe(32); // NOTE(review): assumes the array already has room for 32 entries - confirm against the caller
+
+ PxMemZero(numConstraintsPerPartition.begin(), sizeof(PxU32) * 32);
+
+ for(PxU32 i = 0; i < numConstraints; ++i, _desc++)
+ {
+ const PxU32 prefetchOffset = PxMin(numConstraintsMin1 - i, 4u); // look up to 4 descriptors ahead without indexing past the last one
+ Ps::prefetchLine(_desc[prefetchOffset].constraint);
+ Ps::prefetchLine(_desc[prefetchOffset].bodyA);
+ Ps::prefetchLine(_desc[prefetchOffset].bodyB);
+ Ps::prefetchLine(_desc + 8); // NOTE(review): not clamped, so the address can lie past the end of descs - presumably harmless for a prefetch; confirm
+
+ uintptr_t indexA, indexB;
+ bool activeA, activeB;
+
+ const bool notContainsStatic = classification.classifyConstraint(*_desc, indexA, indexB, activeA, activeB);
+
+ if(notContainsStatic)
+ {
+ PxU32 partitionsA=_desc->bodyA->solverProgress;
+ PxU32 partitionsB=_desc->bodyB->solverProgress;
+ // a set bit in solverProgress means the body already has a constraint in that partition of the current batch of 32
+ PxU32 availablePartition;
+ {
+ const PxU32 combinedMask = (~partitionsA & ~partitionsB); // bits that are free in both bodies
+ availablePartition = combinedMask == 0 ? MAX_NUM_PARTITIONS : Ps::lowestSetBit(combinedMask);
+ if(availablePartition == MAX_NUM_PARTITIONS)
+ {
+ eaTempConstraintDescriptors[numUnpartitionedConstraints++] = *_desc; // no common free partition: defer to the overflow passes below
+ continue;
+ }
+
+ const PxU32 partitionBit = getBit(availablePartition);
+ partitionsA |= partitionBit;
+ partitionsB |= partitionBit;
+ }
+
+ _desc->bodyA->solverProgress = partitionsA;
+ _desc->bodyB->solverProgress = partitionsB;
+ numConstraintsPerPartition[availablePartition]++;
+ availablePartition++; // maxSolverNormalProgress stores one past the highest partition index used by the body
+ _desc->bodyA->maxSolverNormalProgress = PxMax(_desc->bodyA->maxSolverNormalProgress, PxU16(availablePartition));
+ _desc->bodyB->maxSolverNormalProgress = PxMax(_desc->bodyB->maxSolverNormalProgress, PxU16(availablePartition));
+
+
+ }
+ else
+ {
+ //Just count the number of static constraints and store in maxSolverFrictionProgress...
+ if(activeA)
+ _desc->bodyA->maxSolverFrictionProgress++;
+ else if(activeB)
+ _desc->bodyB->maxSolverFrictionProgress++;
+ }
+ }
+ // Overflow passes: each pass clears the per-body bitmasks and opens another 32 partitions for the constraints deferred so far.
+ PxU32 partitionStartIndex = 0;
+
+ while(numUnpartitionedConstraints > 0)
+ {
+ classification.clearState();
+
+ partitionStartIndex += 32;
+ //Keep partitioning the un-partitioned constraints and blat the whole thing to 0!
+ numConstraintsPerPartition.resize(32 + numConstraintsPerPartition.size());
+ PxMemZero(numConstraintsPerPartition.begin() + partitionStartIndex, sizeof(PxU32) * 32);
+
+ PxU32 newNumUnpartitionedConstraints = 0;
+
+ for(PxU32 i = 0; i < numUnpartitionedConstraints; ++i)
+ {
+ const PxSolverConstraintDesc& desc = eaTempConstraintDescriptors[i];
+
+ PxU32 partitionsA=desc.bodyA->solverProgress;
+ PxU32 partitionsB=desc.bodyB->solverProgress;
+
+ PxU32 availablePartition;
+ {
+ const PxU32 combinedMask = (~partitionsA & ~partitionsB);
+ availablePartition = combinedMask == 0 ? MAX_NUM_PARTITIONS : Ps::lowestSetBit(combinedMask);
+ if(availablePartition == MAX_NUM_PARTITIONS)
+ {
+ //Need to shuffle around unpartitioned constraints...
+ eaTempConstraintDescriptors[newNumUnpartitionedConstraints++] = desc; // compacted in place; the write index never exceeds the read index i
+ continue;
+ }
+
+ const PxU32 partitionBit = getBit(availablePartition);
+ partitionsA |= partitionBit;
+ partitionsB |= partitionBit;
+ }
+
+ desc.bodyA->solverProgress = partitionsA;
+ desc.bodyB->solverProgress = partitionsB;
+ availablePartition += partitionStartIndex; // convert the bit index within this batch into a global partition index
+ numConstraintsPerPartition[availablePartition]++;
+ availablePartition++;
+ desc.bodyA->maxSolverNormalProgress = PxMax(desc.bodyA->maxSolverNormalProgress, PxU16(availablePartition));
+ desc.bodyB->maxSolverNormalProgress = PxMax(desc.bodyB->maxSolverNormalProgress, PxU16(availablePartition));
+ }
+
+ numUnpartitionedConstraints = newNumUnpartitionedConstraints;
+ }
+ // finally add the static constraints counted in maxSolverFrictionProgress to the per-partition counts
+ classification.reserveSpaceForStaticConstraints(numConstraintsPerPartition);
+
+}
+
+template <typename Classification>
+void writeConstraintDesc(const PxSolverConstraintDesc* PX_RESTRICT descs, const PxU32 numConstraints, Classification& classification,
+ Ps::Array<PxU32>& accumulatedConstraintsPerPartition, PxSolverConstraintDesc* eaTempConstraintDescriptors,
+ PxSolverConstraintDesc* PX_RESTRICT eaOrderedConstraintDesc)
+{ // Write pass: repeats the partition assignment and copies each descriptor to eaOrderedConstraintDesc at its partition's running write offset (accumulatedConstraintsPerPartition[p] is post-incremented per write).
+ PX_UNUSED(eaTempConstraintDescriptors); // NOTE(review): the parameter is in fact used below as the overflow buffer
+ const PxSolverConstraintDesc* _desc = descs;
+ const PxU32 numConstraintsMin1 = numConstraints - 1; // wraps around when numConstraints == 0, but is only read inside the loop below
+
+ PxU32 numUnpartitionedConstraints = 0;
+
+ for(PxU32 i = 0; i < numConstraints; ++i, _desc++)
+ {
+ const PxU32 prefetchOffset = PxMin(numConstraintsMin1 - i, 4u);
+ Ps::prefetchLine(_desc[prefetchOffset].constraint);
+ Ps::prefetchLine(_desc[prefetchOffset].bodyA);
+ Ps::prefetchLine(_desc[prefetchOffset].bodyB);
+ Ps::prefetchLine(_desc + 8);
+
+ uintptr_t indexA, indexB;
+ bool activeA, activeB;
+ const bool notContainsStatic = classification.classifyConstraint(*_desc, indexA, indexB, activeA, activeB);
+
+ if(notContainsStatic)
+ {
+ PxU32 partitionsA=_desc->bodyA->solverProgress;
+ PxU32 partitionsB=_desc->bodyB->solverProgress;
+
+ PxU32 availablePartition;
+ {
+ const PxU32 combinedMask = (~partitionsA & ~partitionsB); // bits that are free in both bodies
+ availablePartition = combinedMask == 0 ? MAX_NUM_PARTITIONS : Ps::lowestSetBit(combinedMask);
+ if(availablePartition == MAX_NUM_PARTITIONS)
+ {
+ eaTempConstraintDescriptors[numUnpartitionedConstraints++] = *_desc; // no common free partition: defer to the overflow passes below
+ continue;
+ }
+
+ const PxU32 partitionBit = getBit(availablePartition);
+
+ partitionsA |= partitionBit;
+ partitionsB |= partitionBit;
+ }
+
+ _desc->bodyA->solverProgress = partitionsA;
+ _desc->bodyB->solverProgress = partitionsB;
+
+ eaOrderedConstraintDesc[accumulatedConstraintsPerPartition[availablePartition]++] = *_desc;
+ }
+ else
+ {
+ //Static constraint: place it in the active body's next static slot, i.e. partition maxSolverNormalProgress + the running maxSolverFrictionProgress count
+ PxU32 index = 0; // stays 0 if neither body is active
+ if(activeA)
+ index = PxU32(_desc->bodyA->maxSolverNormalProgress + _desc->bodyA->maxSolverFrictionProgress++);
+ else if(activeB)
+ index = PxU32(_desc->bodyB->maxSolverNormalProgress + _desc->bodyB->maxSolverFrictionProgress++);
+
+ eaOrderedConstraintDesc[accumulatedConstraintsPerPartition[index]++] = *_desc;
+ }
+ }
+ // Overflow passes: each pass clears the per-body bitmasks and targets the next batch of 32 partitions.
+ PxU32 partitionStartIndex = 0;
+
+ while(numUnpartitionedConstraints > 0)
+ {
+ classification.clearState();
+
+ partitionStartIndex += 32;
+ PxU32 newNumUnpartitionedConstraints = 0;
+
+ for(PxU32 i = 0; i < numUnpartitionedConstraints; ++i)
+ {
+ const PxSolverConstraintDesc& desc = eaTempConstraintDescriptors[i];
+
+ PxU32 partitionsA=desc.bodyA->solverProgress;
+ PxU32 partitionsB=desc.bodyB->solverProgress;
+
+ PxU32 availablePartition;
+ {
+ const PxU32 combinedMask = (~partitionsA & ~partitionsB);
+ availablePartition = combinedMask == 0 ? MAX_NUM_PARTITIONS : Ps::lowestSetBit(combinedMask);
+ if(availablePartition == MAX_NUM_PARTITIONS)
+ {
+ //Need to shuffle around unpartitioned constraints...
+ eaTempConstraintDescriptors[newNumUnpartitionedConstraints++] = desc; // compacted in place; the write index never exceeds the read index i
+ continue;
+ }
+
+ const PxU32 partitionBit = getBit(availablePartition);
+
+ partitionsA |= partitionBit;
+ partitionsB |= partitionBit;
+ }
+
+ desc.bodyA->solverProgress = partitionsA;
+ desc.bodyB->solverProgress = partitionsB;
+ availablePartition += partitionStartIndex; // convert the bit index within this batch into a global partition index
+ eaOrderedConstraintDesc[accumulatedConstraintsPerPartition[availablePartition]++] = desc;
+ }
+
+ numUnpartitionedConstraints = newNumUnpartitionedConstraints;
+ }
+}
+
+}
+
+#define PX_NORMALIZE_PARTITIONS 1
+
+#if PX_NORMALIZE_PARTITIONS
+
+template<typename Classification>
+PxU32 normalizePartitions(Ps::Array<PxU32>& accumulatedConstraintsPerPartition, PxSolverConstraintDesc* PX_RESTRICT eaOrderedConstraintDescriptors,
+ const PxU32 numConstraintDescriptors, Ps::Array<PxU32>& bitField, const Classification& classification, const PxU32 numBodies, const PxU32 numArticulations)
+{ // Rebalances partition sizes in place by pulling non-conflicting constraints from earlier partitions into under-filled later ones, then drops empty partitions; returns the resulting partition count.
+ PxU32 numPartitions = 0;
+ // count the leading partitions whose accumulated end offset strictly increases (the loop below has an empty body)
+ PxU32 prevAccumulation = 0;
+ for(; numPartitions < accumulatedConstraintsPerPartition.size() && accumulatedConstraintsPerPartition[numPartitions] > prevAccumulation;
+ prevAccumulation = accumulatedConstraintsPerPartition[numPartitions++]);
+
+ PxU32 targetSize = (numPartitions == 0 ? 0 : (numConstraintDescriptors)/numPartitions); // integer average number of constraints per partition
+ // bitField holds one bit per body/articulation index, packed 32 per word
+ bitField.reserve((numBodies + numArticulations + 31)/32);
+ bitField.forceSize_Unsafe((numBodies + numArticulations + 31)/32);
+
+ for(PxU32 i = numPartitions; i > 0; i--) // walk the partitions from last to first
+ {
+ PxU32 partitionIndex = i-1;
+
+ //Build the partition mask...
+
+ PxU32 startIndex = partitionIndex == 0 ? 0 : accumulatedConstraintsPerPartition[partitionIndex-1];
+ PxU32 endIndex = accumulatedConstraintsPerPartition[partitionIndex];
+
+ //If its greater than target size, there's nothing that will be pulled into it from earlier partitions
+ if((endIndex - startIndex) >= targetSize)
+ continue;
+
+
+ PxMemZero(bitField.begin(), sizeof(PxU32)*bitField.size());
+ // mark every active body/articulation already referenced by a constraint in this partition
+ for(PxU32 a = startIndex; a < endIndex; ++a)
+ {
+ PxSolverConstraintDesc& desc = eaOrderedConstraintDescriptors[a];
+
+ uintptr_t indexA, indexB;
+ bool activeA, activeB;
+
+ classification.classifyConstraint(desc, indexA, indexB, activeA, activeB);
+
+ if(activeA)
+ bitField[PxU32(indexA)/32] |= getBit(indexA & 31);
+ if(activeB)
+ bitField[PxU32(indexB)/32] |= getBit(indexB & 31);
+ }
+ // scan the earlier partitions, nearest first and each from its last constraint backwards, for constraints that touch no marked body
+ bool bTerm = false;
+ for(PxU32 a = partitionIndex; a > 0 && !bTerm; --a)
+ {
+ PxU32 pInd = a-1;
+
+ PxU32 si = pInd == 0 ? 0 : accumulatedConstraintsPerPartition[pInd-1];
+ PxU32 ei = accumulatedConstraintsPerPartition[pInd];
+
+ for(PxU32 b = ei; b > si && !bTerm; --b)
+ {
+ PxU32 ind = b-1;
+ PxSolverConstraintDesc& desc = eaOrderedConstraintDescriptors[ind];
+
+ uintptr_t indexA, indexB;
+ bool activeA, activeB;
+
+ classification.classifyConstraint(desc, indexA, indexB, activeA, activeB);
+
+ bool canAdd = true;
+
+ if(activeA && (bitField[PxU32(indexA)/32] & (getBit(indexA & 31))))
+ canAdd = false;
+ if(activeB && (bitField[PxU32(indexB)/32] & (getBit(indexB & 31))))
+ canAdd = false;
+
+ if(canAdd)
+ {
+ PxSolverConstraintDesc tmp = eaOrderedConstraintDescriptors[ind];
+
+ if(activeA)
+ bitField[PxU32(indexA)/32] |= (getBit(indexA & 31));
+ if(activeB)
+ bitField[PxU32(indexB)/32] |= (getBit(indexB & 31));
+ // ripple the hole left at 'ind' up to the target partition: each intermediate partition gives up its last slot to the next one
+ PxU32 index = ind;
+ for(PxU32 c = pInd; c < partitionIndex; ++c)
+ {
+ PxU32 newIndex = --accumulatedConstraintsPerPartition[c]; // partition c ends one slot earlier; that slot now belongs to partition c+1
+ if(index != newIndex)
+ eaOrderedConstraintDescriptors[index] = eaOrderedConstraintDescriptors[newIndex];
+ index = newIndex;
+ }
+
+ if(index != ind)
+ eaOrderedConstraintDescriptors[index] = tmp;
+
+ if((accumulatedConstraintsPerPartition[partitionIndex] - accumulatedConstraintsPerPartition[partitionIndex-1]) >= targetSize) // stop once this partition has reached the target size
+ {
+ bTerm = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+ // compact the end offsets: an entry equal to the previous kept end offset marks an empty partition and is overwritten by the next one
+ PxU32 partitionCount = 0;
+ PxU32 lastPartitionCount = 0;
+ for (PxU32 a = 0; a < numPartitions; ++a)
+ {
+ const PxU32 constraintCount = accumulatedConstraintsPerPartition[a];
+ accumulatedConstraintsPerPartition[partitionCount] = constraintCount;
+ if (constraintCount != lastPartitionCount)
+ {
+ lastPartitionCount = constraintCount;
+ partitionCount++;
+ }
+ }
+
+ accumulatedConstraintsPerPartition.forceSize_Unsafe(partitionCount);
+
+ return partitionCount;
+}
+
+#endif
+
+PxU32 partitionContactConstraints(ConstraintPartitionArgs& args)
+{ // Entry point: counts constraints per partition, converts the counts to offsets, writes the descriptors in partition order into args.mOrderedContactConstraintDescriptors and returns the number of partitions.
+ PxU32 maxPartition = 0;
+ //Unpack the input data.
+ const PxU32 numBodies=args.mNumBodies;
+ PxSolverBody* PX_RESTRICT eaAtoms=args.mBodies;
+ const PxU32 numArticulations=args.mNumArticulationPtrs;
+
+ const PxU32 numConstraintDescriptors=args.mNumContactConstraintDescriptors;
+
+ PxSolverConstraintDesc* PX_RESTRICT eaConstraintDescriptors=args.mContactConstraintDescriptors;
+ PxSolverConstraintDesc* PX_RESTRICT eaOrderedConstraintDescriptors=args.mOrderedContactConstraintDescriptors;
+ PxSolverConstraintDesc* PX_RESTRICT eaTempConstraintDescriptors=args.mTempContactConstraintDescriptors;
+
+ Ps::Array<PxU32>& constraintsPerPartition = *args.mConstraintsPerPartition;
+ constraintsPerPartition.forceSize_Unsafe(0);
+
+ for(PxU32 a = 0; a < numBodies; ++a)
+ {
+ PxSolverBody& body = args.mBodies[a];
+ Ps::prefetchLine(&args.mBodies[a], 256);
+ body.solverProgress = 0;
+ //We re-use maxSolverFrictionProgress and maxSolverNormalProgress to record the
+ //maximum partition used by dynamic constraints and the number of static constraints affecting
+ //a body. We use this to make partitioning much cheaper and be able to support more than 32 partitions.
+ body.maxSolverFrictionProgress = 0;
+ body.maxSolverNormalProgress = 0;
+ }
+
+ PxU32 numOrderedConstraints=0;
+
+ PxU32 numSelfConstraintBlocks=0; // never modified in this function
+
+ if(numArticulations == 0)
+ {
+ RigidBodyClassification classification(eaAtoms, numBodies);
+ classifyConstraintDesc(eaConstraintDescriptors, numConstraintDescriptors, classification, constraintsPerPartition,
+ eaTempConstraintDescriptors);
+ // turn the per-partition counts into start offsets (exclusive prefix sum)
+ PxU32 accumulation = 0;
+ for(PxU32 a = 0; a < constraintsPerPartition.size(); ++a)
+ {
+ PxU32 count = constraintsPerPartition[a];
+ constraintsPerPartition[a] = accumulation;
+ accumulation += count;
+ }
+ // reset the partition bitmasks and static counters so the write pass can repeat the same assignment
+ for(PxU32 a = 0; a < numBodies; ++a)
+ {
+ PxSolverBody& body = args.mBodies[a];
+ Ps::prefetchLine(&args.mBodies[a], 256);
+ body.solverProgress = 0;
+ //Keep the dynamic constraint count but bump the static constraint count back to 0.
+ //This allows us to place the static constraints in the appropriate place when we see them
+ //because we know the maximum index for the dynamic constraints...
+ body.maxSolverFrictionProgress = 0;
+ }
+
+ writeConstraintDesc(eaConstraintDescriptors, numConstraintDescriptors, classification, constraintsPerPartition,
+ eaTempConstraintDescriptors, eaOrderedConstraintDescriptors);
+
+ numOrderedConstraints = numConstraintDescriptors;
+ // partition rebalancing is skipped when enhancedDeterminism is requested
+ if(!args.enhancedDeterminism)
+ maxPartition = normalizePartitions(constraintsPerPartition, eaOrderedConstraintDescriptors, numConstraintDescriptors, *args.mBitField,
+ classification, numBodies, 0);
+
+ }
+ else
+ {
+ // articulations present: gather their FsData pointers (as integers) and reset their partition state, then run the same steps with the extended classification
+ const ArticulationSolverDesc* articulationDescs=args.mArticulationPtrs;
+ PX_ALLOCA(_eaFsData, uintptr_t, numArticulations);
+ uintptr_t* eaFsDatas = _eaFsData;
+ for(PxU32 i=0;i<numArticulations;i++)
+ {
+ FsData* data = articulationDescs[i].fsData;
+ eaFsDatas[i]=uintptr_t(data);
+ data->solverProgress = 0;
+ data->maxSolverFrictionProgress = 0;
+ data->maxSolverNormalProgress = 0;
+ }
+ ExtendedRigidBodyClassification classification(eaAtoms, numBodies, eaFsDatas, numArticulations);
+
+ classifyConstraintDesc(eaConstraintDescriptors, numConstraintDescriptors, classification,
+ constraintsPerPartition, eaTempConstraintDescriptors);
+ // turn the per-partition counts into start offsets (exclusive prefix sum)
+ PxU32 accumulation = 0;
+ for(PxU32 a = 0; a < constraintsPerPartition.size(); ++a)
+ {
+ PxU32 count = constraintsPerPartition[a];
+ constraintsPerPartition[a] = accumulation;
+ accumulation += count;
+ }
+
+ for(PxU32 a = 0; a < numBodies; ++a)
+ {
+ PxSolverBody& body = args.mBodies[a];
+ Ps::prefetchLine(&args.mBodies[a], 256);
+ body.solverProgress = 0;
+ //Keep the dynamic constraint count but bump the static constraint count back to 0.
+ //This allows us to place the static constraints in the appropriate place when we see them
+ //because we know the maximum index for the dynamic constraints...
+ body.maxSolverFrictionProgress = 0;
+ }
+ // same reset for the articulations: maxSolverNormalProgress is kept, the static counter restarts at 0
+ for(PxU32 a = 0; a < numArticulations; ++a)
+ {
+ FsData* data = reinterpret_cast<FsData*>(eaFsDatas[a]);
+ data->solverProgress = 0;
+ data->maxSolverFrictionProgress = 0;
+ }
+
+ writeConstraintDesc(eaConstraintDescriptors, numConstraintDescriptors, classification, constraintsPerPartition,
+ eaTempConstraintDescriptors, eaOrderedConstraintDescriptors);
+
+ numOrderedConstraints = numConstraintDescriptors;
+
+ if (!args.enhancedDeterminism)
+ maxPartition = normalizePartitions(constraintsPerPartition, eaOrderedConstraintDescriptors,
+ numConstraintDescriptors, *args.mBitField, classification, numBodies, numArticulations);
+
+ }
+
+
+
+ const PxU32 numConstraintsDifferentBodies=numOrderedConstraints;
+
+ PX_ASSERT(numConstraintsDifferentBodies == numConstraintDescriptors);
+
+ //Now handle the articulated self-constraints.
+ PxU32 totalConstraintCount = numConstraintsDifferentBodies; // nothing is added after this, so mNumSelfConstraints below ends up 0
+
+ args.mNumSelfConstraintBlocks=numSelfConstraintBlocks;
+
+ args.mNumDifferentBodyConstraints=numConstraintsDifferentBodies;
+ args.mNumSelfConstraints=totalConstraintCount-numConstraintsDifferentBodies;
+ // without the normalize pass, count partitions up to the first one whose end offset equals the previous one (i.e. that adds no constraints)
+ if (args.enhancedDeterminism)
+ {
+ PxU32 prevPartitionSize = 0;
+ maxPartition = 0;
+ for (PxU32 a = 0; a < constraintsPerPartition.size(); ++a, maxPartition++)
+ {
+ if (constraintsPerPartition[a] == prevPartitionSize)
+ break;
+ prevPartitionSize = constraintsPerPartition[a];
+ }
+ }
+
+ return maxPartition;
+}
+
+}
+
+}
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPartition.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPartition.h
new file mode 100644
index 00000000..ba4c8c29
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPartition.h
@@ -0,0 +1,79 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_CONSTRAINTPARTITION_H
+#define DY_CONSTRAINTPARTITION_H
+
+#include "DyDynamics.h"
+
+
+
+namespace physx
+{
+
+namespace Dy
+{
+struct ConstraintPartitionArgs // in/out argument block taken by partitionContactConstraints()
+{
+ enum
+ {
+ eMAX_NUM_BODIES = 8192
+ };
+
+ //Input: arrays supplied by the caller together with their element counts
+ PxSolverBody* mBodies;
+ PxU32 mNumBodies;
+ ArticulationSolverDesc* mArticulationPtrs;
+ PxU32 mNumArticulationPtrs;
+ PxSolverConstraintDesc* mContactConstraintDescriptors;
+ PxU32 mNumContactConstraintDescriptors;
+ //Output: the three mNum* counters below are assigned by partitionContactConstraints()
+ PxSolverConstraintDesc* mOrderedContactConstraintDescriptors;
+ PxSolverConstraintDesc* mTempContactConstraintDescriptors;
+ PxU32 mNumSelfConstraintBlocks;
+ PxU32 mNumDifferentBodyConstraints;
+ PxU32 mNumSelfConstraints;
+ Ps::Array<PxU32>* mConstraintsPerPartition;
+ //Ps::Array<PxU32>* mStartIndices;
+ Ps::Array<PxU32>* mBitField; // dereferenced and handed to normalizePartitions()
+
+ bool enhancedDeterminism; // false: normalizePartitions() is run; true: that step is skipped and the partition count is derived from the per-partition sizes
+};
+
+PxU32 partitionContactConstraints(ConstraintPartitionArgs& args);
+
+} // namespace Dy
+
+}
+
+
+
+#endif // DY_CONSTRAINTPARTITION_H
+
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPrep.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPrep.h
new file mode 100644
index 00000000..e7202a78
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPrep.h
@@ -0,0 +1,92 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_CONSTRAINTSHADER_H
+#define DY_CONSTRAINTSHADER_H
+
+#include "DyConstraint.h"
+
+#include "DySolverConstraintDesc.h"
+#include "PsArray.h"
+
+namespace physx
+{
+
+class PxcConstraintBlockStream;
+class PxsConstraintBlockManager;
+struct PxSolverBody;
+struct PxSolverBodyData;
+struct PxSolverConstraintDesc;
+
+namespace Dy
+{
+
+ static const PxU32 MAX_CONSTRAINT_ROWS = 12;
+
+struct SolverConstraintShaderPrepDesc // shader-side inputs consumed by SetupSolverConstraint()
+{
+ const Constraint* constraint;
+ PxConstraintSolverPrep solverPrep; // row-generating callback; SetupSolverConstraint() returns 0 when this is null
+ const void* constantBlock; // passed through unchanged to solverPrep
+ PxU32 constantBlockByteSize; // presumably the size of constantBlock in bytes - TODO confirm
+};
+
+SolverConstraintPrepState::Enum setupSolverConstraint4
+ (SolverConstraintShaderPrepDesc* PX_RESTRICT constraintShaderDescs,
+ PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
+ const PxReal dt, const PxReal recipdt, PxU32& totalRows,
+ PxConstraintAllocator& allocator);
+
+SolverConstraintPrepState::Enum setupSolverConstraint4
+ (PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
+ const PxReal dt, const PxReal recipdt, PxU32& totalRows,
+ PxConstraintAllocator& allocator, PxU32 maxRows);
+
+PxU32 SetupSolverConstraint(SolverConstraintShaderPrepDesc& shaderDesc,
+ PxSolverConstraintPrepDesc& prepDesc,
+ PxConstraintAllocator& allocator,
+ PxReal dt, PxReal invdt);
+
+
+class ConstraintHelper
+{
+public:
+ // Builds the solver data for prepDesc.desc; returns prepDesc.numRows, or 0 when there are no rows or the allocation fails.
+ static PxU32 setupSolverConstraint(
+ PxSolverConstraintPrepDesc& prepDesc,
+ PxConstraintAllocator& allocator,
+ PxReal dt, PxReal invdt);
+};
+
+}
+
+}
+
+#endif //DY_CONSTRAINTSHADER_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp
new file mode 100644
index 00000000..c5777c12
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp
@@ -0,0 +1,594 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxMemory.h"
+#include "DyConstraintPrep.h"
+#include "PxsRigidBody.h"
+#include "DySolverConstraint1D.h"
+#include "PsSort.h"
+#include "DySolverConstraintDesc.h"
+#include "PxcConstraintBlockStream.h"
+#include "DyArticulationContactPrep.h"
+#include "PsFoundation.h"
+
+namespace physx
+{
+namespace Dy
+{
+ // dsequeira:
+ //
+ // we can choose any linear combination of equality constraints and get the same solution
+ // Hence we can orthogonalize the constraints using the inner product given by the
+ // inverse mass matrix, so that when we use PGS, solving a constraint row for a joint
+ // doesn't disturb the solution of prior rows.
+ //
+ // We also eliminate the equality constraints from the hard inequality constraints -
+ // (essentially projecting the direction corresponding to the lagrange multiplier
+ // onto the equality constraint subspace) but 'til I've verified this generates
+ // exactly the same KKT/complementarity conditions, status is 'experimental'.
+ //
+ // since for equality constraints the resulting rows have the property that applying
+ // an impulse along one row doesn't alter the projected velocity along another row,
+ // all equality constraints (plus one inequality constraint) can be processed in parallel
+ // using SIMD
+ //
+ // Eliminating the inequality constraints from each other would require a solver change
+ // and not give us any more parallelism, although we might get better convergence.
+
+namespace
+{
+ PX_FORCE_INLINE Vec3V V3FromV4(Vec4V x) { return Vec3V_From_Vec4V(x); } // short alias for the Vec4V -> Vec3V conversion
+ PX_FORCE_INLINE Vec3V V3FromV4Unsafe(Vec4V x) { return Vec3V_From_Vec4V_WUndefined(x); } // alias; W is left undefined, going by the wrapped function's name
+ PX_FORCE_INLINE Vec4V V4FromV3(Vec3V x) { return Vec4V_From_Vec3V(x); } // short alias for the Vec3V -> Vec4V conversion
+ //PX_FORCE_INLINE Vec4V V4ClearW(Vec4V x) { return V4SetW(x, FZero()); }
+
+struct MassProps // scaled inverse masses and inertia scale factors of the two bodies, held as FloatV
+{
+ FloatV invMass0;
+ FloatV invMass1;
+ FloatV invInertiaScale0;
+ FloatV invInertiaScale1;
+
+ PX_FORCE_INLINE MassProps(const PxSolverBodyData& bd0,
+ const PxSolverBodyData& bd1,
+ const PxConstraintInvMassScale& ims)
+ :
+ invMass0(FLoad(bd0.invMass * ims.linear0)) // body 0 inverse mass times its linear scale
+ , invMass1(FLoad(bd1.invMass * ims.linear1)) // body 1 inverse mass times its linear scale
+ , invInertiaScale0(FLoad(ims.angular0)) // only the scale factor is stored; the inertia itself is not read here
+ , invInertiaScale1(FLoad(ims.angular1))
+ {}
+};
+// Inner product of two constraint rows: linear axes weighted by the scaled inverse masses in m,
+// angular axes (the caller-supplied *AngSqrtInvInertia* vectors) weighted by the inertia scales in m.
+PX_FORCE_INLINE PxReal innerProduct(const Px1DConstraint& row0, Px1DConstraint& row1, 
+ PxVec4& row0AngSqrtInvInertia0, PxVec4& row0AngSqrtInvInertia1, 
+ PxVec4& row1AngSqrtInvInertia0, PxVec4& row1AngSqrtInvInertia1, const MassProps& m)
+{
+ const Vec3V l0 = V3Mul(V3Scale(V3LoadA(row0.linear0), m.invMass0), V3LoadA(row1.linear0));
+ const Vec3V l1 = V3Mul(V3Scale(V3LoadA(row0.linear1), m.invMass1), V3LoadA(row1.linear1));
+ Vec4V r0ang0 = V4LoadA(&row0AngSqrtInvInertia0.x);
+ Vec4V r1ang0 = V4LoadA(&row1AngSqrtInvInertia0.x);
+ Vec4V r0ang1 = V4LoadA(&row0AngSqrtInvInertia1.x);
+ Vec4V r1ang1 = V4LoadA(&row1AngSqrtInvInertia1.x);
+ // Per-body contribution: angular product times that body's own inertia scale, plus its linear term.
+ const Vec3V i0 = V3ScaleAdd(V3Mul(Vec3V_From_Vec4V(r0ang0), Vec3V_From_Vec4V(r1ang0)), m.invInertiaScale0, l0);
+ const Vec3V i1 = V3ScaleAdd(V3Mul(Vec3V_From_Vec4V(r0ang1), Vec3V_From_Vec4V(r1ang1)), m.invInertiaScale1, l1);
+ PxF32 f;
+ FStore(V3SumElems(V3Add(i0, i1)), &f); // body 0's terms are added after scaling, so invInertiaScale1 no longer multiplies them
+ return f;
+}
+
+
+// indexed rotation around axis: returns the quaternion (0,0,0,c) with component `axis` set to s (s, c = sine and cosine of the half-angle)
+PX_FORCE_INLINE PxQuat indexedRotation(PxU32 axis, PxReal s, PxReal c)
+{
+ PxQuat q(0,0,0,c);
+ reinterpret_cast<PxReal*>(&q)[axis] = s; // indexes q as a float array; assumes x,y,z are stored first and axis < 3 - TODO confirm
+ return q;
+}
+// Returns a rotation q, built iteratively, such that PxMat33(q)^T * m * PxMat33(q) has small off-diagonal terms.
+PxQuat diagonalize(const PxMat33& m) // jacobi rotation using quaternions
+{
+ const PxU32 MAX_ITERS = 5;
+ // start from the identity rotation and refine it at most MAX_ITERS times
+ PxQuat q = PxQuat(PxIdentity);
+
+ PxMat33 d;
+ for(PxU32 i=0; i < MAX_ITERS;i++)
+ {
+ const PxMat33 axes(q);
+ d = axes.getTranspose() * m * axes;
+ // d is m expressed in the current basis; pick the off-diagonal entry with the largest magnitude
+ const PxReal d0 = PxAbs(d[1][2]), d1 = PxAbs(d[0][2]), d2 = PxAbs(d[0][1]);
+ const PxU32 a = PxU32(d0 > d1 && d0 > d2 ? 0 : d1 > d2 ? 1 : 2); // rotation axis index, from largest off-diagonal element
+ // stop when that entry is exactly zero or negligible next to the difference of the two diagonal entries
+ const PxU32 a1 = Ps::getNextIndex3(a), a2 = Ps::getNextIndex3(a1); 
+ if(d[a1][a2] == 0.0f || PxAbs(d[a1][a1]-d[a2][a2]) > 2e6f*PxAbs(2.0f*d[a1][a2]))
+ break;
+
+ const PxReal w = (d[a1][a1]-d[a2][a2]) / (2.0f*d[a1][a2]); // cot(2 * phi), where phi is the rotation angle
+ const PxReal absw = PxAbs(w);
+
+ PxQuat r;
+ if(absw>1000)
+ r = indexedRotation(a, 1.0f/(4.0f*w), 1.f); // h will be very close to 1, so use small angle approx instead
+ else
+ {
+ const PxReal t = 1 / (absw + PxSqrt(w*w+1)); // absolute value of tan phi
+ const PxReal h = 1 / PxSqrt(t*t+1); // absolute value of cos phi
+
+ PX_ASSERT(h!=1); // |w|<1000 guarantees this with typical IEEE754 machine eps (approx 6e-8)
+ r = indexedRotation(a, PxSqrt((1-h)/2) * PxSign(w), PxSqrt((1+h)/2));
+ }
+ // accumulate this step's rotation and renormalize the running quaternion
+ q = (q*r).getNormalized(); 
+ }
+
+ return q;
+}
+// In-place recombination of three vectors: new a_k = a0*m.col_k.x + a1*m.col_k.y + a2*m.col_k.z.
+// All three inputs are loaded before anything is stored back.
+PX_FORCE_INLINE void rescale(const Mat33V& m, PxVec3& a0, PxVec3& a1, PxVec3& a2)
+{
+ const Vec3V va0 = V3LoadU(a0);
+ const Vec3V va1 = V3LoadU(a1);
+ const Vec3V va2 = V3LoadU(a2);
+
+ const Vec3V b0 = V3ScaleAdd(va0, V3GetX(m.col0), V3ScaleAdd(va1, V3GetY(m.col0), V3Scale(va2, V3GetZ(m.col0))));
+ const Vec3V b1 = V3ScaleAdd(va0, V3GetX(m.col1), V3ScaleAdd(va1, V3GetY(m.col1), V3Scale(va2, V3GetZ(m.col1))));
+ const Vec3V b2 = V3ScaleAdd(va0, V3GetX(m.col2), V3ScaleAdd(va1, V3GetY(m.col2), V3Scale(va2, V3GetZ(m.col2))));
+
+ V3StoreU(b0, a0);
+ V3StoreU(b1, a1);
+ V3StoreU(b2, a2);
+}
+// Four-float variant of rescale(): each pointer is read and written as a Vec4V, so the fourth float is recombined as well.
+PX_FORCE_INLINE void rescale4(const Mat33V& m, PxReal* a0, PxReal* a1, PxReal* a2)
+{
+ const Vec4V va0 = V4LoadA(a0);
+ const Vec4V va1 = V4LoadA(a1);
+ const Vec4V va2 = V4LoadA(a2);
+
+ const Vec4V b0 = V4ScaleAdd(va0, V3GetX(m.col0), V4ScaleAdd(va1, V3GetY(m.col0), V4Scale(va2, V3GetZ(m.col0))));
+ const Vec4V b1 = V4ScaleAdd(va0, V3GetX(m.col1), V4ScaleAdd(va1, V3GetY(m.col1), V4Scale(va2, V3GetZ(m.col1))));
+ const Vec4V b2 = V4ScaleAdd(va0, V3GetX(m.col2), V4ScaleAdd(va1, V3GetY(m.col2), V4Scale(va2, V3GetZ(m.col2))));
+
+ V4StoreA(b0, a0);
+ V4StoreA(b1, a1);
+ V4StoreA(b2, a2);
+}
+// Scalar-matrix overload: new a_k = a0*m(0,k) + a1*m(1,k) + a2*m(2,k) for k = 0..2.
+// The results are computed into temporaries first, so the inputs are not overwritten while still needed.
+template<typename T>
+PX_FORCE_INLINE void rescale(const PxMat33& m, T& a0, T& a1, T& a2)
+{
+ T b0 = a0*m(0,0) + a1 * m(1,0) + a2 * m(2,0);
+ T b1 = a0*m(0,1) + a1 * m(1,1) + a2 * m(2,1);
+ T b2 = a0*m(0,2) + a1 * m(1,2) + a2 * m(2,2);
+
+ a0 = b0;
+ a1 = b1;
+ a2 = b2;
+}
+// Recombines rows 0..2 (and their angSqrtInvInertia vectors) using the rotation that diagonalizes their 3x3 inner-product matrix.
+void diagonalize(Px1DConstraint** row,
+ PxVec4* angSqrtInvInertia0,
+ PxVec4* angSqrtInvInertia1,
+ const MassProps &m)
+{
+ PxReal a00 = innerProduct(*row[0], *row[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], m);
+ PxReal a01 = innerProduct(*row[0], *row[1], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m);
+ PxReal a02 = innerProduct(*row[0], *row[2], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m);
+ PxReal a11 = innerProduct(*row[1], *row[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m);
+ PxReal a12 = innerProduct(*row[1], *row[2], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m);
+ PxReal a22 = innerProduct(*row[2], *row[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m);
+ // symmetric matrix of pairwise inner products; only the upper triangle is computed above
+ PxMat33 a(PxVec3(a00, a01, a02),
+ PxVec3(a01, a11, a12),
+ PxVec3(a02, a12, a22));
+
+ PxQuat q = diagonalize(a);
+
+ PxMat33 n(-q);
+ // the matrix built from the negated quaternion is what gets applied to every per-row quantity below
+ Mat33V mn(V3LoadU(n.column0), V3LoadU(n.column1), V3LoadU(n.column2));
+
+ //KS - We treat as a Vec4V so that we get geometricError rescaled for free along with linear0
+ rescale4(mn, &row[0]->linear0.x, &row[1]->linear0.x, &row[2]->linear0.x);
+ rescale(mn, row[0]->linear1, row[1]->linear1, row[2]->linear1);
+ //KS - We treat as a PxVec4 so that we get velocityTarget rescaled for free 
+ rescale4(mn, &row[0]->angular0.x, &row[1]->angular0.x, &row[2]->angular0.x);
+ rescale(mn, row[0]->angular1, row[1]->angular1, row[2]->angular1);
+ rescale4(mn, &angSqrtInvInertia0[0].x, &angSqrtInvInertia0[1].x, &angSqrtInvInertia0[2].x);
+ rescale4(mn, &angSqrtInvInertia1[0].x, &angSqrtInvInertia1[1].x, &angSqrtInvInertia1[2].x);
+
+}
+// Removes from every row its component along the preceding equality rows (the first eqRowCount rows), using the mass-weighted inner product.
+void orthogonalize(Px1DConstraint** row,
+ PxVec4* angSqrtInvInertia0,
+ PxVec4* angSqrtInvInertia1,
+ PxU32 rowCount,
+ PxU32 eqRowCount,
+ const MassProps &m)
+{
+ PX_ASSERT(eqRowCount<=6);
+ // the caches below hold one entry per equality row, hence the fixed size of 6
+ const FloatV zero = FZero();
+
+ Vec3V lin1m[6], ang1m[6], lin1[6], ang1[6]; 
+ Vec4V lin0m[6], ang0m[6]; // must have 0 in the W-field
+ Vec4V lin0AndG[6], ang0AndT[6];
+
+ for(PxU32 i=0;i<rowCount;i++)
+ {
+ Vec4V l0AndG = V4LoadA(&row[i]->linear0.x); // linear0 and geometric error
+ Vec4V a0AndT = V4LoadA(&row[i]->angular0.x); // angular0 and velocity target
+
+ Vec3V l1 = V3FromV4(V4LoadA(&row[i]->linear1.x));
+ Vec3V a1 = V3FromV4(V4LoadA(&row[i]->angular1.x));
+ 
+ Vec4V angSqrtL0 = V4LoadA(&angSqrtInvInertia0[i].x);
+ Vec4V angSqrtL1 = V4LoadA(&angSqrtInvInertia1[i].x);
+ // eliminate against every equality row that has already been processed
+ PxU32 eliminationRows = PxMin<PxU32>(i, eqRowCount);
+ for(PxU32 j=0;j<eliminationRows;j++)
+ {
+ const Vec3V s0 = V3MulAdd(l1, lin1m[j], V3FromV4Unsafe(V4Mul(l0AndG, lin0m[j])));
+ const Vec3V s1 = V3MulAdd(V3FromV4Unsafe(angSqrtL1), ang1m[j], V3FromV4Unsafe(V4Mul(angSqrtL0, ang0m[j])));
+ FloatV t = V3SumElems(V3Add(s0, s1));
+ // t is the projection coefficient onto row j; the *NegScaleSub calls presumably compute (third arg) - (first arg)*t - TODO confirm
+ l0AndG = V4NegScaleSub(lin0AndG[j], t, l0AndG);
+ a0AndT = V4NegScaleSub(ang0AndT[j], t, a0AndT);
+ l1 = V3NegScaleSub(lin1[j], t, l1);
+ a1 = V3NegScaleSub(ang1[j], t, a1);
+ angSqrtL0 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia0[j].x), t, angSqrtL0);
+ angSqrtL1 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia1[j].x), t, angSqrtL1);
+ }
+ // write the reduced row back over the caller's data
+ V4StoreA(l0AndG, &row[i]->linear0.x);
+ V4StoreA(a0AndT, &row[i]->angular0.x);
+ V3StoreA(l1, row[i]->linear1);
+ V3StoreA(a1, row[i]->angular1);
+ V4StoreA(angSqrtL0, &angSqrtInvInertia0[i].x);
+ V4StoreA(angSqrtL1, &angSqrtInvInertia1[i].x);
+ // equality rows are cached, together with a scaled copy, for use when eliminating later rows
+ if(i<eqRowCount)
+ {
+ lin0AndG[i] = l0AndG; 
+ ang0AndT[i] = a0AndT;
+ lin1[i] = l1; 
+ ang1[i] = a1; 
+ 
+ const Vec3V l0 = V3FromV4(l0AndG);
+
+ const Vec3V l0m = V3Scale(l0, m.invMass0);
+ const Vec3V l1m = V3Scale(l1, m.invMass1);
+ const Vec4V a0m = V4Scale(angSqrtL0, m.invInertiaScale0);
+ const Vec4V a1m = V4Scale(angSqrtL1, m.invInertiaScale1);
+ // s is the inner product of this row with itself; a is its reciprocal, or zero when s is not positive
+ const Vec3V s0 = V3MulAdd(l0, l0m, V3Mul(l1, l1m));
+ const Vec4V s1 = V4MulAdd(a0m, angSqrtL0, V4Mul(a1m, angSqrtL1));
+ const FloatV s = V3SumElems(V3Add(s0, V3FromV4Unsafe(s1)));
+ const FloatV a = FSel(FIsGrtr(s, zero), FRecip(s), zero); // with mass scaling, it's possible for the inner product of a row to be zero
+
+ lin0m[i] = V4Scale(V4ClearW(V4FromV3(l0m)), a); 
+ ang0m[i] = V4Scale(V4ClearW(a0m), a);
+ lin1m[i] = V3Scale(l1m, a);
+ ang1m[i] = V3Scale(V3FromV4Unsafe(a1m), a);
+ }
+ }
+}
+}
+// Fills sorted[] with pointers to rows[] ordered by solveHint, stores each sorted row's angular axes multiplied by the
+// bodies' sqrtInvInertia, then (unless disablePreprocessing) orthogonalizes / diagonalizes groups sharing solveHint>>8.
+void preprocessRows(Px1DConstraint** sorted, 
+ Px1DConstraint* rows,
+ PxVec4* angSqrtInvInertia0,
+ PxVec4* angSqrtInvInertia1,
+ PxU32 rowCount,
+ const PxSolverBodyData& bd0,
+ const PxSolverBodyData& bd1,
+ const PxConstraintInvMassScale& ims,
+ bool disablePreprocessing,
+ bool diagonalizeDrive)
+{
+ // the row count is maxed at 12, typically around 7, so insertion sort is fine
+ for(PxU32 i=0; i<rowCount; i++)
+ {
+ Px1DConstraint* r = rows+i;
+ 
+ PxU32 j = i;
+ for(;j>0 && r->solveHint < sorted[j-1]->solveHint; j--)
+ sorted[j] = sorted[j-1];
+
+ sorted[j] = r;
+ }
+ // i+1<rowCount rather than i<rowCount-1: rowCount is unsigned, so rowCount-1 would wrap when rowCount is 0
+ for(PxU32 i=0;i+1<rowCount;i++)
+ PX_ASSERT(sorted[i]->solveHint <= sorted[i+1]->solveHint);
+
+ for (PxU32 i = 0; i<rowCount; i++)
+ rows[i].forInternalUse = rows[i].flags & Px1DConstraintFlag::eKEEPBIAS ? rows[i].geometricError : 0;
+
+
+ const Mat33V sqrtInvInertia0 = Mat33V(V3LoadU(bd0.sqrtInvInertia.column0), V3LoadU(bd0.sqrtInvInertia.column1),
+ V3LoadU(bd0.sqrtInvInertia.column2));
+
+ const Mat33V sqrtInvInertia1 = Mat33V(V3LoadU(bd1.sqrtInvInertia.column0), V3LoadU(bd1.sqrtInvInertia.column1),
+ V3LoadU(bd1.sqrtInvInertia.column2));
+ // the output arrays are written with aligned stores below, so they must be 16-byte aligned
+ PX_ASSERT(((uintptr_t(angSqrtInvInertia0)) & 0xF) == 0);
+ PX_ASSERT(((uintptr_t(angSqrtInvInertia1)) & 0xF) == 0);
+
+ for(PxU32 i = 0; i < rowCount; ++i)
+ {
+ const Vec3V angDelta0 = M33MulV3(sqrtInvInertia0, V3LoadU(sorted[i]->angular0));
+ const Vec3V angDelta1 = M33MulV3(sqrtInvInertia1, V3LoadU(sorted[i]->angular1));
+ V4StoreA(Vec4V_From_Vec3V(angDelta0), &angSqrtInvInertia0[i].x);
+ V4StoreA(Vec4V_From_Vec3V(angDelta1), &angSqrtInvInertia1[i].x);
+ }
+
+ if(disablePreprocessing)
+ return;
+
+ MassProps m(bd0, bd1, ims);
+ for(PxU32 i=0;i<rowCount;)
+ {
+ const PxU32 groupMajorId = PxU32(sorted[i]->solveHint>>8), start = i++;
+ while(i<rowCount && PxU32(sorted[i]->solveHint>>8) == groupMajorId)
+ i++;
+ // [start, i) is now a run of rows sharing the same major group id
+ if(groupMajorId == 4)
+ {
+ PxU32 bCount = start; // count of bilateral constraints
+ for(; bCount<i && (sorted[bCount]->solveHint&255)==0; bCount++)
+ ;
+ orthogonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, i-start, bCount-start, m);
+ }
+
+ if(groupMajorId == 1 && diagonalizeDrive)
+ { 
+ PxU32 slerp = start; // index of the first row in the group whose minor hint (solveHint&255) is 2
+ for(; slerp<i && (sorted[slerp]->solveHint&255)!=2; slerp++)
+ ;
+ if(slerp+3 == i)
+ diagonalize(sorted+slerp, angSqrtInvInertia0+slerp, angSqrtInvInertia1+slerp, m);
+ // NOTE(review): the call below runs unconditionally, so a 3-row group is diagonalized twice and any other group size only trips the assert - confirm this is intended
+ PX_ASSERT(i-start==3);
+ diagonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, m);
+ }
+ }
+}
+
+
+
+// Allocates and fills the solver constraint (header plus one SolverConstraint1D or SolverConstraint1DExt per row) for prepDesc.desc.
+// Returns prepDesc.numRows on success, or 0 when there are no rows or the allocator could not supply memory.
+PxU32 ConstraintHelper::setupSolverConstraint(
+PxSolverConstraintPrepDesc& prepDesc,
+PxConstraintAllocator& allocator,
+PxReal dt, PxReal invdt)
+{
+ if (prepDesc .numRows== 0)
+ return 0;
+
+ PxSolverConstraintDesc& desc = *prepDesc.desc;
+ // "extended" means at least one side carries a link index, which selects the larger Ext row layout
+ bool isExtended = desc.linkIndexA != PxSolverConstraintDesc::NO_LINK
+ || desc.linkIndexB != PxSolverConstraintDesc::NO_LINK;
+
+ PxU32 stride = isExtended ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D);
+ const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) + stride * prepDesc.numRows;
+ 
+ //KS - +16 is for the constraint progress counter, which needs to be the last element in the constraint (so that we
+ //know SPU DMAs have completed)
+ PxU8* ptr = allocator.reserveConstraintData(constraintLength + 16u);
+ if(NULL == ptr || (reinterpret_cast<PxU8*>(-1))==ptr)
+ {
+ if(NULL==ptr)
+ {
+ PX_WARN_ONCE(
+ "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+ "Either accept joints detaching/exploding or increase buffer size allocated for constraint prep by increasing PxSceneDesc::maxNbContactDataBlocks.");
+ return 0;
+ }
+ else
+ {
+ PX_WARN_ONCE(
+ "Attempting to allocate more than 16K of constraint data. "
+ "Either accept joints detaching/exploding or simplify constraints.");
+ ptr=NULL;
+ return 0;
+ }
+ }
+ desc.constraint = ptr;
+
+ setConstraintLength(desc,constraintLength);
+
+ desc.writeBack = prepDesc.writeback;
+ setWritebackLength(desc, sizeof(ConstraintWriteback));
+
+ memset(desc.constraint, 0, constraintLength);
+
+ SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
+ PxU8* constraints = desc.constraint + sizeof(SolverConstraint1DHeader);
+ init(*header, Ps::to8(prepDesc.numRows), isExtended, prepDesc.mInvMassScales);
+ header->body0WorldOffset = prepDesc.body0WorldOffset;
+ header->linBreakImpulse = prepDesc.linBreakForce * dt;
+ header->angBreakImpulse = prepDesc.angBreakForce * dt;
+ header->breakable = PxU8((prepDesc.linBreakForce != PX_MAX_F32) || (prepDesc.angBreakForce != PX_MAX_F32));
+ header->invMass0D0 = prepDesc.data0->invMass * prepDesc.mInvMassScales.linear0;
+ header->invMass1D1 = prepDesc.data1->invMass * prepDesc.mInvMassScales.linear1;
+
+ // NOTE(review): the scratch arrays below hold MAX_CONSTRAINT_ROWS entries but prepDesc.numRows is not checked against that bound here
+ PX_ALIGN(16, PxVec4) angSqrtInvInertia0[MAX_CONSTRAINT_ROWS];
+ PX_ALIGN(16, PxVec4) angSqrtInvInertia1[MAX_CONSTRAINT_ROWS];
+
+ Px1DConstraint* sorted[MAX_CONSTRAINT_ROWS];
+ // extended constraints always skip the orthogonalize/diagonalize stage of preprocessRows
+ preprocessRows(sorted, prepDesc.rows, angSqrtInvInertia0, angSqrtInvInertia1, prepDesc.numRows, *prepDesc.data0, *prepDesc.data1, prepDesc.mInvMassScales, 
+ isExtended || prepDesc.disablePreprocessing, prepDesc.improvedSlerp);
+
+ const PxReal erp = 1.0f;
+ for (PxU32 i = 0; i<prepDesc.numRows; i++)
+ {
+ Ps::prefetchLine(constraints, 128);
+ SolverConstraint1D &s = *reinterpret_cast<SolverConstraint1D *>(constraints);
+ Px1DConstraint& c = *sorted[i];
+ // impulse limits are scaled by min(dt, 1) only for drive-limited rows when drive limits are given as forces
+ PxReal driveScale = c.flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && prepDesc.driveLimitsAreForces ? PxMin(dt, 1.0f) : 1.0f;
+
+ PxReal unitResponse;
+ PxReal normalVel = 0.0f;
+ PxReal initVel = 0.f;
+
+ if(!isExtended)
+ {
+ init(s, c.linear0, c.linear1, PxVec3(angSqrtInvInertia0[i].x, angSqrtInvInertia0[i].y, angSqrtInvInertia0[i].z),
+ PxVec3(angSqrtInvInertia1[i].x, angSqrtInvInertia1[i].y, angSqrtInvInertia1[i].z), c.minImpulse * driveScale, c.maxImpulse * driveScale);
+ s.ang0Writeback = c.angular0;
+ PxReal resp0 = s.lin0.magnitudeSquared() * prepDesc.data0->invMass * prepDesc.mInvMassScales.linear0 + s.ang0.magnitudeSquared() * prepDesc.mInvMassScales.angular0;
+ PxReal resp1 = s.lin1.magnitudeSquared() * prepDesc.data1->invMass * prepDesc.mInvMassScales.linear1 + s.ang1.magnitudeSquared() * prepDesc.mInvMassScales.angular1;
+ unitResponse = resp0 + resp1;
+ initVel = normalVel = prepDesc.data0->projectVelocity(c.linear0, c.angular0) - prepDesc.data1->projectVelocity(c.linear1, c.angular1);
+ }
+ else
+ {
+ init(s, c.linear0, c.linear1, c.angular0, c.angular1, c.minImpulse * driveScale, c.maxImpulse * driveScale);
+ SolverConstraint1DExt& e = static_cast<SolverConstraint1DExt&>(s);
+
+ const SolverExtBody eb0(reinterpret_cast<const void*>(prepDesc.body0), prepDesc.data0, desc.linkIndexA);
+ const SolverExtBody eb1(reinterpret_cast<const void*>(prepDesc.body1), prepDesc.data1, desc.linkIndexB);
+
+ const Cm::SpatialVector resp0 = createImpulseResponseVector(e.lin0, e.ang0, eb0);
+ const Cm::SpatialVector resp1 = createImpulseResponseVector(-e.lin1, -e.ang1, eb1);
+ unitResponse = getImpulseResponse(eb0, resp0, unsimdRef(e.deltaVA), prepDesc.mInvMassScales.linear0, prepDesc.mInvMassScales.angular0, 
+ eb1, resp1, unsimdRef(e.deltaVB), prepDesc.mInvMassScales.linear1, prepDesc.mInvMassScales.angular1, true);
+
+ s.ang0Writeback = c.angular0;
+ s.lin0 = resp0.linear;
+ s.ang0 = resp0.angular;
+ s.lin1 = -resp1.linear;
+ s.ang1 = -resp1.angular;
+ PxReal vel0, vel1;
+ if(needsNormalVel(c) || eb0.mLinkIndex == PxSolverConstraintDesc::NO_LINK || eb1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+ {
+ vel0 = eb0.projectVelocity(c.linear0, c.angular0);
+ vel1 = eb1.projectVelocity(c.linear1, c.angular1);
+
+ normalVel = vel0 - vel1;
+
+ //normalVel = eb0.projectVelocity(s.lin0, s.ang0) - eb1.projectVelocity(s.lin1, s.ang1);
+ if(eb0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+ initVel = vel0;
+ else if(eb1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+ initVel = -vel1;
+
+ }
+ }
+
+ setSolverConstants(s.constant, s.unbiasedConstant, s.velMultiplier, s.impulseMultiplier, 
+ c, normalVel, unitResponse, prepDesc.minResponseThreshold, erp, dt, invdt);
+
+ //s.targetVelocity = initVel;
+ const PxReal velBias = initVel * s.velMultiplier;
+ s.constant += velBias;
+ s.unbiasedConstant += velBias;
+
+ if(c.flags & Px1DConstraintFlag::eOUTPUT_FORCE)
+ s.flags |= DY_SC_FLAG_OUTPUT_FORCE;
+
+ constraints += stride;
+ }
+
+ //KS - Set the solve count at the end to 0 
+ *(reinterpret_cast<PxU32*>(constraints)) = 0;
+ *(reinterpret_cast<PxU32*>(constraints + 4)) = 0;
+ PX_ASSERT(desc.constraint + getConstraintLength(desc) == constraints);
+ return prepDesc.numRows;
+}
+// Runs the shader's solverPrep callback into a local row buffer, then forwards to ConstraintHelper::setupSolverConstraint(); returns its result, or 0 when no callback is set.
+PxU32 SetupSolverConstraint(SolverConstraintShaderPrepDesc& shaderDesc,
+ PxSolverConstraintPrepDesc& prepDesc,
+ PxConstraintAllocator& allocator,
+ PxReal dt, PxReal invdt)
+{
+ // LL shouldn't see broken constraints
+ 
+ PX_ASSERT(!(reinterpret_cast<ConstraintWriteback*>(prepDesc.writeback)->broken));
+
+ setConstraintLength(*prepDesc.desc, 0);
+
+ if (!shaderDesc.solverPrep)
+ return 0;
+
+ //PxU32 numAxisConstraints = 0;
+
+ Px1DConstraint rows[MAX_CONSTRAINT_ROWS];
+
+ // This is necessary so that there will be sensible defaults and shaders will
+ // continue to work (albeit with a recompile) if the row format changes.
+ // It's a bit inefficient because it fills in all constraint rows even if there
+ // is only going to be one generated. A way around this would be for the shader to
+ // specify the maximum number of rows it needs, or it could call a subroutine to
+ // prep the row before it starts filling it.
+
+ PxMemZero(rows, sizeof(Px1DConstraint)*MAX_CONSTRAINT_ROWS);
+
+ for (PxU32 i = 0; i<MAX_CONSTRAINT_ROWS; i++)
+ {
+ Px1DConstraint& c = rows[i];
+ //Px1DConstraintInit(c);
+ c.minImpulse = -PX_MAX_REAL;
+ c.maxImpulse = PX_MAX_REAL;
+ }
+ // all four mass/inertia scales are reset to 1 before the shader callback gets a chance to change them
+ prepDesc.mInvMassScales.linear0 = prepDesc.mInvMassScales.linear1 = prepDesc.mInvMassScales.angular0 = prepDesc.mInvMassScales.angular1 = 1.f;
+
+ PxVec3 body0WorldOffset(0.f);
+ PxU32 constraintCount = (*shaderDesc.solverPrep)(rows,
+ body0WorldOffset,
+ MAX_CONSTRAINT_ROWS,
+ prepDesc.mInvMassScales,
+ shaderDesc.constantBlock,
+ prepDesc.bodyFrame0, prepDesc.bodyFrame1);
+ // prepDesc.rows points at the local array above, so it is only valid until this function returns
+ prepDesc.rows = rows;
+ prepDesc.numRows = constraintCount;
+
+ prepDesc.body0WorldOffset = body0WorldOffset;
+
+ return ConstraintHelper::setupSolverConstraint(prepDesc, allocator, dt, invdt);
+}
+
+}
+
+}
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetupBlock.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetupBlock.cpp
new file mode 100644
index 00000000..5c72f36e
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetupBlock.cpp
@@ -0,0 +1,535 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxMemory.h"
+#include "DyConstraintPrep.h"
+#include "PxsRigidBody.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverConstraint1D4.h"
+#include "PsSort.h"
+#include "PxcConstraintBlockStream.h"
+#include "DyArticulationContactPrep.h"
+#include "PsFoundation.h"
+namespace physx
+{
+
+namespace Dy
+{
+
+// Forward declaration; the definition is not in this file.
+// In this file it is called once per constraint before the 4-wide rows are built: 'rows'/'rowCount'
+// are the constraint's generated rows, and 'sorted', 'angSqrtInvInertia0' and 'angSqrtInvInertia1'
+// are read back per row afterwards.
+// NOTE(review): presumably those three arrays are outputs with one entry per row - confirm
+// against the definition.
+void preprocessRows(Px1DConstraint** sorted,
+ Px1DConstraint* rows,
+ PxVec4* angSqrtInvInertia0,
+ PxVec4* angSqrtInvInertia1,
+ PxU32 rowCount,
+ const PxSolverBodyData& bd0,
+ const PxSolverBodyData& bd1,
+ const PxConstraintInvMassScale& ims,
+ bool disablePreprocessing,
+ bool diagonalizeDrive);
+
+
+namespace
+{
+// Computes the four per-row solver constants for one lane of a 4-wide constraint row.
+// When 'finished' is true the lane has no row at this index, so all four outputs are zeroed and
+// nothing else is evaluated. Otherwise the relative velocity of the two bodies along the row is
+// computed (only if the row needs it, else 0) and the work is delegated to setSolverConstants.
+void setConstants(PxReal& constant, PxReal& unbiasedConstant, PxReal& velMultiplier, PxReal& impulseMultiplier,
+	const Px1DConstraint& c, PxReal unitResponse, PxReal minRowResponse, PxReal erp, PxReal dt, PxReal recipdt,
+	const PxSolverBodyData& b0, const PxSolverBodyData& b1, const bool finished)
+{
+	if(!finished)
+	{
+		PxReal normalVel = 0;
+		if(needsNormalVel(c))
+			normalVel = b0.projectVelocity(c.linear0, c.angular0) - b1.projectVelocity(c.linear1, c.angular1);
+
+		setSolverConstants(constant, unbiasedConstant, velMultiplier, impulseMultiplier,
+			c, normalVel, unitResponse, minRowResponse, erp, dt, recipdt);
+		return;
+	}
+
+	// Inactive lane: neutral constants.
+	constant = 0.f;
+	unbiasedConstant = 0.f;
+	velMultiplier = 0.f;
+	impulseMultiplier = 0.f;
+}
+}
+
+// Forward declaration of the row-based overload (defined further down in this file) so that the
+// shader-based overload below can call it once the rows have been generated.
+SolverConstraintPrepState::Enum setupSolverConstraint4
+ (PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
+ const PxReal dt, const PxReal recipdt, PxU32& totalRows,
+ PxConstraintAllocator& allocator, PxU32 maxRows);
+
+// Batched (4-wide) joint prep entry point. Runs the solver-prep shader of each of the four joints
+// to generate its Px1DConstraint rows into a shared stack buffer, then forwards to the row-based
+// setupSolverConstraint4 overload.
+//
+// constraintShaderDescs / constraintDescs: arrays indexed 0..3, one entry per joint.
+// dt, recipdt, allocator: forwarded unchanged to the row-based overload.
+// totalRows: reset to 0 here; the row-based overload writes the final value.
+// Returns eUNBATCHABLE as soon as a joint has no solver-prep shader or generates zero rows;
+// otherwise returns the result of the row-based overload.
+SolverConstraintPrepState::Enum setupSolverConstraint4
+(SolverConstraintShaderPrepDesc* PX_RESTRICT constraintShaderDescs,
+PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
+const PxReal dt, const PxReal recipdt, PxU32& totalRows,
+PxConstraintAllocator& allocator)
+
+{
+	//KS - we will never get here with constraints involving articulations so we don't need to stress about those in here
+
+	totalRows = 0;
+
+	// Rows of all four joints are packed back to back in this buffer. It lives on this stack
+	// frame, so desc.rows is only valid until this function returns.
+	Px1DConstraint allRows[MAX_CONSTRAINT_ROWS * 4];
+
+	PxU32 numRows = 0;
+
+	PxU32 maxRows = 0;
+	// Number of leading rows in the current joint's window that are already initialized.
+	PxU32 preppedIndex = 0;
+
+	for (PxU32 a = 0; a < 4; ++a)
+	{
+		// Each joint gets a MAX_CONSTRAINT_ROWS-sized window that starts right after the rows
+		// generated so far, so consecutive windows overlap.
+		Px1DConstraint* rows = allRows + numRows;
+		SolverConstraintShaderPrepDesc& shaderDesc = constraintShaderDescs[a];
+		PxSolverConstraintPrepDesc& desc = constraintDescs[a];
+
+		if (!shaderDesc.solverPrep)
+			return SolverConstraintPrepState::eUNBATCHABLE;
+
+		// The first preppedIndex rows of this window are the unused tail of the previous joint's
+		// window and still hold their defaults (this relies on the shader leaving rows beyond its
+		// returned count untouched), so only [preppedIndex, MAX_CONSTRAINT_ROWS) is initialized.
+		// The clear is sized to exactly that range; clearing a full MAX_CONSTRAINT_ROWS from
+		// rows + preppedIndex would only re-zero rows that the next iteration clears anyway.
+		PxMemZero(rows + preppedIndex, sizeof(Px1DConstraint)*(MAX_CONSTRAINT_ROWS - preppedIndex));
+		for (PxU32 b = preppedIndex; b < MAX_CONSTRAINT_ROWS; ++b)
+		{
+			Px1DConstraint& c = rows[b];
+			//Px1DConstraintInit(c);
+			c.minImpulse = -PX_MAX_REAL;
+			c.maxImpulse = PX_MAX_REAL;
+		}
+
+		// Defaults that the shader may override.
+		desc.mInvMassScales.linear0 = desc.mInvMassScales.linear1 = desc.mInvMassScales.angular0 = desc.mInvMassScales.angular1 = 1.f;
+
+		desc.body0WorldOffset = PxVec3(0.f);
+
+		PxU32 constraintCount = (*shaderDesc.solverPrep)(rows,
+			desc.body0WorldOffset,
+			MAX_CONSTRAINT_ROWS,
+			desc.mInvMassScales,
+			shaderDesc.constantBlock,
+			desc.bodyFrame0, desc.bodyFrame1);
+
+		// The next window starts constraintCount rows further on, so this many of its leading
+		// rows are already initialized.
+		preppedIndex = MAX_CONSTRAINT_ROWS - constraintCount;
+
+		maxRows = PxMax(constraintCount, maxRows);
+
+		// The row-based overload is not called with an empty joint.
+		if (constraintCount == 0)
+			return SolverConstraintPrepState::eUNBATCHABLE;
+
+		desc.rows = rows;
+		desc.numRows = constraintCount;
+		numRows += constraintCount;
+	}
+
+	return setupSolverConstraint4(constraintDescs, dt, recipdt, totalRows, allocator, maxRows);
+}
+
+// Builds one 4-wide (batched) 1D constraint block from four sets of already generated rows.
+//
+// constraintDescs: four prep descriptors (indices 0..3). Their rows/numRows, data0/data1,
+//                  mInvMassScales, body0WorldOffset, break forces and flags are read here, and
+//                  desc->constraint, the constraint length and desc->writeBack are written.
+// dt, recipdt:     passed to setConstants; dt also scales break forces and drive limits.
+// totalRows:       set to the sum of the four numRows once the allocation has succeeded.
+// allocator:       provides the constraint memory through reserveConstraintData.
+// maxRows:         number of 4-wide rows to emit; the shader-based overload in this file passes
+//                  the largest numRows of the four constraints.
+// Returns eOUT_OF_MEMORY when the allocation fails (all four descriptors are then left with a NULL
+// constraint of length 0), otherwise eSUCCESS.
+SolverConstraintPrepState::Enum setupSolverConstraint4
+(PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs,
+const PxReal dt, const PxReal recipdt, PxU32& totalRows,
+PxConstraintAllocator& allocator, PxU32 maxRows)
+{
+ const Vec4V zero = V4Zero();
+ Px1DConstraint* allSorted[MAX_CONSTRAINT_ROWS * 4];
+ PxU32 startIndex[4];
+ PX_ALIGN(16, PxVec4) angSqrtInvInertia0[MAX_CONSTRAINT_ROWS * 4];
+ PX_ALIGN(16, PxVec4) angSqrtInvInertia1[MAX_CONSTRAINT_ROWS * 4];
+
+ PxU32 numRows = 0;
+
+ // Preprocess each constraint's rows. The per-row results of all four constraints are packed
+ // back to back in allSorted / angSqrtInvInertia0 / angSqrtInvInertia1; startIndex[a] records
+ // where constraint a's entries begin.
+ for (PxU32 a = 0; a < 4; ++a)
+ {
+ startIndex[a] = numRows;
+ PxSolverConstraintPrepDesc& desc = constraintDescs[a];
+ Px1DConstraint** sorted = allSorted + numRows;
+
+ preprocessRows(sorted, desc.rows, angSqrtInvInertia0 + numRows, angSqrtInvInertia1 + numRows, desc.numRows, *desc.data0, *desc.data1, desc.mInvMassScales,
+ desc.disablePreprocessing, desc.improvedSlerp);
+
+ numRows += desc.numRows;
+ }
+
+
+ // Every 4-wide row is written as a SolverConstraint1DDynamic4.
+ PxU32 stride = sizeof(SolverConstraint1DDynamic4);
+
+
+ const PxU32 constraintLength = sizeof(SolverConstraint1DHeader4) + stride * maxRows;
+
+ //KS - +16 is for the constraint progress counter, which needs to be the last element in the constraint (so that we
+ //know SPU DMAs have completed)
+ PxU8* ptr = allocator.reserveConstraintData(constraintLength + 16u);
+ // Two failure values are handled: NULL and the all-ones pointer. Going by the warnings below,
+ // NULL means the buffer space ran out and all-ones means the request itself was too large.
+ if(NULL == ptr || (reinterpret_cast<PxU8*>(-1))==ptr)
+ {
+ // Leave all four constraints empty so that nothing dereferences the failed allocation.
+ for(PxU32 a = 0; a < 4; ++a)
+ {
+ PxSolverConstraintPrepDesc& desc = constraintDescs[a];
+ desc.desc->constraint = NULL;
+ setConstraintLength(*desc.desc, 0);
+ desc.desc->writeBack = desc.writeback;
+ }
+
+ if(NULL==ptr)
+ {
+ PX_WARN_ONCE(
+ "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+ "Either accept joints detaching/exploding or increase buffer size allocated for constraint prep by increasing PxSceneDesc::maxNbContactDataBlocks.");
+ return SolverConstraintPrepState::eOUT_OF_MEMORY;
+ }
+ else
+ {
+ PX_WARN_ONCE(
+ "Attempting to allocate more than 16K of constraint data. "
+ "Either accept joints detaching/exploding or simplify constraints.");
+ // NOTE(review): this assignment has no effect because the function returns right after it.
+ ptr=NULL;
+ return SolverConstraintPrepState::eOUT_OF_MEMORY;
+ }
+ }
+ //desc.constraint = ptr;
+
+ totalRows = numRows;
+
+ // All four constraints share the same block of constraint data.
+ for(PxU32 a = 0; a < 4; ++a)
+ {
+ PxSolverConstraintPrepDesc& desc = constraintDescs[a];
+ desc.desc->constraint = ptr;
+ setConstraintLength(*desc.desc, constraintLength);
+ desc.desc->writeBack = desc.writeback;
+ }
+
+ const PxReal erp[4] = { 1.0f, 1.0f, 1.0f, 1.0f};
+ //OK, now we build all 4 constraints into a single set of rows
+
+ {
+ // Layout of the block: header, then maxRows rows of 'stride' bytes, then the 16-byte trailer.
+ PxU8* currPtr = ptr;
+ SolverConstraint1DHeader4* header = reinterpret_cast<SolverConstraint1DHeader4*>(currPtr);
+ currPtr += sizeof(SolverConstraint1DHeader4);
+
+ // Naming: bd0N is body 0 of constraint N, bd1N is body 1 of constraint N.
+ const PxSolverBodyData& bd00 = *constraintDescs[0].data0;
+ const PxSolverBodyData& bd01 = *constraintDescs[1].data0;
+ const PxSolverBodyData& bd02 = *constraintDescs[2].data0;
+ const PxSolverBodyData& bd03 = *constraintDescs[3].data0;
+
+ const PxSolverBodyData& bd10 = *constraintDescs[0].data1;
+ const PxSolverBodyData& bd11 = *constraintDescs[1].data1;
+ const PxSolverBodyData& bd12 = *constraintDescs[2].data1;
+ const PxSolverBodyData& bd13 = *constraintDescs[3].data1;
+
+ //Load up masses, invInertia, velocity etc.
+ // Each Vec4V below holds one scalar per constraint (lane x = constraint 0 ... lane w = constraint 3).
+
+ const Vec4V invMassScale0 = V4LoadXYZW(constraintDescs[0].mInvMassScales.linear0, constraintDescs[1].mInvMassScales.linear0,
+ constraintDescs[2].mInvMassScales.linear0, constraintDescs[3].mInvMassScales.linear0);
+ const Vec4V invMassScale1 = V4LoadXYZW(constraintDescs[0].mInvMassScales.linear1, constraintDescs[1].mInvMassScales.linear1,
+ constraintDescs[2].mInvMassScales.linear1, constraintDescs[3].mInvMassScales.linear1);
+
+
+ const Vec4V iMass0 = V4LoadXYZW(bd00.invMass, bd01.invMass, bd02.invMass, bd03.invMass);
+
+ const Vec4V iMass1 = V4LoadXYZW(bd10.invMass, bd11.invMass, bd12.invMass, bd13.invMass);
+
+ // Inverse masses with the per-constraint scale already applied.
+ const Vec4V invMass0 = V4Mul(iMass0, invMassScale0);
+ const Vec4V invMass1 = V4Mul(iMass1, invMassScale1);
+
+
+ const Vec4V invInertiaScale0 = V4LoadXYZW(constraintDescs[0].mInvMassScales.angular0, constraintDescs[1].mInvMassScales.angular0,
+ constraintDescs[2].mInvMassScales.angular0, constraintDescs[3].mInvMassScales.angular0);
+ const Vec4V invInertiaScale1 = V4LoadXYZW(constraintDescs[0].mInvMassScales.angular1, constraintDescs[1].mInvMassScales.angular1,
+ constraintDescs[2].mInvMassScales.angular1, constraintDescs[3].mInvMassScales.angular1);
+
+ //Velocities
+ // Naming: linVelNB / angVelNB is the velocity of body B (0 or 1) of constraint N.
+ Vec4V linVel00 = V4LoadA(&bd00.linearVelocity.x);
+ Vec4V linVel01 = V4LoadA(&bd10.linearVelocity.x);
+ Vec4V angVel00 = V4LoadA(&bd00.angularVelocity.x);
+ Vec4V angVel01 = V4LoadA(&bd10.angularVelocity.x);
+
+ Vec4V linVel10 = V4LoadA(&bd01.linearVelocity.x);
+ Vec4V linVel11 = V4LoadA(&bd11.linearVelocity.x);
+ Vec4V angVel10 = V4LoadA(&bd01.angularVelocity.x);
+ Vec4V angVel11 = V4LoadA(&bd11.angularVelocity.x);
+
+ Vec4V linVel20 = V4LoadA(&bd02.linearVelocity.x);
+ Vec4V linVel21 = V4LoadA(&bd12.linearVelocity.x);
+ Vec4V angVel20 = V4LoadA(&bd02.angularVelocity.x);
+ Vec4V angVel21 = V4LoadA(&bd12.angularVelocity.x);
+
+ Vec4V linVel30 = V4LoadA(&bd03.linearVelocity.x);
+ Vec4V linVel31 = V4LoadA(&bd13.linearVelocity.x);
+ Vec4V angVel30 = V4LoadA(&bd03.angularVelocity.x);
+ Vec4V angVel31 = V4LoadA(&bd13.angularVelocity.x);
+
+
+ Vec4V linVel0T0, linVel0T1, linVel0T2;
+ Vec4V linVel1T0, linVel1T1, linVel1T2;
+ Vec4V angVel0T0, angVel0T1, angVel0T2;
+ Vec4V angVel1T0, angVel1T1, angVel1T2;
+
+
+ // Transpose the four per-constraint vectors into three per-component vectors (T0/T1/T2 are
+ // later multiplied with the X/Y/Z components of the row axes).
+ PX_TRANSPOSE_44_34(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2);
+ PX_TRANSPOSE_44_34(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2);
+ PX_TRANSPOSE_44_34(angVel00, angVel10, angVel20, angVel30, angVel0T0, angVel0T1, angVel0T2);
+ PX_TRANSPOSE_44_34(angVel01, angVel11, angVel21, angVel31, angVel1T0, angVel1T1, angVel1T2);
+
+
+
+ //body world offsets
+ Vec4V workOffset0 = Vec4V_From_Vec3V(V3LoadU(constraintDescs[0].body0WorldOffset));
+ Vec4V workOffset1 = Vec4V_From_Vec3V(V3LoadU(constraintDescs[1].body0WorldOffset));
+ Vec4V workOffset2 = Vec4V_From_Vec3V(V3LoadU(constraintDescs[2].body0WorldOffset));
+ Vec4V workOffset3 = Vec4V_From_Vec3V(V3LoadU(constraintDescs[3].body0WorldOffset));
+
+ Vec4V workOffsetX, workOffsetY, workOffsetZ;
+
+ PX_TRANSPOSE_44_34(workOffset0, workOffset1, workOffset2, workOffset3, workOffsetX, workOffsetY, workOffsetZ);
+
+ const FloatV dtV = FLoad(dt);
+ Vec4V linBreakForce = V4LoadXYZW(constraintDescs[0].linBreakForce, constraintDescs[1].linBreakForce,
+ constraintDescs[2].linBreakForce, constraintDescs[3].linBreakForce);
+ Vec4V angBreakForce = V4LoadXYZW(constraintDescs[0].angBreakForce, constraintDescs[1].angBreakForce,
+ constraintDescs[2].angBreakForce, constraintDescs[3].angBreakForce);
+
+
+ // breakN is non-zero when constraint N has a linear or angular break force other than PX_MAX_F32.
+ header->break0 = PxU8((constraintDescs[0].linBreakForce != PX_MAX_F32) || (constraintDescs[0].angBreakForce != PX_MAX_F32));
+ header->break1 = PxU8((constraintDescs[1].linBreakForce != PX_MAX_F32) || (constraintDescs[1].angBreakForce != PX_MAX_F32));
+ header->break2 = PxU8((constraintDescs[2].linBreakForce != PX_MAX_F32) || (constraintDescs[2].angBreakForce != PX_MAX_F32));
+ header->break3 = PxU8((constraintDescs[3].linBreakForce != PX_MAX_F32) || (constraintDescs[3].angBreakForce != PX_MAX_F32));
+
+
+ //OK, I think that's everything loaded in
+
+ header->invMass0D0 = invMass0;
+ header->invMass1D1 = invMass1;
+ header->angD0 = invInertiaScale0;
+ header->angD1 = invInertiaScale1;
+ header->body0WorkOffsetX = workOffsetX;
+ header->body0WorkOffsetY = workOffsetY;
+ header->body0WorkOffsetZ = workOffsetZ;
+
+ header->count = maxRows;
+ header->type = DY_SC_TYPE_BLOCK_1D;
+ // Break thresholds are stored as impulses (force * dt).
+ header->linBreakImpulse = V4Scale(linBreakForce, dtV);
+ header->angBreakImpulse = V4Scale(angBreakForce, dtV);
+ header->count0 = Ps::to8(constraintDescs[0].numRows);
+ header->count1 = Ps::to8(constraintDescs[1].numRows);
+ header->count2 = Ps::to8(constraintDescs[2].numRows);
+ header->count3 = Ps::to8(constraintDescs[3].numRows);
+
+ //Now we loop over the constraints and build the results...
+
+ // Per-constraint cursors into the packed arrays, with the index of each constraint's last row.
+ // NOTE(review): endIndexN wraps around if numRows is 0. The shader-based overload in this file
+ // rejects zero-row constraints before calling here; other callers must guarantee numRows >= 1.
+ PxU32 index0 = 0;
+ PxU32 endIndex0 = constraintDescs[0].numRows - 1;
+ PxU32 index1 = startIndex[1];
+ PxU32 endIndex1 = index1 + constraintDescs[1].numRows - 1;
+ PxU32 index2 = startIndex[2];
+ PxU32 endIndex2 = index2 + constraintDescs[2].numRows - 1;
+ PxU32 index3 = startIndex[3];
+ PxU32 endIndex3 = index3 + constraintDescs[3].numRows - 1;
+
+ const FloatV one = FOne();
+
+ for(PxU32 a = 0; a < maxRows; ++a)
+ {
+ SolverConstraint1DDynamic4* c = reinterpret_cast<SolverConstraint1DDynamic4*>(currPtr);
+ currPtr += stride;
+
+ Px1DConstraint* con0 = allSorted[index0];
+ Px1DConstraint* con1 = allSorted[index1];
+ Px1DConstraint* con2 = allSorted[index2];
+ Px1DConstraint* con3 = allSorted[index3];
+
+ Vec4V cangDelta00 = V4LoadA(&angSqrtInvInertia0[index0].x);
+ Vec4V cangDelta01 = V4LoadA(&angSqrtInvInertia0[index1].x);
+ Vec4V cangDelta02 = V4LoadA(&angSqrtInvInertia0[index2].x);
+ Vec4V cangDelta03 = V4LoadA(&angSqrtInvInertia0[index3].x);
+
+ Vec4V cangDelta10 = V4LoadA(&angSqrtInvInertia1[index0].x);
+ Vec4V cangDelta11 = V4LoadA(&angSqrtInvInertia1[index1].x);
+ Vec4V cangDelta12 = V4LoadA(&angSqrtInvInertia1[index2].x);
+ Vec4V cangDelta13 = V4LoadA(&angSqrtInvInertia1[index3].x);
+
+ // Advance each cursor but stop at the constraint's last row. A constraint with fewer than
+ // maxRows rows therefore keeps re-reading its final row; the constants of those surplus
+ // lanes are zeroed by setConstants through its 'finished' argument below.
+ index0 = index0 == endIndex0 ? index0 : index0 + 1;
+ index1 = index1 == endIndex1 ? index1 : index1 + 1;
+ index2 = index2 == endIndex2 ? index2 : index2 + 1;
+ index3 = index3 == endIndex3 ? index3 : index3 + 1;
+
+ // Lanes whose row has a drive limit expressed as a force get their impulse limits scaled by
+ // min(1, dt); all other lanes keep a scale of 1.
+ Vec4V driveScale = V4Splat(one);
+ if (con0->flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && constraintDescs[0].driveLimitsAreForces)
+ driveScale = V4SetX(driveScale, FMin(one, dtV));
+ if (con1->flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && constraintDescs[1].driveLimitsAreForces)
+ driveScale = V4SetY(driveScale, FMin(one, dtV));
+ if (con2->flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && constraintDescs[2].driveLimitsAreForces)
+ driveScale = V4SetZ(driveScale, FMin(one, dtV));
+ if (con3->flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && constraintDescs[3].driveLimitsAreForces)
+ driveScale = V4SetW(driveScale, FMin(one, dtV));
+
+
+ // Body 0 axes of the four rows, transposed into per-component vectors.
+ Vec4V clin00 = V4LoadA(&con0->linear0.x);
+ Vec4V clin01 = V4LoadA(&con1->linear0.x);
+ Vec4V clin02 = V4LoadA(&con2->linear0.x);
+ Vec4V clin03 = V4LoadA(&con3->linear0.x);
+
+ Vec4V cang00 = V4LoadA(&con0->angular0.x);
+ Vec4V cang01 = V4LoadA(&con1->angular0.x);
+ Vec4V cang02 = V4LoadA(&con2->angular0.x);
+ Vec4V cang03 = V4LoadA(&con3->angular0.x);
+
+ Vec4V clin0X, clin0Y, clin0Z;
+ Vec4V cang0X, cang0Y, cang0Z;
+
+ PX_TRANSPOSE_44_34(clin00, clin01, clin02, clin03, clin0X, clin0Y, clin0Z);
+ PX_TRANSPOSE_44_34(cang00, cang01, cang02, cang03, cang0X, cang0Y, cang0Z);
+
+ const Vec4V maxImpulse = V4LoadXYZW(con0->maxImpulse, con1->maxImpulse, con2->maxImpulse, con3->maxImpulse);
+ const Vec4V minImpulse = V4LoadXYZW(con0->minImpulse, con1->minImpulse, con2->minImpulse, con3->minImpulse);
+
+ Vec4V angDelta0X, angDelta0Y, angDelta0Z;
+
+ PX_TRANSPOSE_44_34(cangDelta00, cangDelta01, cangDelta02, cangDelta03, angDelta0X, angDelta0Y, angDelta0Z);
+
+ c->flags[0] = 0;
+ c->flags[1] = 0;
+ c->flags[2] = 0;
+ c->flags[3] = 0;
+
+ // ang0* stores the preprocessed angular data; the raw angular axis is kept separately in
+ // ang0Writeback*.
+ c->lin0X = clin0X;
+ c->lin0Y = clin0Y;
+ c->lin0Z = clin0Z;
+ c->ang0X = angDelta0X;
+ c->ang0Y = angDelta0Y;
+ c->ang0Z = angDelta0Z;
+ c->ang0WritebackX = cang0X;
+ c->ang0WritebackY = cang0Y;
+ c->ang0WritebackZ = cang0Z;
+
+ c->minImpulse = V4Mul(minImpulse, driveScale);
+ c->maxImpulse = V4Mul(maxImpulse, driveScale);
+ c->appliedForce = zero;
+
+ const Vec4V lin0MagSq = V4MulAdd(clin0Z, clin0Z, V4MulAdd(clin0Y, clin0Y, V4Mul(clin0X, clin0X)));
+ const Vec4V cang0DotAngDelta = V4MulAdd(angDelta0Z, angDelta0Z, V4MulAdd(angDelta0Y, angDelta0Y, V4Mul(angDelta0X, angDelta0X)));
+ // NOTE(review): flags were already cleared a few lines above; this second reset is redundant.
+ c->flags[0] = 0;
+ c->flags[1] = 0;
+ c->flags[2] = 0;
+ c->flags[3] = 0;
+
+ // Body 0 contribution to the unit response; the body 1 contribution is added further down.
+ Vec4V unitResponse = V4MulAdd(lin0MagSq, invMass0, V4Mul(cang0DotAngDelta, invInertiaScale0));
+
+ // Same again for body 1.
+ Vec4V clin10 = V4LoadA(&con0->linear1.x);
+ Vec4V clin11 = V4LoadA(&con1->linear1.x);
+ Vec4V clin12 = V4LoadA(&con2->linear1.x);
+ Vec4V clin13 = V4LoadA(&con3->linear1.x);
+
+ Vec4V cang10 = V4LoadA(&con0->angular1.x);
+ Vec4V cang11 = V4LoadA(&con1->angular1.x);
+ Vec4V cang12 = V4LoadA(&con2->angular1.x);
+ Vec4V cang13 = V4LoadA(&con3->angular1.x);
+
+ Vec4V clin1X, clin1Y, clin1Z;
+ Vec4V cang1X, cang1Y, cang1Z;
+ PX_TRANSPOSE_44_34(clin10, clin11, clin12, clin13, clin1X, clin1Y, clin1Z);
+ PX_TRANSPOSE_44_34(cang10, cang11, cang12, cang13, cang1X, cang1Y, cang1Z);
+
+ Vec4V angDelta1X, angDelta1Y, angDelta1Z;
+
+ PX_TRANSPOSE_44_34(cangDelta10, cangDelta11, cangDelta12, cangDelta13, angDelta1X, angDelta1Y, angDelta1Z);
+
+ const Vec4V lin1MagSq = V4MulAdd(clin1Z, clin1Z, V4MulAdd(clin1Y, clin1Y, V4Mul(clin1X, clin1X)));
+ const Vec4V cang1DotAngDelta = V4MulAdd(angDelta1Z, angDelta1Z, V4MulAdd(angDelta1Y, angDelta1Y, V4Mul(angDelta1X, angDelta1X)));
+
+ c->lin1X = clin1X;
+ c->lin1Y = clin1Y;
+ c->lin1Z = clin1Z;
+
+ c->ang1X = angDelta1X;
+ c->ang1Y = angDelta1Y;
+ c->ang1Z = angDelta1Z;
+
+ unitResponse = V4Add(unitResponse, V4MulAdd(lin1MagSq, invMass1, V4Mul(cang1DotAngDelta, invInertiaScale1)));
+
+ // Project the body velocities onto the row axes (dot products accumulated one component at
+ // a time); the raw angular axes are used here, not the preprocessed ones.
+ Vec4V linProj0(V4Mul(clin0X, linVel0T0));
+ Vec4V linProj1(V4Mul(clin1X, linVel1T0));
+ Vec4V angProj0(V4Mul(cang0X, angVel0T0));
+ Vec4V angProj1(V4Mul(cang1X, angVel1T0));
+
+ linProj0 = V4MulAdd(clin0Y, linVel0T1, linProj0);
+ linProj1 = V4MulAdd(clin1Y, linVel1T1, linProj1);
+ angProj0 = V4MulAdd(cang0Y, angVel0T1, angProj0);
+ angProj1 = V4MulAdd(cang1Y, angVel1T1, angProj1);
+
+ linProj0 = V4MulAdd(clin0Z, linVel0T2, linProj0);
+ linProj1 = V4MulAdd(clin1Z, linVel1T2, linProj1);
+ angProj0 = V4MulAdd(cang0Z, angVel0T2, angProj0);
+ angProj1 = V4MulAdd(cang1Z, angVel1T2, angProj1);
+
+ const Vec4V projectVel0 = V4Add(linProj0, angProj0);
+ const Vec4V projectVel1 = V4Add(linProj1, angProj1);
+
+ // Relative velocity along each row (body 0 minus body 1).
+ const Vec4V normalVel = V4Sub(projectVel0, projectVel1);
+
+
+ {
+ // The solver constants are computed per lane with scalar code, so the SIMD members are
+ // viewed as PxVec4 to address their individual components.
+ const PxVec4& ur = reinterpret_cast<const PxVec4&>(unitResponse);
+ PxVec4& cConstant = reinterpret_cast<PxVec4&>(c->constant);
+ PxVec4& cUnbiasedConstant = reinterpret_cast<PxVec4&>(c->unbiasedConstant);
+ PxVec4& cVelMultiplier = reinterpret_cast<PxVec4&>(c->velMultiplier);
+ PxVec4& cImpulseMultiplier = reinterpret_cast<PxVec4&>(c->impulseMultiplier);
+
+ // The last argument marks a lane as finished once 'a' has passed that constraint's row count.
+ setConstants(cConstant.x, cUnbiasedConstant.x, cVelMultiplier.x, cImpulseMultiplier.x,
+ *con0, ur.x, constraintDescs[0].minResponseThreshold, erp[0], dt, recipdt,
+ *constraintDescs[0].data0, *constraintDescs[0].data1, a >= constraintDescs[0].numRows);
+
+ setConstants(cConstant.y, cUnbiasedConstant.y, cVelMultiplier.y, cImpulseMultiplier.y,
+ *con1, ur.y, constraintDescs[1].minResponseThreshold, erp[1], dt, recipdt,
+ *constraintDescs[1].data0, *constraintDescs[1].data1, a >= constraintDescs[1].numRows);
+
+ setConstants(cConstant.z, cUnbiasedConstant.z, cVelMultiplier.z, cImpulseMultiplier.z,
+ *con2, ur.z, constraintDescs[2].minResponseThreshold, erp[2], dt, recipdt,
+ *constraintDescs[2].data0, *constraintDescs[2].data1, a >= constraintDescs[2].numRows);
+
+ setConstants(cConstant.w, cUnbiasedConstant.w, cVelMultiplier.w, cImpulseMultiplier.w,
+ *con3, ur.w, constraintDescs[3].minResponseThreshold, erp[3], dt, recipdt,
+ *constraintDescs[3].data0, *constraintDescs[3].data1, a >= constraintDescs[3].numRows);
+ }
+
+ // Add the velocity term to both constants; finished lanes have a velMultiplier of 0 and so
+ // are unaffected.
+ const Vec4V velBias = V4Mul(c->velMultiplier, normalVel);
+ c->constant = V4Add(c->constant, velBias);
+ c->unbiasedConstant = V4Add(c->unbiasedConstant, velBias);
+
+ // Carry the per-row force output request over into the solver flags.
+ if(con0->flags & Px1DConstraintFlag::eOUTPUT_FORCE)
+ c->flags[0] |= DY_SC_FLAG_OUTPUT_FORCE;
+ if(con1->flags & Px1DConstraintFlag::eOUTPUT_FORCE)
+ c->flags[1] |= DY_SC_FLAG_OUTPUT_FORCE;
+ if(con2->flags & Px1DConstraintFlag::eOUTPUT_FORCE)
+ c->flags[2] |= DY_SC_FLAG_OUTPUT_FORCE;
+ if(con3->flags & Px1DConstraintFlag::eOUTPUT_FORCE)
+ c->flags[3] |= DY_SC_FLAG_OUTPUT_FORCE;
+ }
+ // Zero the first 8 bytes of the 16-byte trailer that was reserved after the rows.
+ *(reinterpret_cast<PxU32*>(currPtr)) = 0;
+ *(reinterpret_cast<PxU32*>(currPtr + 4)) = 0;
+ }
+
+ //OK, we're ready to allocate and solve prep these constraints now :-)
+ return SolverConstraintPrepState::eSUCCESS;
+}
+
+}
+
+}
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.cpp
new file mode 100644
index 00000000..1e21f1e3
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.cpp
@@ -0,0 +1,725 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "foundation/PxPreprocessor.h"
+#include "PxSceneDesc.h"
+#include "PsVecMath.h"
+#include "PsMathUtils.h"
+#include "DySolverContact.h"
+#include "DySolverContact4.h"
+#include "DySolverConstraintTypes.h"
+#include "PxcNpWorkUnit.h"
+#include "DyThreadContext.h"
+#include "DyContactPrep.h"
+#include "PxcNpContactPrepShared.h"
+#include "PxvDynamics.h"
+#include "DyCorrelationBuffer.h"
+#include "DyDynamics.h"
+#include "DyArticulationContactPrep.h"
+#include "PxsContactManager.h"
+#include "PsFoundation.h"
+
+using namespace physx;
+using namespace Gu;
+
+
+#include "PsVecMath.h"
+#include "PxContactModifyCallback.h"
+#include "PxsMaterialManager.h"
+#include "PxsMaterialCombiner.h"
+#include "DyContactPrepShared.h"
+
+using namespace Ps::aos;
+
+namespace physx
+{
+namespace Dy
+{
+
+// Table of the three contact constraint creation routines: the default one followed by the
+// Coulomb 1D and Coulomb 2D variants.
+// NOTE(review): presumably indexed by the scene's friction type - confirm against the callers,
+// since the entry order must match whatever index they use.
+PxcCreateFinalizeSolverContactMethod createFinalizeMethods[3] =
+{
+ createFinalizeSolverContacts,
+ createFinalizeSolverContactsCoulomb1D,
+ createFinalizeSolverContactsCoulomb2D
+};
+
+
+
+static void setupFinalizeSolverConstraints(Sc::ShapeInteraction* shapeInteraction,
+ const ContactPoint* buffer,
+ const CorrelationBuffer& c,
+ const PxTransform& bodyFrame0,
+ const PxTransform& bodyFrame1,
+ PxU8* workspace,
+ const PxSolverBodyData& data0,
+ const PxSolverBodyData& data1,
+ const PxReal invDtF32,
+ PxReal bounceThresholdF32,
+ PxReal invMassScale0, PxReal invInertiaScale0,
+ PxReal invMassScale1, PxReal invInertiaScale1,
+ bool hasForceThreshold, bool staticOrKinematicBody,
+ const PxReal restDist, PxU8* frictionDataPtr,
+ const PxReal maxCCDSeparation)
+{
+ // NOTE II: the friction patches are sparse (some of them have no contact patches, and
+ // therefore did not get written back to the cache) but the patch addresses are dense,
+ // corresponding to valid patches
+
+ const FloatV ccdMaxSeparation = FLoad(maxCCDSeparation);
+
+ PxU8 flags = PxU8(hasForceThreshold ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0);
+
+ PxU8* PX_RESTRICT ptr = workspace;
+
+ PxU8 type = Ps::to8(staticOrKinematicBody ? DY_SC_TYPE_STATIC_CONTACT
+ : DY_SC_TYPE_RB_CONTACT);
+
+ const FloatV zero=FZero();
+
+ const FloatV d0 = FLoad(invMassScale0);
+ const FloatV d1 = FLoad(invMassScale1);
+ const FloatV angD0 = FLoad(invInertiaScale0);
+ const FloatV angD1 = FLoad(invInertiaScale1);
+
+ const FloatV nDom1fV = FNeg(d1);
+
+ const FloatV invMass0 = FLoad(data0.invMass);
+ const FloatV invMass1 = FLoad(data1.invMass);
+
+ const FloatV invMass0_dom0fV = FMul(d0, invMass0);
+ const FloatV invMass1_dom1fV = FMul(nDom1fV, invMass1);
+
+
+ Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4Zero();
+ staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, invMass0_dom0fV);
+ staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, invMass1_dom1fV);
+
+ const FloatV restDistance = FLoad(restDist);
+
+ const FloatV maxPenBias = FMax(FLoad(data0.penBiasClamp), FLoad(data1.penBiasClamp));
+
+ const QuatV bodyFrame0q = QuatVLoadU(&bodyFrame0.q.x);
+ const Vec3V bodyFrame0p = V3LoadU(bodyFrame0.p);
+
+ const QuatV bodyFrame1q = QuatVLoadU(&bodyFrame1.q.x);
+ const Vec3V bodyFrame1p = V3LoadU(bodyFrame1.p);
+
+ PxU32 frictionPatchWritebackAddrIndex = 0;
+ PxU32 contactWritebackCount = 0;
+
+ Ps::prefetchLine(c.contactID);
+ Ps::prefetchLine(c.contactID, 128);
+
+ const Vec3V linVel0 = V3LoadU_SafeReadW(data0.linearVelocity); // PT: safe because 'invMass' follows 'initialLinVel' in PxSolverBodyData
+ const Vec3V linVel1 = V3LoadU_SafeReadW(data1.linearVelocity); // PT: safe because 'invMass' follows 'initialLinVel' in PxSolverBodyData
+ const Vec3V angVel0 = V3LoadU_SafeReadW(data0.angularVelocity); // PT: safe because 'reportThreshold' follows 'initialAngVel' in PxSolverBodyData
+ const Vec3V angVel1 = V3LoadU_SafeReadW(data1.angularVelocity); // PT: safe because 'reportThreshold' follows 'initialAngVel' in PxSolverBodyData
+
+ PX_ALIGN(16, const Mat33V invSqrtInertia0)
+ (
+ V3LoadU_SafeReadW(data0.sqrtInvInertia.column0), // PT: safe because 'column1' follows 'column0' in PxMat33
+ V3LoadU_SafeReadW(data0.sqrtInvInertia.column1), // PT: safe because 'column2' follows 'column1' in PxMat33
+ V3LoadU(data0.sqrtInvInertia.column2)
+ );
+
+ PX_ALIGN(16, const Mat33V invSqrtInertia1)
+ (
+ V3LoadU_SafeReadW(data1.sqrtInvInertia.column0), // PT: safe because 'column1' follows 'column0' in PxMat33
+ V3LoadU_SafeReadW(data1.sqrtInvInertia.column1), // PT: safe because 'column2' follows 'column1' in PxMat33
+ V3LoadU(data1.sqrtInvInertia.column2)
+ );
+
+ const FloatV invDt = FLoad(invDtF32);
+ const FloatV p8 = FLoad(0.8f);
+ const FloatV bounceThreshold = FLoad(bounceThresholdF32);
+
+ const FloatV invDtp8 = FMul(invDt, p8);
+
+
+ for(PxU32 i=0;i<c.frictionPatchCount;i++)
+ {
+ PxU32 contactCount = c.frictionPatchContactCounts[i];
+ if(contactCount == 0)
+ continue;
+
+ const FrictionPatch& frictionPatch = c.frictionPatches[i];
+ PX_ASSERT(frictionPatch.anchorCount <= 2);
+
+ PxU32 firstPatch = c.correlationListHeads[i];
+ const Gu::ContactPoint* contactBase0 = buffer + c.contactPatches[firstPatch].start;
+
+ const PxReal combinedRestitution = contactBase0->restitution;
+
+ SolverContactHeader* PX_RESTRICT header = reinterpret_cast<SolverContactHeader*>(ptr);
+ ptr += sizeof(SolverContactHeader);
+
+
+ Ps::prefetchLine(ptr, 128);
+ Ps::prefetchLine(ptr, 256);
+
+ header->shapeInteraction = shapeInteraction;
+ header->flags = flags;
+ FStore(invMass0_dom0fV, &header->invMass0);
+ FStore(FNeg(invMass1_dom1fV), &header->invMass1);
+ const FloatV restitution = FLoad(combinedRestitution);
+
+ PxU32 pointStride = sizeof(SolverContactPoint);
+ PxU32 frictionStride = sizeof(SolverContactFriction);
+
+ const Vec3V normal = V3LoadA(buffer[c.contactPatches[c.correlationListHeads[i]].start].normal);
+ const FloatV normalLenSq = V3LengthSq(normal);
+ const VecCrossV norCross = V3PrepareCross(normal);
+ const FloatV norVel = V3SumElems(V3NegMulSub(normal, linVel1, V3Mul(normal, linVel0)));
+
+ const FloatV invMassNorLenSq0 = FMul(invMass0_dom0fV, normalLenSq);
+ const FloatV invMassNorLenSq1 = FMul(invMass1_dom1fV, normalLenSq);
+
+ header->normal = normal;
+
+ for(PxU32 patch=c.correlationListHeads[i];
+ patch!=CorrelationBuffer::LIST_END;
+ patch = c.contactPatches[patch].next)
+ {
+ const PxU32 count = c.contactPatches[patch].count;
+ const Gu::ContactPoint* contactBase = buffer + c.contactPatches[patch].start;
+
+ PxU8* p = ptr;
+
+ for(PxU32 j=0;j<count;j++)
+ {
+ Ps::prefetchLine(p, 256);
+ const Gu::ContactPoint& contact = contactBase[j];
+
+ SolverContactPoint* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPoint*>(p);
+ p += pointStride;
+
+ constructContactConstraint(invSqrtInertia0, invSqrtInertia1, invMassNorLenSq0,
+ invMassNorLenSq1, angD0, angD1, bodyFrame0p, bodyFrame1p,
+ normal, norVel, norCross, angVel0, angVel1,
+ invDt, invDtp8, restDistance, maxPenBias, restitution,
+ bounceThreshold, contact, *solverContact,
+ ccdMaxSeparation);
+ }
+
+ ptr = p;
+ }
+ contactWritebackCount += contactCount;
+
+ PxF32* forceBuffers = reinterpret_cast<PxF32*>(ptr);
+ PxMemZero(forceBuffers, sizeof(PxF32) * contactCount);
+ ptr += ((contactCount + 3) & (~3)) * sizeof(PxF32); // jump to next 16-byte boundary
+
+ const PxReal staticFriction = contactBase0->staticFriction;
+ const PxReal dynamicFriction = contactBase0->dynamicFriction;
+ const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
+ staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(staticFriction));
+ staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(dynamicFriction));
+
+ const bool haveFriction = (disableStrongFriction == 0 && frictionPatch.anchorCount != 0) ;//PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;
+ header->numNormalConstr = Ps::to8(contactCount);
+ header->numFrictionConstr = Ps::to8(haveFriction ? frictionPatch.anchorCount*2 : 0);
+
+ header->type = type;
+
+ header->staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W;
+ FStore(angD0, &header->angDom0);
+ FStore(angD1, &header->angDom1);
+
+ header->broken = 0;
+
+ if(haveFriction)
+ {
+ const Vec3V linVrel = V3Sub(linVel0, linVel1);
+ //const Vec3V normal = Vec3V_From_PxVec3_Aligned(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal);
+
+ const FloatV orthoThreshold = FLoad(0.70710678f);
+ const FloatV p1 = FLoad(0.1f);
+ // fallback: normal.cross((1,0,0)) or normal.cross((0,0,1))
+ const FloatV normalX = V3GetX(normal);
+ const FloatV normalY = V3GetY(normal);
+ const FloatV normalZ = V3GetZ(normal);
+
+ Vec3V t0Fallback1 = V3Merge(zero, FNeg(normalZ), normalY);
+ Vec3V t0Fallback2 = V3Merge(FNeg(normalY), normalX, zero) ;
+ Vec3V t0Fallback = V3Sel(FIsGrtr(orthoThreshold, FAbs(normalX)), t0Fallback1, t0Fallback2);
+
+ Vec3V t0 = V3Sub(linVrel, V3Scale(normal, V3Dot(normal, linVrel)));
+ t0 = V3Sel(FIsGrtr(V3LengthSq(t0), p1), t0, t0Fallback);
+ t0 = V3Normalize(t0);
+
+ const VecCrossV t0Cross = V3PrepareCross(t0);
+
+ const Vec3V t1 = V3Cross(norCross, t0Cross);
+ const VecCrossV t1Cross = V3PrepareCross(t1);
+
+
+ // since we don't even have the body velocities we can't compute the tangent dirs, so
+ // the only thing we can do right now is to write the geometric information (which is the
+ // same for both axis constraints of an anchor) We put ra in the raXn field, rb in the rbXn
+ // field, and the error in the normal field. See corresponding comments in
+ // completeContactFriction()
+
+ //We want to set the writeBack ptr to point to the broken flag of the friction patch.
+ //On spu we have a slight problem here because the friction patch array is
+ //in local store rather than in main memory. The good news is that the address of the friction
+ //patch array in main memory is stored in the work unit. These two addresses will be equal
+ //except on spu where one is local store memory and the other is the effective address in main memory.
+ //Using the value stored in the work unit guarantees that the main memory address is used on all platforms.
+ PxU8* PX_RESTRICT writeback = frictionDataPtr + frictionPatchWritebackAddrIndex*sizeof(FrictionPatch);
+
+ header->frictionBrokenWritebackByte = writeback;
+
+ for(PxU32 j = 0; j < frictionPatch.anchorCount; j++)
+ {
+ Ps::prefetchLine(ptr, 256);
+ Ps::prefetchLine(ptr, 384);
+ SolverContactFriction* PX_RESTRICT f0 = reinterpret_cast<SolverContactFriction*>(ptr);
+ ptr += frictionStride;
+ SolverContactFriction* PX_RESTRICT f1 = reinterpret_cast<SolverContactFriction*>(ptr);
+ ptr += frictionStride;
+
+ Vec3V body0Anchor = V3LoadU(frictionPatch.body0Anchors[j]);
+ Vec3V body1Anchor = V3LoadU(frictionPatch.body1Anchors[j]);
+
+ Vec3V ra = QuatRotate(bodyFrame0q, body0Anchor);
+ Vec3V rb = QuatRotate(bodyFrame1q, body1Anchor);
+ Vec3V error =V3Sub(V3Add(ra, bodyFrame0p), V3Add(rb, bodyFrame1p));
+
+ const PxU32 index = c.contactPatches[c.correlationListHeads[i]].start;
+ const Vec3V tvel = V3LoadA(buffer[index].targetVel);
+
+ {
+ const Vec3V raXn = V3Cross(ra, t0Cross);
+ const Vec3V rbXn = V3Cross(rb, t0Cross);
+
+ const Vec3V raXnSqrtInertia = M33MulV3(invSqrtInertia0, raXn);
+ const Vec3V rbXnSqrtInertia = M33MulV3(invSqrtInertia1, rbXn);
+
+
+ const FloatV resp0 = FAdd(invMass0_dom0fV, FMul(angD0, V3Dot(raXnSqrtInertia, raXnSqrtInertia)));
+ const FloatV resp1 = FSub(FMul(angD1, V3Dot(rbXnSqrtInertia, rbXnSqrtInertia)), invMass1_dom1fV);
+ const FloatV resp = FAdd(resp0, resp1);
+
+ const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FDiv(p8, resp), zero);
+
+ FloatV targetVel = V3Dot(tvel, t0);
+
+ const FloatV vrel1 = FAdd(V3Dot(t0, linVel0), V3Dot(raXn, angVel0));
+ const FloatV vrel2 = FAdd(V3Dot(t0, linVel1), V3Dot(rbXn, angVel1));
+ const FloatV vrel = FSub(vrel1, vrel2);
+
+ targetVel = FSub(targetVel, vrel);
+
+ f0->normalXYZ_appliedForceW = V4SetW(t0, zero);
+ f0->raXnXYZ_velMultiplierW = V4SetW(raXnSqrtInertia, velMultiplier);
+ f0->rbXnXYZ_biasW = V4SetW(rbXnSqrtInertia, FMul(V3Dot(t0, error), invDt));
+ FStore(targetVel, &f0->targetVel);
+ }
+
+ {
+
+ const Vec3V raXn = V3Cross(ra, t1Cross);
+ const Vec3V rbXn = V3Cross(rb, t1Cross);
+
+ const Vec3V raXnSqrtInertia = M33MulV3(invSqrtInertia0, raXn);
+ const Vec3V rbXnSqrtInertia = M33MulV3(invSqrtInertia1, rbXn);
+
+ const FloatV resp0 = FAdd(invMass0_dom0fV, FMul(angD0, V3Dot(raXnSqrtInertia, raXnSqrtInertia)));
+ const FloatV resp1 = FSub(FMul(angD1, V3Dot(rbXnSqrtInertia, rbXnSqrtInertia)), invMass1_dom1fV);
+ const FloatV resp = FAdd(resp0, resp1);
+
+ const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FDiv(p8, resp), zero);
+
+ FloatV targetVel = V3Dot(tvel, t1);
+
+ const FloatV vrel1 = FAdd(V3Dot(t1, linVel0), V3Dot(raXn, angVel0));
+ const FloatV vrel2 = FAdd(V3Dot(t1, linVel1), V3Dot(rbXn, angVel1));
+ const FloatV vrel = FSub(vrel1, vrel2);
+
+ targetVel = FSub(targetVel, vrel);
+
+ f1->normalXYZ_appliedForceW = V4SetW(t1, zero);
+ f1->raXnXYZ_velMultiplierW = V4SetW(raXnSqrtInertia, velMultiplier);
+ f1->rbXnXYZ_biasW = V4SetW(rbXnSqrtInertia, FMul(V3Dot(t1, error), invDt));
+ FStore(targetVel, &f1->targetVel);
+ }
+ }
+ }
+
+ frictionPatchWritebackAddrIndex++;
+ }
+}
+
+
+// Computes how much memory one contact pair needs in the solver constraint stream and in the
+// friction patch stream, together with the patch count and the number of constraint rows.
+//
+// useExtContacts             selects the "Ext" contact/friction row structs for sizing instead of the regular ones.
+// c                          correlation buffer providing friction patches and per-patch contact counts.
+// _solverConstraintByteSize  [out] constraint stream size in bytes, rounded up to a multiple of 16.
+// _frictionPatchByteSize     [out] friction patch storage size in bytes, rounded up to a multiple of 16.
+// _numFrictionPatches        [out] number of patches whose correlation list head is not LIST_END.
+// _axisConstraintCount       [out] contact rows plus friction rows over all patches that have contacts.
+//
+// All four outputs are asserted to be zero on entry.
+PX_FORCE_INLINE void computeBlockStreamByteSizes(const bool useExtContacts, const CorrelationBuffer& c,
+    PxU32& _solverConstraintByteSize, PxU32& _frictionPatchByteSize, PxU32& _numFrictionPatches,
+    PxU32& _axisConstraintCount)
+{
+    PX_ASSERT(0 == _solverConstraintByteSize);
+    PX_ASSERT(0 == _frictionPatchByteSize);
+    PX_ASSERT(0 == _numFrictionPatches);
+    PX_ASSERT(0 == _axisConstraintCount);
+
+    // The row sizes only depend on the contact flavour, so select them once.
+    const PxU32 contactRowBytes = PxU32(useExtContacts ? sizeof(SolverContactPointExt) : sizeof(SolverContactPoint));
+    const PxU32 frictionRowBytes = PxU32(useExtContacts ? sizeof(SolverContactFrictionExt) : sizeof(SolverContactFriction));
+
+    // Accumulate in locals and write the reference parameters once at the end (avoids LHS).
+    PxU32 constraintBytes = 0;
+    PxU32 patchTotal = 0;
+    PxU32 axisTotal = 0;
+
+    for(PxU32 patchIdx = 0; patchIdx < c.frictionPatchCount; ++patchIdx)
+    {
+        // A patch is counted for the friction stream when its correlation list is not empty.
+        if(c.correlationListHeads[patchIdx] != CorrelationBuffer::LIST_END)
+            ++patchTotal;
+
+        const PxU32 nbContacts = c.frictionPatchContactCounts[patchIdx];
+        if(0 == nbContacts)
+            continue; // patches without contacts add nothing to the constraint stream
+
+        // Header, one row per contact, then the applied-impulse floats padded to a multiple of four.
+        constraintBytes += PxU32(sizeof(SolverContactHeader));
+        constraintBytes += nbContacts * contactRowBytes;
+        constraintBytes += PxU32(sizeof(PxF32)) * ((nbContacts + 3) & (~3));
+        axisTotal += nbContacts;
+
+        const FrictionPatch& patch = c.frictionPatches[patchIdx];
+        if((patch.materialFlags & PxMaterialFlag::eDISABLE_FRICTION) != 0)
+            continue; // friction disabled by the material: no friction rows
+
+        // Two friction rows are reserved per anchor.
+        const PxU32 nbFrictionRows = PxU32(patch.anchorCount) * 2;
+        constraintBytes += nbFrictionRows * frictionRowBytes;
+        axisTotal += nbFrictionRows;
+    }
+
+    _numFrictionPatches = patchTotal;
+    _axisConstraintCount = axisTotal;
+
+    // Round both byte sizes up to the next multiple of 16.
+    _frictionPatchByteSize = ((PxU32(patchTotal * sizeof(FrictionPatch)) + 0x0f) & ~0x0f);
+    _solverConstraintByteSize = ((constraintBytes + 0x0f) & ~0x0f);
+    PX_ASSERT(0 == (_solverConstraintByteSize & 0x0f));
+    PX_ASSERT(0 == (_frictionPatchByteSize & 0x0f));
+}
+
+// Sizes and reserves the constraint block and the friction patch buffer for one contact pair.
+//
+// The sizes come from computeBlockStreamByteSizes(). The constraint block is requested with 16
+// extra bytes beyond solverConstraintByteSize. The friction buffer is only requested when the
+// constraint reservation did not fail. An allocator result of NULL or of the sentinel pointer
+// value -1 is treated as failure and reported through PX_WARN_ONCE.
+//
+// solverConstraint          [out] start of the constraint block; written only when the constraint
+//                           reservation did not fail and the size is non-zero.
+// _frictionPatches          [out] reserved friction patch buffer, or NULL.
+// numFrictionPatches, solverConstraintByteSize, axisConstraintCount
+//                           [out] filled in by computeBlockStreamByteSizes().
+//
+// Returns true when neither of the two reservations failed. A zero-sized request counts as success.
+static bool reserveBlockStreams(const bool useExtContacts, Dy::CorrelationBuffer& cBuffer,
+    PxU8*& solverConstraint,
+    FrictionPatch*& _frictionPatches,
+    PxU32& numFrictionPatches, PxU32& solverConstraintByteSize,
+    PxU32& axisConstraintCount, PxConstraintAllocator& constraintAllocator)
+{
+    PX_ASSERT(NULL == solverConstraint);
+    PX_ASSERT(NULL == _frictionPatches);
+    PX_ASSERT(0 == numFrictionPatches);
+    PX_ASSERT(0 == solverConstraintByteSize);
+    PX_ASSERT(0 == axisConstraintCount);
+
+    // Work out how large both buffers have to be.
+    PxU32 frictionBytes = 0;
+    computeBlockStreamByteSizes(useExtContacts, cBuffer, solverConstraintByteSize, frictionBytes, numFrictionPatches, axisConstraintCount);
+
+    const PxU32 constraintBytes = solverConstraintByteSize;
+
+    // Constraint block first.
+    PxU8* block = NULL;
+    if(constraintBytes > 0)
+    {
+        block = constraintAllocator.reserveConstraintData(constraintBytes + 16u);
+
+        if(NULL == block)
+        {
+            PX_WARN_ONCE(
+                "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+                "Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks.");
+        }
+        else if((reinterpret_cast<PxU8*>(-1)) == block)
+        {
+            PX_WARN_ONCE(
+                "Attempting to allocate more than 16K of contact data for a single contact pair in constraint prep. "
+                "Either accept dropped contacts or simplify collision geometry.");
+            block = NULL;
+        }
+    }
+    const bool constraintOk = (0 == constraintBytes) || (NULL != block);
+
+    // Friction buffer second, and only if the constraint block did not fail.
+    FrictionPatch* patches = NULL;
+    if(frictionBytes > 0 && constraintOk)
+    {
+        patches = reinterpret_cast<FrictionPatch*>(constraintAllocator.reserveFrictionData(frictionBytes));
+
+        if(NULL == patches)
+        {
+            PX_WARN_ONCE(
+                "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+                "Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks.");
+        }
+        else if((reinterpret_cast<FrictionPatch*>(-1)) == patches)
+        {
+            PX_WARN_ONCE(
+                "Attempting to allocate more than 16K of friction data for a single contact pair in constraint prep. "
+                "Either accept dropped contacts or simplify collision geometry.");
+            patches = NULL;
+        }
+    }
+    const bool frictionOk = (0 == frictionBytes) || (NULL != patches);
+
+    _frictionPatches = patches;
+
+    // Hand the constraint block to the caller unless its reservation failed.
+    if(constraintOk && solverConstraintByteSize)
+    {
+        solverConstraint = block;
+        PX_ASSERT(0==(uintptr_t(solverConstraint) & 0x0f));
+    }
+
+    return constraintOk && frictionOk;
+}
+
+
+// Builds the solver contact constraints for one contact pair whose contacts are already stored
+// in contactDesc.contacts / contactDesc.numContacts.
+//
+// Steps: reset the correlation buffer, fetch the cached friction patches (unless strong friction
+// is disabled), build, correlate and grow the patches, reserve the constraint and friction
+// buffers, copy the friction patches into the friction buffer, and finally write the constraint
+// stream through setupFinalizeSolverConstraints() or setupFinalizeExtSolverContacts().
+//
+// On return contactDesc.frictionPtr / frictionCount and desc.constraint / constraintLengthOver16
+// refer to the reserved buffers, or are NULL / 0 when there were no contacts or a reservation failed.
+//
+// Returns true when there were no contacts or both reservations succeeded, false otherwise.
+bool createFinalizeSolverContacts(
+    PxSolverContactDesc& contactDesc,
+    CorrelationBuffer& c,
+    const PxReal invDtF32,
+    PxReal bounceThresholdF32,
+    PxReal frictionOffsetThreshold,
+    PxReal correlationDistance,
+    PxConstraintAllocator& constraintAllocator)
+{
+    Ps::prefetchLine(contactDesc.body0);
+    Ps::prefetchLine(contactDesc.body1);
+    Ps::prefetchLine(contactDesc.data0);
+    Ps::prefetchLine(contactDesc.data1);
+
+    // Start from an empty correlation buffer and an empty constraint.
+    c.frictionPatchCount = 0;
+    c.contactPatchCount = 0;
+
+    PxSolverConstraintDesc& desc = *contactDesc.desc;
+    desc.constraintLengthOver16 = 0;
+
+    // Nothing to build without contacts: clear the outputs and report success.
+    if (0 == contactDesc.numContacts)
+    {
+        contactDesc.frictionPtr = NULL;
+        contactDesc.frictionCount = 0;
+        desc.constraint = NULL;
+        return true;
+    }
+
+    const bool hasForceThreshold = contactDesc.hasForceThresholds;
+    const bool staticOrKinematicBody = contactDesc.bodyState1 == PxSolverContactDesc::eKINEMATIC_BODY || contactDesc.bodyState1 == PxSolverContactDesc::eSTATIC_BODY;
+    const bool useExtContacts = ((contactDesc.bodyState0 | contactDesc.bodyState1) & PxSolverContactDesc::eARTICULATION) != 0;
+
+    // Cached friction patches are only fetched when strong friction is enabled.
+    if (!contactDesc.disableStrongFriction)
+    {
+        getFrictionPatches(c, contactDesc.frictionPtr, contactDesc.frictionCount, contactDesc.bodyFrame0, contactDesc.bodyFrame1, correlationDistance);
+    }
+
+    const bool patchOverflow = !createContactPatches(c, contactDesc.contacts, contactDesc.numContacts, PXC_SAME_NORMAL);
+    const bool correlateOverflow = correlatePatches(c, contactDesc.contacts, contactDesc.bodyFrame0, contactDesc.bodyFrame1, PXC_SAME_NORMAL, 0, 0);
+    const bool overflow = correlateOverflow || patchOverflow;
+    PX_UNUSED(overflow);
+
+#if PX_CHECKED
+    if (overflow)
+    {
+        Ps::getFoundation().error(physx::PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__,
+            "Dropping contacts in solver because we exceeded limit of 32 friction patches.");
+    }
+#endif
+
+    growPatches(c, contactDesc.contacts, contactDesc.bodyFrame0, contactDesc.bodyFrame1, correlationDistance, 0, frictionOffsetThreshold + contactDesc.restDistance);
+
+    // Reserve the output buffers.
+    FrictionPatch* frictionPatches = NULL;
+    PxU8* solverConstraint = NULL;
+    PxU32 numFrictionPatches = 0;
+    PxU32 solverConstraintByteSize = 0;
+    PxU32 axisConstraintCount = 0;
+
+    const bool successfulReserve = reserveBlockStreams(
+        useExtContacts, c,
+        solverConstraint, frictionPatches,
+        numFrictionPatches,
+        solverConstraintByteSize,
+        axisConstraintCount,
+        constraintAllocator);
+
+    // Clear the outputs first; they are only filled in below when the reservation succeeded.
+    contactDesc.frictionPtr = NULL;
+    contactDesc.frictionCount = 0;
+    desc.constraint = NULL;
+    desc.constraintLengthOver16 = 0;
+
+    if (!successfulReserve)
+        return false;
+
+    // Publish the reserved buffers.
+    PxU8* frictionDataPtr = reinterpret_cast<PxU8*>(frictionPatches);
+    contactDesc.frictionPtr = frictionDataPtr;
+    desc.constraint = solverConstraint;
+    contactDesc.frictionCount = Ps::to8(numFrictionPatches);
+    desc.constraintLengthOver16 = Ps::to16(solverConstraintByteSize / 16);
+    desc.writeBack = contactDesc.contactForces;
+    desc.writeBackLengthOver4 = PxU16(contactDesc.contactForces ? contactDesc.numContacts : 0);
+
+    // Copy every friction patch that has contacts into the friction buffer.
+    if (frictionPatches)
+    {
+        // PT: TODO: revisit this... not very satisfying
+        Ps::prefetchLine(frictionPatches);
+        Ps::prefetchLine(frictionPatches, 128);
+        Ps::prefetchLine(frictionPatches, 256);
+
+        FrictionPatch* dst = frictionPatches;
+        for (PxU32 patchIdx = 0; patchIdx < c.frictionPatchCount; ++patchIdx)
+        {
+            if (0 == c.frictionPatchContactCounts[patchIdx])
+                continue;
+
+            *dst++ = c.frictionPatches[patchIdx];
+            Ps::prefetchLine(dst, 256);
+        }
+    }
+
+    // Write the constraint stream.
+    if (solverConstraint)
+    {
+        const PxSolverBodyData& data0 = *contactDesc.data0;
+        const PxSolverBodyData& data1 = *contactDesc.data1;
+
+        if (!useExtContacts)
+        {
+            setupFinalizeSolverConstraints(contactDesc.shapeInteraction, contactDesc.contacts, c, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint,
+                data0, data1, invDtF32, bounceThresholdF32,
+                contactDesc.mInvMassScales.linear0, contactDesc.mInvMassScales.angular0, contactDesc.mInvMassScales.linear1, contactDesc.mInvMassScales.angular1,
+                hasForceThreshold, staticOrKinematicBody, contactDesc.restDistance, frictionDataPtr, contactDesc.maxCCDSeparation);
+        }
+        else
+        {
+            const SolverExtBody b0(reinterpret_cast<const void*>(contactDesc.body0), reinterpret_cast<const void*>(&data0), desc.linkIndexA);
+            const SolverExtBody b1(reinterpret_cast<const void*>(contactDesc.body1), reinterpret_cast<const void*>(&data1), desc.linkIndexB);
+
+            setupFinalizeExtSolverContacts(contactDesc.contacts, c, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint,
+                b0, b1, invDtF32, bounceThresholdF32,
+                contactDesc.mInvMassScales.linear0, contactDesc.mInvMassScales.angular0, contactDesc.mInvMassScales.linear1, contactDesc.mInvMassScales.angular1,
+                contactDesc.restDistance, frictionDataPtr, contactDesc.maxCCDSeparation);
+        }
+
+        //KS - set to 0 so we have a counter for the number of times we solved the constraint
+        //only going to be used on SPU but might as well set on all platforms because this code is shared
+        *(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize)) = 0;
+    }
+
+    return true;
+}
+
+
+
+// Entry point that starts from a contact manager output: extracts the contacts into the thread's
+// contact buffer, records the results in contactDesc, and then delegates to the CorrelationBuffer
+// overload using the thread's correlation buffer.
+//
+// Returns whatever the CorrelationBuffer overload returns.
+bool createFinalizeSolverContacts(PxSolverContactDesc& contactDesc,
+    PxsContactManagerOutput& output,
+    ThreadContext& threadContext,
+    const PxReal invDtF32,
+    PxReal bounceThresholdF32,
+    PxReal frictionOffsetThreshold,
+    PxReal correlationDistance,
+    PxConstraintAllocator& constraintAllocator)
+{
+    // The per-thread contact buffer is reused; reset it before extracting.
+    ContactBuffer& buffer = threadContext.mContactBuffer;
+    buffer.count = 0;
+
+    // Values filled in by extractContacts(); the scales default to 1 and the flags to false.
+    PxReal invMassScale0 = 1.f;
+    PxReal invMassScale1 = 1.f;
+    PxReal invInertiaScale0 = 1.f;
+    PxReal invInertiaScale1 = 1.f;
+    bool hasMaxImpulse = false;
+    bool hasTargetVelocity = false;
+
+    // The smaller of the two bodies' max contact impulses is handed to the extraction.
+    const PxReal maxImpulse = PxMin(contactDesc.data0->maxContactImpulse, contactDesc.data1->maxContactImpulse);
+
+    const PxU32 numContacts = extractContacts(buffer, output, hasMaxImpulse, hasTargetVelocity, invMassScale0, invMassScale1,
+        invInertiaScale0, invInertiaScale1, maxImpulse);
+
+    contactDesc.contacts = buffer.contacts;
+    contactDesc.numContacts = numContacts;
+    // Strong friction is also disabled when any contact has a target velocity.
+    contactDesc.disableStrongFriction = contactDesc.disableStrongFriction || hasTargetVelocity;
+    contactDesc.hasMaxImpulse = hasMaxImpulse;
+
+    // Fold the extracted scales into the ones already present in the descriptor.
+    contactDesc.mInvMassScales.linear0 *= invMassScale0;
+    contactDesc.mInvMassScales.linear1 *= invMassScale1;
+    contactDesc.mInvMassScales.angular0 *= invInertiaScale0;
+    contactDesc.mInvMassScales.angular1 *= invInertiaScale1;
+
+    return createFinalizeSolverContacts(contactDesc, threadContext.mCorrelationBuffer, invDtF32, bounceThresholdF32, frictionOffsetThreshold, correlationDistance, constraintAllocator);
+}
+
+// Points the constraint descriptor's write-back fields at the contact manager output's contact
+// force buffer and contact count. The contact manager argument is unused.
+// Returns the number of contacts in cmOutput.
+PxU32 getContactManagerConstraintDesc(const PxsContactManagerOutput& cmOutput, const PxsContactManager& /*cm*/, PxSolverConstraintDesc& desc)
+{
+    desc.writeBack = cmOutput.contactForces;
+    desc.writeBackLengthOver4 = cmOutput.nbContacts;
+
+    return cmOutput.nbContacts;
+}
+
+}
+
+}
+
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.h
new file mode 100644
index 00000000..2e4a7ba2
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.h
@@ -0,0 +1,168 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_CONTACTPREP_H
+#define DY_CONTACTPREP_H
+
+#include "DySolverConstraintDesc.h"
+#include "PxSceneDesc.h"
+#include "DySolverContact4.h"
+
+
+namespace physx
+{
+
+struct PxcNpWorkUnit;
+class PxsConstraintBlockManager;
+struct PxsContactManagerOutput;
+struct PxSolverBody;
+struct PxSolverBodyData;
+struct PxSolverConstraintDesc;
+
+namespace Dy
+{
+ class ThreadContext;
+ struct CorrelationBuffer;
+
+// Parameter list for the single-pair "create and finalize solver contacts" functions.
+// It is expanded by the PxcCreateFinalizeSolverContactMethod function pointer typedef.
+#define CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS \
+ PxSolverContactDesc& contactDesc, \
+ PxsContactManagerOutput& output, \
+ ThreadContext& threadContext, \
+ const PxReal invDtF32, \
+ PxReal bounceThresholdF32, \
+ PxReal frictionOffsetThreshold, \
+ PxReal correlationDistance, \
+ PxConstraintAllocator& constraintAllocator
+
+// Parameter list expanded by the PxcCreateFinalizeSolverContactMethod4 function pointer typedef.
+// It takes an array of contact manager output pointers and an array of contact descriptors.
+// NOTE(review): "SOVLER" in the macro name is a misspelling of "SOLVER"; fixing it means renaming every use.
+#define CREATE_FINALIZE_SOVLER_CONTACT_METHOD_ARGS_4 \
+ PxsContactManagerOutput** outputs, \
+ ThreadContext& threadContext, \
+ PxSolverContactDesc* blockDescs, \
+ const PxReal invDtF32, \
+ PxReal bounceThresholdF32, \
+ PxReal frictionThresholdF32, \
+ PxReal correlationDistanceF32, \
+ PxConstraintAllocator& constraintAllocator
+
+
+/*!
+Function pointer type for the single-pair "create and finalize solver contacts" functions.
+The parameters are those listed in CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS; the result is a bool.
+*/
+
+typedef bool (*PxcCreateFinalizeSolverContactMethod)(CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS);
+
+// Table of three single-pair functions, defined elsewhere.
+// NOTE(review): presumably indexed by friction type - confirm against the definition.
+extern PxcCreateFinalizeSolverContactMethod createFinalizeMethods[3];
+
+// Function pointer type for the "4" variants; these return a SolverConstraintPrepState::Enum.
+typedef SolverConstraintPrepState::Enum (*PxcCreateFinalizeSolverContactMethod4)(CREATE_FINALIZE_SOVLER_CONTACT_METHOD_ARGS_4);
+
+// Table of three "4" variant functions, defined elsewhere.
+// NOTE(review): presumably indexed the same way as createFinalizeMethods - confirm.
+extern PxcCreateFinalizeSolverContactMethod4 createFinalizeMethods4[3];
+
+
+// Extracts the contacts of one pair from 'output' into threadContext.mContactBuffer, stores the
+// contact pointer, contact count, friction/max-impulse flags and inverse mass/inertia scales in
+// 'contactDesc', then calls the CorrelationBuffer overload below with threadContext.mCorrelationBuffer.
+// Returns the result of that overload.
+bool createFinalizeSolverContacts( PxSolverContactDesc& contactDesc,
+ PxsContactManagerOutput& output,
+ ThreadContext& threadContext,
+ const PxReal invDtF32,
+ PxReal bounceThresholdF32,
+ PxReal frictionOffsetThreshold,
+ PxReal correlationDistance,
+ PxConstraintAllocator& constraintAllocator);
+
+// Builds the solver contact constraints for the contacts already referenced by 'contactDesc',
+// reserving the constraint and friction buffers from 'constraintAllocator'.
+// Returns true when contactDesc has no contacts or when both reservations succeeded, and false
+// when a reservation failed. In both the no-contact and the failure case the descriptor's
+// constraint and friction pointers are left NULL.
+bool createFinalizeSolverContacts( PxSolverContactDesc& contactDesc,
+ CorrelationBuffer& c,
+ const PxReal invDtF32,
+ PxReal bounceThresholdF32,
+ PxReal frictionOffsetThreshold,
+ PxReal correlationDistance,
+ PxConstraintAllocator& constraintAllocator);
+
+// "4" variant taking an array of contact manager output pointers, a thread context and an array
+// of contact descriptors. Its parameter types match PxcCreateFinalizeSolverContactMethod4.
+// NOTE(review): presumably prepares a block of four contact pairs at once - the implementation
+// is not in this header, confirm there.
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4( PxsContactManagerOutput** outputs,
+ ThreadContext& threadContext,
+ PxSolverContactDesc* blockDescs,
+ const PxReal invDtF32,
+ PxReal bounceThresholdF32,
+ PxReal frictionOffsetThreshold,
+ PxReal correlationDistance,
+ PxConstraintAllocator& constraintAllocator);
+
+// "4" variant that takes a correlation buffer directly instead of contact manager outputs and a
+// thread context.
+// NOTE(review): presumably the counterpart of the CorrelationBuffer overload of
+// createFinalizeSolverContacts - confirm against the implementation.
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4( Dy::CorrelationBuffer& c,
+ PxSolverContactDesc* blockDescs,
+ const PxReal invDtF32,
+ PxReal bounceThresholdF32,
+ PxReal frictionOffsetThreshold,
+ PxReal correlationDistance,
+ PxConstraintAllocator& constraintAllocator);
+
+
+
+// Single-pair variant whose parameter list matches CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS.
+// NOTE(review): presumably the one-directional Coulomb friction version of
+// createFinalizeSolverContacts - the implementation is not in this header, confirm there.
+bool createFinalizeSolverContactsCoulomb1D(PxSolverContactDesc& contactDesc,
+ PxsContactManagerOutput& output,
+ ThreadContext& threadContext,
+ const PxReal invDtF32,
+ PxReal bounceThresholdF32,
+ PxReal frictionOffsetThreshold,
+ PxReal correlationDistance,
+ PxConstraintAllocator& constraintAllocator);
+
+// Single-pair variant whose parameter list matches CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS.
+// NOTE(review): presumably the two-directional Coulomb friction version of
+// createFinalizeSolverContacts - the implementation is not in this header, confirm there.
+bool createFinalizeSolverContactsCoulomb2D(PxSolverContactDesc& contactDesc,
+ PxsContactManagerOutput& output,
+ ThreadContext& threadContext,
+ const PxReal invDtF32,
+ PxReal bounceThresholdF32,
+ PxReal frictionOffsetThreshold,
+ PxReal correlationDistance,
+ PxConstraintAllocator& constraintAllocator);
+
+
+// "4" variant whose parameter types match PxcCreateFinalizeSolverContactMethod4.
+// NOTE(review): presumably the one-directional Coulomb friction version of
+// createFinalizeSolverContacts4 - the implementation is not in this header, confirm there.
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb1D( PxsContactManagerOutput** outputs,
+ ThreadContext& threadContext,
+ PxSolverContactDesc* blockDescs,
+ const PxReal invDtF32,
+ PxReal bounceThresholdF32,
+ PxReal frictionOffsetThreshold,
+ PxReal correlationDistance,
+ PxConstraintAllocator& constraintAllocator);
+
+// "4" variant whose parameter types match PxcCreateFinalizeSolverContactMethod4.
+// NOTE(review): presumably the two-directional Coulomb friction version of
+// createFinalizeSolverContacts4 - the implementation is not in this header, confirm there.
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb2D(PxsContactManagerOutput** outputs,
+ ThreadContext& threadContext,
+ PxSolverContactDesc* blockDescs,
+ const PxReal invDtF32,
+ PxReal bounceThresholdF32,
+ PxReal frictionOffsetThreshold,
+ PxReal correlationDistance,
+ PxConstraintAllocator& constraintAllocator);
+
+
+// Copies the contact force buffer pointer and the contact count from 'cmOutput' into the
+// write-back fields of 'desc' and returns the contact count. The definition does not use 'cm'.
+PxU32 getContactManagerConstraintDesc(const PxsContactManagerOutput& cmOutput, const PxsContactManager& cm, PxSolverConstraintDesc& desc);
+
+}
+
+}
+
+#endif //DY_CONTACTPREP_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4.cpp
new file mode 100644
index 00000000..5bbf9637
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4.cpp
@@ -0,0 +1,1478 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "foundation/PxPreprocessor.h"
+#include "PxSceneDesc.h"
+#include "PsVecMath.h"
+#include "PsMathUtils.h"
+#include "DySolverContact.h"
+#include "DySolverContact4.h"
+#include "DySolverConstraintTypes.h"
+#include "PxcNpWorkUnit.h"
+#include "DyThreadContext.h"
+#include "DyContactPrep.h"
+#include "PxcNpContactPrepShared.h"
+#include "PxvDynamics.h"
+#include "DyCorrelationBuffer.h"
+#include "DyDynamics.h"
+#include "DyArticulationContactPrep.h"
+#include "PxsContactManager.h"
+
+#include "PsFoundation.h"
+
+using namespace physx;
+using namespace Gu;
+
+
+#include "PsVecMath.h"
+#include "PxContactModifyCallback.h"
+#include "PxsMaterialManager.h"
+#include "PxsMaterialCombiner.h"
+#include "DyContactPrepShared.h"
+
+using namespace Ps::aos;
+
+namespace physx
+{
+namespace Dy
+{
+
+PxcCreateFinalizeSolverContactMethod4 createFinalizeMethods4[3] = // Lookup table of the three block contact-prep entry points; NOTE(review): the index presumably corresponds to a friction-model enum value - confirm against the code that indexes it.
+{
+ createFinalizeSolverContacts4, // [0] non-Coulomb variant (must resolve to an overload matching PxcCreateFinalizeSolverContactMethod4)
+ createFinalizeSolverContacts4Coulomb1D, // [1] Coulomb1D variant
+ createFinalizeSolverContacts4Coulomb2D // [2] Coulomb2D variant
+};
+
+// Sanity-check helper for the PX_ASSERTs in this file: copies the SIMD value into a
+// scalar PxVec4 and returns whatever PxVec4::isFinite() reports for that copy.
+inline bool ValidateVec4(const Vec4V v)
+{
+ // The scratch PxVec4 is declared 16-byte aligned because it is the target of V4StoreA.
+ PX_ALIGN(16, PxVec4 scalarCopy);
+ Ps::aos::V4StoreA(v, &scalarCopy.x);
+
+ const bool finite = scalarCopy.isFinite();
+ return finite;
+}
+
+static void setupFinalizeSolverConstraints4(PxSolverContactDesc* PX_RESTRICT descs, CorrelationBuffer& c, PxU8* PX_RESTRICT workspace,
+ const PxReal invDtF32, PxReal bounceThresholdF32,
+ const Ps::aos::Vec4VArg invMassScale0, const Ps::aos::Vec4VArg invInertiaScale0,
+ const Ps::aos::Vec4VArg invMassScale1, const Ps::aos::Vec4VArg invInertiaScale1)
+{
+
+ //OK, we have a workspace of pre-allocated space to store all 4 descs in. We now need to create the constraints in it
+
+ const Vec4V ccdMaxSeparation = Ps::aos::V4LoadXYZW(descs[0].maxCCDSeparation, descs[1].maxCCDSeparation, descs[2].maxCCDSeparation, descs[3].maxCCDSeparation);
+
+ const Vec4V zero = V4Zero();
+ const BoolV bFalse = BFFFF();
+ const FloatV fZero = FZero();
+
+ PxU8 flags[4] = { PxU8(descs[0].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0),
+ PxU8(descs[1].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0),
+ PxU8(descs[2].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0),
+ PxU8(descs[3].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0) };
+
+ bool hasMaxImpulse = descs[0].hasMaxImpulse || descs[1].hasMaxImpulse || descs[2].hasMaxImpulse || descs[3].hasMaxImpulse;
+
+ //The block is dynamic if **any** of the constraints have a non-static body B. This allows us to batch static and non-static constraints but we only get a memory/perf
+ //saving if all 4 are static. This simplifies the constraint partitioning such that it only needs to care about separating contacts and 1D constraints (which it already does)
+ bool isDynamic = false;
+ bool hasKinematic = false;
+ for(PxU32 a = 0; a < 4; ++a)
+ {
+ isDynamic = isDynamic || (descs[a].bodyState1 == PxSolverContactDesc::eDYNAMIC_BODY);
+ hasKinematic = hasKinematic || descs[a].bodyState1 == PxSolverContactDesc::eKINEMATIC_BODY;
+ }
+
+ const PxU32 constraintSize = isDynamic ? sizeof(SolverContactBatchPointDynamic4) : sizeof(SolverContactBatchPointBase4);
+ const PxU32 frictionSize = isDynamic ? sizeof(SolverContactFrictionDynamic4) : sizeof(SolverContactFrictionBase4);
+
+ PxU8* PX_RESTRICT ptr = workspace;
+
+ const Vec4V dom0 = invMassScale0;
+ const Vec4V dom1 = invMassScale1;
+ const Vec4V angDom0 = invInertiaScale0;
+ const Vec4V angDom1 = invInertiaScale1;
+
+ const Vec4V maxPenBias = V4Max(V4LoadXYZW(descs[0].data0->penBiasClamp, descs[1].data0->penBiasClamp,
+ descs[2].data0->penBiasClamp, descs[3].data0->penBiasClamp),
+ V4LoadXYZW(descs[0].data1->penBiasClamp, descs[1].data1->penBiasClamp,
+ descs[2].data1->penBiasClamp, descs[3].data1->penBiasClamp));
+
+ const Vec4V restDistance = V4LoadXYZW(descs[0].restDistance, descs[1].restDistance, descs[2].restDistance,
+ descs[3].restDistance);
+
+
+ //load up velocities
+ Vec4V linVel00 = V4LoadA(&descs[0].data0->linearVelocity.x);
+ Vec4V linVel10 = V4LoadA(&descs[1].data0->linearVelocity.x);
+ Vec4V linVel20 = V4LoadA(&descs[2].data0->linearVelocity.x);
+ Vec4V linVel30 = V4LoadA(&descs[3].data0->linearVelocity.x);
+
+ Vec4V linVel01 = V4LoadA(&descs[0].data1->linearVelocity.x);
+ Vec4V linVel11 = V4LoadA(&descs[1].data1->linearVelocity.x);
+ Vec4V linVel21 = V4LoadA(&descs[2].data1->linearVelocity.x);
+ Vec4V linVel31 = V4LoadA(&descs[3].data1->linearVelocity.x);
+
+ Vec4V angVel00 = V4LoadA(&descs[0].data0->angularVelocity.x);
+ Vec4V angVel10 = V4LoadA(&descs[1].data0->angularVelocity.x);
+ Vec4V angVel20 = V4LoadA(&descs[2].data0->angularVelocity.x);
+ Vec4V angVel30 = V4LoadA(&descs[3].data0->angularVelocity.x);
+
+ Vec4V angVel01 = V4LoadA(&descs[0].data1->angularVelocity.x);
+ Vec4V angVel11 = V4LoadA(&descs[1].data1->angularVelocity.x);
+ Vec4V angVel21 = V4LoadA(&descs[2].data1->angularVelocity.x);
+ Vec4V angVel31 = V4LoadA(&descs[3].data1->angularVelocity.x);
+
+ Vec4V linVelT00, linVelT10, linVelT20;
+ Vec4V linVelT01, linVelT11, linVelT21;
+ Vec4V angVelT00, angVelT10, angVelT20;
+ Vec4V angVelT01, angVelT11, angVelT21;
+
+ PX_TRANSPOSE_44_34(linVel00, linVel10, linVel20, linVel30, linVelT00, linVelT10, linVelT20);
+ PX_TRANSPOSE_44_34(linVel01, linVel11, linVel21, linVel31, linVelT01, linVelT11, linVelT21);
+ PX_TRANSPOSE_44_34(angVel00, angVel10, angVel20, angVel30, angVelT00, angVelT10, angVelT20);
+ PX_TRANSPOSE_44_34(angVel01, angVel11, angVel21, angVel31, angVelT01, angVelT11, angVelT21);
+
+ const Vec4V vrelX = V4Sub(linVelT00, linVelT01);
+ const Vec4V vrelY = V4Sub(linVelT10, linVelT11);
+ const Vec4V vrelZ = V4Sub(linVelT20, linVelT21);
+
+ //Load up masses and invInertia
+
+ /*const Vec4V sqrtInvMass0 = V4Merge(FLoad(descs[0].data0->sqrtInvMass), FLoad(descs[1].data0->sqrtInvMass), FLoad(descs[2].data0->sqrtInvMass),
+ FLoad(descs[3].data0->sqrtInvMass));
+
+ const Vec4V sqrtInvMass1 = V4Merge(FLoad(descs[0].data1->sqrtInvMass), FLoad(descs[1].data1->sqrtInvMass), FLoad(descs[2].data1->sqrtInvMass),
+ FLoad(descs[3].data1->sqrtInvMass));*/
+
+ const Vec4V invMass0 = V4LoadXYZW(descs[0].data0->invMass, descs[1].data0->invMass, descs[2].data0->invMass, descs[3].data0->invMass);
+ const Vec4V invMass1 = V4LoadXYZW(descs[0].data1->invMass, descs[1].data1->invMass, descs[2].data1->invMass, descs[3].data1->invMass);
+
+ const Vec4V invMass0D0 = V4Mul(dom0, invMass0);
+ const Vec4V invMass1D1 = V4Mul(dom1, invMass1);
+
+ Vec4V invInertia00X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[0].data0->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33
+ Vec4V invInertia00Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[0].data0->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33
+ Vec4V invInertia00Z = Vec4V_From_Vec3V(V3LoadU(descs[0].data0->sqrtInvInertia.column2));
+
+ Vec4V invInertia10X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[1].data0->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33
+ Vec4V invInertia10Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[1].data0->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33
+ Vec4V invInertia10Z = Vec4V_From_Vec3V(V3LoadU(descs[1].data0->sqrtInvInertia.column2));
+
+ Vec4V invInertia20X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[2].data0->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33
+ Vec4V invInertia20Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[2].data0->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33
+ Vec4V invInertia20Z = Vec4V_From_Vec3V(V3LoadU(descs[2].data0->sqrtInvInertia.column2));
+
+ Vec4V invInertia30X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[3].data0->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33
+ Vec4V invInertia30Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[3].data0->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33
+ Vec4V invInertia30Z = Vec4V_From_Vec3V(V3LoadU(descs[3].data0->sqrtInvInertia.column2));
+
+ Vec4V invInertia01X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[0].data1->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33
+ Vec4V invInertia01Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[0].data1->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33
+ Vec4V invInertia01Z = Vec4V_From_Vec3V(V3LoadU(descs[0].data1->sqrtInvInertia.column2));
+
+ Vec4V invInertia11X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[1].data1->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33
+ Vec4V invInertia11Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[1].data1->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33
+ Vec4V invInertia11Z = Vec4V_From_Vec3V(V3LoadU(descs[1].data1->sqrtInvInertia.column2));
+
+ Vec4V invInertia21X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[2].data1->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33
+ Vec4V invInertia21Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[2].data1->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33
+ Vec4V invInertia21Z = Vec4V_From_Vec3V(V3LoadU(descs[2].data1->sqrtInvInertia.column2));
+
+ Vec4V invInertia31X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[3].data1->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33
+ Vec4V invInertia31Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[3].data1->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33
+ Vec4V invInertia31Z = Vec4V_From_Vec3V(V3LoadU(descs[3].data1->sqrtInvInertia.column2));
+
+ Vec4V invInertia0X0, invInertia0X1, invInertia0X2;
+ Vec4V invInertia0Y0, invInertia0Y1, invInertia0Y2;
+ Vec4V invInertia0Z0, invInertia0Z1, invInertia0Z2;
+
+ Vec4V invInertia1X0, invInertia1X1, invInertia1X2;
+ Vec4V invInertia1Y0, invInertia1Y1, invInertia1Y2;
+ Vec4V invInertia1Z0, invInertia1Z1, invInertia1Z2;
+
+ PX_TRANSPOSE_44_34(invInertia00X, invInertia10X, invInertia20X, invInertia30X, invInertia0X0, invInertia0Y0, invInertia0Z0);
+ PX_TRANSPOSE_44_34(invInertia00Y, invInertia10Y, invInertia20Y, invInertia30Y, invInertia0X1, invInertia0Y1, invInertia0Z1);
+ PX_TRANSPOSE_44_34(invInertia00Z, invInertia10Z, invInertia20Z, invInertia30Z, invInertia0X2, invInertia0Y2, invInertia0Z2);
+
+ PX_TRANSPOSE_44_34(invInertia01X, invInertia11X, invInertia21X, invInertia31X, invInertia1X0, invInertia1Y0, invInertia1Z0);
+ PX_TRANSPOSE_44_34(invInertia01Y, invInertia11Y, invInertia21Y, invInertia31Y, invInertia1X1, invInertia1Y1, invInertia1Z1);
+ PX_TRANSPOSE_44_34(invInertia01Z, invInertia11Z, invInertia21Z, invInertia31Z, invInertia1X2, invInertia1Y2, invInertia1Z2);
+
+
+ const FloatV invDt = FLoad(invDtF32);
+ const FloatV p8 = FLoad(0.8f);
+ const Vec4V p84 = V4Splat(p8);
+ const Vec4V bounceThreshold = V4Splat(FLoad(bounceThresholdF32));
+
+ const FloatV invDtp8 = FMul(invDt, p8);
+
+ const Vec3V bodyFrame00p = V3LoadU(descs[0].bodyFrame0.p);
+ const Vec3V bodyFrame01p = V3LoadU(descs[1].bodyFrame0.p);
+ const Vec3V bodyFrame02p = V3LoadU(descs[2].bodyFrame0.p);
+ const Vec3V bodyFrame03p = V3LoadU(descs[3].bodyFrame0.p);
+
+ Vec4V bodyFrame00p4 = Vec4V_From_Vec3V(bodyFrame00p);
+ Vec4V bodyFrame01p4 = Vec4V_From_Vec3V(bodyFrame01p);
+ Vec4V bodyFrame02p4 = Vec4V_From_Vec3V(bodyFrame02p);
+ Vec4V bodyFrame03p4 = Vec4V_From_Vec3V(bodyFrame03p);
+
+ Vec4V bodyFrame0pX, bodyFrame0pY, bodyFrame0pZ;
+ PX_TRANSPOSE_44_34(bodyFrame00p4, bodyFrame01p4, bodyFrame02p4, bodyFrame03p4, bodyFrame0pX, bodyFrame0pY, bodyFrame0pZ);
+
+
+ const Vec3V bodyFrame10p = V3LoadU(descs[0].bodyFrame1.p);
+ const Vec3V bodyFrame11p = V3LoadU(descs[1].bodyFrame1.p);
+ const Vec3V bodyFrame12p = V3LoadU(descs[2].bodyFrame1.p);
+ const Vec3V bodyFrame13p = V3LoadU(descs[3].bodyFrame1.p);
+
+ Vec4V bodyFrame10p4 = Vec4V_From_Vec3V(bodyFrame10p);
+ Vec4V bodyFrame11p4 = Vec4V_From_Vec3V(bodyFrame11p);
+ Vec4V bodyFrame12p4 = Vec4V_From_Vec3V(bodyFrame12p);
+ Vec4V bodyFrame13p4 = Vec4V_From_Vec3V(bodyFrame13p);
+
+ Vec4V bodyFrame1pX, bodyFrame1pY, bodyFrame1pZ;
+ PX_TRANSPOSE_44_34(bodyFrame10p4, bodyFrame11p4, bodyFrame12p4, bodyFrame13p4, bodyFrame1pX, bodyFrame1pY, bodyFrame1pZ);
+
+
+ const QuatV bodyFrame00q = QuatVLoadU(&descs[0].bodyFrame0.q.x);
+ const QuatV bodyFrame01q = QuatVLoadU(&descs[1].bodyFrame0.q.x);
+ const QuatV bodyFrame02q = QuatVLoadU(&descs[2].bodyFrame0.q.x);
+ const QuatV bodyFrame03q = QuatVLoadU(&descs[3].bodyFrame0.q.x);
+
+ const QuatV bodyFrame10q = QuatVLoadU(&descs[0].bodyFrame1.q.x);
+ const QuatV bodyFrame11q = QuatVLoadU(&descs[1].bodyFrame1.q.x);
+ const QuatV bodyFrame12q = QuatVLoadU(&descs[2].bodyFrame1.q.x);
+ const QuatV bodyFrame13q = QuatVLoadU(&descs[3].bodyFrame1.q.x);
+
+ PxU32 frictionPatchWritebackAddrIndex0 = 0;
+ PxU32 frictionPatchWritebackAddrIndex1 = 0;
+ PxU32 frictionPatchWritebackAddrIndex2 = 0;
+ PxU32 frictionPatchWritebackAddrIndex3 = 0;
+
+ Ps::prefetchLine(c.contactID);
+ Ps::prefetchLine(c.contactID, 128);
+
+ PxU32 frictionIndex0 = 0, frictionIndex1 = 0, frictionIndex2 = 0, frictionIndex3 = 0;
+ //PxU32 contactIndex0 = 0, contactIndex1 = 0, contactIndex2 = 0, contactIndex3 = 0;
+
+
+ //OK, we iterate through all friction patch counts in the constraint patch, building up the constraint list etc.
+
+ PxU32 maxPatches = PxMax(descs[0].numFrictionPatches, PxMax(descs[1].numFrictionPatches, PxMax(descs[2].numFrictionPatches, descs[3].numFrictionPatches)));
+
+ const Vec4V p1 = V4Splat(FLoad(0.1f));
+ const Vec4V orthoThreshold = V4Splat(FLoad(0.70710678f));
+
+
+ PxU32 contact0 = 0, contact1 = 0, contact2 = 0, contact3 = 0;
+ PxU32 patch0 = 0, patch1 = 0, patch2 = 0, patch3 = 0;
+
+ PxU8 flag = 0;
+ if(hasMaxImpulse)
+ flag |= SolverContactHeader4::eHAS_MAX_IMPULSE;
+
+ for(PxU32 i=0;i<maxPatches;i++)
+ {
+ const bool hasFinished0 = i >= descs[0].numFrictionPatches;
+ const bool hasFinished1 = i >= descs[1].numFrictionPatches;
+ const bool hasFinished2 = i >= descs[2].numFrictionPatches;
+ const bool hasFinished3 = i >= descs[3].numFrictionPatches;
+
+
+ frictionIndex0 = hasFinished0 ? frictionIndex0 : descs[0].startFrictionPatchIndex + i;
+ frictionIndex1 = hasFinished1 ? frictionIndex1 : descs[1].startFrictionPatchIndex + i;
+ frictionIndex2 = hasFinished2 ? frictionIndex2 : descs[2].startFrictionPatchIndex + i;
+ frictionIndex3 = hasFinished3 ? frictionIndex3 : descs[3].startFrictionPatchIndex + i;
+
+ PxU32 clampedContacts0 = hasFinished0 ? 0 : c.frictionPatchContactCounts[frictionIndex0];
+ PxU32 clampedContacts1 = hasFinished1 ? 0 : c.frictionPatchContactCounts[frictionIndex1];
+ PxU32 clampedContacts2 = hasFinished2 ? 0 : c.frictionPatchContactCounts[frictionIndex2];
+ PxU32 clampedContacts3 = hasFinished3 ? 0 : c.frictionPatchContactCounts[frictionIndex3];
+
+ PxU32 firstPatch0 = c.correlationListHeads[frictionIndex0];
+ PxU32 firstPatch1 = c.correlationListHeads[frictionIndex1];
+ PxU32 firstPatch2 = c.correlationListHeads[frictionIndex2];
+ PxU32 firstPatch3 = c.correlationListHeads[frictionIndex3];
+
+ const Gu::ContactPoint* contactBase0 = descs[0].contacts + c.contactPatches[firstPatch0].start;
+ const Gu::ContactPoint* contactBase1 = descs[1].contacts + c.contactPatches[firstPatch1].start;
+ const Gu::ContactPoint* contactBase2 = descs[2].contacts + c.contactPatches[firstPatch2].start;
+ const Gu::ContactPoint* contactBase3 = descs[3].contacts + c.contactPatches[firstPatch3].start;
+
+ const Vec4V restitution = V4Neg(V4LoadXYZW(contactBase0->restitution, contactBase1->restitution, contactBase2->restitution,
+ contactBase3->restitution));
+
+ SolverContactHeader4* PX_RESTRICT header = reinterpret_cast<SolverContactHeader4*>(ptr);
+ ptr += sizeof(SolverContactHeader4);
+
+
+ header->flags[0] = flags[0];
+ header->flags[1] = flags[1];
+ header->flags[2] = flags[2];
+ header->flags[3] = flags[3];
+
+ header->flag = flag;
+
+ PxU32 totalContacts = PxMax(clampedContacts0, PxMax(clampedContacts1, PxMax(clampedContacts2, clampedContacts3)));
+
+ Vec4V* PX_RESTRICT appliedNormalForces = reinterpret_cast<Vec4V*>(ptr);
+ ptr += sizeof(Vec4V)*totalContacts;
+
+ PxMemZero(appliedNormalForces, sizeof(Vec4V) * totalContacts);
+
+ header->numNormalConstr = Ps::to8(totalContacts);
+ header->numNormalConstr0 = Ps::to8(clampedContacts0);
+ header->numNormalConstr1 = Ps::to8(clampedContacts1);
+ header->numNormalConstr2 = Ps::to8(clampedContacts2);
+ header->numNormalConstr3 = Ps::to8(clampedContacts3);
+ //header->sqrtInvMassA = sqrtInvMass0;
+ //header->sqrtInvMassB = sqrtInvMass1;
+ header->invMass0D0 = invMass0D0;
+ header->invMass1D1 = invMass1D1;
+ header->angDom0 = angDom0;
+ header->angDom1 = angDom1;
+ header->shapeInteraction[0] = descs[0].shapeInteraction; header->shapeInteraction[1] = descs[1].shapeInteraction;
+ header->shapeInteraction[2] = descs[2].shapeInteraction; header->shapeInteraction[3] = descs[3].shapeInteraction;
+
+ Vec4V* maxImpulse = reinterpret_cast<Vec4V*>(ptr + constraintSize * totalContacts);
+
+ header->restitution = restitution;
+
+ Vec4V normal0 = V4LoadA(&contactBase0->normal.x);
+ Vec4V normal1 = V4LoadA(&contactBase1->normal.x);
+ Vec4V normal2 = V4LoadA(&contactBase2->normal.x);
+ Vec4V normal3 = V4LoadA(&contactBase3->normal.x);
+
+ Vec4V normalX, normalY, normalZ;
+ PX_TRANSPOSE_44_34(normal0, normal1, normal2, normal3, normalX, normalY, normalZ);
+
+ PX_ASSERT(ValidateVec4(normalX));
+ PX_ASSERT(ValidateVec4(normalY));
+ PX_ASSERT(ValidateVec4(normalZ));
+
+ header->normalX = normalX;
+ header->normalY = normalY;
+ header->normalZ = normalZ;
+
+ const Vec4V norVel0 = V4MulAdd(normalZ, linVelT20, V4MulAdd(normalY, linVelT10, V4Mul(normalX, linVelT00)));
+ const Vec4V norVel1 = V4MulAdd(normalZ, linVelT21, V4MulAdd(normalY, linVelT11, V4Mul(normalX, linVelT01)));
+ const Vec4V relNorVel = V4Sub(norVel0, norVel1);
+
+ //For all correlation heads - need to pull this out I think
+
+ //OK, we have a counter for all our patches...
+ PxU32 finished = (PxU32(hasFinished0)) |
+ ((PxU32(hasFinished1)) << 1) |
+ ((PxU32(hasFinished2)) << 2) |
+ ((PxU32(hasFinished3)) << 3);
+
+ CorrelationListIterator iter0(c, firstPatch0);
+ CorrelationListIterator iter1(c, firstPatch1);
+ CorrelationListIterator iter2(c, firstPatch2);
+ CorrelationListIterator iter3(c, firstPatch3);
+
+ //PxU32 contact0, contact1, contact2, contact3;
+ //PxU32 patch0, patch1, patch2, patch3;
+
+ if(!hasFinished0)
+ iter0.nextContact(patch0, contact0);
+ if(!hasFinished1)
+ iter1.nextContact(patch1, contact1);
+ if(!hasFinished2)
+ iter2.nextContact(patch2, contact2);
+ if(!hasFinished3)
+ iter3.nextContact(patch3, contact3);
+
+ PxU8* p = ptr;
+
+ PxU32 contactCount = 0;
+ PxU32 newFinished =
+ (PxU32(hasFinished0 || !iter0.hasNextContact())) |
+ ((PxU32(hasFinished1 || !iter1.hasNextContact())) << 1) |
+ ((PxU32(hasFinished2 || !iter2.hasNextContact())) << 2) |
+ ((PxU32(hasFinished3 || !iter3.hasNextContact())) << 3);
+
+ while(finished != 0xf)
+ {
+ finished = newFinished;
+ ++contactCount;
+ Ps::prefetchLine(p, 384);
+ Ps::prefetchLine(p, 512);
+ Ps::prefetchLine(p, 640);
+
+ SolverContactBatchPointBase4* PX_RESTRICT solverContact = reinterpret_cast<SolverContactBatchPointBase4*>(p);
+ p += constraintSize;
+
+ const Gu::ContactPoint& con0 = descs[0].contacts[c.contactPatches[patch0].start + contact0];
+ const Gu::ContactPoint& con1 = descs[1].contacts[c.contactPatches[patch1].start + contact1];
+ const Gu::ContactPoint& con2 = descs[2].contacts[c.contactPatches[patch2].start + contact2];
+ const Gu::ContactPoint& con3 = descs[3].contacts[c.contactPatches[patch3].start + contact3];
+
+ //Now we need to splice these 4 contacts into a single structure
+
+ {
+ Vec4V point0 = V4LoadA(&con0.point.x);
+ Vec4V point1 = V4LoadA(&con1.point.x);
+ Vec4V point2 = V4LoadA(&con2.point.x);
+ Vec4V point3 = V4LoadA(&con3.point.x);
+
+ Vec4V pointX, pointY, pointZ;
+ PX_TRANSPOSE_44_34(point0, point1, point2, point3, pointX, pointY, pointZ);
+
+ PX_ASSERT(ValidateVec4(pointX));
+ PX_ASSERT(ValidateVec4(pointY));
+ PX_ASSERT(ValidateVec4(pointZ));
+
+ Vec4V cTargetVel0 = V4LoadA(&con0.targetVel.x);
+ Vec4V cTargetVel1 = V4LoadA(&con1.targetVel.x);
+ Vec4V cTargetVel2 = V4LoadA(&con2.targetVel.x);
+ Vec4V cTargetVel3 = V4LoadA(&con3.targetVel.x);
+
+ Vec4V cTargetVelX, cTargetVelY, cTargetVelZ;
+ PX_TRANSPOSE_44_34(cTargetVel0, cTargetVel1, cTargetVel2, cTargetVel3, cTargetVelX, cTargetVelY, cTargetVelZ);
+
+ const Vec4V separation = V4LoadXYZW(con0.separation, con1.separation, con2.separation, con3.separation);
+
+ const Vec4V cTargetNorVel = V4MulAdd(cTargetVelX, normalX, V4MulAdd(cTargetVelY, normalY, V4Mul(cTargetVelZ, normalZ)));
+
+ const Vec4V raX = V4Sub(pointX, bodyFrame0pX);
+ const Vec4V raY = V4Sub(pointY, bodyFrame0pY);
+ const Vec4V raZ = V4Sub(pointZ, bodyFrame0pZ);
+
+ const Vec4V rbX = V4Sub(pointX, bodyFrame1pX);
+ const Vec4V rbY = V4Sub(pointY, bodyFrame1pY);
+ const Vec4V rbZ = V4Sub(pointZ, bodyFrame1pZ);
+
+ PX_ASSERT(ValidateVec4(raX));
+ PX_ASSERT(ValidateVec4(raY));
+ PX_ASSERT(ValidateVec4(raZ));
+
+ PX_ASSERT(ValidateVec4(rbX));
+ PX_ASSERT(ValidateVec4(rbY));
+ PX_ASSERT(ValidateVec4(rbZ));
+
+
+ //raXn = cross(ra, normal) which = Vec3V( a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
+
+ const Vec4V raXnX = V4NegMulSub(raZ, normalY, V4Mul(raY, normalZ));
+ const Vec4V raXnY = V4NegMulSub(raX, normalZ, V4Mul(raZ, normalX));
+ const Vec4V raXnZ = V4NegMulSub(raY, normalX, V4Mul(raX, normalY));
+
+ Vec4V delAngVel0X = V4Mul(invInertia0X0, raXnX);
+ Vec4V delAngVel0Y = V4Mul(invInertia0X1, raXnX);
+ Vec4V delAngVel0Z = V4Mul(invInertia0X2, raXnX);
+
+ delAngVel0X = V4MulAdd(invInertia0Y0, raXnY, delAngVel0X);
+ delAngVel0Y = V4MulAdd(invInertia0Y1, raXnY, delAngVel0Y);
+ delAngVel0Z = V4MulAdd(invInertia0Y2, raXnY, delAngVel0Z);
+
+ delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, delAngVel0X);
+ delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, delAngVel0Y);
+ delAngVel0Z = V4MulAdd(invInertia0Z2, raXnZ, delAngVel0Z);
+
+
+ PX_ASSERT(ValidateVec4(delAngVel0X));
+ PX_ASSERT(ValidateVec4(delAngVel0Y));
+ PX_ASSERT(ValidateVec4(delAngVel0Z));
+
+ const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0X, delAngVel0X, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0Z, delAngVel0Z)));
+ const Vec4V dotRaXnAngVel0 = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4Mul(raXnX, angVelT00)));
+
+ Vec4V unitResponse = V4MulAdd(invMass0D0, angDom0, dotDelAngVel0);
+ Vec4V vrel = V4Add(relNorVel, dotRaXnAngVel0);
+
+
+ //The dynamic-only parts - need to if-statement these up. A branch here shouldn't cost us too much
+ if(isDynamic)
+ {
+ SolverContactBatchPointDynamic4* PX_RESTRICT dynamicContact = static_cast<SolverContactBatchPointDynamic4*>(solverContact);
+ const Vec4V rbXnX = V4NegMulSub(rbZ, normalY, V4Mul(rbY, normalZ));
+ const Vec4V rbXnY = V4NegMulSub(rbX, normalZ, V4Mul(rbZ, normalX));
+ const Vec4V rbXnZ = V4NegMulSub(rbY, normalX, V4Mul(rbX, normalY));
+
+ Vec4V delAngVel1X = V4Mul(invInertia1X0, rbXnX);
+ Vec4V delAngVel1Y = V4Mul(invInertia1X1, rbXnX);
+ Vec4V delAngVel1Z = V4Mul(invInertia1X2, rbXnX);
+
+ delAngVel1X = V4MulAdd(invInertia1Y0, rbXnY, delAngVel1X);
+ delAngVel1Y = V4MulAdd(invInertia1Y1, rbXnY, delAngVel1Y);
+ delAngVel1Z = V4MulAdd(invInertia1Y2, rbXnY, delAngVel1Z);
+
+ delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, delAngVel1X);
+ delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, delAngVel1Y);
+ delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, delAngVel1Z);
+
+ PX_ASSERT(ValidateVec4(delAngVel1X));
+ PX_ASSERT(ValidateVec4(delAngVel1Y));
+ PX_ASSERT(ValidateVec4(delAngVel1Z));
+
+ const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1X, delAngVel1X, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1Z, delAngVel1Z)));
+ const Vec4V dotRbXnAngVel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01)));
+
+ const Vec4V resp1 = V4MulAdd(dotDelAngVel1, angDom1, invMass1D1);
+
+ unitResponse = V4Add(unitResponse, resp1);
+
+ vrel = V4Sub(vrel, dotRbXnAngVel1);
+
+ //These are for dynamic-only contacts.
+ dynamicContact->rbXnX = delAngVel1X;
+ dynamicContact->rbXnY = delAngVel1Y;
+ dynamicContact->rbXnZ = delAngVel1Z;
+
+ }
+ else if(hasKinematic)
+ {
+ const Vec4V rbXnX = V4NegMulSub(rbZ, normalY, V4Mul(rbY, normalZ));
+ const Vec4V rbXnY = V4NegMulSub(rbX, normalZ, V4Mul(rbZ, normalX));
+ const Vec4V rbXnZ = V4NegMulSub(rbY, normalX, V4Mul(rbX, normalY));
+
+ const Vec4V dotRbXnAngVel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01)));
+
+ vrel = V4Sub(vrel, dotRbXnAngVel1);
+ }
+
+ const Vec4V velMultiplier = V4Sel(V4IsGrtr(unitResponse, zero), V4Recip(unitResponse), zero);
+
+ const Vec4V penetration = V4Sub(separation, restDistance);
+ const Vec4V penInvDtPt8 = V4Max(maxPenBias, V4Scale(penetration, invDtp8));
+ Vec4V scaledBias = V4Mul(penInvDtPt8, velMultiplier);
+
+ const Vec4V penetrationInvDt = V4Scale(penetration, invDt);
+
+ const BoolV isGreater2 = BAnd(BAnd(V4IsGrtr(zero, restitution), V4IsGrtr(bounceThreshold, vrel)),
+ V4IsGrtr(V4Neg(vrel), penetrationInvDt));
+
+ const BoolV ccdSeparationCondition = V4IsGrtrOrEq(ccdMaxSeparation, penetration);
+
+ scaledBias = V4Sel(BAnd(ccdSeparationCondition, isGreater2), zero, V4Neg(scaledBias));
+
+ const Vec4V targetVelocity = V4Sel(isGreater2, V4Mul(velMultiplier, V4Mul(vrel, restitution)), zero);
+
+ //Vec4V biasedErr = V4Sel(isGreater2, targetVelocity, scaledBias);
+ Vec4V biasedErr = V4Add(targetVelocity, scaledBias);
+
+ biasedErr = V4NegMulSub(V4Sub(vrel, cTargetNorVel), velMultiplier, biasedErr);
+
+ //These values are present for static and dynamic contacts
+ solverContact->raXnX = delAngVel0X;
+ solverContact->raXnY = delAngVel0Y;
+ solverContact->raXnZ = delAngVel0Z;
+ solverContact->velMultiplier = velMultiplier;
+ solverContact->biasedErr = biasedErr;
+
+ //solverContact->scaledBias = V4Max(zero, scaledBias);
+ solverContact->scaledBias = V4Sel(isGreater2, scaledBias, V4Max(zero, scaledBias));
+
+ if(hasMaxImpulse)
+ {
+ maxImpulse[contactCount-1] = V4Merge(FLoad(con0.maxImpulse), FLoad(con1.maxImpulse), FLoad(con2.maxImpulse),
+ FLoad(con3.maxImpulse));
+ }
+ }
+ if(!(finished & 0x1))
+ {
+ iter0.nextContact(patch0, contact0);
+ newFinished |= PxU32(!iter0.hasNextContact());
+ }
+
+ if(!(finished & 0x2))
+ {
+ iter1.nextContact(patch1, contact1);
+ newFinished |= (PxU32(!iter1.hasNextContact()) << 1);
+ }
+
+ if(!(finished & 0x4))
+ {
+ iter2.nextContact(patch2, contact2);
+ newFinished |= (PxU32(!iter2.hasNextContact()) << 2);
+ }
+
+ if(!(finished & 0x8))
+ {
+ iter3.nextContact(patch3, contact3);
+ newFinished |= (PxU32(!iter3.hasNextContact()) << 3);
+ }
+ }
+ ptr = p;
+ if(hasMaxImpulse)
+ {
+ ptr += sizeof(Vec4V) * totalContacts;
+ }
+
+ //OK...friction time :-)
+
+ Vec4V maxImpulseScale = V4One();
+ {
+ const Vec4V staticFriction = V4LoadXYZW(contactBase0->staticFriction, contactBase1->staticFriction,
+ contactBase2->staticFriction, contactBase3->staticFriction);
+
+ const Vec4V dynamicFriction = V4LoadXYZW(contactBase0->dynamicFriction, contactBase1->dynamicFriction,
+ contactBase2->dynamicFriction, contactBase3->dynamicFriction);
+
+ PX_ASSERT(totalContacts == contactCount);
+ header->dynamicFriction = dynamicFriction;
+ header->staticFriction = staticFriction;
+
+ const FrictionPatch& frictionPatch0 = c.frictionPatches[frictionIndex0];
+ const FrictionPatch& frictionPatch1 = c.frictionPatches[frictionIndex1];
+ const FrictionPatch& frictionPatch2 = c.frictionPatches[frictionIndex2];
+ const FrictionPatch& frictionPatch3 = c.frictionPatches[frictionIndex3];
+
+ PxU32 anchorCount0 = frictionPatch0.anchorCount;
+ PxU32 anchorCount1 = frictionPatch1.anchorCount;
+ PxU32 anchorCount2 = frictionPatch2.anchorCount;
+ PxU32 anchorCount3 = frictionPatch3.anchorCount;
+
+ PxU32 clampedAnchorCount0 = hasFinished0 || (contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION) ? 0 : anchorCount0;
+ PxU32 clampedAnchorCount1 = hasFinished1 || (contactBase1->materialFlags & PxMaterialFlag::eDISABLE_FRICTION) ? 0 : anchorCount1;
+ PxU32 clampedAnchorCount2 = hasFinished2 || (contactBase2->materialFlags & PxMaterialFlag::eDISABLE_FRICTION) ? 0 : anchorCount2;
+ PxU32 clampedAnchorCount3 = hasFinished3 || (contactBase3->materialFlags & PxMaterialFlag::eDISABLE_FRICTION) ? 0 : anchorCount3;
+
+ const PxU32 maxAnchorCount = PxMax(clampedAnchorCount0, PxMax(clampedAnchorCount1, PxMax(clampedAnchorCount2, clampedAnchorCount3)));
+
+ //if(clampedAnchorCount0 != clampedAnchorCount1 || clampedAnchorCount0 != clampedAnchorCount2 || clampedAnchorCount0 != clampedAnchorCount3)
+ // Ps::debugBreak();
+
+
+ //const bool haveFriction = maxAnchorCount != 0;
+ header->numFrictionConstr = Ps::to8(maxAnchorCount*2);
+ header->numFrictionConstr0 = Ps::to8(clampedAnchorCount0*2);
+ header->numFrictionConstr1 = Ps::to8(clampedAnchorCount1*2);
+ header->numFrictionConstr2 = Ps::to8(clampedAnchorCount2*2);
+ header->numFrictionConstr3 = Ps::to8(clampedAnchorCount3*2);
+
+ //KS - TODO - extend this if needed
+ header->type = Ps::to8(isDynamic ? DY_SC_TYPE_BLOCK_RB_CONTACT : DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT);
+
+ if(maxAnchorCount)
+ {
+
+ //Allocate the shared friction data...
+
+ SolverFrictionSharedData4* PX_RESTRICT fd = reinterpret_cast<SolverFrictionSharedData4*>(ptr);
+ ptr += sizeof(SolverFrictionSharedData4);
+ PX_UNUSED(fd);
+
+ const BoolV cond =V4IsGrtr(orthoThreshold, V4Abs(normalX));
+
+ const Vec4V t0FallbackX = V4Sel(cond, zero, V4Neg(normalY));
+ const Vec4V t0FallbackY = V4Sel(cond, V4Neg(normalZ), normalX);
+ const Vec4V t0FallbackZ = V4Sel(cond, normalY, zero);
+
+ //const Vec4V dotNormalVrel = V4MulAdd(normalZ, vrelZ, V4MulAdd(normalY, vrelY, V4Mul(normalX, vrelX)));
+ const Vec4V vrelSubNorVelX = V4NegMulSub(normalX, relNorVel, vrelX);
+ const Vec4V vrelSubNorVelY = V4NegMulSub(normalY, relNorVel, vrelY);
+ const Vec4V vrelSubNorVelZ = V4NegMulSub(normalZ, relNorVel, vrelZ);
+
+ const Vec4V lenSqvrelSubNorVelZ = V4MulAdd(vrelSubNorVelX, vrelSubNorVelX, V4MulAdd(vrelSubNorVelY, vrelSubNorVelY, V4Mul(vrelSubNorVelZ, vrelSubNorVelZ)));
+
+ const BoolV bcon2 = V4IsGrtr(lenSqvrelSubNorVelZ, p1);
+
+ Vec4V t0X = V4Sel(bcon2, vrelSubNorVelX, t0FallbackX);
+ Vec4V t0Y = V4Sel(bcon2, vrelSubNorVelY, t0FallbackY);
+ Vec4V t0Z = V4Sel(bcon2, vrelSubNorVelZ, t0FallbackZ);
+
+
+ //Now normalize this...
+ const Vec4V recipLen = V4Rsqrt(V4MulAdd(t0Z, t0Z, V4MulAdd(t0Y, t0Y, V4Mul(t0X, t0X))));
+
+ t0X = V4Mul(t0X, recipLen);
+ t0Y = V4Mul(t0Y, recipLen);
+ t0Z = V4Mul(t0Z, recipLen);
+
+ Vec4V t1X = V4NegMulSub(normalZ, t0Y, V4Mul(normalY, t0Z));
+ Vec4V t1Y = V4NegMulSub(normalX, t0Z, V4Mul(normalZ, t0X));
+ Vec4V t1Z = V4NegMulSub(normalY, t0X, V4Mul(normalX, t0Y));
+
+ PX_ASSERT((uintptr_t(descs[0].frictionPtr) & 0xF) == 0);
+ PX_ASSERT((uintptr_t(descs[1].frictionPtr) & 0xF) == 0);
+ PX_ASSERT((uintptr_t(descs[2].frictionPtr) & 0xF) == 0);
+ PX_ASSERT((uintptr_t(descs[3].frictionPtr) & 0xF) == 0);
+
+
+ PxU8* PX_RESTRICT writeback0 = descs[0].frictionPtr + frictionPatchWritebackAddrIndex0*sizeof(FrictionPatch);
+ PxU8* PX_RESTRICT writeback1 = descs[1].frictionPtr + frictionPatchWritebackAddrIndex1*sizeof(FrictionPatch);
+ PxU8* PX_RESTRICT writeback2 = descs[2].frictionPtr + frictionPatchWritebackAddrIndex2*sizeof(FrictionPatch);
+ PxU8* PX_RESTRICT writeback3 = descs[3].frictionPtr + frictionPatchWritebackAddrIndex3*sizeof(FrictionPatch);
+
+ PxU32 index0 = 0, index1 = 0, index2 = 0, index3 = 0;
+
+ fd->broken = bFalse;
+ fd->frictionBrokenWritebackByte[0] = writeback0;
+ fd->frictionBrokenWritebackByte[1] = writeback1;
+ fd->frictionBrokenWritebackByte[2] = writeback2;
+ fd->frictionBrokenWritebackByte[3] = writeback3;
+
+
+ fd->normalX[0] = t0X;
+ fd->normalY[0] = t0Y;
+ fd->normalZ[0] = t0Z;
+
+ fd->normalX[1] = t1X;
+ fd->normalY[1] = t1Y;
+ fd->normalZ[1] = t1Z;
+
+ Vec4V* PX_RESTRICT appliedForces = reinterpret_cast<Vec4V*>(ptr);
+ ptr += sizeof(Vec4V)*header->numFrictionConstr;
+
+ PxMemZero(appliedForces, sizeof(Vec4V) * header->numFrictionConstr);
+
+ for(PxU32 j = 0; j < maxAnchorCount; j++)
+ {
+ Ps::prefetchLine(ptr, 384);
+ Ps::prefetchLine(ptr, 512);
+ Ps::prefetchLine(ptr, 640);
+ SolverContactFrictionBase4* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionBase4*>(ptr);
+ ptr += frictionSize;
+ SolverContactFrictionBase4* PX_RESTRICT f1 = reinterpret_cast<SolverContactFrictionBase4*>(ptr);
+ ptr += frictionSize;
+
+ index0 = j < clampedAnchorCount0 ? j : index0;
+ index1 = j < clampedAnchorCount1 ? j : index1;
+ index2 = j < clampedAnchorCount2 ? j : index2;
+ index3 = j < clampedAnchorCount3 ? j : index3;
+
+ if(j >= clampedAnchorCount0)
+ maxImpulseScale = V4SetX(maxImpulseScale, fZero);
+ if(j >= clampedAnchorCount1)
+ maxImpulseScale = V4SetY(maxImpulseScale, fZero);
+ if(j >= clampedAnchorCount2)
+ maxImpulseScale = V4SetZ(maxImpulseScale, fZero);
+ if(j >= clampedAnchorCount3)
+ maxImpulseScale = V4SetW(maxImpulseScale, fZero);
+
+ t0X = V4Mul(maxImpulseScale, t0X);
+ t0Y = V4Mul(maxImpulseScale, t0Y);
+ t0Z = V4Mul(maxImpulseScale, t0Z);
+
+ t1X = V4Mul(maxImpulseScale, t1X);
+ t1Y = V4Mul(maxImpulseScale, t1Y);
+ t1Z = V4Mul(maxImpulseScale, t1Z);
+
+
+ Vec3V body0Anchor0 = V3LoadU(frictionPatch0.body0Anchors[index0]);
+ Vec3V body0Anchor1 = V3LoadU(frictionPatch1.body0Anchors[index1]);
+ Vec3V body0Anchor2 = V3LoadU(frictionPatch2.body0Anchors[index2]);
+ Vec3V body0Anchor3 = V3LoadU(frictionPatch3.body0Anchors[index3]);
+
+ Vec4V ra0 = Vec4V_From_Vec3V(QuatRotate(bodyFrame00q, body0Anchor0));
+ Vec4V ra1 = Vec4V_From_Vec3V(QuatRotate(bodyFrame01q, body0Anchor1));
+ Vec4V ra2 = Vec4V_From_Vec3V(QuatRotate(bodyFrame02q, body0Anchor2));
+ Vec4V ra3 = Vec4V_From_Vec3V(QuatRotate(bodyFrame03q, body0Anchor3));
+
+ Vec4V raX, raY, raZ;
+ PX_TRANSPOSE_44_34(ra0, ra1, ra2, ra3, raX, raY, raZ);
+
+ const Vec4V raWorldX = V4Add(raX, bodyFrame0pX);
+ const Vec4V raWorldY = V4Add(raY, bodyFrame0pY);
+ const Vec4V raWorldZ = V4Add(raZ, bodyFrame0pZ);
+
+ Vec3V body1Anchor0 = V3LoadU(frictionPatch0.body1Anchors[index0]);
+ Vec3V body1Anchor1 = V3LoadU(frictionPatch1.body1Anchors[index1]);
+ Vec3V body1Anchor2 = V3LoadU(frictionPatch2.body1Anchors[index2]);
+ Vec3V body1Anchor3 = V3LoadU(frictionPatch3.body1Anchors[index3]);
+
+ Vec4V rb0 = Vec4V_From_Vec3V(QuatRotate(bodyFrame10q, body1Anchor0));
+ Vec4V rb1 = Vec4V_From_Vec3V(QuatRotate(bodyFrame11q, body1Anchor1));
+ Vec4V rb2 = Vec4V_From_Vec3V(QuatRotate(bodyFrame12q, body1Anchor2));
+ Vec4V rb3 = Vec4V_From_Vec3V(QuatRotate(bodyFrame13q, body1Anchor3));
+
+ Vec4V rbX, rbY, rbZ;
+ PX_TRANSPOSE_44_34(rb0, rb1, rb2, rb3, rbX, rbY, rbZ);
+
+ const Vec4V rbWorldX = V4Add(rbX, bodyFrame1pX);
+ const Vec4V rbWorldY = V4Add(rbY, bodyFrame1pY);
+ const Vec4V rbWorldZ = V4Add(rbZ, bodyFrame1pZ);
+
+ const Vec4V errorX = V4Sub(raWorldX, rbWorldX);
+ const Vec4V errorY = V4Sub(raWorldY, rbWorldY);
+ const Vec4V errorZ = V4Sub(raWorldZ, rbWorldZ);
+
+ //KS - todo - get this working with per-point friction
+ //PxU32 index0 = /*perPointFriction ? c.contactID[i][j] : */c.contactPatches[c.correlationListHeads[i]].start;
+
+ Vec4V targetVel0 = V4LoadA(&contactBase0->targetVel.x);
+ Vec4V targetVel1 = V4LoadA(&contactBase1->targetVel.x);
+ Vec4V targetVel2 = V4LoadA(&contactBase2->targetVel.x);
+ Vec4V targetVel3 = V4LoadA(&contactBase3->targetVel.x);
+
+ Vec4V targetVelX, targetVelY, targetVelZ;
+ PX_TRANSPOSE_44_34(targetVel0, targetVel1, targetVel2, targetVel3, targetVelX, targetVelY, targetVelZ);
+
+
+ {
+ const Vec4V raXnX = V4NegMulSub(raZ, t0Y, V4Mul(raY, t0Z));
+ const Vec4V raXnY = V4NegMulSub(raX, t0Z, V4Mul(raZ, t0X));
+ const Vec4V raXnZ = V4NegMulSub(raY, t0X, V4Mul(raX, t0Y));
+
+ Vec4V delAngVel0X = V4Mul(invInertia0X0, raXnX);
+ Vec4V delAngVel0Y = V4Mul(invInertia0X1, raXnX);
+ Vec4V delAngVel0Z = V4Mul(invInertia0X2, raXnX);
+
+ delAngVel0X = V4MulAdd(invInertia0Y0, raXnY, delAngVel0X);
+ delAngVel0Y = V4MulAdd(invInertia0Y1, raXnY, delAngVel0Y);
+ delAngVel0Z = V4MulAdd(invInertia0Y2, raXnY, delAngVel0Z);
+
+ delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, delAngVel0X);
+ delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, delAngVel0Y);
+ delAngVel0Z = V4MulAdd(invInertia0Z2, raXnZ, delAngVel0Z);
+
+ const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0Z, delAngVel0Z, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0X, delAngVel0X)));
+
+ Vec4V resp = V4MulAdd(dotDelAngVel0, angDom0, invMass0D0);
+
+ const Vec4V tVel0 = V4MulAdd(t0Z, linVelT20, V4MulAdd(t0Y, linVelT10, V4Mul(t0X, linVelT00)));
+ Vec4V vrel = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4MulAdd(raXnX, angVelT00, tVel0)));
+
+ if(isDynamic)
+ {
+ SolverContactFrictionDynamic4* PX_RESTRICT dynamicF0 = static_cast<SolverContactFrictionDynamic4*>(f0);
+
+ const Vec4V rbXnX = V4NegMulSub(rbZ, t0Y, V4Mul(rbY, t0Z));
+ const Vec4V rbXnY = V4NegMulSub(rbX, t0Z, V4Mul(rbZ, t0X));
+ const Vec4V rbXnZ = V4NegMulSub(rbY, t0X, V4Mul(rbX, t0Y));
+
+ Vec4V delAngVel1X = V4Mul(invInertia1X0, rbXnX);
+ Vec4V delAngVel1Y = V4Mul(invInertia1X1, rbXnX);
+ Vec4V delAngVel1Z = V4Mul(invInertia1X2, rbXnX);
+
+ delAngVel1X = V4MulAdd(invInertia1Y0, rbXnY, delAngVel1X);
+ delAngVel1Y = V4MulAdd(invInertia1Y1, rbXnY, delAngVel1Y);
+ delAngVel1Z = V4MulAdd(invInertia1Y2, rbXnY, delAngVel1Z);
+
+ delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, delAngVel1X);
+ delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, delAngVel1Y);
+ delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, delAngVel1Z);
+
+ const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1Z, delAngVel1Z, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1X, delAngVel1X)));
+
+ const Vec4V resp1 = V4MulAdd(dotDelAngVel1, angDom1, invMass1D1);
+
+ resp = V4Add(resp, resp1);
+
+ dynamicF0->rbXnX = delAngVel1X;
+ dynamicF0->rbXnY = delAngVel1Y;
+ dynamicF0->rbXnZ = delAngVel1Z;
+
+ const Vec4V tVel1 = V4MulAdd(t0Z, linVelT21, V4MulAdd(t0Y, linVelT11, V4Mul(t0X, linVelT01)));
+ const Vec4V vel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4MulAdd(rbXnX, angVelT01, tVel1)));
+
+ vrel = V4Sub(vrel, vel1);
+ }
+ else if(hasKinematic)
+ {
+ const Vec4V rbXnX = V4NegMulSub(rbZ, t0Y, V4Mul(rbY, t0Z));
+ const Vec4V rbXnY = V4NegMulSub(rbX, t0Z, V4Mul(rbZ, t0X));
+ const Vec4V rbXnZ = V4NegMulSub(rbY, t0X, V4Mul(rbX, t0Y));
+
+ const Vec4V dotRbXnAngVel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01)));
+
+ vrel = V4Sub(vrel, dotRbXnAngVel1);
+ }
+
+
+ const Vec4V velMultiplier = V4Mul(maxImpulseScale, V4Sel(V4IsGrtr(resp, zero), V4Div(p84, resp), zero));
+
+ Vec4V bias = V4Scale(V4MulAdd(t0Z, errorZ, V4MulAdd(t0Y, errorY, V4Mul(t0X, errorX))), invDt);
+
+ Vec4V targetVel = V4MulAdd(t0Z, targetVelZ,V4MulAdd(t0Y, targetVelY, V4Mul(t0X, targetVelX)));
+ targetVel = V4Sub(targetVel, vrel);
+ f0->targetVelocity = V4Neg(V4Mul(targetVel, velMultiplier));
+ bias = V4Sub(bias, targetVel);
+
+ f0->raXnX = delAngVel0X;
+ f0->raXnY = delAngVel0Y;
+ f0->raXnZ = delAngVel0Z;
+ f0->scaledBias = V4Mul(bias, velMultiplier);
+ f0->velMultiplier = velMultiplier;
+ }
+
+ {
+ const Vec4V raXnX = V4NegMulSub(raZ, t1Y, V4Mul(raY, t1Z));
+ const Vec4V raXnY = V4NegMulSub(raX, t1Z, V4Mul(raZ, t1X));
+ const Vec4V raXnZ = V4NegMulSub(raY, t1X, V4Mul(raX, t1Y));
+
+ Vec4V delAngVel0X = V4Mul(invInertia0X0, raXnX);
+ Vec4V delAngVel0Y = V4Mul(invInertia0X1, raXnX);
+ Vec4V delAngVel0Z = V4Mul(invInertia0X2, raXnX);
+
+ delAngVel0X = V4MulAdd(invInertia0Y0, raXnY, delAngVel0X);
+ delAngVel0Y = V4MulAdd(invInertia0Y1, raXnY, delAngVel0Y);
+ delAngVel0Z = V4MulAdd(invInertia0Y2, raXnY, delAngVel0Z);
+
+ delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, delAngVel0X);
+ delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, delAngVel0Y);
+ delAngVel0Z = V4MulAdd(invInertia0Z2, raXnZ, delAngVel0Z);
+
+ const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0Z, delAngVel0Z, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0X, delAngVel0X)));
+
+ Vec4V resp = V4MulAdd(dotDelAngVel0, angDom0, invMass0D0);
+
+ const Vec4V tVel0 = V4MulAdd(t1Z, linVelT20, V4MulAdd(t1Y, linVelT10, V4Mul(t1X, linVelT00)));
+ Vec4V vrel = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4MulAdd(raXnX, angVelT00, tVel0)));
+
+ if(isDynamic)
+ {
+ SolverContactFrictionDynamic4* PX_RESTRICT dynamicF1 = static_cast<SolverContactFrictionDynamic4*>(f1);
+
+ const Vec4V rbXnX = V4NegMulSub(rbZ, t1Y, V4Mul(rbY, t1Z));
+ const Vec4V rbXnY = V4NegMulSub(rbX, t1Z, V4Mul(rbZ, t1X));
+ const Vec4V rbXnZ = V4NegMulSub(rbY, t1X, V4Mul(rbX, t1Y));
+
+ Vec4V delAngVel1X = V4Mul(invInertia1X0, rbXnX);
+ Vec4V delAngVel1Y = V4Mul(invInertia1X1, rbXnX);
+ Vec4V delAngVel1Z = V4Mul(invInertia1X2, rbXnX);
+
+ delAngVel1X = V4MulAdd(invInertia1Y0, rbXnY, delAngVel1X);
+ delAngVel1Y = V4MulAdd(invInertia1Y1, rbXnY, delAngVel1Y);
+ delAngVel1Z = V4MulAdd(invInertia1Y2, rbXnY, delAngVel1Z);
+
+ delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, delAngVel1X);
+ delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, delAngVel1Y);
+ delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, delAngVel1Z);
+
+ const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1Z, delAngVel1Z, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1X, delAngVel1X)));
+
+ const Vec4V resp1 = V4MulAdd(dotDelAngVel1, angDom1, invMass1D1);
+
+ resp = V4Add(resp, resp1);
+
+ dynamicF1->rbXnX = delAngVel1X;
+ dynamicF1->rbXnY = delAngVel1Y;
+ dynamicF1->rbXnZ = delAngVel1Z;
+
+ const Vec4V tVel1 = V4MulAdd(t1Z, linVelT21, V4MulAdd(t1Y, linVelT11, V4Mul(t1X, linVelT01)));
+ const Vec4V vel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4MulAdd(rbXnX, angVelT01, tVel1)));
+
+ vrel = V4Sub(vrel, vel1);
+
+ }
+ else if(hasKinematic)
+ {
+ const Vec4V rbXnX = V4NegMulSub(rbZ, t1Y, V4Mul(rbY, t1Z));
+ const Vec4V rbXnY = V4NegMulSub(rbX, t1Z, V4Mul(rbZ, t1X));
+ const Vec4V rbXnZ = V4NegMulSub(rbY, t1X, V4Mul(rbX, t1Y));
+
+ const Vec4V dotRbXnAngVel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01)));
+
+ vrel = V4Sub(vrel, dotRbXnAngVel1);
+ }
+
+
+ const Vec4V velMultiplier = V4Mul(maxImpulseScale, V4Sel(V4IsGrtr(resp, zero), V4Div(p84, resp), zero));
+
+ Vec4V bias = V4Scale(V4MulAdd(t1Z, errorZ, V4MulAdd(t1Y, errorY, V4Mul(t1X, errorX))), invDt);
+
+ Vec4V targetVel = V4MulAdd(t1Z, targetVelZ,V4MulAdd(t1Y, targetVelY, V4Mul(t1X, targetVelX)));
+ targetVel = V4Sub(targetVel, vrel);
+ f1->targetVelocity = V4Neg(V4Mul(targetVel, velMultiplier));
+ bias = V4Sub(bias, targetVel);
+ f1->raXnX = delAngVel0X;
+ f1->raXnY = delAngVel0Y;
+ f1->raXnZ = delAngVel0Z;
+ f1->scaledBias = V4Mul(bias, velMultiplier);
+ f1->velMultiplier = velMultiplier;
+ }
+ }
+
+ frictionPatchWritebackAddrIndex0++;
+ frictionPatchWritebackAddrIndex1++;
+ frictionPatchWritebackAddrIndex2++;
+ frictionPatchWritebackAddrIndex3++;
+ }
+ }
+ }
+}
+
+
+// Counts the friction patches in [frictionPatchStartIndex, frictionPatchEndIndex) whose correlation list head is not LIST_END, and reports that count plus the 16-byte-aligned number of bytes needed to store them.
+PX_FORCE_INLINE void computeBlockStreamFrictionByteSizes(const CorrelationBuffer& c,
+ PxU32& _frictionPatchByteSize, PxU32& _numFrictionPatches,
+ PxU32 frictionPatchStartIndex, PxU32 frictionPatchEndIndex)
+{
+ // PT: tally in a local and publish once at the end, to remove LHS on the reference outputs
+ PxU32 liveCount = 0;
+ PxU32 patchIndex = frictionPatchStartIndex;
+ while(patchIndex < frictionPatchEndIndex)
+ {
+ // A head equal to LIST_END is not counted, so such a patch contributes no bytes.
+ const bool isLive = (c.correlationListHeads[patchIndex] != CorrelationBuffer::LIST_END);
+ liveCount += isLive ? 1u : 0u;
+ ++patchIndex;
+ }
+ // Raw payload size first, then rounded up to the next multiple of 16 bytes.
+ const PxU32 rawByteSize = liveCount*sizeof(FrictionPatch);
+ const PxU32 alignedByteSize = ((rawByteSize + 0x0f) & ~0x0f);
+ PX_ASSERT(0 == (alignedByteSize & 0x0f));
+ _numFrictionPatches = liveCount;
+ _frictionPatchByteSize = alignedByteSize;
+}
+// Reserves storage from constraintAllocator for the friction patches in [frictionPatchStartIndex, frictionPatchEndIndex) that have a non-LIST_END correlation head. Returns false only when storage was needed and the reservation failed.
+static bool reserveFrictionBlockStreams(const CorrelationBuffer& c, PxConstraintAllocator& constraintAllocator, PxU32 frictionPatchStartIndex, PxU32 frictionPatchEndIndex,
+ FrictionPatch*& _frictionPatches, // out: the reserved buffer, or NULL if nothing was needed or the reservation failed
+ PxU32& numFrictionPatches) // out: number of patches counted by computeBlockStreamFrictionByteSizes
+{
+
+ //From frictionPatchStream we just need to reserve a single buffer.
+ PxU32 frictionPatchByteSize = 0;
+ //Compute the size of that buffer (already rounded up to 16 bytes) and the number of patches it will hold.
+
+ computeBlockStreamFrictionByteSizes(c, frictionPatchByteSize, numFrictionPatches, frictionPatchStartIndex, frictionPatchEndIndex);
+
+ FrictionPatch* frictionPatches = NULL;
+ //Only ask the allocator for memory when there is at least one patch to store.
+ if(frictionPatchByteSize > 0)
+ {
+ frictionPatches = reinterpret_cast<FrictionPatch*>(constraintAllocator.reserveFrictionData(frictionPatchByteSize));
+
+ if(0==frictionPatches || (reinterpret_cast<FrictionPatch*>(-1))==frictionPatches) // NULL and the all-ones pointer are both failure results
+ {
+ if(0==frictionPatches)
+ {
+ PX_WARN_ONCE(
+ "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+ "Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks.");
+ }
+ else
+ {
+ PX_WARN_ONCE(
+ "Attempting to allocate more than 16K of friction data for a single contact pair in constraint prep. "
+ "Either accept dropped contacts or simplify collision geometry.");
+ frictionPatches=NULL; // collapse the -1 result to NULL so the caller and the return below see a plain failure
+ }
+ }
+ }
+
+ _frictionPatches = frictionPatches;
+
+ //Return true if no storage was needed or the reservation succeeded.
+ return (0==frictionPatchByteSize || frictionPatches);
+}
+
+//The persistent friction patch correlation/allocation will already have happened, as this is per-pair.
+//This function just computes the size of the combined solve data shared by the four pairs of a batch, plus each pair's own constraint-row count.
+void computeBlockStreamByteSizes4(PxSolverContactDesc* descs, // the four pair descriptors (indexed 0..3)
+ PxU32& _solverConstraintByteSize, PxU32* _axisConstraintCount, // out: block size rounded up to 16 bytes (must be 0 on entry); out: per-pair row counts, 4 entries
+ const CorrelationBuffer& c) // friction patches and per-patch contact counts already built for these pairs
+{
+ PX_ASSERT(0 == _solverConstraintByteSize);
+
+ PxU32 maxPatches = 0;
+ PxU32 maxFrictionPatches = 0;
+ PxU32 maxContactCount[CorrelationBuffer::MAX_FRICTION_PATCHES]; // per patch slot: largest contact count over the four pairs
+ PxU32 maxFrictionCount[CorrelationBuffer::MAX_FRICTION_PATCHES]; // per patch slot: largest friction row count over the four pairs
+ PxMemZero(maxContactCount, sizeof(maxContactCount));
+ PxMemZero(maxFrictionCount, sizeof(maxFrictionCount));
+ bool hasMaxImpulse = false;
+
+ for(PxU32 a = 0; a < 4; ++a)
+ {
+ PxU32 axisConstraintCount = 0;
+ hasMaxImpulse = hasMaxImpulse || descs[a].hasMaxImpulse; // one pair with max impulse is enough to reserve the buffer for the whole batch
+ for(PxU32 i = 0; i < descs[a].numFrictionPatches; i++) // i is the patch slot; assumes numFrictionPatches <= MAX_FRICTION_PATCHES - TODO confirm
+ {
+ PxU32 ind = i + descs[a].startFrictionPatchIndex; // index of this pair's i-th patch inside c
+
+ const FrictionPatch& frictionPatch = c.frictionPatches[ind];
+
+ const bool haveFriction = (frictionPatch.materialFlags & PxMaterialFlag::eDISABLE_FRICTION) == 0
+ && frictionPatch.anchorCount != 0;
+ //Solver constraint data. A patch without contacts contributes nothing, friction included.
+ if(c.frictionPatchContactCounts[ind]!=0)
+ {
+ maxContactCount[i] = PxMax(c.frictionPatchContactCounts[ind], maxContactCount[i]);
+ axisConstraintCount += c.frictionPatchContactCounts[ind];
+
+ if(haveFriction)
+ {
+ const PxU32 fricCount = PxU32(c.frictionPatches[ind].anchorCount) * 2; // two friction rows per anchor
+ maxFrictionCount[i] = PxMax(fricCount, maxFrictionCount[i]);
+ axisConstraintCount += fricCount;
+ }
+ }
+ }
+ maxPatches = PxMax(descs[a].numFrictionPatches, maxPatches);
+ _axisConstraintCount[a] = axisConstraintCount;
+ }
+ //Count the patch slots in which at least one pair has friction; each one is charged a SolverFrictionSharedData4 below.
+ for(PxU32 a = 0; a < maxPatches; ++a)
+ {
+ if(maxFrictionCount[a] > 0)
+ maxFrictionPatches++;
+ }
+
+ //Each slot is sized for its largest pair, so the totals are sums of the per-slot maxima.
+ PxU32 totalContacts = 0, totalFriction = 0;
+ for(PxU32 a = 0; a < maxPatches; ++a)
+ {
+ totalContacts += maxContactCount[a];
+ totalFriction += maxFrictionCount[a];
+ }
+
+ //OK, we have a given number of friction patches, contact points and friction constraints so we can calculate how much memory we need
+
+ //Body 2 is considered static if it is either *not dynamic* or *kinematic*
+
+ bool hasDynamicBody = false;
+ for(PxU32 a = 0; a < 4; ++a)
+ {
+ hasDynamicBody = hasDynamicBody || ((descs[a].bodyState1 == PxSolverContactDesc::eDYNAMIC_BODY));
+ }
+
+
+ const bool isStatic = !hasDynamicBody; // the smaller "Base4" layouts are used only when none of the four pairs has a dynamic second body
+
+ const PxU32 headerSize = sizeof(SolverContactHeader4) * maxPatches + sizeof(SolverFrictionSharedData4) * maxFrictionPatches;
+ PxU32 constraintSize = isStatic ? (sizeof(SolverContactBatchPointBase4) * totalContacts) + ( sizeof(SolverContactFrictionBase4) * totalFriction) :
+ (sizeof(SolverContactBatchPointDynamic4) * totalContacts) + (sizeof(SolverContactFrictionDynamic4) * totalFriction);
+
+ //Space for the appliedForce buffer (one Vec4V per contact and per friction row)
+ constraintSize += sizeof(Vec4V)*(totalContacts+totalFriction);
+
+ //If we have max impulse, reserve a buffer for it
+ if(hasMaxImpulse)
+ constraintSize += sizeof(Ps::aos::Vec4V) * totalContacts;
+
+ _solverConstraintByteSize = ((constraintSize + headerSize + 0x0f) & ~0x0f); // round the total up to a multiple of 16 bytes
+ PX_ASSERT(0 == (_solverConstraintByteSize & 0x0f));
+}
+// Sizes and reserves the single constraint block shared by the four pairs in descs. Returns eSUCCESS (solverConstraint/solverConstraintByteSize then describe the block), eUNBATCHABLE if the block would exceed the size limit, or eOUT_OF_MEMORY if the reservation failed.
+static SolverConstraintPrepState::Enum reserveBlockStreams4(PxSolverContactDesc* descs, Dy::CorrelationBuffer& c,
+ PxU8*& solverConstraint, PxU32* axisConstraintCount, // out: block pointer, must be NULL on entry; out: per-pair row counts, 4 entries
+ PxU32& solverConstraintByteSize, // out: block size in bytes, must be 0 on entry
+ PxConstraintAllocator& constraintAllocator)
+{
+ PX_ASSERT(NULL == solverConstraint);
+ PX_ASSERT(0 == solverConstraintByteSize);
+
+ //Compute the sizes of all the buffers.
+ computeBlockStreamByteSizes4(descs,
+ solverConstraintByteSize, axisConstraintCount,
+ c);
+
+ //Reserve the buffers.
+
+ //First reserve the accumulated buffer size for the constraint block.
+ PxU8* constraintBlock = NULL;
+ const PxU32 constraintBlockByteSize = solverConstraintByteSize;
+ if(constraintBlockByteSize > 0)
+ {
+ if((constraintBlockByteSize + 16u) > 16384) // too big for one reservation: the caller falls back to preparing the four pairs separately
+ return SolverConstraintPrepState::eUNBATCHABLE;
+
+ constraintBlock = constraintAllocator.reserveConstraintData(constraintBlockByteSize + 16u); // 16 bytes more than the computed size; presumably room for the zero word the caller writes just past the block - confirm
+
+ if(0==constraintBlock || (reinterpret_cast<PxU8*>(-1))==constraintBlock) // NULL and the all-ones pointer are both failure results
+ {
+ if(0==constraintBlock)
+ {
+ PX_WARN_ONCE(
+ "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+ "Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks.");
+ }
+ else
+ {
+ PX_WARN_ONCE(
+ "Attempting to allocate more than 16K of contact data for a single contact pair in constraint prep. "
+ "Either accept dropped contacts or simplify collision geometry.");
+ constraintBlock=NULL; // collapse the -1 result to NULL so the checks below see a plain failure
+ }
+ }
+ }
+
+ //Patch up the individual ptrs to the buffer returned by the constraint block reservation (assuming the reservation didn't fail).
+ if(0==constraintBlockByteSize || constraintBlock)
+ {
+ if(solverConstraintByteSize)
+ {
+ solverConstraint = constraintBlock;
+ PX_ASSERT(0==(uintptr_t(solverConstraint) & 0x0f));
+ }
+ }
+
+ return ((0==constraintBlockByteSize || constraintBlock)) ? SolverConstraintPrepState::eSUCCESS : SolverConstraintPrepState::eOUT_OF_MEMORY; // a zero-sized block also counts as success, with solverConstraint left NULL
+}
+// Builds one batched (4-wide) solver contact constraint block for the four pairs in blockDescs, whose contacts are already referenced by blockDesc.contacts / numContacts.
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4(
+ Dy::CorrelationBuffer& c, // correlation scratch; its patch counts are reset here and it is then filled for all four pairs
+ PxSolverContactDesc* blockDescs, // the four pair descriptors (indexed 0..3); patch indices/counts, frictionPtr/frictionCount, axisConstraintCount and *desc are written
+ const PxReal invDtF32, // forwarded to setupFinalizeSolverConstraints4
+ PxReal bounceThresholdF32, // forwarded to setupFinalizeSolverConstraints4
+ PxReal frictionOffsetThreshold, // added to each pair's restDistance and passed to growPatches
+ PxReal correlationDistance, // passed to getFrictionPatches and growPatches
+ PxConstraintAllocator& constraintAllocator) // source of the friction-patch and constraint-block reservations
+{
+ // Returns eUNBATCHABLE when any pair fails patch fetch/creation/correlation, eOUT_OF_MEMORY when a friction reservation fails, whatever reserveBlockStreams4 reports if it is not eSUCCESS, and eSUCCESS otherwise.
+ PX_ALIGN(16, PxReal invMassScale0[4]);
+ PX_ALIGN(16, PxReal invMassScale1[4]);
+ PX_ALIGN(16, PxReal invInertiaScale0[4]);
+ PX_ALIGN(16, PxReal invInertiaScale1[4]);
+ // Start from empty patch lists; each pair records its start index from these running counters.
+ c.frictionPatchCount = 0;
+ c.contactPatchCount = 0;
+
+ for (PxU32 a = 0; a < 4; ++a)
+ {
+ PxSolverContactDesc& blockDesc = blockDescs[a];
+ // Gather the per-pair mass/inertia scales into lane a of the aligned arrays that are loaded with V4LoadA further down.
+ invMassScale0[a] = blockDesc.mInvMassScales.linear0;
+ invMassScale1[a] = blockDesc.mInvMassScales.linear1;
+ invInertiaScale0[a] = blockDesc.mInvMassScales.angular0;
+ invInertiaScale1[a] = blockDesc.mInvMassScales.angular1;
+
+ blockDesc.startFrictionPatchIndex = c.frictionPatchCount;
+ if (!(blockDesc.disableStrongFriction)) // getFrictionPatches is skipped entirely when strong friction is disabled
+ {
+ bool valid = getFrictionPatches(c, blockDesc.frictionPtr, blockDesc.frictionCount,
+ blockDesc.bodyFrame0, blockDesc.bodyFrame1, correlationDistance);
+ if (!valid)
+ return SolverConstraintPrepState::eUNBATCHABLE;
+ }
+ //Create the contact patches
+ blockDesc.startContactPatchIndex = c.contactPatchCount;
+ if (!createContactPatches(c, blockDesc.contacts, blockDesc.numContacts, PXC_SAME_NORMAL))
+ return SolverConstraintPrepState::eUNBATCHABLE;
+ blockDesc.numContactPatches = PxU16(c.contactPatchCount - blockDesc.startContactPatchIndex);
+
+ bool overflow = correlatePatches(c, blockDesc.contacts, blockDesc.bodyFrame0, blockDesc.bodyFrame1, PXC_SAME_NORMAL,
+ blockDesc.startContactPatchIndex, blockDesc.startFrictionPatchIndex);
+
+ if (overflow)
+ return SolverConstraintPrepState::eUNBATCHABLE;
+
+ growPatches(c, blockDesc.contacts, blockDesc.bodyFrame0, blockDesc.bodyFrame1, correlationDistance, blockDesc.startFrictionPatchIndex,
+ frictionOffsetThreshold + blockDesc.restDistance);
+
+ //Remove the empty friction patches - do we actually need to do this?
+ for (PxU32 p = c.frictionPatchCount; p > blockDesc.startFrictionPatchIndex; --p) // walk backwards so a removal only shifts entries that were already visited
+ {
+ if (c.correlationListHeads[p - 1] == CorrelationBuffer::LIST_END) // same sentinel the friction-buffer copy below tests for
+ {
+ //We have an empty patch...need to bin this one... NOTE(review): only correlationListHeads and frictionPatchContactCounts are shifted, c.frictionPatches is not - confirm this is intended
+ for (PxU32 p2 = p; p2 < c.frictionPatchCount; ++p2)
+ {
+ c.correlationListHeads[p2 - 1] = c.correlationListHeads[p2];
+ c.frictionPatchContactCounts[p2 - 1] = c.frictionPatchContactCounts[p2];
+ }
+ c.frictionPatchCount--;
+ }
+ }
+
+ PxU32 numFricPatches = c.frictionPatchCount - blockDesc.startFrictionPatchIndex;
+ blockDesc.numFrictionPatches = numFricPatches;
+ }
+
+ FrictionPatch* frictionPatchArray[4];
+ PxU32 frictionPatchCounts[4];
+
+ for (PxU32 a = 0; a < 4; ++a)
+ {
+ PxSolverContactDesc& blockDesc = blockDescs[a];
+
+ const bool successfulReserve = reserveFrictionBlockStreams(c, constraintAllocator, blockDesc.startFrictionPatchIndex, blockDesc.numFrictionPatches + blockDesc.startFrictionPatchIndex,
+ frictionPatchArray[a],
+ frictionPatchCounts[a]);
+
+ //KS - TODO - how can we recover if we failed to allocate this memory? (nothing on this path hands back reservations already made for earlier pairs)
+ if (!successfulReserve)
+ {
+ return SolverConstraintPrepState::eOUT_OF_MEMORY;
+ }
+ }
+ //At this point, all the friction data has been calculated, the correlation has been done. Provided this was all successful,
+ //we are ready to create the batched constraints
+
+ PxU8* solverConstraint = NULL;
+ PxU32 solverConstraintByteSize = 0;
+
+
+
+ {
+ PxU32 axisConstraintCount[4];
+ SolverConstraintPrepState::Enum state = reserveBlockStreams4(blockDescs, c,
+ solverConstraint, axisConstraintCount,
+ solverConstraintByteSize,
+ constraintAllocator);
+
+ if (state != SolverConstraintPrepState::eSUCCESS)
+ return state;
+
+ // NOTE(review): reserveBlockStreams4 also reports success for a zero-sized block, leaving solverConstraint NULL; the code below assumes a non-empty block - confirm callers guarantee it
+ for (PxU32 a = 0; a < 4; ++a)
+ {
+
+ FrictionPatch* frictionPatches = frictionPatchArray[a];
+
+ PxSolverContactDesc& blockDesc = blockDescs[a];
+ PxSolverConstraintDesc& desc = *blockDesc.desc;
+ blockDesc.frictionPtr = reinterpret_cast<PxU8*>(frictionPatches); // from here on the desc refers to the newly reserved friction buffer
+ blockDesc.frictionCount = Ps::to8(frictionPatchCounts[a]);
+
+ //Initialise friction buffer.
+ if (frictionPatches)
+ {
+ // PT: TODO: revisit this... not very satisfying
+ //const PxU32 maxSize = numFrictionPatches*sizeof(FrictionPatch);
+ Ps::prefetchLine(frictionPatches);
+ Ps::prefetchLine(frictionPatches, 128);
+ Ps::prefetchLine(frictionPatches, 256);
+
+ for (PxU32 i = 0; i<blockDesc.numFrictionPatches; i++)
+ {
+ if (c.correlationListHeads[blockDesc.startFrictionPatchIndex + i] != CorrelationBuffer::LIST_END) // same filter that sized the buffer, so the copies fit
+ {
+ //*frictionPatches++ = c.frictionPatches[blockDesc.startFrictionPatchIndex + i];
+ PxMemCopy(frictionPatches++, &c.frictionPatches[blockDesc.startFrictionPatchIndex + i], sizeof(FrictionPatch));
+ //Ps::prefetchLine(frictionPatches, 256);
+ }
+ }
+ }
+
+
+ blockDesc.axisConstraintCount += Ps::to16(axisConstraintCount[a]); // added to whatever count the desc already carries
+
+ desc.constraint = solverConstraint; // all four descs point at the same shared block
+ desc.constraintLengthOver16 = Ps::to16(solverConstraintByteSize / 16);
+ desc.writeBackLengthOver4 = PxU16(blockDesc.numContacts);
+ desc.writeBack = blockDesc.contactForces;
+ }
+
+ const Vec4V iMassScale0 = V4LoadA(invMassScale0);
+ const Vec4V iInertiaScale0 = V4LoadA(invInertiaScale0);
+ const Vec4V iMassScale1 = V4LoadA(invMassScale1);
+ const Vec4V iInertiaScale1 = V4LoadA(invInertiaScale1);
+
+ setupFinalizeSolverConstraints4(blockDescs, c, solverConstraint, invDtF32, bounceThresholdF32,
+ iMassScale0, iInertiaScale0, iMassScale1, iInertiaScale1);
+
+ PX_ASSERT((*solverConstraint == DY_SC_TYPE_BLOCK_RB_CONTACT) || (*solverConstraint == DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT));
+ // Write a zero word immediately after the block; reserveBlockStreams4 reserved 16 bytes beyond solverConstraintByteSize, so this stays inside the reservation.
+ *(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize)) = 0;
+ }
+ return SolverConstraintPrepState::eSUCCESS;
+}
+
+
+//This returns 1 of 3 states: success, unbatchable or out-of-memory. If the constraint is unbatchable, we must fall back on 4 separate constraint
+//prep calls. This overload first extracts the four pairs' contacts into the thread context's contact buffer, then hands over to the CorrelationBuffer overload.
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4(
+ PxsContactManagerOutput** cmOutputs, // four contact manager outputs, one per pair; each must have nbContacts != 0
+ ThreadContext& threadContext, // supplies the shared contact buffer and correlation buffer used as scratch
+ PxSolverContactDesc* blockDescs, // the four pair descriptors (indexed 0..3); contacts, numContacts, hasMaxImpulse, disableStrongFriction and mInvMassScales are written
+ const PxReal invDtF32, // forwarded unchanged to the CorrelationBuffer overload
+ PxReal bounceThresholdF32, // forwarded unchanged
+ PxReal frictionOffsetThreshold, // forwarded unchanged
+ PxReal correlationDistance, // forwarded unchanged
+ PxConstraintAllocator& constraintAllocator) // forwarded unchanged
+{
+
+ for (PxU32 a = 0; a < 4; ++a) // zero the lengths up front so an early return leaves all four descs with a zero constraint length
+ {
+ blockDescs[a].desc->constraintLengthOver16 = 0;
+ }
+
+ PX_ASSERT(cmOutputs[0]->nbContacts && cmOutputs[1]->nbContacts && cmOutputs[2]->nbContacts && cmOutputs[3]->nbContacts);
+
+
+ Gu::ContactBuffer& buffer = threadContext.mContactBuffer;
+
+ buffer.count = 0; // the four pairs share this one buffer, starting empty
+
+ //PxTransform idt = PxTransform(PxIdentity);
+
+ CorrelationBuffer& c = threadContext.mCorrelationBuffer;
+
+ for (PxU32 a = 0; a < 4; ++a)
+ {
+ PxSolverContactDesc& blockDesc = blockDescs[a];
+ PxSolverConstraintDesc& desc = *blockDesc.desc;
+
+ //blockDesc.startContactIndex = buffer.count;
+ blockDesc.contacts = buffer.contacts + buffer.count; // this pair's contacts start at the current end of the shared buffer
+
+ Ps::prefetchLine(desc.bodyA);
+ Ps::prefetchLine(desc.bodyB);
+
+
+ if ((buffer.count + cmOutputs[a]->nbContacts) > 64) // 64 is presumably the capacity of buffer.contacts - confirm against Gu::ContactBuffer
+ {
+ return SolverConstraintPrepState::eUNBATCHABLE;
+ }
+
+ bool hasMaxImpulse = false;
+ bool hasTargetVelocity = false;
+
+ //OK...do the correlation here as well...
+ Ps::prefetchLine(blockDescs[a].frictionPtr);
+ Ps::prefetchLine(blockDescs[a].frictionPtr, 64);
+ Ps::prefetchLine(blockDescs[a].frictionPtr, 128);
+
+ if (a < 3) // NOTE(review): the a < 3 guard suggests the intent was to prefetch cmOutputs[a + 1]; as written this touches the current pair's streams, which extractContacts reads just below - confirm
+ {
+ Ps::prefetchLine(cmOutputs[a]->contactPatches);
+ Ps::prefetchLine(cmOutputs[a]->contactPoints);
+ }
+
+ PxReal invMassScale0, invMassScale1, invInertiaScale0, invInertiaScale1;
+
+ const PxReal defaultMaxImpulse = PxMin(blockDesc.data0->maxContactImpulse, blockDesc.data1->maxContactImpulse);
+
+ PxU32 contactCount = extractContacts(buffer, *cmOutputs[a], hasMaxImpulse, hasTargetVelocity, invMassScale0, invMassScale1,
+ invInertiaScale0, invInertiaScale1, defaultMaxImpulse); // presumably appends to buffer and advances buffer.count - confirm
+
+ if (contactCount == 0)
+ return SolverConstraintPrepState::eUNBATCHABLE;
+
+ blockDesc.numContacts = contactCount;
+ blockDesc.hasMaxImpulse = hasMaxImpulse;
+ blockDesc.disableStrongFriction = blockDesc.disableStrongFriction || hasTargetVelocity; // a target velocity on the contacts also turns strong friction off for the pair
+ // Fold the scales reported by extractContacts into the desc's own scales, in place.
+ blockDesc.mInvMassScales.linear0 *= invMassScale0;
+ blockDesc.mInvMassScales.linear1 *= invMassScale1;
+ blockDesc.mInvMassScales.angular0 *= invInertiaScale0;
+ blockDesc.mInvMassScales.angular1 *= invInertiaScale1;
+
+ //blockDesc.frictionPtr = &blockDescs[a].frictionPtr;
+ //blockDesc.frictionCount = blockDescs[a].frictionCount;
+
+ }
+ return createFinalizeSolverContacts4(c, blockDescs,
+ invDtF32, bounceThresholdF32, frictionOffsetThreshold,
+ correlationDistance, constraintAllocator);
+}
+
+
+
+
+}
+
+}
+
+
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4PF.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4PF.cpp
new file mode 100644
index 00000000..4442b433
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4PF.cpp
@@ -0,0 +1,1017 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxPreprocessor.h"
+#include "PsVecMath.h"
+#include "PsMathUtils.h"
+#include "DySolverContact.h"
+#include "DySolverContactPF.h"
+#include "DySolverConstraintTypes.h"
+#include "PxcNpWorkUnit.h"
+#include "DyThreadContext.h"
+#include "DyContactPrep.h"
+#include "PxcNpContactPrepShared.h"
+//#include "PxvGeometry.h"
+#include "PxvDynamics.h"
+#include "DyCorrelationBuffer.h"
+#include "DySolverConstraintDesc.h"
+#include "DySolverBody.h"
+#include "DySolverContact4.h"
+#include "DySolverContactPF4.h"
+
+
+#include "PsVecMath.h"
+#include "PxContactModifyCallback.h"
+#include "PxsMaterialManager.h"
+#include "PxsMaterialCombiner.h"
+#include "DySolverExt.h"
+#include "DyArticulationContactPrep.h"
+#include "DyContactPrepShared.h"
+#include "PsFoundation.h"
+
+using namespace physx::Gu;
+using namespace physx::shdfnd::aos;
+
+namespace physx
+{
+namespace Dy
+{
+
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb( //Forward declaration; no body is provided at this point in the file.
+ PxsContactManagerOutput** outputs,
+ ThreadContext& threadContext,
+ PxSolverContactDesc* blockDescs,
+ const PxReal invDtF32,
+ PxReal bounceThresholdF32,
+ PxReal frictionOffsetThreshold,
+ PxReal correlationDistance,
+ PxConstraintAllocator& constraintAllocator,
+ PxFrictionType::Enum frictionType);
+
+static bool setupFinalizeSolverConstraintsCoulomb4(PxSolverContactDesc* PX_RESTRICT descs, PxU8* PX_RESTRICT workspace, //descs: read at indices 0..3; workspace: start of the buffer that all rows are written into
+ const PxReal invDtF32, PxReal bounceThresholdF32, CorrelationBuffer& c, const PxU32 numFrictionPerPoint, //c: supplies patch contact counts, correlation list heads and contact patch starts; numFrictionPerPoint: friction rows written per contact row
+ const PxU32 numContactPoints4, const PxU32 /*solverConstraintByteSize*/, //numContactPoints4: contact row count used to locate the friction region; the byte size argument is unused
+ const Ps::aos::Vec4VArg invMassScale0, const Ps::aos::Vec4VArg invInertiaScale0, //per-lane scales for body 0: mass scale multiplies invMass, inertia scale is stored as angD0
+ const Ps::aos::Vec4VArg invMassScale1, const Ps::aos::Vec4VArg invInertiaScale1) //per-lane scales for body 1: mass scale multiplies invMass, inertia scale is stored as angD1
+{
+ //KS - final step. Create the constraints in the place we pre-allocated...
+ //For each patch index, writes a SolverContactCoulombHeader4 plus contact rows at 'workspace', and a SolverFrictionHeader4 plus friction rows in a second region after all contact data. Always returns true.
+ const Vec4V ccdMaxSeparation = Ps::aos::V4LoadXYZW(descs[0].maxCCDSeparation, descs[1].maxCCDSeparation, descs[2].maxCCDSeparation, descs[3].maxCCDSeparation);
+
+ const Vec4V zero = V4Zero();
+ //Per-constraint header flags: only the force-threshold bit is derived here.
+ PxU8 flags[4] = { PxU8(descs[0].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0),
+ PxU8(descs[1].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0),
+ PxU8(descs[2].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0),
+ PxU8(descs[3].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0) };
+
+
+ //The block is dynamic if **any** of the constraints have a non-static body B. This allows us to batch static and non-static constraints but we only get a memory/perf
+ //saving if all 4 are static. This simplifies the constraint partitioning such that it only needs to care about separating contacts and 1D constraints (which it already does)
+ const bool isDynamic = ((descs[0].bodyState1 | descs[1].bodyState1 | descs[2].bodyState1 | descs[3].bodyState1) & PxSolverContactDesc::eDYNAMIC_BODY) != 0;
+ //Row strides: the Dynamic row types are used when body-B data has to be stored, the Base types otherwise.
+ const PxU32 constraintSize = isDynamic ? sizeof(SolverContact4Dynamic) : sizeof(SolverContact4Base);
+ const PxU32 frictionSize = isDynamic ? sizeof(SolverFriction4Dynamic) : sizeof(SolverFriction4Base);
+
+ PxU8* PX_RESTRICT ptr = workspace;
+ //Local aliases for the mass/inertia scale arguments.
+ const Vec4V dom0 = invMassScale0;
+ const Vec4V dom1 = invMassScale1;
+ const Vec4V angDom0 = invInertiaScale0;
+ const Vec4V angDom1 = invInertiaScale1;
+ //Per-lane maximum of the two bodies' penBiasClamp values.
+ const Vec4V maxPenBias = V4Max(V4Merge(FLoad(descs[0].data0->penBiasClamp), FLoad(descs[1].data0->penBiasClamp),
+ FLoad(descs[2].data0->penBiasClamp), FLoad(descs[3].data0->penBiasClamp)),
+ V4Merge(FLoad(descs[0].data1->penBiasClamp), FLoad(descs[1].data1->penBiasClamp),
+ FLoad(descs[2].data1->penBiasClamp), FLoad(descs[3].data1->penBiasClamp)));
+
+ const Vec4V restDistance = V4Merge(FLoad(descs[0].restDistance), FLoad(descs[1].restDistance), FLoad(descs[2].restDistance),
+ FLoad(descs[3].restDistance));
+
+ //load up velocities
+ Vec4V linVel00 = V4LoadA(&descs[0].data0->linearVelocity.x);
+ Vec4V linVel10 = V4LoadA(&descs[1].data0->linearVelocity.x);
+ Vec4V linVel20 = V4LoadA(&descs[2].data0->linearVelocity.x);
+ Vec4V linVel30 = V4LoadA(&descs[3].data0->linearVelocity.x);
+
+ Vec4V linVel01 = V4LoadA(&descs[0].data1->linearVelocity.x);
+ Vec4V linVel11 = V4LoadA(&descs[1].data1->linearVelocity.x);
+ Vec4V linVel21 = V4LoadA(&descs[2].data1->linearVelocity.x);
+ Vec4V linVel31 = V4LoadA(&descs[3].data1->linearVelocity.x);
+
+ Vec4V angVel00 = V4LoadA(&descs[0].data0->angularVelocity.x);
+ Vec4V angVel10 = V4LoadA(&descs[1].data0->angularVelocity.x);
+ Vec4V angVel20 = V4LoadA(&descs[2].data0->angularVelocity.x);
+ Vec4V angVel30 = V4LoadA(&descs[3].data0->angularVelocity.x);
+
+ Vec4V angVel01 = V4LoadA(&descs[0].data1->angularVelocity.x);
+ Vec4V angVel11 = V4LoadA(&descs[1].data1->angularVelocity.x);
+ Vec4V angVel21 = V4LoadA(&descs[2].data1->angularVelocity.x);
+ Vec4V angVel31 = V4LoadA(&descs[3].data1->angularVelocity.x);
+ //Outputs of the transposes below: one Vec4V per component (T0x/T1x/T2x are used as X/Y/Z), one lane per constraint.
+ Vec4V linVelT00, linVelT10, linVelT20;
+ Vec4V linVelT01, linVelT11, linVelT21;
+ Vec4V angVelT00, angVelT10, angVelT20;
+ Vec4V angVelT01, angVelT11, angVelT21;
+
+ PX_TRANSPOSE_44_34(linVel00, linVel10, linVel20, linVel30, linVelT00, linVelT10, linVelT20);
+ PX_TRANSPOSE_44_34(linVel01, linVel11, linVel21, linVel31, linVelT01, linVelT11, linVelT21);
+ PX_TRANSPOSE_44_34(angVel00, angVel10, angVel20, angVel30, angVelT00, angVelT10, angVelT20);
+ PX_TRANSPOSE_44_34(angVel01, angVel11, angVel21, angVel31, angVelT01, angVelT11, angVelT21);
+ //Relative linear velocity (body 0 minus body 1), per component.
+ const Vec4V vrelX = V4Sub(linVelT00, linVelT01);
+ const Vec4V vrelY = V4Sub(linVelT10, linVelT11);
+ const Vec4V vrelZ = V4Sub(linVelT20, linVelT21);
+
+
+
+ //Load up masses and invInertia
+
+ const Vec4V invMass0 = V4Merge(FLoad(descs[0].data0->invMass), FLoad(descs[1].data0->invMass), FLoad(descs[2].data0->invMass),
+ FLoad(descs[3].data0->invMass));
+
+ const Vec4V invMass1 = V4Merge(FLoad(descs[0].data1->invMass), FLoad(descs[1].data1->invMass), FLoad(descs[2].data1->invMass),
+ FLoad(descs[3].data1->invMass));
+ //Inverse masses pre-multiplied by the mass scales; these are what the headers store.
+ const Vec4V invMass0_dom0fV = V4Mul(dom0, invMass0);
+ const Vec4V invMass1_dom1fV = V4Mul(dom1, invMass1);
+ //Columns of each body's sqrtInvInertia matrix, widened to Vec4V for the transposes below.
+ Vec4V invInertia00X = Vec4V_From_Vec3V(V3LoadU(descs[0].data0->sqrtInvInertia.column0));
+ Vec4V invInertia00Y = Vec4V_From_Vec3V(V3LoadU(descs[0].data0->sqrtInvInertia.column1));
+ Vec4V invInertia00Z = Vec4V_From_Vec3V(V3LoadU(descs[0].data0->sqrtInvInertia.column2));
+
+ Vec4V invInertia10X = Vec4V_From_Vec3V(V3LoadU(descs[1].data0->sqrtInvInertia.column0));
+ Vec4V invInertia10Y = Vec4V_From_Vec3V(V3LoadU(descs[1].data0->sqrtInvInertia.column1));
+ Vec4V invInertia10Z = Vec4V_From_Vec3V(V3LoadU(descs[1].data0->sqrtInvInertia.column2));
+
+ Vec4V invInertia20X = Vec4V_From_Vec3V(V3LoadU(descs[2].data0->sqrtInvInertia.column0));
+ Vec4V invInertia20Y = Vec4V_From_Vec3V(V3LoadU(descs[2].data0->sqrtInvInertia.column1));
+ Vec4V invInertia20Z = Vec4V_From_Vec3V(V3LoadU(descs[2].data0->sqrtInvInertia.column2));
+
+ Vec4V invInertia30X = Vec4V_From_Vec3V(V3LoadU(descs[3].data0->sqrtInvInertia.column0));
+ Vec4V invInertia30Y = Vec4V_From_Vec3V(V3LoadU(descs[3].data0->sqrtInvInertia.column1));
+ Vec4V invInertia30Z = Vec4V_From_Vec3V(V3LoadU(descs[3].data0->sqrtInvInertia.column2));
+
+ Vec4V invInertia01X = Vec4V_From_Vec3V(V3LoadU(descs[0].data1->sqrtInvInertia.column0));
+ Vec4V invInertia01Y = Vec4V_From_Vec3V(V3LoadU(descs[0].data1->sqrtInvInertia.column1));
+ Vec4V invInertia01Z = Vec4V_From_Vec3V(V3LoadU(descs[0].data1->sqrtInvInertia.column2));
+
+ Vec4V invInertia11X = Vec4V_From_Vec3V(V3LoadU(descs[1].data1->sqrtInvInertia.column0));
+ Vec4V invInertia11Y = Vec4V_From_Vec3V(V3LoadU(descs[1].data1->sqrtInvInertia.column1));
+ Vec4V invInertia11Z = Vec4V_From_Vec3V(V3LoadU(descs[1].data1->sqrtInvInertia.column2));
+
+ Vec4V invInertia21X = Vec4V_From_Vec3V(V3LoadU(descs[2].data1->sqrtInvInertia.column0));
+ Vec4V invInertia21Y = Vec4V_From_Vec3V(V3LoadU(descs[2].data1->sqrtInvInertia.column1));
+ Vec4V invInertia21Z = Vec4V_From_Vec3V(V3LoadU(descs[2].data1->sqrtInvInertia.column2));
+
+ Vec4V invInertia31X = Vec4V_From_Vec3V(V3LoadU(descs[3].data1->sqrtInvInertia.column0));
+ Vec4V invInertia31Y = Vec4V_From_Vec3V(V3LoadU(descs[3].data1->sqrtInvInertia.column1));
+ Vec4V invInertia31Z = Vec4V_From_Vec3V(V3LoadU(descs[3].data1->sqrtInvInertia.column2));
+
+ Vec4V invInertia0X0, invInertia0X1, invInertia0X2;
+ Vec4V invInertia0Y0, invInertia0Y1, invInertia0Y2;
+ Vec4V invInertia0Z0, invInertia0Z1, invInertia0Z2;
+
+ Vec4V invInertia1X0, invInertia1X1, invInertia1X2;
+ Vec4V invInertia1Y0, invInertia1Y1, invInertia1Y2;
+ Vec4V invInertia1Z0, invInertia1Z1, invInertia1Z2;
+
+ PX_TRANSPOSE_44_34(invInertia00X, invInertia10X, invInertia20X, invInertia30X, invInertia0X0, invInertia0Y0, invInertia0Z0);
+ PX_TRANSPOSE_44_34(invInertia00Y, invInertia10Y, invInertia20Y, invInertia30Y, invInertia0X1, invInertia0Y1, invInertia0Z1);
+ PX_TRANSPOSE_44_34(invInertia00Z, invInertia10Z, invInertia20Z, invInertia30Z, invInertia0X2, invInertia0Y2, invInertia0Z2);
+
+ PX_TRANSPOSE_44_34(invInertia01X, invInertia11X, invInertia21X, invInertia31X, invInertia1X0, invInertia1Y0, invInertia1Z0);
+ PX_TRANSPOSE_44_34(invInertia01Y, invInertia11Y, invInertia21Y, invInertia31Y, invInertia1X1, invInertia1Y1, invInertia1Z1);
+ PX_TRANSPOSE_44_34(invInertia01Z, invInertia11Z, invInertia21Z, invInertia31Z, invInertia1X2, invInertia1Y2, invInertia1Z2);
+ //Constants: 0.8 scales the penetration bias (through invDtp8), 0.1 is the squared tangential speed threshold, 0.70710678 is compared with |normalX| to pick the fallback tangent.
+ const FloatV invDt = FLoad(invDtF32);
+ const FloatV p8 = FLoad(0.8f);
+ //const Vec4V p84 = V4Splat(p8);
+ const Vec4V p1 = V4Splat(FLoad(0.1f));
+ const Vec4V bounceThreshold = V4Splat(FLoad(bounceThresholdF32));
+ const Vec4V orthoThreshold = V4Splat(FLoad(0.70710678f));
+
+ const FloatV invDtp8 = FMul(invDt, p8);
+ //Body frame positions for body 0, transposed to per-component lanes; 'ra' below is measured from these.
+ const Vec3V bodyFrame00p = V3LoadU(descs[0].bodyFrame0.p);
+ const Vec3V bodyFrame01p = V3LoadU(descs[1].bodyFrame0.p);
+ const Vec3V bodyFrame02p = V3LoadU(descs[2].bodyFrame0.p);
+ const Vec3V bodyFrame03p = V3LoadU(descs[3].bodyFrame0.p);
+
+ Vec4V bodyFrame00p4 = Vec4V_From_Vec3V(bodyFrame00p);
+ Vec4V bodyFrame01p4 = Vec4V_From_Vec3V(bodyFrame01p);
+ Vec4V bodyFrame02p4 = Vec4V_From_Vec3V(bodyFrame02p);
+ Vec4V bodyFrame03p4 = Vec4V_From_Vec3V(bodyFrame03p);
+
+ Vec4V bodyFrame0pX, bodyFrame0pY, bodyFrame0pZ;
+ PX_TRANSPOSE_44_34(bodyFrame00p4, bodyFrame01p4, bodyFrame02p4, bodyFrame03p4, bodyFrame0pX, bodyFrame0pY, bodyFrame0pZ);
+
+ //Same for body 1; 'rb' below is measured from these.
+ const Vec3V bodyFrame10p = V3LoadU(descs[0].bodyFrame1.p);
+ const Vec3V bodyFrame11p = V3LoadU(descs[1].bodyFrame1.p);
+ const Vec3V bodyFrame12p = V3LoadU(descs[2].bodyFrame1.p);
+ const Vec3V bodyFrame13p = V3LoadU(descs[3].bodyFrame1.p);
+
+ Vec4V bodyFrame10p4 = Vec4V_From_Vec3V(bodyFrame10p);
+ Vec4V bodyFrame11p4 = Vec4V_From_Vec3V(bodyFrame11p);
+ Vec4V bodyFrame12p4 = Vec4V_From_Vec3V(bodyFrame12p);
+ Vec4V bodyFrame13p4 = Vec4V_From_Vec3V(bodyFrame13p);
+
+ Vec4V bodyFrame1pX, bodyFrame1pY, bodyFrame1pZ;
+ PX_TRANSPOSE_44_34(bodyFrame10p4, bodyFrame11p4, bodyFrame12p4, bodyFrame13p4, bodyFrame1pX, bodyFrame1pY, bodyFrame1pZ);
+
+
+ Ps::prefetchLine(c.contactID);
+ Ps::prefetchLine(c.contactID, 128);
+
+ PxU32 frictionIndex0 = 0, frictionIndex1 = 0, frictionIndex2 = 0, frictionIndex3 = 0;
+
+ //maxPatches is the largest patch count among the 4 descs; descs with fewer patches contribute zero-count lanes for the extra iterations.
+ PxU32 maxPatches = PxMax(descs[0].numFrictionPatches, PxMax(descs[1].numFrictionPatches, PxMax(descs[2].numFrictionPatches, descs[3].numFrictionPatches)));
+ PxU32 maxContacts = numContactPoints4;
+
+ //This is the address at which the first friction patch exists
+ PxU8* ptr2 = ptr + ((sizeof(SolverContactCoulombHeader4) * maxPatches) + constraintSize * maxContacts);
+
+ //PxU32 contactId = 0;
+
+ for(PxU32 i=0;i<maxPatches;i++)
+ {
+ const bool hasFinished0 = i >= descs[0].numFrictionPatches;
+ const bool hasFinished1 = i >= descs[1].numFrictionPatches;
+ const bool hasFinished2 = i >= descs[2].numFrictionPatches;
+ const bool hasFinished3 = i >= descs[3].numFrictionPatches;
+
+ //A finished desc keeps its previous friction index (initially 0) so the lookups below still have an index to read, but it contributes 0 contacts.
+ frictionIndex0 = hasFinished0 ? frictionIndex0 : descs[0].startFrictionPatchIndex + i;
+ frictionIndex1 = hasFinished1 ? frictionIndex1 : descs[1].startFrictionPatchIndex + i;
+ frictionIndex2 = hasFinished2 ? frictionIndex2 : descs[2].startFrictionPatchIndex + i;
+ frictionIndex3 = hasFinished3 ? frictionIndex3 : descs[3].startFrictionPatchIndex + i;
+
+ PxU32 clampedContacts0 = hasFinished0 ? 0 : c.frictionPatchContactCounts[frictionIndex0];
+ PxU32 clampedContacts1 = hasFinished1 ? 0 : c.frictionPatchContactCounts[frictionIndex1];
+ PxU32 clampedContacts2 = hasFinished2 ? 0 : c.frictionPatchContactCounts[frictionIndex2];
+ PxU32 clampedContacts3 = hasFinished3 ? 0 : c.frictionPatchContactCounts[frictionIndex3];
+
+ PxU32 clampedFric0 = clampedContacts0 * numFrictionPerPoint;
+ PxU32 clampedFric1 = clampedContacts1 * numFrictionPerPoint;
+ PxU32 clampedFric2 = clampedContacts2 * numFrictionPerPoint;
+ PxU32 clampedFric3 = clampedContacts3 * numFrictionPerPoint;
+
+ //Row counts recorded for this patch are the maximum over the 4 lanes.
+ const PxU32 numContacts = PxMax(clampedContacts0, PxMax(clampedContacts1, PxMax(clampedContacts2, clampedContacts3)));
+ const PxU32 numFrictions = PxMax(clampedFric0, PxMax(clampedFric1, PxMax(clampedFric2, clampedFric3)));
+
+ PxU32 firstPatch0 = c.correlationListHeads[frictionIndex0];
+ PxU32 firstPatch1 = c.correlationListHeads[frictionIndex1];
+ PxU32 firstPatch2 = c.correlationListHeads[frictionIndex2];
+ PxU32 firstPatch3 = c.correlationListHeads[frictionIndex3];
+ //contactBaseN points at the first contact of lane N's first contact patch; restitution, static friction and the normal are read from it.
+ const Gu::ContactPoint* contactBase0 = descs[0].contacts + c.contactPatches[firstPatch0].start;
+ const Gu::ContactPoint* contactBase1 = descs[1].contacts + c.contactPatches[firstPatch1].start;
+ const Gu::ContactPoint* contactBase2 = descs[2].contacts + c.contactPatches[firstPatch2].start;
+ const Gu::ContactPoint* contactBase3 = descs[3].contacts + c.contactPatches[firstPatch3].start;
+
+ const Vec4V restitution = V4Merge(FLoad(contactBase0->restitution), FLoad(contactBase1->restitution), FLoad(contactBase2->restitution),
+ FLoad(contactBase3->restitution));
+
+ const Vec4V staticFriction = V4Merge(FLoad(contactBase0->staticFriction), FLoad(contactBase1->staticFriction), FLoad(contactBase2->staticFriction),
+ FLoad(contactBase3->staticFriction));
+
+ SolverContactCoulombHeader4* PX_RESTRICT header = reinterpret_cast<SolverContactCoulombHeader4*>(ptr);
+ //Byte distance from this contact header to its friction header, narrowed to 16 bits. NOTE(review): nothing here checks that the distance fits in a PxU16 - confirm the caller bounds the block size.
+ header->frictionOffset = PxU16(ptr2 - ptr);
+
+ ptr += sizeof(SolverContactCoulombHeader4);
+ //The friction header is followed by numContacts Vec4V slots that this function steps over without writing.
+ SolverFrictionHeader4* PX_RESTRICT fricHeader = reinterpret_cast<SolverFrictionHeader4*>(ptr2);
+ ptr2 += sizeof(SolverFrictionHeader4) + sizeof(Vec4V) * numContacts;
+
+
+ header->numNormalConstr0 = Ps::to8(clampedContacts0);
+ header->numNormalConstr1 = Ps::to8(clampedContacts1);
+ header->numNormalConstr2 = Ps::to8(clampedContacts2);
+ header->numNormalConstr3 = Ps::to8(clampedContacts3);
+ header->numNormalConstr = Ps::to8(numContacts);
+ header->invMassADom = invMass0_dom0fV;
+ header->invMassBDom = invMass1_dom1fV;
+ header->angD0 = angDom0;
+ header->angD1 = angDom1;
+ header->restitution = restitution;
+
+ header->flags[0] = flags[0]; header->flags[1] = flags[1]; header->flags[2] = flags[2]; header->flags[3] = flags[3];
+
+ header->type = Ps::to8(isDynamic ? DY_SC_TYPE_BLOCK_RB_CONTACT : DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT);
+ header->shapeInteraction[0] = descs[0].shapeInteraction; header->shapeInteraction[1] = descs[1].shapeInteraction;
+ header->shapeInteraction[2] = descs[2].shapeInteraction; header->shapeInteraction[3] = descs[3].shapeInteraction;
+
+
+ fricHeader->invMassADom = invMass0_dom0fV;
+ fricHeader->invMassBDom = invMass1_dom1fV;
+ fricHeader->angD0 = angDom0;
+ fricHeader->angD1 = angDom1;
+ fricHeader->numFrictionConstr0 = Ps::to8(clampedFric0);
+ fricHeader->numFrictionConstr1 = Ps::to8(clampedFric1);
+ fricHeader->numFrictionConstr2 = Ps::to8(clampedFric2);
+ fricHeader->numFrictionConstr3 = Ps::to8(clampedFric3);
+ fricHeader->numNormalConstr = Ps::to8(numContacts);
+ fricHeader->numNormalConstr0 = Ps::to8(clampedContacts0);
+ fricHeader->numNormalConstr1 = Ps::to8(clampedContacts1);
+ fricHeader->numNormalConstr2 = Ps::to8(clampedContacts2);
+ fricHeader->numNormalConstr3 = Ps::to8(clampedContacts3);
+ fricHeader->type = Ps::to8(isDynamic ? DY_SC_TYPE_BLOCK_FRICTION : DY_SC_TYPE_BLOCK_STATIC_FRICTION);
+ fricHeader->staticFriction = staticFriction;
+ fricHeader->frictionPerContact = PxU32(numFrictionPerPoint == 2 ? 1 : 0);
+
+ fricHeader->numFrictionConstr = Ps::to8(numFrictions);
+ //One normal per lane for the whole patch, taken from the patch's first contact.
+ Vec4V normal0 = V4LoadA(&contactBase0->normal.x);
+ Vec4V normal1 = V4LoadA(&contactBase1->normal.x);
+ Vec4V normal2 = V4LoadA(&contactBase2->normal.x);
+ Vec4V normal3 = V4LoadA(&contactBase3->normal.x);
+
+ Vec4V normalX, normalY, normalZ;
+ PX_TRANSPOSE_44_34(normal0, normal1, normal2, normal3, normalX, normalY, normalZ);
+ header->normalX = normalX;
+ header->normalY = normalY;
+ header->normalZ = normalZ;
+
+ const Vec4V normalLenSq = V4MulAdd(normalZ, normalZ, V4MulAdd(normalY, normalY, V4Mul(normalX, normalX)));
+ //Linear velocity of each body projected onto the normal.
+ const Vec4V linNorVel0 = V4MulAdd(normalZ, linVelT20, V4MulAdd(normalY, linVelT10, V4Mul(normalX, linVelT00)));
+ const Vec4V linNorVel1 = V4MulAdd(normalZ, linVelT21, V4MulAdd(normalY, linVelT11, V4Mul(normalX, linVelT01)));
+
+ const Vec4V invMassNorLenSq0 = V4Mul(invMass0_dom0fV, normalLenSq);
+ const Vec4V invMassNorLenSq1 = V4Mul(invMass1_dom1fV, normalLenSq);
+
+
+ //Calculate friction directions
+ const BoolV cond =V4IsGrtr(orthoThreshold, V4Abs(normalX));
+ //Fallback tangent, perpendicular to the normal: (0, -nz, ny) where |nx| is below the threshold, (-ny, nx, 0) otherwise.
+ const Vec4V t0FallbackX = V4Sel(cond, zero, V4Neg(normalY));
+ const Vec4V t0FallbackY = V4Sel(cond, V4Neg(normalZ), normalX);
+ const Vec4V t0FallbackZ = V4Sel(cond, normalY, zero);
+ //Tangential part of the relative linear velocity: vrel - normal * dot(normal, vrel).
+ const Vec4V dotNormalVrel = V4MulAdd(normalZ, vrelZ, V4MulAdd(normalY, vrelY, V4Mul(normalX, vrelX)));
+ const Vec4V vrelSubNorVelX = V4NegMulSub(normalX, dotNormalVrel, vrelX);
+ const Vec4V vrelSubNorVelY = V4NegMulSub(normalY, dotNormalVrel, vrelY);
+ const Vec4V vrelSubNorVelZ = V4NegMulSub(normalZ, dotNormalVrel, vrelZ);
+
+ const Vec4V lenSqvrelSubNorVelZ = V4MulAdd(vrelSubNorVelX, vrelSubNorVelX, V4MulAdd(vrelSubNorVelY, vrelSubNorVelY, V4Mul(vrelSubNorVelZ, vrelSubNorVelZ)));
+ //Use the tangential velocity as t0 where its squared length exceeds 0.1, otherwise the fallback tangent.
+ const BoolV bcon2 = V4IsGrtr(lenSqvrelSubNorVelZ, p1);
+
+ Vec4V t0X = V4Sel(bcon2, vrelSubNorVelX, t0FallbackX);
+ Vec4V t0Y = V4Sel(bcon2, vrelSubNorVelY, t0FallbackY);
+ Vec4V t0Z = V4Sel(bcon2, vrelSubNorVelZ, t0FallbackZ);
+
+ //Now normalize this...
+ const Vec4V recipLen = V4Rsqrt(V4MulAdd(t0X, t0X, V4MulAdd(t0Y, t0Y, V4Mul(t0Z, t0Z))));
+
+ t0X = V4Mul(t0X, recipLen);
+ t0Y = V4Mul(t0Y, recipLen);
+ t0Z = V4Mul(t0Z, recipLen);
+ //Second tangent: t1 = cross(normal, t0).
+ const Vec4V t1X = V4NegMulSub(normalZ, t0Y, V4Mul(normalY, t0Z));
+ const Vec4V t1Y = V4NegMulSub(normalX, t0Z, V4Mul(normalZ, t0X));
+ const Vec4V t1Z = V4NegMulSub(normalY, t0X, V4Mul(normalX, t0Y));
+
+ const Vec4V tFallbackX[2] = {t0X, t1X};
+ const Vec4V tFallbackY[2] = {t0Y, t1Y};
+ const Vec4V tFallbackZ[2] = {t0Z, t1Z};
+
+
+ //For all correlation heads - need to pull this out I think
+
+ //OK, we have a counter for all our patches...
+ PxU32 finished = (PxU32(hasFinished0)) |
+ ((PxU32(hasFinished1)) << 1) |
+ ((PxU32(hasFinished2)) << 2) |
+ ((PxU32(hasFinished3)) << 3);
+
+ CorrelationListIterator iter0(c, firstPatch0);
+ CorrelationListIterator iter1(c, firstPatch1);
+ CorrelationListIterator iter2(c, firstPatch2);
+ CorrelationListIterator iter3(c, firstPatch3);
+
+ PxU32 contact0, contact1, contact2, contact3;
+ PxU32 patch0, patch1, patch2, patch3;
+ //Fetch the first contact of every lane, including lanes already flagged as finished.
+ iter0.nextContact(patch0, contact0);
+ iter1.nextContact(patch1, contact1);
+ iter2.nextContact(patch2, contact2);
+ iter3.nextContact(patch3, contact3);
+
+ PxU8* p = ptr;
+
+ PxU32 contactCount = 0;
+ PxU32 newFinished =
+ (PxU32(hasFinished0 || !iter0.hasNextContact())) |
+ ((PxU32(hasFinished1 || !iter1.hasNextContact())) << 1) |
+ ((PxU32(hasFinished2 || !iter2.hasNextContact())) << 2) |
+ ((PxU32(hasFinished3 || !iter3.hasNextContact())) << 3);
+ //newFinished is one step ahead of 'finished': it only takes effect at the top of the loop, so the contact fetched last for each lane is still written once.
+ PxU32 fricIndex = 0;
+
+ while(finished != 0xf)
+ {
+ finished = newFinished;
+ ++contactCount;
+ Ps::prefetchLine(p, 384);
+ Ps::prefetchLine(p, 512);
+ Ps::prefetchLine(p, 640);
+
+ SolverContact4Base* PX_RESTRICT solverContact = reinterpret_cast<SolverContact4Base*>(p);
+ p += constraintSize;
+ //Lanes that have run out of contacts keep re-using their last fetched patch/contact indices for the remaining rows.
+ const Gu::ContactPoint& con0 = descs[0].contacts[c.contactPatches[patch0].start + contact0];
+ const Gu::ContactPoint& con1 = descs[1].contacts[c.contactPatches[patch1].start + contact1];
+ const Gu::ContactPoint& con2 = descs[2].contacts[c.contactPatches[patch2].start + contact2];
+ const Gu::ContactPoint& con3 = descs[3].contacts[c.contactPatches[patch3].start + contact3];
+
+ //Now we need to splice these 4 contacts into a single structure
+
+ {
+ Vec4V point0 = V4LoadA(&con0.point.x);
+ Vec4V point1 = V4LoadA(&con1.point.x);
+ Vec4V point2 = V4LoadA(&con2.point.x);
+ Vec4V point3 = V4LoadA(&con3.point.x);
+
+ Vec4V pointX, pointY, pointZ;
+ PX_TRANSPOSE_44_34(point0, point1, point2, point3, pointX, pointY, pointZ);
+
+ Vec4V targetVel0 = V4LoadA(&con0.targetVel.x);
+ Vec4V targetVel1 = V4LoadA(&con1.targetVel.x);
+ Vec4V targetVel2 = V4LoadA(&con2.targetVel.x);
+ Vec4V targetVel3 = V4LoadA(&con3.targetVel.x);
+
+ Vec4V targetVelX, targetVelY, targetVelZ;
+ PX_TRANSPOSE_44_34(targetVel0, targetVel1, targetVel2, targetVel3, targetVelX, targetVelY, targetVelZ);
+ //ra / rb: contact point relative to the body frame 0 / body frame 1 positions.
+ const Vec4V raX = V4Sub(pointX, bodyFrame0pX);
+ const Vec4V raY = V4Sub(pointY, bodyFrame0pY);
+ const Vec4V raZ = V4Sub(pointZ, bodyFrame0pZ);
+
+ const Vec4V rbX = V4Sub(pointX, bodyFrame1pX);
+ const Vec4V rbY = V4Sub(pointY, bodyFrame1pY);
+ const Vec4V rbZ = V4Sub(pointZ, bodyFrame1pZ);
+ //Normal (contact) row for this contact.
+ {
+ const Vec4V separation = V4Merge(FLoad(con0.separation), FLoad(con1.separation), FLoad(con2.separation),
+ FLoad(con3.separation));
+ const Vec4V maxImpulse = V4Merge(FLoad(con0.maxImpulse), FLoad(con1.maxImpulse), FLoad(con2.maxImpulse),
+ FLoad(con3.maxImpulse));
+ //Contact target velocity projected onto the normal.
+ const Vec4V cTargetVel = V4MulAdd(normalX, targetVelX, V4MulAdd(normalY, targetVelY, V4Mul(normalZ, targetVelZ)));
+
+ //raXn = cross(ra, normal) which = Vec3V( a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x);
+ const Vec4V raXnX = V4NegMulSub(raZ, normalY, V4Mul(raY, normalZ));
+ const Vec4V raXnY = V4NegMulSub(raX, normalZ, V4Mul(raZ, normalX));
+ const Vec4V raXnZ = V4NegMulSub(raY, normalX, V4Mul(raX, normalY));
+ //delAngVel0: each component is the dot product of raXn with one column of body 0's sqrtInvInertia.
+ const Vec4V v0a0 = V4Mul(invInertia0X0, raXnX);
+ const Vec4V v0a1 = V4Mul(invInertia0X1, raXnX);
+ const Vec4V v0a2 = V4Mul(invInertia0X2, raXnX);
+
+ const Vec4V v0PlusV1a0 = V4MulAdd(invInertia0Y0, raXnY, v0a0);
+ const Vec4V v0PlusV1a1 = V4MulAdd(invInertia0Y1, raXnY, v0a1);
+ const Vec4V v0PlusV1a2 = V4MulAdd(invInertia0Y2, raXnY, v0a2);
+
+ const Vec4V delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, v0PlusV1a0);
+ const Vec4V delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, v0PlusV1a1);
+ const Vec4V delAngVel0Z = V4MulAdd(invInertia0Z2, raXnZ, v0PlusV1a2);
+
+ const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0Z, delAngVel0Z, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0X, delAngVel0X)));
+ const Vec4V dotRaXnAngVel0 = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4Mul(raXnX, angVelT00)));
+ //Body 0 contribution: response = invMass * |n|^2 + |delAngVel0|^2, velocity = linear + angular part along the normal.
+ Vec4V unitResponse = V4Add(invMassNorLenSq0, dotDelAngVel0);
+ Vec4V vrel = V4Add(linNorVel0, dotRaXnAngVel0);
+
+
+ //The dynamic-only parts - need to if-statement these up. A branch here shouldn't cost us too much
+ if(isDynamic)
+ {
+ SolverContact4Dynamic* PX_RESTRICT dynamicContact = static_cast<SolverContact4Dynamic*>(solverContact);
+ const Vec4V rbXnX = V4NegMulSub(rbZ, normalY, V4Mul(rbY, normalZ));
+ const Vec4V rbXnY = V4NegMulSub(rbX, normalZ, V4Mul(rbZ, normalX));
+ const Vec4V rbXnZ = V4NegMulSub(rbY, normalX, V4Mul(rbX, normalY));
+
+ const Vec4V v0b0 = V4Mul(invInertia1X0, rbXnX);
+ const Vec4V v0b1 = V4Mul(invInertia1X1, rbXnX);
+ const Vec4V v0b2 = V4Mul(invInertia1X2, rbXnX);
+
+ const Vec4V v0PlusV1b0 = V4MulAdd(invInertia1Y0, rbXnY, v0b0);
+ const Vec4V v0PlusV1b1 = V4MulAdd(invInertia1Y1, rbXnY, v0b1);
+ const Vec4V v0PlusV1b2 = V4MulAdd(invInertia1Y2, rbXnY, v0b2);
+
+ const Vec4V delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, v0PlusV1b0);
+ const Vec4V delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, v0PlusV1b1);
+ const Vec4V delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, v0PlusV1b2);
+
+
+ //V3Dot(raXn, delAngVel0)
+
+ const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1Z, delAngVel1Z, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1X, delAngVel1X)));
+
+ const Vec4V dotRbXnAngVel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01)));
+
+ const Vec4V resp1 = V4Add(dotDelAngVel1, invMassNorLenSq1);
+
+ unitResponse = V4Add(unitResponse, resp1);
+ //Body 1's normal velocity is subtracted so vrel becomes the relative normal velocity.
+ const Vec4V vrel2 = V4Add(linNorVel1, dotRbXnAngVel1);
+ vrel = V4Sub(vrel, vrel2);
+
+ //These are for dynamic-only contacts.
+ dynamicContact->rbXnX = delAngVel1X;
+ dynamicContact->rbXnY = delAngVel1Y;
+ dynamicContact->rbXnZ = delAngVel1Z;
+
+ }
+ //velMultiplier = 1 / unitResponse where unitResponse > 0, otherwise 0.
+ const Vec4V velMultiplier = V4Sel(V4IsGrtr(unitResponse, zero), V4Recip(unitResponse), zero);
+
+ const Vec4V penetration = V4Sub(separation, restDistance);
+ //Bias velocity is penetration * invDt * 0.8, bounded below by maxPenBias.
+ const Vec4V penInvDtp8 = V4Max(maxPenBias, V4Scale(penetration, invDtp8));
+
+ Vec4V scaledBias = V4Mul(velMultiplier, penInvDtp8);
+
+ const Vec4V penetrationInvDt = V4Scale(penetration, invDt);
+ //Restitution case: restitution > 0, vrel below bounceThreshold, and -vrel greater than penetration * invDt.
+ const BoolV isGreater2 = BAnd(BAnd(V4IsGrtr(restitution, zero), V4IsGrtr(bounceThreshold, vrel)),
+ V4IsGrtr(V4Neg(vrel), penetrationInvDt));
+
+ const BoolV ccdSeparationCondition = V4IsGrtrOrEq(ccdMaxSeparation, penetration);
+ //The bias is zeroed only where both the restitution case and the CCD separation condition hold.
+ scaledBias = V4Sel(BAnd(ccdSeparationCondition, isGreater2), zero, scaledBias);
+
+ const Vec4V sumVRel(vrel);
+ //targetVelocity = (restitution case ? -vrel * restitution : 0) + cTargetVel - vrel.
+ const Vec4V targetVelocity = V4Sub(V4Add(V4Sel(isGreater2, V4Mul(V4Neg(sumVRel), restitution), zero), cTargetVel), vrel);
+
+ //These values are present for static and dynamic contacts
+ solverContact->raXnX = delAngVel0X;
+ solverContact->raXnY = delAngVel0Y;
+ solverContact->raXnZ = delAngVel0Z;
+ solverContact->velMultiplier = velMultiplier;
+ solverContact->appliedForce = zero;
+ solverContact->scaledBias = scaledBias;
+ solverContact->targetVelocity = targetVelocity;
+ solverContact->maxImpulse = maxImpulse;
+ }
+
+ //PxU32 conId = contactId++;
+
+ /*Vec4V targetVel0 = V4LoadA(&con0.targetVel.x);
+ Vec4V targetVel1 = V4LoadA(&con1.targetVel.x);
+ Vec4V targetVel2 = V4LoadA(&con2.targetVel.x);
+ Vec4V targetVel3 = V4LoadA(&con3.targetVel.x);
+
+ Vec4V targetVelX, targetVelY, targetVelZ;
+ PX_TRANSPOSE_44_34(targetVel0, targetVel1, targetVel2, targetVel3, targetVelX, targetVelY, targetVelZ);*/
+ //Friction rows for this contact: numFrictionPerPoint rows appended at ptr2.
+ for(PxU32 a = 0; a < numFrictionPerPoint; ++a)
+ {
+ SolverFriction4Base* PX_RESTRICT friction = reinterpret_cast<SolverFriction4Base*>(ptr2);
+
+ ptr2 += frictionSize;
+
+ const Vec4V tX = tFallbackX[fricIndex];
+ const Vec4V tY = tFallbackY[fricIndex];
+ const Vec4V tZ = tFallbackZ[fricIndex];
+ //Alternate between the two tangents on every friction row; the toggle carries over from one contact to the next within the patch.
+ fricIndex = 1 - fricIndex;
+
+ const Vec4V raXnX = V4NegMulSub(raZ, tY, V4Mul(raY, tZ));
+ const Vec4V raXnY = V4NegMulSub(raX, tZ, V4Mul(raZ, tX));
+ const Vec4V raXnZ = V4NegMulSub(raY, tX, V4Mul(raX, tY));
+
+ const Vec4V v0a0 = V4Mul(invInertia0X0, raXnX);
+ const Vec4V v0a1 = V4Mul(invInertia0X1, raXnX);
+ const Vec4V v0a2 = V4Mul(invInertia0X2, raXnX);
+
+ const Vec4V v0PlusV1a0 = V4MulAdd(invInertia0Y0, raXnY, v0a0);
+ const Vec4V v0PlusV1a1 = V4MulAdd(invInertia0Y1, raXnY, v0a1);
+ const Vec4V v0PlusV1a2 = V4MulAdd(invInertia0Y2, raXnY, v0a2);
+
+ const Vec4V delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, v0PlusV1a0);
+ const Vec4V delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, v0PlusV1a1);
+ const Vec4V delAngVel0Z = V4MulAdd(invInertia0Z2, raXnZ, v0PlusV1a2);
+
+ const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0Z, delAngVel0Z, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0X, delAngVel0X)));
+
+ const Vec4V norVel0 = V4MulAdd(tX, linVelT00, V4MulAdd(tY, linVelT10, V4Mul(tZ, linVelT20)));
+ const Vec4V dotRaXnAngVel0 = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4Mul(raXnX, angVelT00)));
+ Vec4V vrel = V4Add(norVel0, dotRaXnAngVel0);
+
+ Vec4V unitResponse = V4Add(invMass0_dom0fV, dotDelAngVel0);
+
+ if(isDynamic)
+ {
+ SolverFriction4Dynamic* PX_RESTRICT dFric = static_cast<SolverFriction4Dynamic*>(friction);
+
+ const Vec4V rbXnX = V4NegMulSub(rbZ, tY, V4Mul(rbY, tZ));
+ const Vec4V rbXnY = V4NegMulSub(rbX, tZ, V4Mul(rbZ, tX));
+ const Vec4V rbXnZ = V4NegMulSub(rbY, tX, V4Mul(rbX, tY));
+
+ const Vec4V v0b0 = V4Mul(invInertia1X0, rbXnX);
+ const Vec4V v0b1 = V4Mul(invInertia1X1, rbXnX);
+ const Vec4V v0b2 = V4Mul(invInertia1X2, rbXnX);
+
+ const Vec4V v0PlusV1b0 = V4MulAdd(invInertia1Y0, rbXnY, v0b0);
+ const Vec4V v0PlusV1b1 = V4MulAdd(invInertia1Y1, rbXnY, v0b1);
+ const Vec4V v0PlusV1b2 = V4MulAdd(invInertia1Y2, rbXnY, v0b2);
+
+ const Vec4V delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, v0PlusV1b0);
+ const Vec4V delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, v0PlusV1b1);
+ const Vec4V delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, v0PlusV1b2);
+
+ const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1Z, delAngVel1Z, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1X, delAngVel1X)));
+
+ const Vec4V norVel1 = V4MulAdd(tX, linVelT01, V4MulAdd(tY, linVelT11, V4Mul(tZ, linVelT21)));
+ const Vec4V dotRbXnAngVel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01)));
+ vrel = V4Sub(vrel, V4Add(norVel1, dotRbXnAngVel1));
+ //NOTE(review): body 1 uses invMassNorLenSq1 (invMass * normalLenSq) while body 0 above uses invMass0_dom0fV directly - confirm the asymmetry is intended.
+ const Vec4V resp1 = V4Add(dotDelAngVel1, invMassNorLenSq1);
+
+ unitResponse = V4Add(unitResponse, resp1);
+
+ dFric->rbXnX = delAngVel1X;
+ dFric->rbXnY = delAngVel1Y;
+ dFric->rbXnZ = delAngVel1Z;
+ }
+ //Friction velMultiplier is stored negated: -(1 / unitResponse) where unitResponse > 0, otherwise 0.
+ const Vec4V velMultiplier = V4Neg(V4Sel(V4IsGrtr(unitResponse, zero), V4Recip(unitResponse), zero));
+
+ friction->appliedForce = zero;
+ friction->raXnX = delAngVel0X;
+ friction->raXnY = delAngVel0Y;
+ friction->raXnZ = delAngVel0Z;
+ friction->velMultiplier = velMultiplier;
+ friction->targetVelocity = V4Sub(V4MulAdd(targetVelZ, tZ, V4MulAdd(targetVelY, tY, V4Mul(targetVelX, tX))), vrel);
+ friction->normalX = tX;
+ friction->normalY = tY;
+ friction->normalZ = tZ;
+ }
+ }
+ if(!(finished & 0x1)) //Advance each lane that still has contacts and record whether it has now run out.
+ {
+ iter0.nextContact(patch0, contact0);
+ newFinished |= PxU32(!iter0.hasNextContact());
+ }
+
+ if(!(finished & 0x2))
+ {
+ iter1.nextContact(patch1, contact1);
+ newFinished |= (PxU32(!iter1.hasNextContact()) << 1);
+ }
+
+ if(!(finished & 0x4))
+ {
+ iter2.nextContact(patch2, contact2);
+ newFinished |= (PxU32(!iter2.hasNextContact()) << 2);
+ }
+
+ if(!(finished & 0x8))
+ {
+ iter3.nextContact(patch3, contact3);
+ newFinished |= (PxU32(!iter3.hasNextContact()) << 3);
+ }
+ }
+ ptr = p; //The next patch's contact header starts right after this patch's contact rows.
+ }
+ return true;
+}
+
+
+
+//Computes the size of the combined solver data for a batch of four contact pairs.
+//The per-pair friction patch correlation must already have happened; this function
+//only reads the resulting counts and material flags from the correlation buffer.
+//  descs                     - the four pair descriptors (friction patch start index and count are read)
+//  threadContext             - unused
+//  c                         - correlation buffer
+//  numFrictionPerPoint       - friction rows generated per contact point
+//  _solverConstraintByteSize - out: byte size of the shared constraint block, rounded up to 16 (must be 0 on entry)
+//  _axisConstraintCount      - out: contact + friction row count for each of the four pairs
+//  _numContactPoints4        - out: sum over patch slots of the largest contact count among the four pairs
+void computeBlockStreamByteSizesCoulomb4(PxSolverContactDesc* descs,
+    ThreadContext& threadContext, const CorrelationBuffer& c,
+    const PxU32 numFrictionPerPoint,
+    PxU32& _solverConstraintByteSize, PxU32* _axisConstraintCount, PxU32& _numContactPoints4)
+{
+    PX_UNUSED(threadContext);
+    PX_ASSERT(0 == _solverConstraintByteSize);
+
+    //Largest contact / friction row count seen in each patch slot across the four pairs.
+    PxU32 widestContacts[CorrelationBuffer::MAX_FRICTION_PATCHES];
+    PxU32 widestFriction[CorrelationBuffer::MAX_FRICTION_PATCHES];
+    PxMemZero(widestContacts, sizeof(widestContacts));
+    PxMemZero(widestFriction, sizeof(widestFriction));
+
+    PxU32 patchSlots = 0;
+    for(PxU32 pair = 0; pair < 4; ++pair)
+    {
+        PxU32 rows = 0;
+
+        for(PxU32 slot = 0; slot < descs[pair].numFrictionPatches; slot++)
+        {
+            const PxU32 patchIndex = slot + descs[pair].startFrictionPatchIndex;
+            const PxU32 contactCount = c.frictionPatchContactCounts[patchIndex];
+
+            //Empty patches contribute neither rows nor storage.
+            if(contactCount == 0)
+                continue;
+
+            widestContacts[slot] = PxMax(contactCount, widestContacts[slot]);
+            rows += contactCount;
+
+            const bool haveFriction = (c.frictionPatches[patchIndex].materialFlags & PxMaterialFlag::eDISABLE_FRICTION) == 0;
+            if(haveFriction)
+            {
+                const PxU32 frictionRows = contactCount * numFrictionPerPoint;
+                widestFriction[slot] = PxMax(frictionRows, widestFriction[slot]);
+                rows += frictionRows;
+            }
+        }
+
+        patchSlots = PxMax(descs[pair].numFrictionPatches, patchSlots);
+        _axisConstraintCount[pair] = rows;
+    }
+
+    //Accumulate the per-slot maxima: the batch stores one 4-wide row per slot entry.
+    PxU32 contactRows4 = 0;
+    PxU32 frictionRows4 = 0;
+    for(PxU32 slot = 0; slot < patchSlots; ++slot)
+    {
+        contactRows4 += widestContacts[slot];
+        frictionRows4 += widestFriction[slot];
+    }
+
+    _numContactPoints4 = contactRows4;
+
+    //The static layouts are used only when none of the four second bodies is flagged dynamic.
+    const PxU32 combinedState1 = PxU32(descs[0].bodyState1 | descs[1].bodyState1 | descs[2].bodyState1 | descs[3].bodyState1);
+    const bool isStatic = (combinedState1 & PxSolverContactDesc::eDYNAMIC_BODY) == 0;
+
+    const PxU32 headerSize = (sizeof(SolverContactCoulombHeader4) + sizeof(SolverFrictionHeader4)) * patchSlots;
+
+    //Each contact row also gets one Vec4V for the applied force buffer.
+    PxU32 constraintSize;
+    if(isStatic)
+        constraintSize = ((sizeof(SolverContact4Base) + sizeof(Vec4V)) * contactRows4) + (sizeof(SolverFriction4Base) * frictionRows4);
+    else
+        constraintSize = ((sizeof(SolverContact4Dynamic) + sizeof(Vec4V)) * contactRows4) + (sizeof(SolverFriction4Dynamic) * frictionRows4);
+
+    //Round the total up to a multiple of 16 bytes.
+    _solverConstraintByteSize = ((constraintSize + headerSize + 0x0f) & ~0x0f);
+    PX_ASSERT(0 == (_solverConstraintByteSize & 0x0f));
+}
+
+
+//Sizes and reserves the shared constraint block for a batch of four contact pairs.
+//  solverConstraint         - out: start of the reserved block (must be NULL on entry; left untouched on failure or when nothing is needed)
+//  solverConstraintByteSize - out: size computed by computeBlockStreamByteSizesCoulomb4 (must be 0 on entry)
+//  axisConstraintCount      - out: per-pair row counts (4 entries)
+//  numContactPoints4        - out: number of 4-wide contact rows
+//Returns eSUCCESS when the block was reserved or no memory was required, eUNBATCHABLE when the
+//block (plus 16 bytes) would exceed 16384 bytes, and eOUT_OF_MEMORY when the allocator failed.
+static SolverConstraintPrepState::Enum reserveBlockStreamsCoulomb4(PxSolverContactDesc* descs, ThreadContext& threadContext, const CorrelationBuffer& c,
+    PxU8*& solverConstraint, const PxU32 numFrictionPerContactPoint,
+    PxU32& solverConstraintByteSize,
+    PxU32* axisConstraintCount, PxU32& numContactPoints4, PxConstraintAllocator& constraintAllocator)
+{
+    PX_ASSERT(NULL == solverConstraint);
+    PX_ASSERT(0 == solverConstraintByteSize);
+
+    //Work out how many bytes the batch needs.
+    computeBlockStreamByteSizesCoulomb4(
+        descs, threadContext, c, numFrictionPerContactPoint, solverConstraintByteSize,
+        axisConstraintCount, numContactPoints4);
+
+    //Nothing to reserve: succeed and leave solverConstraint as it was.
+    if(0 == solverConstraintByteSize)
+        return SolverConstraintPrepState::eSUCCESS;
+
+    //16 extra bytes are requested on top of the computed size.
+    const PxU32 requestedBytes = solverConstraintByteSize + 16u;
+    if(requestedBytes > 16384)
+        return SolverConstraintPrepState::eUNBATCHABLE;
+
+    PxU8* const block = constraintAllocator.reserveConstraintData(requestedBytes);
+
+    //A null return and an all-ones return are reported with different warnings.
+    if(0 == block)
+    {
+        PX_WARN_ONCE(
+            "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+            "Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks.");
+        return SolverConstraintPrepState::eOUT_OF_MEMORY;
+    }
+
+    if((reinterpret_cast<PxU8*>(-1)) == block)
+    {
+        PX_WARN_ONCE(
+            "Attempting to allocate more than 16K of contact data for a single contact pair in constraint prep. "
+            "Either accept dropped contacts or simplify collision geometry.");
+        return SolverConstraintPrepState::eOUT_OF_MEMORY;
+    }
+
+    //Reservation succeeded: hand the block back to the caller.
+    solverConstraint = block;
+    PX_ASSERT(0==(uintptr_t(solverConstraint) & 0x0f));
+    return SolverConstraintPrepState::eSUCCESS;
+}
+
+//Block-of-four constraint prep with PxFrictionType::eONE_DIRECTIONAL friction.
+//Every argument is forwarded unchanged to createFinalizeSolverContacts4Coulomb.
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb1D(
+    PxsContactManagerOutput** outputs,
+    ThreadContext& threadContext,
+    PxSolverContactDesc* blockDescs,
+    const PxReal invDtF32,
+    PxReal bounceThresholdF32,
+    PxReal frictionOffsetThreshold,
+    PxReal correlationDistance,
+    PxConstraintAllocator& constraintAllocator)
+{
+    const PxFrictionType::Enum frictionModel = PxFrictionType::eONE_DIRECTIONAL;
+    return createFinalizeSolverContacts4Coulomb(
+        outputs, threadContext, blockDescs,
+        invDtF32, bounceThresholdF32, frictionOffsetThreshold, correlationDistance,
+        constraintAllocator, frictionModel);
+}
+
+//Block-of-four constraint prep with PxFrictionType::eTWO_DIRECTIONAL friction.
+//Every argument is forwarded unchanged to createFinalizeSolverContacts4Coulomb.
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb2D(
+    PxsContactManagerOutput** outputs,
+    ThreadContext& threadContext,
+    PxSolverContactDesc* blockDescs,
+    const PxReal invDtF32,
+    PxReal bounceThresholdF32,
+    PxReal frictionOffsetThreshold,
+    PxReal correlationDistance,
+    PxConstraintAllocator& constraintAllocator)
+{
+    const PxFrictionType::Enum frictionModel = PxFrictionType::eTWO_DIRECTIONAL;
+    return createFinalizeSolverContacts4Coulomb(
+        outputs, threadContext, blockDescs,
+        invDtF32, bounceThresholdF32, frictionOffsetThreshold, correlationDistance,
+        constraintAllocator, frictionModel);
+}
+
+
+//Builds the batched Coulomb-friction solver constraints for four contact pairs.
+//  outputs             - the four contact manager outputs to extract contacts from
+//  threadContext       - supplies the shared contact buffer and correlation buffer (both are reset here)
+//  blockDescs          - the four pair descriptors; contact/patch ranges and constraint pointers are written back
+//  invDtF32, bounceThresholdF32 - forwarded to setupFinalizeSolverConstraintsCoulomb4
+//  frictionOffsetThreshold, correlationDistance - unused
+//  constraintAllocator - source of the constraint memory
+//  frictionType        - eONE_DIRECTIONAL gives one friction row per contact point, anything else gives two
+//Returns eUNBATCHABLE when the pairs cannot be processed as a batch (more than 64 contacts in total,
+//a pair with no extracted contacts, or patch correlation overflow), otherwise the state reported by
+//reserveBlockStreamsCoulomb4.
+SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb(
+    PxsContactManagerOutput** outputs,
+    ThreadContext& threadContext,
+    PxSolverContactDesc* blockDescs,
+    const PxReal invDtF32,
+    PxReal bounceThresholdF32,
+    PxReal frictionOffsetThreshold,
+    PxReal correlationDistance,
+    PxConstraintAllocator& constraintAllocator,
+    PxFrictionType::Enum frictionType)
+{
+    PX_UNUSED(correlationDistance);
+    PX_UNUSED(frictionOffsetThreshold);
+
+    //Clear every pair's constraint length first so that any early exit leaves all four empty.
+    for(PxU32 pair = 0; pair < 4; ++pair)
+        blockDescs[pair].desc->constraintLengthOver16 = 0;
+
+    PX_ASSERT(outputs[0]->nbContacts && outputs[1]->nbContacts && outputs[2]->nbContacts && outputs[3]->nbContacts);
+
+    Gu::ContactBuffer& contactBuffer = threadContext.mContactBuffer;
+    contactBuffer.count = 0;
+
+    CorrelationBuffer& correlation = threadContext.mCorrelationBuffer;
+    correlation.frictionPatchCount = 0;
+    correlation.contactPatchCount = 0;
+
+    const PxU32 frictionRowsPerPoint = PxU32(frictionType == PxFrictionType::eONE_DIRECTIONAL ? 1 : 2);
+
+    //One lane per pair; loaded as aligned Vec4Vs further down.
+    PX_ALIGN(16, PxReal invMassScale0[4]);
+    PX_ALIGN(16, PxReal invMassScale1[4]);
+    PX_ALIGN(16, PxReal invInertiaScale0[4]);
+    PX_ALIGN(16, PxReal invInertiaScale1[4]);
+
+    //Running number of contacts extracted into the shared buffer so far.
+    PxU32 extractedTotal = 0;
+
+    for(PxU32 pair = 0; pair < 4; ++pair)
+    {
+        PxSolverContactDesc& pairDesc = blockDescs[pair];
+
+        //This pair's contacts start where the previous pair's ended.
+        pairDesc.contacts = &contactBuffer.contacts[extractedTotal];
+
+        Ps::prefetchLine(pairDesc.desc->bodyA);
+        Ps::prefetchLine(pairDesc.desc->bodyB);
+
+        //Give up on batching if this pair would take the total past 64 contacts.
+        if((extractedTotal + outputs[pair]->nbContacts) > 64)
+        {
+            return SolverConstraintPrepState::eUNBATCHABLE;
+        }
+
+        bool hasMaxImpulse, hasTargetVelocity;
+        const PxReal defaultMaxImpulse = PxMin(pairDesc.data0->maxContactImpulse, pairDesc.data1->maxContactImpulse);
+
+        const PxU32 extracted = extractContacts(contactBuffer, *outputs[pair], hasMaxImpulse, hasTargetVelocity,
+            invMassScale0[pair], invMassScale1[pair],
+            invInertiaScale0[pair], invInertiaScale1[pair], defaultMaxImpulse);
+
+        if(0 == extracted)
+            return SolverConstraintPrepState::eUNBATCHABLE;
+
+        extractedTotal += extracted;
+
+        pairDesc.numContacts = extracted;
+        pairDesc.hasMaxImpulse = hasMaxImpulse;
+
+        //Remember where this pair's patches begin in the shared correlation buffer.
+        pairDesc.startFrictionPatchIndex = correlation.frictionPatchCount;
+        pairDesc.startContactPatchIndex = correlation.contactPatchCount;
+
+        createContactPatches(correlation, pairDesc.contacts, extracted, PXC_SAME_NORMAL);
+
+        const bool overflowed = correlatePatches(correlation, pairDesc.contacts, pairDesc.bodyFrame0, pairDesc.bodyFrame1, PXC_SAME_NORMAL,
+            pairDesc.startContactPatchIndex, pairDesc.startFrictionPatchIndex);
+        if(overflowed)
+            return SolverConstraintPrepState::eUNBATCHABLE;
+
+        pairDesc.numContactPatches = PxU16(correlation.contactPatchCount - pairDesc.startContactPatchIndex);
+        pairDesc.numFrictionPatches = correlation.frictionPatchCount - pairDesc.startFrictionPatchIndex;
+
+        //Fold the descriptor's own scales into the ones produced by contact extraction.
+        invMassScale0[pair] *= pairDesc.mInvMassScales.linear0;
+        invMassScale1[pair] *= pairDesc.mInvMassScales.linear1;
+        invInertiaScale0[pair] *= pairDesc.mInvMassScales.angular0;
+        invInertiaScale1[pair] *= pairDesc.mInvMassScales.angular1;
+    }
+
+    //Size and reserve the single constraint block shared by the four pairs.
+    PxU8* solverConstraint = NULL;
+    PxU32 solverConstraintByteSize = 0;
+    PxU32 rowsPerPair[4];
+    PxU32 numContactPoints4 = 0;
+
+    const SolverConstraintPrepState::Enum reserveState = reserveBlockStreamsCoulomb4(blockDescs, threadContext, correlation,
+        solverConstraint, frictionRowsPerPoint,
+        solverConstraintByteSize,
+        rowsPerPair, numContactPoints4, constraintAllocator);
+
+    if(reserveState != SolverConstraintPrepState::eSUCCESS)
+        return reserveState;
+
+    //All four descriptors point at the same block and report the same length.
+    for(PxU32 pair = 0; pair < 4; ++pair)
+    {
+        PxSolverConstraintDesc& constraintDesc = *blockDescs[pair].desc;
+        constraintDesc.constraint = solverConstraint;
+
+        //KS - TODO - add back in counters for axisConstraintCount somewhere...
+        blockDescs[pair].axisConstraintCount += Ps::to16(rowsPerPair[pair]);
+
+        constraintDesc.constraintLengthOver16 = Ps::to16(solverConstraintByteSize/16);
+
+        //One PxReal of write-back per contact reported by the contact manager output.
+        constraintDesc.writeBack = outputs[pair]->contactForces;
+        PxU32 writeBackLength = outputs[pair]->nbContacts * sizeof(PxReal);
+        setWritebackLength(constraintDesc, writeBackLength);
+    }
+
+    const Vec4V iMassScale0 = V4LoadA(invMassScale0);
+    const Vec4V iInertiaScale0 = V4LoadA(invInertiaScale0);
+    const Vec4V iMassScale1 = V4LoadA(invMassScale1);
+    const Vec4V iInertiaScale1 = V4LoadA(invInertiaScale1);
+
+    const bool hasFriction = setupFinalizeSolverConstraintsCoulomb4(blockDescs, solverConstraint,
+        invDtF32, bounceThresholdF32, correlation, frictionRowsPerPoint, numContactPoints4, solverConstraintByteSize,
+        iMassScale0, iInertiaScale0, iMassScale1, iInertiaScale1);
+
+    //Two PxU32 words are written directly after the constraint data:
+    //a zero, then all-ones if any friction was generated (zero otherwise).
+    PxU8* const trailer = solverConstraint + solverConstraintByteSize;
+    *(reinterpret_cast<PxU32*>(trailer)) = 0;
+    *(reinterpret_cast<PxU32*>(trailer + 4)) = hasFriction ? 0xFFFFFFFF : 0;
+
+    return SolverConstraintPrepState::eSUCCESS;
+}
+
+}
+
+}
+
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepPF.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepPF.cpp
new file mode 100644
index 00000000..4651605b
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepPF.cpp
@@ -0,0 +1,650 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxPreprocessor.h"
+#include "PsVecMath.h"
+#include "PsMathUtils.h"
+#include "DySolverContact.h"
+#include "DySolverContactPF.h"
+#include "DySolverConstraintTypes.h"
+#include "PxcNpWorkUnit.h"
+#include "DyThreadContext.h"
+#include "DyContactPrep.h"
+#include "PxcNpContactPrepShared.h"
+//#include "PxvGeometry.h"
+#include "PxvDynamics.h"
+#include "DyCorrelationBuffer.h"
+#include "DySolverConstraintDesc.h"
+#include "DySolverBody.h"
+#include "DySolverContact4.h"
+#include "DySolverContactPF4.h"
+
+
+#include "PsVecMath.h"
+#include "PxContactModifyCallback.h"
+#include "PxsMaterialManager.h"
+#include "PxsMaterialCombiner.h"
+#include "DySolverExt.h"
+#include "DyArticulationContactPrep.h"
+#include "DyContactPrepShared.h"
+
+#include "PsFoundation.h"
+
+using namespace physx::Gu;
+using namespace physx::shdfnd::aos;
+
+namespace physx
+{
+namespace Dy
+{
+
+bool createFinalizeSolverContactsCoulomb(PxSolverContactDesc& contactDesc, // Forward declaration: defined at the end of this file and called by the 1D/2D wrappers.
+ PxsContactManagerOutput& output,
+ ThreadContext& threadContext,
+ const PxReal invDtF32,
+ PxReal bounceThresholdF32,
+ PxReal frictionOffsetThreshold,
+ PxReal correlationDistance,
+ PxConstraintAllocator& constraintAllocator,
+ PxFrictionType::Enum frictionType); // eONE_DIRECTIONAL -> one friction row per contact point, otherwise two
+
+static bool setupFinalizeSolverConstraintsCoulomb( // Writes all contact rows, then all friction rows, for one pair into workspace; returns true if at least one friction row was written.
+ Sc::ShapeInteraction* shapeInteraction, // stored in every contact header
+ const ContactBuffer& buffer, // contact points addressed through the patches in c
+ const CorrelationBuffer& c, // friction patch / contact patch correlation data
+ const PxTransform& bodyFrame0, // only .p is read
+ const PxTransform& bodyFrame1, // only .p is read
+ PxU8* workspace, // destination buffer for the constraint stream
+ const PxSolverBodyData& data0, // velocities, invMass, sqrtInvInertia and penBiasClamp are read
+ const PxSolverBodyData& data1, // velocities, invMass, sqrtInvInertia and penBiasClamp are read
+ const PxReal invDtF32,
+ PxReal bounceThresholdF32,
+ PxU32 frictionPerPointCount, // friction rows emitted per contact point
+ const bool hasForceThresholds, // sets SolverContactHeader::eHAS_FORCE_THRESHOLDS in the header flags
+ const bool staticBody, // selects the STATIC header types instead of the RB_CONTACT / FRICTION ones
+ PxReal invMassScale0, PxReal invInertiaScale0,
+ PxReal invMassScale1, PxReal invInertiaScale1,
+ PxReal restDist, // forwarded to constructContactConstraint
+ const PxReal maxCCDSeparation) // forwarded to constructContactConstraint
+{
+ const FloatV ccdMaxSeparation = FLoad(maxCCDSeparation);
+ PxU8* PX_RESTRICT ptr = workspace; // write cursor; ends up one past the last byte written
+ const FloatV zero=FZero();
+
+ PxU8 flags = PxU8(hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0);
+
+ const FloatV restDistance = FLoad(restDist);
+
+ const Vec3V bodyFrame0p = V3LoadU(bodyFrame0.p);
+ const Vec3V bodyFrame1p = V3LoadU(bodyFrame1.p);
+
+ Ps::prefetchLine(c.contactID);
+ Ps::prefetchLine(c.contactID, 128);
+
+ const PxU32 frictionPatchCount = c.frictionPatchCount;
+
+ const PxU32 pointStride = sizeof(SolverContactPoint);
+ const PxU32 frictionStride = sizeof(SolverContactFriction);
+ const PxU8 pointHeaderType = Ps::to8(staticBody ? DY_SC_TYPE_STATIC_CONTACT : DY_SC_TYPE_RB_CONTACT);
+ const PxU8 frictionHeaderType = Ps::to8(staticBody ? DY_SC_TYPE_STATIC_FRICTION : DY_SC_TYPE_FRICTION);
+
+
+ const Vec3V linVel0 = V3LoadU(data0.linearVelocity);
+ const Vec3V linVel1 = V3LoadU(data1.linearVelocity);
+ const Vec3V angVel0 = V3LoadU(data0.angularVelocity);
+ const Vec3V angVel1 = V3LoadU(data1.angularVelocity);
+
+
+ const FloatV invMass0 = FLoad(data0.invMass);
+ const FloatV invMass1 = FLoad(data1.invMass);
+
+ const FloatV maxPenBias = FMax(FLoad(data0.penBiasClamp), FLoad(data1.penBiasClamp));
+
+ // PT: the matrix is symmetric so we can read it as a PxMat33! Gets rid of 25000+ LHS.
+ const PxMat33& invIn0 = reinterpret_cast<const PxMat33&>(data0.sqrtInvInertia);
+ PX_ALIGN(16, const Mat33V invSqrtInertia0)
+ (
+ V3LoadU(invIn0.column0),
+ V3LoadU(invIn0.column1),
+ V3LoadU(invIn0.column2)
+ );
+ const PxMat33& invIn1 = reinterpret_cast<const PxMat33&>(data1.sqrtInvInertia);
+ PX_ALIGN(16, const Mat33V invSqrtInertia1)
+ (
+ V3LoadU(invIn1.column0),
+ V3LoadU(invIn1.column1),
+ V3LoadU(invIn1.column2)
+ );
+
+ const FloatV invDt = FLoad(invDtF32);
+ const FloatV p8 = FLoad(0.8f);
+ const FloatV bounceThreshold = FLoad(bounceThresholdF32);
+ const FloatV orthoThreshold = FLoad(0.70710678f);
+ const FloatV eps = FLoad(0.00001f);
+
+ const FloatV invDtp8 = FMul(invDt, p8);
+
+ const FloatV d0 = FLoad(invMassScale0);
+ const FloatV d1 = FLoad(invMassScale1);
+ const FloatV nDom1fV = FNeg(d1); // negated scale for body 1; the headers below store FNeg(invMass1_dom1fV), i.e. the un-negated product
+ const FloatV angD0 = FLoad(invInertiaScale0);
+ const FloatV angD1 = FLoad(invInertiaScale1);
+
+ const FloatV invMass0_dom0fV = FMul(d0, invMass0);
+ const FloatV invMass1_dom1fV = FMul(nDom1fV, invMass1);
+
+
+ for(PxU32 i=0;i< frictionPatchCount;i++) // pass 1: one contact header plus its contact points per non-empty friction patch
+ {
+ const PxU32 contactCount = c.frictionPatchContactCounts[i];
+ if(contactCount == 0)
+ continue;
+
+ const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start; // first contact of the patch supplies normal and restitution for the whole patch
+
+ const Vec3V normal = Ps::aos::V3LoadA(contactBase0->normal);
+
+ const FloatV normalLenSq = V3LengthSq(normal);
+ const VecCrossV norCross = V3PrepareCross(normal);
+
+ const FloatV restitution = FLoad(contactBase0->restitution);
+
+ const FloatV norVel = V3SumElems(V3NegMulSub(normal, linVel1, V3Mul(normal, linVel0))); // single-expression form of the commented-out norVel0 - norVel1 below
+ /*const FloatV norVel0 = V3Dot(normal, linVel0);
+ const FloatV norVel1 = V3Dot(normal, linVel1);
+ const FloatV norVel = FSub(norVel0, norVel1);*/
+
+ const FloatV invMassNorLenSq0 = FMul(invMass0_dom0fV, normalLenSq);
+ const FloatV invMassNorLenSq1 = FMul(invMass1_dom1fV, normalLenSq);
+
+
+ SolverContactCoulombHeader* PX_RESTRICT header = reinterpret_cast<SolverContactCoulombHeader*>(ptr);
+ ptr += sizeof(SolverContactCoulombHeader);
+
+ Ps::prefetchLine(ptr, 128);
+ Ps::prefetchLine(ptr, 256);
+ Ps::prefetchLine(ptr, 384);
+
+
+ header->numNormalConstr = PxU8(contactCount);
+ header->type = pointHeaderType;
+ //header->setRestitution(n.restitution);
+ //header->setRestitution(contactBase0->restitution);
+
+ header->setDominance0(invMass0_dom0fV);
+ header->setDominance1(FNeg(invMass1_dom1fV));
+ FStore(angD0, &header->angDom0);
+ FStore(angD1, &header->angDom1);
+ header->setNormal(normal);
+ header->flags = flags;
+ header->shapeInteraction = shapeInteraction;
+
+
+ for(PxU32 patch=c.correlationListHeads[i]; // walk the linked list of contact patches correlated to this friction patch
+ patch!=CorrelationBuffer::LIST_END;
+ patch = c.contactPatches[patch].next)
+ {
+ const PxU32 count = c.contactPatches[patch].count;
+ const Gu::ContactPoint* contactBase = buffer.contacts + c.contactPatches[patch].start;
+
+
+ PxU8* p = ptr;
+ for(PxU32 j=0;j<count;j++)
+ {
+ const Gu::ContactPoint& contact = contactBase[j];
+
+ SolverContactPoint* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPoint*>(p);
+ p += pointStride;
+
+ constructContactConstraint(invSqrtInertia0, invSqrtInertia1, invMassNorLenSq0,
+ invMassNorLenSq1, angD0, angD1, bodyFrame0p, bodyFrame1p,
+ normal, norVel, norCross, angVel0, angVel1,
+ invDt, invDtp8, restDistance, maxPenBias, restitution,
+ bounceThreshold, contact, *solverContact, ccdMaxSeparation);
+ }
+ ptr = p;
+ }
+ }
+
+ //construct all the frictions
+
+ PxU8* PX_RESTRICT ptr2 = workspace; // pass 2 re-walks the contact headers from the start while ptr keeps appending after the last contact point
+
+ bool hasFriction = false;
+ for(PxU32 i=0;i< frictionPatchCount;i++)
+ {
+ const PxU32 contactCount = c.frictionPatchContactCounts[i];
+ if(contactCount == 0)
+ continue;
+
+ const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start;
+
+ SolverContactCoulombHeader* header = reinterpret_cast<SolverContactCoulombHeader*>(ptr2);
+ header->frictionOffset = PxU16(ptr - ptr2);// + sizeof(SolverFrictionHeader);
+ ptr2 += sizeof(SolverContactCoulombHeader) + header->numNormalConstr * pointStride; // step to the next contact header written in pass 1
+
+ const PxReal staticFriction = contactBase0->staticFriction;
+ const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
+ const bool haveFriction = (disableStrongFriction == 0);
+
+ SolverFrictionHeader* frictionHeader = reinterpret_cast<SolverFrictionHeader*>(ptr);
+ frictionHeader->numNormalConstr = Ps::to8(c.frictionPatchContactCounts[i]);
+ frictionHeader->numFrictionConstr = Ps::to8(haveFriction ? c.frictionPatchContactCounts[i] * frictionPerPointCount : 0);
+ ptr += sizeof(SolverFrictionHeader);
+ PxF32* appliedForceBuffer = reinterpret_cast<PxF32*>(ptr);
+ ptr += frictionHeader->getAppliedForcePaddingSize(c.frictionPatchContactCounts[i]);
+ PxMemZero(appliedForceBuffer, sizeof(PxF32)*contactCount*frictionPerPointCount); // NOTE(review): ptr advanced by getAppliedForcePaddingSize(contactCount) but contactCount*frictionPerPointCount floats are zeroed - confirm the reserved region covers this when frictionPerPointCount is 2
+ Ps::prefetchLine(ptr, 128);
+ Ps::prefetchLine(ptr, 256);
+ Ps::prefetchLine(ptr, 384);
+
+ const Vec3V normal = V3LoadU(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal);
+
+ const FloatV normalX = V3GetX(normal);
+ const FloatV normalY = V3GetY(normal);
+ const FloatV normalZ = V3GetZ(normal);
+
+ const Vec3V t0Fallback1 = V3Merge(zero, FNeg(normalZ), normalY);
+ const Vec3V t0Fallback2 = V3Merge(FNeg(normalY), normalX, zero) ;
+
+ const BoolV con = FIsGrtr(orthoThreshold, FAbs(normalX));
+ const Vec3V tFallback1 = V3Sel(con, t0Fallback1, t0Fallback2); // fallback tangent chosen by comparing |normalX| against orthoThreshold
+
+ const Vec3V linVrel = V3Sub(linVel0, linVel1);
+ const Vec3V t0_ = V3Sub(linVrel, V3Scale(normal, V3Dot(normal, linVrel))); // relative linear velocity with its normal component removed
+ const FloatV sqDist = V3Dot(t0_,t0_);
+ const BoolV con1 = FIsGrtr(sqDist, eps);
+ const Vec3V tDir0 =V3Normalize(V3Sel(con1, t0_, tFallback1)); // use the fallback tangent when the tangential velocity is too small
+ const Vec3V tDir1 = V3Cross(tDir0, normal);
+
+ Vec3V tFallback = tDir0;
+ Vec3V tFallbackAlt = tDir1;
+
+ if(haveFriction) // with friction disabled only the header counts and the zeroed applied-force buffer above are written
+ {
+ //frictionHeader->setStaticFriction(n.staticFriction);
+ frictionHeader->setStaticFriction(staticFriction);
+ FStore(invMass0_dom0fV, &frictionHeader->invMass0D0);
+ FStore(FNeg(invMass1_dom1fV), &frictionHeader->invMass1D1);
+ FStore(angD0, &frictionHeader->angDom0);
+ FStore(angD1, &frictionHeader->angDom1);
+ frictionHeader->type = frictionHeaderType;
+
+ PxU32 totalPatchContactCount = 0; // NOTE(review): accumulated below but never read in this function
+
+ for(PxU32 patch=c.correlationListHeads[i];
+ patch!=CorrelationBuffer::LIST_END;
+ patch = c.contactPatches[patch].next)
+ {
+ const PxU32 count = c.contactPatches[patch].count;
+ const PxU32 start = c.contactPatches[patch].start;
+ const Gu::ContactPoint* contactBase = buffer.contacts + start;
+
+ PxU8* p = ptr;
+ for(PxU32 j =0; j < count; j++)
+ {
+ hasFriction = true;
+ const Gu::ContactPoint& contact = contactBase[j];
+ const Vec3V point = V3LoadU(contact.point);
+ const Vec3V ra = V3Sub(point, bodyFrame0p);
+ const Vec3V rb = V3Sub(point, bodyFrame1p);
+ const Vec3V targetVel = V3LoadU(contact.targetVel);
+
+ for(PxU32 k = 0; k < frictionPerPointCount; ++k)
+ {
+ const Vec3V t0 = tFallback; // take the current direction, then swap so successive rows alternate between tDir0 and tDir1
+ tFallback = tFallbackAlt;
+ tFallbackAlt = t0;
+
+ SolverContactFriction* PX_RESTRICT f0 = reinterpret_cast<SolverContactFriction*>(p);
+ p += frictionStride;
+ //f0->brokenOrContactIndex = contactId;
+
+ const Vec3V raXn = V3Cross(ra, t0);
+ const Vec3V rbXn = V3Cross(rb, t0);
+
+ const Vec3V delAngVel0 = M33MulV3(invSqrtInertia0, raXn);
+ const Vec3V delAngVel1 = M33MulV3(invSqrtInertia1, rbXn);
+
+ const FloatV resp0 = FAdd(invMass0_dom0fV, FMul(angD0, V3Dot(delAngVel0, delAngVel0)));
+ const FloatV resp1 = FSub(FMul(angD1, V3Dot(delAngVel1, delAngVel1)), invMass1_dom1fV); // subtracting the negated invMass1 term adds body 1's linear response
+ const FloatV resp = FAdd(resp0, resp1);
+
+ const FloatV velMultiplier = FNeg(FSel(FIsGrtr(resp, zero), FRecip(resp), zero)); // -1/resp, or 0 when resp is not positive
+
+ const FloatV vrel1 = FAdd(V3Dot(t0, linVel0), V3Dot(raXn, angVel0));
+ const FloatV vrel2 = FAdd(V3Dot(t0, linVel1), V3Dot(rbXn, angVel1));
+ const FloatV vrel = FSub(vrel1, vrel2);
+
+
+ f0->normalXYZ_appliedForceW = V4SetW(Vec4V_From_Vec3V(t0), zero);
+ f0->raXnXYZ_velMultiplierW = V4SetW(Vec4V_From_Vec3V(delAngVel0), velMultiplier);
+ //f0->rbXnXYZ_targetVelocityW = V4SetW(Vec4V_From_Vec3V(delAngVel1), FSub(V3Dot(targetVel, t0), vrel));
+ f0->rbXnXYZ_biasW = Vec4V_From_Vec3V(delAngVel1);
+ FStore(FSub(V3Dot(targetVel, t0), vrel), &f0->targetVel);
+ }
+ }
+
+ totalPatchContactCount += c.contactPatches[patch].count;
+
+ ptr = p;
+ }
+ }
+ }
+ *ptr = 0; // one zero byte after the last row; presumably a stream terminator - confirm against the solver
+ return hasFriction;
+}
+
+
+
+//Computes the byte size of the solver constraint stream for a single contact pair and the
+//number of constraint rows it contains.
+//  c                         - correlation buffer; per-friction-patch contact counts and material flags are read
+//  frictionCountPerPoint     - friction rows generated per contact point
+//  _solverConstraintByteSize - out: stream size rounded up to a multiple of 16 bytes (must be 0 on entry)
+//  _axisConstraintCount      - out: number of contact rows plus friction rows (must be 0 on entry)
+//  useExtContacts            - selects the Ext row layouts instead of the plain ones
+static void computeBlockStreamByteSizesCoulomb(const CorrelationBuffer& c,
+    const PxU32 frictionCountPerPoint, PxU32& _solverConstraintByteSize,
+    PxU32& _axisConstraintCount,
+    bool useExtContacts)
+{
+    PX_ASSERT(0 == _solverConstraintByteSize);
+    PX_ASSERT(0 == _axisConstraintCount);
+
+    // PT: use local vars to remove LHS
+    PxU32 solverConstraintByteSize = 0;
+    PxU32 axisConstraintCount = 0;
+
+    for(PxU32 i = 0; i < c.frictionPatchCount; i++)
+    {
+        const PxU32 contactCount = c.frictionPatchContactCounts[i];
+
+        //Empty friction patches take no space in the stream.
+        if(contactCount == 0)
+            continue;
+
+        //Contact header followed by one row per contact point.
+        solverConstraintByteSize += sizeof(SolverContactCoulombHeader);
+        solverConstraintByteSize += useExtContacts ? contactCount * sizeof(SolverContactPointExt)
+            : contactCount * sizeof(SolverContactPoint);
+        axisConstraintCount += contactCount;
+
+        //The friction header and its applied-force buffer are reserved whether or not
+        //friction is enabled: they are needed to store the accumulated impulses.
+        solverConstraintByteSize += sizeof(SolverFrictionHeader);
+        solverConstraintByteSize += SolverFrictionHeader::getAppliedForcePaddingSize(contactCount);
+
+        const bool haveFriction = (c.frictionPatches[i].materialFlags & PxMaterialFlag::eDISABLE_FRICTION) == 0;
+        if(haveFriction)
+        {
+            const PxU32 nbFrictionConstraints = contactCount * frictionCountPerPoint;
+
+            solverConstraintByteSize += useExtContacts ? nbFrictionConstraints * sizeof(SolverContactFrictionExt)
+                : nbFrictionConstraints * sizeof(SolverContactFriction);
+
+            //Count every friction row, not one per contact, so the total stays correct when
+            //frictionCountPerPoint is 2 (matches computeBlockStreamByteSizesCoulomb4).
+            axisConstraintCount += nbFrictionConstraints;
+        }
+    }
+    _axisConstraintCount = axisConstraintCount;
+
+    //16-byte alignment.
+    _solverConstraintByteSize = ((solverConstraintByteSize + 0x0f) & ~0x0f);
+    PX_ASSERT(0 == (_solverConstraintByteSize & 0x0f));
+}
+
+//Sizes and reserves the constraint block for a single contact pair.
+//  solverConstraint         - out: start of the reserved block (must be NULL on entry; left untouched on failure or when nothing is needed)
+//  solverConstraintByteSize - out: size computed by computeBlockStreamByteSizesCoulomb (must be 0 on entry)
+//  axisConstraintCount      - out: row count computed by computeBlockStreamByteSizesCoulomb (must be 0 on entry)
+//Returns true when the block was reserved or no memory was required, false when the allocator failed.
+static bool reserveBlockStreamsCoulomb(const CorrelationBuffer& c,
+    PxU8*& solverConstraint, PxU32 frictionCountPerPoint,
+    PxU32& solverConstraintByteSize,
+    PxU32& axisConstraintCount, PxConstraintAllocator& constraintAllocator,
+    bool useExtContacts)
+{
+    PX_ASSERT(NULL == solverConstraint);
+    PX_ASSERT(0 == solverConstraintByteSize);
+    PX_ASSERT(0 == axisConstraintCount);
+
+    //Work out how many bytes the pair needs.
+    computeBlockStreamByteSizesCoulomb(
+        c,
+        frictionCountPerPoint, solverConstraintByteSize,
+        axisConstraintCount, useExtContacts);
+
+    //Nothing to reserve: succeed and leave solverConstraint as it was.
+    if(0 == solverConstraintByteSize)
+        return true;
+
+    //16 extra bytes are requested on top of the computed size.
+    PxU8* const block = constraintAllocator.reserveConstraintData(solverConstraintByteSize + 16u);
+
+    //A null return and an all-ones return are reported with different warnings.
+    if(0 == block)
+    {
+        PX_WARN_ONCE(
+            "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+            "Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks.");
+        return false;
+    }
+
+    if((reinterpret_cast<PxU8*>(-1)) == block)
+    {
+        PX_WARN_ONCE(
+            "Attempting to allocate more than 16K of contact data for a single contact pair in constraint prep. "
+            "Either accept dropped contacts or simplify collision geometry.");
+        return false;
+    }
+
+    //Reservation succeeded: hand the block back to the caller.
+    solverConstraint = block;
+    PX_ASSERT(0==(uintptr_t(solverConstraint) & 0x0f));
+    return true;
+}
+
+//Single-pair constraint prep with PxFrictionType::eONE_DIRECTIONAL friction.
+//Every argument is forwarded unchanged to createFinalizeSolverContactsCoulomb.
+bool createFinalizeSolverContactsCoulomb1D(PxSolverContactDesc& contactDesc,
+    PxsContactManagerOutput& output,
+    ThreadContext& threadContext,
+    const PxReal invDtF32,
+    PxReal bounceThresholdF32,
+    PxReal frictionOffsetThreshold,
+    PxReal correlationDistance,
+    PxConstraintAllocator& constraintAllocator)
+{
+    const PxFrictionType::Enum frictionModel = PxFrictionType::eONE_DIRECTIONAL;
+    return createFinalizeSolverContactsCoulomb(
+        contactDesc, output, threadContext,
+        invDtF32, bounceThresholdF32, frictionOffsetThreshold, correlationDistance,
+        constraintAllocator, frictionModel);
+}
+
+//Single-pair constraint prep with PxFrictionType::eTWO_DIRECTIONAL friction.
+//Every argument is forwarded unchanged to createFinalizeSolverContactsCoulomb.
+bool createFinalizeSolverContactsCoulomb2D(PxSolverContactDesc& contactDesc,
+    PxsContactManagerOutput& output,
+    ThreadContext& threadContext,
+    const PxReal invDtF32,
+    PxReal bounceThresholdF32,
+    PxReal frictionOffsetThreshold,
+    PxReal correlationDistance,
+    PxConstraintAllocator& constraintAllocator)
+{
+    const PxFrictionType::Enum frictionModel = PxFrictionType::eTWO_DIRECTIONAL;
+    return createFinalizeSolverContactsCoulomb(
+        contactDesc, output, threadContext,
+        invDtF32, bounceThresholdF32, frictionOffsetThreshold, correlationDistance,
+        constraintAllocator, frictionModel);
+}
+
+//Builds the Coulomb-friction solver constraints for a single contact pair.
+//  contactDesc         - pair descriptor; its friction pointer/count are cleared and its constraint desc is filled in
+//  output              - contact manager output to extract contacts from; nbContacts is overwritten on success
+//  threadContext       - supplies the contact buffer and correlation buffer (both are reset here)
+//  invDtF32, bounceThresholdF32 - forwarded to the constraint setup routines
+//  frictionOffsetThreshold, correlationDistance - unused
+//  constraintAllocator - source of the constraint memory
+//  frictionType        - eONE_DIRECTIONAL gives one friction row per contact point, anything else gives two
+//Returns true when there were no contacts or the constraints were built, false when the
+//constraint memory could not be reserved.
+bool createFinalizeSolverContactsCoulomb(PxSolverContactDesc& contactDesc,
+    PxsContactManagerOutput& output,
+    ThreadContext& threadContext,
+    const PxReal invDtF32,
+    PxReal bounceThresholdF32,
+    PxReal frictionOffsetThreshold,
+    PxReal correlationDistance,
+    PxConstraintAllocator& constraintAllocator,
+    PxFrictionType::Enum frictionType)
+{
+    PX_UNUSED(correlationDistance);
+    PX_UNUSED(frictionOffsetThreshold);
+
+    PxSolverConstraintDesc& constraintDesc = *contactDesc.desc;
+    constraintDesc.constraintLengthOver16 = 0;
+
+    ContactBuffer& contactBuffer = threadContext.mContactBuffer;
+    contactBuffer.count = 0;
+
+    Ps::prefetchLine(contactDesc.frictionPtr);
+
+    //Scales start at identity and may be changed by contact extraction.
+    PxReal invMassScale0 = 1.f;
+    PxReal invMassScale1 = 1.f;
+    PxReal invInertiaScale0 = 1.f;
+    PxReal invInertiaScale1 = 1.f;
+
+    bool hasMaxImpulse = false;
+    bool hasTargetVelocity = false;
+
+    const PxReal defaultMaxImpulse = PxMin(contactDesc.data0->maxContactImpulse, contactDesc.data1->maxContactImpulse);
+
+    const PxU32 numContacts = extractContacts(contactBuffer, output, hasMaxImpulse, hasTargetVelocity,
+        invMassScale0, invMassScale1, invInertiaScale0, invInertiaScale1, defaultMaxImpulse);
+
+    //No contacts: clear the friction data and report success without reserving anything.
+    if(0 == numContacts)
+    {
+        contactDesc.frictionPtr = NULL;
+        contactDesc.frictionCount = 0;
+        return true;
+    }
+
+    Ps::prefetchLine(contactDesc.body0);
+    Ps::prefetchLine(contactDesc.body1);
+    Ps::prefetchLine(contactDesc.data0);
+    Ps::prefetchLine(contactDesc.data1);
+
+    CorrelationBuffer& correlation = threadContext.mCorrelationBuffer;
+    correlation.frictionPatchCount = 0;
+    correlation.contactPatchCount = 0;
+
+    createContactPatches(correlation, contactBuffer.contacts, contactBuffer.count, PXC_SAME_NORMAL);
+
+    const PxU32 frictionRowsPerPoint = PxU32(frictionType == PxFrictionType::eONE_DIRECTIONAL ? 1 : 2);
+
+    //Overflow is only reported in checked builds; processing continues either way.
+    bool overflow = correlatePatches(correlation, contactBuffer.contacts, contactDesc.bodyFrame0, contactDesc.bodyFrame1, PXC_SAME_NORMAL, 0, 0);
+    PX_UNUSED(overflow);
+#if PX_CHECKED
+    if(overflow)
+    {
+        Ps::getFoundation().error(physx::PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__,
+            "Dropping contacts in solver because we exceeded limit of 32 friction patches.");
+    }
+#endif
+
+    PxU8* solverConstraint = NULL;
+    PxU32 solverConstraintByteSize = 0;
+    PxU32 axisConstraintCount = 0;
+
+    //Articulation links on either side need the Ext constraint layout.
+    const bool useExtContacts = ((contactDesc.bodyState0 | contactDesc.bodyState1) & PxSolverContactDesc::eARTICULATION) != 0;
+
+    const bool reserved = reserveBlockStreamsCoulomb(
+        correlation,
+        solverConstraint, frictionRowsPerPoint,
+        solverConstraintByteSize,
+        axisConstraintCount,
+        constraintAllocator,
+        useExtContacts);
+
+    //Start from a cleared descriptor; it is only filled in when the reservation succeeded.
+    contactDesc.frictionPtr = NULL;
+    contactDesc.frictionCount = 0;
+    constraintDesc.constraint = NULL;
+    constraintDesc.constraintLengthOver16 = 0;
+
+    if(!reserved)
+        return false;
+
+    constraintDesc.constraint = solverConstraint;
+    output.nbContacts = Ps::to8(numContacts);
+    constraintDesc.constraintLengthOver16 = Ps::to16(solverConstraintByteSize/16);
+
+    //A successful reservation of zero bytes leaves no buffer to initialise.
+    if(NULL == solverConstraint)
+        return true;
+
+    const PxSolverBodyData& data0 = *contactDesc.data0;
+    const PxSolverBodyData& data1 = *contactDesc.data1;
+
+    bool hasFriction = false;
+    if(useExtContacts)
+    {
+        const SolverExtBody b0(reinterpret_cast<const void*>(contactDesc.body0), reinterpret_cast<const void*>(&data0), constraintDesc.linkIndexA);
+        const SolverExtBody b1(reinterpret_cast<const void*>(contactDesc.body1), reinterpret_cast<const void*>(&data1), constraintDesc.linkIndexB);
+
+        hasFriction = setupFinalizeExtSolverContactsCoulomb(contactBuffer, correlation, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint,
+            invDtF32, bounceThresholdF32, b0, b1, frictionRowsPerPoint,
+            invMassScale0, invInertiaScale0, invMassScale1, invInertiaScale1, contactDesc.restDistance, contactDesc.maxCCDSeparation);
+    }
+    else
+    {
+        const bool staticBody = (contactDesc.bodyState1 == PxSolverContactDesc::eSTATIC_BODY);
+
+        hasFriction = setupFinalizeSolverConstraintsCoulomb(contactDesc.shapeInteraction, contactBuffer, correlation, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint,
+            data0, data1, invDtF32, bounceThresholdF32, frictionRowsPerPoint, contactDesc.hasForceThresholds, staticBody,
+            invMassScale0, invInertiaScale0, invMassScale1, invInertiaScale1, contactDesc.restDistance, contactDesc.maxCCDSeparation);
+    }
+
+    //Two PxU32 words are written directly after the constraint data:
+    //a zero, then all-ones if any friction was generated (zero otherwise).
+    PxU8* const trailer = solverConstraint + solverConstraintByteSize;
+    *(reinterpret_cast<PxU32*>(trailer)) = 0;
+    *(reinterpret_cast<PxU32*>(trailer + 4)) = hasFriction ? 0xFFFFFFFF : 0;
+
+    return true;
+}
+
+}
+}
+
+
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepShared.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepShared.h
new file mode 100644
index 00000000..7accabd3
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepShared.h
@@ -0,0 +1,301 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef DY_CONTACT_PREP_SHARED_H
+#define DY_CONTACT_PREP_SHARED_H
+
+#include "foundation/PxPreprocessor.h"
+#include "PxSceneDesc.h"
+#include "PsVecMath.h"
+#include "PsMathUtils.h"
+#include "DyContactPrep.h"
+#include "DyCorrelationBuffer.h"
+#include "DyArticulationContactPrep.h"
+#include "PxsContactManager.h"
+#include "PxsContactManagerState.h"
+
+namespace physx
+{
+namespace Dy
+{
+
+
+PX_FORCE_INLINE bool pointsAreClose(const PxTransform& body1ToBody0,
+ const PxVec3& localAnchor0, const PxVec3& localAnchor1,
+ const PxVec3& axis, float correlDist)
+{
+ // Offset between the two anchors, with body1's anchor first mapped through body1ToBody0; only its component along 'axis' is tested.
+ const PxVec3 anchorDelta = localAnchor0 - body1ToBody0.transform(localAnchor1);
+ return PxAbs(anchorDelta.dot(axis)) < correlDist;
+}
+
+PX_FORCE_INLINE bool isSeparated(const FrictionPatch& patch, const PxTransform& body1ToBody0, const PxReal correlationDistance)
+{
+ // The patch counts as separated as soon as any one of its anchor pairs fails the pointsAreClose() test along body0Normal.
+ PX_ASSERT(patch.anchorCount <= 2);
+ bool anyAnchorDrifted = false;
+ for(PxU32 anchorIdx = 0; anchorIdx < patch.anchorCount && !anyAnchorDrifted; ++anchorIdx)
+ anyAnchorDrifted = !pointsAreClose(body1ToBody0, patch.body0Anchors[anchorIdx], patch.body1Anchors[anchorIdx], patch.body0Normal, correlationDistance);
+
+ return anyAnchorDrifted;
+}
+
+
+inline bool getFrictionPatches(CorrelationBuffer& c, // Appends the still-usable cached friction patches to c; returns false only when c has no free patch slot left.
+ const PxU8* frictionCookie, // raw bytes reinterpreted below as an array of FrictionPatch; NULL means there is nothing to carry over
+ PxU32 frictionPatchCount, // number of FrictionPatch entries read from frictionCookie
+ const PxTransform& bodyFrame0, // combined with bodyFrame1 to build body1ToBody0; also rotates body0Normal for c.frictionPatchWorldNormal
+ const PxTransform& bodyFrame1,
+ PxReal correlationDistance) // tolerance forwarded to isSeparated()
+{
+ PX_UNUSED(correlationDistance); // NOTE(review): the parameter is in fact read further down (isSeparated call), so this looks like a leftover
+ if(frictionCookie == NULL || frictionPatchCount == 0)
+ return true; // nothing cached: succeed without touching c
+
+ //KS - this is now DMA'd inside the shader so we don't need to immediate DMA it here
+ const FrictionPatch* patches = reinterpret_cast<const FrictionPatch*>(frictionCookie);
+
+ //Try working out relative transforms! TODO - can we compute this lazily for the first friction patch
+ bool evaluated = false; // becomes true once body1ToBody0 has been computed
+ PxTransform body1ToBody0;
+
+ while(frictionPatchCount--)
+ {
+ Ps::prefetchLine(patches,128);
+ const FrictionPatch& patch = *patches++;
+ PX_ASSERT (patch.broken == 0 || patch.broken == 1);
+ if(!patch.broken)
+ {
+ // if the eDISABLE_STRONG_FRICTION flag is there we need to blow away the previous frame's friction correlation, so
+ // that we can associate each friction anchor with a target velocity. So we lose strong friction.
+ if(patch.anchorCount != 0 && !(patch.materialFlags & PxMaterialFlag::eDISABLE_STRONG_FRICTION))
+ {
+ PX_ASSERT(patch.anchorCount <= 2);
+
+ // The relative transform is the same for every patch, so it is computed once, on the first patch that needs it.
+ if(!evaluated)
+ {
+ body1ToBody0 = bodyFrame0.transformInv(bodyFrame1);
+ evaluated = true;
+ }
+
+ // Keep the patch only if the two stored normals still agree (dot > PXC_SAME_NORMAL) and no anchor pair has drifted apart.
+ if(patch.body0Normal.dot(body1ToBody0.rotate(patch.body1Normal)) > PXC_SAME_NORMAL)
+ {
+ if(!isSeparated(patch, body1ToBody0, correlationDistance))
+ {
+ if(c.frictionPatchCount == CorrelationBuffer::MAX_FRICTION_PATCHES)
+ return false; // out of patch slots in c
+ {
+ c.contactID[c.frictionPatchCount][0] = 0xffff;
+ c.contactID[c.frictionPatchCount][1] = 0xffff;
+ //Rotate the contact normal into world space
+ c.frictionPatchWorldNormal[c.frictionPatchCount] = bodyFrame0.rotate(patch.body0Normal);
+ c.frictionPatchContactCounts[c.frictionPatchCount] = 0;
+ c.correlationListHeads[c.frictionPatchCount] = CorrelationBuffer::LIST_END;
+ PxMemCopy(&c.frictionPatches[c.frictionPatchCount++], &patch, sizeof(FrictionPatch)); // the post-increment here is what claims the slot
+ }
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+PX_FORCE_INLINE PxU32 extractContacts(Gu::ContactBuffer& buffer, PxsContactManagerOutput& npOutput, bool& hasMaxImpulse, bool& hasTargetVelocity, // Appends npOutput's contacts to buffer; returns how many were appended.
+ PxReal& invMassScale0, PxReal& invMassScale1, PxReal& invInertiaScale0, PxReal& invInertiaScale1, PxReal defaultMaxImpulse) // the bool/PxReal reference outputs are only written when the stream does not have forceNoResponse set
+{
+ PxContactStreamIterator iter(npOutput.contactPatches, npOutput.contactPoints, npOutput.getInternalFaceIndice(), npOutput.nbPatches, npOutput.nbContacts);
+
+ PxU32 numContacts = buffer.count, origContactCount = buffer.count; // new contacts go after whatever buffer already holds
+ if(!iter.forceNoResponse)
+ {
+ invMassScale0 = iter.getInvMassScale0();
+ invMassScale1 = iter.getInvMassScale1();
+ invInertiaScale0 = iter.getInvInertiaScale0();
+ invInertiaScale1 = iter.getInvInertiaScale1();
+ hasMaxImpulse = (iter.patch->internalFlags & PxContactPatch::eHAS_MAX_IMPULSE) != 0; // read before the first nextPatch() call, then applied to every patch below
+ hasTargetVelocity = (iter.patch->internalFlags & PxContactPatch::eHAS_TARGET_VELOCITY) != 0;
+
+ while(iter.hasNextPatch())
+ {
+ iter.nextPatch();
+ while(iter.hasNextContact())
+ {
+ iter.nextContact();
+ Ps::prefetchLine(iter.contact, 128);
+ Ps::prefetchLine(&buffer.contacts[numContacts], 128);
+ PxReal maxImpulse = hasMaxImpulse ? iter.getMaxImpulse() : defaultMaxImpulse;
+ if(maxImpulse != 0.f) // contacts whose max impulse is exactly zero are not copied
+ {
+ PX_ASSERT(numContacts < Gu::ContactBuffer::MAX_CONTACTS); // the only capacity check in this function
+ buffer.contacts[numContacts].normal = iter.getContactNormal();
+ buffer.contacts[numContacts].point = iter.getContactPoint();
+ buffer.contacts[numContacts].separation = iter.getSeparation();
+ //KS - we use the face indices to cache the material indices and flags - avoids bloating the PxContact structure
+ buffer.contacts[numContacts].materialFlags = PxU8(iter.getMaterialFlags());
+ buffer.contacts[numContacts].maxImpulse = maxImpulse;
+ buffer.contacts[numContacts].staticFriction = iter.getStaticFriction();
+ buffer.contacts[numContacts].dynamicFriction = iter.getDynamicFriction();
+ buffer.contacts[numContacts].restitution = iter.getRestitution();
+ const PxVec3& targetVel = iter.getTargetVel();
+ buffer.contacts[numContacts].targetVel = targetVel;
+ ++numContacts;
+ }
+ }
+ }
+ }
+ const PxU32 contactCount = numContacts - origContactCount; // zero when forceNoResponse was set
+ buffer.count = numContacts;
+ return contactCount;
+}
+
+struct CorrelationListIterator // Walks the contacts of a linked list of contact patches stored in a CorrelationBuffer.
+{
+ CorrelationBuffer& buffer;
+ PxU32 currPatch; // index into buffer.contactPatches, or CorrelationBuffer::LIST_END once the list is exhausted
+ PxU32 currContact; // contact index within currPatch (compared against that patch's count)
+
+ CorrelationListIterator(CorrelationBuffer& correlationBuffer, PxU32 startPatch) : buffer(correlationBuffer)
+ {
+ //We need to force us to advance the correlation buffer to the first available contact (if one exists)
+ PxU32 newPatch = startPatch, newContact = 0;
+
+ while(newPatch != CorrelationBuffer::LIST_END && newContact == buffer.contactPatches[newPatch].count) // newContact is 0 here, so this skips patches whose count is 0
+ {
+ newPatch = buffer.contactPatches[newPatch].next;
+ newContact = 0;
+ }
+
+ currPatch = newPatch;
+ currContact = newContact;
+ }
+
+ //Returns true if it has another contact pre-loaded. Returns false otherwise
+ PX_FORCE_INLINE bool hasNextContact()
+ {
+ return (currPatch != CorrelationBuffer::LIST_END && currContact < buffer.contactPatches[currPatch].count);
+ }
+
+ inline void nextContact(PxU32& patch, PxU32& contact) // outputs the pre-loaded (patch, contact) pair, then advances to the following one
+ {
+ PX_ASSERT(currPatch != CorrelationBuffer::LIST_END);
+ PX_ASSERT(currContact < buffer.contactPatches[currPatch].count);
+
+ patch = currPatch;
+ contact = currContact;
+ PxU32 newPatch = currPatch, newContact = currContact + 1;
+
+ while(newPatch != CorrelationBuffer::LIST_END && newContact == buffer.contactPatches[newPatch].count) // past the end of this patch: follow 'next' and restart at contact 0
+ {
+ newPatch = buffer.contactPatches[newPatch].next;
+ newContact = 0;
+ }
+
+ currPatch = newPatch;
+ currContact = newContact;
+ }
+
+private:
+ CorrelationListIterator& operator=(const CorrelationListIterator&); // declared private with no definition in this header
+
+};
+
+
+ PX_FORCE_INLINE void constructContactConstraint(const Mat33V& invSqrtInertia0, const Mat33V& invSqrtInertia1, const FloatVArg invMassNorLenSq0, // Fills solverContact (velMultiplier, biasedErr, unbiasedErr, maxImpulse, raXn, rbXn) for one contact point.
+ const FloatVArg invMassNorLenSq1, const FloatVArg angD0, const FloatVArg angD1, const Vec3VArg bodyFrame0p, const Vec3VArg bodyFrame1p,
+ const Vec3VArg normal, const FloatVArg norVel, const VecCrossV& norCross, const Vec3VArg angVel0, const Vec3VArg angVel1,
+ const FloatVArg invDt, const FloatVArg invDtp8, const FloatVArg restDistance, const FloatVArg maxPenBias, const FloatVArg restitution,
+ const FloatVArg bounceThreshold, const Gu::ContactPoint& contact, SolverContactPoint& solverContact,
+ const FloatVArg ccdMaxSeparation)
+ {
+ const FloatV zero = FZero();
+ const Vec3V point = V3LoadA(contact.point);
+ const FloatV separation = FLoad(contact.separation);
+
+ const FloatV cTargetVel = V3Dot(normal, V3LoadA(contact.targetVel)); // contact target velocity projected onto the normal
+
+ const Vec3V ra = V3Sub(point, bodyFrame0p); // contact point relative to each body frame position
+ const Vec3V rb = V3Sub(point, bodyFrame1p);
+
+ const Vec3V raXn = V3Cross(ra, norCross); // NOTE(review): norCross is presumably the normal in a cross-product-friendly form - confirm at the call site
+ const Vec3V rbXn = V3Cross(rb, norCross);
+
+ const Vec3V raXnSqrtInertia = M33MulV3(invSqrtInertia0, raXn);
+ const Vec3V rbXnSqrtInertia = M33MulV3(invSqrtInertia1, rbXn);
+
+ const FloatV resp0 = FAdd(invMassNorLenSq0, FMul(V3Dot(raXnSqrtInertia, raXnSqrtInertia), angD0));
+ const FloatV resp1 = FSub(FMul(V3Dot(rbXnSqrtInertia, rbXnSqrtInertia), angD1), invMassNorLenSq1); // note the subtraction: invMassNorLenSq1 is presumably passed in already negated - TODO confirm
+
+ const FloatV unitResponse = FAdd(resp0, resp1);
+
+ const FloatV vrel1 = FAdd(norVel, V3Dot(raXn, angVel0));
+ const FloatV vrel2 = V3Dot(rbXn, angVel1);
+ const FloatV vrel = FSub(vrel1, vrel2); // relative velocity along the normal, including the angular contributions
+
+ const FloatV velMultiplier = FSel(FIsGrtr(unitResponse, zero), FRecip(unitResponse), zero); // 1/unitResponse when positive, otherwise 0
+
+ const FloatV penetration = FSub(separation, restDistance);
+
+ const FloatV penetrationInvDt = FMul(penetration, invDt);
+
+ const FloatV penetrationInvDtPt8 = FMax(maxPenBias, FMul(penetration, invDtp8)); // bias term, bounded below by maxPenBias
+
+ FloatV scaledBias = FMul(velMultiplier, penetrationInvDtPt8);
+
+ const BoolV isGreater2 = BAnd(BAnd(FIsGrtr(restitution, zero), FIsGrtr(bounceThreshold, vrel)), FIsGrtr(FNeg(vrel), penetrationInvDt)); // bounce case: restitution > 0, vrel < bounceThreshold and -vrel > penetration*invDt
+
+ const BoolV ccdSeparationCondition = FIsGrtrOrEq(ccdMaxSeparation, penetration);
+
+ scaledBias = FSel(BAnd(ccdSeparationCondition, isGreater2), zero, scaledBias); // no positional bias in the bounce case when penetration <= ccdMaxSeparation
+
+ const FloatV sumVRel(vrel);
+
+ FloatV targetVelocity = FAdd(cTargetVel, FSel(isGreater2, FMul(FNeg(sumVRel), restitution), zero)); // restitution term is only added in the bounce case
+
+ //Note - we add on the initial target velocity
+ targetVelocity = FSub(targetVelocity, vrel);
+
+ const FloatV biasedErr = FScaleAdd(targetVelocity, velMultiplier, FNeg(scaledBias)); // presumably targetVelocity*velMultiplier - scaledBias (FScaleAdd(a,b,c) taken to be a*b+c - confirm)
+ const FloatV unbiasedErr = FScaleAdd(targetVelocity, velMultiplier, FSel(isGreater2, zero, FNeg(FMax(scaledBias, zero)))); // bounce case: no bias at all; otherwise only the positive part of scaledBias is removed
+ //const FloatV unbiasedErr = FScaleAdd(targetVelocity, velMultiplier, FNeg(FMax(scaledBias, zero)));
+
+ FStore(velMultiplier, &solverContact.velMultiplier);
+ FStore(biasedErr, &solverContact.biasedErr);
+ FStore(unbiasedErr, &solverContact.unbiasedErr);
+ solverContact.maxImpulse = contact.maxImpulse;
+
+ solverContact.raXn = raXnSqrtInertia; // the stored vectors are the ones already multiplied by invSqrtInertia0/1
+ solverContact.rbXn = rbXnSqrtInertia;
+ }
+}
+}
+
+#endif //DY_CONTACT_PREP_SHARED_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactReduction.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactReduction.h
new file mode 100644
index 00000000..a02fe8e9
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactReduction.h
@@ -0,0 +1,409 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef DY_CONTACT_REDUCTION_H
+#define DY_CONTACT_REDUCTION_H
+
+#include "GuContactPoint.h"
+#include "PxsMaterialManager.h"
+
+namespace physx
+{
+
+
+namespace Dy
+{
+
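+ //KS - might be OK with 4 but 5 guarantees the deepest + 4 contacts that contribute to largest surface area. NOTE(review): the limit defined below is 6, not 5 - reduceContacts() keeps 4 spread-out points plus the 2 deepest remaining ones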
+#define CONTACT_REDUCTION_MAX_CONTACTS 6
+#define CONTACT_REDUCTION_MAX_PATCHES 32
+#define PXS_NORMAL_TOLERANCE 0.995f
+#define PXS_SEPARATION_TOLERANCE 0.001f
+
+
+ //Output of contact reduction for one patch: the list of contact indices that were kept. NOTE(review): the normal and material indices the old comment mentioned are not stored in this struct.
+ //These indices are used to index into the contact array handed to ContactReduction (mOriginalContacts)
+ struct ReducedContactPatch
+ {
+ PxU32 numContactPoints; // number of valid entries in contactPoints
+ PxU32 contactPoints[CONTACT_REDUCTION_MAX_CONTACTS]; // indices of the retained contacts
+ };
+
+ struct ContactPatch // Intermediate patch used by ContactReduction: a run of consecutive contacts, optionally chained to further runs.
+ {
+ PxVec3 rootNormal; // normal of the chain's root patch; new contacts are compared against it
+ ContactPatch* mNextPatch; // next run belonging to the same root, NULL at the end of the chain
+ PxReal maxPenetration; // separation value used as the sort key; seeded from the first contact and min-ed in when a run is chained on
+ PxU16 startIndex; // index of the run's first contact in the original contact array
+ PxU16 stride; // number of consecutive contacts in the run
+ PxU16 rootIndex; // index (in the intermediate patch array) of the chain's root patch
+ PxU16 index; // this patch's own index in the intermediate patch array; equals rootIndex for a root
+ };
+
+ struct SortBoundsPredicateManifold
+ {
+ bool operator()(const ContactPatch* idx1, const ContactPatch* idx2) const
+ {
+ return idx2->maxPenetration > idx1->maxPenetration; // strict ordering on maxPenetration: true when idx1's value is the smaller one
+ }
+ };
+
+
+
+ template <PxU32 MaxPatches> // MaxPatches: upper bound on the number of reduced patches written to mPatches
+ class ContactReduction
+ {
+ public:
+ ReducedContactPatch mPatches[MaxPatches]; // output: reduced patches, valid for [0, mNumPatches)
+ PxU32 mNumPatches; // output: written at the end of reduceContacts()
+ ContactPatch mIntermediatePatches[CONTACT_REDUCTION_MAX_PATCHES]; // scratch: runs of consecutive, compatible contacts
+ ContactPatch* mIntermediatePatchesPtrs[CONTACT_REDUCTION_MAX_PATCHES]; // scratch: pointers into mIntermediatePatches, sorted by maxPenetration
+ PxU32 mNumIntermediatePatches; // NOTE(review): initialised to 0 and not written again in the code visible here
+ Gu::ContactPoint* PX_RESTRICT mOriginalContacts; // input contacts; only read by reduceContacts()
+ PxsMaterialInfo* PX_RESTRICT mMaterialInfo; // per-contact material index pairs, indexed like mOriginalContacts
+ PxU32 mNumOriginalContacts;
+
+ ContactReduction(Gu::ContactPoint* PX_RESTRICT originalContacts, PxsMaterialInfo* PX_RESTRICT materialInfo, PxU32 numContacts) :
+ mNumPatches(0), mNumIntermediatePatches(0), mOriginalContacts(originalContacts), mMaterialInfo(materialInfo), mNumOriginalContacts(numContacts)
+ {
+ }
+
+ void reduceContacts() // Groups the input contacts into patches and keeps at most CONTACT_REDUCTION_MAX_CONTACTS contact indices per patch.
+ {
+ //First pass, break up into contact patches, storing the start and stride of the patches
+ //We will need to have contact patches and then coalesce them
+ mIntermediatePatches[0].rootNormal = mOriginalContacts[0].normal; // reads contact 0 unconditionally - assumes at least one contact, TODO confirm callers guarantee it
+ mIntermediatePatches[0].mNextPatch = NULL;
+ mIntermediatePatches[0].startIndex = 0;
+ mIntermediatePatches[0].rootIndex = 0;
+ mIntermediatePatches[0].maxPenetration = mOriginalContacts[0].separation;
+ mIntermediatePatches[0].index = 0;
+ PxU16 numPatches = 1;
+ //PxU32 startIndex = 0;
+ PxU32 numUniquePatches = 1; // NOTE(review): incremented below but never read in this function
+ PxU16 m = 1;
+ for(; m < mNumOriginalContacts; ++m)
+ {
+ PxI32 index = -1; // index of the newest patch compatible with contact m, or -1 if none
+ for(PxU32 b = numPatches; b > 0; --b)
+ {
+ ContactPatch& patch = mIntermediatePatches[b-1];
+ if(mMaterialInfo[patch.startIndex].mMaterialIndex0 == mMaterialInfo[m].mMaterialIndex0 && mMaterialInfo[patch.startIndex].mMaterialIndex1 == mMaterialInfo[m].mMaterialIndex1 &&
+ patch.rootNormal.dot(mOriginalContacts[m].normal) >= PXS_NORMAL_TOLERANCE)
+ {
+ index = PxI32(b-1);
+ break;
+ }
+ }
+
+ if(index != numPatches - 1) // contact m does not continue the newest run: close that run and open a new one
+ {
+ mIntermediatePatches[numPatches-1].stride = PxU16(m - mIntermediatePatches[numPatches - 1].startIndex);
+ //Create a new patch...
+ if(numPatches == CONTACT_REDUCTION_MAX_PATCHES)
+ {
+ break; // no room for another run: contacts from m onwards are not assigned to any patch
+ }
+ mIntermediatePatches[numPatches].startIndex = m;
+ mIntermediatePatches[numPatches].mNextPatch = NULL;
+ if(index == -1) // no compatible patch at all: the new run is a root
+ {
+ mIntermediatePatches[numPatches].rootIndex = numPatches;
+ mIntermediatePatches[numPatches].rootNormal = mOriginalContacts[m].normal;
+ mIntermediatePatches[numPatches].maxPenetration = mOriginalContacts[m].separation;
+ mIntermediatePatches[numPatches].index = numPatches;
+ ++numUniquePatches;
+ }
+ else // compatible with an older patch: chain the new run onto it
+ {
+ //Find last element in the link
+ PxU16 rootIndex = mIntermediatePatches[index].rootIndex;
+ mIntermediatePatches[index].mNextPatch = &mIntermediatePatches[numPatches];
+ mIntermediatePatches[numPatches].rootNormal = mIntermediatePatches[index].rootNormal;
+ mIntermediatePatches[rootIndex].maxPenetration = mIntermediatePatches[numPatches].maxPenetration = PxMin(mIntermediatePatches[rootIndex].maxPenetration, mOriginalContacts[m].separation);
+ mIntermediatePatches[numPatches].rootIndex = rootIndex;
+ mIntermediatePatches[numPatches].index = numPatches;
+ }
+ ++numPatches;
+ }
+ }
+ mIntermediatePatches[numPatches-1].stride = PxU16(m - mIntermediatePatches[numPatches-1].startIndex); // close the last run
+
+ //OK, we have a list of contact patches so that we can start contact reduction per-patch
+
+ //OK, now we can go and reduce the contacts on a per-patch basis...
+
+ for(PxU32 a = 0; a < numPatches; ++a)
+ {
+ mIntermediatePatchesPtrs[a] = &mIntermediatePatches[a];
+ }
+
+ // Order the runs by maxPenetration (smallest separation first, assuming Ps::sort sorts ascending by the predicate - TODO confirm)
+ SortBoundsPredicateManifold predicate;
+ Ps::sort(mIntermediatePatchesPtrs, numPatches, predicate);
+
+ PxU32 numReducedPatches = 0;
+ for(PxU32 a = 0; a < numPatches; ++a)
+ {
+ if(mIntermediatePatchesPtrs[a]->rootIndex == mIntermediatePatchesPtrs[a]->index) // only roots start an output patch; chained runs are reached through mNextPatch
+ {
+ //Reduce this patch...
+ if(numReducedPatches == MaxPatches)
+ break;
+
+ ReducedContactPatch& reducedPatch = mPatches[numReducedPatches++];
+ //OK, now we need to work out if we have to reduce patches...
+ PxU32 contactCount = 0; // total number of contacts across the whole chain
+ {
+ ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+
+ while(tmpPatch)
+ {
+ contactCount += tmpPatch->stride;
+ tmpPatch = tmpPatch->mNextPatch;
+ }
+ }
+
+ if(contactCount <= CONTACT_REDUCTION_MAX_CONTACTS)
+ {
+ //Just add the contacts...
+ ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+
+ PxU32 ind = 0;
+ while(tmpPatch)
+ {
+ for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+ {
+ reducedPatch.contactPoints[ind++] = tmpPatch->startIndex + b;
+ }
+ tmpPatch = tmpPatch->mNextPatch;
+ }
+ reducedPatch.numContactPoints = contactCount;
+ }
+ else
+ {
+ //Iterate through and find the most extreme point
+
+ // Pick 1 of 4: the contact whose point has the largest squared magnitude (i.e. farthest from the origin of the point coordinates)
+ PxU32 ind = 0;
+
+ {
+ PxReal dist = 0.f;
+ ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+ while(tmpPatch)
+ {
+ for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+ {
+ PxReal magSq = mOriginalContacts[tmpPatch->startIndex + b].point.magnitudeSquared();
+ if(dist < magSq)
+ {
+ ind = tmpPatch->startIndex + b;
+ dist = magSq;
+ }
+ }
+ tmpPatch = tmpPatch->mNextPatch;
+ }
+ }
+ reducedPatch.contactPoints[0] = ind;
+ const PxVec3 p0 = mOriginalContacts[ind].point;
+
+ //Now find the point farthest from this point...
+ {
+ PxReal maxDist = 0.f;
+ ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+ while(tmpPatch)
+ {
+ for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+ {
+ PxReal magSq = (p0 - mOriginalContacts[tmpPatch->startIndex + b].point).magnitudeSquared();
+ if(magSq > maxDist)
+ {
+ ind = tmpPatch->startIndex + b;
+ maxDist = magSq;
+ }
+ }
+ tmpPatch = tmpPatch->mNextPatch;
+ }
+ }
+ reducedPatch.contactPoints[1] = ind;
+ const PxVec3 p1 = mOriginalContacts[ind].point;
+
+ //Now find the point farthest from the segment
+
+ PxVec3 n = (p0 - p1).cross(mIntermediatePatchesPtrs[a]->rootNormal); // direction perpendicular to both the p0-p1 segment and the patch normal
+
+ //PxReal tVal = 0.f;
+ {
+ PxReal maxDist = 0.f;
+ //PxReal tmpTVal;
+
+ ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+ while(tmpPatch)
+ {
+ for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+ {
+
+ //PxReal magSq = tmpDistancePointSegmentSquared(p0, p1, mOriginalContacts[tmpPatch->startIndex + b].point, tmpTVal);
+ PxReal magSq = (mOriginalContacts[tmpPatch->startIndex + b].point - p0).dot(n); // despite the name this is a signed projection onto n, not a squared length
+ if(magSq > maxDist)
+ {
+ ind = tmpPatch->startIndex + b;
+ //tVal = tmpTVal;
+ maxDist = magSq;
+ }
+ }
+ tmpPatch = tmpPatch->mNextPatch;
+ }
+ }
+ reducedPatch.contactPoints[2] = ind; // if no contact projects positively onto n, ind still holds the previous pick
+
+ //const PxVec3 closest = (p0 + (p1 - p0) * tVal);
+
+ const PxVec3 dir = -n;//closest - p3;
+
+ {
+ PxReal maxDist = 0.f;
+ //PxReal tVal = 0.f;
+ ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+ while(tmpPatch)
+ {
+ for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+ {
+ PxReal magSq = (mOriginalContacts[tmpPatch->startIndex + b].point - p0).dot(dir); // same projection, on the opposite side of the segment
+ if(magSq > maxDist)
+ {
+ ind = tmpPatch->startIndex + b;
+ maxDist = magSq;
+ }
+ }
+ tmpPatch = tmpPatch->mNextPatch;
+ }
+ }
+ reducedPatch.contactPoints[3] = ind;
+
+ //Now, we iterate through all the points, and cluster the points. From this, we establish the deepest point that's within a
+ //tolerance of this point and keep that point
+
+ PxReal separation[CONTACT_REDUCTION_MAX_CONTACTS];
+ PxU32 deepestInd[CONTACT_REDUCTION_MAX_CONTACTS];
+ for(PxU32 i = 0; i < 4; ++i)
+ {
+ PxU32 index = reducedPatch.contactPoints[i];
+ separation[i] = mOriginalContacts[index].separation - PXS_SEPARATION_TOLERANCE; // seed is biased, so another contact only replaces it when deeper by more than the tolerance
+ deepestInd[i] = index;
+ }
+
+ ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a];
+ while(tmpPatch)
+ {
+ for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+ {
+ Gu::ContactPoint& point = mOriginalContacts[tmpPatch->startIndex + b];
+
+ PxReal distance = PX_MAX_REAL;
+ PxU32 index = 0; // which of the 4 picked points this contact is nearest to
+ for(PxU32 c = 0; c < 4; ++c)
+ {
+ PxVec3 dif = mOriginalContacts[reducedPatch.contactPoints[c]].point - point.point;
+ PxReal d = dif.magnitudeSquared();
+ if(distance > d)
+ {
+ distance = d;
+ index = c;
+ }
+ }
+ if(separation[index] > point.separation) // keep the deepest contact seen so far in that cluster
+ {
+ deepestInd[index] = tmpPatch->startIndex+b;
+ separation[index] = point.separation;
+ }
+
+ }
+ tmpPatch = tmpPatch->mNextPatch;
+ }
+
+ bool chosen[64]; // indexed by original contact index; assumes contact indices stay below 64 - TODO confirm against the contact buffer capacity
+ PxMemZero(chosen, sizeof(chosen));
+ for(PxU32 i = 0; i < 4; ++i)
+ {
+ reducedPatch.contactPoints[i] = deepestInd[i];
+ chosen[deepestInd[i]] = true;
+ }
+
+ for(PxU32 i = 4; i < CONTACT_REDUCTION_MAX_CONTACTS; ++i) // remaining slots: filled below with the deepest contacts not chosen yet
+ {
+ separation[i] = PX_MAX_REAL;
+ deepestInd[i] = 0;
+ }
+ tmpPatch = mIntermediatePatchesPtrs[a];
+ while(tmpPatch)
+ {
+ for(PxU32 b = 0; b < tmpPatch->stride; ++b)
+ {
+ if(!chosen[tmpPatch->startIndex+b])
+ {
+ Gu::ContactPoint& point = mOriginalContacts[tmpPatch->startIndex + b];
+ for(PxU32 j = 4; j < CONTACT_REDUCTION_MAX_CONTACTS; ++j)
+ {
+ if(point.separation < separation[j]) // sorted insert: shift the shallower entries down one slot, then place this contact
+ {
+ for(PxU32 k = CONTACT_REDUCTION_MAX_CONTACTS-1; k > j; --k)
+ {
+ separation[k] = separation[k-1];
+ deepestInd[k] = deepestInd[k-1];
+ }
+ separation[j] = point.separation;
+ deepestInd[j] = tmpPatch->startIndex+b;
+ break;
+ }
+ }
+ }
+ }
+ tmpPatch = tmpPatch->mNextPatch;
+ }
+
+ for(PxU32 i = 4; i < CONTACT_REDUCTION_MAX_CONTACTS; ++i)
+ {
+ reducedPatch.contactPoints[i] = deepestInd[i];
+ }
+
+ reducedPatch.numContactPoints = CONTACT_REDUCTION_MAX_CONTACTS;
+ }
+ }
+ }
+ mNumPatches = numReducedPatches;
+ }
+
+ };
+}
+
+}
+
+
+#endif //DY_CONTACT_REDUCTION_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyCorrelationBuffer.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyCorrelationBuffer.h
new file mode 100644
index 00000000..9e4d491d
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyCorrelationBuffer.h
@@ -0,0 +1,104 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef DY_CORRELATIONBUFFER_H
+#define DY_CORRELATIONBUFFER_H
+
+#include "PxvConfig.h"
+#include "foundation/PxSimpleTypes.h"
+#include "foundation/PxVec3.h"
+#include "foundation/PxTransform.h"
+#include "DyFrictionPatch.h"
+#include "GuContactBuffer.h"
+
+namespace physx
+{
+
+struct PxcNpWorkUnit;
+struct PxsMaterialInfo;
+
+namespace Dy
+{
+
+struct CorrelationBuffer // Fixed-capacity working storage tying contact patches to friction patches.
+{
+ static const PxU32 MAX_FRICTION_PATCHES = 32; // capacity of the per-friction-patch arrays below
+ static const PxU16 LIST_END = 0xffff; // sentinel marking the end of a contact-patch list
+
+ struct ContactPatchData
+ {
+ PxU16 start; // presumably the index of the patch's first contact in the contact buffer - TODO confirm
+ PxU16 next; // index of the next contact patch in the same list, or LIST_END
+ PxU8 flags;
+ PxU8 count; // number of contacts in this patch
+ PxReal staticFriction, dynamicFriction, restitution;
+ };
+
+ // we can have as many contact patches as contacts, unfortunately
+ ContactPatchData contactPatches[Gu::ContactBuffer::MAX_CONTACTS];
+
+ FrictionPatch PX_ALIGN(16, frictionPatches[MAX_FRICTION_PATCHES]);
+ PxVec3 PX_ALIGN(16, frictionPatchWorldNormal[MAX_FRICTION_PATCHES]);
+
+ PxU32 frictionPatchContactCounts[MAX_FRICTION_PATCHES];
+ PxU32 correlationListHeads[MAX_FRICTION_PATCHES+1]; // NOTE(review): one entry more than the other per-patch arrays; the reason is not visible in this header
+
+ // contact IDs are only used to identify auxiliary contact data when velocity
+ // targets have been set.
+ PxU16 contactID[MAX_FRICTION_PATCHES][2];
+
+ PxU32 contactPatchCount, frictionPatchCount; // number of entries currently in use in contactPatches / the friction patch arrays
+
+};
+
+bool createContactPatches(CorrelationBuffer& fb, const Gu::ContactPoint* cb, PxU32 contactCount, PxReal normalTolerance); // declaration only; presumably fills fb.contactPatches from the contacts in cb - confirm against the definition
+
+bool correlatePatches(CorrelationBuffer& fb, // declaration only; presumably matches fb's contact patches to its friction patches - confirm against the definition
+ const Gu::ContactPoint* cb,
+ const PxTransform& bodyFrame0,
+ const PxTransform& bodyFrame1,
+ PxReal normalTolerance,
+ PxU32 startContactPatchIndex,
+ PxU32 startFrictionPatchIndex);
+
+void growPatches(CorrelationBuffer& fb, // declaration only; behaviour is not visible in this header
+ const Gu::ContactPoint* buffer,
+ const PxTransform& bodyFrame0,
+ const PxTransform& bodyFrame1,
+ PxReal normalTolerance,
+ PxU32 frictionPatchStartIndex,
+ PxReal frictionOffsetThreshold);
+
+}
+
+}
+
+#endif //DY_CORRELATIONBUFFER_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.cpp
new file mode 100644
index 00000000..07f3b642
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.cpp
@@ -0,0 +1,2950 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "PsTime.h"
+#include "PsAtomic.h"
+#include "PxvDynamics.h"
+
+#include "foundation/PxProfiler.h"
+#include "PxsRigidBody.h"
+#include "PxsContactManager.h"
+#include "DyDynamics.h"
+#include "DyBodyCoreIntegrator.h"
+#include "DySolverCore.h"
+#include "DySolverControl.h"
+#include "DySolverContact.h"
+#include "DySolverContactPF.h"
+#include "DyArticulationContactPrep.h"
+#include "DySolverBody.h"
+
+#include "DyConstraintPrep.h"
+#include "DyConstraintPartition.h"
+#include "DyArticulation.h"
+
+#include "CmFlushPool.h"
+#include "DyArticulationPImpl.h"
+#include "PxsMaterialManager.h"
+#include "DySolverContactPF4.h"
+#include "DyContactReduction.h"
+#include "PxcNpContactPrepShared.h"
+#include "DyContactPrep.h"
+#include "DySolverControlPF.h"
+#include "PxSceneDesc.h"
+#include "PxsSimpleIslandManager.h"
+#include "PxvNphaseImplementationContext.h"
+#include "PxsContactManagerState.h"
+#include "PxsDefaultMemoryManager.h"
+#include "DyContactPrepShared.h"
+
+//KS - used to turn on/off batched SIMD constraints.
+#define DY_BATCH_CONSTRAINTS 1
+//KS - used to specifically turn on/off batched 1D SIMD constraints.
+#define DY_BATCH_1D 1
+
+namespace physx
+{
+namespace Dy
+{
+
+struct SolverIslandObjects // Bundle of raw array pointers and counts describing a group of islands for the solver tasks in this file; it has no destructor and releases nothing.
+{
+ PxsRigidBody** bodies;
+ Articulation** articulations;
+ Dy::Articulation** articulationOwners;
+ PxsIndexedContactManager* contactManagers;
+ //PxsIndexedConstraint* constraints;
+
+ const IG::IslandId* islandIds; // numIslands entries
+ PxU32 numIslands;
+ PxU32* bodyRemapTable; // written by PxsSolverStartTask: active node index -> position in the bodies array
+ PxU32* nodeIndexArray; // written by PxsSolverStartTask: position in the bodies array -> node index
+
+ PxSolverConstraintDesc* constraintDescs;
+ PxSolverConstraintDesc* orderedConstraintDescs;
+ PxSolverConstraintDesc* tempConstraintDescs;
+ PxConstraintBatchHeader* constraintBatchHeaders;
+ Cm::SpatialVector* motionVelocities;
+ PxsBodyCore** bodyCoreArray;
+ // All members, including bodyRemapTable, are initialised (in declaration order) so a default-constructed object never carries an indeterminate pointer.
+ SolverIslandObjects() : bodies(NULL), articulations(NULL), articulationOwners(NULL),
+ contactManagers(NULL), islandIds(NULL), numIslands(0), bodyRemapTable(NULL), nodeIndexArray(NULL), constraintDescs(NULL), orderedConstraintDescs(NULL),
+ tempConstraintDescs(NULL), constraintBatchHeaders(NULL), motionVelocities(NULL), bodyCoreArray(NULL)
+ {
+ }
+};
+
+Context* createDynamicsContext( PxcNpMemBlockPool* memBlockPool, // Free-function factory: forwards all arguments unchanged to DynamicsContext::create.
+ PxcScratchAllocator& scratchAllocator, Cm::FlushPool& taskPool,
+ PxvSimStats& simStats, PxTaskManager* taskManager, Ps::VirtualAllocatorCallback* allocatorCallback, PxsMaterialManager* materialManager,
+ IG::IslandSim* accurateIslandSim, PxU64 contextID,
+ const bool enableStabilization, const bool useEnhancedDeterminism, const bool useAdaptiveForce
+ )
+{
+ return DynamicsContext::create( memBlockPool, scratchAllocator, taskPool, simStats, taskManager, allocatorCallback, materialManager, accurateIslandSim, // returned through the Context base type; NULL when create() could not allocate
+ contextID, enableStabilization, useEnhancedDeterminism, useAdaptiveForce);
+}
+
+// PT: TODO: consider removing this function. We already have "createDynamicsContext".
+DynamicsContext* DynamicsContext::create( PxcNpMemBlockPool* memBlockPool, // Allocates storage with PX_ALLOC and constructs a DynamicsContext in it; returns NULL if the allocation returned NULL.
+ PxcScratchAllocator& scratchAllocator,
+ Cm::FlushPool& taskPool,
+ PxvSimStats& simStats,
+ PxTaskManager* taskManager,
+ Ps::VirtualAllocatorCallback* allocatorCallback,
+ PxsMaterialManager* materialManager,
+ IG::IslandSim* accurateIslandSim,
+ PxU64 contextID,
+ const bool enableStabilization,
+ const bool useEnhancedDeterminism,
+ const bool useAdaptiveForce
+ )
+{
+ // PT: TODO: inherit from UserAllocated, remove placement new
+ DynamicsContext* dc = reinterpret_cast<DynamicsContext*>(PX_ALLOC(sizeof(DynamicsContext), "DynamicsContext")); // raw, not yet constructed storage
+ if(dc)
+ {
+ new(dc)DynamicsContext(memBlockPool, scratchAllocator, taskPool, simStats, taskManager, allocatorCallback, materialManager, accurateIslandSim, contextID, enableStabilization, useEnhancedDeterminism, useAdaptiveForce); // placement new: run the constructor inside the storage obtained above
+ }
+ return dc; // release with destroy(), which runs the destructor and PX_FREEs this storage
+}
+
+
+void DynamicsContext::destroy() // Runs the destructor explicitly, then PX_FREEs the object's own storage (the reverse of create()).
+{
+ this->~DynamicsContext();
+ PX_FREE(this); // 'this' is dangling from here on; the object must not be touched after this call
+}
+
+void DynamicsContext::resetThreadContexts() // Calls reset() on every ThreadContext yielded by an iterator over mThreadContextPool.
+{
+ PxcThreadCoherentCacheIterator<ThreadContext, PxcNpMemBlockPool> threadContextIt(mThreadContextPool);
+ ThreadContext* threadContext = threadContextIt.getNext();
+ // getNext() returning NULL ends the walk
+ while(threadContext != NULL)
+ {
+ threadContext->reset();
+ threadContext = threadContextIt.getNext();
+ }
+}
+
+
+// =========================== Basic methods
+
+
+DynamicsContext::DynamicsContext( PxcNpMemBlockPool* memBlockPool, // Stores the collaborators, creates the threshold streams, initialises the world solver body and creates one solver core per friction type.
+ PxcScratchAllocator& scratchAllocator,
+ Cm::FlushPool& taskPool,
+ PxvSimStats& simStats,
+ PxTaskManager* taskManager,
+ Ps::VirtualAllocatorCallback* allocatorCallback,
+ PxsMaterialManager* materialManager,
+ IG::IslandSim* accurateIslandSim,
+ PxU64 contextID,
+ const bool enableStabilization,
+ const bool useEnhancedDeterminism,
+ const bool useAdaptiveForce
+ ) :
+ Dy::Context (accurateIslandSim, allocatorCallback, simStats, enableStabilization, useEnhancedDeterminism, useAdaptiveForce),
+ mThreadContextPool (memBlockPool),
+ mMaterialManager (materialManager),
+ mScratchAllocator (scratchAllocator),
+ mTaskPool (taskPool),
+ mTaskManager (taskManager),
+ mContextID (contextID)
+{
+ createThresholdStream(*allocatorCallback); // allocatorCallback is dereferenced unconditionally here and below, so it must not be NULL
+ createForceChangeThresholdStream(*allocatorCallback);
+ mExceededForceThresholdStream[0] = PX_PLACEMENT_NEW(PX_ALLOC(sizeof(ThresholdStream), PX_DEBUG_EXP("ExceededForceThresholdStream[0]")), ThresholdStream(*allocatorCallback)); // two streams: PxsForceThresholdTask treats [mCurrentIndex] as current and [1 - mCurrentIndex] as previous
+ mExceededForceThresholdStream[1] = PX_PLACEMENT_NEW(PX_ALLOC(sizeof(ThresholdStream), PX_DEBUG_EXP("ExceededForceThresholdStream[1]")), ThresholdStream(*allocatorCallback));
+ mThresholdStreamOut = 0;
+ mCurrentIndex = 0;
+ mWorldSolverBody.linearVelocity = PxVec3(0); // world solver body: the body setDescFromIndices() assigns to world/static constraint ends; zero velocity
+ mWorldSolverBody.angularState = PxVec3(0);
+ mWorldSolverBodyData.invMass = 0; // zero inverse mass and zero sqrtInvInertia
+ mWorldSolverBodyData.sqrtInvInertia = PxMat33(PxZero);
+ mWorldSolverBodyData.nodeIndex = IG_INVALID_NODE;
+ mWorldSolverBodyData.reportThreshold = PX_MAX_REAL;
+ mWorldSolverBodyData.penBiasClamp = -PX_MAX_REAL;
+ mWorldSolverBodyData.maxContactImpulse = PX_MAX_REAL;
+ mWorldSolverBody.solverProgress=MAX_PERMITTED_SOLVER_PROGRESS; // all three progress counters start at the maximum permitted value
+ mWorldSolverBody.maxSolverNormalProgress=MAX_PERMITTED_SOLVER_PROGRESS;
+ mWorldSolverBody.maxSolverFrictionProgress=MAX_PERMITTED_SOLVER_PROGRESS;
+ mWorldSolverBodyData.linearVelocity = mWorldSolverBodyData.angularVelocity = PxVec3(0.f);
+ mWorldSolverBodyData.body2World = PxTransform(PxIdentity);
+ mWorldSolverBodyData.lockFlags = 0;
+ mSolverCore[PxFrictionType::ePATCH] = SolverCoreGeneral::create(); // one solver core per friction type; the destructor releases each with destroyV()
+ mSolverCore[PxFrictionType::eONE_DIRECTIONAL] = SolverCoreGeneralPF::create(); // both directional friction types get their own SolverCoreGeneralPF instance
+ mSolverCore[PxFrictionType::eTWO_DIRECTIONAL] = SolverCoreGeneralPF::create();
+}
+
+DynamicsContext::~DynamicsContext() // Releases the solver cores and the two exceeded-force threshold streams created by the constructor.
+{
+ for(PxU32 i = 0; i < PxFrictionType::eFRICTION_COUNT; ++i)
+ {
+ mSolverCore[i]->destroyV(); // no NULL check: every slot below eFRICTION_COUNT is expected to hold a core
+ }
+ // The streams were built with PX_ALLOC + placement new, so they are torn down with an explicit destructor call + PX_FREE.
+ if(mExceededForceThresholdStream[0])
+ {
+ mExceededForceThresholdStream[0]->~ThresholdStream();
+ PX_FREE(mExceededForceThresholdStream[0]);
+ }
+ mExceededForceThresholdStream[0] = NULL;
+
+ if(mExceededForceThresholdStream[1])
+ {
+ mExceededForceThresholdStream[1]->~ThresholdStream();
+ PX_FREE(mExceededForceThresholdStream[1]);
+ }
+ mExceededForceThresholdStream[1] = NULL;
+
+}
+
+#if PX_ENABLE_SIM_STATS
+void DynamicsContext::addThreadStats(const ThreadContext::ThreadSimStats& stats) // Adds the four per-thread counters in 'stats' onto the matching totals in mSimStats.
+{
+ mSimStats.mNbActiveConstraints += stats.numActiveConstraints; // plain += with no locking in this function
+ mSimStats.mNbActiveDynamicBodies += stats.numActiveDynamicBodies;
+ mSimStats.mNbActiveKinematicBodies += stats.numActiveKinematicBodies;
+ mSimStats.mNbAxisSolverConstraints += stats.numAxisSolverConstraints;
+}
+#endif
+
+// =========================== Solve methods!
+
+void DynamicsContext::setDescFromIndices(PxSolverConstraintDesc& desc, const PxsIndexedInteraction& constraint, const PxU32 solverBodyOffset) // Fills the body / articulation fields of desc for both sides (A and B) of an indexed interaction.
+{
+ PX_COMPILE_TIME_ASSERT(PxsIndexedInteraction::eBODY == 0);
+ PX_COMPILE_TIME_ASSERT(PxsIndexedInteraction::eKINEMATIC == 1);
+ const PxU32 offsetMap[] = {solverBodyOffset, 0}; // indexed by indexType: eBODY (0) -> solverBodyOffset, eKINEMATIC (1) -> 0; the asserts above pin those enum values
+ //const PxU32 offsetMap[] = {mKinematicCount, 0};
+ // ---- side A ----
+ if(constraint.indexType0 == PxsIndexedInteraction::eARTICULATION)
+ {
+ Articulation* a = getArticulation(constraint.articulation0);
+ desc.articulationA = a->getFsDataPtr();
+ desc.articulationALength = Ps::to16(a->getSolverDataSize());
+ PX_ASSERT(0==(desc.articulationALength & 0x0f)); // the solver data size must be a multiple of 16
+ desc.linkIndexA = Ps::to16(a->getLinkIndex(constraint.articulation0));
+ }
+ else
+ {
+ desc.linkIndexA = PxSolverConstraintDesc::NO_LINK;
+ //desc.articulationALength = 0; //this is unioned with bodyADataIndex
+ /*desc.bodyA = constraint.indexType0 == PxsIndexedInteraction::eWORLD ? &mWorldSolverBody
+ : &mSolverBodyPool[(PxU32)constraint.solverBody0 + offsetMap[constraint.indexType0]];
+ desc.bodyADataIndex = PxU16(constraint.indexType0 == PxsIndexedInteraction::eWORLD ? 0
+ : (PxU16)constraint.solverBody0 + 1 + offsetMap[constraint.indexType0]);*/
+ // eWORLD -> shared mWorldSolverBody with data index 0; otherwise pool slot (solverBody0 + offset) and data index = that slot + 1
+ desc.bodyA = constraint.indexType0 == PxsIndexedInteraction::eWORLD ? &mWorldSolverBody
+ : &mSolverBodyPool[PxU32(constraint.solverBody0) + offsetMap[constraint.indexType0]];
+ desc.bodyADataIndex = PxU16(constraint.indexType0 == PxsIndexedInteraction::eWORLD ? 0
+ : PxU16(constraint.solverBody0) + 1 + offsetMap[constraint.indexType0]); // note: both solverBody0 and the final sum are narrowed to PxU16
+ }
+ // ---- side B: same logic as side A, using the *1 fields ----
+ if(constraint.indexType1 == PxsIndexedInteraction::eARTICULATION)
+ {
+ Articulation* a = getArticulation(constraint.articulation1);
+ desc.articulationB = a->getFsDataPtr();
+ desc.articulationBLength = Ps::to16(a->getSolverDataSize());
+ PX_ASSERT(0==(desc.articulationBLength & 0x0f));
+ desc.linkIndexB = Ps::to16(a->getLinkIndex(constraint.articulation1));
+ }
+ else
+ {
+ desc.linkIndexB = PxSolverConstraintDesc::NO_LINK;
+ //desc.articulationBLength = 0; //this is unioned with bodyBDataIndex
+ desc.bodyB = constraint.indexType1 == PxsIndexedInteraction::eWORLD ? &mWorldSolverBody
+ : &mSolverBodyPool[PxU32(constraint.solverBody1) + offsetMap[constraint.indexType1]];
+ desc.bodyBDataIndex = PxU16(constraint.indexType1 == PxsIndexedInteraction::eWORLD ? 0
+ : PxU16(constraint.solverBody1) + 1 + offsetMap[constraint.indexType1]);
+ }
+}
+
+void DynamicsContext::setDescFromIndices(PxSolverConstraintDesc& desc, IG::EdgeIndex edgeIndex, const IG::SimpleIslandManager& islandManager,
+ PxU32* bodyRemap, const PxU32 solverBodyOffset)
+{ // Fills the body / articulation fields of desc for the two nodes of island edge 'edgeIndex', looked up in the manager's accurate island sim.
+ PX_COMPILE_TIME_ASSERT(PxsIndexedInteraction::eBODY == 0); // NOTE(review): this overload uses no offsetMap, so the visible code does not rely on these two asserts
+ PX_COMPILE_TIME_ASSERT(PxsIndexedInteraction::eKINEMATIC == 1);
+
+ const IG::IslandSim& islandSim = islandManager.getAccurateIslandSim();
+ // ---- side A: first node of the edge ----
+ IG::NodeIndex node1 = islandSim.getNodeIndex1(edgeIndex);
+ if (node1.isStaticBody())
+ {
+ desc.bodyA = &mWorldSolverBody; // static end: shared world solver body, data index 0
+ desc.bodyADataIndex = 0;
+ desc.linkIndexA = PxSolverConstraintDesc::NO_LINK;
+ }
+ else
+ {
+ const IG::Node& node = islandSim.getNode(node1);
+ if (node.getNodeType() == IG::Node::eARTICULATION_TYPE)
+ {
+ Dy::Articulation* a = islandSim.getLLArticulation(node1);
+ desc.articulationA = a->getFsDataPtr();
+ desc.articulationALength = Ps::to16(a->getSolverDataSize());
+ PX_ASSERT(0 == (desc.articulationALength & 0x0f)); // the solver data size must be a multiple of 16
+ desc.linkIndexA = Ps::to16(node1.articulationLinkId());
+ }
+ else
+ {
+ PxU32 activeIndex = islandSim.getActiveNodeIndex(node1);
+ PxU32 index = node.isKinematic() ? activeIndex : bodyRemap[activeIndex] + solverBodyOffset; // kinematic: active index used directly; otherwise remapped index shifted by solverBodyOffset
+ desc.bodyA = &mSolverBodyPool[index];
+ desc.bodyADataIndex = Ps::to16(index + 1); // data index = pool index + 1 (data index 0 is paired with mWorldSolverBody above)
+ desc.linkIndexA = PxSolverConstraintDesc::NO_LINK;
+ }
+ }
+ // ---- side B: second node of the edge, same logic ----
+ IG::NodeIndex node2 = islandSim.getNodeIndex2(edgeIndex);
+ if (node2.isStaticBody())
+ {
+ desc.bodyB = &mWorldSolverBody;
+ desc.bodyBDataIndex = 0;
+ desc.linkIndexB = PxSolverConstraintDesc::NO_LINK;
+ }
+ else
+ {
+ const IG::Node& node = islandSim.getNode(node2);
+ if (node.getNodeType() == IG::Node::eARTICULATION_TYPE)
+ {
+ Dy::Articulation* a = islandSim.getLLArticulation(node2);
+ desc.articulationB = a->getFsDataPtr();
+ desc.articulationBLength = Ps::to16(a->getSolverDataSize());
+ PX_ASSERT(0 == (desc.articulationBLength & 0x0f));
+ desc.linkIndexB = Ps::to16(node2.articulationLinkId());
+ }
+ else
+ {
+ PxU32 activeIndex = islandSim.getActiveNodeIndex(node2);
+ PxU32 index = node.isKinematic() ? activeIndex : bodyRemap[activeIndex] + solverBodyOffset;
+ desc.bodyB = &mSolverBodyPool[index];
+ desc.bodyBDataIndex = Ps::to16(index + 1);
+ desc.linkIndexB = PxSolverConstraintDesc::NO_LINK;
+ }
+ }
+}
+
+
+class PxsPreIntegrateTask : public Cm::Task // Task object that captures its arguments in members; runInternal() is only declared here, its body is not in this class.
+{
+ PxsPreIntegrateTask& operator=(const PxsPreIntegrateTask&); // private declaration without a body here
+public:
+ PxsPreIntegrateTask( DynamicsContext& context, // the constructor only copies every argument into the member of the same name
+ PxsBodyCore*const* bodyArray,
+ PxsRigidBody*const* originalBodyArray,
+ PxU32 const* nodeIndexArray,
+ PxSolverBody* solverBodies,
+ PxSolverBodyData* solverBodyDataPool,
+ PxF32 dt,
+ PxU32 numBodies,
+ volatile PxU32* maxSolverPositionIterations,
+ volatile PxU32* maxSolverVelocityIterations,
+ const PxU32 startIndex,
+ const PxU32 numToIntegrate,
+ const PxVec3& gravity) :
+ mContext(context),
+ mBodyArray(bodyArray),
+ mOriginalBodyArray(originalBodyArray),
+ mNodeIndexArray(nodeIndexArray),
+ mSolverBodies(solverBodies),
+ mSolverBodyDataPool(solverBodyDataPool),
+ mDt(dt),
+ mNumBodies(numBodies),
+ mMaxSolverPositionIterations(maxSolverPositionIterations),
+ mMaxSolverVelocityIterations(maxSolverVelocityIterations),
+ mStartIndex(startIndex),
+ mNumToIntegrate(numToIntegrate),
+ mGravity(gravity)
+ {}
+
+ virtual void runInternal();
+
+ virtual const char* getName() const
+ {
+ return "PxsDynamics.preIntegrate";
+ }
+
+public:
+ DynamicsContext& mContext;
+ PxsBodyCore*const* mBodyArray;
+ PxsRigidBody*const* mOriginalBodyArray;
+ PxU32 const* mNodeIndexArray;
+ PxSolverBody* mSolverBodies;
+ PxSolverBodyData* mSolverBodyDataPool;
+ PxF32 mDt;
+ PxU32 mNumBodies;
+ volatile PxU32* mMaxSolverPositionIterations; // NOTE(review): presumably shared outputs updated by runInternal() -- confirm in its definition
+ volatile PxU32* mMaxSolverVelocityIterations;
+ PxU32 mStartIndex; // NOTE(review): presumably first body of the sub-range handled by this task -- confirm
+ PxU32 mNumToIntegrate; // NOTE(review): presumably length of that sub-range -- confirm
+ PxVec3 mGravity; // stored by value, unlike the pointer / reference members above
+
+};
+
+
+
+class PxsParallelSolverTask : public Cm::Task // Task whose runInternal() forwards to solveParallel() with the context, island parameters and island sim given at construction.
+{
+ PxsParallelSolverTask& operator=(PxsParallelSolverTask&); // private declaration without a body here; takes a non-const reference, unlike the other tasks in this file
+public:
+
+ PxsParallelSolverTask(SolverIslandParams& params, DynamicsContext& context, PxFrictionType::Enum frictionType, IG::IslandSim& islandSim)
+ : mParams(params), mContext(context), mFrictionType(frictionType), mIslandSim(islandSim)
+ {
+ }
+
+ virtual void runInternal()
+ {
+ solveParallel(mContext, mParams, mIslandSim);
+ }
+
+ virtual const char* getName() const
+ {
+ return "PxsDynamics.parallelSolver";
+ }
+
+ SolverIslandParams& mParams; // held by reference: the params object must outlive the task
+ DynamicsContext& mContext;
+ PxFrictionType::Enum mFrictionType; // stored by the constructor; not read by any method of this class
+ IG::IslandSim& mIslandSim;
+};
+
+
+#define PX_CONTACT_REDUCTION 1
+
+class PxsSolverConstraintPostProcessTask : public Cm::Task // Task that merges the contacts of compound contact managers [mStartIndex, mStartIndex + mStride) into one reduced contact set each.
+{
+ PxsSolverConstraintPostProcessTask& operator=(const PxsSolverConstraintPostProcessTask&);
+public:
+
+ PxsSolverConstraintPostProcessTask(DynamicsContext& context,
+ ThreadContext& threadContext,
+ const SolverIslandObjects& objects,
+ const PxU32 solverBodyOffset,
+ PxU32 startIndex,
+ PxU32 stride,
+ PxsMaterialManager* materialManager,
+ PxsContactManagerOutputIterator& iterator) :
+ mContext(context),
+ mThreadContext(threadContext),
+ mObjects(objects),
+ mSolverBodyOffset(solverBodyOffset),
+ mStartIndex(startIndex),
+ mStride(stride),
+ mMaterialManager(materialManager),
+ mOutputs(iterator)
+ {}
+
+ void mergeContacts(CompoundContactManager& header, ThreadContext& threadContext) // Gathers the contacts of the managers referenced by header, reduces them and writes the result to the output of header.unit. 'threadContext' is the caller-supplied context (runInternal passes a borrowed one); 'mThreadContext' is the constructor-supplied one.
+ {
+ Gu::ContactBuffer& buffer = threadContext.mContactBuffer;
+ PxsMaterialInfo materialInfo[Gu::ContactBuffer::MAX_CONTACTS];
+ PxU32 size = 0;
+ // Pass 1: append the contacts of managers orderedContactList[mStartIndex .. mStartIndex + mStride) to buffer.contacts / materialInfo.
+ for(PxU32 a = 0; a < header.mStride; ++a)
+ {
+ PxsContactManager* manager = mThreadContext.orderedContactList[a+header.mStartIndex]->contactManager;
+ PxcNpWorkUnit& unit = manager->getWorkUnit();
+ PxsContactManagerOutput& output = mOutputs.getContactManager(unit.mNpIndex);
+ PxContactStreamIterator iter(output.contactPatches, output.contactPoints, output.getInternalFaceIndice(), output.nbPatches, output.nbContacts);
+ // origSize is only consumed by the assert after the loops
+ PxU32 origSize = size;
+ PX_UNUSED(origSize);
+ if(!iter.forceNoResponse) // managers whose stream has forceNoResponse set contribute no contacts
+ {
+ while(iter.hasNextPatch())
+ {
+ iter.nextPatch();
+ while(iter.hasNextContact())
+ {
+ PX_ASSERT(size < Gu::ContactBuffer::MAX_CONTACTS); // NOTE(review): this assert is the only capacity check before buffer.contacts[size++] -- confirm callers bound the total
+ iter.nextContact();
+ PxsMaterialInfo& info = materialInfo[size];
+ Gu::ContactPoint& point = buffer.contacts[size++];
+ point.dynamicFriction = iter.getDynamicFriction();
+ point.staticFriction = iter.getStaticFriction();
+ point.restitution = iter.getRestitution();
+ point.internalFaceIndex1 = iter.getFaceIndex1();
+ point.materialFlags = PxU8(iter.getMaterialFlags());
+ point.maxImpulse = iter.getMaxImpulse();
+ point.targetVel = iter.getTargetVel();
+ point.normal = iter.getContactNormal();
+ point.point = iter.getContactPoint();
+ point.separation = iter.getSeparation();
+ info.mMaterialIndex0 = iter.getMaterialIndex0();
+ info.mMaterialIndex1 = iter.getMaterialIndex1();
+ }
+ }
+ PX_ASSERT(output.nbContacts == (size - origSize));
+ }
+ }
+ // Pass 2: remember the gathered total, then let the contact reduction pick the contacts to keep.
+ PxU32 origSize = size;
+#if PX_CONTACT_REDUCTION
+ ContactReduction<6> reduction(buffer.contacts, materialInfo, size);
+ reduction.reduceContacts();
+ //OK, now we write back the contacts...
+ // histo[i] == 1 marks gathered contact i as referenced by one of the reduced patches
+ PxU8 histo[Gu::ContactBuffer::MAX_CONTACTS];
+ PxMemZero(histo, sizeof(histo));
+ // size is recounted as the number of patch contact references, an upper bound on the kept contacts (see the >= assert below)
+ size = 0;
+ for(PxU32 a = 0; a < reduction.mNumPatches; ++a)
+ {
+ ReducedContactPatch& patch = reduction.mPatches[a];
+ for(PxU32 b = 0; b < patch.numContactPoints; ++b)
+ {
+ histo[patch.contactPoints[b]] = 1;
+ ++size;
+ }
+ }
+#endif
+ // Reserve one PxU16 per kept contact; the array is published as header.forceBufferList. NOTE(review): the reserve() result is used without a NULL check -- confirm it cannot fail here.
+ PxU16* PX_RESTRICT data = reinterpret_cast<PxU16*>(threadContext.mConstraintBlockStream.reserve(size * sizeof(PxU16), mThreadContext.mConstraintBlockManager));
+ header.forceBufferList = data;
+
+ // Pass 3: compact the kept contacts to the front of the buffers; data[k] records the original index of compacted contact k.
+#if PX_CONTACT_REDUCTION
+ const PxU32 reservedSize = size;
+ PX_UNUSED(reservedSize);
+ size = 0;
+ for(PxU32 a = 0; a < origSize; ++a)
+ {
+ if(histo[a])
+ {
+ if(size != a)
+ {
+ buffer.contacts[size] = buffer.contacts[a];
+ materialInfo[size] = materialInfo[a];
+ }
+ data[size] = Ps::to16(a);
+ size++;
+ }
+ }
+ PX_ASSERT(reservedSize >= size);
+#else
+ for(PxU32 a = 0; a < size; ++a)
+ data[a] = a;
+#endif
+
+
+ PxU32 contactForceByteSize = size * sizeof(PxReal); // one PxReal of force storage per kept contact
+
+
+ PxsContactManagerOutput& output = mOutputs.getContactManager(header.unit->mNpIndex);
+
+ PxU16 compressedContactSize;
+ // Re-encode the merged contact set into the output of header.unit, passing the caller-supplied constraint block stream.
+ physx::writeCompressedContact(buffer.contacts, size, NULL, output.nbContacts, output.contactPatches, output.contactPoints, compressedContactSize,
+ reinterpret_cast<PxReal*&>(output.contactForces), contactForceByteSize, mMaterialManager, false,
+ false, materialInfo, output.nbPatches, 0, &mThreadContext.mConstraintBlockManager, &threadContext.mConstraintBlockStream, false);
+ }
+
+ virtual void runInternal() // Borrows a thread context from mContext, merges each compound constraint in [mStartIndex, mStartIndex + mStride), then returns the context.
+ {
+ PxU32 endIndex = mStartIndex + mStride;
+
+ ThreadContext* threadContext = mContext.getThreadContext();
+ //TODO - we need to do this somewhere else
+ //threadContext->mContactBlockStream.reset();
+ threadContext->mConstraintBlockStream.reset();
+
+ for(PxU32 a = mStartIndex; a < endIndex; ++a)
+ {
+ mergeContacts(mThreadContext.compoundConstraints[a], *threadContext);
+ }
+ mContext.putThreadContext(threadContext);
+ }
+
+ virtual const char* getName() const { return "PxsDynamics.solverConstraintPostProcess"; }
+
+
+ DynamicsContext& mContext;
+ ThreadContext& mThreadContext; // constructor-supplied context: source of orderedContactList, compoundConstraints and mConstraintBlockManager
+ const SolverIslandObjects mObjects; // copied by value; not read by the methods of this class
+ PxU32 mSolverBodyOffset; // stored; not read by the methods of this class
+ PxU32 mStartIndex;
+ PxU32 mStride;
+ PxsMaterialManager* mMaterialManager;
+ PxsContactManagerOutputIterator& mOutputs; // held by reference: the iterator must outlive the task
+};
+
+class PxsForceThresholdTask : public Cm::Task // Task that rebuilds the exceeded-force stream for this step and the force-change threshold stream of a DynamicsContext.
+{
+ DynamicsContext& mDynamicsContext;
+
+ PxsForceThresholdTask& operator=(const PxsForceThresholdTask&);
+public:
+
+ PxsForceThresholdTask(DynamicsContext& context): mDynamicsContext(context)
+ {
+ }
+
+ void createForceChangeThresholdStream() // Fills the current exceeded-force stream, then derives the force-change stream by comparing it with the other (previous) exceeded-force stream.
+ {
+ ThresholdStream& thresholdStream = mDynamicsContext.getThresholdStream();
+ //bool haveThresholding = thresholdStream.size()!=0;
+ // Build the table over this step's stream; its pairs carry an accumulated force and an index back into the stream.
+ ThresholdTable& thresholdTable = mDynamicsContext.getThresholdTable();
+ thresholdTable.build(thresholdStream);
+
+ //generate current force exceeded threshold stream
+ ThresholdStream& curExceededForceThresholdStream = *mDynamicsContext.mExceededForceThresholdStream[mDynamicsContext.mCurrentIndex]; // two streams: [mCurrentIndex] is written now, [1 - mCurrentIndex] is treated as the previous one
+ ThresholdStream& preExceededForceThresholdStream = *mDynamicsContext.mExceededForceThresholdStream[1 - mDynamicsContext.mCurrentIndex];
+ curExceededForceThresholdStream.forceSize_Unsafe(0); // empty the current stream before refilling it
+
+ //fill in the current exceeded force threshold stream
+ for(PxU32 i=0; i<thresholdTable.mPairsSize; ++i)
+ {
+ ThresholdTable::Pair& pair = thresholdTable.mPairs[i];
+ ThresholdStreamElement& elem = thresholdStream[pair.thresholdStreamIndex];
+ if(pair.accumulatedForce > elem.threshold * mDynamicsContext.mDt) // the threshold is scaled by mDt before the comparison
+ {
+ elem.accumulatedForce = pair.accumulatedForce;
+ curExceededForceThresholdStream.pushBack(elem);
+ }
+ }
+
+ ThresholdStream& forceChangeThresholdStream = mDynamicsContext.getForceChangedThresholdStream();
+ forceChangeThresholdStream.forceSize_Unsafe(0);
+ Ps::Array<PxU32>& forceChangeMask = mDynamicsContext.mExceededForceThresholdStreamMask;
+
+ const PxU32 nbPreExceededForce = preExceededForceThresholdStream.size();
+ const PxU32 nbCurExceededForce = curExceededForceThresholdStream.size();
+
+ //generate force change thresholdStream
+ if(nbPreExceededForce)
+ {
+ thresholdTable.build(preExceededForceThresholdStream); // the table is rebuilt over the previous stream so check() below looks pairs up there
+
+ //set force change mask: entries [0, nbPre) map to the previous stream, [nbPre, nbPre + nbCur) to the current one
+ const PxU32 nbTotalExceededForce = nbPreExceededForce + nbCurExceededForce;
+ forceChangeMask.reserve(nbTotalExceededForce);
+ forceChangeMask.forceSize_Unsafe(nbTotalExceededForce);
+
+ //initialize the forceChangeMask (1 = changed, until the pair is found in both streams)
+ for (PxU32 i = 0; i < nbTotalExceededForce; ++i)
+ forceChangeMask[i] = 1;
+
+ for(PxU32 i=0; i< nbCurExceededForce; ++i)
+ {
+ ThresholdStreamElement& curElem = curExceededForceThresholdStream[i];
+
+ PxU32 pos;
+ if(thresholdTable.check(preExceededForceThresholdStream, curElem, pos))
+ {
+ forceChangeMask[pos] = 0; // pair present in both streams: clear its previous-range and current-range mask entries
+ forceChangeMask[i + nbPreExceededForce] = 0;
+ }
+ }
+
+ //create force change threshold stream
+ for(PxU32 i=0; i<nbTotalExceededForce; ++i)
+ {
+ const PxU32 hasForceChange = forceChangeMask[i];
+ if(hasForceChange)
+ {
+ bool lostPair = (i < nbPreExceededForce); // index in the previous range: the pair exceeded before but was not matched now
+ ThresholdStreamElement& elem = lostPair ? preExceededForceThresholdStream[i] : curExceededForceThresholdStream[i - nbPreExceededForce];
+ ThresholdStreamElement elt;
+ elt = elem;
+ elt.accumulatedForce = lostPair ? 0.f : elem.accumulatedForce; // lost pairs are reported with zero force
+ forceChangeThresholdStream.pushBack(elt);
+ }
+ else
+ {
+ //persistent pair: pushed once, from its previous-range entry; its current-range entry (mask 0) is skipped
+ if (i < nbPreExceededForce)
+ {
+ ThresholdStreamElement& elem = preExceededForceThresholdStream[i];
+ ThresholdStreamElement elt;
+ elt = elem;
+ elt.accumulatedForce = elem.accumulatedForce; // NOTE(review): elem is the previous stream's element, so this keeps the previous force -- confirm whether the current value was intended
+ forceChangeThresholdStream.pushBack(elt);
+ }
+ }
+ }
+ }
+ else
+ { // no previous exceeded pairs: the current stream is copied wholesale into the force-change stream
+ forceChangeThresholdStream.reserve(nbCurExceededForce);
+ forceChangeThresholdStream.forceSize_Unsafe(nbCurExceededForce);
+ PxMemCopy(forceChangeThresholdStream.begin(), curExceededForceThresholdStream.begin(), sizeof(ThresholdStreamElement) * nbCurExceededForce);
+ }
+ }
+
+ virtual void runInternal()
+ {
+ mDynamicsContext.getThresholdStream().forceSize_Unsafe(PxU32(mDynamicsContext.mThresholdStreamOut)); // the stream's size is set from mThresholdStreamOut before it is consumed
+ createForceChangeThresholdStream();
+ }
+
+ virtual const char* getName() const { return "PxsDynamics.createForceChangeThresholdStream"; }
+};
+
+
+struct ConstraintLess // Comparison functor over constraint descs, keyed on the 'index' of the Constraint each desc points to.
+{
+ bool operator()(const PxSolverConstraintDesc& left, const PxSolverConstraintDesc& right) const
+ {
+ return reinterpret_cast<Constraint*>(left.constraint)->index > reinterpret_cast<Constraint*>(right.constraint)->index; // true when left's index is GREATER than right's, despite the "Less" name
+ }
+};
+
+struct ArticulationSortPredicate // Comparison functor over indexed-contact-manager pointers, keyed on the work unit's 'index'.
+{
+ bool operator()(const PxsIndexedContactManager*& left, const PxsIndexedContactManager*& right) const // parameters are non-const references to pointer-to-const
+ {
+ return left->contactManager->getWorkUnit().index < right->contactManager->getWorkUnit().index; // true when left's work-unit index is smaller
+ }
+};
+
+class SolverArticulationUpdateTask : public Cm::Task // Task that runs ArticulationPImpl::computeUnconstrainedVelocities for mNbToProcess articulations and publishes per-island maxima.
+{
+ // mIslandThreadContext is the constructor-supplied context that receives the results (contact descs, iteration and size maxima).
+
+ ThreadContext& mIslandThreadContext;
+
+ Articulation** mArticulations;
+ ArticulationSolverDesc* mArticulationDescArray;
+ PxU32 mNbToProcess;
+
+ Dy::DynamicsContext& mContext;
+ PxU32 mStartIdx; // offset into mIslandThreadContext.mContactDescPtr for the first articulation of this task
+
+public:
+
+ static const PxU32 NbArticulationsPerTask = 8; // not referenced inside this class; NOTE(review): presumably the batch size used by the code that creates these tasks -- confirm
+
+ SolverArticulationUpdateTask(ThreadContext& islandThreadContext, Articulation** articulations, ArticulationSolverDesc* articulationDescArray, PxU32 nbToProcess, Dy::DynamicsContext& context,
+ PxU32 startIdx):
+ mIslandThreadContext(islandThreadContext), mArticulations(articulations), mArticulationDescArray(articulationDescArray), mNbToProcess(nbToProcess), mContext(context), mStartIdx(startIdx)
+ {
+ }
+
+ virtual const char* getName() const { return "SolverArticulationUpdateTask"; }
+
+ virtual void runInternal()
+ {
+ ThreadContext& threadContext = *mContext.getThreadContext(); // borrowed context, handed back with putThreadContext() at the end
+
+ threadContext.mConstraintBlockStream.reset(); //Clear in case there's some left-over memory in this context, for which the block has already been freed
+ PxU32 maxVelIters = 0;
+ PxU32 maxPosIters = 0;
+ PxU32 maxArticulationLength = 0;
+ PxU32 maxSolverArticLength = 0;
+
+ PxU32 startIdx = mStartIdx;
+ for(PxU32 i=0;i<mNbToProcess; i++)
+ {
+ Articulation& a = *(mArticulations[i]);
+ a.getSolverDesc(mArticulationDescArray[i]);
+
+ PxU32 acCount, descCount; // acCount is passed to the call below and not read afterwards
+ // The call is given mContactDescPtr + startIdx as this articulation's constraint-desc location.
+ descCount = ArticulationPImpl::computeUnconstrainedVelocities(mArticulationDescArray[i], mContext.mDt, threadContext.mConstraintBlockStream,
+ mIslandThreadContext.mContactDescPtr + startIdx, acCount, mContext.getScratchAllocator(),
+ mIslandThreadContext.mConstraintBlockManager, mContext.getGravity(), mContext.getContextId());
+
+ mArticulationDescArray[i].numInternalConstraints = Ps::to8(descCount); // the returned count is narrowed with Ps::to8
+
+ maxArticulationLength = PxMax(maxArticulationLength, PxU32(mArticulationDescArray[i].totalDataSize));
+ maxSolverArticLength = PxMax(maxSolverArticLength, PxU32(mArticulationDescArray[i].solverDataSize));
+
+ const PxU16 iterWord = a.getIterationCounts(); // packed word: high byte = velocity iterations, low byte = position iterations
+ maxVelIters = PxMax<PxU32>(PxU32(iterWord >> 8), maxVelIters);
+ maxPosIters = PxMax<PxU32>(PxU32(iterWord & 0xff), maxPosIters);
+ startIdx += DY_ARTICULATION_MAX_SIZE; // each articulation gets a DY_ARTICULATION_MAX_SIZE-wide slot of descs
+ }
+ Ps::atomicMax(reinterpret_cast<PxI32*>(&mIslandThreadContext.mMaxSolverPositionIterations), PxI32(maxPosIters)); // maxima are merged with atomicMax; NOTE(review): presumably because several of these tasks share one island context -- confirm
+ Ps::atomicMax(reinterpret_cast<PxI32*>(&mIslandThreadContext.mMaxSolverVelocityIterations), PxI32(maxVelIters));
+ Ps::atomicMax(reinterpret_cast<PxI32*>(&mIslandThreadContext.mMaxArticulationLength), PxI32(maxArticulationLength));
+ Ps::atomicMax(reinterpret_cast<PxI32*>(&mIslandThreadContext.mMaxArticulationSolverLength), PxI32(maxSolverArticLength));
+
+ mContext.putThreadContext(&threadContext);
+ }
+
+private:
+ PX_NOCOPY(SolverArticulationUpdateTask)
+};
+
+
+struct EnhancedSortPredicate // Comparison functor over indexed contact managers: lexicographic order on (mTransformCache0, mTransformCache1) of their work units.
+{
+ bool operator()(const PxsIndexedContactManager& left, const PxsIndexedContactManager& right) const
+ {
+ PxcNpWorkUnit& unit0 = left.contactManager->getWorkUnit();
+ PxcNpWorkUnit& unit1 = right.contactManager->getWorkUnit();
+ return (unit0.mTransformCache0 < unit1.mTransformCache0) || // primary key: mTransformCache0
+ ((unit0.mTransformCache0 == unit1.mTransformCache0) && (unit0.mTransformCache1 < unit1.mTransformCache1)); // tie-break: mTransformCache1
+ }
+};
+
+
+class PxsSolverStartTask : public Cm::Task
+{
+ PxsSolverStartTask& operator=(const PxsSolverStartTask&);
+public:
+
+ PxsSolverStartTask(DynamicsContext& context,
+ IslandContext& islandContext,
+ const SolverIslandObjects& objects,
+ const PxU32 solverBodyOffset,
+ const PxU32 kinematicCount,
+ IG::SimpleIslandManager& islandManager,
+ PxU32* bodyRemapTable,
+ PxsMaterialManager* materialManager,
+ PxsContactManagerOutputIterator& iterator,
+ bool enhancedDeterminism
+ ) :
+ mContext (context),
+ mIslandContext (islandContext),
+ mObjects (objects),
+ mSolverBodyOffset (solverBodyOffset),
+ mKinematicCount (kinematicCount),
+ mIslandManager (islandManager),
+ mBodyRemapTable (bodyRemapTable),
+ mMaterialManager (materialManager),
+ mOutputs (iterator),
+ mEnhancedDeterminism (enhancedDeterminism)
+ {}
+
+ void startTasks()
+ {
+ PX_PROFILE_ZONE("Dynamics.solveGroup", mContext.getContextId());
+ {
+ ThreadContext& mThreadContext = *mContext.getThreadContext();
+
+ mIslandContext.mThreadContext = &mThreadContext;
+
+ mThreadContext.mMaxSolverPositionIterations = 0;
+ mThreadContext.mMaxSolverVelocityIterations = 0;
+ mThreadContext.mAxisConstraintCount = 0;
+ mThreadContext.mContactDescPtr = mThreadContext.contactConstraintDescArray;
+ mThreadContext.mFrictionDescPtr = mThreadContext.frictionConstraintDescArray.begin();
+ mThreadContext.mNumDifferentBodyConstraints = 0;
+ mThreadContext.mNumSelfConstraintBlocks = 0;
+ mThreadContext.mNumSelfConstraints = 0;
+ mThreadContext.mNumDifferentBodyFrictionConstraints = 0;
+ mThreadContext.mNumSelfConstraintFrictionBlocks = 0;
+ mThreadContext.mNumSelfFrictionConstraints = 0;
+ mThreadContext.numContactConstraintBatches = 0;
+ mThreadContext.contactDescArraySize = 0;
+
+
+ mThreadContext.contactConstraintDescArray = mObjects.constraintDescs;
+ mThreadContext.orderedContactConstraints = mObjects.orderedConstraintDescs;
+ mThreadContext.mContactDescPtr = mObjects.constraintDescs;
+ mThreadContext.tempConstraintDescArray = mObjects.tempConstraintDescs;
+ mThreadContext.contactConstraintBatchHeaders = mObjects.constraintBatchHeaders;
+ mThreadContext.motionVelocityArray = mObjects.motionVelocities;
+ mThreadContext.mBodyCoreArray = mObjects.bodyCoreArray;
+ mThreadContext.mRigidBodyArray = mObjects.bodies;
+ mThreadContext.mArticulationArray = mObjects.articulations;
+ mThreadContext.bodyRemapTable = mObjects.bodyRemapTable;
+ mThreadContext.mNodeIndexArray = mObjects.nodeIndexArray;
+
+ const PxU32 frictionConstraintCount = mContext.getFrictionType() == PxFrictionType::ePATCH ? 0 : PxU32(mIslandContext.mCounts.contactManagers);
+ mThreadContext.resizeArrays(frictionConstraintCount, mIslandContext.mCounts.articulations);
+
+ PxsBodyCore** PX_RESTRICT bodyArrayPtr = mThreadContext.mBodyCoreArray;
+ PxsRigidBody** PX_RESTRICT rigidBodyPtr = mThreadContext.mRigidBodyArray;
+ Articulation** PX_RESTRICT articulationPtr = mThreadContext.mArticulationArray;
+ PxU32* PX_RESTRICT bodyRemapTable = mThreadContext.bodyRemapTable;
+ PxU32* PX_RESTRICT nodeIndexArray = mThreadContext.mNodeIndexArray;
+
+ PxU32 nbIslands = mObjects.numIslands;
+ const IG::IslandId* const islandIds = mObjects.islandIds;
+
+ const IG::IslandSim& islandSim = mIslandManager.getAccurateIslandSim();
+
+ PxU32 bodyIndex = 0, articIndex = 0;
+ for(PxU32 i = 0; i < nbIslands; ++i)
+ {
+ const IG::Island& island = islandSim.getIsland(islandIds[i]);
+
+ IG::NodeIndex currentIndex = island.mRootNode;
+
+ while(currentIndex.isValid())
+ {
+ const IG::Node& node = islandSim.getNode(currentIndex);
+
+ if(node.getNodeType() == IG::Node::eARTICULATION_TYPE)
+ {
+ articulationPtr[articIndex++] = node.getArticulation();
+ }
+ else
+ {
+ PxsRigidBody* rigid = node.getRigidBody();
+ PX_ASSERT(bodyIndex < (mIslandContext.mCounts.bodies + mContext.mKinematicCount + 1));
+ rigidBodyPtr[bodyIndex] = rigid;
+ bodyArrayPtr[bodyIndex] = &rigid->getCore();
+ nodeIndexArray[bodyIndex] = currentIndex.index();
+ bodyRemapTable[islandSim.getActiveNodeIndex(currentIndex)] = bodyIndex++;
+ }
+
+ currentIndex = node.mNextNode;
+ }
+ }
+
+
+ PxsIndexedContactManager* indexedManagers = mObjects.contactManagers;
+
+ PxU32 currentContactIndex = 0;
+ for(PxU32 i = 0; i < nbIslands; ++i)
+ {
+ const IG::Island& island = islandSim.getIsland(islandIds[i]);
+
+ IG::EdgeIndex contactEdgeIndex = island.mFirstEdge[IG::Edge::eCONTACT_MANAGER];
+
+ while(contactEdgeIndex != IG_INVALID_EDGE)
+ {
+ const IG::Edge& edge = islandSim.getEdge(contactEdgeIndex);
+
+ PxsContactManager* contactManager = mIslandManager.getContactManager(contactEdgeIndex);
+
+ if(contactManager)
+ {
+ const IG::NodeIndex nodeIndex1 = islandSim.getNodeIndex1(contactEdgeIndex);
+ const IG::NodeIndex nodeIndex2 = islandSim.getNodeIndex2(contactEdgeIndex);
+
+ PxsIndexedContactManager& indexedManager = indexedManagers[currentContactIndex++];
+ indexedManager.contactManager = contactManager;
+
+ PX_ASSERT(!nodeIndex1.isStaticBody());
+ {
+ const IG::Node& node1 = islandSim.getNode(nodeIndex1);
+
+ //Is it an articulation or not???
+ if(node1.getNodeType() == IG::Node::eARTICULATION_TYPE)
+ {
+ indexedManager.indexType0 = PxsIndexedInteraction::eARTICULATION;
+ indexedManager.solverBody0 = size_t(node1.getArticulation()) | nodeIndex1.articulationLinkId();
+ }
+ else
+ {
+ if(node1.isKinematic())
+ {
+ indexedManager.indexType0 = PxsIndexedInteraction::eKINEMATIC;
+ indexedManager.solverBody0 = islandSim.getActiveNodeIndex(nodeIndex1);
+ }
+ else
+ {
+ indexedManager.indexType0 = PxsIndexedInteraction::eBODY;
+ indexedManager.solverBody0 = bodyRemapTable[islandSim.getActiveNodeIndex(nodeIndex1)];
+ }
+ PX_ASSERT(indexedManager.solverBody0 < (mIslandContext.mCounts.bodies + mContext.mKinematicCount + 1));
+ }
+
+ }
+
+ if(nodeIndex2.isStaticBody())
+ {
+ indexedManager.indexType1 = PxsIndexedInteraction::eWORLD;
+ }
+ else
+ {
+ const IG::Node& node2 = islandSim.getNode(nodeIndex2);
+
+ //Is it an articulation or not???
+ if(node2.getNodeType() == IG::Node::eARTICULATION_TYPE)
+ {
+ indexedManager.indexType1 = PxsIndexedInteraction::eARTICULATION;
+ indexedManager.solverBody1 = size_t(node2.getArticulation()) | nodeIndex2.articulationLinkId();
+ }
+ else
+ {
+ if(node2.isKinematic())
+ {
+ indexedManager.indexType1 = PxsIndexedInteraction::eKINEMATIC;
+ indexedManager.solverBody1 = islandSim.getActiveNodeIndex(nodeIndex2);
+ }
+ else
+ {
+ indexedManager.indexType1 = PxsIndexedInteraction::eBODY;
+ indexedManager.solverBody1 = bodyRemapTable[islandSim.getActiveNodeIndex(nodeIndex2)];
+ }
+ PX_ASSERT(indexedManager.solverBody1 < (mIslandContext.mCounts.bodies + mContext.mKinematicCount + 1));
+ }
+ }
+
+ }
+ contactEdgeIndex = edge.mNextIslandEdge;
+ }
+ }
+
+ if (mEnhancedDeterminism)
+ {
+ Ps::sort(indexedManagers, currentContactIndex, EnhancedSortPredicate());
+ }
+
+ mIslandContext.mCounts.contactManagers = currentContactIndex;
+ }
+ }
+
+ /**
+  * Launches the pre-integration pass for the rigid bodies of this island.
+  *
+  * The island's slice of the shared solver-body / solver-body-data pools begins
+  * at mSolverBodyOffset. The per-island body arrays come from the thread context
+  * (and mObjects.bodies), and the maximum position/velocity iteration counts are
+  * handed to preIntegrationParallel through the thread context fields.
+  * NOTE(review): the profile zone is named "Dynamics.updateVelocities", so this
+  * presumably applies forces to velocities - confirm in preIntegrationParallel.
+  */
+ void integrate()
+ {
+     ThreadContext& threadCtx = *mIslandContext.mThreadContext;
+
+     // First solver body / solver body data entry belonging to this island.
+     PxSolverBody* const islandSolverBodies = mContext.mSolverBodyPool.begin() + mSolverBodyOffset;
+     PxSolverBodyData* const islandSolverBodyData = mContext.mSolverBodyDataPool.begin() + mSolverBodyOffset;
+
+     {
+         PX_PROFILE_ZONE("Dynamics.updateVelocities", mContext.getContextId());
+
+         // *mCont is this task's continuation; it is passed through to the callee.
+         mContext.preIntegrationParallel(mContext.mDt,
+                                         threadCtx.mBodyCoreArray,
+                                         mObjects.bodies,
+                                         threadCtx.mNodeIndexArray,
+                                         mIslandContext.mCounts.bodies,
+                                         islandSolverBodies,
+                                         islandSolverBodyData,
+                                         threadCtx.motionVelocityArray,
+                                         threadCtx.mMaxSolverPositionIterations,
+                                         threadCtx.mMaxSolverVelocityIterations,
+                                         *mCont);
+     }
+ }
+
+ /**
+  * Spawns one SolverArticulationUpdateTask per batch of articulations.
+  *
+  * Articulations are handed out in groups of at most
+  * SolverArticulationUpdateTask::NbArticulationsPerTask. Each task receives the
+  * matching slice of mObjects.articulations and of the thread context's
+  * articulation descriptor array, plus the offset
+  * firstArtic * DY_ARTICULATION_MAX_SIZE as its last constructor argument.
+  * Every task is chained to this task's continuation and then released.
+  */
+ void articulationTask()
+ {
+     ThreadContext& threadCtx = *mIslandContext.mThreadContext;
+     ArticulationSolverDesc* const articDescs = threadCtx.getArticulations().begin();
+     const PxU32 articCount = mIslandContext.mCounts.articulations;
+
+     for(PxU32 firstArtic = 0; firstArtic < articCount; firstArtic += SolverArticulationUpdateTask::NbArticulationsPerTask)
+     {
+         // The last batch may be smaller than a full batch.
+         const PxU32 batchCount = PxMin(SolverArticulationUpdateTask::NbArticulationsPerTask, articCount - firstArtic);
+
+         // Task storage comes from the context's task pool; construct in place.
+         void* const storage = mContext.getTaskPool().allocate(sizeof(SolverArticulationUpdateTask));
+         SolverArticulationUpdateTask* const updateTask = PX_PLACEMENT_NEW(storage, SolverArticulationUpdateTask)(
+             threadCtx,
+             &mObjects.articulations[firstArtic],
+             &articDescs[firstArtic],
+             batchCount,
+             mContext,
+             firstArtic*DY_ARTICULATION_MAX_SIZE);
+
+         updateTask->setContinuation(mCont);
+         updateTask->removeReference();
+     }
+ }
+
+ /**
+  * Builds the solver constraint descriptors for this island.
+  *
+  * Steps, in order:
+  *  1. Emit one descriptor per eCONSTRAINT edge (joints) of every island, then
+  *     sort those descriptors with ConstraintLess.
+  *  2. Order the contact managers with two stable counting-sort passes (first
+  *     keyed on body 0, then on body 1). Pairs that involve an articulation are
+  *     pushed behind all rigid-body pairs and sorted with
+  *     ArticulationSortPredicate.
+  *  3. Walk the ordered contact managers and emit contact descriptors after the
+  *     joint descriptors. Consecutive managers that share the same body pair,
+  *     have no articulation links, are not changeable and together stay within
+  *     Gu::ContactBuffer::MAX_CONTACTS share a single descriptor; each such run
+  *     of more than one manager gets a CompoundContactManager header.
+  *  4. Spawn PxsSolverConstraintPostProcessTask instances (8 headers each) for
+  *     the compound headers.
+  *
+  * On exit mThreadContext.contactDescArraySize and mThreadContext.mContactDescPtr
+  * reflect the descriptors written.
+  *
+  * Fix relative to the previous revision: the second sorting pass tested
+  * indexType0 on mObjects.contactManagers[a] while reading indexType1 from
+  * tempContactList[a]. After the first pass these are different contact
+  * managers, so articulation pairs could be bucketed as rigid-body pairs and
+  * vice versa. Both tests now look at tempContactList[a].
+  */
+ void setupDescTask()
+ {
+     ThreadContext& mThreadContext = *mIslandContext.mThreadContext;
+     PxSolverConstraintDesc* contactDescPtr = mThreadContext.mContactDescPtr;
+
+     PxU32 nbIslands = mObjects.numIslands;
+     const IG::IslandId* const islandIds = mObjects.islandIds;
+
+     const IG::IslandSim& islandSim = mIslandManager.getAccurateIslandSim();
+
+     // Step 1: joint descriptors, one per eCONSTRAINT edge of each island.
+     for(PxU32 i = 0; i < nbIslands; ++i)
+     {
+         const IG::Island& island = islandSim.getIsland(islandIds[i]);
+
+         IG::EdgeIndex edgeId = island.mFirstEdge[IG::Edge::eCONSTRAINT];
+
+         while(edgeId != IG_INVALID_EDGE)
+         {
+             PxSolverConstraintDesc& desc = *contactDescPtr;
+
+             const IG::Edge& edge = islandSim.getEdge(edgeId);
+             Dy::Constraint* constraint = mIslandManager.getConstraint(edgeId);
+             mContext.setDescFromIndices(desc, edgeId, mIslandManager, mBodyRemapTable, mSolverBodyOffset);
+             desc.constraint = reinterpret_cast<PxU8*>(constraint);
+             desc.constraintLengthOver16 = DY_SC_TYPE_RB_1D;
+             contactDescPtr++;
+             edgeId = edge.mNextIslandEdge;
+         }
+
+     }
+
+#if 1
+     // Sort only the joint descriptors written above.
+     Ps::sort(mThreadContext.mContactDescPtr, PxU32(contactDescPtr - mThreadContext.mContactDescPtr), ConstraintLess());
+#endif
+
+     // Size both scratch lists to exactly one entry per contact manager.
+     mThreadContext.orderedContactList.forceSize_Unsafe(0);
+     mThreadContext.orderedContactList.reserve(mIslandContext.mCounts.contactManagers);
+     mThreadContext.orderedContactList.forceSize_Unsafe(mIslandContext.mCounts.contactManagers);
+     mThreadContext.tempContactList.forceSize_Unsafe(0);
+     mThreadContext.tempContactList.reserve(mIslandContext.mCounts.contactManagers);
+     mThreadContext.tempContactList.forceSize_Unsafe(mIslandContext.mCounts.contactManagers);
+
+     const PxsIndexedContactManager** constraints = mThreadContext.orderedContactList.begin();
+
+     //OK, we sort the orderedContactList
+
+     mThreadContext.compoundConstraints.forceSize_Unsafe(0);
+     if(mIslandContext.mCounts.contactManagers)
+     {
+         {
+             mThreadContext.sortIndexArray.forceSize_Unsafe(0);
+
+             PX_COMPILE_TIME_ASSERT(PxsIndexedInteraction::eBODY == 0);
+             PX_COMPILE_TIME_ASSERT(PxsIndexedInteraction::eKINEMATIC == 1);
+
+             // Bucket offset per index type: eBODY indices are shifted past the
+             // kinematic range, eKINEMATIC indices are used as they are.
+             const PxI32 offsetMap[] = {PxI32(mContext.mKinematicCount), 0};
+
+             const PxU32 totalBodies = mContext.mKinematicCount + mIslandContext.mCounts.bodies+1;
+
+             mThreadContext.sortIndexArray.reserve(totalBodies);
+             mThreadContext.sortIndexArray.forceSize_Unsafe(totalBodies);
+             PxMemZero(mThreadContext.sortIndexArray.begin(), totalBodies * 4);
+
+             //Iterate over the array based on solverBodyDatapool, creating a list of sorted constraints (in order of body pair)
+             //We only do this with contacts. It's important that this is done this way because we don't want to break our rules that all joints
+             //appear before all contacts in the constraint list otherwise we will lose all guarantees about sorting joints.
+
+             // Pass 1a: histogram of body-0 buckets over the non-articulation pairs.
+             for(PxU32 a = 0; a < mIslandContext.mCounts.contactManagers; ++a)
+             {
+                 PX_ASSERT(mObjects.contactManagers[a].indexType0 != PxsIndexedInteraction::eWORLD);
+                 //Index first body...
+                 PxU8 indexType = mObjects.contactManagers[a].indexType0;
+                 if(indexType != PxsIndexedInteraction::eARTICULATION && mObjects.contactManagers[a].indexType1 != PxsIndexedInteraction::eARTICULATION)
+                 {
+                     PX_ASSERT((indexType == PxsIndexedInteraction::eBODY) || (indexType == PxsIndexedInteraction::eKINEMATIC));
+
+                     PxI32 index = PxI32(mObjects.contactManagers[a].solverBody0 + offsetMap[indexType]);
+                     PX_ASSERT(index >= 0);
+                     mThreadContext.sortIndexArray[PxU32(index)]++;
+                 }
+             }
+
+             PxU32 accumulatedCount = 0;
+
+             // Pass 1b: turn counts into start offsets, accumulating from the last
+             // bucket towards the first. accumulatedCount ends up as the number of
+             // non-articulation pairs.
+             for(PxU32 a = mThreadContext.sortIndexArray.size(); a > 0; --a)
+             {
+                 PxU32 ind = a - 1;
+                 PxU32 val = mThreadContext.sortIndexArray[ind];
+                 mThreadContext.sortIndexArray[ind] = accumulatedCount;
+                 accumulatedCount += val;
+             }
+
+             //OK, now copy across data to orderedConstraintDescs, pushing articulations to the end...
+             for(PxU32 a = 0; a < mIslandContext.mCounts.contactManagers; ++a)
+             {
+                 //Index first body...
+                 PxU8 indexType = mObjects.contactManagers[a].indexType0;
+                 if(indexType != PxsIndexedInteraction::eARTICULATION && mObjects.contactManagers[a].indexType1 != PxsIndexedInteraction::eARTICULATION)
+                 {
+                     PX_ASSERT((indexType == PxsIndexedInteraction::eBODY) || (indexType == PxsIndexedInteraction::eKINEMATIC));
+
+                     PxI32 index = PxI32(mObjects.contactManagers[a].solverBody0 + offsetMap[indexType]);
+                     PX_ASSERT(index >= 0);
+                     mThreadContext.tempContactList[mThreadContext.sortIndexArray[PxU32(index)]++] = &mObjects.contactManagers[a];
+                 }
+                 else
+                 {
+                     // Articulation pairs are appended after all rigid-body pairs.
+                     mThreadContext.tempContactList[accumulatedCount++] = &mObjects.contactManagers[a];
+                 }
+             }
+
+             //Now do the same again with bodyB, being careful not to overwrite the joints
+             PxMemZero(mThreadContext.sortIndexArray.begin(), totalBodies * 4);
+
+             // Pass 2a: histogram of body-1 buckets. Both index types must be read
+             // from tempContactList[a], the entry that is actually being re-bucketed.
+             for(PxU32 a = 0; a < mIslandContext.mCounts.contactManagers; ++a)
+             {
+                 const PxsIndexedContactManager& cm = *mThreadContext.tempContactList[a];
+                 PxU8 indexType = cm.indexType1;
+                 if(indexType != PxsIndexedInteraction::eARTICULATION && cm.indexType0 != PxsIndexedInteraction::eARTICULATION)
+                 {
+                     PX_ASSERT((indexType == PxsIndexedInteraction::eBODY) || (indexType == PxsIndexedInteraction::eKINEMATIC) || (indexType == PxsIndexedInteraction::eWORLD));
+
+                     // Pairs against the world all go to bucket 0.
+                     PxI32 index = (indexType == PxsIndexedInteraction::eWORLD) ? 0 : PxI32(cm.solverBody1 + offsetMap[indexType]);
+                     PX_ASSERT(index >= 0);
+                     mThreadContext.sortIndexArray[PxU32(index)]++;
+                 }
+             }
+
+             // Pass 2b: counts -> start offsets, same scheme as pass 1b.
+             accumulatedCount = 0;
+             for(PxU32 a = mThreadContext.sortIndexArray.size(); a > 0; --a)
+             {
+                 PxU32 ind = a - 1;
+                 PxU32 val = mThreadContext.sortIndexArray[ind];
+                 mThreadContext.sortIndexArray[ind] = accumulatedCount;
+                 accumulatedCount += val;
+             }
+
+             // Everything from this index on involves an articulation.
+             PxU32 articulationStartIndex = accumulatedCount;
+
+             //OK, now copy across data to orderedConstraintDescs, pushing articulations to the end...
+             for(PxU32 a = 0; a < mIslandContext.mCounts.contactManagers; ++a)
+             {
+                 const PxsIndexedContactManager& cm = *mThreadContext.tempContactList[a];
+                 PxU8 indexType = cm.indexType1;
+                 // Must be the same predicate as in pass 2a so the offsets stay valid.
+                 if(indexType != PxsIndexedInteraction::eARTICULATION && cm.indexType0 != PxsIndexedInteraction::eARTICULATION)
+                 {
+                     PX_ASSERT((indexType == PxsIndexedInteraction::eBODY) || (indexType == PxsIndexedInteraction::eKINEMATIC) || (indexType == PxsIndexedInteraction::eWORLD));
+
+                     PxI32 index = (indexType == PxsIndexedInteraction::eWORLD) ? 0 : PxI32(cm.solverBody1 + offsetMap[indexType]);
+                     PX_ASSERT(index >= 0);
+                     constraints[mThreadContext.sortIndexArray[PxU32(index)]++] = mThreadContext.tempContactList[a];
+                 }
+                 else
+                 {
+                     constraints[accumulatedCount++] = mThreadContext.tempContactList[a];
+                 }
+             }
+
+#if 1
+             Ps::sort(constraints + articulationStartIndex, accumulatedCount - articulationStartIndex, ArticulationSortPredicate());
+#endif
+         }
+
+         // Contact descriptors start right after the joint descriptors.
+         mThreadContext.mStartContactDescPtr = contactDescPtr;
+
+         mThreadContext.compoundConstraints.reserve(1024);
+         mThreadContext.compoundConstraints.forceSize_Unsafe(0);
+
+         // Step 3: seed the first run with contact manager 0.
+         PxSolverConstraintDesc* startDesc = contactDescPtr;
+         mContext.setDescFromIndices(*startDesc, *constraints[0], mSolverBodyOffset);
+         startDesc->constraint = reinterpret_cast<PxU8*>(constraints[0]->contactManager);
+         startDesc->constraintLengthOver16 = DY_SC_TYPE_RB_CONTACT;
+
+         PxsContactManagerOutput* startManagerOutput = &mOutputs.getContactManager(constraints[0]->contactManager->getWorkUnit().mNpIndex);
+         PxU32 contactCount = startManagerOutput->nbContacts;    // contacts accumulated in the current run
+         PxU32 startIndex = 0;                                   // first contact manager of the current run
+         PxU32 numHeaders = 0;                                   // compound headers created so far
+         for(PxU32 a = 1; a < mIslandContext.mCounts.contactManagers; ++a)
+         {
+             // The candidate descriptor is staged in the slot after the current one.
+             PxSolverConstraintDesc& desc = *(contactDescPtr+1);
+             mContext.setDescFromIndices(desc, *constraints[a], mSolverBodyOffset);
+
+             PxsContactManager* manager = constraints[a]->contactManager;
+             PxsContactManagerOutput& output = mOutputs.getContactManager(manager->getWorkUnit().mNpIndex);
+
+             desc.constraint = reinterpret_cast<PxU8*>(constraints[a]->contactManager);
+             desc.constraintLengthOver16 = DY_SC_TYPE_RB_CONTACT;
+
+             if (contactCount == 0)
+             {
+                 //This is the first object in the pair
+                 *startDesc = *(contactDescPtr + 1);
+                 startIndex = a;
+                 startManagerOutput = &output;
+             }
+
+             // The run ends when the body pair changes, the run start involves an
+             // articulation link, the merged contact count would exceed the buffer
+             // limit, or this manager is changeable.
+             if(startDesc->bodyA != desc.bodyA || startDesc->bodyB != desc.bodyB
+                 || startDesc->linkIndexA != PxSolverConstraintDesc::NO_LINK || startDesc->linkIndexB != PxSolverConstraintDesc::NO_LINK
+                 || contactCount + output.nbContacts > Gu::ContactBuffer::MAX_CONTACTS
+                 || manager->isChangeable()
+                 ) //If this is the first thing and no contacts...then we skip
+             {
+                 PxU32 stride = a - startIndex;
+                 if(contactCount > 0)
+                 {
+                     if(stride > 1)
+                     {
+                         // More than one manager in the run: record a compound header
+                         // that remembers the first manager's original output fields.
+                         ++numHeaders;
+                         CompoundContactManager& header = mThreadContext.compoundConstraints.insert();
+                         header.mStartIndex = startIndex;
+                         header.mStride = Ps::to16(stride);
+                         header.mReducedContactCount = Ps::to16(contactCount);
+                         PxsContactManager* manager1 = constraints[startIndex]->contactManager;
+                         PxcNpWorkUnit& unit = manager1->getWorkUnit();
+
+                         PX_ASSERT(startManagerOutput == &mOutputs.getContactManager(unit.mNpIndex));
+
+                         header.unit = &unit;
+                         header.cmOutput = startManagerOutput;
+                         header.originalContactPatches = startManagerOutput->contactPatches;
+                         header.originalContactPoints = startManagerOutput->contactPoints;
+                         header.originalContactCount = startManagerOutput->nbContacts;
+                         header.originalPatchCount = startManagerOutput->nbPatches;
+                         header.originalForceBuffer = reinterpret_cast<PxReal*>(startManagerOutput->contactForces);
+                         header.originalStatusFlags = startManagerOutput->statusFlag;
+                     }
+                     // Commit the finished run; the staged descriptor becomes the new run start.
+                     startDesc = ++contactDescPtr;
+                 }
+                 else
+                 {
+                     //Copy back next contactDescPtr
+                     *startDesc = *(contactDescPtr+1);
+                 }
+                 contactCount = 0;
+                 startIndex = a;
+                 startManagerOutput = &output;
+             }
+             contactCount += output.nbContacts;
+
+         }
+
+         // Flush the final run.
+         PxU32 stride = mIslandContext.mCounts.contactManagers - startIndex;
+         if(contactCount > 0)
+         {
+             if(stride > 1)
+             {
+                 ++numHeaders;
+                 CompoundContactManager& header = mThreadContext.compoundConstraints.insert();
+                 header.mStartIndex = startIndex;
+                 header.mStride = Ps::to16(stride);
+                 header.mReducedContactCount = Ps::to16(contactCount);
+                 PxsContactManager* manager = constraints[startIndex]->contactManager;
+                 PxcNpWorkUnit& unit = manager->getWorkUnit();
+                 header.unit = &unit;
+                 header.cmOutput = startManagerOutput;
+                 header.originalContactPatches = startManagerOutput->contactPatches;
+                 header.originalContactPoints = startManagerOutput->contactPoints;
+                 header.originalContactCount = startManagerOutput->nbContacts;
+                 header.originalPatchCount = startManagerOutput->nbPatches;
+                 header.originalForceBuffer = reinterpret_cast<PxReal*>(startManagerOutput->contactForces);
+                 header.originalStatusFlags = startManagerOutput->statusFlag;
+             }
+             contactDescPtr++;
+         }
+
+         // Step 4: post-process the compound headers, unrollSize headers per task.
+         if(numHeaders)
+         {
+             const PxU32 unrollSize = 8;
+             for(PxU32 a = 0; a < numHeaders; a+= unrollSize)
+             {
+                 PxsSolverConstraintPostProcessTask* postProcessTask = PX_PLACEMENT_NEW( mContext.getTaskPool().allocate(sizeof(PxsSolverConstraintPostProcessTask)),
+                     PxsSolverConstraintPostProcessTask)(mContext, mThreadContext, mObjects, mSolverBodyOffset, a, PxMin(unrollSize, numHeaders - a), mMaterialManager,
+                     mOutputs);
+                 postProcessTask->setContinuation(mCont);
+                 postProcessTask->removeReference();
+             }
+         }
+     }
+
+     // Publish how many descriptors were written and where the next one goes.
+     mThreadContext.contactDescArraySize = PxU32(contactDescPtr - mThreadContext.contactConstraintDescArray);
+     mThreadContext.mContactDescPtr = contactDescPtr;
+ }
+
+ /**
+  * Task entry point: runs the four solver-start stages in a fixed order.
+  * setupDescTask() reads thread-context fields that startTasks() assigns
+  * (for example mContactDescPtr), so the call order must not be changed.
+  */
+ virtual void runInternal()
+ {
+     startTasks();        // bind island arrays to the thread context and index the contact managers
+     integrate();         // pre-integration of the island's rigid bodies
+     setupDescTask();     // joint + contact constraint descriptors
+     articulationTask();  // spawn the articulation update tasks
+ }
+
+ /** Returns the constant name string that identifies this task. */
+ virtual const char* getName() const { return "PxsDynamics.solverStart"; }
+
+private:
+ DynamicsContext& mContext; // dynamics context: solver body pools, dt, task pool, friction type
+ IslandContext& mIslandContext; // supplies mThreadContext and the per-island mCounts
+ const SolverIslandObjects mObjects; // island object arrays (bodies, articulations, contactManagers, islandIds, ...), held by value
+ const PxU32 mSolverBodyOffset; // start of this island's slice in mContext's solver body pools
+ const PxU32 mKinematicCount; // NOTE(review): the methods visible here read mContext.mKinematicCount instead - confirm this member is still needed
+ IG::SimpleIslandManager& mIslandManager; // source of the island sim and of per-edge contact managers / constraints
+ PxU32* mBodyRemapTable; // passed to setDescFromIndices when building joint descriptors
+ PxsMaterialManager* mMaterialManager; // forwarded to PxsSolverConstraintPostProcessTask
+ PxsContactManagerOutputIterator& mOutputs; // contact manager output lookup, indexed by a work unit's mNpIndex
+ bool mEnhancedDeterminism; // when true, the indexed contact managers are sorted with EnhancedSortPredicate
+};
+
+/**
+ * Task that compacts the articulation constraint descriptors of an island and
+ * then partitions all of the island's constraint descriptors through
+ * partitionContactConstraints(). Results are written to the island's
+ * ThreadContext.
+ */
+class PxsSolverConstraintPartitionTask : public Cm::Task
+{
+ // Declared but not defined: the task is not assignable.
+ PxsSolverConstraintPartitionTask& operator=(const PxsSolverConstraintPartitionTask&);
+public:
+
+ /**
+  * @param context              dynamics context that owns the solver body pool
+  * @param islandContext        island whose thread context and counts are used
+  * @param objects              island object arrays (copied into the task)
+  * @param solverBodyOffset     start of the island's slice in the solver body pool
+  * @param enhancedDeterminism  forwarded to the partitioning arguments
+  */
+ PxsSolverConstraintPartitionTask(DynamicsContext& context,
+                                  IslandContext& islandContext,
+                                  const SolverIslandObjects& objects,
+                                  const PxU32 solverBodyOffset,
+                                  bool enhancedDeterminism)
+     : mContext(context)
+     , mIslandContext(islandContext)
+     , mObjects(objects)
+     , mSolverBodyOffset(solverBodyOffset)
+     , mEnhancedDeterminism(enhancedDeterminism)
+ {
+ }
+
+ /**
+  * Compacts the per-articulation descriptor ranges into one contiguous run,
+  * then partitions every descriptor of the island (or clears the
+  * per-partition counts when there is nothing to partition).
+  */
+ virtual void runInternal()
+ {
+     ThreadContext& threadCtx = *mIslandContext.mThreadContext;
+
+     ArticulationSolverDesc* const articDescs = threadCtx.getArticulations().begin();
+     const PxU32 articCount = mIslandContext.mCounts.articulations;
+
+     if(articCount)
+     {
+         // Articulation `a` owns the descriptor range starting at
+         // a * DY_ARTICULATION_MAX_SIZE (relative to mContactDescPtr). Articulation 0
+         // is already in place, so later ranges are copied down behind it.
+         PxSolverConstraintDesc* const descBase = threadCtx.mContactDescPtr;
+         PxU32 packedCount = articDescs[0].numInternalConstraints;
+
+         for(PxU32 artic = 1; artic < articCount; ++artic)
+         {
+             const PxU32 usedCount = articDescs[artic].numInternalConstraints;
+             const PxU32 srcBegin = artic * DY_ARTICULATION_MAX_SIZE;
+             const PxU32 srcEnd = srcBegin + usedCount;
+
+             for(PxU32 src = srcBegin; src < srcEnd; ++src)
+             {
+                 descBase[packedCount] = descBase[src];
+                 ++packedCount;
+             }
+         }
+
+         threadCtx.contactDescArraySize += packedCount;
+     }
+
+     PxSolverConstraintDesc* const allDescs = threadCtx.contactConstraintDescArray;
+     const PxU32 totalDescs = threadCtx.contactDescArraySize;
+     PxSolverBody* const islandBodies = mContext.mSolverBodyPool.begin() + mSolverBodyOffset;
+
+     // This value is overwritten by the reset immediately below.
+     threadCtx.mNumDifferentBodyConstraints = totalDescs;
+
+     // Reset every partitioning counter before (possibly) recomputing them.
+     threadCtx.mNumDifferentBodyConstraints = 0;
+     threadCtx.mNumSelfConstraints = 0;
+     threadCtx.mNumSelfConstraintBlocks = 0;
+     threadCtx.mNumDifferentBodyFrictionConstraints = 0;
+     threadCtx.mNumSelfConstraintFrictionBlocks = 0;
+     threadCtx.mNumSelfFrictionConstraints = 0;
+
+     if(totalDescs == 0)
+     {
+         // Nothing to partition: zero the whole capacity of the per-partition counts.
+         PxMemZero(threadCtx.mConstraintsPerPartition.begin(), sizeof(PxU32)*threadCtx.mConstraintsPerPartition.capacity());
+     }
+     else
+     {
+         ConstraintPartitionArgs args;
+
+         // Inputs.
+         args.mBodies = islandBodies;
+         args.mArticulationPtrs = articDescs;
+         args.mContactConstraintDescriptors = allDescs;
+         args.mNumArticulationPtrs = threadCtx.getArticulations().size();
+         args.mNumBodies = mIslandContext.mCounts.bodies;
+         args.mNumContactConstraintDescriptors = totalDescs;
+
+         // Output / scratch buffers.
+         args.mOrderedContactConstraintDescriptors = threadCtx.orderedContactConstraints;
+         args.mTempContactConstraintDescriptors = threadCtx.tempConstraintDescArray;
+
+         // Result counters start at zero.
+         args.mNumSelfConstraintBlocks = 0;
+         args.mNumSelfConstraints = 0;
+         args.mNumDifferentBodyConstraints = 0;
+
+         args.mConstraintsPerPartition = &threadCtx.mConstraintsPerPartition;
+         args.mBitField = &threadCtx.mPartitionNormalizationBitmap;
+         args.enhancedDeterminism = mEnhancedDeterminism;
+
+         threadCtx.mMaxPartitions = partitionContactConstraints(args);
+
+         // Copy the counters filled in by the partitioner back to the thread context.
+         threadCtx.mNumDifferentBodyConstraints = args.mNumDifferentBodyConstraints;
+         threadCtx.mNumSelfConstraints = args.mNumSelfConstraints;
+         threadCtx.mNumSelfConstraintBlocks = args.mNumSelfConstraintBlocks;
+     }
+
+     PX_ASSERT((threadCtx.mNumDifferentBodyConstraints + threadCtx.mNumSelfConstraints) == totalDescs);
+ }
+
+ /** Returns the constant name string that identifies this task. */
+ virtual const char* getName() const
+ {
+     return "PxsDynamics.solverConstraintPartition";
+ }
+
+ DynamicsContext& mContext;
+ IslandContext& mIslandContext;
+ const SolverIslandObjects mObjects;
+ PxU32 mSolverBodyOffset;
+ bool mEnhancedDeterminism;
+};
+
+
+class PxsSolverSetupSolveTask : public Cm::Task
+{
+ PxsSolverSetupSolveTask& operator=(const PxsSolverSetupSolveTask&);
+public:
+
+ PxsSolverSetupSolveTask(
+ DynamicsContext& context,
+ IslandContext& islandContext,
+ const SolverIslandObjects& objects,
+ const PxU32 solverBodyOffset,
+ IG::IslandSim& islandSim) :
+ mContext(context),
+ mIslandContext(islandContext),
+ mObjects(objects),
+ mSolverBodyOffset(solverBodyOffset),
+ mIslandSim(islandSim)
+ {}
+
+
+ virtual void runInternal()
+ {
+ ThreadContext& mThreadContext = *mIslandContext.mThreadContext;
+
+ PxSolverConstraintDesc* contactDescBegin = mThreadContext.orderedContactConstraints;
+ PxSolverConstraintDesc* contactDescPtr = mThreadContext.orderedContactConstraints;
+
+ PxSolverBody* solverBodies = mContext.mSolverBodyPool.begin() + mSolverBodyOffset;
+ PxSolverBodyData* solverBodyDatas = mContext.mSolverBodyDataPool.begin();
+
+ PxU32 frictionDescCount = mThreadContext.mNumDifferentBodyFrictionConstraints;
+
+ PxU32 j = 0, i = 0;
+
+ //On PS3, self-constraints will be bumped to the end of the constraint list
+ //and processed separately. On PC/360, they will be mixed in the array and
+ //classed as "different body" constraints regardless of the fact that they're self-constraints.
+ //PxU32 numBatches = mThreadContext.numDifferentBodyBatchHeaders;
+ // TODO: maybe replace with non-null joints from end of the array
+
+ PxU32 numBatches = 0;
+
+ PxU32 currIndex = 0;
+ for(PxU32 a = 0; a < mThreadContext.mConstraintsPerPartition.size(); ++a)
+ {
+ PxU32 endIndex = currIndex + mThreadContext.mConstraintsPerPartition[a];
+
+ PxU32 numBatchesInPartition = 0;
+ for(PxU32 b = currIndex; b < endIndex; ++b)
+ {
+ PxConstraintBatchHeader& _header = mThreadContext.contactConstraintBatchHeaders[b];
+ PxU16 stride = _header.mStride, newStride = _header.mStride;
+ PxU32 startIndex = j;
+ for(PxU16 c = 0; c < stride; ++c)
+ {
+ if(getConstraintLength(contactDescBegin[i]) == 0)
+ {
+ newStride--;
+ i++;
+ }
+ else
+ {
+ if(i!=j)
+ contactDescBegin[j] = contactDescBegin[i];
+ i++;
+ j++;
+ contactDescPtr++;
+ }
+ }
+
+ if(newStride != 0)
+ {
+ mThreadContext.contactConstraintBatchHeaders[numBatches].mStartIndex = startIndex;
+ mThreadContext.contactConstraintBatchHeaders[numBatches].mStride = newStride;
+ PxU8 type = *contactDescBegin[startIndex].constraint;
+ if(type == DY_SC_TYPE_STATIC_CONTACT)
+ {
+ //Check if any block of constraints is classified as type static (single) contact constraint.
+ //If they are, iterate over all constraints grouped with it and switch to "dynamic" contact constraint
+ //type if there's a dynamic contact constraint in the group.
+ for(PxU32 c = 1; c < newStride; ++c)
+ {
+ if(*contactDescBegin[startIndex+c].constraint == DY_SC_TYPE_RB_CONTACT)
+ {
+ type = DY_SC_TYPE_RB_CONTACT;
+ }
+ }
+ }
+
+ mThreadContext.contactConstraintBatchHeaders[numBatches].mConstraintType = type;
+ numBatches++;
+ numBatchesInPartition++;
+ }
+ }
+ PxU32 numHeaders = numBatchesInPartition;
+ currIndex += mThreadContext.mConstraintsPerPartition[a];
+ mThreadContext.mConstraintsPerPartition[a] = numHeaders;
+ }
+
+ PxU32 contactDescCount = PxU32(contactDescPtr - contactDescBegin);
+
+ mThreadContext.mNumDifferentBodyConstraints = contactDescCount;
+
+ PxU32 numSelfConstraintBlocks = mThreadContext.mNumSelfConstraintBlocks;
+
+ //Remap self constraint array. Self-constraint blocks exists on PS3 as an optimization for SPU solver.
+ for(PxU32 a = 0; a < numSelfConstraintBlocks; ++a)
+ {
+ PX_ASSERT(mThreadContext.mSelfConstraintBlocks[a].startId == i);
+ PxU32 origNumSelfConstraints = mThreadContext.mSelfConstraintBlocks[a].numSelfConstraints;
+ PxU32 startId = j;
+
+ for(PxU32 b = 0; b < origNumSelfConstraints; ++b)
+ {
+ PxSolverConstraintDesc& desc = contactDescBegin[i];
+
+ if(getConstraintLength(desc))
+ {
+ PxConstraintBatchHeader& header = mThreadContext.contactConstraintBatchHeaders[numBatches++];
+ header.mStride = 1;
+ header.mStartIndex = j;
+ header.mConstraintType = *desc.constraint;
+ if(i != j)
+ contactDescBegin[j] = contactDescBegin[i];
+ j++;
+ }
+ i++;
+ }
+ mThreadContext.mSelfConstraintBlocks[a].startId = startId;
+ mThreadContext.mSelfConstraintBlocks[a].numSelfConstraints = j - startId;
+ }
+
+ mThreadContext.numContactConstraintBatches = numBatches;
+ mThreadContext.mNumSelfConstraints = j - contactDescCount; //self constraint count
+ contactDescCount = j;
+ mThreadContext.mOrderedContactDescCount = j;
+
+ //Now do the friction constraints if we're not using the sticky model
+ if(mContext.getFrictionType() != PxFrictionType::ePATCH)
+ {
+ PxSolverConstraintDesc* frictionDescBegin = mThreadContext.frictionConstraintDescArray.begin();
+ PxSolverConstraintDesc* frictionDescPtr = frictionDescBegin;
+
+ Ps::Array<PxConstraintBatchHeader>& frictionHeaderArray = mThreadContext.frictionConstraintBatchHeaders;
+ frictionHeaderArray.forceSize_Unsafe(0);
+ frictionHeaderArray.reserve(mThreadContext.numContactConstraintBatches);
+ PxConstraintBatchHeader* headers = frictionHeaderArray.begin();
+
+ Ps::Array<PxU32>& constraintsPerPartition = mThreadContext.mConstraintsPerPartition;
+ Ps::Array<PxU32>& frictionConstraintsPerPartition = mThreadContext.mFrictionConstraintsPerPartition;
+ frictionConstraintsPerPartition.forceSize_Unsafe(0);
+ frictionConstraintsPerPartition.reserve(constraintsPerPartition.capacity());
+
+
+ PxU32 fricI = 0;
+ PxU32 startIndex = 0;
+ PxU32 fricHeaders = 0;
+ for(PxU32 k = 0; k < constraintsPerPartition.size(); ++k)
+ {
+ PxU32 numBatchesInK = constraintsPerPartition[k];
+ PxU32 endIndex = startIndex + numBatchesInK;
+
+ PxU32 startFricH = fricHeaders;
+
+ for(PxU32 a = startIndex; a < endIndex; ++a)
+ {
+ PxConstraintBatchHeader& _header = mThreadContext.contactConstraintBatchHeaders[a];
+ PxU16 stride = _header.mStride;
+ if(_header.mConstraintType == DY_SC_TYPE_RB_CONTACT || _header.mConstraintType == DY_SC_TYPE_EXT_CONTACT ||
+ _header.mConstraintType == DY_SC_TYPE_STATIC_CONTACT)
+ {
+ PxU8 type = 0;
+ //Extract friction from this constraint
+ for(PxU16 b = 0; b < stride; ++b)
+ {
+ //create the headers...
+ PxSolverConstraintDesc& desc = contactDescBegin[_header.mStartIndex + b];
+ PX_ASSERT(desc.constraint);
+ SolverContactCoulombHeader* header = reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint);
+ PxU32 frictionOffset = header->frictionOffset;
+ PxU8* PX_RESTRICT constraint = reinterpret_cast<PxU8*>(header) + frictionOffset;
+ const PxU32 origLength = getConstraintLength(desc);
+ const PxU32 length = (origLength - frictionOffset);
+
+ setConstraintLength(*frictionDescPtr, length);
+ frictionDescPtr->constraint = constraint;
+ frictionDescPtr->bodyA = desc.bodyA;
+ frictionDescPtr->bodyB = desc.bodyB;
+ frictionDescPtr->bodyADataIndex = desc.bodyADataIndex;
+ frictionDescPtr->bodyBDataIndex = desc.bodyBDataIndex;
+ frictionDescPtr->linkIndexA = desc.linkIndexA;
+ frictionDescPtr->linkIndexB = desc.linkIndexB;
+ frictionDescPtr->writeBack = NULL;
+ frictionDescPtr->writeBackLengthOver4 = 0;
+ type = *constraint;
+ frictionDescPtr++;
+ }
+ headers->mStartIndex = fricI;
+ headers->mStride = stride;
+ headers->mConstraintType = type;
+ headers++;
+ fricHeaders++;
+ fricI += stride;
+ }
+ else if(_header.mConstraintType == DY_SC_TYPE_BLOCK_RB_CONTACT || _header.mConstraintType == DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT)
+ {
+ //KS - TODO - Extract block of 4 contacts from this constraint. This isn't implemented yet for coulomb friction model
+ PX_ASSERT(contactDescBegin[_header.mStartIndex].constraint);
+ SolverContactCoulombHeader4* head = reinterpret_cast<SolverContactCoulombHeader4*>(contactDescBegin[_header.mStartIndex].constraint);
+ PxU32 frictionOffset = head->frictionOffset;
+ PxU8* PX_RESTRICT constraint = reinterpret_cast<PxU8*>(head) + frictionOffset;
+ const PxU32 origLength = getConstraintLength(contactDescBegin[_header.mStartIndex]);
+ const PxU32 length = (origLength - frictionOffset);
+ PxU8 type = *constraint;
+ PX_ASSERT(type == DY_SC_TYPE_BLOCK_FRICTION || type == DY_SC_TYPE_BLOCK_STATIC_FRICTION);
+ for(PxU32 b = 0; b < 4; ++b)
+ {
+ PxSolverConstraintDesc& desc = contactDescBegin[_header.mStartIndex+b];
+ setConstraintLength(*frictionDescPtr, length);
+ frictionDescPtr->constraint = constraint;
+ frictionDescPtr->bodyA = desc.bodyA;
+ frictionDescPtr->bodyB = desc.bodyB;
+ frictionDescPtr->bodyADataIndex = desc.bodyADataIndex;
+ frictionDescPtr->bodyBDataIndex = desc.bodyBDataIndex;
+ frictionDescPtr->linkIndexA = desc.linkIndexA;
+ frictionDescPtr->linkIndexB = desc.linkIndexB;
+ frictionDescPtr->writeBack = NULL;
+ frictionDescPtr->writeBackLengthOver4 = 0;
+ frictionDescPtr++;
+ }
+ headers->mStartIndex = fricI;
+ headers->mStride = stride;
+ headers->mConstraintType = type;
+ headers++;
+ fricHeaders++;
+ fricI += stride;
+ }
+ }
+ startIndex += numBatchesInK;
+ if(startFricH < fricHeaders)
+ {
+ frictionConstraintsPerPartition.pushBack(fricHeaders - startFricH);
+ }
+ }
+
+
+ frictionDescCount = PxU32(frictionDescPtr - frictionDescBegin);
+
+ mThreadContext.mNumDifferentBodyFrictionConstraints = frictionDescCount;
+
+ frictionHeaderArray.forceSize_Unsafe(PxU32(headers - frictionHeaderArray.begin()));
+
+ mThreadContext.mNumSelfFrictionConstraints = fricI - frictionDescCount; //self constraint count
+ mThreadContext.mNumDifferentBodyFrictionConstraints = frictionDescCount;
+ frictionDescCount = fricI;
+ mThreadContext.mOrderedFrictionDescCount = frictionDescCount;
+
+
+ }
+
+ {
+ {
+ PX_PROFILE_ZONE("Dynamics.solver", mContext.getContextId());
+
+ PxSolverConstraintDesc* contactDescs = mThreadContext.orderedContactConstraints;
+ PxSolverConstraintDesc* frictionDescs = mThreadContext.frictionConstraintDescArray.begin();
+
+ PxI32* thresholdPairsOut = &mContext.mThresholdStreamOut;
+
+ SolverIslandParams& params = *reinterpret_cast<SolverIslandParams*>(mContext.getTaskPool().allocate(sizeof(SolverIslandParams)));
+ params.positionIterations = mThreadContext.mMaxSolverPositionIterations;
+ params.velocityIterations = mThreadContext.mMaxSolverVelocityIterations;
+ params.bodyListStart = solverBodies;
+ params.bodyDataList = solverBodyDatas;
+ params.solverBodyOffset = mSolverBodyOffset;
+ params.bodyListSize = mIslandContext.mCounts.bodies;
+ params.articulationListStart = mThreadContext.getArticulations().begin();
+ params.articulationListSize = mThreadContext.getArticulations().size();
+ params.constraintList = contactDescs;
+ params.constraintIndex = 0;
+ params.constraintIndex2 = 0;
+ params.bodyListIndex = 0;
+ params.bodyListIndex2 = 0;
+ params.bodyIntegrationListIndex = 0;
+ params.thresholdStream = mContext.getThresholdStream().begin();
+ params.thresholdStreamLength = mContext.getThresholdStream().size();
+ params.outThresholdPairs = thresholdPairsOut;
+ params.motionVelocityArray = mThreadContext.motionVelocityArray;
+ params.bodyArray = mThreadContext.mBodyCoreArray;
+ params.numObjectsIntegrated = 0;
+ params.constraintBatchHeaders = mThreadContext.contactConstraintBatchHeaders;
+ params.numConstraintHeaders = mThreadContext.numContactConstraintBatches;
+ params.headersPerPartition = mThreadContext.mConstraintsPerPartition.begin();
+ params.nbPartitions = mThreadContext.mConstraintsPerPartition.size();
+ params.rigidBodies = const_cast<PxsRigidBody**>(mObjects.bodies);
+ params.frictionHeadersPerPartition = mThreadContext.mFrictionConstraintsPerPartition.begin();
+ params.nbFrictionPartitions = mThreadContext.mFrictionConstraintsPerPartition.size();
+ params.frictionConstraintBatches = mThreadContext.frictionConstraintBatchHeaders.begin();
+ params.numFrictionConstraintHeaders = mThreadContext.frictionConstraintBatchHeaders.size();
+ params.frictionConstraintIndex = 0;
+ params.frictionConstraintList = frictionDescs;
+
+ const PxU32 unrollSize = 8;
+ const PxU32 denom = PxMax(1u, (mThreadContext.mMaxPartitions*unrollSize));
+ const PxU32 MaxTasks = getTaskManager()->getCpuDispatcher()->getWorkerCount();
+ const PxU32 idealThreads = mThreadContext.numContactConstraintBatches/denom;
+ const PxU32 numTasks = PxMax(1u, PxMin(idealThreads, MaxTasks));
+
+ if(numTasks > 1)
+ {
+ const PxU32 idealBatchSize = PxMax(unrollSize, idealThreads*unrollSize/(numTasks*2));
+
+ params.batchSize = idealBatchSize; //assigning ideal batch size for the solver to grab work at. Only needed by the multi-threaded island solver.
+
+ for(PxU32 a = 1; a < numTasks; ++a)
+ {
+ void* tsk = mContext.getTaskPool().allocate(sizeof(PxsParallelSolverTask));
+ PxsParallelSolverTask* pTask = PX_PLACEMENT_NEW(tsk, PxsParallelSolverTask)(
+ params, mContext, mContext.getFrictionType(), mIslandSim);
+
+ //Force to complete before merge task!
+ pTask->setContinuation(mCont);
+
+ pTask->removeReference();
+ }
+
+ //Avoid kicking off one parallel task when we can do the work inline in this function
+ {
+ PX_PROFILE_ZONE("Dynamics.parallelSolve", mContext.getContextId());
+
+ solveParallel(mContext, params, mIslandSim);
+ }
+ const PxI32 numBodiesPlusArtics = PxI32( mIslandContext.mCounts.bodies + mIslandContext.mCounts.articulations );
+
+ PxI32* numObjectsIntegrated = &params.numObjectsIntegrated;
+
+ WAIT_FOR_PROGRESS_NO_TIMER(numObjectsIntegrated, numBodiesPlusArtics);
+
+ }
+ else
+ {
+
+ //Only one task - a small island so do a sequential solve (avoid the atomic overheads)
+ solveVBlock(mContext.mSolverCore[mContext.getFrictionType()], params);
+
+ const PxU32 bodyCountMin1 = mIslandContext.mCounts.bodies - 1u;
+ PxSolverBodyData* solverBodyData2 = solverBodyDatas + mSolverBodyOffset + 1;
+ for(PxU32 k=0; k < mIslandContext.mCounts.bodies; k++)
+ {
+ const PxU32 prefetchAddress = PxMin(k+4, bodyCountMin1);
+ Ps::prefetchLine(mThreadContext.mBodyCoreArray[prefetchAddress]);
+ Ps::prefetchLine(&mThreadContext.motionVelocityArray[k], 128);
+ Ps::prefetchLine(&mThreadContext.mBodyCoreArray[prefetchAddress], 128);
+ Ps::prefetchLine(&mObjects.bodies[prefetchAddress]);
+
+ PxSolverBodyData& solverBodyData = solverBodyData2[k];
+
+ integrateCore(mThreadContext.motionVelocityArray[k].linear, mThreadContext.motionVelocityArray[k].angular,
+ solverBodies[k], solverBodyData, mContext.mDt);
+
+ PxsRigidBody& rBody = *mObjects.bodies[k];
+ PxsBodyCore& core = rBody.getCore();
+ rBody.mLastTransform = core.body2World;
+ core.body2World = solverBodyData.body2World;
+ core.linearVelocity = solverBodyData.linearVelocity;
+ core.angularVelocity = solverBodyData.angularVelocity;
+
+
+ bool hasStaticTouch = mIslandSim.getIslandStaticTouchCount(IG::NodeIndex(solverBodyData.nodeIndex)) != 0;
+ sleepCheck(const_cast<PxsRigidBody*>(mObjects.bodies[k]), mContext.mDt, mContext.mInvDt, mContext.mEnableStabilization, mContext.mUseAdaptiveForce, mThreadContext.motionVelocityArray[k],
+ hasStaticTouch);
+ }
+
+ for(PxU32 cnt=0;cnt<mIslandContext.mCounts.articulations;cnt++)
+ {
+ ArticulationSolverDesc &d = mThreadContext.getArticulations()[cnt];
+ PX_PROFILE_ZONE("Articulations.integrate", mContext.getContextId());
+
+ ArticulationPImpl::updateBodies(d, mContext.getDt());
+ }
+ }
+ }
+ }
+ }
+
+ virtual const char* getName() const { return "PxsDynamics.solverSetupSolve"; }
+
+ DynamicsContext& mContext;
+ IslandContext& mIslandContext;
+ const SolverIslandObjects mObjects;
+ PxU32 mSolverBodyOffset;
+ IG::IslandSim& mIslandSim;
+};
+
+class PxsSolverEndTask : public Cm::Task // Last task of a solver chain: restores compound contact-manager outputs, writes contact forces back per manager and hands back the thread context.
+{
+ PxsSolverEndTask& operator=(const PxsSolverEndTask&); // declared only (private, no definition here); the task holds reference members
+public:
+
+ PxsSolverEndTask(DynamicsContext& context,
+ IslandContext& islandContext,
+ const SolverIslandObjects& objects,
+ const PxU32 solverBodyOffset,
+ PxsContactManagerOutputIterator& cmOutputs) :
+ mContext (context),
+ mIslandContext (islandContext),
+ mObjects (objects),
+ mSolverBodyOffset (solverBodyOffset),
+ mOutputs (cmOutputs)
+ {}
+
+ virtual void runInternal() // Un-patches every compound constraint, scatters its forces, then resets per-island solver state.
+ {
+ ThreadContext& mThreadContext = *mIslandContext.mThreadContext; // local alias (named like a member) for the thread context stored in the island context
+#if PX_ENABLE_SIM_STATS
+ mThreadContext.getSimStats().numAxisSolverConstraints += mThreadContext.mAxisConstraintCount;
+#endif
+ //Patch up the contact managers (TODO - fix up force writeback)
+ PxU32 numCompoundConstraints = mThreadContext.compoundConstraints.size();
+ for(PxU32 i = 0; i < numCompoundConstraints; ++i)
+ {
+ CompoundContactManager& manager = mThreadContext.compoundConstraints[i];
+ PxsContactManagerOutput* cmOutput = manager.cmOutput;
+
+ PxReal* contactForces = reinterpret_cast<PxReal*>(cmOutput->contactForces); // read BEFORE the restore below: this is the compound force buffer
+ PxU32 contactCount = cmOutput->nbContacts; // likewise the compound contact count, captured before it is overwritten
+
+ cmOutput->contactPatches = manager.originalContactPatches; // put back the values saved in the CompoundContactManager
+ cmOutput->contactPoints = manager.originalContactPoints;
+ cmOutput->nbContacts = manager.originalContactCount;
+ cmOutput->nbPatches = manager.originalPatchCount;
+ cmOutput->statusFlag = manager.originalStatusFlags;
+ cmOutput->contactForces = manager.originalForceBuffer;
+
+ for(PxU32 a = 1; a < manager.mStride; ++a) // copy the friction data of manager.unit to the other mStride-1 managers of the compound
+ {
+ PxsContactManager* pManager = mThreadContext.orderedContactList[manager.mStartIndex + a]->contactManager;
+ pManager->getWorkUnit().frictionDataPtr = manager.unit->frictionDataPtr;
+ pManager->getWorkUnit().frictionPatchCount = manager.unit->frictionPatchCount;
+ //pManager->getWorkUnit().prevFrictionPatchCount = manager.unit->prevFrictionPatchCount;
+ }
+
+ //This is a stride-based contact force writer. The assumption is that we may have skipped certain unimportant contacts reported by the
+ //discrete narrow phase
+ if(contactForces)
+ {
+ PxU32 currentContactIndex = 0; // position in the concatenated contact sequence of the compound's managers
+ PxU32 currentManagerIndex = manager.mStartIndex; // manager (within orderedContactList) currently being written to
+ PxU32 currentManagerContactIndex = 0; // contact position inside that manager
+
+ for(PxU32 a = 0; a < contactCount; ++a)
+ {
+ PxU32 index = manager.forceBufferList[a]; // concatenated-sequence position that compound contact 'a' maps to
+ PxsContactManager* pManager = mThreadContext.orderedContactList[currentManagerIndex]->contactManager;
+ PxsContactManagerOutput* output = &mOutputs.getContactManager(pManager->getWorkUnit().mNpIndex);
+ while(currentContactIndex < index || output->nbContacts == 0) // advance until 'index' is reached, also skipping managers without contacts
+ {
+ //Step forwards...first in this manager...
+
+ PxU32 numToStep = PxMin(index - currentContactIndex, PxU32(output->nbContacts) - currentManagerContactIndex);
+ currentContactIndex += numToStep;
+ currentManagerContactIndex += numToStep;
+ if(currentManagerContactIndex == output->nbContacts) // current manager exhausted: move on to the next one
+ {
+ currentManagerIndex++;
+ currentManagerContactIndex = 0;
+ pManager = mThreadContext.orderedContactList[currentManagerIndex]->contactManager; // NOTE(review): no visible bound check on currentManagerIndex - relies on forceBufferList being consistent with the managers' contact counts
+ output = &mOutputs.getContactManager(pManager->getWorkUnit().mNpIndex);
+ }
+ }
+ if(output->nbContacts > 0 && output->contactForces) // only managers that have contacts and a force buffer receive the value
+ output->contactForces[currentManagerContactIndex] = contactForces[a];
+ }
+ }
+ }
+
+ mThreadContext.compoundConstraints.forceSize_Unsafe(0); // all compounds processed: set the list size back to 0
+
+ mThreadContext.mConstraintBlockManager.reset();
+
+ mContext.putThreadContext(&mThreadContext); // give the thread context back to the dynamics context
+ }
+
+
+ virtual const char* getName() const
+ {
+ return "PxsDynamics.solverEnd";
+ }
+
+ DynamicsContext& mContext;
+ IslandContext& mIslandContext;
+ const SolverIslandObjects mObjects; // stored by value (copied from the constructor argument)
+ const PxU32 mSolverBodyOffset;
+ PxsContactManagerOutputIterator& mOutputs;
+};
+
+// Solver-chain task that (per its name) creates and finalizes the constraints of one island batch.
+// Only the declaration lives here; runInternal() is defined out of line.
+class PxsSolverCreateFinalizeConstraintsTask : public Cm::Task
+{
+ // Declared without a definition here: the task holds reference members and is not meant to be assigned.
+ PxsSolverCreateFinalizeConstraintsTask& operator=(const PxsSolverCreateFinalizeConstraintsTask&);
+
+public:
+
+ // Stores the arguments verbatim; no work is done until the task runs.
+ PxsSolverCreateFinalizeConstraintsTask(DynamicsContext& context, IslandContext& islandContext, PxU32 solverDataOffset,
+ PxsContactManagerOutputIterator& outputs, bool enhancedDeterminism)
+ : mContext(context)
+ , mIslandContext(islandContext)
+ , mSolverDataOffset(solverDataOffset)
+ , mOutputs(outputs)
+ , mEnhancedDeterminism(enhancedDeterminism)
+ {
+ }
+
+ virtual void runInternal();
+
+ // Name reported for this task (e.g. to profiling tools).
+ virtual const char* getName() const
+ {
+ return "PxsDynamics.solverCreateFinalizeConstraints";
+ }
+
+ DynamicsContext& mContext;
+ IslandContext& mIslandContext;
+ PxU32 mSolverDataOffset;
+ PxsContactManagerOutputIterator& mOutputs;
+ bool mEnhancedDeterminism;
+};
+
+
+// Helper that joins two tasks: 'next' becomes the continuation of 'first', then one reference on 'next' is released so the ref counts stay balanced (presumably setContinuation takes its own reference on 'next' - confirm against PxLightCpuTask). The call order matters.
+void chainTasks(PxLightCpuTask* first, PxLightCpuTask* next)
+{
+ first->setContinuation(next);
+ next->removeReference();
+}
+
+// Builds the solver task chain for one batch of islands and returns its lead task.
+// Execution order of the chain: start -> partition constraints -> create/finalize constraints -> setup/solve -> end -> continuation.
+// The IslandContext shared by all five tasks, and the tasks themselves, are allocated from the dynamics context's flush pool.
+// Parameters:
+//  dynamicContext / objects / counts / solverBodyOffset - forwarded to the task constructors; 'counts' is copied into the IslandContext.
+//  islandManager - forwarded to the start task; its accurate island sim is handed to the setup/solve task.
+//  bodyRemapTable / materialManager / iterator / useEnhancedDeterminism - forwarded to the task constructors.
+//  continuation - task that the end task continues into.
+// Returns the start task; the caller in this file releases its reference on it right after this call.
+PxBaseTask* createSolverTaskChain(DynamicsContext& dynamicContext,
+ const SolverIslandObjects& objects,
+ const PxsIslandIndices& counts,
+ const PxU32 solverBodyOffset,
+ IG::SimpleIslandManager& islandManager,
+ PxU32* bodyRemapTable, PxsMaterialManager* materialManager, PxBaseTask* continuation,
+ PxsContactManagerOutputIterator& iterator, bool useEnhancedDeterminism)
+{
+ Cm::FlushPool& taskPool = dynamicContext.getTaskPool();
+
+ // The pool is locked once for the whole setup, so every allocation below uses the non-locking entry point.
+ taskPool.lock();
+
+ // Shared per-chain state. The thread context is not known yet and is left NULL here.
+ // (Previously allocated through the locking allocate() although the pool lock was already held.)
+ IslandContext* islandContext = reinterpret_cast<IslandContext*>(taskPool.allocateNotThreadSafe(sizeof(IslandContext)));
+ islandContext->mThreadContext = NULL;
+ islandContext->mCounts = counts;
+
+ // create lead task
+ PxsSolverStartTask* startTask = PX_PLACEMENT_NEW(taskPool.allocateNotThreadSafe(sizeof(PxsSolverStartTask)), PxsSolverStartTask)(dynamicContext, *islandContext, objects, solverBodyOffset, dynamicContext.getKinematicCount(),
+ islandManager, bodyRemapTable, materialManager, iterator, useEnhancedDeterminism);
+ PxsSolverEndTask* endTask = PX_PLACEMENT_NEW(taskPool.allocateNotThreadSafe(sizeof(PxsSolverEndTask)), PxsSolverEndTask)(dynamicContext, *islandContext, objects, solverBodyOffset, iterator);
+
+ PxsSolverCreateFinalizeConstraintsTask* createFinalizeConstraintsTask = PX_PLACEMENT_NEW(taskPool.allocateNotThreadSafe(sizeof(PxsSolverCreateFinalizeConstraintsTask)), PxsSolverCreateFinalizeConstraintsTask)(dynamicContext, *islandContext, solverBodyOffset, iterator, useEnhancedDeterminism);
+ PxsSolverSetupSolveTask* setupSolveTask = PX_PLACEMENT_NEW(taskPool.allocateNotThreadSafe(sizeof(PxsSolverSetupSolveTask)), PxsSolverSetupSolveTask)(dynamicContext, *islandContext, objects, solverBodyOffset, islandManager.getAccurateIslandSim());
+
+ PxsSolverConstraintPartitionTask* partitionConstraintsTask = PX_PLACEMENT_NEW(taskPool.allocateNotThreadSafe(sizeof(PxsSolverConstraintPartitionTask)), PxsSolverConstraintPartitionTask)(dynamicContext, *islandContext, objects, solverBodyOffset, useEnhancedDeterminism);
+
+ // The end task is the only one that continues into the caller-supplied task.
+ endTask->setContinuation(continuation);
+
+ // set up task chain in reverse order
+ chainTasks(setupSolveTask, endTask);
+ chainTasks(createFinalizeConstraintsTask, setupSolveTask);
+ chainTasks(partitionConstraintsTask, createFinalizeConstraintsTask);
+ chainTasks(startTask, partitionConstraintsTask);
+
+ taskPool.unlock();
+
+ return startTask;
+}
+
+
+void DynamicsContext::update(IG::SimpleIslandManager& simpleIslandManager, PxBaseTask* /*continuation*/, PxBaseTask* lostTouchTask, // Per-step entry point: sizes the solver pools, copies kinematics and queues one solver task chain per batch of active islands.
+ PxsContactManager** /*foundPatchManagers*/, PxU32 /*nbFoundPatchManagers*/,
+ PxsContactManager** /*lostPatchManagers*/, PxU32 /*nbLostPatchManagers*/,
+ PxU32 /*maxPatchesPerCM*/,
+ PxsContactManagerOutputIterator& iterator,
+ PxsContactManagerOutput*,
+ const PxReal dt, const PxVec3& gravity, const PxU32 /*bitMapWordCounts*/) // parameters whose names are commented out are unused in this implementation
+{
+ PX_PROFILE_ZONE("Dynamics.solverQueueTasks", mContextID);
+
+ PX_UNUSED(simpleIslandManager); // NOTE(review): redundant - simpleIslandManager is used below
+
+ mOutputIterator = iterator;
+
+ mDt = dt;
+ mInvDt = dt == 0.0f ? 0.0f : 1.0f/dt; // avoid dividing by zero when dt is 0
+ mGravity = gravity;
+
+ const IG::IslandSim& islandSim = simpleIslandManager.getAccurateIslandSim();
+
+ const PxU32 islandCount = islandSim.getNbActiveIslands();
+
+ const PxU32 activatedContactCount = islandSim.getNbActivatedEdges(IG::Edge::eCONTACT_MANAGER);
+ const IG::EdgeIndex* const activatingEdges = islandSim.getActivatedEdges(IG::Edge::eCONTACT_MANAGER);
+
+ for(PxU32 a = 0; a < activatedContactCount; ++a)
+ {
+ PxsContactManager* cm = simpleIslandManager.getContactManager(activatingEdges[a]);
+ if(cm) // an activated edge may have no contact manager
+ {
+ cm->getWorkUnit().frictionPatchCount = 0; //KS - zero the friction patch count on any activating edges
+ }
+ }
+
+#if PX_ENABLE_SIM_STATS
+ if(islandCount > 0)
+ {
+ mSimStats.mNbActiveKinematicBodies = islandSim.getNbActiveKinematics();
+ mSimStats.mNbActiveDynamicBodies = islandSim.getNbActiveNodes(IG::Node::eRIGID_BODY_TYPE);
+ mSimStats.mNbActiveConstraints = islandSim.getNbActiveEdges(IG::Edge::eCONSTRAINT);
+ }
+ else
+ {
+ mSimStats.mNbActiveKinematicBodies = islandSim.getNbActiveKinematics();
+ mSimStats.mNbActiveDynamicBodies = 0;
+ mSimStats.mNbActiveConstraints = 0;
+ }
+#endif
+
+ mThresholdStreamOut = 0;
+
+ resetThreadContexts();
+
+ //If there is no work to do then we can do nothing at all.
+ if(0 == islandCount)
+ {
+ return;
+ }
+
+ //KS - test that world solver body's velocities are finite and 0, then set it to 0.
+ //Technically, the velocity should always be 0 but can be stomped if a NAN creeps into the simulation.
+ PX_ASSERT(mWorldSolverBody.linearVelocity == PxVec3(0.f));
+ PX_ASSERT(mWorldSolverBody.angularState == PxVec3(0.f));
+ PX_ASSERT(mWorldSolverBody.linearVelocity.isFinite());
+ PX_ASSERT(mWorldSolverBody.angularState.isFinite());
+
+ mWorldSolverBody.linearVelocity = mWorldSolverBody.angularState = PxVec3(0.f);
+
+ const PxU32 kinematicCount = islandSim.getNbActiveKinematics();
+ const IG::NodeIndex* const kinematicIndices = islandSim.getActiveKinematics();
+ mKinematicCount = kinematicCount;
+
+ const PxU32 bodyCount = islandSim.getNbActiveNodes(IG::Node::eRIGID_BODY_TYPE);
+
+ PxU32 numArtics = islandSim.getNbActiveNodes(IG::Node::eARTICULATION_TYPE);
+
+ {
+ if(kinematicCount + bodyCount > mSolverBodyPool.capacity()) // grow all three pools together, keyed on the solver body pool's capacity
+ {
+ mSolverBodyPool.reserve((kinematicCount + bodyCount + 31) & ~31); // pad out to 32 * 128 = 4k to prevent alloc churn
+ mSolverBodyDataPool.reserve((kinematicCount + bodyCount + 31 + 1) & ~31); // pad out to 32 * 128 = 4k to prevent alloc churn
+ mSolverBodyRemapTable.reserve((kinematicCount + bodyCount + 31 + 1) & ~31);
+ }
+
+ {
+ PxSolverBody emptySolverBody;
+ PxMemZero(&emptySolverBody, sizeof(PxSolverBody));
+ mSolverBodyPool.resize(kinematicCount + bodyCount, emptySolverBody); // kinematics occupy the first kinematicCount entries, dynamic bodies follow
+ PxSolverBodyData emptySolverBodyData;
+ PxMemZero(&emptySolverBodyData, sizeof(PxSolverBodyData));
+ mSolverBodyDataPool.resize(kinematicCount + bodyCount + 1, emptySolverBodyData); // +1: entry 0 holds the world body data (assigned below)
+ mSolverBodyRemapTable.resize(bodyCount);
+ }
+
+ // integrate and copy all the kinematics - overkill, since not all kinematics
+ // need solver bodies
+
+ mSolverBodyDataPool[0] = mWorldSolverBodyData;
+
+
+ {
+ PX_PROFILE_ZONE("Dynamics.updateKinematics", mContextID);
+ PxMemZero(mSolverBodyPool.begin(), kinematicCount*sizeof(PxSolverBody));
+ for(PxU32 i=0;i<kinematicCount;i++)
+ {
+ PxsRigidBody* rigidBody = islandSim.getRigidBody(kinematicIndices[i]);
+ const PxsBodyCore& core = rigidBody->getCore();
+ copyToSolverBodyData(core.linearVelocity, core.angularVelocity, core.inverseMass, core.inverseInertia, core.body2World, core.maxPenBias,
+ core.maxContactImpulse, kinematicIndices[i].index(), core.contactReportThreshold, mSolverBodyDataPool[i + 1], core.lockFlags); // data slot i + 1 because slot 0 is the world body
+ rigidBody->saveLastCCDTransform();
+ // Only really necessary for PS3 at the moment (for the cross island parallel constraint solver
+ // but we might switch to the same on other platforms)
+ mSolverBodyPool[i].solverProgress=MAX_PERMITTED_SOLVER_PROGRESS;
+ mSolverBodyPool[i].maxSolverNormalProgress=MAX_PERMITTED_SOLVER_PROGRESS;
+ mSolverBodyPool[i].maxSolverFrictionProgress=MAX_PERMITTED_SOLVER_PROGRESS;
+ }
+ }
+ }
+
+ PxU32 solverBatchMax = mSolverBatchSize; // islands are merged into one batch until this many bodies are reached
+ PxU32 articulationBatchMax = 2; // a batch stops growing once it holds this many articulations
+ PxU32 minimumConstraintCount = 1; // a batch keeps growing while it has fewer constraints than this
+
+
+ //Resize arrays of solver constraints...
+ PxU32 numArticulationConstraints=numArtics*Dy::DY_ARTICULATION_MAX_SIZE; //Just allocate enough memory to fit worst-case maximum size articulations...
+
+ const PxU32 nbActiveContactManagers = islandSim.getNbActiveEdges(IG::Edge::eCONTACT_MANAGER);
+ const PxU32 nbActiveConstraints = islandSim.getNbActiveEdges(IG::Edge::eCONSTRAINT);
+
+ PxU32 totalConstraintCount = nbActiveConstraints + nbActiveContactManagers + numArticulationConstraints;
+
+ mSolverConstraintDescPool.forceSize_Unsafe(0); // pattern used for every array below: size to 0, reserve rounded up to a multiple of 64, then force the final size
+ mSolverConstraintDescPool.reserve((totalConstraintCount + 63) & (~63));
+ mSolverConstraintDescPool.forceSize_Unsafe(totalConstraintCount);
+
+ mOrderedSolverConstraintDescPool.forceSize_Unsafe(0);
+ mOrderedSolverConstraintDescPool.reserve((totalConstraintCount + 63) & (~63));
+ mOrderedSolverConstraintDescPool.forceSize_Unsafe(totalConstraintCount);
+
+ mTempSolverConstraintDescPool.forceSize_Unsafe(0);
+ mTempSolverConstraintDescPool.reserve((totalConstraintCount + 63) & (~63));
+ mTempSolverConstraintDescPool.forceSize_Unsafe(totalConstraintCount);
+
+ mContactConstraintBatchHeaders.forceSize_Unsafe(0);
+ mContactConstraintBatchHeaders.reserve((totalConstraintCount + 63) & (~63));
+ mContactConstraintBatchHeaders.forceSize_Unsafe(totalConstraintCount);
+
+ mContactList.forceSize_Unsafe(0);
+ mContactList.reserve((nbActiveContactManagers +63u) & (~63u));
+ mContactList.forceSize_Unsafe(nbActiveContactManagers);
+
+ mMotionVelocityArray.forceSize_Unsafe(0);
+ mMotionVelocityArray.reserve((bodyCount + 63u) & (~63u));
+ mMotionVelocityArray.forceSize_Unsafe(bodyCount);
+
+ mBodyCoreArray.forceSize_Unsafe(0);
+ mBodyCoreArray.reserve((bodyCount + 63u) & (~63u));
+ mBodyCoreArray.forceSize_Unsafe(bodyCount);
+
+ mRigidBodyArray.forceSize_Unsafe(0);
+ mRigidBodyArray.reserve((bodyCount + 63u) & (~63u));
+ mRigidBodyArray.forceSize_Unsafe(bodyCount);
+
+ mArticulationArray.forceSize_Unsafe(0);
+ mArticulationArray.reserve((numArtics + 63u) & (~63u));
+ mArticulationArray.forceSize_Unsafe(numArtics);
+
+ mNodeIndexArray.forceSize_Unsafe(0);
+ mNodeIndexArray.reserve((bodyCount + 63u) & (~63u));
+ mNodeIndexArray.forceSize_Unsafe(bodyCount);
+
+
+ ThresholdStream& stream = getThresholdStream();
+ stream.forceSize_Unsafe(0);
+ stream.reserve(Ps::nextPowerOfTwo(nbActiveContactManagers != 0 ? nbActiveContactManagers-1 : nbActiveContactManagers)); // the ternary avoids computing 0 - 1 on an unsigned count
+
+ PxU32 constraintIndex = 0; // running offset into the constraint desc / batch header pools
+
+ //flip exceeded force threshold buffer
+ mCurrentIndex = 1 - mCurrentIndex;
+
+ //create force threshold tasks to produce force change events
+ PxsForceThresholdTask* forceThresholdTask = PX_PLACEMENT_NEW(getTaskPool().allocateNotThreadSafe(sizeof(PxsForceThresholdTask)), PxsForceThresholdTask)(*this);
+ forceThresholdTask->setContinuation(lostTouchTask);
+
+ const IG::IslandId*const islandIds = islandSim.getActiveIslands();
+
+ PxU32 currentIsland = 0; // running offsets of the next batch into the island / body / articulation / contact arrays
+ PxU32 currentBodyIndex = 0;
+ PxU32 currentArticulation = 0;
+ PxU32 currentContact = 0;
+ //while(start<sentinel)
+ while(currentIsland < islandCount) // one iteration per batch of islands
+ {
+ SolverIslandObjects objectStarts; // start pointers of this batch's slices of the shared arrays
+ objectStarts.articulations = mArticulationArray.begin()+ currentArticulation;
+ objectStarts.bodies = mRigidBodyArray.begin() + currentBodyIndex;
+ objectStarts.contactManagers = mContactList.begin() + currentContact;
+ objectStarts.constraintDescs = mSolverConstraintDescPool.begin() + constraintIndex;
+ objectStarts.orderedConstraintDescs = mOrderedSolverConstraintDescPool.begin() + constraintIndex;
+ objectStarts.tempConstraintDescs = mTempSolverConstraintDescPool.begin() + constraintIndex;
+ objectStarts.constraintBatchHeaders = mContactConstraintBatchHeaders.begin() + constraintIndex;
+ objectStarts.motionVelocities = mMotionVelocityArray.begin() + currentBodyIndex;
+ objectStarts.bodyCoreArray = mBodyCoreArray.begin() + currentBodyIndex;
+ objectStarts.islandIds = islandIds + currentIsland;
+ objectStarts.bodyRemapTable = mSolverBodyRemapTable.begin(); // not offset: every batch gets the start of the remap table
+ objectStarts.nodeIndexArray = mNodeIndexArray.begin() + currentBodyIndex;
+
+ PxU32 startIsland = currentIsland;
+ PxU32 constraintCount = 0;
+
+ PxU32 nbArticulations = 0;
+ PxU32 nbBodies = 0;
+ PxU32 nbConstraints = 0;
+ PxU32 nbContactManagers =0;
+
+ //KS - logic is a bit funky here. We will keep rolling the island together provided currentIsland < islandCount AND either we haven't exceeded the max number of bodies or we have
+ //zero constraints AND we haven't exceeded articulation batch counts (it's still currently beneficial to keep articulations in separate islands but this is only temporary).
+ while((currentIsland < islandCount && (nbBodies < solverBatchMax || constraintCount < minimumConstraintCount)) && nbArticulations < articulationBatchMax)
+ {
+ const IG::Island& island = islandSim.getIsland(islandIds[currentIsland]);
+ nbBodies += island.mSize[IG::Node::eRIGID_BODY_TYPE];
+ nbArticulations += island.mSize[IG::Node::eARTICULATION_TYPE];
+ nbConstraints += island.mEdgeCount[IG::Edge::eCONSTRAINT];
+ nbContactManagers += island.mEdgeCount[IG::Edge::eCONTACT_MANAGER];
+ constraintCount = nbConstraints + nbContactManagers;
+ currentIsland++;
+ }
+
+
+ objectStarts.numIslands = currentIsland - startIsland;
+
+ constraintIndex += nbArticulations*Dy::DY_ARTICULATION_MAX_SIZE; // worst-case room for this batch's articulation constraints
+
+ PxsIslandIndices counts;
+
+ counts.articulations = nbArticulations;
+ counts.bodies = nbBodies;
+
+ counts.constraints = nbConstraints;
+ counts.contactManagers = nbContactManagers;
+ if(counts.articulations + counts.bodies > 0) // nothing to solve for a batch without bodies or articulations
+ {
+ PxBaseTask* task = createSolverTaskChain(*this, objectStarts, counts,
+ kinematicCount + currentBodyIndex, simpleIslandManager, mSolverBodyRemapTable.begin(), mMaterialManager, forceThresholdTask, mOutputIterator, mUseEnhancedDeterminism); // solver body offset skips the kinematics stored at the front of the pool
+ task->removeReference(); // release our reference on the chain's start task
+ }
+
+ currentBodyIndex += nbBodies;
+ currentArticulation += nbArticulations;
+ currentContact += nbContactManagers;
+
+ constraintIndex += constraintCount;
+ }
+
+ //kick off forceThresholdTask
+ forceThresholdTask->removeReference();
+}
+
+void DynamicsContext::updateBodyCore(PxBaseTask* continuation) // Intentionally empty in this implementation: the continuation is ignored and no work is queued.
+{
+ PX_UNUSED(continuation);
+}
+
+// Accumulates the per-thread simulation statistics into this context and clears them.
+// Compiles to just the profile zone when PX_ENABLE_SIM_STATS is off.
+void DynamicsContext::mergeResults()
+{
+ PX_PROFILE_ZONE("Dynamics.solverMergeResults", mContextID);
+
+#if PX_ENABLE_SIM_STATS
+ // Visit every thread context in the pool; getNext() yields NULL once all have been seen.
+ PxcThreadCoherentCacheIterator<ThreadContext, PxcNpMemBlockPool> contextIter(mThreadContextPool);
+ for(ThreadContext* current = contextIter.getNext(); current != NULL; current = contextIter.getNext())
+ {
+ ThreadContext::ThreadSimStats& stats = current->getSimStats();
+ addThreadStats(stats);
+ // Reset so the same numbers are not added again by a later merge.
+ stats.clear();
+ }
+#endif
+}
+
+
+// Pre-integrates a contiguous run of bodies and fills in their solver-side data.
+// For every body i in [0, bodyCount):
+//  - the position / velocity iteration counts (low / high byte of core.solverIterationCounts) are folded into local maxima,
+//  - bodyCoreComputeUnconstrainedVelocity() updates core.linearVelocity / core.angularVelocity from gravity, dt, damping and the velocity limits,
+//  - the body state is copied into solverBodyDataPool[i + 1] and the progress counters of solverBodyPool[i] are zeroed.
+// The local maxima are then published with atomicMax, because several pre-integration tasks share the same two counters.
+// The loop handles bodies [0, bodyCount - 1) while prefetching the next one; the last body is processed after the loop
+// so that no prefetch reaches past the end of the arrays.
+// An empty range (bodyCount == 0) is a no-op; previously it indexed element bodyCount - 1 after unsigned wrap-around.
+static void preIntegrationParallel(
+ const PxF32 dt,
+ PxsBodyCore*const* bodyArray, // INOUT: core body attributes
+ PxsRigidBody*const* originalBodyArray, // IN: original bodies (dereferenced to read accelScale and mInternalFlags)
+ PxU32 const* nodeIndexArray, // IN: island node index
+ PxU32 bodyCount, // IN: body count
+ PxSolverBody* solverBodyPool, // IN: solver body pool (space preallocated)
+ PxSolverBodyData* solverBodyDataPool, // IN: solver body data pool (space preallocated)
+ volatile PxU32* maxSolverPositionIterations,
+ volatile PxU32* maxSolverVelocityIterations,
+ const PxVec3& gravity)
+{
+ // Nothing to integrate: bail out before computing bodyCount - 1 below.
+ if(bodyCount == 0)
+ return;
+
+ PxU32 localMaxPosIter = 0;
+ PxU32 localMaxVelIter = 0;
+
+
+ for(PxU32 a = 1; a < bodyCount; ++a)
+ {
+ // 'a' is the element being prefetched, 'i' the element being processed.
+ PxU32 i = a-1;
+ Ps::prefetchLine(bodyArray[a]);
+ Ps::prefetchLine(bodyArray[a],128);
+ Ps::prefetchLine(&solverBodyDataPool[a]);
+ Ps::prefetchLine(&solverBodyDataPool[a],128);
+
+ PxsBodyCore& core = *bodyArray[i];
+ const PxsRigidBody& rBody = *originalBodyArray[i];
+
+ PxU16 iterWord = core.solverIterationCounts;
+ localMaxPosIter = PxMax<PxU32>(PxU32(iterWord & 0xff), localMaxPosIter);
+ localMaxVelIter = PxMax<PxU32>(PxU32(iterWord >> 8), localMaxVelIter);
+
+ //const Cm::SpatialVector& accel = originalBodyArray[i]->getAccelerationV();
+ bodyCoreComputeUnconstrainedVelocity(gravity, dt, core.linearDamping, core.angularDamping, rBody.accelScale, core.maxLinearVelocitySq, core.maxAngularVelocitySq,
+ core.linearVelocity, core.angularVelocity, !!(rBody.mInternalFlags & PxcRigidBody::eDISABLE_GRAVITY));
+
+ copyToSolverBodyData(core.linearVelocity, core.angularVelocity, core.inverseMass, core.inverseInertia, core.body2World, core.maxPenBias, core.maxContactImpulse, nodeIndexArray[i],
+ core.contactReportThreshold, solverBodyDataPool[i + 1], core.lockFlags);
+ solverBodyPool[i].solverProgress = 0;
+ solverBodyPool[i].maxSolverNormalProgress = 0;
+ solverBodyPool[i].maxSolverFrictionProgress = 0;
+ }
+
+ // Peeled final iteration: same work as the loop body, without the prefetches.
+ const PxU32 i = bodyCount - 1;
+ PxsBodyCore& core = *bodyArray[i];
+ const PxsRigidBody& rBody = *originalBodyArray[i];
+
+ PxU16 iterWord = core.solverIterationCounts;
+ localMaxPosIter = PxMax<PxU32>(PxU32(iterWord & 0xff), localMaxPosIter);
+ localMaxVelIter = PxMax<PxU32>(PxU32(iterWord >> 8), localMaxVelIter);
+
+ bodyCoreComputeUnconstrainedVelocity(gravity, dt, core.linearDamping, core.angularDamping, rBody.accelScale, core.maxLinearVelocitySq, core.maxAngularVelocitySq,
+ core.linearVelocity, core.angularVelocity, !!(rBody.mInternalFlags & PxcRigidBody::eDISABLE_GRAVITY));
+
+ copyToSolverBodyData(core.linearVelocity, core.angularVelocity, core.inverseMass, core.inverseInertia, core.body2World, core.maxPenBias, core.maxContactImpulse, nodeIndexArray[i],
+ core.contactReportThreshold, solverBodyDataPool[i + 1], core.lockFlags);
+ solverBodyPool[i].solverProgress = 0;
+ solverBodyPool[i].maxSolverNormalProgress = 0;
+ solverBodyPool[i].maxSolverFrictionProgress = 0;
+
+ // Publish this range's maxima into the shared counters.
+ physx::shdfnd::atomicMax(reinterpret_cast<volatile PxI32*>(maxSolverPositionIterations), PxI32(localMaxPosIter));
+ physx::shdfnd::atomicMax(reinterpret_cast<volatile PxI32*>(maxSolverVelocityIterations), PxI32(localMaxVelIter));
+}
+
+
+// Runs the file-local preIntegrationParallel() on this task's window of the shared arrays:
+// every array pointer is advanced by mStartIndex and mNumToIntegrate elements are processed.
+void PxsPreIntegrateTask::runInternal()
+{
+ preIntegrationParallel(
+ mDt,
+ mBodyArray + mStartIndex,
+ mOriginalBodyArray + mStartIndex,
+ mNodeIndexArray + mStartIndex,
+ mNumToIntegrate,
+ mSolverBodies + mStartIndex,
+ mSolverBodyDataPool + mStartIndex,
+ mMaxSolverPositionIterations,
+ mMaxSolverVelocityIterations,
+ mGravity);
+}
+
+void DynamicsContext::preIntegrationParallel( // Splits pre-integration of bodyCount bodies into PxsPreIntegrateTask instances that all continue into 'task'.
+ const PxF32 dt,
+ PxsBodyCore*const* bodyArray, // INOUT: core body attributes
+ PxsRigidBody*const* originalBodyArray, // IN: original bodies (LEGACY - DON'T deref the ptrs!!)
+ PxU32 const* nodeIndexArray, // IN: island node index
+ PxU32 bodyCount, // IN: body count
+ PxSolverBody* solverBodyPool, // IN: solver body pool (space preallocated)
+ PxSolverBodyData* solverBodyDataPool, // IN: solver body data pool (space preallocated)
+ Cm::SpatialVector* /*motionVelocityArray*/, // OUT: motion velocities
+ PxU32& maxSolverPositionIterations,
+ PxU32& maxSolverVelocityIterations,
+ PxBaseTask& task
+ )
+{
+ //TODO - make this based on some variables so we can try different configurations
+ const PxU32 IntegrationPerThread = 256; // bodies handled by one task
+
+ const PxU32 numTasks = ((bodyCount + IntegrationPerThread-1)/IntegrationPerThread); // ceil(bodyCount / IntegrationPerThread); 0 when there are no bodies
+ const PxU32 taskBatchSize = 64; // tasks are allocated from the pool in groups of at most this many
+
+ for(PxU32 i = 0; i < numTasks; i+=taskBatchSize)
+ {
+ const PxU32 nbTasks = PxMin(numTasks - i, taskBatchSize);
+ PxsPreIntegrateTask* tasks = reinterpret_cast<PxsPreIntegrateTask*>(getTaskPool().allocate(sizeof(PxsPreIntegrateTask)*nbTasks)); // raw storage; the tasks are constructed in place below
+ for(PxU32 a = 0; a < nbTasks; ++a)
+ {
+ PxU32 startIndex = (i+a)*IntegrationPerThread;
+ PxU32 nbToIntegrate = PxMin((bodyCount-startIndex), IntegrationPerThread); // the last task may get fewer than IntegrationPerThread bodies
+ PxsPreIntegrateTask* pTask = PX_PLACEMENT_NEW(&tasks[a], PxsPreIntegrateTask)(*this, bodyArray,
+ originalBodyArray, nodeIndexArray, solverBodyPool, solverBodyDataPool, dt, bodyCount,
+ &maxSolverPositionIterations, &maxSolverVelocityIterations, startIndex,
+ nbToIntegrate, mGravity); // all tasks share the two iteration counters by address; gravity comes from the member mGravity
+
+ pTask->setContinuation(&task);
+ pTask->removeReference(); // release our reference; presumably the task may start running from here on - confirm
+ }
+ }
+
+ PxMemZero(solverBodyPool, bodyCount * sizeof(PxSolverBody)); // NOTE(review): executed after the tasks were released, and they write (zeros) into the same solverBodyPool entries - confirm this overlap is intended
+}
+
+// Busy-waits until *state equals requiredState.
+// 'state' is a volatile pointer, so the value is re-read on every pass; there is no yield or back-off.
+inline void WaitBodyRequiredState(volatile PxU32* state, PxU32 requiredState)
+{
+ for(;;)
+ {
+ if(*state == requiredState)
+ return;
+ }
+}
+
+void solveParallel(SOLVER_PARALLEL_METHOD_ARGS) // Free-function entry point that forwards to DynamicsContext::solveParallel; the macro is expected to declare 'context', 'params' and 'islandSim'.
+{
+ context.solveParallel(params, islandSim);
+}
+
+
+void DynamicsContext::solveParallel(SolverIslandParams& params, IG::IslandSim& islandSim) // Takes part in the shared solve of one island, waits for it to finish, then takes part in integration.
+{
+ PxI32 targetCount = mSolverCore[mFrictionType]->solveVParallelAndWriteBack(params); // solver core chosen by friction type; the return value is the progress count to wait for
+
+ PxI32* solveCount = &params.constraintIndex2; // shared progress counter in the island params
+
+ //PxI32 targetCount = (PxI32)(params.numConstraintHeaders * (params.velocityIterations + params.positionIterations));
+
+ WAIT_FOR_PROGRESS_NO_TIMER(solveCount, targetCount); // presumably blocks until *solveCount reaches targetCount (macro defined elsewhere) - confirm
+
+ integrateCoreParallel(params, islandSim); // integration only starts once the wait above has completed
+}
+
+void DynamicsContext::integrateCoreParallel(SolverIslandParams& params, IG::IslandSim& islandSim) // Integrates articulations and rigid bodies of one island; callers claim chunks of work through a shared atomic index.
+{
+ const PxI32 unrollCount = 128; // work items claimed per atomicAdd
+
+ PxI32* bodyIntegrationListIndex = &params.bodyIntegrationListIndex; // shared cursor over the work items: [0, numArtics) are articulations, bodies follow
+
+ PxI32 index = physx::shdfnd::atomicAdd(bodyIntegrationListIndex, unrollCount) - unrollCount; // start of the first claimed chunk (assumes atomicAdd returns the post-add value)
+
+ const PxI32 numBodies = PxI32(params.bodyListSize);
+ const PxI32 numArtics = PxI32(params.articulationListSize);
+
+ Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
+ PxsBodyCore*const* bodyArray = params.bodyArray;
+ PxsRigidBody** PX_RESTRICT rigidBodies = params.rigidBodies;
+ ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
+
+
+ PxI32 numIntegrated = 0; // objects integrated by this caller; added to the shared total at the end
+
+ PxI32 bodyRemainder = unrollCount; // items still unused in the currently claimed chunk
+
+ while(index < numArtics) // articulation part of the index space
+ {
+ const PxI32 remainder = PxMin(numArtics - index, unrollCount);
+ bodyRemainder -= remainder;
+
+ for(PxI32 a = 0; a < remainder; ++a, index++)
+ {
+ const PxI32 i = index;
+ {
+ PX_PROFILE_ZONE("Articulations.integrate", mContextID);
+
+ ArticulationPImpl::updateBodies(articulationListStart[i], mDt);
+ }
+
+ ++numIntegrated;
+ }
+ if(bodyRemainder == 0) // whole chunk was articulations: claim the next one; otherwise the rest of this chunk is spent on bodies
+ {
+ index = physx::shdfnd::atomicAdd(bodyIntegrationListIndex, unrollCount) - unrollCount;
+ bodyRemainder = unrollCount;
+ }
+ }
+
+ index -= numArtics; // switch from the combined index space to a body index
+
+ const PxI32 unrollPlusArtics = unrollCount + numArtics; // subtracted from later atomicAdd results to obtain a body index directly
+
+ PxSolverBody* PX_RESTRICT solverBodies = params.bodyListStart;
+ PxSolverBodyData* PX_RESTRICT solverBodyData = params.bodyDataList + params.solverBodyOffset+1; // this island's body data: offset by solverBodyOffset plus one extra slot
+
+ while(index < numBodies) // rigid body part
+ {
+ const PxI32 remainder = PxMin(numBodies - index, bodyRemainder);
+ bodyRemainder -= remainder;
+ for(PxI32 a = 0; a < remainder; ++a, index++)
+ {
+ const PxI32 prefetch = PxMin(index+4, numBodies - 1); // prefetch a few bodies ahead, clamped to the last valid body
+ Ps::prefetchLine(bodyArray[prefetch]);
+ Ps::prefetchLine(bodyArray[prefetch],128);
+ Ps::prefetchLine(&solverBodies[index],128);
+ Ps::prefetchLine(&motionVelocityArray[index],128);
+ Ps::prefetchLine(&bodyArray[index+32]); // address only (may lie past the end of the array); the element is not read here
+ Ps::prefetchLine(&rigidBodies[prefetch]);
+
+ PxSolverBodyData& data = solverBodyData[index];
+
+ integrateCore(motionVelocityArray[index].linear, motionVelocityArray[index].angular,
+ solverBodies[index], data, mDt);
+
+ PxsRigidBody& rBody = *rigidBodies[index];
+ PxsBodyCore& core = rBody.getCore();
+ rBody.mLastTransform = core.body2World; // keep the pre-integration pose before it is overwritten
+ core.body2World = data.body2World; // write the integrated pose and velocities back to the body core
+ core.linearVelocity = data.linearVelocity;
+ core.angularVelocity = data.angularVelocity;
+
+ bool hasStaticTouch = islandSim.getIslandStaticTouchCount(IG::NodeIndex(data.nodeIndex)) != 0;
+ sleepCheck(rigidBodies[index], mDt, mInvDt, mEnableStabilization, mUseAdaptiveForce, motionVelocityArray[index], hasStaticTouch);
+
+ ++numIntegrated;
+ }
+
+ {
+ index = physx::shdfnd::atomicAdd(bodyIntegrationListIndex, unrollCount) - unrollPlusArtics; // claim the next chunk, already converted to a body index
+ bodyRemainder = unrollCount;
+ }
+ }
+
+ Ps::memoryBarrier(); // issued before the progress counter below is updated
+ physx::shdfnd::atomicAdd(&params.numObjectsIntegrated, numIntegrated); // publish this caller's share; the setup/solve task waits on this counter
+}
+
+class BlockAllocator : public PxConstraintAllocator // PxConstraintAllocator that serves constraint data from a constraint block stream and friction data from a friction patch stream pair.
+{
+ PxsConstraintBlockManager& mConstraintBlockManager; // passed to the block stream on every constraint reservation
+ PxcConstraintBlockStream& mConstraintBlockStream; // source of constraint memory
+ FrictionPatchStreamPair& mFrictionPatchStreamPair; // source of friction memory
+ PxU32& mTotalConstraintByteSize; // caller-owned running total of constraint bytes reserved through this allocator
+public:
+
+ BlockAllocator(PxsConstraintBlockManager& constraintBlockManager, PxcConstraintBlockStream& constraintBlockStream, FrictionPatchStreamPair& frictionPatchStreamPair,
+ PxU32& totalConstraintByteSize) :
+ mConstraintBlockManager(constraintBlockManager), mConstraintBlockStream(constraintBlockStream), mFrictionPatchStreamPair(frictionPatchStreamPair),
+ mTotalConstraintByteSize(totalConstraintByteSize)
+ {
+ }
+
+ virtual PxU8* reserveConstraintData(const PxU32 size) // Reserves 'size' bytes of constraint memory and adds 'size' to the running total.
+ {
+ mTotalConstraintByteSize += size; // counted before reserving, whatever reserve() returns
+ return mConstraintBlockStream.reserve(size, mConstraintBlockManager);
+ }
+
+ virtual PxU8* reserveFrictionData(const PxU32 size) // Reserves 'size' bytes of friction memory; not included in the constraint byte total.
+ {
+ return mFrictionPatchStreamPair.reserve<PxU8>(size);
+ }
+
+ virtual PxU8* findInputPatches(PxU8* frictionCookie) // Returns the cookie unchanged: here it already is the pointer to the input patches.
+ {
+ return frictionCookie;
+ }
+
+ PX_NOCOPY(BlockAllocator)
+
+};
+
+
+
+/**
+\brief Prepares the solver constraints referenced by the batch headers in [startIndex, endIndex).
+
+A header whose first desc is a rigid-body contact (DY_SC_TYPE_RB_CONTACT) or a rigid-body 1D constraint
+(DY_SC_TYPE_RB_1D) is processed; headers of any other type are left untouched. For headers of stride 4 the
+4-wide prep routine is tried first. If it does not report eSUCCESS, or the stride is not 4, every constraint
+in the header is prepared individually.
+
+\param[in] solverBodyData Solver body data array. Indexed by the desc's body data index; entry 0 is used for articulation links.
+\param[in] mThreadContext The island's thread context. Supplies the ordered descs, the batch headers and the constraint block manager.
+\param[in] context Dynamics context supplying solver parameters, the constraint write-back pool and a pooled thread context.
+\param[in] startIndex Index of the first batch header to process.
+\param[in] endIndex One past the index of the last batch header to process.
+\param[in] outputs Iterator used to look up the output of each contact manager.
+\return The number of axis constraints counted for the processed headers.
+*/
+static PxU32 createFinalizeContacts_Parallel(PxSolverBodyData* solverBodyData, ThreadContext& mThreadContext, DynamicsContext& context,
+    PxU32 startIndex, PxU32 endIndex, PxsContactManagerOutputIterator& outputs)
+{
+    const PxFrictionType::Enum frictionType = context.getFrictionType();
+    const PxReal bounceThreshold = context.getBounceThreshold();
+    const PxReal frictionOffsetThreshold = context.getFrictionOffsetThreshold();
+    const PxReal dt = context.getDt();
+    const PxReal invDt = context.getInvDt();
+
+    PxSolverConstraintDesc* contactDescPtr = mThreadContext.orderedContactConstraints;
+
+    PxConstraintBatchHeader* headers = mThreadContext.contactConstraintBatchHeaders;
+
+    PxI32 axisConstraintCount = 0;
+    // Taken from the context's thread-context pool for the duration of this call; returned via putThreadContext() below.
+    ThreadContext* threadContext = context.getThreadContext();
+    threadContext->mConstraintBlockStream.reset(); //ensure there's no left-over memory that belonged to another island
+
+    PxTransform idt(PxIdentity);
+
+    BlockAllocator blockAllocator(mThreadContext.mConstraintBlockManager, threadContext->mConstraintBlockStream, threadContext->mFrictionPatchStreamPair, threadContext->mConstraintSize);
+
+    const PxReal ccdMaxSeparation = context.getCCDSeparationThreshold();
+
+    for(PxU32 a = startIndex; a < endIndex; ++a)
+    {
+
+        PxConstraintBatchHeader& header = headers[a];
+
+        if(contactDescPtr[header.mStartIndex].constraintLengthOver16 == DY_SC_TYPE_RB_CONTACT)
+        {
+            SolverConstraintPrepState::Enum state = SolverConstraintPrepState::eUNBATCHABLE;
+
+            PxSolverContactDesc blockDescs[4];
+            PxsContactManagerOutput* cmOutputs[4];
+            PxsContactManager* cms[4];
+            // Fill one contact prep desc per constraint in the header.
+            for (PxU32 i = 0; i < header.mStride; ++i)
+            {
+                PxSolverConstraintDesc& desc = contactDescPtr[header.mStartIndex + i];
+                PxSolverContactDesc& blockDesc = blockDescs[i];
+                PxsContactManager* cm = reinterpret_cast<PxsContactManager*>(desc.constraint);
+
+                cms[i] = cm;
+
+                PxcNpWorkUnit& unit = cm->getWorkUnit();
+
+                cmOutputs[i] = &outputs.getContactManager(unit.mNpIndex);
+
+                // Articulation links (link index != NO_LINK) use entry 0 of the solver body data array.
+                // NO_LINK replaces the literal 0xffff so this branch agrees with the 1D branch below.
+                PxSolverBodyData& data0 = desc.linkIndexA != PxSolverConstraintDesc::NO_LINK ? solverBodyData[0] : solverBodyData[desc.bodyADataIndex];
+                PxSolverBodyData& data1 = desc.linkIndexB != PxSolverConstraintDesc::NO_LINK ? solverBodyData[0] : solverBodyData[desc.bodyBDataIndex];
+
+                blockDesc.data0 = &data0;
+                blockDesc.data1 = &data1;
+
+                // Union of both bodies' flags; rigidCore1 may be null.
+                PxU8 flags = unit.rigidCore0->mFlags;
+                if (unit.rigidCore1)
+                    flags |= PxU8(unit.rigidCore1->mFlags);
+
+                blockDesc.bodyFrame0 = unit.rigidCore0->body2World;
+                blockDesc.bodyFrame1 = unit.rigidCore1 ? unit.rigidCore1->body2World : idt;
+                blockDesc.shapeInteraction = cm->getShapeInteraction();
+                blockDesc.contactForces = cmOutputs[i]->contactForces;
+                blockDesc.desc = &desc;
+                blockDesc.body0 = desc.bodyA;
+                blockDesc.body1 = desc.bodyB;
+                blockDesc.hasForceThresholds = !!(unit.flags & PxcNpWorkUnitFlag::eFORCE_THRESHOLD);
+                blockDesc.disableStrongFriction = !!(unit.flags & PxcNpWorkUnitFlag::eDISABLE_STRONG_FRICTION);
+                blockDesc.bodyState0 = (unit.flags & PxcNpWorkUnitFlag::eARTICULATION_BODY0) ? PxSolverContactDesc::eARTICULATION : PxSolverContactDesc::eDYNAMIC_BODY;
+                blockDesc.bodyState1 = (unit.flags & PxcNpWorkUnitFlag::eARTICULATION_BODY1) ? PxSolverContactDesc::eARTICULATION : (unit.flags & PxcNpWorkUnitFlag::eHAS_KINEMATIC_ACTOR) ? PxSolverContactDesc::eKINEMATIC_BODY :
+                    ((unit.flags & PxcNpWorkUnitFlag::eDYNAMIC_BODY1) ? PxSolverContactDesc::eDYNAMIC_BODY : PxSolverContactDesc::eSTATIC_BODY);
+                //blockDesc.flags = unit.flags;
+
+                // Any non-zero dominance value maps to an inverse-mass scale of 1, zero maps to 0.
+                PxReal dominance0 = unit.dominance0 ? 1.f : 0.f;
+                PxReal dominance1 = unit.dominance1 ? 1.f : 0.f;
+
+                blockDesc.mInvMassScales.linear0 = blockDesc.mInvMassScales.angular0 = dominance0;
+                blockDesc.mInvMassScales.linear1 = blockDesc.mInvMassScales.angular1 = dominance1;
+                blockDesc.restDistance = unit.restDistance;
+                blockDesc.frictionPtr = unit.frictionDataPtr;
+                blockDesc.frictionCount = unit.frictionPatchCount;
+                // The CCD separation threshold only applies when either body enables speculative CCD.
+                blockDesc.maxCCDSeparation = (flags & PxRigidBodyFlag::eENABLE_SPECULATIVE_CCD) ? ccdMaxSeparation : PX_MAX_F32;
+
+            }
+
+            if(header.mStride == 4)
+            {
+                //KS - todo - plumb in axisConstraintCount into this method to keep track of the number of axes
+                state = createFinalizeMethods4[frictionType](cmOutputs, *threadContext,
+                    blockDescs,
+                    invDt,
+                    bounceThreshold,
+                    frictionOffsetThreshold,
+                    context.getCorrelationDistance(),
+                    blockAllocator);
+
+            }
+            // Fallback: the 4-wide prep was not attempted or did not succeed, so prep each contact on its own.
+            if(SolverConstraintPrepState::eSUCCESS != state)
+            {
+                for(PxU32 i = 0; i < header.mStride; ++i)
+                {
+                    PxSolverConstraintDesc& desc = contactDescPtr[header.mStartIndex+i];
+                    PxsContactManager* cm = reinterpret_cast<PxsContactManager*>(desc.constraint);
+                    PxcNpWorkUnit& n = cm->getWorkUnit();
+
+                    PxsContactManagerOutput& output = outputs.getContactManager(n.mNpIndex);
+
+                    createFinalizeMethods[frictionType](blockDescs[i], output, *threadContext,
+                        invDt, bounceThreshold, frictionOffsetThreshold, context.getCorrelationDistance(), blockAllocator);
+
+                    getContactManagerConstraintDesc(output,*cm,desc);
+                }
+            }
+
+            // Write the friction data recorded in the prep descs back to the work units and tally the axis counts.
+            for (PxU32 i = 0; i < header.mStride; ++i)
+            {
+                PxsContactManager* cm = cms[i];
+
+                PxcNpWorkUnit& unit = cm->getWorkUnit();
+                unit.frictionDataPtr = blockDescs[i].frictionPtr;
+                unit.frictionPatchCount = blockDescs[i].frictionCount;
+                axisConstraintCount += blockDescs[i].axisConstraintCount;
+
+            }
+        }
+        else if(contactDescPtr[header.mStartIndex].constraintLengthOver16 == DY_SC_TYPE_RB_1D)
+        {
+
+            SolverConstraintShaderPrepDesc shaderDescs[4];
+            PxSolverConstraintPrepDesc descs[4];
+
+            PxTransform id(PxIdentity);
+
+            // Fill one shader desc and one prep desc per 1D constraint in the header.
+            for (PxU32 i = 0; i < header.mStride; ++i)
+            {
+                PxSolverConstraintDesc& desc = contactDescPtr[header.mStartIndex + i];
+                const Constraint* constraint = reinterpret_cast<const Constraint*>(desc.constraint);
+
+                SolverConstraintShaderPrepDesc& shaderPrepDesc = shaderDescs[i];
+                PxSolverConstraintPrepDesc& prepDesc = descs[i];
+
+                const PxConstraintSolverPrep solverPrep = constraint->solverPrep;
+                const void* constantBlock = constraint->constantBlock;
+                const PxU32 constantBlockByteSize = constraint->constantBlockSize;
+                // A missing body is given the identity pose.
+                const PxTransform& pose0 = (constraint->body0 ? constraint->body0->getPose() : id);
+                const PxTransform& pose1 = (constraint->body1 ? constraint->body1->getPose() : id);
+                const PxSolverBody* sbody0 = desc.bodyA;
+                const PxSolverBody* sbody1 = desc.bodyB;
+                PxSolverBodyData* sbodyData0 = &solverBodyData[desc.linkIndexA != PxSolverConstraintDesc::NO_LINK ? 0 : desc.bodyADataIndex];
+                PxSolverBodyData* sbodyData1 = &solverBodyData[desc.linkIndexB != PxSolverConstraintDesc::NO_LINK ? 0 : desc.bodyBDataIndex];
+
+                shaderPrepDesc.constantBlock = constantBlock;
+                shaderPrepDesc.constantBlockByteSize = constantBlockByteSize;
+                shaderPrepDesc.constraint = constraint;
+                shaderPrepDesc.solverPrep = solverPrep;
+
+                prepDesc.desc = &desc;
+                prepDesc.bodyFrame0 = pose0;
+                prepDesc.bodyFrame1 = pose1;
+                prepDesc.data0 = sbodyData0;
+                prepDesc.data1 = sbodyData1;
+                prepDesc.body0 = sbody0;
+                prepDesc.body1 = sbody1;
+                prepDesc.linBreakForce = constraint->linBreakForce;
+                prepDesc.angBreakForce = constraint->angBreakForce;
+                prepDesc.writeback = &context.getConstraintWriteBackPool()[constraint->index];
+                prepDesc.disablePreprocessing = !!(constraint->flags & PxConstraintFlag::eDISABLE_PREPROCESSING);
+                prepDesc.improvedSlerp = !!(constraint->flags & PxConstraintFlag::eIMPROVED_SLERP);
+                prepDesc.driveLimitsAreForces = !!(constraint->flags & PxConstraintFlag::eDRIVE_LIMITS_ARE_FORCES);
+                prepDesc.minResponseThreshold = constraint->minResponseThreshold;
+            }
+
+#if DY_BATCH_1D
+            SolverConstraintPrepState::Enum state = SolverConstraintPrepState::eUNBATCHABLE;
+            if(header.mStride == 4)
+            {
+                // Zero-initialised so the value is well defined even if the callee returns before writing it.
+                PxU32 totalRows = 0;
+                state = setupSolverConstraint4
+                    (shaderDescs, descs, dt, invDt, totalRows,
+                    blockAllocator);
+
+                // Count the batched rows only when batching succeeded. On any other result the
+                // per-constraint fallback below counts every row itself, so adding here would double count.
+                if(state == SolverConstraintPrepState::eSUCCESS)
+                    axisConstraintCount += totalRows;
+            }
+            if(state != SolverConstraintPrepState::eSUCCESS)
+#endif
+            {
+                for(PxU32 i = 0; i < header.mStride; ++i)
+                {
+                    axisConstraintCount += SetupSolverConstraint(shaderDescs[i], descs[i], blockAllocator, dt, invDt);
+                }
+            }
+        }
+    }
+
+    threadContext->getSimStats().numAxisSolverConstraints += axisConstraintCount;
+
+    context.putThreadContext(threadContext);
+    return PxU32(axisConstraintCount); //Can't write to mThreadContext as it's shared!!!!
+}
+
+/**
+\brief Task that runs createFinalizeContacts_Parallel() over one contiguous range of constraint batch headers.
+*/
+class PxsCreateFinalizeContactsTask : public Cm::Task
+{
+    // Copy assignment is private and only declared in this class.
+    PxsCreateFinalizeContactsTask& operator=(const PxsCreateFinalizeContactsTask&);
+
+public:
+    /**
+    \brief Stores the arguments for a later call to createFinalizeContacts_Parallel().
+    \note numConstraints and descArray are stored but not read by runInternal().
+    */
+    PxsCreateFinalizeContactsTask( const PxU32 numConstraints, PxSolverConstraintDesc* descArray, PxSolverBodyData* solverBodyData,
+        ThreadContext& threadContext, DynamicsContext& context, PxU32 startIndex, PxU32 endIndex, PxsContactManagerOutputIterator& outputs)
+        : mNumConstraints(numConstraints)
+        , mDescArray(descArray)
+        , mSolverBodyData(solverBodyData)
+        , mThreadContext(threadContext)
+        , mDynamicsContext(context)
+        , mOutputs(outputs)
+        , mStartIndex(startIndex)
+        , mEndIndex(endIndex)
+    {
+    }
+
+    /**
+    \brief Name under which this task is reported.
+    */
+    virtual const char* getName() const
+    {
+        return "PxsDynamics.createFinalizeContacts";
+    }
+
+    /**
+    \brief Prepares the constraints of headers [mStartIndex, mEndIndex). The returned axis-constraint count is discarded.
+    */
+    virtual void runInternal()
+    {
+        createFinalizeContacts_Parallel(mSolverBodyData, mThreadContext, mDynamicsContext, mStartIndex, mEndIndex, mOutputs);
+    }
+
+public:
+    const PxU32 mNumConstraints;
+    PxSolverConstraintDesc* mDescArray;
+    PxSolverBodyData* mSolverBodyData;
+    ThreadContext& mThreadContext;
+    DynamicsContext& mDynamicsContext;
+    PxsContactManagerOutputIterator& mOutputs;
+    PxU32 mStartIndex;    // first batch header handled by this task
+    PxU32 mEndIndex;      // one past the last batch header handled by this task
+};
+/**
+\brief Builds the constraint batch headers for this island and spawns the tasks that prepare them.
+
+Steps, in order:
+(1) Walk the first mNumDifferentBodyConstraints ordered descs partition by partition and group them into
+batch headers. A header covers up to maxBatchSize consecutive descs that share the same
+constraintLengthOver16 value (rigid-body contact or rigid-body 1D) and are not articulation constraints;
+any other desc gets a header of stride 1. A header never crosses a partition boundary.
+(2) Overwrite mConstraintsPerPartition in place: on entry it is read as accumulated desc counts, on exit
+each visited entry holds the number of headers built for that partition.
+(3) Append one stride-1 header for each remaining desc (counted in selfConstraintDescCount).
+(4) Store the header counts in the thread context.
+(5) Allocate PxsCreateFinalizeContactsTask objects from the task pool, each covering a contiguous header
+range, with mCont as their continuation.
+*/
+void PxsSolverCreateFinalizeConstraintsTask::runInternal()
+{
+ ThreadContext& mThreadContext = *mIslandContext.mThreadContext;
+ // The ordered descs are treated as two consecutive ranges: [0, descCount) and the selfConstraintDescCount descs after it.
+
+
+ PxU32 descCount = mThreadContext.mNumDifferentBodyConstraints;
+ PxU32 selfConstraintDescCount = mThreadContext.contactDescArraySize - mThreadContext.mNumDifferentBodyConstraints;
+
+ Ps::Array<PxU32>& accumulatedConstraintsPerPartition = mThreadContext.mConstraintsPerPartition;
+
+ PxU32 numHeaders = 0;
+ PxU32 currentPartition = 0;
+ PxU32 maxJ = descCount == 0 ? 0 : accumulatedConstraintsPerPartition[0]; // one past the last desc index of the current partition
+
+ const PxU32 maxBatchPartition = 0xFFFFFFFF; // upper bound on the partitions that may be batched; with this value it never restricts in practice
+
+ const PxU32 maxBatchSize = mEnhancedDeterminism ? 1u : 4u; // enhanced determinism limits every header to a single desc
+
+ PxU32 headersPerPartition = 0;
+ for(PxU32 a = 0; a < descCount;)
+ {
+ // loopMax caps the header at the batch size and at the end of the current partition.
+ // It is 0 when the current partition has no descs left (e.g. an empty partition); then no header is emitted and j stays 0.
+ PxU32 loopMax = PxMin(maxJ - a, maxBatchSize);
+ PxU16 j = 0;
+ if(loopMax > 0)
+ {
+ PxConstraintBatchHeader& header = mThreadContext.contactConstraintBatchHeaders[numHeaders++];
+ // Every header holds at least the desc at index a.
+ j=1;
+ PxSolverConstraintDesc& desc = mThreadContext.orderedContactConstraints[a];
+ if(!isArticulationConstraint(desc) && (desc.constraintLengthOver16 == DY_SC_TYPE_RB_CONTACT ||
+ desc.constraintLengthOver16 == DY_SC_TYPE_RB_1D) && currentPartition < maxBatchPartition)
+ {
+ for(; j < loopMax && desc.constraintLengthOver16 == mThreadContext.orderedContactConstraints[a+j].constraintLengthOver16 &&
+ !isArticulationConstraint(mThreadContext.orderedContactConstraints[a+j]); ++j); // empty body: only advances j over matching descs
+ }
+ header.mStartIndex = a;
+ header.mStride = j;
+ headersPerPartition++;
+ }
+ if(maxJ == (a + j) && maxJ != descCount) // current partition exhausted and more descs remain
+ {
+ //Go to next partition!
+ accumulatedConstraintsPerPartition[currentPartition] = headersPerPartition; // replace the accumulated desc count with this partition's header count
+ headersPerPartition = 0;
+ currentPartition++;
+ maxJ = accumulatedConstraintsPerPartition[currentPartition]; // still an accumulated desc count: this entry has not been overwritten yet
+ }
+ a+= j;
+ }
+ if(descCount)
+ accumulatedConstraintsPerPartition[currentPartition] = headersPerPartition; // header count of the last partition visited
+
+
+
+ accumulatedConstraintsPerPartition.forceSize_Unsafe(mThreadContext.mMaxPartitions);
+
+ PxU32 numDifferentBodyBatchHeaders = numHeaders;
+ // The remaining descs are never batched: one header of stride 1 each.
+ for(PxU32 a = 0; a < selfConstraintDescCount; ++a)
+ {
+ PxConstraintBatchHeader& header = mThreadContext.contactConstraintBatchHeaders[numHeaders++];
+ header.mStartIndex = a + descCount;
+ header.mStride = 1;
+ }
+
+ PxU32 numSelfConstraintBatchHeaders = numHeaders - numDifferentBodyBatchHeaders;
+
+ mThreadContext.numDifferentBodyBatchHeaders = numDifferentBodyBatchHeaders;
+ mThreadContext.numSelfConstraintBatchHeaders = numSelfConstraintBatchHeaders;
+ mThreadContext.numContactConstraintBatches = numHeaders;
+
+ PX_UNUSED(descCount);
+
+ {
+ PxSolverConstraintDesc* descBegin = mThreadContext.orderedContactConstraints;
+
+ const PxU32 numThreads = getTaskManager()->getCpuDispatcher()->getWorkerCount();
+
+ //Choose an appropriate number of constraint prep tasks. This must be proportionate to the number of constraints to prep and the number
+ //of worker threads available.
+ const PxU32 TaskBlockSize = 16;
+ const PxU32 TaskBlockLargeSize = 64;
+ const PxU32 BlockAllocationSize = 64; // maximum number of task objects requested from the task pool per allocation
+ // Start from 64 headers per task (rounded up).
+ PxU32 numTasks = (numHeaders+TaskBlockLargeSize-1)/TaskBlockLargeSize;
+
+ if(numTasks)
+ {
+ // Fewer tasks than worker threads: switch to 16 headers per task (rounded up, at least one task).
+ if(numTasks < numThreads)
+ numTasks = PxMax(1u, (numHeaders+TaskBlockSize-1)/TaskBlockSize);
+
+ const PxU32 constraintsPerTask = (numHeaders + numTasks-1)/numTasks; // headers per task, rounded up
+
+ for(PxU32 i = 0; i < numTasks; i+=BlockAllocationSize)
+ {
+ PxU32 blockSize = PxMin(numTasks - i, BlockAllocationSize);
+ // Raw storage for blockSize tasks; each task is constructed in place below.
+ PxsCreateFinalizeContactsTask* tasks = reinterpret_cast<PxsCreateFinalizeContactsTask*>(mContext.getTaskPool().allocate(sizeof(PxsCreateFinalizeContactsTask)*blockSize));
+
+ for(PxU32 a = 0; a < blockSize; ++a)
+ {
+ PxU32 startIndex = (a + i) * constraintsPerTask;
+ PxU32 endIndex = PxMin(startIndex + constraintsPerTask, numHeaders); // the last task may cover fewer headers
+ PxsCreateFinalizeContactsTask* pTask = PX_PLACEMENT_NEW(&tasks[a], PxsCreateFinalizeContactsTask( descCount, descBegin, mContext.mSolverBodyDataPool.begin(), mThreadContext, mContext, startIndex, endIndex, mOutputs));
+ // NOTE(review): dropping the reference after setting the continuation presumably releases the task to the dispatcher - confirm against Cm::Task.
+ pTask->setContinuation(mCont);
+ pTask->removeReference();
+ }
+ }
+ }
+ }
+}
+
+}
+}
+
+
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.h
new file mode 100644
index 00000000..9fb1d94d
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.h
@@ -0,0 +1,483 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_DYNAMICS_H
+#define DY_DYNAMICS_H
+
+#include "PxvConfig.h"
+#include "CmSpatialVector.h"
+#include "CmTask.h"
+#include "CmPool.h"
+#include "PxcThreadCoherentCache.h"
+#include "DyThreadContext.h"
+#include "PxcConstraintBlockStream.h"
+#include "DySolverBody.h"
+#include "DyContext.h"
+#include "PxsIslandManagerTypes.h"
+#include "PxvNphaseImplementationContext.h"
+#include "solver/PxSolverDefs.h"
+
+namespace physx
+{
+
+namespace Cm
+{
+ class FlushPool;
+}
+
+namespace IG
+{
+ class SimpleIslandManager;
+ struct Edge;
+}
+
+class PxsRigidBody;
+
+class PxsStreamedThresholdTable;
+
+struct PxsBodyCore;
+struct PxsIslandObjects;
+class PxsIslandIndices;
+struct PxsIndexedInteraction;
+class PxsIslandManager;
+struct PxsIndexedConstraint;
+struct PxsIndexedContactManager;
+class PxsHeapMemoryAllocator;
+class PxsMemoryManager;
+class PxsDefaultMemoryManager;
+struct PxSolverConstraintDesc;
+
+namespace Cm
+{
+ class Bitmap;
+ class SpatialVector;
+}
+
+namespace Dy
+{
+ class SolverCore;
+ struct SolverIslandParams;
+ struct ArticulationSolverDesc;
+ class Articulation;
+ class DynamicsContext;
+
+
+
+/** \brief Parameter list shared by the parallel solve entry points declared below (dynamics context, island solver parameters, island simulation). */
+#define SOLVER_PARALLEL_METHOD_ARGS \
+ DynamicsContext& context, \
+ SolverIslandParams& params, \
+ IG::IslandSim& islandSim
+
+//typedef void (*PxsSolveParallelMethod)(SOLVER_PARALLEL_METHOD_ARGS);
+//extern PxsSolveParallelMethod solveParallel[3];
+// NOTE(review): "Couloum" below looks like a misspelling of "Coulomb"; the identifier is left as is because code outside this header may reference it.
+void solveParallel(SOLVER_PARALLEL_METHOD_ARGS);
+void solveParallelCouloumFriction(SOLVER_PARALLEL_METHOD_ARGS);
+
+
+struct SolverIslandObjects;
+
+/**
+\brief Solver body pool: a Ps::Array of PxSolverBody whose allocator enforces 128-byte alignment of the array's base address.
+\note This reduces cache misses on platforms with 128-byte cache lines by aligning the start of the array to the beginning of a cache line.
+*/
+class SolverBodyPool : public Ps::Array<PxSolverBody, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxSolverBody> > >
+{
+ PX_NOCOPY(SolverBodyPool) // the pool is not meant to be copied
+public:
+ SolverBodyPool() {}
+};
+
+/**
+\brief Solver body data pool: a Ps::Array of PxSolverBodyData whose allocator enforces 128-byte alignment of the array's base address.
+\note This reduces cache misses on platforms with 128-byte cache lines by aligning the start of the array to the beginning of a cache line.
+*/
+class SolverBodyDataPool : public Ps::Array<PxSolverBodyData, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxSolverBodyData> > >
+{
+ PX_NOCOPY(SolverBodyDataPool) // the pool is not meant to be copied
+public:
+ SolverBodyDataPool() {}
+};
+/**
+\brief Solver constraint desc pool: a Ps::Array of PxSolverConstraintDesc whose allocator enforces 128-byte alignment of the array's base address.
+*/
+class SolverConstraintDescPool : public Ps::Array<PxSolverConstraintDesc, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxSolverConstraintDesc> > >
+{
+ PX_NOCOPY(SolverConstraintDescPool) // the pool is not meant to be copied
+public:
+ SolverConstraintDescPool() { }
+};
+
+/**
+\brief Encapsulates an island's context
+*/
+
+struct IslandContext
+{
+ //The thread context for this island (set in the island start task, released in the island end task)
+ ThreadContext* mThreadContext;
+ PxsIslandIndices mCounts; // NOTE(review): presumably the island's object counts/indices - confirm against PxsIslandIndices
+};
+
+
+/**
+\brief Encapsulates the data used by the constraint solver.
+*/
+
+#if PX_VC
+ #pragma warning(push)
+ #pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value.
+#endif
+
+
+class DynamicsContext : public Context
+{
+ PX_NOCOPY(DynamicsContext)
+public:
+
+ /**
+ \brief Creates a DynamicsContext. Takes the same arguments as the protected constructor.
+ \return A pointer to the newly-created DynamicsContext.
+ */
+ static DynamicsContext* create( PxcNpMemBlockPool* memBlockPool,
+ PxcScratchAllocator& scratchAllocator,
+ Cm::FlushPool& taskPool,
+ PxvSimStats& simStats,
+ PxTaskManager* taskManager,
+ Ps::VirtualAllocatorCallback* allocator,
+ PxsMaterialManager* materialManager,
+ IG::IslandSim* accurateIslandSim,
+ PxU64 contextID,
+ const bool enableStabilization,
+ const bool useEnhancedDeterminism,
+ const bool useAdaptiveForce
+ );
+
+ /**
+ \brief Destroys this DynamicsContext
+ */
+ void destroy();
+
+ /**
+ \brief Returns the static world solver body
+ \return The static world solver body.
+ */
+ PX_FORCE_INLINE PxSolverBody& getWorldSolverBody() { return mWorldSolverBody; }
+ /** \brief Returns the task pool (Cm::FlushPool) referenced by this context. */
+ PX_FORCE_INLINE Cm::FlushPool& getTaskPool() { return mTaskPool; }
+ /** \brief Returns the threshold stream. mThresholdStream is not declared in this class (presumably inherited from Context). */
+ PX_FORCE_INLINE ThresholdStream& getThresholdStream() { return *mThresholdStream; }
+ /** \brief Returns the simulation statistics object. mSimStats is not declared in this class (presumably inherited from Context). */
+ PX_FORCE_INLINE PxvSimStats& getSimStats() { return mSimStats; }
+ // The declaration below exists only when PX_ENABLE_SIM_STATS is non-zero.
+#if PX_ENABLE_SIM_STATS
+ void addThreadStats(const ThreadContext::ThreadSimStats& stats);
+#endif
+
+ /**
+ \brief The entry point for the constraint solver.
+ \param[in] dt The simulation time-step
+ \param[in] continuation The continuation task for the solver
+
+ This method is called after the island generation has completed. Its main responsibilities are:
+ (1) Reserving the solver body pools
+ (2) Initializing the static and kinematic solver bodies, which are shared resources between islands.
+ (3) Constructing the solver task chains for each island
+
+ Each island is solved as an independent solver task chain in parallel.
+
+ \note The remaining parameters are not described here.
+ */
+
+ virtual void update(IG::SimpleIslandManager& simpleIslandManager, PxBaseTask* continuation, PxBaseTask* lostTouchTask,
+ PxsContactManager** foundPatchManagers, PxU32 nbFoundPatchManagers, PxsContactManager** lostPatchManagers, PxU32 nbLostPatchManagers,
+ PxU32 maxPatchesPerCM, PxsContactManagerOutputIterator& iter, PxsContactManagerOutput* gpuOutputs, const PxReal dt, const PxVec3& gravity, const PxU32 bitMapWordCounts);
+ /** \brief No-op in this implementation: the body is empty and all arguments are ignored. */
+ virtual void processLostPatches(IG::SimpleIslandManager& /*simpleIslandManager*/, PxsContactManager** /*lostPatchManagers*/, PxU32 /*nbLostPatchManagers*/, PxsContactManagerOutputIterator& /*iterator*/){}
+
+ virtual void updateBodyCore(PxBaseTask* continuation);
+ /** \brief Stores the simulation controller pointer in mSimulationController (a member not declared in this class). */
+ virtual void setSimulationController(PxsSimulationController* simulationController ){ mSimulationController = simulationController; }
+ /**
+ \brief This method combines the results of several islands, e.g. constructing scene-level simulation statistics and merging together threshold streams for contact notification.
+ */
+ virtual void mergeResults();
+ /** \brief No-op in this implementation: the body is empty and the output arguments are left untouched. */
+ virtual void getDataStreamBase(void*& /*contactStreamBase*/, void*& /*patchStreamBase*/, void*& /*forceAndIndicesStreamBase*/){}
+
+ /**
+ \brief Fetches a thread context object from the thread context pool.
+ \return A thread context.
+ */
+ PX_FORCE_INLINE ThreadContext* getThreadContext()
+ {
+ return mThreadContextPool.get();
+ }
+
+ /**
+ \brief Returns a thread context to the thread context pool.
+ \param[in] context The thread context to return to the thread context pool.
+ */
+ void putThreadContext(ThreadContext* context)
+ {
+ mThreadContextPool.put(context);
+ }
+
+
+ PX_FORCE_INLINE PxU32 getKinematicCount() const { return mKinematicCount; } // total number of kinematic bodies in the scene (see mKinematicCount)
+ PX_FORCE_INLINE PxU64 getContextId() const { return mContextID; } // returns the stored context ID
+
+protected:
+
+ /**
+ \brief Constructor for DynamicsContext
+ */
+ DynamicsContext(PxcNpMemBlockPool* memBlockPool,
+ PxcScratchAllocator& scratchAllocator,
+ Cm::FlushPool& taskPool,
+ PxvSimStats& simStats,
+ PxTaskManager* taskManager,
+ Ps::VirtualAllocatorCallback* allocator,
+ PxsMaterialManager* materialManager,
+ IG::IslandSim* accurateIslandSim,
+ PxU64 contextID,
+ const bool enableStabilization,
+ const bool useEnhancedDeterminism,
+ const bool useAdaptiveForce
+ );
+ /**
+ \brief Destructor for DynamicsContext
+ */
+ virtual ~DynamicsContext();
+
+
+ // Solver helper-methods
+ /**
+ \brief Computes the unconstrained velocity for a given PxsRigidBody
+ \param[in] atom The PxsRigidBody
+ */
+ void computeUnconstrainedVelocity(PxsRigidBody* atom) const;
+
+ /**
+ \brief Fills in a PxSolverConstraintDesc from an indexed interaction
+ \param[in,out] desc The PxSolverConstraintDesc
+ \param[in] constraint The PxsIndexedInteraction
+ */
+ void setDescFromIndices(PxSolverConstraintDesc& desc,
+ const PxsIndexedInteraction& constraint, const PxU32 solverBodyOffset);
+
+ /** \brief Overload taking an island edge index, the island manager and a body remap table instead of an indexed interaction. */
+ void setDescFromIndices(PxSolverConstraintDesc& desc, IG::EdgeIndex edgeIndex,
+ const IG::SimpleIslandManager& islandManager, PxU32* bodyRemapTable, const PxU32 solverBodyOffset);
+
+ /**
+ \brief Compute the unconstrained velocity for a set of bodies in parallel. This function may spawn additional tasks.
+ \param[in] dt The timestep
+ \param[in] bodyArray The array of body cores
+ \param[in] originalBodyArray The array of PxsRigidBody
+ \param[in] nodeIndexArray The array of island node index
+ \param[in] bodyCount The number of bodies
+ \param[out] solverBodyPool The pool of solver bodies. These are synced with the corresponding body in bodyArray.
+ \param[out] solverBodyDataPool The pool of solver body data. These are synced with the corresponding body in bodyArray
+ \param[out] motionVelocityArray The motion velocities for the bodies
+ \param[out] maxSolverPositionIterations The maximum number of position iterations requested by any body in the island
+ \param[out] maxSolverVelocityIterations The maximum number of velocity iterations requested by any body in the island
+ \param[in] integrateTask The continuation task for any tasks spawned by this function.
+ */
+ void preIntegrationParallel(
+ const PxF32 dt,
+ PxsBodyCore*const* bodyArray, // INOUT: core body attributes
+ PxsRigidBody*const* originalBodyArray, // IN: original body atom names (LEGACY - DON'T deref the ptrs!!)
+ PxU32 const* nodeIndexArray, // IN: island node index
+ PxU32 bodyCount, // IN: body count
+ PxSolverBody* solverBodyPool, // IN: solver atom pool (space preallocated)
+ PxSolverBodyData* solverBodyDataPool,
+ Cm::SpatialVector* motionVelocityArray, // OUT: motion velocities
+ PxU32& maxSolverPositionIterations,
+ PxU32& maxSolverVelocityIterations,
+ PxBaseTask& integrateTask
+ );
+
+ /**
+ \brief Solves an island in parallel.
+
+ \param[in] params Solver parameter structure
+ \param[in] islandSim The island simulation
+ */
+
+ void solveParallel(SolverIslandParams& params, IG::IslandSim& islandSim);
+
+
+ // NOTE(review): presumably the parallel integration counterpart of solveParallel() - confirm against DyDynamics.cpp.
+ void integrateCoreParallel(SolverIslandParams& params, IG::IslandSim& islandSim);
+
+
+
+
+ /**
+ \brief Resets the thread contexts
+ */
+ void resetThreadContexts();
+
+ /**
+ \brief Returns the scratch memory allocator.
+ \return The scratch memory allocator.
+ */
+ PX_FORCE_INLINE PxcScratchAllocator& getScratchAllocator() { return mScratchAllocator; }
+
+ //Data
+
+ /**
+ \brief Body to represent the world static body.
+ */
+ PX_ALIGN(16, PxSolverBody mWorldSolverBody);
+ /**
+ \brief Body data to represent the world static body.
+ */
+ PX_ALIGN(16, PxSolverBodyData mWorldSolverBodyData);
+
+ /**
+ \brief A thread context pool
+ */
+ PxcThreadCoherentCache<ThreadContext, PxcNpMemBlockPool> mThreadContextPool;
+
+ /**
+ \brief Solver constraint desc array
+ */
+ SolverConstraintDescPool mSolverConstraintDescPool;
+
+ /**
+ \brief Ordered solver constraint desc array (after partitioning)
+ */
+ SolverConstraintDescPool mOrderedSolverConstraintDescPool;
+
+ /**
+ \brief A temporary array of constraint descs used for partitioning
+ */
+ SolverConstraintDescPool mTempSolverConstraintDescPool;
+
+ /**
+ \brief An array of contact constraint batch headers
+ */
+ Ps::Array<PxConstraintBatchHeader> mContactConstraintBatchHeaders;
+
+ /**
+ \brief Array of motion velocities for all bodies in the scene.
+ */
+ Ps::Array<Cm::SpatialVector> mMotionVelocityArray;
+
+ /**
+ \brief Array of body core pointers for all bodies in the scene.
+ */
+ Ps::Array<PxsBodyCore*> mBodyCoreArray;
+
+ /**
+ \brief Array of rigid body pointers for all bodies in the scene.
+ */
+ Ps::Array<PxsRigidBody*> mRigidBodyArray;
+
+ /**
+ \brief Array of articulation pointers for all articulations in the scene.
+ */
+ Ps::Array<Articulation*> mArticulationArray;
+
+ /**
+ \brief Global pool for solver bodies. Kinematic bodies are at the start, and then dynamic bodies
+ */
+ SolverBodyPool mSolverBodyPool;
+ /**
+ \brief Global pool for solver body data. Kinematic bodies are at the start, and then dynamic bodies
+ */
+ SolverBodyDataPool mSolverBodyDataPool;
+
+
+ ThresholdStream* mExceededForceThresholdStream[2]; // stores the previous and the current exceeded-force threshold streams
+
+ Ps::Array<PxU32> mExceededForceThresholdStreamMask;
+
+ /**
+ \brief Interface to the solver core, one entry per friction type.
+ \note We currently only support PxsSolverCoreSIMD. Other cores may be added in future releases.
+ */
+ SolverCore* mSolverCore[PxFrictionType::eFRICTION_COUNT];
+
+ Ps::Array<PxU32> mSolverBodyRemapTable; //Remaps from the "active island" index to the index within a solver island
+
+ Ps::Array<PxU32> mNodeIndexArray; //island node index
+
+ Ps::Array<PxsIndexedContactManager> mContactList;
+
+ /**
+ \brief The total number of kinematic bodies in the scene
+ */
+ PxU32 mKinematicCount;
+
+ /**
+ \brief Atomic counter for the number of threshold stream elements.
+ */
+ PxI32 mThresholdStreamOut;
+
+
+
+ PxsMaterialManager* mMaterialManager;
+
+ PxsContactManagerOutputIterator mOutputIterator;
+
+private:
+ //private:
+ PxcScratchAllocator& mScratchAllocator;
+ Cm::FlushPool& mTaskPool;
+ PxTaskManager* mTaskManager;
+ PxU32 mCurrentIndex; // index of the current exceeded-force threshold stream in mExceededForceThresholdStream
+
+ PxU64 mContextID;
+
+ protected:
+ // Solver task classes and helpers that are granted access to the non-public members above.
+ friend class PxsSolverStartTask;
+ friend class PxsSolverAticulationsTask; // NOTE(review): spelling ("Aticulations") - confirm it matches the actual task class name
+ friend class PxsSolverSetupConstraintsTask;
+ friend class PxsSolverCreateFinalizeConstraintsTask;
+ friend class PxsSolverConstraintPartitionTask;
+ friend class PxsSolverSetupSolveTask;
+ friend class PxsSolverIntegrateTask;
+ friend class PxsSolverEndTask;
+ friend class PxsSolverConstraintPostProcessTask;
+ friend class PxsForceThresholdTask;
+ friend class SolverArticulationUpdateTask;
+
+ friend void solveParallel(SOLVER_PARALLEL_METHOD_ARGS);
+};
+
+#if PX_VC
+ #pragma warning(pop)
+#endif
+
+}
+}
+
+#endif //DY_DYNAMICS_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionCorrelation.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionCorrelation.cpp
new file mode 100644
index 00000000..ba7c2b1d
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionCorrelation.cpp
@@ -0,0 +1,276 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "PxvConfig.h"
+#include "DyCorrelationBuffer.h"
+#include "PxsMaterialManager.h"
+#include "PsUtilities.h"
+
+using namespace physx;
+using namespace Gu;
+
+namespace physx
+{
+
+namespace Dy
+{
+
+namespace
+{
+// Resets 'patch' so that it describes a single contact starting at contact 'index',
+// tagged with the supplied material values and flags.
+PX_FORCE_INLINE void initContactPatch(CorrelationBuffer::ContactPatchData& patch, PxU16 index, PxReal restitution, PxReal staticFriction, PxReal dynamicFriction,
+    PxU8 flags)
+{
+    // material response shared by every contact that later joins this patch
+    patch.staticFriction = staticFriction;
+    patch.dynamicFriction = dynamicFriction;
+    patch.restitution = restitution;
+    patch.flags = flags;
+
+    // contact range: exactly one contact so far, 'next' cleared to 0
+    patch.start = index;
+    patch.count = 1;
+    patch.next = 0;
+}
+
+// Starts a fresh friction patch: no anchors, not broken, material values copied in, and the
+// world-space normal stored relative to each body by inverse-rotating it with that body's pose.
+PX_FORCE_INLINE void initFrictionPatch(FrictionPatch& p, const PxVec3& worldNormal, const PxTransform& body0Pose, const PxTransform& body1Pose,
+    PxReal restitution, PxReal staticFriction, PxReal dynamicFriction, PxU8 materialFlags)
+{
+    // bookkeeping
+    p.broken = 0;
+    p.anchorCount = 0;
+    p.materialFlags = materialFlags;
+
+    // material response
+    p.restitution = restitution;
+    p.staticFriction = staticFriction;
+    p.dynamicFriction = dynamicFriction;
+
+    // patch normal expressed in each body's frame
+    p.body0Normal = body0Pose.rotateInv(worldNormal);
+    p.body1Normal = body1Pose.rotateInv(worldNormal);
+}
+}
+
+
+// Splits the contacts in 'cb' into consecutive contact patches appended to fb.contactPatches.
+// A contact joins the currently open patch when its friction/restitution values equal those of the
+// patch's first contact and the two normals have a dot product of at least 'normalTolerance';
+// otherwise it opens a new patch.
+// Returns false when the patch array is already full or fills up part-way through; in that case
+// fb.contactPatchCount is left untouched. Returns true otherwise and publishes the new patch count.
+// PT: this rewritten version below doesn't have LHS
+bool createContactPatches(CorrelationBuffer& fb, const Gu::ContactPoint* cb, PxU32 contactCount, PxReal normalTolerance)
+{
+    PxU32 patchTotal = fb.contactPatchCount;
+    if(patchTotal == Gu::ContactBuffer::MAX_CONTACTS)
+        return false;
+
+    if(contactCount>0)
+    {
+        const Gu::ContactPoint* PX_RESTRICT points = cb;
+
+        // open the first patch on contact 0
+        CorrelationBuffer::ContactPatchData* openPatch = fb.contactPatches + patchTotal;
+        initContactPatch(*openPatch, Ps::to16(0), points[0].restitution,
+            points[0].staticFriction, points[0].dynamicFriction, PxU8(points[0].materialFlags));
+        patchTotal++;
+
+        PxU8 membersInPatch = 1;
+        PxU32 referenceIndex = 0;    // first contact of the open patch; every candidate is compared against it
+
+        for(PxU32 idx = 1; idx < contactCount; idx++)
+        {
+            const Gu::ContactPoint& candidate = points[idx];
+            const Gu::ContactPoint& reference = points[referenceIndex];
+
+            const bool startsNewPatch = candidate.staticFriction != reference.staticFriction
+                || candidate.dynamicFriction != reference.dynamicFriction
+                || candidate.restitution != reference.restitution
+                || !(candidate.normal.dot(reference.normal) >= normalTolerance);
+
+            if(!startsNewPatch)
+            {
+                membersInPatch++;
+                continue;
+            }
+
+            // out of patch slots: bail out before touching the open patch
+            if(patchTotal == Gu::ContactBuffer::MAX_CONTACTS)
+                return false;
+
+            // close the open patch and start a new one on this contact
+            openPatch->count = membersInPatch;
+            referenceIndex = idx;
+            membersInPatch = 1;
+
+            openPatch = fb.contactPatches + patchTotal;
+            initContactPatch(*openPatch, Ps::to16(idx), candidate.restitution,
+                candidate.staticFriction, candidate.dynamicFriction, PxU8(candidate.materialFlags));
+            patchTotal++;
+        }
+
+        // initContactPatch already stored a count of 1, so only larger counts need writing
+        if(membersInPatch != 1)
+            openPatch->count = membersInPatch;
+    }
+
+    fb.contactPatchCount = patchTotal;
+    return true;
+}
+
+// Assigns every contact patch from 'startContactPatchIndex' onward to a friction patch.
+// Friction patches from 'startFrictionPatchIndex' onward are searched for one whose stored world
+// normal is within 'normalTolerance' (dot product) of the contact patch normal and whose
+// restitution/static/dynamic friction are equal; the contact patch is pushed onto the front of that
+// friction patch's correlation list. If none matches, a new friction patch is created.
+// Returns true if at least one contact patch could not be placed because
+// CorrelationBuffer::MAX_FRICTION_PATCHES was reached (note: true means overflow here).
+bool correlatePatches(CorrelationBuffer& fb,
+    const Gu::ContactPoint* cb,
+    const PxTransform& bodyFrame0,
+    const PxTransform& bodyFrame1,
+    PxReal normalTolerance,
+    PxU32 startContactPatchIndex,
+    PxU32 startFrictionPatchIndex)
+{
+    bool ranOutOfPatches = false;
+    PxU32 numFrictionPatches = fb.frictionPatchCount;
+
+    for(PxU32 contactPatchIdx = startContactPatchIndex; contactPatchIdx < fb.contactPatchCount; contactPatchIdx++)
+    {
+        CorrelationBuffer::ContactPatchData& contactPatch = fb.contactPatches[contactPatchIdx];
+        const PxVec3 patchNormal = cb[contactPatch.start].normal;
+
+        // linear search for the first compatible friction patch
+        PxU32 slot = startFrictionPatchIndex;
+        while(slot < numFrictionPatches)
+        {
+            const bool differs = (patchNormal.dot(fb.frictionPatchWorldNormal[slot]) < normalTolerance)
+                || fb.frictionPatches[slot].restitution != contactPatch.restitution
+                || fb.frictionPatches[slot].staticFriction != contactPatch.staticFriction
+                || fb.frictionPatches[slot].dynamicFriction != contactPatch.dynamicFriction;
+            if(!differs)
+                break;
+            slot++;
+        }
+
+        if(slot != numFrictionPatches)
+        {
+            // compatible patch found: account for the contacts and chain in front of the current head
+            fb.frictionPatchContactCounts[slot] += contactPatch.count;
+            contactPatch.next = Ps::to16(fb.correlationListHeads[slot]);
+        }
+        else
+        {
+            // no match: a new friction patch is needed, unless the buffer is (or was already) exhausted
+            ranOutOfPatches |= slot==CorrelationBuffer::MAX_FRICTION_PATCHES;
+            if(ranOutOfPatches)
+                continue;
+
+            initFrictionPatch(fb.frictionPatches[slot], patchNormal, bodyFrame0, bodyFrame1,
+                contactPatch.restitution, contactPatch.staticFriction, contactPatch.dynamicFriction, contactPatch.flags);
+            fb.frictionPatchWorldNormal[slot] = patchNormal;
+            fb.frictionPatchContactCounts[slot] = contactPatch.count;
+            fb.contactID[slot][0] = 0xffff;
+            fb.contactID[slot][1] = 0xffff;
+            contactPatch.next = CorrelationBuffer::LIST_END;
+            numFrictionPatches++;
+        }
+
+        // either way this contact patch is now the head of the friction patch's list
+        fb.correlationListHeads[slot] = contactPatchIdx;
+    }
+
+    fb.frictionPatchCount = numFrictionPatches;
+
+    return ranOutOfPatches;
+}
+
+// Runs over the friction patches from frictionPatchStartIndex onward, trying to find two anchors per patch among the
+// contacts of its correlated contact patches. A patch that already has one anchor keeps it (persistent spring behavior);
+// patches that already have two anchors, or whose correlation list is empty, are skipped. Updates fb.contactID and fp anchors.
+void growPatches(CorrelationBuffer& fb,
+ const ContactPoint* cb,
+ const PxTransform& bodyFrame0,
+ const PxTransform& bodyFrame1,
+ PxReal , //unused correlationDistance
+ PxU32 frictionPatchStartIndex,
+ PxReal frictionOffsetThreshold)
+{
+ for(PxU32 i=frictionPatchStartIndex;i<fb.frictionPatchCount;i++)
+ {
+ FrictionPatch& fp = fb.frictionPatches[i];
+
+ if(fp.anchorCount==2 || fb.correlationListHeads[i]==CorrelationBuffer::LIST_END) // nothing to grow for this patch
+ continue;
+
+ PxVec3 worldAnchors[2];
+ PxU16 anchorCount = 0;
+ PxReal pointDistSq = 0.0f, dist0, dist1; // pointDistSq: squared distance a replacement anchor has to exceed
+
+ // if we have an anchor already, keep it
+ if(fp.anchorCount == 1)
+ {
+ worldAnchors[anchorCount++] = bodyFrame0.transform(fp.body0Anchors[0]); // only the body0 copy is used to rebuild the world-space anchor
+ }
+
+ for(PxU32 patch = fb.correlationListHeads[i]; // walk the list of contact patches linked to this friction patch
+ patch!=CorrelationBuffer::LIST_END;
+ patch = fb.contactPatches[patch].next)
+ {
+ CorrelationBuffer::ContactPatchData& cp = fb.contactPatches[patch];
+ for(PxU16 j=0;j<cp.count;j++)
+ {
+ const PxVec3& worldPoint = cb[cp.start+j].point;
+
+ if(cb[cp.start+j].separation < frictionOffsetThreshold)
+ {
+ // only contacts whose separation is below frictionOffsetThreshold are anchor candidates
+ switch(anchorCount)
+ {
+ case 0: // no anchor yet: the first candidate becomes anchor 0
+ fb.contactID[i][0] = PxU16(cp.start+j);
+ worldAnchors[0] = worldPoint;
+ anchorCount++;
+ break;
+ case 1: // one anchor: take a second one only if it is far enough from anchor 0
+ pointDistSq = (worldPoint-worldAnchors[0]).magnitudeSquared();
+ if (pointDistSq > (0.025f * 0.025f)) // hard-coded minimum anchor spacing of 0.025 (compared squared)
+ {
+ fb.contactID[i][1] = PxU16(cp.start+j);
+ worldAnchors[1] = worldPoint;
+ anchorCount++;
+ }
+ break;
+ default: //case 2: both anchors set; swap one out when this point gives a wider pair
+ dist0 = (worldPoint-worldAnchors[0]).magnitudeSquared();
+ dist1 = (worldPoint-worldAnchors[1]).magnitudeSquared();
+ if (dist0 > dist1)
+ {
+ if(dist0 > pointDistSq) // farther from anchor 0 than the current pair spacing: replace anchor 1
+ {
+ fb.contactID[i][1] = PxU16(cp.start+j);
+ worldAnchors[1] = worldPoint;
+ pointDistSq = dist0;
+ }
+ }
+ else if (dist1 > pointDistSq) // farther from anchor 1 than the current pair spacing: replace anchor 0
+ {
+ fb.contactID[i][0] = PxU16(cp.start+j);
+ worldAnchors[0] = worldPoint;
+ pointDistSq = dist1;
+ }
+ }
+ }
+ }
+ }
+ // NOTE(review): if fp.anchorCount was 1 and the default case replaced worldAnchors[0], contactID[i][0] changes but the loop below starts at index 1, so fp's anchor 0 keeps its old value - confirm this is intended
+ //PX_ASSERT(anchorCount > 0);
+ // anchors with index below fp.anchorCount are already stored in body space and are left as they are
+ // add the new anchor(s) to the patch
+ for(PxU32 j = fp.anchorCount; j < anchorCount; j++)
+ {
+ fp.body0Anchors[j] = bodyFrame0.transformInv(worldAnchors[j]);
+ fp.body1Anchors[j] = bodyFrame1.transformInv(worldAnchors[j]);
+ }
+
+ // the block contact solver always reads at least one anchor per patch for performance reasons even if there are no valid patches,
+ // so we need to initialize this in the unexpected case that we have no anchors
+
+ if(anchorCount==0)
+ fp.body0Anchors[0] = fp.body1Anchors[0] = PxVec3(0);
+
+ fp.anchorCount = anchorCount;
+ }
+}
+
+}
+
+}
+
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatch.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatch.h
new file mode 100644
index 00000000..507e7f12
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatch.h
@@ -0,0 +1,81 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef PXC_FRICTIONPATCH_H
+#define PXC_FRICTIONPATCH_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "foundation/PxVec3.h"
+#include "PxvConfig.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+// Per-patch friction state: material values, the patch normal expressed in each body's frame and
+// up to two anchor points per body. Member order is part of the contract ('broken' must stay first).
+struct FrictionPatch
+{
+    PxU8 broken; // PT: must be first byte of struct, see "frictionBrokenWritebackByte"
+    PxU8 materialFlags;
+    PxU16 anchorCount;
+    PxReal restitution;
+    PxReal staticFriction;
+    PxReal dynamicFriction;
+    PxVec3 body0Normal;
+    PxVec3 body1Normal;
+    PxVec3 body0Anchors[2];
+    PxVec3 body1Anchors[2];
+
+    // Member-wise copy of every field, written in declaration order.
+    PX_FORCE_INLINE void operator = (const FrictionPatch& other)
+    {
+        broken = other.broken;
+        materialFlags = other.materialFlags;
+        anchorCount = other.anchorCount;
+
+        restitution = other.restitution;
+        staticFriction = other.staticFriction;
+        dynamicFriction = other.dynamicFriction;
+
+        body0Normal = other.body0Normal;
+        body1Normal = other.body1Normal;
+
+        // both anchor slots are copied regardless of anchorCount
+        for(PxU32 a = 0; a < 2; a++)
+            body0Anchors[a] = other.body0Anchors[a];
+        for(PxU32 a = 0; a < 2; a++)
+            body1Anchors[a] = other.body1Anchors[a];
+    }
+};
+
+//PX_COMPILE_TIME_ASSERT(sizeof(FrictionPatch)==80);
+
+}
+
+}
+
+#endif
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatchStreamPair.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatchStreamPair.h
new file mode 100644
index 00000000..8219918f
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatchStreamPair.h
@@ -0,0 +1,128 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef PXC_FRICTIONPATCHPOOL_H
+#define PXC_FRICTIONPATCHPOOL_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "PxvConfig.h"
+#include "PsMutex.h"
+#include "PsArray.h"
+
+// Each narrow phase thread has an input stream of friction patches from the
+// previous frame and an output stream of friction patches which will be
+// saved for next frame. The patches persist for exactly one frame at which
+// point they get thrown away.
+
+
+// There is a stream pair per thread. A contact callback reserves space
+// for its friction patches and gets a cookie in return that it can stash
+// for the next frame. Cookies are valid for one frame only.
+//
+// note that all friction patches reserved are guaranteed to be contiguous;
+// this might turn out to be a bit inefficient if we often have a large
+// number of friction patches
+
+#include "PxcNpMemBlockPool.h"
+
+namespace physx
+{
+
+class FrictionPatchStreamPair // hands out friction patch storage carved from blocks acquired from a PxcNpMemBlockPool
+{
+public:
+ FrictionPatchStreamPair(PxcNpMemBlockPool& blockPool); // keeps a reference to blockPool; starts with no block and nothing used
+// Reserves 'size' units inside the current block, acquiring a new friction block from the pool when needed.
+ // reserve can fail and return null (no block available). A size larger than PxcNpMemBlock::SIZE returns a pointer value of -1 instead. Read should never fail
+ template<class FrictionPatch>
+ FrictionPatch* reserve(const PxU32 size); // 'size' is an offset into mBlock->data - presumably bytes, TODO confirm
+// findInputPatches only reinterprets 'ptr' as a FrictionPatch pointer; reset forgets the current block and usage.
+ template<class FrictionPatch>
+ const FrictionPatch* findInputPatches(const PxU8* ptr) const;
+ void reset();
+// Gives access to the pool this stream pair acquires its blocks from.
+ PxcNpMemBlockPool& getBlockPool() { return mBlockPool;}
+private:
+ PxcNpMemBlockPool& mBlockPool; // source of friction blocks
+ PxcNpMemBlock* mBlock; // block currently being filled, NULL if none
+ PxU32 mUsed; // amount of mBlock->data already handed out
+// Assignment is declared private; no definition is visible in this header.
+ FrictionPatchStreamPair& operator=(const FrictionPatchStreamPair&);
+};
+
+// Binds the stream pair to 'blockPool'. No block is held and nothing is marked used yet.
+PX_FORCE_INLINE FrictionPatchStreamPair::FrictionPatchStreamPair(PxcNpMemBlockPool& blockPool)
+    : mBlockPool(blockPool)
+    , mBlock(NULL)
+    , mUsed(0)
+{
+}
+
+// Forgets the current block and its fill level; the block itself is not touched here.
+PX_FORCE_INLINE void FrictionPatchStreamPair::reset()
+{
+    mUsed = 0;
+    mBlock = NULL;
+}
+
+// Reserves 'size' units of friction patch storage.
+// Returns: a pointer value of -1 if 'size' can never fit in one block (size > PxcNpMemBlock::SIZE),
+//          NULL if a new block was needed but the pool could not provide one,
+//          otherwise a pointer into the current block.
+// Read should never fail.
+template <class FrictionPatch>
+FrictionPatch* FrictionPatchStreamPair::reserve(const PxU32 size)
+{
+    // oversize request: reported with a distinct sentinel rather than NULL
+    if(size>PxcNpMemBlock::SIZE)
+        return reinterpret_cast<FrictionPatch*>(-1);
+
+    PX_ASSERT(size <= PxcNpMemBlock::SIZE);
+
+    // start a new block when there is none, or when the request would not fit in what is left
+    const bool needsNewBlock = (mBlock == NULL) || (mUsed + size > PxcNpMemBlock::SIZE);
+    if(needsNewBlock)
+    {
+        mBlock = mBlockPool.acquireFrictionBlock();
+        mUsed = 0;
+    }
+
+    if(!mBlock)
+        return NULL;
+
+    FrictionPatch* const reserved = reinterpret_cast<FrictionPatch*>(mBlock->data+mUsed);
+    mUsed += size;
+    return reserved;
+}
+
+// Reinterprets the raw pointer 'ptr' as a pointer to friction patches; no lookup or validation is done.
+template <class FrictionPatch>
+const FrictionPatch* FrictionPatchStreamPair::findInputPatches(const PxU8* ptr) const
+{
+    const FrictionPatch* const patches = reinterpret_cast<const FrictionPatch*>(ptr);
+    return patches;
+}
+
+}
+
+#endif
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyRigidBodyToSolverBody.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyRigidBodyToSolverBody.cpp
new file mode 100644
index 00000000..c6c66e8a
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyRigidBodyToSolverBody.cpp
@@ -0,0 +1,107 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "CmUtils.h"
+#include "DySolverBody.h"
+#include "PxsRigidBody.h"
+#include "PxvDynamics.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+//Component-wise square root that yields exactly 0 for any component equal to 0. This is a bit of a
+//hack but allows us to represent kinematic objects' velocities in our new format
+PX_FORCE_INLINE PxVec3 computeSafeSqrtInertia(const PxVec3& v)
+{
+    const PxReal rootX = (v.x == 0.f) ? 0.f : PxSqrt(v.x);
+    const PxReal rootY = (v.y == 0.f) ? 0.f : PxSqrt(v.y);
+    const PxReal rootZ = (v.z == 0.f) ? 0.f : PxSqrt(v.z);
+    return PxVec3(rootX, rootY, rootZ);
+}
+
+// Fills 'data' with the solver's view of one rigid body:
+//  - sqrtInvInertia: square root of the inverse inertia, transformed by the rotation of globalPose
+//  - linear/angular velocity, with components zeroed on axes locked through 'lockFlags'
+//  - invMass, penBiasClamp (maxDepenetrationVelocity), maxContactImpulse, body2World, lockFlags,
+//    nodeIndex and reportThreshold copied straight through
+void copyToSolverBodyData(const PxVec3& linearVelocity, const PxVec3& angularVelocity, const PxReal invMass, const PxVec3& invInertia, const PxTransform& globalPose,
+    const PxReal maxDepenetrationVelocity, const PxReal maxContactImpulse, const PxU32 nodeIndex, const PxReal reportThreshold, PxSolverBodyData& data, PxU32 lockFlags)
+{
+    data.nodeIndex = nodeIndex;
+
+    // inertia: safe square root of the inverse inertia, then rotate with the body orientation
+    PxVec3 sqrtInvInertiaLocal = computeSafeSqrtInertia(invInertia);
+    PxMat33 orientation(globalPose.q);
+    Cm::transformInertiaTensor(sqrtInvInertiaLocal, orientation, data.sqrtInvInertia);
+
+    // velocities start as plain copies...
+    data.linearVelocity = linearVelocity;
+    data.angularVelocity = angularVelocity;
+
+    // ...and every locked axis is then zeroed (none of these tests fires when lockFlags is 0)
+    if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_X)
+        data.linearVelocity.x = 0.f;
+    if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Y)
+        data.linearVelocity.y = 0.f;
+    if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Z)
+        data.linearVelocity.z = 0.f;
+
+    //KS - technically, we can zero the inertia columns and produce stiffer constraints. However, this can cause numerical issues with the
+    //joint solver, which is fixed by disabling joint preprocessing and setting minResponseThreshold to some reasonable value > 0. However, until
+    //this is handled automatically, it's probably better not to zero these inertia rows
+    //(i.e. data.sqrtInvInertia.column0/1/2 are deliberately left untouched for locked angular axes)
+    if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_X)
+        data.angularVelocity.x = 0.f;
+    if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Y)
+        data.angularVelocity.y = 0.f;
+    if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Z)
+        data.angularVelocity.z = 0.f;
+
+    // the incoming velocities are expected to be finite
+    PX_ASSERT(linearVelocity.isFinite());
+    PX_ASSERT(angularVelocity.isFinite());
+
+    // remaining scalar properties are copied unchanged
+    data.invMass = invMass;
+    data.penBiasClamp = maxDepenetrationVelocity;
+    data.maxContactImpulse = maxContactImpulse;
+    data.body2World = globalPose;
+    data.lockFlags = lockFlags;
+
+    data.reportThreshold = reportThreshold;
+}
+
+}
+
+}
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverBody.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverBody.h
new file mode 100644
index 00000000..566f1ca1
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverBody.h
@@ -0,0 +1,60 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_SOLVERATOM_H
+#define DY_SOLVERATOM_H
+
+#include "foundation/PxVec3.h"
+#include "foundation/PxTransform.h"
+#include "foundation/PxMat33.h"
+#include "CmPhysXCommon.h"
+#include "CmSpatialVector.h"
+#include "solver/PxSolverDefs.h"
+
+namespace physx
+{
+
+class PxsRigidBody;
+struct PxsBodyCore;
+
+namespace Dy
+{
+
+//void copyToSolverBodyData(PxSolverBodyData& data, const PxsBodyCore& core, const PxU32 nodeIndex);
+// Copies the given body state (velocities, inverse mass and inertia, pose, depenetration/impulse limits, node index,
+// report threshold and lock flags) into solverBodyData. Defined in DyRigidBodyToSolverBody.cpp.
+void copyToSolverBodyData(const PxVec3& linearVelocity, const PxVec3& angularVelocity, const PxReal invMass, const PxVec3& invInertia, const PxTransform& globalPose,
+ const PxReal maxDepenetrationVelocity, const PxReal maxContactImpulse, const PxU32 nodeIndex, const PxReal reportThreshold, PxSolverBodyData& solverBodyData, PxU32 lockFlags);
+
+}
+
+}
+
+#endif //DY_SOLVERATOM_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D.h
new file mode 100644
index 00000000..4291530b
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D.h
@@ -0,0 +1,203 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_SOLVER_CONSTRAINT_1D_H
+#define DY_SOLVER_CONSTRAINT_1D_H
+
+#include "foundation/PxVec3.h"
+#include "PxvConfig.h"
+#include "DyArticulationUtils.h"
+#include "DySolverConstraintTypes.h"
+#include "DySolverBody.h"
+#include "PxConstraintDesc.h"
+#include "DySolverConstraintDesc.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+// dsequeira: we should probably fork these structures for constraints and extended constraints,
+// since there's a few things that are used for one but not the other
+
+struct SolverConstraint1DHeader // header placed in front of a run of 'count' 1D constraint rows
+{
+ PxU8 type; // enum SolverConstraintType - must be first byte
+ PxU8 count; // count of following 1D constraints
+ PxU8 dominance; // set to 0 by init()
+ PxU8 breakable; // indicate whether this constraint is breakable or not
+// NOTE: init() in this header does not write 'breakable' or the next five members.
+ PxReal linBreakImpulse;
+ PxReal angBreakImpulse;
+ PxReal invMass0D0;
+ PxVec3 body0WorldOffset;
+ PxReal invMass1D1;
+ PxReal linearInvMassScale0; // only used by articulations
+ PxReal angularInvMassScale0; // only used by articulations
+ PxReal linearInvMassScale1; // only used by articulations; init() stores the negated scale
+ PxReal angularInvMassScale1; // only used by articulations; init() stores the negated scale
+};
+// Layout check: 4 x PxU8 + 8 x PxReal + 1 x PxVec3 must pack to exactly 48 bytes.
+PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DHeader) == 48);
+
+PX_ALIGN_PREFIX(16) // one solver row for a 1D constraint; the struct is 16-byte aligned
+struct SolverConstraint1D
+{
+public:
+ PxVec3 lin0; //!< linear velocity projection (body 0)
+ PxReal constant; //!< constraint constant term
+// each PxVec3 below is followed by one 4-byte member
+ PxVec3 lin1; //!< linear velocity projection (body 1)
+ PxReal unbiasedConstant; //!< constraint constant term without bias
+
+ PxVec3 ang0; //!< angular velocity projection (body 0)
+ PxReal velMultiplier; //!< constraint velocity multiplier
+
+ PxVec3 ang1; //!< angular velocity projection (body 1)
+ PxReal impulseMultiplier; //!< constraint impulse multiplier
+
+ PxVec3 ang0Writeback; //!< unscaled angular velocity projection (body 0)
+ PxU32 pad; //!< not written by init(); presumably explicit padding - TODO confirm
+
+ PxReal minImpulse; //!< Lower bound on impulse magnitude
+ PxReal maxImpulse; //!< Upper bound on impulse magnitude
+ PxReal appliedForce; //!< applied force to correct velocity+bias
+ PxU32 flags; //!< set to 0 by init()
+} PX_ALIGN_SUFFIX(16);
+// Layout check: the members above must add up to exactly 96 bytes.
+PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1D) == 96);
+
+
+struct SolverConstraint1DExt : public SolverConstraint1D // extended row: a SolverConstraint1D plus two spatial vectors
+{
+public:
+ Cm::SpatialVectorV deltaVA; // presumably the velocity change associated with body A - TODO confirm
+ Cm::SpatialVectorV deltaVB; // presumably the velocity change associated with body B - TODO confirm
+};
+// Layout check: the two extra members must bring the 96-byte base up to exactly 160 bytes.
+PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DExt) == 160);
+
+
+// Initialises the type, row count, dominance and inverse-mass scales of a 1D constraint header.
+// 'isExtended' selects DY_SC_TYPE_EXT_1D over DY_SC_TYPE_RB_1D. The body-1 scales are stored
+// negated. All other header members are left untouched.
+PX_FORCE_INLINE void init(SolverConstraint1DHeader& h,
+    PxU8 count,
+    bool isExtended,
+    const PxConstraintInvMassScale& ims)
+{
+    if(isExtended)
+        h.type = PxU8(DY_SC_TYPE_EXT_1D);
+    else
+        h.type = PxU8(DY_SC_TYPE_RB_1D);
+
+    h.count = count;
+    h.dominance = 0;
+
+    // body 0 scales as given
+    h.linearInvMassScale0 = ims.linear0;
+    h.angularInvMassScale0 = ims.angular0;
+
+    // body 1 scales with flipped sign
+    h.linearInvMassScale1 = -ims.linear1;
+    h.angularInvMassScale1 = -ims.angular1;
+}
+
+// Initialises one solver row: stores the four projection axes and the impulse limits, and clears
+// 'flags' and 'appliedForce'. Only the two linear axes are asserted to be finite.
+PX_FORCE_INLINE void init(SolverConstraint1D& c,
+    const PxVec3& _linear0, const PxVec3& _linear1,
+    const PxVec3& _angular0, const PxVec3& _angular1,
+    PxReal _minImpulse, PxReal _maxImpulse)
+{
+    PX_ASSERT(_linear0.isFinite());
+    PX_ASSERT(_linear1.isFinite());
+
+    // body 0 axes
+    c.lin0 = _linear0;
+    c.ang0 = _angular0;
+
+    // body 1 axes
+    c.lin1 = _linear1;
+    c.ang1 = _angular1;
+
+    // impulse clamp range
+    c.minImpulse = _minImpulse;
+    c.maxImpulse = _maxImpulse;
+
+    // accumulated state starts cleared
+    c.appliedForce = 0;
+    c.flags = 0;
+}
+
+// True when the row has eRESTITUTION set, or has both eSPRING and eACCELERATION_SPRING set.
+PX_FORCE_INLINE bool needsNormalVel(const Px1DConstraint &c)
+{
+    if(c.flags & Px1DConstraintFlag::eRESTITUTION)
+        return true;
+
+    return (c.flags & Px1DConstraintFlag::eSPRING) && (c.flags & Px1DConstraintFlag::eACCELERATION_SPRING);
+}
+
+// Computes the four per-row solver constants (constant, unbiasedConstant, velMultiplier,
+// impulseMultiplier) for the 1D constraint 'c'.
+//  - unitResponse at or below minRowResponse gives a reciprocal response of 0
+//  - geometric error is scaled by 'erp' before use
+//  - spring rows (eSPRING) use the stiffness/damping formulation, with a separate branch for eACCELERATION_SPRING
+//  - non-spring rows use restitution when eRESTITUTION is set and -normalVel exceeds the bounce threshold,
+//    otherwise the velocity target corrected by the error over the time step
+PX_FORCE_INLINE void setSolverConstants(PxReal& constant,
+    PxReal& unbiasedConstant,
+    PxReal& velMultiplier,
+    PxReal& impulseMultiplier,
+    const Px1DConstraint& c,
+    PxReal normalVel,
+    PxReal unitResponse,
+    PxReal minRowResponse,
+    PxReal erp,
+    PxReal dt,
+    PxReal recipdt)
+{
+    PX_ASSERT(PxIsFinite(unitResponse));
+    PxReal invResponse = unitResponse <= minRowResponse ? 0 : 1.0f/unitResponse;
+    PxReal scaledError = c.geometricError * erp;
+
+    if(!(c.flags & Px1DConstraintFlag::eSPRING))
+    {
+        // hard (non-spring) row
+        velMultiplier = -invResponse;
+        impulseMultiplier = 1.0f;
+
+        if(c.flags & Px1DConstraintFlag::eRESTITUTION && -normalVel>c.mods.bounce.velocityThreshold)
+        {
+            unbiasedConstant = constant = invResponse * c.mods.bounce.restitution*-normalVel;
+        }
+        else
+        {
+            // see usage of 'for internal use' in preprocessRows()
+            constant = invResponse * (c.velocityTarget - scaledError*recipdt);
+            unbiasedConstant = invResponse * (c.velocityTarget - c.forInternalUse*recipdt);
+        }
+        return;
+    }
+
+    // spring row: terms built from the time step and the stiffness/damping values
+    PxReal springA = dt * dt * c.mods.spring.stiffness + dt * c.mods.spring.damping;
+    PxReal springB = dt * (c.mods.spring.damping * c.velocityTarget - c.mods.spring.stiffness * scaledError);
+
+    if(c.flags & Px1DConstraintFlag::eACCELERATION_SPRING)
+    {
+        PxReal scale = 1.0f/(1.0f+springA);
+        constant = unbiasedConstant = scale * invResponse * springB;
+        velMultiplier = -scale * invResponse * springA;
+        impulseMultiplier = 1.0f-scale;
+    }
+    else
+    {
+        PxReal scale = 1.0f/(1.0f+springA*unitResponse);
+        constant = unbiasedConstant = scale * springB;
+        velMultiplier = -scale*springA;
+        impulseMultiplier = 1.0f-scale;
+    }
+}
+
+}
+}
+
+#endif //DY_SOLVER_CONSTRAINT_1D_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D4.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D4.h
new file mode 100644
index 00000000..833f7934
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D4.h
@@ -0,0 +1,106 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef DY_SOLVERCONSTRAINT1D4_H
+#define DY_SOLVERCONSTRAINT1D4_H
+
+#include "foundation/PxVec3.h"
+#include "PxvConfig.h"
+#include "DyArticulationUtils.h"
+#include "DySolverConstraint1D.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+struct SolverConstraint1DHeader4 // header for a block of 1D constraint rows stored four-wide (Vec4V members)
+{
+ PxU8 type; // enum SolverConstraintType - must be first byte
+ PxU8 pad0[3]; // explicit padding bytes following the 1-byte type
+ //These counts are the max of the 4 sets of data.
+ //When certain pairs have fewer constraints than others, they are padded with 0s so that no work is performed but
+ //calculations are still shared (after all, they're computationally free because we're doing 4 things at a time in SIMD)
+ PxU32 count;
+ PxU8 count0, count1, count2, count3; // presumably the four per-pair row counts that 'count' is the max of - TODO confirm
+ PxU8 break0, break1, break2, break3; // NOTE(review): looks like one breakable marker per pair - confirm against the prep code
+
+ Vec4V linBreakImpulse; // one value per pair (four lanes); same layout for the Vec4V members below
+ Vec4V angBreakImpulse;
+ Vec4V invMass0D0;
+ Vec4V invMass1D1;
+ Vec4V angD0;
+ Vec4V angD1;
+
+ Vec4V body0WorkOffsetX; // X/Y/Z components are kept in separate Vec4Vs
+ Vec4V body0WorkOffsetY;
+ Vec4V body0WorkOffsetZ;
+};
+
+struct SolverConstraint1DBase4 // one four-wide 1D constraint row; base of SolverConstraint1DDynamic4
+{
+public:
+ Vec4V lin0X; // vector quantities are split into separate X/Y/Z Vec4Vs
+ Vec4V lin0Y;
+ Vec4V lin0Z;
+ Vec4V ang0X;
+ Vec4V ang0Y;
+ Vec4V ang0Z;
+ Vec4V ang0WritebackX;
+ Vec4V ang0WritebackY;
+ Vec4V ang0WritebackZ;
+ Vec4V constant; // field names mirror the outputs of setSolverConstants() in DySolverConstraint1D.h
+ Vec4V unbiasedConstant;
+ Vec4V velMultiplier;
+ Vec4V impulseMultiplier;
+ Vec4V minImpulse;
+ Vec4V maxImpulse;
+ Vec4V appliedForce;
+ PxU32 flags[4]; // four 32-bit words; presumably one per lane - TODO confirm
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DBase4) == 272); // 16 Vec4V members + 4 PxU32; 272 bytes implies 16-byte Vec4V
+
+struct SolverConstraint1DDynamic4 : public SolverConstraint1DBase4 // extends the base row with lin1/ang1 members
+{
+ Vec4V lin1X; // same X/Y/Z split as lin0*/ang0* in the base struct
+ Vec4V lin1Y;
+ Vec4V lin1Z;
+ Vec4V ang1X;
+ Vec4V ang1Y;
+ Vec4V ang1Z;
+};
+PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DDynamic4) == 368); // base size asserted as 272, plus 6 more Vec4V members
+
+}
+
+}
+
+#endif //DY_SOLVERCONSTRAINT1D4_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintDesc.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintDesc.h
new file mode 100644
index 00000000..e74b0374
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintDesc.h
@@ -0,0 +1,141 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_SOLVERCONSTRAINTDESC_H
+#define DY_SOLVERCONSTRAINTDESC_H
+
+#include "PxvConfig.h"
+#include "DySolverConstraintTypes.h"
+#include "PsUtilities.h"
+#include "PxConstraintDesc.h"
+#include "solver/PxSolverDefs.h"
+
+namespace physx
+{
+
+struct PxcNpWorkUnit;
+
+struct PxsContactManagerOutput;
+
+namespace Cm
+{
+ class SpatialVector;
+}
+
+struct PxSolverBody;
+struct PxSolverBodyData;
+
+namespace Dy
+{
+
+struct FsData;
+
+
+
+
+// dsequeira: moved this articulation stuff here to sever a build dep on Articulation.h through DyThreadContext.h and onward
+
+struct SelfConstraintBlock // plain data record; field meanings below are inferred from names - TODO confirm against users
+{
+ PxU32 startId; // presumably index of the first self constraint covered by this block
+ PxU32 numSelfConstraints; // presumably number of self constraints starting at startId
+ PxU16 fsDataLength; // 16-bit length value
+ PxU16 requiredSolverProgress; // 16-bit; cf. MAX_PERMITTED_SOLVER_PROGRESS (0xFFFF) defined later in this header
+ uintptr_t eaFsData; // address held as an integer rather than a typed pointer; presumably refers to FsData
+};
+
+//This class rolls together multiple contact managers into a single contact manager.
+struct CompoundContactManager // keeps the original buffers/counts alongside the combined ones
+{
+ PxU32 mStartIndex;
+ PxU16 mStride;
+ PxU16 mReducedContactCount;
+
+ PxcNpWorkUnit* unit; //This is a work unit but the contact buffer has been adjusted to contain all the contacts for all the subsequent pairs
+ PxsContactManagerOutput* cmOutput;
+ PxU8* originalContactPatches; //This is the original contact buffer that we replaced with a combined buffer
+ PxU8* originalContactPoints;
+ PxU8 originalContactCount; // 8-bit copies of the original counts/flags
+ PxU8 originalPatchCount;
+ PxU8 originalStatusFlags;
+ PxReal* originalForceBuffer; //This is the original force buffer that we replaced with a combined force buffer
+ PxU16* forceBufferList; //This is a list of indices from the reduced force buffer to the original force buffers - we need this to fix up the write-backs from the solver
+};
+
+struct SolverConstraintPrepState // struct used only as a scope for the Enum below
+{
+enum Enum
+{
+ eOUT_OF_MEMORY, // value 0
+ eUNBATCHABLE, // value 1
+ eSUCCESS // value 2
+};
+};
+
+PX_FORCE_INLINE bool isArticulationConstraint(const PxSolverConstraintDesc& desc) // true when at least one side has a link index
+{
+ return desc.linkIndexA != PxSolverConstraintDesc::NO_LINK || // side A has a link index other than NO_LINK
+ desc.linkIndexB != PxSolverConstraintDesc::NO_LINK; // or side B does
+}
+
+
+PX_FORCE_INLINE void setConstraintLength(PxSolverConstraintDesc& desc, const PxU32 constraintLength) // stores the byte length divided by 16
+{
+ PX_ASSERT(0==(constraintLength & 0x0f)); // length must be a multiple of 16
+ PX_ASSERT(constraintLength <= PX_MAX_U16 * 16); // shifted value must fit the 16-bit conversion below
+ desc.constraintLengthOver16 = Ps::to16(constraintLength >> 4); // inverse of getConstraintLength()
+}
+
+PX_FORCE_INLINE void setWritebackLength(PxSolverConstraintDesc& desc, const PxU32 writeBackLength) // stores the byte length divided by 4
+{
+ PX_ASSERT(0==(writeBackLength & 0x03)); // length must be a multiple of 4
+ PX_ASSERT(writeBackLength <= PX_MAX_U16 * 4); // shifted value must fit the 16-bit conversion below
+ desc.writeBackLengthOver4 = Ps::to16(writeBackLength >> 2); // inverse of getWritebackLength()
+}
+
+PX_FORCE_INLINE PxU32 getConstraintLength(const PxSolverConstraintDesc& desc) // returns the stored length multiplied back by 16
+{
+ return PxU32(desc.constraintLengthOver16 << 4); // undoes the >> 4 applied in setConstraintLength()
+}
+
+PX_FORCE_INLINE PxU32 getWritebackLength(const PxSolverConstraintDesc& desc) // returns the stored length multiplied back by 4
+{
+ return PxU32(desc.writeBackLengthOver4 << 2); // undoes the >> 2 applied in setWritebackLength()
+}
+
+PX_COMPILE_TIME_ASSERT(0 == (0x0f & sizeof(PxSolverConstraintDesc)));
+
+#define MAX_PERMITTED_SOLVER_PROGRESS 0xFFFF
+
+}
+
+}
+
+#endif //DY_SOLVERCONSTRAINTDESC_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintExtShared.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintExtShared.h
new file mode 100644
index 00000000..2c2f59f9
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintExtShared.h
@@ -0,0 +1,116 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef DY_SOLVER_CONSTRAINT_EXT_SHARED_H
+#define DY_SOLVER_CONSTRAINT_EXT_SHARED_H
+
+#include "foundation/PxPreprocessor.h"
+#include "PsVecMath.h"
+#include "DyArticulationContactPrep.h"
+#include "DySolverConstraintDesc.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverContact.h"
+#include "DySolverContactPF.h"
+#include "DyArticulationHelper.h"
+#include "PxcNpWorkUnit.h"
+#include "PxsMaterialManager.h"
+#include "PxsMaterialCombiner.h"
+
+namespace physx
+{
+namespace Dy
+{
+ PX_FORCE_INLINE void setupExtSolverContact(const SolverExtBody& b0, const SolverExtBody& b1, // fills 'solverContact' for one contact point between b0 and b1
+ const PxF32 d0, const PxF32 d1, const PxF32 angD0, const PxF32 angD1, const PxTransform& bodyFrame0, const PxTransform& bodyFrame1, // d*/angD* are forwarded to getImpulseResponse; only the .p of each frame is read
+ const Vec3VArg normal, const FloatVArg invDt, const FloatVArg invDtp8, const FloatVArg restDistance, const FloatVArg maxPenBias, const FloatVArg restitution,
+ const FloatVArg bounceThreshold, const Gu::ContactPoint& contact, SolverContactPointExt& solverContact, const FloatVArg ccdMaxSeparation)
+ {
+ const FloatV zero = FZero();
+ const FloatV separation = FLoad(contact.separation);
+
+ const FloatV penetration = FSub(separation, restDistance); // separation minus rest distance
+
+ const PxVec3 ra = contact.point - bodyFrame0.p; // contact point relative to each body frame position
+ const PxVec3 rb = contact.point - bodyFrame1.p;
+
+ const PxVec3 raXn = ra.cross(contact.normal);
+ const PxVec3 rbXn = rb.cross(contact.normal);
+
+ Cm::SpatialVector deltaV0, deltaV1; // passed to getImpulseResponse below, then stored into solverContact at the end
+
+ const Cm::SpatialVector resp0 = createImpulseResponseVector(contact.normal, raXn, b0);
+ const Cm::SpatialVector resp1 = createImpulseResponseVector(-contact.normal, -rbXn, b1); // body 1 gets the negated normal and negated rbXn
+
+ const FloatV unitResponse = FLoad(getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
+ b1, resp1, deltaV1, d1, angD1));
+
+ const FloatV vel0 = FLoad(b0.projectVelocity(contact.normal, raXn));
+ const FloatV vel1 = FLoad(b1.projectVelocity(contact.normal, rbXn));
+ const FloatV vrel = FSub(vel0, vel1); // vel0 - vel1
+
+ FloatV velMultiplier = FSel(FIsEq(unitResponse, zero), zero, FRecip(unitResponse)); // 0 when the response is exactly 0, otherwise its reciprocal
+ FloatV scaledBias = FMul(velMultiplier, FMax(maxPenBias, FMul(penetration, invDtp8))); // velMultiplier * max(maxPenBias, penetration*invDtp8)
+ const FloatV penetrationInvDt = FMul(penetration, invDt);
+
+ const BoolV isGreater2 = BAnd(BAnd(FIsGrtr(restitution, zero), FIsGrtr(bounceThreshold, vrel)), FIsGrtr(FNeg(vrel), penetrationInvDt)); // restitution > 0 && vrel < bounceThreshold && -vrel > penetration*invDt
+
+ const BoolV ccdSeparationCondition = FIsGrtrOrEq(ccdMaxSeparation, penetration); // ccdMaxSeparation >= penetration
+
+ scaledBias = FSel(BAnd(ccdSeparationCondition, isGreater2), zero, scaledBias); // bias is zeroed only when both conditions hold
+
+ FloatV targetVelocity = FSel(isGreater2, FMul(FNeg(vrel), restitution), zero); // -vrel*restitution when the bounce test passes, else 0
+
+ //Get the rigid body's current velocity and embed into the constraint target velocities
+ if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+ targetVelocity = FSub(targetVelocity, vel0);
+ else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK) // only reached when b0 has a link index
+ targetVelocity = FAdd(targetVelocity, vel1);
+
+ targetVelocity = FAdd(targetVelocity, V3Dot(V3LoadA(contact.targetVel), normal)); // adds contact.targetVel projected onto 'normal'
+
+ const FloatV biasedErr = FScaleAdd(targetVelocity, velMultiplier, FNeg(scaledBias)); // presumably targetVelocity*velMultiplier - scaledBias (FScaleAdd assumed a*b+c)
+ const FloatV unbiasedErr = FScaleAdd(targetVelocity, velMultiplier, FSel(isGreater2, zero, FNeg(FMax(scaledBias, zero)))); // bias term is 0 on bounce, otherwise -max(scaledBias, 0)
+
+
+ FStore(velMultiplier, &solverContact.velMultiplier);
+ FStore(biasedErr, &solverContact.biasedErr);
+ FStore(unbiasedErr, &solverContact.unbiasedErr);
+ solverContact.maxImpulse = contact.maxImpulse;
+
+ solverContact.raXn = V3LoadA(resp0.angular);
+ solverContact.rbXn = V3Neg(V3LoadA(resp1.angular)); // resp1 was built from negated inputs, so it is negated again on store
+ solverContact.linDeltaVA = V3LoadA(deltaV0.linear);
+ solverContact.angDeltaVA = V3LoadA(deltaV0.angular);
+ solverContact.linDeltaVB = V3LoadA(deltaV1.linear);
+ solverContact.angDeltaVB = V3LoadA(deltaV1.angular);
+ }
+}
+}
+
+#endif //DY_SOLVER_CONSTRAINT_EXT_SHARED_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintTypes.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintTypes.h
new file mode 100644
index 00000000..2b13c190
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintTypes.h
@@ -0,0 +1,67 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_SOLVERCONSTRAINTTYPES_H
+#define DY_SOLVERCONSTRAINTTYPES_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "PxvConfig.h"
+
+namespace physx
+{
+
+enum SolverConstraintType // values are consecutive starting at 0
+{
+ DY_SC_TYPE_NONE = 0,
+ DY_SC_TYPE_RB_CONTACT, // RB-only contact
+ DY_SC_TYPE_RB_1D, // RB-only 1D constraint
+ DY_SC_TYPE_EXT_CONTACT, // contact involving articulations
+ DY_SC_TYPE_EXT_1D, // 1D constraint involving articulations
+ DY_SC_TYPE_STATIC_CONTACT, // RB-only contact where body b is static
+ DY_SC_TYPE_NOFRICTION_RB_CONTACT, //RB-only contact with no friction patch
+ DY_SC_TYPE_BLOCK_RB_CONTACT, // BLOCK_* entries: presumably the batched (four-wide) variants - TODO confirm
+ DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT,
+ DY_SC_TYPE_BLOCK_1D,
+ DY_SC_TYPE_FRICTION, // *_FRICTION entries: presumably the separate friction constraint formats - TODO confirm
+ DY_SC_TYPE_STATIC_FRICTION,
+ DY_SC_TYPE_EXT_FRICTION,
+ DY_SC_TYPE_BLOCK_FRICTION,
+ DY_SC_TYPE_BLOCK_STATIC_FRICTION,
+ DY_SC_CONSTRAINT_TYPE_COUNT //Count of the number of different constraint types in the solver
+};
+
+enum SolverConstraintFlags // bit flags; only one is defined here
+{
+ DY_SC_FLAG_OUTPUT_FORCE = (1<<1) // bit 1 (value 2); bit 0 is not defined in this enum
+};
+
+}
+
+#endif //DY_SOLVERCONSTRAINTTYPES_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp
new file mode 100644
index 00000000..ea935ce9
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp
@@ -0,0 +1,1121 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxPreprocessor.h"
+#include "PsVecMath.h"
+
+#ifdef PX_SUPPORT_SIMD
+
+#include "CmPhysXCommon.h"
+#include "DySolverBody.h"
+#include "DySolverContact.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverConstraintDesc.h"
+#include "DyThresholdTable.h"
+#include "DySolverContext.h"
+#include "PsUtilities.h"
+#include "DyConstraint.h"
+#include "PsAtomic.h"
+#include "DySolverConstraintsShared.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+//Port of scalar implementation to SIMD maths with some interleaving of instructions
+void solve1D(const PxSolverConstraintDesc& desc, SolverContext& cache) // one pass over the 1D rows of 'desc', updating both bodies' velocities
+{
+ PX_UNUSED(cache); // the context is not read by this function
+ PxSolverBody& b0 = *desc.bodyA;
+ PxSolverBody& b1 = *desc.bodyB;
+
+ PxU8* PX_RESTRICT bPtr = desc.constraint;
+ //PxU32 length = desc.constraintLength;
+
+ const SolverConstraint1DHeader* PX_RESTRICT header = reinterpret_cast<const SolverConstraint1DHeader*>(bPtr); // header sits at the start of the constraint stream
+ SolverConstraint1D* PX_RESTRICT base = reinterpret_cast<SolverConstraint1D*>(bPtr + sizeof(SolverConstraint1DHeader)); // rows follow the header directly
+
+ Vec3V linVel0 = V3LoadA(b0.linearVelocity); // body state is held in locals for the loop and stored back after it
+ Vec3V linVel1 = V3LoadA(b1.linearVelocity);
+ Vec3V angState0 = V3LoadA(b0.angularState);
+ Vec3V angState1 = V3LoadA(b1.angularState);
+
+ const FloatV invMass0 = FLoad(header->invMass0D0);
+ const FloatV invMass1 = FLoad(header->invMass1D1);
+ const FloatV invInertiaScale0 = FLoad(header->angularInvMassScale0);
+ const FloatV invInertiaScale1 = FLoad(header->angularInvMassScale1);
+
+
+ for(PxU32 i=0; i<header->count;++i, base++) // base++ steps by sizeof(SolverConstraint1D)
+ {
+ Ps::prefetchLine(base+1); // prefetch the next row
+ SolverConstraint1D& c = *base;
+
+ const Vec3V clinVel0 = V3LoadA(c.lin0);
+ const Vec3V clinVel1 = V3LoadA(c.lin1);
+ const Vec3V cangVel0 = V3LoadA(c.ang0);
+ const Vec3V cangVel1 = V3LoadA(c.ang1);
+
+ const FloatV constant = FLoad(c.constant);
+ const FloatV vMul = FLoad(c.velMultiplier);
+ const FloatV iMul = FLoad(c.impulseMultiplier);
+ const FloatV appliedForce = FLoad(c.appliedForce); // value accumulated by earlier passes over this row
+ //const FloatV targetVel = FLoad(c.targetVelocity);
+
+ const FloatV maxImpulse = FLoad(c.maxImpulse);
+ const FloatV minImpulse = FLoad(c.minImpulse);
+
+ const Vec3V v0 = V3MulAdd(linVel0, clinVel0, V3Mul(angState0, cangVel0)); // per-component products; summed into a scalar below
+ const Vec3V v1 = V3MulAdd(linVel1, clinVel1, V3Mul(angState1, cangVel1));
+
+ const FloatV normalVel = V3SumElems(V3Sub(v0, v1)); // body 0 contribution minus body 1 contribution
+ const FloatV unclampedForce = FScaleAdd(iMul, appliedForce, FScaleAdd(vMul, normalVel, constant)); // presumably iMul*appliedForce + vMul*normalVel + constant (FScaleAdd assumed a*b+c)
+ const FloatV clampedForce = FMin(maxImpulse, (FMax(minImpulse, unclampedForce))); // clamp to [minImpulse, maxImpulse]
+ const FloatV deltaF = FSub(clampedForce, appliedForce); // change relative to the previously stored value
+
+ FStore(clampedForce, &c.appliedForce); // written back into the row
+ linVel0 = V3ScaleAdd(clinVel0, FMul(deltaF, invMass0), linVel0);
+ linVel1 = V3NegScaleSub(clinVel1, FMul(deltaF, invMass1), linVel1);
+ angState0 = V3ScaleAdd(cangVel0, FMul(deltaF, invInertiaScale0), angState0);
+ //This should be negScaleSub but invInertiaScale1 is negated already
+ angState1 = V3ScaleAdd(cangVel1, FMul(deltaF, invInertiaScale1), angState1);
+
+ }
+
+ V3StoreA(linVel0, b0.linearVelocity); // write the updated state back to both bodies
+ V3StoreA(angState0, b0.angularState);
+ V3StoreA(linVel1, b1.linearVelocity);
+ V3StoreA(angState1, b1.angularState);
+
+ PX_ASSERT(b0.linearVelocity.isFinite());
+ PX_ASSERT(b0.angularState.isFinite());
+ PX_ASSERT(b1.linearVelocity.isFinite());
+ PX_ASSERT(b1.angularState.isFinite());
+}
+
+void conclude1D(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/) // replaces each row's constant with its unbiasedConstant
+{
+ SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
+ PxU8* base = desc.constraint + sizeof(SolverConstraint1DHeader); // first row follows the header
+ PxU32 stride = header->type == DY_SC_TYPE_EXT_1D ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D); // row size depends on the header type
+
+ for(PxU32 i=0; i<header->count; i++)
+ {
+ SolverConstraint1D& c = *reinterpret_cast<SolverConstraint1D*>(base); // rows are accessed as SolverConstraint1D for either stride
+
+ c.constant = c.unbiasedConstant;
+
+ base += stride;
+ }
+ PX_ASSERT(desc.constraint + getConstraintLength(desc) == base); // header plus count rows must span exactly the recorded length
+}
+
+// ==============================================================
+
+void solveContact(const PxSolverConstraintDesc& desc, SolverContext& cache) // one pass over all contact patches in 'desc' for two bodies
+{
+ PxSolverBody& b0 = *desc.bodyA;
+ PxSolverBody& b1 = *desc.bodyB;
+
+ Vec3V linVel0 = V3LoadA(b0.linearVelocity); // body state is held in locals and stored back after the loop
+ Vec3V linVel1 = V3LoadA(b1.linearVelocity);
+ Vec3V angState0 = V3LoadA(b0.angularState);
+ Vec3V angState1 = V3LoadA(b1.angularState);
+
+ const PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc); // end of this descriptor's constraint stream
+
+ //hopefully pointer aliasing doesn't bite.
+ PxU8* PX_RESTRICT currPtr = desc.constraint;
+
+ while(currPtr < last) // each iteration consumes: header, contact points, force buffer, frictions
+ {
+ SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
+ currPtr += sizeof(SolverContactHeader);
+
+ const PxU32 numNormalConstr = hdr->numNormalConstr;
+ const PxU32 numFrictionConstr = hdr->numFrictionConstr;
+
+ SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
+ Ps::prefetchLine(contacts);
+ currPtr += numNormalConstr * sizeof(SolverContactPoint);
+
+ PxF32* forceBuffer = reinterpret_cast<PxF32*>(currPtr);
+ currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3)); // float count is rounded up to a multiple of 4
+
+ SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
+ currPtr += numFrictionConstr * sizeof(SolverContactFriction);
+
+ const FloatV invMassA = FLoad(hdr->invMass0);
+ const FloatV invMassB = FLoad(hdr->invMass1);
+
+ const FloatV angDom0 = FLoad(hdr->angDom0);
+ const FloatV angDom1 = FLoad(hdr->angDom1);
+
+ const Vec3V contactNormal = hdr->normal;
+
+ const FloatV accumulatedNormalImpulse = solveDynamicContacts(contacts, numNormalConstr, contactNormal, invMassA, invMassB,
+ angDom0, angDom1, linVel0, angState0, linVel1, angState1, forceBuffer); // result bounds the friction impulses below
+
+ if(cache.doFriction && numFrictionConstr)
+ {
+ const FloatV staticFrictionCof = hdr->getStaticFriction();
+ const FloatV dynamicFrictionCof = hdr->getDynamicFriction();
+ const FloatV maxFrictionImpulse = FMul(staticFrictionCof, accumulatedNormalImpulse); // threshold above which the impulse is clamped
+ const FloatV maxDynFrictionImpulse = FMul(dynamicFrictionCof, accumulatedNormalImpulse); // clamp range once the threshold is exceeded
+ const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse);
+
+ BoolV broken = BFFFF(); // presumably all-false; OR-ed with each row's clamp result below
+
+ if(cache.writeBackIteration)
+ Ps::prefetchLine(hdr->frictionBrokenWritebackByte);
+
+ for(PxU32 i=0;i<numFrictionConstr;i++)
+ {
+ SolverContactFriction& f = frictions[i];
+ Ps::prefetchLine(&frictions[i],128);
+
+
+ const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW; // each Vec4V packs a vector in XYZ and a scalar in W
+ const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
+ const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW;
+
+ const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
+ const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);
+ const Vec3V rbXn = Vec3V_From_Vec4V(rbXnXYZ_biasW);
+
+ const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
+ const FloatV bias = V4GetW(rbXnXYZ_biasW);
+ const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);
+
+ const FloatV targetVel = FLoad(f.targetVel);
+
+ const Vec3V delLinVel0 = V3Scale(normal, invMassA);
+ const Vec3V delLinVel1 = V3Scale(normal, invMassB);
+
+ const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angState0, raXn)); // per-component products; summed into a scalar below
+ const Vec3V v1 = V3MulAdd(linVel1, normal, V3Mul(angState1, rbXn));
+ const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
+
+
+
+ // appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation
+ const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce); // note: the first operand is (bias - targetVel)
+
+ // Algorithm:
+ // if abs(appliedForce + deltaF) > maxFrictionImpulse
+ // clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
+ // (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
+ // set broken flag to true || broken flag
+
+ // FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier);
+ // FloatV potentialSumF = FAdd(appliedForce, deltaF);
+
+ const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);
+
+ // On XBox this clamping code uses the vector simple pipe rather than vector float,
+ // which eliminates a lot of stall cycles
+
+ const BoolV clamp = FIsGrtr(FAbs(totalImpulse), maxFrictionImpulse); // |totalImpulse| > maxFrictionImpulse
+
+ const FloatV totalClamped = FMin(maxDynFrictionImpulse, FMax(negMaxDynFrictionImpulse, totalImpulse)); // clamp to [-maxDyn, +maxDyn]
+
+ const FloatV newAppliedForce = FSel(clamp, totalClamped,totalImpulse); // clamped value is used only when the threshold was exceeded
+
+ broken = BOr(broken, clamp);
+
+ FloatV deltaF = FSub(newAppliedForce, appliedForce);
+
+ // we could get rid of the stall here by calculating and clamping delta separately, but
+ // the complexity isn't really worth it.
+
+ linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
+ linVel1 = V3NegScaleSub(delLinVel1, deltaF, linVel1);
+ angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
+ angState1 = V3NegScaleSub(rbXn, FMul(deltaF, angDom1), angState1);
+
+ f.setAppliedForce(newAppliedForce);
+
+
+ }
+ Store_From_BoolV(broken, &hdr->broken); // records the accumulated clamp result for this patch
+ }
+
+ }
+
+ PX_ASSERT(b0.linearVelocity.isFinite()); // checks the stored body values before they are overwritten
+ PX_ASSERT(b0.angularState.isFinite());
+ PX_ASSERT(b1.linearVelocity.isFinite());
+ PX_ASSERT(b1.angularState.isFinite());
+
+ // Write back
+ V3StoreU(linVel0, b0.linearVelocity); // NOTE(review): uses V3StoreU here while solve1D and solveContact_BStatic use V3StoreA
+ V3StoreU(linVel1, b1.linearVelocity);
+ V3StoreU(angState0, b0.angularState);
+ V3StoreU(angState1, b1.angularState);
+
+ PX_ASSERT(b0.linearVelocity.isFinite()); // checks the values just written
+ PX_ASSERT(b0.angularState.isFinite());
+ PX_ASSERT(b1.linearVelocity.isFinite());
+ PX_ASSERT(b1.angularState.isFinite());
+
+ PX_ASSERT(currPtr == last); // the stream must be consumed exactly
+}
+
+void solveContact_BStatic(const PxSolverConstraintDesc& desc, SolverContext& cache) // variant of solveContact that reads and writes only body A
+{
+ PxSolverBody& b0 = *desc.bodyA;
+ //PxSolverBody& b1 = *desc.bodyB;
+
+ Vec3V linVel0 = V3LoadA(b0.linearVelocity); // body A state is held in locals and stored back after the loop
+ Vec3V angState0 = V3LoadA(b0.angularState);
+
+ const PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc); // end of this descriptor's constraint stream
+
+ //hopefully pointer aliasing doesn't bite.
+ PxU8* PX_RESTRICT currPtr = desc.constraint;
+
+ while(currPtr < last) // each iteration consumes: header, contact points, force buffer, frictions
+ {
+ SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
+ currPtr += sizeof(SolverContactHeader);
+
+ const PxU32 numNormalConstr = hdr->numNormalConstr;
+ const PxU32 numFrictionConstr = hdr->numFrictionConstr;
+
+ SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
+ //Ps::prefetchLine(contacts);
+ currPtr += numNormalConstr * sizeof(SolverContactPoint);
+
+ PxF32* forceBuffer = reinterpret_cast<PxF32*>(currPtr);
+ currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3)); // float count is rounded up to a multiple of 4
+
+ SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
+ currPtr += numFrictionConstr * sizeof(SolverContactFriction);
+
+
+
+ const FloatV invMassA = FLoad(hdr->invMass0);
+
+ const Vec3V contactNormal = hdr->normal;
+ const FloatV angDom0 = FLoad(hdr->angDom0);
+
+
+ const FloatV accumulatedNormalImpulse = solveStaticContacts(contacts, numNormalConstr, contactNormal,
+ invMassA, angDom0, linVel0, angState0, forceBuffer); // result bounds the friction impulses below
+
+ if(cache.doFriction && numFrictionConstr)
+ {
+ const FloatV maxFrictionImpulse = FMul(hdr->getStaticFriction(), accumulatedNormalImpulse); // threshold above which the impulse is clamped
+ const FloatV maxDynFrictionImpulse = FMul(hdr->getDynamicFriction(), accumulatedNormalImpulse); // clamp range once the threshold is exceeded
+
+ BoolV broken = BFFFF(); // presumably all-false; OR-ed with each row's clamp result below
+ if(cache.writeBackIteration)
+ Ps::prefetchLine(hdr->frictionBrokenWritebackByte);
+
+ for(PxU32 i=0;i<numFrictionConstr;i++)
+ {
+ SolverContactFriction& f = frictions[i];
+ Ps::prefetchLine(&frictions[i],128);
+
+
+ const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW; // each Vec4V packs a vector in XYZ and a scalar in W
+ const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
+ const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW; // only the W (bias) component is used in this variant
+
+ const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
+ const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);
+
+ const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
+ const FloatV bias = V4GetW(rbXnXYZ_biasW);
+ const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);
+
+ const FloatV targetVel = FLoad(f.targetVel);
+
+ const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse); // does not depend on i; solveContact computes this once before its loop
+
+ const Vec3V delLinVel0 = V3Scale(normal, invMassA);
+ //const FloatV negMaxFrictionImpulse = FNeg(maxFrictionImpulse);
+
+ const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angState0, raXn)); // per-component products; summed into a scalar below
+ const FloatV normalVel = V3SumElems(v0); // no body B term is subtracted here
+
+
+ // appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation
+ const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce); // note: the first operand is (bias - targetVel)
+
+ // Algorithm:
+ // if abs(appliedForce + deltaF) > maxFrictionImpulse
+ // clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
+ // (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
+ // set broken flag to true || broken flag
+
+ // FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier);
+ // FloatV potentialSumF = FAdd(appliedForce, deltaF);
+
+ const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);
+
+ // On XBox this clamping code uses the vector simple pipe rather than vector float,
+ // which eliminates a lot of stall cycles
+
+ const BoolV clamp = FIsGrtr(FAbs(totalImpulse), maxFrictionImpulse); // |totalImpulse| > maxFrictionImpulse
+
+ const FloatV totalClamped = FMin(maxDynFrictionImpulse, FMax(negMaxDynFrictionImpulse, totalImpulse)); // clamp to [-maxDyn, +maxDyn]
+
+ broken = BOr(broken, clamp);
+
+ const FloatV newAppliedForce = FSel(clamp, totalClamped,totalImpulse); // clamped value is used only when the threshold was exceeded
+
+ FloatV deltaF = FSub(newAppliedForce, appliedForce);
+
+ // we could get rid of the stall here by calculating and clamping delta separately, but
+ // the complexity isn't really worth it.
+
+ linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
+ angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
+
+ f.setAppliedForce(newAppliedForce);
+
+ }
+ Store_From_BoolV(broken, &hdr->broken); // records the accumulated clamp result for this patch
+ }
+
+ }
+
+ PX_ASSERT(b0.linearVelocity.isFinite()); // checks the stored body values before they are overwritten
+ PX_ASSERT(b0.angularState.isFinite());
+
+ // Write back
+ V3StoreA(linVel0, b0.linearVelocity);
+ V3StoreA(angState0, b0.angularState);
+
+ PX_ASSERT(b0.linearVelocity.isFinite()); // checks the values just written
+ PX_ASSERT(b0.angularState.isFinite());
+
+ PX_ASSERT(currPtr == last); // the stream must be consumed exactly
+}
+
+
+// Finalises every contact patch stored in desc.constraint: each contact point's
+// biasedErr is overwritten with its unbiasedErr and each friction row's bias is
+// set to zero. The solver context argument is not used.
+void concludeContact(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+	const FloatV zeroBias = FZero();
+
+	PxU8* PX_RESTRICT cursor = desc.constraint;
+	PxU8* PX_RESTRICT streamEnd = desc.constraint + getConstraintLength(desc);
+
+	while(cursor < streamEnd)
+	{
+		const SolverContactHeader* PX_RESTRICT header = reinterpret_cast<const SolverContactHeader*>(cursor);
+		cursor += sizeof(SolverContactHeader);
+
+		const PxU32 contactCount = header->numNormalConstr;
+		const PxU32 frictionCount = header->numFrictionConstr;
+
+		// Extended contacts use differently sized point/friction records, so the
+		// stride through the stream depends on the header type.
+		const bool isExtended = header->type == DY_SC_TYPE_EXT_CONTACT;
+
+		// Prefetch the data that follows the header.
+		Ps::prefetchLine(cursor, 128);
+		Ps::prefetchLine(cursor, 256);
+		Ps::prefetchLine(cursor, 384);
+
+		const PxU32 pointStride = isExtended ? sizeof(SolverContactPointExt) : sizeof(SolverContactPoint);
+		for(PxU32 remaining = contactCount; remaining > 0; --remaining)
+		{
+			SolverContactPoint* point = reinterpret_cast<SolverContactPoint*>(cursor);
+			point->biasedErr = point->unbiasedErr;
+			cursor += pointStride;
+		}
+
+		// Step over the applied-force buffer (contact count rounded up to a multiple of four floats).
+		cursor += sizeof(PxF32) * ((contactCount + 3) & (~3));
+
+		const PxU32 frictionStride = isExtended ? sizeof(SolverContactFrictionExt) : sizeof(SolverContactFriction);
+		for(PxU32 remaining = frictionCount; remaining > 0; --remaining)
+		{
+			SolverContactFriction* friction = reinterpret_cast<SolverContactFriction*>(cursor);
+			friction->setBias(zeroBias);
+			cursor += frictionStride;
+		}
+	}
+
+	PX_ASSERT(cursor == streamEnd);
+}
+
+// Walks the contact stream in desc.constraint and reports its results:
+//  - copies each contact's applied normal force from the per-patch force buffer
+//    to desc.writeBack (only when desc.writeBack is non-NULL),
+//  - sets the friction-broken byte of every patch whose header is flagged broken,
+//  - appends one ThresholdStreamElement to the context's local threshold stream
+//    when force thresholds are enabled, neither side is an articulation link,
+//    the summed normal force is non-zero and at least one body has a report
+//    threshold below PX_MAX_REAL.
+void writeBackContact(const PxSolverConstraintDesc& desc, SolverContext& cache,
+					  PxSolverBodyData& bd0, PxSolverBodyData& bd1)
+{
+	PxU8* PX_RESTRICT cursor = desc.constraint;
+	PxU8* PX_RESTRICT streamEnd = desc.constraint + getConstraintLength(desc);
+	PxReal* PX_RESTRICT forceOut = reinterpret_cast<PxReal*>(desc.writeBack);
+
+	PxReal totalNormalForce = 0;
+	bool hasForceThreshold = false;
+
+	while(cursor < streamEnd)
+	{
+		const SolverContactHeader* PX_RESTRICT header = reinterpret_cast<const SolverContactHeader*>(cursor);
+		cursor += sizeof(SolverContactHeader);
+
+		// Overwritten per patch: the value from the last header visited is the one used below.
+		hasForceThreshold = (header->flags & SolverContactHeader::eHAS_FORCE_THRESHOLDS) != 0;
+
+		const PxU32 contactCount = header->numNormalConstr;
+		const PxU32 frictionCount = header->numFrictionConstr;
+		const bool isExtended = header->type == DY_SC_TYPE_EXT_CONTACT;
+
+		Ps::prefetchLine(cursor, 256);
+		Ps::prefetchLine(cursor, 384);
+
+		// The contact points themselves are skipped; the applied forces are read
+		// from the float buffer stored directly behind them.
+		const PxU32 pointStride = isExtended ? sizeof(SolverContactPointExt) : sizeof(SolverContactPoint);
+		cursor += pointStride * contactCount;
+
+		PxF32* appliedForces = reinterpret_cast<PxF32*>(cursor);
+		cursor += sizeof(PxF32) * ((contactCount + 3) & (~3));
+
+		if(forceOut != NULL)
+		{
+			for(PxU32 i = 0; i < contactCount; ++i)
+			{
+				const PxReal force = appliedForces[i];
+				*forceOut++ = force;
+				totalNormalForce += force;
+			}
+		}
+
+		if(header->broken && header->frictionBrokenWritebackByte != NULL)
+			*header->frictionBrokenWritebackByte = 1;
+
+		// Friction rows carry nothing to write back; just step over them.
+		const PxU32 frictionStride = isExtended ? sizeof(SolverContactFrictionExt) : sizeof(SolverContactFriction);
+		cursor += frictionStride * frictionCount;
+	}
+	PX_ASSERT(cursor == streamEnd);
+
+	const bool bothRigidBodies = desc.linkIndexA == PxSolverConstraintDesc::NO_LINK
+		&& desc.linkIndexB == PxSolverConstraintDesc::NO_LINK;
+	const bool thresholdRequested = bd0.reportThreshold < PX_MAX_REAL || bd1.reportThreshold < PX_MAX_REAL;
+
+	if(!hasForceThreshold || !bothRigidBodies || totalNormalForce == 0 || !thresholdRequested)
+		return;
+
+	ThresholdStreamElement element;
+	element.normalForce = totalNormalForce;
+	element.threshold = PxMin<float>(bd0.reportThreshold, bd1.reportThreshold);
+	element.nodeIndexA = bd0.nodeIndex;
+	element.nodeIndexB = bd1.nodeIndex;
+	// The shape interaction is taken from the first header in the stream.
+	element.shapeInteraction = reinterpret_cast<const SolverContactHeader*>(desc.constraint)->shapeInteraction;
+	Ps::order(element.nodeIndexA, element.nodeIndexB);
+	PX_ASSERT(element.nodeIndexA < element.nodeIndexB);
+	PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength);
+	cache.mThresholdStream[cache.mThresholdStreamIndex++] = element;
+}
+
+// adjust from CoM to joint
+
+// Writes the result of a 1D constraint back to desc.writeBack (no-op when that
+// pointer is null). Sums lin0 * appliedForce and ang0Writeback * appliedForce
+// over every row flagged DY_SC_FLAG_OUTPUT_FORCE, subtracts
+// body0WorldOffset x linear from the angular sum, stores both impulses and sets
+// the broken flag when the constraint is breakable and either magnitude exceeds
+// its break impulse. The context and body-data arguments are unused.
+void writeBack1D(const PxSolverConstraintDesc& desc, SolverContext&, PxSolverBodyData&, PxSolverBodyData&)
+{
+	ConstraintWriteback* out = reinterpret_cast<ConstraintWriteback*>(desc.writeBack);
+	if(!out)
+		return;
+
+	SolverConstraint1DHeader* hdr = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
+	PxU8* row = desc.constraint + sizeof(SolverConstraint1DHeader);
+	const PxU32 rowStride = hdr->type == DY_SC_TYPE_EXT_1D ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D);
+
+	PxVec3 linearSum(0), angularSum(0);
+	for(PxU32 i = 0; i < hdr->count; ++i, row += rowStride)
+	{
+		const SolverConstraint1D* c = reinterpret_cast<SolverConstraint1D*>(row);
+		if(!(c->flags & DY_SC_FLAG_OUTPUT_FORCE))
+			continue;
+
+		linearSum += c->lin0 * c->appliedForce;
+		angularSum += c->ang0Writeback * c->appliedForce;
+	}
+
+	angularSum -= hdr->body0WorldOffset.cross(linearSum);
+	out->linearImpulse = linearSum;
+	out->angularImpulse = angularSum;
+
+	if(hdr->breakable)
+		out->broken = PxU32(linearSum.magnitude()>hdr->linBreakImpulse || angularSum.magnitude()>hdr->angBreakImpulse);
+	else
+		out->broken = 0;
+
+	// After the loop the row cursor must sit exactly at the end of the constraint.
+	PX_ASSERT(desc.constraint + getConstraintLength(desc) == row);
+}
+
+
+// Solves a batch of 1D constraints in order. While entry i is being solved the
+// constraint data of entry i+1 is prefetched (offsets 0, 128 and 256).
+// Precondition: constraintCount >= 1, because the last entry is solved unconditionally.
+void solve1DBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 next = 1; next < constraintCount; ++next)
+	{
+		const PxSolverConstraintDesc& upcoming = desc[next];
+		Ps::prefetchLine(upcoming.constraint);
+		Ps::prefetchLine(upcoming.constraint, 128);
+		Ps::prefetchLine(upcoming.constraint, 256);
+
+		solve1D(desc[next - 1], cache);
+	}
+
+	// Final entry: nothing left to prefetch.
+	solve1D(desc[constraintCount - 1], cache);
+}
+
+// Solves and then concludes each 1D constraint of the batch, in order. The next
+// entry's constraint data is prefetched (offsets 0, 128 and 256) before the
+// current entry is processed.
+// Precondition: constraintCount >= 1, because the last entry is processed unconditionally.
+void solve1DConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 next = 1; next < constraintCount; ++next)
+	{
+		const PxSolverConstraintDesc& upcoming = desc[next];
+		Ps::prefetchLine(upcoming.constraint);
+		Ps::prefetchLine(upcoming.constraint, 128);
+		Ps::prefetchLine(upcoming.constraint, 256);
+
+		const PxSolverConstraintDesc& current = desc[next - 1];
+		solve1D(current, cache);
+		conclude1D(current, cache);
+	}
+
+	const PxSolverConstraintDesc& tail = desc[constraintCount - 1];
+	solve1D(tail, cache);
+	conclude1D(tail, cache);
+}
+
+// Solves each 1D constraint of the batch and immediately writes its result back
+// via writeBack1D, passing the solver body data looked up through the
+// descriptor's bodyADataIndex / bodyBDataIndex. The next entry's constraint data
+// is prefetched (offsets 0, 128 and 256) before the current entry is processed.
+// Precondition: constraintCount >= 1, because the last entry is processed unconditionally.
+void solve1DBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 next = 1; next < constraintCount; ++next)
+	{
+		const PxSolverConstraintDesc& upcoming = desc[next];
+		Ps::prefetchLine(upcoming.constraint);
+		Ps::prefetchLine(upcoming.constraint, 128);
+		Ps::prefetchLine(upcoming.constraint, 256);
+
+		const PxSolverConstraintDesc& current = desc[next - 1];
+		PxSolverBodyData& bodyDataA = cache.solverBodyArray[current.bodyADataIndex];
+		PxSolverBodyData& bodyDataB = cache.solverBodyArray[current.bodyBDataIndex];
+		solve1D(current, cache);
+		writeBack1D(current, cache, bodyDataA, bodyDataB);
+	}
+
+	const PxSolverConstraintDesc& tail = desc[constraintCount - 1];
+	PxSolverBodyData& tailDataA = cache.solverBodyArray[tail.bodyADataIndex];
+	PxSolverBodyData& tailDataB = cache.solverBodyArray[tail.bodyBDataIndex];
+	solve1D(tail, cache);
+	writeBack1D(tail, cache, tailDataA, tailDataB);
+}
+
+// Writes back the results of every 1D constraint in the batch without solving.
+// The next entry's constraint data is prefetched (offsets 0, 128 and 256) before
+// the current entry is written back.
+// Precondition: constraintCount >= 1, because the last entry is processed unconditionally.
+void writeBack1DBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 next = 1; next < constraintCount; ++next)
+	{
+		const PxSolverConstraintDesc& upcoming = desc[next];
+		Ps::prefetchLine(upcoming.constraint);
+		Ps::prefetchLine(upcoming.constraint, 128);
+		Ps::prefetchLine(upcoming.constraint, 256);
+
+		const PxSolverConstraintDesc& current = desc[next - 1];
+		PxSolverBodyData& bodyDataA = cache.solverBodyArray[current.bodyADataIndex];
+		PxSolverBodyData& bodyDataB = cache.solverBodyArray[current.bodyBDataIndex];
+		writeBack1D(current, cache, bodyDataA, bodyDataB);
+	}
+
+	const PxSolverConstraintDesc& tail = desc[constraintCount - 1];
+	PxSolverBodyData& tailDataA = cache.solverBodyArray[tail.bodyADataIndex];
+	PxSolverBodyData& tailDataB = cache.solverBodyArray[tail.bodyBDataIndex];
+	writeBack1D(tail, cache, tailDataA, tailDataB);
+}
+
+// Solves a batch of contact constraints in order. While entry i is being solved
+// the constraint data of entry i+1 is prefetched (offsets 0, 128 and 256).
+// Precondition: constraintCount >= 1, because the last entry is solved unconditionally.
+void solveContactBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 next = 1; next < constraintCount; ++next)
+	{
+		const PxSolverConstraintDesc& upcoming = desc[next];
+		Ps::prefetchLine(upcoming.constraint);
+		Ps::prefetchLine(upcoming.constraint, 128);
+		Ps::prefetchLine(upcoming.constraint, 256);
+
+		solveContact(desc[next - 1], cache);
+	}
+
+	// Final entry: nothing left to prefetch.
+	solveContact(desc[constraintCount - 1], cache);
+}
+
+// Solves and then concludes each contact constraint of the batch, in order. The
+// next entry's constraint data is prefetched (offsets 0, 128 and 256) before the
+// current entry is processed.
+// Precondition: constraintCount >= 1, because the last entry is processed unconditionally.
+void solveContactConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 next = 1; next < constraintCount; ++next)
+	{
+		const PxSolverConstraintDesc& upcoming = desc[next];
+		Ps::prefetchLine(upcoming.constraint);
+		Ps::prefetchLine(upcoming.constraint, 128);
+		Ps::prefetchLine(upcoming.constraint, 256);
+
+		const PxSolverConstraintDesc& current = desc[next - 1];
+		solveContact(current, cache);
+		concludeContact(current, cache);
+	}
+
+	const PxSolverConstraintDesc& tail = desc[constraintCount - 1];
+	solveContact(tail, cache);
+	concludeContact(tail, cache);
+}
+
+// Solves each contact constraint of the batch and immediately writes its result
+// back via writeBackContact. The next entry's constraint data is prefetched
+// (offsets 0, 128 and 256) before the current entry is processed. Afterwards the
+// context's local threshold stream is flushed to the shared stream if fewer than
+// four free slots remain in it.
+// Precondition: constraintCount >= 1, because the last entry is processed unconditionally.
+void solveContactBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 next = 1; next < constraintCount; ++next)
+	{
+		const PxSolverConstraintDesc& upcoming = desc[next];
+		Ps::prefetchLine(upcoming.constraint);
+		Ps::prefetchLine(upcoming.constraint, 128);
+		Ps::prefetchLine(upcoming.constraint, 256);
+
+		const PxSolverConstraintDesc& current = desc[next - 1];
+		PxSolverBodyData& bodyDataA = cache.solverBodyArray[current.bodyADataIndex];
+		PxSolverBodyData& bodyDataB = cache.solverBodyArray[current.bodyBDataIndex];
+		solveContact(current, cache);
+		writeBackContact(current, cache, bodyDataA, bodyDataB);
+	}
+
+	const PxSolverConstraintDesc& tail = desc[constraintCount - 1];
+	PxSolverBodyData& tailDataA = cache.solverBodyArray[tail.bodyADataIndex];
+	PxSolverBodyData& tailDataB = cache.solverBodyArray[tail.bodyBDataIndex];
+	solveContact(tail, cache);
+	writeBackContact(tail, cache, tailDataA, tailDataB);
+
+	if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4))
+	{
+		// Reserve a range in the shared stream; subtracting the pending count from
+		// the atomicAdd result yields the first reserved slot.
+		const PxI32 pendingCount = PxI32(cache.mThresholdStreamIndex);
+		const PxI32 firstSlot = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, pendingCount) - pendingCount;
+		for(PxU32 i = 0; i < cache.mThresholdStreamIndex; ++i)
+			cache.mSharedThresholdStream[i + firstSlot] = cache.mThresholdStream[i];
+
+		cache.mThresholdStreamIndex = 0;
+	}
+}
+
+// Solves a batch of contact constraints with solveContact_BStatic, in order.
+// While entry i is being solved the constraint data of entry i+1 is prefetched
+// (offsets 0, 128 and 256).
+// Precondition: constraintCount >= 1, because the last entry is solved unconditionally.
+void solveContact_BStaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 next = 1; next < constraintCount; ++next)
+	{
+		const PxSolverConstraintDesc& upcoming = desc[next];
+		Ps::prefetchLine(upcoming.constraint);
+		Ps::prefetchLine(upcoming.constraint, 128);
+		Ps::prefetchLine(upcoming.constraint, 256);
+
+		solveContact_BStatic(desc[next - 1], cache);
+	}
+
+	// Final entry: nothing left to prefetch.
+	solveContact_BStatic(desc[constraintCount - 1], cache);
+}
+
+// Runs solveContact_BStatic followed by concludeContact on each constraint of
+// the batch, in order. The next entry's constraint data is prefetched (offsets
+// 0, 128 and 256) before the current entry is processed.
+// Precondition: constraintCount >= 1, because the last entry is processed unconditionally.
+void solveContact_BStaticConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 next = 1; next < constraintCount; ++next)
+	{
+		const PxSolverConstraintDesc& upcoming = desc[next];
+		Ps::prefetchLine(upcoming.constraint);
+		Ps::prefetchLine(upcoming.constraint, 128);
+		Ps::prefetchLine(upcoming.constraint, 256);
+
+		const PxSolverConstraintDesc& current = desc[next - 1];
+		solveContact_BStatic(current, cache);
+		concludeContact(current, cache);
+	}
+
+	const PxSolverConstraintDesc& tail = desc[constraintCount - 1];
+	solveContact_BStatic(tail, cache);
+	concludeContact(tail, cache);
+}
+
+// Runs solveContact_BStatic followed by writeBackContact on each constraint of
+// the batch, in order. The next entry's constraint data is prefetched (offsets
+// 0, 128 and 256) before the current entry is processed. Afterwards the
+// context's local threshold stream is flushed to the shared stream if fewer than
+// four free slots remain in it.
+// Precondition: constraintCount >= 1, because the last entry is processed unconditionally.
+void solveContact_BStaticBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 next = 1; next < constraintCount; ++next)
+	{
+		const PxSolverConstraintDesc& upcoming = desc[next];
+		Ps::prefetchLine(upcoming.constraint);
+		Ps::prefetchLine(upcoming.constraint, 128);
+		Ps::prefetchLine(upcoming.constraint, 256);
+
+		const PxSolverConstraintDesc& current = desc[next - 1];
+		PxSolverBodyData& bodyDataA = cache.solverBodyArray[current.bodyADataIndex];
+		PxSolverBodyData& bodyDataB = cache.solverBodyArray[current.bodyBDataIndex];
+		solveContact_BStatic(current, cache);
+		writeBackContact(current, cache, bodyDataA, bodyDataB);
+	}
+
+	const PxSolverConstraintDesc& tail = desc[constraintCount - 1];
+	PxSolverBodyData& tailDataA = cache.solverBodyArray[tail.bodyADataIndex];
+	PxSolverBodyData& tailDataB = cache.solverBodyArray[tail.bodyBDataIndex];
+	solveContact_BStatic(tail, cache);
+	writeBackContact(tail, cache, tailDataA, tailDataB);
+
+	if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4))
+	{
+		// Not enough room left for four more threshold elements: move the pending
+		// ones to the shared stream. Subtracting the pending count from the
+		// atomicAdd result yields the first reserved slot.
+		const PxI32 pendingCount = PxI32(cache.mThresholdStreamIndex);
+		const PxI32 firstSlot = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, pendingCount) - pendingCount;
+		for(PxU32 i = 0; i < cache.mThresholdStreamIndex; ++i)
+			cache.mSharedThresholdStream[i + firstSlot] = cache.mThresholdStream[i];
+
+		cache.mThresholdStreamIndex = 0;
+	}
+}
+
+//Port of scalar implementation to SIMD maths with some interleaving of instructions
+//
+// Runs one solver pass over every row of a 1D constraint whose two endpoints may
+// each be either a plain solver body or an articulation link.
+//  - Endpoint with linkIndex == NO_LINK: velocity is loaded from, and stored back
+//    to, desc.bodyA / desc.bodyB.
+//  - Otherwise: velocity is fetched with PxcFsGetVelocity and the impulse
+//    accumulated over all rows is handed to PxcFsApplyImpulse, scaled by the
+//    header's inverse-mass scales.
+// The solver context argument is not used.
+void solveExt1D(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+ PxU8* PX_RESTRICT bPtr = desc.constraint;
+ //PxU32 length = desc.constraintLength;
+
+ // The constraint starts with a header, immediately followed by header->count rows.
+ const SolverConstraint1DHeader* PX_RESTRICT header = reinterpret_cast<const SolverConstraint1DHeader*>(bPtr);
+ SolverConstraint1DExt* PX_RESTRICT base = reinterpret_cast<SolverConstraint1DExt*>(bPtr + sizeof(SolverConstraint1DHeader));
+
+ // Working copies of both endpoints' velocities.
+ Vec3V linVel0, angVel0, linVel1, angVel1;
+ if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
+ {
+ linVel0 = V3LoadA(desc.bodyA->linearVelocity);
+ angVel0 = V3LoadA(desc.bodyA->angularState);
+ }
+ else
+ {
+ Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationA, desc.linkIndexA);
+ linVel0 = v.linear;
+ angVel0 = v.angular;
+ }
+
+ if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+ {
+ linVel1 = V3LoadA(desc.bodyB->linearVelocity);
+ angVel1 = V3LoadA(desc.bodyB->angularState);
+ }
+ else
+ {
+ Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationB, desc.linkIndexB);
+ linVel1 = v.linear;
+ angVel1 = v.angular;
+ }
+
+ // Impulse accumulators; only consumed on the articulation path at the end.
+ Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero();
+
+ for(PxU32 i=0; i<header->count;++i, base++)
+ {
+ Ps::prefetchLine(base+1);
+
+ // Each Vec4 load starts at a vector member and also picks up the scalar that
+ // follows it; the variable names record which scalar ends up in W (or X/Y/Z).
+ // NOTE(review): this relies on the member layout of SolverConstraint1DExt - confirm against its declaration.
+ const Vec4V lin0XYZ_constantW = V4LoadA(&base->lin0.x);
+ const Vec4V lin1XYZ_unbiasedConstantW = V4LoadA(&base->lin1.x);
+ const Vec4V ang0XYZ_velMultiplierW = V4LoadA(&base->ang0.x);
+ const Vec4V ang1XYZ_impulseMultiplierW = V4LoadA(&base->ang1.x);
+ const Vec4V minImpulseX_maxImpulseY_appliedForceZ = V4LoadA(&base->minImpulse);
+
+ // The W of lin1 (unbiased constant) is not read in this function.
+ const Vec3V lin0 = Vec3V_From_Vec4V(lin0XYZ_constantW); FloatV constant = V4GetW(lin0XYZ_constantW);
+ const Vec3V lin1 = Vec3V_From_Vec4V(lin1XYZ_unbiasedConstantW);
+ const Vec3V ang0 = Vec3V_From_Vec4V(ang0XYZ_velMultiplierW); FloatV vMul = V4GetW(ang0XYZ_velMultiplierW);
+ const Vec3V ang1 = Vec3V_From_Vec4V(ang1XYZ_impulseMultiplierW); FloatV iMul = V4GetW(ang1XYZ_impulseMultiplierW);
+
+ const FloatV minImpulse = V4GetX(minImpulseX_maxImpulseY_appliedForceZ);
+ const FloatV maxImpulse = V4GetY(minImpulseX_maxImpulseY_appliedForceZ);
+ const FloatV appliedForce = V4GetZ(minImpulseX_maxImpulseY_appliedForceZ);
+
+ // Velocity of endpoint 0 minus velocity of endpoint 1, projected onto this row.
+ const Vec3V v0 = V3MulAdd(linVel0, lin0, V3Mul(angVel0, ang0));
+ const Vec3V v1 = V3MulAdd(linVel1, lin1, V3Mul(angVel1, ang1));
+ const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
+
+ // Presumably iMul*appliedForce + vMul*normalVel + constant (assuming FScaleAdd(a,b,c) = a*b+c - confirm),
+ // then clamped to [minImpulse, maxImpulse]; deltaF is the change relative to the stored force.
+ const FloatV unclampedForce = FScaleAdd(iMul, appliedForce, FScaleAdd(vMul, normalVel, constant));
+ const FloatV clampedForce = FMin(maxImpulse, (FMax(minImpulse, unclampedForce)));
+ const FloatV deltaF = FSub(clampedForce, appliedForce);
+
+ // Persist the new total force for this row, then accumulate the impulse along the row directions.
+ FStore(clampedForce, &base->appliedForce);
+ li0 = V3ScaleAdd(lin0, deltaF, li0); ai0 = V3ScaleAdd(ang0, deltaF, ai0);
+ li1 = V3ScaleAdd(lin1, deltaF, li1); ai1 = V3ScaleAdd(ang1, deltaF, ai1);
+
+ // Update the working velocities with the row's stored per-unit-impulse velocity deltas so later rows see the change.
+ linVel0 = V3ScaleAdd(base->deltaVA.linear, deltaF, linVel0); angVel0 = V3ScaleAdd(base->deltaVA.angular, deltaF, angVel0);
+ linVel1 = V3ScaleAdd(base->deltaVB.linear, deltaF, linVel1); angVel1 = V3ScaleAdd(base->deltaVB.angular, deltaF, angVel1);
+ }
+
+ // Commit: plain bodies get the updated velocities stored directly, articulation links
+ // receive the accumulated impulse scaled by the header's inverse-mass scales.
+ if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
+ {
+ V3StoreA(linVel0, desc.bodyA->linearVelocity);
+ V3StoreA(angVel0, desc.bodyA->angularState);
+ }
+ else
+ PxcFsApplyImpulse(*desc.articulationA, desc.linkIndexA, V3Scale(li0, FLoad(header->linearInvMassScale0)),
+ V3Scale(ai0, FLoad(header->angularInvMassScale0)));
+
+ if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+ {
+ V3StoreA(linVel1, desc.bodyB->linearVelocity);
+ V3StoreA(angVel1, desc.bodyB->angularState);
+ }
+ else
+ PxcFsApplyImpulse(*desc.articulationB, desc.linkIndexB, V3Scale(li1, FLoad(header->linearInvMassScale1)),
+ V3Scale(ai1, FLoad(header->angularInvMassScale1)));
+}
+
+// Runs one solver pass over every contact patch of an extended contact
+// constraint, i.e. one whose two endpoints may each be either a plain solver
+// body or an articulation link.
+//  - Endpoint with linkIndex == NO_LINK: velocity is loaded from, and stored back
+//    to, desc.bodyA / desc.bodyB.
+//  - Otherwise: velocity is fetched with PxcFsGetVelocity and the impulse
+//    accumulated over all patches is handed to PxcFsApplyImpulse.
+// Friction rows are only solved when cache.doFriction is set and the patch has any.
+void solveExtContact(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+ // Working copies of both endpoints' velocities.
+ Vec3V linVel0, angVel0, linVel1, angVel1;
+
+ if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
+ {
+ linVel0 = V3LoadA(desc.bodyA->linearVelocity);
+ angVel0 = V3LoadA(desc.bodyA->angularState);
+ }
+ else
+ {
+ Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationA, desc.linkIndexA);
+ linVel0 = v.linear;
+ angVel0 = v.angular;
+ }
+
+ if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+ {
+ linVel1 = V3LoadA(desc.bodyB->linearVelocity);
+ angVel1 = V3LoadA(desc.bodyB->angularState);
+ }
+ else
+ {
+ Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationB, desc.linkIndexB);
+ linVel1 = v.linear;
+ angVel1 = v.angular;
+ }
+
+ // The constraint length is stored in units of 16 bytes.
+ const PxU8* PX_RESTRICT last = desc.constraint + desc.constraintLengthOver16*16;
+
+ //hopefully pointer aliasing doesn't bite.
+ PxU8* PX_RESTRICT currPtr = desc.constraint;
+
+ // Impulses summed over all patches; only consumed on the articulation path at the end.
+ Vec3V linImpulse0 = V3Zero(), linImpulse1 = V3Zero(), angImpulse0 = V3Zero(), angImpulse1 = V3Zero();
+
+ while(currPtr < last)
+ {
+ // Per-patch stream layout: header, numNormalConstr contact points, applied-force
+ // buffer (contact count rounded up to a multiple of four floats), numFrictionConstr friction rows.
+ SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
+ currPtr += sizeof(SolverContactHeader);
+
+ const PxU32 numNormalConstr = hdr->numNormalConstr;
+ const PxU32 numFrictionConstr = hdr->numFrictionConstr;
+
+ SolverContactPointExt* PX_RESTRICT contacts = reinterpret_cast<SolverContactPointExt*>(currPtr);
+ Ps::prefetchLine(contacts);
+ currPtr += numNormalConstr * sizeof(SolverContactPointExt);
+
+ PxF32* appliedForceBuffer = reinterpret_cast<PxF32*>(currPtr);
+ currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));
+
+ SolverContactFrictionExt* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionExt*>(currPtr);
+ currPtr += numFrictionConstr * sizeof(SolverContactFrictionExt);
+
+
+
+ // Impulse accumulators for this patch only; folded into the totals after the friction pass.
+ Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero();
+
+ const Vec3V contactNormal = hdr->normal;
+
+ // Solve the normal rows first; the returned accumulated normal impulse bounds the friction impulses below.
+ const FloatV accumulatedNormalImpulse = solveExtContacts(contacts, numNormalConstr, contactNormal, linVel0, angVel0, linVel1,
+ angVel1, li0, ai0, li1, ai1, appliedForceBuffer);
+
+
+ if(cache.doFriction && numFrictionConstr)
+ {
+ Ps::prefetchLine(frictions);
+ // Static limit decides whether a row is clamped; dynamic limit is what it is clamped to.
+ const FloatV maxFrictionImpulse = FMul(hdr->getStaticFriction(), accumulatedNormalImpulse);
+ const FloatV maxDynFrictionImpulse = FMul(hdr->getDynamicFriction(), accumulatedNormalImpulse);
+
+ BoolV broken = BFFFF();
+
+ for(PxU32 i=0;i<numFrictionConstr;i++)
+ {
+ SolverContactFrictionExt& f = frictions[i];
+ // On the final iteration this prefetch targets the address just past the last friction row.
+ Ps::prefetchLine(&frictions[i+1]);
+
+ // Each Vec4 packs a direction in XYZ and a scalar in W, as the member names indicate.
+ const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW;
+ const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
+ const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW;
+
+ const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
+ /*const Vec3V normal0 = V3Scale(normal, sqrtInvMass0);
+ const Vec3V normal1 = V3Scale(normal, sqrtInvMass1);*/
+ const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);
+ const Vec3V rbXn = Vec3V_From_Vec4V(rbXnXYZ_biasW);
+
+ const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
+ const FloatV bias = V4GetW(rbXnXYZ_biasW);
+ const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);
+
+ const FloatV targetVel = FLoad(f.targetVel);
+
+ const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse);
+ const FloatV negMaxFrictionImpulse = FNeg(maxFrictionImpulse);
+
+ // Velocity of endpoint 0 minus velocity of endpoint 1, projected onto this friction row.
+ const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angVel0, raXn));
+ const Vec3V v1 = V3MulAdd(linVel1, normal, V3Mul(angVel1, rbXn));
+ const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
+
+ // appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation
+ // (the code uses (bias - targetVel) in place of bias)
+ const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce);
+
+ // Algorithm:
+ // if abs(appliedForce + deltaF) > maxFrictionImpulse
+ // clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
+ // (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
+ // set broken flag to true || broken flag
+
+ // FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier);
+ // FloatV potentialSumF = FAdd(appliedForce, deltaF);
+
+ // Presumably tmp1 - normalVel*velMultiplier (assuming FNegScaleSub(a,b,c) = c - a*b - confirm).
+ const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);
+
+ // On XBox this clamping code uses the vector simple pipe rather than vector float,
+ // which eliminates a lot of stall cycles
+
+ // Outside the static-friction range on either side?
+ const BoolV clampLow = FIsGrtr(negMaxFrictionImpulse, totalImpulse);
+ const BoolV clampHigh = FIsGrtr(totalImpulse, maxFrictionImpulse);
+
+ // Candidate values limited to the dynamic-friction range.
+ const FloatV totalClampedLow = FMax(negMaxDynFrictionImpulse, totalImpulse);
+ const FloatV totalClampedHigh = FMin(maxDynFrictionImpulse, totalImpulse);
+
+ const FloatV newAppliedForce = FSel(clampLow, totalClampedLow,
+ FSel(clampHigh, totalClampedHigh, totalImpulse));
+
+ // Any clamped row marks the whole patch's friction as broken.
+ broken = BOr(broken, BOr(clampLow, clampHigh));
+
+ FloatV deltaF = FSub(newAppliedForce, appliedForce);
+
+ // Update the working velocities with the row's stored per-unit-impulse velocity deltas.
+ linVel0 = V3ScaleAdd(f.linDeltaVA, deltaF, linVel0);
+ angVel0 = V3ScaleAdd(f.angDeltaVA, deltaF, angVel0);
+ linVel1 = V3ScaleAdd(f.linDeltaVB, deltaF, linVel1);
+ angVel1 = V3ScaleAdd(f.angDeltaVB, deltaF, angVel1);
+
+ li0 = V3ScaleAdd(normal, deltaF, li0); ai0 = V3ScaleAdd(raXn, deltaF, ai0);
+ li1 = V3ScaleAdd(normal, deltaF, li1); ai1 = V3ScaleAdd(rbXn, deltaF, ai1);
+
+ f.setAppliedForce(newAppliedForce);
+ }
+ // hdr->broken is only rewritten when the friction pass actually ran.
+ Store_From_BoolV(broken, &hdr->broken);
+ }
+
+ // Fold this patch's impulses into the totals, scaled by the header's dominance values.
+ // Endpoint 1 uses V3NegScaleSub where endpoint 0 uses V3ScaleAdd
+ // (presumably accumulating with the opposite sign - confirm against PsVecMath).
+ linImpulse0 = V3ScaleAdd(li0, hdr->getDominance0(), linImpulse0);
+ angImpulse0 = V3ScaleAdd(ai0, FLoad(hdr->angDom0), angImpulse0);
+ linImpulse1 = V3NegScaleSub(li1, hdr->getDominance1(), linImpulse1);
+ angImpulse1 = V3NegScaleSub(ai1, FLoad(hdr->angDom1), angImpulse1);
+ }
+
+ // Commit: plain bodies get the updated velocities stored directly, articulation links
+ // receive the accumulated impulse.
+ if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
+ {
+ V3StoreA(linVel0, desc.bodyA->linearVelocity);
+ V3StoreA(angVel0, desc.bodyA->angularState);
+ }
+ else
+ PxcFsApplyImpulse(*desc.articulationA, desc.linkIndexA, linImpulse0, angImpulse0);
+
+ if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+ {
+ V3StoreA(linVel1, desc.bodyB->linearVelocity);
+ V3StoreA(angVel1, desc.bodyB->angularState);
+ }
+ else
+ PxcFsApplyImpulse(*desc.articulationB, desc.linkIndexB, linImpulse1, angImpulse1);
+
+ // The walk over the patches must end exactly at the end of the constraint data.
+ PX_ASSERT(currPtr == last);
+}
+
+
+// Applies solveExtContact to each of the constraintCount descriptors, in order.
+// An empty batch is a no-op.
+void solveExtContactBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index < constraintCount)
+	{
+		solveExtContact(desc[index], cache);
+		++index;
+	}
+}
+
+// For each of the constraintCount descriptors, in order: solveExtContact
+// followed by concludeContact. An empty batch is a no-op.
+void solveExtContactConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index < constraintCount)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+		solveExtContact(current, cache);
+		concludeContact(current, cache);
+		++index;
+	}
+}
+
+// For each descriptor, in order: solveExtContact followed by writeBackContact.
+// The body data handed to writeBackContact is solverBodyArray[bodyXDataIndex]
+// for a plain body and solverBodyArray[0] for an articulation link. Afterwards
+// every element pending in the context's local threshold stream is moved to the
+// shared stream (the flush happens whenever anything is pending).
+void solveExtContactBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index < constraintCount)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+		PxSolverBodyData& bodyDataA = cache.solverBodyArray[current.linkIndexA == PxSolverConstraintDesc::NO_LINK ? current.bodyADataIndex : 0];
+		PxSolverBodyData& bodyDataB = cache.solverBodyArray[current.linkIndexB == PxSolverConstraintDesc::NO_LINK ? current.bodyBDataIndex : 0];
+
+		solveExtContact(current, cache);
+		writeBackContact(current, cache, bodyDataA, bodyDataB);
+		++index;
+	}
+
+	if(cache.mThresholdStreamIndex > 0)
+	{
+		// Reserve a range in the shared stream; subtracting the pending count from
+		// the atomicAdd result yields the first reserved slot.
+		const PxI32 pendingCount = PxI32(cache.mThresholdStreamIndex);
+		const PxI32 firstSlot = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, pendingCount) - pendingCount;
+		for(PxU32 i = 0; i < cache.mThresholdStreamIndex; ++i)
+			cache.mSharedThresholdStream[i + firstSlot] = cache.mThresholdStream[i];
+
+		cache.mThresholdStreamIndex = 0;
+	}
+}
+
+// Applies solveExt1D to each of the constraintCount descriptors, in order.
+// An empty batch is a no-op.
+void solveExt1DBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index < constraintCount)
+	{
+		solveExt1D(desc[index], cache);
+		++index;
+	}
+}
+
+// For each of the constraintCount descriptors, in order: solveExt1D followed by
+// conclude1D. An empty batch is a no-op.
+void solveExt1DConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index < constraintCount)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+		solveExt1D(current, cache);
+		conclude1D(current, cache);
+		++index;
+	}
+}
+
+// For each descriptor, in order: solveExt1D followed by writeBack1D. The body
+// data handed to writeBack1D is solverBodyArray[bodyXDataIndex] for a plain body
+// and solverBodyArray[0] for an articulation link.
+void solveExt1DBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index < constraintCount)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+		PxSolverBodyData& bodyDataA = cache.solverBodyArray[current.linkIndexA == PxSolverConstraintDesc::NO_LINK ? current.bodyADataIndex : 0];
+		PxSolverBodyData& bodyDataB = cache.solverBodyArray[current.linkIndexB == PxSolverConstraintDesc::NO_LINK ? current.bodyBDataIndex : 0];
+
+		solveExt1D(current, cache);
+		writeBack1D(current, cache, bodyDataA, bodyDataB);
+		++index;
+	}
+}
+
+// Calls writeBack1D for each descriptor, in order, without solving. The body
+// data handed to writeBack1D is solverBodyArray[bodyXDataIndex] for a plain body
+// and solverBodyArray[0] for an articulation link.
+void ext1DBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index < constraintCount)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+		PxSolverBodyData& bodyDataA = cache.solverBodyArray[current.linkIndexA == PxSolverConstraintDesc::NO_LINK ? current.bodyADataIndex : 0];
+		PxSolverBodyData& bodyDataB = cache.solverBodyArray[current.linkIndexB == PxSolverConstraintDesc::NO_LINK ? current.bodyBDataIndex : 0];
+
+		writeBack1D(current, cache, bodyDataA, bodyDataB);
+		++index;
+	}
+}
+
+// Single-constraint convenience: solveExtContact, then concludeContact, on the same descriptor.
+void solveConcludeExtContact(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+	solveExtContact(desc, cache);
+	concludeContact(desc, cache);
+}
+
+// Single-constraint convenience: solveExt1D, then conclude1D, on the same descriptor.
+void solveConcludeExt1D(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+	solveExt1D(desc, cache);
+	conclude1D(desc, cache);
+}
+
+
+// Single-constraint convenience: solve1D, then conclude1D, on the same descriptor.
+void solveConclude1D(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+	solve1D(desc, cache);
+
+	conclude1D(desc, cache);
+}
+
+// Single-constraint convenience: solveContact, then concludeContact, on the same descriptor.
+void solveConcludeContact(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+	solveContact(desc, cache);
+	concludeContact(desc, cache);
+}
+
+// Single-constraint convenience: solveContact_BStatic, then concludeContact, on the same descriptor.
+void solveConcludeContact_BStatic(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+	solveContact_BStatic(desc, cache);
+	concludeContact(desc, cache);
+}
+
+
+}
+
+}
+
+#endif
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsBlock.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsBlock.cpp
new file mode 100644
index 00000000..aa06dfcf
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsBlock.cpp
@@ -0,0 +1,1230 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxPreprocessor.h"
+#include "PsVecMath.h"
+#include "PsFPU.h"
+
+#ifdef PX_SUPPORT_SIMD
+
+#include "CmPhysXCommon.h"
+#include "DySolverBody.h"
+#include "DySolverContact.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverConstraintDesc.h"
+#include "DyThresholdTable.h"
+#include "DySolverContext.h"
+#include "PsUtilities.h"
+#include "DyConstraint.h"
+#include "PsAtomic.h"
+#include "DySolverContact4.h"
+#include "DySolverConstraint1D4.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+// Solves one iteration of a batch of four contact constraints (header type DY_SC_TYPE_BLOCK_RB_CONTACT) in SIMD.
+//
+// desc points at four descriptors. The linear velocity and angular state of all eight bodies
+// (bodyA/bodyB of each descriptor) are loaded and transposed so that every Vec4V carries one
+// component for the four constraints. The packed constraint stream is read from desc[0].constraint
+// only and is walked header by header until getConstraintLength(desc[0]) bytes have been consumed.
+//
+// For each header the normal rows are solved first, then (when cache.doFriction is set and the
+// header has friction rows) the friction rows. Applied forces are updated in place in the stream
+// and the per-header friction "broken" mask is stored into the shared friction data.
+//
+// On exit bodyA of every descriptor is written back unconditionally; bodyB is written back only
+// when the descriptor's bodyBDataIndex is non-zero.
+static void solveContact4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& cache)
+{
+ PxSolverBody& b00 = *desc[0].bodyA;
+ PxSolverBody& b01 = *desc[0].bodyB;
+ PxSolverBody& b10 = *desc[1].bodyA;
+ PxSolverBody& b11 = *desc[1].bodyB;
+ PxSolverBody& b20 = *desc[2].bodyA;
+ PxSolverBody& b21 = *desc[2].bodyB;
+ PxSolverBody& b30 = *desc[3].bodyA;
+ PxSolverBody& b31 = *desc[3].bodyB;
+
+ //We'll need this.
+ const Vec4V vZero = V4Zero();
+
+ // Naming: linVelCB / angStateCB where C is the constraint lane (0..3) and B is the body (0 = A, 1 = B).
+ Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
+ Vec4V linVel01 = V4LoadA(&b01.linearVelocity.x);
+ Vec4V angState00 = V4LoadA(&b00.angularState.x);
+ Vec4V angState01 = V4LoadA(&b01.angularState.x);
+
+ Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
+ Vec4V linVel11 = V4LoadA(&b11.linearVelocity.x);
+ Vec4V angState10 = V4LoadA(&b10.angularState.x);
+ Vec4V angState11 = V4LoadA(&b11.angularState.x);
+
+ Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
+ Vec4V linVel21 = V4LoadA(&b21.linearVelocity.x);
+ Vec4V angState20 = V4LoadA(&b20.angularState.x);
+ Vec4V angState21 = V4LoadA(&b21.angularState.x);
+
+ Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
+ Vec4V linVel31 = V4LoadA(&b31.linearVelocity.x);
+ Vec4V angState30 = V4LoadA(&b30.angularState.x);
+ Vec4V angState31 = V4LoadA(&b31.angularState.x);
+
+
+ // Transposed state: <name><body>T<component>, each vector holding that component for the four lanes.
+ Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
+ Vec4V linVel1T0, linVel1T1, linVel1T2, linVel1T3;
+ Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
+ Vec4V angState1T0, angState1T1, angState1T2, angState1T3;
+
+
+ PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
+ PX_TRANSPOSE_44(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2, linVel1T3);
+ PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
+ PX_TRANSPOSE_44(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2, angState1T3);
+
+
+ const PxU8* PX_RESTRICT last = desc[0].constraint + getConstraintLength(desc[0]);
+
+ //hopefully pointer aliasing doesn't bite.
+ PxU8* PX_RESTRICT currPtr = desc[0].constraint;
+
+ // Stand-in max impulse used when a header carries no per-contact max impulses.
+ Vec4V vMax = V4Splat(FMax());
+
+ // Prefetching starts just past the first header and the first contact point.
+ const PxU8* PX_RESTRICT prefetchAddress = currPtr + sizeof(SolverContactHeader4) + sizeof(SolverContactBatchPointDynamic4);
+
+ const SolverContactHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader4*>(currPtr);
+
+ // The inverse mass terms are read from the first header only and reused for every header in the stream.
+ const Vec4V invMassA = hdr->invMass0D0;
+ const Vec4V invMassB = hdr->invMass1D1;
+
+ const Vec4V sumInvMass = V4Add(invMassA, invMassB);
+
+
+ while(currPtr < last)
+ {
+
+ hdr = reinterpret_cast<const SolverContactHeader4*>(currPtr);
+
+ PX_ASSERT(hdr->type == DY_SC_TYPE_BLOCK_RB_CONTACT);
+
+ // Walk the packed layout that follows the header: applied forces, contact points, optional max impulses,
+ // optional shared friction data, friction applied forces, friction rows.
+ currPtr = reinterpret_cast<PxU8*>(const_cast<SolverContactHeader4*>(hdr) + 1);
+
+ const PxU32 numNormalConstr = hdr->numNormalConstr;
+ const PxU32 numFrictionConstr = hdr->numFrictionConstr;
+
+ bool hasMaxImpulse = (hdr->flag & SolverContactHeader4::eHAS_MAX_IMPULSE) != 0;
+
+ Vec4V* appliedForces = reinterpret_cast<Vec4V*>(currPtr);
+ currPtr += sizeof(Vec4V)*numNormalConstr;
+
+ SolverContactBatchPointDynamic4* PX_RESTRICT contacts = reinterpret_cast<SolverContactBatchPointDynamic4*>(currPtr);
+
+ Vec4V* maxImpulses;
+ currPtr = reinterpret_cast<PxU8*>(contacts + numNormalConstr);
+ // With mask 0 every lookup maxImpulses[i & mask] is redirected to the single vMax vector.
+ PxU32 maxImpulseMask = 0;
+ if(hasMaxImpulse)
+ {
+ maxImpulseMask = 0xFFFFFFFF;
+ maxImpulses = reinterpret_cast<Vec4V*>(currPtr);
+ currPtr += sizeof(Vec4V) * numNormalConstr;
+ }
+ else
+ {
+ maxImpulses = &vMax;
+ }
+
+
+ // The shared friction data is only skipped (and only used below) when the header has friction rows.
+ SolverFrictionSharedData4* PX_RESTRICT fd = reinterpret_cast<SolverFrictionSharedData4*>(currPtr);
+ if(numFrictionConstr)
+ currPtr += sizeof(SolverFrictionSharedData4);
+
+ Vec4V* frictionAppliedForce = reinterpret_cast<Vec4V*>(currPtr);
+ currPtr += sizeof(Vec4V)*numFrictionConstr;
+
+ const SolverContactFrictionDynamic4* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionDynamic4*>(currPtr);
+ currPtr += numFrictionConstr * sizeof(SolverContactFrictionDynamic4);
+
+ Vec4V accumulatedNormalImpulse = vZero;
+
+ const Vec4V angD0 = hdr->angDom0;
+ const Vec4V angD1 = hdr->angDom1;
+
+ const Vec4V _normalT0 = hdr->normalX;
+ const Vec4V _normalT1 = hdr->normalY;
+ const Vec4V _normalT2 = hdr->normalZ;
+
+ // Linear part of the relative normal velocity (body A minus body B); kept up to date incrementally in the loop below.
+ Vec4V contactNormalVel1 = V4Mul(linVel0T0, _normalT0);
+ Vec4V contactNormalVel3 = V4Mul(linVel1T0, _normalT0);
+ contactNormalVel1 = V4MulAdd(linVel0T1, _normalT1, contactNormalVel1);
+ contactNormalVel3 = V4MulAdd(linVel1T1, _normalT1, contactNormalVel3);
+ contactNormalVel1 = V4MulAdd(linVel0T2, _normalT2, contactNormalVel1);
+ contactNormalVel3 = V4MulAdd(linVel1T2, _normalT2, contactNormalVel3);
+
+ Vec4V relVel1 = V4Sub(contactNormalVel1, contactNormalVel3);
+
+ Vec4V accumDeltaF = vZero;
+
+ // Normal rows.
+ for(PxU32 i=0;i<numNormalConstr;i++)
+ {
+ const SolverContactBatchPointDynamic4& c = contacts[i];
+
+ PxU32 offset = 0;
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ prefetchAddress += offset;
+
+ const Vec4V appliedForce = appliedForces[i];
+ const Vec4V maxImpulse = maxImpulses[i & maxImpulseMask];
+
+ Vec4V contactNormalVel2 = V4Mul(c.raXnX, angState0T0);
+ Vec4V contactNormalVel4 = V4Mul(c.rbXnX, angState1T0);
+
+ contactNormalVel2 = V4MulAdd(c.raXnY, angState0T1, contactNormalVel2);
+ contactNormalVel4 = V4MulAdd(c.rbXnY, angState1T1, contactNormalVel4);
+
+ contactNormalVel2 = V4MulAdd(c.raXnZ, angState0T2, contactNormalVel2);
+ contactNormalVel4 = V4MulAdd(c.rbXnZ, angState1T2, contactNormalVel4);
+
+ const Vec4V normalVel = V4Add(relVel1, V4Sub(contactNormalVel2, contactNormalVel4));
+
+ // deltaF = biasedErr - normalVel * velMultiplier
+ Vec4V deltaF = V4NegMulSub(normalVel, c.velMultiplier, c.biasedErr);
+
+ // Limit deltaF so the new applied force is neither below zero nor above maxImpulse.
+ deltaF = V4Max(deltaF, V4Neg(appliedForce));
+ const Vec4V newAppliedForce = V4Min(V4Add(appliedForce, deltaF), maxImpulse);
+ deltaF = V4Sub(newAppliedForce, appliedForce);
+
+ accumDeltaF = V4Add(accumDeltaF, deltaF);
+
+ const Vec4V angDetaF0 = V4Mul(deltaF, angD0);
+ const Vec4V angDetaF1 = V4Mul(deltaF, angD1);
+
+ relVel1 = V4MulAdd(sumInvMass, deltaF, relVel1);
+
+ angState0T0 = V4MulAdd(c.raXnX, angDetaF0, angState0T0);
+ angState1T0 = V4NegMulSub(c.rbXnX, angDetaF1, angState1T0);
+
+ angState0T1 = V4MulAdd(c.raXnY, angDetaF0, angState0T1);
+ angState1T1 = V4NegMulSub(c.rbXnY, angDetaF1, angState1T1);
+
+ angState0T2 = V4MulAdd(c.raXnZ, angDetaF0, angState0T2);
+ angState1T2 = V4NegMulSub(c.rbXnZ, angDetaF1, angState1T2);
+
+ appliedForces[i] = newAppliedForce;
+
+ accumulatedNormalImpulse = V4Add(accumulatedNormalImpulse, newAppliedForce);
+ }
+
+ // Apply the accumulated normal impulse to both linear velocities in one step (added to A, subtracted from B).
+ const Vec4V accumDeltaF_IM0 = V4Mul(accumDeltaF, invMassA);
+ const Vec4V accumDeltaF_IM1 = V4Mul(accumDeltaF, invMassB);
+
+ linVel0T0 = V4MulAdd(_normalT0, accumDeltaF_IM0, linVel0T0);
+ linVel1T0 = V4NegMulSub(_normalT0, accumDeltaF_IM1, linVel1T0);
+ linVel0T1 = V4MulAdd(_normalT1, accumDeltaF_IM0, linVel0T1);
+ linVel1T1 = V4NegMulSub(_normalT1, accumDeltaF_IM1, linVel1T1);
+ linVel0T2 = V4MulAdd(_normalT2, accumDeltaF_IM0, linVel0T2);
+ linVel1T2 = V4NegMulSub(_normalT2, accumDeltaF_IM1, linVel1T2);
+
+
+ if(cache.doFriction && numFrictionConstr)
+ {
+ const Vec4V staticFric = hdr->staticFriction;
+ const Vec4V dynamicFric = hdr->dynamicFriction;
+
+ const Vec4V maxFrictionImpulse = V4Mul(staticFric, accumulatedNormalImpulse);
+ const Vec4V maxDynFrictionImpulse = V4Mul(dynamicFric, accumulatedNormalImpulse);
+ const Vec4V negMaxDynFrictionImpulse = V4Neg(maxDynFrictionImpulse);
+ //const Vec4V negMaxFrictionImpulse = V4Neg(maxFrictionImpulse);
+ BoolV broken = BFFFF();
+
+ if(cache.writeBackIteration)
+ {
+ Ps::prefetchLine(fd->frictionBrokenWritebackByte[0]);
+ Ps::prefetchLine(fd->frictionBrokenWritebackByte[1]);
+ Ps::prefetchLine(fd->frictionBrokenWritebackByte[2]);
+ // Lane 3 is written by writeBackContact4_Block like the other lanes, so prefetch it too
+ // (solveContact4_StaticBlock already prefetches all four).
+ Ps::prefetchLine(fd->frictionBrokenWritebackByte[3]);
+ }
+
+
+ // Friction rows.
+ for(PxU32 i=0;i<numFrictionConstr;i++)
+ {
+ const SolverContactFrictionDynamic4& f = frictions[i];
+
+ PxU32 offset = 0;
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ prefetchAddress += offset;
+
+ const Vec4V appliedForce = frictionAppliedForce[i];
+
+ // Rows alternate between the two shared friction directions (index i&1).
+ const Vec4V normalT0 = fd->normalX[i&1];
+ const Vec4V normalT1 = fd->normalY[i&1];
+ const Vec4V normalT2 = fd->normalZ[i&1];
+
+ Vec4V normalVel1 = V4Mul(linVel0T0, normalT0);
+ Vec4V normalVel2 = V4Mul(f.raXnX, angState0T0);
+ Vec4V normalVel3 = V4Mul(linVel1T0, normalT0);
+ Vec4V normalVel4 = V4Mul(f.rbXnX, angState1T0);
+
+ normalVel1 = V4MulAdd(linVel0T1, normalT1, normalVel1);
+ normalVel2 = V4MulAdd(f.raXnY, angState0T1, normalVel2);
+ normalVel3 = V4MulAdd(linVel1T1, normalT1, normalVel3);
+ normalVel4 = V4MulAdd(f.rbXnY, angState1T1, normalVel4);
+
+ normalVel1 = V4MulAdd(linVel0T2, normalT2, normalVel1);
+ normalVel2 = V4MulAdd(f.raXnZ, angState0T2, normalVel2);
+ normalVel3 = V4MulAdd(linVel1T2, normalT2, normalVel3);
+ normalVel4 = V4MulAdd(f.rbXnZ, angState1T2, normalVel4);
+
+ const Vec4V _normalVel = V4Add(normalVel1, normalVel2);
+ const Vec4V __normalVel = V4Add(normalVel3, normalVel4);
+
+ // appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation
+
+ const Vec4V normalVel = V4Sub(_normalVel, __normalVel );
+
+ const Vec4V tmp1 = V4Sub(appliedForce, f.scaledBias);
+
+ const Vec4V totalImpulse = V4NegMulSub(normalVel, f.velMultiplier, tmp1);
+
+ // A lane becomes (and stays) broken once |totalImpulse| exceeds staticFriction * accumulated normal impulse.
+ broken = BOr(broken, V4IsGrtr(V4Abs(totalImpulse), maxFrictionImpulse));
+
+ // Broken lanes are clamped to +/- dynamicFriction * accumulated normal impulse; the others keep totalImpulse.
+ const Vec4V newAppliedForce = V4Sel(broken, V4Min(maxDynFrictionImpulse, V4Max(negMaxDynFrictionImpulse, totalImpulse)), totalImpulse);
+
+ const Vec4V deltaF =V4Sub(newAppliedForce, appliedForce);
+
+ frictionAppliedForce[i] = newAppliedForce;
+
+ const Vec4V deltaFIM0 = V4Mul(deltaF, invMassA);
+ const Vec4V deltaFIM1 = V4Mul(deltaF, invMassB);
+
+ const Vec4V angDetaF0 = V4Mul(deltaF, angD0);
+ const Vec4V angDetaF1 = V4Mul(deltaF, angD1);
+
+ linVel0T0 = V4MulAdd(normalT0, deltaFIM0, linVel0T0);
+ linVel1T0 = V4NegMulSub(normalT0, deltaFIM1, linVel1T0);
+ angState0T0 = V4MulAdd(f.raXnX, angDetaF0, angState0T0);
+ angState1T0 = V4NegMulSub(f.rbXnX, angDetaF1, angState1T0);
+
+ linVel0T1 = V4MulAdd(normalT1, deltaFIM0, linVel0T1);
+ linVel1T1 = V4NegMulSub(normalT1, deltaFIM1, linVel1T1);
+ angState0T1 = V4MulAdd(f.raXnY, angDetaF0, angState0T1);
+ angState1T1 = V4NegMulSub(f.rbXnY, angDetaF1, angState1T1);
+
+ linVel0T2 = V4MulAdd(normalT2, deltaFIM0, linVel0T2);
+ linVel1T2 = V4NegMulSub(normalT2, deltaFIM1, linVel1T2);
+ angState0T2 = V4MulAdd(f.raXnZ, angDetaF0, angState0T2);
+ angState1T2 = V4NegMulSub(f.rbXnZ, angDetaF1, angState1T2);
+ }
+ fd->broken = broken;
+ }
+ }
+
+ // Transpose back to one vector per body.
+ PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
+ PX_TRANSPOSE_44(linVel1T0, linVel1T1, linVel1T2, linVel1T3, linVel01, linVel11, linVel21, linVel31);
+ PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
+ PX_TRANSPOSE_44(angState1T0, angState1T1, angState1T2, angState1T3, angState01, angState11, angState21, angState31);
+
+ // NOTE(review): these asserts inspect the bodies before the new values are stored below.
+ PX_ASSERT(b00.linearVelocity.isFinite());
+ PX_ASSERT(b00.angularState.isFinite());
+ PX_ASSERT(b10.linearVelocity.isFinite());
+ PX_ASSERT(b10.angularState.isFinite());
+ PX_ASSERT(b20.linearVelocity.isFinite());
+ PX_ASSERT(b20.angularState.isFinite());
+ PX_ASSERT(b30.linearVelocity.isFinite());
+ PX_ASSERT(b30.angularState.isFinite());
+
+ PX_ASSERT(b01.linearVelocity.isFinite());
+ PX_ASSERT(b01.angularState.isFinite());
+ PX_ASSERT(b11.linearVelocity.isFinite());
+ PX_ASSERT(b11.angularState.isFinite());
+ PX_ASSERT(b21.linearVelocity.isFinite());
+ PX_ASSERT(b21.angularState.isFinite());
+ PX_ASSERT(b31.linearVelocity.isFinite());
+ PX_ASSERT(b31.angularState.isFinite());
+
+ // Write back
+ V4StoreA(linVel00, &b00.linearVelocity.x);
+ V4StoreA(angState00, &b00.angularState.x);
+ V4StoreA(linVel10, &b10.linearVelocity.x);
+ V4StoreA(angState10, &b10.angularState.x);
+ V4StoreA(linVel20, &b20.linearVelocity.x);
+ V4StoreA(angState20, &b20.angularState.x);
+ V4StoreA(linVel30, &b30.linearVelocity.x);
+ V4StoreA(angState30, &b30.angularState.x);
+
+ // Body B is only stored when its data index is non-zero.
+ // NOTE(review): presumably index 0 denotes the shared static body, which must not be modified - confirm.
+ if(desc[0].bodyBDataIndex != 0)
+ {
+ V4StoreA(linVel01, &b01.linearVelocity.x);
+ V4StoreA(angState01, &b01.angularState.x);
+ }
+ if(desc[1].bodyBDataIndex != 0)
+ {
+ V4StoreA(linVel11, &b11.linearVelocity.x);
+ V4StoreA(angState11, &b11.angularState.x);
+ }
+ if(desc[2].bodyBDataIndex != 0)
+ {
+ V4StoreA(linVel21, &b21.linearVelocity.x);
+ V4StoreA(angState21, &b21.angularState.x);
+ }
+ if(desc[3].bodyBDataIndex != 0)
+ {
+ V4StoreA(linVel31, &b31.linearVelocity.x);
+ V4StoreA(angState31, &b31.angularState.x);
+ }
+
+ PX_ASSERT(b00.linearVelocity.isFinite());
+ PX_ASSERT(b00.angularState.isFinite());
+ PX_ASSERT(b10.linearVelocity.isFinite());
+ PX_ASSERT(b10.angularState.isFinite());
+ PX_ASSERT(b20.linearVelocity.isFinite());
+ PX_ASSERT(b20.angularState.isFinite());
+ PX_ASSERT(b30.linearVelocity.isFinite());
+ PX_ASSERT(b30.angularState.isFinite());
+
+ PX_ASSERT(b01.linearVelocity.isFinite());
+ PX_ASSERT(b01.angularState.isFinite());
+ PX_ASSERT(b11.linearVelocity.isFinite());
+ PX_ASSERT(b11.angularState.isFinite());
+ PX_ASSERT(b21.linearVelocity.isFinite());
+ PX_ASSERT(b21.angularState.isFinite());
+ PX_ASSERT(b31.linearVelocity.isFinite());
+ PX_ASSERT(b31.angularState.isFinite());
+}
+
+static void solveContact4_StaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& cache)
+{ // Solves one iteration of a batch of four contact constraints (DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT) in SIMD; only bodyA of each descriptor is read and written.
+ PxSolverBody& b00 = *desc[0].bodyA;
+ PxSolverBody& b10 = *desc[1].bodyA;
+ PxSolverBody& b20 = *desc[2].bodyA;
+ PxSolverBody& b30 = *desc[3].bodyA;
+ // The packed constraint stream is taken from desc[0] only; 'last' marks its end.
+ const PxU8* PX_RESTRICT last = desc[0].constraint + getConstraintLength(desc[0]);
+
+ //hopefully pointer aliasing doesn't bite.
+ PxU8* PX_RESTRICT currPtr = desc[0].constraint;
+
+
+ //We'll need this.
+ const Vec4V vZero = V4Zero();
+ Vec4V vMax = V4Splat(FMax()); // stand-in max impulse used when a header carries no per-contact max impulses
+ // Load the linear velocity and angular state of the four A bodies.
+ Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
+ Vec4V angState00 = V4LoadA(&b00.angularState.x);
+
+ Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
+ Vec4V angState10 = V4LoadA(&b10.angularState.x);
+
+ Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
+ Vec4V angState20 = V4LoadA(&b20.angularState.x);
+
+ Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
+ Vec4V angState30 = V4LoadA(&b30.angularState.x);
+
+ Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
+ Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
+
+ // Transpose so that each Vec4V holds one component for all four bodies.
+ PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
+ PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
+ // Prefetching starts just past the first header and the first contact point.
+ const PxU8* PX_RESTRICT prefetchAddress = currPtr + sizeof(SolverContactHeader4) + sizeof(SolverContactBatchPointBase4);
+
+ const SolverContactHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader4*>(currPtr);
+ // The inverse mass term is read from the first header only and reused for every header in the stream.
+ const Vec4V invMass0 = hdr->invMass0D0;
+
+ while((currPtr < last))
+ {
+ hdr = reinterpret_cast<const SolverContactHeader4*>(currPtr);
+
+ PX_ASSERT(hdr->type == DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT);
+ // Walk the packed layout after the header: applied forces, contact points, optional max impulses, optional shared friction data, friction applied forces, friction rows.
+ currPtr = const_cast<PxU8*>(reinterpret_cast<const PxU8*>(hdr + 1));
+
+ const PxU32 numNormalConstr = hdr->numNormalConstr;
+ const PxU32 numFrictionConstr = hdr->numFrictionConstr;
+ bool hasMaxImpulse = (hdr->flag & SolverContactHeader4::eHAS_MAX_IMPULSE) != 0;
+
+ Vec4V* appliedForces = reinterpret_cast<Vec4V*>(currPtr);
+ currPtr += sizeof(Vec4V)*numNormalConstr;
+
+ SolverContactBatchPointBase4* PX_RESTRICT contacts = reinterpret_cast<SolverContactBatchPointBase4*>(currPtr);
+
+ currPtr = reinterpret_cast<PxU8*>(contacts + numNormalConstr);
+ // Without per-contact max impulses the mask is 0, so every lookup maxImpulses[i & mask] reads the single vMax vector.
+ Vec4V* maxImpulses;
+ PxU32 maxImpulseMask;
+ if(hasMaxImpulse)
+ {
+ maxImpulseMask = 0xFFFFFFFF;
+ maxImpulses = reinterpret_cast<Vec4V*>(currPtr);
+ currPtr += sizeof(Vec4V) * numNormalConstr;
+ }
+ else
+ {
+ maxImpulseMask = 0;
+ maxImpulses = &vMax;
+ }
+ // The shared friction data is only skipped (and only used below) when the header has friction rows.
+ SolverFrictionSharedData4* PX_RESTRICT fd = reinterpret_cast<SolverFrictionSharedData4*>(currPtr);
+ if(numFrictionConstr)
+ currPtr += sizeof(SolverFrictionSharedData4);
+
+ Vec4V* frictionAppliedForces = reinterpret_cast<Vec4V*>(currPtr);
+ currPtr += sizeof(Vec4V)*numFrictionConstr;
+
+ const SolverContactFrictionBase4* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionBase4*>(currPtr);
+ currPtr += numFrictionConstr * sizeof(SolverContactFrictionBase4);
+
+
+ Vec4V accumulatedNormalImpulse = vZero;
+
+ const Vec4V angD0 = hdr->angDom0;
+ const Vec4V _normalT0 = hdr->normalX;
+ const Vec4V _normalT1 = hdr->normalY;
+ const Vec4V _normalT2 = hdr->normalZ;
+ // Linear part of body A's velocity along the contact normal; kept up to date incrementally in the loop below.
+ Vec4V contactNormalVel1 = V4Mul(linVel0T0, _normalT0);
+ contactNormalVel1 = V4MulAdd(linVel0T1, _normalT1, contactNormalVel1);
+
+ contactNormalVel1 = V4MulAdd(linVel0T2, _normalT2, contactNormalVel1);
+
+ Vec4V accumDeltaF = vZero;
+
+ // Normal rows.
+ for(PxU32 i=0;i<numNormalConstr;i++)
+ {
+ const SolverContactBatchPointBase4& c = contacts[i];
+
+ PxU32 offset = 0;
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ prefetchAddress += offset;
+
+ const Vec4V appliedForce = appliedForces[i];
+ const Vec4V maxImpulse = maxImpulses[i&maxImpulseMask];
+ Vec4V contactNormalVel2 = V4MulAdd(c.raXnX, angState0T0, contactNormalVel1);
+ contactNormalVel2 = V4MulAdd(c.raXnY, angState0T1, contactNormalVel2);
+ const Vec4V normalVel = V4MulAdd(c.raXnZ, angState0T2, contactNormalVel2);
+ // _deltaF = biasedErr - normalVel * velMultiplier, limited from below so the applied force cannot become negative...
+ const Vec4V _deltaF = V4Max(V4NegMulSub(normalVel, c.velMultiplier, c.biasedErr), V4Neg(appliedForce));
+ // ...and the new applied force is capped at maxImpulse; deltaF is the change that is actually applied.
+ Vec4V newAppliedForce(V4Add(appliedForce, _deltaF));
+ newAppliedForce = V4Min(newAppliedForce, maxImpulse);
+ const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce);
+ const Vec4V angDeltaF = V4Mul(angD0, deltaF);
+
+ accumDeltaF = V4Add(accumDeltaF, deltaF);
+
+ contactNormalVel1 = V4MulAdd(invMass0, deltaF, contactNormalVel1);
+ angState0T0 = V4MulAdd(c.raXnX, angDeltaF, angState0T0);
+ angState0T1 = V4MulAdd(c.raXnY, angDeltaF, angState0T1);
+ angState0T2 = V4MulAdd(c.raXnZ, angDeltaF, angState0T2);
+
+#if 1
+ appliedForces[i] = newAppliedForce;
+#endif
+
+ accumulatedNormalImpulse = V4Add(accumulatedNormalImpulse, newAppliedForce);
+ }
+ // Apply the accumulated normal impulse to the linear velocity in one step.
+ const Vec4V deltaFInvMass0 = V4Mul(accumDeltaF, invMass0);
+
+ linVel0T0 = V4MulAdd(_normalT0, deltaFInvMass0, linVel0T0);
+ linVel0T1 = V4MulAdd(_normalT1, deltaFInvMass0, linVel0T1);
+ linVel0T2 = V4MulAdd(_normalT2, deltaFInvMass0, linVel0T2);
+
+ if(cache.doFriction && numFrictionConstr)
+ {
+ const Vec4V staticFric = hdr->staticFriction;
+
+ const Vec4V dynamicFric = hdr->dynamicFriction;
+
+ const Vec4V maxFrictionImpulse = V4Mul(staticFric, accumulatedNormalImpulse);
+ const Vec4V maxDynFrictionImpulse = V4Mul(dynamicFric, accumulatedNormalImpulse);
+ const Vec4V negMaxDynFrictionImpulse = V4Neg(maxDynFrictionImpulse);
+
+ BoolV broken = BFFFF();
+
+ if(cache.writeBackIteration)
+ {
+ Ps::prefetchLine(fd->frictionBrokenWritebackByte[0]);
+ Ps::prefetchLine(fd->frictionBrokenWritebackByte[1]);
+ Ps::prefetchLine(fd->frictionBrokenWritebackByte[2]);
+ Ps::prefetchLine(fd->frictionBrokenWritebackByte[3]);
+ }
+
+ for(PxU32 i=0;i<numFrictionConstr;i++)
+ {
+ const SolverContactFrictionBase4& f = frictions[i];
+
+ PxU32 offset = 0;
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ Ps::prefetchLine(prefetchAddress, offset += 64);
+ prefetchAddress += offset;
+
+ const Vec4V appliedForce = frictionAppliedForces[i];
+ // Friction rows alternate between the two shared friction directions (index i&1).
+ const Vec4V normalT0 = fd->normalX[i&1];
+ const Vec4V normalT1 = fd->normalY[i&1];
+ const Vec4V normalT2 = fd->normalZ[i&1];
+
+ Vec4V normalVel1 = V4Mul(linVel0T0, normalT0);
+ Vec4V normalVel2 = V4Mul(f.raXnX, angState0T0);
+
+ normalVel1 = V4MulAdd(linVel0T1, normalT1, normalVel1);
+ normalVel2 = V4MulAdd(f.raXnY, angState0T1, normalVel2);
+
+ normalVel1 = V4MulAdd(linVel0T2, normalT2, normalVel1);
+ normalVel2 = V4MulAdd(f.raXnZ, angState0T2, normalVel2);
+
+ //relative normal velocity for all 4 constraints
+ const Vec4V normalVel = V4Add(normalVel1, normalVel2);
+
+ // appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation
+ const Vec4V tmp1 = V4Sub(appliedForce, f.scaledBias);
+
+ const Vec4V totalImpulse = V4NegMulSub(normalVel, f.velMultiplier, tmp1);
+ // A lane becomes (and stays) broken once |totalImpulse| exceeds staticFriction * accumulated normal impulse.
+ broken = BOr(broken, V4IsGrtr(V4Abs(totalImpulse), maxFrictionImpulse));
+ // Broken lanes are clamped to +/- dynamicFriction * accumulated normal impulse; the others keep totalImpulse.
+ const Vec4V newAppliedForce = V4Sel(broken, V4Min(maxDynFrictionImpulse, V4Max(negMaxDynFrictionImpulse, totalImpulse)), totalImpulse);
+
+ const Vec4V deltaF =V4Sub(newAppliedForce, appliedForce);
+
+ const Vec4V deltaFInvMass = V4Mul(invMass0, deltaF);
+ const Vec4V angDeltaF = V4Mul(angD0, deltaF);
+
+ linVel0T0 = V4MulAdd(normalT0, deltaFInvMass, linVel0T0);
+ angState0T0 = V4MulAdd(f.raXnX, angDeltaF, angState0T0);
+
+ linVel0T1 = V4MulAdd(normalT1, deltaFInvMass, linVel0T1);
+ angState0T1 = V4MulAdd(f.raXnY, angDeltaF, angState0T1);
+
+ linVel0T2 = V4MulAdd(normalT2, deltaFInvMass, linVel0T2);
+ angState0T2 = V4MulAdd(f.raXnZ, angDeltaF, angState0T2);
+
+#if 1
+ frictionAppliedForces[i] = newAppliedForce;
+#endif
+
+ }
+
+ fd->broken = broken;
+ }
+ }
+ // Transpose back to one vector per body.
+ PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
+ PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
+ // NOTE(review): these asserts inspect the bodies before the new values are stored below.
+ PX_ASSERT(b00.linearVelocity.isFinite());
+ PX_ASSERT(b00.angularState.isFinite());
+ PX_ASSERT(b10.linearVelocity.isFinite());
+ PX_ASSERT(b10.angularState.isFinite());
+ PX_ASSERT(b20.linearVelocity.isFinite());
+ PX_ASSERT(b20.angularState.isFinite());
+ PX_ASSERT(b30.linearVelocity.isFinite());
+ PX_ASSERT(b30.angularState.isFinite());
+
+ // Write back
+ V4StoreA(linVel00, &b00.linearVelocity.x);
+ V4StoreA(linVel10, &b10.linearVelocity.x);
+ V4StoreA(linVel20, &b20.linearVelocity.x);
+ V4StoreA(linVel30, &b30.linearVelocity.x);
+
+ V4StoreA(angState00, &b00.angularState.x);
+ V4StoreA(angState10, &b10.angularState.x);
+ V4StoreA(angState20, &b20.angularState.x);
+ V4StoreA(angState30, &b30.angularState.x);
+
+ PX_ASSERT(b00.linearVelocity.isFinite());
+ PX_ASSERT(b00.angularState.isFinite());
+ PX_ASSERT(b10.linearVelocity.isFinite());
+ PX_ASSERT(b10.angularState.isFinite());
+ PX_ASSERT(b20.linearVelocity.isFinite());
+ PX_ASSERT(b20.angularState.isFinite());
+ PX_ASSERT(b30.linearVelocity.isFinite());
+ PX_ASSERT(b30.angularState.isFinite());
+}
+
+// Concludes a batch of four contact constraints stored in the stream of desc[0]:
+// every contact point gets scaledBias subtracted from biasedErr, and every friction row gets
+// scaledBias replaced by targetVelocity.
+// contactSize and frictionSize are the byte strides of one contact point and one friction row;
+// rows are accessed through the Base4 structures and stepped by these strides.
+static void concludeContact4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/, PxU32 contactSize, PxU32 frictionSize)
+{
+	PxU8* PX_RESTRICT cursor = desc[0].constraint;
+	const PxU8* PX_RESTRICT streamEnd = desc[0].constraint + getConstraintLength(desc[0]);
+
+	while(cursor < streamEnd)
+	{
+		const SolverContactHeader4* PX_RESTRICT header = reinterpret_cast<SolverContactHeader4*>(cursor);
+
+		const PxU32 pointCount = header->numNormalConstr;
+		const PxU32 frictionCount = header->numFrictionConstr;
+		const bool maxImpulsePresent = (header->flag & SolverContactHeader4::eHAS_MAX_IMPULSE) != 0;
+
+		// Contact points follow the header and the per-point applied forces.
+		PxU8* pointBytes = cursor + sizeof(SolverContactHeader4) + sizeof(Vec4V)*pointCount;
+
+		// Friction rows follow the points, the optional max impulses, the friction applied forces and
+		// the optional shared friction data (only the total offset matters here).
+		PxU8* frictionBytes = pointBytes + pointCount*contactSize;
+		if(maxImpulsePresent)
+			frictionBytes += sizeof(Vec4V)*pointCount;
+		frictionBytes += sizeof(Vec4V)*frictionCount;
+		if(frictionCount)
+			frictionBytes += sizeof(SolverFrictionSharedData4);
+
+		// The next header starts right after the friction rows.
+		cursor = frictionBytes + frictionCount*frictionSize;
+
+		for(PxU32 i = 0; i < pointCount; ++i, pointBytes += contactSize)
+		{
+			SolverContactBatchPointBase4& point = *reinterpret_cast<SolverContactBatchPointBase4*>(pointBytes);
+			point.biasedErr = V4Sub(point.biasedErr, point.scaledBias);
+		}
+
+		for(PxU32 i = 0; i < frictionCount; ++i, frictionBytes += frictionSize)
+		{
+			SolverContactFrictionBase4& row = *reinterpret_cast<SolverContactFrictionBase4*>(frictionBytes);
+			row.scaledBias = row.targetVelocity;
+		}
+	}
+}
+
+// Writes the results of a batch of four contact constraints (stream of desc[0]) back out:
+//  - each lane's applied normal forces are copied to desc[lane].writeBack when that pointer is non-null,
+//    limited to the lane's own contact count in each header;
+//  - for headers with friction rows, a lane whose friction count is non-zero and whose broken flag is set
+//    gets 1 stored through its frictionBrokenWritebackByte pointer;
+//  - a ThresholdStreamElement is appended to cache.mThresholdStream for every lane that has force
+//    thresholds flagged, has no link on either side, accumulated a non-zero normal force and has at
+//    least one body with reportThreshold below PX_MAX_REAL.
+// The normal force is summed over all headers of the stream; the force-threshold flags are taken from
+// the last header visited. bd0 / bd1 supply the body data of side A / side B for each of the four lanes.
+void writeBackContact4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& cache,
+	const PxSolverBodyData** PX_RESTRICT bd0, const PxSolverBodyData** PX_RESTRICT bd1)
+{
+	PxU8* PX_RESTRICT cursor = desc[0].constraint;
+	const PxU8* PX_RESTRICT streamEnd = desc[0].constraint + getConstraintLength(desc[0]);
+
+	// Per-lane destinations for the applied normal forces; a null entry disables the copy for that lane.
+	PxReal* forceOut[4] =
+	{
+		reinterpret_cast<PxReal*>(desc[0].writeBack),
+		reinterpret_cast<PxReal*>(desc[1].writeBack),
+		reinterpret_cast<PxReal*>(desc[2].writeBack),
+		reinterpret_cast<PxReal*>(desc[3].writeBack)
+	};
+
+	// The row strides depend on the constraint type stored in the first byte of the stream.
+	const bool isDynamicBatch = *desc[0].constraint == DY_SC_TYPE_BLOCK_RB_CONTACT;
+	const PxU32 contactStride = isDynamicBatch ? sizeof(SolverContactBatchPointDynamic4) : sizeof(SolverContactBatchPointBase4);
+	const PxU32 frictionStride = isDynamicBatch ? sizeof(SolverContactFrictionDynamic4) : sizeof(SolverContactFrictionBase4);
+
+	Vec4V totalNormalForce = V4Zero();
+	bool reportsForce[4] = {false, false, false, false};
+
+	while(cursor < streamEnd)
+	{
+		SolverContactHeader4* PX_RESTRICT header = reinterpret_cast<SolverContactHeader4*>(cursor);
+
+		const PxU32 pointCount = header->numNormalConstr;
+		const PxU32 frictionCount = header->numFrictionConstr;
+		const bool maxImpulsePresent = (header->flag & SolverContactHeader4::eHAS_MAX_IMPULSE) != 0;
+
+		// Step over the packed layout, remembering only the parts that are read below.
+		cursor = reinterpret_cast<PxU8*>(header + 1);
+
+		Vec4V* PX_RESTRICT appliedForces = reinterpret_cast<Vec4V*>(cursor);
+		cursor += sizeof(Vec4V)*pointCount;
+		cursor += pointCount*contactStride;
+		if(maxImpulsePresent)
+			cursor += sizeof(Vec4V)*pointCount;
+
+		SolverFrictionSharedData4* PX_RESTRICT sharedFriction = reinterpret_cast<SolverFrictionSharedData4*>(cursor);
+		if(frictionCount)
+			cursor += sizeof(SolverFrictionSharedData4);
+		cursor += sizeof(Vec4V)*frictionCount;
+		cursor += frictionCount*frictionStride;
+
+		for(PxU32 lane = 0; lane < 4; ++lane)
+			reportsForce[lane] = (header->flags[lane] & SolverContactHeader::eHAS_FORCE_THRESHOLDS) != 0;
+
+		const PxU32 lanePointCount[4] =
+		{
+			PxU32(header->numNormalConstr0),
+			PxU32(header->numNormalConstr1),
+			PxU32(header->numNormalConstr2),
+			PxU32(header->numNormalConstr3)
+		};
+
+		for(PxU32 i = 0; i < pointCount; ++i)
+		{
+			const Vec4V force = appliedForces[i];
+			const FloatV laneForce[4] = {V4GetX(force), V4GetY(force), V4GetZ(force), V4GetW(force)};
+
+			totalNormalForce = V4Add(totalNormalForce, force);
+
+			for(PxU32 lane = 0; lane < 4; ++lane)
+			{
+				if(forceOut[lane] && i < lanePointCount[lane])
+					FStore(laneForce[lane], forceOut[lane]++);
+			}
+		}
+
+		if(frictionCount)
+		{
+			PX_ALIGN(16, PxU32 brokenLanes[4]);
+			BStoreA(sharedFriction->broken, brokenLanes);
+
+			// The four per-lane friction counts are read as an array starting at numFrictionConstr0.
+			const PxU8* laneFrictionCount = &header->numFrictionConstr0;
+
+			for(PxU32 lane = 0; lane < 4; ++lane)
+			{
+				if(laneFrictionCount[lane] == 0 || brokenLanes[lane] == 0)
+					continue;
+				// Flagged in the original source (by PT) as a bad L2 miss.
+				*sharedFriction->frictionBrokenWritebackByte[lane] = 1;
+			}
+		}
+	}
+
+	PX_ALIGN(16, PxReal laneForceSum[4]);
+	V4StoreA(totalNormalForce, laneForceSum);
+
+	Sc::ShapeInteraction** shapeInteractions = reinterpret_cast<SolverContactHeader4*>(desc[0].constraint)->shapeInteraction;
+
+	for(PxU32 lane = 0; lane < 4; ++lane)
+	{
+		if(!reportsForce[lane])
+			continue;
+		if(desc[lane].linkIndexA != PxSolverConstraintDesc::NO_LINK || desc[lane].linkIndexB != PxSolverConstraintDesc::NO_LINK)
+			continue;
+		if(laneForceSum[lane] == 0.f)
+			continue;
+
+		const PxSolverBodyData& dataA = *bd0[lane];
+		const PxSolverBodyData& dataB = *bd1[lane];
+		if(!(dataA.reportThreshold < PX_MAX_REAL || dataB.reportThreshold < PX_MAX_REAL))
+			continue;
+
+		ThresholdStreamElement elt;
+		elt.normalForce = laneForceSum[lane];
+		elt.threshold = PxMin<float>(dataA.reportThreshold, dataB.reportThreshold);
+		elt.nodeIndexA = dataA.nodeIndex;
+		elt.nodeIndexB = dataB.nodeIndex;
+		elt.shapeInteraction = shapeInteractions[lane];
+		Ps::order(elt.nodeIndexA, elt.nodeIndexB);
+		PX_ASSERT(elt.nodeIndexA < elt.nodeIndexB);
+		PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength);
+		cache.mThresholdStream[cache.mThresholdStreamIndex++] = elt;
+	}
+}
+
+static void solve1D4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/) // Solves a batch of four 1D constraints (desc[0..3]) together and writes the updated velocity state back to their eight bodies.
+{
+
+ PxSolverBody& b00 = *desc[0].bodyA; // naming: bXY, X = constraint index within the batch, Y = 0 for bodyA / 1 for bodyB
+ PxSolverBody& b01 = *desc[0].bodyB;
+
+ PxSolverBody& b10 = *desc[1].bodyA;
+ PxSolverBody& b11 = *desc[1].bodyB;
+
+ PxSolverBody& b20 = *desc[2].bodyA;
+ PxSolverBody& b21 = *desc[2].bodyB;
+
+ PxSolverBody& b30 = *desc[3].bodyA;
+ PxSolverBody& b31 = *desc[3].bodyB;
+
+ PxU8* PX_RESTRICT bPtr = desc[0].constraint; // only desc[0].constraint is dereferenced in this function
+ //PxU32 length = desc.constraintLength;
+
+ SolverConstraint1DHeader4* PX_RESTRICT header = reinterpret_cast<SolverConstraint1DHeader4*>(bPtr);
+ SolverConstraint1DDynamic4* PX_RESTRICT base = reinterpret_cast<SolverConstraint1DDynamic4*>(header+1); // rows start immediately after the header
+
+ //const FloatV fZero = FZero();
+ Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x); // aligned 4-float loads starting at .x, so a fourth float following each vector is loaded too
+ Vec4V linVel01 = V4LoadA(&b01.linearVelocity.x);
+ Vec4V angState00 = V4LoadA(&b00.angularState.x);
+ Vec4V angState01 = V4LoadA(&b01.angularState.x);
+
+ Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
+ Vec4V linVel11 = V4LoadA(&b11.linearVelocity.x);
+ Vec4V angState10 = V4LoadA(&b10.angularState.x);
+ Vec4V angState11 = V4LoadA(&b11.angularState.x);
+
+ Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
+ Vec4V linVel21 = V4LoadA(&b21.linearVelocity.x);
+ Vec4V angState20 = V4LoadA(&b20.angularState.x);
+ Vec4V angState21 = V4LoadA(&b21.angularState.x);
+
+ Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
+ Vec4V linVel31 = V4LoadA(&b31.linearVelocity.x);
+ Vec4V angState30 = V4LoadA(&b30.angularState.x);
+ Vec4V angState31 = V4LoadA(&b31.angularState.x);
+
+
+ Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3; // transposed copies: T0/T1/T2 are paired with the X/Y/Z row data below; T3 is only carried through
+ Vec4V linVel1T0, linVel1T1, linVel1T2, linVel1T3;
+ Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
+ Vec4V angState1T0, angState1T1, angState1T2, angState1T3;
+
+
+ PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3); // presumably a 4x4 transpose: one vector per component, one lane per constraint
+ PX_TRANSPOSE_44(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2, linVel1T3);
+ PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
+ PX_TRANSPOSE_44(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2, angState1T3);
+
+ const Vec4V invMass0D0 = header->invMass0D0; // per-lane scale applied to deltaF for body 0's linear velocity
+ const Vec4V invMass1D1 = header->invMass1D1; // per-lane scale applied to deltaF for body 1's linear velocity
+
+ const Vec4V angD0 = header->angD0; // per-lane scale applied to deltaF for body 0's angular state
+ const Vec4V angD1 = header->angD1; // per-lane scale applied to deltaF for body 1's angular state
+
+ PxU32 maxConstraints = header->count; // number of rows iterated for the batch
+
+ for(PxU32 a = 0; a < maxConstraints; ++a)
+ {
+ SolverConstraint1DDynamic4& c = *base;
+ base++;
+
+ Ps::prefetchLine(base); // prefetch the following row(s) while this one is being solved
+ Ps::prefetchLine(base, 64);
+ Ps::prefetchLine(base, 128);
+ Ps::prefetchLine(base, 192);
+ Ps::prefetchLine(base, 256);
+
+ const Vec4V appliedForce = c.appliedForce; // impulse accumulated so far for this row (one lane per constraint)
+
+ Vec4V linProj0(V4Mul(c.lin0X, linVel0T0)); // the next three groups build per-lane dot products of the row axes with the body velocities
+ Vec4V linProj1(V4Mul(c.lin1X, linVel1T0));
+ Vec4V angProj0(V4Mul(c.ang0X, angState0T0));
+ Vec4V angProj1(V4Mul(c.ang1X, angState1T0));
+
+ linProj0 = V4MulAdd(c.lin0Y, linVel0T1, linProj0);
+ linProj1 = V4MulAdd(c.lin1Y, linVel1T1, linProj1);
+ angProj0 = V4MulAdd(c.ang0Y, angState0T1, angProj0);
+ angProj1 = V4MulAdd(c.ang1Y, angState1T1, angProj1);
+
+ linProj0 = V4MulAdd(c.lin0Z, linVel0T2, linProj0);
+ linProj1 = V4MulAdd(c.lin1Z, linVel1T2, linProj1);
+ angProj0 = V4MulAdd(c.ang0Z, angState0T2, angProj0);
+ angProj1 = V4MulAdd(c.ang1Z, angState1T2, angProj1);
+
+ const Vec4V projectVel0 = V4Add(linProj0, angProj0);
+ const Vec4V projectVel1 = V4Add(linProj1, angProj1);
+
+ const Vec4V normalVel = V4Sub(projectVel0, projectVel1); // body 0's projected velocity minus body 1's
+
+ const Vec4V unclampedForce = V4MulAdd(appliedForce, c.impulseMultiplier, V4MulAdd(normalVel, c.velMultiplier, c.constant)); // appliedForce*impulseMultiplier + normalVel*velMultiplier + constant (assuming V4MulAdd(a,b,c) = a*b + c)
+ const Vec4V clampedForce = V4Max(c.minImpulse, V4Min(c.maxImpulse, unclampedForce)); // clamp the accumulated impulse to [minImpulse, maxImpulse]
+ const Vec4V deltaF = V4Sub(clampedForce, appliedForce); // impulse change applied by this iteration
+ c.appliedForce = clampedForce; // persist the accumulated impulse in the row
+
+ const Vec4V deltaFInvMass0 = V4Mul(deltaF, invMass0D0);
+ const Vec4V deltaFInvMass1 = V4Mul(deltaF, invMass1D1);
+
+ const Vec4V angDeltaFInvMass0 = V4Mul(deltaF, angD0);
+ const Vec4V angDeltaFInvMass1 = V4Mul(deltaF, angD1);
+
+ linVel0T0 = V4MulAdd(c.lin0X, deltaFInvMass0, linVel0T0); // body 0 is updated with MulAdd, body 1 with NegMulSub (presumably c - a*b, i.e. the opposite sign)
+ linVel1T0 = V4NegMulSub(c.lin1X, deltaFInvMass1, linVel1T0);
+ angState0T0 = V4MulAdd(c.ang0X, angDeltaFInvMass0, angState0T0);
+ angState1T0 = V4NegMulSub(c.ang1X, angDeltaFInvMass1, angState1T0);
+
+ linVel0T1 = V4MulAdd(c.lin0Y, deltaFInvMass0, linVel0T1);
+ linVel1T1 = V4NegMulSub(c.lin1Y, deltaFInvMass1, linVel1T1);
+ angState0T1 = V4MulAdd(c.ang0Y, angDeltaFInvMass0, angState0T1);
+ angState1T1 = V4NegMulSub(c.ang1Y, angDeltaFInvMass1, angState1T1);
+
+ linVel0T2 = V4MulAdd(c.lin0Z, deltaFInvMass0, linVel0T2);
+ linVel1T2 = V4NegMulSub(c.lin1Z, deltaFInvMass1, linVel1T2);
+ angState0T2 = V4MulAdd(c.ang0Z, angDeltaFInvMass0, angState0T2);
+ angState1T2 = V4NegMulSub(c.ang1Z, angDeltaFInvMass1, angState1T2);
+ }
+
+ PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30); // transpose back to one vector per body; the T3 inputs were never modified by the loop
+ PX_TRANSPOSE_44(linVel1T0, linVel1T1, linVel1T2, linVel1T3, linVel01, linVel11, linVel21, linVel31);
+ PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
+ PX_TRANSPOSE_44(angState1T0, angState1T1, angState1T2, angState1T3, angState01, angState11, angState21, angState31);
+
+
+ // Write back
+ V4StoreA(linVel00, &b00.linearVelocity.x); // aligned 4-float stores, mirroring the loads at the top of the function
+ V4StoreA(linVel10, &b10.linearVelocity.x);
+ V4StoreA(linVel20, &b20.linearVelocity.x);
+ V4StoreA(linVel30, &b30.linearVelocity.x);
+
+ V4StoreA(linVel01, &b01.linearVelocity.x);
+ V4StoreA(linVel11, &b11.linearVelocity.x);
+ V4StoreA(linVel21, &b21.linearVelocity.x);
+ V4StoreA(linVel31, &b31.linearVelocity.x);
+
+ V4StoreA(angState00, &b00.angularState.x);
+ V4StoreA(angState10, &b10.angularState.x);
+ V4StoreA(angState20, &b20.angularState.x);
+ V4StoreA(angState30, &b30.angularState.x);
+
+ V4StoreA(angState01, &b01.angularState.x);
+ V4StoreA(angState11, &b11.angularState.x);
+ V4StoreA(angState21, &b21.angularState.x);
+ V4StoreA(angState31, &b31.angularState.x);
+
+}
+
+static void conclude1D4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/) // Overwrites each row's `constant` with its `unbiasedConstant` for the batch stored at desc[0].constraint.
+{
+ SolverConstraint1DHeader4* header = reinterpret_cast<SolverConstraint1DHeader4*>(desc[0].constraint);
+ PxU8* base = desc[0].constraint + sizeof(SolverConstraint1DHeader4); // first row follows the header
+ PxU32 stride = header->type == DY_SC_TYPE_BLOCK_1D ? sizeof(SolverConstraint1DDynamic4) : sizeof(SolverConstraint1DBase4); // row size depends on the header type
+
+ for(PxU32 i=0; i<header->count; i++)
+ {
+ SolverConstraint1DBase4& c = *reinterpret_cast<SolverConstraint1DBase4*>(base); // rows are accessed through the Base4 layout whichever stride is in use
+ c.constant = c.unbiasedConstant;
+ base += stride;
+ }
+ PX_ASSERT(desc[0].constraint + getConstraintLength(desc[0]) == base); // the walk must end exactly at the end of the constraint data
+}
+
+void writeBack1D4(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/,
+ const PxSolverBodyData** PX_RESTRICT /*bd0*/, const PxSolverBodyData** PX_RESTRICT /*bd1*/) // Sums the applied impulses of a 4-constraint 1D batch and stores linear/angular impulse and broken state into each non-null desc[i].writeBack; cache, bd0 and bd1 are unused.
+{
+ ConstraintWriteback* writeback0 = reinterpret_cast<ConstraintWriteback*>(desc[0].writeBack);
+ ConstraintWriteback* writeback1 = reinterpret_cast<ConstraintWriteback*>(desc[1].writeBack);
+ ConstraintWriteback* writeback2 = reinterpret_cast<ConstraintWriteback*>(desc[2].writeBack);
+ ConstraintWriteback* writeback3 = reinterpret_cast<ConstraintWriteback*>(desc[3].writeBack);
+
+ if(writeback0 || writeback1 || writeback2 || writeback3) // nothing is computed when no constraint in the batch has a write-back target
+ {
+ SolverConstraint1DHeader4* header = reinterpret_cast<SolverConstraint1DHeader4*>(desc[0].constraint);
+ PxU8* base = desc[0].constraint + sizeof(SolverConstraint1DHeader4);
+ PxU32 stride = header->type == DY_SC_TYPE_BLOCK_1D ? sizeof(SolverConstraint1DDynamic4) : sizeof(SolverConstraint1DBase4); // row size depends on the header type
+
+ const Vec4V zero = V4Zero();
+ Vec4V linX(zero), linY(zero), linZ(zero); // per-lane accumulated linear impulse components
+ Vec4V angX(zero), angY(zero), angZ(zero); // per-lane accumulated angular impulse components
+
+ for(PxU32 i=0; i<header->count; i++)
+ {
+ const SolverConstraint1DBase4* c = reinterpret_cast<SolverConstraint1DBase4*>(base);
+
+ //Load in flags
+ const VecI32V flags = I4LoadU(reinterpret_cast<const PxI32*>(&c->flags[0])); // unaligned load of four 32-bit flag words starting at flags[0]
+ //Work out masks
+ const VecI32V mask = I4Load(DY_SC_FLAG_OUTPUT_FORCE);
+
+ const VecI32V masked = VecI32V_And(flags, mask);
+ const BoolV isEq = VecI32V_IsEq(masked, mask); // lanes whose flags contain DY_SC_FLAG_OUTPUT_FORCE
+
+ const Vec4V appliedForce = V4Sel(isEq, c->appliedForce, zero); // presumably keeps appliedForce where the flag is set and uses zero elsewhere
+
+ linX = V4MulAdd(c->lin0X, appliedForce, linX);
+ linY = V4MulAdd(c->lin0Y, appliedForce, linY);
+ linZ = V4MulAdd(c->lin0Z, appliedForce, linZ);
+
+ angX = V4MulAdd(c->ang0WritebackX, appliedForce, angX);
+ angY = V4MulAdd(c->ang0WritebackY, appliedForce, angY);
+ angZ = V4MulAdd(c->ang0WritebackZ, appliedForce, angZ);
+
+ base += stride;
+ }
+
+ //We need to do the cross product now
+
+ angX = V4Sub(angX, V4NegMulSub(header->body0WorkOffsetZ, linY, V4Mul(header->body0WorkOffsetY, linZ))); // ang -= cross(body0WorkOffset, lin), component by component (assuming V4NegMulSub(a,b,c) = c - a*b)
+ angY = V4Sub(angY, V4NegMulSub(header->body0WorkOffsetX, linZ, V4Mul(header->body0WorkOffsetZ, linX)));
+ angZ = V4Sub(angZ, V4NegMulSub(header->body0WorkOffsetY, linX, V4Mul(header->body0WorkOffsetX, linY)));
+
+ const Vec4V linLenSq = V4MulAdd(linZ, linZ, V4MulAdd(linY, linY, V4Mul(linX, linX)));
+ const Vec4V angLenSq = V4MulAdd(angZ, angZ, V4MulAdd(angY, angY, V4Mul(angX, angX)));
+
+ const Vec4V linLen = V4Sqrt(linLenSq); // magnitude of the accumulated linear impulse per lane
+ const Vec4V angLen = V4Sqrt(angLenSq); // magnitude of the accumulated angular impulse per lane
+
+ const BoolV broken = BOr(V4IsGrtr(linLen, header->linBreakImpulse), V4IsGrtr(angLen, header->angBreakImpulse)); // a lane is flagged when either magnitude exceeds its break impulse
+
+ PX_ALIGN(16, PxU32 iBroken[4]);
+ BStoreA(broken, iBroken); // spill the mask so each lane can be tested as a scalar below
+
+ Vec4V lin0, lin1, lin2, lin3;
+ Vec4V ang0, ang1, ang2, ang3;
+
+ PX_TRANSPOSE_34_44(linX, linY, linZ, lin0, lin1, lin2, lin3); // back to one vector per constraint (lin0..lin3)
+ PX_TRANSPOSE_34_44(angX, angY, angZ, ang0, ang1, ang2, ang3);
+
+ if(writeback0)
+ {
+ V3StoreU(Vec3V_From_Vec4V_WUndefined(lin0), writeback0->linearImpulse);
+ V3StoreU(Vec3V_From_Vec4V_WUndefined(ang0), writeback0->angularImpulse);
+ writeback0->broken = header->break0 ? PxU32(iBroken[0] != 0) : 0; // broken is reported only when header->break0 is set
+ }
+ if(writeback1)
+ {
+ V3StoreU(Vec3V_From_Vec4V_WUndefined(lin1), writeback1->linearImpulse);
+ V3StoreU(Vec3V_From_Vec4V_WUndefined(ang1), writeback1->angularImpulse);
+ writeback1->broken = header->break1 ? PxU32(iBroken[1] != 0) : 0;
+ }
+ if(writeback2)
+ {
+ V3StoreU(Vec3V_From_Vec4V_WUndefined(lin2), writeback2->linearImpulse);
+ V3StoreU(Vec3V_From_Vec4V_WUndefined(ang2), writeback2->angularImpulse);
+ writeback2->broken = header->break2 ? PxU32(iBroken[2] != 0) : 0;
+ }
+ if(writeback3)
+ {
+ V3StoreU(Vec3V_From_Vec4V_WUndefined(lin3), writeback3->linearImpulse);
+ V3StoreU(Vec3V_From_Vec4V_WUndefined(ang3), writeback3->angularImpulse);
+ writeback3->broken = header->break3 ? PxU32(iBroken[3] != 0) : 0;
+ }
+
+ PX_ASSERT(desc[0].constraint + getConstraintLength(desc[0]) == base); // the row walk must end exactly at the end of the constraint data
+ }
+}
+
+
+void solveContactPreBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) // Forwards to solveContact4_Block; constraintCount is unused.
+{
+ solveContact4_Block(desc, cache);
+}
+
+void solveContactPreBlock_Static(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) // Forwards to solveContact4_StaticBlock; constraintCount is unused.
+{
+ solveContact4_StaticBlock(desc, cache);
+}
+
+void solveContactPreBlock_Conclude(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) // Solves the contact batch, then runs the conclude step using the dynamic point/friction struct sizes; constraintCount is unused.
+{
+ solveContact4_Block(desc, cache);
+ concludeContact4_Block(desc, cache, sizeof(SolverContactBatchPointDynamic4), sizeof(SolverContactFrictionDynamic4));
+}
+
+void solveContactPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) // Static-body variant: solves with solveContact4_StaticBlock, then concludes using the Base4 point/friction struct sizes; constraintCount is unused.
+{
+ solveContact4_StaticBlock(desc, cache);
+ concludeContact4_Block(desc, cache, sizeof(SolverContactBatchPointBase4), sizeof(SolverContactFrictionBase4));
+}
+
+void solveContactPreBlock_WriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) // Solves the contact batch, writes results back, and flushes the local threshold stream to the shared one when it is nearly full; constraintCount is unused.
+{
+ solveContact4_Block(desc, cache);
+
+ const PxSolverBodyData* bd0[4] = { &cache.solverBodyArray[desc[0].bodyADataIndex], // body-A data for each of the four constraints
+ &cache.solverBodyArray[desc[1].bodyADataIndex],
+ &cache.solverBodyArray[desc[2].bodyADataIndex],
+ &cache.solverBodyArray[desc[3].bodyADataIndex]};
+
+ const PxSolverBodyData* bd1[4] = { &cache.solverBodyArray[desc[0].bodyBDataIndex], // body-B data for each of the four constraints
+ &cache.solverBodyArray[desc[1].bodyBDataIndex],
+ &cache.solverBodyArray[desc[2].bodyBDataIndex],
+ &cache.solverBodyArray[desc[3].bodyBDataIndex]};
+
+ writeBackContact4_Block(desc, cache, bd0, bd1);
+
+ if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4)) // flush once fewer than four free slots remain. NOTE(review): if mThresholdStreamLength is unsigned and below 4 this subtraction would wrap - confirm the buffer is always at least 4 entries
+ {
+ //Write back to global buffer
+ PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); // start of the range used below; atomicAdd presumably returns the post-add value, hence the subtraction
+ for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
+ {
+ cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
+ }
+ cache.mThresholdStreamIndex = 0; // local stream is empty again
+ }
+}
+
+void solveContactPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) // Static-body variant: solves with solveContact4_StaticBlock, writes results back, and flushes the local threshold stream when it is nearly full; constraintCount is unused.
+{
+ solveContact4_StaticBlock(desc, cache);
+ const PxSolverBodyData* bd0[4] = { &cache.solverBodyArray[desc[0].bodyADataIndex], // body-A data for each of the four constraints
+ &cache.solverBodyArray[desc[1].bodyADataIndex],
+ &cache.solverBodyArray[desc[2].bodyADataIndex],
+ &cache.solverBodyArray[desc[3].bodyADataIndex]};
+
+ const PxSolverBodyData* bd1[4] = { &cache.solverBodyArray[desc[0].bodyBDataIndex], // body-B data for each of the four constraints
+ &cache.solverBodyArray[desc[1].bodyBDataIndex],
+ &cache.solverBodyArray[desc[2].bodyBDataIndex],
+ &cache.solverBodyArray[desc[3].bodyBDataIndex]};
+
+ writeBackContact4_Block(desc, cache, bd0, bd1);
+
+ if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4)) // flush once fewer than four free slots remain. NOTE(review): if mThresholdStreamLength is unsigned and below 4 this subtraction would wrap - confirm the buffer is always at least 4 entries
+ {
+ //Write back to global buffer
+ PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); // start of the range used below; atomicAdd presumably returns the post-add value, hence the subtraction
+ for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
+ {
+ cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
+ }
+ cache.mThresholdStreamIndex = 0; // local stream is empty again
+ }
+}
+
+void solve1D4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) // Three-argument overload that forwards to the two-argument solve1D4_Block; constraintCount is unused.
+{
+ solve1D4_Block(desc, cache);
+}
+
+
+void solve1D4Block_Conclude(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) // Solves the 1D batch, then runs conclude1D4_Block on it; constraintCount is unused.
+{
+ solve1D4_Block(desc, cache);
+ conclude1D4_Block(desc, cache);
+}
+
+
+void solve1D4Block_WriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) // Solves the 1D batch, then writes impulses and broken state back via writeBack1D4; constraintCount is unused.
+{
+ solve1D4_Block(desc, cache);
+
+ const PxSolverBodyData* bd0[4] = { &cache.solverBodyArray[desc[0].bodyADataIndex], // body-A data for each of the four constraints
+ &cache.solverBodyArray[desc[1].bodyADataIndex],
+ &cache.solverBodyArray[desc[2].bodyADataIndex],
+ &cache.solverBodyArray[desc[3].bodyADataIndex]};
+
+ const PxSolverBodyData* bd1[4] = { &cache.solverBodyArray[desc[0].bodyBDataIndex], // body-B data for each of the four constraints
+ &cache.solverBodyArray[desc[1].bodyBDataIndex],
+ &cache.solverBodyArray[desc[2].bodyBDataIndex],
+ &cache.solverBodyArray[desc[3].bodyBDataIndex]};
+
+ writeBack1D4(desc, cache, bd0, bd1); // writeBack1D4 leaves its bd0/bd1 parameters unnamed and unused
+}
+
+void writeBack1D4Block(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) // Write-back only (no solve): gathers the body data pointers and calls writeBack1D4; constraintCount is unused.
+{
+ const PxSolverBodyData* bd0[4] = { &cache.solverBodyArray[desc[0].bodyADataIndex], // body-A data for each of the four constraints
+ &cache.solverBodyArray[desc[1].bodyADataIndex],
+ &cache.solverBodyArray[desc[2].bodyADataIndex],
+ &cache.solverBodyArray[desc[3].bodyADataIndex]};
+
+ const PxSolverBodyData* bd1[4] = { &cache.solverBodyArray[desc[0].bodyBDataIndex], // body-B data for each of the four constraints
+ &cache.solverBodyArray[desc[1].bodyBDataIndex],
+ &cache.solverBodyArray[desc[2].bodyBDataIndex],
+ &cache.solverBodyArray[desc[3].bodyBDataIndex]};
+
+ writeBack1D4(desc, cache, bd0, bd1); // writeBack1D4 leaves its bd0/bd1 parameters unnamed and unused
+}
+
+}
+
+}
+
+#endif
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsShared.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsShared.h
new file mode 100644
index 00000000..13c8a0e2
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsShared.h
@@ -0,0 +1,221 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef DY_SOLVER_CORE_SHARED_H
+#define DY_SOLVER_CORE_SHARED_H
+
+#include "foundation/PxPreprocessor.h"
+#include "PsVecMath.h"
+
+#ifdef PX_SUPPORT_SIMD
+
+#include "CmPhysXCommon.h"
+#include "DySolverBody.h"
+#include "DySolverContact.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverConstraintDesc.h"
+#include "PsUtilities.h"
+#include "DyConstraint.h"
+#include "PsAtomic.h"
+
+
+namespace physx
+{
+
+namespace Dy
+{
+ PX_FORCE_INLINE static FloatV solveDynamicContacts(SolverContactPoint* contacts, const PxU32 nbContactPoints, const Vec3VArg contactNormal, // Solves nbContactPoints normal contacts between two bodies sharing contactNormal.
+ const FloatVArg invMassA, const FloatVArg invMassB, const FloatVArg angDom0, const FloatVArg angDom1, Vec3V& linVel0_, Vec3V& angState0_, // The four velocity references are read on entry and overwritten with the updated state on exit.
+ Vec3V& linVel1_, Vec3V& angState1_, PxF32* PX_RESTRICT forceBuffer) // forceBuffer[i] holds contact i's accumulated force and is updated in place; returns the sum of the new forces.
+{
+ Vec3V linVel0 = linVel0_; // work on local copies; results are stored back after the loop
+ Vec3V angState0 = angState0_;
+ Vec3V linVel1 = linVel1_;
+ Vec3V angState1 = angState1_;
+ FloatV accumulatedNormalImpulse = FZero();
+
+ const Vec3V delLinVel0 = V3Scale(contactNormal, invMassA); // loop-invariant linear velocity change per unit force for body 0
+ const Vec3V delLinVel1 = V3Scale(contactNormal, invMassB); // same for body 1
+
+ for(PxU32 i=0;i<nbContactPoints;i++)
+ {
+ SolverContactPoint& c = contacts[i];
+ Ps::prefetchLine(&contacts[i], 128); // prefetch 128 bytes ahead of the current contact
+
+ const Vec3V raXn = c.raXn;
+
+ const Vec3V rbXn = c.rbXn;
+
+ const FloatV appliedForce = FLoad(forceBuffer[i]);
+ const FloatV velMultiplier = c.getVelMultiplier();
+
+ /*const FloatV targetVel = c.getTargetVelocity();
+ const FloatV nScaledBias = c.getScaledBias();*/
+ const FloatV maxImpulse = c.getMaxImpulse();
+
+ //Compute the normal velocity of the constraint.
+ const Vec3V v0 = V3MulAdd(linVel0, contactNormal, V3Mul(angState0, raXn)); // component-wise products; summed into a scalar below
+ const Vec3V v1 = V3MulAdd(linVel1, contactNormal, V3Mul(angState1, rbXn));
+ const FloatV normalVel = V3SumElems(V3Sub(v0, v1)); // body 0's projected velocity minus body 1's
+
+ const FloatV biasedErr = c.getBiasedErr();//FScaleAdd(targetVel, velMultiplier, nScaledBias);
+
+ //KS - clamp the maximum force
+ const FloatV _deltaF = FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce)); // lower bound -appliedForce keeps the accumulated force non-negative; FNegScaleSub is presumably biasedErr - normalVel*velMultiplier
+ const FloatV _newForce = FAdd(appliedForce, _deltaF);
+ const FloatV newForce = FMin(_newForce, maxImpulse); // upper bound on the accumulated force
+ const FloatV deltaF = FSub(newForce, appliedForce); // change actually applied after both clamps
+
+ linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0); // body 0 uses ScaleAdd, body 1 uses NegScaleSub (presumably the opposite sign)
+ linVel1 = V3NegScaleSub(delLinVel1, deltaF, linVel1);
+ angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
+ angState1 = V3NegScaleSub(rbXn, FMul(deltaF, angDom1), angState1);
+
+ FStore(newForce, &forceBuffer[i]); // persist the accumulated force for this contact
+
+ accumulatedNormalImpulse = FAdd(accumulatedNormalImpulse, newForce);
+ }
+
+ linVel0_ = linVel0;
+ angState0_ = angState0;
+ linVel1_ = linVel1;
+ angState1_ = angState1;
+ return accumulatedNormalImpulse;
+}
+
+PX_FORCE_INLINE static FloatV solveStaticContacts(SolverContactPoint* contacts, const PxU32 nbContactPoints, const Vec3VArg contactNormal, // Single-body variant of the contact solve: only body 0's velocity state is read and updated.
+ const FloatVArg invMassA, const FloatVArg angDom0, Vec3V& linVel0_, Vec3V& angState0_, PxF32* PX_RESTRICT forceBuffer) // forceBuffer[i] holds contact i's accumulated force and is updated in place; returns the sum of the new forces.
+{
+ Vec3V linVel0 = linVel0_; // work on local copies; results are stored back after the loop
+ Vec3V angState0 = angState0_;
+ FloatV accumulatedNormalImpulse = FZero();
+
+ const Vec3V delLinVel0 = V3Scale(contactNormal, invMassA); // loop-invariant linear velocity change per unit force
+
+ for(PxU32 i=0;i<nbContactPoints;i++)
+ {
+ SolverContactPoint& c = contacts[i];
+ Ps::prefetchLine(&contacts[i],128); // prefetch 128 bytes ahead of the current contact
+
+ const Vec3V raXn = c.raXn;
+
+ const FloatV appliedForce = FLoad(forceBuffer[i]);
+ const FloatV velMultiplier = c.getVelMultiplier();
+
+ /*const FloatV targetVel = c.getTargetVelocity();
+ const FloatV nScaledBias = c.getScaledBias();*/
+ const FloatV maxImpulse = c.getMaxImpulse();
+
+ const Vec3V v0 = V3MulAdd(linVel0, contactNormal, V3Mul(angState0, raXn)); // component-wise products; summed into a scalar below
+ const FloatV normalVel = V3SumElems(v0); // no second-body term is subtracted here
+
+
+ const FloatV biasedErr = c.getBiasedErr();//FScaleAdd(targetVel, velMultiplier, nScaledBias);
+
+ // still lots to do here: using loop pipelining we can interweave this code with the
+ // above - the code here has a lot of stalls that we would thereby eliminate
+ const FloatV _deltaF = FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce)); // lower bound -appliedForce keeps the accumulated force non-negative
+ const FloatV _newForce = FAdd(appliedForce, _deltaF);
+ const FloatV newForce = FMin(_newForce, maxImpulse); // upper bound on the accumulated force
+ const FloatV deltaF = FSub(newForce, appliedForce); // change actually applied after both clamps
+
+ linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
+ angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
+
+ FStore(newForce, &forceBuffer[i]); // persist the accumulated force for this contact
+
+ accumulatedNormalImpulse = FAdd(accumulatedNormalImpulse, newForce);
+ }
+
+ linVel0_ = linVel0;
+ angState0_ = angState0;
+ return accumulatedNormalImpulse;
+}
+
+PX_FORCE_INLINE static FloatV solveExtContacts(SolverContactPointExt* contacts, const PxU32 nbContactPoints, const Vec3VArg contactNormal, // Solves nbContactPoints contacts stored as SolverContactPointExt, using the per-contact velocity deltas held in each point.
+ Vec3V& linVel0, Vec3V& angVel0, // velocity state of body 0, updated in place
+ Vec3V& linVel1, Vec3V& angVel1, // velocity state of body 1, updated in place
+ Vec3V& li0, Vec3V& ai0, // accumulators for body 0: contactNormal*deltaF and raXn*deltaF are added per contact
+ Vec3V& li1, Vec3V& ai1, // accumulators for body 1: contactNormal*deltaF and rbXn*deltaF are added per contact
+ PxF32* PX_RESTRICT appliedForceBuffer) // appliedForceBuffer[i] holds contact i's accumulated force and is updated in place; returns the sum of the new forces
+ {
+
+ FloatV accumulatedNormalImpulse = FZero();
+ for(PxU32 i=0;i<nbContactPoints;i++)
+ {
+ SolverContactPointExt& c = contacts[i];
+ Ps::prefetchLine(&contacts[i+1]); // prefetch the next contact
+
+ const Vec3V raXn = c.raXn;
+ const Vec3V rbXn = c.rbXn;
+
+ const FloatV appliedForce = FLoad(appliedForceBuffer[i]);
+ const FloatV velMultiplier = c.getVelMultiplier();
+
+ /*const FloatV targetVel = c.getTargetVelocity();
+ const FloatV scaledBias = c.getScaledBias();*/
+
+ //Compute the normal velocity of the constraint.
+
+ Vec3V v = V3MulAdd(linVel0, contactNormal, V3Mul(angVel0, raXn));
+ v = V3Sub(v, V3MulAdd(linVel1, contactNormal, V3Mul(angVel1, rbXn)));
+ const FloatV normalVel = V3SumElems(v); // body 0's projected velocity minus body 1's
+
+ const FloatV biasedErr = c.getBiasedErr();//FNeg(scaledBias);
+
+ // still lots to do here: using loop pipelining we can interweave this code with the
+ // above - the code here has a lot of stalls that we would thereby eliminate
+
+ const FloatV deltaF = FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce)); // lower bound only. NOTE(review): unlike solveDynamicContacts/solveStaticContacts, no maxImpulse clamp is applied here - confirm this is intended
+
+ linVel0 = V3ScaleAdd(c.linDeltaVA, deltaF, linVel0);
+ angVel0 = V3ScaleAdd(c.angDeltaVA, deltaF, angVel0);
+ linVel1 = V3ScaleAdd(c.linDeltaVB, deltaF, linVel1); // body 1's deltas are added, not subtracted - presumably the sign is baked into linDeltaVB/angDeltaVB; verify against the prep code
+ angVel1 = V3ScaleAdd(c.angDeltaVB, deltaF, angVel1);
+
+ li0 = V3ScaleAdd(contactNormal, deltaF, li0); ai0 = V3ScaleAdd(raXn, deltaF, ai0);
+ li1 = V3ScaleAdd(contactNormal, deltaF, li1); ai1 = V3ScaleAdd(rbXn, deltaF, ai1);
+
+ const FloatV newAppliedForce = FAdd(appliedForce, deltaF);
+
+ FStore(newAppliedForce, &appliedForceBuffer[i]); // persist the accumulated force for this contact
+
+ accumulatedNormalImpulse = FAdd(accumulatedNormalImpulse, newAppliedForce);
+ }
+ return accumulatedNormalImpulse;
+ }
+
+}
+
+}
+
+#endif //PX_SUPPORT_SIMD
+
+#endif //DY_SOLVER_CORE_SHARED_H
+
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact.h
new file mode 100644
index 00000000..f204633c
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact.h
@@ -0,0 +1,228 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef DY_SOLVERCONTACT_H
+#define DY_SOLVERCONTACT_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "foundation/PxVec3.h"
+#include "PxvConfig.h"
+#include "PsVecMath.h"
+
+namespace physx
+{
+
+using namespace Ps::aos;
+
+namespace Sc
+{
+ class ShapeInteraction;
+}
+/**
+\brief A header to represent a friction patch for the solver.
+*/
+
+namespace Dy
+{
+
+struct SolverContactHeader // Header describing one contact/friction patch for the solver; the trailing numeric comments are running byte offsets (32-bit, then 64-bit where they differ).
+{
+ enum DySolverContactFlags
+ {
+ eHAS_FORCE_THRESHOLDS = 0x1
+ };
+
+ PxU8 type; //Note: type should be first as the solver expects a type in the first byte.
+ PxU8 flags;
+ PxU8 numNormalConstr;
+ PxU8 numFrictionConstr; //4
+
+ PxReal angDom0; //8
+ PxReal angDom1; //12
+ PxReal invMass0; //16
+
+ Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W; //32
+ Vec3V normal; //48
+
+ PxReal invMass1; //52
+ PxU32 broken; //56
+ PxU8* frictionBrokenWritebackByte; //60 64
+ Sc::ShapeInteraction* shapeInteraction; //64 72
+#if PX_P64_FAMILY
+ PxU32 pad[2]; //64 80
+#endif // PX_P64_FAMILY
+
+
+ PX_FORCE_INLINE void setStaticFriction(const FloatV f) {staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W,f);} // FloatV setters: rebuild the packed vector with one lane replaced
+ PX_FORCE_INLINE void setDynamicFriction(const FloatV f) {staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W,f);}
+ PX_FORCE_INLINE void setDominance0(const FloatV f) {staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W,f);}
+ PX_FORCE_INLINE void setDominance1(const FloatV f) {staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W,f);}
+
+ PX_FORCE_INLINE FloatV getStaticFriction() const {return V4GetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);} // FloatV getters: one lane of the packed vector each
+ PX_FORCE_INLINE FloatV getDynamicFriction() const {return V4GetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
+ PX_FORCE_INLINE FloatV getDominance0() const {return V4GetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
+ PX_FORCE_INLINE FloatV getDominance1() const {return V4GetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
+
+ PX_FORCE_INLINE void setStaticFriction(PxF32 f) {V4WriteX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);} // scalar overloads of the setters above
+ PX_FORCE_INLINE void setDynamicFriction(PxF32 f) {V4WriteY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);}
+ PX_FORCE_INLINE void setDominance0(PxF32 f) {V4WriteZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);}
+ PX_FORCE_INLINE void setDominance1(PxF32 f) {V4WriteW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);}
+
+ PX_FORCE_INLINE PxF32 getStaticFrictionPxF32() const {return V4ReadX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);} // scalar getters for the same four lanes
+ PX_FORCE_INLINE PxF32 getDynamicFrictionPxF32() const {return V4ReadY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
+ PX_FORCE_INLINE PxF32 getDominance0PxF32() const {return V4ReadZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
+ PX_FORCE_INLINE PxF32 getDominance1PxF32() const {return V4ReadW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);}
+};
+
+#if !PX_P64_FAMILY
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader) == 64); // layout is pinned: 64 bytes when PX_P64_FAMILY is not set
+#else
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader) == 80); // 80 bytes when PX_P64_FAMILY is set (includes pad[2])
+#endif
+
+/**
+\brief A single rigid body contact point for the solver.
+*/
+struct SolverContactPoint // Per-contact solver data: two Vec3V terms plus four scalars; size is asserted to be 48 bytes below.
+{
+ Vec3V raXn; // multiplied with body 0's angular state by the contact solvers
+ Vec3V rbXn; // multiplied with body 1's angular state by the contact solvers
+
+ PxF32 velMultiplier; // scales the normal velocity in the solvers' force update
+ PxF32 biasedErr; // error term read by the solvers through getBiasedErr()
+ PxF32 unbiasedErr; // written by setUnbiasedErr(); no getter is declared in this struct
+ PxF32 maxImpulse; // upper clamp read through getMaxImpulse(); no setter is declared in this struct
+
+ PX_FORCE_INLINE FloatV getVelMultiplier() const {return FLoad(velMultiplier);} // FloatV getters load the scalar members via FLoad
+
+ PX_FORCE_INLINE FloatV getBiasedErr() const {return FLoad(biasedErr);}
+ PX_FORCE_INLINE FloatV getMaxImpulse() const {return FLoad(maxImpulse);}
+
+
+#ifdef PX_SUPPORT_SIMD
+ PX_FORCE_INLINE Vec3V getRaXn() const {return raXn;}
+ PX_FORCE_INLINE Vec3V getRbXn() const {return rbXn;}
+#endif
+
+ PX_FORCE_INLINE void setRaXn(const PxVec3& v) {V3WriteXYZ(raXn, v);}
+ PX_FORCE_INLINE void setRbXn(const PxVec3& v) {V3WriteXYZ(rbXn, v);}
+ PX_FORCE_INLINE void setVelMultiplier(PxF32 f) {velMultiplier = f;}
+
+ PX_FORCE_INLINE void setBiasedErr(PxF32 f) {biasedErr = f;}
+ PX_FORCE_INLINE void setUnbiasedErr(PxF32 f) {unbiasedErr = f;}
+
+ PX_FORCE_INLINE PxF32 getVelMultiplierPxF32() const {return velMultiplier;} // scalar (non-SIMD) accessors
+ PX_FORCE_INLINE const PxVec3& getRaXnPxVec3() const {return V3ReadXYZ(raXn);}
+ PX_FORCE_INLINE const PxVec3& getRbXnPxVec3() const {return V3ReadXYZ(rbXn);}
+ PX_FORCE_INLINE PxF32 getBiasedErrPxF32() const {return biasedErr;}
+};
+
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactPoint) == 48); // layout is pinned at 48 bytes
+
+/**
+\brief A single extended articulation contact point for the solver.
+*/
+struct SolverContactPointExt : public SolverContactPoint // Adds four per-contact velocity-delta vectors; solveExtContacts scales each by deltaF and adds it to the matching body velocity.
+{
+ Vec3V linDeltaVA; // added (times deltaF) to body 0's linear velocity
+ Vec3V angDeltaVA; // added (times deltaF) to body 0's angular velocity
+ Vec3V linDeltaVB; // added (times deltaF) to body 1's linear velocity
+ Vec3V angDeltaVB; // added (times deltaF) to body 1's angular velocity
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactPointExt) == 112); // layout is pinned at 112 bytes
+
+
+/**
+\brief A single friction constraint for the solver.
+*/
+struct SolverContactFriction // Friction row: three Vec4V members each pack a 3-vector in XYZ and a scalar in W; the trailing numeric comments are running byte offsets.
+{
+ Vec4V normalXYZ_appliedForceW; //16
+ Vec4V raXnXYZ_velMultiplierW; //32
+ Vec4V rbXnXYZ_biasW; //48
+ PxReal targetVel; //52
+ PxU32 mPad[3]; //64
+
+ PX_FORCE_INLINE void setAppliedForce(const FloatV f) {normalXYZ_appliedForceW=V4SetW(normalXYZ_appliedForceW,f);} // FloatV setters replace only the W lane
+ PX_FORCE_INLINE void setVelMultiplier(const FloatV f) {raXnXYZ_velMultiplierW=V4SetW(raXnXYZ_velMultiplierW,f);}
+ PX_FORCE_INLINE void setBias(const FloatV f) {rbXnXYZ_biasW=V4SetW(rbXnXYZ_biasW,f);}
+
+ PX_FORCE_INLINE FloatV getAppliedForce() const {return V4GetW(normalXYZ_appliedForceW);} // FloatV getters return the W lane
+ PX_FORCE_INLINE FloatV getVelMultiplier() const {return V4GetW(raXnXYZ_velMultiplierW);}
+ PX_FORCE_INLINE FloatV getBias() const {return V4GetW(rbXnXYZ_biasW);}
+
+#ifdef PX_SUPPORT_SIMD
+ PX_FORCE_INLINE Vec3V getNormal() const {return Vec3V_From_Vec4V(normalXYZ_appliedForceW);} // Vec3V views of the XYZ part
+ PX_FORCE_INLINE Vec3V getRaXn() const {return Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);}
+ PX_FORCE_INLINE Vec3V getRbXn() const {return Vec3V_From_Vec4V(rbXnXYZ_biasW);}
+#endif
+
+ PX_FORCE_INLINE void setNormal(const PxVec3& v) {V4WriteXYZ(normalXYZ_appliedForceW, v);} // PxVec3 setters write the XYZ part
+ PX_FORCE_INLINE void setRaXn(const PxVec3& v) {V4WriteXYZ(raXnXYZ_velMultiplierW, v);}
+ PX_FORCE_INLINE void setRbXn(const PxVec3& v) {V4WriteXYZ(rbXnXYZ_biasW, v);}
+
+ PX_FORCE_INLINE const PxVec3& getNormalPxVec3() const {return V4ReadXYZ(normalXYZ_appliedForceW);}
+ PX_FORCE_INLINE const PxVec3& getRaXnPxVec3() const {return V4ReadXYZ(raXnXYZ_velMultiplierW);}
+ PX_FORCE_INLINE const PxVec3& getRbXnPxVec3() const {return V4ReadXYZ(rbXnXYZ_biasW);}
+
+ PX_FORCE_INLINE void setAppliedForce(PxF32 f) {V4WriteW(normalXYZ_appliedForceW, f);} // scalar overloads of the W-lane setters
+ PX_FORCE_INLINE void setVelMultiplier(PxF32 f) {V4WriteW(raXnXYZ_velMultiplierW, f);}
+ PX_FORCE_INLINE void setBias(PxF32 f) {V4WriteW(rbXnXYZ_biasW, f);}
+
+ PX_FORCE_INLINE PxF32 getAppliedForcePxF32() const {return V4ReadW(normalXYZ_appliedForceW);} // scalar getters for the W lanes
+ PX_FORCE_INLINE PxF32 getVelMultiplierPxF32() const {return V4ReadW(raXnXYZ_velMultiplierW);}
+ PX_FORCE_INLINE PxF32 getBiasPxF32() const {return V4ReadW(rbXnXYZ_biasW);}
+
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFriction) == 64); // layout is pinned at 64 bytes
+
+/**
+\brief A single extended articulation friction constraint for the solver.
+*/
+struct SolverContactFrictionExt : public SolverContactFriction // Friction row extended with four delta vectors, mirroring the extra fields of SolverContactPointExt.
+{
+ Vec3V linDeltaVA; // linear delta for body A - presumably per unit impulse, as in SolverContactPointExt; confirm in the friction solver
+ Vec3V angDeltaVA; // angular delta for body A
+ Vec3V linDeltaVB; // linear delta for body B
+ Vec3V angDeltaVB; // angular delta for body B
+};
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionExt) == 128); // layout is pinned at 128 bytes
+
+}
+
+}
+
+
+
+#endif //DY_SOLVERCONTACT_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact4.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact4.h
new file mode 100644
index 00000000..31fc9a6d
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact4.h
@@ -0,0 +1,179 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef DY_SOLVERCONTACT4_H
+#define DY_SOLVERCONTACT4_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "foundation/PxVec3.h"
+#include "PxvConfig.h"
+#include "PsVecMath.h"
+#include "DySolverContact.h"
+
+namespace physx
+{
+
+struct PxcNpWorkUnit;
+struct PxSolverBody;
+struct PxSolverBodyData;
+struct PxSolverConstraintDesc;
+
+namespace Sc
+{
+ class ShapeInteraction;
+}
+
+namespace Dy
+{
+
+
+
+
+/**
+\brief Batched SOA contact data: header for a batch of 4 contact constraints. Note, we don't support batching with extended contacts for the simple reason that handling multiple articulations would be complex.
+*/
+struct SolverContactHeader4
+{
+ enum // bit flags; NOTE(review): which of 'flag' / 'flags[4]' stores these is not visible here — confirm
+ {
+ eHAS_MAX_IMPULSE = 1 << 0,
+ eHAS_TARGET_VELOCITY = 1 << 1
+ };
+
+ PxU8 type; //Note: 'type' must remain the first member as the solver expects a type in the first byte.
+ PxU8 numNormalConstr;
+ PxU8 numFrictionConstr;
+ PxU8 flag;
+
+ PxU8 flags[4];
+ //These counts are the max of the 4 sets of data (presumably this refers to numNormalConstr/numFrictionConstr above).
+ //When certain pairs have fewer patches/contacts than others, they are padded with 0s so that no work is performed but
+ //calculations are still shared (after all, they're computationally free because we're doing 4 things at a time in SIMD)
+
+ //KS - used for write-back only: per-pair counts, one for each of the 4 batched pairs
+ PxU8 numNormalConstr0, numNormalConstr1, numNormalConstr2, numNormalConstr3;
+ PxU8 numFrictionConstr0, numFrictionConstr1, numFrictionConstr2, numFrictionConstr3;
+
+ Vec4V restitution;
+ Vec4V staticFriction;
+ Vec4V dynamicFriction;
+ //Technically, these mass properties could be pulled out into a new structure and shared. For multi-manifold contacts,
+ //this would save 64 bytes per-manifold after the cost of the first manifold
+ Vec4V invMass0D0;
+ Vec4V invMass1D1;
+ Vec4V angDom0;
+ Vec4V angDom1;
+ //Normal is shared between all contacts in the batch. This will save some memory!
+ Vec4V normalX;
+ Vec4V normalY;
+ Vec4V normalZ;
+
+ Sc::ShapeInteraction* shapeInteraction[4]; //struct ends at byte 192 (32-bit pointers) or 208 (64-bit pointers)
+};
+
+#if !PX_P64_FAMILY
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader4) == 192); // 16 header bytes + 10 Vec4V + 4 x 4-byte pointers
+#else
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader4) == 208); // 16 header bytes + 10 Vec4V + 4 x 8-byte pointers
+#endif
+
+
+/**
+\brief This represents a batch of 4 contacts with static rolled into a single structure. It holds six Vec4V members (96 bytes, asserted below); presumably each Vec4V lane belongs to one of the 4 batched contacts.
+*/
+struct SolverContactBatchPointBase4
+{
+ Vec4V raXnX; // X, Y, Z of "raXn", split into one Vec4V per axis
+ Vec4V raXnY;
+ Vec4V raXnZ;
+ Vec4V velMultiplier;
+ Vec4V scaledBias;
+ Vec4V biasedErr;
+};
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactBatchPointBase4) == 96); // 6 x 16-byte Vec4V
+
+/**
+\brief Contains the additional data required to represent 4 contacts between 2 dynamic bodies: the "rbXn" terms, stored after the base members.
+@see SolverContactBatchPointBase4
+*/
+struct SolverContactBatchPointDynamic4 : public SolverContactBatchPointBase4
+{
+ Vec4V rbXnX; // X, Y, Z of "rbXn", split into one Vec4V per axis like raXn in the base
+ Vec4V rbXnY;
+ Vec4V rbXnZ;
+};
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactBatchPointDynamic4) == 144); // 96-byte base + 3 x 16-byte Vec4V
+
+/**
+\brief This represents the shared information of a batch of 4 friction constraints
+*/
+struct SolverFrictionSharedData4
+{
+ BoolV broken;
+ PxU8* frictionBrokenWritebackByte[4]; // one write-back byte pointer per batched constraint
+ Vec4V normalX[2]; // two entries per axis; presumably one per friction direction — TODO confirm
+ Vec4V normalY[2];
+ Vec4V normalZ[2];
+};
+#if !PX_P64_FAMILY
+PX_COMPILE_TIME_ASSERT(sizeof(SolverFrictionSharedData4) == 128); // size is only checked for 32-bit pointers; no assert exists for 64-bit builds
+#endif
+
+
+/**
+\brief This represents a batch of 4 friction constraints with static rolled into a single structure. It holds six Vec4V members (96 bytes, asserted below).
+*/
+struct SolverContactFrictionBase4
+{
+ Vec4V raXnX; // X, Y, Z of "raXn", split into one Vec4V per axis
+ Vec4V raXnY;
+ Vec4V raXnZ;
+ Vec4V scaledBias; // note: scaledBias precedes velMultiplier here, the reverse of SolverContactBatchPointBase4
+ Vec4V velMultiplier;
+ Vec4V targetVelocity;
+};
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionBase4) == 96); // 6 x 16-byte Vec4V
+
+/**
+\brief Contains the additional data required to represent 4 friction constraints between 2 dynamic bodies: the "rbXn" terms, stored after the base members.
+@see SolverContactFrictionBase4
+*/
+struct SolverContactFrictionDynamic4 : public SolverContactFrictionBase4
+{
+ Vec4V rbXnX; // X, Y, Z of "rbXn", split into one Vec4V per axis like raXn in the base
+ Vec4V rbXnY;
+ Vec4V rbXnZ;
+};
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionDynamic4) == 144); // 96-byte base + 3 x 16-byte Vec4V
+
+}
+
+}
+
+#endif //DY_SOLVERCONTACT4_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF.h
new file mode 100644
index 00000000..e18421e9
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF.h
@@ -0,0 +1,123 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef DY_SOLVERCONTACTPF_H
+#define DY_SOLVERCONTACTPF_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "foundation/PxVec3.h"
+#include "PxvConfig.h"
+#include "PsVecMath.h"
+
+namespace physx
+{
+
+using namespace Ps::aos;
+
+namespace Dy
+{
+
+struct SolverContactCoulombHeader // 48-byte header (asserted below); trailing numbers in the member comments are running end offsets in bytes
+{
+ PxU8 type; //Note: 'type' must remain the first member as the solver expects a type in the first byte.
+ PxU8 numNormalConstr;
+ PxU16 frictionOffset; //4
+ //PxF32 restitution;
+ PxF32 angDom0; //8
+ PxF32 dominance0; //12
+ PxF32 dominance1; //16
+ PX_ALIGN(16, PxVec3 normalXYZ); //28
+ PxF32 angDom1; //32
+
+ Sc::ShapeInteraction* shapeInteraction; //36 40 (32-bit / 64-bit). NOTE(review): no declaration of Sc::ShapeInteraction is visible in this header; presumably it comes from an include or an earlier header — confirm
+ PxU8 flags; //37 41
+ PxU8 pad0[3]; //40 44
+#if !PX_P64_FAMILY
+ PxU32 pad1[2]; //48
+#else
+ PxU32 pad1; // 48
+#endif
+
+
+
+ PX_FORCE_INLINE void setDominance0(const FloatV f) {FStore(f, &dominance0);}
+ PX_FORCE_INLINE void setDominance1(const FloatV f) {FStore(f, &dominance1);}
+ PX_FORCE_INLINE void setNormal(const Vec3V n) {V3StoreA(n, normalXYZ);} // aligned store; normalXYZ is declared with PX_ALIGN(16, ...)
+
+ PX_FORCE_INLINE FloatV getDominance0() const {return FLoad(dominance0);}
+ PX_FORCE_INLINE FloatV getDominance1() const {return FLoad(dominance1);}
+ //PX_FORCE_INLINE FloatV getRestitution() const {return FLoad(restitution);}
+ PX_FORCE_INLINE Vec3V getNormal()const {return V3LoadA(normalXYZ);} // aligned load, matching setNormal
+
+
+ PX_FORCE_INLINE void setDominance0(PxF32 f) { dominance0 = f; } // scalar overloads of the FloatV setters above
+ PX_FORCE_INLINE void setDominance1(PxF32 f) { dominance1 = f;}
+ //PX_FORCE_INLINE void setRestitution(PxF32 f) { restitution = f;}
+
+ PX_FORCE_INLINE PxF32 getDominance0PxF32() const {return dominance0;}
+ PX_FORCE_INLINE PxF32 getDominance1PxF32() const {return dominance1;}
+ //PX_FORCE_INLINE PxF32 getRestitutionPxF32() const {return restitution;}
+
+};
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactCoulombHeader) == 48); // pad1 is sized per pointer width so both builds reach 48 bytes
+
+struct SolverFrictionHeader // 32-byte friction header (asserted below)
+{
+ PxU8 type; //Note: 'type' must remain the first member as the solver expects a type in the first byte.
+ PxU8 numNormalConstr;
+ PxU8 numFrictionConstr;
+ PxU8 flags;
+ PxF32 staticFriction;
+ PxF32 invMass0D0;
+ PxF32 invMass1D1;
+ PxF32 angDom0;
+ PxF32 angDom1;
+ PxU32 pad2[2]; // explicit padding: 4 bytes of counts/flags + 5 floats + 8 bytes of padding = 32
+
+ PX_FORCE_INLINE void setStaticFriction(const FloatV f) {FStore(f, &staticFriction);}
+
+ PX_FORCE_INLINE FloatV getStaticFriction() const {return FLoad(staticFriction);}
+
+ PX_FORCE_INLINE void setStaticFriction(PxF32 f) {staticFriction = f;} // scalar overload of the FloatV setter above
+
+ PX_FORCE_INLINE PxF32 getStaticFrictionPxF32() const {return staticFriction;}
+ // The two helpers below return a byte count: the constraint count rounded up to a multiple of 4, times sizeof(PxU32).
+ PX_FORCE_INLINE PxU32 getAppliedForcePaddingSize() const {return sizeof(PxU32)*((4 * ((numNormalConstr + 3)/4)));}
+ static PX_FORCE_INLINE PxU32 getAppliedForcePaddingSize(const PxU32 numConstr) {return sizeof(PxU32)*((4 * ((numConstr + 3)/4)));}
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverFrictionHeader) == 32);
+
+}
+
+}
+
+#endif //DY_SOLVERCONTACTPF_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF4.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF4.h
new file mode 100644
index 00000000..7cf3b94d
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF4.h
@@ -0,0 +1,155 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef DY_SOLVER_CONTACT_PF_4_H
+#define DY_SOLVER_CONTACT_PF_4_H
+
+#include "foundation/PxSimpleTypes.h"
+#include "foundation/PxVec3.h"
+#include "PxvConfig.h"
+#include "PsVecMath.h"
+
+namespace physx
+{
+
+using namespace Ps::aos;
+
+namespace Sc
+{
+ class ShapeInteraction;
+}
+
+namespace Dy
+{
+
+struct SolverContactCoulombHeader4 // header for a batch of 4; trailing numbers in the member comments are running end offsets in bytes
+{
+ PxU8 type; //Note: 'type' must remain the first member as the solver expects a type in the first byte.
+ PxU8 numNormalConstr;
+ PxU16 frictionOffset;
+ PxU8 numNormalConstr0, numNormalConstr1, numNormalConstr2, numNormalConstr3; // per-pair counts, one for each of the 4 batched pairs
+ PxU8 flags[4];
+ PxU32 pad; //16
+ Vec4V restitution; //32
+ Vec4V normalX; //48
+ Vec4V normalY; //64
+ Vec4V normalZ; //80
+ Vec4V invMassADom; //96
+ Vec4V invMassBDom; //112
+ Vec4V angD0; //128
+ Vec4V angD1; //144
+ Sc::ShapeInteraction* shapeInteraction[4]; //160 (32-bit pointers) or 176 (64-bit pointers)
+};
+
+#if !PX_P64_FAMILY
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactCoulombHeader4) == 160); // 144 bytes + 4 x 4-byte pointers
+#else
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContactCoulombHeader4) == 176); // 144 bytes + 4 x 8-byte pointers
+#endif
+
+struct SolverContact4Base // eight Vec4V members, 128 bytes (asserted below); presumably one lane per batched contact — TODO confirm
+{
+ Vec4V raXnX; // X, Y, Z of "raXn", split into one Vec4V per axis
+ Vec4V raXnY;
+ Vec4V raXnZ;
+ Vec4V appliedForce;
+ Vec4V velMultiplier;
+ Vec4V targetVelocity;
+ Vec4V scaledBias;
+ Vec4V maxImpulse;
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContact4Base) == 128); // 8 x 16-byte Vec4V
+
+struct SolverContact4Dynamic : public SolverContact4Base // adds the "rbXn" terms after the base members
+{
+ Vec4V rbXnX; // X, Y, Z of "rbXn", split into one Vec4V per axis like raXn in the base
+ Vec4V rbXnY;
+ Vec4V rbXnZ;
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverContact4Dynamic) == 176); // 128-byte base + 3 x 16-byte Vec4V
+
+struct SolverFrictionHeader4 // friction header for a batch of 4; 96 bytes (asserted below)
+{
+ PxU8 type; //Note: 'type' must remain the first member as the solver expects a type in the first byte.
+ PxU8 numNormalConstr;
+ PxU8 numFrictionConstr;
+ PxU8 numNormalConstr0; // numNormalConstr0..3 and numFrictionConstr0..3: per-pair counts for the 4 batched pairs
+ PxU8 numNormalConstr1;
+ PxU8 numNormalConstr2;
+ PxU8 numNormalConstr3;
+ PxU8 numFrictionConstr0;
+ PxU8 numFrictionConstr1;
+ PxU8 numFrictionConstr2;
+ PxU8 numFrictionConstr3;
+ PxU8 pad0; // rounds the twelve PxU8 members up to 12 bytes so frictionPerContact is 4-byte aligned
+ PxU32 frictionPerContact; // ends at byte 16
+
+ Vec4V staticFriction;
+ Vec4V invMassADom;
+ Vec4V invMassBDom;
+ Vec4V angD0;
+ Vec4V angD1;
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverFrictionHeader4) == 96); // 16 header bytes + 5 x 16-byte Vec4V
+
+struct SolverFriction4Base // nine Vec4V members, 144 bytes (asserted below); presumably one lane per batched constraint — TODO confirm
+{
+ Vec4V normalX; // X, Y, Z of the normal, split into one Vec4V per axis
+ Vec4V normalY;
+ Vec4V normalZ;
+ Vec4V raXnX; // X, Y, Z of "raXn", split into one Vec4V per axis
+ Vec4V raXnY;
+ Vec4V raXnZ;
+ Vec4V appliedForce;
+ Vec4V velMultiplier;
+ Vec4V targetVelocity;
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverFriction4Base) == 144); // 9 x 16-byte Vec4V
+
+struct SolverFriction4Dynamic : public SolverFriction4Base // adds the "rbXn" terms after the base members
+{
+ Vec4V rbXnX; // X, Y, Z of "rbXn", split into one Vec4V per axis like raXn in the base
+ Vec4V rbXnY;
+ Vec4V rbXnZ;
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(SolverFriction4Dynamic) == 192); // 144-byte base + 3 x 16-byte Vec4V
+
+}
+
+}
+
+
+
+#endif //DY_SOLVER_CONTACT_PF_4_H
+
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContext.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContext.h
new file mode 100644
index 00000000..df3d7fea
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContext.h
@@ -0,0 +1,64 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_SOLVERCONTEXT_H
+#define DY_SOLVERCONTEXT_H
+
+namespace physx
+{
+ struct PxSolverBodyData;
+
+namespace Dy
+{
+ struct ThresholdStreamElement;
+
+
+struct SolverContext // per-call solver state; has no constructor, so every field must be assigned by the caller. NOTE(review): PxU32/PxI32/PX_RESTRICT are used but this header includes nothing — it relies on the includer
+{
+ bool doFriction;
+ bool writeBackIteration;
+
+ // for threshold stream output: a local staging buffer, its fill index and its capacity
+ ThresholdStreamElement* mThresholdStream;
+ PxU32 mThresholdStreamIndex;
+ PxU32 mThresholdStreamLength;
+ PxSolverBodyData* solverBodyArray;
+ // shared output stream, its length, and the shared pair counter
+ ThresholdStreamElement* PX_RESTRICT mSharedThresholdStream;
+ PxU32 mSharedThresholdStreamLength;
+ PxI32* mSharedOutThresholdPairs;
+
+};
+
+}
+
+}
+
+#endif //DY_SOLVERCONTEXT_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.cpp
new file mode 100644
index 00000000..688e0b81
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.cpp
@@ -0,0 +1,622 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "foundation/PxPreprocessor.h"
+
+#include "PsAllocator.h"
+#include <new>
+#include <stdio.h>
+#include "CmPhysXCommon.h"
+#include "DySolverBody.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverContact.h"
+#include "DyThresholdTable.h"
+#include "DySolverControl.h"
+#include "DyArticulationHelper.h"
+#include "PsAtomic.h"
+#include "PsIntrinsics.h"
+#include "DyArticulationPImpl.h"
+#include "PsThread.h"
+#include "DySolverConstraintDesc.h"
+#include "DySolverContext.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+//-----------------------------------
+// Forward declarations of solver entry points; none are defined in this section. The first group is the plain solve variant.
+void solve1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtContactBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExt1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContact_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solve1D4_Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+// "Conclude" variants of the functions above
+void solve1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtContactConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExt1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContact_BStaticConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solve1D4Block_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+// "Solve + write-back" variants
+void solve1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtContactBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExt1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContact_BStaticBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solve1D4Block_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+// Write-back-only variants
+void writeBack1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void contactBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void extContactBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void ext1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void contactPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void writeBack1D4Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+// PX_UNUSED_ATTRIBUTE marks a definition as possibly unused. It could move to PxPreprocessor.h,
+// but there is no implementation available for MSVC, so it expands to nothing outside the GCC family.
+#if PX_GCC_FAMILY
+#define PX_UNUSED_ATTRIBUTE __attribute__((unused))
+#else
+#define PX_UNUSED_ATTRIBUTE
+#endif
+// Discards its argument and expands to 0, so table entries wrapped in it start out as null.
+#define DYNAMIC_ARTICULATION_REGISTRATION(x) 0
+
+static SolveBlockMethod gVTableSolveBlock[] PX_UNUSED_ATTRIBUTE = // plain solve functions; trailing comments name the constraint type for each slot
+{
+ 0, // slot 0 has no solver function
+ solveContactBlock, // DY_SC_TYPE_RB_CONTACT
+ solve1DBlock, // DY_SC_TYPE_RB_1D
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactBlock), // DY_SC_TYPE_EXT_CONTACT (null until SolverCoreRegisterArticulationFns() runs)
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlock), // DY_SC_TYPE_EXT_1D (null until SolverCoreRegisterArticulationFns() runs)
+ solveContact_BStaticBlock, // DY_SC_TYPE_STATIC_CONTACT
+ solveContactBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
+ solveContactPreBlock, // DY_SC_TYPE_BLOCK_RB_CONTACT
+ solveContactPreBlock_Static, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
+ solve1D4_Block, // DY_SC_TYPE_BLOCK_1D,
+};
+
+static SolveWriteBackBlockMethod gVTableSolveWriteBackBlock[] PX_UNUSED_ATTRIBUTE = // solve + write-back functions, same slot order as gVTableSolveBlock
+{
+ 0, // slot 0 has no solver function
+ solveContactBlockWriteBack, // DY_SC_TYPE_RB_CONTACT
+ solve1DBlockWriteBack, // DY_SC_TYPE_RB_1D
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactBlockWriteBack), // DY_SC_TYPE_EXT_CONTACT (null until SolverCoreRegisterArticulationFns() runs)
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlockWriteBack), // DY_SC_TYPE_EXT_1D (null until SolverCoreRegisterArticulationFns() runs)
+ solveContact_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_CONTACT
+ solveContactBlockWriteBack, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
+ solveContactPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_RB_CONTACT
+ solveContactPreBlock_WriteBackStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
+ solve1D4Block_WriteBack, // DY_SC_TYPE_BLOCK_1D,
+};
+
+static SolveBlockMethod gVTableSolveConcludeBlock[] PX_UNUSED_ATTRIBUTE = // "conclude" functions, same slot order as gVTableSolveBlock
+{
+ 0, // slot 0 has no solver function
+ solveContactConcludeBlock, // DY_SC_TYPE_RB_CONTACT
+ solve1DConcludeBlock, // DY_SC_TYPE_RB_1D
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactConcludeBlock), // DY_SC_TYPE_EXT_CONTACT (null until SolverCoreRegisterArticulationFns() runs)
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DConcludeBlock), // DY_SC_TYPE_EXT_1D (null until SolverCoreRegisterArticulationFns() runs)
+ solveContact_BStaticConcludeBlock, // DY_SC_TYPE_STATIC_CONTACT
+ solveContactConcludeBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
+ solveContactPreBlock_Conclude, // DY_SC_TYPE_BLOCK_RB_CONTACT
+ solveContactPreBlock_ConcludeStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
+ solve1D4Block_Conclude, // DY_SC_TYPE_BLOCK_1D,
+};
+
+void SolverCoreRegisterArticulationFns() // fills the EXT_CONTACT / EXT_1D slots of the three tables, which are initialised to 0
+{
+ gVTableSolveBlock[DY_SC_TYPE_EXT_CONTACT] = solveExtContactBlock;
+ gVTableSolveBlock[DY_SC_TYPE_EXT_1D] = solveExt1DBlock;
+ // same two slots in the write-back and conclude tables
+ gVTableSolveWriteBackBlock[DY_SC_TYPE_EXT_CONTACT] = solveExtContactBlockWriteBack;
+ gVTableSolveWriteBackBlock[DY_SC_TYPE_EXT_1D] = solveExt1DBlockWriteBack;
+ gVTableSolveConcludeBlock[DY_SC_TYPE_EXT_CONTACT] = solveExtContactConcludeBlock;
+ gVTableSolveConcludeBlock[DY_SC_TYPE_EXT_1D] = solveExt1DConcludeBlock;
+}
+
+
+SolveBlockMethod* getSolveBlockTable() // returns a mutable pointer to the file-static gVTableSolveBlock array
+{
+ return gVTableSolveBlock;
+}
+
+SolveBlockMethod* getSolverConcludeBlockTable() // returns a mutable pointer to the file-static gVTableSolveConcludeBlock array
+{
+ return gVTableSolveConcludeBlock;
+}
+
+SolveWriteBackBlockMethod* getSolveWritebackBlockTable() // returns a mutable pointer to the file-static gVTableSolveWriteBackBlock array
+{
+ return gVTableSolveWriteBackBlock;
+}
+
+
+
+
+SolverCoreGeneral* SolverCoreGeneral::create() // allocates with PX_ALLOC and constructs in place; returns the allocation result unchanged
+{
+ SolverCoreGeneral* scg = reinterpret_cast<SolverCoreGeneral*>(
+ PX_ALLOC(sizeof(SolverCoreGeneral), "SolverCoreGeneral"));
+ // construct only when the allocation produced a non-null pointer
+ if(scg)
+ new (scg) SolverCoreGeneral;
+ // a null scg is passed straight back to the caller
+ return scg;
+}
+
+void SolverCoreGeneral::destroyV() // counterpart of create(): explicit destructor call, then PX_FREE of the object's own storage
+{
+ this->~SolverCoreGeneral();
+ PX_FREE(this); // the object must not be touched after this line
+}
+
+void SolverCoreGeneral::solveV_Blocks(SolverIslandParams& params) const
+{
+ // Runs all position iterations, snapshots velocities, then runs the velocity iterations (the last one through the write-back table).
+ const PxI32 TempThresholdStreamSize = 32;
+ ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize]; // local staging buffer exposed through cache.mThresholdStream
+
+ SolverContext cache; // NOTE(review): the mShared* fields stay unassigned until just before the write-back iteration below
+ cache.solverBodyArray = params.bodyDataList;
+ cache.mThresholdStream = tempThresholdStream;
+ cache.mThresholdStreamLength = TempThresholdStreamSize;
+ cache.mThresholdStreamIndex = 0;
+ cache.writeBackIteration = false;
+
+ PxI32 batchCount = PxI32(params.numConstraintHeaders);
+
+ PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart;
+ const PxU32 bodyListSize = params.bodyListSize;
+
+ Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
+
+ const PxU32 velocityIterations = params.velocityIterations;
+ const PxU32 positionIterations = params.positionIterations;
+
+ const PxU32 numConstraintHeaders = params.numConstraintHeaders;
+ const PxU32 articulationListSize = params.articulationListSize;
+
+ ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
+
+ PX_ASSERT(velocityIterations >= 1);
+ PX_ASSERT(positionIterations >= 1);
+ // Nothing to solve: copy each body's current velocities to the motion velocity array, save articulation velocities, and return.
+ if(numConstraintHeaders == 0)
+ {
+ for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++)
+ {
+ Cm::SpatialVector& motionVel = motionVelocityArray[baIdx];
+ PxSolverBody& atom = bodyListStart[baIdx];
+
+ motionVel.linear = atom.linearVelocity;
+ motionVel.angular = atom.angularState;
+ }
+
+ for (PxU32 i = 0; i < articulationListSize; i++)
+ ArticulationPImpl::saveVelocity(articulationListStart[i]);
+
+ return;
+ }
+
+ BatchIterator contactIterator(params.constraintBatchHeaders, params.numConstraintHeaders);
+
+ PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList;
+
+ //0-(n-1) iterations
+ PxI32 normalIter = 0;
+ PxI32 frictionIter = 0; // never incremented in this function; passed through as 0
+ // Position iterations: friction is enabled for the last three only, and the final one uses the conclude table.
+ for (PxU32 iteration = positionIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters
+ {
+ cache.doFriction = iteration<=3;
+
+ SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount,
+ cache, contactIterator, iteration == 1 ? gVTableSolveConcludeBlock : gVTableSolveBlock, normalIter, frictionIter, normalIter);
+
+ ++normalIter;
+ }
+ // Snapshot the body velocities after the position iterations into the motion velocity array.
+ for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++)
+ {
+ const PxSolverBody& atom = bodyListStart[baIdx];
+ Cm::SpatialVector& motionVel = motionVelocityArray[baIdx];
+ motionVel.linear = atom.linearVelocity;
+ motionVel.angular = atom.angularState;
+ }
+
+
+ for (PxU32 i = 0; i < articulationListSize; i++)
+ ArticulationPImpl::saveVelocity(articulationListStart[i]);
+
+
+ const PxI32 velItersMinOne = (PxI32(velocityIterations)) - 1;
+
+ PxI32 iteration = 0;
+ // All velocity iterations except the last use the plain solve table; cache.doFriction keeps the value set by the last position iteration.
+ for(; iteration < velItersMinOne; ++iteration)
+ {
+
+ SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount,
+ cache, contactIterator, gVTableSolveBlock, normalIter, frictionIter, normalIter);
+ ++normalIter;
+
+ }
+
+ PxI32* outThresholdPairs = params.outThresholdPairs;
+ ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream;
+ PxU32 thresholdStreamLength = params.thresholdStreamLength;
+ // Final velocity iteration: switch to write-back mode and publish the shared threshold stream through the context.
+ cache.writeBackIteration = true;
+ cache.mSharedThresholdStream = thresholdStream;
+ cache.mSharedThresholdStreamLength = thresholdStreamLength;
+ cache.mSharedOutThresholdPairs = outThresholdPairs;
+ for(; iteration < PxI32(velocityIterations); ++iteration)
+ {
+
+ SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount,
+ cache, contactIterator, gVTableSolveWriteBackBlock, normalIter, frictionIter, normalIter);
+ ++normalIter;
+
+ }
+
+ //Write back remaining threshold streams
+ if(cache.mThresholdStreamIndex > 0)
+ {
+ //Write back to global buffer: reserve a range by advancing the shared counter (atomicAdd presumably returns the post-add value, hence the subtraction — confirm)
+ PxI32 threshIndex = physx::shdfnd::atomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+ for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b)
+ {
+ thresholdStream[b + threshIndex] = cache.mThresholdStream[b]; // NOTE(review): no check against thresholdStreamLength here; presumably the caller sizes the stream — confirm
+ }
+ cache.mThresholdStreamIndex = 0;
+ }
+}
+
+// Island solve that claims its work in chunks through shared atomic counters stored in params
+// (constraintIndex/constraintIndex2 for constraint batches, bodyListIndex/bodyListIndex2 for
+// the velocity save). Presumably several worker threads run this concurrently on the same
+// params -- confirm against the caller.
+//
+// Phases, in order:
+//  1. positionIterations passes over all partitions; the last pass uses gVTableSolveConcludeBlock.
+//  2. Copy solver velocities into motionVelocityArray / save articulation velocities.
+//  3. velocityIterations - 1 passes with gVTableSolveBlock.
+//  4. One final pass with gVTableSolveWriteBackBlock, then flush the local threshold stream.
+//
+// Returns normalIteration * batchCount, i.e. the number of passes performed times the number
+// of constraint batch headers.
+PxI32 SolverCoreGeneral::solveVParallelAndWriteBack
+(SolverIslandParams& params) const
+{
+#if PX_PROFILE_SOLVE_STALLS
+ PxU64 startTime = readTimer();
+
+ // Presumably accumulated by WAIT_FOR_PROGRESS (macro defined outside this chunk) -- TODO confirm.
+ PxU64 stallCount = 0;
+#endif
+
+ SolverContext cache;
+ cache.solverBodyArray = params.bodyDataList;
+ const PxU32 batchSize = params.batchSize;
+
+ // Number of constraint batch headers claimed per atomic increment.
+ const PxI32 UnrollCount = PxI32(batchSize);
+ // Number of bodies/articulations claimed per atomic increment during the velocity save.
+ const PxI32 SaveUnrollCount = 32;
+
+ // Stack-local staging buffer for threshold elements; flushed to params.thresholdStream at the end.
+ const PxI32 TempThresholdStreamSize = 32;
+ ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize];
+
+ const PxI32 bodyListSize = PxI32(params.bodyListSize);
+ const PxI32 articulationListSize = PxI32(params.articulationListSize);
+
+
+ const PxI32 batchCount = PxI32(params.numConstraintHeaders);
+ cache.mThresholdStream = tempThresholdStream;
+ cache.mThresholdStreamLength = TempThresholdStreamSize;
+ cache.mThresholdStreamIndex = 0;
+ cache.writeBackIteration = false;
+
+ const PxI32 positionIterations = PxI32(params.positionIterations);
+ const PxI32 velocityIterations = PxI32(params.velocityIterations);
+
+ // constraintIndex: next unclaimed batch; constraintIndex2: count of batches reported as solved.
+ PxI32* constraintIndex = &params.constraintIndex;
+ PxI32* constraintIndex2 = &params.constraintIndex2;
+
+ PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList;
+
+ const PxU32 nbPartitions = params.nbPartitions;
+
+ PxU32* headersPerPartition = params.headersPerPartition;
+
+ // velocityIterations is otherwise only read inside PX_ASSERT below.
+ PX_UNUSED(velocityIterations);
+
+ PX_ASSERT(velocityIterations >= 1);
+ PX_ASSERT(positionIterations >= 1);
+
+ // Claim the first chunk. The subtraction suggests atomicAdd returns the post-add value,
+ // so 'index' is the first batch of the claimed chunk.
+ PxI32 endIndexCount = UnrollCount;
+ PxI32 index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+
+ BatchIterator contactIter(params.constraintBatchHeaders, params.numConstraintHeaders);
+
+ // maxNormalIndex: running end (exclusive) of the current partition, counted across all passes.
+ PxI32 maxNormalIndex = 0;
+ PxI32 normalIteration = 0;
+ // Never modified in this function; the velocity passes below pass a literal 0 instead.
+ PxI32 frictionIteration = 0;
+ PxU32 a = 0;
+ // Number of batches that must be reported in constraintIndex2 before the next partition may start.
+ PxI32 targetConstraintIndex = 0;
+ // i == 0: the first positionIterations - 1 passes use the plain solve table.
+ // i == 1: the single remaining pass uses the conclude table.
+ for(PxU32 i = 0; i < 2; ++i)
+ {
+ SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlock : gVTableSolveConcludeBlock;
+ for(; a < positionIterations - 1 + i; ++a)
+ {
+ // Friction is enabled only for the last three position passes.
+ cache.doFriction = (positionIterations - a) <= 3;
+ for(PxU32 b = 0; b < nbPartitions; ++b)
+ {
+ // Presumably blocks until *constraintIndex2 reaches targetConstraintIndex, i.e. until the
+ // previous partition is fully solved (macro defined outside this chunk -- TODO confirm).
+ WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex);
+
+ maxNormalIndex += headersPerPartition[b];
+
+ PxI32 nbSolved = 0;
+ while(index < maxNormalIndex)
+ {
+ // Solve what is left of the claimed chunk, clamped to the end of this partition.
+ const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
+ SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, solveTable,
+ normalIteration, frictionIteration, normalIteration);
+ index += remainder;
+ endIndexCount -= remainder;
+ nbSolved += remainder;
+ if(endIndexCount == 0)
+ {
+ // Chunk exhausted: claim the next one.
+ endIndexCount = UnrollCount;
+ index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+ }
+ }
+ if(nbSolved)
+ {
+ // Barrier before publishing the count of batches solved in this partition.
+ Ps::memoryBarrier();
+ physx::shdfnd::atomicAdd(constraintIndex2, nbSolved);
+ }
+ targetConstraintIndex += headersPerPartition[b]; //Increment target constraint index by batch count
+ }
+ ++normalIteration;
+ }
+ }
+
+ // bodyListIndex: next unclaimed articulation/body; bodyListIndex2: count reported as saved.
+ PxI32* bodyListIndex = &params.bodyListIndex;
+ PxI32* bodyListIndex2 = &params.bodyListIndex2;
+
+ ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
+
+ PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart;
+ Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
+
+
+ //Save velocity - articulated
+ // The shared bodyListIndex counter covers articulations first ([0, articulationListSize))
+ // and rigid bodies after that (offset by articulationListSize).
+ PxI32 endIndexCount2 = SaveUnrollCount;
+ PxI32 index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount;
+ {
+ WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex);
+ PxI32 nbConcluded = 0;
+ while(index2 < articulationListSize)
+ {
+ // NOTE(review): this clamps with SaveUnrollCount, whereas the body loop below clamps with
+ // endIndexCount2 -- confirm the difference is intentional.
+ const PxI32 remainder = PxMin(SaveUnrollCount, (articulationListSize - index2));
+ endIndexCount2 -= remainder;
+ for(PxI32 b = 0; b < remainder; ++b, ++index2)
+ {
+ ArticulationPImpl::saveVelocity(articulationListStart[index2]);
+ }
+ if(endIndexCount2 == 0)
+ {
+ index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount;
+ endIndexCount2 = SaveUnrollCount;
+ }
+ nbConcluded += remainder;
+ }
+
+ // Rebase the shared counter value into a rigid-body index.
+ index2 -= articulationListSize;
+
+ //save velocity
+
+
+ while(index2 < bodyListSize)
+ {
+ const PxI32 remainder = PxMin(endIndexCount2, (bodyListSize - index2));
+ endIndexCount2 -= remainder;
+ for(PxI32 b = 0; b < remainder; ++b, ++index2)
+ {
+ // Prefetch eight entries ahead of the one being copied.
+ Ps::prefetchLine(&bodyListStart[index2 + 8]);
+ Ps::prefetchLine(&motionVelocityArray[index2 + 8]);
+ PxSolverBody& body = bodyListStart[index2];
+ Cm::SpatialVector& motionVel = motionVelocityArray[index2];
+ motionVel.linear = body.linearVelocity;
+ motionVel.angular = body.angularState;
+ PX_ASSERT(motionVel.linear.isFinite());
+ PX_ASSERT(motionVel.angular.isFinite());
+ }
+
+ nbConcluded += remainder;
+
+ //Branch not required because this is the last time we use this atomic variable
+ //if(index2 < articulationListSizePlusbodyListSize)
+ {
+ index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount - articulationListSize;
+ endIndexCount2 = SaveUnrollCount;
+ }
+ }
+
+ if(nbConcluded)
+ {
+ // Barrier before publishing how many articulations/bodies were saved here.
+ Ps::memoryBarrier();
+ physx::shdfnd::atomicAdd(bodyListIndex2, nbConcluded);
+ }
+ }
+
+
+ // Presumably waits until every articulation and body has been saved -- TODO confirm macro semantics.
+ WAIT_FOR_PROGRESS(bodyListIndex2, (bodyListSize + articulationListSize));
+
+ // Starts at 1: the write-back pass further below accounts for the remaining velocity iteration.
+ a = 1;
+ for(; a < params.velocityIterations; ++a)
+ {
+ for(PxU32 b = 0; b < nbPartitions; ++b)
+ {
+ WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex);
+
+ maxNormalIndex += headersPerPartition[b];
+
+ PxI32 nbSolved = 0;
+ while(index < maxNormalIndex)
+ {
+ const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
+ SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveBlock,
+ normalIteration, 0, normalIteration);
+ index += remainder;
+ endIndexCount -= remainder;
+ nbSolved += remainder;
+ if(endIndexCount == 0)
+ {
+ endIndexCount = UnrollCount;
+ index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+ }
+ }
+ if(nbSolved)
+ {
+ Ps::memoryBarrier();
+ physx::shdfnd::atomicAdd(constraintIndex2, nbSolved);
+ }
+ targetConstraintIndex += headersPerPartition[b]; //Increment target constraint index by batch count
+ }
+ ++normalIteration;
+ }
+
+ ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream;
+ PxU32 thresholdStreamLength = params.thresholdStreamLength;
+ PxI32* outThresholdPairs = params.outThresholdPairs;
+
+ // Give the solver context access to the shared output stream for the write-back pass.
+ cache.mSharedOutThresholdPairs = outThresholdPairs;
+ cache.mSharedThresholdStream = thresholdStream;
+ cache.mSharedThresholdStreamLength = thresholdStreamLength;
+
+ //Last iteration - do writeback as well!
+ cache.writeBackIteration = true;
+ {
+ for(PxU32 b = 0; b < nbPartitions; ++b)
+ {
+ WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex);
+
+ maxNormalIndex += headersPerPartition[b];
+
+ PxI32 nbSolved = 0;
+ while(index < maxNormalIndex)
+ {
+ const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
+
+ SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveWriteBackBlock,
+ normalIteration, 0, normalIteration);
+
+ index += remainder;
+ endIndexCount -= remainder;
+ nbSolved += remainder;
+ if(endIndexCount == 0)
+ {
+ endIndexCount = UnrollCount;
+ index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+ }
+ }
+ if(nbSolved)
+ {
+ Ps::memoryBarrier();
+ physx::shdfnd::atomicAdd(constraintIndex2, nbSolved);
+ }
+ targetConstraintIndex += headersPerPartition[b]; //Increment target constraint index by batch count
+ }
+
+ // Flush whatever is still staged locally: reserve a range in the shared stream with one
+ // atomic add, then copy the staged elements into it.
+ if(cache.mThresholdStreamIndex > 0)
+ {
+ //Write back to global buffer
+ PxI32 threshIndex = physx::shdfnd::atomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+ for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b)
+ {
+ thresholdStream[b + threshIndex] = cache.mThresholdStream[b];
+ }
+ cache.mThresholdStreamIndex = 0;
+ }
+
+ ++normalIteration;
+
+ }
+
+#if PX_PROFILE_SOLVE_STALLS
+
+
+ PxU64 endTime = readTimer();
+ PxReal totalTime = (PxReal)(endTime - startTime);
+ PxReal stallTime = (PxReal)stallCount;
+ PxReal stallRatio = stallTime/totalTime;
+ // Reporting is compiled in but disabled by the constant condition.
+ if(0)//stallRatio > 0.2f)
+ {
+ LARGE_INTEGER frequency;
+ QueryPerformanceFrequency( &frequency );
+ printf("Warning -- percentage time stalled = %f; stalled for %f seconds; total Time took %f seconds\n",
+ stallRatio * 100.f, stallTime/(PxReal)frequency.QuadPart, totalTime/(PxReal)frequency.QuadPart);
+ }
+#endif
+
+ return normalIteration * batchCount;
+
+}
+
+
+// Invokes the write-back routine for each constraint batch in turn.
+//
+// constraintList       constraint descriptors; batches index into it via mStartIndex.
+// batchHeaders         numBatches headers, each describing one batch (mStartIndex, mStride).
+// thresholdStream      buffer handed to the write-back routines through the solver context.
+// thresholdStreamLength capacity of thresholdStream.
+// outThresholdPairs    [out] always written as 0 (see note at the end of the body).
+// atomListData         solver body data exposed to the write-back routines.
+// writeBackTable       dispatch table indexed by the first byte of a batch's constraint data.
+void SolverCoreGeneral::writeBackV
+(const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 /*constraintListSize*/, PxConstraintBatchHeader* batchHeaders, const PxU32 numBatches,
+ ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs,
+ PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const
+{
+	SolverContext cache;
+	cache.mThresholdStreamIndex = 0;
+	cache.mThresholdStreamLength = thresholdStreamLength;
+	cache.mThresholdStream = thresholdStream;
+	cache.solverBodyArray = atomListData;
+
+	const PxConstraintBatchHeader* const batchEnd = batchHeaders + numBatches;
+	for(const PxConstraintBatchHeader* batch = batchHeaders; batch != batchEnd; ++batch)
+	{
+		// The first byte of the batch's leading constraint selects the write-back routine.
+		const PxSolverConstraintDesc* const firstDesc = constraintList + batch->mStartIndex;
+		const PxU8 type = *firstDesc->constraint;
+		writeBackTable[type](firstDesc, batch->mStride, cache);
+	}
+
+	// NOTE(review): the reported pair count is a constant zero; cache.mThresholdStreamIndex is
+	// not consulted. Confirm whether callers rely on this before changing it.
+	const PxI32 reportedPairs = 0;
+	outThresholdPairs = PxU32(reportedPairs);
+}
+
+// Free-function entry point that forwards to the solver core's virtual solveV_Blocks.
+// SOLVEV_BLOCK_METHOD_ARGS expands to (SolverCore* solverCore, SolverIslandParams& params).
+void solveVBlock(SOLVEV_BLOCK_METHOD_ARGS)
+{
+ solverCore->solveV_Blocks(params);
+}
+
+}
+}
+
+
+//#endif
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.h
new file mode 100644
index 00000000..bfccb2b6
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.h
@@ -0,0 +1,218 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_SOLVERCOREGENERAL_H
+#define DY_SOLVERCOREGENERAL_H
+
+#include "DySolverCore.h"
+#include "DySolverConstraintDesc.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+struct FsData;
+
+// Spins until *state equals requiredState.
+// The loop body is empty: no yield, pause or memory barrier is issued here.
+inline void BusyWaitState(volatile PxU32* state, const PxU32 requiredState)
+{
+ // volatile forces *state to be re-read on every pass of the loop.
+ while(requiredState != *state );
+}
+
+// Returns once *state equals requiredState. A single plain read is tried first; only if that
+// does not match does the function fall back to spinning in BusyWaitState.
+inline void WaitBodyRequiredState(PxU32* state, const PxU32 requiredState)
+{
+	const bool alreadyReached = (*state == requiredState);
+	if(!alreadyReached)
+		BusyWaitState(state, requiredState);
+}
+
+// Spins until *stateA equals requiredStateA, and only then spins until *stateB equals
+// requiredStateB. The two conditions are checked one after the other, not simultaneously,
+// so *stateA is not re-examined once the first loop has exited.
+inline void BusyWaitStates(volatile PxU32* stateA, volatile PxU32* stateB, const PxU32 requiredStateA, const PxU32 requiredStateB)
+{
+ while(*stateA != requiredStateA);
+ while(*stateB != requiredStateB);
+}
+
+
+// Blocks until both bodies referenced by desc have reached the solver progress this
+// constraint requires.
+//
+// desc        constraint descriptor; supplies bodyA/bodyB and their per-constraint progress values.
+// iterationA  multiplied by each body's maxSolverNormalProgress.
+// iterationB  multiplied by each body's maxSolverFrictionProgress.
+PX_FORCE_INLINE void WaitBodyABodyBRequiredState(const PxSolverConstraintDesc& desc, const PxI32 iterationA, const PxI32 iterationB)
+{
+ PxSolverBody* PX_RESTRICT pBodyA = desc.bodyA;
+ PxSolverBody* PX_RESTRICT pBodyB = desc.bodyB;
+
+ // A per-constraint progress of 0xFFFF is passed through unchanged instead of being offset
+ // by the iteration counts.
+ const PxU32 requiredProgressA=(desc.bodyASolverProgress == 0xFFFF) ? 0xFFFF : PxU32(desc.bodyASolverProgress + iterationA * pBodyA->maxSolverNormalProgress + iterationB * pBodyA->maxSolverFrictionProgress);
+ const PxU32 requiredProgressB=(desc.bodyBSolverProgress == 0xFFFF) ? 0xFFFF : PxU32(desc.bodyBSolverProgress + iterationA * pBodyB->maxSolverNormalProgress + iterationB * pBodyB->maxSolverFrictionProgress);
+ // NOTE(review): this compares against 0xFFFFFFFF while the sentinel used above is 0xFFFF --
+ // confirm which constant is intended.
+ PX_ASSERT(requiredProgressA!=0xFFFFFFFF || requiredProgressB!=0xFFFFFFFF);
+
+ const PxU32 solverProgressA = pBodyA->solverProgress;
+ const PxU32 solverProgressB = pBodyB->solverProgress;
+
+ // Only enter the spin loops if the single plain read above did not already match.
+ if(solverProgressA != requiredProgressA || solverProgressB != requiredProgressB)
+ {
+ BusyWaitStates(&pBodyA->solverProgress, &pBodyB->solverProgress, requiredProgressA, requiredProgressB);
+ }
+}
+
+// Advances the solverProgress counter of both bodies referenced by desc by one.
+// A body whose maxSolverNormalProgress is 0xFFFF has its solverProgress set to 0xFFFF instead.
+PX_FORCE_INLINE void IncrementBodyProgress(const PxSolverConstraintDesc& desc)
+{
+ PxSolverBody* PX_RESTRICT pBodyA = desc.bodyA;
+ PxSolverBody* PX_RESTRICT pBodyB = desc.bodyB;
+
+ const PxU32 maxProgressA = pBodyA->maxSolverNormalProgress;
+ const PxU32 maxProgressB = pBodyB->maxSolverNormalProgress;
+
+ //NB - this approach removes the need for an imul (which is a non-pipeline instruction on PPC chips)
+ const PxU32 requiredProgressA=(maxProgressA == 0xFFFF) ? 0xFFFF : pBodyA->solverProgress + 1;
+ const PxU32 requiredProgressB=(maxProgressB == 0xFFFF) ? 0xFFFF : pBodyB->solverProgress + 1;
+
+ // The stores go through volatile pointers, matching the volatile reads in BusyWaitStates.
+ volatile PxU32* solveProgressA = &pBodyA->solverProgress;
+ volatile PxU32* solveProgressB = &pBodyB->solverProgress;
+
+ *solveProgressA=requiredProgressA;
+ *solveProgressB=requiredProgressB;
+
+}
+
+
+// Cursor over an array of constraint batch headers. It remembers the header returned by the
+// previous lookup so that the next lookup starts searching from there.
+class BatchIterator
+{
+public:
+	PxConstraintBatchHeader* constraintBatchHeaders;	// header array (not owned by this class)
+	PxU32 mSize;										// number of entries in constraintBatchHeaders
+	PxU32 mCurrentIndex;								// header returned by the most recent lookup
+
+	BatchIterator(PxConstraintBatchHeader* _constraintBatchHeaders, PxU32 size)
+		: constraintBatchHeaders(_constraintBatchHeaders)
+		, mSize(size)
+		, mCurrentIndex(0)
+	{
+	}
+
+	// Returns the header whose range [mStartIndex, mStartIndex + mStride) contains
+	// constraintIndex. The search begins at the last hit and wraps around modulo mSize.
+	// It does not terminate if no header contains the index, and mSize must be non-zero
+	// whenever the search has to advance.
+	PX_FORCE_INLINE const PxConstraintBatchHeader& GetCurrentHeader(const PxU32 constraintIndex)
+	{
+		PxU32 cursor = mCurrentIndex;
+		for(;;)
+		{
+			const PxConstraintBatchHeader& candidate = constraintBatchHeaders[cursor];
+			if((constraintIndex - candidate.mStartIndex) < candidate.mStride)
+				break;
+			cursor = (cursor + 1)%mSize;
+		}
+		Ps::prefetchLine(&constraintBatchHeaders[cursor], 128);
+		mCurrentIndex = cursor;
+		return constraintBatchHeaders[cursor];
+	}
+private:
+	// Declared but not defined: assignment is disabled.
+	BatchIterator& operator=(const BatchIterator&);
+};
+
+
+// Solves a contiguous run of constraint batches through the given dispatch table.
+//
+// bWaitIncrement    when true, wait on each body's progress counter before solving a batch
+//                   and bump the counters afterwards; when false, neither is done.
+// constraintList    constraint descriptors; each batch header indexes into this array.
+// batchCount        number of batch headers to process in this call.
+// index             position of the first batch, counted across all iterations.
+// headerCount       number of batch headers per iteration.
+// cache             solver context handed to every solve routine.
+// iterator          only its constraintBatchHeaders array is used here.
+// solveTable        solve routines indexed by the header's mConstraintType.
+// normalIteration, frictionIteration
+//                   forwarded to WaitBodyABodyBRequiredState when bWaitIncrement is set.
+// iteration         multiplied by headerCount and subtracted from index to get the header slot.
+template<bool bWaitIncrement>
+void SolveBlockParallel (PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxI32 batchCount, const PxI32 index,
+	const PxI32 headerCount, SolverContext& cache, BatchIterator& iterator,
+	SolveBlockMethod solveTable[], const PxI32 normalIteration, const PxI32 frictionIteration,
+	const PxI32 iteration
+	)
+{
+	const PxConstraintBatchHeader* PX_RESTRICT batchHeaders = iterator.constraintBatchHeaders;
+
+	// Strip the completed iterations from the running index to obtain a header slot.
+	const PxI32 firstHeader = index - (iteration * headerCount);
+	const PxI32 headerEnd = firstHeader + batchCount;
+
+	for(PxI32 headerIdx = firstHeader; headerIdx < headerEnd; ++headerIdx)
+	{
+		const PxConstraintBatchHeader& batch = batchHeaders[headerIdx];
+
+		const PxI32 descCount = batch.mStride;
+		PxSolverConstraintDesc* PX_RESTRICT descs = constraintList + batch.mStartIndex;
+
+		Ps::prefetch(descs[0].constraint, 384);
+
+		// Pull both bodies of every constraint towards the cache and, if requested,
+		// wait until they have reached the required progress.
+		for(PxI32 d = 0; d < descCount; ++d)
+		{
+			Ps::prefetchLine(descs[d].bodyA);
+			Ps::prefetchLine(descs[d].bodyB);
+			if(bWaitIncrement)
+				WaitBodyABodyBRequiredState(descs[d], normalIteration, frictionIteration);
+		}
+
+		// Dispatch the whole batch to the routine registered for its constraint type.
+		solveTable[batch.mConstraintType](descs, PxU32(descCount), cache);
+
+		if(bWaitIncrement)
+		{
+			// Barrier first, then publish the new progress of every body in the batch.
+			Ps::memoryBarrier();
+			for(PxI32 d = 0; d < descCount; ++d)
+				IncrementBodyProgress(descs[d]);
+		}
+	}
+}
+
+
+
+
+// SolverCore implementation declared by this header. It adds no data members of its own;
+// instances are obtained through create() and released through destroyV().
+class SolverCoreGeneral : public SolverCore
+{
+public:
+ // Factory; the definition is outside this header.
+ static SolverCoreGeneral* create();
+
+ // Implements SolverCore
+ virtual void destroyV();
+
+ // Solve driven by the shared atomic counters in params; returns a PxI32 progress value.
+ virtual PxI32 solveVParallelAndWriteBack
+ (SolverIslandParams& params) const;
+
+ // Solve entry point reached through solveVBlock().
+ virtual void solveV_Blocks
+ (SolverIslandParams& params) const;
+
+ // Runs the write-back table over numBatches batches of constraintList.
+ virtual void writeBackV
+ (const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 constraintListSize, PxConstraintBatchHeader* contactConstraintBatches, const PxU32 numBatches,
+ ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs,
+ PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const;
+
+private:
+
+ //~Implements SolverCore
+};
+
+#define SOLVEV_BLOCK_METHOD_ARGS \
+ SolverCore* solverCore, \
+ SolverIslandParams& params
+
+void solveVBlock(SOLVEV_BLOCK_METHOD_ARGS);
+
+SolveBlockMethod* getSolveBlockTable();
+
+SolveBlockMethod* getSolverConcludeBlockTable();
+
+SolveWriteBackBlockMethod* getSolveWritebackBlockTable();
+
+
+}
+
+}
+
+#endif //DY_SOLVERCOREGENERAL_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp
new file mode 100644
index 00000000..1858da15
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp
@@ -0,0 +1,755 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "foundation/PxPreprocessor.h"
+#include "PsAllocator.h"
+#include <new>
+#include <stdio.h>
+#include "CmPhysXCommon.h"
+#include "DySolverBody.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverContact.h"
+#include "DyThresholdTable.h"
+#include "DySolverControl.h"
+#include "DyArticulationHelper.h"
+#include "PsAtomic.h"
+#include "PsIntrinsics.h"
+#include "DyArticulationPImpl.h"
+#include "PsThread.h"
+#include "DySolverConstraintDesc.h"
+#include "DySolverContext.h"
+#include "DySolverControlPF.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+//-----------------------------------
+
+void solve1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExt1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solve1D4_Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+
+void solve1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExt1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solve1D4Block_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+void solve1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExt1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solve1D4Block_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+void writeBack1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void ext1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void writeBack1D4Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+
+void solveFrictionBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveFriction_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtFrictionBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulombBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtContactCoulombBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulomb_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+
+void solveContactCoulombConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtContactCoulombConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulomb_BStaticConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+void solveContactCoulombBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtContactCoulombBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulomb_BStaticBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveFrictionBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveFriction_BStaticBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveExtFrictionBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+//Pre-block 1d/2d friction stuff...
+
+void solveContactCoulombPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulombPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulombPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulombPreBlock_ConcludeStatic (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulombPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveContactCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveFrictionCoulombPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+void solveFrictionCoulombPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveFrictionCoulombPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+void solveFrictionCoulombPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+void solveFrictionCoulombPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+void solveFrictionCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache);
+
+
+// could move this to PxPreprocessor.h but
+// no implementation available for MSVC
+#if PX_GCC_FAMILY
+#define PX_UNUSED_ATTRIBUTE __attribute__((unused))
+#else
+#define PX_UNUSED_ATTRIBUTE
+#endif
+
+#define DYNAMIC_ARTICULATION_REGISTRATION(x) 0
+
+
+// Dispatch table of plain solve routines, indexed by constraint type (the trailing comment on
+// each entry names the type the slot is meant for). Slot 0 is null.
+// DYNAMIC_ARTICULATION_REGISTRATION(x) expands to 0, so the EXT_* slots start out null and are
+// filled in by SolverCoreRegisterArticulationFnsCoulomb().
+static SolveBlockMethod gVTableSolveBlockCoulomb[] PX_UNUSED_ATTRIBUTE =
+{
+ 0,
+ solveContactCoulombBlock, // DY_SC_TYPE_RB_CONTACT
+ solve1DBlock, // DY_SC_TYPE_RB_1D
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombBlock), // DY_SC_TYPE_EXT_CONTACT
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlock), // DY_SC_TYPE_EXT_1D
+ solveContactCoulomb_BStaticBlock, // DY_SC_TYPE_STATIC_CONTACT
+ solveContactCoulombBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
+ solveContactCoulombPreBlock, // DY_SC_TYPE_BLOCK_RB_CONTACT
+ solveContactCoulombPreBlock_Static, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
+ solve1D4_Block, // DY_SC_TYPE_BLOCK_1D,
+ solveFrictionBlock, // DY_SC_TYPE_FRICTION_CONSTRAINT
+ solveFriction_BStaticBlock, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlock), // DY_SC_TYPE_EXT_FRICTION_CONSTRAINT
+ solveFrictionCoulombPreBlock, // DY_SC_TYPE_BLOCK_FRICTION
+ solveFrictionCoulombPreBlock_Static // DY_SC_TYPE_BLOCK_STATIC_FRICTION
+};
+
+// Dispatch table of solve-and-write-back routines, laid out slot for slot like
+// gVTableSolveBlockCoulomb. Slot 0 is null.
+// The EXT_* slots start out null (DYNAMIC_ARTICULATION_REGISTRATION(x) expands to 0) and are
+// filled in by SolverCoreRegisterArticulationFnsCoulomb().
+static SolveWriteBackBlockMethod gVTableSolveWriteBackBlockCoulomb[] PX_UNUSED_ATTRIBUTE =
+{
+ 0,
+ solveContactCoulombBlockWriteBack, // DY_SC_TYPE_RB_CONTACT
+ solve1DBlockWriteBack, // DY_SC_TYPE_RB_1D
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombBlockWriteBack), // DY_SC_TYPE_EXT_CONTACT
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlockWriteBack), // DY_SC_TYPE_EXT_1D
+ solveContactCoulomb_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_CONTACT
+ solveContactCoulombBlockWriteBack, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
+ solveContactCoulombPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_RB_CONTACT
+ solveContactCoulombPreBlock_WriteBackStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
+ solve1D4Block_WriteBack, // DY_SC_TYPE_BLOCK_1D,
+ solveFrictionBlockWriteBack, // DY_SC_TYPE_FRICTION_CONSTRAINT
+ solveFriction_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlockWriteBack), // DY_SC_TYPE_EXT_FRICTION_CONSTRAINT
+ solveFrictionCoulombPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_FRICTION
+ solveFrictionCoulombPreBlock_WriteBackStatic // DY_SC_TYPE_BLOCK_STATIC_FRICTION
+};
+
+
+// Dispatch table of "conclude" solve routines, laid out slot for slot like
+// gVTableSolveBlockCoulomb. Slot 0 is null.
+// The three non-block friction slots (FRICTION_CONSTRAINT, STATIC_FRICTION_CONSTRAINT and
+// EXT_FRICTION_CONSTRAINT) reuse the plain solve routines; no conclude variants are listed.
+// The EXT_* slots start out null (DYNAMIC_ARTICULATION_REGISTRATION(x) expands to 0) and are
+// filled in by SolverCoreRegisterArticulationFnsCoulomb().
+static SolveBlockMethod gVTableSolveConcludeBlockCoulomb[] PX_UNUSED_ATTRIBUTE =
+{
+ 0,
+ solveContactCoulombConcludeBlock, // DY_SC_TYPE_RB_CONTACT
+ solve1DConcludeBlock, // DY_SC_TYPE_RB_1D
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombConcludeBlock), // DY_SC_TYPE_EXT_CONTACT
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DConcludeBlock), // DY_SC_TYPE_EXT_1D
+ solveContactCoulomb_BStaticConcludeBlock, // DY_SC_TYPE_STATIC_CONTACT
+ solveContactCoulombConcludeBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT
+ solveContactCoulombPreBlock_Conclude, // DY_SC_TYPE_BLOCK_RB_CONTACT
+ solveContactCoulombPreBlock_ConcludeStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT
+ solve1D4Block_Conclude, // DY_SC_TYPE_BLOCK_1D,
+ solveFrictionBlock, // DY_SC_TYPE_FRICTION_CONSTRAINT
+ solveFriction_BStaticBlock, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT
+ DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlock), // DY_SC_TYPE_EXT_FRICTION_CONSTRAINT
+ solveFrictionCoulombPreBlock_Conclude, // DY_SC_TYPE_BLOCK_FRICTION
+ solveFrictionCoulombPreBlock_ConcludeStatic // DY_SC_TYPE_BLOCK_STATIC_FRICTION
+};
+
+
+// Installs the articulation ("Ext") solve routines into the three Coulomb dispatch tables.
+// Those slots are initialised to null in the table definitions.
+void SolverCoreRegisterArticulationFnsCoulomb()
+{
+	// Plain solve table
+	gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_CONTACT] = solveExtContactCoulombBlock;
+	gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_1D] = solveExt1DBlock;
+	gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlock;
+
+	// Solve + write-back table
+	gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_CONTACT] = solveExtContactCoulombBlockWriteBack;
+	gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_1D] = solveExt1DBlockWriteBack;
+	gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlockWriteBack;
+
+	// Conclude table; the friction slot takes the plain friction routine.
+	gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_CONTACT] = solveExtContactCoulombConcludeBlock;
+	gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_1D] = solveExt1DConcludeBlock;
+	gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlock;
+}
+
+// Allocates storage with PX_ALLOC and constructs a SolverCoreGeneralPF in it.
+// Returns null if the allocation yields a null pointer; release with destroyV().
+SolverCoreGeneralPF* SolverCoreGeneralPF::create()
+{
+	void* const storage = PX_ALLOC(sizeof(SolverCoreGeneralPF), "SolverCoreGeneral");
+	if(!storage)
+		return 0;
+
+	// Placement-new yields the same address, now typed as the constructed object.
+	return new (storage) SolverCoreGeneralPF;
+}
+
+// Counterpart of create(): runs the destructor explicitly and then releases the storage with
+// PX_FREE. The object must not be touched after this call.
+void SolverCoreGeneralPF::destroyV()
+{
+ // Explicit destructor call because the object was constructed with placement new.
+ this->~SolverCoreGeneralPF();
+ PX_FREE(this);
+}
+
+void SolverCoreGeneralPF::solveV_Blocks(SolverIslandParams& params) const
+{
+ const PxI32 TempThresholdStreamSize = 32;
+ ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize];
+
+ SolverContext cache;
+ cache.solverBodyArray = params.bodyDataList;
+ cache.mThresholdStream = tempThresholdStream;
+ cache.mThresholdStreamLength = TempThresholdStreamSize;
+ cache.mThresholdStreamIndex = 0;
+ cache.writeBackIteration = false;
+
+ PxI32 batchCount = PxI32(params.numConstraintHeaders);
+
+ PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart;
+ const PxU32 bodyListSize = params.bodyListSize;
+
+ Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
+
+ const PxU32 velocityIterations = params.velocityIterations;
+ const PxU32 positionIterations = params.positionIterations;
+
+ const PxU32 numConstraintHeaders = params.numConstraintHeaders;
+ const PxU32 articulationListSize = params.articulationListSize;
+
+ ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
+
+
+ PX_ASSERT(velocityIterations >= 1);
+ PX_ASSERT(positionIterations >= 1);
+
+ if(numConstraintHeaders == 0)
+ {
+ for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++)
+ {
+ Cm::SpatialVector& motionVel = motionVelocityArray[baIdx];
+ PxSolverBody& atom = bodyListStart[baIdx];
+ motionVel.linear = atom.linearVelocity;
+ motionVel.angular = atom.angularState;
+ }
+
+ for (PxU32 i = 0; i < articulationListSize; i++)
+ ArticulationPImpl::saveVelocity(articulationListStart[i]);
+
+ return;
+ }
+
+ BatchIterator contactIterator(params.constraintBatchHeaders, params.numConstraintHeaders);
+ BatchIterator frictionIterator(params.frictionConstraintBatches, params.numFrictionConstraintHeaders);
+
+
+ PxI32 frictionBatchCount = PxI32(params.numFrictionConstraintHeaders);
+
+ PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList;
+
+ PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList = params.frictionConstraintList;
+
+
+ //0-(n-1) iterations
+ PxI32 normalIter = 0;
+ PxI32 frictionIter = 0;
+ for (PxU32 iteration = positionIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters
+ {
+
+ SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount,
+ cache, contactIterator, iteration == 1 ? gVTableSolveConcludeBlockCoulomb : gVTableSolveBlockCoulomb, normalIter, frictionIter, normalIter);
+ ++normalIter;
+
+ }
+
+ if(frictionBatchCount>0)
+ {
+ const PxU32 numIterations = positionIterations * 2;
+ for (PxU32 iteration = numIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters
+ {
+ SolveBlockParallel<false>(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount,
+ cache, frictionIterator, iteration == 1 ? gVTableSolveConcludeBlockCoulomb : gVTableSolveBlockCoulomb, normalIter, frictionIter, frictionIter);
+ ++frictionIter;
+ }
+ }
+
+ for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++)
+ {
+ const PxSolverBody& atom = bodyListStart[baIdx];
+ Cm::SpatialVector& motionVel = motionVelocityArray[baIdx];
+ motionVel.linear = atom.linearVelocity;
+ motionVel.angular = atom.angularState;
+ }
+
+
+ for (PxU32 i = 0; i < articulationListSize; i++)
+ ArticulationPImpl::saveVelocity(articulationListStart[i]);
+
+
+ const PxU32 velItersMinOne = velocityIterations - 1;
+
+ PxU32 iteration = 0;
+
+ for(; iteration < velItersMinOne; ++iteration)
+ {
+
+ SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount,
+ cache, contactIterator, gVTableSolveBlockCoulomb, normalIter, frictionIter, normalIter);
+ ++normalIter;
+
+ if(frictionBatchCount > 0)
+ {
+ SolveBlockParallel<false>(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount,
+ cache, frictionIterator, gVTableSolveBlockCoulomb, normalIter, frictionIter, frictionIter);
+ ++frictionIter;
+ }
+ }
+
+ PxI32* outThresholdPairs = params.outThresholdPairs;
+ ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream;
+ PxU32 thresholdStreamLength = params.thresholdStreamLength;
+
+ cache.writeBackIteration = true;
+
+ cache.mSharedOutThresholdPairs = outThresholdPairs;
+ cache.mSharedThresholdStreamLength = thresholdStreamLength;
+ cache.mSharedThresholdStream = thresholdStream;
+
+ for(; iteration < velocityIterations; ++iteration)
+ {
+ SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount,
+ cache, contactIterator, gVTableSolveWriteBackBlockCoulomb, normalIter, frictionIter, normalIter);
+ ++normalIter;
+
+ if(frictionBatchCount > 0)
+ {
+ SolveBlockParallel<false>(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount,
+ cache, frictionIterator, gVTableSolveWriteBackBlockCoulomb, normalIter, frictionIter, frictionIter);
+ ++frictionIter;
+ }
+ }
+
+ //Write back remaining threshold streams
+ if(cache.mThresholdStreamIndex > 0)
+ {
+ //Write back to global buffer
+ PxI32 threshIndex = physx::shdfnd::atomicAdd(reinterpret_cast<PxI32*>(&outThresholdPairs), PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+ for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b)
+ {
+ thresholdStream[b + threshIndex] = cache.mThresholdStream[b];
+ }
+ cache.mThresholdStreamIndex = 0;
+ }
+
+}
+
+// Multi-threaded solve for the Coulomb friction model, including the final write-back pass.
+//
+// Every worker thread calls this function with the same SolverIslandParams. Work is shared
+// through the counters stored in params:
+//  - constraintIndex / frictionConstraintIndex : threads claim chunks of params.batchSize batch
+//    headers with atomicAdd.
+//  - constraintIndex2 : incremented by the number of batches a thread has finished; threads wait
+//    on it (WAIT_FOR_PROGRESS) before starting the next partition.
+//  - bodyListIndex / bodyListIndex2 : the same claim/progress pair for the "save velocity" stage.
+//
+// Order of stages: contact position iterations, friction position iterations, save velocities
+// into motionVelocityArray (and articulations), velocity iterations, and one final iteration that
+// uses the write-back solve table and flushes the per-thread threshold stream.
+//
+// Returns normalIteration * batchCount + frictionIteration * frictionBatchCount, i.e. the total
+// number of batch solves scheduled across all threads.
+PxI32 SolverCoreGeneralPF::solveVParallelAndWriteBack(SolverIslandParams& params) const
+{
+	SolverContext cache;
+	cache.solverBodyArray = params.bodyDataList;
+
+	// Number of batch headers claimed per atomicAdd on the constraint counters.
+	const PxI32 UnrollCount = PxI32(params.batchSize);
+	// Number of bodies/articulations claimed per atomicAdd on bodyListIndex.
+	const PxI32 SaveUnrollCount = 64;
+
+	// Thread-local staging buffer for threshold stream elements; flushed to the shared stream at the end.
+	const PxI32 TempThresholdStreamSize = 32;
+	ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize];
+
+
+	const PxI32 batchCount = PxI32(params.numConstraintHeaders);
+	const PxI32 frictionBatchCount = PxI32(params.numFrictionConstraintHeaders);//frictionConstraintBatches.size();
+	cache.mThresholdStream = tempThresholdStream;
+	cache.mThresholdStreamLength = TempThresholdStreamSize;
+	cache.mThresholdStreamIndex = 0;
+
+	const PxI32 positionIterations = PxI32(params.positionIterations);
+	const PxU32 velocityIterations = params.velocityIterations;
+
+	const PxI32 bodyListSize = PxI32(params.bodyListSize);
+	const PxI32 articulationListSize = PxI32(params.articulationListSize);
+
+	PX_ASSERT(velocityIterations >= 1);
+	PX_ASSERT(positionIterations >= 1);
+
+	PxI32* constraintIndex = &params.constraintIndex;
+	PxI32* constraintIndex2 = &params.constraintIndex2;
+	PxI32* frictionConstraintIndex = &params.frictionConstraintIndex;
+
+	// Claim the first chunk of contact and friction batches for this thread.
+	PxI32 endIndexCount = UnrollCount;
+	PxI32 index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+	PxI32 frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount;
+
+
+	BatchIterator contactIter(params.constraintBatchHeaders, params.numConstraintHeaders);
+	BatchIterator frictionIter(params.frictionConstraintBatches, params.numFrictionConstraintHeaders);
+
+	PxU32* headersPerPartition = params.headersPerPartition;
+	PxU32 nbPartitions = params.nbPartitions;
+
+	PxU32* frictionHeadersPerPartition = params.frictionHeadersPerPartition;
+	PxU32 nbFrictionPartitions = params.nbFrictionPartitions;
+
+	PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList;
+	PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList = params.frictionConstraintList;
+
+
+	// Running upper bounds: maxNormalIndex / maxFrictionIndex bound the claimable indices of the
+	// current partition; maxProgress is the value constraintIndex2 must reach before the next
+	// partition may start.
+	PxI32 maxNormalIndex = 0;
+	PxI32 maxProgress = 0;
+	PxI32 frictionEndIndexCount = UnrollCount;
+	PxI32 maxFrictionIndex = 0;
+
+	PxI32 normalIteration = 0;
+	PxI32 frictionIteration = 0;
+	PxU32 a = 0;
+
+	// Contact position iterations: all but the last use the plain solve table (i == 0), the last
+	// one uses the "conclude" table (i == 1).
+	for(PxU32 i = 0; i < 2; ++i)
+	{
+		SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlockCoulomb : gVTableSolveConcludeBlockCoulomb;
+		for(; a < positionIterations - 1 + i; ++a)
+		{
+			for(PxU32 b = 0; b < nbPartitions; ++b)
+			{
+				WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+				maxNormalIndex += headersPerPartition[b];
+				maxProgress += headersPerPartition[b];
+				PxI32 nbSolved = 0;
+				while(index < maxNormalIndex)
+				{
+					const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
+					SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, solveTable,
+						normalIteration, frictionIteration, normalIteration);
+					index += remainder;
+					endIndexCount -= remainder;
+					nbSolved += remainder;
+					if(endIndexCount == 0)
+					{
+						// Current chunk exhausted: claim the next one.
+						endIndexCount = UnrollCount;
+						index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+					}
+				}
+				if(nbSolved)
+				{
+					// Publish the finished work so that waiting threads can advance.
+					Ps::memoryBarrier();
+					Ps::atomicAdd(constraintIndex2, nbSolved);
+				}
+			}
+			++normalIteration;
+		}
+
+	}
+
+
+	// Friction position iterations, same plain/conclude split as above.
+	// NOTE(review): 'a' is not reset here, so it continues from the value left by the contact
+	// loop and this stage runs until a reaches positionIterations * 2 - confirm this is intended.
+	for(PxU32 i = 0; i < 2; ++i)
+	{
+		SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlockCoulomb : gVTableSolveConcludeBlockCoulomb;
+		const PxI32 numIterations = positionIterations *2;
+		for(; a < numIterations - 1 + i; ++a)
+		{
+			for(PxU32 b = 0; b < nbFrictionPartitions; ++b)
+			{
+				WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+				maxProgress += frictionHeadersPerPartition[b];
+				maxFrictionIndex += frictionHeadersPerPartition[b];
+				PxI32 nbSolved = 0;
+				while(frictionIndex < maxFrictionIndex)
+				{
+					const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount);
+					SolveBlockParallel<false>(frictionConstraintList, remainder, frictionIndex, frictionBatchCount, cache, frictionIter,
+						solveTable, normalIteration, frictionIteration, frictionIteration);
+					frictionIndex += remainder;
+					frictionEndIndexCount -= remainder;
+					nbSolved += remainder;
+					if(frictionEndIndexCount == 0)
+					{
+						frictionEndIndexCount = UnrollCount;
+						frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount;
+					}
+				}
+				if(nbSolved)
+				{
+					Ps::memoryBarrier();
+					Ps::atomicAdd(constraintIndex2, nbSolved);
+				}
+			}
+			++frictionIteration;
+
+		}
+
+	}
+
+	// All position iterations must be complete before velocities are saved.
+	WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+
+
+	PxI32* bodyListIndex = &params.bodyListIndex;
+
+	ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart;
+
+	PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart;
+
+	Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray;
+
+	PxI32* bodyListIndex2 = &params.bodyListIndex2;
+
+	// Save-velocity stage. bodyListIndex counts over articulations first and rigid bodies second,
+	// so a claimed index below articulationListSize addresses an articulation.
+	PxI32 endIndexCount2 = SaveUnrollCount;
+	PxI32 index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount;
+	{
+		PxI32 nbConcluded = 0;
+		while(index2 < articulationListSize)
+		{
+			const PxI32 remainder = PxMin(SaveUnrollCount, (articulationListSize - index2));
+			endIndexCount2 -= remainder;
+			for(PxI32 b = 0; b < remainder; ++b, ++index2)
+			{
+				ArticulationPImpl::saveVelocity(articulationListStart[index2]);
+			}
+			// Count each saved articulation exactly once. bodyListIndex2 is compared against
+			// bodyListSize + articulationListSize below, so over-counting here would let
+			// waiting threads proceed before every velocity has been saved.
+			nbConcluded += remainder;
+			if(endIndexCount2 == 0)
+			{
+				index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount;
+				endIndexCount2 = SaveUnrollCount;
+			}
+		}
+
+		// Rebase the index so that it addresses the rigid body list.
+		index2 -= articulationListSize;
+
+		//save velocity
+
+
+		while(index2 < bodyListSize)
+		{
+			const PxI32 remainder = PxMin(endIndexCount2, (bodyListSize - index2));
+			endIndexCount2 -= remainder;
+			for(PxI32 b = 0; b < remainder; ++b, ++index2)
+			{
+				Ps::prefetchLine(&bodyListStart[index2 + 8]);
+				Ps::prefetchLine(&motionVelocityArray[index2 + 8]);
+				PxSolverBody& body = bodyListStart[index2];
+				Cm::SpatialVector& motionVel = motionVelocityArray[index2];
+				motionVel.linear = body.linearVelocity;
+				motionVel.angular = body.angularState;
+				PX_ASSERT(motionVel.linear.isFinite());
+				PX_ASSERT(motionVel.angular.isFinite());
+			}
+
+			nbConcluded += remainder;
+
+			//Branch not required because this is the last time we use this atomic variable
+			//if(index2 < articulationListSizePlusbodyListSize)
+			{
+				index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount - articulationListSize;
+				endIndexCount2 = SaveUnrollCount;
+			}
+		}
+
+		if(nbConcluded)
+		{
+			Ps::memoryBarrier();
+			physx::shdfnd::atomicAdd(bodyListIndex2, nbConcluded);
+		}
+	}
+
+
+	// Every body and articulation must have its velocity saved before velocity iterations start.
+	WAIT_FOR_PROGRESS(bodyListIndex2, (bodyListSize + articulationListSize));
+
+	// Velocity iterations, all but the last: one contact pass followed by one friction pass.
+	a = 0;
+	for(; a < velocityIterations-1; ++a)
+	{
+		for(PxU32 b = 0; b < nbPartitions; ++b)
+		{
+			WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+			maxNormalIndex += headersPerPartition[b];
+			maxProgress += headersPerPartition[b];
+
+			PxI32 nbSolved = 0;
+			while(index < maxNormalIndex)
+			{
+				const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
+				SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveBlockCoulomb, normalIteration, frictionIteration, normalIteration);
+				index += remainder;
+				endIndexCount -= remainder;
+				nbSolved += remainder;
+				if(endIndexCount == 0)
+				{
+					endIndexCount = UnrollCount;
+					index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+				}
+			}
+			if(nbSolved)
+			{
+				Ps::memoryBarrier();
+				Ps::atomicAdd(constraintIndex2, nbSolved);
+			}
+		}
+		++normalIteration;
+
+		for(PxU32 b = 0; b < nbFrictionPartitions; ++b)
+		{
+			WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+			maxFrictionIndex += frictionHeadersPerPartition[b];
+			maxProgress += frictionHeadersPerPartition[b];
+
+			PxI32 nbSolved = 0;
+			while(frictionIndex < maxFrictionIndex)
+			{
+				const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount);
+				// The friction pass must operate on the friction list, index, count and iterator
+				// (same argument set as the friction position iterations above).
+				SolveBlockParallel<false>(frictionConstraintList, remainder, frictionIndex, frictionBatchCount, cache, frictionIter, gVTableSolveBlockCoulomb,
+					normalIteration, frictionIteration, frictionIteration);
+
+				frictionIndex += remainder;
+				frictionEndIndexCount -= remainder;
+				nbSolved += remainder;
+				if(frictionEndIndexCount == 0)
+				{
+					frictionEndIndexCount = UnrollCount;
+					frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount;
+				}
+			}
+			if(nbSolved)
+			{
+				Ps::memoryBarrier();
+				Ps::atomicAdd(constraintIndex2, nbSolved);
+			}
+		}
+
+		++frictionIteration;
+	}
+
+	ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream;
+	const PxU32 thresholdStreamLength = params.thresholdStreamLength;
+	PxI32* outThresholdPairs = params.outThresholdPairs;
+
+	// Expose the shared threshold stream to the write-back solve functions.
+	cache.mSharedThresholdStream = thresholdStream;
+	cache.mSharedOutThresholdPairs = outThresholdPairs;
+	cache.mSharedThresholdStreamLength = thresholdStreamLength;
+
+	// Final velocity iteration: uses the write-back solve table for both contact and friction.
+	{
+		for(PxU32 b = 0; b < nbPartitions; ++b)
+		{
+			WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+			maxNormalIndex += headersPerPartition[b];
+			maxProgress += headersPerPartition[b];
+
+			PxI32 nbSolved = 0;
+			while(index < maxNormalIndex)
+			{
+				const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount);
+
+				// Solve the chunk this thread actually claimed ('index'), exactly as every other
+				// parallel pass in this function does.
+				SolveBlockParallel<false>(constraintList, remainder, index, batchCount,
+					cache, contactIter, gVTableSolveWriteBackBlockCoulomb, normalIteration, frictionIteration, normalIteration);
+
+				index += remainder;
+				endIndexCount -= remainder;
+				nbSolved += remainder;
+				if(endIndexCount == 0)
+				{
+					endIndexCount = UnrollCount;
+					index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount;
+				}
+			}
+			if(nbSolved)
+			{
+				Ps::memoryBarrier();
+				Ps::atomicAdd(constraintIndex2, nbSolved);
+			}
+		}
+
+		++normalIteration;
+
+		cache.mSharedOutThresholdPairs = outThresholdPairs;
+		cache.mSharedThresholdStream = thresholdStream;
+		cache.mSharedThresholdStreamLength = thresholdStreamLength;
+
+		for(PxU32 b = 0; b < nbFrictionPartitions; ++b)
+		{
+			WAIT_FOR_PROGRESS(constraintIndex2, maxProgress);
+			maxFrictionIndex += frictionHeadersPerPartition[b];
+			maxProgress += frictionHeadersPerPartition[b];
+
+			PxI32 nbSolved = 0;
+			while(frictionIndex < maxFrictionIndex)
+			{
+				const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount);
+
+				SolveBlockParallel<false>(frictionConstraintList, remainder, frictionIndex, frictionBatchCount, cache, frictionIter,
+					gVTableSolveWriteBackBlockCoulomb, normalIteration, frictionIteration, frictionIteration);
+
+				frictionIndex += remainder;
+				frictionEndIndexCount -= remainder;
+				nbSolved += remainder;
+				if(frictionEndIndexCount == 0)
+				{
+					frictionEndIndexCount = UnrollCount;
+					frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount;
+				}
+			}
+			if(nbSolved)
+			{
+				Ps::memoryBarrier();
+				Ps::atomicAdd(constraintIndex2, nbSolved);
+			}
+		}
+
+		// Flush whatever is left in the thread-local threshold buffer.
+		if(cache.mThresholdStreamIndex > 0)
+		{
+			//Write back to global buffer
+			// Reserve a contiguous range in the shared stream, then copy the local elements into it.
+			PxI32 threshIndex = physx::shdfnd::atomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+			for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b)
+			{
+				thresholdStream[b + threshIndex] = cache.mThresholdStream[b];
+			}
+			cache.mThresholdStreamIndex = 0;
+		}
+
+		++frictionIteration;
+	}
+
+	return normalIteration * batchCount + frictionIteration * frictionBatchCount;
+}
+
+
+// Runs the write-back function for every constraint batch.
+//
+// For each of the numBatches headers, the first byte of the batch's first constraint selects the
+// entry of writeBackTable to call; that entry receives the batch's first descriptor, the
+// header's stride and a SolverContext wired to atomListData and the supplied threshold stream.
+//
+// outThresholdPairs is always set to 0 on return.
+// NOTE(review): presumably the callbacks advance cache.mThresholdStreamIndex, which is not
+// reported back here - confirm that returning 0 is intended.
+void SolverCoreGeneralPF::writeBackV
+(const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 /*constraintListSize*/, PxConstraintBatchHeader* batchHeaders, const PxU32 numBatches,
+ ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs,
+ PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const
+{
+	SolverContext cache;
+	cache.solverBodyArray = atomListData;
+	cache.mThresholdStream = thresholdStream;
+	cache.mThresholdStreamLength = thresholdStreamLength;
+	cache.mThresholdStreamIndex = 0;
+
+	for(PxU32 batch = 0; batch < numBatches; ++batch)
+	{
+		const PxConstraintBatchHeader& header = batchHeaders[batch];
+		const PxSolverConstraintDesc* firstDesc = constraintList + header.mStartIndex;
+
+		// The leading byte of the constraint data is the index into the dispatch table.
+		const PxU8 type = *firstDesc->constraint;
+		writeBackTable[type](firstDesc, header.mStride, cache);
+	}
+
+	outThresholdPairs = 0;
+}
+
+}
+
+}
+
+
+//#endif
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.h
new file mode 100644
index 00000000..b8684cbb
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.h
@@ -0,0 +1,71 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_SOLVERCONTROLPF_H
+#define DY_SOLVERCONTROLPF_H
+
+#include "DySolverCore.h"
+#include "DySolverConstraintDesc.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+// SolverCore implementation whose solve functions use the "Coulomb" solve tables
+// (gVTableSolveBlockCoulomb and friends) and process a separate friction constraint list.
+class SolverCoreGeneralPF : public SolverCore
+{
+public:
+ // Factory function; the matching release call is destroyV().
+ static SolverCoreGeneralPF* create();
+
+ // Implements SolverCore
+ virtual void destroyV();
+
+ // Multi-threaded solve including write-back; called by every worker thread with the same params.
+ // Returns the total number of batch solves scheduled (contact plus friction).
+ virtual PxI32 solveVParallelAndWriteBack
+ (SolverIslandParams& params) const;
+
+ // Solve driven entirely by the calling thread.
+ virtual void solveV_Blocks
+ (SolverIslandParams& params) const;
+
+ // Calls writeBackTable[type] for each of the numBatches constraint batches.
+ virtual void writeBackV
+ (const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 constraintListSize, PxConstraintBatchHeader* contactConstraintBatches, const PxU32 numBatches,
+ ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs,
+ PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const;
+
+private:
+
+ //~Implements SolverCore
+};
+
+}
+
+}
+
+#endif //DY_SOLVERCONTROLPF_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverCore.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverCore.h
new file mode 100644
index 00000000..a6f579f9
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverCore.h
@@ -0,0 +1,242 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_SOLVERCORE_H
+#define DY_SOLVERCORE_H
+
+#include "PxvConfig.h"
+#include "PsArray.h"
+#include "PsThread.h"
+
+
+namespace physx
+{
+
+struct PxSolverBody;
+struct PxSolverBodyData;
+struct PxSolverConstraintDesc;
+struct PxConstraintBatchHeader;
+
+namespace Dy
+{
+struct ThresholdStreamElement;
+
+
+struct ArticulationSolverDesc;
+class Articulation;
+struct SolverContext;
+
+// Function pointer types used for the solver dispatch tables.
+// Per-constraint write-back: receives one descriptor plus the data of the two bodies involved.
+typedef void (*WriteBackMethod)(const PxSolverConstraintDesc& desc, SolverContext& cache, PxSolverBodyData& sbd0, PxSolverBodyData& sbd1);
+// Per-constraint solve.
+typedef void (*SolveMethod)(const PxSolverConstraintDesc& desc, SolverContext& cache);
+// Batch variants: 'desc' points at the first descriptor of a batch, 'constraintCount' is the number of descriptors in it.
+typedef void (*SolveBlockMethod)(const PxSolverConstraintDesc* desc, const PxU32 constraintCount, SolverContext& cache);
+typedef void (*SolveWriteBackBlockMethod)(const PxSolverConstraintDesc* desc, const PxU32 constraintCount, SolverContext& cache);
+typedef void (*WriteBackBlockMethod)(const PxSolverConstraintDesc* desc, const PxU32 constraintCount, SolverContext& cache);
+
+#define PX_PROFILE_SOLVE_STALLS 0
+#if PX_PROFILE_SOLVE_STALLS
+#if PX_WINDOWS
+#include <windows.h>
+
+
+// Returns the current value of the Windows performance counter, in counter ticks.
+// Only compiled when PX_PROFILE_SOLVE_STALLS is enabled on Windows.
+PX_FORCE_INLINE PxU64 readTimer()
+{
+	// Alternative time source kept for reference: __rdtsc()
+	LARGE_INTEGER ticks;
+	QueryPerformanceCounter(&ticks);
+	return ticks.QuadPart;
+}
+
+#endif
+#endif
+
+
+#define YIELD_THREADS 1
+
+#if YIELD_THREADS
+
+#define ATTEMPTS_BEFORE_BACKOFF 30000
+#define ATTEMPTS_BEFORE_RETEST 10000
+
+#endif
+
+// Blocks the calling thread until *pGlobalIndex is at least targetIndex.
+//
+// With YIELD_THREADS enabled the thread spins for ATTEMPTS_BEFORE_BACKOFF polls, then yields and
+// spins again for ATTEMPTS_BEFORE_RETEST polls between subsequent yields. Without YIELD_THREADS
+// it spins without ever yielding.
+PX_INLINE void WaitForProgressCount(volatile PxI32* pGlobalIndex, const PxI32 targetIndex)
+{
+#if YIELD_THREADS
+	// Fast path: the target has already been reached.
+	if(*pGlobalIndex >= targetIndex)
+		return;
+
+	PxU32 pollsLeft = ATTEMPTS_BEFORE_BACKOFF;
+	for(;;)
+	{
+		bool reached = true;
+		while(*pGlobalIndex < targetIndex)
+		{
+			if(--pollsLeft == 0)
+			{
+				// Poll budget used up without reaching the target.
+				reached = false;
+				break;
+			}
+		}
+		if(reached)
+			return;
+
+		// Give other threads a chance to run, then retry with the shorter budget.
+		Ps::Thread::yield();
+		pollsLeft = ATTEMPTS_BEFORE_RETEST;
+	}
+#else
+	while(*pGlobalIndex < targetIndex);
+#endif
+}
+
+
+#if PX_PROFILE_SOLVE_STALLS
+// Profiling overload: waits until *pGlobalIndex is at least targetIndex and adds the time spent
+// polling (in readTimer() ticks) to stallTime. Time spent inside Ps::Thread::yield() is not
+// included.
+// NOTE(review): after a yield this overload reloads ATTEMPTS_BEFORE_BACKOFF, whereas the
+// two-argument overload reloads ATTEMPTS_BEFORE_RETEST - confirm the difference is intended.
+PX_INLINE void WaitForProgressCount(volatile PxI32* pGlobalIndex, const PxI32 targetIndex, PxU64& stallTime)
+{
+	// Fast path: nothing to wait for, nothing to measure.
+	if(*pGlobalIndex >= targetIndex)
+		return;
+
+	PxU32 pollsLeft = ATTEMPTS_BEFORE_BACKOFF;
+	for(;;)
+	{
+		bool reached = true;
+		const PxU64 pollBegin = readTimer();
+		while(*pGlobalIndex < targetIndex)
+		{
+			if(--pollsLeft == 0)
+			{
+				reached = false;
+				break;
+			}
+		}
+		const PxU64 pollEnd = readTimer();
+		stallTime += (pollEnd - pollBegin);
+
+		if(reached)
+			return;
+
+		Ps::Thread::yield();
+		pollsLeft = ATTEMPTS_BEFORE_BACKOFF;
+	}
+}
+
+// Waits until *pGlobalIndex >= targetIndex. The inline comparison skips the function call when
+// the target has already been reached. In the profiling build the macro additionally expects a
+// variable named 'stallCount' to be in scope at the point of use.
+// Macro parameters are parenthesised so that arbitrary argument expressions expand correctly.
+// The expansion is a bare 'if' statement: do not follow a use of these macros with 'else'.
+#define WAIT_FOR_PROGRESS(pGlobalIndex, targetIndex) if(*(pGlobalIndex) < (targetIndex)) WaitForProgressCount((pGlobalIndex), (targetIndex), stallCount)
+#else
+#define WAIT_FOR_PROGRESS(pGlobalIndex, targetIndex) if(*(pGlobalIndex) < (targetIndex)) WaitForProgressCount((pGlobalIndex), (targetIndex))
+#endif
+#define WAIT_FOR_PROGRESS_NO_TIMER(pGlobalIndex, targetIndex) if(*(pGlobalIndex) < (targetIndex)) WaitForProgressCount((pGlobalIndex), (targetIndex))
+
+
+// Parameter block handed to the SolverCore solve functions. It carries the inputs of one solve
+// as well as the counters the worker threads use to share work during a parallel solve.
+struct SolverIslandParams
+{
+ //Default friction model params
+ // Number of position / velocity iterations to run.
+ PxU32 positionIterations;
+ PxU32 velocityIterations;
+ // Solver bodies; their velocities are copied into motionVelocityArray after the position iterations.
+ PxSolverBody* PX_RESTRICT bodyListStart;
+ // Per-body data made available to the solve functions through SolverContext::solverBodyArray.
+ PxSolverBodyData* PX_RESTRICT bodyDataList;
+ // Number of entries in bodyListStart.
+ PxU32 bodyListSize;
+ PxU32 solverBodyOffset;
+ // Articulations and their count.
+ ArticulationSolverDesc* PX_RESTRICT articulationListStart;
+ PxU32 articulationListSize;
+ // Contact/joint constraint descriptors and the batch headers that group them.
+ PxSolverConstraintDesc* PX_RESTRICT constraintList;
+ PxConstraintBatchHeader* constraintBatchHeaders;
+ PxU32 numConstraintHeaders;
+ // Number of batch headers in each partition; a parallel solve waits for one partition to finish before starting the next.
+ PxU32* headersPerPartition;
+ PxU32 nbPartitions;
+ // Output: one saved velocity per solver body.
+ Cm::SpatialVector* PX_RESTRICT motionVelocityArray;
+ // Number of batch headers a thread claims per atomic increment in a parallel solve.
+ PxU32 batchSize;
+ PxsBodyCore*const* bodyArray;
+ PxsRigidBody** PX_RESTRICT rigidBodies;
+
+ //Shared state progress counters
+ // constraintIndex: next unclaimed constraint batch (advanced with atomicAdd).
+ // constraintIndex2: number of constraint batches whose solve has completed.
+ PxI32 constraintIndex;
+ PxI32 constraintIndex2;
+ // bodyListIndex / bodyListIndex2: the same claim / completion pair for the save-velocity stage.
+ PxI32 bodyListIndex;
+ PxI32 bodyListIndex2;
+ PxI32 bodyIntegrationListIndex;
+ PxI32 numObjectsIntegrated;
+
+
+ //Additional 1d/2d friction model params
+ // Friction counterparts of constraintList, constraintBatchHeaders, numConstraintHeaders, headersPerPartition and nbPartitions.
+ PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList;
+
+ PxConstraintBatchHeader* frictionConstraintBatches;
+ PxU32 numFrictionConstraintHeaders;
+ PxU32* frictionHeadersPerPartition;
+ PxU32 nbFrictionPartitions;
+
+ //Additional Shared state progress counters
+ // Next unclaimed friction batch (advanced with atomicAdd).
+ PxI32 frictionConstraintIndex;
+
+ //Write-back threshold information
+ // Shared threshold stream and its length.
+ ThresholdStreamElement* PX_RESTRICT thresholdStream;
+ PxU32 thresholdStreamLength;
+
+ // Number of elements written to thresholdStream; threads reserve slots by atomically adding to it.
+ PxI32* outThresholdPairs;
+};
+
+
+/*!
+Interface to constraint solver cores
+
+*/
+// Abstract base class of the constraint solver cores. All solve entry points are const.
+class SolverCore
+{
+public:
+ // Releases the solver core object.
+ virtual void destroyV() = 0;
+ virtual ~SolverCore() {}
+ /*
+ solves dual problem exactly by GS-iterating until convergence stops
+ only uses regular velocity vector for storing results, and backs up initial state, which is restored.
+ the solution forces are saved in a vector.
+
+ state should not be stored, this function is safe to call from multiple threads.
+
+ Returns the total number of constraints that should be solved across all threads. Used for synchronization outside of this method
+ */
+
+ virtual PxI32 solveVParallelAndWriteBack
+ (SolverIslandParams& params) const = 0;
+
+
+ // Solve taking the same parameter block; NOTE(review): presumably the single-threaded counterpart of solveVParallelAndWriteBack - confirm.
+ virtual void solveV_Blocks
+ (SolverIslandParams& params) const = 0;
+
+
+ // Runs the write-back functions from writeBackTable over numConstraintBatches constraint batches;
+ // outThresholdPairs receives the reported number of threshold stream entries.
+ virtual void writeBackV
+ (const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 constraintListSize, PxConstraintBatchHeader* contactConstraintBatches, const PxU32 numConstraintBatches,
+ ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs,
+ PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const = 0;
+};
+
+}
+
+}
+
+#endif //DY_SOLVERCORE_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverExt.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverExt.h
new file mode 100644
index 00000000..18fd5bcc
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverExt.h
@@ -0,0 +1,85 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_SOLVEREXTBODY_H
+#define DY_SOLVEREXTBODY_H
+
+#include "foundation/PxVec3.h"
+#include "foundation/PxTransform.h"
+#include "CmPhysXCommon.h"
+#include "CmSpatialVector.h"
+
+namespace physx
+{
+
+class PxsRigidBody;
+struct PxsBodyCore;
+struct PxSolverBody;
+struct PxSolverBodyData;
+
+
+namespace Dy
+{
+
+
+struct FsData;
+struct SolverConstraint1D;
+
+// Handle that refers either to a rigid solver body or to a link of an articulation, so that
+// solver code can treat both through one interface.
+class SolverExtBody
+{
+public:
+ // The same pointer viewed either as articulation data or as a rigid solver body.
+ // NOTE(review): presumably mLinkIndex tells which member is valid - confirm in the implementation.
+ union
+ {
+ const FsData* mFsData;
+ const PxSolverBody* mBody;
+ };
+ // Solver body data associated with the body.
+ const PxSolverBodyData* mBodyData;
+
+ // Link index supplied at construction.
+ PxU16 mLinkIndex;
+
+ // Stores the untyped pointers; bodyOrArticulation is written through the mBody member of the union.
+ SolverExtBody(const void* bodyOrArticulation, const void* bodyData, PxU16 linkIndex):
+ mBody(reinterpret_cast<const PxSolverBody*>(bodyOrArticulation)),
+ mBodyData(reinterpret_cast<const PxSolverBodyData*>(bodyData)),
+ mLinkIndex(linkIndex)
+ {}
+
+ // Computes the velocity change (linDeltaV, angDeltaV) produced by the given linear/angular impulse.
+ void getResponse(const PxVec3& linImpulse, const PxVec3& angImpulse,
+ PxVec3& linDeltaV, PxVec3& angDeltaV, PxReal dominance) const;
+
+ // NOTE(review): presumably projects the body's velocity onto the given linear/angular axes - confirm in the implementation.
+ PxReal projectVelocity(const PxVec3& linear, const PxVec3& angular) const;
+ // Accessors for the body's linear and angular velocity.
+ PxVec3 getLinVel() const;
+ PxVec3 getAngVel() const;
+};
+
+}
+
+}
+
+#endif //DY_SOLVEREXTBODY_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraints.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraints.cpp
new file mode 100644
index 00000000..e5eb3328
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraints.cpp
@@ -0,0 +1,868 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxPreprocessor.h"
+#include "PsVecMath.h"
+
+#ifdef PX_SUPPORT_SIMD
+
+#include "CmPhysXCommon.h"
+#include "DySolverBody.h"
+#include "DySolverContact.h"
+#include "DySolverContactPF.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverConstraintDesc.h"
+#include "DyThresholdTable.h"
+#include "DySolverContext.h"
+#include "PsUtilities.h"
+#include "DyConstraint.h"
+#include "PsAtomic.h"
+#include "DyThresholdTable.h"
+#include "DySolverConstraintsShared.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+// Solves the contact (normal) section of a Coulomb-friction constraint between two
+// rigid solver bodies.
+//
+// desc.constraint is walked as a run of patches, each a SolverContactCoulombHeader
+// followed by numNormalConstr SolverContactPoint entries. The run ends at the first
+// header's frictionOffset. Each patch's applied-impulse floats live in the friction
+// section, right after the SolverFrictionHeader found at header + frictionOffset.
+// Both bodies' velocities are loaded once, handed to solveDynamicContacts() for every
+// patch, and stored back after the last patch.
+void solveContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+	PxSolverBody& bodyA = *desc.bodyA;
+	PxSolverBody& bodyB = *desc.bodyB;
+
+	// Local working copies of the velocity state.
+	Vec3V linA = V3LoadA(bodyA.linearVelocity);
+	Vec3V linB = V3LoadA(bodyB.linearVelocity);
+	Vec3V angA = V3LoadA(bodyA.angularState);
+	Vec3V angB = V3LoadA(bodyB.angularState);
+
+	// End of the contact section, taken from the first header.
+	SolverContactCoulombHeader* PX_RESTRICT leadHeader = reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint);
+	const PxU8* PX_RESTRICT end = desc.constraint + leadHeader->frictionOffset;
+
+	PxU8* PX_RESTRICT cursor = desc.constraint;
+
+	while(cursor < end)
+	{
+		SolverContactCoulombHeader* PX_RESTRICT header = reinterpret_cast<SolverContactCoulombHeader*>(cursor);
+		cursor += sizeof(SolverContactCoulombHeader);
+
+		const PxU32 pointCount = header->numNormalConstr;
+		SolverContactPoint* PX_RESTRICT points = reinterpret_cast<SolverContactPoint*>(cursor);
+		cursor += pointCount * sizeof(SolverContactPoint);
+
+		// Per-patch constants shared by all of the patch's contact points.
+		const Vec3V patchNormal = header->getNormal();
+		const FloatV invMassDomA = FLoad(header->dominance0);
+		const FloatV invMassDomB = FLoad(header->dominance1);
+		const FloatV angDomA = FLoad(header->angDom0);
+		const FloatV angDomB = FLoad(header->angDom1);
+
+		// Impulse slots for this patch sit behind the matching friction header.
+		PxU8* const headerBytes = reinterpret_cast<PxU8*>(header);
+		PxF32* impulses = reinterpret_cast<PxF32*>(headerBytes + header->frictionOffset + sizeof(SolverFrictionHeader));
+		Ps::prefetchLine(impulses);
+
+		solveDynamicContacts(points, pointCount, patchNormal, invMassDomA, invMassDomB,
+			angDomA, angDomB, linA, angA, linB, angB, impulses);
+	}
+
+	// Commit the updated velocities.
+	V3StoreA(linA, bodyA.linearVelocity);
+	V3StoreA(linB, bodyB.linearVelocity);
+	V3StoreA(angA, bodyA.angularState);
+	V3StoreA(angB, bodyB.angularState);
+
+	PX_ASSERT(cursor == end);
+}
+
+// Solves the friction section of a Coulomb-friction constraint between two rigid
+// solver bodies.
+//
+// desc.constraint is walked as a run of friction patches covering
+// getConstraintLength(desc) bytes. Each patch is laid out as
+//   SolverFrictionHeader
+//   PxF32 appliedImpulse[]                     (getAppliedForcePaddingSize() bytes)
+//   SolverContactFriction[numFrictionConstr]
+// Friction rows are grouped per contact point (numFrictionConstr / numNormalConstr
+// rows each) and every row's force is clamped to +/- staticFriction * appliedImpulse[point].
+// Body velocities are loaded once, updated in local variables and stored at the end.
+// NOTE(review): numNormalConstr is used as a divisor without a zero check; presumably
+// constraint prep never emits a friction patch without contact points - confirm.
+void solveFriction(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+ PxSolverBody& b0 = *desc.bodyA;
+ PxSolverBody& b1 = *desc.bodyB;
+
+ Vec3V linVel0 = V3LoadA(b0.linearVelocity);
+ Vec3V linVel1 = V3LoadA(b1.linearVelocity);
+ Vec3V angState0 = V3LoadA(b0.angularState);
+ Vec3V angState1 = V3LoadA(b1.angularState);
+
+ PxU8* PX_RESTRICT ptr = desc.constraint;
+ PxU8* PX_RESTRICT currPtr = ptr;
+
+ const PxU8* PX_RESTRICT last = ptr + getConstraintLength(desc);
+
+
+ while(currPtr < last)
+ {
+ const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr);
+ currPtr += sizeof(SolverFrictionHeader);
+ // Per-contact-point impulses stored directly behind the header; read below as the friction limit.
+ PxF32* appliedImpulse = reinterpret_cast<PxF32*>(currPtr);
+ currPtr += frictionHeader->getAppliedForcePaddingSize();
+
+ SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
+ const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr;
+ const PxU32 numNormalConstr = frictionHeader->numNormalConstr;
+
+ const PxU32 numFrictionPerPoint = numFrictionConstr/numNormalConstr;
+
+ currPtr += numFrictionConstr * sizeof(SolverContactFriction);
+ const FloatV staticFriction = frictionHeader->getStaticFriction();
+
+ const FloatV invMass0D0 = FLoad(frictionHeader->invMass0D0);
+ const FloatV invMass1D1 = FLoad(frictionHeader->invMass1D1);
+
+
+ const FloatV angD0 = FLoad(frictionHeader->angDom0);
+ const FloatV angD1 = FLoad(frictionHeader->angDom1);
+
+ // i indexes friction rows, j indexes contact points; i advances in the inner loop only.
+ for(PxU32 i=0, j = 0;i<numFrictionConstr;j++)
+ {
+ for(PxU32 p = 0; p < numFrictionPerPoint; p++, i++)
+ {
+
+ SolverContactFriction& f = frictions[i];
+ Ps::prefetchLine(&frictions[i], 128);
+
+ // xyz = friction axis / angular arms, w = packed scalars (applied force, velocity multiplier).
+ const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW);
+ const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW);
+ const Vec3V rbXt0 = Vec3V_From_Vec4V(f.rbXnXYZ_biasW);
+
+ const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW);
+ const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW);
+
+ const FloatV targetVel = FLoad(f.targetVel);
+
+ // Friction limit for this row: +/- staticFriction * impulse of contact point j.
+ const FloatV normalImpulse = FLoad(appliedImpulse[j]);
+ const FloatV maxFriction = FMul(staticFriction, normalImpulse);
+ const FloatV nMaxFriction = FNeg(maxFriction);
+
+ //Compute the relative velocity of the two bodies along the friction axis t0.
+
+ const FloatV t0Vel1 = V3Dot(t0, linVel0);
+ const FloatV t0Vel2 = V3Dot(raXt0, angState0);
+ const FloatV t0Vel3 = V3Dot(t0, linVel1);
+ const FloatV t0Vel4 = V3Dot(rbXt0, angState1);
+
+
+ const FloatV t0Vel = FSub(FAdd(t0Vel1, t0Vel2), FAdd(t0Vel3, t0Vel4));
+
+ const Vec3V delLinVel0 = V3Scale(t0, invMass0D0);
+ const Vec3V delLinVel1 = V3Scale(t0, invMass1D1);
+
+ // still lots to do here: using loop pipelining we can interweave this code with the
+ // above - the code here has a lot of stalls that we would thereby eliminate
+
+ // New accumulated force, clamped to the friction limit; only the change (deltaF) is applied.
+ const FloatV tmp = FNegScaleSub(targetVel,velMultiplier,appliedForce);
+ FloatV newForce = FScaleAdd(t0Vel, velMultiplier, tmp);
+ newForce = FClamp(newForce, nMaxFriction, maxFriction);
+ FloatV deltaF = FSub(newForce, appliedForce);
+
+ linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
+ linVel1 = V3NegScaleSub(delLinVel1, deltaF, linVel1);
+ angState0 = V3ScaleAdd(raXt0, FMul(deltaF, angD0), angState0);
+ angState1 = V3NegScaleSub(rbXt0, FMul(deltaF, angD1), angState1);
+
+ f.setAppliedForce(newForce);
+ }
+ }
+ }
+
+ // Write back
+ V3StoreA(linVel0, b0.linearVelocity);
+ V3StoreA(linVel1, b1.linearVelocity);
+ V3StoreA(angState0, b0.angularState);
+ V3StoreA(angState1, b1.angularState);
+
+
+ PX_ASSERT(currPtr == last);
+}
+
+// Variant of the Coulomb contact solve that only reads and writes body A
+// (desc.bodyA); body B is never touched.
+//
+// Walks the same SolverContactCoulombHeader / SolverContactPoint patch layout, ending
+// at the first header's frictionOffset, and hands each patch to solveStaticContacts().
+void solveContactCoulomb_BStatic(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+	PxSolverBody& bodyA = *desc.bodyA;
+
+	// Local working copies of body A's velocity state.
+	Vec3V linA = V3LoadA(bodyA.linearVelocity);
+	Vec3V angA = V3LoadA(bodyA.angularState);
+
+	SolverContactCoulombHeader* leadHeader = reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint);
+	const PxU8* PX_RESTRICT end = desc.constraint + leadHeader->frictionOffset;
+
+	PxU8* PX_RESTRICT cursor = desc.constraint;
+
+	while(cursor < end)
+	{
+		SolverContactCoulombHeader* PX_RESTRICT header = reinterpret_cast<SolverContactCoulombHeader*>(cursor);
+		cursor += sizeof(SolverContactCoulombHeader);
+
+		const PxU32 pointCount = header->numNormalConstr;
+
+		SolverContactPoint* PX_RESTRICT points = reinterpret_cast<SolverContactPoint*>(cursor);
+		Ps::prefetchLine(points);
+		cursor += pointCount * sizeof(SolverContactPoint);
+
+		// Impulse slots for this patch sit behind the matching friction header.
+		PxU8* const headerBytes = reinterpret_cast<PxU8*>(header);
+		PxF32* impulses = reinterpret_cast<PxF32*>(headerBytes + header->frictionOffset + sizeof(SolverFrictionHeader));
+		Ps::prefetchLine(impulses);
+
+		const Vec3V patchNormal = header->getNormal();
+		const FloatV invMassDomA = FLoad(header->dominance0);
+		const FloatV angDomA = FLoad(header->angDom0);
+
+		solveStaticContacts(points, pointCount, patchNormal, invMassDomA,
+			angDomA, linA, angA, impulses);
+	}
+
+	// Commit the updated velocities.
+	V3StoreA(linA, bodyA.linearVelocity);
+	V3StoreA(angA, bodyA.angularState);
+
+	PX_ASSERT(cursor == end);
+}
+
+// Variant of the Coulomb friction solve that only reads and writes body A
+// (desc.bodyA); body B contributes nothing to the velocity or to the update.
+//
+// desc.constraint is walked as a run of friction patches covering
+// getConstraintLength(desc) bytes: SolverFrictionHeader, then the applied-impulse
+// floats (getAppliedForcePaddingSize() bytes), then numFrictionConstr
+// SolverContactFriction rows. Rows are grouped per contact point and clamped to
+// +/- staticFriction * appliedImpulse[point].
+// NOTE(review): numNormalConstr is used as a divisor without a zero check; presumably
+// constraint prep never emits a friction patch without contact points - confirm.
+void solveFriction_BStatic(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+ PxSolverBody& b0 = *desc.bodyA;
+
+ Vec3V linVel0 = V3LoadA(b0.linearVelocity);
+ Vec3V angState0 = V3LoadA(b0.angularState);
+
+ PxU8* PX_RESTRICT currPtr = desc.constraint;
+
+ const PxU8* PX_RESTRICT last = currPtr + getConstraintLength(desc);
+
+ while(currPtr < last)
+ {
+
+ const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr);
+ const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr;
+ const PxU32 numNormalConstr = frictionHeader->numNormalConstr;
+ const PxU32 numFrictionPerPoint = numFrictionConstr/numNormalConstr;
+ currPtr +=sizeof(SolverFrictionHeader);
+ // Per-contact-point impulses stored directly behind the header; read below as the friction limit.
+ PxF32* appliedImpulse = reinterpret_cast<PxF32*>(currPtr);
+ currPtr +=frictionHeader->getAppliedForcePaddingSize();
+
+ SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
+ currPtr += numFrictionConstr * sizeof(SolverContactFriction);
+
+ const FloatV invMass0 = FLoad(frictionHeader->invMass0D0);
+ const FloatV angD0 = FLoad(frictionHeader->angDom0);
+ //const FloatV angD1 = FLoad(frictionHeader->angDom1);
+
+
+ const FloatV staticFriction = frictionHeader->getStaticFriction();
+
+ // i indexes friction rows, j indexes contact points; i advances in the inner loop only.
+ for(PxU32 i=0, j = 0;i<numFrictionConstr;j++)
+ {
+ for(PxU32 p = 0; p < numFrictionPerPoint; p++, i++)
+ {
+ SolverContactFriction& f = frictions[i];
+ Ps::prefetchLine(&frictions[i+1]);
+
+ const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW);
+ const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW);
+
+ const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW);
+ const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW);
+
+ const FloatV targetVel = FLoad(f.targetVel);
+
+ //const FloatV normalImpulse = contacts[f.contactIndex].getAppliedForce();
+ // Friction limit for this row: +/- staticFriction * impulse of contact point j.
+ const FloatV normalImpulse = FLoad(appliedImpulse[j]);
+ const FloatV maxFriction = FMul(staticFriction, normalImpulse);
+ const FloatV nMaxFriction = FNeg(maxFriction);
+
+ //Compute body A's velocity along the friction axis t0.
+
+ const FloatV t0Vel1 = V3Dot(t0, linVel0);
+ const FloatV t0Vel2 = V3Dot(raXt0, angState0);
+
+ const FloatV t0Vel = FAdd(t0Vel1, t0Vel2);
+
+ const Vec3V delangState0 = V3Scale(raXt0, angD0);
+ const Vec3V delLinVel0 = V3Scale(t0, invMass0);
+
+ // still lots to do here: using loop pipelining we can interweave this code with the
+ // above - the code here has a lot of stalls that we would thereby eliminate
+
+ // New accumulated force, clamped to the friction limit; only the change (deltaF) is applied.
+ const FloatV tmp = FNegScaleSub(targetVel,velMultiplier,appliedForce);
+ FloatV newForce = FScaleAdd(t0Vel, velMultiplier, tmp);
+ newForce = FClamp(newForce, nMaxFriction, maxFriction);
+ const FloatV deltaF = FSub(newForce, appliedForce);
+
+ linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
+ angState0 = V3ScaleAdd(delangState0, deltaF, angState0);
+
+ f.setAppliedForce(newForce);
+ }
+ }
+ }
+
+ // Write back
+ V3StoreA(linVel0, b0.linearVelocity);
+ V3StoreA(angState0, b0.angularState);
+
+ PX_ASSERT(currPtr == last);
+}
+
+
+// "Conclude" pass over the contact section of a Coulomb constraint: for every contact
+// point, biasedErr is overwritten with unbiasedErr.
+//
+// The section ends at the first header's frictionOffset. The point stride is chosen
+// per patch from the header type (SolverContactPointExt for DY_SC_TYPE_EXT_CONTACT,
+// SolverContactPoint otherwise); either way the point is accessed through a
+// SolverContactPoint pointer.
+void concludeContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+	PxU8* PX_RESTRICT cursor = desc.constraint;
+
+	const SolverContactCoulombHeader* PX_RESTRICT leadHeader = reinterpret_cast<const SolverContactCoulombHeader*>(cursor);
+	PxU8* PX_RESTRICT end = desc.constraint + leadHeader->frictionOffset;
+
+	while(cursor < end)
+	{
+		const SolverContactCoulombHeader* PX_RESTRICT header = reinterpret_cast<const SolverContactCoulombHeader*>(cursor);
+		cursor += sizeof(SolverContactCoulombHeader);
+
+		// Warm the cache for the points that follow the header.
+		Ps::prefetchLine(cursor, 128);
+		Ps::prefetchLine(cursor, 256);
+		Ps::prefetchLine(cursor, 384);
+
+		PxU32 stride = sizeof(SolverContactPoint);
+		if(header->type == DY_SC_TYPE_EXT_CONTACT)
+			stride = sizeof(SolverContactPointExt);
+
+		for(PxU32 remaining = header->numNormalConstr; remaining != 0; --remaining)
+		{
+			SolverContactPoint* point = reinterpret_cast<SolverContactPoint*>(cursor);
+			point->biasedErr = point->unbiasedErr;
+			cursor += stride;
+		}
+	}
+	PX_ASSERT(cursor == end);
+}
+
+// Copies the per-contact applied impulses of a Coulomb constraint into desc.writeBack
+// (when it is non-NULL) and, if warranted, appends one element to the thread-local
+// threshold stream in cache.
+//
+// desc	constraint descriptor; desc.constraint holds the contact patches, ending at
+//		the first header's frictionOffset
+// cache	receives the ThresholdStreamElement at mThresholdStream[mThresholdStreamIndex++]
+// bd0/bd1	solver body data of the two bodies (reportThreshold and nodeIndex are read)
+//
+// The summed normal force is only accumulated while copying, so it stays zero when
+// desc.writeBack is NULL. The force-threshold flag that is finally tested is the one
+// read from the last patch header visited. The point stride is taken from the first
+// header's type.
+void writeBackContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& cache,
+							 PxSolverBodyData& bd0, PxSolverBodyData& bd1)
+{
+	PxU8* PX_RESTRICT cursor = desc.constraint;
+	PxReal* PX_RESTRICT forceOut = reinterpret_cast<PxReal*>(desc.writeBack);
+	const SolverContactCoulombHeader* PX_RESTRICT leadHeader = reinterpret_cast<const SolverContactCoulombHeader*>(cursor);
+	PxU8* PX_RESTRICT end = desc.constraint + leadHeader->frictionOffset;
+
+	PxU32 pointStride = sizeof(SolverContactPoint);
+	if(leadHeader->type == DY_SC_TYPE_EXT_CONTACT)
+		pointStride = sizeof(SolverContactPointExt);
+
+	PxReal totalNormalForce = 0.f;
+	bool reportsThresholds = false;
+
+	while(cursor < end)
+	{
+		const SolverContactCoulombHeader* PX_RESTRICT header = reinterpret_cast<const SolverContactCoulombHeader*>(cursor);
+		cursor += sizeof(SolverContactCoulombHeader);
+
+		// Impulse slots for this patch sit behind the matching friction header.
+		const PxU8* const headerBytes = reinterpret_cast<const PxU8*>(header);
+		PxF32* impulses = reinterpret_cast<PxF32*>(const_cast<PxU8*>(headerBytes + header->frictionOffset + sizeof(SolverFrictionHeader)));
+
+		reportsThresholds = (header->flags & SolverContactHeader::eHAS_FORCE_THRESHOLDS) != 0;
+
+		const PxU32 pointCount = header->numNormalConstr;
+
+		Ps::prefetchLine(cursor, 256);
+		Ps::prefetchLine(cursor, 384);
+
+		if(forceOut != NULL)
+		{
+			for(PxU32 k = 0; k < pointCount; ++k)
+			{
+				const PxF32 impulse = impulses[k];
+				*forceOut++ = impulse;
+				totalNormalForce += impulse;
+			}
+		}
+		cursor += pointCount * pointStride;
+	}
+	PX_ASSERT(cursor == end);
+
+	// Threshold reporting: rigid-rigid pairs only (neither side is an articulation
+	// link), non-zero force, and at least one body with a finite report threshold.
+	if(reportsThresholds && desc.linkIndexA == PxSolverConstraintDesc::NO_LINK && desc.linkIndexB == PxSolverConstraintDesc::NO_LINK &&
+		totalNormalForce != 0 && (bd0.reportThreshold < PX_MAX_REAL || bd1.reportThreshold < PX_MAX_REAL))
+	{
+		ThresholdStreamElement elt;
+		elt.normalForce = totalNormalForce;
+		elt.threshold = PxMin<float>(bd0.reportThreshold, bd1.reportThreshold);
+		elt.nodeIndexA = bd0.nodeIndex;
+		elt.nodeIndexB = bd1.nodeIndex;
+		elt.shapeInteraction = (reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint))->shapeInteraction;
+		Ps::order(elt.nodeIndexA, elt.nodeIndexB);
+		PX_ASSERT(elt.nodeIndexA < elt.nodeIndexB);
+
+		PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength);
+		cache.mThresholdStream[cache.mThresholdStreamIndex++] = elt;
+	}
+}
+
+
+// Runs solveFriction() on each of the constraintCount descriptors in desc, in array order.
+void solveFrictionBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+	for(const PxSolverConstraintDesc* cur = desc; cur != descEnd; ++cur)
+		solveFriction(*cur, cache);
+}
+
+
+// Write-back flavour of the friction block solve. Friction has nothing to write back
+// here: the body only runs solveFriction() on every descriptor, in array order.
+void solveFrictionBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+	for(const PxSolverConstraintDesc* cur = desc; cur != descEnd; ++cur)
+		solveFriction(*cur, cache);
+}
+
+// Runs solveFriction_BStatic() on each of the constraintCount descriptors in desc, in array order.
+void solveFriction_BStaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index != constraintCount)
+	{
+		solveFriction_BStatic(desc[index], cache);
+		++index;
+	}
+}
+
+
+// Conclude flavour of the body-A-only friction block solve. No conclude step is
+// performed for friction: the body only runs solveFriction_BStatic() on every
+// descriptor, in array order.
+void solveFriction_BStaticConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index != constraintCount)
+	{
+		solveFriction_BStatic(desc[index], cache);
+		++index;
+	}
+}
+
+// Write-back flavour of the body-A-only friction block solve. Nothing is written
+// back here: the body only runs solveFriction_BStatic() on every descriptor, in
+// array order.
+void solveFriction_BStaticBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+	for(const PxSolverConstraintDesc* cur = desc; cur != descEnd; ++cur)
+		solveFriction_BStatic(*cur, cache);
+}
+
+
+// Runs solveContactCoulomb() on each of the constraintCount descriptors in desc, in array order.
+void solveContactCoulombBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+	for(const PxSolverConstraintDesc* cur = desc; cur != descEnd; ++cur)
+		solveContactCoulomb(*cur, cache);
+}
+
+// For each descriptor in array order: solve its contact section, then immediately run
+// the conclude pass on the same descriptor.
+void solveContactCoulombConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+	for(const PxSolverConstraintDesc* cur = desc; cur != descEnd; ++cur)
+	{
+		const PxSolverConstraintDesc& current = *cur;
+		solveContactCoulomb(current, cache);
+		concludeContactCoulomb(current, cache);
+	}
+}
+
+// For each descriptor in array order: solve its contact section, then write back the
+// applied impulses and any threshold element via writeBackContactCoulomb().
+//
+// Afterwards, if the thread-local threshold stream has fewer than four free slots
+// left (index > length - 4), its contents are copied to the shared stream and the
+// local index is reset. The destination range is reserved with an atomic add on
+// mSharedOutThresholdPairs.
+// NOTE(review): the start offset is computed as atomicAdd(...) - count, which assumes
+// atomicAdd returns the post-add value - confirm in PsAtomic.h.
+void solveContactCoulombBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+	for(const PxSolverConstraintDesc* cur = desc; cur != descEnd; ++cur)
+	{
+		const PxSolverConstraintDesc& current = *cur;
+		PxSolverBodyData& dataA = cache.solverBodyArray[current.bodyADataIndex];
+		PxSolverBodyData& dataB = cache.solverBodyArray[current.bodyBDataIndex];
+		solveContactCoulomb(current, cache);
+		writeBackContactCoulomb(current, cache, dataA, dataB);
+	}
+
+	if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4))
+	{
+		// Flush the local threshold elements to the shared buffer.
+		const PxI32 pending = PxI32(cache.mThresholdStreamIndex);
+		const PxI32 base = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, pending) - pending;
+		for(PxU32 k = 0; k < cache.mThresholdStreamIndex; ++k)
+			cache.mSharedThresholdStream[k + base] = cache.mThresholdStream[k];
+		cache.mThresholdStreamIndex = 0;
+	}
+}
+
+// Runs solveContactCoulomb_BStatic() on each of the constraintCount descriptors in desc, in array order.
+void solveContactCoulomb_BStaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index != constraintCount)
+	{
+		solveContactCoulomb_BStatic(desc[index], cache);
+		++index;
+	}
+}
+
+// For each descriptor in array order: run the body-A-only contact solve, then the
+// conclude pass on the same descriptor.
+void solveContactCoulomb_BStaticConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index != constraintCount)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+		solveContactCoulomb_BStatic(current, cache);
+		concludeContactCoulomb(current, cache);
+		++index;
+	}
+}
+
+// For each descriptor in array order: run the body-A-only contact solve, then write
+// back the applied impulses and any threshold element via writeBackContactCoulomb().
+//
+// Afterwards, if the thread-local threshold stream has fewer than four free slots
+// left (index > length - 4), its contents are copied to the shared stream and the
+// local index is reset. The destination range is reserved with an atomic add on
+// mSharedOutThresholdPairs.
+// NOTE(review): the start offset is computed as atomicAdd(...) - count, which assumes
+// atomicAdd returns the post-add value - confirm in PsAtomic.h.
+void solveContactCoulomb_BStaticBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+	for(const PxSolverConstraintDesc* cur = desc; cur != descEnd; ++cur)
+	{
+		const PxSolverConstraintDesc& current = *cur;
+		PxSolverBodyData& dataA = cache.solverBodyArray[current.bodyADataIndex];
+		PxSolverBodyData& dataB = cache.solverBodyArray[current.bodyBDataIndex];
+		solveContactCoulomb_BStatic(current, cache);
+		writeBackContactCoulomb(current, cache, dataA, dataB);
+	}
+
+	if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4))
+	{
+		// Fewer than four free local slots remain: flush to the shared buffer.
+		const PxI32 pending = PxI32(cache.mThresholdStreamIndex);
+		const PxI32 base = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, pending) - pending;
+		for(PxU32 k = 0; k < cache.mThresholdStreamIndex; ++k)
+			cache.mSharedThresholdStream[k + base] = cache.mThresholdStream[k];
+		cache.mThresholdStreamIndex = 0;
+	}
+}
+
+// Solves the contact section of a Coulomb-friction constraint where either side may
+// be an articulation link instead of a rigid solver body.
+//
+// For each side, linkIndex == PxSolverConstraintDesc::NO_LINK selects the rigid-body
+// path (velocities loaded from / stored to desc.bodyA / desc.bodyB); otherwise the
+// velocity comes from PxcFsGetVelocity() and the result is applied as an impulse via
+// PxcFsApplyImpulse().
+// The contact patches (SolverContactCoulombHeader + SolverContactPointExt[]) end at
+// the first header's frictionOffset.
+void solveExtContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+ //We'll need this.
+// const FloatV zero = FZero();
+// const FloatV one = FOne();
+
+ Vec3V linVel0, angVel0, linVel1, angVel1;
+
+ // Side A: rigid body velocity, or the articulation link's velocity.
+ if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
+ {
+ linVel0 = V3LoadA(desc.bodyA->linearVelocity);
+ angVel0 = V3LoadA(desc.bodyA->angularState);
+ }
+ else
+ {
+ Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationA, desc.linkIndexA);
+ linVel0 = v.linear;
+ angVel0 = v.angular;
+ }
+
+ // Side B: same selection as side A.
+ if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+ {
+ linVel1 = V3LoadA(desc.bodyB->linearVelocity);
+ angVel1 = V3LoadA(desc.bodyB->angularState);
+ }
+ else
+ {
+ Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationB, desc.linkIndexB);
+ linVel1 = v.linear;
+ angVel1 = v.angular;
+ }
+
+ //const PxU8* PX_RESTRICT last = desc.constraint + desc.constraintLengthOver16*16;
+
+ PxU8* PX_RESTRICT currPtr = desc.constraint;
+
+ const SolverContactCoulombHeader* PX_RESTRICT firstHeader = reinterpret_cast<SolverContactCoulombHeader*>(currPtr);
+
+ const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;
+
+ //hopefully pointer aliasing doesn't bite.
+
+ // Impulse totals over all patches; only consumed on the articulation path below.
+ Vec3V linImpulse0 = V3Zero(), linImpulse1 = V3Zero(), angImpulse0 = V3Zero(), angImpulse1 = V3Zero();
+
+ while(currPtr < last)
+ {
+ const SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader*>(currPtr);
+ currPtr += sizeof(SolverContactCoulombHeader);
+
+ const PxU32 numNormalConstr = hdr->numNormalConstr;
+
+ // Impulse slots for this patch sit behind the matching friction header.
+ PxF32* appliedImpulse = reinterpret_cast<PxF32*>(const_cast<PxU8*>(((reinterpret_cast<const PxU8*>(hdr)) + hdr->frictionOffset + sizeof(SolverFrictionHeader))));
+ Ps::prefetchLine(appliedImpulse);
+
+ SolverContactPointExt* PX_RESTRICT contacts = reinterpret_cast<SolverContactPointExt*>(currPtr);
+ Ps::prefetchLine(contacts);
+ currPtr += numNormalConstr * sizeof(SolverContactPointExt);
+
+ // Per-patch impulse accumulators handed to solveExtContacts().
+ Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero();
+
+ const Vec3V normal = hdr->getNormal();
+
+ solveExtContacts(contacts, numNormalConstr, normal, linVel0, angVel0, linVel1, angVel1, li0, ai0, li1, ai1, appliedImpulse);
+
+ // Scale the patch impulses by the header's dominance terms and add them to the totals.
+ linImpulse0 = V3ScaleAdd(li0, FLoad(hdr->dominance0), linImpulse0);
+ angImpulse0 = V3ScaleAdd(ai0, FLoad(hdr->angDom0), angImpulse0);
+ linImpulse1 = V3NegScaleSub(li1, FLoad(hdr->dominance1), linImpulse1);
+ angImpulse1 = V3NegScaleSub(ai1, FLoad(hdr->angDom1), angImpulse1);
+ }
+
+ // Rigid bodies take the updated velocities directly; articulation links take the summed impulse.
+ if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
+ {
+ V3StoreA(linVel0, desc.bodyA->linearVelocity);
+ V3StoreA(angVel0, desc.bodyA->angularState);
+ }
+ else
+ PxcFsApplyImpulse(*desc.articulationA, desc.linkIndexA, linImpulse0, angImpulse0);
+
+ if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+ {
+ V3StoreA(linVel1, desc.bodyB->linearVelocity);
+ V3StoreA(angVel1, desc.bodyB->angularState);
+ }
+ else
+ PxcFsApplyImpulse(*desc.articulationB, desc.linkIndexB, linImpulse1, angImpulse1);
+
+ PX_ASSERT(currPtr == last);
+}
+
+// Solves the friction section of a Coulomb-friction constraint where either side may
+// be an articulation link instead of a rigid solver body.
+//
+// For each side, linkIndex == PxSolverConstraintDesc::NO_LINK selects the rigid-body
+// path (velocities loaded from / stored to desc.bodyA / desc.bodyB); otherwise the
+// velocity comes from PxcFsGetVelocity() and the result is applied as an impulse via
+// PxcFsApplyImpulse().
+// The friction patches span desc.constraintLengthOver16*16 bytes, each laid out as
+// SolverFrictionHeader, applied-impulse floats (getAppliedForcePaddingSize() bytes),
+// then numFrictionConstr SolverContactFrictionExt rows grouped per contact point.
+// NOTE(review): numNormalConstr is used as a divisor without a zero check; presumably
+// constraint prep never emits a friction patch without contact points - confirm.
+void solveExtFriction(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+ Vec3V linVel0, angVel0, linVel1, angVel1;
+
+ // Side A: rigid body velocity, or the articulation link's velocity.
+ if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
+ {
+ linVel0 = V3LoadA(desc.bodyA->linearVelocity);
+ angVel0 = V3LoadA(desc.bodyA->angularState);
+ }
+ else
+ {
+ Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationA, desc.linkIndexA);
+ linVel0 = v.linear;
+ angVel0 = v.angular;
+ }
+
+ // Side B: same selection as side A.
+ if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+ {
+ linVel1 = V3LoadA(desc.bodyB->linearVelocity);
+ angVel1 = V3LoadA(desc.bodyB->angularState);
+ }
+ else
+ {
+ Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationB, desc.linkIndexB);
+ linVel1 = v.linear;
+ angVel1 = v.angular;
+ }
+
+
+ //hopefully pointer aliasing doesn't bite.
+ PxU8* PX_RESTRICT currPtr = desc.constraint;
+
+ const PxU8* PX_RESTRICT last = currPtr + desc.constraintLengthOver16*16;
+
+ // Impulse totals over all patches; only consumed on the articulation path below.
+ Vec3V linImpulse0 = V3Zero(), linImpulse1 = V3Zero(), angImpulse0 = V3Zero(), angImpulse1 = V3Zero();
+
+ while(currPtr < last)
+ {
+
+ const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr);
+ currPtr += sizeof(SolverFrictionHeader);
+ // Per-contact-point impulses stored directly behind the header; read below as the friction limit.
+ PxF32* appliedImpulse = reinterpret_cast<PxF32*>(currPtr);
+ currPtr += frictionHeader->getAppliedForcePaddingSize();
+
+ SolverContactFrictionExt* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionExt*>(currPtr);
+ const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr;
+
+ currPtr += numFrictionConstr * sizeof(SolverContactFrictionExt);
+ const FloatV staticFriction = frictionHeader->getStaticFriction();
+
+
+ // Per-patch impulse accumulators.
+ Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero();
+
+ PxU32 numNormalConstr = frictionHeader->numNormalConstr;
+ PxU32 nbFrictionsPerPoint = numFrictionConstr/numNormalConstr;
+
+
+
+
+ // i indexes friction rows, j indexes contact points; i advances in the inner loop only.
+ for(PxU32 i = 0, j = 0; i < numFrictionConstr; j++)
+ {
+ for(PxU32 p=0;p<nbFrictionsPerPoint;p++, i++)
+ {
+ SolverContactFrictionExt& f = frictions[i];
+ Ps::prefetchLine(&frictions[i+1]);
+
+
+ const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW);
+ const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW);
+ const Vec3V rbXt0 = Vec3V_From_Vec4V(f.rbXnXYZ_biasW);
+
+ const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW);
+ const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW);
+ const FloatV targetVel = FLoad(f.targetVel);
+
+ // Friction limit for this row: +/- staticFriction * impulse of contact point j.
+ const FloatV normalImpulse = FLoad(appliedImpulse[j]);//contacts[f.contactIndex].getAppliedForce();
+ const FloatV maxFriction = FMul(staticFriction, normalImpulse);
+ const FloatV nMaxFriction = FNeg(maxFriction);
+
+ //Compute the relative velocity along the friction axis t0 (plus targetVel).
+
+ Vec3V rVel = V3MulAdd(linVel0, t0, V3Mul(angVel0, raXt0));
+ rVel = V3Sub(rVel, V3MulAdd(linVel1, t0, V3Mul(angVel1, rbXt0)));
+ const FloatV t0Vel = FAdd(V3SumElems(rVel), targetVel);
+
+ // New accumulated force, clamped to the friction limit; only the change (deltaF) is applied.
+ FloatV deltaF = FNeg(FMul(t0Vel, velMultiplier));
+ FloatV newForce = FAdd(appliedForce, deltaF);
+ newForce = FClamp(newForce, nMaxFriction, maxFriction);
+ deltaF = FSub(newForce, appliedForce);
+
+ // Velocity update uses the per-row delta-velocity vectors stored in the friction row.
+ linVel0 = V3ScaleAdd(f.linDeltaVA, deltaF, linVel0);
+ angVel0 = V3ScaleAdd(f.angDeltaVA, deltaF, angVel0);
+ linVel1 = V3ScaleAdd(f.linDeltaVB, deltaF, linVel1);
+ angVel1 = V3ScaleAdd(f.angDeltaVB, deltaF, angVel1);
+
+ li0 = V3ScaleAdd(t0, deltaF, li0); ai0 = V3ScaleAdd(raXt0, deltaF, ai0);
+ li1 = V3ScaleAdd(t0, deltaF, li1); ai1 = V3ScaleAdd(rbXt0, deltaF, ai1);
+
+ f.setAppliedForce(newForce);
+ }
+ }
+
+
+ // Scale the patch impulses by the header's mass/dominance terms and add them to the totals.
+ linImpulse0 = V3ScaleAdd(li0, FLoad(frictionHeader->invMass0D0), linImpulse0);
+ angImpulse0 = V3ScaleAdd(ai0, FLoad(frictionHeader->angDom0), angImpulse0);
+ linImpulse1 = V3NegScaleSub(li1, FLoad(frictionHeader->invMass1D1), linImpulse1);
+ angImpulse1 = V3NegScaleSub(ai1, FLoad(frictionHeader->angDom1), angImpulse1);
+ }
+
+ // Rigid bodies take the updated velocities directly; articulation links take the summed impulse.
+ if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
+ {
+ V3StoreA(linVel0, desc.bodyA->linearVelocity);
+ V3StoreA(angVel0, desc.bodyA->angularState);
+ }
+ else
+ PxcFsApplyImpulse(*desc.articulationA, desc.linkIndexA, linImpulse0, angImpulse0);
+
+ if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+ {
+ V3StoreA(linVel1, desc.bodyB->linearVelocity);
+ V3StoreA(angVel1, desc.bodyB->angularState);
+ }
+ else
+ PxcFsApplyImpulse(*desc.articulationB, desc.linkIndexB, linImpulse1, angImpulse1);
+
+ PX_ASSERT(currPtr == last);
+
+}
+
+// Runs solveExtFriction() on each of the constraintCount descriptors in desc, in array order.
+void solveExtFrictionBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+	for(const PxSolverConstraintDesc* cur = desc; cur != descEnd; ++cur)
+		solveExtFriction(*cur, cache);
+}
+
+// Conclude flavour of the articulation-aware friction block solve. No conclude step
+// is performed for friction: the body only runs solveExtFriction() on every
+// descriptor, in array order.
+void solveExtFrictionConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index != constraintCount)
+	{
+		solveExtFriction(desc[index], cache);
+		++index;
+	}
+}
+
+// Write-back flavour of the articulation-aware friction block solve. Nothing is
+// written back here: the body only runs solveExtFriction() on every descriptor, in
+// array order.
+void solveExtFrictionBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+	for(const PxSolverConstraintDesc* cur = desc; cur != descEnd; ++cur)
+		solveExtFriction(*cur, cache);
+}
+
+
+// Single-descriptor convenience: articulation-aware contact solve followed by the
+// conclude pass on the same descriptor.
+void solveConcludeExtContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+	// Solve first so the conclude pass sees the constraint after this iteration.
+	solveExtContactCoulomb(desc, cache);
+
+	concludeContactCoulomb(desc, cache);
+}
+
+// Runs solveExtContactCoulomb() on each of the constraintCount descriptors in desc, in array order.
+void solveExtContactCoulombBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index != constraintCount)
+	{
+		solveExtContactCoulomb(desc[index], cache);
+		++index;
+	}
+}
+
+// For each descriptor in array order: run the articulation-aware contact solve, then
+// the conclude pass on the same descriptor.
+void solveExtContactCoulombConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+	for(const PxSolverConstraintDesc* cur = desc; cur != descEnd; ++cur)
+	{
+		const PxSolverConstraintDesc& current = *cur;
+		solveExtContactCoulomb(current, cache);
+		concludeContactCoulomb(current, cache);
+	}
+}
+
+// For each descriptor in array order: run the articulation-aware contact solve, then
+// write back the applied impulses and any threshold element via writeBackContactCoulomb().
+//
+// Body data is looked up per side: a rigid body (linkIndex == NO_LINK) uses its own
+// bodyXDataIndex, while an articulation link uses entry 0 of cache.solverBodyArray.
+//
+// Unlike the rigid-only write-back variants, the thread-local threshold stream is
+// flushed to the shared stream whenever it holds at least one element, not only when
+// it is nearly full.
+// NOTE(review): the start offset is computed as atomicAdd(...) - count, which assumes
+// atomicAdd returns the post-add value - confirm in PsAtomic.h.
+void solveExtContactCoulombBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+	for(const PxSolverConstraintDesc* cur = desc; cur != descEnd; ++cur)
+	{
+		const PxSolverConstraintDesc& current = *cur;
+
+		PxSolverBodyData& dataA = cache.solverBodyArray[current.linkIndexA != PxSolverConstraintDesc::NO_LINK ? 0 : current.bodyADataIndex];
+		PxSolverBodyData& dataB = cache.solverBodyArray[current.linkIndexB != PxSolverConstraintDesc::NO_LINK ? 0 : current.bodyBDataIndex];
+
+		solveExtContactCoulomb(current, cache);
+		writeBackContactCoulomb(current, cache, dataA, dataB);
+	}
+
+	if(cache.mThresholdStreamIndex > 0)
+	{
+		// Flush every pending local threshold element to the shared buffer.
+		const PxI32 pending = PxI32(cache.mThresholdStreamIndex);
+		const PxI32 base = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, pending) - pending;
+		for(PxU32 k = 0; k < cache.mThresholdStreamIndex; ++k)
+			cache.mSharedThresholdStream[k + base] = cache.mThresholdStream[k];
+		cache.mThresholdStreamIndex = 0;
+	}
+}
+
+
+// Single-descriptor convenience: two-body contact solve followed by the conclude
+// pass on the same descriptor.
+void solveConcludeContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+	// Solve first so the conclude pass sees the constraint after this iteration.
+	solveContactCoulomb(desc, cache);
+
+	concludeContactCoulomb(desc, cache);
+}
+
+
+// Single-descriptor convenience: body-A-only contact solve followed by the conclude
+// pass on the same descriptor.
+void solveConcludeContactCoulomb_BStatic(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+	// Solve first so the conclude pass sees the constraint after this iteration.
+	solveContactCoulomb_BStatic(desc, cache);
+
+	concludeContactCoulomb(desc, cache);
+}
+
+
+
+}
+
+}
+
+#endif //PX_SUPPORT_SIMD
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraintsBlock.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraintsBlock.cpp
new file mode 100644
index 00000000..c6d7288e
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraintsBlock.cpp
@@ -0,0 +1,985 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxPreprocessor.h"
+#include "PsVecMath.h"
+#include "PsFPU.h"
+#include "CmPhysXCommon.h"
+#include "DySolverBody.h"
+#include "DySolverContactPF4.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverConstraintDesc.h"
+#include "DyThresholdTable.h"
+#include "DySolverContext.h"
+#include "PsUtilities.h"
+#include "DyConstraint.h"
+#include "PsAtomic.h"
+#include "DySolverContact.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+// Velocity pass over the normal contacts of four Coulomb contact constraints at once, one constraint per SIMD lane: reads desc[0..3] and updates bodyA and bodyB of each.
+static void solveContactCoulomb4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/)
+{
+ PxSolverBody& b00 = *desc[0].bodyA;
+ PxSolverBody& b01 = *desc[0].bodyB;
+ PxSolverBody& b10 = *desc[1].bodyA;
+ PxSolverBody& b11 = *desc[1].bodyB;
+ PxSolverBody& b20 = *desc[2].bodyA;
+ PxSolverBody& b21 = *desc[2].bodyB;
+ PxSolverBody& b30 = *desc[3].bodyA;
+ PxSolverBody& b31 = *desc[3].bodyB;
+
+ //We'll need this.
+ const Vec4V vZero = V4Zero();
+ // Aligned loads of each body's linear velocity and angular state (bxy = descriptor x, body y: 0 = A, 1 = B).
+ Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
+ Vec4V linVel01 = V4LoadA(&b01.linearVelocity.x);
+ Vec4V angState00 = V4LoadA(&b00.angularState.x);
+ Vec4V angState01 = V4LoadA(&b01.angularState.x);
+
+ Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
+ Vec4V linVel11 = V4LoadA(&b11.linearVelocity.x);
+ Vec4V angState10 = V4LoadA(&b10.angularState.x);
+ Vec4V angState11 = V4LoadA(&b11.angularState.x);
+
+ Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
+ Vec4V linVel21 = V4LoadA(&b21.linearVelocity.x);
+ Vec4V angState20 = V4LoadA(&b20.angularState.x);
+ Vec4V angState21 = V4LoadA(&b21.angularState.x);
+
+ Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
+ Vec4V linVel31 = V4LoadA(&b31.linearVelocity.x);
+ Vec4V angState30 = V4LoadA(&b30.angularState.x);
+ Vec4V angState31 = V4LoadA(&b31.angularState.x);
+
+ // Transposed copies: vector Tk gathers component k (x, y, z, w) of the four descriptors, one descriptor per lane.
+ Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
+ Vec4V linVel1T0, linVel1T1, linVel1T2, linVel1T3;
+ Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
+ Vec4V angState1T0, angState1T1, angState1T2, angState1T3;
+
+
+ PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
+ PX_TRANSPOSE_44(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2, linVel1T3);
+ PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
+ PX_TRANSPOSE_44(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2, angState1T3);
+
+
+
+ // The constraint data is read through desc[0] only.
+ //hopefully pointer aliasing doesn't bite.
+ PxU8* PX_RESTRICT currPtr = desc[0].constraint;
+
+ SolverContactCoulombHeader4* PX_RESTRICT firstHeader = reinterpret_cast<SolverContactCoulombHeader4*>(currPtr);
+ // The first header's frictionOffset gives the byte offset at which the normal-contact data ends.
+ const PxU8* PX_RESTRICT last = desc[0].constraint + firstHeader->frictionOffset;
+
+ //const PxU8* PX_RESTRICT endPtr = desc[0].constraint + getConstraintLength(desc[0]);
+
+
+ //TODO - can I avoid this many tests???
+ while(currPtr < last)
+ {
+
+ SolverContactCoulombHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader4*>(currPtr);
+ // Destination for a copy of each contact's applied force: frictionOffset bytes past this header, after one SolverFrictionHeader4.
+ Vec4V* appliedForceBuffer = reinterpret_cast<Vec4V*>(currPtr + hdr->frictionOffset + sizeof(SolverFrictionHeader4));
+
+ //PX_ASSERT((PxU8*)appliedForceBuffer < endPtr);
+
+ currPtr = reinterpret_cast<PxU8*>(hdr + 1);
+
+ const PxU32 numNormalConstr = hdr->numNormalConstr;
+
+ SolverContact4Dynamic* PX_RESTRICT contacts = reinterpret_cast<SolverContact4Dynamic*>(currPtr);
+ //const Vec4V dominance1 = V4Neg(__dominance1);
+
+ currPtr = reinterpret_cast<PxU8*>(contacts + numNormalConstr);
+ // Per-lane scale factors applied to deltaF for the linear (invMass*) and angular (angD*) velocity updates.
+ const Vec4V invMass0D0 = hdr->invMassADom;
+ const Vec4V invMass1D1 = hdr->invMassBDom;
+ const Vec4V angD0 = hdr->angD0;
+ const Vec4V angD1 = hdr->angD1;
+
+ const Vec4V normalT0 = hdr->normalX;
+ const Vec4V normalT1 = hdr->normalY;
+ const Vec4V normalT2 = hdr->normalZ;
+ // Linear normal velocity dot(linVel, normal) of body A (normalVel1) and body B (normalVel3); updated incrementally in the loop.
+ const Vec4V __normalVel1 = V4Mul(linVel0T0, normalT0);
+ const Vec4V __normalVel3 = V4Mul(linVel1T0, normalT0);
+ const Vec4V _normalVel1 = V4MulAdd(linVel0T1, normalT1, __normalVel1);
+ const Vec4V _normalVel3 = V4MulAdd(linVel1T1, normalT1, __normalVel3);
+
+ Vec4V normalVel1 = V4MulAdd(linVel0T2, normalT2, _normalVel1);
+ Vec4V normalVel3 = V4MulAdd(linVel1T2, normalT2, _normalVel3);
+ // Sum of the impulse changes of this header's contacts; applied to the linear velocities after the loop.
+ Vec4V accumDeltaF = vZero;
+
+ for(PxU32 i=0;i<numNormalConstr;i++)
+ {
+ SolverContact4Dynamic& c = contacts[i];
+ Ps::prefetchLine((&contacts[i+1]));
+ Ps::prefetchLine((&contacts[i+1]), 128);
+ Ps::prefetchLine((&contacts[i+1]), 256);
+ Ps::prefetchLine((&contacts[i+1]), 384);
+
+ const Vec4V appliedForce = c.appliedForce;
+ const Vec4V velMultiplier = c.velMultiplier;
+
+ const Vec4V targetVel = c.targetVelocity;
+ const Vec4V scaledBias = c.scaledBias;
+ const Vec4V maxImpulse = c.maxImpulse;
+
+ const Vec4V raXnT0 = c.raXnX;
+ const Vec4V raXnT1 = c.raXnY;
+ const Vec4V raXnT2 = c.raXnZ;
+ const Vec4V rbXnT0 = c.rbXnX;
+ const Vec4V rbXnT1 = c.rbXnY;
+ const Vec4V rbXnT2 = c.rbXnZ;
+
+ // Angular normal velocity: dot(raXn, angState0) for body A and dot(rbXn, angState1) for body B.
+ const Vec4V __normalVel2 = V4Mul(raXnT0, angState0T0);
+ const Vec4V __normalVel4 = V4Mul(rbXnT0, angState1T0);
+
+
+ const Vec4V _normalVel2 = V4MulAdd(raXnT1, angState0T1, __normalVel2);
+ const Vec4V _normalVel4 = V4MulAdd(rbXnT1, angState1T1, __normalVel4);
+
+
+ const Vec4V normalVel2 = V4MulAdd(raXnT2, angState0T2, _normalVel2);
+ const Vec4V normalVel4 = V4MulAdd(rbXnT2, angState1T2, _normalVel4);
+
+ const Vec4V biasedErr = V4MulAdd(targetVel, velMultiplier, V4Neg(scaledBias));
+
+ //Linear component - normal * invMass_dom
+
+ const Vec4V _normalVel(V4Add(normalVel1, normalVel2));
+ const Vec4V __normalVel(V4Add(normalVel3, normalVel4));
+
+ const Vec4V normalVel = V4Sub(_normalVel, __normalVel );
+ // Clamp the change so the accumulated force stays at or above zero (V4Max) and at or below maxImpulse (V4Min).
+ const Vec4V _deltaF = V4NegMulSub(normalVel, velMultiplier, biasedErr);
+ const Vec4V nAppliedForce = V4Neg(appliedForce);
+ const Vec4V _deltaF2 = V4Max(_deltaF, nAppliedForce);
+ const Vec4V _newAppliedForce(V4Add(appliedForce, _deltaF2));
+ const Vec4V newAppliedForce = V4Min(_newAppliedForce, maxImpulse);
+ const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce);
+ // Update the cached linear normal velocities directly rather than recomputing the dot products.
+ normalVel1 = V4MulAdd(invMass0D0, deltaF, normalVel1);
+ normalVel3 = V4NegMulSub(invMass1D1, deltaF, normalVel3);
+
+ accumDeltaF = V4Add(deltaF, accumDeltaF);
+
+ const Vec4V deltaFAng0 = V4Mul(angD0, deltaF);
+ const Vec4V deltaFAng1 = V4Mul(angD1, deltaF);
+ // Angular state is updated per contact, with opposite signs for the two bodies.
+ angState0T0 = V4MulAdd(raXnT0, deltaFAng0, angState0T0);
+ angState1T0 = V4NegMulSub(rbXnT0, deltaFAng1, angState1T0);
+
+ angState0T1 = V4MulAdd(raXnT1, deltaFAng0, angState0T1);
+ angState1T1 = V4NegMulSub(rbXnT1, deltaFAng1, angState1T1);
+
+ angState0T2 = V4MulAdd(raXnT2, deltaFAng0, angState0T2);
+ angState1T2 = V4NegMulSub(rbXnT2, deltaFAng1, angState1T2);
+ // Store the new accumulated force in the contact and in appliedForceBuffer.
+ c.appliedForce = newAppliedForce;
+ appliedForceBuffer[i] = newAppliedForce;
+ }
+ // Apply the header's summed impulse along its normal: added for body A, subtracted for body B.
+ const Vec4V accumDeltaF0 = V4Mul(accumDeltaF, invMass0D0);
+ const Vec4V accumDeltaF1 = V4Mul(accumDeltaF, invMass1D1);
+
+ linVel0T0 = V4MulAdd(normalT0, accumDeltaF0, linVel0T0);
+ linVel1T0 = V4NegMulSub(normalT0, accumDeltaF1, linVel1T0);
+ linVel0T1 = V4MulAdd(normalT1, accumDeltaF0, linVel0T1);
+ linVel1T1 = V4NegMulSub(normalT1, accumDeltaF1, linVel1T1);
+ linVel0T2 = V4MulAdd(normalT2, accumDeltaF0, linVel0T2);
+ linVel1T2 = V4NegMulSub(normalT2, accumDeltaF1, linVel1T2);
+ }
+
+ PX_ASSERT(currPtr == last);
+
+
+ //KS - we need to use PX_TRANSPOSE_44 here instead of the 34_43 variants because the W components are being used to
+ //store the bodies' progress counters.
+
+ PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
+ PX_TRANSPOSE_44(linVel1T0, linVel1T1, linVel1T2, linVel1T3, linVel01, linVel11, linVel21, linVel31);
+ PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
+ PX_TRANSPOSE_44(angState1T0, angState1T1, angState1T2, angState1T3, angState01, angState11, angState21, angState31);
+
+
+ // Write back the linear velocities and angular states of all eight bodies (aligned 4-float stores)
+ V4StoreA(linVel00, &b00.linearVelocity.x);
+ V4StoreA(linVel10, &b10.linearVelocity.x);
+ V4StoreA(linVel20, &b20.linearVelocity.x);
+ V4StoreA(linVel30, &b30.linearVelocity.x);
+
+ V4StoreA(linVel01, &b01.linearVelocity.x);
+ V4StoreA(linVel11, &b11.linearVelocity.x);
+ V4StoreA(linVel21, &b21.linearVelocity.x);
+ V4StoreA(linVel31, &b31.linearVelocity.x);
+
+ V4StoreA(angState00, &b00.angularState.x);
+ V4StoreA(angState10, &b10.angularState.x);
+ V4StoreA(angState20, &b20.angularState.x);
+ V4StoreA(angState30, &b30.angularState.x);
+
+ V4StoreA(angState01, &b01.angularState.x);
+ V4StoreA(angState11, &b11.angularState.x);
+ V4StoreA(angState21, &b21.angularState.x);
+ V4StoreA(angState31, &b31.angularState.x);
+}
+
+// Static variant of the 4-wide normal-contact velocity pass: only bodyA of desc[0..3] is read and updated, and contact points are walked as SolverContact4Base.
+static void solveContactCoulomb4_StaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/)
+{
+ PxSolverBody& b00 = *desc[0].bodyA;
+ PxSolverBody& b10 = *desc[1].bodyA;
+ PxSolverBody& b20 = *desc[2].bodyA;
+ PxSolverBody& b30 = *desc[3].bodyA;
+
+ //We'll need this.
+ const Vec4V vZero = V4Zero();
+ // Aligned loads of each bodyA's linear velocity and angular state; bodyB is never accessed in this function.
+ Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
+ Vec4V angState00 = V4LoadA(&b00.angularState.x);
+
+ Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
+ Vec4V angState10 = V4LoadA(&b10.angularState.x);
+
+ Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
+ Vec4V angState20 = V4LoadA(&b20.angularState.x);
+
+ Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
+ Vec4V angState30 = V4LoadA(&b30.angularState.x);
+
+ // Transposed copies: vector Tk gathers component k (x, y, z, w) of the four descriptors, one descriptor per lane.
+ Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
+ Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
+
+
+ PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
+ PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
+
+ // The constraint data is read through desc[0] only.
+ //hopefully pointer aliasing doesn't bite.
+ PxU8* PX_RESTRICT currPtr = desc[0].constraint;
+
+ SolverContactCoulombHeader4* PX_RESTRICT firstHeader = reinterpret_cast<SolverContactCoulombHeader4*>(currPtr);
+ // The first header's frictionOffset gives the byte offset at which the normal-contact data ends.
+ const PxU8* PX_RESTRICT last = desc[0].constraint + firstHeader->frictionOffset;
+
+
+ //TODO - can I avoid this many tests???
+ while(currPtr < last)
+ {
+
+ SolverContactCoulombHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader4*>(currPtr);
+ // Destination for a copy of each contact's applied force: frictionOffset bytes past this header, after one SolverFrictionHeader4.
+ Vec4V* appliedForceBuffer = reinterpret_cast<Vec4V*>(currPtr + hdr->frictionOffset + sizeof(SolverFrictionHeader4));
+
+ currPtr = reinterpret_cast<PxU8*>(hdr + 1);
+
+ const PxU32 numNormalConstr = hdr->numNormalConstr;
+
+ SolverContact4Base* PX_RESTRICT contacts = reinterpret_cast<SolverContact4Base*>(currPtr);
+
+ currPtr = reinterpret_cast<PxU8*>(contacts + numNormalConstr);
+ // Per-lane scale factors applied to deltaF for the linear (invMass0D0) and angular (angD0) velocity updates.
+ const Vec4V invMass0D0 = hdr->invMassADom;
+ const Vec4V angD0 = hdr->angD0;
+
+ const Vec4V normalT0 = hdr->normalX;
+ const Vec4V normalT1 = hdr->normalY;
+ const Vec4V normalT2 = hdr->normalZ;
+ // Linear normal velocity dot(linVel, normal) of body A; updated incrementally in the loop.
+ const Vec4V __normalVel1 = V4Mul(linVel0T0, normalT0);
+ const Vec4V _normalVel1 = V4MulAdd(linVel0T1, normalT1, __normalVel1);
+
+ Vec4V normalVel1 = V4MulAdd(linVel0T2, normalT2, _normalVel1);
+ // Sum of the impulse changes of this header's contacts; applied to the linear velocity after the loop.
+ Vec4V accumDeltaF = vZero;
+
+ for(PxU32 i=0;i<numNormalConstr;i++)
+ {
+ SolverContact4Base& c = contacts[i];
+ Ps::prefetchLine((&contacts[i+1]));
+ Ps::prefetchLine((&contacts[i+1]), 128);
+ Ps::prefetchLine((&contacts[i+1]), 256);
+
+ const Vec4V appliedForce = c.appliedForce;
+ const Vec4V velMultiplier = c.velMultiplier;
+
+ const Vec4V targetVel = c.targetVelocity;
+ const Vec4V scaledBias = c.scaledBias;
+ const Vec4V maxImpulse = c.maxImpulse;
+
+ const Vec4V raXnT0 = c.raXnX;
+ const Vec4V raXnT1 = c.raXnY;
+ const Vec4V raXnT2 = c.raXnZ;
+
+ // Angular normal velocity: dot(raXn, angState0) for body A.
+ const Vec4V __normalVel2 = V4Mul(raXnT0, angState0T0);
+
+ const Vec4V _normalVel2 = V4MulAdd(raXnT1, angState0T1, __normalVel2);
+
+ const Vec4V normalVel2 = V4MulAdd(raXnT2, angState0T2, _normalVel2);
+
+ const Vec4V biasedErr = V4MulAdd(targetVel, velMultiplier, V4Neg(scaledBias));
+
+ //Linear component - normal * invMass_dom
+
+ const Vec4V normalVel(V4Add(normalVel1, normalVel2));
+ // Clamp the change so the accumulated force stays at or above zero (V4Max) and at or below maxImpulse (V4Min).
+ const Vec4V _deltaF = V4NegMulSub(normalVel, velMultiplier, biasedErr);
+ const Vec4V nAppliedForce = V4Neg(appliedForce);
+
+ const Vec4V _deltaF2 = V4Max(_deltaF, nAppliedForce);
+
+ const Vec4V _newAppliedForce(V4Add(appliedForce, _deltaF2));
+ const Vec4V newAppliedForce = V4Min(_newAppliedForce, maxImpulse);
+ const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce);
+ const Vec4V deltaAngF = V4Mul(deltaF, angD0);
+ // Update the cached linear normal velocity directly rather than recomputing the dot product.
+ normalVel1 = V4MulAdd(invMass0D0, deltaF, normalVel1);
+
+ accumDeltaF = V4Add(deltaF, accumDeltaF);
+
+ angState0T0 = V4MulAdd(raXnT0, deltaAngF, angState0T0);
+ angState0T1 = V4MulAdd(raXnT1, deltaAngF, angState0T1);
+ angState0T2 = V4MulAdd(raXnT2, deltaAngF, angState0T2);
+ // Store the new accumulated force in the contact and in appliedForceBuffer.
+ c.appliedForce = newAppliedForce;
+ appliedForceBuffer[i] = newAppliedForce;
+ }
+ const Vec4V scaledAccumDeltaF = V4Mul(accumDeltaF, invMass0D0);
+ linVel0T0 = V4MulAdd(normalT0, scaledAccumDeltaF, linVel0T0);
+ linVel0T1 = V4MulAdd(normalT1, scaledAccumDeltaF, linVel0T1);
+ linVel0T2 = V4MulAdd(normalT2, scaledAccumDeltaF, linVel0T2);
+ }
+
+ PX_ASSERT(currPtr == last);
+
+ //KS - we need to use PX_TRANSPOSE_44 here instead of the 34_43 variants because the W components are being used to
+ //store the bodies' progress counters.
+
+ PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
+ PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
+
+ // Write back the linear velocity and angular state of the four A bodies
+ // (aligned 4-float stores, so the W component of each vector is written as well)
+ V4StoreA(linVel00, &b00.linearVelocity.x);
+ V4StoreA(linVel10, &b10.linearVelocity.x);
+ V4StoreA(linVel20, &b20.linearVelocity.x);
+ V4StoreA(linVel30, &b30.linearVelocity.x);
+
+ V4StoreA(angState00, &b00.angularState.x);
+ V4StoreA(angState10, &b10.angularState.x);
+ V4StoreA(angState20, &b20.angularState.x);
+ V4StoreA(angState30, &b30.angularState.x);
+}
+// Velocity pass over the friction rows of four Coulomb contact constraints at once, one constraint per SIMD lane: reads desc[0..3] and updates bodyA and bodyB of each.
+static void solveFriction4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/)
+{
+ PxSolverBody& b00 = *desc[0].bodyA;
+ PxSolverBody& b01 = *desc[0].bodyB;
+ PxSolverBody& b10 = *desc[1].bodyA;
+ PxSolverBody& b11 = *desc[1].bodyB;
+ PxSolverBody& b20 = *desc[2].bodyA;
+ PxSolverBody& b21 = *desc[2].bodyB;
+ PxSolverBody& b30 = *desc[3].bodyA;
+ PxSolverBody& b31 = *desc[3].bodyB;
+
+ // Aligned loads of each body's linear velocity and angular state (bxy = descriptor x, body y: 0 = A, 1 = B).
+ Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
+ Vec4V linVel01 = V4LoadA(&b01.linearVelocity.x);
+ Vec4V angState00 = V4LoadA(&b00.angularState.x);
+ Vec4V angState01 = V4LoadA(&b01.angularState.x);
+
+ Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
+ Vec4V linVel11 = V4LoadA(&b11.linearVelocity.x);
+ Vec4V angState10 = V4LoadA(&b10.angularState.x);
+ Vec4V angState11 = V4LoadA(&b11.angularState.x);
+
+ Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
+ Vec4V linVel21 = V4LoadA(&b21.linearVelocity.x);
+ Vec4V angState20 = V4LoadA(&b20.angularState.x);
+ Vec4V angState21 = V4LoadA(&b21.angularState.x);
+
+ Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
+ Vec4V linVel31 = V4LoadA(&b31.linearVelocity.x);
+ Vec4V angState30 = V4LoadA(&b30.angularState.x);
+ Vec4V angState31 = V4LoadA(&b31.angularState.x);
+
+ // Transposed copies: vector Tk gathers component k (x, y, z, w) of the four descriptors, one descriptor per lane.
+ Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
+ Vec4V linVel1T0, linVel1T1, linVel1T2, linVel1T3;
+ Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
+ Vec4V angState1T0, angState1T1, angState1T2, angState1T3;
+
+
+ PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
+ PX_TRANSPOSE_44(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2, linVel1T3);
+ PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
+ PX_TRANSPOSE_44(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2, angState1T3);
+
+ PxU8* PX_RESTRICT currPtr = desc[0].constraint;
+ PxU8* PX_RESTRICT endPtr = desc[0].constraint + getConstraintLength(desc[0]);
+
+ // Each iteration consumes one SolverFrictionHeader4, its Vec4V applied-impulse array and its friction rows.
+ while(currPtr < endPtr)
+ {
+ SolverFrictionHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverFrictionHeader4*>(currPtr);
+
+ currPtr = reinterpret_cast<PxU8*>(hdr + 1);
+ // numNormalConstr Vec4V values follow the header; they are only read here and scale the friction limit below.
+ Vec4V* appliedImpulses = reinterpret_cast<Vec4V*>(currPtr);
+
+ currPtr += hdr->numNormalConstr * sizeof(Vec4V);
+
+ Ps::prefetchLine(currPtr, 128);
+ Ps::prefetchLine(currPtr,256);
+ Ps::prefetchLine(currPtr,384);
+
+ const PxU32 numFrictionConstr = hdr->numFrictionConstr;
+
+ SolverFriction4Dynamic* PX_RESTRICT frictions = reinterpret_cast<SolverFriction4Dynamic*>(currPtr);
+
+ currPtr = reinterpret_cast<PxU8*>(frictions + hdr->numFrictionConstr);
+
+ const PxU32 maxFrictionConstr = numFrictionConstr;
+
+ const Vec4V staticFric = hdr->staticFriction;
+ // Per-lane scale factors applied to deltaF for the linear (invMass*) and angular (angD*) velocity updates.
+ const Vec4V invMass0D0 = hdr->invMassADom;
+ const Vec4V invMass1D1 = hdr->invMassBDom;
+
+ const Vec4V angD0 = hdr->angD0;
+ const Vec4V angD1 = hdr->angD1;
+
+ for(PxU32 i=0;i<maxFrictionConstr;i++)
+ {
+ SolverFriction4Dynamic& f = frictions[i];
+ Ps::prefetchLine((&f)+1);
+ Ps::prefetchLine((&f)+1,128);
+ Ps::prefetchLine((&f)+1,256);
+ Ps::prefetchLine((&f)+1,384);
+ // Friction row i uses the applied impulse stored at index i >> frictionPerContact.
+ const Vec4V appliedImpulse = appliedImpulses[i>>hdr->frictionPerContact];
+ // The friction force is limited to +/- staticFriction * appliedImpulse.
+ const Vec4V maxFriction = V4Mul(staticFric, appliedImpulse);
+
+ const Vec4V nMaxFriction = V4Neg(maxFriction);
+
+ const Vec4V normalX = f.normalX;
+ const Vec4V normalY = f.normalY;
+ const Vec4V normalZ = f.normalZ;
+
+ const Vec4V raXnX = f.raXnX;
+ const Vec4V raXnY = f.raXnY;
+ const Vec4V raXnZ = f.raXnZ;
+
+ const Vec4V rbXnX = f.rbXnX;
+ const Vec4V rbXnY = f.rbXnY;
+ const Vec4V rbXnZ = f.rbXnZ;
+
+ const Vec4V appliedForce(f.appliedForce);
+ const Vec4V velMultiplier(f.velMultiplier);
+ const Vec4V targetVel(f.targetVelocity);
+
+ //4 x 4 Dot3 products encoded as 8 M44 transposes, 4 MulV and 8 MulAdd ops
+
+ const Vec4V __normalVel1 = V4Mul(linVel0T0, normalX);
+ const Vec4V __normalVel2 = V4Mul(raXnX, angState0T0);
+ const Vec4V __normalVel3 = V4Mul(linVel1T0, normalX);
+ const Vec4V __normalVel4 = V4Mul(rbXnX, angState1T0);
+
+ const Vec4V _normalVel1 = V4MulAdd(linVel0T1, normalY, __normalVel1);
+ const Vec4V _normalVel2 = V4MulAdd(raXnY, angState0T1, __normalVel2);
+ const Vec4V _normalVel3 = V4MulAdd(linVel1T1, normalY, __normalVel3);
+ const Vec4V _normalVel4 = V4MulAdd(rbXnY, angState1T1, __normalVel4);
+
+ const Vec4V normalVel1 = V4MulAdd(linVel0T2, normalZ, _normalVel1);
+ const Vec4V normalVel2 = V4MulAdd(raXnZ, angState0T2, _normalVel2);
+ const Vec4V normalVel3 = V4MulAdd(linVel1T2, normalZ, _normalVel3);
+ const Vec4V normalVel4 = V4MulAdd(rbXnZ, angState1T2, _normalVel4);
+
+ // Relative velocity along this friction row: body A's (linear + angular) part minus body B's.
+ const Vec4V _normalVel = V4Add(normalVel1, normalVel2);
+ const Vec4V __normalVel = V4Add(normalVel3, normalVel4);
+
+ const Vec4V normalVel = V4Sub(_normalVel, __normalVel );
+ // newAppliedForce = appliedForce + (normalVel - targetVel) * velMultiplier, then clamped to [-maxFriction, maxFriction].
+ const Vec4V tmp = V4NegMulSub(targetVel, velMultiplier, appliedForce);
+ Vec4V newAppliedForce = V4MulAdd(normalVel, velMultiplier, tmp);
+ newAppliedForce = V4Clamp(newAppliedForce,nMaxFriction, maxFriction);
+ const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce);
+
+ const Vec4V deltaLinF0 = V4Mul(invMass0D0, deltaF);
+ const Vec4V deltaLinF1 = V4Mul(invMass1D1, deltaF);
+
+ const Vec4V deltaAngF0 = V4Mul(angD0, deltaF);
+ const Vec4V deltaAngF1 = V4Mul(angD1, deltaF);
+
+ // Velocities are updated after every friction row: added for body A, subtracted for body B.
+ linVel0T0 = V4MulAdd(normalX, deltaLinF0, linVel0T0);
+ linVel1T0 = V4NegMulSub(normalX, deltaLinF1, linVel1T0);
+ angState0T0 = V4MulAdd(raXnX, deltaAngF0, angState0T0);
+ angState1T0 = V4NegMulSub(rbXnX, deltaAngF1, angState1T0);
+
+ linVel0T1 = V4MulAdd(normalY, deltaLinF0, linVel0T1);
+ linVel1T1 = V4NegMulSub(normalY, deltaLinF1, linVel1T1);
+ angState0T1 = V4MulAdd(raXnY, deltaAngF0, angState0T1);
+ angState1T1 = V4NegMulSub(rbXnY, deltaAngF1, angState1T1);
+
+ linVel0T2 = V4MulAdd(normalZ, deltaLinF0, linVel0T2);
+ linVel1T2 = V4NegMulSub(normalZ, deltaLinF1, linVel1T2);
+ angState0T2 = V4MulAdd(raXnZ, deltaAngF0, angState0T2);
+ angState1T2 = V4NegMulSub(rbXnZ, deltaAngF1, angState1T2);
+
+ f.appliedForce = newAppliedForce;
+ }
+ }
+
+ PX_ASSERT(currPtr == endPtr);
+
+ //KS - we need to use PX_TRANSPOSE_44 here instead of the 34_43 variants because the W components are being used to
+ //store the bodies' progress counters.
+
+ PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
+ PX_TRANSPOSE_44(linVel1T0, linVel1T1, linVel1T2, linVel1T3, linVel01, linVel11, linVel21, linVel31);
+ PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
+ PX_TRANSPOSE_44(angState1T0, angState1T1, angState1T2, angState1T3, angState01, angState11, angState21, angState31);
+
+
+ // Write back the linear velocities and angular states of all eight bodies
+ // (aligned 4-float stores, so the W component of each vector is written as well)
+ V4StoreA(linVel00, &b00.linearVelocity.x);
+ V4StoreA(linVel10, &b10.linearVelocity.x);
+ V4StoreA(linVel20, &b20.linearVelocity.x);
+ V4StoreA(linVel30, &b30.linearVelocity.x);
+
+ V4StoreA(linVel01, &b01.linearVelocity.x);
+ V4StoreA(linVel11, &b11.linearVelocity.x);
+ V4StoreA(linVel21, &b21.linearVelocity.x);
+ V4StoreA(linVel31, &b31.linearVelocity.x);
+
+ V4StoreA(angState00, &b00.angularState.x);
+ V4StoreA(angState10, &b10.angularState.x);
+ V4StoreA(angState20, &b20.angularState.x);
+ V4StoreA(angState30, &b30.angularState.x);
+
+ V4StoreA(angState01, &b01.angularState.x);
+ V4StoreA(angState11, &b11.angularState.x);
+ V4StoreA(angState21, &b21.angularState.x);
+ V4StoreA(angState31, &b31.angularState.x);
+
+}
+
+// Static variant of the 4-wide friction velocity pass: only bodyA of desc[0..3] is read and updated, and friction rows are walked as SolverFriction4Base.
+static void solveFriction4_StaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/)
+{
+
+ PxSolverBody& b00 = *desc[0].bodyA;
+ PxSolverBody& b10 = *desc[1].bodyA;
+ PxSolverBody& b20 = *desc[2].bodyA;
+ PxSolverBody& b30 = *desc[3].bodyA;
+
+ // Aligned loads of each bodyA's linear velocity and angular state; bodyB is never accessed in this function.
+ Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
+ Vec4V angState00 = V4LoadA(&b00.angularState.x);
+
+ Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
+ Vec4V angState10 = V4LoadA(&b10.angularState.x);
+
+ Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
+ Vec4V angState20 = V4LoadA(&b20.angularState.x);
+
+ Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
+ Vec4V angState30 = V4LoadA(&b30.angularState.x);
+
+ // Transposed copies: vector Tk gathers component k (x, y, z, w) of the four descriptors, one descriptor per lane.
+ Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
+ Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
+
+
+ PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
+ PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
+
+ PxU8* PX_RESTRICT currPtr = desc[0].constraint;
+ PxU8* PX_RESTRICT endPtr = desc[0].constraint + getConstraintLength(desc[0]);
+
+ // Each iteration consumes one SolverFrictionHeader4, its Vec4V applied-impulse array and its friction rows.
+ while(currPtr < endPtr)
+ {
+ SolverFrictionHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverFrictionHeader4*>(currPtr);
+
+ currPtr = reinterpret_cast<PxU8*>(hdr + 1);
+ // numNormalConstr Vec4V values follow the header; they are only read here and scale the friction limit below.
+ Vec4V* appliedImpulses = reinterpret_cast<Vec4V*>(currPtr);
+
+ currPtr += hdr->numNormalConstr * sizeof(Vec4V);
+
+ Ps::prefetchLine(currPtr, 128);
+ Ps::prefetchLine(currPtr,256);
+ Ps::prefetchLine(currPtr,384);
+
+ const PxU32 numFrictionConstr = hdr->numFrictionConstr;
+
+ SolverFriction4Base* PX_RESTRICT frictions = reinterpret_cast<SolverFriction4Base*>(currPtr);
+
+ currPtr = reinterpret_cast<PxU8*>(frictions + hdr->numFrictionConstr);
+
+ const PxU32 maxFrictionConstr = numFrictionConstr;
+
+ const Vec4V staticFric = hdr->staticFriction;
+ // Per-lane scale factors applied to deltaF for the linear (invMass0D0) and angular (angD0) velocity updates.
+ const Vec4V invMass0D0 = hdr->invMassADom;
+ const Vec4V angD0 = hdr->angD0;
+
+ for(PxU32 i=0;i<maxFrictionConstr;i++)
+ {
+ SolverFriction4Base& f = frictions[i];
+ Ps::prefetchLine((&f)+1);
+ Ps::prefetchLine((&f)+1,128);
+ Ps::prefetchLine((&f)+1,256);
+ // Friction row i uses the applied impulse stored at index i >> frictionPerContact.
+ const Vec4V appliedImpulse = appliedImpulses[i>>hdr->frictionPerContact];
+ // The friction force is limited to +/- staticFriction * appliedImpulse.
+ const Vec4V maxFriction = V4Mul(staticFric, appliedImpulse);
+
+ const Vec4V nMaxFriction = V4Neg(maxFriction);
+
+ const Vec4V normalX = f.normalX;
+ const Vec4V normalY = f.normalY;
+ const Vec4V normalZ = f.normalZ;
+
+ const Vec4V raXnX = f.raXnX;
+ const Vec4V raXnY = f.raXnY;
+ const Vec4V raXnZ = f.raXnZ;
+
+ const Vec4V appliedForce(f.appliedForce);
+ const Vec4V velMultiplier(f.velMultiplier);
+ const Vec4V targetVel(f.targetVelocity);
+
+ //2 x 4 Dot3 products (linear and angular part of body A only): 2 MulV and 4 MulAdd ops
+
+ const Vec4V __normalVel1 = V4Mul(linVel0T0, normalX);
+ const Vec4V __normalVel2 = V4Mul(raXnX, angState0T0);
+
+ const Vec4V _normalVel1 = V4MulAdd(linVel0T1, normalY, __normalVel1);
+ const Vec4V _normalVel2 = V4MulAdd(raXnY, angState0T1, __normalVel2);
+
+ const Vec4V normalVel1 = V4MulAdd(linVel0T2, normalZ, _normalVel1);
+ const Vec4V normalVel2 = V4MulAdd(raXnZ, angState0T2, _normalVel2);
+ // delLinVelk0 = friction direction component scaled by invMass0D0; multiplied by deltaF further down.
+ const Vec4V delLinVel00 = V4Mul(normalX, invMass0D0);
+
+ const Vec4V delLinVel10 = V4Mul(normalY, invMass0D0);
+
+ const Vec4V normalVel = V4Add(normalVel1, normalVel2);
+
+ const Vec4V delLinVel20 = V4Mul(normalZ, invMass0D0);
+ // newAppliedForce = appliedForce + (normalVel - targetVel) * velMultiplier, then clamped to [-maxFriction, maxFriction].
+ const Vec4V tmp = V4NegMulSub(targetVel, velMultiplier, appliedForce);
+
+ Vec4V newAppliedForce = V4MulAdd(normalVel, velMultiplier, tmp);
+ newAppliedForce = V4Clamp(newAppliedForce,nMaxFriction, maxFriction);
+ const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce);
+
+ const Vec4V deltaAngF0 = V4Mul(angD0, deltaF);
+ // Velocities of body A are updated after every friction row.
+ linVel0T0 = V4MulAdd(delLinVel00, deltaF, linVel0T0);
+ angState0T0 = V4MulAdd(raXnX, deltaAngF0, angState0T0);
+
+ linVel0T1 = V4MulAdd(delLinVel10, deltaF, linVel0T1);
+ angState0T1 = V4MulAdd(raXnY, deltaAngF0, angState0T1);
+
+ linVel0T2 = V4MulAdd(delLinVel20, deltaF, linVel0T2);
+ angState0T2 = V4MulAdd(raXnZ, deltaAngF0, angState0T2);
+
+ f.appliedForce = newAppliedForce;
+ }
+ }
+
+ PX_ASSERT(currPtr == endPtr);
+
+ //KS - we need to use PX_TRANSPOSE_44 here instead of the 34_43 variants because the W components are being used to
+ //store the bodies' progress counters.
+
+ PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
+ PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
+
+ // Write back the linear velocity and angular state of the four A bodies
+ // (aligned 4-float stores, so the W component of each vector is written as well)
+ V4StoreA(linVel00, &b00.linearVelocity.x);
+ V4StoreA(linVel10, &b10.linearVelocity.x);
+ V4StoreA(linVel20, &b20.linearVelocity.x);
+ V4StoreA(linVel30, &b30.linearVelocity.x);
+
+ V4StoreA(angState00, &b00.angularState.x);
+ V4StoreA(angState10, &b10.angularState.x);
+ V4StoreA(angState20, &b20.angularState.x);
+ V4StoreA(angState30, &b30.angularState.x);
+}
+// Conclude step for a 4-wide Coulomb contact block: replaces each contact point's scaledBias with max(scaledBias, 0).
+static void concludeContactCoulomb4(const PxSolverConstraintDesc* desc, SolverContext& /*cache*/)
+{
+	// Only desc[0] is consulted: the walk starts at its constraint pointer.
+	PxU8* PX_RESTRICT cursor = desc[0].constraint;
+	const SolverContactCoulombHeader4* PX_RESTRICT leadHeader = reinterpret_cast<const SolverContactCoulombHeader4*>(cursor);
+
+	// The contact section ends frictionOffset bytes into the stream, as recorded in the first header.
+	PxU8* PX_RESTRICT contactsEnd = desc[0].constraint + leadHeader->frictionOffset;
+
+	// Point size is chosen once from the first header's type: SolverContact4Dynamic for DY_SC_TYPE_BLOCK_RB_CONTACT, SolverContact4Base otherwise.
+	const PxU32 stride = leadHeader->type == DY_SC_TYPE_BLOCK_RB_CONTACT ? sizeof(SolverContact4Dynamic) : sizeof(SolverContact4Base);
+	const Vec4V floorValue = V4Zero();
+
+	while(cursor < contactsEnd)
+	{
+		// Read this header's point count, then step over the header to its first contact point.
+		const PxU32 pointCount = reinterpret_cast<const SolverContactCoulombHeader4*>(cursor)->numNormalConstr;
+		cursor += sizeof(SolverContactCoulombHeader4);
+
+		Ps::prefetchLine(cursor, 128);
+		Ps::prefetchLine(cursor, 256);
+		Ps::prefetchLine(cursor, 384);
+
+		for(PxU32 remaining = pointCount; remaining != 0; --remaining, cursor += stride)
+		{
+			SolverContact4Base* point = reinterpret_cast<SolverContact4Base*>(cursor);
+			point->scaledBias = V4Max(point->scaledBias, floorValue);
+		}
+	}
+
+	// Every header and point must have been consumed exactly.
+	PX_ASSERT(cursor == contactsEnd);
+}
+// Copies the applied normal forces of a 4-wide Coulomb contact block to each lane's optional write-back buffer and appends threshold elements to cache.mThresholdStream.
+void writeBackContactCoulomb4(const PxSolverConstraintDesc* desc, SolverContext& cache,
+	const PxSolverBodyData** PX_RESTRICT bd0, const PxSolverBodyData** PX_RESTRICT bd1)
+{
+	PxU8* PX_RESTRICT cursor = desc[0].constraint;
+	const SolverContactCoulombHeader4* PX_RESTRICT leadHeader = reinterpret_cast<const SolverContactCoulombHeader4*>(cursor);
+	PxU8* PX_RESTRICT contactsEnd = desc[0].constraint + leadHeader->frictionOffset;
+	const PxU32 stride = leadHeader->type == DY_SC_TYPE_BLOCK_RB_CONTACT ? sizeof(SolverContact4Dynamic) : sizeof(SolverContact4Base);
+
+	// One force write-back cursor per lane; a null pointer disables force reporting for that lane.
+	PxReal* PX_RESTRICT laneOut0 = reinterpret_cast<PxReal*>(desc[0].writeBack);
+	PxReal* PX_RESTRICT laneOut1 = reinterpret_cast<PxReal*>(desc[1].writeBack);
+	PxReal* PX_RESTRICT laneOut2 = reinterpret_cast<PxReal*>(desc[2].writeBack);
+	PxReal* PX_RESTRICT laneOut3 = reinterpret_cast<PxReal*>(desc[3].writeBack);
+
+	Vec4V forceSum = V4Zero();
+	bool laneHasThreshold[4] = {false, false, false, false};
+
+	while(cursor < contactsEnd)
+	{
+		const SolverContactCoulombHeader4* PX_RESTRICT header = reinterpret_cast<const SolverContactCoulombHeader4*>(cursor);
+		cursor += sizeof(SolverContactCoulombHeader4);
+
+		// Reassigned for every header, so the lane loop at the end sees the flags of the last header visited.
+		laneHasThreshold[0] = header->flags[0] & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
+		laneHasThreshold[1] = header->flags[1] & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
+		laneHasThreshold[2] = header->flags[2] & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
+		laneHasThreshold[3] = header->flags[3] & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
+
+		Ps::prefetchLine(cursor, 256);
+		Ps::prefetchLine(cursor, 384);
+
+		const PxU32 pointCount = header->numNormalConstr;
+		for(PxU32 point = 0; point < pointCount; ++point, cursor += stride)
+		{
+			const Vec4V laneForces = reinterpret_cast<SolverContact4Base*>(cursor)->appliedForce;
+
+			// Lane k reports a point only while the point index is below that lane's own count (numNormalConstr0..3).
+			if(laneOut0 && point < header->numNormalConstr0)
+				FStore(V4GetX(laneForces), laneOut0++);
+			if(laneOut1 && point < header->numNormalConstr1)
+				FStore(V4GetY(laneForces), laneOut1++);
+			if(laneOut2 && point < header->numNormalConstr2)
+				FStore(V4GetZ(laneForces), laneOut2++);
+			if(laneOut3 && point < header->numNormalConstr3)
+				FStore(V4GetW(laneForces), laneOut3++);
+
+			forceSum = V4Add(forceSum, laneForces);
+		}
+	}
+	PX_ASSERT(cursor == contactsEnd);
+
+	// Spill the per-lane force totals to scalars.
+	PX_ALIGN(16, PxReal laneTotal[4]);
+	V4StoreA(forceSum, laneTotal);
+
+	// Shape interactions are taken from the first header of desc[0]'s constraint.
+	Sc::ShapeInteraction** interactions = reinterpret_cast<SolverContactCoulombHeader4*>(desc[0].constraint)->shapeInteraction;
+
+	for(PxU32 lane = 0; lane < 4; ++lane)
+	{
+		// Skip lanes without the threshold flag, lanes with a link on either side, and lanes whose total force is zero.
+		if(!laneHasThreshold[lane] || desc[lane].linkIndexA != PxSolverConstraintDesc::NO_LINK || desc[lane].linkIndexB != PxSolverConstraintDesc::NO_LINK || laneTotal[lane] == 0.f)
+			continue;
+
+		// At least one of the two bodies must have a report threshold below PX_MAX_REAL.
+		if(!(bd0[lane]->reportThreshold < PX_MAX_REAL || bd1[lane]->reportThreshold < PX_MAX_REAL))
+			continue;
+		ThresholdStreamElement entry;
+		entry.normalForce = laneTotal[lane];
+		entry.threshold = PxMin<float>(bd0[lane]->reportThreshold, bd1[lane]->reportThreshold);
+		entry.nodeIndexA = bd0[lane]->nodeIndex;
+		entry.nodeIndexB = bd1[lane]->nodeIndex;
+		entry.shapeInteraction = interactions[lane];
+		Ps::order(entry.nodeIndexA, entry.nodeIndexB);
+		PX_ASSERT(entry.nodeIndexA < entry.nodeIndexB);
+		PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength);
+		cache.mThresholdStream[cache.mThresholdStreamIndex++] = entry;
+	}
+}
+
+// Thin wrapper: forwards desc and cache unchanged to solveContactCoulomb4_Block.
+// The second (constraint count) argument is unnamed and ignored.
+void solveContactCoulombPreBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
+{
+ solveContactCoulomb4_Block(desc, cache);
+}
+
+// Thin wrapper: forwards desc and cache unchanged to solveContactCoulomb4_StaticBlock.
+// The second (constraint count) argument is unnamed and ignored.
+void solveContactCoulombPreBlock_Static(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
+{
+ solveContactCoulomb4_StaticBlock(desc, cache);
+}
+
+// Runs solveContactCoulomb4_Block and then concludeContactCoulomb4 on the same desc/cache pair.
+// The second (constraint count) argument is unnamed and ignored.
+void solveContactCoulombPreBlock_Conclude(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
+{
+ solveContactCoulomb4_Block(desc, cache);
+ // The conclude step always follows the solve step.
+ concludeContactCoulomb4(desc, cache);
+}
+
+// Runs solveContactCoulomb4_StaticBlock and then concludeContactCoulomb4 on the same desc/cache pair.
+// The second (constraint count) argument is unnamed and ignored.
+void solveContactCoulombPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
+{
+ solveContactCoulomb4_StaticBlock(desc, cache);
+ // The same conclude routine is used as in the non-static variant.
+ concludeContactCoulomb4(desc, cache);
+}
+
+// Runs solveContactCoulomb4_Block on a batch of four constraints and writes the results back.
+//
+// desc  - constraint descriptors; entries desc[0] .. desc[3] are read here.
+// cache - solver context; supplies solverBodyArray, the local threshold stream
+//         (mThresholdStream / mThresholdStreamIndex / mThresholdStreamLength) and the shared
+//         output stream (mSharedThresholdStream / mSharedOutThresholdPairs).
+// The second (constraint count) argument is unnamed and ignored.
+//
+// writeBackContactCoulomb4 can append up to four elements to the local threshold stream per
+// call, so the stream is copied to the shared stream and emptied as soon as fewer than four
+// free slots remain.
+void solveContactCoulombPreBlock_WriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
+{
+	solveContactCoulomb4_Block(desc, cache);
+
+	// Body data of side A (bd0) and side B (bd1) for each of the four batched constraints.
+	const PxSolverBodyData* bd0[4];
+	const PxSolverBodyData* bd1[4];
+	for(PxU32 i = 0; i < 4; ++i)
+	{
+		bd0[i] = &cache.solverBodyArray[desc[i].bodyADataIndex];
+		bd1[i] = &cache.solverBodyArray[desc[i].bodyBDataIndex];
+	}
+
+	writeBackContactCoulomb4(desc, cache, bd0, bd1);
+
+	// Written as "index + 4 > length" instead of "index > length - 4": the two forms agree
+	// whenever length >= 4, but the subtraction would wrap around for a shorter stream if the
+	// length is an unsigned integer (presumably PxU32 - confirm against SolverContext).
+	if(cache.mThresholdStreamIndex + 4 > cache.mThresholdStreamLength)
+	{
+		//Write back to global buffer
+		// The atomicAdd result minus the count just added is used as the first slot of the
+		// range reserved in the shared stream.
+		PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+		for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
+		{
+			cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
+		}
+		cache.mThresholdStreamIndex = 0;
+	}
+}
+
+// Runs solveContactCoulomb4_StaticBlock on a batch of four constraints and writes the results back.
+//
+// desc  - constraint descriptors; entries desc[0] .. desc[3] are read here.
+// cache - solver context; supplies solverBodyArray, the local threshold stream
+//         (mThresholdStream / mThresholdStreamIndex / mThresholdStreamLength) and the shared
+//         output stream (mSharedThresholdStream / mSharedOutThresholdPairs).
+// The second (constraint count) argument is unnamed and ignored.
+//
+// writeBackContactCoulomb4 can append up to four elements to the local threshold stream per
+// call, so the stream is copied to the shared stream and emptied as soon as fewer than four
+// free slots remain.
+void solveContactCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
+{
+	solveContactCoulomb4_StaticBlock(desc, cache);
+
+	// Body data of side A (bd0) and side B (bd1) for each of the four batched constraints.
+	const PxSolverBodyData* bd0[4];
+	const PxSolverBodyData* bd1[4];
+	for(PxU32 i = 0; i < 4; ++i)
+	{
+		bd0[i] = &cache.solverBodyArray[desc[i].bodyADataIndex];
+		bd1[i] = &cache.solverBodyArray[desc[i].bodyBDataIndex];
+	}
+
+	writeBackContactCoulomb4(desc, cache, bd0, bd1);
+
+	// Written as "index + 4 > length" instead of "index > length - 4": the two forms agree
+	// whenever length >= 4, but the subtraction would wrap around for a shorter stream if the
+	// length is an unsigned integer (presumably PxU32 - confirm against SolverContext).
+	if(cache.mThresholdStreamIndex + 4 > cache.mThresholdStreamLength)
+	{
+		//Write back to global buffer
+		// The atomicAdd result minus the count just added is used as the first slot of the
+		// range reserved in the shared stream.
+		PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+		for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
+		{
+			cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
+		}
+		cache.mThresholdStreamIndex = 0;
+	}
+}
+
+// Thin wrapper: forwards desc and cache unchanged to solveFriction4_Block.
+// The second (constraint count) argument is unnamed and ignored.
+void solveFrictionCoulombPreBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
+{
+ solveFriction4_Block(desc, cache);
+}
+
+// Thin wrapper: forwards desc and cache unchanged to solveFriction4_StaticBlock.
+// The second (constraint count) argument is unnamed and ignored.
+void solveFrictionCoulombPreBlock_Static(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
+{
+ solveFriction4_StaticBlock(desc, cache);
+}
+
+// "Conclude" variant for friction: it only calls solveFriction4_Block, exactly like
+// solveFrictionCoulombPreBlock; no separate conclude step is performed here.
+// The second (constraint count) argument is unnamed and ignored.
+void solveFrictionCoulombPreBlock_Conclude(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
+{
+ solveFriction4_Block(desc, cache);
+}
+
+// "Conclude" variant for static friction blocks: it only calls solveFriction4_StaticBlock;
+// no separate conclude step is performed here.
+// The second (constraint count) argument is unnamed and ignored.
+void solveFrictionCoulombPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
+{
+ solveFriction4_StaticBlock(desc, cache);
+}
+
+// "WriteBack" variant for friction: it only calls solveFriction4_Block; nothing is written
+// back to the threshold stream from here.
+// The second (constraint count) argument is unnamed and ignored.
+void solveFrictionCoulombPreBlock_WriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
+{
+ solveFriction4_Block(desc, cache);
+}
+
+// "WriteBack" variant for static friction blocks: it only calls solveFriction4_StaticBlock;
+// nothing is written back to the threshold stream from here.
+// The second (constraint count) argument is unnamed and ignored.
+void solveFrictionCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache)
+{
+ solveFriction4_StaticBlock(desc, cache);
+}
+
+
+}
+
+}
+
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySpatial.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySpatial.h
new file mode 100644
index 00000000..e27406b3
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySpatial.h
@@ -0,0 +1,142 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_SPATIAL_H
+#define DY_SPATIAL_H
+
+#include "foundation/PxVec3.h"
+#include "foundation/PxTransform.h"
+#include "PsMathUtils.h"
+#include "CmSpatialVector.h"
+
+namespace physx
+{
+namespace Dy
+{
+// translate a motion resolved at position p to the origin
+
+
+// should have a 'from' frame and a 'to' frame
+// 6x6 spatial inertia stored as three 3x3 blocks:
+//
+//     [ mLL     mLA ]
+//     [ mLA^T   mAA ]
+//
+// The lower-left block is never stored; it is taken to be the transpose of mLA.
+class SpInertia
+{
+public:
+	// Default-constructs the three blocks without assigning them a value here.
+	SpInertia() {}
+
+	// Builds an inertia from its linear-linear, linear-angular and angular-angular blocks.
+	SpInertia(const PxMat33& ll, const PxMat33& la, const PxMat33& aa)
+	: mLL(ll)
+	, mLA(la)
+	, mAA(aa)
+	{
+	}
+
+	// Inertia whose three blocks are all zero matrices.
+	static SpInertia getZero()
+	{
+		const PxMat33 zero(PxZero);
+		return SpInertia(zero, zero, zero);
+	}
+
+	// Outer product of two spatial vectors, keeping the LL, LA and AA blocks only.
+	static SpInertia dyad(const Cm::SpatialVector& column, const Cm::SpatialVector& row)
+	{
+		const PxMat33 ll = dyad(column.linear, row.linear);
+		const PxMat33 la = dyad(column.linear, row.angular);
+		const PxMat33 aa = dyad(column.angular, row.angular);
+		return SpInertia(ll, la, aa);
+	}
+
+	// Inertia with diag(mass) as linear block, a zero coupling block and diag(inertia) as
+	// angular block.
+	static SpInertia inertia(PxReal mass, const PxVec3& inertia)
+	{
+		const PxVec3 massDiagonal(mass, mass, mass);
+		return SpInertia(PxMat33::createDiagonal(massDiagonal),
+						 PxMat33(PxZero),
+						 PxMat33::createDiagonal(inertia));
+	}
+
+	// Block-wise sum.
+	SpInertia operator+(const SpInertia& m) const
+	{
+		SpInertia sum(*this);
+		sum += m;
+		return sum;
+	}
+
+	// Block-wise difference.
+	SpInertia operator-(const SpInertia& m) const
+	{
+		SpInertia difference(*this);
+		difference -= m;
+		return difference;
+	}
+
+	// Every block scaled by r.
+	SpInertia operator*(PxReal r) const
+	{
+		const PxMat33 ll = mLL*r;
+		const PxMat33 la = mLA*r;
+		const PxMat33 aa = mAA*r;
+		return SpInertia(ll, la, aa);
+	}
+
+	// In-place block-wise sum.
+	void operator+=(const SpInertia& m)
+	{
+		mLL += m.mLL;
+		mLA += m.mLA;
+		mAA += m.mAA;
+	}
+
+	// In-place block-wise difference.
+	void operator-=(const SpInertia& m)
+	{
+		mLL -= m.mLL;
+		mLA -= m.mLA;
+		mAA -= m.mAA;
+	}
+
+	// Applies the inertia to a spatial vector; the angular row uses mLA transposed.
+	PX_FORCE_INLINE Cm::SpatialVector operator *(const Cm::SpatialVector& v) const
+	{
+		const PxVec3 linearPart = mLL*v.linear + mLA*v.angular;
+		const PxVec3 angularPart = mLA.transformTranspose(v.linear) + mAA*v.angular;
+		return Cm::SpatialVector(linearPart, angularPart);
+	}
+
+	// Block product of two inertias. Only the LL, LA and AA blocks of the product are
+	// computed; the lower-left block of the product is not evaluated or stored.
+	SpInertia operator *(const SpInertia& v) const
+	{
+		const PxMat33 ll = mLL*v.mLL + mLA*v.mLA.getTranspose();
+		const PxMat33 la = mLL*v.mLA + mLA*v.mAA;
+		const PxMat33 aa = mLA.getTranspose()*v.mLA + mAA*v.mAA;
+		return SpInertia(ll, la, aa);
+	}
+
+	// Always reports true: the per-block check below is disabled.
+	bool isFinite() const
+	{
+		return true;
+//		return mLL.isFinite() && mLA.isFinite() && mAA.isFinite();
+	}
+
+	PxMat33 mLL, mLA;	// mLL: linear force from linear motion, mLA: linear force from angular motion
+	PxMat33 mAA;		// angular force from angular motion, mAL = mLA.transpose()
+
+private:
+	// 3x3 outer product: column i of the result is col scaled by component i of row.
+	static PxMat33 dyad(PxVec3 col, PxVec3 row)
+	{
+		return PxMat33(col*row.x, col*row.y, col*row.z);
+	}
+
+};
+
+}
+}
+
+#endif //DY_SPATIAL_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.cpp
new file mode 100644
index 00000000..5526b83a
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.cpp
@@ -0,0 +1,110 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "DyThreadContext.h"
+#include "PsBitUtils.h"
+
+namespace physx
+{
+namespace Dy
+{
+
+// Constructs a thread context on top of the given memory block pool.
+//
+// memBlockPool - dereferenced to construct the friction patch stream pair, the constraint
+//                block manager and the constraint block stream; must not be NULL.
+//
+// Every counter and raw pointer member is given a defined initial value (0 / NULL), in
+// declaration order. In particular contactConstraintDescArray is read by reset() and
+// resizeArrays(), so it must not be left indeterminate.
+ThreadContext::ThreadContext(PxcNpMemBlockPool* memBlockPool):
+	mFrictionPatchStreamPair(*memBlockPool),
+	mConstraintBlockManager (*memBlockPool),
+	mConstraintBlockStream (*memBlockPool),
+	mNumDifferentBodyConstraints(0),
+	mNumDifferentBodyFrictionConstraints(0),
+	mNumSelfConstraints(0),
+	mNumSelfFrictionConstraints(0),
+	mNumSelfConstraintBlocks(0),
+	mNumSelfConstraintFrictionBlocks(0),
+	mConstraintsPerPartition(PX_DEBUG_EXP("ThreadContext::mConstraintsPerPartition")),
+	mFrictionConstraintsPerPartition(PX_DEBUG_EXP("ThreadContext::frictionsConstraintsPerPartition")),
+	mPartitionNormalizationBitmap(PX_DEBUG_EXP("ThreadContext::mPartitionNormalizationBitmap")),
+	mBodyCoreArray(NULL),
+	mRigidBodyArray(NULL),
+	mArticulationArray(NULL),
+	motionVelocityArray(NULL),
+	bodyRemapTable(NULL),
+	mNodeIndexArray(NULL),
+	contactConstraintDescArray(NULL),
+	contactDescArraySize(0),
+	orderedContactConstraints(NULL),
+	contactConstraintBatchHeaders(NULL),
+	numContactConstraintBatches(0),
+	tempConstraintDescArray(NULL),
+	frictionConstraintDescArray(PX_DEBUG_EXP("ThreadContext::solverFrictionConstraintArray")),
+	frictionConstraintBatchHeaders(PX_DEBUG_EXP("ThreadContext::frictionConstraintBatchHeaders")),
+	compoundConstraints(PX_DEBUG_EXP("ThreadContext::compoundConstraints")),
+	orderedContactList(PX_DEBUG_EXP("ThreadContext::orderedContactList")),
+	tempContactList(PX_DEBUG_EXP("ThreadContext::tempContactList")),
+	sortIndexArray(PX_DEBUG_EXP("ThreadContext::sortIndexArray")),
+	numDifferentBodyBatchHeaders(0),
+	numSelfConstraintBatchHeaders(0),
+	mOrderedContactDescCount(0),
+	mOrderedFrictionDescCount(0),
+	mConstraintSize (0),
+	mAxisConstraintCount(0),
+	mSelfConstraintBlocks(NULL),
+	mSelfConstraintFrictionBlocks(NULL),
+	mMaxPartitions(0),
+	mMaxFrictionPartitions(0),
+	mMaxSolverPositionIterations(0),
+	mMaxSolverVelocityIterations(0),
+	mMaxArticulationLength(0),
+	mMaxArticulationSolverLength(0),
+	mContactDescPtr(NULL),
+	mStartContactDescPtr(NULL),
+	mFrictionDescPtr(NULL),
+	mArticulations(PX_DEBUG_EXP("ThreadContext::articulations"))
+
+{
+#if PX_ENABLE_SIM_STATS
+	mThreadSimStats.clear();
+#endif
+	//Defaulted to have space for 16384 bodies
+	mPartitionNormalizationBitmap.reserve(512);
+	//Defaulted to have space for 128 partitions (should be more-than-enough)
+	mConstraintsPerPartition.reserve(128);
+}
+
+// Prepares the friction descriptor array and the articulation array for a new workload and
+// rewinds both descriptor cursors.
+//
+// frictionConstraintDescCount - number of friction descriptors to make room for; the reserved
+//                               capacity is this count rounded up to a multiple of 64.
+// articulationCount           - size the articulation array is forced to.
+void ThreadContext::resizeArrays(PxU32 frictionConstraintDescCount, PxU32 articulationCount)
+{
+	// The arrays are emptied and then reserved rather than resized: resize shrinks smaller
+	// arrays to the exact target size, which can generate a lot of churn.
+	const PxU32 frictionCapacity = (frictionConstraintDescCount+63)&~63;
+	frictionConstraintDescArray.forceSize_Unsafe(0);
+	frictionConstraintDescArray.reserve(frictionCapacity);
+
+	// At least 16 articulation slots are reserved.
+	const PxU32 articulationCapacity = PxMax<PxU32>(Ps::nextPowerOfTwo(articulationCount), 16);
+	mArticulations.forceSize_Unsafe(0);
+	mArticulations.reserve(articulationCapacity);
+	mArticulations.forceSize_Unsafe(articulationCount);
+
+	// begin() is taken after the reserve above so the cursor refers to the current storage.
+	mFrictionDescPtr = frictionConstraintDescArray.begin();
+	mContactDescPtr = contactConstraintDescArray;
+}
+
+// Resets the two streams, rewinds the descriptor cursors and clears the constraint
+// bookkeeping listed below. Other members are left as they are.
+void ThreadContext::reset()
+{
+	// TODO: move these to the PxcNpThreadContext
+	mFrictionPatchStreamPair.reset();
+	mConstraintBlockStream.reset();
+
+	// Descriptor cursors go back to the start of their arrays.
+	mFrictionDescPtr = frictionConstraintDescArray.begin();
+	mContactDescPtr = contactConstraintDescArray;
+
+	// Constraint bookkeeping.
+	mNumDifferentBodyConstraints = 0;
+	mNumSelfConstraints = 0;
+	mNumSelfConstraintBlocks = 0;
+	mSelfConstraintBlocks = NULL;
+	mConstraintSize = 0;
+	mAxisConstraintCount = 0;
+
+	// Iteration maxima.
+	mMaxSolverPositionIterations = 0;
+	mMaxSolverVelocityIterations = 0;
+}
+
+}
+}
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.h
new file mode 100644
index 00000000..a958ac23
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.h
@@ -0,0 +1,203 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#ifndef DY_THREADCONTEXT_H
+#define DY_THREADCONTEXT_H
+
+#include "foundation/PxTransform.h"
+#include "PxvConfig.h"
+#include "CmBitMap.h"
+#include "CmMatrix34.h"
+#include "PxcThreadCoherentCache.h"
+#include "DyThresholdTable.h"
+#include "PsAllocator.h"
+#include "PsAllocator.h"
+#include "GuContactBuffer.h"
+#include "DySolverConstraintDesc.h"
+#include "PxvDynamics.h"
+#include "DyArticulation.h"
+#include "DyFrictionPatchStreamPair.h"
+#include "PxcConstraintBlockStream.h"
+#include "DyCorrelationBuffer.h"
+
+namespace physx
+{
+struct PxsIndexedContactManager;
+
+namespace Dy
+{
+
+/*!
+Cache information specific to the software implementation(non common).
+
+See PxcgetThreadContext.
+
+Not thread-safe, so remember to have one object per thread!
+
+TODO! refactor this and rename(it is a general per thread cache). Move transform cache into its own class.
+*/
+// Per-thread solver scratch state. Most data members are public and are read and written
+// directly by the code that uses the context; only the articulation array and the optional
+// statistics block are private.
+class ThreadContext :
+ public PxcThreadCoherentCache<ThreadContext, PxcNpMemBlockPool>::EntryBase
+{
+ PX_NOCOPY(ThreadContext)
+public:
+
+#if PX_ENABLE_SIM_STATS
+ // Simulation counters; only compiled in when PX_ENABLE_SIM_STATS is set.
+ struct ThreadSimStats
+ {
+ // Sets all four counters to zero.
+ void clear()
+ {
+
+ numActiveConstraints = 0;
+ numActiveDynamicBodies = 0;
+ numActiveKinematicBodies = 0;
+ numAxisSolverConstraints = 0;
+
+ }
+
+ PxU32 numActiveConstraints;
+ PxU32 numActiveDynamicBodies;
+ PxU32 numActiveKinematicBodies;
+ PxU32 numAxisSolverConstraints;
+
+ };
+#endif
+
+ //TODO: tune cache size based on number of active objects.
+ // memBlockPool is dereferenced to construct the stream/manager members below.
+ ThreadContext(PxcNpMemBlockPool* memBlockPool);
+ // Resets the streams, rewinds the descriptor cursors and zeroes the constraint bookkeeping.
+ void reset();
+ // Reserves room for the given number of friction descriptors, sizes the articulation
+ // array to articulationCount and rewinds the descriptor cursors.
+ void resizeArrays(PxU32 frictionConstraintDescCount, PxU32 articulationCount);
+
+ // Mutable access to the private articulation descriptor array.
+ PX_FORCE_INLINE Ps::Array<ArticulationSolverDesc>& getArticulations() { return mArticulations; }
+
+
+#if PX_ENABLE_SIM_STATS
+ // Mutable access to the statistics block.
+ PX_FORCE_INLINE ThreadSimStats& getSimStats()
+ {
+ return mThreadSimStats;
+ }
+#endif
+
+ Gu::ContactBuffer mContactBuffer;
+
+ // temporary buffer for correlation
+ PX_ALIGN(16, CorrelationBuffer mCorrelationBuffer);
+
+ FrictionPatchStreamPair mFrictionPatchStreamPair; // patch streams
+
+ PxsConstraintBlockManager mConstraintBlockManager; // for when this thread context is "lead" on an island
+ PxcConstraintBlockStream mConstraintBlockStream; // constraint block pool
+
+
+ // this stuff is just used for reformatting the solver data. Hopefully we should have a more
+ // sane format for this when the dust settles - so it's just temporary. If we keep this around
+ // here we should move these from public to private
+
+ PxU32 mNumDifferentBodyConstraints;
+ PxU32 mNumDifferentBodyFrictionConstraints;
+ PxU32 mNumSelfConstraints;
+ PxU32 mNumSelfFrictionConstraints;
+ PxU32 mNumSelfConstraintBlocks;
+ PxU32 mNumSelfConstraintFrictionBlocks;
+
+ Ps::Array<PxU32> mConstraintsPerPartition;
+ Ps::Array<PxU32> mFrictionConstraintsPerPartition;
+ Ps::Array<PxU32> mPartitionNormalizationBitmap;
+ // Raw pointers below are assigned by the code that uses the context; this class never
+ // allocates or frees what they point to in the member functions declared here.
+ PxsBodyCore** mBodyCoreArray;
+ PxsRigidBody** mRigidBodyArray;
+ Articulation** mArticulationArray;
+ Cm::SpatialVector* motionVelocityArray;
+ PxU32* bodyRemapTable;
+ PxU32* mNodeIndexArray;
+
+ //Constraint info for normal constraint solver
+ PxSolverConstraintDesc* contactConstraintDescArray;
+ PxU32 contactDescArraySize;
+ PxSolverConstraintDesc* orderedContactConstraints;
+ PxConstraintBatchHeader* contactConstraintBatchHeaders;
+ PxU32 numContactConstraintBatches;
+
+ //Constraint info for partitioning
+ PxSolverConstraintDesc* tempConstraintDescArray;
+
+ //Additional constraint info for 1d/2d friction model
+ Ps::Array<PxSolverConstraintDesc> frictionConstraintDescArray;
+ Ps::Array<PxConstraintBatchHeader> frictionConstraintBatchHeaders;
+
+ //Info for tracking compound contact managers (temporary data - could use scratch memory!)
+ Ps::Array<CompoundContactManager> compoundConstraints;
+
+ //Used for sorting constraints. Temporary, could use scratch memory
+ Ps::Array<const PxsIndexedContactManager*> orderedContactList;
+ Ps::Array<const PxsIndexedContactManager*> tempContactList;
+ Ps::Array<PxU32> sortIndexArray;
+
+ PxU32 numDifferentBodyBatchHeaders;
+ PxU32 numSelfConstraintBatchHeaders;
+
+
+ PxU32 mOrderedContactDescCount;
+ PxU32 mOrderedFrictionDescCount;
+
+ PxU32 mConstraintSize;
+
+ PxU32 mAxisConstraintCount;
+ SelfConstraintBlock* mSelfConstraintBlocks;
+
+ SelfConstraintBlock* mSelfConstraintFrictionBlocks;
+
+ PxU32 mMaxPartitions;
+ PxU32 mMaxFrictionPartitions;
+ PxU32 mMaxSolverPositionIterations;
+ PxU32 mMaxSolverVelocityIterations;
+ PxU32 mMaxArticulationLength;
+ PxU32 mMaxArticulationSolverLength;
+
+ // Descriptor cursors. reset() and resizeArrays() point mContactDescPtr at
+ // contactConstraintDescArray and mFrictionDescPtr at frictionConstraintDescArray.begin().
+ PxSolverConstraintDesc* mContactDescPtr;
+ PxSolverConstraintDesc* mStartContactDescPtr;
+ PxSolverConstraintDesc* mFrictionDescPtr;
+
+private:
+
+ // Exposed through getArticulations(); sized by resizeArrays().
+ Ps::Array<ArticulationSolverDesc> mArticulations;
+
+#if PX_ENABLE_SIM_STATS
+ ThreadSimStats mThreadSimStats;
+#endif
+
+ // No declarations follow this trailing access specifier.
+ public:
+
+};
+
+}
+
+}
+
+#endif //DY_THREADCONTEXT_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyThresholdTable.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyThresholdTable.cpp
new file mode 100644
index 00000000..b7b613f6
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyThresholdTable.cpp
@@ -0,0 +1,68 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "foundation/PxMemory.h"
+#include "DyThresholdTable.h"
+#include "PsHash.h"
+#include "PsUtilities.h"
+#include "PsAllocator.h"
+
+namespace physx
+{
+ namespace Dy
+ {
+ // Looks up the body pair (nodeIndexA, nodeIndexB) in the hash table and reports whether the
+ // force accumulated for that pair exceeds the pair's threshold scaled by dt.
+ //
+ // stream     - threshold stream the table entries index into.
+ // nodeIndexA - node index of one body of the pair (either order is accepted).
+ // nodeIndexB - node index of the other body of the pair.
+ // dt         - factor applied to the stored threshold before the comparison.
+ //
+ // Returns false when the pair is not present in the table.
+ bool ThresholdTable::check(const ThresholdStream& stream, const PxU32 nodeIndexA, const PxU32 nodeIndexB, PxReal dt)
+ {
+ 	PxU32* PX_RESTRICT hashes = mHash;
+ 	PxU32* PX_RESTRICT nextIndices = mNexts;
+ 	Pair* PX_RESTRICT pairs = mPairs;
+
+ 	// Stream elements are matched against the ordered pair (smaller index first).
+ 	const PxU32 nA = PxMin(nodeIndexA, nodeIndexB);
+ 	const PxU32 nB = PxMax(nodeIndexA, nodeIndexB);
+
+ 	// Hash the same ordered pair that is compared below. Hashing the raw arguments would
+ 	// make the chosen bucket depend on argument order (assuming computeHashKey is not
+ 	// symmetric in its first two arguments - TODO confirm), so a reversed query could miss
+ 	// an entry that is present.
+ 	PxU32 hashKey = computeHashKey(nA, nB, mHashSize);
+
+ 	// Walk the bucket's chain until the pair is found or the chain ends.
+ 	PxU32 pairIndex = hashes[hashKey];
+ 	while(NO_INDEX != pairIndex)
+ 	{
+ 		Pair& pair = pairs[pairIndex];
+ 		const PxU32 thresholdStreamIndex = pair.thresholdStreamIndex;
+ 		PX_ASSERT(thresholdStreamIndex < stream.size());
+ 		const ThresholdStreamElement& otherElement = stream[thresholdStreamIndex];
+ 		if(otherElement.nodeIndexA==nA && otherElement.nodeIndexB==nB)
+ 			return (pair.accumulatedForce > (otherElement.threshold * dt));
+ 		pairIndex = nextIndices[pairIndex];
+ 	}
+ 	return false;
+ }
+ }
+}