aboutsummaryrefslogtreecommitdiff
path: root/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp
diff options
context:
space:
mode:
authorgit perforce import user <a@b>2016-10-25 12:29:14 -0600
committerSheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>2016-10-25 18:56:37 -0500
commit3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
treefa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp
downloadphysx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
Initial commit:
PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp')
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp1121
1 files changed, 1121 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp
new file mode 100644
index 00000000..ea935ce9
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp
@@ -0,0 +1,1121 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxPreprocessor.h"
+#include "PsVecMath.h"
+
+#ifdef PX_SUPPORT_SIMD
+
+#include "CmPhysXCommon.h"
+#include "DySolverBody.h"
+#include "DySolverContact.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverConstraintDesc.h"
+#include "DyThresholdTable.h"
+#include "DySolverContext.h"
+#include "PsUtilities.h"
+#include "DyConstraint.h"
+#include "PsAtomic.h"
+#include "DySolverConstraintsShared.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+// Solves every 1D constraint row of one rigid-body pair in order, updating both bodies' velocity state in place. Port of scalar implementation to SIMD maths with some interleaving of instructions.
+void solve1D(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+ PX_UNUSED(cache); // the solver context is not read here
+ PxSolverBody& b0 = *desc.bodyA;
+ PxSolverBody& b1 = *desc.bodyB;
+ // desc.constraint points at a SolverConstraint1DHeader followed directly by header->count SolverConstraint1D rows.
+ PxU8* PX_RESTRICT bPtr = desc.constraint;
+ //PxU32 length = desc.constraintLength;
+
+ const SolverConstraint1DHeader* PX_RESTRICT header = reinterpret_cast<const SolverConstraint1DHeader*>(bPtr);
+ SolverConstraint1D* PX_RESTRICT base = reinterpret_cast<SolverConstraint1D*>(bPtr + sizeof(SolverConstraint1DHeader));
+ // Work on local copies of both bodies' velocity state; they are stored back once after the loop.
+ Vec3V linVel0 = V3LoadA(b0.linearVelocity);
+ Vec3V linVel1 = V3LoadA(b1.linearVelocity);
+ Vec3V angState0 = V3LoadA(b0.angularState);
+ Vec3V angState1 = V3LoadA(b1.angularState);
+ // Per-body factors from the header that scale a row impulse change before it is applied to the velocities.
+ const FloatV invMass0 = FLoad(header->invMass0D0);
+ const FloatV invMass1 = FLoad(header->invMass1D1);
+ const FloatV invInertiaScale0 = FLoad(header->angularInvMassScale0);
+ const FloatV invInertiaScale1 = FLoad(header->angularInvMassScale1);
+
+
+ for(PxU32 i=0; i<header->count;++i, base++)
+ {
+ Ps::prefetchLine(base+1); // prefetch the next row while this one is solved
+ SolverConstraint1D& c = *base;
+ // Row axes for body 0 / body 1 (named *Vel here, but loaded from the row's lin0/lin1/ang0/ang1 members).
+ const Vec3V clinVel0 = V3LoadA(c.lin0);
+ const Vec3V clinVel1 = V3LoadA(c.lin1);
+ const Vec3V cangVel0 = V3LoadA(c.ang0);
+ const Vec3V cangVel1 = V3LoadA(c.ang1);
+ // Row scalars used by the impulse update below.
+ const FloatV constant = FLoad(c.constant);
+ const FloatV vMul = FLoad(c.velMultiplier);
+ const FloatV iMul = FLoad(c.impulseMultiplier);
+ const FloatV appliedForce = FLoad(c.appliedForce);
+ //const FloatV targetVel = FLoad(c.targetVelocity);
+
+ const FloatV maxImpulse = FLoad(c.maxImpulse);
+ const FloatV minImpulse = FLoad(c.minImpulse);
+ // Component-wise velocity*axis products (linear plus angular) for each body; the elements are summed below.
+ const Vec3V v0 = V3MulAdd(linVel0, clinVel0, V3Mul(angState0, cangVel0));
+ const Vec3V v1 = V3MulAdd(linVel1, clinVel1, V3Mul(angState1, cangVel1));
+ // Unclamped impulse = iMul*appliedForce + (vMul*normalVel + constant); it is then clamped to [minImpulse, maxImpulse].
+ const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
+ const FloatV unclampedForce = FScaleAdd(iMul, appliedForce, FScaleAdd(vMul, normalVel, constant));
+ const FloatV clampedForce = FMin(maxImpulse, (FMax(minImpulse, unclampedForce)));
+ const FloatV deltaF = FSub(clampedForce, appliedForce);
+ // Persist the clamped total on the row; only the change (deltaF) is applied to the velocities.
+ FStore(clampedForce, &c.appliedForce);
+ linVel0 = V3ScaleAdd(clinVel0, FMul(deltaF, invMass0), linVel0);
+ linVel1 = V3NegScaleSub(clinVel1, FMul(deltaF, invMass1), linVel1);
+ angState0 = V3ScaleAdd(cangVel0, FMul(deltaF, invInertiaScale0), angState0);
+ //This should be negScaleSub but invInertiaScale1 is negated already
+ angState1 = V3ScaleAdd(cangVel1, FMul(deltaF, invInertiaScale1), angState1);
+
+ }
+ // Write the updated velocity state back to both bodies.
+ V3StoreA(linVel0, b0.linearVelocity);
+ V3StoreA(angState0, b0.angularState);
+ V3StoreA(linVel1, b1.linearVelocity);
+ V3StoreA(angState1, b1.angularState);
+ // Sanity checks on the values just stored.
+ PX_ASSERT(b0.linearVelocity.isFinite());
+ PX_ASSERT(b0.angularState.isFinite());
+ PX_ASSERT(b1.linearVelocity.isFinite());
+ PX_ASSERT(b1.angularState.isFinite());
+}
+// Concludes a 1D constraint block: every row's 'constant' is overwritten with its 'unbiasedConstant'.
+void conclude1D(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+ SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
+ PxU8* base = desc.constraint + sizeof(SolverConstraint1DHeader);
+ PxU32 stride = header->type == DY_SC_TYPE_EXT_1D ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D); // row size depends on the header's type
+ // Rows are walked by byte stride so both row sizes are handled; each row is accessed through the SolverConstraint1D view.
+ for(PxU32 i=0; i<header->count; i++)
+ {
+ SolverConstraint1D& c = *reinterpret_cast<SolverConstraint1D*>(base); // presumably the Ext row starts with the same members - confirm against DySolverConstraint1D.h
+
+ c.constant = c.unbiasedConstant;
+
+ base += stride;
+ }
+ PX_ASSERT(desc.constraint + getConstraintLength(desc) == base); // the walk must end exactly at the end of the constraint data
+}
+
+// ==============================================================
+// Solves the contact rows (and, when cache.doFriction is set, the friction rows) of one rigid-body pair, updating both bodies' velocity state in place.
+void solveContact(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+ PxSolverBody& b0 = *desc.bodyA;
+ PxSolverBody& b1 = *desc.bodyB;
+ // Local copies of both bodies' velocity state; written back once after all patches are processed.
+ Vec3V linVel0 = V3LoadA(b0.linearVelocity);
+ Vec3V linVel1 = V3LoadA(b1.linearVelocity);
+ Vec3V angState0 = V3LoadA(b0.angularState);
+ Vec3V angState1 = V3LoadA(b1.angularState);
+ // The constraint data is a sequence of patches: header, contact points, force buffer, friction rows.
+ const PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);
+
+ //hopefully pointer aliasing doesn't bite.
+ PxU8* PX_RESTRICT currPtr = desc.constraint;
+
+ while(currPtr < last)
+ {
+ SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
+ currPtr += sizeof(SolverContactHeader);
+
+ const PxU32 numNormalConstr = hdr->numNormalConstr;
+ const PxU32 numFrictionConstr = hdr->numFrictionConstr;
+
+ SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
+ Ps::prefetchLine(contacts);
+ currPtr += numNormalConstr * sizeof(SolverContactPoint);
+ // One PxF32 per contact; the buffer size is rounded up to a multiple of four entries.
+ PxF32* forceBuffer = reinterpret_cast<PxF32*>(currPtr);
+ currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));
+
+ SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
+ currPtr += numFrictionConstr * sizeof(SolverContactFriction);
+
+ const FloatV invMassA = FLoad(hdr->invMass0);
+ const FloatV invMassB = FLoad(hdr->invMass1);
+
+ const FloatV angDom0 = FLoad(hdr->angDom0);
+ const FloatV angDom1 = FLoad(hdr->angDom1);
+
+ const Vec3V contactNormal = hdr->normal;
+ // Solve the contact rows first; the result is used below to size the friction limits.
+ const FloatV accumulatedNormalImpulse = solveDynamicContacts(contacts, numNormalConstr, contactNormal, invMassA, invMassB,
+ angDom0, angDom1, linVel0, angState0, linVel1, angState1, forceBuffer);
+ // Friction is skipped when the context disables it or the patch has no friction rows.
+ if(cache.doFriction && numFrictionConstr)
+ {
+ const FloatV staticFrictionCof = hdr->getStaticFriction();
+ const FloatV dynamicFrictionCof = hdr->getDynamicFriction();
+ const FloatV maxFrictionImpulse = FMul(staticFrictionCof, accumulatedNormalImpulse);
+ const FloatV maxDynFrictionImpulse = FMul(dynamicFrictionCof, accumulatedNormalImpulse);
+ const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse);
+ // Becomes true if any friction row of this patch exceeded the static limit and was clamped.
+ BoolV broken = BFFFF();
+
+ if(cache.writeBackIteration)
+ Ps::prefetchLine(hdr->frictionBrokenWritebackByte);
+
+ for(PxU32 i=0;i<numFrictionConstr;i++)
+ {
+ SolverContactFriction& f = frictions[i];
+ Ps::prefetchLine(&frictions[i],128);
+
+ // Each Vec4V packs a 3-vector in XYZ and a scalar in W, as the member names indicate.
+ const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW;
+ const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
+ const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW;
+
+ const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
+ const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);
+ const Vec3V rbXn = Vec3V_From_Vec4V(rbXnXYZ_biasW);
+
+ const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
+ const FloatV bias = V4GetW(rbXnXYZ_biasW);
+ const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);
+
+ const FloatV targetVel = FLoad(f.targetVel);
+ // Linear velocity change per unit impulse for each body.
+ const Vec3V delLinVel0 = V3Scale(normal, invMassA);
+ const Vec3V delLinVel1 = V3Scale(normal, invMassB);
+ // Velocity of body 0 minus body 1 along this friction row (linear plus angular terms).
+ const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angState0, raXn));
+ const Vec3V v1 = V3MulAdd(linVel1, normal, V3Mul(angState1, rbXn));
+ const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
+
+
+
+ // appliedForce - (bias - targetVel) * velMultiplier - a hoisted part of the total impulse computation
+ const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce);
+
+ // Algorithm:
+ // if abs(appliedForce + deltaF) > maxFrictionImpulse
+ // clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
+ // (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
+ // set broken flag to true || broken flag
+
+ // FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier);
+ // FloatV potentialSumF = FAdd(appliedForce, deltaF);
+ // totalImpulse = tmp1 - normalVel * velMultiplier
+ const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);
+
+ // On XBox this clamping code uses the vector simple pipe rather than vector float,
+ // which eliminates a lot of stall cycles
+
+ const BoolV clamp = FIsGrtr(FAbs(totalImpulse), maxFrictionImpulse);
+
+ const FloatV totalClamped = FMin(maxDynFrictionImpulse, FMax(negMaxDynFrictionImpulse, totalImpulse));
+ // Use the dynamic-limit clamp only when the static limit was exceeded.
+ const FloatV newAppliedForce = FSel(clamp, totalClamped,totalImpulse);
+
+ broken = BOr(broken, clamp);
+
+ FloatV deltaF = FSub(newAppliedForce, appliedForce);
+
+ // we could get rid of the stall here by calculating and clamping delta separately, but
+ // the complexity isn't really worth it.
+ // Apply the impulse change with opposite signs to the two bodies.
+ linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
+ linVel1 = V3NegScaleSub(delLinVel1, deltaF, linVel1);
+ angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
+ angState1 = V3NegScaleSub(rbXn, FMul(deltaF, angDom1), angState1);
+
+ f.setAppliedForce(newAppliedForce);
+
+
+ }
+ Store_From_BoolV(broken, &hdr->broken); // record on the patch header whether any row was clamped
+ }
+
+ }
+ // NOTE(review): these asserts run before the write back, so they check the bodies' incoming state, not the new results.
+ PX_ASSERT(b0.linearVelocity.isFinite());
+ PX_ASSERT(b0.angularState.isFinite());
+ PX_ASSERT(b1.linearVelocity.isFinite());
+ PX_ASSERT(b1.angularState.isFinite());
+
+ // Write back (NOTE(review): unaligned stores here, while the loads above and solveContact_BStatic use the aligned variants)
+ V3StoreU(linVel0, b0.linearVelocity);
+ V3StoreU(linVel1, b1.linearVelocity);
+ V3StoreU(angState0, b0.angularState);
+ V3StoreU(angState1, b1.angularState);
+
+ PX_ASSERT(b0.linearVelocity.isFinite());
+ PX_ASSERT(b0.angularState.isFinite());
+ PX_ASSERT(b1.linearVelocity.isFinite());
+ PX_ASSERT(b1.angularState.isFinite());
+ // Every patch must have been consumed exactly.
+ PX_ASSERT(currPtr == last);
+}
+// Variant of solveContact that only reads and updates body A's velocity state; body B's velocity is never touched.
+void solveContact_BStatic(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+ PxSolverBody& b0 = *desc.bodyA;
+ //PxSolverBody& b1 = *desc.bodyB;
+
+ Vec3V linVel0 = V3LoadA(b0.linearVelocity);
+ Vec3V angState0 = V3LoadA(b0.angularState);
+ // The constraint data is a sequence of patches: header, contact points, force buffer, friction rows.
+ const PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);
+
+ //hopefully pointer aliasing doesn't bite.
+ PxU8* PX_RESTRICT currPtr = desc.constraint;
+
+ while(currPtr < last)
+ {
+ SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
+ currPtr += sizeof(SolverContactHeader);
+
+ const PxU32 numNormalConstr = hdr->numNormalConstr;
+ const PxU32 numFrictionConstr = hdr->numFrictionConstr;
+
+ SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
+ //Ps::prefetchLine(contacts);
+ currPtr += numNormalConstr * sizeof(SolverContactPoint);
+ // One PxF32 per contact; the buffer size is rounded up to a multiple of four entries.
+ PxF32* forceBuffer = reinterpret_cast<PxF32*>(currPtr);
+ currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));
+
+ SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
+ currPtr += numFrictionConstr * sizeof(SolverContactFriction);
+
+
+
+ const FloatV invMassA = FLoad(hdr->invMass0);
+
+ const Vec3V contactNormal = hdr->normal;
+ const FloatV angDom0 = FLoad(hdr->angDom0);
+
+ // Solve the contact rows first; the result is used below to size the friction limits.
+ const FloatV accumulatedNormalImpulse = solveStaticContacts(contacts, numNormalConstr, contactNormal,
+ invMassA, angDom0, linVel0, angState0, forceBuffer);
+ // Friction is skipped when the context disables it or the patch has no friction rows.
+ if(cache.doFriction && numFrictionConstr)
+ {
+ const FloatV maxFrictionImpulse = FMul(hdr->getStaticFriction(), accumulatedNormalImpulse);
+ const FloatV maxDynFrictionImpulse = FMul(hdr->getDynamicFriction(), accumulatedNormalImpulse);
+ // Becomes true if any friction row of this patch exceeded the static limit and was clamped.
+ BoolV broken = BFFFF();
+ if(cache.writeBackIteration)
+ Ps::prefetchLine(hdr->frictionBrokenWritebackByte);
+
+ for(PxU32 i=0;i<numFrictionConstr;i++)
+ {
+ SolverContactFriction& f = frictions[i];
+ Ps::prefetchLine(&frictions[i],128);
+
+ // Each Vec4V packs a 3-vector in XYZ and a scalar in W, as the member names indicate.
+ const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW;
+ const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
+ const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW;
+
+ const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
+ const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);
+ // Only the W component (bias) of rbXnXYZ_biasW is used in this variant.
+ const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
+ const FloatV bias = V4GetW(rbXnXYZ_biasW);
+ const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);
+
+ const FloatV targetVel = FLoad(f.targetVel);
+
+ const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse); // loop-invariant; solveContact computes this once before its loop
+
+ const Vec3V delLinVel0 = V3Scale(normal, invMassA);
+ //const FloatV negMaxFrictionImpulse = FNeg(maxFrictionImpulse);
+ // Velocity along this friction row comes from body A alone (no body B term is subtracted).
+ const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angState0, raXn));
+ const FloatV normalVel = V3SumElems(v0);
+
+
+ // appliedForce - (bias - targetVel) * velMultiplier - a hoisted part of the total impulse computation
+ const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce);
+
+ // Algorithm:
+ // if abs(appliedForce + deltaF) > maxFrictionImpulse
+ // clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
+ // (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
+ // set broken flag to true || broken flag
+
+ // FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier);
+ // FloatV potentialSumF = FAdd(appliedForce, deltaF);
+ // totalImpulse = tmp1 - normalVel * velMultiplier
+ const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);
+
+ // On XBox this clamping code uses the vector simple pipe rather than vector float,
+ // which eliminates a lot of stall cycles
+
+ const BoolV clamp = FIsGrtr(FAbs(totalImpulse), maxFrictionImpulse);
+
+ const FloatV totalClamped = FMin(maxDynFrictionImpulse, FMax(negMaxDynFrictionImpulse, totalImpulse));
+
+ broken = BOr(broken, clamp);
+ // Use the dynamic-limit clamp only when the static limit was exceeded.
+ const FloatV newAppliedForce = FSel(clamp, totalClamped,totalImpulse);
+
+ FloatV deltaF = FSub(newAppliedForce, appliedForce);
+
+ // we could get rid of the stall here by calculating and clamping delta separately, but
+ // the complexity isn't really worth it.
+
+ linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
+ angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
+
+ f.setAppliedForce(newAppliedForce);
+
+ }
+ Store_From_BoolV(broken, &hdr->broken); // record on the patch header whether any row was clamped
+ }
+
+ }
+ // NOTE(review): these asserts run before the write back, so they check body A's incoming state, not the new results.
+ PX_ASSERT(b0.linearVelocity.isFinite());
+ PX_ASSERT(b0.angularState.isFinite());
+
+ // Write back
+ V3StoreA(linVel0, b0.linearVelocity);
+ V3StoreA(angState0, b0.angularState);
+
+ PX_ASSERT(b0.linearVelocity.isFinite());
+ PX_ASSERT(b0.angularState.isFinite());
+ // Every patch must have been consumed exactly.
+ PX_ASSERT(currPtr == last);
+}
+
+// Concludes a contact constraint block: each contact point's biasedErr is replaced by its unbiasedErr and each friction row's bias is set to zero.
+void concludeContact(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+ PxU8* PX_RESTRICT cPtr = desc.constraint;
+
+ const FloatV zero = FZero();
+
+ PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);
+ while(cPtr < last)
+ {
+ const SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<const SolverContactHeader*>(cPtr);
+ cPtr += sizeof(SolverContactHeader);
+
+ const PxU32 numNormalConstr = hdr->numNormalConstr;
+ const PxU32 numFrictionConstr = hdr->numFrictionConstr;
+
+ //if(cPtr < last)
+ //Ps::prefetchLine(cPtr, 512);
+ Ps::prefetchLine(cPtr,128);
+ Ps::prefetchLine(cPtr,256);
+ Ps::prefetchLine(cPtr,384);
+ // Element sizes depend on the header type, so both the plain and the Ext layouts can be walked here.
+ const PxU32 pointStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactPointExt)
+ : sizeof(SolverContactPoint);
+ for(PxU32 i=0;i<numNormalConstr;i++)
+ {
+ SolverContactPoint *c = reinterpret_cast<SolverContactPoint*>(cPtr);
+ cPtr += pointStride;
+ //c->scaledBias = PxMin(c->scaledBias, 0.f);
+ c->biasedErr = c->unbiasedErr;
+ }
+
+ cPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3)); //Jump over force buffers
+
+ const PxU32 frictionStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactFrictionExt)
+ : sizeof(SolverContactFriction);
+ for(PxU32 i=0;i<numFrictionConstr;i++)
+ {
+ SolverContactFriction *f = reinterpret_cast<SolverContactFriction*>(cPtr);
+ cPtr += frictionStride;
+ f->setBias(zero);
+ }
+ }
+ PX_ASSERT(cPtr == last); // every patch must have been consumed exactly
+}
+// Copies per-contact forces to desc.writeBack (if present), sets the friction-broken byte of broken patches, and may append a ThresholdStreamElement to the context.
+void writeBackContact(const PxSolverConstraintDesc& desc, SolverContext& cache,
+ PxSolverBodyData& bd0, PxSolverBodyData& bd1)
+{
+ // Sum of the forces read from the force buffers; only accumulated when a write-back buffer exists (see below).
+ PxReal normalForce = 0;
+
+ PxU8* PX_RESTRICT cPtr = desc.constraint;
+ PxReal* PX_RESTRICT vForceWriteback = reinterpret_cast<PxReal*>(desc.writeBack);
+ PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);
+
+ bool forceThreshold = false;
+
+ while(cPtr < last)
+ {
+ const SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<const SolverContactHeader*>(cPtr);
+ cPtr += sizeof(SolverContactHeader);
+ // NOTE(review): assigned, not OR-ed, so the value after the loop reflects the last patch's flag only.
+ forceThreshold = hdr->flags & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
+ const PxU32 numNormalConstr = hdr->numNormalConstr;
+ const PxU32 numFrictionConstr = hdr->numFrictionConstr;
+
+ //if(cPtr < last)
+ Ps::prefetchLine(cPtr, 256);
+ Ps::prefetchLine(cPtr, 384);
+
+ const PxU32 pointStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactPointExt)
+ : sizeof(SolverContactPoint);
+ // Skip the contact points; the applied forces are read from the force buffer that follows them.
+ cPtr += pointStride * numNormalConstr;
+ PxF32* forceBuffer = reinterpret_cast<PxF32*>(cPtr);
+ cPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));
+
+ if(vForceWriteback!=NULL)
+ {
+ for(PxU32 i=0; i<numNormalConstr; i++)
+ {
+ PxReal appliedForce = forceBuffer[i];
+ *vForceWriteback++ = appliedForce;
+ normalForce += appliedForce;
+ }
+ }
+
+ const PxU32 frictionStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactFrictionExt)
+ : sizeof(SolverContactFriction);
+ // Report broken friction through the byte the header points at, when one is provided.
+ if(hdr->broken && hdr->frictionBrokenWritebackByte != NULL)
+ {
+ *hdr->frictionBrokenWritebackByte = 1;
+ }
+
+ cPtr += frictionStride * numFrictionConstr;
+
+ }
+ PX_ASSERT(cPtr == last);
+
+
+ // Emit a threshold element only for rigid-body pairs (no articulation links) with a non-zero force sum and a finite report threshold on either body.
+ if(forceThreshold && desc.linkIndexA == PxSolverConstraintDesc::NO_LINK && desc.linkIndexB == PxSolverConstraintDesc::NO_LINK &&
+ normalForce !=0 && (bd0.reportThreshold < PX_MAX_REAL || bd1.reportThreshold < PX_MAX_REAL))
+ {
+ ThresholdStreamElement elt;
+ elt.normalForce = normalForce;
+ elt.threshold = PxMin<float>(bd0.reportThreshold, bd1.reportThreshold);
+ elt.nodeIndexA = bd0.nodeIndex;
+ elt.nodeIndexB = bd1.nodeIndex;
+ elt.shapeInteraction = reinterpret_cast<const SolverContactHeader*>(desc.constraint)->shapeInteraction; // taken from the first patch header
+ Ps::order(elt.nodeIndexA, elt.nodeIndexB);
+ PX_ASSERT(elt.nodeIndexA < elt.nodeIndexB);
+ PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength);
+ cache.mThresholdStream[cache.mThresholdStreamIndex++] = elt;
+ }
+}
+
+// Writes the summed impulse of a 1D constraint block to its ConstraintWriteback record; does nothing when desc.writeBack is null.
+// The angular impulse is adjusted from CoM to joint using header->body0WorldOffset before it is stored.
+void writeBack1D(const PxSolverConstraintDesc& desc, SolverContext&, PxSolverBodyData&, PxSolverBodyData&)
+{
+ ConstraintWriteback* writeback = reinterpret_cast<ConstraintWriteback*>(desc.writeBack);
+ if(writeback)
+ {
+ SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
+ PxU8* base = desc.constraint + sizeof(SolverConstraint1DHeader);
+ PxU32 stride = header->type == DY_SC_TYPE_EXT_1D ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D); // row size depends on the header's type
+ // Accumulate lin0/ang0Writeback scaled by the applied force, over the rows flagged DY_SC_FLAG_OUTPUT_FORCE only.
+ PxVec3 lin(0), ang(0);
+ for(PxU32 i=0; i<header->count; i++)
+ {
+ const SolverConstraint1D* c = reinterpret_cast<SolverConstraint1D*>(base);
+ if(c->flags & DY_SC_FLAG_OUTPUT_FORCE)
+ {
+ lin += c->lin0 * c->appliedForce;
+ ang += c->ang0Writeback * c->appliedForce;
+ }
+ base += stride;
+ }
+
+ ang -= header->body0WorldOffset.cross(lin);
+ writeback->linearImpulse = lin;
+ writeback->angularImpulse = ang;
+ writeback->broken = header->breakable ? PxU32(lin.magnitude()>header->linBreakImpulse || ang.magnitude()>header->angBreakImpulse) : 0; // 1 when breakable and either magnitude exceeds its break impulse, else 0
+
+ PX_ASSERT(desc.constraint + getConstraintLength(desc) == base); // the row walk must end exactly at the end of the constraint data
+ }
+}
+
+
+// Runs solve1D over desc[0..constraintCount-1] in order. While descriptor a-1 is solved, the
+// constraint data of descriptor a is prefetched at byte offsets 0, 128 and 256.
+// An empty batch (constraintCount == 0) is a no-op.
+void solve1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+ // Without this guard constraintCount-1 below would wrap around and index far outside desc.
+ if(constraintCount == 0)
+ return;
+
+ for(PxU32 a = 1; a < constraintCount; ++a)
+ {
+ Ps::prefetchLine(desc[a].constraint);
+ Ps::prefetchLine(desc[a].constraint, 128);
+ Ps::prefetchLine(desc[a].constraint, 256);
+ solve1D(desc[a-1], cache);
+ }
+ // The last descriptor has no successor to prefetch, so it is handled outside the loop.
+ solve1D(desc[constraintCount-1], cache);
+}
+
+// Runs solve1D followed by conclude1D on each of desc[0..constraintCount-1] in order. While
+// descriptor a-1 is processed, the constraint data of descriptor a is prefetched at byte
+// offsets 0, 128 and 256. An empty batch (constraintCount == 0) is a no-op.
+void solve1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+ // Without this guard constraintCount-1 below would wrap around and index far outside desc.
+ if(constraintCount == 0)
+ return;
+
+ for(PxU32 a = 1; a < constraintCount; ++a)
+ {
+ Ps::prefetchLine(desc[a].constraint);
+ Ps::prefetchLine(desc[a].constraint, 128);
+ Ps::prefetchLine(desc[a].constraint, 256);
+ solve1D(desc[a-1], cache);
+ conclude1D(desc[a-1], cache);
+ }
+ // The last descriptor has no successor to prefetch, so it is handled outside the loop.
+ solve1D(desc[constraintCount-1], cache);
+ conclude1D(desc[constraintCount-1], cache);
+}
+
+// Runs solve1D followed by writeBack1D on each of desc[0..constraintCount-1] in order. The body
+// data passed to writeBack1D is looked up in cache.solverBodyArray through the descriptor's
+// bodyADataIndex / bodyBDataIndex. While descriptor a-1 is processed, the constraint data of
+// descriptor a is prefetched at byte offsets 0, 128 and 256. An empty batch is a no-op.
+void solve1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+ // Without this guard constraintCount-1 below would wrap around and index far outside desc.
+ if(constraintCount == 0)
+ return;
+
+ for(PxU32 a = 1; a < constraintCount; ++a)
+ {
+ Ps::prefetchLine(desc[a].constraint);
+ Ps::prefetchLine(desc[a].constraint, 128);
+ Ps::prefetchLine(desc[a].constraint, 256);
+ PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a-1].bodyADataIndex];
+ PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a-1].bodyBDataIndex];
+ solve1D(desc[a-1], cache);
+ writeBack1D(desc[a-1], cache, bd0, bd1);
+ }
+ // The last descriptor has no successor to prefetch, so it is handled outside the loop.
+ PxSolverBodyData& bd0 = cache.solverBodyArray[desc[constraintCount-1].bodyADataIndex];
+ PxSolverBodyData& bd1 = cache.solverBodyArray[desc[constraintCount-1].bodyBDataIndex];
+ solve1D(desc[constraintCount-1], cache);
+ writeBack1D(desc[constraintCount-1], cache, bd0, bd1);
+}
+
+// Runs writeBack1D (no solve) on each of desc[0..constraintCount-1] in order. The body data is
+// looked up in cache.solverBodyArray through the descriptor's bodyADataIndex / bodyBDataIndex.
+// While descriptor a-1 is processed, the constraint data of descriptor a is prefetched at byte
+// offsets 0, 128 and 256. An empty batch (constraintCount == 0) is a no-op.
+void writeBack1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+ // Without this guard constraintCount-1 below would wrap around and index far outside desc.
+ if(constraintCount == 0)
+ return;
+
+ for(PxU32 a = 1; a < constraintCount; ++a)
+ {
+ Ps::prefetchLine(desc[a].constraint);
+ Ps::prefetchLine(desc[a].constraint, 128);
+ Ps::prefetchLine(desc[a].constraint, 256);
+ PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a-1].bodyADataIndex];
+ PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a-1].bodyBDataIndex];
+ writeBack1D(desc[a-1], cache, bd0, bd1);
+ }
+ // The last descriptor has no successor to prefetch, so it is handled outside the loop.
+ PxSolverBodyData& bd0 = cache.solverBodyArray[desc[constraintCount-1].bodyADataIndex];
+ PxSolverBodyData& bd1 = cache.solverBodyArray[desc[constraintCount-1].bodyBDataIndex];
+ writeBack1D(desc[constraintCount-1], cache, bd0, bd1);
+}
+
+// Runs solveContact over desc[0..constraintCount-1] in order. While descriptor a-1 is solved,
+// the constraint data of descriptor a is prefetched at byte offsets 0, 128 and 256.
+// An empty batch (constraintCount == 0) is a no-op.
+void solveContactBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+ // Without this guard constraintCount-1 below would wrap around and index far outside desc.
+ if(constraintCount == 0)
+ return;
+
+ for(PxU32 a = 1; a < constraintCount; ++a)
+ {
+ Ps::prefetchLine(desc[a].constraint);
+ Ps::prefetchLine(desc[a].constraint, 128);
+ Ps::prefetchLine(desc[a].constraint, 256);
+ solveContact(desc[a-1], cache);
+ }
+ // The last descriptor has no successor to prefetch, so it is handled outside the loop.
+ solveContact(desc[constraintCount-1], cache);
+}
+
+// Runs solveContact followed by concludeContact on each of desc[0..constraintCount-1] in order.
+// While descriptor a-1 is processed, the constraint data of descriptor a is prefetched at byte
+// offsets 0, 128 and 256. An empty batch (constraintCount == 0) is a no-op.
+void solveContactConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+ // Without this guard constraintCount-1 below would wrap around and index far outside desc.
+ if(constraintCount == 0)
+ return;
+
+ for(PxU32 a = 1; a < constraintCount; ++a)
+ {
+ Ps::prefetchLine(desc[a].constraint);
+ Ps::prefetchLine(desc[a].constraint, 128);
+ Ps::prefetchLine(desc[a].constraint, 256);
+ solveContact(desc[a-1], cache);
+ concludeContact(desc[a-1], cache);
+ }
+ // The last descriptor has no successor to prefetch, so it is handled outside the loop.
+ solveContact(desc[constraintCount-1], cache);
+ concludeContact(desc[constraintCount-1], cache);
+}
+
+// Runs solveContact followed by writeBackContact on each of desc[0..constraintCount-1] in order,
+// then flushes the context's local threshold stream to the shared stream when fewer than four
+// free slots remain in it. The body data passed to writeBackContact is looked up in
+// cache.solverBodyArray through the descriptor's bodyADataIndex / bodyBDataIndex.
+// An empty batch (constraintCount == 0) is a no-op: nothing is solved and nothing is flushed.
+void solveContactBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+ // Without this guard constraintCount-1 below would wrap around and index far outside desc.
+ if(constraintCount == 0)
+ return;
+
+ for(PxU32 a = 1; a < constraintCount; ++a)
+ {
+ Ps::prefetchLine(desc[a].constraint);
+ Ps::prefetchLine(desc[a].constraint, 128);
+ Ps::prefetchLine(desc[a].constraint, 256);
+ PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a-1].bodyADataIndex];
+ PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a-1].bodyBDataIndex];
+ solveContact(desc[a-1], cache);
+ writeBackContact(desc[a-1], cache, bd0, bd1);
+ }
+ // The last descriptor has no successor to prefetch, so it is handled outside the loop.
+ PxSolverBodyData& bd0 = cache.solverBodyArray[desc[constraintCount-1].bodyADataIndex];
+ PxSolverBodyData& bd1 = cache.solverBodyArray[desc[constraintCount-1].bodyBDataIndex];
+ solveContact(desc[constraintCount-1], cache);
+ writeBackContact(desc[constraintCount-1], cache, bd0, bd1);
+
+ if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4))
+ {
+ //Not enough space to write 4 more thresholds back!
+ //Write back to global buffer
+ // The atomicAdd result minus the local count is used as the first slot of the range copied into the shared stream.
+ PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+ for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
+ {
+ cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
+ }
+ cache.mThresholdStreamIndex = 0;
+ }
+}
+
+// Runs solveContact_BStatic over desc[0..constraintCount-1] in order. While descriptor a-1 is
+// solved, the constraint data of descriptor a is prefetched at byte offsets 0, 128 and 256.
+// An empty batch (constraintCount == 0) is a no-op.
+void solveContact_BStaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+ // Without this guard constraintCount-1 below would wrap around and index far outside desc.
+ if(constraintCount == 0)
+ return;
+
+ for(PxU32 a = 1; a < constraintCount; ++a)
+ {
+ Ps::prefetchLine(desc[a].constraint);
+ Ps::prefetchLine(desc[a].constraint, 128);
+ Ps::prefetchLine(desc[a].constraint, 256);
+ solveContact_BStatic(desc[a-1], cache);
+ }
+ // The last descriptor has no successor to prefetch, so it is handled outside the loop.
+ solveContact_BStatic(desc[constraintCount-1], cache);
+}
+
+// Runs solveContact_BStatic followed by concludeContact on each of desc[0..constraintCount-1] in
+// order. While descriptor a-1 is processed, the constraint data of descriptor a is prefetched at
+// byte offsets 0, 128 and 256. An empty batch (constraintCount == 0) is a no-op.
+void solveContact_BStaticConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+ // Without this guard constraintCount-1 below would wrap around and index far outside desc.
+ if(constraintCount == 0)
+ return;
+
+ for(PxU32 a = 1; a < constraintCount; ++a)
+ {
+ Ps::prefetchLine(desc[a].constraint);
+ Ps::prefetchLine(desc[a].constraint, 128);
+ Ps::prefetchLine(desc[a].constraint, 256);
+ solveContact_BStatic(desc[a-1], cache);
+ concludeContact(desc[a-1], cache);
+ }
+ // The last descriptor has no successor to prefetch, so it is handled outside the loop.
+ solveContact_BStatic(desc[constraintCount-1], cache);
+ concludeContact(desc[constraintCount-1], cache);
+}
+
+// Runs solveContact_BStatic followed by writeBackContact on each of desc[0..constraintCount-1] in
+// order, then flushes the context's local threshold stream to the shared stream when fewer than
+// four free slots remain in it. The body data passed to writeBackContact is looked up in
+// cache.solverBodyArray through the descriptor's bodyADataIndex / bodyBDataIndex.
+// An empty batch (constraintCount == 0) is a no-op: nothing is solved and nothing is flushed.
+void solveContact_BStaticBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+ // Without this guard constraintCount-1 below would wrap around and index far outside desc.
+ if(constraintCount == 0)
+ return;
+
+ for(PxU32 a = 1; a < constraintCount; ++a)
+ {
+ Ps::prefetchLine(desc[a].constraint);
+ Ps::prefetchLine(desc[a].constraint, 128);
+ Ps::prefetchLine(desc[a].constraint, 256);
+ PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a-1].bodyADataIndex];
+ PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a-1].bodyBDataIndex];
+ solveContact_BStatic(desc[a-1], cache);
+ writeBackContact(desc[a-1], cache, bd0, bd1);
+ }
+ // The last descriptor has no successor to prefetch, so it is handled outside the loop.
+ PxSolverBodyData& bd0 = cache.solverBodyArray[desc[constraintCount-1].bodyADataIndex];
+ PxSolverBodyData& bd1 = cache.solverBodyArray[desc[constraintCount-1].bodyBDataIndex];
+ solveContact_BStatic(desc[constraintCount-1], cache);
+ writeBackContact(desc[constraintCount-1], cache, bd0, bd1);
+
+ if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4))
+ {
+ //Not enough space to write 4 more thresholds back!
+ //Write back to global buffer
+ // The atomicAdd result minus the local count is used as the first slot of the range copied into the shared stream.
+ PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+ for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
+ {
+ cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
+ }
+ cache.mThresholdStreamIndex = 0;
+ }
+}
+
+// Solves the 1D constraint rows of a pair in which either side may be an articulation link (linkIndex != NO_LINK) instead of a rigid body. Port of scalar implementation to SIMD maths with some interleaving of instructions.
+void solveExt1D(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+ PxU8* PX_RESTRICT bPtr = desc.constraint;
+ //PxU32 length = desc.constraintLength;
+ // desc.constraint points at a SolverConstraint1DHeader followed directly by header->count SolverConstraint1DExt rows.
+ const SolverConstraint1DHeader* PX_RESTRICT header = reinterpret_cast<const SolverConstraint1DHeader*>(bPtr);
+ SolverConstraint1DExt* PX_RESTRICT base = reinterpret_cast<SolverConstraint1DExt*>(bPtr + sizeof(SolverConstraint1DHeader));
+ // Fetch each side's starting velocity: from the solver body for a rigid body, via PxcFsGetVelocity for an articulation link.
+ Vec3V linVel0, angVel0, linVel1, angVel1;
+ if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
+ {
+ linVel0 = V3LoadA(desc.bodyA->linearVelocity);
+ angVel0 = V3LoadA(desc.bodyA->angularState);
+ }
+ else
+ {
+ Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationA, desc.linkIndexA);
+ linVel0 = v.linear;
+ angVel0 = v.angular;
+ }
+
+ if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+ {
+ linVel1 = V3LoadA(desc.bodyB->linearVelocity);
+ angVel1 = V3LoadA(desc.bodyB->angularState);
+ }
+ else
+ {
+ Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationB, desc.linkIndexB);
+ linVel1 = v.linear;
+ angVel1 = v.angular;
+ }
+ // Sums of axis*deltaF over all rows; only consumed below for sides that are articulation links.
+ Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero();
+
+ for(PxU32 i=0; i<header->count;++i, base++)
+ {
+ Ps::prefetchLine(base+1); // prefetch the next row while this one is solved
+ // 16-byte loads starting at each member; the variable names record which scalar lands in the fourth lane (relies on the row's member layout).
+ const Vec4V lin0XYZ_constantW = V4LoadA(&base->lin0.x);
+ const Vec4V lin1XYZ_unbiasedConstantW = V4LoadA(&base->lin1.x);
+ const Vec4V ang0XYZ_velMultiplierW = V4LoadA(&base->ang0.x);
+ const Vec4V ang1XYZ_impulseMultiplierW = V4LoadA(&base->ang1.x);
+ const Vec4V minImpulseX_maxImpulseY_appliedForceZ = V4LoadA(&base->minImpulse);
+
+ const Vec3V lin0 = Vec3V_From_Vec4V(lin0XYZ_constantW); FloatV constant = V4GetW(lin0XYZ_constantW);
+ const Vec3V lin1 = Vec3V_From_Vec4V(lin1XYZ_unbiasedConstantW);
+ const Vec3V ang0 = Vec3V_From_Vec4V(ang0XYZ_velMultiplierW); FloatV vMul = V4GetW(ang0XYZ_velMultiplierW);
+ const Vec3V ang1 = Vec3V_From_Vec4V(ang1XYZ_impulseMultiplierW); FloatV iMul = V4GetW(ang1XYZ_impulseMultiplierW);
+
+ const FloatV minImpulse = V4GetX(minImpulseX_maxImpulseY_appliedForceZ);
+ const FloatV maxImpulse = V4GetY(minImpulseX_maxImpulseY_appliedForceZ);
+ const FloatV appliedForce = V4GetZ(minImpulseX_maxImpulseY_appliedForceZ);
+ // Velocity of side 0 minus side 1 along the row (linear plus angular terms).
+ const Vec3V v0 = V3MulAdd(linVel0, lin0, V3Mul(angVel0, ang0));
+ const Vec3V v1 = V3MulAdd(linVel1, lin1, V3Mul(angVel1, ang1));
+ const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
+ // Unclamped impulse = iMul*appliedForce + (vMul*normalVel + constant); it is then clamped to [minImpulse, maxImpulse].
+ const FloatV unclampedForce = FScaleAdd(iMul, appliedForce, FScaleAdd(vMul, normalVel, constant));
+ const FloatV clampedForce = FMin(maxImpulse, (FMax(minImpulse, unclampedForce)));
+ const FloatV deltaF = FSub(clampedForce, appliedForce);
+ // Persist the clamped total on the row and accumulate the impulse change along each axis.
+ FStore(clampedForce, &base->appliedForce);
+ li0 = V3ScaleAdd(lin0, deltaF, li0); ai0 = V3ScaleAdd(ang0, deltaF, ai0);
+ li1 = V3ScaleAdd(lin1, deltaF, li1); ai1 = V3ScaleAdd(ang1, deltaF, ai1);
+ // Advance the local velocities using the row's stored per-unit-impulse responses deltaVA / deltaVB.
+ linVel0 = V3ScaleAdd(base->deltaVA.linear, deltaF, linVel0); angVel0 = V3ScaleAdd(base->deltaVA.angular, deltaF, angVel0);
+ linVel1 = V3ScaleAdd(base->deltaVB.linear, deltaF, linVel1); angVel1 = V3ScaleAdd(base->deltaVB.angular, deltaF, angVel1);
+ }
+ // Commit: a rigid body gets the locally integrated velocities; an articulation link gets the accumulated impulse, scaled by the header's factors.
+ if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
+ {
+ V3StoreA(linVel0, desc.bodyA->linearVelocity);
+ V3StoreA(angVel0, desc.bodyA->angularState);
+ }
+ else
+ PxcFsApplyImpulse(*desc.articulationA, desc.linkIndexA, V3Scale(li0, FLoad(header->linearInvMassScale0)),
+ V3Scale(ai0, FLoad(header->angularInvMassScale0)));
+
+ if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+ {
+ V3StoreA(linVel1, desc.bodyB->linearVelocity);
+ V3StoreA(angVel1, desc.bodyB->angularState);
+ }
+ else
+ PxcFsApplyImpulse(*desc.articulationB, desc.linkIndexB, V3Scale(li1, FLoad(header->linearInvMassScale1)),
+ V3Scale(ai1, FLoad(header->angularInvMassScale1)));
+}
+
+ // Solves one contact constraint (every patch stored in its stream) for a body pair in which
+ // either side may be an articulation link.
+ //
+ // desc  - constraint descriptor: provides the two bodies / articulation links and the solver
+ //         byte stream (desc.constraint, desc.constraintLengthOver16*16 bytes long).
+ // cache - solver context; only cache.doFriction is consulted by this function.
+ //
+ // A side whose link index is NO_LINK is treated as a plain rigid body: its velocity is loaded
+ // from, and stored back to, the solver body. Otherwise the velocity comes from PxcFsGetVelocity
+ // and the impulse accumulated over all patches is passed to PxcFsApplyImpulse.
+ //
+ // Per-patch stream layout, as walked below:
+ //   SolverContactHeader
+ //   numNormalConstr   x SolverContactPointExt
+ //   applied-force floats, count rounded up to a multiple of 4
+ //   numFrictionConstr x SolverContactFrictionExt
+ void solveExtContact(const PxSolverConstraintDesc& desc, SolverContext& cache)
+ {
+     const bool rigidA = (desc.linkIndexA == PxSolverConstraintDesc::NO_LINK);
+     const bool rigidB = (desc.linkIndexB == PxSolverConstraintDesc::NO_LINK);
+
+     Vec3V linVelA, angVelA, linVelB, angVelB;
+
+     // Fetch the current velocity of side A.
+     if(rigidA)
+     {
+         linVelA = V3LoadA(desc.bodyA->linearVelocity);
+         angVelA = V3LoadA(desc.bodyA->angularState);
+     }
+     else
+     {
+         const Cm::SpatialVectorV linkVel = PxcFsGetVelocity(*desc.articulationA, desc.linkIndexA);
+         linVelA = linkVel.linear;
+         angVelA = linkVel.angular;
+     }
+
+     // Fetch the current velocity of side B.
+     if(rigidB)
+     {
+         linVelB = V3LoadA(desc.bodyB->linearVelocity);
+         angVelB = V3LoadA(desc.bodyB->angularState);
+     }
+     else
+     {
+         const Cm::SpatialVectorV linkVel = PxcFsGetVelocity(*desc.articulationB, desc.linkIndexB);
+         linVelB = linkVel.linear;
+         angVelB = linkVel.angular;
+     }
+
+     // The stream is read and written through 'cursor' while 'streamEnd' marks its end.
+     PxU8* PX_RESTRICT cursor = desc.constraint;
+     const PxU8* PX_RESTRICT streamEnd = desc.constraint + desc.constraintLengthOver16*16;
+
+     // Impulses summed over every patch; only consumed for articulation links.
+     Vec3V totalLinA = V3Zero();
+     Vec3V totalAngA = V3Zero();
+     Vec3V totalLinB = V3Zero();
+     Vec3V totalAngB = V3Zero();
+
+     while(cursor < streamEnd)
+     {
+         SolverContactHeader* PX_RESTRICT header = reinterpret_cast<SolverContactHeader*>(cursor);
+         cursor += sizeof(SolverContactHeader);
+
+         const PxU32 normalCount = header->numNormalConstr;
+         const PxU32 frictionCount = header->numFrictionConstr;
+
+         SolverContactPointExt* PX_RESTRICT contactRows = reinterpret_cast<SolverContactPointExt*>(cursor);
+         Ps::prefetchLine(contactRows);
+         cursor += normalCount * sizeof(SolverContactPointExt);
+
+         // One float per contact point, padded to a multiple of four entries.
+         PxF32* forceBuffer = reinterpret_cast<PxF32*>(cursor);
+         cursor += sizeof(PxF32) * ((normalCount + 3) & (~3));
+
+         SolverContactFrictionExt* PX_RESTRICT frictionRows = reinterpret_cast<SolverContactFrictionExt*>(cursor);
+         cursor += frictionCount * sizeof(SolverContactFrictionExt);
+
+         // Impulses produced by this patch alone.
+         Vec3V patchLinA = V3Zero();
+         Vec3V patchLinB = V3Zero();
+         Vec3V patchAngA = V3Zero();
+         Vec3V patchAngB = V3Zero();
+
+         const Vec3V patchNormal = header->normal;
+
+         const FloatV normalImpulseSum = solveExtContacts(contactRows, normalCount, patchNormal, linVelA, angVelA, linVelB,
+             angVelB, patchLinA, patchAngA, patchLinB, patchAngB, forceBuffer);
+
+         if(cache.doFriction && frictionCount)
+         {
+             Ps::prefetchLine(frictionRows);
+
+             // Friction limits are the friction coefficients scaled by the normal impulse just solved.
+             const FloatV staticLimit = FMul(header->getStaticFriction(), normalImpulseSum);
+             const FloatV dynamicLimit = FMul(header->getDynamicFriction(), normalImpulseSum);
+             const FloatV negDynamicLimit = FNeg(dynamicLimit);
+             const FloatV negStaticLimit = FNeg(staticLimit);
+
+             BoolV anyBroken = BFFFF();
+
+             for(PxU32 k = 0; k < frictionCount; ++k)
+             {
+                 SolverContactFrictionExt& row = frictionRows[k];
+                 Ps::prefetchLine(&frictionRows[k+1]);
+
+                 // Each Vec4V packs a direction in XYZ and a scalar in W.
+                 const Vec4V dirXYZ_forceW = row.normalXYZ_appliedForceW;
+                 const Vec4V raXnXYZ_velMulW = row.raXnXYZ_velMultiplierW;
+                 const Vec4V rbXnXYZ_biasW = row.rbXnXYZ_biasW;
+
+                 const Vec3V dir = Vec3V_From_Vec4V(dirXYZ_forceW);
+                 const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMulW);
+                 const Vec3V rbXn = Vec3V_From_Vec4V(rbXnXYZ_biasW);
+
+                 const FloatV oldForce = V4GetW(dirXYZ_forceW);
+                 const FloatV bias = V4GetW(rbXnXYZ_biasW);
+                 const FloatV velMul = V4GetW(raXnXYZ_velMulW);
+
+                 const FloatV targetVel = FLoad(row.targetVel);
+
+                 // Relative velocity along this friction row.
+                 const Vec3V projA = V3MulAdd(linVelA, dir, V3Mul(angVelA, raXn));
+                 const Vec3V projB = V3MulAdd(linVelB, dir, V3Mul(angVelB, rbXn));
+                 const FloatV relVel = V3SumElems(V3Sub(projA, projB));
+
+                 // Part of the total impulse that does not depend on the relative velocity:
+                 // oldForce - (bias - targetVel) * velMul
+                 const FloatV velIndependent = FNegScaleSub(FSub(bias, targetVel), velMul, oldForce);
+
+                 // Candidate total impulse for this row before clamping.
+                 const FloatV candidate = FNegScaleSub(relVel, velMul, velIndependent);
+
+                 // If the candidate leaves [-staticLimit, staticLimit], it is clamped to
+                 // [-dynamicLimit, dynamicLimit] instead and the patch is flagged as broken.
+                 const BoolV belowStatic = FIsGrtr(negStaticLimit, candidate);
+                 const BoolV aboveStatic = FIsGrtr(candidate, staticLimit);
+
+                 const FloatV clampedLow = FMax(negDynamicLimit, candidate);
+                 const FloatV clampedHigh = FMin(dynamicLimit, candidate);
+
+                 const FloatV newForce = FSel(belowStatic, clampedLow,
+                     FSel(aboveStatic, clampedHigh, candidate));
+
+                 anyBroken = BOr(anyBroken, BOr(belowStatic, aboveStatic));
+
+                 const FloatV deltaF = FSub(newForce, oldForce);
+
+                 // Advance the working velocities by this row's impulse change.
+                 linVelA = V3ScaleAdd(row.linDeltaVA, deltaF, linVelA);
+                 angVelA = V3ScaleAdd(row.angDeltaVA, deltaF, angVelA);
+                 linVelB = V3ScaleAdd(row.linDeltaVB, deltaF, linVelB);
+                 angVelB = V3ScaleAdd(row.angDeltaVB, deltaF, angVelB);
+
+                 // Accumulate the matching impulses for this patch.
+                 patchLinA = V3ScaleAdd(dir, deltaF, patchLinA);
+                 patchAngA = V3ScaleAdd(raXn, deltaF, patchAngA);
+                 patchLinB = V3ScaleAdd(dir, deltaF, patchLinB);
+                 patchAngB = V3ScaleAdd(rbXn, deltaF, patchAngB);
+
+                 row.setAppliedForce(newForce);
+             }
+
+             Store_From_BoolV(anyBroken, &header->broken);
+         }
+
+         // Fold the patch impulses into the totals, scaled by the header's dominance values.
+         // Side B is accumulated through V3NegScaleSub rather than V3ScaleAdd.
+         totalLinA = V3ScaleAdd(patchLinA, header->getDominance0(), totalLinA);
+         totalAngA = V3ScaleAdd(patchAngA, FLoad(header->angDom0), totalAngA);
+         totalLinB = V3NegScaleSub(patchLinB, header->getDominance1(), totalLinB);
+         totalAngB = V3NegScaleSub(patchAngB, FLoad(header->angDom1), totalAngB);
+     }
+
+     // Commit side A: store velocities for a rigid body, apply the impulse for a link.
+     if(rigidA)
+     {
+         V3StoreA(linVelA, desc.bodyA->linearVelocity);
+         V3StoreA(angVelA, desc.bodyA->angularState);
+     }
+     else
+     {
+         PxcFsApplyImpulse(*desc.articulationA, desc.linkIndexA, totalLinA, totalAngA);
+     }
+
+     // Commit side B in the same way.
+     if(rigidB)
+     {
+         V3StoreA(linVelB, desc.bodyB->linearVelocity);
+         V3StoreA(angVelB, desc.bodyB->angularState);
+     }
+     else
+     {
+         PxcFsApplyImpulse(*desc.articulationB, desc.linkIndexB, totalLinB, totalAngB);
+     }
+
+     // The walk must have consumed the stream exactly.
+     PX_ASSERT(cursor == streamEnd);
+ }
+
+
+ // Runs solveExtContact on each of the constraintCount descriptors in desc, in array order.
+ void solveExtContactBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+ {
+     const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+     for(const PxSolverConstraintDesc* current = desc; current != descEnd; ++current)
+         solveExtContact(*current, cache);
+ }
+
+ // For each of the constraintCount descriptors in desc, in array order: solves the contact
+ // with solveExtContact and then calls concludeContact on the same descriptor.
+ void solveExtContactConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+ {
+     PxU32 index = 0;
+     while(index < constraintCount)
+     {
+         const PxSolverConstraintDesc& current = desc[index];
+         solveExtContact(current, cache);
+         concludeContact(current, cache);
+         ++index;
+     }
+ }
+
+ // For each of the constraintCount descriptors in desc, in array order: solves the contact with
+ // solveExtContact and then calls writeBackContact. Afterwards any threshold entries left in the
+ // context's local threshold stream are copied to the shared threshold stream.
+ //
+ // The body-data reference handed to writeBackContact is entry 0 of cache.solverBodyArray when
+ // that side is an articulation link, otherwise the entry at the descriptor's body data index.
+ void solveExtContactBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+ {
+     for(PxU32 index = 0; index < constraintCount; ++index)
+     {
+         const PxSolverConstraintDesc& current = desc[index];
+
+         PxSolverBodyData& dataA = cache.solverBodyArray[current.linkIndexA == PxSolverConstraintDesc::NO_LINK ? current.bodyADataIndex : 0];
+         PxSolverBodyData& dataB = cache.solverBodyArray[current.linkIndexB == PxSolverConstraintDesc::NO_LINK ? current.bodyBDataIndex : 0];
+
+         solveExtContact(current, cache);
+         writeBackContact(current, cache, dataA, dataB);
+     }
+
+     if(cache.mThresholdStreamIndex > 0)
+     {
+         // Flush the locally buffered threshold entries to the shared buffer. The value returned
+         // by atomicAdd minus the number of entries is used as the first destination slot.
+         const PxI32 pending = PxI32(cache.mThresholdStreamIndex);
+         const PxI32 firstSlot = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, pending) - pending;
+
+         for(PxU32 entry = 0; entry < cache.mThresholdStreamIndex; ++entry)
+         {
+             cache.mSharedThresholdStream[entry + firstSlot] = cache.mThresholdStream[entry];
+         }
+
+         cache.mThresholdStreamIndex = 0;
+     }
+ }
+
+ // Runs solveExt1D on each of the constraintCount descriptors in desc, in array order.
+ void solveExt1DBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+ {
+     const PxSolverConstraintDesc* const descEnd = desc + constraintCount;
+     for(const PxSolverConstraintDesc* current = desc; current != descEnd; ++current)
+         solveExt1D(*current, cache);
+ }
+
+ // For each of the constraintCount descriptors in desc, in array order: solves the constraint
+ // with solveExt1D and then calls conclude1D on the same descriptor.
+ void solveExt1DConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+ {
+     PxU32 index = 0;
+     while(index < constraintCount)
+     {
+         const PxSolverConstraintDesc& current = desc[index];
+         solveExt1D(current, cache);
+         conclude1D(current, cache);
+         ++index;
+     }
+ }
+
+ // For each of the constraintCount descriptors in desc, in array order: solves the constraint
+ // with solveExt1D and then calls writeBack1D.
+ //
+ // The body-data reference handed to writeBack1D is entry 0 of cache.solverBodyArray when that
+ // side is an articulation link, otherwise the entry at the descriptor's body data index.
+ void solveExt1DBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+ {
+     for(PxU32 index = 0; index < constraintCount; ++index)
+     {
+         const PxSolverConstraintDesc& current = desc[index];
+
+         PxSolverBodyData& dataA = cache.solverBodyArray[current.linkIndexA == PxSolverConstraintDesc::NO_LINK ? current.bodyADataIndex : 0];
+         PxSolverBodyData& dataB = cache.solverBodyArray[current.linkIndexB == PxSolverConstraintDesc::NO_LINK ? current.bodyBDataIndex : 0];
+
+         solveExt1D(current, cache);
+         writeBack1D(current, cache, dataA, dataB);
+     }
+ }
+
+ // Calls writeBack1D, without solving, for each of the constraintCount descriptors in desc,
+ // in array order.
+ //
+ // The body-data reference handed to writeBack1D is entry 0 of cache.solverBodyArray when that
+ // side is an articulation link, otherwise the entry at the descriptor's body data index.
+ void ext1DBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+ {
+     for(PxU32 index = 0; index < constraintCount; ++index)
+     {
+         const PxSolverConstraintDesc& current = desc[index];
+
+         PxSolverBodyData& dataA = cache.solverBodyArray[current.linkIndexA == PxSolverConstraintDesc::NO_LINK ? current.bodyADataIndex : 0];
+         PxSolverBodyData& dataB = cache.solverBodyArray[current.linkIndexB == PxSolverConstraintDesc::NO_LINK ? current.bodyBDataIndex : 0];
+
+         writeBack1D(current, cache, dataA, dataB);
+     }
+ }
+
+ // Single-descriptor helper: calls solveExtContact, then concludeContact, on desc.
+ void solveConcludeExtContact(const PxSolverConstraintDesc& desc, SolverContext& cache)
+ {
+     // Solve first; the conclude step runs on the already-solved constraint.
+     solveExtContact(desc, cache);
+
+     concludeContact(desc, cache);
+ }
+
+ // Single-descriptor helper: calls solveExt1D, then conclude1D, on desc.
+ void solveConcludeExt1D(const PxSolverConstraintDesc& desc, SolverContext& cache)
+ {
+     // Solve first; the conclude step runs on the already-solved constraint.
+     solveExt1D(desc, cache);
+
+     conclude1D(desc, cache);
+ }
+
+
+ // Single-descriptor helper: calls solve1D, then conclude1D, on desc.
+ void solveConclude1D(const PxSolverConstraintDesc& desc, SolverContext& cache)
+ {
+     // Solve first; the conclude step runs on the already-solved constraint.
+     solve1D(desc, cache);
+
+     conclude1D(desc, cache);
+ }
+
+ // Single-descriptor helper: calls solveContact, then concludeContact, on desc.
+ void solveConcludeContact(const PxSolverConstraintDesc& desc, SolverContext& cache)
+ {
+     // Solve first; the conclude step runs on the already-solved constraint.
+     solveContact(desc, cache);
+
+     concludeContact(desc, cache);
+ }
+
+ // Single-descriptor helper: calls solveContact_BStatic, then concludeContact, on desc.
+ void solveConcludeContact_BStatic(const PxSolverConstraintDesc& desc, SolverContext& cache)
+ {
+     // Solve first; the conclude step runs on the already-solved constraint.
+     solveContact_BStatic(desc, cache);
+
+     concludeContact(desc, cache);
+ }
+
+
+}
+
+}
+
+#endif