Initial commit:

PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
author: git perforce import user <a@b> 2016-10-25 12:29:14 -0600
committer: Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> 2016-10-25 18:56:37 -0500
commit: 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree: fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp
download: physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
1 files changed, 1121 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp
new file mode 100644
index 00000000..ea935ce9
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp
@@ -0,0 +1,1121 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+
+#include "foundation/PxPreprocessor.h"
+#include "PsVecMath.h"
+
+#ifdef PX_SUPPORT_SIMD
+
+#include "CmPhysXCommon.h"
+#include "DySolverBody.h"
+#include "DySolverContact.h"
+#include "DySolverConstraint1D.h"
+#include "DySolverConstraintDesc.h"
+#include "DyThresholdTable.h"
+#include "DySolverContext.h"
+#include "PsUtilities.h"
+#include "DyConstraint.h"
+#include "PsAtomic.h"
+#include "DySolverConstraintsShared.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+// Runs one solver pass over the 1D constraint rows referenced by desc.constraint and updates
+// the velocity state of desc.bodyA / desc.bodyB accordingly.
+// Port of scalar implementation to SIMD maths with some interleaving of instructions.
+//
+// desc:  supplies both solver bodies and the constraint buffer, which is read as a
+//        SolverConstraint1DHeader immediately followed by header->count SolverConstraint1D rows.
+// cache: unused by this function.
+//
+// Side effects: every row's appliedForce is overwritten with its newly clamped value, and the
+// linearVelocity / angularState of both bodies are stored back once after the last row.
+void solve1D(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+	PX_UNUSED(cache);
+	PxSolverBody& b0 = *desc.bodyA;
+	PxSolverBody& b1 = *desc.bodyB;
+
+	PxU8* PX_RESTRICT bPtr = desc.constraint;
+	//PxU32 length = desc.constraintLength;
+
+	// The rows start directly behind the header.
+	// NOTE(review): rows are stepped with sizeof(SolverConstraint1D) (base++), so this routine
+	// presumably must only be given non-ext rows - confirm against the caller.
+	const SolverConstraint1DHeader* PX_RESTRICT  header = reinterpret_cast<const SolverConstraint1DHeader*>(bPtr);
+	SolverConstraint1D* PX_RESTRICT base = reinterpret_cast<SolverConstraint1D*>(bPtr + sizeof(SolverConstraint1DHeader));
+
+	// Work on local copies of the body state; they are written back after the loop.
+	Vec3V linVel0 = V3LoadA(b0.linearVelocity);
+	Vec3V linVel1 = V3LoadA(b1.linearVelocity);
+	Vec3V angState0 = V3LoadA(b0.angularState);
+	Vec3V angState1 = V3LoadA(b1.angularState);
+
+	// Per-constraint scale factors shared by every row.
+	const FloatV invMass0 = FLoad(header->invMass0D0);
+	const FloatV invMass1 = FLoad(header->invMass1D1);
+	const FloatV invInertiaScale0 = FLoad(header->angularInvMassScale0);
+	const FloatV invInertiaScale1 = FLoad(header->angularInvMassScale1);
+
+
+	for(PxU32 i=0; i<header->count;++i, base++)
+	{
+		// Start fetching the next row while this one is processed.
+		Ps::prefetchLine(base+1);
+		SolverConstraint1D& c = *base;
+
+		const Vec3V clinVel0 = V3LoadA(c.lin0);
+		const Vec3V clinVel1 = V3LoadA(c.lin1);
+		const Vec3V cangVel0 = V3LoadA(c.ang0);
+		const Vec3V cangVel1 = V3LoadA(c.ang1);
+
+		const FloatV constant = FLoad(c.constant);
+		const FloatV vMul = FLoad(c.velMultiplier);
+		const FloatV iMul = FLoad(c.impulseMultiplier);
+		const FloatV appliedForce = FLoad(c.appliedForce);
+		//const FloatV targetVel = FLoad(c.targetVelocity);
+		
+		const FloatV maxImpulse = FLoad(c.maxImpulse);
+		const FloatV minImpulse = FLoad(c.minImpulse);
+
+		// Component-wise products; summing the components of (v0 - v1) below gives the relative
+		// velocity along this row (assuming V3MulAdd(a, b, c) evaluates a*b + c).
+		const Vec3V v0 = V3MulAdd(linVel0, clinVel0, V3Mul(angState0, cangVel0));
+		const Vec3V v1 = V3MulAdd(linVel1, clinVel1, V3Mul(angState1, cangVel1));
+
+		const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
+		// iMul*appliedForce + (vMul*normalVel + constant), assuming FScaleAdd(a, b, c) is a*b + c.
+		const FloatV unclampedForce = FScaleAdd(iMul, appliedForce, FScaleAdd(vMul, normalVel, constant));
+		// Keep the accumulated value inside [minImpulse, maxImpulse].
+		const FloatV clampedForce = FMin(maxImpulse, (FMax(minImpulse, unclampedForce)));
+		// Only the change relative to the previously applied value is fed into the velocities.
+		const FloatV deltaF = FSub(clampedForce, appliedForce);
+			
+		FStore(clampedForce, &c.appliedForce);
+		linVel0 = V3ScaleAdd(clinVel0, FMul(deltaF, invMass0), linVel0);			
+		linVel1 = V3NegScaleSub(clinVel1, FMul(deltaF, invMass1), linVel1);
+		angState0 = V3ScaleAdd(cangVel0, FMul(deltaF, invInertiaScale0), angState0);
+		//This should be negScaleSub but invInertiaScale1 is negated already
+		angState1 = V3ScaleAdd(cangVel1, FMul(deltaF, invInertiaScale1), angState1);
+
+	}
+
+	// Publish the updated state of both bodies.
+	V3StoreA(linVel0, b0.linearVelocity);
+	V3StoreA(angState0, b0.angularState);
+	V3StoreA(linVel1, b1.linearVelocity);
+	V3StoreA(angState1, b1.angularState);
+	
+	PX_ASSERT(b0.linearVelocity.isFinite());
+	PX_ASSERT(b0.angularState.isFinite());
+	PX_ASSERT(b1.linearVelocity.isFinite());
+	PX_ASSERT(b1.angularState.isFinite());
+}
+
+// Replaces the biased constant of every 1D row in desc.constraint with the row's unbiased
+// constant. The row size depends on the header type: DY_SC_TYPE_EXT_1D buffers hold
+// SolverConstraint1DExt rows, all others hold SolverConstraint1D rows.
+// The solver context argument is not used.
+void conclude1D(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+	PxU8* const bufferStart = desc.constraint;
+	const SolverConstraint1DHeader* const hdr = reinterpret_cast<const SolverConstraint1DHeader*>(bufferStart);
+
+	// Pick the per-row step once, up front.
+	PxU32 rowSize = PxU32(sizeof(SolverConstraint1D));
+	if(hdr->type == DY_SC_TYPE_EXT_1D)
+		rowSize = PxU32(sizeof(SolverConstraint1DExt));
+
+	// Rows begin right after the header.
+	PxU8* rowPtr = bufferStart + sizeof(SolverConstraint1DHeader);
+	for(PxU32 remaining = hdr->count; remaining != 0; --remaining, rowPtr += rowSize)
+	{
+		SolverConstraint1D* row = reinterpret_cast<SolverConstraint1D*>(rowPtr);
+		row->constant = row->unbiasedConstant;
+	}
+
+	// After the last row the cursor must sit exactly at the end of the constraint buffer.
+	PX_ASSERT(bufferStart + getConstraintLength(desc) == rowPtr);
+}
+
+// ==============================================================
+
+// Runs one solver pass over the contact constraint referenced by desc.constraint for two
+// solver bodies (desc.bodyA and desc.bodyB), updating the velocity state of both.
+//
+// The buffer is walked as a sequence of patches. Each patch is laid out as
+//   SolverContactHeader,
+//   numNormalConstr   x SolverContactPoint,
+//   a PxF32 force buffer padded to a multiple of four entries,
+//   numFrictionConstr x SolverContactFriction.
+//
+// desc:  supplies the bodies and the constraint buffer.
+// cache: doFriction enables the friction rows; writeBackIteration triggers a prefetch of the
+//        friction-broken writeback byte.
+//
+// Side effects: friction rows get their applied force updated, hdr->broken is stored for every
+// patch whose friction rows were processed, and both bodies' linearVelocity / angularState are
+// written back at the end. What solveDynamicContacts modifies is not visible in this file.
+void solveContact(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+	PxSolverBody& b0 = *desc.bodyA;
+	PxSolverBody& b1 = *desc.bodyB;
+
+	// Local copies of the body state; stored back after all patches are done.
+	Vec3V linVel0 = V3LoadA(b0.linearVelocity);
+	Vec3V linVel1 = V3LoadA(b1.linearVelocity);
+	Vec3V angState0 = V3LoadA(b0.angularState);
+	Vec3V angState1 = V3LoadA(b1.angularState);
+
+	const PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);
+
+	//hopefully pointer aliasing doesn't bite.
+	PxU8* PX_RESTRICT currPtr = desc.constraint;
+
+	while(currPtr < last)
+	{
+		SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
+		currPtr += sizeof(SolverContactHeader);
+
+		const PxU32 numNormalConstr = hdr->numNormalConstr;
+		const PxU32	numFrictionConstr = hdr->numFrictionConstr;
+
+		SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
+		Ps::prefetchLine(contacts);
+		currPtr += numNormalConstr * sizeof(SolverContactPoint);
+
+		// One PxF32 per contact point, rounded up to a multiple of four entries.
+		PxF32* forceBuffer = reinterpret_cast<PxF32*>(currPtr);
+		currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));
+
+		SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
+		currPtr += numFrictionConstr * sizeof(SolverContactFriction);
+
+		const FloatV invMassA = FLoad(hdr->invMass0);
+		const FloatV invMassB = FLoad(hdr->invMass1);
+
+		const FloatV angDom0 = FLoad(hdr->angDom0);
+		const FloatV angDom1 = FLoad(hdr->angDom1);
+
+		const Vec3V contactNormal = hdr->normal;
+
+		// Solve the normal rows; the returned value bounds the friction impulses below.
+		const FloatV accumulatedNormalImpulse = solveDynamicContacts(contacts, numNormalConstr, contactNormal, invMassA, invMassB, 
+			angDom0, angDom1, linVel0, angState0, linVel1, angState1, forceBuffer); 
+
+		if(cache.doFriction && numFrictionConstr)
+		{
+			const FloatV staticFrictionCof = hdr->getStaticFriction();
+			const FloatV dynamicFrictionCof = hdr->getDynamicFriction();
+			// Static limit decides whether a row is clamped; dynamic limit is the clamp range.
+			const FloatV maxFrictionImpulse = FMul(staticFrictionCof, accumulatedNormalImpulse);
+			const FloatV maxDynFrictionImpulse = FMul(dynamicFrictionCof, accumulatedNormalImpulse);
+			const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse);
+
+			// Becomes true as soon as any friction row of this patch had to be clamped.
+			BoolV broken = BFFFF();
+
+			if(cache.writeBackIteration)
+				Ps::prefetchLine(hdr->frictionBrokenWritebackByte);
+
+			for(PxU32 i=0;i<numFrictionConstr;i++)
+			{
+				SolverContactFriction& f = frictions[i];
+				Ps::prefetchLine(&frictions[i],128);
+
+
+				// Each Vec4V packs a direction in XYZ and a scalar in W, as the field names say.
+				const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW;
+				const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
+				const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW;
+
+				const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
+				const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);
+				const Vec3V rbXn = Vec3V_From_Vec4V(rbXnXYZ_biasW);
+
+				const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
+				const FloatV bias = V4GetW(rbXnXYZ_biasW);
+				const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);
+				
+				const FloatV targetVel = FLoad(f.targetVel);
+
+				const Vec3V delLinVel0 = V3Scale(normal, invMassA);
+				const Vec3V delLinVel1 = V3Scale(normal, invMassB);
+
+				// Relative velocity of the two bodies along this friction direction.
+				const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angState0, raXn));
+				const Vec3V v1 = V3MulAdd(linVel1, normal, V3Mul(angState1, rbXn));
+				const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
+
+
+
+				// appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation
+				const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce);				
+
+				// Algorithm:
+				// if abs(appliedForce + deltaF) > maxFrictionImpulse
+				//    clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
+				//      (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
+				//    set broken flag to true || broken flag
+
+				// FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier);
+				// FloatV potentialSumF = FAdd(appliedForce, deltaF);
+
+				const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);
+
+				// On XBox this clamping code uses the vector simple pipe rather than vector float,
+				// which eliminates a lot of stall cycles
+
+				const BoolV clamp = FIsGrtr(FAbs(totalImpulse), maxFrictionImpulse);
+				
+				const FloatV totalClamped = FMin(maxDynFrictionImpulse, FMax(negMaxDynFrictionImpulse, totalImpulse));
+
+				const FloatV newAppliedForce = FSel(clamp, totalClamped,totalImpulse);
+
+				broken = BOr(broken, clamp);
+
+				FloatV deltaF = FSub(newAppliedForce, appliedForce);
+
+				// we could get rid of the stall here by calculating and clamping delta separately, but
+				// the complexity isn't really worth it.
+
+				// Body 0 receives +deltaF, body 1 the opposite sign.
+				linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
+				linVel1 = V3NegScaleSub(delLinVel1, deltaF, linVel1);
+				angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
+				angState1 = V3NegScaleSub(rbXn, FMul(deltaF, angDom1), angState1);
+
+				f.setAppliedForce(newAppliedForce);
+
+				
+			}
+			Store_From_BoolV(broken, &hdr->broken);
+		}
+
+	}
+
+	// NOTE(review): these four asserts run before the stores below, so they inspect the body
+	// state as it was on entry, not the values computed in this call.
+	PX_ASSERT(b0.linearVelocity.isFinite());
+	PX_ASSERT(b0.angularState.isFinite());
+	PX_ASSERT(b1.linearVelocity.isFinite());
+	PX_ASSERT(b1.angularState.isFinite());
+
+	// Write back
+	// NOTE(review): unaligned stores here, although the same fields were read with V3LoadA.
+	V3StoreU(linVel0, b0.linearVelocity);
+	V3StoreU(linVel1, b1.linearVelocity);
+	V3StoreU(angState0, b0.angularState);
+	V3StoreU(angState1, b1.angularState);
+
+	PX_ASSERT(b0.linearVelocity.isFinite());
+	PX_ASSERT(b0.angularState.isFinite());
+	PX_ASSERT(b1.linearVelocity.isFinite());
+	PX_ASSERT(b1.angularState.isFinite());
+
+	// Every patch must have been consumed exactly.
+	PX_ASSERT(currPtr == last);
+}
+
+// Variant of solveContact that only updates desc.bodyA: no velocity terms of the second body
+// are read or written.
+//
+// The buffer is walked as a sequence of patches. Each patch is laid out as
+//   SolverContactHeader,
+//   numNormalConstr   x SolverContactPoint,
+//   a PxF32 force buffer padded to a multiple of four entries,
+//   numFrictionConstr x SolverContactFriction.
+//
+// desc:  supplies bodyA and the constraint buffer.
+// cache: doFriction enables the friction rows; writeBackIteration triggers a prefetch of the
+//        friction-broken writeback byte.
+//
+// Side effects: friction rows get their applied force updated, hdr->broken is stored for every
+// patch whose friction rows were processed, and bodyA's linearVelocity / angularState are
+// written back at the end. What solveStaticContacts modifies is not visible in this file.
+void solveContact_BStatic(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+	PxSolverBody& b0 = *desc.bodyA;
+	//PxSolverBody& b1 = *desc.bodyB;
+
+	// Local copies of the body state; stored back after all patches are done.
+	Vec3V linVel0 = V3LoadA(b0.linearVelocity);
+	Vec3V angState0 = V3LoadA(b0.angularState);
+
+	const PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);
+
+	//hopefully pointer aliasing doesn't bite.
+	PxU8* PX_RESTRICT currPtr = desc.constraint;
+
+	while(currPtr < last)
+	{
+		SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
+		currPtr += sizeof(SolverContactHeader);
+
+		const PxU32 numNormalConstr = hdr->numNormalConstr;
+		const PxU32	numFrictionConstr = hdr->numFrictionConstr;
+
+		SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
+		//Ps::prefetchLine(contacts);
+		currPtr += numNormalConstr * sizeof(SolverContactPoint);
+
+		// One PxF32 per contact point, rounded up to a multiple of four entries.
+		PxF32* forceBuffer = reinterpret_cast<PxF32*>(currPtr);
+		currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));
+
+		SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
+		currPtr += numFrictionConstr * sizeof(SolverContactFriction);
+
+		const FloatV invMassA = FLoad(hdr->invMass0);
+
+		const Vec3V contactNormal = hdr->normal;
+		const FloatV angDom0 = FLoad(hdr->angDom0);
+
+		// Solve the normal rows; the returned value bounds the friction impulses below.
+		const FloatV accumulatedNormalImpulse = solveStaticContacts(contacts, numNormalConstr, contactNormal,
+			invMassA, angDom0, linVel0, angState0, forceBuffer);
+
+		if(cache.doFriction && numFrictionConstr)
+		{
+			// Static limit decides whether a row is clamped; dynamic limit is the clamp range.
+			const FloatV maxFrictionImpulse = FMul(hdr->getStaticFriction(), accumulatedNormalImpulse);
+			const FloatV maxDynFrictionImpulse = FMul(hdr->getDynamicFriction(), accumulatedNormalImpulse);
+			// Constant for the whole patch, so it is computed once here instead of per row.
+			const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse);
+
+			// Becomes true as soon as any friction row of this patch had to be clamped.
+			BoolV broken = BFFFF();
+			if(cache.writeBackIteration)
+				Ps::prefetchLine(hdr->frictionBrokenWritebackByte);
+
+			for(PxU32 i=0;i<numFrictionConstr;i++)
+			{
+				SolverContactFriction& f = frictions[i];
+				Ps::prefetchLine(&frictions[i],128);
+
+				// Each Vec4V packs a direction in XYZ and a scalar in W, as the field names say.
+				const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW;
+				const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
+				const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW;
+
+				const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
+				const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);
+
+				const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
+				const FloatV bias = V4GetW(rbXnXYZ_biasW);
+				const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);
+
+				const FloatV targetVel = FLoad(f.targetVel);
+
+				const Vec3V delLinVel0 = V3Scale(normal, invMassA);
+
+				// Only body 0 contributes to the velocity along this friction direction.
+				const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angState0, raXn));
+				const FloatV normalVel = V3SumElems(v0);
+
+				// appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation
+				const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce);
+
+				// Algorithm:
+				// if abs(appliedForce + deltaF) > maxFrictionImpulse
+				//    clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
+				//      (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
+				//    set broken flag to true || broken flag
+
+				const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);
+
+				// On XBox this clamping code uses the vector simple pipe rather than vector float,
+				// which eliminates a lot of stall cycles
+
+				const BoolV clamp = FIsGrtr(FAbs(totalImpulse), maxFrictionImpulse);
+
+				const FloatV totalClamped = FMin(maxDynFrictionImpulse, FMax(negMaxDynFrictionImpulse, totalImpulse));
+
+				broken = BOr(broken, clamp);
+
+				const FloatV newAppliedForce = FSel(clamp, totalClamped,totalImpulse);
+
+				FloatV deltaF = FSub(newAppliedForce, appliedForce);
+
+				// we could get rid of the stall here by calculating and clamping delta separately, but
+				// the complexity isn't really worth it.
+
+				linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
+				angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
+
+				f.setAppliedForce(newAppliedForce);
+
+			}
+			Store_From_BoolV(broken, &hdr->broken);
+		}
+
+	}
+
+	// These two asserts run before the stores below, so they inspect the state seen on entry.
+	PX_ASSERT(b0.linearVelocity.isFinite());
+	PX_ASSERT(b0.angularState.isFinite());
+
+	// Write back
+	V3StoreA(linVel0, b0.linearVelocity);
+	V3StoreA(angState0, b0.angularState);
+
+	PX_ASSERT(b0.linearVelocity.isFinite());
+	PX_ASSERT(b0.angularState.isFinite());
+
+	// Every patch must have been consumed exactly.
+	PX_ASSERT(currPtr == last);
+}
+
+
+// Walks every patch of the contact constraint in desc.constraint and removes the bias terms:
+// each contact point's biasedErr is replaced by its unbiasedErr and each friction row's bias
+// is set to zero. Patches whose header type is DY_SC_TYPE_EXT_CONTACT use the larger
+// SolverContactPointExt / SolverContactFrictionExt strides. The solver context is not used.
+void concludeContact(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+	PxU8* PX_RESTRICT cursor = desc.constraint;
+	PxU8* PX_RESTRICT bufferEnd = desc.constraint + getConstraintLength(desc);
+
+	const FloatV zeroBias = FZero();
+
+	while(cursor < bufferEnd)
+	{
+		const SolverContactHeader* PX_RESTRICT patch = reinterpret_cast<const SolverContactHeader*>(cursor);
+		cursor += sizeof(SolverContactHeader);
+
+		const PxU32 pointCount = patch->numNormalConstr;
+		const PxU32 frictionCount = patch->numFrictionConstr;
+
+		// Pull in the cache lines following the header before they are touched.
+		Ps::prefetchLine(cursor,128);
+		Ps::prefetchLine(cursor,256);
+		Ps::prefetchLine(cursor,384);
+
+		// Both strides depend on the same type test, so evaluate it once.
+		const bool isExt = patch->type == DY_SC_TYPE_EXT_CONTACT;
+		const PxU32 pointSize = isExt ? sizeof(SolverContactPointExt) : sizeof(SolverContactPoint);
+		const PxU32 frictionSize = isExt ? sizeof(SolverContactFrictionExt) : sizeof(SolverContactFriction);
+
+		for(PxU32 remaining = pointCount; remaining != 0; --remaining, cursor += pointSize)
+		{
+			SolverContactPoint& point = *reinterpret_cast<SolverContactPoint*>(cursor);
+			point.biasedErr = point.unbiasedErr;
+		}
+
+		// Step over the force buffer (one PxF32 per point, padded to a multiple of four).
+		cursor += sizeof(PxF32) * ((pointCount + 3) & (~3));
+
+		for(PxU32 remaining = frictionCount; remaining != 0; --remaining, cursor += frictionSize)
+		{
+			SolverContactFriction& friction = *reinterpret_cast<SolverContactFriction*>(cursor);
+			friction.setBias(zeroBias);
+		}
+	}
+	PX_ASSERT(cursor == bufferEnd);
+}
+
+// Reports the results of a solved contact constraint.
+//
+// For every patch in desc.constraint:
+//   - if desc.writeBack is non-null, the per-point values of the patch's force buffer are
+//     appended to it and summed;
+//   - if the patch is marked broken and has a writeback byte, that byte is set to 1.
+// Afterwards a ThresholdStreamElement is appended to cache.mThresholdStream when the last
+// patch carries eHAS_FORCE_THRESHOLDS, neither side is an articulation link, the summed force
+// is non-zero and at least one body has a reportThreshold below PX_MAX_REAL.
+//
+// desc:     constraint descriptor (buffer, writeback target, link indices).
+// cache:    owner of the threshold stream that may receive one new element.
+// bd0, bd1: body data providing reportThreshold and nodeIndex for the two bodies.
+void writeBackContact(const PxSolverConstraintDesc& desc, SolverContext& cache,
+					  PxSolverBodyData& bd0, PxSolverBodyData& bd1)
+{
+	PxU8* PX_RESTRICT cursor = desc.constraint;
+	PxU8* PX_RESTRICT bufferEnd = desc.constraint + getConstraintLength(desc);
+	PxReal* PX_RESTRICT forceOut = reinterpret_cast<PxReal*>(desc.writeBack);
+
+	PxReal totalNormalForce = 0;
+	bool hasForceThreshold = false;
+
+	while(cursor < bufferEnd)
+	{
+		const SolverContactHeader* PX_RESTRICT patch = reinterpret_cast<const SolverContactHeader*>(cursor);
+		cursor += sizeof(SolverContactHeader);
+
+		// Overwritten for every patch, so the value of the last patch is the one used below.
+		hasForceThreshold = (patch->flags & SolverContactHeader::eHAS_FORCE_THRESHOLDS) != 0;
+
+		const PxU32 pointCount = patch->numNormalConstr;
+		const PxU32 frictionCount = patch->numFrictionConstr;
+
+		Ps::prefetchLine(cursor, 256);
+		Ps::prefetchLine(cursor, 384);
+
+		const bool isExt = patch->type == DY_SC_TYPE_EXT_CONTACT;
+		const PxU32 pointSize = isExt ? sizeof(SolverContactPointExt) : sizeof(SolverContactPoint);
+		const PxU32 frictionSize = isExt ? sizeof(SolverContactFrictionExt) : sizeof(SolverContactFriction);
+
+		// Skip the contact points; the force buffer sits right behind them.
+		cursor += pointSize * pointCount;
+		const PxF32* appliedForces = reinterpret_cast<const PxF32*>(cursor);
+		cursor += sizeof(PxF32) * ((pointCount + 3) & (~3));
+
+		if(forceOut != NULL)
+		{
+			for(PxU32 k = 0; k != pointCount; ++k)
+			{
+				const PxReal force = appliedForces[k];
+				*forceOut = force;
+				++forceOut;
+				totalNormalForce += force;
+			}
+		}
+
+		if(patch->broken)
+		{
+			if(patch->frictionBrokenWritebackByte != NULL)
+				*patch->frictionBrokenWritebackByte = 1;
+		}
+
+		// Friction rows carry nothing that is reported here; just step over them.
+		cursor += frictionSize * frictionCount;
+	}
+	PX_ASSERT(cursor == bufferEnd);
+
+	// Everything below only concerns the force-threshold report.
+	if(!hasForceThreshold)
+		return;
+	if(desc.linkIndexA != PxSolverConstraintDesc::NO_LINK || desc.linkIndexB != PxSolverConstraintDesc::NO_LINK)
+		return;
+	if(totalNormalForce == 0)
+		return;
+	if(!(bd0.reportThreshold < PX_MAX_REAL || bd1.reportThreshold < PX_MAX_REAL))
+		return;
+
+	ThresholdStreamElement elt;
+	elt.shapeInteraction = reinterpret_cast<const SolverContactHeader*>(desc.constraint)->shapeInteraction;
+	elt.threshold = PxMin<float>(bd0.reportThreshold, bd1.reportThreshold);
+	elt.normalForce = totalNormalForce;
+	elt.nodeIndexA = bd0.nodeIndex;
+	elt.nodeIndexB = bd1.nodeIndex;
+	// Store the pair with the smaller node index first.
+	Ps::order(elt.nodeIndexA, elt.nodeIndexB);
+	PX_ASSERT(elt.nodeIndexA < elt.nodeIndexB);
+	PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength);
+	cache.mThresholdStream[cache.mThresholdStreamIndex++] = elt;
+}
+
+// adjust from CoM to joint
+
+// Fills the ConstraintWriteback record referenced by desc.writeBack (if any) from the rows of
+// a 1D constraint.
+//
+// Rows flagged DY_SC_FLAG_OUTPUT_FORCE contribute lin0 * appliedForce to the linear impulse
+// and ang0Writeback * appliedForce to the angular impulse. The angular impulse is then reduced
+// by body0WorldOffset x linearImpulse. For breakable constraints the broken flag is set when
+// either impulse magnitude exceeds its break threshold; otherwise it is cleared.
+// The solver context and body data arguments are not used.
+void writeBack1D(const PxSolverConstraintDesc& desc, SolverContext&, PxSolverBodyData&, PxSolverBodyData&)
+{
+	ConstraintWriteback* out = reinterpret_cast<ConstraintWriteback*>(desc.writeBack);
+	if(!out)
+		return;	// nothing was requested for this constraint
+
+	const SolverConstraint1DHeader* hdr = reinterpret_cast<const SolverConstraint1DHeader*>(desc.constraint);
+
+	// Ext rows are larger than plain rows; choose the step accordingly.
+	PxU32 rowSize = PxU32(sizeof(SolverConstraint1D));
+	if(hdr->type == DY_SC_TYPE_EXT_1D)
+		rowSize = PxU32(sizeof(SolverConstraint1DExt));
+
+	PxVec3 linearSum(0);
+	PxVec3 angularSum(0);
+
+	PxU8* rowPtr = desc.constraint + sizeof(SolverConstraint1DHeader);
+	for(PxU32 remaining = hdr->count; remaining != 0; --remaining, rowPtr += rowSize)
+	{
+		const SolverConstraint1D& row = *reinterpret_cast<const SolverConstraint1D*>(rowPtr);
+		if(!(row.flags & DY_SC_FLAG_OUTPUT_FORCE))
+			continue;
+
+		linearSum += row.lin0 * row.appliedForce;
+		angularSum += row.ang0Writeback * row.appliedForce;
+	}
+
+	angularSum -= hdr->body0WorldOffset.cross(linearSum);
+
+	PxU32 brokenFlag = 0;
+	if(hdr->breakable)
+		brokenFlag = PxU32(linearSum.magnitude()>hdr->linBreakImpulse || angularSum.magnitude()>hdr->angBreakImpulse);
+
+	out->linearImpulse = linearSum;
+	out->angularImpulse = angularSum;
+	out->broken = brokenFlag;
+
+	// The row walk must end exactly at the end of the constraint buffer.
+	PX_ASSERT(desc.constraint + getConstraintLength(desc) == rowPtr);
+}
+
+
+// Solves a batch of 1D constraints in order, prefetching the constraint data of the next
+// descriptor while the current one is being solved.
+//
+// desc:            array of constraintCount descriptors.
+// constraintCount: number of descriptors; an empty batch is accepted and does nothing.
+// cache:           solver context forwarded to solve1D.
+void solve1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	// Without this guard constraintCount-1 would wrap and index far outside desc.
+	if(constraintCount == 0)
+		return;
+
+	const PxU32 lastIndex = constraintCount - 1;
+	for(PxU32 a = 0; a < lastIndex; ++a)
+	{
+		// Warm the cache with the next constraint before working on the current one.
+		Ps::prefetchLine(desc[a+1].constraint);
+		Ps::prefetchLine(desc[a+1].constraint, 128);
+		Ps::prefetchLine(desc[a+1].constraint, 256);
+		solve1D(desc[a], cache);
+	}
+	// The final descriptor has no successor to prefetch.
+	solve1D(desc[lastIndex], cache);
+}
+
+// Solves and then concludes each 1D constraint of a batch in order, prefetching the constraint
+// data of the next descriptor while the current one is being processed.
+//
+// desc:            array of constraintCount descriptors.
+// constraintCount: number of descriptors; an empty batch is accepted and does nothing.
+// cache:           solver context forwarded to solve1D and conclude1D.
+void solve1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	// Without this guard constraintCount-1 would wrap and index far outside desc.
+	if(constraintCount == 0)
+		return;
+
+	const PxU32 lastIndex = constraintCount - 1;
+	for(PxU32 a = 0; a < lastIndex; ++a)
+	{
+		// Warm the cache with the next constraint before working on the current one.
+		Ps::prefetchLine(desc[a+1].constraint);
+		Ps::prefetchLine(desc[a+1].constraint, 128);
+		Ps::prefetchLine(desc[a+1].constraint, 256);
+		solve1D(desc[a], cache);
+		conclude1D(desc[a], cache);
+	}
+	// The final descriptor has no successor to prefetch.
+	solve1D(desc[lastIndex], cache);
+	conclude1D(desc[lastIndex], cache);
+}
+
+// Solves each 1D constraint of a batch and immediately writes its result back, prefetching
+// the constraint data of the next descriptor while the current one is being processed.
+//
+// desc:            array of constraintCount descriptors.
+// constraintCount: number of descriptors; an empty batch is accepted and does nothing.
+// cache:           solver context; its solverBodyArray is indexed with each descriptor's
+//                  bodyADataIndex / bodyBDataIndex to obtain the body data for writeBack1D.
+void solve1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	// Without this guard constraintCount-1 would wrap and index far outside desc.
+	if(constraintCount == 0)
+		return;
+
+	const PxU32 lastIndex = constraintCount - 1;
+	for(PxU32 a = 0; a < lastIndex; ++a)
+	{
+		// Warm the cache with the next constraint before working on the current one.
+		Ps::prefetchLine(desc[a+1].constraint);
+		Ps::prefetchLine(desc[a+1].constraint, 128);
+		Ps::prefetchLine(desc[a+1].constraint, 256);
+		PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].bodyADataIndex];
+		PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].bodyBDataIndex];
+		solve1D(desc[a], cache);
+		writeBack1D(desc[a], cache, bd0, bd1);
+	}
+	// The final descriptor has no successor to prefetch.
+	PxSolverBodyData& bd0 = cache.solverBodyArray[desc[lastIndex].bodyADataIndex];
+	PxSolverBodyData& bd1 = cache.solverBodyArray[desc[lastIndex].bodyBDataIndex];
+	solve1D(desc[lastIndex], cache);
+	writeBack1D(desc[lastIndex], cache, bd0, bd1);
+}
+
+// Writes back the results of a batch of 1D constraints without solving them, prefetching the
+// constraint data of the next descriptor while the current one is being processed.
+//
+// desc:            array of constraintCount descriptors.
+// constraintCount: number of descriptors; an empty batch is accepted and does nothing.
+// cache:           solver context; its solverBodyArray is indexed with each descriptor's
+//                  bodyADataIndex / bodyBDataIndex to obtain the body data for writeBack1D.
+void writeBack1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	// Without this guard constraintCount-1 would wrap and index far outside desc.
+	if(constraintCount == 0)
+		return;
+
+	const PxU32 lastIndex = constraintCount - 1;
+	for(PxU32 a = 0; a < lastIndex; ++a)
+	{
+		// Warm the cache with the next constraint before working on the current one.
+		Ps::prefetchLine(desc[a+1].constraint);
+		Ps::prefetchLine(desc[a+1].constraint, 128);
+		Ps::prefetchLine(desc[a+1].constraint, 256);
+		PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].bodyADataIndex];
+		PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].bodyBDataIndex];
+		writeBack1D(desc[a], cache, bd0, bd1);
+	}
+	// The final descriptor has no successor to prefetch.
+	PxSolverBodyData& bd0 = cache.solverBodyArray[desc[lastIndex].bodyADataIndex];
+	PxSolverBodyData& bd1 = cache.solverBodyArray[desc[lastIndex].bodyBDataIndex];
+	writeBack1D(desc[lastIndex], cache, bd0, bd1);
+}
+
+// Solves a batch of contact constraints in order, prefetching the constraint data of the next
+// descriptor while the current one is being solved.
+//
+// desc:            array of constraintCount descriptors.
+// constraintCount: number of descriptors; an empty batch is accepted and does nothing.
+// cache:           solver context forwarded to solveContact.
+void solveContactBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	// Without this guard constraintCount-1 would wrap and index far outside desc.
+	if(constraintCount == 0)
+		return;
+
+	const PxU32 lastIndex = constraintCount - 1;
+	for(PxU32 a = 0; a < lastIndex; ++a)
+	{
+		// Warm the cache with the next constraint before working on the current one.
+		Ps::prefetchLine(desc[a+1].constraint);
+		Ps::prefetchLine(desc[a+1].constraint, 128);
+		Ps::prefetchLine(desc[a+1].constraint, 256);
+		solveContact(desc[a], cache);
+	}
+	// The final descriptor has no successor to prefetch.
+	solveContact(desc[lastIndex], cache);
+}
+
+// Solves and then concludes each contact constraint of a batch in order, prefetching the
+// constraint data of the next descriptor while the current one is being processed.
+//
+// desc:            array of constraintCount descriptors.
+// constraintCount: number of descriptors; an empty batch is accepted and does nothing.
+// cache:           solver context forwarded to solveContact and concludeContact.
+void solveContactConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	// Without this guard constraintCount-1 would wrap and index far outside desc.
+	if(constraintCount == 0)
+		return;
+
+	const PxU32 lastIndex = constraintCount - 1;
+	for(PxU32 a = 0; a < lastIndex; ++a)
+	{
+		// Warm the cache with the next constraint before working on the current one.
+		Ps::prefetchLine(desc[a+1].constraint);
+		Ps::prefetchLine(desc[a+1].constraint, 128);
+		Ps::prefetchLine(desc[a+1].constraint, 256);
+		solveContact(desc[a], cache);
+		concludeContact(desc[a], cache);
+	}
+	// The final descriptor has no successor to prefetch.
+	solveContact(desc[lastIndex], cache);
+	concludeContact(desc[lastIndex], cache);
+}
+
+// Solves each contact constraint of a batch and immediately writes its result back, then
+// flushes the context's local threshold stream to the shared stream if it is nearly full.
+//
+// desc:            array of constraintCount descriptors.
+// constraintCount: number of descriptors; an empty batch is accepted and does nothing.
+// cache:           solver context; supplies solverBodyArray for the body data and owns both
+//                  the local and the shared threshold stream.
+void solveContactBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	// Without this guard constraintCount-1 would wrap and index far outside desc.
+	if(constraintCount == 0)
+		return;
+
+	const PxU32 lastIndex = constraintCount - 1;
+	for(PxU32 a = 0; a < lastIndex; ++a)
+	{
+		// Warm the cache with the next constraint before working on the current one.
+		Ps::prefetchLine(desc[a+1].constraint);
+		Ps::prefetchLine(desc[a+1].constraint, 128);
+		Ps::prefetchLine(desc[a+1].constraint, 256);
+		PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].bodyADataIndex];
+		PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].bodyBDataIndex];
+		solveContact(desc[a], cache);
+		writeBackContact(desc[a], cache, bd0, bd1);
+	}
+	// The final descriptor has no successor to prefetch.
+	PxSolverBodyData& bd0 = cache.solverBodyArray[desc[lastIndex].bodyADataIndex];
+	PxSolverBodyData& bd1 = cache.solverBodyArray[desc[lastIndex].bodyBDataIndex];
+	solveContact(desc[lastIndex], cache);
+	writeBackContact(desc[lastIndex], cache, bd0, bd1);
+
+	// Flush once fewer than four free slots remain in the local stream. Written as
+	// index + 4 > length (instead of index > length - 4) so the comparison cannot wrap when the
+	// stream is shorter than four entries (assuming the counters are unsigned).
+	if(cache.mThresholdStreamIndex + 4 > cache.mThresholdStreamLength)
+	{
+		//Write back to global buffer
+		// atomicAdd's result minus the amount added is the first slot reserved for this flush.
+		PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+		for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
+		{
+			cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
+		}
+		cache.mThresholdStreamIndex = 0;
+	}
+}
+
+// Solves a batch of contact constraints with solveContact_BStatic in order, prefetching the
+// constraint data of the next descriptor while the current one is being solved.
+//
+// desc:            array of constraintCount descriptors.
+// constraintCount: number of descriptors; an empty batch is accepted and does nothing.
+// cache:           solver context forwarded to solveContact_BStatic.
+void solveContact_BStaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	// Without this guard constraintCount-1 would wrap and index far outside desc.
+	if(constraintCount == 0)
+		return;
+
+	const PxU32 lastIndex = constraintCount - 1;
+	for(PxU32 a = 0; a < lastIndex; ++a)
+	{
+		// Warm the cache with the next constraint before working on the current one.
+		Ps::prefetchLine(desc[a+1].constraint);
+		Ps::prefetchLine(desc[a+1].constraint, 128);
+		Ps::prefetchLine(desc[a+1].constraint, 256);
+		solveContact_BStatic(desc[a], cache);
+	}
+	// The final descriptor has no successor to prefetch.
+	solveContact_BStatic(desc[lastIndex], cache);
+}
+
+// Solves each contact constraint of a batch with solveContact_BStatic and then concludes it,
+// prefetching the constraint data of the next descriptor while the current one is processed.
+//
+// desc:            array of constraintCount descriptors.
+// constraintCount: number of descriptors; an empty batch is accepted and does nothing.
+// cache:           solver context forwarded to solveContact_BStatic and concludeContact.
+void solveContact_BStaticConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	// Without this guard constraintCount-1 would wrap and index far outside desc.
+	if(constraintCount == 0)
+		return;
+
+	const PxU32 lastIndex = constraintCount - 1;
+	for(PxU32 a = 0; a < lastIndex; ++a)
+	{
+		// Warm the cache with the next constraint before working on the current one.
+		Ps::prefetchLine(desc[a+1].constraint);
+		Ps::prefetchLine(desc[a+1].constraint, 128);
+		Ps::prefetchLine(desc[a+1].constraint, 256);
+		solveContact_BStatic(desc[a], cache);
+		concludeContact(desc[a], cache);
+	}
+	// The final descriptor has no successor to prefetch.
+	solveContact_BStatic(desc[lastIndex], cache);
+	concludeContact(desc[lastIndex], cache);
+}
+
+// Solves each contact constraint of a batch with solveContact_BStatic and immediately writes
+// its result back, then flushes the context's local threshold stream to the shared stream if
+// it is nearly full.
+//
+// desc:            array of constraintCount descriptors.
+// constraintCount: number of descriptors; an empty batch is accepted and does nothing.
+// cache:           solver context; supplies solverBodyArray for the body data and owns both
+//                  the local and the shared threshold stream.
+void solveContact_BStaticBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	// Without this guard constraintCount-1 would wrap and index far outside desc.
+	if(constraintCount == 0)
+		return;
+
+	const PxU32 lastIndex = constraintCount - 1;
+	for(PxU32 a = 0; a < lastIndex; ++a)
+	{
+		// Warm the cache with the next constraint before working on the current one.
+		Ps::prefetchLine(desc[a+1].constraint);
+		Ps::prefetchLine(desc[a+1].constraint, 128);
+		Ps::prefetchLine(desc[a+1].constraint, 256);
+		PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].bodyADataIndex];
+		PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].bodyBDataIndex];
+		solveContact_BStatic(desc[a], cache);
+		writeBackContact(desc[a], cache, bd0, bd1);
+	}
+	// The final descriptor has no successor to prefetch.
+	PxSolverBodyData& bd0 = cache.solverBodyArray[desc[lastIndex].bodyADataIndex];
+	PxSolverBodyData& bd1 = cache.solverBodyArray[desc[lastIndex].bodyBDataIndex];
+	solveContact_BStatic(desc[lastIndex], cache);
+	writeBackContact(desc[lastIndex], cache, bd0, bd1);
+
+	// Flush once fewer than four free slots remain in the local stream. Written as
+	// index + 4 > length (instead of index > length - 4) so the comparison cannot wrap when the
+	// stream is shorter than four entries (assuming the counters are unsigned).
+	if(cache.mThresholdStreamIndex + 4 > cache.mThresholdStreamLength)
+	{
+		//Not enough space to write 4 more thresholds back!
+		//Write back to global buffer
+		// atomicAdd's result minus the amount added is the first slot reserved for this flush.
+		PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex);
+		for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a)
+		{
+			cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a];
+		}
+		cache.mThresholdStreamIndex = 0;
+	}
+}
+
+// Runs one solver pass over a 1D constraint whose rows are SolverConstraint1DExt, where either
+// side may be a plain solver body or an articulation link.
+// Port of scalar implementation to SIMD maths with some interleaving of instructions.
+//
+// desc: supplies the constraint buffer (SolverConstraint1DHeader followed by header->count
+//       SolverConstraint1DExt rows) and, per side, either a solver body (link index is
+//       NO_LINK) or an articulation plus link index.
+// The solver context argument is not used.
+//
+// Side effects: every row's appliedForce is overwritten with its newly clamped value. A solver
+// body gets its linearVelocity / angularState stored back; an articulation link instead
+// receives the accumulated, scaled impulse through PxcFsApplyImpulse.
+void solveExt1D(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
+{
+	PxU8* PX_RESTRICT bPtr = desc.constraint;
+	//PxU32 length = desc.constraintLength;
+
+	const SolverConstraint1DHeader* PX_RESTRICT  header = reinterpret_cast<const SolverConstraint1DHeader*>(bPtr);
+	SolverConstraint1DExt* PX_RESTRICT base = reinterpret_cast<SolverConstraint1DExt*>(bPtr + sizeof(SolverConstraint1DHeader));
+
+	// Fetch the starting velocities of side A: from the body, or from the articulation link.
+	Vec3V linVel0, angVel0, linVel1, angVel1;
+	if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
+	{
+		linVel0 = V3LoadA(desc.bodyA->linearVelocity);
+		angVel0 = V3LoadA(desc.bodyA->angularState);
+	}
+	else
+	{
+		Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationA, desc.linkIndexA);
+		linVel0 = v.linear;
+		angVel0 = v.angular;
+	}
+
+	// Same for side B.
+	if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+	{
+		linVel1 = V3LoadA(desc.bodyB->linearVelocity);
+		angVel1 = V3LoadA(desc.bodyB->angularState);
+	}
+	else
+	{
+		Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationB, desc.linkIndexB);
+		linVel1 = v.linear;
+		angVel1 = v.angular;
+	}
+
+	// Impulse accumulators; only consumed for sides that are articulation links.
+	Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero();
+
+	for(PxU32 i=0; i<header->count;++i, base++)
+	{
+		// Start fetching the next row while this one is processed.
+		Ps::prefetchLine(base+1);
+
+		// Four-float loads starting at each vector field. The variable names state which scalar
+		// is expected in the trailing lane(s); this relies on the field order of the row struct,
+		// which is not visible here - TODO confirm against SolverConstraint1D.
+		const Vec4V lin0XYZ_constantW						= V4LoadA(&base->lin0.x);	
+		const Vec4V lin1XYZ_unbiasedConstantW				= V4LoadA(&base->lin1.x);
+		const Vec4V ang0XYZ_velMultiplierW					= V4LoadA(&base->ang0.x);
+		const Vec4V ang1XYZ_impulseMultiplierW				= V4LoadA(&base->ang1.x);	
+		const Vec4V minImpulseX_maxImpulseY_appliedForceZ	= V4LoadA(&base->minImpulse);
+
+		const Vec3V lin0 = Vec3V_From_Vec4V(lin0XYZ_constantW);				FloatV constant = V4GetW(lin0XYZ_constantW);
+		const Vec3V lin1 = Vec3V_From_Vec4V(lin1XYZ_unbiasedConstantW);
+		const Vec3V ang0 = Vec3V_From_Vec4V(ang0XYZ_velMultiplierW);		FloatV vMul = V4GetW(ang0XYZ_velMultiplierW);
+		const Vec3V ang1 = Vec3V_From_Vec4V(ang1XYZ_impulseMultiplierW);	FloatV iMul = V4GetW(ang1XYZ_impulseMultiplierW);
+
+		const FloatV minImpulse		= V4GetX(minImpulseX_maxImpulseY_appliedForceZ);
+		const FloatV maxImpulse		= V4GetY(minImpulseX_maxImpulseY_appliedForceZ);
+		const FloatV appliedForce	= V4GetZ(minImpulseX_maxImpulseY_appliedForceZ);
+
+		// Relative velocity of the two sides along this row.
+		const Vec3V v0 = V3MulAdd(linVel0, lin0, V3Mul(angVel0, ang0));
+		const Vec3V v1 = V3MulAdd(linVel1, lin1, V3Mul(angVel1, ang1));
+		const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
+
+		// Same update rule as solve1D: form the new value, clamp it, keep only the change.
+		const FloatV unclampedForce = FScaleAdd(iMul, appliedForce, FScaleAdd(vMul, normalVel, constant));
+		const FloatV clampedForce = FMin(maxImpulse, (FMax(minImpulse, unclampedForce)));
+		const FloatV deltaF = FSub(clampedForce, appliedForce);
+
+		FStore(clampedForce, &base->appliedForce);
+		li0 = V3ScaleAdd(lin0, deltaF, li0);	ai0 = V3ScaleAdd(ang0, deltaF, ai0);
+		li1 = V3ScaleAdd(lin1, deltaF, li1);	ai1 = V3ScaleAdd(ang1, deltaF, ai1);
+
+		// The row stores the velocity change per unit of deltaF for each side (deltaVA / deltaVB).
+		linVel0 = V3ScaleAdd(base->deltaVA.linear, deltaF, linVel0); 		angVel0 = V3ScaleAdd(base->deltaVA.angular, deltaF, angVel0);
+		linVel1 = V3ScaleAdd(base->deltaVB.linear, deltaF, linVel1); 		angVel1 = V3ScaleAdd(base->deltaVB.angular, deltaF, angVel1);
+	}
+
+	// Side A: store velocities for a body, or hand the scaled impulse to the articulation.
+	if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)
+	{
+		V3StoreA(linVel0, desc.bodyA->linearVelocity);
+		V3StoreA(angVel0, desc.bodyA->angularState);
+	}
+	else
+		PxcFsApplyImpulse(*desc.articulationA, desc.linkIndexA, V3Scale(li0, FLoad(header->linearInvMassScale0)),
+																V3Scale(ai0, FLoad(header->angularInvMassScale0)));
+
+	// Side B: same treatment.
+	if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+	{
+		V3StoreA(linVel1, desc.bodyB->linearVelocity);
+		V3StoreA(angVel1, desc.bodyB->angularState);
+	}
+	else
+		PxcFsApplyImpulse(*desc.articulationB, desc.linkIndexB, V3Scale(li1, FLoad(header->linearInvMassScale1)), 
+																V3Scale(ai1, FLoad(header->angularInvMassScale1)));
+}
+/**
+ * Solves one contact constraint in which either side may be a plain solver body
+ * or an articulation link (chosen per side from desc.linkIndexA / desc.linkIndexB).
+ *
+ * The byte stream at desc.constraint is walked as a sequence of groups, each laid out as:
+ *   - one SolverContactHeader
+ *   - numNormalConstr SolverContactPointExt entries
+ *   - a PxF32 applied-force buffer, padded to a multiple of 4 entries
+ *   - numFrictionConstr SolverContactFrictionExt entries
+ *
+ * Normal rows are delegated to solveExtContacts; friction rows are solved here and
+ * only when cache.doFriction is set. Plain bodies have their updated velocities stored
+ * back at the end; articulation links receive the accumulated, dominance-scaled
+ * impulse through PxcFsApplyImpulse instead.
+ *
+ * desc:  bodies / articulations, link indices, and the constraint stream with its length in 16-byte units.
+ * cache: solver context; this function only reads cache.doFriction.
+ */
+void solveExtContact(const PxSolverConstraintDesc& desc, SolverContext& cache)
+{
+	Vec3V linVel0, angVel0, linVel1, angVel1;
+
+	if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)	// side A is a plain solver body: load its velocity state directly
+	{
+		linVel0 = V3LoadA(desc.bodyA->linearVelocity);
+		angVel0 = V3LoadA(desc.bodyA->angularState);
+	}
+	else	// side A is an articulation link: query the link velocity from the articulation
+	{
+		Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationA, desc.linkIndexA);
+		linVel0 = v.linear;
+		angVel0 = v.angular;
+	}
+
+	if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)	// same selection for side B
+	{
+		linVel1 = V3LoadA(desc.bodyB->linearVelocity);
+		angVel1 = V3LoadA(desc.bodyB->angularState);
+	}
+	else
+	{
+		Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationB, desc.linkIndexB);
+		linVel1 = v.linear;
+		angVel1 = v.angular;
+	}
+
+	const PxU8* PX_RESTRICT last = desc.constraint + desc.constraintLengthOver16*16;	// end of the stream; the length is stored in 16-byte units
+
+	//hopefully pointer aliasing doesn't bite.
+	PxU8* PX_RESTRICT currPtr = desc.constraint;
+
+	Vec3V linImpulse0 = V3Zero(), linImpulse1 = V3Zero(), angImpulse0 = V3Zero(), angImpulse1 = V3Zero();	// totals over all headers; only consumed on the articulation path below
+
+	while(currPtr < last)
+	{
+		SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
+		currPtr += sizeof(SolverContactHeader);
+
+		const PxU32 numNormalConstr = hdr->numNormalConstr;
+		const PxU32	numFrictionConstr = hdr->numFrictionConstr;
+
+		SolverContactPointExt* PX_RESTRICT contacts = reinterpret_cast<SolverContactPointExt*>(currPtr);
+		Ps::prefetchLine(contacts);
+		currPtr += numNormalConstr * sizeof(SolverContactPointExt);
+
+		PxF32* appliedForceBuffer = reinterpret_cast<PxF32*>(currPtr);
+		currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));	// skip the force buffer, rounded up to a multiple of 4 floats
+
+		SolverContactFrictionExt* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionExt*>(currPtr);
+		currPtr += numFrictionConstr * sizeof(SolverContactFrictionExt);
+
+		
+
+		Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero();	// per-header impulse accumulators, dominance-scaled after the friction pass
+
+		const Vec3V contactNormal = hdr->normal;
+
+		const FloatV accumulatedNormalImpulse = solveExtContacts(contacts, numNormalConstr, contactNormal, linVel0, angVel0, linVel1, 
+			angVel1, li0, ai0, li1, ai1, appliedForceBuffer);
+	// the value returned above bounds the friction impulses computed below
+
+		if(cache.doFriction && numFrictionConstr)
+		{
+			Ps::prefetchLine(frictions);
+			const FloatV maxFrictionImpulse = FMul(hdr->getStaticFriction(), accumulatedNormalImpulse);	// static limit: exceeding it marks the friction as broken
+			const FloatV maxDynFrictionImpulse = FMul(hdr->getDynamicFriction(), accumulatedNormalImpulse);	// dynamic limit: the clamp applied once the static limit is exceeded
+
+			BoolV broken = BFFFF();
+
+			for(PxU32 i=0;i<numFrictionConstr;i++)
+			{
+				SolverContactFrictionExt& f = frictions[i];
+				Ps::prefetchLine(&frictions[i+1]);	// prefetch the following friction row
+
+				const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW;
+				const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
+				const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW;
+
+				const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
+				/*const Vec3V normal0 = V3Scale(normal, sqrtInvMass0);
+				const Vec3V normal1 = V3Scale(normal, sqrtInvMass1);*/
+				const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);
+				const Vec3V rbXn = Vec3V_From_Vec4V(rbXnXYZ_biasW);
+
+				const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
+				const FloatV bias = V4GetW(rbXnXYZ_biasW);
+				const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);
+
+				const FloatV targetVel = FLoad(f.targetVel);
+
+				const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse);
+				const FloatV negMaxFrictionImpulse = FNeg(maxFrictionImpulse);
+
+				const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angVel0, raXn));
+				const Vec3V v1 = V3MulAdd(linVel1, normal, V3Mul(angVel1, rbXn));
+				const FloatV normalVel = V3SumElems(V3Sub(v0, v1));	// relative velocity for this row: summed elements of (side A - side B)
+
+				// appliedForce - (bias - targetVel) * velMultiplier - a hoisted part of the total impulse computation
+				const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce); 
+
+				// Algorithm:
+				// if abs(appliedForce + deltaF) > maxFrictionImpulse
+				//    clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
+				//      (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
+				//    set broken flag to true || broken flag
+
+				// FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier);
+				// FloatV potentialSumF = FAdd(appliedForce, deltaF);
+
+				const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);
+
+				// On XBox this clamping code uses the vector simple pipe rather than vector float,
+				// which eliminates a lot of stall cycles
+
+				const BoolV clampLow = FIsGrtr(negMaxFrictionImpulse, totalImpulse);	// below the negative static limit
+				const BoolV clampHigh = FIsGrtr(totalImpulse, maxFrictionImpulse);	// above the positive static limit
+
+				const FloatV totalClampedLow = FMax(negMaxDynFrictionImpulse, totalImpulse);
+				const FloatV totalClampedHigh = FMin(maxDynFrictionImpulse, totalImpulse);
+
+				const FloatV newAppliedForce = FSel(clampLow, totalClampedLow,
+															  FSel(clampHigh, totalClampedHigh, totalImpulse));
+
+				broken = BOr(broken, BOr(clampLow, clampHigh));	// a clamp on any row marks the whole header as broken
+
+				FloatV deltaF = FSub(newAppliedForce, appliedForce);
+
+				linVel0 = V3ScaleAdd(f.linDeltaVA, deltaF, linVel0);	// update velocities immediately so the following rows use them
+				angVel0 = V3ScaleAdd(f.angDeltaVA, deltaF, angVel0);
+				linVel1 = V3ScaleAdd(f.linDeltaVB, deltaF, linVel1);	
+				angVel1 = V3ScaleAdd(f.angDeltaVB, deltaF, angVel1);
+
+				li0 = V3ScaleAdd(normal, deltaF, li0);	ai0 = V3ScaleAdd(raXn, deltaF, ai0);	// accumulate the impulse for the articulation path
+				li1 = V3ScaleAdd(normal, deltaF, li1);	ai1 = V3ScaleAdd(rbXn, deltaF, ai1);
+
+				f.setAppliedForce(newAppliedForce);
+			}
+			Store_From_BoolV(broken, &hdr->broken);	// persist the broken state on the header
+		}
+
+		linImpulse0 = V3ScaleAdd(li0, hdr->getDominance0(), linImpulse0);		// fold this header into the totals, scaled by the header's dominance values
+		angImpulse0 = V3ScaleAdd(ai0, FLoad(hdr->angDom0), angImpulse0);
+		linImpulse1 = V3NegScaleSub(li1, hdr->getDominance1(), linImpulse1);	// side B uses V3NegScaleSub, presumably so its impulse enters with the opposite sign - confirm against the helper
+		angImpulse1 = V3NegScaleSub(ai1, FLoad(hdr->angDom1), angImpulse1);
+	}
+
+	if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK)	// plain body: write the solved velocities back
+	{
+		V3StoreA(linVel0, desc.bodyA->linearVelocity);
+		V3StoreA(angVel0, desc.bodyA->angularState);
+	}
+	else	// articulation link: hand the accumulated impulse to the articulation
+		PxcFsApplyImpulse(*desc.articulationA, desc.linkIndexA, linImpulse0, angImpulse0);
+
+	if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK)
+	{
+		V3StoreA(linVel1, desc.bodyB->linearVelocity);
+		V3StoreA(angVel1, desc.bodyB->angularState);
+	}
+	else
+		PxcFsApplyImpulse(*desc.articulationB, desc.linkIndexB, linImpulse1, angImpulse1);
+
+	PX_ASSERT(currPtr == last);	// the walk must end exactly at the end of the stream
+}
+
+
+// Runs solveExtContact on each of the constraintCount descriptors in desc, in array order.
+void solveExtContactBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index < constraintCount)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+		solveExtContact(current, cache);
+		++index;
+	}
+}
+
+// For each descriptor in array order: solve the extended contact, then conclude it,
+// before moving on to the next descriptor.
+void solveExtContactConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index < constraintCount)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+		solveExtContact(current, cache);
+		concludeContact(current, cache);
+		++index;
+	}
+}
+
+// Solves every extended contact in desc and writes its results back, then flushes any
+// threshold entries buffered in the context to the shared threshold stream.
+void solveExtContactBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 index = 0; index < constraintCount; ++index)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+
+		// Articulation links use slot 0 of the body-data array; plain bodies use their own data index.
+		const bool aIsLink = current.linkIndexA != PxSolverConstraintDesc::NO_LINK;
+		const bool bIsLink = current.linkIndexB != PxSolverConstraintDesc::NO_LINK;
+		PxSolverBodyData& dataA = cache.solverBodyArray[aIsLink ? 0 : current.bodyADataIndex];
+		PxSolverBodyData& dataB = cache.solverBodyArray[bIsLink ? 0 : current.bodyBDataIndex];
+
+		solveExtContact(current, cache);
+		writeBackContact(current, cache, dataA, dataB);
+	}
+
+	// Nothing buffered locally: no flush needed.
+	if(!(cache.mThresholdStreamIndex > 0))
+		return;
+
+	// Reserve a range in the shared stream by atomically advancing the shared counter;
+	// the start of the range is the value after the add minus the number of entries reserved.
+	const PxI32 pendingCount = PxI32(cache.mThresholdStreamIndex);
+	const PxI32 firstSlot = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, pendingCount) - pendingCount;
+
+	// Copy the local entries into the reserved range, then reset the local buffer.
+	for(PxU32 entry = 0; entry < cache.mThresholdStreamIndex; ++entry)
+		cache.mSharedThresholdStream[entry + firstSlot] = cache.mThresholdStream[entry];
+
+	cache.mThresholdStreamIndex = 0;
+}
+
+// Runs solveExt1D on each of the constraintCount descriptors in desc, in array order.
+void solveExt1DBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index < constraintCount)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+		solveExt1D(current, cache);
+		++index;
+	}
+}
+
+// For each descriptor in array order: solve the extended 1D constraint, then conclude it,
+// before moving on to the next descriptor.
+void solveExt1DConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	PxU32 index = 0;
+	while(index < constraintCount)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+		solveExt1D(current, cache);
+		conclude1D(current, cache);
+		++index;
+	}
+}
+
+// Solves every extended 1D constraint in desc and writes its results back, one descriptor at a time.
+void solveExt1DBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 index = 0; index < constraintCount; ++index)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+
+		// Articulation links use slot 0 of the body-data array; plain bodies use their own data index.
+		const bool aIsLink = current.linkIndexA != PxSolverConstraintDesc::NO_LINK;
+		const bool bIsLink = current.linkIndexB != PxSolverConstraintDesc::NO_LINK;
+		PxSolverBodyData& dataA = cache.solverBodyArray[aIsLink ? 0 : current.bodyADataIndex];
+		PxSolverBodyData& dataB = cache.solverBodyArray[bIsLink ? 0 : current.bodyBDataIndex];
+
+		solveExt1D(current, cache);
+		writeBack1D(current, cache, dataA, dataB);
+	}
+}
+
+// Write-back only (no solve) for every extended 1D constraint in desc, in array order.
+void ext1DBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache)
+{
+	for(PxU32 index = 0; index < constraintCount; ++index)
+	{
+		const PxSolverConstraintDesc& current = desc[index];
+
+		// Articulation links use slot 0 of the body-data array; plain bodies use their own data index.
+		const bool aIsLink = current.linkIndexA != PxSolverConstraintDesc::NO_LINK;
+		const bool bIsLink = current.linkIndexB != PxSolverConstraintDesc::NO_LINK;
+		PxSolverBodyData& dataA = cache.solverBodyArray[aIsLink ? 0 : current.bodyADataIndex];
+		PxSolverBodyData& dataB = cache.solverBodyArray[bIsLink ? 0 : current.bodyBDataIndex];
+
+		writeBack1D(current, cache, dataA, dataB);
+	}
+}
+
+// Single-descriptor variant: solve the extended contact, then conclude it.
+void solveConcludeExtContact(const PxSolverConstraintDesc& desc,
+							 SolverContext& cache)
+{
+	solveExtContact(desc, cache);	// solve step
+	concludeContact(desc, cache);	// conclude step
+}
+
+// Single-descriptor variant: solve the extended 1D constraint, then conclude it.
+void solveConcludeExt1D(const PxSolverConstraintDesc& desc,
+						SolverContext& cache)
+{
+	solveExt1D(desc, cache);	// solve step
+	conclude1D(desc, cache);	// conclude step
+}
+
+
+// Single-descriptor variant: solve the 1D constraint, then conclude it.
+void solveConclude1D(const PxSolverConstraintDesc& desc,
+					 SolverContext& cache)
+{
+	solve1D(desc, cache);		// solve step
+	conclude1D(desc, cache);	// conclude step
+}
+
+// Single-descriptor variant: solve the contact, then conclude it.
+void solveConcludeContact(const PxSolverConstraintDesc& desc,
+						  SolverContext& cache)
+{
+	solveContact(desc, cache);		// solve step
+	concludeContact(desc, cache);	// conclude step
+}
+
+// Single-descriptor variant: solve the contact with the BStatic solve routine, then conclude it.
+void solveConcludeContact_BStatic(const PxSolverConstraintDesc& desc,
+								  SolverContext& cache)
+{
+	solveContact_BStatic(desc, cache);	// solve step
+	concludeContact(desc, cache);		// conclude step
+}
+
+
+}
+
+}
+
+#endif
author	git perforce import user <a@b>	2016-10-25 12:29:14 -0600
committer	Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>	2016-10-25 18:56:37 -0500
commit	3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree	fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp
download	physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip