Initial commit:

PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
author: git perforce import user <a@b> 2016-10-25 12:29:14 -0600
committer: Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> 2016-10-25 18:56:37 -0500
commit: 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree: fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp
download: physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
1 files changed, 1344 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp
new file mode 100644
index 00000000..ea9ccb8d
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp
@@ -0,0 +1,1344 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  
+
+#include "foundation/PxVec3.h"
+#include "foundation/PxMath.h"
+#include "foundation/PxMemory.h"
+#include "foundation/PxProfiler.h"
+
+#include "PsUtilities.h"
+#include "CmSpatialVector.h"
+#include "DyArticulationHelper.h"
+#include "DyArticulationReference.h"
+#include "DyArticulationFnsSimd.h"
+#include "DyArticulationFnsScalar.h"
+#include "DyArticulationFnsDebug.h"
+#include "DySolverConstraintDesc.h"
+#include "PxvDynamics.h"
+#include "DyArticulation.h"
+#include "PxcRigidBody.h"
+#include "CmConeLimitHelper.h"
+#include "DySolverConstraint1D.h"
+#include "PxcConstraintBlockStream.h"
+#include "DySolverConstraint1D.h"
+#include "DyArticulationPImpl.h"
+#include "PsFoundation.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+void PxcFsFlushVelocity(FsData& matrix);
+
+// we pass this around by value so that when we return from a function the size is unaltered. That means we don't preserve state
+// across functions - even though that could be handy to preserve baseInertia and jointTransforms across the solver so that if we 
+// need to run position projection  positions they don't get recomputed.
+
+struct PxcFsScratchAllocator
+{
+	char*   base;
+	size_t	size;
+	size_t	taken;
+	PxcFsScratchAllocator(char* p, size_t s): base(p), size(s), taken(0) {}
+
+	template<typename T>
+	static size_t sizeof16()
+	{
+		return (sizeof(T)+15)&~15;
+	}
+
+	template<class T> T* alloc(PxU32 count)
+	{
+		size_t s = sizeof16<T>();
+		PX_ASSERT(taken+s*count <= size);
+		T* result = reinterpret_cast<T*>(base+taken);
+		taken+=s*count;
+		return result;
+	}
+};
+
+void PxcLtbFactor(FsData& m)
+{
+	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+	LtbRow* rows = getLtbRows(m);
+
+	for(PxU32 i=m.linkCount; --i>0;)
+	{
+		LtbRow& b = rows[i];
+		PxU32 p = m.parent[i];
+		const FsInertia inertia = Fns::invertInertia(b.inertia);
+		const Mat33V jResponse = Fns::invertSym33(M33Neg(Fns::computeSIS(inertia, b.j1, b.j1)));
+		b.inertia = inertia;
+		rows[p].inertia = Fns::multiplySubtract(rows[p].inertia, jResponse, b.j0, b.j0);
+		b.jResponse = jResponse;
+
+	}
+	rows[0].inertia = Fns::invertInertia(rows[0].inertia);
+}
+
+void PxcLtbSolve(const FsData& m, 
+				 Vec3V* b,					// rhs error to solve for
+				 Cm::SpatialVectorV* y)		// velocity delta output
+{
+	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+	const LtbRow* rows = getLtbRows(m);
+	PxMemZero(y, m.linkCount*sizeof(Cm::SpatialVectorV));
+
+	for(PxU32 i=m.linkCount;i-->1;)
+	{
+		const LtbRow& r = rows[i];
+		const PxU32 p = m.parent[i];
+
+		const Vec3V t = V3Sub(b[i], Fns::axisDot(r.j1, y[i]));
+		b[i] = t;
+		y[p] = Fns::subtract(y[p], Fns::axisMultiply(r.j0, t));
+	}
+
+	y[0] = Fns::multiply(rows[0].inertia, y[0]);
+
+	for(PxU32 i=1; i<m.linkCount; i++)
+	{
+		const LtbRow& r = rows[i];
+		const PxU32 p = m.parent[i];
+
+		const Vec3V t = V3Sub(M33MulV3(r.jResponse, b[i]), Fns::axisDot(r.j0, y[p]));
+		y[i] = Fns::subtract(Fns::multiply(r.inertia, y[i]), Fns::axisMultiply(r.j1, t));
+	}
+}
+
+void PxcLtbProject(const FsData& m,
+				   Cm::SpatialVectorV* velocity,
+				   Vec3V* b)
+{
+	PX_ASSERT(m.linkCount<=DY_ARTICULATION_MAX_SIZE);
+	Cm::SpatialVectorV y[DY_ARTICULATION_MAX_SIZE];
+
+	PxcLtbSolve(m, b, y);
+
+	for(PxU32 i=0;i<m.linkCount;i++)
+		velocity[i] -= y[i];
+}
+
+void PxcFsPropagateDrivenInertiaSimd(FsData& matrix,
+									 const FsInertia* baseInertia,
+									 const PxReal* isf,
+									 const Mat33V* load,
+									 PxcFsScratchAllocator allocator)
+{
+	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+	Cm::SpatialVectorV IS[3];
+
+	FsRow* rows = getFsRows(matrix);
+	const FsRowAux* aux = getAux(matrix);
+	const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+	FsInertia* inertia = allocator.alloc<FsInertia>(matrix.linkCount);
+	PxMemCopy(inertia, baseInertia, matrix.linkCount*sizeof(FsInertia));
+
+	for(PxU32 i=matrix.linkCount; --i>0;)
+	{
+		FsRow& r = rows[i];
+		const FsRowAux& a = aux[i];
+		const FsJointVectors& jv = jointVectors[i];
+
+		const Mat33V m = Fns::computeSIS(inertia[i], a.S, IS);
+		const FloatV f = FLoad(isf[i]);
+
+		const Mat33V D = Fns::invertSym33(Mat33V(V3ScaleAdd(load[i].col0, f, m.col0),
+										   V3ScaleAdd(load[i].col1, f, m.col1),
+										   V3ScaleAdd(load[i].col2, f, m.col2)));
+		r.D = D;
+
+		inertia[matrix.parent[i]] = Fns::addInertia(inertia[matrix.parent[i]], 
+													Fns::translateInertia(jv.parentOffset, Fns::multiplySubtract(inertia[i], D,  IS,  r.DSI)));
+	}
+
+	getRootInverseInertia(matrix) = Fns::invertInertia(inertia[0]);
+}
+
+PX_FORCE_INLINE Cm::SpatialVectorV propagateDrivenImpulse(const FsRow& row, 
+														  const FsJointVectors& jv,
+														  Vec3V& SZMinusQ, 
+														  const Cm::SpatialVectorV& Z,
+														  const Vec3V& Q)
+{
+	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+	SZMinusQ = V3Sub(V3Add(Z.angular, V3Cross(Z.linear,jv.jointOffset)), Q);
+	Cm::SpatialVectorV result = Fns::translateForce(jv.parentOffset, Z - Fns::axisMultiply(row.DSI, SZMinusQ));
+
+	return result;
+}
+
+void PxcFsApplyJointDrives(FsData& matrix,
+						   const Vec3V* Q)
+{				
+	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+	PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+
+	const FsRow* rows = getFsRows(matrix);
+	const FsRowAux* aux = getAux(matrix);
+	const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+	Cm::SpatialVectorV Z[DY_ARTICULATION_MAX_SIZE];
+	Cm::SpatialVectorV dV[DY_ARTICULATION_MAX_SIZE];
+	Vec3V SZminusQ[DY_ARTICULATION_MAX_SIZE];
+
+	PxMemZero(Z, matrix.linkCount*sizeof(Cm::SpatialVectorV));
+
+	for(PxU32 i=matrix.linkCount;i-->1;)
+		Z[matrix.parent[i]] += propagateDrivenImpulse(rows[i], jointVectors[i], SZminusQ[i], Z[i], Q[i]);
+
+	
+	dV[0] = Fns::multiply(getRootInverseInertia(matrix), -Z[0]);
+
+	for(PxU32 i=1;i<matrix.linkCount;i++)
+		dV[i] = Fns::propagateVelocity(rows[i], jointVectors[i], SZminusQ[i], dV[matrix.parent[i]], aux[i]);
+
+	Cm::SpatialVectorV* V = getVelocity(matrix);
+	for(PxU32 i=0;i<matrix.linkCount;i++)
+		V[i] += dV[i];
+}
+
+void ArticulationHelper::applyImpulses(	const FsData& matrix,
+										Cm::SpatialVectorV* Z,
+										Cm::SpatialVectorV* V)
+{	
+	// note: Z is the negated impulse
+
+
+	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+	PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+	const FsRow* rows = getFsRows(matrix);
+	const FsRowAux* aux = getAux(matrix);
+	const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+	Cm::SpatialVectorV dV[DY_ARTICULATION_MAX_SIZE];
+	Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
+
+	for(PxU32 i=matrix.linkCount;i-->1;)
+		Z[matrix.parent[i]] += Fns::propagateImpulse(rows[i], jointVectors[i], SZ[i], Z[i], aux[i]);
+
+	dV[0] = Fns::multiply(getRootInverseInertia(matrix), -Z[0]);
+
+	for(PxU32 i=1;i<matrix.linkCount;i++)
+		dV[i] = Fns::propagateVelocity(rows[i], jointVectors[i], SZ[i], dV[matrix.parent[i]], aux[i]);
+
+	for(PxU32 i=0;i<matrix.linkCount;i++)
+		V[i] += dV[i];
+}
+
+void getImpulseResponseSlow(const FsData& matrix, 
+							PxU32 linkID0, 
+							const Cm::SpatialVectorV& impulse0,
+							Cm::SpatialVectorV& deltaV0,
+							PxU32 linkID1,
+							const Cm::SpatialVectorV& impulse1,
+							Cm::SpatialVectorV& deltaV1)
+{
+	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+	const FsRow* rows = getFsRows(matrix);
+	const FsRowAux* aux = getAux(matrix);
+	const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+	PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+	PxU32 stack[DY_ARTICULATION_MAX_SIZE];
+	Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
+
+	PxU32 i0, i1, ic;
+	
+	for(i0 = linkID0, i1 = linkID1; i0!=i1;)	// find common path
+	{
+		if(i0<i1)
+			i1 = matrix.parent[i1];
+		else
+			i0 = matrix.parent[i0];
+	}
+
+	PxU32 common = i0;
+
+	Cm::SpatialVectorV Z0 = -impulse0, Z1 = -impulse1;
+	for(i0 = 0; linkID0!=common; linkID0 = matrix.parent[linkID0])
+	{
+		Z0 = Fns::propagateImpulse(rows[linkID0], jointVectors[linkID0], SZ[linkID0], Z0, aux[linkID0]);
+		stack[i0++] = linkID0;
+	}
+
+	for(i1 = i0; linkID1!=common; linkID1 = matrix.parent[linkID1])
+	{
+		Z1 = Fns::propagateImpulse(rows[linkID1], jointVectors[linkID1], SZ[linkID1], Z1, aux[linkID1]);
+		stack[i1++] = linkID1;
+	}
+
+	Cm::SpatialVectorV Z = Z0 + Z1;
+	for(ic = i1; common; common = matrix.parent[common])
+	{
+		Z = Fns::propagateImpulse(rows[common], jointVectors[common], SZ[common], Z, aux[common]);
+		stack[ic++] = common;
+	}
+
+	Cm::SpatialVectorV v = Fns::multiply(getRootInverseInertia(matrix), -Z);
+
+	for(PxU32 index = ic; index-->i1 ;)
+		v = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], v, aux[stack[index]]);
+
+	deltaV1 = v;
+	for(PxU32 index = i1; index-->i0 ;)
+		deltaV1 = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], deltaV1, aux[stack[index]]);
+
+	deltaV0 = v;
+	for(PxU32 index = i0; index-->0;)
+		deltaV0 = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], deltaV0, aux[stack[index]]);
+}
+
+void PxcFsGetImpulseResponse(const FsData& matrix,
+							 PxU32 linkID,
+							 const Cm::SpatialVectorV& impulse,
+							 Cm::SpatialVectorV& deltaV)
+{
+	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+	PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+	Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
+
+	const FsRow* rows = getFsRows(matrix);
+	const FsRowAux* aux = getAux(matrix);
+	const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+	Cm::SpatialVectorV Z = -impulse;
+	
+	for(PxU32 i = linkID; i; i = matrix.parent[i])
+		Z = Fns::propagateImpulse(rows[i], jointVectors[i], SZ[i], Z, aux[i]);
+
+	deltaV = Fns::multiply(getRootInverseInertia(matrix), -Z);
+
+	PX_ASSERT(rows[linkID].pathToRoot&1);
+
+	for(ArticulationBitField i=rows[linkID].pathToRoot-1; i; i &= (i-1))
+	{
+		const PxU32 index = ArticulationLowestSetBit(i);
+		deltaV = Fns::propagateVelocity(rows[index], jointVectors[index], SZ[index], deltaV, aux[index]);
+	}
+}
+
+void PxcFsGetImpulseSelfResponse(const FsData& matrix, 
+								 PxU32 linkID0, 
+								 const Cm::SpatialVectorV& impulse0,
+								 Cm::SpatialVectorV& deltaV0,
+								 PxU32 linkID1,
+								 const Cm::SpatialVectorV& impulse1,
+								 Cm::SpatialVectorV& deltaV1)
+{
+	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+	PX_ASSERT(linkID0 != linkID1);
+
+	const FsRow* rows = getFsRows(matrix);
+	const FsRowAux* aux = getAux(matrix);
+	const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+	// standard case: parent-child limit
+	if(matrix.parent[linkID1] == linkID0)
+	{
+		Vec3V SZ;
+		const Cm::SpatialVectorV Z = impulse0 - Fns::propagateImpulse(rows[linkID1], jointVectors[linkID1], SZ, -impulse1, aux[linkID1]);
+		PxcFsGetImpulseResponse(matrix, linkID0, Z, deltaV0);
+		deltaV1 = Fns::propagateVelocity(rows[linkID1], jointVectors[linkID1], SZ, deltaV0, aux[linkID1]);
+	}
+	else
+		getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+	Cm::SpatialVector V[DY_ARTICULATION_MAX_SIZE];
+	for(PxU32 i=0;i<matrix.linkCount;i++) V[i] = Cm::SpatialVector::zero();
+	ArticulationRef::applyImpulse(matrix,V,linkID0, reinterpret_cast<const Cm::SpatialVector&>(impulse0));
+	ArticulationRef::applyImpulse(matrix,V,linkID1, reinterpret_cast<const Cm::SpatialVector&>(impulse1));
+
+	Cm::SpatialVector refV0 = V[linkID0];
+	Cm::SpatialVector refV1 = V[linkID1];
+#endif
+}
+
+namespace
+{
+
+	PX_FORCE_INLINE Cm::SpatialVectorV getImpulseResponseSimd(const FsData& matrix, PxU32 linkID, Vec3V lZ, Vec3V aZ)
+	{
+		PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+		Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
+		PxU32 indices[DY_ARTICULATION_MAX_SIZE], iCount = 0;
+
+		const FsRow*PX_RESTRICT rows = getFsRows(matrix);
+		const FsRowAux*PX_RESTRICT aux = getAux(matrix);
+		const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+		PX_UNUSED(aux);
+		PX_ASSERT(rows[linkID].pathToRoot&1);
+
+		lZ = V3Neg(lZ);
+		aZ = V3Neg(aZ);
+
+		for(PxU32 i = linkID; i; i = matrix.parent[i])
+		{
+			const FsRow& r = rows[i];
+			const FsJointVectors& j = jointVectors[i];
+
+			Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset));
+			SZ[iCount] = sz;
+			
+			lZ = V3NegScaleSub(r.DSI[0].linear, V3GetX(sz), V3NegScaleSub(r.DSI[1].linear, V3GetY(sz), V3NegScaleSub(r.DSI[2].linear, V3GetZ(sz), lZ)));
+			aZ = V3NegScaleSub(r.DSI[0].angular, V3GetX(sz), V3NegScaleSub(r.DSI[1].angular, V3GetY(sz), V3NegScaleSub(r.DSI[2].angular, V3GetZ(sz), aZ)));
+
+			aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ));
+			indices[iCount++] = i;
+		}
+
+		const FsInertia& I = getRootInverseInertia(matrix);
+
+		Vec3V lV = V3Neg(V3Add(M33MulV3(I.ll, lZ), M33MulV3(I.la, aZ)));
+		Vec3V aV = V3Neg(V3Add(M33TrnspsMulV3(I.la, lZ), M33MulV3(I.aa, aZ)));
+
+		while(iCount)
+		{
+			PxU32 i = indices[--iCount];
+			const FsRow& r = rows[i];
+			const FsJointVectors& j = jointVectors[i];
+
+			lV = V3Sub(lV, V3Cross(j.parentOffset, aV));
+
+			Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV),  V3Dot(r.DSI[1].linear, lV),  V3Dot(r.DSI[2].linear, lV)),
+							V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV)));
+
+			n = V3Add(n, M33MulV3(r.D, SZ[iCount]));
+			lV = V3Sub(lV, V3Cross(j.jointOffset, n));
+			aV = V3Sub(aV, n);
+		}
+
+		return Cm::SpatialVectorV(lV, aV);
+	}
+}
+					
+void ArticulationHelper::getImpulseResponse(const FsData& matrix,
+											PxU32 linkID,
+											const Cm::SpatialVectorV& impulse,
+											Cm::SpatialVectorV& deltaV)
+{
+	PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+
+	deltaV = getImpulseResponseSimd(matrix, linkID, impulse.linear, impulse.angular);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+	Cm::SpatialVectorV deltaV_;
+	PxcFsGetImpulseResponse(matrix, linkID, impulse, deltaV_);
+	PX_ASSERT(almostEqual(deltaV_, deltaV,1e-3f));
+#endif
+}
+
+void ArticulationHelper::getImpulseSelfResponse(const FsData& matrix,
+												PxU32 linkID0,
+												const Cm::SpatialVectorV& impulse0,
+												Cm::SpatialVectorV& deltaV0,
+												PxU32 linkID1,
+												const Cm::SpatialVectorV& impulse1,
+												Cm::SpatialVectorV& deltaV1)
+{
+	PX_ASSERT(linkID0 != linkID1);
+
+	const FsRow* rows = getFsRows(matrix);
+	const FsRowAux* aux = getAux(matrix);
+	const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+	PX_UNUSED(aux);
+
+	Cm::SpatialVectorV& dV0 = deltaV0, 
+				  & dV1 = deltaV1;
+
+	// standard case: parent-child limit
+	if(matrix.parent[linkID1] == linkID0)
+	{
+		const FsRow& r = rows[linkID1];
+		const FsJointVectors& j = jointVectors[linkID1];
+
+		Vec3V lZ = V3Neg(impulse1.linear),
+			  aZ = V3Neg(impulse1.angular);
+
+		Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset));
+		
+		lZ = V3Sub(lZ, V3ScaleAdd(r.DSI[0].linear, V3GetX(sz), V3ScaleAdd(r.DSI[1].linear, V3GetY(sz), V3Scale(r.DSI[2].linear, V3GetZ(sz)))));
+		aZ = V3Sub(aZ, V3ScaleAdd(r.DSI[0].angular, V3GetX(sz), V3ScaleAdd(r.DSI[1].angular, V3GetY(sz), V3Scale(r.DSI[2].angular, V3GetZ(sz)))));
+
+		aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ));
+
+		lZ = V3Sub(impulse0.linear, lZ);
+		aZ = V3Sub(impulse0.angular, aZ);
+
+		dV0 = getImpulseResponseSimd(matrix, linkID0, lZ, aZ);
+
+		Vec3V aV = dV0.angular;
+		Vec3V lV = V3Sub(dV0.linear, V3Cross(j.parentOffset, aV));
+
+		Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV),  V3Dot(r.DSI[1].linear, lV),  V3Dot(r.DSI[2].linear, lV)),
+						V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV)));
+
+		n = V3Add(n, M33MulV3(r.D, sz));
+		lV = V3Sub(lV, V3Cross(j.jointOffset, n));
+		aV = V3Sub(aV, n);
+
+		dV1 = Cm::SpatialVectorV(lV, aV);
+	}
+	else
+		getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+	Cm::SpatialVectorV dV0_, dV1_;
+	PxcFsGetImpulseSelfResponse(matrix, linkID0, impulse0, dV0_, linkID1, impulse1, dV1_);
+
+	PX_ASSERT(almostEqual(dV0_, dV0, 1e-3f));
+	PX_ASSERT(almostEqual(dV1_, dV1, 1e-3f));
+#endif
+}
+
+void PxcLtbComputeJv(Vec3V* jv, const FsData& m, const Cm::SpatialVectorV* velocity)
+{
+	const LtbRow* rows = getLtbRows(m);
+	const FsRow* fsRows = getFsRows(m);
+	const FsJointVectors* jointVectors = getJointVectors(m);
+
+	PX_UNUSED(rows);
+	PX_UNUSED(fsRows);
+
+	for(PxU32 i=1;i<m.linkCount;i++)
+	{
+		Cm::SpatialVectorV pv = velocity[m.parent[i]], v = velocity[i];
+
+		Vec3V parentOffset = V3Add(jointVectors[i].jointOffset, jointVectors[i].parentOffset);
+
+		Vec3V k0v = V3Add(pv.linear, V3Cross(pv.angular, parentOffset)),
+			  k1v = V3Add(v.linear,  V3Cross(v.angular,jointVectors[i].jointOffset));
+		jv[i] = V3Sub(k0v, k1v);
+	}
+}
+
+void ArticulationHelper::saveVelocity(const ArticulationSolverDesc& d)
+{
+	Vec3V b[DY_ARTICULATION_MAX_SIZE];
+	FsData& m = *d.fsData;
+
+	Cm::SpatialVectorV* velocity = getVelocity(m);
+	PxcFsFlushVelocity(m);
+
+	// save off the motion velocity
+
+	for(PxU32 i=0;i<m.linkCount;i++)
+	{
+		d.motionVelocity[i] = velocity[i];
+		PX_ASSERT(isFiniteVec3V(velocity[i].linear));
+		PX_ASSERT(isFiniteVec3V(velocity[i].angular));
+	}
+
+	// and now re-solve to use the unbiased velocities
+
+	PxcLtbComputeJv(b, m, velocity);
+	PxcLtbProject(m, velocity, b);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+	for(PxU32 i=0;i<m.linkCount;i++)
+		getRefVelocity(m)[i] = velocity[i];
+#endif
+}
+
+void PxcFsComputeJointLoadsSimd(const FsData& matrix,
+								const FsInertia*PX_RESTRICT baseInertia,
+								Mat33V*PX_RESTRICT load,
+								const PxReal*PX_RESTRICT isf_,
+								PxU32 linkCount,
+								PxU32 maxIterations,
+								PxcFsScratchAllocator allocator)
+{
+	// dsequeira: this is really difficult to optimize on XBox: not inlining generates lots of LHSs, 
+	// inlining generates lots of cache misses because the fn is so huge (almost 2000 instrs.) 
+	// Timing says even for 1 iteration the cache misses are slighly preferable for a
+	// 20-bone articulation, for more iters it's *much* better to take the cache misses.
+	//
+	// about 400 instructions come from unnecessary and inexplicable branch checks
+
+	if(!maxIterations)
+		return;
+
+	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+	FloatV isf[DY_ARTICULATION_MAX_SIZE];
+
+	for(PxU32 i=1;i<linkCount;i++)
+		isf[i] = FLoad(isf_[i]);
+
+	FsInertia*PX_RESTRICT inertia = allocator.alloc<FsInertia>(linkCount);
+	FsInertia*PX_RESTRICT contribToParent = allocator.alloc<FsInertia>(linkCount);
+
+	const FsRow*PX_RESTRICT row = getFsRows(matrix); 
+	const FsRowAux*PX_RESTRICT aux = getAux(matrix);
+	const FsJointVectors* jointVectors = getJointVectors(matrix);
+	
+	PX_UNUSED(row);
+
+	// gets rid of about 200 LHSs, need to change the matrix format to make this part of it
+	PxU64 parent[DY_ARTICULATION_MAX_SIZE];
+	for(PxU32 i=0;i<linkCount;i++)
+		parent[i] = matrix.parent[i];
+
+	while(maxIterations--)
+	{
+		PxMemCopy(inertia, baseInertia, sizeof(FsInertia)*linkCount);
+			
+		for(PxU32 i=linkCount;i-->1;)
+		{
+			const Cm::SpatialVectorV*PX_RESTRICT S = aux[i].S;
+
+			Ps::prefetch(&load[i-1]);
+			Ps::prefetch(&jointVectors[i-1]);
+			const FsInertia tmp = Fns::propagate(inertia[i], S, load[i], isf[i]);
+			inertia[parent[i]] = Fns::addInertia(inertia[parent[i]], Fns::translateInertia(jointVectors[i].parentOffset, tmp));
+			contribToParent[i] = tmp;
+		}
+
+		for(PxU32 i=1;i<linkCount;i++)
+		{
+			const Cm::SpatialVectorV*PX_RESTRICT S = aux[i].S;
+
+			const FsInertia rootwardInertia = Fns::subtractInertia(Fns::translateInertia(V3Neg(jointVectors[i].parentOffset), inertia[parent[i]]), contribToParent[i]);
+			const FsInertia tmp = Fns::propagate(rootwardInertia, S, load[i], isf[i]);
+			load[i] = Fns::computeDriveInertia(inertia[i], rootwardInertia, S);
+			inertia[i] = Fns::addInertia(inertia[i], tmp);
+		}
+	}
+}
+
+PxU32 ArticulationHelper::getFsDataSize(PxU32 linkCount)
+{
+	return sizeof(FsInertia) + sizeof(FsRow) * linkCount;
+}
+
+PxU32 ArticulationHelper::getLtbDataSize(PxU32 linkCount)
+{
+	return sizeof(LtbRow) * linkCount;
+}
+
+void ArticulationHelper::prepareDataBlock(	FsData& fsData,
+										    const ArticulationLink* links, 
+											PxU16 linkCount,
+											PxTransform* poses,
+										 	FsInertia* baseInertia,
+											ArticulationJointTransforms* jointTransforms,
+											PxU32 expectedSize)
+{
+	PxU32 stateSize = sizeof(FsData)
+					+ sizeof(Cm::SpatialVectorV) * linkCount
+					+ sizeof(Cm::SpatialVectorV) * linkCount
+					+ sizeof(Vec3V)			 * linkCount
+					+ sizeof(PxReal)		 * ((linkCount + 15) & 0xfffffff0);
+
+	PxU32 jointVectorSize = sizeof(FsJointVectors) * linkCount;
+
+	PxU32 fsDataSize  = getFsDataSize(linkCount);
+	PxU32 ltbDataSize = getLtbDataSize(linkCount);
+
+	PxU32 totalSize	= stateSize 
+					+ jointVectorSize
+					+ fsDataSize 
+					+ ltbDataSize
+					+ sizeof(Cm::SpatialVectorV) * linkCount
+					+ sizeof(FsRowAux)    * linkCount;
+
+	PX_UNUSED(totalSize);
+	PX_UNUSED(expectedSize);
+	PX_ASSERT(expectedSize == 0 || totalSize == expectedSize);
+
+	PxMemZero(&fsData, stateSize);
+	fsData.jointVectorOffset	= PxU16(stateSize);
+	fsData.fsDataOffset			= PxU16(stateSize+jointVectorSize);
+	fsData.ltbDataOffset		= PxU16(stateSize+jointVectorSize+fsDataSize);
+	fsData.linkCount			= linkCount;
+
+	for(PxU32 i=1;i<linkCount;i++)
+		fsData.parent[i] = PxU8(links[i].parent);
+	fsData.deferredZ = Cm::SpatialVectorV(PxZero);
+
+	Cm::SpatialVector* velocity = reinterpret_cast<Cm::SpatialVector*>(getVelocity(fsData));
+
+	PxMemZero(baseInertia, sizeof(FsInertia)*linkCount);
+
+	PxReal* maxPenBias = getMaxPenBias(fsData);
+
+	for(PxU32 i=0;i<linkCount;i++)
+	{
+		if((i+2)<linkCount)
+		{
+			Ps::prefetch(links[i+2].bodyCore);
+			Ps::prefetch(links[i+2].inboundJoint);
+		}
+		PxsBodyCore& core = *links[i].bodyCore;
+		poses[i] = core.body2World;
+		velocity[i] = Cm::SpatialVector(core.linearVelocity, core.angularVelocity);
+		setInertia(baseInertia[i], core, core.body2World);
+		maxPenBias[i] = core.maxPenBias;
+
+		if(i)
+			setJointTransforms(jointTransforms[i], poses[links[i].parent], core.body2World, *links[i].inboundJoint);
+	}
+
+	FsJointVectors* jointVectors = getJointVectors(fsData);
+	for(PxU32 i=1;i<linkCount;i++)
+	{
+		PX_ALIGN(16, PxVec3) parentOffset = poses[i].p - poses[fsData.parent[i]].p;
+		PX_ALIGN(16, PxVec3) jointOffset = jointTransforms[i].cB2w.p - poses[i].p;
+		jointVectors[i].parentOffset = V3LoadA(parentOffset);
+		jointVectors[i].jointOffset = V3LoadA(jointOffset);
+	}
+}
+
+PxU32 ArticulationHelper::computeUnconstrainedVelocities(	const ArticulationSolverDesc& desc,
+															PxReal dt,
+															PxcConstraintBlockStream& stream,
+															PxSolverConstraintDesc* constraintDesc,
+															PxU32& acCount,
+															PxsConstraintBlockManager& constraintBlockManager,
+															const PxVec3& gravity, PxU64 contextID)
+{
+	PX_UNUSED(contextID);
+	const ArticulationLink* links = desc.links;
+	PxU16 linkCount = desc.linkCount;
+	FsData& fsData = *desc.fsData;
+	PxTransform* poses = desc.poses;
+
+	PxcFsScratchAllocator allocator(desc.scratchMemory, desc.scratchMemorySize);
+	FsInertia*						PX_RESTRICT baseInertia = allocator.alloc<FsInertia>(desc.linkCount);
+	ArticulationJointTransforms*	PX_RESTRICT jointTransforms = allocator.alloc<ArticulationJointTransforms>(desc.linkCount);
+
+	{
+		PX_PROFILE_ZONE("Articulations.prepareDataBlock", contextID);
+		prepareDataBlock(fsData, links, linkCount, poses, baseInertia, jointTransforms, desc.totalDataSize);
+	}
+
+	const PxReal recipDt = 1.0f/dt;
+
+	Cm::SpatialVectorV* velocity = getVelocity(fsData);
+
+	{
+
+		PX_PROFILE_ZONE("Articulations.setupProject", contextID);
+
+		PxMemZero(getLtbRows(fsData), getLtbDataSize(linkCount));
+		prepareLtbMatrix(fsData, baseInertia, poses, jointTransforms, recipDt);
+
+		PxcLtbFactor(fsData);
+	
+		Vec3V b[DY_ARTICULATION_MAX_SIZE];
+		PxcLtbComputeJv(b, fsData, velocity);
+
+		LtbRow* rows = getLtbRows(fsData);
+		for(PxU32 i=1;i<linkCount;i++)
+			b[i] = V3Add(b[i], rows[i].jC);
+
+		PxcLtbProject(fsData, velocity, b);
+	}
+
+	{
+		PX_PROFILE_ZONE("Articulations.prepareFsData", contextID);
+		PxMemZero(addAddr<void*>(&fsData,fsData.fsDataOffset), getFsDataSize(linkCount));
+		prepareFsData(fsData, links);
+	}
+
+	{
+		PX_PROFILE_ZONE("Articulations.setupDrives", contextID);
+	
+		if(!(desc.core->externalDriveIterations & 0x80000000))
+			PxMemZero(desc.externalLoads, sizeof(Mat33V) * linkCount);
+
+		if(!(desc.core->internalDriveIterations & 0x80000000))
+			PxMemZero(desc.internalLoads, sizeof(Mat33V) * linkCount);
+
+		PxReal				isf[DY_ARTICULATION_MAX_SIZE], esf[DY_ARTICULATION_MAX_SIZE];			// spring factors
+		Vec3V				drive[DY_ARTICULATION_MAX_SIZE];
+
+		bool externalEqualsInternalCompliance = (desc.core->internalDriveIterations&0xffff) == (desc.core->externalDriveIterations&0xffff);
+		for(PxU32 i=1;i<linkCount;i++)
+		{
+			const ArticulationJointCore& j = *links[i].inboundJoint;
+			isf[i] = (1 + j.damping * dt + j.spring * dt * dt) * getResistance(j.internalCompliance);
+			esf[i] = (1 + j.damping * dt + j.spring * dt * dt) * getResistance(j.externalCompliance);
+
+			externalEqualsInternalCompliance = externalEqualsInternalCompliance && j.internalCompliance == j.externalCompliance;
+		}
+
+		{
+			PX_PROFILE_ZONE("Articulations.jointInternalLoads", contextID);
+			PxcFsComputeJointLoadsSimd(fsData, baseInertia, desc.internalLoads, isf, linkCount, desc.core->internalDriveIterations&0xffff, allocator);
+			
+		}
+
+		{
+			PX_PROFILE_ZONE("Articulations.propagateDrivenInertia", contextID);
+			PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, isf, desc.internalLoads, allocator);
+		}
+
+		{
+			PX_PROFILE_ZONE("Articulations.computeJointDrives", contextID);
+			computeJointDrives(fsData, drive, links, poses, jointTransforms, desc.internalLoads, dt);
+		}
+
+		{
+			PX_PROFILE_ZONE("Articulations.applyJointDrives", contextID);
+			PxcFsApplyJointDrives(fsData, drive);
+		}
+
+		if(!externalEqualsInternalCompliance)
+		{
+			{
+				PX_PROFILE_ZONE("Articulations.jointExternalLoads", contextID);
+				PxcFsComputeJointLoadsSimd(fsData, baseInertia, desc.externalLoads, esf, linkCount, desc.core->externalDriveIterations&0xffff, allocator);
+			}
+
+			{
+				PX_PROFILE_ZONE("Articulations.propagateDrivenInertia", contextID);
+				PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, esf, desc.externalLoads, allocator);
+			}
+		}
+	}
+
+	{
+		PX_PROFILE_ZONE("Articulations.applyExternalImpulses", contextID);
+		Cm::SpatialVectorV	Z[DY_ARTICULATION_MAX_SIZE];
+
+		FloatV h = FLoad(dt);
+
+		const Cm::SpatialVector* acceleration = desc.acceleration;
+
+		const Vec3V vGravity = V3LoadU(gravity);
+
+		for(PxU32 i=0;i<linkCount;i++)
+		{
+			Vec3V linearAccel = V3LoadA(acceleration[i].linear);
+
+			if (!(desc.links[i].body->mInternalFlags & PxcRigidBody::eDISABLE_GRAVITY))
+				linearAccel = V3Add(linearAccel, vGravity);
+			Cm::SpatialVectorV a(linearAccel, V3LoadA(acceleration[i].angular));
+			Z[i] = -ArticulationFnsSimd<ArticulationFnsSimdBase>::multiply(baseInertia[i], a) * h;
+		}
+
+		applyImpulses(fsData, Z, getVelocity(fsData));
+	}
+
+	// save off the motion velocity in case there are no constraints with the articulation
+
+	PxMemCopy(desc.motionVelocity, velocity, linkCount*sizeof(Cm::SpatialVectorV));
+
+	// set up for deferred-update solve
+	
+	fsData.dirty = 0;
+
+	// solver progress counters
+	fsData.maxSolverNormalProgress		= 0;
+	fsData.maxSolverFrictionProgress	= 0;
+	fsData.solverProgress				= 0;
+
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+	for(PxU32 i=0;i<linkCount;i++)
+		getRefVelocity(fsData)[i] = getVelocity(fsData)[i];
+#endif
+
+	{
+		PX_PROFILE_ZONE("Articulations.setupConstraints", contextID);
+		return setupSolverConstraints(fsData, desc.solverDataSize, stream, constraintDesc, links, jointTransforms, dt, acCount, constraintBlockManager);
+	}
+}
+
+void ArticulationHelper::initializeDriveCache(	FsData& fsData,
+												PxU16 linkCount,
+												const ArticulationLink* links,
+												PxReal compliance,
+												PxU32 iterations,
+												char* scratchMemory,
+												PxU32 scratchMemorySize)
+{
+	PxcFsScratchAllocator allocator(scratchMemory, scratchMemorySize);
+	FsInertia*						PX_RESTRICT baseInertia = allocator.alloc<FsInertia>(linkCount);
+	ArticulationJointTransforms*	PX_RESTRICT jointTransforms = allocator.alloc<ArticulationJointTransforms>(linkCount);
+	PxTransform*					PX_RESTRICT poses = allocator.alloc<PxTransform>(linkCount);
+	Mat33V*							PX_RESTRICT jointLoads = allocator.alloc<Mat33V>(linkCount);
+
+	PxReal								springFactor[DY_ARTICULATION_MAX_SIZE];			// spring factors
+
+	prepareDataBlock(fsData, links, linkCount, poses, baseInertia, jointTransforms, 0);
+
+	PxMemZero(addAddr<void*>(&fsData,fsData.fsDataOffset), getFsDataSize(linkCount));
+	prepareFsData(fsData, links);
+
+	springFactor[0] = 0.0f;
+	for(PxU32 i=1;i<linkCount;i++)
+		springFactor[i] = getResistance(compliance);
+
+	PxMemZero(jointLoads, sizeof(Mat33V)*linkCount);
+	PxcFsComputeJointLoadsSimd(fsData, baseInertia, jointLoads, springFactor, linkCount, iterations&0xffff, allocator);
+	PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, springFactor, jointLoads, allocator);
+}
+
+void ArticulationHelper::updateBodies(const ArticulationSolverDesc& desc, PxReal dt)
+{
+	FsData& fsData = *desc.fsData;
+	const ArticulationCore& core = *desc.core;
+	const ArticulationLink* links = desc.links;
+	PxTransform* poses = desc.poses;
+	Cm::SpatialVectorV* motionVelocity = desc.motionVelocity;
+
+	Vec3V b[DY_ARTICULATION_MAX_SIZE];
+	
+	PxU32 linkCount = fsData.linkCount;
+
+	PxcFsFlushVelocity(fsData);
+	PxcLtbComputeJv(b, fsData, getVelocity(fsData));
+	PxcLtbProject(fsData, getVelocity(fsData), b);
+
+	// update positions
+	PxcFsScratchAllocator allocator(desc.scratchMemory, desc.scratchMemorySize);
+	PxTransform*		PX_RESTRICT oldPose = allocator.alloc<PxTransform>(desc.linkCount);
+
+	for(PxU32 i=0;i<linkCount;i++)
+	{
+		const PxVec3& lv = reinterpret_cast<PxVec3&>(motionVelocity[i].linear);
+		const PxVec3& av = reinterpret_cast<PxVec3&>(motionVelocity[i].angular);
+		oldPose[i] = poses[i];
+		poses[i] = PxTransform(poses[i].p + lv * dt, Ps::exp(av*dt) * poses[i].q);
+	}
+
+	bool projected = false;
+	const PxReal recipDt = 1.0f/dt;
+
+	FsInertia*						PX_RESTRICT baseInertia = allocator.alloc<FsInertia>(desc.linkCount);
+	ArticulationJointTransforms*	PX_RESTRICT jointTransforms = allocator.alloc<ArticulationJointTransforms>(desc.linkCount);
+
+	for(PxU32 iterations = 0; iterations < core.maxProjectionIterations; iterations++)
+	{
+		PxReal maxSeparation = -PX_MAX_F32;
+		for(PxU32 i=1;i<linkCount;i++)
+		{
+			const ArticulationJointCore& j = *links[i].inboundJoint;
+			maxSeparation = PxMax(maxSeparation,
+								  (poses[links[i].parent].transform(j.parentPose).p -
+								   poses[i].transform(j.childPose).p).magnitude());
+		}
+
+		if(maxSeparation<=core.separationTolerance)
+			break;
+
+		projected = true;
+
+		// we go around again, finding velocities which pull us back together - this
+		// form of projection is momentum-preserving but slow compared to hierarchical
+		// projection
+
+		PxMemZero(baseInertia, sizeof(FsInertia)*linkCount);
+
+		ArticulationHelper::setInertia(baseInertia[0], *links[0].bodyCore, poses[0]);
+		for(PxU32 i=1;i<linkCount;i++)
+		{
+			ArticulationHelper::setInertia(baseInertia[i], *links[i].bodyCore, poses[i]);
+			ArticulationHelper::setJointTransforms(jointTransforms[i], poses[links[i].parent], poses[i], *links[i].inboundJoint);
+		}
+
+		ArticulationHelper::prepareLtbMatrix(fsData, baseInertia, poses, jointTransforms, recipDt);
+		PxcLtbFactor(fsData);
+
+		LtbRow* rows = getLtbRows(fsData);
+
+		for(PxU32 i=1;i<linkCount;i++)
+			b[i] = rows[i].jC;
+
+		PxMemZero(motionVelocity, linkCount*sizeof(Cm::SpatialVectorV));
+
+		PxcLtbProject(fsData, motionVelocity, b);
+
+		for(PxU32 i=0;i<linkCount;i++)
+		{
+			const PxVec3& lv = reinterpret_cast<PxVec3&>(motionVelocity[i].linear);
+			const PxVec3& av = reinterpret_cast<PxVec3&>(motionVelocity[i].angular);
+			poses[i] = PxTransform(poses[i].p + lv * dt,  Ps::exp(av*dt) * poses[i].q);
+		}
+	}
+
+	if(projected)
+	{
+		// recompute motion velocities.
+		for(PxU32 i=0;i<linkCount;i++)
+		{
+			motionVelocity[i].linear = V3LoadU((poses[i].p - oldPose[i].p) * recipDt);
+			motionVelocity[i].angular = V3LoadU(Ps::log(poses[i].q * oldPose[i].q.getConjugate()) * recipDt);
+		}
+	}
+
+	Cm::SpatialVectorV* velocity = getVelocity(fsData);
+	for(PxU32 i=0;i<linkCount;i++)
+	{
+		links[i].bodyCore->body2World = poses[i];
+
+		V3StoreA(velocity[i].linear,  links[i].bodyCore->linearVelocity);
+		V3StoreA(velocity[i].angular, links[i].bodyCore->angularVelocity);
+	}
+}
+
+void ArticulationHelper::setInertia(FsInertia& inertia,
+									const PxsBodyCore& body,
+									const PxTransform& pose)
+{
+	// assumes that elements that are supposed to be zero (i.e. la matrix and off diagonal elements of ll) are zero
+
+	const PxMat33 R(pose.q);
+	const PxVec3& v = body.inverseInertia;
+	const PxReal m = 1.0f/body.inverseMass;
+	V3WriteX(inertia.ll.col0, m);
+	V3WriteY(inertia.ll.col1, m);
+	V3WriteZ(inertia.ll.col2, m);
+
+	PX_ALIGN_PREFIX(16) PxMat33 PX_ALIGN_SUFFIX(16) alignedInertia = R * PxMat33::createDiagonal(PxVec3(1.0f/v.x, 1.0f/v.y, 1.0f/v.z)) * R.getTranspose();
+	alignedInertia = (alignedInertia + alignedInertia.getTranspose())*0.5f;
+	inertia.aa = Mat33V_From_PxMat33(alignedInertia);
+}
+
+void ArticulationHelper::setJointTransforms(ArticulationJointTransforms& transforms,
+											const PxTransform& parentPose,
+											const PxTransform& childPose,
+											const ArticulationJointCore& joint)
+{
+	transforms.cA2w = parentPose.transform(joint.parentPose);
+	transforms.cB2w = childPose.transform(joint.childPose);
+	transforms.cB2cA = transforms.cA2w.transformInv(transforms.cB2w);
+	if(transforms.cB2cA.q.w<0)	// the relative quat must be the short way round for limits to work...
+	{
+		transforms.cB2cA.q	= -transforms.cB2cA.q;
+		transforms.cB2w.q	= -transforms.cB2w.q;
+	}
+}
+
+void ArticulationHelper::prepareLtbMatrix(	FsData& fsData,
+											const FsInertia* baseInertia,
+											const PxTransform* poses,
+											const ArticulationJointTransforms* jointTransforms,
+											PxReal recipDt)
+{
+	PxU32 linkCount = fsData.linkCount;
+	LtbRow* rows = getLtbRows(fsData);
+
+	rows[0].inertia = baseInertia[0];
+
+	const PxVec3 axis[3] = { PxVec3(1.0f,0.0f,0.0f), PxVec3(0.0f,1.0f,0.0f), PxVec3(0.0f,0.0f,1.0f) };
+	for(PxU32 i=1;i<linkCount;i++)
+	{
+		rows[i].inertia = baseInertia[i];
+		const ArticulationJointTransforms& s = jointTransforms[i];
+
+		const PxU32 p = fsData.parent[i];
+
+		// we put the action point of the constraint at the root of the child
+
+		const PxVec3 ra = s.cB2w.p - poses[p].p;
+		const PxVec3 rb = s.cB2w.p - poses[i].p;
+
+		// A bit different from the 1D solver, 
+		// there we use a formulation	j0.v0 - j1.v1 + c = 0
+		// here we use the homogeneous	j0.v0 + j1.v1 + c = 0
+		
+		const PxVec3 error = (s.cA2w.p - s.cB2w.p) * 0.99f;
+
+		Cm::SpatialVectorV* j0 = rows[i].j0;
+		Cm::SpatialVectorV* j1 = rows[i].j1;
+
+		for(PxU32 j=0;j<3;j++)
+		{
+			PxVec3 n = axis[j];
+			j0[j] = Cm::SpatialVector(n, ra.cross(n));
+			j1[j] = Cm::SpatialVector(-n, -rb.cross(n));
+		}
+
+		rows[i].jC = V3LoadU(error*recipDt);
+	}
+}
+
+void ArticulationHelper::prepareFsData(FsData& fsData, const ArticulationLink* links)
+{
+	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+	PxU32 linkCount = fsData.linkCount;
+	FsRow* rows = getFsRows(fsData);
+	FsRowAux* aux = getAux(fsData);
+	const FsJointVectors* jointVectors = getJointVectors(fsData);
+
+	rows[0].children = links[0].children;
+	rows[0].pathToRoot = 1;
+
+	PX_ALIGN_PREFIX(16) PxVec4 v[] PX_ALIGN_SUFFIX(16) = { PxVec4(1.f,0,0,0), PxVec4(0,1.f,0,0), PxVec4(0,0,1.f,0) } ;
+	const Vec3V* axes = reinterpret_cast<const Vec3V*>(v);
+
+	for(PxU32 i=1;i<linkCount;i++)
+	{
+		PxU32 p = links[i].parent;
+		FsRow& r = rows[i];
+		FsRowAux& a = aux[i];
+
+		PX_UNUSED(p);
+
+		r.children = links[i].children;
+		r.pathToRoot = links[i].pathToRoot;
+		
+		const Vec3V jointOffset =	jointVectors[i].jointOffset;
+		
+		// the joint coords are world oriented, located at the joint.
+		a.S[0] = Fns::translateMotion(jointOffset, Cm::SpatialVectorV(V3Zero(), axes[0]));
+		a.S[1] = Fns::translateMotion(jointOffset, Cm::SpatialVectorV(V3Zero(), axes[1]));
+		a.S[2] = Fns::translateMotion(jointOffset, Cm::SpatialVectorV(V3Zero(), axes[2]));
+	}
+}
+
+PX_FORCE_INLINE PxReal ArticulationHelper::getResistance(PxReal compliance)
+{
+	PX_ASSERT(compliance>0);
+	return 1.0f/compliance;
+}
+
+void ArticulationHelper::createHardLimit(	const FsData& fsData,
+											const ArticulationLink* links,
+											PxU32 linkIndex,
+											SolverConstraint1DExt& s, 
+											const PxVec3& axis, 
+											PxReal err,
+											PxReal recipDt)
+{
+	init(s, PxVec3(0), PxVec3(0), axis, axis, 0, PX_MAX_F32);
+
+	ArticulationHelper::getImpulseSelfResponse(fsData, 
+												  links[linkIndex].parent,Cm::SpatialVector(PxVec3(0), axis), s.deltaVA,
+												  linkIndex, Cm::SpatialVector(PxVec3(0), -axis), s.deltaVB);
+
+	const PxReal unitResponse = axis.dot(reinterpret_cast<PxVec3&>(s.deltaVA.angular)) - axis.dot(reinterpret_cast<PxVec3&>(s.deltaVB.angular));
+	if(unitResponse<0.0f)
+		Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "Warning: articulation ill-conditioned or under severe stress, joint limit ignored");
+
+	const PxReal recipResponse = unitResponse>0.0f ? 1.0f/unitResponse : 0.0f;
+
+	s.constant = recipResponse * -err * recipDt;
+	s.unbiasedConstant = err>0.0f ? s.constant : 0.0f;
+	s.velMultiplier = -recipResponse;
+	s.impulseMultiplier = 1.0f;
+}
+
+void ArticulationHelper::createTangentialSpring(const FsData& fsData,
+												const ArticulationLink* links,
+												PxU32 linkIndex,
+												SolverConstraint1DExt& s, 
+												const PxVec3& axis, 
+												PxReal stiffness,
+												PxReal damping,
+												PxReal dt)
+{
+	init(s, PxVec3(0), PxVec3(0), axis, axis, -PX_MAX_F32, PX_MAX_F32);
+
+	Cm::SpatialVector axis6(PxVec3(0), axis);
+	PxU32 parent = links[linkIndex].parent;
+	getImpulseSelfResponse(fsData, parent, axis6, s.deltaVA, linkIndex, -axis6, s.deltaVB);
+
+	const PxReal unitResponse = axis.dot(reinterpret_cast<PxVec3&>(s.deltaVA.angular)) - axis.dot(reinterpret_cast<PxVec3&>(s.deltaVB.angular));
+	if(unitResponse<0.0f)
+		Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "Warning: articulation ill-conditioned or under severe stress, tangential spring ignored");
+	const PxReal recipResponse = unitResponse>0.0F ? 1.0f/unitResponse : 0.0f;
+
+	// this is a specialization of the spring code in setSolverConstants() for acceleration springs.
+	// general case is  b = dt * (c.mods.spring.damping * c.velocityTarget - c.mods.spring.stiffness * geomError);
+    // but geomError and velocityTarget are both zero
+
+	const PxReal a = dt * dt * stiffness + dt * damping;
+    const PxReal x = 1.0f/(1.0f+a);
+    s.constant = s.unbiasedConstant = 0.0f;
+    s.velMultiplier = -x * recipResponse * a;
+    s.impulseMultiplier = 1.0f - x;
+}
+
+PxU32 ArticulationHelper::setupSolverConstraints(	FsData& fsData, PxU32 solverDataSize,
+													PxcConstraintBlockStream& stream,
+													PxSolverConstraintDesc* constraintDesc,
+													const ArticulationLink* links,
+													const ArticulationJointTransforms* jointTransforms,
+													PxReal dt,
+													PxU32& acCount,
+													PxsConstraintBlockManager& constraintBlockManager)
+{
+	acCount = 0;
+
+	const PxU16 linkCount = fsData.linkCount;
+	PxU32 descCount = 0;
+	const PxReal recipDt = 1.0f/dt;
+
+	const PxConstraintInvMassScale ims(1.0f, 1.0f, 1.0f, 1.0f);
+
+	for(PxU16 i=1;i<linkCount;i++)
+	{
+		const ArticulationJointCore& j = *links[i].inboundJoint;
+
+		if(i+1<linkCount)
+		{
+			Ps::prefetch(links[i+1].inboundJoint, sizeof (ArticulationJointCore));
+			Ps::prefetch(&jointTransforms[i+1], sizeof(ArticulationJointTransforms));
+		}
+		
+		if(!(j.twistLimited || j.swingLimited))
+			continue;
+
+		PxQuat swing, twist;
+		Ps::separateSwingTwist(jointTransforms[i].cB2cA.q, swing, twist);
+	
+		Cm::ConeLimitHelper eh(j.tanQSwingY, j.tanQSwingZ, j.tanQSwingPad);
+		PxVec3 swingLimitAxis;
+		PxReal swingLimitError = 0.0f;
+
+		const bool swingLimited = j.swingLimited && eh.getLimit(swing, swingLimitAxis, swingLimitError);
+		const bool tangentialStiffness = swingLimited && (j.tangentialStiffness>0 || j.tangentialDamping>0);
+
+		const PxVec3 twistAxis = jointTransforms[i].cB2w.rotate(PxVec3(1.0f,0,0));
+		const PxReal tqTwistAngle = Ps::tanHalf(twist.x, twist.w);
+
+		const bool twistLowerLimited = j.twistLimited && tqTwistAngle < Cm::tanAdd(j.tanQTwistLow, j.tanQTwistPad);
+		const bool twistUpperLimited = j.twistLimited && tqTwistAngle > Cm::tanAdd(j.tanQTwistHigh, -j.tanQTwistPad);
+	
+		const PxU8 constraintCount = PxU8(swingLimited + tangentialStiffness + twistUpperLimited + twistLowerLimited);
+		if(!constraintCount)
+			continue;
+
+		PxSolverConstraintDesc& desc = constraintDesc[descCount++];
+
+		desc.articulationA = &fsData;
+		desc.linkIndexA = Ps::to16(links[i].parent);
+		desc.articulationALength = Ps::to16(solverDataSize);
+
+		desc.articulationB = &fsData;
+		desc.linkIndexB = i;
+		desc.articulationBLength = Ps::to16(solverDataSize);
+
+		const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) + 
+								 sizeof(SolverConstraint1DExt) * constraintCount;
+
+		PX_ASSERT(0==(constraintLength & 0x0f));
+		desc.constraintLengthOver16 = Ps::to16(constraintLength/16);
+		
+		desc.constraint = stream.reserve(constraintLength + 16u, constraintBlockManager);
+
+		desc.writeBack = NULL;
+		
+		SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
+		SolverConstraint1DExt* constraints = reinterpret_cast<SolverConstraint1DExt*>(desc.constraint + sizeof(SolverConstraint1DHeader));
+
+		init(*header, constraintCount, true, ims);
+
+		PxU32 cIndex = 0;
+
+		if(swingLimited)
+		{
+			const PxVec3 normal = jointTransforms[i].cA2w.rotate(swingLimitAxis);
+			createHardLimit(fsData, links, i, constraints[cIndex++], normal, swingLimitError, recipDt);
+			if(tangentialStiffness)
+			{
+				const PxVec3 tangent = twistAxis.cross(normal).getNormalized();
+				createTangentialSpring(fsData, links, i, constraints[cIndex++], tangent, j.tangentialStiffness, j.tangentialDamping, dt);
+			}
+		}
+
+		if(twistUpperLimited)
+			createHardLimit(fsData, links, i, constraints[cIndex++], twistAxis, (j.tanQTwistHigh - tqTwistAngle)*4, recipDt);
+
+		if(twistLowerLimited)
+			createHardLimit(fsData, links, i, constraints[cIndex++], -twistAxis, -(j.tanQTwistLow - tqTwistAngle)*4, recipDt);
+
+		*(desc.constraint + getConstraintLength(desc)) = 0;
+
+		PX_ASSERT(cIndex == constraintCount);
+		acCount += constraintCount;
+	}
+
+	return descCount;
+}
+
+void ArticulationHelper::computeJointDrives(FsData& fsData,
+											Vec3V* drives, 
+											const ArticulationLink* links,
+											const PxTransform* poses, 
+											const ArticulationJointTransforms* transforms, 
+											const Mat33V* loads, 
+											PxReal dt)
+{
+	typedef ArticulationFnsScalar Fns;
+
+	const PxU32 linkCount = fsData.linkCount;
+	const Cm::SpatialVector* velocity = reinterpret_cast<const Cm::SpatialVector*>(getVelocity(fsData));
+
+	for(PxU32 i=1; i<linkCount;i++)
+	{
+		PxU32 parent = links[i].parent;
+		const ArticulationJointTransforms& b = transforms[i];
+		const ArticulationJointCore& j = *links[i].inboundJoint;
+
+		const Cm::SpatialVector currentVel = Fns::translateMotion(poses[i].p - b.cA2w.p, velocity[i])
+											 - Fns::translateMotion(poses[parent].p - b.cA2w.p, velocity[parent]);
+
+		// we want the quat such that q * cB2cA = targetPosition
+		PxVec3 rotVec;
+		if(j.driveType == PxU8(PxArticulationJointDriveType::eTARGET))
+			rotVec = Ps::log(j.targetPosition * b.cB2cA.q.getConjugate()); // as a rotation vector
+		else
+			rotVec = j.targetPosition.getImaginaryPart();
+
+		// NM's Tests indicate behavior is better without the term commented out below, even though
+		// an implicit spring derivation suggests it should be there.
+
+		const PxVec3 posError = b.cA2w.rotate(rotVec); // - currentVel.angular * 0.5f * dt
+		const PxVec3 velError = b.cA2w.rotate(j.targetVelocity) - currentVel.angular;
+
+		drives[i] = M33MulV3(loads[i], V3LoadU((j.spring * posError + j.damping * velError) * dt * getResistance(j.internalCompliance)));
+	}
+}
+
+ArticulationPImpl::ComputeUnconstrainedVelocitiesFn ArticulationPImpl::sComputeUnconstrainedVelocities = NULL;
+ArticulationPImpl::UpdateBodiesFn ArticulationPImpl::sUpdateBodies = NULL;
+ArticulationPImpl::SaveVelocityFn ArticulationPImpl::sSaveVelocity = NULL;
+
+}
+}
author	git perforce import user <a@b>	2016-10-25 12:29:14 -0600
committer	Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>	2016-10-25 18:56:37 -0500
commit	3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree	fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp
download	physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip