aboutsummaryrefslogtreecommitdiff
path: root/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp
diff options
context:
space:
mode:
authorgit perforce import user <a@b>2016-10-25 12:29:14 -0600
committerSheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>2016-10-25 18:56:37 -0500
commit3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
treefa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp
downloadphysx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
Initial commit:
PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp')
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp1344
1 files changed, 1344 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp
new file mode 100644
index 00000000..ea9ccb8d
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp
@@ -0,0 +1,1344 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "foundation/PxVec3.h"
+#include "foundation/PxMath.h"
+#include "foundation/PxMemory.h"
+#include "foundation/PxProfiler.h"
+
+#include "PsUtilities.h"
+#include "CmSpatialVector.h"
+#include "DyArticulationHelper.h"
+#include "DyArticulationReference.h"
+#include "DyArticulationFnsSimd.h"
+#include "DyArticulationFnsScalar.h"
+#include "DyArticulationFnsDebug.h"
+#include "DySolverConstraintDesc.h"
+#include "PxvDynamics.h"
+#include "DyArticulation.h"
+#include "PxcRigidBody.h"
+#include "CmConeLimitHelper.h"
+#include "DySolverConstraint1D.h"
+#include "PxcConstraintBlockStream.h"
+#include "DySolverConstraint1D.h"
+#include "DyArticulationPImpl.h"
+#include "PsFoundation.h"
+
+namespace physx
+{
+
+namespace Dy
+{
+
+void PxcFsFlushVelocity(FsData& matrix);
+
+// we pass this around by value so that when we return from a function the size is unaltered. That means we don't preserve state
+// across functions - even though that could be handy to preserve baseInertia and jointTransforms across the solver so that if we
+// need to run position projection positions they don't get recomputed.
+
+struct PxcFsScratchAllocator
+{
+ char* base;
+ size_t size;
+ size_t taken;
+ PxcFsScratchAllocator(char* p, size_t s): base(p), size(s), taken(0) {}
+
+ template<typename T>
+ static size_t sizeof16()
+ {
+ return (sizeof(T)+15)&~15;
+ }
+
+ template<class T> T* alloc(PxU32 count)
+ {
+ size_t s = sizeof16<T>();
+ PX_ASSERT(taken+s*count <= size);
+ T* result = reinterpret_cast<T*>(base+taken);
+ taken+=s*count;
+ return result;
+ }
+};
+
+void PxcLtbFactor(FsData& m)
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+ LtbRow* rows = getLtbRows(m);
+
+ for(PxU32 i=m.linkCount; --i>0;)
+ {
+ LtbRow& b = rows[i];
+ PxU32 p = m.parent[i];
+ const FsInertia inertia = Fns::invertInertia(b.inertia);
+ const Mat33V jResponse = Fns::invertSym33(M33Neg(Fns::computeSIS(inertia, b.j1, b.j1)));
+ b.inertia = inertia;
+ rows[p].inertia = Fns::multiplySubtract(rows[p].inertia, jResponse, b.j0, b.j0);
+ b.jResponse = jResponse;
+
+ }
+ rows[0].inertia = Fns::invertInertia(rows[0].inertia);
+}
+
+void PxcLtbSolve(const FsData& m,
+ Vec3V* b, // rhs error to solve for
+ Cm::SpatialVectorV* y) // velocity delta output
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ const LtbRow* rows = getLtbRows(m);
+ PxMemZero(y, m.linkCount*sizeof(Cm::SpatialVectorV));
+
+ for(PxU32 i=m.linkCount;i-->1;)
+ {
+ const LtbRow& r = rows[i];
+ const PxU32 p = m.parent[i];
+
+ const Vec3V t = V3Sub(b[i], Fns::axisDot(r.j1, y[i]));
+ b[i] = t;
+ y[p] = Fns::subtract(y[p], Fns::axisMultiply(r.j0, t));
+ }
+
+ y[0] = Fns::multiply(rows[0].inertia, y[0]);
+
+ for(PxU32 i=1; i<m.linkCount; i++)
+ {
+ const LtbRow& r = rows[i];
+ const PxU32 p = m.parent[i];
+
+ const Vec3V t = V3Sub(M33MulV3(r.jResponse, b[i]), Fns::axisDot(r.j0, y[p]));
+ y[i] = Fns::subtract(Fns::multiply(r.inertia, y[i]), Fns::axisMultiply(r.j1, t));
+ }
+}
+
+void PxcLtbProject(const FsData& m,
+ Cm::SpatialVectorV* velocity,
+ Vec3V* b)
+{
+ PX_ASSERT(m.linkCount<=DY_ARTICULATION_MAX_SIZE);
+ Cm::SpatialVectorV y[DY_ARTICULATION_MAX_SIZE];
+
+ PxcLtbSolve(m, b, y);
+
+ for(PxU32 i=0;i<m.linkCount;i++)
+ velocity[i] -= y[i];
+}
+
+void PxcFsPropagateDrivenInertiaSimd(FsData& matrix,
+ const FsInertia* baseInertia,
+ const PxReal* isf,
+ const Mat33V* load,
+ PxcFsScratchAllocator allocator)
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ Cm::SpatialVectorV IS[3];
+
+ FsRow* rows = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ FsInertia* inertia = allocator.alloc<FsInertia>(matrix.linkCount);
+ PxMemCopy(inertia, baseInertia, matrix.linkCount*sizeof(FsInertia));
+
+ for(PxU32 i=matrix.linkCount; --i>0;)
+ {
+ FsRow& r = rows[i];
+ const FsRowAux& a = aux[i];
+ const FsJointVectors& jv = jointVectors[i];
+
+ const Mat33V m = Fns::computeSIS(inertia[i], a.S, IS);
+ const FloatV f = FLoad(isf[i]);
+
+ const Mat33V D = Fns::invertSym33(Mat33V(V3ScaleAdd(load[i].col0, f, m.col0),
+ V3ScaleAdd(load[i].col1, f, m.col1),
+ V3ScaleAdd(load[i].col2, f, m.col2)));
+ r.D = D;
+
+ inertia[matrix.parent[i]] = Fns::addInertia(inertia[matrix.parent[i]],
+ Fns::translateInertia(jv.parentOffset, Fns::multiplySubtract(inertia[i], D, IS, r.DSI)));
+ }
+
+ getRootInverseInertia(matrix) = Fns::invertInertia(inertia[0]);
+}
+
+PX_FORCE_INLINE Cm::SpatialVectorV propagateDrivenImpulse(const FsRow& row,
+ const FsJointVectors& jv,
+ Vec3V& SZMinusQ,
+ const Cm::SpatialVectorV& Z,
+ const Vec3V& Q)
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ SZMinusQ = V3Sub(V3Add(Z.angular, V3Cross(Z.linear,jv.jointOffset)), Q);
+ Cm::SpatialVectorV result = Fns::translateForce(jv.parentOffset, Z - Fns::axisMultiply(row.DSI, SZMinusQ));
+
+ return result;
+}
+
+void PxcFsApplyJointDrives(FsData& matrix,
+ const Vec3V* Q)
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+
+ const FsRow* rows = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ Cm::SpatialVectorV Z[DY_ARTICULATION_MAX_SIZE];
+ Cm::SpatialVectorV dV[DY_ARTICULATION_MAX_SIZE];
+ Vec3V SZminusQ[DY_ARTICULATION_MAX_SIZE];
+
+ PxMemZero(Z, matrix.linkCount*sizeof(Cm::SpatialVectorV));
+
+ for(PxU32 i=matrix.linkCount;i-->1;)
+ Z[matrix.parent[i]] += propagateDrivenImpulse(rows[i], jointVectors[i], SZminusQ[i], Z[i], Q[i]);
+
+
+ dV[0] = Fns::multiply(getRootInverseInertia(matrix), -Z[0]);
+
+ for(PxU32 i=1;i<matrix.linkCount;i++)
+ dV[i] = Fns::propagateVelocity(rows[i], jointVectors[i], SZminusQ[i], dV[matrix.parent[i]], aux[i]);
+
+ Cm::SpatialVectorV* V = getVelocity(matrix);
+ for(PxU32 i=0;i<matrix.linkCount;i++)
+ V[i] += dV[i];
+}
+
+void ArticulationHelper::applyImpulses( const FsData& matrix,
+ Cm::SpatialVectorV* Z,
+ Cm::SpatialVectorV* V)
+{
+ // note: Z is the negated impulse
+
+
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+ const FsRow* rows = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ Cm::SpatialVectorV dV[DY_ARTICULATION_MAX_SIZE];
+ Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
+
+ for(PxU32 i=matrix.linkCount;i-->1;)
+ Z[matrix.parent[i]] += Fns::propagateImpulse(rows[i], jointVectors[i], SZ[i], Z[i], aux[i]);
+
+ dV[0] = Fns::multiply(getRootInverseInertia(matrix), -Z[0]);
+
+ for(PxU32 i=1;i<matrix.linkCount;i++)
+ dV[i] = Fns::propagateVelocity(rows[i], jointVectors[i], SZ[i], dV[matrix.parent[i]], aux[i]);
+
+ for(PxU32 i=0;i<matrix.linkCount;i++)
+ V[i] += dV[i];
+}
+
+void getImpulseResponseSlow(const FsData& matrix,
+ PxU32 linkID0,
+ const Cm::SpatialVectorV& impulse0,
+ Cm::SpatialVectorV& deltaV0,
+ PxU32 linkID1,
+ const Cm::SpatialVectorV& impulse1,
+ Cm::SpatialVectorV& deltaV1)
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ const FsRow* rows = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+ PxU32 stack[DY_ARTICULATION_MAX_SIZE];
+ Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
+
+ PxU32 i0, i1, ic;
+
+ for(i0 = linkID0, i1 = linkID1; i0!=i1;) // find common path
+ {
+ if(i0<i1)
+ i1 = matrix.parent[i1];
+ else
+ i0 = matrix.parent[i0];
+ }
+
+ PxU32 common = i0;
+
+ Cm::SpatialVectorV Z0 = -impulse0, Z1 = -impulse1;
+ for(i0 = 0; linkID0!=common; linkID0 = matrix.parent[linkID0])
+ {
+ Z0 = Fns::propagateImpulse(rows[linkID0], jointVectors[linkID0], SZ[linkID0], Z0, aux[linkID0]);
+ stack[i0++] = linkID0;
+ }
+
+ for(i1 = i0; linkID1!=common; linkID1 = matrix.parent[linkID1])
+ {
+ Z1 = Fns::propagateImpulse(rows[linkID1], jointVectors[linkID1], SZ[linkID1], Z1, aux[linkID1]);
+ stack[i1++] = linkID1;
+ }
+
+ Cm::SpatialVectorV Z = Z0 + Z1;
+ for(ic = i1; common; common = matrix.parent[common])
+ {
+ Z = Fns::propagateImpulse(rows[common], jointVectors[common], SZ[common], Z, aux[common]);
+ stack[ic++] = common;
+ }
+
+ Cm::SpatialVectorV v = Fns::multiply(getRootInverseInertia(matrix), -Z);
+
+ for(PxU32 index = ic; index-->i1 ;)
+ v = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], v, aux[stack[index]]);
+
+ deltaV1 = v;
+ for(PxU32 index = i1; index-->i0 ;)
+ deltaV1 = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], deltaV1, aux[stack[index]]);
+
+ deltaV0 = v;
+ for(PxU32 index = i0; index-->0;)
+ deltaV0 = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], deltaV0, aux[stack[index]]);
+}
+
+void PxcFsGetImpulseResponse(const FsData& matrix,
+ PxU32 linkID,
+ const Cm::SpatialVectorV& impulse,
+ Cm::SpatialVectorV& deltaV)
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+ Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
+
+ const FsRow* rows = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ Cm::SpatialVectorV Z = -impulse;
+
+ for(PxU32 i = linkID; i; i = matrix.parent[i])
+ Z = Fns::propagateImpulse(rows[i], jointVectors[i], SZ[i], Z, aux[i]);
+
+ deltaV = Fns::multiply(getRootInverseInertia(matrix), -Z);
+
+ PX_ASSERT(rows[linkID].pathToRoot&1);
+
+ for(ArticulationBitField i=rows[linkID].pathToRoot-1; i; i &= (i-1))
+ {
+ const PxU32 index = ArticulationLowestSetBit(i);
+ deltaV = Fns::propagateVelocity(rows[index], jointVectors[index], SZ[index], deltaV, aux[index]);
+ }
+}
+
+void PxcFsGetImpulseSelfResponse(const FsData& matrix,
+ PxU32 linkID0,
+ const Cm::SpatialVectorV& impulse0,
+ Cm::SpatialVectorV& deltaV0,
+ PxU32 linkID1,
+ const Cm::SpatialVectorV& impulse1,
+ Cm::SpatialVectorV& deltaV1)
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ PX_ASSERT(linkID0 != linkID1);
+
+ const FsRow* rows = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ // standard case: parent-child limit
+ if(matrix.parent[linkID1] == linkID0)
+ {
+ Vec3V SZ;
+ const Cm::SpatialVectorV Z = impulse0 - Fns::propagateImpulse(rows[linkID1], jointVectors[linkID1], SZ, -impulse1, aux[linkID1]);
+ PxcFsGetImpulseResponse(matrix, linkID0, Z, deltaV0);
+ deltaV1 = Fns::propagateVelocity(rows[linkID1], jointVectors[linkID1], SZ, deltaV0, aux[linkID1]);
+ }
+ else
+ getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ Cm::SpatialVector V[DY_ARTICULATION_MAX_SIZE];
+ for(PxU32 i=0;i<matrix.linkCount;i++) V[i] = Cm::SpatialVector::zero();
+ ArticulationRef::applyImpulse(matrix,V,linkID0, reinterpret_cast<const Cm::SpatialVector&>(impulse0));
+ ArticulationRef::applyImpulse(matrix,V,linkID1, reinterpret_cast<const Cm::SpatialVector&>(impulse1));
+
+ Cm::SpatialVector refV0 = V[linkID0];
+ Cm::SpatialVector refV1 = V[linkID1];
+#endif
+}
+
+namespace
+{
+
+ PX_FORCE_INLINE Cm::SpatialVectorV getImpulseResponseSimd(const FsData& matrix, PxU32 linkID, Vec3V lZ, Vec3V aZ)
+ {
+ PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+ Vec3V SZ[DY_ARTICULATION_MAX_SIZE];
+ PxU32 indices[DY_ARTICULATION_MAX_SIZE], iCount = 0;
+
+ const FsRow*PX_RESTRICT rows = getFsRows(matrix);
+ const FsRowAux*PX_RESTRICT aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ PX_UNUSED(aux);
+ PX_ASSERT(rows[linkID].pathToRoot&1);
+
+ lZ = V3Neg(lZ);
+ aZ = V3Neg(aZ);
+
+ for(PxU32 i = linkID; i; i = matrix.parent[i])
+ {
+ const FsRow& r = rows[i];
+ const FsJointVectors& j = jointVectors[i];
+
+ Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset));
+ SZ[iCount] = sz;
+
+ lZ = V3NegScaleSub(r.DSI[0].linear, V3GetX(sz), V3NegScaleSub(r.DSI[1].linear, V3GetY(sz), V3NegScaleSub(r.DSI[2].linear, V3GetZ(sz), lZ)));
+ aZ = V3NegScaleSub(r.DSI[0].angular, V3GetX(sz), V3NegScaleSub(r.DSI[1].angular, V3GetY(sz), V3NegScaleSub(r.DSI[2].angular, V3GetZ(sz), aZ)));
+
+ aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ));
+ indices[iCount++] = i;
+ }
+
+ const FsInertia& I = getRootInverseInertia(matrix);
+
+ Vec3V lV = V3Neg(V3Add(M33MulV3(I.ll, lZ), M33MulV3(I.la, aZ)));
+ Vec3V aV = V3Neg(V3Add(M33TrnspsMulV3(I.la, lZ), M33MulV3(I.aa, aZ)));
+
+ while(iCount)
+ {
+ PxU32 i = indices[--iCount];
+ const FsRow& r = rows[i];
+ const FsJointVectors& j = jointVectors[i];
+
+ lV = V3Sub(lV, V3Cross(j.parentOffset, aV));
+
+ Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV), V3Dot(r.DSI[1].linear, lV), V3Dot(r.DSI[2].linear, lV)),
+ V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV)));
+
+ n = V3Add(n, M33MulV3(r.D, SZ[iCount]));
+ lV = V3Sub(lV, V3Cross(j.jointOffset, n));
+ aV = V3Sub(aV, n);
+ }
+
+ return Cm::SpatialVectorV(lV, aV);
+ }
+}
+
+void ArticulationHelper::getImpulseResponse(const FsData& matrix,
+ PxU32 linkID,
+ const Cm::SpatialVectorV& impulse,
+ Cm::SpatialVectorV& deltaV)
+{
+ PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE);
+
+ deltaV = getImpulseResponseSimd(matrix, linkID, impulse.linear, impulse.angular);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ Cm::SpatialVectorV deltaV_;
+ PxcFsGetImpulseResponse(matrix, linkID, impulse, deltaV_);
+ PX_ASSERT(almostEqual(deltaV_, deltaV,1e-3f));
+#endif
+}
+
+void ArticulationHelper::getImpulseSelfResponse(const FsData& matrix,
+ PxU32 linkID0,
+ const Cm::SpatialVectorV& impulse0,
+ Cm::SpatialVectorV& deltaV0,
+ PxU32 linkID1,
+ const Cm::SpatialVectorV& impulse1,
+ Cm::SpatialVectorV& deltaV1)
+{
+ PX_ASSERT(linkID0 != linkID1);
+
+ const FsRow* rows = getFsRows(matrix);
+ const FsRowAux* aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ PX_UNUSED(aux);
+
+ Cm::SpatialVectorV& dV0 = deltaV0,
+ & dV1 = deltaV1;
+
+ // standard case: parent-child limit
+ if(matrix.parent[linkID1] == linkID0)
+ {
+ const FsRow& r = rows[linkID1];
+ const FsJointVectors& j = jointVectors[linkID1];
+
+ Vec3V lZ = V3Neg(impulse1.linear),
+ aZ = V3Neg(impulse1.angular);
+
+ Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset));
+
+ lZ = V3Sub(lZ, V3ScaleAdd(r.DSI[0].linear, V3GetX(sz), V3ScaleAdd(r.DSI[1].linear, V3GetY(sz), V3Scale(r.DSI[2].linear, V3GetZ(sz)))));
+ aZ = V3Sub(aZ, V3ScaleAdd(r.DSI[0].angular, V3GetX(sz), V3ScaleAdd(r.DSI[1].angular, V3GetY(sz), V3Scale(r.DSI[2].angular, V3GetZ(sz)))));
+
+ aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ));
+
+ lZ = V3Sub(impulse0.linear, lZ);
+ aZ = V3Sub(impulse0.angular, aZ);
+
+ dV0 = getImpulseResponseSimd(matrix, linkID0, lZ, aZ);
+
+ Vec3V aV = dV0.angular;
+ Vec3V lV = V3Sub(dV0.linear, V3Cross(j.parentOffset, aV));
+
+ Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV), V3Dot(r.DSI[1].linear, lV), V3Dot(r.DSI[2].linear, lV)),
+ V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV)));
+
+ n = V3Add(n, M33MulV3(r.D, sz));
+ lV = V3Sub(lV, V3Cross(j.jointOffset, n));
+ aV = V3Sub(aV, n);
+
+ dV1 = Cm::SpatialVectorV(lV, aV);
+ }
+ else
+ getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ Cm::SpatialVectorV dV0_, dV1_;
+ PxcFsGetImpulseSelfResponse(matrix, linkID0, impulse0, dV0_, linkID1, impulse1, dV1_);
+
+ PX_ASSERT(almostEqual(dV0_, dV0, 1e-3f));
+ PX_ASSERT(almostEqual(dV1_, dV1, 1e-3f));
+#endif
+}
+
+void PxcLtbComputeJv(Vec3V* jv, const FsData& m, const Cm::SpatialVectorV* velocity)
+{
+ const LtbRow* rows = getLtbRows(m);
+ const FsRow* fsRows = getFsRows(m);
+ const FsJointVectors* jointVectors = getJointVectors(m);
+
+ PX_UNUSED(rows);
+ PX_UNUSED(fsRows);
+
+ for(PxU32 i=1;i<m.linkCount;i++)
+ {
+ Cm::SpatialVectorV pv = velocity[m.parent[i]], v = velocity[i];
+
+ Vec3V parentOffset = V3Add(jointVectors[i].jointOffset, jointVectors[i].parentOffset);
+
+ Vec3V k0v = V3Add(pv.linear, V3Cross(pv.angular, parentOffset)),
+ k1v = V3Add(v.linear, V3Cross(v.angular,jointVectors[i].jointOffset));
+ jv[i] = V3Sub(k0v, k1v);
+ }
+}
+
+void ArticulationHelper::saveVelocity(const ArticulationSolverDesc& d)
+{
+ Vec3V b[DY_ARTICULATION_MAX_SIZE];
+ FsData& m = *d.fsData;
+
+ Cm::SpatialVectorV* velocity = getVelocity(m);
+ PxcFsFlushVelocity(m);
+
+ // save off the motion velocity
+
+ for(PxU32 i=0;i<m.linkCount;i++)
+ {
+ d.motionVelocity[i] = velocity[i];
+ PX_ASSERT(isFiniteVec3V(velocity[i].linear));
+ PX_ASSERT(isFiniteVec3V(velocity[i].angular));
+ }
+
+ // and now re-solve to use the unbiased velocities
+
+ PxcLtbComputeJv(b, m, velocity);
+ PxcLtbProject(m, velocity, b);
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ for(PxU32 i=0;i<m.linkCount;i++)
+ getRefVelocity(m)[i] = velocity[i];
+#endif
+}
+
+void PxcFsComputeJointLoadsSimd(const FsData& matrix,
+ const FsInertia*PX_RESTRICT baseInertia,
+ Mat33V*PX_RESTRICT load,
+ const PxReal*PX_RESTRICT isf_,
+ PxU32 linkCount,
+ PxU32 maxIterations,
+ PxcFsScratchAllocator allocator)
+{
+ // dsequeira: this is really difficult to optimize on XBox: not inlining generates lots of LHSs,
+ // inlining generates lots of cache misses because the fn is so huge (almost 2000 instrs.)
+ // Timing says even for 1 iteration the cache misses are slighly preferable for a
+ // 20-bone articulation, for more iters it's *much* better to take the cache misses.
+ //
+ // about 400 instructions come from unnecessary and inexplicable branch checks
+
+ if(!maxIterations)
+ return;
+
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ FloatV isf[DY_ARTICULATION_MAX_SIZE];
+
+ for(PxU32 i=1;i<linkCount;i++)
+ isf[i] = FLoad(isf_[i]);
+
+ FsInertia*PX_RESTRICT inertia = allocator.alloc<FsInertia>(linkCount);
+ FsInertia*PX_RESTRICT contribToParent = allocator.alloc<FsInertia>(linkCount);
+
+ const FsRow*PX_RESTRICT row = getFsRows(matrix);
+ const FsRowAux*PX_RESTRICT aux = getAux(matrix);
+ const FsJointVectors* jointVectors = getJointVectors(matrix);
+
+ PX_UNUSED(row);
+
+ // gets rid of about 200 LHSs, need to change the matrix format to make this part of it
+ PxU64 parent[DY_ARTICULATION_MAX_SIZE];
+ for(PxU32 i=0;i<linkCount;i++)
+ parent[i] = matrix.parent[i];
+
+ while(maxIterations--)
+ {
+ PxMemCopy(inertia, baseInertia, sizeof(FsInertia)*linkCount);
+
+ for(PxU32 i=linkCount;i-->1;)
+ {
+ const Cm::SpatialVectorV*PX_RESTRICT S = aux[i].S;
+
+ Ps::prefetch(&load[i-1]);
+ Ps::prefetch(&jointVectors[i-1]);
+ const FsInertia tmp = Fns::propagate(inertia[i], S, load[i], isf[i]);
+ inertia[parent[i]] = Fns::addInertia(inertia[parent[i]], Fns::translateInertia(jointVectors[i].parentOffset, tmp));
+ contribToParent[i] = tmp;
+ }
+
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ const Cm::SpatialVectorV*PX_RESTRICT S = aux[i].S;
+
+ const FsInertia rootwardInertia = Fns::subtractInertia(Fns::translateInertia(V3Neg(jointVectors[i].parentOffset), inertia[parent[i]]), contribToParent[i]);
+ const FsInertia tmp = Fns::propagate(rootwardInertia, S, load[i], isf[i]);
+ load[i] = Fns::computeDriveInertia(inertia[i], rootwardInertia, S);
+ inertia[i] = Fns::addInertia(inertia[i], tmp);
+ }
+ }
+}
+
+PxU32 ArticulationHelper::getFsDataSize(PxU32 linkCount)
+{
+ return sizeof(FsInertia) + sizeof(FsRow) * linkCount;
+}
+
+PxU32 ArticulationHelper::getLtbDataSize(PxU32 linkCount)
+{
+ return sizeof(LtbRow) * linkCount;
+}
+
+void ArticulationHelper::prepareDataBlock( FsData& fsData,
+ const ArticulationLink* links,
+ PxU16 linkCount,
+ PxTransform* poses,
+ FsInertia* baseInertia,
+ ArticulationJointTransforms* jointTransforms,
+ PxU32 expectedSize)
+{
+ PxU32 stateSize = sizeof(FsData)
+ + sizeof(Cm::SpatialVectorV) * linkCount
+ + sizeof(Cm::SpatialVectorV) * linkCount
+ + sizeof(Vec3V) * linkCount
+ + sizeof(PxReal) * ((linkCount + 15) & 0xfffffff0);
+
+ PxU32 jointVectorSize = sizeof(FsJointVectors) * linkCount;
+
+ PxU32 fsDataSize = getFsDataSize(linkCount);
+ PxU32 ltbDataSize = getLtbDataSize(linkCount);
+
+ PxU32 totalSize = stateSize
+ + jointVectorSize
+ + fsDataSize
+ + ltbDataSize
+ + sizeof(Cm::SpatialVectorV) * linkCount
+ + sizeof(FsRowAux) * linkCount;
+
+ PX_UNUSED(totalSize);
+ PX_UNUSED(expectedSize);
+ PX_ASSERT(expectedSize == 0 || totalSize == expectedSize);
+
+ PxMemZero(&fsData, stateSize);
+ fsData.jointVectorOffset = PxU16(stateSize);
+ fsData.fsDataOffset = PxU16(stateSize+jointVectorSize);
+ fsData.ltbDataOffset = PxU16(stateSize+jointVectorSize+fsDataSize);
+ fsData.linkCount = linkCount;
+
+ for(PxU32 i=1;i<linkCount;i++)
+ fsData.parent[i] = PxU8(links[i].parent);
+ fsData.deferredZ = Cm::SpatialVectorV(PxZero);
+
+ Cm::SpatialVector* velocity = reinterpret_cast<Cm::SpatialVector*>(getVelocity(fsData));
+
+ PxMemZero(baseInertia, sizeof(FsInertia)*linkCount);
+
+ PxReal* maxPenBias = getMaxPenBias(fsData);
+
+ for(PxU32 i=0;i<linkCount;i++)
+ {
+ if((i+2)<linkCount)
+ {
+ Ps::prefetch(links[i+2].bodyCore);
+ Ps::prefetch(links[i+2].inboundJoint);
+ }
+ PxsBodyCore& core = *links[i].bodyCore;
+ poses[i] = core.body2World;
+ velocity[i] = Cm::SpatialVector(core.linearVelocity, core.angularVelocity);
+ setInertia(baseInertia[i], core, core.body2World);
+ maxPenBias[i] = core.maxPenBias;
+
+ if(i)
+ setJointTransforms(jointTransforms[i], poses[links[i].parent], core.body2World, *links[i].inboundJoint);
+ }
+
+ FsJointVectors* jointVectors = getJointVectors(fsData);
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ PX_ALIGN(16, PxVec3) parentOffset = poses[i].p - poses[fsData.parent[i]].p;
+ PX_ALIGN(16, PxVec3) jointOffset = jointTransforms[i].cB2w.p - poses[i].p;
+ jointVectors[i].parentOffset = V3LoadA(parentOffset);
+ jointVectors[i].jointOffset = V3LoadA(jointOffset);
+ }
+}
+
+PxU32 ArticulationHelper::computeUnconstrainedVelocities( const ArticulationSolverDesc& desc,
+ PxReal dt,
+ PxcConstraintBlockStream& stream,
+ PxSolverConstraintDesc* constraintDesc,
+ PxU32& acCount,
+ PxsConstraintBlockManager& constraintBlockManager,
+ const PxVec3& gravity, PxU64 contextID)
+{
+ PX_UNUSED(contextID);
+ const ArticulationLink* links = desc.links;
+ PxU16 linkCount = desc.linkCount;
+ FsData& fsData = *desc.fsData;
+ PxTransform* poses = desc.poses;
+
+ PxcFsScratchAllocator allocator(desc.scratchMemory, desc.scratchMemorySize);
+ FsInertia* PX_RESTRICT baseInertia = allocator.alloc<FsInertia>(desc.linkCount);
+ ArticulationJointTransforms* PX_RESTRICT jointTransforms = allocator.alloc<ArticulationJointTransforms>(desc.linkCount);
+
+ {
+ PX_PROFILE_ZONE("Articulations.prepareDataBlock", contextID);
+ prepareDataBlock(fsData, links, linkCount, poses, baseInertia, jointTransforms, desc.totalDataSize);
+ }
+
+ const PxReal recipDt = 1.0f/dt;
+
+ Cm::SpatialVectorV* velocity = getVelocity(fsData);
+
+ {
+
+ PX_PROFILE_ZONE("Articulations.setupProject", contextID);
+
+ PxMemZero(getLtbRows(fsData), getLtbDataSize(linkCount));
+ prepareLtbMatrix(fsData, baseInertia, poses, jointTransforms, recipDt);
+
+ PxcLtbFactor(fsData);
+
+ Vec3V b[DY_ARTICULATION_MAX_SIZE];
+ PxcLtbComputeJv(b, fsData, velocity);
+
+ LtbRow* rows = getLtbRows(fsData);
+ for(PxU32 i=1;i<linkCount;i++)
+ b[i] = V3Add(b[i], rows[i].jC);
+
+ PxcLtbProject(fsData, velocity, b);
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.prepareFsData", contextID);
+ PxMemZero(addAddr<void*>(&fsData,fsData.fsDataOffset), getFsDataSize(linkCount));
+ prepareFsData(fsData, links);
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.setupDrives", contextID);
+
+ if(!(desc.core->externalDriveIterations & 0x80000000))
+ PxMemZero(desc.externalLoads, sizeof(Mat33V) * linkCount);
+
+ if(!(desc.core->internalDriveIterations & 0x80000000))
+ PxMemZero(desc.internalLoads, sizeof(Mat33V) * linkCount);
+
+ PxReal isf[DY_ARTICULATION_MAX_SIZE], esf[DY_ARTICULATION_MAX_SIZE]; // spring factors
+ Vec3V drive[DY_ARTICULATION_MAX_SIZE];
+
+ bool externalEqualsInternalCompliance = (desc.core->internalDriveIterations&0xffff) == (desc.core->externalDriveIterations&0xffff);
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ const ArticulationJointCore& j = *links[i].inboundJoint;
+ isf[i] = (1 + j.damping * dt + j.spring * dt * dt) * getResistance(j.internalCompliance);
+ esf[i] = (1 + j.damping * dt + j.spring * dt * dt) * getResistance(j.externalCompliance);
+
+ externalEqualsInternalCompliance = externalEqualsInternalCompliance && j.internalCompliance == j.externalCompliance;
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.jointInternalLoads", contextID);
+ PxcFsComputeJointLoadsSimd(fsData, baseInertia, desc.internalLoads, isf, linkCount, desc.core->internalDriveIterations&0xffff, allocator);
+
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.propagateDrivenInertia", contextID);
+ PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, isf, desc.internalLoads, allocator);
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.computeJointDrives", contextID);
+ computeJointDrives(fsData, drive, links, poses, jointTransforms, desc.internalLoads, dt);
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.applyJointDrives", contextID);
+ PxcFsApplyJointDrives(fsData, drive);
+ }
+
+ if(!externalEqualsInternalCompliance)
+ {
+ {
+ PX_PROFILE_ZONE("Articulations.jointExternalLoads", contextID);
+ PxcFsComputeJointLoadsSimd(fsData, baseInertia, desc.externalLoads, esf, linkCount, desc.core->externalDriveIterations&0xffff, allocator);
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.propagateDrivenInertia", contextID);
+ PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, esf, desc.externalLoads, allocator);
+ }
+ }
+ }
+
+ {
+ PX_PROFILE_ZONE("Articulations.applyExternalImpulses", contextID);
+ Cm::SpatialVectorV Z[DY_ARTICULATION_MAX_SIZE];
+
+ FloatV h = FLoad(dt);
+
+ const Cm::SpatialVector* acceleration = desc.acceleration;
+
+ const Vec3V vGravity = V3LoadU(gravity);
+
+ for(PxU32 i=0;i<linkCount;i++)
+ {
+ Vec3V linearAccel = V3LoadA(acceleration[i].linear);
+
+ if (!(desc.links[i].body->mInternalFlags & PxcRigidBody::eDISABLE_GRAVITY))
+ linearAccel = V3Add(linearAccel, vGravity);
+ Cm::SpatialVectorV a(linearAccel, V3LoadA(acceleration[i].angular));
+ Z[i] = -ArticulationFnsSimd<ArticulationFnsSimdBase>::multiply(baseInertia[i], a) * h;
+ }
+
+ applyImpulses(fsData, Z, getVelocity(fsData));
+ }
+
+ // save off the motion velocity in case there are no constraints with the articulation
+
+ PxMemCopy(desc.motionVelocity, velocity, linkCount*sizeof(Cm::SpatialVectorV));
+
+ // set up for deferred-update solve
+
+ fsData.dirty = 0;
+
+ // solver progress counters
+ fsData.maxSolverNormalProgress = 0;
+ fsData.maxSolverFrictionProgress = 0;
+ fsData.solverProgress = 0;
+
+
+#if DY_ARTICULATION_DEBUG_VERIFY
+ for(PxU32 i=0;i<linkCount;i++)
+ getRefVelocity(fsData)[i] = getVelocity(fsData)[i];
+#endif
+
+ {
+ PX_PROFILE_ZONE("Articulations.setupConstraints", contextID);
+ return setupSolverConstraints(fsData, desc.solverDataSize, stream, constraintDesc, links, jointTransforms, dt, acCount, constraintBlockManager);
+ }
+}
+
+void ArticulationHelper::initializeDriveCache( FsData& fsData,
+ PxU16 linkCount,
+ const ArticulationLink* links,
+ PxReal compliance,
+ PxU32 iterations,
+ char* scratchMemory,
+ PxU32 scratchMemorySize)
+{
+ PxcFsScratchAllocator allocator(scratchMemory, scratchMemorySize);
+ FsInertia* PX_RESTRICT baseInertia = allocator.alloc<FsInertia>(linkCount);
+ ArticulationJointTransforms* PX_RESTRICT jointTransforms = allocator.alloc<ArticulationJointTransforms>(linkCount);
+ PxTransform* PX_RESTRICT poses = allocator.alloc<PxTransform>(linkCount);
+ Mat33V* PX_RESTRICT jointLoads = allocator.alloc<Mat33V>(linkCount);
+
+ PxReal springFactor[DY_ARTICULATION_MAX_SIZE]; // spring factors
+
+ prepareDataBlock(fsData, links, linkCount, poses, baseInertia, jointTransforms, 0);
+
+ PxMemZero(addAddr<void*>(&fsData,fsData.fsDataOffset), getFsDataSize(linkCount));
+ prepareFsData(fsData, links);
+
+ springFactor[0] = 0.0f;
+ for(PxU32 i=1;i<linkCount;i++)
+ springFactor[i] = getResistance(compliance);
+
+ PxMemZero(jointLoads, sizeof(Mat33V)*linkCount);
+ PxcFsComputeJointLoadsSimd(fsData, baseInertia, jointLoads, springFactor, linkCount, iterations&0xffff, allocator);
+ PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, springFactor, jointLoads, allocator);
+}
+
+void ArticulationHelper::updateBodies(const ArticulationSolverDesc& desc, PxReal dt)
+{
+ FsData& fsData = *desc.fsData;
+ const ArticulationCore& core = *desc.core;
+ const ArticulationLink* links = desc.links;
+ PxTransform* poses = desc.poses;
+ Cm::SpatialVectorV* motionVelocity = desc.motionVelocity;
+
+ Vec3V b[DY_ARTICULATION_MAX_SIZE];
+
+ PxU32 linkCount = fsData.linkCount;
+
+ PxcFsFlushVelocity(fsData);
+ PxcLtbComputeJv(b, fsData, getVelocity(fsData));
+ PxcLtbProject(fsData, getVelocity(fsData), b);
+
+ // update positions
+ PxcFsScratchAllocator allocator(desc.scratchMemory, desc.scratchMemorySize);
+ PxTransform* PX_RESTRICT oldPose = allocator.alloc<PxTransform>(desc.linkCount);
+
+ for(PxU32 i=0;i<linkCount;i++)
+ {
+ const PxVec3& lv = reinterpret_cast<PxVec3&>(motionVelocity[i].linear);
+ const PxVec3& av = reinterpret_cast<PxVec3&>(motionVelocity[i].angular);
+ oldPose[i] = poses[i];
+ poses[i] = PxTransform(poses[i].p + lv * dt, Ps::exp(av*dt) * poses[i].q);
+ }
+
+ bool projected = false;
+ const PxReal recipDt = 1.0f/dt;
+
+ FsInertia* PX_RESTRICT baseInertia = allocator.alloc<FsInertia>(desc.linkCount);
+ ArticulationJointTransforms* PX_RESTRICT jointTransforms = allocator.alloc<ArticulationJointTransforms>(desc.linkCount);
+
+ for(PxU32 iterations = 0; iterations < core.maxProjectionIterations; iterations++)
+ {
+ PxReal maxSeparation = -PX_MAX_F32;
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ const ArticulationJointCore& j = *links[i].inboundJoint;
+ maxSeparation = PxMax(maxSeparation,
+ (poses[links[i].parent].transform(j.parentPose).p -
+ poses[i].transform(j.childPose).p).magnitude());
+ }
+
+ if(maxSeparation<=core.separationTolerance)
+ break;
+
+ projected = true;
+
+ // we go around again, finding velocities which pull us back together - this
+ // form of projection is momentum-preserving but slow compared to hierarchical
+ // projection
+
+ PxMemZero(baseInertia, sizeof(FsInertia)*linkCount);
+
+ ArticulationHelper::setInertia(baseInertia[0], *links[0].bodyCore, poses[0]);
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ ArticulationHelper::setInertia(baseInertia[i], *links[i].bodyCore, poses[i]);
+ ArticulationHelper::setJointTransforms(jointTransforms[i], poses[links[i].parent], poses[i], *links[i].inboundJoint);
+ }
+
+ ArticulationHelper::prepareLtbMatrix(fsData, baseInertia, poses, jointTransforms, recipDt);
+ PxcLtbFactor(fsData);
+
+ LtbRow* rows = getLtbRows(fsData);
+
+ for(PxU32 i=1;i<linkCount;i++)
+ b[i] = rows[i].jC;
+
+ PxMemZero(motionVelocity, linkCount*sizeof(Cm::SpatialVectorV));
+
+ PxcLtbProject(fsData, motionVelocity, b);
+
+ for(PxU32 i=0;i<linkCount;i++)
+ {
+ const PxVec3& lv = reinterpret_cast<PxVec3&>(motionVelocity[i].linear);
+ const PxVec3& av = reinterpret_cast<PxVec3&>(motionVelocity[i].angular);
+ poses[i] = PxTransform(poses[i].p + lv * dt, Ps::exp(av*dt) * poses[i].q);
+ }
+ }
+
+ if(projected)
+ {
+ // recompute motion velocities.
+ for(PxU32 i=0;i<linkCount;i++)
+ {
+ motionVelocity[i].linear = V3LoadU((poses[i].p - oldPose[i].p) * recipDt);
+ motionVelocity[i].angular = V3LoadU(Ps::log(poses[i].q * oldPose[i].q.getConjugate()) * recipDt);
+ }
+ }
+
+ Cm::SpatialVectorV* velocity = getVelocity(fsData);
+ for(PxU32 i=0;i<linkCount;i++)
+ {
+ links[i].bodyCore->body2World = poses[i];
+
+ V3StoreA(velocity[i].linear, links[i].bodyCore->linearVelocity);
+ V3StoreA(velocity[i].angular, links[i].bodyCore->angularVelocity);
+ }
+}
+
+void ArticulationHelper::setInertia(FsInertia& inertia,
+ const PxsBodyCore& body,
+ const PxTransform& pose)
+{
+ // assumes that elements that are supposed to be zero (i.e. la matrix and off diagonal elements of ll) are zero
+
+ const PxMat33 R(pose.q);
+ const PxVec3& v = body.inverseInertia;
+ const PxReal m = 1.0f/body.inverseMass;
+ V3WriteX(inertia.ll.col0, m);
+ V3WriteY(inertia.ll.col1, m);
+ V3WriteZ(inertia.ll.col2, m);
+
+ PX_ALIGN_PREFIX(16) PxMat33 PX_ALIGN_SUFFIX(16) alignedInertia = R * PxMat33::createDiagonal(PxVec3(1.0f/v.x, 1.0f/v.y, 1.0f/v.z)) * R.getTranspose();
+ alignedInertia = (alignedInertia + alignedInertia.getTranspose())*0.5f;
+ inertia.aa = Mat33V_From_PxMat33(alignedInertia);
+}
+
+void ArticulationHelper::setJointTransforms(ArticulationJointTransforms& transforms,
+ const PxTransform& parentPose,
+ const PxTransform& childPose,
+ const ArticulationJointCore& joint)
+{
+ transforms.cA2w = parentPose.transform(joint.parentPose);
+ transforms.cB2w = childPose.transform(joint.childPose);
+ transforms.cB2cA = transforms.cA2w.transformInv(transforms.cB2w);
+ if(transforms.cB2cA.q.w<0) // the relative quat must be the short way round for limits to work...
+ {
+ transforms.cB2cA.q = -transforms.cB2cA.q;
+ transforms.cB2w.q = -transforms.cB2w.q;
+ }
+}
+
+void ArticulationHelper::prepareLtbMatrix( FsData& fsData,
+ const FsInertia* baseInertia,
+ const PxTransform* poses,
+ const ArticulationJointTransforms* jointTransforms,
+ PxReal recipDt)
+{
+ PxU32 linkCount = fsData.linkCount;
+ LtbRow* rows = getLtbRows(fsData);
+
+ rows[0].inertia = baseInertia[0];
+
+ const PxVec3 axis[3] = { PxVec3(1.0f,0.0f,0.0f), PxVec3(0.0f,1.0f,0.0f), PxVec3(0.0f,0.0f,1.0f) };
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ rows[i].inertia = baseInertia[i];
+ const ArticulationJointTransforms& s = jointTransforms[i];
+
+ const PxU32 p = fsData.parent[i];
+
+ // we put the action point of the constraint at the root of the child
+
+ const PxVec3 ra = s.cB2w.p - poses[p].p;
+ const PxVec3 rb = s.cB2w.p - poses[i].p;
+
+ // A bit different from the 1D solver,
+ // there we use a formulation j0.v0 - j1.v1 + c = 0
+ // here we use the homogeneous j0.v0 + j1.v1 + c = 0
+
+ const PxVec3 error = (s.cA2w.p - s.cB2w.p) * 0.99f;
+
+ Cm::SpatialVectorV* j0 = rows[i].j0;
+ Cm::SpatialVectorV* j1 = rows[i].j1;
+
+ for(PxU32 j=0;j<3;j++)
+ {
+ PxVec3 n = axis[j];
+ j0[j] = Cm::SpatialVector(n, ra.cross(n));
+ j1[j] = Cm::SpatialVector(-n, -rb.cross(n));
+ }
+
+ rows[i].jC = V3LoadU(error*recipDt);
+ }
+}
+
+void ArticulationHelper::prepareFsData(FsData& fsData, const ArticulationLink* links)
+{
+ typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;
+
+ PxU32 linkCount = fsData.linkCount;
+ FsRow* rows = getFsRows(fsData);
+ FsRowAux* aux = getAux(fsData);
+ const FsJointVectors* jointVectors = getJointVectors(fsData);
+
+ rows[0].children = links[0].children;
+ rows[0].pathToRoot = 1;
+
+ PX_ALIGN_PREFIX(16) PxVec4 v[] PX_ALIGN_SUFFIX(16) = { PxVec4(1.f,0,0,0), PxVec4(0,1.f,0,0), PxVec4(0,0,1.f,0) } ;
+ const Vec3V* axes = reinterpret_cast<const Vec3V*>(v);
+
+ for(PxU32 i=1;i<linkCount;i++)
+ {
+ PxU32 p = links[i].parent;
+ FsRow& r = rows[i];
+ FsRowAux& a = aux[i];
+
+ PX_UNUSED(p);
+
+ r.children = links[i].children;
+ r.pathToRoot = links[i].pathToRoot;
+
+ const Vec3V jointOffset = jointVectors[i].jointOffset;
+
+ // the joint coords are world oriented, located at the joint.
+ a.S[0] = Fns::translateMotion(jointOffset, Cm::SpatialVectorV(V3Zero(), axes[0]));
+ a.S[1] = Fns::translateMotion(jointOffset, Cm::SpatialVectorV(V3Zero(), axes[1]));
+ a.S[2] = Fns::translateMotion(jointOffset, Cm::SpatialVectorV(V3Zero(), axes[2]));
+ }
+}
+
+PX_FORCE_INLINE PxReal ArticulationHelper::getResistance(PxReal compliance)
+{
+ PX_ASSERT(compliance>0);
+ return 1.0f/compliance;
+}
+
+void ArticulationHelper::createHardLimit( const FsData& fsData,
+ const ArticulationLink* links,
+ PxU32 linkIndex,
+ SolverConstraint1DExt& s,
+ const PxVec3& axis,
+ PxReal err,
+ PxReal recipDt)
+{
+ init(s, PxVec3(0), PxVec3(0), axis, axis, 0, PX_MAX_F32);
+
+ ArticulationHelper::getImpulseSelfResponse(fsData,
+ links[linkIndex].parent,Cm::SpatialVector(PxVec3(0), axis), s.deltaVA,
+ linkIndex, Cm::SpatialVector(PxVec3(0), -axis), s.deltaVB);
+
+ const PxReal unitResponse = axis.dot(reinterpret_cast<PxVec3&>(s.deltaVA.angular)) - axis.dot(reinterpret_cast<PxVec3&>(s.deltaVB.angular));
+ if(unitResponse<0.0f)
+ Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "Warning: articulation ill-conditioned or under severe stress, joint limit ignored");
+
+ const PxReal recipResponse = unitResponse>0.0f ? 1.0f/unitResponse : 0.0f;
+
+ s.constant = recipResponse * -err * recipDt;
+ s.unbiasedConstant = err>0.0f ? s.constant : 0.0f;
+ s.velMultiplier = -recipResponse;
+ s.impulseMultiplier = 1.0f;
+}
+
+void ArticulationHelper::createTangentialSpring(const FsData& fsData,
+ const ArticulationLink* links,
+ PxU32 linkIndex,
+ SolverConstraint1DExt& s,
+ const PxVec3& axis,
+ PxReal stiffness,
+ PxReal damping,
+ PxReal dt)
+{
+ init(s, PxVec3(0), PxVec3(0), axis, axis, -PX_MAX_F32, PX_MAX_F32);
+
+ Cm::SpatialVector axis6(PxVec3(0), axis);
+ PxU32 parent = links[linkIndex].parent;
+ getImpulseSelfResponse(fsData, parent, axis6, s.deltaVA, linkIndex, -axis6, s.deltaVB);
+
+ const PxReal unitResponse = axis.dot(reinterpret_cast<PxVec3&>(s.deltaVA.angular)) - axis.dot(reinterpret_cast<PxVec3&>(s.deltaVB.angular));
+ if(unitResponse<0.0f)
+ Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "Warning: articulation ill-conditioned or under severe stress, tangential spring ignored");
+ const PxReal recipResponse = unitResponse>0.0F ? 1.0f/unitResponse : 0.0f;
+
+ // this is a specialization of the spring code in setSolverConstants() for acceleration springs.
+ // general case is b = dt * (c.mods.spring.damping * c.velocityTarget - c.mods.spring.stiffness * geomError);
+ // but geomError and velocityTarget are both zero
+
+ const PxReal a = dt * dt * stiffness + dt * damping;
+ const PxReal x = 1.0f/(1.0f+a);
+ s.constant = s.unbiasedConstant = 0.0f;
+ s.velMultiplier = -x * recipResponse * a;
+ s.impulseMultiplier = 1.0f - x;
+}
+
+PxU32 ArticulationHelper::setupSolverConstraints( FsData& fsData, PxU32 solverDataSize,
+ PxcConstraintBlockStream& stream,
+ PxSolverConstraintDesc* constraintDesc,
+ const ArticulationLink* links,
+ const ArticulationJointTransforms* jointTransforms,
+ PxReal dt,
+ PxU32& acCount,
+ PxsConstraintBlockManager& constraintBlockManager)
+{
+ acCount = 0;
+
+ const PxU16 linkCount = fsData.linkCount;
+ PxU32 descCount = 0;
+ const PxReal recipDt = 1.0f/dt;
+
+ const PxConstraintInvMassScale ims(1.0f, 1.0f, 1.0f, 1.0f);
+
+ for(PxU16 i=1;i<linkCount;i++)
+ {
+ const ArticulationJointCore& j = *links[i].inboundJoint;
+
+ if(i+1<linkCount)
+ {
+ Ps::prefetch(links[i+1].inboundJoint, sizeof (ArticulationJointCore));
+ Ps::prefetch(&jointTransforms[i+1], sizeof(ArticulationJointTransforms));
+ }
+
+ if(!(j.twistLimited || j.swingLimited))
+ continue;
+
+ PxQuat swing, twist;
+ Ps::separateSwingTwist(jointTransforms[i].cB2cA.q, swing, twist);
+
+ Cm::ConeLimitHelper eh(j.tanQSwingY, j.tanQSwingZ, j.tanQSwingPad);
+ PxVec3 swingLimitAxis;
+ PxReal swingLimitError = 0.0f;
+
+ const bool swingLimited = j.swingLimited && eh.getLimit(swing, swingLimitAxis, swingLimitError);
+ const bool tangentialStiffness = swingLimited && (j.tangentialStiffness>0 || j.tangentialDamping>0);
+
+ const PxVec3 twistAxis = jointTransforms[i].cB2w.rotate(PxVec3(1.0f,0,0));
+ const PxReal tqTwistAngle = Ps::tanHalf(twist.x, twist.w);
+
+ const bool twistLowerLimited = j.twistLimited && tqTwistAngle < Cm::tanAdd(j.tanQTwistLow, j.tanQTwistPad);
+ const bool twistUpperLimited = j.twistLimited && tqTwistAngle > Cm::tanAdd(j.tanQTwistHigh, -j.tanQTwistPad);
+
+ const PxU8 constraintCount = PxU8(swingLimited + tangentialStiffness + twistUpperLimited + twistLowerLimited);
+ if(!constraintCount)
+ continue;
+
+ PxSolverConstraintDesc& desc = constraintDesc[descCount++];
+
+ desc.articulationA = &fsData;
+ desc.linkIndexA = Ps::to16(links[i].parent);
+ desc.articulationALength = Ps::to16(solverDataSize);
+
+ desc.articulationB = &fsData;
+ desc.linkIndexB = i;
+ desc.articulationBLength = Ps::to16(solverDataSize);
+
+ const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) +
+ sizeof(SolverConstraint1DExt) * constraintCount;
+
+ PX_ASSERT(0==(constraintLength & 0x0f));
+ desc.constraintLengthOver16 = Ps::to16(constraintLength/16);
+
+ desc.constraint = stream.reserve(constraintLength + 16u, constraintBlockManager);
+
+ desc.writeBack = NULL;
+
+ SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
+ SolverConstraint1DExt* constraints = reinterpret_cast<SolverConstraint1DExt*>(desc.constraint + sizeof(SolverConstraint1DHeader));
+
+ init(*header, constraintCount, true, ims);
+
+ PxU32 cIndex = 0;
+
+ if(swingLimited)
+ {
+ const PxVec3 normal = jointTransforms[i].cA2w.rotate(swingLimitAxis);
+ createHardLimit(fsData, links, i, constraints[cIndex++], normal, swingLimitError, recipDt);
+ if(tangentialStiffness)
+ {
+ const PxVec3 tangent = twistAxis.cross(normal).getNormalized();
+ createTangentialSpring(fsData, links, i, constraints[cIndex++], tangent, j.tangentialStiffness, j.tangentialDamping, dt);
+ }
+ }
+
+ if(twistUpperLimited)
+ createHardLimit(fsData, links, i, constraints[cIndex++], twistAxis, (j.tanQTwistHigh - tqTwistAngle)*4, recipDt);
+
+ if(twistLowerLimited)
+ createHardLimit(fsData, links, i, constraints[cIndex++], -twistAxis, -(j.tanQTwistLow - tqTwistAngle)*4, recipDt);
+
+ *(desc.constraint + getConstraintLength(desc)) = 0;
+
+ PX_ASSERT(cIndex == constraintCount);
+ acCount += constraintCount;
+ }
+
+ return descCount;
+}
+
+void ArticulationHelper::computeJointDrives(FsData& fsData,
+ Vec3V* drives,
+ const ArticulationLink* links,
+ const PxTransform* poses,
+ const ArticulationJointTransforms* transforms,
+ const Mat33V* loads,
+ PxReal dt)
+{
+ typedef ArticulationFnsScalar Fns;
+
+ const PxU32 linkCount = fsData.linkCount;
+ const Cm::SpatialVector* velocity = reinterpret_cast<const Cm::SpatialVector*>(getVelocity(fsData));
+
+ for(PxU32 i=1; i<linkCount;i++)
+ {
+ PxU32 parent = links[i].parent;
+ const ArticulationJointTransforms& b = transforms[i];
+ const ArticulationJointCore& j = *links[i].inboundJoint;
+
+ const Cm::SpatialVector currentVel = Fns::translateMotion(poses[i].p - b.cA2w.p, velocity[i])
+ - Fns::translateMotion(poses[parent].p - b.cA2w.p, velocity[parent]);
+
+ // we want the quat such that q * cB2cA = targetPosition
+ PxVec3 rotVec;
+ if(j.driveType == PxU8(PxArticulationJointDriveType::eTARGET))
+ rotVec = Ps::log(j.targetPosition * b.cB2cA.q.getConjugate()); // as a rotation vector
+ else
+ rotVec = j.targetPosition.getImaginaryPart();
+
+ // NM's Tests indicate behavior is better without the term commented out below, even though
+ // an implicit spring derivation suggests it should be there.
+
+ const PxVec3 posError = b.cA2w.rotate(rotVec); // - currentVel.angular * 0.5f * dt
+ const PxVec3 velError = b.cA2w.rotate(j.targetVelocity) - currentVel.angular;
+
+ drives[i] = M33MulV3(loads[i], V3LoadU((j.spring * posError + j.damping * velError) * dt * getResistance(j.internalCompliance)));
+ }
+}
+
+ArticulationPImpl::ComputeUnconstrainedVelocitiesFn ArticulationPImpl::sComputeUnconstrainedVelocities = NULL;
+ArticulationPImpl::UpdateBodiesFn ArticulationPImpl::sUpdateBodies = NULL;
+ArticulationPImpl::SaveVelocityFn ArticulationPImpl::sSaveVelocity = NULL;
+
+}
+}