From 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 Mon Sep 17 00:00:00 2001 From: git perforce import user Date: Tue, 25 Oct 2016 12:29:14 -0600 Subject: Initial commit: PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167] --- .../LowLevelDynamics/src/DyArticulationHelper.cpp | 1344 ++++++++++++++++++++ 1 file changed, 1344 insertions(+) create mode 100644 PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp (limited to 'PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp') diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp new file mode 100644 index 00000000..ea9ccb8d --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp @@ -0,0 +1,1344 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxVec3.h" +#include "foundation/PxMath.h" +#include "foundation/PxMemory.h" +#include "foundation/PxProfiler.h" + +#include "PsUtilities.h" +#include "CmSpatialVector.h" +#include "DyArticulationHelper.h" +#include "DyArticulationReference.h" +#include "DyArticulationFnsSimd.h" +#include "DyArticulationFnsScalar.h" +#include "DyArticulationFnsDebug.h" +#include "DySolverConstraintDesc.h" +#include "PxvDynamics.h" +#include "DyArticulation.h" +#include "PxcRigidBody.h" +#include "CmConeLimitHelper.h" +#include "DySolverConstraint1D.h" +#include "PxcConstraintBlockStream.h" +#include "DySolverConstraint1D.h" +#include "DyArticulationPImpl.h" +#include "PsFoundation.h" + +namespace physx +{ + +namespace Dy +{ + +void PxcFsFlushVelocity(FsData& matrix); + +// we pass this around by value so that when we return from a function the size is unaltered. That means we don't preserve state +// across functions - even though that could be handy to preserve baseInertia and jointTransforms across the solver so that if we +// need to run position projection positions they don't get recomputed. + +struct PxcFsScratchAllocator +{ + char* base; + size_t size; + size_t taken; + PxcFsScratchAllocator(char* p, size_t s): base(p), size(s), taken(0) {} + + template + static size_t sizeof16() + { + return (sizeof(T)+15)&~15; + } + + template T* alloc(PxU32 count) + { + size_t s = sizeof16(); + PX_ASSERT(taken+s*count <= size); + T* result = reinterpret_cast(base+taken); + taken+=s*count; + return result; + } +}; + +void PxcLtbFactor(FsData& m) +{ + typedef ArticulationFnsSimd Fns; + LtbRow* rows = getLtbRows(m); + + for(PxU32 i=m.linkCount; --i>0;) + { + LtbRow& b = rows[i]; + PxU32 p = m.parent[i]; + const FsInertia inertia = Fns::invertInertia(b.inertia); + const Mat33V jResponse = Fns::invertSym33(M33Neg(Fns::computeSIS(inertia, b.j1, b.j1))); + b.inertia = inertia; + rows[p].inertia = Fns::multiplySubtract(rows[p].inertia, jResponse, b.j0, b.j0); + b.jResponse = jResponse; + + } + rows[0].inertia = Fns::invertInertia(rows[0].inertia); +} + +void PxcLtbSolve(const FsData& m, + Vec3V* b, // rhs error to solve for + Cm::SpatialVectorV* y) // velocity delta output +{ + typedef ArticulationFnsSimd Fns; + + const LtbRow* rows = getLtbRows(m); + PxMemZero(y, m.linkCount*sizeof(Cm::SpatialVectorV)); + + for(PxU32 i=m.linkCount;i-->1;) + { + const LtbRow& r = rows[i]; + const PxU32 p = m.parent[i]; + + const Vec3V t = V3Sub(b[i], Fns::axisDot(r.j1, y[i])); + b[i] = t; + y[p] = Fns::subtract(y[p], Fns::axisMultiply(r.j0, t)); + } + + y[0] = Fns::multiply(rows[0].inertia, y[0]); + + for(PxU32 i=1; i Fns; + + Cm::SpatialVectorV IS[3]; + + FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + FsInertia* inertia = allocator.alloc(matrix.linkCount); + PxMemCopy(inertia, baseInertia, matrix.linkCount*sizeof(FsInertia)); + + for(PxU32 i=matrix.linkCount; --i>0;) + { + FsRow& r = rows[i]; + const FsRowAux& a = aux[i]; + const FsJointVectors& jv = jointVectors[i]; + + const Mat33V m = Fns::computeSIS(inertia[i], a.S, IS); + const FloatV f = FLoad(isf[i]); + + const Mat33V D = Fns::invertSym33(Mat33V(V3ScaleAdd(load[i].col0, f, m.col0), + V3ScaleAdd(load[i].col1, f, m.col1), + V3ScaleAdd(load[i].col2, f, m.col2))); + r.D = D; + + inertia[matrix.parent[i]] = Fns::addInertia(inertia[matrix.parent[i]], + Fns::translateInertia(jv.parentOffset, Fns::multiplySubtract(inertia[i], D, IS, r.DSI))); + } + + getRootInverseInertia(matrix) = Fns::invertInertia(inertia[0]); +} + +PX_FORCE_INLINE Cm::SpatialVectorV propagateDrivenImpulse(const FsRow& row, + const FsJointVectors& jv, + Vec3V& SZMinusQ, + const Cm::SpatialVectorV& Z, + const Vec3V& Q) +{ + typedef ArticulationFnsSimd Fns; + + SZMinusQ = V3Sub(V3Add(Z.angular, V3Cross(Z.linear,jv.jointOffset)), Q); + Cm::SpatialVectorV result = Fns::translateForce(jv.parentOffset, Z - Fns::axisMultiply(row.DSI, SZMinusQ)); + + return result; +} + +void PxcFsApplyJointDrives(FsData& matrix, + const Vec3V* Q) +{ + typedef ArticulationFnsSimd Fns; + + PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + + const FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + Cm::SpatialVectorV Z[DY_ARTICULATION_MAX_SIZE]; + Cm::SpatialVectorV dV[DY_ARTICULATION_MAX_SIZE]; + Vec3V SZminusQ[DY_ARTICULATION_MAX_SIZE]; + + PxMemZero(Z, matrix.linkCount*sizeof(Cm::SpatialVectorV)); + + for(PxU32 i=matrix.linkCount;i-->1;) + Z[matrix.parent[i]] += propagateDrivenImpulse(rows[i], jointVectors[i], SZminusQ[i], Z[i], Q[i]); + + + dV[0] = Fns::multiply(getRootInverseInertia(matrix), -Z[0]); + + for(PxU32 i=1;i Fns; + + PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + const FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + Cm::SpatialVectorV dV[DY_ARTICULATION_MAX_SIZE]; + Vec3V SZ[DY_ARTICULATION_MAX_SIZE]; + + for(PxU32 i=matrix.linkCount;i-->1;) + Z[matrix.parent[i]] += Fns::propagateImpulse(rows[i], jointVectors[i], SZ[i], Z[i], aux[i]); + + dV[0] = Fns::multiply(getRootInverseInertia(matrix), -Z[0]); + + for(PxU32 i=1;i Fns; + + const FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + PxU32 stack[DY_ARTICULATION_MAX_SIZE]; + Vec3V SZ[DY_ARTICULATION_MAX_SIZE]; + + PxU32 i0, i1, ic; + + for(i0 = linkID0, i1 = linkID1; i0!=i1;) // find common path + { + if(i0i1 ;) + v = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], v, aux[stack[index]]); + + deltaV1 = v; + for(PxU32 index = i1; index-->i0 ;) + deltaV1 = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], deltaV1, aux[stack[index]]); + + deltaV0 = v; + for(PxU32 index = i0; index-->0;) + deltaV0 = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], deltaV0, aux[stack[index]]); +} + +void PxcFsGetImpulseResponse(const FsData& matrix, + PxU32 linkID, + const Cm::SpatialVectorV& impulse, + Cm::SpatialVectorV& deltaV) +{ + typedef ArticulationFnsSimd Fns; + + PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + Vec3V SZ[DY_ARTICULATION_MAX_SIZE]; + + const FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + Cm::SpatialVectorV Z = -impulse; + + for(PxU32 i = linkID; i; i = matrix.parent[i]) + Z = Fns::propagateImpulse(rows[i], jointVectors[i], SZ[i], Z, aux[i]); + + deltaV = Fns::multiply(getRootInverseInertia(matrix), -Z); + + PX_ASSERT(rows[linkID].pathToRoot&1); + + for(ArticulationBitField i=rows[linkID].pathToRoot-1; i; i &= (i-1)) + { + const PxU32 index = ArticulationLowestSetBit(i); + deltaV = Fns::propagateVelocity(rows[index], jointVectors[index], SZ[index], deltaV, aux[index]); + } +} + +void PxcFsGetImpulseSelfResponse(const FsData& matrix, + PxU32 linkID0, + const Cm::SpatialVectorV& impulse0, + Cm::SpatialVectorV& deltaV0, + PxU32 linkID1, + const Cm::SpatialVectorV& impulse1, + Cm::SpatialVectorV& deltaV1) +{ + typedef ArticulationFnsSimd Fns; + + PX_ASSERT(linkID0 != linkID1); + + const FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + // standard case: parent-child limit + if(matrix.parent[linkID1] == linkID0) + { + Vec3V SZ; + const Cm::SpatialVectorV Z = impulse0 - Fns::propagateImpulse(rows[linkID1], jointVectors[linkID1], SZ, -impulse1, aux[linkID1]); + PxcFsGetImpulseResponse(matrix, linkID0, Z, deltaV0); + deltaV1 = Fns::propagateVelocity(rows[linkID1], jointVectors[linkID1], SZ, deltaV0, aux[linkID1]); + } + else + getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1); + +#if DY_ARTICULATION_DEBUG_VERIFY + Cm::SpatialVector V[DY_ARTICULATION_MAX_SIZE]; + for(PxU32 i=0;i(impulse0)); + ArticulationRef::applyImpulse(matrix,V,linkID1, reinterpret_cast(impulse1)); + + Cm::SpatialVector refV0 = V[linkID0]; + Cm::SpatialVector refV1 = V[linkID1]; +#endif +} + +namespace +{ + + PX_FORCE_INLINE Cm::SpatialVectorV getImpulseResponseSimd(const FsData& matrix, PxU32 linkID, Vec3V lZ, Vec3V aZ) + { + PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + Vec3V SZ[DY_ARTICULATION_MAX_SIZE]; + PxU32 indices[DY_ARTICULATION_MAX_SIZE], iCount = 0; + + const FsRow*PX_RESTRICT rows = getFsRows(matrix); + const FsRowAux*PX_RESTRICT aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + PX_UNUSED(aux); + PX_ASSERT(rows[linkID].pathToRoot&1); + + lZ = V3Neg(lZ); + aZ = V3Neg(aZ); + + for(PxU32 i = linkID; i; i = matrix.parent[i]) + { + const FsRow& r = rows[i]; + const FsJointVectors& j = jointVectors[i]; + + Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset)); + SZ[iCount] = sz; + + lZ = V3NegScaleSub(r.DSI[0].linear, V3GetX(sz), V3NegScaleSub(r.DSI[1].linear, V3GetY(sz), V3NegScaleSub(r.DSI[2].linear, V3GetZ(sz), lZ))); + aZ = V3NegScaleSub(r.DSI[0].angular, V3GetX(sz), V3NegScaleSub(r.DSI[1].angular, V3GetY(sz), V3NegScaleSub(r.DSI[2].angular, V3GetZ(sz), aZ))); + + aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ)); + indices[iCount++] = i; + } + + const FsInertia& I = getRootInverseInertia(matrix); + + Vec3V lV = V3Neg(V3Add(M33MulV3(I.ll, lZ), M33MulV3(I.la, aZ))); + Vec3V aV = V3Neg(V3Add(M33TrnspsMulV3(I.la, lZ), M33MulV3(I.aa, aZ))); + + while(iCount) + { + PxU32 i = indices[--iCount]; + const FsRow& r = rows[i]; + const FsJointVectors& j = jointVectors[i]; + + lV = V3Sub(lV, V3Cross(j.parentOffset, aV)); + + Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV), V3Dot(r.DSI[1].linear, lV), V3Dot(r.DSI[2].linear, lV)), + V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV))); + + n = V3Add(n, M33MulV3(r.D, SZ[iCount])); + lV = V3Sub(lV, V3Cross(j.jointOffset, n)); + aV = V3Sub(aV, n); + } + + return Cm::SpatialVectorV(lV, aV); + } +} + +void ArticulationHelper::getImpulseResponse(const FsData& matrix, + PxU32 linkID, + const Cm::SpatialVectorV& impulse, + Cm::SpatialVectorV& deltaV) +{ + PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + + deltaV = getImpulseResponseSimd(matrix, linkID, impulse.linear, impulse.angular); + +#if DY_ARTICULATION_DEBUG_VERIFY + Cm::SpatialVectorV deltaV_; + PxcFsGetImpulseResponse(matrix, linkID, impulse, deltaV_); + PX_ASSERT(almostEqual(deltaV_, deltaV,1e-3f)); +#endif +} + +void ArticulationHelper::getImpulseSelfResponse(const FsData& matrix, + PxU32 linkID0, + const Cm::SpatialVectorV& impulse0, + Cm::SpatialVectorV& deltaV0, + PxU32 linkID1, + const Cm::SpatialVectorV& impulse1, + Cm::SpatialVectorV& deltaV1) +{ + PX_ASSERT(linkID0 != linkID1); + + const FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + PX_UNUSED(aux); + + Cm::SpatialVectorV& dV0 = deltaV0, + & dV1 = deltaV1; + + // standard case: parent-child limit + if(matrix.parent[linkID1] == linkID0) + { + const FsRow& r = rows[linkID1]; + const FsJointVectors& j = jointVectors[linkID1]; + + Vec3V lZ = V3Neg(impulse1.linear), + aZ = V3Neg(impulse1.angular); + + Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset)); + + lZ = V3Sub(lZ, V3ScaleAdd(r.DSI[0].linear, V3GetX(sz), V3ScaleAdd(r.DSI[1].linear, V3GetY(sz), V3Scale(r.DSI[2].linear, V3GetZ(sz))))); + aZ = V3Sub(aZ, V3ScaleAdd(r.DSI[0].angular, V3GetX(sz), V3ScaleAdd(r.DSI[1].angular, V3GetY(sz), V3Scale(r.DSI[2].angular, V3GetZ(sz))))); + + aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ)); + + lZ = V3Sub(impulse0.linear, lZ); + aZ = V3Sub(impulse0.angular, aZ); + + dV0 = getImpulseResponseSimd(matrix, linkID0, lZ, aZ); + + Vec3V aV = dV0.angular; + Vec3V lV = V3Sub(dV0.linear, V3Cross(j.parentOffset, aV)); + + Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV), V3Dot(r.DSI[1].linear, lV), V3Dot(r.DSI[2].linear, lV)), + V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV))); + + n = V3Add(n, M33MulV3(r.D, sz)); + lV = V3Sub(lV, V3Cross(j.jointOffset, n)); + aV = V3Sub(aV, n); + + dV1 = Cm::SpatialVectorV(lV, aV); + } + else + getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1); + +#if DY_ARTICULATION_DEBUG_VERIFY + Cm::SpatialVectorV dV0_, dV1_; + PxcFsGetImpulseSelfResponse(matrix, linkID0, impulse0, dV0_, linkID1, impulse1, dV1_); + + PX_ASSERT(almostEqual(dV0_, dV0, 1e-3f)); + PX_ASSERT(almostEqual(dV1_, dV1, 1e-3f)); +#endif +} + +void PxcLtbComputeJv(Vec3V* jv, const FsData& m, const Cm::SpatialVectorV* velocity) +{ + const LtbRow* rows = getLtbRows(m); + const FsRow* fsRows = getFsRows(m); + const FsJointVectors* jointVectors = getJointVectors(m); + + PX_UNUSED(rows); + PX_UNUSED(fsRows); + + for(PxU32 i=1;i Fns; + + FloatV isf[DY_ARTICULATION_MAX_SIZE]; + + for(PxU32 i=1;i(linkCount); + FsInertia*PX_RESTRICT contribToParent = allocator.alloc(linkCount); + + const FsRow*PX_RESTRICT row = getFsRows(matrix); + const FsRowAux*PX_RESTRICT aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + PX_UNUSED(row); + + // gets rid of about 200 LHSs, need to change the matrix format to make this part of it + PxU64 parent[DY_ARTICULATION_MAX_SIZE]; + for(PxU32 i=0;i1;) + { + const Cm::SpatialVectorV*PX_RESTRICT S = aux[i].S; + + Ps::prefetch(&load[i-1]); + Ps::prefetch(&jointVectors[i-1]); + const FsInertia tmp = Fns::propagate(inertia[i], S, load[i], isf[i]); + inertia[parent[i]] = Fns::addInertia(inertia[parent[i]], Fns::translateInertia(jointVectors[i].parentOffset, tmp)); + contribToParent[i] = tmp; + } + + for(PxU32 i=1;i(getVelocity(fsData)); + + PxMemZero(baseInertia, sizeof(FsInertia)*linkCount); + + PxReal* maxPenBias = getMaxPenBias(fsData); + + for(PxU32 i=0;i(desc.linkCount); + ArticulationJointTransforms* PX_RESTRICT jointTransforms = allocator.alloc(desc.linkCount); + + { + PX_PROFILE_ZONE("Articulations.prepareDataBlock", contextID); + prepareDataBlock(fsData, links, linkCount, poses, baseInertia, jointTransforms, desc.totalDataSize); + } + + const PxReal recipDt = 1.0f/dt; + + Cm::SpatialVectorV* velocity = getVelocity(fsData); + + { + + PX_PROFILE_ZONE("Articulations.setupProject", contextID); + + PxMemZero(getLtbRows(fsData), getLtbDataSize(linkCount)); + prepareLtbMatrix(fsData, baseInertia, poses, jointTransforms, recipDt); + + PxcLtbFactor(fsData); + + Vec3V b[DY_ARTICULATION_MAX_SIZE]; + PxcLtbComputeJv(b, fsData, velocity); + + LtbRow* rows = getLtbRows(fsData); + for(PxU32 i=1;i(&fsData,fsData.fsDataOffset), getFsDataSize(linkCount)); + prepareFsData(fsData, links); + } + + { + PX_PROFILE_ZONE("Articulations.setupDrives", contextID); + + if(!(desc.core->externalDriveIterations & 0x80000000)) + PxMemZero(desc.externalLoads, sizeof(Mat33V) * linkCount); + + if(!(desc.core->internalDriveIterations & 0x80000000)) + PxMemZero(desc.internalLoads, sizeof(Mat33V) * linkCount); + + PxReal isf[DY_ARTICULATION_MAX_SIZE], esf[DY_ARTICULATION_MAX_SIZE]; // spring factors + Vec3V drive[DY_ARTICULATION_MAX_SIZE]; + + bool externalEqualsInternalCompliance = (desc.core->internalDriveIterations&0xffff) == (desc.core->externalDriveIterations&0xffff); + for(PxU32 i=1;iinternalDriveIterations&0xffff, allocator); + + } + + { + PX_PROFILE_ZONE("Articulations.propagateDrivenInertia", contextID); + PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, isf, desc.internalLoads, allocator); + } + + { + PX_PROFILE_ZONE("Articulations.computeJointDrives", contextID); + computeJointDrives(fsData, drive, links, poses, jointTransforms, desc.internalLoads, dt); + } + + { + PX_PROFILE_ZONE("Articulations.applyJointDrives", contextID); + PxcFsApplyJointDrives(fsData, drive); + } + + if(!externalEqualsInternalCompliance) + { + { + PX_PROFILE_ZONE("Articulations.jointExternalLoads", contextID); + PxcFsComputeJointLoadsSimd(fsData, baseInertia, desc.externalLoads, esf, linkCount, desc.core->externalDriveIterations&0xffff, allocator); + } + + { + PX_PROFILE_ZONE("Articulations.propagateDrivenInertia", contextID); + PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, esf, desc.externalLoads, allocator); + } + } + } + + { + PX_PROFILE_ZONE("Articulations.applyExternalImpulses", contextID); + Cm::SpatialVectorV Z[DY_ARTICULATION_MAX_SIZE]; + + FloatV h = FLoad(dt); + + const Cm::SpatialVector* acceleration = desc.acceleration; + + const Vec3V vGravity = V3LoadU(gravity); + + for(PxU32 i=0;imInternalFlags & PxcRigidBody::eDISABLE_GRAVITY)) + linearAccel = V3Add(linearAccel, vGravity); + Cm::SpatialVectorV a(linearAccel, V3LoadA(acceleration[i].angular)); + Z[i] = -ArticulationFnsSimd::multiply(baseInertia[i], a) * h; + } + + applyImpulses(fsData, Z, getVelocity(fsData)); + } + + // save off the motion velocity in case there are no constraints with the articulation + + PxMemCopy(desc.motionVelocity, velocity, linkCount*sizeof(Cm::SpatialVectorV)); + + // set up for deferred-update solve + + fsData.dirty = 0; + + // solver progress counters + fsData.maxSolverNormalProgress = 0; + fsData.maxSolverFrictionProgress = 0; + fsData.solverProgress = 0; + + +#if DY_ARTICULATION_DEBUG_VERIFY + for(PxU32 i=0;i(linkCount); + ArticulationJointTransforms* PX_RESTRICT jointTransforms = allocator.alloc(linkCount); + PxTransform* PX_RESTRICT poses = allocator.alloc(linkCount); + Mat33V* PX_RESTRICT jointLoads = allocator.alloc(linkCount); + + PxReal springFactor[DY_ARTICULATION_MAX_SIZE]; // spring factors + + prepareDataBlock(fsData, links, linkCount, poses, baseInertia, jointTransforms, 0); + + PxMemZero(addAddr(&fsData,fsData.fsDataOffset), getFsDataSize(linkCount)); + prepareFsData(fsData, links); + + springFactor[0] = 0.0f; + for(PxU32 i=1;i(desc.linkCount); + + for(PxU32 i=0;i(motionVelocity[i].linear); + const PxVec3& av = reinterpret_cast(motionVelocity[i].angular); + oldPose[i] = poses[i]; + poses[i] = PxTransform(poses[i].p + lv * dt, Ps::exp(av*dt) * poses[i].q); + } + + bool projected = false; + const PxReal recipDt = 1.0f/dt; + + FsInertia* PX_RESTRICT baseInertia = allocator.alloc(desc.linkCount); + ArticulationJointTransforms* PX_RESTRICT jointTransforms = allocator.alloc(desc.linkCount); + + for(PxU32 iterations = 0; iterations < core.maxProjectionIterations; iterations++) + { + PxReal maxSeparation = -PX_MAX_F32; + for(PxU32 i=1;i(motionVelocity[i].linear); + const PxVec3& av = reinterpret_cast(motionVelocity[i].angular); + poses[i] = PxTransform(poses[i].p + lv * dt, Ps::exp(av*dt) * poses[i].q); + } + } + + if(projected) + { + // recompute motion velocities. + for(PxU32 i=0;ibody2World = poses[i]; + + V3StoreA(velocity[i].linear, links[i].bodyCore->linearVelocity); + V3StoreA(velocity[i].angular, links[i].bodyCore->angularVelocity); + } +} + +void ArticulationHelper::setInertia(FsInertia& inertia, + const PxsBodyCore& body, + const PxTransform& pose) +{ + // assumes that elements that are supposed to be zero (i.e. la matrix and off diagonal elements of ll) are zero + + const PxMat33 R(pose.q); + const PxVec3& v = body.inverseInertia; + const PxReal m = 1.0f/body.inverseMass; + V3WriteX(inertia.ll.col0, m); + V3WriteY(inertia.ll.col1, m); + V3WriteZ(inertia.ll.col2, m); + + PX_ALIGN_PREFIX(16) PxMat33 PX_ALIGN_SUFFIX(16) alignedInertia = R * PxMat33::createDiagonal(PxVec3(1.0f/v.x, 1.0f/v.y, 1.0f/v.z)) * R.getTranspose(); + alignedInertia = (alignedInertia + alignedInertia.getTranspose())*0.5f; + inertia.aa = Mat33V_From_PxMat33(alignedInertia); +} + +void ArticulationHelper::setJointTransforms(ArticulationJointTransforms& transforms, + const PxTransform& parentPose, + const PxTransform& childPose, + const ArticulationJointCore& joint) +{ + transforms.cA2w = parentPose.transform(joint.parentPose); + transforms.cB2w = childPose.transform(joint.childPose); + transforms.cB2cA = transforms.cA2w.transformInv(transforms.cB2w); + if(transforms.cB2cA.q.w<0) // the relative quat must be the short way round for limits to work... + { + transforms.cB2cA.q = -transforms.cB2cA.q; + transforms.cB2w.q = -transforms.cB2w.q; + } +} + +void ArticulationHelper::prepareLtbMatrix( FsData& fsData, + const FsInertia* baseInertia, + const PxTransform* poses, + const ArticulationJointTransforms* jointTransforms, + PxReal recipDt) +{ + PxU32 linkCount = fsData.linkCount; + LtbRow* rows = getLtbRows(fsData); + + rows[0].inertia = baseInertia[0]; + + const PxVec3 axis[3] = { PxVec3(1.0f,0.0f,0.0f), PxVec3(0.0f,1.0f,0.0f), PxVec3(0.0f,0.0f,1.0f) }; + for(PxU32 i=1;i Fns; + + PxU32 linkCount = fsData.linkCount; + FsRow* rows = getFsRows(fsData); + FsRowAux* aux = getAux(fsData); + const FsJointVectors* jointVectors = getJointVectors(fsData); + + rows[0].children = links[0].children; + rows[0].pathToRoot = 1; + + PX_ALIGN_PREFIX(16) PxVec4 v[] PX_ALIGN_SUFFIX(16) = { PxVec4(1.f,0,0,0), PxVec4(0,1.f,0,0), PxVec4(0,0,1.f,0) } ; + const Vec3V* axes = reinterpret_cast(v); + + for(PxU32 i=1;i0); + return 1.0f/compliance; +} + +void ArticulationHelper::createHardLimit( const FsData& fsData, + const ArticulationLink* links, + PxU32 linkIndex, + SolverConstraint1DExt& s, + const PxVec3& axis, + PxReal err, + PxReal recipDt) +{ + init(s, PxVec3(0), PxVec3(0), axis, axis, 0, PX_MAX_F32); + + ArticulationHelper::getImpulseSelfResponse(fsData, + links[linkIndex].parent,Cm::SpatialVector(PxVec3(0), axis), s.deltaVA, + linkIndex, Cm::SpatialVector(PxVec3(0), -axis), s.deltaVB); + + const PxReal unitResponse = axis.dot(reinterpret_cast(s.deltaVA.angular)) - axis.dot(reinterpret_cast(s.deltaVB.angular)); + if(unitResponse<0.0f) + Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "Warning: articulation ill-conditioned or under severe stress, joint limit ignored"); + + const PxReal recipResponse = unitResponse>0.0f ? 1.0f/unitResponse : 0.0f; + + s.constant = recipResponse * -err * recipDt; + s.unbiasedConstant = err>0.0f ? s.constant : 0.0f; + s.velMultiplier = -recipResponse; + s.impulseMultiplier = 1.0f; +} + +void ArticulationHelper::createTangentialSpring(const FsData& fsData, + const ArticulationLink* links, + PxU32 linkIndex, + SolverConstraint1DExt& s, + const PxVec3& axis, + PxReal stiffness, + PxReal damping, + PxReal dt) +{ + init(s, PxVec3(0), PxVec3(0), axis, axis, -PX_MAX_F32, PX_MAX_F32); + + Cm::SpatialVector axis6(PxVec3(0), axis); + PxU32 parent = links[linkIndex].parent; + getImpulseSelfResponse(fsData, parent, axis6, s.deltaVA, linkIndex, -axis6, s.deltaVB); + + const PxReal unitResponse = axis.dot(reinterpret_cast(s.deltaVA.angular)) - axis.dot(reinterpret_cast(s.deltaVB.angular)); + if(unitResponse<0.0f) + Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "Warning: articulation ill-conditioned or under severe stress, tangential spring ignored"); + const PxReal recipResponse = unitResponse>0.0F ? 1.0f/unitResponse : 0.0f; + + // this is a specialization of the spring code in setSolverConstants() for acceleration springs. + // general case is b = dt * (c.mods.spring.damping * c.velocityTarget - c.mods.spring.stiffness * geomError); + // but geomError and velocityTarget are both zero + + const PxReal a = dt * dt * stiffness + dt * damping; + const PxReal x = 1.0f/(1.0f+a); + s.constant = s.unbiasedConstant = 0.0f; + s.velMultiplier = -x * recipResponse * a; + s.impulseMultiplier = 1.0f - x; +} + +PxU32 ArticulationHelper::setupSolverConstraints( FsData& fsData, PxU32 solverDataSize, + PxcConstraintBlockStream& stream, + PxSolverConstraintDesc* constraintDesc, + const ArticulationLink* links, + const ArticulationJointTransforms* jointTransforms, + PxReal dt, + PxU32& acCount, + PxsConstraintBlockManager& constraintBlockManager) +{ + acCount = 0; + + const PxU16 linkCount = fsData.linkCount; + PxU32 descCount = 0; + const PxReal recipDt = 1.0f/dt; + + const PxConstraintInvMassScale ims(1.0f, 1.0f, 1.0f, 1.0f); + + for(PxU16 i=1;i0 || j.tangentialDamping>0); + + const PxVec3 twistAxis = jointTransforms[i].cB2w.rotate(PxVec3(1.0f,0,0)); + const PxReal tqTwistAngle = Ps::tanHalf(twist.x, twist.w); + + const bool twistLowerLimited = j.twistLimited && tqTwistAngle < Cm::tanAdd(j.tanQTwistLow, j.tanQTwistPad); + const bool twistUpperLimited = j.twistLimited && tqTwistAngle > Cm::tanAdd(j.tanQTwistHigh, -j.tanQTwistPad); + + const PxU8 constraintCount = PxU8(swingLimited + tangentialStiffness + twistUpperLimited + twistLowerLimited); + if(!constraintCount) + continue; + + PxSolverConstraintDesc& desc = constraintDesc[descCount++]; + + desc.articulationA = &fsData; + desc.linkIndexA = Ps::to16(links[i].parent); + desc.articulationALength = Ps::to16(solverDataSize); + + desc.articulationB = &fsData; + desc.linkIndexB = i; + desc.articulationBLength = Ps::to16(solverDataSize); + + const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) + + sizeof(SolverConstraint1DExt) * constraintCount; + + PX_ASSERT(0==(constraintLength & 0x0f)); + desc.constraintLengthOver16 = Ps::to16(constraintLength/16); + + desc.constraint = stream.reserve(constraintLength + 16u, constraintBlockManager); + + desc.writeBack = NULL; + + SolverConstraint1DHeader* header = reinterpret_cast(desc.constraint); + SolverConstraint1DExt* constraints = reinterpret_cast(desc.constraint + sizeof(SolverConstraint1DHeader)); + + init(*header, constraintCount, true, ims); + + PxU32 cIndex = 0; + + if(swingLimited) + { + const PxVec3 normal = jointTransforms[i].cA2w.rotate(swingLimitAxis); + createHardLimit(fsData, links, i, constraints[cIndex++], normal, swingLimitError, recipDt); + if(tangentialStiffness) + { + const PxVec3 tangent = twistAxis.cross(normal).getNormalized(); + createTangentialSpring(fsData, links, i, constraints[cIndex++], tangent, j.tangentialStiffness, j.tangentialDamping, dt); + } + } + + if(twistUpperLimited) + createHardLimit(fsData, links, i, constraints[cIndex++], twistAxis, (j.tanQTwistHigh - tqTwistAngle)*4, recipDt); + + if(twistLowerLimited) + createHardLimit(fsData, links, i, constraints[cIndex++], -twistAxis, -(j.tanQTwistLow - tqTwistAngle)*4, recipDt); + + *(desc.constraint + getConstraintLength(desc)) = 0; + + PX_ASSERT(cIndex == constraintCount); + acCount += constraintCount; + } + + return descCount; +} + +void ArticulationHelper::computeJointDrives(FsData& fsData, + Vec3V* drives, + const ArticulationLink* links, + const PxTransform* poses, + const ArticulationJointTransforms* transforms, + const Mat33V* loads, + PxReal dt) +{ + typedef ArticulationFnsScalar Fns; + + const PxU32 linkCount = fsData.linkCount; + const Cm::SpatialVector* velocity = reinterpret_cast(getVelocity(fsData)); + + for(PxU32 i=1; i