diff options
Diffstat (limited to 'PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp')
| -rw-r--r-- | PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp | 594 |
1 files changed, 594 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp new file mode 100644 index 00000000..c5777c12 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp @@ -0,0 +1,594 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#include "foundation/PxMemory.h" +#include "DyConstraintPrep.h" +#include "PxsRigidBody.h" +#include "DySolverConstraint1D.h" +#include "PsSort.h" +#include "DySolverConstraintDesc.h" +#include "PxcConstraintBlockStream.h" +#include "DyArticulationContactPrep.h" +#include "PsFoundation.h" + +namespace physx +{ +namespace Dy +{ + // dsequeira: + // + // we can choose any linear combination of equality constraints and get the same solution + // Hence we can orthogonalize the constraints using the inner product given by the + // inverse mass matrix, so that when we use PGS, solving a constraint row for a joint + // don't disturb the solution of prior rows. + // + // We also eliminate the equality constraints from the hard inequality constraints - + // (essentially projecting the direction corresponding to the lagrange multiplier + // onto the equality constraint subspace) but 'til I've verified this generates + // exactly the same KKT/complementarity conditions, status is 'experimental'. + // + // since for equality constraints the resulting rows have the property that applying + // an impulse along one row doesn't alter the projected velocity along another row, + // all equality constraints (plus one inequality constraint) can be processed in parallel + // using SIMD + // + // Eliminating the inequality constraints from each other would require a solver change + // and not give us any more parallelism, although we might get better convergence. + +namespace +{ + PX_FORCE_INLINE Vec3V V3FromV4(Vec4V x) { return Vec3V_From_Vec4V(x); } + PX_FORCE_INLINE Vec3V V3FromV4Unsafe(Vec4V x) { return Vec3V_From_Vec4V_WUndefined(x); } + PX_FORCE_INLINE Vec4V V4FromV3(Vec3V x) { return Vec4V_From_Vec3V(x); } + //PX_FORCE_INLINE Vec4V V4ClearW(Vec4V x) { return V4SetW(x, FZero()); } + +struct MassProps +{ + FloatV invMass0; + FloatV invMass1; + FloatV invInertiaScale0; + FloatV invInertiaScale1; + + PX_FORCE_INLINE MassProps(const PxSolverBodyData& bd0, + const PxSolverBodyData& bd1, + const PxConstraintInvMassScale& ims) + : + invMass0(FLoad(bd0.invMass * ims.linear0)) + , invMass1(FLoad(bd1.invMass * ims.linear1)) + , invInertiaScale0(FLoad(ims.angular0)) + , invInertiaScale1(FLoad(ims.angular1)) + {} +}; + + +PX_FORCE_INLINE PxReal innerProduct(const Px1DConstraint& row0, Px1DConstraint& row1, + PxVec4& row0AngSqrtInvInertia0, PxVec4& row0AngSqrtInvInertia1, + PxVec4& row1AngSqrtInvInertia0, PxVec4& row1AngSqrtInvInertia1, const MassProps& m) +{ + const Vec3V l0 = V3Mul(V3Scale(V3LoadA(row0.linear0), m.invMass0), V3LoadA(row1.linear0)); + const Vec3V l1 = V3Mul(V3Scale(V3LoadA(row0.linear1), m.invMass1), V3LoadA(row1.linear1)); + Vec4V r0ang0 = V4LoadA(&row0AngSqrtInvInertia0.x); + Vec4V r1ang0 = V4LoadA(&row1AngSqrtInvInertia0.x); + Vec4V r0ang1 = V4LoadA(&row0AngSqrtInvInertia1.x); + Vec4V r1ang1 = V4LoadA(&row1AngSqrtInvInertia1.x); + + const Vec3V i0 = V3ScaleAdd(V3Mul(Vec3V_From_Vec4V(r0ang0), Vec3V_From_Vec4V(r1ang0)), m.invInertiaScale0, l0); + const Vec3V i1 = V3ScaleAdd(V3MulAdd(Vec3V_From_Vec4V(r0ang1), Vec3V_From_Vec4V(r1ang1), i0), m.invInertiaScale1, l1); + PxF32 f; + FStore(V3SumElems(i1), &f); + return f; +} + + +// indexed rotation around axis, with sine and cosine of half-angle +PX_FORCE_INLINE PxQuat indexedRotation(PxU32 axis, PxReal s, PxReal c) +{ + PxQuat q(0,0,0,c); + reinterpret_cast<PxReal*>(&q)[axis] = s; + return q; +} + +PxQuat diagonalize(const PxMat33& m) // jacobi rotation using quaternions +{ + const PxU32 MAX_ITERS = 5; + + PxQuat q = PxQuat(PxIdentity); + + PxMat33 d; + for(PxU32 i=0; i < MAX_ITERS;i++) + { + const PxMat33 axes(q); + d = axes.getTranspose() * m * axes; + + const PxReal d0 = PxAbs(d[1][2]), d1 = PxAbs(d[0][2]), d2 = PxAbs(d[0][1]); + const PxU32 a = PxU32(d0 > d1 && d0 > d2 ? 0 : d1 > d2 ? 1 : 2); // rotation axis index, from largest off-diagonal element + + const PxU32 a1 = Ps::getNextIndex3(a), a2 = Ps::getNextIndex3(a1); + if(d[a1][a2] == 0.0f || PxAbs(d[a1][a1]-d[a2][a2]) > 2e6f*PxAbs(2.0f*d[a1][a2])) + break; + + const PxReal w = (d[a1][a1]-d[a2][a2]) / (2.0f*d[a1][a2]); // cot(2 * phi), where phi is the rotation angle + const PxReal absw = PxAbs(w); + + PxQuat r; + if(absw>1000) + r = indexedRotation(a, 1.0f/(4.0f*w), 1.f); // h will be very close to 1, so use small angle approx instead + else + { + const PxReal t = 1 / (absw + PxSqrt(w*w+1)); // absolute value of tan phi + const PxReal h = 1 / PxSqrt(t*t+1); // absolute value of cos phi + + PX_ASSERT(h!=1); // |w|<1000 guarantees this with typical IEEE754 machine eps (approx 6e-8) + r = indexedRotation(a, PxSqrt((1-h)/2) * PxSign(w), PxSqrt((1+h)/2)); + } + + q = (q*r).getNormalized(); + } + + return q; +} + + +PX_FORCE_INLINE void rescale(const Mat33V& m, PxVec3& a0, PxVec3& a1, PxVec3& a2) +{ + const Vec3V va0 = V3LoadU(a0); + const Vec3V va1 = V3LoadU(a1); + const Vec3V va2 = V3LoadU(a2); + + const Vec3V b0 = V3ScaleAdd(va0, V3GetX(m.col0), V3ScaleAdd(va1, V3GetY(m.col0), V3Scale(va2, V3GetZ(m.col0)))); + const Vec3V b1 = V3ScaleAdd(va0, V3GetX(m.col1), V3ScaleAdd(va1, V3GetY(m.col1), V3Scale(va2, V3GetZ(m.col1)))); + const Vec3V b2 = V3ScaleAdd(va0, V3GetX(m.col2), V3ScaleAdd(va1, V3GetY(m.col2), V3Scale(va2, V3GetZ(m.col2)))); + + V3StoreU(b0, a0); + V3StoreU(b1, a1); + V3StoreU(b2, a2); +} + +PX_FORCE_INLINE void rescale4(const Mat33V& m, PxReal* a0, PxReal* a1, PxReal* a2) +{ + const Vec4V va0 = V4LoadA(a0); + const Vec4V va1 = V4LoadA(a1); + const Vec4V va2 = V4LoadA(a2); + + const Vec4V b0 = V4ScaleAdd(va0, V3GetX(m.col0), V4ScaleAdd(va1, V3GetY(m.col0), V4Scale(va2, V3GetZ(m.col0)))); + const Vec4V b1 = V4ScaleAdd(va0, V3GetX(m.col1), V4ScaleAdd(va1, V3GetY(m.col1), V4Scale(va2, V3GetZ(m.col1)))); + const Vec4V b2 = V4ScaleAdd(va0, V3GetX(m.col2), V4ScaleAdd(va1, V3GetY(m.col2), V4Scale(va2, V3GetZ(m.col2)))); + + V4StoreA(b0, a0); + V4StoreA(b1, a1); + V4StoreA(b2, a2); +} + + +template<typename T> +PX_FORCE_INLINE void rescale(const PxMat33& m, T& a0, T& a1, T& a2) +{ + T b0 = a0*m(0,0) + a1 * m(1,0) + a2 * m(2,0); + T b1 = a0*m(0,1) + a1 * m(1,1) + a2 * m(2,1); + T b2 = a0*m(0,2) + a1 * m(1,2) + a2 * m(2,2); + + a0 = b0; + a1 = b1; + a2 = b2; +} + +void diagonalize(Px1DConstraint** row, + PxVec4* angSqrtInvInertia0, + PxVec4* angSqrtInvInertia1, + const MassProps &m) +{ + PxReal a00 = innerProduct(*row[0], *row[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], m); + PxReal a01 = innerProduct(*row[0], *row[1], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m); + PxReal a02 = innerProduct(*row[0], *row[2], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m); + PxReal a11 = innerProduct(*row[1], *row[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m); + PxReal a12 = innerProduct(*row[1], *row[2], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m); + PxReal a22 = innerProduct(*row[2], *row[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m); + + PxMat33 a(PxVec3(a00, a01, a02), + PxVec3(a01, a11, a12), + PxVec3(a02, a12, a22)); + + PxQuat q = diagonalize(a); + + PxMat33 n(-q); + + Mat33V mn(V3LoadU(n.column0), V3LoadU(n.column1), V3LoadU(n.column2)); + + //KS - We treat as a Vec4V so that we get geometricError rescaled for free along with linear0 + rescale4(mn, &row[0]->linear0.x, &row[1]->linear0.x, &row[2]->linear0.x); + rescale(mn, row[0]->linear1, row[1]->linear1, row[2]->linear1); + //KS - We treat as a PxVec4 so that we get velocityTarget rescaled for free + rescale4(mn, &row[0]->angular0.x, &row[1]->angular0.x, &row[2]->angular0.x); + rescale(mn, row[0]->angular1, row[1]->angular1, row[2]->angular1); + rescale4(mn, &angSqrtInvInertia0[0].x, &angSqrtInvInertia0[1].x, &angSqrtInvInertia0[2].x); + rescale4(mn, &angSqrtInvInertia1[0].x, &angSqrtInvInertia1[1].x, &angSqrtInvInertia1[2].x); + +} + +void orthogonalize(Px1DConstraint** row, + PxVec4* angSqrtInvInertia0, + PxVec4* angSqrtInvInertia1, + PxU32 rowCount, + PxU32 eqRowCount, + const MassProps &m) +{ + PX_ASSERT(eqRowCount<=6); + + const FloatV zero = FZero(); + + Vec3V lin1m[6], ang1m[6], lin1[6], ang1[6]; + Vec4V lin0m[6], ang0m[6]; // must have 0 in the W-field + Vec4V lin0AndG[6], ang0AndT[6]; + + for(PxU32 i=0;i<rowCount;i++) + { + Vec4V l0AndG = V4LoadA(&row[i]->linear0.x); // linear0 and geometric error + Vec4V a0AndT = V4LoadA(&row[i]->angular0.x); // angular0 and velocity target + + Vec3V l1 = V3FromV4(V4LoadA(&row[i]->linear1.x)); + Vec3V a1 = V3FromV4(V4LoadA(&row[i]->angular1.x)); + + Vec4V angSqrtL0 = V4LoadA(&angSqrtInvInertia0[i].x); + Vec4V angSqrtL1 = V4LoadA(&angSqrtInvInertia1[i].x); + + PxU32 eliminationRows = PxMin<PxU32>(i, eqRowCount); + for(PxU32 j=0;j<eliminationRows;j++) + { + const Vec3V s0 = V3MulAdd(l1, lin1m[j], V3FromV4Unsafe(V4Mul(l0AndG, lin0m[j]))); + const Vec3V s1 = V3MulAdd(V3FromV4Unsafe(angSqrtL1), ang1m[j], V3FromV4Unsafe(V4Mul(angSqrtL0, ang0m[j]))); + FloatV t = V3SumElems(V3Add(s0, s1)); + + l0AndG = V4NegScaleSub(lin0AndG[j], t, l0AndG); + a0AndT = V4NegScaleSub(ang0AndT[j], t, a0AndT); + l1 = V3NegScaleSub(lin1[j], t, l1); + a1 = V3NegScaleSub(ang1[j], t, a1); + angSqrtL0 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia0[j].x), t, angSqrtL0); + angSqrtL1 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia1[j].x), t, angSqrtL1); + } + + V4StoreA(l0AndG, &row[i]->linear0.x); + V4StoreA(a0AndT, &row[i]->angular0.x); + V3StoreA(l1, row[i]->linear1); + V3StoreA(a1, row[i]->angular1); + V4StoreA(angSqrtL0, &angSqrtInvInertia0[i].x); + V4StoreA(angSqrtL1, &angSqrtInvInertia1[i].x); + + if(i<eqRowCount) + { + lin0AndG[i] = l0AndG; + ang0AndT[i] = a0AndT; + lin1[i] = l1; + ang1[i] = a1; + + const Vec3V l0 = V3FromV4(l0AndG); + + const Vec3V l0m = V3Scale(l0, m.invMass0); + const Vec3V l1m = V3Scale(l1, m.invMass1); + const Vec4V a0m = V4Scale(angSqrtL0, m.invInertiaScale0); + const Vec4V a1m = V4Scale(angSqrtL1, m.invInertiaScale1); + + const Vec3V s0 = V3MulAdd(l0, l0m, V3Mul(l1, l1m)); + const Vec4V s1 = V4MulAdd(a0m, angSqrtL0, V4Mul(a1m, angSqrtL1)); + const FloatV s = V3SumElems(V3Add(s0, V3FromV4Unsafe(s1))); + const FloatV a = FSel(FIsGrtr(s, zero), FRecip(s), zero); // with mass scaling, it's possible for the inner product of a row to be zero + + lin0m[i] = V4Scale(V4ClearW(V4FromV3(l0m)), a); + ang0m[i] = V4Scale(V4ClearW(a0m), a); + lin1m[i] = V3Scale(l1m, a); + ang1m[i] = V3Scale(V3FromV4Unsafe(a1m), a); + } + } +} +} + + +void preprocessRows(Px1DConstraint** sorted, + Px1DConstraint* rows, + PxVec4* angSqrtInvInertia0, + PxVec4* angSqrtInvInertia1, + PxU32 rowCount, + const PxSolverBodyData& bd0, + const PxSolverBodyData& bd1, + const PxConstraintInvMassScale& ims, + bool disablePreprocessing, + bool diagonalizeDrive) +{ + // j is maxed at 12, typically around 7, so insertion sort is fine + for(PxU32 i=0; i<rowCount; i++) + { + Px1DConstraint* r = rows+i; + + PxU32 j = i; + for(;j>0 && r->solveHint < sorted[j-1]->solveHint; j--) + sorted[j] = sorted[j-1]; + + sorted[j] = r; + } + + for(PxU32 i=0;i<rowCount-1;i++) + PX_ASSERT(sorted[i]->solveHint <= sorted[i+1]->solveHint); + + for (PxU32 i = 0; i<rowCount; i++) + rows[i].forInternalUse = rows[i].flags & Px1DConstraintFlag::eKEEPBIAS ? rows[i].geometricError : 0; + + + const Mat33V sqrtInvInertia0 = Mat33V(V3LoadU(bd0.sqrtInvInertia.column0), V3LoadU(bd0.sqrtInvInertia.column1), + V3LoadU(bd0.sqrtInvInertia.column2)); + + const Mat33V sqrtInvInertia1 = Mat33V(V3LoadU(bd1.sqrtInvInertia.column0), V3LoadU(bd1.sqrtInvInertia.column1), + V3LoadU(bd1.sqrtInvInertia.column2)); + + PX_ASSERT(((uintptr_t(angSqrtInvInertia0)) & 0xF) == 0); + PX_ASSERT(((uintptr_t(angSqrtInvInertia1)) & 0xF) == 0); + + for(PxU32 i = 0; i < rowCount; ++i) + { + const Vec3V angDelta0 = M33MulV3(sqrtInvInertia0, V3LoadU(sorted[i]->angular0)); + const Vec3V angDelta1 = M33MulV3(sqrtInvInertia1, V3LoadU(sorted[i]->angular1)); + V4StoreA(Vec4V_From_Vec3V(angDelta0), &angSqrtInvInertia0[i].x); + V4StoreA(Vec4V_From_Vec3V(angDelta1), &angSqrtInvInertia1[i].x); + } + + if(disablePreprocessing) + return; + + MassProps m(bd0, bd1, ims); + for(PxU32 i=0;i<rowCount;) + { + const PxU32 groupMajorId = PxU32(sorted[i]->solveHint>>8), start = i++; + while(i<rowCount && PxU32(sorted[i]->solveHint>>8) == groupMajorId) + i++; + + if(groupMajorId == 4) + { + PxU32 bCount = start; // count of bilateral constraints + for(; bCount<i && (sorted[bCount]->solveHint&255)==0; bCount++) + ; + orthogonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, i-start, bCount-start, m); + } + + if(groupMajorId == 1 && diagonalizeDrive) + { + PxU32 slerp = start; // count of bilateral constraints + for(; slerp<i && (sorted[slerp]->solveHint&255)!=2; slerp++) + ; + if(slerp+3 == i) + diagonalize(sorted+slerp, angSqrtInvInertia0+slerp, angSqrtInvInertia1+slerp, m); + + PX_ASSERT(i-start==3); + diagonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, m); + } + } +} + + + + + +PxU32 ConstraintHelper::setupSolverConstraint( +PxSolverConstraintPrepDesc& prepDesc, +PxConstraintAllocator& allocator, +PxReal dt, PxReal invdt) +{ + if (prepDesc .numRows== 0) + return 0; + + PxSolverConstraintDesc& desc = *prepDesc.desc; + + bool isExtended = desc.linkIndexA != PxSolverConstraintDesc::NO_LINK + || desc.linkIndexB != PxSolverConstraintDesc::NO_LINK; + + PxU32 stride = isExtended ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D); + const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) + stride * prepDesc.numRows; + + //KS - +16 is for the constraint progress counter, which needs to be the last element in the constraint (so that we + //know SPU DMAs have completed) + PxU8* ptr = allocator.reserveConstraintData(constraintLength + 16u); + if(NULL == ptr || (reinterpret_cast<PxU8*>(-1))==ptr) + { + if(NULL==ptr) + { + PX_WARN_ONCE( + "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. " + "Either accept joints detaching/exploding or increase buffer size allocated for constraint prep by increasing PxSceneDesc::maxNbContactDataBlocks."); + return 0; + } + else + { + PX_WARN_ONCE( + "Attempting to allocate more than 16K of constraint data. " + "Either accept joints detaching/exploding or simplify constraints."); + ptr=NULL; + return 0; + } + } + desc.constraint = ptr; + + setConstraintLength(desc,constraintLength); + + desc.writeBack = prepDesc.writeback; + setWritebackLength(desc, sizeof(ConstraintWriteback)); + + memset(desc.constraint, 0, constraintLength); + + SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint); + PxU8* constraints = desc.constraint + sizeof(SolverConstraint1DHeader); + init(*header, Ps::to8(prepDesc.numRows), isExtended, prepDesc.mInvMassScales); + header->body0WorldOffset = prepDesc.body0WorldOffset; + header->linBreakImpulse = prepDesc.linBreakForce * dt; + header->angBreakImpulse = prepDesc.angBreakForce * dt; + header->breakable = PxU8((prepDesc.linBreakForce != PX_MAX_F32) || (prepDesc.angBreakForce != PX_MAX_F32)); + header->invMass0D0 = prepDesc.data0->invMass * prepDesc.mInvMassScales.linear0; + header->invMass1D1 = prepDesc.data1->invMass * prepDesc.mInvMassScales.linear1; + + + PX_ALIGN(16, PxVec4) angSqrtInvInertia0[MAX_CONSTRAINT_ROWS]; + PX_ALIGN(16, PxVec4) angSqrtInvInertia1[MAX_CONSTRAINT_ROWS]; + + Px1DConstraint* sorted[MAX_CONSTRAINT_ROWS]; + + preprocessRows(sorted, prepDesc.rows, angSqrtInvInertia0, angSqrtInvInertia1, prepDesc.numRows, *prepDesc.data0, *prepDesc.data1, prepDesc.mInvMassScales, + isExtended || prepDesc.disablePreprocessing, prepDesc.improvedSlerp); + + const PxReal erp = 1.0f; + for (PxU32 i = 0; i<prepDesc.numRows; i++) + { + Ps::prefetchLine(constraints, 128); + SolverConstraint1D &s = *reinterpret_cast<SolverConstraint1D *>(constraints); + Px1DConstraint& c = *sorted[i]; + + PxReal driveScale = c.flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && prepDesc.driveLimitsAreForces ? PxMin(dt, 1.0f) : 1.0f; + + PxReal unitResponse; + PxReal normalVel = 0.0f; + PxReal initVel = 0.f; + + if(!isExtended) + { + init(s, c.linear0, c.linear1, PxVec3(angSqrtInvInertia0[i].x, angSqrtInvInertia0[i].y, angSqrtInvInertia0[i].z), + PxVec3(angSqrtInvInertia1[i].x, angSqrtInvInertia1[i].y, angSqrtInvInertia1[i].z), c.minImpulse * driveScale, c.maxImpulse * driveScale); + s.ang0Writeback = c.angular0; + PxReal resp0 = s.lin0.magnitudeSquared() * prepDesc.data0->invMass * prepDesc.mInvMassScales.linear0 + s.ang0.magnitudeSquared() * prepDesc.mInvMassScales.angular0; + PxReal resp1 = s.lin1.magnitudeSquared() * prepDesc.data1->invMass * prepDesc.mInvMassScales.linear1 + s.ang1.magnitudeSquared() * prepDesc.mInvMassScales.angular1; + unitResponse = resp0 + resp1; + initVel = normalVel = prepDesc.data0->projectVelocity(c.linear0, c.angular0) - prepDesc.data1->projectVelocity(c.linear1, c.angular1); + } + else + { + init(s, c.linear0, c.linear1, c.angular0, c.angular1, c.minImpulse * driveScale, c.maxImpulse * driveScale); + SolverConstraint1DExt& e = static_cast<SolverConstraint1DExt&>(s); + + const SolverExtBody eb0(reinterpret_cast<const void*>(prepDesc.body0), prepDesc.data0, desc.linkIndexA); + const SolverExtBody eb1(reinterpret_cast<const void*>(prepDesc.body1), prepDesc.data1, desc.linkIndexB); + + const Cm::SpatialVector resp0 = createImpulseResponseVector(e.lin0, e.ang0, eb0); + const Cm::SpatialVector resp1 = createImpulseResponseVector(-e.lin1, -e.ang1, eb1); + unitResponse = getImpulseResponse(eb0, resp0, unsimdRef(e.deltaVA), prepDesc.mInvMassScales.linear0, prepDesc.mInvMassScales.angular0, + eb1, resp1, unsimdRef(e.deltaVB), prepDesc.mInvMassScales.linear1, prepDesc.mInvMassScales.angular1, true); + + s.ang0Writeback = c.angular0; + s.lin0 = resp0.linear; + s.ang0 = resp0.angular; + s.lin1 = -resp1.linear; + s.ang1 = -resp1.angular; + PxReal vel0, vel1; + if(needsNormalVel(c) || eb0.mLinkIndex == PxSolverConstraintDesc::NO_LINK || eb1.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + { + vel0 = eb0.projectVelocity(c.linear0, c.angular0); + vel1 = eb1.projectVelocity(c.linear1, c.angular1); + + normalVel = vel0 - vel1; + + //normalVel = eb0.projectVelocity(s.lin0, s.ang0) - eb1.projectVelocity(s.lin1, s.ang1); + if(eb0.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + initVel = vel0; + else if(eb1.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + initVel = -vel1; + + } + } + + setSolverConstants(s.constant, s.unbiasedConstant, s.velMultiplier, s.impulseMultiplier, + c, normalVel, unitResponse, prepDesc.minResponseThreshold, erp, dt, invdt); + + //s.targetVelocity = initVel; + const PxReal velBias = initVel * s.velMultiplier; + s.constant += velBias; + s.unbiasedConstant += velBias; + + if(c.flags & Px1DConstraintFlag::eOUTPUT_FORCE) + s.flags |= DY_SC_FLAG_OUTPUT_FORCE; + + constraints += stride; + } + + //KS - Set the solve count at the end to 0 + *(reinterpret_cast<PxU32*>(constraints)) = 0; + *(reinterpret_cast<PxU32*>(constraints + 4)) = 0; + PX_ASSERT(desc.constraint + getConstraintLength(desc) == constraints); + return prepDesc.numRows; +} + +PxU32 SetupSolverConstraint(SolverConstraintShaderPrepDesc& shaderDesc, + PxSolverConstraintPrepDesc& prepDesc, + PxConstraintAllocator& allocator, + PxReal dt, PxReal invdt) +{ + // LL shouldn't see broken constraints + + PX_ASSERT(!(reinterpret_cast<ConstraintWriteback*>(prepDesc.writeback)->broken)); + + setConstraintLength(*prepDesc.desc, 0); + + if (!shaderDesc.solverPrep) + return 0; + + //PxU32 numAxisConstraints = 0; + + Px1DConstraint rows[MAX_CONSTRAINT_ROWS]; + + // This is necessary so that there will be sensible defaults and shaders will + // continue to work (albeit with a recompile) if the row format changes. + // It's a bit inefficient because it fills in all constraint rows even if there + // is only going to be one generated. A way around this would be for the shader to + // specify the maximum number of rows it needs, or it could call a subroutine to + // prep the row before it starts filling it it. + + PxMemZero(rows, sizeof(Px1DConstraint)*MAX_CONSTRAINT_ROWS); + + for (PxU32 i = 0; i<MAX_CONSTRAINT_ROWS; i++) + { + Px1DConstraint& c = rows[i]; + //Px1DConstraintInit(c); + c.minImpulse = -PX_MAX_REAL; + c.maxImpulse = PX_MAX_REAL; + } + + prepDesc.mInvMassScales.linear0 = prepDesc.mInvMassScales.linear1 = prepDesc.mInvMassScales.angular0 = prepDesc.mInvMassScales.angular1 = 1.f; + + PxVec3 body0WorldOffset(0.f); + PxU32 constraintCount = (*shaderDesc.solverPrep)(rows, + body0WorldOffset, + MAX_CONSTRAINT_ROWS, + prepDesc.mInvMassScales, + shaderDesc.constantBlock, + prepDesc.bodyFrame0, prepDesc.bodyFrame1); + + prepDesc.rows = rows; + prepDesc.numRows = constraintCount; + + prepDesc.body0WorldOffset = body0WorldOffset; + + return ConstraintHelper::setupSolverConstraint(prepDesc, allocator, dt, invdt); +} + +} + +} |