aboutsummaryrefslogtreecommitdiff
path: root/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp')
-rw-r--r--PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp594
1 files changed, 594 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp
new file mode 100644
index 00000000..c5777c12
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetup.cpp
@@ -0,0 +1,594 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+#include "foundation/PxMemory.h"
+#include "DyConstraintPrep.h"
+#include "PxsRigidBody.h"
+#include "DySolverConstraint1D.h"
+#include "PsSort.h"
+#include "DySolverConstraintDesc.h"
+#include "PxcConstraintBlockStream.h"
+#include "DyArticulationContactPrep.h"
+#include "PsFoundation.h"
+
+namespace physx
+{
+namespace Dy
+{
+ // dsequeira:
+ //
+ // we can choose any linear combination of equality constraints and get the same solution
+ // Hence we can orthogonalize the constraints using the inner product given by the
+ // inverse mass matrix, so that when we use PGS, solving a constraint row for a joint
+ // don't disturb the solution of prior rows.
+ //
+ // We also eliminate the equality constraints from the hard inequality constraints -
+ // (essentially projecting the direction corresponding to the lagrange multiplier
+ // onto the equality constraint subspace) but 'til I've verified this generates
+ // exactly the same KKT/complementarity conditions, status is 'experimental'.
+ //
+ // since for equality constraints the resulting rows have the property that applying
+ // an impulse along one row doesn't alter the projected velocity along another row,
+ // all equality constraints (plus one inequality constraint) can be processed in parallel
+ // using SIMD
+ //
+ // Eliminating the inequality constraints from each other would require a solver change
+ // and not give us any more parallelism, although we might get better convergence.
+
+namespace
+{
+ PX_FORCE_INLINE Vec3V V3FromV4(Vec4V x) { return Vec3V_From_Vec4V(x); }
+ PX_FORCE_INLINE Vec3V V3FromV4Unsafe(Vec4V x) { return Vec3V_From_Vec4V_WUndefined(x); }
+ PX_FORCE_INLINE Vec4V V4FromV3(Vec3V x) { return Vec4V_From_Vec3V(x); }
+ //PX_FORCE_INLINE Vec4V V4ClearW(Vec4V x) { return V4SetW(x, FZero()); }
+
+struct MassProps
+{
+ FloatV invMass0;
+ FloatV invMass1;
+ FloatV invInertiaScale0;
+ FloatV invInertiaScale1;
+
+ PX_FORCE_INLINE MassProps(const PxSolverBodyData& bd0,
+ const PxSolverBodyData& bd1,
+ const PxConstraintInvMassScale& ims)
+ :
+ invMass0(FLoad(bd0.invMass * ims.linear0))
+ , invMass1(FLoad(bd1.invMass * ims.linear1))
+ , invInertiaScale0(FLoad(ims.angular0))
+ , invInertiaScale1(FLoad(ims.angular1))
+ {}
+};
+
+
+PX_FORCE_INLINE PxReal innerProduct(const Px1DConstraint& row0, Px1DConstraint& row1,
+ PxVec4& row0AngSqrtInvInertia0, PxVec4& row0AngSqrtInvInertia1,
+ PxVec4& row1AngSqrtInvInertia0, PxVec4& row1AngSqrtInvInertia1, const MassProps& m)
+{
+ const Vec3V l0 = V3Mul(V3Scale(V3LoadA(row0.linear0), m.invMass0), V3LoadA(row1.linear0));
+ const Vec3V l1 = V3Mul(V3Scale(V3LoadA(row0.linear1), m.invMass1), V3LoadA(row1.linear1));
+ Vec4V r0ang0 = V4LoadA(&row0AngSqrtInvInertia0.x);
+ Vec4V r1ang0 = V4LoadA(&row1AngSqrtInvInertia0.x);
+ Vec4V r0ang1 = V4LoadA(&row0AngSqrtInvInertia1.x);
+ Vec4V r1ang1 = V4LoadA(&row1AngSqrtInvInertia1.x);
+
+ const Vec3V i0 = V3ScaleAdd(V3Mul(Vec3V_From_Vec4V(r0ang0), Vec3V_From_Vec4V(r1ang0)), m.invInertiaScale0, l0);
+ const Vec3V i1 = V3ScaleAdd(V3MulAdd(Vec3V_From_Vec4V(r0ang1), Vec3V_From_Vec4V(r1ang1), i0), m.invInertiaScale1, l1);
+ PxF32 f;
+ FStore(V3SumElems(i1), &f);
+ return f;
+}
+
+
+// indexed rotation around axis, with sine and cosine of half-angle
+PX_FORCE_INLINE PxQuat indexedRotation(PxU32 axis, PxReal s, PxReal c)
+{
+ PxQuat q(0,0,0,c);
+ reinterpret_cast<PxReal*>(&q)[axis] = s;
+ return q;
+}
+
+PxQuat diagonalize(const PxMat33& m) // jacobi rotation using quaternions
+{
+ const PxU32 MAX_ITERS = 5;
+
+ PxQuat q = PxQuat(PxIdentity);
+
+ PxMat33 d;
+ for(PxU32 i=0; i < MAX_ITERS;i++)
+ {
+ const PxMat33 axes(q);
+ d = axes.getTranspose() * m * axes;
+
+ const PxReal d0 = PxAbs(d[1][2]), d1 = PxAbs(d[0][2]), d2 = PxAbs(d[0][1]);
+ const PxU32 a = PxU32(d0 > d1 && d0 > d2 ? 0 : d1 > d2 ? 1 : 2); // rotation axis index, from largest off-diagonal element
+
+ const PxU32 a1 = Ps::getNextIndex3(a), a2 = Ps::getNextIndex3(a1);
+ if(d[a1][a2] == 0.0f || PxAbs(d[a1][a1]-d[a2][a2]) > 2e6f*PxAbs(2.0f*d[a1][a2]))
+ break;
+
+ const PxReal w = (d[a1][a1]-d[a2][a2]) / (2.0f*d[a1][a2]); // cot(2 * phi), where phi is the rotation angle
+ const PxReal absw = PxAbs(w);
+
+ PxQuat r;
+ if(absw>1000)
+ r = indexedRotation(a, 1.0f/(4.0f*w), 1.f); // h will be very close to 1, so use small angle approx instead
+ else
+ {
+ const PxReal t = 1 / (absw + PxSqrt(w*w+1)); // absolute value of tan phi
+ const PxReal h = 1 / PxSqrt(t*t+1); // absolute value of cos phi
+
+ PX_ASSERT(h!=1); // |w|<1000 guarantees this with typical IEEE754 machine eps (approx 6e-8)
+ r = indexedRotation(a, PxSqrt((1-h)/2) * PxSign(w), PxSqrt((1+h)/2));
+ }
+
+ q = (q*r).getNormalized();
+ }
+
+ return q;
+}
+
+
+PX_FORCE_INLINE void rescale(const Mat33V& m, PxVec3& a0, PxVec3& a1, PxVec3& a2)
+{
+ const Vec3V va0 = V3LoadU(a0);
+ const Vec3V va1 = V3LoadU(a1);
+ const Vec3V va2 = V3LoadU(a2);
+
+ const Vec3V b0 = V3ScaleAdd(va0, V3GetX(m.col0), V3ScaleAdd(va1, V3GetY(m.col0), V3Scale(va2, V3GetZ(m.col0))));
+ const Vec3V b1 = V3ScaleAdd(va0, V3GetX(m.col1), V3ScaleAdd(va1, V3GetY(m.col1), V3Scale(va2, V3GetZ(m.col1))));
+ const Vec3V b2 = V3ScaleAdd(va0, V3GetX(m.col2), V3ScaleAdd(va1, V3GetY(m.col2), V3Scale(va2, V3GetZ(m.col2))));
+
+ V3StoreU(b0, a0);
+ V3StoreU(b1, a1);
+ V3StoreU(b2, a2);
+}
+
+PX_FORCE_INLINE void rescale4(const Mat33V& m, PxReal* a0, PxReal* a1, PxReal* a2)
+{
+ const Vec4V va0 = V4LoadA(a0);
+ const Vec4V va1 = V4LoadA(a1);
+ const Vec4V va2 = V4LoadA(a2);
+
+ const Vec4V b0 = V4ScaleAdd(va0, V3GetX(m.col0), V4ScaleAdd(va1, V3GetY(m.col0), V4Scale(va2, V3GetZ(m.col0))));
+ const Vec4V b1 = V4ScaleAdd(va0, V3GetX(m.col1), V4ScaleAdd(va1, V3GetY(m.col1), V4Scale(va2, V3GetZ(m.col1))));
+ const Vec4V b2 = V4ScaleAdd(va0, V3GetX(m.col2), V4ScaleAdd(va1, V3GetY(m.col2), V4Scale(va2, V3GetZ(m.col2))));
+
+ V4StoreA(b0, a0);
+ V4StoreA(b1, a1);
+ V4StoreA(b2, a2);
+}
+
+
+template<typename T>
+PX_FORCE_INLINE void rescale(const PxMat33& m, T& a0, T& a1, T& a2)
+{
+ T b0 = a0*m(0,0) + a1 * m(1,0) + a2 * m(2,0);
+ T b1 = a0*m(0,1) + a1 * m(1,1) + a2 * m(2,1);
+ T b2 = a0*m(0,2) + a1 * m(1,2) + a2 * m(2,2);
+
+ a0 = b0;
+ a1 = b1;
+ a2 = b2;
+}
+
+void diagonalize(Px1DConstraint** row,
+ PxVec4* angSqrtInvInertia0,
+ PxVec4* angSqrtInvInertia1,
+ const MassProps &m)
+{
+ PxReal a00 = innerProduct(*row[0], *row[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], m);
+ PxReal a01 = innerProduct(*row[0], *row[1], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m);
+ PxReal a02 = innerProduct(*row[0], *row[2], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m);
+ PxReal a11 = innerProduct(*row[1], *row[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m);
+ PxReal a12 = innerProduct(*row[1], *row[2], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m);
+ PxReal a22 = innerProduct(*row[2], *row[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m);
+
+ PxMat33 a(PxVec3(a00, a01, a02),
+ PxVec3(a01, a11, a12),
+ PxVec3(a02, a12, a22));
+
+ PxQuat q = diagonalize(a);
+
+ PxMat33 n(-q);
+
+ Mat33V mn(V3LoadU(n.column0), V3LoadU(n.column1), V3LoadU(n.column2));
+
+ //KS - We treat as a Vec4V so that we get geometricError rescaled for free along with linear0
+ rescale4(mn, &row[0]->linear0.x, &row[1]->linear0.x, &row[2]->linear0.x);
+ rescale(mn, row[0]->linear1, row[1]->linear1, row[2]->linear1);
+ //KS - We treat as a PxVec4 so that we get velocityTarget rescaled for free
+ rescale4(mn, &row[0]->angular0.x, &row[1]->angular0.x, &row[2]->angular0.x);
+ rescale(mn, row[0]->angular1, row[1]->angular1, row[2]->angular1);
+ rescale4(mn, &angSqrtInvInertia0[0].x, &angSqrtInvInertia0[1].x, &angSqrtInvInertia0[2].x);
+ rescale4(mn, &angSqrtInvInertia1[0].x, &angSqrtInvInertia1[1].x, &angSqrtInvInertia1[2].x);
+
+}
+
+void orthogonalize(Px1DConstraint** row,
+ PxVec4* angSqrtInvInertia0,
+ PxVec4* angSqrtInvInertia1,
+ PxU32 rowCount,
+ PxU32 eqRowCount,
+ const MassProps &m)
+{
+ PX_ASSERT(eqRowCount<=6);
+
+ const FloatV zero = FZero();
+
+ Vec3V lin1m[6], ang1m[6], lin1[6], ang1[6];
+ Vec4V lin0m[6], ang0m[6]; // must have 0 in the W-field
+ Vec4V lin0AndG[6], ang0AndT[6];
+
+ for(PxU32 i=0;i<rowCount;i++)
+ {
+ Vec4V l0AndG = V4LoadA(&row[i]->linear0.x); // linear0 and geometric error
+ Vec4V a0AndT = V4LoadA(&row[i]->angular0.x); // angular0 and velocity target
+
+ Vec3V l1 = V3FromV4(V4LoadA(&row[i]->linear1.x));
+ Vec3V a1 = V3FromV4(V4LoadA(&row[i]->angular1.x));
+
+ Vec4V angSqrtL0 = V4LoadA(&angSqrtInvInertia0[i].x);
+ Vec4V angSqrtL1 = V4LoadA(&angSqrtInvInertia1[i].x);
+
+ PxU32 eliminationRows = PxMin<PxU32>(i, eqRowCount);
+ for(PxU32 j=0;j<eliminationRows;j++)
+ {
+ const Vec3V s0 = V3MulAdd(l1, lin1m[j], V3FromV4Unsafe(V4Mul(l0AndG, lin0m[j])));
+ const Vec3V s1 = V3MulAdd(V3FromV4Unsafe(angSqrtL1), ang1m[j], V3FromV4Unsafe(V4Mul(angSqrtL0, ang0m[j])));
+ FloatV t = V3SumElems(V3Add(s0, s1));
+
+ l0AndG = V4NegScaleSub(lin0AndG[j], t, l0AndG);
+ a0AndT = V4NegScaleSub(ang0AndT[j], t, a0AndT);
+ l1 = V3NegScaleSub(lin1[j], t, l1);
+ a1 = V3NegScaleSub(ang1[j], t, a1);
+ angSqrtL0 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia0[j].x), t, angSqrtL0);
+ angSqrtL1 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia1[j].x), t, angSqrtL1);
+ }
+
+ V4StoreA(l0AndG, &row[i]->linear0.x);
+ V4StoreA(a0AndT, &row[i]->angular0.x);
+ V3StoreA(l1, row[i]->linear1);
+ V3StoreA(a1, row[i]->angular1);
+ V4StoreA(angSqrtL0, &angSqrtInvInertia0[i].x);
+ V4StoreA(angSqrtL1, &angSqrtInvInertia1[i].x);
+
+ if(i<eqRowCount)
+ {
+ lin0AndG[i] = l0AndG;
+ ang0AndT[i] = a0AndT;
+ lin1[i] = l1;
+ ang1[i] = a1;
+
+ const Vec3V l0 = V3FromV4(l0AndG);
+
+ const Vec3V l0m = V3Scale(l0, m.invMass0);
+ const Vec3V l1m = V3Scale(l1, m.invMass1);
+ const Vec4V a0m = V4Scale(angSqrtL0, m.invInertiaScale0);
+ const Vec4V a1m = V4Scale(angSqrtL1, m.invInertiaScale1);
+
+ const Vec3V s0 = V3MulAdd(l0, l0m, V3Mul(l1, l1m));
+ const Vec4V s1 = V4MulAdd(a0m, angSqrtL0, V4Mul(a1m, angSqrtL1));
+ const FloatV s = V3SumElems(V3Add(s0, V3FromV4Unsafe(s1)));
+ const FloatV a = FSel(FIsGrtr(s, zero), FRecip(s), zero); // with mass scaling, it's possible for the inner product of a row to be zero
+
+ lin0m[i] = V4Scale(V4ClearW(V4FromV3(l0m)), a);
+ ang0m[i] = V4Scale(V4ClearW(a0m), a);
+ lin1m[i] = V3Scale(l1m, a);
+ ang1m[i] = V3Scale(V3FromV4Unsafe(a1m), a);
+ }
+ }
+}
+}
+
+
+void preprocessRows(Px1DConstraint** sorted,
+ Px1DConstraint* rows,
+ PxVec4* angSqrtInvInertia0,
+ PxVec4* angSqrtInvInertia1,
+ PxU32 rowCount,
+ const PxSolverBodyData& bd0,
+ const PxSolverBodyData& bd1,
+ const PxConstraintInvMassScale& ims,
+ bool disablePreprocessing,
+ bool diagonalizeDrive)
+{
+ // j is maxed at 12, typically around 7, so insertion sort is fine
+ for(PxU32 i=0; i<rowCount; i++)
+ {
+ Px1DConstraint* r = rows+i;
+
+ PxU32 j = i;
+ for(;j>0 && r->solveHint < sorted[j-1]->solveHint; j--)
+ sorted[j] = sorted[j-1];
+
+ sorted[j] = r;
+ }
+
+ for(PxU32 i=0;i<rowCount-1;i++)
+ PX_ASSERT(sorted[i]->solveHint <= sorted[i+1]->solveHint);
+
+ for (PxU32 i = 0; i<rowCount; i++)
+ rows[i].forInternalUse = rows[i].flags & Px1DConstraintFlag::eKEEPBIAS ? rows[i].geometricError : 0;
+
+
+ const Mat33V sqrtInvInertia0 = Mat33V(V3LoadU(bd0.sqrtInvInertia.column0), V3LoadU(bd0.sqrtInvInertia.column1),
+ V3LoadU(bd0.sqrtInvInertia.column2));
+
+ const Mat33V sqrtInvInertia1 = Mat33V(V3LoadU(bd1.sqrtInvInertia.column0), V3LoadU(bd1.sqrtInvInertia.column1),
+ V3LoadU(bd1.sqrtInvInertia.column2));
+
+ PX_ASSERT(((uintptr_t(angSqrtInvInertia0)) & 0xF) == 0);
+ PX_ASSERT(((uintptr_t(angSqrtInvInertia1)) & 0xF) == 0);
+
+ for(PxU32 i = 0; i < rowCount; ++i)
+ {
+ const Vec3V angDelta0 = M33MulV3(sqrtInvInertia0, V3LoadU(sorted[i]->angular0));
+ const Vec3V angDelta1 = M33MulV3(sqrtInvInertia1, V3LoadU(sorted[i]->angular1));
+ V4StoreA(Vec4V_From_Vec3V(angDelta0), &angSqrtInvInertia0[i].x);
+ V4StoreA(Vec4V_From_Vec3V(angDelta1), &angSqrtInvInertia1[i].x);
+ }
+
+ if(disablePreprocessing)
+ return;
+
+ MassProps m(bd0, bd1, ims);
+ for(PxU32 i=0;i<rowCount;)
+ {
+ const PxU32 groupMajorId = PxU32(sorted[i]->solveHint>>8), start = i++;
+ while(i<rowCount && PxU32(sorted[i]->solveHint>>8) == groupMajorId)
+ i++;
+
+ if(groupMajorId == 4)
+ {
+ PxU32 bCount = start; // count of bilateral constraints
+ for(; bCount<i && (sorted[bCount]->solveHint&255)==0; bCount++)
+ ;
+ orthogonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, i-start, bCount-start, m);
+ }
+
+ if(groupMajorId == 1 && diagonalizeDrive)
+ {
+ PxU32 slerp = start; // count of bilateral constraints
+ for(; slerp<i && (sorted[slerp]->solveHint&255)!=2; slerp++)
+ ;
+ if(slerp+3 == i)
+ diagonalize(sorted+slerp, angSqrtInvInertia0+slerp, angSqrtInvInertia1+slerp, m);
+
+ PX_ASSERT(i-start==3);
+ diagonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, m);
+ }
+ }
+}
+
+
+
+
+
+PxU32 ConstraintHelper::setupSolverConstraint(
+PxSolverConstraintPrepDesc& prepDesc,
+PxConstraintAllocator& allocator,
+PxReal dt, PxReal invdt)
+{
+ if (prepDesc .numRows== 0)
+ return 0;
+
+ PxSolverConstraintDesc& desc = *prepDesc.desc;
+
+ bool isExtended = desc.linkIndexA != PxSolverConstraintDesc::NO_LINK
+ || desc.linkIndexB != PxSolverConstraintDesc::NO_LINK;
+
+ PxU32 stride = isExtended ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D);
+ const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) + stride * prepDesc.numRows;
+
+ //KS - +16 is for the constraint progress counter, which needs to be the last element in the constraint (so that we
+ //know SPU DMAs have completed)
+ PxU8* ptr = allocator.reserveConstraintData(constraintLength + 16u);
+ if(NULL == ptr || (reinterpret_cast<PxU8*>(-1))==ptr)
+ {
+ if(NULL==ptr)
+ {
+ PX_WARN_ONCE(
+ "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+ "Either accept joints detaching/exploding or increase buffer size allocated for constraint prep by increasing PxSceneDesc::maxNbContactDataBlocks.");
+ return 0;
+ }
+ else
+ {
+ PX_WARN_ONCE(
+ "Attempting to allocate more than 16K of constraint data. "
+ "Either accept joints detaching/exploding or simplify constraints.");
+ ptr=NULL;
+ return 0;
+ }
+ }
+ desc.constraint = ptr;
+
+ setConstraintLength(desc,constraintLength);
+
+ desc.writeBack = prepDesc.writeback;
+ setWritebackLength(desc, sizeof(ConstraintWriteback));
+
+ memset(desc.constraint, 0, constraintLength);
+
+ SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
+ PxU8* constraints = desc.constraint + sizeof(SolverConstraint1DHeader);
+ init(*header, Ps::to8(prepDesc.numRows), isExtended, prepDesc.mInvMassScales);
+ header->body0WorldOffset = prepDesc.body0WorldOffset;
+ header->linBreakImpulse = prepDesc.linBreakForce * dt;
+ header->angBreakImpulse = prepDesc.angBreakForce * dt;
+ header->breakable = PxU8((prepDesc.linBreakForce != PX_MAX_F32) || (prepDesc.angBreakForce != PX_MAX_F32));
+ header->invMass0D0 = prepDesc.data0->invMass * prepDesc.mInvMassScales.linear0;
+ header->invMass1D1 = prepDesc.data1->invMass * prepDesc.mInvMassScales.linear1;
+
+
+ PX_ALIGN(16, PxVec4) angSqrtInvInertia0[MAX_CONSTRAINT_ROWS];
+ PX_ALIGN(16, PxVec4) angSqrtInvInertia1[MAX_CONSTRAINT_ROWS];
+
+ Px1DConstraint* sorted[MAX_CONSTRAINT_ROWS];
+
+ preprocessRows(sorted, prepDesc.rows, angSqrtInvInertia0, angSqrtInvInertia1, prepDesc.numRows, *prepDesc.data0, *prepDesc.data1, prepDesc.mInvMassScales,
+ isExtended || prepDesc.disablePreprocessing, prepDesc.improvedSlerp);
+
+ const PxReal erp = 1.0f;
+ for (PxU32 i = 0; i<prepDesc.numRows; i++)
+ {
+ Ps::prefetchLine(constraints, 128);
+ SolverConstraint1D &s = *reinterpret_cast<SolverConstraint1D *>(constraints);
+ Px1DConstraint& c = *sorted[i];
+
+ PxReal driveScale = c.flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && prepDesc.driveLimitsAreForces ? PxMin(dt, 1.0f) : 1.0f;
+
+ PxReal unitResponse;
+ PxReal normalVel = 0.0f;
+ PxReal initVel = 0.f;
+
+ if(!isExtended)
+ {
+ init(s, c.linear0, c.linear1, PxVec3(angSqrtInvInertia0[i].x, angSqrtInvInertia0[i].y, angSqrtInvInertia0[i].z),
+ PxVec3(angSqrtInvInertia1[i].x, angSqrtInvInertia1[i].y, angSqrtInvInertia1[i].z), c.minImpulse * driveScale, c.maxImpulse * driveScale);
+ s.ang0Writeback = c.angular0;
+ PxReal resp0 = s.lin0.magnitudeSquared() * prepDesc.data0->invMass * prepDesc.mInvMassScales.linear0 + s.ang0.magnitudeSquared() * prepDesc.mInvMassScales.angular0;
+ PxReal resp1 = s.lin1.magnitudeSquared() * prepDesc.data1->invMass * prepDesc.mInvMassScales.linear1 + s.ang1.magnitudeSquared() * prepDesc.mInvMassScales.angular1;
+ unitResponse = resp0 + resp1;
+ initVel = normalVel = prepDesc.data0->projectVelocity(c.linear0, c.angular0) - prepDesc.data1->projectVelocity(c.linear1, c.angular1);
+ }
+ else
+ {
+ init(s, c.linear0, c.linear1, c.angular0, c.angular1, c.minImpulse * driveScale, c.maxImpulse * driveScale);
+ SolverConstraint1DExt& e = static_cast<SolverConstraint1DExt&>(s);
+
+ const SolverExtBody eb0(reinterpret_cast<const void*>(prepDesc.body0), prepDesc.data0, desc.linkIndexA);
+ const SolverExtBody eb1(reinterpret_cast<const void*>(prepDesc.body1), prepDesc.data1, desc.linkIndexB);
+
+ const Cm::SpatialVector resp0 = createImpulseResponseVector(e.lin0, e.ang0, eb0);
+ const Cm::SpatialVector resp1 = createImpulseResponseVector(-e.lin1, -e.ang1, eb1);
+ unitResponse = getImpulseResponse(eb0, resp0, unsimdRef(e.deltaVA), prepDesc.mInvMassScales.linear0, prepDesc.mInvMassScales.angular0,
+ eb1, resp1, unsimdRef(e.deltaVB), prepDesc.mInvMassScales.linear1, prepDesc.mInvMassScales.angular1, true);
+
+ s.ang0Writeback = c.angular0;
+ s.lin0 = resp0.linear;
+ s.ang0 = resp0.angular;
+ s.lin1 = -resp1.linear;
+ s.ang1 = -resp1.angular;
+ PxReal vel0, vel1;
+ if(needsNormalVel(c) || eb0.mLinkIndex == PxSolverConstraintDesc::NO_LINK || eb1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+ {
+ vel0 = eb0.projectVelocity(c.linear0, c.angular0);
+ vel1 = eb1.projectVelocity(c.linear1, c.angular1);
+
+ normalVel = vel0 - vel1;
+
+ //normalVel = eb0.projectVelocity(s.lin0, s.ang0) - eb1.projectVelocity(s.lin1, s.ang1);
+ if(eb0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+ initVel = vel0;
+ else if(eb1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
+ initVel = -vel1;
+
+ }
+ }
+
+ setSolverConstants(s.constant, s.unbiasedConstant, s.velMultiplier, s.impulseMultiplier,
+ c, normalVel, unitResponse, prepDesc.minResponseThreshold, erp, dt, invdt);
+
+ //s.targetVelocity = initVel;
+ const PxReal velBias = initVel * s.velMultiplier;
+ s.constant += velBias;
+ s.unbiasedConstant += velBias;
+
+ if(c.flags & Px1DConstraintFlag::eOUTPUT_FORCE)
+ s.flags |= DY_SC_FLAG_OUTPUT_FORCE;
+
+ constraints += stride;
+ }
+
+ //KS - Set the solve count at the end to 0
+ *(reinterpret_cast<PxU32*>(constraints)) = 0;
+ *(reinterpret_cast<PxU32*>(constraints + 4)) = 0;
+ PX_ASSERT(desc.constraint + getConstraintLength(desc) == constraints);
+ return prepDesc.numRows;
+}
+
+PxU32 SetupSolverConstraint(SolverConstraintShaderPrepDesc& shaderDesc,
+ PxSolverConstraintPrepDesc& prepDesc,
+ PxConstraintAllocator& allocator,
+ PxReal dt, PxReal invdt)
+{
+ // LL shouldn't see broken constraints
+
+ PX_ASSERT(!(reinterpret_cast<ConstraintWriteback*>(prepDesc.writeback)->broken));
+
+ setConstraintLength(*prepDesc.desc, 0);
+
+ if (!shaderDesc.solverPrep)
+ return 0;
+
+ //PxU32 numAxisConstraints = 0;
+
+ Px1DConstraint rows[MAX_CONSTRAINT_ROWS];
+
+ // This is necessary so that there will be sensible defaults and shaders will
+ // continue to work (albeit with a recompile) if the row format changes.
+ // It's a bit inefficient because it fills in all constraint rows even if there
+ // is only going to be one generated. A way around this would be for the shader to
+ // specify the maximum number of rows it needs, or it could call a subroutine to
+ // prep the row before it starts filling it it.
+
+ PxMemZero(rows, sizeof(Px1DConstraint)*MAX_CONSTRAINT_ROWS);
+
+ for (PxU32 i = 0; i<MAX_CONSTRAINT_ROWS; i++)
+ {
+ Px1DConstraint& c = rows[i];
+ //Px1DConstraintInit(c);
+ c.minImpulse = -PX_MAX_REAL;
+ c.maxImpulse = PX_MAX_REAL;
+ }
+
+ prepDesc.mInvMassScales.linear0 = prepDesc.mInvMassScales.linear1 = prepDesc.mInvMassScales.angular0 = prepDesc.mInvMassScales.angular1 = 1.f;
+
+ PxVec3 body0WorldOffset(0.f);
+ PxU32 constraintCount = (*shaderDesc.solverPrep)(rows,
+ body0WorldOffset,
+ MAX_CONSTRAINT_ROWS,
+ prepDesc.mInvMassScales,
+ shaderDesc.constantBlock,
+ prepDesc.bodyFrame0, prepDesc.bodyFrame1);
+
+ prepDesc.rows = rows;
+ prepDesc.numRows = constraintCount;
+
+ prepDesc.body0WorldOffset = body0WorldOffset;
+
+ return ConstraintHelper::setupSolverConstraint(prepDesc, allocator, dt, invdt);
+}
+
+}
+
+}