diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelDynamics/src | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/LowLevelDynamics/src')
61 files changed, 24858 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulation.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulation.cpp new file mode 100644 index 00000000..347aecb8 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulation.cpp @@ -0,0 +1,241 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#include "PsMathUtils.h" +#include "CmConeLimitHelper.h" +#include "DySolverConstraint1D.h" +#include "DyArticulation.h" +#include "DyArticulationHelper.h" +#include "PxsRigidBody.h" +#include "PxcConstraintBlockStream.h" +#include "DyArticulationContactPrep.h" +#include "DyDynamics.h" +#include "DyArticulationReference.h" +#include "DyArticulationPImpl.h" +#include <stdio.h> + +using namespace physx; + +// we encode articulation link handles in the lower bits of the pointer, so the +// articulation has to be aligned, which in an aligned pool means we need to size it +// appropriately + +namespace physx +{ + namespace Dy + { + void SolverCoreRegisterArticulationFns(); + + void SolverCoreRegisterArticulationFnsCoulomb(); + + +PX_COMPILE_TIME_ASSERT((sizeof(Articulation)&(DY_ARTICULATION_MAX_SIZE-1))==0); + +Articulation::Articulation(Sc::ArticulationSim* sim) +: mSolverDesc(NULL), mArticulationSim(sim) +{ + PX_ASSERT((reinterpret_cast<size_t>(this) & (DY_ARTICULATION_MAX_SIZE-1))==0); +} + +Articulation::~Articulation() +{ +} + + +/* computes the implicit impulse and the drive scale at the joint, in joint coords */ + +// Returns the link index stored in the low bits of the handle (handle & DY_ARTICULATION_IDMASK). +PxU32 Articulation::getLinkIndex(ArticulationLinkHandle handle) const +{ + return PxU32(handle&DY_ARTICULATION_IDMASK); +} + +#if DY_DEBUG_ARTICULATION + +// Debug helper: for every non-root link, forms the difference between the link's and its parent's +// spatial velocity (each passed through Fns::translateMotion with the offset to the joint position b.cB2w.p), +// accumulates the squared linear part into "error" and the squared angular part into "energy", and prints both. +// The dump flag is currently ignored: the printf is unconditional. +void Articulation::computeResiduals(const Cm::SpatialVector *v, + const ArticulationJointTransforms* jointTransforms, + bool /*dump*/) const +{ + typedef ArticulationFnsScalar Fns; + + PxReal error = 0, energy = 0; + for(PxU32 i=1;i<mSolverDesc->linkCount;i++) + { + const ArticulationJointTransforms &b = jointTransforms[i]; + PxU32 parent = mSolverDesc->links[i].parent; + const ArticulationJointCore &j = *mSolverDesc->links[i].inboundJoint; + PX_UNUSED(j); + + Cm::SpatialVector residual = Fns::translateMotion(mSolverDesc->poses[i].p - b.cB2w.p, v[i]) + - Fns::translateMotion(mSolverDesc->poses[parent].p - b.cB2w.p, v[parent]); + 
error += residual.linear.magnitudeSquared(); + energy += residual.angular.magnitudeSquared(); + + } +// if(dump) + printf("Energy %f, Error %f\n", energy, error); +} + + +// Debug helper: sums multiply(inertia[i], velocity[i]) over all links, each passed through +// Fns::translateForce with the link's offset from the root pose (poses[0]), and returns the total. +Cm::SpatialVector Articulation::computeMomentum(const FsInertia *inertia) const +{ + typedef ArticulationFnsScalar Fns; + + Cm::SpatialVector *velocity = reinterpret_cast<Cm::SpatialVector*>(getVelocity(*mSolverDesc->fsData)); + Cm::SpatialVector m = Cm::SpatialVector::zero(); + for(PxU32 i=0;i<mSolverDesc->linkCount;i++) + m += Fns::translateForce(mSolverDesc->poses[i].p - mSolverDesc->poses[0].p, ArticulationFnsScalar::multiply(inertia[i], velocity[i])); + return m; +} + + + +// Debug helper: for every non-root link, computes the child joint frame relative to the parent joint frame, +// and prints one line per link whose swing is reported as limited by Cm::ConeLimitHelper::getLimit. +// The twist-limit check below is commented out. +void Articulation::checkLimits() const +{ + for(PxU32 i=1;i<mSolverDesc->linkCount;i++) + { + PxTransform cA2w = mSolverDesc->poses[mSolverDesc->links[i].parent].transform(mSolverDesc->links[i].inboundJoint->parentPose); + PxTransform cB2w = mSolverDesc->poses[i].transform(mSolverDesc->links[i].inboundJoint->childPose); + + PxTransform cB2cA = cA2w.transformInv(cB2w); + + // the relative quat must be the short way round for limits to work... 
+ + if(cB2cA.q.w<0) + cB2cA.q = -cB2cA.q; + + const ArticulationJointCore& j = *mSolverDesc->links[i].inboundJoint; + + PxQuat swing, twist; + if(j.twistLimited || j.swingLimited) + Ps::separateSwingTwist(cB2cA.q, swing, twist); + + if(j.swingLimited) + { + PxReal swingLimitContactDistance = PxMin(j.swingYLimit, j.swingZLimit)/4; + + Cm::ConeLimitHelper eh(PxTan(j.swingYLimit/4), + PxTan(j.swingZLimit/4), + PxTan(swingLimitContactDistance/4)); + + PxVec3 axis; + PxReal error = 0.0f; + if(eh.getLimit(swing, axis, error)) + printf("%u, (%f, %f), %f, (%f, %f, %f), %f\n", i, j.swingYLimit, j.swingZLimit, swingLimitContactDistance, axis.x, axis.y, axis.z, error); + } + +// if(j.twistLimited) +// { +// PxReal tqTwistHigh = PxTan(j.twistLimitHigh/4), +// tqTwistLow = PxTan(j.twistLimitLow/4), +// twistPad = (tqTwistHigh - tqTwistLow)*0.25f; +// //twistPad = j.twistLimitContactDistance; +// +// PxVec3 axis = jointTransforms[i].cB2w.rotate(PxVec3(1,0,0)); +// PxReal tqPhi = Ps::tanHalf(twist.x, twist.w); +// +// if(tqPhi < tqTwistLow + twistPad) +// constraintData.pushBack(ConstraintData(-axis, -(tqTwistLow - tqPhi)*4)); +// +// if(tqPhi > tqTwistHigh - twistPad) +// constraintData.pushBack(ConstraintData(axis, (tqTwistHigh - tqPhi)*4)); +// } + } + puts(""); +} + +#endif + +// Points the ArticulationPImpl static function pointers at the ArticulationHelper implementations +// and calls the two solver-core articulation registration functions declared above. +void PxvRegisterArticulations() +{ + ArticulationPImpl::sComputeUnconstrainedVelocities = &ArticulationHelper::computeUnconstrainedVelocities; + ArticulationPImpl::sUpdateBodies = &ArticulationHelper::updateBodies; + ArticulationPImpl::sSaveVelocity = &ArticulationHelper::saveVelocity; + + SolverCoreRegisterArticulationFns(); + SolverCoreRegisterArticulationFnsCoulomb(); +} + +// Computes three byte counts for an articulation with linkCount links: solverDataSize (FsData header plus the +// per-link arrays itemised below), totalSize (solverDataSize plus the further per-link arrays) and scratchSize. +// The maxPenBias array is sized for linkCount rounded up to a multiple of 16 entries. +void Articulation::getDataSizes(PxU32 linkCount, PxU32 &solverDataSize, PxU32& totalSize, PxU32& scratchSize) +{ + solverDataSize = sizeof(FsData) // header + + sizeof(Cm::SpatialVectorV) * linkCount // velocity + + sizeof(Cm::SpatialVectorV) * linkCount // deferredVelocity + + sizeof(Vec3V) * linkCount // deferredSZ + + 
sizeof(PxReal) * ((linkCount + 15) & 0xFFFFFFF0) // The maxPenBias values + + sizeof(FsJointVectors) * linkCount // joint offsets + + sizeof(FsInertia) // featherstone root inverse inertia + + sizeof(FsRow) * linkCount; // featherstone matrix rows + + totalSize = solverDataSize + + sizeof(LtbRow) * linkCount // lagrange matrix rows + + sizeof(Cm::SpatialVectorV) * linkCount // ref velocity + + sizeof(FsRowAux) * linkCount; + + scratchSize = PxU32(sizeof(FsInertia)*linkCount*3 + + ((sizeof(ArticulationJointTransforms)+15)&~15) * linkCount + + sizeof(Mat33V) * linkCount + + ((sizeof(ArticulationJointTransforms)+15)&~15) * linkCount); +} + + +// The PxvArticulationDriveCache functions below forward their arguments unchanged to the matching +// ArticulationHelper function; getLinkCount simply reads cache.linkCount. +void PxvArticulationDriveCache::initialize(FsData &cache, + PxU16 linkCount, + const ArticulationLink* links, + PxReal compliance, + PxU32 iterations, + char* scratchMemory, + PxU32 scratchMemorySize) +{ + ArticulationHelper::initializeDriveCache(cache, linkCount, links, compliance, iterations, scratchMemory, scratchMemorySize); +} + +PxU32 PxvArticulationDriveCache::getLinkCount(const FsData& cache) +{ + return cache.linkCount; +} + +void PxvArticulationDriveCache::applyImpulses(const FsData& cache, + Cm::SpatialVectorV* Z, + Cm::SpatialVectorV* V) +{ + ArticulationHelper::applyImpulses(cache, Z, V); +} + +void PxvArticulationDriveCache::getImpulseResponse(const FsData& cache, + PxU32 linkID, + const Cm::SpatialVectorV& impulse, + Cm::SpatialVectorV& deltaV) +{ + ArticulationHelper::getImpulseResponse(cache, linkID, impulse, deltaV); +} + +} +} diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.cpp new file mode 100644 index 00000000..2adc84ea --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.cpp @@ -0,0 +1,408 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "foundation/PxPreprocessor.h" +#include "PsVecMath.h" +#include "DyArticulationContactPrep.h" +#include "DySolverConstraintDesc.h" +#include "DySolverConstraint1D.h" +#include "DyArticulationHelper.h" +#include "PxcNpWorkUnit.h" +#include "PxsMaterialManager.h" +#include "PxsMaterialCombiner.h" +#include "DyCorrelationBuffer.h" +#include "DySolverConstraintExtShared.h" + +using namespace physx::Gu; + +namespace physx +{ + +namespace Dy +{ + +// constraint-gen only, since these use getVelocity methods +// which aren't valid during the solver phase + +// Picks two friction tangents perpendicular to unitNormal: t0 is the tangential part of vrel when that part is +// large enough (squared length > 0.1), otherwise Ps::normalToTangents supplies both tangents. +PX_INLINE void computeFrictionTangents(const PxVec3& vrel,const PxVec3& unitNormal, PxVec3& t0, PxVec3& t1) +{ + PX_ASSERT(PxAbs(unitNormal.magnitude()-1)<1e-3f); + + t0 = vrel - unitNormal * unitNormal.dot(vrel); + PxReal ll = t0.magnitudeSquared(); + + if (ll > 0.1f) //can set as low as 0. + { + t0 *= PxRecipSqrt(ll); + t1 = unitNormal.cross(t0); + } + else + Ps::normalToTangents(unitNormal, t0, t1); //fallback +} + +// Projects the body's velocity onto (linear, angular); rigid bodies (NO_LINK) use the body data, articulation links use the FsData velocity. +PxReal SolverExtBody::projectVelocity(const PxVec3& linear, const PxVec3& angular) const +{ + if(mLinkIndex == PxSolverConstraintDesc::NO_LINK) + { + return mBodyData->projectVelocity(linear, angular); + } + else + { + PxF32 f; + FStore(getVelocity(*mFsData)[mLinkIndex].dot(Cm::SpatialVector(linear, angular)), &f); + return f; + } +} + +PxVec3 SolverExtBody::getLinVel() const +{ + if(mLinkIndex == PxSolverConstraintDesc::NO_LINK) + return mBodyData->linearVelocity; + else + { + PxVec3 result; + V3StoreU(getVelocity(*mFsData)[mLinkIndex].linear, result); + return result; + } +} + + +PxVec3 SolverExtBody::getAngVel() const +{ + if(mLinkIndex == PxSolverConstraintDesc::NO_LINK) + return mBodyData->angularVelocity; + else + { + PxVec3 result; + V3StoreU(getVelocity(*mFsData)[mLinkIndex].angular, result); + return result; + } +} + +// Packs (linear, angular) into a spatial vector; for a rigid body the angular part is pre-multiplied by sqrtInvInertia. +Cm::SpatialVector createImpulseResponseVector(const PxVec3& linear, const PxVec3& angular, const SolverExtBody& body) +{ + if(body.mLinkIndex == 
PxSolverConstraintDesc::NO_LINK) + { + return Cm::SpatialVector(linear, body.mBodyData->sqrtInvInertia * angular); + } + return Cm::SpatialVector(linear, angular); +} + +// Computes deltaV0/deltaV1, the velocity changes produced by impulse0 on b0 and impulse1 on b1 (scaled by the +// dominance factors), and returns impulse0.dot(deltaV0) + impulse1.dot(deltaV1). The self-collision path is disabled. +PxReal getImpulseResponse(const SolverExtBody& b0, const Cm::SpatialVector& impulse0, Cm::SpatialVector& deltaV0, PxReal dom0, PxReal angDom0, + const SolverExtBody& b1, const Cm::SpatialVector& impulse1, Cm::SpatialVector& deltaV1, PxReal dom1, PxReal angDom1, + bool /*allowSelfCollision*/) +{ + PxReal response; + // allowSelfCollision = true; + // right now self-collision with contacts crashes the solver + + //KS - knocked this out to save some space on SPU + //if(allowSelfCollision && b0.mLinkIndex!=PxSolverConstraintDesc::NO_LINK && b0.mFsData == b1.mFsData) + //{ + // ArticulationHelper::getImpulseSelfResponse(*b0.mFsData,b0.mLinkIndex, impulse0, deltaV0, + // b1.mLinkIndex, impulse1, deltaV1); + // //PxReal response = impulse0.dot(deltaV0*dom0) + impulse1.dot(deltaV1*dom1); + // PX_ASSERT(PxAbs(impulse0.dot(deltaV0*dom0) + impulse1.dot(deltaV1*dom1))>0); + //} + //else + { + + if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + { + deltaV0.linear = impulse0.linear * b0.mBodyData->invMass * dom0; + deltaV0.angular = impulse0.angular * angDom0; + } + else + ArticulationHelper::getImpulseResponse(*b0.mFsData, b0.mLinkIndex, impulse0.scale(dom0, angDom0), deltaV0); + + response = impulse0.dot(deltaV0); + if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + { + deltaV1.linear = impulse1.linear * b1.mBodyData->invMass * dom1; + deltaV1.angular = impulse1.angular * angDom1; + } + else + { + ArticulationHelper::getImpulseResponse(*b1.mFsData, b1.mLinkIndex, impulse1.scale(dom1, angDom1), deltaV1); + + } + response += impulse1.dot(deltaV1); + } + + return response; +} + + + +// Writes the solver constraint data for every friction patch that has contacts into workspace: a SolverContactHeader, +// one SolverContactPointExt per contact, a zeroed force buffer (padded to a multiple of 4 floats) and, unless the +// material has eDISABLE_FRICTION set, two SolverContactFrictionExt rows per anchor - the first along tangent t0, +// the second along tangent t1. Each row applies equal and opposite impulses to b0 and b1 along its own tangent. + void setupFinalizeExtSolverContacts( + const ContactPoint* buffer, + const CorrelationBuffer& c, + const PxTransform& bodyFrame0, + const PxTransform& bodyFrame1, + PxU8* workspace, + const SolverExtBody& b0, + const 
SolverExtBody& b1, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal invMassScale0, PxReal invInertiaScale0, + PxReal invMassScale1, PxReal invInertiaScale1, + const PxReal restDist, + PxU8* frictionDataPtr, + PxReal ccdMaxContactDist) +{ + // NOTE II: the friction patches are sparse (some of them have no contact patches, and + // therefore did not get written back to the cache) but the patch addresses are dense, + // corresponding to valid patches + + /*const bool haveFriction = PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;*/ + + const FloatV ccdMaxSeparation = FLoad(ccdMaxContactDist); + + PxU8* PX_RESTRICT ptr = workspace; + + const FloatV zero=FZero(); + + //KS - TODO - this should all be done in SIMD to avoid LHS + const PxF32 maxPenBias0 = b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b0.mBodyData->penBiasClamp : getMaxPenBias(*b0.mFsData)[b0.mLinkIndex]; + const PxF32 maxPenBias1 = b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b1.mBodyData->penBiasClamp : getMaxPenBias(*b1.mFsData)[b1.mLinkIndex]; + + const FloatV maxPenBias = FLoad(PxMax(maxPenBias0, maxPenBias1)); + + + const PxReal d0 = invMassScale0; + const PxReal d1 = invMassScale1; + + const PxReal angD0 = invInertiaScale0; + const PxReal angD1 = invInertiaScale1; + + Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4Zero(); + staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(d0)); + staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(d1)); + + const FloatV restDistance = FLoad(restDist); + + PxU32 frictionPatchWritebackAddrIndex = 0; + PxU32 contactWritebackCount = 0; + + Ps::prefetchLine(c.contactID); + Ps::prefetchLine(c.contactID, 128); + + const FloatV invDt = FLoad(invDtF32); + const FloatV p8 = FLoad(0.8f); + const FloatV bounceThreshold = FLoad(bounceThresholdF32); + + const FloatV 
invDtp8 = FMul(invDt, p8); + + PxU8 flags = 0; + + for(PxU32 i=0;i<c.frictionPatchCount;i++) + { + PxU32 contactCount = c.frictionPatchContactCounts[i]; + if(contactCount == 0) + continue; + + const FrictionPatch& frictionPatch = c.frictionPatches[i]; + PX_ASSERT(frictionPatch.anchorCount <= 2); //0==anchorCount is allowed if all the contacts in the manifold have a large offset. + + const Gu::ContactPoint* contactBase0 = buffer + c.contactPatches[c.correlationListHeads[i]].start; + const PxReal combinedRestitution = contactBase0->restitution; + + const PxReal staticFriction = contactBase0->staticFriction; + const PxReal dynamicFriction = contactBase0->dynamicFriction; + const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION); + staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(staticFriction)); + staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(dynamicFriction)); + + SolverContactHeader* PX_RESTRICT header = reinterpret_cast<SolverContactHeader*>(ptr); + ptr += sizeof(SolverContactHeader); + + + Ps::prefetchLine(ptr + 128); + Ps::prefetchLine(ptr + 256); + Ps::prefetchLine(ptr + 384); + + const bool haveFriction = (disableStrongFriction == 0) ;//PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0; + header->numNormalConstr = Ps::to8(contactCount); + header->numFrictionConstr = Ps::to8(haveFriction ? 
frictionPatch.anchorCount*2 : 0); + + header->type = Ps::to8(DY_SC_TYPE_EXT_CONTACT); + + header->flags = flags; + + const FloatV restitution = FLoad(combinedRestitution); + + header->staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W; + + header->angDom0 = angD0; + header->angDom1 = angD1; + + const PxU32 pointStride = sizeof(SolverContactPointExt); + const PxU32 frictionStride = sizeof(SolverContactFrictionExt); + + const Vec3V normal = V3LoadU(buffer[c.contactPatches[c.correlationListHeads[i]].start].normal); + + header->normal = normal; + + for(PxU32 patch=c.correlationListHeads[i]; + patch!=CorrelationBuffer::LIST_END; + patch = c.contactPatches[patch].next) + { + const PxU32 count = c.contactPatches[patch].count; + const Gu::ContactPoint* contactBase = buffer + c.contactPatches[patch].start; + + PxU8* p = ptr; + for(PxU32 j=0;j<count;j++) + { + const Gu::ContactPoint& contact = contactBase[j]; + + SolverContactPointExt* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPointExt*>(p); + p += pointStride; + + setupExtSolverContact(b0, b1, d0, d1, angD0, angD1, bodyFrame0, bodyFrame1, normal, invDt, invDtp8, restDistance, maxPenBias, restitution, + bounceThreshold, contact, *solverContact, ccdMaxSeparation); + + } + + ptr = p; + } + contactWritebackCount += contactCount; + + PxF32* forceBuffer = reinterpret_cast<PxF32*>(ptr); + PxMemZero(forceBuffer, sizeof(PxF32) * contactCount); + ptr += sizeof(PxF32) * ((contactCount + 3) & (~3)); + + header->broken = 0; + + if(haveFriction) + { + //const Vec3V normal = Vec3V_From_PxVec3(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal); + PxVec3 normalS = buffer[c.contactPatches[c.correlationListHeads[i]].start].normal; + + PxVec3 t0, t1; + computeFrictionTangents(b0.getLinVel() - b1.getLinVel(), normalS, t0, t1); + + Vec3V vT0 = V3LoadU(t0); + Vec3V vT1 = V3LoadU(t1); + + //We want to set the writeBack ptr to point to the 
broken flag of the friction patch. + //On spu we have a slight problem here because the friction patch array is + //in local store rather than in main memory. The good news is that the address of the friction + //patch array in main memory is stored in the work unit. These two addresses will be equal + //except on spu where one is local store memory and the other is the effective address in main memory. + //Using the value stored in the work unit guarantees that the main memory address is used on all platforms. + PxU8* PX_RESTRICT writeback = frictionDataPtr + frictionPatchWritebackAddrIndex*sizeof(FrictionPatch); + + header->frictionBrokenWritebackByte = writeback; + + for(PxU32 j = 0; j < frictionPatch.anchorCount; j++) + { + SolverContactFrictionExt* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionExt*>(ptr); + ptr += frictionStride; + SolverContactFrictionExt* PX_RESTRICT f1 = reinterpret_cast<SolverContactFrictionExt*>(ptr); + ptr += frictionStride; + + PxVec3 ra = bodyFrame0.q.rotate(frictionPatch.body0Anchors[j]); + PxVec3 rb = bodyFrame1.q.rotate(frictionPatch.body1Anchors[j]); + PxVec3 error = (ra + bodyFrame0.p) - (rb + bodyFrame1.p); + + { + const PxVec3 raXn = ra.cross(t0); + const PxVec3 rbXn = rb.cross(t0); + + Cm::SpatialVector deltaV0, deltaV1; + + const Cm::SpatialVector resp0 = createImpulseResponseVector(t0, raXn, b0); + const Cm::SpatialVector resp1 = createImpulseResponseVector(-t0, -rbXn, b1); // fix: body 1 gets the opposite impulse along this row's tangent t0 (was -t1) + FloatV resp = FLoad(getImpulseResponse(b0, resp0, deltaV0, d0, angD0, + b1, resp1, deltaV1, d1, angD1)); + + const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FMul(p8, FRecip(resp)), zero); + + PxU32 index = c.contactPatches[c.correlationListHeads[i]].start; + PxF32 targetVel = buffer[index].targetVel.dot(t0); + + if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + targetVel -= b0.projectVelocity(t0, raXn); + else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + targetVel += b1.projectVelocity(t0, rbXn); + + 
f0->normalXYZ_appliedForceW = V4SetW(vT0, zero); + f0->raXnXYZ_velMultiplierW = V4SetW(V4LoadA(&resp0.angular.x), velMultiplier); + f0->rbXnXYZ_biasW = V4SetW(V4Neg(V4LoadA(&resp1.angular.x)), FLoad(t0.dot(error) * invDtF32)); + f0->linDeltaVA = V3LoadA(deltaV0.linear); + f0->angDeltaVA = V3LoadA(deltaV0.angular); + f0->linDeltaVB = V3LoadA(deltaV1.linear); + f0->angDeltaVB = V3LoadA(deltaV1.angular); + f0->targetVel = targetVel; + } + + { + + const PxVec3 raXn = ra.cross(t1); + const PxVec3 rbXn = rb.cross(t1); + + Cm::SpatialVector deltaV0, deltaV1; + + + const Cm::SpatialVector resp0 = createImpulseResponseVector(t1, raXn, b0); + const Cm::SpatialVector resp1 = createImpulseResponseVector(-t1, -rbXn, b1); + + FloatV resp = FLoad(getImpulseResponse(b0, resp0, deltaV0, d0, angD0, + b1, resp1, deltaV1, d1, angD1)); + + const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FMul(p8, FRecip(resp)), zero); + + PxU32 index = c.contactPatches[c.correlationListHeads[i]].start; + PxF32 targetVel = buffer[index].targetVel.dot(t1); // fix: project the target velocity onto this row's tangent t1 (was t0) + + if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + targetVel -= b0.projectVelocity(t1, raXn); + else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + targetVel += b1.projectVelocity(t1, rbXn); + + f1->normalXYZ_appliedForceW = V4SetW(vT1, zero); + f1->raXnXYZ_velMultiplierW = V4SetW(V4LoadA(&resp0.angular.x), velMultiplier); + f1->rbXnXYZ_biasW = V4SetW(V4Neg(V4LoadA(&resp1.angular.x)), FLoad(t1.dot(error) * invDtF32)); + f1->linDeltaVA = V3LoadA(deltaV0.linear); + f1->angDeltaVA = V3LoadA(deltaV0.angular); + f1->linDeltaVB = V3LoadA(deltaV1.linear); + f1->angDeltaVB = V3LoadA(deltaV1.angular); + f1->targetVel = targetVel; + } + } + } + + frictionPatchWritebackAddrIndex++; + } +} + +} + + +} diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.h new file mode 100644 index 00000000..4e927b10 --- /dev/null +++ 
b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrep.h @@ -0,0 +1,95 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef DY_SOLVERCONSTRAINTEXT_H +#define DY_SOLVERCONSTRAINTEXT_H + +#include "DySolverExt.h" + +namespace physx +{ + +struct PxcNpWorkUnit; + + +namespace Gu +{ + class ContactBuffer; + struct ContactPoint; +} + +namespace Dy +{ + +struct CorrelationBuffer; + +// Computes the velocity changes deltaV0/deltaV1 caused by impulse0 on b0 and impulse1 on b1, scaled by the +// dominance factors, and returns impulse0.dot(deltaV0) + impulse1.dot(deltaV1). +// NOTE(review): the definition in DyArticulationContactPrep.cpp leaves allowSelfCollision unnamed and unused. +PxReal getImpulseResponse(const SolverExtBody& b0, const Cm::SpatialVector& impulse0, Cm::SpatialVector& deltaV0, PxReal dom0, PxReal angDom0, + const SolverExtBody& b1, const Cm::SpatialVector& impulse1, Cm::SpatialVector& deltaV1, PxReal dom1, PxReal angDom1, + bool allowSelfCollision = false); + +// Packs (linear, angular) into a spatial vector for use with getImpulseResponse; for a rigid body (no articulation +// link) the angular part is pre-multiplied by the body's sqrtInvInertia. +Cm::SpatialVector createImpulseResponseVector(const PxVec3& linear, const PxVec3& angular, const SolverExtBody& body); + +// Writes contact and friction solver constraints for the friction patches in c into workspace. +// Defined in DyArticulationContactPrep.cpp. +void setupFinalizeExtSolverContacts( + const Gu::ContactPoint* buffer, + const CorrelationBuffer& c, + const PxTransform& bodyFrame0, + const PxTransform& bodyFrame1, + PxU8* workspace, + const SolverExtBody& b0, + const SolverExtBody& b1, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal invMassScale0, PxReal invInertiaScale0, + PxReal invMassScale1, PxReal invInertiaScale1, + PxReal restDistance, PxU8* frictionDataPtr, + PxReal ccdMaxContactDist); + + +// Coulomb-friction counterpart of setupFinalizeExtSolverContacts; takes the whole Gu::ContactBuffer and a +// frictionCountPerPoint. NOTE(review): the meaning of the bool return value is not visible here - confirm in +// DyArticulationContactPrepPF.cpp. +bool setupFinalizeExtSolverContactsCoulomb( + const Gu::ContactBuffer& buffer, + const CorrelationBuffer& c, + const PxTransform& bodyFrame0, + const PxTransform& bodyFrame1, + PxU8* workspace, + PxReal invDt, + PxReal bounceThreshold, + const SolverExtBody& b0, + const SolverExtBody& b1, + PxU32 frictionCountPerPoint, + PxReal invMassScale0, PxReal invInertiaScale0, + PxReal invMassScale1, PxReal invInertiaScale1, + PxReal restDist, + PxReal ccdMaxContactDist); + +} + +} + +#endif //DY_SOLVERCONSTRAINTEXT_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrepPF.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrepPF.cpp new file mode 100644 index 00000000..8c954b71 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationContactPrepPF.cpp @@ -0,0 +1,305 @@ +// This code 
contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "foundation/PxPreprocessor.h" +#include "PsVecMath.h" +#include "DyArticulationContactPrep.h" +#include "DySolverConstraintDesc.h" +#include "DySolverConstraint1D.h" +#include "DySolverContact.h" +#include "DySolverContactPF.h" +#include "DyArticulationHelper.h" +#include "PxcNpWorkUnit.h" +#include "PxsMaterialManager.h" +#include "PxsMaterialCombiner.h" +#include "DyCorrelationBuffer.h" +#include "DySolverConstraintExtShared.h" + +using namespace physx; +using namespace Gu; + +// constraint-gen only, since these use getVelocityFast methods +// which aren't valid during the solver phase + +namespace physx +{ + +namespace Dy +{ + + +bool setupFinalizeExtSolverContactsCoulomb( + const ContactBuffer& buffer, + const CorrelationBuffer& c, + const PxTransform& bodyFrame0, + const PxTransform& bodyFrame1, + PxU8* workspace, + PxReal invDt, + PxReal bounceThresholdF32, + const SolverExtBody& b0, + const SolverExtBody& b1, + PxU32 frictionCountPerPoint, + PxReal invMassScale0, PxReal invInertiaScale0, + PxReal invMassScale1, PxReal invInertiaScale1, + PxReal restDist, + PxReal ccdMaxDistance) +{ + // NOTE II: the friction patches are sparse (some of them have no contact patches, and + // therefore did not get written back to the cache) but the patch addresses are dense, + // corresponding to valid patches + + const FloatV ccdMaxSeparation = FLoad(ccdMaxDistance); + + PxU8* PX_RESTRICT ptr = workspace; + + //KS - TODO - this should all be done in SIMD to avoid LHS + const PxF32 maxPenBias0 = b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? b0.mBodyData->penBiasClamp : getMaxPenBias(*b0.mFsData)[b0.mLinkIndex]; + const PxF32 maxPenBias1 = b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK ? 
b1.mBodyData->penBiasClamp : getMaxPenBias(*b1.mFsData)[b1.mLinkIndex]; + + const FloatV maxPenBias = FLoad(PxMax(maxPenBias0, maxPenBias1)/invDt); + + const FloatV restDistance = FLoad(restDist); + const FloatV bounceThreshold = FLoad(bounceThresholdF32); + + const FloatV invDtV = FLoad(invDt); + const FloatV pt8 = FLoad(0.8f); + + const FloatV invDtp8 = FMul(invDtV, pt8); + + Ps::prefetchLine(c.contactID); + Ps::prefetchLine(c.contactID, 128); + + const PxU32 frictionPatchCount = c.frictionPatchCount; + + const PxU32 pointStride = sizeof(SolverContactPointExt); + const PxU32 frictionStride = sizeof(SolverContactFrictionExt); + const PxU8 pointHeaderType = DY_SC_TYPE_EXT_CONTACT; + const PxU8 frictionHeaderType = DY_SC_TYPE_EXT_FRICTION; + + PxReal d0 = invMassScale0; + PxReal d1 = invMassScale1; + PxReal angD0 = invInertiaScale0; + PxReal angD1 = invInertiaScale1; + + PxU8 flags = 0; + + for(PxU32 i=0;i< frictionPatchCount;i++) + { + const PxU32 contactCount = c.frictionPatchContactCounts[i]; + if(contactCount == 0) + continue; + + const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start; + + const Vec3V normalV = Ps::aos::V3LoadA(contactBase0->normal); + const Vec3V normal = V3LoadA(contactBase0->normal); + + const PxReal combinedRestitution = contactBase0->restitution; + + + SolverContactCoulombHeader* PX_RESTRICT header = reinterpret_cast<SolverContactCoulombHeader*>(ptr); + ptr += sizeof(SolverContactCoulombHeader); + + Ps::prefetchLine(ptr, 128); + Ps::prefetchLine(ptr, 256); + Ps::prefetchLine(ptr, 384); + + const FloatV restitution = FLoad(combinedRestitution); + + + header->numNormalConstr = PxU8(contactCount); + header->type = pointHeaderType; + //header->setRestitution(combinedRestitution); + + header->setDominance0(d0); + header->setDominance1(d1); + header->angDom0 = angD0; + header->angDom1 = angD1; + header->flags = flags; + + header->setNormal(normalV); + + for(PxU32 patch=c.correlationListHeads[i]; 
+ patch!=CorrelationBuffer::LIST_END; + patch = c.contactPatches[patch].next) + { + const PxU32 count = c.contactPatches[patch].count; + const Gu::ContactPoint* contactBase = buffer.contacts + c.contactPatches[patch].start; + + PxU8* p = ptr; + for(PxU32 j=0;j<count;j++) + { + const Gu::ContactPoint& contact = contactBase[j]; + + SolverContactPointExt* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPointExt*>(p); + p += pointStride; + + setupExtSolverContact(b0, b1, d0, d1, angD0, angD1, bodyFrame0, bodyFrame1, normal, invDtV, invDtp8, restDistance, maxPenBias, restitution, + bounceThreshold, contact, *solverContact, ccdMaxSeparation); + } + ptr = p; + } + } + + //construct all the frictions + + PxU8* PX_RESTRICT ptr2 = workspace; + + const PxF32 orthoThreshold = 0.70710678f; + const PxF32 eps = 0.00001f; + bool hasFriction = false; + + for(PxU32 i=0;i< frictionPatchCount;i++) + { + const PxU32 contactCount = c.frictionPatchContactCounts[i]; + if(contactCount == 0) + continue; + + SolverContactCoulombHeader* header = reinterpret_cast<SolverContactCoulombHeader*>(ptr2); + header->frictionOffset = PxU16(ptr - ptr2); + ptr2 += sizeof(SolverContactCoulombHeader) + header->numNormalConstr * pointStride; + + const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start; + + PxVec3 normal = contactBase0->normal; + + const PxReal staticFriction = contactBase0->staticFriction; + const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION); + const bool haveFriction = (disableStrongFriction == 0); + + SolverFrictionHeader* frictionHeader = reinterpret_cast<SolverFrictionHeader*>(ptr); + frictionHeader->numNormalConstr = Ps::to8(c.frictionPatchContactCounts[i]); + frictionHeader->numFrictionConstr = Ps::to8(haveFriction ? 
c.frictionPatchContactCounts[i] * frictionCountPerPoint : 0); + frictionHeader->flags = flags; + ptr += sizeof(SolverFrictionHeader); + PxF32* forceBuffer = reinterpret_cast<PxF32*>(ptr); + ptr += frictionHeader->getAppliedForcePaddingSize(c.frictionPatchContactCounts[i]); + PxMemZero(forceBuffer, sizeof(PxF32) * c.frictionPatchContactCounts[i]); + Ps::prefetchLine(ptr, 128); + Ps::prefetchLine(ptr, 256); + Ps::prefetchLine(ptr, 384); + + + const PxVec3 t0Fallback1(0.f, -normal.z, normal.y); + const PxVec3 t0Fallback2(-normal.y, normal.x, 0.f) ; + const PxVec3 tFallback1 = orthoThreshold > PxAbs(normal.x) ? t0Fallback1 : t0Fallback2; + const PxVec3 vrel = b0.getLinVel() - b1.getLinVel(); + const PxVec3 t0_ = vrel - normal * (normal.dot(vrel)); + const PxReal sqDist = t0_.dot(t0_); + const PxVec3 tDir0 = (sqDist > eps ? t0_: tFallback1).getNormalized(); + const PxVec3 tDir1 = tDir0.cross(normal); + PxVec3 tFallback[2] = {tDir0, tDir1}; + + PxU32 ind = 0; + + if(haveFriction) + { + hasFriction = true; + frictionHeader->setStaticFriction(staticFriction); + frictionHeader->invMass0D0 = d0; + frictionHeader->invMass1D1 = d1; + frictionHeader->angDom0 = angD0; + frictionHeader->angDom1 = angD1; + frictionHeader->type = frictionHeaderType; + + PxU32 totalPatchContactCount = 0; + + for(PxU32 patch=c.correlationListHeads[i]; + patch!=CorrelationBuffer::LIST_END; + patch = c.contactPatches[patch].next) + { + const PxU32 count = c.contactPatches[patch].count; + const PxU32 start = c.contactPatches[patch].start; + const Gu::ContactPoint* contactBase = buffer.contacts + start; + + PxU8* p = ptr; + + for(PxU32 j =0; j < count; j++) + { + const Gu::ContactPoint& contact = contactBase[j]; + const PxVec3 ra = contact.point - bodyFrame0.p; + const PxVec3 rb = contact.point - bodyFrame1.p; + + const PxVec3 targetVel = contact.targetVel; + const PxVec3 pVRa = b0.getLinVel() + b0.getAngVel().cross(ra); + const PxVec3 pVRb = b1.getLinVel() + b1.getAngVel().cross(rb); + //const PxVec3 
vrel = pVRa - pVRb; + + for(PxU32 k = 0; k < frictionCountPerPoint; ++k) + { + SolverContactFrictionExt* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionExt*>(p); + p += frictionStride; + + PxVec3 t0 = tFallback[ind]; + ind = 1 - ind; + PxVec3 raXn = ra.cross(t0); + PxVec3 rbXn = rb.cross(t0); + Cm::SpatialVector deltaV0, deltaV1; + + const Cm::SpatialVector resp0 = createImpulseResponseVector(t0, raXn, b0); + const Cm::SpatialVector resp1 = createImpulseResponseVector(-t0, -rbXn, b1); + + PxReal unitResponse = getImpulseResponse(b0, resp0, deltaV0, d0, angD0, + b1, resp1, deltaV1, d1, angD1); + + PxReal tv = targetVel.dot(t0); + if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + tv += pVRa.dot(t0); + else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + tv -= pVRb.dot(t0); + + + f0->setVelMultiplier(FLoad(unitResponse>0.0f ? 1.f/unitResponse : 0.0f)); + f0->setRaXn(resp0.angular); + f0->setRbXn(-resp1.angular); + f0->targetVel = tv; + f0->setNormal(t0); + f0->setAppliedForce(0.0f); + f0->linDeltaVA = V3LoadA(deltaV0.linear); + f0->angDeltaVA = V3LoadA(deltaV0.angular); + f0->linDeltaVB = V3LoadA(deltaV1.linear); + f0->angDeltaVB = V3LoadA(deltaV1.angular); + } + } + + totalPatchContactCount += c.contactPatches[patch].count; + + ptr = p; + } + } + } + //PX_ASSERT(ptr - workspace == n.solverConstraintSize); + return hasFriction; +} + + +} + +} diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsDebug.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsDebug.h new file mode 100644 index 00000000..901eef93 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsDebug.h @@ -0,0 +1,262 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + + +#ifndef DY_ARTICULATION_DEBUG_FNS_H +#define DY_ARTICULATION_DEBUG_FNS_H + +#include "DyArticulationFnsScalar.h" +#include "DyArticulationFnsSimd.h" + +namespace physx +{ +namespace Dy +{ +#if 0 + void printMomentum(const char* id, PxTransform* pose, Cm::SpatialVector* velocity, FsInertia* inertia, PxU32 linkCount) + { + typedef ArticulationFnsScalar Fns; + + Cm::SpatialVector m = Cm::SpatialVector::zero(); + for(PxU32 i=0;i<linkCount;i++) + m += Fns::translateForce(pose[i].p - pose[0].p, Fns::multiply(inertia[i], velocity[i])); + printf("momentum (%20s): (%f, %f, %f), (%f, %f, %f)\n", id, m.linear.x, m.linear.y, m.linear.z, m.angular.x, m.angular.y, m.angular.z); + } +#endif + +class ArticulationFnsDebug +{ + typedef ArticulationFnsSimdBase SimdBase; + typedef ArticulationFnsSimd<ArticulationFnsDebug> Simd; + typedef ArticulationFnsScalar Scalar; + +public: + + static PX_FORCE_INLINE FsInertia addInertia(const FsInertia& in1, const FsInertia& in2) + { + return FsInertia(M33Add(in1.ll, in2.ll), + M33Add(in1.la, in2.la), + M33Add(in1.aa, in2.aa)); + } + + static PX_FORCE_INLINE FsInertia subtractInertia(const FsInertia& in1, const FsInertia& in2) + { + return FsInertia(M33Sub(in1.ll, in2.ll), + M33Sub(in1.la, in2.la), + M33Sub(in1.aa, in2.aa)); + } + + static Mat33V invertSym33(const Mat33V &m) + { + PxMat33 n_ = Scalar::invertSym33(unsimdify(m)); + Mat33V n = SimdBase::invertSym33(m); + compare33(n_, unsimdify(n)); + + return n; + } + + static Mat33V invSqrt(const Mat33V &m) + { + PxMat33 n_ = Scalar::invSqrt(unsimdify(m)); + Mat33V n = SimdBase::invSqrt(m); + compare33(n_, unsimdify(n)); + + return n; + } + + + + static FsInertia invertInertia(const FsInertia &I) + { + SpInertia J_ = Scalar::invertInertia(unsimdify(I)); + FsInertia J = SimdBase::invertInertia(I); + compareInertias(J_,unsimdify(J)); + + return J; + } + + static Mat33V computeSIS(const FsInertia &I, const Cm::SpatialVectorV S[3], Cm::SpatialVectorV*PX_RESTRICT IS) + { + Cm::SpatialVector 
IS_[3]; + Scalar::multiply(IS_, unsimdify(I), unsimdify(&S[0])); + PxMat33 D_ = Scalar::multiplySym(IS_, unsimdify(&S[0])); + + Mat33V D = SimdBase::computeSIS(I, S, IS); + + compare33(unsimdify(D), D_); + + return D; + } + + + static FsInertia multiplySubtract(const FsInertia &I, const Mat33V &D, const Cm::SpatialVectorV IS[3], Cm::SpatialVectorV*PX_RESTRICT DSI) + { + Cm::SpatialVector DSI_[3]; + + Scalar::multiply(DSI_, unsimdify(IS), unsimdify(D)); + SpInertia J_ = Scalar::multiplySubtract(unsimdify(I), DSI_, unsimdify(IS)); + + FsInertia J = SimdBase::multiplySubtract(I, D, IS, DSI); + + compareInertias(unsimdify(J), J_); + + return J; + } + + + static FsInertia multiplySubtract(const FsInertia &I, const Cm::SpatialVectorV S[3]) + { + SpInertia J_ = Scalar::multiplySubtract(unsimdify(I), unsimdify(S), unsimdify(S)); + FsInertia J = SimdBase::multiplySubtract(I, S); + compareInertias(unsimdify(J), J_); + return J; + } + + + static FsInertia translateInertia(Vec3V offset, const FsInertia &I) + { + PxVec3 offset_; + V3StoreU(offset, offset_); + SpInertia J_ = Scalar::translate(offset_, unsimdify(I)); + FsInertia J = SimdBase::translateInertia(offset, I); + compareInertias(J_, unsimdify(J)); + + return J; + } + + + static PX_FORCE_INLINE FsInertia propagate(const FsInertia &I, + const Cm::SpatialVectorV S[3], + const Mat33V &load, + const FloatV isf) + { + SpInertia J_ = Scalar::propagate(unsimdify(I), unsimdify(&S[0]), unsimdify(load), unsimdify(isf)); + FsInertia J = Simd::propagate(I, S, load, isf); + + compareInertias(J_, unsimdify(J)); + return J; + } + + + static PX_FORCE_INLINE Mat33V computeDriveInertia(const FsInertia &I0, + const FsInertia &I1, + const Cm::SpatialVectorV S[3]) + { + PxMat33 m_ = Scalar::computeDriveInertia(unsimdify(I0), unsimdify(I1), unsimdify(&S[0])); + Mat33V m = Simd::computeDriveInertia(I0, I1, S); + + compare33(m_, unsimdify(m)); + return m; + } + + static const PxMat33 unsimdify(const Mat33V &m) + { + PX_ALIGN(16, PxMat33) m_; + 
PxMat33_From_Mat33V(m, m_); + return m_; + } + + static PxReal unsimdify(const FloatV &m) + { + PxF32 f; + FStore(m, &f); + return f; + } + + static SpInertia unsimdify(const FsInertia &I) + { + return SpInertia (unsimdify(I.ll), + unsimdify(I.la), + unsimdify(I.aa)); + } + + static const Cm::SpatialVector* unsimdify(const Cm::SpatialVectorV *S) + { + return reinterpret_cast<const Cm::SpatialVector*>(S); + } + + +private: + + static PxReal absmax(const PxVec3& n) + { + return PxMax(PxAbs(n.x), PxMax(PxAbs(n.y),PxAbs(n.z))); + } + + static PxReal norm(const PxMat33& n) + { + return PxMax(absmax(n.column0), PxMax(absmax(n.column1), absmax(n.column2))); + } + + static void compare33(const PxMat33& ref, const PxMat33& n) + { + PxReal errNorm = norm(ref-n); + PX_UNUSED(errNorm); + PX_ASSERT(errNorm <= PxMax(norm(ref)*1e-3f, 1e-4f)); + } + + static void compareInertias(const SpInertia& a, const SpInertia& b) + { + compare33(a.mLL, b.mLL); + compare33(a.mLA, b.mLA); + compare33(a.mAA, b.mAA); + } + + +}; + +#if DY_ARTICULATION_DEBUG_VERIFY +static bool isPositiveDefinite(const Mat33V& m) +{ + PX_ALIGN_PREFIX(16) PxMat33 m1 PX_ALIGN_SUFFIX(16); + PxMat33_From_Mat33V(m, m1); + return isPositiveDefinite(m1); +} + + +static bool isPositiveDefinite(const FsInertia& s) +{ + return isPositiveDefinite(ArticulationFnsDebug::unsimdify(s)); +} + +static PxReal magnitude(const Cm::SpatialVectorV &v) +{ + return PxSqrt(FStore(V3Dot(v.linear, v.linear)) + FStore(V3Dot(v.angular, v.angular))); +} + +static bool almostEqual(const Cm::SpatialVectorV &ref, const Cm::SpatialVectorV& test, PxReal tolerance) +{ + return magnitude(ref-test)<=tolerance*magnitude(ref); +} +#endif +} +} + +#endif //DY_ARTICULATION_DEBUG_FNS_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsScalar.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsScalar.h new file mode 100644 index 00000000..1efb2708 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsScalar.h @@ 
-0,0 +1,397 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + + +#ifndef DY_ARTICULATION_SCALAR_FNS_H +#define DY_ARTICULATION_SCALAR_FNS_H + +// Scalar helpers for articulations + +#include "DyArticulationUtils.h" +#include "DyArticulationScalar.h" +#include "DySpatial.h" + +namespace physx +{ + +namespace Dy +{ + +/* +namespace +{ + static void print(const PxMat33 &m) + { + printf("(%f, %f, %f)\n(%f, %f, %f)\n(%f, %f, %f)\n\n", + m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2]); + } + + static void print(const Cm::SpatialVector *v, PxU32 count) + { + for(PxU32 i=0;i<count;i++) + { + printf("(%f, %f, %f), (%f, %f, %f)\n", + v[i].linear.x, v[i].linear.y, v[i].linear.z, + v[i].angular.x, v[i].angular.y, v[i].angular.z); + } + } +} +*/ + +class ArticulationDiagnostics +{ +public: +static bool cholesky(const PxMat33& in, PxMat33& out) +{ + out = in; + + if(out[0][0]<=0) + return false; + + out[0] /= PxSqrt(out[0][0]); + out[1] -= out[0][1]*out[0]; + out[2] -= out[0][2]*out[0]; + + if(out[1][1]<=0) + return false; + + out[1] /= PxSqrt(out[1][1]); + + out[2] -= out[1][2]*out[1]; + if(out[2][2]<=0) + return false; + out[2] /= PxSqrt(out[2][2]); + + out[1][0] = out[2][0] = out[2][1] = 0; + return true; +} + +static bool isSymmetric(const PxMat33&a) +{ + return a[0][1] == a[1][0] && a[0][2] == a[2][0] && a[1][2] == a[2][1]; +} + +static bool isSymmetric(const Mat33V&a) +{ + PxMat33 m; + PxMat33_From_Mat33V(a,m); + return isSymmetric(m); +} + +static bool isSymmetric(const SpInertia&a) +{ + return isSymmetric(a.mLL) && isSymmetric(a.mAA); +} + + +static bool isPositiveDefinite(const PxMat33& m) +{ + PxMat33 _; + return cholesky(m, _); +} + + +static bool isPositiveDefinite(const SpInertia &s) +{ + // compute + // (a 0) + // (b c) + + PxMat33 a; + if(!cholesky(s.mLL, a)) + return false; + + PxMat33 bt = a.getInverse() * s.mLA; + PxMat33 x = s.mAA - bt.getTranspose()*bt; + PxMat33 c; + return cholesky(x, c); +} + +}; + +class ArticulationFnsScalar +{ +public: + + static PX_FORCE_INLINE 
Cm::SpatialVector translateMotion(const PxVec3& p, const Cm::SpatialVector& v) + { + return Cm::SpatialVector(v.linear + p.cross(v.angular), v.angular); + } + + // translate a force resolved at position p to the origin + + static PX_FORCE_INLINE Cm::SpatialVector translateForce(const PxVec3& p, const Cm::SpatialVector& v) + { + return Cm::SpatialVector(v.linear, v.angular + p.cross(v.linear)); + } + + static PX_FORCE_INLINE PxMat33 invertSym33(const PxMat33& in) + { + PxVec3 v0 = in[1].cross(in[2]), + v1 = in[2].cross(in[0]), + v2 = in[0].cross(in[1]); + + PxReal det = v0.dot(in[0]); + + + PX_ASSERT(det!=0); + PxReal recipDet = 1.0f/det; + + return PxMat33(v0 * recipDet, + PxVec3(v0.y, v1.y, v1.z) * recipDet, + PxVec3(v0.z, v1.z, v2.z) * recipDet); + } + + static PX_FORCE_INLINE SpInertia multiplySubtract(const SpInertia& I, const Cm::SpatialVector in0[3], const Cm::SpatialVector in1[3]) + { + return I - SpInertia::dyad(in0[0], in1[0]) + - SpInertia::dyad(in0[1], in1[1]) + - SpInertia::dyad(in0[2], in1[2]); + } + + static PX_FORCE_INLINE PxMat33 multiplySym(const Cm::SpatialVector* IS, const Cm::SpatialVector* S) + { + // return PxMat33(axisDot(IS, S[0]), axisDot(IS, S[1]), axisDot(IS, S[2])); + + PxReal a00 = IS[0].dot(S[0]), a01 = IS[0].dot(S[1]), a02 = IS[0].dot(S[2]), + a11 = IS[1].dot(S[1]), a12 = IS[1].dot(S[2]), + a22 = IS[2].dot(S[2]); + + return PxMat33(PxVec3(a00, a01, a02), + PxVec3(a01, a11, a12), + PxVec3(a02, a12, a22)); + } + + static PX_FORCE_INLINE void multiply(Cm::SpatialVector out[3], const SpInertia& I, const Cm::SpatialVector in[3]) + { + out[0] = I * in[0]; + out[1] = I * in[1]; + out[2] = I * in[2]; + } + + static PX_FORCE_INLINE void multiply(Cm::SpatialVector out[3], const Cm::SpatialVector in[3], const PxMat33& D) + { + out[0] = axisMultiply(in, D[0]); + out[1] = axisMultiply(in, D[1]); + out[2] = axisMultiply(in, D[2]); + } + + static PxMat33 invSqrt(const PxMat33 &m) + { + // cholesky factor to + // (a 0 0) + // (b c 0) + // (d e f) + 
// except that a,c,f are the reciprocal sqrts rather than sqrts + + PxVec3 v0 = m.column0, v1 = m.column1, v2 = m.column2; + + PxReal a = PxRecipSqrt(v0.x); + PxReal b = v0.y*a; + PxReal c = PxRecipSqrt(v1.y - b*b); + PxReal d = v0.z*a; + PxReal e = (v1.z-d*b) * c; + PxReal f = PxRecipSqrt(v2.z - d*d - e*e); + + // invert + PxReal x = -b*a*c, y = (-e*x-d*a)*f, z = -e*c*f; + + PxMat33 r(PxVec3(a, 0, 0 ), + PxVec3(x, c, 0 ), + PxVec3(y, z, f)); + + return r; + } + + + static PX_FORCE_INLINE PxMat33 computeSIS(const Cm::SpatialVector S[3], const SpInertia& I) + { + Cm::SpatialVector IS[3]; + multiply(IS, I, S); + return multiplySym(IS, S); + } + + // translate from COM-centered world-aligned inertia matrix to a displaced frame + static PX_INLINE SpInertia translate(const PxVec3& p, const SpInertia& i) + { + PxMat33 S = Ps::star(p), ST = S.getTranspose(); + PxMat33 sla = S * i.mLA, llst = i.mLL * ST; +// return SpInertia(i.mLL, i.mLA + llst, i.mAA + sla + sla.getTranspose() + S * llst); + + // this yields a symmetric result + PxMat33 t = sla+S*llst*0.5f; + return SpInertia(i.mLL, i.mLA + llst, i.mAA + (t+t.getTranspose())); } + + static PX_FORCE_INLINE Cm::SpatialVector axisMultiply(const Cm::SpatialVector* a, const PxVec3& v) + { + return a[0]*v[0]+a[1]*v[1]+a[2]*v[2]; + } + + static PX_FORCE_INLINE PxVec3 axisDot(const Cm::SpatialVector* a, const Cm::SpatialVector& v) + { + return PxVec3(a[0].dot(v), a[1].dot(v), a[2].dot(v)); + } + + static PX_FORCE_INLINE SpInertia invertInertia(const SpInertia& I) + { + PxMat33 aa = I.mAA, ll = I.mLL, la = I.mLA; + + aa = (aa + aa.getTranspose())*0.5f; + ll = (ll + ll.getTranspose())*0.5f; + + PxMat33 AAInv = invertSym33(aa); + + PxMat33 z = -la * AAInv; + PxMat33 S = ll + z * la.getTranspose(); // Schur complement of mAA + + PxMat33 LL = invertSym33(S); + + PxMat33 LA = LL * z; + PxMat33 AA = AAInv + z.getTranspose() * LA; + + SpInertia result(LL, LA, AA); + + return result; + } + + static SpInertia propagate(const SpInertia& I, 
+ const Cm::SpatialVector S[3], + const PxMat33& load, + PxReal isf) + { + Cm::SpatialVector IS[3], ISD[3]; + multiply(IS, I, S); + + PxMat33 SIS = multiplySym(S, IS); + + // yields a symmetric result + PxMat33 D = invSqrt(SIS+load*isf); + multiply(ISD, IS, D); + return multiplySubtract(I, ISD, ISD); + } + + static PxMat33 computeDriveInertia(const SpInertia& I0, + const SpInertia& I1, + const Cm::SpatialVector S[3]) + { + // this could be a lot more efficient, especially since it can be combined with + // the inertia accumulation. Also it turns out to be symmetric in I0 and I1, which + // isn't obvious from the formulation, so it's likely there's a more efficient formulation + + PxMat33 D = invertSym33(computeSIS(S,I0)); + Cm::SpatialVector IS[3], ISD[3]; + + multiply(IS,I0,S); + multiply(ISD, IS, D); + + SpInertia tot = multiplySubtract(I0+I1,ISD,IS); + SpInertia invTot = invertInertia(tot); + + PxMat33 E = computeSIS(ISD,invTot); + + PxMat33 load = invertSym33(E+D); + + PX_ASSERT(load[0].isFinite() && load[1].isFinite() && load[2].isFinite()); + PX_ASSERT(ArticulationDiagnostics::isSymmetric(load) && ArticulationDiagnostics::isPositiveDefinite(load)); + return load; + } + + static PX_INLINE Cm::SpatialVector propagateImpulse(const FsRow& row, + const FsJointVectors& jv, + PxVec3& SZ, + const Cm::SpatialVector& Z, + const FsRowAux& aux) + { + PX_UNUSED(aux); + SZ = Z.angular + Z.linear.cross(getJointOffset(jv)); + Cm::SpatialVector result = translateForce(getParentOffset(jv), Z - axisMultiply(getDSI(row), SZ)); + +#if DY_ARTICULATION_DEBUG_VERIFY + PxVec3 SZcheck; + Cm::SpatialVector check = ArticulationRef::propagateImpulse(row, jv, SZcheck, Z, aux); + PX_ASSERT((result-check).magnitude()<1e-5*PxMax(check.magnitude(), 1.0f)); + PX_ASSERT((SZ-SZcheck).magnitude()<1e-5*PxMax(SZcheck.magnitude(), 1.0f)); +#endif + return result; + } + + static PX_INLINE Cm::SpatialVector propagateVelocity(const FsRow& row, + const FsJointVectors& jv, + const PxVec3& SZ, + const 
Cm::SpatialVector& v, + const FsRowAux& aux) + { + PX_UNUSED(aux); + + Cm::SpatialVector w = translateMotion(-getParentOffset(jv), v); + PxVec3 DSZ = multiply(row.D, SZ); + + PxVec3 n = axisDot(getDSI(row), w) + DSZ; + Cm::SpatialVector result = w - Cm::SpatialVector(getJointOffset(jv).cross(n),n); + +#if DY_ARTICULATION_DEBUG_VERIFY + Cm::SpatialVector check = ArticulationRef::propagateVelocity(row, jv, SZ, v, aux); + PX_ASSERT((result-check).magnitude()<1e-5*PxMax(check.magnitude(), 1.0f)); +#endif + return result; + } + + + static PX_FORCE_INLINE PxVec3 multiply(const Mat33V& m, const PxVec3& v) + { + return reinterpret_cast<const PxVec3&>(m.col0) * v.x + + reinterpret_cast<const PxVec3&>(m.col1) * v.y + + reinterpret_cast<const PxVec3&>(m.col2) * v.z; + } + + static PX_FORCE_INLINE PxVec3 multiplyTranspose(const Mat33V& m, const PxVec3& v) + { + return PxVec3(v.dot(reinterpret_cast<const PxVec3&>(m.col0)), + v.dot(reinterpret_cast<const PxVec3&>(m.col1)), + v.dot(reinterpret_cast<const PxVec3&>(m.col2))); + } + + static Cm::SpatialVector multiply(const FsInertia& m, const Cm::SpatialVector& v) + { + return Cm::SpatialVector(multiply(m.ll,v.linear) + multiply(m.la,v.angular), + multiplyTranspose(m.la, v.linear) + multiply(m.aa, v.angular)); + } + + static PX_FORCE_INLINE Cm::SpatialVector getRootDeltaV(const FsData& matrix, const Cm::SpatialVector& Z) + { + return multiply(getRootInverseInertia(matrix), Z); + } +}; + +} + +} + +#endif //DY_ARTICULATION_SCALAR_FNS_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsSimd.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsSimd.h new file mode 100644 index 00000000..182abc66 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationFnsSimd.h @@ -0,0 +1,438 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + + +#ifndef DY_ARTICULATION_SIMD_FNS_H +#define DY_ARTICULATION_SIMD_FNS_H + +#include "DyArticulationUtils.h" + +namespace physx +{ +namespace Dy +{ + +template <typename T, PxU32 count> +class PodULike +{ + PxU8 space[sizeof(T)*count]; +public: + PX_FORCE_INLINE operator T*() { return reinterpret_cast<T*>(space); } +}; + +#define POD_U_LIKE(_T, _count, _alignment) PX_ALIGN_PREFIX(_alignment) PodULike<_T, _count> PX_ALIGN_SUFFIX(_alignment) + +class ArticulationFnsSimdBase +{ +public: + + static PX_FORCE_INLINE FsInertia addInertia(const FsInertia& in1, const FsInertia& in2) + { + return FsInertia(M33Add(in1.ll, in2.ll), + M33Add(in1.la, in2.la), + M33Add(in1.aa, in2.aa)); + } + + static PX_FORCE_INLINE FsInertia subtractInertia(const FsInertia& in1, const FsInertia& in2) + { + return FsInertia(M33Sub(in1.ll, in2.ll), + M33Sub(in1.la, in2.la), + M33Sub(in1.aa, in2.aa)); + } + + static PX_FORCE_INLINE Vec3V axisDot(const Cm::SpatialVectorV S[3], const Cm::SpatialVectorV &v) + { + return V3Merge(FAdd(V3Dot(S[0].linear,v.linear), V3Dot(S[0].angular,v.angular)), + FAdd(V3Dot(S[1].linear,v.linear), V3Dot(S[1].angular,v.angular)), + FAdd(V3Dot(S[2].linear,v.linear), V3Dot(S[2].angular,v.angular))); + } + + static PX_FORCE_INLINE Cm::SpatialVectorV axisMultiply(const Cm::SpatialVectorV S[3], Vec3V v) + { + return Cm::SpatialVectorV(V3ScaleAdd(S[0].linear, V3GetX(v), V3ScaleAdd(S[1].linear, V3GetY(v), V3Scale(S[2].linear, V3GetZ(v)))), + V3ScaleAdd(S[0].angular, V3GetX(v), V3ScaleAdd(S[1].angular, V3GetY(v), V3Scale(S[2].angular, V3GetZ(v))))); + } + + + static PX_FORCE_INLINE Cm::SpatialVectorV subtract(const Cm::SpatialVectorV &a, const Cm::SpatialVectorV &b) + { + return Cm::SpatialVectorV(V3Sub(a.linear, b.linear), V3Sub(a.angular, b.angular)); + } + + static PX_FORCE_INLINE Cm::SpatialVectorV add(const Cm::SpatialVectorV &a, const Cm::SpatialVectorV &b) + { + return Cm::SpatialVectorV(V3Add(a.linear, b.linear), V3Add(a.angular, b.angular)); + } + + + static 
PX_FORCE_INLINE Cm::SpatialVectorV multiply(const FsInertia &I, const Cm::SpatialVectorV &S) + { + return Cm::SpatialVectorV(V3Add(M33MulV3(I.ll,S.linear), M33MulV3(I.la,S.angular)), + V3Add(M33TrnspsMulV3(I.la,S.linear), M33MulV3(I.aa,S.angular))); + } + + + static PX_FORCE_INLINE Cm::SpatialVectorV translateMotion(const Vec3V& p, const Cm::SpatialVectorV& v) + { + return Cm::SpatialVectorV(V3Add(v.linear, V3Cross(p, v.angular)), v.angular); + } + + // translate a force resolved at position p to the origin + + static PX_FORCE_INLINE Cm::SpatialVectorV translateForce(const Vec3V& p, const Cm::SpatialVectorV& v) + { + return Cm::SpatialVectorV(v.linear, V3Add(v.angular, V3Cross(p, v.linear))); + } + + static PX_FORCE_INLINE Mat33V invertSym33(const Mat33V &m) + { + Vec3V a0 = V3Cross(m.col1, m.col2); + Vec3V a1 = V3Cross(m.col2, m.col0); + Vec3V a2 = V3Cross(m.col0, m.col1); + FloatV det = V3Dot(a0, m.col0); + FloatV recipDet = FRecip(det); + + a1 = V3SetX(a1, V3GetY(a0)); + a2 = V3Merge(V3GetZ(a0), V3GetZ(a1), V3GetZ(a2)); // make sure it's symmetric + + return Mat33V(V3Scale(a0, recipDet), + V3Scale(a1, recipDet), + V3Scale(a2, recipDet)); + } + + + static PX_FORCE_INLINE FloatV safeInvSqrt(FloatV v) + { + return FSqrt(FMax(FZero(), FRecip(v))); + } + static PX_FORCE_INLINE Mat33V invSqrt(const Mat33V& m) + { + // cholesky factor to + // (a 0 0) + // (b c 0) + // (d e f) + // except that a,c,f are the reciprocal sqrts rather than sqrts + + // PxVec3 v0 = m.column0, v1 = m.column1, v2 = m.column2; + Vec3V v0 = m.col0, v1 = m.col1, v2 = m.col2; + + const FloatV x0 = V3GetX(v0), y1 = V3GetY(v1), z2 = V3GetZ(v2); + + FloatV a = safeInvSqrt(x0); // PxReal a = PxRecipSqrt(v0.x); + + Vec3V abd = V3Scale(v0, a); // PxReal b = v0.y*a; + FloatV b = V3GetY(abd); + + FloatV c2 = FNegScaleSub(b, b, y1); // PxReal c = PxRecipSqrt(v1.y - b*b); + FloatV c = safeInvSqrt(c2); + + FloatV d = V3GetZ(abd); // PxReal d = v0.z*a; + + FloatV e = FMul(FNegScaleSub(b, d, V3GetZ(v1)), c); 
// PxReal e = (v1.z-d*b) * c; + + FloatV f2 = FNegScaleSub(d, d, FNegScaleSub(e, e, z2)); // PxReal f = PxRecipSqrt(v2.z - d*d - e*e); + FloatV f = safeInvSqrt(f2); + + // invert + FloatV x = FMul(FMul(b,a),c), // x = -b*a*c + y = FMul((FNegScaleSub(d,a, FMul(e,x))), f), // y = (-e*x-d*a)*f + z = FMul(e, FMul(c,f)); // z = -e*c*f + + return Mat33V(V3Merge(a, FZero(), FZero()), + V3Merge(FNeg(x), c, FZero()), + V3Merge(y, FNeg(z), f)); + } + + + static PX_FORCE_INLINE FsInertia invertInertia(const FsInertia &I) + { + Mat33V aa = M33Scale(M33Add(I.aa, M33Trnsps(I.aa)), FHalf()); + Mat33V ll = M33Scale(M33Add(I.ll, M33Trnsps(I.ll)), FHalf()); + + Mat33V AAInv = invertSym33(aa); + Mat33V z = M33MulM33(M33Neg(I.la), AAInv); + Mat33V S = M33Add(ll, M33MulM33(z, M33Trnsps(I.la))); + + Mat33V LL = invertSym33(S); + Mat33V LA = M33MulM33(LL, z); + Mat33V AA = M33Add(AAInv, M33MulM33(M33Trnsps(z), LA)); + + return FsInertia(LL, LA, AA); + } + + static PX_NOINLINE Mat33V computeSIS(const FsInertia &I, const Cm::SpatialVectorV S[3], Cm::SpatialVectorV IS[3]) + { + Vec3V S0l = S[0].linear, S0a = S[0].angular; + Vec3V S1l = S[1].linear, S1a = S[1].angular; + Vec3V S2l = S[2].linear, S2a = S[2].angular; + + Vec3V IS0l = V3Add(M33MulV3(I.ll,S0l), M33MulV3(I.la,S0a)); + Vec3V IS0a = V3Add(M33TrnspsMulV3(I.la,S0l), M33MulV3(I.aa,S0a)); + Vec3V IS1l = V3Add(M33MulV3(I.ll,S1l), M33MulV3(I.la,S1a)); + Vec3V IS1a = V3Add(M33TrnspsMulV3(I.la,S1l), M33MulV3(I.aa,S1a)); + Vec3V IS2l = V3Add(M33MulV3(I.ll,S2l), M33MulV3(I.la,S2a)); + Vec3V IS2a = V3Add(M33TrnspsMulV3(I.la,S2l), M33MulV3(I.aa,S2a)); + + // compute SIS + FloatV a00 = FAdd(V3Dot(S0l, IS0l), V3Dot(S0a, IS0a)); + FloatV a01 = FAdd(V3Dot(S0l, IS1l), V3Dot(S0a, IS1a)); + FloatV a02 = FAdd(V3Dot(S0l, IS2l), V3Dot(S0a, IS2a)); + FloatV a11 = FAdd(V3Dot(S1l, IS1l), V3Dot(S1a, IS1a)); + FloatV a12 = FAdd(V3Dot(S1l, IS2l), V3Dot(S1a, IS2a)); + FloatV a22 = FAdd(V3Dot(S2l, IS2l), V3Dot(S2a, IS2a)); + + // write IS, a useful side-effect 
+ IS[0].linear = IS0l; IS[0].angular = IS0a; + IS[1].linear = IS1l; IS[1].angular = IS1a; + IS[2].linear = IS2l; IS[2].angular = IS2a; + + return Mat33V(V3Merge(a00, a01, a02), + V3Merge(a01, a11, a12), + V3Merge(a02, a12, a22)); + } + + + static PX_FORCE_INLINE FsInertia multiplySubtract(const FsInertia &I, const Mat33V &D, const Cm::SpatialVectorV IS[3], Cm::SpatialVectorV DSI[3]) + { + // cut'n'paste, how I love ya, how I love ya + + Vec3V IS0l = IS[0].linear, IS0a = IS[0].angular; + Vec3V IS1l = IS[1].linear, IS1a = IS[1].angular; + Vec3V IS2l = IS[2].linear, IS2a = IS[2].angular; + + Vec3V D0 = D.col0, D1 = D.col1, D2 = D.col2; + + // compute IDS + Vec3V DSI0l = V3ScaleAdd(IS0l, V3GetX(D0), V3ScaleAdd(IS1l, V3GetY(D0), V3Scale(IS2l, V3GetZ(D0)))); + Vec3V DSI1l = V3ScaleAdd(IS0l, V3GetX(D1), V3ScaleAdd(IS1l, V3GetY(D1), V3Scale(IS2l, V3GetZ(D1)))); + Vec3V DSI2l = V3ScaleAdd(IS0l, V3GetX(D2), V3ScaleAdd(IS1l, V3GetY(D2), V3Scale(IS2l, V3GetZ(D2)))); + + Vec3V DSI0a = V3ScaleAdd(IS0a, V3GetX(D0), V3ScaleAdd(IS1a, V3GetY(D0), V3Scale(IS2a, V3GetZ(D0)))); + Vec3V DSI1a = V3ScaleAdd(IS0a, V3GetX(D1), V3ScaleAdd(IS1a, V3GetY(D1), V3Scale(IS2a, V3GetZ(D1)))); + Vec3V DSI2a = V3ScaleAdd(IS0a, V3GetX(D2), V3ScaleAdd(IS1a, V3GetY(D2), V3Scale(IS2a, V3GetZ(D2)))); + + // compute J = I - DSI' IS. 
Each row of DSI' IS generates an inertia dyad + + Vec3V ll0 = I.ll.col0, ll1 = I.ll.col1, ll2 = I.ll.col2; + Vec3V la0 = I.la.col0, la1 = I.la.col1, la2 = I.la.col2; + Vec3V aa0 = I.aa.col0, aa1 = I.aa.col1, aa2 = I.aa.col2; + +#define SUBTRACT_DYAD(_a, _b) \ + ll0 = V3NegScaleSub(_b##l, V3GetX(_a##l), ll0); la0 = V3NegScaleSub(_b##l, V3GetX(_a##a), la0); aa0 = V3NegScaleSub(_b##a, V3GetX(_a##a), aa0); \ + ll1 = V3NegScaleSub(_b##l, V3GetY(_a##l), ll1); la1 = V3NegScaleSub(_b##l, V3GetY(_a##a), la1); aa1 = V3NegScaleSub(_b##a, V3GetY(_a##a), aa1); \ + ll2 = V3NegScaleSub(_b##l, V3GetZ(_a##l), ll2); la2 = V3NegScaleSub(_b##l, V3GetZ(_a##a), la2); aa2 = V3NegScaleSub(_b##a, V3GetZ(_a##a), aa2); + + SUBTRACT_DYAD(IS0, DSI0); + SUBTRACT_DYAD(IS1, DSI1); + SUBTRACT_DYAD(IS2, DSI2); +#undef SUBTRACT_DYAD + + DSI[0].linear = DSI0l; DSI[0].angular = DSI0a; + DSI[1].linear = DSI1l; DSI[1].angular = DSI1a; + DSI[2].linear = DSI2l; DSI[2].angular = DSI2a; + + return FsInertia(Mat33V(ll0, ll1, ll2), + Mat33V(la0, la1, la2), + Mat33V(aa0, aa1, aa2)); + } + + + static PX_FORCE_INLINE FsInertia multiplySubtract(const FsInertia &I, const Cm::SpatialVectorV S[3]) + { + // cut'n'paste, how I love ya, how I love ya + + const Vec3V S0l = S[0].linear, S0a = S[0].angular; + const Vec3V S1l = S[1].linear, S1a = S[1].angular; + const Vec3V S2l = S[2].linear, S2a = S[2].angular; + + // compute J = I - DSI' IS. 
Each row of DSI' IS generates an inertia dyad + + Vec3V ll0 = I.ll.col0, ll1 = I.ll.col1, ll2 = I.ll.col2; + Vec3V la0 = I.la.col0, la1 = I.la.col1, la2 = I.la.col2; + Vec3V aa0 = I.aa.col0, aa1 = I.aa.col1, aa2 = I.aa.col2; + +#define SUBTRACT_DYAD(_a, _b) \ + ll0 = V3NegScaleSub(_b##l, V3GetX(_a##l), ll0); la0 = V3NegScaleSub(_b##l, V3GetX(_a##a), la0); aa0 = V3NegScaleSub(_b##a, V3GetX(_a##a), aa0); \ + ll1 = V3NegScaleSub(_b##l, V3GetY(_a##l), ll1); la1 = V3NegScaleSub(_b##l, V3GetY(_a##a), la1); aa1 = V3NegScaleSub(_b##a, V3GetY(_a##a), aa1); \ + ll2 = V3NegScaleSub(_b##l, V3GetZ(_a##l), ll2); la2 = V3NegScaleSub(_b##l, V3GetZ(_a##a), la2); aa2 = V3NegScaleSub(_b##a, V3GetZ(_a##a), aa2); + + SUBTRACT_DYAD(S0, S0); + SUBTRACT_DYAD(S1, S1); + SUBTRACT_DYAD(S2, S2); +#undef SUBTRACT_DYAD + + return FsInertia(Mat33V(ll0, ll1, ll2), + Mat33V(la0, la1, la2), + Mat33V(aa0, aa1, aa2)); + } + + + static PX_FORCE_INLINE FsInertia translateInertia(Vec3V a, const FsInertia &input) + { + Vec3V b = V3Neg(a); + + Vec3V la0 = input.la.col0, la1 = input.la.col1, la2 = input.la.col2; + Vec3V ll0 = input.ll.col0, ll1 = input.ll.col1, ll2 = input.ll.col2; + Vec3V aa0 = input.aa.col0, aa1 = input.aa.col1, aa2 = input.aa.col2; + + FloatV aX = V3GetX(a), aY = V3GetY(a), aZ = V3GetZ(a); + FloatV bX = V3GetX(b), bY = V3GetY(b), bZ = V3GetZ(b); + FloatV Z = FZero(); + + // s - star matrix of a + Vec3V s0 = V3Merge(Z, aZ, bY), + s1 = V3Merge(bZ, Z, aX), + s2 = V3Merge(aY, bX, Z); + + // s * la + Vec3V sla0 = V3ScaleAdd(s0, V3GetX(la0), V3ScaleAdd(s1, V3GetY(la0), V3Scale(s2, V3GetZ(la0)))); + Vec3V sla1 = V3ScaleAdd(s0, V3GetX(la1), V3ScaleAdd(s1, V3GetY(la1), V3Scale(s2, V3GetZ(la1)))); + Vec3V sla2 = V3ScaleAdd(s0, V3GetX(la2), V3ScaleAdd(s1, V3GetY(la2), V3Scale(s2, V3GetZ(la2)))); + + // ll * s.transpose() (ll is symmetric) + Vec3V llst0 = V3ScaleAdd(ll2, aY, V3Scale(ll1, bZ)), + llst1 = V3ScaleAdd(ll0, aZ, V3Scale(ll2, bX)), + llst2 = V3ScaleAdd(ll1, aX, V3Scale(ll0, bY)); + + // t 
= sla+S*llst*0.5f; + + Vec3V sllst0 = V3ScaleAdd(s2, V3GetZ(llst0), V3ScaleAdd(s1, V3GetY(llst0), V3Scale(s0, V3GetX(llst0)))); + Vec3V sllst1 = V3ScaleAdd(s2, V3GetZ(llst1), V3ScaleAdd(s1, V3GetY(llst1), V3Scale(s0, V3GetX(llst1)))); + Vec3V sllst2 = V3ScaleAdd(s2, V3GetZ(llst2), V3ScaleAdd(s1, V3GetY(llst2), V3Scale(s0, V3GetX(llst2)))); + + Vec3V t0 = V3ScaleAdd(sllst0, FHalf(), sla0); + Vec3V t1 = V3ScaleAdd(sllst1, FHalf(), sla1); + Vec3V t2 = V3ScaleAdd(sllst2, FHalf(), sla2); + + // t+t.transpose() + Vec3V r0 = V3Add(t0, V3Merge(V3GetX(t0), V3GetX(t1), V3GetX(t2))), + r1 = V3Add(t1, V3Merge(V3GetY(t0), V3GetY(t1), V3GetY(t2))), + r2 = V3Add(t2, V3Merge(V3GetZ(t0), V3GetZ(t1), V3GetZ(t2))); + + return FsInertia(Mat33V(ll0, ll1, ll2), + + Mat33V(V3Add(la0, llst0), + V3Add(la1, llst1), + V3Add(la2, llst2)), + + Mat33V(V3Add(aa0, r0), + V3Add(aa1, r1), + V3Add(aa2, r2))); + } + +}; + +template<class Base> +class ArticulationFnsSimd : public Base +{ + static PX_FORCE_INLINE void axisMultiplyLowerTriangular(Cm::SpatialVectorV ES[3], const Mat33V&E, const Cm::SpatialVectorV S[3]) + { + const Vec3V l0 = S[0].linear, l1 = S[1].linear, l2 = S[2].linear; + const Vec3V a0 = S[0].angular, a1 = S[1].angular, a2 = S[2].angular; + ES[0] = Cm::SpatialVectorV(V3Scale(l0, V3GetX(E.col0)), + V3Scale(a0, V3GetX(E.col0))); + ES[1] = Cm::SpatialVectorV(V3ScaleAdd(l0, V3GetX(E.col1), V3Scale(l1, V3GetY(E.col1))), + V3ScaleAdd(a0, V3GetX(E.col1), V3Scale(a1, V3GetY(E.col1)))); + ES[2] = Cm::SpatialVectorV(V3ScaleAdd(l0, V3GetX(E.col2), V3ScaleAdd(l1, V3GetY(E.col2), V3Scale(l2, V3GetZ(E.col2)))), + V3ScaleAdd(a0, V3GetX(E.col2), V3ScaleAdd(a1, V3GetY(E.col2), V3Scale(a2, V3GetZ(E.col2))))); + } + +public: + static PX_FORCE_INLINE FsInertia propagate(const FsInertia &I, + const Cm::SpatialVectorV S[3], + const Mat33V &load, + const FloatV isf) + { + Cm::SpatialVectorV IS[3], ISE[3]; + Mat33V D = Base::computeSIS(I, S, IS); + + D.col0 = V3ScaleAdd(load.col0, isf, D.col0); + D.col1 = 
V3ScaleAdd(load.col1, isf, D.col1); + D.col2 = V3ScaleAdd(load.col2, isf, D.col2); + + axisMultiplyLowerTriangular(ISE, Base::invSqrt(D), IS); + return Base::multiplySubtract(I, ISE); + } + + + + static PX_INLINE Cm::SpatialVectorV propagateImpulse(const FsRow& row, + const FsJointVectors& jv, + Vec3V& SZ, + const Cm::SpatialVectorV& Z, + const FsRowAux& aux) + { + PX_UNUSED(aux); + + SZ = V3Add(Z.angular, V3Cross(Z.linear, jv.jointOffset)); + return Base::translateForce(jv.parentOffset, Z - Base::axisMultiply(row.DSI, SZ)); + } + + static PX_INLINE Cm::SpatialVectorV propagateVelocity(const FsRow& row, + const FsJointVectors& jv, + const Vec3V& SZ, + const Cm::SpatialVectorV& v, + const FsRowAux& aux) + { + PX_UNUSED(aux); + + Cm::SpatialVectorV w = Base::translateMotion(V3Neg(jv.parentOffset), v); + Vec3V DSZ = M33MulV3(row.D, SZ); + + Vec3V n = V3Add(Base::axisDot(row.DSI, w), DSZ); + return w - Cm::SpatialVectorV(V3Cross(jv.jointOffset, n), n); + } + + + + + + static PX_FORCE_INLINE Mat33V computeDriveInertia(const FsInertia &I0, + const FsInertia &I1, + const Cm::SpatialVectorV S[3]) + { + POD_U_LIKE(Cm::SpatialVectorV, 3, 16) IS, ISD, dummy; + Mat33V D = Base::computeSIS(I0, S, IS); + Mat33V DInv = Base::invertSym33(D); + + FsInertia tmp = Base::addInertia(I0, I1); + tmp = Base::multiplySubtract(tmp, DInv, IS, ISD); + FsInertia J = Base::invertInertia(tmp); + + Mat33V E = Base::computeSIS(J, ISD, dummy); + return Base::invertSym33(M33Add(DInv,E)); + + } +}; + +} +} + +#endif //DY_ARTICULATION_SIMD_FNS_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp new file mode 100644 index 00000000..ea9ccb8d --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.cpp @@ -0,0 +1,1344 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxVec3.h" +#include "foundation/PxMath.h" +#include "foundation/PxMemory.h" +#include "foundation/PxProfiler.h" + +#include "PsUtilities.h" +#include "CmSpatialVector.h" +#include "DyArticulationHelper.h" +#include "DyArticulationReference.h" +#include "DyArticulationFnsSimd.h" +#include "DyArticulationFnsScalar.h" +#include "DyArticulationFnsDebug.h" +#include "DySolverConstraintDesc.h" +#include "PxvDynamics.h" +#include "DyArticulation.h" +#include "PxcRigidBody.h" +#include "CmConeLimitHelper.h" +#include "DySolverConstraint1D.h" +#include "PxcConstraintBlockStream.h" +#include "DySolverConstraint1D.h" +#include "DyArticulationPImpl.h" +#include "PsFoundation.h" + +namespace physx +{ + +namespace Dy +{ + +void PxcFsFlushVelocity(FsData& matrix); + +// we pass this around by value so that when we return from a function the size is unaltered. That means we don't preserve state +// across functions - even though that could be handy to preserve baseInertia and jointTransforms across the solver so that if we +// need to run position projection positions they don't get recomputed. 
+ +struct PxcFsScratchAllocator +{ + char* base; + size_t size; + size_t taken; + PxcFsScratchAllocator(char* p, size_t s): base(p), size(s), taken(0) {} + + template<typename T> + static size_t sizeof16() + { + return (sizeof(T)+15)&~15; + } + + template<class T> T* alloc(PxU32 count) + { + size_t s = sizeof16<T>(); + PX_ASSERT(taken+s*count <= size); + T* result = reinterpret_cast<T*>(base+taken); + taken+=s*count; + return result; + } +}; + +void PxcLtbFactor(FsData& m) +{ + typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns; + LtbRow* rows = getLtbRows(m); + + for(PxU32 i=m.linkCount; --i>0;) + { + LtbRow& b = rows[i]; + PxU32 p = m.parent[i]; + const FsInertia inertia = Fns::invertInertia(b.inertia); + const Mat33V jResponse = Fns::invertSym33(M33Neg(Fns::computeSIS(inertia, b.j1, b.j1))); + b.inertia = inertia; + rows[p].inertia = Fns::multiplySubtract(rows[p].inertia, jResponse, b.j0, b.j0); + b.jResponse = jResponse; + + } + rows[0].inertia = Fns::invertInertia(rows[0].inertia); +} + +void PxcLtbSolve(const FsData& m, + Vec3V* b, // rhs error to solve for + Cm::SpatialVectorV* y) // velocity delta output +{ + typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns; + + const LtbRow* rows = getLtbRows(m); + PxMemZero(y, m.linkCount*sizeof(Cm::SpatialVectorV)); + + for(PxU32 i=m.linkCount;i-->1;) + { + const LtbRow& r = rows[i]; + const PxU32 p = m.parent[i]; + + const Vec3V t = V3Sub(b[i], Fns::axisDot(r.j1, y[i])); + b[i] = t; + y[p] = Fns::subtract(y[p], Fns::axisMultiply(r.j0, t)); + } + + y[0] = Fns::multiply(rows[0].inertia, y[0]); + + for(PxU32 i=1; i<m.linkCount; i++) + { + const LtbRow& r = rows[i]; + const PxU32 p = m.parent[i]; + + const Vec3V t = V3Sub(M33MulV3(r.jResponse, b[i]), Fns::axisDot(r.j0, y[p])); + y[i] = Fns::subtract(Fns::multiply(r.inertia, y[i]), Fns::axisMultiply(r.j1, t)); + } +} + +void PxcLtbProject(const FsData& m, + Cm::SpatialVectorV* velocity, + Vec3V* b) +{ + PX_ASSERT(m.linkCount<=DY_ARTICULATION_MAX_SIZE); + 
Cm::SpatialVectorV y[DY_ARTICULATION_MAX_SIZE]; + + PxcLtbSolve(m, b, y); + + for(PxU32 i=0;i<m.linkCount;i++) + velocity[i] -= y[i]; +} + +void PxcFsPropagateDrivenInertiaSimd(FsData& matrix, + const FsInertia* baseInertia, + const PxReal* isf, + const Mat33V* load, + PxcFsScratchAllocator allocator) +{ + typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns; + + Cm::SpatialVectorV IS[3]; + + FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + FsInertia* inertia = allocator.alloc<FsInertia>(matrix.linkCount); + PxMemCopy(inertia, baseInertia, matrix.linkCount*sizeof(FsInertia)); + + for(PxU32 i=matrix.linkCount; --i>0;) + { + FsRow& r = rows[i]; + const FsRowAux& a = aux[i]; + const FsJointVectors& jv = jointVectors[i]; + + const Mat33V m = Fns::computeSIS(inertia[i], a.S, IS); + const FloatV f = FLoad(isf[i]); + + const Mat33V D = Fns::invertSym33(Mat33V(V3ScaleAdd(load[i].col0, f, m.col0), + V3ScaleAdd(load[i].col1, f, m.col1), + V3ScaleAdd(load[i].col2, f, m.col2))); + r.D = D; + + inertia[matrix.parent[i]] = Fns::addInertia(inertia[matrix.parent[i]], + Fns::translateInertia(jv.parentOffset, Fns::multiplySubtract(inertia[i], D, IS, r.DSI))); + } + + getRootInverseInertia(matrix) = Fns::invertInertia(inertia[0]); +} + +PX_FORCE_INLINE Cm::SpatialVectorV propagateDrivenImpulse(const FsRow& row, + const FsJointVectors& jv, + Vec3V& SZMinusQ, + const Cm::SpatialVectorV& Z, + const Vec3V& Q) +{ + typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns; + + SZMinusQ = V3Sub(V3Add(Z.angular, V3Cross(Z.linear,jv.jointOffset)), Q); + Cm::SpatialVectorV result = Fns::translateForce(jv.parentOffset, Z - Fns::axisMultiply(row.DSI, SZMinusQ)); + + return result; +} + +void PxcFsApplyJointDrives(FsData& matrix, + const Vec3V* Q) +{ + typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns; + + PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + + const FsRow* rows = 
getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + Cm::SpatialVectorV Z[DY_ARTICULATION_MAX_SIZE]; + Cm::SpatialVectorV dV[DY_ARTICULATION_MAX_SIZE]; + Vec3V SZminusQ[DY_ARTICULATION_MAX_SIZE]; + + PxMemZero(Z, matrix.linkCount*sizeof(Cm::SpatialVectorV)); + + for(PxU32 i=matrix.linkCount;i-->1;) + Z[matrix.parent[i]] += propagateDrivenImpulse(rows[i], jointVectors[i], SZminusQ[i], Z[i], Q[i]); + + + dV[0] = Fns::multiply(getRootInverseInertia(matrix), -Z[0]); + + for(PxU32 i=1;i<matrix.linkCount;i++) + dV[i] = Fns::propagateVelocity(rows[i], jointVectors[i], SZminusQ[i], dV[matrix.parent[i]], aux[i]); + + Cm::SpatialVectorV* V = getVelocity(matrix); + for(PxU32 i=0;i<matrix.linkCount;i++) + V[i] += dV[i]; +} + +void ArticulationHelper::applyImpulses( const FsData& matrix, + Cm::SpatialVectorV* Z, + Cm::SpatialVectorV* V) +{ + // note: Z is the negated impulse + + + typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns; + + PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + const FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + Cm::SpatialVectorV dV[DY_ARTICULATION_MAX_SIZE]; + Vec3V SZ[DY_ARTICULATION_MAX_SIZE]; + + for(PxU32 i=matrix.linkCount;i-->1;) + Z[matrix.parent[i]] += Fns::propagateImpulse(rows[i], jointVectors[i], SZ[i], Z[i], aux[i]); + + dV[0] = Fns::multiply(getRootInverseInertia(matrix), -Z[0]); + + for(PxU32 i=1;i<matrix.linkCount;i++) + dV[i] = Fns::propagateVelocity(rows[i], jointVectors[i], SZ[i], dV[matrix.parent[i]], aux[i]); + + for(PxU32 i=0;i<matrix.linkCount;i++) + V[i] += dV[i]; +} + +void getImpulseResponseSlow(const FsData& matrix, + PxU32 linkID0, + const Cm::SpatialVectorV& impulse0, + Cm::SpatialVectorV& deltaV0, + PxU32 linkID1, + const Cm::SpatialVectorV& impulse1, + Cm::SpatialVectorV& deltaV1) +{ + typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns; + 
+ const FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + PxU32 stack[DY_ARTICULATION_MAX_SIZE]; + Vec3V SZ[DY_ARTICULATION_MAX_SIZE]; + + PxU32 i0, i1, ic; + + for(i0 = linkID0, i1 = linkID1; i0!=i1;) // find common path + { + if(i0<i1) + i1 = matrix.parent[i1]; + else + i0 = matrix.parent[i0]; + } + + PxU32 common = i0; + + Cm::SpatialVectorV Z0 = -impulse0, Z1 = -impulse1; + for(i0 = 0; linkID0!=common; linkID0 = matrix.parent[linkID0]) + { + Z0 = Fns::propagateImpulse(rows[linkID0], jointVectors[linkID0], SZ[linkID0], Z0, aux[linkID0]); + stack[i0++] = linkID0; + } + + for(i1 = i0; linkID1!=common; linkID1 = matrix.parent[linkID1]) + { + Z1 = Fns::propagateImpulse(rows[linkID1], jointVectors[linkID1], SZ[linkID1], Z1, aux[linkID1]); + stack[i1++] = linkID1; + } + + Cm::SpatialVectorV Z = Z0 + Z1; + for(ic = i1; common; common = matrix.parent[common]) + { + Z = Fns::propagateImpulse(rows[common], jointVectors[common], SZ[common], Z, aux[common]); + stack[ic++] = common; + } + + Cm::SpatialVectorV v = Fns::multiply(getRootInverseInertia(matrix), -Z); + + for(PxU32 index = ic; index-->i1 ;) + v = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], v, aux[stack[index]]); + + deltaV1 = v; + for(PxU32 index = i1; index-->i0 ;) + deltaV1 = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], deltaV1, aux[stack[index]]); + + deltaV0 = v; + for(PxU32 index = i0; index-->0;) + deltaV0 = Fns::propagateVelocity(rows[stack[index]], jointVectors[stack[index]], SZ[stack[index]], deltaV0, aux[stack[index]]); +} + +void PxcFsGetImpulseResponse(const FsData& matrix, + PxU32 linkID, + const Cm::SpatialVectorV& impulse, + Cm::SpatialVectorV& deltaV) +{ + typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns; + + 
PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + Vec3V SZ[DY_ARTICULATION_MAX_SIZE]; + + const FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + Cm::SpatialVectorV Z = -impulse; + + for(PxU32 i = linkID; i; i = matrix.parent[i]) + Z = Fns::propagateImpulse(rows[i], jointVectors[i], SZ[i], Z, aux[i]); + + deltaV = Fns::multiply(getRootInverseInertia(matrix), -Z); + + PX_ASSERT(rows[linkID].pathToRoot&1); + + for(ArticulationBitField i=rows[linkID].pathToRoot-1; i; i &= (i-1)) + { + const PxU32 index = ArticulationLowestSetBit(i); + deltaV = Fns::propagateVelocity(rows[index], jointVectors[index], SZ[index], deltaV, aux[index]); + } +} + +void PxcFsGetImpulseSelfResponse(const FsData& matrix, + PxU32 linkID0, + const Cm::SpatialVectorV& impulse0, + Cm::SpatialVectorV& deltaV0, + PxU32 linkID1, + const Cm::SpatialVectorV& impulse1, + Cm::SpatialVectorV& deltaV1) +{ + typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns; + + PX_ASSERT(linkID0 != linkID1); + + const FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + // standard case: parent-child limit + if(matrix.parent[linkID1] == linkID0) + { + Vec3V SZ; + const Cm::SpatialVectorV Z = impulse0 - Fns::propagateImpulse(rows[linkID1], jointVectors[linkID1], SZ, -impulse1, aux[linkID1]); + PxcFsGetImpulseResponse(matrix, linkID0, Z, deltaV0); + deltaV1 = Fns::propagateVelocity(rows[linkID1], jointVectors[linkID1], SZ, deltaV0, aux[linkID1]); + } + else + getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1); + +#if DY_ARTICULATION_DEBUG_VERIFY + Cm::SpatialVector V[DY_ARTICULATION_MAX_SIZE]; + for(PxU32 i=0;i<matrix.linkCount;i++) V[i] = Cm::SpatialVector::zero(); + ArticulationRef::applyImpulse(matrix,V,linkID0, reinterpret_cast<const Cm::SpatialVector&>(impulse0)); + 
ArticulationRef::applyImpulse(matrix,V,linkID1, reinterpret_cast<const Cm::SpatialVector&>(impulse1)); + + Cm::SpatialVector refV0 = V[linkID0]; + Cm::SpatialVector refV1 = V[linkID1]; +#endif +} + +namespace +{ + + PX_FORCE_INLINE Cm::SpatialVectorV getImpulseResponseSimd(const FsData& matrix, PxU32 linkID, Vec3V lZ, Vec3V aZ) + { + PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + Vec3V SZ[DY_ARTICULATION_MAX_SIZE]; + PxU32 indices[DY_ARTICULATION_MAX_SIZE], iCount = 0; + + const FsRow*PX_RESTRICT rows = getFsRows(matrix); + const FsRowAux*PX_RESTRICT aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + PX_UNUSED(aux); + PX_ASSERT(rows[linkID].pathToRoot&1); + + lZ = V3Neg(lZ); + aZ = V3Neg(aZ); + + for(PxU32 i = linkID; i; i = matrix.parent[i]) + { + const FsRow& r = rows[i]; + const FsJointVectors& j = jointVectors[i]; + + Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset)); + SZ[iCount] = sz; + + lZ = V3NegScaleSub(r.DSI[0].linear, V3GetX(sz), V3NegScaleSub(r.DSI[1].linear, V3GetY(sz), V3NegScaleSub(r.DSI[2].linear, V3GetZ(sz), lZ))); + aZ = V3NegScaleSub(r.DSI[0].angular, V3GetX(sz), V3NegScaleSub(r.DSI[1].angular, V3GetY(sz), V3NegScaleSub(r.DSI[2].angular, V3GetZ(sz), aZ))); + + aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ)); + indices[iCount++] = i; + } + + const FsInertia& I = getRootInverseInertia(matrix); + + Vec3V lV = V3Neg(V3Add(M33MulV3(I.ll, lZ), M33MulV3(I.la, aZ))); + Vec3V aV = V3Neg(V3Add(M33TrnspsMulV3(I.la, lZ), M33MulV3(I.aa, aZ))); + + while(iCount) + { + PxU32 i = indices[--iCount]; + const FsRow& r = rows[i]; + const FsJointVectors& j = jointVectors[i]; + + lV = V3Sub(lV, V3Cross(j.parentOffset, aV)); + + Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV), V3Dot(r.DSI[1].linear, lV), V3Dot(r.DSI[2].linear, lV)), + V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV))); + + n = V3Add(n, M33MulV3(r.D, SZ[iCount])); + lV = V3Sub(lV, V3Cross(j.jointOffset, n)); + 
aV = V3Sub(aV, n); + } + + return Cm::SpatialVectorV(lV, aV); + } +} + +void ArticulationHelper::getImpulseResponse(const FsData& matrix, + PxU32 linkID, + const Cm::SpatialVectorV& impulse, + Cm::SpatialVectorV& deltaV) +{ + PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + + deltaV = getImpulseResponseSimd(matrix, linkID, impulse.linear, impulse.angular); + +#if DY_ARTICULATION_DEBUG_VERIFY + Cm::SpatialVectorV deltaV_; + PxcFsGetImpulseResponse(matrix, linkID, impulse, deltaV_); + PX_ASSERT(almostEqual(deltaV_, deltaV,1e-3f)); +#endif +} + +void ArticulationHelper::getImpulseSelfResponse(const FsData& matrix, + PxU32 linkID0, + const Cm::SpatialVectorV& impulse0, + Cm::SpatialVectorV& deltaV0, + PxU32 linkID1, + const Cm::SpatialVectorV& impulse1, + Cm::SpatialVectorV& deltaV1) +{ + PX_ASSERT(linkID0 != linkID1); + + const FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + PX_UNUSED(aux); + + Cm::SpatialVectorV& dV0 = deltaV0, + & dV1 = deltaV1; + + // standard case: parent-child limit + if(matrix.parent[linkID1] == linkID0) + { + const FsRow& r = rows[linkID1]; + const FsJointVectors& j = jointVectors[linkID1]; + + Vec3V lZ = V3Neg(impulse1.linear), + aZ = V3Neg(impulse1.angular); + + Vec3V sz = V3Add(aZ, V3Cross(lZ, j.jointOffset)); + + lZ = V3Sub(lZ, V3ScaleAdd(r.DSI[0].linear, V3GetX(sz), V3ScaleAdd(r.DSI[1].linear, V3GetY(sz), V3Scale(r.DSI[2].linear, V3GetZ(sz))))); + aZ = V3Sub(aZ, V3ScaleAdd(r.DSI[0].angular, V3GetX(sz), V3ScaleAdd(r.DSI[1].angular, V3GetY(sz), V3Scale(r.DSI[2].angular, V3GetZ(sz))))); + + aZ = V3Add(aZ, V3Cross(j.parentOffset, lZ)); + + lZ = V3Sub(impulse0.linear, lZ); + aZ = V3Sub(impulse0.angular, aZ); + + dV0 = getImpulseResponseSimd(matrix, linkID0, lZ, aZ); + + Vec3V aV = dV0.angular; + Vec3V lV = V3Sub(dV0.linear, V3Cross(j.parentOffset, aV)); + + Vec3V n = V3Add(V3Merge(V3Dot(r.DSI[0].linear, lV), V3Dot(r.DSI[1].linear, lV), 
V3Dot(r.DSI[2].linear, lV)), + V3Merge(V3Dot(r.DSI[0].angular, aV), V3Dot(r.DSI[1].angular, aV), V3Dot(r.DSI[2].angular, aV))); + + n = V3Add(n, M33MulV3(r.D, sz)); + lV = V3Sub(lV, V3Cross(j.jointOffset, n)); + aV = V3Sub(aV, n); + + dV1 = Cm::SpatialVectorV(lV, aV); + } + else + getImpulseResponseSlow(matrix, linkID0, impulse0, deltaV0, linkID1, impulse1, deltaV1); + +#if DY_ARTICULATION_DEBUG_VERIFY + Cm::SpatialVectorV dV0_, dV1_; + PxcFsGetImpulseSelfResponse(matrix, linkID0, impulse0, dV0_, linkID1, impulse1, dV1_); + + PX_ASSERT(almostEqual(dV0_, dV0, 1e-3f)); + PX_ASSERT(almostEqual(dV1_, dV1, 1e-3f)); +#endif +} + +void PxcLtbComputeJv(Vec3V* jv, const FsData& m, const Cm::SpatialVectorV* velocity) +{ + const LtbRow* rows = getLtbRows(m); + const FsRow* fsRows = getFsRows(m); + const FsJointVectors* jointVectors = getJointVectors(m); + + PX_UNUSED(rows); + PX_UNUSED(fsRows); + + for(PxU32 i=1;i<m.linkCount;i++) + { + Cm::SpatialVectorV pv = velocity[m.parent[i]], v = velocity[i]; + + Vec3V parentOffset = V3Add(jointVectors[i].jointOffset, jointVectors[i].parentOffset); + + Vec3V k0v = V3Add(pv.linear, V3Cross(pv.angular, parentOffset)), + k1v = V3Add(v.linear, V3Cross(v.angular,jointVectors[i].jointOffset)); + jv[i] = V3Sub(k0v, k1v); + } +} + +void ArticulationHelper::saveVelocity(const ArticulationSolverDesc& d) +{ + Vec3V b[DY_ARTICULATION_MAX_SIZE]; + FsData& m = *d.fsData; + + Cm::SpatialVectorV* velocity = getVelocity(m); + PxcFsFlushVelocity(m); + + // save off the motion velocity + + for(PxU32 i=0;i<m.linkCount;i++) + { + d.motionVelocity[i] = velocity[i]; + PX_ASSERT(isFiniteVec3V(velocity[i].linear)); + PX_ASSERT(isFiniteVec3V(velocity[i].angular)); + } + + // and now re-solve to use the unbiased velocities + + PxcLtbComputeJv(b, m, velocity); + PxcLtbProject(m, velocity, b); + +#if DY_ARTICULATION_DEBUG_VERIFY + for(PxU32 i=0;i<m.linkCount;i++) + getRefVelocity(m)[i] = velocity[i]; +#endif +} + +void PxcFsComputeJointLoadsSimd(const FsData& 
matrix, + const FsInertia*PX_RESTRICT baseInertia, + Mat33V*PX_RESTRICT load, + const PxReal*PX_RESTRICT isf_, + PxU32 linkCount, + PxU32 maxIterations, + PxcFsScratchAllocator allocator) +{ + // dsequeira: this is really difficult to optimize on XBox: not inlining generates lots of LHSs, + // inlining generates lots of cache misses because the fn is so huge (almost 2000 instrs.) + // Timing says even for 1 iteration the cache misses are slighly preferable for a + // 20-bone articulation, for more iters it's *much* better to take the cache misses. + // + // about 400 instructions come from unnecessary and inexplicable branch checks + + if(!maxIterations) + return; + + typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns; + + FloatV isf[DY_ARTICULATION_MAX_SIZE]; + + for(PxU32 i=1;i<linkCount;i++) + isf[i] = FLoad(isf_[i]); + + FsInertia*PX_RESTRICT inertia = allocator.alloc<FsInertia>(linkCount); + FsInertia*PX_RESTRICT contribToParent = allocator.alloc<FsInertia>(linkCount); + + const FsRow*PX_RESTRICT row = getFsRows(matrix); + const FsRowAux*PX_RESTRICT aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + PX_UNUSED(row); + + // gets rid of about 200 LHSs, need to change the matrix format to make this part of it + PxU64 parent[DY_ARTICULATION_MAX_SIZE]; + for(PxU32 i=0;i<linkCount;i++) + parent[i] = matrix.parent[i]; + + while(maxIterations--) + { + PxMemCopy(inertia, baseInertia, sizeof(FsInertia)*linkCount); + + for(PxU32 i=linkCount;i-->1;) + { + const Cm::SpatialVectorV*PX_RESTRICT S = aux[i].S; + + Ps::prefetch(&load[i-1]); + Ps::prefetch(&jointVectors[i-1]); + const FsInertia tmp = Fns::propagate(inertia[i], S, load[i], isf[i]); + inertia[parent[i]] = Fns::addInertia(inertia[parent[i]], Fns::translateInertia(jointVectors[i].parentOffset, tmp)); + contribToParent[i] = tmp; + } + + for(PxU32 i=1;i<linkCount;i++) + { + const Cm::SpatialVectorV*PX_RESTRICT S = aux[i].S; + + const FsInertia rootwardInertia = 
Fns::subtractInertia(Fns::translateInertia(V3Neg(jointVectors[i].parentOffset), inertia[parent[i]]), contribToParent[i]); + const FsInertia tmp = Fns::propagate(rootwardInertia, S, load[i], isf[i]); + load[i] = Fns::computeDriveInertia(inertia[i], rootwardInertia, S); + inertia[i] = Fns::addInertia(inertia[i], tmp); + } + } +} + +PxU32 ArticulationHelper::getFsDataSize(PxU32 linkCount) +{ + return sizeof(FsInertia) + sizeof(FsRow) * linkCount; +} + +PxU32 ArticulationHelper::getLtbDataSize(PxU32 linkCount) +{ + return sizeof(LtbRow) * linkCount; +} + +void ArticulationHelper::prepareDataBlock( FsData& fsData, + const ArticulationLink* links, + PxU16 linkCount, + PxTransform* poses, + FsInertia* baseInertia, + ArticulationJointTransforms* jointTransforms, + PxU32 expectedSize) +{ + PxU32 stateSize = sizeof(FsData) + + sizeof(Cm::SpatialVectorV) * linkCount + + sizeof(Cm::SpatialVectorV) * linkCount + + sizeof(Vec3V) * linkCount + + sizeof(PxReal) * ((linkCount + 15) & 0xfffffff0); + + PxU32 jointVectorSize = sizeof(FsJointVectors) * linkCount; + + PxU32 fsDataSize = getFsDataSize(linkCount); + PxU32 ltbDataSize = getLtbDataSize(linkCount); + + PxU32 totalSize = stateSize + + jointVectorSize + + fsDataSize + + ltbDataSize + + sizeof(Cm::SpatialVectorV) * linkCount + + sizeof(FsRowAux) * linkCount; + + PX_UNUSED(totalSize); + PX_UNUSED(expectedSize); + PX_ASSERT(expectedSize == 0 || totalSize == expectedSize); + + PxMemZero(&fsData, stateSize); + fsData.jointVectorOffset = PxU16(stateSize); + fsData.fsDataOffset = PxU16(stateSize+jointVectorSize); + fsData.ltbDataOffset = PxU16(stateSize+jointVectorSize+fsDataSize); + fsData.linkCount = linkCount; + + for(PxU32 i=1;i<linkCount;i++) + fsData.parent[i] = PxU8(links[i].parent); + fsData.deferredZ = Cm::SpatialVectorV(PxZero); + + Cm::SpatialVector* velocity = reinterpret_cast<Cm::SpatialVector*>(getVelocity(fsData)); + + PxMemZero(baseInertia, sizeof(FsInertia)*linkCount); + + PxReal* maxPenBias = getMaxPenBias(fsData); 
+ + for(PxU32 i=0;i<linkCount;i++) + { + if((i+2)<linkCount) + { + Ps::prefetch(links[i+2].bodyCore); + Ps::prefetch(links[i+2].inboundJoint); + } + PxsBodyCore& core = *links[i].bodyCore; + poses[i] = core.body2World; + velocity[i] = Cm::SpatialVector(core.linearVelocity, core.angularVelocity); + setInertia(baseInertia[i], core, core.body2World); + maxPenBias[i] = core.maxPenBias; + + if(i) + setJointTransforms(jointTransforms[i], poses[links[i].parent], core.body2World, *links[i].inboundJoint); + } + + FsJointVectors* jointVectors = getJointVectors(fsData); + for(PxU32 i=1;i<linkCount;i++) + { + PX_ALIGN(16, PxVec3) parentOffset = poses[i].p - poses[fsData.parent[i]].p; + PX_ALIGN(16, PxVec3) jointOffset = jointTransforms[i].cB2w.p - poses[i].p; + jointVectors[i].parentOffset = V3LoadA(parentOffset); + jointVectors[i].jointOffset = V3LoadA(jointOffset); + } +} + +PxU32 ArticulationHelper::computeUnconstrainedVelocities( const ArticulationSolverDesc& desc, + PxReal dt, + PxcConstraintBlockStream& stream, + PxSolverConstraintDesc* constraintDesc, + PxU32& acCount, + PxsConstraintBlockManager& constraintBlockManager, + const PxVec3& gravity, PxU64 contextID) +{ + PX_UNUSED(contextID); + const ArticulationLink* links = desc.links; + PxU16 linkCount = desc.linkCount; + FsData& fsData = *desc.fsData; + PxTransform* poses = desc.poses; + + PxcFsScratchAllocator allocator(desc.scratchMemory, desc.scratchMemorySize); + FsInertia* PX_RESTRICT baseInertia = allocator.alloc<FsInertia>(desc.linkCount); + ArticulationJointTransforms* PX_RESTRICT jointTransforms = allocator.alloc<ArticulationJointTransforms>(desc.linkCount); + + { + PX_PROFILE_ZONE("Articulations.prepareDataBlock", contextID); + prepareDataBlock(fsData, links, linkCount, poses, baseInertia, jointTransforms, desc.totalDataSize); + } + + const PxReal recipDt = 1.0f/dt; + + Cm::SpatialVectorV* velocity = getVelocity(fsData); + + { + + PX_PROFILE_ZONE("Articulations.setupProject", contextID); + + 
PxMemZero(getLtbRows(fsData), getLtbDataSize(linkCount)); + prepareLtbMatrix(fsData, baseInertia, poses, jointTransforms, recipDt); + + PxcLtbFactor(fsData); + + Vec3V b[DY_ARTICULATION_MAX_SIZE]; + PxcLtbComputeJv(b, fsData, velocity); + + LtbRow* rows = getLtbRows(fsData); + for(PxU32 i=1;i<linkCount;i++) + b[i] = V3Add(b[i], rows[i].jC); + + PxcLtbProject(fsData, velocity, b); + } + + { + PX_PROFILE_ZONE("Articulations.prepareFsData", contextID); + PxMemZero(addAddr<void*>(&fsData,fsData.fsDataOffset), getFsDataSize(linkCount)); + prepareFsData(fsData, links); + } + + { + PX_PROFILE_ZONE("Articulations.setupDrives", contextID); + + if(!(desc.core->externalDriveIterations & 0x80000000)) + PxMemZero(desc.externalLoads, sizeof(Mat33V) * linkCount); + + if(!(desc.core->internalDriveIterations & 0x80000000)) + PxMemZero(desc.internalLoads, sizeof(Mat33V) * linkCount); + + PxReal isf[DY_ARTICULATION_MAX_SIZE], esf[DY_ARTICULATION_MAX_SIZE]; // spring factors + Vec3V drive[DY_ARTICULATION_MAX_SIZE]; + + bool externalEqualsInternalCompliance = (desc.core->internalDriveIterations&0xffff) == (desc.core->externalDriveIterations&0xffff); + for(PxU32 i=1;i<linkCount;i++) + { + const ArticulationJointCore& j = *links[i].inboundJoint; + isf[i] = (1 + j.damping * dt + j.spring * dt * dt) * getResistance(j.internalCompliance); + esf[i] = (1 + j.damping * dt + j.spring * dt * dt) * getResistance(j.externalCompliance); + + externalEqualsInternalCompliance = externalEqualsInternalCompliance && j.internalCompliance == j.externalCompliance; + } + + { + PX_PROFILE_ZONE("Articulations.jointInternalLoads", contextID); + PxcFsComputeJointLoadsSimd(fsData, baseInertia, desc.internalLoads, isf, linkCount, desc.core->internalDriveIterations&0xffff, allocator); + + } + + { + PX_PROFILE_ZONE("Articulations.propagateDrivenInertia", contextID); + PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, isf, desc.internalLoads, allocator); + } + + { + 
PX_PROFILE_ZONE("Articulations.computeJointDrives", contextID); + computeJointDrives(fsData, drive, links, poses, jointTransforms, desc.internalLoads, dt); + } + + { + PX_PROFILE_ZONE("Articulations.applyJointDrives", contextID); + PxcFsApplyJointDrives(fsData, drive); + } + + if(!externalEqualsInternalCompliance) + { + { + PX_PROFILE_ZONE("Articulations.jointExternalLoads", contextID); + PxcFsComputeJointLoadsSimd(fsData, baseInertia, desc.externalLoads, esf, linkCount, desc.core->externalDriveIterations&0xffff, allocator); + } + + { + PX_PROFILE_ZONE("Articulations.propagateDrivenInertia", contextID); + PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, esf, desc.externalLoads, allocator); + } + } + } + + { + PX_PROFILE_ZONE("Articulations.applyExternalImpulses", contextID); + Cm::SpatialVectorV Z[DY_ARTICULATION_MAX_SIZE]; + + FloatV h = FLoad(dt); + + const Cm::SpatialVector* acceleration = desc.acceleration; + + const Vec3V vGravity = V3LoadU(gravity); + + for(PxU32 i=0;i<linkCount;i++) + { + Vec3V linearAccel = V3LoadA(acceleration[i].linear); + + if (!(desc.links[i].body->mInternalFlags & PxcRigidBody::eDISABLE_GRAVITY)) + linearAccel = V3Add(linearAccel, vGravity); + Cm::SpatialVectorV a(linearAccel, V3LoadA(acceleration[i].angular)); + Z[i] = -ArticulationFnsSimd<ArticulationFnsSimdBase>::multiply(baseInertia[i], a) * h; + } + + applyImpulses(fsData, Z, getVelocity(fsData)); + } + + // save off the motion velocity in case there are no constraints with the articulation + + PxMemCopy(desc.motionVelocity, velocity, linkCount*sizeof(Cm::SpatialVectorV)); + + // set up for deferred-update solve + + fsData.dirty = 0; + + // solver progress counters + fsData.maxSolverNormalProgress = 0; + fsData.maxSolverFrictionProgress = 0; + fsData.solverProgress = 0; + + +#if DY_ARTICULATION_DEBUG_VERIFY + for(PxU32 i=0;i<linkCount;i++) + getRefVelocity(fsData)[i] = getVelocity(fsData)[i]; +#endif + + { + PX_PROFILE_ZONE("Articulations.setupConstraints", contextID); + 
return setupSolverConstraints(fsData, desc.solverDataSize, stream, constraintDesc, links, jointTransforms, dt, acCount, constraintBlockManager); + } +} + +void ArticulationHelper::initializeDriveCache( FsData& fsData, + PxU16 linkCount, + const ArticulationLink* links, + PxReal compliance, + PxU32 iterations, + char* scratchMemory, + PxU32 scratchMemorySize) +{ + PxcFsScratchAllocator allocator(scratchMemory, scratchMemorySize); + FsInertia* PX_RESTRICT baseInertia = allocator.alloc<FsInertia>(linkCount); + ArticulationJointTransforms* PX_RESTRICT jointTransforms = allocator.alloc<ArticulationJointTransforms>(linkCount); + PxTransform* PX_RESTRICT poses = allocator.alloc<PxTransform>(linkCount); + Mat33V* PX_RESTRICT jointLoads = allocator.alloc<Mat33V>(linkCount); + + PxReal springFactor[DY_ARTICULATION_MAX_SIZE]; // spring factors + + prepareDataBlock(fsData, links, linkCount, poses, baseInertia, jointTransforms, 0); + + PxMemZero(addAddr<void*>(&fsData,fsData.fsDataOffset), getFsDataSize(linkCount)); + prepareFsData(fsData, links); + + springFactor[0] = 0.0f; + for(PxU32 i=1;i<linkCount;i++) + springFactor[i] = getResistance(compliance); + + PxMemZero(jointLoads, sizeof(Mat33V)*linkCount); + PxcFsComputeJointLoadsSimd(fsData, baseInertia, jointLoads, springFactor, linkCount, iterations&0xffff, allocator); + PxcFsPropagateDrivenInertiaSimd(fsData, baseInertia, springFactor, jointLoads, allocator); +} + +void ArticulationHelper::updateBodies(const ArticulationSolverDesc& desc, PxReal dt) +{ + FsData& fsData = *desc.fsData; + const ArticulationCore& core = *desc.core; + const ArticulationLink* links = desc.links; + PxTransform* poses = desc.poses; + Cm::SpatialVectorV* motionVelocity = desc.motionVelocity; + + Vec3V b[DY_ARTICULATION_MAX_SIZE]; + + PxU32 linkCount = fsData.linkCount; + + PxcFsFlushVelocity(fsData); + PxcLtbComputeJv(b, fsData, getVelocity(fsData)); + PxcLtbProject(fsData, getVelocity(fsData), b); + + // update positions + PxcFsScratchAllocator 
allocator(desc.scratchMemory, desc.scratchMemorySize); + PxTransform* PX_RESTRICT oldPose = allocator.alloc<PxTransform>(desc.linkCount); + + for(PxU32 i=0;i<linkCount;i++) + { + const PxVec3& lv = reinterpret_cast<PxVec3&>(motionVelocity[i].linear); + const PxVec3& av = reinterpret_cast<PxVec3&>(motionVelocity[i].angular); + oldPose[i] = poses[i]; + poses[i] = PxTransform(poses[i].p + lv * dt, Ps::exp(av*dt) * poses[i].q); + } + + bool projected = false; + const PxReal recipDt = 1.0f/dt; + + FsInertia* PX_RESTRICT baseInertia = allocator.alloc<FsInertia>(desc.linkCount); + ArticulationJointTransforms* PX_RESTRICT jointTransforms = allocator.alloc<ArticulationJointTransforms>(desc.linkCount); + + for(PxU32 iterations = 0; iterations < core.maxProjectionIterations; iterations++) + { + PxReal maxSeparation = -PX_MAX_F32; + for(PxU32 i=1;i<linkCount;i++) + { + const ArticulationJointCore& j = *links[i].inboundJoint; + maxSeparation = PxMax(maxSeparation, + (poses[links[i].parent].transform(j.parentPose).p - + poses[i].transform(j.childPose).p).magnitude()); + } + + if(maxSeparation<=core.separationTolerance) + break; + + projected = true; + + // we go around again, finding velocities which pull us back together - this + // form of projection is momentum-preserving but slow compared to hierarchical + // projection + + PxMemZero(baseInertia, sizeof(FsInertia)*linkCount); + + ArticulationHelper::setInertia(baseInertia[0], *links[0].bodyCore, poses[0]); + for(PxU32 i=1;i<linkCount;i++) + { + ArticulationHelper::setInertia(baseInertia[i], *links[i].bodyCore, poses[i]); + ArticulationHelper::setJointTransforms(jointTransforms[i], poses[links[i].parent], poses[i], *links[i].inboundJoint); + } + + ArticulationHelper::prepareLtbMatrix(fsData, baseInertia, poses, jointTransforms, recipDt); + PxcLtbFactor(fsData); + + LtbRow* rows = getLtbRows(fsData); + + for(PxU32 i=1;i<linkCount;i++) + b[i] = rows[i].jC; + + PxMemZero(motionVelocity, linkCount*sizeof(Cm::SpatialVectorV)); + + 
PxcLtbProject(fsData, motionVelocity, b); + + for(PxU32 i=0;i<linkCount;i++) + { + const PxVec3& lv = reinterpret_cast<PxVec3&>(motionVelocity[i].linear); + const PxVec3& av = reinterpret_cast<PxVec3&>(motionVelocity[i].angular); + poses[i] = PxTransform(poses[i].p + lv * dt, Ps::exp(av*dt) * poses[i].q); + } + } + + if(projected) + { + // recompute motion velocities. + for(PxU32 i=0;i<linkCount;i++) + { + motionVelocity[i].linear = V3LoadU((poses[i].p - oldPose[i].p) * recipDt); + motionVelocity[i].angular = V3LoadU(Ps::log(poses[i].q * oldPose[i].q.getConjugate()) * recipDt); + } + } + + Cm::SpatialVectorV* velocity = getVelocity(fsData); + for(PxU32 i=0;i<linkCount;i++) + { + links[i].bodyCore->body2World = poses[i]; + + V3StoreA(velocity[i].linear, links[i].bodyCore->linearVelocity); + V3StoreA(velocity[i].angular, links[i].bodyCore->angularVelocity); + } +} +

// Writes the mass and the world-space inertia tensor of 'body' (oriented by pose.q) into 'inertia'.
//
// Only the diagonal of inertia.ll (set to the mass) and inertia.aa are written.
// NOTE: divides by body.inverseMass and by every component of body.inverseInertia, so all of them
// must be non-zero.
void ArticulationHelper::setInertia(FsInertia& inertia,
									const PxsBodyCore& body,
									const PxTransform& pose)
{
	// assumes that elements that are supposed to be zero (i.e. la matrix and off diagonal elements of ll) are zero

	const PxMat33 R(pose.q);
	const PxVec3& v = body.inverseInertia;
	const PxReal m = 1.0f/body.inverseMass;
	V3WriteX(inertia.ll.col0, m);
	V3WriteY(inertia.ll.col1, m);
	V3WriteZ(inertia.ll.col2, m);

	// R * diag(1/inverseInertia) * R^T, then averaged with its transpose so the result is exactly symmetric
	PX_ALIGN_PREFIX(16) PxMat33 PX_ALIGN_SUFFIX(16) alignedInertia = R * PxMat33::createDiagonal(PxVec3(1.0f/v.x, 1.0f/v.y, 1.0f/v.z)) * R.getTranspose();
	alignedInertia = (alignedInertia + alignedInertia.getTranspose())*0.5f;
	inertia.aa = Mat33V_From_PxMat33(alignedInertia);
}

// Computes the joint frames of one joint from the poses of the two bodies it connects.
//
// transforms  out: cA2w  = parentPose * joint.parentPose
//                  cB2w  = childPose  * joint.childPose
//                  cB2cA = cA2w^-1 * cB2w (child joint frame relative to the parent joint frame)
void ArticulationHelper::setJointTransforms(ArticulationJointTransforms& transforms,
											const PxTransform& parentPose,
											const PxTransform& childPose,
											const ArticulationJointCore& joint)
{
	transforms.cA2w = parentPose.transform(joint.parentPose);
	transforms.cB2w = childPose.transform(joint.childPose);
	transforms.cB2cA = transforms.cA2w.transformInv(transforms.cB2w);
	if(transforms.cB2cA.q.w<0)	// the relative quat must be the short way round for limits to work...
	{
		// negate both quaternions so that cB2cA.q.w becomes non-negative
		transforms.cB2cA.q = -transforms.cB2cA.q;
		transforms.cB2w.q = -transforms.cB2w.q;
	}
}

// Fills the LTB rows of fsData: row 0 only receives its inertia; every row i >= 1 receives its inertia,
// three jacobian rows (one per world axis) for the parent (j0) and the child (j1), and the
// positional error term jC.
//
// recipDt  1/dt, scales the joint separation into jC
void ArticulationHelper::prepareLtbMatrix(	FsData& fsData,
											const FsInertia* baseInertia,
											const PxTransform* poses,
											const ArticulationJointTransforms* jointTransforms,
											PxReal recipDt)
{
	PxU32 linkCount = fsData.linkCount;
	LtbRow* rows = getLtbRows(fsData);

	rows[0].inertia = baseInertia[0];

	const PxVec3 axis[3] = { PxVec3(1.0f,0.0f,0.0f), PxVec3(0.0f,1.0f,0.0f), PxVec3(0.0f,0.0f,1.0f) };
	for(PxU32 i=1;i<linkCount;i++)
	{
		rows[i].inertia = baseInertia[i];
		const ArticulationJointTransforms& s = jointTransforms[i];

		const PxU32 p = fsData.parent[i];

		// we put the action point of the constraint at the root of the child

		const PxVec3 ra = s.cB2w.p - poses[p].p;
		const PxVec3 rb = s.cB2w.p - poses[i].p;

		// A bit different from the 1D solver,
		// there we use a formulation j0.v0 - j1.v1 + c = 0
		// here we use the homogeneous j0.v0 + j1.v1 + c = 0

		// only 99% of the separation between the two joint frame origins is used as error
		const PxVec3 error = (s.cA2w.p - s.cB2w.p) * 0.99f;

		Cm::SpatialVectorV* j0 = rows[i].j0;
		Cm::SpatialVectorV* j1 = rows[i].j1;

		for(PxU32 j=0;j<3;j++)
		{
			PxVec3 n = axis[j];
			j0[j] = Cm::SpatialVector(n, ra.cross(n));
			j1[j] = Cm::SpatialVector(-n, -rb.cross(n));
		}

		rows[i].jC = V3LoadU(error*recipDt);
	}
}

// Fills the FS rows and their auxiliary data: copies the children / pathToRoot values of every link
// (row 0 gets pathToRoot = 1) and, for links 1..linkCount-1, stores the three joint motion axes S[0..2].
void ArticulationHelper::prepareFsData(FsData& fsData, const ArticulationLink* links)
{
	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;

	const PxU32 linkCount = fsData.linkCount;
	FsRow* rows = getFsRows(fsData);
	FsRowAux* aux = getAux(fsData);
	const FsJointVectors* jointVectors = getJointVectors(fsData);

	rows[0].children = links[0].children;
	rows[0].pathToRoot = 1;

	// 16-byte aligned unit axes, read back as Vec3V
	PX_ALIGN_PREFIX(16) PxVec4 v[] PX_ALIGN_SUFFIX(16) = { PxVec4(1.f,0,0,0), PxVec4(0,1.f,0,0), PxVec4(0,0,1.f,0) } ;
	const Vec3V* axes = reinterpret_cast<const Vec3V*>(v);

	for(PxU32 i=1;i<linkCount;i++)
	{
		FsRow& r = rows[i];
		FsRowAux& a = aux[i];

		r.children = links[i].children;
		r.pathToRoot = links[i].pathToRoot;

		const Vec3V jointOffset = jointVectors[i].jointOffset;

		// the joint coords are world oriented, located at the joint.
		for(PxU32 k=0;k<3;k++)
			a.S[k] = Fns::translateMotion(jointOffset, Cm::SpatialVectorV(V3Zero(), axes[k]));
	}
}

// Converts a joint compliance into a resistance (its reciprocal). compliance must be > 0 (asserted).
PX_FORCE_INLINE PxReal ArticulationHelper::getResistance(PxReal compliance)
{
	PX_ASSERT(compliance>0);
	return 1.0f/compliance;
}

// Sets up a one-sided angular limit row about 'axis' between link 'linkIndex' and its parent.
//
// s        constraint row to fill; deltaVA/deltaVB receive the responses of parent and child to a unit
//          angular impulse about +axis / -axis
// err      limit error; scaled by recipDt and the inverse unit response into s.constant
// recipDt  1/dt
//
// If the unit response is not positive the row gets zero gain, and a debug warning is emitted when
// the response is negative.
void ArticulationHelper::createHardLimit(	const FsData& fsData,
											const ArticulationLink* links,
											PxU32 linkIndex,
											SolverConstraint1DExt& s,
											const PxVec3& axis,
											PxReal err,
											PxReal recipDt)
{
	// NOTE(review): the last two init() arguments look like the impulse bounds [0, PX_MAX_F32] - confirm against init()
	init(s, PxVec3(0), PxVec3(0), axis, axis, 0, PX_MAX_F32);

	ArticulationHelper::getImpulseSelfResponse(fsData,
		links[linkIndex].parent,Cm::SpatialVector(PxVec3(0), axis), s.deltaVA,
		linkIndex, Cm::SpatialVector(PxVec3(0), -axis), s.deltaVB);

	const PxReal unitResponse = axis.dot(reinterpret_cast<PxVec3&>(s.deltaVA.angular)) - axis.dot(reinterpret_cast<PxVec3&>(s.deltaVB.angular));
	if(unitResponse<0.0f)
		Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "Warning: articulation ill-conditioned or under severe stress, joint limit ignored");

	const PxReal recipResponse = unitResponse>0.0f ? 1.0f/unitResponse : 0.0f;

	s.constant = recipResponse * -err * recipDt;
	s.unbiasedConstant = err>0.0f ? s.constant : 0.0f;
	s.velMultiplier = -recipResponse;
	s.impulseMultiplier = 1.0f;
}

// Sets up a two-sided implicit angular spring row about 'axis' between link 'linkIndex' and its parent.
//
// stiffness/damping  spring parameters, combined with dt into a = dt*dt*stiffness + dt*damping
//
// If the unit response is not positive the row gets zero velocity gain, and a debug warning is
// emitted when the response is negative.
void ArticulationHelper::createTangentialSpring(const FsData& fsData,
												const ArticulationLink* links,
												PxU32 linkIndex,
												SolverConstraint1DExt& s,
												const PxVec3& axis,
												PxReal stiffness,
												PxReal damping,
												PxReal dt)
{
	init(s, PxVec3(0), PxVec3(0), axis, axis, -PX_MAX_F32, PX_MAX_F32);

	Cm::SpatialVector axis6(PxVec3(0), axis);
	PxU32 parent = links[linkIndex].parent;
	getImpulseSelfResponse(fsData, parent, axis6, s.deltaVA, linkIndex, -axis6, s.deltaVB);

	const PxReal unitResponse = axis.dot(reinterpret_cast<PxVec3&>(s.deltaVA.angular)) - axis.dot(reinterpret_cast<PxVec3&>(s.deltaVB.angular));
	if(unitResponse<0.0f)
		Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, "Warning: articulation ill-conditioned or under severe stress, tangential spring ignored");
	const PxReal recipResponse = unitResponse>0.0F ? 1.0f/unitResponse : 0.0f;

	// this is a specialization of the spring code in setSolverConstants() for acceleration springs.
	// general case is b = dt * (c.mods.spring.damping * c.velocityTarget - c.mods.spring.stiffness * geomError);
	// but geomError and velocityTarget are both zero

	const PxReal a = dt * dt * stiffness + dt * damping;
	const PxReal x = 1.0f/(1.0f+a);
	s.constant = s.unbiasedConstant = 0.0f;
	s.velMultiplier = -x * recipResponse * a;
	s.impulseMultiplier = 1.0f - x;
}

PxU32 ArticulationHelper::setupSolverConstraints( FsData& fsData, PxU32 solverDataSize, + PxcConstraintBlockStream& stream, + PxSolverConstraintDesc* constraintDesc, + const ArticulationLink* links, + const ArticulationJointTransforms* jointTransforms, + PxReal dt, + PxU32& acCount, + PxsConstraintBlockManager& constraintBlockManager) +{ + acCount = 0; + + const PxU16 linkCount = fsData.linkCount; + PxU32 descCount = 0; + const PxReal recipDt = 1.0f/dt; + + const PxConstraintInvMassScale ims(1.0f, 1.0f, 1.0f, 1.0f); + + for(PxU16 i=1;i<linkCount;i++) + { + const ArticulationJointCore& j = *links[i].inboundJoint; + + if(i+1<linkCount) + { + Ps::prefetch(links[i+1].inboundJoint, sizeof (ArticulationJointCore)); + Ps::prefetch(&jointTransforms[i+1], sizeof(ArticulationJointTransforms)); + } + + if(!(j.twistLimited || j.swingLimited)) + continue; + + PxQuat swing, twist; + Ps::separateSwingTwist(jointTransforms[i].cB2cA.q, swing, twist); + + Cm::ConeLimitHelper eh(j.tanQSwingY, j.tanQSwingZ, j.tanQSwingPad); + PxVec3 swingLimitAxis; + PxReal swingLimitError = 0.0f; + + const bool swingLimited = j.swingLimited && eh.getLimit(swing, swingLimitAxis, swingLimitError); + const bool tangentialStiffness = swingLimited && (j.tangentialStiffness>0 || j.tangentialDamping>0); + + const PxVec3 twistAxis = jointTransforms[i].cB2w.rotate(PxVec3(1.0f,0,0)); + const PxReal tqTwistAngle = Ps::tanHalf(twist.x, twist.w); + + const bool twistLowerLimited = j.twistLimited && tqTwistAngle < Cm::tanAdd(j.tanQTwistLow, j.tanQTwistPad); + const bool twistUpperLimited = j.twistLimited && tqTwistAngle > 
Cm::tanAdd(j.tanQTwistHigh, -j.tanQTwistPad); + + const PxU8 constraintCount = PxU8(swingLimited + tangentialStiffness + twistUpperLimited + twistLowerLimited); + if(!constraintCount) + continue; + + PxSolverConstraintDesc& desc = constraintDesc[descCount++]; + + desc.articulationA = &fsData; + desc.linkIndexA = Ps::to16(links[i].parent); + desc.articulationALength = Ps::to16(solverDataSize); + + desc.articulationB = &fsData; + desc.linkIndexB = i; + desc.articulationBLength = Ps::to16(solverDataSize); + + const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) + + sizeof(SolverConstraint1DExt) * constraintCount; + + PX_ASSERT(0==(constraintLength & 0x0f)); + desc.constraintLengthOver16 = Ps::to16(constraintLength/16); + + desc.constraint = stream.reserve(constraintLength + 16u, constraintBlockManager); + + desc.writeBack = NULL; + + SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint); + SolverConstraint1DExt* constraints = reinterpret_cast<SolverConstraint1DExt*>(desc.constraint + sizeof(SolverConstraint1DHeader)); + + init(*header, constraintCount, true, ims); + + PxU32 cIndex = 0; + + if(swingLimited) + { + const PxVec3 normal = jointTransforms[i].cA2w.rotate(swingLimitAxis); + createHardLimit(fsData, links, i, constraints[cIndex++], normal, swingLimitError, recipDt); + if(tangentialStiffness) + { + const PxVec3 tangent = twistAxis.cross(normal).getNormalized(); + createTangentialSpring(fsData, links, i, constraints[cIndex++], tangent, j.tangentialStiffness, j.tangentialDamping, dt); + } + } + + if(twistUpperLimited) + createHardLimit(fsData, links, i, constraints[cIndex++], twistAxis, (j.tanQTwistHigh - tqTwistAngle)*4, recipDt); + + if(twistLowerLimited) + createHardLimit(fsData, links, i, constraints[cIndex++], -twistAxis, -(j.tanQTwistLow - tqTwistAngle)*4, recipDt); + + *(desc.constraint + getConstraintLength(desc)) = 0; + + PX_ASSERT(cIndex == constraintCount); + acCount += constraintCount; + } + + return 
descCount; +} + +void ArticulationHelper::computeJointDrives(FsData& fsData, + Vec3V* drives, + const ArticulationLink* links, + const PxTransform* poses, + const ArticulationJointTransforms* transforms, + const Mat33V* loads, + PxReal dt) +{ + typedef ArticulationFnsScalar Fns; + + const PxU32 linkCount = fsData.linkCount; + const Cm::SpatialVector* velocity = reinterpret_cast<const Cm::SpatialVector*>(getVelocity(fsData)); + + for(PxU32 i=1; i<linkCount;i++) + { + PxU32 parent = links[i].parent; + const ArticulationJointTransforms& b = transforms[i]; + const ArticulationJointCore& j = *links[i].inboundJoint; + + const Cm::SpatialVector currentVel = Fns::translateMotion(poses[i].p - b.cA2w.p, velocity[i]) + - Fns::translateMotion(poses[parent].p - b.cA2w.p, velocity[parent]); + + // we want the quat such that q * cB2cA = targetPosition + PxVec3 rotVec; + if(j.driveType == PxU8(PxArticulationJointDriveType::eTARGET)) + rotVec = Ps::log(j.targetPosition * b.cB2cA.q.getConjugate()); // as a rotation vector + else + rotVec = j.targetPosition.getImaginaryPart(); + + // NM's Tests indicate behavior is better without the term commented out below, even though + // an implicit spring derivation suggests it should be there. 
+ + const PxVec3 posError = b.cA2w.rotate(rotVec); // - currentVel.angular * 0.5f * dt + const PxVec3 velError = b.cA2w.rotate(j.targetVelocity) - currentVel.angular; + + drives[i] = M33MulV3(loads[i], V3LoadU((j.spring * posError + j.damping * velError) * dt * getResistance(j.internalCompliance))); + } +} + +ArticulationPImpl::ComputeUnconstrainedVelocitiesFn ArticulationPImpl::sComputeUnconstrainedVelocities = NULL; +ArticulationPImpl::UpdateBodiesFn ArticulationPImpl::sUpdateBodies = NULL; +ArticulationPImpl::SaveVelocityFn ArticulationPImpl::sSaveVelocity = NULL; + +} +} diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.h new file mode 100644 index 00000000..1c2b28b7 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationHelper.h @@ -0,0 +1,192 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +
#ifndef DY_ARTICULATION_HELPER_H
#define DY_ARTICULATION_HELPER_H


#include "DyArticulation.h"

namespace physx
{
struct PxsBodyCore;

class PxcConstraintBlockStream;
class PxcRigidBody;
class PxsConstraintBlockManager;
struct PxSolverConstraintDesc;

namespace Dy
{
	struct FsInertia;
	struct SolverConstraint1DExt;
	struct ArticulationJointCore;
	struct ArticulationSolverDesc;


// Joint frames of one articulation joint, as computed by ArticulationHelper::setJointTransforms().
struct ArticulationJointTransforms
{
	PxTransform		cA2w;				// joint frame on the parent body, in world space (parentPose * joint.parentPose)
	PxTransform		cB2w;				// joint frame on the child body, in world space (childPose * joint.childPose)
	PxTransform		cB2cA;				// child joint frame relative to the parent joint frame (cA2w^-1 * cB2w), quaternion kept with w >= 0
};

// Static helper functions operating on the articulation data block (FsData) of one articulation.
class ArticulationHelper
{
public:
	// Sets up the articulation data block for a step: prepares the block, the LTB projection and the FS
	// data, sets up and applies the joint drives, applies external accelerations and gravity as impulses,
	// copies the resulting velocities to desc.motionVelocity and sets up the joint limit constraints.
	// acCount receives the number of constraint rows created; the return value is the number of
	// entries written to constraintDesc.
	static PxU32	computeUnconstrainedVelocities(const ArticulationSolverDesc& desc,
												   PxReal dt,
												   PxcConstraintBlockStream& stream,
												   PxSolverConstraintDesc* constraintDesc,
												   PxU32& acCount,
												   PxsConstraintBlockManager& constraintBlockManager,
												   const PxVec3& gravity, PxU64 contextID);

	// Flushes and projects the velocities, integrates the link poses over dt, runs up to
	// core.maxProjectionIterations positional projection passes while the largest joint separation
	// exceeds core.separationTolerance, and writes poses and velocities back to the body cores.
	static void		updateBodies(const ArticulationSolverDesc& desc,
								 PxReal dt);


	// NOTE(review): defined outside this header; presumably returns in deltaV the velocity change of
	// link linkID caused by 'impulse' - confirm against the definition.
	static void		getImpulseResponse(const FsData& matrix,
									   PxU32 linkID,
									   const Cm::SpatialVectorV& impulse,
									   Cm::SpatialVectorV& deltaV);


	// Scalar-typed convenience overload: reinterprets the Cm::SpatialVector arguments as
	// Cm::SpatialVectorV and forwards to the overload above.
	static PX_FORCE_INLINE
	void			getImpulseResponse(const FsData& matrix,
									   PxU32 linkID,
									   const Cm::SpatialVector& impulse,
									   Cm::SpatialVector& deltaV)
	{
		getImpulseResponse(matrix, linkID, reinterpret_cast<const Cm::SpatialVectorV&>(impulse), reinterpret_cast<Cm::SpatialVectorV&>(deltaV));
	}

	// Two-link variant, used by createHardLimit()/createTangentialSpring() with opposite impulses on a
	// parent/child pair of the same articulation.
	static void		getImpulseSelfResponse(const FsData& matrix,
										   PxU32 linkID0,
										   const Cm::SpatialVectorV& impulse0,
										   Cm::SpatialVectorV& deltaV0,
										   PxU32 linkID1,
										   const Cm::SpatialVectorV& impulse1,
										   Cm::SpatialVectorV& deltaV1);

	static void		flushVelocity(FsData& matrix);

	static void		saveVelocity(const ArticulationSolverDesc& m);

	static void		getDataSizes(PxU32 linkCount, PxU32 &solverDataSize, PxU32& totalSize, PxU32& scratchSize);

	// Fills 'data' as a drive cache for the given links, using the same resistance (1/compliance) for
	// every joint; only the low 16 bits of 'iterations' are used. All temporaries are allocated from
	// scratchMemory.
	static void		initializeDriveCache(FsData &data,
										 PxU16 linkCount,
										 const ArticulationLink* links,
										 PxReal compliance,
										 PxU32 iterations,
										 char* scratchMemory,
										 PxU32 scratchMemorySize);

	static PxU32	getDriveCacheLinkCount(const FsData& cache);

	static void		applyImpulses(const FsData& matrix,
								  Cm::SpatialVectorV* Z,
								  Cm::SpatialVectorV* V);

private:
	// Byte sizes of the LTB section (one LtbRow per link) and of the FS section (one FsInertia plus one
	// FsRow per link) of the data block.
	static PxU32	getLtbDataSize(PxU32 linkCount);
	static PxU32	getFsDataSize(PxU32 linkCount);

	// Zeroes the state section of fsData, writes section offsets, link count and parent table, and
	// gathers poses, velocities, inertias and joint frames from the links. A non-zero expectedSize is
	// asserted to match the computed total block size.
	static void		prepareDataBlock(FsData& fsData,
									 const ArticulationLink* links,
									 PxU16 linkCount,
									 PxTransform* poses,
									 FsInertia *baseInertia,
									 ArticulationJointTransforms* jointTransforms,
									 PxU32 expectedSize);

	// Writes mass (1/inverseMass) and world-space inertia tensor of 'body' at orientation pose.q.
	static void		setInertia(FsInertia& inertia,
							   const PxsBodyCore& body,
							   const PxTransform& pose);

	// Computes cA2w, cB2w and cB2cA for one joint from the poses of its parent and child body.
	static void		setJointTransforms(ArticulationJointTransforms& transforms,
									   const PxTransform& parentPose,
									   const PxTransform& childPose,
									   const ArticulationJointCore& joint);

	// Fills the LTB rows (inertia, per-axis jacobians j0/j1 and positional error jC scaled by recipDt).
	static void		prepareLtbMatrix(FsData& fsData,
									 const FsInertia* baseInertia,
									 const PxTransform* poses,
									 const ArticulationJointTransforms* jointTransforms,
									 PxReal recipDt);

	// Fills the FS rows (children, pathToRoot) and the per-link joint motion axes.
	static void		prepareFsData(FsData& fsData,
								  const ArticulationLink* links);

	// Returns 1/compliance; compliance must be > 0 (asserted).
	static PX_FORCE_INLINE PxReal getResistance(PxReal compliance);


	// Sets up a one-sided angular limit row about 'axis' between link linkIndex and its parent.
	static void		createHardLimit(const FsData& fsData,
									const ArticulationLink* links,
									PxU32 linkIndex,
									SolverConstraint1DExt& s,
									const PxVec3& axis,
									PxReal err,
									PxReal recipDt);

	// Sets up a two-sided angular spring row about 'axis' between link linkIndex and its parent.
	static void		createTangentialSpring(const FsData& fsData,
										   const ArticulationLink* links,
										   PxU32 linkIndex,
										   SolverConstraint1DExt& s,
										   const PxVec3& axis,
										   PxReal stiffness,
										   PxReal damping,
										   PxReal dt);

	// Creates the swing/twist limit constraints of all joints; returns the number of constraint descs
	// written and stores the total number of constraint rows in acCount.
	static PxU32	setupSolverConstraints(FsData& fsData, PxU32 solverDataSize,
										   PxcConstraintBlockStream& stream,
										   PxSolverConstraintDesc* constraintDesc,
										   const ArticulationLink* links,
										   const ArticulationJointTransforms* jointTransforms,
										   PxReal dt,
										   PxU32& acCount,
										   PxsConstraintBlockManager& constraintBlockManager);

	// Computes the per-joint drive vectors (drives[1..linkCount-1]) from the joint spring/damping
	// targets, the current link velocities and the joint load matrices.
	static void		computeJointDrives(FsData& fsData,
									   Ps::aos::Vec3V* drives,
									   const ArticulationLink* links,
									   const PxTransform* poses,
									   const ArticulationJointTransforms* transforms,
									   const Ps::aos::Mat33V* loads,
									   PxReal dt);

};

}

}

#endif //DY_ARTICULATION_HELPER_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationPImpl.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationPImpl.h new file mode 100644 index 00000000..e73cc373 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationPImpl.h @@ -0,0 +1,108 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + + +#ifndef DY_ARTICULATION_INTERFACE_H +#define DY_ARTICULATION_INTERFACE_H + +#include "DyArticulationUtils.h" + +namespace physx +{ + +class PxcConstraintBlockStream; +class PxcScratchAllocator; +class PxsConstraintBlockManager; +struct PxSolverConstraintDesc; + +namespace Dy +{ + + struct ArticulationSolverDesc; + + +class ArticulationPImpl +{ +public: + + typedef PxU32 (*ComputeUnconstrainedVelocitiesFn)(const ArticulationSolverDesc& desc, + PxReal dt, + PxcConstraintBlockStream& stream, + PxSolverConstraintDesc* constraintDesc, + PxU32& acCount, + PxsConstraintBlockManager& constraintBlockManager, + const PxVec3& gravity, PxU64 contextID); + + typedef void (*UpdateBodiesFn)(const ArticulationSolverDesc& desc, + PxReal dt); + + typedef void (*SaveVelocityFn)(const ArticulationSolverDesc &m); + + static ComputeUnconstrainedVelocitiesFn sComputeUnconstrainedVelocities; + static UpdateBodiesFn sUpdateBodies; + static SaveVelocityFn sSaveVelocity; + + static PxU32 computeUnconstrainedVelocities(const ArticulationSolverDesc& desc, + PxReal dt, + PxcConstraintBlockStream& stream, + PxSolverConstraintDesc* constraintDesc, + PxU32& acCount, + PxcScratchAllocator&, + PxsConstraintBlockManager& constraintBlockManager, + const PxVec3& gravity, PxU64 contextID) + { + PX_ASSERT(sComputeUnconstrainedVelocities); + if(sComputeUnconstrainedVelocities) + return (sComputeUnconstrainedVelocities)(desc, dt, stream, constraintDesc, acCount, constraintBlockManager, gravity, contextID); + else + return 0; + } + + static void updateBodies(const ArticulationSolverDesc& desc, + PxReal dt) + { + PX_ASSERT(sUpdateBodies); + if(sUpdateBodies) + (*sUpdateBodies)(desc, dt); + } + + static void saveVelocity(const ArticulationSolverDesc& desc) + { + PX_ASSERT(sSaveVelocity); + if(sSaveVelocity) + (*sSaveVelocity)(desc); + } +}; + + +} +} +#endif //DY_ARTICULATION_INTERFACE_H + diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationReference.h 
b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationReference.h new file mode 100644 index 00000000..ff4d0d6e --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationReference.h @@ -0,0 +1,92 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + + +#ifndef DY_ARTICULATION_REFERENCE_H +#define DY_ARTICULATION_REFERENCE_H + +// a per-row struct where we put extra data for debug and setup - ultimately this will move to be just +// debug only + + + +#include "DyArticulationUtils.h" +#include "DyArticulationScalar.h" +#include "DyArticulationFnsScalar.h" +#include "DySpatial.h" + +#if DY_ARTICULATION_DEBUG_VERIFY + +namespace physx +{ + +PX_FORCE_INLINE Cm::SpatialVector propagateVelocity(const FsRow& row, + const FsJointVectors& jv, + const PxVec3& SZ, + const Cm::SpatialVector& v, + const FsRowAux& aux) +{ + typedef ArticulationFnsScalar Fns; + + Cm::SpatialVector w = Fns::translateMotion(-getParentOffset(jv), v); + PxVec3 DSZ = Fns::multiply(row.D, SZ); + + PxVec3 n = Fns::axisDot(getDSI(row), w) + DSZ; + Cm::SpatialVector result = w - Cm::SpatialVector(getJointOffset(jv).cross(n),n); +#if DY_ARTICULATION_DEBUG_VERIFY + Cm::SpatialVector check = ArticulationRef::propagateVelocity(row, jv, SZ, v, aux); + PX_ASSERT((result-check).magnitude()<1e-5*PxMax(check.magnitude(), 1.0f)); +#endif + return result; +} + +PX_FORCE_INLINE Cm::SpatialVector propagateImpulse(const FsRow& row, + const FsJointVectors& jv, + PxVec3& SZ, + const Cm::SpatialVector& Z, + const FsRowAux& aux) +{ + typedef ArticulationFnsScalar Fns; + + SZ = Z.angular + Z.linear.cross(getJointOffset(jv)); + Cm::SpatialVector result = Fns::translateForce(getParentOffset(jv), Z - Fns::axisMultiply(getDSI(row), SZ)); +#if DY_ARTICULATION_DEBUG_VERIFY + PxVec3 SZcheck; + Cm::SpatialVector check = ArticulationRef::propagateImpulse(row, jv, SZcheck, Z, aux); + PX_ASSERT((result-check).magnitude()<1e-5*PxMax(check.magnitude(), 1.0f)); + PX_ASSERT((SZ-SZcheck).magnitude()<1e-5*PxMax(SZcheck.magnitude(), 1.0f)); +#endif + return result; +} + +} +#endif + +#endif //DY_ARTICULATION_REFERENCE_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationSIMD.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationSIMD.cpp new file mode 100644 index 
00000000..e138c192 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationSIMD.cpp @@ -0,0 +1,306 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 


#include "foundation/PxPreprocessor.h"
#include "DySpatial.h"
#include "DyArticulation.h"
#include "DyArticulationScalar.h"
#include "DyArticulationFnsScalar.h"
#include "DyArticulationReference.h"
#include "DyArticulationFnsSimd.h"


namespace physx
{
namespace Dy
{

#if DY_ARTICULATION_DEBUG_VERIFY
namespace
{
	// Debug-only helper: builds a scalar Cm::SpatialVector by reinterpreting the two
	// SIMD vectors as PxVec3.
	Cm::SpatialVector SpV(Vec3V linear, Vec3V angular)
	{
		return Cm::SpatialVector((PxVec3 &)linear, (PxVec3&)angular);
	}
}
#endif

/**
\brief Applies an impulse to link linkID and defers the resulting velocity change.

The negated impulse is propagated from linkID up the parent chain to link 0. At each
link the joint term SZ is accumulated into the deferred-SZ array, and what reaches the
root is accumulated into matrix.deferredZ. No velocity is written here; instead the
links on the path (rows[linkID].pathToRoot) are marked dirty.

\param matrix   Articulation data; deferredSZ, deferredZ and dirty are modified.
\param linkID   Link receiving the impulse.
\param linear   Linear part of the impulse.
\param angular  Angular part of the impulse.
*/
void PxcFsApplyImpulse(FsData &matrix,
					   PxU32 linkID,
					   Vec3V linear,
					   Vec3V angular)
{
#if DY_ARTICULATION_DEBUG_VERIFY
	{
		// Apply the same impulse to the scalar reference velocities for later comparison.
		Cm::SpatialVectorV imp(linear, angular);
		ArticulationRef::applyImpulse(matrix, reinterpret_cast<Cm::SpatialVector *>(getRefVelocity(matrix)), linkID, reinterpret_cast<Cm::SpatialVector&>(imp));
	}
#endif


	Vec3V linZ = V3Neg(linear);
	Vec3V angZ = V3Neg(angular);

	const FsRow *rows = getFsRows(matrix);
	const FsJointVectors* jointVectors = getJointVectors(matrix);

#if DY_ARTICULATION_DEBUG_VERIFY
	const FsRowAux *aux = getAux(matrix);
#endif
	Vec3V *deferredSZ = getDeferredSZ(matrix);

	// Walk from the link towards the root; link 0 terminates the loop.
	for(PxU32 i = linkID; i!=0; i = matrix.parent[i])
	{
		const FsRow &row = rows[i];
		const FsJointVectors& jv = jointVectors[i];

#if DY_ARTICULATION_DEBUG_VERIFY
		PxVec3 SZcheck;
		Cm::SpatialVector Zcheck = ArticulationRef::propagateImpulse(row, jv, SZcheck, SpV(linZ, angZ), aux[i]);
#endif

		Vec3V SZ = V3Add(angZ, V3Cross(linZ, jv.jointOffset));
		// Z - DSI * SZ, expanded per component for the linear and angular parts.
		Vec3V lrLinear = V3Sub(linZ, V3ScaleAdd(row.DSI[0].linear, V3GetX(SZ),
									 V3ScaleAdd(row.DSI[1].linear, V3GetY(SZ),
									 V3Scale(row.DSI[2].linear, V3GetZ(SZ)))));

		Vec3V lrAngular = V3Sub(angZ, V3ScaleAdd(row.DSI[0].angular, V3GetX(SZ),
									  V3ScaleAdd(row.DSI[1].angular, V3GetY(SZ),
									  V3Scale(row.DSI[2].angular, V3GetZ(SZ)))));

		// Translate the reduced impulse by the parent offset.
		linZ = lrLinear;
		angZ = V3Add(lrAngular, V3Cross(jv.parentOffset, lrLinear));
		deferredSZ[i] = V3Add(deferredSZ[i], SZ);

		PX_ASSERT(Ps::aos::isFiniteVec3V(linZ));
		PX_ASSERT(Ps::aos::isFiniteVec3V(angZ));

#if DY_ARTICULATION_DEBUG_VERIFY
		Cm::SpatialVector Z = SpV(linZ,angZ);
		PX_ASSERT((Z - Zcheck).magnitude()<1e-4*PxMax(Zcheck.magnitude(), 1.0f));
		PX_ASSERT(((PxVec3&)SZ-SZcheck).magnitude()<1e-4*PxMax(SZcheck.magnitude(), 1.0f));
#endif
	}

	matrix.deferredZ.linear = V3Add(matrix.deferredZ.linear, linZ);
	matrix.deferredZ.angular = V3Add(matrix.deferredZ.angular, angZ);

	matrix.dirty |= rows[linkID].pathToRoot;
}

/**
\brief Returns the velocity of link linkID, first resolving deferred impulses on its path.

Only links on the path to the root, starting at the lowest-indexed dirty one, are
updated. Children of updated links that are not on the path receive the parent's delta
in their deferred velocity and become dirty; the updated path links are marked clean.

\param matrix  Articulation data; velocity, deferred state and dirty mask may be modified.
\param linkID  Link whose velocity is requested.
\return        The velocity stored for linkID after the update.
*/
Cm::SpatialVectorV PxcFsGetVelocity(FsData &matrix,
									PxU32 linkID)
{
	const FsRow *rows = getFsRows(matrix);
	const FsJointVectors* jointVectors = getJointVectors(matrix);

#if DY_ARTICULATION_DEBUG_VERIFY
	const FsRowAux *aux = getAux(matrix);
#endif
	Cm::SpatialVectorV* PX_RESTRICT V = getVelocity(matrix);

	// find the dirty node on the path (including the root) with the lowest index
	ArticulationBitField toUpdate = rows[linkID].pathToRoot & matrix.dirty;


	if(toUpdate)
	{
		// store the dV elements densely and use an array map to decode - hopefully cache friendlier
		PxU32 indexToStackLoc[DY_ARTICULATION_MAX_SIZE], count = 0;
		Cm::SpatialVectorV dVStack[DY_ARTICULATION_MAX_SIZE];

		// (x & -x) isolates the lowest set bit; subtracting 1 yields a mask of all bits below it.
		ArticulationBitField ignoreNodes = (toUpdate & (0-toUpdate))-1;
		ArticulationBitField path = rows[linkID].pathToRoot & ~ignoreNodes, p = path;
		ArticulationBitField newDirty = 0;

		Vec3V ldV = V3Zero(), adV = V3Zero();
		Cm::SpatialVectorV* PX_RESTRICT defV = getDeferredVel(matrix);
		Vec3V* PX_RESTRICT SZ = getDeferredSZ(matrix);

		// Bit 0 is the root: turn the deferred root impulse into a root velocity delta.
		if(p & 1)
		{
			const FsInertia &m = getRootInverseInertia(matrix);
			Vec3V lZ = V3Neg(matrix.deferredZ.linear);
			Vec3V aZ = V3Neg(matrix.deferredZ.angular);

			ldV = V3Add(M33MulV3(m.ll,lZ), M33MulV3(m.la,aZ));
			adV = V3Add(M33TrnspsMulV3(m.la,lZ), M33MulV3(m.aa,aZ));

			V[0].linear = V3Add(V[0].linear, ldV);
			V[0].angular = V3Add(V[0].angular, adV);

			matrix.deferredZ.linear = V3Zero();
			matrix.deferredZ.angular = V3Zero();

			indexToStackLoc[0] = count;
			Cm::SpatialVectorV &e = dVStack[count++];

			e.linear = ldV;
			e.angular = adV;

			newDirty = rows[0].children;
			// Bit 0 is known to be set, so the decrement just clears it.
			p--;
		}


		while(p) // using "for(;p;p &= (p-1))" here generates LHSs from the ArticulationLowestSetBit
		{
			PxU32 i = ArticulationLowestSetBit(p);
			const FsJointVectors& jv = jointVectors[i];

			// Clear the bit that was just consumed.
			p &= (p-1);

			const FsRow* PX_RESTRICT row = rows + i;

			// ldV/adV still hold the previous path link's delta; add this link's deferred velocity.
			ldV = V3Add(ldV, defV[i].linear);
			adV = V3Add(adV, defV[i].angular);

#if DY_ARTICULATION_DEBUG_VERIFY
			Cm::SpatialVector dVcheck = ArticulationRef::propagateVelocity(*row, jv, (PxVec3&)SZ[i], SpV(ldV,adV), aux[i]);
#endif

			Vec3V DSZ = M33MulV3(row->D, SZ[i]);

			Vec3V lW = V3Add(ldV, V3Cross(adV,jv.parentOffset));
			Vec3V aW = adV;

			const Cm::SpatialVectorV*PX_RESTRICT DSI = row->DSI;
			Vec3V lN = V3Merge(V3Dot(DSI[0].linear, lW), V3Dot(DSI[1].linear, lW), V3Dot(DSI[2].linear, lW));
			Vec3V aN = V3Merge(V3Dot(DSI[0].angular, aW), V3Dot(DSI[1].angular, aW), V3Dot(DSI[2].angular, aW));

			Vec3V n = V3Add(V3Add(lN, aN), DSZ);

			ldV = V3Sub(lW, V3Cross(jv.jointOffset,n));
			adV = V3Sub(aW, n);

#if DY_ARTICULATION_DEBUG_VERIFY
			Cm::SpatialVector dV = SpV(ldV,adV);
			PX_ASSERT((dV-dVcheck).magnitude()<1e-4*PxMax(dVcheck.magnitude(), 1.0f));
#endif

			V[i].linear = V3Add(V[i].linear, ldV);
			V[i].angular = V3Add(V[i].angular, adV);

			// This link's deferred state has now been consumed.
			defV[i].linear = V3Zero();
			defV[i].angular = V3Zero();
			SZ[i] = V3Zero();

			indexToStackLoc[i] = count;
			Cm::SpatialVectorV &e = dVStack[count++];
			newDirty |= rows[i].children;

			e.linear = ldV;
			e.angular = adV;
		}

		// Children of updated links that are off the path inherit their parent's delta as deferred velocity.
		for(ArticulationBitField defer = newDirty&~path; defer; defer &= (defer-1))
		{
			PxU32 i = ArticulationLowestSetBit(defer);
			PxU32 parent = indexToStackLoc[matrix.parent[i]];

			defV[i].linear = V3Add(defV[i].linear, dVStack[parent].linear);
			defV[i].angular = V3Add(defV[i].angular, dVStack[parent].angular);
		}

		matrix.dirty = (matrix.dirty | newDirty)&~path;
	}
#if DY_ARTICULATION_DEBUG_VERIFY
	Cm::SpatialVector v = reinterpret_cast<Cm::SpatialVector&>(V[linkID]);
	Cm::SpatialVector rv = reinterpret_cast<Cm::SpatialVector&>(getRefVelocity(matrix)[linkID]);
	PX_ASSERT((v-rv).magnitude()<1e-4f * PxMax(rv.magnitude(),1.0f));
#endif

	return V[linkID];
}

/**
\brief SIMD velocity propagation step across one joint.

\param row  Row supplying D and DSI.
\param jv   Joint vectors supplying parentOffset and jointOffset.
\param SZ   Deferred joint term, multiplied by row.D.
\param v    Incoming spatial velocity delta.
\param aux  Used only by the DY_ARTICULATION_DEBUG_VERIFY cross-check.
\return     The propagated spatial velocity delta.
*/
PX_FORCE_INLINE Cm::SpatialVectorV propagateVelocitySIMD(const FsRow& row,
														 const FsJointVectors& jv,
														 const Vec3V& SZ,
														 const Cm::SpatialVectorV& v,
														 const FsRowAux& aux)
{
	PX_UNUSED(aux);

	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;

	Cm::SpatialVectorV w(V3Add(v.linear, V3Cross(v.angular, jv.parentOffset)), v.angular);
	Vec3V DSZ = M33MulV3(row.D, SZ);

	Vec3V n = V3Add(Fns::axisDot(row.DSI, w), DSZ);
	Cm::SpatialVectorV result = w - Cm::SpatialVectorV(V3Cross(jv.jointOffset,n), n);

#if DY_ARTICULATION_DEBUG_VERIFY
	Cm::SpatialVector check = ArticulationRef::propagateVelocity(row, jv, reinterpret_cast<const PxVec3&>(SZ), reinterpret_cast<const Cm::SpatialVector&>(v), aux);
	PX_ASSERT((reinterpret_cast<const Cm::SpatialVector&>(result)-check).magnitude()<1e-4*PxMax(check.magnitude(), 1.0f));
#endif

	return result;
}

/**
\brief Resolves all deferred impulses and velocities for every link and clears the dirty mask.

Links are processed in increasing index order. Each link's delta is added to the
deferred velocity of its children, which are processed afterwards.
NOTE(review): this relies on every child having a larger index than its parent - confirm.

\param matrix  Articulation data; velocities are updated, and deferredZ, deferred
               velocities, deferred SZ and dirty are reset.
*/
void PxcFsFlushVelocity(FsData& matrix)
{
	typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns;

	const FsRow* PX_RESTRICT rows = getFsRows(matrix);
	const FsRowAux* PX_RESTRICT aux = getAux(matrix);
	const FsJointVectors*PX_RESTRICT jointVectors = getJointVectors(matrix);

	// Root: convert the deferred impulse into a velocity delta and hand it to the root's children.
	Cm::SpatialVectorV V0 = Fns::multiply(getRootInverseInertia(matrix), -matrix.deferredZ);
	matrix.deferredZ = Cm::SpatialVectorV(PxZero);

	getVelocity(matrix)[0] += V0;
	for(ArticulationBitField defer = rows[0].children; defer; defer &= (defer-1))
		getDeferredVel(matrix)[ArticulationLowestSetBit(defer)] += V0;

	for(PxU32 i = 1; i<matrix.linkCount; i++)
	{
		Cm::SpatialVectorV V = propagateVelocitySIMD(rows[i], jointVectors[i], getDeferredSZ(matrix)[i], getDeferredVel(matrix)[i], aux[i]);
		getDeferredVel(matrix)[i] = Cm::SpatialVectorV(PxZero);
		getDeferredSZ(matrix)[i] = V3Zero();
		getVelocity(matrix)[i] += V;
		for(ArticulationBitField defer = rows[i].children; defer; defer &= (defer-1))
			getDeferredVel(matrix)[ArticulationLowestSetBit(defer)] += V;
	}

#if DY_ARTICULATION_DEBUG_VERIFY
	// After a full flush, every link must agree with the scalar reference velocities.
	for(PxU32 i=0;i<matrix.linkCount;i++)
	{
		Cm::SpatialVector v = velocityRef(matrix,i), rv = reinterpret_cast<Cm::SpatialVector&>(getRefVelocity(matrix)[i]);
		Cm::SpatialVector diff = v-rv;
		PxReal m = rv.magnitude();
		PX_UNUSED(m);
		PX_ASSERT(diff.magnitude()<1e-4*PxMax(1.0f,m));
	}
#endif

	matrix.dirty = 0;
}
}
}
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.cpp new file mode 100644 index 00000000..af00a367 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.cpp @@ -0,0 +1,575 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#include "DyArticulationUtils.h" +#include "DyArticulationScalar.h" +#include "DyArticulationReference.h" +#include "DyArticulationFnsDebug.h" + +namespace physx +{ +namespace Dy +{ +namespace ArticulationRef +{ + Cm::SpatialVector propagateImpulse(const FsRow& row, + const FsJointVectors& j, + PxVec3& SZ, + const Cm::SpatialVector& Z, + const FsRowAux& aux) + { + typedef ArticulationFnsScalar Fns; + + SZ = Fns::axisDot(reinterpret_cast<const Cm::SpatialVector*>(aux.S), Z); + return Fns::translateForce(getParentOffset(j), Z - Fns::axisMultiply(getDSI(row), SZ)); + } + + Cm::SpatialVector propagateVelocity(const FsRow& row, + const FsJointVectors& j, + const PxVec3& SZ, + const Cm::SpatialVector& v, + const FsRowAux& aux) + { + typedef ArticulationFnsScalar Fns; + + Cm::SpatialVector w = Fns::translateMotion(-getParentOffset(j), v); + PxVec3 DSZ = Fns::multiply(row.D, SZ); + + return w - Fns::axisMultiply(reinterpret_cast<const Cm::SpatialVector*>(aux.S), DSZ + Fns::axisDot(getDSI(row), w)); + } + + void applyImpulse(const FsData& matrix, + Cm::SpatialVector* velocity, + PxU32 linkID, + const Cm::SpatialVector& impulse) + { + typedef 
ArticulationFnsScalar Fns; + + PX_ASSERT(matrix.linkCount<=DY_ARTICULATION_MAX_SIZE); + + const FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + Cm::SpatialVector dV[DY_ARTICULATION_MAX_SIZE]; + PxVec3 SZ[DY_ARTICULATION_MAX_SIZE]; + + for(PxU32 i=0;i<matrix.linkCount;i++) + SZ[i] = PxVec3(0); + + Cm::SpatialVector Z = -impulse; + + for(;linkID!=0; linkID = matrix.parent[linkID]) + Z = ArticulationRef::propagateImpulse(rows[linkID], jointVectors[linkID], SZ[linkID], Z, aux[linkID]); + + dV[0] = Fns::getRootDeltaV(matrix,-Z); + + for(PxU32 i=1;i<matrix.linkCount; i++) + dV[i] = ArticulationRef::propagateVelocity(rows[i], jointVectors[i], SZ[i], dV[matrix.parent[i]], aux[i]); + + for(PxU32 i=0;i<matrix.linkCount;i++) + velocity[i] += dV[i]; + } + + void ltbFactor(FsData& m) + { + typedef ArticulationFnsScalar Fns; + LtbRow* rows = getLtbRows(m); + + SpInertia inertia[DY_ARTICULATION_MAX_SIZE]; + for(PxU32 i=0;i<m.linkCount;i++) + inertia[i] = ArticulationFnsDebug::unsimdify(rows[i].inertia); + + Cm::SpatialVector j[3]; + for(PxU32 i=m.linkCount; --i>0;) + { + LtbRow& b = rows[i]; + inertia[i] = Fns::invertInertia(inertia[i]); + PxU32 p = m.parent[i]; + + Cm::SpatialVector* j0 = &reinterpret_cast<Cm::SpatialVector&>(*b.j0), + * j1 = &reinterpret_cast<Cm::SpatialVector&>(*b.j1); + + Fns::multiply(j, inertia[i], j1); + PxMat33 jResponse = Fns::invertSym33(-Fns::multiplySym(j, j1)); + j1[0] = j[0]; j1[1] = j[1]; j1[2] = j[2]; + + b.jResponse = Mat33V_From_PxMat33(jResponse); + Fns::multiply(j, j0, jResponse); + inertia[p] = Fns::multiplySubtract(inertia[p], j, j0); + j0[0] = j[0]; j0[1] = j[1]; j0[2] = j[2]; + } + + rows[0].inertia = Fns::invertInertia(inertia[0]); + for(PxU32 i=1;i<m.linkCount;i++) + rows[i].inertia = inertia[i]; + } + + +} + +#if 0 + + +void ltbSolve(const FsData& m, + Vec3V* c, // rhs error to solve for + Cm::SpatialVector* y) // velocity delta output +{ + 
typedef ArticulationFnsScalar Fns; + + PxVec4* b = reinterpret_cast<PxVec4*>(c); + const LtbRow* rows = getLtbRows(m); + PxMemZero(y, m.linkCount*sizeof(Cm::SpatialVector)); + + for(PxU32 i=m.linkCount;i-->1;) + { + PxU32 p = m.parent[i]; + const LtbRow& r = rows[i]; + b[i] -= PxVec4(Fns::axisDot(&static_cast<const Cm::SpatialVector&>(*r.j1), y[i]),0); + y[p] -= Fns::axisMultiply(&static_cast<const Cm::SpatialVector&>(*r.j0), b[i].getXYZ()); + } + + y[0] = Fns::multiply(rows[0].inertia,y[0]); + + for(PxU32 i=1; i<m.linkCount; i++) + { + PxU32 p = m.parent[i]; + const LtbRow& r = rows[i]; + PxVec3 t = Fns::multiply(r.jResponse, b[i].getXYZ()) - Fns::axisDot(&static_cast<const Cm::SpatialVector&>(*r.j0), y[p]); + y[i] = Fns::multiply(r.inertia, y[i]) - Fns::axisMultiply(&static_cast<const Cm::SpatialVector&>(*r.j1), t); + } +} + +void PxcFsPropagateDrivenInertiaScalar(FsData& matrix, + const FsInertia* baseInertia, + const PxReal* isf, + const Mat33V* load) +{ + typedef ArticulationFnsScalar Fns; + + Cm::SpatialVector IS[3], DSI[3]; + PxMat33 D; + + FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + SpInertia inertia[DY_ARTICULATION_MAX_SIZE]; + for(PxU32 i=0;i<matrix.linkCount;i++) + inertia[i] = ArticulationFnsDebug::unsimdify(baseInertia[i]); + + for(PxU32 i=matrix.linkCount; --i>0;) + { + FsRow& r = rows[i]; + const FsRowAux& a = aux[i]; + const FsJointVectors& jv = jointVectors[i]; + + Fns::multiply(IS, inertia[i], &static_cast<const Cm::SpatialVector&>(*a.S)); + + PX_ALIGN(16, PxMat33) L; + PxMat33_From_Mat33V(load[i], L); + D = Fns::invertSym33(Fns::multiplySym(&static_cast<const Cm::SpatialVector&>(*a.S), IS) + L*isf[i]); + + Fns::multiply(DSI, IS, D); + + r.D = Mat33V_From_PxMat33(D); + static_cast<Cm::SpatialVector&>(r.DSI[0]) = DSI[0]; + static_cast<Cm::SpatialVector&>(r.DSI[1]) = DSI[1]; + static_cast<Cm::SpatialVector&>(r.DSI[2]) = DSI[2]; + + 
inertia[matrix.parent[i]] += Fns::translate(getParentOffset(jv), Fns::multiplySubtract(inertia[i], DSI, IS)); + } + + FsInertia& m = getRootInverseInertia(matrix); + m = FsInertia(Fns::invertInertia(inertia[0])); +} + +// no need to compile this ecxcept for verification, and it consumes huge amounts of stack space +void PxcFsComputeJointLoadsScalar(const FsData& matrix, + const FsInertia*PX_RESTRICT baseInertia, + Mat33V*PX_RESTRICT load, + const PxReal*PX_RESTRICT isf, + PxU32 linkCount, + PxU32 maxIterations) +{ + typedef ArticulationFnsScalar Fns; + + // the childward S + SpInertia leafwardInertia[DY_ARTICULATION_MAX_SIZE]; + SpInertia rootwardInertia[DY_ARTICULATION_MAX_SIZE]; + SpInertia inertia[DY_ARTICULATION_MAX_SIZE]; + SpInertia contribToParent[DY_ARTICULATION_MAX_SIZE]; + + // total articulated inertia assuming the articulation is rooted here + + const FsRow* row = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + PX_UNUSED(row); + + PxMat33 load_[DY_ARTICULATION_MAX_SIZE]; + + for(PxU32 iter=0;iter<maxIterations;iter++) + { + for(PxU32 i=0;i<linkCount;i++) + inertia[i] = ArticulationFnsDebug::unsimdify(baseInertia[i]); + + for(PxU32 i=linkCount;i-->1;) + { + const FsJointVectors& j = jointVectors[i]; + + leafwardInertia[i] = inertia[i]; + contribToParent[i] = Fns::propagate(inertia[i], &static_cast<const Cm::SpatialVector&>(*aux[i].S), load_[i], isf[i]); + inertia[matrix.parent[i]] += Fns::translate((PxVec3&)j.parentOffset, contribToParent[i]); + } + + for(PxU32 i=1;i<linkCount;i++) + { + rootwardInertia[i] = Fns::translate(-(PxVec3&)jointVectors[i].parentOffset, inertia[matrix.parent[i]]) - contribToParent[i]; + inertia[i] += Fns::propagate(rootwardInertia[i], &static_cast<const Cm::SpatialVector&>(*aux[i].S), load_[i], isf[i]); + } + + for(PxU32 i=1;i<linkCount;i++) + { + load_[i] = Fns::computeDriveInertia(leafwardInertia[i], rootwardInertia[i], &static_cast<const 
Cm::SpatialVector&>(*aux[i].S)); + PX_ASSERT(load_[i][0].isFinite() && load_[i][1].isFinite() && load_[2][i].isFinite()); + } + } + for(PxU32 i=1;i<linkCount;i++) + load[i] = Mat33V_From_PxMat33(load_[i]); +} + + +void PxcFsApplyImpulse(const FsData& matrix, + PxU32 linkID, + const Cm::SpatialVector& impulse) +{ +#if DY_ARTICULATION_DEBUG_VERIFY + PxcFsRefApplyImpulse(matrix, state.refVelocity, linkID, impulse); +#endif + + Cm::SpatialVector Z = -impulse; + + for(PxU32 i = linkID; i!=0; i = matrix.row[i].parent) + { + PxVec3 SZ; + Z = propagateImpulse(matrix.row[i], SZ, Z, matrix.aux[i]); + deferredSZRef(state,i) += SZ; + } + + static_cast<Cm::SpatialVector &>(state.deferredZ) += Z; + state.dirty |= matrix.row[linkID].pathToRoot; +} + +Cm::SpatialVector PxcFsGetVelocity(const FsData& matrix, + PxU32 linkID) +{ + // find the dirty node on the path (including the root) with the lowest index + ArticulationBitField toUpdate = matrix.row[linkID].pathToRoot & state.dirty; + + if(toUpdate) + { + ArticulationBitField ignoreNodes = (toUpdate & (0-toUpdate))-1; + ArticulationBitField path = matrix.row[linkID].pathToRoot & ~ignoreNodes, p = path; + ArticulationBitField newDirty = 0; + + Cm::SpatialVector dV = Cm::SpatialVector::zero(); + if(p & 1) + { + dV = getRootDeltaV(matrix, -deferredZ(state)); + + velocityRef(state, 0) += dV; + for(ArticulationBitField defer = matrix.row[0].children & ~path; defer; defer &= (defer-1)) + deferredVelRef(state, ArticulationLowestSetBit(defer)) += dV; + + deferredZRef(state) = Cm::SpatialVector::zero(); + newDirty = matrix.row[0].children; + p--; + } + + for(; p; p &= (p-1)) + { + PxU32 i = ArticulationLowestSetBit(p); + + dV = propagateVelocity(matrix.row[i], deferredSZ(state,i), dV + state.deferredVel[i], matrix.aux[i]); + + velocityRef(state,i) += dV; + for(ArticulationBitField defer = matrix.row[i].children & ~path; defer; defer &= (defer-1)) + deferredVelRef(state,ArticulationLowestSetBit(defer)) += dV; + + newDirty |= 
matrix.row[i].children; + deferredVelRef(state,i) = Cm::SpatialVector::zero(); + deferredSZRef(state,i) = PxVec3(0); + } + + state.dirty = (state.dirty | newDirty)&~path; + } +#if DY_ARTICULATION_DEBUG_VERIFY + Cm::SpatialVector v = state.velocity[linkID]; + Cm::SpatialVector rv = state.refVelocity[linkID]; + PX_ASSERT((v-rv).magnitude()<1e-4f * rv.magnitude()); +#endif + + return state.velocity[linkID]; +} + +void PxcFsFlushVelocity(const FsData& matrix) +{ + Cm::SpatialVector V = getRootDeltaV(matrix, -deferredZ(state)); + deferredZRef(state) = Cm::SpatialVector::zero(); + velocityRef(state,0) += V; + for(ArticulationBitField defer = matrix.row[0].children; defer; defer &= (defer-1)) + deferredVelRef(state,ArticulationLowestSetBit(defer)) += V; + + for(PxU32 i = 1; i<matrix.linkCount; i++) + { + Cm::SpatialVector V = propagateVelocity(matrix.row[i], deferredSZ(state,i), state.deferredVel[i], matrix.aux[i]); + deferredVelRef(state,i) = Cm::SpatialVector::zero(); + deferredSZRef(state,i) = PxVec3(0); + velocityRef(state,i) += V; + for(ArticulationBitField defer = matrix.row[i].children; defer; defer &= (defer-1)) + deferredVelRef(state,ArticulationLowestSetBit(defer)) += V; + } + + state.dirty = 0; +} + +void PxcFsPropagateDrivenInertiaScalar(FsData& matrix, + const FsInertia* baseInertia, + const PxReal* isf, + const Mat33V* load, + PxcFsScratchAllocator allocator) +{ + typedef ArticulationFnsSimd<ArticulationFnsSimdBase> Fns; + + Cm::SpatialVectorV IS[3]; + PxMat33 D; + + FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + FsInertia *inertia = allocator.alloc<FsInertia>(matrix.linkCount); + PxMemCopy(inertia, baseInertia, matrix.linkCount*sizeof(FsInertia)); + + for(PxU32 i=matrix.linkCount; --i>0;) + { + FsRow& r = rows[i]; + const FsRowAux& a = aux[i]; + const FsJointVectors& jv = jointVectors[i]; + + Mat33V m = Fns::computeSIS(inertia[i], a.S, IS); + FloatV f = 
FLoad(isf[i]); + + Mat33V D = Fns::invertSym33(Mat33V(V3ScaleAdd(load[i].col0, f, m.col0), + V3ScaleAdd(load[i].col1, f, m.col1), + V3ScaleAdd(load[i].col2, f, m.col2))); + r.D = D; + + inertia[matrix.parent[i]] = Fns::addInertia(inertia[matrix.parent[i]], + Fns::translateInertia(jv.parentOffset, Fns::multiplySubtract(inertia[i], D, IS, r.DSI))); + } + + getRootInverseInertia(matrix) = Fns::invertInertia(inertia[0]); +} + +void PxcLtbSolve(const FsData& m, + Vec3V* c, // rhs error to solve for + Cm::SpatialVector* y) // velocity delta output +{ + typedef ArticulationFnsScalar Fns; + + PxVec4* b = reinterpret_cast<PxVec4*>(c); + const LtbRow* rows = getLtbRows(m); + PxMemZero(y, m.linkCount*sizeof(Cm::SpatialVector)); + + for(PxU32 i=m.linkCount;i-->1;) + { + PxU32 p = m.parent[i]; + const LtbRow& r = rows[i]; + b[i] -= PxVec4(Fns::axisDot(&static_cast<const Cm::SpatialVector&>(*r.j1), y[i]),0); + y[p] -= Fns::axisMultiply(&static_cast<const Cm::SpatialVector&>(*r.j0), b[i].getXYZ()); + } + + y[0] = Fns::multiply(rows[0].inertia,y[0]); + + for(PxU32 i=1; i<m.linkCount; i++) + { + PxU32 p = m.parent[i]; + const LtbRow& r = rows[i]; + PxVec3 t = Fns::multiply(r.jResponse, b[i].getXYZ()) - Fns::axisDot(&static_cast<const Cm::SpatialVector&>(*r.j0), y[p]); + y[i] = Fns::multiply(r.inertia, y[i]) - Fns::axisMultiply(&static_cast<const Cm::SpatialVector&>(*r.j1), t); + } +} + + +#endif + + +#if DY_ARTICULATION_DEBUG_VERIFY +void PxcLtbFactorScalar(FsData& m) +{ + typedef ArticulationFnsScalar Fns; + LtbRow* rows = getLtbRows(m); + + SpInertia inertia[DY_ARTICULATION_MAX_SIZE]; + for(PxU32 i=0;i<m.linkCount;i++) + inertia[i] = ArticulationFnsDebug::unsimdify(rows[i].inertia); + + Cm::SpatialVector j[3]; + for(PxU32 i=m.linkCount; --i>0;) + { + LtbRow& b = rows[i]; + inertia[i] = Fns::invertInertia(inertia[i]); + PxU32 p = m.parent[i]; + + Cm::SpatialVector* j0 = &reinterpret_cast<Cm::SpatialVector&>(*b.j0), + * j1 = &reinterpret_cast<Cm::SpatialVector&>(*b.j1); + + 
Fns::multiply(j, inertia[i], j1); + PxMat33 jResponse = Fns::invertSym33(-Fns::multiplySym(j, j1)); + j1[0] = j[0]; j1[1] = j[1]; j1[2] = j[2]; + + b.jResponse = Mat33V_From_PxMat33(jResponse); + Fns::multiply(j, j0, jResponse); + inertia[p] = Fns::multiplySubtract(inertia[p], j, j0); + j0[0] = j[0]; j0[1] = j[1]; j0[2] = j[2]; + } + + rows[0].inertia = Fns::invertInertia(inertia[0]); + for(PxU32 i=1;i<m.linkCount;i++) + rows[i].inertia = inertia[i]; +} + +void PxcFsPropagateDrivenInertiaScalar(FsData& matrix, + const FsInertia* baseInertia, + const PxReal* isf, + const Mat33V* load) +{ + typedef ArticulationFnsScalar Fns; + + Cm::SpatialVector IS[3], DSI[3]; + PxMat33 D; + + FsRow* rows = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + SpInertia inertia[DY_ARTICULATION_MAX_SIZE]; + for(PxU32 i=0;i<matrix.linkCount;i++) + inertia[i] = ArticulationFnsDebug::unsimdify(baseInertia[i]); + + for(PxU32 i=matrix.linkCount; --i>0;) + { + FsRow& r = rows[i]; + const FsRowAux& a = aux[i]; + const FsJointVectors& jv = jointVectors[i]; + + Fns::multiply(IS, inertia[i], &reinterpret_cast<const Cm::SpatialVector&>(*a.S)); + + PX_ALIGN(16, PxMat33) L; + PxMat33_From_Mat33V(load[i], L); + D = Fns::invertSym33(Fns::multiplySym(&reinterpret_cast<const Cm::SpatialVector&>(*a.S), IS) + L*isf[i]); + + Fns::multiply(DSI, IS, D); + + r.D = Mat33V_From_PxMat33(D); + reinterpret_cast<Cm::SpatialVector&>(r.DSI[0]) = DSI[0]; + reinterpret_cast<Cm::SpatialVector&>(r.DSI[1]) = DSI[1]; + reinterpret_cast<Cm::SpatialVector&>(r.DSI[2]) = DSI[2]; + + inertia[matrix.parent[i]] += Fns::translate(getParentOffset(jv), Fns::multiplySubtract(inertia[i], DSI, IS)); + } + + FsInertia& m = getRootInverseInertia(matrix); + m = FsInertia(Fns::invertInertia(inertia[0])); +} + +// no need to compile this ecxcept for verification, and it consumes huge amounts of stack space +void PxcFsComputeJointLoadsScalar(const FsData& matrix, + 
const FsInertia*PX_RESTRICT baseInertia, + Mat33V*PX_RESTRICT load, + const PxReal*PX_RESTRICT isf, + PxU32 linkCount, + PxU32 maxIterations) +{ + typedef ArticulationFnsScalar Fns; + + // the childward S + SpInertia leafwardInertia[DY_ARTICULATION_MAX_SIZE]; + SpInertia rootwardInertia[DY_ARTICULATION_MAX_SIZE]; + SpInertia inertia[DY_ARTICULATION_MAX_SIZE]; + SpInertia contribToParent[DY_ARTICULATION_MAX_SIZE]; + + // total articulated inertia assuming the articulation is rooted here + + const FsRow* row = getFsRows(matrix); + const FsRowAux* aux = getAux(matrix); + const FsJointVectors* jointVectors = getJointVectors(matrix); + + PX_UNUSED(row); + + // NOTE(review): nothing in this function writes load_ before the first iteration's Fns::propagate calls read it - confirm PxMat33's default construction makes that safe + PxMat33 load_[DY_ARTICULATION_MAX_SIZE]; + + for(PxU32 iter=0;iter<maxIterations;iter++) + { + // restart every iteration from the per-link base inertia + for(PxU32 i=0;i<linkCount;i++) + inertia[i] = ArticulationFnsDebug::unsimdify(baseInertia[i]); + + // highest index down to 1: record each link's inertia and add its propagated contribution to its parent + for(PxU32 i=linkCount;i-->1;) + { + const FsJointVectors& j = jointVectors[i]; + + leafwardInertia[i] = inertia[i]; + contribToParent[i] = Fns::propagate(inertia[i], &reinterpret_cast<const Cm::SpatialVector&>(*aux[i].S), load_[i], isf[i]); + inertia[matrix.parent[i]] += Fns::translate((PxVec3&)j.parentOffset, contribToParent[i]); + } + + // index 1 upward: the parent's total minus this link's own contribution, translated to this link + for(PxU32 i=1;i<linkCount;i++) + { + rootwardInertia[i] = Fns::translate(-(PxVec3&)jointVectors[i].parentOffset, inertia[matrix.parent[i]]) - contribToParent[i]; + inertia[i] += Fns::propagate(rootwardInertia[i], &reinterpret_cast<const Cm::SpatialVector&>(*aux[i].S), load_[i], isf[i]); + } + + for(PxU32 i=1;i<linkCount;i++) + { + load_[i] = Fns::computeDriveInertia(leafwardInertia[i], rootwardInertia[i], &reinterpret_cast<const Cm::SpatialVector&>(*aux[i].S)); + // check all three columns of link i's matrix (the third term previously indexed load_[2][i], i.e. column i of link 2) + PX_ASSERT(load_[i][0].isFinite() && load_[i][1].isFinite() && load_[i][2].isFinite()); + } + } + // entry 0 of the output array is not written by this function + for(PxU32 i=1;i<linkCount;i++) + load[i] = Mat33V_From_PxMat33(load_[i]); +} +#endif + +} + +} diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.h new 
file mode 100644 index 00000000..8d639de3 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationScalar.h @@ -0,0 +1,101 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + + +#ifndef DY_ARTICULATION_SCALAR_H +#define DY_ARTICULATION_SCALAR_H + +// Scalar helpers for articulations + +#include "foundation/PxUnionCast.h" +#include "DyArticulationUtils.h" +#include "DySpatial.h" +#include "PsFPU.h" + +namespace physx +{ + +namespace Dy +{ + +PX_FORCE_INLINE Cm::SpatialVector& velocityRef(FsData &m, PxU32 i) +{ + return reinterpret_cast<Cm::SpatialVector&>(getVelocity(m)[i]); +} + +PX_FORCE_INLINE Cm::SpatialVector& deferredVelRef(FsData &m, PxU32 i) +{ + return reinterpret_cast<Cm::SpatialVector&>(getDeferredVel(m)[i]); +} + +PX_FORCE_INLINE PxVec3& deferredSZRef(FsData &m, PxU32 i) +{ + return reinterpret_cast<PxVec3 &>(getDeferredSZ(m)[i]); +} + +PX_FORCE_INLINE const PxVec3& deferredSZ(const FsData &s, PxU32 i) +{ + return reinterpret_cast<const PxVec3 &>(getDeferredSZ(s)[i]); +} + +PX_FORCE_INLINE Cm::SpatialVector& deferredZRef(FsData &s) +{ + return unsimdRef(s.deferredZ); +} + + +PX_FORCE_INLINE const Cm::SpatialVector& deferredZ(const FsData &s) +{ + return unsimdRef(s.deferredZ); +} + +PX_FORCE_INLINE const PxVec3& getJointOffset(const FsJointVectors& j) +{ + return reinterpret_cast<const PxVec3& >(j.jointOffset); +} + +PX_FORCE_INLINE const PxVec3& getParentOffset(const FsJointVectors& j) +{ + return reinterpret_cast<const PxVec3&>(j.parentOffset); +} + + + + +PX_FORCE_INLINE const Cm::SpatialVector* getDSI(const FsRow& row) +{ + return PxUnionCast<const Cm::SpatialVector*,const Cm::SpatialVectorV*>(row.DSI); //reinterpret_cast<const Cm::SpatialVector*>(row.DSI); +} + +} + +} + +#endif //DY_ARTICULATION_SCALAR_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationUtils.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationUtils.h new file mode 100644 index 00000000..67c4270d --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyArticulationUtils.h @@ -0,0 +1,317 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement 
provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + + +#ifndef DY_ARTICULATION_H +#define DY_ARTICULATION_H + +#include "PsVecMath.h" +#include "CmSpatialVector.h" +#include "DySpatial.h" +#include "PsBitUtils.h" +#include "DyArticulation.h" +#include "DyArticulationHelper.h" + +namespace physx +{ + +namespace Dy +{ + struct ArticulationCore; + struct ArticulationLink; + typedef size_t ArticulationLinkHandle; + class Articulation; + +#define DY_ARTICULATION_DEBUG_VERIFY 0 + +PX_FORCE_INLINE PxU32 ArticulationLowestSetBit(ArticulationBitField val) +{ + PxU32 low = PxU32(val&0xffffffff), high = PxU32(val>>32); + PxU32 mask = PxU32((!low)-1); + PxU32 result = (mask&Ps::lowestSetBitUnsafe(low)) | ((~mask)&(Ps::lowestSetBitUnsafe(high)+32)); + PX_ASSERT(val & (PxU64(1)<<result)); + PX_ASSERT(!(val & ((PxU64(1)<<result)-1))); + return result; +} + +using namespace Ps::aos; + + + +PX_FORCE_INLINE Cm::SpatialVector& unsimdRef(Cm::SpatialVectorV& v) { return reinterpret_cast<Cm::SpatialVector&>(v); } +PX_FORCE_INLINE const Cm::SpatialVector& unsimdRef(const Cm::SpatialVectorV& v) { return reinterpret_cast<const Cm::SpatialVector&>(v); } + + +PX_ALIGN_PREFIX(16) +struct FsJointVectors +{ + Vec3V parentOffset; // 16 bytes world-space offset from parent to child + Vec3V jointOffset; // 16 bytes world-space offset from child to joint +} +PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct FsRow +{ + Cm::SpatialVectorV DSI[3]; // 96 bytes + Mat33V D; // 48 bytes + ArticulationBitField children; // 8 bytes bitmap of children + ArticulationBitField pathToRoot; // 8 bytes bitmap of nodes to root, including self and root +} +PX_ALIGN_SUFFIX(16); + +PX_COMPILE_TIME_ASSERT(sizeof(FsRow)==160); + + + +PX_ALIGN_PREFIX(16) +struct FsInertia +{ + Mat33V ll, la, aa; + PX_FORCE_INLINE FsInertia(const Mat33V& _ll, const Mat33V& _la, const Mat33V& _aa): ll(_ll), la(_la), aa(_aa) {} + PX_FORCE_INLINE FsInertia(const SpInertia& I) + : ll(Mat33V_From_PxMat33(I.mLL)), la(Mat33V_From_PxMat33(I.mLA)), aa(Mat33V_From_PxMat33(I.mAA)) {} + 
PX_FORCE_INLINE FsInertia() {} + + PX_FORCE_INLINE void operator=(const FsInertia& other) + { + ll.col0 = other.ll.col0; ll.col1 = other.ll.col1; ll.col2 = other.ll.col2; + la.col0 = other.la.col0; la.col1 = other.la.col1; la.col2 = other.la.col2; + aa.col0 = other.aa.col0; aa.col1 = other.aa.col1; aa.col2 = other.aa.col2; + } + + PX_FORCE_INLINE FsInertia(const FsInertia& other) + { + ll.col0 = other.ll.col0; ll.col1 = other.ll.col1; ll.col2 = other.ll.col2; + la.col0 = other.la.col0; la.col1 = other.la.col1; la.col2 = other.la.col2; + aa.col0 = other.aa.col0; aa.col1 = other.aa.col1; aa.col2 = other.aa.col2; + } + +}PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct LtbRow +{ + FsInertia inertia; // body inertia in world space + Cm::SpatialVectorV j0[3], j1[3]; // jacobians + Mat33V jResponse; // inverse response matrix of joint + Vec3V jC; +} PX_ALIGN_SUFFIX(16); + +PX_ALIGN_PREFIX(16) +struct FsRowAux +{ + Cm::SpatialVectorV S[3]; // motion subspace +}PX_ALIGN_SUFFIX(16); + + +// Header of one articulation's data block. The trailing numbers on each field are the running size in bytes after that field. +// The *Offset fields are byte offsets measured from the address of this struct (see the get* accessors in this file). +struct FsData +{ + Articulation* articulationX; //4 + +#if !PX_P64_FAMILY + PxU32 pad0; //8 +#endif + PxU16 linkCount; // number of links //10 + PxU16 jointVectorOffset; // byte offset to the FsJointVectors array (see getJointVectors) //12 + PxU16 maxSolverNormalProgress; //14 + PxU16 maxSolverFrictionProgress; //16 + + PxU64 dirty; //24 + PxU16 ltbDataOffset; // byte offset to the LtbRow array (see getLtbRows) //26 + PxU16 fsDataOffset; // byte offset to the root inverse FsInertia, which the FsRow array follows (see getRootInverseInertia, getFsRows) //28 + PxU32 solverProgress; //32 + + + Cm::SpatialVectorV deferredZ; //64 + PxU8 parent[DY_ARTICULATION_MAX_SIZE]; // parent link index for each link //128 +}; + +PX_COMPILE_TIME_ASSERT(0 == (sizeof(FsData) & 0x0f)); + +// These two values equal the byte offsets of FsData::solverProgress (28) and FsData::maxSolverNormalProgress (12) implied by the running sizes above - NOTE(review): keep them in sync if FsData changes +#define SOLVER_BODY_SOLVER_PROGRESS_OFFSET 28 +#define SOLVER_BODY_MAX_SOLVER_PROGRESS_OFFSET 12 + +namespace +{ + template<class T> PX_FORCE_INLINE T addAddr(void* addr, PxU32 increment) + { + return reinterpret_cast<T>(reinterpret_cast<char*>(addr)+increment); + } + + template<class T> PX_FORCE_INLINE T addAddr(const void* addr, PxU32 increment) + { + return 
reinterpret_cast<T>(reinterpret_cast<const char*>(addr)+increment); + } +} + +PX_FORCE_INLINE Cm::SpatialVectorV* getVelocity(FsData& matrix) +{ + return addAddr<Cm::SpatialVectorV*>(&matrix, sizeof(FsData)); +} + + + + +PX_FORCE_INLINE const Cm::SpatialVectorV* getVelocity(const FsData& matrix) +{ + return addAddr<const Cm::SpatialVectorV*>(&matrix, sizeof(FsData)); +} + +PX_FORCE_INLINE Cm::SpatialVectorV* getDeferredVel(FsData& matrix) +{ + return addAddr<Cm::SpatialVectorV*>(getVelocity(matrix), sizeof(Cm::SpatialVectorV) * matrix.linkCount); +} + +PX_FORCE_INLINE const Cm::SpatialVectorV* getDeferredVel(const FsData& matrix) +{ + return addAddr<const Cm::SpatialVectorV*>(getVelocity(matrix), sizeof(Cm::SpatialVectorV) * matrix.linkCount); +} + +PX_FORCE_INLINE Vec3V* getDeferredSZ(FsData& matrix) +{ + return addAddr<Vec3V*>(getDeferredVel(matrix), sizeof(Cm::SpatialVectorV) * matrix.linkCount); +} + +PX_FORCE_INLINE const Vec3V* getDeferredSZ(const FsData& matrix) +{ + return addAddr<const Vec3V*>(getDeferredVel(matrix), sizeof(Cm::SpatialVectorV) * matrix.linkCount); +} + +PX_FORCE_INLINE const PxReal* getMaxPenBias(const FsData& matrix) +{ + return addAddr<const PxReal*>(getDeferredSZ(matrix), sizeof(Vec3V) * matrix.linkCount); +} + +PX_FORCE_INLINE PxReal* getMaxPenBias(FsData& matrix) +{ + return addAddr<PxReal*>(getDeferredSZ(matrix), sizeof(Vec3V) * matrix.linkCount); +} + + +PX_FORCE_INLINE FsJointVectors* getJointVectors(FsData& matrix) +{ + return addAddr<FsJointVectors *>(&matrix,matrix.jointVectorOffset); +} + +PX_FORCE_INLINE const FsJointVectors* getJointVectors(const FsData& matrix) +{ + return addAddr<const FsJointVectors *>(&matrix,matrix.jointVectorOffset); +} + +PX_FORCE_INLINE FsInertia& getRootInverseInertia(FsData& matrix) +{ + return *addAddr<FsInertia*>(&matrix,matrix.fsDataOffset); +} + +PX_FORCE_INLINE const FsInertia& getRootInverseInertia(const FsData& matrix) +{ + return *addAddr<const FsInertia*>(&matrix,matrix.fsDataOffset); + +} 
+ +PX_FORCE_INLINE FsRow* getFsRows(FsData& matrix) +{ + return addAddr<FsRow*>(&getRootInverseInertia(matrix),sizeof(FsInertia)); +} + +PX_FORCE_INLINE const FsRow* getFsRows(const FsData& matrix) +{ + return addAddr<const FsRow*>(&getRootInverseInertia(matrix),sizeof(FsInertia)); +} + + +PX_FORCE_INLINE LtbRow* getLtbRows(FsData& matrix) +{ + return addAddr<LtbRow*>(&matrix,matrix.ltbDataOffset); +} + +PX_FORCE_INLINE const LtbRow* getLtbRows(const FsData& matrix) +{ + return addAddr<const LtbRow*>(&matrix,matrix.ltbDataOffset); +} + + +PX_FORCE_INLINE Cm::SpatialVectorV* getRefVelocity(FsData& matrix) +{ + return addAddr<Cm::SpatialVectorV*>(getLtbRows(matrix), sizeof(LtbRow)*matrix.linkCount); +} + +PX_FORCE_INLINE const Cm::SpatialVectorV* getRefVelocity(const FsData& matrix) +{ + return addAddr<const Cm::SpatialVectorV*>(getLtbRows(matrix), sizeof(LtbRow)*matrix.linkCount); +} + +PX_FORCE_INLINE FsRowAux* getAux(FsData& matrix) +{ + return addAddr<FsRowAux*>(getRefVelocity(matrix),sizeof(Cm::SpatialVectorV)*matrix.linkCount); +} + +PX_FORCE_INLINE const FsRowAux* getAux(const FsData& matrix) +{ + return addAddr<const FsRowAux*>(getRefVelocity(matrix),sizeof(Cm::SpatialVectorV)*matrix.linkCount); +} + +void PxcFsApplyImpulse(FsData& matrix, + PxU32 linkID, + Vec3V linear, + Vec3V angular); + +Cm::SpatialVectorV PxcFsGetVelocity(FsData& matrix, + PxU32 linkID); + + +#if DY_ARTICULATION_DEBUG_VERIFY +namespace ArticulationRef +{ + Cm::SpatialVector propagateVelocity(const FsRow& row, + const FsJointVectors& jv, + const PxVec3& SZ, + const Cm::SpatialVector& v, + const FsRowAux& aux); + + Cm::SpatialVector propagateImpulse(const FsRow& row, + const FsJointVectors& jv, + PxVec3& SZ, + const Cm::SpatialVector& Z, + const FsRowAux& aux); + + void applyImpulse(const FsData& matrix, + Cm::SpatialVector* velocity, + PxU32 linkID, + const Cm::SpatialVector& impulse); + +} +#endif + +} +} + +#endif //DY_ARTICULATION_H diff --git 
a/PhysX_3.4/Source/LowLevelDynamics/src/DyBodyCoreIntegrator.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyBodyCoreIntegrator.h new file mode 100644 index 00000000..3e842341 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyBodyCoreIntegrator.h @@ -0,0 +1,405 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef DY_BODYCORE_INTEGRATOR_H +#define DY_BODYCORE_INTEGRATOR_H + +#include "CmPhysXCommon.h" +#include "PxvDynamics.h" +#include "PsMathUtils.h" +#include "PxsRigidBody.h" +#include "DySolverBody.h" +#include "DySleepingConfigulation.h" +#include "PxsIslandSim.h" + +namespace physx +{ + +namespace Dy +{ + +PX_FORCE_INLINE void bodyCoreComputeUnconstrainedVelocity +(const PxVec3& gravity, const PxReal dt, const PxReal linearDamping, const PxReal angularDamping, const PxReal accelScale, +const PxReal maxLinearVelocitySq, const PxReal maxAngularVelocitySq, PxVec3& inOutLinearVelocity, PxVec3& inOutAngularVelocity, +bool disableGravity) +{ + + //Multiply everything that needs multiplied by dt to improve code generation. + + PxVec3 linearVelocity = inOutLinearVelocity; + PxVec3 angularVelocity = inOutAngularVelocity; + + const PxReal linearDampingTimesDT=linearDamping*dt; + const PxReal angularDampingTimesDT=angularDamping*dt; + const PxReal oneMinusLinearDampingTimesDT=1.0f-linearDampingTimesDT; + const PxReal oneMinusAngularDampingTimesDT=1.0f-angularDampingTimesDT; + + //TODO context-global gravity + if (!disableGravity) + { + const PxVec3 linearAccelTimesDT = gravity*dt *accelScale; + linearVelocity += linearAccelTimesDT; + } + + //Apply damping. 
+ const PxReal linVelMultiplier = physx::intrinsics::fsel(oneMinusLinearDampingTimesDT, oneMinusLinearDampingTimesDT, 0.0f); + const PxReal angVelMultiplier = physx::intrinsics::fsel(oneMinusAngularDampingTimesDT, oneMinusAngularDampingTimesDT, 0.0f); + linearVelocity*=linVelMultiplier; + angularVelocity*=angVelMultiplier; + + // Clamp velocity + const PxReal linVelSq = linearVelocity.magnitudeSquared(); + if(linVelSq > maxLinearVelocitySq) + { + linearVelocity *= PxSqrt(maxLinearVelocitySq / linVelSq); + } + const PxReal angVelSq = angularVelocity.magnitudeSquared(); + if(angVelSq > maxAngularVelocitySq) + { + angularVelocity *= PxSqrt(maxAngularVelocitySq / angVelSq); + } + + inOutLinearVelocity = linearVelocity; + inOutAngularVelocity = angularVelocity; +} + + +PX_FORCE_INLINE void integrateCore(PxVec3& motionLinearVelocity, PxVec3& motionAngularVelocity, PxSolverBody& solverBody, PxSolverBodyData& solverBodyData, const PxF32 dt) +{ + PxU32 lockFlags = solverBodyData.lockFlags; + if (lockFlags) + { + if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_X) + { + motionLinearVelocity.x = 0.f; + solverBody.linearVelocity.x = 0.f; + } + if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Y) + { + motionLinearVelocity.y = 0.f; + solverBody.linearVelocity.y = 0.f; + } + if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Z) + { + motionLinearVelocity.z = 0.f; + solverBody.linearVelocity.z = 0.f; + } + + //The angular velocity should be 0 because it is now impossible to make it rotate around that axis! 
+ if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_X) + { + motionAngularVelocity.x = 0.f; + solverBody.angularState.x = 0.f; + } + if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Y) + { + motionAngularVelocity.y = 0.f; + solverBody.angularState.y = 0.f; + } + if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Z) + { + motionAngularVelocity.z = 0.f; + solverBody.angularState.z = 0.f; + } + } + + // Integrate linear part + PxVec3 linearMotionVel = solverBodyData.linearVelocity + motionLinearVelocity; + PxVec3 delta = linearMotionVel * dt; + PxVec3 angularMotionVel = solverBodyData.angularVelocity + solverBodyData.sqrtInvInertia * motionAngularVelocity; + PxReal w = angularMotionVel.magnitudeSquared(); + solverBodyData.body2World.p += delta; + PX_ASSERT(solverBodyData.body2World.p.isFinite()); + + //Store back the linear and angular velocities + //core.linearVelocity += solverBody.linearVelocity * solverBodyData.sqrtInvMass; + solverBodyData.linearVelocity += solverBody.linearVelocity; + solverBodyData.angularVelocity += solverBodyData.sqrtInvInertia * solverBody.angularState; + + // Integrate the rotation using closed form quaternion integrator + if (w != 0.0f) + { + w = PxSqrt(w); + // Perform a post-solver clamping + // TODO(dsequeira): ignore this for the moment + //just clamp motionVel to half float-range + const PxReal maxW = 1e+7f; //Should be about sqrt(PX_MAX_REAL/2) or smaller + if (w > maxW) + { + angularMotionVel = angularMotionVel.getNormalized() * maxW; + w = maxW; + } + const PxReal v = dt * w * 0.5f; + PxReal s, q; + Ps::sincos(v, s, q); + s /= w; + + const PxVec3 pqr = angularMotionVel * s; + const PxQuat quatVel(pqr.x, pqr.y, pqr.z, 0); + PxQuat result = quatVel * solverBodyData.body2World.q; + + result += solverBodyData.body2World.q * q; + + solverBodyData.body2World.q = result.getNormalized(); + PX_ASSERT(solverBodyData.body2World.q.isSane()); + PX_ASSERT(solverBodyData.body2World.q.isFinite()); + } + + motionLinearVelocity = 
linearMotionVel; + motionAngularVelocity = angularMotionVel; +} + + +PX_FORCE_INLINE PxReal updateWakeCounter(PxsRigidBody* originalBody, PxReal dt, PxReal /*invDt*/, const bool enableStabilization, const bool useAdaptiveForce, Cm::SpatialVector& motionVelocity, + bool hasStaticTouch) +{ + //KS - at most one of these features can be enabled at any time + PX_ASSERT(!useAdaptiveForce || !enableStabilization); + PxsBodyCore& bodyCore = originalBody->getCore(); + + // update the body's sleep state and + PxReal wakeCounterResetTime = 20.0f*0.02f; + + PxReal wc = bodyCore.wakeCounter; + + { + if (enableStabilization) + { + bool freeze = false; + const PxTransform& body2World = bodyCore.body2World; + + // calculate normalized energy: kinetic energy divided by mass + + const PxVec3 t = bodyCore.inverseInertia; + const PxVec3 inertia(t.x > 0.f ? 1.0f / t.x : 1.f, t.y > 0.f ? 1.0f / t.y : 1.f, t.z > 0.f ? 1.0f / t.z : 1.f); + + + PxVec3 sleepLinVelAcc = motionVelocity.linear; + PxVec3 sleepAngVelAcc = body2World.q.rotateInv(motionVelocity.angular); + + // scale threshold by cluster factor (more contacts => higher sleep threshold) + //const PxReal clusterFactor = PxReal(1u + getNumUniqueInteractions()); + + PxReal invMass = bodyCore.inverseMass; + if (invMass == 0.f) + invMass = 1.f; + + const PxReal angular = sleepAngVelAcc.multiply(sleepAngVelAcc).dot(inertia) * invMass; + const PxReal linear = sleepLinVelAcc.magnitudeSquared(); + PxReal frameNormalizedEnergy = 0.5f * (angular + linear); + + const PxReal cf = hasStaticTouch ? 
PxReal(PxMin(10u, bodyCore.numBodyInteractions)) : 0.f; + const PxReal freezeThresh = cf*bodyCore.freezeThreshold; + + originalBody->freezeCount = PxMax(originalBody->freezeCount - dt, 0.0f); + bool settled = true; + + PxReal accelScale = PxMin(1.f, originalBody->accelScale + dt); + + if (!hasStaticTouch) + accelScale = 1.f; + + if (frameNormalizedEnergy >= freezeThresh) + { + settled = false; + originalBody->freezeCount = PXD_FREEZE_INTERVAL; + } + + if (settled) + { + //Dampen bodies that are just about to go to sleep + if (cf > 1.f) + { + const PxReal sleepDamping = PXD_SLEEP_DAMPING; + const PxReal sleepDampingTimesDT = sleepDamping*dt; + const PxReal d = 1.0f - sleepDampingTimesDT; + bodyCore.linearVelocity = bodyCore.linearVelocity * d; + bodyCore.angularVelocity = bodyCore.angularVelocity * d; + accelScale = PXD_FREEZE_SCALE; + } + freeze = originalBody->freezeCount == 0.f && frameNormalizedEnergy < (bodyCore.freezeThreshold * PXD_FREEZE_TOLERANCE); + } + + originalBody->accelScale = accelScale; + + if (freeze) + { + //current flag isn't frozen but freeze flag raise so we need to raise the frozen flag in this frame + bool wasNotFrozen = (originalBody->mInternalFlags & PxsRigidBody::eFROZEN) == 0; + PxU16 flags = PxU16((originalBody->mInternalFlags & PxsRigidBody::eDISABLE_GRAVITY) | PxsRigidBody::eFROZEN); + if (wasNotFrozen) + { + flags |= PxsRigidBody::eFREEZE_THIS_FRAME; + } + originalBody->mInternalFlags = flags; + bodyCore.body2World = originalBody->getLastCCDTransform(); + } + else + { + PxU16 flags = PxU16(originalBody->mInternalFlags & PxsRigidBody::eDISABLE_GRAVITY); + bool wasFrozen = (originalBody->mInternalFlags & PxsRigidBody::eFROZEN) != 0; + if (wasFrozen) + { + flags |= PxsRigidBody::eUNFREEZE_THIS_FRAME; + } + originalBody->mInternalFlags = flags; + } + + /*KS: New algorithm for sleeping when using stabilization: + * Energy *this frame* must be higher than sleep threshold and accumulated energy over previous frames + * must be higher than 
clusterFactor*energyThreshold. + */ + if (wc < wakeCounterResetTime * 0.5f || wc < dt) + { + //Accumulate energy + originalBody->sleepLinVelAcc += sleepLinVelAcc; + originalBody->sleepAngVelAcc += sleepAngVelAcc; + + //If energy this frame is high + if (frameNormalizedEnergy >= bodyCore.sleepThreshold) + { + //Compute energy over sleep preparation time + const PxReal sleepAngular = originalBody->sleepAngVelAcc.multiply(originalBody->sleepAngVelAcc).dot(inertia) * invMass; + const PxReal sleepLinear = originalBody->sleepLinVelAcc.magnitudeSquared(); + PxReal normalizedEnergy = 0.5f * (sleepAngular + sleepLinear); + const PxReal sleepClusterFactor = PxReal(1u + bodyCore.numCountedInteractions); + // scale threshold by cluster factor (more contacts => higher sleep threshold) + const PxReal threshold = sleepClusterFactor*bodyCore.sleepThreshold; + + //If energy over sleep preparation time is high + if (normalizedEnergy >= threshold) + { + //Wake up + //PX_ASSERT(isActive()); + originalBody->sleepAngVelAcc = PxVec3(0); + originalBody->sleepLinVelAcc = PxVec3(0); + + const float factor = bodyCore.sleepThreshold == 0.f ? 
2.0f : PxMin(normalizedEnergy / threshold, 2.0f); + PxReal oldWc = wc; + wc = factor * 0.5f * wakeCounterResetTime + dt * (sleepClusterFactor - 1.0f); + bodyCore.solverWakeCounter = wc; + //if (oldWc == 0.0f) // for the case where a sleeping body got activated by the system (not the user) AND got processed by the solver as well + // notifyNotReadyForSleeping(bodyCore.nodeIndex); + + if (oldWc == 0.0f) + originalBody->mInternalFlags |= PxsRigidBody::eACTIVATE_THIS_FRAME; + + return wc; + } + } + } + + } + else + { + if (useAdaptiveForce) + { + if (hasStaticTouch && bodyCore.numBodyInteractions > 1) + originalBody->accelScale = 1.f / PxReal(bodyCore.numBodyInteractions); + else + originalBody->accelScale = 1.f; + } + if (wc < wakeCounterResetTime * 0.5f || wc < dt) + { + const PxTransform& body2World = bodyCore.body2World; + + // calculate normalized energy: kinetic energy divided by mass + const PxVec3 t = bodyCore.inverseInertia; + const PxVec3 inertia(t.x > 0.f ? 1.0f / t.x : 1.f, t.y > 0.f ? 1.0f / t.y : 1.f, t.z > 0.f ? 
1.0f / t.z : 1.f); + + PxVec3 sleepLinVelAcc = motionVelocity.linear; + PxVec3 sleepAngVelAcc = body2World.q.rotateInv(motionVelocity.angular); + + originalBody->sleepLinVelAcc += sleepLinVelAcc; + originalBody->sleepAngVelAcc += sleepAngVelAcc; + + PxReal invMass = bodyCore.inverseMass; + if (invMass == 0.f) + invMass = 1.f; + + const PxReal angular = originalBody->sleepAngVelAcc.multiply(originalBody->sleepAngVelAcc).dot(inertia) * invMass; + const PxReal linear = originalBody->sleepLinVelAcc.magnitudeSquared(); + PxReal normalizedEnergy = 0.5f * (angular + linear); + + // scale threshold by cluster factor (more contacts => higher sleep threshold) + const PxReal clusterFactor = PxReal(1 + bodyCore.numCountedInteractions); + const PxReal threshold = clusterFactor*bodyCore.sleepThreshold; + + if (normalizedEnergy >= threshold) + { + //PX_ASSERT(isActive()); + originalBody->sleepLinVelAcc = PxVec3(0); + originalBody->sleepAngVelAcc = PxVec3(0); + const float factor = threshold == 0.f ? 2.0f : PxMin(normalizedEnergy / threshold, 2.0f); + PxReal oldWc = wc; + wc = factor * 0.5f * wakeCounterResetTime + dt * (clusterFactor - 1.0f); + bodyCore.solverWakeCounter = wc; + PxU16 flags = PxU16(originalBody->mInternalFlags & PxsRigidBody::eDISABLE_GRAVITY); + if (oldWc == 0.0f) // for the case where a sleeping body got activated by the system (not the user) AND got processed by the solver as well + { + flags |= PxsRigidBody::eACTIVATE_THIS_FRAME; + //notifyNotReadyForSleeping(bodyCore.nodeIndex); + } + + originalBody->mInternalFlags = flags; + + return wc; + } + } + } + } + + wc = PxMax(wc - dt, 0.0f); + bodyCore.solverWakeCounter = wc; + return wc; +} + +PX_FORCE_INLINE void sleepCheck(PxsRigidBody* originalBody, const PxReal dt, const PxReal intDt, const bool enableStabilization, bool useAdaptiveForce, Cm::SpatialVector& motionVelocity, + bool hasStaticTouch) +{ + + PxReal wc = updateWakeCounter(originalBody, dt, intDt, enableStabilization, useAdaptiveForce, motionVelocity, 
hasStaticTouch); + bool wakeCounterZero = (wc == 0.0f); + + if (wakeCounterZero) + { + //PxsBodyCore& bodyCore = originalBody->getCore(); + originalBody->mInternalFlags |= PxsRigidBody::eDEACTIVATE_THIS_FRAME; + // notifyReadyForSleeping(bodyCore.nodeIndex); + originalBody->sleepLinVelAcc = PxVec3(0); + originalBody->sleepAngVelAcc = PxVec3(0); + } +} + +} + +} + +#endif //DY_BODYCORE_INTEGRATOR_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPartition.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPartition.cpp new file mode 100644 index 00000000..03751640 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintPartition.cpp @@ -0,0 +1,712 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. + +#include "DyConstraintPartition.h" +#include "DyArticulationUtils.h" + +#define INTERLEAVE_SELF_CONSTRAINTS 1 + + +namespace physx +{ +namespace Dy +{ + +namespace +{ + +PX_FORCE_INLINE PxU32 getArticulationIndex(const uintptr_t eaFsData, const uintptr_t* eas, const PxU32 numEas) +{ + PxU32 index=0xffffffff; + for(PxU32 i=0;i<numEas;i++) + { + if(eas[i]==eaFsData) + { + index=i; + break; + } + } + PX_ASSERT(index!=0xffffffff); + return index; +} + + +#define MAX_NUM_PARTITIONS 32 + +static PxU32 bitTable[32] = +{ + 1u<<0, 1u<<1, 1u<<2, 1u<<3, 1u<<4, 1u<<5, 1u<<6, 1u<<7, 1u<<8, 1u<<9, 1u<<10, 1u<<11, 1u<<12, 1u<<13, 1u<<14, 1u<<15, 1u<<16, 1u<<17, + 1u<<18, 1u<<19, 1u<<20, 1u<<21, 1u<<22, 1u<<23, 1u<<24, 1u<<25, 1u<<26, 1u<<27, 1u<<28, 1u<<29, 1u<<30, 1u<<31 +}; + +PxU32 getBit(const PxU32 index) +{ + PX_ASSERT(index < 32); + return bitTable[index]; +} + + +class RigidBodyClassification +{ + PxSolverBody* PX_RESTRICT mBodies; + PxU32 mNumBodies; + +public: + RigidBodyClassification(PxSolverBody* PX_RESTRICT bodies, PxU32 numBodies) : mBodies(bodies), mNumBodies(numBodies) + { + } + + //Returns true if it is a dynamic-dynamic constriant; false if it is a dynamic-static or dynamic-kinematic constraint + PX_FORCE_INLINE bool classifyConstraint(const PxSolverConstraintDesc& desc, uintptr_t& indexA, uintptr_t& indexB, bool& activeA, bool& activeB) const + { + indexA=uintptr_t(desc.bodyA - mBodies); + indexB=uintptr_t(desc.bodyB - mBodies); + activeA = indexA < mNumBodies; + activeB = indexB < mNumBodies; + return activeA && activeB; + } + + PX_FORCE_INLINE void clearState() + { + for(PxU32 a = 0; a < mNumBodies; ++a) + mBodies[a].solverProgress 
= 0; + } + + PX_FORCE_INLINE void reserveSpaceForStaticConstraints(Ps::Array<PxU32>& numConstraintsPerPartition) + { + for(PxU32 a = 0; a < mNumBodies; ++a) + { + mBodies[a].solverProgress = 0; + + PxU32 requiredSize = PxU32(mBodies[a].maxSolverNormalProgress + mBodies[a].maxSolverFrictionProgress); + if(requiredSize > numConstraintsPerPartition.size()) + { + numConstraintsPerPartition.resize(requiredSize); + } + + for(PxU32 b = 0; b < mBodies[a].maxSolverFrictionProgress; ++b) + { + numConstraintsPerPartition[mBodies[a].maxSolverNormalProgress + b]++; + } + } + } +}; + +class ExtendedRigidBodyClassification +{ + + PxSolverBody* PX_RESTRICT mBodies; + PxU32 mNumBodies; + uintptr_t* PX_RESTRICT mFsDatas; + PxU32 mNumArticulations; + +public: + + ExtendedRigidBodyClassification(PxSolverBody* PX_RESTRICT bodies, PxU32 numBodies, uintptr_t* PX_RESTRICT fsDatas, PxU32 numArticulations) + : mBodies(bodies), mNumBodies(numBodies), mFsDatas(fsDatas), mNumArticulations(numArticulations) + { + } + + //Returns true if it is a dynamic-dynamic constriant; false if it is a dynamic-static or dynamic-kinematic constraint + PX_FORCE_INLINE bool classifyConstraint(const PxSolverConstraintDesc& desc, uintptr_t& indexA, uintptr_t& indexB, bool& activeA, bool& activeB) const + { + if(PxSolverConstraintDesc::NO_LINK == desc.linkIndexA) + { + indexA=uintptr_t(desc.bodyA - mBodies); + activeA = indexA < mNumBodies; + } + else + { + indexA=mNumBodies+getArticulationIndex(uintptr_t(desc.articulationA),mFsDatas,mNumArticulations); + activeA = true; + } + if(PxSolverConstraintDesc::NO_LINK == desc.linkIndexB) + { + indexB=uintptr_t(desc.bodyB - mBodies); + activeB = indexB < mNumBodies; + } + else + { + indexB=mNumBodies+getArticulationIndex(uintptr_t(desc.articulationB),mFsDatas,mNumArticulations); + activeB = true; + } + return activeA && activeB; + } + + PX_FORCE_INLINE void clearState() + { + for(PxU32 a = 0; a < mNumBodies; ++a) + mBodies[a].solverProgress = 0; + + for(PxU32 a = 0; a < 
mNumArticulations; ++a) + (reinterpret_cast<FsData*>(mFsDatas[a]))->solverProgress = 0; + } + + PX_FORCE_INLINE void reserveSpaceForStaticConstraints(Ps::Array<PxU32>& numConstraintsPerPartition) + { + for(PxU32 a = 0; a < mNumBodies; ++a) + { + mBodies[a].solverProgress = 0; + + PxU32 requiredSize = PxU32(mBodies[a].maxSolverNormalProgress + mBodies[a].maxSolverFrictionProgress); + if(requiredSize > numConstraintsPerPartition.size()) + { + numConstraintsPerPartition.resize(requiredSize); + } + + for(PxU32 b = 0; b < mBodies[a].maxSolverFrictionProgress; ++b) + { + numConstraintsPerPartition[mBodies[a].maxSolverNormalProgress + b]++; + } + } + + for(PxU32 a = 0; a < mNumArticulations; ++a) + { + FsData* data = reinterpret_cast<FsData*>(mFsDatas[a]); + data->solverProgress = 0; + + PxU32 requiredSize = PxU32(data->maxSolverNormalProgress + data->maxSolverFrictionProgress); + if(requiredSize > numConstraintsPerPartition.size()) + { + numConstraintsPerPartition.resize(requiredSize); + } + + for(PxU32 b = 0; b < data->maxSolverFrictionProgress; ++b) + { + numConstraintsPerPartition[data->maxSolverNormalProgress + b]++; + } + } + } + +}; + +template <typename Classification> +void classifyConstraintDesc(const PxSolverConstraintDesc* PX_RESTRICT descs, const PxU32 numConstraints, Classification& classification, + Ps::Array<PxU32>& numConstraintsPerPartition, PxSolverConstraintDesc* PX_RESTRICT eaTempConstraintDescriptors) +{ + const PxSolverConstraintDesc* _desc = descs; + const PxU32 numConstraintsMin1 = numConstraints - 1; + + PxU32 numUnpartitionedConstraints = 0; + + numConstraintsPerPartition.forceSize_Unsafe(32); + + PxMemZero(numConstraintsPerPartition.begin(), sizeof(PxU32) * 32); + + for(PxU32 i = 0; i < numConstraints; ++i, _desc++) + { + const PxU32 prefetchOffset = PxMin(numConstraintsMin1 - i, 4u); + Ps::prefetchLine(_desc[prefetchOffset].constraint); + Ps::prefetchLine(_desc[prefetchOffset].bodyA); + Ps::prefetchLine(_desc[prefetchOffset].bodyB); + 
Ps::prefetchLine(_desc + 8); + + uintptr_t indexA, indexB; + bool activeA, activeB; + + const bool notContainsStatic = classification.classifyConstraint(*_desc, indexA, indexB, activeA, activeB); + + if(notContainsStatic) + { + PxU32 partitionsA=_desc->bodyA->solverProgress; + PxU32 partitionsB=_desc->bodyB->solverProgress; + + PxU32 availablePartition; + { + const PxU32 combinedMask = (~partitionsA & ~partitionsB); + availablePartition = combinedMask == 0 ? MAX_NUM_PARTITIONS : Ps::lowestSetBit(combinedMask); + if(availablePartition == MAX_NUM_PARTITIONS) + { + eaTempConstraintDescriptors[numUnpartitionedConstraints++] = *_desc; + continue; + } + + const PxU32 partitionBit = getBit(availablePartition); + partitionsA |= partitionBit; + partitionsB |= partitionBit; + } + + _desc->bodyA->solverProgress = partitionsA; + _desc->bodyB->solverProgress = partitionsB; + numConstraintsPerPartition[availablePartition]++; + availablePartition++; + _desc->bodyA->maxSolverNormalProgress = PxMax(_desc->bodyA->maxSolverNormalProgress, PxU16(availablePartition)); + _desc->bodyB->maxSolverNormalProgress = PxMax(_desc->bodyB->maxSolverNormalProgress, PxU16(availablePartition)); + + + } + else + { + //Just count the number of static constraints and store in maxSolverFrictionProgress... + if(activeA) + _desc->bodyA->maxSolverFrictionProgress++; + else if(activeB) + _desc->bodyB->maxSolverFrictionProgress++; + } + } + + PxU32 partitionStartIndex = 0; + + while(numUnpartitionedConstraints > 0) + { + classification.clearState(); + + partitionStartIndex += 32; + //Keep partitioning the un-partitioned constraints and blat the whole thing to 0! 
+ numConstraintsPerPartition.resize(32 + numConstraintsPerPartition.size()); + PxMemZero(numConstraintsPerPartition.begin() + partitionStartIndex, sizeof(PxU32) * 32); + + PxU32 newNumUnpartitionedConstraints = 0; + + for(PxU32 i = 0; i < numUnpartitionedConstraints; ++i) + { + const PxSolverConstraintDesc& desc = eaTempConstraintDescriptors[i]; + + PxU32 partitionsA=desc.bodyA->solverProgress; + PxU32 partitionsB=desc.bodyB->solverProgress; + + PxU32 availablePartition; + { + const PxU32 combinedMask = (~partitionsA & ~partitionsB); + availablePartition = combinedMask == 0 ? MAX_NUM_PARTITIONS : Ps::lowestSetBit(combinedMask); + if(availablePartition == MAX_NUM_PARTITIONS) + { + //Need to shuffle around unpartitioned constraints... + eaTempConstraintDescriptors[newNumUnpartitionedConstraints++] = desc; + continue; + } + + const PxU32 partitionBit = getBit(availablePartition); + partitionsA |= partitionBit; + partitionsB |= partitionBit; + } + + desc.bodyA->solverProgress = partitionsA; + desc.bodyB->solverProgress = partitionsB; + availablePartition += partitionStartIndex; + numConstraintsPerPartition[availablePartition]++; + availablePartition++; + desc.bodyA->maxSolverNormalProgress = PxMax(desc.bodyA->maxSolverNormalProgress, PxU16(availablePartition)); + desc.bodyB->maxSolverNormalProgress = PxMax(desc.bodyB->maxSolverNormalProgress, PxU16(availablePartition)); + } + + numUnpartitionedConstraints = newNumUnpartitionedConstraints; + } + + classification.reserveSpaceForStaticConstraints(numConstraintsPerPartition); + +} + +template <typename Classification> +void writeConstraintDesc(const PxSolverConstraintDesc* PX_RESTRICT descs, const PxU32 numConstraints, Classification& classification, + Ps::Array<PxU32>& accumulatedConstraintsPerPartition, PxSolverConstraintDesc* eaTempConstraintDescriptors, + PxSolverConstraintDesc* PX_RESTRICT eaOrderedConstraintDesc) +{ + PX_UNUSED(eaTempConstraintDescriptors); + const PxSolverConstraintDesc* _desc = descs; + const PxU32 
numConstraintsMin1 = numConstraints - 1; + + PxU32 numUnpartitionedConstraints = 0; + + for(PxU32 i = 0; i < numConstraints; ++i, _desc++) + { + const PxU32 prefetchOffset = PxMin(numConstraintsMin1 - i, 4u); + Ps::prefetchLine(_desc[prefetchOffset].constraint); + Ps::prefetchLine(_desc[prefetchOffset].bodyA); + Ps::prefetchLine(_desc[prefetchOffset].bodyB); + Ps::prefetchLine(_desc + 8); + + uintptr_t indexA, indexB; + bool activeA, activeB; + const bool notContainsStatic = classification.classifyConstraint(*_desc, indexA, indexB, activeA, activeB); + + if(notContainsStatic) + { + PxU32 partitionsA=_desc->bodyA->solverProgress; + PxU32 partitionsB=_desc->bodyB->solverProgress; + + PxU32 availablePartition; + { + const PxU32 combinedMask = (~partitionsA & ~partitionsB); + availablePartition = combinedMask == 0 ? MAX_NUM_PARTITIONS : Ps::lowestSetBit(combinedMask); + if(availablePartition == MAX_NUM_PARTITIONS) + { + eaTempConstraintDescriptors[numUnpartitionedConstraints++] = *_desc; + continue; + } + + const PxU32 partitionBit = getBit(availablePartition); + + partitionsA |= partitionBit; + partitionsB |= partitionBit; + } + + _desc->bodyA->solverProgress = partitionsA; + _desc->bodyB->solverProgress = partitionsB; + + eaOrderedConstraintDesc[accumulatedConstraintsPerPartition[availablePartition]++] = *_desc; + } + else + { + //Just count the number of static constraints and store in maxSolverFrictionProgress... 
+ PxU32 index = 0; + if(activeA) + index = PxU32(_desc->bodyA->maxSolverNormalProgress + _desc->bodyA->maxSolverFrictionProgress++); + else if(activeB) + index = PxU32(_desc->bodyB->maxSolverNormalProgress + _desc->bodyB->maxSolverFrictionProgress++); + + eaOrderedConstraintDesc[accumulatedConstraintsPerPartition[index]++] = *_desc; + } + } + + PxU32 partitionStartIndex = 0; + + while(numUnpartitionedConstraints > 0) + { + classification.clearState(); + + partitionStartIndex += 32; + PxU32 newNumUnpartitionedConstraints = 0; + + for(PxU32 i = 0; i < numUnpartitionedConstraints; ++i) + { + const PxSolverConstraintDesc& desc = eaTempConstraintDescriptors[i]; + + PxU32 partitionsA=desc.bodyA->solverProgress; + PxU32 partitionsB=desc.bodyB->solverProgress; + + PxU32 availablePartition; + { + const PxU32 combinedMask = (~partitionsA & ~partitionsB); + availablePartition = combinedMask == 0 ? MAX_NUM_PARTITIONS : Ps::lowestSetBit(combinedMask); + if(availablePartition == MAX_NUM_PARTITIONS) + { + //Need to shuffle around unpartitioned constraints... 
+ eaTempConstraintDescriptors[newNumUnpartitionedConstraints++] = desc; + continue; + } + + const PxU32 partitionBit = getBit(availablePartition); + + partitionsA |= partitionBit; + partitionsB |= partitionBit; + } + + desc.bodyA->solverProgress = partitionsA; + desc.bodyB->solverProgress = partitionsB; + availablePartition += partitionStartIndex; + eaOrderedConstraintDesc[accumulatedConstraintsPerPartition[availablePartition]++] = desc; + } + + numUnpartitionedConstraints = newNumUnpartitionedConstraints; + } +} + +} + +#define PX_NORMALIZE_PARTITIONS 1 + +#if PX_NORMALIZE_PARTITIONS + +template<typename Classification> +PxU32 normalizePartitions(Ps::Array<PxU32>& accumulatedConstraintsPerPartition, PxSolverConstraintDesc* PX_RESTRICT eaOrderedConstraintDescriptors, + const PxU32 numConstraintDescriptors, Ps::Array<PxU32>& bitField, const Classification& classification, const PxU32 numBodies, const PxU32 numArticulations) +{ + PxU32 numPartitions = 0; + + PxU32 prevAccumulation = 0; + for(; numPartitions < accumulatedConstraintsPerPartition.size() && accumulatedConstraintsPerPartition[numPartitions] > prevAccumulation; + prevAccumulation = accumulatedConstraintsPerPartition[numPartitions++]); + + PxU32 targetSize = (numPartitions == 0 ? 0 : (numConstraintDescriptors)/numPartitions); + + bitField.reserve((numBodies + numArticulations + 31)/32); + bitField.forceSize_Unsafe((numBodies + numArticulations + 31)/32); + + for(PxU32 i = numPartitions; i > 0; i--) + { + PxU32 partitionIndex = i-1; + + //Build the partition mask... + + PxU32 startIndex = partitionIndex == 0 ? 
0 : accumulatedConstraintsPerPartition[partitionIndex-1]; + PxU32 endIndex = accumulatedConstraintsPerPartition[partitionIndex]; + + //If its greater than target size, there's nothing that will be pulled into it from earlier partitions + if((endIndex - startIndex) >= targetSize) + continue; + + + PxMemZero(bitField.begin(), sizeof(PxU32)*bitField.size()); + + for(PxU32 a = startIndex; a < endIndex; ++a) + { + PxSolverConstraintDesc& desc = eaOrderedConstraintDescriptors[a]; + + uintptr_t indexA, indexB; + bool activeA, activeB; + + classification.classifyConstraint(desc, indexA, indexB, activeA, activeB); + + if(activeA) + bitField[PxU32(indexA)/32] |= getBit(indexA & 31); + if(activeB) + bitField[PxU32(indexB)/32] |= getBit(indexB & 31); + } + + bool bTerm = false; + for(PxU32 a = partitionIndex; a > 0 && !bTerm; --a) + { + PxU32 pInd = a-1; + + PxU32 si = pInd == 0 ? 0 : accumulatedConstraintsPerPartition[pInd-1]; + PxU32 ei = accumulatedConstraintsPerPartition[pInd]; + + for(PxU32 b = ei; b > si && !bTerm; --b) + { + PxU32 ind = b-1; + PxSolverConstraintDesc& desc = eaOrderedConstraintDescriptors[ind]; + + uintptr_t indexA, indexB; + bool activeA, activeB; + + classification.classifyConstraint(desc, indexA, indexB, activeA, activeB); + + bool canAdd = true; + + if(activeA && (bitField[PxU32(indexA)/32] & (getBit(indexA & 31)))) + canAdd = false; + if(activeB && (bitField[PxU32(indexB)/32] & (getBit(indexB & 31)))) + canAdd = false; + + if(canAdd) + { + PxSolverConstraintDesc tmp = eaOrderedConstraintDescriptors[ind]; + + if(activeA) + bitField[PxU32(indexA)/32] |= (getBit(indexA & 31)); + if(activeB) + bitField[PxU32(indexB)/32] |= (getBit(indexB & 31)); + + PxU32 index = ind; + for(PxU32 c = pInd; c < partitionIndex; ++c) + { + PxU32 newIndex = --accumulatedConstraintsPerPartition[c]; + if(index != newIndex) + eaOrderedConstraintDescriptors[index] = eaOrderedConstraintDescriptors[newIndex]; + index = newIndex; + } + + if(index != ind) + 
// Partitions the contact constraint descriptors in args so that no two
// constraints in the same partition reference the same active body, and writes
// them partition by partition into args.mOrderedContactConstraintDescriptors.
//
// Steps (identical for both branches, which differ only in the classification
// policy used):
//   1. classifyConstraintDesc() counts the constraints per partition.
//   2. The counts are turned into start offsets (exclusive prefix sum).
//   3. writeConstraintDesc() repeats the assignment and copies each descriptor
//      to its slot, advancing the per-partition offset as it goes.
//   4. Unless args.enhancedDeterminism is set, normalizePartitions() rebalances
//      the partitions and returns the partition count.
//
// Also fills args.mNumSelfConstraintBlocks, args.mNumDifferentBodyConstraints
// and args.mNumSelfConstraints.
// Returns the number of partitions.
PxU32 partitionContactConstraints(ConstraintPartitionArgs& args)
{
	PxU32 maxPartition = 0;
	//Unpack the input data.
	const PxU32 numBodies=args.mNumBodies;
	PxSolverBody* PX_RESTRICT eaAtoms=args.mBodies;
	const PxU32 numArticulations=args.mNumArticulationPtrs;

	const PxU32 numConstraintDescriptors=args.mNumContactConstraintDescriptors;

	PxSolverConstraintDesc* PX_RESTRICT eaConstraintDescriptors=args.mContactConstraintDescriptors;
	PxSolverConstraintDesc* PX_RESTRICT eaOrderedConstraintDescriptors=args.mOrderedContactConstraintDescriptors;
	PxSolverConstraintDesc* PX_RESTRICT eaTempConstraintDescriptors=args.mTempContactConstraintDescriptors;

	Ps::Array<PxU32>& constraintsPerPartition = *args.mConstraintsPerPartition;
	// Drop any previous contents; classifyConstraintDesc() sets the size it needs.
	constraintsPerPartition.forceSize_Unsafe(0);

	for(PxU32 a = 0; a < numBodies; ++a)
	{
		PxSolverBody& body = args.mBodies[a];
		Ps::prefetchLine(&args.mBodies[a], 256);
		body.solverProgress = 0;
		//We re-use maxSolverFrictionProgress and maxSolverNormalProgress to record the
		//maximum partition used by dynamic constraints and the number of static constraints affecting
		//a body. We use this to make partitioning much cheaper.
		//NOTE(review): the original sentence was cut off after "and be able to support";
		//presumably it referred to supporting more than one batch of 32 partitions - confirm.
		body.maxSolverFrictionProgress = 0;
		body.maxSolverNormalProgress = 0;
	}

	PxU32 numOrderedConstraints=0;

	// Never modified below, so args.mNumSelfConstraintBlocks is always written as 0.
	PxU32 numSelfConstraintBlocks=0;

	if(numArticulations == 0)
	{
		// Rigid bodies only.
		RigidBodyClassification classification(eaAtoms, numBodies);
		classifyConstraintDesc(eaConstraintDescriptors, numConstraintDescriptors, classification, constraintsPerPartition,
			eaTempConstraintDescriptors);

		// Exclusive prefix sum: per-partition counts become per-partition start offsets.
		PxU32 accumulation = 0;
		for(PxU32 a = 0; a < constraintsPerPartition.size(); ++a)
		{
			PxU32 count = constraintsPerPartition[a];
			constraintsPerPartition[a] = accumulation;
			accumulation += count;
		}

		for(PxU32 a = 0; a < numBodies; ++a)
		{
			PxSolverBody& body = args.mBodies[a];
			Ps::prefetchLine(&args.mBodies[a], 256);
			body.solverProgress = 0;
			//Keep the dynamic constraint count but bump the static constraint count back to 0.
			//This allows us to place the static constraints in the appropriate place when we see them
			//because we know the maximum index for the dynamic constraints...
			body.maxSolverFrictionProgress = 0;
		}

		writeConstraintDesc(eaConstraintDescriptors, numConstraintDescriptors, classification, constraintsPerPartition,
			eaTempConstraintDescriptors, eaOrderedConstraintDescriptors);

		numOrderedConstraints = numConstraintDescriptors;

		if(!args.enhancedDeterminism)
			maxPartition = normalizePartitions(constraintsPerPartition, eaOrderedConstraintDescriptors, numConstraintDescriptors, *args.mBitField,
				classification, numBodies, 0);

	}
	else
	{
		// Rigid bodies plus articulations: each articulation is identified by the address of its FsData.

		const ArticulationSolverDesc* articulationDescs=args.mArticulationPtrs;
		PX_ALLOCA(_eaFsData, uintptr_t, numArticulations);
		uintptr_t* eaFsDatas = _eaFsData;
		for(PxU32 i=0;i<numArticulations;i++)
		{
			FsData* data = articulationDescs[i].fsData;
			eaFsDatas[i]=uintptr_t(data);
			// Same bookkeeping reset as for the rigid bodies above.
			data->solverProgress = 0;
			data->maxSolverFrictionProgress = 0;
			data->maxSolverNormalProgress = 0;
		}
		ExtendedRigidBodyClassification classification(eaAtoms, numBodies, eaFsDatas, numArticulations);

		classifyConstraintDesc(eaConstraintDescriptors, numConstraintDescriptors, classification,
			constraintsPerPartition, eaTempConstraintDescriptors);

		// Exclusive prefix sum: per-partition counts become per-partition start offsets.
		PxU32 accumulation = 0;
		for(PxU32 a = 0; a < constraintsPerPartition.size(); ++a)
		{
			PxU32 count = constraintsPerPartition[a];
			constraintsPerPartition[a] = accumulation;
			accumulation += count;
		}

		for(PxU32 a = 0; a < numBodies; ++a)
		{
			PxSolverBody& body = args.mBodies[a];
			Ps::prefetchLine(&args.mBodies[a], 256);
			body.solverProgress = 0;
			//Keep the dynamic constraint count but bump the static constraint count back to 0.
			//This allows us to place the static constraints in the appropriate place when we see them
			//because we know the maximum index for the dynamic constraints...
			body.maxSolverFrictionProgress = 0;
		}

		for(PxU32 a = 0; a < numArticulations; ++a)
		{
			FsData* data = reinterpret_cast<FsData*>(eaFsDatas[a]);
			data->solverProgress = 0;
			data->maxSolverFrictionProgress = 0;
		}

		writeConstraintDesc(eaConstraintDescriptors, numConstraintDescriptors, classification, constraintsPerPartition,
			eaTempConstraintDescriptors, eaOrderedConstraintDescriptors);

		numOrderedConstraints = numConstraintDescriptors;

		if (!args.enhancedDeterminism)
			maxPartition = normalizePartitions(constraintsPerPartition, eaOrderedConstraintDescriptors,
				numConstraintDescriptors, *args.mBitField, classification, numBodies, numArticulations);

	}



	const PxU32 numConstraintsDifferentBodies=numOrderedConstraints;

	PX_ASSERT(numConstraintsDifferentBodies == numConstraintDescriptors);

	//Now handle the articulated self-constraints.
	//(No self-constraints are added in this function, so the difference computed below is always 0.)
	PxU32 totalConstraintCount = numConstraintsDifferentBodies;

	args.mNumSelfConstraintBlocks=numSelfConstraintBlocks;

	args.mNumDifferentBodyConstraints=numConstraintsDifferentBodies;
	args.mNumSelfConstraints=totalConstraintCount-numConstraintsDifferentBodies;

	if (args.enhancedDeterminism)
	{
		// normalizePartitions() was skipped, so count the partitions here: after
		// writeConstraintDesc() each entry holds the end offset of its partition,
		// and the first entry equal to its predecessor marks an empty partition.
		PxU32 prevPartitionSize = 0;
		maxPartition = 0;
		for (PxU32 a = 0; a < constraintsPerPartition.size(); ++a, maxPartition++)
		{
			if (constraintsPerPartition[a] == prevPartitionSize)
				break;
			prevPartitionSize = constraintsPerPartition[a];
		}
	}

	return maxPartition;
}
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
// Argument bundle for partitionContactConstraints(): the caller fills the input
// fields and supplies the output/scratch buffers; the function fills the counters.
struct ConstraintPartitionArgs
{
	enum
	{
		// NOTE(review): not referenced by the partitioning code visible here - confirm where this limit applies.
		eMAX_NUM_BODIES = 8192
	};

	//Input
	PxSolverBody* mBodies;										// solver bodies; a constraint body is "active" if it points into this array
	PxU32 mNumBodies;											// number of entries in mBodies
	ArticulationSolverDesc* mArticulationPtrs;					// articulation descriptors; their fsData pointers identify articulations
	PxU32 mNumArticulationPtrs;									// number of entries in mArticulationPtrs (0 selects the rigid-body-only path)
	PxSolverConstraintDesc* mContactConstraintDescriptors;		// constraint descriptors to partition
	PxU32 mNumContactConstraintDescriptors;						// number of entries in mContactConstraintDescriptors
	//output
	PxSolverConstraintDesc* mOrderedContactConstraintDescriptors;	// receives the descriptors grouped by partition
	PxSolverConstraintDesc* mTempContactConstraintDescriptors;		// scratch: holds constraints waiting for a later batch of partitions
	PxU32 mNumSelfConstraintBlocks;								// written by partitionContactConstraints (always 0 there)
	PxU32 mNumDifferentBodyConstraints;							// written as the number of ordered constraints
	PxU32 mNumSelfConstraints;									// written by partitionContactConstraints (always 0 there)
	Ps::Array<PxU32>* mConstraintsPerPartition;					// per-partition offsets into the ordered descriptor array
	//Ps::Array<PxU32>* mStartIndices;
	Ps::Array<PxU32>* mBitField;								// scratch bit set (one bit per body/articulation) for normalizePartitions

	bool enhancedDeterminism;									// when true, the normalizePartitions rebalancing step is skipped
};
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
// Per-constraint inputs handed to the solver-constraint setup functions declared
// below (setupSolverConstraint4 / SetupSolverConstraint).
// NOTE(review): the field descriptions are inferred from names and types only;
// nothing in this header shows how they are consumed - confirm against the setup code.
struct SolverConstraintShaderPrepDesc
{
	const Constraint* constraint;			// the low-level constraint being prepared
	PxConstraintSolverPrep solverPrep;		// presumably the joint's solver-prep callback - confirm
	const void* constantBlock;				// presumably the constant data passed to solverPrep - confirm
	PxU32 constantBlockByteSize;			// presumably the size of constantBlock in bytes - confirm
};
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "foundation/PxMemory.h" +#include "DyConstraintPrep.h" +#include "PxsRigidBody.h" +#include "DySolverConstraint1D.h" +#include "PsSort.h" +#include "DySolverConstraintDesc.h" +#include "PxcConstraintBlockStream.h" +#include "DyArticulationContactPrep.h" +#include "PsFoundation.h" + +namespace physx +{ +namespace Dy +{ + // dsequeira: + // + // we can choose any linear combination of equality constraints and get the same solution + // Hence we can orthogonalize the constraints using the inner product given by the + // inverse mass matrix, so that when we use PGS, solving a constraint row for a joint + // doesn't disturb the solution of prior rows. + // + // We also eliminate the equality constraints from the hard inequality constraints - + // (essentially projecting the direction corresponding to the lagrange multiplier + // onto the equality constraint subspace) but 'til I've verified this generates + // exactly the same KKT/complementarity conditions, status is 'experimental'. + // + // since for equality constraints the resulting rows have the property that applying + // an impulse along one row doesn't alter the projected velocity along another row, + // all equality constraints (plus one inequality constraint) can be processed in parallel + // using SIMD + // + // Eliminating the inequality constraints from each other would require a solver change + // and not give us any more parallelism, although we might get better convergence. 
+ +namespace +{ + PX_FORCE_INLINE Vec3V V3FromV4(Vec4V x) { return Vec3V_From_Vec4V(x); } + PX_FORCE_INLINE Vec3V V3FromV4Unsafe(Vec4V x) { return Vec3V_From_Vec4V_WUndefined(x); } + PX_FORCE_INLINE Vec4V V4FromV3(Vec3V x) { return Vec4V_From_Vec3V(x); } + //PX_FORCE_INLINE Vec4V V4ClearW(Vec4V x) { return V4SetW(x, FZero()); } + +struct MassProps +{ + FloatV invMass0; + FloatV invMass1; + FloatV invInertiaScale0; + FloatV invInertiaScale1; + + PX_FORCE_INLINE MassProps(const PxSolverBodyData& bd0, + const PxSolverBodyData& bd1, + const PxConstraintInvMassScale& ims) + : + invMass0(FLoad(bd0.invMass * ims.linear0)) + , invMass1(FLoad(bd1.invMass * ims.linear1)) + , invInertiaScale0(FLoad(ims.angular0)) + , invInertiaScale1(FLoad(ims.angular1)) + {} +}; + + +PX_FORCE_INLINE PxReal innerProduct(const Px1DConstraint& row0, Px1DConstraint& row1, + PxVec4& row0AngSqrtInvInertia0, PxVec4& row0AngSqrtInvInertia1, + PxVec4& row1AngSqrtInvInertia0, PxVec4& row1AngSqrtInvInertia1, const MassProps& m) +{ + const Vec3V l0 = V3Mul(V3Scale(V3LoadA(row0.linear0), m.invMass0), V3LoadA(row1.linear0)); + const Vec3V l1 = V3Mul(V3Scale(V3LoadA(row0.linear1), m.invMass1), V3LoadA(row1.linear1)); + Vec4V r0ang0 = V4LoadA(&row0AngSqrtInvInertia0.x); + Vec4V r1ang0 = V4LoadA(&row1AngSqrtInvInertia0.x); + Vec4V r0ang1 = V4LoadA(&row0AngSqrtInvInertia1.x); + Vec4V r1ang1 = V4LoadA(&row1AngSqrtInvInertia1.x); + + const Vec3V i0 = V3ScaleAdd(V3Mul(Vec3V_From_Vec4V(r0ang0), Vec3V_From_Vec4V(r1ang0)), m.invInertiaScale0, l0); + const Vec3V i1 = V3ScaleAdd(V3MulAdd(Vec3V_From_Vec4V(r0ang1), Vec3V_From_Vec4V(r1ang1), i0), m.invInertiaScale1, l1); + PxF32 f; + FStore(V3SumElems(i1), &f); + return f; +} + + +// indexed rotation around axis, with sine and cosine of half-angle +PX_FORCE_INLINE PxQuat indexedRotation(PxU32 axis, PxReal s, PxReal c) +{ + PxQuat q(0,0,0,c); + reinterpret_cast<PxReal*>(&q)[axis] = s; + return q; +} + +PxQuat diagonalize(const PxMat33& m) // jacobi rotation using 
quaternions +{ + const PxU32 MAX_ITERS = 5; + + PxQuat q = PxQuat(PxIdentity); + + PxMat33 d; + for(PxU32 i=0; i < MAX_ITERS;i++) + { + const PxMat33 axes(q); + d = axes.getTranspose() * m * axes; + + const PxReal d0 = PxAbs(d[1][2]), d1 = PxAbs(d[0][2]), d2 = PxAbs(d[0][1]); + const PxU32 a = PxU32(d0 > d1 && d0 > d2 ? 0 : d1 > d2 ? 1 : 2); // rotation axis index, from largest off-diagonal element + + const PxU32 a1 = Ps::getNextIndex3(a), a2 = Ps::getNextIndex3(a1); + if(d[a1][a2] == 0.0f || PxAbs(d[a1][a1]-d[a2][a2]) > 2e6f*PxAbs(2.0f*d[a1][a2])) + break; + + const PxReal w = (d[a1][a1]-d[a2][a2]) / (2.0f*d[a1][a2]); // cot(2 * phi), where phi is the rotation angle + const PxReal absw = PxAbs(w); + + PxQuat r; + if(absw>1000) + r = indexedRotation(a, 1.0f/(4.0f*w), 1.f); // h will be very close to 1, so use small angle approx instead + else + { + const PxReal t = 1 / (absw + PxSqrt(w*w+1)); // absolute value of tan phi + const PxReal h = 1 / PxSqrt(t*t+1); // absolute value of cos phi + + PX_ASSERT(h!=1); // |w|<1000 guarantees this with typical IEEE754 machine eps (approx 6e-8) + r = indexedRotation(a, PxSqrt((1-h)/2) * PxSign(w), PxSqrt((1+h)/2)); + } + + q = (q*r).getNormalized(); + } + + return q; +} + + +PX_FORCE_INLINE void rescale(const Mat33V& m, PxVec3& a0, PxVec3& a1, PxVec3& a2) +{ + const Vec3V va0 = V3LoadU(a0); + const Vec3V va1 = V3LoadU(a1); + const Vec3V va2 = V3LoadU(a2); + + const Vec3V b0 = V3ScaleAdd(va0, V3GetX(m.col0), V3ScaleAdd(va1, V3GetY(m.col0), V3Scale(va2, V3GetZ(m.col0)))); + const Vec3V b1 = V3ScaleAdd(va0, V3GetX(m.col1), V3ScaleAdd(va1, V3GetY(m.col1), V3Scale(va2, V3GetZ(m.col1)))); + const Vec3V b2 = V3ScaleAdd(va0, V3GetX(m.col2), V3ScaleAdd(va1, V3GetY(m.col2), V3Scale(va2, V3GetZ(m.col2)))); + + V3StoreU(b0, a0); + V3StoreU(b1, a1); + V3StoreU(b2, a2); +} + +PX_FORCE_INLINE void rescale4(const Mat33V& m, PxReal* a0, PxReal* a1, PxReal* a2) +{ + const Vec4V va0 = V4LoadA(a0); + const Vec4V va1 = V4LoadA(a1); + const Vec4V 
va2 = V4LoadA(a2); + + const Vec4V b0 = V4ScaleAdd(va0, V3GetX(m.col0), V4ScaleAdd(va1, V3GetY(m.col0), V4Scale(va2, V3GetZ(m.col0)))); + const Vec4V b1 = V4ScaleAdd(va0, V3GetX(m.col1), V4ScaleAdd(va1, V3GetY(m.col1), V4Scale(va2, V3GetZ(m.col1)))); + const Vec4V b2 = V4ScaleAdd(va0, V3GetX(m.col2), V4ScaleAdd(va1, V3GetY(m.col2), V4Scale(va2, V3GetZ(m.col2)))); + + V4StoreA(b0, a0); + V4StoreA(b1, a1); + V4StoreA(b2, a2); +} + + +template<typename T> +PX_FORCE_INLINE void rescale(const PxMat33& m, T& a0, T& a1, T& a2) +{ + T b0 = a0*m(0,0) + a1 * m(1,0) + a2 * m(2,0); + T b1 = a0*m(0,1) + a1 * m(1,1) + a2 * m(2,1); + T b2 = a0*m(0,2) + a1 * m(1,2) + a2 * m(2,2); + + a0 = b0; + a1 = b1; + a2 = b2; +} + +void diagonalize(Px1DConstraint** row, + PxVec4* angSqrtInvInertia0, + PxVec4* angSqrtInvInertia1, + const MassProps &m) +{ + PxReal a00 = innerProduct(*row[0], *row[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[0], angSqrtInvInertia1[0], m); + PxReal a01 = innerProduct(*row[0], *row[1], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m); + PxReal a02 = innerProduct(*row[0], *row[2], angSqrtInvInertia0[0], angSqrtInvInertia1[0], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m); + PxReal a11 = innerProduct(*row[1], *row[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[1], angSqrtInvInertia1[1], m); + PxReal a12 = innerProduct(*row[1], *row[2], angSqrtInvInertia0[1], angSqrtInvInertia1[1], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m); + PxReal a22 = innerProduct(*row[2], *row[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], angSqrtInvInertia0[2], angSqrtInvInertia1[2], m); + + PxMat33 a(PxVec3(a00, a01, a02), + PxVec3(a01, a11, a12), + PxVec3(a02, a12, a22)); + + PxQuat q = diagonalize(a); + + PxMat33 n(-q); + + Mat33V mn(V3LoadU(n.column0), V3LoadU(n.column1), V3LoadU(n.column2)); + + //KS - We treat as a Vec4V so that we get geometricError rescaled for free along with 
linear0 + rescale4(mn, &row[0]->linear0.x, &row[1]->linear0.x, &row[2]->linear0.x); + rescale(mn, row[0]->linear1, row[1]->linear1, row[2]->linear1); + //KS - We treat as a PxVec4 so that we get velocityTarget rescaled for free + rescale4(mn, &row[0]->angular0.x, &row[1]->angular0.x, &row[2]->angular0.x); + rescale(mn, row[0]->angular1, row[1]->angular1, row[2]->angular1); + rescale4(mn, &angSqrtInvInertia0[0].x, &angSqrtInvInertia0[1].x, &angSqrtInvInertia0[2].x); + rescale4(mn, &angSqrtInvInertia1[0].x, &angSqrtInvInertia1[1].x, &angSqrtInvInertia1[2].x); + +} + +void orthogonalize(Px1DConstraint** row, + PxVec4* angSqrtInvInertia0, + PxVec4* angSqrtInvInertia1, + PxU32 rowCount, + PxU32 eqRowCount, + const MassProps &m) +{ + PX_ASSERT(eqRowCount<=6); + + const FloatV zero = FZero(); + + Vec3V lin1m[6], ang1m[6], lin1[6], ang1[6]; + Vec4V lin0m[6], ang0m[6]; // must have 0 in the W-field + Vec4V lin0AndG[6], ang0AndT[6]; + + for(PxU32 i=0;i<rowCount;i++) + { + Vec4V l0AndG = V4LoadA(&row[i]->linear0.x); // linear0 and geometric error + Vec4V a0AndT = V4LoadA(&row[i]->angular0.x); // angular0 and velocity target + + Vec3V l1 = V3FromV4(V4LoadA(&row[i]->linear1.x)); + Vec3V a1 = V3FromV4(V4LoadA(&row[i]->angular1.x)); + + Vec4V angSqrtL0 = V4LoadA(&angSqrtInvInertia0[i].x); + Vec4V angSqrtL1 = V4LoadA(&angSqrtInvInertia1[i].x); + + PxU32 eliminationRows = PxMin<PxU32>(i, eqRowCount); + for(PxU32 j=0;j<eliminationRows;j++) + { + const Vec3V s0 = V3MulAdd(l1, lin1m[j], V3FromV4Unsafe(V4Mul(l0AndG, lin0m[j]))); + const Vec3V s1 = V3MulAdd(V3FromV4Unsafe(angSqrtL1), ang1m[j], V3FromV4Unsafe(V4Mul(angSqrtL0, ang0m[j]))); + FloatV t = V3SumElems(V3Add(s0, s1)); + + l0AndG = V4NegScaleSub(lin0AndG[j], t, l0AndG); + a0AndT = V4NegScaleSub(ang0AndT[j], t, a0AndT); + l1 = V3NegScaleSub(lin1[j], t, l1); + a1 = V3NegScaleSub(ang1[j], t, a1); + angSqrtL0 = V4NegScaleSub(V4LoadA(&angSqrtInvInertia0[j].x), t, angSqrtL0); + angSqrtL1 = 
V4NegScaleSub(V4LoadA(&angSqrtInvInertia1[j].x), t, angSqrtL1); + } + + V4StoreA(l0AndG, &row[i]->linear0.x); + V4StoreA(a0AndT, &row[i]->angular0.x); + V3StoreA(l1, row[i]->linear1); + V3StoreA(a1, row[i]->angular1); + V4StoreA(angSqrtL0, &angSqrtInvInertia0[i].x); + V4StoreA(angSqrtL1, &angSqrtInvInertia1[i].x); + + if(i<eqRowCount) + { + lin0AndG[i] = l0AndG; + ang0AndT[i] = a0AndT; + lin1[i] = l1; + ang1[i] = a1; + + const Vec3V l0 = V3FromV4(l0AndG); + + const Vec3V l0m = V3Scale(l0, m.invMass0); + const Vec3V l1m = V3Scale(l1, m.invMass1); + const Vec4V a0m = V4Scale(angSqrtL0, m.invInertiaScale0); + const Vec4V a1m = V4Scale(angSqrtL1, m.invInertiaScale1); + + const Vec3V s0 = V3MulAdd(l0, l0m, V3Mul(l1, l1m)); + const Vec4V s1 = V4MulAdd(a0m, angSqrtL0, V4Mul(a1m, angSqrtL1)); + const FloatV s = V3SumElems(V3Add(s0, V3FromV4Unsafe(s1))); + const FloatV a = FSel(FIsGrtr(s, zero), FRecip(s), zero); // with mass scaling, it's possible for the inner product of a row to be zero + + lin0m[i] = V4Scale(V4ClearW(V4FromV3(l0m)), a); + ang0m[i] = V4Scale(V4ClearW(a0m), a); + lin1m[i] = V3Scale(l1m, a); + ang1m[i] = V3Scale(V3FromV4Unsafe(a1m), a); + } + } +} +} + + +void preprocessRows(Px1DConstraint** sorted, + Px1DConstraint* rows, + PxVec4* angSqrtInvInertia0, + PxVec4* angSqrtInvInertia1, + PxU32 rowCount, + const PxSolverBodyData& bd0, + const PxSolverBodyData& bd1, + const PxConstraintInvMassScale& ims, + bool disablePreprocessing, + bool diagonalizeDrive) +{ + // j is maxed at 12, typically around 7, so insertion sort is fine + for(PxU32 i=0; i<rowCount; i++) + { + Px1DConstraint* r = rows+i; + + PxU32 j = i; + for(;j>0 && r->solveHint < sorted[j-1]->solveHint; j--) + sorted[j] = sorted[j-1]; + + sorted[j] = r; + } + + for(PxU32 i=0;i<rowCount-1;i++) + PX_ASSERT(sorted[i]->solveHint <= sorted[i+1]->solveHint); + + for (PxU32 i = 0; i<rowCount; i++) + rows[i].forInternalUse = rows[i].flags & Px1DConstraintFlag::eKEEPBIAS ? 
rows[i].geometricError : 0; + + + const Mat33V sqrtInvInertia0 = Mat33V(V3LoadU(bd0.sqrtInvInertia.column0), V3LoadU(bd0.sqrtInvInertia.column1), + V3LoadU(bd0.sqrtInvInertia.column2)); + + const Mat33V sqrtInvInertia1 = Mat33V(V3LoadU(bd1.sqrtInvInertia.column0), V3LoadU(bd1.sqrtInvInertia.column1), + V3LoadU(bd1.sqrtInvInertia.column2)); + + PX_ASSERT(((uintptr_t(angSqrtInvInertia0)) & 0xF) == 0); + PX_ASSERT(((uintptr_t(angSqrtInvInertia1)) & 0xF) == 0); + + for(PxU32 i = 0; i < rowCount; ++i) + { + const Vec3V angDelta0 = M33MulV3(sqrtInvInertia0, V3LoadU(sorted[i]->angular0)); + const Vec3V angDelta1 = M33MulV3(sqrtInvInertia1, V3LoadU(sorted[i]->angular1)); + V4StoreA(Vec4V_From_Vec3V(angDelta0), &angSqrtInvInertia0[i].x); + V4StoreA(Vec4V_From_Vec3V(angDelta1), &angSqrtInvInertia1[i].x); + } + + if(disablePreprocessing) + return; + + MassProps m(bd0, bd1, ims); + for(PxU32 i=0;i<rowCount;) + { + const PxU32 groupMajorId = PxU32(sorted[i]->solveHint>>8), start = i++; + while(i<rowCount && PxU32(sorted[i]->solveHint>>8) == groupMajorId) + i++; + + if(groupMajorId == 4) + { + PxU32 bCount = start; // count of bilateral constraints + for(; bCount<i && (sorted[bCount]->solveHint&255)==0; bCount++) + ; + orthogonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, i-start, bCount-start, m); + } + + if(groupMajorId == 1 && diagonalizeDrive) + { + PxU32 slerp = start; // count of bilateral constraints + for(; slerp<i && (sorted[slerp]->solveHint&255)!=2; slerp++) + ; + if(slerp+3 == i) + diagonalize(sorted+slerp, angSqrtInvInertia0+slerp, angSqrtInvInertia1+slerp, m); + + PX_ASSERT(i-start==3); + diagonalize(sorted+start, angSqrtInvInertia0+start, angSqrtInvInertia1+start, m); + } + } +} + + + + + +PxU32 ConstraintHelper::setupSolverConstraint( +PxSolverConstraintPrepDesc& prepDesc, +PxConstraintAllocator& allocator, +PxReal dt, PxReal invdt) +{ + if (prepDesc .numRows== 0) + return 0; + + PxSolverConstraintDesc& desc = *prepDesc.desc; + + bool 
isExtended = desc.linkIndexA != PxSolverConstraintDesc::NO_LINK + || desc.linkIndexB != PxSolverConstraintDesc::NO_LINK; + + PxU32 stride = isExtended ? sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D); + const PxU32 constraintLength = sizeof(SolverConstraint1DHeader) + stride * prepDesc.numRows; + + //KS - +16 is for the constraint progress counter, which needs to be the last element in the constraint (so that we + //know SPU DMAs have completed) + PxU8* ptr = allocator.reserveConstraintData(constraintLength + 16u); + if(NULL == ptr || (reinterpret_cast<PxU8*>(-1))==ptr) + { + if(NULL==ptr) + { + PX_WARN_ONCE( + "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. " + "Either accept joints detaching/exploding or increase buffer size allocated for constraint prep by increasing PxSceneDesc::maxNbContactDataBlocks."); + return 0; + } + else + { + PX_WARN_ONCE( + "Attempting to allocate more than 16K of constraint data. " + "Either accept joints detaching/exploding or simplify constraints."); + ptr=NULL; + return 0; + } + } + desc.constraint = ptr; + + setConstraintLength(desc,constraintLength); + + desc.writeBack = prepDesc.writeback; + setWritebackLength(desc, sizeof(ConstraintWriteback)); + + memset(desc.constraint, 0, constraintLength); + + SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint); + PxU8* constraints = desc.constraint + sizeof(SolverConstraint1DHeader); + init(*header, Ps::to8(prepDesc.numRows), isExtended, prepDesc.mInvMassScales); + header->body0WorldOffset = prepDesc.body0WorldOffset; + header->linBreakImpulse = prepDesc.linBreakForce * dt; + header->angBreakImpulse = prepDesc.angBreakForce * dt; + header->breakable = PxU8((prepDesc.linBreakForce != PX_MAX_F32) || (prepDesc.angBreakForce != PX_MAX_F32)); + header->invMass0D0 = prepDesc.data0->invMass * prepDesc.mInvMassScales.linear0; + header->invMass1D1 = prepDesc.data1->invMass * 
prepDesc.mInvMassScales.linear1; + + + PX_ALIGN(16, PxVec4) angSqrtInvInertia0[MAX_CONSTRAINT_ROWS]; + PX_ALIGN(16, PxVec4) angSqrtInvInertia1[MAX_CONSTRAINT_ROWS]; + + Px1DConstraint* sorted[MAX_CONSTRAINT_ROWS]; + + preprocessRows(sorted, prepDesc.rows, angSqrtInvInertia0, angSqrtInvInertia1, prepDesc.numRows, *prepDesc.data0, *prepDesc.data1, prepDesc.mInvMassScales, + isExtended || prepDesc.disablePreprocessing, prepDesc.improvedSlerp); + + const PxReal erp = 1.0f; + for (PxU32 i = 0; i<prepDesc.numRows; i++) + { + Ps::prefetchLine(constraints, 128); + SolverConstraint1D &s = *reinterpret_cast<SolverConstraint1D *>(constraints); + Px1DConstraint& c = *sorted[i]; + + PxReal driveScale = c.flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && prepDesc.driveLimitsAreForces ? PxMin(dt, 1.0f) : 1.0f; + + PxReal unitResponse; + PxReal normalVel = 0.0f; + PxReal initVel = 0.f; + + if(!isExtended) + { + init(s, c.linear0, c.linear1, PxVec3(angSqrtInvInertia0[i].x, angSqrtInvInertia0[i].y, angSqrtInvInertia0[i].z), + PxVec3(angSqrtInvInertia1[i].x, angSqrtInvInertia1[i].y, angSqrtInvInertia1[i].z), c.minImpulse * driveScale, c.maxImpulse * driveScale); + s.ang0Writeback = c.angular0; + PxReal resp0 = s.lin0.magnitudeSquared() * prepDesc.data0->invMass * prepDesc.mInvMassScales.linear0 + s.ang0.magnitudeSquared() * prepDesc.mInvMassScales.angular0; + PxReal resp1 = s.lin1.magnitudeSquared() * prepDesc.data1->invMass * prepDesc.mInvMassScales.linear1 + s.ang1.magnitudeSquared() * prepDesc.mInvMassScales.angular1; + unitResponse = resp0 + resp1; + initVel = normalVel = prepDesc.data0->projectVelocity(c.linear0, c.angular0) - prepDesc.data1->projectVelocity(c.linear1, c.angular1); + } + else + { + init(s, c.linear0, c.linear1, c.angular0, c.angular1, c.minImpulse * driveScale, c.maxImpulse * driveScale); + SolverConstraint1DExt& e = static_cast<SolverConstraint1DExt&>(s); + + const SolverExtBody eb0(reinterpret_cast<const void*>(prepDesc.body0), prepDesc.data0, desc.linkIndexA); + 
const SolverExtBody eb1(reinterpret_cast<const void*>(prepDesc.body1), prepDesc.data1, desc.linkIndexB); + + const Cm::SpatialVector resp0 = createImpulseResponseVector(e.lin0, e.ang0, eb0); + const Cm::SpatialVector resp1 = createImpulseResponseVector(-e.lin1, -e.ang1, eb1); + unitResponse = getImpulseResponse(eb0, resp0, unsimdRef(e.deltaVA), prepDesc.mInvMassScales.linear0, prepDesc.mInvMassScales.angular0, + eb1, resp1, unsimdRef(e.deltaVB), prepDesc.mInvMassScales.linear1, prepDesc.mInvMassScales.angular1, true); + + s.ang0Writeback = c.angular0; + s.lin0 = resp0.linear; + s.ang0 = resp0.angular; + s.lin1 = -resp1.linear; + s.ang1 = -resp1.angular; + PxReal vel0, vel1; + if(needsNormalVel(c) || eb0.mLinkIndex == PxSolverConstraintDesc::NO_LINK || eb1.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + { + vel0 = eb0.projectVelocity(c.linear0, c.angular0); + vel1 = eb1.projectVelocity(c.linear1, c.angular1); + + normalVel = vel0 - vel1; + + //normalVel = eb0.projectVelocity(s.lin0, s.ang0) - eb1.projectVelocity(s.lin1, s.ang1); + if(eb0.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + initVel = vel0; + else if(eb1.mLinkIndex == PxSolverConstraintDesc::NO_LINK) + initVel = -vel1; + + } + } + + setSolverConstants(s.constant, s.unbiasedConstant, s.velMultiplier, s.impulseMultiplier, + c, normalVel, unitResponse, prepDesc.minResponseThreshold, erp, dt, invdt); + + //s.targetVelocity = initVel; + const PxReal velBias = initVel * s.velMultiplier; + s.constant += velBias; + s.unbiasedConstant += velBias; + + if(c.flags & Px1DConstraintFlag::eOUTPUT_FORCE) + s.flags |= DY_SC_FLAG_OUTPUT_FORCE; + + constraints += stride; + } + + //KS - Set the solve count at the end to 0 + *(reinterpret_cast<PxU32*>(constraints)) = 0; + *(reinterpret_cast<PxU32*>(constraints + 4)) = 0; + PX_ASSERT(desc.constraint + getConstraintLength(desc) == constraints); + return prepDesc.numRows; +} + +PxU32 SetupSolverConstraint(SolverConstraintShaderPrepDesc& shaderDesc, + PxSolverConstraintPrepDesc& 
prepDesc, + PxConstraintAllocator& allocator, + PxReal dt, PxReal invdt) +{ + // LL shouldn't see broken constraints + + PX_ASSERT(!(reinterpret_cast<ConstraintWriteback*>(prepDesc.writeback)->broken)); + + setConstraintLength(*prepDesc.desc, 0); + + if (!shaderDesc.solverPrep) + return 0; + + //PxU32 numAxisConstraints = 0; + + Px1DConstraint rows[MAX_CONSTRAINT_ROWS]; + + // This is necessary so that there will be sensible defaults and shaders will + // continue to work (albeit with a recompile) if the row format changes. + // It's a bit inefficient because it fills in all constraint rows even if there + // is only going to be one generated. A way around this would be for the shader to + // specify the maximum number of rows it needs, or it could call a subroutine to + // prep the row before it starts filling it it. + + PxMemZero(rows, sizeof(Px1DConstraint)*MAX_CONSTRAINT_ROWS); + + for (PxU32 i = 0; i<MAX_CONSTRAINT_ROWS; i++) + { + Px1DConstraint& c = rows[i]; + //Px1DConstraintInit(c); + c.minImpulse = -PX_MAX_REAL; + c.maxImpulse = PX_MAX_REAL; + } + + prepDesc.mInvMassScales.linear0 = prepDesc.mInvMassScales.linear1 = prepDesc.mInvMassScales.angular0 = prepDesc.mInvMassScales.angular1 = 1.f; + + PxVec3 body0WorldOffset(0.f); + PxU32 constraintCount = (*shaderDesc.solverPrep)(rows, + body0WorldOffset, + MAX_CONSTRAINT_ROWS, + prepDesc.mInvMassScales, + shaderDesc.constantBlock, + prepDesc.bodyFrame0, prepDesc.bodyFrame1); + + prepDesc.rows = rows; + prepDesc.numRows = constraintCount; + + prepDesc.body0WorldOffset = body0WorldOffset; + + return ConstraintHelper::setupSolverConstraint(prepDesc, allocator, dt, invdt); +} + +} + +} diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetupBlock.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetupBlock.cpp new file mode 100644 index 00000000..5c72f36e --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyConstraintSetupBlock.cpp @@ -0,0 +1,535 @@ +// This code contains NVIDIA 
Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "foundation/PxMemory.h" +#include "DyConstraintPrep.h" +#include "PxsRigidBody.h" +#include "DySolverConstraint1D.h" +#include "DySolverConstraint1D4.h" +#include "PsSort.h" +#include "PxcConstraintBlockStream.h" +#include "DyArticulationContactPrep.h" +#include "PsFoundation.h" +namespace physx +{ + +namespace Dy +{ + +void preprocessRows(Px1DConstraint** sorted, + Px1DConstraint* rows, + PxVec4* angSqrtInvInertia0, + PxVec4* angSqrtInvInertia1, + PxU32 rowCount, + const PxSolverBodyData& bd0, + const PxSolverBodyData& bd1, + const PxConstraintInvMassScale& ims, + bool disablePreprocessing, + bool diagonalizeDrive); + + +namespace +{ +void setConstants(PxReal& constant, PxReal& unbiasedConstant, PxReal& velMultiplier, PxReal& impulseMultiplier, + const Px1DConstraint& c, PxReal unitResponse, PxReal minRowResponse, PxReal erp, PxReal dt, PxReal recipdt, + const PxSolverBodyData& b0, const PxSolverBodyData& b1, const bool finished) +{ + if(finished) + { + constant = 0.f; + unbiasedConstant = 0.f; + velMultiplier = 0.f; + impulseMultiplier = 0.f; + return; + } + PxReal nv = needsNormalVel(c) ? 
b0.projectVelocity(c.linear0, c.angular0) - b1.projectVelocity(c.linear1, c.angular1) + : 0; + + setSolverConstants(constant, unbiasedConstant, velMultiplier, impulseMultiplier, + c, nv, unitResponse, minRowResponse, erp, dt, recipdt); +} +} + +SolverConstraintPrepState::Enum setupSolverConstraint4 + (PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs, + const PxReal dt, const PxReal recipdt, PxU32& totalRows, + PxConstraintAllocator& allocator, PxU32 maxRows); + +SolverConstraintPrepState::Enum setupSolverConstraint4 +(SolverConstraintShaderPrepDesc* PX_RESTRICT constraintShaderDescs, +PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs, +const PxReal dt, const PxReal recipdt, PxU32& totalRows, +PxConstraintAllocator& allocator) + +{ + //KS - we will never get here with constraints involving articulations so we don't need to stress about those in here + + totalRows = 0; + + Px1DConstraint allRows[MAX_CONSTRAINT_ROWS * 4]; + + PxU32 numRows = 0; + + PxU32 maxRows = 0; + PxU32 preppedIndex = 0; + + for (PxU32 a = 0; a < 4; ++a) + { + Px1DConstraint* rows = allRows + numRows; + SolverConstraintShaderPrepDesc& shaderDesc = constraintShaderDescs[a]; + PxSolverConstraintPrepDesc& desc = constraintDescs[a]; + + if (!shaderDesc.solverPrep) + return SolverConstraintPrepState::eUNBATCHABLE; + + PxMemZero(rows + preppedIndex, sizeof(Px1DConstraint)*(MAX_CONSTRAINT_ROWS)); + for (PxU32 b = preppedIndex; b < MAX_CONSTRAINT_ROWS; ++b) + { + Px1DConstraint& c = rows[b]; + //Px1DConstraintInit(c); + c.minImpulse = -PX_MAX_REAL; + c.maxImpulse = PX_MAX_REAL; + } + + desc.mInvMassScales.linear0 = desc.mInvMassScales.linear1 = desc.mInvMassScales.angular0 = desc.mInvMassScales.angular1 = 1.f; + + desc.body0WorldOffset = PxVec3(0.f); + + PxU32 constraintCount = (*shaderDesc.solverPrep)(rows, + desc.body0WorldOffset, + MAX_CONSTRAINT_ROWS, + desc.mInvMassScales, + shaderDesc.constantBlock, + desc.bodyFrame0, desc.bodyFrame1); + + preppedIndex = MAX_CONSTRAINT_ROWS - 
constraintCount; + + maxRows = PxMax(constraintCount, maxRows); + + if (constraintCount == 0) + return SolverConstraintPrepState::eUNBATCHABLE; + + desc.rows = rows; + desc.numRows = constraintCount; + numRows += constraintCount; + } + + return setupSolverConstraint4(constraintDescs, dt, recipdt, totalRows, allocator, maxRows); +} + +SolverConstraintPrepState::Enum setupSolverConstraint4 +(PxSolverConstraintPrepDesc* PX_RESTRICT constraintDescs, +const PxReal dt, const PxReal recipdt, PxU32& totalRows, +PxConstraintAllocator& allocator, PxU32 maxRows) +{ + const Vec4V zero = V4Zero(); + Px1DConstraint* allSorted[MAX_CONSTRAINT_ROWS * 4]; + PxU32 startIndex[4]; + PX_ALIGN(16, PxVec4) angSqrtInvInertia0[MAX_CONSTRAINT_ROWS * 4]; + PX_ALIGN(16, PxVec4) angSqrtInvInertia1[MAX_CONSTRAINT_ROWS * 4]; + + PxU32 numRows = 0; + + for (PxU32 a = 0; a < 4; ++a) + { + startIndex[a] = numRows; + PxSolverConstraintPrepDesc& desc = constraintDescs[a]; + Px1DConstraint** sorted = allSorted + numRows; + + preprocessRows(sorted, desc.rows, angSqrtInvInertia0 + numRows, angSqrtInvInertia1 + numRows, desc.numRows, *desc.data0, *desc.data1, desc.mInvMassScales, + desc.disablePreprocessing, desc.improvedSlerp); + + numRows += desc.numRows; + } + + + PxU32 stride = sizeof(SolverConstraint1DDynamic4); + + + const PxU32 constraintLength = sizeof(SolverConstraint1DHeader4) + stride * maxRows; + + //KS - +16 is for the constraint progress counter, which needs to be the last element in the constraint (so that we + //know SPU DMAs have completed) + PxU8* ptr = allocator.reserveConstraintData(constraintLength + 16u); + if(NULL == ptr || (reinterpret_cast<PxU8*>(-1))==ptr) + { + for(PxU32 a = 0; a < 4; ++a) + { + PxSolverConstraintPrepDesc& desc = constraintDescs[a]; + desc.desc->constraint = NULL; + setConstraintLength(*desc.desc, 0); + desc.desc->writeBack = desc.writeback; + } + + if(NULL==ptr) + { + PX_WARN_ONCE( + "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer 
space for constraint prep. " + "Either accept joints detaching/exploding or increase buffer size allocated for constraint prep by increasing PxSceneDesc::maxNbContactDataBlocks."); + return SolverConstraintPrepState::eOUT_OF_MEMORY; + } + else + { + PX_WARN_ONCE( + "Attempting to allocate more than 16K of constraint data. " + "Either accept joints detaching/exploding or simplify constraints."); + ptr=NULL; + return SolverConstraintPrepState::eOUT_OF_MEMORY; + } + } + //desc.constraint = ptr; + + totalRows = numRows; + + for(PxU32 a = 0; a < 4; ++a) + { + PxSolverConstraintPrepDesc& desc = constraintDescs[a]; + desc.desc->constraint = ptr; + setConstraintLength(*desc.desc, constraintLength); + desc.desc->writeBack = desc.writeback; + } + + const PxReal erp[4] = { 1.0f, 1.0f, 1.0f, 1.0f}; + //OK, now we build all 4 constraints into a single set of rows + + { + PxU8* currPtr = ptr; + SolverConstraint1DHeader4* header = reinterpret_cast<SolverConstraint1DHeader4*>(currPtr); + currPtr += sizeof(SolverConstraint1DHeader4); + + const PxSolverBodyData& bd00 = *constraintDescs[0].data0; + const PxSolverBodyData& bd01 = *constraintDescs[1].data0; + const PxSolverBodyData& bd02 = *constraintDescs[2].data0; + const PxSolverBodyData& bd03 = *constraintDescs[3].data0; + + const PxSolverBodyData& bd10 = *constraintDescs[0].data1; + const PxSolverBodyData& bd11 = *constraintDescs[1].data1; + const PxSolverBodyData& bd12 = *constraintDescs[2].data1; + const PxSolverBodyData& bd13 = *constraintDescs[3].data1; + + //Load up masses, invInertia, velocity etc. 
+ + const Vec4V invMassScale0 = V4LoadXYZW(constraintDescs[0].mInvMassScales.linear0, constraintDescs[1].mInvMassScales.linear0, + constraintDescs[2].mInvMassScales.linear0, constraintDescs[3].mInvMassScales.linear0); + const Vec4V invMassScale1 = V4LoadXYZW(constraintDescs[0].mInvMassScales.linear1, constraintDescs[1].mInvMassScales.linear1, + constraintDescs[2].mInvMassScales.linear1, constraintDescs[3].mInvMassScales.linear1); + + + const Vec4V iMass0 = V4LoadXYZW(bd00.invMass, bd01.invMass, bd02.invMass, bd03.invMass); + + const Vec4V iMass1 = V4LoadXYZW(bd10.invMass, bd11.invMass, bd12.invMass, bd13.invMass); + + const Vec4V invMass0 = V4Mul(iMass0, invMassScale0); + const Vec4V invMass1 = V4Mul(iMass1, invMassScale1); + + + const Vec4V invInertiaScale0 = V4LoadXYZW(constraintDescs[0].mInvMassScales.angular0, constraintDescs[1].mInvMassScales.angular0, + constraintDescs[2].mInvMassScales.angular0, constraintDescs[3].mInvMassScales.angular0); + const Vec4V invInertiaScale1 = V4LoadXYZW(constraintDescs[0].mInvMassScales.angular1, constraintDescs[1].mInvMassScales.angular1, + constraintDescs[2].mInvMassScales.angular1, constraintDescs[3].mInvMassScales.angular1); + + //Velocities + Vec4V linVel00 = V4LoadA(&bd00.linearVelocity.x); + Vec4V linVel01 = V4LoadA(&bd10.linearVelocity.x); + Vec4V angVel00 = V4LoadA(&bd00.angularVelocity.x); + Vec4V angVel01 = V4LoadA(&bd10.angularVelocity.x); + + Vec4V linVel10 = V4LoadA(&bd01.linearVelocity.x); + Vec4V linVel11 = V4LoadA(&bd11.linearVelocity.x); + Vec4V angVel10 = V4LoadA(&bd01.angularVelocity.x); + Vec4V angVel11 = V4LoadA(&bd11.angularVelocity.x); + + Vec4V linVel20 = V4LoadA(&bd02.linearVelocity.x); + Vec4V linVel21 = V4LoadA(&bd12.linearVelocity.x); + Vec4V angVel20 = V4LoadA(&bd02.angularVelocity.x); + Vec4V angVel21 = V4LoadA(&bd12.angularVelocity.x); + + Vec4V linVel30 = V4LoadA(&bd03.linearVelocity.x); + Vec4V linVel31 = V4LoadA(&bd13.linearVelocity.x); + Vec4V angVel30 = V4LoadA(&bd03.angularVelocity.x); + 
Vec4V angVel31 = V4LoadA(&bd13.angularVelocity.x); + + + Vec4V linVel0T0, linVel0T1, linVel0T2; + Vec4V linVel1T0, linVel1T1, linVel1T2; + Vec4V angVel0T0, angVel0T1, angVel0T2; + Vec4V angVel1T0, angVel1T1, angVel1T2; + + + PX_TRANSPOSE_44_34(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2); + PX_TRANSPOSE_44_34(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2); + PX_TRANSPOSE_44_34(angVel00, angVel10, angVel20, angVel30, angVel0T0, angVel0T1, angVel0T2); + PX_TRANSPOSE_44_34(angVel01, angVel11, angVel21, angVel31, angVel1T0, angVel1T1, angVel1T2); + + + + //body world offsets + Vec4V workOffset0 = Vec4V_From_Vec3V(V3LoadU(constraintDescs[0].body0WorldOffset)); + Vec4V workOffset1 = Vec4V_From_Vec3V(V3LoadU(constraintDescs[1].body0WorldOffset)); + Vec4V workOffset2 = Vec4V_From_Vec3V(V3LoadU(constraintDescs[2].body0WorldOffset)); + Vec4V workOffset3 = Vec4V_From_Vec3V(V3LoadU(constraintDescs[3].body0WorldOffset)); + + Vec4V workOffsetX, workOffsetY, workOffsetZ; + + PX_TRANSPOSE_44_34(workOffset0, workOffset1, workOffset2, workOffset3, workOffsetX, workOffsetY, workOffsetZ); + + const FloatV dtV = FLoad(dt); + Vec4V linBreakForce = V4LoadXYZW(constraintDescs[0].linBreakForce, constraintDescs[1].linBreakForce, + constraintDescs[2].linBreakForce, constraintDescs[3].linBreakForce); + Vec4V angBreakForce = V4LoadXYZW(constraintDescs[0].angBreakForce, constraintDescs[1].angBreakForce, + constraintDescs[2].angBreakForce, constraintDescs[3].angBreakForce); + + + header->break0 = PxU8((constraintDescs[0].linBreakForce != PX_MAX_F32) || (constraintDescs[0].angBreakForce != PX_MAX_F32)); + header->break1 = PxU8((constraintDescs[1].linBreakForce != PX_MAX_F32) || (constraintDescs[1].angBreakForce != PX_MAX_F32)); + header->break2 = PxU8((constraintDescs[2].linBreakForce != PX_MAX_F32) || (constraintDescs[2].angBreakForce != PX_MAX_F32)); + header->break3 = PxU8((constraintDescs[3].linBreakForce != PX_MAX_F32) || 
(constraintDescs[3].angBreakForce != PX_MAX_F32)); + + + //OK, I think that's everything loaded in + + header->invMass0D0 = invMass0; + header->invMass1D1 = invMass1; + header->angD0 = invInertiaScale0; + header->angD1 = invInertiaScale1; + header->body0WorkOffsetX = workOffsetX; + header->body0WorkOffsetY = workOffsetY; + header->body0WorkOffsetZ = workOffsetZ; + + header->count = maxRows; + header->type = DY_SC_TYPE_BLOCK_1D; + header->linBreakImpulse = V4Scale(linBreakForce, dtV); + header->angBreakImpulse = V4Scale(angBreakForce, dtV); + header->count0 = Ps::to8(constraintDescs[0].numRows); + header->count1 = Ps::to8(constraintDescs[1].numRows); + header->count2 = Ps::to8(constraintDescs[2].numRows); + header->count3 = Ps::to8(constraintDescs[3].numRows); + + //Now we loop over the constraints and build the results... + + PxU32 index0 = 0; + PxU32 endIndex0 = constraintDescs[0].numRows - 1; + PxU32 index1 = startIndex[1]; + PxU32 endIndex1 = index1 + constraintDescs[1].numRows - 1; + PxU32 index2 = startIndex[2]; + PxU32 endIndex2 = index2 + constraintDescs[2].numRows - 1; + PxU32 index3 = startIndex[3]; + PxU32 endIndex3 = index3 + constraintDescs[3].numRows - 1; + + const FloatV one = FOne(); + + for(PxU32 a = 0; a < maxRows; ++a) + { + SolverConstraint1DDynamic4* c = reinterpret_cast<SolverConstraint1DDynamic4*>(currPtr); + currPtr += stride; + + Px1DConstraint* con0 = allSorted[index0]; + Px1DConstraint* con1 = allSorted[index1]; + Px1DConstraint* con2 = allSorted[index2]; + Px1DConstraint* con3 = allSorted[index3]; + + Vec4V cangDelta00 = V4LoadA(&angSqrtInvInertia0[index0].x); + Vec4V cangDelta01 = V4LoadA(&angSqrtInvInertia0[index1].x); + Vec4V cangDelta02 = V4LoadA(&angSqrtInvInertia0[index2].x); + Vec4V cangDelta03 = V4LoadA(&angSqrtInvInertia0[index3].x); + + Vec4V cangDelta10 = V4LoadA(&angSqrtInvInertia1[index0].x); + Vec4V cangDelta11 = V4LoadA(&angSqrtInvInertia1[index1].x); + Vec4V cangDelta12 = V4LoadA(&angSqrtInvInertia1[index2].x); + Vec4V 
cangDelta13 = V4LoadA(&angSqrtInvInertia1[index3].x); + + index0 = index0 == endIndex0 ? index0 : index0 + 1; + index1 = index1 == endIndex1 ? index1 : index1 + 1; + index2 = index2 == endIndex2 ? index2 : index2 + 1; + index3 = index3 == endIndex3 ? index3 : index3 + 1; + + Vec4V driveScale = V4Splat(one); + if (con0->flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && constraintDescs[0].driveLimitsAreForces) + driveScale = V4SetX(driveScale, FMin(one, dtV)); + if (con1->flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && constraintDescs[1].driveLimitsAreForces) + driveScale = V4SetY(driveScale, FMin(one, dtV)); + if (con2->flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && constraintDescs[2].driveLimitsAreForces) + driveScale = V4SetZ(driveScale, FMin(one, dtV)); + if (con3->flags&Px1DConstraintFlag::eHAS_DRIVE_LIMIT && constraintDescs[3].driveLimitsAreForces) + driveScale = V4SetW(driveScale, FMin(one, dtV)); + + + Vec4V clin00 = V4LoadA(&con0->linear0.x); + Vec4V clin01 = V4LoadA(&con1->linear0.x); + Vec4V clin02 = V4LoadA(&con2->linear0.x); + Vec4V clin03 = V4LoadA(&con3->linear0.x); + + Vec4V cang00 = V4LoadA(&con0->angular0.x); + Vec4V cang01 = V4LoadA(&con1->angular0.x); + Vec4V cang02 = V4LoadA(&con2->angular0.x); + Vec4V cang03 = V4LoadA(&con3->angular0.x); + + Vec4V clin0X, clin0Y, clin0Z; + Vec4V cang0X, cang0Y, cang0Z; + + PX_TRANSPOSE_44_34(clin00, clin01, clin02, clin03, clin0X, clin0Y, clin0Z); + PX_TRANSPOSE_44_34(cang00, cang01, cang02, cang03, cang0X, cang0Y, cang0Z); + + const Vec4V maxImpulse = V4LoadXYZW(con0->maxImpulse, con1->maxImpulse, con2->maxImpulse, con3->maxImpulse); + const Vec4V minImpulse = V4LoadXYZW(con0->minImpulse, con1->minImpulse, con2->minImpulse, con3->minImpulse); + + Vec4V angDelta0X, angDelta0Y, angDelta0Z; + + PX_TRANSPOSE_44_34(cangDelta00, cangDelta01, cangDelta02, cangDelta03, angDelta0X, angDelta0Y, angDelta0Z); + + c->flags[0] = 0; + c->flags[1] = 0; + c->flags[2] = 0; + c->flags[3] = 0; + + c->lin0X = clin0X; + c->lin0Y = clin0Y; 
+ c->lin0Z = clin0Z; + c->ang0X = angDelta0X; + c->ang0Y = angDelta0Y; + c->ang0Z = angDelta0Z; + c->ang0WritebackX = cang0X; + c->ang0WritebackY = cang0Y; + c->ang0WritebackZ = cang0Z; + + c->minImpulse = V4Mul(minImpulse, driveScale); + c->maxImpulse = V4Mul(maxImpulse, driveScale); + c->appliedForce = zero; + + const Vec4V lin0MagSq = V4MulAdd(clin0Z, clin0Z, V4MulAdd(clin0Y, clin0Y, V4Mul(clin0X, clin0X))); + const Vec4V cang0DotAngDelta = V4MulAdd(angDelta0Z, angDelta0Z, V4MulAdd(angDelta0Y, angDelta0Y, V4Mul(angDelta0X, angDelta0X))); + c->flags[0] = 0; + c->flags[1] = 0; + c->flags[2] = 0; + c->flags[3] = 0; + + Vec4V unitResponse = V4MulAdd(lin0MagSq, invMass0, V4Mul(cang0DotAngDelta, invInertiaScale0)); + + Vec4V clin10 = V4LoadA(&con0->linear1.x); + Vec4V clin11 = V4LoadA(&con1->linear1.x); + Vec4V clin12 = V4LoadA(&con2->linear1.x); + Vec4V clin13 = V4LoadA(&con3->linear1.x); + + Vec4V cang10 = V4LoadA(&con0->angular1.x); + Vec4V cang11 = V4LoadA(&con1->angular1.x); + Vec4V cang12 = V4LoadA(&con2->angular1.x); + Vec4V cang13 = V4LoadA(&con3->angular1.x); + + Vec4V clin1X, clin1Y, clin1Z; + Vec4V cang1X, cang1Y, cang1Z; + PX_TRANSPOSE_44_34(clin10, clin11, clin12, clin13, clin1X, clin1Y, clin1Z); + PX_TRANSPOSE_44_34(cang10, cang11, cang12, cang13, cang1X, cang1Y, cang1Z); + + Vec4V angDelta1X, angDelta1Y, angDelta1Z; + + PX_TRANSPOSE_44_34(cangDelta10, cangDelta11, cangDelta12, cangDelta13, angDelta1X, angDelta1Y, angDelta1Z); + + const Vec4V lin1MagSq = V4MulAdd(clin1Z, clin1Z, V4MulAdd(clin1Y, clin1Y, V4Mul(clin1X, clin1X))); + const Vec4V cang1DotAngDelta = V4MulAdd(angDelta1Z, angDelta1Z, V4MulAdd(angDelta1Y, angDelta1Y, V4Mul(angDelta1X, angDelta1X))); + + c->lin1X = clin1X; + c->lin1Y = clin1Y; + c->lin1Z = clin1Z; + + c->ang1X = angDelta1X; + c->ang1Y = angDelta1Y; + c->ang1Z = angDelta1Z; + + unitResponse = V4Add(unitResponse, V4MulAdd(lin1MagSq, invMass1, V4Mul(cang1DotAngDelta, invInertiaScale1))); + + Vec4V linProj0(V4Mul(clin0X, linVel0T0)); 
+ Vec4V linProj1(V4Mul(clin1X, linVel1T0)); + Vec4V angProj0(V4Mul(cang0X, angVel0T0)); + Vec4V angProj1(V4Mul(cang1X, angVel1T0)); + + linProj0 = V4MulAdd(clin0Y, linVel0T1, linProj0); + linProj1 = V4MulAdd(clin1Y, linVel1T1, linProj1); + angProj0 = V4MulAdd(cang0Y, angVel0T1, angProj0); + angProj1 = V4MulAdd(cang1Y, angVel1T1, angProj1); + + linProj0 = V4MulAdd(clin0Z, linVel0T2, linProj0); + linProj1 = V4MulAdd(clin1Z, linVel1T2, linProj1); + angProj0 = V4MulAdd(cang0Z, angVel0T2, angProj0); + angProj1 = V4MulAdd(cang1Z, angVel1T2, angProj1); + + const Vec4V projectVel0 = V4Add(linProj0, angProj0); + const Vec4V projectVel1 = V4Add(linProj1, angProj1); + + const Vec4V normalVel = V4Sub(projectVel0, projectVel1); + + + { + const PxVec4& ur = reinterpret_cast<const PxVec4&>(unitResponse); + PxVec4& cConstant = reinterpret_cast<PxVec4&>(c->constant); + PxVec4& cUnbiasedConstant = reinterpret_cast<PxVec4&>(c->unbiasedConstant); + PxVec4& cVelMultiplier = reinterpret_cast<PxVec4&>(c->velMultiplier); + PxVec4& cImpulseMultiplier = reinterpret_cast<PxVec4&>(c->impulseMultiplier); + + setConstants(cConstant.x, cUnbiasedConstant.x, cVelMultiplier.x, cImpulseMultiplier.x, + *con0, ur.x, constraintDescs[0].minResponseThreshold, erp[0], dt, recipdt, + *constraintDescs[0].data0, *constraintDescs[0].data1, a >= constraintDescs[0].numRows); + + setConstants(cConstant.y, cUnbiasedConstant.y, cVelMultiplier.y, cImpulseMultiplier.y, + *con1, ur.y, constraintDescs[1].minResponseThreshold, erp[1], dt, recipdt, + *constraintDescs[1].data0, *constraintDescs[1].data1, a >= constraintDescs[1].numRows); + + setConstants(cConstant.z, cUnbiasedConstant.z, cVelMultiplier.z, cImpulseMultiplier.z, + *con2, ur.z, constraintDescs[2].minResponseThreshold, erp[2], dt, recipdt, + *constraintDescs[2].data0, *constraintDescs[2].data1, a >= constraintDescs[2].numRows); + + setConstants(cConstant.w, cUnbiasedConstant.w, cVelMultiplier.w, cImpulseMultiplier.w, + *con3, ur.w, 
constraintDescs[3].minResponseThreshold, erp[3], dt, recipdt, + *constraintDescs[3].data0, *constraintDescs[3].data1, a >= constraintDescs[3].numRows); + } + + const Vec4V velBias = V4Mul(c->velMultiplier, normalVel); + c->constant = V4Add(c->constant, velBias); + c->unbiasedConstant = V4Add(c->unbiasedConstant, velBias); + + if(con0->flags & Px1DConstraintFlag::eOUTPUT_FORCE) + c->flags[0] |= DY_SC_FLAG_OUTPUT_FORCE; + if(con1->flags & Px1DConstraintFlag::eOUTPUT_FORCE) + c->flags[1] |= DY_SC_FLAG_OUTPUT_FORCE; + if(con2->flags & Px1DConstraintFlag::eOUTPUT_FORCE) + c->flags[2] |= DY_SC_FLAG_OUTPUT_FORCE; + if(con3->flags & Px1DConstraintFlag::eOUTPUT_FORCE) + c->flags[3] |= DY_SC_FLAG_OUTPUT_FORCE; + } + *(reinterpret_cast<PxU32*>(currPtr)) = 0; + *(reinterpret_cast<PxU32*>(currPtr + 4)) = 0; + } + + //OK, we're ready to allocate and solve prep these constraints now :-) + return SolverConstraintPrepState::eSUCCESS; +} + +} + +} diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.cpp new file mode 100644 index 00000000..1e21f1e3 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.cpp @@ -0,0 +1,725 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+
+#include "foundation/PxPreprocessor.h"
+#include "PxSceneDesc.h"
+#include "PsVecMath.h"
+#include "PsMathUtils.h"
+#include "DySolverContact.h"
+#include "DySolverContact4.h"
+#include "DySolverConstraintTypes.h"
+#include "PxcNpWorkUnit.h"
+#include "DyThreadContext.h"
+#include "DyContactPrep.h"
+#include "PxcNpContactPrepShared.h"
+#include "PxvDynamics.h"
+#include "DyCorrelationBuffer.h"
+#include "DyDynamics.h"
+#include "DyArticulationContactPrep.h"
+#include "PxsContactManager.h"
+#include "PsFoundation.h"
+
+using namespace physx;
+using namespace Gu;
+
+
+#include "PsVecMath.h"
+#include "PxContactModifyCallback.h"
+#include "PxsMaterialManager.h"
+#include "PxsMaterialCombiner.h"
+#include "DyContactPrepShared.h"
+
+using namespace Ps::aos;
+
+namespace physx
+{
+namespace Dy
+{
+
+// Table of the three contact-prep entry points: the patch-friction version defined in this file
+// and the two Coulomb variants (defined elsewhere).
+// NOTE(review): presumably indexed by the scene's friction model - confirm against the callers.
+PxcCreateFinalizeSolverContactMethod createFinalizeMethods[3] =
+{
+	createFinalizeSolverContacts,
+	createFinalizeSolverContactsCoulomb1D,
+	createFinalizeSolverContactsCoulomb2D
+};
+
+
+
+// Serialises the correlated contacts in 'c' into the already-reserved solver constraint stream 'workspace'
+// (rigid body / static path; the articulation path uses setupFinalizeExtSolverContacts instead).
+//
+// For every friction patch that owns at least one contact the following is written, in this order:
+//   1. one SolverContactHeader,
+//   2. one SolverContactPoint per contact (filled in by constructContactConstraint),
+//   3. a zeroed PxF32 applied-force buffer whose length is rounded up to a multiple of four floats,
+//   4. when friction is enabled and the patch has anchors: two SolverContactFriction rows per anchor,
+//      one along each tangent direction (t0, t1).
+// This layout has to match the byte count produced by computeBlockStreamByteSizes(), because that count
+// is what the caller used to reserve 'workspace'.
+//
+// buffer            - contact points referenced by the patches in 'c'
+// bodyFrame0/1      - body poses; used to move friction anchors into world space
+// data0/data1       - solver body data (velocities, inverse mass, sqrt inverse inertia, penBiasClamp)
+// invMassScale*/invInertiaScale* - per-body scales applied to inverse mass / angular response
+// frictionDataPtr   - base address of the friction patch array; each header stores an address inside it
+//                     (frictionBrokenWritebackByte)
+static void setupFinalizeSolverConstraints(Sc::ShapeInteraction* shapeInteraction,
+						const ContactPoint* buffer,
+						const CorrelationBuffer& c,
+						const PxTransform& bodyFrame0,
+						const PxTransform& bodyFrame1,
+						PxU8* workspace,
+						const PxSolverBodyData& data0,
+						const PxSolverBodyData& data1,
+						const PxReal invDtF32,
+						PxReal bounceThresholdF32,
+						PxReal invMassScale0, PxReal invInertiaScale0,
+						PxReal invMassScale1, PxReal invInertiaScale1,
+						bool hasForceThreshold, bool staticOrKinematicBody,
+						const PxReal restDist, PxU8* frictionDataPtr,
+						const PxReal maxCCDSeparation)
+{
+	// NOTE II: the friction patches are sparse (some of them have no contact patches, and
+	// therefore did not get written back to the cache) but the patch addresses are dense,
+	// corresponding to valid patches
+
+	const FloatV ccdMaxSeparation = FLoad(maxCCDSeparation);
+
+	PxU8 flags = PxU8(hasForceThreshold ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0);
+
+	// Write cursor into the constraint stream; advanced as each record is emitted.
+	PxU8* PX_RESTRICT ptr = workspace;
+
+	PxU8 type = Ps::to8(staticOrKinematicBody ? DY_SC_TYPE_STATIC_CONTACT
+		: DY_SC_TYPE_RB_CONTACT);
+
+	const FloatV zero=FZero();
+
+	const FloatV d0 = FLoad(invMassScale0);
+	const FloatV d1 = FLoad(invMassScale1);
+	const FloatV angD0 = FLoad(invInertiaScale0);
+	const FloatV angD1 = FLoad(invInertiaScale1);
+
+	const FloatV nDom1fV = FNeg(d1);
+
+	const FloatV invMass0 = FLoad(data0.invMass);
+	const FloatV invMass1 = FLoad(data1.invMass);
+
+	// Scaled inverse masses. Body 1's value is kept NEGATED here: it is negated again before being stored
+	// in header->invMass1 and is subtracted (not added) when the friction response is accumulated below.
+	const FloatV invMass0_dom0fV = FMul(d0, invMass0);
+	const FloatV invMass1_dom1fV = FMul(nDom1fV, invMass1);
+
+
+	// Z/W are constant for the whole call; X/Y (friction coefficients) are filled in per friction patch.
+	Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = V4Zero();
+	staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, invMass0_dom0fV);
+	staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, invMass1_dom1fV);
+
+	const FloatV restDistance = FLoad(restDist);
+
+	const FloatV maxPenBias = FMax(FLoad(data0.penBiasClamp), FLoad(data1.penBiasClamp));
+
+	const QuatV bodyFrame0q = QuatVLoadU(&bodyFrame0.q.x);
+	const Vec3V bodyFrame0p = V3LoadU(bodyFrame0.p);
+
+	const QuatV bodyFrame1q = QuatVLoadU(&bodyFrame1.q.x);
+	const Vec3V bodyFrame1p = V3LoadU(bodyFrame1.p);
+
+	// Index of the next friction patch in the dense friction array addressed through frictionDataPtr;
+	// only advanced for patches that actually have contacts.
+	PxU32 frictionPatchWritebackAddrIndex = 0;
+	// NOTE(review): accumulated below but never read inside this function.
+	PxU32 contactWritebackCount = 0;
+
+	Ps::prefetchLine(c.contactID);
+	Ps::prefetchLine(c.contactID, 128);
+
+	const Vec3V linVel0 = V3LoadU_SafeReadW(data0.linearVelocity);	// PT: safe because 'invMass' follows 'initialLinVel' in PxSolverBodyData
+	const Vec3V linVel1 = V3LoadU_SafeReadW(data1.linearVelocity);	// PT: safe because 'invMass' follows 'initialLinVel' in PxSolverBodyData
+	const Vec3V angVel0 = V3LoadU_SafeReadW(data0.angularVelocity);	// PT: safe because 'reportThreshold' follows 'initialAngVel' in PxSolverBodyData
+	const Vec3V angVel1 = V3LoadU_SafeReadW(data1.angularVelocity);	// PT: safe because 'reportThreshold' follows 'initialAngVel' in PxSolverBodyData
+
+	PX_ALIGN(16, const Mat33V invSqrtInertia0)
+	(
+		V3LoadU_SafeReadW(data0.sqrtInvInertia.column0),	// PT: safe because 'column1' follows 'column0' in PxMat33
+		V3LoadU_SafeReadW(data0.sqrtInvInertia.column1),	// PT: safe because 'column2' follows 'column1' in PxMat33
+		V3LoadU(data0.sqrtInvInertia.column2)
+	);
+
+	PX_ALIGN(16, const Mat33V invSqrtInertia1)
+	(
+		V3LoadU_SafeReadW(data1.sqrtInvInertia.column0),	// PT: safe because 'column1' follows 'column0' in PxMat33
+		V3LoadU_SafeReadW(data1.sqrtInvInertia.column1),	// PT: safe because 'column2' follows 'column1' in PxMat33
+		V3LoadU(data1.sqrtInvInertia.column2)
+	);
+
+	const FloatV invDt = FLoad(invDtF32);
+	const FloatV p8 = FLoad(0.8f);
+	const FloatV bounceThreshold = FLoad(bounceThresholdF32);
+
+	const FloatV invDtp8 = FMul(invDt, p8);
+
+
+	for(PxU32 i=0;i<c.frictionPatchCount;i++)
+	{
+		// Friction patches without contacts emit nothing and do not consume a writeback slot.
+		PxU32 contactCount = c.frictionPatchContactCounts[i];
+		if(contactCount == 0)
+			continue;
+
+		const FrictionPatch& frictionPatch = c.frictionPatches[i];
+		PX_ASSERT(frictionPatch.anchorCount <= 2);
+
+		// The first contact of the first contact patch supplies the material data
+		// (restitution, friction coefficients, material flags) and the normal for the whole friction patch.
+		PxU32 firstPatch = c.correlationListHeads[i];
+		const Gu::ContactPoint* contactBase0 = buffer + c.contactPatches[firstPatch].start;
+
+		const PxReal combinedRestitution = contactBase0->restitution;
+
+		SolverContactHeader* PX_RESTRICT header = reinterpret_cast<SolverContactHeader*>(ptr);
+		ptr += sizeof(SolverContactHeader);
+
+
+		Ps::prefetchLine(ptr, 128);
+		Ps::prefetchLine(ptr, 256);
+
+		header->shapeInteraction = shapeInteraction;
+		header->flags = flags;
+		FStore(invMass0_dom0fV, &header->invMass0);
+		FStore(FNeg(invMass1_dom1fV), &header->invMass1);
+		const FloatV restitution = FLoad(combinedRestitution);
+
+		PxU32 pointStride = sizeof(SolverContactPoint);
+		PxU32 frictionStride = sizeof(SolverContactFriction);
+
+		const Vec3V normal = V3LoadA(buffer[c.contactPatches[c.correlationListHeads[i]].start].normal);
+		const FloatV normalLenSq = V3LengthSq(normal);
+		const VecCrossV norCross = V3PrepareCross(normal);
+		const FloatV norVel = V3SumElems(V3NegMulSub(normal, linVel1, V3Mul(normal, linVel0)));
+
+		const FloatV invMassNorLenSq0 = FMul(invMass0_dom0fV, normalLenSq);
+		const FloatV invMassNorLenSq1 = FMul(invMass1_dom1fV, normalLenSq);
+
+		header->normal = normal;
+
+		// Walk the linked list of contact patches correlated with this friction patch and emit
+		// one SolverContactPoint per contact.
+		for(PxU32 patch=c.correlationListHeads[i];
+			patch!=CorrelationBuffer::LIST_END;
+			patch = c.contactPatches[patch].next)
+		{
+			const PxU32 count = c.contactPatches[patch].count;
+			const Gu::ContactPoint* contactBase = buffer + c.contactPatches[patch].start;
+
+			PxU8* p = ptr;
+
+			for(PxU32 j=0;j<count;j++)
+			{
+				Ps::prefetchLine(p, 256);
+				const Gu::ContactPoint& contact = contactBase[j];
+
+				SolverContactPoint* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPoint*>(p);
+				p += pointStride;
+
+				constructContactConstraint(invSqrtInertia0, invSqrtInertia1, invMassNorLenSq0,
+					invMassNorLenSq1, angD0, angD1, bodyFrame0p, bodyFrame1p,
+					normal, norVel, norCross, angVel0, angVel1,
+					invDt, invDtp8, restDistance, maxPenBias, restitution,
+					bounceThreshold, contact, *solverContact,
+					ccdMaxSeparation);
+			}
+
+			ptr = p;
+		}
+		contactWritebackCount += contactCount;
+
+		// Zeroed per-contact force buffer; the cursor skips a whole multiple of four floats.
+		PxF32* forceBuffers = reinterpret_cast<PxF32*>(ptr);
+		PxMemZero(forceBuffers, sizeof(PxF32) * contactCount);
+		ptr += ((contactCount + 3) & (~3)) * sizeof(PxF32); // jump to next 16-byte boundary
+
+		const PxReal staticFriction = contactBase0->staticFriction;
+		const PxReal dynamicFriction = contactBase0->dynamicFriction;
+		const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION);
+		staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(staticFriction));
+		staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, FLoad(dynamicFriction));
+
+		// NOTE(review): computeBlockStreamByteSizes() decides "have friction" from frictionPatch.materialFlags only,
+		// whereas this test reads the first contact's materialFlags and also requires at least one anchor.
+		// With zero anchors both sides account for zero friction bytes; the two flag sources are presumably
+		// kept identical by the correlation code - confirm.
+		const bool haveFriction = (disableStrongFriction == 0 && frictionPatch.anchorCount != 0) ;//PX_IR(n.staticFriction) > 0 || PX_IR(n.dynamicFriction) > 0;
+		header->numNormalConstr = Ps::to8(contactCount);
+		header->numFrictionConstr = Ps::to8(haveFriction ? frictionPatch.anchorCount*2 : 0);
+
+		header->type = type;
+
+		header->staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W = staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W;
+		FStore(angD0, &header->angDom0);
+		FStore(angD1, &header->angDom1);
+
+		header->broken = 0;
+
+		if(haveFriction)
+		{
+			const Vec3V linVrel = V3Sub(linVel0, linVel1);
+			//const Vec3V normal = Vec3V_From_PxVec3_Aligned(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal);
+
+			const FloatV orthoThreshold = FLoad(0.70710678f);
+			const FloatV p1 = FLoad(0.1f);
+			// fallback: normal.cross((1,0,0)) or normal.cross((0,0,1))
+			const FloatV normalX = V3GetX(normal);
+			const FloatV normalY = V3GetY(normal);
+			const FloatV normalZ = V3GetZ(normal);
+
+			Vec3V t0Fallback1 = V3Merge(zero, FNeg(normalZ), normalY);
+			Vec3V t0Fallback2 = V3Merge(FNeg(normalY), normalX, zero) ;
+			Vec3V t0Fallback = V3Sel(FIsGrtr(orthoThreshold, FAbs(normalX)), t0Fallback1, t0Fallback2);
+
+			// First tangent: the relative linear velocity with its normal component removed; if its squared
+			// length is not greater than 0.1 the geometric fallback above is used instead.
+			Vec3V t0 = V3Sub(linVrel, V3Scale(normal, V3Dot(normal, linVrel)));
+			t0 = V3Sel(FIsGrtr(V3LengthSq(t0), p1), t0, t0Fallback);
+			t0 = V3Normalize(t0);
+
+			const VecCrossV t0Cross = V3PrepareCross(t0);
+
+			// Second tangent: normal x t0.
+			const Vec3V t1 = V3Cross(norCross, t0Cross);
+			const VecCrossV t1Cross = V3PrepareCross(t1);
+
+
+			// since we don't even have the body velocities we can't compute the tangent dirs, so
+			// the only thing we can do right now is to write the geometric information (which is the
+			// same for both axis constraints of an anchor) We put ra in the raXn field, rb in the rbXn
+			// field, and the error in the normal field. See corresponding comments in
+			// completeContactFriction()
+
+			//We want to set the writeBack ptr to point to the broken flag of the friction patch.
+			//On spu we have a slight problem here because the friction patch array is
+			//in local store rather than in main memory. The good news is that the address of the friction
+			//patch array in main memory is stored in the work unit. These two addresses will be equal
+			//except on spu where one is local store memory and the other is the effective address in main memory.
+			//Using the value stored in the work unit guarantees that the main memory address is used on all platforms.
+			PxU8* PX_RESTRICT writeback = frictionDataPtr + frictionPatchWritebackAddrIndex*sizeof(FrictionPatch);
+
+			header->frictionBrokenWritebackByte = writeback;
+
+			for(PxU32 j = 0; j < frictionPatch.anchorCount; j++)
+			{
+				Ps::prefetchLine(ptr, 256);
+				Ps::prefetchLine(ptr, 384);
+				// Two consecutive friction rows per anchor: f0 acts along t0, f1 along t1.
+				SolverContactFriction* PX_RESTRICT f0 = reinterpret_cast<SolverContactFriction*>(ptr);
+				ptr += frictionStride;
+				SolverContactFriction* PX_RESTRICT f1 = reinterpret_cast<SolverContactFriction*>(ptr);
+				ptr += frictionStride;
+
+				Vec3V body0Anchor = V3LoadU(frictionPatch.body0Anchors[j]);
+				Vec3V body1Anchor = V3LoadU(frictionPatch.body1Anchors[j]);
+
+				// Anchors rotated by the body orientations; 'error' is the world-space gap between the two anchors.
+				Vec3V ra = QuatRotate(bodyFrame0q, body0Anchor);
+				Vec3V rb = QuatRotate(bodyFrame1q, body1Anchor);
+				Vec3V error =V3Sub(V3Add(ra, bodyFrame0p), V3Add(rb, bodyFrame1p));
+
+				// Target velocity is taken from the first contact of the patch (same value for every anchor).
+				const PxU32 index = c.contactPatches[c.correlationListHeads[i]].start;
+				const Vec3V tvel = V3LoadA(buffer[index].targetVel);
+
+				{
+					const Vec3V raXn = V3Cross(ra, t0Cross);
+					const Vec3V rbXn = V3Cross(rb, t0Cross);
+
+					const Vec3V raXnSqrtInertia = M33MulV3(invSqrtInertia0, raXn);
+					const Vec3V rbXnSqrtInertia = M33MulV3(invSqrtInertia1, rbXn);
+
+
+					// resp1 subtracts invMass1_dom1fV because that value is stored negated (see above).
+					const FloatV resp0 = FAdd(invMass0_dom0fV, FMul(angD0, V3Dot(raXnSqrtInertia, raXnSqrtInertia)));
+					const FloatV resp1 = FSub(FMul(angD1, V3Dot(rbXnSqrtInertia, rbXnSqrtInertia)), invMass1_dom1fV);
+					const FloatV resp = FAdd(resp0, resp1);
+
+					// 0.8 / response, or zero when the response is not positive.
+					const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FDiv(p8, resp), zero);
+
+					FloatV targetVel = V3Dot(tvel, t0);
+
+					const FloatV vrel1 = FAdd(V3Dot(t0, linVel0), V3Dot(raXn, angVel0));
+					const FloatV vrel2 = FAdd(V3Dot(t0, linVel1), V3Dot(rbXn, angVel1));
+					const FloatV vrel = FSub(vrel1, vrel2);
+
+					targetVel = FSub(targetVel, vrel);
+
+					f0->normalXYZ_appliedForceW = V4SetW(t0, zero);
+					f0->raXnXYZ_velMultiplierW = V4SetW(raXnSqrtInertia, velMultiplier);
+					f0->rbXnXYZ_biasW = V4SetW(rbXnSqrtInertia, FMul(V3Dot(t0, error), invDt));
+					FStore(targetVel, &f0->targetVel);
+				}
+
+				// Same computation as above, along the second tangent t1.
+				{
+
+					const Vec3V raXn = V3Cross(ra, t1Cross);
+					const Vec3V rbXn = V3Cross(rb, t1Cross);
+
+					const Vec3V raXnSqrtInertia = M33MulV3(invSqrtInertia0, raXn);
+					const Vec3V rbXnSqrtInertia = M33MulV3(invSqrtInertia1, rbXn);
+
+					const FloatV resp0 = FAdd(invMass0_dom0fV, FMul(angD0, V3Dot(raXnSqrtInertia, raXnSqrtInertia)));
+					const FloatV resp1 = FSub(FMul(angD1, V3Dot(rbXnSqrtInertia, rbXnSqrtInertia)), invMass1_dom1fV);
+					const FloatV resp = FAdd(resp0, resp1);
+
+					const FloatV velMultiplier = FSel(FIsGrtr(resp, zero), FDiv(p8, resp), zero);
+
+					FloatV targetVel = V3Dot(tvel, t1);
+
+					const FloatV vrel1 = FAdd(V3Dot(t1, linVel0), V3Dot(raXn, angVel0));
+					const FloatV vrel2 = FAdd(V3Dot(t1, linVel1), V3Dot(rbXn, angVel1));
+					const FloatV vrel = FSub(vrel1, vrel2);
+
+					targetVel = FSub(targetVel, vrel);
+
+					f1->normalXYZ_appliedForceW = V4SetW(t1, zero);
+					f1->raXnXYZ_velMultiplierW = V4SetW(raXnSqrtInertia, velMultiplier);
+					f1->rbXnXYZ_biasW = V4SetW(rbXnSqrtInertia, FMul(V3Dot(t1, error), invDt));
+					FStore(targetVel, &f1->targetVel);
+				}
+			}
+		}
+
+		frictionPatchWritebackAddrIndex++;
+	}
+}
+
+
+// Computes how much memory the solver constraint stream and the friction patch array need for 'c'.
+// All four outputs must be zero on entry (asserted).
+//
+// useExtContacts            - selects the *Ext record sizes (articulation path) instead of the rigid body ones
+// _solverConstraintByteSize - out: constraint stream size, rounded up to a multiple of 16 bytes
+// _frictionPatchByteSize    - out: friction patch array size, rounded up to a multiple of 16 bytes
+// _numFrictionPatches       - out: number of friction patches whose correlation list is not empty
+// _axisConstraintCount      - out: number of contact rows plus two friction rows per anchor
+PX_FORCE_INLINE void computeBlockStreamByteSizes(const bool useExtContacts, const CorrelationBuffer& c,
+								PxU32& _solverConstraintByteSize, PxU32& _frictionPatchByteSize, PxU32& _numFrictionPatches,
+								PxU32& _axisConstraintCount)
+{
+	PX_ASSERT(0 == _solverConstraintByteSize);
+	PX_ASSERT(0 == _frictionPatchByteSize);
+	PX_ASSERT(0 == _numFrictionPatches);
+	PX_ASSERT(0 == _axisConstraintCount);
+
+	// PT: use local vars to remove LHS
+	PxU32 solverConstraintByteSize = 0;
+	PxU32 numFrictionPatches = 0;
+	PxU32 axisConstraintCount = 0;
+
+
+	for(PxU32 i = 0; i < c.frictionPatchCount; i++)
+	{
+		//Friction patches.
+		// NOTE(review): patches are counted here by "correlation list not empty", while the copy loop in
+		// createFinalizeSolverContacts() selects them by "contact count != 0" - presumably equivalent; confirm.
+		if(c.correlationListHeads[i] != CorrelationBuffer::LIST_END)
+			numFrictionPatches++;
+
+		const FrictionPatch& frictionPatch = c.frictionPatches[i];
+
+		const bool haveFriction = (frictionPatch.materialFlags & PxMaterialFlag::eDISABLE_FRICTION) == 0;
+
+		//Solver constraint data.
+		if(c.frictionPatchContactCounts[i]!=0)
+		{
+			// Header + one point record per contact + force buffer padded to a multiple of four floats.
+			solverConstraintByteSize += sizeof(SolverContactHeader);
+			solverConstraintByteSize += useExtContacts ? c.frictionPatchContactCounts[i] * sizeof(SolverContactPointExt)
+				: c.frictionPatchContactCounts[i] * sizeof(SolverContactPoint);
+			solverConstraintByteSize += sizeof(PxF32) * ((c.frictionPatchContactCounts[i] + 3)&(~3)); //Add on space for applied impulses
+
+			axisConstraintCount += c.frictionPatchContactCounts[i];
+
+			if(haveFriction)
+			{
+				// Two friction rows per anchor.
+				solverConstraintByteSize += useExtContacts ? c.frictionPatches[i].anchorCount * 2 * sizeof(SolverContactFrictionExt)
+					: c.frictionPatches[i].anchorCount * 2 * sizeof(SolverContactFriction);
+				axisConstraintCount += c.frictionPatches[i].anchorCount * 2;
+
+			}
+		}
+	}
+	PxU32 frictionPatchByteSize = numFrictionPatches*sizeof(FrictionPatch);
+
+	_numFrictionPatches = numFrictionPatches;
+	_axisConstraintCount = axisConstraintCount;
+
+	//16-byte alignment.
+	_frictionPatchByteSize = ((frictionPatchByteSize + 0x0f) & ~0x0f);
+	_solverConstraintByteSize = ((solverConstraintByteSize + 0x0f) & ~0x0f);
+	PX_ASSERT(0 == (_solverConstraintByteSize & 0x0f));
+	PX_ASSERT(0 == (_frictionPatchByteSize & 0x0f));
+}
+
+// Sizes the two output buffers for 'cBuffer' (via computeBlockStreamByteSizes) and reserves them from
+// 'constraintAllocator'. All outputs must be NULL / zero on entry (asserted).
+//
+// solverConstraint  - out: start of the constraint stream, or left untouched when nothing is needed / reservation failed
+// _frictionPatches  - out: start of the friction patch array, or NULL
+// Returns true when every buffer that was actually required could be reserved.
+//
+// Both reserve calls are checked for two failure values: 0 (reported as the contact-data-block limit being
+// reached) and -1 (reported as a single request exceeding 16K); either way the caller sees a NULL pointer.
+static bool reserveBlockStreams(const bool useExtContacts, Dy::CorrelationBuffer& cBuffer,
+						PxU8*& solverConstraint,
+						FrictionPatch*& _frictionPatches,
+						PxU32& numFrictionPatches, PxU32& solverConstraintByteSize,
+						PxU32& axisConstraintCount, PxConstraintAllocator& constraintAllocator)
+{
+	PX_ASSERT(NULL == solverConstraint);
+	PX_ASSERT(NULL == _frictionPatches);
+	PX_ASSERT(0 == numFrictionPatches);
+	PX_ASSERT(0 == solverConstraintByteSize);
+	PX_ASSERT(0 == axisConstraintCount);
+
+	//From frictionPatchStream we just need to reserve a single buffer.
+	PxU32 frictionPatchByteSize = 0;
+	//Compute the sizes of all the buffers.
+	computeBlockStreamByteSizes(
+		useExtContacts, cBuffer,
+		solverConstraintByteSize, frictionPatchByteSize, numFrictionPatches,
+		axisConstraintCount);
+
+	//Reserve the buffers.
+
+	//First reserve the accumulated buffer size for the constraint block.
+	PxU8* constraintBlock = NULL;
+	const PxU32 constraintBlockByteSize = solverConstraintByteSize;
+	if(constraintBlockByteSize > 0)
+	{
+		// 16 extra bytes are requested beyond the computed size; createFinalizeSolverContacts() writes a
+		// PxU32 terminator at offset solverConstraintByteSize, which presumably is what this padding is for.
+		constraintBlock = constraintAllocator.reserveConstraintData(constraintBlockByteSize + 16u);
+
+		if(0==constraintBlock || (reinterpret_cast<PxU8*>(-1))==constraintBlock)
+		{
+			if(0==constraintBlock)
+			{
+				PX_WARN_ONCE(
+					"Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+					"Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks.");
+			}
+			else
+			{
+				PX_WARN_ONCE(
+					"Attempting to allocate more than 16K of contact data for a single contact pair in constraint prep. "
+					"Either accept dropped contacts or simplify collision geometry.");
+				// Normalise the -1 sentinel to NULL so later checks only need to test for NULL.
+				constraintBlock=NULL;
+			}
+		}
+	}
+
+	FrictionPatch* frictionPatches = NULL;
+	//If the constraint block reservation didn't fail then reserve the friction buffer too.
+	if(frictionPatchByteSize >0 && (0==constraintBlockByteSize || constraintBlock))
+	{
+		frictionPatches = reinterpret_cast<FrictionPatch*>(constraintAllocator.reserveFrictionData(frictionPatchByteSize));
+
+		if(0==frictionPatches || (reinterpret_cast<FrictionPatch*>(-1))==frictionPatches)
+		{
+			if(0==frictionPatches)
+			{
+				PX_WARN_ONCE(
+					"Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. "
+					"Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks.");
+			}
+			else
+			{
+				PX_WARN_ONCE(
+					"Attempting to allocate more than 16K of friction data for a single contact pair in constraint prep. "
+					"Either accept dropped contacts or simplify collision geometry.");
+				frictionPatches=NULL;
+			}
+		}
+	}
+
+	_frictionPatches = frictionPatches;
+
+	//Patch up the individual ptrs to the buffer returned by the constraint block reservation (assuming the reservation didn't fail).
+	if(0==constraintBlockByteSize || constraintBlock)
+	{
+		if(solverConstraintByteSize)
+		{
+			solverConstraint = constraintBlock;
+			PX_ASSERT(0==(uintptr_t(solverConstraint) & 0x0f));
+		}
+	}
+
+	//Return true if neither of the two block reservations failed.
+	return ((0==constraintBlockByteSize || constraintBlock) && (0==frictionPatchByteSize || frictionPatches));
+}
+
+
+// Builds the solver contact constraints for one contact pair whose contacts are already in contactDesc.contacts.
+//
+// Steps: reset the correlation buffer 'c', pull in cached friction patches (unless strong friction is disabled),
+// build and correlate contact patches, grow the friction patches, reserve the output buffers and finally
+// write the friction patches and the constraint stream.
+//
+// On return contactDesc.frictionPtr / frictionCount and desc.constraint / constraintLengthOver16 describe the
+// reserved buffers, or are NULL / 0 when there were no contacts or the reservation failed.
+// Returns true when there were no contacts or when all required buffers could be reserved.
+bool createFinalizeSolverContacts(
+	PxSolverContactDesc& contactDesc,
+	CorrelationBuffer& c,
+	const PxReal invDtF32,
+	PxReal bounceThresholdF32,
+	PxReal frictionOffsetThreshold,
+	PxReal correlationDistance,
+	PxConstraintAllocator& constraintAllocator)
+{
+	Ps::prefetchLine(contactDesc.body0);
+	Ps::prefetchLine(contactDesc.body1);
+	Ps::prefetchLine(contactDesc.data0);
+	Ps::prefetchLine(contactDesc.data1);
+
+	c.frictionPatchCount = 0;
+	c.contactPatchCount = 0;
+
+	const bool hasForceThreshold = contactDesc.hasForceThresholds;
+	const bool staticOrKinematicBody = contactDesc.bodyState1 == PxSolverContactDesc::eKINEMATIC_BODY || contactDesc.bodyState1 == PxSolverContactDesc::eSTATIC_BODY;
+
+	const bool disableStrongFriction = contactDesc.disableStrongFriction;
+	// Either body being an articulation link selects the "Ext" constraint layout.
+	const bool useExtContacts = ((contactDesc.bodyState0 | contactDesc.bodyState1) & PxSolverContactDesc::eARTICULATION) != 0;
+
+	PxSolverConstraintDesc& desc = *contactDesc.desc;
+
+	desc.constraintLengthOver16 = 0;
+
+
+	// Nothing to solve: clear the outputs and report success.
+	if (contactDesc.numContacts == 0)
+	{
+		contactDesc.frictionPtr = NULL;
+		contactDesc.frictionCount = 0;
+		desc.constraint = NULL;
+		return true;
+	}
+
+	if (!disableStrongFriction)
+	{
+		getFrictionPatches(c, contactDesc.frictionPtr, contactDesc.frictionCount, contactDesc.bodyFrame0, contactDesc.bodyFrame1, correlationDistance);
+	}
+
+	// 'overflow' is only reported in PX_CHECKED builds; contact prep carries on with whatever fitted.
+	bool overflow = !createContactPatches(c, contactDesc.contacts, contactDesc.numContacts, PXC_SAME_NORMAL);
+	overflow = correlatePatches(c, contactDesc.contacts, contactDesc.bodyFrame0, contactDesc.bodyFrame1, PXC_SAME_NORMAL, 0, 0) || overflow;
+	PX_UNUSED(overflow);
+
+#if PX_CHECKED
+	if (overflow)
+	{
+		Ps::getFoundation().error(physx::PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__,
+			"Dropping contacts in solver because we exceeded limit of 32 friction patches.");
+	}
+#endif
+
+	growPatches(c, contactDesc.contacts, contactDesc.bodyFrame0, contactDesc.bodyFrame1, correlationDistance, 0, frictionOffsetThreshold + contactDesc.restDistance);
+
+	//PX_ASSERT(patchCount == c.frictionPatchCount);
+
+	FrictionPatch* frictionPatches = NULL;
+	PxU8* solverConstraint = NULL;
+	PxU32 numFrictionPatches = 0;
+	PxU32 solverConstraintByteSize = 0;
+	PxU32 axisConstraintCount = 0;
+
+	const bool successfulReserve = reserveBlockStreams(
+		useExtContacts, c,
+		solverConstraint, frictionPatches,
+		numFrictionPatches,
+		solverConstraintByteSize,
+		axisConstraintCount,
+		constraintAllocator);
+	// initialise the work unit's ptrs to the various buffers.
+
+	contactDesc.frictionPtr = NULL;
+	contactDesc.frictionCount = 0;
+	desc.constraint = NULL;
+	desc.constraintLengthOver16 = 0;
+	// patch up the work unit with the reserved buffers and set the reserved buffer data as appropriate.
+
+	if (successfulReserve)
+	{
+		// Remember the base of the friction array before 'frictionPatches' is advanced by the copy loop below.
+		PxU8* frictionDataPtr = reinterpret_cast<PxU8*>(frictionPatches);
+		contactDesc.frictionPtr = frictionDataPtr;
+		desc.constraint = solverConstraint;
+		//output.nbContacts = Ps::to8(numContacts);
+		contactDesc.frictionCount = Ps::to8(numFrictionPatches);
+		desc.constraintLengthOver16 = Ps::to16(solverConstraintByteSize / 16);
+		desc.writeBack = contactDesc.contactForces;
+		desc.writeBackLengthOver4 = PxU16(contactDesc.contactForces ? contactDesc.numContacts : 0);
+
+		//Initialise friction buffer.
+		if (frictionPatches)
+		{
+			// PT: TODO: revisit this... not very satisfying
+			//const PxU32 maxSize = numFrictionPatches*sizeof(FrictionPatch);
+			Ps::prefetchLine(frictionPatches);
+			Ps::prefetchLine(frictionPatches, 128);
+			Ps::prefetchLine(frictionPatches, 256);
+
+			// Copy only the patches that own contacts, densely packed, in the same order in which the
+			// setup function assigns writeback slots.
+			for (PxU32 i = 0; i<c.frictionPatchCount; i++)
+			{
+				//if(c.correlationListHeads[i]!=CorrelationBuffer::LIST_END)
+				if (c.frictionPatchContactCounts[i])
+				{
+					*frictionPatches++ = c.frictionPatches[i];
+					Ps::prefetchLine(frictionPatches, 256);
+				}
+			}
+		}
+
+		//Initialise solverConstraint buffer.
+		if (solverConstraint)
+		{
+			if (useExtContacts)
+			{
+				const PxSolverBodyData& data0 = *contactDesc.data0;
+				const PxSolverBodyData& data1 = *contactDesc.data1;
+
+				const SolverExtBody b0(reinterpret_cast<const void*>(contactDesc.body0), reinterpret_cast<const void*>(&data0), desc.linkIndexA);
+				const SolverExtBody b1(reinterpret_cast<const void*>(contactDesc.body1), reinterpret_cast<const void*>(&data1), desc.linkIndexB);
+
+				setupFinalizeExtSolverContacts(contactDesc.contacts, c, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint,
+					b0, b1, invDtF32, bounceThresholdF32,
+					contactDesc.mInvMassScales.linear0, contactDesc.mInvMassScales.angular0, contactDesc.mInvMassScales.linear1, contactDesc.mInvMassScales.angular1,
+					contactDesc.restDistance, frictionDataPtr, contactDesc.maxCCDSeparation);
+			}
+			else
+			{
+				const PxSolverBodyData& data0 = *contactDesc.data0;
+				const PxSolverBodyData& data1 = *contactDesc.data1;
+				setupFinalizeSolverConstraints(contactDesc.shapeInteraction, contactDesc.contacts, c, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint,
+					data0, data1, invDtF32, bounceThresholdF32,
+					contactDesc.mInvMassScales.linear0, contactDesc.mInvMassScales.angular0, contactDesc.mInvMassScales.linear1, contactDesc.mInvMassScales.angular1,
+					hasForceThreshold, staticOrKinematicBody, contactDesc.restDistance, frictionDataPtr, contactDesc.maxCCDSeparation);
+			}
+			//KS - set to 0 so we have a counter for the number of times we solved the constraint
+			//only going to be used on SPU but might as well set on all platforms because this code is shared
+			// This write lands just past the computed stream size, inside the extra bytes requested by reserveBlockStreams().
+			*(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize)) = 0;
+		}
+	}
+
+	return successfulReserve;
+}
+
+
+
+// Convenience overload: extracts the contacts for this pair from 'output' into the thread's contact buffer,
+// folds the extracted per-pair mass/inertia scales into contactDesc, then forwards to the overload above
+// using the thread's correlation buffer.
+// Strong friction is additionally disabled when the extracted contacts carry a target velocity.
+bool createFinalizeSolverContacts(PxSolverContactDesc& contactDesc,
+								  PxsContactManagerOutput& output,
+								  ThreadContext& threadContext,
+								  const PxReal invDtF32,
+								  PxReal bounceThresholdF32,
+								  PxReal frictionOffsetThreshold,
+								  PxReal correlationDistance,
+								  PxConstraintAllocator& constraintAllocator)
+{
+	ContactBuffer& buffer = threadContext.mContactBuffer;
+
+
+
+	buffer.count = 0;
+
+	// We pull the friction patches out of the cache to remove the dependency on how
+	// the cache is organized. Remember original addrs so we can write them back
+	// efficiently.
+
+	PxU32 numContacts = 0;
+	{
+		PxReal invMassScale0 = 1.f;
+		PxReal invMassScale1 = 1.f;
+		PxReal invInertiaScale0 = 1.f;
+		PxReal invInertiaScale1 = 1.f;
+
+		bool hasMaxImpulse = false, hasTargetVelocity = false;
+
+		// The last argument is the smaller of the two bodies' maxContactImpulse values.
+		numContacts = extractContacts(buffer, output, hasMaxImpulse, hasTargetVelocity, invMassScale0, invMassScale1,
+			invInertiaScale0, invInertiaScale1, PxMin(contactDesc.data0->maxContactImpulse, contactDesc.data1->maxContactImpulse));
+
+		contactDesc.contacts = buffer.contacts;
+		contactDesc.numContacts = numContacts;
+		contactDesc.disableStrongFriction = contactDesc.disableStrongFriction || hasTargetVelocity;
+		contactDesc.hasMaxImpulse = hasMaxImpulse;
+		// Scales are multiplied in, so whatever the caller already stored in mInvMassScales is preserved.
+		contactDesc.mInvMassScales.linear0 *= invMassScale0;
+		contactDesc.mInvMassScales.linear1 *= invMassScale1;
+		contactDesc.mInvMassScales.angular0 *= invInertiaScale0;
+		contactDesc.mInvMassScales.angular1 *= invInertiaScale1;
+	}
+
+	CorrelationBuffer& c = threadContext.mCorrelationBuffer;
+
+	return createFinalizeSolverContacts(contactDesc, c, invDtF32, bounceThresholdF32, frictionOffsetThreshold, correlationDistance, constraintAllocator);
+}
+
+// Points the constraint descriptor's force write-back at the contact manager output and returns the
+// number of contacts. The contact manager argument itself is unused.
+PxU32 getContactManagerConstraintDesc(const PxsContactManagerOutput& cmOutput, const PxsContactManager& /*cm*/, PxSolverConstraintDesc& desc)
+{
+	desc.writeBackLengthOver4 = cmOutput.nbContacts;
+	desc.writeBack = cmOutput.contactForces;
+	return cmOutput.nbContacts;// cm.getWorkUnit().axisConstraintCount;
+}
+
+}
+
+}
+
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.h new file mode 100644 index 00000000..2e4a7ba2 --- /dev/null +++ 
b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep.h @@ -0,0 +1,168 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef DY_CONTACTPREP_H +#define DY_CONTACTPREP_H + +#include "DySolverConstraintDesc.h" +#include "PxSceneDesc.h" +#include "DySolverContact4.h" + + +namespace physx +{ + +struct PxcNpWorkUnit; +class PxsConstraintBlockManager; +struct PxsContactManagerOutput; +struct PxSolverBody; +struct PxSolverBodyData; +struct PxSolverConstraintDesc; + +namespace Dy +{ + class ThreadContext; + struct CorrelationBuffer; + +/* Parameter list of the single-pair contact-prep entry points; it is expanded inside the PxcCreateFinalizeSolverContactMethod typedef below. */ +#define CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS \ + PxSolverContactDesc& contactDesc, \ + PxsContactManagerOutput& output, \ + ThreadContext& threadContext, \ + const PxReal invDtF32, \ + PxReal bounceThresholdF32, \ + PxReal frictionOffsetThreshold, \ + PxReal correlationDistance, \ + PxConstraintAllocator& constraintAllocator + +/* Parameter list of the batched ('4') contact-prep entry points, which take arrays (outputs, blockDescs) instead of a single pair. NOTE(review): 'SOVLER' in the macro name is a typo for 'SOLVER'; it is left as is because the typedef below uses this exact spelling. The parameter names frictionThresholdF32 / correlationDistanceF32 differ from the names in the createFinalizeSolverContacts4* prototypes; only the types matter for the typedef. */ +#define CREATE_FINALIZE_SOVLER_CONTACT_METHOD_ARGS_4 \ + PxsContactManagerOutput** outputs, \ + ThreadContext& threadContext, \ + PxSolverContactDesc* blockDescs, \ + const PxReal invDtF32, \ + PxReal bounceThresholdF32, \ + PxReal frictionThresholdF32, \ + PxReal correlationDistanceF32, \ + PxConstraintAllocator& constraintAllocator + + +/*! 
+Function-pointer types for the contact-prep entry points, and the three-slot tables that hold them. +The single-pair methods return bool; the batched ('4') methods return a SolverConstraintPrepState::Enum. +DyContactPrep4.cpp fills createFinalizeMethods4 in the order default, Coulomb1D, Coulomb2D. +*/ + +typedef bool (*PxcCreateFinalizeSolverContactMethod)(CREATE_FINALIZE_SOLVER_CONTACT_METHOD_ARGS); + +extern PxcCreateFinalizeSolverContactMethod createFinalizeMethods[3]; + +typedef SolverConstraintPrepState::Enum (*PxcCreateFinalizeSolverContactMethod4)(CREATE_FINALIZE_SOVLER_CONTACT_METHOD_ARGS_4); + +extern PxcCreateFinalizeSolverContactMethod4 createFinalizeMethods4[3]; + + +/* Single-pair contact prep. This overload extracts the contacts from 'output' into the thread context's contact buffer and then forwards to the CorrelationBuffer overload declared next. */ +bool createFinalizeSolverContacts( PxSolverContactDesc& contactDesc, + PxsContactManagerOutput& output, + ThreadContext& threadContext, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator); + +bool createFinalizeSolverContacts( PxSolverContactDesc& contactDesc, + CorrelationBuffer& c, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator); + +/* Batched counterparts of the two overloads above; they operate on an array of descriptors (blockDescs). */ +SolverConstraintPrepState::Enum createFinalizeSolverContacts4( PxsContactManagerOutput** outputs, + ThreadContext& threadContext, + PxSolverContactDesc* blockDescs, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator); + +SolverConstraintPrepState::Enum createFinalizeSolverContacts4( Dy::CorrelationBuffer& c, + PxSolverContactDesc* blockDescs, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator); + + + +/* Coulomb1D / Coulomb2D variants; their signatures match the default single-pair and batched entry points above. */ +bool createFinalizeSolverContactsCoulomb1D(PxSolverContactDesc& contactDesc, + PxsContactManagerOutput& output, + ThreadContext& threadContext, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator); + +bool 
createFinalizeSolverContactsCoulomb2D(PxSolverContactDesc& contactDesc, + PxsContactManagerOutput& output, + ThreadContext& threadContext, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator); + + +SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb1D( PxsContactManagerOutput** outputs, + ThreadContext& threadContext, + PxSolverContactDesc* blockDescs, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator); + +SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb2D(PxsContactManagerOutput** outputs, + ThreadContext& threadContext, + PxSolverContactDesc* blockDescs, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator); + + +/* Fills desc.writeBack and desc.writeBackLengthOver4 from cmOutput and returns cmOutput.nbContacts; the implementation in DyContactPrep.cpp leaves 'cm' unused. */ +PxU32 getContactManagerConstraintDesc(const PxsContactManagerOutput& cmOutput, const PxsContactManager& cm, PxSolverConstraintDesc& desc); + +} + +} + +#endif //DY_CONTACTPREP_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4.cpp new file mode 100644 index 00000000..5bbf9637 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4.cpp @@ -0,0 +1,1478 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxPreprocessor.h" +#include "PxSceneDesc.h" +#include "PsVecMath.h" +#include "PsMathUtils.h" +#include "DySolverContact.h" +#include "DySolverContact4.h" +#include "DySolverConstraintTypes.h" +#include "PxcNpWorkUnit.h" +#include "DyThreadContext.h" +#include "DyContactPrep.h" +#include "PxcNpContactPrepShared.h" +#include "PxvDynamics.h" +#include "DyCorrelationBuffer.h" +#include "DyDynamics.h" +#include "DyArticulationContactPrep.h" +#include "PxsContactManager.h" + +#include "PsFoundation.h" + +using namespace physx; +using namespace Gu; + + +/* NOTE(review): PsVecMath.h is already included in the first include group above; this second include is redundant. */ +#include "PsVecMath.h" +#include "PxContactModifyCallback.h" +#include "PxsMaterialManager.h" +#include "PxsMaterialCombiner.h" +#include "DyContactPrepShared.h" + +using namespace Ps::aos; + +namespace physx +{ +namespace Dy +{ + +/* Table of the batched ('4') contact-prep entry points, in the order default, Coulomb1D, Coulomb2D. Presumably indexed by the scene's friction type (PxFrictionType) - confirm against the caller. */ +PxcCreateFinalizeSolverContactMethod4 createFinalizeMethods4[3] = +{ + createFinalizeSolverContacts4, + createFinalizeSolverContacts4Coulomb1D, + createFinalizeSolverContacts4Coulomb2D +}; + +/* Helper for the PX_ASSERT checks in this file: stores v into a 16-byte aligned PxVec4 and returns that vector's isFinite() result. */ +inline bool ValidateVec4(const Vec4V v) +{ + PX_ALIGN(16, PxVec4 vF); + Ps::aos::V4StoreA(v, &vF.x); + return vF.isFinite(); +} + +static void setupFinalizeSolverConstraints4(PxSolverContactDesc* PX_RESTRICT descs, CorrelationBuffer& c, PxU8* PX_RESTRICT workspace, + const PxReal invDtF32, PxReal bounceThresholdF32, + const Ps::aos::Vec4VArg invMassScale0, const Ps::aos::Vec4VArg invInertiaScale0, + const Ps::aos::Vec4VArg invMassScale1, const Ps::aos::Vec4VArg invInertiaScale1) +{ + + //OK, we have a workspace of pre-allocated space to store all 4 descs in. We now need to create the constraints in it + + const Vec4V ccdMaxSeparation = Ps::aos::V4LoadXYZW(descs[0].maxCCDSeparation, descs[1].maxCCDSeparation, descs[2].maxCCDSeparation, descs[3].maxCCDSeparation); + + const Vec4V zero = V4Zero(); + const BoolV bFalse = BFFFF(); + const FloatV fZero = FZero(); + + PxU8 flags[4] = { PxU8(descs[0].hasForceThresholds ? 
SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0), + PxU8(descs[1].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0), + PxU8(descs[2].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0), + PxU8(descs[3].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0) }; + + bool hasMaxImpulse = descs[0].hasMaxImpulse || descs[1].hasMaxImpulse || descs[2].hasMaxImpulse || descs[3].hasMaxImpulse; + + //The block is dynamic if **any** of the constraints have a non-static body B. This allows us to batch static and non-static constraints but we only get a memory/perf + //saving if all 4 are static. This simplifies the constraint partitioning such that it only needs to care about separating contacts and 1D constraints (which it already does) + bool isDynamic = false; + bool hasKinematic = false; + for(PxU32 a = 0; a < 4; ++a) + { + isDynamic = isDynamic || (descs[a].bodyState1 == PxSolverContactDesc::eDYNAMIC_BODY); + hasKinematic = hasKinematic || descs[a].bodyState1 == PxSolverContactDesc::eKINEMATIC_BODY; + } + + const PxU32 constraintSize = isDynamic ? sizeof(SolverContactBatchPointDynamic4) : sizeof(SolverContactBatchPointBase4); + const PxU32 frictionSize = isDynamic ? 
sizeof(SolverContactFrictionDynamic4) : sizeof(SolverContactFrictionBase4); + + PxU8* PX_RESTRICT ptr = workspace; + + const Vec4V dom0 = invMassScale0; + const Vec4V dom1 = invMassScale1; + const Vec4V angDom0 = invInertiaScale0; + const Vec4V angDom1 = invInertiaScale1; + + const Vec4V maxPenBias = V4Max(V4LoadXYZW(descs[0].data0->penBiasClamp, descs[1].data0->penBiasClamp, + descs[2].data0->penBiasClamp, descs[3].data0->penBiasClamp), + V4LoadXYZW(descs[0].data1->penBiasClamp, descs[1].data1->penBiasClamp, + descs[2].data1->penBiasClamp, descs[3].data1->penBiasClamp)); + + const Vec4V restDistance = V4LoadXYZW(descs[0].restDistance, descs[1].restDistance, descs[2].restDistance, + descs[3].restDistance); + + + //load up velocities + Vec4V linVel00 = V4LoadA(&descs[0].data0->linearVelocity.x); + Vec4V linVel10 = V4LoadA(&descs[1].data0->linearVelocity.x); + Vec4V linVel20 = V4LoadA(&descs[2].data0->linearVelocity.x); + Vec4V linVel30 = V4LoadA(&descs[3].data0->linearVelocity.x); + + Vec4V linVel01 = V4LoadA(&descs[0].data1->linearVelocity.x); + Vec4V linVel11 = V4LoadA(&descs[1].data1->linearVelocity.x); + Vec4V linVel21 = V4LoadA(&descs[2].data1->linearVelocity.x); + Vec4V linVel31 = V4LoadA(&descs[3].data1->linearVelocity.x); + + Vec4V angVel00 = V4LoadA(&descs[0].data0->angularVelocity.x); + Vec4V angVel10 = V4LoadA(&descs[1].data0->angularVelocity.x); + Vec4V angVel20 = V4LoadA(&descs[2].data0->angularVelocity.x); + Vec4V angVel30 = V4LoadA(&descs[3].data0->angularVelocity.x); + + Vec4V angVel01 = V4LoadA(&descs[0].data1->angularVelocity.x); + Vec4V angVel11 = V4LoadA(&descs[1].data1->angularVelocity.x); + Vec4V angVel21 = V4LoadA(&descs[2].data1->angularVelocity.x); + Vec4V angVel31 = V4LoadA(&descs[3].data1->angularVelocity.x); + + Vec4V linVelT00, linVelT10, linVelT20; + Vec4V linVelT01, linVelT11, linVelT21; + Vec4V angVelT00, angVelT10, angVelT20; + Vec4V angVelT01, angVelT11, angVelT21; + + PX_TRANSPOSE_44_34(linVel00, linVel10, linVel20, linVel30, 
linVelT00, linVelT10, linVelT20); + PX_TRANSPOSE_44_34(linVel01, linVel11, linVel21, linVel31, linVelT01, linVelT11, linVelT21); + PX_TRANSPOSE_44_34(angVel00, angVel10, angVel20, angVel30, angVelT00, angVelT10, angVelT20); + PX_TRANSPOSE_44_34(angVel01, angVel11, angVel21, angVel31, angVelT01, angVelT11, angVelT21); + + const Vec4V vrelX = V4Sub(linVelT00, linVelT01); + const Vec4V vrelY = V4Sub(linVelT10, linVelT11); + const Vec4V vrelZ = V4Sub(linVelT20, linVelT21); + + //Load up masses and invInertia + + /*const Vec4V sqrtInvMass0 = V4Merge(FLoad(descs[0].data0->sqrtInvMass), FLoad(descs[1].data0->sqrtInvMass), FLoad(descs[2].data0->sqrtInvMass), + FLoad(descs[3].data0->sqrtInvMass)); + + const Vec4V sqrtInvMass1 = V4Merge(FLoad(descs[0].data1->sqrtInvMass), FLoad(descs[1].data1->sqrtInvMass), FLoad(descs[2].data1->sqrtInvMass), + FLoad(descs[3].data1->sqrtInvMass));*/ + + const Vec4V invMass0 = V4LoadXYZW(descs[0].data0->invMass, descs[1].data0->invMass, descs[2].data0->invMass, descs[3].data0->invMass); + const Vec4V invMass1 = V4LoadXYZW(descs[0].data1->invMass, descs[1].data1->invMass, descs[2].data1->invMass, descs[3].data1->invMass); + + const Vec4V invMass0D0 = V4Mul(dom0, invMass0); + const Vec4V invMass1D1 = V4Mul(dom1, invMass1); + + Vec4V invInertia00X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[0].data0->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33 + Vec4V invInertia00Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[0].data0->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33 + Vec4V invInertia00Z = Vec4V_From_Vec3V(V3LoadU(descs[0].data0->sqrtInvInertia.column2)); + + Vec4V invInertia10X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[1].data0->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33 + Vec4V invInertia10Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[1].data0->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33 
+ Vec4V invInertia10Z = Vec4V_From_Vec3V(V3LoadU(descs[1].data0->sqrtInvInertia.column2)); + + Vec4V invInertia20X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[2].data0->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33 + Vec4V invInertia20Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[2].data0->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33 + Vec4V invInertia20Z = Vec4V_From_Vec3V(V3LoadU(descs[2].data0->sqrtInvInertia.column2)); + + Vec4V invInertia30X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[3].data0->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33 + Vec4V invInertia30Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[3].data0->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33 + Vec4V invInertia30Z = Vec4V_From_Vec3V(V3LoadU(descs[3].data0->sqrtInvInertia.column2)); + + Vec4V invInertia01X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[0].data1->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33 + Vec4V invInertia01Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[0].data1->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33 + Vec4V invInertia01Z = Vec4V_From_Vec3V(V3LoadU(descs[0].data1->sqrtInvInertia.column2)); + + Vec4V invInertia11X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[1].data1->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33 + Vec4V invInertia11Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[1].data1->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33 + Vec4V invInertia11Z = Vec4V_From_Vec3V(V3LoadU(descs[1].data1->sqrtInvInertia.column2)); + + Vec4V invInertia21X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[2].data1->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33 + Vec4V invInertia21Y = 
Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[2].data1->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33 + Vec4V invInertia21Z = Vec4V_From_Vec3V(V3LoadU(descs[2].data1->sqrtInvInertia.column2)); + + Vec4V invInertia31X = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[3].data1->sqrtInvInertia.column0)); // PT: safe because 'column1' follows 'column0' in PxMat33 + Vec4V invInertia31Y = Vec4V_From_Vec3V(V3LoadU_SafeReadW(descs[3].data1->sqrtInvInertia.column1)); // PT: safe because 'column2' follows 'column1' in PxMat33 + Vec4V invInertia31Z = Vec4V_From_Vec3V(V3LoadU(descs[3].data1->sqrtInvInertia.column2)); + + Vec4V invInertia0X0, invInertia0X1, invInertia0X2; + Vec4V invInertia0Y0, invInertia0Y1, invInertia0Y2; + Vec4V invInertia0Z0, invInertia0Z1, invInertia0Z2; + + Vec4V invInertia1X0, invInertia1X1, invInertia1X2; + Vec4V invInertia1Y0, invInertia1Y1, invInertia1Y2; + Vec4V invInertia1Z0, invInertia1Z1, invInertia1Z2; + + PX_TRANSPOSE_44_34(invInertia00X, invInertia10X, invInertia20X, invInertia30X, invInertia0X0, invInertia0Y0, invInertia0Z0); + PX_TRANSPOSE_44_34(invInertia00Y, invInertia10Y, invInertia20Y, invInertia30Y, invInertia0X1, invInertia0Y1, invInertia0Z1); + PX_TRANSPOSE_44_34(invInertia00Z, invInertia10Z, invInertia20Z, invInertia30Z, invInertia0X2, invInertia0Y2, invInertia0Z2); + + PX_TRANSPOSE_44_34(invInertia01X, invInertia11X, invInertia21X, invInertia31X, invInertia1X0, invInertia1Y0, invInertia1Z0); + PX_TRANSPOSE_44_34(invInertia01Y, invInertia11Y, invInertia21Y, invInertia31Y, invInertia1X1, invInertia1Y1, invInertia1Z1); + PX_TRANSPOSE_44_34(invInertia01Z, invInertia11Z, invInertia21Z, invInertia31Z, invInertia1X2, invInertia1Y2, invInertia1Z2); + + + const FloatV invDt = FLoad(invDtF32); + const FloatV p8 = FLoad(0.8f); + const Vec4V p84 = V4Splat(p8); + const Vec4V bounceThreshold = V4Splat(FLoad(bounceThresholdF32)); + + const FloatV invDtp8 = FMul(invDt, p8); + + const Vec3V bodyFrame00p = 
V3LoadU(descs[0].bodyFrame0.p); + const Vec3V bodyFrame01p = V3LoadU(descs[1].bodyFrame0.p); + const Vec3V bodyFrame02p = V3LoadU(descs[2].bodyFrame0.p); + const Vec3V bodyFrame03p = V3LoadU(descs[3].bodyFrame0.p); + + Vec4V bodyFrame00p4 = Vec4V_From_Vec3V(bodyFrame00p); + Vec4V bodyFrame01p4 = Vec4V_From_Vec3V(bodyFrame01p); + Vec4V bodyFrame02p4 = Vec4V_From_Vec3V(bodyFrame02p); + Vec4V bodyFrame03p4 = Vec4V_From_Vec3V(bodyFrame03p); + + Vec4V bodyFrame0pX, bodyFrame0pY, bodyFrame0pZ; + PX_TRANSPOSE_44_34(bodyFrame00p4, bodyFrame01p4, bodyFrame02p4, bodyFrame03p4, bodyFrame0pX, bodyFrame0pY, bodyFrame0pZ); + + + const Vec3V bodyFrame10p = V3LoadU(descs[0].bodyFrame1.p); + const Vec3V bodyFrame11p = V3LoadU(descs[1].bodyFrame1.p); + const Vec3V bodyFrame12p = V3LoadU(descs[2].bodyFrame1.p); + const Vec3V bodyFrame13p = V3LoadU(descs[3].bodyFrame1.p); + + Vec4V bodyFrame10p4 = Vec4V_From_Vec3V(bodyFrame10p); + Vec4V bodyFrame11p4 = Vec4V_From_Vec3V(bodyFrame11p); + Vec4V bodyFrame12p4 = Vec4V_From_Vec3V(bodyFrame12p); + Vec4V bodyFrame13p4 = Vec4V_From_Vec3V(bodyFrame13p); + + Vec4V bodyFrame1pX, bodyFrame1pY, bodyFrame1pZ; + PX_TRANSPOSE_44_34(bodyFrame10p4, bodyFrame11p4, bodyFrame12p4, bodyFrame13p4, bodyFrame1pX, bodyFrame1pY, bodyFrame1pZ); + + + const QuatV bodyFrame00q = QuatVLoadU(&descs[0].bodyFrame0.q.x); + const QuatV bodyFrame01q = QuatVLoadU(&descs[1].bodyFrame0.q.x); + const QuatV bodyFrame02q = QuatVLoadU(&descs[2].bodyFrame0.q.x); + const QuatV bodyFrame03q = QuatVLoadU(&descs[3].bodyFrame0.q.x); + + const QuatV bodyFrame10q = QuatVLoadU(&descs[0].bodyFrame1.q.x); + const QuatV bodyFrame11q = QuatVLoadU(&descs[1].bodyFrame1.q.x); + const QuatV bodyFrame12q = QuatVLoadU(&descs[2].bodyFrame1.q.x); + const QuatV bodyFrame13q = QuatVLoadU(&descs[3].bodyFrame1.q.x); + + PxU32 frictionPatchWritebackAddrIndex0 = 0; + PxU32 frictionPatchWritebackAddrIndex1 = 0; + PxU32 frictionPatchWritebackAddrIndex2 = 0; + PxU32 frictionPatchWritebackAddrIndex3 = 0; + + 
Ps::prefetchLine(c.contactID); + Ps::prefetchLine(c.contactID, 128); + + PxU32 frictionIndex0 = 0, frictionIndex1 = 0, frictionIndex2 = 0, frictionIndex3 = 0; + //PxU32 contactIndex0 = 0, contactIndex1 = 0, contactIndex2 = 0, contactIndex3 = 0; + + + //OK, we iterate through all friction patch counts in the constraint patch, building up the constraint list etc. + + PxU32 maxPatches = PxMax(descs[0].numFrictionPatches, PxMax(descs[1].numFrictionPatches, PxMax(descs[2].numFrictionPatches, descs[3].numFrictionPatches))); + + const Vec4V p1 = V4Splat(FLoad(0.1f)); + const Vec4V orthoThreshold = V4Splat(FLoad(0.70710678f)); + + + PxU32 contact0 = 0, contact1 = 0, contact2 = 0, contact3 = 0; + PxU32 patch0 = 0, patch1 = 0, patch2 = 0, patch3 = 0; + + PxU8 flag = 0; + if(hasMaxImpulse) + flag |= SolverContactHeader4::eHAS_MAX_IMPULSE; + + for(PxU32 i=0;i<maxPatches;i++) + { + const bool hasFinished0 = i >= descs[0].numFrictionPatches; + const bool hasFinished1 = i >= descs[1].numFrictionPatches; + const bool hasFinished2 = i >= descs[2].numFrictionPatches; + const bool hasFinished3 = i >= descs[3].numFrictionPatches; + + + frictionIndex0 = hasFinished0 ? frictionIndex0 : descs[0].startFrictionPatchIndex + i; + frictionIndex1 = hasFinished1 ? frictionIndex1 : descs[1].startFrictionPatchIndex + i; + frictionIndex2 = hasFinished2 ? frictionIndex2 : descs[2].startFrictionPatchIndex + i; + frictionIndex3 = hasFinished3 ? frictionIndex3 : descs[3].startFrictionPatchIndex + i; + + PxU32 clampedContacts0 = hasFinished0 ? 0 : c.frictionPatchContactCounts[frictionIndex0]; + PxU32 clampedContacts1 = hasFinished1 ? 0 : c.frictionPatchContactCounts[frictionIndex1]; + PxU32 clampedContacts2 = hasFinished2 ? 0 : c.frictionPatchContactCounts[frictionIndex2]; + PxU32 clampedContacts3 = hasFinished3 ? 
0 : c.frictionPatchContactCounts[frictionIndex3]; + + PxU32 firstPatch0 = c.correlationListHeads[frictionIndex0]; + PxU32 firstPatch1 = c.correlationListHeads[frictionIndex1]; + PxU32 firstPatch2 = c.correlationListHeads[frictionIndex2]; + PxU32 firstPatch3 = c.correlationListHeads[frictionIndex3]; + + const Gu::ContactPoint* contactBase0 = descs[0].contacts + c.contactPatches[firstPatch0].start; + const Gu::ContactPoint* contactBase1 = descs[1].contacts + c.contactPatches[firstPatch1].start; + const Gu::ContactPoint* contactBase2 = descs[2].contacts + c.contactPatches[firstPatch2].start; + const Gu::ContactPoint* contactBase3 = descs[3].contacts + c.contactPatches[firstPatch3].start; + + const Vec4V restitution = V4Neg(V4LoadXYZW(contactBase0->restitution, contactBase1->restitution, contactBase2->restitution, + contactBase3->restitution)); + + SolverContactHeader4* PX_RESTRICT header = reinterpret_cast<SolverContactHeader4*>(ptr); + ptr += sizeof(SolverContactHeader4); + + + header->flags[0] = flags[0]; + header->flags[1] = flags[1]; + header->flags[2] = flags[2]; + header->flags[3] = flags[3]; + + header->flag = flag; + + PxU32 totalContacts = PxMax(clampedContacts0, PxMax(clampedContacts1, PxMax(clampedContacts2, clampedContacts3))); + + Vec4V* PX_RESTRICT appliedNormalForces = reinterpret_cast<Vec4V*>(ptr); + ptr += sizeof(Vec4V)*totalContacts; + + PxMemZero(appliedNormalForces, sizeof(Vec4V) * totalContacts); + + header->numNormalConstr = Ps::to8(totalContacts); + header->numNormalConstr0 = Ps::to8(clampedContacts0); + header->numNormalConstr1 = Ps::to8(clampedContacts1); + header->numNormalConstr2 = Ps::to8(clampedContacts2); + header->numNormalConstr3 = Ps::to8(clampedContacts3); + //header->sqrtInvMassA = sqrtInvMass0; + //header->sqrtInvMassB = sqrtInvMass1; + header->invMass0D0 = invMass0D0; + header->invMass1D1 = invMass1D1; + header->angDom0 = angDom0; + header->angDom1 = angDom1; + header->shapeInteraction[0] = descs[0].shapeInteraction; 
header->shapeInteraction[1] = descs[1].shapeInteraction; + header->shapeInteraction[2] = descs[2].shapeInteraction; header->shapeInteraction[3] = descs[3].shapeInteraction; + + Vec4V* maxImpulse = reinterpret_cast<Vec4V*>(ptr + constraintSize * totalContacts); + + header->restitution = restitution; + + Vec4V normal0 = V4LoadA(&contactBase0->normal.x); + Vec4V normal1 = V4LoadA(&contactBase1->normal.x); + Vec4V normal2 = V4LoadA(&contactBase2->normal.x); + Vec4V normal3 = V4LoadA(&contactBase3->normal.x); + + Vec4V normalX, normalY, normalZ; + PX_TRANSPOSE_44_34(normal0, normal1, normal2, normal3, normalX, normalY, normalZ); + + PX_ASSERT(ValidateVec4(normalX)); + PX_ASSERT(ValidateVec4(normalY)); + PX_ASSERT(ValidateVec4(normalZ)); + + header->normalX = normalX; + header->normalY = normalY; + header->normalZ = normalZ; + + const Vec4V norVel0 = V4MulAdd(normalZ, linVelT20, V4MulAdd(normalY, linVelT10, V4Mul(normalX, linVelT00))); + const Vec4V norVel1 = V4MulAdd(normalZ, linVelT21, V4MulAdd(normalY, linVelT11, V4Mul(normalX, linVelT01))); + const Vec4V relNorVel = V4Sub(norVel0, norVel1); + + //For all correlation heads - need to pull this out I think + + //OK, we have a counter for all our patches... 
+ PxU32 finished = (PxU32(hasFinished0)) | + ((PxU32(hasFinished1)) << 1) | + ((PxU32(hasFinished2)) << 2) | + ((PxU32(hasFinished3)) << 3); + + CorrelationListIterator iter0(c, firstPatch0); + CorrelationListIterator iter1(c, firstPatch1); + CorrelationListIterator iter2(c, firstPatch2); + CorrelationListIterator iter3(c, firstPatch3); + + //PxU32 contact0, contact1, contact2, contact3; + //PxU32 patch0, patch1, patch2, patch3; + + if(!hasFinished0) + iter0.nextContact(patch0, contact0); + if(!hasFinished1) + iter1.nextContact(patch1, contact1); + if(!hasFinished2) + iter2.nextContact(patch2, contact2); + if(!hasFinished3) + iter3.nextContact(patch3, contact3); + + PxU8* p = ptr; + + PxU32 contactCount = 0; + PxU32 newFinished = + (PxU32(hasFinished0 || !iter0.hasNextContact())) | + ((PxU32(hasFinished1 || !iter1.hasNextContact())) << 1) | + ((PxU32(hasFinished2 || !iter2.hasNextContact())) << 2) | + ((PxU32(hasFinished3 || !iter3.hasNextContact())) << 3); + + while(finished != 0xf) + { + finished = newFinished; + ++contactCount; + Ps::prefetchLine(p, 384); + Ps::prefetchLine(p, 512); + Ps::prefetchLine(p, 640); + + SolverContactBatchPointBase4* PX_RESTRICT solverContact = reinterpret_cast<SolverContactBatchPointBase4*>(p); + p += constraintSize; + + const Gu::ContactPoint& con0 = descs[0].contacts[c.contactPatches[patch0].start + contact0]; + const Gu::ContactPoint& con1 = descs[1].contacts[c.contactPatches[patch1].start + contact1]; + const Gu::ContactPoint& con2 = descs[2].contacts[c.contactPatches[patch2].start + contact2]; + const Gu::ContactPoint& con3 = descs[3].contacts[c.contactPatches[patch3].start + contact3]; + + //Now we need to splice these 4 contacts into a single structure + + { + Vec4V point0 = V4LoadA(&con0.point.x); + Vec4V point1 = V4LoadA(&con1.point.x); + Vec4V point2 = V4LoadA(&con2.point.x); + Vec4V point3 = V4LoadA(&con3.point.x); + + Vec4V pointX, pointY, pointZ; + PX_TRANSPOSE_44_34(point0, point1, point2, point3, pointX, pointY, 
pointZ); + + PX_ASSERT(ValidateVec4(pointX)); + PX_ASSERT(ValidateVec4(pointY)); + PX_ASSERT(ValidateVec4(pointZ)); + + Vec4V cTargetVel0 = V4LoadA(&con0.targetVel.x); + Vec4V cTargetVel1 = V4LoadA(&con1.targetVel.x); + Vec4V cTargetVel2 = V4LoadA(&con2.targetVel.x); + Vec4V cTargetVel3 = V4LoadA(&con3.targetVel.x); + + Vec4V cTargetVelX, cTargetVelY, cTargetVelZ; + PX_TRANSPOSE_44_34(cTargetVel0, cTargetVel1, cTargetVel2, cTargetVel3, cTargetVelX, cTargetVelY, cTargetVelZ); + + const Vec4V separation = V4LoadXYZW(con0.separation, con1.separation, con2.separation, con3.separation); + + const Vec4V cTargetNorVel = V4MulAdd(cTargetVelX, normalX, V4MulAdd(cTargetVelY, normalY, V4Mul(cTargetVelZ, normalZ))); + + const Vec4V raX = V4Sub(pointX, bodyFrame0pX); + const Vec4V raY = V4Sub(pointY, bodyFrame0pY); + const Vec4V raZ = V4Sub(pointZ, bodyFrame0pZ); + + const Vec4V rbX = V4Sub(pointX, bodyFrame1pX); + const Vec4V rbY = V4Sub(pointY, bodyFrame1pY); + const Vec4V rbZ = V4Sub(pointZ, bodyFrame1pZ); + + PX_ASSERT(ValidateVec4(raX)); + PX_ASSERT(ValidateVec4(raY)); + PX_ASSERT(ValidateVec4(raZ)); + + PX_ASSERT(ValidateVec4(rbX)); + PX_ASSERT(ValidateVec4(rbY)); + PX_ASSERT(ValidateVec4(rbZ)); + + + //raXn = cross(ra, normal) which = Vec3V( a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); + + const Vec4V raXnX = V4NegMulSub(raZ, normalY, V4Mul(raY, normalZ)); + const Vec4V raXnY = V4NegMulSub(raX, normalZ, V4Mul(raZ, normalX)); + const Vec4V raXnZ = V4NegMulSub(raY, normalX, V4Mul(raX, normalY)); + + Vec4V delAngVel0X = V4Mul(invInertia0X0, raXnX); + Vec4V delAngVel0Y = V4Mul(invInertia0X1, raXnX); + Vec4V delAngVel0Z = V4Mul(invInertia0X2, raXnX); + + delAngVel0X = V4MulAdd(invInertia0Y0, raXnY, delAngVel0X); + delAngVel0Y = V4MulAdd(invInertia0Y1, raXnY, delAngVel0Y); + delAngVel0Z = V4MulAdd(invInertia0Y2, raXnY, delAngVel0Z); + + delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, delAngVel0X); + delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, delAngVel0Y); + delAngVel0Z = 
V4MulAdd(invInertia0Z2, raXnZ, delAngVel0Z); + + + PX_ASSERT(ValidateVec4(delAngVel0X)); + PX_ASSERT(ValidateVec4(delAngVel0Y)); + PX_ASSERT(ValidateVec4(delAngVel0Z)); + + const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0X, delAngVel0X, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0Z, delAngVel0Z))); + const Vec4V dotRaXnAngVel0 = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4Mul(raXnX, angVelT00))); + + Vec4V unitResponse = V4MulAdd(invMass0D0, angDom0, dotDelAngVel0); + Vec4V vrel = V4Add(relNorVel, dotRaXnAngVel0); + + + //The dynamic-only parts - need to if-statement these up. A branch here shouldn't cost us too much + if(isDynamic) + { + SolverContactBatchPointDynamic4* PX_RESTRICT dynamicContact = static_cast<SolverContactBatchPointDynamic4*>(solverContact); + const Vec4V rbXnX = V4NegMulSub(rbZ, normalY, V4Mul(rbY, normalZ)); + const Vec4V rbXnY = V4NegMulSub(rbX, normalZ, V4Mul(rbZ, normalX)); + const Vec4V rbXnZ = V4NegMulSub(rbY, normalX, V4Mul(rbX, normalY)); + + Vec4V delAngVel1X = V4Mul(invInertia1X0, rbXnX); + Vec4V delAngVel1Y = V4Mul(invInertia1X1, rbXnX); + Vec4V delAngVel1Z = V4Mul(invInertia1X2, rbXnX); + + delAngVel1X = V4MulAdd(invInertia1Y0, rbXnY, delAngVel1X); + delAngVel1Y = V4MulAdd(invInertia1Y1, rbXnY, delAngVel1Y); + delAngVel1Z = V4MulAdd(invInertia1Y2, rbXnY, delAngVel1Z); + + delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, delAngVel1X); + delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, delAngVel1Y); + delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, delAngVel1Z); + + PX_ASSERT(ValidateVec4(delAngVel1X)); + PX_ASSERT(ValidateVec4(delAngVel1Y)); + PX_ASSERT(ValidateVec4(delAngVel1Z)); + + const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1X, delAngVel1X, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1Z, delAngVel1Z))); + const Vec4V dotRbXnAngVel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01))); + + const Vec4V resp1 = V4MulAdd(dotDelAngVel1, angDom1, invMass1D1); + + unitResponse = 
V4Add(unitResponse, resp1); + + vrel = V4Sub(vrel, dotRbXnAngVel1); + + //These are for dynamic-only contacts. + dynamicContact->rbXnX = delAngVel1X; + dynamicContact->rbXnY = delAngVel1Y; + dynamicContact->rbXnZ = delAngVel1Z; + + } + else if(hasKinematic) + { + const Vec4V rbXnX = V4NegMulSub(rbZ, normalY, V4Mul(rbY, normalZ)); + const Vec4V rbXnY = V4NegMulSub(rbX, normalZ, V4Mul(rbZ, normalX)); + const Vec4V rbXnZ = V4NegMulSub(rbY, normalX, V4Mul(rbX, normalY)); + + const Vec4V dotRbXnAngVel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01))); + + vrel = V4Sub(vrel, dotRbXnAngVel1); + } + + const Vec4V velMultiplier = V4Sel(V4IsGrtr(unitResponse, zero), V4Recip(unitResponse), zero); + + const Vec4V penetration = V4Sub(separation, restDistance); + const Vec4V penInvDtPt8 = V4Max(maxPenBias, V4Scale(penetration, invDtp8)); + Vec4V scaledBias = V4Mul(penInvDtPt8, velMultiplier); + + const Vec4V penetrationInvDt = V4Scale(penetration, invDt); + + const BoolV isGreater2 = BAnd(BAnd(V4IsGrtr(zero, restitution), V4IsGrtr(bounceThreshold, vrel)), + V4IsGrtr(V4Neg(vrel), penetrationInvDt)); + + const BoolV ccdSeparationCondition = V4IsGrtrOrEq(ccdMaxSeparation, penetration); + + scaledBias = V4Sel(BAnd(ccdSeparationCondition, isGreater2), zero, V4Neg(scaledBias)); + + const Vec4V targetVelocity = V4Sel(isGreater2, V4Mul(velMultiplier, V4Mul(vrel, restitution)), zero); + + //Vec4V biasedErr = V4Sel(isGreater2, targetVelocity, scaledBias); + Vec4V biasedErr = V4Add(targetVelocity, scaledBias); + + biasedErr = V4NegMulSub(V4Sub(vrel, cTargetNorVel), velMultiplier, biasedErr); + + //These values are present for static and dynamic contacts + solverContact->raXnX = delAngVel0X; + solverContact->raXnY = delAngVel0Y; + solverContact->raXnZ = delAngVel0Z; + solverContact->velMultiplier = velMultiplier; + solverContact->biasedErr = biasedErr; + + //solverContact->scaledBias = V4Max(zero, scaledBias); + solverContact->scaledBias = V4Sel(isGreater2, 
scaledBias, V4Max(zero, scaledBias)); + + if(hasMaxImpulse) + { + maxImpulse[contactCount-1] = V4Merge(FLoad(con0.maxImpulse), FLoad(con1.maxImpulse), FLoad(con2.maxImpulse), + FLoad(con3.maxImpulse)); + } + } + if(!(finished & 0x1)) + { + iter0.nextContact(patch0, contact0); + newFinished |= PxU32(!iter0.hasNextContact()); + } + + if(!(finished & 0x2)) + { + iter1.nextContact(patch1, contact1); + newFinished |= (PxU32(!iter1.hasNextContact()) << 1); + } + + if(!(finished & 0x4)) + { + iter2.nextContact(patch2, contact2); + newFinished |= (PxU32(!iter2.hasNextContact()) << 2); + } + + if(!(finished & 0x8)) + { + iter3.nextContact(patch3, contact3); + newFinished |= (PxU32(!iter3.hasNextContact()) << 3); + } + } + ptr = p; + if(hasMaxImpulse) + { + ptr += sizeof(Vec4V) * totalContacts; + } + + //OK...friction time :-) + + Vec4V maxImpulseScale = V4One(); + { + const Vec4V staticFriction = V4LoadXYZW(contactBase0->staticFriction, contactBase1->staticFriction, + contactBase2->staticFriction, contactBase3->staticFriction); + + const Vec4V dynamicFriction = V4LoadXYZW(contactBase0->dynamicFriction, contactBase1->dynamicFriction, + contactBase2->dynamicFriction, contactBase3->dynamicFriction); + + PX_ASSERT(totalContacts == contactCount); + header->dynamicFriction = dynamicFriction; + header->staticFriction = staticFriction; + + const FrictionPatch& frictionPatch0 = c.frictionPatches[frictionIndex0]; + const FrictionPatch& frictionPatch1 = c.frictionPatches[frictionIndex1]; + const FrictionPatch& frictionPatch2 = c.frictionPatches[frictionIndex2]; + const FrictionPatch& frictionPatch3 = c.frictionPatches[frictionIndex3]; + + PxU32 anchorCount0 = frictionPatch0.anchorCount; + PxU32 anchorCount1 = frictionPatch1.anchorCount; + PxU32 anchorCount2 = frictionPatch2.anchorCount; + PxU32 anchorCount3 = frictionPatch3.anchorCount; + + PxU32 clampedAnchorCount0 = hasFinished0 || (contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION) ? 
0 : anchorCount0; + PxU32 clampedAnchorCount1 = hasFinished1 || (contactBase1->materialFlags & PxMaterialFlag::eDISABLE_FRICTION) ? 0 : anchorCount1; + PxU32 clampedAnchorCount2 = hasFinished2 || (contactBase2->materialFlags & PxMaterialFlag::eDISABLE_FRICTION) ? 0 : anchorCount2; + PxU32 clampedAnchorCount3 = hasFinished3 || (contactBase3->materialFlags & PxMaterialFlag::eDISABLE_FRICTION) ? 0 : anchorCount3; + + const PxU32 maxAnchorCount = PxMax(clampedAnchorCount0, PxMax(clampedAnchorCount1, PxMax(clampedAnchorCount2, clampedAnchorCount3))); + + //if(clampedAnchorCount0 != clampedAnchorCount1 || clampedAnchorCount0 != clampedAnchorCount2 || clampedAnchorCount0 != clampedAnchorCount3) + // Ps::debugBreak(); + + + //const bool haveFriction = maxAnchorCount != 0; + header->numFrictionConstr = Ps::to8(maxAnchorCount*2); + header->numFrictionConstr0 = Ps::to8(clampedAnchorCount0*2); + header->numFrictionConstr1 = Ps::to8(clampedAnchorCount1*2); + header->numFrictionConstr2 = Ps::to8(clampedAnchorCount2*2); + header->numFrictionConstr3 = Ps::to8(clampedAnchorCount3*2); + + //KS - TODO - extend this if needed + header->type = Ps::to8(isDynamic ? DY_SC_TYPE_BLOCK_RB_CONTACT : DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT); + + if(maxAnchorCount) + { + + //Allocate the shared friction data... 
+ + SolverFrictionSharedData4* PX_RESTRICT fd = reinterpret_cast<SolverFrictionSharedData4*>(ptr); + ptr += sizeof(SolverFrictionSharedData4); + PX_UNUSED(fd); + + const BoolV cond =V4IsGrtr(orthoThreshold, V4Abs(normalX)); + + const Vec4V t0FallbackX = V4Sel(cond, zero, V4Neg(normalY)); + const Vec4V t0FallbackY = V4Sel(cond, V4Neg(normalZ), normalX); + const Vec4V t0FallbackZ = V4Sel(cond, normalY, zero); + + //const Vec4V dotNormalVrel = V4MulAdd(normalZ, vrelZ, V4MulAdd(normalY, vrelY, V4Mul(normalX, vrelX))); + const Vec4V vrelSubNorVelX = V4NegMulSub(normalX, relNorVel, vrelX); + const Vec4V vrelSubNorVelY = V4NegMulSub(normalY, relNorVel, vrelY); + const Vec4V vrelSubNorVelZ = V4NegMulSub(normalZ, relNorVel, vrelZ); + + const Vec4V lenSqvrelSubNorVelZ = V4MulAdd(vrelSubNorVelX, vrelSubNorVelX, V4MulAdd(vrelSubNorVelY, vrelSubNorVelY, V4Mul(vrelSubNorVelZ, vrelSubNorVelZ))); + + const BoolV bcon2 = V4IsGrtr(lenSqvrelSubNorVelZ, p1); + + Vec4V t0X = V4Sel(bcon2, vrelSubNorVelX, t0FallbackX); + Vec4V t0Y = V4Sel(bcon2, vrelSubNorVelY, t0FallbackY); + Vec4V t0Z = V4Sel(bcon2, vrelSubNorVelZ, t0FallbackZ); + + + //Now normalize this... 
+ const Vec4V recipLen = V4Rsqrt(V4MulAdd(t0Z, t0Z, V4MulAdd(t0Y, t0Y, V4Mul(t0X, t0X)))); + + t0X = V4Mul(t0X, recipLen); + t0Y = V4Mul(t0Y, recipLen); + t0Z = V4Mul(t0Z, recipLen); + + Vec4V t1X = V4NegMulSub(normalZ, t0Y, V4Mul(normalY, t0Z)); + Vec4V t1Y = V4NegMulSub(normalX, t0Z, V4Mul(normalZ, t0X)); + Vec4V t1Z = V4NegMulSub(normalY, t0X, V4Mul(normalX, t0Y)); + + PX_ASSERT((uintptr_t(descs[0].frictionPtr) & 0xF) == 0); + PX_ASSERT((uintptr_t(descs[1].frictionPtr) & 0xF) == 0); + PX_ASSERT((uintptr_t(descs[2].frictionPtr) & 0xF) == 0); + PX_ASSERT((uintptr_t(descs[3].frictionPtr) & 0xF) == 0); + + + PxU8* PX_RESTRICT writeback0 = descs[0].frictionPtr + frictionPatchWritebackAddrIndex0*sizeof(FrictionPatch); + PxU8* PX_RESTRICT writeback1 = descs[1].frictionPtr + frictionPatchWritebackAddrIndex1*sizeof(FrictionPatch); + PxU8* PX_RESTRICT writeback2 = descs[2].frictionPtr + frictionPatchWritebackAddrIndex2*sizeof(FrictionPatch); + PxU8* PX_RESTRICT writeback3 = descs[3].frictionPtr + frictionPatchWritebackAddrIndex3*sizeof(FrictionPatch); + + PxU32 index0 = 0, index1 = 0, index2 = 0, index3 = 0; + + fd->broken = bFalse; + fd->frictionBrokenWritebackByte[0] = writeback0; + fd->frictionBrokenWritebackByte[1] = writeback1; + fd->frictionBrokenWritebackByte[2] = writeback2; + fd->frictionBrokenWritebackByte[3] = writeback3; + + + fd->normalX[0] = t0X; + fd->normalY[0] = t0Y; + fd->normalZ[0] = t0Z; + + fd->normalX[1] = t1X; + fd->normalY[1] = t1Y; + fd->normalZ[1] = t1Z; + + Vec4V* PX_RESTRICT appliedForces = reinterpret_cast<Vec4V*>(ptr); + ptr += sizeof(Vec4V)*header->numFrictionConstr; + + PxMemZero(appliedForces, sizeof(Vec4V) * header->numFrictionConstr); + + for(PxU32 j = 0; j < maxAnchorCount; j++) + { + Ps::prefetchLine(ptr, 384); + Ps::prefetchLine(ptr, 512); + Ps::prefetchLine(ptr, 640); + SolverContactFrictionBase4* PX_RESTRICT f0 = reinterpret_cast<SolverContactFrictionBase4*>(ptr); + ptr += frictionSize; + SolverContactFrictionBase4* PX_RESTRICT f1 = 
reinterpret_cast<SolverContactFrictionBase4*>(ptr); + ptr += frictionSize; + + index0 = j < clampedAnchorCount0 ? j : index0; + index1 = j < clampedAnchorCount1 ? j : index1; + index2 = j < clampedAnchorCount2 ? j : index2; + index3 = j < clampedAnchorCount3 ? j : index3; + + if(j >= clampedAnchorCount0) + maxImpulseScale = V4SetX(maxImpulseScale, fZero); + if(j >= clampedAnchorCount1) + maxImpulseScale = V4SetY(maxImpulseScale, fZero); + if(j >= clampedAnchorCount2) + maxImpulseScale = V4SetZ(maxImpulseScale, fZero); + if(j >= clampedAnchorCount3) + maxImpulseScale = V4SetW(maxImpulseScale, fZero); + + t0X = V4Mul(maxImpulseScale, t0X); + t0Y = V4Mul(maxImpulseScale, t0Y); + t0Z = V4Mul(maxImpulseScale, t0Z); + + t1X = V4Mul(maxImpulseScale, t1X); + t1Y = V4Mul(maxImpulseScale, t1Y); + t1Z = V4Mul(maxImpulseScale, t1Z); + + + Vec3V body0Anchor0 = V3LoadU(frictionPatch0.body0Anchors[index0]); + Vec3V body0Anchor1 = V3LoadU(frictionPatch1.body0Anchors[index1]); + Vec3V body0Anchor2 = V3LoadU(frictionPatch2.body0Anchors[index2]); + Vec3V body0Anchor3 = V3LoadU(frictionPatch3.body0Anchors[index3]); + + Vec4V ra0 = Vec4V_From_Vec3V(QuatRotate(bodyFrame00q, body0Anchor0)); + Vec4V ra1 = Vec4V_From_Vec3V(QuatRotate(bodyFrame01q, body0Anchor1)); + Vec4V ra2 = Vec4V_From_Vec3V(QuatRotate(bodyFrame02q, body0Anchor2)); + Vec4V ra3 = Vec4V_From_Vec3V(QuatRotate(bodyFrame03q, body0Anchor3)); + + Vec4V raX, raY, raZ; + PX_TRANSPOSE_44_34(ra0, ra1, ra2, ra3, raX, raY, raZ); + + const Vec4V raWorldX = V4Add(raX, bodyFrame0pX); + const Vec4V raWorldY = V4Add(raY, bodyFrame0pY); + const Vec4V raWorldZ = V4Add(raZ, bodyFrame0pZ); + + Vec3V body1Anchor0 = V3LoadU(frictionPatch0.body1Anchors[index0]); + Vec3V body1Anchor1 = V3LoadU(frictionPatch1.body1Anchors[index1]); + Vec3V body1Anchor2 = V3LoadU(frictionPatch2.body1Anchors[index2]); + Vec3V body1Anchor3 = V3LoadU(frictionPatch3.body1Anchors[index3]); + + Vec4V rb0 = Vec4V_From_Vec3V(QuatRotate(bodyFrame10q, body1Anchor0)); + Vec4V 
rb1 = Vec4V_From_Vec3V(QuatRotate(bodyFrame11q, body1Anchor1)); + Vec4V rb2 = Vec4V_From_Vec3V(QuatRotate(bodyFrame12q, body1Anchor2)); + Vec4V rb3 = Vec4V_From_Vec3V(QuatRotate(bodyFrame13q, body1Anchor3)); + + Vec4V rbX, rbY, rbZ; + PX_TRANSPOSE_44_34(rb0, rb1, rb2, rb3, rbX, rbY, rbZ); + + const Vec4V rbWorldX = V4Add(rbX, bodyFrame1pX); + const Vec4V rbWorldY = V4Add(rbY, bodyFrame1pY); + const Vec4V rbWorldZ = V4Add(rbZ, bodyFrame1pZ); + + const Vec4V errorX = V4Sub(raWorldX, rbWorldX); + const Vec4V errorY = V4Sub(raWorldY, rbWorldY); + const Vec4V errorZ = V4Sub(raWorldZ, rbWorldZ); + + //KS - todo - get this working with per-point friction + //PxU32 index0 = /*perPointFriction ? c.contactID[i][j] : */c.contactPatches[c.correlationListHeads[i]].start; + + Vec4V targetVel0 = V4LoadA(&contactBase0->targetVel.x); + Vec4V targetVel1 = V4LoadA(&contactBase1->targetVel.x); + Vec4V targetVel2 = V4LoadA(&contactBase2->targetVel.x); + Vec4V targetVel3 = V4LoadA(&contactBase3->targetVel.x); + + Vec4V targetVelX, targetVelY, targetVelZ; + PX_TRANSPOSE_44_34(targetVel0, targetVel1, targetVel2, targetVel3, targetVelX, targetVelY, targetVelZ); + + + { + const Vec4V raXnX = V4NegMulSub(raZ, t0Y, V4Mul(raY, t0Z)); + const Vec4V raXnY = V4NegMulSub(raX, t0Z, V4Mul(raZ, t0X)); + const Vec4V raXnZ = V4NegMulSub(raY, t0X, V4Mul(raX, t0Y)); + + Vec4V delAngVel0X = V4Mul(invInertia0X0, raXnX); + Vec4V delAngVel0Y = V4Mul(invInertia0X1, raXnX); + Vec4V delAngVel0Z = V4Mul(invInertia0X2, raXnX); + + delAngVel0X = V4MulAdd(invInertia0Y0, raXnY, delAngVel0X); + delAngVel0Y = V4MulAdd(invInertia0Y1, raXnY, delAngVel0Y); + delAngVel0Z = V4MulAdd(invInertia0Y2, raXnY, delAngVel0Z); + + delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, delAngVel0X); + delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, delAngVel0Y); + delAngVel0Z = V4MulAdd(invInertia0Z2, raXnZ, delAngVel0Z); + + const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0Z, delAngVel0Z, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0X, 
delAngVel0X))); + + Vec4V resp = V4MulAdd(dotDelAngVel0, angDom0, invMass0D0); + + const Vec4V tVel0 = V4MulAdd(t0Z, linVelT20, V4MulAdd(t0Y, linVelT10, V4Mul(t0X, linVelT00))); + Vec4V vrel = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4MulAdd(raXnX, angVelT00, tVel0))); + + if(isDynamic) + { + SolverContactFrictionDynamic4* PX_RESTRICT dynamicF0 = static_cast<SolverContactFrictionDynamic4*>(f0); + + const Vec4V rbXnX = V4NegMulSub(rbZ, t0Y, V4Mul(rbY, t0Z)); + const Vec4V rbXnY = V4NegMulSub(rbX, t0Z, V4Mul(rbZ, t0X)); + const Vec4V rbXnZ = V4NegMulSub(rbY, t0X, V4Mul(rbX, t0Y)); + + Vec4V delAngVel1X = V4Mul(invInertia1X0, rbXnX); + Vec4V delAngVel1Y = V4Mul(invInertia1X1, rbXnX); + Vec4V delAngVel1Z = V4Mul(invInertia1X2, rbXnX); + + delAngVel1X = V4MulAdd(invInertia1Y0, rbXnY, delAngVel1X); + delAngVel1Y = V4MulAdd(invInertia1Y1, rbXnY, delAngVel1Y); + delAngVel1Z = V4MulAdd(invInertia1Y2, rbXnY, delAngVel1Z); + + delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, delAngVel1X); + delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, delAngVel1Y); + delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, delAngVel1Z); + + const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1Z, delAngVel1Z, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1X, delAngVel1X))); + + const Vec4V resp1 = V4MulAdd(dotDelAngVel1, angDom1, invMass1D1); + + resp = V4Add(resp, resp1); + + dynamicF0->rbXnX = delAngVel1X; + dynamicF0->rbXnY = delAngVel1Y; + dynamicF0->rbXnZ = delAngVel1Z; + + const Vec4V tVel1 = V4MulAdd(t0Z, linVelT21, V4MulAdd(t0Y, linVelT11, V4Mul(t0X, linVelT01))); + const Vec4V vel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4MulAdd(rbXnX, angVelT01, tVel1))); + + vrel = V4Sub(vrel, vel1); + } + else if(hasKinematic) + { + const Vec4V rbXnX = V4NegMulSub(rbZ, t0Y, V4Mul(rbY, t0Z)); + const Vec4V rbXnY = V4NegMulSub(rbX, t0Z, V4Mul(rbZ, t0X)); + const Vec4V rbXnZ = V4NegMulSub(rbY, t0X, V4Mul(rbX, t0Y)); + + const Vec4V dotRbXnAngVel1 = V4MulAdd(rbXnZ, angVelT21, 
V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01))); + + vrel = V4Sub(vrel, dotRbXnAngVel1); + } + + + const Vec4V velMultiplier = V4Mul(maxImpulseScale, V4Sel(V4IsGrtr(resp, zero), V4Div(p84, resp), zero)); + + Vec4V bias = V4Scale(V4MulAdd(t0Z, errorZ, V4MulAdd(t0Y, errorY, V4Mul(t0X, errorX))), invDt); + + Vec4V targetVel = V4MulAdd(t0Z, targetVelZ,V4MulAdd(t0Y, targetVelY, V4Mul(t0X, targetVelX))); + targetVel = V4Sub(targetVel, vrel); + f0->targetVelocity = V4Neg(V4Mul(targetVel, velMultiplier)); + bias = V4Sub(bias, targetVel); + + f0->raXnX = delAngVel0X; + f0->raXnY = delAngVel0Y; + f0->raXnZ = delAngVel0Z; + f0->scaledBias = V4Mul(bias, velMultiplier); + f0->velMultiplier = velMultiplier; + } + + { + const Vec4V raXnX = V4NegMulSub(raZ, t1Y, V4Mul(raY, t1Z)); + const Vec4V raXnY = V4NegMulSub(raX, t1Z, V4Mul(raZ, t1X)); + const Vec4V raXnZ = V4NegMulSub(raY, t1X, V4Mul(raX, t1Y)); + + Vec4V delAngVel0X = V4Mul(invInertia0X0, raXnX); + Vec4V delAngVel0Y = V4Mul(invInertia0X1, raXnX); + Vec4V delAngVel0Z = V4Mul(invInertia0X2, raXnX); + + delAngVel0X = V4MulAdd(invInertia0Y0, raXnY, delAngVel0X); + delAngVel0Y = V4MulAdd(invInertia0Y1, raXnY, delAngVel0Y); + delAngVel0Z = V4MulAdd(invInertia0Y2, raXnY, delAngVel0Z); + + delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, delAngVel0X); + delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, delAngVel0Y); + delAngVel0Z = V4MulAdd(invInertia0Z2, raXnZ, delAngVel0Z); + + const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0Z, delAngVel0Z, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0X, delAngVel0X))); + + Vec4V resp = V4MulAdd(dotDelAngVel0, angDom0, invMass0D0); + + const Vec4V tVel0 = V4MulAdd(t1Z, linVelT20, V4MulAdd(t1Y, linVelT10, V4Mul(t1X, linVelT00))); + Vec4V vrel = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4MulAdd(raXnX, angVelT00, tVel0))); + + if(isDynamic) + { + SolverContactFrictionDynamic4* PX_RESTRICT dynamicF1 = static_cast<SolverContactFrictionDynamic4*>(f1); + + const Vec4V rbXnX = 
V4NegMulSub(rbZ, t1Y, V4Mul(rbY, t1Z)); + const Vec4V rbXnY = V4NegMulSub(rbX, t1Z, V4Mul(rbZ, t1X)); + const Vec4V rbXnZ = V4NegMulSub(rbY, t1X, V4Mul(rbX, t1Y)); + + Vec4V delAngVel1X = V4Mul(invInertia1X0, rbXnX); + Vec4V delAngVel1Y = V4Mul(invInertia1X1, rbXnX); + Vec4V delAngVel1Z = V4Mul(invInertia1X2, rbXnX); + + delAngVel1X = V4MulAdd(invInertia1Y0, rbXnY, delAngVel1X); + delAngVel1Y = V4MulAdd(invInertia1Y1, rbXnY, delAngVel1Y); + delAngVel1Z = V4MulAdd(invInertia1Y2, rbXnY, delAngVel1Z); + + delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, delAngVel1X); + delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, delAngVel1Y); + delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, delAngVel1Z); + + const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1Z, delAngVel1Z, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1X, delAngVel1X))); + + const Vec4V resp1 = V4MulAdd(dotDelAngVel1, angDom1, invMass1D1); + + resp = V4Add(resp, resp1); + + dynamicF1->rbXnX = delAngVel1X; + dynamicF1->rbXnY = delAngVel1Y; + dynamicF1->rbXnZ = delAngVel1Z; + + const Vec4V tVel1 = V4MulAdd(t1Z, linVelT21, V4MulAdd(t1Y, linVelT11, V4Mul(t1X, linVelT01))); + const Vec4V vel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4MulAdd(rbXnX, angVelT01, tVel1))); + + vrel = V4Sub(vrel, vel1); + + } + else if(hasKinematic) + { + const Vec4V rbXnX = V4NegMulSub(rbZ, t1Y, V4Mul(rbY, t1Z)); + const Vec4V rbXnY = V4NegMulSub(rbX, t1Z, V4Mul(rbZ, t1X)); + const Vec4V rbXnZ = V4NegMulSub(rbY, t1X, V4Mul(rbX, t1Y)); + + const Vec4V dotRbXnAngVel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01))); + + vrel = V4Sub(vrel, dotRbXnAngVel1); + } + + + const Vec4V velMultiplier = V4Mul(maxImpulseScale, V4Sel(V4IsGrtr(resp, zero), V4Div(p84, resp), zero)); + + Vec4V bias = V4Scale(V4MulAdd(t1Z, errorZ, V4MulAdd(t1Y, errorY, V4Mul(t1X, errorX))), invDt); + + Vec4V targetVel = V4MulAdd(t1Z, targetVelZ,V4MulAdd(t1Y, targetVelY, V4Mul(t1X, targetVelX))); + targetVel = V4Sub(targetVel, vrel); + 
f1->targetVelocity = V4Neg(V4Mul(targetVel, velMultiplier)); + bias = V4Sub(bias, targetVel); + f1->raXnX = delAngVel0X; + f1->raXnY = delAngVel0Y; + f1->raXnZ = delAngVel0Z; + f1->scaledBias = V4Mul(bias, velMultiplier); + f1->velMultiplier = velMultiplier; + } + } + + frictionPatchWritebackAddrIndex0++; + frictionPatchWritebackAddrIndex1++; + frictionPatchWritebackAddrIndex2++; + frictionPatchWritebackAddrIndex3++; + } + } + } +} + + +/* Counts the friction patches in [frictionPatchStartIndex, frictionPatchEndIndex) whose correlation list head is not LIST_END. Writes that count to _numFrictionPatches and writes count*sizeof(FrictionPatch), rounded up to a multiple of 16, to _frictionPatchByteSize. */ +PX_FORCE_INLINE void computeBlockStreamFrictionByteSizes(const CorrelationBuffer& c, + PxU32& _frictionPatchByteSize, PxU32& _numFrictionPatches, + PxU32 frictionPatchStartIndex, PxU32 frictionPatchEndIndex) +{ + // PT: use local vars to remove LHS + PxU32 numFrictionPatches = 0; + + for(PxU32 i = frictionPatchStartIndex; i < frictionPatchEndIndex; i++) + { + //Friction patches. Only entries whose list head is not LIST_END are counted. + if(c.correlationListHeads[i] != CorrelationBuffer::LIST_END) + numFrictionPatches++; + } + PxU32 frictionPatchByteSize = numFrictionPatches*sizeof(FrictionPatch); + + _numFrictionPatches = numFrictionPatches; + + //16-byte alignment: round the byte size up to the next multiple of 16. + _frictionPatchByteSize = ((frictionPatchByteSize + 0x0f) & ~0x0f); + PX_ASSERT(0 == (_frictionPatchByteSize & 0x0f)); +} +//Reserves the friction patch buffer for the patch range [frictionPatchStartIndex, frictionPatchEndIndex). +//_frictionPatches receives the reserved buffer, or NULL when nothing was needed or the reservation failed. +//numFrictionPatches receives the number of patches counted by computeBlockStreamFrictionByteSizes. +//Returns false only when a non-zero size was requested and no buffer was obtained. +static bool reserveFrictionBlockStreams(const CorrelationBuffer& c, PxConstraintAllocator& constraintAllocator, PxU32 frictionPatchStartIndex, PxU32 frictionPatchEndIndex, + FrictionPatch*& _frictionPatches, + PxU32& numFrictionPatches) +{ + + //From frictionPatchStream we just need to reserve a single buffer. + PxU32 frictionPatchByteSize = 0; + //Compute the sizes of all the buffers. + + computeBlockStreamFrictionByteSizes(c, frictionPatchByteSize, numFrictionPatches, frictionPatchStartIndex, frictionPatchEndIndex); + + FrictionPatch* frictionPatches = NULL; + //Only reserve the friction buffer when the computed byte size is non-zero.
+ if(frictionPatchByteSize > 0) + { + frictionPatches = reinterpret_cast<FrictionPatch*>(constraintAllocator.reserveFrictionData(frictionPatchByteSize)); +/* Both a null result and the all-ones pointer value (-1) are handled as failures here; each case issues its own warning, and the -1 case is reset to NULL so callers only ever see a valid pointer or NULL. */ + if(0==frictionPatches || (reinterpret_cast<FrictionPatch*>(-1))==frictionPatches) + { + if(0==frictionPatches) + { + PX_WARN_ONCE( + "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. " + "Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks."); + } + else + { + PX_WARN_ONCE( + "Attempting to allocate more than 16K of friction data for a single contact pair in constraint prep. " + "Either accept dropped contacts or simplify collision geometry."); + frictionPatches=NULL; + } + } + } + + _frictionPatches = frictionPatches; + + //Return true if nothing had to be reserved or the friction buffer reservation succeeded. + return (0==frictionPatchByteSize || frictionPatches); +} + +//The persistent friction patch correlation/allocation will already have happened as this is per-pair. +//This function just computes the size of the combined solve data. 
+void computeBlockStreamByteSizes4(PxSolverContactDesc* descs, + PxU32& _solverConstraintByteSize, PxU32* _axisConstraintCount, + const CorrelationBuffer& c) +{ + PX_ASSERT(0 == _solverConstraintByteSize); + + PxU32 maxPatches = 0; + PxU32 maxFrictionPatches = 0; + PxU32 maxContactCount[CorrelationBuffer::MAX_FRICTION_PATCHES]; + PxU32 maxFrictionCount[CorrelationBuffer::MAX_FRICTION_PATCHES]; + PxMemZero(maxContactCount, sizeof(maxContactCount)); + PxMemZero(maxFrictionCount, sizeof(maxFrictionCount)); + bool hasMaxImpulse = false; + + for(PxU32 a = 0; a < 4; ++a) + { + PxU32 axisConstraintCount = 0; + hasMaxImpulse = hasMaxImpulse || descs[a].hasMaxImpulse; + for(PxU32 i = 0; i < descs[a].numFrictionPatches; i++) + { + PxU32 ind = i + descs[a].startFrictionPatchIndex; + + const FrictionPatch& frictionPatch = c.frictionPatches[ind]; + + const bool haveFriction = (frictionPatch.materialFlags & PxMaterialFlag::eDISABLE_FRICTION) == 0 + && frictionPatch.anchorCount != 0; + //Solver constraint data. 
+ if(c.frictionPatchContactCounts[ind]!=0) + { + maxContactCount[i] = PxMax(c.frictionPatchContactCounts[ind], maxContactCount[i]); + axisConstraintCount += c.frictionPatchContactCounts[ind]; + + if(haveFriction) + { + const PxU32 fricCount = PxU32(c.frictionPatches[ind].anchorCount) * 2; + maxFrictionCount[i] = PxMax(fricCount, maxFrictionCount[i]); + axisConstraintCount += fricCount; + } + } + } + maxPatches = PxMax(descs[a].numFrictionPatches, maxPatches); + _axisConstraintCount[a] = axisConstraintCount; + } + + for(PxU32 a = 0; a < maxPatches; ++a) + { + if(maxFrictionCount[a] > 0) + maxFrictionPatches++; + } + + + PxU32 totalContacts = 0, totalFriction = 0; + for(PxU32 a = 0; a < maxPatches; ++a) + { + totalContacts += maxContactCount[a]; + totalFriction += maxFrictionCount[a]; + } + + //OK, we have a given number of friction patches, contact points and friction constraints so we can calculate how much memory we need + + //Body 2 is considered static if it is either *not dynamic* or *kinematic* + + bool hasDynamicBody = false; + for(PxU32 a = 0; a < 4; ++a) + { + hasDynamicBody = hasDynamicBody || ((descs[a].bodyState1 == PxSolverContactDesc::eDYNAMIC_BODY)); + } + + + const bool isStatic = !hasDynamicBody; + + const PxU32 headerSize = sizeof(SolverContactHeader4) * maxPatches + sizeof(SolverFrictionSharedData4) * maxFrictionPatches; + PxU32 constraintSize = isStatic ? 
(sizeof(SolverContactBatchPointBase4) * totalContacts) + ( sizeof(SolverContactFrictionBase4) * totalFriction) : + (sizeof(SolverContactBatchPointDynamic4) * totalContacts) + (sizeof(SolverContactFrictionDynamic4) * totalFriction); + + //Space for the appliedForce buffer + constraintSize += sizeof(Vec4V)*(totalContacts+totalFriction); + + //If we have max impulse, reserve a buffer for it + if(hasMaxImpulse) + constraintSize += sizeof(Ps::aos::Vec4V) * totalContacts; + + _solverConstraintByteSize = ((constraintSize + headerSize + 0x0f) & ~0x0f); + PX_ASSERT(0 == (_solverConstraintByteSize & 0x0f)); +} + +static SolverConstraintPrepState::Enum reserveBlockStreams4(PxSolverContactDesc* descs, Dy::CorrelationBuffer& c, + PxU8*& solverConstraint, PxU32* axisConstraintCount, + PxU32& solverConstraintByteSize, + PxConstraintAllocator& constraintAllocator) +{ + PX_ASSERT(NULL == solverConstraint); + PX_ASSERT(0 == solverConstraintByteSize); + + //Compute the sizes of all the buffers. + computeBlockStreamByteSizes4(descs, + solverConstraintByteSize, axisConstraintCount, + c); + + //Reserve the buffers. + + //First reserve the accumulated buffer size for the constraint block. + PxU8* constraintBlock = NULL; + const PxU32 constraintBlockByteSize = solverConstraintByteSize; + if(constraintBlockByteSize > 0) + { + if((constraintBlockByteSize + 16u) > 16384) + return SolverConstraintPrepState::eUNBATCHABLE; + + constraintBlock = constraintAllocator.reserveConstraintData(constraintBlockByteSize + 16u); + + if(0==constraintBlock || (reinterpret_cast<PxU8*>(-1))==constraintBlock) + { + if(0==constraintBlock) + { + PX_WARN_ONCE( + "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. 
" + "Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks."); + } + else + { + PX_WARN_ONCE( + "Attempting to allocate more than 16K of contact data for a single contact pair in constraint prep. " + "Either accept dropped contacts or simplify collision geometry."); + constraintBlock=NULL; + } + } + } + + //Patch up the individual ptrs to the buffer returned by the constraint block reservation (assuming the reservation didn't fail). + if(0==constraintBlockByteSize || constraintBlock) + { + if(solverConstraintByteSize) + { + solverConstraint = constraintBlock; + PX_ASSERT(0==(uintptr_t(solverConstraint) & 0x0f)); + } + } + + return ((0==constraintBlockByteSize || constraintBlock)) ? SolverConstraintPrepState::eSUCCESS : SolverConstraintPrepState::eOUT_OF_MEMORY; +} + +SolverConstraintPrepState::Enum createFinalizeSolverContacts4( + Dy::CorrelationBuffer& c, + PxSolverContactDesc* blockDescs, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator) +{ + + PX_ALIGN(16, PxReal invMassScale0[4]); + PX_ALIGN(16, PxReal invMassScale1[4]); + PX_ALIGN(16, PxReal invInertiaScale0[4]); + PX_ALIGN(16, PxReal invInertiaScale1[4]); + + c.frictionPatchCount = 0; + c.contactPatchCount = 0; + + for (PxU32 a = 0; a < 4; ++a) + { + PxSolverContactDesc& blockDesc = blockDescs[a]; + + invMassScale0[a] = blockDesc.mInvMassScales.linear0; + invMassScale1[a] = blockDesc.mInvMassScales.linear1; + invInertiaScale0[a] = blockDesc.mInvMassScales.angular0; + invInertiaScale1[a] = blockDesc.mInvMassScales.angular1; + + blockDesc.startFrictionPatchIndex = c.frictionPatchCount; + if (!(blockDesc.disableStrongFriction)) + { + bool valid = getFrictionPatches(c, blockDesc.frictionPtr, blockDesc.frictionCount, + blockDesc.bodyFrame0, blockDesc.bodyFrame1, correlationDistance); + if (!valid) + return 
SolverConstraintPrepState::eUNBATCHABLE; + } + //Create the contact patches + blockDesc.startContactPatchIndex = c.contactPatchCount; + if (!createContactPatches(c, blockDesc.contacts, blockDesc.numContacts, PXC_SAME_NORMAL)) + return SolverConstraintPrepState::eUNBATCHABLE; + blockDesc.numContactPatches = PxU16(c.contactPatchCount - blockDesc.startContactPatchIndex); + + bool overflow = correlatePatches(c, blockDesc.contacts, blockDesc.bodyFrame0, blockDesc.bodyFrame1, PXC_SAME_NORMAL, + blockDesc.startContactPatchIndex, blockDesc.startFrictionPatchIndex); + + if (overflow) + return SolverConstraintPrepState::eUNBATCHABLE; + + growPatches(c, blockDesc.contacts, blockDesc.bodyFrame0, blockDesc.bodyFrame1, correlationDistance, blockDesc.startFrictionPatchIndex, + frictionOffsetThreshold + blockDescs[a].restDistance); + + //Remove the empty friction patches - do we actually need to do this? + for (PxU32 p = c.frictionPatchCount; p > blockDesc.startFrictionPatchIndex; --p) + { + if (c.correlationListHeads[p - 1] == 0xffff) + { + //We have an empty patch...need to bin this one... + for (PxU32 p2 = p; p2 < c.frictionPatchCount; ++p2) + { + c.correlationListHeads[p2 - 1] = c.correlationListHeads[p2]; + c.frictionPatchContactCounts[p2 - 1] = c.frictionPatchContactCounts[p2]; + } + c.frictionPatchCount--; + } + } + + PxU32 numFricPatches = c.frictionPatchCount - blockDesc.startFrictionPatchIndex; + blockDesc.numFrictionPatches = numFricPatches; + } + + FrictionPatch* frictionPatchArray[4]; + PxU32 frictionPatchCounts[4]; + + for (PxU32 a = 0; a < 4; ++a) + { + PxSolverContactDesc& blockDesc = blockDescs[a]; + + const bool successfulReserve = reserveFrictionBlockStreams(c, constraintAllocator, blockDesc.startFrictionPatchIndex, blockDesc.numFrictionPatches + blockDesc.startFrictionPatchIndex, + frictionPatchArray[a], + frictionPatchCounts[a]); + + //KS - TODO - how can we recover if we failed to allocate this memory? 
+ if (!successfulReserve) + { + return SolverConstraintPrepState::eOUT_OF_MEMORY; + } + } + //At this point, all the friction data has been calculated, the correlation has been done. Provided this was all successful, + //we are ready to create the batched constraints + + PxU8* solverConstraint = NULL; + PxU32 solverConstraintByteSize = 0; + + + + { + PxU32 axisConstraintCount[4]; + SolverConstraintPrepState::Enum state = reserveBlockStreams4(blockDescs, c, + solverConstraint, axisConstraintCount, + solverConstraintByteSize, + constraintAllocator); + + if (state != SolverConstraintPrepState::eSUCCESS) + return state; + + + for (PxU32 a = 0; a < 4; ++a) + { + + FrictionPatch* frictionPatches = frictionPatchArray[a]; + + PxSolverContactDesc& blockDesc = blockDescs[a]; + PxSolverConstraintDesc& desc = *blockDesc.desc; + blockDesc.frictionPtr = reinterpret_cast<PxU8*>(frictionPatches); + blockDesc.frictionCount = Ps::to8(frictionPatchCounts[a]); + + //Initialise friction buffer. + if (frictionPatches) + { + // PT: TODO: revisit this... 
not very satisfying + //const PxU32 maxSize = numFrictionPatches*sizeof(FrictionPatch); + Ps::prefetchLine(frictionPatches); + Ps::prefetchLine(frictionPatches, 128); + Ps::prefetchLine(frictionPatches, 256); + + for (PxU32 i = 0; i<blockDesc.numFrictionPatches; i++) + { + if (c.correlationListHeads[blockDesc.startFrictionPatchIndex + i] != CorrelationBuffer::LIST_END) + { + //*frictionPatches++ = c.frictionPatches[blockDesc.startFrictionPatchIndex + i]; + PxMemCopy(frictionPatches++, &c.frictionPatches[blockDesc.startFrictionPatchIndex + i], sizeof(FrictionPatch)); + //Ps::prefetchLine(frictionPatches, 256); + } + } + } + + + blockDesc.axisConstraintCount += Ps::to16(axisConstraintCount[a]); + + desc.constraint = solverConstraint; + desc.constraintLengthOver16 = Ps::to16(solverConstraintByteSize / 16); + desc.writeBackLengthOver4 = PxU16(blockDesc.numContacts); + desc.writeBack = blockDesc.contactForces; + } + + const Vec4V iMassScale0 = V4LoadA(invMassScale0); + const Vec4V iInertiaScale0 = V4LoadA(invInertiaScale0); + const Vec4V iMassScale1 = V4LoadA(invMassScale1); + const Vec4V iInertiaScale1 = V4LoadA(invInertiaScale1); + + setupFinalizeSolverConstraints4(blockDescs, c, solverConstraint, invDtF32, bounceThresholdF32, + iMassScale0, iInertiaScale0, iMassScale1, iInertiaScale1); + + PX_ASSERT((*solverConstraint == DY_SC_TYPE_BLOCK_RB_CONTACT) || (*solverConstraint == DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT)); + + *(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize)) = 0; + } + return SolverConstraintPrepState::eSUCCESS; +} + + +//This returns 1 of 3 states: success, unbatchable or out-of-memory. 
If the constraint is unbatchable, we must fall back on 4 separate constraint +//prep calls +SolverConstraintPrepState::Enum createFinalizeSolverContacts4( + PxsContactManagerOutput** cmOutputs, + ThreadContext& threadContext, + PxSolverContactDesc* blockDescs, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator) +{ + + for (PxU32 a = 0; a < 4; ++a) + { + blockDescs[a].desc->constraintLengthOver16 = 0; + } + + PX_ASSERT(cmOutputs[0]->nbContacts && cmOutputs[1]->nbContacts && cmOutputs[2]->nbContacts && cmOutputs[3]->nbContacts); + + + Gu::ContactBuffer& buffer = threadContext.mContactBuffer; + + buffer.count = 0; + + //PxTransform idt = PxTransform(PxIdentity); + + CorrelationBuffer& c = threadContext.mCorrelationBuffer; + + for (PxU32 a = 0; a < 4; ++a) + { + PxSolverContactDesc& blockDesc = blockDescs[a]; + PxSolverConstraintDesc& desc = *blockDesc.desc; + + //blockDesc.startContactIndex = buffer.count; + blockDesc.contacts = buffer.contacts + buffer.count; + + Ps::prefetchLine(desc.bodyA); + Ps::prefetchLine(desc.bodyB); + + + if ((buffer.count + cmOutputs[a]->nbContacts) > 64) + { + return SolverConstraintPrepState::eUNBATCHABLE; + } + + bool hasMaxImpulse = false; + bool hasTargetVelocity = false; + + //OK...do the correlation here as well... 
+ Ps::prefetchLine(blockDescs[a].frictionPtr); + Ps::prefetchLine(blockDescs[a].frictionPtr, 64); + Ps::prefetchLine(blockDescs[a].frictionPtr, 128); + + if (a < 3) + { + Ps::prefetchLine(cmOutputs[a]->contactPatches); + Ps::prefetchLine(cmOutputs[a]->contactPoints); + } + + PxReal invMassScale0, invMassScale1, invInertiaScale0, invInertiaScale1; + + const PxReal defaultMaxImpulse = PxMin(blockDesc.data0->maxContactImpulse, blockDesc.data1->maxContactImpulse); + + PxU32 contactCount = extractContacts(buffer, *cmOutputs[a], hasMaxImpulse, hasTargetVelocity, invMassScale0, invMassScale1, + invInertiaScale0, invInertiaScale1, defaultMaxImpulse); + + if (contactCount == 0) + return SolverConstraintPrepState::eUNBATCHABLE; + + blockDesc.numContacts = contactCount; + blockDesc.hasMaxImpulse = hasMaxImpulse; + blockDesc.disableStrongFriction = blockDesc.disableStrongFriction || hasTargetVelocity; + + blockDesc.mInvMassScales.linear0 *= invMassScale0; + blockDesc.mInvMassScales.linear1 *= invMassScale1; + blockDesc.mInvMassScales.angular0 *= invInertiaScale0; + blockDesc.mInvMassScales.angular1 *= invInertiaScale1; + + //blockDesc.frictionPtr = &blockDescs[a].frictionPtr; + //blockDesc.frictionCount = blockDescs[a].frictionCount; + + } + return createFinalizeSolverContacts4(c, blockDescs, + invDtF32, bounceThresholdF32, frictionOffsetThreshold, + correlationDistance, constraintAllocator); +} + + + + +} + +} + + diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4PF.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4PF.cpp new file mode 100644 index 00000000..4442b433 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrep4PF.cpp @@ -0,0 +1,1017 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "foundation/PxPreprocessor.h" +#include "PsVecMath.h" +#include "PsMathUtils.h" +#include "DySolverContact.h" +#include "DySolverContactPF.h" +#include "DySolverConstraintTypes.h" +#include "PxcNpWorkUnit.h" +#include "DyThreadContext.h" +#include "DyContactPrep.h" +#include "PxcNpContactPrepShared.h" +//#include "PxvGeometry.h" +#include "PxvDynamics.h" +#include "DyCorrelationBuffer.h" +#include "DySolverConstraintDesc.h" +#include "DySolverBody.h" +#include "DySolverContact4.h" +#include "DySolverContactPF4.h" + + +#include "PsVecMath.h" +#include "PxContactModifyCallback.h" +#include "PxsMaterialManager.h" +#include "PxsMaterialCombiner.h" +#include "DySolverExt.h" +#include "DyArticulationContactPrep.h" +#include "DyContactPrepShared.h" +#include "PsFoundation.h" + +using namespace physx::Gu; +using namespace physx::shdfnd::aos; + +namespace physx +{ +namespace Dy +{ + +SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb( + PxsContactManagerOutput** outputs, + ThreadContext& threadContext, + PxSolverContactDesc* blockDescs, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator, + PxFrictionType::Enum frictionType); + +static bool setupFinalizeSolverConstraintsCoulomb4(PxSolverContactDesc* PX_RESTRICT descs, PxU8* PX_RESTRICT workspace, + const PxReal invDtF32, PxReal bounceThresholdF32, CorrelationBuffer& c, const PxU32 numFrictionPerPoint, + const PxU32 numContactPoints4, const PxU32 /*solverConstraintByteSize*/, + const Ps::aos::Vec4VArg invMassScale0, const Ps::aos::Vec4VArg invInertiaScale0, + const Ps::aos::Vec4VArg invMassScale1, const Ps::aos::Vec4VArg invInertiaScale1) +{ + //KS - final step. Create the constraints in the place we pre-allocated... 
+ + const Vec4V ccdMaxSeparation = Ps::aos::V4LoadXYZW(descs[0].maxCCDSeparation, descs[1].maxCCDSeparation, descs[2].maxCCDSeparation, descs[3].maxCCDSeparation); + + const Vec4V zero = V4Zero(); + + PxU8 flags[4] = { PxU8(descs[0].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0), + PxU8(descs[1].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0), + PxU8(descs[2].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0), + PxU8(descs[3].hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0) }; + + + //The block is dynamic if **any** of the constraints have a non-static body B. This allows us to batch static and non-static constraints but we only get a memory/perf + //saving if all 4 are static. This simplifies the constraint partitioning such that it only needs to care about separating contacts and 1D constraints (which it already does) + const bool isDynamic = ((descs[0].bodyState1 | descs[1].bodyState1 | descs[2].bodyState1 | descs[3].bodyState1) & PxSolverContactDesc::eDYNAMIC_BODY) != 0; + + const PxU32 constraintSize = isDynamic ? sizeof(SolverContact4Dynamic) : sizeof(SolverContact4Base); + const PxU32 frictionSize = isDynamic ? 
sizeof(SolverFriction4Dynamic) : sizeof(SolverFriction4Base); + + PxU8* PX_RESTRICT ptr = workspace; + + const Vec4V dom0 = invMassScale0; + const Vec4V dom1 = invMassScale1; + const Vec4V angDom0 = invInertiaScale0; + const Vec4V angDom1 = invInertiaScale1; + + const Vec4V maxPenBias = V4Max(V4Merge(FLoad(descs[0].data0->penBiasClamp), FLoad(descs[1].data0->penBiasClamp), + FLoad(descs[2].data0->penBiasClamp), FLoad(descs[3].data0->penBiasClamp)), + V4Merge(FLoad(descs[0].data1->penBiasClamp), FLoad(descs[1].data1->penBiasClamp), + FLoad(descs[2].data1->penBiasClamp), FLoad(descs[3].data1->penBiasClamp))); + + const Vec4V restDistance = V4Merge(FLoad(descs[0].restDistance), FLoad(descs[1].restDistance), FLoad(descs[2].restDistance), + FLoad(descs[3].restDistance)); + + //load up velocities + Vec4V linVel00 = V4LoadA(&descs[0].data0->linearVelocity.x); + Vec4V linVel10 = V4LoadA(&descs[1].data0->linearVelocity.x); + Vec4V linVel20 = V4LoadA(&descs[2].data0->linearVelocity.x); + Vec4V linVel30 = V4LoadA(&descs[3].data0->linearVelocity.x); + + Vec4V linVel01 = V4LoadA(&descs[0].data1->linearVelocity.x); + Vec4V linVel11 = V4LoadA(&descs[1].data1->linearVelocity.x); + Vec4V linVel21 = V4LoadA(&descs[2].data1->linearVelocity.x); + Vec4V linVel31 = V4LoadA(&descs[3].data1->linearVelocity.x); + + Vec4V angVel00 = V4LoadA(&descs[0].data0->angularVelocity.x); + Vec4V angVel10 = V4LoadA(&descs[1].data0->angularVelocity.x); + Vec4V angVel20 = V4LoadA(&descs[2].data0->angularVelocity.x); + Vec4V angVel30 = V4LoadA(&descs[3].data0->angularVelocity.x); + + Vec4V angVel01 = V4LoadA(&descs[0].data1->angularVelocity.x); + Vec4V angVel11 = V4LoadA(&descs[1].data1->angularVelocity.x); + Vec4V angVel21 = V4LoadA(&descs[2].data1->angularVelocity.x); + Vec4V angVel31 = V4LoadA(&descs[3].data1->angularVelocity.x); + + Vec4V linVelT00, linVelT10, linVelT20; + Vec4V linVelT01, linVelT11, linVelT21; + Vec4V angVelT00, angVelT10, angVelT20; + Vec4V angVelT01, angVelT11, angVelT21; + + 
PX_TRANSPOSE_44_34(linVel00, linVel10, linVel20, linVel30, linVelT00, linVelT10, linVelT20); + PX_TRANSPOSE_44_34(linVel01, linVel11, linVel21, linVel31, linVelT01, linVelT11, linVelT21); + PX_TRANSPOSE_44_34(angVel00, angVel10, angVel20, angVel30, angVelT00, angVelT10, angVelT20); + PX_TRANSPOSE_44_34(angVel01, angVel11, angVel21, angVel31, angVelT01, angVelT11, angVelT21); + + const Vec4V vrelX = V4Sub(linVelT00, linVelT01); + const Vec4V vrelY = V4Sub(linVelT10, linVelT11); + const Vec4V vrelZ = V4Sub(linVelT20, linVelT21); + + + + //Load up masses and invInertia + + const Vec4V invMass0 = V4Merge(FLoad(descs[0].data0->invMass), FLoad(descs[1].data0->invMass), FLoad(descs[2].data0->invMass), + FLoad(descs[3].data0->invMass)); + + const Vec4V invMass1 = V4Merge(FLoad(descs[0].data1->invMass), FLoad(descs[1].data1->invMass), FLoad(descs[2].data1->invMass), + FLoad(descs[3].data1->invMass)); + + const Vec4V invMass0_dom0fV = V4Mul(dom0, invMass0); + const Vec4V invMass1_dom1fV = V4Mul(dom1, invMass1); + + Vec4V invInertia00X = Vec4V_From_Vec3V(V3LoadU(descs[0].data0->sqrtInvInertia.column0)); + Vec4V invInertia00Y = Vec4V_From_Vec3V(V3LoadU(descs[0].data0->sqrtInvInertia.column1)); + Vec4V invInertia00Z = Vec4V_From_Vec3V(V3LoadU(descs[0].data0->sqrtInvInertia.column2)); + + Vec4V invInertia10X = Vec4V_From_Vec3V(V3LoadU(descs[1].data0->sqrtInvInertia.column0)); + Vec4V invInertia10Y = Vec4V_From_Vec3V(V3LoadU(descs[1].data0->sqrtInvInertia.column1)); + Vec4V invInertia10Z = Vec4V_From_Vec3V(V3LoadU(descs[1].data0->sqrtInvInertia.column2)); + + Vec4V invInertia20X = Vec4V_From_Vec3V(V3LoadU(descs[2].data0->sqrtInvInertia.column0)); + Vec4V invInertia20Y = Vec4V_From_Vec3V(V3LoadU(descs[2].data0->sqrtInvInertia.column1)); + Vec4V invInertia20Z = Vec4V_From_Vec3V(V3LoadU(descs[2].data0->sqrtInvInertia.column2)); + + Vec4V invInertia30X = Vec4V_From_Vec3V(V3LoadU(descs[3].data0->sqrtInvInertia.column0)); + Vec4V invInertia30Y = 
Vec4V_From_Vec3V(V3LoadU(descs[3].data0->sqrtInvInertia.column1)); + Vec4V invInertia30Z = Vec4V_From_Vec3V(V3LoadU(descs[3].data0->sqrtInvInertia.column2)); + + Vec4V invInertia01X = Vec4V_From_Vec3V(V3LoadU(descs[0].data1->sqrtInvInertia.column0)); + Vec4V invInertia01Y = Vec4V_From_Vec3V(V3LoadU(descs[0].data1->sqrtInvInertia.column1)); + Vec4V invInertia01Z = Vec4V_From_Vec3V(V3LoadU(descs[0].data1->sqrtInvInertia.column2)); + + Vec4V invInertia11X = Vec4V_From_Vec3V(V3LoadU(descs[1].data1->sqrtInvInertia.column0)); + Vec4V invInertia11Y = Vec4V_From_Vec3V(V3LoadU(descs[1].data1->sqrtInvInertia.column1)); + Vec4V invInertia11Z = Vec4V_From_Vec3V(V3LoadU(descs[1].data1->sqrtInvInertia.column2)); + + Vec4V invInertia21X = Vec4V_From_Vec3V(V3LoadU(descs[2].data1->sqrtInvInertia.column0)); + Vec4V invInertia21Y = Vec4V_From_Vec3V(V3LoadU(descs[2].data1->sqrtInvInertia.column1)); + Vec4V invInertia21Z = Vec4V_From_Vec3V(V3LoadU(descs[2].data1->sqrtInvInertia.column2)); + + Vec4V invInertia31X = Vec4V_From_Vec3V(V3LoadU(descs[3].data1->sqrtInvInertia.column0)); + Vec4V invInertia31Y = Vec4V_From_Vec3V(V3LoadU(descs[3].data1->sqrtInvInertia.column1)); + Vec4V invInertia31Z = Vec4V_From_Vec3V(V3LoadU(descs[3].data1->sqrtInvInertia.column2)); + + Vec4V invInertia0X0, invInertia0X1, invInertia0X2; + Vec4V invInertia0Y0, invInertia0Y1, invInertia0Y2; + Vec4V invInertia0Z0, invInertia0Z1, invInertia0Z2; + + Vec4V invInertia1X0, invInertia1X1, invInertia1X2; + Vec4V invInertia1Y0, invInertia1Y1, invInertia1Y2; + Vec4V invInertia1Z0, invInertia1Z1, invInertia1Z2; + + PX_TRANSPOSE_44_34(invInertia00X, invInertia10X, invInertia20X, invInertia30X, invInertia0X0, invInertia0Y0, invInertia0Z0); + PX_TRANSPOSE_44_34(invInertia00Y, invInertia10Y, invInertia20Y, invInertia30Y, invInertia0X1, invInertia0Y1, invInertia0Z1); + PX_TRANSPOSE_44_34(invInertia00Z, invInertia10Z, invInertia20Z, invInertia30Z, invInertia0X2, invInertia0Y2, invInertia0Z2); + + 
PX_TRANSPOSE_44_34(invInertia01X, invInertia11X, invInertia21X, invInertia31X, invInertia1X0, invInertia1Y0, invInertia1Z0); + PX_TRANSPOSE_44_34(invInertia01Y, invInertia11Y, invInertia21Y, invInertia31Y, invInertia1X1, invInertia1Y1, invInertia1Z1); + PX_TRANSPOSE_44_34(invInertia01Z, invInertia11Z, invInertia21Z, invInertia31Z, invInertia1X2, invInertia1Y2, invInertia1Z2); + + const FloatV invDt = FLoad(invDtF32); + const FloatV p8 = FLoad(0.8f); + //const Vec4V p84 = V4Splat(p8); + const Vec4V p1 = V4Splat(FLoad(0.1f)); + const Vec4V bounceThreshold = V4Splat(FLoad(bounceThresholdF32)); + const Vec4V orthoThreshold = V4Splat(FLoad(0.70710678f)); + + const FloatV invDtp8 = FMul(invDt, p8); + + const Vec3V bodyFrame00p = V3LoadU(descs[0].bodyFrame0.p); + const Vec3V bodyFrame01p = V3LoadU(descs[1].bodyFrame0.p); + const Vec3V bodyFrame02p = V3LoadU(descs[2].bodyFrame0.p); + const Vec3V bodyFrame03p = V3LoadU(descs[3].bodyFrame0.p); + + Vec4V bodyFrame00p4 = Vec4V_From_Vec3V(bodyFrame00p); + Vec4V bodyFrame01p4 = Vec4V_From_Vec3V(bodyFrame01p); + Vec4V bodyFrame02p4 = Vec4V_From_Vec3V(bodyFrame02p); + Vec4V bodyFrame03p4 = Vec4V_From_Vec3V(bodyFrame03p); + + Vec4V bodyFrame0pX, bodyFrame0pY, bodyFrame0pZ; + PX_TRANSPOSE_44_34(bodyFrame00p4, bodyFrame01p4, bodyFrame02p4, bodyFrame03p4, bodyFrame0pX, bodyFrame0pY, bodyFrame0pZ); + + + const Vec3V bodyFrame10p = V3LoadU(descs[0].bodyFrame1.p); + const Vec3V bodyFrame11p = V3LoadU(descs[1].bodyFrame1.p); + const Vec3V bodyFrame12p = V3LoadU(descs[2].bodyFrame1.p); + const Vec3V bodyFrame13p = V3LoadU(descs[3].bodyFrame1.p); + + Vec4V bodyFrame10p4 = Vec4V_From_Vec3V(bodyFrame10p); + Vec4V bodyFrame11p4 = Vec4V_From_Vec3V(bodyFrame11p); + Vec4V bodyFrame12p4 = Vec4V_From_Vec3V(bodyFrame12p); + Vec4V bodyFrame13p4 = Vec4V_From_Vec3V(bodyFrame13p); + + Vec4V bodyFrame1pX, bodyFrame1pY, bodyFrame1pZ; + PX_TRANSPOSE_44_34(bodyFrame10p4, bodyFrame11p4, bodyFrame12p4, bodyFrame13p4, bodyFrame1pX, bodyFrame1pY, bodyFrame1pZ); 
+ + + Ps::prefetchLine(c.contactID); + Ps::prefetchLine(c.contactID, 128); + + PxU32 frictionIndex0 = 0, frictionIndex1 = 0, frictionIndex2 = 0, frictionIndex3 = 0; + + + PxU32 maxPatches = PxMax(descs[0].numFrictionPatches, PxMax(descs[1].numFrictionPatches, PxMax(descs[2].numFrictionPatches, descs[3].numFrictionPatches))); + PxU32 maxContacts = numContactPoints4; + + //This is the address at which the first friction patch exists + PxU8* ptr2 = ptr + ((sizeof(SolverContactCoulombHeader4) * maxPatches) + constraintSize * maxContacts); + + //PxU32 contactId = 0; + + for(PxU32 i=0;i<maxPatches;i++) + { + const bool hasFinished0 = i >= descs[0].numFrictionPatches; + const bool hasFinished1 = i >= descs[1].numFrictionPatches; + const bool hasFinished2 = i >= descs[2].numFrictionPatches; + const bool hasFinished3 = i >= descs[3].numFrictionPatches; + + + frictionIndex0 = hasFinished0 ? frictionIndex0 : descs[0].startFrictionPatchIndex + i; + frictionIndex1 = hasFinished1 ? frictionIndex1 : descs[1].startFrictionPatchIndex + i; + frictionIndex2 = hasFinished2 ? frictionIndex2 : descs[2].startFrictionPatchIndex + i; + frictionIndex3 = hasFinished3 ? frictionIndex3 : descs[3].startFrictionPatchIndex + i; + + PxU32 clampedContacts0 = hasFinished0 ? 0 : c.frictionPatchContactCounts[frictionIndex0]; + PxU32 clampedContacts1 = hasFinished1 ? 0 : c.frictionPatchContactCounts[frictionIndex1]; + PxU32 clampedContacts2 = hasFinished2 ? 0 : c.frictionPatchContactCounts[frictionIndex2]; + PxU32 clampedContacts3 = hasFinished3 ? 
0 : c.frictionPatchContactCounts[frictionIndex3]; + + PxU32 clampedFric0 = clampedContacts0 * numFrictionPerPoint; + PxU32 clampedFric1 = clampedContacts1 * numFrictionPerPoint; + PxU32 clampedFric2 = clampedContacts2 * numFrictionPerPoint; + PxU32 clampedFric3 = clampedContacts3 * numFrictionPerPoint; + + + const PxU32 numContacts = PxMax(clampedContacts0, PxMax(clampedContacts1, PxMax(clampedContacts2, clampedContacts3))); + const PxU32 numFrictions = PxMax(clampedFric0, PxMax(clampedFric1, PxMax(clampedFric2, clampedFric3))); + + PxU32 firstPatch0 = c.correlationListHeads[frictionIndex0]; + PxU32 firstPatch1 = c.correlationListHeads[frictionIndex1]; + PxU32 firstPatch2 = c.correlationListHeads[frictionIndex2]; + PxU32 firstPatch3 = c.correlationListHeads[frictionIndex3]; + + const Gu::ContactPoint* contactBase0 = descs[0].contacts + c.contactPatches[firstPatch0].start; + const Gu::ContactPoint* contactBase1 = descs[1].contacts + c.contactPatches[firstPatch1].start; + const Gu::ContactPoint* contactBase2 = descs[2].contacts + c.contactPatches[firstPatch2].start; + const Gu::ContactPoint* contactBase3 = descs[3].contacts + c.contactPatches[firstPatch3].start; + + const Vec4V restitution = V4Merge(FLoad(contactBase0->restitution), FLoad(contactBase1->restitution), FLoad(contactBase2->restitution), + FLoad(contactBase3->restitution)); + + const Vec4V staticFriction = V4Merge(FLoad(contactBase0->staticFriction), FLoad(contactBase1->staticFriction), FLoad(contactBase2->staticFriction), + FLoad(contactBase3->staticFriction)); + + SolverContactCoulombHeader4* PX_RESTRICT header = reinterpret_cast<SolverContactCoulombHeader4*>(ptr); + + header->frictionOffset = PxU16(ptr2 - ptr); + + ptr += sizeof(SolverContactCoulombHeader4); + + SolverFrictionHeader4* PX_RESTRICT fricHeader = reinterpret_cast<SolverFrictionHeader4*>(ptr2); + ptr2 += sizeof(SolverFrictionHeader4) + sizeof(Vec4V) * numContacts; + + + header->numNormalConstr0 = Ps::to8(clampedContacts0); + 
header->numNormalConstr1 = Ps::to8(clampedContacts1); + header->numNormalConstr2 = Ps::to8(clampedContacts2); + header->numNormalConstr3 = Ps::to8(clampedContacts3); + header->numNormalConstr = Ps::to8(numContacts); + header->invMassADom = invMass0_dom0fV; + header->invMassBDom = invMass1_dom1fV; + header->angD0 = angDom0; + header->angD1 = angDom1; + header->restitution = restitution; + + header->flags[0] = flags[0]; header->flags[1] = flags[1]; header->flags[2] = flags[2]; header->flags[3] = flags[3]; + + header->type = Ps::to8(isDynamic ? DY_SC_TYPE_BLOCK_RB_CONTACT : DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT); + header->shapeInteraction[0] = descs[0].shapeInteraction; header->shapeInteraction[1] = descs[1].shapeInteraction; + header->shapeInteraction[2] = descs[2].shapeInteraction; header->shapeInteraction[3] = descs[3].shapeInteraction; + + + fricHeader->invMassADom = invMass0_dom0fV; + fricHeader->invMassBDom = invMass1_dom1fV; + fricHeader->angD0 = angDom0; + fricHeader->angD1 = angDom1; + fricHeader->numFrictionConstr0 = Ps::to8(clampedFric0); + fricHeader->numFrictionConstr1 = Ps::to8(clampedFric1); + fricHeader->numFrictionConstr2 = Ps::to8(clampedFric2); + fricHeader->numFrictionConstr3 = Ps::to8(clampedFric3); + fricHeader->numNormalConstr = Ps::to8(numContacts); + fricHeader->numNormalConstr0 = Ps::to8(clampedContacts0); + fricHeader->numNormalConstr1 = Ps::to8(clampedContacts1); + fricHeader->numNormalConstr2 = Ps::to8(clampedContacts2); + fricHeader->numNormalConstr3 = Ps::to8(clampedContacts3); + fricHeader->type = Ps::to8(isDynamic ? DY_SC_TYPE_BLOCK_FRICTION : DY_SC_TYPE_BLOCK_STATIC_FRICTION); + fricHeader->staticFriction = staticFriction; + fricHeader->frictionPerContact = PxU32(numFrictionPerPoint == 2 ? 
1 : 0); + + fricHeader->numFrictionConstr = Ps::to8(numFrictions); + + Vec4V normal0 = V4LoadA(&contactBase0->normal.x); + Vec4V normal1 = V4LoadA(&contactBase1->normal.x); + Vec4V normal2 = V4LoadA(&contactBase2->normal.x); + Vec4V normal3 = V4LoadA(&contactBase3->normal.x); + + Vec4V normalX, normalY, normalZ; + PX_TRANSPOSE_44_34(normal0, normal1, normal2, normal3, normalX, normalY, normalZ); + header->normalX = normalX; + header->normalY = normalY; + header->normalZ = normalZ; + + const Vec4V normalLenSq = V4MulAdd(normalZ, normalZ, V4MulAdd(normalY, normalY, V4Mul(normalX, normalX))); + + const Vec4V linNorVel0 = V4MulAdd(normalZ, linVelT20, V4MulAdd(normalY, linVelT10, V4Mul(normalX, linVelT00))); + const Vec4V linNorVel1 = V4MulAdd(normalZ, linVelT21, V4MulAdd(normalY, linVelT11, V4Mul(normalX, linVelT01))); + + const Vec4V invMassNorLenSq0 = V4Mul(invMass0_dom0fV, normalLenSq); + const Vec4V invMassNorLenSq1 = V4Mul(invMass1_dom1fV, normalLenSq); + + + //Calculate friction directions + const BoolV cond =V4IsGrtr(orthoThreshold, V4Abs(normalX)); + + const Vec4V t0FallbackX = V4Sel(cond, zero, V4Neg(normalY)); + const Vec4V t0FallbackY = V4Sel(cond, V4Neg(normalZ), normalX); + const Vec4V t0FallbackZ = V4Sel(cond, normalY, zero); + + const Vec4V dotNormalVrel = V4MulAdd(normalZ, vrelZ, V4MulAdd(normalY, vrelY, V4Mul(normalX, vrelX))); + const Vec4V vrelSubNorVelX = V4NegMulSub(normalX, dotNormalVrel, vrelX); + const Vec4V vrelSubNorVelY = V4NegMulSub(normalY, dotNormalVrel, vrelY); + const Vec4V vrelSubNorVelZ = V4NegMulSub(normalZ, dotNormalVrel, vrelZ); + + const Vec4V lenSqvrelSubNorVelZ = V4MulAdd(vrelSubNorVelX, vrelSubNorVelX, V4MulAdd(vrelSubNorVelY, vrelSubNorVelY, V4Mul(vrelSubNorVelZ, vrelSubNorVelZ))); + + const BoolV bcon2 = V4IsGrtr(lenSqvrelSubNorVelZ, p1); + + Vec4V t0X = V4Sel(bcon2, vrelSubNorVelX, t0FallbackX); + Vec4V t0Y = V4Sel(bcon2, vrelSubNorVelY, t0FallbackY); + Vec4V t0Z = V4Sel(bcon2, vrelSubNorVelZ, t0FallbackZ); + + //Now 
normalize this... + const Vec4V recipLen = V4Rsqrt(V4MulAdd(t0X, t0X, V4MulAdd(t0Y, t0Y, V4Mul(t0Z, t0Z)))); + + t0X = V4Mul(t0X, recipLen); + t0Y = V4Mul(t0Y, recipLen); + t0Z = V4Mul(t0Z, recipLen); + + const Vec4V t1X = V4NegMulSub(normalZ, t0Y, V4Mul(normalY, t0Z)); + const Vec4V t1Y = V4NegMulSub(normalX, t0Z, V4Mul(normalZ, t0X)); + const Vec4V t1Z = V4NegMulSub(normalY, t0X, V4Mul(normalX, t0Y)); + + const Vec4V tFallbackX[2] = {t0X, t1X}; + const Vec4V tFallbackY[2] = {t0Y, t1Y}; + const Vec4V tFallbackZ[2] = {t0Z, t1Z}; + + + //For all correlation heads - need to pull this out I think + + //OK, we have a counter for all our patches... + PxU32 finished = (PxU32(hasFinished0)) | + ((PxU32(hasFinished1)) << 1) | + ((PxU32(hasFinished2)) << 2) | + ((PxU32(hasFinished3)) << 3); + + CorrelationListIterator iter0(c, firstPatch0); + CorrelationListIterator iter1(c, firstPatch1); + CorrelationListIterator iter2(c, firstPatch2); + CorrelationListIterator iter3(c, firstPatch3); + + PxU32 contact0, contact1, contact2, contact3; + PxU32 patch0, patch1, patch2, patch3; + + iter0.nextContact(patch0, contact0); + iter1.nextContact(patch1, contact1); + iter2.nextContact(patch2, contact2); + iter3.nextContact(patch3, contact3); + + PxU8* p = ptr; + + PxU32 contactCount = 0; + PxU32 newFinished = + (PxU32(hasFinished0 || !iter0.hasNextContact())) | + ((PxU32(hasFinished1 || !iter1.hasNextContact())) << 1) | + ((PxU32(hasFinished2 || !iter2.hasNextContact())) << 2) | + ((PxU32(hasFinished3 || !iter3.hasNextContact())) << 3); + + PxU32 fricIndex = 0; + + while(finished != 0xf) + { + finished = newFinished; + ++contactCount; + Ps::prefetchLine(p, 384); + Ps::prefetchLine(p, 512); + Ps::prefetchLine(p, 640); + + SolverContact4Base* PX_RESTRICT solverContact = reinterpret_cast<SolverContact4Base*>(p); + p += constraintSize; + + const Gu::ContactPoint& con0 = descs[0].contacts[c.contactPatches[patch0].start + contact0]; + const Gu::ContactPoint& con1 = 
descs[1].contacts[c.contactPatches[patch1].start + contact1]; + const Gu::ContactPoint& con2 = descs[2].contacts[c.contactPatches[patch2].start + contact2]; + const Gu::ContactPoint& con3 = descs[3].contacts[c.contactPatches[patch3].start + contact3]; + + //Now we need to splice these 4 contacts into a single structure + + { + Vec4V point0 = V4LoadA(&con0.point.x); + Vec4V point1 = V4LoadA(&con1.point.x); + Vec4V point2 = V4LoadA(&con2.point.x); + Vec4V point3 = V4LoadA(&con3.point.x); + + Vec4V pointX, pointY, pointZ; + PX_TRANSPOSE_44_34(point0, point1, point2, point3, pointX, pointY, pointZ); + + Vec4V targetVel0 = V4LoadA(&con0.targetVel.x); + Vec4V targetVel1 = V4LoadA(&con1.targetVel.x); + Vec4V targetVel2 = V4LoadA(&con2.targetVel.x); + Vec4V targetVel3 = V4LoadA(&con3.targetVel.x); + + Vec4V targetVelX, targetVelY, targetVelZ; + PX_TRANSPOSE_44_34(targetVel0, targetVel1, targetVel2, targetVel3, targetVelX, targetVelY, targetVelZ); + + const Vec4V raX = V4Sub(pointX, bodyFrame0pX); + const Vec4V raY = V4Sub(pointY, bodyFrame0pY); + const Vec4V raZ = V4Sub(pointZ, bodyFrame0pZ); + + const Vec4V rbX = V4Sub(pointX, bodyFrame1pX); + const Vec4V rbY = V4Sub(pointY, bodyFrame1pY); + const Vec4V rbZ = V4Sub(pointZ, bodyFrame1pZ); + + { + const Vec4V separation = V4Merge(FLoad(con0.separation), FLoad(con1.separation), FLoad(con2.separation), + FLoad(con3.separation)); + const Vec4V maxImpulse = V4Merge(FLoad(con0.maxImpulse), FLoad(con1.maxImpulse), FLoad(con2.maxImpulse), + FLoad(con3.maxImpulse)); + + const Vec4V cTargetVel = V4MulAdd(normalX, targetVelX, V4MulAdd(normalY, targetVelY, V4Mul(normalZ, targetVelZ))); + + //raXn = cross(ra, normal) which = Vec3V( a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); + const Vec4V raXnX = V4NegMulSub(raZ, normalY, V4Mul(raY, normalZ)); + const Vec4V raXnY = V4NegMulSub(raX, normalZ, V4Mul(raZ, normalX)); + const Vec4V raXnZ = V4NegMulSub(raY, normalX, V4Mul(raX, normalY)); + + const Vec4V v0a0 = V4Mul(invInertia0X0, 
raXnX); + const Vec4V v0a1 = V4Mul(invInertia0X1, raXnX); + const Vec4V v0a2 = V4Mul(invInertia0X2, raXnX); + + const Vec4V v0PlusV1a0 = V4MulAdd(invInertia0Y0, raXnY, v0a0); + const Vec4V v0PlusV1a1 = V4MulAdd(invInertia0Y1, raXnY, v0a1); + const Vec4V v0PlusV1a2 = V4MulAdd(invInertia0Y2, raXnY, v0a2); + + const Vec4V delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, v0PlusV1a0); + const Vec4V delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, v0PlusV1a1); + const Vec4V delAngVel0Z = V4MulAdd(invInertia0Z2, raXnZ, v0PlusV1a2); + + const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0Z, delAngVel0Z, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0X, delAngVel0X))); + const Vec4V dotRaXnAngVel0 = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4Mul(raXnX, angVelT00))); + + Vec4V unitResponse = V4Add(invMassNorLenSq0, dotDelAngVel0); + Vec4V vrel = V4Add(linNorVel0, dotRaXnAngVel0); + + + //The dynamic-only parts - need to if-statement these up. A branch here shouldn't cost us too much + if(isDynamic) + { + SolverContact4Dynamic* PX_RESTRICT dynamicContact = static_cast<SolverContact4Dynamic*>(solverContact); + const Vec4V rbXnX = V4NegMulSub(rbZ, normalY, V4Mul(rbY, normalZ)); + const Vec4V rbXnY = V4NegMulSub(rbX, normalZ, V4Mul(rbZ, normalX)); + const Vec4V rbXnZ = V4NegMulSub(rbY, normalX, V4Mul(rbX, normalY)); + + const Vec4V v0b0 = V4Mul(invInertia1X0, rbXnX); + const Vec4V v0b1 = V4Mul(invInertia1X1, rbXnX); + const Vec4V v0b2 = V4Mul(invInertia1X2, rbXnX); + + const Vec4V v0PlusV1b0 = V4MulAdd(invInertia1Y0, rbXnY, v0b0); + const Vec4V v0PlusV1b1 = V4MulAdd(invInertia1Y1, rbXnY, v0b1); + const Vec4V v0PlusV1b2 = V4MulAdd(invInertia1Y2, rbXnY, v0b2); + + const Vec4V delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, v0PlusV1b0); + const Vec4V delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, v0PlusV1b1); + const Vec4V delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, v0PlusV1b2); + + + //V3Dot(raXn, delAngVel0) + + const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1Z, delAngVel1Z, 
V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1X, delAngVel1X))); + + const Vec4V dotRbXnAngVel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01))); + + const Vec4V resp1 = V4Add(dotDelAngVel1, invMassNorLenSq1); + + unitResponse = V4Add(unitResponse, resp1); + + const Vec4V vrel2 = V4Add(linNorVel1, dotRbXnAngVel1); + vrel = V4Sub(vrel, vrel2); + + //These are for dynamic-only contacts. + dynamicContact->rbXnX = delAngVel1X; + dynamicContact->rbXnY = delAngVel1Y; + dynamicContact->rbXnZ = delAngVel1Z; + + } + + const Vec4V velMultiplier = V4Sel(V4IsGrtr(unitResponse, zero), V4Recip(unitResponse), zero); + + const Vec4V penetration = V4Sub(separation, restDistance); + + const Vec4V penInvDtp8 = V4Max(maxPenBias, V4Scale(penetration, invDtp8)); + + Vec4V scaledBias = V4Mul(velMultiplier, penInvDtp8); + + const Vec4V penetrationInvDt = V4Scale(penetration, invDt); + + const BoolV isGreater2 = BAnd(BAnd(V4IsGrtr(restitution, zero), V4IsGrtr(bounceThreshold, vrel)), + V4IsGrtr(V4Neg(vrel), penetrationInvDt)); + + const BoolV ccdSeparationCondition = V4IsGrtrOrEq(ccdMaxSeparation, penetration); + + scaledBias = V4Sel(BAnd(ccdSeparationCondition, isGreater2), zero, scaledBias); + + const Vec4V sumVRel(vrel); + + const Vec4V targetVelocity = V4Sub(V4Add(V4Sel(isGreater2, V4Mul(V4Neg(sumVRel), restitution), zero), cTargetVel), vrel); + + //These values are present for static and dynamic contacts + solverContact->raXnX = delAngVel0X; + solverContact->raXnY = delAngVel0Y; + solverContact->raXnZ = delAngVel0Z; + solverContact->velMultiplier = velMultiplier; + solverContact->appliedForce = zero; + solverContact->scaledBias = scaledBias; + solverContact->targetVelocity = targetVelocity; + solverContact->maxImpulse = maxImpulse; + } + + //PxU32 conId = contactId++; + + /*Vec4V targetVel0 = V4LoadA(&con0.targetVel.x); + Vec4V targetVel1 = V4LoadA(&con1.targetVel.x); + Vec4V targetVel2 = V4LoadA(&con2.targetVel.x); + Vec4V targetVel3 = 
V4LoadA(&con3.targetVel.x); + + Vec4V targetVelX, targetVelY, targetVelZ; + PX_TRANSPOSE_44_34(targetVel0, targetVel1, targetVel2, targetVel3, targetVelX, targetVelY, targetVelZ);*/ + + for(PxU32 a = 0; a < numFrictionPerPoint; ++a) + { + SolverFriction4Base* PX_RESTRICT friction = reinterpret_cast<SolverFriction4Base*>(ptr2); + + ptr2 += frictionSize; + + const Vec4V tX = tFallbackX[fricIndex]; + const Vec4V tY = tFallbackY[fricIndex]; + const Vec4V tZ = tFallbackZ[fricIndex]; + + fricIndex = 1 - fricIndex; + + const Vec4V raXnX = V4NegMulSub(raZ, tY, V4Mul(raY, tZ)); + const Vec4V raXnY = V4NegMulSub(raX, tZ, V4Mul(raZ, tX)); + const Vec4V raXnZ = V4NegMulSub(raY, tX, V4Mul(raX, tY)); + + const Vec4V v0a0 = V4Mul(invInertia0X0, raXnX); + const Vec4V v0a1 = V4Mul(invInertia0X1, raXnX); + const Vec4V v0a2 = V4Mul(invInertia0X2, raXnX); + + const Vec4V v0PlusV1a0 = V4MulAdd(invInertia0Y0, raXnY, v0a0); + const Vec4V v0PlusV1a1 = V4MulAdd(invInertia0Y1, raXnY, v0a1); + const Vec4V v0PlusV1a2 = V4MulAdd(invInertia0Y2, raXnY, v0a2); + + const Vec4V delAngVel0X = V4MulAdd(invInertia0Z0, raXnZ, v0PlusV1a0); + const Vec4V delAngVel0Y = V4MulAdd(invInertia0Z1, raXnZ, v0PlusV1a1); + const Vec4V delAngVel0Z = V4MulAdd(invInertia0Z2, raXnZ, v0PlusV1a2); + + const Vec4V dotDelAngVel0 = V4MulAdd(delAngVel0Z, delAngVel0Z, V4MulAdd(delAngVel0Y, delAngVel0Y, V4Mul(delAngVel0X, delAngVel0X))); + + const Vec4V norVel0 = V4MulAdd(tX, linVelT00, V4MulAdd(tY, linVelT10, V4Mul(tZ, linVelT20))); + const Vec4V dotRaXnAngVel0 = V4MulAdd(raXnZ, angVelT20, V4MulAdd(raXnY, angVelT10, V4Mul(raXnX, angVelT00))); + Vec4V vrel = V4Add(norVel0, dotRaXnAngVel0); + + Vec4V unitResponse = V4Add(invMass0_dom0fV, dotDelAngVel0); + + if(isDynamic) + { + SolverFriction4Dynamic* PX_RESTRICT dFric = static_cast<SolverFriction4Dynamic*>(friction); + + const Vec4V rbXnX = V4NegMulSub(rbZ, tY, V4Mul(rbY, tZ)); + const Vec4V rbXnY = V4NegMulSub(rbX, tZ, V4Mul(rbZ, tX)); + const Vec4V rbXnZ = V4NegMulSub(rbY, 
tX, V4Mul(rbX, tY)); + + const Vec4V v0b0 = V4Mul(invInertia1X0, rbXnX); + const Vec4V v0b1 = V4Mul(invInertia1X1, rbXnX); + const Vec4V v0b2 = V4Mul(invInertia1X2, rbXnX); + + const Vec4V v0PlusV1b0 = V4MulAdd(invInertia1Y0, rbXnY, v0b0); + const Vec4V v0PlusV1b1 = V4MulAdd(invInertia1Y1, rbXnY, v0b1); + const Vec4V v0PlusV1b2 = V4MulAdd(invInertia1Y2, rbXnY, v0b2); + + const Vec4V delAngVel1X = V4MulAdd(invInertia1Z0, rbXnZ, v0PlusV1b0); + const Vec4V delAngVel1Y = V4MulAdd(invInertia1Z1, rbXnZ, v0PlusV1b1); + const Vec4V delAngVel1Z = V4MulAdd(invInertia1Z2, rbXnZ, v0PlusV1b2); + + const Vec4V dotDelAngVel1 = V4MulAdd(delAngVel1Z, delAngVel1Z, V4MulAdd(delAngVel1Y, delAngVel1Y, V4Mul(delAngVel1X, delAngVel1X))); + + const Vec4V norVel1 = V4MulAdd(tX, linVelT01, V4MulAdd(tY, linVelT11, V4Mul(tZ, linVelT21))); + const Vec4V dotRbXnAngVel1 = V4MulAdd(rbXnZ, angVelT21, V4MulAdd(rbXnY, angVelT11, V4Mul(rbXnX, angVelT01))); + vrel = V4Sub(vrel, V4Add(norVel1, dotRbXnAngVel1)); + + const Vec4V resp1 = V4Add(dotDelAngVel1, invMassNorLenSq1); + + unitResponse = V4Add(unitResponse, resp1); + + dFric->rbXnX = delAngVel1X; + dFric->rbXnY = delAngVel1Y; + dFric->rbXnZ = delAngVel1Z; + } + + const Vec4V velMultiplier = V4Neg(V4Sel(V4IsGrtr(unitResponse, zero), V4Recip(unitResponse), zero)); + + friction->appliedForce = zero; + friction->raXnX = delAngVel0X; + friction->raXnY = delAngVel0Y; + friction->raXnZ = delAngVel0Z; + friction->velMultiplier = velMultiplier; + friction->targetVelocity = V4Sub(V4MulAdd(targetVelZ, tZ, V4MulAdd(targetVelY, tY, V4Mul(targetVelX, tX))), vrel); + friction->normalX = tX; + friction->normalY = tY; + friction->normalZ = tZ; + } + } + if(!(finished & 0x1)) + { + iter0.nextContact(patch0, contact0); + newFinished |= PxU32(!iter0.hasNextContact()); + } + + if(!(finished & 0x2)) + { + iter1.nextContact(patch1, contact1); + newFinished |= (PxU32(!iter1.hasNextContact()) << 1); + } + + if(!(finished & 0x4)) + { + iter2.nextContact(patch2, contact2); 
+ newFinished |= (PxU32(!iter2.hasNextContact()) << 2); + } + + if(!(finished & 0x8)) + { + iter3.nextContact(patch3, contact3); + newFinished |= (PxU32(!iter3.hasNextContact()) << 3); + } + } + ptr = p; + } + return true; +} + + + +//The persistent friction patch correlation/allocation will already have happened as this is per-pair. +//This function just computes the size of the combined solve data. For each of the 4 descs it writes the number of contact points plus friction constraints (contact count times numFrictionPerPoint, skipped for patches with eDISABLE_FRICTION) to _axisConstraintCount[a]; for each friction patch slot it keeps the largest contact and friction count found across the 4 descs. _numContactPoints4 receives the sum of those per-slot maximum contact counts and _solverConstraintByteSize (asserted to be 0 on entry) the header plus constraint size rounded up to a multiple of 16 bytes. The Dynamic struct sizes are used unless none of the 4 descs has eDYNAMIC_BODY set in bodyState1. threadContext is unused. +void computeBlockStreamByteSizesCoulomb4(PxSolverContactDesc* descs, + ThreadContext& threadContext, const CorrelationBuffer& c, + const PxU32 numFrictionPerPoint, + PxU32& _solverConstraintByteSize, PxU32* _axisConstraintCount, PxU32& _numContactPoints4) +{ + PX_ASSERT(0 == _solverConstraintByteSize); + PX_UNUSED(threadContext); + + PxU32 maxPatches = 0; + PxU32 maxContactCount[CorrelationBuffer::MAX_FRICTION_PATCHES]; + PxU32 maxFrictionCount[CorrelationBuffer::MAX_FRICTION_PATCHES]; + PxMemZero(maxContactCount, sizeof(maxContactCount)); + PxMemZero(maxFrictionCount, sizeof(maxFrictionCount)); + for(PxU32 a = 0; a < 4; ++a) + { + PxU32 axisConstraintCount = 0; + + for(PxU32 i = 0; i < descs[a].numFrictionPatches; i++) + { + PxU32 ind = i + descs[a].startFrictionPatchIndex; + + const FrictionPatch& frictionPatch = c.frictionPatches[ind]; + + const bool haveFriction = (frictionPatch.materialFlags & PxMaterialFlag::eDISABLE_FRICTION) == 0; + //Solver constraint data. 
+ if(c.frictionPatchContactCounts[ind]!=0) + { + maxContactCount[i] = PxMax(c.frictionPatchContactCounts[ind], maxContactCount[i]); + axisConstraintCount += c.frictionPatchContactCounts[ind]; + + if(haveFriction) + { + //const PxU32 fricCount = c.frictionPatches[ind].numConstraints; + const PxU32 fricCount = c.frictionPatchContactCounts[ind] * numFrictionPerPoint; + maxFrictionCount[i] = PxMax(fricCount, maxFrictionCount[i]); + axisConstraintCount += fricCount; + } + } + } + maxPatches = PxMax(descs[a].numFrictionPatches, maxPatches); + _axisConstraintCount[a] = axisConstraintCount; + } + + PxU32 totalContacts = 0, totalFriction = 0; + for(PxU32 a = 0; a < maxPatches; ++a) + { + totalContacts += maxContactCount[a]; + totalFriction += maxFrictionCount[a]; + } + + _numContactPoints4 = totalContacts; + + + //OK, we have a given number of friction patches, contact points and friction constraints so we can calculate how much memory we need + + const bool isStatic = (((descs[0].bodyState1 | descs[1].bodyState1 | descs[2].bodyState1 | descs[3].bodyState1) & PxSolverContactDesc::eDYNAMIC_BODY) == 0); + + const PxU32 headerSize = (sizeof(SolverContactCoulombHeader4) + sizeof(SolverFrictionHeader4)) * maxPatches; + //Add on 1 Vec4V per contact for the applied force buffer + const PxU32 constraintSize = isStatic ? 
((sizeof(SolverContact4Base) + sizeof(Vec4V)) * totalContacts) + ( sizeof(SolverFriction4Base) * totalFriction) : + ((sizeof(SolverContact4Dynamic) + sizeof(Vec4V)) * totalContacts) + (sizeof(SolverFriction4Dynamic) * totalFriction); + + _solverConstraintByteSize = ((constraintSize + headerSize + 0x0f) & ~0x0f); + PX_ASSERT(0 == (_solverConstraintByteSize & 0x0f)); +} + + /* reserveBlockStreamsCoulomb4: computes the block-of-4 constraint size with computeBlockStreamByteSizesCoulomb4 and reserves that size plus 16 bytes from constraintAllocator. solverConstraint and solverConstraintByteSize are asserted to be NULL and 0 on entry. Returns eUNBATCHABLE (before reserving anything) when the size plus 16 exceeds 16384, eOUT_OF_MEMORY when the allocator returns NULL or reinterpret_cast<PxU8*>(-1) (a warning is issued in both cases), otherwise eSUCCESS with solverConstraint pointing at the reserved block. A computed size of 0 also yields eSUCCESS and leaves solverConstraint untouched. */ +static SolverConstraintPrepState::Enum reserveBlockStreamsCoulomb4(PxSolverContactDesc* descs, ThreadContext& threadContext, const CorrelationBuffer& c, + PxU8*& solverConstraint, const PxU32 numFrictionPerContactPoint, + PxU32& solverConstraintByteSize, + PxU32* axisConstraintCount, PxU32& numContactPoints4, PxConstraintAllocator& constraintAllocator) +{ + PX_ASSERT(NULL == solverConstraint); + PX_ASSERT(0 == solverConstraintByteSize); + + //From constraintBlockStream we need to reserve contact points, contact forces, and a char buffer for the solver constraint data (already have a variable for this). + //From frictionPatchStream we just need to reserve a single buffer. + + //Compute the sizes of all the buffers. + computeBlockStreamByteSizesCoulomb4( + descs, threadContext, c, numFrictionPerContactPoint, solverConstraintByteSize, + axisConstraintCount, numContactPoints4); + + //Reserve the buffers. + + //First reserve the accumulated buffer size for the constraint block. + PxU8* constraintBlock = NULL; + const PxU32 constraintBlockByteSize = solverConstraintByteSize; + if(constraintBlockByteSize > 0) + { + if((constraintBlockByteSize + 16u) > 16384) + return SolverConstraintPrepState::eUNBATCHABLE; + + constraintBlock = constraintAllocator.reserveConstraintData(constraintBlockByteSize + 16u); + + if(0==constraintBlock || (reinterpret_cast<PxU8*>(-1))==constraintBlock) + { + if(0==constraintBlock) + { + PX_WARN_ONCE( + "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. 
" + "Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks."); + } + else + { + PX_WARN_ONCE( + "Attempting to allocate more than 16K of contact data for a single contact pair in constraint prep. " + "Either accept dropped contacts or simplify collision geometry."); + constraintBlock=NULL; + } + } + } + + //Patch up the individual ptrs to the buffer returned by the constraint block reservation (assuming the reservation didn't fail). + if(0==constraintBlockByteSize || constraintBlock) + { + if(solverConstraintByteSize) + { + solverConstraint = constraintBlock; + PX_ASSERT(0==(uintptr_t(solverConstraint) & 0x0f)); + } + } + + //Return true if neither of the two block reservations failed. + return ((0==constraintBlockByteSize || constraintBlock)) ? SolverConstraintPrepState::eSUCCESS : SolverConstraintPrepState::eOUT_OF_MEMORY; +} + +SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb1D( + PxsContactManagerOutput** outputs, + ThreadContext& threadContext, + PxSolverContactDesc* blockDescs, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator) +{ + return createFinalizeSolverContacts4Coulomb(outputs, threadContext, blockDescs, invDtF32, bounceThresholdF32, + frictionOffsetThreshold, correlationDistance, constraintAllocator, PxFrictionType::eONE_DIRECTIONAL); +} + +SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb2D( + PxsContactManagerOutput** outputs, + ThreadContext& threadContext, + PxSolverContactDesc* blockDescs, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator) +{ + return createFinalizeSolverContacts4Coulomb(outputs, threadContext, blockDescs, invDtF32, bounceThresholdF32, + frictionOffsetThreshold, correlationDistance, 
constraintAllocator, PxFrictionType::eTWO_DIRECTIONAL); +} + + /* createFinalizeSolverContacts4Coulomb: Coulomb contact prep for a block of 4 contact descs; createFinalizeSolverContacts4Coulomb1D and createFinalizeSolverContacts4Coulomb2D forward here with eONE_DIRECTIONAL and eTWO_DIRECTIONAL (1 or 2 friction constraints per contact point). frictionOffsetThreshold and correlationDistance are unused. Returns eUNBATCHABLE when the running contact total of the 4 descs would exceed 64, when extractContacts yields 0 contacts for a desc, or when correlatePatches reports overflow; otherwise returns a non-success state from reserveBlockStreamsCoulomb4 unchanged, or eSUCCESS once the constraints and the two trailing PxU32 words (0, then 0xFFFFFFFF or 0 depending on the result of setupFinalizeSolverConstraintsCoulomb4) have been stored. */ +SolverConstraintPrepState::Enum createFinalizeSolverContacts4Coulomb( + PxsContactManagerOutput** outputs, + ThreadContext& threadContext, + PxSolverContactDesc* blockDescs, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator, + PxFrictionType::Enum frictionType) +{ + PX_UNUSED(frictionOffsetThreshold); + PX_UNUSED(correlationDistance); + + for(PxU32 i = 0; i < 4; ++i) + { + blockDescs[i].desc->constraintLengthOver16 = 0; + } + + PX_ASSERT(outputs[0]->nbContacts && outputs[1]->nbContacts && outputs[2]->nbContacts && outputs[3]->nbContacts); + + Gu::ContactBuffer& buffer = threadContext.mContactBuffer; + + buffer.count = 0; + + PxU32 numContacts = 0; + + CorrelationBuffer& c = threadContext.mCorrelationBuffer; + + c.frictionPatchCount = 0; + c.contactPatchCount = 0; + + PxU32 numFrictionPerPoint = PxU32(frictionType == PxFrictionType::eONE_DIRECTIONAL ? 
1 : 2); + + PX_ALIGN(16, PxReal invMassScale0[4]); + PX_ALIGN(16, PxReal invMassScale1[4]); + PX_ALIGN(16, PxReal invInertiaScale0[4]); + PX_ALIGN(16, PxReal invInertiaScale1[4]); + + for(PxU32 a = 0; a < 4; ++a) + { + PxSolverContactDesc& blockDesc = blockDescs[a]; + PxSolverConstraintDesc& desc = *blockDesc.desc; + + //blockDesc.startContactIndex = numContacts; + blockDesc.contacts = &buffer.contacts[numContacts]; + + Ps::prefetchLine(desc.bodyA); + Ps::prefetchLine(desc.bodyB); + + if((numContacts + outputs[a]->nbContacts) > 64) + { + return SolverConstraintPrepState::eUNBATCHABLE; + } + bool hasMaxImpulse, hasTargetVelocity; + + const PxReal defaultMaxImpulse = PxMin(blockDesc.data0->maxContactImpulse, blockDesc.data1->maxContactImpulse); + + PxU32 contactCount = extractContacts(buffer, *outputs[a], hasMaxImpulse, hasTargetVelocity, invMassScale0[a], invMassScale1[a], + invInertiaScale0[a], invInertiaScale1[a], defaultMaxImpulse); + + if(contactCount == 0) + return SolverConstraintPrepState::eUNBATCHABLE; + + numContacts+=contactCount; + + blockDesc.numContacts = contactCount; + blockDesc.hasMaxImpulse = hasMaxImpulse; + + blockDesc.startFrictionPatchIndex = c.frictionPatchCount; + blockDesc.startContactPatchIndex = c.contactPatchCount; + + createContactPatches(c, blockDesc.contacts, contactCount, PXC_SAME_NORMAL); + + bool overflow = correlatePatches(c, blockDesc.contacts, blockDesc.bodyFrame0, blockDesc.bodyFrame1, PXC_SAME_NORMAL, blockDesc.startContactPatchIndex, + blockDesc.startFrictionPatchIndex); + if(overflow) + return SolverConstraintPrepState::eUNBATCHABLE; + + blockDesc.numContactPatches = PxU16(c.contactPatchCount - blockDesc.startContactPatchIndex); + blockDesc.numFrictionPatches = c.frictionPatchCount - blockDesc.startFrictionPatchIndex; + + invMassScale0[a] *= blockDesc.mInvMassScales.linear0; + invMassScale1[a] *= blockDesc.mInvMassScales.linear1; + invInertiaScale0[a] *= blockDesc.mInvMassScales.angular0; + invInertiaScale1[a] *= 
blockDesc.mInvMassScales.angular1; + + } + + //OK, now we need to work out how much memory to allocate, allocate it and then block-create the constraints... + + PxU8* solverConstraint = NULL; + PxU32 solverConstraintByteSize = 0; + PxU32 axisConstraintCount[4]; + PxU32 numContactPoints4 = 0; + + SolverConstraintPrepState::Enum state = reserveBlockStreamsCoulomb4(blockDescs, threadContext, c, + solverConstraint, numFrictionPerPoint, + solverConstraintByteSize, + axisConstraintCount, numContactPoints4, constraintAllocator); + + if(state != SolverConstraintPrepState::eSUCCESS) + return state; + + //OK, we allocated the memory, now let's create the constraints + + for(PxU32 a = 0; a < 4; ++a) + { + PxSolverConstraintDesc& desc = *blockDescs[a].desc; + //n[a]->solverConstraintPointer = solverConstraint; + desc.constraint = solverConstraint; + + //KS - TODO - add back in counters for axisConstraintCount somewhere... + blockDescs[a].axisConstraintCount += Ps::to16(axisConstraintCount[a]); + + desc.constraintLengthOver16 = Ps::to16(solverConstraintByteSize/16); + + PxU32 writeBackLength = outputs[a]->nbContacts * sizeof(PxReal); + void* writeBack = outputs[a]->contactForces; + desc.writeBack = writeBack; + setWritebackLength(desc, writeBackLength); + } + + const Vec4V iMassScale0 = V4LoadA(invMassScale0); + const Vec4V iInertiaScale0 = V4LoadA(invInertiaScale0); + const Vec4V iMassScale1 = V4LoadA(invMassScale1); + const Vec4V iInertiaScale1 = V4LoadA(invInertiaScale1); + + + bool hasFriction = setupFinalizeSolverConstraintsCoulomb4(blockDescs, solverConstraint, + invDtF32, bounceThresholdF32, c, numFrictionPerPoint, numContactPoints4, solverConstraintByteSize, + iMassScale0, iInertiaScale0, iMassScale1, iInertiaScale1); + + *(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize)) = 0; + *(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize + 4)) = hasFriction ? 
0xFFFFFFFF : 0; + + + return SolverConstraintPrepState::eSUCCESS; +} + +} + +} + diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepPF.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepPF.cpp new file mode 100644 index 00000000..4651605b --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepPF.cpp @@ -0,0 +1,650 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. 
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#include "foundation/PxPreprocessor.h" +#include "PsVecMath.h" +#include "PsMathUtils.h" +#include "DySolverContact.h" +#include "DySolverContactPF.h" +#include "DySolverConstraintTypes.h" +#include "PxcNpWorkUnit.h" +#include "DyThreadContext.h" +#include "DyContactPrep.h" +#include "PxcNpContactPrepShared.h" +//#include "PxvGeometry.h" +#include "PxvDynamics.h" +#include "DyCorrelationBuffer.h" +#include "DySolverConstraintDesc.h" +#include "DySolverBody.h" +#include "DySolverContact4.h" +#include "DySolverContactPF4.h" + + +#include "PsVecMath.h" +#include "PxContactModifyCallback.h" +#include "PxsMaterialManager.h" +#include "PxsMaterialCombiner.h" +#include "DySolverExt.h" +#include "DyArticulationContactPrep.h" +#include "DyContactPrepShared.h" + +#include "PsFoundation.h" + +using namespace physx::Gu; +using namespace physx::shdfnd::aos; + +namespace physx +{ +namespace Dy +{ + +bool createFinalizeSolverContactsCoulomb(PxSolverContactDesc& contactDesc, + PxsContactManagerOutput& output, + ThreadContext& threadContext, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator, + PxFrictionType::Enum frictionType); + /* setupFinalizeSolverConstraintsCoulomb: writes the single-pair Coulomb solver data into workspace in two passes over the friction patches in c, skipping patches whose contact count is 0. Pass 1 writes, per patch, one SolverContactCoulombHeader followed by its SolverContactPoint entries (filled by constructContactConstraint). Pass 2 walks the headers again, stores frictionOffset in each header, and appends after the contact data a SolverFrictionHeader, a zeroed applied-force buffer and, unless the first contact of the patch has eDISABLE_FRICTION set, frictionPerPointCount SolverContactFriction entries per contact, alternating between the two directions tDir0 and tDir1. A zero byte is written after the last entry. Returns true if at least one friction entry was written. NOTE(review): the write pointer is advanced by getAppliedForcePaddingSize(contact count) but PxMemZero clears contactCount times frictionPerPointCount floats - confirm the reserved space covers that. */ +static bool setupFinalizeSolverConstraintsCoulomb( + Sc::ShapeInteraction* shapeInteraction, + const ContactBuffer& buffer, + const CorrelationBuffer& c, + const PxTransform& bodyFrame0, + const PxTransform& bodyFrame1, + PxU8* workspace, + const PxSolverBodyData& data0, + const PxSolverBodyData& data1, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxU32 frictionPerPointCount, + const bool hasForceThresholds, + const bool staticBody, + PxReal invMassScale0, PxReal invInertiaScale0, + PxReal invMassScale1, PxReal invInertiaScale1, + PxReal restDist, + const PxReal maxCCDSeparation) +{ + const 
FloatV ccdMaxSeparation = FLoad(maxCCDSeparation); + PxU8* PX_RESTRICT ptr = workspace; + const FloatV zero=FZero(); + + PxU8 flags = PxU8(hasForceThresholds ? SolverContactHeader::eHAS_FORCE_THRESHOLDS : 0); + + const FloatV restDistance = FLoad(restDist); + + const Vec3V bodyFrame0p = V3LoadU(bodyFrame0.p); + const Vec3V bodyFrame1p = V3LoadU(bodyFrame1.p); + + Ps::prefetchLine(c.contactID); + Ps::prefetchLine(c.contactID, 128); + + const PxU32 frictionPatchCount = c.frictionPatchCount; + + const PxU32 pointStride = sizeof(SolverContactPoint); + const PxU32 frictionStride = sizeof(SolverContactFriction); + const PxU8 pointHeaderType = Ps::to8(staticBody ? DY_SC_TYPE_STATIC_CONTACT : DY_SC_TYPE_RB_CONTACT); + const PxU8 frictionHeaderType = Ps::to8(staticBody ? DY_SC_TYPE_STATIC_FRICTION : DY_SC_TYPE_FRICTION); + + + const Vec3V linVel0 = V3LoadU(data0.linearVelocity); + const Vec3V linVel1 = V3LoadU(data1.linearVelocity); + const Vec3V angVel0 = V3LoadU(data0.angularVelocity); + const Vec3V angVel1 = V3LoadU(data1.angularVelocity); + + + const FloatV invMass0 = FLoad(data0.invMass); + const FloatV invMass1 = FLoad(data1.invMass); + + const FloatV maxPenBias = FMax(FLoad(data0.penBiasClamp), FLoad(data1.penBiasClamp)); + + // PT: the matrix is symmetric so we can read it as a PxMat33! Gets rid of 25000+ LHS. 
+ const PxMat33& invIn0 = reinterpret_cast<const PxMat33&>(data0.sqrtInvInertia); + PX_ALIGN(16, const Mat33V invSqrtInertia0) + ( + V3LoadU(invIn0.column0), + V3LoadU(invIn0.column1), + V3LoadU(invIn0.column2) + ); + const PxMat33& invIn1 = reinterpret_cast<const PxMat33&>(data1.sqrtInvInertia); + PX_ALIGN(16, const Mat33V invSqrtInertia1) + ( + V3LoadU(invIn1.column0), + V3LoadU(invIn1.column1), + V3LoadU(invIn1.column2) + ); + + const FloatV invDt = FLoad(invDtF32); + const FloatV p8 = FLoad(0.8f); + const FloatV bounceThreshold = FLoad(bounceThresholdF32); + const FloatV orthoThreshold = FLoad(0.70710678f); + const FloatV eps = FLoad(0.00001f); + + const FloatV invDtp8 = FMul(invDt, p8); + + const FloatV d0 = FLoad(invMassScale0); + const FloatV d1 = FLoad(invMassScale1); + const FloatV nDom1fV = FNeg(d1); + const FloatV angD0 = FLoad(invInertiaScale0); + const FloatV angD1 = FLoad(invInertiaScale1); + + const FloatV invMass0_dom0fV = FMul(d0, invMass0); + const FloatV invMass1_dom1fV = FMul(nDom1fV, invMass1); + + + for(PxU32 i=0;i< frictionPatchCount;i++) + { + const PxU32 contactCount = c.frictionPatchContactCounts[i]; + if(contactCount == 0) + continue; + + const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start; + + const Vec3V normal = Ps::aos::V3LoadA(contactBase0->normal); + + const FloatV normalLenSq = V3LengthSq(normal); + const VecCrossV norCross = V3PrepareCross(normal); + + const FloatV restitution = FLoad(contactBase0->restitution); + + const FloatV norVel = V3SumElems(V3NegMulSub(normal, linVel1, V3Mul(normal, linVel0))); + /*const FloatV norVel0 = V3Dot(normal, linVel0); + const FloatV norVel1 = V3Dot(normal, linVel1); + const FloatV norVel = FSub(norVel0, norVel1);*/ + + const FloatV invMassNorLenSq0 = FMul(invMass0_dom0fV, normalLenSq); + const FloatV invMassNorLenSq1 = FMul(invMass1_dom1fV, normalLenSq); + + + SolverContactCoulombHeader* PX_RESTRICT header = 
reinterpret_cast<SolverContactCoulombHeader*>(ptr); + ptr += sizeof(SolverContactCoulombHeader); + + Ps::prefetchLine(ptr, 128); + Ps::prefetchLine(ptr, 256); + Ps::prefetchLine(ptr, 384); + + + header->numNormalConstr = PxU8(contactCount); + header->type = pointHeaderType; + //header->setRestitution(n.restitution); + //header->setRestitution(contactBase0->restitution); + + header->setDominance0(invMass0_dom0fV); + header->setDominance1(FNeg(invMass1_dom1fV)); + FStore(angD0, &header->angDom0); + FStore(angD1, &header->angDom1); + header->setNormal(normal); + header->flags = flags; + header->shapeInteraction = shapeInteraction; + + + for(PxU32 patch=c.correlationListHeads[i]; + patch!=CorrelationBuffer::LIST_END; + patch = c.contactPatches[patch].next) + { + const PxU32 count = c.contactPatches[patch].count; + const Gu::ContactPoint* contactBase = buffer.contacts + c.contactPatches[patch].start; + + + PxU8* p = ptr; + for(PxU32 j=0;j<count;j++) + { + const Gu::ContactPoint& contact = contactBase[j]; + + SolverContactPoint* PX_RESTRICT solverContact = reinterpret_cast<SolverContactPoint*>(p); + p += pointStride; + + constructContactConstraint(invSqrtInertia0, invSqrtInertia1, invMassNorLenSq0, + invMassNorLenSq1, angD0, angD1, bodyFrame0p, bodyFrame1p, + normal, norVel, norCross, angVel0, angVel1, + invDt, invDtp8, restDistance, maxPenBias, restitution, + bounceThreshold, contact, *solverContact, ccdMaxSeparation); + } + ptr = p; + } + } + + //construct all the frictions + + PxU8* PX_RESTRICT ptr2 = workspace; + + bool hasFriction = false; + for(PxU32 i=0;i< frictionPatchCount;i++) + { + const PxU32 contactCount = c.frictionPatchContactCounts[i]; + if(contactCount == 0) + continue; + + const Gu::ContactPoint* contactBase0 = buffer.contacts + c.contactPatches[c.correlationListHeads[i]].start; + + SolverContactCoulombHeader* header = reinterpret_cast<SolverContactCoulombHeader*>(ptr2); + header->frictionOffset = PxU16(ptr - ptr2);// + sizeof(SolverFrictionHeader); + 
ptr2 += sizeof(SolverContactCoulombHeader) + header->numNormalConstr * pointStride; + + const PxReal staticFriction = contactBase0->staticFriction; + const bool disableStrongFriction = !!(contactBase0->materialFlags & PxMaterialFlag::eDISABLE_FRICTION); + const bool haveFriction = (disableStrongFriction == 0); + + SolverFrictionHeader* frictionHeader = reinterpret_cast<SolverFrictionHeader*>(ptr); + frictionHeader->numNormalConstr = Ps::to8(c.frictionPatchContactCounts[i]); + frictionHeader->numFrictionConstr = Ps::to8(haveFriction ? c.frictionPatchContactCounts[i] * frictionPerPointCount : 0); + ptr += sizeof(SolverFrictionHeader); + PxF32* appliedForceBuffer = reinterpret_cast<PxF32*>(ptr); + ptr += frictionHeader->getAppliedForcePaddingSize(c.frictionPatchContactCounts[i]); + PxMemZero(appliedForceBuffer, sizeof(PxF32)*contactCount*frictionPerPointCount); + Ps::prefetchLine(ptr, 128); + Ps::prefetchLine(ptr, 256); + Ps::prefetchLine(ptr, 384); + + const Vec3V normal = V3LoadU(buffer.contacts[c.contactPatches[c.correlationListHeads[i]].start].normal); + + const FloatV normalX = V3GetX(normal); + const FloatV normalY = V3GetY(normal); + const FloatV normalZ = V3GetZ(normal); + + const Vec3V t0Fallback1 = V3Merge(zero, FNeg(normalZ), normalY); + const Vec3V t0Fallback2 = V3Merge(FNeg(normalY), normalX, zero) ; + + const BoolV con = FIsGrtr(orthoThreshold, FAbs(normalX)); + const Vec3V tFallback1 = V3Sel(con, t0Fallback1, t0Fallback2); + + const Vec3V linVrel = V3Sub(linVel0, linVel1); + const Vec3V t0_ = V3Sub(linVrel, V3Scale(normal, V3Dot(normal, linVrel))); + const FloatV sqDist = V3Dot(t0_,t0_); + const BoolV con1 = FIsGrtr(sqDist, eps); + const Vec3V tDir0 =V3Normalize(V3Sel(con1, t0_, tFallback1)); + const Vec3V tDir1 = V3Cross(tDir0, normal); + + Vec3V tFallback = tDir0; + Vec3V tFallbackAlt = tDir1; + + if(haveFriction) + { + //frictionHeader->setStaticFriction(n.staticFriction); + frictionHeader->setStaticFriction(staticFriction); + FStore(invMass0_dom0fV, 
&frictionHeader->invMass0D0); + FStore(FNeg(invMass1_dom1fV), &frictionHeader->invMass1D1); + FStore(angD0, &frictionHeader->angDom0); + FStore(angD1, &frictionHeader->angDom1); + frictionHeader->type = frictionHeaderType; + + PxU32 totalPatchContactCount = 0; + + for(PxU32 patch=c.correlationListHeads[i]; + patch!=CorrelationBuffer::LIST_END; + patch = c.contactPatches[patch].next) + { + const PxU32 count = c.contactPatches[patch].count; + const PxU32 start = c.contactPatches[patch].start; + const Gu::ContactPoint* contactBase = buffer.contacts + start; + + PxU8* p = ptr; + for(PxU32 j =0; j < count; j++) + { + hasFriction = true; + const Gu::ContactPoint& contact = contactBase[j]; + const Vec3V point = V3LoadU(contact.point); + const Vec3V ra = V3Sub(point, bodyFrame0p); + const Vec3V rb = V3Sub(point, bodyFrame1p); + const Vec3V targetVel = V3LoadU(contact.targetVel); + + for(PxU32 k = 0; k < frictionPerPointCount; ++k) + { + const Vec3V t0 = tFallback; + tFallback = tFallbackAlt; + tFallbackAlt = t0; + + SolverContactFriction* PX_RESTRICT f0 = reinterpret_cast<SolverContactFriction*>(p); + p += frictionStride; + //f0->brokenOrContactIndex = contactId; + + const Vec3V raXn = V3Cross(ra, t0); + const Vec3V rbXn = V3Cross(rb, t0); + + const Vec3V delAngVel0 = M33MulV3(invSqrtInertia0, raXn); + const Vec3V delAngVel1 = M33MulV3(invSqrtInertia1, rbXn); + + const FloatV resp0 = FAdd(invMass0_dom0fV, FMul(angD0, V3Dot(delAngVel0, delAngVel0))); + const FloatV resp1 = FSub(FMul(angD1, V3Dot(delAngVel1, delAngVel1)), invMass1_dom1fV); + const FloatV resp = FAdd(resp0, resp1); + + const FloatV velMultiplier = FNeg(FSel(FIsGrtr(resp, zero), FRecip(resp), zero)); + + const FloatV vrel1 = FAdd(V3Dot(t0, linVel0), V3Dot(raXn, angVel0)); + const FloatV vrel2 = FAdd(V3Dot(t0, linVel1), V3Dot(rbXn, angVel1)); + const FloatV vrel = FSub(vrel1, vrel2); + + + f0->normalXYZ_appliedForceW = V4SetW(Vec4V_From_Vec3V(t0), zero); + f0->raXnXYZ_velMultiplierW = 
V4SetW(Vec4V_From_Vec3V(delAngVel0), velMultiplier); + //f0->rbXnXYZ_targetVelocityW = V4SetW(Vec4V_From_Vec3V(delAngVel1), FSub(V3Dot(targetVel, t0), vrel)); + f0->rbXnXYZ_biasW = Vec4V_From_Vec3V(delAngVel1); + FStore(FSub(V3Dot(targetVel, t0), vrel), &f0->targetVel); + } + } + + totalPatchContactCount += c.contactPatches[patch].count; + + ptr = p; + } + } + } + *ptr = 0; + return hasFriction; +} + + + /* computeBlockStreamByteSizesCoulomb: sizes the single-pair Coulomb constraint stream for the friction patches in c. Every patch with a non-zero contact count contributes one SolverContactCoulombHeader, its contact points, one SolverFrictionHeader and the padded applied-force buffer; friction constraints (contact count times frictionCountPerPoint) are added only when the patch does not have eDISABLE_FRICTION set. The Ext struct sizes are used when useExtContacts is true. Both outputs are asserted to be 0 on entry; _solverConstraintByteSize is rounded up to a multiple of 16. NOTE(review): with friction enabled _axisConstraintCount grows by the contact count rather than by nbFrictionConstraints, and numFrictionPatches is counted but never read - confirm both are intended. */ +static void computeBlockStreamByteSizesCoulomb(const CorrelationBuffer& c, + const PxU32 frictionCountPerPoint, PxU32& _solverConstraintByteSize, + PxU32& _axisConstraintCount, + bool useExtContacts) +{ + PX_ASSERT(0 == _solverConstraintByteSize); + PX_ASSERT(0 == _axisConstraintCount); + + // PT: use local vars to remove LHS + PxU32 solverConstraintByteSize = 0; + PxU32 numFrictionPatches = 0; + PxU32 axisConstraintCount = 0; + + for(PxU32 i = 0; i < c.frictionPatchCount; i++) + { + //Friction patches. + if(c.correlationListHeads[i] != CorrelationBuffer::LIST_END) + numFrictionPatches++; + + + const FrictionPatch& frictionPatch = c.frictionPatches[i]; + const bool haveFriction = (frictionPatch.materialFlags & PxMaterialFlag::eDISABLE_FRICTION) == 0; + + //Solver constraint data. + if(c.frictionPatchContactCounts[i]!=0) + { + solverConstraintByteSize += sizeof(SolverContactCoulombHeader); + + solverConstraintByteSize += useExtContacts ? 
c.frictionPatchContactCounts[i] * sizeof(SolverContactPointExt) + : c.frictionPatchContactCounts[i] * sizeof(SolverContactPoint); + + axisConstraintCount += c.frictionPatchContactCounts[i]; + + //We always need the friction headers to write the accumulated impulses + if(haveFriction) + { + //4 bytes + solverConstraintByteSize += sizeof(SolverFrictionHeader); + //buffer to store applied forces in + solverConstraintByteSize += SolverFrictionHeader::getAppliedForcePaddingSize(c.frictionPatchContactCounts[i]); + + const PxU32 nbFrictionConstraints = c.frictionPatchContactCounts[i] * frictionCountPerPoint; + + solverConstraintByteSize += useExtContacts ? nbFrictionConstraints * sizeof(SolverContactFrictionExt) + : nbFrictionConstraints * sizeof(SolverContactFriction); + axisConstraintCount += c.frictionPatchContactCounts[i]; + } + else + { + //reserve buffers for storing accumulated impulses + solverConstraintByteSize += sizeof(SolverFrictionHeader); + solverConstraintByteSize += SolverFrictionHeader::getAppliedForcePaddingSize(c.frictionPatchContactCounts[i]); + } + } + } + _axisConstraintCount = axisConstraintCount; + + //16-byte alignment. + _solverConstraintByteSize = ((solverConstraintByteSize + 0x0f) & ~0x0f); + PX_ASSERT(0 == (_solverConstraintByteSize & 0x0f)); +} + /* reserveBlockStreamsCoulomb: computes the single-pair constraint size with computeBlockStreamByteSizesCoulomb and, when it is non-zero, reserves that size plus 16 bytes from constraintAllocator. All three output arguments are asserted to be NULL or 0 on entry. Returns false (after a warning) when the allocator returns NULL or reinterpret_cast<PxU8*>(-1); otherwise returns true, with solverConstraint set to the reserved block when the size is non-zero. */ +static bool reserveBlockStreamsCoulomb(const CorrelationBuffer& c, + PxU8*& solverConstraint, PxU32 frictionCountPerPoint, + PxU32& solverConstraintByteSize, + PxU32& axisConstraintCount, PxConstraintAllocator& constraintAllocator, + bool useExtContacts) +{ + PX_ASSERT(NULL == solverConstraint); + PX_ASSERT(0 == solverConstraintByteSize); + PX_ASSERT(0 == axisConstraintCount); + + + //From constraintBlockStream we need to reserve contact points, contact forces, and a char buffer for the solver constraint data (already have a variable for this). + //From frictionPatchStream we just need to reserve a single buffer. + + //Compute the sizes of all the buffers. 
+ computeBlockStreamByteSizesCoulomb( + c, + frictionCountPerPoint, solverConstraintByteSize, + axisConstraintCount, useExtContacts); + + //Reserve the buffers. + + //First reserve the accumulated buffer size for the constraint block. + PxU8* constraintBlock = NULL; + const PxU32 constraintBlockByteSize = solverConstraintByteSize; + if(constraintBlockByteSize > 0) + { + constraintBlock = constraintAllocator.reserveConstraintData(constraintBlockByteSize + 16u); + + if(0==constraintBlock || (reinterpret_cast<PxU8*>(-1))==constraintBlock) + { + if(0==constraintBlock) + { + PX_WARN_ONCE( + "Reached limit set by PxSceneDesc::maxNbContactDataBlocks - ran out of buffer space for constraint prep. " + "Either accept dropped contacts or increase buffer size allocated for narrow phase by increasing PxSceneDesc::maxNbContactDataBlocks."); + } + else + { + PX_WARN_ONCE( + "Attempting to allocate more than 16K of contact data for a single contact pair in constraint prep. " + "Either accept dropped contacts or simplify collision geometry."); + constraintBlock=NULL; + } + } + } + + //Patch up the individual ptrs to the buffer returned by the constraint block reservation (assuming the reservation didn't fail). + if(0==constraintBlockByteSize || constraintBlock) + { + if(solverConstraintByteSize) + { + solverConstraint = constraintBlock; + PX_ASSERT(0==(uintptr_t(solverConstraint) & 0x0f)); + } + } + + //Return true if neither of the two block reservations failed. 
+ return ((0==constraintBlockByteSize || constraintBlock)); +} + +bool createFinalizeSolverContactsCoulomb1D(PxSolverContactDesc& contactDesc, + PxsContactManagerOutput& output, + ThreadContext& threadContext, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator) +{ + return createFinalizeSolverContactsCoulomb(contactDesc, output, threadContext, invDtF32, bounceThresholdF32, frictionOffsetThreshold, correlationDistance, constraintAllocator, PxFrictionType::eONE_DIRECTIONAL); +} + +bool createFinalizeSolverContactsCoulomb2D(PxSolverContactDesc& contactDesc, + PxsContactManagerOutput& output, + ThreadContext& threadContext, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator) + +{ + return createFinalizeSolverContactsCoulomb(contactDesc, output, threadContext, invDtF32, bounceThresholdF32, frictionOffsetThreshold, correlationDistance, constraintAllocator, PxFrictionType::eTWO_DIRECTIONAL); +} + +bool createFinalizeSolverContactsCoulomb(PxSolverContactDesc& contactDesc, + PxsContactManagerOutput& output, + ThreadContext& threadContext, + const PxReal invDtF32, + PxReal bounceThresholdF32, + PxReal frictionOffsetThreshold, + PxReal correlationDistance, + PxConstraintAllocator& constraintAllocator, + PxFrictionType::Enum frictionType) +{ + PX_UNUSED(frictionOffsetThreshold); + PX_UNUSED(correlationDistance); + + PxSolverConstraintDesc& desc = *contactDesc.desc; + + desc.constraintLengthOver16 = 0; + + ContactBuffer& buffer = threadContext.mContactBuffer; + + buffer.count = 0; + + // We pull the friction patches out of the cache to remove the dependency on how + // the cache is organized. Remember original addrs so we can write them back + // efficiently. 
+ + Ps::prefetchLine(contactDesc.frictionPtr); + + PxReal invMassScale0 = 1.f; + PxReal invMassScale1 = 1.f; + PxReal invInertiaScale0 = 1.f; + PxReal invInertiaScale1 = 1.f; + + bool hasMaxImpulse = false, hasTargetVelocity = false; + + PxU32 numContacts = extractContacts(buffer, output, hasMaxImpulse, hasTargetVelocity, invMassScale0, invMassScale1, + invInertiaScale0, invInertiaScale1, PxMin(contactDesc.data0->maxContactImpulse, contactDesc.data1->maxContactImpulse)); + + if(numContacts == 0) + { + contactDesc.frictionPtr = NULL; + contactDesc.frictionCount = 0; + return true; + } + + Ps::prefetchLine(contactDesc.body0); + Ps::prefetchLine(contactDesc.body1); + Ps::prefetchLine(contactDesc.data0); + Ps::prefetchLine(contactDesc.data1); + + CorrelationBuffer& c = threadContext.mCorrelationBuffer; + c.frictionPatchCount = 0; + c.contactPatchCount = 0; + + createContactPatches(c, buffer.contacts, buffer.count, PXC_SAME_NORMAL); + + PxU32 numFrictionPerPatch = PxU32(frictionType == PxFrictionType::eONE_DIRECTIONAL ? 1 : 2); + + bool overflow = correlatePatches(c, buffer.contacts, contactDesc.bodyFrame0, contactDesc.bodyFrame1, PXC_SAME_NORMAL, 0, 0); + PX_UNUSED(overflow); +#if PX_CHECKED + if(overflow) + { + Ps::getFoundation().error(physx::PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, + "Dropping contacts in solver because we exceeded limit of 32 friction patches."); + } +#endif + + + //PX_ASSERT(patchCount == c.frictionPatchCount); + + PxU8* solverConstraint = NULL; + PxU32 solverConstraintByteSize = 0; + PxU32 axisConstraintCount = 0; + + bool useExtContacts = !!((contactDesc.bodyState0 | contactDesc.bodyState1) & PxSolverContactDesc::eARTICULATION); + + const bool successfulReserve = reserveBlockStreamsCoulomb( + c, + solverConstraint, numFrictionPerPatch, + solverConstraintByteSize, + axisConstraintCount, + constraintAllocator, + useExtContacts); + + // initialise the work unit's ptrs to the various buffers. 
+ + contactDesc.frictionPtr = NULL; + desc.constraint = NULL; + desc.constraintLengthOver16 = 0; + contactDesc.frictionCount = 0; + + // patch up the work unit with the reserved buffers and set the reserved buffer data as appropriate. + + if(successfulReserve) + { + desc.constraint = solverConstraint; + output.nbContacts = Ps::to8(numContacts); + desc.constraintLengthOver16 = Ps::to16(solverConstraintByteSize/16); + + //Initialise solverConstraint buffer. + if(solverConstraint) + { + bool hasFriction = false; + if(useExtContacts) + { + const PxSolverBodyData& data0 = *contactDesc.data0; + const PxSolverBodyData& data1 = *contactDesc.data1; + + const SolverExtBody b0(reinterpret_cast<const void*>(contactDesc.body0), reinterpret_cast<const void*>(&data0), desc.linkIndexA); + const SolverExtBody b1(reinterpret_cast<const void*>(contactDesc.body1), reinterpret_cast<const void*>(&data1), desc.linkIndexB); + + hasFriction = setupFinalizeExtSolverContactsCoulomb(buffer, c, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint, + invDtF32, bounceThresholdF32, b0, b1, numFrictionPerPatch, + invMassScale0, invInertiaScale0, invMassScale1, invInertiaScale1, contactDesc.restDistance, contactDesc.maxCCDSeparation); + } + else + { + const PxSolverBodyData& data0 = *contactDesc.data0; + const PxSolverBodyData& data1 = *contactDesc.data1; + + hasFriction = setupFinalizeSolverConstraintsCoulomb(contactDesc.shapeInteraction, buffer, c, contactDesc.bodyFrame0, contactDesc.bodyFrame1, solverConstraint, + data0, data1, invDtF32, bounceThresholdF32, numFrictionPerPatch, contactDesc.hasForceThresholds, contactDesc.bodyState1 == PxSolverContactDesc::eSTATIC_BODY, + invMassScale0, invInertiaScale0, invMassScale1, invInertiaScale1, contactDesc.restDistance, contactDesc.maxCCDSeparation); + } + *(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize)) = 0; + *(reinterpret_cast<PxU32*>(solverConstraint + solverConstraintByteSize + 4)) = hasFriction ? 
0xFFFFFFFF : 0; + } + } + + return successfulReserve; +} + +} +} + + diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepShared.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepShared.h new file mode 100644 index 00000000..7accabd3 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactPrepShared.h @@ -0,0 +1,301 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. 
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef DY_CONTACT_PREP_SHARED_H +#define DY_CONTACT_PREP_SHARED_H + +#include "foundation/PxPreprocessor.h" +#include "PxSceneDesc.h" +#include "PsVecMath.h" +#include "PsMathUtils.h" +#include "DyContactPrep.h" +#include "DyCorrelationBuffer.h" +#include "DyArticulationContactPrep.h" +#include "PxsContactManager.h" +#include "PxsContactManagerState.h" + +namespace physx +{ +namespace Dy +{ + + +PX_FORCE_INLINE bool pointsAreClose(const PxTransform& body1ToBody0, + const PxVec3& localAnchor0, const PxVec3& localAnchor1, + const PxVec3& axis, float correlDist) +{ + const PxVec3 body0PatchPoint1 = body1ToBody0.transform(localAnchor1); + + return PxAbs((localAnchor0 - body0PatchPoint1).dot(axis))<correlDist; +} + +PX_FORCE_INLINE bool isSeparated(const FrictionPatch& patch, const PxTransform& body1ToBody0, const PxReal correlationDistance) +{ + PX_ASSERT(patch.anchorCount <= 2); + for(PxU32 a = 0; a < patch.anchorCount; ++a) + { + if(!pointsAreClose(body1ToBody0, patch.body0Anchors[a], patch.body1Anchors[a], patch.body0Normal, correlationDistance)) + return true; + } + return false; +} + + +inline bool getFrictionPatches(CorrelationBuffer& c, + const PxU8* frictionCookie, + PxU32 frictionPatchCount, + const PxTransform& bodyFrame0, + const PxTransform& bodyFrame1, + PxReal correlationDistance) +{ + PX_UNUSED(correlationDistance); + if(frictionCookie == NULL || frictionPatchCount == 0) + return true; + + //KS - this is now DMA'd inside the shader so we don't need to immediate DMA it here + const FrictionPatch* patches = reinterpret_cast<const FrictionPatch*>(frictionCookie); + + //Try working out relative transforms! 
TODO - can we compute this lazily for the first friction patch + bool evaluated = false; + PxTransform body1ToBody0; + + while(frictionPatchCount--) + { + Ps::prefetchLine(patches,128); + const FrictionPatch& patch = *patches++; + PX_ASSERT (patch.broken == 0 || patch.broken == 1); + if(!patch.broken) + { + // if the eDISABLE_STRONG_FRICTION flag is there we need to blow away the previous frame's friction correlation, so + // that we can associate each friction anchor with a target velocity. So we lose strong friction. + if(patch.anchorCount != 0 && !(patch.materialFlags & PxMaterialFlag::eDISABLE_STRONG_FRICTION)) + { + PX_ASSERT(patch.anchorCount <= 2); + + + if(!evaluated) + { + body1ToBody0 = bodyFrame0.transformInv(bodyFrame1); + evaluated = true; + } + + + if(patch.body0Normal.dot(body1ToBody0.rotate(patch.body1Normal)) > PXC_SAME_NORMAL) + { + if(!isSeparated(patch, body1ToBody0, correlationDistance)) + { + if(c.frictionPatchCount == CorrelationBuffer::MAX_FRICTION_PATCHES) + return false; + { + c.contactID[c.frictionPatchCount][0] = 0xffff; + c.contactID[c.frictionPatchCount][1] = 0xffff; + //Rotate the contact normal into world space + c.frictionPatchWorldNormal[c.frictionPatchCount] = bodyFrame0.rotate(patch.body0Normal); + c.frictionPatchContactCounts[c.frictionPatchCount] = 0; + c.correlationListHeads[c.frictionPatchCount] = CorrelationBuffer::LIST_END; + PxMemCopy(&c.frictionPatches[c.frictionPatchCount++], &patch, sizeof(FrictionPatch)); + } + } + } + } + } + } + return true; +} + +PX_FORCE_INLINE PxU32 extractContacts(Gu::ContactBuffer& buffer, PxsContactManagerOutput& npOutput, bool& hasMaxImpulse, bool& hasTargetVelocity, + PxReal& invMassScale0, PxReal& invMassScale1, PxReal& invInertiaScale0, PxReal& invInertiaScale1, PxReal defaultMaxImpulse) +{ + PxContactStreamIterator iter(npOutput.contactPatches, npOutput.contactPoints, npOutput.getInternalFaceIndice(), npOutput.nbPatches, npOutput.nbContacts); + + PxU32 numContacts = buffer.count, 
origContactCount = buffer.count; + if(!iter.forceNoResponse) + { + invMassScale0 = iter.getInvMassScale0(); + invMassScale1 = iter.getInvMassScale1(); + invInertiaScale0 = iter.getInvInertiaScale0(); + invInertiaScale1 = iter.getInvInertiaScale1(); + hasMaxImpulse = (iter.patch->internalFlags & PxContactPatch::eHAS_MAX_IMPULSE) != 0; + hasTargetVelocity = (iter.patch->internalFlags & PxContactPatch::eHAS_TARGET_VELOCITY) != 0; + + while(iter.hasNextPatch()) + { + iter.nextPatch(); + while(iter.hasNextContact()) + { + iter.nextContact(); + Ps::prefetchLine(iter.contact, 128); + Ps::prefetchLine(&buffer.contacts[numContacts], 128); + PxReal maxImpulse = hasMaxImpulse ? iter.getMaxImpulse() : defaultMaxImpulse; + if(maxImpulse != 0.f) + { + PX_ASSERT(numContacts < Gu::ContactBuffer::MAX_CONTACTS); + buffer.contacts[numContacts].normal = iter.getContactNormal(); + buffer.contacts[numContacts].point = iter.getContactPoint(); + buffer.contacts[numContacts].separation = iter.getSeparation(); + //KS - we use the face indices to cache the material indices and flags - avoids bloating the PxContact structure + buffer.contacts[numContacts].materialFlags = PxU8(iter.getMaterialFlags()); + buffer.contacts[numContacts].maxImpulse = maxImpulse; + buffer.contacts[numContacts].staticFriction = iter.getStaticFriction(); + buffer.contacts[numContacts].dynamicFriction = iter.getDynamicFriction(); + buffer.contacts[numContacts].restitution = iter.getRestitution(); + const PxVec3& targetVel = iter.getTargetVel(); + buffer.contacts[numContacts].targetVel = targetVel; + ++numContacts; + } + } + } + } + const PxU32 contactCount = numContacts - origContactCount; + buffer.count = numContacts; + return contactCount; +} + +struct CorrelationListIterator +{ + CorrelationBuffer& buffer; + PxU32 currPatch; + PxU32 currContact; + + CorrelationListIterator(CorrelationBuffer& correlationBuffer, PxU32 startPatch) : buffer(correlationBuffer) + { + //We need to force us to advance the correlation buffer 
to the first available contact (if one exists) + PxU32 newPatch = startPatch, newContact = 0; + + while(newPatch != CorrelationBuffer::LIST_END && newContact == buffer.contactPatches[newPatch].count) + { + newPatch = buffer.contactPatches[newPatch].next; + newContact = 0; + } + + currPatch = newPatch; + currContact = newContact; + } + + //Returns true if it has another contact pre-loaded. Returns false otherwise + PX_FORCE_INLINE bool hasNextContact() + { + return (currPatch != CorrelationBuffer::LIST_END && currContact < buffer.contactPatches[currPatch].count); + } + + inline void nextContact(PxU32& patch, PxU32& contact) + { + PX_ASSERT(currPatch != CorrelationBuffer::LIST_END); + PX_ASSERT(currContact < buffer.contactPatches[currPatch].count); + + patch = currPatch; + contact = currContact; + PxU32 newPatch = currPatch, newContact = currContact + 1; + + while(newPatch != CorrelationBuffer::LIST_END && newContact == buffer.contactPatches[newPatch].count) + { + newPatch = buffer.contactPatches[newPatch].next; + newContact = 0; + } + + currPatch = newPatch; + currContact = newContact; + } + +private: + CorrelationListIterator& operator=(const CorrelationListIterator&); + +}; + + + PX_FORCE_INLINE void constructContactConstraint(const Mat33V& invSqrtInertia0, const Mat33V& invSqrtInertia1, const FloatVArg invMassNorLenSq0, + const FloatVArg invMassNorLenSq1, const FloatVArg angD0, const FloatVArg angD1, const Vec3VArg bodyFrame0p, const Vec3VArg bodyFrame1p, + const Vec3VArg normal, const FloatVArg norVel, const VecCrossV& norCross, const Vec3VArg angVel0, const Vec3VArg angVel1, + const FloatVArg invDt, const FloatVArg invDtp8, const FloatVArg restDistance, const FloatVArg maxPenBias, const FloatVArg restitution, + const FloatVArg bounceThreshold, const Gu::ContactPoint& contact, SolverContactPoint& solverContact, + const FloatVArg ccdMaxSeparation) + { + const FloatV zero = FZero(); + const Vec3V point = V3LoadA(contact.point); + const FloatV separation = 
FLoad(contact.separation); + + const FloatV cTargetVel = V3Dot(normal, V3LoadA(contact.targetVel)); + + const Vec3V ra = V3Sub(point, bodyFrame0p); + const Vec3V rb = V3Sub(point, bodyFrame1p); + + const Vec3V raXn = V3Cross(ra, norCross); + const Vec3V rbXn = V3Cross(rb, norCross); + + const Vec3V raXnSqrtInertia = M33MulV3(invSqrtInertia0, raXn); + const Vec3V rbXnSqrtInertia = M33MulV3(invSqrtInertia1, rbXn); + + const FloatV resp0 = FAdd(invMassNorLenSq0, FMul(V3Dot(raXnSqrtInertia, raXnSqrtInertia), angD0)); + const FloatV resp1 = FSub(FMul(V3Dot(rbXnSqrtInertia, rbXnSqrtInertia), angD1), invMassNorLenSq1); + + const FloatV unitResponse = FAdd(resp0, resp1); + + const FloatV vrel1 = FAdd(norVel, V3Dot(raXn, angVel0)); + const FloatV vrel2 = V3Dot(rbXn, angVel1); + const FloatV vrel = FSub(vrel1, vrel2); + + const FloatV velMultiplier = FSel(FIsGrtr(unitResponse, zero), FRecip(unitResponse), zero); + + const FloatV penetration = FSub(separation, restDistance); + + const FloatV penetrationInvDt = FMul(penetration, invDt); + + const FloatV penetrationInvDtPt8 = FMax(maxPenBias, FMul(penetration, invDtp8)); + + FloatV scaledBias = FMul(velMultiplier, penetrationInvDtPt8); + + const BoolV isGreater2 = BAnd(BAnd(FIsGrtr(restitution, zero), FIsGrtr(bounceThreshold, vrel)), FIsGrtr(FNeg(vrel), penetrationInvDt)); + + const BoolV ccdSeparationCondition = FIsGrtrOrEq(ccdMaxSeparation, penetration); + + scaledBias = FSel(BAnd(ccdSeparationCondition, isGreater2), zero, scaledBias); + + const FloatV sumVRel(vrel); + + FloatV targetVelocity = FAdd(cTargetVel, FSel(isGreater2, FMul(FNeg(sumVRel), restitution), zero)); + + //Note - we add on the initial target velocity + targetVelocity = FSub(targetVelocity, vrel); + + const FloatV biasedErr = FScaleAdd(targetVelocity, velMultiplier, FNeg(scaledBias)); + const FloatV unbiasedErr = FScaleAdd(targetVelocity, velMultiplier, FSel(isGreater2, zero, FNeg(FMax(scaledBias, zero)))); + //const FloatV unbiasedErr = 
FScaleAdd(targetVelocity, velMultiplier, FNeg(FMax(scaledBias, zero))); + + FStore(velMultiplier, &solverContact.velMultiplier); + FStore(biasedErr, &solverContact.biasedErr); + FStore(unbiasedErr, &solverContact.unbiasedErr); + solverContact.maxImpulse = contact.maxImpulse; + + solverContact.raXn = raXnSqrtInertia; + solverContact.rbXn = rbXnSqrtInertia; + } +} +} + +#endif //DY_CONTACT_PREP_SHARED_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyContactReduction.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactReduction.h new file mode 100644 index 00000000..a02fe8e9 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyContactReduction.h @@ -0,0 +1,409 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef DY_CONTACT_REDUCTION_H +#define DY_CONTACT_REDUCTION_H + +#include "GuContactPoint.h" +#include "PxsMaterialManager.h" + +namespace physx +{ + + +namespace Dy +{ + +//KS - might be OK with 4 but 5 guarantees the deepest + 4 contacts that contribute to largest surface area +#define CONTACT_REDUCTION_MAX_CONTACTS 6 +#define CONTACT_REDUCTION_MAX_PATCHES 32 +#define PXS_NORMAL_TOLERANCE 0.995f +#define PXS_SEPARATION_TOLERANCE 0.001f + + + //A patch contains a normal, pair of material indices and a list of indices. These indices are + //used to index into the PxContact array that's passed by the user + struct ReducedContactPatch + { + PxU32 numContactPoints; + PxU32 contactPoints[CONTACT_REDUCTION_MAX_CONTACTS]; + }; + + struct ContactPatch + { + PxVec3 rootNormal; + ContactPatch* mNextPatch; + PxReal maxPenetration; + PxU16 startIndex; + PxU16 stride; + PxU16 rootIndex; + PxU16 index; + }; + + struct SortBoundsPredicateManifold + { + bool operator()(const ContactPatch* idx1, const ContactPatch* idx2) const + { + return idx1->maxPenetration < idx2->maxPenetration; + } + }; + + + + template <PxU32 MaxPatches> + class ContactReduction + { + public: + ReducedContactPatch mPatches[MaxPatches]; + PxU32 mNumPatches; + ContactPatch mIntermediatePatches[CONTACT_REDUCTION_MAX_PATCHES]; + ContactPatch* mIntermediatePatchesPtrs[CONTACT_REDUCTION_MAX_PATCHES]; + PxU32 mNumIntermediatePatches; + Gu::ContactPoint* PX_RESTRICT mOriginalContacts; + PxsMaterialInfo* PX_RESTRICT mMaterialInfo; + PxU32 
mNumOriginalContacts; + + ContactReduction(Gu::ContactPoint* PX_RESTRICT originalContacts, PxsMaterialInfo* PX_RESTRICT materialInfo, PxU32 numContacts) : + mNumPatches(0), mNumIntermediatePatches(0), mOriginalContacts(originalContacts), mMaterialInfo(materialInfo), mNumOriginalContacts(numContacts) + { + } + + void reduceContacts() + { + //First pass, break up into contact patches, storing the start and stride of the patches + //We will need to have contact patches and then coallesce them + mIntermediatePatches[0].rootNormal = mOriginalContacts[0].normal; + mIntermediatePatches[0].mNextPatch = NULL; + mIntermediatePatches[0].startIndex = 0; + mIntermediatePatches[0].rootIndex = 0; + mIntermediatePatches[0].maxPenetration = mOriginalContacts[0].separation; + mIntermediatePatches[0].index = 0; + PxU16 numPatches = 1; + //PxU32 startIndex = 0; + PxU32 numUniquePatches = 1; + PxU16 m = 1; + for(; m < mNumOriginalContacts; ++m) + { + PxI32 index = -1; + for(PxU32 b = numPatches; b > 0; --b) + { + ContactPatch& patch = mIntermediatePatches[b-1]; + if(mMaterialInfo[patch.startIndex].mMaterialIndex0 == mMaterialInfo[m].mMaterialIndex0 && mMaterialInfo[patch.startIndex].mMaterialIndex1 == mMaterialInfo[m].mMaterialIndex1 && + patch.rootNormal.dot(mOriginalContacts[m].normal) >= PXS_NORMAL_TOLERANCE) + { + index = PxI32(b-1); + break; + } + } + + if(index != numPatches - 1) + { + mIntermediatePatches[numPatches-1].stride = PxU16(m - mIntermediatePatches[numPatches - 1].startIndex); + //Create a new patch... 
+ if(numPatches == CONTACT_REDUCTION_MAX_PATCHES) + { + break; + } + mIntermediatePatches[numPatches].startIndex = m; + mIntermediatePatches[numPatches].mNextPatch = NULL; + if(index == -1) + { + mIntermediatePatches[numPatches].rootIndex = numPatches; + mIntermediatePatches[numPatches].rootNormal = mOriginalContacts[m].normal; + mIntermediatePatches[numPatches].maxPenetration = mOriginalContacts[m].separation; + mIntermediatePatches[numPatches].index = numPatches; + ++numUniquePatches; + } + else + { + //Find last element in the link + PxU16 rootIndex = mIntermediatePatches[index].rootIndex; + mIntermediatePatches[index].mNextPatch = &mIntermediatePatches[numPatches]; + mIntermediatePatches[numPatches].rootNormal = mIntermediatePatches[index].rootNormal; + mIntermediatePatches[rootIndex].maxPenetration = mIntermediatePatches[numPatches].maxPenetration = PxMin(mIntermediatePatches[rootIndex].maxPenetration, mOriginalContacts[m].separation); + mIntermediatePatches[numPatches].rootIndex = rootIndex; + mIntermediatePatches[numPatches].index = numPatches; + } + ++numPatches; + } + } + mIntermediatePatches[numPatches-1].stride = PxU16(m - mIntermediatePatches[numPatches-1].startIndex); + + //OK, we have a list of contact patches so that we can start contact reduction per-patch + + //OK, now we can go and reduce the contacts on a per-patch basis... + + for(PxU32 a = 0; a < numPatches; ++a) + { + mIntermediatePatchesPtrs[a] = &mIntermediatePatches[a]; + } + + + SortBoundsPredicateManifold predicate; + Ps::sort(mIntermediatePatchesPtrs, numPatches, predicate); + + PxU32 numReducedPatches = 0; + for(PxU32 a = 0; a < numPatches; ++a) + { + if(mIntermediatePatchesPtrs[a]->rootIndex == mIntermediatePatchesPtrs[a]->index) + { + //Reduce this patch... + if(numReducedPatches == MaxPatches) + break; + + ReducedContactPatch& reducedPatch = mPatches[numReducedPatches++]; + //OK, now we need to work out if we have to reduce patches... 
+ PxU32 contactCount = 0; + { + ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a]; + + while(tmpPatch) + { + contactCount += tmpPatch->stride; + tmpPatch = tmpPatch->mNextPatch; + } + } + + if(contactCount <= CONTACT_REDUCTION_MAX_CONTACTS) + { + //Just add the contacts... + ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a]; + + PxU32 ind = 0; + while(tmpPatch) + { + for(PxU32 b = 0; b < tmpPatch->stride; ++b) + { + reducedPatch.contactPoints[ind++] = tmpPatch->startIndex + b; + } + tmpPatch = tmpPatch->mNextPatch; + } + reducedPatch.numContactPoints = contactCount; + } + else + { + //Iterate through and find the most extreme point + + + PxU32 ind = 0; + + { + PxReal dist = 0.f; + ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a]; + while(tmpPatch) + { + for(PxU32 b = 0; b < tmpPatch->stride; ++b) + { + PxReal magSq = mOriginalContacts[tmpPatch->startIndex + b].point.magnitudeSquared(); + if(dist < magSq) + { + ind = tmpPatch->startIndex + b; + dist = magSq; + } + } + tmpPatch = tmpPatch->mNextPatch; + } + } + reducedPatch.contactPoints[0] = ind; + const PxVec3 p0 = mOriginalContacts[ind].point; + + //Now find the point farthest from this point... 
+ { + PxReal maxDist = 0.f; + ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a]; + while(tmpPatch) + { + for(PxU32 b = 0; b < tmpPatch->stride; ++b) + { + PxReal magSq = (p0 - mOriginalContacts[tmpPatch->startIndex + b].point).magnitudeSquared(); + if(magSq > maxDist) + { + ind = tmpPatch->startIndex + b; + maxDist = magSq; + } + } + tmpPatch = tmpPatch->mNextPatch; + } + } + reducedPatch.contactPoints[1] = ind; + const PxVec3 p1 = mOriginalContacts[ind].point; + + //Now find the point farthest from the segment + + PxVec3 n = (p0 - p1).cross(mIntermediatePatchesPtrs[a]->rootNormal); + + //PxReal tVal = 0.f; + { + PxReal maxDist = 0.f; + //PxReal tmpTVal; + + ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a]; + while(tmpPatch) + { + for(PxU32 b = 0; b < tmpPatch->stride; ++b) + { + + //PxReal magSq = tmpDistancePointSegmentSquared(p0, p1, mOriginalContacts[tmpPatch->startIndex + b].point, tmpTVal); + PxReal magSq = (mOriginalContacts[tmpPatch->startIndex + b].point - p0).dot(n); + if(magSq > maxDist) + { + ind = tmpPatch->startIndex + b; + //tVal = tmpTVal; + maxDist = magSq; + } + } + tmpPatch = tmpPatch->mNextPatch; + } + } + reducedPatch.contactPoints[2] = ind; + + //const PxVec3 closest = (p0 + (p1 - p0) * tVal); + + const PxVec3 dir = -n;//closest - p3; + + { + PxReal maxDist = 0.f; + //PxReal tVal = 0.f; + ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a]; + while(tmpPatch) + { + for(PxU32 b = 0; b < tmpPatch->stride; ++b) + { + PxReal magSq = (mOriginalContacts[tmpPatch->startIndex + b].point - p0).dot(dir); + if(magSq > maxDist) + { + ind = tmpPatch->startIndex + b; + maxDist = magSq; + } + } + tmpPatch = tmpPatch->mNextPatch; + } + } + reducedPatch.contactPoints[3] = ind; + + //Now, we iterate through all the points, and cluster the points. 
From this, we establish the deepest point that's within a + //tolerance of this point and keep that point + + PxReal separation[CONTACT_REDUCTION_MAX_CONTACTS]; + PxU32 deepestInd[CONTACT_REDUCTION_MAX_CONTACTS]; + for(PxU32 i = 0; i < 4; ++i) + { + PxU32 index = reducedPatch.contactPoints[i]; + separation[i] = mOriginalContacts[index].separation - PXS_SEPARATION_TOLERANCE; + deepestInd[i] = index; + } + + ContactPatch* tmpPatch = mIntermediatePatchesPtrs[a]; + while(tmpPatch) + { + for(PxU32 b = 0; b < tmpPatch->stride; ++b) + { + Gu::ContactPoint& point = mOriginalContacts[tmpPatch->startIndex + b]; + + PxReal distance = PX_MAX_REAL; + PxU32 index = 0; + for(PxU32 c = 0; c < 4; ++c) + { + PxVec3 dif = mOriginalContacts[reducedPatch.contactPoints[c]].point - point.point; + PxReal d = dif.magnitudeSquared(); + if(distance > d) + { + distance = d; + index = c; + } + } + if(separation[index] > point.separation) + { + deepestInd[index] = tmpPatch->startIndex+b; + separation[index] = point.separation; + } + + } + tmpPatch = tmpPatch->mNextPatch; + } + + bool chosen[64]; + PxMemZero(chosen, sizeof(chosen)); + for(PxU32 i = 0; i < 4; ++i) + { + reducedPatch.contactPoints[i] = deepestInd[i]; + chosen[deepestInd[i]] = true; + } + + for(PxU32 i = 4; i < CONTACT_REDUCTION_MAX_CONTACTS; ++i) + { + separation[i] = PX_MAX_REAL; + deepestInd[i] = 0; + } + tmpPatch = mIntermediatePatchesPtrs[a]; + while(tmpPatch) + { + for(PxU32 b = 0; b < tmpPatch->stride; ++b) + { + if(!chosen[tmpPatch->startIndex+b]) + { + Gu::ContactPoint& point = mOriginalContacts[tmpPatch->startIndex + b]; + for(PxU32 j = 4; j < CONTACT_REDUCTION_MAX_CONTACTS; ++j) + { + if(point.separation < separation[j]) + { + for(PxU32 k = CONTACT_REDUCTION_MAX_CONTACTS-1; k > j; --k) + { + separation[k] = separation[k-1]; + deepestInd[k] = deepestInd[k-1]; + } + separation[j] = point.separation; + deepestInd[j] = tmpPatch->startIndex+b; + break; + } + } + } + } + tmpPatch = tmpPatch->mNextPatch; + } + + for(PxU32 i = 
4; i < CONTACT_REDUCTION_MAX_CONTACTS; ++i) + { + reducedPatch.contactPoints[i] = deepestInd[i]; + } + + reducedPatch.numContactPoints = CONTACT_REDUCTION_MAX_CONTACTS; + } + } + } + mNumPatches = numReducedPatches; + } + + }; +} + +} + + +#endif //DY_CONTACT_REDUCTION_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyCorrelationBuffer.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyCorrelationBuffer.h new file mode 100644 index 00000000..9e4d491d --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyCorrelationBuffer.h @@ -0,0 +1,104 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+
+
+#ifndef DY_CORRELATIONBUFFER_H
+#define DY_CORRELATIONBUFFER_H
+
+#include "PxvConfig.h"
+#include "foundation/PxSimpleTypes.h"
+#include "foundation/PxVec3.h"
+#include "foundation/PxTransform.h"
+#include "DyFrictionPatch.h"
+#include "GuContactBuffer.h"
+
+namespace physx
+{
+
+struct PxcNpWorkUnit;
+struct PxsMaterialInfo;
+
+namespace Dy
+{
+
+struct CorrelationBuffer	// fixed-capacity storage for contact-patch and friction-patch data; passed by non-const reference to the three functions declared after it
+{
+	static const PxU32 MAX_FRICTION_PATCHES = 32;	// capacity of every per-friction-patch array in this struct
+	static const PxU16 LIST_END = 0xffff;	// NOTE(review): presumably the end-of-list value for the PxU16 `next` links - confirm in the implementation
+
+	struct ContactPatchData	// one entry of contactPatches[]
+	{
+		PxU16 start;	// NOTE(review): presumably index of the patch's first contact - confirm
+		PxU16 next;	// NOTE(review): presumably index of the next patch in a list, LIST_END when last - confirm
+		PxU8 flags;	// meaning of the bits is not visible in this header
+		PxU8 count;	// NOTE(review): presumably number of contacts in the patch - confirm
+		PxReal staticFriction, dynamicFriction, restitution;	// material coefficients stored per patch
+	};
+
+	// we can have as many contact patches as contacts, unfortunately
+	ContactPatchData contactPatches[Gu::ContactBuffer::MAX_CONTACTS];	// hence sized by the contact buffer's MAX_CONTACTS
+
+	FrictionPatch PX_ALIGN(16, frictionPatches[MAX_FRICTION_PATCHES]);	// array declared through PX_ALIGN(16, ...)
+	PxVec3 PX_ALIGN(16, frictionPatchWorldNormal[MAX_FRICTION_PATCHES]);	// one PxVec3 per friction patch, also declared through PX_ALIGN(16, ...)
+
+	PxU32 frictionPatchContactCounts[MAX_FRICTION_PATCHES];	// one counter per friction patch
+	PxU32 correlationListHeads[MAX_FRICTION_PATCHES+1];	// one entry more than MAX_FRICTION_PATCHES; the use of the extra slot is not visible in this header
+
+	// contact IDs are only used to identify auxiliary contact data when velocity
+	// targets have been set.
+	PxU16 contactID[MAX_FRICTION_PATCHES][2];	// two PxU16 IDs per friction patch
+
+	PxU32 contactPatchCount, frictionPatchCount;	// NOTE(review): presumably the number of used entries in contactPatches / frictionPatches - confirm
+
+};
+
+bool createContactPatches(CorrelationBuffer& fb, const Gu::ContactPoint* cb, PxU32 contactCount, PxReal normalTolerance);	// declaration only; the meaning of the bool result is defined by the implementation, not shown here
+
+bool correlatePatches(CorrelationBuffer& fb,	// declaration only; the meaning of the bool result is defined by the implementation, not shown here
+					  const Gu::ContactPoint* cb,
+					  const PxTransform& bodyFrame0,
+					  const PxTransform& bodyFrame1,
+					  PxReal normalTolerance,
+					  PxU32 startContactPatchIndex,
+					  PxU32 startFrictionPatchIndex);
+
+void growPatches(CorrelationBuffer& fb,	// declaration only; takes the buffer by non-const reference
+				 const Gu::ContactPoint* buffer,
+				 const PxTransform& bodyFrame0,
+				 const PxTransform& bodyFrame1,
+				 PxReal normalTolerance,
+				 PxU32 frictionPatchStartIndex,
+				 PxReal frictionOffsetThreshold);
+
+}
+
+}
+
+#endif //DY_CORRELATIONBUFFER_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.cpp
new file mode 100644
index 00000000..07f3b642
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.cpp
@@ -0,0 +1,2950 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#include "PsTime.h" +#include "PsAtomic.h" +#include "PxvDynamics.h" + +#include "foundation/PxProfiler.h" +#include "PxsRigidBody.h" +#include "PxsContactManager.h" +#include "DyDynamics.h" +#include "DyBodyCoreIntegrator.h" +#include "DySolverCore.h" +#include "DySolverControl.h" +#include "DySolverContact.h" +#include "DySolverContactPF.h" +#include "DyArticulationContactPrep.h" +#include "DySolverBody.h" + +#include "DyConstraintPrep.h" +#include "DyConstraintPartition.h" +#include "DyArticulation.h" + +#include "CmFlushPool.h" +#include "DyArticulationPImpl.h" +#include "PxsMaterialManager.h" +#include "DySolverContactPF4.h" +#include "DyContactReduction.h" +#include "PxcNpContactPrepShared.h" +#include "DyContactPrep.h" +#include "DySolverControlPF.h" +#include "PxSceneDesc.h" +#include "PxsSimpleIslandManager.h" +#include "PxvNphaseImplementationContext.h" +#include "PxsContactManagerState.h" +#include "PxsDefaultMemoryManager.h" +#include "DyContactPrepShared.h" + +//KS - used to turn on/off batched SIMD constraints. +#define DY_BATCH_CONSTRAINTS 1 +//KS - used to specifically turn on/off batched 1D SIMD constraints. 
+#define DY_BATCH_1D 1
+
+namespace physx
+{
+namespace Dy
+{
+
+// Bundle of pointers to the arrays and tables a group of islands is solved with,
+// plus the island id list and its length.
+// The struct declares no destructor and frees nothing. It is stored and copied by
+// value inside solver tasks (e.g. PxsSolverConstraintPostProcessTask::mObjects), so
+// every member must hold a determinate value after default construction.
+struct SolverIslandObjects
+{
+	PxsRigidBody**				bodies;
+	Articulation**				articulations;
+	Dy::Articulation**			articulationOwners;
+	PxsIndexedContactManager*	contactManagers;
+	//PxsIndexedConstraint*		constraints;
+
+	const IG::IslandId*			islandIds;		// numIslands entries
+	PxU32						numIslands;
+	PxU32*						bodyRemapTable;	// written through by PxsSolverStartTask::startTasks
+	PxU32*						nodeIndexArray;
+
+	PxSolverConstraintDesc*		constraintDescs;
+	PxSolverConstraintDesc*		orderedConstraintDescs;
+	PxSolverConstraintDesc*		tempConstraintDescs;
+	PxConstraintBatchHeader*	constraintBatchHeaders;
+	Cm::SpatialVector*			motionVelocities;
+	PxsBodyCore**				bodyCoreArray;
+
+	// Nulls every pointer and zeroes the island count. The initializers follow the
+	// member declaration order; bodyRemapTable is part of the list so that a
+	// default-constructed object (and any copy of it) never carries an
+	// indeterminate pointer value.
+	SolverIslandObjects() : bodies(NULL), articulations(NULL), articulationOwners(NULL),
+		contactManagers(NULL), islandIds(NULL), numIslands(0), bodyRemapTable(NULL), nodeIndexArray(NULL),
+		constraintDescs(NULL), orderedConstraintDescs(NULL), tempConstraintDescs(NULL),
+		constraintBatchHeaders(NULL), motionVelocities(NULL), bodyCoreArray(NULL)
+	{
+	}
+};
+
+// Factory entry point: forwards every argument unchanged to DynamicsContext::create()
+// and returns the result through the Context base-class pointer. The return value is
+// whatever DynamicsContext::create() returns.
+Context* createDynamicsContext(	PxcNpMemBlockPool* memBlockPool,
+								PxcScratchAllocator& scratchAllocator, Cm::FlushPool& taskPool,
+								PxvSimStats& simStats, PxTaskManager* taskManager, Ps::VirtualAllocatorCallback* allocatorCallback, PxsMaterialManager* materialManager,
+								IG::IslandSim* accurateIslandSim, PxU64 contextID,
+								const bool enableStabilization, const bool useEnhancedDeterminism, const bool useAdaptiveForce
+								)
+{
+	return DynamicsContext::create(	memBlockPool, scratchAllocator, taskPool, simStats, taskManager, allocatorCallback, materialManager, accurateIslandSim,
+									contextID, enableStabilization, useEnhancedDeterminism, useAdaptiveForce);
+}
+
+// PT: TODO: consider removing this function. We already have "createDynamicsContext".
+DynamicsContext* DynamicsContext::create( PxcNpMemBlockPool* memBlockPool, + PxcScratchAllocator& scratchAllocator, + Cm::FlushPool& taskPool, + PxvSimStats& simStats, + PxTaskManager* taskManager, + Ps::VirtualAllocatorCallback* allocatorCallback, + PxsMaterialManager* materialManager, + IG::IslandSim* accurateIslandSim, + PxU64 contextID, + const bool enableStabilization, + const bool useEnhancedDeterminism, + const bool useAdaptiveForce + ) +{ + // PT: TODO: inherit from UserAllocated, remove placement new + DynamicsContext* dc = reinterpret_cast<DynamicsContext*>(PX_ALLOC(sizeof(DynamicsContext), "DynamicsContext")); + if(dc) + { + new(dc)DynamicsContext(memBlockPool, scratchAllocator, taskPool, simStats, taskManager, allocatorCallback, materialManager, accurateIslandSim, contextID, enableStabilization, useEnhancedDeterminism, useAdaptiveForce); + } + return dc; +} + + +void DynamicsContext::destroy() +{ + this->~DynamicsContext(); + PX_FREE(this); +} + +void DynamicsContext::resetThreadContexts() +{ + PxcThreadCoherentCacheIterator<ThreadContext, PxcNpMemBlockPool> threadContextIt(mThreadContextPool); + ThreadContext* threadContext = threadContextIt.getNext(); + + while(threadContext != NULL) + { + threadContext->reset(); + threadContext = threadContextIt.getNext(); + } +} + + +// =========================== Basic methods + + +DynamicsContext::DynamicsContext( PxcNpMemBlockPool* memBlockPool, + PxcScratchAllocator& scratchAllocator, + Cm::FlushPool& taskPool, + PxvSimStats& simStats, + PxTaskManager* taskManager, + Ps::VirtualAllocatorCallback* allocatorCallback, + PxsMaterialManager* materialManager, + IG::IslandSim* accurateIslandSim, + PxU64 contextID, + const bool enableStabilization, + const bool useEnhancedDeterminism, + const bool useAdaptiveForce + ) : + Dy::Context (accurateIslandSim, allocatorCallback, simStats, enableStabilization, useEnhancedDeterminism, useAdaptiveForce), + mThreadContextPool (memBlockPool), + mMaterialManager (materialManager), 
+ mScratchAllocator (scratchAllocator), + mTaskPool (taskPool), + mTaskManager (taskManager), + mContextID (contextID) +{ + createThresholdStream(*allocatorCallback); + createForceChangeThresholdStream(*allocatorCallback); + mExceededForceThresholdStream[0] = PX_PLACEMENT_NEW(PX_ALLOC(sizeof(ThresholdStream), PX_DEBUG_EXP("ExceededForceThresholdStream[0]")), ThresholdStream(*allocatorCallback)); + mExceededForceThresholdStream[1] = PX_PLACEMENT_NEW(PX_ALLOC(sizeof(ThresholdStream), PX_DEBUG_EXP("ExceededForceThresholdStream[1]")), ThresholdStream(*allocatorCallback)); + mThresholdStreamOut = 0; + mCurrentIndex = 0; + mWorldSolverBody.linearVelocity = PxVec3(0); + mWorldSolverBody.angularState = PxVec3(0); + mWorldSolverBodyData.invMass = 0; + mWorldSolverBodyData.sqrtInvInertia = PxMat33(PxZero); + mWorldSolverBodyData.nodeIndex = IG_INVALID_NODE; + mWorldSolverBodyData.reportThreshold = PX_MAX_REAL; + mWorldSolverBodyData.penBiasClamp = -PX_MAX_REAL; + mWorldSolverBodyData.maxContactImpulse = PX_MAX_REAL; + mWorldSolverBody.solverProgress=MAX_PERMITTED_SOLVER_PROGRESS; + mWorldSolverBody.maxSolverNormalProgress=MAX_PERMITTED_SOLVER_PROGRESS; + mWorldSolverBody.maxSolverFrictionProgress=MAX_PERMITTED_SOLVER_PROGRESS; + mWorldSolverBodyData.linearVelocity = mWorldSolverBodyData.angularVelocity = PxVec3(0.f); + mWorldSolverBodyData.body2World = PxTransform(PxIdentity); + mWorldSolverBodyData.lockFlags = 0; + mSolverCore[PxFrictionType::ePATCH] = SolverCoreGeneral::create(); + mSolverCore[PxFrictionType::eONE_DIRECTIONAL] = SolverCoreGeneralPF::create(); + mSolverCore[PxFrictionType::eTWO_DIRECTIONAL] = SolverCoreGeneralPF::create(); +} + +DynamicsContext::~DynamicsContext() +{ + for(PxU32 i = 0; i < PxFrictionType::eFRICTION_COUNT; ++i) + { + mSolverCore[i]->destroyV(); + } + + if(mExceededForceThresholdStream[0]) + { + mExceededForceThresholdStream[0]->~ThresholdStream(); + PX_FREE(mExceededForceThresholdStream[0]); + } + mExceededForceThresholdStream[0] = NULL; + + 
if(mExceededForceThresholdStream[1]) + { + mExceededForceThresholdStream[1]->~ThresholdStream(); + PX_FREE(mExceededForceThresholdStream[1]); + } + mExceededForceThresholdStream[1] = NULL; + +} + +#if PX_ENABLE_SIM_STATS +void DynamicsContext::addThreadStats(const ThreadContext::ThreadSimStats& stats) +{ + mSimStats.mNbActiveConstraints += stats.numActiveConstraints; + mSimStats.mNbActiveDynamicBodies += stats.numActiveDynamicBodies; + mSimStats.mNbActiveKinematicBodies += stats.numActiveKinematicBodies; + mSimStats.mNbAxisSolverConstraints += stats.numAxisSolverConstraints; +} +#endif + +// =========================== Solve methods! + +void DynamicsContext::setDescFromIndices(PxSolverConstraintDesc& desc, const PxsIndexedInteraction& constraint, const PxU32 solverBodyOffset) +{ + PX_COMPILE_TIME_ASSERT(PxsIndexedInteraction::eBODY == 0); + PX_COMPILE_TIME_ASSERT(PxsIndexedInteraction::eKINEMATIC == 1); + const PxU32 offsetMap[] = {solverBodyOffset, 0}; + //const PxU32 offsetMap[] = {mKinematicCount, 0}; + + if(constraint.indexType0 == PxsIndexedInteraction::eARTICULATION) + { + Articulation* a = getArticulation(constraint.articulation0); + desc.articulationA = a->getFsDataPtr(); + desc.articulationALength = Ps::to16(a->getSolverDataSize()); + PX_ASSERT(0==(desc.articulationALength & 0x0f)); + desc.linkIndexA = Ps::to16(a->getLinkIndex(constraint.articulation0)); + } + else + { + desc.linkIndexA = PxSolverConstraintDesc::NO_LINK; + //desc.articulationALength = 0; //this is unioned with bodyADataIndex + /*desc.bodyA = constraint.indexType0 == PxsIndexedInteraction::eWORLD ? &mWorldSolverBody + : &mSolverBodyPool[(PxU32)constraint.solverBody0 + offsetMap[constraint.indexType0]]; + desc.bodyADataIndex = PxU16(constraint.indexType0 == PxsIndexedInteraction::eWORLD ? 0 + : (PxU16)constraint.solverBody0 + 1 + offsetMap[constraint.indexType0]);*/ + + desc.bodyA = constraint.indexType0 == PxsIndexedInteraction::eWORLD ? 
&mWorldSolverBody + : &mSolverBodyPool[PxU32(constraint.solverBody0) + offsetMap[constraint.indexType0]]; + desc.bodyADataIndex = PxU16(constraint.indexType0 == PxsIndexedInteraction::eWORLD ? 0 + : PxU16(constraint.solverBody0) + 1 + offsetMap[constraint.indexType0]); + } + + if(constraint.indexType1 == PxsIndexedInteraction::eARTICULATION) + { + Articulation* a = getArticulation(constraint.articulation1); + desc.articulationB = a->getFsDataPtr(); + desc.articulationBLength = Ps::to16(a->getSolverDataSize()); + PX_ASSERT(0==(desc.articulationBLength & 0x0f)); + desc.linkIndexB = Ps::to16(a->getLinkIndex(constraint.articulation1)); + } + else + { + desc.linkIndexB = PxSolverConstraintDesc::NO_LINK; + //desc.articulationBLength = 0; //this is unioned with bodyBDataIndex + desc.bodyB = constraint.indexType1 == PxsIndexedInteraction::eWORLD ? &mWorldSolverBody + : &mSolverBodyPool[PxU32(constraint.solverBody1) + offsetMap[constraint.indexType1]]; + desc.bodyBDataIndex = PxU16(constraint.indexType1 == PxsIndexedInteraction::eWORLD ? 
0 + : PxU16(constraint.solverBody1) + 1 + offsetMap[constraint.indexType1]); + } +} + +void DynamicsContext::setDescFromIndices(PxSolverConstraintDesc& desc, IG::EdgeIndex edgeIndex, const IG::SimpleIslandManager& islandManager, + PxU32* bodyRemap, const PxU32 solverBodyOffset) +{ + PX_COMPILE_TIME_ASSERT(PxsIndexedInteraction::eBODY == 0); + PX_COMPILE_TIME_ASSERT(PxsIndexedInteraction::eKINEMATIC == 1); + + const IG::IslandSim& islandSim = islandManager.getAccurateIslandSim(); + + IG::NodeIndex node1 = islandSim.getNodeIndex1(edgeIndex); + if (node1.isStaticBody()) + { + desc.bodyA = &mWorldSolverBody; + desc.bodyADataIndex = 0; + desc.linkIndexA = PxSolverConstraintDesc::NO_LINK; + } + else + { + const IG::Node& node = islandSim.getNode(node1); + if (node.getNodeType() == IG::Node::eARTICULATION_TYPE) + { + Dy::Articulation* a = islandSim.getLLArticulation(node1); + desc.articulationA = a->getFsDataPtr(); + desc.articulationALength = Ps::to16(a->getSolverDataSize()); + PX_ASSERT(0 == (desc.articulationALength & 0x0f)); + desc.linkIndexA = Ps::to16(node1.articulationLinkId()); + } + else + { + PxU32 activeIndex = islandSim.getActiveNodeIndex(node1); + PxU32 index = node.isKinematic() ? 
activeIndex : bodyRemap[activeIndex] + solverBodyOffset; + desc.bodyA = &mSolverBodyPool[index]; + desc.bodyADataIndex = Ps::to16(index + 1); + desc.linkIndexA = PxSolverConstraintDesc::NO_LINK; + } + } + + IG::NodeIndex node2 = islandSim.getNodeIndex2(edgeIndex); + if (node2.isStaticBody()) + { + desc.bodyB = &mWorldSolverBody; + desc.bodyBDataIndex = 0; + desc.linkIndexB = PxSolverConstraintDesc::NO_LINK; + } + else + { + const IG::Node& node = islandSim.getNode(node2); + if (node.getNodeType() == IG::Node::eARTICULATION_TYPE) + { + Dy::Articulation* a = islandSim.getLLArticulation(node2); + desc.articulationB = a->getFsDataPtr(); + desc.articulationBLength = Ps::to16(a->getSolverDataSize()); + PX_ASSERT(0 == (desc.articulationBLength & 0x0f)); + desc.linkIndexB = Ps::to16(node2.articulationLinkId()); + } + else + { + PxU32 activeIndex = islandSim.getActiveNodeIndex(node2); + PxU32 index = node.isKinematic() ? activeIndex : bodyRemap[activeIndex] + solverBodyOffset; + desc.bodyB = &mSolverBodyPool[index]; + desc.bodyBDataIndex = Ps::to16(index + 1); + desc.linkIndexB = PxSolverConstraintDesc::NO_LINK; + } + } +} + + +class PxsPreIntegrateTask : public Cm::Task +{ + PxsPreIntegrateTask& operator=(const PxsPreIntegrateTask&); +public: + PxsPreIntegrateTask( DynamicsContext& context, + PxsBodyCore*const* bodyArray, + PxsRigidBody*const* originalBodyArray, + PxU32 const* nodeIndexArray, + PxSolverBody* solverBodies, + PxSolverBodyData* solverBodyDataPool, + PxF32 dt, + PxU32 numBodies, + volatile PxU32* maxSolverPositionIterations, + volatile PxU32* maxSolverVelocityIterations, + const PxU32 startIndex, + const PxU32 numToIntegrate, + const PxVec3& gravity) : + mContext(context), + mBodyArray(bodyArray), + mOriginalBodyArray(originalBodyArray), + mNodeIndexArray(nodeIndexArray), + mSolverBodies(solverBodies), + mSolverBodyDataPool(solverBodyDataPool), + mDt(dt), + mNumBodies(numBodies), + mMaxSolverPositionIterations(maxSolverPositionIterations), + 
mMaxSolverVelocityIterations(maxSolverVelocityIterations), + mStartIndex(startIndex), + mNumToIntegrate(numToIntegrate), + mGravity(gravity) + {} + + virtual void runInternal(); + + virtual const char* getName() const + { + return "PxsDynamics.preIntegrate"; + } + +public: + DynamicsContext& mContext; + PxsBodyCore*const* mBodyArray; + PxsRigidBody*const* mOriginalBodyArray; + PxU32 const* mNodeIndexArray; + PxSolverBody* mSolverBodies; + PxSolverBodyData* mSolverBodyDataPool; + PxF32 mDt; + PxU32 mNumBodies; + volatile PxU32* mMaxSolverPositionIterations; + volatile PxU32* mMaxSolverVelocityIterations; + PxU32 mStartIndex; + PxU32 mNumToIntegrate; + PxVec3 mGravity; + +}; + + + +class PxsParallelSolverTask : public Cm::Task +{ + PxsParallelSolverTask& operator=(PxsParallelSolverTask&); +public: + + PxsParallelSolverTask(SolverIslandParams& params, DynamicsContext& context, PxFrictionType::Enum frictionType, IG::IslandSim& islandSim) + : mParams(params), mContext(context), mFrictionType(frictionType), mIslandSim(islandSim) + { + } + + virtual void runInternal() + { + solveParallel(mContext, mParams, mIslandSim); + } + + virtual const char* getName() const + { + return "PxsDynamics.parallelSolver"; + } + + SolverIslandParams& mParams; + DynamicsContext& mContext; + PxFrictionType::Enum mFrictionType; + IG::IslandSim& mIslandSim; +}; + + +#define PX_CONTACT_REDUCTION 1 + +class PxsSolverConstraintPostProcessTask : public Cm::Task +{ + PxsSolverConstraintPostProcessTask& operator=(const PxsSolverConstraintPostProcessTask&); +public: + + PxsSolverConstraintPostProcessTask(DynamicsContext& context, + ThreadContext& threadContext, + const SolverIslandObjects& objects, + const PxU32 solverBodyOffset, + PxU32 startIndex, + PxU32 stride, + PxsMaterialManager* materialManager, + PxsContactManagerOutputIterator& iterator) : + mContext(context), + mThreadContext(threadContext), + mObjects(objects), + mSolverBodyOffset(solverBodyOffset), + mStartIndex(startIndex), + 
mStride(stride), + mMaterialManager(materialManager), + mOutputs(iterator) + {} + + void mergeContacts(CompoundContactManager& header, ThreadContext& threadContext) + { + Gu::ContactBuffer& buffer = threadContext.mContactBuffer; + PxsMaterialInfo materialInfo[Gu::ContactBuffer::MAX_CONTACTS]; + PxU32 size = 0; + + for(PxU32 a = 0; a < header.mStride; ++a) + { + PxsContactManager* manager = mThreadContext.orderedContactList[a+header.mStartIndex]->contactManager; + PxcNpWorkUnit& unit = manager->getWorkUnit(); + PxsContactManagerOutput& output = mOutputs.getContactManager(unit.mNpIndex); + PxContactStreamIterator iter(output.contactPatches, output.contactPoints, output.getInternalFaceIndice(), output.nbPatches, output.nbContacts); + + PxU32 origSize = size; + PX_UNUSED(origSize); + if(!iter.forceNoResponse) + { + while(iter.hasNextPatch()) + { + iter.nextPatch(); + while(iter.hasNextContact()) + { + PX_ASSERT(size < Gu::ContactBuffer::MAX_CONTACTS); + iter.nextContact(); + PxsMaterialInfo& info = materialInfo[size]; + Gu::ContactPoint& point = buffer.contacts[size++]; + point.dynamicFriction = iter.getDynamicFriction(); + point.staticFriction = iter.getStaticFriction(); + point.restitution = iter.getRestitution(); + point.internalFaceIndex1 = iter.getFaceIndex1(); + point.materialFlags = PxU8(iter.getMaterialFlags()); + point.maxImpulse = iter.getMaxImpulse(); + point.targetVel = iter.getTargetVel(); + point.normal = iter.getContactNormal(); + point.point = iter.getContactPoint(); + point.separation = iter.getSeparation(); + info.mMaterialIndex0 = iter.getMaterialIndex0(); + info.mMaterialIndex1 = iter.getMaterialIndex1(); + } + } + PX_ASSERT(output.nbContacts == (size - origSize)); + } + } + + PxU32 origSize = size; +#if PX_CONTACT_REDUCTION + ContactReduction<6> reduction(buffer.contacts, materialInfo, size); + reduction.reduceContacts(); + //OK, now we write back the contacts... 
+ + PxU8 histo[Gu::ContactBuffer::MAX_CONTACTS]; + PxMemZero(histo, sizeof(histo)); + + size = 0; + for(PxU32 a = 0; a < reduction.mNumPatches; ++a) + { + ReducedContactPatch& patch = reduction.mPatches[a]; + for(PxU32 b = 0; b < patch.numContactPoints; ++b) + { + histo[patch.contactPoints[b]] = 1; + ++size; + } + } +#endif + + PxU16* PX_RESTRICT data = reinterpret_cast<PxU16*>(threadContext.mConstraintBlockStream.reserve(size * sizeof(PxU16), mThreadContext.mConstraintBlockManager)); + header.forceBufferList = data; + + +#if PX_CONTACT_REDUCTION + const PxU32 reservedSize = size; + PX_UNUSED(reservedSize); + size = 0; + for(PxU32 a = 0; a < origSize; ++a) + { + if(histo[a]) + { + if(size != a) + { + buffer.contacts[size] = buffer.contacts[a]; + materialInfo[size] = materialInfo[a]; + } + data[size] = Ps::to16(a); + size++; + } + } + PX_ASSERT(reservedSize >= size); +#else + for(PxU32 a = 0; a < size; ++a) + data[a] = a; +#endif + + + PxU32 contactForceByteSize = size * sizeof(PxReal); + + + PxsContactManagerOutput& output = mOutputs.getContactManager(header.unit->mNpIndex); + + PxU16 compressedContactSize; + + physx::writeCompressedContact(buffer.contacts, size, NULL, output.nbContacts, output.contactPatches, output.contactPoints, compressedContactSize, + reinterpret_cast<PxReal*&>(output.contactForces), contactForceByteSize, mMaterialManager, false, + false, materialInfo, output.nbPatches, 0, &mThreadContext.mConstraintBlockManager, &threadContext.mConstraintBlockStream, false); + } + + virtual void runInternal() + { + PxU32 endIndex = mStartIndex + mStride; + + ThreadContext* threadContext = mContext.getThreadContext(); + //TODO - we need to do this somewhere else + //threadContext->mContactBlockStream.reset(); + threadContext->mConstraintBlockStream.reset(); + + for(PxU32 a = mStartIndex; a < endIndex; ++a) + { + mergeContacts(mThreadContext.compoundConstraints[a], *threadContext); + } + mContext.putThreadContext(threadContext); + } + + virtual const char* 
getName() const { return "PxsDynamics.solverConstraintPostProcess"; } + + + DynamicsContext& mContext; + ThreadContext& mThreadContext; + const SolverIslandObjects mObjects; + PxU32 mSolverBodyOffset; + PxU32 mStartIndex; + PxU32 mStride; + PxsMaterialManager* mMaterialManager; + PxsContactManagerOutputIterator& mOutputs; +}; + +class PxsForceThresholdTask : public Cm::Task +{ + DynamicsContext& mDynamicsContext; + + PxsForceThresholdTask& operator=(const PxsForceThresholdTask&); +public: + + PxsForceThresholdTask(DynamicsContext& context): mDynamicsContext(context) + { + } + + void createForceChangeThresholdStream() + { + ThresholdStream& thresholdStream = mDynamicsContext.getThresholdStream(); + //bool haveThresholding = thresholdStream.size()!=0; + + ThresholdTable& thresholdTable = mDynamicsContext.getThresholdTable(); + thresholdTable.build(thresholdStream); + + //generate current force exceeded threshold stream + ThresholdStream& curExceededForceThresholdStream = *mDynamicsContext.mExceededForceThresholdStream[mDynamicsContext.mCurrentIndex]; + ThresholdStream& preExceededForceThresholdStream = *mDynamicsContext.mExceededForceThresholdStream[1 - mDynamicsContext.mCurrentIndex]; + curExceededForceThresholdStream.forceSize_Unsafe(0); + + //fill in the currrent exceeded force threshold stream + for(PxU32 i=0; i<thresholdTable.mPairsSize; ++i) + { + ThresholdTable::Pair& pair = thresholdTable.mPairs[i]; + ThresholdStreamElement& elem = thresholdStream[pair.thresholdStreamIndex]; + if(pair.accumulatedForce > elem.threshold * mDynamicsContext.mDt) + { + elem.accumulatedForce = pair.accumulatedForce; + curExceededForceThresholdStream.pushBack(elem); + } + } + + ThresholdStream& forceChangeThresholdStream = mDynamicsContext.getForceChangedThresholdStream(); + forceChangeThresholdStream.forceSize_Unsafe(0); + Ps::Array<PxU32>& forceChangeMask = mDynamicsContext.mExceededForceThresholdStreamMask; + + const PxU32 nbPreExceededForce = 
preExceededForceThresholdStream.size(); + const PxU32 nbCurExceededForce = curExceededForceThresholdStream.size(); + + //generate force change thresholdStream + if(nbPreExceededForce) + { + thresholdTable.build(preExceededForceThresholdStream); + + //set force change mask + const PxU32 nbTotalExceededForce = nbPreExceededForce + nbCurExceededForce; + forceChangeMask.reserve(nbTotalExceededForce); + forceChangeMask.forceSize_Unsafe(nbTotalExceededForce); + + //initialize the forceChangeMask + for (PxU32 i = 0; i < nbTotalExceededForce; ++i) + forceChangeMask[i] = 1; + + for(PxU32 i=0; i< nbCurExceededForce; ++i) + { + ThresholdStreamElement& curElem = curExceededForceThresholdStream[i]; + + PxU32 pos; + if(thresholdTable.check(preExceededForceThresholdStream, curElem, pos)) + { + forceChangeMask[pos] = 0; + forceChangeMask[i + nbPreExceededForce] = 0; + } + } + + //create force change threshold stream + for(PxU32 i=0; i<nbTotalExceededForce; ++i) + { + const PxU32 hasForceChange = forceChangeMask[i]; + if(hasForceChange) + { + bool lostPair = (i < nbPreExceededForce); + ThresholdStreamElement& elem = lostPair ? preExceededForceThresholdStream[i] : curExceededForceThresholdStream[i - nbPreExceededForce]; + ThresholdStreamElement elt; + elt = elem; + elt.accumulatedForce = lostPair ? 
0.f : elem.accumulatedForce; + forceChangeThresholdStream.pushBack(elt); + } + else + { + //persistent pair + if (i < nbPreExceededForce) + { + ThresholdStreamElement& elem = preExceededForceThresholdStream[i]; + ThresholdStreamElement elt; + elt = elem; + elt.accumulatedForce = elem.accumulatedForce; + forceChangeThresholdStream.pushBack(elt); + } + } + } + } + else + { + forceChangeThresholdStream.reserve(nbCurExceededForce); + forceChangeThresholdStream.forceSize_Unsafe(nbCurExceededForce); + PxMemCopy(forceChangeThresholdStream.begin(), curExceededForceThresholdStream.begin(), sizeof(ThresholdStreamElement) * nbCurExceededForce); + } + } + + virtual void runInternal() + { + mDynamicsContext.getThresholdStream().forceSize_Unsafe(PxU32(mDynamicsContext.mThresholdStreamOut)); + createForceChangeThresholdStream(); + } + + virtual const char* getName() const { return "PxsDynamics.createForceChangeThresholdStream"; } +}; + + +struct ConstraintLess +{ + bool operator()(const PxSolverConstraintDesc& left, const PxSolverConstraintDesc& right) const + { + return reinterpret_cast<Constraint*>(left.constraint)->index > reinterpret_cast<Constraint*>(right.constraint)->index; + } +}; + +struct ArticulationSortPredicate +{ + bool operator()(const PxsIndexedContactManager*& left, const PxsIndexedContactManager*& right) const + { + return left->contactManager->getWorkUnit().index < right->contactManager->getWorkUnit().index; + } +}; + +class SolverArticulationUpdateTask : public Cm::Task +{ + + + ThreadContext& mIslandThreadContext; + + Articulation** mArticulations; + ArticulationSolverDesc* mArticulationDescArray; + PxU32 mNbToProcess; + + Dy::DynamicsContext& mContext; + PxU32 mStartIdx; + +public: + + static const PxU32 NbArticulationsPerTask = 8; + + SolverArticulationUpdateTask(ThreadContext& islandThreadContext, Articulation** articulations, ArticulationSolverDesc* articulationDescArray, PxU32 nbToProcess, Dy::DynamicsContext& context, + PxU32 startIdx): + 
mIslandThreadContext(islandThreadContext), mArticulations(articulations), mArticulationDescArray(articulationDescArray), mNbToProcess(nbToProcess), mContext(context), mStartIdx(startIdx) + { + } + + virtual const char* getName() const { return "SolverArticulationUpdateTask"; } + + virtual void runInternal() + { + ThreadContext& threadContext = *mContext.getThreadContext(); + + threadContext.mConstraintBlockStream.reset(); //Clear in case there's some left-over memory in this context, for which the block has already been freed + PxU32 maxVelIters = 0; + PxU32 maxPosIters = 0; + PxU32 maxArticulationLength = 0; + PxU32 maxSolverArticLength = 0; + + PxU32 startIdx = mStartIdx; + for(PxU32 i=0;i<mNbToProcess; i++) + { + Articulation& a = *(mArticulations[i]); + a.getSolverDesc(mArticulationDescArray[i]); + + PxU32 acCount, descCount; + + descCount = ArticulationPImpl::computeUnconstrainedVelocities(mArticulationDescArray[i], mContext.mDt, threadContext.mConstraintBlockStream, + mIslandThreadContext.mContactDescPtr + startIdx, acCount, mContext.getScratchAllocator(), + mIslandThreadContext.mConstraintBlockManager, mContext.getGravity(), mContext.getContextId()); + + mArticulationDescArray[i].numInternalConstraints = Ps::to8(descCount); + + maxArticulationLength = PxMax(maxArticulationLength, PxU32(mArticulationDescArray[i].totalDataSize)); + maxSolverArticLength = PxMax(maxSolverArticLength, PxU32(mArticulationDescArray[i].solverDataSize)); + + const PxU16 iterWord = a.getIterationCounts(); + maxVelIters = PxMax<PxU32>(PxU32(iterWord >> 8), maxVelIters); + maxPosIters = PxMax<PxU32>(PxU32(iterWord & 0xff), maxPosIters); + startIdx += DY_ARTICULATION_MAX_SIZE; + } + Ps::atomicMax(reinterpret_cast<PxI32*>(&mIslandThreadContext.mMaxSolverPositionIterations), PxI32(maxPosIters)); + Ps::atomicMax(reinterpret_cast<PxI32*>(&mIslandThreadContext.mMaxSolverVelocityIterations), PxI32(maxVelIters)); + 
Ps::atomicMax(reinterpret_cast<PxI32*>(&mIslandThreadContext.mMaxArticulationLength), PxI32(maxArticulationLength)); + Ps::atomicMax(reinterpret_cast<PxI32*>(&mIslandThreadContext.mMaxArticulationSolverLength), PxI32(maxSolverArticLength)); + + mContext.putThreadContext(&threadContext); + } + +private: + PX_NOCOPY(SolverArticulationUpdateTask) +}; + + +struct EnhancedSortPredicate +{ + bool operator()(const PxsIndexedContactManager& left, const PxsIndexedContactManager& right) const + { + PxcNpWorkUnit& unit0 = left.contactManager->getWorkUnit(); + PxcNpWorkUnit& unit1 = right.contactManager->getWorkUnit(); + return (unit0.mTransformCache0 < unit1.mTransformCache0) || + ((unit0.mTransformCache0 == unit1.mTransformCache0) && (unit0.mTransformCache1 < unit1.mTransformCache1)); + } +}; + + +class PxsSolverStartTask : public Cm::Task +{ + PxsSolverStartTask& operator=(const PxsSolverStartTask&); +public: + + PxsSolverStartTask(DynamicsContext& context, + IslandContext& islandContext, + const SolverIslandObjects& objects, + const PxU32 solverBodyOffset, + const PxU32 kinematicCount, + IG::SimpleIslandManager& islandManager, + PxU32* bodyRemapTable, + PxsMaterialManager* materialManager, + PxsContactManagerOutputIterator& iterator, + bool enhancedDeterminism + ) : + mContext (context), + mIslandContext (islandContext), + mObjects (objects), + mSolverBodyOffset (solverBodyOffset), + mKinematicCount (kinematicCount), + mIslandManager (islandManager), + mBodyRemapTable (bodyRemapTable), + mMaterialManager (materialManager), + mOutputs (iterator), + mEnhancedDeterminism (enhancedDeterminism) + {} + + void startTasks() + { + PX_PROFILE_ZONE("Dynamics.solveGroup", mContext.getContextId()); + { + ThreadContext& mThreadContext = *mContext.getThreadContext(); + + mIslandContext.mThreadContext = &mThreadContext; + + mThreadContext.mMaxSolverPositionIterations = 0; + mThreadContext.mMaxSolverVelocityIterations = 0; + mThreadContext.mAxisConstraintCount = 0; + 
mThreadContext.mContactDescPtr = mThreadContext.contactConstraintDescArray; + mThreadContext.mFrictionDescPtr = mThreadContext.frictionConstraintDescArray.begin(); + mThreadContext.mNumDifferentBodyConstraints = 0; + mThreadContext.mNumSelfConstraintBlocks = 0; + mThreadContext.mNumSelfConstraints = 0; + mThreadContext.mNumDifferentBodyFrictionConstraints = 0; + mThreadContext.mNumSelfConstraintFrictionBlocks = 0; + mThreadContext.mNumSelfFrictionConstraints = 0; + mThreadContext.numContactConstraintBatches = 0; + mThreadContext.contactDescArraySize = 0; + + + mThreadContext.contactConstraintDescArray = mObjects.constraintDescs; + mThreadContext.orderedContactConstraints = mObjects.orderedConstraintDescs; + mThreadContext.mContactDescPtr = mObjects.constraintDescs; + mThreadContext.tempConstraintDescArray = mObjects.tempConstraintDescs; + mThreadContext.contactConstraintBatchHeaders = mObjects.constraintBatchHeaders; + mThreadContext.motionVelocityArray = mObjects.motionVelocities; + mThreadContext.mBodyCoreArray = mObjects.bodyCoreArray; + mThreadContext.mRigidBodyArray = mObjects.bodies; + mThreadContext.mArticulationArray = mObjects.articulations; + mThreadContext.bodyRemapTable = mObjects.bodyRemapTable; + mThreadContext.mNodeIndexArray = mObjects.nodeIndexArray; + + const PxU32 frictionConstraintCount = mContext.getFrictionType() == PxFrictionType::ePATCH ? 
0 : PxU32(mIslandContext.mCounts.contactManagers); + mThreadContext.resizeArrays(frictionConstraintCount, mIslandContext.mCounts.articulations); + + PxsBodyCore** PX_RESTRICT bodyArrayPtr = mThreadContext.mBodyCoreArray; + PxsRigidBody** PX_RESTRICT rigidBodyPtr = mThreadContext.mRigidBodyArray; + Articulation** PX_RESTRICT articulationPtr = mThreadContext.mArticulationArray; + PxU32* PX_RESTRICT bodyRemapTable = mThreadContext.bodyRemapTable; + PxU32* PX_RESTRICT nodeIndexArray = mThreadContext.mNodeIndexArray; + + PxU32 nbIslands = mObjects.numIslands; + const IG::IslandId* const islandIds = mObjects.islandIds; + + const IG::IslandSim& islandSim = mIslandManager.getAccurateIslandSim(); + + PxU32 bodyIndex = 0, articIndex = 0; + for(PxU32 i = 0; i < nbIslands; ++i) + { + const IG::Island& island = islandSim.getIsland(islandIds[i]); + + IG::NodeIndex currentIndex = island.mRootNode; + + while(currentIndex.isValid()) + { + const IG::Node& node = islandSim.getNode(currentIndex); + + if(node.getNodeType() == IG::Node::eARTICULATION_TYPE) + { + articulationPtr[articIndex++] = node.getArticulation(); + } + else + { + PxsRigidBody* rigid = node.getRigidBody(); + PX_ASSERT(bodyIndex < (mIslandContext.mCounts.bodies + mContext.mKinematicCount + 1)); + rigidBodyPtr[bodyIndex] = rigid; + bodyArrayPtr[bodyIndex] = &rigid->getCore(); + nodeIndexArray[bodyIndex] = currentIndex.index(); + bodyRemapTable[islandSim.getActiveNodeIndex(currentIndex)] = bodyIndex++; + } + + currentIndex = node.mNextNode; + } + } + + + PxsIndexedContactManager* indexedManagers = mObjects.contactManagers; + + PxU32 currentContactIndex = 0; + for(PxU32 i = 0; i < nbIslands; ++i) + { + const IG::Island& island = islandSim.getIsland(islandIds[i]); + + IG::EdgeIndex contactEdgeIndex = island.mFirstEdge[IG::Edge::eCONTACT_MANAGER]; + + while(contactEdgeIndex != IG_INVALID_EDGE) + { + const IG::Edge& edge = islandSim.getEdge(contactEdgeIndex); + + PxsContactManager* contactManager = 
mIslandManager.getContactManager(contactEdgeIndex); + + if(contactManager) + { + const IG::NodeIndex nodeIndex1 = islandSim.getNodeIndex1(contactEdgeIndex); + const IG::NodeIndex nodeIndex2 = islandSim.getNodeIndex2(contactEdgeIndex); + + PxsIndexedContactManager& indexedManager = indexedManagers[currentContactIndex++]; + indexedManager.contactManager = contactManager; + + PX_ASSERT(!nodeIndex1.isStaticBody()); + { + const IG::Node& node1 = islandSim.getNode(nodeIndex1); + + //Is it an articulation or not??? + if(node1.getNodeType() == IG::Node::eARTICULATION_TYPE) + { + indexedManager.indexType0 = PxsIndexedInteraction::eARTICULATION; + indexedManager.solverBody0 = size_t(node1.getArticulation()) | nodeIndex1.articulationLinkId(); + } + else + { + if(node1.isKinematic()) + { + indexedManager.indexType0 = PxsIndexedInteraction::eKINEMATIC; + indexedManager.solverBody0 = islandSim.getActiveNodeIndex(nodeIndex1); + } + else + { + indexedManager.indexType0 = PxsIndexedInteraction::eBODY; + indexedManager.solverBody0 = bodyRemapTable[islandSim.getActiveNodeIndex(nodeIndex1)]; + } + PX_ASSERT(indexedManager.solverBody0 < (mIslandContext.mCounts.bodies + mContext.mKinematicCount + 1)); + } + + } + + if(nodeIndex2.isStaticBody()) + { + indexedManager.indexType1 = PxsIndexedInteraction::eWORLD; + } + else + { + const IG::Node& node2 = islandSim.getNode(nodeIndex2); + + //Is it an articulation or not??? 
+ if(node2.getNodeType() == IG::Node::eARTICULATION_TYPE) + { + indexedManager.indexType1 = PxsIndexedInteraction::eARTICULATION; + indexedManager.solverBody1 = size_t(node2.getArticulation()) | nodeIndex2.articulationLinkId(); + } + else + { + if(node2.isKinematic()) + { + indexedManager.indexType1 = PxsIndexedInteraction::eKINEMATIC; + indexedManager.solverBody1 = islandSim.getActiveNodeIndex(nodeIndex2); + } + else + { + indexedManager.indexType1 = PxsIndexedInteraction::eBODY; + indexedManager.solverBody1 = bodyRemapTable[islandSim.getActiveNodeIndex(nodeIndex2)]; + } + PX_ASSERT(indexedManager.solverBody1 < (mIslandContext.mCounts.bodies + mContext.mKinematicCount + 1)); + } + } + + } + contactEdgeIndex = edge.mNextIslandEdge; + } + } + + if (mEnhancedDeterminism) + { + Ps::sort(indexedManagers, currentContactIndex, EnhancedSortPredicate()); + } + + mIslandContext.mCounts.contactManagers = currentContactIndex; + } + } + + void integrate() + { + ThreadContext& mThreadContext = *mIslandContext.mThreadContext; + PxSolverBody* solverBodies = mContext.mSolverBodyPool.begin() + mSolverBodyOffset; + PxSolverBodyData* solverBodyData = mContext.mSolverBodyDataPool.begin() + mSolverBodyOffset; + + { + PX_PROFILE_ZONE("Dynamics.updateVelocities", mContext.getContextId()); + + mContext.preIntegrationParallel( + mContext.mDt, + mThreadContext.mBodyCoreArray, + mObjects.bodies, + mThreadContext.mNodeIndexArray, + mIslandContext.mCounts.bodies, + solverBodies, + solverBodyData, + mThreadContext.motionVelocityArray, + mThreadContext.mMaxSolverPositionIterations, + mThreadContext.mMaxSolverVelocityIterations, + *mCont + ); + } + } + + void articulationTask() + { + ThreadContext& mThreadContext = *mIslandContext.mThreadContext; + ArticulationSolverDesc* articulationDescArray = mThreadContext.getArticulations().begin(); + + for(PxU32 i=0;i<mIslandContext.mCounts.articulations; i+= SolverArticulationUpdateTask::NbArticulationsPerTask) + { + + SolverArticulationUpdateTask* task = 
PX_PLACEMENT_NEW(mContext.getTaskPool().allocate(sizeof(SolverArticulationUpdateTask)), SolverArticulationUpdateTask)(mThreadContext, + &mObjects.articulations[i], &articulationDescArray[i], PxMin(SolverArticulationUpdateTask::NbArticulationsPerTask, mIslandContext.mCounts.articulations - i), mContext, + i*DY_ARTICULATION_MAX_SIZE); + + task->setContinuation(mCont); + task->removeReference(); + + } + } + + void setupDescTask() + { + ThreadContext& mThreadContext = *mIslandContext.mThreadContext; + PxSolverConstraintDesc* contactDescPtr = mThreadContext.mContactDescPtr; + + //PxU32 constraintCount = mCounts.constraints + mCounts.contactManagers; + + PxU32 nbIslands = mObjects.numIslands; + const IG::IslandId* const islandIds = mObjects.islandIds; + + const IG::IslandSim& islandSim = mIslandManager.getAccurateIslandSim(); + + for(PxU32 i = 0; i < nbIslands; ++i) + { + const IG::Island& island = islandSim.getIsland(islandIds[i]); + + IG::EdgeIndex edgeId = island.mFirstEdge[IG::Edge::eCONSTRAINT]; + + while(edgeId != IG_INVALID_EDGE) + { + PxSolverConstraintDesc& desc = *contactDescPtr; + + const IG::Edge& edge = islandSim.getEdge(edgeId); + Dy::Constraint* constraint = mIslandManager.getConstraint(edgeId); + mContext.setDescFromIndices(desc, edgeId, mIslandManager, mBodyRemapTable, mSolverBodyOffset); + desc.constraint = reinterpret_cast<PxU8*>(constraint); + desc.constraintLengthOver16 = DY_SC_TYPE_RB_1D; + contactDescPtr++; + edgeId = edge.mNextIslandEdge; + } + + } + +#if 1 + Ps::sort(mThreadContext.mContactDescPtr, PxU32(contactDescPtr - mThreadContext.mContactDescPtr), ConstraintLess()); +#endif + + + mThreadContext.orderedContactList.forceSize_Unsafe(0); + mThreadContext.orderedContactList.reserve(mIslandContext.mCounts.contactManagers); + mThreadContext.orderedContactList.forceSize_Unsafe(mIslandContext.mCounts.contactManagers); + mThreadContext.tempContactList.forceSize_Unsafe(0); + 
mThreadContext.tempContactList.reserve(mIslandContext.mCounts.contactManagers); + mThreadContext.tempContactList.forceSize_Unsafe(mIslandContext.mCounts.contactManagers); + + const PxsIndexedContactManager** constraints = mThreadContext.orderedContactList.begin(); + + + //OK, we sort the orderedContactList + + mThreadContext.compoundConstraints.forceSize_Unsafe(0); + if(mIslandContext.mCounts.contactManagers) + { + { + mThreadContext.sortIndexArray.forceSize_Unsafe(0); + + PX_COMPILE_TIME_ASSERT(PxsIndexedInteraction::eBODY == 0); + PX_COMPILE_TIME_ASSERT(PxsIndexedInteraction::eKINEMATIC == 1); + + const PxI32 offsetMap[] = {PxI32(mContext.mKinematicCount), 0}; + + const PxU32 totalBodies = mContext.mKinematicCount + mIslandContext.mCounts.bodies+1; + + mThreadContext.sortIndexArray.reserve(totalBodies); + mThreadContext.sortIndexArray.forceSize_Unsafe(totalBodies); + PxMemZero(mThreadContext.sortIndexArray.begin(), totalBodies * 4); + + //Iterate over the array based on solverBodyDatapool, creating a list of sorted constraints (in order of body pair) + //We only do this with contacts. It's important that this is done this way because we don't want to break our rules that all joints + //appear before all contacts in the constraint list otherwise we will lose all guarantees about sorting joints. + + for(PxU32 a = 0; a < mIslandContext.mCounts.contactManagers; ++a) + { + PX_ASSERT(mObjects.contactManagers[a].indexType0 != PxsIndexedInteraction::eWORLD); + //Index first body... 
+ PxU8 indexType = mObjects.contactManagers[a].indexType0; + if(indexType != PxsIndexedInteraction::eARTICULATION && mObjects.contactManagers[a].indexType1 != PxsIndexedInteraction::eARTICULATION) + { + PX_ASSERT((indexType == PxsIndexedInteraction::eBODY) || (indexType == PxsIndexedInteraction::eKINEMATIC)); + + PxI32 index = PxI32(mObjects.contactManagers[a].solverBody0 + offsetMap[indexType]); + PX_ASSERT(index >= 0); + mThreadContext.sortIndexArray[PxU32(index)]++; + } + } + + PxU32 accumulatedCount = 0; + + for(PxU32 a = mThreadContext.sortIndexArray.size(); a > 0; --a) + { + PxU32 ind = a - 1; + PxU32 val = mThreadContext.sortIndexArray[ind]; + mThreadContext.sortIndexArray[ind] = accumulatedCount; + accumulatedCount += val; + } + + //OK, now copy across data to orderedConstraintDescs, pushing articulations to the end... + for(PxU32 a = 0; a < mIslandContext.mCounts.contactManagers; ++a) + { + //Index first body... + PxU8 indexType = mObjects.contactManagers[a].indexType0; + if(indexType != PxsIndexedInteraction::eARTICULATION && mObjects.contactManagers[a].indexType1 != PxsIndexedInteraction::eARTICULATION) + { + PX_ASSERT((indexType == PxsIndexedInteraction::eBODY) || (indexType == PxsIndexedInteraction::eKINEMATIC)); + + PxI32 index = PxI32(mObjects.contactManagers[a].solverBody0 + offsetMap[indexType]); + PX_ASSERT(index >= 0); + mThreadContext.tempContactList[mThreadContext.sortIndexArray[PxU32(index)]++] = &mObjects.contactManagers[a]; + } + else + { + mThreadContext.tempContactList[accumulatedCount++] = &mObjects.contactManagers[a]; + } + } + + //Now do the same again with bodyB, being careful not to overwrite the joints + PxMemZero(mThreadContext.sortIndexArray.begin(), totalBodies * 4); + + + for(PxU32 a = 0; a < mIslandContext.mCounts.contactManagers; ++a) + { + //Index first body... 
+ PxU8 indexType = mThreadContext.tempContactList[a]->indexType1; + if(indexType != PxsIndexedInteraction::eARTICULATION && mObjects.contactManagers[a].indexType0 != PxsIndexedInteraction::eARTICULATION) + { + PX_ASSERT((indexType == PxsIndexedInteraction::eBODY) || (indexType == PxsIndexedInteraction::eKINEMATIC) || (indexType == PxsIndexedInteraction::eWORLD)); + + PxI32 index = (indexType == PxsIndexedInteraction::eWORLD) ? 0 : PxI32(mThreadContext.tempContactList[a]->solverBody1 + offsetMap[indexType]); + PX_ASSERT(index >= 0); + mThreadContext.sortIndexArray[PxU32(index)]++; + } + } + + accumulatedCount = 0; + for(PxU32 a = mThreadContext.sortIndexArray.size(); a > 0; --a) + { + PxU32 ind = a - 1; + PxU32 val = mThreadContext.sortIndexArray[ind]; + mThreadContext.sortIndexArray[ind] = accumulatedCount; + accumulatedCount += val; + } + + PxU32 articulationStartIndex = accumulatedCount; + + //OK, now copy across data to orderedConstraintDescs, pushing articulations to the end... + for(PxU32 a = 0; a < mIslandContext.mCounts.contactManagers; ++a) + { + //Index first body... + PxU8 indexType = mThreadContext.tempContactList[a]->indexType1; + if(indexType != PxsIndexedInteraction::eARTICULATION && mObjects.contactManagers[a].indexType0 != PxsIndexedInteraction::eARTICULATION) + { + PX_ASSERT((indexType == PxsIndexedInteraction::eBODY) || (indexType == PxsIndexedInteraction::eKINEMATIC) || (indexType == PxsIndexedInteraction::eWORLD)); + + PxI32 index = (indexType == PxsIndexedInteraction::eWORLD) ? 
0 : PxI32(mThreadContext.tempContactList[a]->solverBody1 + offsetMap[indexType]); + PX_ASSERT(index >= 0); + constraints[mThreadContext.sortIndexArray[PxU32(index)]++] = mThreadContext.tempContactList[a]; + } + else + { + constraints[accumulatedCount++] = mThreadContext.tempContactList[a]; + } + } + +#if 1 + Ps::sort(constraints + articulationStartIndex, accumulatedCount - articulationStartIndex, ArticulationSortPredicate()); +#endif + } + + mThreadContext.mStartContactDescPtr = contactDescPtr; + + mThreadContext.compoundConstraints.reserve(1024); + mThreadContext.compoundConstraints.forceSize_Unsafe(0); + //mThreadContext.compoundConstraints.forceSize_Unsafe(mCounts.contactManagers); + + PxSolverConstraintDesc* startDesc = contactDescPtr; + mContext.setDescFromIndices(*startDesc, *constraints[0], mSolverBodyOffset); + startDesc->constraint = reinterpret_cast<PxU8*>(constraints[0]->contactManager); + startDesc->constraintLengthOver16 = DY_SC_TYPE_RB_CONTACT; + + PxsContactManagerOutput* startManagerOutput = &mOutputs.getContactManager(constraints[0]->contactManager->getWorkUnit().mNpIndex); + PxU32 contactCount = startManagerOutput->nbContacts; + PxU32 startIndex = 0; + PxU32 numHeaders = 0; + for(PxU32 a = 1; a < mIslandContext.mCounts.contactManagers; ++a) + { + PxSolverConstraintDesc& desc = *(contactDescPtr+1); + mContext.setDescFromIndices(desc, *constraints[a], mSolverBodyOffset); + + PxsContactManager* manager = constraints[a]->contactManager; + PxsContactManagerOutput& output = mOutputs.getContactManager(manager->getWorkUnit().mNpIndex); + + desc.constraint = reinterpret_cast<PxU8*>(constraints[a]->contactManager); + desc.constraintLengthOver16 = DY_SC_TYPE_RB_CONTACT; + + if (contactCount == 0) + { + //This is the first object in the pair + *startDesc = *(contactDescPtr + 1); + startIndex = a; + startManagerOutput = &output; + } + + if(startDesc->bodyA != desc.bodyA || startDesc->bodyB != desc.bodyB + || startDesc->linkIndexA != 
PxSolverConstraintDesc::NO_LINK || startDesc->linkIndexB != PxSolverConstraintDesc::NO_LINK + || contactCount + output.nbContacts > Gu::ContactBuffer::MAX_CONTACTS + || manager->isChangeable() + ) //If this is the first thing and no contacts...then we skip + { + PxU32 stride = a - startIndex; + if(contactCount > 0) + { + if(stride > 1) + { + ++numHeaders; + CompoundContactManager& header = mThreadContext.compoundConstraints.insert(); + header.mStartIndex = startIndex; + header.mStride = Ps::to16(stride); + header.mReducedContactCount = Ps::to16(contactCount); + PxsContactManager* manager1 = constraints[startIndex]->contactManager; + PxcNpWorkUnit& unit = manager1->getWorkUnit(); + + PX_ASSERT(startManagerOutput == &mOutputs.getContactManager(unit.mNpIndex)); + + header.unit = &unit; + header.cmOutput = startManagerOutput; + header.originalContactPatches = startManagerOutput->contactPatches; + header.originalContactPoints = startManagerOutput->contactPoints; + header.originalContactCount = startManagerOutput->nbContacts; + header.originalPatchCount = startManagerOutput->nbPatches; + header.originalForceBuffer = reinterpret_cast<PxReal*>(startManagerOutput->contactForces); + header.originalStatusFlags = startManagerOutput->statusFlag; + } + startDesc = ++contactDescPtr; + } + else + { + //Copy back next contactDescPtr + *startDesc = *(contactDescPtr+1); + } + contactCount = 0; + startIndex = a; + startManagerOutput = &output; + } + contactCount += output.nbContacts; + + } + PxU32 stride = mIslandContext.mCounts.contactManagers - startIndex; + if(contactCount > 0) + { + if(stride > 1) + { + ++numHeaders; + CompoundContactManager& header = mThreadContext.compoundConstraints.insert(); + header.mStartIndex = startIndex; + header.mStride = Ps::to16(stride); + header.mReducedContactCount = Ps::to16(contactCount); + PxsContactManager* manager = constraints[startIndex]->contactManager; + PxcNpWorkUnit& unit = manager->getWorkUnit(); + header.unit = &unit; + header.cmOutput = 
startManagerOutput; + header.originalContactPatches = startManagerOutput->contactPatches; + header.originalContactPoints = startManagerOutput->contactPoints; + header.originalContactCount = startManagerOutput->nbContacts; + header.originalPatchCount = startManagerOutput->nbPatches; + header.originalForceBuffer = reinterpret_cast<PxReal*>(startManagerOutput->contactForces); + header.originalStatusFlags = startManagerOutput->statusFlag; + } + contactDescPtr++; + } + + if(numHeaders) + { + const PxU32 unrollSize = 8; + for(PxU32 a = 0; a < numHeaders; a+= unrollSize) + { + PxsSolverConstraintPostProcessTask* postProcessTask = PX_PLACEMENT_NEW( mContext.getTaskPool().allocate(sizeof(PxsSolverConstraintPostProcessTask)), + PxsSolverConstraintPostProcessTask)(mContext, mThreadContext, mObjects, mSolverBodyOffset, a, PxMin(unrollSize, numHeaders - a), mMaterialManager, + mOutputs); + postProcessTask->setContinuation(mCont); + postProcessTask->removeReference(); + } + } + } + mThreadContext.contactDescArraySize = PxU32(contactDescPtr - mThreadContext.contactConstraintDescArray); + mThreadContext.mContactDescPtr = contactDescPtr; + } + + virtual void runInternal() + { + startTasks(); + integrate(); + setupDescTask(); + articulationTask(); + } + + virtual const char* getName() const + { + return "PxsDynamics.solverStart"; + } + +private: + DynamicsContext& mContext; + IslandContext& mIslandContext; + const SolverIslandObjects mObjects; + const PxU32 mSolverBodyOffset; + const PxU32 mKinematicCount; + IG::SimpleIslandManager& mIslandManager; + PxU32* mBodyRemapTable; + PxsMaterialManager* mMaterialManager; + PxsContactManagerOutputIterator& mOutputs; + bool mEnhancedDeterminism; +}; + +class PxsSolverConstraintPartitionTask : public Cm::Task +{ + PxsSolverConstraintPartitionTask& operator=(const PxsSolverConstraintPartitionTask&); +public: + + PxsSolverConstraintPartitionTask(DynamicsContext& context, + IslandContext& islandContext, + const SolverIslandObjects& objects, + 
const PxU32 solverBodyOffset, bool enhancedDeterminism) : + mContext(context), + mIslandContext(islandContext), + mObjects(objects), + mSolverBodyOffset(solverBodyOffset), + mEnhancedDeterminism(enhancedDeterminism) + {} + + virtual void runInternal() + { + + ThreadContext& mThreadContext = *mIslandContext.mThreadContext; + + //Compact articulation pairs... + ArticulationSolverDesc* artics = mThreadContext.getArticulations().begin(); + + if(mIslandContext.mCounts.articulations) + { + PxU32 nbArticConstraints = artics[0].numInternalConstraints; + + PxSolverConstraintDesc* currDesc = mThreadContext.mContactDescPtr; + for(PxU32 a = 1; a < mIslandContext.mCounts.articulations; ++a) + { + //Compact pairs... + const PxU32 nbInternalConstraints = artics[a].numInternalConstraints; + const PxU32 startIdx = a * DY_ARTICULATION_MAX_SIZE; + const PxU32 endIdx = startIdx + nbInternalConstraints; + + for(PxU32 b = startIdx; b < endIdx; ++b) + { + currDesc[nbArticConstraints++] = currDesc[b]; + } + } + + mThreadContext.contactDescArraySize += nbArticConstraints; + } + + PxSolverConstraintDesc* descBegin = mThreadContext.contactConstraintDescArray; + PxU32 descCount = mThreadContext.contactDescArraySize; + + PxSolverBody* solverBodies = mContext.mSolverBodyPool.begin() + mSolverBodyOffset; + + mThreadContext.mNumDifferentBodyConstraints = descCount; + + { + mThreadContext.mNumDifferentBodyConstraints = 0; + mThreadContext.mNumSelfConstraints = 0; + mThreadContext.mNumSelfConstraintBlocks = 0; + mThreadContext.mNumDifferentBodyFrictionConstraints = 0; + mThreadContext.mNumSelfConstraintFrictionBlocks = 0; + mThreadContext.mNumSelfFrictionConstraints = 0; + + if(descCount > 0) + { + ConstraintPartitionArgs args; + args.mBodies = solverBodies; + args.mArticulationPtrs = artics; + args.mContactConstraintDescriptors = descBegin; + args.mNumArticulationPtrs = mThreadContext.getArticulations().size(); + args.mNumBodies = mIslandContext.mCounts.bodies; + 
args.mNumContactConstraintDescriptors = descCount; + args.mOrderedContactConstraintDescriptors = mThreadContext.orderedContactConstraints; + args.mTempContactConstraintDescriptors = mThreadContext.tempConstraintDescArray; + args.mNumDifferentBodyConstraints = args.mNumSelfConstraints = args.mNumSelfConstraintBlocks = 0; + args.mConstraintsPerPartition = &mThreadContext.mConstraintsPerPartition; + args.mBitField = &mThreadContext.mPartitionNormalizationBitmap; + args.enhancedDeterminism = mEnhancedDeterminism; + + mThreadContext.mMaxPartitions = partitionContactConstraints(args); + mThreadContext.mNumDifferentBodyConstraints = args.mNumDifferentBodyConstraints; + mThreadContext.mNumSelfConstraints = args.mNumSelfConstraints; + mThreadContext.mNumSelfConstraintBlocks = args.mNumSelfConstraintBlocks; + } + else + { + PxMemZero(mThreadContext.mConstraintsPerPartition.begin(), sizeof(PxU32)*mThreadContext.mConstraintsPerPartition.capacity()); + } + + PX_ASSERT((mThreadContext.mNumDifferentBodyConstraints + mThreadContext.mNumSelfConstraints) == descCount); + } + + } + + virtual const char* getName() const { return "PxsDynamics.solverConstraintPartition"; } + + DynamicsContext& mContext; + IslandContext& mIslandContext; + const SolverIslandObjects mObjects; + PxU32 mSolverBodyOffset; + bool mEnhancedDeterminism; +}; + + +class PxsSolverSetupSolveTask : public Cm::Task +{ + PxsSolverSetupSolveTask& operator=(const PxsSolverSetupSolveTask&); +public: + + PxsSolverSetupSolveTask( + DynamicsContext& context, + IslandContext& islandContext, + const SolverIslandObjects& objects, + const PxU32 solverBodyOffset, + IG::IslandSim& islandSim) : + mContext(context), + mIslandContext(islandContext), + mObjects(objects), + mSolverBodyOffset(solverBodyOffset), + mIslandSim(islandSim) + {} + + + virtual void runInternal() + { + ThreadContext& mThreadContext = *mIslandContext.mThreadContext; + + PxSolverConstraintDesc* contactDescBegin = mThreadContext.orderedContactConstraints; + 
PxSolverConstraintDesc* contactDescPtr = mThreadContext.orderedContactConstraints; + + PxSolverBody* solverBodies = mContext.mSolverBodyPool.begin() + mSolverBodyOffset; + PxSolverBodyData* solverBodyDatas = mContext.mSolverBodyDataPool.begin(); + + PxU32 frictionDescCount = mThreadContext.mNumDifferentBodyFrictionConstraints; + + PxU32 j = 0, i = 0; + + //On PS3, self-constraints will be bumped to the end of the constraint list + //and processed separately. On PC/360, they will be mixed in the array and + //classed as "different body" constraints regardless of the fact that they're self-constraints. + //PxU32 numBatches = mThreadContext.numDifferentBodyBatchHeaders; + // TODO: maybe replace with non-null joints from end of the array + + PxU32 numBatches = 0; + + PxU32 currIndex = 0; + for(PxU32 a = 0; a < mThreadContext.mConstraintsPerPartition.size(); ++a) + { + PxU32 endIndex = currIndex + mThreadContext.mConstraintsPerPartition[a]; + + PxU32 numBatchesInPartition = 0; + for(PxU32 b = currIndex; b < endIndex; ++b) + { + PxConstraintBatchHeader& _header = mThreadContext.contactConstraintBatchHeaders[b]; + PxU16 stride = _header.mStride, newStride = _header.mStride; + PxU32 startIndex = j; + for(PxU16 c = 0; c < stride; ++c) + { + if(getConstraintLength(contactDescBegin[i]) == 0) + { + newStride--; + i++; + } + else + { + if(i!=j) + contactDescBegin[j] = contactDescBegin[i]; + i++; + j++; + contactDescPtr++; + } + } + + if(newStride != 0) + { + mThreadContext.contactConstraintBatchHeaders[numBatches].mStartIndex = startIndex; + mThreadContext.contactConstraintBatchHeaders[numBatches].mStride = newStride; + PxU8 type = *contactDescBegin[startIndex].constraint; + if(type == DY_SC_TYPE_STATIC_CONTACT) + { + //Check if any block of constraints is classified as type static (single) contact constraint. + //If they are, iterate over all constraints grouped with it and switch to "dynamic" contact constraint + //type if there's a dynamic contact constraint in the group. 
+ for(PxU32 c = 1; c < newStride; ++c) + { + if(*contactDescBegin[startIndex+c].constraint == DY_SC_TYPE_RB_CONTACT) + { + type = DY_SC_TYPE_RB_CONTACT; + } + } + } + + mThreadContext.contactConstraintBatchHeaders[numBatches].mConstraintType = type; + numBatches++; + numBatchesInPartition++; + } + } + PxU32 numHeaders = numBatchesInPartition; + currIndex += mThreadContext.mConstraintsPerPartition[a]; + mThreadContext.mConstraintsPerPartition[a] = numHeaders; + } + + PxU32 contactDescCount = PxU32(contactDescPtr - contactDescBegin); + + mThreadContext.mNumDifferentBodyConstraints = contactDescCount; + + PxU32 numSelfConstraintBlocks = mThreadContext.mNumSelfConstraintBlocks; + + //Remap self constraint array. Self-constraint blocks exists on PS3 as an optimization for SPU solver. + for(PxU32 a = 0; a < numSelfConstraintBlocks; ++a) + { + PX_ASSERT(mThreadContext.mSelfConstraintBlocks[a].startId == i); + PxU32 origNumSelfConstraints = mThreadContext.mSelfConstraintBlocks[a].numSelfConstraints; + PxU32 startId = j; + + for(PxU32 b = 0; b < origNumSelfConstraints; ++b) + { + PxSolverConstraintDesc& desc = contactDescBegin[i]; + + if(getConstraintLength(desc)) + { + PxConstraintBatchHeader& header = mThreadContext.contactConstraintBatchHeaders[numBatches++]; + header.mStride = 1; + header.mStartIndex = j; + header.mConstraintType = *desc.constraint; + if(i != j) + contactDescBegin[j] = contactDescBegin[i]; + j++; + } + i++; + } + mThreadContext.mSelfConstraintBlocks[a].startId = startId; + mThreadContext.mSelfConstraintBlocks[a].numSelfConstraints = j - startId; + } + + mThreadContext.numContactConstraintBatches = numBatches; + mThreadContext.mNumSelfConstraints = j - contactDescCount; //self constraint count + contactDescCount = j; + mThreadContext.mOrderedContactDescCount = j; + + //Now do the friction constraints if we're not using the sticky model + if(mContext.getFrictionType() != PxFrictionType::ePATCH) + { + PxSolverConstraintDesc* frictionDescBegin = 
mThreadContext.frictionConstraintDescArray.begin(); + PxSolverConstraintDesc* frictionDescPtr = frictionDescBegin; + + Ps::Array<PxConstraintBatchHeader>& frictionHeaderArray = mThreadContext.frictionConstraintBatchHeaders; + frictionHeaderArray.forceSize_Unsafe(0); + frictionHeaderArray.reserve(mThreadContext.numContactConstraintBatches); + PxConstraintBatchHeader* headers = frictionHeaderArray.begin(); + + Ps::Array<PxU32>& constraintsPerPartition = mThreadContext.mConstraintsPerPartition; + Ps::Array<PxU32>& frictionConstraintsPerPartition = mThreadContext.mFrictionConstraintsPerPartition; + frictionConstraintsPerPartition.forceSize_Unsafe(0); + frictionConstraintsPerPartition.reserve(constraintsPerPartition.capacity()); + + + PxU32 fricI = 0; + PxU32 startIndex = 0; + PxU32 fricHeaders = 0; + for(PxU32 k = 0; k < constraintsPerPartition.size(); ++k) + { + PxU32 numBatchesInK = constraintsPerPartition[k]; + PxU32 endIndex = startIndex + numBatchesInK; + + PxU32 startFricH = fricHeaders; + + for(PxU32 a = startIndex; a < endIndex; ++a) + { + PxConstraintBatchHeader& _header = mThreadContext.contactConstraintBatchHeaders[a]; + PxU16 stride = _header.mStride; + if(_header.mConstraintType == DY_SC_TYPE_RB_CONTACT || _header.mConstraintType == DY_SC_TYPE_EXT_CONTACT || + _header.mConstraintType == DY_SC_TYPE_STATIC_CONTACT) + { + PxU8 type = 0; + //Extract friction from this constraint + for(PxU16 b = 0; b < stride; ++b) + { + //create the headers... 
+ PxSolverConstraintDesc& desc = contactDescBegin[_header.mStartIndex + b]; + PX_ASSERT(desc.constraint); + SolverContactCoulombHeader* header = reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint); + PxU32 frictionOffset = header->frictionOffset; + PxU8* PX_RESTRICT constraint = reinterpret_cast<PxU8*>(header) + frictionOffset; + const PxU32 origLength = getConstraintLength(desc); + const PxU32 length = (origLength - frictionOffset); + + setConstraintLength(*frictionDescPtr, length); + frictionDescPtr->constraint = constraint; + frictionDescPtr->bodyA = desc.bodyA; + frictionDescPtr->bodyB = desc.bodyB; + frictionDescPtr->bodyADataIndex = desc.bodyADataIndex; + frictionDescPtr->bodyBDataIndex = desc.bodyBDataIndex; + frictionDescPtr->linkIndexA = desc.linkIndexA; + frictionDescPtr->linkIndexB = desc.linkIndexB; + frictionDescPtr->writeBack = NULL; + frictionDescPtr->writeBackLengthOver4 = 0; + type = *constraint; + frictionDescPtr++; + } + headers->mStartIndex = fricI; + headers->mStride = stride; + headers->mConstraintType = type; + headers++; + fricHeaders++; + fricI += stride; + } + else if(_header.mConstraintType == DY_SC_TYPE_BLOCK_RB_CONTACT || _header.mConstraintType == DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT) + { + //KS - TODO - Extract block of 4 contacts from this constraint. 
This isn't implemented yet for coulomb friction model + PX_ASSERT(contactDescBegin[_header.mStartIndex].constraint); + SolverContactCoulombHeader4* head = reinterpret_cast<SolverContactCoulombHeader4*>(contactDescBegin[_header.mStartIndex].constraint); + PxU32 frictionOffset = head->frictionOffset; + PxU8* PX_RESTRICT constraint = reinterpret_cast<PxU8*>(head) + frictionOffset; + const PxU32 origLength = getConstraintLength(contactDescBegin[_header.mStartIndex]); + const PxU32 length = (origLength - frictionOffset); + PxU8 type = *constraint; + PX_ASSERT(type == DY_SC_TYPE_BLOCK_FRICTION || type == DY_SC_TYPE_BLOCK_STATIC_FRICTION); + for(PxU32 b = 0; b < 4; ++b) + { + PxSolverConstraintDesc& desc = contactDescBegin[_header.mStartIndex+b]; + setConstraintLength(*frictionDescPtr, length); + frictionDescPtr->constraint = constraint; + frictionDescPtr->bodyA = desc.bodyA; + frictionDescPtr->bodyB = desc.bodyB; + frictionDescPtr->bodyADataIndex = desc.bodyADataIndex; + frictionDescPtr->bodyBDataIndex = desc.bodyBDataIndex; + frictionDescPtr->linkIndexA = desc.linkIndexA; + frictionDescPtr->linkIndexB = desc.linkIndexB; + frictionDescPtr->writeBack = NULL; + frictionDescPtr->writeBackLengthOver4 = 0; + frictionDescPtr++; + } + headers->mStartIndex = fricI; + headers->mStride = stride; + headers->mConstraintType = type; + headers++; + fricHeaders++; + fricI += stride; + } + } + startIndex += numBatchesInK; + if(startFricH < fricHeaders) + { + frictionConstraintsPerPartition.pushBack(fricHeaders - startFricH); + } + } + + + frictionDescCount = PxU32(frictionDescPtr - frictionDescBegin); + + mThreadContext.mNumDifferentBodyFrictionConstraints = frictionDescCount; + + frictionHeaderArray.forceSize_Unsafe(PxU32(headers - frictionHeaderArray.begin())); + + mThreadContext.mNumSelfFrictionConstraints = fricI - frictionDescCount; //self constraint count + mThreadContext.mNumDifferentBodyFrictionConstraints = frictionDescCount; + frictionDescCount = fricI; + 
mThreadContext.mOrderedFrictionDescCount = frictionDescCount; + + + } + + { + { + PX_PROFILE_ZONE("Dynamics.solver", mContext.getContextId()); + + PxSolverConstraintDesc* contactDescs = mThreadContext.orderedContactConstraints; + PxSolverConstraintDesc* frictionDescs = mThreadContext.frictionConstraintDescArray.begin(); + + PxI32* thresholdPairsOut = &mContext.mThresholdStreamOut; + + SolverIslandParams& params = *reinterpret_cast<SolverIslandParams*>(mContext.getTaskPool().allocate(sizeof(SolverIslandParams))); + params.positionIterations = mThreadContext.mMaxSolverPositionIterations; + params.velocityIterations = mThreadContext.mMaxSolverVelocityIterations; + params.bodyListStart = solverBodies; + params.bodyDataList = solverBodyDatas; + params.solverBodyOffset = mSolverBodyOffset; + params.bodyListSize = mIslandContext.mCounts.bodies; + params.articulationListStart = mThreadContext.getArticulations().begin(); + params.articulationListSize = mThreadContext.getArticulations().size(); + params.constraintList = contactDescs; + params.constraintIndex = 0; + params.constraintIndex2 = 0; + params.bodyListIndex = 0; + params.bodyListIndex2 = 0; + params.bodyIntegrationListIndex = 0; + params.thresholdStream = mContext.getThresholdStream().begin(); + params.thresholdStreamLength = mContext.getThresholdStream().size(); + params.outThresholdPairs = thresholdPairsOut; + params.motionVelocityArray = mThreadContext.motionVelocityArray; + params.bodyArray = mThreadContext.mBodyCoreArray; + params.numObjectsIntegrated = 0; + params.constraintBatchHeaders = mThreadContext.contactConstraintBatchHeaders; + params.numConstraintHeaders = mThreadContext.numContactConstraintBatches; + params.headersPerPartition = mThreadContext.mConstraintsPerPartition.begin(); + params.nbPartitions = mThreadContext.mConstraintsPerPartition.size(); + params.rigidBodies = const_cast<PxsRigidBody**>(mObjects.bodies); + params.frictionHeadersPerPartition = 
mThreadContext.mFrictionConstraintsPerPartition.begin(); + params.nbFrictionPartitions = mThreadContext.mFrictionConstraintsPerPartition.size(); + params.frictionConstraintBatches = mThreadContext.frictionConstraintBatchHeaders.begin(); + params.numFrictionConstraintHeaders = mThreadContext.frictionConstraintBatchHeaders.size(); + params.frictionConstraintIndex = 0; + params.frictionConstraintList = frictionDescs; + + const PxU32 unrollSize = 8; + const PxU32 denom = PxMax(1u, (mThreadContext.mMaxPartitions*unrollSize)); + const PxU32 MaxTasks = getTaskManager()->getCpuDispatcher()->getWorkerCount(); + const PxU32 idealThreads = mThreadContext.numContactConstraintBatches/denom; + const PxU32 numTasks = PxMax(1u, PxMin(idealThreads, MaxTasks)); + + if(numTasks > 1) + { + const PxU32 idealBatchSize = PxMax(unrollSize, idealThreads*unrollSize/(numTasks*2)); + + params.batchSize = idealBatchSize; //assigning ideal batch size for the solver to grab work at. Only needed by the multi-threaded island solver. + + for(PxU32 a = 1; a < numTasks; ++a) + { + void* tsk = mContext.getTaskPool().allocate(sizeof(PxsParallelSolverTask)); + PxsParallelSolverTask* pTask = PX_PLACEMENT_NEW(tsk, PxsParallelSolverTask)( + params, mContext, mContext.getFrictionType(), mIslandSim); + + //Force to complete before merge task! 
+ pTask->setContinuation(mCont); + + pTask->removeReference(); + } + + //Avoid kicking off one parallel task when we can do the work inline in this function + { + PX_PROFILE_ZONE("Dynamics.parallelSolve", mContext.getContextId()); + + solveParallel(mContext, params, mIslandSim); + } + const PxI32 numBodiesPlusArtics = PxI32( mIslandContext.mCounts.bodies + mIslandContext.mCounts.articulations ); + + PxI32* numObjectsIntegrated = &params.numObjectsIntegrated; + + WAIT_FOR_PROGRESS_NO_TIMER(numObjectsIntegrated, numBodiesPlusArtics); + + } + else + { + + //Only one task - a small island so do a sequential solve (avoid the atomic overheads) + solveVBlock(mContext.mSolverCore[mContext.getFrictionType()], params); + + const PxU32 bodyCountMin1 = mIslandContext.mCounts.bodies - 1u; + PxSolverBodyData* solverBodyData2 = solverBodyDatas + mSolverBodyOffset + 1; + for(PxU32 k=0; k < mIslandContext.mCounts.bodies; k++) + { + const PxU32 prefetchAddress = PxMin(k+4, bodyCountMin1); + Ps::prefetchLine(mThreadContext.mBodyCoreArray[prefetchAddress]); + Ps::prefetchLine(&mThreadContext.motionVelocityArray[k], 128); + Ps::prefetchLine(&mThreadContext.mBodyCoreArray[prefetchAddress], 128); + Ps::prefetchLine(&mObjects.bodies[prefetchAddress]); + + PxSolverBodyData& solverBodyData = solverBodyData2[k]; + + integrateCore(mThreadContext.motionVelocityArray[k].linear, mThreadContext.motionVelocityArray[k].angular, + solverBodies[k], solverBodyData, mContext.mDt); + + PxsRigidBody& rBody = *mObjects.bodies[k]; + PxsBodyCore& core = rBody.getCore(); + rBody.mLastTransform = core.body2World; + core.body2World = solverBodyData.body2World; + core.linearVelocity = solverBodyData.linearVelocity; + core.angularVelocity = solverBodyData.angularVelocity; + + + bool hasStaticTouch = mIslandSim.getIslandStaticTouchCount(IG::NodeIndex(solverBodyData.nodeIndex)) != 0; + sleepCheck(const_cast<PxsRigidBody*>(mObjects.bodies[k]), mContext.mDt, mContext.mInvDt, mContext.mEnableStabilization, 
mContext.mUseAdaptiveForce, mThreadContext.motionVelocityArray[k], + hasStaticTouch); + } + + for(PxU32 cnt=0;cnt<mIslandContext.mCounts.articulations;cnt++) + { + ArticulationSolverDesc &d = mThreadContext.getArticulations()[cnt]; + PX_PROFILE_ZONE("Articulations.integrate", mContext.getContextId()); + + ArticulationPImpl::updateBodies(d, mContext.getDt()); + } + } + } + } + } + + virtual const char* getName() const { return "PxsDynamics.solverSetupSolve"; } + + DynamicsContext& mContext; + IslandContext& mIslandContext; + const SolverIslandObjects mObjects; + PxU32 mSolverBodyOffset; + IG::IslandSim& mIslandSim; +}; + +class PxsSolverEndTask : public Cm::Task +{ + PxsSolverEndTask& operator=(const PxsSolverEndTask&); +public: + + PxsSolverEndTask(DynamicsContext& context, + IslandContext& islandContext, + const SolverIslandObjects& objects, + const PxU32 solverBodyOffset, + PxsContactManagerOutputIterator& cmOutputs) : + mContext (context), + mIslandContext (islandContext), + mObjects (objects), + mSolverBodyOffset (solverBodyOffset), + mOutputs (cmOutputs) + {} + + virtual void runInternal() + { + ThreadContext& mThreadContext = *mIslandContext.mThreadContext; +#if PX_ENABLE_SIM_STATS + mThreadContext.getSimStats().numAxisSolverConstraints += mThreadContext.mAxisConstraintCount; +#endif + //Patch up the contact managers (TODO - fix up force writeback) + PxU32 numCompoundConstraints = mThreadContext.compoundConstraints.size(); + for(PxU32 i = 0; i < numCompoundConstraints; ++i) + { + CompoundContactManager& manager = mThreadContext.compoundConstraints[i]; + PxsContactManagerOutput* cmOutput = manager.cmOutput; + + PxReal* contactForces = reinterpret_cast<PxReal*>(cmOutput->contactForces); + PxU32 contactCount = cmOutput->nbContacts; + + cmOutput->contactPatches = manager.originalContactPatches; + cmOutput->contactPoints = manager.originalContactPoints; + cmOutput->nbContacts = manager.originalContactCount; + cmOutput->nbPatches = manager.originalPatchCount; + 
cmOutput->statusFlag = manager.originalStatusFlags; + cmOutput->contactForces = manager.originalForceBuffer; + + for(PxU32 a = 1; a < manager.mStride; ++a) + { + PxsContactManager* pManager = mThreadContext.orderedContactList[manager.mStartIndex + a]->contactManager; + pManager->getWorkUnit().frictionDataPtr = manager.unit->frictionDataPtr; + pManager->getWorkUnit().frictionPatchCount = manager.unit->frictionPatchCount; + //pManager->getWorkUnit().prevFrictionPatchCount = manager.unit->prevFrictionPatchCount; + } + + //This is a stride-based contact force writer. The assumption is that we may have skipped certain unimportant contacts reported by the + //discrete narrow phase + if(contactForces) + { + PxU32 currentContactIndex = 0; + PxU32 currentManagerIndex = manager.mStartIndex; + PxU32 currentManagerContactIndex = 0; + + for(PxU32 a = 0; a < contactCount; ++a) + { + PxU32 index = manager.forceBufferList[a]; + PxsContactManager* pManager = mThreadContext.orderedContactList[currentManagerIndex]->contactManager; + PxsContactManagerOutput* output = &mOutputs.getContactManager(pManager->getWorkUnit().mNpIndex); + while(currentContactIndex < index || output->nbContacts == 0) + { + //Step forwards...first in this manager... 
+ + PxU32 numToStep = PxMin(index - currentContactIndex, PxU32(output->nbContacts) - currentManagerContactIndex); + currentContactIndex += numToStep; + currentManagerContactIndex += numToStep; + if(currentManagerContactIndex == output->nbContacts) + { + currentManagerIndex++; + currentManagerContactIndex = 0; + pManager = mThreadContext.orderedContactList[currentManagerIndex]->contactManager; + output = &mOutputs.getContactManager(pManager->getWorkUnit().mNpIndex); + } + } + if(output->nbContacts > 0 && output->contactForces) + output->contactForces[currentManagerContactIndex] = contactForces[a]; + } + } + } + + mThreadContext.compoundConstraints.forceSize_Unsafe(0); + + mThreadContext.mConstraintBlockManager.reset(); + + mContext.putThreadContext(&mThreadContext); + } + + + virtual const char* getName() const + { + return "PxsDynamics.solverEnd"; + } + + DynamicsContext& mContext; + IslandContext& mIslandContext; + const SolverIslandObjects mObjects; + const PxU32 mSolverBodyOffset; + PxsContactManagerOutputIterator& mOutputs; +}; + +class PxsSolverCreateFinalizeConstraintsTask : public Cm::Task +{ + PxsSolverCreateFinalizeConstraintsTask& operator=(const PxsSolverCreateFinalizeConstraintsTask&); +public: + + PxsSolverCreateFinalizeConstraintsTask( + DynamicsContext& context, + IslandContext& islandContext, + PxU32 solverDataOffset, + PxsContactManagerOutputIterator& outputs, + bool enhancedDeterminism) : + mContext (context), + mIslandContext (islandContext), + mSolverDataOffset (solverDataOffset), + mOutputs (outputs), + mEnhancedDeterminism (enhancedDeterminism) + { + } + + virtual void runInternal(); + + virtual const char* getName() const { return "PxsDynamics.solverCreateFinalizeConstraints"; } + + DynamicsContext& mContext; + IslandContext& mIslandContext; + PxU32 mSolverDataOffset; + PxsContactManagerOutputIterator& mOutputs; + bool mEnhancedDeterminism; +}; + + +// helper function to join two tasks together and ensure ref counts are correct +void 
chainTasks(PxLightCpuTask* first, PxLightCpuTask* next)
{
	// 'next' becomes the continuation of 'first'; the reference this function
	// holds on 'next' is then dropped.
	first->setContinuation(next);
	next->removeReference();
}

// Builds the five-stage solver task chain for one batch of islands and returns
// its first task. Order of execution, as wired below:
//   start -> partitionConstraints -> createFinalizeConstraints -> setupSolve -> end -> continuation
// All tasks share a single IslandContext whose mThreadContext starts out NULL and
// whose mCounts is a copy of 'counts'. Everything is allocated from the dynamics
// context's task pool while the pool is locked.
// The caller receives startTask and is expected to release its reference on it
// (see DynamicsContext::update, which calls removeReference() on the result).
PxBaseTask* createSolverTaskChain(DynamicsContext& dynamicContext,
	const SolverIslandObjects& objects,
	const PxsIslandIndices& counts,
	const PxU32 solverBodyOffset,
	IG::SimpleIslandManager& islandManager,
	PxU32* bodyRemapTable, PxsMaterialManager* materialManager, PxBaseTask* continuation,
	PxsContactManagerOutputIterator& iterator, bool useEnhancedDeterminism)
{
	Cm::FlushPool& taskPool = dynamicContext.getTaskPool();

	taskPool.lock();


	// NOTE(review): this is the only allocation in the locked region that goes through
	// allocate() rather than allocateNotThreadSafe() - presumably the pool lock is
	// re-entrant; confirm against Cm::FlushPool.
	IslandContext* islandContext = reinterpret_cast<IslandContext*>(taskPool.allocate(sizeof(IslandContext)));
	islandContext->mThreadContext = NULL;
	islandContext->mCounts = counts;


	// create lead task
	PxsSolverStartTask* startTask = PX_PLACEMENT_NEW(taskPool.allocateNotThreadSafe(sizeof(PxsSolverStartTask)), PxsSolverStartTask)(dynamicContext, *islandContext, objects, solverBodyOffset, dynamicContext.getKinematicCount(),
		islandManager, bodyRemapTable, materialManager, iterator, useEnhancedDeterminism);
	PxsSolverEndTask* endTask = PX_PLACEMENT_NEW(taskPool.allocateNotThreadSafe(sizeof(PxsSolverEndTask)), PxsSolverEndTask)(dynamicContext, *islandContext, objects, solverBodyOffset, iterator);


	PxsSolverCreateFinalizeConstraintsTask* createFinalizeConstraintsTask = PX_PLACEMENT_NEW(taskPool.allocateNotThreadSafe(sizeof(PxsSolverCreateFinalizeConstraintsTask)), PxsSolverCreateFinalizeConstraintsTask)(dynamicContext, *islandContext, solverBodyOffset, iterator, useEnhancedDeterminism);
	PxsSolverSetupSolveTask* setupSolveTask = PX_PLACEMENT_NEW(taskPool.allocateNotThreadSafe(sizeof(PxsSolverSetupSolveTask)), PxsSolverSetupSolveTask)(dynamicContext, *islandContext, objects, solverBodyOffset, islandManager.getAccurateIslandSim());

	PxsSolverConstraintPartitionTask* partitionConstraintsTask = PX_PLACEMENT_NEW(taskPool.allocateNotThreadSafe(sizeof(PxsSolverConstraintPartitionTask)), PxsSolverConstraintPartitionTask)(dynamicContext, *islandContext, objects, solverBodyOffset, useEnhancedDeterminism);

	// The end task is the only one that continues into the caller-supplied task.
	endTask->setContinuation(continuation);

	// set up task chain in reverse order
	chainTasks(setupSolveTask, endTask);
	chainTasks(createFinalizeConstraintsTask, setupSolveTask);
	chainTasks(partitionConstraintsTask, createFinalizeConstraintsTask);
	chainTasks(startTask, partitionConstraintsTask);

	taskPool.unlock();

	return startTask;
}


// Queues the solver work for one step.
// Visible steps, in order:
//  1. store dt / 1/dt / gravity / output iterator on the context;
//  2. zero frictionPatchCount on every contact manager attached to a newly activated edge;
//  3. (stats builds) record active body / constraint counts;
//  4. reset the threshold-stream output counter and the thread contexts, and return early
//     when there are no active islands;
//  5. size the solver body pools and copy every active kinematic into solver body data;
//  6. size the constraint-descriptor, contact, body and articulation arrays from the
//     island sim's active counts;
//  7. create a PxsForceThresholdTask (continuation: lostTouchTask) and walk the active
//     islands, grouping them into batches and creating one solver task chain per batch
//     that continues into the force-threshold task.
// Unnamed / commented-out parameters are not used by this implementation.
void DynamicsContext::update(IG::SimpleIslandManager& simpleIslandManager, PxBaseTask* /*continuation*/, PxBaseTask* lostTouchTask,
	PxsContactManager** /*foundPatchManagers*/, PxU32 /*nbFoundPatchManagers*/,
	PxsContactManager** /*lostPatchManagers*/, PxU32 /*nbLostPatchManagers*/,
	PxU32 /*maxPatchesPerCM*/,
	PxsContactManagerOutputIterator& iterator,
	PxsContactManagerOutput*,
	const PxReal dt, const PxVec3& gravity, const PxU32 /*bitMapWordCounts*/)
{
	PX_PROFILE_ZONE("Dynamics.solverQueueTasks", mContextID);

	// NOTE(review): simpleIslandManager IS used below, so this looks like a leftover.
	PX_UNUSED(simpleIslandManager);

	mOutputIterator = iterator;

	mDt = dt;
	// A zero time step yields a zero inverse rather than a division by zero.
	mInvDt = dt == 0.0f ? 0.0f : 1.0f/dt;
	mGravity = gravity;

	const IG::IslandSim& islandSim = simpleIslandManager.getAccurateIslandSim();

	const PxU32 islandCount = islandSim.getNbActiveIslands();

	const PxU32 activatedContactCount = islandSim.getNbActivatedEdges(IG::Edge::eCONTACT_MANAGER);
	const IG::EdgeIndex* const activatingEdges = islandSim.getActivatedEdges(IG::Edge::eCONTACT_MANAGER);

	for(PxU32 a = 0; a < activatedContactCount; ++a)
	{
		// An activated edge may have no contact manager attached; skip those.
		PxsContactManager* cm = simpleIslandManager.getContactManager(activatingEdges[a]);
		if(cm)
		{
			cm->getWorkUnit().frictionPatchCount = 0; //KS - zero the friction patch count on any activating edges
		}
	}

#if PX_ENABLE_SIM_STATS
	// The kinematic count is recorded in both branches; dynamic body and constraint
	// counts are reported as zero when no island is active.
	if(islandCount > 0)
	{
		mSimStats.mNbActiveKinematicBodies = islandSim.getNbActiveKinematics();
		mSimStats.mNbActiveDynamicBodies = islandSim.getNbActiveNodes(IG::Node::eRIGID_BODY_TYPE);
		mSimStats.mNbActiveConstraints = islandSim.getNbActiveEdges(IG::Edge::eCONSTRAINT);
	}
	else
	{
		mSimStats.mNbActiveKinematicBodies = islandSim.getNbActiveKinematics();
		mSimStats.mNbActiveDynamicBodies = 0;
		mSimStats.mNbActiveConstraints = 0;
	}
#endif

	mThresholdStreamOut = 0;

	resetThreadContexts();

	//If there is no work to do then we can do nothing at all.
	if(0 == islandCount)
	{
		return;
	}

	//KS - test that world solver body's velocities are finite and 0, then set it to 0.
	//Technically, the velocity should always be 0 but can be stomped if a NAN creeps into the simulation.
	PX_ASSERT(mWorldSolverBody.linearVelocity == PxVec3(0.f));
	PX_ASSERT(mWorldSolverBody.angularState == PxVec3(0.f));
	PX_ASSERT(mWorldSolverBody.linearVelocity.isFinite());
	PX_ASSERT(mWorldSolverBody.angularState.isFinite());

	mWorldSolverBody.linearVelocity = mWorldSolverBody.angularState = PxVec3(0.f);

	const PxU32 kinematicCount = islandSim.getNbActiveKinematics();
	const IG::NodeIndex* const kinematicIndices = islandSim.getActiveKinematics();
	mKinematicCount = kinematicCount;

	const PxU32 bodyCount = islandSim.getNbActiveNodes(IG::Node::eRIGID_BODY_TYPE);

	PxU32 numArtics = islandSim.getNbActiveNodes(IG::Node::eARTICULATION_TYPE);

	{
		// Grow the pools only when the current capacity is insufficient; requests are
		// rounded up to a multiple of 32 entries.
		if(kinematicCount + bodyCount > mSolverBodyPool.capacity())
		{
			mSolverBodyPool.reserve((kinematicCount + bodyCount + 31) & ~31); // pad out to 32 * 128 = 4k to prevent alloc churn
			mSolverBodyDataPool.reserve((kinematicCount + bodyCount + 31 + 1) & ~31); // pad out to 32 * 128 = 4k to prevent alloc churn
			mSolverBodyRemapTable.reserve((kinematicCount + bodyCount + 31 + 1) & ~31);
		}

		{
			// New entries are zero-filled. The data pool holds one extra entry:
			// slot 0 is assigned the world body data below.
			PxSolverBody emptySolverBody;
			PxMemZero(&emptySolverBody, sizeof(PxSolverBody));
			mSolverBodyPool.resize(kinematicCount + bodyCount, emptySolverBody);
			PxSolverBodyData emptySolverBodyData;
			PxMemZero(&emptySolverBodyData, sizeof(PxSolverBodyData));
			mSolverBodyDataPool.resize(kinematicCount + bodyCount + 1, emptySolverBodyData);
			mSolverBodyRemapTable.resize(bodyCount);
		}

		// integrate and copy all the kinematics - overkill, since not all kinematics
		// need solver bodies

		mSolverBodyDataPool[0] = mWorldSolverBodyData;


		{
			PX_PROFILE_ZONE("Dynamics.updateKinematics", mContextID);
			// Kinematics occupy solver bodies [0, kinematicCount) and data slots [1, kinematicCount].
			PxMemZero(mSolverBodyPool.begin(), kinematicCount*sizeof(PxSolverBody));
			for(PxU32 i=0;i<kinematicCount;i++)
			{
				PxsRigidBody* rigidBody = islandSim.getRigidBody(kinematicIndices[i]);
				const PxsBodyCore& core = rigidBody->getCore();
				copyToSolverBodyData(core.linearVelocity, core.angularVelocity, core.inverseMass, core.inverseInertia, core.body2World, core.maxPenBias,
					core.maxContactImpulse, kinematicIndices[i].index(), core.contactReportThreshold, mSolverBodyDataPool[i + 1], core.lockFlags);
				rigidBody->saveLastCCDTransform();
				// Only really necessary for PS3 at the moment (for the cross island parallel constraint solver
				// but we might switch to the same on other platforms)
				mSolverBodyPool[i].solverProgress=MAX_PERMITTED_SOLVER_PROGRESS;
				mSolverBodyPool[i].maxSolverNormalProgress=MAX_PERMITTED_SOLVER_PROGRESS;
				mSolverBodyPool[i].maxSolverFrictionProgress=MAX_PERMITTED_SOLVER_PROGRESS;
			}
		}
	}

	// Batching limits used by the island-grouping loop further down.
	PxU32 solverBatchMax = mSolverBatchSize;
	PxU32 articulationBatchMax = 2;
	PxU32 minimumConstraintCount = 1;


	//Resize arrays of solver constraints...
	PxU32 numArticulationConstraints=numArtics*Dy::DY_ARTICULATION_MAX_SIZE;		//Just allocate enough memory to fit worst-case maximum size articulations...

	const PxU32 nbActiveContactManagers = islandSim.getNbActiveEdges(IG::Edge::eCONTACT_MANAGER);
	const PxU32 nbActiveConstraints = islandSim.getNbActiveEdges(IG::Edge::eCONSTRAINT);

	PxU32 totalConstraintCount = nbActiveConstraints + nbActiveContactManagers + numArticulationConstraints;

	// Each array below follows the same pattern: set size to 0, reserve the required
	// count rounded up to a multiple of 64, then force the size to the required count
	// (forceSize_Unsafe - presumably without constructing elements; confirm against Ps::Array).
	mSolverConstraintDescPool.forceSize_Unsafe(0);
	mSolverConstraintDescPool.reserve((totalConstraintCount + 63) & (~63));
	mSolverConstraintDescPool.forceSize_Unsafe(totalConstraintCount);

	mOrderedSolverConstraintDescPool.forceSize_Unsafe(0);
	mOrderedSolverConstraintDescPool.reserve((totalConstraintCount + 63) & (~63));
	mOrderedSolverConstraintDescPool.forceSize_Unsafe(totalConstraintCount);

	mTempSolverConstraintDescPool.forceSize_Unsafe(0);
	mTempSolverConstraintDescPool.reserve((totalConstraintCount + 63) & (~63));
	mTempSolverConstraintDescPool.forceSize_Unsafe(totalConstraintCount);

	mContactConstraintBatchHeaders.forceSize_Unsafe(0);
	mContactConstraintBatchHeaders.reserve((totalConstraintCount + 63) & (~63));
	mContactConstraintBatchHeaders.forceSize_Unsafe(totalConstraintCount);

	mContactList.forceSize_Unsafe(0);
	mContactList.reserve((nbActiveContactManagers +63u) & (~63u));
	mContactList.forceSize_Unsafe(nbActiveContactManagers);

	mMotionVelocityArray.forceSize_Unsafe(0);
	mMotionVelocityArray.reserve((bodyCount + 63u) & (~63u));
	mMotionVelocityArray.forceSize_Unsafe(bodyCount);

	mBodyCoreArray.forceSize_Unsafe(0);
	mBodyCoreArray.reserve((bodyCount + 63u) & (~63u));
	mBodyCoreArray.forceSize_Unsafe(bodyCount);

	mRigidBodyArray.forceSize_Unsafe(0);
	mRigidBodyArray.reserve((bodyCount + 63u) & (~63u));
	mRigidBodyArray.forceSize_Unsafe(bodyCount);

	mArticulationArray.forceSize_Unsafe(0);
	mArticulationArray.reserve((numArtics + 63u) & (~63u));
	mArticulationArray.forceSize_Unsafe(numArtics);

	mNodeIndexArray.forceSize_Unsafe(0);
	mNodeIndexArray.reserve((bodyCount + 63u) & (~63u));
	mNodeIndexArray.forceSize_Unsafe(bodyCount);


	// The threshold stream is emptied and its capacity request is derived from the
	// number of active contact managers.
	ThresholdStream& stream = getThresholdStream();
	stream.forceSize_Unsafe(0);
	stream.reserve(Ps::nextPowerOfTwo(nbActiveContactManagers != 0 ? nbActiveContactManagers-1 : nbActiveContactManagers));

	// Running offset into the constraint descriptor / batch header pools.
	PxU32 constraintIndex = 0;

	//flip exceeded force threshold buffer
	mCurrentIndex = 1 - mCurrentIndex;

	//create force threshold tasks to produce force change events
	PxsForceThresholdTask* forceThresholdTask = PX_PLACEMENT_NEW(getTaskPool().allocateNotThreadSafe(sizeof(PxsForceThresholdTask)), PxsForceThresholdTask)(*this);
	forceThresholdTask->setContinuation(lostTouchTask);

	const IG::IslandId*const islandIds = islandSim.getActiveIslands();

	// Running offsets into the per-body, per-articulation and per-contact arrays.
	PxU32 currentIsland = 0;
	PxU32 currentBodyIndex = 0;
	PxU32 currentArticulation = 0;
	PxU32 currentContact = 0;
	//while(start<sentinel)
	while(currentIsland < islandCount)
	{
		// Start-of-batch pointers into the shared arrays, taken BEFORE the offsets are
		// advanced for this batch.
		SolverIslandObjects objectStarts;
		objectStarts.articulations			= mArticulationArray.begin()+ currentArticulation;
		objectStarts.bodies					= mRigidBodyArray.begin() + currentBodyIndex;
		objectStarts.contactManagers		= mContactList.begin() + currentContact;
		objectStarts.constraintDescs		= mSolverConstraintDescPool.begin() + constraintIndex;
		objectStarts.orderedConstraintDescs = mOrderedSolverConstraintDescPool.begin() + constraintIndex;
		objectStarts.tempConstraintDescs	= mTempSolverConstraintDescPool.begin() + constraintIndex;
		objectStarts.constraintBatchHeaders	= mContactConstraintBatchHeaders.begin() + constraintIndex;
		objectStarts.motionVelocities		= mMotionVelocityArray.begin() + currentBodyIndex;
		objectStarts.bodyCoreArray			= mBodyCoreArray.begin() + currentBodyIndex;
		objectStarts.islandIds				= islandIds + currentIsland;
		objectStarts.bodyRemapTable			= mSolverBodyRemapTable.begin();
		objectStarts.nodeIndexArray			= mNodeIndexArray.begin() + currentBodyIndex;

		PxU32 startIsland = currentIsland;
		PxU32 constraintCount = 0;

		PxU32 nbArticulations = 0;
		PxU32 nbBodies = 0;
		PxU32 nbConstraints = 0;
		PxU32 nbContactManagers =0;

		//KS - logic is a bit funky here. We will keep rolling the island together provided currentIsland < islandCount AND either we haven't exceeded the max number of bodies or we have
		//zero constraints AND we haven't exceeded articulation batch counts (it's still currently beneficial to keep articulations in separate islands but this is only temporary).
		while((currentIsland < islandCount && (nbBodies < solverBatchMax || constraintCount < minimumConstraintCount)) && nbArticulations < articulationBatchMax)
		{
			const IG::Island& island = islandSim.getIsland(islandIds[currentIsland]);
			nbBodies += island.mSize[IG::Node::eRIGID_BODY_TYPE];
			nbArticulations += island.mSize[IG::Node::eARTICULATION_TYPE];
			nbConstraints += island.mEdgeCount[IG::Edge::eCONSTRAINT];
			nbContactManagers += island.mEdgeCount[IG::Edge::eCONTACT_MANAGER];
			constraintCount = nbConstraints + nbContactManagers;
			currentIsland++;
		}


		objectStarts.numIslands = currentIsland - startIsland;

		// Skip over the worst-case articulation constraint space for this batch.
		constraintIndex += nbArticulations*Dy::DY_ARTICULATION_MAX_SIZE;

		PxsIslandIndices counts;

		counts.articulations	= nbArticulations;
		counts.bodies			= nbBodies;

		counts.constraints		= nbConstraints;
		counts.contactManagers	= nbContactManagers;
		// A batch with neither bodies nor articulations gets no task chain.
		if(counts.articulations + counts.bodies > 0)
		{
			// Dynamic bodies follow the kinematics in the solver body pools, hence the
			// kinematicCount offset.
			PxBaseTask* task = createSolverTaskChain(*this, objectStarts, counts,
				kinematicCount + currentBodyIndex, simpleIslandManager, mSolverBodyRemapTable.begin(), mMaterialManager, forceThresholdTask, mOutputIterator, mUseEnhancedDeterminism);
			task->removeReference();
		}

		currentBodyIndex += nbBodies;
		currentArticulation += nbArticulations;
		currentContact += nbContactManagers;

		constraintIndex += constraintCount;
	}

	//kick off forceThresholdTask
	forceThresholdTask->removeReference();
}

// Intentionally does nothing in this implementation; the parameter is only marked as unused.
void DynamicsContext::updateBodyCore(PxBaseTask* continuation)
{
	PX_UNUSED(continuation);
}

// Folds per-thread simulation statistics into the context.
// Does no work beyond the profile zone unless PX_ENABLE_SIM_STATS is set.
void DynamicsContext::mergeResults()
{
	PX_PROFILE_ZONE("Dynamics.solverMergeResults", mContextID);
	//OK. Sum up sim stats here...

#if PX_ENABLE_SIM_STATS
	PxcThreadCoherentCacheIterator<ThreadContext, PxcNpMemBlockPool> threadContextIt(mThreadContextPool);
	ThreadContext* threadContext = threadContextIt.getNext();

	// Accumulate each thread context's stats, then clear them so they are not counted twice.
	while(threadContext != NULL)
	{
		ThreadContext::ThreadSimStats& threadStats = threadContext->getSimStats();
		addThreadStats(threadStats);
		threadStats.clear();
		threadContext = threadContextIt.getNext();
	}
#endif
}


// Pre-integrates a contiguous range of 'bodyCount' bodies:
//  - computes each body's unconstrained velocity (gravity, damping, velocity limits),
//  - copies the result into solverBodyDataPool[i + 1],
//  - zeroes the three progress counters of solverBodyPool[i],
//  - tracks the largest position / velocity iteration counts seen and publishes them
//    through atomicMax on the two output pointers.
// solverIterationCounts packs the position count in the low 8 bits and the velocity
// count in the bits above.
// The loop handles bodies [0, bodyCount-2] while prefetching the next body; the last
// body is handled after the loop without prefetching.
// NOTE(review): 'bodyCount - 1' below wraps when bodyCount is 0 - confirm callers never pass 0.
// NOTE(review): the parameter comment says originalBodyArray must not be dereferenced,
// yet the body reads accelScale and mInternalFlags through it - confirm the comment is stale.
static void preIntegrationParallel(
	const PxF32 dt,
	PxsBodyCore*const* bodyArray,					// INOUT: core body attributes
	PxsRigidBody*const* originalBodyArray,			// IN: original bodies (LEGACY - DON'T deref the ptrs!!)
	PxU32 const* nodeIndexArray,					// IN: island node index
	PxU32 bodyCount,								// IN: body count
	PxSolverBody* solverBodyPool,					// IN: solver body pool (space preallocated)
	PxSolverBodyData* solverBodyDataPool,			// IN: solver body data pool (space preallocated)
	volatile PxU32* maxSolverPositionIterations,
	volatile PxU32* maxSolverVelocityIterations,
	const PxVec3& gravity)
{
	PxU32 localMaxPosIter = 0;
	PxU32 localMaxVelIter = 0;


	for(PxU32 a = 1; a < bodyCount; ++a)
	{
		// 'a' is the body being prefetched, 'i' the body being processed.
		PxU32 i = a-1;
		Ps::prefetchLine(bodyArray[a]);
		Ps::prefetchLine(bodyArray[a],128);
		Ps::prefetchLine(&solverBodyDataPool[a]);
		Ps::prefetchLine(&solverBodyDataPool[a],128);

		PxsBodyCore& core = *bodyArray[i];
		const PxsRigidBody& rBody = *originalBodyArray[i];

		PxU16 iterWord = core.solverIterationCounts;
		localMaxPosIter = PxMax<PxU32>(PxU32(iterWord & 0xff), localMaxPosIter);
		localMaxVelIter = PxMax<PxU32>(PxU32(iterWord >> 8), localMaxVelIter);

		//const Cm::SpatialVector& accel = originalBodyArray[i]->getAccelerationV();
		bodyCoreComputeUnconstrainedVelocity(gravity, dt, core.linearDamping, core.angularDamping, rBody.accelScale, core.maxLinearVelocitySq, core.maxAngularVelocitySq,
			core.linearVelocity, core.angularVelocity, !!(rBody.mInternalFlags & PxcRigidBody::eDISABLE_GRAVITY));

		copyToSolverBodyData(core.linearVelocity, core.angularVelocity, core.inverseMass, core.inverseInertia, core.body2World, core.maxPenBias, core.maxContactImpulse, nodeIndexArray[i],
			core.contactReportThreshold, solverBodyDataPool[i + 1], core.lockFlags);
		solverBodyPool[i].solverProgress = 0;
		solverBodyPool[i].maxSolverNormalProgress = 0;
		solverBodyPool[i].maxSolverFrictionProgress = 0;
	}
	// Last body of the range: same work as the loop body, without prefetching.
	const PxU32 i = bodyCount - 1;
	PxsBodyCore& core = *bodyArray[i];
	const PxsRigidBody& rBody = *originalBodyArray[i];

	PxU16 iterWord = core.solverIterationCounts;
	localMaxPosIter = PxMax<PxU32>(PxU32(iterWord & 0xff), localMaxPosIter);
	localMaxVelIter = PxMax<PxU32>(PxU32(iterWord >> 8), localMaxVelIter);

	bodyCoreComputeUnconstrainedVelocity(gravity, dt, core.linearDamping, core.angularDamping, rBody.accelScale, core.maxLinearVelocitySq, core.maxAngularVelocitySq,
		core.linearVelocity, core.angularVelocity, !!(rBody.mInternalFlags & PxcRigidBody::eDISABLE_GRAVITY));

	copyToSolverBodyData(core.linearVelocity, core.angularVelocity, core.inverseMass, core.inverseInertia, core.body2World, core.maxPenBias, core.maxContactImpulse, nodeIndexArray[i],
		core.contactReportThreshold, solverBodyDataPool[i + 1], core.lockFlags);
	solverBodyPool[i].solverProgress = 0;
	solverBodyPool[i].maxSolverNormalProgress = 0;
	solverBodyPool[i].maxSolverFrictionProgress = 0;

	// Publish the local maxima; the outputs are shared, so they are only ever raised.
	physx::shdfnd::atomicMax(reinterpret_cast<volatile PxI32*>(maxSolverPositionIterations), PxI32(localMaxPosIter));
	physx::shdfnd::atomicMax(reinterpret_cast<volatile PxI32*>(maxSolverVelocityIterations), PxI32(localMaxVelIter));
}


// Runs the file-local preIntegrationParallel() on this task's slice: every array is
// offset by mStartIndex and mNumToIntegrate bodies are processed.
void PxsPreIntegrateTask::runInternal()
{
	{
		preIntegrationParallel(mDt, mBodyArray + mStartIndex, mOriginalBodyArray + mStartIndex, mNodeIndexArray + mStartIndex, mNumToIntegrate,
			mSolverBodies + mStartIndex, mSolverBodyDataPool + mStartIndex,
			mMaxSolverPositionIterations, mMaxSolverVelocityIterations, mGravity);
	}
}

void
DynamicsContext::preIntegrationParallel( + const PxF32 dt, + PxsBodyCore*const* bodyArray, // INOUT: core body attributes + PxsRigidBody*const* originalBodyArray, // IN: original bodies (LEGACY - DON'T deref the ptrs!!) + PxU32 const* nodeIndexArray, // IN: island node index + PxU32 bodyCount, // IN: body count + PxSolverBody* solverBodyPool, // IN: solver body pool (space preallocated) + PxSolverBodyData* solverBodyDataPool, // IN: solver body data pool (space preallocated) + Cm::SpatialVector* /*motionVelocityArray*/, // OUT: motion velocities + PxU32& maxSolverPositionIterations, + PxU32& maxSolverVelocityIterations, + PxBaseTask& task + ) +{ + //TODO - make this based on some variables so we can try different configurations + const PxU32 IntegrationPerThread = 256; + + const PxU32 numTasks = ((bodyCount + IntegrationPerThread-1)/IntegrationPerThread); + const PxU32 taskBatchSize = 64; + + for(PxU32 i = 0; i < numTasks; i+=taskBatchSize) + { + const PxU32 nbTasks = PxMin(numTasks - i, taskBatchSize); + PxsPreIntegrateTask* tasks = reinterpret_cast<PxsPreIntegrateTask*>(getTaskPool().allocate(sizeof(PxsPreIntegrateTask)*nbTasks)); + for(PxU32 a = 0; a < nbTasks; ++a) + { + PxU32 startIndex = (i+a)*IntegrationPerThread; + PxU32 nbToIntegrate = PxMin((bodyCount-startIndex), IntegrationPerThread); + PxsPreIntegrateTask* pTask = PX_PLACEMENT_NEW(&tasks[a], PxsPreIntegrateTask)(*this, bodyArray, + originalBodyArray, nodeIndexArray, solverBodyPool, solverBodyDataPool, dt, bodyCount, + &maxSolverPositionIterations, &maxSolverVelocityIterations, startIndex, + nbToIntegrate, mGravity); + + pTask->setContinuation(&task); + pTask->removeReference(); + } + } + + PxMemZero(solverBodyPool, bodyCount * sizeof(PxSolverBody)); +} + +inline void WaitBodyRequiredState(volatile PxU32* state, PxU32 requiredState) +{ + while(requiredState != *state ); +} + +void solveParallel(SOLVER_PARALLEL_METHOD_ARGS) +{ + context.solveParallel(params, islandSim); +} + + +void 
DynamicsContext::solveParallel(SolverIslandParams& params, IG::IslandSim& islandSim) +{ + PxI32 targetCount = mSolverCore[mFrictionType]->solveVParallelAndWriteBack(params); + + PxI32* solveCount = &params.constraintIndex2; + + //PxI32 targetCount = (PxI32)(params.numConstraintHeaders * (params.velocityIterations + params.positionIterations)); + + WAIT_FOR_PROGRESS_NO_TIMER(solveCount, targetCount); + + integrateCoreParallel(params, islandSim); +} + +void DynamicsContext::integrateCoreParallel(SolverIslandParams& params, IG::IslandSim& islandSim) +{ + const PxI32 unrollCount = 128; + + PxI32* bodyIntegrationListIndex = &params.bodyIntegrationListIndex; + + PxI32 index = physx::shdfnd::atomicAdd(bodyIntegrationListIndex, unrollCount) - unrollCount; + + const PxI32 numBodies = PxI32(params.bodyListSize); + const PxI32 numArtics = PxI32(params.articulationListSize); + + Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray; + PxsBodyCore*const* bodyArray = params.bodyArray; + PxsRigidBody** PX_RESTRICT rigidBodies = params.rigidBodies; + ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart; + + + PxI32 numIntegrated = 0; + + PxI32 bodyRemainder = unrollCount; + + while(index < numArtics) + { + const PxI32 remainder = PxMin(numArtics - index, unrollCount); + bodyRemainder -= remainder; + + for(PxI32 a = 0; a < remainder; ++a, index++) + { + const PxI32 i = index; + { + PX_PROFILE_ZONE("Articulations.integrate", mContextID); + + ArticulationPImpl::updateBodies(articulationListStart[i], mDt); + } + + ++numIntegrated; + } + if(bodyRemainder == 0) + { + index = physx::shdfnd::atomicAdd(bodyIntegrationListIndex, unrollCount) - unrollCount; + bodyRemainder = unrollCount; + } + } + + index -= numArtics; + + const PxI32 unrollPlusArtics = unrollCount + numArtics; + + PxSolverBody* PX_RESTRICT solverBodies = params.bodyListStart; + PxSolverBodyData* PX_RESTRICT solverBodyData = params.bodyDataList + 
params.solverBodyOffset+1; + + while(index < numBodies) + { + const PxI32 remainder = PxMin(numBodies - index, bodyRemainder); + bodyRemainder -= remainder; + for(PxI32 a = 0; a < remainder; ++a, index++) + { + const PxI32 prefetch = PxMin(index+4, numBodies - 1); + Ps::prefetchLine(bodyArray[prefetch]); + Ps::prefetchLine(bodyArray[prefetch],128); + Ps::prefetchLine(&solverBodies[index],128); + Ps::prefetchLine(&motionVelocityArray[index],128); + Ps::prefetchLine(&bodyArray[index+32]); + Ps::prefetchLine(&rigidBodies[prefetch]); + + PxSolverBodyData& data = solverBodyData[index]; + + integrateCore(motionVelocityArray[index].linear, motionVelocityArray[index].angular, + solverBodies[index], data, mDt); + + PxsRigidBody& rBody = *rigidBodies[index]; + PxsBodyCore& core = rBody.getCore(); + rBody.mLastTransform = core.body2World; + core.body2World = data.body2World; + core.linearVelocity = data.linearVelocity; + core.angularVelocity = data.angularVelocity; + + bool hasStaticTouch = islandSim.getIslandStaticTouchCount(IG::NodeIndex(data.nodeIndex)) != 0; + sleepCheck(rigidBodies[index], mDt, mInvDt, mEnableStabilization, mUseAdaptiveForce, motionVelocityArray[index], hasStaticTouch); + + ++numIntegrated; + } + + { + index = physx::shdfnd::atomicAdd(bodyIntegrationListIndex, unrollCount) - unrollPlusArtics; + bodyRemainder = unrollCount; + } + } + + Ps::memoryBarrier(); + physx::shdfnd::atomicAdd(&params.numObjectsIntegrated, numIntegrated); +} + +class BlockAllocator : public PxConstraintAllocator +{ + PxsConstraintBlockManager& mConstraintBlockManager; + PxcConstraintBlockStream& mConstraintBlockStream; + FrictionPatchStreamPair& mFrictionPatchStreamPair; + PxU32& mTotalConstraintByteSize; +public: + + BlockAllocator(PxsConstraintBlockManager& constraintBlockManager, PxcConstraintBlockStream& constraintBlockStream, FrictionPatchStreamPair& frictionPatchStreamPair, + PxU32& totalConstraintByteSize) : + mConstraintBlockManager(constraintBlockManager), 
mConstraintBlockStream(constraintBlockStream), mFrictionPatchStreamPair(frictionPatchStreamPair), + mTotalConstraintByteSize(totalConstraintByteSize) + { + } + + virtual PxU8* reserveConstraintData(const PxU32 size) + { + mTotalConstraintByteSize += size; + return mConstraintBlockStream.reserve(size, mConstraintBlockManager); + } + + virtual PxU8* reserveFrictionData(const PxU32 size) + { + return mFrictionPatchStreamPair.reserve<PxU8>(size); + } + + virtual PxU8* findInputPatches(PxU8* frictionCookie) + { + return frictionCookie; + } + + PX_NOCOPY(BlockAllocator) + +}; + + + +static PxU32 createFinalizeContacts_Parallel(PxSolverBodyData* solverBodyData, ThreadContext& mThreadContext, DynamicsContext& context, + PxU32 startIndex, PxU32 endIndex, PxsContactManagerOutputIterator& outputs) +{ + const PxFrictionType::Enum frictionType = context.getFrictionType(); + const PxReal bounceThreshold = context.getBounceThreshold(); + const PxReal frictionOffsetThreshold = context.getFrictionOffsetThreshold(); + const PxReal dt = context.getDt(); + const PxReal invDt = context.getInvDt(); + + PxSolverConstraintDesc* contactDescPtr = mThreadContext.orderedContactConstraints; + + PxConstraintBatchHeader* headers = mThreadContext.contactConstraintBatchHeaders; + + PxI32 axisConstraintCount = 0; + ThreadContext* threadContext = context.getThreadContext(); + threadContext->mConstraintBlockStream.reset(); //ensure there's no left-over memory that belonged to another island + + PxTransform idt(PxIdentity); + + BlockAllocator blockAllocator(mThreadContext.mConstraintBlockManager, threadContext->mConstraintBlockStream, threadContext->mFrictionPatchStreamPair, threadContext->mConstraintSize); + + const PxReal ccdMaxSeparation = context.getCCDSeparationThreshold(); + + for(PxU32 a = startIndex; a < endIndex; ++a) + { + + PxConstraintBatchHeader& header = headers[a]; + + if(contactDescPtr[header.mStartIndex].constraintLengthOver16 == DY_SC_TYPE_RB_CONTACT) + { + 
SolverConstraintPrepState::Enum state = SolverConstraintPrepState::eUNBATCHABLE; + + PxSolverContactDesc blockDescs[4]; + PxsContactManagerOutput* cmOutputs[4]; + PxsContactManager* cms[4]; + for (PxU32 i = 0; i < header.mStride; ++i) + { + PxSolverConstraintDesc& desc = contactDescPtr[header.mStartIndex + i]; + PxSolverContactDesc& blockDesc = blockDescs[i]; + PxsContactManager* cm = reinterpret_cast<PxsContactManager*>(desc.constraint); + + cms[i] = cm; + + PxcNpWorkUnit& unit = cm->getWorkUnit(); + + cmOutputs[i] = &outputs.getContactManager(unit.mNpIndex); + + PxSolverBodyData& data0 = desc.linkIndexA != 0xffff ? solverBodyData[0] : solverBodyData[desc.bodyADataIndex]; + PxSolverBodyData& data1 = desc.linkIndexB != 0xffff ? solverBodyData[0] : solverBodyData[desc.bodyBDataIndex]; + + blockDesc.data0 = &data0; + blockDesc.data1 = &data1; + + PxU8 flags = unit.rigidCore0->mFlags; + if (unit.rigidCore1) + flags |= PxU8(unit.rigidCore1->mFlags); + + blockDesc.bodyFrame0 = unit.rigidCore0->body2World; + blockDesc.bodyFrame1 = unit.rigidCore1 ? unit.rigidCore1->body2World : idt; + blockDesc.shapeInteraction = cm->getShapeInteraction(); + blockDesc.contactForces = cmOutputs[i]->contactForces; + blockDesc.desc = &desc; + blockDesc.body0 = desc.bodyA; + blockDesc.body1 = desc.bodyB; + blockDesc.hasForceThresholds = !!(unit.flags & PxcNpWorkUnitFlag::eFORCE_THRESHOLD); + blockDesc.disableStrongFriction = !!(unit.flags & PxcNpWorkUnitFlag::eDISABLE_STRONG_FRICTION); + blockDesc.bodyState0 = (unit.flags & PxcNpWorkUnitFlag::eARTICULATION_BODY0) ? PxSolverContactDesc::eARTICULATION : PxSolverContactDesc::eDYNAMIC_BODY; + blockDesc.bodyState1 = (unit.flags & PxcNpWorkUnitFlag::eARTICULATION_BODY1) ? PxSolverContactDesc::eARTICULATION : (unit.flags & PxcNpWorkUnitFlag::eHAS_KINEMATIC_ACTOR) ? PxSolverContactDesc::eKINEMATIC_BODY : + ((unit.flags & PxcNpWorkUnitFlag::eDYNAMIC_BODY1) ? 
PxSolverContactDesc::eDYNAMIC_BODY : PxSolverContactDesc::eSTATIC_BODY); + //blockDesc.flags = unit.flags; + + PxReal dominance0 = unit.dominance0 ? 1.f : 0.f; + PxReal dominance1 = unit.dominance1 ? 1.f : 0.f; + + blockDesc.mInvMassScales.linear0 = blockDesc.mInvMassScales.angular0 = dominance0; + blockDesc.mInvMassScales.linear1 = blockDesc.mInvMassScales.angular1 = dominance1; + blockDesc.restDistance = unit.restDistance; + blockDesc.frictionPtr = unit.frictionDataPtr; + blockDesc.frictionCount = unit.frictionPatchCount; + blockDesc.maxCCDSeparation = (flags & PxRigidBodyFlag::eENABLE_SPECULATIVE_CCD) ? ccdMaxSeparation : PX_MAX_F32; + + } + + if(header.mStride == 4) + { + //KS - todo - plumb in axisConstraintCount into this method to keep track of the number of axes + state = createFinalizeMethods4[frictionType](cmOutputs, *threadContext, + blockDescs, + invDt, + bounceThreshold, + frictionOffsetThreshold, + context.getCorrelationDistance(), + blockAllocator); + + } + if(SolverConstraintPrepState::eSUCCESS != state) + { + for(PxU32 i = 0; i < header.mStride; ++i) + { + PxSolverConstraintDesc& desc = contactDescPtr[header.mStartIndex+i]; + PxsContactManager* cm = reinterpret_cast<PxsContactManager*>(desc.constraint); + PxcNpWorkUnit& n = cm->getWorkUnit(); + + PxsContactManagerOutput& output = outputs.getContactManager(n.mNpIndex); + + createFinalizeMethods[frictionType](blockDescs[i], output, *threadContext, + invDt, bounceThreshold, frictionOffsetThreshold, context.getCorrelationDistance(), blockAllocator); + + getContactManagerConstraintDesc(output,*cm,desc); + } + } + + for (PxU32 i = 0; i < header.mStride; ++i) + { + PxsContactManager* cm = cms[i]; + + PxcNpWorkUnit& unit = cm->getWorkUnit(); + unit.frictionDataPtr = blockDescs[i].frictionPtr; + unit.frictionPatchCount = blockDescs[i].frictionCount; + axisConstraintCount += blockDescs[i].axisConstraintCount; + + } + } + else if(contactDescPtr[header.mStartIndex].constraintLengthOver16 == DY_SC_TYPE_RB_1D) + 
{ + + SolverConstraintShaderPrepDesc shaderDescs[4]; + PxSolverConstraintPrepDesc descs[4]; + + PxTransform id(PxIdentity); + + for (PxU32 i = 0; i < header.mStride; ++i) + { + PxSolverConstraintDesc& desc = contactDescPtr[header.mStartIndex + i]; + const Constraint* constraint = reinterpret_cast<const Constraint*>(desc.constraint); + + SolverConstraintShaderPrepDesc& shaderPrepDesc = shaderDescs[i]; + PxSolverConstraintPrepDesc& prepDesc = descs[i]; + + const PxConstraintSolverPrep solverPrep = constraint->solverPrep; + const void* constantBlock = constraint->constantBlock; + const PxU32 constantBlockByteSize = constraint->constantBlockSize; + const PxTransform& pose0 = (constraint->body0 ? constraint->body0->getPose() : id); + const PxTransform& pose1 = (constraint->body1 ? constraint->body1->getPose() : id); + const PxSolverBody* sbody0 = desc.bodyA; + const PxSolverBody* sbody1 = desc.bodyB; + PxSolverBodyData* sbodyData0 = &solverBodyData[desc.linkIndexA != PxSolverConstraintDesc::NO_LINK ? 0 : desc.bodyADataIndex]; + PxSolverBodyData* sbodyData1 = &solverBodyData[desc.linkIndexB != PxSolverConstraintDesc::NO_LINK ? 
0 : desc.bodyBDataIndex]; + + shaderPrepDesc.constantBlock = constantBlock; + shaderPrepDesc.constantBlockByteSize = constantBlockByteSize; + shaderPrepDesc.constraint = constraint; + shaderPrepDesc.solverPrep = solverPrep; + + prepDesc.desc = &desc; + prepDesc.bodyFrame0 = pose0; + prepDesc.bodyFrame1 = pose1; + prepDesc.data0 = sbodyData0; + prepDesc.data1 = sbodyData1; + prepDesc.body0 = sbody0; + prepDesc.body1 = sbody1; + prepDesc.linBreakForce = constraint->linBreakForce; + prepDesc.angBreakForce = constraint->angBreakForce; + prepDesc.writeback = &context.getConstraintWriteBackPool()[constraint->index]; + prepDesc.disablePreprocessing = !!(constraint->flags & PxConstraintFlag::eDISABLE_PREPROCESSING); + prepDesc.improvedSlerp = !!(constraint->flags & PxConstraintFlag::eIMPROVED_SLERP); + prepDesc.driveLimitsAreForces = !!(constraint->flags & PxConstraintFlag::eDRIVE_LIMITS_ARE_FORCES); + prepDesc.minResponseThreshold = constraint->minResponseThreshold; + } + +#if DY_BATCH_1D + SolverConstraintPrepState::Enum state = SolverConstraintPrepState::eUNBATCHABLE; + if(header.mStride == 4) + { + PxU32 totalRows; + state = setupSolverConstraint4 + (shaderDescs, descs, dt, invDt, totalRows, + blockAllocator); + + axisConstraintCount += totalRows; + } + if(state != SolverConstraintPrepState::eSUCCESS) +#endif + { + for(PxU32 i = 0; i < header.mStride; ++i) + { + axisConstraintCount += SetupSolverConstraint(shaderDescs[i], descs[i], blockAllocator, dt, invDt); + } + } + } + } + + threadContext->getSimStats().numAxisSolverConstraints += axisConstraintCount; + + context.putThreadContext(threadContext); + return PxU32(axisConstraintCount); //Can't write to mThreadContext as it's shared!!!! 
+} + +class PxsCreateFinalizeContactsTask : public Cm::Task +{ + PxsCreateFinalizeContactsTask& operator=(const PxsCreateFinalizeContactsTask&); +public: + PxsCreateFinalizeContactsTask( const PxU32 numConstraints, PxSolverConstraintDesc* descArray, PxSolverBodyData* solverBodyData, + ThreadContext& threadContext, DynamicsContext& context, PxU32 startIndex, PxU32 endIndex, PxsContactManagerOutputIterator& outputs) : + mNumConstraints(numConstraints), mDescArray(descArray), mSolverBodyData(solverBodyData), + mThreadContext(threadContext), mDynamicsContext(context), + mOutputs(outputs), + mStartIndex(startIndex), mEndIndex(endIndex) + {} + + virtual void runInternal() + { + createFinalizeContacts_Parallel(mSolverBodyData, mThreadContext, mDynamicsContext, mStartIndex, mEndIndex, mOutputs); + } + + virtual const char* getName() const + { + return "PxsDynamics.createFinalizeContacts"; + } + +public: + const PxU32 mNumConstraints; + PxSolverConstraintDesc* mDescArray; + PxSolverBodyData* mSolverBodyData; + ThreadContext& mThreadContext; + DynamicsContext& mDynamicsContext; + PxsContactManagerOutputIterator& mOutputs; + PxU32 mStartIndex; + PxU32 mEndIndex; +}; + +void PxsSolverCreateFinalizeConstraintsTask::runInternal() +{ + ThreadContext& mThreadContext = *mIslandContext.mThreadContext; + + + + PxU32 descCount = mThreadContext.mNumDifferentBodyConstraints; + PxU32 selfConstraintDescCount = mThreadContext.contactDescArraySize - mThreadContext.mNumDifferentBodyConstraints; + + Ps::Array<PxU32>& accumulatedConstraintsPerPartition = mThreadContext.mConstraintsPerPartition; + + PxU32 numHeaders = 0; + PxU32 currentPartition = 0; + PxU32 maxJ = descCount == 0 ? 0 : accumulatedConstraintsPerPartition[0]; + + const PxU32 maxBatchPartition = 0xFFFFFFFF; + + const PxU32 maxBatchSize = mEnhancedDeterminism ? 
1u : 4u; + + PxU32 headersPerPartition = 0; + for(PxU32 a = 0; a < descCount;) + { + + + PxU32 loopMax = PxMin(maxJ - a, maxBatchSize); + PxU16 j = 0; + if(loopMax > 0) + { + PxConstraintBatchHeader& header = mThreadContext.contactConstraintBatchHeaders[numHeaders++]; + + j=1; + PxSolverConstraintDesc& desc = mThreadContext.orderedContactConstraints[a]; + if(!isArticulationConstraint(desc) && (desc.constraintLengthOver16 == DY_SC_TYPE_RB_CONTACT || + desc.constraintLengthOver16 == DY_SC_TYPE_RB_1D) && currentPartition < maxBatchPartition) + { + for(; j < loopMax && desc.constraintLengthOver16 == mThreadContext.orderedContactConstraints[a+j].constraintLengthOver16 && + !isArticulationConstraint(mThreadContext.orderedContactConstraints[a+j]); ++j); + } + header.mStartIndex = a; + header.mStride = j; + headersPerPartition++; + } + if(maxJ == (a + j) && maxJ != descCount) + { + //Go to next partition! + accumulatedConstraintsPerPartition[currentPartition] = headersPerPartition; + headersPerPartition = 0; + currentPartition++; + maxJ = accumulatedConstraintsPerPartition[currentPartition]; + } + a+= j; + } + if(descCount) + accumulatedConstraintsPerPartition[currentPartition] = headersPerPartition; + + + + accumulatedConstraintsPerPartition.forceSize_Unsafe(mThreadContext.mMaxPartitions); + + PxU32 numDifferentBodyBatchHeaders = numHeaders; + + for(PxU32 a = 0; a < selfConstraintDescCount; ++a) + { + PxConstraintBatchHeader& header = mThreadContext.contactConstraintBatchHeaders[numHeaders++]; + header.mStartIndex = a + descCount; + header.mStride = 1; + } + + PxU32 numSelfConstraintBatchHeaders = numHeaders - numDifferentBodyBatchHeaders; + + mThreadContext.numDifferentBodyBatchHeaders = numDifferentBodyBatchHeaders; + mThreadContext.numSelfConstraintBatchHeaders = numSelfConstraintBatchHeaders; + mThreadContext.numContactConstraintBatches = numHeaders; + + PX_UNUSED(descCount); + + { + PxSolverConstraintDesc* descBegin = mThreadContext.orderedContactConstraints; + + 
const PxU32 numThreads = getTaskManager()->getCpuDispatcher()->getWorkerCount(); + + //Choose an appropriate number of constraint prep tasks. This must be proportionate to the number of constraints to prep and the number + //of worker threads available. + const PxU32 TaskBlockSize = 16; + const PxU32 TaskBlockLargeSize = 64; + const PxU32 BlockAllocationSize = 64; + + PxU32 numTasks = (numHeaders+TaskBlockLargeSize-1)/TaskBlockLargeSize; + + if(numTasks) + { + + if(numTasks < numThreads) + numTasks = PxMax(1u, (numHeaders+TaskBlockSize-1)/TaskBlockSize); + + const PxU32 constraintsPerTask = (numHeaders + numTasks-1)/numTasks; + + for(PxU32 i = 0; i < numTasks; i+=BlockAllocationSize) + { + PxU32 blockSize = PxMin(numTasks - i, BlockAllocationSize); + + PxsCreateFinalizeContactsTask* tasks = reinterpret_cast<PxsCreateFinalizeContactsTask*>(mContext.getTaskPool().allocate(sizeof(PxsCreateFinalizeContactsTask)*blockSize)); + + for(PxU32 a = 0; a < blockSize; ++a) + { + PxU32 startIndex = (a + i) * constraintsPerTask; + PxU32 endIndex = PxMin(startIndex + constraintsPerTask, numHeaders); + PxsCreateFinalizeContactsTask* pTask = PX_PLACEMENT_NEW(&tasks[a], PxsCreateFinalizeContactsTask( descCount, descBegin, mContext.mSolverBodyDataPool.begin(), mThreadContext, mContext, startIndex, endIndex, mOutputs)); + + pTask->setContinuation(mCont); + pTask->removeReference(); + } + } + } + } +} + +} +} + + diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.h new file mode 100644 index 00000000..9fb1d94d --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyDynamics.h @@ -0,0 +1,483 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef DY_DYNAMICS_H +#define DY_DYNAMICS_H + +#include "PxvConfig.h" +#include "CmSpatialVector.h" +#include "CmTask.h" +#include "CmPool.h" +#include "PxcThreadCoherentCache.h" +#include "DyThreadContext.h" +#include "PxcConstraintBlockStream.h" +#include "DySolverBody.h" +#include "DyContext.h" +#include "PxsIslandManagerTypes.h" +#include "PxvNphaseImplementationContext.h" +#include "solver/PxSolverDefs.h" + +namespace physx +{ + +namespace Cm +{ + class FlushPool; +} + +namespace IG +{ + class SimpleIslandManager; + struct Edge; +} + +class PxsRigidBody; + +class PxsStreamedThresholdTable; + +struct PxsBodyCore; +struct PxsIslandObjects; +class PxsIslandIndices; +struct PxsIndexedInteraction; +class PxsIslandManager; +struct PxsIndexedConstraint; +struct PxsIndexedContactManager; +class PxsHeapMemoryAllocator; +class PxsMemoryManager; +class PxsDefaultMemoryManager; +struct PxSolverConstraintDesc; + +namespace Cm +{ + class Bitmap; + class SpatialVector; +} + +namespace Dy +{ + class SolverCore; + struct SolverIslandParams; + struct ArticulationSolverDesc; + class Articulation; + class DynamicsContext; + + + + +#define SOLVER_PARALLEL_METHOD_ARGS \ + DynamicsContext& context, \ + SolverIslandParams& params, \ + IG::IslandSim& islandSim + +//typedef void (*PxsSolveParallelMethod)(SOLVER_PARALLEL_METHOD_ARGS); +//extern PxsSolveParallelMethod solveParallel[3]; + +void solveParallel(SOLVER_PARALLEL_METHOD_ARGS); +void solveParallelCouloumFriction(SOLVER_PARALLEL_METHOD_ARGS); + + +struct SolverIslandObjects; + +/** +\brief Solver body pool (array) that enforces 128-byte alignment for base address of array. +\note This reduces cache misses on platforms with 128-byte-size cache lines by aligning the start of the array to the beginning of a cache line. 
+*/ +class SolverBodyPool : public Ps::Array<PxSolverBody, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxSolverBody> > > +{ + PX_NOCOPY(SolverBodyPool) +public: + SolverBodyPool() {} +}; + +/** +\brief Solver body data pool (array) that enforces 128-byte alignment for base address of array. +\note This reduces cache misses on platforms with 128-byte-size cache lines by aligning the start of the array to the beginning of a cache line. +*/ +class SolverBodyDataPool : public Ps::Array<PxSolverBodyData, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxSolverBodyData> > > +{ + PX_NOCOPY(SolverBodyDataPool) +public: + SolverBodyDataPool() {} +}; + +class SolverConstraintDescPool : public Ps::Array<PxSolverConstraintDesc, Ps::AlignedAllocator<128, Ps::ReflectionAllocator<PxSolverConstraintDesc> > > +{ + PX_NOCOPY(SolverConstraintDescPool) +public: + SolverConstraintDescPool() { } +}; + +/** +\brief Encapsulates an island's context +*/ + +struct IslandContext +{ + //The thread context for this island (set in the island start task, released in the island end task) + ThreadContext* mThreadContext; + PxsIslandIndices mCounts; +}; + + +/** +\brief Encapsulates the data used by the constraint solver. +*/ + +#if PX_VC + #pragma warning(push) + #pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value. +#endif + + +class DynamicsContext : public Context +{ + PX_NOCOPY(DynamicsContext) +public: + + /** + \brief Creates a DynamicsContext associated with a PxsContext + \return A pointer to the newly-created DynamicsContext. 
+ */ + static DynamicsContext* create( PxcNpMemBlockPool* memBlockPool, + PxcScratchAllocator& scratchAllocator, + Cm::FlushPool& taskPool, + PxvSimStats& simStats, + PxTaskManager* taskManager, + Ps::VirtualAllocatorCallback* allocator, + PxsMaterialManager* materialManager, + IG::IslandSim* accurateIslandSim, + PxU64 contextID, + const bool enableStabilization, + const bool useEnhancedDeterminism, + const bool useAdaptiveForce + ); + + /** + \brief Destroys this DynamicsContext + */ + void destroy(); + + /** + \brief Returns the static world solver body + \return The static world solver body. + */ + PX_FORCE_INLINE PxSolverBody& getWorldSolverBody() { return mWorldSolverBody; } + + PX_FORCE_INLINE Cm::FlushPool& getTaskPool() { return mTaskPool; } + + PX_FORCE_INLINE ThresholdStream& getThresholdStream() { return *mThresholdStream; } + + PX_FORCE_INLINE PxvSimStats& getSimStats() { return mSimStats; } + +#if PX_ENABLE_SIM_STATS + void addThreadStats(const ThreadContext::ThreadSimStats& stats); +#endif + + /** + \brief The entry point for the constraint solver. + \param[in] dt The simulation time-step + \param[in] continuation The continuation task for the solver + + This method is called after the island generation has completed. Its main responsibilities are: + (1) Reserving the solver body pools + (2) Initializing the static and kinematic solver bodies, which are shared resources between islands. + (3) Construct the solver task chains for each island + + Each island is solved as an independent solver task chain in parallel. 
+ + */ + + virtual void update(IG::SimpleIslandManager& simpleIslandManager, PxBaseTask* continuation, PxBaseTask* lostTouchTask, + PxsContactManager** foundPatchManagers, PxU32 nbFoundPatchManagers, PxsContactManager** lostPatchManagers, PxU32 nbLostPatchManagers, + PxU32 maxPatchesPerCM, PxsContactManagerOutputIterator& iter, PxsContactManagerOutput* gpuOutputs, const PxReal dt, const PxVec3& gravity, const PxU32 bitMapWordCounts); + + virtual void processLostPatches(IG::SimpleIslandManager& /*simpleIslandManager*/, PxsContactManager** /*lostPatchManagers*/, PxU32 /*nbLostPatchManagers*/, PxsContactManagerOutputIterator& /*iterator*/){} + + virtual void updateBodyCore(PxBaseTask* continuation); + + virtual void setSimulationController(PxsSimulationController* simulationController ){ mSimulationController = simulationController; } + /** + \brief This method combines the results of several islands, e.g. constructing scene-level simulation statistics and merging together threshold streams for contact notification. + */ + virtual void mergeResults(); + + virtual void getDataStreamBase(void*& /*contactStreamBase*/, void*& /*patchStreamBase*/, void*& /*forceAndIndicesStreamBase*/){} + + /** + \brief Allocates and returns a thread context object. + \return A thread context. + */ + PX_FORCE_INLINE ThreadContext* getThreadContext() + { + return mThreadContextPool.get(); + } + + /** + \brief Returns a thread context to the thread context pool. + \param[in] context The thread context to return to the thread context pool. 
+ */ + void putThreadContext(ThreadContext* context) + { + mThreadContextPool.put(context); + } + + + PX_FORCE_INLINE PxU32 getKinematicCount() const { return mKinematicCount; } + PX_FORCE_INLINE PxU64 getContextId() const { return mContextID; } + +protected: + + /** + \brief Constructor for DynamicsContext + */ + DynamicsContext(PxcNpMemBlockPool* memBlockPool, + PxcScratchAllocator& scratchAllocator, + Cm::FlushPool& taskPool, + PxvSimStats& simStats, + PxTaskManager* taskManager, + Ps::VirtualAllocatorCallback* allocator, + PxsMaterialManager* materialManager, + IG::IslandSim* accurateIslandSim, + PxU64 contextID, + const bool enableStabilization, + const bool useEnhancedDeterminism, + const bool useAdaptiveForce + ); + /** + \brief Destructor for DynamicsContext + */ + virtual ~DynamicsContext(); + + + // Solver helper-methods + /** + \brief Computes the unconstrained velocity for a given PxsRigidBody + \param[in] atom The PxsRigidBody + */ + void computeUnconstrainedVelocity(PxsRigidBody* atom) const; + + /** + \brief fills in a PxSolverConstraintDesc from an indexed interaction + \param[in,out] desc The PxSolverConstraintDesc + \param[in] constraint The PxsIndexedInteraction + */ + void setDescFromIndices(PxSolverConstraintDesc& desc, + const PxsIndexedInteraction& constraint, const PxU32 solverBodyOffset); + + + void setDescFromIndices(PxSolverConstraintDesc& desc, IG::EdgeIndex edgeIndex, + const IG::SimpleIslandManager& islandManager, PxU32* bodyRemapTable, const PxU32 solverBodyOffset); + + /** + \brief Compute the unconstrained velocity for set of bodies in parallel. This function may spawn additional tasks. + \param[in] dt The timestep + \param[in] bodyArray The array of body cores + \param[in] originalBodyArray The array of PxsRigidBody + \param[in] nodeIndexArray The array of island node index + \param[in] bodyCount The number of bodies + \param[out] solverBodyPool The pool of solver bodies. These are synced with the corresponding body in bodyArray. 
+ \param[out] solverBodyDataPool The pool of solver body data. These are synced with the corresponding body in bodyArray + \param[out] motionVelocityArray The motion velocities for the bodies + \param[out] maxSolverPositionIterations The maximum number of position iterations requested by any body in the island + \param[out] maxSolverVelocityIterations The maximum number of velocity iterations requested by any body in the island + \param[out] integrateTask The continuation task for any tasks spawned by this function. + */ + void preIntegrationParallel( + const PxF32 dt, + PxsBodyCore*const* bodyArray, // INOUT: core body attributes + PxsRigidBody*const* originalBodyArray, // IN: original body atom names (LEGACY - DON'T deref the ptrs!!) + PxU32 const* nodeIndexArray, // IN: island node index + PxU32 bodyCount, // IN: body count + PxSolverBody* solverBodyPool, // IN: solver atom pool (space preallocated) + PxSolverBodyData* solverBodyDataPool, + Cm::SpatialVector* motionVelocityArray, // OUT: motion velocities + PxU32& maxSolverPositionIterations, + PxU32& maxSolverVelocityIterations, + PxBaseTask& integrateTask + ); + + /** + \brief Solves an island in parallel. + + \param[in] params Solver parameter structure + */ + + void solveParallel(SolverIslandParams& params, IG::IslandSim& islandSim); + + + + void integrateCoreParallel(SolverIslandParams& params, IG::IslandSim& islandSim); + + + + + /** + \brief Resets the thread contexts + */ + void resetThreadContexts(); + + /** + \brief Returns the scratch memory allocator. + \return The scratch memory allocator. + */ + PX_FORCE_INLINE PxcScratchAllocator& getScratchAllocator() { return mScratchAllocator; } + + //Data + + /** + \brief Body to represent the world static body. + */ + PX_ALIGN(16, PxSolverBody mWorldSolverBody); + /** + \brief Body data to represent the world static body. 
+ */ + PX_ALIGN(16, PxSolverBodyData mWorldSolverBodyData); + + /** + \brief A thread context pool + */ + PxcThreadCoherentCache<ThreadContext, PxcNpMemBlockPool> mThreadContextPool; + + /** + \brief Solver constraint desc array + */ + SolverConstraintDescPool mSolverConstraintDescPool; + + /** + \brief Ordered solver constraint desc array (after partitioning) + */ + SolverConstraintDescPool mOrderedSolverConstraintDescPool; + + /** + \brief A temporary array of constraint descs used for partitioning + */ + SolverConstraintDescPool mTempSolverConstraintDescPool; + + /** + \brief An array of contact constraint batch headers + */ + Ps::Array<PxConstraintBatchHeader> mContactConstraintBatchHeaders; + + /** + \brief Array of motion velocities for all bodies in the scene. + */ + Ps::Array<Cm::SpatialVector> mMotionVelocityArray; + + /** + \brief Array of body core pointers for all bodies in the scene. + */ + Ps::Array<PxsBodyCore*> mBodyCoreArray; + + /** + \brief Array of rigid body pointers for all bodies in the scene. + */ + Ps::Array<PxsRigidBody*> mRigidBodyArray; + + /** + \brief Array of articulation pointers for all articulations in the scene. + */ + Ps::Array<Articulation*> mArticulationArray; + + /** + \brief Global pool for solver bodies. Kinematic bodies are at the start, and then dynamic bodies + */ + SolverBodyPool mSolverBodyPool; + /** + \brief Global pool for solver body data. Kinematic bodies are at the start, and then dynamic bodies + */ + SolverBodyDataPool mSolverBodyDataPool; + + + ThresholdStream* mExceededForceThresholdStream[2]; //this stores the previous and current exceeded force thresholdStream + + Ps::Array<PxU32> mExceededForceThresholdStreamMask; + + /** + \brief Interface to the solver core. + \note We currently only support PxsSolverCoreSIMD. Other cores may be added in future releases. 
+ */ + SolverCore* mSolverCore[PxFrictionType::eFRICTION_COUNT]; + + Ps::Array<PxU32> mSolverBodyRemapTable; //Remaps from the "active island" index to the index within a solver island + + Ps::Array<PxU32> mNodeIndexArray; //island node index + + Ps::Array<PxsIndexedContactManager> mContactList; + + /** + \brief The total number of kinematic bodies in the scene + */ + PxU32 mKinematicCount; + + /** + \brief Atomic counter for the number of threshold stream elements. + */ + PxI32 mThresholdStreamOut; + + + + PxsMaterialManager* mMaterialManager; + + PxsContactManagerOutputIterator mOutputIterator; + +private: + //private: + PxcScratchAllocator& mScratchAllocator; + Cm::FlushPool& mTaskPool; + PxTaskManager* mTaskManager; + PxU32 mCurrentIndex; // this is the index point to the current exceeded force threshold stream + + PxU64 mContextID; + + protected: + + friend class PxsSolverStartTask; + friend class PxsSolverAticulationsTask; + friend class PxsSolverSetupConstraintsTask; + friend class PxsSolverCreateFinalizeConstraintsTask; + friend class PxsSolverConstraintPartitionTask; + friend class PxsSolverSetupSolveTask; + friend class PxsSolverIntegrateTask; + friend class PxsSolverEndTask; + friend class PxsSolverConstraintPostProcessTask; + friend class PxsForceThresholdTask; + friend class SolverArticulationUpdateTask; + + friend void solveParallel(SOLVER_PARALLEL_METHOD_ARGS); +}; + +#if PX_VC + #pragma warning(pop) +#endif + +} +} + +#endif //DY_DYNAMICS_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionCorrelation.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionCorrelation.cpp new file mode 100644 index 00000000..ba7c2b1d --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionCorrelation.cpp @@ -0,0 +1,276 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "PxvConfig.h" +#include "DyCorrelationBuffer.h" +#include "PxsMaterialManager.h" +#include "PsUtilities.h" + +using namespace physx; +using namespace Gu; + +namespace physx +{ + +namespace Dy +{ + +namespace +{ +PX_FORCE_INLINE void initContactPatch(CorrelationBuffer::ContactPatchData& patch, PxU16 index, PxReal restitution, PxReal staticFriction, PxReal dynamicFriction, + PxU8 flags) +{ + patch.start = index; + patch.count = 1; + patch.next = 0; + patch.flags = flags; + patch.restitution = restitution; + patch.staticFriction = staticFriction; + patch.dynamicFriction = dynamicFriction; +} + +PX_FORCE_INLINE void initFrictionPatch(FrictionPatch& p, const PxVec3& worldNormal, const PxTransform& body0Pose, const PxTransform& body1Pose, + PxReal restitution, PxReal staticFriction, PxReal dynamicFriction, PxU8 materialFlags) +{ + p.body0Normal = body0Pose.rotateInv(worldNormal); + p.body1Normal = body1Pose.rotateInv(worldNormal); + p.anchorCount = 0; + p.broken = 0; + p.staticFriction = staticFriction; + p.dynamicFriction = dynamicFriction; + p.restitution = restitution; + p.materialFlags = materialFlags; +} +} + + +bool createContactPatches(CorrelationBuffer& fb, const Gu::ContactPoint* cb, PxU32 contactCount, PxReal normalTolerance) +{ + + // PT: this rewritten version below doesn't have LHS + + PxU32 contactPatchCount = fb.contactPatchCount; + if(contactPatchCount == Gu::ContactBuffer::MAX_CONTACTS) + return false; + if(contactCount>0) + { + CorrelationBuffer::ContactPatchData* currentPatchData = fb.contactPatches + contactPatchCount; + const Gu::ContactPoint* PX_RESTRICT contacts = cb; + + PxU8 count=1; + + initContactPatch(fb.contactPatches[contactPatchCount++], Ps::to16(0), contacts[0].restitution, + contacts[0].staticFriction, contacts[0].dynamicFriction, PxU8(contacts[0].materialFlags)); + + PxU32 patchIndex = 0; + + for (PxU32 i = 1; i<contactCount; i++) + { + const Gu::ContactPoint& curContact = contacts[i]; + const Gu::ContactPoint& preContact = 
contacts[patchIndex]; + + if(curContact.staticFriction == preContact.staticFriction + && curContact.dynamicFriction == preContact.dynamicFriction + && curContact.restitution == preContact.restitution + && curContact.normal.dot(preContact.normal)>=normalTolerance) + { + count++; + } + else + { + if(contactPatchCount == Gu::ContactBuffer::MAX_CONTACTS) + return false; + patchIndex = i; + currentPatchData->count = count; + count = 1; + currentPatchData = fb.contactPatches + contactPatchCount; + + initContactPatch(fb.contactPatches[contactPatchCount++], Ps::to16(i), curContact.restitution, + curContact.staticFriction, curContact.dynamicFriction, PxU8(curContact.materialFlags)); + } + } + if(count!=1) + currentPatchData->count = count; + } + fb.contactPatchCount = contactPatchCount; + return true; +} + +bool correlatePatches(CorrelationBuffer& fb, + const Gu::ContactPoint* cb, + const PxTransform& bodyFrame0, + const PxTransform& bodyFrame1, + PxReal normalTolerance, + PxU32 startContactPatchIndex, + PxU32 startFrictionPatchIndex) +{ + bool overflow = false; + PxU32 frictionPatchCount = fb.frictionPatchCount; + + for(PxU32 i=startContactPatchIndex;i<fb.contactPatchCount;i++) + { + CorrelationBuffer::ContactPatchData &c = fb.contactPatches[i]; + const PxVec3 patchNormal = cb[c.start].normal; + + PxU32 j=startFrictionPatchIndex; + for(;j<frictionPatchCount && ((patchNormal.dot(fb.frictionPatchWorldNormal[j]) < normalTolerance) + || fb.frictionPatches[j].restitution != c.restitution|| fb.frictionPatches[j].staticFriction != c.staticFriction || + fb.frictionPatches[j].dynamicFriction != c.dynamicFriction);j++) + ; + + if(j==frictionPatchCount) + { + overflow |= j==CorrelationBuffer::MAX_FRICTION_PATCHES; + if(overflow) + continue; + + initFrictionPatch(fb.frictionPatches[frictionPatchCount], patchNormal, bodyFrame0, bodyFrame1, c.restitution, c.staticFriction, c.dynamicFriction, c.flags); + fb.frictionPatchWorldNormal[j] = patchNormal; + 
fb.frictionPatchContactCounts[frictionPatchCount] = c.count; + fb.contactID[frictionPatchCount][0] = 0xffff; + fb.contactID[frictionPatchCount++][1] = 0xffff; + c.next = CorrelationBuffer::LIST_END; + } + else + { + fb.frictionPatchContactCounts[j] += c.count; + c.next = Ps::to16(fb.correlationListHeads[j]); + } + + fb.correlationListHeads[j] = i; + } + + fb.frictionPatchCount = frictionPatchCount; + + return overflow; +} + +// run over the friction patches, trying to find two anchors per patch. If we already have +// anchors that are close, we keep them, which gives us persistent spring behavior + +void growPatches(CorrelationBuffer& fb, + const ContactPoint* cb, + const PxTransform& bodyFrame0, + const PxTransform& bodyFrame1, + PxReal , //unused correlationDistance + PxU32 frictionPatchStartIndex, + PxReal frictionOffsetThreshold) +{ + for(PxU32 i=frictionPatchStartIndex;i<fb.frictionPatchCount;i++) + { + FrictionPatch& fp = fb.frictionPatches[i]; + + if(fp.anchorCount==2 || fb.correlationListHeads[i]==CorrelationBuffer::LIST_END) + continue; + + PxVec3 worldAnchors[2]; + PxU16 anchorCount = 0; + PxReal pointDistSq = 0.0f, dist0, dist1; + + // if we have an anchor already, keep it + if(fp.anchorCount == 1) + { + worldAnchors[anchorCount++] = bodyFrame0.transform(fp.body0Anchors[0]); + } + + for(PxU32 patch = fb.correlationListHeads[i]; + patch!=CorrelationBuffer::LIST_END; + patch = fb.contactPatches[patch].next) + { + CorrelationBuffer::ContactPatchData& cp = fb.contactPatches[patch]; + for(PxU16 j=0;j<cp.count;j++) + { + const PxVec3& worldPoint = cb[cp.start+j].point; + + if(cb[cp.start+j].separation < frictionOffsetThreshold) + { + + switch(anchorCount) + { + case 0: + fb.contactID[i][0] = PxU16(cp.start+j); + worldAnchors[0] = worldPoint; + anchorCount++; + break; + case 1: + pointDistSq = (worldPoint-worldAnchors[0]).magnitudeSquared(); + if (pointDistSq > (0.025f * 0.025f)) + { + fb.contactID[i][1] = PxU16(cp.start+j); + worldAnchors[1] = worldPoint; + 
anchorCount++; + } + break; + default: //case 2 + dist0 = (worldPoint-worldAnchors[0]).magnitudeSquared(); + dist1 = (worldPoint-worldAnchors[1]).magnitudeSquared(); + if (dist0 > dist1) + { + if(dist0 > pointDistSq) + { + fb.contactID[i][1] = PxU16(cp.start+j); + worldAnchors[1] = worldPoint; + pointDistSq = dist0; + } + } + else if (dist1 > pointDistSq) + { + fb.contactID[i][0] = PxU16(cp.start+j); + worldAnchors[0] = worldPoint; + pointDistSq = dist1; + } + } + } + } + } + + //PX_ASSERT(anchorCount > 0); + + // add the new anchor(s) to the patch + for(PxU32 j = fp.anchorCount; j < anchorCount; j++) + { + fp.body0Anchors[j] = bodyFrame0.transformInv(worldAnchors[j]); + fp.body1Anchors[j] = bodyFrame1.transformInv(worldAnchors[j]); + } + + // the block contact solver always reads at least one anchor per patch for performance reasons even if there are no valid patches, + // so we need to initialize this in the unexpected case that we have no anchors + + if(anchorCount==0) + fp.body0Anchors[0] = fp.body1Anchors[0] = PxVec3(0); + + fp.anchorCount = anchorCount; + } +} + +} + +} + diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatch.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatch.h new file mode 100644 index 00000000..507e7f12 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatch.h @@ -0,0 +1,81 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + + +#ifndef PXC_FRICTIONPATCH_H +#define PXC_FRICTIONPATCH_H + +#include "foundation/PxSimpleTypes.h" +#include "foundation/PxVec3.h" +#include "PxvConfig.h" + +namespace physx +{ + +namespace Dy +{ + +struct FrictionPatch +{ + PxU8 broken; // PT: must be first byte of struct, see "frictionBrokenWritebackByte" + PxU8 materialFlags; + PxU16 anchorCount; + PxReal restitution; + PxReal staticFriction; + PxReal dynamicFriction; + PxVec3 body0Normal; + PxVec3 body1Normal; + PxVec3 body0Anchors[2]; + PxVec3 body1Anchors[2]; + + PX_FORCE_INLINE void operator = (const FrictionPatch& other) + { + broken = other.broken; + materialFlags = other.materialFlags; + anchorCount = other.anchorCount; + body0Normal = other.body0Normal; + body1Normal = other.body1Normal; + body0Anchors[0] = other.body0Anchors[0]; + body0Anchors[1] = other.body0Anchors[1]; + body1Anchors[0] = other.body1Anchors[0]; + body1Anchors[1] = other.body1Anchors[1]; + restitution = other.restitution; + staticFriction = other.staticFriction; + dynamicFriction = other.dynamicFriction; + } +}; + +//PX_COMPILE_TIME_ASSERT(sizeof(FrictionPatch)==80); + +} + +} + +#endif diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatchStreamPair.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatchStreamPair.h new file mode 100644 index 00000000..8219918f --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyFrictionPatchStreamPair.h @@ -0,0 +1,128 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + + +#ifndef PXC_FRICTIONPATCHPOOL_H +#define PXC_FRICTIONPATCHPOOL_H + +#include "foundation/PxSimpleTypes.h" +#include "PxvConfig.h" +#include "PsMutex.h" +#include "PsArray.h" + +// Each narrow phase thread has an input stream of friction patches from the +// previous frame and an output stream of friction patches which will be +// saved for next frame. The patches persist for exactly one frame at which +// point they get thrown away. + + +// There is a stream pair per thread. A contact callback reserves space +// for its friction patches and gets a cookie in return that can stash +// for next frame. Cookies are valid for one frame only. 
+// +// note that all friction patches reserved are guaranteed to be contiguous; +// this might turn out to be a bit inefficient if we often have a large +// number of friction patches + +#include "PxcNpMemBlockPool.h" + +namespace physx +{ + +class FrictionPatchStreamPair +{ +public: + FrictionPatchStreamPair(PxcNpMemBlockPool& blockPool); + + // reserve can fail and return null. Read should never fail + template<class FrictionPatch> + FrictionPatch* reserve(const PxU32 size); + + template<class FrictionPatch> + const FrictionPatch* findInputPatches(const PxU8* ptr) const; + void reset(); + + PxcNpMemBlockPool& getBlockPool() { return mBlockPool;} +private: + PxcNpMemBlockPool& mBlockPool; + PxcNpMemBlock* mBlock; + PxU32 mUsed; + + FrictionPatchStreamPair& operator=(const FrictionPatchStreamPair&); +}; + +PX_FORCE_INLINE FrictionPatchStreamPair::FrictionPatchStreamPair(PxcNpMemBlockPool& blockPool): + mBlockPool(blockPool), mBlock(NULL), mUsed(0) +{ +} + +PX_FORCE_INLINE void FrictionPatchStreamPair::reset() +{ + mBlock = NULL; + mUsed = 0; +} + +// reserve can fail and return null. 
Read should never fail +template <class FrictionPatch> +FrictionPatch* FrictionPatchStreamPair::reserve(const PxU32 size) +{ + if(size>PxcNpMemBlock::SIZE) + { + return reinterpret_cast<FrictionPatch*>(-1); + } + + PX_ASSERT(size <= PxcNpMemBlock::SIZE); + + FrictionPatch* ptr = NULL; + + if(mBlock == NULL || mUsed + size > PxcNpMemBlock::SIZE) + { + mBlock = mBlockPool.acquireFrictionBlock(); + mUsed = 0; + } + + if(mBlock) + { + ptr = reinterpret_cast<FrictionPatch*>(mBlock->data+mUsed); + mUsed += size; + } + + return ptr; +} + +template <class FrictionPatch> +const FrictionPatch* FrictionPatchStreamPair::findInputPatches(const PxU8* ptr) const +{ + return reinterpret_cast<const FrictionPatch*>(ptr); +} + +} + +#endif diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyRigidBodyToSolverBody.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyRigidBodyToSolverBody.cpp new file mode 100644 index 00000000..c6c66e8a --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyRigidBodyToSolverBody.cpp @@ -0,0 +1,107 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#include "CmUtils.h" +#include "DySolverBody.h" +#include "PxsRigidBody.h" +#include "PxvDynamics.h" + +namespace physx +{ + +namespace Dy +{ +//This method returns values of 0 when the inertia is 0. This is a bit of a hack but allows us to +//represent kinematic objects' velocities in our new format +PX_FORCE_INLINE PxVec3 computeSafeSqrtInertia(const PxVec3& v) +{ + return PxVec3(v.x == 0.f ? 0.f : PxSqrt(v.x), v.y == 0.f ? 0.f : PxSqrt(v.y), v.z == 0.f ? 
0.f : PxSqrt(v.z)); +} + +void copyToSolverBodyData(const PxVec3& linearVelocity, const PxVec3& angularVelocity, const PxReal invMass, const PxVec3& invInertia, const PxTransform& globalPose, + const PxReal maxDepenetrationVelocity, const PxReal maxContactImpulse, const PxU32 nodeIndex, const PxReal reportThreshold, PxSolverBodyData& data, PxU32 lockFlags) +{ + data.nodeIndex = nodeIndex; + + PxVec3 safeSqrtInvInertia = computeSafeSqrtInertia(invInertia); + + PxMat33 rotation(globalPose.q); + + Cm::transformInertiaTensor(safeSqrtInvInertia, rotation, data.sqrtInvInertia); + + // Copy simple properties + data.linearVelocity = linearVelocity; + data.angularVelocity = angularVelocity; + + if (lockFlags) + { + if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_X) + data.linearVelocity.x = 0.f; + if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Y) + data.linearVelocity.y = 0.f; + if (lockFlags & PxRigidDynamicLockFlag::eLOCK_LINEAR_Z) + data.linearVelocity.z = 0.f; + + //KS - technically, we can zero the inertia columns and produce stiffer constraints. However, this can cause numerical issues with the + //joint solver, which is fixed by disabling joint preprocessing and setting minResponseThreshold to some reasonable value > 0. 
However, until + //this is handled automatically, it's probably better not to zero these inertia rows + if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_X) + { + data.angularVelocity.x = 0.f; + //data.sqrtInvInertia.column0 = PxVec3(0.f); + } + if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Y) + { + data.angularVelocity.y = 0.f; + //data.sqrtInvInertia.column1 = PxVec3(0.f); + } + if (lockFlags & PxRigidDynamicLockFlag::eLOCK_ANGULAR_Z) + { + data.angularVelocity.z = 0.f; + //data.sqrtInvInertia.column2 = PxVec3(0.f); + } + } + + + PX_ASSERT(linearVelocity.isFinite()); + PX_ASSERT(angularVelocity.isFinite()); + + data.invMass = invMass; + data.penBiasClamp = maxDepenetrationVelocity; + data.maxContactImpulse = maxContactImpulse; + data.body2World = globalPose; + data.lockFlags = lockFlags; + + data.reportThreshold = reportThreshold; +} + +} + +} diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverBody.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverBody.h new file mode 100644 index 00000000..566f1ca1 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverBody.h @@ -0,0 +1,60 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. 
+// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#ifndef DY_SOLVERATOM_H +#define DY_SOLVERATOM_H + +#include "foundation/PxVec3.h" +#include "foundation/PxTransform.h" +#include "foundation/PxMat33.h" +#include "CmPhysXCommon.h" +#include "CmSpatialVector.h" +#include "solver/PxSolverDefs.h" + +namespace physx +{ + +class PxsRigidBody; +struct PxsBodyCore; + +namespace Dy +{ + +//void copyToSolverBodyData(PxSolverBodyData& data, const PxsBodyCore& core, const PxU32 nodeIndex); + + +void copyToSolverBodyData(const PxVec3& linearVelocity, const PxVec3& angularVelocity, const PxReal invMass, const PxVec3& invInertia, const PxTransform& globalPose, + const PxReal maxDepenetrationVelocity, const PxReal maxContactImpulse, const PxU32 nodeIndex, const PxReal reportThreshold, PxSolverBodyData& solverBodyData, PxU32 lockFlags); + +} + +} + +#endif //DY_SOLVERATOM_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D.h new file mode 100644 index 00000000..4291530b --- /dev/null +++ 
b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D.h @@ -0,0 +1,203 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef DY_SOLVER_CONSTRAINT_1D_H +#define DY_SOLVER_CONSTRAINT_1D_H + +#include "foundation/PxVec3.h" +#include "PxvConfig.h" +#include "DyArticulationUtils.h" +#include "DySolverConstraintTypes.h" +#include "DySolverBody.h" +#include "PxConstraintDesc.h" +#include "DySolverConstraintDesc.h" + +namespace physx +{ + +namespace Dy +{ + +// dsequeira: we should probably fork these structures for constraints and extended constraints, +// since there's a few things that are used for one but not the other + +struct SolverConstraint1DHeader +{ + PxU8 type; // enum SolverConstraintType - must be first byte + PxU8 count; // count of following 1D constraints + PxU8 dominance; + PxU8 breakable; // indicate whether this constraint is breakable or not + + PxReal linBreakImpulse; + PxReal angBreakImpulse; + PxReal invMass0D0; + PxVec3 body0WorldOffset; + PxReal invMass1D1; + PxReal linearInvMassScale0; // only used by articulations + PxReal angularInvMassScale0; // only used by articulations + PxReal linearInvMassScale1; // only used by articulations + PxReal angularInvMassScale1; // only used by articulations +}; + +PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DHeader) == 48); + +PX_ALIGN_PREFIX(16) +struct SolverConstraint1D +{ +public: + PxVec3 lin0; //!< linear velocity projection (body 0) + PxReal constant; //!< constraint constant term + + PxVec3 lin1; //!< linear velocity projection (body 1) + PxReal unbiasedConstant; //!< constraint constant term without bias + + PxVec3 ang0; //!< angular velocity projection (body 0) + PxReal velMultiplier; //!< constraint velocity multiplier + + PxVec3 ang1; //!< angular velocity projection (body 1) + PxReal impulseMultiplier; //!< constraint impulse multiplier + + PxVec3 ang0Writeback; //!< unscaled angular velocity projection (body 0) + PxU32 pad; + + PxReal minImpulse; //!< Lower bound on impulse magnitude + PxReal maxImpulse; //!< Upper bound on impulse magnitude + PxReal appliedForce; //!< applied force to correct 
velocity+bias + PxU32 flags; +} PX_ALIGN_SUFFIX(16); + +PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1D) == 96); + + +struct SolverConstraint1DExt : public SolverConstraint1D +{ +public: + Cm::SpatialVectorV deltaVA; + Cm::SpatialVectorV deltaVB; +}; + +PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DExt) == 160); + + +PX_FORCE_INLINE void init(SolverConstraint1DHeader& h, + PxU8 count, + bool isExtended, + const PxConstraintInvMassScale& ims) +{ + h.type = PxU8(isExtended ? DY_SC_TYPE_EXT_1D : DY_SC_TYPE_RB_1D); + h.count = count; + h.dominance = 0; + h.linearInvMassScale0 = ims.linear0; + h.angularInvMassScale0 = ims.angular0; + h.linearInvMassScale1 = -ims.linear1; + h.angularInvMassScale1 = -ims.angular1; +} + +PX_FORCE_INLINE void init(SolverConstraint1D& c, + const PxVec3& _linear0, const PxVec3& _linear1, + const PxVec3& _angular0, const PxVec3& _angular1, + PxReal _minImpulse, PxReal _maxImpulse) +{ + PX_ASSERT(_linear0.isFinite()); + PX_ASSERT(_linear1.isFinite()); + c.lin0 = _linear0; + c.lin1 = _linear1; + c.ang0 = _angular0; + c.ang1 = _angular1; + c.minImpulse = _minImpulse; + c.maxImpulse = _maxImpulse; + c.flags = 0; + c.appliedForce = 0; +} + +PX_FORCE_INLINE bool needsNormalVel(const Px1DConstraint &c) +{ + return c.flags & Px1DConstraintFlag::eRESTITUTION + || (c.flags & Px1DConstraintFlag::eSPRING && c.flags & Px1DConstraintFlag::eACCELERATION_SPRING); +} + +PX_FORCE_INLINE void setSolverConstants(PxReal& constant, + PxReal& unbiasedConstant, + PxReal& velMultiplier, + PxReal& impulseMultiplier, + const Px1DConstraint& c, + PxReal normalVel, + PxReal unitResponse, + PxReal minRowResponse, + PxReal erp, + PxReal dt, + PxReal recipdt) +{ + PX_ASSERT(PxIsFinite(unitResponse)); + PxReal recipResponse = unitResponse <= minRowResponse ? 
0 : 1.0f/unitResponse; + PxReal geomError = c.geometricError * erp; + + if(c.flags & Px1DConstraintFlag::eSPRING) + { + PxReal a = dt * dt * c.mods.spring.stiffness + dt * c.mods.spring.damping; + PxReal b = dt * (c.mods.spring.damping * c.velocityTarget - c.mods.spring.stiffness * geomError); + + if(c.flags & Px1DConstraintFlag::eACCELERATION_SPRING) + { + PxReal x = 1.0f/(1.0f+a); + constant = unbiasedConstant = x * recipResponse * b; + velMultiplier = -x * recipResponse * a; + impulseMultiplier = 1.0f-x; + } + else + { + PxReal x = 1.0f/(1.0f+a*unitResponse); + constant = unbiasedConstant = x * b; + velMultiplier = -x*a; + impulseMultiplier = 1.0f-x; + } + } + else + { + velMultiplier = -recipResponse; + impulseMultiplier = 1.0f; + + if(c.flags & Px1DConstraintFlag::eRESTITUTION && -normalVel>c.mods.bounce.velocityThreshold) + { + unbiasedConstant = constant = recipResponse * c.mods.bounce.restitution*-normalVel; + } + else + { + // see usage of 'for internal use' in preprocessRows() + constant = recipResponse * (c.velocityTarget - geomError*recipdt); + unbiasedConstant = recipResponse * (c.velocityTarget - c.forInternalUse*recipdt); + } + } +} + +} +} + +#endif //DY_SOLVER_CONSTRAINT_1D_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D4.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D4.h new file mode 100644 index 00000000..833f7934 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraint1D4.h @@ -0,0 +1,106 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef DY_SOLVERCONSTRAINT1D4_H +#define DY_SOLVERCONSTRAINT1D4_H + +#include "foundation/PxVec3.h" +#include "PxvConfig.h" +#include "DyArticulationUtils.h" +#include "DySolverConstraint1D.h" + +namespace physx +{ + +namespace Dy +{ + +struct SolverConstraint1DHeader4 +{ + PxU8 type; // enum SolverConstraintType - must be first byte + PxU8 pad0[3]; + //These counts are the max of the 4 sets of data. 
+ //When certain pairs have fewer constraints than others, they are padded with 0s so that no work is performed but + //calculations are still shared (afterall, they're computationally free because we're doing 4 things at a time in SIMD) + PxU32 count; + PxU8 count0, count1, count2, count3; + PxU8 break0, break1, break2, break3; + + Vec4V linBreakImpulse; + Vec4V angBreakImpulse; + Vec4V invMass0D0; + Vec4V invMass1D1; + Vec4V angD0; + Vec4V angD1; + + Vec4V body0WorkOffsetX; + Vec4V body0WorkOffsetY; + Vec4V body0WorkOffsetZ; +}; + +struct SolverConstraint1DBase4 +{ +public: + Vec4V lin0X; + Vec4V lin0Y; + Vec4V lin0Z; + Vec4V ang0X; + Vec4V ang0Y; + Vec4V ang0Z; + Vec4V ang0WritebackX; + Vec4V ang0WritebackY; + Vec4V ang0WritebackZ; + Vec4V constant; + Vec4V unbiasedConstant; + Vec4V velMultiplier; + Vec4V impulseMultiplier; + Vec4V minImpulse; + Vec4V maxImpulse; + Vec4V appliedForce; + PxU32 flags[4]; +}; + +PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DBase4) == 272); + +struct SolverConstraint1DDynamic4 : public SolverConstraint1DBase4 +{ + Vec4V lin1X; + Vec4V lin1Y; + Vec4V lin1Z; + Vec4V ang1X; + Vec4V ang1Y; + Vec4V ang1Z; +}; +PX_COMPILE_TIME_ASSERT(sizeof(SolverConstraint1DDynamic4) == 368); + +} + +} + +#endif //DY_SOLVERCONSTRAINT1D4_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintDesc.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintDesc.h new file mode 100644 index 00000000..e74b0374 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintDesc.h @@ -0,0 +1,141 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef DY_SOLVERCONSTRAINTDESC_H
#define DY_SOLVERCONSTRAINTDESC_H

#include "PxvConfig.h"
#include "DySolverConstraintTypes.h"
#include "PsUtilities.h"
#include "PxConstraintDesc.h"
#include "solver/PxSolverDefs.h"

namespace physx
{

struct PxcNpWorkUnit;

struct PxsContactManagerOutput;

namespace Cm
{
    class SpatialVector;
}

struct PxSolverBody;
struct PxSolverBodyData;

namespace Dy
{

struct FsData;




// dsequeira: moved this articulation stuff here to sever a build dep on Articulation.h through DyThreadContext.h and onward

// Plain record describing a run of articulation self-constraints.
// NOTE(review): field meanings are not visible in this header; the names suggest
// startId/numSelfConstraints delimit a range and eaFsData is an address of FsData
// stored as an integer - confirm against the users of this struct.
struct SelfConstraintBlock
{
    PxU32 startId;
    PxU32 numSelfConstraints;
    PxU16 fsDataLength;
    PxU16 requiredSolverProgress;
    uintptr_t eaFsData;
};

//This class rolls together multiple contact managers into a single contact manager.
struct CompoundContactManager
{
    PxU32 mStartIndex;
    PxU16 mStride;
    PxU16 mReducedContactCount;

    PxcNpWorkUnit* unit;            //This is a work unit but the contact buffer has been adjusted to contain all the contacts for all the subsequent pairs
    PxsContactManagerOutput* cmOutput;
    PxU8* originalContactPatches;   //This is the original contact buffer that we replaced with a combined buffer
    PxU8* originalContactPoints;
    PxU8 originalContactCount;
    PxU8 originalPatchCount;
    PxU8 originalStatusFlags;
    PxReal* originalForceBuffer;    //This is the original force buffer that we replaced with a combined force buffer
    PxU16* forceBufferList;         //This is a list of indices from the reduced force buffer to the original force buffers - we need this to fix up the write-backs from the solver
};

// Scoping struct for the result codes of constraint preparation.
// The enumerators take the implicit values 0, 1, 2 in declaration order.
struct SolverConstraintPrepState
{
enum Enum
{
    eOUT_OF_MEMORY,
    eUNBATCHABLE,
    eSUCCESS
};
};

// Returns true when either side of the constraint refers to an articulation link,
// i.e. when linkIndexA or linkIndexB differs from PxSolverConstraintDesc::NO_LINK.
PX_FORCE_INLINE bool isArticulationConstraint(const PxSolverConstraintDesc& desc)
{
    return desc.linkIndexA != PxSolverConstraintDesc::NO_LINK ||
        desc.linkIndexB != PxSolverConstraintDesc::NO_LINK;
}


// Stores the constraint byte length in the descriptor, packed as length/16.
// The asserts require the length to be a multiple of 16 and at most PX_MAX_U16 * 16
// so that the shifted value fits the 16-bit field.
PX_FORCE_INLINE void setConstraintLength(PxSolverConstraintDesc& desc, const PxU32 constraintLength)
{
    PX_ASSERT(0==(constraintLength & 0x0f));
    PX_ASSERT(constraintLength <= PX_MAX_U16 * 16);
    desc.constraintLengthOver16 = Ps::to16(constraintLength >> 4);
}

// Stores the write-back byte length in the descriptor, packed as length/4.
// The asserts require the length to be a multiple of 4 and at most PX_MAX_U16 * 4.
PX_FORCE_INLINE void setWritebackLength(PxSolverConstraintDesc& desc, const PxU32 writeBackLength)
{
    PX_ASSERT(0==(writeBackLength & 0x03));
    PX_ASSERT(writeBackLength <= PX_MAX_U16 * 4);
    desc.writeBackLengthOver4 = Ps::to16(writeBackLength >> 2);
}

// Inverse of setConstraintLength: returns the constraint length in bytes.
PX_FORCE_INLINE PxU32 getConstraintLength(const PxSolverConstraintDesc& desc)
{
    return PxU32(desc.constraintLengthOver16 << 4);
}

// Inverse of setWritebackLength: returns the write-back length in bytes.
PX_FORCE_INLINE PxU32 getWritebackLength(const PxSolverConstraintDesc& desc)
{
    return PxU32(desc.writeBackLengthOver4 << 2);
}

// The descriptor size must be a multiple of 16 bytes.
PX_COMPILE_TIME_ASSERT(0 == (0x0f & sizeof(PxSolverConstraintDesc)));

#define MAX_PERMITTED_SOLVER_PROGRESS 0xFFFF

}

}

#endif //DY_SOLVERCONSTRAINTDESC_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintExtShared.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintExtShared.h
new file mode 100644
index 00000000..2c2f59f9
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintExtShared.h
@@ -0,0 +1,116 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.".
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef DY_SOLVER_CONSTRAINT_EXT_SHARED_H
#define DY_SOLVER_CONSTRAINT_EXT_SHARED_H

#include "foundation/PxPreprocessor.h"
#include "PsVecMath.h"
#include "DyArticulationContactPrep.h"
#include "DySolverConstraintDesc.h"
#include "DySolverConstraint1D.h"
#include "DySolverContact.h"
#include "DySolverContactPF.h"
#include "DyArticulationHelper.h"
#include "PxcNpWorkUnit.h"
#include "PxsMaterialManager.h"
#include "PxsMaterialCombiner.h"

namespace physx
{
namespace Dy
{
    // Fills one SolverContactPointExt from a contact point between two SolverExtBody
    // objects. Every write goes to solverContact: velMultiplier, biasedErr,
    // unbiasedErr, maxImpulse, raXn/rbXn and the four delta-velocity vectors.
    //
    // b0, b1                  the two bodies; mLinkIndex == NO_LINK marks a non-link side
    // d0, d1, angD0, angD1    forwarded unchanged to getImpulseResponse
    // bodyFrame0, bodyFrame1  only the .p positions are read, to build the contact arms
    // normal                  dotted with contact.targetVel only; every other use of the
    //                         normal reads contact.normal
    // invDt, invDtp8          scale the penetration in the bounce test and in the bias term
    // restDistance            subtracted from contact.separation to give the penetration
    // maxPenBias              bound applied to the bias via FMax
    // restitution, bounceThreshold, ccdMaxSeparation
    //                         control the restitution branch (see isGreater2 below)
    PX_FORCE_INLINE void setupExtSolverContact(const SolverExtBody& b0, const SolverExtBody& b1,
        const PxF32 d0, const PxF32 d1, const PxF32 angD0, const PxF32 angD1, const PxTransform& bodyFrame0, const PxTransform& bodyFrame1,
        const Vec3VArg normal, const FloatVArg invDt, const FloatVArg invDtp8, const FloatVArg restDistance, const FloatVArg maxPenBias, const FloatVArg restitution,
        const FloatVArg bounceThreshold, const Gu::ContactPoint& contact, SolverContactPointExt& solverContact, const FloatVArg ccdMaxSeparation)
    {
        const FloatV zero = FZero();
        const FloatV separation = FLoad(contact.separation);

        const FloatV penetration = FSub(separation, restDistance);

        // Contact arms relative to each body frame origin, and their cross products with the normal.
        const PxVec3 ra = contact.point - bodyFrame0.p;
        const PxVec3 rb = contact.point - bodyFrame1.p;

        const PxVec3 raXn = ra.cross(contact.normal);
        const PxVec3 rbXn = rb.cross(contact.normal);

        Cm::SpatialVector deltaV0, deltaV1;

        // Body 1 receives the negated normal and negated angular arm.
        const Cm::SpatialVector resp0 = createImpulseResponseVector(contact.normal, raXn, b0);
        const Cm::SpatialVector resp1 = createImpulseResponseVector(-contact.normal, -rbXn, b1);

        // getImpulseResponse also fills deltaV0/deltaV1, which are stored at the end.
        const FloatV unitResponse = FLoad(getImpulseResponse(b0, resp0, deltaV0, d0, angD0,
                                                             b1, resp1, deltaV1, d1, angD1));

        const FloatV vel0 = FLoad(b0.projectVelocity(contact.normal, raXn));
        const FloatV vel1 = FLoad(b1.projectVelocity(contact.normal, rbXn));
        const FloatV vrel = FSub(vel0, vel1);

        // Reciprocal of the unit response, forced to zero when the response is exactly zero.
        FloatV velMultiplier = FSel(FIsEq(unitResponse, zero), zero, FRecip(unitResponse));
        FloatV scaledBias = FMul(velMultiplier, FMax(maxPenBias, FMul(penetration, invDtp8)));
        const FloatV penetrationInvDt = FMul(penetration, invDt);

        // Restitution applies when restitution > 0, vrel is below bounceThreshold and
        // -vrel exceeds penetration * invDt.
        const BoolV isGreater2 = BAnd(BAnd(FIsGrtr(restitution, zero), FIsGrtr(bounceThreshold, vrel)), FIsGrtr(FNeg(vrel), penetrationInvDt));

        const BoolV ccdSeparationCondition = FIsGrtrOrEq(ccdMaxSeparation, penetration);

        // The bias is dropped only when both the restitution and the CCD separation conditions hold.
        scaledBias = FSel(BAnd(ccdSeparationCondition, isGreater2), zero, scaledBias);

        FloatV targetVelocity = FSel(isGreater2, FMul(FNeg(vrel), restitution), zero);

        //Get the rigid body's current velocity and embed into the constraint target velocities
        // Only one side is adjusted: b0 when it is not a link, otherwise b1 when it is not a link.
        if(b0.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
            targetVelocity = FSub(targetVelocity, vel0);
        else if(b1.mLinkIndex == PxSolverConstraintDesc::NO_LINK)
            targetVelocity = FAdd(targetVelocity, vel1);

        targetVelocity = FAdd(targetVelocity, V3Dot(V3LoadA(contact.targetVel), normal));

        // NOTE(review): FScaleAdd(a, b, c) is presumably a*b + c - confirm in PsVecMath.
        // The unbiased error omits the bias in the restitution case and otherwise uses
        // only the non-negative part of scaledBias.
        const FloatV biasedErr = FScaleAdd(targetVelocity, velMultiplier, FNeg(scaledBias));
        const FloatV unbiasedErr = FScaleAdd(targetVelocity, velMultiplier, FSel(isGreater2, zero, FNeg(FMax(scaledBias, zero))));


        FStore(velMultiplier, &solverContact.velMultiplier);
        FStore(biasedErr, &solverContact.biasedErr);
        FStore(unbiasedErr, &solverContact.unbiasedErr);
        solverContact.maxImpulse = contact.maxImpulse;

        // The angular part of resp1 is negated again on store.
        solverContact.raXn = V3LoadA(resp0.angular);
        solverContact.rbXn = V3Neg(V3LoadA(resp1.angular));
        solverContact.linDeltaVA = V3LoadA(deltaV0.linear);
        solverContact.angDeltaVA = V3LoadA(deltaV0.angular);
        solverContact.linDeltaVB = V3LoadA(deltaV1.linear);
        solverContact.angDeltaVB = V3LoadA(deltaV1.angular);
    }
}
}

#endif //DY_SOLVER_CONSTRAINT_EXT_SHARED_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintTypes.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintTypes.h
new file mode 100644
index 00000000..2b13c190
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintTypes.h
@@
-0,0 +1,67 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef DY_SOLVERCONSTRAINTTYPES_H
#define DY_SOLVERCONSTRAINTTYPES_H

#include "foundation/PxSimpleTypes.h"
#include "PxvConfig.h"

namespace physx
{

// Tag identifying the kind of data stored in a solver constraint stream.
// Values are assigned implicitly from 0 in declaration order, so inserting or
// reordering enumerators changes every value after the edit point.
enum SolverConstraintType
{
    DY_SC_TYPE_NONE = 0,
    DY_SC_TYPE_RB_CONTACT,              // RB-only contact
    DY_SC_TYPE_RB_1D,                   // RB-only 1D constraint
    DY_SC_TYPE_EXT_CONTACT,             // contact involving articulations
    DY_SC_TYPE_EXT_1D,                  // 1D constraint involving articulations
    DY_SC_TYPE_STATIC_CONTACT,          // RB-only contact where body b is static
    DY_SC_TYPE_NOFRICTION_RB_CONTACT,   //RB-only contact with no friction patch
    DY_SC_TYPE_BLOCK_RB_CONTACT,
    DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT,
    DY_SC_TYPE_BLOCK_1D,
    DY_SC_TYPE_FRICTION,
    DY_SC_TYPE_STATIC_FRICTION,
    DY_SC_TYPE_EXT_FRICTION,
    DY_SC_TYPE_BLOCK_FRICTION,
    DY_SC_TYPE_BLOCK_STATIC_FRICTION,
    DY_SC_CONSTRAINT_TYPE_COUNT         //Count of the number of different constraint types in the solver
};

// Bit flags for a solver constraint row. The single flag defined here is bit 1 (value 2).
enum SolverConstraintFlags
{
    DY_SC_FLAG_OUTPUT_FORCE = (1<<1)
};

}

#endif //DY_SOLVERCONSTRAINTTYPES_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp
new file mode 100644
index 00000000..ea935ce9
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraints.cpp
@@ -0,0 +1,1121 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.".
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "foundation/PxPreprocessor.h"
#include "PsVecMath.h"

#ifdef PX_SUPPORT_SIMD

#include "CmPhysXCommon.h"
#include "DySolverBody.h"
#include "DySolverContact.h"
#include "DySolverConstraint1D.h"
#include "DySolverConstraintDesc.h"
#include "DyThresholdTable.h"
#include "DySolverContext.h"
#include "PsUtilities.h"
#include "DyConstraint.h"
#include "PsAtomic.h"
#include "DySolverConstraintsShared.h"

namespace physx
{

namespace Dy
{

//Port of scalar implementation to SIMD maths with some interleaving of instructions
// Runs one solver pass over the 1D rows of a rigid-body constraint.
// desc.constraint points at a SolverConstraint1DHeader followed immediately by
// header->count SolverConstraint1D rows. Each row's appliedForce is updated in place
// and the velocities of desc.bodyA / desc.bodyB are written back after the loop.
// cache is unused.
void solve1D(const PxSolverConstraintDesc& desc, SolverContext& cache)
{
    PX_UNUSED(cache);
    PxSolverBody& b0 = *desc.bodyA;
    PxSolverBody& b1 = *desc.bodyB;

    PxU8* PX_RESTRICT bPtr = desc.constraint;
    //PxU32 length = desc.constraintLength;

    const SolverConstraint1DHeader* PX_RESTRICT header = reinterpret_cast<const SolverConstraint1DHeader*>(bPtr);
    SolverConstraint1D* PX_RESTRICT base = reinterpret_cast<SolverConstraint1D*>(bPtr + sizeof(SolverConstraint1DHeader));

    // Velocities are kept in registers across all rows and stored once at the end.
    Vec3V linVel0 = V3LoadA(b0.linearVelocity);
    Vec3V linVel1 = V3LoadA(b1.linearVelocity);
    Vec3V angState0 = V3LoadA(b0.angularState);
    Vec3V angState1 = V3LoadA(b1.angularState);

    const FloatV invMass0 = FLoad(header->invMass0D0);
    const FloatV invMass1 = FLoad(header->invMass1D1);
    const FloatV invInertiaScale0 = FLoad(header->angularInvMassScale0);
    const FloatV invInertiaScale1 = FLoad(header->angularInvMassScale1);


    for(PxU32 i=0; i<header->count;++i, base++)
    {
        Ps::prefetchLine(base+1);
        SolverConstraint1D& c = *base;

        const Vec3V clinVel0 = V3LoadA(c.lin0);
        const Vec3V clinVel1 = V3LoadA(c.lin1);
        const Vec3V cangVel0 = V3LoadA(c.ang0);
        const Vec3V cangVel1 = V3LoadA(c.ang1);

        const FloatV constant = FLoad(c.constant);
        const FloatV vMul = FLoad(c.velMultiplier);
        const FloatV iMul = FLoad(c.impulseMultiplier);
        const FloatV appliedForce = FLoad(c.appliedForce);
        //const FloatV targetVel = FLoad(c.targetVelocity);

        const FloatV maxImpulse = FLoad(c.maxImpulse);
        const FloatV minImpulse = FLoad(c.minImpulse);

        // Component-wise products; summing the elements of (v0 - v1) below yields the
        // relative velocity along the constraint row.
        const Vec3V v0 = V3MulAdd(linVel0, clinVel0, V3Mul(angState0, cangVel0));
        const Vec3V v1 = V3MulAdd(linVel1, clinVel1, V3Mul(angState1, cangVel1));

        const FloatV normalVel = V3SumElems(V3Sub(v0, v1));
        // NOTE(review): FScaleAdd(a, b, c) is presumably a*b + c - confirm in PsVecMath.
        const FloatV unclampedForce = FScaleAdd(iMul, appliedForce, FScaleAdd(vMul, normalVel, constant));
        // Clamp the accumulated force to [minImpulse, maxImpulse]; only the change is applied.
        const FloatV clampedForce = FMin(maxImpulse, (FMax(minImpulse, unclampedForce)));
        const FloatV deltaF = FSub(clampedForce, appliedForce);

        FStore(clampedForce, &c.appliedForce);
        linVel0 = V3ScaleAdd(clinVel0, FMul(deltaF, invMass0), linVel0);
        linVel1 = V3NegScaleSub(clinVel1, FMul(deltaF, invMass1), linVel1);
        angState0 = V3ScaleAdd(cangVel0, FMul(deltaF, invInertiaScale0), angState0);
        //This should be negScaleSub but invInertiaScale1 is negated already
        angState1 = V3ScaleAdd(cangVel1, FMul(deltaF, invInertiaScale1), angState1);

    }

    V3StoreA(linVel0, b0.linearVelocity);
    V3StoreA(angState0, b0.angularState);
    V3StoreA(linVel1, b1.linearVelocity);
    V3StoreA(angState1, b1.angularState);

    PX_ASSERT(b0.linearVelocity.isFinite());
    PX_ASSERT(b0.angularState.isFinite());
    PX_ASSERT(b1.linearVelocity.isFinite());
    PX_ASSERT(b1.angularState.isFinite());
}

// Replaces each row's constant with its unbiasedConstant. The row stride depends on
// whether the header type is DY_SC_TYPE_EXT_1D.
void conclude1D(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
{
    SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
    PxU8* base = desc.constraint + sizeof(SolverConstraint1DHeader);
    PxU32 stride = header->type == DY_SC_TYPE_EXT_1D ?
sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D);

    for(PxU32 i=0; i<header->count; i++)
    {
        SolverConstraint1D& c = *reinterpret_cast<SolverConstraint1D*>(base);

        c.constant = c.unbiasedConstant;

        base += stride;
    }
    PX_ASSERT(desc.constraint + getConstraintLength(desc) == base);
}

// ==============================================================

// Runs one solver pass over the contact stream of a constraint between two bodies.
// The stream is a sequence of patches, each laid out as:
//   SolverContactHeader
//   numNormalConstr   x SolverContactPoint
//   force buffer of PxF32, padded up to a multiple of 4 entries
//   numFrictionConstr x SolverContactFriction
// Friction rows are solved only when cache.doFriction is set and the patch has any.
// Body velocities are held in registers across the whole stream and stored at the end.
void solveContact(const PxSolverConstraintDesc& desc, SolverContext& cache)
{
    PxSolverBody& b0 = *desc.bodyA;
    PxSolverBody& b1 = *desc.bodyB;

    Vec3V linVel0 = V3LoadA(b0.linearVelocity);
    Vec3V linVel1 = V3LoadA(b1.linearVelocity);
    Vec3V angState0 = V3LoadA(b0.angularState);
    Vec3V angState1 = V3LoadA(b1.angularState);

    const PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);

    //hopefully pointer aliasing doesn't bite.
    PxU8* PX_RESTRICT currPtr = desc.constraint;

    while(currPtr < last)
    {
        SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
        currPtr += sizeof(SolverContactHeader);

        const PxU32 numNormalConstr = hdr->numNormalConstr;
        const PxU32 numFrictionConstr = hdr->numFrictionConstr;

        SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
        Ps::prefetchLine(contacts);
        currPtr += numNormalConstr * sizeof(SolverContactPoint);

        // One float per normal constraint, rounded up to a multiple of four entries.
        PxF32* forceBuffer = reinterpret_cast<PxF32*>(currPtr);
        currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));

        SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
        currPtr += numFrictionConstr * sizeof(SolverContactFriction);

        const FloatV invMassA = FLoad(hdr->invMass0);
        const FloatV invMassB = FLoad(hdr->invMass1);

        const FloatV angDom0 = FLoad(hdr->angDom0);
        const FloatV angDom1 = FLoad(hdr->angDom1);

        const Vec3V contactNormal = hdr->normal;

        // The normal pass updates the velocity registers in place; its return value
        // bounds the friction impulses below.
        const FloatV accumulatedNormalImpulse = solveDynamicContacts(contacts, numNormalConstr, contactNormal, invMassA, invMassB,
            angDom0, angDom1, linVel0, angState0, linVel1, angState1, forceBuffer);

        if(cache.doFriction && numFrictionConstr)
        {
            const FloatV staticFrictionCof = hdr->getStaticFriction();
            const FloatV dynamicFrictionCof = hdr->getDynamicFriction();
            const FloatV maxFrictionImpulse = FMul(staticFrictionCof, accumulatedNormalImpulse);
            const FloatV maxDynFrictionImpulse = FMul(dynamicFrictionCof, accumulatedNormalImpulse);
            const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse);

            // Set as soon as any friction row in this patch exceeds the static limit.
            BoolV broken = BFFFF();

            if(cache.writeBackIteration)
                Ps::prefetchLine(hdr->frictionBrokenWritebackByte);

            for(PxU32 i=0;i<numFrictionConstr;i++)
            {
                SolverContactFriction& f = frictions[i];
                Ps::prefetchLine(&frictions[i],128);


                // Each Vec4V packs a direction in XYZ and a scalar in W.
                const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW;
                const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
                const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW;

                const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
                const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);
                const Vec3V rbXn = Vec3V_From_Vec4V(rbXnXYZ_biasW);

                const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
                const FloatV bias = V4GetW(rbXnXYZ_biasW);
                const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);

                const FloatV targetVel = FLoad(f.targetVel);

                const Vec3V delLinVel0 = V3Scale(normal, invMassA);
                const Vec3V delLinVel1 = V3Scale(normal, invMassB);

                const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angState0, raXn));
                const Vec3V v1 = V3MulAdd(linVel1, normal, V3Mul(angState1, rbXn));
                const FloatV normalVel = V3SumElems(V3Sub(v0, v1));



                // appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation
                const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce);

                // Algorithm:
                // if abs(appliedForce + deltaF) > maxFrictionImpulse
                //    clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
                //      (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
                //    set broken flag to true || broken flag

                // FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier);
                // FloatV potentialSumF = FAdd(appliedForce, deltaF);

                const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);

                // On XBox this clamping code uses the vector simple pipe rather than vector float,
                // which eliminates a lot of stall cycles

                const BoolV clamp = FIsGrtr(FAbs(totalImpulse), maxFrictionImpulse);

                const FloatV totalClamped = FMin(maxDynFrictionImpulse, FMax(negMaxDynFrictionImpulse, totalImpulse));

                const FloatV newAppliedForce = FSel(clamp, totalClamped,totalImpulse);

                broken = BOr(broken, clamp);

                FloatV deltaF = FSub(newAppliedForce, appliedForce);

                // we could get rid of the stall here by calculating and clamping delta separately, but
                // the complexity isn't really worth it.

                linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
                linVel1 = V3NegScaleSub(delLinVel1, deltaF, linVel1);
                angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);
                angState1 = V3NegScaleSub(rbXn, FMul(deltaF, angDom1), angState1);

                f.setAppliedForce(newAppliedForce);


            }
            Store_From_BoolV(broken, &hdr->broken);
        }

    }

    // These asserts run before the store, so they check the velocities as they were on entry.
    PX_ASSERT(b0.linearVelocity.isFinite());
    PX_ASSERT(b0.angularState.isFinite());
    PX_ASSERT(b1.linearVelocity.isFinite());
    PX_ASSERT(b1.angularState.isFinite());

    // Write back
    V3StoreU(linVel0, b0.linearVelocity);
    V3StoreU(linVel1, b1.linearVelocity);
    V3StoreU(angState0, b0.angularState);
    V3StoreU(angState1, b1.angularState);

    PX_ASSERT(b0.linearVelocity.isFinite());
    PX_ASSERT(b0.angularState.isFinite());
    PX_ASSERT(b1.linearVelocity.isFinite());
    PX_ASSERT(b1.angularState.isFinite());

    // The patch walk must consume the stream exactly.
    PX_ASSERT(currPtr == last);
}

// Variant of solveContact that reads and writes only desc.bodyA.
void solveContact_BStatic(const PxSolverConstraintDesc& desc, SolverContext& cache)
{
    PxSolverBody& b0 = *desc.bodyA;
    //PxSolverBody& b1 =
*desc.bodyB;

    Vec3V linVel0 = V3LoadA(b0.linearVelocity);
    Vec3V angState0 = V3LoadA(b0.angularState);

    const PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);

    //hopefully pointer aliasing doesn't bite.
    PxU8* PX_RESTRICT currPtr = desc.constraint;

    // Each patch: header, contact points, padded force buffer, friction rows.
    while(currPtr < last)
    {
        SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr);
        currPtr += sizeof(SolverContactHeader);

        const PxU32 numNormalConstr = hdr->numNormalConstr;
        const PxU32 numFrictionConstr = hdr->numFrictionConstr;

        SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr);
        //Ps::prefetchLine(contacts);
        currPtr += numNormalConstr * sizeof(SolverContactPoint);

        // One float per normal constraint, rounded up to a multiple of four entries.
        PxF32* forceBuffer = reinterpret_cast<PxF32*>(currPtr);
        currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));

        SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr);
        currPtr += numFrictionConstr * sizeof(SolverContactFriction);



        const FloatV invMassA = FLoad(hdr->invMass0);

        const Vec3V contactNormal = hdr->normal;
        const FloatV angDom0 = FLoad(hdr->angDom0);


        const FloatV accumulatedNormalImpulse = solveStaticContacts(contacts, numNormalConstr, contactNormal,
            invMassA, angDom0, linVel0, angState0, forceBuffer);

        if(cache.doFriction && numFrictionConstr)
        {
            const FloatV maxFrictionImpulse = FMul(hdr->getStaticFriction(), accumulatedNormalImpulse);
            const FloatV maxDynFrictionImpulse = FMul(hdr->getDynamicFriction(), accumulatedNormalImpulse);

            // Set as soon as any friction row in this patch exceeds the static limit.
            BoolV broken = BFFFF();
            if(cache.writeBackIteration)
                Ps::prefetchLine(hdr->frictionBrokenWritebackByte);

            for(PxU32 i=0;i<numFrictionConstr;i++)
            {
                SolverContactFriction& f = frictions[i];
                Ps::prefetchLine(&frictions[i],128);


                // Each Vec4V packs a direction in XYZ and a scalar in W.
                const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW;
                const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW;
                const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW;

                const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW);
                const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);

                const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW);
                const FloatV bias = V4GetW(rbXnXYZ_biasW);
                const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW);

                const FloatV targetVel = FLoad(f.targetVel);

                const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse);

                const Vec3V delLinVel0 = V3Scale(normal, invMassA);
                //const FloatV negMaxFrictionImpulse = FNeg(maxFrictionImpulse);

                // Only body 0 contributes to the relative velocity here.
                const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angState0, raXn));
                const FloatV normalVel = V3SumElems(v0);


                // appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation
                const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce);

                // Algorithm:
                // if abs(appliedForce + deltaF) > maxFrictionImpulse
                //    clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse]
                //      (i.e. clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce]
                //    set broken flag to true || broken flag

                // FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier);
                // FloatV potentialSumF = FAdd(appliedForce, deltaF);

                const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1);

                // On XBox this clamping code uses the vector simple pipe rather than vector float,
                // which eliminates a lot of stall cycles

                const BoolV clamp = FIsGrtr(FAbs(totalImpulse), maxFrictionImpulse);

                const FloatV totalClamped = FMin(maxDynFrictionImpulse, FMax(negMaxDynFrictionImpulse, totalImpulse));

                broken = BOr(broken, clamp);

                const FloatV newAppliedForce = FSel(clamp, totalClamped,totalImpulse);

                FloatV deltaF = FSub(newAppliedForce, appliedForce);

                // we could get rid of the stall here by calculating and clamping delta separately, but
                // the complexity isn't really worth it.

                linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0);
                angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0);

                f.setAppliedForce(newAppliedForce);

            }
            Store_From_BoolV(broken, &hdr->broken);
        }

    }

    // These asserts run before the store, so they check the velocities as they were on entry.
    PX_ASSERT(b0.linearVelocity.isFinite());
    PX_ASSERT(b0.angularState.isFinite());

    // Write back
    V3StoreA(linVel0, b0.linearVelocity);
    V3StoreA(angState0, b0.angularState);

    PX_ASSERT(b0.linearVelocity.isFinite());
    PX_ASSERT(b0.angularState.isFinite());

    // The patch walk must consume the stream exactly.
    PX_ASSERT(currPtr == last);
}


// Walks the contact stream and removes the bias terms: each contact point's biasedErr
// is overwritten with its unbiasedErr and each friction row's bias is set to zero.
// Point and friction strides depend on whether the patch type is DY_SC_TYPE_EXT_CONTACT.
void concludeContact(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/)
{
    PxU8* PX_RESTRICT cPtr = desc.constraint;

    const FloatV zero = FZero();

    PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);
    while(cPtr < last)
    {
        const SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<const SolverContactHeader*>(cPtr);
        cPtr += sizeof(SolverContactHeader);

        const PxU32 numNormalConstr = hdr->numNormalConstr;
        const PxU32 numFrictionConstr = hdr->numFrictionConstr;

        //if(cPtr < last)
        //Ps::prefetchLine(cPtr, 512);
        Ps::prefetchLine(cPtr,128);
        Ps::prefetchLine(cPtr,256);
        Ps::prefetchLine(cPtr,384);

        const PxU32 pointStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactPointExt)
                                                                       : sizeof(SolverContactPoint);
        for(PxU32 i=0;i<numNormalConstr;i++)
        {
            SolverContactPoint *c = reinterpret_cast<SolverContactPoint*>(cPtr);
            cPtr += pointStride;
            //c->scaledBias = PxMin(c->scaledBias, 0.f);
            c->biasedErr = c->unbiasedErr;
        }

        cPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3)); //Jump over force buffers

        const PxU32 frictionStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ?
sizeof(SolverContactFrictionExt)
                                                                          : sizeof(SolverContactFriction);
        for(PxU32 i=0;i<numFrictionConstr;i++)
        {
            SolverContactFriction *f = reinterpret_cast<SolverContactFriction*>(cPtr);
            cPtr += frictionStride;
            f->setBias(zero);
        }
    }
    PX_ASSERT(cPtr == last);
}

// Copies solved contact forces out of the constraint stream and, where applicable,
// appends a force-threshold report.
//  - When desc.writeBack is non-null, every normal force is appended to it and summed
//    into normalForce. With a null write-back buffer the sum stays 0, so no threshold
//    element is emitted.
//  - A patch whose 'broken' flag is set writes 1 through frictionBrokenWritebackByte
//    when that pointer is non-null.
//  - forceThreshold is overwritten for each patch, so the value tested after the loop
//    is the one from the last patch in the stream.
//  - A ThresholdStreamElement is appended to cache.mThresholdStream only when neither
//    side is an articulation link, the summed force is non-zero and at least one body
//    has a reportThreshold below PX_MAX_REAL.
void writeBackContact(const PxSolverConstraintDesc& desc, SolverContext& cache,
                      PxSolverBodyData& bd0, PxSolverBodyData& bd1)
{

    PxReal normalForce = 0;

    PxU8* PX_RESTRICT cPtr = desc.constraint;
    PxReal* PX_RESTRICT vForceWriteback = reinterpret_cast<PxReal*>(desc.writeBack);
    PxU8* PX_RESTRICT last = desc.constraint + getConstraintLength(desc);

    bool forceThreshold = false;

    while(cPtr < last)
    {
        const SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<const SolverContactHeader*>(cPtr);
        cPtr += sizeof(SolverContactHeader);

        forceThreshold = hdr->flags & SolverContactHeader::eHAS_FORCE_THRESHOLDS;
        const PxU32 numNormalConstr = hdr->numNormalConstr;
        const PxU32 numFrictionConstr = hdr->numFrictionConstr;

        //if(cPtr < last)
        Ps::prefetchLine(cPtr, 256);
        Ps::prefetchLine(cPtr, 384);

        const PxU32 pointStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactPointExt)
                                                                       : sizeof(SolverContactPoint);

        // Skip the contact points; the forces live in the padded float buffer after them.
        cPtr += pointStride * numNormalConstr;
        PxF32* forceBuffer = reinterpret_cast<PxF32*>(cPtr);
        cPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3));

        if(vForceWriteback!=NULL)
        {
            for(PxU32 i=0; i<numNormalConstr; i++)
            {
                PxReal appliedForce = forceBuffer[i];
                *vForceWriteback++ = appliedForce;
                normalForce += appliedForce;
            }
        }

        const PxU32 frictionStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactFrictionExt)
                                                                          : sizeof(SolverContactFriction);

        if(hdr->broken && hdr->frictionBrokenWritebackByte != NULL)
        {
            *hdr->frictionBrokenWritebackByte = 1;
        }

        cPtr += frictionStride * numFrictionConstr;

    }
    PX_ASSERT(cPtr == last);



    if(forceThreshold && desc.linkIndexA == PxSolverConstraintDesc::NO_LINK && desc.linkIndexB == PxSolverConstraintDesc::NO_LINK &&
        normalForce !=0 && (bd0.reportThreshold < PX_MAX_REAL || bd1.reportThreshold < PX_MAX_REAL))
    {
        ThresholdStreamElement elt;
        elt.normalForce = normalForce;
        elt.threshold = PxMin<float>(bd0.reportThreshold, bd1.reportThreshold);
        elt.nodeIndexA = bd0.nodeIndex;
        elt.nodeIndexB = bd1.nodeIndex;
        // shapeInteraction is taken from the first header in the stream.
        elt.shapeInteraction = reinterpret_cast<const SolverContactHeader*>(desc.constraint)->shapeInteraction;
        // Store the node pair in ascending order; the assert requires the two to differ.
        Ps::order(elt.nodeIndexA, elt.nodeIndexB);
        PX_ASSERT(elt.nodeIndexA < elt.nodeIndexB);
        PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength);
        cache.mThresholdStream[cache.mThresholdStreamIndex++] = elt;
    }
}

// adjust from CoM to joint

// Writes the accumulated linear and angular impulse of a 1D constraint, plus its
// broken state, to desc.writeBack. Does nothing when desc.writeBack is null.
void writeBack1D(const PxSolverConstraintDesc& desc, SolverContext&, PxSolverBodyData&, PxSolverBodyData&)
{
    ConstraintWriteback* writeback = reinterpret_cast<ConstraintWriteback*>(desc.writeBack);
    if(writeback)
    {
        SolverConstraint1DHeader* header = reinterpret_cast<SolverConstraint1DHeader*>(desc.constraint);
        PxU8* base = desc.constraint + sizeof(SolverConstraint1DHeader);
        PxU32 stride = header->type == DY_SC_TYPE_EXT_1D ?
sizeof(SolverConstraint1DExt) : sizeof(SolverConstraint1D); + + PxVec3 lin(0), ang(0); + for(PxU32 i=0; i<header->count; i++) + { + const SolverConstraint1D* c = reinterpret_cast<SolverConstraint1D*>(base); + if(c->flags & DY_SC_FLAG_OUTPUT_FORCE) + { + lin += c->lin0 * c->appliedForce; + ang += c->ang0Writeback * c->appliedForce; + } + base += stride; + } + + ang -= header->body0WorldOffset.cross(lin); + writeback->linearImpulse = lin; + writeback->angularImpulse = ang; + writeback->broken = header->breakable ? PxU32(lin.magnitude()>header->linBreakImpulse || ang.magnitude()>header->angBreakImpulse) : 0; + + PX_ASSERT(desc.constraint + getConstraintLength(desc) == base); + } +} + + +void solve1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 1; a < constraintCount; ++a) + { + Ps::prefetchLine(desc[a].constraint); + Ps::prefetchLine(desc[a].constraint, 128); + Ps::prefetchLine(desc[a].constraint, 256); + solve1D(desc[a-1], cache); + } + solve1D(desc[constraintCount-1], cache); +} + +void solve1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 1; a < constraintCount; ++a) + { + Ps::prefetchLine(desc[a].constraint); + Ps::prefetchLine(desc[a].constraint, 128); + Ps::prefetchLine(desc[a].constraint, 256); + solve1D(desc[a-1], cache); + conclude1D(desc[a-1], cache); + } + solve1D(desc[constraintCount-1], cache); + conclude1D(desc[constraintCount-1], cache); +} + +void solve1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 1; a < constraintCount; ++a) + { + Ps::prefetchLine(desc[a].constraint); + Ps::prefetchLine(desc[a].constraint, 128); + Ps::prefetchLine(desc[a].constraint, 256); + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a-1].bodyADataIndex]; + PxSolverBodyData& bd1 = 
cache.solverBodyArray[desc[a-1].bodyBDataIndex]; + solve1D(desc[a-1], cache); + writeBack1D(desc[a-1], cache, bd0, bd1); + } + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[constraintCount-1].bodyADataIndex]; + PxSolverBodyData& bd1 = cache.solverBodyArray[desc[constraintCount-1].bodyBDataIndex]; + solve1D(desc[constraintCount-1], cache); + writeBack1D(desc[constraintCount-1], cache, bd0, bd1); +} + +void writeBack1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 1; a < constraintCount; ++a) + { + Ps::prefetchLine(desc[a].constraint); + Ps::prefetchLine(desc[a].constraint, 128); + Ps::prefetchLine(desc[a].constraint, 256); + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a-1].bodyADataIndex]; + PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a-1].bodyBDataIndex]; + writeBack1D(desc[a-1], cache, bd0, bd1); + } + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[constraintCount-1].bodyADataIndex]; + PxSolverBodyData& bd1 = cache.solverBodyArray[desc[constraintCount-1].bodyBDataIndex]; + writeBack1D(desc[constraintCount-1], cache, bd0, bd1); +} + +void solveContactBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 1; a < constraintCount; ++a) + { + Ps::prefetchLine(desc[a].constraint); + Ps::prefetchLine(desc[a].constraint, 128); + Ps::prefetchLine(desc[a].constraint, 256); + solveContact(desc[a-1], cache); + } + solveContact(desc[constraintCount-1], cache); +} + +void solveContactConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 1; a < constraintCount; ++a) + { + Ps::prefetchLine(desc[a].constraint); + Ps::prefetchLine(desc[a].constraint, 128); + Ps::prefetchLine(desc[a].constraint, 256); + solveContact(desc[a-1], cache); + concludeContact(desc[a-1], cache); + } + solveContact(desc[constraintCount-1], cache); + 
concludeContact(desc[constraintCount-1], cache); +} + +void solveContactBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 1; a < constraintCount; ++a) + { + Ps::prefetchLine(desc[a].constraint); + Ps::prefetchLine(desc[a].constraint, 128); + Ps::prefetchLine(desc[a].constraint, 256); + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a-1].bodyADataIndex]; + PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a-1].bodyBDataIndex]; + solveContact(desc[a-1], cache); + writeBackContact(desc[a-1], cache, bd0, bd1); + } + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[constraintCount-1].bodyADataIndex]; + PxSolverBodyData& bd1 = cache.solverBodyArray[desc[constraintCount-1].bodyBDataIndex]; + solveContact(desc[constraintCount-1], cache); + writeBackContact(desc[constraintCount-1], cache, bd0, bd1); + + if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4)) + { + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a) + { + cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a]; + } + cache.mThresholdStreamIndex = 0; + } +} + +void solveContact_BStaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 1; a < constraintCount; ++a) + { + Ps::prefetchLine(desc[a].constraint); + Ps::prefetchLine(desc[a].constraint, 128); + Ps::prefetchLine(desc[a].constraint, 256); + solveContact_BStatic(desc[a-1], cache); + } + solveContact_BStatic(desc[constraintCount-1], cache); +} + +void solveContact_BStaticConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 1; a < constraintCount; ++a) + { + Ps::prefetchLine(desc[a].constraint); + 
Ps::prefetchLine(desc[a].constraint, 128); + Ps::prefetchLine(desc[a].constraint, 256); + solveContact_BStatic(desc[a-1], cache); + concludeContact(desc[a-1], cache); + } + solveContact_BStatic(desc[constraintCount-1], cache); + concludeContact(desc[constraintCount-1], cache); +} + +void solveContact_BStaticBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 1; a < constraintCount; ++a) + { + Ps::prefetchLine(desc[a].constraint); + Ps::prefetchLine(desc[a].constraint, 128); + Ps::prefetchLine(desc[a].constraint, 256); + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a-1].bodyADataIndex]; + PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a-1].bodyBDataIndex]; + solveContact_BStatic(desc[a-1], cache); + writeBackContact(desc[a-1], cache, bd0, bd1); + } + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[constraintCount-1].bodyADataIndex]; + PxSolverBodyData& bd1 = cache.solverBodyArray[desc[constraintCount-1].bodyBDataIndex]; + solveContact_BStatic(desc[constraintCount-1], cache); + writeBackContact(desc[constraintCount-1], cache, bd0, bd1); + + if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4)) + { + //Not enough space to write 4 more thresholds back! 
+ //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a) + { + cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a]; + } + cache.mThresholdStreamIndex = 0; + } +} + +//Port of scalar implementation to SIMD maths with some interleaving of instructions +void solveExt1D(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/) +{ + PxU8* PX_RESTRICT bPtr = desc.constraint; + //PxU32 length = desc.constraintLength; + + const SolverConstraint1DHeader* PX_RESTRICT header = reinterpret_cast<const SolverConstraint1DHeader*>(bPtr); + SolverConstraint1DExt* PX_RESTRICT base = reinterpret_cast<SolverConstraint1DExt*>(bPtr + sizeof(SolverConstraint1DHeader)); + + Vec3V linVel0, angVel0, linVel1, angVel1; + if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK) + { + linVel0 = V3LoadA(desc.bodyA->linearVelocity); + angVel0 = V3LoadA(desc.bodyA->angularState); + } + else + { + Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationA, desc.linkIndexA); + linVel0 = v.linear; + angVel0 = v.angular; + } + + if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK) + { + linVel1 = V3LoadA(desc.bodyB->linearVelocity); + angVel1 = V3LoadA(desc.bodyB->angularState); + } + else + { + Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationB, desc.linkIndexB); + linVel1 = v.linear; + angVel1 = v.angular; + } + + Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero(); + + for(PxU32 i=0; i<header->count;++i, base++) + { + Ps::prefetchLine(base+1); + + const Vec4V lin0XYZ_constantW = V4LoadA(&base->lin0.x); + const Vec4V lin1XYZ_unbiasedConstantW = V4LoadA(&base->lin1.x); + const Vec4V ang0XYZ_velMultiplierW = V4LoadA(&base->ang0.x); + const Vec4V ang1XYZ_impulseMultiplierW = V4LoadA(&base->ang1.x); + const Vec4V minImpulseX_maxImpulseY_appliedForceZ = 
V4LoadA(&base->minImpulse); + + const Vec3V lin0 = Vec3V_From_Vec4V(lin0XYZ_constantW); FloatV constant = V4GetW(lin0XYZ_constantW); + const Vec3V lin1 = Vec3V_From_Vec4V(lin1XYZ_unbiasedConstantW); + const Vec3V ang0 = Vec3V_From_Vec4V(ang0XYZ_velMultiplierW); FloatV vMul = V4GetW(ang0XYZ_velMultiplierW); + const Vec3V ang1 = Vec3V_From_Vec4V(ang1XYZ_impulseMultiplierW); FloatV iMul = V4GetW(ang1XYZ_impulseMultiplierW); + + const FloatV minImpulse = V4GetX(minImpulseX_maxImpulseY_appliedForceZ); + const FloatV maxImpulse = V4GetY(minImpulseX_maxImpulseY_appliedForceZ); + const FloatV appliedForce = V4GetZ(minImpulseX_maxImpulseY_appliedForceZ); + + const Vec3V v0 = V3MulAdd(linVel0, lin0, V3Mul(angVel0, ang0)); + const Vec3V v1 = V3MulAdd(linVel1, lin1, V3Mul(angVel1, ang1)); + const FloatV normalVel = V3SumElems(V3Sub(v0, v1)); + + const FloatV unclampedForce = FScaleAdd(iMul, appliedForce, FScaleAdd(vMul, normalVel, constant)); + const FloatV clampedForce = FMin(maxImpulse, (FMax(minImpulse, unclampedForce))); + const FloatV deltaF = FSub(clampedForce, appliedForce); + + FStore(clampedForce, &base->appliedForce); + li0 = V3ScaleAdd(lin0, deltaF, li0); ai0 = V3ScaleAdd(ang0, deltaF, ai0); + li1 = V3ScaleAdd(lin1, deltaF, li1); ai1 = V3ScaleAdd(ang1, deltaF, ai1); + + linVel0 = V3ScaleAdd(base->deltaVA.linear, deltaF, linVel0); angVel0 = V3ScaleAdd(base->deltaVA.angular, deltaF, angVel0); + linVel1 = V3ScaleAdd(base->deltaVB.linear, deltaF, linVel1); angVel1 = V3ScaleAdd(base->deltaVB.angular, deltaF, angVel1); + } + + if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK) + { + V3StoreA(linVel0, desc.bodyA->linearVelocity); + V3StoreA(angVel0, desc.bodyA->angularState); + } + else + PxcFsApplyImpulse(*desc.articulationA, desc.linkIndexA, V3Scale(li0, FLoad(header->linearInvMassScale0)), + V3Scale(ai0, FLoad(header->angularInvMassScale0))); + + if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK) + { + V3StoreA(linVel1, desc.bodyB->linearVelocity); + 
V3StoreA(angVel1, desc.bodyB->angularState); + } + else + PxcFsApplyImpulse(*desc.articulationB, desc.linkIndexB, V3Scale(li1, FLoad(header->linearInvMassScale1)), + V3Scale(ai1, FLoad(header->angularInvMassScale1))); +} + +void solveExtContact(const PxSolverConstraintDesc& desc, SolverContext& cache) +{ + Vec3V linVel0, angVel0, linVel1, angVel1; + + if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK) + { + linVel0 = V3LoadA(desc.bodyA->linearVelocity); + angVel0 = V3LoadA(desc.bodyA->angularState); + } + else + { + Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationA, desc.linkIndexA); + linVel0 = v.linear; + angVel0 = v.angular; + } + + if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK) + { + linVel1 = V3LoadA(desc.bodyB->linearVelocity); + angVel1 = V3LoadA(desc.bodyB->angularState); + } + else + { + Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationB, desc.linkIndexB); + linVel1 = v.linear; + angVel1 = v.angular; + } + + const PxU8* PX_RESTRICT last = desc.constraint + desc.constraintLengthOver16*16; + + //hopefully pointer aliasing doesn't bite. 
+ PxU8* PX_RESTRICT currPtr = desc.constraint; + + Vec3V linImpulse0 = V3Zero(), linImpulse1 = V3Zero(), angImpulse0 = V3Zero(), angImpulse1 = V3Zero(); + + while(currPtr < last) + { + SolverContactHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader*>(currPtr); + currPtr += sizeof(SolverContactHeader); + + const PxU32 numNormalConstr = hdr->numNormalConstr; + const PxU32 numFrictionConstr = hdr->numFrictionConstr; + + SolverContactPointExt* PX_RESTRICT contacts = reinterpret_cast<SolverContactPointExt*>(currPtr); + Ps::prefetchLine(contacts); + currPtr += numNormalConstr * sizeof(SolverContactPointExt); + + PxF32* appliedForceBuffer = reinterpret_cast<PxF32*>(currPtr); + currPtr += sizeof(PxF32) * ((numNormalConstr + 3) & (~3)); + + SolverContactFrictionExt* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionExt*>(currPtr); + currPtr += numFrictionConstr * sizeof(SolverContactFrictionExt); + + + + Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero(); + + const Vec3V contactNormal = hdr->normal; + + const FloatV accumulatedNormalImpulse = solveExtContacts(contacts, numNormalConstr, contactNormal, linVel0, angVel0, linVel1, + angVel1, li0, ai0, li1, ai1, appliedForceBuffer); + + + if(cache.doFriction && numFrictionConstr) + { + Ps::prefetchLine(frictions); + const FloatV maxFrictionImpulse = FMul(hdr->getStaticFriction(), accumulatedNormalImpulse); + const FloatV maxDynFrictionImpulse = FMul(hdr->getDynamicFriction(), accumulatedNormalImpulse); + + BoolV broken = BFFFF(); + + for(PxU32 i=0;i<numFrictionConstr;i++) + { + SolverContactFrictionExt& f = frictions[i]; + Ps::prefetchLine(&frictions[i+1]); + + const Vec4V normalXYZ_appliedForceW = f.normalXYZ_appliedForceW; + const Vec4V raXnXYZ_velMultiplierW = f.raXnXYZ_velMultiplierW; + const Vec4V rbXnXYZ_biasW = f.rbXnXYZ_biasW; + + const Vec3V normal = Vec3V_From_Vec4V(normalXYZ_appliedForceW); + /*const Vec3V normal0 = V3Scale(normal, sqrtInvMass0); + const Vec3V normal1 = 
V3Scale(normal, sqrtInvMass1);*/ + const Vec3V raXn = Vec3V_From_Vec4V(raXnXYZ_velMultiplierW); + const Vec3V rbXn = Vec3V_From_Vec4V(rbXnXYZ_biasW); + + const FloatV appliedForce = V4GetW(normalXYZ_appliedForceW); + const FloatV bias = V4GetW(rbXnXYZ_biasW); + const FloatV velMultiplier = V4GetW(raXnXYZ_velMultiplierW); + + const FloatV targetVel = FLoad(f.targetVel); + + const FloatV negMaxDynFrictionImpulse = FNeg(maxDynFrictionImpulse); + const FloatV negMaxFrictionImpulse = FNeg(maxFrictionImpulse); + + const Vec3V v0 = V3MulAdd(linVel0, normal, V3Mul(angVel0, raXn)); + const Vec3V v1 = V3MulAdd(linVel1, normal, V3Mul(angVel1, rbXn)); + const FloatV normalVel = V3SumElems(V3Sub(v0, v1)); + + // appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation + const FloatV tmp1 = FNegScaleSub(FSub(bias, targetVel),velMultiplier,appliedForce); + + // Algorithm: + // if abs(appliedForce + deltaF) > maxFrictionImpulse + // clamp newAppliedForce + deltaF to [-maxDynFrictionImpulse, maxDynFrictionImpulse] + // (i.e. 
clamp deltaF to [-maxDynFrictionImpulse-appliedForce, maxDynFrictionImpulse-appliedForce] + // set broken flag to true || broken flag + + // FloatV deltaF = FMul(FAdd(bias, normalVel), minusVelMultiplier); + // FloatV potentialSumF = FAdd(appliedForce, deltaF); + + const FloatV totalImpulse = FNegScaleSub(normalVel, velMultiplier, tmp1); + + // On XBox this clamping code uses the vector simple pipe rather than vector float, + // which eliminates a lot of stall cycles + + const BoolV clampLow = FIsGrtr(negMaxFrictionImpulse, totalImpulse); + const BoolV clampHigh = FIsGrtr(totalImpulse, maxFrictionImpulse); + + const FloatV totalClampedLow = FMax(negMaxDynFrictionImpulse, totalImpulse); + const FloatV totalClampedHigh = FMin(maxDynFrictionImpulse, totalImpulse); + + const FloatV newAppliedForce = FSel(clampLow, totalClampedLow, + FSel(clampHigh, totalClampedHigh, totalImpulse)); + + broken = BOr(broken, BOr(clampLow, clampHigh)); + + FloatV deltaF = FSub(newAppliedForce, appliedForce); + + linVel0 = V3ScaleAdd(f.linDeltaVA, deltaF, linVel0); + angVel0 = V3ScaleAdd(f.angDeltaVA, deltaF, angVel0); + linVel1 = V3ScaleAdd(f.linDeltaVB, deltaF, linVel1); + angVel1 = V3ScaleAdd(f.angDeltaVB, deltaF, angVel1); + + li0 = V3ScaleAdd(normal, deltaF, li0); ai0 = V3ScaleAdd(raXn, deltaF, ai0); + li1 = V3ScaleAdd(normal, deltaF, li1); ai1 = V3ScaleAdd(rbXn, deltaF, ai1); + + f.setAppliedForce(newAppliedForce); + } + Store_From_BoolV(broken, &hdr->broken); + } + + linImpulse0 = V3ScaleAdd(li0, hdr->getDominance0(), linImpulse0); + angImpulse0 = V3ScaleAdd(ai0, FLoad(hdr->angDom0), angImpulse0); + linImpulse1 = V3NegScaleSub(li1, hdr->getDominance1(), linImpulse1); + angImpulse1 = V3NegScaleSub(ai1, FLoad(hdr->angDom1), angImpulse1); + } + + if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK) + { + V3StoreA(linVel0, desc.bodyA->linearVelocity); + V3StoreA(angVel0, desc.bodyA->angularState); + } + else + PxcFsApplyImpulse(*desc.articulationA, desc.linkIndexA, linImpulse0, 
angImpulse0); + + if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK) + { + V3StoreA(linVel1, desc.bodyB->linearVelocity); + V3StoreA(angVel1, desc.bodyB->angularState); + } + else + PxcFsApplyImpulse(*desc.articulationB, desc.linkIndexB, linImpulse1, angImpulse1); + + PX_ASSERT(currPtr == last); +} + + +void solveExtContactBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveExtContact(desc[a], cache); + } +} + +void solveExtContactConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveExtContact(desc[a], cache); + concludeContact(desc[a], cache); + } +} + +void solveExtContactBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].linkIndexA != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyADataIndex]; + PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].linkIndexB != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyBDataIndex]; + + solveExtContact(desc[a], cache); + writeBackContact(desc[a], cache, bd0, bd1); + } + if(cache.mThresholdStreamIndex > 0) + { + //Not enough space to write 4 more thresholds back! 
+ //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a) + { + cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a]; + } + cache.mThresholdStreamIndex = 0; + } +} + +void solveExt1DBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveExt1D(desc[a], cache); + } +} + +void solveExt1DConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveExt1D(desc[a], cache); + conclude1D(desc[a], cache); + } +} + +void solveExt1DBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].linkIndexA != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyADataIndex]; + PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].linkIndexB != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyBDataIndex]; + solveExt1D(desc[a], cache); + writeBack1D(desc[a], cache, bd0, bd1); + } +} + +void ext1DBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].linkIndexA != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyADataIndex]; + PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].linkIndexB != PxSolverConstraintDesc::NO_LINK ? 
0 : desc[a].bodyBDataIndex]; + writeBack1D(desc[a], cache, bd0, bd1); + } +} + +void solveConcludeExtContact (const PxSolverConstraintDesc& desc, SolverContext& cache) +{ + solveExtContact(desc, cache); + concludeContact(desc, cache); +} + +void solveConcludeExt1D (const PxSolverConstraintDesc& desc, SolverContext& cache) +{ + solveExt1D(desc, cache); + conclude1D(desc, cache); +} + + +void solveConclude1D(const PxSolverConstraintDesc& desc, SolverContext& cache) +{ + solve1D(desc, cache); + conclude1D(desc, cache); +} + +void solveConcludeContact (const PxSolverConstraintDesc& desc, SolverContext& cache) +{ + solveContact(desc, cache); + concludeContact(desc, cache); +} + +void solveConcludeContact_BStatic (const PxSolverConstraintDesc& desc, SolverContext& cache) +{ + solveContact_BStatic(desc, cache); + concludeContact(desc, cache); +} + + +} + +} + +#endif diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsBlock.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsBlock.cpp new file mode 100644 index 00000000..aa06dfcf --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsBlock.cpp @@ -0,0 +1,1230 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "foundation/PxPreprocessor.h" +#include "PsVecMath.h" +#include "PsFPU.h" + +#ifdef PX_SUPPORT_SIMD + +#include "CmPhysXCommon.h" +#include "DySolverBody.h" +#include "DySolverContact.h" +#include "DySolverConstraint1D.h" +#include "DySolverConstraintDesc.h" +#include "DyThresholdTable.h" +#include "DySolverContext.h" +#include "PsUtilities.h" +#include "DyConstraint.h" +#include "PsAtomic.h" +#include "DySolverContact4.h" +#include "DySolverConstraint1D4.h" + +namespace physx +{ + +namespace Dy +{ + +static void solveContact4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& cache) +{ + PxSolverBody& b00 = *desc[0].bodyA; + PxSolverBody& b01 = *desc[0].bodyB; + PxSolverBody& b10 = *desc[1].bodyA; + PxSolverBody& b11 = *desc[1].bodyB; + PxSolverBody& b20 = *desc[2].bodyA; + PxSolverBody& b21 = *desc[2].bodyB; + PxSolverBody& b30 = *desc[3].bodyA; + PxSolverBody& b31 = *desc[3].bodyB; + + //We'll need this. + const Vec4V vZero = V4Zero(); + + Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x); + Vec4V linVel01 = V4LoadA(&b01.linearVelocity.x); + Vec4V angState00 = V4LoadA(&b00.angularState.x); + Vec4V angState01 = V4LoadA(&b01.angularState.x); + + Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x); + Vec4V linVel11 = V4LoadA(&b11.linearVelocity.x); + Vec4V angState10 = V4LoadA(&b10.angularState.x); + Vec4V angState11 = V4LoadA(&b11.angularState.x); + + Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x); + Vec4V linVel21 = V4LoadA(&b21.linearVelocity.x); + Vec4V angState20 = V4LoadA(&b20.angularState.x); + Vec4V angState21 = V4LoadA(&b21.angularState.x); + + Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x); + Vec4V linVel31 = V4LoadA(&b31.linearVelocity.x); + Vec4V angState30 = V4LoadA(&b30.angularState.x); + Vec4V angState31 = V4LoadA(&b31.angularState.x); + + + Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3; + Vec4V linVel1T0, linVel1T1, linVel1T2, linVel1T3; + Vec4V angState0T0, angState0T1, angState0T2, angState0T3; + Vec4V angState1T0, 
angState1T1, angState1T2, angState1T3; + + + PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3); + PX_TRANSPOSE_44(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2, linVel1T3); + PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3); + PX_TRANSPOSE_44(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2, angState1T3); + + + const PxU8* PX_RESTRICT last = desc[0].constraint + getConstraintLength(desc[0]); + + //hopefully pointer aliasing doesn't bite. + PxU8* PX_RESTRICT currPtr = desc[0].constraint; + + Vec4V vMax = V4Splat(FMax()); + + const PxU8* PX_RESTRICT prefetchAddress = currPtr + sizeof(SolverContactHeader4) + sizeof(SolverContactBatchPointDynamic4); + + const SolverContactHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader4*>(currPtr); + + const Vec4V invMassA = hdr->invMass0D0; + const Vec4V invMassB = hdr->invMass1D1; + + const Vec4V sumInvMass = V4Add(invMassA, invMassB); + + + while(currPtr < last) + { + + hdr = reinterpret_cast<const SolverContactHeader4*>(currPtr); + + PX_ASSERT(hdr->type == DY_SC_TYPE_BLOCK_RB_CONTACT); + + currPtr = reinterpret_cast<PxU8*>(const_cast<SolverContactHeader4*>(hdr) + 1); + + const PxU32 numNormalConstr = hdr->numNormalConstr; + const PxU32 numFrictionConstr = hdr->numFrictionConstr; + + bool hasMaxImpulse = (hdr->flag & SolverContactHeader4::eHAS_MAX_IMPULSE) != 0; + + Vec4V* appliedForces = reinterpret_cast<Vec4V*>(currPtr); + currPtr += sizeof(Vec4V)*numNormalConstr; + + SolverContactBatchPointDynamic4* PX_RESTRICT contacts = reinterpret_cast<SolverContactBatchPointDynamic4*>(currPtr); + + Vec4V* maxImpulses; + currPtr = reinterpret_cast<PxU8*>(contacts + numNormalConstr); + PxU32 maxImpulseMask = 0; + if(hasMaxImpulse) + { + maxImpulseMask = 0xFFFFFFFF; + maxImpulses = reinterpret_cast<Vec4V*>(currPtr); + currPtr += sizeof(Vec4V) * 
numNormalConstr; + } + else + { + maxImpulses = &vMax; + } + + + SolverFrictionSharedData4* PX_RESTRICT fd = reinterpret_cast<SolverFrictionSharedData4*>(currPtr); + if(numFrictionConstr) + currPtr += sizeof(SolverFrictionSharedData4); + + Vec4V* frictionAppliedForce = reinterpret_cast<Vec4V*>(currPtr); + currPtr += sizeof(Vec4V)*numFrictionConstr; + + const SolverContactFrictionDynamic4* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionDynamic4*>(currPtr); + currPtr += numFrictionConstr * sizeof(SolverContactFrictionDynamic4); + + Vec4V accumulatedNormalImpulse = vZero; + + const Vec4V angD0 = hdr->angDom0; + const Vec4V angD1 = hdr->angDom1; + + const Vec4V _normalT0 = hdr->normalX; + const Vec4V _normalT1 = hdr->normalY; + const Vec4V _normalT2 = hdr->normalZ; + + Vec4V contactNormalVel1 = V4Mul(linVel0T0, _normalT0); + Vec4V contactNormalVel3 = V4Mul(linVel1T0, _normalT0); + contactNormalVel1 = V4MulAdd(linVel0T1, _normalT1, contactNormalVel1); + contactNormalVel3 = V4MulAdd(linVel1T1, _normalT1, contactNormalVel3); + contactNormalVel1 = V4MulAdd(linVel0T2, _normalT2, contactNormalVel1); + contactNormalVel3 = V4MulAdd(linVel1T2, _normalT2, contactNormalVel3); + + Vec4V relVel1 = V4Sub(contactNormalVel1, contactNormalVel3); + + Vec4V accumDeltaF = vZero; + + for(PxU32 i=0;i<numNormalConstr;i++) + { + const SolverContactBatchPointDynamic4& c = contacts[i]; + + PxU32 offset = 0; + Ps::prefetchLine(prefetchAddress, offset += 64); + Ps::prefetchLine(prefetchAddress, offset += 64); + Ps::prefetchLine(prefetchAddress, offset += 64); + prefetchAddress += offset; + + const Vec4V appliedForce = appliedForces[i]; + const Vec4V maxImpulse = maxImpulses[i & maxImpulseMask]; + + Vec4V contactNormalVel2 = V4Mul(c.raXnX, angState0T0); + Vec4V contactNormalVel4 = V4Mul(c.rbXnX, angState1T0); + + contactNormalVel2 = V4MulAdd(c.raXnY, angState0T1, contactNormalVel2); + contactNormalVel4 = V4MulAdd(c.rbXnY, angState1T1, contactNormalVel4); + + contactNormalVel2 = 
V4MulAdd(c.raXnZ, angState0T2, contactNormalVel2); + contactNormalVel4 = V4MulAdd(c.rbXnZ, angState1T2, contactNormalVel4); + + const Vec4V normalVel = V4Add(relVel1, V4Sub(contactNormalVel2, contactNormalVel4)); + + Vec4V deltaF = V4NegMulSub(normalVel, c.velMultiplier, c.biasedErr); + + deltaF = V4Max(deltaF, V4Neg(appliedForce)); + const Vec4V newAppliedForce = V4Min(V4Add(appliedForce, deltaF), maxImpulse); + deltaF = V4Sub(newAppliedForce, appliedForce); + + accumDeltaF = V4Add(accumDeltaF, deltaF); + + const Vec4V angDetaF0 = V4Mul(deltaF, angD0); + const Vec4V angDetaF1 = V4Mul(deltaF, angD1); + + relVel1 = V4MulAdd(sumInvMass, deltaF, relVel1); + + angState0T0 = V4MulAdd(c.raXnX, angDetaF0, angState0T0); + angState1T0 = V4NegMulSub(c.rbXnX, angDetaF1, angState1T0); + + angState0T1 = V4MulAdd(c.raXnY, angDetaF0, angState0T1); + angState1T1 = V4NegMulSub(c.rbXnY, angDetaF1, angState1T1); + + angState0T2 = V4MulAdd(c.raXnZ, angDetaF0, angState0T2); + angState1T2 = V4NegMulSub(c.rbXnZ, angDetaF1, angState1T2); + + appliedForces[i] = newAppliedForce; + + accumulatedNormalImpulse = V4Add(accumulatedNormalImpulse, newAppliedForce); + } + + const Vec4V accumDeltaF_IM0 = V4Mul(accumDeltaF, invMassA); + const Vec4V accumDeltaF_IM1 = V4Mul(accumDeltaF, invMassB); + + linVel0T0 = V4MulAdd(_normalT0, accumDeltaF_IM0, linVel0T0); + linVel1T0 = V4NegMulSub(_normalT0, accumDeltaF_IM1, linVel1T0); + linVel0T1 = V4MulAdd(_normalT1, accumDeltaF_IM0, linVel0T1); + linVel1T1 = V4NegMulSub(_normalT1, accumDeltaF_IM1, linVel1T1); + linVel0T2 = V4MulAdd(_normalT2, accumDeltaF_IM0, linVel0T2); + linVel1T2 = V4NegMulSub(_normalT2, accumDeltaF_IM1, linVel1T2); + + + if(cache.doFriction && numFrictionConstr) + { + const Vec4V staticFric = hdr->staticFriction; + const Vec4V dynamicFric = hdr->dynamicFriction; + + const Vec4V maxFrictionImpulse = V4Mul(staticFric, accumulatedNormalImpulse); + const Vec4V maxDynFrictionImpulse = V4Mul(dynamicFric, accumulatedNormalImpulse); + const Vec4V 
negMaxDynFrictionImpulse = V4Neg(maxDynFrictionImpulse); + //const Vec4V negMaxFrictionImpulse = V4Neg(maxFrictionImpulse); + BoolV broken = BFFFF(); + + if(cache.writeBackIteration) + { + Ps::prefetchLine(fd->frictionBrokenWritebackByte[0]); + Ps::prefetchLine(fd->frictionBrokenWritebackByte[1]); + Ps::prefetchLine(fd->frictionBrokenWritebackByte[2]); + } + + + for(PxU32 i=0;i<numFrictionConstr;i++) + { + const SolverContactFrictionDynamic4& f = frictions[i]; + + PxU32 offset = 0; + Ps::prefetchLine(prefetchAddress, offset += 64); + Ps::prefetchLine(prefetchAddress, offset += 64); + Ps::prefetchLine(prefetchAddress, offset += 64); + Ps::prefetchLine(prefetchAddress, offset += 64); + prefetchAddress += offset; + + const Vec4V appliedForce = frictionAppliedForce[i]; + + const Vec4V normalT0 = fd->normalX[i&1]; + const Vec4V normalT1 = fd->normalY[i&1]; + const Vec4V normalT2 = fd->normalZ[i&1]; + + Vec4V normalVel1 = V4Mul(linVel0T0, normalT0); + Vec4V normalVel2 = V4Mul(f.raXnX, angState0T0); + Vec4V normalVel3 = V4Mul(linVel1T0, normalT0); + Vec4V normalVel4 = V4Mul(f.rbXnX, angState1T0); + + normalVel1 = V4MulAdd(linVel0T1, normalT1, normalVel1); + normalVel2 = V4MulAdd(f.raXnY, angState0T1, normalVel2); + normalVel3 = V4MulAdd(linVel1T1, normalT1, normalVel3); + normalVel4 = V4MulAdd(f.rbXnY, angState1T1, normalVel4); + + normalVel1 = V4MulAdd(linVel0T2, normalT2, normalVel1); + normalVel2 = V4MulAdd(f.raXnZ, angState0T2, normalVel2); + normalVel3 = V4MulAdd(linVel1T2, normalT2, normalVel3); + normalVel4 = V4MulAdd(f.rbXnZ, angState1T2, normalVel4); + + const Vec4V _normalVel = V4Add(normalVel1, normalVel2); + const Vec4V __normalVel = V4Add(normalVel3, normalVel4); + + // appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation + + const Vec4V normalVel = V4Sub(_normalVel, __normalVel ); + + const Vec4V tmp1 = V4Sub(appliedForce, f.scaledBias); + + const Vec4V totalImpulse = V4NegMulSub(normalVel, f.velMultiplier, tmp1); + + broken = 
BOr(broken, V4IsGrtr(V4Abs(totalImpulse), maxFrictionImpulse)); + + const Vec4V newAppliedForce = V4Sel(broken, V4Min(maxDynFrictionImpulse, V4Max(negMaxDynFrictionImpulse, totalImpulse)), totalImpulse); + + const Vec4V deltaF =V4Sub(newAppliedForce, appliedForce); + + frictionAppliedForce[i] = newAppliedForce; + + const Vec4V deltaFIM0 = V4Mul(deltaF, invMassA); + const Vec4V deltaFIM1 = V4Mul(deltaF, invMassB); + + const Vec4V angDetaF0 = V4Mul(deltaF, angD0); + const Vec4V angDetaF1 = V4Mul(deltaF, angD1); + + linVel0T0 = V4MulAdd(normalT0, deltaFIM0, linVel0T0); + linVel1T0 = V4NegMulSub(normalT0, deltaFIM1, linVel1T0); + angState0T0 = V4MulAdd(f.raXnX, angDetaF0, angState0T0); + angState1T0 = V4NegMulSub(f.rbXnX, angDetaF1, angState1T0); + + linVel0T1 = V4MulAdd(normalT1, deltaFIM0, linVel0T1); + linVel1T1 = V4NegMulSub(normalT1, deltaFIM1, linVel1T1); + angState0T1 = V4MulAdd(f.raXnY, angDetaF0, angState0T1); + angState1T1 = V4NegMulSub(f.rbXnY, angDetaF1, angState1T1); + + linVel0T2 = V4MulAdd(normalT2, deltaFIM0, linVel0T2); + linVel1T2 = V4NegMulSub(normalT2, deltaFIM1, linVel1T2); + angState0T2 = V4MulAdd(f.raXnZ, angDetaF0, angState0T2); + angState1T2 = V4NegMulSub(f.rbXnZ, angDetaF1, angState1T2); + } + fd->broken = broken; + } + } + + PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30); + PX_TRANSPOSE_44(linVel1T0, linVel1T1, linVel1T2, linVel1T3, linVel01, linVel11, linVel21, linVel31); + PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30); + PX_TRANSPOSE_44(angState1T0, angState1T1, angState1T2, angState1T3, angState01, angState11, angState21, angState31); + + PX_ASSERT(b00.linearVelocity.isFinite()); + PX_ASSERT(b00.angularState.isFinite()); + PX_ASSERT(b10.linearVelocity.isFinite()); + PX_ASSERT(b10.angularState.isFinite()); + PX_ASSERT(b20.linearVelocity.isFinite()); + PX_ASSERT(b20.angularState.isFinite()); + 
PX_ASSERT(b30.linearVelocity.isFinite()); + PX_ASSERT(b30.angularState.isFinite()); + + PX_ASSERT(b01.linearVelocity.isFinite()); + PX_ASSERT(b01.angularState.isFinite()); + PX_ASSERT(b11.linearVelocity.isFinite()); + PX_ASSERT(b11.angularState.isFinite()); + PX_ASSERT(b21.linearVelocity.isFinite()); + PX_ASSERT(b21.angularState.isFinite()); + PX_ASSERT(b31.linearVelocity.isFinite()); + PX_ASSERT(b31.angularState.isFinite()); + + // Write back + V4StoreA(linVel00, &b00.linearVelocity.x); + V4StoreA(angState00, &b00.angularState.x); + V4StoreA(linVel10, &b10.linearVelocity.x); + V4StoreA(angState10, &b10.angularState.x); + V4StoreA(linVel20, &b20.linearVelocity.x); + V4StoreA(angState20, &b20.angularState.x); + V4StoreA(linVel30, &b30.linearVelocity.x); + V4StoreA(angState30, &b30.angularState.x); + + if(desc[0].bodyBDataIndex != 0) + { + V4StoreA(linVel01, &b01.linearVelocity.x); + V4StoreA(angState01, &b01.angularState.x); + } + if(desc[1].bodyBDataIndex != 0) + { + V4StoreA(linVel11, &b11.linearVelocity.x); + V4StoreA(angState11, &b11.angularState.x); + } + if(desc[2].bodyBDataIndex != 0) + { + V4StoreA(linVel21, &b21.linearVelocity.x); + V4StoreA(angState21, &b21.angularState.x); + } + if(desc[3].bodyBDataIndex != 0) + { + V4StoreA(linVel31, &b31.linearVelocity.x); + V4StoreA(angState31, &b31.angularState.x); + } + + PX_ASSERT(b00.linearVelocity.isFinite()); + PX_ASSERT(b00.angularState.isFinite()); + PX_ASSERT(b10.linearVelocity.isFinite()); + PX_ASSERT(b10.angularState.isFinite()); + PX_ASSERT(b20.linearVelocity.isFinite()); + PX_ASSERT(b20.angularState.isFinite()); + PX_ASSERT(b30.linearVelocity.isFinite()); + PX_ASSERT(b30.angularState.isFinite()); + + PX_ASSERT(b01.linearVelocity.isFinite()); + PX_ASSERT(b01.angularState.isFinite()); + PX_ASSERT(b11.linearVelocity.isFinite()); + PX_ASSERT(b11.angularState.isFinite()); + PX_ASSERT(b21.linearVelocity.isFinite()); + PX_ASSERT(b21.angularState.isFinite()); + PX_ASSERT(b31.linearVelocity.isFinite()); + 
PX_ASSERT(b31.angularState.isFinite()); +} + +static void solveContact4_StaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& cache) +{ + PxSolverBody& b00 = *desc[0].bodyA; + PxSolverBody& b10 = *desc[1].bodyA; + PxSolverBody& b20 = *desc[2].bodyA; + PxSolverBody& b30 = *desc[3].bodyA; + + const PxU8* PX_RESTRICT last = desc[0].constraint + getConstraintLength(desc[0]); + + //hopefully pointer aliasing doesn't bite. + PxU8* PX_RESTRICT currPtr = desc[0].constraint; + + + //We'll need this. + const Vec4V vZero = V4Zero(); + Vec4V vMax = V4Splat(FMax()); + + Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x); + Vec4V angState00 = V4LoadA(&b00.angularState.x); + + Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x); + Vec4V angState10 = V4LoadA(&b10.angularState.x); + + Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x); + Vec4V angState20 = V4LoadA(&b20.angularState.x); + + Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x); + Vec4V angState30 = V4LoadA(&b30.angularState.x); + + Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3; + Vec4V angState0T0, angState0T1, angState0T2, angState0T3; + + + PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3); + PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3); + + const PxU8* PX_RESTRICT prefetchAddress = currPtr + sizeof(SolverContactHeader4) + sizeof(SolverContactBatchPointBase4); + + const SolverContactHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader4*>(currPtr); + + const Vec4V invMass0 = hdr->invMass0D0; + + while((currPtr < last)) + { + hdr = reinterpret_cast<const SolverContactHeader4*>(currPtr); + + PX_ASSERT(hdr->type == DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT); + + currPtr = const_cast<PxU8*>(reinterpret_cast<const PxU8*>(hdr + 1)); + + const PxU32 numNormalConstr = hdr->numNormalConstr; + const PxU32 numFrictionConstr = hdr->numFrictionConstr; + bool hasMaxImpulse = (hdr->flag & 
SolverContactHeader4::eHAS_MAX_IMPULSE) != 0; + + Vec4V* appliedForces = reinterpret_cast<Vec4V*>(currPtr); + currPtr += sizeof(Vec4V)*numNormalConstr; + + SolverContactBatchPointBase4* PX_RESTRICT contacts = reinterpret_cast<SolverContactBatchPointBase4*>(currPtr); + + currPtr = reinterpret_cast<PxU8*>(contacts + numNormalConstr); + + Vec4V* maxImpulses; + PxU32 maxImpulseMask; + if(hasMaxImpulse) + { + maxImpulseMask = 0xFFFFFFFF; + maxImpulses = reinterpret_cast<Vec4V*>(currPtr); + currPtr += sizeof(Vec4V) * numNormalConstr; + } + else + { + maxImpulseMask = 0; + maxImpulses = &vMax; + } + + SolverFrictionSharedData4* PX_RESTRICT fd = reinterpret_cast<SolverFrictionSharedData4*>(currPtr); + if(numFrictionConstr) + currPtr += sizeof(SolverFrictionSharedData4); + + Vec4V* frictionAppliedForces = reinterpret_cast<Vec4V*>(currPtr); + currPtr += sizeof(Vec4V)*numFrictionConstr; + + const SolverContactFrictionBase4* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionBase4*>(currPtr); + currPtr += numFrictionConstr * sizeof(SolverContactFrictionBase4); + + + Vec4V accumulatedNormalImpulse = vZero; + + const Vec4V angD0 = hdr->angDom0; + const Vec4V _normalT0 = hdr->normalX; + const Vec4V _normalT1 = hdr->normalY; + const Vec4V _normalT2 = hdr->normalZ; + + Vec4V contactNormalVel1 = V4Mul(linVel0T0, _normalT0); + contactNormalVel1 = V4MulAdd(linVel0T1, _normalT1, contactNormalVel1); + + contactNormalVel1 = V4MulAdd(linVel0T2, _normalT2, contactNormalVel1); + + Vec4V accumDeltaF = vZero; + + + for(PxU32 i=0;i<numNormalConstr;i++) + { + const SolverContactBatchPointBase4& c = contacts[i]; + + PxU32 offset = 0; + Ps::prefetchLine(prefetchAddress, offset += 64); + Ps::prefetchLine(prefetchAddress, offset += 64); + Ps::prefetchLine(prefetchAddress, offset += 64); + prefetchAddress += offset; + + const Vec4V appliedForce = appliedForces[i]; + const Vec4V maxImpulse = maxImpulses[i&maxImpulseMask]; + Vec4V contactNormalVel2 = V4MulAdd(c.raXnX, angState0T0, 
contactNormalVel1); + contactNormalVel2 = V4MulAdd(c.raXnY, angState0T1, contactNormalVel2); + const Vec4V normalVel = V4MulAdd(c.raXnZ, angState0T2, contactNormalVel2); + + const Vec4V _deltaF = V4Max(V4NegMulSub(normalVel, c.velMultiplier, c.biasedErr), V4Neg(appliedForce)); + + Vec4V newAppliedForce(V4Add(appliedForce, _deltaF)); + newAppliedForce = V4Min(newAppliedForce, maxImpulse); + const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce); + const Vec4V angDeltaF = V4Mul(angD0, deltaF); + + accumDeltaF = V4Add(accumDeltaF, deltaF); + + contactNormalVel1 = V4MulAdd(invMass0, deltaF, contactNormalVel1); + angState0T0 = V4MulAdd(c.raXnX, angDeltaF, angState0T0); + angState0T1 = V4MulAdd(c.raXnY, angDeltaF, angState0T1); + angState0T2 = V4MulAdd(c.raXnZ, angDeltaF, angState0T2); + +#if 1 + appliedForces[i] = newAppliedForce; +#endif + + accumulatedNormalImpulse = V4Add(accumulatedNormalImpulse, newAppliedForce); + } + + const Vec4V deltaFInvMass0 = V4Mul(accumDeltaF, invMass0); + + linVel0T0 = V4MulAdd(_normalT0, deltaFInvMass0, linVel0T0); + linVel0T1 = V4MulAdd(_normalT1, deltaFInvMass0, linVel0T1); + linVel0T2 = V4MulAdd(_normalT2, deltaFInvMass0, linVel0T2); + + if(cache.doFriction && numFrictionConstr) + { + const Vec4V staticFric = hdr->staticFriction; + + const Vec4V dynamicFric = hdr->dynamicFriction; + + const Vec4V maxFrictionImpulse = V4Mul(staticFric, accumulatedNormalImpulse); + const Vec4V maxDynFrictionImpulse = V4Mul(dynamicFric, accumulatedNormalImpulse); + const Vec4V negMaxDynFrictionImpulse = V4Neg(maxDynFrictionImpulse); + + BoolV broken = BFFFF(); + + if(cache.writeBackIteration) + { + Ps::prefetchLine(fd->frictionBrokenWritebackByte[0]); + Ps::prefetchLine(fd->frictionBrokenWritebackByte[1]); + Ps::prefetchLine(fd->frictionBrokenWritebackByte[2]); + Ps::prefetchLine(fd->frictionBrokenWritebackByte[3]); + } + + for(PxU32 i=0;i<numFrictionConstr;i++) + { + const SolverContactFrictionBase4& f = frictions[i]; + + PxU32 offset = 0; + 
Ps::prefetchLine(prefetchAddress, offset += 64); + Ps::prefetchLine(prefetchAddress, offset += 64); + Ps::prefetchLine(prefetchAddress, offset += 64); + prefetchAddress += offset; + + const Vec4V appliedForce = frictionAppliedForces[i]; + + const Vec4V normalT0 = fd->normalX[i&1]; + const Vec4V normalT1 = fd->normalY[i&1]; + const Vec4V normalT2 = fd->normalZ[i&1]; + + Vec4V normalVel1 = V4Mul(linVel0T0, normalT0); + Vec4V normalVel2 = V4Mul(f.raXnX, angState0T0); + + normalVel1 = V4MulAdd(linVel0T1, normalT1, normalVel1); + normalVel2 = V4MulAdd(f.raXnY, angState0T1, normalVel2); + + normalVel1 = V4MulAdd(linVel0T2, normalT2, normalVel1); + normalVel2 = V4MulAdd(f.raXnZ, angState0T2, normalVel2); + + //relative normal velocity for all 4 constraints + const Vec4V normalVel = V4Add(normalVel1, normalVel2); + + // appliedForce -bias * velMultiplier - a hoisted part of the total impulse computation + const Vec4V tmp1 = V4Sub(appliedForce, f.scaledBias); + + const Vec4V totalImpulse = V4NegMulSub(normalVel, f.velMultiplier, tmp1); + + broken = BOr(broken, V4IsGrtr(V4Abs(totalImpulse), maxFrictionImpulse)); + + const Vec4V newAppliedForce = V4Sel(broken, V4Min(maxDynFrictionImpulse, V4Max(negMaxDynFrictionImpulse, totalImpulse)), totalImpulse); + + const Vec4V deltaF =V4Sub(newAppliedForce, appliedForce); + + const Vec4V deltaFInvMass = V4Mul(invMass0, deltaF); + const Vec4V angDeltaF = V4Mul(angD0, deltaF); + + linVel0T0 = V4MulAdd(normalT0, deltaFInvMass, linVel0T0); + angState0T0 = V4MulAdd(f.raXnX, angDeltaF, angState0T0); + + linVel0T1 = V4MulAdd(normalT1, deltaFInvMass, linVel0T1); + angState0T1 = V4MulAdd(f.raXnY, angDeltaF, angState0T1); + + linVel0T2 = V4MulAdd(normalT2, deltaFInvMass, linVel0T2); + angState0T2 = V4MulAdd(f.raXnZ, angDeltaF, angState0T2); + +#if 1 + frictionAppliedForces[i] = newAppliedForce; +#endif + + } + + fd->broken = broken; + } + } + + PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30); + 
PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30); + + PX_ASSERT(b00.linearVelocity.isFinite()); + PX_ASSERT(b00.angularState.isFinite()); + PX_ASSERT(b10.linearVelocity.isFinite()); + PX_ASSERT(b10.angularState.isFinite()); + PX_ASSERT(b20.linearVelocity.isFinite()); + PX_ASSERT(b20.angularState.isFinite()); + PX_ASSERT(b30.linearVelocity.isFinite()); + PX_ASSERT(b30.angularState.isFinite()); + + // Write back + V4StoreA(linVel00, &b00.linearVelocity.x); + V4StoreA(linVel10, &b10.linearVelocity.x); + V4StoreA(linVel20, &b20.linearVelocity.x); + V4StoreA(linVel30, &b30.linearVelocity.x); + + V4StoreA(angState00, &b00.angularState.x); + V4StoreA(angState10, &b10.angularState.x); + V4StoreA(angState20, &b20.angularState.x); + V4StoreA(angState30, &b30.angularState.x); + + PX_ASSERT(b00.linearVelocity.isFinite()); + PX_ASSERT(b00.angularState.isFinite()); + PX_ASSERT(b10.linearVelocity.isFinite()); + PX_ASSERT(b10.angularState.isFinite()); + PX_ASSERT(b20.linearVelocity.isFinite()); + PX_ASSERT(b20.angularState.isFinite()); + PX_ASSERT(b30.linearVelocity.isFinite()); + PX_ASSERT(b30.angularState.isFinite()); +} + +static void concludeContact4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/, PxU32 contactSize, PxU32 frictionSize) +{ + const PxU8* PX_RESTRICT last = desc[0].constraint + getConstraintLength(desc[0]); + + //hopefully pointer aliasing doesn't bite. 
+ PxU8* PX_RESTRICT currPtr = desc[0].constraint; + + while((currPtr < last)) + { + const SolverContactHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader4*>(currPtr); + + currPtr = const_cast<PxU8*>(reinterpret_cast<const PxU8*>(hdr + 1)); + + const PxU32 numNormalConstr = hdr->numNormalConstr; + const PxU32 numFrictionConstr = hdr->numFrictionConstr; + + currPtr += sizeof(Vec4V)*numNormalConstr; + + SolverContactBatchPointBase4* PX_RESTRICT contacts = reinterpret_cast<SolverContactBatchPointBase4*>(currPtr); + currPtr += (numNormalConstr * contactSize); + bool hasMaxImpulse = (hdr->flag & SolverContactHeader4::eHAS_MAX_IMPULSE) != 0; + + if(hasMaxImpulse) + currPtr += sizeof(Vec4V) * numNormalConstr; + + currPtr += sizeof(Vec4V)*numFrictionConstr; + + SolverFrictionSharedData4* PX_RESTRICT fd = reinterpret_cast<SolverFrictionSharedData4*>(currPtr); + if(numFrictionConstr) + currPtr += sizeof(SolverFrictionSharedData4); + PX_UNUSED(fd); + + SolverContactFrictionBase4* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionBase4*>(currPtr); + currPtr += (numFrictionConstr * frictionSize); + + for(PxU32 i=0;i<numNormalConstr;i++) + { + SolverContactBatchPointBase4& c = *contacts; + contacts = reinterpret_cast<SolverContactBatchPointBase4*>((reinterpret_cast<PxU8*>(contacts)) + contactSize); + c.biasedErr = V4Sub(c.biasedErr, c.scaledBias); + } + + for(PxU32 i=0;i<numFrictionConstr;i++) + { + SolverContactFrictionBase4& f = *frictions; + frictions = reinterpret_cast<SolverContactFrictionBase4*>((reinterpret_cast<PxU8*>(frictions)) + frictionSize); + f.scaledBias = f.targetVelocity; + } + } +} + +void writeBackContact4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& cache, + const PxSolverBodyData** PX_RESTRICT bd0, const PxSolverBodyData** PX_RESTRICT bd1) +{ + const PxU8* PX_RESTRICT last = desc[0].constraint + getConstraintLength(desc[0]); + + //hopefully pointer aliasing doesn't bite. 
+ PxU8* PX_RESTRICT currPtr = desc[0].constraint; + PxReal* PX_RESTRICT vForceWriteback0 = reinterpret_cast<PxReal*>(desc[0].writeBack); + PxReal* PX_RESTRICT vForceWriteback1 = reinterpret_cast<PxReal*>(desc[1].writeBack); + PxReal* PX_RESTRICT vForceWriteback2 = reinterpret_cast<PxReal*>(desc[2].writeBack); + PxReal* PX_RESTRICT vForceWriteback3 = reinterpret_cast<PxReal*>(desc[3].writeBack); + + const PxU8 type = *desc[0].constraint; + const PxU32 contactSize = type == DY_SC_TYPE_BLOCK_RB_CONTACT ? sizeof(SolverContactBatchPointDynamic4) : sizeof(SolverContactBatchPointBase4); + const PxU32 frictionSize = type == DY_SC_TYPE_BLOCK_RB_CONTACT ? sizeof(SolverContactFrictionDynamic4) : sizeof(SolverContactFrictionBase4); + + + Vec4V normalForce = V4Zero(); + + + //We'll need this. + //const Vec4V vZero = V4Zero(); + + bool writeBackThresholds[4] = {false, false, false, false}; + + while((currPtr < last)) + { + SolverContactHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverContactHeader4*>(currPtr); + + currPtr = reinterpret_cast<PxU8*>(hdr + 1); + + const PxU32 numNormalConstr = hdr->numNormalConstr; + const PxU32 numFrictionConstr = hdr->numFrictionConstr; + + Vec4V* PX_RESTRICT appliedForces = reinterpret_cast<Vec4V*>(currPtr); + currPtr += sizeof(Vec4V)*numNormalConstr; + + //SolverContactBatchPointBase4* PX_RESTRICT contacts = (SolverContactBatchPointBase4*)currPtr; + currPtr += (numNormalConstr * contactSize); + + bool hasMaxImpulse = (hdr->flag & SolverContactHeader4::eHAS_MAX_IMPULSE) != 0; + + if(hasMaxImpulse) + currPtr += sizeof(Vec4V) * numNormalConstr; + + SolverFrictionSharedData4* PX_RESTRICT fd = reinterpret_cast<SolverFrictionSharedData4*>(currPtr); + if(numFrictionConstr) + currPtr += sizeof(SolverFrictionSharedData4); + + currPtr += sizeof(Vec4V)*numFrictionConstr; + + //SolverContactFrictionBase4* PX_RESTRICT frictions = (SolverContactFrictionBase4*)currPtr; + currPtr += (numFrictionConstr * frictionSize); + + writeBackThresholds[0] = 
hdr->flags[0] & SolverContactHeader::eHAS_FORCE_THRESHOLDS; + writeBackThresholds[1] = hdr->flags[1] & SolverContactHeader::eHAS_FORCE_THRESHOLDS; + writeBackThresholds[2] = hdr->flags[2] & SolverContactHeader::eHAS_FORCE_THRESHOLDS; + writeBackThresholds[3] = hdr->flags[3] & SolverContactHeader::eHAS_FORCE_THRESHOLDS; + + + for(PxU32 i=0;i<numNormalConstr;i++) + { + //contacts = (SolverContactBatchPointBase4*)(((PxU8*)contacts) + contactSize); + const FloatV appliedForce0 = V4GetX(appliedForces[i]); + const FloatV appliedForce1 = V4GetY(appliedForces[i]); + const FloatV appliedForce2 = V4GetZ(appliedForces[i]); + const FloatV appliedForce3 = V4GetW(appliedForces[i]); + + normalForce = V4Add(normalForce, appliedForces[i]); + + if(vForceWriteback0 && i < hdr->numNormalConstr0) + FStore(appliedForce0, vForceWriteback0++); + if(vForceWriteback1 && i < hdr->numNormalConstr1) + FStore(appliedForce1, vForceWriteback1++); + if(vForceWriteback2 && i < hdr->numNormalConstr2) + FStore(appliedForce2, vForceWriteback2++); + if(vForceWriteback3 && i < hdr->numNormalConstr3) + FStore(appliedForce3, vForceWriteback3++); + } + + if(numFrictionConstr) + { + PX_ALIGN(16, PxU32 broken[4]); + BStoreA(fd->broken, broken); + + PxU8* frictionCounts = &hdr->numFrictionConstr0; + + for(PxU32 a = 0; a < 4; ++a) + { + if(frictionCounts[a] && broken[a]) + *fd->frictionBrokenWritebackByte[a] = 1; // PT: bad L2 miss here + } + } + } + + PX_ALIGN(16, PxReal nf[4]); + V4StoreA(normalForce, nf); + + Sc::ShapeInteraction** shapeInteractions = reinterpret_cast<SolverContactHeader4*>(desc[0].constraint)->shapeInteraction; + + for(PxU32 a = 0; a < 4; ++a) + { + if(writeBackThresholds[a] && desc[a].linkIndexA == PxSolverConstraintDesc::NO_LINK && desc[a].linkIndexB == PxSolverConstraintDesc::NO_LINK && + nf[a] !=0.f && (bd0[a]->reportThreshold < PX_MAX_REAL || bd1[a]->reportThreshold < PX_MAX_REAL)) + { + ThresholdStreamElement elt; + elt.normalForce = nf[a]; + elt.threshold = 
PxMin<float>(bd0[a]->reportThreshold, bd1[a]->reportThreshold); + elt.nodeIndexA = bd0[a]->nodeIndex; + elt.nodeIndexB = bd1[a]->nodeIndex; + elt.shapeInteraction = shapeInteractions[a]; + Ps::order(elt.nodeIndexA, elt.nodeIndexB); + PX_ASSERT(elt.nodeIndexA < elt.nodeIndexB); + PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength); + cache.mThresholdStream[cache.mThresholdStreamIndex++] = elt; + } + } +} + +static void solve1D4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/) +{ + + PxSolverBody& b00 = *desc[0].bodyA; + PxSolverBody& b01 = *desc[0].bodyB; + + PxSolverBody& b10 = *desc[1].bodyA; + PxSolverBody& b11 = *desc[1].bodyB; + + PxSolverBody& b20 = *desc[2].bodyA; + PxSolverBody& b21 = *desc[2].bodyB; + + PxSolverBody& b30 = *desc[3].bodyA; + PxSolverBody& b31 = *desc[3].bodyB; + + PxU8* PX_RESTRICT bPtr = desc[0].constraint; + //PxU32 length = desc.constraintLength; + + SolverConstraint1DHeader4* PX_RESTRICT header = reinterpret_cast<SolverConstraint1DHeader4*>(bPtr); + SolverConstraint1DDynamic4* PX_RESTRICT base = reinterpret_cast<SolverConstraint1DDynamic4*>(header+1); + + //const FloatV fZero = FZero(); + Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x); + Vec4V linVel01 = V4LoadA(&b01.linearVelocity.x); + Vec4V angState00 = V4LoadA(&b00.angularState.x); + Vec4V angState01 = V4LoadA(&b01.angularState.x); + + Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x); + Vec4V linVel11 = V4LoadA(&b11.linearVelocity.x); + Vec4V angState10 = V4LoadA(&b10.angularState.x); + Vec4V angState11 = V4LoadA(&b11.angularState.x); + + Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x); + Vec4V linVel21 = V4LoadA(&b21.linearVelocity.x); + Vec4V angState20 = V4LoadA(&b20.angularState.x); + Vec4V angState21 = V4LoadA(&b21.angularState.x); + + Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x); + Vec4V linVel31 = V4LoadA(&b31.linearVelocity.x); + Vec4V angState30 = V4LoadA(&b30.angularState.x); + Vec4V angState31 = 
V4LoadA(&b31.angularState.x); + + + Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3; + Vec4V linVel1T0, linVel1T1, linVel1T2, linVel1T3; + Vec4V angState0T0, angState0T1, angState0T2, angState0T3; + Vec4V angState1T0, angState1T1, angState1T2, angState1T3; + + + PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3); + PX_TRANSPOSE_44(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2, linVel1T3); + PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3); + PX_TRANSPOSE_44(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2, angState1T3); + + const Vec4V invMass0D0 = header->invMass0D0; + const Vec4V invMass1D1 = header->invMass1D1; + + const Vec4V angD0 = header->angD0; + const Vec4V angD1 = header->angD1; + + PxU32 maxConstraints = header->count; + + for(PxU32 a = 0; a < maxConstraints; ++a) + { + SolverConstraint1DDynamic4& c = *base; + base++; + + Ps::prefetchLine(base); + Ps::prefetchLine(base, 64); + Ps::prefetchLine(base, 128); + Ps::prefetchLine(base, 192); + Ps::prefetchLine(base, 256); + + const Vec4V appliedForce = c.appliedForce; + + Vec4V linProj0(V4Mul(c.lin0X, linVel0T0)); + Vec4V linProj1(V4Mul(c.lin1X, linVel1T0)); + Vec4V angProj0(V4Mul(c.ang0X, angState0T0)); + Vec4V angProj1(V4Mul(c.ang1X, angState1T0)); + + linProj0 = V4MulAdd(c.lin0Y, linVel0T1, linProj0); + linProj1 = V4MulAdd(c.lin1Y, linVel1T1, linProj1); + angProj0 = V4MulAdd(c.ang0Y, angState0T1, angProj0); + angProj1 = V4MulAdd(c.ang1Y, angState1T1, angProj1); + + linProj0 = V4MulAdd(c.lin0Z, linVel0T2, linProj0); + linProj1 = V4MulAdd(c.lin1Z, linVel1T2, linProj1); + angProj0 = V4MulAdd(c.ang0Z, angState0T2, angProj0); + angProj1 = V4MulAdd(c.ang1Z, angState1T2, angProj1); + + const Vec4V projectVel0 = V4Add(linProj0, angProj0); + const Vec4V projectVel1 = V4Add(linProj1, angProj1); + + const Vec4V normalVel = V4Sub(projectVel0, 
projectVel1); + + const Vec4V unclampedForce = V4MulAdd(appliedForce, c.impulseMultiplier, V4MulAdd(normalVel, c.velMultiplier, c.constant)); + const Vec4V clampedForce = V4Max(c.minImpulse, V4Min(c.maxImpulse, unclampedForce)); + const Vec4V deltaF = V4Sub(clampedForce, appliedForce); + c.appliedForce = clampedForce; + + const Vec4V deltaFInvMass0 = V4Mul(deltaF, invMass0D0); + const Vec4V deltaFInvMass1 = V4Mul(deltaF, invMass1D1); + + const Vec4V angDeltaFInvMass0 = V4Mul(deltaF, angD0); + const Vec4V angDeltaFInvMass1 = V4Mul(deltaF, angD1); + + linVel0T0 = V4MulAdd(c.lin0X, deltaFInvMass0, linVel0T0); + linVel1T0 = V4NegMulSub(c.lin1X, deltaFInvMass1, linVel1T0); + angState0T0 = V4MulAdd(c.ang0X, angDeltaFInvMass0, angState0T0); + angState1T0 = V4NegMulSub(c.ang1X, angDeltaFInvMass1, angState1T0); + + linVel0T1 = V4MulAdd(c.lin0Y, deltaFInvMass0, linVel0T1); + linVel1T1 = V4NegMulSub(c.lin1Y, deltaFInvMass1, linVel1T1); + angState0T1 = V4MulAdd(c.ang0Y, angDeltaFInvMass0, angState0T1); + angState1T1 = V4NegMulSub(c.ang1Y, angDeltaFInvMass1, angState1T1); + + linVel0T2 = V4MulAdd(c.lin0Z, deltaFInvMass0, linVel0T2); + linVel1T2 = V4NegMulSub(c.lin1Z, deltaFInvMass1, linVel1T2); + angState0T2 = V4MulAdd(c.ang0Z, angDeltaFInvMass0, angState0T2); + angState1T2 = V4NegMulSub(c.ang1Z, angDeltaFInvMass1, angState1T2); + } + + PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30); + PX_TRANSPOSE_44(linVel1T0, linVel1T1, linVel1T2, linVel1T3, linVel01, linVel11, linVel21, linVel31); + PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30); + PX_TRANSPOSE_44(angState1T0, angState1T1, angState1T2, angState1T3, angState01, angState11, angState21, angState31); + + + // Write back + V4StoreA(linVel00, &b00.linearVelocity.x); + V4StoreA(linVel10, &b10.linearVelocity.x); + V4StoreA(linVel20, &b20.linearVelocity.x); + V4StoreA(linVel30, &b30.linearVelocity.x); + + 
V4StoreA(linVel01, &b01.linearVelocity.x); + V4StoreA(linVel11, &b11.linearVelocity.x); + V4StoreA(linVel21, &b21.linearVelocity.x); + V4StoreA(linVel31, &b31.linearVelocity.x); + + V4StoreA(angState00, &b00.angularState.x); + V4StoreA(angState10, &b10.angularState.x); + V4StoreA(angState20, &b20.angularState.x); + V4StoreA(angState30, &b30.angularState.x); + + V4StoreA(angState01, &b01.angularState.x); + V4StoreA(angState11, &b11.angularState.x); + V4StoreA(angState21, &b21.angularState.x); + V4StoreA(angState31, &b31.angularState.x); + +} + +static void conclude1D4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/) +{ + SolverConstraint1DHeader4* header = reinterpret_cast<SolverConstraint1DHeader4*>(desc[0].constraint); + PxU8* base = desc[0].constraint + sizeof(SolverConstraint1DHeader4); + PxU32 stride = header->type == DY_SC_TYPE_BLOCK_1D ? sizeof(SolverConstraint1DDynamic4) : sizeof(SolverConstraint1DBase4); + + for(PxU32 i=0; i<header->count; i++) + { + SolverConstraint1DBase4& c = *reinterpret_cast<SolverConstraint1DBase4*>(base); + c.constant = c.unbiasedConstant; + base += stride; + } + PX_ASSERT(desc[0].constraint + getConstraintLength(desc[0]) == base); +} + +void writeBack1D4(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/, + const PxSolverBodyData** PX_RESTRICT /*bd0*/, const PxSolverBodyData** PX_RESTRICT /*bd1*/) +{ + ConstraintWriteback* writeback0 = reinterpret_cast<ConstraintWriteback*>(desc[0].writeBack); + ConstraintWriteback* writeback1 = reinterpret_cast<ConstraintWriteback*>(desc[1].writeBack); + ConstraintWriteback* writeback2 = reinterpret_cast<ConstraintWriteback*>(desc[2].writeBack); + ConstraintWriteback* writeback3 = reinterpret_cast<ConstraintWriteback*>(desc[3].writeBack); + + if(writeback0 || writeback1 || writeback2 || writeback3) + { + SolverConstraint1DHeader4* header = reinterpret_cast<SolverConstraint1DHeader4*>(desc[0].constraint); + PxU8* base = desc[0].constraint + 
sizeof(SolverConstraint1DHeader4); + PxU32 stride = header->type == DY_SC_TYPE_BLOCK_1D ? sizeof(SolverConstraint1DDynamic4) : sizeof(SolverConstraint1DBase4); + + const Vec4V zero = V4Zero(); + Vec4V linX(zero), linY(zero), linZ(zero); + Vec4V angX(zero), angY(zero), angZ(zero); + + for(PxU32 i=0; i<header->count; i++) + { + const SolverConstraint1DBase4* c = reinterpret_cast<SolverConstraint1DBase4*>(base); + + //Load in flags + const VecI32V flags = I4LoadU(reinterpret_cast<const PxI32*>(&c->flags[0])); + //Work out masks + const VecI32V mask = I4Load(DY_SC_FLAG_OUTPUT_FORCE); + + const VecI32V masked = VecI32V_And(flags, mask); + const BoolV isEq = VecI32V_IsEq(masked, mask); + + const Vec4V appliedForce = V4Sel(isEq, c->appliedForce, zero); + + linX = V4MulAdd(c->lin0X, appliedForce, linX); + linY = V4MulAdd(c->lin0Y, appliedForce, linY); + linZ = V4MulAdd(c->lin0Z, appliedForce, linZ); + + angX = V4MulAdd(c->ang0WritebackX, appliedForce, angX); + angY = V4MulAdd(c->ang0WritebackY, appliedForce, angY); + angZ = V4MulAdd(c->ang0WritebackZ, appliedForce, angZ); + + base += stride; + } + + //We need to do the cross product now + + angX = V4Sub(angX, V4NegMulSub(header->body0WorkOffsetZ, linY, V4Mul(header->body0WorkOffsetY, linZ))); + angY = V4Sub(angY, V4NegMulSub(header->body0WorkOffsetX, linZ, V4Mul(header->body0WorkOffsetZ, linX))); + angZ = V4Sub(angZ, V4NegMulSub(header->body0WorkOffsetY, linX, V4Mul(header->body0WorkOffsetX, linY))); + + const Vec4V linLenSq = V4MulAdd(linZ, linZ, V4MulAdd(linY, linY, V4Mul(linX, linX))); + const Vec4V angLenSq = V4MulAdd(angZ, angZ, V4MulAdd(angY, angY, V4Mul(angX, angX))); + + const Vec4V linLen = V4Sqrt(linLenSq); + const Vec4V angLen = V4Sqrt(angLenSq); + + const BoolV broken = BOr(V4IsGrtr(linLen, header->linBreakImpulse), V4IsGrtr(angLen, header->angBreakImpulse)); + + PX_ALIGN(16, PxU32 iBroken[4]); + BStoreA(broken, iBroken); + + Vec4V lin0, lin1, lin2, lin3; + Vec4V ang0, ang1, ang2, ang3; + + 
PX_TRANSPOSE_34_44(linX, linY, linZ, lin0, lin1, lin2, lin3); + PX_TRANSPOSE_34_44(angX, angY, angZ, ang0, ang1, ang2, ang3); + + if(writeback0) + { + V3StoreU(Vec3V_From_Vec4V_WUndefined(lin0), writeback0->linearImpulse); + V3StoreU(Vec3V_From_Vec4V_WUndefined(ang0), writeback0->angularImpulse); + writeback0->broken = header->break0 ? PxU32(iBroken[0] != 0) : 0; + } + if(writeback1) + { + V3StoreU(Vec3V_From_Vec4V_WUndefined(lin1), writeback1->linearImpulse); + V3StoreU(Vec3V_From_Vec4V_WUndefined(ang1), writeback1->angularImpulse); + writeback1->broken = header->break1 ? PxU32(iBroken[1] != 0) : 0; + } + if(writeback2) + { + V3StoreU(Vec3V_From_Vec4V_WUndefined(lin2), writeback2->linearImpulse); + V3StoreU(Vec3V_From_Vec4V_WUndefined(ang2), writeback2->angularImpulse); + writeback2->broken = header->break2 ? PxU32(iBroken[2] != 0) : 0; + } + if(writeback3) + { + V3StoreU(Vec3V_From_Vec4V_WUndefined(lin3), writeback3->linearImpulse); + V3StoreU(Vec3V_From_Vec4V_WUndefined(ang3), writeback3->angularImpulse); + writeback3->broken = header->break3 ? 
PxU32(iBroken[3] != 0) : 0; + } + + PX_ASSERT(desc[0].constraint + getConstraintLength(desc[0]) == base); + } +} + + +void solveContactPreBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveContact4_Block(desc, cache); +} + +void solveContactPreBlock_Static(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveContact4_StaticBlock(desc, cache); +} + +void solveContactPreBlock_Conclude(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveContact4_Block(desc, cache); + concludeContact4_Block(desc, cache, sizeof(SolverContactBatchPointDynamic4), sizeof(SolverContactFrictionDynamic4)); +} + +void solveContactPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveContact4_StaticBlock(desc, cache); + concludeContact4_Block(desc, cache, sizeof(SolverContactBatchPointBase4), sizeof(SolverContactFrictionBase4)); +} + +void solveContactPreBlock_WriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveContact4_Block(desc, cache); + + const PxSolverBodyData* bd0[4] = { &cache.solverBodyArray[desc[0].bodyADataIndex], + &cache.solverBodyArray[desc[1].bodyADataIndex], + &cache.solverBodyArray[desc[2].bodyADataIndex], + &cache.solverBodyArray[desc[3].bodyADataIndex]}; + + const PxSolverBodyData* bd1[4] = { &cache.solverBodyArray[desc[0].bodyBDataIndex], + &cache.solverBodyArray[desc[1].bodyBDataIndex], + &cache.solverBodyArray[desc[2].bodyBDataIndex], + &cache.solverBodyArray[desc[3].bodyBDataIndex]}; + + writeBackContact4_Block(desc, cache, bd0, bd1); + + if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4)) + { + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, 
PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a) + { + cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a]; + } + cache.mThresholdStreamIndex = 0; + } +} + +void solveContactPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveContact4_StaticBlock(desc, cache); + const PxSolverBodyData* bd0[4] = { &cache.solverBodyArray[desc[0].bodyADataIndex], + &cache.solverBodyArray[desc[1].bodyADataIndex], + &cache.solverBodyArray[desc[2].bodyADataIndex], + &cache.solverBodyArray[desc[3].bodyADataIndex]}; + + const PxSolverBodyData* bd1[4] = { &cache.solverBodyArray[desc[0].bodyBDataIndex], + &cache.solverBodyArray[desc[1].bodyBDataIndex], + &cache.solverBodyArray[desc[2].bodyBDataIndex], + &cache.solverBodyArray[desc[3].bodyBDataIndex]}; + + writeBackContact4_Block(desc, cache, bd0, bd1); + + if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4)) + { + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a) + { + cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a]; + } + cache.mThresholdStreamIndex = 0; + } +} + +void solve1D4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solve1D4_Block(desc, cache); +} + + +void solve1D4Block_Conclude(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solve1D4_Block(desc, cache); + conclude1D4_Block(desc, cache); +} + + +void solve1D4Block_WriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solve1D4_Block(desc, cache); + + const PxSolverBodyData* bd0[4] 
= { &cache.solverBodyArray[desc[0].bodyADataIndex], + &cache.solverBodyArray[desc[1].bodyADataIndex], + &cache.solverBodyArray[desc[2].bodyADataIndex], + &cache.solverBodyArray[desc[3].bodyADataIndex]}; + + const PxSolverBodyData* bd1[4] = { &cache.solverBodyArray[desc[0].bodyBDataIndex], + &cache.solverBodyArray[desc[1].bodyBDataIndex], + &cache.solverBodyArray[desc[2].bodyBDataIndex], + &cache.solverBodyArray[desc[3].bodyBDataIndex]}; + + writeBack1D4(desc, cache, bd0, bd1); +} + +void writeBack1D4Block(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + const PxSolverBodyData* bd0[4] = { &cache.solverBodyArray[desc[0].bodyADataIndex], + &cache.solverBodyArray[desc[1].bodyADataIndex], + &cache.solverBodyArray[desc[2].bodyADataIndex], + &cache.solverBodyArray[desc[3].bodyADataIndex]}; + + const PxSolverBodyData* bd1[4] = { &cache.solverBodyArray[desc[0].bodyBDataIndex], + &cache.solverBodyArray[desc[1].bodyBDataIndex], + &cache.solverBodyArray[desc[2].bodyBDataIndex], + &cache.solverBodyArray[desc[3].bodyBDataIndex]}; + + writeBack1D4(desc, cache, bd0, bd1); +} + +} + +} + +#endif diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsShared.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsShared.h new file mode 100644 index 00000000..13c8a0e2 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverConstraintsShared.h @@ -0,0 +1,221 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef DY_SOLVER_CORE_SHARED_H +#define DY_SOLVER_CORE_SHARED_H + +#include "foundation/PxPreprocessor.h" +#include "PsVecMath.h" + +#ifdef PX_SUPPORT_SIMD + +#include "CmPhysXCommon.h" +#include "DySolverBody.h" +#include "DySolverContact.h" +#include "DySolverConstraint1D.h" +#include "DySolverConstraintDesc.h" +#include "PsUtilities.h" +#include "DyConstraint.h" +#include "PsAtomic.h" + + +namespace physx +{ + +namespace Dy +{ + PX_FORCE_INLINE static FloatV solveDynamicContacts(SolverContactPoint* contacts, const PxU32 nbContactPoints, const Vec3VArg contactNormal, + const FloatVArg invMassA, const FloatVArg invMassB, const FloatVArg angDom0, const FloatVArg angDom1, Vec3V& linVel0_, Vec3V& angState0_, + Vec3V& linVel1_, Vec3V& angState1_, PxF32* PX_RESTRICT forceBuffer) +{ + Vec3V linVel0 = linVel0_; + Vec3V angState0 = angState0_; + Vec3V linVel1 = linVel1_; + Vec3V angState1 = angState1_; + FloatV accumulatedNormalImpulse = FZero(); + + const Vec3V delLinVel0 = V3Scale(contactNormal, invMassA); + const Vec3V delLinVel1 = V3Scale(contactNormal, invMassB); + + for(PxU32 i=0;i<nbContactPoints;i++) + { + SolverContactPoint& c = contacts[i]; + Ps::prefetchLine(&contacts[i], 128); + + const Vec3V raXn = c.raXn; + + const Vec3V rbXn = c.rbXn; + + const FloatV appliedForce = FLoad(forceBuffer[i]); + const FloatV velMultiplier = c.getVelMultiplier(); + + /*const FloatV targetVel = c.getTargetVelocity(); + const FloatV nScaledBias = c.getScaledBias();*/ + const FloatV maxImpulse = c.getMaxImpulse(); + + //Compute the normal velocity of the constraint. 
+ const Vec3V v0 = V3MulAdd(linVel0, contactNormal, V3Mul(angState0, raXn)); + const Vec3V v1 = V3MulAdd(linVel1, contactNormal, V3Mul(angState1, rbXn)); + const FloatV normalVel = V3SumElems(V3Sub(v0, v1)); + + const FloatV biasedErr = c.getBiasedErr();//FScaleAdd(targetVel, velMultiplier, nScaledBias); + + //KS - clamp the maximum force + const FloatV _deltaF = FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce)); + const FloatV _newForce = FAdd(appliedForce, _deltaF); + const FloatV newForce = FMin(_newForce, maxImpulse); + const FloatV deltaF = FSub(newForce, appliedForce); + + linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0); + linVel1 = V3NegScaleSub(delLinVel1, deltaF, linVel1); + angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0); + angState1 = V3NegScaleSub(rbXn, FMul(deltaF, angDom1), angState1); + + FStore(newForce, &forceBuffer[i]); + + accumulatedNormalImpulse = FAdd(accumulatedNormalImpulse, newForce); + } + + linVel0_ = linVel0; + angState0_ = angState0; + linVel1_ = linVel1; + angState1_ = angState1; + return accumulatedNormalImpulse; +} + +PX_FORCE_INLINE static FloatV solveStaticContacts(SolverContactPoint* contacts, const PxU32 nbContactPoints, const Vec3VArg contactNormal, + const FloatVArg invMassA, const FloatVArg angDom0, Vec3V& linVel0_, Vec3V& angState0_, PxF32* PX_RESTRICT forceBuffer) +{ + Vec3V linVel0 = linVel0_; + Vec3V angState0 = angState0_; + FloatV accumulatedNormalImpulse = FZero(); + + const Vec3V delLinVel0 = V3Scale(contactNormal, invMassA); + + for(PxU32 i=0;i<nbContactPoints;i++) + { + SolverContactPoint& c = contacts[i]; + Ps::prefetchLine(&contacts[i],128); + + const Vec3V raXn = c.raXn; + + const FloatV appliedForce = FLoad(forceBuffer[i]); + const FloatV velMultiplier = c.getVelMultiplier(); + + /*const FloatV targetVel = c.getTargetVelocity(); + const FloatV nScaledBias = c.getScaledBias();*/ + const FloatV maxImpulse = c.getMaxImpulse(); + + const Vec3V v0 = V3MulAdd(linVel0, 
contactNormal, V3Mul(angState0, raXn)); + const FloatV normalVel = V3SumElems(v0); + + + const FloatV biasedErr = c.getBiasedErr();//FScaleAdd(targetVel, velMultiplier, nScaledBias); + + // still lots to do here: using loop pipelining we can interweave this code with the + // above - the code here has a lot of stalls that we would thereby eliminate + const FloatV _deltaF = FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce)); + const FloatV _newForce = FAdd(appliedForce, _deltaF); + const FloatV newForce = FMin(_newForce, maxImpulse); + const FloatV deltaF = FSub(newForce, appliedForce); + + linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0); + angState0 = V3ScaleAdd(raXn, FMul(deltaF, angDom0), angState0); + + FStore(newForce, &forceBuffer[i]); + + accumulatedNormalImpulse = FAdd(accumulatedNormalImpulse, newForce); + } + + linVel0_ = linVel0; + angState0_ = angState0; + return accumulatedNormalImpulse; +} + +PX_FORCE_INLINE static FloatV solveExtContacts(SolverContactPointExt* contacts, const PxU32 nbContactPoints, const Vec3VArg contactNormal, + Vec3V& linVel0, Vec3V& angVel0, + Vec3V& linVel1, Vec3V& angVel1, + Vec3V& li0, Vec3V& ai0, + Vec3V& li1, Vec3V& ai1, + PxF32* PX_RESTRICT appliedForceBuffer) + { + + FloatV accumulatedNormalImpulse = FZero(); + for(PxU32 i=0;i<nbContactPoints;i++) + { + SolverContactPointExt& c = contacts[i]; + Ps::prefetchLine(&contacts[i+1]); + + const Vec3V raXn = c.raXn; + const Vec3V rbXn = c.rbXn; + + const FloatV appliedForce = FLoad(appliedForceBuffer[i]); + const FloatV velMultiplier = c.getVelMultiplier(); + + /*const FloatV targetVel = c.getTargetVelocity(); + const FloatV scaledBias = c.getScaledBias();*/ + + //Compute the normal velocity of the constraint. 
+ + Vec3V v = V3MulAdd(linVel0, contactNormal, V3Mul(angVel0, raXn)); + v = V3Sub(v, V3MulAdd(linVel1, contactNormal, V3Mul(angVel1, rbXn))); + const FloatV normalVel = V3SumElems(v); + + const FloatV biasedErr = c.getBiasedErr();//FNeg(scaledBias); + + // still lots to do here: using loop pipelining we can interweave this code with the + // above - the code here has a lot of stalls that we would thereby eliminate + + const FloatV deltaF = FMax(FNegScaleSub(normalVel, velMultiplier, biasedErr), FNeg(appliedForce)); + + linVel0 = V3ScaleAdd(c.linDeltaVA, deltaF, linVel0); + angVel0 = V3ScaleAdd(c.angDeltaVA, deltaF, angVel0); + linVel1 = V3ScaleAdd(c.linDeltaVB, deltaF, linVel1); + angVel1 = V3ScaleAdd(c.angDeltaVB, deltaF, angVel1); + + li0 = V3ScaleAdd(contactNormal, deltaF, li0); ai0 = V3ScaleAdd(raXn, deltaF, ai0); + li1 = V3ScaleAdd(contactNormal, deltaF, li1); ai1 = V3ScaleAdd(rbXn, deltaF, ai1); + + const FloatV newAppliedForce = FAdd(appliedForce, deltaF); + + FStore(newAppliedForce, &appliedForceBuffer[i]); + + accumulatedNormalImpulse = FAdd(accumulatedNormalImpulse, newAppliedForce); + } + return accumulatedNormalImpulse; + } + +} + +} + +#endif //PX_SUPPORT_SIMD + +#endif //DY_SOLVER_CORE_SHARED_H + diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact.h new file mode 100644 index 00000000..f204633c --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact.h @@ -0,0 +1,228 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + + +#ifndef DY_SOLVERCONTACT_H +#define DY_SOLVERCONTACT_H + +#include "foundation/PxSimpleTypes.h" +#include "foundation/PxVec3.h" +#include "PxvConfig.h" +#include "PsVecMath.h" + +namespace physx +{ + +using namespace Ps::aos; + +namespace Sc +{ + class ShapeInteraction; +} +/** +\brief A header to represent a friction patch for the solver. +*/ + +namespace Dy +{ + +struct SolverContactHeader +{ + enum DySolverContactFlags + { + eHAS_FORCE_THRESHOLDS = 0x1 + }; + + PxU8 type; //Note: mType should be first as the solver expects a type in the first byte. 
+ PxU8 flags; + PxU8 numNormalConstr; + PxU8 numFrictionConstr; //4 + + PxReal angDom0; //8 + PxReal angDom1; //12 + PxReal invMass0; //16 + + Vec4V staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W; //32 + Vec3V normal; //48 + + PxReal invMass1; //52 + PxU32 broken; //56 + PxU8* frictionBrokenWritebackByte; //60 64 + Sc::ShapeInteraction* shapeInteraction; //64 72 +#if PX_P64_FAMILY + PxU32 pad[2]; //64 80 +#endif // PX_X64 + + + PX_FORCE_INLINE void setStaticFriction(const FloatV f) {staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W,f);} + PX_FORCE_INLINE void setDynamicFriction(const FloatV f) {staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W,f);} + PX_FORCE_INLINE void setDominance0(const FloatV f) {staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W,f);} + PX_FORCE_INLINE void setDominance1(const FloatV f) {staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W=V4SetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W,f);} + + PX_FORCE_INLINE FloatV getStaticFriction() const {return V4GetX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);} + PX_FORCE_INLINE FloatV getDynamicFriction() const {return V4GetY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);} + PX_FORCE_INLINE FloatV getDominance0() const {return V4GetZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);} + PX_FORCE_INLINE FloatV getDominance1() const {return V4GetW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);} + + PX_FORCE_INLINE void setStaticFriction(PxF32 f) {V4WriteX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);} + PX_FORCE_INLINE void setDynamicFriction(PxF32 f) {V4WriteY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);} + PX_FORCE_INLINE void setDominance0(PxF32 f) 
{V4WriteZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);} + PX_FORCE_INLINE void setDominance1(PxF32 f) {V4WriteW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W, f);} + + PX_FORCE_INLINE PxF32 getStaticFrictionPxF32() const {return V4ReadX(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);} + PX_FORCE_INLINE PxF32 getDynamicFrictionPxF32() const {return V4ReadY(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);} + PX_FORCE_INLINE PxF32 getDominance0PxF32() const {return V4ReadZ(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);} + PX_FORCE_INLINE PxF32 getDominance1PxF32() const {return V4ReadW(staticFrictionX_dynamicFrictionY_dominance0Z_dominance1W);} +}; + +#if !PX_P64_FAMILY +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader) == 64); +#else +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader) == 80); +#endif + +/** +\brief A single rigid body contact point for the solver. +*/ +struct SolverContactPoint +{ + Vec3V raXn; + Vec3V rbXn; + + PxF32 velMultiplier; + PxF32 biasedErr; + PxF32 unbiasedErr; + PxF32 maxImpulse; + + PX_FORCE_INLINE FloatV getVelMultiplier() const {return FLoad(velMultiplier);} + + PX_FORCE_INLINE FloatV getBiasedErr() const {return FLoad(biasedErr);} + PX_FORCE_INLINE FloatV getMaxImpulse() const {return FLoad(maxImpulse);} + + +#ifdef PX_SUPPORT_SIMD + PX_FORCE_INLINE Vec3V getRaXn() const {return raXn;} + PX_FORCE_INLINE Vec3V getRbXn() const {return rbXn;} +#endif + + PX_FORCE_INLINE void setRaXn(const PxVec3& v) {V3WriteXYZ(raXn, v);} + PX_FORCE_INLINE void setRbXn(const PxVec3& v) {V3WriteXYZ(rbXn, v);} + PX_FORCE_INLINE void setVelMultiplier(PxF32 f) {velMultiplier = f;} + + PX_FORCE_INLINE void setBiasedErr(PxF32 f) {biasedErr = f;} + PX_FORCE_INLINE void setUnbiasedErr(PxF32 f) {unbiasedErr = f;} + + PX_FORCE_INLINE PxF32 getVelMultiplierPxF32() const {return velMultiplier;} + PX_FORCE_INLINE const PxVec3& getRaXnPxVec3() const {return V3ReadXYZ(raXn);} + PX_FORCE_INLINE const PxVec3& 
getRbXnPxVec3() const {return V3ReadXYZ(rbXn);} + PX_FORCE_INLINE PxF32 getBiasedErrPxF32() const {return biasedErr;} +}; + + +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactPoint) == 48); + +/** +\brief A single extended articulation contact point for the solver. +*/ +struct SolverContactPointExt : public SolverContactPoint +{ + Vec3V linDeltaVA; + Vec3V angDeltaVA; + Vec3V linDeltaVB; + Vec3V angDeltaVB; +}; + +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactPointExt) == 112); + + +/** +\brief A single friction constraint for the solver. +*/ +struct SolverContactFriction +{ + Vec4V normalXYZ_appliedForceW; //16 + Vec4V raXnXYZ_velMultiplierW; //32 + Vec4V rbXnXYZ_biasW; //48 + PxReal targetVel; //52 + PxU32 mPad[3]; //64 + + PX_FORCE_INLINE void setAppliedForce(const FloatV f) {normalXYZ_appliedForceW=V4SetW(normalXYZ_appliedForceW,f);} + PX_FORCE_INLINE void setVelMultiplier(const FloatV f) {raXnXYZ_velMultiplierW=V4SetW(raXnXYZ_velMultiplierW,f);} + PX_FORCE_INLINE void setBias(const FloatV f) {rbXnXYZ_biasW=V4SetW(rbXnXYZ_biasW,f);} + + PX_FORCE_INLINE FloatV getAppliedForce() const {return V4GetW(normalXYZ_appliedForceW);} + PX_FORCE_INLINE FloatV getVelMultiplier() const {return V4GetW(raXnXYZ_velMultiplierW);} + PX_FORCE_INLINE FloatV getBias() const {return V4GetW(rbXnXYZ_biasW);} + +#ifdef PX_SUPPORT_SIMD + PX_FORCE_INLINE Vec3V getNormal() const {return Vec3V_From_Vec4V(normalXYZ_appliedForceW);} + PX_FORCE_INLINE Vec3V getRaXn() const {return Vec3V_From_Vec4V(raXnXYZ_velMultiplierW);} + PX_FORCE_INLINE Vec3V getRbXn() const {return Vec3V_From_Vec4V(rbXnXYZ_biasW);} +#endif + + PX_FORCE_INLINE void setNormal(const PxVec3& v) {V4WriteXYZ(normalXYZ_appliedForceW, v);} + PX_FORCE_INLINE void setRaXn(const PxVec3& v) {V4WriteXYZ(raXnXYZ_velMultiplierW, v);} + PX_FORCE_INLINE void setRbXn(const PxVec3& v) {V4WriteXYZ(rbXnXYZ_biasW, v);} + + PX_FORCE_INLINE const PxVec3& getNormalPxVec3() const {return V4ReadXYZ(normalXYZ_appliedForceW);} + PX_FORCE_INLINE const 
PxVec3& getRaXnPxVec3() const {return V4ReadXYZ(raXnXYZ_velMultiplierW);} + PX_FORCE_INLINE const PxVec3& getRbXnPxVec3() const {return V4ReadXYZ(rbXnXYZ_biasW);} + + PX_FORCE_INLINE void setAppliedForce(PxF32 f) {V4WriteW(normalXYZ_appliedForceW, f);} + PX_FORCE_INLINE void setVelMultiplier(PxF32 f) {V4WriteW(raXnXYZ_velMultiplierW, f);} + PX_FORCE_INLINE void setBias(PxF32 f) {V4WriteW(rbXnXYZ_biasW, f);} + + PX_FORCE_INLINE PxF32 getAppliedForcePxF32() const {return V4ReadW(normalXYZ_appliedForceW);} + PX_FORCE_INLINE PxF32 getVelMultiplierPxF32() const {return V4ReadW(raXnXYZ_velMultiplierW);} + PX_FORCE_INLINE PxF32 getBiasPxF32() const {return V4ReadW(rbXnXYZ_biasW);} + +}; + +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFriction) == 64); + +/** +\brief A single extended articulation friction constraint for the solver. +*/ +struct SolverContactFrictionExt : public SolverContactFriction +{ + Vec3V linDeltaVA; + Vec3V angDeltaVA; + Vec3V linDeltaVB; + Vec3V angDeltaVB; +}; +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionExt) == 128); + +} + +} + + + +#endif //DY_SOLVERCONTACT_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact4.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact4.h new file mode 100644 index 00000000..31fc9a6d --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContact4.h @@ -0,0 +1,179 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef DY_SOLVERCONTACT4_H +#define DY_SOLVERCONTACT4_H + +#include "foundation/PxSimpleTypes.h" +#include "foundation/PxVec3.h" +#include "PxvConfig.h" +#include "PsVecMath.h" +#include "DySolverContact.h" + +namespace physx +{ + +struct PxcNpWorkUnit; +struct PxSolverBody; +struct PxSolverBodyData; +struct PxSolverConstraintDesc; + +namespace Sc +{ + class ShapeInteraction; +} + +namespace Dy +{ + + + + +/** +\brief Batched SOA contact data. Note, we don't support batching with extended contacts for the simple reason that handling multiple articulations would be complex. +*/ +struct SolverContactHeader4 +{ + enum + { + eHAS_MAX_IMPULSE = 1 << 0, + eHAS_TARGET_VELOCITY = 1 << 1 + }; + + PxU8 type; //Note: mType should be first as the solver expects a type in the first byte. 
+ PxU8 numNormalConstr; + PxU8 numFrictionConstr; + PxU8 flag; + + PxU8 flags[4]; + //These counts are the max of the 4 sets of data. + //When certain pairs have fewer patches/contacts than others, they are padded with 0s so that no work is performed but + //calculations are still shared (afterall, they're computationally free because we're doing 4 things at a time in SIMD) + + //KS - used for write-back only + PxU8 numNormalConstr0, numNormalConstr1, numNormalConstr2, numNormalConstr3; + PxU8 numFrictionConstr0, numFrictionConstr1, numFrictionConstr2, numFrictionConstr3; + + Vec4V restitution; + Vec4V staticFriction; + Vec4V dynamicFriction; + //Technically, these mass properties could be pulled out into a new structure and shared. For multi-manifold contacts, + //this would save 64 bytes per-manifold after the cost of the first manifold + Vec4V invMass0D0; + Vec4V invMass1D1; + Vec4V angDom0; + Vec4V angDom1; + //Normal is shared between all contacts in the batch. This will save some memory! 
+ Vec4V normalX; + Vec4V normalY; + Vec4V normalZ; + + Sc::ShapeInteraction* shapeInteraction[4]; //192 or 208 +}; + +#if !PX_P64_FAMILY +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader4) == 192); +#else +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactHeader4) == 208); +#endif + + +/** +\brief This represents a batch of 4 contacts with static rolled into a single structure +*/ +struct SolverContactBatchPointBase4 +{ + Vec4V raXnX; + Vec4V raXnY; + Vec4V raXnZ; + Vec4V velMultiplier; + Vec4V scaledBias; + Vec4V biasedErr; +}; +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactBatchPointBase4) == 96); + +/** +\brief Contains the additional data required to represent 4 contacts between 2 dynamic bodies +@see SolverContactBatchPointBase4 +*/ +struct SolverContactBatchPointDynamic4 : public SolverContactBatchPointBase4 +{ + Vec4V rbXnX; + Vec4V rbXnY; + Vec4V rbXnZ; +}; +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactBatchPointDynamic4) == 144); + +/** +\brief This represents the shared information of a batch of 4 friction constraints +*/ +struct SolverFrictionSharedData4 +{ + BoolV broken; + PxU8* frictionBrokenWritebackByte[4]; + Vec4V normalX[2]; + Vec4V normalY[2]; + Vec4V normalZ[2]; +}; +#if !PX_P64_FAMILY +PX_COMPILE_TIME_ASSERT(sizeof(SolverFrictionSharedData4) == 128); +#endif + + +/** +\brief This represents a batch of 4 friction constraints with static rolled into a single structure +*/ +struct SolverContactFrictionBase4 +{ + Vec4V raXnX; + Vec4V raXnY; + Vec4V raXnZ; + Vec4V scaledBias; + Vec4V velMultiplier; + Vec4V targetVelocity; +}; +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionBase4) == 96); + +/** +\brief Contains the additional data required to represent 4 friction constraints between 2 dynamic bodies +@see SolverContactFrictionBase4 +*/ +struct SolverContactFrictionDynamic4 : public SolverContactFrictionBase4 +{ + Vec4V rbXnX; + Vec4V rbXnY; + Vec4V rbXnZ; +}; +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactFrictionDynamic4) == 144); + +} + +} + +#endif 
//DY_SOLVERCONTACT4_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF.h new file mode 100644 index 00000000..e18421e9 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF.h @@ -0,0 +1,123 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. 
All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + + +#ifndef DY_SOLVERCONTACTPF_H +#define DY_SOLVERCONTACTPF_H + +#include "foundation/PxSimpleTypes.h" +#include "foundation/PxVec3.h" +#include "PxvConfig.h" +#include "PsVecMath.h" + +namespace physx +{ + +using namespace Ps::aos; + +namespace Dy +{ + +struct SolverContactCoulombHeader +{ + PxU8 type; //Note: mType should be first as the solver expects a type in the first byte. + PxU8 numNormalConstr; + PxU16 frictionOffset; //4 + //PxF32 restitution; + PxF32 angDom0; //8 + PxF32 dominance0; //12 + PxF32 dominance1; //16 + PX_ALIGN(16, PxVec3 normalXYZ); //28 + PxF32 angDom1; //32 + + Sc::ShapeInteraction* shapeInteraction; //36 40 + PxU8 flags; //37 41 + PxU8 pad0[3]; //40 44 +#if !PX_P64_FAMILY + PxU32 pad1[2]; //48 +#else + PxU32 pad1; // 48 +#endif + + + + PX_FORCE_INLINE void setDominance0(const FloatV f) {FStore(f, &dominance0);} + PX_FORCE_INLINE void setDominance1(const FloatV f) {FStore(f, &dominance1);} + PX_FORCE_INLINE void setNormal(const Vec3V n) {V3StoreA(n, normalXYZ);} + + PX_FORCE_INLINE FloatV getDominance0() const {return FLoad(dominance0);} + PX_FORCE_INLINE FloatV getDominance1() const {return FLoad(dominance1);} + //PX_FORCE_INLINE FloatV getRestitution() const {return FLoad(restitution);} + PX_FORCE_INLINE Vec3V getNormal()const {return V3LoadA(normalXYZ);} + + + PX_FORCE_INLINE void setDominance0(PxF32 f) { dominance0 = f; } + PX_FORCE_INLINE void setDominance1(PxF32 f) { dominance1 = f;} + //PX_FORCE_INLINE void setRestitution(PxF32 f) { restitution = f;} + + PX_FORCE_INLINE PxF32 getDominance0PxF32() const {return dominance0;} + PX_FORCE_INLINE PxF32 getDominance1PxF32() const {return dominance1;} + //PX_FORCE_INLINE PxF32 getRestitutionPxF32() const {return restitution;} + +}; +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactCoulombHeader) == 48); + +struct SolverFrictionHeader +{ + PxU8 type; //Note: mType should be first as the solver expects a type in 
the first byte. + PxU8 numNormalConstr; + PxU8 numFrictionConstr; + PxU8 flags; + PxF32 staticFriction; + PxF32 invMass0D0; + PxF32 invMass1D1; + PxF32 angDom0; + PxF32 angDom1; + PxU32 pad2[2]; + + PX_FORCE_INLINE void setStaticFriction(const FloatV f) {FStore(f, &staticFriction);} + + PX_FORCE_INLINE FloatV getStaticFriction() const {return FLoad(staticFriction);} + + PX_FORCE_INLINE void setStaticFriction(PxF32 f) {staticFriction = f;} + + PX_FORCE_INLINE PxF32 getStaticFrictionPxF32() const {return staticFriction;} + + PX_FORCE_INLINE PxU32 getAppliedForcePaddingSize() const {return sizeof(PxU32)*((4 * ((numNormalConstr + 3)/4)));} + static PX_FORCE_INLINE PxU32 getAppliedForcePaddingSize(const PxU32 numConstr) {return sizeof(PxU32)*((4 * ((numConstr + 3)/4)));} +}; + +PX_COMPILE_TIME_ASSERT(sizeof(SolverFrictionHeader) == 32); + +} + +} + +#endif //DY_SOLVERCONTACTPF_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF4.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF4.h new file mode 100644 index 00000000..7cf3b94d --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContactPF4.h @@ -0,0 +1,155 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef DY_SOLVER_CONTACT_PF_4_H +#define DY_SOLVER_CONTACT_PF_4_H + +#include "foundation/PxSimpleTypes.h" +#include "foundation/PxVec3.h" +#include "PxvConfig.h" +#include "PsVecMath.h" + +namespace physx +{ + +using namespace Ps::aos; + +namespace Sc +{ + class ShapeInteraction; +} + +namespace Dy +{ + +struct SolverContactCoulombHeader4 +{ + PxU8 type; //Note: mType should be first as the solver expects a type in the first byte. 
+ PxU8 numNormalConstr; + PxU16 frictionOffset; + PxU8 numNormalConstr0, numNormalConstr1, numNormalConstr2, numNormalConstr3; + PxU8 flags[4]; + PxU32 pad; //16 + Vec4V restitution; //32 + Vec4V normalX; //48 + Vec4V normalY; //64 + Vec4V normalZ; //80 + Vec4V invMassADom; //96 + Vec4V invMassBDom; //112 + Vec4V angD0; //128 + Vec4V angD1; //144 + Sc::ShapeInteraction* shapeInteraction[4]; //160 or 176 +}; + +#if !PX_P64_FAMILY +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactCoulombHeader4) == 160); +#else +PX_COMPILE_TIME_ASSERT(sizeof(SolverContactCoulombHeader4) == 176); +#endif + +struct SolverContact4Base +{ + Vec4V raXnX; + Vec4V raXnY; + Vec4V raXnZ; + Vec4V appliedForce; + Vec4V velMultiplier; + Vec4V targetVelocity; + Vec4V scaledBias; + Vec4V maxImpulse; +}; + +PX_COMPILE_TIME_ASSERT(sizeof(SolverContact4Base) == 128); + +struct SolverContact4Dynamic : public SolverContact4Base +{ + Vec4V rbXnX; + Vec4V rbXnY; + Vec4V rbXnZ; +}; + +PX_COMPILE_TIME_ASSERT(sizeof(SolverContact4Dynamic) == 176); + +struct SolverFrictionHeader4 +{ + PxU8 type; //Note: mType should be first as the solver expects a type in the first byte. 
+ PxU8 numNormalConstr; + PxU8 numFrictionConstr; + PxU8 numNormalConstr0; + PxU8 numNormalConstr1; + PxU8 numNormalConstr2; + PxU8 numNormalConstr3; + PxU8 numFrictionConstr0; + PxU8 numFrictionConstr1; + PxU8 numFrictionConstr2; + PxU8 numFrictionConstr3; + PxU8 pad0; + PxU32 frictionPerContact; + + Vec4V staticFriction; + Vec4V invMassADom; + Vec4V invMassBDom; + Vec4V angD0; + Vec4V angD1; +}; + +PX_COMPILE_TIME_ASSERT(sizeof(SolverFrictionHeader4) == 96); + +struct SolverFriction4Base +{ + Vec4V normalX; + Vec4V normalY; + Vec4V normalZ; + Vec4V raXnX; + Vec4V raXnY; + Vec4V raXnZ; + Vec4V appliedForce; + Vec4V velMultiplier; + Vec4V targetVelocity; +}; + +PX_COMPILE_TIME_ASSERT(sizeof(SolverFriction4Base) == 144); + +struct SolverFriction4Dynamic : public SolverFriction4Base +{ + Vec4V rbXnX; + Vec4V rbXnY; + Vec4V rbXnZ; +}; + +PX_COMPILE_TIME_ASSERT(sizeof(SolverFriction4Dynamic) == 192); + +} + +} + + + +#endif //DY_SOLVER_CONTACT_PF_4_H + diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContext.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContext.h new file mode 100644 index 00000000..df3d7fea --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverContext.h @@ -0,0 +1,64 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef DY_SOLVERCONTEXT_H +#define DY_SOLVERCONTEXT_H + +namespace physx +{ + struct PxSolverBodyData; + +namespace Dy +{ + struct ThresholdStreamElement; + + +struct SolverContext +{ + bool doFriction; + bool writeBackIteration; + + // for threshold stream output + ThresholdStreamElement* mThresholdStream; + PxU32 mThresholdStreamIndex; + PxU32 mThresholdStreamLength; + PxSolverBodyData* solverBodyArray; + + ThresholdStreamElement* PX_RESTRICT mSharedThresholdStream; + PxU32 mSharedThresholdStreamLength; + PxI32* mSharedOutThresholdPairs; + +}; + +} + +} + +#endif //DY_SOLVERCONTEXT_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.cpp new file mode 100644 index 00000000..688e0b81 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.cpp @@ -0,0 +1,622 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxPreprocessor.h" + +#include "PsAllocator.h" +#include <new> +#include <stdio.h> +#include "CmPhysXCommon.h" +#include "DySolverBody.h" +#include "DySolverConstraint1D.h" +#include "DySolverContact.h" +#include "DyThresholdTable.h" +#include "DySolverControl.h" +#include "DyArticulationHelper.h" +#include "PsAtomic.h" +#include "PsIntrinsics.h" +#include "DyArticulationPImpl.h" +#include "PsThread.h" +#include "DySolverConstraintDesc.h" +#include "DySolverContext.h" + +namespace physx +{ + +namespace Dy +{ + +//----------------------------------- + +void solve1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtContactBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExt1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContact_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const 
PxU32 constraintCount, SolverContext& cache); +void solveContactPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solve1D4_Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + + +void solve1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtContactConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExt1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContact_BStaticConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solve1D4Block_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void solve1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtContactBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExt1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, 
const PxU32 constraintCount, SolverContext& cache); +void solveContact_BStaticBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solve1D4Block_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void writeBack1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void contactBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void extContactBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void ext1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void contactPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void writeBack1D4Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +// could move this to PxPreprocessor.h but +// no implementation available for MSVC +#if PX_GCC_FAMILY +#define PX_UNUSED_ATTRIBUTE __attribute__((unused)) +#else +#define PX_UNUSED_ATTRIBUTE +#endif + +#define DYNAMIC_ARTICULATION_REGISTRATION(x) 0 + +static SolveBlockMethod gVTableSolveBlock[] PX_UNUSED_ATTRIBUTE = +{ + 0, + solveContactBlock, // DY_SC_TYPE_RB_CONTACT + solve1DBlock, // DY_SC_TYPE_RB_1D + DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactBlock), // DY_SC_TYPE_EXT_CONTACT + DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlock), // DY_SC_TYPE_EXT_1D + solveContact_BStaticBlock, // 
DY_SC_TYPE_STATIC_CONTACT + solveContactBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT + solveContactPreBlock, // DY_SC_TYPE_BLOCK_RB_CONTACT + solveContactPreBlock_Static, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT + solve1D4_Block, // DY_SC_TYPE_BLOCK_1D, +}; + +static SolveWriteBackBlockMethod gVTableSolveWriteBackBlock[] PX_UNUSED_ATTRIBUTE = +{ + 0, + solveContactBlockWriteBack, // DY_SC_TYPE_RB_CONTACT + solve1DBlockWriteBack, // DY_SC_TYPE_RB_1D + DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactBlockWriteBack), // DY_SC_TYPE_EXT_CONTACT + DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlockWriteBack), // DY_SC_TYPE_EXT_1D + solveContact_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_CONTACT + solveContactBlockWriteBack, // DY_SC_TYPE_NOFRICTION_RB_CONTACT + solveContactPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_RB_CONTACT + solveContactPreBlock_WriteBackStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT + solve1D4Block_WriteBack, // DY_SC_TYPE_BLOCK_1D, +}; + +static SolveBlockMethod gVTableSolveConcludeBlock[] PX_UNUSED_ATTRIBUTE = +{ + 0, + solveContactConcludeBlock, // DY_SC_TYPE_RB_CONTACT + solve1DConcludeBlock, // DY_SC_TYPE_RB_1D + DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactConcludeBlock), // DY_SC_TYPE_EXT_CONTACT + DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DConcludeBlock), // DY_SC_TYPE_EXT_1D + solveContact_BStaticConcludeBlock, // DY_SC_TYPE_STATIC_CONTACT + solveContactConcludeBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT + solveContactPreBlock_Conclude, // DY_SC_TYPE_BLOCK_RB_CONTACT + solveContactPreBlock_ConcludeStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT + solve1D4Block_Conclude, // DY_SC_TYPE_BLOCK_1D, +}; + +void SolverCoreRegisterArticulationFns() +{ + gVTableSolveBlock[DY_SC_TYPE_EXT_CONTACT] = solveExtContactBlock; + gVTableSolveBlock[DY_SC_TYPE_EXT_1D] = solveExt1DBlock; + + gVTableSolveWriteBackBlock[DY_SC_TYPE_EXT_CONTACT] = solveExtContactBlockWriteBack; + gVTableSolveWriteBackBlock[DY_SC_TYPE_EXT_1D] = solveExt1DBlockWriteBack; + 
gVTableSolveConcludeBlock[DY_SC_TYPE_EXT_CONTACT] = solveExtContactConcludeBlock; + gVTableSolveConcludeBlock[DY_SC_TYPE_EXT_1D] = solveExt1DConcludeBlock; +} + + +SolveBlockMethod* getSolveBlockTable() +{ + return gVTableSolveBlock; +} + +SolveBlockMethod* getSolverConcludeBlockTable() +{ + return gVTableSolveConcludeBlock; +} + +SolveWriteBackBlockMethod* getSolveWritebackBlockTable() +{ + return gVTableSolveWriteBackBlock; +} + + + + +SolverCoreGeneral* SolverCoreGeneral::create() +{ + SolverCoreGeneral* scg = reinterpret_cast<SolverCoreGeneral*>( + PX_ALLOC(sizeof(SolverCoreGeneral), "SolverCoreGeneral")); + + if(scg) + new (scg) SolverCoreGeneral; + + return scg; +} + +void SolverCoreGeneral::destroyV() +{ + this->~SolverCoreGeneral(); + PX_FREE(this); +} + +void SolverCoreGeneral::solveV_Blocks(SolverIslandParams& params) const +{ + + const PxI32 TempThresholdStreamSize = 32; + ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize]; + + SolverContext cache; + cache.solverBodyArray = params.bodyDataList; + cache.mThresholdStream = tempThresholdStream; + cache.mThresholdStreamLength = TempThresholdStreamSize; + cache.mThresholdStreamIndex = 0; + cache.writeBackIteration = false; + + PxI32 batchCount = PxI32(params.numConstraintHeaders); + + PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart; + const PxU32 bodyListSize = params.bodyListSize; + + Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray; + + const PxU32 velocityIterations = params.velocityIterations; + const PxU32 positionIterations = params.positionIterations; + + const PxU32 numConstraintHeaders = params.numConstraintHeaders; + const PxU32 articulationListSize = params.articulationListSize; + + ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart; + + PX_ASSERT(velocityIterations >= 1); + PX_ASSERT(positionIterations >= 1); + + if(numConstraintHeaders == 0) + { + for (PxU32 baIdx = 0; baIdx < bodyListSize; 
baIdx++) + { + Cm::SpatialVector& motionVel = motionVelocityArray[baIdx]; + PxSolverBody& atom = bodyListStart[baIdx]; + + motionVel.linear = atom.linearVelocity; + motionVel.angular = atom.angularState; + } + + for (PxU32 i = 0; i < articulationListSize; i++) + ArticulationPImpl::saveVelocity(articulationListStart[i]); + + return; + } + + BatchIterator contactIterator(params.constraintBatchHeaders, params.numConstraintHeaders); + + PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList; + + //0-(n-1) iterations + PxI32 normalIter = 0; + PxI32 frictionIter = 0; + + for (PxU32 iteration = positionIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters + { + cache.doFriction = iteration<=3; + + SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount, + cache, contactIterator, iteration == 1 ? gVTableSolveConcludeBlock : gVTableSolveBlock, normalIter, frictionIter, normalIter); + + ++normalIter; + } + + for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++) + { + const PxSolverBody& atom = bodyListStart[baIdx]; + Cm::SpatialVector& motionVel = motionVelocityArray[baIdx]; + motionVel.linear = atom.linearVelocity; + motionVel.angular = atom.angularState; + } + + + for (PxU32 i = 0; i < articulationListSize; i++) + ArticulationPImpl::saveVelocity(articulationListStart[i]); + + + const PxI32 velItersMinOne = (PxI32(velocityIterations)) - 1; + + PxI32 iteration = 0; + + for(; iteration < velItersMinOne; ++iteration) + { + + SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount, + cache, contactIterator, gVTableSolveBlock, normalIter, frictionIter, normalIter); + ++normalIter; + + } + + PxI32* outThresholdPairs = params.outThresholdPairs; + ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream; + PxU32 thresholdStreamLength = params.thresholdStreamLength; + + cache.writeBackIteration = true; + cache.mSharedThresholdStream = 
thresholdStream; + cache.mSharedThresholdStreamLength = thresholdStreamLength; + cache.mSharedOutThresholdPairs = outThresholdPairs; + for(; iteration < PxI32(velocityIterations); ++iteration) + { + + SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount, + cache, contactIterator, gVTableSolveWriteBackBlock, normalIter, frictionIter, normalIter); + ++normalIter; + + } + + //Write back remaining threshold streams + if(cache.mThresholdStreamIndex > 0) + { + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b) + { + thresholdStream[b + threshIndex] = cache.mThresholdStream[b]; + } + cache.mThresholdStreamIndex = 0; + } +} + +PxI32 SolverCoreGeneral::solveVParallelAndWriteBack +(SolverIslandParams& params) const +{ +#if PX_PROFILE_SOLVE_STALLS + PxU64 startTime = readTimer(); + + PxU64 stallCount = 0; +#endif + + SolverContext cache; + cache.solverBodyArray = params.bodyDataList; + const PxU32 batchSize = params.batchSize; + + const PxI32 UnrollCount = PxI32(batchSize); + const PxI32 SaveUnrollCount = 32; + + const PxI32 TempThresholdStreamSize = 32; + ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize]; + + const PxI32 bodyListSize = PxI32(params.bodyListSize); + const PxI32 articulationListSize = PxI32(params.articulationListSize); + + + const PxI32 batchCount = PxI32(params.numConstraintHeaders); + cache.mThresholdStream = tempThresholdStream; + cache.mThresholdStreamLength = TempThresholdStreamSize; + cache.mThresholdStreamIndex = 0; + cache.writeBackIteration = false; + + const PxI32 positionIterations = PxI32(params.positionIterations); + const PxI32 velocityIterations = PxI32(params.velocityIterations); + + PxI32* constraintIndex = ¶ms.constraintIndex; + PxI32* constraintIndex2 = ¶ms.constraintIndex2; + + PxSolverConstraintDesc* PX_RESTRICT 
constraintList = params.constraintList; + + const PxU32 nbPartitions = params.nbPartitions; + + PxU32* headersPerPartition = params.headersPerPartition; + + PX_UNUSED(velocityIterations); + + PX_ASSERT(velocityIterations >= 1); + PX_ASSERT(positionIterations >= 1); + + PxI32 endIndexCount = UnrollCount; + PxI32 index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount; + + BatchIterator contactIter(params.constraintBatchHeaders, params.numConstraintHeaders); + + PxI32 maxNormalIndex = 0; + PxI32 normalIteration = 0; + PxI32 frictionIteration = 0; + PxU32 a = 0; + PxI32 targetConstraintIndex = 0; + for(PxU32 i = 0; i < 2; ++i) + { + SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlock : gVTableSolveConcludeBlock; + for(; a < positionIterations - 1 + i; ++a) + { + cache.doFriction = (positionIterations - a) <= 3; + for(PxU32 b = 0; b < nbPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex); + + maxNormalIndex += headersPerPartition[b]; + + PxI32 nbSolved = 0; + while(index < maxNormalIndex) + { + const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount); + SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, solveTable, + normalIteration, frictionIteration, normalIteration); + index += remainder; + endIndexCount -= remainder; + nbSolved += remainder; + if(endIndexCount == 0) + { + endIndexCount = UnrollCount; + index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + physx::shdfnd::atomicAdd(constraintIndex2, nbSolved); + } + targetConstraintIndex += headersPerPartition[b]; //Increment target constraint index by batch count + } + ++normalIteration; + } + } + + PxI32* bodyListIndex = ¶ms.bodyListIndex; + PxI32* bodyListIndex2 = ¶ms.bodyListIndex2; + + ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart; + + PxSolverBody* PX_RESTRICT bodyListStart = 
params.bodyListStart; + Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray; + + + //Save velocity - articulated + PxI32 endIndexCount2 = SaveUnrollCount; + PxI32 index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount; + { + WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex); + PxI32 nbConcluded = 0; + while(index2 < articulationListSize) + { + const PxI32 remainder = PxMin(SaveUnrollCount, (articulationListSize - index2)); + endIndexCount2 -= remainder; + for(PxI32 b = 0; b < remainder; ++b, ++index2) + { + ArticulationPImpl::saveVelocity(articulationListStart[index2]); + } + if(endIndexCount2 == 0) + { + index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount; + endIndexCount2 = SaveUnrollCount; + } + nbConcluded += remainder; + } + + index2 -= articulationListSize; + + //save velocity + + + while(index2 < bodyListSize) + { + const PxI32 remainder = PxMin(endIndexCount2, (bodyListSize - index2)); + endIndexCount2 -= remainder; + for(PxI32 b = 0; b < remainder; ++b, ++index2) + { + Ps::prefetchLine(&bodyListStart[index2 + 8]); + Ps::prefetchLine(&motionVelocityArray[index2 + 8]); + PxSolverBody& body = bodyListStart[index2]; + Cm::SpatialVector& motionVel = motionVelocityArray[index2]; + motionVel.linear = body.linearVelocity; + motionVel.angular = body.angularState; + PX_ASSERT(motionVel.linear.isFinite()); + PX_ASSERT(motionVel.angular.isFinite()); + } + + nbConcluded += remainder; + + //Branch not required because this is the last time we use this atomic variable + //if(index2 < articulationListSizePlusbodyListSize) + { + index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount - articulationListSize; + endIndexCount2 = SaveUnrollCount; + } + } + + if(nbConcluded) + { + Ps::memoryBarrier(); + physx::shdfnd::atomicAdd(bodyListIndex2, nbConcluded); + } + } + + + WAIT_FOR_PROGRESS(bodyListIndex2, (bodyListSize + articulationListSize)); + + a = 1; 
+ for(; a < params.velocityIterations; ++a) + { + for(PxU32 b = 0; b < nbPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex); + + maxNormalIndex += headersPerPartition[b]; + + PxI32 nbSolved = 0; + while(index < maxNormalIndex) + { + const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount); + SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveBlock, + normalIteration, 0, normalIteration); + index += remainder; + endIndexCount -= remainder; + nbSolved += remainder; + if(endIndexCount == 0) + { + endIndexCount = UnrollCount; + index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + physx::shdfnd::atomicAdd(constraintIndex2, nbSolved); + } + targetConstraintIndex += headersPerPartition[b]; //Increment target constraint index by batch count + } + ++normalIteration; + } + + ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream; + PxU32 thresholdStreamLength = params.thresholdStreamLength; + PxI32* outThresholdPairs = params.outThresholdPairs; + + cache.mSharedOutThresholdPairs = outThresholdPairs; + cache.mSharedThresholdStream = thresholdStream; + cache.mSharedThresholdStreamLength = thresholdStreamLength; + + //Last iteration - do writeback as well! 
+ cache.writeBackIteration = true; + { + for(PxU32 b = 0; b < nbPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, targetConstraintIndex); + + maxNormalIndex += headersPerPartition[b]; + + PxI32 nbSolved = 0; + while(index < maxNormalIndex) + { + const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount); + + SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveWriteBackBlock, + normalIteration, 0, normalIteration); + + index += remainder; + endIndexCount -= remainder; + nbSolved += remainder; + if(endIndexCount == 0) + { + endIndexCount = UnrollCount; + index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + physx::shdfnd::atomicAdd(constraintIndex2, nbSolved); + } + targetConstraintIndex += headersPerPartition[b]; //Increment target constraint index by batch count + } + + if(cache.mThresholdStreamIndex > 0) + { + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b) + { + thresholdStream[b + threshIndex] = cache.mThresholdStream[b]; + } + cache.mThresholdStreamIndex = 0; + } + + ++normalIteration; + + } + +#if PX_PROFILE_SOLVE_STALLS + + + PxU64 endTime = readTimer(); + PxReal totalTime = (PxReal)(endTime - startTime); + PxReal stallTime = (PxReal)stallCount; + PxReal stallRatio = stallTime/totalTime; + if(0)//stallRatio > 0.2f) + { + LARGE_INTEGER frequency; + QueryPerformanceFrequency( &frequency ); + printf("Warning -- percentage time stalled = %f; stalled for %f seconds; total Time took %f seconds\n", + stallRatio * 100.f, stallTime/(PxReal)frequency.QuadPart, totalTime/(PxReal)frequency.QuadPart); + } +#endif + + return normalIteration * batchCount; + +} + + +void SolverCoreGeneral::writeBackV +(const PxSolverConstraintDesc* PX_RESTRICT 
constraintList, const PxU32 /*constraintListSize*/, PxConstraintBatchHeader* batchHeaders, const PxU32 numBatches, + ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs, + PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const +{ + SolverContext cache; + cache.solverBodyArray = atomListData; + cache.mThresholdStream = thresholdStream; + cache.mThresholdStreamLength = thresholdStreamLength; + cache.mThresholdStreamIndex = 0; + + PxI32 outThreshIndex = 0; + for(PxU32 j = 0; j < numBatches; ++j) + { + PxU8 type = *constraintList[batchHeaders[j].mStartIndex].constraint; + writeBackTable[type](constraintList + batchHeaders[j].mStartIndex, + batchHeaders[j].mStride, cache); + } + + outThresholdPairs = PxU32(outThreshIndex); +} + +void solveVBlock(SOLVEV_BLOCK_METHOD_ARGS) +{ + solverCore->solveV_Blocks(params); +} + +} +} + + +//#endif diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.h new file mode 100644 index 00000000..bfccb2b6 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControl.h @@ -0,0 +1,218 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 


#ifndef DY_SOLVERCOREGENERAL_H
#define DY_SOLVERCOREGENERAL_H

#include "DySolverCore.h"
#include "DySolverConstraintDesc.h"

namespace physx
{

namespace Dy
{

struct FsData;

// Spins until *state equals requiredState. The pointer is volatile so the value is
// re-read on every pass; there is no yield, sleep or timeout in the loop.
inline void BusyWaitState(volatile PxU32* state, const PxU32 requiredState)
{
	while(requiredState != *state );
}

// Returns immediately if *state already equals requiredState, otherwise spins in
// BusyWaitState until it does.
inline void WaitBodyRequiredState(PxU32* state, const PxU32 requiredState)
{
	if(*state != requiredState)
	{
		BusyWaitState(state, requiredState);
	}
}

// Spins until *stateA equals requiredStateA, and only then spins until *stateB equals
// requiredStateB. The two conditions are waited on one after the other, not together.
inline void BusyWaitStates(volatile PxU32* stateA, volatile PxU32* stateB, const PxU32 requiredStateA, const PxU32 requiredStateB)
{
	while(*stateA != requiredStateA);
	while(*stateB != requiredStateB);
}


// Blocks until both bodies referenced by desc have reached the solver progress this
// constraint requires.
//
// The required value per body is
//   desc.body{A,B}SolverProgress + iterationA * maxSolverNormalProgress + iterationB * maxSolverFrictionProgress
// unless the descriptor's progress value is the sentinel 0xFFFF, in which case the required
// value is 0xFFFF itself. SolveBlockParallel passes its normalIteration / frictionIteration
// arguments as iterationA / iterationB.
PX_FORCE_INLINE void WaitBodyABodyBRequiredState(const PxSolverConstraintDesc& desc, const PxI32 iterationA, const PxI32 iterationB)
{
	PxSolverBody* PX_RESTRICT pBodyA = desc.bodyA;
	PxSolverBody* PX_RESTRICT pBodyB = desc.bodyB;

	const PxU32 requiredProgressA=(desc.bodyASolverProgress == 0xFFFF) ? 0xFFFF : PxU32(desc.bodyASolverProgress + iterationA * pBodyA->maxSolverNormalProgress + iterationB * pBodyA->maxSolverFrictionProgress);
	const PxU32 requiredProgressB=(desc.bodyBSolverProgress == 0xFFFF) ? 0xFFFF : PxU32(desc.bodyBSolverProgress + iterationA * pBodyB->maxSolverNormalProgress + iterationB * pBodyB->maxSolverFrictionProgress);
	PX_ASSERT(requiredProgressA!=0xFFFFFFFF || requiredProgressB!=0xFFFFFFFF);

	// Plain (non-volatile) reads for the fast path; the spin loop below re-reads through
	// volatile pointers.
	const PxU32 solverProgressA = pBodyA->solverProgress;
	const PxU32 solverProgressB = pBodyB->solverProgress;

	if(solverProgressA != requiredProgressA || solverProgressB != requiredProgressB)
	{
		BusyWaitStates(&pBodyA->solverProgress, &pBodyB->solverProgress, requiredProgressA, requiredProgressB);
	}
}

// Publishes that one more constraint has been solved for both bodies of desc by writing
// solverProgress + 1 to each body. A body whose maxSolverNormalProgress is 0xFFFF gets
// 0xFFFF written instead of an incremented value.
//
// The stores go through volatile pointers but are plain writes, not atomic
// read-modify-write operations. SolveBlockParallel issues Ps::memoryBarrier() before
// calling this.
PX_FORCE_INLINE void IncrementBodyProgress(const PxSolverConstraintDesc& desc)
{
	PxSolverBody* PX_RESTRICT pBodyA = desc.bodyA;
	PxSolverBody* PX_RESTRICT pBodyB = desc.bodyB;

	const PxU32 maxProgressA = pBodyA->maxSolverNormalProgress;
	const PxU32 maxProgressB = pBodyB->maxSolverNormalProgress;

	//NB - this approach removes the need for an imul (which is a non-pipeline instruction on PPC chips)
	const PxU32 requiredProgressA=(maxProgressA == 0xFFFF) ? 0xFFFF : pBodyA->solverProgress + 1;
	const PxU32 requiredProgressB=(maxProgressB == 0xFFFF) ? 0xFFFF : pBodyB->solverProgress + 1;

	volatile PxU32* solveProgressA = &pBodyA->solverProgress;
	volatile PxU32* solveProgressB = &pBodyB->solverProgress;

	*solveProgressA=requiredProgressA;
	*solveProgressB=requiredProgressB;

}


// Cursor over an array of constraint batch headers that remembers the last header it
// returned, so that lookups for nearby constraint indices start from that position.
// The array is referenced, not copied.
class BatchIterator
{
public:
	PxConstraintBatchHeader* constraintBatchHeaders;	// header array being iterated
	PxU32 mSize;										// number of headers in the array
	PxU32 mCurrentIndex;								// index of the header returned last (initially 0)

	BatchIterator(PxConstraintBatchHeader* _constraintBatchHeaders, PxU32 size) : constraintBatchHeaders(_constraintBatchHeaders),
		mSize(size), mCurrentIndex(0)
	{
	}

	// Returns the header whose range [mStartIndex, mStartIndex + mStride) contains
	// constraintIndex. The search starts at the cached position and steps forward,
	// wrapping around at mSize; the index found becomes the new cached position.
	//
	// The loop has no other exit: if no header contains constraintIndex it does not
	// terminate, and mSize == 0 would be a modulo by zero once the loop body runs.
	// NOTE(review): the range test presumably relies on unsigned wrap-around when
	// constraintIndex < mStartIndex - confirm the types of mStartIndex / mStride.
	PX_FORCE_INLINE const PxConstraintBatchHeader& GetCurrentHeader(const PxU32 constraintIndex)
	{
		PxU32 currentIndex = mCurrentIndex;
		while((constraintIndex - constraintBatchHeaders[currentIndex].mStartIndex) >= constraintBatchHeaders[currentIndex].mStride)
			currentIndex = (currentIndex + 1)%mSize;
		Ps::prefetchLine(&constraintBatchHeaders[currentIndex], 128);
		mCurrentIndex = currentIndex;
		return constraintBatchHeaders[currentIndex];
	}
private:
	// Declared but not defined: assignment is disabled.
	BatchIterator& operator=(const BatchIterator&);
};


// Solves a run of consecutive constraint batches.
//
// bWaitIncrement    when true, each constraint first waits for its two bodies to reach the
//                   required progress (WaitBodyABodyBRequiredState) and the bodies' progress
//                   is incremented after the batch has been solved; when false neither happens.
// constraintList    descriptor array that the headers' mStartIndex values index into.
// batchCount        number of batch headers to process in this call.
// index             running header index; the header actually used is
//                   index - iteration * headerCount.
// headerCount       multiplied by iteration to map index back into the header array.
// cache             solver context forwarded to the solve routine.
// iterator          only its constraintBatchHeaders pointer is used here.
// solveTable        solve routines indexed by header.mConstraintType.
// normalIteration,
// frictionIteration forwarded to WaitBodyABodyBRequiredState; unused when bWaitIncrement is false.
// iteration         see index.
template<bool bWaitIncrement>
void SolveBlockParallel	(PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxI32 batchCount, const PxI32 index,
						 const PxI32 headerCount, SolverContext& cache, BatchIterator& iterator,
						 SolveBlockMethod solveTable[], const PxI32 normalIteration, const PxI32 frictionIteration,
						 const PxI32 iteration
						 )
{
	// Map the running index back into the header array.
	const PxI32 indA = index - (iteration * headerCount);

	const PxConstraintBatchHeader* PX_RESTRICT headers = iterator.constraintBatchHeaders;

	const PxI32 endIndex = indA + batchCount;
	for(PxI32 i = indA; i < endIndex; ++i)
	{
		const PxConstraintBatchHeader& header = headers[i];

		const PxI32 numToGrab = header.mStride;
		PxSolverConstraintDesc* PX_RESTRICT block = &constraintList[header.mStartIndex];

		Ps::prefetch(block[0].constraint, 384);

		// Prefetch both bodies of every constraint in the batch and, if requested, wait
		// until they have reached the progress this constraint depends on.
		for(PxI32 b = 0; b < numToGrab; ++b)
		{
			Ps::prefetchLine(block[b].bodyA);
			Ps::prefetchLine(block[b].bodyB);
			if(bWaitIncrement)
				WaitBodyABodyBRequiredState(block[b], normalIteration, frictionIteration);
		}

		//OK. We have a number of constraints to run...
		solveTable[header.mConstraintType](block, PxU32(numToGrab), cache);

		//Increment body progresses
		if(bWaitIncrement)
		{
			// Barrier precedes the progress stores that waiting threads spin on.
			Ps::memoryBarrier();
			for(PxI32 j = 0; j < numToGrab; ++j)
			{
				IncrementBodyProgress(block[j]);
			}
		}
	}
}




// SolverCore implementation declared by this header. Instances are obtained from create()
// and released with destroyV(). All solve entry points are const and take their working
// data through the parameters.
class SolverCoreGeneral : public SolverCore
{
public:
	static SolverCoreGeneral* create();

	// Implements SolverCore
	virtual void destroyV();

	// Solve entry point taking shared island parameters; returns a PxI32.
	// NOTE(review): presumably meant to be called by several threads on the same params
	// (it is the "Parallel" variant) - confirm against the implementation file.
	virtual PxI32 solveVParallelAndWriteBack
		(SolverIslandParams& params) const;

	// Solve entry point that solveVBlock() forwards to.
	virtual void solveV_Blocks
		(SolverIslandParams& params) const;

	// Runs the write-back routine selected from writeBackTable for each of the numBatches
	// batch headers; outThresholdPairs is an output.
	virtual void writeBackV
		(const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 constraintListSize, PxConstraintBatchHeader* contactConstraintBatches, const PxU32 numBatches,
		 ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs,
		 PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const;

private:

	//~Implements SolverCore
};

// Parameter list shared by the declaration and the definition of solveVBlock.
#define SOLVEV_BLOCK_METHOD_ARGS	\
	SolverCore*	solverCore,			\
	SolverIslandParams& params

// Calls solverCore->solveV_Blocks(params).
void solveVBlock(SOLVEV_BLOCK_METHOD_ARGS);

// Accessors for the solver dispatch tables (defined outside this header).
SolveBlockMethod* getSolveBlockTable();

SolveBlockMethod* getSolverConcludeBlockTable();

SolveWriteBackBlockMethod* getSolveWritebackBlockTable();


}

}

#endif //DY_SOLVERCOREGENERAL_H
diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp new file mode 100644 index 00000000..1858da15 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.cpp @@ -0,0 +1,755 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxPreprocessor.h" +#include "PsAllocator.h" +#include <new> +#include <stdio.h> +#include "CmPhysXCommon.h" +#include "DySolverBody.h" +#include "DySolverConstraint1D.h" +#include "DySolverContact.h" +#include "DyThresholdTable.h" +#include "DySolverControl.h" +#include "DyArticulationHelper.h" +#include "PsAtomic.h" +#include "PsIntrinsics.h" +#include "DyArticulationPImpl.h" +#include "PsThread.h" +#include "DySolverConstraintDesc.h" +#include "DySolverContext.h" +#include "DySolverControlPF.h" + +namespace physx +{ + +namespace Dy +{ +//----------------------------------- + +void solve1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExt1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solve1D4_Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + + +void solve1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExt1DConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solve1D4Block_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void solve1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExt1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solve1D4Block_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void writeBack1DBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void ext1DBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, 
SolverContext& cache); +void writeBack1D4Block (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + + +void solveFrictionBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveFriction_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtFrictionBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulombBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtContactCoulombBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulomb_BStaticBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + + +void solveContactCoulombConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtContactCoulombConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulomb_BStaticConcludeBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void solveContactCoulombBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtContactCoulombBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulomb_BStaticBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveFrictionBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveFriction_BStaticBlockWriteBack 
(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveExtFrictionBlockWriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +//Pre-block 1d/2d friction stuff... + +void solveContactCoulombPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulombPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulombPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulombPreBlock_ConcludeStatic (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulombPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveContactCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveFrictionCoulombPreBlock (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void solveFrictionCoulombPreBlock_Static (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveFrictionCoulombPreBlock_Conclude (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); +void solveFrictionCoulombPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void solveFrictionCoulombPreBlock_WriteBack (const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache); + +void solveFrictionCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, 
SolverContext& cache); + + +// could move this to PxPreprocessor.h but +// no implementation available for MSVC +#if PX_GCC_FAMILY +#define PX_UNUSED_ATTRIBUTE __attribute__((unused)) +#else +#define PX_UNUSED_ATTRIBUTE +#endif + +#define DYNAMIC_ARTICULATION_REGISTRATION(x) 0 + + +static SolveBlockMethod gVTableSolveBlockCoulomb[] PX_UNUSED_ATTRIBUTE = +{ + 0, + solveContactCoulombBlock, // DY_SC_TYPE_RB_CONTACT + solve1DBlock, // DY_SC_TYPE_RB_1D + DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombBlock), // DY_SC_TYPE_EXT_CONTACT + DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlock), // DY_SC_TYPE_EXT_1D + solveContactCoulomb_BStaticBlock, // DY_SC_TYPE_STATIC_CONTACT + solveContactCoulombBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT + solveContactCoulombPreBlock, // DY_SC_TYPE_BLOCK_RB_CONTACT + solveContactCoulombPreBlock_Static, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT + solve1D4_Block, // DY_SC_TYPE_BLOCK_1D, + solveFrictionBlock, // DY_SC_TYPE_FRICTION_CONSTRAINT + solveFriction_BStaticBlock, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT + DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlock), // DY_SC_TYPE_EXT_FRICTION_CONSTRAINT + solveFrictionCoulombPreBlock, // DY_SC_TYPE_BLOCK_FRICTION + solveFrictionCoulombPreBlock_Static // DY_SC_TYPE_BLOCK_STATIC_FRICTION +}; + +static SolveWriteBackBlockMethod gVTableSolveWriteBackBlockCoulomb[] PX_UNUSED_ATTRIBUTE = +{ + 0, + solveContactCoulombBlockWriteBack, // DY_SC_TYPE_RB_CONTACT + solve1DBlockWriteBack, // DY_SC_TYPE_RB_1D + DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombBlockWriteBack), // DY_SC_TYPE_EXT_CONTACT + DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DBlockWriteBack), // DY_SC_TYPE_EXT_1D + solveContactCoulomb_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_CONTACT + solveContactCoulombBlockWriteBack, // DY_SC_TYPE_NOFRICTION_RB_CONTACT + solveContactCoulombPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_RB_CONTACT + solveContactCoulombPreBlock_WriteBackStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT + 
solve1D4Block_WriteBack, // DY_SC_TYPE_BLOCK_1D, + solveFrictionBlockWriteBack, // DY_SC_TYPE_FRICTION_CONSTRAINT + solveFriction_BStaticBlockWriteBack, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT + DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlockWriteBack), // DY_SC_TYPE_EXT_FRICTION_CONSTRAINT + solveFrictionCoulombPreBlock_WriteBack, // DY_SC_TYPE_BLOCK_FRICTION + solveFrictionCoulombPreBlock_WriteBackStatic // DY_SC_TYPE_BLOCK_STATIC_FRICTION +}; + + +static SolveBlockMethod gVTableSolveConcludeBlockCoulomb[] PX_UNUSED_ATTRIBUTE = +{ + 0, + solveContactCoulombConcludeBlock, // DY_SC_TYPE_RB_CONTACT + solve1DConcludeBlock, // DY_SC_TYPE_RB_1D + DYNAMIC_ARTICULATION_REGISTRATION(solveExtContactCoulombConcludeBlock), // DY_SC_TYPE_EXT_CONTACT + DYNAMIC_ARTICULATION_REGISTRATION(solveExt1DConcludeBlock), // DY_SC_TYPE_EXT_1D + solveContactCoulomb_BStaticConcludeBlock, // DY_SC_TYPE_STATIC_CONTACT + solveContactCoulombConcludeBlock, // DY_SC_TYPE_NOFRICTION_RB_CONTACT + solveContactCoulombPreBlock_Conclude, // DY_SC_TYPE_BLOCK_RB_CONTACT + solveContactCoulombPreBlock_ConcludeStatic, // DY_SC_TYPE_BLOCK_STATIC_RB_CONTACT + solve1D4Block_Conclude, // DY_SC_TYPE_BLOCK_1D, + solveFrictionBlock, // DY_SC_TYPE_FRICTION_CONSTRAINT + solveFriction_BStaticBlock, // DY_SC_TYPE_STATIC_FRICTION_CONSTRAINT + DYNAMIC_ARTICULATION_REGISTRATION(solveExtFrictionBlock), // DY_SC_TYPE_EXT_FRICTION_CONSTRAINT + solveFrictionCoulombPreBlock_Conclude, // DY_SC_TYPE_BLOCK_FRICTION + solveFrictionCoulombPreBlock_ConcludeStatic // DY_SC_TYPE_BLOCK_STATIC_FRICTION +}; + + +void SolverCoreRegisterArticulationFnsCoulomb() +{ + gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_CONTACT] = solveExtContactCoulombBlock; + gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_1D] = solveExt1DBlock; + + gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_CONTACT] = solveExtContactCoulombBlockWriteBack; + gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_1D] = solveExt1DBlockWriteBack; + 
gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_CONTACT] = solveExtContactCoulombConcludeBlock; + gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_1D] = solveExt1DConcludeBlock; + + gVTableSolveBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlock; + gVTableSolveWriteBackBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlockWriteBack; + gVTableSolveConcludeBlockCoulomb[DY_SC_TYPE_EXT_FRICTION] = solveExtFrictionBlock; +} + +SolverCoreGeneralPF* SolverCoreGeneralPF::create() +{ + SolverCoreGeneralPF* scg = reinterpret_cast<SolverCoreGeneralPF*>( + PX_ALLOC(sizeof(SolverCoreGeneralPF), "SolverCoreGeneral")); + + if(scg) + new (scg) SolverCoreGeneralPF; + + return scg; +} + +void SolverCoreGeneralPF::destroyV() +{ + this->~SolverCoreGeneralPF(); + PX_FREE(this); +} + +void SolverCoreGeneralPF::solveV_Blocks(SolverIslandParams& params) const +{ + const PxI32 TempThresholdStreamSize = 32; + ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize]; + + SolverContext cache; + cache.solverBodyArray = params.bodyDataList; + cache.mThresholdStream = tempThresholdStream; + cache.mThresholdStreamLength = TempThresholdStreamSize; + cache.mThresholdStreamIndex = 0; + cache.writeBackIteration = false; + + PxI32 batchCount = PxI32(params.numConstraintHeaders); + + PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart; + const PxU32 bodyListSize = params.bodyListSize; + + Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray; + + const PxU32 velocityIterations = params.velocityIterations; + const PxU32 positionIterations = params.positionIterations; + + const PxU32 numConstraintHeaders = params.numConstraintHeaders; + const PxU32 articulationListSize = params.articulationListSize; + + ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart; + + + PX_ASSERT(velocityIterations >= 1); + PX_ASSERT(positionIterations >= 1); + + if(numConstraintHeaders == 0) + { + for (PxU32 baIdx = 0; baIdx < 
bodyListSize; baIdx++) + { + Cm::SpatialVector& motionVel = motionVelocityArray[baIdx]; + PxSolverBody& atom = bodyListStart[baIdx]; + motionVel.linear = atom.linearVelocity; + motionVel.angular = atom.angularState; + } + + for (PxU32 i = 0; i < articulationListSize; i++) + ArticulationPImpl::saveVelocity(articulationListStart[i]); + + return; + } + + BatchIterator contactIterator(params.constraintBatchHeaders, params.numConstraintHeaders); + BatchIterator frictionIterator(params.frictionConstraintBatches, params.numFrictionConstraintHeaders); + + + PxI32 frictionBatchCount = PxI32(params.numFrictionConstraintHeaders); + + PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList; + + PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList = params.frictionConstraintList; + + + //0-(n-1) iterations + PxI32 normalIter = 0; + PxI32 frictionIter = 0; + for (PxU32 iteration = positionIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters + { + + SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount, + cache, contactIterator, iteration == 1 ? gVTableSolveConcludeBlockCoulomb : gVTableSolveBlockCoulomb, normalIter, frictionIter, normalIter); + ++normalIter; + + } + + if(frictionBatchCount>0) + { + const PxU32 numIterations = positionIterations * 2; + for (PxU32 iteration = numIterations; iteration > 0; iteration--) //decreasing positive numbers == position iters + { + SolveBlockParallel<false>(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount, + cache, frictionIterator, iteration == 1 ? 
gVTableSolveConcludeBlockCoulomb : gVTableSolveBlockCoulomb, normalIter, frictionIter, frictionIter); + ++frictionIter; + } + } + + for (PxU32 baIdx = 0; baIdx < bodyListSize; baIdx++) + { + const PxSolverBody& atom = bodyListStart[baIdx]; + Cm::SpatialVector& motionVel = motionVelocityArray[baIdx]; + motionVel.linear = atom.linearVelocity; + motionVel.angular = atom.angularState; + } + + + for (PxU32 i = 0; i < articulationListSize; i++) + ArticulationPImpl::saveVelocity(articulationListStart[i]); + + + const PxU32 velItersMinOne = velocityIterations - 1; + + PxU32 iteration = 0; + + for(; iteration < velItersMinOne; ++iteration) + { + + SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount, + cache, contactIterator, gVTableSolveBlockCoulomb, normalIter, frictionIter, normalIter); + ++normalIter; + + if(frictionBatchCount > 0) + { + SolveBlockParallel<false>(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount, + cache, frictionIterator, gVTableSolveBlockCoulomb, normalIter, frictionIter, frictionIter); + ++frictionIter; + } + } + + PxI32* outThresholdPairs = params.outThresholdPairs; + ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream; + PxU32 thresholdStreamLength = params.thresholdStreamLength; + + cache.writeBackIteration = true; + + cache.mSharedOutThresholdPairs = outThresholdPairs; + cache.mSharedThresholdStreamLength = thresholdStreamLength; + cache.mSharedThresholdStream = thresholdStream; + + for(; iteration < velocityIterations; ++iteration) + { + SolveBlockParallel<false>(constraintList, batchCount, normalIter * batchCount, batchCount, + cache, contactIterator, gVTableSolveWriteBackBlockCoulomb, normalIter, frictionIter, normalIter); + ++normalIter; + + if(frictionBatchCount > 0) + { + SolveBlockParallel<false>(frictionConstraintList, frictionBatchCount, frictionIter * frictionBatchCount, frictionBatchCount, + cache, frictionIterator, 
gVTableSolveWriteBackBlockCoulomb, normalIter, frictionIter, frictionIter); + ++frictionIter; + } + } + + //Write back remaining threshold streams + if(cache.mThresholdStreamIndex > 0) + { + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(reinterpret_cast<PxI32*>(&outThresholdPairs), PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b) + { + thresholdStream[b + threshIndex] = cache.mThresholdStream[b]; + } + cache.mThresholdStreamIndex = 0; + } + +} + +PxI32 SolverCoreGeneralPF::solveVParallelAndWriteBack(SolverIslandParams& params) const +{ + SolverContext cache; + cache.solverBodyArray = params.bodyDataList; + + const PxI32 UnrollCount = PxI32(params.batchSize); + const PxI32 SaveUnrollCount = 64; + + const PxI32 TempThresholdStreamSize = 32; + ThresholdStreamElement tempThresholdStream[TempThresholdStreamSize]; + + + const PxI32 batchCount = PxI32(params.numConstraintHeaders); + const PxI32 frictionBatchCount = PxI32(params.numFrictionConstraintHeaders);//frictionConstraintBatches.size(); + cache.mThresholdStream = tempThresholdStream; + cache.mThresholdStreamLength = TempThresholdStreamSize; + cache.mThresholdStreamIndex = 0; + + const PxI32 positionIterations = PxI32(params.positionIterations); + const PxU32 velocityIterations = params.velocityIterations; + + const PxI32 bodyListSize = PxI32(params.bodyListSize); + const PxI32 articulationListSize = PxI32(params.articulationListSize); + + PX_ASSERT(velocityIterations >= 1); + PX_ASSERT(positionIterations >= 1); + + PxI32* constraintIndex = ¶ms.constraintIndex; + PxI32* constraintIndex2 = ¶ms.constraintIndex2; + PxI32* frictionConstraintIndex = ¶ms.frictionConstraintIndex; + + PxI32 endIndexCount = UnrollCount; + PxI32 index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount; + PxI32 frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount; + + + 
BatchIterator contactIter(params.constraintBatchHeaders, params.numConstraintHeaders); + BatchIterator frictionIter(params.frictionConstraintBatches, params.numFrictionConstraintHeaders); + + PxU32* headersPerPartition = params.headersPerPartition; + PxU32 nbPartitions = params.nbPartitions; + + PxU32* frictionHeadersPerPartition = params.frictionHeadersPerPartition; + PxU32 nbFrictionPartitions = params.nbFrictionPartitions; + + PxSolverConstraintDesc* PX_RESTRICT constraintList = params.constraintList; + PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList = params.frictionConstraintList; + + + PxI32 maxNormalIndex = 0; + PxI32 maxProgress = 0; + PxI32 frictionEndIndexCount = UnrollCount; + PxI32 maxFrictionIndex = 0; + + PxI32 normalIteration = 0; + PxI32 frictionIteration = 0; + PxU32 a = 0; + for(PxU32 i = 0; i < 2; ++i) + { + SolveBlockMethod* solveTable = i == 0 ? gVTableSolveBlockCoulomb : gVTableSolveConcludeBlockCoulomb; + for(; a < positionIterations - 1 + i; ++a) + { + for(PxU32 b = 0; b < nbPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + maxNormalIndex += headersPerPartition[b]; + maxProgress += headersPerPartition[b]; + PxI32 nbSolved = 0; + while(index < maxNormalIndex) + { + const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount); + SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, solveTable, + normalIteration, frictionIteration, normalIteration); + index += remainder; + endIndexCount -= remainder; + nbSolved += remainder; + if(endIndexCount == 0) + { + endIndexCount = UnrollCount; + index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + Ps::atomicAdd(constraintIndex2, nbSolved); + } + } + ++normalIteration; + } + + } + + + for(PxU32 i = 0; i < 2; ++i) + { + SolveBlockMethod* solveTable = i == 0 ? 
gVTableSolveBlockCoulomb : gVTableSolveConcludeBlockCoulomb; + const PxI32 numIterations = positionIterations *2; + for(; a < numIterations - 1 + i; ++a) + { + for(PxU32 b = 0; b < nbFrictionPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + maxProgress += frictionHeadersPerPartition[b]; + maxFrictionIndex += frictionHeadersPerPartition[b]; + PxI32 nbSolved = 0; + while(frictionIndex < maxFrictionIndex) + { + const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount); + SolveBlockParallel<false>(frictionConstraintList, remainder, frictionIndex, frictionBatchCount, cache, frictionIter, + solveTable, normalIteration, frictionIteration, frictionIteration); + frictionIndex += remainder; + frictionEndIndexCount -= remainder; + nbSolved += remainder; + if(frictionEndIndexCount == 0) + { + frictionEndIndexCount = UnrollCount; + frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + Ps::atomicAdd(constraintIndex2, nbSolved); + } + } + ++frictionIteration; + + } + + } + + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + + + PxI32* bodyListIndex = ¶ms.bodyListIndex; + + ArticulationSolverDesc* PX_RESTRICT articulationListStart = params.articulationListStart; + + PxSolverBody* PX_RESTRICT bodyListStart = params.bodyListStart; + + Cm::SpatialVector* PX_RESTRICT motionVelocityArray = params.motionVelocityArray; + + PxI32* bodyListIndex2 = ¶ms.bodyListIndex2; + + PxI32 endIndexCount2 = SaveUnrollCount; + PxI32 index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount; + { + PxI32 nbConcluded = 0; + while(index2 < articulationListSize) + { + const PxI32 remainder = PxMin(SaveUnrollCount, (articulationListSize - index2)); + endIndexCount2 -= remainder; + for(PxI32 b = 0; b < remainder; ++b, ++index2) + { + ArticulationPImpl::saveVelocity(articulationListStart[index2]); + } + nbConcluded += remainder; + 
if(endIndexCount2 == 0) + { + index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount; + endIndexCount2 = SaveUnrollCount; + } + nbConcluded += remainder; + } + + index2 -= articulationListSize; + + //save velocity + + + while(index2 < bodyListSize) + { + const PxI32 remainder = PxMin(endIndexCount2, (bodyListSize - index2)); + endIndexCount2 -= remainder; + for(PxI32 b = 0; b < remainder; ++b, ++index2) + { + Ps::prefetchLine(&bodyListStart[index2 + 8]); + Ps::prefetchLine(&motionVelocityArray[index2 + 8]); + PxSolverBody& body = bodyListStart[index2]; + Cm::SpatialVector& motionVel = motionVelocityArray[index2]; + motionVel.linear = body.linearVelocity; + motionVel.angular = body.angularState; + PX_ASSERT(motionVel.linear.isFinite()); + PX_ASSERT(motionVel.angular.isFinite()); + } + + nbConcluded += remainder; + + //Branch not required because this is the last time we use this atomic variable + //if(index2 < articulationListSizePlusbodyListSize) + { + index2 = physx::shdfnd::atomicAdd(bodyListIndex, SaveUnrollCount) - SaveUnrollCount - articulationListSize; + endIndexCount2 = SaveUnrollCount; + } + } + + if(nbConcluded) + { + Ps::memoryBarrier(); + physx::shdfnd::atomicAdd(bodyListIndex2, nbConcluded); + } + } + + + WAIT_FOR_PROGRESS(bodyListIndex2, (bodyListSize + articulationListSize)); + + a = 0; + for(; a < velocityIterations-1; ++a) + { + for(PxU32 b = 0; b < nbPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + maxNormalIndex += headersPerPartition[b]; + maxProgress += headersPerPartition[b]; + + PxI32 nbSolved = 0; + while(index < maxNormalIndex) + { + const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount); + SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveBlockCoulomb, normalIteration, frictionIteration, normalIteration); + index += remainder; + endIndexCount -= remainder; + nbSolved += remainder; + if(endIndexCount == 0) + { + 
endIndexCount = UnrollCount; + index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + Ps::atomicAdd(constraintIndex2, nbSolved); + } + } + ++normalIteration; + + for(PxU32 b = 0; b < nbFrictionPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + maxFrictionIndex += frictionHeadersPerPartition[b]; + maxProgress += frictionHeadersPerPartition[b]; + + PxI32 nbSolved = 0; + while(frictionIndex < maxFrictionIndex) + { + const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount); + SolveBlockParallel<false>(constraintList, remainder, index, batchCount, cache, contactIter, gVTableSolveBlockCoulomb, + normalIteration, frictionIteration, normalIteration); + + frictionIndex += remainder; + frictionEndIndexCount -= remainder; + nbSolved += remainder; + if(frictionEndIndexCount == 0) + { + frictionEndIndexCount = UnrollCount; + frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + Ps::atomicAdd(constraintIndex2, nbSolved); + } + } + + ++frictionIteration; + } + + ThresholdStreamElement* PX_RESTRICT thresholdStream = params.thresholdStream; + const PxU32 thresholdStreamLength = params.thresholdStreamLength; + PxI32* outThresholdPairs = params.outThresholdPairs; + + cache.mSharedThresholdStream = thresholdStream; + cache.mSharedOutThresholdPairs = outThresholdPairs; + cache.mSharedThresholdStreamLength = thresholdStreamLength; + + { + for(PxU32 b = 0; b < nbPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + maxNormalIndex += headersPerPartition[b]; + maxProgress += headersPerPartition[b]; + + PxI32 nbSolved = 0; + while(index < maxNormalIndex) + { + const PxI32 remainder = PxMin(maxNormalIndex - index, endIndexCount); + + SolveBlockParallel<false>(constraintList, remainder, normalIteration * batchCount, batchCount, + cache, contactIter, 
gVTableSolveWriteBackBlockCoulomb, normalIteration, frictionIteration, normalIteration); + + index += remainder; + endIndexCount -= remainder; + nbSolved += remainder; + if(endIndexCount == 0) + { + endIndexCount = UnrollCount; + index = physx::shdfnd::atomicAdd(constraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + Ps::atomicAdd(constraintIndex2, nbSolved); + } + } + + ++normalIteration; + + cache.mSharedOutThresholdPairs = outThresholdPairs; + cache.mSharedThresholdStream = thresholdStream; + cache.mSharedThresholdStreamLength = thresholdStreamLength; + + for(PxU32 b = 0; b < nbFrictionPartitions; ++b) + { + WAIT_FOR_PROGRESS(constraintIndex2, maxProgress); + maxFrictionIndex += frictionHeadersPerPartition[b]; + maxProgress += frictionHeadersPerPartition[b]; + + PxI32 nbSolved = 0; + while(frictionIndex < maxFrictionIndex) + { + const PxI32 remainder = PxMin(maxFrictionIndex - frictionIndex, frictionEndIndexCount); + + SolveBlockParallel<false>(frictionConstraintList, remainder, frictionIndex, frictionBatchCount, cache, frictionIter, + gVTableSolveWriteBackBlockCoulomb, normalIteration, frictionIteration, frictionIteration); + + frictionIndex += remainder; + frictionEndIndexCount -= remainder; + nbSolved += remainder; + if(frictionEndIndexCount == 0) + { + frictionEndIndexCount = UnrollCount; + frictionIndex = physx::shdfnd::atomicAdd(frictionConstraintIndex, UnrollCount) - UnrollCount; + } + } + if(nbSolved) + { + Ps::memoryBarrier(); + Ps::atomicAdd(constraintIndex2, nbSolved); + } + } + + if(cache.mThresholdStreamIndex > 0) + { + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(outThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 b = 0; b < cache.mThresholdStreamIndex; ++b) + { + thresholdStream[b + threshIndex] = cache.mThresholdStream[b]; + } + cache.mThresholdStreamIndex = 0; + } + + ++frictionIteration; + } + + return normalIteration * 
batchCount + frictionIteration * frictionBatchCount; +} + + +void SolverCoreGeneralPF::writeBackV +(const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 /*constraintListSize*/, PxConstraintBatchHeader* batchHeaders, const PxU32 numBatches, + ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs, + PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const +{ + SolverContext cache; + cache.solverBodyArray = atomListData; + cache.mThresholdStream = thresholdStream; + cache.mThresholdStreamLength = thresholdStreamLength; + cache.mThresholdStreamIndex = 0; + + PxI32 outThreshIndex = 0; + for(PxU32 j = 0; j < numBatches; ++j) + { + PxU8 type = *constraintList[batchHeaders[j].mStartIndex].constraint; + writeBackTable[type](constraintList + batchHeaders[j].mStartIndex, + batchHeaders[j].mStride, cache); + } + + outThresholdPairs = PxU32(outThreshIndex); +} + +} + +} + + +//#endif diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.h new file mode 100644 index 00000000..b8684cbb --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverControlPF.h @@ -0,0 +1,71 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef DY_SOLVERCONTROLPF_H +#define DY_SOLVERCONTROLPF_H + +#include "DySolverCore.h" +#include "DySolverConstraintDesc.h" + +namespace physx +{ + +namespace Dy +{ + +class SolverCoreGeneralPF : public SolverCore +{ +public: + static SolverCoreGeneralPF* create(); + + // Implements SolverCore + virtual void destroyV(); + + virtual PxI32 solveVParallelAndWriteBack + (SolverIslandParams& params) const; + + virtual void solveV_Blocks + (SolverIslandParams& params) const; + + virtual void writeBackV + (const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 constraintListSize, PxConstraintBatchHeader* contactConstraintBatches, const PxU32 numBatches, + ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs, + PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const; + +private: + + //~Implements SolverCore +}; + +} + +} + +#endif //DY_SOLVERCOREGENERALPF_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverCore.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverCore.h new file mode 100644 index 00000000..a6f579f9 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverCore.h @@ -0,0 +1,242 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef DY_SOLVERCORE_H +#define DY_SOLVERCORE_H + +#include "PxvConfig.h" +#include "PsArray.h" +#include "PsThread.h" + + +namespace physx +{ + +struct PxSolverBody; +struct PxSolverBodyData; +struct PxSolverConstraintDesc; +struct PxConstraintBatchHeader; + +namespace Dy +{ +struct ThresholdStreamElement; + + +struct ArticulationSolverDesc; +class Articulation; +struct SolverContext; + +typedef void (*WriteBackMethod)(const PxSolverConstraintDesc& desc, SolverContext& cache, PxSolverBodyData& sbd0, PxSolverBodyData& sbd1); +typedef void (*SolveMethod)(const PxSolverConstraintDesc& desc, SolverContext& cache); +typedef void (*SolveBlockMethod)(const PxSolverConstraintDesc* desc, const PxU32 constraintCount, SolverContext& cache); +typedef void (*SolveWriteBackBlockMethod)(const PxSolverConstraintDesc* desc, const PxU32 constraintCount, SolverContext& cache); +typedef void (*WriteBackBlockMethod)(const PxSolverConstraintDesc* desc, const PxU32 constraintCount, SolverContext& cache); + +#define PX_PROFILE_SOLVE_STALLS 0 +#if PX_PROFILE_SOLVE_STALLS +#if PX_WINDOWS +#include <windows.h> + + +PX_FORCE_INLINE PxU64 readTimer() +{ + //return __rdtsc(); + + LARGE_INTEGER i; + QueryPerformanceCounter(&i); + return i.QuadPart; +} + +#endif +#endif + + +#define YIELD_THREADS 1 + +#if YIELD_THREADS + +#define ATTEMPTS_BEFORE_BACKOFF 30000 +#define ATTEMPTS_BEFORE_RETEST 10000 + +#endif + +PX_INLINE void WaitForProgressCount(volatile PxI32* pGlobalIndex, const PxI32 targetIndex) +{ +#if YIELD_THREADS + if(*pGlobalIndex < targetIndex) + { + bool satisfied = false; + PxU32 count = ATTEMPTS_BEFORE_BACKOFF; + do + { + satisfied = true; + while(*pGlobalIndex < targetIndex) + { + if(--count == 0) + { + satisfied = false; + break; + } + } + if(!satisfied) + Ps::Thread::yield(); + count = ATTEMPTS_BEFORE_RETEST; + } + while(!satisfied); + } +#else + while(*pGlobalIndex < targetIndex); +#endif +} + + +#if PX_PROFILE_SOLVE_STALLS +PX_INLINE void WaitForProgressCount(volatile 
PxI32* pGlobalIndex, const PxI32 targetIndex, PxU64& stallTime) +{ + if(*pGlobalIndex < targetIndex) + { + bool satisfied = false; + PxU32 count = ATTEMPTS_BEFORE_BACKOFF; + do + { + satisfied = true; + PxU64 startTime = readTimer(); + while(*pGlobalIndex < targetIndex) + { + if(--count == 0) + { + satisfied = false; + break; + } + } + PxU64 endTime = readTimer(); + stallTime += (endTime - startTime); + if(!satisfied) + Ps::Thread::yield(); + count = ATTEMPTS_BEFORE_BACKOFF; + } + while(!satisfied); + } +} + +#define WAIT_FOR_PROGRESS(pGlobalIndex, targetIndex) if(*pGlobalIndex < targetIndex) WaitForProgressCount(pGlobalIndex, targetIndex, stallCount) +#else +#define WAIT_FOR_PROGRESS(pGlobalIndex, targetIndex) if(*pGlobalIndex < targetIndex) WaitForProgressCount(pGlobalIndex, targetIndex) +#endif +#define WAIT_FOR_PROGRESS_NO_TIMER(pGlobalIndex, targetIndex) if(*pGlobalIndex < targetIndex) WaitForProgressCount(pGlobalIndex, targetIndex) + + +struct SolverIslandParams +{ + //Default friction model params + PxU32 positionIterations; + PxU32 velocityIterations; + PxSolverBody* PX_RESTRICT bodyListStart; + PxSolverBodyData* PX_RESTRICT bodyDataList; + PxU32 bodyListSize; + PxU32 solverBodyOffset; + ArticulationSolverDesc* PX_RESTRICT articulationListStart; + PxU32 articulationListSize; + PxSolverConstraintDesc* PX_RESTRICT constraintList; + PxConstraintBatchHeader* constraintBatchHeaders; + PxU32 numConstraintHeaders; + PxU32* headersPerPartition; + PxU32 nbPartitions; + Cm::SpatialVector* PX_RESTRICT motionVelocityArray; + PxU32 batchSize; + PxsBodyCore*const* bodyArray; + PxsRigidBody** PX_RESTRICT rigidBodies; + + //Shared state progress counters + PxI32 constraintIndex; + PxI32 constraintIndex2; + PxI32 bodyListIndex; + PxI32 bodyListIndex2; + PxI32 bodyIntegrationListIndex; + PxI32 numObjectsIntegrated; + + + //Additional 1d/2d friction model params + PxSolverConstraintDesc* PX_RESTRICT frictionConstraintList; + + PxConstraintBatchHeader* 
frictionConstraintBatches; + PxU32 numFrictionConstraintHeaders; + PxU32* frictionHeadersPerPartition; + PxU32 nbFrictionPartitions; + + //Additional Shared state progress counters + PxI32 frictionConstraintIndex; + + //Write-back threshold information + ThresholdStreamElement* PX_RESTRICT thresholdStream; + PxU32 thresholdStreamLength; + + PxI32* outThresholdPairs; +}; + + +/*! +Interface to constraint solver cores + +*/ +class SolverCore +{ +public: + virtual void destroyV() = 0; + virtual ~SolverCore() {} + /* + solves dual problem exactly by GS-iterating until convergence stops + only uses regular velocity vector for storing results, and backs up initial state, which is restored. + the solution forces are saved in a vector. + + state should not be stored, this function is safe to call from multiple threads. + + Returns the total number of constraints that should be solved across all threads. Used for synchronization outside of this method + */ + + virtual PxI32 solveVParallelAndWriteBack + (SolverIslandParams& params) const = 0; + + + virtual void solveV_Blocks + (SolverIslandParams& params) const = 0; + + + virtual void writeBackV + (const PxSolverConstraintDesc* PX_RESTRICT constraintList, const PxU32 constraintListSize, PxConstraintBatchHeader* contactConstraintBatches, const PxU32 numConstraintBatches, + ThresholdStreamElement* PX_RESTRICT thresholdStream, const PxU32 thresholdStreamLength, PxU32& outThresholdPairs, + PxSolverBodyData* atomListData, WriteBackBlockMethod writeBackTable[]) const = 0; +}; + +} + +} + +#endif //DY_SOLVERCORE_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverExt.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverExt.h new file mode 100644 index 00000000..18fd5bcc --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverExt.h @@ -0,0 +1,85 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef DY_SOLVEREXTBODY_H +#define DY_SOLVEREXTBODY_H + +#include "foundation/PxVec3.h" +#include "foundation/PxTransform.h" +#include "CmPhysXCommon.h" +#include "CmSpatialVector.h" + +namespace physx +{ + +class PxsRigidBody; +struct PxsBodyCore; +struct PxSolverBody; +struct PxSolverBodyData; + + +namespace Dy +{ + + +struct FsData; +struct SolverConstraint1D; + +class SolverExtBody +{ +public: + union + { + const FsData* mFsData; + const PxSolverBody* mBody; + }; + const PxSolverBodyData* mBodyData; + + PxU16 mLinkIndex; + + SolverExtBody(const void* bodyOrArticulation, const void* bodyData, PxU16 linkIndex): + mBody(reinterpret_cast<const PxSolverBody*>(bodyOrArticulation)), + mBodyData(reinterpret_cast<const PxSolverBodyData*>(bodyData)), + mLinkIndex(linkIndex) + {} + + void getResponse(const PxVec3& linImpulse, const PxVec3& angImpulse, + PxVec3& linDeltaV, PxVec3& angDeltaV, PxReal dominance) const; + + PxReal projectVelocity(const PxVec3& linear, const PxVec3& angular) const; + PxVec3 getLinVel() const; + PxVec3 getAngVel() const; +}; + +} + +} + +#endif //DY_SOLVEREXTBODY_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraints.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraints.cpp new file mode 100644 index 00000000..e5eb3328 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraints.cpp @@ -0,0 +1,868 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "foundation/PxPreprocessor.h" +#include "PsVecMath.h" + +#ifdef PX_SUPPORT_SIMD + +#include "CmPhysXCommon.h" +#include "DySolverBody.h" +#include "DySolverContact.h" +#include "DySolverContactPF.h" +#include "DySolverConstraint1D.h" +#include "DySolverConstraintDesc.h" +#include "DyThresholdTable.h" +#include "DySolverContext.h" +#include "PsUtilities.h" +#include "DyConstraint.h" +#include "PsAtomic.h" +#include "DyThresholdTable.h" +#include "DySolverConstraintsShared.h" + +namespace physx +{ + +namespace Dy +{ + +void solveContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/) +{ + PxSolverBody& b0 = *desc.bodyA; + PxSolverBody& b1 = *desc.bodyB; + + Vec3V linVel0 = V3LoadA(b0.linearVelocity); + Vec3V linVel1 = V3LoadA(b1.linearVelocity); + Vec3V angState0 = V3LoadA(b0.angularState); + Vec3V angState1 = V3LoadA(b1.angularState); + + SolverContactCoulombHeader* PX_RESTRICT firstHeader = reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint); + const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;//getConstraintLength(desc); + + //hopefully pointer aliasing doesn't bite. 
+ PxU8* PX_RESTRICT currPtr = desc.constraint; + + + //const FloatV zero = FZero(); + + while(currPtr < last) + { + SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader*>(currPtr); + currPtr += sizeof(SolverContactCoulombHeader); + + const PxU32 numNormalConstr = hdr->numNormalConstr; + + const Vec3V normal = hdr->getNormal(); + const FloatV invMassDom0 = FLoad(hdr->dominance0); + const FloatV invMassDom1 = FLoad(hdr->dominance1); + const FloatV angD0 = FLoad(hdr->angDom0); + const FloatV angD1 = FLoad(hdr->angDom1); + + + + SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr); + currPtr += numNormalConstr * sizeof(SolverContactPoint); + + PxF32* appliedImpulse = reinterpret_cast<PxF32*> ((reinterpret_cast<PxU8*>(hdr)) + hdr->frictionOffset + sizeof(SolverFrictionHeader)); + Ps::prefetchLine(appliedImpulse); + + solveDynamicContacts(contacts, numNormalConstr, normal, invMassDom0, invMassDom1, + angD0, angD1, linVel0, angState0, linVel1, angState1, appliedImpulse); + } + + // Write back + V3StoreA(linVel0, b0.linearVelocity); + V3StoreA(linVel1, b1.linearVelocity); + V3StoreA(angState0, b0.angularState); + V3StoreA(angState1, b1.angularState); + + PX_ASSERT(currPtr == last); +} + +void solveFriction(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/) +{ + PxSolverBody& b0 = *desc.bodyA; + PxSolverBody& b1 = *desc.bodyB; + + Vec3V linVel0 = V3LoadA(b0.linearVelocity); + Vec3V linVel1 = V3LoadA(b1.linearVelocity); + Vec3V angState0 = V3LoadA(b0.angularState); + Vec3V angState1 = V3LoadA(b1.angularState); + + PxU8* PX_RESTRICT ptr = desc.constraint; + PxU8* PX_RESTRICT currPtr = ptr; + + const PxU8* PX_RESTRICT last = ptr + getConstraintLength(desc); + + + while(currPtr < last) + { + const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr); + currPtr += sizeof(SolverFrictionHeader); + PxF32* appliedImpulse = 
reinterpret_cast<PxF32*>(currPtr); + currPtr += frictionHeader->getAppliedForcePaddingSize(); + + SolverContactFriction* PX_RESTRICT frictions = reinterpret_cast<SolverContactFriction*>(currPtr); + const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr; + const PxU32 numNormalConstr = frictionHeader->numNormalConstr; + + const PxU32 numFrictionPerPoint = numFrictionConstr/numNormalConstr; + + currPtr += numFrictionConstr * sizeof(SolverContactFriction); + const FloatV staticFriction = frictionHeader->getStaticFriction(); + + const FloatV invMass0D0 = FLoad(frictionHeader->invMass0D0); + const FloatV invMass1D1 = FLoad(frictionHeader->invMass1D1); + + + const FloatV angD0 = FLoad(frictionHeader->angDom0); + const FloatV angD1 = FLoad(frictionHeader->angDom1); + + for(PxU32 i=0, j = 0;i<numFrictionConstr;j++) + { + for(PxU32 p = 0; p < numFrictionPerPoint; p++, i++) + { + + SolverContactFriction& f = frictions[i]; + Ps::prefetchLine(&frictions[i], 128); + + const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW); + const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW); + const Vec3V rbXt0 = Vec3V_From_Vec4V(f.rbXnXYZ_biasW); + + const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW); + const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW); + + const FloatV targetVel = FLoad(f.targetVel); + + const FloatV normalImpulse = FLoad(appliedImpulse[j]); + const FloatV maxFriction = FMul(staticFriction, normalImpulse); + const FloatV nMaxFriction = FNeg(maxFriction); + + //Compute the normal velocity of the constraint. 
+ + const FloatV t0Vel1 = V3Dot(t0, linVel0); + const FloatV t0Vel2 = V3Dot(raXt0, angState0); + const FloatV t0Vel3 = V3Dot(t0, linVel1); + const FloatV t0Vel4 = V3Dot(rbXt0, angState1); + + + const FloatV t0Vel = FSub(FAdd(t0Vel1, t0Vel2), FAdd(t0Vel3, t0Vel4)); + + const Vec3V delLinVel0 = V3Scale(t0, invMass0D0); + const Vec3V delLinVel1 = V3Scale(t0, invMass1D1); + + // still lots to do here: using loop pipelining we can interweave this code with the + // above - the code here has a lot of stalls that we would thereby eliminate + + const FloatV tmp = FNegScaleSub(targetVel,velMultiplier,appliedForce); + FloatV newForce = FScaleAdd(t0Vel, velMultiplier, tmp); + newForce = FClamp(newForce, nMaxFriction, maxFriction); + FloatV deltaF = FSub(newForce, appliedForce); + + linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0); + linVel1 = V3NegScaleSub(delLinVel1, deltaF, linVel1); + angState0 = V3ScaleAdd(raXt0, FMul(deltaF, angD0), angState0); + angState1 = V3NegScaleSub(rbXt0, FMul(deltaF, angD1), angState1); + + f.setAppliedForce(newForce); + } + } + } + + // Write back + V3StoreA(linVel0, b0.linearVelocity); + V3StoreA(linVel1, b1.linearVelocity); + V3StoreA(angState0, b0.angularState); + V3StoreA(angState1, b1.angularState); + + + PX_ASSERT(currPtr == last); +} + +void solveContactCoulomb_BStatic(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/) +{ + PxSolverBody& b0 = *desc.bodyA; + + + Vec3V linVel0 = V3LoadA(b0.linearVelocity); + Vec3V angState0 = V3LoadA(b0.angularState); + + SolverContactCoulombHeader* firstHeader = reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint); + const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;//getConstraintLength(desc); + + //hopefully pointer aliasing doesn't bite. 
+ PxU8* PX_RESTRICT currPtr = desc.constraint; + + //const FloatV zero = FZero(); + + while(currPtr < last) + { + SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader*>(currPtr); + currPtr += sizeof(SolverContactCoulombHeader); + + const PxU32 numNormalConstr = hdr->numNormalConstr; + + SolverContactPoint* PX_RESTRICT contacts = reinterpret_cast<SolverContactPoint*>(currPtr); + Ps::prefetchLine(contacts); + currPtr += numNormalConstr * sizeof(SolverContactPoint); + + PxF32* appliedImpulse = reinterpret_cast<PxF32*> ((reinterpret_cast<PxU8*>(hdr)) + hdr->frictionOffset + sizeof(SolverFrictionHeader)); + Ps::prefetchLine(appliedImpulse); + + const Vec3V normal = hdr->getNormal(); + + const FloatV invMassDom0 = FLoad(hdr->dominance0); + + const FloatV angD0 = FLoad(hdr->angDom0); + + solveStaticContacts(contacts, numNormalConstr, normal, invMassDom0, + angD0, linVel0, angState0, appliedImpulse); + } + + // Write back + V3StoreA(linVel0, b0.linearVelocity); + V3StoreA(angState0, b0.angularState); + + PX_ASSERT(currPtr == last); +} + +void solveFriction_BStatic(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/) +{ + PxSolverBody& b0 = *desc.bodyA; + + Vec3V linVel0 = V3LoadA(b0.linearVelocity); + Vec3V angState0 = V3LoadA(b0.angularState); + + PxU8* PX_RESTRICT currPtr = desc.constraint; + + const PxU8* PX_RESTRICT last = currPtr + getConstraintLength(desc); + + while(currPtr < last) + { + + const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr); + const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr; + const PxU32 numNormalConstr = frictionHeader->numNormalConstr; + const PxU32 numFrictionPerPoint = numFrictionConstr/numNormalConstr; + currPtr +=sizeof(SolverFrictionHeader); + PxF32* appliedImpulse = reinterpret_cast<PxF32*>(currPtr); + currPtr +=frictionHeader->getAppliedForcePaddingSize(); + + SolverContactFriction* PX_RESTRICT frictions = 
reinterpret_cast<SolverContactFriction*>(currPtr); + currPtr += numFrictionConstr * sizeof(SolverContactFriction); + + const FloatV invMass0 = FLoad(frictionHeader->invMass0D0); + const FloatV angD0 = FLoad(frictionHeader->angDom0); + //const FloatV angD1 = FLoad(frictionHeader->angDom1); + + + const FloatV staticFriction = frictionHeader->getStaticFriction(); + + for(PxU32 i=0, j = 0;i<numFrictionConstr;j++) + { + for(PxU32 p = 0; p < numFrictionPerPoint; p++, i++) + { + SolverContactFriction& f = frictions[i]; + Ps::prefetchLine(&frictions[i+1]); + + const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW); + const Vec3V raXt0 = Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW); + + const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW); + const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW); + + const FloatV targetVel = FLoad(f.targetVel); + + //const FloatV normalImpulse = contacts[f.contactIndex].getAppliedForce(); + const FloatV normalImpulse = FLoad(appliedImpulse[j]); + const FloatV maxFriction = FMul(staticFriction, normalImpulse); + const FloatV nMaxFriction = FNeg(maxFriction); + + //Compute the normal velocity of the constraint. 
+ + const FloatV t0Vel1 = V3Dot(t0, linVel0); + const FloatV t0Vel2 = V3Dot(raXt0, angState0); + + const FloatV t0Vel = FAdd(t0Vel1, t0Vel2); + + const Vec3V delangState0 = V3Scale(raXt0, angD0); + const Vec3V delLinVel0 = V3Scale(t0, invMass0); + + // still lots to do here: using loop pipelining we can interweave this code with the + // above - the code here has a lot of stalls that we would thereby eliminate + + const FloatV tmp = FNegScaleSub(targetVel,velMultiplier,appliedForce); + FloatV newForce = FScaleAdd(t0Vel, velMultiplier, tmp); + newForce = FClamp(newForce, nMaxFriction, maxFriction); + const FloatV deltaF = FSub(newForce, appliedForce); + + linVel0 = V3ScaleAdd(delLinVel0, deltaF, linVel0); + angState0 = V3ScaleAdd(delangState0, deltaF, angState0); + + f.setAppliedForce(newForce); + } + } + } + + // Write back + V3StoreA(linVel0, b0.linearVelocity); + V3StoreA(angState0, b0.angularState); + + PX_ASSERT(currPtr == last); +} + + +void concludeContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/) +{ + PxU8* PX_RESTRICT cPtr = desc.constraint; + + const SolverContactCoulombHeader* PX_RESTRICT firstHeader = reinterpret_cast<const SolverContactCoulombHeader*>(cPtr); + PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset;//getConstraintLength(desc); + while(cPtr < last) + { + const SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<const SolverContactCoulombHeader*>(cPtr); + cPtr += sizeof(SolverContactCoulombHeader); + + const PxU32 numNormalConstr = hdr->numNormalConstr; + + //if(cPtr < last) + //Ps::prefetchLine(cPtr, 512); + Ps::prefetchLine(cPtr,128); + Ps::prefetchLine(cPtr,256); + Ps::prefetchLine(cPtr,384); + + const PxU32 pointStride = hdr->type == DY_SC_TYPE_EXT_CONTACT ? 
sizeof(SolverContactPointExt) + : sizeof(SolverContactPoint); + for(PxU32 i=0;i<numNormalConstr;i++) + { + SolverContactPoint *c = reinterpret_cast<SolverContactPoint*>(cPtr); + cPtr += pointStride; + //c->scaledBias = PxMin(c->scaledBias, 0.f); + c->biasedErr = c->unbiasedErr; + } + } + PX_ASSERT(cPtr == last); +} + +void writeBackContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& cache, + PxSolverBodyData& bd0, PxSolverBodyData& bd1) +{ + + PxReal normalForce = 0.f; + + PxU8* PX_RESTRICT cPtr = desc.constraint; + PxReal* PX_RESTRICT vForceWriteback = reinterpret_cast<PxReal*>(desc.writeBack); + const SolverContactCoulombHeader* PX_RESTRICT firstHeader = reinterpret_cast<const SolverContactCoulombHeader*>(cPtr); + PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset; + + const PxU32 pointStride = firstHeader->type == DY_SC_TYPE_EXT_CONTACT ? sizeof(SolverContactPointExt) + : sizeof(SolverContactPoint); + + bool hasForceThresholds = false; + while(cPtr < last) + { + const SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<const SolverContactCoulombHeader*>(cPtr); + cPtr += sizeof(SolverContactCoulombHeader); + + PxF32* appliedImpulse = reinterpret_cast<PxF32*> (const_cast<PxU8*>((reinterpret_cast<const PxU8*>(hdr)) + hdr->frictionOffset + sizeof(SolverFrictionHeader))); + + hasForceThresholds = hdr->flags & SolverContactHeader::eHAS_FORCE_THRESHOLDS; + + const PxU32 numNormalConstr = hdr->numNormalConstr; + + Ps::prefetchLine(cPtr, 256); + Ps::prefetchLine(cPtr, 384); + + if(vForceWriteback!=NULL) + { + for(PxU32 i=0; i<numNormalConstr; i++) + { + PxF32 imp = appliedImpulse[i]; + *vForceWriteback = imp; + vForceWriteback++; + normalForce += imp; + } + } + cPtr += numNormalConstr * pointStride; + } + PX_ASSERT(cPtr == last); + + if(hasForceThresholds && desc.linkIndexA == PxSolverConstraintDesc::NO_LINK && desc.linkIndexB == PxSolverConstraintDesc::NO_LINK && + normalForce !=0 && (bd0.reportThreshold < PX_MAX_REAL || 
bd1.reportThreshold < PX_MAX_REAL)) + { + ThresholdStreamElement elt; + elt.normalForce = normalForce; + elt.threshold = PxMin<float>(bd0.reportThreshold, bd1.reportThreshold); + elt.nodeIndexA = bd0.nodeIndex; + elt.nodeIndexB = bd1.nodeIndex; + elt.shapeInteraction = (reinterpret_cast<SolverContactCoulombHeader*>(desc.constraint))->shapeInteraction; + Ps::order(elt.nodeIndexA, elt.nodeIndexB); + PX_ASSERT(elt.nodeIndexA < elt.nodeIndexB); + + PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength); + cache.mThresholdStream[cache.mThresholdStreamIndex++] = elt; + } + +} + + +void solveFrictionBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveFriction(desc[a], cache); + } +} + + +void solveFrictionBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveFriction(desc[a], cache); + } +} + +void solveFriction_BStaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveFriction_BStatic(desc[a], cache); + } +} + + +void solveFriction_BStaticConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveFriction_BStatic(desc[a], cache); + } +} + +void solveFriction_BStaticBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveFriction_BStatic(desc[a], cache); + } +} + + +void solveContactCoulombBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveContactCoulomb(desc[a], cache); + } +} + +void 
solveContactCoulombConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveContactCoulomb(desc[a], cache); + concludeContactCoulomb(desc[a], cache); + } +} + +void solveContactCoulombBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].bodyADataIndex]; + PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].bodyBDataIndex]; + solveContactCoulomb(desc[a], cache); + writeBackContactCoulomb(desc[a], cache, bd0, bd1); + } + + if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4)) + { + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a) + { + cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a]; + } + cache.mThresholdStreamIndex = 0; + } +} + +void solveContactCoulomb_BStaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveContactCoulomb_BStatic(desc[a], cache); + } +} + +void solveContactCoulomb_BStaticConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveContactCoulomb_BStatic(desc[a], cache); + concludeContactCoulomb(desc[a], cache); + } +} + +void solveContactCoulomb_BStaticBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].bodyADataIndex]; + PxSolverBodyData& bd1 = 
cache.solverBodyArray[desc[a].bodyBDataIndex]; + solveContactCoulomb_BStatic(desc[a], cache); + writeBackContactCoulomb(desc[a], cache, bd0, bd1); + } + + if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4)) + { + //Not enough space to write 4 more thresholds back! + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a) + { + cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a]; + } + cache.mThresholdStreamIndex = 0; + } +} + +void solveExtContactCoulomb(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/) +{ + //We'll need this. +// const FloatV zero = FZero(); +// const FloatV one = FOne(); + + Vec3V linVel0, angVel0, linVel1, angVel1; + + if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK) + { + linVel0 = V3LoadA(desc.bodyA->linearVelocity); + angVel0 = V3LoadA(desc.bodyA->angularState); + } + else + { + Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationA, desc.linkIndexA); + linVel0 = v.linear; + angVel0 = v.angular; + } + + if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK) + { + linVel1 = V3LoadA(desc.bodyB->linearVelocity); + angVel1 = V3LoadA(desc.bodyB->angularState); + } + else + { + Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationB, desc.linkIndexB); + linVel1 = v.linear; + angVel1 = v.angular; + } + + //const PxU8* PX_RESTRICT last = desc.constraint + desc.constraintLengthOver16*16; + + PxU8* PX_RESTRICT currPtr = desc.constraint; + + const SolverContactCoulombHeader* PX_RESTRICT firstHeader = reinterpret_cast<SolverContactCoulombHeader*>(currPtr); + + const PxU8* PX_RESTRICT last = desc.constraint + firstHeader->frictionOffset; + + //hopefully pointer aliasing doesn't bite. 
+ + Vec3V linImpulse0 = V3Zero(), linImpulse1 = V3Zero(), angImpulse0 = V3Zero(), angImpulse1 = V3Zero(); + + while(currPtr < last) + { + const SolverContactCoulombHeader* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader*>(currPtr); + currPtr += sizeof(SolverContactCoulombHeader); + + const PxU32 numNormalConstr = hdr->numNormalConstr; + + PxF32* appliedImpulse = reinterpret_cast<PxF32*>(const_cast<PxU8*>(((reinterpret_cast<const PxU8*>(hdr)) + hdr->frictionOffset + sizeof(SolverFrictionHeader)))); + Ps::prefetchLine(appliedImpulse); + + SolverContactPointExt* PX_RESTRICT contacts = reinterpret_cast<SolverContactPointExt*>(currPtr); + Ps::prefetchLine(contacts); + currPtr += numNormalConstr * sizeof(SolverContactPointExt); + + Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero(); + + const Vec3V normal = hdr->getNormal(); + + solveExtContacts(contacts, numNormalConstr, normal, linVel0, angVel0, linVel1, angVel1, li0, ai0, li1, ai1, appliedImpulse); + + linImpulse0 = V3ScaleAdd(li0, FLoad(hdr->dominance0), linImpulse0); + angImpulse0 = V3ScaleAdd(ai0, FLoad(hdr->angDom0), angImpulse0); + linImpulse1 = V3NegScaleSub(li1, FLoad(hdr->dominance1), linImpulse1); + angImpulse1 = V3NegScaleSub(ai1, FLoad(hdr->angDom1), angImpulse1); + } + + if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK) + { + V3StoreA(linVel0, desc.bodyA->linearVelocity); + V3StoreA(angVel0, desc.bodyA->angularState); + } + else + PxcFsApplyImpulse(*desc.articulationA, desc.linkIndexA, linImpulse0, angImpulse0); + + if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK) + { + V3StoreA(linVel1, desc.bodyB->linearVelocity); + V3StoreA(angVel1, desc.bodyB->angularState); + } + else + PxcFsApplyImpulse(*desc.articulationB, desc.linkIndexB, linImpulse1, angImpulse1); + + PX_ASSERT(currPtr == last); +} + +void solveExtFriction(const PxSolverConstraintDesc& desc, SolverContext& /*cache*/) +{ + Vec3V linVel0, angVel0, linVel1, angVel1; + + if(desc.linkIndexA == 
PxSolverConstraintDesc::NO_LINK) + { + linVel0 = V3LoadA(desc.bodyA->linearVelocity); + angVel0 = V3LoadA(desc.bodyA->angularState); + } + else + { + Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationA, desc.linkIndexA); + linVel0 = v.linear; + angVel0 = v.angular; + } + + if(desc.linkIndexB == PxSolverConstraintDesc::NO_LINK) + { + linVel1 = V3LoadA(desc.bodyB->linearVelocity); + angVel1 = V3LoadA(desc.bodyB->angularState); + } + else + { + Cm::SpatialVectorV v = PxcFsGetVelocity(*desc.articulationB, desc.linkIndexB); + linVel1 = v.linear; + angVel1 = v.angular; + } + + + //hopefully pointer aliasing doesn't bite. + PxU8* PX_RESTRICT currPtr = desc.constraint; + + const PxU8* PX_RESTRICT last = currPtr + desc.constraintLengthOver16*16; + + Vec3V linImpulse0 = V3Zero(), linImpulse1 = V3Zero(), angImpulse0 = V3Zero(), angImpulse1 = V3Zero(); + + while(currPtr < last) + { + + const SolverFrictionHeader* PX_RESTRICT frictionHeader = reinterpret_cast<SolverFrictionHeader*>(currPtr); + currPtr += sizeof(SolverFrictionHeader); + PxF32* appliedImpulse = reinterpret_cast<PxF32*>(currPtr); + currPtr += frictionHeader->getAppliedForcePaddingSize(); + + SolverContactFrictionExt* PX_RESTRICT frictions = reinterpret_cast<SolverContactFrictionExt*>(currPtr); + const PxU32 numFrictionConstr = frictionHeader->numFrictionConstr; + + currPtr += numFrictionConstr * sizeof(SolverContactFrictionExt); + const FloatV staticFriction = frictionHeader->getStaticFriction(); + + + Vec3V li0 = V3Zero(), li1 = V3Zero(), ai0 = V3Zero(), ai1 = V3Zero(); + + PxU32 numNormalConstr = frictionHeader->numNormalConstr; + PxU32 nbFrictionsPerPoint = numFrictionConstr/numNormalConstr; + + + + + for(PxU32 i = 0, j = 0; i < numFrictionConstr; j++) + { + for(PxU32 p=0;p<nbFrictionsPerPoint;p++, i++) + { + SolverContactFrictionExt& f = frictions[i]; + Ps::prefetchLine(&frictions[i+1]); + + + const Vec3V t0 = Vec3V_From_Vec4V(f.normalXYZ_appliedForceW); + const Vec3V raXt0 = 
Vec3V_From_Vec4V(f.raXnXYZ_velMultiplierW); + const Vec3V rbXt0 = Vec3V_From_Vec4V(f.rbXnXYZ_biasW); + + const FloatV appliedForce = V4GetW(f.normalXYZ_appliedForceW); + const FloatV velMultiplier = V4GetW(f.raXnXYZ_velMultiplierW); + const FloatV targetVel = FLoad(f.targetVel); + + const FloatV normalImpulse = FLoad(appliedImpulse[j]);//contacts[f.contactIndex].getAppliedForce(); + const FloatV maxFriction = FMul(staticFriction, normalImpulse); + const FloatV nMaxFriction = FNeg(maxFriction); + + //Compute the normal velocity of the constraint. + + Vec3V rVel = V3MulAdd(linVel0, t0, V3Mul(angVel0, raXt0)); + rVel = V3Sub(rVel, V3MulAdd(linVel1, t0, V3Mul(angVel1, rbXt0))); + const FloatV t0Vel = FAdd(V3SumElems(rVel), targetVel); + + FloatV deltaF = FNeg(FMul(t0Vel, velMultiplier)); + FloatV newForce = FAdd(appliedForce, deltaF); + newForce = FClamp(newForce, nMaxFriction, maxFriction); + deltaF = FSub(newForce, appliedForce); + + linVel0 = V3ScaleAdd(f.linDeltaVA, deltaF, linVel0); + angVel0 = V3ScaleAdd(f.angDeltaVA, deltaF, angVel0); + linVel1 = V3ScaleAdd(f.linDeltaVB, deltaF, linVel1); + angVel1 = V3ScaleAdd(f.angDeltaVB, deltaF, angVel1); + + li0 = V3ScaleAdd(t0, deltaF, li0); ai0 = V3ScaleAdd(raXt0, deltaF, ai0); + li1 = V3ScaleAdd(t0, deltaF, li1); ai1 = V3ScaleAdd(rbXt0, deltaF, ai1); + + f.setAppliedForce(newForce); + } + } + + + linImpulse0 = V3ScaleAdd(li0, FLoad(frictionHeader->invMass0D0), linImpulse0); + angImpulse0 = V3ScaleAdd(ai0, FLoad(frictionHeader->angDom0), angImpulse0); + linImpulse1 = V3NegScaleSub(li1, FLoad(frictionHeader->invMass1D1), linImpulse1); + angImpulse1 = V3NegScaleSub(ai1, FLoad(frictionHeader->angDom1), angImpulse1); + } + + if(desc.linkIndexA == PxSolverConstraintDesc::NO_LINK) + { + V3StoreA(linVel0, desc.bodyA->linearVelocity); + V3StoreA(angVel0, desc.bodyA->angularState); + } + else + PxcFsApplyImpulse(*desc.articulationA, desc.linkIndexA, linImpulse0, angImpulse0); + + if(desc.linkIndexB == 
PxSolverConstraintDesc::NO_LINK) + { + V3StoreA(linVel1, desc.bodyB->linearVelocity); + V3StoreA(angVel1, desc.bodyB->angularState); + } + else + PxcFsApplyImpulse(*desc.articulationB, desc.linkIndexB, linImpulse1, angImpulse1); + + PX_ASSERT(currPtr == last); + +} + +void solveExtFrictionBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveExtFriction(desc[a], cache); + } +} + +void solveExtFrictionConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveExtFriction(desc[a], cache); + } +} + +void solveExtFrictionBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveExtFriction(desc[a], cache); + } +} + + +void solveConcludeExtContactCoulomb (const PxSolverConstraintDesc& desc, SolverContext& cache) +{ + solveExtContactCoulomb(desc, cache); + concludeContactCoulomb(desc, cache); +} + +void solveExtContactCoulombBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveExtContactCoulomb(desc[a], cache); + } +} + +void solveExtContactCoulombConcludeBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + solveExtContactCoulomb(desc[a], cache); + concludeContactCoulomb(desc[a], cache); + } +} + +void solveExtContactCoulombBlockWriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 constraintCount, SolverContext& cache) +{ + for(PxU32 a = 0; a < constraintCount; ++a) + { + PxSolverBodyData& bd0 = cache.solverBodyArray[desc[a].linkIndexA != PxSolverConstraintDesc::NO_LINK ? 
0 : desc[a].bodyADataIndex]; + PxSolverBodyData& bd1 = cache.solverBodyArray[desc[a].linkIndexB != PxSolverConstraintDesc::NO_LINK ? 0 : desc[a].bodyBDataIndex]; + + solveExtContactCoulomb(desc[a], cache); + writeBackContactCoulomb(desc[a], cache, bd0, bd1); + } + if(cache.mThresholdStreamIndex > 0) + { + //Not enough space to write 4 more thresholds back! + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a) + { + cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a]; + } + cache.mThresholdStreamIndex = 0; + } +} + + +void solveConcludeContactCoulomb (const PxSolverConstraintDesc& desc, SolverContext& cache) +{ + solveContactCoulomb(desc, cache); + concludeContactCoulomb(desc, cache); +} + + +void solveConcludeContactCoulomb_BStatic (const PxSolverConstraintDesc& desc, SolverContext& cache) +{ + solveContactCoulomb_BStatic(desc, cache); + concludeContactCoulomb(desc, cache); +} + + + +} + +} + +#endif //PX_SUPPORT_SIMD diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraintsBlock.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraintsBlock.cpp new file mode 100644 index 00000000..c6d7288e --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySolverPFConstraintsBlock.cpp @@ -0,0 +1,985 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "foundation/PxPreprocessor.h" +#include "PsVecMath.h" +#include "PsFPU.h" +#include "CmPhysXCommon.h" +#include "DySolverBody.h" +#include "DySolverContactPF4.h" +#include "DySolverConstraint1D.h" +#include "DySolverConstraintDesc.h" +#include "DyThresholdTable.h" +#include "DySolverContext.h" +#include "PsUtilities.h" +#include "DyConstraint.h" +#include "PsAtomic.h" +#include "DySolverContact.h" + +namespace physx +{ + +namespace Dy +{ + +static void solveContactCoulomb4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/) +{ + PxSolverBody& b00 = *desc[0].bodyA; + PxSolverBody& b01 = *desc[0].bodyB; + PxSolverBody& b10 = *desc[1].bodyA; + PxSolverBody& b11 = *desc[1].bodyB; + PxSolverBody& b20 = *desc[2].bodyA; + PxSolverBody& b21 = *desc[2].bodyB; + PxSolverBody& b30 = *desc[3].bodyA; + PxSolverBody& b31 = *desc[3].bodyB; + + //We'll need this. + const Vec4V vZero = V4Zero(); + + Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x); + Vec4V linVel01 = V4LoadA(&b01.linearVelocity.x); + Vec4V angState00 = V4LoadA(&b00.angularState.x); + Vec4V angState01 = V4LoadA(&b01.angularState.x); + + Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x); + Vec4V linVel11 = V4LoadA(&b11.linearVelocity.x); + Vec4V angState10 = V4LoadA(&b10.angularState.x); + Vec4V angState11 = V4LoadA(&b11.angularState.x); + + Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x); + Vec4V linVel21 = V4LoadA(&b21.linearVelocity.x); + Vec4V angState20 = V4LoadA(&b20.angularState.x); + Vec4V angState21 = V4LoadA(&b21.angularState.x); + + Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x); + Vec4V linVel31 = V4LoadA(&b31.linearVelocity.x); + Vec4V angState30 = V4LoadA(&b30.angularState.x); + Vec4V angState31 = V4LoadA(&b31.angularState.x); + + + Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3; + Vec4V linVel1T0, linVel1T1, linVel1T2, linVel1T3; + Vec4V angState0T0, angState0T1, angState0T2, angState0T3; + Vec4V angState1T0, angState1T1, angState1T2, angState1T3; + + + 
PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3); + PX_TRANSPOSE_44(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2, linVel1T3); + PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3); + PX_TRANSPOSE_44(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2, angState1T3); + + + + + //hopefully pointer aliasing doesn't bite. + PxU8* PX_RESTRICT currPtr = desc[0].constraint; + + SolverContactCoulombHeader4* PX_RESTRICT firstHeader = reinterpret_cast<SolverContactCoulombHeader4*>(currPtr); + + const PxU8* PX_RESTRICT last = desc[0].constraint + firstHeader->frictionOffset; + + //const PxU8* PX_RESTRICT endPtr = desc[0].constraint + getConstraintLength(desc[0]); + + + //TODO - can I avoid this many tests??? + while(currPtr < last) + { + + SolverContactCoulombHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader4*>(currPtr); + + Vec4V* appliedForceBuffer = reinterpret_cast<Vec4V*>(currPtr + hdr->frictionOffset + sizeof(SolverFrictionHeader4)); + + //PX_ASSERT((PxU8*)appliedForceBuffer < endPtr); + + currPtr = reinterpret_cast<PxU8*>(hdr + 1); + + const PxU32 numNormalConstr = hdr->numNormalConstr; + + SolverContact4Dynamic* PX_RESTRICT contacts = reinterpret_cast<SolverContact4Dynamic*>(currPtr); + //const Vec4V dominance1 = V4Neg(__dominance1); + + currPtr = reinterpret_cast<PxU8*>(contacts + numNormalConstr); + + const Vec4V invMass0D0 = hdr->invMassADom; + const Vec4V invMass1D1 = hdr->invMassBDom; + const Vec4V angD0 = hdr->angD0; + const Vec4V angD1 = hdr->angD1; + + const Vec4V normalT0 = hdr->normalX; + const Vec4V normalT1 = hdr->normalY; + const Vec4V normalT2 = hdr->normalZ; + + const Vec4V __normalVel1 = V4Mul(linVel0T0, normalT0); + const Vec4V __normalVel3 = V4Mul(linVel1T0, normalT0); + const Vec4V _normalVel1 = V4MulAdd(linVel0T1, normalT1, __normalVel1); + const Vec4V 
_normalVel3 = V4MulAdd(linVel1T1, normalT1, __normalVel3); + + Vec4V normalVel1 = V4MulAdd(linVel0T2, normalT2, _normalVel1); + Vec4V normalVel3 = V4MulAdd(linVel1T2, normalT2, _normalVel3); + + Vec4V accumDeltaF = vZero; + + for(PxU32 i=0;i<numNormalConstr;i++) + { + SolverContact4Dynamic& c = contacts[i]; + Ps::prefetchLine((&contacts[i+1])); + Ps::prefetchLine((&contacts[i+1]), 128); + Ps::prefetchLine((&contacts[i+1]), 256); + Ps::prefetchLine((&contacts[i+1]), 384); + + const Vec4V appliedForce = c.appliedForce; + const Vec4V velMultiplier = c.velMultiplier; + + const Vec4V targetVel = c.targetVelocity; + const Vec4V scaledBias = c.scaledBias; + const Vec4V maxImpulse = c.maxImpulse; + + const Vec4V raXnT0 = c.raXnX; + const Vec4V raXnT1 = c.raXnY; + const Vec4V raXnT2 = c.raXnZ; + const Vec4V rbXnT0 = c.rbXnX; + const Vec4V rbXnT1 = c.rbXnY; + const Vec4V rbXnT2 = c.rbXnZ; + + + const Vec4V __normalVel2 = V4Mul(raXnT0, angState0T0); + const Vec4V __normalVel4 = V4Mul(rbXnT0, angState1T0); + + + const Vec4V _normalVel2 = V4MulAdd(raXnT1, angState0T1, __normalVel2); + const Vec4V _normalVel4 = V4MulAdd(rbXnT1, angState1T1, __normalVel4); + + + const Vec4V normalVel2 = V4MulAdd(raXnT2, angState0T2, _normalVel2); + const Vec4V normalVel4 = V4MulAdd(rbXnT2, angState1T2, _normalVel4); + + const Vec4V biasedErr = V4MulAdd(targetVel, velMultiplier, V4Neg(scaledBias)); + + //Linear component - normal * invMass_dom + + const Vec4V _normalVel(V4Add(normalVel1, normalVel2)); + const Vec4V __normalVel(V4Add(normalVel3, normalVel4)); + + const Vec4V normalVel = V4Sub(_normalVel, __normalVel ); + + const Vec4V _deltaF = V4NegMulSub(normalVel, velMultiplier, biasedErr); + const Vec4V nAppliedForce = V4Neg(appliedForce); + const Vec4V _deltaF2 = V4Max(_deltaF, nAppliedForce); + const Vec4V _newAppliedForce(V4Add(appliedForce, _deltaF2)); + const Vec4V newAppliedForce = V4Min(_newAppliedForce, maxImpulse); + const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce); + + 
normalVel1 = V4MulAdd(invMass0D0, deltaF, normalVel1); + normalVel3 = V4NegMulSub(invMass1D1, deltaF, normalVel3); + + accumDeltaF = V4Add(deltaF, accumDeltaF); + + const Vec4V deltaFAng0 = V4Mul(angD0, deltaF); + const Vec4V deltaFAng1 = V4Mul(angD1, deltaF); + + angState0T0 = V4MulAdd(raXnT0, deltaFAng0, angState0T0); + angState1T0 = V4NegMulSub(rbXnT0, deltaFAng1, angState1T0); + + angState0T1 = V4MulAdd(raXnT1, deltaFAng0, angState0T1); + angState1T1 = V4NegMulSub(rbXnT1, deltaFAng1, angState1T1); + + angState0T2 = V4MulAdd(raXnT2, deltaFAng0, angState0T2); + angState1T2 = V4NegMulSub(rbXnT2, deltaFAng1, angState1T2); + + c.appliedForce = newAppliedForce; + appliedForceBuffer[i] = newAppliedForce; + } + + const Vec4V accumDeltaF0 = V4Mul(accumDeltaF, invMass0D0); + const Vec4V accumDeltaF1 = V4Mul(accumDeltaF, invMass1D1); + + linVel0T0 = V4MulAdd(normalT0, accumDeltaF0, linVel0T0); + linVel1T0 = V4NegMulSub(normalT0, accumDeltaF1, linVel1T0); + linVel0T1 = V4MulAdd(normalT1, accumDeltaF0, linVel0T1); + linVel1T1 = V4NegMulSub(normalT1, accumDeltaF1, linVel1T1); + linVel0T2 = V4MulAdd(normalT2, accumDeltaF0, linVel0T2); + linVel1T2 = V4NegMulSub(normalT2, accumDeltaF1, linVel1T2); + } + + PX_ASSERT(currPtr == last); + + + //KS - we need to use PX_TRANSPOSE_44 here instead of the 34_43 variants because the W components are being used to + //store the bodies' progress counters. 
+ + PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30); + PX_TRANSPOSE_44(linVel1T0, linVel1T1, linVel1T2, linVel1T3, linVel01, linVel11, linVel21, linVel31); + PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30); + PX_TRANSPOSE_44(angState1T0, angState1T1, angState1T2, angState1T3, angState01, angState11, angState21, angState31); + + + // Write back + V4StoreA(linVel00, &b00.linearVelocity.x); + V4StoreA(linVel10, &b10.linearVelocity.x); + V4StoreA(linVel20, &b20.linearVelocity.x); + V4StoreA(linVel30, &b30.linearVelocity.x); + + V4StoreA(linVel01, &b01.linearVelocity.x); + V4StoreA(linVel11, &b11.linearVelocity.x); + V4StoreA(linVel21, &b21.linearVelocity.x); + V4StoreA(linVel31, &b31.linearVelocity.x); + + V4StoreA(angState00, &b00.angularState.x); + V4StoreA(angState10, &b10.angularState.x); + V4StoreA(angState20, &b20.angularState.x); + V4StoreA(angState30, &b30.angularState.x); + + V4StoreA(angState01, &b01.angularState.x); + V4StoreA(angState11, &b11.angularState.x); + V4StoreA(angState21, &b21.angularState.x); + V4StoreA(angState31, &b31.angularState.x); +} + + +static void solveContactCoulomb4_StaticBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/) +{ + PxSolverBody& b00 = *desc[0].bodyA; + PxSolverBody& b10 = *desc[1].bodyA; + PxSolverBody& b20 = *desc[2].bodyA; + PxSolverBody& b30 = *desc[3].bodyA; + + //We'll need this. 
+ const Vec4V vZero = V4Zero(); + + Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x); + Vec4V angState00 = V4LoadA(&b00.angularState.x); + + Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x); + Vec4V angState10 = V4LoadA(&b10.angularState.x); + + Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x); + Vec4V angState20 = V4LoadA(&b20.angularState.x); + + Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x); + Vec4V angState30 = V4LoadA(&b30.angularState.x); + + + Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3; + Vec4V angState0T0, angState0T1, angState0T2, angState0T3; + + + PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3); + PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3); + + + //hopefully pointer aliasing doesn't bite. + PxU8* PX_RESTRICT currPtr = desc[0].constraint; + + SolverContactCoulombHeader4* PX_RESTRICT firstHeader = reinterpret_cast<SolverContactCoulombHeader4*>(currPtr); + + const PxU8* PX_RESTRICT last = desc[0].constraint + firstHeader->frictionOffset; + + + //TODO - can I avoid this many tests??? 
+ while(currPtr < last) + { + + SolverContactCoulombHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverContactCoulombHeader4*>(currPtr); + + Vec4V* appliedForceBuffer = reinterpret_cast<Vec4V*>(currPtr + hdr->frictionOffset + sizeof(SolverFrictionHeader4)); + + currPtr = reinterpret_cast<PxU8*>(hdr + 1); + + const PxU32 numNormalConstr = hdr->numNormalConstr; + + SolverContact4Base* PX_RESTRICT contacts = reinterpret_cast<SolverContact4Base*>(currPtr); + + currPtr = reinterpret_cast<PxU8*>(contacts + numNormalConstr); + + const Vec4V invMass0D0 = hdr->invMassADom; + const Vec4V angD0 = hdr->angD0; + + const Vec4V normalT0 = hdr->normalX; + const Vec4V normalT1 = hdr->normalY; + const Vec4V normalT2 = hdr->normalZ; + + const Vec4V __normalVel1 = V4Mul(linVel0T0, normalT0); + const Vec4V _normalVel1 = V4MulAdd(linVel0T1, normalT1, __normalVel1); + + Vec4V normalVel1 = V4MulAdd(linVel0T2, normalT2, _normalVel1); + + Vec4V accumDeltaF = vZero; + + for(PxU32 i=0;i<numNormalConstr;i++) + { + SolverContact4Base& c = contacts[i]; + Ps::prefetchLine((&contacts[i+1])); + Ps::prefetchLine((&contacts[i+1]), 128); + Ps::prefetchLine((&contacts[i+1]), 256); + + const Vec4V appliedForce = c.appliedForce; + const Vec4V velMultiplier = c.velMultiplier; + + const Vec4V targetVel = c.targetVelocity; + const Vec4V scaledBias = c.scaledBias; + const Vec4V maxImpulse = c.maxImpulse; + + const Vec4V raXnT0 = c.raXnX; + const Vec4V raXnT1 = c.raXnY; + const Vec4V raXnT2 = c.raXnZ; + + + const Vec4V __normalVel2 = V4Mul(raXnT0, angState0T0); + + const Vec4V _normalVel2 = V4MulAdd(raXnT1, angState0T1, __normalVel2); + + const Vec4V normalVel2 = V4MulAdd(raXnT2, angState0T2, _normalVel2); + + const Vec4V biasedErr = V4MulAdd(targetVel, velMultiplier, V4Neg(scaledBias)); + + //Linear component - normal * invMass_dom + + const Vec4V normalVel(V4Add(normalVel1, normalVel2)); + + const Vec4V _deltaF = V4NegMulSub(normalVel, velMultiplier, biasedErr); + const Vec4V nAppliedForce = 
V4Neg(appliedForce); + + const Vec4V _deltaF2 = V4Max(_deltaF, nAppliedForce); + + const Vec4V _newAppliedForce(V4Add(appliedForce, _deltaF2)); + const Vec4V newAppliedForce = V4Min(_newAppliedForce, maxImpulse); + const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce); + const Vec4V deltaAngF = V4Mul(deltaF, angD0); + + normalVel1 = V4MulAdd(invMass0D0, deltaF, normalVel1); + + accumDeltaF = V4Add(deltaF, accumDeltaF); + + angState0T0 = V4MulAdd(raXnT0, deltaAngF, angState0T0); + angState0T1 = V4MulAdd(raXnT1, deltaAngF, angState0T1); + angState0T2 = V4MulAdd(raXnT2, deltaAngF, angState0T2); + + c.appliedForce = newAppliedForce; + appliedForceBuffer[i] = newAppliedForce; + } + const Vec4V scaledAccumDeltaF = V4Mul(accumDeltaF, invMass0D0); + linVel0T0 = V4MulAdd(normalT0, scaledAccumDeltaF, linVel0T0); + linVel0T1 = V4MulAdd(normalT1, scaledAccumDeltaF, linVel0T1); + linVel0T2 = V4MulAdd(normalT2, scaledAccumDeltaF, linVel0T2); + } + + PX_ASSERT(currPtr == last); + + //KS - we need to use PX_TRANSPOSE_44 here instead of the 34_43 variants because the W components are being used to + //store the bodies' progress counters. 
// Solves one iteration of the friction rows for a batch of four constraints, each
// between two solver bodies (bodyA and bodyB of desc[0..3]).
//
// desc  - array of at least four constraint descriptors. All constraint data is read
//         through desc[0].constraint; bodies are taken from desc[0..3].
// cache - unused.
//
// The eight bodies' linearVelocity/angularState are loaded, transposed so that each
// Vec4V holds one component for all four constraints, updated in place while walking
// the friction data, then transposed back and stored to the bodies.
static void solveFriction4_Block(const PxSolverConstraintDesc* PX_RESTRICT desc, SolverContext& /*cache*/)
{
	PxSolverBody& b00 = *desc[0].bodyA;
	PxSolverBody& b01 = *desc[0].bodyB;
	PxSolverBody& b10 = *desc[1].bodyA;
	PxSolverBody& b11 = *desc[1].bodyB;
	PxSolverBody& b20 = *desc[2].bodyA;
	PxSolverBody& b21 = *desc[2].bodyB;
	PxSolverBody& b30 = *desc[3].bodyA;
	PxSolverBody& b31 = *desc[3].bodyB;


	// Aligned loads: one Vec4V per body for linear velocity and for angular state.
	Vec4V linVel00 = V4LoadA(&b00.linearVelocity.x);
	Vec4V linVel01 = V4LoadA(&b01.linearVelocity.x);
	Vec4V angState00 = V4LoadA(&b00.angularState.x);
	Vec4V angState01 = V4LoadA(&b01.angularState.x);

	Vec4V linVel10 = V4LoadA(&b10.linearVelocity.x);
	Vec4V linVel11 = V4LoadA(&b11.linearVelocity.x);
	Vec4V angState10 = V4LoadA(&b10.angularState.x);
	Vec4V angState11 = V4LoadA(&b11.angularState.x);

	Vec4V linVel20 = V4LoadA(&b20.linearVelocity.x);
	Vec4V linVel21 = V4LoadA(&b21.linearVelocity.x);
	Vec4V angState20 = V4LoadA(&b20.angularState.x);
	Vec4V angState21 = V4LoadA(&b21.angularState.x);

	Vec4V linVel30 = V4LoadA(&b30.linearVelocity.x);
	Vec4V linVel31 = V4LoadA(&b31.linearVelocity.x);
	Vec4V angState30 = V4LoadA(&b30.angularState.x);
	Vec4V angState31 = V4LoadA(&b31.angularState.x);


	// Transposed state: <name><body 0|1>T<component>, each holding that component for
	// the four constraints. The T3 (W) vectors are never modified below; they are only
	// carried through to the transpose back.
	Vec4V linVel0T0, linVel0T1, linVel0T2, linVel0T3;
	Vec4V linVel1T0, linVel1T1, linVel1T2, linVel1T3;
	Vec4V angState0T0, angState0T1, angState0T2, angState0T3;
	Vec4V angState1T0, angState1T1, angState1T2, angState1T3;


	PX_TRANSPOSE_44(linVel00, linVel10, linVel20, linVel30, linVel0T0, linVel0T1, linVel0T2, linVel0T3);
	PX_TRANSPOSE_44(linVel01, linVel11, linVel21, linVel31, linVel1T0, linVel1T1, linVel1T2, linVel1T3);
	PX_TRANSPOSE_44(angState00, angState10, angState20, angState30, angState0T0, angState0T1, angState0T2, angState0T3);
	PX_TRANSPOSE_44(angState01, angState11, angState21, angState31, angState1T0, angState1T1, angState1T2, angState1T3);

	// The friction data for all four constraints is walked through desc[0] only.
	PxU8* PX_RESTRICT currPtr = desc[0].constraint;
	PxU8* PX_RESTRICT endPtr = desc[0].constraint + getConstraintLength(desc[0]);


	// Layout consumed per iteration: SolverFrictionHeader4, then numNormalConstr Vec4V
	// applied impulses, then numFrictionConstr SolverFriction4Dynamic rows.
	while(currPtr < endPtr)
	{
		SolverFrictionHeader4* PX_RESTRICT hdr = reinterpret_cast<SolverFrictionHeader4*>(currPtr);

		currPtr = reinterpret_cast<PxU8*>(hdr + 1);

		// Per-contact impulses stored directly after the header; they scale the friction
		// limit below. NOTE(review): presumably the normal impulses written by the
		// contact solve - confirm against the constraint prep code.
		Vec4V* appliedImpulses = reinterpret_cast<Vec4V*>(currPtr);

		currPtr += hdr->numNormalConstr * sizeof(Vec4V);

		Ps::prefetchLine(currPtr, 128);
		Ps::prefetchLine(currPtr,256);
		Ps::prefetchLine(currPtr,384);

		const PxU32 numFrictionConstr = hdr->numFrictionConstr;

		SolverFriction4Dynamic* PX_RESTRICT frictions = reinterpret_cast<SolverFriction4Dynamic*>(currPtr);

		// Advance past this header's friction rows so the next loop iteration starts at
		// the following header.
		currPtr = reinterpret_cast<PxU8*>(frictions + hdr->numFrictionConstr);

		const PxU32 maxFrictionConstr = numFrictionConstr;

		const Vec4V staticFric = hdr->staticFriction;

		const Vec4V invMass0D0 = hdr->invMassADom;
		const Vec4V invMass1D1 = hdr->invMassBDom;

		const Vec4V angD0 = hdr->angD0;
		const Vec4V angD1 = hdr->angD1;

		for(PxU32 i=0;i<maxFrictionConstr;i++)
		{
			SolverFriction4Dynamic& f = frictions[i];
			Ps::prefetchLine((&f)+1);
			Ps::prefetchLine((&f)+1,128);
			Ps::prefetchLine((&f)+1,256);
			Ps::prefetchLine((&f)+1,384);

			// Friction row i uses the impulse of contact (i >> frictionPerContact).
			const Vec4V appliedImpulse = appliedImpulses[i>>hdr->frictionPerContact];

			// Friction impulse is limited to +/- (staticFriction * appliedImpulse).
			const Vec4V maxFriction = V4Mul(staticFric, appliedImpulse);

			const Vec4V nMaxFriction = V4Neg(maxFriction);

			const Vec4V normalX = f.normalX;
			const Vec4V normalY = f.normalY;
			const Vec4V normalZ = f.normalZ;

			const Vec4V raXnX = f.raXnX;
			const Vec4V raXnY = f.raXnY;
			const Vec4V raXnZ = f.raXnZ;

			const Vec4V rbXnX = f.rbXnX;
			const Vec4V rbXnY = f.rbXnY;
			const Vec4V rbXnZ = f.rbXnZ;

			const Vec4V appliedForce(f.appliedForce);
			const Vec4V velMultiplier(f.velMultiplier);
			const Vec4V targetVel(f.targetVelocity);

			//4 x 4 Dot3 products encoded as 8 M44 transposes, 4 MulV and 8 MulAdd ops

			const Vec4V __normalVel1 = V4Mul(linVel0T0, normalX);
			const Vec4V __normalVel2 = V4Mul(raXnX, angState0T0);
			const Vec4V __normalVel3 = V4Mul(linVel1T0, normalX);
			const Vec4V __normalVel4 = V4Mul(rbXnX, angState1T0);

			const Vec4V _normalVel1 = V4MulAdd(linVel0T1, normalY, __normalVel1);
			const Vec4V _normalVel2 = V4MulAdd(raXnY, angState0T1, __normalVel2);
			const Vec4V _normalVel3 = V4MulAdd(linVel1T1, normalY, __normalVel3);
			const Vec4V _normalVel4 = V4MulAdd(rbXnY, angState1T1, __normalVel4);

			const Vec4V normalVel1 = V4MulAdd(linVel0T2, normalZ, _normalVel1);
			const Vec4V normalVel2 = V4MulAdd(raXnZ, angState0T2, _normalVel2);
			const Vec4V normalVel3 = V4MulAdd(linVel1T2, normalZ, _normalVel3);
			const Vec4V normalVel4 = V4MulAdd(rbXnZ, angState1T2, _normalVel4);


			// Body 0 (linear + angular) and body 1 (linear + angular) contributions.
			const Vec4V _normalVel = V4Add(normalVel1, normalVel2);
			const Vec4V __normalVel = V4Add(normalVel3, normalVel4);

			// Velocity of body 0 relative to body 1 along this friction row.
			const Vec4V normalVel = V4Sub(_normalVel, __normalVel );

			// Unclamped new impulse from appliedForce, targetVel and the relative velocity.
			// NOTE(review): assumes V4NegMulSub(a, b, c) computes c - a*b - confirm.
			const Vec4V tmp = V4NegMulSub(targetVel, velMultiplier, appliedForce);
			Vec4V newAppliedForce = V4MulAdd(normalVel, velMultiplier, tmp);
			newAppliedForce = V4Clamp(newAppliedForce,nMaxFriction, maxFriction);
			const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce);

			const Vec4V deltaLinF0 = V4Mul(invMass0D0, deltaF);
			const Vec4V deltaLinF1 = V4Mul(invMass1D1, deltaF);

			const Vec4V deltaAngF0 = V4Mul(angD0, deltaF);
			const Vec4V deltaAngF1 = V4Mul(angD1, deltaF);


			// Apply the impulse change immediately so later rows see the updated
			// velocities: added to body 0, subtracted from body 1.
			linVel0T0 = V4MulAdd(normalX, deltaLinF0, linVel0T0);
			linVel1T0 = V4NegMulSub(normalX, deltaLinF1, linVel1T0);
			angState0T0 = V4MulAdd(raXnX, deltaAngF0, angState0T0);
			angState1T0 = V4NegMulSub(rbXnX, deltaAngF1, angState1T0);

			linVel0T1 = V4MulAdd(normalY, deltaLinF0, linVel0T1);
			linVel1T1 = V4NegMulSub(normalY, deltaLinF1, linVel1T1);
			angState0T1 = V4MulAdd(raXnY, deltaAngF0, angState0T1);
			angState1T1 = V4NegMulSub(rbXnY, deltaAngF1, angState1T1);

			linVel0T2 = V4MulAdd(normalZ, deltaLinF0, linVel0T2);
			linVel1T2 = V4NegMulSub(normalZ, deltaLinF1, linVel1T2);
			angState0T2 = V4MulAdd(raXnZ, deltaAngF0, angState0T2);
			angState1T2 = V4NegMulSub(rbXnZ, deltaAngF1, angState1T2);

			// Persist the accumulated impulse for the next solver iteration.
			f.appliedForce = newAppliedForce;
		}
	}

	PX_ASSERT(currPtr == endPtr);

	//KS - we need to use PX_TRANSPOSE_44 here instead of the 34_43 variants because the W components are being used to
	//store the bodies' progress counters.

	PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30);
	PX_TRANSPOSE_44(linVel1T0, linVel1T1, linVel1T2, linVel1T3, linVel01, linVel11, linVel21, linVel31);
	PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30);
	PX_TRANSPOSE_44(angState1T0, angState1T1, angState1T2, angState1T3, angState01, angState11, angState21, angState31);


	// Write back the updated state of all eight bodies.
	V4StoreA(linVel00, &b00.linearVelocity.x);
	V4StoreA(linVel10, &b10.linearVelocity.x);
	V4StoreA(linVel20, &b20.linearVelocity.x);
	V4StoreA(linVel30, &b30.linearVelocity.x);

	V4StoreA(linVel01, &b01.linearVelocity.x);
	V4StoreA(linVel11, &b11.linearVelocity.x);
	V4StoreA(linVel21, &b21.linearVelocity.x);
	V4StoreA(linVel31, &b31.linearVelocity.x);

	V4StoreA(angState00, &b00.angularState.x);
	V4StoreA(angState10, &b10.angularState.x);
	V4StoreA(angState20, &b20.angularState.x);
	V4StoreA(angState30, &b30.angularState.x);

	V4StoreA(angState01, &b01.angularState.x);
	V4StoreA(angState11, &b11.angularState.x);
	V4StoreA(angState21, &b21.angularState.x);
	V4StoreA(angState31, &b31.angularState.x);

}
numFrictionConstr; + + const Vec4V staticFric = hdr->staticFriction; + + const Vec4V invMass0D0 = hdr->invMassADom; + const Vec4V angD0 = hdr->angD0; + + for(PxU32 i=0;i<maxFrictionConstr;i++) + { + SolverFriction4Base& f = frictions[i]; + Ps::prefetchLine((&f)+1); + Ps::prefetchLine((&f)+1,128); + Ps::prefetchLine((&f)+1,256); + + const Vec4V appliedImpulse = appliedImpulses[i>>hdr->frictionPerContact]; + + const Vec4V maxFriction = V4Mul(staticFric, appliedImpulse); + + const Vec4V nMaxFriction = V4Neg(maxFriction); + + const Vec4V normalX = f.normalX; + const Vec4V normalY = f.normalY; + const Vec4V normalZ = f.normalZ; + + const Vec4V raXnX = f.raXnX; + const Vec4V raXnY = f.raXnY; + const Vec4V raXnZ = f.raXnZ; + + const Vec4V appliedForce(f.appliedForce); + const Vec4V velMultiplier(f.velMultiplier); + const Vec4V targetVel(f.targetVelocity); + + //4 x 4 Dot3 products encoded as 8 M44 transposes, 4 MulV and 8 MulAdd ops + + const Vec4V __normalVel1 = V4Mul(linVel0T0, normalX); + const Vec4V __normalVel2 = V4Mul(raXnX, angState0T0); + + const Vec4V _normalVel1 = V4MulAdd(linVel0T1, normalY, __normalVel1); + const Vec4V _normalVel2 = V4MulAdd(raXnY, angState0T1, __normalVel2); + + const Vec4V normalVel1 = V4MulAdd(linVel0T2, normalZ, _normalVel1); + const Vec4V normalVel2 = V4MulAdd(raXnZ, angState0T2, _normalVel2); + + const Vec4V delLinVel00 = V4Mul(normalX, invMass0D0); + + const Vec4V delLinVel10 = V4Mul(normalY, invMass0D0); + + const Vec4V normalVel = V4Add(normalVel1, normalVel2); + + const Vec4V delLinVel20 = V4Mul(normalZ, invMass0D0); + + const Vec4V tmp = V4NegMulSub(targetVel, velMultiplier, appliedForce); + + Vec4V newAppliedForce = V4MulAdd(normalVel, velMultiplier, tmp); + newAppliedForce = V4Clamp(newAppliedForce,nMaxFriction, maxFriction); + const Vec4V deltaF = V4Sub(newAppliedForce, appliedForce); + + const Vec4V deltaAngF0 = V4Mul(angD0, deltaF); + + linVel0T0 = V4MulAdd(delLinVel00, deltaF, linVel0T0); + angState0T0 = V4MulAdd(raXnX, 
deltaAngF0, angState0T0); + + linVel0T1 = V4MulAdd(delLinVel10, deltaF, linVel0T1); + angState0T1 = V4MulAdd(raXnY, deltaAngF0, angState0T1); + + linVel0T2 = V4MulAdd(delLinVel20, deltaF, linVel0T2); + angState0T2 = V4MulAdd(raXnZ, deltaAngF0, angState0T2); + + f.appliedForce = newAppliedForce; + } + } + + PX_ASSERT(currPtr == endPtr); + + //KS - we need to use PX_TRANSPOSE_44 here instead of the 34_43 variants because the W components are being used to + //store the bodies' progress counters. + + PX_TRANSPOSE_44(linVel0T0, linVel0T1, linVel0T2, linVel0T3, linVel00, linVel10, linVel20, linVel30); + PX_TRANSPOSE_44(angState0T0, angState0T1, angState0T2, angState0T3, angState00, angState10, angState20, angState30); + + // Write back + // Write back + V4StoreA(linVel00, &b00.linearVelocity.x); + V4StoreA(linVel10, &b10.linearVelocity.x); + V4StoreA(linVel20, &b20.linearVelocity.x); + V4StoreA(linVel30, &b30.linearVelocity.x); + + V4StoreA(angState00, &b00.angularState.x); + V4StoreA(angState10, &b10.angularState.x); + V4StoreA(angState20, &b20.angularState.x); + V4StoreA(angState30, &b30.angularState.x); +} + +static void concludeContactCoulomb4(const PxSolverConstraintDesc* desc, SolverContext& /*cache*/) +{ + PxU8* PX_RESTRICT cPtr = desc[0].constraint; + + const Vec4V zero = V4Zero(); + + const SolverContactCoulombHeader4* PX_RESTRICT firstHeader = reinterpret_cast<const SolverContactCoulombHeader4*>(cPtr); + PxU8* PX_RESTRICT last = desc[0].constraint + firstHeader->frictionOffset; + + PxU32 pointStride = firstHeader->type == DY_SC_TYPE_BLOCK_RB_CONTACT ? 
sizeof(SolverContact4Dynamic) : sizeof(SolverContact4Base); + + while(cPtr < last) + { + const SolverContactCoulombHeader4* PX_RESTRICT hdr = reinterpret_cast<const SolverContactCoulombHeader4*>(cPtr); + cPtr += sizeof(SolverContactCoulombHeader4); + + const PxU32 numNormalConstr = hdr->numNormalConstr; + + //if(cPtr < last) + //Ps::prefetchLine(cPtr, 512); + Ps::prefetchLine(cPtr,128); + Ps::prefetchLine(cPtr,256); + Ps::prefetchLine(cPtr,384); + + for(PxU32 i=0;i<numNormalConstr;i++) + { + SolverContact4Base *c = reinterpret_cast<SolverContact4Base*>(cPtr); + cPtr += pointStride; + c->scaledBias = V4Max(c->scaledBias, zero); + } + } + PX_ASSERT(cPtr == last); +} + +void writeBackContactCoulomb4(const PxSolverConstraintDesc* desc, SolverContext& cache, + const PxSolverBodyData** PX_RESTRICT bd0, const PxSolverBodyData** PX_RESTRICT bd1) +{ + Vec4V normalForceV = V4Zero(); + PxU8* PX_RESTRICT cPtr = desc[0].constraint; + PxReal* PX_RESTRICT vForceWriteback0 = reinterpret_cast<PxReal*>(desc[0].writeBack); + PxReal* PX_RESTRICT vForceWriteback1 = reinterpret_cast<PxReal*>(desc[1].writeBack); + PxReal* PX_RESTRICT vForceWriteback2 = reinterpret_cast<PxReal*>(desc[2].writeBack); + PxReal* PX_RESTRICT vForceWriteback3 = reinterpret_cast<PxReal*>(desc[3].writeBack); + + const SolverContactCoulombHeader4* PX_RESTRICT firstHeader = reinterpret_cast<const SolverContactCoulombHeader4*>(cPtr); + PxU8* PX_RESTRICT last = desc[0].constraint + firstHeader->frictionOffset; + + const PxU32 pointStride = firstHeader->type == DY_SC_TYPE_BLOCK_RB_CONTACT ? 
sizeof(SolverContact4Dynamic) + : sizeof(SolverContact4Base); + + bool writeBackThresholds[4] = {false, false, false, false}; + + + while(cPtr < last) + { + const SolverContactCoulombHeader4* PX_RESTRICT hdr = reinterpret_cast<const SolverContactCoulombHeader4*>(cPtr); + cPtr += sizeof(SolverContactCoulombHeader4); + + writeBackThresholds[0] = hdr->flags[0] & SolverContactHeader::eHAS_FORCE_THRESHOLDS; + writeBackThresholds[1] = hdr->flags[1] & SolverContactHeader::eHAS_FORCE_THRESHOLDS; + writeBackThresholds[2] = hdr->flags[2] & SolverContactHeader::eHAS_FORCE_THRESHOLDS; + writeBackThresholds[3] = hdr->flags[3] & SolverContactHeader::eHAS_FORCE_THRESHOLDS; + + const PxU32 numNormalConstr = hdr->numNormalConstr; + + Ps::prefetchLine(cPtr, 256); + Ps::prefetchLine(cPtr, 384); + + + for(PxU32 i=0; i<numNormalConstr; i++) + { + SolverContact4Base* c = reinterpret_cast<SolverContact4Base*>(cPtr); + cPtr += pointStride; + + const Vec4V appliedForce = c->appliedForce; + if(vForceWriteback0 && i < hdr->numNormalConstr0) + FStore(V4GetX(appliedForce), vForceWriteback0++); + if(vForceWriteback1 && i < hdr->numNormalConstr1) + FStore(V4GetY(appliedForce), vForceWriteback1++); + if(vForceWriteback2 && i < hdr->numNormalConstr2) + FStore(V4GetZ(appliedForce), vForceWriteback2++); + if(vForceWriteback3 && i < hdr->numNormalConstr3) + FStore(V4GetW(appliedForce), vForceWriteback3++); + + normalForceV = V4Add(normalForceV, appliedForce); + } + } + PX_ASSERT(cPtr == last); + + PX_ALIGN(16, PxReal nf[4]); + V4StoreA(normalForceV, nf); + + //all constraint pointer in descs are the same constraint + Sc::ShapeInteraction** shapeInteractions = reinterpret_cast<SolverContactCoulombHeader4*>(desc[0].constraint)->shapeInteraction; + + for(PxU32 a = 0; a < 4; ++a) + { + if(writeBackThresholds[a] && desc[a].linkIndexA == PxSolverConstraintDesc::NO_LINK && desc[a].linkIndexB == PxSolverConstraintDesc::NO_LINK && + nf[a] !=0.f && (bd0[a]->reportThreshold < PX_MAX_REAL || 
bd1[a]->reportThreshold < PX_MAX_REAL)) + { + ThresholdStreamElement elt; + elt.normalForce = nf[a]; + elt.threshold = PxMin<float>(bd0[a]->reportThreshold, bd1[a]->reportThreshold); + elt.nodeIndexA = bd0[a]->nodeIndex; + elt.nodeIndexB = bd1[a]->nodeIndex; + elt.shapeInteraction = shapeInteractions[a]; + Ps::order(elt.nodeIndexA, elt.nodeIndexB); + PX_ASSERT(elt.nodeIndexA < elt.nodeIndexB); + PX_ASSERT(cache.mThresholdStreamIndex<cache.mThresholdStreamLength); + cache.mThresholdStream[cache.mThresholdStreamIndex++] = elt; + } + } +} + +void solveContactCoulombPreBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveContactCoulomb4_Block(desc, cache); +} + +void solveContactCoulombPreBlock_Static(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveContactCoulomb4_StaticBlock(desc, cache); +} + +void solveContactCoulombPreBlock_Conclude(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveContactCoulomb4_Block(desc, cache); + concludeContactCoulomb4(desc, cache); +} + +void solveContactCoulombPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveContactCoulomb4_StaticBlock(desc, cache); + concludeContactCoulomb4(desc, cache); +} + +void solveContactCoulombPreBlock_WriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveContactCoulomb4_Block(desc, cache); + + const PxSolverBodyData* bd0[4] = { &cache.solverBodyArray[desc[0].bodyADataIndex], + &cache.solverBodyArray[desc[1].bodyADataIndex], + &cache.solverBodyArray[desc[2].bodyADataIndex], + &cache.solverBodyArray[desc[3].bodyADataIndex]}; + + const PxSolverBodyData* bd1[4] = { &cache.solverBodyArray[desc[0].bodyBDataIndex], + &cache.solverBodyArray[desc[1].bodyBDataIndex], + 
&cache.solverBodyArray[desc[2].bodyBDataIndex], + &cache.solverBodyArray[desc[3].bodyBDataIndex]}; + + + + writeBackContactCoulomb4(desc, cache, bd0, bd1); + + if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4)) + { + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a) + { + cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a]; + } + cache.mThresholdStreamIndex = 0; + } +} + +void solveContactCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveContactCoulomb4_StaticBlock(desc, cache); + const PxSolverBodyData* bd0[4] = { &cache.solverBodyArray[desc[0].bodyADataIndex], + &cache.solverBodyArray[desc[1].bodyADataIndex], + &cache.solverBodyArray[desc[2].bodyADataIndex], + &cache.solverBodyArray[desc[3].bodyADataIndex]}; + + const PxSolverBodyData* bd1[4] = { &cache.solverBodyArray[desc[0].bodyBDataIndex], + &cache.solverBodyArray[desc[1].bodyBDataIndex], + &cache.solverBodyArray[desc[2].bodyBDataIndex], + &cache.solverBodyArray[desc[3].bodyBDataIndex]}; + + writeBackContactCoulomb4(desc, cache, bd0, bd1); + + if(cache.mThresholdStreamIndex > (cache.mThresholdStreamLength - 4)) + { + //Write back to global buffer + PxI32 threshIndex = physx::shdfnd::atomicAdd(cache.mSharedOutThresholdPairs, PxI32(cache.mThresholdStreamIndex)) - PxI32(cache.mThresholdStreamIndex); + for(PxU32 a = 0; a < cache.mThresholdStreamIndex; ++a) + { + cache.mSharedThresholdStream[a + threshIndex] = cache.mThresholdStream[a]; + } + cache.mThresholdStreamIndex = 0; + } +} + +void solveFrictionCoulombPreBlock(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveFriction4_Block(desc, cache); +} + +void 
solveFrictionCoulombPreBlock_Static(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveFriction4_StaticBlock(desc, cache); +} + +void solveFrictionCoulombPreBlock_Conclude(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveFriction4_Block(desc, cache); +} + +void solveFrictionCoulombPreBlock_ConcludeStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveFriction4_StaticBlock(desc, cache); +} + +void solveFrictionCoulombPreBlock_WriteBack(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveFriction4_Block(desc, cache); +} + +void solveFrictionCoulombPreBlock_WriteBackStatic(const PxSolverConstraintDesc* PX_RESTRICT desc, const PxU32 /*constraintCount*/, SolverContext& cache) +{ + solveFriction4_StaticBlock(desc, cache); +} + + +} + +} + diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DySpatial.h b/PhysX_3.4/Source/LowLevelDynamics/src/DySpatial.h new file mode 100644 index 00000000..e27406b3 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DySpatial.h @@ -0,0 +1,142 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef DY_SPATIAL_H +#define DY_SPATIAL_H + +#include "foundation/PxVec3.h" +#include "foundation/PxTransform.h" +#include "PsMathUtils.h" +#include "CmSpatialVector.h" + +namespace physx +{ +namespace Dy +{ +// translate a motion resolved at position p to the origin + + +// should have a 'from' frame and a 'to' frame +class SpInertia +{ +public: + SpInertia() {} + + SpInertia(const PxMat33& ll, const PxMat33& la, const PxMat33& aa): mLL(ll), mLA(la), mAA(aa) + { + } + + static SpInertia getZero() + { + return SpInertia(PxMat33(PxZero), PxMat33(PxZero), + PxMat33(PxZero)); + } + + static SpInertia dyad(const Cm::SpatialVector& column, const Cm::SpatialVector& row) + { + return SpInertia(dyad(column.linear, row.linear), + dyad(column.linear, row.angular), + dyad(column.angular, row.angular)); + } + + + static SpInertia inertia(PxReal mass, const PxVec3& inertia) + { + return SpInertia(PxMat33::createDiagonal(PxVec3(mass,mass,mass)), PxMat33(PxZero), + PxMat33::createDiagonal(inertia)); + } + + + SpInertia operator+(const SpInertia& m) const + { + return SpInertia(mLL+m.mLL, mLA+m.mLA, mAA+m.mAA); + } + + SpInertia operator-(const SpInertia& m) const + { + return SpInertia(mLL-m.mLL, mLA-m.mLA, mAA-m.mAA); + } + + SpInertia operator*(PxReal r) const + { + return SpInertia(mLL*r, mLA*r, mAA*r); + } + + void operator+=(const SpInertia& m) + { + mLL+=m.mLL; + mLA+=m.mLA; + mAA+=m.mAA; + } + + void operator-=(const SpInertia& m) + { + mLL-=m.mLL; + mLA-=m.mLA; + mAA-=m.mAA; + } + + + PX_FORCE_INLINE Cm::SpatialVector operator *(const Cm::SpatialVector& v) const + { + return Cm::SpatialVector(mLL*v.linear +mLA*v.angular, + mLA.transformTranspose(v.linear)+mAA*v.angular); + } + + SpInertia operator *(const SpInertia& v) const + { + return SpInertia(mLL*v.mLL + mLA * v.mLA.getTranspose(), + mLL*v.mLA + mLA * v.mAA, + mLA.getTranspose()*v.mLA + mAA * v.mAA); + } + + + bool isFinite() const + { + return true; +// return mLL.isFinite() && mLA.isFinite() && 
mAA.isFinite(); + } + + PxMat33 mLL, mLA; // linear force from angular motion, linear force from linear motion + PxMat33 mAA; // angular force from angular motion, mAL = mLA.transpose() + +private: + static PxMat33 dyad(PxVec3 col, PxVec3 row) + { + return PxMat33(col*row.x, col*row.y, col*row.z); + } + + +}; + +} +} + +#endif //DY_SPATIAL_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.cpp new file mode 100644 index 00000000..5526b83a --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.cpp @@ -0,0 +1,110 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + + +#include "DyThreadContext.h" +#include "PsBitUtils.h" + +namespace physx +{ +namespace Dy +{ + +ThreadContext::ThreadContext(PxcNpMemBlockPool* memBlockPool): + mFrictionPatchStreamPair(*memBlockPool), + mConstraintBlockManager (*memBlockPool), + mConstraintBlockStream (*memBlockPool), + mNumDifferentBodyConstraints(0), + mNumSelfConstraints(0), + mNumSelfConstraintBlocks(0), + mConstraintsPerPartition(PX_DEBUG_EXP("ThreadContext::mConstraintsPerPartition")), + mFrictionConstraintsPerPartition(PX_DEBUG_EXP("ThreadContext::frictionsConstraintsPerPartition")), + mPartitionNormalizationBitmap(PX_DEBUG_EXP("ThreadContext::mPartitionNormalizationBitmap")), + frictionConstraintDescArray(PX_DEBUG_EXP("ThreadContext::solverFrictionConstraintArray")), + frictionConstraintBatchHeaders(PX_DEBUG_EXP("ThreadContext::frictionConstraintBatchHeaders")), + compoundConstraints(PX_DEBUG_EXP("ThreadContext::compoundConstraints")), + orderedContactList(PX_DEBUG_EXP("ThreadContext::orderedContactList")), + tempContactList(PX_DEBUG_EXP("ThreadContext::tempContactList")), + sortIndexArray(PX_DEBUG_EXP("ThreadContext::sortIndexArray")), + mConstraintSize (0), + mAxisConstraintCount(0), + mSelfConstraintBlocks(NULL), + mMaxPartitions(0), + mMaxSolverPositionIterations(0), + mMaxSolverVelocityIterations(0), + mMaxArticulationLength(0), + mContactDescPtr(NULL), + mFrictionDescPtr(NULL), + mArticulations(PX_DEBUG_EXP("ThreadContext::articulations")) + +{ +#if PX_ENABLE_SIM_STATS + mThreadSimStats.clear(); +#endif + //Defaulted to have space for 16384 bodies + mPartitionNormalizationBitmap.reserve(512); + 
//Defaulted to have space for 128 partitions (should be more-than-enough) + mConstraintsPerPartition.reserve(128); +} + +void ThreadContext::resizeArrays(PxU32 frictionConstraintDescCount, PxU32 articulationCount) +{ + // resize resizes smaller arrays to the exact target size, which can generate a lot of churn + frictionConstraintDescArray.forceSize_Unsafe(0); + frictionConstraintDescArray.reserve((frictionConstraintDescCount+63)&~63); + + mArticulations.forceSize_Unsafe(0); + mArticulations.reserve(PxMax<PxU32>(Ps::nextPowerOfTwo(articulationCount), 16)); + mArticulations.forceSize_Unsafe(articulationCount); + + mContactDescPtr = contactConstraintDescArray; + mFrictionDescPtr = frictionConstraintDescArray.begin(); +} + +void ThreadContext::reset() +{ + // TODO: move these to the PxcNpThreadContext + mFrictionPatchStreamPair.reset(); + mConstraintBlockStream.reset(); + + mContactDescPtr = contactConstraintDescArray; + mFrictionDescPtr = frictionConstraintDescArray.begin(); + + mAxisConstraintCount = 0; + mMaxSolverPositionIterations = 0; + mMaxSolverVelocityIterations = 0; + mNumDifferentBodyConstraints = 0; + mNumSelfConstraints = 0; + mSelfConstraintBlocks = NULL; + mNumSelfConstraintBlocks = 0; + mConstraintSize = 0; +} + +} +} diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.h b/PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.h new file mode 100644 index 00000000..a958ac23 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyThreadContext.h @@ -0,0 +1,203 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#ifndef DY_THREADCONTEXT_H +#define DY_THREADCONTEXT_H + +#include "foundation/PxTransform.h" +#include "PxvConfig.h" +#include "CmBitMap.h" +#include "CmMatrix34.h" +#include "PxcThreadCoherentCache.h" +#include "DyThresholdTable.h" +#include "PsAllocator.h" +#include "PsAllocator.h" +#include "GuContactBuffer.h" +#include "DySolverConstraintDesc.h" +#include "PxvDynamics.h" +#include "DyArticulation.h" +#include "DyFrictionPatchStreamPair.h" +#include "PxcConstraintBlockStream.h" +#include "DyCorrelationBuffer.h" + +namespace physx +{ +struct PxsIndexedContactManager; + +namespace Dy +{ + +/*! +Cache information specific to the software implementation(non common). + +See PxcgetThreadContext. + +Not thread-safe, so remember to have one object per thread! + +TODO! refactor this and rename(it is a general per thread cache). Move transform cache into its own class. +*/ +class ThreadContext : + public PxcThreadCoherentCache<ThreadContext, PxcNpMemBlockPool>::EntryBase +{ + PX_NOCOPY(ThreadContext) +public: + +#if PX_ENABLE_SIM_STATS + struct ThreadSimStats + { + void clear() + { + + numActiveConstraints = 0; + numActiveDynamicBodies = 0; + numActiveKinematicBodies = 0; + numAxisSolverConstraints = 0; + + } + + PxU32 numActiveConstraints; + PxU32 numActiveDynamicBodies; + PxU32 numActiveKinematicBodies; + PxU32 numAxisSolverConstraints; + + }; +#endif + + //TODO: tune cache size based on number of active objects. 
+ ThreadContext(PxcNpMemBlockPool* memBlockPool); + void reset(); + void resizeArrays(PxU32 frictionConstraintDescCount, PxU32 articulationCount); + + PX_FORCE_INLINE Ps::Array<ArticulationSolverDesc>& getArticulations() { return mArticulations; } + + +#if PX_ENABLE_SIM_STATS + PX_FORCE_INLINE ThreadSimStats& getSimStats() + { + return mThreadSimStats; + } +#endif + + Gu::ContactBuffer mContactBuffer; + + // temporary buffer for correlation + PX_ALIGN(16, CorrelationBuffer mCorrelationBuffer); + + FrictionPatchStreamPair mFrictionPatchStreamPair; // patch streams + + PxsConstraintBlockManager mConstraintBlockManager; // for when this thread context is "lead" on an island + PxcConstraintBlockStream mConstraintBlockStream; // constraint block pool + + + // this stuff is just used for reformatting the solver data. Hopefully we should have a more + // sane format for this when the dust settles - so it's just temporary. If we keep this around + // here we should move these from public to private + + PxU32 mNumDifferentBodyConstraints; + PxU32 mNumDifferentBodyFrictionConstraints; + PxU32 mNumSelfConstraints; + PxU32 mNumSelfFrictionConstraints; + PxU32 mNumSelfConstraintBlocks; + PxU32 mNumSelfConstraintFrictionBlocks; + + Ps::Array<PxU32> mConstraintsPerPartition; + Ps::Array<PxU32> mFrictionConstraintsPerPartition; + Ps::Array<PxU32> mPartitionNormalizationBitmap; + PxsBodyCore** mBodyCoreArray; + PxsRigidBody** mRigidBodyArray; + Articulation** mArticulationArray; + Cm::SpatialVector* motionVelocityArray; + PxU32* bodyRemapTable; + PxU32* mNodeIndexArray; + + //Constraint info for normal constraint sovler + PxSolverConstraintDesc* contactConstraintDescArray; + PxU32 contactDescArraySize; + PxSolverConstraintDesc* orderedContactConstraints; + PxConstraintBatchHeader* contactConstraintBatchHeaders; + PxU32 numContactConstraintBatches; + + //Constraint info for partitioning + PxSolverConstraintDesc* tempConstraintDescArray; + + //Additional constraint info for 1d/2d 
friction model + Ps::Array<PxSolverConstraintDesc> frictionConstraintDescArray; + Ps::Array<PxConstraintBatchHeader> frictionConstraintBatchHeaders; + + //Info for tracking compound contact managers (temporary data - could use scratch memory!) + Ps::Array<CompoundContactManager> compoundConstraints; + + //Used for sorting constraints. Temporary, could use scratch memory + Ps::Array<const PxsIndexedContactManager*> orderedContactList; + Ps::Array<const PxsIndexedContactManager*> tempContactList; + Ps::Array<PxU32> sortIndexArray; + + PxU32 numDifferentBodyBatchHeaders; + PxU32 numSelfConstraintBatchHeaders; + + + PxU32 mOrderedContactDescCount; + PxU32 mOrderedFrictionDescCount; + + PxU32 mConstraintSize; + + PxU32 mAxisConstraintCount; + SelfConstraintBlock* mSelfConstraintBlocks; + + SelfConstraintBlock* mSelfConstraintFrictionBlocks; + + PxU32 mMaxPartitions; + PxU32 mMaxFrictionPartitions; + PxU32 mMaxSolverPositionIterations; + PxU32 mMaxSolverVelocityIterations; + PxU32 mMaxArticulationLength; + PxU32 mMaxArticulationSolverLength; + + PxSolverConstraintDesc* mContactDescPtr; + PxSolverConstraintDesc* mStartContactDescPtr; + PxSolverConstraintDesc* mFrictionDescPtr; + +private: + + Ps::Array<ArticulationSolverDesc> mArticulations; + +#if PX_ENABLE_SIM_STATS + ThreadSimStats mThreadSimStats; +#endif + + public: + +}; + +} + +} + +#endif //DY_THREADCONTEXT_H diff --git a/PhysX_3.4/Source/LowLevelDynamics/src/DyThresholdTable.cpp b/PhysX_3.4/Source/LowLevelDynamics/src/DyThresholdTable.cpp new file mode 100644 index 00000000..b7b613f6 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelDynamics/src/DyThresholdTable.cpp @@ -0,0 +1,68 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxMemory.h" +#include "DyThresholdTable.h" +#include "PsHash.h" +#include "PsUtilities.h" +#include "PsAllocator.h" + +namespace physx +{ + namespace Dy + { + bool ThresholdTable::check(const ThresholdStream& stream, const PxU32 nodeIndexA, const PxU32 nodeIndexB, PxReal dt) + { + PxU32* PX_RESTRICT hashes = mHash; + PxU32* PX_RESTRICT nextIndices = mNexts; + Pair* PX_RESTRICT pairs = mPairs; + + /*const PxsRigidBody* b0 = PxMin(body0, body1); + const PxsRigidBody* b1 = PxMax(body0, body1);*/ + + const PxU32 nA = PxMin(nodeIndexA, nodeIndexB); + const PxU32 nB = PxMax(nodeIndexA, nodeIndexB); + + PxU32 hashKey = computeHashKey(nodeIndexA, nodeIndexB, mHashSize); + + PxU32 pairIndex = hashes[hashKey]; + while(NO_INDEX != pairIndex) + { + Pair& pair = pairs[pairIndex]; + const PxU32 thresholdStreamIndex = pair.thresholdStreamIndex; + PX_ASSERT(thresholdStreamIndex < stream.size()); + const ThresholdStreamElement& otherElement = stream[thresholdStreamIndex]; + if(otherElement.nodeIndexA==nA && otherElement.nodeIndexB==nB) + return (pair.accumulatedForce > (otherElement.threshold * dt)); + pairIndex = nextIndices[pairIndex]; + } + return false; + } + } +} |