From b350eb5f4d44e8448115796144375d79438d74ae Mon Sep 17 00:00:00 2001 From: Marijn Tamis Date: Fri, 28 Apr 2017 14:19:07 +0200 Subject: NvCloth 1.1.0 Release. (22041545) --- NvCloth/src/SwCollision.cpp | 990 ++++++++++++++++++++++---------------------- 1 file changed, 495 insertions(+), 495 deletions(-) (limited to 'NvCloth/src/SwCollision.cpp') diff --git a/NvCloth/src/SwCollision.cpp b/NvCloth/src/SwCollision.cpp index 3774795..89df8a5 100644 --- a/NvCloth/src/SwCollision.cpp +++ b/NvCloth/src/SwCollision.cpp @@ -42,8 +42,8 @@ using namespace nv; using namespace physx; // the particle trajectory needs to penetrate more than 0.2 * radius to trigger continuous collision -template -const Simd4f cloth::SwCollision::sSkeletonWidth = simd4f(cloth::sqr(1 - 0.2f) - 1); +template +const T4f cloth::SwCollision::sSkeletonWidth = simd4f(cloth::sqr(1 - 0.2f) - 1); #if NV_SIMD_SSE2 const Simd4i cloth::Gather::sIntSignBit = simd4i(0x80000000); @@ -66,8 +66,8 @@ const Simd4fScalarFactory sGridExpand = simd4f(1e-4f); const Simd4fTupleFactory sMinusFloatMaxXYZ = simd4f(-FLT_MAX, -FLT_MAX, -FLT_MAX, 0.0f); #if PX_PROFILE || PX_DEBUG -template -uint32_t horizontalSum(const Simd4f& x) +template +uint32_t horizontalSum(const T4f& x) { const float* p = array(x); return uint32_t(0.5f + p[0] + p[1] + p[2] + p[3]); @@ -75,8 +75,8 @@ uint32_t horizontalSum(const Simd4f& x) #endif // 7 elements are written to ptr! -template -void storeBounds(float* ptr, const cloth::BoundingBox& bounds) +template +void storeBounds(float* ptr, const cloth::BoundingBox& bounds) { store(ptr, bounds.mLower); store(ptr + 3, bounds.mUpper); @@ -128,14 +128,14 @@ namespace nv { namespace cloth { -template -BoundingBox expandBounds(const BoundingBox& bbox, const SphereData* sIt, const SphereData* sEnd) +template +BoundingBox expandBounds(const BoundingBox& bbox, const SphereData* sIt, const SphereData* sEnd) { - BoundingBox result = bbox; + BoundingBox result = bbox; for (; sIt != sEnd; ++sIt) { - Simd4f p = loadAligned(array(sIt->center)); - Simd4f r = splat<3>(p); + T4f p = loadAligned(array(sIt->center)); + T4f r = splat<3>(p); result.mLower = min(result.mLower, p - r); result.mUpper = max(result.mUpper, p + r); } @@ -146,8 +146,8 @@ BoundingBox expandBounds(const BoundingBox& bbox, const SphereDa namespace { -template -void generateSpheres(Simd4f* dIt, const SrcIterator& src, uint32_t count) +template +void generateSpheres(T4f* dIt, const SrcIterator& src, uint32_t count) { // have to copy out iterator to ensure alignment is maintained for (SrcIterator sIt = src; 0 < count--; ++sIt, ++dIt) @@ -192,41 +192,41 @@ void generateCones(cloth::ConeData* dst, const cloth::SphereData* sourceSpheres, } } -template -void generatePlanes(Simd4f* dIt, const SrcIterator& src, uint32_t count) +template +void generatePlanes(T4f* dIt, const SrcIterator& src, uint32_t count) { // have to copy out iterator to ensure alignment is maintained for (SrcIterator sIt = src; 0 < count--; ++sIt, ++dIt) *dIt = *sIt; } -template +template void generateTriangles(cloth::TriangleData* dIt, const SrcIterator& src, uint32_t count) { // have to copy out iterator to ensure alignment is maintained for (SrcIterator sIt = src; 0 < count--; ++dIt) { - Simd4f p0 = *sIt; + T4f p0 = *sIt; ++sIt; - Simd4f p1 = *sIt; + T4f p1 = *sIt; ++sIt; - Simd4f p2 = *sIt; + T4f p2 = *sIt; ++sIt; - Simd4f edge0 = p1 - p0; - Simd4f edge1 = p2 - p0; - Simd4f normal = cross3(edge0, edge1); + T4f edge0 = p1 - p0; + T4f edge1 = p2 - p0; + T4f normal = cross3(edge0, edge1); - Simd4f edge0SqrLength = dot3(edge0, edge0); - Simd4f edge1SqrLength = dot3(edge1, edge1); - Simd4f edge0DotEdge1 = dot3(edge0, edge1); - Simd4f normalInvLength = rsqrt(dot3(normal, normal)); + T4f edge0SqrLength = dot3(edge0, edge0); + T4f edge1SqrLength = dot3(edge1, edge1); + T4f edge0DotEdge1 = dot3(edge0, edge1); + T4f normalInvLength = rsqrt(dot3(normal, normal)); - Simd4f det = edge0SqrLength * edge1SqrLength - edge0DotEdge1 * edge0DotEdge1; - Simd4f denom = edge0SqrLength + edge1SqrLength - edge0DotEdge1 - edge0DotEdge1; + T4f det = edge0SqrLength * edge1SqrLength - edge0DotEdge1 * edge0DotEdge1; + T4f denom = edge0SqrLength + edge1SqrLength - edge0DotEdge1 - edge0DotEdge1; // there are definitely faster ways... - Simd4f aux = select(sMaskX, det, denom); + T4f aux = select(sMaskX, det, denom); aux = select(sMaskZ, edge0SqrLength, aux); aux = select(sMaskW, edge1SqrLength, aux); @@ -240,14 +240,14 @@ void generateTriangles(cloth::TriangleData* dIt, const SrcIterator& src, uint32_ } // namespace -template -cloth::SwCollision::CollisionData::CollisionData() +template +cloth::SwCollision::CollisionData::CollisionData() : mSpheres(0), mCones(0) { } -template -cloth::SwCollision::SwCollision(SwClothData& clothData, SwKernelAllocator& alloc) +template +cloth::SwCollision::SwCollision(SwClothData& clothData, SwKernelAllocator& alloc) : mClothData(clothData), mAllocator(alloc) { allocate(mCurData); @@ -256,22 +256,22 @@ cloth::SwCollision::SwCollision(SwClothData& clothData, SwKernelAllocato { allocate(mPrevData); - generateSpheres(reinterpret_cast(mPrevData.mSpheres), - reinterpret_cast(clothData.mStartCollisionSpheres), clothData.mNumSpheres); + generateSpheres(reinterpret_cast(mPrevData.mSpheres), + reinterpret_cast(clothData.mStartCollisionSpheres), clothData.mNumSpheres); generateCones(mPrevData.mCones, mPrevData.mSpheres, clothData.mCapsuleIndices, clothData.mNumCapsules); } } -template -cloth::SwCollision::~SwCollision() +template +cloth::SwCollision::~SwCollision() { deallocate(mCurData); deallocate(mPrevData); } -template -void cloth::SwCollision::operator()(const IterationState& state) +template +void cloth::SwCollision::operator()(const IterationState& state) { mNumCollisions = 0; @@ -285,20 +285,20 @@ void cloth::SwCollision::operator()(const IterationState& state) bool lastIteration = state.mRemainingIterations == 1; - const Simd4f* targetSpheres = reinterpret_cast(mClothData.mTargetCollisionSpheres); + const T4f* targetSpheres = reinterpret_cast(mClothData.mTargetCollisionSpheres); // generate sphere and cone collision data if (!lastIteration) { // interpolate spheres - LerpIterator pIter(reinterpret_cast(mClothData.mStartCollisionSpheres), + LerpIterator pIter(reinterpret_cast(mClothData.mStartCollisionSpheres), targetSpheres, state.getCurrentAlpha()); - generateSpheres(reinterpret_cast(mCurData.mSpheres), pIter, mClothData.mNumSpheres); + generateSpheres(reinterpret_cast(mCurData.mSpheres), pIter, mClothData.mNumSpheres); } else { // otherwise use the target spheres directly - generateSpheres(reinterpret_cast(mCurData.mSpheres), targetSpheres, mClothData.mNumSpheres); + generateSpheres(reinterpret_cast(mCurData.mSpheres), targetSpheres, mClothData.mNumSpheres); } // generate cones even if test below fails because @@ -323,8 +323,8 @@ void cloth::SwCollision::operator()(const IterationState& state) shdfnd::swap(mCurData, mPrevData); } -template -size_t cloth::SwCollision::estimateTemporaryMemory(const SwCloth& cloth) +template +size_t cloth::SwCollision::estimateTemporaryMemory(const SwCloth& cloth) { size_t numTriangles = cloth.mStartCollisionTriangles.size(); size_t numPlanes = cloth.mStartCollisionPlanes.size(); @@ -335,8 +335,8 @@ size_t cloth::SwCollision::estimateTemporaryMemory(const SwCloth& cloth) return std::max(kTriangleDataSize, kPlaneDataSize); } -template -size_t cloth::SwCollision::estimatePersistentMemory(const SwCloth& cloth) +template +size_t cloth::SwCollision::estimatePersistentMemory(const SwCloth& cloth) { size_t numCapsules = cloth.mCapsuleIndices.size(); size_t numSpheres = cloth.mStartCollisionSpheres.size(); @@ -347,74 +347,74 @@ size_t cloth::SwCollision::estimatePersistentMemory(const SwCloth& cloth return sphereDataSize + coneDataSize; } -template -void cloth::SwCollision::allocate(CollisionData& data) +template +void cloth::SwCollision::allocate(CollisionData& data) { data.mSpheres = static_cast(mAllocator.allocate(sizeof(SphereData) * mClothData.mNumSpheres)); data.mCones = static_cast(mAllocator.allocate(sizeof(ConeData) * mClothData.mNumCapsules)); } -template -void cloth::SwCollision::deallocate(const CollisionData& data) +template +void cloth::SwCollision::deallocate(const CollisionData& data) { mAllocator.deallocate(data.mSpheres); mAllocator.deallocate(data.mCones); } -template -void cloth::SwCollision::computeBounds() +template +void cloth::SwCollision::computeBounds() { NV_CLOTH_PROFILE_ZONE("cloth::SwSolverKernel::computeBounds", /*ProfileContext::None*/ 0); - Simd4f* prevIt = reinterpret_cast(mClothData.mPrevParticles); - Simd4f* curIt = reinterpret_cast(mClothData.mCurParticles); - Simd4f* curEnd = curIt + mClothData.mNumParticles; - Simd4f floatMaxXYZ = -static_cast(sMinusFloatMaxXYZ); + T4f* prevIt = reinterpret_cast(mClothData.mPrevParticles); + T4f* curIt = reinterpret_cast(mClothData.mCurParticles); + T4f* curEnd = curIt + mClothData.mNumParticles; + T4f floatMaxXYZ = -static_cast(sMinusFloatMaxXYZ); - Simd4f lower = simd4f(FLT_MAX), upper = -lower; + T4f lower = simd4f(FLT_MAX), upper = -lower; for (; curIt < curEnd; ++curIt, ++prevIt) { - Simd4f current = *curIt; + T4f current = *curIt; lower = min(lower, current); upper = max(upper, current); // if (current.w > 0) current.w = previous.w *curIt = select(current > floatMaxXYZ, *prevIt, current); } - BoundingBox curBounds; + BoundingBox curBounds; curBounds.mLower = lower; curBounds.mUpper = upper; // don't change this order, storeBounds writes 7 floats - BoundingBox prevBounds = loadBounds(mClothData.mCurBounds); + BoundingBox prevBounds = loadBounds(mClothData.mCurBounds); storeBounds(mClothData.mCurBounds, curBounds); storeBounds(mClothData.mPrevBounds, prevBounds); } namespace { -template -Simd4i andNotIsZero(const Simd4i& left, const Simd4i& right) +template +T4i andNotIsZero(const T4i& left, const T4i& right) { return (left & ~right) == gSimd4iZero; } } // build per-axis mask arrays of spheres on the right/left of grid cell -template -void cloth::SwCollision::buildSphereAcceleration(const SphereData* sIt) +template +void cloth::SwCollision::buildSphereAcceleration(const SphereData* sIt) { static const int maxIndex = sGridSize - 1; const SphereData* sEnd = sIt + mClothData.mNumSpheres; for (uint32_t mask = 0x1; sIt != sEnd; ++sIt, mask <<= 1) { - Simd4f sphere = loadAligned(array(sIt->center)); - Simd4f radius = splat<3>(sphere); + T4f sphere = loadAligned(array(sIt->center)); + T4f radius = splat<3>(sphere); - Simd4i first = intFloor(max((sphere - radius) * mGridScale + mGridBias, gSimd4fZero)); - Simd4i last = intFloor(min((sphere + radius) * mGridScale + mGridBias, sGridLength)); + T4i first = intFloor(max((sphere - radius) * mGridScale + mGridBias, gSimd4fZero)); + T4i last = intFloor(min((sphere + radius) * mGridScale + mGridBias, sGridLength)); const int* firstIdx = array(first); const int* lastIdx = array(last); @@ -434,8 +434,8 @@ void cloth::SwCollision::buildSphereAcceleration(const SphereData* sIt) } // generate cone masks from sphere masks -template -void cloth::SwCollision::buildConeAcceleration() +template +void cloth::SwCollision::buildConeAcceleration() { const ConeData* coneIt = mCurData.mCones; const ConeData* coneEnd = coneIt + mClothData.mNumCapsules; @@ -456,8 +456,8 @@ void cloth::SwCollision::buildConeAcceleration() } // convert right/left mask arrays into single overlap array -template -void cloth::SwCollision::mergeAcceleration(uint32_t* firstIt) +template +void cloth::SwCollision::mergeAcceleration(uint32_t* firstIt) { uint32_t* firstEnd = firstIt + 3 * sGridSize; uint32_t* lastIt = firstEnd; @@ -466,28 +466,28 @@ void cloth::SwCollision::mergeAcceleration(uint32_t* firstIt) } // build mask of spheres/cones touching a regular grid along each axis -template -bool cloth::SwCollision::buildAcceleration() +template +bool cloth::SwCollision::buildAcceleration() { // determine sphere bbox - BoundingBox sphereBounds = - expandBounds(emptyBounds(), mCurData.mSpheres, mCurData.mSpheres + mClothData.mNumSpheres); - BoundingBox particleBounds = loadBounds(mClothData.mCurBounds); + BoundingBox sphereBounds = + expandBounds(emptyBounds(), mCurData.mSpheres, mCurData.mSpheres + mClothData.mNumSpheres); + BoundingBox particleBounds = loadBounds(mClothData.mCurBounds); if (mClothData.mEnableContinuousCollision) { sphereBounds = expandBounds(sphereBounds, mPrevData.mSpheres, mPrevData.mSpheres + mClothData.mNumSpheres); - particleBounds = expandBounds(particleBounds, loadBounds(mClothData.mPrevBounds)); + particleBounds = expandBounds(particleBounds, loadBounds(mClothData.mPrevBounds)); } - BoundingBox bounds = intersectBounds(sphereBounds, particleBounds); - Simd4f edgeLength = (bounds.mUpper - bounds.mLower) & ~static_cast(sMaskW); + BoundingBox bounds = intersectBounds(sphereBounds, particleBounds); + T4f edgeLength = (bounds.mUpper - bounds.mLower) & ~static_cast(sMaskW); if (!allGreaterEqual(edgeLength, gSimd4fZero)) return false; // calculate an expanded bounds to account for numerical inaccuracy - const Simd4f expandedLower = bounds.mLower - abs(bounds.mLower) * sGridExpand; - const Simd4f expandedUpper = bounds.mUpper + abs(bounds.mUpper) * sGridExpand; - const Simd4f expandedEdgeLength = max(expandedUpper - expandedLower, gSimd4fEpsilon); + const T4f expandedLower = bounds.mLower - abs(bounds.mLower) * sGridExpand; + const T4f expandedUpper = bounds.mUpper + abs(bounds.mUpper) * sGridExpand; + const T4f expandedEdgeLength = max(expandedUpper - expandedLower, gSimd4fEpsilon); // make grid minimal thickness and strict upper bound of spheres mGridScale = sGridLength * recip<1>(expandedEdgeLength); @@ -514,8 +514,8 @@ bool cloth::SwCollision::buildAcceleration() #define FORCE_INLINE inline __attribute__((always_inline)) #endif -template -FORCE_INLINE typename cloth::SwCollision::ShapeMask& cloth::SwCollision::ShapeMask:: +template +FORCE_INLINE typename cloth::SwCollision::ShapeMask& cloth::SwCollision::ShapeMask:: operator = (const ShapeMask& right) { mCones = right.mCones; @@ -523,8 +523,8 @@ operator = (const ShapeMask& right) return *this; } -template -FORCE_INLINE typename cloth::SwCollision::ShapeMask& cloth::SwCollision::ShapeMask:: +template +FORCE_INLINE typename cloth::SwCollision::ShapeMask& cloth::SwCollision::ShapeMask:: operator &= (const ShapeMask& right) { mCones = mCones & right.mCones; @@ -532,12 +532,12 @@ operator &= (const ShapeMask& right) return *this; } -template -FORCE_INLINE typename cloth::SwCollision::ShapeMask -cloth::SwCollision::getShapeMask(const Simd4f& position, const Simd4i* __restrict sphereGrid, - const Simd4i* __restrict coneGrid) +template +FORCE_INLINE typename cloth::SwCollision::ShapeMask +cloth::SwCollision::getShapeMask(const T4f& position, const T4i* __restrict sphereGrid, + const T4i* __restrict coneGrid) { - Gather gather(intFloor(position)); + Gather gather(intFloor(position)); ShapeMask result; result.mCones = gather(coneGrid); @@ -546,13 +546,13 @@ cloth::SwCollision::getShapeMask(const Simd4f& position, const Simd4i* _ } // lookup acceleration structure and return mask of potential intersectors -template -FORCE_INLINE typename cloth::SwCollision::ShapeMask -cloth::SwCollision::getShapeMask(const Simd4f* __restrict positions) const +template +FORCE_INLINE typename cloth::SwCollision::ShapeMask +cloth::SwCollision::getShapeMask(const T4f* __restrict positions) const { - Simd4f posX = positions[0] * splat<0>(mGridScale) + splat<0>(mGridBias); - Simd4f posY = positions[1] * splat<1>(mGridScale) + splat<1>(mGridBias); - Simd4f posZ = positions[2] * splat<2>(mGridScale) + splat<2>(mGridBias); + T4f posX = positions[0] * splat<0>(mGridScale) + splat<0>(mGridBias); + T4f posY = positions[1] * splat<1>(mGridScale) + splat<1>(mGridBias); + T4f posZ = positions[2] * splat<2>(mGridScale) + splat<2>(mGridBias); ShapeMask result = getShapeMask(posX, mSphereGrid, mConeGrid); result &= getShapeMask(posY, mSphereGrid + 2, mConeGrid + 2); @@ -562,38 +562,38 @@ cloth::SwCollision::getShapeMask(const Simd4f* __restrict positions) con } // lookup acceleration structure and return mask of potential intersectors -template -FORCE_INLINE typename cloth::SwCollision::ShapeMask -cloth::SwCollision::getShapeMask(const Simd4f* __restrict prevPos, const Simd4f* __restrict curPos) const +template +FORCE_INLINE typename cloth::SwCollision::ShapeMask +cloth::SwCollision::getShapeMask(const T4f* __restrict prevPos, const T4f* __restrict curPos) const { - Simd4f scaleX = splat<0>(mGridScale); - Simd4f scaleY = splat<1>(mGridScale); - Simd4f scaleZ = splat<2>(mGridScale); + T4f scaleX = splat<0>(mGridScale); + T4f scaleY = splat<1>(mGridScale); + T4f scaleZ = splat<2>(mGridScale); - Simd4f biasX = splat<0>(mGridBias); - Simd4f biasY = splat<1>(mGridBias); - Simd4f biasZ = splat<2>(mGridBias); + T4f biasX = splat<0>(mGridBias); + T4f biasY = splat<1>(mGridBias); + T4f biasZ = splat<2>(mGridBias); - Simd4f prevX = prevPos[0] * scaleX + biasX; - Simd4f prevY = prevPos[1] * scaleY + biasY; - Simd4f prevZ = prevPos[2] * scaleZ + biasZ; + T4f prevX = prevPos[0] * scaleX + biasX; + T4f prevY = prevPos[1] * scaleY + biasY; + T4f prevZ = prevPos[2] * scaleZ + biasZ; - Simd4f curX = curPos[0] * scaleX + biasX; - Simd4f curY = curPos[1] * scaleY + biasY; - Simd4f curZ = curPos[2] * scaleZ + biasZ; + T4f curX = curPos[0] * scaleX + biasX; + T4f curY = curPos[1] * scaleY + biasY; + T4f curZ = curPos[2] * scaleZ + biasZ; - Simd4f maxX = min(max(prevX, curX), sGridLength); - Simd4f maxY = min(max(prevY, curY), sGridLength); - Simd4f maxZ = min(max(prevZ, curZ), sGridLength); + T4f maxX = min(max(prevX, curX), sGridLength); + T4f maxY = min(max(prevY, curY), sGridLength); + T4f maxZ = min(max(prevZ, curZ), sGridLength); ShapeMask result = getShapeMask(maxX, mSphereGrid, mConeGrid); result &= getShapeMask(maxY, mSphereGrid + 2, mConeGrid + 2); result &= getShapeMask(maxZ, mSphereGrid + 4, mConeGrid + 4); - Simd4f zero = gSimd4fZero; - Simd4f minX = max(min(prevX, curX), zero); - Simd4f minY = max(min(prevY, curY), zero); - Simd4f minZ = max(min(prevZ, curZ), zero); + T4f zero = gSimd4fZero; + T4f minX = max(min(prevX, curX), zero); + T4f minY = max(min(prevY, curY), zero); + T4f minZ = max(min(prevZ, curZ), zero); result &= getShapeMask(minX, mSphereGrid + 6, mConeGrid + 6); result &= getShapeMask(minY, mSphereGrid + 8, mConeGrid + 8); @@ -602,8 +602,8 @@ cloth::SwCollision::getShapeMask(const Simd4f* __restrict prevPos, const return result; } -template -struct cloth::SwCollision::ImpulseAccumulator +template +struct cloth::SwCollision::ImpulseAccumulator { ImpulseAccumulator() : mDeltaX(gSimd4fZero) @@ -616,21 +616,21 @@ struct cloth::SwCollision::ImpulseAccumulator { } - void add(const Simd4f& x, const Simd4f& y, const Simd4f& z, const Simd4f& scale, const Simd4f& mask) + void add(const T4f& x, const T4f& y, const T4f& z, const T4f& scale, const T4f& mask) { NV_CLOTH_ASSERT(allTrue((mask & x) == (mask & x))); NV_CLOTH_ASSERT(allTrue((mask & y) == (mask & y))); NV_CLOTH_ASSERT(allTrue((mask & z) == (mask & z))); NV_CLOTH_ASSERT(allTrue((mask & scale) == (mask & scale))); - Simd4f maskedScale = scale & mask; + T4f maskedScale = scale & mask; mDeltaX = mDeltaX + x * maskedScale; mDeltaY = mDeltaY + y * maskedScale; mDeltaZ = mDeltaZ + z * maskedScale; mNumCollisions = mNumCollisions + (gSimd4fOne & mask); } - void addVelocity(const Simd4f& vx, const Simd4f& vy, const Simd4f& vz, const Simd4f& mask) + void addVelocity(const T4f& vx, const T4f& vy, const T4f& vz, const T4f& mask) { NV_CLOTH_ASSERT(allTrue((mask & vx) == (mask & vx))); NV_CLOTH_ASSERT(allTrue((mask & vy) == (mask & vy))); @@ -641,34 +641,34 @@ struct cloth::SwCollision::ImpulseAccumulator mVelZ = mVelZ + (vz & mask); } - void subtract(const Simd4f& x, const Simd4f& y, const Simd4f& z, const Simd4f& scale, const Simd4f& mask) + void subtract(const T4f& x, const T4f& y, const T4f& z, const T4f& scale, const T4f& mask) { NV_CLOTH_ASSERT(allTrue((mask & x) == (mask & x))); NV_CLOTH_ASSERT(allTrue((mask & y) == (mask & y))); NV_CLOTH_ASSERT(allTrue((mask & z) == (mask & z))); NV_CLOTH_ASSERT(allTrue((mask & scale) == (mask & scale))); - Simd4f maskedScale = scale & mask; + T4f maskedScale = scale & mask; mDeltaX = mDeltaX - x * maskedScale; mDeltaY = mDeltaY - y * maskedScale; mDeltaZ = mDeltaZ - z * maskedScale; mNumCollisions = mNumCollisions + (gSimd4fOne & mask); } - Simd4f mDeltaX, mDeltaY, mDeltaZ; - Simd4f mVelX, mVelY, mVelZ; - Simd4f mNumCollisions; + T4f mDeltaX, mDeltaY, mDeltaZ; + T4f mVelX, mVelY, mVelZ; + T4f mNumCollisions; }; -template -FORCE_INLINE void cloth::SwCollision::collideSpheres(const Simd4i& sphereMask, const Simd4f* positions, +template +FORCE_INLINE void cloth::SwCollision::collideSpheres(const T4i& sphereMask, const T4f* positions, ImpulseAccumulator& accum) const { const float* __restrict spherePtr = array(mCurData.mSpheres->center); bool frictionEnabled = mClothData.mFrictionScale > 0.0f; - Simd4i mask4 = horizontalOr(sphereMask); + T4i mask4 = horizontalOr(sphereMask); uint32_t mask = uint32_t(array(mask4)[0]); while (mask) { @@ -676,16 +676,16 @@ FORCE_INLINE void cloth::SwCollision::collideSpheres(const Simd4i& spher uint32_t offset = findBitSet(mask & ~test) * sizeof(SphereData); mask = mask & test; - Simd4f sphere = loadAligned(spherePtr, offset); + T4f sphere = loadAligned(spherePtr, offset); - Simd4f deltaX = positions[0] - splat<0>(sphere); - Simd4f deltaY = positions[1] - splat<1>(sphere); - Simd4f deltaZ = positions[2] - splat<2>(sphere); + T4f deltaX = positions[0] - splat<0>(sphere); + T4f deltaY = positions[1] - splat<1>(sphere); + T4f deltaZ = positions[2] - splat<2>(sphere); - Simd4f sqrDistance = gSimd4fEpsilon + deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ; - Simd4f negativeScale = gSimd4fOne - rsqrt(sqrDistance) * splat<3>(sphere); + T4f sqrDistance = gSimd4fEpsilon + deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ; + T4f negativeScale = gSimd4fOne - rsqrt(sqrDistance) * splat<3>(sphere); - Simd4f contactMask; + T4f contactMask; if (!anyGreater(gSimd4fZero, negativeScale, contactMask)) continue; @@ -696,17 +696,17 @@ FORCE_INLINE void cloth::SwCollision::collideSpheres(const Simd4i& spher // load previous sphere pos const float* __restrict prevSpherePtr = array(mPrevData.mSpheres->center); - Simd4f prevSphere = loadAligned(prevSpherePtr, offset); - Simd4f velocity = sphere - prevSphere; + T4f prevSphere = loadAligned(prevSpherePtr, offset); + T4f velocity = sphere - prevSphere; accum.addVelocity(splat<0>(velocity), splat<1>(velocity), splat<2>(velocity), contactMask); } } } -template -FORCE_INLINE typename cloth::SwCollision::Simd4i -cloth::SwCollision::collideCones(const Simd4f* __restrict positions, ImpulseAccumulator& accum) const +template +FORCE_INLINE typename cloth::SwCollision::T4i +cloth::SwCollision::collideCones(const T4f* __restrict positions, ImpulseAccumulator& accum) const { const float* __restrict centerPtr = array(mCurData.mCones->center); const float* __restrict axisPtr = array(mCurData.mCones->axis); @@ -715,7 +715,7 @@ cloth::SwCollision::collideCones(const Simd4f* __restrict positions, Imp bool frictionEnabled = mClothData.mFrictionScale > 0.0f; ShapeMask shapeMask = getShapeMask(positions); - Simd4i mask4 = horizontalOr(shapeMask.mCones); + T4i mask4 = horizontalOr(shapeMask.mCones); uint32_t mask = uint32_t(array(mask4)[0]); while (mask) { @@ -724,35 +724,35 @@ cloth::SwCollision::collideCones(const Simd4f* __restrict positions, Imp uint32_t offset = coneIndex * sizeof(ConeData); mask = mask & test; - Simd4i test4 = mask4 - gSimd4iOne; - Simd4f culled = simd4f(andNotIsZero(shapeMask.mCones, test4)); + T4i test4 = mask4 - gSimd4iOne; + T4f culled = simd4f(andNotIsZero(shapeMask.mCones, test4)); mask4 = mask4 & test4; - Simd4f center = loadAligned(centerPtr, offset); + T4f center = loadAligned(centerPtr, offset); - Simd4f deltaX = positions[0] - splat<0>(center); - Simd4f deltaY = positions[1] - splat<1>(center); - Simd4f deltaZ = positions[2] - splat<2>(center); + T4f deltaX = positions[0] - splat<0>(center); + T4f deltaY = positions[1] - splat<1>(center); + T4f deltaZ = positions[2] - splat<2>(center); - Simd4f axis = loadAligned(axisPtr, offset); + T4f axis = loadAligned(axisPtr, offset); - Simd4f axisX = splat<0>(axis); - Simd4f axisY = splat<1>(axis); - Simd4f axisZ = splat<2>(axis); - Simd4f slope = splat<3>(axis); + T4f axisX = splat<0>(axis); + T4f axisY = splat<1>(axis); + T4f axisZ = splat<2>(axis); + T4f slope = splat<3>(axis); - Simd4f dot = deltaX * axisX + deltaY * axisY + deltaZ * axisZ; - Simd4f radius = dot * slope + splat<3>(center); + T4f dot = deltaX * axisX + deltaY * axisY + deltaZ * axisZ; + T4f radius = dot * slope + splat<3>(center); // set radius to zero if cone is culled radius = max(radius, gSimd4fZero) & ~culled; - Simd4f sqrDistance = deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ - dot * dot; + T4f sqrDistance = deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ - dot * dot; - Simd4i auxiliary = loadAligned(auxiliaryPtr, offset); - Simd4i bothMask = splat<3>(auxiliary); + T4i auxiliary = loadAligned(auxiliaryPtr, offset); + T4i bothMask = splat<3>(auxiliary); - Simd4f contactMask; + T4f contactMask; if (!anyGreater(radius * radius, sqrDistance, contactMask)) { // cone only culled when spheres culled, ok to clear those too @@ -764,19 +764,19 @@ cloth::SwCollision::collideCones(const Simd4f* __restrict positions, Imp // making sqrDistance negative when point lies on the cone axis sqrDistance = max(sqrDistance, gSimd4fEpsilon); - Simd4f invDistance = rsqrt(sqrDistance); - Simd4f base = dot + slope * sqrDistance * invDistance; + T4f invDistance = rsqrt(sqrDistance); + T4f base = dot + slope * sqrDistance * invDistance; // force left/rightMask to false if not inside cone base = base & contactMask; - Simd4f halfLength = splat<1>(simd4f(auxiliary)); - Simd4i leftMask = simd4i(base < -halfLength); - Simd4i rightMask = simd4i(base > halfLength); + T4f halfLength = splat<1>(simd4f(auxiliary)); + T4i leftMask = simd4i(base < -halfLength); + T4i rightMask = simd4i(base > halfLength); // we use both mask because of the early out above. - Simd4i firstMask = splat<2>(auxiliary); - Simd4i secondMask = firstMask ^ bothMask; + T4i firstMask = splat<2>(auxiliary); + T4i secondMask = firstMask ^ bothMask; shapeMask.mSpheres = shapeMask.mSpheres & ~(firstMask & ~leftMask); shapeMask.mSpheres = shapeMask.mSpheres & ~(secondMask & ~rightMask); @@ -784,8 +784,8 @@ cloth::SwCollision::collideCones(const Simd4f* __restrict positions, Imp deltaY = deltaY - base * axisY; deltaZ = deltaZ - base * axisZ; - Simd4f sqrCosine = splat<0>(simd4f(auxiliary)); - Simd4f scale = radius * invDistance * sqrCosine - sqrCosine; + T4f sqrCosine = splat<0>(simd4f(auxiliary)); + T4f scale = radius * invDistance * sqrCosine - sqrCosine; contactMask = contactMask & ~simd4f(leftMask | rightMask); @@ -804,23 +804,23 @@ cloth::SwCollision::collideCones(const Simd4f* __restrict positions, Imp // todo: could pre-compute sphere velocities or it might be // faster to compute cur/prev sphere positions directly - Simd4f s0p0 = loadAligned(prevSpheres, s0 * sizeof(SphereData)); - Simd4f s0p1 = loadAligned(curSpheres, s0 * sizeof(SphereData)); + T4f s0p0 = loadAligned(prevSpheres, s0 * sizeof(SphereData)); + T4f s0p1 = loadAligned(curSpheres, s0 * sizeof(SphereData)); - Simd4f s1p0 = loadAligned(prevSpheres, s1 * sizeof(SphereData)); - Simd4f s1p1 = loadAligned(curSpheres, s1 * sizeof(SphereData)); + T4f s1p0 = loadAligned(prevSpheres, s1 * sizeof(SphereData)); + T4f s1p1 = loadAligned(curSpheres, s1 * sizeof(SphereData)); - Simd4f v0 = s0p1 - s0p0; - Simd4f v1 = s1p1 - s1p0; - Simd4f vd = v1 - v0; + T4f v0 = s0p1 - s0p0; + T4f v1 = s1p1 - s1p0; + T4f vd = v1 - v0; // dot is in the range -1 to 1, scale and bias to 0 to 1 dot = dot * gSimd4fHalf + gSimd4fHalf; // interpolate velocity at contact points - Simd4f vx = splat<0>(v0) + dot * splat<0>(vd); - Simd4f vy = splat<1>(v0) + dot * splat<1>(vd); - Simd4f vz = splat<2>(v0) + dot * splat<2>(vd); + T4f vx = splat<0>(v0) + dot * splat<0>(vd); + T4f vy = splat<1>(v0) + dot * splat<1>(vd); + T4f vz = splat<2>(v0) + dot * splat<2>(vd); accum.addVelocity(vx, vy, vz, contactMask); } @@ -829,16 +829,16 @@ cloth::SwCollision::collideCones(const Simd4f* __restrict positions, Imp return shapeMask.mSpheres; } -template -FORCE_INLINE void cloth::SwCollision::collideSpheres(const Simd4i& sphereMask, const Simd4f* __restrict prevPos, - Simd4f* __restrict curPos, ImpulseAccumulator& accum) const +template +FORCE_INLINE void cloth::SwCollision::collideSpheres(const T4i& sphereMask, const T4f* __restrict prevPos, + T4f* __restrict curPos, ImpulseAccumulator& accum) const { const float* __restrict prevSpheres = array(mPrevData.mSpheres->center); const float* __restrict curSpheres = array(mCurData.mSpheres->center); bool frictionEnabled = mClothData.mFrictionScale > 0.0f; - Simd4i mask4 = horizontalOr(sphereMask); + T4i mask4 = horizontalOr(sphereMask); uint32_t mask = uint32_t(array(mask4)[0]); while (mask) { @@ -846,53 +846,53 @@ FORCE_INLINE void cloth::SwCollision::collideSpheres(const Simd4i& spher uint32_t offset = findBitSet(mask & ~test) * sizeof(SphereData); mask = mask & test; - Simd4f prevSphere = loadAligned(prevSpheres, offset); - Simd4f prevX = prevPos[0] - splat<0>(prevSphere); - Simd4f prevY = prevPos[1] - splat<1>(prevSphere); - Simd4f prevZ = prevPos[2] - splat<2>(prevSphere); - Simd4f prevRadius = splat<3>(prevSphere); + T4f prevSphere = loadAligned(prevSpheres, offset); + T4f prevX = prevPos[0] - splat<0>(prevSphere); + T4f prevY = prevPos[1] - splat<1>(prevSphere); + T4f prevZ = prevPos[2] - splat<2>(prevSphere); + T4f prevRadius = splat<3>(prevSphere); - Simd4f curSphere = loadAligned(curSpheres, offset); - Simd4f curX = curPos[0] - splat<0>(curSphere); - Simd4f curY = curPos[1] - splat<1>(curSphere); - Simd4f curZ = curPos[2] - splat<2>(curSphere); - Simd4f curRadius = splat<3>(curSphere); + T4f curSphere = loadAligned(curSpheres, offset); + T4f curX = curPos[0] - splat<0>(curSphere); + T4f curY = curPos[1] - splat<1>(curSphere); + T4f curZ = curPos[2] - splat<2>(curSphere); + T4f curRadius = splat<3>(curSphere); - Simd4f sqrDistance = gSimd4fEpsilon + curX * curX + curY * curY + curZ * curZ; + T4f sqrDistance = gSimd4fEpsilon + curX * curX + curY * curY + curZ * curZ; - Simd4f dotPrevPrev = prevX * prevX + prevY * prevY + prevZ * prevZ - prevRadius * prevRadius; - Simd4f dotPrevCur = prevX * curX + prevY * curY + prevZ * curZ - prevRadius * curRadius; - Simd4f dotCurCur = sqrDistance - curRadius * curRadius; + T4f dotPrevPrev = prevX * prevX + prevY * prevY + prevZ * prevZ - prevRadius * prevRadius; + T4f dotPrevCur = prevX * curX + prevY * curY + prevZ * curZ - prevRadius * curRadius; + T4f dotCurCur = sqrDistance - curRadius * curRadius; - Simd4f discriminant = dotPrevCur * dotPrevCur - dotCurCur * dotPrevPrev; - Simd4f sqrtD = sqrt(discriminant); - Simd4f halfB = dotPrevCur - dotPrevPrev; - Simd4f minusA = dotPrevCur - dotCurCur + halfB; + T4f discriminant = dotPrevCur * dotPrevCur - dotCurCur * dotPrevPrev; + T4f sqrtD = sqrt(discriminant); + T4f halfB = dotPrevCur - dotPrevPrev; + T4f minusA = dotPrevCur - dotCurCur + halfB; // time of impact or 0 if prevPos inside sphere - Simd4f toi = recip(minusA) * min(gSimd4fZero, halfB + sqrtD); - Simd4f collisionMask = (toi < gSimd4fOne) & (halfB < sqrtD); + T4f toi = recip(minusA) * min(gSimd4fZero, halfB + sqrtD); + T4f collisionMask = (toi < gSimd4fOne) & (halfB < sqrtD); // skip continuous collision if the (un-clamped) particle // trajectory only touches the outer skin of the cone. - Simd4f rMin = prevRadius + halfB * minusA * (curRadius - prevRadius); + T4f rMin = prevRadius + halfB * minusA * (curRadius - prevRadius); collisionMask = collisionMask & (discriminant > minusA * rMin * rMin * sSkeletonWidth); // a is negative when one sphere is contained in the other, // which is already handled by discrete collision. - collisionMask = collisionMask & (minusA < -static_cast(gSimd4fEpsilon)); + collisionMask = collisionMask & (minusA < -static_cast(gSimd4fEpsilon)); if (!allEqual(collisionMask, gSimd4fZero)) { - Simd4f deltaX = prevX - curX; - Simd4f deltaY = prevY - curY; - Simd4f deltaZ = prevZ - curZ; + T4f deltaX = prevX - curX; + T4f deltaY = prevY - curY; + T4f deltaZ = prevZ - curZ; - Simd4f oneMinusToi = (gSimd4fOne - toi) & collisionMask; + T4f oneMinusToi = (gSimd4fOne - toi) & collisionMask; // reduce ccd impulse if (clamped) particle trajectory stays in sphere skin, // i.e. scale by exp2(-k) or 1/(1+k) with k = (tmin - toi) / (1 - toi) - Simd4f minusK = sqrtD * recip(minusA * oneMinusToi) & (oneMinusToi > gSimd4fEpsilon); + T4f minusK = sqrtD * recip(minusA * oneMinusToi) & (oneMinusToi > gSimd4fEpsilon); oneMinusToi = oneMinusToi * recip(gSimd4fOne - minusK); curX = curX + deltaX * oneMinusToi; @@ -906,9 +906,9 @@ FORCE_INLINE void cloth::SwCollision::collideSpheres(const Simd4i& spher sqrDistance = gSimd4fEpsilon + curX * curX + curY * curY + curZ * curZ; } - Simd4f negativeScale = gSimd4fOne - rsqrt(sqrDistance) * curRadius; + T4f negativeScale = gSimd4fOne - rsqrt(sqrDistance) * curRadius; - Simd4f contactMask; + T4f contactMask; if (!anyGreater(gSimd4fZero, negativeScale, contactMask)) continue; @@ -916,15 +916,15 @@ FORCE_INLINE void cloth::SwCollision::collideSpheres(const Simd4i& spher if (frictionEnabled) { - Simd4f velocity = curSphere - prevSphere; + T4f velocity = curSphere - prevSphere; accum.addVelocity(splat<0>(velocity), splat<1>(velocity), splat<2>(velocity), contactMask); } } } -template -FORCE_INLINE typename cloth::SwCollision::Simd4i -cloth::SwCollision::collideCones(const Simd4f* __restrict prevPos, Simd4f* __restrict curPos, +template +FORCE_INLINE typename cloth::SwCollision::T4i +cloth::SwCollision::collideCones(const T4f* __restrict prevPos, T4f* __restrict curPos, ImpulseAccumulator& accum) const { const float* __restrict prevCenterPtr = array(mPrevData.mCones->center); @@ -938,7 +938,7 @@ cloth::SwCollision::collideCones(const Simd4f* __restrict prevPos, Simd4 bool frictionEnabled = mClothData.mFrictionScale > 0.0f; ShapeMask shapeMask = getShapeMask(prevPos, curPos); - Simd4i mask4 = horizontalOr(shapeMask.mCones); + T4i mask4 = horizontalOr(shapeMask.mCones); uint32_t mask = uint32_t(array(mask4)[0]); while (mask) { @@ -947,106 +947,106 @@ cloth::SwCollision::collideCones(const Simd4f* __restrict prevPos, Simd4 uint32_t offset = coneIndex * sizeof(ConeData); mask = mask & test; - Simd4i test4 = mask4 - gSimd4iOne; - Simd4f culled = simd4f(andNotIsZero(shapeMask.mCones, test4)); + T4i test4 = mask4 - gSimd4iOne; + T4f culled = simd4f(andNotIsZero(shapeMask.mCones, test4)); mask4 = mask4 & test4; - Simd4f prevCenter = loadAligned(prevCenterPtr, offset); - Simd4f prevAxis = loadAligned(prevAxisPtr, offset); - Simd4f prevAxisX = splat<0>(prevAxis); - Simd4f prevAxisY = splat<1>(prevAxis); - Simd4f prevAxisZ = splat<2>(prevAxis); - Simd4f prevSlope = splat<3>(prevAxis); - - Simd4f prevX = prevPos[0] - splat<0>(prevCenter); - Simd4f prevY = prevPos[1] - splat<1>(prevCenter); - Simd4f prevZ = prevPos[2] - splat<2>(prevCenter); - Simd4f prevT = prevY * prevAxisZ - prevZ * prevAxisY; - Simd4f prevU = prevZ * prevAxisX - prevX * prevAxisZ; - Simd4f prevV = prevX * prevAxisY - prevY * prevAxisX; - Simd4f prevDot = prevX * prevAxisX + prevY * prevAxisY + prevZ * prevAxisZ; - Simd4f prevRadius = prevDot * prevSlope + splat<3>(prevCenter); - - Simd4f curCenter = loadAligned(curCenterPtr, offset); - Simd4f curAxis = loadAligned(curAxisPtr, offset); - Simd4f curAxisX = splat<0>(curAxis); - Simd4f curAxisY = splat<1>(curAxis); - Simd4f curAxisZ = splat<2>(curAxis); - Simd4f curSlope = splat<3>(curAxis); - Simd4i curAuxiliary = loadAligned(curAuxiliaryPtr, offset); - - Simd4f curX = curPos[0] - splat<0>(curCenter); - Simd4f curY = curPos[1] - splat<1>(curCenter); - Simd4f curZ = curPos[2] - splat<2>(curCenter); - Simd4f curT = curY * curAxisZ - curZ * curAxisY; - Simd4f curU = curZ * curAxisX - curX * curAxisZ; - Simd4f curV = curX * curAxisY - curY * curAxisX; - Simd4f curDot = curX * curAxisX + curY * curAxisY + curZ * curAxisZ; - Simd4f curRadius = curDot * curSlope + splat<3>(curCenter); - - Simd4f curSqrDistance = gSimd4fEpsilon + curT * curT + curU * curU + curV * curV; + T4f prevCenter = loadAligned(prevCenterPtr, offset); + T4f prevAxis = loadAligned(prevAxisPtr, offset); + T4f prevAxisX = splat<0>(prevAxis); + T4f prevAxisY = splat<1>(prevAxis); + T4f prevAxisZ = splat<2>(prevAxis); + T4f prevSlope = splat<3>(prevAxis); + + T4f prevX = prevPos[0] - splat<0>(prevCenter); + T4f prevY = prevPos[1] - splat<1>(prevCenter); + T4f prevZ = prevPos[2] - splat<2>(prevCenter); + T4f prevT = prevY * prevAxisZ - prevZ * prevAxisY; + T4f prevU = prevZ * prevAxisX - prevX * prevAxisZ; + T4f prevV = prevX * prevAxisY - prevY * prevAxisX; + T4f prevDot = prevX * prevAxisX + prevY * prevAxisY + prevZ * prevAxisZ; + T4f prevRadius = prevDot * prevSlope + splat<3>(prevCenter); + + T4f curCenter = loadAligned(curCenterPtr, offset); + T4f curAxis = loadAligned(curAxisPtr, offset); + T4f curAxisX = splat<0>(curAxis); + T4f curAxisY = splat<1>(curAxis); + T4f curAxisZ = splat<2>(curAxis); + T4f curSlope = splat<3>(curAxis); + T4i curAuxiliary = loadAligned(curAuxiliaryPtr, offset); + + T4f curX = curPos[0] - splat<0>(curCenter); + T4f curY = curPos[1] - splat<1>(curCenter); + T4f curZ = curPos[2] - splat<2>(curCenter); + T4f curT = curY * curAxisZ - curZ * curAxisY; + T4f curU = curZ * curAxisX - curX * curAxisZ; + T4f curV = curX * curAxisY - curY * curAxisX; + T4f curDot = curX * curAxisX + curY * curAxisY + curZ * curAxisZ; + T4f curRadius = curDot * curSlope + splat<3>(curCenter); + + T4f curSqrDistance = gSimd4fEpsilon + curT * curT + curU * curU + curV * curV; // set radius to zero if cone is culled prevRadius = max(prevRadius, gSimd4fZero) & ~culled; curRadius = max(curRadius, gSimd4fZero) & ~culled; - Simd4f dotPrevPrev = prevT * prevT + prevU * prevU + prevV * prevV - prevRadius * prevRadius; - Simd4f dotPrevCur = prevT * curT + prevU * curU + prevV * curV - prevRadius * curRadius; - Simd4f dotCurCur = curSqrDistance - curRadius * curRadius; + T4f dotPrevPrev = prevT * prevT + prevU * prevU + prevV * prevV - prevRadius * prevRadius; + T4f dotPrevCur = prevT * curT + prevU * curU + prevV * curV - prevRadius * curRadius; + T4f dotCurCur = curSqrDistance - curRadius * curRadius; - Simd4f discriminant = dotPrevCur * dotPrevCur - dotCurCur * dotPrevPrev; - Simd4f sqrtD = sqrt(discriminant); - Simd4f halfB = dotPrevCur - dotPrevPrev; - Simd4f minusA = dotPrevCur - dotCurCur + halfB; + T4f discriminant = dotPrevCur * dotPrevCur - dotCurCur * dotPrevPrev; + T4f sqrtD = sqrt(discriminant); + T4f halfB = dotPrevCur - dotPrevPrev; + T4f minusA = dotPrevCur - dotCurCur + halfB; // time of impact or 0 if prevPos inside cone - Simd4f toi = recip(minusA) * min(gSimd4fZero, halfB + sqrtD); - Simd4f collisionMask = (toi < gSimd4fOne) & (halfB < sqrtD); + T4f toi = recip(minusA) * min(gSimd4fZero, halfB + sqrtD); + T4f collisionMask = (toi < gSimd4fOne) & (halfB < sqrtD); // skip continuous collision if the (un-clamped) particle // trajectory only touches the outer skin of the cone. - Simd4f rMin = prevRadius + halfB * minusA * (curRadius - prevRadius); + T4f rMin = prevRadius + halfB * minusA * (curRadius - prevRadius); collisionMask = collisionMask & (discriminant > minusA * rMin * rMin * sSkeletonWidth); // a is negative when one cone is contained in the other, // which is already handled by discrete collision. - collisionMask = collisionMask & (minusA < -static_cast(gSimd4fEpsilon)); + collisionMask = collisionMask & (minusA < -static_cast(gSimd4fEpsilon)); // test if any particle hits infinite cone (and 0