diff options
| author | Marijn Tamis <[email protected]> | 2017-04-28 14:19:07 +0200 |
|---|---|---|
| committer | Marijn Tamis <[email protected]> | 2017-04-28 14:19:07 +0200 |
| commit | b350eb5f4d44e8448115796144375d79438d74ae (patch) | |
| tree | 8e102e8c28f45a1b87bd335ceee4f33c3d4ee7c2 /NvCloth/src/SwCollision.cpp | |
| parent | Add visual samples. (diff) | |
| download | nvcloth-b350eb5f4d44e8448115796144375d79438d74ae.tar.xz nvcloth-b350eb5f4d44e8448115796144375d79438d74ae.zip | |
NvCloth 1.1.0 Release. (22041545)
Diffstat (limited to 'NvCloth/src/SwCollision.cpp')
| -rw-r--r-- | NvCloth/src/SwCollision.cpp | 990 |
1 files changed, 495 insertions, 495 deletions
diff --git a/NvCloth/src/SwCollision.cpp b/NvCloth/src/SwCollision.cpp index 3774795..89df8a5 100644 --- a/NvCloth/src/SwCollision.cpp +++ b/NvCloth/src/SwCollision.cpp @@ -42,8 +42,8 @@ using namespace nv; using namespace physx; // the particle trajectory needs to penetrate more than 0.2 * radius to trigger continuous collision -template <typename Simd4f> -const Simd4f cloth::SwCollision<Simd4f>::sSkeletonWidth = simd4f(cloth::sqr(1 - 0.2f) - 1); +template <typename T4f> +const T4f cloth::SwCollision<T4f>::sSkeletonWidth = simd4f(cloth::sqr(1 - 0.2f) - 1); #if NV_SIMD_SSE2 const Simd4i cloth::Gather<Simd4i>::sIntSignBit = simd4i(0x80000000); @@ -66,8 +66,8 @@ const Simd4fScalarFactory sGridExpand = simd4f(1e-4f); const Simd4fTupleFactory sMinusFloatMaxXYZ = simd4f(-FLT_MAX, -FLT_MAX, -FLT_MAX, 0.0f); #if PX_PROFILE || PX_DEBUG -template <typename Simd4f> -uint32_t horizontalSum(const Simd4f& x) +template <typename T4f> +uint32_t horizontalSum(const T4f& x) { const float* p = array(x); return uint32_t(0.5f + p[0] + p[1] + p[2] + p[3]); @@ -75,8 +75,8 @@ uint32_t horizontalSum(const Simd4f& x) #endif // 7 elements are written to ptr! -template <typename Simd4f> -void storeBounds(float* ptr, const cloth::BoundingBox<Simd4f>& bounds) +template <typename T4f> +void storeBounds(float* ptr, const cloth::BoundingBox<T4f>& bounds) { store(ptr, bounds.mLower); store(ptr + 3, bounds.mUpper); @@ -128,14 +128,14 @@ namespace nv { namespace cloth { -template <typename Simd4f> -BoundingBox<Simd4f> expandBounds(const BoundingBox<Simd4f>& bbox, const SphereData* sIt, const SphereData* sEnd) +template <typename T4f> +BoundingBox<T4f> expandBounds(const BoundingBox<T4f>& bbox, const SphereData* sIt, const SphereData* sEnd) { - BoundingBox<Simd4f> result = bbox; + BoundingBox<T4f> result = bbox; for (; sIt != sEnd; ++sIt) { - Simd4f p = loadAligned(array(sIt->center)); - Simd4f r = splat<3>(p); + T4f p = loadAligned(array(sIt->center)); + T4f r = splat<3>(p); result.mLower = min(result.mLower, p - r); result.mUpper = max(result.mUpper, p + r); } @@ -146,8 +146,8 @@ BoundingBox<Simd4f> expandBounds(const BoundingBox<Simd4f>& bbox, const SphereDa namespace { -template <typename Simd4f, typename SrcIterator> -void generateSpheres(Simd4f* dIt, const SrcIterator& src, uint32_t count) +template <typename T4f, typename SrcIterator> +void generateSpheres(T4f* dIt, const SrcIterator& src, uint32_t count) { // have to copy out iterator to ensure alignment is maintained for (SrcIterator sIt = src; 0 < count--; ++sIt, ++dIt) @@ -192,41 +192,41 @@ void generateCones(cloth::ConeData* dst, const cloth::SphereData* sourceSpheres, } } -template <typename Simd4f, typename SrcIterator> -void generatePlanes(Simd4f* dIt, const SrcIterator& src, uint32_t count) +template <typename T4f, typename SrcIterator> +void generatePlanes(T4f* dIt, const SrcIterator& src, uint32_t count) { // have to copy out iterator to ensure alignment is maintained for (SrcIterator sIt = src; 0 < count--; ++sIt, ++dIt) *dIt = *sIt; } -template <typename Simd4f, typename SrcIterator> +template <typename T4f, typename SrcIterator> void generateTriangles(cloth::TriangleData* dIt, const SrcIterator& src, uint32_t count) { // have to copy out iterator to ensure alignment is maintained for (SrcIterator sIt = src; 0 < count--; ++dIt) { - Simd4f p0 = *sIt; + T4f p0 = *sIt; ++sIt; - Simd4f p1 = *sIt; + T4f p1 = *sIt; ++sIt; - Simd4f p2 = *sIt; + T4f p2 = *sIt; ++sIt; - Simd4f edge0 = p1 - p0; - Simd4f edge1 = p2 - p0; - Simd4f normal = cross3(edge0, edge1); + T4f edge0 = p1 - p0; + T4f edge1 = p2 - p0; + T4f normal = cross3(edge0, edge1); - Simd4f edge0SqrLength = dot3(edge0, edge0); - Simd4f edge1SqrLength = dot3(edge1, edge1); - Simd4f edge0DotEdge1 = dot3(edge0, edge1); - Simd4f normalInvLength = rsqrt(dot3(normal, normal)); + T4f edge0SqrLength = dot3(edge0, edge0); + T4f edge1SqrLength = dot3(edge1, edge1); + T4f edge0DotEdge1 = dot3(edge0, edge1); + T4f normalInvLength = rsqrt(dot3(normal, normal)); - Simd4f det = edge0SqrLength * edge1SqrLength - edge0DotEdge1 * edge0DotEdge1; - Simd4f denom = edge0SqrLength + edge1SqrLength - edge0DotEdge1 - edge0DotEdge1; + T4f det = edge0SqrLength * edge1SqrLength - edge0DotEdge1 * edge0DotEdge1; + T4f denom = edge0SqrLength + edge1SqrLength - edge0DotEdge1 - edge0DotEdge1; // there are definitely faster ways... - Simd4f aux = select(sMaskX, det, denom); + T4f aux = select(sMaskX, det, denom); aux = select(sMaskZ, edge0SqrLength, aux); aux = select(sMaskW, edge1SqrLength, aux); @@ -240,14 +240,14 @@ void generateTriangles(cloth::TriangleData* dIt, const SrcIterator& src, uint32_ } // namespace -template <typename Simd4f> -cloth::SwCollision<Simd4f>::CollisionData::CollisionData() +template <typename T4f> +cloth::SwCollision<T4f>::CollisionData::CollisionData() : mSpheres(0), mCones(0) { } -template <typename Simd4f> -cloth::SwCollision<Simd4f>::SwCollision(SwClothData& clothData, SwKernelAllocator& alloc) +template <typename T4f> +cloth::SwCollision<T4f>::SwCollision(SwClothData& clothData, SwKernelAllocator& alloc) : mClothData(clothData), mAllocator(alloc) { allocate(mCurData); @@ -256,22 +256,22 @@ cloth::SwCollision<Simd4f>::SwCollision(SwClothData& clothData, SwKernelAllocato { allocate(mPrevData); - generateSpheres(reinterpret_cast<Simd4f*>(mPrevData.mSpheres), - reinterpret_cast<const Simd4f*>(clothData.mStartCollisionSpheres), clothData.mNumSpheres); + generateSpheres(reinterpret_cast<T4f*>(mPrevData.mSpheres), + reinterpret_cast<const T4f*>(clothData.mStartCollisionSpheres), clothData.mNumSpheres); generateCones(mPrevData.mCones, mPrevData.mSpheres, clothData.mCapsuleIndices, clothData.mNumCapsules); } } -template <typename Simd4f> -cloth::SwCollision<Simd4f>::~SwCollision() +template <typename T4f> +cloth::SwCollision<T4f>::~SwCollision() { deallocate(mCurData); deallocate(mPrevData); } -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::operator()(const IterationState<Simd4f>& state) +template <typename T4f> +void cloth::SwCollision<T4f>::operator()(const IterationState<T4f>& state) { mNumCollisions = 0; @@ -285,20 +285,20 @@ void cloth::SwCollision<Simd4f>::operator()(const IterationState<Simd4f>& state) bool lastIteration = state.mRemainingIterations == 1; - const Simd4f* targetSpheres = reinterpret_cast<const Simd4f*>(mClothData.mTargetCollisionSpheres); + const T4f* targetSpheres = reinterpret_cast<const T4f*>(mClothData.mTargetCollisionSpheres); // generate sphere and cone collision data if (!lastIteration) { // interpolate spheres - LerpIterator<Simd4f, const Simd4f*> pIter(reinterpret_cast<const Simd4f*>(mClothData.mStartCollisionSpheres), + LerpIterator<T4f, const T4f*> pIter(reinterpret_cast<const T4f*>(mClothData.mStartCollisionSpheres), targetSpheres, state.getCurrentAlpha()); - generateSpheres(reinterpret_cast<Simd4f*>(mCurData.mSpheres), pIter, mClothData.mNumSpheres); + generateSpheres(reinterpret_cast<T4f*>(mCurData.mSpheres), pIter, mClothData.mNumSpheres); } else { // otherwise use the target spheres directly - generateSpheres(reinterpret_cast<Simd4f*>(mCurData.mSpheres), targetSpheres, mClothData.mNumSpheres); + generateSpheres(reinterpret_cast<T4f*>(mCurData.mSpheres), targetSpheres, mClothData.mNumSpheres); } // generate cones even if test below fails because @@ -323,8 +323,8 @@ void cloth::SwCollision<Simd4f>::operator()(const IterationState<Simd4f>& state) shdfnd::swap(mCurData, mPrevData); } -template <typename Simd4f> -size_t cloth::SwCollision<Simd4f>::estimateTemporaryMemory(const SwCloth& cloth) +template <typename T4f> +size_t cloth::SwCollision<T4f>::estimateTemporaryMemory(const SwCloth& cloth) { size_t numTriangles = cloth.mStartCollisionTriangles.size(); size_t numPlanes = cloth.mStartCollisionPlanes.size(); @@ -335,8 +335,8 @@ size_t cloth::SwCollision<Simd4f>::estimateTemporaryMemory(const SwCloth& cloth) return std::max(kTriangleDataSize, kPlaneDataSize); } -template <typename Simd4f> -size_t cloth::SwCollision<Simd4f>::estimatePersistentMemory(const SwCloth& cloth) +template <typename T4f> +size_t cloth::SwCollision<T4f>::estimatePersistentMemory(const SwCloth& cloth) { size_t numCapsules = cloth.mCapsuleIndices.size(); size_t numSpheres = cloth.mStartCollisionSpheres.size(); @@ -347,74 +347,74 @@ size_t cloth::SwCollision<Simd4f>::estimatePersistentMemory(const SwCloth& cloth return sphereDataSize + coneDataSize; } -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::allocate(CollisionData& data) +template <typename T4f> +void cloth::SwCollision<T4f>::allocate(CollisionData& data) { data.mSpheres = static_cast<SphereData*>(mAllocator.allocate(sizeof(SphereData) * mClothData.mNumSpheres)); data.mCones = static_cast<ConeData*>(mAllocator.allocate(sizeof(ConeData) * mClothData.mNumCapsules)); } -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::deallocate(const CollisionData& data) +template <typename T4f> +void cloth::SwCollision<T4f>::deallocate(const CollisionData& data) { mAllocator.deallocate(data.mSpheres); mAllocator.deallocate(data.mCones); } -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::computeBounds() +template <typename T4f> +void cloth::SwCollision<T4f>::computeBounds() { NV_CLOTH_PROFILE_ZONE("cloth::SwSolverKernel::computeBounds", /*ProfileContext::None*/ 0); - Simd4f* prevIt = reinterpret_cast<Simd4f*>(mClothData.mPrevParticles); - Simd4f* curIt = reinterpret_cast<Simd4f*>(mClothData.mCurParticles); - Simd4f* curEnd = curIt + mClothData.mNumParticles; - Simd4f floatMaxXYZ = -static_cast<Simd4f>(sMinusFloatMaxXYZ); + T4f* prevIt = reinterpret_cast<T4f*>(mClothData.mPrevParticles); + T4f* curIt = reinterpret_cast<T4f*>(mClothData.mCurParticles); + T4f* curEnd = curIt + mClothData.mNumParticles; + T4f floatMaxXYZ = -static_cast<T4f>(sMinusFloatMaxXYZ); - Simd4f lower = simd4f(FLT_MAX), upper = -lower; + T4f lower = simd4f(FLT_MAX), upper = -lower; for (; curIt < curEnd; ++curIt, ++prevIt) { - Simd4f current = *curIt; + T4f current = *curIt; lower = min(lower, current); upper = max(upper, current); // if (current.w > 0) current.w = previous.w *curIt = select(current > floatMaxXYZ, *prevIt, current); } - BoundingBox<Simd4f> curBounds; + BoundingBox<T4f> curBounds; curBounds.mLower = lower; curBounds.mUpper = upper; // don't change this order, storeBounds writes 7 floats - BoundingBox<Simd4f> prevBounds = loadBounds<Simd4f>(mClothData.mCurBounds); + BoundingBox<T4f> prevBounds = loadBounds<T4f>(mClothData.mCurBounds); storeBounds(mClothData.mCurBounds, curBounds); storeBounds(mClothData.mPrevBounds, prevBounds); } namespace { -template <typename Simd4i> -Simd4i andNotIsZero(const Simd4i& left, const Simd4i& right) +template <typename T4i> +T4i andNotIsZero(const T4i& left, const T4i& right) { return (left & ~right) == gSimd4iZero; } } // build per-axis mask arrays of spheres on the right/left of grid cell -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::buildSphereAcceleration(const SphereData* sIt) +template <typename T4f> +void cloth::SwCollision<T4f>::buildSphereAcceleration(const SphereData* sIt) { static const int maxIndex = sGridSize - 1; const SphereData* sEnd = sIt + mClothData.mNumSpheres; for (uint32_t mask = 0x1; sIt != sEnd; ++sIt, mask <<= 1) { - Simd4f sphere = loadAligned(array(sIt->center)); - Simd4f radius = splat<3>(sphere); + T4f sphere = loadAligned(array(sIt->center)); + T4f radius = splat<3>(sphere); - Simd4i first = intFloor(max((sphere - radius) * mGridScale + mGridBias, gSimd4fZero)); - Simd4i last = intFloor(min((sphere + radius) * mGridScale + mGridBias, sGridLength)); + T4i first = intFloor(max((sphere - radius) * mGridScale + mGridBias, gSimd4fZero)); + T4i last = intFloor(min((sphere + radius) * mGridScale + mGridBias, sGridLength)); const int* firstIdx = array(first); const int* lastIdx = array(last); @@ -434,8 +434,8 @@ void cloth::SwCollision<Simd4f>::buildSphereAcceleration(const SphereData* sIt) } // generate cone masks from sphere masks -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::buildConeAcceleration() +template <typename T4f> +void cloth::SwCollision<T4f>::buildConeAcceleration() { const ConeData* coneIt = mCurData.mCones; const ConeData* coneEnd = coneIt + mClothData.mNumCapsules; @@ -456,8 +456,8 @@ void cloth::SwCollision<Simd4f>::buildConeAcceleration() } // convert right/left mask arrays into single overlap array -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::mergeAcceleration(uint32_t* firstIt) +template <typename T4f> +void cloth::SwCollision<T4f>::mergeAcceleration(uint32_t* firstIt) { uint32_t* firstEnd = firstIt + 3 * sGridSize; uint32_t* lastIt = firstEnd; @@ -466,28 +466,28 @@ void cloth::SwCollision<Simd4f>::mergeAcceleration(uint32_t* firstIt) } // build mask of spheres/cones touching a regular grid along each axis -template <typename Simd4f> -bool cloth::SwCollision<Simd4f>::buildAcceleration() +template <typename T4f> +bool cloth::SwCollision<T4f>::buildAcceleration() { // determine sphere bbox - BoundingBox<Simd4f> sphereBounds = - expandBounds(emptyBounds<Simd4f>(), mCurData.mSpheres, mCurData.mSpheres + mClothData.mNumSpheres); - BoundingBox<Simd4f> particleBounds = loadBounds<Simd4f>(mClothData.mCurBounds); + BoundingBox<T4f> sphereBounds = + expandBounds(emptyBounds<T4f>(), mCurData.mSpheres, mCurData.mSpheres + mClothData.mNumSpheres); + BoundingBox<T4f> particleBounds = loadBounds<T4f>(mClothData.mCurBounds); if (mClothData.mEnableContinuousCollision) { sphereBounds = expandBounds(sphereBounds, mPrevData.mSpheres, mPrevData.mSpheres + mClothData.mNumSpheres); - particleBounds = expandBounds(particleBounds, loadBounds<Simd4f>(mClothData.mPrevBounds)); + particleBounds = expandBounds(particleBounds, loadBounds<T4f>(mClothData.mPrevBounds)); } - BoundingBox<Simd4f> bounds = intersectBounds(sphereBounds, particleBounds); - Simd4f edgeLength = (bounds.mUpper - bounds.mLower) & ~static_cast<Simd4f>(sMaskW); + BoundingBox<T4f> bounds = intersectBounds(sphereBounds, particleBounds); + T4f edgeLength = (bounds.mUpper - bounds.mLower) & ~static_cast<T4f>(sMaskW); if (!allGreaterEqual(edgeLength, gSimd4fZero)) return false; // calculate an expanded bounds to account for numerical inaccuracy - const Simd4f expandedLower = bounds.mLower - abs(bounds.mLower) * sGridExpand; - const Simd4f expandedUpper = bounds.mUpper + abs(bounds.mUpper) * sGridExpand; - const Simd4f expandedEdgeLength = max(expandedUpper - expandedLower, gSimd4fEpsilon); + const T4f expandedLower = bounds.mLower - abs(bounds.mLower) * sGridExpand; + const T4f expandedUpper = bounds.mUpper + abs(bounds.mUpper) * sGridExpand; + const T4f expandedEdgeLength = max(expandedUpper - expandedLower, gSimd4fEpsilon); // make grid minimal thickness and strict upper bound of spheres mGridScale = sGridLength * recip<1>(expandedEdgeLength); @@ -514,8 +514,8 @@ bool cloth::SwCollision<Simd4f>::buildAcceleration() #define FORCE_INLINE inline __attribute__((always_inline)) #endif -template <typename Simd4f> -FORCE_INLINE typename cloth::SwCollision<Simd4f>::ShapeMask& cloth::SwCollision<Simd4f>::ShapeMask:: +template <typename T4f> +FORCE_INLINE typename cloth::SwCollision<T4f>::ShapeMask& cloth::SwCollision<T4f>::ShapeMask:: operator = (const ShapeMask& right) { mCones = right.mCones; @@ -523,8 +523,8 @@ operator = (const ShapeMask& right) return *this; } -template <typename Simd4f> -FORCE_INLINE typename cloth::SwCollision<Simd4f>::ShapeMask& cloth::SwCollision<Simd4f>::ShapeMask:: +template <typename T4f> +FORCE_INLINE typename cloth::SwCollision<T4f>::ShapeMask& cloth::SwCollision<T4f>::ShapeMask:: operator &= (const ShapeMask& right) { mCones = mCones & right.mCones; @@ -532,12 +532,12 @@ operator &= (const ShapeMask& right) return *this; } -template <typename Simd4f> -FORCE_INLINE typename cloth::SwCollision<Simd4f>::ShapeMask -cloth::SwCollision<Simd4f>::getShapeMask(const Simd4f& position, const Simd4i* __restrict sphereGrid, - const Simd4i* __restrict coneGrid) +template <typename T4f> +FORCE_INLINE typename cloth::SwCollision<T4f>::ShapeMask +cloth::SwCollision<T4f>::getShapeMask(const T4f& position, const T4i* __restrict sphereGrid, + const T4i* __restrict coneGrid) { - Gather<Simd4i> gather(intFloor(position)); + Gather<T4i> gather(intFloor(position)); ShapeMask result; result.mCones = gather(coneGrid); @@ -546,13 +546,13 @@ cloth::SwCollision<Simd4f>::getShapeMask(const Simd4f& position, const Simd4i* _ } // lookup acceleration structure and return mask of potential intersectors -template <typename Simd4f> -FORCE_INLINE typename cloth::SwCollision<Simd4f>::ShapeMask -cloth::SwCollision<Simd4f>::getShapeMask(const Simd4f* __restrict positions) const +template <typename T4f> +FORCE_INLINE typename cloth::SwCollision<T4f>::ShapeMask +cloth::SwCollision<T4f>::getShapeMask(const T4f* __restrict positions) const { - Simd4f posX = positions[0] * splat<0>(mGridScale) + splat<0>(mGridBias); - Simd4f posY = positions[1] * splat<1>(mGridScale) + splat<1>(mGridBias); - Simd4f posZ = positions[2] * splat<2>(mGridScale) + splat<2>(mGridBias); + T4f posX = positions[0] * splat<0>(mGridScale) + splat<0>(mGridBias); + T4f posY = positions[1] * splat<1>(mGridScale) + splat<1>(mGridBias); + T4f posZ = positions[2] * splat<2>(mGridScale) + splat<2>(mGridBias); ShapeMask result = getShapeMask(posX, mSphereGrid, mConeGrid); result &= getShapeMask(posY, mSphereGrid + 2, mConeGrid + 2); @@ -562,38 +562,38 @@ cloth::SwCollision<Simd4f>::getShapeMask(const Simd4f* __restrict positions) con } // lookup acceleration structure and return mask of potential intersectors -template <typename Simd4f> -FORCE_INLINE typename cloth::SwCollision<Simd4f>::ShapeMask -cloth::SwCollision<Simd4f>::getShapeMask(const Simd4f* __restrict prevPos, const Simd4f* __restrict curPos) const +template <typename T4f> +FORCE_INLINE typename cloth::SwCollision<T4f>::ShapeMask +cloth::SwCollision<T4f>::getShapeMask(const T4f* __restrict prevPos, const T4f* __restrict curPos) const { - Simd4f scaleX = splat<0>(mGridScale); - Simd4f scaleY = splat<1>(mGridScale); - Simd4f scaleZ = splat<2>(mGridScale); + T4f scaleX = splat<0>(mGridScale); + T4f scaleY = splat<1>(mGridScale); + T4f scaleZ = splat<2>(mGridScale); - Simd4f biasX = splat<0>(mGridBias); - Simd4f biasY = splat<1>(mGridBias); - Simd4f biasZ = splat<2>(mGridBias); + T4f biasX = splat<0>(mGridBias); + T4f biasY = splat<1>(mGridBias); + T4f biasZ = splat<2>(mGridBias); - Simd4f prevX = prevPos[0] * scaleX + biasX; - Simd4f prevY = prevPos[1] * scaleY + biasY; - Simd4f prevZ = prevPos[2] * scaleZ + biasZ; + T4f prevX = prevPos[0] * scaleX + biasX; + T4f prevY = prevPos[1] * scaleY + biasY; + T4f prevZ = prevPos[2] * scaleZ + biasZ; - Simd4f curX = curPos[0] * scaleX + biasX; - Simd4f curY = curPos[1] * scaleY + biasY; - Simd4f curZ = curPos[2] * scaleZ + biasZ; + T4f curX = curPos[0] * scaleX + biasX; + T4f curY = curPos[1] * scaleY + biasY; + T4f curZ = curPos[2] * scaleZ + biasZ; - Simd4f maxX = min(max(prevX, curX), sGridLength); - Simd4f maxY = min(max(prevY, curY), sGridLength); - Simd4f maxZ = min(max(prevZ, curZ), sGridLength); + T4f maxX = min(max(prevX, curX), sGridLength); + T4f maxY = min(max(prevY, curY), sGridLength); + T4f maxZ = min(max(prevZ, curZ), sGridLength); ShapeMask result = getShapeMask(maxX, mSphereGrid, mConeGrid); result &= getShapeMask(maxY, mSphereGrid + 2, mConeGrid + 2); result &= getShapeMask(maxZ, mSphereGrid + 4, mConeGrid + 4); - Simd4f zero = gSimd4fZero; - Simd4f minX = max(min(prevX, curX), zero); - Simd4f minY = max(min(prevY, curY), zero); - Simd4f minZ = max(min(prevZ, curZ), zero); + T4f zero = gSimd4fZero; + T4f minX = max(min(prevX, curX), zero); + T4f minY = max(min(prevY, curY), zero); + T4f minZ = max(min(prevZ, curZ), zero); result &= getShapeMask(minX, mSphereGrid + 6, mConeGrid + 6); result &= getShapeMask(minY, mSphereGrid + 8, mConeGrid + 8); @@ -602,8 +602,8 @@ cloth::SwCollision<Simd4f>::getShapeMask(const Simd4f* __restrict prevPos, const return result; } -template <typename Simd4f> -struct cloth::SwCollision<Simd4f>::ImpulseAccumulator +template <typename T4f> +struct cloth::SwCollision<T4f>::ImpulseAccumulator { ImpulseAccumulator() : mDeltaX(gSimd4fZero) @@ -616,21 +616,21 @@ struct cloth::SwCollision<Simd4f>::ImpulseAccumulator { } - void add(const Simd4f& x, const Simd4f& y, const Simd4f& z, const Simd4f& scale, const Simd4f& mask) + void add(const T4f& x, const T4f& y, const T4f& z, const T4f& scale, const T4f& mask) { NV_CLOTH_ASSERT(allTrue((mask & x) == (mask & x))); NV_CLOTH_ASSERT(allTrue((mask & y) == (mask & y))); NV_CLOTH_ASSERT(allTrue((mask & z) == (mask & z))); NV_CLOTH_ASSERT(allTrue((mask & scale) == (mask & scale))); - Simd4f maskedScale = scale & mask; + T4f maskedScale = scale & mask; mDeltaX = mDeltaX + x * maskedScale; mDeltaY = mDeltaY + y * maskedScale; mDeltaZ = mDeltaZ + z * maskedScale; mNumCollisions = mNumCollisions + (gSimd4fOne & mask); } - void addVelocity(const Simd4f& vx, const Simd4f& vy, const Simd4f& vz, const Simd4f& mask) + void addVelocity(const T4f& vx, const T4f& vy, const T4f& vz, const T4f& mask) { NV_CLOTH_ASSERT(allTrue((mask & vx) == (mask & vx))); NV_CLOTH_ASSERT(allTrue((mask & vy) == (mask & vy))); @@ -641,34 +641,34 @@ struct cloth::SwCollision<Simd4f>::ImpulseAccumulator mVelZ = mVelZ + (vz & mask); } - void subtract(const Simd4f& x, const Simd4f& y, const Simd4f& z, const Simd4f& scale, const Simd4f& mask) + void subtract(const T4f& x, const T4f& y, const T4f& z, const T4f& scale, const T4f& mask) { NV_CLOTH_ASSERT(allTrue((mask & x) == (mask & x))); NV_CLOTH_ASSERT(allTrue((mask & y) == (mask & y))); NV_CLOTH_ASSERT(allTrue((mask & z) == (mask & z))); NV_CLOTH_ASSERT(allTrue((mask & scale) == (mask & scale))); - Simd4f maskedScale = scale & mask; + T4f maskedScale = scale & mask; mDeltaX = mDeltaX - x * maskedScale; mDeltaY = mDeltaY - y * maskedScale; mDeltaZ = mDeltaZ - z * maskedScale; mNumCollisions = mNumCollisions + (gSimd4fOne & mask); } - Simd4f mDeltaX, mDeltaY, mDeltaZ; - Simd4f mVelX, mVelY, mVelZ; - Simd4f mNumCollisions; + T4f mDeltaX, mDeltaY, mDeltaZ; + T4f mVelX, mVelY, mVelZ; + T4f mNumCollisions; }; -template <typename Simd4f> -FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& sphereMask, const Simd4f* positions, +template <typename T4f> +FORCE_INLINE void cloth::SwCollision<T4f>::collideSpheres(const T4i& sphereMask, const T4f* positions, ImpulseAccumulator& accum) const { const float* __restrict spherePtr = array(mCurData.mSpheres->center); bool frictionEnabled = mClothData.mFrictionScale > 0.0f; - Simd4i mask4 = horizontalOr(sphereMask); + T4i mask4 = horizontalOr(sphereMask); uint32_t mask = uint32_t(array(mask4)[0]); while (mask) { @@ -676,16 +676,16 @@ FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& spher uint32_t offset = findBitSet(mask & ~test) * sizeof(SphereData); mask = mask & test; - Simd4f sphere = loadAligned(spherePtr, offset); + T4f sphere = loadAligned(spherePtr, offset); - Simd4f deltaX = positions[0] - splat<0>(sphere); - Simd4f deltaY = positions[1] - splat<1>(sphere); - Simd4f deltaZ = positions[2] - splat<2>(sphere); + T4f deltaX = positions[0] - splat<0>(sphere); + T4f deltaY = positions[1] - splat<1>(sphere); + T4f deltaZ = positions[2] - splat<2>(sphere); - Simd4f sqrDistance = gSimd4fEpsilon + deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ; - Simd4f negativeScale = gSimd4fOne - rsqrt(sqrDistance) * splat<3>(sphere); + T4f sqrDistance = gSimd4fEpsilon + deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ; + T4f negativeScale = gSimd4fOne - rsqrt(sqrDistance) * splat<3>(sphere); - Simd4f contactMask; + T4f contactMask; if (!anyGreater(gSimd4fZero, negativeScale, contactMask)) continue; @@ -696,17 +696,17 @@ FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& spher // load previous sphere pos const float* __restrict prevSpherePtr = array(mPrevData.mSpheres->center); - Simd4f prevSphere = loadAligned(prevSpherePtr, offset); - Simd4f velocity = sphere - prevSphere; + T4f prevSphere = loadAligned(prevSpherePtr, offset); + T4f velocity = sphere - prevSphere; accum.addVelocity(splat<0>(velocity), splat<1>(velocity), splat<2>(velocity), contactMask); } } } -template <typename Simd4f> -FORCE_INLINE typename cloth::SwCollision<Simd4f>::Simd4i -cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, ImpulseAccumulator& accum) const +template <typename T4f> +FORCE_INLINE typename cloth::SwCollision<T4f>::T4i +cloth::SwCollision<T4f>::collideCones(const T4f* __restrict positions, ImpulseAccumulator& accum) const { const float* __restrict centerPtr = array(mCurData.mCones->center); const float* __restrict axisPtr = array(mCurData.mCones->axis); @@ -715,7 +715,7 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, Imp bool frictionEnabled = mClothData.mFrictionScale > 0.0f; ShapeMask shapeMask = getShapeMask(positions); - Simd4i mask4 = horizontalOr(shapeMask.mCones); + T4i mask4 = horizontalOr(shapeMask.mCones); uint32_t mask = uint32_t(array(mask4)[0]); while (mask) { @@ -724,35 +724,35 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, Imp uint32_t offset = coneIndex * sizeof(ConeData); mask = mask & test; - Simd4i test4 = mask4 - gSimd4iOne; - Simd4f culled = simd4f(andNotIsZero(shapeMask.mCones, test4)); + T4i test4 = mask4 - gSimd4iOne; + T4f culled = simd4f(andNotIsZero(shapeMask.mCones, test4)); mask4 = mask4 & test4; - Simd4f center = loadAligned(centerPtr, offset); + T4f center = loadAligned(centerPtr, offset); - Simd4f deltaX = positions[0] - splat<0>(center); - Simd4f deltaY = positions[1] - splat<1>(center); - Simd4f deltaZ = positions[2] - splat<2>(center); + T4f deltaX = positions[0] - splat<0>(center); + T4f deltaY = positions[1] - splat<1>(center); + T4f deltaZ = positions[2] - splat<2>(center); - Simd4f axis = loadAligned(axisPtr, offset); + T4f axis = loadAligned(axisPtr, offset); - Simd4f axisX = splat<0>(axis); - Simd4f axisY = splat<1>(axis); - Simd4f axisZ = splat<2>(axis); - Simd4f slope = splat<3>(axis); + T4f axisX = splat<0>(axis); + T4f axisY = splat<1>(axis); + T4f axisZ = splat<2>(axis); + T4f slope = splat<3>(axis); - Simd4f dot = deltaX * axisX + deltaY * axisY + deltaZ * axisZ; - Simd4f radius = dot * slope + splat<3>(center); + T4f dot = deltaX * axisX + deltaY * axisY + deltaZ * axisZ; + T4f radius = dot * slope + splat<3>(center); // set radius to zero if cone is culled radius = max(radius, gSimd4fZero) & ~culled; - Simd4f sqrDistance = deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ - dot * dot; + T4f sqrDistance = deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ - dot * dot; - Simd4i auxiliary = loadAligned(auxiliaryPtr, offset); - Simd4i bothMask = splat<3>(auxiliary); + T4i auxiliary = loadAligned(auxiliaryPtr, offset); + T4i bothMask = splat<3>(auxiliary); - Simd4f contactMask; + T4f contactMask; if (!anyGreater(radius * radius, sqrDistance, contactMask)) { // cone only culled when spheres culled, ok to clear those too @@ -764,19 +764,19 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, Imp // making sqrDistance negative when point lies on the cone axis sqrDistance = max(sqrDistance, gSimd4fEpsilon); - Simd4f invDistance = rsqrt(sqrDistance); - Simd4f base = dot + slope * sqrDistance * invDistance; + T4f invDistance = rsqrt(sqrDistance); + T4f base = dot + slope * sqrDistance * invDistance; // force left/rightMask to false if not inside cone base = base & contactMask; - Simd4f halfLength = splat<1>(simd4f(auxiliary)); - Simd4i leftMask = simd4i(base < -halfLength); - Simd4i rightMask = simd4i(base > halfLength); + T4f halfLength = splat<1>(simd4f(auxiliary)); + T4i leftMask = simd4i(base < -halfLength); + T4i rightMask = simd4i(base > halfLength); // we use both mask because of the early out above. - Simd4i firstMask = splat<2>(auxiliary); - Simd4i secondMask = firstMask ^ bothMask; + T4i firstMask = splat<2>(auxiliary); + T4i secondMask = firstMask ^ bothMask; shapeMask.mSpheres = shapeMask.mSpheres & ~(firstMask & ~leftMask); shapeMask.mSpheres = shapeMask.mSpheres & ~(secondMask & ~rightMask); @@ -784,8 +784,8 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, Imp deltaY = deltaY - base * axisY; deltaZ = deltaZ - base * axisZ; - Simd4f sqrCosine = splat<0>(simd4f(auxiliary)); - Simd4f scale = radius * invDistance * sqrCosine - sqrCosine; + T4f sqrCosine = splat<0>(simd4f(auxiliary)); + T4f scale = radius * invDistance * sqrCosine - sqrCosine; contactMask = contactMask & ~simd4f(leftMask | rightMask); @@ -804,23 +804,23 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, Imp // todo: could pre-compute sphere velocities or it might be // faster to compute cur/prev sphere positions directly - Simd4f s0p0 = loadAligned(prevSpheres, s0 * sizeof(SphereData)); - Simd4f s0p1 = loadAligned(curSpheres, s0 * sizeof(SphereData)); + T4f s0p0 = loadAligned(prevSpheres, s0 * sizeof(SphereData)); + T4f s0p1 = loadAligned(curSpheres, s0 * sizeof(SphereData)); - Simd4f s1p0 = loadAligned(prevSpheres, s1 * sizeof(SphereData)); - Simd4f s1p1 = loadAligned(curSpheres, s1 * sizeof(SphereData)); + T4f s1p0 = loadAligned(prevSpheres, s1 * sizeof(SphereData)); + T4f s1p1 = loadAligned(curSpheres, s1 * sizeof(SphereData)); - Simd4f v0 = s0p1 - s0p0; - Simd4f v1 = s1p1 - s1p0; - Simd4f vd = v1 - v0; + T4f v0 = s0p1 - s0p0; + T4f v1 = s1p1 - s1p0; + T4f vd = v1 - v0; // dot is in the range -1 to 1, scale and bias to 0 to 1 dot = dot * gSimd4fHalf + gSimd4fHalf; // interpolate velocity at contact points - Simd4f vx = splat<0>(v0) + dot * splat<0>(vd); - Simd4f vy = splat<1>(v0) + dot * splat<1>(vd); - Simd4f vz = splat<2>(v0) + dot * splat<2>(vd); + T4f vx = splat<0>(v0) + dot * splat<0>(vd); + T4f vy = splat<1>(v0) + dot * splat<1>(vd); + T4f vz = splat<2>(v0) + dot * splat<2>(vd); accum.addVelocity(vx, vy, vz, contactMask); } @@ -829,16 +829,16 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, Imp return shapeMask.mSpheres; } -template <typename Simd4f> -FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& sphereMask, const Simd4f* __restrict prevPos, - Simd4f* __restrict curPos, ImpulseAccumulator& accum) const +template <typename T4f> +FORCE_INLINE void cloth::SwCollision<T4f>::collideSpheres(const T4i& sphereMask, const T4f* __restrict prevPos, + T4f* __restrict curPos, ImpulseAccumulator& accum) const { const float* __restrict prevSpheres = array(mPrevData.mSpheres->center); const float* __restrict curSpheres = array(mCurData.mSpheres->center); bool frictionEnabled = mClothData.mFrictionScale > 0.0f; - Simd4i mask4 = horizontalOr(sphereMask); + T4i mask4 = horizontalOr(sphereMask); uint32_t mask = uint32_t(array(mask4)[0]); while (mask) { @@ -846,53 +846,53 @@ FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& spher uint32_t offset = findBitSet(mask & ~test) * sizeof(SphereData); mask = mask & test; - Simd4f prevSphere = loadAligned(prevSpheres, offset); - Simd4f prevX = prevPos[0] - splat<0>(prevSphere); - Simd4f prevY = prevPos[1] - splat<1>(prevSphere); - Simd4f prevZ = prevPos[2] - splat<2>(prevSphere); - Simd4f prevRadius = splat<3>(prevSphere); + T4f prevSphere = loadAligned(prevSpheres, offset); + T4f prevX = prevPos[0] - splat<0>(prevSphere); + T4f prevY = prevPos[1] - splat<1>(prevSphere); + T4f prevZ = prevPos[2] - splat<2>(prevSphere); + T4f prevRadius = splat<3>(prevSphere); - Simd4f curSphere = loadAligned(curSpheres, offset); - Simd4f curX = curPos[0] - splat<0>(curSphere); - Simd4f curY = curPos[1] - splat<1>(curSphere); - Simd4f curZ = curPos[2] - splat<2>(curSphere); - Simd4f curRadius = splat<3>(curSphere); + T4f curSphere = loadAligned(curSpheres, offset); + T4f curX = curPos[0] - splat<0>(curSphere); + T4f curY = curPos[1] - splat<1>(curSphere); + T4f curZ = curPos[2] - splat<2>(curSphere); + T4f curRadius = splat<3>(curSphere); - Simd4f sqrDistance = gSimd4fEpsilon + curX * curX + curY * curY + curZ * curZ; + T4f sqrDistance = gSimd4fEpsilon + curX * curX + curY * curY + curZ * curZ; - Simd4f dotPrevPrev = prevX * prevX + prevY * prevY + prevZ * prevZ - prevRadius * prevRadius; - Simd4f dotPrevCur = prevX * curX + prevY * curY + prevZ * curZ - prevRadius * curRadius; - Simd4f dotCurCur = sqrDistance - curRadius * curRadius; + T4f dotPrevPrev = prevX * prevX + prevY * prevY + prevZ * prevZ - prevRadius * prevRadius; + T4f dotPrevCur = prevX * curX + prevY * curY + prevZ * curZ - prevRadius * curRadius; + T4f dotCurCur = sqrDistance - curRadius * curRadius; - Simd4f discriminant = dotPrevCur * dotPrevCur - dotCurCur * dotPrevPrev; - Simd4f sqrtD = sqrt(discriminant); - Simd4f halfB = dotPrevCur - dotPrevPrev; - Simd4f minusA = dotPrevCur - dotCurCur + halfB; + T4f discriminant = dotPrevCur * dotPrevCur - dotCurCur * dotPrevPrev; + T4f sqrtD = sqrt(discriminant); + T4f halfB = dotPrevCur - dotPrevPrev; + T4f minusA = dotPrevCur - dotCurCur + halfB; // time of impact or 0 if prevPos inside sphere - Simd4f toi = recip(minusA) * min(gSimd4fZero, halfB + sqrtD); - Simd4f collisionMask = (toi < gSimd4fOne) & (halfB < sqrtD); + T4f toi = recip(minusA) * min(gSimd4fZero, halfB + sqrtD); + T4f collisionMask = (toi < gSimd4fOne) & (halfB < sqrtD); // skip continuous collision if the (un-clamped) particle // trajectory only touches the outer skin of the cone. - Simd4f rMin = prevRadius + halfB * minusA * (curRadius - prevRadius); + T4f rMin = prevRadius + halfB * minusA * (curRadius - prevRadius); collisionMask = collisionMask & (discriminant > minusA * rMin * rMin * sSkeletonWidth); // a is negative when one sphere is contained in the other, // which is already handled by discrete collision. - collisionMask = collisionMask & (minusA < -static_cast<Simd4f>(gSimd4fEpsilon)); + collisionMask = collisionMask & (minusA < -static_cast<T4f>(gSimd4fEpsilon)); if (!allEqual(collisionMask, gSimd4fZero)) { - Simd4f deltaX = prevX - curX; - Simd4f deltaY = prevY - curY; - Simd4f deltaZ = prevZ - curZ; + T4f deltaX = prevX - curX; + T4f deltaY = prevY - curY; + T4f deltaZ = prevZ - curZ; - Simd4f oneMinusToi = (gSimd4fOne - toi) & collisionMask; + T4f oneMinusToi = (gSimd4fOne - toi) & collisionMask; // reduce ccd impulse if (clamped) particle trajectory stays in sphere skin, // i.e. scale by exp2(-k) or 1/(1+k) with k = (tmin - toi) / (1 - toi) - Simd4f minusK = sqrtD * recip(minusA * oneMinusToi) & (oneMinusToi > gSimd4fEpsilon); + T4f minusK = sqrtD * recip(minusA * oneMinusToi) & (oneMinusToi > gSimd4fEpsilon); oneMinusToi = oneMinusToi * recip(gSimd4fOne - minusK); curX = curX + deltaX * oneMinusToi; @@ -906,9 +906,9 @@ FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& spher sqrDistance = gSimd4fEpsilon + curX * curX + curY * curY + curZ * curZ; } - Simd4f negativeScale = gSimd4fOne - rsqrt(sqrDistance) * curRadius; + T4f negativeScale = gSimd4fOne - rsqrt(sqrDistance) * curRadius; - Simd4f contactMask; + T4f contactMask; if (!anyGreater(gSimd4fZero, negativeScale, contactMask)) continue; @@ -916,15 +916,15 @@ FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& spher if (frictionEnabled) { - Simd4f velocity = curSphere - prevSphere; + T4f velocity = curSphere - prevSphere; accum.addVelocity(splat<0>(velocity), splat<1>(velocity), splat<2>(velocity), contactMask); } } } -template <typename Simd4f> -FORCE_INLINE typename cloth::SwCollision<Simd4f>::Simd4i -cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4f* __restrict curPos, +template <typename T4f> +FORCE_INLINE typename cloth::SwCollision<T4f>::T4i +cloth::SwCollision<T4f>::collideCones(const T4f* __restrict prevPos, T4f* __restrict curPos, ImpulseAccumulator& accum) const { const float* __restrict prevCenterPtr = array(mPrevData.mCones->center); @@ -938,7 +938,7 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4 bool frictionEnabled = mClothData.mFrictionScale > 0.0f; ShapeMask shapeMask = getShapeMask(prevPos, curPos); - Simd4i mask4 = horizontalOr(shapeMask.mCones); + T4i mask4 = horizontalOr(shapeMask.mCones); uint32_t mask = uint32_t(array(mask4)[0]); while (mask) { @@ -947,106 +947,106 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4 uint32_t offset = coneIndex * sizeof(ConeData); mask = mask & test; - Simd4i test4 = mask4 - gSimd4iOne; - Simd4f culled = simd4f(andNotIsZero(shapeMask.mCones, test4)); + T4i test4 = mask4 - gSimd4iOne; + T4f culled = simd4f(andNotIsZero(shapeMask.mCones, test4)); mask4 = mask4 & test4; - Simd4f prevCenter = loadAligned(prevCenterPtr, offset); - Simd4f prevAxis = loadAligned(prevAxisPtr, offset); - Simd4f prevAxisX = splat<0>(prevAxis); - Simd4f prevAxisY = splat<1>(prevAxis); - Simd4f prevAxisZ = splat<2>(prevAxis); - Simd4f prevSlope = splat<3>(prevAxis); - - Simd4f prevX = prevPos[0] - splat<0>(prevCenter); - Simd4f prevY = prevPos[1] - splat<1>(prevCenter); - Simd4f prevZ = prevPos[2] - splat<2>(prevCenter); - Simd4f prevT = prevY * prevAxisZ - prevZ * prevAxisY; - Simd4f prevU = prevZ * prevAxisX - prevX * prevAxisZ; - Simd4f prevV = prevX * prevAxisY - prevY * prevAxisX; - Simd4f prevDot = prevX * prevAxisX + prevY * prevAxisY + prevZ * prevAxisZ; - Simd4f prevRadius = prevDot * prevSlope + splat<3>(prevCenter); - - Simd4f curCenter = loadAligned(curCenterPtr, offset); - Simd4f curAxis = loadAligned(curAxisPtr, offset); - Simd4f curAxisX = splat<0>(curAxis); - Simd4f curAxisY = splat<1>(curAxis); - Simd4f curAxisZ = splat<2>(curAxis); - Simd4f curSlope = splat<3>(curAxis); - Simd4i curAuxiliary = loadAligned(curAuxiliaryPtr, offset); - - Simd4f curX = curPos[0] - splat<0>(curCenter); - Simd4f curY = curPos[1] - splat<1>(curCenter); - Simd4f curZ = curPos[2] - splat<2>(curCenter); - Simd4f curT = curY * curAxisZ - curZ * curAxisY; - Simd4f curU = curZ * curAxisX - curX * curAxisZ; - Simd4f curV = curX * curAxisY - curY * curAxisX; - Simd4f curDot = curX * curAxisX + curY * curAxisY + curZ * curAxisZ; - Simd4f curRadius = curDot * curSlope + splat<3>(curCenter); - - Simd4f curSqrDistance = gSimd4fEpsilon + curT * curT + curU * curU + curV * curV; + T4f prevCenter = loadAligned(prevCenterPtr, offset); + T4f prevAxis = loadAligned(prevAxisPtr, offset); + T4f prevAxisX = splat<0>(prevAxis); + T4f prevAxisY = splat<1>(prevAxis); + T4f prevAxisZ = splat<2>(prevAxis); + T4f prevSlope = splat<3>(prevAxis); + + T4f prevX = prevPos[0] - splat<0>(prevCenter); + T4f prevY = prevPos[1] - splat<1>(prevCenter); + T4f prevZ = prevPos[2] - splat<2>(prevCenter); + T4f prevT = prevY * prevAxisZ - prevZ * prevAxisY; + T4f prevU = prevZ * prevAxisX - prevX * prevAxisZ; + T4f prevV = prevX * prevAxisY - prevY * prevAxisX; + T4f prevDot = prevX * prevAxisX + prevY * prevAxisY + prevZ * prevAxisZ; + T4f prevRadius = prevDot * prevSlope + splat<3>(prevCenter); + + T4f curCenter = loadAligned(curCenterPtr, offset); + T4f curAxis = loadAligned(curAxisPtr, offset); + T4f curAxisX = splat<0>(curAxis); + T4f curAxisY = splat<1>(curAxis); + T4f curAxisZ = splat<2>(curAxis); + T4f curSlope = splat<3>(curAxis); + T4i curAuxiliary = loadAligned(curAuxiliaryPtr, offset); + + T4f curX = curPos[0] - splat<0>(curCenter); + T4f curY = curPos[1] - splat<1>(curCenter); + T4f curZ = curPos[2] - splat<2>(curCenter); + T4f curT = curY * curAxisZ - curZ * curAxisY; + T4f curU = curZ * curAxisX - curX * curAxisZ; + T4f curV = curX * curAxisY - curY * curAxisX; + T4f curDot = curX * curAxisX + curY * curAxisY + curZ * curAxisZ; + T4f curRadius = curDot * curSlope + splat<3>(curCenter); + + T4f curSqrDistance = gSimd4fEpsilon + curT * curT + curU * curU + curV * curV; // set radius to zero if cone is culled prevRadius = max(prevRadius, gSimd4fZero) & ~culled; curRadius = max(curRadius, gSimd4fZero) & ~culled; - Simd4f dotPrevPrev = prevT * prevT + prevU * prevU + prevV * prevV - prevRadius * prevRadius; - Simd4f dotPrevCur = prevT * curT + prevU * curU + prevV * curV - prevRadius * curRadius; - Simd4f dotCurCur = curSqrDistance - curRadius * curRadius; + T4f dotPrevPrev = prevT * prevT + prevU * prevU + prevV * prevV - prevRadius * prevRadius; + T4f dotPrevCur = prevT * curT + prevU * curU + prevV * curV - prevRadius * curRadius; + T4f dotCurCur = curSqrDistance - curRadius * curRadius; - Simd4f discriminant = dotPrevCur * dotPrevCur - dotCurCur * dotPrevPrev; - Simd4f sqrtD = sqrt(discriminant); - Simd4f halfB = dotPrevCur - dotPrevPrev; - Simd4f minusA = dotPrevCur - dotCurCur + halfB; + T4f discriminant = dotPrevCur * dotPrevCur - dotCurCur * dotPrevPrev; + T4f sqrtD = sqrt(discriminant); + T4f halfB = dotPrevCur - dotPrevPrev; + T4f minusA = dotPrevCur - dotCurCur + halfB; // time of impact or 0 if prevPos inside cone - Simd4f toi = recip(minusA) * min(gSimd4fZero, halfB + sqrtD); - Simd4f collisionMask = (toi < gSimd4fOne) & (halfB < sqrtD); + T4f toi = recip(minusA) * min(gSimd4fZero, halfB + sqrtD); + T4f collisionMask = (toi < gSimd4fOne) & (halfB < sqrtD); // skip continuous collision if the (un-clamped) particle // trajectory only touches the outer skin of the cone. - Simd4f rMin = prevRadius + halfB * minusA * (curRadius - prevRadius); + T4f rMin = prevRadius + halfB * minusA * (curRadius - prevRadius); collisionMask = collisionMask & (discriminant > minusA * rMin * rMin * sSkeletonWidth); // a is negative when one cone is contained in the other, // which is already handled by discrete collision. - collisionMask = collisionMask & (minusA < -static_cast<Simd4f>(gSimd4fEpsilon)); + collisionMask = collisionMask & (minusA < -static_cast<T4f>(gSimd4fEpsilon)); // test if any particle hits infinite cone (and 0<time of impact<1) if (!allEqual(collisionMask, gSimd4fZero)) { - Simd4f deltaX = prevX - curX; - Simd4f deltaY = prevY - curY; - Simd4f deltaZ = prevZ - curZ; + T4f deltaX = prevX - curX; + T4f deltaY = prevY - curY; + T4f deltaZ = prevZ - curZ; // interpolate delta at toi - Simd4f posX = prevX - deltaX * toi; - Simd4f posY = prevY - deltaY * toi; - Simd4f posZ = prevZ - deltaZ * toi; + T4f posX = prevX - deltaX * toi; + T4f posY = prevY - deltaY * toi; + T4f posZ = prevZ - deltaZ * toi; - Simd4f curScaledAxis = curAxis * splat<1>(simd4f(curAuxiliary)); - Simd4i prevAuxiliary = loadAligned(prevAuxiliaryPtr, offset); - Simd4f deltaScaledAxis = curScaledAxis - prevAxis * splat<1>(simd4f(prevAuxiliary)); + T4f curScaledAxis = curAxis * splat<1>(simd4f(curAuxiliary)); + T4i prevAuxiliary = loadAligned(prevAuxiliaryPtr, offset); + T4f deltaScaledAxis = curScaledAxis - prevAxis * splat<1>(simd4f(prevAuxiliary)); - Simd4f oneMinusToi = gSimd4fOne - toi; + T4f oneMinusToi = gSimd4fOne - toi; // interpolate axis at toi - Simd4f axisX = splat<0>(curScaledAxis) - splat<0>(deltaScaledAxis) * oneMinusToi; - Simd4f axisY = splat<1>(curScaledAxis) - splat<1>(deltaScaledAxis) * oneMinusToi; - Simd4f axisZ = splat<2>(curScaledAxis) - splat<2>(deltaScaledAxis) * oneMinusToi; - Simd4f slope = (prevSlope * oneMinusToi + curSlope * toi); + T4f axisX = splat<0>(curScaledAxis) - splat<0>(deltaScaledAxis) * oneMinusToi; + T4f axisY = splat<1>(curScaledAxis) - splat<1>(deltaScaledAxis) * oneMinusToi; + T4f axisZ = splat<2>(curScaledAxis) - splat<2>(deltaScaledAxis) * oneMinusToi; + T4f slope = (prevSlope * oneMinusToi + curSlope * toi); - Simd4f sqrHalfLength = axisX * axisX + axisY * axisY + axisZ * axisZ; - Simd4f invHalfLength = rsqrt(sqrHalfLength); - Simd4f dot = (posX * axisX + posY * axisY + posZ * axisZ) * invHalfLength; + T4f sqrHalfLength = axisX * axisX + axisY * axisY + axisZ * axisZ; + T4f invHalfLength = rsqrt(sqrHalfLength); + T4f dot = (posX * axisX + posY * axisY + posZ * axisZ) * invHalfLength; - Simd4f sqrDistance = posX * posX + posY * posY + posZ * posZ - dot * dot; - Simd4f invDistance = rsqrt(sqrDistance) & (sqrDistance > gSimd4fZero); + T4f sqrDistance = posX * posX + posY * posY + posZ * posZ - dot * dot; + T4f invDistance = rsqrt(sqrDistance) & (sqrDistance > gSimd4fZero); - Simd4f base = dot + slope * sqrDistance * invDistance; - Simd4f scale = base * invHalfLength & collisionMask; + T4f base = dot + slope * sqrDistance * invDistance; + T4f scale = base * invHalfLength & collisionMask; - Simd4f cullMask = (abs(scale) < gSimd4fOne) & collisionMask; + T4f cullMask = (abs(scale) < gSimd4fOne) & collisionMask; // test if any impact position is in cone section if (!allEqual(cullMask, gSimd4fZero)) @@ -1060,7 +1060,7 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4 // reduce ccd impulse if (clamped) particle trajectory stays in cone skin, // i.e. scale by exp2(-k) or 1/(1+k) with k = (tmin - toi) / (1 - toi) // oneMinusToi = oneMinusToi * recip(gSimd4fOne - sqrtD * recip(minusA * oneMinusToi)); - Simd4f minusK = sqrtD * recip(minusA * oneMinusToi) & (oneMinusToi > gSimd4fEpsilon); + T4f minusK = sqrtD * recip(minusA * oneMinusToi) & (oneMinusToi > gSimd4fEpsilon); oneMinusToi = oneMinusToi * recip(gSimd4fOne - minusK); curX = curX + deltaX * oneMinusToi; @@ -1079,39 +1079,39 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4 } // curPos inside cone (discrete collision) - Simd4f contactMask; + T4f contactMask; int anyContact = anyGreater(curRadius * curRadius, curSqrDistance, contactMask); - Simd4i bothMask = splat<3>(curAuxiliary); + T4i bothMask = splat<3>(curAuxiliary); // instead of culling continuous collision for ~collisionMask, and discrete // collision for ~contactMask, disable both if ~collisionMask & ~contactMask - Simd4i cullMask = bothMask & ~simd4i(collisionMask | contactMask); + T4i cullMask = bothMask & ~simd4i(collisionMask | contactMask); shapeMask.mSpheres = shapeMask.mSpheres & ~cullMask; if (!anyContact) continue; - Simd4f invDistance = rsqrt(curSqrDistance) & (curSqrDistance > gSimd4fZero); - Simd4f base = curDot + curSlope * curSqrDistance * invDistance; + T4f invDistance = rsqrt(curSqrDistance) & (curSqrDistance > gSimd4fZero); + T4f base = curDot + curSlope * curSqrDistance * invDistance; - Simd4f halfLength = splat<1>(simd4f(curAuxiliary)); - Simd4i leftMask = simd4i(base < -halfLength); - Simd4i rightMask = simd4i(base > halfLength); + T4f halfLength = splat<1>(simd4f(curAuxiliary)); + T4i leftMask = simd4i(base < -halfLength); + T4i rightMask = simd4i(base > halfLength); // can only skip continuous sphere collision if post-ccd position // is on code side *and* particle had cone-ccd collision. - Simd4i firstMask = splat<2>(curAuxiliary); - Simd4i secondMask = firstMask ^ bothMask; + T4i firstMask = splat<2>(curAuxiliary); + T4i secondMask = firstMask ^ bothMask; cullMask = (firstMask & ~leftMask) | (secondMask & ~rightMask); shapeMask.mSpheres = shapeMask.mSpheres & ~(cullMask & simd4i(collisionMask)); - Simd4f deltaX = curX - base * curAxisX; - Simd4f deltaY = curY - base * curAxisY; - Simd4f deltaZ = curZ - base * curAxisZ; + T4f deltaX = curX - base * curAxisX; + T4f deltaY = curY - base * curAxisY; + T4f deltaZ = curZ - base * curAxisZ; - Simd4f sqrCosine = splat<0>(simd4f(curAuxiliary)); - Simd4f scale = curRadius * invDistance * sqrCosine - sqrCosine; + T4f sqrCosine = splat<0>(simd4f(curAuxiliary)); + T4f scale = curRadius * invDistance * sqrCosine - sqrCosine; contactMask = contactMask & ~simd4f(leftMask | rightMask); @@ -1130,23 +1130,23 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4 // todo: could pre-compute sphere velocities or it might be // faster to compute cur/prev sphere positions directly - Simd4f s0p0 = loadAligned(prevSpheres, s0 * sizeof(SphereData)); - Simd4f s0p1 = loadAligned(curSpheres, s0 * sizeof(SphereData)); + T4f s0p0 = loadAligned(prevSpheres, s0 * sizeof(SphereData)); + T4f s0p1 = loadAligned(curSpheres, s0 * sizeof(SphereData)); - Simd4f s1p0 = loadAligned(prevSpheres, s1 * sizeof(SphereData)); - Simd4f s1p1 = loadAligned(curSpheres, s1 * sizeof(SphereData)); + T4f s1p0 = loadAligned(prevSpheres, s1 * sizeof(SphereData)); + T4f s1p1 = loadAligned(curSpheres, s1 * sizeof(SphereData)); - Simd4f v0 = s0p1 - s0p0; - Simd4f v1 = s1p1 - s1p0; - Simd4f vd = v1 - v0; + T4f v0 = s0p1 - s0p0; + T4f v1 = s1p1 - s1p0; + T4f vd = v1 - v0; // dot is in the range -1 to 1, scale and bias to 0 to 1 curDot = curDot * gSimd4fHalf + gSimd4fHalf; // interpolate velocity at contact points - Simd4f vx = splat<0>(v0) + curDot * splat<0>(vd); - Simd4f vy = splat<1>(v0) + curDot * splat<1>(vd); - Simd4f vz = splat<2>(v0) + curDot * splat<2>(vd); + T4f vx = splat<0>(v0) + curDot * splat<0>(vd); + T4f vy = splat<1>(v0) + curDot * splat<1>(vd); + T4f vz = splat<2>(v0) + curDot * splat<2>(vd); accum.addVelocity(vx, vy, vz, contactMask); } @@ -1158,39 +1158,39 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4 namespace { -template <typename Simd4f> -PX_INLINE void calculateFrictionImpulse(const Simd4f& deltaX, const Simd4f& deltaY, const Simd4f& deltaZ, - const Simd4f& velX, const Simd4f& velY, const Simd4f& velZ, - const Simd4f* curPos, const Simd4f* prevPos, const Simd4f& scale, - const Simd4f& coefficient, const Simd4f& mask, Simd4f* impulse) +template <typename T4f> +PX_INLINE void calculateFrictionImpulse(const T4f& deltaX, const T4f& deltaY, const T4f& deltaZ, + const T4f& velX, const T4f& velY, const T4f& velZ, + const T4f* curPos, const T4f* prevPos, const T4f& scale, + const T4f& coefficient, const T4f& mask, T4f* impulse) { // calculate collision normal - Simd4f deltaSq = deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ; + T4f deltaSq = deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ; - Simd4f rcpDelta = rsqrt(deltaSq + gSimd4fEpsilon); + T4f rcpDelta = rsqrt(deltaSq + gSimd4fEpsilon); - Simd4f nx = deltaX * rcpDelta; - Simd4f ny = deltaY * rcpDelta; - Simd4f nz = deltaZ * rcpDelta; + T4f nx = deltaX * rcpDelta; + T4f ny = deltaY * rcpDelta; + T4f nz = deltaZ * rcpDelta; // calculate relative velocity scaled by number of collisions - Simd4f rvx = curPos[0] - prevPos[0] - velX * scale; - Simd4f rvy = curPos[1] - prevPos[1] - velY * scale; - Simd4f rvz = curPos[2] - prevPos[2] - velZ * scale; + T4f rvx = curPos[0] - prevPos[0] - velX * scale; + T4f rvy = curPos[1] - prevPos[1] - velY * scale; + T4f rvz = curPos[2] - prevPos[2] - velZ * scale; // calculate magnitude of relative normal velocity - Simd4f rvn = rvx * nx + rvy * ny + rvz * nz; + T4f rvn = rvx * nx + rvy * ny + rvz * nz; // calculate relative tangential velocity - Simd4f rvtx = rvx - rvn * nx; - Simd4f rvty = rvy - rvn * ny; - Simd4f rvtz = rvz - rvn * nz; + T4f rvtx = rvx - rvn * nx; + T4f rvty = rvy - rvn * ny; + T4f rvtz = rvz - rvn * nz; // calculate magnitude of vt - Simd4f rcpVt = rsqrt(rvtx * rvtx + rvty * rvty + rvtz * rvtz + gSimd4fEpsilon); + T4f rcpVt = rsqrt(rvtx * rvtx + rvty * rvty + rvtz * rvtz + gSimd4fEpsilon); // magnitude of friction impulse (cannot be greater than -vt) - Simd4f j = max(-coefficient * deltaSq * rcpDelta * rcpVt, gSimd4fMinusOne) & mask; + T4f j = max(-coefficient * deltaSq * rcpDelta * rcpVt, gSimd4fMinusOne) & mask; impulse[0] = rvtx * j; impulse[1] = rvty * j; @@ -1199,17 +1199,17 @@ PX_INLINE void calculateFrictionImpulse(const Simd4f& deltaX, const Simd4f& delt } // anonymous namespace -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::collideParticles() +template <typename T4f> +void cloth::SwCollision<T4f>::collideParticles() { const bool massScalingEnabled = mClothData.mCollisionMassScale > 0.0f; - const Simd4f massScale = simd4f(mClothData.mCollisionMassScale); + const T4f massScale = simd4f(mClothData.mCollisionMassScale); const bool frictionEnabled = mClothData.mFrictionScale > 0.0f; - const Simd4f frictionScale = simd4f(mClothData.mFrictionScale); + const T4f frictionScale = simd4f(mClothData.mFrictionScale); - Simd4f curPos[4]; - Simd4f prevPos[4]; + T4f curPos[4]; + T4f prevPos[4]; float* __restrict prevIt = mClothData.mPrevParticles; float* __restrict pIt = mClothData.mCurParticles; @@ -1223,14 +1223,14 @@ void cloth::SwCollision<Simd4f>::collideParticles() transpose(curPos[0], curPos[1], curPos[2], curPos[3]); ImpulseAccumulator accum; - Simd4i sphereMask = collideCones(curPos, accum); + T4i sphereMask = collideCones(curPos, accum); collideSpheres(sphereMask, curPos, accum); - Simd4f mask; + T4f mask; if (!anyGreater(accum.mNumCollisions, gSimd4fEpsilon, mask)) continue; - Simd4f invNumCollisions = recip(accum.mNumCollisions); + T4f invNumCollisions = recip(accum.mNumCollisions); if (frictionEnabled) { @@ -1240,7 +1240,7 @@ void cloth::SwCollision<Simd4f>::collideParticles() prevPos[3] = loadAligned(prevIt, 48); transpose(prevPos[0], prevPos[1], prevPos[2], prevPos[3]); - Simd4f frictionImpulse[3]; + T4f frictionImpulse[3]; calculateFrictionImpulse(accum.mDeltaX, accum.mDeltaY, accum.mDeltaZ, accum.mVelX, accum.mVelY, accum.mVelZ, curPos, prevPos, invNumCollisions, frictionScale, mask, frictionImpulse); @@ -1258,10 +1258,10 @@ void cloth::SwCollision<Simd4f>::collideParticles() if (massScalingEnabled) { // calculate the inverse mass scale based on the collision impulse magnitude - Simd4f dSq = invNumCollisions * invNumCollisions * + T4f dSq = invNumCollisions * invNumCollisions * (accum.mDeltaX * accum.mDeltaX + accum.mDeltaY * accum.mDeltaY + accum.mDeltaZ * accum.mDeltaZ); - Simd4f scale = recip(gSimd4fOne + massScale * dSq); + T4f scale = recip(gSimd4fOne + massScale * dSq); // scale invmass curPos[3] = select(mask, curPos[3] * scale, curPos[3]); @@ -1283,24 +1283,24 @@ void cloth::SwCollision<Simd4f>::collideParticles() } } -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::collideVirtualParticles() +template <typename T4f> +void cloth::SwCollision<T4f>::collideVirtualParticles() { const bool massScalingEnabled = mClothData.mCollisionMassScale > 0.0f; - const Simd4f massScale = simd4f(mClothData.mCollisionMassScale); + const T4f massScale = simd4f(mClothData.mCollisionMassScale); const bool frictionEnabled = mClothData.mFrictionScale > 0.0f; - const Simd4f frictionScale = simd4f(mClothData.mFrictionScale); + const T4f frictionScale = simd4f(mClothData.mFrictionScale); - Simd4f curPos[3]; + T4f curPos[3]; const float* __restrict weights = mClothData.mVirtualParticleWeights; float* __restrict particles = mClothData.mCurParticles; float* __restrict prevParticles = mClothData.mPrevParticles; // move dummy particles outside of collision range - Simd4f* __restrict dummy = mClothData.mNumParticles + reinterpret_cast<Simd4f*>(mClothData.mCurParticles); - Simd4f invGridScale = recip(mGridScale) & (mGridScale > gSimd4fEpsilon); + T4f* __restrict dummy = mClothData.mNumParticles + reinterpret_cast<T4f*>(mClothData.mCurParticles); + T4f invGridScale = recip(mGridScale) & (mGridScale > gSimd4fEpsilon); dummy[0] = dummy[1] = dummy[2] = invGridScale * mGridBias - invGridScale; const uint16_t* __restrict vpIt = mClothData.mVirtualParticlesBegin; @@ -1308,31 +1308,31 @@ void cloth::SwCollision<Simd4f>::collideVirtualParticles() for (; vpIt != vpEnd; vpIt += 16) { // load 12 particles and 4 weights - Simd4f p0v0 = loadAligned(particles, vpIt[0] * sizeof(PxVec4)); - Simd4f p0v1 = loadAligned(particles, vpIt[1] * sizeof(PxVec4)); - Simd4f p0v2 = loadAligned(particles, vpIt[2] * sizeof(PxVec4)); - Simd4f w0 = loadAligned(weights, vpIt[3] * sizeof(PxVec4)); - - Simd4f p1v0 = loadAligned(particles, vpIt[4] * sizeof(PxVec4)); - Simd4f p1v1 = loadAligned(particles, vpIt[5] * sizeof(PxVec4)); - Simd4f p1v2 = loadAligned(particles, vpIt[6] * sizeof(PxVec4)); - Simd4f w1 = loadAligned(weights, vpIt[7] * sizeof(PxVec4)); - - Simd4f p2v0 = loadAligned(particles, vpIt[8] * sizeof(PxVec4)); - Simd4f p2v1 = loadAligned(particles, vpIt[9] * sizeof(PxVec4)); - Simd4f p2v2 = loadAligned(particles, vpIt[10] * sizeof(PxVec4)); - Simd4f w2 = loadAligned(weights, vpIt[11] * sizeof(PxVec4)); - - Simd4f p3v1 = loadAligned(particles, vpIt[13] * sizeof(PxVec4)); - Simd4f p3v0 = loadAligned(particles, vpIt[12] * sizeof(PxVec4)); - Simd4f p3v2 = loadAligned(particles, vpIt[14] * sizeof(PxVec4)); - Simd4f w3 = loadAligned(weights, vpIt[15] * sizeof(PxVec4)); + T4f p0v0 = loadAligned(particles, vpIt[0] * sizeof(PxVec4)); + T4f p0v1 = loadAligned(particles, vpIt[1] * sizeof(PxVec4)); + T4f p0v2 = loadAligned(particles, vpIt[2] * sizeof(PxVec4)); + T4f w0 = loadAligned(weights, vpIt[3] * sizeof(PxVec4)); + + T4f p1v0 = loadAligned(particles, vpIt[4] * sizeof(PxVec4)); + T4f p1v1 = loadAligned(particles, vpIt[5] * sizeof(PxVec4)); + T4f p1v2 = loadAligned(particles, vpIt[6] * sizeof(PxVec4)); + T4f w1 = loadAligned(weights, vpIt[7] * sizeof(PxVec4)); + + T4f p2v0 = loadAligned(particles, vpIt[8] * sizeof(PxVec4)); + T4f p2v1 = loadAligned(particles, vpIt[9] * sizeof(PxVec4)); + T4f p2v2 = loadAligned(particles, vpIt[10] * sizeof(PxVec4)); + T4f w2 = loadAligned(weights, vpIt[11] * sizeof(PxVec4)); + + T4f p3v1 = loadAligned(particles, vpIt[13] * sizeof(PxVec4)); + T4f p3v0 = loadAligned(particles, vpIt[12] * sizeof(PxVec4)); + T4f p3v2 = loadAligned(particles, vpIt[14] * sizeof(PxVec4)); + T4f w3 = loadAligned(weights, vpIt[15] * sizeof(PxVec4)); // interpolate particles and transpose - Simd4f px = p0v0 * splat<0>(w0) + p0v1 * splat<1>(w0) + p0v2 * splat<2>(w0); - Simd4f py = p1v0 * splat<0>(w1) + p1v1 * splat<1>(w1) + p1v2 * splat<2>(w1); - Simd4f pz = p2v0 * splat<0>(w2) + p2v1 * splat<1>(w2) + p2v2 * splat<2>(w2); - Simd4f pw = p3v0 * splat<0>(w3) + p3v1 * splat<1>(w3) + p3v2 * splat<2>(w3); + T4f px = p0v0 * splat<0>(w0) + p0v1 * splat<1>(w0) + p0v2 * splat<2>(w0); + T4f py = p1v0 * splat<0>(w1) + p1v1 * splat<1>(w1) + p1v2 * splat<2>(w1); + T4f pz = p2v0 * splat<0>(w2) + p2v1 * splat<1>(w2) + p2v2 * splat<2>(w2); + T4f pw = p3v0 * splat<0>(w3) + p3v1 * splat<1>(w3) + p3v2 * splat<2>(w3); transpose(px, py, pz, pw); curPos[0] = px; @@ -1340,55 +1340,55 @@ void cloth::SwCollision<Simd4f>::collideVirtualParticles() curPos[2] = pz; ImpulseAccumulator accum; - Simd4i sphereMask = collideCones(curPos, accum); + T4i sphereMask = collideCones(curPos, accum); collideSpheres(sphereMask, curPos, accum); - Simd4f mask; + T4f mask; if (!anyGreater(accum.mNumCollisions, gSimd4fEpsilon, mask)) continue; - Simd4f invNumCollisions = recip(accum.mNumCollisions); + T4f invNumCollisions = recip(accum.mNumCollisions); // displacement and transpose back - Simd4f d0 = accum.mDeltaX * invNumCollisions; - Simd4f d1 = accum.mDeltaY * invNumCollisions; - Simd4f d2 = accum.mDeltaZ * invNumCollisions; - Simd4f d3 = gSimd4fZero; + T4f d0 = accum.mDeltaX * invNumCollisions; + T4f d1 = accum.mDeltaY * invNumCollisions; + T4f d2 = accum.mDeltaZ * invNumCollisions; + T4f d3 = gSimd4fZero; transpose(d0, d1, d2, d3); // scale weights by 1/dot(w,w) - Simd4f rw0 = w0 * splat<3>(w0); - Simd4f rw1 = w1 * splat<3>(w1); - Simd4f rw2 = w2 * splat<3>(w2); - Simd4f rw3 = w3 * splat<3>(w3); + T4f rw0 = w0 * splat<3>(w0); + T4f rw1 = w1 * splat<3>(w1); + T4f rw2 = w2 * splat<3>(w2); + T4f rw3 = w3 * splat<3>(w3); if (frictionEnabled) { - Simd4f q0v0 = loadAligned(prevParticles, vpIt[0] * sizeof(PxVec4)); - Simd4f q0v1 = loadAligned(prevParticles, vpIt[1] * sizeof(PxVec4)); - Simd4f q0v2 = loadAligned(prevParticles, vpIt[2] * sizeof(PxVec4)); + T4f q0v0 = loadAligned(prevParticles, vpIt[0] * sizeof(PxVec4)); + T4f q0v1 = loadAligned(prevParticles, vpIt[1] * sizeof(PxVec4)); + T4f q0v2 = loadAligned(prevParticles, vpIt[2] * sizeof(PxVec4)); - Simd4f q1v0 = loadAligned(prevParticles, vpIt[4] * sizeof(PxVec4)); - Simd4f q1v1 = loadAligned(prevParticles, vpIt[5] * sizeof(PxVec4)); - Simd4f q1v2 = loadAligned(prevParticles, vpIt[6] * sizeof(PxVec4)); + T4f q1v0 = loadAligned(prevParticles, vpIt[4] * sizeof(PxVec4)); + T4f q1v1 = loadAligned(prevParticles, vpIt[5] * sizeof(PxVec4)); + T4f q1v2 = loadAligned(prevParticles, vpIt[6] * sizeof(PxVec4)); - Simd4f q2v0 = loadAligned(prevParticles, vpIt[8] * sizeof(PxVec4)); - Simd4f q2v1 = loadAligned(prevParticles, vpIt[9] * sizeof(PxVec4)); - Simd4f q2v2 = loadAligned(prevParticles, vpIt[10] * sizeof(PxVec4)); + T4f q2v0 = loadAligned(prevParticles, vpIt[8] * sizeof(PxVec4)); + T4f q2v1 = loadAligned(prevParticles, vpIt[9] * sizeof(PxVec4)); + T4f q2v2 = loadAligned(prevParticles, vpIt[10] * sizeof(PxVec4)); - Simd4f q3v0 = loadAligned(prevParticles, vpIt[12] * sizeof(PxVec4)); - Simd4f q3v1 = loadAligned(prevParticles, vpIt[13] * sizeof(PxVec4)); - Simd4f q3v2 = loadAligned(prevParticles, vpIt[14] * sizeof(PxVec4)); + T4f q3v0 = loadAligned(prevParticles, vpIt[12] * sizeof(PxVec4)); + T4f q3v1 = loadAligned(prevParticles, vpIt[13] * sizeof(PxVec4)); + T4f q3v2 = loadAligned(prevParticles, vpIt[14] * sizeof(PxVec4)); // calculate previous interpolated positions - Simd4f qx = q0v0 * splat<0>(w0) + q0v1 * splat<1>(w0) + q0v2 * splat<2>(w0); - Simd4f qy = q1v0 * splat<0>(w1) + q1v1 * splat<1>(w1) + q1v2 * splat<2>(w1); - Simd4f qz = q2v0 * splat<0>(w2) + q2v1 * splat<1>(w2) + q2v2 * splat<2>(w2); - Simd4f qw = q3v0 * splat<0>(w3) + q3v1 * splat<1>(w3) + q3v2 * splat<2>(w3); + T4f qx = q0v0 * splat<0>(w0) + q0v1 * splat<1>(w0) + q0v2 * splat<2>(w0); + T4f qy = q1v0 * splat<0>(w1) + q1v1 * splat<1>(w1) + q1v2 * splat<2>(w1); + T4f qz = q2v0 * splat<0>(w2) + q2v1 * splat<1>(w2) + q2v2 * splat<2>(w2); + T4f qw = q3v0 * splat<0>(w3) + q3v1 * splat<1>(w3) + q3v2 * splat<2>(w3); transpose(qx, qy, qz, qw); - Simd4f prevPos[3] = { qx, qy, qz }; - Simd4f frictionImpulse[4]; + T4f prevPos[3] = { qx, qy, qz }; + T4f frictionImpulse[4]; frictionImpulse[3] = gSimd4fZero; calculateFrictionImpulse(accum.mDeltaX, accum.mDeltaY, accum.mDeltaZ, accum.mVelX, accum.mVelY, accum.mVelZ, @@ -1433,16 +1433,16 @@ void cloth::SwCollision<Simd4f>::collideVirtualParticles() if (massScalingEnabled) { // calculate the inverse mass scale based on the collision impulse - Simd4f dSq = invNumCollisions * invNumCollisions * + T4f dSq = invNumCollisions * invNumCollisions * (accum.mDeltaX * accum.mDeltaX + accum.mDeltaY * accum.mDeltaY + accum.mDeltaZ * accum.mDeltaZ); - Simd4f weightScale = recip(gSimd4fOne + massScale * dSq); + T4f weightScale = recip(gSimd4fOne + massScale * dSq); weightScale = weightScale - gSimd4fOne; - Simd4f s0 = gSimd4fOne + splat<0>(weightScale) * (w0 & splat<0>(mask)); - Simd4f s1 = gSimd4fOne + splat<1>(weightScale) * (w1 & splat<1>(mask)); - Simd4f s2 = gSimd4fOne + splat<2>(weightScale) * (w2 & splat<2>(mask)); - Simd4f s3 = gSimd4fOne + splat<3>(weightScale) * (w3 & splat<3>(mask)); + T4f s0 = gSimd4fOne + splat<0>(weightScale) * (w0 & splat<0>(mask)); + T4f s1 = gSimd4fOne + splat<1>(weightScale) * (w1 & splat<1>(mask)); + T4f s2 = gSimd4fOne + splat<2>(weightScale) * (w2 & splat<2>(mask)); + T4f s3 = gSimd4fOne + splat<3>(weightScale) * (w3 & splat<3>(mask)); p0v0 = p0v0 * (gSimd4fOneXYZ | (splat<0>(s0) & sMaskW)); p0v1 = p0v1 * (gSimd4fOneXYZ | (splat<1>(s0) & sMaskW)); @@ -1500,17 +1500,17 @@ void cloth::SwCollision<Simd4f>::collideVirtualParticles() } } -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::collideContinuousParticles() +template <typename T4f> +void cloth::SwCollision<T4f>::collideContinuousParticles() { - Simd4f curPos[4]; - Simd4f prevPos[4]; + T4f curPos[4]; + T4f prevPos[4]; const bool massScalingEnabled = mClothData.mCollisionMassScale > 0.0f; - const Simd4f massScale = simd4f(mClothData.mCollisionMassScale); + const T4f massScale = simd4f(mClothData.mCollisionMassScale); const bool frictionEnabled = mClothData.mFrictionScale > 0.0f; - const Simd4f frictionScale = simd4f(mClothData.mFrictionScale); + const T4f frictionScale = simd4f(mClothData.mFrictionScale); float* __restrict prevIt = mClothData.mPrevParticles; float* __restrict curIt = mClothData.mCurParticles; @@ -1531,18 +1531,18 @@ void cloth::SwCollision<Simd4f>::collideContinuousParticles() transpose(curPos[0], curPos[1], curPos[2], curPos[3]); ImpulseAccumulator accum; - Simd4i sphereMask = collideCones(prevPos, curPos, accum); + T4i sphereMask = collideCones(prevPos, curPos, accum); collideSpheres(sphereMask, prevPos, curPos, accum); - Simd4f mask; + T4f mask; if (!anyGreater(accum.mNumCollisions, gSimd4fEpsilon, mask)) continue; - Simd4f invNumCollisions = recip(accum.mNumCollisions); + T4f invNumCollisions = recip(accum.mNumCollisions); if (frictionEnabled) { - Simd4f frictionImpulse[3]; + T4f frictionImpulse[3]; calculateFrictionImpulse(accum.mDeltaX, accum.mDeltaY, accum.mDeltaZ, accum.mVelX, accum.mVelY, accum.mVelZ, curPos, prevPos, invNumCollisions, frictionScale, mask, frictionImpulse); @@ -1560,10 +1560,10 @@ void cloth::SwCollision<Simd4f>::collideContinuousParticles() if (massScalingEnabled) { // calculate the inverse mass scale based on the collision impulse magnitude - Simd4f dSq = invNumCollisions * invNumCollisions * + T4f dSq = invNumCollisions * invNumCollisions * (accum.mDeltaX * accum.mDeltaX + accum.mDeltaY * accum.mDeltaY + accum.mDeltaZ * accum.mDeltaZ); - Simd4f weightScale = recip(gSimd4fOne + massScale * dSq); + T4f weightScale = recip(gSimd4fOne + massScale * dSq); // scale invmass curPos[3] = select(mask, curPos[3] * weightScale, curPos[3]); @@ -1585,22 +1585,22 @@ void cloth::SwCollision<Simd4f>::collideContinuousParticles() } } -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::collideConvexes(const IterationState<Simd4f>& state) +template <typename T4f> +void cloth::SwCollision<T4f>::collideConvexes(const IterationState<T4f>& state) { if (!mClothData.mNumConvexes) return; // times 2 for plane equation result buffer - Simd4f* planes = static_cast<Simd4f*>(mAllocator.allocate(sizeof(Simd4f) * mClothData.mNumPlanes * 2)); + T4f* planes = static_cast<T4f*>(mAllocator.allocate(sizeof(T4f) * mClothData.mNumPlanes * 2)); - const Simd4f* targetPlanes = reinterpret_cast<const Simd4f*>(mClothData.mTargetCollisionPlanes); + const T4f* targetPlanes = reinterpret_cast<const T4f*>(mClothData.mTargetCollisionPlanes); // generate plane collision data if (state.mRemainingIterations != 1) { // interpolate planes - LerpIterator<Simd4f, const Simd4f*> planeIter(reinterpret_cast<const Simd4f*>(mClothData.mStartCollisionPlanes), + LerpIterator<T4f, const T4f*> planeIter(reinterpret_cast<const T4f*>(mClothData.mStartCollisionPlanes), targetPlanes, state.getCurrentAlpha()); // todo: normalize plane equations @@ -1612,10 +1612,10 @@ void cloth::SwCollision<Simd4f>::collideConvexes(const IterationState<Simd4f>& s generatePlanes(planes, targetPlanes, mClothData.mNumPlanes); } - Simd4f curPos[4], prevPos[4]; + T4f curPos[4], prevPos[4]; const bool frictionEnabled = mClothData.mFrictionScale > 0.0f; - const Simd4f frictionScale = simd4f(mClothData.mFrictionScale); + const T4f frictionScale = simd4f(mClothData.mFrictionScale); float* __restrict curIt = mClothData.mCurParticles; float* __restrict curEnd = curIt + mClothData.mNumParticles * 4; @@ -1631,11 +1631,11 @@ void cloth::SwCollision<Simd4f>::collideConvexes(const IterationState<Simd4f>& s ImpulseAccumulator accum; collideConvexes(planes, curPos, accum); - Simd4f mask; + T4f mask; if (!anyGreater(accum.mNumCollisions, gSimd4fEpsilon, mask)) continue; - Simd4f invNumCollisions = recip(accum.mNumCollisions); + T4f invNumCollisions = recip(accum.mNumCollisions); if (frictionEnabled) { @@ -1645,7 +1645,7 @@ void cloth::SwCollision<Simd4f>::collideConvexes(const IterationState<Simd4f>& s prevPos[3] = loadAligned(prevIt, 48); transpose(prevPos[0], prevPos[1], prevPos[2], prevPos[3]); - Simd4f frictionImpulse[3]; + T4f frictionImpulse[3]; calculateFrictionImpulse(accum.mDeltaX, accum.mDeltaY, accum.mDeltaZ, accum.mVelX, accum.mVelY, accum.mVelZ, curPos, prevPos, invNumCollisions, frictionScale, mask, frictionImpulse); @@ -1678,20 +1678,20 @@ void cloth::SwCollision<Simd4f>::collideConvexes(const IterationState<Simd4f>& s mAllocator.deallocate(planes); } -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::collideConvexes(const Simd4f* __restrict planes, Simd4f* __restrict curPos, +template <typename T4f> +void cloth::SwCollision<T4f>::collideConvexes(const T4f* __restrict planes, T4f* __restrict curPos, ImpulseAccumulator& accum) { - Simd4i result = gSimd4iZero; - Simd4i mask4 = gSimd4iOne; + T4i result = gSimd4iZero; + T4i mask4 = gSimd4iOne; - const Simd4f* __restrict pIt, *pEnd = planes + mClothData.mNumPlanes; - Simd4f* __restrict dIt = const_cast<Simd4f*>(pEnd); + const T4f* __restrict pIt, *pEnd = planes + mClothData.mNumPlanes; + T4f* __restrict dIt = const_cast<T4f*>(pEnd); for (pIt = planes; pIt != pEnd; ++pIt, ++dIt) { *dIt = splat<3>(*pIt) + curPos[2] * splat<2>(*pIt) + curPos[1] * splat<1>(*pIt) + curPos[0] * splat<0>(*pIt); result = result | (mask4 & simd4i(*dIt < gSimd4fZero)); - mask4 = mask4 << 1; // todo: shift by Simd4i on consoles + mask4 = mask4 << 1; // todo: shift by T4i on consoles } if (allEqual(result, gSimd4iZero)) @@ -1708,18 +1708,18 @@ void cloth::SwCollision<Simd4f>::collideConvexes(const Simd4f* __restrict planes uint32_t test = mask - 1; uint32_t planeIndex = findBitSet(mask & ~test); - Simd4f plane = planes[planeIndex]; - Simd4f planeX = splat<0>(plane); - Simd4f planeY = splat<1>(plane); - Simd4f planeZ = splat<2>(plane); - Simd4f planeD = pEnd[planeIndex]; + T4f plane = planes[planeIndex]; + T4f planeX = splat<0>(plane); + T4f planeY = splat<1>(plane); + T4f planeZ = splat<2>(plane); + T4f planeD = pEnd[planeIndex]; while (mask &= test) { test = mask - 1; planeIndex = findBitSet(mask & ~test); plane = planes[planeIndex]; - Simd4f dist = pEnd[planeIndex]; - Simd4f closer = dist > planeD; + T4f dist = pEnd[planeIndex]; + T4f closer = dist > planeD; planeX = select(closer, splat<0>(plane), planeX); planeY = select(closer, splat<1>(plane), planeY); planeZ = select(closer, splat<2>(plane), planeZ); @@ -1730,8 +1730,8 @@ void cloth::SwCollision<Simd4f>::collideConvexes(const Simd4f* __restrict planes } } -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::collideTriangles(const IterationState<Simd4f>& state) +template <typename T4f> +void cloth::SwCollision<T4f>::collideTriangles(const IterationState<T4f>& state) { if (!mClothData.mNumCollisionTriangles) return; @@ -1739,24 +1739,24 @@ void cloth::SwCollision<Simd4f>::collideTriangles(const IterationState<Simd4f>& TriangleData* triangles = static_cast<TriangleData*>(mAllocator.allocate(sizeof(TriangleData) * mClothData.mNumCollisionTriangles)); - UnalignedIterator<Simd4f, 3> targetTriangles(mClothData.mTargetCollisionTriangles); + UnalignedIterator<T4f, 3> targetTriangles(mClothData.mTargetCollisionTriangles); // generate triangle collision data if (state.mRemainingIterations != 1) { // interpolate triangles - LerpIterator<Simd4f, UnalignedIterator<Simd4f, 3> > triangleIter(mClothData.mStartCollisionTriangles, + LerpIterator<T4f, UnalignedIterator<T4f, 3> > triangleIter(mClothData.mStartCollisionTriangles, targetTriangles, state.getCurrentAlpha()); - generateTriangles<Simd4f>(triangles, triangleIter, mClothData.mNumCollisionTriangles); + generateTriangles<T4f>(triangles, triangleIter, mClothData.mNumCollisionTriangles); } else { // otherwise use the target triangles directly - generateTriangles<Simd4f>(triangles, targetTriangles, mClothData.mNumCollisionTriangles); + generateTriangles<T4f>(triangles, targetTriangles, mClothData.mNumCollisionTriangles); } - Simd4f positions[4]; + T4f positions[4]; float* __restrict pIt = mClothData.mCurParticles; float* __restrict pEnd = pIt + mClothData.mNumParticles * 4; @@ -1771,11 +1771,11 @@ void cloth::SwCollision<Simd4f>::collideTriangles(const IterationState<Simd4f>& ImpulseAccumulator accum; collideTriangles(triangles, positions, accum); - Simd4f mask; + T4f mask; if (!anyGreater(accum.mNumCollisions, gSimd4fEpsilon, mask)) continue; - Simd4f invNumCollisions = recip(accum.mNumCollisions); + T4f invNumCollisions = recip(accum.mNumCollisions); positions[0] = positions[0] + accum.mDeltaX * invNumCollisions; positions[1] = positions[1] + accum.mDeltaY * invNumCollisions; @@ -1795,60 +1795,60 @@ void cloth::SwCollision<Simd4f>::collideTriangles(const IterationState<Simd4f>& mAllocator.deallocate(triangles); } -template <typename Simd4f> -void cloth::SwCollision<Simd4f>::collideTriangles(const TriangleData* __restrict triangles, Simd4f* __restrict curPos, +template <typename T4f> +void cloth::SwCollision<T4f>::collideTriangles(const TriangleData* __restrict triangles, T4f* __restrict curPos, ImpulseAccumulator& accum) { - Simd4f normalX, normalY, normalZ, normalD; + T4f normalX, normalY, normalZ, normalD; normalX = normalY = normalZ = normalD = gSimd4fZero; - Simd4f minSqrLength = gSimd4fFloatMax; + T4f minSqrLength = gSimd4fFloatMax; const TriangleData* __restrict tIt, *tEnd = triangles + mClothData.mNumCollisionTriangles; for (tIt = triangles; tIt != tEnd; ++tIt) { - Simd4f base = loadAligned(&tIt->base.x); - Simd4f edge0 = loadAligned(&tIt->edge0.x); - Simd4f edge1 = loadAligned(&tIt->edge1.x); - Simd4f normal = loadAligned(&tIt->normal.x); - Simd4f aux = loadAligned(&tIt->det); + T4f base = loadAligned(&tIt->base.x); + T4f edge0 = loadAligned(&tIt->edge0.x); + T4f edge1 = loadAligned(&tIt->edge1.x); + T4f normal = loadAligned(&tIt->normal.x); + T4f aux = loadAligned(&tIt->det); - Simd4f dx = curPos[0] - splat<0>(base); - Simd4f dy = curPos[1] - splat<1>(base); - Simd4f dz = curPos[2] - splat<2>(base); + T4f dx = curPos[0] - splat<0>(base); + T4f dy = curPos[1] - splat<1>(base); + T4f dz = curPos[2] - splat<2>(base); - Simd4f e0x = splat<0>(edge0); - Simd4f e0y = splat<1>(edge0); - Simd4f e0z = splat<2>(edge0); + T4f e0x = splat<0>(edge0); + T4f e0y = splat<1>(edge0); + T4f e0z = splat<2>(edge0); - Simd4f e1x = splat<0>(edge1); - Simd4f e1y = splat<1>(edge1); - Simd4f e1z = splat<2>(edge1); + T4f e1x = splat<0>(edge1); + T4f e1y = splat<1>(edge1); + T4f e1z = splat<2>(edge1); - Simd4f nx = splat<0>(normal); - Simd4f ny = splat<1>(normal); - Simd4f nz = splat<2>(normal); + T4f nx = splat<0>(normal); + T4f ny = splat<1>(normal); + T4f nz = splat<2>(normal); - Simd4f deltaDotEdge0 = dx * e0x + dy * e0y + dz * e0z; - Simd4f deltaDotEdge1 = dx * e1x + dy * e1y + dz * e1z; - Simd4f deltaDotNormal = dx * nx + dy * ny + dz * nz; + T4f deltaDotEdge0 = dx * e0x + dy * e0y + dz * e0z; + T4f deltaDotEdge1 = dx * e1x + dy * e1y + dz * e1z; + T4f deltaDotNormal = dx * nx + dy * ny + dz * nz; - Simd4f edge0DotEdge1 = splat<3>(base); - Simd4f edge0SqrLength = splat<3>(edge0); - Simd4f edge1SqrLength = splat<3>(edge1); + T4f edge0DotEdge1 = splat<3>(base); + T4f edge0SqrLength = splat<3>(edge0); + T4f edge1SqrLength = splat<3>(edge1); - Simd4f s = edge1SqrLength * deltaDotEdge0 - edge0DotEdge1 * deltaDotEdge1; - Simd4f t = edge0SqrLength * deltaDotEdge1 - edge0DotEdge1 * deltaDotEdge0; + T4f s = edge1SqrLength * deltaDotEdge0 - edge0DotEdge1 * deltaDotEdge1; + T4f t = edge0SqrLength * deltaDotEdge1 - edge0DotEdge1 * deltaDotEdge0; - Simd4f sPositive = s > gSimd4fZero; - Simd4f tPositive = t > gSimd4fZero; + T4f sPositive = s > gSimd4fZero; + T4f tPositive = t > gSimd4fZero; - Simd4f det = splat<0>(aux); + T4f det = splat<0>(aux); s = select(tPositive, s * det, deltaDotEdge0 * splat<2>(aux)); t = select(sPositive, t * det, deltaDotEdge1 * splat<3>(aux)); - Simd4f clamp = gSimd4fOne < s + t; - Simd4f numerator = edge1SqrLength - edge0DotEdge1 + deltaDotEdge0 - deltaDotEdge1; + T4f clamp = gSimd4fOne < s + t; + T4f numerator = edge1SqrLength - edge0DotEdge1 + deltaDotEdge0 - deltaDotEdge1; s = select(clamp, numerator * splat<1>(aux), s); @@ -1859,13 +1859,13 @@ void cloth::SwCollision<Simd4f>::collideTriangles(const TriangleData* __restrict dy = dy - e0y * s - e1y * t; dz = dz - e0z * s - e1z * t; - Simd4f sqrLength = dx * dx + dy * dy + dz * dz; + T4f sqrLength = dx * dx + dy * dy + dz * dz; // slightly increase distance for colliding triangles - Simd4f slack = (gSimd4fZero > deltaDotNormal) & simd4f(1e-4f); + T4f slack = (gSimd4fZero > deltaDotNormal) & simd4f(1e-4f); sqrLength = sqrLength + sqrLength * slack; - Simd4f mask = sqrLength < minSqrLength; + T4f mask = sqrLength < minSqrLength; normalX = select(mask, nx, normalX); normalY = select(mask, ny, normalY); @@ -1875,7 +1875,7 @@ void cloth::SwCollision<Simd4f>::collideTriangles(const TriangleData* __restrict minSqrLength = min(sqrLength, minSqrLength); } - Simd4f mask; + T4f mask; if (!anyGreater(gSimd4fZero, normalD, mask)) return; |