about | summary | refs | log | tree | commit | diff
path: root/NvCloth/src/SwCollision.cpp
diff options
context:
space:
mode:
author: Marijn Tamis <[email protected]> 2017-04-28 14:19:07 +0200
committer: Marijn Tamis <[email protected]> 2017-04-28 14:19:07 +0200
commit: b350eb5f4d44e8448115796144375d79438d74ae (patch)
tree: 8e102e8c28f45a1b87bd335ceee4f33c3d4ee7c2 /NvCloth/src/SwCollision.cpp
parent: Add visual samples. (diff)
download: nvcloth-b350eb5f4d44e8448115796144375d79438d74ae.tar.xz
nvcloth-b350eb5f4d44e8448115796144375d79438d74ae.zip
NvCloth 1.1.0 Release. (22041545)
Diffstat (limited to 'NvCloth/src/SwCollision.cpp')
-rw-r--r-- NvCloth/src/SwCollision.cpp 990
1 files changed, 495 insertions, 495 deletions
diff --git a/NvCloth/src/SwCollision.cpp b/NvCloth/src/SwCollision.cpp
index 3774795..89df8a5 100644
--- a/NvCloth/src/SwCollision.cpp
+++ b/NvCloth/src/SwCollision.cpp
@@ -42,8 +42,8 @@ using namespace nv;
using namespace physx;
// the particle trajectory needs to penetrate more than 0.2 * radius to trigger continuous collision
// NOTE(review): assuming cloth::sqr squares its argument, the value below is
// (1 - 0.2)^2 - 1 = -0.36; how the constant is consumed is not visible in this excerpt.
-template <typename Simd4f>
-const Simd4f cloth::SwCollision<Simd4f>::sSkeletonWidth = simd4f(cloth::sqr(1 - 0.2f) - 1);
+template <typename T4f>
+const T4f cloth::SwCollision<T4f>::sSkeletonWidth = simd4f(cloth::sqr(1 - 0.2f) - 1);
#if NV_SIMD_SSE2
const Simd4i cloth::Gather<Simd4i>::sIntSignBit = simd4i(0x80000000);
@@ -66,8 +66,8 @@ const Simd4fScalarFactory sGridExpand = simd4f(1e-4f);
const Simd4fTupleFactory sMinusFloatMaxXYZ = simd4f(-FLT_MAX, -FLT_MAX, -FLT_MAX, 0.0f);
#if PX_PROFILE || PX_DEBUG
-template <typename Simd4f>
-uint32_t horizontalSum(const Simd4f& x)
+template <typename T4f>
+uint32_t horizontalSum(const T4f& x)
{
const float* p = array(x);
return uint32_t(0.5f + p[0] + p[1] + p[2] + p[3]);
@@ -75,8 +75,8 @@ uint32_t horizontalSum(const Simd4f& x)
#endif
// 7 elements are written to ptr!
-template <typename Simd4f>
-void storeBounds(float* ptr, const cloth::BoundingBox<Simd4f>& bounds)
+template <typename T4f>
+void storeBounds(float* ptr, const cloth::BoundingBox<T4f>& bounds)
{
store(ptr, bounds.mLower);
store(ptr + 3, bounds.mUpper);
@@ -128,14 +128,14 @@ namespace nv
{
namespace cloth
{
-template <typename Simd4f>
-BoundingBox<Simd4f> expandBounds(const BoundingBox<Simd4f>& bbox, const SphereData* sIt, const SphereData* sEnd)
+template <typename T4f>
+BoundingBox<T4f> expandBounds(const BoundingBox<T4f>& bbox, const SphereData* sIt, const SphereData* sEnd)
{
- BoundingBox<Simd4f> result = bbox;
+ BoundingBox<T4f> result = bbox;
for (; sIt != sEnd; ++sIt)
{
- Simd4f p = loadAligned(array(sIt->center));
- Simd4f r = splat<3>(p);
+ T4f p = loadAligned(array(sIt->center));
+ T4f r = splat<3>(p);
result.mLower = min(result.mLower, p - r);
result.mUpper = max(result.mUpper, p + r);
}
@@ -146,8 +146,8 @@ BoundingBox<Simd4f> expandBounds(const BoundingBox<Simd4f>& bbox, const SphereDa
namespace
{
-template <typename Simd4f, typename SrcIterator>
-void generateSpheres(Simd4f* dIt, const SrcIterator& src, uint32_t count)
+template <typename T4f, typename SrcIterator>
+void generateSpheres(T4f* dIt, const SrcIterator& src, uint32_t count)
{
// have to copy out iterator to ensure alignment is maintained
for (SrcIterator sIt = src; 0 < count--; ++sIt, ++dIt)
@@ -192,41 +192,41 @@ void generateCones(cloth::ConeData* dst, const cloth::SphereData* sourceSpheres,
}
}
// Copies `count` elements read through the source iterator `src` into the
// destination array starting at `dIt`, one assignment per element.
// dIt:   destination pointer, advanced once per copied element.
// src:   source iterator; taken by const reference and never advanced itself.
// count: number of elements to copy; nothing is written when it is 0.
-template <typename Simd4f, typename SrcIterator>
-void generatePlanes(Simd4f* dIt, const SrcIterator& src, uint32_t count)
+template <typename T4f, typename SrcIterator>
+void generatePlanes(T4f* dIt, const SrcIterator& src, uint32_t count)
{
// have to copy out iterator to ensure alignment is maintained
// (the local copy sIt is what gets incremented; `0 < count--` runs the body exactly `count` times)
for (SrcIterator sIt = src; 0 < count--; ++sIt, ++dIt)
*dIt = *sIt;
}
-template <typename Simd4f, typename SrcIterator>
+template <typename T4f, typename SrcIterator>
void generateTriangles(cloth::TriangleData* dIt, const SrcIterator& src, uint32_t count)
{
// have to copy out iterator to ensure alignment is maintained
for (SrcIterator sIt = src; 0 < count--; ++dIt)
{
- Simd4f p0 = *sIt;
+ T4f p0 = *sIt;
++sIt;
- Simd4f p1 = *sIt;
+ T4f p1 = *sIt;
++sIt;
- Simd4f p2 = *sIt;
+ T4f p2 = *sIt;
++sIt;
- Simd4f edge0 = p1 - p0;
- Simd4f edge1 = p2 - p0;
- Simd4f normal = cross3(edge0, edge1);
+ T4f edge0 = p1 - p0;
+ T4f edge1 = p2 - p0;
+ T4f normal = cross3(edge0, edge1);
- Simd4f edge0SqrLength = dot3(edge0, edge0);
- Simd4f edge1SqrLength = dot3(edge1, edge1);
- Simd4f edge0DotEdge1 = dot3(edge0, edge1);
- Simd4f normalInvLength = rsqrt(dot3(normal, normal));
+ T4f edge0SqrLength = dot3(edge0, edge0);
+ T4f edge1SqrLength = dot3(edge1, edge1);
+ T4f edge0DotEdge1 = dot3(edge0, edge1);
+ T4f normalInvLength = rsqrt(dot3(normal, normal));
- Simd4f det = edge0SqrLength * edge1SqrLength - edge0DotEdge1 * edge0DotEdge1;
- Simd4f denom = edge0SqrLength + edge1SqrLength - edge0DotEdge1 - edge0DotEdge1;
+ T4f det = edge0SqrLength * edge1SqrLength - edge0DotEdge1 * edge0DotEdge1;
+ T4f denom = edge0SqrLength + edge1SqrLength - edge0DotEdge1 - edge0DotEdge1;
// there are definitely faster ways...
- Simd4f aux = select(sMaskX, det, denom);
+ T4f aux = select(sMaskX, det, denom);
aux = select(sMaskZ, edge0SqrLength, aux);
aux = select(sMaskW, edge1SqrLength, aux);
@@ -240,14 +240,14 @@ void generateTriangles(cloth::TriangleData* dIt, const SrcIterator& src, uint32_
} // namespace
// Default constructor: starts with both buffer pointers (mSpheres, mCones) set
// to 0, i.e. no storage attached until SwCollision::allocate() fills them in.
-template <typename Simd4f>
-cloth::SwCollision<Simd4f>::CollisionData::CollisionData()
+template <typename T4f>
+cloth::SwCollision<T4f>::CollisionData::CollisionData()
: mSpheres(0), mCones(0)
{
}
-template <typename Simd4f>
-cloth::SwCollision<Simd4f>::SwCollision(SwClothData& clothData, SwKernelAllocator& alloc)
+template <typename T4f>
+cloth::SwCollision<T4f>::SwCollision(SwClothData& clothData, SwKernelAllocator& alloc)
: mClothData(clothData), mAllocator(alloc)
{
allocate(mCurData);
@@ -256,22 +256,22 @@ cloth::SwCollision<Simd4f>::SwCollision(SwClothData& clothData, SwKernelAllocato
{
allocate(mPrevData);
- generateSpheres(reinterpret_cast<Simd4f*>(mPrevData.mSpheres),
- reinterpret_cast<const Simd4f*>(clothData.mStartCollisionSpheres), clothData.mNumSpheres);
+ generateSpheres(reinterpret_cast<T4f*>(mPrevData.mSpheres),
+ reinterpret_cast<const T4f*>(clothData.mStartCollisionSpheres), clothData.mNumSpheres);
generateCones(mPrevData.mCones, mPrevData.mSpheres, clothData.mCapsuleIndices, clothData.mNumCapsules);
}
}
// Destructor: hands the sphere and cone buffers of both mCurData and mPrevData
// back to mAllocator via deallocate().
// NOTE(review): the constructor appears to allocate mPrevData only inside a
// conditional block (its condition is not visible in this excerpt). If that block
// is skipped, mPrevData still holds the 0 pointers set by CollisionData(), so
// the allocator's deallocate() must tolerate a null pointer — confirm.
-template <typename Simd4f>
-cloth::SwCollision<Simd4f>::~SwCollision()
+template <typename T4f>
+cloth::SwCollision<T4f>::~SwCollision()
{
deallocate(mCurData);
deallocate(mPrevData);
}
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::operator()(const IterationState<Simd4f>& state)
+template <typename T4f>
+void cloth::SwCollision<T4f>::operator()(const IterationState<T4f>& state)
{
mNumCollisions = 0;
@@ -285,20 +285,20 @@ void cloth::SwCollision<Simd4f>::operator()(const IterationState<Simd4f>& state)
bool lastIteration = state.mRemainingIterations == 1;
- const Simd4f* targetSpheres = reinterpret_cast<const Simd4f*>(mClothData.mTargetCollisionSpheres);
+ const T4f* targetSpheres = reinterpret_cast<const T4f*>(mClothData.mTargetCollisionSpheres);
// generate sphere and cone collision data
if (!lastIteration)
{
// interpolate spheres
- LerpIterator<Simd4f, const Simd4f*> pIter(reinterpret_cast<const Simd4f*>(mClothData.mStartCollisionSpheres),
+ LerpIterator<T4f, const T4f*> pIter(reinterpret_cast<const T4f*>(mClothData.mStartCollisionSpheres),
targetSpheres, state.getCurrentAlpha());
- generateSpheres(reinterpret_cast<Simd4f*>(mCurData.mSpheres), pIter, mClothData.mNumSpheres);
+ generateSpheres(reinterpret_cast<T4f*>(mCurData.mSpheres), pIter, mClothData.mNumSpheres);
}
else
{
// otherwise use the target spheres directly
- generateSpheres(reinterpret_cast<Simd4f*>(mCurData.mSpheres), targetSpheres, mClothData.mNumSpheres);
+ generateSpheres(reinterpret_cast<T4f*>(mCurData.mSpheres), targetSpheres, mClothData.mNumSpheres);
}
// generate cones even if test below fails because
@@ -323,8 +323,8 @@ void cloth::SwCollision<Simd4f>::operator()(const IterationState<Simd4f>& state)
shdfnd::swap(mCurData, mPrevData);
}
-template <typename Simd4f>
-size_t cloth::SwCollision<Simd4f>::estimateTemporaryMemory(const SwCloth& cloth)
+template <typename T4f>
+size_t cloth::SwCollision<T4f>::estimateTemporaryMemory(const SwCloth& cloth)
{
size_t numTriangles = cloth.mStartCollisionTriangles.size();
size_t numPlanes = cloth.mStartCollisionPlanes.size();
@@ -335,8 +335,8 @@ size_t cloth::SwCollision<Simd4f>::estimateTemporaryMemory(const SwCloth& cloth)
return std::max(kTriangleDataSize, kPlaneDataSize);
}
-template <typename Simd4f>
-size_t cloth::SwCollision<Simd4f>::estimatePersistentMemory(const SwCloth& cloth)
+template <typename T4f>
+size_t cloth::SwCollision<T4f>::estimatePersistentMemory(const SwCloth& cloth)
{
size_t numCapsules = cloth.mCapsuleIndices.size();
size_t numSpheres = cloth.mStartCollisionSpheres.size();
@@ -347,74 +347,74 @@ size_t cloth::SwCollision<Simd4f>::estimatePersistentMemory(const SwCloth& cloth
return sphereDataSize + coneDataSize;
}
// Requests raw storage from mAllocator for the collision shapes of one
// CollisionData set and stores the resulting pointers in `data`:
//   - mSpheres: room for mClothData.mNumSpheres SphereData entries
//   - mCones:   room for mClothData.mNumCapsules ConeData entries
// Only the allocator's return value is cast and stored; no element is
// constructed or initialized here.
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::allocate(CollisionData& data)
+template <typename T4f>
+void cloth::SwCollision<T4f>::allocate(CollisionData& data)
{
data.mSpheres = static_cast<SphereData*>(mAllocator.allocate(sizeof(SphereData) * mClothData.mNumSpheres));
data.mCones = static_cast<ConeData*>(mAllocator.allocate(sizeof(ConeData) * mClothData.mNumCapsules));
}
// Returns the sphere and cone buffers referenced by `data` to mAllocator.
// `data` is taken by const reference, so its pointers are NOT reset here and
// must not be used by the caller afterwards.
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::deallocate(const CollisionData& data)
+template <typename T4f>
+void cloth::SwCollision<T4f>::deallocate(const CollisionData& data)
{
mAllocator.deallocate(data.mSpheres);
mAllocator.deallocate(data.mCones);
}
// Computes the component-wise min/max over all current particles and rotates
// the stored bounds: the previous contents of mClothData.mCurBounds are moved to
// mClothData.mPrevBounds and the freshly computed box is written to mCurBounds.
// While iterating, each current particle is also rewritten in place (see the
// select() below).
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::computeBounds()
+template <typename T4f>
+void cloth::SwCollision<T4f>::computeBounds()
{
// NOTE(review): the zone label names SwSolverKernel although this is
// SwCollision::computeBounds — confirm whether that is intentional.
NV_CLOTH_PROFILE_ZONE("cloth::SwSolverKernel::computeBounds", /*ProfileContext::None*/ 0);
// prevIt and curIt advance in lockstep over mNumParticles elements
- Simd4f* prevIt = reinterpret_cast<Simd4f*>(mClothData.mPrevParticles);
- Simd4f* curIt = reinterpret_cast<Simd4f*>(mClothData.mCurParticles);
- Simd4f* curEnd = curIt + mClothData.mNumParticles;
- Simd4f floatMaxXYZ = -static_cast<Simd4f>(sMinusFloatMaxXYZ);
+ T4f* prevIt = reinterpret_cast<T4f*>(mClothData.mPrevParticles);
+ T4f* curIt = reinterpret_cast<T4f*>(mClothData.mCurParticles);
+ T4f* curEnd = curIt + mClothData.mNumParticles;
+ T4f floatMaxXYZ = -static_cast<T4f>(sMinusFloatMaxXYZ);
// floatMaxXYZ is the negation of (-FLT_MAX, -FLT_MAX, -FLT_MAX, 0), i.e. FLT_MAX in
// x/y/z and zero in w, so for finite values `current > floatMaxXYZ` can only hold in w.
// The box starts inverted (lower = +FLT_MAX, upper = -FLT_MAX) so the first particle sets it.
- Simd4f lower = simd4f(FLT_MAX), upper = -lower;
+ T4f lower = simd4f(FLT_MAX), upper = -lower;
for (; curIt < curEnd; ++curIt, ++prevIt)
{
- Simd4f current = *curIt;
+ T4f current = *curIt;
// min/max are taken over all four lanes (w included), using the value read before the rewrite below
lower = min(lower, current);
upper = max(upper, current);
// if (current.w > 0) current.w = previous.w
*curIt = select(current > floatMaxXYZ, *prevIt, current);
}
- BoundingBox<Simd4f> curBounds;
+ BoundingBox<T4f> curBounds;
curBounds.mLower = lower;
curBounds.mUpper = upper;
// don't change this order, storeBounds writes 7 floats
// (the old mCurBounds must be loaded before it is overwritten by the first store;
// NOTE(review): the 7-float write presumably matters because of how mCurBounds and
// mPrevBounds are laid out in memory — layout not visible here, confirm)
- BoundingBox<Simd4f> prevBounds = loadBounds<Simd4f>(mClothData.mCurBounds);
+ BoundingBox<T4f> prevBounds = loadBounds<T4f>(mClothData.mCurBounds);
storeBounds(mClothData.mCurBounds, curBounds);
storeBounds(mClothData.mPrevBounds, prevBounds);
}
namespace
{
// Returns the result of comparing (left & ~right) against gSimd4iZero, i.e. a
// test for "left has no bits set outside of right".
// NOTE(review): the result is presumably a per-lane mask, as with other SIMD
// comparisons — confirm against the Simd4i operator== definition.
-template <typename Simd4i>
-Simd4i andNotIsZero(const Simd4i& left, const Simd4i& right)
+template <typename T4i>
+T4i andNotIsZero(const T4i& left, const T4i& right)
{
return (left & ~right) == gSimd4iZero;
}
}
// build per-axis mask arrays of spheres on the right/left of grid cell
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::buildSphereAcceleration(const SphereData* sIt)
+template <typename T4f>
+void cloth::SwCollision<T4f>::buildSphereAcceleration(const SphereData* sIt)
{
static const int maxIndex = sGridSize - 1;
const SphereData* sEnd = sIt + mClothData.mNumSpheres;
for (uint32_t mask = 0x1; sIt != sEnd; ++sIt, mask <<= 1)
{
- Simd4f sphere = loadAligned(array(sIt->center));
- Simd4f radius = splat<3>(sphere);
+ T4f sphere = loadAligned(array(sIt->center));
+ T4f radius = splat<3>(sphere);
- Simd4i first = intFloor(max((sphere - radius) * mGridScale + mGridBias, gSimd4fZero));
- Simd4i last = intFloor(min((sphere + radius) * mGridScale + mGridBias, sGridLength));
+ T4i first = intFloor(max((sphere - radius) * mGridScale + mGridBias, gSimd4fZero));
+ T4i last = intFloor(min((sphere + radius) * mGridScale + mGridBias, sGridLength));
const int* firstIdx = array(first);
const int* lastIdx = array(last);
@@ -434,8 +434,8 @@ void cloth::SwCollision<Simd4f>::buildSphereAcceleration(const SphereData* sIt)
}
// generate cone masks from sphere masks
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::buildConeAcceleration()
+template <typename T4f>
+void cloth::SwCollision<T4f>::buildConeAcceleration()
{
const ConeData* coneIt = mCurData.mCones;
const ConeData* coneEnd = coneIt + mClothData.mNumCapsules;
@@ -456,8 +456,8 @@ void cloth::SwCollision<Simd4f>::buildConeAcceleration()
}
// convert right/left mask arrays into single overlap array
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::mergeAcceleration(uint32_t* firstIt)
+template <typename T4f>
+void cloth::SwCollision<T4f>::mergeAcceleration(uint32_t* firstIt)
{
uint32_t* firstEnd = firstIt + 3 * sGridSize;
uint32_t* lastIt = firstEnd;
@@ -466,28 +466,28 @@ void cloth::SwCollision<Simd4f>::mergeAcceleration(uint32_t* firstIt)
}
// build mask of spheres/cones touching a regular grid along each axis
-template <typename Simd4f>
-bool cloth::SwCollision<Simd4f>::buildAcceleration()
+template <typename T4f>
+bool cloth::SwCollision<T4f>::buildAcceleration()
{
// determine sphere bbox
- BoundingBox<Simd4f> sphereBounds =
- expandBounds(emptyBounds<Simd4f>(), mCurData.mSpheres, mCurData.mSpheres + mClothData.mNumSpheres);
- BoundingBox<Simd4f> particleBounds = loadBounds<Simd4f>(mClothData.mCurBounds);
+ BoundingBox<T4f> sphereBounds =
+ expandBounds(emptyBounds<T4f>(), mCurData.mSpheres, mCurData.mSpheres + mClothData.mNumSpheres);
+ BoundingBox<T4f> particleBounds = loadBounds<T4f>(mClothData.mCurBounds);
if (mClothData.mEnableContinuousCollision)
{
sphereBounds = expandBounds(sphereBounds, mPrevData.mSpheres, mPrevData.mSpheres + mClothData.mNumSpheres);
- particleBounds = expandBounds(particleBounds, loadBounds<Simd4f>(mClothData.mPrevBounds));
+ particleBounds = expandBounds(particleBounds, loadBounds<T4f>(mClothData.mPrevBounds));
}
- BoundingBox<Simd4f> bounds = intersectBounds(sphereBounds, particleBounds);
- Simd4f edgeLength = (bounds.mUpper - bounds.mLower) & ~static_cast<Simd4f>(sMaskW);
+ BoundingBox<T4f> bounds = intersectBounds(sphereBounds, particleBounds);
+ T4f edgeLength = (bounds.mUpper - bounds.mLower) & ~static_cast<T4f>(sMaskW);
if (!allGreaterEqual(edgeLength, gSimd4fZero))
return false;
// calculate an expanded bounds to account for numerical inaccuracy
- const Simd4f expandedLower = bounds.mLower - abs(bounds.mLower) * sGridExpand;
- const Simd4f expandedUpper = bounds.mUpper + abs(bounds.mUpper) * sGridExpand;
- const Simd4f expandedEdgeLength = max(expandedUpper - expandedLower, gSimd4fEpsilon);
+ const T4f expandedLower = bounds.mLower - abs(bounds.mLower) * sGridExpand;
+ const T4f expandedUpper = bounds.mUpper + abs(bounds.mUpper) * sGridExpand;
+ const T4f expandedEdgeLength = max(expandedUpper - expandedLower, gSimd4fEpsilon);
// make grid minimal thickness and strict upper bound of spheres
mGridScale = sGridLength * recip<1>(expandedEdgeLength);
@@ -514,8 +514,8 @@ bool cloth::SwCollision<Simd4f>::buildAcceleration()
#define FORCE_INLINE inline __attribute__((always_inline))
#endif
-template <typename Simd4f>
-FORCE_INLINE typename cloth::SwCollision<Simd4f>::ShapeMask& cloth::SwCollision<Simd4f>::ShapeMask::
+template <typename T4f>
+FORCE_INLINE typename cloth::SwCollision<T4f>::ShapeMask& cloth::SwCollision<T4f>::ShapeMask::
operator = (const ShapeMask& right)
{
mCones = right.mCones;
@@ -523,8 +523,8 @@ operator = (const ShapeMask& right)
return *this;
}
-template <typename Simd4f>
-FORCE_INLINE typename cloth::SwCollision<Simd4f>::ShapeMask& cloth::SwCollision<Simd4f>::ShapeMask::
+template <typename T4f>
+FORCE_INLINE typename cloth::SwCollision<T4f>::ShapeMask& cloth::SwCollision<T4f>::ShapeMask::
operator &= (const ShapeMask& right)
{
mCones = mCones & right.mCones;
@@ -532,12 +532,12 @@ operator &= (const ShapeMask& right)
return *this;
}
-template <typename Simd4f>
-FORCE_INLINE typename cloth::SwCollision<Simd4f>::ShapeMask
-cloth::SwCollision<Simd4f>::getShapeMask(const Simd4f& position, const Simd4i* __restrict sphereGrid,
- const Simd4i* __restrict coneGrid)
+template <typename T4f>
+FORCE_INLINE typename cloth::SwCollision<T4f>::ShapeMask
+cloth::SwCollision<T4f>::getShapeMask(const T4f& position, const T4i* __restrict sphereGrid,
+ const T4i* __restrict coneGrid)
{
- Gather<Simd4i> gather(intFloor(position));
+ Gather<T4i> gather(intFloor(position));
ShapeMask result;
result.mCones = gather(coneGrid);
@@ -546,13 +546,13 @@ cloth::SwCollision<Simd4f>::getShapeMask(const Simd4f& position, const Simd4i* _
}
// lookup acceleration structure and return mask of potential intersectors
-template <typename Simd4f>
-FORCE_INLINE typename cloth::SwCollision<Simd4f>::ShapeMask
-cloth::SwCollision<Simd4f>::getShapeMask(const Simd4f* __restrict positions) const
+template <typename T4f>
+FORCE_INLINE typename cloth::SwCollision<T4f>::ShapeMask
+cloth::SwCollision<T4f>::getShapeMask(const T4f* __restrict positions) const
{
- Simd4f posX = positions[0] * splat<0>(mGridScale) + splat<0>(mGridBias);
- Simd4f posY = positions[1] * splat<1>(mGridScale) + splat<1>(mGridBias);
- Simd4f posZ = positions[2] * splat<2>(mGridScale) + splat<2>(mGridBias);
+ T4f posX = positions[0] * splat<0>(mGridScale) + splat<0>(mGridBias);
+ T4f posY = positions[1] * splat<1>(mGridScale) + splat<1>(mGridBias);
+ T4f posZ = positions[2] * splat<2>(mGridScale) + splat<2>(mGridBias);
ShapeMask result = getShapeMask(posX, mSphereGrid, mConeGrid);
result &= getShapeMask(posY, mSphereGrid + 2, mConeGrid + 2);
@@ -562,38 +562,38 @@ cloth::SwCollision<Simd4f>::getShapeMask(const Simd4f* __restrict positions) con
}
// lookup acceleration structure and return mask of potential intersectors
-template <typename Simd4f>
-FORCE_INLINE typename cloth::SwCollision<Simd4f>::ShapeMask
-cloth::SwCollision<Simd4f>::getShapeMask(const Simd4f* __restrict prevPos, const Simd4f* __restrict curPos) const
+template <typename T4f>
+FORCE_INLINE typename cloth::SwCollision<T4f>::ShapeMask
+cloth::SwCollision<T4f>::getShapeMask(const T4f* __restrict prevPos, const T4f* __restrict curPos) const
{
- Simd4f scaleX = splat<0>(mGridScale);
- Simd4f scaleY = splat<1>(mGridScale);
- Simd4f scaleZ = splat<2>(mGridScale);
+ T4f scaleX = splat<0>(mGridScale);
+ T4f scaleY = splat<1>(mGridScale);
+ T4f scaleZ = splat<2>(mGridScale);
- Simd4f biasX = splat<0>(mGridBias);
- Simd4f biasY = splat<1>(mGridBias);
- Simd4f biasZ = splat<2>(mGridBias);
+ T4f biasX = splat<0>(mGridBias);
+ T4f biasY = splat<1>(mGridBias);
+ T4f biasZ = splat<2>(mGridBias);
- Simd4f prevX = prevPos[0] * scaleX + biasX;
- Simd4f prevY = prevPos[1] * scaleY + biasY;
- Simd4f prevZ = prevPos[2] * scaleZ + biasZ;
+ T4f prevX = prevPos[0] * scaleX + biasX;
+ T4f prevY = prevPos[1] * scaleY + biasY;
+ T4f prevZ = prevPos[2] * scaleZ + biasZ;
- Simd4f curX = curPos[0] * scaleX + biasX;
- Simd4f curY = curPos[1] * scaleY + biasY;
- Simd4f curZ = curPos[2] * scaleZ + biasZ;
+ T4f curX = curPos[0] * scaleX + biasX;
+ T4f curY = curPos[1] * scaleY + biasY;
+ T4f curZ = curPos[2] * scaleZ + biasZ;
- Simd4f maxX = min(max(prevX, curX), sGridLength);
- Simd4f maxY = min(max(prevY, curY), sGridLength);
- Simd4f maxZ = min(max(prevZ, curZ), sGridLength);
+ T4f maxX = min(max(prevX, curX), sGridLength);
+ T4f maxY = min(max(prevY, curY), sGridLength);
+ T4f maxZ = min(max(prevZ, curZ), sGridLength);
ShapeMask result = getShapeMask(maxX, mSphereGrid, mConeGrid);
result &= getShapeMask(maxY, mSphereGrid + 2, mConeGrid + 2);
result &= getShapeMask(maxZ, mSphereGrid + 4, mConeGrid + 4);
- Simd4f zero = gSimd4fZero;
- Simd4f minX = max(min(prevX, curX), zero);
- Simd4f minY = max(min(prevY, curY), zero);
- Simd4f minZ = max(min(prevZ, curZ), zero);
+ T4f zero = gSimd4fZero;
+ T4f minX = max(min(prevX, curX), zero);
+ T4f minY = max(min(prevY, curY), zero);
+ T4f minZ = max(min(prevZ, curZ), zero);
result &= getShapeMask(minX, mSphereGrid + 6, mConeGrid + 6);
result &= getShapeMask(minY, mSphereGrid + 8, mConeGrid + 8);
@@ -602,8 +602,8 @@ cloth::SwCollision<Simd4f>::getShapeMask(const Simd4f* __restrict prevPos, const
return result;
}
-template <typename Simd4f>
-struct cloth::SwCollision<Simd4f>::ImpulseAccumulator
+template <typename T4f>
+struct cloth::SwCollision<T4f>::ImpulseAccumulator
{
ImpulseAccumulator()
: mDeltaX(gSimd4fZero)
@@ -616,21 +616,21 @@ struct cloth::SwCollision<Simd4f>::ImpulseAccumulator
{
}
- void add(const Simd4f& x, const Simd4f& y, const Simd4f& z, const Simd4f& scale, const Simd4f& mask)
+ void add(const T4f& x, const T4f& y, const T4f& z, const T4f& scale, const T4f& mask)
{
NV_CLOTH_ASSERT(allTrue((mask & x) == (mask & x)));
NV_CLOTH_ASSERT(allTrue((mask & y) == (mask & y)));
NV_CLOTH_ASSERT(allTrue((mask & z) == (mask & z)));
NV_CLOTH_ASSERT(allTrue((mask & scale) == (mask & scale)));
- Simd4f maskedScale = scale & mask;
+ T4f maskedScale = scale & mask;
mDeltaX = mDeltaX + x * maskedScale;
mDeltaY = mDeltaY + y * maskedScale;
mDeltaZ = mDeltaZ + z * maskedScale;
mNumCollisions = mNumCollisions + (gSimd4fOne & mask);
}
- void addVelocity(const Simd4f& vx, const Simd4f& vy, const Simd4f& vz, const Simd4f& mask)
+ void addVelocity(const T4f& vx, const T4f& vy, const T4f& vz, const T4f& mask)
{
NV_CLOTH_ASSERT(allTrue((mask & vx) == (mask & vx)));
NV_CLOTH_ASSERT(allTrue((mask & vy) == (mask & vy)));
@@ -641,34 +641,34 @@ struct cloth::SwCollision<Simd4f>::ImpulseAccumulator
mVelZ = mVelZ + (vz & mask);
}
- void subtract(const Simd4f& x, const Simd4f& y, const Simd4f& z, const Simd4f& scale, const Simd4f& mask)
+ void subtract(const T4f& x, const T4f& y, const T4f& z, const T4f& scale, const T4f& mask)
{
NV_CLOTH_ASSERT(allTrue((mask & x) == (mask & x)));
NV_CLOTH_ASSERT(allTrue((mask & y) == (mask & y)));
NV_CLOTH_ASSERT(allTrue((mask & z) == (mask & z)));
NV_CLOTH_ASSERT(allTrue((mask & scale) == (mask & scale)));
- Simd4f maskedScale = scale & mask;
+ T4f maskedScale = scale & mask;
mDeltaX = mDeltaX - x * maskedScale;
mDeltaY = mDeltaY - y * maskedScale;
mDeltaZ = mDeltaZ - z * maskedScale;
mNumCollisions = mNumCollisions + (gSimd4fOne & mask);
}
- Simd4f mDeltaX, mDeltaY, mDeltaZ;
- Simd4f mVelX, mVelY, mVelZ;
- Simd4f mNumCollisions;
+ T4f mDeltaX, mDeltaY, mDeltaZ;
+ T4f mVelX, mVelY, mVelZ;
+ T4f mNumCollisions;
};
-template <typename Simd4f>
-FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& sphereMask, const Simd4f* positions,
+template <typename T4f>
+FORCE_INLINE void cloth::SwCollision<T4f>::collideSpheres(const T4i& sphereMask, const T4f* positions,
ImpulseAccumulator& accum) const
{
const float* __restrict spherePtr = array(mCurData.mSpheres->center);
bool frictionEnabled = mClothData.mFrictionScale > 0.0f;
- Simd4i mask4 = horizontalOr(sphereMask);
+ T4i mask4 = horizontalOr(sphereMask);
uint32_t mask = uint32_t(array(mask4)[0]);
while (mask)
{
@@ -676,16 +676,16 @@ FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& spher
uint32_t offset = findBitSet(mask & ~test) * sizeof(SphereData);
mask = mask & test;
- Simd4f sphere = loadAligned(spherePtr, offset);
+ T4f sphere = loadAligned(spherePtr, offset);
- Simd4f deltaX = positions[0] - splat<0>(sphere);
- Simd4f deltaY = positions[1] - splat<1>(sphere);
- Simd4f deltaZ = positions[2] - splat<2>(sphere);
+ T4f deltaX = positions[0] - splat<0>(sphere);
+ T4f deltaY = positions[1] - splat<1>(sphere);
+ T4f deltaZ = positions[2] - splat<2>(sphere);
- Simd4f sqrDistance = gSimd4fEpsilon + deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ;
- Simd4f negativeScale = gSimd4fOne - rsqrt(sqrDistance) * splat<3>(sphere);
+ T4f sqrDistance = gSimd4fEpsilon + deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ;
+ T4f negativeScale = gSimd4fOne - rsqrt(sqrDistance) * splat<3>(sphere);
- Simd4f contactMask;
+ T4f contactMask;
if (!anyGreater(gSimd4fZero, negativeScale, contactMask))
continue;
@@ -696,17 +696,17 @@ FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& spher
// load previous sphere pos
const float* __restrict prevSpherePtr = array(mPrevData.mSpheres->center);
- Simd4f prevSphere = loadAligned(prevSpherePtr, offset);
- Simd4f velocity = sphere - prevSphere;
+ T4f prevSphere = loadAligned(prevSpherePtr, offset);
+ T4f velocity = sphere - prevSphere;
accum.addVelocity(splat<0>(velocity), splat<1>(velocity), splat<2>(velocity), contactMask);
}
}
}
-template <typename Simd4f>
-FORCE_INLINE typename cloth::SwCollision<Simd4f>::Simd4i
-cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, ImpulseAccumulator& accum) const
+template <typename T4f>
+FORCE_INLINE typename cloth::SwCollision<T4f>::T4i
+cloth::SwCollision<T4f>::collideCones(const T4f* __restrict positions, ImpulseAccumulator& accum) const
{
const float* __restrict centerPtr = array(mCurData.mCones->center);
const float* __restrict axisPtr = array(mCurData.mCones->axis);
@@ -715,7 +715,7 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, Imp
bool frictionEnabled = mClothData.mFrictionScale > 0.0f;
ShapeMask shapeMask = getShapeMask(positions);
- Simd4i mask4 = horizontalOr(shapeMask.mCones);
+ T4i mask4 = horizontalOr(shapeMask.mCones);
uint32_t mask = uint32_t(array(mask4)[0]);
while (mask)
{
@@ -724,35 +724,35 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, Imp
uint32_t offset = coneIndex * sizeof(ConeData);
mask = mask & test;
- Simd4i test4 = mask4 - gSimd4iOne;
- Simd4f culled = simd4f(andNotIsZero(shapeMask.mCones, test4));
+ T4i test4 = mask4 - gSimd4iOne;
+ T4f culled = simd4f(andNotIsZero(shapeMask.mCones, test4));
mask4 = mask4 & test4;
- Simd4f center = loadAligned(centerPtr, offset);
+ T4f center = loadAligned(centerPtr, offset);
- Simd4f deltaX = positions[0] - splat<0>(center);
- Simd4f deltaY = positions[1] - splat<1>(center);
- Simd4f deltaZ = positions[2] - splat<2>(center);
+ T4f deltaX = positions[0] - splat<0>(center);
+ T4f deltaY = positions[1] - splat<1>(center);
+ T4f deltaZ = positions[2] - splat<2>(center);
- Simd4f axis = loadAligned(axisPtr, offset);
+ T4f axis = loadAligned(axisPtr, offset);
- Simd4f axisX = splat<0>(axis);
- Simd4f axisY = splat<1>(axis);
- Simd4f axisZ = splat<2>(axis);
- Simd4f slope = splat<3>(axis);
+ T4f axisX = splat<0>(axis);
+ T4f axisY = splat<1>(axis);
+ T4f axisZ = splat<2>(axis);
+ T4f slope = splat<3>(axis);
- Simd4f dot = deltaX * axisX + deltaY * axisY + deltaZ * axisZ;
- Simd4f radius = dot * slope + splat<3>(center);
+ T4f dot = deltaX * axisX + deltaY * axisY + deltaZ * axisZ;
+ T4f radius = dot * slope + splat<3>(center);
// set radius to zero if cone is culled
radius = max(radius, gSimd4fZero) & ~culled;
- Simd4f sqrDistance = deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ - dot * dot;
+ T4f sqrDistance = deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ - dot * dot;
- Simd4i auxiliary = loadAligned(auxiliaryPtr, offset);
- Simd4i bothMask = splat<3>(auxiliary);
+ T4i auxiliary = loadAligned(auxiliaryPtr, offset);
+ T4i bothMask = splat<3>(auxiliary);
- Simd4f contactMask;
+ T4f contactMask;
if (!anyGreater(radius * radius, sqrDistance, contactMask))
{
// cone only culled when spheres culled, ok to clear those too
@@ -764,19 +764,19 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, Imp
// making sqrDistance negative when point lies on the cone axis
sqrDistance = max(sqrDistance, gSimd4fEpsilon);
- Simd4f invDistance = rsqrt(sqrDistance);
- Simd4f base = dot + slope * sqrDistance * invDistance;
+ T4f invDistance = rsqrt(sqrDistance);
+ T4f base = dot + slope * sqrDistance * invDistance;
// force left/rightMask to false if not inside cone
base = base & contactMask;
- Simd4f halfLength = splat<1>(simd4f(auxiliary));
- Simd4i leftMask = simd4i(base < -halfLength);
- Simd4i rightMask = simd4i(base > halfLength);
+ T4f halfLength = splat<1>(simd4f(auxiliary));
+ T4i leftMask = simd4i(base < -halfLength);
+ T4i rightMask = simd4i(base > halfLength);
// we use both mask because of the early out above.
- Simd4i firstMask = splat<2>(auxiliary);
- Simd4i secondMask = firstMask ^ bothMask;
+ T4i firstMask = splat<2>(auxiliary);
+ T4i secondMask = firstMask ^ bothMask;
shapeMask.mSpheres = shapeMask.mSpheres & ~(firstMask & ~leftMask);
shapeMask.mSpheres = shapeMask.mSpheres & ~(secondMask & ~rightMask);
@@ -784,8 +784,8 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, Imp
deltaY = deltaY - base * axisY;
deltaZ = deltaZ - base * axisZ;
- Simd4f sqrCosine = splat<0>(simd4f(auxiliary));
- Simd4f scale = radius * invDistance * sqrCosine - sqrCosine;
+ T4f sqrCosine = splat<0>(simd4f(auxiliary));
+ T4f scale = radius * invDistance * sqrCosine - sqrCosine;
contactMask = contactMask & ~simd4f(leftMask | rightMask);
@@ -804,23 +804,23 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, Imp
// todo: could pre-compute sphere velocities or it might be
// faster to compute cur/prev sphere positions directly
- Simd4f s0p0 = loadAligned(prevSpheres, s0 * sizeof(SphereData));
- Simd4f s0p1 = loadAligned(curSpheres, s0 * sizeof(SphereData));
+ T4f s0p0 = loadAligned(prevSpheres, s0 * sizeof(SphereData));
+ T4f s0p1 = loadAligned(curSpheres, s0 * sizeof(SphereData));
- Simd4f s1p0 = loadAligned(prevSpheres, s1 * sizeof(SphereData));
- Simd4f s1p1 = loadAligned(curSpheres, s1 * sizeof(SphereData));
+ T4f s1p0 = loadAligned(prevSpheres, s1 * sizeof(SphereData));
+ T4f s1p1 = loadAligned(curSpheres, s1 * sizeof(SphereData));
- Simd4f v0 = s0p1 - s0p0;
- Simd4f v1 = s1p1 - s1p0;
- Simd4f vd = v1 - v0;
+ T4f v0 = s0p1 - s0p0;
+ T4f v1 = s1p1 - s1p0;
+ T4f vd = v1 - v0;
// dot is in the range -1 to 1, scale and bias to 0 to 1
dot = dot * gSimd4fHalf + gSimd4fHalf;
// interpolate velocity at contact points
- Simd4f vx = splat<0>(v0) + dot * splat<0>(vd);
- Simd4f vy = splat<1>(v0) + dot * splat<1>(vd);
- Simd4f vz = splat<2>(v0) + dot * splat<2>(vd);
+ T4f vx = splat<0>(v0) + dot * splat<0>(vd);
+ T4f vy = splat<1>(v0) + dot * splat<1>(vd);
+ T4f vz = splat<2>(v0) + dot * splat<2>(vd);
accum.addVelocity(vx, vy, vz, contactMask);
}
@@ -829,16 +829,16 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict positions, Imp
return shapeMask.mSpheres;
}
-template <typename Simd4f>
-FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& sphereMask, const Simd4f* __restrict prevPos,
- Simd4f* __restrict curPos, ImpulseAccumulator& accum) const
+template <typename T4f>
+FORCE_INLINE void cloth::SwCollision<T4f>::collideSpheres(const T4i& sphereMask, const T4f* __restrict prevPos,
+ T4f* __restrict curPos, ImpulseAccumulator& accum) const
{
const float* __restrict prevSpheres = array(mPrevData.mSpheres->center);
const float* __restrict curSpheres = array(mCurData.mSpheres->center);
bool frictionEnabled = mClothData.mFrictionScale > 0.0f;
- Simd4i mask4 = horizontalOr(sphereMask);
+ T4i mask4 = horizontalOr(sphereMask);
uint32_t mask = uint32_t(array(mask4)[0]);
while (mask)
{
@@ -846,53 +846,53 @@ FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& spher
uint32_t offset = findBitSet(mask & ~test) * sizeof(SphereData);
mask = mask & test;
- Simd4f prevSphere = loadAligned(prevSpheres, offset);
- Simd4f prevX = prevPos[0] - splat<0>(prevSphere);
- Simd4f prevY = prevPos[1] - splat<1>(prevSphere);
- Simd4f prevZ = prevPos[2] - splat<2>(prevSphere);
- Simd4f prevRadius = splat<3>(prevSphere);
+ T4f prevSphere = loadAligned(prevSpheres, offset);
+ T4f prevX = prevPos[0] - splat<0>(prevSphere);
+ T4f prevY = prevPos[1] - splat<1>(prevSphere);
+ T4f prevZ = prevPos[2] - splat<2>(prevSphere);
+ T4f prevRadius = splat<3>(prevSphere);
- Simd4f curSphere = loadAligned(curSpheres, offset);
- Simd4f curX = curPos[0] - splat<0>(curSphere);
- Simd4f curY = curPos[1] - splat<1>(curSphere);
- Simd4f curZ = curPos[2] - splat<2>(curSphere);
- Simd4f curRadius = splat<3>(curSphere);
+ T4f curSphere = loadAligned(curSpheres, offset);
+ T4f curX = curPos[0] - splat<0>(curSphere);
+ T4f curY = curPos[1] - splat<1>(curSphere);
+ T4f curZ = curPos[2] - splat<2>(curSphere);
+ T4f curRadius = splat<3>(curSphere);
- Simd4f sqrDistance = gSimd4fEpsilon + curX * curX + curY * curY + curZ * curZ;
+ T4f sqrDistance = gSimd4fEpsilon + curX * curX + curY * curY + curZ * curZ;
- Simd4f dotPrevPrev = prevX * prevX + prevY * prevY + prevZ * prevZ - prevRadius * prevRadius;
- Simd4f dotPrevCur = prevX * curX + prevY * curY + prevZ * curZ - prevRadius * curRadius;
- Simd4f dotCurCur = sqrDistance - curRadius * curRadius;
+ T4f dotPrevPrev = prevX * prevX + prevY * prevY + prevZ * prevZ - prevRadius * prevRadius;
+ T4f dotPrevCur = prevX * curX + prevY * curY + prevZ * curZ - prevRadius * curRadius;
+ T4f dotCurCur = sqrDistance - curRadius * curRadius;
- Simd4f discriminant = dotPrevCur * dotPrevCur - dotCurCur * dotPrevPrev;
- Simd4f sqrtD = sqrt(discriminant);
- Simd4f halfB = dotPrevCur - dotPrevPrev;
- Simd4f minusA = dotPrevCur - dotCurCur + halfB;
+ T4f discriminant = dotPrevCur * dotPrevCur - dotCurCur * dotPrevPrev;
+ T4f sqrtD = sqrt(discriminant);
+ T4f halfB = dotPrevCur - dotPrevPrev;
+ T4f minusA = dotPrevCur - dotCurCur + halfB;
// time of impact or 0 if prevPos inside sphere
- Simd4f toi = recip(minusA) * min(gSimd4fZero, halfB + sqrtD);
- Simd4f collisionMask = (toi < gSimd4fOne) & (halfB < sqrtD);
+ T4f toi = recip(minusA) * min(gSimd4fZero, halfB + sqrtD);
+ T4f collisionMask = (toi < gSimd4fOne) & (halfB < sqrtD);
// skip continuous collision if the (un-clamped) particle
// trajectory only touches the outer skin of the sphere.
- Simd4f rMin = prevRadius + halfB * minusA * (curRadius - prevRadius);
+ T4f rMin = prevRadius + halfB * minusA * (curRadius - prevRadius);
collisionMask = collisionMask & (discriminant > minusA * rMin * rMin * sSkeletonWidth);
// a is negative when one sphere is contained in the other,
// which is already handled by discrete collision.
- collisionMask = collisionMask & (minusA < -static_cast<Simd4f>(gSimd4fEpsilon));
+ collisionMask = collisionMask & (minusA < -static_cast<T4f>(gSimd4fEpsilon));
if (!allEqual(collisionMask, gSimd4fZero))
{
- Simd4f deltaX = prevX - curX;
- Simd4f deltaY = prevY - curY;
- Simd4f deltaZ = prevZ - curZ;
+ T4f deltaX = prevX - curX;
+ T4f deltaY = prevY - curY;
+ T4f deltaZ = prevZ - curZ;
- Simd4f oneMinusToi = (gSimd4fOne - toi) & collisionMask;
+ T4f oneMinusToi = (gSimd4fOne - toi) & collisionMask;
// reduce ccd impulse if (clamped) particle trajectory stays in sphere skin,
// i.e. scale by exp2(-k) or 1/(1+k) with k = (tmin - toi) / (1 - toi)
- Simd4f minusK = sqrtD * recip(minusA * oneMinusToi) & (oneMinusToi > gSimd4fEpsilon);
+ T4f minusK = sqrtD * recip(minusA * oneMinusToi) & (oneMinusToi > gSimd4fEpsilon);
oneMinusToi = oneMinusToi * recip(gSimd4fOne - minusK);
curX = curX + deltaX * oneMinusToi;
@@ -906,9 +906,9 @@ FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& spher
sqrDistance = gSimd4fEpsilon + curX * curX + curY * curY + curZ * curZ;
}
- Simd4f negativeScale = gSimd4fOne - rsqrt(sqrDistance) * curRadius;
+ T4f negativeScale = gSimd4fOne - rsqrt(sqrDistance) * curRadius;
- Simd4f contactMask;
+ T4f contactMask;
if (!anyGreater(gSimd4fZero, negativeScale, contactMask))
continue;
@@ -916,15 +916,15 @@ FORCE_INLINE void cloth::SwCollision<Simd4f>::collideSpheres(const Simd4i& spher
if (frictionEnabled)
{
- Simd4f velocity = curSphere - prevSphere;
+ T4f velocity = curSphere - prevSphere;
accum.addVelocity(splat<0>(velocity), splat<1>(velocity), splat<2>(velocity), contactMask);
}
}
}
-template <typename Simd4f>
-FORCE_INLINE typename cloth::SwCollision<Simd4f>::Simd4i
-cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4f* __restrict curPos,
+template <typename T4f>
+FORCE_INLINE typename cloth::SwCollision<T4f>::T4i
+cloth::SwCollision<T4f>::collideCones(const T4f* __restrict prevPos, T4f* __restrict curPos,
ImpulseAccumulator& accum) const
{
const float* __restrict prevCenterPtr = array(mPrevData.mCones->center);
@@ -938,7 +938,7 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4
bool frictionEnabled = mClothData.mFrictionScale > 0.0f;
ShapeMask shapeMask = getShapeMask(prevPos, curPos);
- Simd4i mask4 = horizontalOr(shapeMask.mCones);
+ T4i mask4 = horizontalOr(shapeMask.mCones);
uint32_t mask = uint32_t(array(mask4)[0]);
while (mask)
{
@@ -947,106 +947,106 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4
uint32_t offset = coneIndex * sizeof(ConeData);
mask = mask & test;
- Simd4i test4 = mask4 - gSimd4iOne;
- Simd4f culled = simd4f(andNotIsZero(shapeMask.mCones, test4));
+ T4i test4 = mask4 - gSimd4iOne;
+ T4f culled = simd4f(andNotIsZero(shapeMask.mCones, test4));
mask4 = mask4 & test4;
- Simd4f prevCenter = loadAligned(prevCenterPtr, offset);
- Simd4f prevAxis = loadAligned(prevAxisPtr, offset);
- Simd4f prevAxisX = splat<0>(prevAxis);
- Simd4f prevAxisY = splat<1>(prevAxis);
- Simd4f prevAxisZ = splat<2>(prevAxis);
- Simd4f prevSlope = splat<3>(prevAxis);
-
- Simd4f prevX = prevPos[0] - splat<0>(prevCenter);
- Simd4f prevY = prevPos[1] - splat<1>(prevCenter);
- Simd4f prevZ = prevPos[2] - splat<2>(prevCenter);
- Simd4f prevT = prevY * prevAxisZ - prevZ * prevAxisY;
- Simd4f prevU = prevZ * prevAxisX - prevX * prevAxisZ;
- Simd4f prevV = prevX * prevAxisY - prevY * prevAxisX;
- Simd4f prevDot = prevX * prevAxisX + prevY * prevAxisY + prevZ * prevAxisZ;
- Simd4f prevRadius = prevDot * prevSlope + splat<3>(prevCenter);
-
- Simd4f curCenter = loadAligned(curCenterPtr, offset);
- Simd4f curAxis = loadAligned(curAxisPtr, offset);
- Simd4f curAxisX = splat<0>(curAxis);
- Simd4f curAxisY = splat<1>(curAxis);
- Simd4f curAxisZ = splat<2>(curAxis);
- Simd4f curSlope = splat<3>(curAxis);
- Simd4i curAuxiliary = loadAligned(curAuxiliaryPtr, offset);
-
- Simd4f curX = curPos[0] - splat<0>(curCenter);
- Simd4f curY = curPos[1] - splat<1>(curCenter);
- Simd4f curZ = curPos[2] - splat<2>(curCenter);
- Simd4f curT = curY * curAxisZ - curZ * curAxisY;
- Simd4f curU = curZ * curAxisX - curX * curAxisZ;
- Simd4f curV = curX * curAxisY - curY * curAxisX;
- Simd4f curDot = curX * curAxisX + curY * curAxisY + curZ * curAxisZ;
- Simd4f curRadius = curDot * curSlope + splat<3>(curCenter);
-
- Simd4f curSqrDistance = gSimd4fEpsilon + curT * curT + curU * curU + curV * curV;
+ T4f prevCenter = loadAligned(prevCenterPtr, offset);
+ T4f prevAxis = loadAligned(prevAxisPtr, offset);
+ T4f prevAxisX = splat<0>(prevAxis);
+ T4f prevAxisY = splat<1>(prevAxis);
+ T4f prevAxisZ = splat<2>(prevAxis);
+ T4f prevSlope = splat<3>(prevAxis);
+
+ T4f prevX = prevPos[0] - splat<0>(prevCenter);
+ T4f prevY = prevPos[1] - splat<1>(prevCenter);
+ T4f prevZ = prevPos[2] - splat<2>(prevCenter);
+ T4f prevT = prevY * prevAxisZ - prevZ * prevAxisY;
+ T4f prevU = prevZ * prevAxisX - prevX * prevAxisZ;
+ T4f prevV = prevX * prevAxisY - prevY * prevAxisX;
+ T4f prevDot = prevX * prevAxisX + prevY * prevAxisY + prevZ * prevAxisZ;
+ T4f prevRadius = prevDot * prevSlope + splat<3>(prevCenter);
+
+ T4f curCenter = loadAligned(curCenterPtr, offset);
+ T4f curAxis = loadAligned(curAxisPtr, offset);
+ T4f curAxisX = splat<0>(curAxis);
+ T4f curAxisY = splat<1>(curAxis);
+ T4f curAxisZ = splat<2>(curAxis);
+ T4f curSlope = splat<3>(curAxis);
+ T4i curAuxiliary = loadAligned(curAuxiliaryPtr, offset);
+
+ T4f curX = curPos[0] - splat<0>(curCenter);
+ T4f curY = curPos[1] - splat<1>(curCenter);
+ T4f curZ = curPos[2] - splat<2>(curCenter);
+ T4f curT = curY * curAxisZ - curZ * curAxisY;
+ T4f curU = curZ * curAxisX - curX * curAxisZ;
+ T4f curV = curX * curAxisY - curY * curAxisX;
+ T4f curDot = curX * curAxisX + curY * curAxisY + curZ * curAxisZ;
+ T4f curRadius = curDot * curSlope + splat<3>(curCenter);
+
+ T4f curSqrDistance = gSimd4fEpsilon + curT * curT + curU * curU + curV * curV;
// set radius to zero if cone is culled
prevRadius = max(prevRadius, gSimd4fZero) & ~culled;
curRadius = max(curRadius, gSimd4fZero) & ~culled;
- Simd4f dotPrevPrev = prevT * prevT + prevU * prevU + prevV * prevV - prevRadius * prevRadius;
- Simd4f dotPrevCur = prevT * curT + prevU * curU + prevV * curV - prevRadius * curRadius;
- Simd4f dotCurCur = curSqrDistance - curRadius * curRadius;
+ T4f dotPrevPrev = prevT * prevT + prevU * prevU + prevV * prevV - prevRadius * prevRadius;
+ T4f dotPrevCur = prevT * curT + prevU * curU + prevV * curV - prevRadius * curRadius;
+ T4f dotCurCur = curSqrDistance - curRadius * curRadius;
- Simd4f discriminant = dotPrevCur * dotPrevCur - dotCurCur * dotPrevPrev;
- Simd4f sqrtD = sqrt(discriminant);
- Simd4f halfB = dotPrevCur - dotPrevPrev;
- Simd4f minusA = dotPrevCur - dotCurCur + halfB;
+ T4f discriminant = dotPrevCur * dotPrevCur - dotCurCur * dotPrevPrev;
+ T4f sqrtD = sqrt(discriminant);
+ T4f halfB = dotPrevCur - dotPrevPrev;
+ T4f minusA = dotPrevCur - dotCurCur + halfB;
// time of impact or 0 if prevPos inside cone
- Simd4f toi = recip(minusA) * min(gSimd4fZero, halfB + sqrtD);
- Simd4f collisionMask = (toi < gSimd4fOne) & (halfB < sqrtD);
+ T4f toi = recip(minusA) * min(gSimd4fZero, halfB + sqrtD);
+ T4f collisionMask = (toi < gSimd4fOne) & (halfB < sqrtD);
// skip continuous collision if the (un-clamped) particle
// trajectory only touches the outer skin of the cone.
- Simd4f rMin = prevRadius + halfB * minusA * (curRadius - prevRadius);
+ T4f rMin = prevRadius + halfB * minusA * (curRadius - prevRadius);
collisionMask = collisionMask & (discriminant > minusA * rMin * rMin * sSkeletonWidth);
// a is negative when one cone is contained in the other,
// which is already handled by discrete collision.
- collisionMask = collisionMask & (minusA < -static_cast<Simd4f>(gSimd4fEpsilon));
+ collisionMask = collisionMask & (minusA < -static_cast<T4f>(gSimd4fEpsilon));
// test if any particle hits infinite cone (and 0<time of impact<1)
if (!allEqual(collisionMask, gSimd4fZero))
{
- Simd4f deltaX = prevX - curX;
- Simd4f deltaY = prevY - curY;
- Simd4f deltaZ = prevZ - curZ;
+ T4f deltaX = prevX - curX;
+ T4f deltaY = prevY - curY;
+ T4f deltaZ = prevZ - curZ;
// interpolate delta at toi
- Simd4f posX = prevX - deltaX * toi;
- Simd4f posY = prevY - deltaY * toi;
- Simd4f posZ = prevZ - deltaZ * toi;
+ T4f posX = prevX - deltaX * toi;
+ T4f posY = prevY - deltaY * toi;
+ T4f posZ = prevZ - deltaZ * toi;
- Simd4f curScaledAxis = curAxis * splat<1>(simd4f(curAuxiliary));
- Simd4i prevAuxiliary = loadAligned(prevAuxiliaryPtr, offset);
- Simd4f deltaScaledAxis = curScaledAxis - prevAxis * splat<1>(simd4f(prevAuxiliary));
+ T4f curScaledAxis = curAxis * splat<1>(simd4f(curAuxiliary));
+ T4i prevAuxiliary = loadAligned(prevAuxiliaryPtr, offset);
+ T4f deltaScaledAxis = curScaledAxis - prevAxis * splat<1>(simd4f(prevAuxiliary));
- Simd4f oneMinusToi = gSimd4fOne - toi;
+ T4f oneMinusToi = gSimd4fOne - toi;
// interpolate axis at toi
- Simd4f axisX = splat<0>(curScaledAxis) - splat<0>(deltaScaledAxis) * oneMinusToi;
- Simd4f axisY = splat<1>(curScaledAxis) - splat<1>(deltaScaledAxis) * oneMinusToi;
- Simd4f axisZ = splat<2>(curScaledAxis) - splat<2>(deltaScaledAxis) * oneMinusToi;
- Simd4f slope = (prevSlope * oneMinusToi + curSlope * toi);
+ T4f axisX = splat<0>(curScaledAxis) - splat<0>(deltaScaledAxis) * oneMinusToi;
+ T4f axisY = splat<1>(curScaledAxis) - splat<1>(deltaScaledAxis) * oneMinusToi;
+ T4f axisZ = splat<2>(curScaledAxis) - splat<2>(deltaScaledAxis) * oneMinusToi;
+ T4f slope = (prevSlope * oneMinusToi + curSlope * toi);
- Simd4f sqrHalfLength = axisX * axisX + axisY * axisY + axisZ * axisZ;
- Simd4f invHalfLength = rsqrt(sqrHalfLength);
- Simd4f dot = (posX * axisX + posY * axisY + posZ * axisZ) * invHalfLength;
+ T4f sqrHalfLength = axisX * axisX + axisY * axisY + axisZ * axisZ;
+ T4f invHalfLength = rsqrt(sqrHalfLength);
+ T4f dot = (posX * axisX + posY * axisY + posZ * axisZ) * invHalfLength;
- Simd4f sqrDistance = posX * posX + posY * posY + posZ * posZ - dot * dot;
- Simd4f invDistance = rsqrt(sqrDistance) & (sqrDistance > gSimd4fZero);
+ T4f sqrDistance = posX * posX + posY * posY + posZ * posZ - dot * dot;
+ T4f invDistance = rsqrt(sqrDistance) & (sqrDistance > gSimd4fZero);
- Simd4f base = dot + slope * sqrDistance * invDistance;
- Simd4f scale = base * invHalfLength & collisionMask;
+ T4f base = dot + slope * sqrDistance * invDistance;
+ T4f scale = base * invHalfLength & collisionMask;
- Simd4f cullMask = (abs(scale) < gSimd4fOne) & collisionMask;
+ T4f cullMask = (abs(scale) < gSimd4fOne) & collisionMask;
// test if any impact position is in cone section
if (!allEqual(cullMask, gSimd4fZero))
@@ -1060,7 +1060,7 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4
// reduce ccd impulse if (clamped) particle trajectory stays in cone skin,
// i.e. scale by exp2(-k) or 1/(1+k) with k = (tmin - toi) / (1 - toi)
// oneMinusToi = oneMinusToi * recip(gSimd4fOne - sqrtD * recip(minusA * oneMinusToi));
- Simd4f minusK = sqrtD * recip(minusA * oneMinusToi) & (oneMinusToi > gSimd4fEpsilon);
+ T4f minusK = sqrtD * recip(minusA * oneMinusToi) & (oneMinusToi > gSimd4fEpsilon);
oneMinusToi = oneMinusToi * recip(gSimd4fOne - minusK);
curX = curX + deltaX * oneMinusToi;
@@ -1079,39 +1079,39 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4
}
// curPos inside cone (discrete collision)
- Simd4f contactMask;
+ T4f contactMask;
int anyContact = anyGreater(curRadius * curRadius, curSqrDistance, contactMask);
- Simd4i bothMask = splat<3>(curAuxiliary);
+ T4i bothMask = splat<3>(curAuxiliary);
// instead of culling continuous collision for ~collisionMask, and discrete
// collision for ~contactMask, disable both if ~collisionMask & ~contactMask
- Simd4i cullMask = bothMask & ~simd4i(collisionMask | contactMask);
+ T4i cullMask = bothMask & ~simd4i(collisionMask | contactMask);
shapeMask.mSpheres = shapeMask.mSpheres & ~cullMask;
if (!anyContact)
continue;
- Simd4f invDistance = rsqrt(curSqrDistance) & (curSqrDistance > gSimd4fZero);
- Simd4f base = curDot + curSlope * curSqrDistance * invDistance;
+ T4f invDistance = rsqrt(curSqrDistance) & (curSqrDistance > gSimd4fZero);
+ T4f base = curDot + curSlope * curSqrDistance * invDistance;
- Simd4f halfLength = splat<1>(simd4f(curAuxiliary));
- Simd4i leftMask = simd4i(base < -halfLength);
- Simd4i rightMask = simd4i(base > halfLength);
+ T4f halfLength = splat<1>(simd4f(curAuxiliary));
+ T4i leftMask = simd4i(base < -halfLength);
+ T4i rightMask = simd4i(base > halfLength);
// can only skip continuous sphere collision if post-ccd position
// is on cone side *and* particle had cone-ccd collision.
- Simd4i firstMask = splat<2>(curAuxiliary);
- Simd4i secondMask = firstMask ^ bothMask;
+ T4i firstMask = splat<2>(curAuxiliary);
+ T4i secondMask = firstMask ^ bothMask;
cullMask = (firstMask & ~leftMask) | (secondMask & ~rightMask);
shapeMask.mSpheres = shapeMask.mSpheres & ~(cullMask & simd4i(collisionMask));
- Simd4f deltaX = curX - base * curAxisX;
- Simd4f deltaY = curY - base * curAxisY;
- Simd4f deltaZ = curZ - base * curAxisZ;
+ T4f deltaX = curX - base * curAxisX;
+ T4f deltaY = curY - base * curAxisY;
+ T4f deltaZ = curZ - base * curAxisZ;
- Simd4f sqrCosine = splat<0>(simd4f(curAuxiliary));
- Simd4f scale = curRadius * invDistance * sqrCosine - sqrCosine;
+ T4f sqrCosine = splat<0>(simd4f(curAuxiliary));
+ T4f scale = curRadius * invDistance * sqrCosine - sqrCosine;
contactMask = contactMask & ~simd4f(leftMask | rightMask);
@@ -1130,23 +1130,23 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4
// todo: could pre-compute sphere velocities or it might be
// faster to compute cur/prev sphere positions directly
- Simd4f s0p0 = loadAligned(prevSpheres, s0 * sizeof(SphereData));
- Simd4f s0p1 = loadAligned(curSpheres, s0 * sizeof(SphereData));
+ T4f s0p0 = loadAligned(prevSpheres, s0 * sizeof(SphereData));
+ T4f s0p1 = loadAligned(curSpheres, s0 * sizeof(SphereData));
- Simd4f s1p0 = loadAligned(prevSpheres, s1 * sizeof(SphereData));
- Simd4f s1p1 = loadAligned(curSpheres, s1 * sizeof(SphereData));
+ T4f s1p0 = loadAligned(prevSpheres, s1 * sizeof(SphereData));
+ T4f s1p1 = loadAligned(curSpheres, s1 * sizeof(SphereData));
- Simd4f v0 = s0p1 - s0p0;
- Simd4f v1 = s1p1 - s1p0;
- Simd4f vd = v1 - v0;
+ T4f v0 = s0p1 - s0p0;
+ T4f v1 = s1p1 - s1p0;
+ T4f vd = v1 - v0;
// dot is in the range -1 to 1, scale and bias to 0 to 1
curDot = curDot * gSimd4fHalf + gSimd4fHalf;
// interpolate velocity at contact points
- Simd4f vx = splat<0>(v0) + curDot * splat<0>(vd);
- Simd4f vy = splat<1>(v0) + curDot * splat<1>(vd);
- Simd4f vz = splat<2>(v0) + curDot * splat<2>(vd);
+ T4f vx = splat<0>(v0) + curDot * splat<0>(vd);
+ T4f vy = splat<1>(v0) + curDot * splat<1>(vd);
+ T4f vz = splat<2>(v0) + curDot * splat<2>(vd);
accum.addVelocity(vx, vy, vz, contactMask);
}
@@ -1158,39 +1158,39 @@ cloth::SwCollision<Simd4f>::collideCones(const Simd4f* __restrict prevPos, Simd4
namespace
{
-template <typename Simd4f>
-PX_INLINE void calculateFrictionImpulse(const Simd4f& deltaX, const Simd4f& deltaY, const Simd4f& deltaZ,
- const Simd4f& velX, const Simd4f& velY, const Simd4f& velZ,
- const Simd4f* curPos, const Simd4f* prevPos, const Simd4f& scale,
- const Simd4f& coefficient, const Simd4f& mask, Simd4f* impulse)
+template <typename T4f>
+PX_INLINE void calculateFrictionImpulse(const T4f& deltaX, const T4f& deltaY, const T4f& deltaZ,
+ const T4f& velX, const T4f& velY, const T4f& velZ,
+ const T4f* curPos, const T4f* prevPos, const T4f& scale,
+ const T4f& coefficient, const T4f& mask, T4f* impulse)
{
// calculate collision normal
- Simd4f deltaSq = deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ;
+ T4f deltaSq = deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ;
- Simd4f rcpDelta = rsqrt(deltaSq + gSimd4fEpsilon);
+ T4f rcpDelta = rsqrt(deltaSq + gSimd4fEpsilon);
- Simd4f nx = deltaX * rcpDelta;
- Simd4f ny = deltaY * rcpDelta;
- Simd4f nz = deltaZ * rcpDelta;
+ T4f nx = deltaX * rcpDelta;
+ T4f ny = deltaY * rcpDelta;
+ T4f nz = deltaZ * rcpDelta;
// calculate relative velocity scaled by number of collisions
- Simd4f rvx = curPos[0] - prevPos[0] - velX * scale;
- Simd4f rvy = curPos[1] - prevPos[1] - velY * scale;
- Simd4f rvz = curPos[2] - prevPos[2] - velZ * scale;
+ T4f rvx = curPos[0] - prevPos[0] - velX * scale;
+ T4f rvy = curPos[1] - prevPos[1] - velY * scale;
+ T4f rvz = curPos[2] - prevPos[2] - velZ * scale;
// calculate magnitude of relative normal velocity
- Simd4f rvn = rvx * nx + rvy * ny + rvz * nz;
+ T4f rvn = rvx * nx + rvy * ny + rvz * nz;
// calculate relative tangential velocity
- Simd4f rvtx = rvx - rvn * nx;
- Simd4f rvty = rvy - rvn * ny;
- Simd4f rvtz = rvz - rvn * nz;
+ T4f rvtx = rvx - rvn * nx;
+ T4f rvty = rvy - rvn * ny;
+ T4f rvtz = rvz - rvn * nz;
// calculate magnitude of vt
- Simd4f rcpVt = rsqrt(rvtx * rvtx + rvty * rvty + rvtz * rvtz + gSimd4fEpsilon);
+ T4f rcpVt = rsqrt(rvtx * rvtx + rvty * rvty + rvtz * rvtz + gSimd4fEpsilon);
// magnitude of friction impulse (cannot be greater than -vt)
- Simd4f j = max(-coefficient * deltaSq * rcpDelta * rcpVt, gSimd4fMinusOne) & mask;
+ T4f j = max(-coefficient * deltaSq * rcpDelta * rcpVt, gSimd4fMinusOne) & mask;
impulse[0] = rvtx * j;
impulse[1] = rvty * j;
@@ -1199,17 +1199,17 @@ PX_INLINE void calculateFrictionImpulse(const Simd4f& deltaX, const Simd4f& delt
} // anonymous namespace
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::collideParticles()
+template <typename T4f>
+void cloth::SwCollision<T4f>::collideParticles()
{
const bool massScalingEnabled = mClothData.mCollisionMassScale > 0.0f;
- const Simd4f massScale = simd4f(mClothData.mCollisionMassScale);
+ const T4f massScale = simd4f(mClothData.mCollisionMassScale);
const bool frictionEnabled = mClothData.mFrictionScale > 0.0f;
- const Simd4f frictionScale = simd4f(mClothData.mFrictionScale);
+ const T4f frictionScale = simd4f(mClothData.mFrictionScale);
- Simd4f curPos[4];
- Simd4f prevPos[4];
+ T4f curPos[4];
+ T4f prevPos[4];
float* __restrict prevIt = mClothData.mPrevParticles;
float* __restrict pIt = mClothData.mCurParticles;
@@ -1223,14 +1223,14 @@ void cloth::SwCollision<Simd4f>::collideParticles()
transpose(curPos[0], curPos[1], curPos[2], curPos[3]);
ImpulseAccumulator accum;
- Simd4i sphereMask = collideCones(curPos, accum);
+ T4i sphereMask = collideCones(curPos, accum);
collideSpheres(sphereMask, curPos, accum);
- Simd4f mask;
+ T4f mask;
if (!anyGreater(accum.mNumCollisions, gSimd4fEpsilon, mask))
continue;
- Simd4f invNumCollisions = recip(accum.mNumCollisions);
+ T4f invNumCollisions = recip(accum.mNumCollisions);
if (frictionEnabled)
{
@@ -1240,7 +1240,7 @@ void cloth::SwCollision<Simd4f>::collideParticles()
prevPos[3] = loadAligned(prevIt, 48);
transpose(prevPos[0], prevPos[1], prevPos[2], prevPos[3]);
- Simd4f frictionImpulse[3];
+ T4f frictionImpulse[3];
calculateFrictionImpulse(accum.mDeltaX, accum.mDeltaY, accum.mDeltaZ, accum.mVelX, accum.mVelY, accum.mVelZ,
curPos, prevPos, invNumCollisions, frictionScale, mask, frictionImpulse);
@@ -1258,10 +1258,10 @@ void cloth::SwCollision<Simd4f>::collideParticles()
if (massScalingEnabled)
{
// calculate the inverse mass scale based on the collision impulse magnitude
- Simd4f dSq = invNumCollisions * invNumCollisions *
+ T4f dSq = invNumCollisions * invNumCollisions *
(accum.mDeltaX * accum.mDeltaX + accum.mDeltaY * accum.mDeltaY + accum.mDeltaZ * accum.mDeltaZ);
- Simd4f scale = recip(gSimd4fOne + massScale * dSq);
+ T4f scale = recip(gSimd4fOne + massScale * dSq);
// scale invmass
curPos[3] = select(mask, curPos[3] * scale, curPos[3]);
@@ -1283,24 +1283,24 @@ void cloth::SwCollision<Simd4f>::collideParticles()
}
}
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::collideVirtualParticles()
+template <typename T4f>
+void cloth::SwCollision<T4f>::collideVirtualParticles()
{
const bool massScalingEnabled = mClothData.mCollisionMassScale > 0.0f;
- const Simd4f massScale = simd4f(mClothData.mCollisionMassScale);
+ const T4f massScale = simd4f(mClothData.mCollisionMassScale);
const bool frictionEnabled = mClothData.mFrictionScale > 0.0f;
- const Simd4f frictionScale = simd4f(mClothData.mFrictionScale);
+ const T4f frictionScale = simd4f(mClothData.mFrictionScale);
- Simd4f curPos[3];
+ T4f curPos[3];
const float* __restrict weights = mClothData.mVirtualParticleWeights;
float* __restrict particles = mClothData.mCurParticles;
float* __restrict prevParticles = mClothData.mPrevParticles;
// move dummy particles outside of collision range
- Simd4f* __restrict dummy = mClothData.mNumParticles + reinterpret_cast<Simd4f*>(mClothData.mCurParticles);
- Simd4f invGridScale = recip(mGridScale) & (mGridScale > gSimd4fEpsilon);
+ T4f* __restrict dummy = mClothData.mNumParticles + reinterpret_cast<T4f*>(mClothData.mCurParticles);
+ T4f invGridScale = recip(mGridScale) & (mGridScale > gSimd4fEpsilon);
dummy[0] = dummy[1] = dummy[2] = invGridScale * mGridBias - invGridScale;
const uint16_t* __restrict vpIt = mClothData.mVirtualParticlesBegin;
@@ -1308,31 +1308,31 @@ void cloth::SwCollision<Simd4f>::collideVirtualParticles()
for (; vpIt != vpEnd; vpIt += 16)
{
// load 12 particles and 4 weights
- Simd4f p0v0 = loadAligned(particles, vpIt[0] * sizeof(PxVec4));
- Simd4f p0v1 = loadAligned(particles, vpIt[1] * sizeof(PxVec4));
- Simd4f p0v2 = loadAligned(particles, vpIt[2] * sizeof(PxVec4));
- Simd4f w0 = loadAligned(weights, vpIt[3] * sizeof(PxVec4));
-
- Simd4f p1v0 = loadAligned(particles, vpIt[4] * sizeof(PxVec4));
- Simd4f p1v1 = loadAligned(particles, vpIt[5] * sizeof(PxVec4));
- Simd4f p1v2 = loadAligned(particles, vpIt[6] * sizeof(PxVec4));
- Simd4f w1 = loadAligned(weights, vpIt[7] * sizeof(PxVec4));
-
- Simd4f p2v0 = loadAligned(particles, vpIt[8] * sizeof(PxVec4));
- Simd4f p2v1 = loadAligned(particles, vpIt[9] * sizeof(PxVec4));
- Simd4f p2v2 = loadAligned(particles, vpIt[10] * sizeof(PxVec4));
- Simd4f w2 = loadAligned(weights, vpIt[11] * sizeof(PxVec4));
-
- Simd4f p3v1 = loadAligned(particles, vpIt[13] * sizeof(PxVec4));
- Simd4f p3v0 = loadAligned(particles, vpIt[12] * sizeof(PxVec4));
- Simd4f p3v2 = loadAligned(particles, vpIt[14] * sizeof(PxVec4));
- Simd4f w3 = loadAligned(weights, vpIt[15] * sizeof(PxVec4));
+ T4f p0v0 = loadAligned(particles, vpIt[0] * sizeof(PxVec4));
+ T4f p0v1 = loadAligned(particles, vpIt[1] * sizeof(PxVec4));
+ T4f p0v2 = loadAligned(particles, vpIt[2] * sizeof(PxVec4));
+ T4f w0 = loadAligned(weights, vpIt[3] * sizeof(PxVec4));
+
+ T4f p1v0 = loadAligned(particles, vpIt[4] * sizeof(PxVec4));
+ T4f p1v1 = loadAligned(particles, vpIt[5] * sizeof(PxVec4));
+ T4f p1v2 = loadAligned(particles, vpIt[6] * sizeof(PxVec4));
+ T4f w1 = loadAligned(weights, vpIt[7] * sizeof(PxVec4));
+
+ T4f p2v0 = loadAligned(particles, vpIt[8] * sizeof(PxVec4));
+ T4f p2v1 = loadAligned(particles, vpIt[9] * sizeof(PxVec4));
+ T4f p2v2 = loadAligned(particles, vpIt[10] * sizeof(PxVec4));
+ T4f w2 = loadAligned(weights, vpIt[11] * sizeof(PxVec4));
+
+ T4f p3v1 = loadAligned(particles, vpIt[13] * sizeof(PxVec4));
+ T4f p3v0 = loadAligned(particles, vpIt[12] * sizeof(PxVec4));
+ T4f p3v2 = loadAligned(particles, vpIt[14] * sizeof(PxVec4));
+ T4f w3 = loadAligned(weights, vpIt[15] * sizeof(PxVec4));
// interpolate particles and transpose
- Simd4f px = p0v0 * splat<0>(w0) + p0v1 * splat<1>(w0) + p0v2 * splat<2>(w0);
- Simd4f py = p1v0 * splat<0>(w1) + p1v1 * splat<1>(w1) + p1v2 * splat<2>(w1);
- Simd4f pz = p2v0 * splat<0>(w2) + p2v1 * splat<1>(w2) + p2v2 * splat<2>(w2);
- Simd4f pw = p3v0 * splat<0>(w3) + p3v1 * splat<1>(w3) + p3v2 * splat<2>(w3);
+ T4f px = p0v0 * splat<0>(w0) + p0v1 * splat<1>(w0) + p0v2 * splat<2>(w0);
+ T4f py = p1v0 * splat<0>(w1) + p1v1 * splat<1>(w1) + p1v2 * splat<2>(w1);
+ T4f pz = p2v0 * splat<0>(w2) + p2v1 * splat<1>(w2) + p2v2 * splat<2>(w2);
+ T4f pw = p3v0 * splat<0>(w3) + p3v1 * splat<1>(w3) + p3v2 * splat<2>(w3);
transpose(px, py, pz, pw);
curPos[0] = px;
@@ -1340,55 +1340,55 @@ void cloth::SwCollision<Simd4f>::collideVirtualParticles()
curPos[2] = pz;
ImpulseAccumulator accum;
- Simd4i sphereMask = collideCones(curPos, accum);
+ T4i sphereMask = collideCones(curPos, accum);
collideSpheres(sphereMask, curPos, accum);
- Simd4f mask;
+ T4f mask;
if (!anyGreater(accum.mNumCollisions, gSimd4fEpsilon, mask))
continue;
- Simd4f invNumCollisions = recip(accum.mNumCollisions);
+ T4f invNumCollisions = recip(accum.mNumCollisions);
// displacement and transpose back
- Simd4f d0 = accum.mDeltaX * invNumCollisions;
- Simd4f d1 = accum.mDeltaY * invNumCollisions;
- Simd4f d2 = accum.mDeltaZ * invNumCollisions;
- Simd4f d3 = gSimd4fZero;
+ T4f d0 = accum.mDeltaX * invNumCollisions;
+ T4f d1 = accum.mDeltaY * invNumCollisions;
+ T4f d2 = accum.mDeltaZ * invNumCollisions;
+ T4f d3 = gSimd4fZero;
transpose(d0, d1, d2, d3);
// scale weights by 1/dot(w,w)
- Simd4f rw0 = w0 * splat<3>(w0);
- Simd4f rw1 = w1 * splat<3>(w1);
- Simd4f rw2 = w2 * splat<3>(w2);
- Simd4f rw3 = w3 * splat<3>(w3);
+ T4f rw0 = w0 * splat<3>(w0);
+ T4f rw1 = w1 * splat<3>(w1);
+ T4f rw2 = w2 * splat<3>(w2);
+ T4f rw3 = w3 * splat<3>(w3);
if (frictionEnabled)
{
- Simd4f q0v0 = loadAligned(prevParticles, vpIt[0] * sizeof(PxVec4));
- Simd4f q0v1 = loadAligned(prevParticles, vpIt[1] * sizeof(PxVec4));
- Simd4f q0v2 = loadAligned(prevParticles, vpIt[2] * sizeof(PxVec4));
+ T4f q0v0 = loadAligned(prevParticles, vpIt[0] * sizeof(PxVec4));
+ T4f q0v1 = loadAligned(prevParticles, vpIt[1] * sizeof(PxVec4));
+ T4f q0v2 = loadAligned(prevParticles, vpIt[2] * sizeof(PxVec4));
- Simd4f q1v0 = loadAligned(prevParticles, vpIt[4] * sizeof(PxVec4));
- Simd4f q1v1 = loadAligned(prevParticles, vpIt[5] * sizeof(PxVec4));
- Simd4f q1v2 = loadAligned(prevParticles, vpIt[6] * sizeof(PxVec4));
+ T4f q1v0 = loadAligned(prevParticles, vpIt[4] * sizeof(PxVec4));
+ T4f q1v1 = loadAligned(prevParticles, vpIt[5] * sizeof(PxVec4));
+ T4f q1v2 = loadAligned(prevParticles, vpIt[6] * sizeof(PxVec4));
- Simd4f q2v0 = loadAligned(prevParticles, vpIt[8] * sizeof(PxVec4));
- Simd4f q2v1 = loadAligned(prevParticles, vpIt[9] * sizeof(PxVec4));
- Simd4f q2v2 = loadAligned(prevParticles, vpIt[10] * sizeof(PxVec4));
+ T4f q2v0 = loadAligned(prevParticles, vpIt[8] * sizeof(PxVec4));
+ T4f q2v1 = loadAligned(prevParticles, vpIt[9] * sizeof(PxVec4));
+ T4f q2v2 = loadAligned(prevParticles, vpIt[10] * sizeof(PxVec4));
- Simd4f q3v0 = loadAligned(prevParticles, vpIt[12] * sizeof(PxVec4));
- Simd4f q3v1 = loadAligned(prevParticles, vpIt[13] * sizeof(PxVec4));
- Simd4f q3v2 = loadAligned(prevParticles, vpIt[14] * sizeof(PxVec4));
+ T4f q3v0 = loadAligned(prevParticles, vpIt[12] * sizeof(PxVec4));
+ T4f q3v1 = loadAligned(prevParticles, vpIt[13] * sizeof(PxVec4));
+ T4f q3v2 = loadAligned(prevParticles, vpIt[14] * sizeof(PxVec4));
// calculate previous interpolated positions
- Simd4f qx = q0v0 * splat<0>(w0) + q0v1 * splat<1>(w0) + q0v2 * splat<2>(w0);
- Simd4f qy = q1v0 * splat<0>(w1) + q1v1 * splat<1>(w1) + q1v2 * splat<2>(w1);
- Simd4f qz = q2v0 * splat<0>(w2) + q2v1 * splat<1>(w2) + q2v2 * splat<2>(w2);
- Simd4f qw = q3v0 * splat<0>(w3) + q3v1 * splat<1>(w3) + q3v2 * splat<2>(w3);
+ T4f qx = q0v0 * splat<0>(w0) + q0v1 * splat<1>(w0) + q0v2 * splat<2>(w0);
+ T4f qy = q1v0 * splat<0>(w1) + q1v1 * splat<1>(w1) + q1v2 * splat<2>(w1);
+ T4f qz = q2v0 * splat<0>(w2) + q2v1 * splat<1>(w2) + q2v2 * splat<2>(w2);
+ T4f qw = q3v0 * splat<0>(w3) + q3v1 * splat<1>(w3) + q3v2 * splat<2>(w3);
transpose(qx, qy, qz, qw);
- Simd4f prevPos[3] = { qx, qy, qz };
- Simd4f frictionImpulse[4];
+ T4f prevPos[3] = { qx, qy, qz };
+ T4f frictionImpulse[4];
frictionImpulse[3] = gSimd4fZero;
calculateFrictionImpulse(accum.mDeltaX, accum.mDeltaY, accum.mDeltaZ, accum.mVelX, accum.mVelY, accum.mVelZ,
@@ -1433,16 +1433,16 @@ void cloth::SwCollision<Simd4f>::collideVirtualParticles()
if (massScalingEnabled)
{
// calculate the inverse mass scale based on the collision impulse
- Simd4f dSq = invNumCollisions * invNumCollisions *
+ T4f dSq = invNumCollisions * invNumCollisions *
(accum.mDeltaX * accum.mDeltaX + accum.mDeltaY * accum.mDeltaY + accum.mDeltaZ * accum.mDeltaZ);
- Simd4f weightScale = recip(gSimd4fOne + massScale * dSq);
+ T4f weightScale = recip(gSimd4fOne + massScale * dSq);
weightScale = weightScale - gSimd4fOne;
- Simd4f s0 = gSimd4fOne + splat<0>(weightScale) * (w0 & splat<0>(mask));
- Simd4f s1 = gSimd4fOne + splat<1>(weightScale) * (w1 & splat<1>(mask));
- Simd4f s2 = gSimd4fOne + splat<2>(weightScale) * (w2 & splat<2>(mask));
- Simd4f s3 = gSimd4fOne + splat<3>(weightScale) * (w3 & splat<3>(mask));
+ T4f s0 = gSimd4fOne + splat<0>(weightScale) * (w0 & splat<0>(mask));
+ T4f s1 = gSimd4fOne + splat<1>(weightScale) * (w1 & splat<1>(mask));
+ T4f s2 = gSimd4fOne + splat<2>(weightScale) * (w2 & splat<2>(mask));
+ T4f s3 = gSimd4fOne + splat<3>(weightScale) * (w3 & splat<3>(mask));
p0v0 = p0v0 * (gSimd4fOneXYZ | (splat<0>(s0) & sMaskW));
p0v1 = p0v1 * (gSimd4fOneXYZ | (splat<1>(s0) & sMaskW));
@@ -1500,17 +1500,17 @@ void cloth::SwCollision<Simd4f>::collideVirtualParticles()
}
}
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::collideContinuousParticles()
+template <typename T4f>
+void cloth::SwCollision<T4f>::collideContinuousParticles()
{
- Simd4f curPos[4];
- Simd4f prevPos[4];
+ T4f curPos[4];
+ T4f prevPos[4];
const bool massScalingEnabled = mClothData.mCollisionMassScale > 0.0f;
- const Simd4f massScale = simd4f(mClothData.mCollisionMassScale);
+ const T4f massScale = simd4f(mClothData.mCollisionMassScale);
const bool frictionEnabled = mClothData.mFrictionScale > 0.0f;
- const Simd4f frictionScale = simd4f(mClothData.mFrictionScale);
+ const T4f frictionScale = simd4f(mClothData.mFrictionScale);
float* __restrict prevIt = mClothData.mPrevParticles;
float* __restrict curIt = mClothData.mCurParticles;
@@ -1531,18 +1531,18 @@ void cloth::SwCollision<Simd4f>::collideContinuousParticles()
transpose(curPos[0], curPos[1], curPos[2], curPos[3]);
ImpulseAccumulator accum;
- Simd4i sphereMask = collideCones(prevPos, curPos, accum);
+ T4i sphereMask = collideCones(prevPos, curPos, accum);
collideSpheres(sphereMask, prevPos, curPos, accum);
- Simd4f mask;
+ T4f mask;
if (!anyGreater(accum.mNumCollisions, gSimd4fEpsilon, mask))
continue;
- Simd4f invNumCollisions = recip(accum.mNumCollisions);
+ T4f invNumCollisions = recip(accum.mNumCollisions);
if (frictionEnabled)
{
- Simd4f frictionImpulse[3];
+ T4f frictionImpulse[3];
calculateFrictionImpulse(accum.mDeltaX, accum.mDeltaY, accum.mDeltaZ, accum.mVelX, accum.mVelY, accum.mVelZ,
curPos, prevPos, invNumCollisions, frictionScale, mask, frictionImpulse);
@@ -1560,10 +1560,10 @@ void cloth::SwCollision<Simd4f>::collideContinuousParticles()
if (massScalingEnabled)
{
// calculate the inverse mass scale based on the collision impulse magnitude
- Simd4f dSq = invNumCollisions * invNumCollisions *
+ T4f dSq = invNumCollisions * invNumCollisions *
(accum.mDeltaX * accum.mDeltaX + accum.mDeltaY * accum.mDeltaY + accum.mDeltaZ * accum.mDeltaZ);
- Simd4f weightScale = recip(gSimd4fOne + massScale * dSq);
+ T4f weightScale = recip(gSimd4fOne + massScale * dSq);
// scale invmass
curPos[3] = select(mask, curPos[3] * weightScale, curPos[3]);
@@ -1585,22 +1585,22 @@ void cloth::SwCollision<Simd4f>::collideContinuousParticles()
}
}
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::collideConvexes(const IterationState<Simd4f>& state)
+template <typename T4f>
+void cloth::SwCollision<T4f>::collideConvexes(const IterationState<T4f>& state)
{
if (!mClothData.mNumConvexes)
return;
// times 2 for plane equation result buffer
- Simd4f* planes = static_cast<Simd4f*>(mAllocator.allocate(sizeof(Simd4f) * mClothData.mNumPlanes * 2));
+ T4f* planes = static_cast<T4f*>(mAllocator.allocate(sizeof(T4f) * mClothData.mNumPlanes * 2));
- const Simd4f* targetPlanes = reinterpret_cast<const Simd4f*>(mClothData.mTargetCollisionPlanes);
+ const T4f* targetPlanes = reinterpret_cast<const T4f*>(mClothData.mTargetCollisionPlanes);
// generate plane collision data
if (state.mRemainingIterations != 1)
{
// interpolate planes
- LerpIterator<Simd4f, const Simd4f*> planeIter(reinterpret_cast<const Simd4f*>(mClothData.mStartCollisionPlanes),
+ LerpIterator<T4f, const T4f*> planeIter(reinterpret_cast<const T4f*>(mClothData.mStartCollisionPlanes),
targetPlanes, state.getCurrentAlpha());
// todo: normalize plane equations
@@ -1612,10 +1612,10 @@ void cloth::SwCollision<Simd4f>::collideConvexes(const IterationState<Simd4f>& s
generatePlanes(planes, targetPlanes, mClothData.mNumPlanes);
}
- Simd4f curPos[4], prevPos[4];
+ T4f curPos[4], prevPos[4];
const bool frictionEnabled = mClothData.mFrictionScale > 0.0f;
- const Simd4f frictionScale = simd4f(mClothData.mFrictionScale);
+ const T4f frictionScale = simd4f(mClothData.mFrictionScale);
float* __restrict curIt = mClothData.mCurParticles;
float* __restrict curEnd = curIt + mClothData.mNumParticles * 4;
@@ -1631,11 +1631,11 @@ void cloth::SwCollision<Simd4f>::collideConvexes(const IterationState<Simd4f>& s
ImpulseAccumulator accum;
collideConvexes(planes, curPos, accum);
- Simd4f mask;
+ T4f mask;
if (!anyGreater(accum.mNumCollisions, gSimd4fEpsilon, mask))
continue;
- Simd4f invNumCollisions = recip(accum.mNumCollisions);
+ T4f invNumCollisions = recip(accum.mNumCollisions);
if (frictionEnabled)
{
@@ -1645,7 +1645,7 @@ void cloth::SwCollision<Simd4f>::collideConvexes(const IterationState<Simd4f>& s
prevPos[3] = loadAligned(prevIt, 48);
transpose(prevPos[0], prevPos[1], prevPos[2], prevPos[3]);
- Simd4f frictionImpulse[3];
+ T4f frictionImpulse[3];
calculateFrictionImpulse(accum.mDeltaX, accum.mDeltaY, accum.mDeltaZ, accum.mVelX, accum.mVelY, accum.mVelZ,
curPos, prevPos, invNumCollisions, frictionScale, mask, frictionImpulse);
@@ -1678,20 +1678,20 @@ void cloth::SwCollision<Simd4f>::collideConvexes(const IterationState<Simd4f>& s
mAllocator.deallocate(planes);
}
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::collideConvexes(const Simd4f* __restrict planes, Simd4f* __restrict curPos,
+template <typename T4f>
+void cloth::SwCollision<T4f>::collideConvexes(const T4f* __restrict planes, T4f* __restrict curPos,
ImpulseAccumulator& accum)
{
- Simd4i result = gSimd4iZero;
- Simd4i mask4 = gSimd4iOne;
+ T4i result = gSimd4iZero;
+ T4i mask4 = gSimd4iOne;
- const Simd4f* __restrict pIt, *pEnd = planes + mClothData.mNumPlanes;
- Simd4f* __restrict dIt = const_cast<Simd4f*>(pEnd);
+ const T4f* __restrict pIt, *pEnd = planes + mClothData.mNumPlanes;
+ T4f* __restrict dIt = const_cast<T4f*>(pEnd);
for (pIt = planes; pIt != pEnd; ++pIt, ++dIt)
{
*dIt = splat<3>(*pIt) + curPos[2] * splat<2>(*pIt) + curPos[1] * splat<1>(*pIt) + curPos[0] * splat<0>(*pIt);
result = result | (mask4 & simd4i(*dIt < gSimd4fZero));
- mask4 = mask4 << 1; // todo: shift by Simd4i on consoles
+ mask4 = mask4 << 1; // todo: shift by T4i on consoles
}
if (allEqual(result, gSimd4iZero))
@@ -1708,18 +1708,18 @@ void cloth::SwCollision<Simd4f>::collideConvexes(const Simd4f* __restrict planes
uint32_t test = mask - 1;
uint32_t planeIndex = findBitSet(mask & ~test);
- Simd4f plane = planes[planeIndex];
- Simd4f planeX = splat<0>(plane);
- Simd4f planeY = splat<1>(plane);
- Simd4f planeZ = splat<2>(plane);
- Simd4f planeD = pEnd[planeIndex];
+ T4f plane = planes[planeIndex];
+ T4f planeX = splat<0>(plane);
+ T4f planeY = splat<1>(plane);
+ T4f planeZ = splat<2>(plane);
+ T4f planeD = pEnd[planeIndex];
while (mask &= test)
{
test = mask - 1;
planeIndex = findBitSet(mask & ~test);
plane = planes[planeIndex];
- Simd4f dist = pEnd[planeIndex];
- Simd4f closer = dist > planeD;
+ T4f dist = pEnd[planeIndex];
+ T4f closer = dist > planeD;
planeX = select(closer, splat<0>(plane), planeX);
planeY = select(closer, splat<1>(plane), planeY);
planeZ = select(closer, splat<2>(plane), planeZ);
@@ -1730,8 +1730,8 @@ void cloth::SwCollision<Simd4f>::collideConvexes(const Simd4f* __restrict planes
}
}
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::collideTriangles(const IterationState<Simd4f>& state)
+template <typename T4f>
+void cloth::SwCollision<T4f>::collideTriangles(const IterationState<T4f>& state)
{
if (!mClothData.mNumCollisionTriangles)
return;
@@ -1739,24 +1739,24 @@ void cloth::SwCollision<Simd4f>::collideTriangles(const IterationState<Simd4f>&
TriangleData* triangles =
static_cast<TriangleData*>(mAllocator.allocate(sizeof(TriangleData) * mClothData.mNumCollisionTriangles));
- UnalignedIterator<Simd4f, 3> targetTriangles(mClothData.mTargetCollisionTriangles);
+ UnalignedIterator<T4f, 3> targetTriangles(mClothData.mTargetCollisionTriangles);
// generate triangle collision data
if (state.mRemainingIterations != 1)
{
// interpolate triangles
- LerpIterator<Simd4f, UnalignedIterator<Simd4f, 3> > triangleIter(mClothData.mStartCollisionTriangles,
+ LerpIterator<T4f, UnalignedIterator<T4f, 3> > triangleIter(mClothData.mStartCollisionTriangles,
targetTriangles, state.getCurrentAlpha());
- generateTriangles<Simd4f>(triangles, triangleIter, mClothData.mNumCollisionTriangles);
+ generateTriangles<T4f>(triangles, triangleIter, mClothData.mNumCollisionTriangles);
}
else
{
// otherwise use the target triangles directly
- generateTriangles<Simd4f>(triangles, targetTriangles, mClothData.mNumCollisionTriangles);
+ generateTriangles<T4f>(triangles, targetTriangles, mClothData.mNumCollisionTriangles);
}
- Simd4f positions[4];
+ T4f positions[4];
float* __restrict pIt = mClothData.mCurParticles;
float* __restrict pEnd = pIt + mClothData.mNumParticles * 4;
@@ -1771,11 +1771,11 @@ void cloth::SwCollision<Simd4f>::collideTriangles(const IterationState<Simd4f>&
ImpulseAccumulator accum;
collideTriangles(triangles, positions, accum);
- Simd4f mask;
+ T4f mask;
if (!anyGreater(accum.mNumCollisions, gSimd4fEpsilon, mask))
continue;
- Simd4f invNumCollisions = recip(accum.mNumCollisions);
+ T4f invNumCollisions = recip(accum.mNumCollisions);
positions[0] = positions[0] + accum.mDeltaX * invNumCollisions;
positions[1] = positions[1] + accum.mDeltaY * invNumCollisions;
@@ -1795,60 +1795,60 @@ void cloth::SwCollision<Simd4f>::collideTriangles(const IterationState<Simd4f>&
mAllocator.deallocate(triangles);
}
-template <typename Simd4f>
-void cloth::SwCollision<Simd4f>::collideTriangles(const TriangleData* __restrict triangles, Simd4f* __restrict curPos,
+template <typename T4f>
+void cloth::SwCollision<T4f>::collideTriangles(const TriangleData* __restrict triangles, T4f* __restrict curPos,
ImpulseAccumulator& accum)
{
- Simd4f normalX, normalY, normalZ, normalD;
+ T4f normalX, normalY, normalZ, normalD;
normalX = normalY = normalZ = normalD = gSimd4fZero;
- Simd4f minSqrLength = gSimd4fFloatMax;
+ T4f minSqrLength = gSimd4fFloatMax;
const TriangleData* __restrict tIt, *tEnd = triangles + mClothData.mNumCollisionTriangles;
for (tIt = triangles; tIt != tEnd; ++tIt)
{
- Simd4f base = loadAligned(&tIt->base.x);
- Simd4f edge0 = loadAligned(&tIt->edge0.x);
- Simd4f edge1 = loadAligned(&tIt->edge1.x);
- Simd4f normal = loadAligned(&tIt->normal.x);
- Simd4f aux = loadAligned(&tIt->det);
+ T4f base = loadAligned(&tIt->base.x);
+ T4f edge0 = loadAligned(&tIt->edge0.x);
+ T4f edge1 = loadAligned(&tIt->edge1.x);
+ T4f normal = loadAligned(&tIt->normal.x);
+ T4f aux = loadAligned(&tIt->det);
- Simd4f dx = curPos[0] - splat<0>(base);
- Simd4f dy = curPos[1] - splat<1>(base);
- Simd4f dz = curPos[2] - splat<2>(base);
+ T4f dx = curPos[0] - splat<0>(base);
+ T4f dy = curPos[1] - splat<1>(base);
+ T4f dz = curPos[2] - splat<2>(base);
- Simd4f e0x = splat<0>(edge0);
- Simd4f e0y = splat<1>(edge0);
- Simd4f e0z = splat<2>(edge0);
+ T4f e0x = splat<0>(edge0);
+ T4f e0y = splat<1>(edge0);
+ T4f e0z = splat<2>(edge0);
- Simd4f e1x = splat<0>(edge1);
- Simd4f e1y = splat<1>(edge1);
- Simd4f e1z = splat<2>(edge1);
+ T4f e1x = splat<0>(edge1);
+ T4f e1y = splat<1>(edge1);
+ T4f e1z = splat<2>(edge1);
- Simd4f nx = splat<0>(normal);
- Simd4f ny = splat<1>(normal);
- Simd4f nz = splat<2>(normal);
+ T4f nx = splat<0>(normal);
+ T4f ny = splat<1>(normal);
+ T4f nz = splat<2>(normal);
- Simd4f deltaDotEdge0 = dx * e0x + dy * e0y + dz * e0z;
- Simd4f deltaDotEdge1 = dx * e1x + dy * e1y + dz * e1z;
- Simd4f deltaDotNormal = dx * nx + dy * ny + dz * nz;
+ T4f deltaDotEdge0 = dx * e0x + dy * e0y + dz * e0z;
+ T4f deltaDotEdge1 = dx * e1x + dy * e1y + dz * e1z;
+ T4f deltaDotNormal = dx * nx + dy * ny + dz * nz;
- Simd4f edge0DotEdge1 = splat<3>(base);
- Simd4f edge0SqrLength = splat<3>(edge0);
- Simd4f edge1SqrLength = splat<3>(edge1);
+ T4f edge0DotEdge1 = splat<3>(base);
+ T4f edge0SqrLength = splat<3>(edge0);
+ T4f edge1SqrLength = splat<3>(edge1);
- Simd4f s = edge1SqrLength * deltaDotEdge0 - edge0DotEdge1 * deltaDotEdge1;
- Simd4f t = edge0SqrLength * deltaDotEdge1 - edge0DotEdge1 * deltaDotEdge0;
+ T4f s = edge1SqrLength * deltaDotEdge0 - edge0DotEdge1 * deltaDotEdge1;
+ T4f t = edge0SqrLength * deltaDotEdge1 - edge0DotEdge1 * deltaDotEdge0;
- Simd4f sPositive = s > gSimd4fZero;
- Simd4f tPositive = t > gSimd4fZero;
+ T4f sPositive = s > gSimd4fZero;
+ T4f tPositive = t > gSimd4fZero;
- Simd4f det = splat<0>(aux);
+ T4f det = splat<0>(aux);
s = select(tPositive, s * det, deltaDotEdge0 * splat<2>(aux));
t = select(sPositive, t * det, deltaDotEdge1 * splat<3>(aux));
- Simd4f clamp = gSimd4fOne < s + t;
- Simd4f numerator = edge1SqrLength - edge0DotEdge1 + deltaDotEdge0 - deltaDotEdge1;
+ T4f clamp = gSimd4fOne < s + t;
+ T4f numerator = edge1SqrLength - edge0DotEdge1 + deltaDotEdge0 - deltaDotEdge1;
s = select(clamp, numerator * splat<1>(aux), s);
@@ -1859,13 +1859,13 @@ void cloth::SwCollision<Simd4f>::collideTriangles(const TriangleData* __restrict
dy = dy - e0y * s - e1y * t;
dz = dz - e0z * s - e1z * t;
- Simd4f sqrLength = dx * dx + dy * dy + dz * dz;
+ T4f sqrLength = dx * dx + dy * dy + dz * dz;
// slightly increase distance for colliding triangles
- Simd4f slack = (gSimd4fZero > deltaDotNormal) & simd4f(1e-4f);
+ T4f slack = (gSimd4fZero > deltaDotNormal) & simd4f(1e-4f);
sqrLength = sqrLength + sqrLength * slack;
- Simd4f mask = sqrLength < minSqrLength;
+ T4f mask = sqrLength < minSqrLength;
normalX = select(mask, nx, normalX);
normalY = select(mask, ny, normalY);
@@ -1875,7 +1875,7 @@ void cloth::SwCollision<Simd4f>::collideTriangles(const TriangleData* __restrict
minSqrLength = min(sqrLength, minSqrLength);
}
- Simd4f mask;
+ T4f mask;
if (!anyGreater(gSimd4fZero, normalD, mask))
return;