diff options
| author | Marijn Tamis <[email protected]> | 2017-04-28 14:19:07 +0200 |
|---|---|---|
| committer | Marijn Tamis <[email protected]> | 2017-04-28 14:19:07 +0200 |
| commit | b350eb5f4d44e8448115796144375d79438d74ae (patch) | |
| tree | 8e102e8c28f45a1b87bd335ceee4f33c3d4ee7c2 /NvCloth/src/SwSolverKernel.cpp | |
| parent | Add visual samples. (diff) | |
| download | nvcloth-b350eb5f4d44e8448115796144375d79438d74ae.tar.xz nvcloth-b350eb5f4d44e8448115796144375d79438d74ae.zip | |
NvCloth 1.1.0 Release. (22041545)
Diffstat (limited to 'NvCloth/src/SwSolverKernel.cpp')
| -rw-r--r-- | NvCloth/src/SwSolverKernel.cpp | 352 |
1 files changed, 176 insertions, 176 deletions
diff --git a/NvCloth/src/SwSolverKernel.cpp b/NvCloth/src/SwSolverKernel.cpp index f53c9ab..52dfdaa 100644 --- a/NvCloth/src/SwSolverKernel.cpp +++ b/NvCloth/src/SwSolverKernel.cpp @@ -124,55 +124,55 @@ const Simd4fTupleFactory sMinusFloatMaxXYZ = simd4f(-FLT_MAX, -FLT_MAX, -FLT_MAX The g * dt * dt term is folded into accelIt. */ -template <typename Simd4f, typename AccelerationIterator> -void integrateParticles(Simd4f* __restrict curIt, Simd4f* __restrict curEnd, Simd4f* __restrict prevIt, - const Simd4f& scale, const AccelerationIterator& aIt, const Simd4f& prevBias) +template <typename T4f, typename AccelerationIterator> +void integrateParticles(T4f* __restrict curIt, T4f* __restrict curEnd, T4f* __restrict prevIt, + const T4f& scale, const AccelerationIterator& aIt, const T4f& prevBias) { // local copy to avoid LHS AccelerationIterator accelIt(aIt); for (; curIt != curEnd; ++curIt, ++prevIt, ++accelIt) { - Simd4f current = *curIt; - Simd4f previous = *prevIt; + T4f current = *curIt; + T4f previous = *prevIt; // if (current.w == 0) current.w = previous.w current = select(current > sMinusFloatMaxXYZ, current, previous); - Simd4f finiteMass = splat<3>(previous) > sFloatMaxW; - Simd4f delta = (current - previous) * scale + *accelIt; + T4f finiteMass = splat<3>(previous) > sFloatMaxW; + T4f delta = (current - previous) * scale + *accelIt; *curIt = current + (delta & finiteMass); *prevIt = select(sMaskW, previous, current) + (prevBias & finiteMass); } } -template <typename Simd4f, typename AccelerationIterator> -void integrateParticles(Simd4f* __restrict curIt, Simd4f* __restrict curEnd, Simd4f* __restrict prevIt, - const Simd4f (&prevMatrix)[3], const Simd4f (&curMatrix)[3], const AccelerationIterator& aIt, - const Simd4f& prevBias) +template <typename T4f, typename AccelerationIterator> +void integrateParticles(T4f* __restrict curIt, T4f* __restrict curEnd, T4f* __restrict prevIt, + const T4f (&prevMatrix)[3], const T4f (&curMatrix)[3], const AccelerationIterator& aIt, + const T4f& prevBias) { // local copy to avoid LHS AccelerationIterator accelIt(aIt); for (; curIt != curEnd; ++curIt, ++prevIt, ++accelIt) { - Simd4f current = *curIt; - Simd4f previous = *prevIt; + T4f current = *curIt; + T4f previous = *prevIt; // if (current.w == 0) current.w = previous.w current = select(current > sMinusFloatMaxXYZ, current, previous); - Simd4f finiteMass = splat<3>(previous) > sFloatMaxW; + T4f finiteMass = splat<3>(previous) > sFloatMaxW; // curMatrix * current + prevMatrix * previous + accel - Simd4f delta = cloth::transform(curMatrix, cloth::transform(prevMatrix, *accelIt, previous), current); + T4f delta = cloth::transform(curMatrix, cloth::transform(prevMatrix, *accelIt, previous), current); *curIt = current + (delta & finiteMass); *prevIt = select(sMaskW, previous, current) + (prevBias & finiteMass); } } -template <typename Simd4f, typename ConstraintIterator> -void constrainMotion(Simd4f* __restrict curIt, const Simd4f* __restrict curEnd, const ConstraintIterator& spheres, - const Simd4f& scaleBiasStiffness) +template <typename T4f, typename ConstraintIterator> +void constrainMotion(T4f* __restrict curIt, const T4f* __restrict curEnd, const ConstraintIterator& spheres, + const T4f& scaleBiasStiffness) { - Simd4f scale = splat<0>(scaleBiasStiffness); - Simd4f bias = splat<1>(scaleBiasStiffness); - Simd4f stiffness = splat<3>(scaleBiasStiffness); + T4f scale = splat<0>(scaleBiasStiffness); + T4f bias = splat<1>(scaleBiasStiffness); + T4f stiffness = splat<3>(scaleBiasStiffness); // local copy of iterator to maintain alignment ConstraintIterator sphIt = spheres; @@ -180,33 +180,33 @@ void constrainMotion(Simd4f* __restrict curIt, const Simd4f* __restrict curEnd, for (; curIt < curEnd; curIt += 4) { // todo: use msub where available - Simd4f curPos0 = curIt[0]; - Simd4f curPos1 = curIt[1]; - Simd4f curPos2 = curIt[2]; - Simd4f curPos3 = curIt[3]; + T4f curPos0 = curIt[0]; + T4f curPos1 = curIt[1]; + T4f curPos2 = curIt[2]; + T4f curPos3 = curIt[3]; //delta.xyz = sphereCenter - currentPosition //delta.w = sphereRadius - Simd4f delta0 = *sphIt - (sMaskXYZ & curPos0); + T4f delta0 = *sphIt - (sMaskXYZ & curPos0); ++sphIt; - Simd4f delta1 = *sphIt - (sMaskXYZ & curPos1); + T4f delta1 = *sphIt - (sMaskXYZ & curPos1); ++sphIt; - Simd4f delta2 = *sphIt - (sMaskXYZ & curPos2); + T4f delta2 = *sphIt - (sMaskXYZ & curPos2); ++sphIt; - Simd4f delta3 = *sphIt - (sMaskXYZ & curPos3); + T4f delta3 = *sphIt - (sMaskXYZ & curPos3); ++sphIt; - Simd4f deltaX = delta0, deltaY = delta1, deltaZ = delta2, deltaW = delta3; + T4f deltaX = delta0, deltaY = delta1, deltaZ = delta2, deltaW = delta3; transpose(deltaX, deltaY, deltaZ, deltaW); - Simd4f sqrLength = gSimd4fEpsilon + deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ; - Simd4f radius = max(gSimd4fZero, deltaW * scale + bias); + T4f sqrLength = gSimd4fEpsilon + deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ; + T4f radius = max(gSimd4fZero, deltaW * scale + bias); - Simd4f slack = gSimd4fOne - radius * rsqrt(sqrLength); + T4f slack = gSimd4fOne - radius * rsqrt(sqrLength); // if slack <= 0.0f then we don't want to affect particle // and can skip if all particles are unaffected - Simd4f isPositive; + T4f isPositive; if (anyGreater(slack, gSimd4fZero, isPositive)) { // set invMass to zero if radius is zero @@ -225,8 +225,8 @@ void constrainMotion(Simd4f* __restrict curIt, const Simd4f* __restrict curEnd, } } -template <typename Simd4f, typename ConstraintIterator> -void constrainSeparation(Simd4f* __restrict curIt, const Simd4f* __restrict curEnd, const ConstraintIterator& spheres) +template <typename T4f, typename ConstraintIterator> +void constrainSeparation(T4f* __restrict curIt, const T4f* __restrict curEnd, const ConstraintIterator& spheres) { // local copy of iterator to maintain alignment ConstraintIterator sphIt = spheres; @@ -234,32 +234,32 @@ void constrainSeparation(Simd4f* __restrict curIt, const Simd4f* __restrict curE for (; curIt < curEnd; curIt += 4) { // todo: use msub where available - Simd4f curPos0 = curIt[0]; - Simd4f curPos1 = curIt[1]; - Simd4f curPos2 = curIt[2]; - Simd4f curPos3 = curIt[3]; + T4f curPos0 = curIt[0]; + T4f curPos1 = curIt[1]; + T4f curPos2 = curIt[2]; + T4f curPos3 = curIt[3]; //delta.xyz = sphereCenter - currentPosition //delta.w = sphereRadius - Simd4f delta0 = *sphIt - (sMaskXYZ & curPos0); + T4f delta0 = *sphIt - (sMaskXYZ & curPos0); ++sphIt; - Simd4f delta1 = *sphIt - (sMaskXYZ & curPos1); + T4f delta1 = *sphIt - (sMaskXYZ & curPos1); ++sphIt; - Simd4f delta2 = *sphIt - (sMaskXYZ & curPos2); + T4f delta2 = *sphIt - (sMaskXYZ & curPos2); ++sphIt; - Simd4f delta3 = *sphIt - (sMaskXYZ & curPos3); + T4f delta3 = *sphIt - (sMaskXYZ & curPos3); ++sphIt; - Simd4f deltaX = delta0, deltaY = delta1, deltaZ = delta2, deltaW = delta3; + T4f deltaX = delta0, deltaY = delta1, deltaZ = delta2, deltaW = delta3; transpose(deltaX, deltaY, deltaZ, deltaW); - Simd4f sqrLength = gSimd4fEpsilon + deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ; + T4f sqrLength = gSimd4fEpsilon + deltaX * deltaX + deltaY * deltaY + deltaZ * deltaZ; - Simd4f slack = gSimd4fOne - deltaW * rsqrt<1>(sqrLength); + T4f slack = gSimd4fOne - deltaW * rsqrt<1>(sqrLength); // if slack >= 0.0f then we don't want to affect particle // and can skip if all particles are unaffected - Simd4f isNegative; + T4f isNegative; if (anyGreater(gSimd4fZero, slack, isNegative)) { slack = slack & isNegative; @@ -275,22 +275,22 @@ void constrainSeparation(Simd4f* __restrict curIt, const Simd4f* __restrict curE /** traditional gauss-seidel internal constraint solver */ -template <bool useMultiplier, typename Simd4f> +template <bool useMultiplier, typename T4f> void solveConstraints(float* __restrict posIt, const float* __restrict rIt, const float* __restrict stIt, const float* __restrict rEnd, - const uint16_t* __restrict iIt, const Simd4f& stiffnessEtc, const Simd4f& stiffnessExponent) + const uint16_t* __restrict iIt, const T4f& stiffnessEtc, const T4f& stiffnessExponent) { //posIt particle position (and invMass) iterator //rIt,rEnd edge rest length iterator //iIt index set iterator - Simd4f stretchLimit, compressionLimit, multiplier; + T4f stretchLimit, compressionLimit, multiplier; if (useMultiplier) { stretchLimit = splat<3>(stiffnessEtc); compressionLimit = splat<2>(stiffnessEtc); multiplier = splat<1>(stiffnessEtc); } - Simd4f stiffness = splat<0>(stiffnessEtc); + T4f stiffness = splat<0>(stiffnessEtc); bool useStiffnessPerConstraint = stIt!=nullptr; for (; rIt != rEnd; rIt += 4, stIt += 4, iIt += 8) @@ -307,39 +307,39 @@ void solveConstraints(float* __restrict posIt, const float* __restrict rIt, cons //Load particle positions //v.w = invMass - Simd4f v0i = loadAligned(posIt, p0i); - Simd4f v0j = loadAligned(posIt, p0j); - Simd4f v1i = loadAligned(posIt, p1i); - Simd4f v1j = loadAligned(posIt, p1j); - Simd4f v2i = loadAligned(posIt, p2i); - Simd4f v2j = loadAligned(posIt, p2j); - Simd4f v3i = loadAligned(posIt, p3i); - Simd4f v3j = loadAligned(posIt, p3j); + T4f v0i = loadAligned(posIt, p0i); + T4f v0j = loadAligned(posIt, p0j); + T4f v1i = loadAligned(posIt, p1i); + T4f v1j = loadAligned(posIt, p1j); + T4f v2i = loadAligned(posIt, p2i); + T4f v2j = loadAligned(posIt, p2j); + T4f v3i = loadAligned(posIt, p3i); + T4f v3j = loadAligned(posIt, p3j); //offset.xyz = posB - posA //offset.w = invMassB + invMassA - Simd4f h0ij = v0j + v0i * sMinusOneXYZOneW; - Simd4f h1ij = v1j + v1i * sMinusOneXYZOneW; - Simd4f h2ij = v2j + v2i * sMinusOneXYZOneW; - Simd4f h3ij = v3j + v3i * sMinusOneXYZOneW; + T4f h0ij = v0j + v0i * sMinusOneXYZOneW; + T4f h1ij = v1j + v1i * sMinusOneXYZOneW; + T4f h2ij = v2j + v2i * sMinusOneXYZOneW; + T4f h3ij = v3j + v3i * sMinusOneXYZOneW; //h xyz = offset //vw = invMass sum - Simd4f hxij = h0ij, hyij = h1ij, hzij = h2ij, vwij = h3ij; + T4f hxij = h0ij, hyij = h1ij, hzij = h2ij, vwij = h3ij; transpose(hxij, hyij, hzij, vwij); //load rest lengths - Simd4f rij = loadAligned(rIt); + T4f rij = loadAligned(rIt); //Load/calculate the constraint stiffness - Simd4f stij = useStiffnessPerConstraint ? gSimd4fOne - exp2(stiffnessExponent * static_cast<Simd4f>(loadAligned(stIt))) : stiffness; + T4f stij = useStiffnessPerConstraint ? gSimd4fOne - exp2(stiffnessExponent * static_cast<T4f>(loadAligned(stIt))) : stiffness; //squared distance between particles: e2 = epsilon + |h|^2 - Simd4f e2ij = gSimd4fEpsilon + hxij * hxij + hyij * hyij + hzij * hzij; + T4f e2ij = gSimd4fEpsilon + hxij * hxij + hyij * hyij + hzij * hzij; //slack: er = 1 - r / sqrt(e2) // or er = 0 if rest length < epsilon - Simd4f erij = (gSimd4fOne - rij * rsqrt(e2ij)) & (rij > gSimd4fEpsilon); + T4f erij = (gSimd4fOne - rij * rsqrt(e2ij)) & (rij > gSimd4fEpsilon); if (useMultiplier) { @@ -347,7 +347,7 @@ void solveConstraints(float* __restrict posIt, const float* __restrict rIt, cons } //ex = er * stiffness / sqrt(epsilon + vw) - Simd4f exij = erij * stij * recip(gSimd4fEpsilon + vwij); + T4f exij = erij * stij * recip(gSimd4fEpsilon + vwij); //h = h * ex h0ij = h0ij * splat<0>(exij) & sMaskXYZ; @@ -372,22 +372,22 @@ void solveConstraints(float* __restrict posIt, const float* __restrict rIt, cons #endif // calculates upper bound of all position deltas -template <typename Simd4f> -Simd4f calculateMaxDelta(const Simd4f* prevIt, const Simd4f* curIt, const Simd4f* curEnd) +template <typename T4f> +T4f calculateMaxDelta(const T4f* prevIt, const T4f* curIt, const T4f* curEnd) { - Simd4f maxDelta = gSimd4fZero; + T4f maxDelta = gSimd4fZero; for (; curIt < curEnd; ++curIt, ++prevIt) maxDelta = max(maxDelta, abs(*curIt - *prevIt)); return maxDelta & sMaskXYZ; } -template <bool IsTurning, typename Simd4f> -void applyWind(Simd4f* __restrict curIt, const Simd4f* __restrict prevIt, const uint16_t* __restrict tIt, - const uint16_t* __restrict tEnd, Simd4f dragCoefficient, Simd4f liftCoefficient, Simd4f wind, - const Simd4f (&rotation)[3]) +template <bool IsTurning, typename T4f> +void applyWind(T4f* __restrict curIt, const T4f* __restrict prevIt, const uint16_t* __restrict tIt, + const uint16_t* __restrict tEnd, T4f dragCoefficient, T4f liftCoefficient, T4f wind, + const T4f (&rotation)[3]) { - const Simd4f oneThird = simd4f(1.0f / 3.0f); + const T4f oneThird = simd4f(1.0f / 3.0f); for (; tIt < tEnd; tIt += 3) { @@ -397,20 +397,20 @@ void applyWind(Simd4f* __restrict curIt, const Simd4f* __restrict prevIt, const uint16_t i2 = tIt[2]; //Get the current particle positions - Simd4f c0 = curIt[i0]; - Simd4f c1 = curIt[i1]; - Simd4f c2 = curIt[i2]; + T4f c0 = curIt[i0]; + T4f c1 = curIt[i1]; + T4f c2 = curIt[i2]; //Previous positions - Simd4f p0 = prevIt[i0]; - Simd4f p1 = prevIt[i1]; - Simd4f p2 = prevIt[i2]; + T4f p0 = prevIt[i0]; + T4f p1 = prevIt[i1]; + T4f p2 = prevIt[i2]; - Simd4f current = oneThird * (c0 + c1 + c2); - Simd4f previous = oneThird * (p0 + p1 + p2); + T4f current = oneThird * (c0 + c1 + c2); + T4f previous = oneThird * (p0 + p1 + p2); //offset of the triangle center, including wind - Simd4f delta = current - previous + wind; + T4f delta = current - previous + wind; if (IsTurning) { @@ -420,26 +420,26 @@ void applyWind(Simd4f* __restrict curIt, const Simd4f* __restrict prevIt, const } //not normalized - Simd4f normal = cross3(c2 - c0, c1 - c0); + T4f normal = cross3(c2 - c0, c1 - c0); - Simd4f doubleArea = sqrt(dot3(normal, normal)); + T4f doubleArea = sqrt(dot3(normal, normal)); - Simd4f invSqrScale = dot3(delta, delta); - Simd4f isZero = invSqrScale < gSimd4fEpsilon; - Simd4f scale = rsqrt(invSqrScale); + T4f invSqrScale = dot3(delta, delta); + T4f isZero = invSqrScale < gSimd4fEpsilon; + T4f scale = rsqrt(invSqrScale); //scale 'normalizes' delta, doubleArea normalized normal - Simd4f cosTheta = dot3(normal, delta) * scale / doubleArea; - Simd4f sinTheta = sqrt(max(gSimd4fZero, gSimd4fOne - cosTheta * cosTheta)); + T4f cosTheta = dot3(normal, delta) * scale / doubleArea; + T4f sinTheta = sqrt(max(gSimd4fZero, gSimd4fOne - cosTheta * cosTheta)); // orthogonal to delta, in delta-normal plane, same length as delta - Simd4f liftDir = cross3(cross3(delta, normal), delta * scale); + T4f liftDir = cross3(cross3(delta, normal), delta * scale); // sin(theta) * cos(theta) = 0.5 * sin(2 * theta) - Simd4f lift = liftCoefficient * cosTheta * sinTheta * liftDir; - Simd4f drag = dragCoefficient * abs(cosTheta) * doubleArea * delta; //dragCoefficient should compensate for double area + T4f lift = liftCoefficient * cosTheta * sinTheta * liftDir; + T4f drag = dragCoefficient * abs(cosTheta) * doubleArea * delta; //dragCoefficient should compensate for double area - Simd4f impulse = (lift + drag) & ~isZero; + T4f impulse = (lift + drag) & ~isZero; curIt[i0] = c0 - impulse * splat<3>(c0); curIt[i1] = c1 - impulse * splat<3>(c1); @@ -449,33 +449,33 @@ void applyWind(Simd4f* __restrict curIt, const Simd4f* __restrict prevIt, const } // anonymous namespace -template <typename Simd4f> -cloth::SwSolverKernel<Simd4f>::SwSolverKernel(SwCloth const& cloth, SwClothData& clothData, +template <typename T4f> +cloth::SwSolverKernel<T4f>::SwSolverKernel(SwCloth const& cloth, SwClothData& clothData, SwKernelAllocator& allocator, IterationStateFactory& factory) : mCloth(cloth) , mClothData(clothData) , mAllocator(allocator) , mCollision(clothData, allocator) , mSelfCollision(clothData, allocator) -, mState(factory.create<Simd4f>(cloth)) +, mState(factory.create<T4f>(cloth)) { mClothData.verify(); } -template <typename Simd4f> -void cloth::SwSolverKernel<Simd4f>::operator()() +template <typename T4f> +void cloth::SwSolverKernel<T4f>::operator()() { simulateCloth(); } -template <typename Simd4f> -size_t cloth::SwSolverKernel<Simd4f>::estimateTemporaryMemory(const SwCloth& cloth) +template <typename T4f> +size_t cloth::SwSolverKernel<T4f>::estimateTemporaryMemory(const SwCloth& cloth) { - size_t collisionTempMemory = SwCollision<Simd4f>::estimateTemporaryMemory(cloth); - size_t selfCollisionTempMemory = SwSelfCollision<Simd4f>::estimateTemporaryMemory(cloth); + size_t collisionTempMemory = SwCollision<T4f>::estimateTemporaryMemory(cloth); + size_t selfCollisionTempMemory = SwSelfCollision<T4f>::estimateTemporaryMemory(cloth); size_t tempMemory = std::max(collisionTempMemory, selfCollisionTempMemory); - size_t persistentMemory = SwCollision<Simd4f>::estimatePersistentMemory(cloth); + size_t persistentMemory = SwCollision<T4f>::estimatePersistentMemory(cloth); // account for any allocator overhead (this could be exposed in the allocator) size_t maxAllocs = 32; @@ -485,13 +485,13 @@ size_t cloth::SwSolverKernel<Simd4f>::estimateTemporaryMemory(const SwCloth& clo return maxAllocatorOverhead + persistentMemory + tempMemory; } -template <typename Simd4f> +template <typename T4f> template <typename AccelerationIterator> -void cloth::SwSolverKernel<Simd4f>::integrateParticles(AccelerationIterator& accelIt, const Simd4f& prevBias) +void cloth::SwSolverKernel<T4f>::integrateParticles(AccelerationIterator& accelIt, const T4f& prevBias) { - Simd4f* curIt = reinterpret_cast<Simd4f*>(mClothData.mCurParticles); - Simd4f* curEnd = curIt + mClothData.mNumParticles; - Simd4f* prevIt = reinterpret_cast<Simd4f*>(mClothData.mPrevParticles); + T4f* curIt = reinterpret_cast<T4f*>(mClothData.mCurParticles); + T4f* curEnd = curIt + mClothData.mNumParticles; + T4f* prevIt = reinterpret_cast<T4f*>(mClothData.mPrevParticles); if (!mState.mIsTurning) { @@ -504,32 +504,32 @@ void cloth::SwSolverKernel<Simd4f>::integrateParticles(AccelerationIterator& acc } } -template <typename Simd4f> -void cloth::SwSolverKernel<Simd4f>::integrateParticles() +template <typename T4f> +void cloth::SwSolverKernel<T4f>::integrateParticles() { NV_CLOTH_PROFILE_ZONE("cloth::SwSolverKernel::integrateParticles", /*ProfileContext::None*/ 0); - const Simd4f* startAccelIt = reinterpret_cast<const Simd4f*>(mClothData.mParticleAccelerations); + const T4f* startAccelIt = reinterpret_cast<const T4f*>(mClothData.mParticleAccelerations); // dt^2 (todo: should this be the smoothed dt used for gravity?) - const Simd4f sqrIterDt = simd4f(sqr(mState.mIterDt)) & static_cast<Simd4f>(sMaskXYZ); + const T4f sqrIterDt = simd4f(sqr(mState.mIterDt)) & static_cast<T4f>(sMaskXYZ); if (!startAccelIt) { // no per-particle accelerations, use a constant - ConstantIterator<Simd4f> accelIt(mState.mCurBias); + ConstantIterator<T4f> accelIt(mState.mCurBias); integrateParticles(accelIt, mState.mPrevBias); } else { // iterator implicitly scales by dt^2 and adds gravity - ScaleBiasIterator<Simd4f, const Simd4f*> accelIt(startAccelIt, sqrIterDt, mState.mCurBias); + ScaleBiasIterator<T4f, const T4f*> accelIt(startAccelIt, sqrIterDt, mState.mCurBias); integrateParticles(accelIt, mState.mPrevBias); } } -template <typename Simd4f> -void cloth::SwSolverKernel<Simd4f>::constrainTether() +template <typename T4f> +void cloth::SwSolverKernel<T4f>::constrainTether() { if (0.0f == mClothData.mTetherConstraintStiffness || !mClothData.mNumTethers) return; @@ -551,15 +551,15 @@ void cloth::SwSolverKernel<Simd4f>::constrainTether() TetherIter tEnd = tFirst + numTethers; //Tether properties - Simd4f stiffness = - static_cast<Simd4f>(sMaskXYZ) & simd4f(numParticles * mClothData.mTetherConstraintStiffness / numTethers); - Simd4f scale = simd4f(mClothData.mTetherConstraintScale); + T4f stiffness = + static_cast<T4f>(sMaskXYZ) & simd4f(numParticles * mClothData.mTetherConstraintStiffness / numTethers); + T4f scale = simd4f(mClothData.mTetherConstraintScale); //Loop through all particles for (; curIt != curEnd; curIt += 4, ++tFirst) { - Simd4f position = loadAligned(curIt); //Get the first particle - Simd4f offset = gSimd4fZero; //We accumulate the offset in this variable + T4f position = loadAligned(curIt); //Get the first particle + T4f offset = gSimd4fZero; //We accumulate the offset in this variable //Loop through all tethers connected to our particle for (TetherIter tIt = tFirst; tIt < tEnd; tIt += numParticles) @@ -567,15 +567,15 @@ void cloth::SwSolverKernel<Simd4f>::constrainTether() NV_CLOTH_ASSERT(tIt->mAnchor < numParticles); //Get the particle on the other end of the tether - Simd4f anchor = loadAligned(curFirst, tIt->mAnchor * sizeof(PxVec4)); - Simd4f delta = anchor - position; - Simd4f sqrLength = gSimd4fEpsilon + dot3(delta, delta); + T4f anchor = loadAligned(curFirst, tIt->mAnchor * sizeof(PxVec4)); + T4f delta = anchor - position; + T4f sqrLength = gSimd4fEpsilon + dot3(delta, delta); - Simd4f tetherLength = load(&tIt->mLength); + T4f tetherLength = load(&tIt->mLength); tetherLength = splat<0>(tetherLength); - Simd4f radius = tetherLength * scale; - Simd4f slack = gSimd4fOne - radius * rsqrt(sqrLength); + T4f radius = tetherLength * scale; + T4f slack = gSimd4fOne - radius * rsqrt(sqrLength); offset = offset + delta * max(slack, gSimd4fZero); } @@ -584,8 +584,8 @@ void cloth::SwSolverKernel<Simd4f>::constrainTether() } } -template <typename Simd4f> -void cloth::SwSolverKernel<Simd4f>::solveFabric() +template <typename T4f> +void cloth::SwSolverKernel<T4f>::solveFabric() { NV_CLOTH_PROFILE_ZONE("cloth::SwSolverKernel::solveFabric", /*ProfileContext::None*/ 0); @@ -604,7 +604,7 @@ void cloth::SwSolverKernel<Simd4f>::solveFabric() uint32_t totalConstraints = 0; - Simd4f stiffnessExponent = simd4f(mCloth.mStiffnessFrequency * mState.mIterDt); + T4f stiffnessExponent = simd4f(mCloth.mStiffnessFrequency * mState.mIterDt); //Loop through all phase configs for (; cIt != cEnd; ++cIt) @@ -623,10 +623,10 @@ void cloth::SwSolverKernel<Simd4f>::solveFabric() totalConstraints += uint32_t(rEnd - rIt); // (stiffness, multiplier, compressionLimit, stretchLimit) - Simd4f config = load(&cIt->mStiffness); + T4f config = load(&cIt->mStiffness); // stiffness specified as fraction of constraint error per-millisecond - Simd4f scaledConfig = gSimd4fOne - exp2(config * stiffnessExponent); - Simd4f stiffness = select(sMaskXY, scaledConfig, config); + T4f scaledConfig = gSimd4fOne - exp2(config * stiffnessExponent); + T4f stiffness = select(sMaskXY, scaledConfig, config); int neutralMultiplier = allEqual(sMaskYZW & stiffness, gSimd4fZero); @@ -654,22 +654,22 @@ void cloth::SwSolverKernel<Simd4f>::solveFabric() } } -template <typename Simd4f> -void cloth::SwSolverKernel<Simd4f>::applyWind() +template <typename T4f> +void cloth::SwSolverKernel<T4f>::applyWind() { if (mClothData.mDragCoefficient == 0.0f && mClothData.mLiftCoefficient == 0.0f) return; NV_CLOTH_PROFILE_ZONE("cloth::SwSolverKernel::applyWind", /*ProfileContext::None*/ 0); - Simd4f* curIt = reinterpret_cast<Simd4f*>(mClothData.mCurParticles); - Simd4f* prevIt = reinterpret_cast<Simd4f*>(mClothData.mPrevParticles); + T4f* curIt = reinterpret_cast<T4f*>(mClothData.mCurParticles); + T4f* prevIt = reinterpret_cast<T4f*>(mClothData.mPrevParticles); const uint16_t* tIt = mClothData.mTriangles; const uint16_t* tEnd = tIt + 3 * mClothData.mNumTriangles; - Simd4f dragCoefficient = simd4f(mClothData.mDragCoefficient); - Simd4f liftCoefficient = simd4f(mClothData.mLiftCoefficient); + T4f dragCoefficient = simd4f(mClothData.mDragCoefficient); + T4f liftCoefficient = simd4f(mClothData.mLiftCoefficient); if (mState.mIsTurning) { @@ -683,23 +683,23 @@ void cloth::SwSolverKernel<Simd4f>::applyWind() } } -template <typename Simd4f> -void cloth::SwSolverKernel<Simd4f>::constrainMotion() +template <typename T4f> +void cloth::SwSolverKernel<T4f>::constrainMotion() { if (!mClothData.mStartMotionConstraints) return; NV_CLOTH_PROFILE_ZONE("cloth::SwSolverKernel::constrainMotion", /*ProfileContext::None*/ 0); - Simd4f* curIt = reinterpret_cast<Simd4f*>(mClothData.mCurParticles); - Simd4f* curEnd = curIt + mClothData.mNumParticles; + T4f* curIt = reinterpret_cast<T4f*>(mClothData.mCurParticles); + T4f* curEnd = curIt + mClothData.mNumParticles; - const Simd4f* startIt = reinterpret_cast<const Simd4f*>(mClothData.mStartMotionConstraints); - const Simd4f* targetIt = reinterpret_cast<const Simd4f*>(mClothData.mTargetMotionConstraints); + const T4f* startIt = reinterpret_cast<const T4f*>(mClothData.mStartMotionConstraints); + const T4f* targetIt = reinterpret_cast<const T4f*>(mClothData.mTargetMotionConstraints); - Simd4f scaleBias = load(&mCloth.mMotionConstraintScale); - Simd4f stiffness = simd4f(mClothData.mMotionConstraintStiffness); - Simd4f scaleBiasStiffness = select(sMaskXYZ, scaleBias, stiffness); + T4f scaleBias = load(&mCloth.mMotionConstraintScale); + T4f stiffness = simd4f(mClothData.mMotionConstraintStiffness); + T4f scaleBiasStiffness = select(sMaskXYZ, scaleBias, stiffness); if (!mClothData.mTargetMotionConstraints) { @@ -714,23 +714,23 @@ void cloth::SwSolverKernel<Simd4f>::constrainMotion() } // otherwise use an interpolating iterator - LerpIterator<Simd4f, const Simd4f*> interpolator(startIt, targetIt, mState.getCurrentAlpha()); + LerpIterator<T4f, const T4f*> interpolator(startIt, targetIt, mState.getCurrentAlpha()); ::constrainMotion(curIt, curEnd, interpolator, scaleBiasStiffness); } -template <typename Simd4f> -void cloth::SwSolverKernel<Simd4f>::constrainSeparation() +template <typename T4f> +void cloth::SwSolverKernel<T4f>::constrainSeparation() { if (!mClothData.mStartSeparationConstraints) return; NV_CLOTH_PROFILE_ZONE("cloth::SwSolverKernel::constrainSeparation", /*ProfileContext::None*/ 0); - Simd4f* curIt = reinterpret_cast<Simd4f*>(mClothData.mCurParticles); - Simd4f* curEnd = curIt + mClothData.mNumParticles; + T4f* curIt = reinterpret_cast<T4f*>(mClothData.mCurParticles); + T4f* curEnd = curIt + mClothData.mNumParticles; - const Simd4f* startIt = reinterpret_cast<const Simd4f*>(mClothData.mStartSeparationConstraints); - const Simd4f* targetIt = reinterpret_cast<const Simd4f*>(mClothData.mTargetSeparationConstraints); + const T4f* startIt = reinterpret_cast<const T4f*>(mClothData.mStartSeparationConstraints); + const T4f* targetIt = reinterpret_cast<const T4f*>(mClothData.mTargetSeparationConstraints); if (!mClothData.mTargetSeparationConstraints) { @@ -744,43 +744,43 @@ void cloth::SwSolverKernel<Simd4f>::constrainSeparation() return ::constrainSeparation(curIt, curEnd, targetIt); } // otherwise use an interpolating iterator - LerpIterator<Simd4f, const Simd4f*> interpolator(startIt, targetIt, mState.getCurrentAlpha()); + LerpIterator<T4f, const T4f*> interpolator(startIt, targetIt, mState.getCurrentAlpha()); ::constrainSeparation(curIt, curEnd, interpolator); } -template <typename Simd4f> -void cloth::SwSolverKernel<Simd4f>::collideParticles() +template <typename T4f> +void cloth::SwSolverKernel<T4f>::collideParticles() { NV_CLOTH_PROFILE_ZONE("cloth::SwSolverKernel::collideParticles", /*ProfileContext::None*/ 0); mCollision(mState); } -template <typename Simd4f> -void cloth::SwSolverKernel<Simd4f>::selfCollideParticles() +template <typename T4f> +void cloth::SwSolverKernel<T4f>::selfCollideParticles() { NV_CLOTH_PROFILE_ZONE("cloth::SwSolverKernel::selfCollideParticles", /*ProfileContext::None*/ 0); mSelfCollision(); } -template <typename Simd4f> -void cloth::SwSolverKernel<Simd4f>::updateSleepState() +template <typename T4f> +void cloth::SwSolverKernel<T4f>::updateSleepState() { NV_CLOTH_PROFILE_ZONE("cloth::SwSolverKernel::updateSleepState", /*ProfileContext::None*/ 0); mClothData.mSleepTestCounter += std::max(1u, uint32_t(mState.mIterDt * 1000)); if (mClothData.mSleepTestCounter >= mCloth.mSleepTestInterval) { - const Simd4f* prevIt = reinterpret_cast<Simd4f*>(mClothData.mPrevParticles); - const Simd4f* curIt = reinterpret_cast<Simd4f*>(mClothData.mCurParticles); - const Simd4f* curEnd = curIt + mClothData.mNumParticles; + const T4f* prevIt = reinterpret_cast<T4f*>(mClothData.mPrevParticles); + const T4f* curIt = reinterpret_cast<T4f*>(mClothData.mCurParticles); + const T4f* curEnd = curIt + mClothData.mNumParticles; // calculate max particle delta since last iteration - Simd4f maxDelta = calculateMaxDelta(prevIt, curIt, curEnd); + T4f maxDelta = calculateMaxDelta(prevIt, curIt, curEnd); ++mClothData.mSleepPassCounter; - Simd4f threshold = simd4f(mCloth.mSleepThreshold * mState.mIterDt); + T4f threshold = simd4f(mCloth.mSleepThreshold * mState.mIterDt); if (anyGreaterEqual(maxDelta, threshold)) mClothData.mSleepPassCounter = 0; @@ -788,8 +788,8 @@ void cloth::SwSolverKernel<Simd4f>::updateSleepState() } } -template <typename Simd4f> -void cloth::SwSolverKernel<Simd4f>::iterateCloth() +template <typename T4f> +void cloth::SwSolverKernel<T4f>::iterateCloth() { // note on invMass (stored in current/previous positions.w): // integrateParticles() @@ -832,8 +832,8 @@ void cloth::SwSolverKernel<Simd4f>::iterateCloth() updateSleepState(); } -template <typename Simd4f> -void cloth::SwSolverKernel<Simd4f>::simulateCloth() +template <typename T4f> +void cloth::SwSolverKernel<T4f>::simulateCloth() { while (mState.mRemainingIterations) { |