From b350eb5f4d44e8448115796144375d79438d74ae Mon Sep 17 00:00:00 2001 From: Marijn Tamis Date: Fri, 28 Apr 2017 14:19:07 +0200 Subject: NvCloth 1.1.0 Release. (22041545) --- NvCloth/src/SwInterCollision.cpp | 167 +++++++++++++++++++++------------------ 1 file changed, 88 insertions(+), 79 deletions(-) (limited to 'NvCloth/src/SwInterCollision.cpp') diff --git a/NvCloth/src/SwInterCollision.cpp b/NvCloth/src/SwInterCollision.cpp index 6d5e013..b9b494f 100644 --- a/NvCloth/src/SwInterCollision.cpp +++ b/NvCloth/src/SwInterCollision.cpp @@ -73,16 +73,16 @@ void radixSort(const uint32_t* first, const uint32_t* last, uint32_t* out) for (uint32_t i = 0; i < 256; ++i) { uint32_t temp0 = histograms[0][i] + sums[0]; - histograms[0][i] = sums[0], sums[0] = temp0; + histograms[0][i] = sums[0]; sums[0] = temp0; uint32_t temp1 = histograms[1][i] + sums[1]; - histograms[1][i] = sums[1], sums[1] = temp1; + histograms[1][i] = sums[1]; sums[1] = temp1; uint32_t temp2 = histograms[2][i] + sums[2]; - histograms[2][i] = sums[2], sums[2] = temp2; + histograms[2][i] = sums[2]; sums[2] = temp2; uint32_t temp3 = histograms[3][i] + sums[3]; - histograms[3][i] = sums[3], sums[3] = temp3; + histograms[3][i] = sums[3]; sums[3] = temp3; } NV_CLOTH_ASSERT(sums[0] == n && sums[1] == n && sums[2] == n && sums[3] == n); @@ -98,18 +98,27 @@ void radixSort(const uint32_t* first, const uint32_t* last, uint32_t* out) for (uint32_t i = 0; i != n; ++i) indices[1][histograms[0][0xff & first[i]]++] = i; - for (uint32_t i = 0, index; index = indices[1][i], i != n; ++i) + for (uint32_t i = 0, index; i != n; ++i) + { + index = indices[1][i]; indices[0][histograms[1][0xff & (first[index] >> 8)]++] = index; + } - for (uint32_t i = 0, index; index = indices[0][i], i != n; ++i) + for (uint32_t i = 0, index; i != n; ++i) + { + index = indices[0][i]; indices[1][histograms[2][0xff & (first[index] >> 16)]++] = index; + } - for (uint32_t i = 0, index; index = indices[1][i], i != n; ++i) + for (uint32_t i = 0, index; i != n; ++i) + { + index = indices[1][i]; indices[0][histograms[3][first[index] >> 24]++] = index; + } } -template -uint32_t longestAxis(const Simd4f& edgeLength) +template +uint32_t longestAxis(const T4f& edgeLength) { const float* e = array(edgeLength); @@ -120,8 +129,8 @@ uint32_t longestAxis(const Simd4f& edgeLength) } } -template -cloth::SwInterCollision::SwInterCollision(const cloth::SwInterCollisionData* instances, uint32_t n, +template +cloth::SwInterCollision::SwInterCollision(const cloth::SwInterCollisionData* instances, uint32_t n, float colDist, float stiffness, uint32_t iterations, InterCollisionFilter filter, cloth::SwKernelAllocator& alloc) : mInstances(instances) @@ -145,33 +154,33 @@ cloth::SwInterCollision::SwInterCollision(const cloth::SwInterCollisionD mTotalParticles += instances[i].mNumParticles; } -template -cloth::SwInterCollision::~SwInterCollision() +template +cloth::SwInterCollision::~SwInterCollision() { } namespace { // multiple x by m leaving w component of x intact -template -PX_INLINE Simd4f transform(const Simd4f m[4], const Simd4f& x) +template +PX_INLINE T4f transform(const T4f m[4], const T4f& x) { - const Simd4f a = m[3] + splat<0>(x) * m[0] + splat<1>(x) * m[1] + splat<2>(x) * m[2]; + const T4f a = m[3] + splat<0>(x) * m[0] + splat<1>(x) * m[1] + splat<2>(x) * m[2]; return select(sMaskXYZ, a, x); } // rotate x by m leaving w component intact -template -PX_INLINE Simd4f rotate(const Simd4f m[4], const Simd4f& x) +template +PX_INLINE T4f rotate(const T4f m[4], const T4f& x) { - const Simd4f a = splat<0>(x) * m[0] + splat<1>(x) * m[1] + splat<2>(x) * m[2]; + const T4f a = splat<0>(x) * m[0] + splat<1>(x) * m[1] + splat<2>(x) * m[2]; return select(sMaskXYZ, a, x); } -template +template struct ClothSorter { - typedef cloth::BoundingBox BoundingBox; + typedef cloth::BoundingBox BoundingBox; ClothSorter(BoundingBox* bounds, uint32_t n, uint32_t axis) : mBounds(bounds), mNumBounds(n), mAxis(axis) { @@ -194,15 +203,15 @@ struct ClothSorter // which potentially interact, the potential colliders are returned with their // cloth index and particle index in clothIndices and particleIndices, the // function returns the number of potential colliders -template +template uint32_t calculatePotentialColliders(const cloth::SwInterCollisionData* cBegin, const cloth::SwInterCollisionData* cEnd, - const Simd4f& colDist, uint16_t* clothIndices, uint32_t* particleIndices, - cloth::BoundingBox& bounds, uint32_t* overlapMasks, + const T4f& colDist, uint16_t* clothIndices, uint32_t* particleIndices, + cloth::BoundingBox& bounds, uint32_t* overlapMasks, cloth::InterCollisionFilter filter, cloth::SwKernelAllocator& allocator) { using namespace cloth; - typedef BoundingBox BoundingBox; + typedef BoundingBox BoundingBox; uint32_t numParticles = 0; const uint32_t numCloths = uint32_t(cEnd - cBegin); @@ -212,7 +221,7 @@ uint32_t calculatePotentialColliders(const cloth::SwInterCollisionData* cBegin, BoundingBox* const overlapBounds = static_cast(allocator.allocate(numCloths * sizeof(BoundingBox))); // union of all cloth world bounds - BoundingBox totalClothBounds = emptyBounds(); + BoundingBox totalClothBounds = emptyBounds(); uint32_t* sortedIndices = static_cast(allocator.allocate(numCloths * sizeof(uint32_t))); @@ -237,7 +246,7 @@ uint32_t calculatePotentialColliders(const cloth::SwInterCollisionData* cBegin, // sort indices by their minimum extent on the longest axis const uint32_t sweepAxis = longestAxis(totalClothBounds.mUpper - totalClothBounds.mLower); - ClothSorter predicate(clothBounds, numCloths, sweepAxis); + ClothSorter predicate(clothBounds, numCloths, sweepAxis); shdfnd::sort(sortedIndices, numCloths, predicate, nv::cloth::NonTrackingAllocator()); for (uint32_t i = 0; i < numCloths; ++i) @@ -247,8 +256,8 @@ uint32_t calculatePotentialColliders(const cloth::SwInterCollisionData* cBegin, const SwInterCollisionData& a = cBegin[sortedIndices[i]]; // local bounds - const Simd4f aCenter = load(reinterpret_cast(&a.mBoundsCenter)); - const Simd4f aHalfExtent = load(reinterpret_cast(&a.mBoundsHalfExtent)) + colDist; + const T4f aCenter = load(reinterpret_cast(&a.mBoundsCenter)); + const T4f aHalfExtent = load(reinterpret_cast(&a.mBoundsHalfExtent)) + colDist; const BoundingBox aBounds = { aCenter - aHalfExtent, aCenter + aHalfExtent }; const PxMat44 aToWorld = PxMat44(a.mGlobalPose); @@ -296,7 +305,7 @@ uint32_t calculatePotentialColliders(const cloth::SwInterCollisionData* cBegin, BoundingBox iBounds = intersectBounds(aBounds, bBounds); // setup bounding box w to make point containment test cheaper - Simd4f floatMax = gSimd4fFloatMax & static_cast(sMaskW); + T4f floatMax = gSimd4fFloatMax & static_cast(sMaskW); iBounds.mLower = (iBounds.mLower & sMaskXYZ) | -floatMax; iBounds.mUpper = (iBounds.mUpper & sMaskXYZ) | floatMax; @@ -310,22 +319,22 @@ uint32_t calculatePotentialColliders(const cloth::SwInterCollisionData* cBegin, const uint32_t clothIndex = sortedIndices[i]; overlapMasks[clothIndex] = overlapMask; - Simd4f* pBegin = reinterpret_cast(a.mParticles); - Simd4f* qBegin = reinterpret_cast(a.mPrevParticles); + T4f* pBegin = reinterpret_cast(a.mParticles); + T4f* qBegin = reinterpret_cast(a.mPrevParticles); - const Simd4f xform[4] = { load(reinterpret_cast(&aToWorld.column0)), + const T4f xform[4] = { load(reinterpret_cast(&aToWorld.column0)), load(reinterpret_cast(&aToWorld.column1)), load(reinterpret_cast(&aToWorld.column2)), load(reinterpret_cast(&aToWorld.column3)) }; - Simd4f impulseInvScale = recip(Simd4f(simd4f(cBegin[clothIndex].mImpulseScale))); + T4f impulseInvScale = recip(T4f(simd4f(cBegin[clothIndex].mImpulseScale))); for (uint32_t k = 0; k < a.mNumParticles; ++k) { - Simd4f* pIt = a.mIndices ? pBegin + a.mIndices[k] : pBegin + k; - Simd4f* qIt = a.mIndices ? qBegin + a.mIndices[k] : qBegin + k; + T4f* pIt = a.mIndices ? pBegin + a.mIndices[k] : pBegin + k; + T4f* qIt = a.mIndices ? qBegin + a.mIndices[k] : qBegin + k; - const Simd4f p = *pIt; + const T4f p = *pIt; for (const BoundingBox* oIt = overlapBounds, *oEnd = overlapBounds + numOverlaps; oIt != oEnd; ++oIt) { @@ -339,7 +348,7 @@ uint32_t calculatePotentialColliders(const cloth::SwInterCollisionData* cBegin, // (will be transformed back after collision) *pIt = transform(xform, p); - Simd4f impulse = (p - *qIt) * impulseInvScale; + T4f impulse = (p - *qIt) * impulseInvScale; *qIt = rotate(xform, impulse); // update world bounds @@ -364,8 +373,8 @@ uint32_t calculatePotentialColliders(const cloth::SwInterCollisionData* cBegin, } } -template -PX_INLINE Simd4f& cloth::SwInterCollision::getParticle(uint32_t index) +template +PX_INLINE T4f& cloth::SwInterCollision::getParticle(uint32_t index) { NV_CLOTH_ASSERT(index < mNumParticles); @@ -374,11 +383,11 @@ PX_INLINE Simd4f& cloth::SwInterCollision::getParticle(uint32_t index) NV_CLOTH_ASSERT(clothIndex < mNumInstances); - return reinterpret_cast(mInstances[clothIndex].mParticles[particleIndex]); + return reinterpret_cast(mInstances[clothIndex].mParticles[particleIndex]); } -template -void cloth::SwInterCollision::operator()() +template +void cloth::SwInterCollision::operator()() { mNumTests = mNumCollisions = 0; @@ -389,7 +398,7 @@ void cloth::SwInterCollision::operator()() for (uint32_t k = 0; k < mNumIterations; ++k) { // world bounds of particles - BoundingBox bounds = emptyBounds(); + BoundingBox bounds = emptyBounds(); // calculate potentially colliding set { @@ -405,8 +414,8 @@ void cloth::SwInterCollision::operator()() { NV_CLOTH_PROFILE_ZONE("cloth::SwInterCollision::Collide", /*ProfileContext::None*/ 0); - Simd4f lowerBound = bounds.mLower; - Simd4f edgeLength = max(bounds.mUpper - lowerBound, sEpsilon); + T4f lowerBound = bounds.mLower; + T4f edgeLength = max(bounds.mUpper - lowerBound, sEpsilon); // sweep along longest axis uint32_t sweepAxis = longestAxis(edgeLength); @@ -414,15 +423,15 @@ void cloth::SwInterCollision::operator()() uint32_t hashAxis1 = (sweepAxis + 2) % 3; // reserve 0, 127, and 65535 for sentinel - Simd4f cellSize = max(mCollisionDistance, simd4f(1.0f / 253) * edgeLength); + T4f cellSize = max(mCollisionDistance, simd4f(1.0f / 253) * edgeLength); array(cellSize)[sweepAxis] = array(edgeLength)[sweepAxis] / 65533; - Simd4f one = gSimd4fOne; - Simd4f gridSize = simd4f(254.0f); + T4f one = gSimd4fOne; + T4f gridSize = simd4f(254.0f); array(gridSize)[sweepAxis] = 65534.0f; - Simd4f gridScale = recip<1>(cellSize); - Simd4f gridBias = -lowerBound * gridScale + one; + T4f gridScale = recip<1>(cellSize); + T4f gridBias = -lowerBound * gridScale + one; void* buffer = mAllocator.allocate(getBufferSize(mNumParticles)); @@ -430,13 +439,13 @@ void cloth::SwInterCollision::operator()() uint32_t* __restrict sortedKeys = sortedIndices + mNumParticles; uint32_t* __restrict keys = std::max(sortedKeys + mNumParticles, sortedIndices + 2 * mNumParticles + 1024); - typedef typename Simd4fToSimd4i::Type Simd4i; + typedef typename Simd4fToSimd4i::Type Simd4i; // create keys for (uint32_t i = 0; i < mNumParticles; ++i) { // grid coordinate - Simd4f indexf = getParticle(i) * gridScale + gridBias; + T4f indexf = getParticle(i) * gridScale + gridBias; // need to clamp index because shape collision potentially // pushes particles outside of their original bounds @@ -486,7 +495,7 @@ void cloth::SwInterCollision::operator()() { NV_CLOTH_PROFILE_ZONE("cloth::SwInterCollision::PostTransform", /*ProfileContext::None*/ 0); - Simd4f toLocal[4], impulseScale; + T4f toLocal[4], impulseScale; uint16_t lastCloth = uint16_t(0xffff); for (uint32_t i = 0; i < mNumParticles; ++i) @@ -510,12 +519,12 @@ void cloth::SwInterCollision::operator()() } uint32_t particleIndex = mParticleIndices[i]; - Simd4f& particle = reinterpret_cast(instance->mParticles[particleIndex]); - Simd4f& impulse = reinterpret_cast(instance->mPrevParticles[particleIndex]); + T4f& particle = reinterpret_cast(instance->mParticles[particleIndex]); + T4f& impulse = reinterpret_cast(instance->mPrevParticles[particleIndex]); particle = transform(toLocal, particle); // avoid w becoming negative due to numerical inaccuracies - impulse = max(sZeroW, particle - rotate(toLocal, Simd4f(impulse * impulseScale))); + impulse = max(sZeroW, particle - rotate(toLocal, T4f(impulse * impulseScale))); } } } @@ -525,15 +534,15 @@ void cloth::SwInterCollision::operator()() mAllocator.deallocate(mClothIndices); } -template -size_t cloth::SwInterCollision::estimateTemporaryMemory(SwInterCollisionData* cloths, uint32_t n) +template +size_t cloth::SwInterCollision::estimateTemporaryMemory(SwInterCollisionData* cloths, uint32_t n) { // count total particles uint32_t numParticles = 0; for (uint32_t i = 0; i < n; ++i) numParticles += cloths[i].mNumParticles; - uint32_t boundsSize = 2 * n * sizeof(BoundingBox) + n * sizeof(uint32_t); + uint32_t boundsSize = 2 * n * sizeof(BoundingBox) + n * sizeof(uint32_t); uint32_t clothIndicesSize = numParticles * sizeof(uint16_t); uint32_t particleIndicesSize = numParticles * sizeof(uint32_t); uint32_t masksSize = n * sizeof(uint32_t); @@ -541,8 +550,8 @@ size_t cloth::SwInterCollision::estimateTemporaryMemory(SwInterCollision return boundsSize + clothIndicesSize + particleIndicesSize + masksSize + getBufferSize(numParticles); } -template -size_t cloth::SwInterCollision::getBufferSize(uint32_t numParticles) +template +size_t cloth::SwInterCollision::getBufferSize(uint32_t numParticles) { uint32_t keysSize = numParticles * sizeof(uint32_t); uint32_t indicesSize = numParticles * sizeof(uint32_t); @@ -551,8 +560,8 @@ size_t cloth::SwInterCollision::getBufferSize(uint32_t numParticles) return keysSize + indicesSize + std::max(indicesSize + histogramSize, keysSize); } -template -void cloth::SwInterCollision::collideParticle(uint32_t index) +template +void cloth::SwInterCollision::collideParticle(uint32_t index) { uint16_t clothIndex = mClothIndices[index]; @@ -562,10 +571,10 @@ void cloth::SwInterCollision::collideParticle(uint32_t index) const SwInterCollisionData* instance = mInstances + clothIndex; uint32_t particleIndex = mParticleIndices[index]; - Simd4f& particle = reinterpret_cast(instance->mParticles[particleIndex]); + T4f& particle = reinterpret_cast(instance->mParticles[particleIndex]); - Simd4f diff = particle - mParticle; - Simd4f distSqr = dot3(diff, diff); + T4f diff = particle - mParticle; + T4f distSqr = dot3(diff, diff); #if PX_DEBUG ++mNumTests; @@ -574,17 +583,17 @@ void cloth::SwInterCollision::collideParticle(uint32_t index) if (allGreater(distSqr, mCollisionSquareDistance)) return; - Simd4f w0 = splat<3>(mParticle); - Simd4f w1 = splat<3>(particle); + T4f w0 = splat<3>(mParticle); + T4f w1 = splat<3>(particle); - Simd4f ratio = mCollisionDistance * rsqrt<1>(distSqr); - Simd4f scale = mStiffness * recip<1>(sEpsilon + w0 + w1); - Simd4f delta = (scale * (diff - diff * ratio)) & sMaskXYZ; + T4f ratio = mCollisionDistance * rsqrt<1>(distSqr); + T4f scale = mStiffness * recip<1>(sEpsilon + w0 + w1); + T4f delta = (scale * (diff - diff * ratio)) & sMaskXYZ; mParticle = mParticle + delta * w0; particle = particle - delta * w1; - Simd4f& impulse = reinterpret_cast(instance->mPrevParticles[particleIndex]); + T4f& impulse = reinterpret_cast(instance->mPrevParticles[particleIndex]); mImpulse = mImpulse + delta * w0; impulse = impulse - delta * w1; @@ -594,8 +603,8 @@ void cloth::SwInterCollision::collideParticle(uint32_t index) #endif } -template -void cloth::SwInterCollision::collideParticles(const uint32_t* keys, uint32_t firstColumnSize, +template +void cloth::SwInterCollision::collideParticles(const uint32_t* keys, uint32_t firstColumnSize, const uint32_t* indices, uint32_t numParticles, uint32_t collisionDistance) { @@ -653,8 +662,8 @@ void cloth::SwInterCollision::collideParticles(const uint32_t* keys, uin const SwInterCollisionData* instance = mInstances + mClothIndex; mParticleIndex = mParticleIndices[index]; - mParticle = reinterpret_cast(instance->mParticles[mParticleIndex]); - mImpulse = reinterpret_cast(instance->mPrevParticles[mParticleIndex]); + mParticle = reinterpret_cast(instance->mParticles[mParticleIndex]); + mImpulse = reinterpret_cast(instance->mPrevParticles[mParticleIndex]); uint32_t key = *kFirst[0]; @@ -689,8 +698,8 @@ void cloth::SwInterCollision::collideParticles(const uint32_t* keys, uin } // write back particle and impulse - reinterpret_cast(instance->mParticles[mParticleIndex]) = mParticle; - reinterpret_cast(instance->mPrevParticles[mParticleIndex]) = mImpulse; + reinterpret_cast(instance->mParticles[mParticleIndex]) = mParticle; + reinterpret_cast(instance->mPrevParticles[mParticleIndex]) = mImpulse; } } -- cgit v1.2.3