aboutsummaryrefslogtreecommitdiff
path: root/NvCloth/src/SwSelfCollision.cpp
diff options
context:
space:
mode:
author mtamis <[email protected]> 2017-02-15 16:06:25 +0100
committer mtamis <[email protected]> 2017-02-15 16:06:25 +0100
commit 85305930aeeb1d513e23522bd91f29ba81aa6d14 (patch)
tree 45f1bb20a45a300d1fef107e436cac95602a0e57 /NvCloth/src/SwSelfCollision.cpp
download nvcloth-85305930aeeb1d513e23522bd91f29ba81aa6d14.tar.xz
nvcloth-85305930aeeb1d513e23522bd91f29ba81aa6d14.zip
NvCloth library v1.0.0
Diffstat (limited to 'NvCloth/src/SwSelfCollision.cpp')
-rw-r--r-- NvCloth/src/SwSelfCollision.cpp 416
1 files changed, 416 insertions, 0 deletions
diff --git a/NvCloth/src/SwSelfCollision.cpp b/NvCloth/src/SwSelfCollision.cpp
new file mode 100644
index 0000000..6b3e267
--- /dev/null
+++ b/NvCloth/src/SwSelfCollision.cpp
@@ -0,0 +1,416 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "SwSelfCollision.h"
+#include "SwCloth.h"
+#include "SwClothData.h"
+#include "SwCollisionHelpers.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable : 4127) // conditional expression is constant
+#endif
+
+using namespace nv;
+
+namespace
+{
+
+const Simd4fTupleFactory sMaskXYZ = simd4f(simd4i(~0, ~0, ~0, 0)); // all bits set in x, y, z and none in w: AND with it to zero the w lane
+
+// Sorts the 32-bit keys in [first, last) with a 4-pass least-significant-byte
+// first radix sort (8 bits per pass) and writes the resulting permutation to
+// out[0 .. n), n = last - first: out[i] is the position in 'first' of the
+// i-th smallest key. The keys themselves are not modified. Indices and
+// counters are 16 bit wide, so n must not exceed 65535.
+//
+// 'out' is also used as scratch space and needs room for at least
+// 2 * n + 1024 uint16_t elements:
+//   out[0 .. n)            result / ping-pong buffer 0
+//   out[n .. 2n)           ping-pong buffer 1
+//   out[2n .. 2n + 1024)   four 256-entry histograms, one per key byte
+// On return, entry b of histogram h (out[2n + 256 * h + b]) holds the number
+// of keys whose byte h is <= b.
+void radixSort(const uint32_t* first, const uint32_t* last, uint16_t* out)
+{
+    // indices and histogram counters are 16 bit; a longer range would wrap silently
+    NV_CLOTH_ASSERT(last - first >= 0 && last - first <= 0xffff);
+
+    uint16_t n = uint16_t(last - first);
+
+    uint16_t* buffer = out + 2 * n;
+    uint16_t* __restrict histograms[] = { buffer, buffer + 256, buffer + 512, buffer + 768 };
+
+    memset(buffer, 0, 1024 * sizeof(uint16_t));
+
+    // build all 4 histograms (one per key byte) in one pass
+    for (const uint32_t* __restrict it = first; it != last; ++it)
+    {
+        uint32_t key = *it;
+        ++histograms[0][0xff & key];
+        ++histograms[1][0xff & (key >> 8)];
+        ++histograms[2][0xff & (key >> 16)];
+        ++histograms[3][key >> 24];
+    }
+
+    // convert histograms to offset tables in-place (exclusive prefix sums)
+    uint16_t sums[4] = {};
+    for (uint32_t i = 0; i < 256; ++i)
+    {
+        for (uint32_t h = 0; h < 4; ++h)
+        {
+            uint16_t count = histograms[h][i];
+            histograms[h][i] = sums[h];
+            sums[h] = uint16_t(sums[h] + count);
+        }
+    }
+
+    NV_CLOTH_ASSERT(sums[0] == n && sums[1] == n && sums[2] == n && sums[3] == n);
+
+#if PX_DEBUG
+    memset(out, 0xff, 2 * n * sizeof(uint16_t));
+#endif
+
+    // sort 8 bits per pass, ping-ponging between the two halves of out;
+    // the 4th pass leaves the final permutation in out[0 .. n)
+
+    uint16_t* __restrict indices[] = { out, out + n };
+
+    for (uint16_t i = 0; i != n; ++i)
+        indices[1][histograms[0][0xff & first[i]]++] = i;
+
+    // the source index is loaded inside the loop body so that nothing past
+    // the n valid entries of the source half is ever read
+    for (uint16_t i = 0; i != n; ++i)
+    {
+        uint16_t index = indices[1][i];
+        indices[0][histograms[1][0xff & (first[index] >> 8)]++] = index;
+    }
+
+    for (uint16_t i = 0; i != n; ++i)
+    {
+        uint16_t index = indices[0][i];
+        indices[1][histograms[2][0xff & (first[index] >> 16)]++] = index;
+    }
+
+    for (uint16_t i = 0; i != n; ++i)
+    {
+        uint16_t index = indices[1][i];
+        indices[0][histograms[3][first[index] >> 24]++] = index;
+    }
+}
+
+// Returns the index (0, 1 or 2) of the largest of the first three
+// components of edgeLength. Ties resolve towards the higher index.
+template <typename Simd4f>
+uint32_t longestAxis(const Simd4f& edgeLength)
+{
+    const float* extent = array(edgeLength);
+
+    // winner of x versus y first, then compare that one against z
+    const uint32_t widerOfXY = extent[0] > extent[1] ? 0u : 1u;
+    return extent[widerOfXY] > extent[2] ? widerOfXY : 2u;
+}
+
+// Self collision is active for the solver-side cloth data when the smaller
+// of self collision distance and stiffness is strictly positive.
+bool isSelfCollisionEnabled(const cloth::SwClothData& cloth)
+{
+    return 0.0f < std::min(cloth.mSelfCollisionDistance, cloth.mSelfCollisionStiffness);
+}
+
+// Same test for the user-facing cloth object, which stores the stiffness as
+// mSelfCollisionLogStiffness: enabled when the smaller of the distance and
+// the negated log stiffness is strictly positive.
+bool isSelfCollisionEnabled(const cloth::SwCloth& cloth)
+{
+    return 0.0f < std::min(cloth.mSelfCollisionDistance, -cloth.mSelfCollisionLogStiffness);
+}
+
+// Rounds x up to the next even number (wraps around to 0 for 0xffffffff).
+inline uint32_t align2(uint32_t x)
+{
+    // adding the low bit bumps odd values by one and leaves even values alone
+    return x + (x & 1u);
+}
+
+} // anonymous namespace
+
+// Stores the cloth data and allocator used by operator() and precomputes the
+// self collision constants as SIMD values.
+template <typename Simd4f>
+cloth::SwSelfCollision<Simd4f>::SwSelfCollision(cloth::SwClothData& clothData, cloth::SwKernelAllocator& alloc)
+: mClothData(clothData), mAllocator(alloc)
+{
+    // collision distance and its square (the latter is what the pair test compares against)
+    mCollisionDistance = simd4f(mClothData.mSelfCollisionDistance);
+    mCollisionSquareDistance = mCollisionDistance * mCollisionDistance;
+
+    // stiffness applies to x, y, z only; the w lane is masked to zero
+    const Simd4f stiffnessAllLanes = simd4f(mClothData.mSelfCollisionStiffness);
+    mStiffness = sMaskXYZ & stiffnessAllLanes;
+}
+
+template <typename Simd4f>
+cloth::SwSelfCollision<Simd4f>::~SwSelfCollision()
+{ // nothing to release here: operator() frees its scratch buffer before it returns
+}
+
+template <typename Simd4f>
+void cloth::SwSelfCollision<Simd4f>::operator()()
+{ // Self collision pass: bins particles into a grid, sorts them by packed cell key, then collides nearby particles in place.
+ mNumTests = mNumCollisions = 0;
+ // nothing to do unless both self collision distance and stiffness are positive
+ if (!isSelfCollisionEnabled(mClothData))
+ return;
+ // mCurBounds is read as two triples, at [0] and at [3] -- presumably lower and upper corner (TODO confirm)
+ Simd4f lowerBound = load(mClothData.mCurBounds);
+ Simd4f edgeLength = max(load(mClothData.mCurBounds + 3) - lowerBound, gSimd4fEpsilon); // clamped to epsilon so it is safe to divide by
+
+ // sweep along longest axis
+ uint32_t sweepAxis = longestAxis(edgeLength);
+ uint32_t hashAxis0 = (sweepAxis + 1) % 3;
+ uint32_t hashAxis1 = (sweepAxis + 2) % 3;
+ // grid coordinates are clamped below to [1, 254] on the two hash axes and to [1, 65534] on the sweep axis,
+ // which leaves 0, 255 and 65535 unused so they can act as sentinels
+ Simd4f cellSize = max(mCollisionDistance, simd4f(1.0f / 253) * edgeLength); // hash axes: cells at least one collision distance wide, at most 253 across the bounds
+ array(cellSize)[sweepAxis] = array(edgeLength)[sweepAxis] / 65533; // sweep axis: bounds split into 65533 cells
+ // gridSize is the per-axis upper clamp of the grid coordinate
+ Simd4f one = gSimd4fOne;
+ Simd4f gridSize = simd4f(254.0f);
+ array(gridSize)[sweepAxis] = 65534.0f;
+ // position -> grid coordinate is position * gridScale + gridBias; the bias maps lowerBound to coordinate 1
+ Simd4f gridScale = recip<1>(cellSize);
+ Simd4f gridBias = -lowerBound * gridScale + one;
+
+ uint32_t numIndices = mClothData.mNumSelfCollisionIndices;
+ void* buffer = mAllocator.allocate(getBufferSize(numIndices));
+ // scratch layout: keys[numIndices] | sortedIndices[align2(numIndices)] | sortedKeys[numIndices + 1]; radixSort's temporaries overlap the sortedKeys region
+ const uint32_t* __restrict indices = mClothData.mSelfCollisionIndices;
+ uint32_t* __restrict keys = reinterpret_cast<uint32_t*>(buffer);
+ uint16_t* __restrict sortedIndices = reinterpret_cast<uint16_t*>(keys + numIndices);
+ uint32_t* __restrict sortedKeys = reinterpret_cast<uint32_t*>(sortedIndices + align2(numIndices));
+
+ const Simd4f* particles = reinterpret_cast<const Simd4f*>(mClothData.mCurParticles);
+
+ // create keys
+ for (uint32_t i = 0; i < numIndices; ++i)
+ {
+ uint32_t index = indices ? indices[i] : i; // a null index list means particles 0 .. numIndices-1 take part
+
+ // grid coordinate
+ Simd4f keyf = particles[index] * gridScale + gridBias;
+
+ // need to clamp index because shape collision potentially
+ // pushes particles outside of their original bounds
+ Simd4i keyi = intFloor(max(one, min(keyf, gridSize)));
+ // pack the key: bits 0-15 sweep axis cell, bits 16-23 hashAxis0 cell, bits 24-31 hashAxis1 cell
+ const int32_t* ptr = array(keyi);
+ keys[i] = uint32_t(ptr[sweepAxis] | (ptr[hashAxis0] << 16) | (ptr[hashAxis1] << 24));
+ }
+
+ // compute sorted keys indices
+ radixSort(keys, keys + numIndices, sortedIndices);
+ // radixSort leaves its histograms at sortedIndices + 2 * numIndices; 768 + 1 is bucket 1 of the most significant key byte
+ // snoop histogram: offset of first index with 8 msb > 1 (0 is sentinel)
+ uint16_t firstColumnSize = sortedIndices[2 * numIndices + 769];
+ // must be read before the loop below, which overwrites the histogram area with sortedKeys
+ // sort keys
+ for (uint32_t i = 0; i < numIndices; ++i)
+ sortedKeys[i] = keys[sortedIndices[i]];
+ sortedKeys[numIndices] = uint32_t(-1); // sentinel: larger than any packed key, terminates the forward scans in collideParticles
+
+ if (indices)
+ {
+ // sort indices (into no-longer-needed keys array)
+ const uint16_t* __restrict permutation = sortedIndices;
+ sortedIndices = reinterpret_cast<uint16_t*>(keys);
+ for (uint32_t i = 0; i < numIndices; ++i)
+ sortedIndices[i] = uint16_t(indices[permutation[i]]);
+ }
+
+ // calculate the number of buckets we need to search forward
+ const Simd4i data = intFloor(gridScale * mCollisionDistance);
+ uint32_t collisionDistance = 2 + static_cast<uint32_t>(array(data)[sweepAxis]); // collision distance in sweep-axis cells, plus 2
+
+ // collide particles
+ if (mClothData.mRestPositions)
+ collideParticles<true>(sortedKeys, firstColumnSize, sortedIndices, collisionDistance);
+ else
+ collideParticles<false>(sortedKeys, firstColumnSize, sortedIndices, collisionDistance);
+
+ mAllocator.deallocate(buffer);
+
+ // verify against brute force (disable collision response when testing)
+ /*
+ uint32_t numCollisions = mNumCollisions;
+ mNumCollisions = 0;
+
+ Simd4f* qarticles = reinterpret_cast<
+ Simd4f*>(mClothData.mCurParticles);
+ for (uint32_t i = 0; i < numIndices; ++i)
+ {
+ uint32_t indexI = indices ? indices[i] : i;
+ for (uint32_t j = i + 1; j < numIndices; ++j)
+ {
+ uint32_t indexJ = indices ? indices[j] : j;
+ collideParticles(qarticles[indexI], qarticles[indexJ]);
+ }
+ }
+
+ static uint32_t iter = 0; ++iter;
+ if (numCollisions != mNumCollisions)
+ printf("%u: %u != %u\n", iter, numCollisions, mNumCollisions);
+ */
+}
+
+// Returns getBufferSize() for the number of particles taking part in self
+// collision on the given cloth, or 0 when self collision is disabled for it.
+template <typename Simd4f>
+size_t cloth::SwSelfCollision<Simd4f>::estimateTemporaryMemory(const SwCloth& cloth)
+{
+    if (!isSelfCollisionEnabled(cloth))
+        return 0;
+
+    // an empty index list means that every particle takes part
+    const bool useAllParticles = cloth.mSelfCollisionIndices.empty();
+    const uint32_t numIndices =
+        uint32_t(useAllParticles ? cloth.mCurParticles.size() : cloth.mSelfCollisionIndices.size());
+
+    return getBufferSize(numIndices);
+}
+
+// Size in bytes of the scratch buffer operator() needs for numIndices
+// particles. The buffer holds, in order:
+//   keys:          numIndices uint32_t
+//   sortedIndices: align2(numIndices) uint16_t (the padding keeps the byte
+//                  offset of the following uint32_t array a multiple of 4)
+//   a shared tail: large enough for radixSort's remaining temporaries
+//                  ((numIndices + 1024) uint16_t) and for sortedKeys
+//                  (numIndices + 1 uint32_t, the extra one being the sentinel)
+template <typename Simd4f>
+size_t cloth::SwSelfCollision<Simd4f>::getBufferSize(uint32_t numIndices)
+{
+    const uint32_t keyBytes = numIndices * sizeof(uint32_t);
+    const uint32_t indexBytes = align2(numIndices) * sizeof(uint16_t);
+
+    const uint32_t radixScratchBytes = (numIndices + 1024) * sizeof(uint16_t);
+    const uint32_t sortedKeyBytes = keyBytes + uint32_t(sizeof(uint32_t));
+    const uint32_t sharedTailBytes = std::max(radixScratchBytes, sortedKeyBytes);
+
+    return keyBytes + indexBytes + sharedTailBytes;
+}
+
+// Tests a single particle pair and, if the two are closer than the self
+// collision distance, moves both positions apart in place.
+//
+// pos0, pos1:          current particles; x, y, z are updated, w is only read
+//                      and scales how much each particle moves
+// pos0rest, pos1rest:  rest positions, only looked at when useRestParticles
+//                      is true; pairs that are already closer than the
+//                      collision distance at rest are skipped
+template <typename Simd4f>
+template <bool useRestParticles>
+void cloth::SwSelfCollision<Simd4f>::collideParticles(Simd4f& pos0, Simd4f& pos1, const Simd4f& pos0rest,
+                                                      const Simd4f& pos1rest)
+{
+    const Simd4f separation = pos1 - pos0;
+    const Simd4f sqrDistance = dot3(separation, separation);
+
+#if PX_DEBUG
+    ++mNumTests;
+#endif
+
+    // further apart than the collision distance: no contact
+    if (allGreater(sqrDistance, mCollisionSquareDistance))
+        return;
+
+    if (useRestParticles)
+    {
+        // particles that sit closer than the collision distance in the rest
+        // configuration must not push each other apart in the deformed one
+        const Simd4f restSeparation = pos1rest - pos0rest;
+        const Simd4f restSqrDistance = dot3(restSeparation, restSeparation);
+
+        if (allGreater(mCollisionSquareDistance, restSqrDistance))
+            return;
+    }
+
+    // w component of each particle replicated to all lanes
+    // (presumably the inverse mass -- confirm against the particle layout)
+    const Simd4f weight0 = splat<3>(pos0);
+    const Simd4f weight1 = splat<3>(pos1);
+
+    // ratio = collision distance / current distance; the correction is the
+    // stiffness-scaled difference between the current separation and the
+    // same vector stretched to the collision distance, with w masked to zero
+    const Simd4f ratio = mCollisionDistance * rsqrt(sqrDistance);
+    const Simd4f scale = mStiffness * recip(gSimd4fEpsilon + weight0 + weight1);
+    const Simd4f delta = (scale * (separation - separation * ratio)) & sMaskXYZ;
+
+    // distribute the correction between both particles by their weights
+    pos0 = pos0 + delta * weight0;
+    pos1 = pos1 - delta * weight1;
+
+#if PX_DEBUG || PX_PROFILE
+    ++mNumCollisions;
+#endif
+}
+
+template <typename Simd4f>
+template <bool useRestParticles>
+void cloth::SwSelfCollision<Simd4f>::collideParticles(const uint32_t* keys, uint16_t firstColumnSize,
+ const uint16_t* indices, uint32_t collisionDistance)
+{ // Walks the key-sorted particles once and collides each with the particles ahead of it in its own cell and in four neighbor cells.
+ Simd4f* __restrict particles = reinterpret_cast<Simd4f*>(mClothData.mCurParticles);
+ Simd4f* __restrict restParticles =
+ useRestParticles ? reinterpret_cast<Simd4f*>(mClothData.mRestPositions) : particles; // NOTE(review): aliases particles when rest positions are unused although both pointers are __restrict
+ // keys: ascending packed cell keys followed by a sentinel; indices: particle index for each key, in the same order
+ const uint32_t bucketMask = uint16_t(-1); // 0xffff, the sweep-axis bits of a key
+ // key deltas of the visited cells: same cell, +1 in bits 16-23, (+1 in bits 24-31, -1 in bits 16-23), +1 in bits 24-31, +1 in both
+ const uint32_t keyOffsets[] = { 0, 0x00010000, 0x00ff0000, 0x01000000, 0x01010000 };
+ // per key offset: the [first, last) window into keys that is currently in range
+ const uint32_t* __restrict kFirst[5];
+ const uint32_t* __restrict kLast[5];
+
+ {
+ // optimization: scan forward iterator starting points once instead of 9 times
+ const uint32_t* __restrict kIt = keys;
+
+ uint32_t key = *kIt;
+ uint32_t firstKey = key - std::min(collisionDistance, key & bucketMask);
+ uint32_t lastKey = std::min(key + collisionDistance, key | bucketMask);
+
+ kFirst[0] = kIt;
+ while (*kIt < lastKey)
+ ++kIt;
+ kLast[0] = kIt;
+
+ for (uint32_t k = 1; k < 5; ++k)
+ {
+ for (uint32_t n = firstKey + keyOffsets[k]; *kIt < n;)
+ ++kIt;
+ kFirst[k] = kIt;
+
+ for (uint32_t n = lastKey + keyOffsets[k]; *kIt < n;)
+ ++kIt;
+ kLast[k] = kIt;
+
+ // jump forward once to second column
+ kIt = keys + firstColumnSize; // NOTE(review): firstColumnSize is 0 from the second time on, so k = 3 and 4 rescan from keys (same result, longer scan)
+ firstColumnSize = 0;
+ }
+ }
+
+ const uint16_t* __restrict iIt = indices;
+ const uint16_t* __restrict iEnd = indices + mClothData.mNumSelfCollisionIndices;
+
+ const uint16_t* __restrict jIt;
+ const uint16_t* __restrict jEnd;
+
+ for (; iIt != iEnd; ++iIt, ++kFirst[0]) // kFirst[0] advances through keys in lockstep with iIt
+ {
+ NV_CLOTH_ASSERT(*iIt < mClothData.mNumParticles);
+
+ // load current particle once outside of inner loop
+ Simd4f particle = particles[*iIt];
+ Simd4f restParticle = restParticles[*iIt];
+
+ uint32_t key = *kFirst[0];
+
+ // range of keys we need to check against for this particle
+ uint32_t firstKey = key - std::min(collisionDistance, key & bucketMask); // min() keeps the subtraction from borrowing out of the low 16 bits
+ uint32_t lastKey = std::min(key + collisionDistance, key | bucketMask); // min() keeps the addition from carrying out of the low 16 bits
+
+ // scan forward end point
+ while (*kLast[0] < lastKey)
+ ++kLast[0];
+
+ // process potential colliders of same cell
+ jEnd = indices + (kLast[0] - keys);
+ for (jIt = iIt + 1; jIt != jEnd; ++jIt)
+ collideParticles<useRestParticles>(particle, particles[*jIt], restParticle, restParticles[*jIt]);
+
+ // process neighbor cells
+ for (uint32_t k = 1; k < 5; ++k)
+ {
+ // scan forward start point
+ for (uint32_t n = firstKey + keyOffsets[k]; *kFirst[k] < n;)
+ ++kFirst[k];
+
+ // scan forward end point
+ for (uint32_t n = lastKey + keyOffsets[k]; *kLast[k] < n;)
+ ++kLast[k];
+
+ // process potential colliders
+ jEnd = indices + (kLast[k] - keys);
+ for (jIt = indices + (kFirst[k] - keys); jIt != jEnd; ++jIt)
+ collideParticles<useRestParticles>(particle, particles[*jIt], restParticle, restParticles[*jIt]);
+ }
+
+ // store current particle
+ particles[*iIt] = particle;
+ }
+}
+
+// explicit template instantiation: the member functions are defined in this file, so the class is instantiated here once per SIMD flavor selected by NV_SIMD_SIMD / NV_SIMD_SCALAR
+#if NV_SIMD_SIMD
+template class cloth::SwSelfCollision<Simd4f>;
+#endif
+#if NV_SIMD_SCALAR
+template class cloth::SwSelfCollision<Scalar4f>;
+#endif