diff options
| author | mtamis <[email protected]> | 2017-02-15 16:06:25 +0100 |
|---|---|---|
| committer | mtamis <[email protected]> | 2017-02-15 16:06:25 +0100 |
| commit | 85305930aeeb1d513e23522bd91f29ba81aa6d14 (patch) | |
| tree | 45f1bb20a45a300d1fef107e436cac95602a0e57 /NvCloth/src/SwSelfCollision.cpp | |
| download | nvcloth-85305930aeeb1d513e23522bd91f29ba81aa6d14.tar.xz nvcloth-85305930aeeb1d513e23522bd91f29ba81aa6d14.zip | |
NvCloth library v1.0.0
Diffstat (limited to 'NvCloth/src/SwSelfCollision.cpp')
| -rw-r--r-- | NvCloth/src/SwSelfCollision.cpp | 416 |
1 file changed, 416 insertions, 0 deletions
diff --git a/NvCloth/src/SwSelfCollision.cpp b/NvCloth/src/SwSelfCollision.cpp new file mode 100644 index 0000000..6b3e267 --- /dev/null +++ b/NvCloth/src/SwSelfCollision.cpp @@ -0,0 +1,416 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
// SwSelfCollision: software (CPU) cloth self-collision solver.
//
// High-level algorithm (see operator()()):
//   1. Quantize every participating particle into a coarse grid and pack the
//      three grid coordinates into one 32-bit key (sweep axis in the low
//      16 bits, the two hash axes in the upper two bytes).
//   2. Radix-sort the keys so particles in the same or neighboring grid cells
//      become adjacent in the sorted order.
//   3. For each particle, scan forward through the sorted keys and apply the
//      pairwise distance test / position correction to nearby candidates.

#include "SwSelfCollision.h"
#include "SwCloth.h"
#include "SwClothData.h"
#include "SwCollisionHelpers.h"

#ifdef _MSC_VER
#pragma warning(disable : 4127) // conditional expression is constant
#endif

using namespace nv;

namespace
{

// Lane mask keeping x/y/z and zeroing w (the w lane of a particle holds its
// inverse mass and must not be touched by the collision response).
const Simd4fTupleFactory sMaskXYZ = simd4f(simd4i(~0, ~0, ~0, 0));

// returns sorted indices, output needs to be at least 2*(last - first) + 1024
//
// Byte-wise LSD radix sort of 32-bit keys: 4 passes of 8 bits each.
// Layout of 'out': [0, n) and [n, 2n) are ping-pong index buffers (the final
// permutation ends up in [0, n)); [2n, 2n + 1024) holds four 256-entry
// histograms that are converted to offset tables in place. The offset tables
// are left behind after sorting and are inspected by the caller (see
// 'firstColumnSize' in operator()()).
void radixSort(const uint32_t* first, const uint32_t* last, uint16_t* out)
{
	uint16_t n = uint16_t(last - first);

	uint16_t* buffer = out + 2 * n;
	uint16_t* __restrict histograms[] = { buffer, buffer + 256, buffer + 512, buffer + 768 };

	memset(buffer, 0, 1024 * sizeof(uint16_t));

	// build 4 histograms (one per key byte) in one pass
	for (const uint32_t* __restrict it = first; it != last; ++it)
	{
		uint32_t key = *it;
		++histograms[0][0xff & key];
		++histograms[1][0xff & (key >> 8)];
		++histograms[2][0xff & (key >> 16)];
		++histograms[3][key >> 24];
	}

	// convert histograms to offset tables in-place (exclusive prefix sums)
	uint16_t sums[4] = {};
	for (uint32_t i = 0; i < 256; ++i)
	{
		uint16_t temp0 = uint16_t(histograms[0][i] + sums[0]);
		histograms[0][i] = sums[0], sums[0] = temp0;

		uint16_t temp1 = uint16_t(histograms[1][i] + sums[1]);
		histograms[1][i] = sums[1], sums[1] = temp1;

		uint16_t temp2 = uint16_t(histograms[2][i] + sums[2]);
		histograms[2][i] = sums[2], sums[2] = temp2;

		uint16_t temp3 = uint16_t(histograms[3][i] + sums[3]);
		histograms[3][i] = sums[3], sums[3] = temp3;
	}

	NV_CLOTH_ASSERT(sums[0] == n && sums[1] == n && sums[2] == n && sums[3] == n);

#if PX_DEBUG
	memset(out, 0xff, 2 * n * sizeof(uint16_t));
#endif

	// sort 8 bits per pass, ping-ponging between out[0, n) and out[n, 2n).
	// NOTE(review): the comma-operator loop condition loads indices[...][i]
	// before testing i != n, so the final test reads one element past the
	// index buffer; that slot lies inside the histogram area of the same
	// allocation and the loaded value is discarded — intentional, but verify
	// if the buffer layout ever changes.

	uint16_t* __restrict indices[] = { out, out + n };

	for (uint16_t i = 0; i != n; ++i)
		indices[1][histograms[0][0xff & first[i]]++] = i;

	for (uint16_t i = 0, index; index = indices[1][i], i != n; ++i)
		indices[0][histograms[1][0xff & (first[index] >> 8)]++] = index;

	for (uint16_t i = 0, index; index = indices[0][i], i != n; ++i)
		indices[1][histograms[2][0xff & (first[index] >> 16)]++] = index;

	for (uint16_t i = 0, index; index = indices[1][i], i != n; ++i)
		indices[0][histograms[3][first[index] >> 24]++] = index;
}

// Index (0/1/2) of the largest of the first three components of 'edgeLength'.
template <typename Simd4f>
uint32_t longestAxis(const Simd4f& edgeLength)
{
	const float* e = array(edgeLength);

	if (e[0] > e[1])
		return uint32_t(e[0] > e[2] ? 0 : 2);
	else
		return uint32_t(e[1] > e[2] ? 1 : 2);
}

// Self collision is active only when both distance and stiffness are positive.
bool isSelfCollisionEnabled(const cloth::SwClothData& cloth)
{
	return std::min(cloth.mSelfCollisionDistance, cloth.mSelfCollisionStiffness) > 0.0f;
}

// Same test on the cloth object itself. NOTE(review): the negation suggests
// mSelfCollisionLogStiffness is a log-space stiffness that is negative when
// collision is enabled — confirm against SwCloth.
bool isSelfCollisionEnabled(const cloth::SwCloth& cloth)
{
	return std::min(cloth.mSelfCollisionDistance, -cloth.mSelfCollisionLogStiffness) > 0.0f;
}

// Round up to the next multiple of 2 (keeps the uint32_t array that follows
// the uint16_t index array 4-byte aligned in the scratch buffer).
inline uint32_t align2(uint32_t x)
{
	return (x + 1) & ~1;
}

} // anonymous namespace

// Snapshot the collision parameters as SIMD constants for this solve pass.
// The stiffness is pre-masked to x/y/z so the w lane (inverse mass) of the
// particle positions is never disturbed by the collision response.
template <typename Simd4f>
cloth::SwSelfCollision<Simd4f>::SwSelfCollision(cloth::SwClothData& clothData, cloth::SwKernelAllocator& alloc)
: mClothData(clothData), mAllocator(alloc)
{
	mCollisionDistance = simd4f(mClothData.mSelfCollisionDistance);
	mCollisionSquareDistance = mCollisionDistance * mCollisionDistance;
	mStiffness = sMaskXYZ & static_cast<Simd4f>(simd4f(mClothData.mSelfCollisionStiffness));
}

template <typename Simd4f>
cloth::SwSelfCollision<Simd4f>::~SwSelfCollision()
{
}

// Run one self-collision pass over the cloth's current particles:
// build packed grid keys, radix-sort them, then collide each particle with
// the candidates found by scanning forward through the sorted key array.
// No-op when self collision is disabled.
template <typename Simd4f>
void cloth::SwSelfCollision<Simd4f>::operator()()
{
	mNumTests = mNumCollisions = 0;

	if (!isSelfCollisionEnabled(mClothData))
		return;

	// current bounding box: mCurBounds[0..2] = lower corner, [3..5] = upper
	Simd4f lowerBound = load(mClothData.mCurBounds);
	Simd4f edgeLength = max(load(mClothData.mCurBounds + 3) - lowerBound, gSimd4fEpsilon);

	// sweep along longest axis
	uint32_t sweepAxis = longestAxis(edgeLength);
	uint32_t hashAxis0 = (sweepAxis + 1) % 3;
	uint32_t hashAxis1 = (sweepAxis + 2) % 3;

	// reserve 0, 127, and 65535 for sentinel
	// hash axes get an 8-bit grid (values clamped to [1, 254]); the sweep
	// axis gets a 16-bit grid (values clamped to [1, 65534]). Cells are at
	// least mCollisionDistance wide so neighbors need only be one cell away.
	Simd4f cellSize = max(mCollisionDistance, simd4f(1.0f / 253) * edgeLength);
	array(cellSize)[sweepAxis] = array(edgeLength)[sweepAxis] / 65533;

	Simd4f one = gSimd4fOne;
	Simd4f gridSize = simd4f(254.0f);
	array(gridSize)[sweepAxis] = 65534.0f;

	Simd4f gridScale = recip<1>(cellSize);
	Simd4f gridBias = -lowerBound * gridScale + one;

	uint32_t numIndices = mClothData.mNumSelfCollisionIndices;
	void* buffer = mAllocator.allocate(getBufferSize(numIndices));

	// scratch layout: keys | sortedIndices (2-aligned count) | sortedKeys
	// (sortedKeys overlaps the radix sort's histogram area — see getBufferSize)
	const uint32_t* __restrict indices = mClothData.mSelfCollisionIndices;
	uint32_t* __restrict keys = reinterpret_cast<uint32_t*>(buffer);
	uint16_t* __restrict sortedIndices = reinterpret_cast<uint16_t*>(keys + numIndices);
	uint32_t* __restrict sortedKeys = reinterpret_cast<uint32_t*>(sortedIndices + align2(numIndices));

	const Simd4f* particles = reinterpret_cast<const Simd4f*>(mClothData.mCurParticles);

	// create keys: bits 0-15 = sweep-axis cell, 16-23 = hashAxis0 cell,
	// 24-31 = hashAxis1 cell (a null 'indices' means all particles take part)
	for (uint32_t i = 0; i < numIndices; ++i)
	{
		uint32_t index = indices ? indices[i] : i;

		// grid coordinate
		Simd4f keyf = particles[index] * gridScale + gridBias;

		// need to clamp index because shape collision potentially
		// pushes particles outside of their original bounds
		Simd4i keyi = intFloor(max(one, min(keyf, gridSize)));

		const int32_t* ptr = array(keyi);
		keys[i] = uint32_t(ptr[sweepAxis] | (ptr[hashAxis0] << 16) | (ptr[hashAxis1] << 24));
	}

	// compute sorted keys indices
	radixSort(keys, keys + numIndices, sortedIndices);

	// snoop histogram: offset of first index with 8 msb > 1 (0 is sentinel)
	// (sortedIndices[2n + 769] is the top-byte offset-table entry that
	// radixSort left behind, i.e. the size of the first hashAxis1 column)
	uint16_t firstColumnSize = sortedIndices[2 * numIndices + 769];

	// sort keys
	for (uint32_t i = 0; i < numIndices; ++i)
		sortedKeys[i] = keys[sortedIndices[i]];
	sortedKeys[numIndices] = uint32_t(-1); // sentinel

	if (indices)
	{
		// sort indices (into no-longer-needed keys array)
		const uint16_t* __restrict permutation = sortedIndices;
		sortedIndices = reinterpret_cast<uint16_t*>(keys);
		for (uint32_t i = 0; i < numIndices; ++i)
			sortedIndices[i] = uint16_t(indices[permutation[i]]);
	}

	// calculate the number of buckets we need to search forward
	const Simd4i data = intFloor(gridScale * mCollisionDistance);
	uint32_t collisionDistance = 2 + static_cast<uint32_t>(array(data)[sweepAxis]);

	// collide particles
	if (mClothData.mRestPositions)
		collideParticles<true>(sortedKeys, firstColumnSize, sortedIndices, collisionDistance);
	else
		collideParticles<false>(sortedKeys, firstColumnSize, sortedIndices, collisionDistance);

	mAllocator.deallocate(buffer);

	// verify against brute force (disable collision response when testing)
	/*
	uint32_t numCollisions = mNumCollisions;
	mNumCollisions = 0;

	Simd4f* qarticles = reinterpret_cast<Simd4f*>(mClothData.mCurParticles);
	for (uint32_t i = 0; i < numIndices; ++i)
	{
	    uint32_t indexI = indices ? indices[i] : i;
	    for (uint32_t j = i + 1; j < numIndices; ++j)
	    {
	        uint32_t indexJ = indices ? indices[j] : j;
	        collideParticles(qarticles[indexI], qarticles[indexJ]);
	    }
	}

	static uint32_t iter = 0; ++iter;
	if (numCollisions != mNumCollisions)
	    printf("%u: %u != %u\n", iter, numCollisions, mNumCollisions);
	*/
}

// Upper bound on the scratch memory operator()() will request from the
// kernel allocator for this cloth; 0 when self collision is disabled.
template <typename Simd4f>
size_t cloth::SwSelfCollision<Simd4f>::estimateTemporaryMemory(const SwCloth& cloth)
{
	uint32_t numIndices =
	    uint32_t(cloth.mSelfCollisionIndices.empty() ? cloth.mCurParticles.size() : cloth.mSelfCollisionIndices.size());
	return isSelfCollisionEnabled(cloth) ? getBufferSize(numIndices) : 0;
}

// Scratch buffer size for 'numIndices' particles:
//   keys (n * 4 bytes) + sorted indices (align2(n) * 2 bytes) +
//   max(radix-sort scratch (n + 1024) * 2 bytes,
//       sorted keys incl. sentinel (n + 1) * 4 bytes),
// since the sorted-key array reuses the radix sort's scratch region.
template <typename Simd4f>
size_t cloth::SwSelfCollision<Simd4f>::getBufferSize(uint32_t numIndices)
{
	uint32_t keysSize = numIndices * sizeof(uint32_t);
	uint32_t indicesSize = align2(numIndices) * sizeof(uint16_t);
	uint32_t radixSize = (numIndices + 1024) * sizeof(uint16_t);
	return keysSize + indicesSize + std::max(radixSize, keysSize + uint32_t(sizeof(uint32_t)));
}

// Pairwise test and response: if pos0/pos1 (xyz position, w inverse mass)
// are closer than the collision distance, push them apart along their
// difference vector, each displaced in proportion to its inverse mass and
// scaled by the stiffness.
template <typename Simd4f>
template <bool useRestParticles>
void cloth::SwSelfCollision<Simd4f>::collideParticles(Simd4f& pos0, Simd4f& pos1, const Simd4f& pos0rest,
                                                      const Simd4f& pos1rest)
{
	Simd4f diff = pos1 - pos0;
	Simd4f distSqr = dot3(diff, diff);

#if PX_DEBUG
	++mNumTests;
#endif

	// early out: further apart than the collision distance
	if (allGreater(distSqr, mCollisionSquareDistance))
		return;

	if (useRestParticles)
	{
		// calculate distance in rest configuration, if less than collision
		// distance then ignore collision between particles in deformed config
		Simd4f restDiff = pos1rest - pos0rest;
		Simd4f restDistSqr = dot3(restDiff, restDiff);

		if (allGreater(mCollisionSquareDistance, restDistSqr))
			return;
	}

	// w lanes carry the inverse masses
	Simd4f w0 = splat<3>(pos0);
	Simd4f w1 = splat<3>(pos1);

	// diff * (1 - ratio) is the penetration vector; epsilon guards against
	// division by zero when both particles have zero inverse mass
	Simd4f ratio = mCollisionDistance * rsqrt(distSqr);
	Simd4f scale = mStiffness * recip(gSimd4fEpsilon + w0 + w1);
	Simd4f delta = (scale * (diff - diff * ratio)) & sMaskXYZ;

	pos0 = pos0 + delta * w0;
	pos1 = pos1 - delta * w1;

#if PX_DEBUG || PX_PROFILE
	++mNumCollisions;
#endif
}

// Broad phase over the sorted key array. For particle i, candidates are all
// later entries whose key lies within 'collisionDistance' along the sweep
// axis, in the same cell column or one of the four forward neighbor columns
// selected by 'keyOffsets' (backward pairs were already handled when the
// other particle was the outer one). Scans terminate on the 0xffffffff
// sentinel key appended by operator()().
template <typename Simd4f>
template <bool useRestParticles>
void cloth::SwSelfCollision<Simd4f>::collideParticles(const uint32_t* keys, uint16_t firstColumnSize,
                                                      const uint16_t* indices, uint32_t collisionDistance)
{
	Simd4f* __restrict particles = reinterpret_cast<Simd4f*>(mClothData.mCurParticles);
	Simd4f* __restrict restParticles =
	    useRestParticles ? reinterpret_cast<Simd4f*>(mClothData.mRestPositions) : particles;

	// low 16 bits of a key = sweep-axis coordinate
	const uint32_t bucketMask = uint16_t(-1);

	// packed-key deltas to the forward neighbor cells, relative to the
	// current cell: (+1,0), (-1,+1), (0,+1), (+1,+1) in (hashAxis0, hashAxis1)
	const uint32_t keyOffsets[] = { 0, 0x00010000, 0x00ff0000, 0x01000000, 0x01010000 };

	const uint32_t* __restrict kFirst[5];
	const uint32_t* __restrict kLast[5];

	{
		// optimization: scan forward iterator starting points once instead of 9 times
		const uint32_t* __restrict kIt = keys;

		uint32_t key = *kIt;
		uint32_t firstKey = key - std::min(collisionDistance, key & bucketMask);
		uint32_t lastKey = std::min(key + collisionDistance, key | bucketMask);

		kFirst[0] = kIt;
		while (*kIt < lastKey)
			++kIt;
		kLast[0] = kIt;

		for (uint32_t k = 1; k < 5; ++k)
		{
			for (uint32_t n = firstKey + keyOffsets[k]; *kIt < n;)
				++kIt;
			kFirst[k] = kIt;

			for (uint32_t n = lastKey + keyOffsets[k]; *kIt < n;)
				++kIt;
			kLast[k] = kIt;

			// jump forward once to second column
			kIt = keys + firstColumnSize;
			firstColumnSize = 0;
		}
	}

	const uint16_t* __restrict iIt = indices;
	const uint16_t* __restrict iEnd = indices + mClothData.mNumSelfCollisionIndices;

	const uint16_t* __restrict jIt;
	const uint16_t* __restrict jEnd;

	// kFirst[0] tracks iIt: one sorted key per particle
	for (; iIt != iEnd; ++iIt, ++kFirst[0])
	{
		NV_CLOTH_ASSERT(*iIt < mClothData.mNumParticles);

		// load current particle once outside of inner loop
		Simd4f particle = particles[*iIt];
		Simd4f restParticle = restParticles[*iIt];

		uint32_t key = *kFirst[0];

		// range of keys we need to check against for this particle
		uint32_t firstKey = key - std::min(collisionDistance, key & bucketMask);
		uint32_t lastKey = std::min(key + collisionDistance, key | bucketMask);

		// scan forward end point
		while (*kLast[0] < lastKey)
			++kLast[0];

		// process potential colliders of same cell
		jEnd = indices + (kLast[0] - keys);
		for (jIt = iIt + 1; jIt != jEnd; ++jIt)
			collideParticles<useRestParticles>(particle, particles[*jIt], restParticle, restParticles[*jIt]);

		// process neighbor cells
		for (uint32_t k = 1; k < 5; ++k)
		{
			// scan forward start point
			for (uint32_t n = firstKey + keyOffsets[k]; *kFirst[k] < n;)
				++kFirst[k];

			// scan forward end point
			for (uint32_t n = lastKey + keyOffsets[k]; *kLast[k] < n;)
				++kLast[k];

			// process potential colliders
			jEnd = indices + (kLast[k] - keys);
			for (jIt = indices + (kFirst[k] - keys); jIt != jEnd; ++jIt)
				collideParticles<useRestParticles>(particle, particles[*jIt], restParticle, restParticles[*jIt]);
		}

		// store current particle
		particles[*iIt] = particle;
	}
}

// explicit template instantiation
#if NV_SIMD_SIMD
template class cloth::SwSelfCollision<Simd4f>;
#endif
#if NV_SIMD_SCALAR
template class cloth::SwSelfCollision<Scalar4f>;
#endif