aboutsummaryrefslogtreecommitdiff
path: root/NvCloth/src/sse2
diff options
context:
space:
mode:
authormtamis <[email protected]>2017-02-15 16:06:25 +0100
committermtamis <[email protected]>2017-02-15 16:06:25 +0100
commit85305930aeeb1d513e23522bd91f29ba81aa6d14 (patch)
tree45f1bb20a45a300d1fef107e436cac95602a0e57 /NvCloth/src/sse2
downloadnvcloth-85305930aeeb1d513e23522bd91f29ba81aa6d14.tar.xz
nvcloth-85305930aeeb1d513e23522bd91f29ba81aa6d14.zip
NvCloth library v1.0.0
Diffstat (limited to 'NvCloth/src/sse2')
-rw-r--r--NvCloth/src/sse2/SwCollisionHelpers.h96
-rw-r--r--NvCloth/src/sse2/SwSolveConstraints.h134
2 files changed, 230 insertions, 0 deletions
diff --git a/NvCloth/src/sse2/SwCollisionHelpers.h b/NvCloth/src/sse2/SwCollisionHelpers.h
new file mode 100644
index 0000000..c80ba1d
--- /dev/null
+++ b/NvCloth/src/sse2/SwCollisionHelpers.h
@@ -0,0 +1,96 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#pragma once
+
+#if PX_GCC_FAMILY || NV_ORBIS
+#include <xmmintrin.h> // _BitScanForward
+#else
+#if __APPLE__
+#include <x86intrin.h>
+#else
+#pragma warning(push)
+#pragma warning(disable : 4668) //'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives'
+#pragma warning(disable : 4987) // nonstandard extension used: 'throw (...)'
+#pragma warning(pop)
+#include <intrin.h> // _BitScanForward
+#endif
+#endif
+
+namespace nv
+{
+namespace cloth
+{
+
// Returns the 0-based index of the least significant set bit of mask.
// The result is unspecified when mask == 0 (MSVC leaves the output
// untouched; the GCC path yields __builtin_ffs(0) - 1 == -1, wrapping).
// Declared inline: this is a header-defined function, so without inline
// it would violate the ODR when included from more than one TU.
inline uint32_t findBitSet(uint32_t mask)
{
#if defined(_MSC_VER)
	unsigned long result;
	// The original 'unsigned long(mask)' is an ill-formed functional cast:
	// functional-cast notation requires a single-word type name.
	_BitScanForward(&result, static_cast<unsigned long>(mask));
	return result;
#else
	// ffs() is 1-based (0 means "no bit set"); shift to a 0-based index.
	return __builtin_ffs(mask) - 1;
#endif
}
+
+Simd4i intFloor(const Simd4f& v)
+{
+ Simd4i i = _mm_cvttps_epi32(v);
+ return _mm_sub_epi32(i, _mm_srli_epi32(simd4i(v), 31));
+}
+
+Simd4i horizontalOr(const Simd4i& mask)
+{
+ Simd4i tmp = mask | _mm_shuffle_epi32(mask, 0xb1); // w z y x -> z w x y
+ return tmp | _mm_shuffle_epi32(tmp, 0x4e); // w z y x -> y x w z
+}
+
+Gather<Simd4i>::Gather(const Simd4i& index)
+{
+ mSelectQ = _mm_srai_epi32(index << 29, 31);
+ mSelectD = _mm_srai_epi32(index << 30, 31);
+ mSelectW = _mm_srai_epi32(index << 31, 31);
+ mOutOfRange = (index ^ sIntSignBit) > sSignedMask;
+}
+
+Simd4i Gather<Simd4i>::operator()(const Simd4i* ptr) const
+{
+ // more efficient with _mm_shuffle_epi8 (SSSE3)
+ Simd4i lo = ptr[0], hi = ptr[1];
+ Simd4i m01 = select(mSelectW, splat<1>(lo), splat<0>(lo));
+ Simd4i m23 = select(mSelectW, splat<3>(lo), splat<2>(lo));
+ Simd4i m45 = select(mSelectW, splat<1>(hi), splat<0>(hi));
+ Simd4i m67 = select(mSelectW, splat<3>(hi), splat<2>(hi));
+ Simd4i m0123 = select(mSelectD, m23, m01);
+ Simd4i m4567 = select(mSelectD, m67, m45);
+ return select(mSelectQ, m4567, m0123) & ~mOutOfRange;
+}
+
+} // namespace cloth
+} // namespace nv
diff --git a/NvCloth/src/sse2/SwSolveConstraints.h b/NvCloth/src/sse2/SwSolveConstraints.h
new file mode 100644
index 0000000..a6e88af
--- /dev/null
+++ b/NvCloth/src/sse2/SwSolveConstraints.h
@@ -0,0 +1,134 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "NvSimd/sse2/NvSse2Simd4f.h"
+
+__pragma(warning(push))
+#pragma warning(disable : 4127) // conditional expression is constant
+
+template <bool useMultiplier>
+void solveConstraints(float* __restrict posIt, const float* __restrict rIt, const float* __restrict stIt, const float* __restrict rEnd,
+ const uint16_t* __restrict iIt, const Simd4f& stiffnessEtc, const Simd4f& stiffnessExponent)
+{
+ PX_UNUSED(stIt);
+ PX_UNUSED(stiffnessEtc);
+ PX_UNUSED(stiffnessExponent);
+ __m128 sOne = _mm_set1_ps(1.0f);
+
+ __m128 stretchLimit, compressionLimit, multiplier;
+ if (useMultiplier)
+ {
+ stretchLimit = _mm_shuffle_ps(stiffnessEtc, stiffnessEtc, 0xff);
+ compressionLimit = _mm_shuffle_ps(stiffnessEtc, stiffnessEtc, 0xaa);
+ multiplier = _mm_shuffle_ps(stiffnessEtc, stiffnessEtc, 0x55);
+ }
+ __m128 stiffness = _mm_shuffle_ps(stiffnessEtc, stiffnessEtc, 0x00);
+ bool useStiffnessPerConstraint = nullptr != stIt;
+
+ for (; rIt != rEnd; rIt += 4, stIt += 4, iIt += 8)
+ {
+ float* p0i = posIt + iIt[0] * 4;
+ float* p0j = posIt + iIt[1] * 4;
+ float* p1i = posIt + iIt[2] * 4;
+ float* p1j = posIt + iIt[3] * 4;
+ float* p2i = posIt + iIt[4] * 4;
+ float* p2j = posIt + iIt[5] * 4;
+ float* p3i = posIt + iIt[6] * 4;
+ float* p3j = posIt + iIt[7] * 4;
+
+ __m128 v0i = _mm_load_ps(p0i);
+ __m128 v0j = _mm_load_ps(p0j);
+ __m128 v1i = _mm_load_ps(p1i);
+ __m128 v1j = _mm_load_ps(p1j);
+ __m128 v2i = _mm_load_ps(p2i);
+ __m128 v2j = _mm_load_ps(p2j);
+ __m128 v3i = _mm_load_ps(p3i);
+ __m128 v3j = _mm_load_ps(p3j);
+
+ __m128 h0ij = _mm_add_ps(v0j, _mm_mul_ps(v0i, sMinusOneXYZOneW));
+ __m128 h1ij = _mm_add_ps(v1j, _mm_mul_ps(v1i, sMinusOneXYZOneW));
+ __m128 h2ij = _mm_add_ps(v2j, _mm_mul_ps(v2i, sMinusOneXYZOneW));
+ __m128 h3ij = _mm_add_ps(v3j, _mm_mul_ps(v3i, sMinusOneXYZOneW));
+
+ __m128 a = _mm_unpacklo_ps(h0ij, h2ij);
+ __m128 b = _mm_unpackhi_ps(h0ij, h2ij);
+ __m128 c = _mm_unpacklo_ps(h1ij, h3ij);
+ __m128 d = _mm_unpackhi_ps(h1ij, h3ij);
+
+ __m128 hxij = _mm_unpacklo_ps(a, c);
+ __m128 hyij = _mm_unpackhi_ps(a, c);
+ __m128 hzij = _mm_unpacklo_ps(b, d);
+ __m128 vwij = _mm_unpackhi_ps(b, d);
+
+ __m128 rij = _mm_load_ps(rIt);
+ __m128 e2ij = _mm_add_ps(gSimd4fEpsilon, _mm_add_ps(_mm_mul_ps(hxij, hxij),
+ _mm_add_ps(_mm_mul_ps(hyij, hyij), _mm_mul_ps(hzij, hzij))));
+
+ //Load/calculate the constraint stiffness
+ __m128 stij = useStiffnessPerConstraint ? _mm_set_ps1(1.0f) - exp2(stiffnessExponent * _mm_load_ps(stIt)) : stiffness;
+
+
+ __m128 mask = _mm_cmpnle_ps(rij, gSimd4fEpsilon);
+ __m128 erij = _mm_and_ps(_mm_sub_ps(sOne, _mm_mul_ps(rij, _mm_rsqrt_ps(e2ij))), mask);
+
+ if (useMultiplier)
+ {
+ erij = _mm_sub_ps(erij, _mm_mul_ps(multiplier, _mm_max_ps(compressionLimit, _mm_min_ps(erij, stretchLimit))));
+ }
+ __m128 exij = _mm_mul_ps(erij, _mm_mul_ps(stij, _mm_rcp_ps(_mm_add_ps(gSimd4fEpsilon, vwij))));
+
+ __m128 exlo = _mm_and_ps(sMaskXY, exij);
+ __m128 exhi = _mm_andnot_ps(sMaskXY, exij);
+
+ __m128 f0ij = _mm_mul_ps(h0ij, _mm_shuffle_ps(exlo, exlo, 0xc0));
+ __m128 f1ij = _mm_mul_ps(h1ij, _mm_shuffle_ps(exlo, exlo, 0xd5));
+ __m128 f2ij = _mm_mul_ps(h2ij, _mm_shuffle_ps(exhi, exhi, 0x2a));
+ __m128 f3ij = _mm_mul_ps(h3ij, _mm_shuffle_ps(exhi, exhi, 0x3f));
+
+ __m128 u0i = _mm_add_ps(v0i, _mm_mul_ps(f0ij, _mm_shuffle_ps(v0i, v0i, 0xff)));
+ __m128 u0j = _mm_sub_ps(v0j, _mm_mul_ps(f0ij, _mm_shuffle_ps(v0j, v0j, 0xff)));
+ __m128 u1i = _mm_add_ps(v1i, _mm_mul_ps(f1ij, _mm_shuffle_ps(v1i, v1i, 0xff)));
+ __m128 u1j = _mm_sub_ps(v1j, _mm_mul_ps(f1ij, _mm_shuffle_ps(v1j, v1j, 0xff)));
+ __m128 u2i = _mm_add_ps(v2i, _mm_mul_ps(f2ij, _mm_shuffle_ps(v2i, v2i, 0xff)));
+ __m128 u2j = _mm_sub_ps(v2j, _mm_mul_ps(f2ij, _mm_shuffle_ps(v2j, v2j, 0xff)));
+ __m128 u3i = _mm_add_ps(v3i, _mm_mul_ps(f3ij, _mm_shuffle_ps(v3i, v3i, 0xff)));
+ __m128 u3j = _mm_sub_ps(v3j, _mm_mul_ps(f3ij, _mm_shuffle_ps(v3j, v3j, 0xff)));
+
+ _mm_store_ps(p0i, u0i);
+ _mm_store_ps(p0j, u0j);
+ _mm_store_ps(p1i, u1i);
+ _mm_store_ps(p1j, u1j);
+ _mm_store_ps(p2i, u2i);
+ _mm_store_ps(p2j, u2j);
+ _mm_store_ps(p3i, u3i);
+ _mm_store_ps(p3j, u3j);
+ }
+}
+
+__pragma(warning(pop))