aboutsummaryrefslogtreecommitdiff
path: root/NvCloth/src/sse2
diff options
context:
space:
mode:
authorMarijn Tamis <[email protected]>2018-05-03 18:22:48 +0200
committerMarijn Tamis <[email protected]>2018-05-03 18:22:48 +0200
commitca32c59a58d37c1822e185a2d5f3d0d3e8943593 (patch)
treeb06b9eec03f34344ef8fc31aa147b2714d3962ee /NvCloth/src/sse2
parentForced rename of platform folders in cmake dir. Git didn't pick this up before. (diff)
downloadnvcloth-ca32c59a58d37c1822e185a2d5f3d0d3e8943593.tar.xz
nvcloth-ca32c59a58d37c1822e185a2d5f3d0d3e8943593.zip
NvCloth 1.1.4 Release. (24070740)
Diffstat (limited to 'NvCloth/src/sse2')
-rw-r--r--NvCloth/src/sse2/SwCollisionHelpers.h22
1 files changed, 18 insertions, 4 deletions
diff --git a/NvCloth/src/sse2/SwCollisionHelpers.h b/NvCloth/src/sse2/SwCollisionHelpers.h
index b759868..4882818 100644
--- a/NvCloth/src/sse2/SwCollisionHelpers.h
+++ b/NvCloth/src/sse2/SwCollisionHelpers.h
@@ -76,22 +76,36 @@ Simd4i horizontalOr(const Simd4i& mask)
Gather<Simd4i>::Gather(const Simd4i& index)
{
- mSelectQ = _mm_srai_epi32(index << 29, 31);
- mSelectD = _mm_srai_epi32(index << 30, 31);
- mSelectW = _mm_srai_epi32(index << 31, 31);
- mOutOfRange = (index ^ sIntSignBit) > sSignedMask;
+ //index are grid positions
+ //grid has 8 spaces, grid indices that are within rage use 3 bits (0x7 mask)
+ mSelectQ = _mm_srai_epi32(index << 29, 31); //expand bit 0x4 to the whole register
+ mSelectD = _mm_srai_epi32(index << 30, 31); //expand bit 0x2 to the whole register
+ mSelectW = _mm_srai_epi32(index << 31, 31); //expand the least significant bit (0x1) to the whole register
+ mOutOfRange = (index ^ sIntSignBit) > sSignedMask; //true if index is outside the grid = (index > 0x7 || index < 0x0)
}
Simd4i Gather<Simd4i>::operator()(const Simd4i* ptr) const
{
+ //ptr points to the cone/sphere grid
+
// more efficient with _mm_shuffle_epi8 (SSSE3)
+
+ //lo contains the bottom 4 grid cells, heigh the top 4
Simd4i lo = ptr[0], hi = ptr[1];
+
+ //select correct cell in the grid using binary decision tree
+ // m = select ? a : b
+ //first bit (even/uneven cells)
Simd4i m01 = select(mSelectW, splat<1>(lo), splat<0>(lo));
Simd4i m23 = select(mSelectW, splat<3>(lo), splat<2>(lo));
Simd4i m45 = select(mSelectW, splat<1>(hi), splat<0>(hi));
Simd4i m67 = select(mSelectW, splat<3>(hi), splat<2>(hi));
+
+ //second bit
Simd4i m0123 = select(mSelectD, m23, m01);
Simd4i m4567 = select(mSelectD, m67, m45);
+
+ // third bit force result zero if it is out of range
return select(mSelectQ, m4567, m0123) & ~mOutOfRange;
}