diff options
| author | Marijn Tamis <[email protected]> | 2018-05-03 18:22:48 +0200 |
|---|---|---|
| committer | Marijn Tamis <[email protected]> | 2018-05-03 18:22:48 +0200 |
| commit | ca32c59a58d37c1822e185a2d5f3d0d3e8943593 (patch) | |
| tree | b06b9eec03f34344ef8fc31aa147b2714d3962ee /NvCloth/src/sse2 | |
| parent | Forced rename of platform folders in cmake dir. Git didn't pick this up before. (diff) | |
| download | nvcloth-ca32c59a58d37c1822e185a2d5f3d0d3e8943593.tar.xz nvcloth-ca32c59a58d37c1822e185a2d5f3d0d3e8943593.zip | |
NvCloth 1.1.4 Release. (24070740)
Diffstat (limited to 'NvCloth/src/sse2')
| -rw-r--r-- | NvCloth/src/sse2/SwCollisionHelpers.h | 22 |
1 files changed, 18 insertions, 4 deletions
diff --git a/NvCloth/src/sse2/SwCollisionHelpers.h b/NvCloth/src/sse2/SwCollisionHelpers.h index b759868..4882818 100644 --- a/NvCloth/src/sse2/SwCollisionHelpers.h +++ b/NvCloth/src/sse2/SwCollisionHelpers.h @@ -76,22 +76,36 @@ Simd4i horizontalOr(const Simd4i& mask) Gather<Simd4i>::Gather(const Simd4i& index) { - mSelectQ = _mm_srai_epi32(index << 29, 31); - mSelectD = _mm_srai_epi32(index << 30, 31); - mSelectW = _mm_srai_epi32(index << 31, 31); - mOutOfRange = (index ^ sIntSignBit) > sSignedMask; + //index are grid positions + //grid has 8 spaces, grid indices that are within rage use 3 bits (0x7 mask) + mSelectQ = _mm_srai_epi32(index << 29, 31); //expand bit 0x4 to the whole register + mSelectD = _mm_srai_epi32(index << 30, 31); //expand bit 0x2 to the whole register + mSelectW = _mm_srai_epi32(index << 31, 31); //expand the least significant bit (0x1) to the whole register + mOutOfRange = (index ^ sIntSignBit) > sSignedMask; //true if index is outside the grid = (index > 0x7 || index < 0x0) } Simd4i Gather<Simd4i>::operator()(const Simd4i* ptr) const { + //ptr points to the cone/sphere grid + // more efficient with _mm_shuffle_epi8 (SSSE3) + + //lo contains the bottom 4 grid cells, heigh the top 4 Simd4i lo = ptr[0], hi = ptr[1]; + + //select correct cell in the grid using binary decision tree + // m = select ? a : b + //first bit (even/uneven cells) Simd4i m01 = select(mSelectW, splat<1>(lo), splat<0>(lo)); Simd4i m23 = select(mSelectW, splat<3>(lo), splat<2>(lo)); Simd4i m45 = select(mSelectW, splat<1>(hi), splat<0>(hi)); Simd4i m67 = select(mSelectW, splat<3>(hi), splat<2>(hi)); + + //second bit Simd4i m0123 = select(mSelectD, m23, m01); Simd4i m4567 = select(mSelectD, m67, m45); + + // third bit force result zero if it is out of range return select(mSelectQ, m4567, m0123) & ~mOutOfRange; } |