diff options
| author | Sheikh Dawood Abdul Ajees <[email protected]> | 2017-05-12 17:45:18 -0500 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <[email protected]> | 2017-05-12 17:45:18 -0500 |
| commit | 7f12de60542edc8f1c6683e6b4cdce8570e51456 (patch) | |
| tree | 0b5d533bae189ea286257b5ab78b635fafb19aa0 /APEX_1.4/module/clothing/src/simd/sse2 | |
| parent | PhysX 3.4, APEX 1.4 patch release @22017166 (diff) | |
| download | physx-3.4-7f12de60542edc8f1c6683e6b4cdce8570e51456.tar.xz physx-3.4-7f12de60542edc8f1c6683e6b4cdce8570e51456.zip | |
PhysX 3.4, APEX 1.4 patch release @22121272
Diffstat (limited to 'APEX_1.4/module/clothing/src/simd/sse2')
3 files changed, 825 insertions, 0 deletions
diff --git a/APEX_1.4/module/clothing/src/simd/sse2/NvSse2Simd4f.h b/APEX_1.4/module/clothing/src/simd/sse2/NvSse2Simd4f.h new file mode 100644 index 00000000..20a2e247 --- /dev/null +++ b/APEX_1.4/module/clothing/src/simd/sse2/NvSse2Simd4f.h @@ -0,0 +1,471 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#pragma once + +NV_SIMD_NAMESPACE_BEGIN + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// factory implementation +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +Simd4fZeroFactory::operator Simd4f() const +{ + return _mm_setzero_ps(); +} + +Simd4fOneFactory::operator Simd4f() const +{ + return _mm_set1_ps(1.0f); +} + +Simd4fScalarFactory::operator Simd4f() const +{ + return _mm_set1_ps(value); +} + +Simd4fTupleFactory::operator Simd4f() const +{ + return reinterpret_cast<const Simd4f&>(tuple); +} + +Simd4fLoadFactory::operator Simd4f() const +{ + return _mm_loadu_ps(ptr); +} + +Simd4fLoad3Factory::operator Simd4f() const +{ + /* [f0 f1 f2 f3] = [ptr[0] ptr[1] 0 0] */ + __m128i xy = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(ptr)); + __m128 z = _mm_load_ss(ptr + 2); + return _mm_movelh_ps(_mm_castsi128_ps(xy), z); +} + +Simd4fLoad3SetWFactory::operator Simd4f() const +{ + __m128 z = _mm_load_ss(ptr + 2); + __m128 wTmp = _mm_load_ss(&w); + + __m128i xy = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(ptr)); + __m128 zw = _mm_movelh_ps(z, wTmp); + + return _mm_shuffle_ps(_mm_castsi128_ps(xy), zw, _MM_SHUFFLE(2, 0, 1, 0)); +} + +Simd4fAlignedLoadFactory::operator Simd4f() const +{ + return _mm_load_ps(ptr); +} + +Simd4fOffsetLoadFactory::operator Simd4f() const +{ + return _mm_load_ps(reinterpret_cast<const float*>(reinterpret_cast<const char*>(ptr) + offset)); +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// expression template +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +template <> +inline ComplementExpr<Simd4f>::operator Simd4f() const +{ + return _mm_andnot_ps(v, _mm_castsi128_ps(_mm_set1_epi32(-1))); +} + +template <> +inline Simd4f operator&(const ComplementExpr<Simd4f>& complement, const Simd4f& v) +{ + return _mm_andnot_ps(complement.v, v); +} + +template <> +inline Simd4f 
operator&(const Simd4f& v, const ComplementExpr<Simd4f>& complement) +{ + return _mm_andnot_ps(complement.v, v); +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// operator implementations +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +Simd4f operator==(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_cmpeq_ps(v0, v1); +} + +Simd4f operator<(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_cmplt_ps(v0, v1); +} + +Simd4f operator<=(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_cmple_ps(v0, v1); +} + +Simd4f operator>(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_cmpgt_ps(v0, v1); +} + +Simd4f operator>=(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_cmpge_ps(v0, v1); +} + +ComplementExpr<Simd4f> operator~(const Simd4f& v) +{ + return ComplementExpr<Simd4f>(v); +} + +Simd4f operator&(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_and_ps(v0, v1); +} + +Simd4f operator|(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_or_ps(v0, v1); +} + +Simd4f operator^(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_xor_ps(v0, v1); +} + +Simd4f operator<<(const Simd4f& v, int shift) +{ + return _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(v), shift)); +} + +Simd4f operator>>(const Simd4f& v, int shift) +{ + return _mm_castsi128_ps(_mm_srli_epi32(_mm_castps_si128(v), shift)); +} + +Simd4f operator+(const Simd4f& v) +{ + return v; +} + +Simd4f operator+(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_add_ps(v0, v1); +} + +Simd4f operator-(const Simd4f& v) +{ + return _mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), v); +} + +Simd4f operator-(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_sub_ps(v0, v1); +} + +Simd4f operator*(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_mul_ps(v0, v1); +} + +Simd4f operator/(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_div_ps(v0, v1); +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// function 
implementations +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +Simd4f simd4f(const Simd4i& v) +{ + return _mm_castsi128_ps(v); +} + +Simd4f convert(const Simd4i& v) +{ + return _mm_cvtepi32_ps(v); +} + +float (&array(Simd4f& v))[4] +{ + return reinterpret_cast<float(&)[4]>(v); +} + +const float (&array(const Simd4f& v))[4] +{ + return reinterpret_cast<const float(&)[4]>(v); +} + +void store(float* ptr, Simd4f const& v) +{ + _mm_storeu_ps(ptr, v); +} + +void storeAligned(float* ptr, Simd4f const& v) +{ + _mm_store_ps(ptr, v); +} + +void store3(float* dst, const Simd4f& v) +{ + const float* __restrict src = array(v); + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; +} + +void storeAligned(float* ptr, unsigned int offset, Simd4f const& v) +{ + _mm_store_ps(reinterpret_cast<float*>(reinterpret_cast<char*>(ptr) + offset), v); +} + +template <size_t i> +Simd4f splat(Simd4f const& v) +{ + return _mm_shuffle_ps(v, v, _MM_SHUFFLE(i, i, i, i)); +} + +Simd4f select(Simd4f const& mask, Simd4f const& v0, Simd4f const& v1) +{ + return _mm_xor_ps(v1, _mm_and_ps(mask, _mm_xor_ps(v1, v0))); +} + +Simd4f abs(const Simd4f& v) +{ + return _mm_andnot_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), v); +} + +Simd4f floor(const Simd4f& v) +{ + // SSE 4.1: return _mm_floor_ps(v); + Simd4i i = _mm_cvttps_epi32(v); + Simd4i s = _mm_castps_si128(_mm_cmpgt_ps(_mm_cvtepi32_ps(i), v)); + return _mm_cvtepi32_ps(_mm_sub_epi32(i, _mm_srli_epi32(s, 31))); +} + +#if !defined max +Simd4f max(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_max_ps(v0, v1); +} +#endif + +#if !defined min +Simd4f min(const Simd4f& v0, const Simd4f& v1) +{ + return _mm_min_ps(v0, v1); +} +#endif + +Simd4f recip(const Simd4f& v) +{ + return _mm_rcp_ps(v); +} + +template <int n> +Simd4f recip(const Simd4f& v) +{ + Simd4f two = simd4f(2.0f); + Simd4f r = recip(v); + for(int i = 0; i < n; ++i) + r = r * (two - v * r); + return r; +} + +Simd4f sqrt(const Simd4f& v) +{ + return 
_mm_sqrt_ps(v); +} + +Simd4f rsqrt(const Simd4f& v) +{ + return _mm_rsqrt_ps(v); +} + +template <int n> +Simd4f rsqrt(const Simd4f& v) +{ + Simd4f halfV = v * simd4f(0.5f); + Simd4f threeHalf = simd4f(1.5f); + Simd4f r = rsqrt(v); + for(int i = 0; i < n; ++i) + r = r * (threeHalf - halfV * r * r); + return r; +} + +Simd4f exp2(const Simd4f& v) +{ + // http://www.netlib.org/cephes/ + + Simd4f limit = simd4f(127.4999f); + Simd4f x = min(max(-limit, v), limit); + + // separate into integer and fractional part + + Simd4f fx = x + simd4f(0.5f); + Simd4i ix = _mm_sub_epi32(_mm_cvttps_epi32(fx), _mm_srli_epi32(_mm_castps_si128(fx), 31)); + fx = x - Simd4f(_mm_cvtepi32_ps(ix)); + + // exp2(fx) ~ 1 + 2*P(fx) / (Q(fx) - P(fx)) + + Simd4f fx2 = fx * fx; + + Simd4f px = fx * (simd4f(1.51390680115615096133e+3f) + + fx2 * (simd4f(2.02020656693165307700e+1f) + fx2 * simd4f(2.30933477057345225087e-2f))); + Simd4f qx = simd4f(4.36821166879210612817e+3f) + fx2 * (simd4f(2.33184211722314911771e+2f) + fx2); + + Simd4f exp2fx = px * recip(qx - px); + exp2fx = gSimd4fOne + exp2fx + exp2fx; + + // exp2(ix) + + Simd4f exp2ix = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ix, _mm_set1_epi32(0x7f)), 23)); + + return exp2fx * exp2ix; +} + +Simd4f log2(const Simd4f& v) +{ + // todo: fast approximate implementation like exp2 + Simd4f scale = simd4f(1.44269504088896341f); // 1/ln(2) + const float* ptr = array(v); + return simd4f(::logf(ptr[0]), ::logf(ptr[1]), ::logf(ptr[2]), ::logf(ptr[3])) * scale; +} + +Simd4f dot3(const Simd4f& v0, const Simd4f& v1) +{ + Simd4f tmp = v0 * v1; + return splat<0>(tmp) + splat<1>(tmp) + splat<2>(tmp); +} + +Simd4f cross3(const Simd4f& v0, const Simd4f& v1) +{ + Simd4f t0 = _mm_shuffle_ps(v0, v0, 0xc9); // w z y x -> w x z y + Simd4f t1 = _mm_shuffle_ps(v1, v1, 0xc9); + Simd4f tmp = v0 * t1 - t0 * v1; + return _mm_shuffle_ps(tmp, tmp, 0xc9); +} + +void transpose(Simd4f& x, Simd4f& y, Simd4f& z, Simd4f& w) +{ + _MM_TRANSPOSE4_PS(x, y, z, w); +} + +void 
zip(Simd4f& v0, Simd4f& v1) +{ + Simd4f t0 = v0; + v0 = _mm_unpacklo_ps(v0, v1); + v1 = _mm_unpackhi_ps(t0, v1); +} + +void unzip(Simd4f& v0, Simd4f& v1) +{ + Simd4f t0 = v0; + v0 = _mm_shuffle_ps(v0, v1, _MM_SHUFFLE(2, 0, 2, 0)); + v1 = _mm_shuffle_ps(t0, v1, _MM_SHUFFLE(3, 1, 3, 1)); +} + +Simd4f swaphilo(const Simd4f& v) +{ + return _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 0, 3, 2)); +} + +int allEqual(const Simd4f& v0, const Simd4f& v1) +{ + return allTrue(v0 == v1); +} + +int allEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask) +{ + return allTrue(outMask = v0 == v1); +} + +int anyEqual(const Simd4f& v0, const Simd4f& v1) +{ + return anyTrue(v0 == v1); +} + +int anyEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask) +{ + return anyTrue(outMask = v0 == v1); +} + +int allGreater(const Simd4f& v0, const Simd4f& v1) +{ + return allTrue(v0 > v1); +} + +int allGreater(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask) +{ + return allTrue(outMask = v0 > v1); +} + +int anyGreater(const Simd4f& v0, const Simd4f& v1) +{ + return anyTrue(v0 > v1); +} + +int anyGreater(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask) +{ + return anyTrue(outMask = v0 > v1); +} + +int allGreaterEqual(const Simd4f& v0, const Simd4f& v1) +{ + return allTrue(v0 >= v1); +} + +int allGreaterEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask) +{ + return allTrue(outMask = v0 >= v1); +} + +int anyGreaterEqual(const Simd4f& v0, const Simd4f& v1) +{ + return anyTrue(v0 >= v1); +} + +int anyGreaterEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask) +{ + return anyTrue(outMask = v0 >= v1); +} + +int allTrue(const Simd4f& v) +{ + return _mm_movemask_ps(v) == 0xf; +} + +int anyTrue(const Simd4f& v) +{ + return _mm_movemask_ps(v); +} + +NV_SIMD_NAMESPACE_END diff --git a/APEX_1.4/module/clothing/src/simd/sse2/NvSse2Simd4i.h b/APEX_1.4/module/clothing/src/simd/sse2/NvSse2Simd4i.h new file mode 100644 index 00000000..86a1848d --- /dev/null +++ 
b/APEX_1.4/module/clothing/src/simd/sse2/NvSse2Simd4i.h @@ -0,0 +1,259 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#pragma once + +NV_SIMD_NAMESPACE_BEGIN + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// factory implementation +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +Simd4iZeroFactory::operator Simd4i() const +{ + return _mm_setzero_si128(); +} + +Simd4iScalarFactory::operator Simd4i() const +{ + return _mm_set1_epi32(value); +} + +Simd4iTupleFactory::operator Simd4i() const +{ + return reinterpret_cast<const Simd4i&>(tuple); +} + +Simd4iLoadFactory::operator Simd4i() const +{ + return _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr)); +} + +Simd4iAlignedLoadFactory::operator Simd4i() const +{ + return _mm_load_si128(reinterpret_cast<const __m128i*>(ptr)); +} + +Simd4iOffsetLoadFactory::operator Simd4i() const +{ + return _mm_load_si128(reinterpret_cast<const __m128i*>(reinterpret_cast<const char*>(ptr) + offset)); +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// expression template +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +template <> +inline ComplementExpr<Simd4i>::operator Simd4i() const +{ + return _mm_andnot_si128(v, _mm_set1_epi32(0xffffffff)); +} + +template <> +inline Simd4i operator&(const ComplementExpr<Simd4i>& complement, const Simd4i& v) +{ + return _mm_andnot_si128(complement.v, v); +} + +template <> +inline Simd4i operator&(const Simd4i& v, const ComplementExpr<Simd4i>& complement) +{ + return _mm_andnot_si128(complement.v, v); +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// operator implementations +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +Simd4i operator==(const Simd4i& v0, const Simd4i& v1) +{ + return _mm_cmpeq_epi32(v0, v1); +} + +Simd4i operator<(const Simd4i& v0, const Simd4i& v1) +{ + return _mm_cmplt_epi32(v0, v1); +} + +Simd4i operator>(const Simd4i& v0, const Simd4i& v1) +{ + return _mm_cmpgt_epi32(v0, v1); +} + +ComplementExpr<Simd4i> operator~(const Simd4i& v) +{ 
+ return ComplementExpr<Simd4i>(v); +} + +Simd4i operator&(const Simd4i& v0, const Simd4i& v1) +{ + return _mm_and_si128(v0, v1); +} + +Simd4i operator|(const Simd4i& v0, const Simd4i& v1) +{ + return _mm_or_si128(v0, v1); +} + +Simd4i operator^(const Simd4i& v0, const Simd4i& v1) +{ + return _mm_xor_si128(v0, v1); +} + +Simd4i operator<<(const Simd4i& v, int shift) +{ + return _mm_slli_epi32(v, shift); +} + +Simd4i operator>>(const Simd4i& v, int shift) +{ + return _mm_srli_epi32(v, shift); +} + +Simd4i operator+(const Simd4i& v) +{ + return v; +} + +Simd4i operator+(const Simd4i& v0, const Simd4i& v1) +{ + return _mm_add_epi32(v0, v1); +} + +Simd4i operator-(const Simd4i& v) +{ + return _mm_sub_epi32(_mm_setzero_si128(), v); +} + +Simd4i operator-(const Simd4i& v0, const Simd4i& v1) +{ + return _mm_sub_epi32(v0, v1); +} + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +// function implementations +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +Simd4i simd4i(const Simd4f& v) +{ + return _mm_castps_si128(v); +} + +Simd4i truncate(const Simd4f& v) +{ + return _mm_cvttps_epi32(v); +} + +int (&array(Simd4i& v))[4] +{ + return reinterpret_cast<int(&)[4]>(v); +} + +const int (&array(const Simd4i& v))[4] +{ + return reinterpret_cast<const int(&)[4]>(v); +} + +void store(int* ptr, const Simd4i& v) +{ + _mm_storeu_si128(reinterpret_cast<__m128i*>(ptr), v); +} + +void storeAligned(int* ptr, const Simd4i& v) +{ + _mm_store_si128(reinterpret_cast<__m128i*>(ptr), v); +} + +void storeAligned(int* ptr, unsigned int offset, const Simd4i& v) +{ + _mm_store_si128(reinterpret_cast<__m128i*>(reinterpret_cast<char*>(ptr) + offset), v); +} + +template <size_t i> +Simd4i splat(const Simd4i& v) +{ + return _mm_shuffle_epi32(v, _MM_SHUFFLE(i, i, i, i)); +} + +Simd4i select(const Simd4i& mask, const Simd4i& v0, const Simd4i& v1) +{ + return _mm_xor_si128(v1, _mm_and_si128(mask, _mm_xor_si128(v1, v0))); +} + +int allEqual(const Simd4i& v0, 
const Simd4i& v1) +{ + return allTrue(operator==(v0, v1)); +} + +int allEqual(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask) +{ + return allTrue(outMask = operator==(v0, v1)); +} + +int anyEqual(const Simd4i& v0, const Simd4i& v1) +{ + return anyTrue(operator==(v0, v1)); +} + +int anyEqual(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask) +{ + return anyTrue(outMask = operator==(v0, v1)); +} + +int allGreater(const Simd4i& v0, const Simd4i& v1) +{ + return allTrue(operator>(v0, v1)); +} + +int allGreater(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask) +{ + return allTrue(outMask = operator>(v0, v1)); +} + +int anyGreater(const Simd4i& v0, const Simd4i& v1) +{ + return anyTrue(operator>(v0, v1)); +} + +int anyGreater(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask) +{ + return anyTrue(outMask = operator>(v0, v1)); +} + +int allTrue(const Simd4i& v) +{ + return _mm_movemask_ps(_mm_castsi128_ps(v)) == 0xf; +} + +int anyTrue(const Simd4i& v) +{ + return _mm_movemask_ps(_mm_castsi128_ps(v)); +} + +NV_SIMD_NAMESPACE_END diff --git a/APEX_1.4/module/clothing/src/simd/sse2/NvSse2SimdTypes.h b/APEX_1.4/module/clothing/src/simd/sse2/NvSse2SimdTypes.h new file mode 100644 index 00000000..353f17ae --- /dev/null +++ b/APEX_1.4/module/clothing/src/simd/sse2/NvSse2SimdTypes.h @@ -0,0 +1,95 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#pragma once + +// SSE + SSE2 (don't include intrin.h!) +#include <emmintrin.h> + +#if defined _MSC_VER && !(defined NV_SIMD_USE_NAMESPACE && NV_SIMD_USE_NAMESPACE) + +// SIMD library lives in global namespace and Simd4f is +// typedef'd to __m128 so it can be passed by value on MSVC. 
+ +typedef __m128 Simd4f; +typedef __m128i Simd4i; + +#else + +NV_SIMD_NAMESPACE_BEGIN + +/** \brief SIMD type containing 4 floats. Wraps a single __m128 register. The default constructor has an empty body and no member initializer, so the wrapped register is left uninitialized. The converting constructor from __m128 is intentionally implicit, and the conversion operators return a reference to the wrapped register, so a Simd4f can be handed to and built from SSE intrinsics without explicit casts. */ +struct Simd4f +{ + Simd4f() + { + } + Simd4f(__m128 x) : m128(x) + { + } + + operator __m128&() + { + return m128; + } + operator const __m128&() const + { + return m128; + } + + private: + __m128 m128; +}; + +/** \brief SIMD type containing 4 integers. Wraps a single __m128i register. The default constructor has an empty body and no member initializer, so the wrapped register is left uninitialized. The converting constructor from __m128i is intentionally implicit, and the conversion operators return a reference to the wrapped register, so a Simd4i can be handed to and built from SSE2 integer intrinsics without explicit casts. */ +struct Simd4i +{ + Simd4i() + { + } + Simd4i(__m128i x) : m128i(x) + { + } + + operator __m128i&() + { + return m128i; + } + operator const __m128i&() const + { + return m128i; + } + + private: + __m128i m128i; +}; + +NV_SIMD_NAMESPACE_END + +#endif |