aboutsummaryrefslogtreecommitdiff
path: root/PxShared/src
diff options
context:
space:
mode:
authorSheikh Dawood Abdul Ajees <[email protected]>2017-05-12 17:45:18 -0500
committerSheikh Dawood Abdul Ajees <[email protected]>2017-05-12 17:45:18 -0500
commit7f12de60542edc8f1c6683e6b4cdce8570e51456 (patch)
tree0b5d533bae189ea286257b5ab78b635fafb19aa0 /PxShared/src
parentPhysX 3.4, APEX 1.4 patch release @22017166 (diff)
downloadphysx-3.4-7f12de60542edc8f1c6683e6b4cdce8570e51456.tar.xz
physx-3.4-7f12de60542edc8f1c6683e6b4cdce8570e51456.zip
PhysX 3.4, APEX 1.4 patch release @22121272
Diffstat (limited to 'PxShared/src')
-rw-r--r--PxShared/src/NvSimd/include/NvSimd4f.h629
-rw-r--r--PxShared/src/NvSimd/include/NvSimd4i.h368
-rw-r--r--PxShared/src/NvSimd/include/NvSimdTypes.h239
-rw-r--r--PxShared/src/NvSimd/include/neon/NvNeonSimd4f.h585
-rw-r--r--PxShared/src/NvSimd/include/neon/NvNeonSimd4i.h303
-rw-r--r--PxShared/src/NvSimd/include/neon/NvNeonSimdTypes.h71
-rw-r--r--PxShared/src/NvSimd/include/scalar/NvScalarSimd4f.h464
-rw-r--r--PxShared/src/NvSimd/include/scalar/NvScalarSimd4i.h272
-rw-r--r--PxShared/src/NvSimd/include/scalar/NvScalarSimdTypes.h154
-rw-r--r--PxShared/src/NvSimd/include/sse2/NvSse2Simd4f.h471
-rw-r--r--PxShared/src/NvSimd/include/sse2/NvSse2Simd4i.h259
-rw-r--r--PxShared/src/NvSimd/include/sse2/NvSse2SimdTypes.h95
12 files changed, 0 insertions, 3910 deletions
diff --git a/PxShared/src/NvSimd/include/NvSimd4f.h b/PxShared/src/NvSimd/include/NvSimd4f.h
deleted file mode 100644
index fc4f70e3..00000000
--- a/PxShared/src/NvSimd/include/NvSimd4f.h
+++ /dev/null
@@ -1,629 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-#pragma once
-
-#include "NvSimdTypes.h"
-#include <float.h>
-#include <math.h>
-
-NV_SIMD_NAMESPACE_BEGIN
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// factories
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-/*! \brief Creates Simd4f with all components set to zero.
-* \relates Simd4f */
-struct Simd4fZeroFactory
-{
- inline operator Simd4f() const;
- inline operator Scalar4f() const;
-};
-
-/*! \brief Creates Simd4f with all components set to one.
-* \relates Simd4f */
-struct Simd4fOneFactory
-{
- inline operator Simd4f() const;
- inline operator Scalar4f() const;
-};
-
-/*! \brief Replicates float into all four Simd4f components.
-* \relates Simd4f */
-struct Simd4fScalarFactory
-{
- explicit Simd4fScalarFactory(const float& s) : value(s)
- {
- }
- Simd4fScalarFactory& operator=(const Simd4fScalarFactory&); // not implemented
- inline operator Simd4f() const;
- inline operator Scalar4f() const;
-
- const float value;
-};
-
-/*! \brief Creates Simd4f from four floats.
-* \relates Simd4f */
-struct Simd4fTupleFactory
-{
- Simd4fTupleFactory(float x, float y, float z, float w)
- // c++11: : tuple{ x, y, z, w }
- {
- tuple[0] = x;
- tuple[1] = y;
- tuple[2] = z;
- tuple[3] = w;
- }
- Simd4fTupleFactory(unsigned x, unsigned y, unsigned z, unsigned w)
- {
- unsigned* ptr = reinterpret_cast<unsigned*>(tuple);
- ptr[0] = x;
- ptr[1] = y;
- ptr[2] = z;
- ptr[3] = w;
- }
- Simd4fTupleFactory& operator=(const Simd4fTupleFactory&); // not implemented
- inline operator Simd4f() const;
- inline operator Scalar4f() const;
- NV_SIMD_ALIGN(16, float) tuple[4];
-};
-
-/*! \brief Loads Simd4f from (unaligned) pointer.
-* \relates Simd4f */
-struct Simd4fLoadFactory
-{
- explicit Simd4fLoadFactory(const float* p) : ptr(p)
- {
- }
- Simd4fLoadFactory& operator=(const Simd4fLoadFactory&); // not implemented
- inline operator Simd4f() const;
- inline operator Scalar4f() const;
- const float* const ptr;
-};
-
-/*! \brief Loads Simd4f from (aligned) pointer.
-* \relates Simd4f */
-struct Simd4fAlignedLoadFactory
-{
- explicit Simd4fAlignedLoadFactory(const float* p) : ptr(p)
- {
- }
- Simd4fAlignedLoadFactory& operator=(const Simd4fAlignedLoadFactory&); // not implemented
- inline operator Simd4f() const;
- inline operator Scalar4f() const;
- const float* const ptr;
-};
-
-/*! \brief Loads Simd4f from (unaligned) pointer.
-* \relates Simd4f */
-struct Simd4fLoad3Factory
-{
- explicit Simd4fLoad3Factory(const float* p) : ptr(p)
- {
- }
- Simd4fLoad3Factory& operator=(const Simd4fLoad3Factory&); // not implemented
- inline operator Simd4f() const;
- inline operator Scalar4f() const;
- const float* const ptr;
-};
-
-/*! \brief Loads Simd4f from (unaligned) pointer, which point to 3 floats in memory. 4th component will be initialized
-* with w
-* \relates Simd4f */
-struct Simd4fLoad3SetWFactory
-{
- explicit Simd4fLoad3SetWFactory(const float* p, const float wComponent) : ptr(p), w(wComponent)
- {
- }
- Simd4fLoad3SetWFactory& operator=(const Simd4fLoad3SetWFactory&); // not implemented
- inline operator Simd4f() const;
- inline operator Scalar4f() const;
- const float* const ptr;
- const float w;
-};
-
-/*! \brief Loads Simd4f from (aligned) pointer with offset.
-* \relates Simd4f */
-struct Simd4fOffsetLoadFactory
-{
- Simd4fOffsetLoadFactory(const float* p, unsigned int off) : ptr(p), offset(off)
- {
- }
- Simd4fOffsetLoadFactory& operator=(const Simd4fOffsetLoadFactory&); // not implemented
- inline operator Simd4f() const;
- inline operator Scalar4f() const;
- const float* const ptr;
- const unsigned int offset;
-};
-
-// forward declaration
-struct Simd4iScalarFactory;
-struct Simd4iTupleFactory;
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// expression templates
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if NV_SIMD_FUSE_MULTIPLY_ADD
-/*! \brief Expression template to fuse multiply-adds.
-* \relates Simd4f */
-struct ProductExpr
-{
- inline ProductExpr(Simd4f const& v0_, Simd4f const& v1_) : v0(v0_), v1(v1_)
- {
- }
- inline operator Simd4f() const;
- const Simd4f v0, v1;
-
- private:
- ProductExpr& operator=(const ProductExpr&); // not implemented
-};
-#else // NV_SIMD_FUSE_MULTIPLY_ADD
-typedef Simd4f ProductExpr;
-#endif // NV_SIMD_FUSE_MULTIPLY_ADD
-
-// multiply-add expression templates
-inline Simd4f operator+(const ProductExpr&, const Simd4f&);
-inline Simd4f operator+(const Simd4f&, const ProductExpr&);
-inline Simd4f operator+(const ProductExpr&, const ProductExpr&);
-inline Simd4f operator-(const Simd4f&, const ProductExpr&);
-inline Simd4f operator-(const ProductExpr&, const ProductExpr&);
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// operators
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-// note: operator?= missing because they don't have corresponding intrinsics.
-
-/*! \brief Test for equality of two vectors.
-* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
-* \note QNaNs aren't handled on SPU: comparing two QNaNs will return true.
-* \relates Simd4f */
-inline Simd4f operator==(const Simd4f& v0, const Simd4f& v1);
-
-// no operator!= because VMX128 does not support it, use ~operator== and handle QNaNs
-
-/*! \brief Less-compare all elements of two vectors.
-* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
-* \note QNaNs aren't handled on SPU: comparisons against QNaNs don't necessarily return false.
-* \relates Simd4f */
-inline Simd4f operator<(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief Less-or-equal-compare all elements of two vectors.
-* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
-* \note QNaNs aren't handled on SPU: comparisons against QNaNs don't necessarily return false.
-* \relates Simd4f */
-inline Simd4f operator<=(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief Greater-compare all elements of two vectors.
-* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
-* \note QNaNs aren't handled on SPU: comparisons against QNaNs don't necessarily return false.
-* \relates Simd4f */
-inline Simd4f operator>(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief Greater-or-equal-compare all elements of two vectors.
-* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
-* \note QNaNs aren't handled on SPU: comparisons against QNaNs don't necessarily return false.
-* \relates Simd4f */
-inline Simd4f operator>=(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief Vector bit-wise NOT operator
-* \return A vector holding the bit-negate of \a v.
-* \relates Simd4f */
-inline ComplementExpr<Simd4f> operator~(const Simd4f& v);
-
-/*! \brief Vector bit-wise AND operator
-* \return A vector holding the bit-wise AND of \a v0 and \a v1.
-* \relates Simd4f */
-inline Simd4f operator&(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief Vector bit-wise OR operator
-* \return A vector holding the bit-wise OR of \a v0 and \a v1.
-* \relates Simd4f */
-inline Simd4f operator|(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief Vector bit-wise XOR operator
-* \return A vector holding the bit-wise XOR of \a v0 and \a v1.
-* \relates Simd4f */
-inline Simd4f operator^(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief Vector logical left shift.
-* \return A vector with 4 elements of \a v0, each shifted left by \a shift bits.
-* \relates Simd4f */
-inline Simd4f operator<<(const Simd4f& v, int shift);
-
-/*! \brief Vector logical right shift.
-* \return A vector with 4 elements of \a v0, each shifted right by \a shift bits.
-* \relates Simd4f */
-inline Simd4f operator>>(const Simd4f& v, int shift);
-
-#if NV_SIMD_SHIFT_BY_VECTOR
-/*! \brief Vector logical left shift.
-* \return A vector with 4 elements of \a v0, each shifted left by \a shift bits.
-* \relates Simd4f */
-inline Simd4f operator<<(const Simd4f& v, const Simd4f& shift);
-
-/*! \brief Vector logical right shift.
-* \return A vector with 4 elements of \a v0, each shifted right by \a shift bits.
-* \relates Simd4f */
-inline Simd4f operator>>(const Simd4f& v, const Simd4f& shift);
-#endif
-
-/*! \brief Unary vector addition operator.
-* \return A vector holding the component-wise copy of \a v.
-* \relates Simd4f */
-inline Simd4f operator+(const Simd4f& v);
-
-/*! \brief Vector addition operator
-* \return A vector holding the component-wise sum of \a v0 and \a v1.
-* \relates Simd4f */
-inline Simd4f operator+(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief Unary vector negation operator.
-* \return A vector holding the component-wise negation of \a v.
-* \relates Simd4f */
-inline Simd4f operator-(const Simd4f& v);
-
-/*! \brief Vector subtraction operator.
-* \return A vector holding the component-wise difference of \a v0 and \a v1.
-* \relates Simd4f */
-inline Simd4f operator-(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief Vector multiplication.
-* \return Element-wise product of \a v0 and \a v1.
-* \note For VMX, returns expression template to fuse multiply-add.
-* \relates Simd4f */
-inline ProductExpr operator*(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief Vector division.
-* \return Element-wise division of \a v0 and \a v1.
-* \relates Simd4f */
-inline Simd4f operator/(const Simd4f& v0, const Simd4f& v1);
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// functions
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-/*! \brief Load float value into all vector components.
-* \relates Simd4f */
-inline Simd4fScalarFactory simd4f(const float& s)
-{
- return Simd4fScalarFactory(s);
-}
-
-/*! \brief Load 4 float values into vector.
-* \relates Simd4f */
-inline Simd4fTupleFactory simd4f(float x, float y, float z, float w)
-{
- return Simd4fTupleFactory(x, y, z, w);
-}
-
-/*! \brief Reinterpret Simd4i as Simd4f.
-* \return A copy of \a v, but reinterpreted as Simd4f.
-* \relates Simd4f */
-inline Simd4f simd4f(const Simd4i& v);
-
-/*! \brief Reinterpret Simd4iScalarFactory as Simd4fScalarFactory.
-* \relates Simd4f */
-inline Simd4fScalarFactory simd4f(const Simd4iScalarFactory& v)
-{
- return reinterpret_cast<const Simd4fScalarFactory&>(v);
-}
-
-/*! \brief Reinterpret Simd4iTupleFactory as Simd4fTupleFactory.
-* \relates Simd4f */
-inline Simd4fTupleFactory simd4f(const Simd4iTupleFactory& v)
-{
- return reinterpret_cast<const Simd4fTupleFactory&>(v);
-}
-
-/*! \brief Convert Simd4i to Simd4f.
-* \relates Simd4f */
-inline Simd4f convert(const Simd4i& v);
-
-/*! \brief Return reference to contiguous array of vector elements
-* \relates Simd4f */
-inline float (&array(Simd4f& v))[4];
-
-/*! \brief Return constant reference to contiguous array of vector elements
-* \relates Simd4f */
-inline const float (&array(const Simd4f& v))[4];
-
-/*! \brief Create vector from float array.
-* \relates Simd4f */
-inline Simd4fLoadFactory load(const float* ptr)
-{
- return Simd4fLoadFactory(ptr);
-}
-
-/*! \brief Create vector from aligned float array.
-* \note \a ptr needs to be 16 byte aligned.
-* \relates Simd4f */
-inline Simd4fAlignedLoadFactory loadAligned(const float* ptr)
-{
- return Simd4fAlignedLoadFactory(ptr);
-}
-
-/*! \brief Create vector from float[3] \a ptr array. 4th component of simd4f will be equal to 0.0
-* \relates Simd4f */
-inline Simd4fLoad3Factory load3(const float* ptr)
-{
- return Simd4fLoad3Factory(ptr);
-}
-
-/*! \brief Create vector from float[3] \a ptr array and extra \a wComponent
-* \relates Simd4f */
-inline Simd4fLoad3SetWFactory load3(const float* ptr, const float wComponent)
-{
- return Simd4fLoad3SetWFactory(ptr, wComponent);
-}
-
-/*! \brief Create vector from aligned float array.
-* \param offset pointer offset in bytes.
-* \note \a ptr+offset needs to be 16 byte aligned.
-* \relates Simd4f */
-inline Simd4fOffsetLoadFactory loadAligned(const float* ptr, unsigned int offset)
-{
- return Simd4fOffsetLoadFactory(ptr, offset);
-}
-
-/*! \brief Store vector \a v to float array \a ptr.
-* \relates Simd4f */
-inline void store(float* ptr, Simd4f const& v);
-
-/*! \brief Store vector \a v to float[3] array \a ptr.
-* \relates Simd4f */
-inline void store3(float* ptr, Simd4f const& v);
-
-/*! \brief Store vector \a v to aligned float array \a ptr.
-* \note \a ptr needs to be 16 byte aligned.
-* \relates Simd4f */
-inline void storeAligned(float* ptr, Simd4f const& v);
-
-/*! \brief Store vector \a v to aligned float array \a ptr.
-* \param offset pointer offset in bytes.
-* \note \a ptr+offset needs to be 16 byte aligned.
-* \relates Simd4f */
-inline void storeAligned(float* ptr, unsigned int offset, Simd4f const& v);
-
-/*! \brief replicate i-th component into all vector components.
-* \return Vector with all elements set to \a v[i].
-* \relates Simd4f */
-template <size_t i>
-inline Simd4f splat(Simd4f const& v);
-
-/*! \brief Select \a v0 or \a v1 based on \a mask.
-* \return mask ? v0 : v1
-* \relates Simd4f */
-inline Simd4f select(Simd4f const& mask, Simd4f const& v0, Simd4f const& v1);
-
-/*! \brief Per element absolute value.
-* \return Vector with absolute values of \a v.
-* \relates Simd4f */
-inline Simd4f abs(const Simd4f& v);
-
-/*! \brief Per element floor value.
-* \note Result undefined for values outside of the integer range.
-* \note Translates to 6 instructions on SSE and NEON.
-* \relates Simd4f */
-inline Simd4f floor(const Simd4f& v);
-
-#if !defined max
-/*! \brief Per-component minimum of two vectors
-* \note Result undefined for QNaN elements.
-* \relates Simd4f */
-inline Simd4f max(const Simd4f& v0, const Simd4f& v1);
-#endif
-
-#if !defined min
-/*! \brief Per-component minimum of two vectors
-* \note Result undefined for QNaN elements.
-* \relates Simd4f */
-inline Simd4f min(const Simd4f& v0, const Simd4f& v1);
-#endif
-
-/*! \brief Return reciprocal estimate of a vector.
-* \return Vector of per-element reciprocal estimate.
-* \relates Simd4f */
-inline Simd4f recip(const Simd4f& v);
-
-/*! \brief Return reciprocal of a vector.
-* \return Vector of per-element reciprocal.
-* \note Performs \a n Newton-Raphson iterations on initial estimate.
-* \relates Simd4f */
-template <int n>
-inline Simd4f recip(const Simd4f& v);
-
-/*! \brief Return square root of a vector.
-* \return Vector of per-element square root.
-* \note Result undefined for negative elements.
-* \relates Simd4f */
-inline Simd4f sqrt(const Simd4f& v);
-
-/*! \brief Return inverse square root estimate of a vector.
-* \return Vector of per-element inverse square root estimate.
-* \note Result undefined for negative, zero, and infinity elements.
-* \relates Simd4f */
-inline Simd4f rsqrt(const Simd4f& v);
-
-/*! \brief Return inverse square root of a vector.
-* \return Vector of per-element inverse square root.
-* \note Performs \a n Newton-Raphson iterations on initial estimate.
-* \note The result is undefined for negative and infinity elements.
-* \relates Simd4f */
-template <int n>
-inline Simd4f rsqrt(const Simd4f& v);
-
-/*! \brief Return 2 raised to the power of v.
-* \note Result only defined for finite elements.
-* \relates Simd4f */
-inline Simd4f exp2(const Simd4f& v);
-
-/*! \brief Return logarithm of v to base 2.
-* \note Result undefined for QNaN elements.
-* \relates Simd4f */
-inline Simd4f log2(const Simd4f& v);
-
-/*! \brief Return dot product of two 3-vectors.
-* \note The result is replicated across all 4 components.
-* \relates Simd4f */
-inline Simd4f dot3(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief Return cross product of two 3-vectors.
-* \note The 4th component is undefined.
-* \note Result only defined for finite x, y, and z values.
-* \relates Simd4f */
-inline Simd4f cross3(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief Transposes 4x4 matrix represented by \a x, \a y, \a z, and \a w.
-* \relates Simd4f */
-inline void transpose(Simd4f& x, Simd4f& y, Simd4f& z, Simd4f& w);
-
-/*! \brief Interleave elements.
-* \a v0 becomes {x0, x1, y0, y1}, v1 becomes {z0, z1, w0, w1}.
-* \relates Simd4f */
-inline void zip(Simd4f& v0, Simd4f& v1);
-
-/*! \brief De-interleave elements.
-* \a v0 becomes {x0, z0, x1, z1}, v1 becomes {y0, w0, y1, w1}.
-* \relates Simd4f */
-inline void unzip(Simd4f& v0, Simd4f& v1);
-
-/*! \brief Swaps quad words.
-* Returns {z0, w0, x0, y0}
-* \relates Simd4f */
-inline Simd4f swaphilo(const Simd4f& v);
-
-/*! \brief returns non-zero if all elements or \a v0 and \a v1 are equal
-* \note QNaNs aren't handled on SPU: comparing two QNaNs will return true.
-* \relates Simd4f */
-inline int allEqual(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief returns non-zero if all elements or \a v0 and \a v1 are equal
-* \param outMask holds the result of \a v0 == \a v1.
-* \note QNaNs aren't handled on SPU: comparing two QNaNs will return true.
-* \relates Simd4f */
-inline int allEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask);
-
-/*! \brief returns non-zero if any elements or \a v0 and \a v1 are equal
-* \note QNaNs aren't handled on SPU: comparing two QNaNs will return true.
-* \relates Simd4f */
-inline int anyEqual(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief returns non-zero if any elements or \a v0 and \a v1 are equal
-* \param outMask holds the result of \a v0 == \a v1.
-* \note QNaNs aren't handled on SPU: comparing two QNaNs will return true.
-* \relates Simd4f */
-inline int anyEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask);
-
-/*! \brief returns non-zero if all elements or \a v0 and \a v1 are greater
-* \note QNaNs aren't handled on SPU: comparisons against QNaNs don't necessarily return false.
-* \relates Simd4f */
-inline int allGreater(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief returns non-zero if all elements or \a v0 and \a v1 are greater
-* \param outMask holds the result of \a v0 == \a v1.
-* \note QNaNs aren't handled on SPU: comparisons against QNaNs don't necessarily return false.
-* \relates Simd4f */
-inline int allGreater(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask);
-
-/*! \brief returns non-zero if any elements or \a v0 and \a v1 are greater
-* \note QNaNs aren't handled on SPU: comparisons against QNaNs don't necessarily return false.
-* \relates Simd4f */
-inline int anyGreater(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief returns non-zero if any elements or \a v0 and \a v1 are greater
-* \param outMask holds the result of \a v0 == \a v1.
-* \note QNaNs aren't handled on SPU: comparisons against QNaNs don't necessarily return false.
-* \relates Simd4f */
-inline int anyGreater(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask);
-
-/*! \brief returns non-zero if all elements or \a v0 and \a v1 are greater or equal
-* \note QNaNs aren't handled on SPU: comparisons against QNaNs don't necessarily return false.
-* \relates Simd4f */
-inline int allGreaterEqual(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief returns non-zero if all elements or \a v0 and \a v1 are greater or equal
-* \param outMask holds the result of \a v0 == \a v1.
-* \note QNaNs aren't handled on SPU: comparisons against QNaNs don't necessarily return false.
-* \relates Simd4f */
-inline int allGreaterEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask);
-
-/*! \brief returns non-zero if any elements or \a v0 and \a v1 are greater or equal
-* \note QNaNs aren't handled on SPU: comparisons against QNaNs don't necessarily return false.
-* \relates Simd4f */
-inline int anyGreaterEqual(const Simd4f& v0, const Simd4f& v1);
-
-/*! \brief returns non-zero if any elements or \a v0 and \a v1 are greater or equal
-* \param outMask holds the result of \a v0 == \a v1.
-* \note QNaNs aren't handled on SPU: comparisons against QNaNs don't necessarily return false.
-* \relates Simd4f */
-inline int anyGreaterEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask);
-
-/*! \brief returns non-zero if all elements are true
-* \note Undefined if parameter is not result of a comparison.
-* \relates Simd4f */
-inline int allTrue(const Simd4f& v);
-
-/*! \brief returns non-zero if any element is true
-* \note Undefined if parameter is not result of a comparison.
-* \relates Simd4f */
-inline int anyTrue(const Simd4f& v);
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// constants
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-NV_SIMD_GLOBAL_CONSTANT Simd4fZeroFactory gSimd4fZero = Simd4fZeroFactory();
-NV_SIMD_GLOBAL_CONSTANT Simd4fOneFactory gSimd4fOne = Simd4fOneFactory();
-NV_SIMD_GLOBAL_CONSTANT Simd4fScalarFactory gSimd4fMinusOne = simd4f(-1.0f);
-NV_SIMD_GLOBAL_CONSTANT Simd4fScalarFactory gSimd4fHalf = simd4f(0.5f);
-NV_SIMD_GLOBAL_CONSTANT Simd4fScalarFactory gSimd4fTwo = simd4f(2.0f);
-NV_SIMD_GLOBAL_CONSTANT Simd4fScalarFactory gSimd4fPi = simd4f(3.14159265358979323846f);
-NV_SIMD_GLOBAL_CONSTANT Simd4fScalarFactory gSimd4fEpsilon = simd4f(FLT_EPSILON);
-NV_SIMD_GLOBAL_CONSTANT Simd4fScalarFactory gSimd4fFloatMax = simd4f(FLT_MAX);
-NV_SIMD_GLOBAL_CONSTANT Simd4fTupleFactory gSimd4fMaskX = Simd4fTupleFactory(~0u, 0u, 0u, 0u);
-NV_SIMD_GLOBAL_CONSTANT Simd4fTupleFactory gSimd4fMaskXYZ = Simd4fTupleFactory(~0u, ~0u, ~0u, 0u);
-
-NV_SIMD_NAMESPACE_END
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// platform specific includes
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if NV_SIMD_SSE2
-#include "sse2/NvSse2Simd4f.h"
-#elif NV_SIMD_NEON
-#include "neon/NvNeonSimd4f.h"
-#endif
-
-#if NV_SIMD_SCALAR
-#include "scalar/NvScalarSimd4f.h"
-#endif
diff --git a/PxShared/src/NvSimd/include/NvSimd4i.h b/PxShared/src/NvSimd/include/NvSimd4i.h
deleted file mode 100644
index 27e2ba90..00000000
--- a/PxShared/src/NvSimd/include/NvSimd4i.h
+++ /dev/null
@@ -1,368 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-#pragma once
-
-#include "NvSimdTypes.h"
-
-NV_SIMD_NAMESPACE_BEGIN
-
-/*! \brief Creates Simd4i with all components set to zero.
-* \relates Simd4i */
-struct Simd4iZeroFactory
-{
- inline operator Simd4i() const;
- inline operator Scalar4i() const;
-};
-
-/*! \brief Replicates int into all four Simd4i components.
-* \relates Simd4i */
-struct Simd4iScalarFactory
-{
- explicit Simd4iScalarFactory(const int& s) : value(s)
- {
- }
- Simd4iScalarFactory& operator=(const Simd4iScalarFactory&); // not implemented
- inline operator Simd4i() const;
- inline operator Scalar4i() const;
- const int value;
-};
-
-/*! \brief Creates Simd4i from four ints.
-* \relates Simd4i */
-struct Simd4iTupleFactory
-{
- Simd4iTupleFactory(int x, int y, int z, int w)
- // c++11: : tuple{ x, y, z, w }
- {
- tuple[0] = x;
- tuple[1] = y;
- tuple[2] = z;
- tuple[3] = w;
- }
- Simd4iTupleFactory& operator=(const Simd4iTupleFactory&); // not implemented
- inline operator Simd4i() const;
- inline operator Scalar4i() const;
- NV_SIMD_ALIGN(16, int) tuple[4];
-};
-
-/*! \brief Loads Simd4i from (unaligned) pointer.
-* \relates Simd4i */
-struct Simd4iLoadFactory
-{
- explicit Simd4iLoadFactory(const int* p) : ptr(p)
- {
- }
- Simd4iLoadFactory& operator=(const Simd4iLoadFactory&); // not implemented
- inline operator Simd4i() const;
- inline operator Scalar4i() const;
- const int* const ptr;
-};
-
-/*! \brief Loads Simd4i from (aligned) pointer.
-* \relates Simd4i */
-struct Simd4iAlignedLoadFactory
-{
- explicit Simd4iAlignedLoadFactory(const int* p) : ptr(p)
- {
- }
- Simd4iAlignedLoadFactory& operator=(const Simd4iAlignedLoadFactory&); // not implemented
- inline operator Simd4i() const;
- inline operator Scalar4i() const;
- const int* const ptr;
-};
-
-/*! \brief Loads Simd4i from (aligned) pointer with offset.
-* \relates Simd4i */
-struct Simd4iOffsetLoadFactory
-{
- Simd4iOffsetLoadFactory(const int* p, unsigned int off) : ptr(p), offset(off)
- {
- }
- Simd4iOffsetLoadFactory& operator=(const Simd4iOffsetLoadFactory&); // not implemented
- inline operator Simd4i() const;
- inline operator Scalar4i() const;
- const int* const ptr;
- const unsigned int offset;
-};
-
-// map Simd4f/Scalar4f to Simd4i/Scalar4i
-template <typename>
-struct Simd4fToSimd4i;
-template <>
-struct Simd4fToSimd4i<Simd4f>
-{
- typedef Simd4i Type;
-};
-template <>
-struct Simd4fToSimd4i<Scalar4f>
-{
- typedef Scalar4i Type;
-};
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// operators
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-/*! \brief Vector bit-wise NOT operator
-* \return A vector holding the bit-negate of \a v.
-* \relates Simd4i */
-inline ComplementExpr<Simd4i> operator~(const Simd4i& v);
-
-/*! \brief Vector bit-wise AND operator
-* \return A vector holding the bit-wise AND of \a v0 and \a v1.
-* \relates Simd4i */
-inline Simd4i operator&(const Simd4i& v0, const Simd4i& v1);
-
-/*! \brief Vector bit-wise OR operator
-* \return A vector holding the bit-wise OR of \a v0 and \a v1.
-* \relates Simd4i */
-inline Simd4i operator|(const Simd4i& v0, const Simd4i& v1);
-
-/*! \brief Vector bit-wise XOR operator
-* \return A vector holding the bit-wise XOR of \a v0 and \a v1.
-* \relates Simd4i */
-inline Simd4i operator^(const Simd4i& v0, const Simd4i& v1);
-
-/*! \brief Vector logical left shift.
-* \return A vector with 4 elements of \a v0, each shifted left by \a shift bits.
-* \relates Simd4i */
-inline Simd4i operator<<(const Simd4i& v, int shift);
-
-/*! \brief Vector logical right shift.
-* \return A vector with 4 elements of \a v0, each shifted right by \a shift bits.
-* \relates Simd4i */
-inline Simd4i operator>>(const Simd4i& v, int shift);
-
-#if NV_SIMD_SHIFT_BY_VECTOR
-
-/*! \brief Vector logical left shift.
-* \return A vector with 4 elements of \a v0, each shifted left by \a shift bits.
-* \relates Simd4i */
-inline Simd4i operator<<(const Simd4i& v, const Simd4i& shift);
-
-/*! \brief Vector logical right shift.
-* \return A vector with 4 elements of \a v0, each shifted right by \a shift bits.
-* \relates Simd4i */
-inline Simd4i operator>>(const Simd4i& v, const Simd4i& shift);
-
-#endif // NV_SIMD_SHIFT_BY_VECTOR
-
-// note: operator?= missing because they don't have corresponding intrinsics.
-
-/*! \brief Test for equality of two vectors.
-* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
-* \relates Simd4i */
-inline Simd4i operator==(const Simd4i& v0, const Simd4i& v1);
-
-// no !=, <=, >= because VMX128/SSE don't support it, use ~operator== etc.
-
-/*! \brief Less-compare all elements of two *signed* vectors.
-* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
-* \relates Simd4i */
-inline Simd4i operator<(const Simd4i& v0, const Simd4i& v1);
-
-/*! \brief Greater-compare all elements of two *signed* vectors.
-* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
-* \relates Simd4i */
-inline Simd4i operator>(const Simd4i& v0, const Simd4i& v1);
-
-/*! \brief Unary vector addition operator.
-* \return A vector holding the component-wise copy of \a v.
-* \relates Simd4i */
-inline Simd4i operator+(const Simd4i& v);
-
-/*! \brief Vector addition operator
-* \return A vector holding the component-wise sum of \a v0 and \a v1.
-* \relates Simd4i */
-inline Simd4i operator+(const Simd4i& v0, const Simd4i& v1);
-
-/*! \brief Unary vector negation operator.
-* \return A vector holding the component-wise negation of \a v.
-* \relates Simd4i */
-inline Simd4i operator-(const Simd4i& v);
-
-/*! \brief Vector subtraction operator.
-* \return A vector holding the component-wise difference of \a v0 and \a v1.
-* \relates Simd4i */
-inline Simd4i operator-(const Simd4i& v0, const Simd4i& v1);
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// functions
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-/*! \brief Load int value into all vector components.
-* \relates Simd4i */
-inline Simd4iScalarFactory simd4i(const int& s)
-{
- return Simd4iScalarFactory(s);
-}
-
-/*! \brief Load 4 int values into vector.
-* \relates Simd4i */
-inline Simd4iTupleFactory simd4i(int x, int y, int z, int w)
-{
- return Simd4iTupleFactory(x, y, z, w);
-}
-
-/*! \brief Reinterpret Simd4f as Simd4i.
-* \return A copy of \a v, but reinterpreted as Simd4i.
-* \relates Simd4i */
-inline Simd4i simd4i(const Simd4f& v);
-
-/*! \brief Truncate Simd4f to Simd4i.
-* \relates Simd4i */
-inline Simd4i truncate(const Simd4f& v);
-
-/*! \brief return reference to contiguous array of vector elements
-* \relates Simd4i */
-inline int (&array(Simd4i& v))[4];
-
-/*! \brief return constant reference to contiguous array of vector elements
-* \relates Simd4i */
-inline const int (&array(const Simd4i& v))[4];
-
-/*! \brief Create vector from int array.
-* \relates Simd4i */
-inline Simd4iLoadFactory load(const int* ptr)
-{
- return Simd4iLoadFactory(ptr);
-}
-
-/*! \brief Create vector from aligned int array.
-* \note \a ptr needs to be 16 byte aligned.
-* \relates Simd4i */
-inline Simd4iAlignedLoadFactory loadAligned(const int* ptr)
-{
- return Simd4iAlignedLoadFactory(ptr);
-}
-
-/*! \brief Create vector from aligned float array.
-* \param offset pointer offset in bytes.
-* \note \a ptr+offset needs to be 16 byte aligned.
-* \relates Simd4i */
-inline Simd4iOffsetLoadFactory loadAligned(const int* ptr, unsigned int offset)
-{
- return Simd4iOffsetLoadFactory(ptr, offset);
-}
-
-/*! \brief Store vector \a v to int array \a ptr.
-* \relates Simd4i */
-inline void store(int* ptr, const Simd4i& v);
-
-/*! \brief Store vector \a v to aligned int array \a ptr.
-* \note \a ptr needs to be 16 byte aligned.
-* \relates Simd4i */
-inline void storeAligned(int* ptr, const Simd4i& v);
-
-/*! \brief Store vector \a v to aligned int array \a ptr.
-* \param offset pointer offset in bytes.
-* \note \a ptr+offset needs to be 16 byte aligned.
-* \relates Simd4i */
-inline void storeAligned(int* ptr, unsigned int offset, const Simd4i& v);
-
-/*! \brief replicate i-th component into all vector components.
-* \return Vector with all elements set to \a v[i].
-* \relates Simd4i */
-template <size_t i>
-inline Simd4i splat(const Simd4i& v);
-
-/*! \brief Select \a v0 or \a v1 based on \a mask.
-* \return mask ? v0 : v1
-* \relates Simd4i */
-inline Simd4i select(const Simd4i& mask, const Simd4i& v0, const Simd4i& v1);
-
-/*! \brief returns non-zero if all elements or \a v0 and \a v1 are equal
-* \relates Simd4i */
-inline int allEqual(const Simd4i& v0, const Simd4i& v1);
-
-/*! \brief returns non-zero if all elements or \a v0 and \a v1 are equal
-* \param outMask holds the result of \a v0 == \a v1.
-* \relates Simd4i */
-inline int allEqual(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask);
-
-/*! \brief returns non-zero if any elements or \a v0 and \a v1 are equal
-* \relates Simd4i */
-inline int anyEqual(const Simd4i& v0, const Simd4i& v1);
-
-/*! \brief returns non-zero if any elements or \a v0 and \a v1 are equal
-* \param outMask holds the result of \a v0 == \a v1.
-* \relates Simd4i */
-inline int anyEqual(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask);
-
-/*! \brief returns non-zero if all *signed* elements or \a v0 and \a v1 are greater
-* \relates Simd4i */
-inline int allGreater(const Simd4i& v0, const Simd4i& v1);
-
-/*! \brief returns non-zero if all *signed* elements or \a v0 and \a v1 are greater
-* \param outMask holds the result of \a v0 == \a v1.
-* \relates Simd4i */
-inline int allGreater(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask);
-
-/*! \brief returns non-zero if any elements or \a v0 and \a v1 are greater
-* \relates Simd4i */
-inline int anyGreater(const Simd4i& v0, const Simd4i& v1);
-
-/*! \brief returns non-zero if any elements or \a v0 and \a v1 are greater
-* \param outMask holds the result of \a v0 == \a v1.
-* \relates Simd4i */
-inline int anyGreater(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask);
-
-/*! \brief returns non-zero if all elements are true
-* \note undefined if parameter is not result of a comparison.
-* \relates Simd4i */
-inline int allTrue(const Simd4i& v);
-
-/*! \brief returns non-zero if any element is true
-* \note undefined if parameter is not result of a comparison.
-* \relates Simd4i */
-inline int anyTrue(const Simd4i& v);
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// constants
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-NV_SIMD_GLOBAL_CONSTANT Simd4iZeroFactory gSimd4iZero = Simd4iZeroFactory();
-NV_SIMD_GLOBAL_CONSTANT Simd4iScalarFactory gSimd4iOne = simd4i(1);
-
-NV_SIMD_NAMESPACE_END
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// platform specific includes
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if NV_SIMD_SSE2
-#include "sse2/NvSse2Simd4i.h"
-#elif NV_SIMD_NEON
-#include "neon/NvNeonSimd4i.h"
-#endif
-
-#if NV_SIMD_SCALAR
-#include "scalar/NvScalarSimd4i.h"
-#endif
diff --git a/PxShared/src/NvSimd/include/NvSimdTypes.h b/PxShared/src/NvSimd/include/NvSimdTypes.h
deleted file mode 100644
index d7b92647..00000000
--- a/PxShared/src/NvSimd/include/NvSimdTypes.h
+++ /dev/null
@@ -1,239 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-#pragma once
-
-/*! @file
-\mainpage NVIDIA(R) SIMD Library
-This library provides an abstraction to SSE2 and NEON SIMD instructions and provides
-a scalar fallback for other architectures. The documentation of Simd4f and Simd4i contain
-everything to get started.
-
-The following design choices have been made:
-- Use typedef for SSE2 data on MSVC (implies global namespace, see NV_SIMD_USE_NAMESPACE for options)
-- Exposing SIMD types as float/integer values as well as bit patterns
-- Free functions and overloaded operators for better code readability
-- Expression templates for common use cases (and-not and multiply-add)
-- Support for constants with same or individual values (see Scalar/TubleFactory)
-- Documentation (!)
-- Altivec/VMX128 support has been removed
-
-The following areas could still use some work:
-- generic shuffling instructions
-- matrix and quaterion types
-
-Here is a simple example of how to use the SIMD libarary:
-
-\code
-void foo(const float* ptr)
-{
- assert(!(ptr & 0xf)); // make sure ptr is aligned
- using namespace nvidia::simd;
- Simd4f a = loadAligned(ptr);
- Simd4f b = simd4f(0.0f, 1.0f, 0.0f, 1.0f);
- Simd4f c = simd4f(3.0f);
- Simd4f d = a * b + gSimd4fOne; // maps to FMA on NEON
- Simd4f mask, e;
- // same result as e = max(c, d);
- if(anyGreater(c, d, mask))
- e = select(mask, c, d);
- Simd4f f = splat<2>(d) - rsqrt(e);
- printf("%f\n", array(f)[0]);
-}
-\endcode
-*/
-
-/*! \def NV_SIMD_SIMD
-* Define Simd4f and Simd4i, which map to four 32bit float or integer tuples.
-* */
-// note: ps4 compiler defines _M_X64 without value
-#if defined (_M_IX86) || defined (_M_X64) || defined (__i386__) || defined (__x86_64__) || PX_EMSCRIPTEN
-#define NV_SIMD_SSE2 1
-#else
-#define NV_SIMD_SSE2 0
-#endif
-#if defined (_M_ARM) || defined (__ARM_NEON__) || defined (__ARM_NEON)
-#define NV_SIMD_NEON 1
-#else
-#define NV_SIMD_NEON 0
-#endif
-#define NV_SIMD_SIMD (NV_SIMD_SSE2 || NV_SIMD_NEON)
-
-/*! \def NV_SIMD_SCALAR
-* Define Scalar4f and Scalar4i (default: 0 if SIMD is supported, 1 otherwise).
-* Scalar4f and Scalar4i can be typedef'd to Simd4f and Simd4i respectively to replace
-* the SIMD classes, or they can be used in combination as template parameters to
-* implement a scalar run-time fallback. */
-#if !defined NV_SIMD_SCALAR
-#define NV_SIMD_SCALAR !NV_SIMD_SIMD
-#endif
-
-// use template expression to fuse multiply-adds into a single instruction
-#define NV_SIMD_FUSE_MULTIPLY_ADD (NV_SIMD_NEON)
-// support shift by vector operarations
-#define NV_SIMD_SHIFT_BY_VECTOR (NV_SIMD_NEON)
-// support inline assembler
-#if defined _M_ARM || defined SN_TARGET_PSP2 || defined __arm64__ || defined __aarch64__
-#define NV_SIMD_INLINE_ASSEMBLER 0
-#else
-#define NV_SIMD_INLINE_ASSEMBLER 1
-#endif
-
-/*! \def NV_SIMD_USE_NAMESPACE
-* \brief Set to 1 to define the SIMD library types and functions inside the nvidia::simd namespace.
-* By default, the types and functions defined in this header live in the global namespace.
-* This is because MSVC (prior to version 12, Visual Studio 2013) does an inferior job at optimizing
-* SSE2 code when __m128 is wrapped in a struct (the cloth solver for example is more than 50% slower).
-* Therefore, Simd4f is typedefe'd to __m128 on MSVC, and for name lookup to work all related functions
-* live in the global namespace. This behavior can be overriden by defining NV_SIMD_USE_NAMESPACE to 1.
-* The types and functions of the SIMD library are then defined inside the nvidia::simd namespace, but
-* performance on MSVC version 11 and earlier is expected to be lower in this mode because __m128 and
-* __m128i are wrapped into structs. Arguments need to be passed by reference in this mode.
-* \see NV_SIMD_VECTORCALL, Simd4fArg */
-
-#if defined NV_SIMD_USE_NAMESPACE&& NV_SIMD_USE_NAMESPACE
-#define NV_SIMD_NAMESPACE_BEGIN \
- namespace nvidia \
- { \
- namespace simd \
- {
-#define NV_SIMD_NAMESPACE_END \
- } \
- }
-#else
-#define NV_SIMD_NAMESPACE_BEGIN
-#define NV_SIMD_NAMESPACE_END
-#endif
-
-// alignment struct to \c alignment byte
-#ifdef _MSC_VER
-#define NV_SIMD_ALIGN(alignment, decl) __declspec(align(alignment)) decl
-#else
-#define NV_SIMD_ALIGN(alignment, decl) decl __attribute__((aligned(alignment)))
-#endif
-
-// define a global constant
-#ifdef _MSC_VER
-#define NV_SIMD_GLOBAL_CONSTANT extern const __declspec(selectany)
-#else
-#define NV_SIMD_GLOBAL_CONSTANT extern const __attribute__((weak))
-#endif
-
-// suppress warning of unused identifiers
-#if defined(__GNUC__)
-#define NV_SIMD_UNUSED __attribute__((unused))
-#else
-#define NV_SIMD_UNUSED
-#endif
-
-// disable warning
-#if defined _MSC_VER
-#if _MSC_VER < 1700
-#pragma warning(disable : 4347) // behavior change: 'function template' is called instead of 'function'
-#endif
-#pragma warning(disable : 4350) // behavior change: 'member1' called instead of 'member2'
-#endif
-
-NV_SIMD_NAMESPACE_BEGIN
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// expression templates
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-/*! \brief Expression template to fuse and-not. */
-template <typename T>
-struct ComplementExpr
-{
- inline explicit ComplementExpr(T const& v_) : v(v_)
- {
- }
- ComplementExpr& operator=(const ComplementExpr&); // not implemented
- inline operator T() const;
- const T v;
-};
-
-template <typename T>
-inline T operator&(const ComplementExpr<T>&, const T&);
-template <typename T>
-inline T operator&(const T&, const ComplementExpr<T>&);
-
-NV_SIMD_NAMESPACE_END
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// platform specific includes
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-#if NV_SIMD_SSE2
-#include "sse2/NvSse2SimdTypes.h"
-#elif NV_SIMD_NEON
-#include "neon/NvNeonSimdTypes.h"
-#elif NV_SIMD_SIMD
-#error unknown SIMD architecture
-#else
-struct Simd4f;
-struct Simd4i;
-#endif
-
-#if NV_SIMD_SCALAR
-#include "scalar/NvScalarSimdTypes.h"
-#else
-struct Scalar4f;
-struct Scalar4i;
-#endif
-
-NV_SIMD_NAMESPACE_BEGIN
-
-/*! \typedef Simd4fArg
-* Maps to Simd4f value or reference, whichever is faster. */
-
-/*! \def NV_SIMD_VECTORCALL
-* MSVC passes aligned arguments by pointer, unless the vector calling convention
-* introduced in Visual Studio 2013 is being used. For the last bit of performance
-* of non-inlined functions, use the following pattern:
-* Simd4f NV_SIMD_VECTORCALL foo(Simd4fArg x);
-* This will pass the argument in register where possible (instead of by pointer).
-* For inlined functions, the compiler will remove the store/load (except for MSVC
-* when NV_SIMD_USE_NAMESPACE is set to 1).
-* Non-inlined functions are rarely perf-critical, so it might be simpler
-* to always pass by reference instead: Simd4f foo(const Simd4f&); */
-
-#if defined _MSC_VER
-#if _MSC_VER >= 1800 // Visual Studio 2013
-typedef Simd4f Simd4fArg;
-#define NV_SIMD_VECTORCALL __vectorcall
-#else
-typedef const Simd4f& Simd4fArg;
-#define NV_SIMD_VECTORCALL
-#endif
-#else
-typedef Simd4f Simd4fArg;
-#define NV_SIMD_VECTORCALL
-#endif
-
-NV_SIMD_NAMESPACE_END
diff --git a/PxShared/src/NvSimd/include/neon/NvNeonSimd4f.h b/PxShared/src/NvSimd/include/neon/NvNeonSimd4f.h
deleted file mode 100644
index 44f0eea1..00000000
--- a/PxShared/src/NvSimd/include/neon/NvNeonSimd4f.h
+++ /dev/null
@@ -1,585 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2014 NVIDIA Corporation. All rights reserved.
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-#pragma once
-
-NV_SIMD_NAMESPACE_BEGIN
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// factory implementation
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4fZeroFactory::operator Simd4f() const
-{
- return vdupq_n_u32(0);
-}
-
-Simd4fOneFactory::operator Simd4f() const
-{
- return vdupq_n_f32(1.0f);
-}
-
-Simd4fScalarFactory::operator Simd4f() const
-{
- return vdupq_n_f32(reinterpret_cast<const float32_t&>(value));
-}
-
-Simd4fTupleFactory::operator Simd4f() const
-{
- return reinterpret_cast<const Simd4f&>(tuple);
-}
-
-Simd4fLoadFactory::operator Simd4f() const
-{
- return vld1q_f32(static_cast<const float32_t*>(ptr));
-}
-
-Simd4fLoad3Factory::operator Simd4f() const
-{
-#if 0
- float32x2_t xy = vld1_f32(ptr);
- float32x2_t zz = vld1_dup_f32(ptr+2);
- return vcombine_f32(xy, zz);
-#else
- float fltArray[] = { ptr[0], ptr[1], ptr[2], 0.0 };
- return vld1q_f32(static_cast<const float32_t*>(fltArray));
-#endif
-}
-
-Simd4fLoad3SetWFactory::operator Simd4f() const
-{
-#if 0
- float32x2_t xy = vld1_f32(ptr);
- float32x2_t zz = vld1_dup_f32(ptr+2);
- return vcombine_f32(xy, zz);
-#else
- float fltArray[] = { ptr[0], ptr[1], ptr[2], w };
- return vld1q_f32(static_cast<const float32_t*>(fltArray));
-#endif
-}
-
-Simd4fAlignedLoadFactory::operator Simd4f() const
-{
- return vld1q_f32(static_cast<const float32_t*>(ptr));
-}
-
-Simd4fOffsetLoadFactory::operator Simd4f() const
-{
- return vld1q_f32(reinterpret_cast<const float32_t*>(reinterpret_cast<const char*>(ptr) + offset));
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// expression templates
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-template <>
-inline ComplementExpr<Simd4f>::operator Simd4f() const
-{
- return vbicq_u32(vdupq_n_u32(0xffffffff), v.u4);
-}
-
-template <>
-inline Simd4f operator&(const ComplementExpr<Simd4f>& complement, const Simd4f& v)
-{
- return vbicq_u32(v.u4, complement.v.u4);
-}
-
-template <>
-inline Simd4f operator&(const Simd4f& v, const ComplementExpr<Simd4f>& complement)
-{
- return vbicq_u32(v.u4, complement.v.u4);
-}
-
-ProductExpr::operator Simd4f() const
-{
- return vmulq_f32(v0.f4, v1.f4);
-}
-
-Simd4f operator+(const ProductExpr& p, const Simd4f& v)
-{
- return vmlaq_f32(v.f4, p.v0.f4, p.v1.f4);
-}
-
-Simd4f operator+(const Simd4f& v, const ProductExpr& p)
-{
- return vmlaq_f32(v.f4, p.v0.f4, p.v1.f4);
-}
-
-Simd4f operator+(const ProductExpr& p0, const ProductExpr& p1)
-{
- // cast calls operator Simd4f() which evaluates the other ProductExpr
- return vmlaq_f32(static_cast<Simd4f>(p0).f4, p1.v0.f4, p1.v1.f4);
-}
-
-Simd4f operator-(const Simd4f& v, const ProductExpr& p)
-{
- return vmlsq_f32(v.f4, p.v0.f4, p.v1.f4);
-}
-
-Simd4f operator-(const ProductExpr& p0, const ProductExpr& p1)
-{
- // cast calls operator Simd4f() which evaluates the other ProductExpr
- return vmlsq_f32(static_cast<Simd4f>(p0).f4, p1.v0.f4, p1.v1.f4);
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// operator implementations
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4f operator==(const Simd4f& v0, const Simd4f& v1)
-{
- return vceqq_f32(v0.f4, v1.f4);
-}
-
-Simd4f operator<(const Simd4f& v0, const Simd4f& v1)
-{
- return vcltq_f32(v0.f4, v1.f4);
-}
-
-Simd4f operator<=(const Simd4f& v0, const Simd4f& v1)
-{
- return vcleq_f32(v0.f4, v1.f4);
-}
-
-Simd4f operator>(const Simd4f& v0, const Simd4f& v1)
-{
- return vcgtq_f32(v0.f4, v1.f4);
-}
-
-Simd4f operator>=(const Simd4f& v0, const Simd4f& v1)
-{
- return vcgeq_f32(v0.f4, v1.f4);
-}
-
-ComplementExpr<Simd4f> operator~(const Simd4f& v)
-{
- return ComplementExpr<Simd4f>(v);
-}
-
-Simd4f operator&(const Simd4f& v0, const Simd4f& v1)
-{
- return vandq_u32(v0.u4, v1.u4);
-}
-
-Simd4f operator|(const Simd4f& v0, const Simd4f& v1)
-{
- return vorrq_u32(v0.u4, v1.u4);
-}
-
-Simd4f operator^(const Simd4f& v0, const Simd4f& v1)
-{
- return veorq_u32(v0.u4, v1.u4);
-}
-
-Simd4f operator<<(const Simd4f& v, int shift)
-{
- return vshlq_u32(v.u4, vdupq_n_s32(shift));
-}
-
-Simd4f operator>>(const Simd4f& v, int shift)
-{
- return vshlq_u32(v.u4, vdupq_n_s32(-shift));
-}
-
-Simd4f operator<<(const Simd4f& v, const Simd4f& shift)
-{
- return vshlq_u32(v.u4, shift.i4);
-}
-
-Simd4f operator>>(const Simd4f& v, const Simd4f& shift)
-{
- return vshlq_u32(v.u4, vnegq_s32(shift.i4));
-}
-
-Simd4f operator+(const Simd4f& v)
-{
- return v;
-}
-
-Simd4f operator+(const Simd4f& v0, const Simd4f& v1)
-{
- return vaddq_f32(v0.f4, v1.f4);
-}
-
-Simd4f operator-(const Simd4f& v)
-{
- return vnegq_f32(v.f4);
-}
-
-Simd4f operator-(const Simd4f& v0, const Simd4f& v1)
-{
- return vsubq_f32(v0.f4, v1.f4);
-}
-
-ProductExpr operator*(const Simd4f& v0, const Simd4f& v1)
-{
- return ProductExpr(v0, v1);
-}
-
-Simd4f operator/(const Simd4f& v0, const Simd4f& v1)
-{
- return v0 * vrecpeq_f32(v1.f4); // reciprocal estimate
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// function implementations
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4f simd4f(const Simd4i& v)
-{
- return v.u4;
-}
-
-Simd4f convert(const Simd4i& v)
-{
- return vcvtq_f32_s32(v.i4);
-}
-
-float (&array(Simd4f& v))[4]
-{
- return reinterpret_cast<float(&)[4]>(v);
-}
-
-const float (&array(const Simd4f& v))[4]
-{
- return reinterpret_cast<const float(&)[4]>(v);
-}
-
-void store(float* ptr, Simd4f const& v)
-{
- return vst1q_f32(reinterpret_cast<float32_t*>(ptr), v.f4);
-}
-
-void storeAligned(float* ptr, Simd4f const& v)
-{
- return vst1q_f32(reinterpret_cast<float32_t*>(ptr), v.f4);
-}
-
-void store3(float* dst, const Simd4f& v)
-{
- const float* __restrict src = array(v);
- dst[0] = src[0];
- dst[1] = src[1];
- dst[2] = src[2];
-}
-
-void storeAligned(float* ptr, unsigned int offset, Simd4f const& v)
-{
- return storeAligned(reinterpret_cast<float*>(reinterpret_cast<char*>(ptr) + offset), v);
-}
-
-template <size_t i>
-Simd4f splat(Simd4f const& v)
-{
- return vdupq_n_f32(array(v)[i]);
-}
-
-Simd4f select(Simd4f const& mask, Simd4f const& v0, Simd4f const& v1)
-{
- return vbslq_f32(mask.u4, v0.f4, v1.f4);
-}
-
-Simd4f abs(const Simd4f& v)
-{
- return vabsq_f32(v.f4);
-}
-
-Simd4f floor(const Simd4f& v)
-{
- int32x4_t i = vcvtq_s32_f32(v.f4);
- int32x4_t s = vreinterpretq_s32_u32(vcgtq_f32(vcvtq_f32_s32(i), v.f4));
- return vcvtq_f32_s32(vsubq_s32(i, vshrq_n_s32(s, 31)));
-}
-
-#if !defined max
-Simd4f max(const Simd4f& v0, const Simd4f& v1)
-{
- return vmaxq_f32(v0.f4, v1.f4);
-}
-#endif
-
-#if !defined min
-Simd4f min(const Simd4f& v0, const Simd4f& v1)
-{
- return vminq_f32(v0.f4, v1.f4);
-}
-#endif
-
-Simd4f recip(const Simd4f& v)
-{
- return recip<0>(v);
-}
-
-template <int n>
-Simd4f recip(const Simd4f& v)
-{
- Simd4f r = vrecpeq_f32(v.f4);
- // n+1 newton iterations because initial approximation is crude
- for(int i = 0; i <= n; ++i)
- r = vrecpsq_f32(v.f4, r.f4) * r;
- return r;
-}
-
-Simd4f sqrt(const Simd4f& v)
-{
- return (v > gSimd4fZero) & (v * rsqrt(v));
-}
-
-Simd4f rsqrt(const Simd4f& v)
-{
- return rsqrt<0>(v);
-}
-
-template <int n>
-Simd4f rsqrt(const Simd4f& v)
-{
- Simd4f r = vrsqrteq_f32(v.f4);
- // n+1 newton iterations because initial approximation is crude
- for(int i = 0; i <= n; ++i)
- r = vrsqrtsq_f32(vmulq_f32(v.f4, r.f4), r.f4) * r;
- return r;
-}
-
-Simd4f exp2(const Simd4f& v)
-{
- // http://www.netlib.org/cephes/
-
- Simd4f limit = simd4f(127.4999f);
- Simd4f x = min(max(-limit, v), limit);
-
- // separate into integer and fractional part
-
- Simd4f fx = x + simd4f(0.5f);
- Simd4i ix = vsubq_s32(vcvtq_s32_f32(fx.f4), vreinterpretq_s32_u32(vshrq_n_u32(fx.u4, 31)));
- fx = x - vcvtq_f32_s32(ix.i4);
-
- // exp2(fx) ~ 1 + 2*P(fx) / (Q(fx) - P(fx))
-
- Simd4f fx2 = fx * fx;
-
- Simd4f px = fx * (simd4f(1.51390680115615096133e+3f) +
- fx2 * (simd4f(2.02020656693165307700e+1f) + fx2 * simd4f(2.30933477057345225087e-2f)));
- Simd4f qx = simd4f(4.36821166879210612817e+3f) + fx2 * (simd4f(2.33184211722314911771e+2f) + fx2);
-
- Simd4f exp2fx = px * recip(qx - px);
- exp2fx = gSimd4fOne + exp2fx + exp2fx;
-
- // exp2(ix)
-
- Simd4f exp2ix = vreinterpretq_f32_s32(vshlq_n_s32(vaddq_s32(ix.i4, vdupq_n_s32(0x7f)), 23));
-
- return exp2fx * exp2ix;
-}
-
-Simd4f log2(const Simd4f& v)
-{
- Simd4f scale = simd4f(1.44269504088896341f); // 1/ln(2)
- const float* ptr = array(v);
- return simd4f(::logf(ptr[0]), ::logf(ptr[1]), ::logf(ptr[2]), ::logf(ptr[3])) * scale;
-}
-
-Simd4f dot3(const Simd4f& v0, const Simd4f& v1)
-{
- Simd4f tmp = v0 * v1;
- return splat<0>(tmp) + splat<1>(tmp) + splat<2>(tmp);
-}
-
-Simd4f cross3(const Simd4f& v0, const Simd4f& v1)
-{
- float32x2_t x0_y0 = vget_low_f32(v0.f4);
- float32x2_t z0_w0 = vget_high_f32(v0.f4);
- float32x2_t x1_y1 = vget_low_f32(v1.f4);
- float32x2_t z1_w1 = vget_high_f32(v1.f4);
-
- float32x2_t y1_z1 = vext_f32(x1_y1, z1_w1, 1);
- float32x2_t y0_z0 = vext_f32(x0_y0, z0_w0, 1);
-
- float32x2_t z0x1_w0y1 = vmul_f32(z0_w0, x1_y1);
- float32x2_t x0y1_y0z1 = vmul_f32(x0_y0, y1_z1);
-
- float32x2_t y2_w2 = vmls_f32(z0x1_w0y1, x0_y0, z1_w1);
- float32x2_t z2_x2 = vmls_f32(x0y1_y0z1, y0_z0, x1_y1);
- float32x2_t x2_y2 = vext_f32(z2_x2, y2_w2, 1);
-
- return vcombine_f32(x2_y2, z2_x2);
-}
-
-void transpose(Simd4f& x, Simd4f& y, Simd4f& z, Simd4f& w)
-{
-#if NV_SIMD_INLINE_ASSEMBLER
- asm volatile("vzip.f32 %q0, %q2 \n\t"
- "vzip.f32 %q1, %q3 \n\t"
- "vzip.f32 %q0, %q1 \n\t"
- "vzip.f32 %q2, %q3 \n\t"
- : "+w"(x.f4), "+w"(y.f4), "+w"(z.f4), "+w"(w.f4));
-#else
- float32x4x2_t v0v1 = vzipq_f32(x.f4, z.f4);
- float32x4x2_t v2v3 = vzipq_f32(y.f4, w.f4);
- float32x4x2_t zip0 = vzipq_f32(v0v1.val[0], v2v3.val[0]);
- float32x4x2_t zip1 = vzipq_f32(v0v1.val[1], v2v3.val[1]);
-
- x = zip0.val[0];
- y = zip0.val[1];
- z = zip1.val[0];
- w = zip1.val[1];
-#endif
-}
-
-void zip(Simd4f& v0, Simd4f& v1)
-{
-#if NV_SIMD_INLINE_ASSEMBLER
- asm volatile("vzip.f32 %q0, %q1 \n\t" : "+w"(v0.f4), "+w"(v1.f4));
-#else
- float32x4x2_t uzp = vzipq_f32(v0.f4, v1.f4);
- v0 = uzp.val[0];
- v1 = uzp.val[1];
-#endif
-}
-
-void unzip(Simd4f& v0, Simd4f& v1)
-{
-#if NV_SIMD_INLINE_ASSEMBLER
- asm volatile("vuzp.f32 %q0, %q1 \n\t" : "+w"(v0.f4), "+w"(v1.f4));
-#else
- float32x4x2_t uzp = vuzpq_f32(v0.f4, v1.f4);
- v0 = uzp.val[0];
- v1 = uzp.val[1];
-#endif
-}
-
-Simd4f swaphilo(const Simd4f& v)
-{
- return vcombine_f32(vget_high_f32(v.f4), vget_low_f32(v.f4));
-}
-
-int allEqual(const Simd4f& v0, const Simd4f& v1)
-{
- return allTrue(v0 == v1);
-}
-
-int allEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask)
-{
- return allTrue(outMask = v0 == v1);
-}
-
-int anyEqual(const Simd4f& v0, const Simd4f& v1)
-{
- return anyTrue(v0 == v1);
-}
-
-int anyEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask)
-{
- return anyTrue(outMask = v0 == v1);
-}
-
-int allGreater(const Simd4f& v0, const Simd4f& v1)
-{
- return allTrue(v0 > v1);
-}
-
-int allGreater(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask)
-{
- return allTrue(outMask = v0 > v1);
-}
-
-int anyGreater(const Simd4f& v0, const Simd4f& v1)
-{
- return anyTrue(v0 > v1);
-}
-
-int anyGreater(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask)
-{
- return anyTrue(outMask = v0 > v1);
-}
-
-int allGreaterEqual(const Simd4f& v0, const Simd4f& v1)
-{
- return allTrue(v0 >= v1);
-}
-
-int allGreaterEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask)
-{
- return allTrue(outMask = v0 >= v1);
-}
-
-int anyGreaterEqual(const Simd4f& v0, const Simd4f& v1)
-{
- return anyTrue(v0 >= v1);
-}
-
-int anyGreaterEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask)
-{
- return anyTrue(outMask = v0 >= v1);
-}
-
-int allTrue(const Simd4f& v)
-{
-#if NV_SIMD_INLINE_ASSEMBLER
- int result;
- asm volatile("vmovq q0, %q1 \n\t"
- "vand.u32 d0, d0, d1 \n\t"
- "vpmin.u32 d0, d0, d0 \n\t"
- "vcmp.f32 s0, #0 \n\t"
- "fmrx %0, fpscr"
- : "=r"(result)
- : "w"(v.f4)
- : "q0");
- return result >> 28 & 0x1;
-#else
- uint16x4_t hi = vget_high_u16(vreinterpretq_u16_u32(v.u4));
- uint16x4_t lo = vmovn_u32(v.u4);
- uint16x8_t combined = vcombine_u16(lo, hi);
- uint32x2_t reduced = vreinterpret_u32_u8(vmovn_u16(combined));
- return vget_lane_u32(reduced, 0) == 0xffffffff;
-#endif
-}
-
-int anyTrue(const Simd4f& v)
-{
-#if NV_SIMD_INLINE_ASSEMBLER
- int result;
- asm volatile("vmovq q0, %q1 \n\t"
- "vorr.u32 d0, d0, d1 \n\t"
- "vpmax.u32 d0, d0, d0 \n\t"
- "vcmp.f32 s0, #0 \n\t"
- "fmrx %0, fpscr"
- : "=r"(result)
- : "w"(v.f4)
- : "q0");
- return result >> 28 & 0x1;
-#else
- uint16x4_t hi = vget_high_u16(vreinterpretq_u16_u32(v.u4));
- uint16x4_t lo = vmovn_u32(v.u4);
- uint16x8_t combined = vcombine_u16(lo, hi);
- uint32x2_t reduced = vreinterpret_u32_u8(vmovn_u16(combined));
- return vget_lane_u32(reduced, 0) != 0x0;
-#endif
-}
-
-NV_SIMD_NAMESPACE_END
diff --git a/PxShared/src/NvSimd/include/neon/NvNeonSimd4i.h b/PxShared/src/NvSimd/include/neon/NvNeonSimd4i.h
deleted file mode 100644
index 7cf4ec49..00000000
--- a/PxShared/src/NvSimd/include/neon/NvNeonSimd4i.h
+++ /dev/null
@@ -1,303 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-#pragma once
-
-NV_SIMD_NAMESPACE_BEGIN
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// factory implementation
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4iZeroFactory::operator Simd4i() const
-{
- return vdupq_n_s32(0);
-}
-
-Simd4iScalarFactory::operator Simd4i() const
-{
- return vdupq_n_s32(value);
-}
-
-Simd4iTupleFactory::operator Simd4i() const
-{
- return reinterpret_cast<const Simd4i&>(tuple);
-}
-
-Simd4iLoadFactory::operator Simd4i() const
-{
- return vld1q_s32(ptr);
-}
-
-Simd4iAlignedLoadFactory::operator Simd4i() const
-{
- return vld1q_s32(ptr);
-}
-
-Simd4iOffsetLoadFactory::operator Simd4i() const
-{
- return vld1q_s32(reinterpret_cast<const int*>(reinterpret_cast<const char*>(ptr) + offset));
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// expression template
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-template <>
-inline ComplementExpr<Simd4i>::operator Simd4i() const
-{
- return vbicq_u32(vdupq_n_u32(0xffffffff), v.u4);
-}
-
-template <>
-inline Simd4i operator&(const ComplementExpr<Simd4i>& complement, const Simd4i& v)
-{
- return vbicq_u32(v.u4, complement.v.u4);
-}
-
-template <>
-inline Simd4i operator&(const Simd4i& v, const ComplementExpr<Simd4i>& complement)
-{
- return vbicq_u32(v.u4, complement.v.u4);
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// operator implementations
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4i operator==(const Simd4i& v0, const Simd4i& v1)
-{
- return vceqq_u32(v0.u4, v1.u4);
-}
-
-Simd4i operator<(const Simd4i& v0, const Simd4i& v1)
-{
- return vcltq_s32(v0.i4, v1.i4);
-}
-
-Simd4i operator>(const Simd4i& v0, const Simd4i& v1)
-{
- return vcgtq_s32(v0.i4, v1.i4);
-}
-
-ComplementExpr<Simd4i> operator~(const Simd4i& v)
-{
- return ComplementExpr<Simd4i>(v);
-}
-
-Simd4i operator&(const Simd4i& v0, const Simd4i& v1)
-{
- return vandq_u32(v0.u4, v1.u4);
-}
-
-Simd4i operator|(const Simd4i& v0, const Simd4i& v1)
-{
- return vorrq_u32(v0.u4, v1.u4);
-}
-
-Simd4i operator^(const Simd4i& v0, const Simd4i& v1)
-{
- return veorq_u32(v0.u4, v1.u4);
-}
-
-Simd4i operator<<(const Simd4i& v, int shift)
-{
- return vshlq_u32(v.u4, vdupq_n_s32(shift));
-}
-
-Simd4i operator>>(const Simd4i& v, int shift)
-{
- return vshlq_u32(v.u4, vdupq_n_s32(-shift));
-}
-
-Simd4i operator<<(const Simd4i& v, const Simd4i& shift)
-{
- return vshlq_u32(v.u4, shift.i4);
-}
-
-Simd4i operator>>(const Simd4i& v, const Simd4i& shift)
-{
- return vshlq_u32(v.u4, vnegq_s32(shift.i4));
-}
-
-Simd4i operator+(const Simd4i& v)
-{
- return v;
-}
-
-Simd4i operator+(const Simd4i& v0, const Simd4i& v1)
-{
- return vaddq_u32(v0.u4, v1.u4);
-}
-
-Simd4i operator-(const Simd4i& v)
-{
- return vnegq_s32(v.i4);
-}
-
-Simd4i operator-(const Simd4i& v0, const Simd4i& v1)
-{
- return vsubq_u32(v0.u4, v1.u4);
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// function implementations
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4i simd4i(const Simd4f& v)
-{
- return v.u4;
-}
-
-Simd4i truncate(const Simd4f& v)
-{
- return vcvtq_s32_f32(v.f4);
-}
-
-int (&array(Simd4i& v))[4]
-{
- return reinterpret_cast<int(&)[4]>(v);
-}
-
-const int (&array(const Simd4i& v))[4]
-{
- return reinterpret_cast<const int(&)[4]>(v);
-}
-
-void store(int* ptr, const Simd4i& v)
-{
- return vst1q_s32(ptr, v.i4);
-}
-
-void storeAligned(int* ptr, const Simd4i& v)
-{
- vst1q_s32(ptr, v.i4);
-}
-
-void storeAligned(int* ptr, unsigned int offset, const Simd4i& v)
-{
- return storeAligned(reinterpret_cast<int*>(reinterpret_cast<char*>(ptr) + offset), v);
-}
-
-template <size_t i>
-Simd4i splat(Simd4i const& v)
-{
- return vdupq_n_s32(array(v)[i]);
-}
-
-Simd4i select(Simd4i const& mask, Simd4i const& v0, Simd4i const& v1)
-{
- return vbslq_u32(mask.u4, v0.u4, v1.u4);
-}
-
-int allEqual(const Simd4i& v0, const Simd4i& v1)
-{
- return allTrue(operator==(v0, v1));
-}
-
-int allEqual(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask)
-{
- return allTrue(outMask = operator==(v0, v1));
-}
-
-int anyEqual(const Simd4i& v0, const Simd4i& v1)
-{
- return anyTrue(operator==(v0, v1));
-}
-
-int anyEqual(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask)
-{
- return anyTrue(outMask = operator==(v0, v1));
-}
-
-int allGreater(const Simd4i& v0, const Simd4i& v1)
-{
- return allTrue(operator>(v0, v1));
-}
-
-int allGreater(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask)
-{
- return allTrue(outMask = operator>(v0, v1));
-}
-
-int anyGreater(const Simd4i& v0, const Simd4i& v1)
-{
- return anyTrue(operator>(v0, v1));
-}
-
-int anyGreater(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask)
-{
- return anyTrue(outMask = operator>(v0, v1));
-}
-
-int allTrue(const Simd4i& v)
-{
-#if NV_SIMD_INLINE_ASSEMBLER
- int result;
- asm volatile("vmovq q0, %q1 \n\t"
- "vand.u32 d0, d0, d1 \n\t"
- "vpmin.u32 d0, d0, d0 \n\t"
- "vcmp.f32 s0, #0 \n\t"
- "fmrx %0, fpscr"
- : "=r"(result)
- : "w"(v.u4)
- : "q0");
- return result >> 28 & 0x1;
-#else
- uint16x4_t hi = vget_high_u16(vreinterpretq_u16_u32(v.u4));
- uint16x4_t lo = vmovn_u32(v.u4);
- uint16x8_t combined = vcombine_u16(lo, hi);
- uint32x2_t reduced = vreinterpret_u32_u8(vmovn_u16(combined));
- return vget_lane_u32(reduced, 0) == 0xffffffff;
-#endif
-}
-
-int anyTrue(const Simd4i& v)
-{
-#if NV_SIMD_INLINE_ASSEMBLER
- int result;
- asm volatile("vmovq q0, %q1 \n\t"
- "vorr.u32 d0, d0, d1 \n\t"
- "vpmax.u32 d0, d0, d0 \n\t"
- "vcmp.f32 s0, #0 \n\t"
- "fmrx %0, fpscr"
- : "=r"(result)
- : "w"(v.u4)
- : "q0");
- return result >> 28 & 0x1;
-#else
- uint16x4_t hi = vget_high_u16(vreinterpretq_u16_u32(v.u4));
- uint16x4_t lo = vmovn_u32(v.u4);
- uint16x8_t combined = vcombine_u16(lo, hi);
- uint32x2_t reduced = vreinterpret_u32_u8(vmovn_u16(combined));
- return vget_lane_u32(reduced, 0) != 0x0;
-#endif
-}
-
-NV_SIMD_NAMESPACE_END
diff --git a/PxShared/src/NvSimd/include/neon/NvNeonSimdTypes.h b/PxShared/src/NvSimd/include/neon/NvNeonSimdTypes.h
deleted file mode 100644
index a1d6820e..00000000
--- a/PxShared/src/NvSimd/include/neon/NvNeonSimdTypes.h
+++ /dev/null
@@ -1,71 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-#pragma once
-
-#include <arm_neon.h>
-
-NV_SIMD_NAMESPACE_BEGIN
-
-union Simd4f
-{
- Simd4f()
- {
- }
- Simd4f(const float32x4_t& v) : f4(v)
- {
- }
-#ifndef _M_ARM // all *32x4_t map to the same type
- Simd4f(const uint32x4_t& v) : u4(v)
- {
- }
-#endif
- float32x4_t f4;
- uint32x4_t u4;
- int32x4_t i4;
-};
-
-union Simd4i
-{
- Simd4i()
- {
- }
- Simd4i(const uint32x4_t& v) : u4(v)
- {
- }
-#ifndef _M_ARM // all *32x4_t map to the same type
- Simd4i(const int32x4_t& v) : i4(v)
- {
- }
-#endif
- uint32x4_t u4;
- int32x4_t i4;
-};
-
-NV_SIMD_NAMESPACE_END
diff --git a/PxShared/src/NvSimd/include/scalar/NvScalarSimd4f.h b/PxShared/src/NvSimd/include/scalar/NvScalarSimd4f.h
deleted file mode 100644
index 75a8d033..00000000
--- a/PxShared/src/NvSimd/include/scalar/NvScalarSimd4f.h
+++ /dev/null
@@ -1,464 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-#pragma once
-
-NV_SIMD_NAMESPACE_BEGIN
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// factory implementation
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4fZeroFactory::operator Scalar4f() const
-{
- return Scalar4f(0.0f, 0.0f, 0.0f, 0.0f);
-}
-
-Simd4fOneFactory::operator Scalar4f() const
-{
- return Scalar4f(1.0f, 1.0f, 1.0f, 1.0f);
-}
-
-Simd4fScalarFactory::operator Scalar4f() const
-{
- return Scalar4f(value, value, value, value);
-}
-
-Simd4fTupleFactory::operator Scalar4f() const
-{
- return reinterpret_cast<const Scalar4f&>(tuple);
-}
-
-Simd4fLoadFactory::operator Scalar4f() const
-{
- return Scalar4f(ptr[0], ptr[1], ptr[2], ptr[3]);
-}
-
-Simd4fLoad3Factory::operator Scalar4f() const
-{
- return Scalar4f(ptr[0], ptr[1], ptr[2], 0.0f);
-}
-
-Simd4fLoad3SetWFactory::operator Scalar4f() const
-{
- return Scalar4f(ptr[0], ptr[1], ptr[2], w);
-}
-
-Simd4fAlignedLoadFactory::operator Scalar4f() const
-{
- return Scalar4f(ptr[0], ptr[1], ptr[2], ptr[3]);
-}
-
-Simd4fOffsetLoadFactory::operator Scalar4f() const
-{
- return Simd4fAlignedLoadFactory(reinterpret_cast<const float*>(reinterpret_cast<const char*>(ptr) + offset));
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// expression template
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-template <>
-inline ComplementExpr<Scalar4f>::operator Scalar4f() const
-{
- return Scalar4f(~v.u4[0], ~v.u4[1], ~v.u4[2], ~v.u4[3]);
-}
-
-template <>
-inline Scalar4f operator&<Scalar4f>(const ComplementExpr<Scalar4f>& complement, const Scalar4f& v)
-{
- return Scalar4f(v.u4[0] & ~complement.v.u4[0], v.u4[1] & ~complement.v.u4[1], v.u4[2] & ~complement.v.u4[2],
- v.u4[3] & ~complement.v.u4[3]);
-}
-
-template <>
-inline Scalar4f operator&<Scalar4f>(const Scalar4f& v, const ComplementExpr<Scalar4f>& complement)
-{
- return Scalar4f(v.u4[0] & ~complement.v.u4[0], v.u4[1] & ~complement.v.u4[1], v.u4[2] & ~complement.v.u4[2],
- v.u4[3] & ~complement.v.u4[3]);
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// operator implementations
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-inline Scalar4f operator==(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(v0.f4[0] == v1.f4[0], v0.f4[1] == v1.f4[1], v0.f4[2] == v1.f4[2], v0.f4[3] == v1.f4[3]);
-}
-
-inline Scalar4f operator<(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(v0.f4[0] < v1.f4[0], v0.f4[1] < v1.f4[1], v0.f4[2] < v1.f4[2], v0.f4[3] < v1.f4[3]);
-}
-
-inline Scalar4f operator<=(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(v0.f4[0] <= v1.f4[0], v0.f4[1] <= v1.f4[1], v0.f4[2] <= v1.f4[2], v0.f4[3] <= v1.f4[3]);
-}
-
-inline Scalar4f operator>(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(v0.f4[0] > v1.f4[0], v0.f4[1] > v1.f4[1], v0.f4[2] > v1.f4[2], v0.f4[3] > v1.f4[3]);
-}
-
-inline Scalar4f operator>=(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(v0.f4[0] >= v1.f4[0], v0.f4[1] >= v1.f4[1], v0.f4[2] >= v1.f4[2], v0.f4[3] >= v1.f4[3]);
-}
-
-inline ComplementExpr<Scalar4f> operator~(const Scalar4f& v)
-{
- return ComplementExpr<Scalar4f>(v);
-}
-
-inline Scalar4f operator&(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(v0.u4[0] & v1.u4[0], v0.u4[1] & v1.u4[1], v0.u4[2] & v1.u4[2], v0.u4[3] & v1.u4[3]);
-}
-
-inline Scalar4f operator|(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(v0.u4[0] | v1.u4[0], v0.u4[1] | v1.u4[1], v0.u4[2] | v1.u4[2], v0.u4[3] | v1.u4[3]);
-}
-
-inline Scalar4f operator^(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(v0.u4[0] ^ v1.u4[0], v0.u4[1] ^ v1.u4[1], v0.u4[2] ^ v1.u4[2], v0.u4[3] ^ v1.u4[3]);
-}
-
-inline Scalar4f operator<<(const Scalar4f& v, int shift)
-{
- return Scalar4f(v.u4[0] << shift, v.u4[1] << shift, v.u4[2] << shift, v.u4[3] << shift);
-}
-
-inline Scalar4f operator>>(const Scalar4f& v, int shift)
-{
- return Scalar4f(v.u4[0] >> shift, v.u4[1] >> shift, v.u4[2] >> shift, v.u4[3] >> shift);
-}
-
-inline Scalar4f operator+(const Scalar4f& v)
-{
- return v;
-}
-
-inline Scalar4f operator+(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(v0.f4[0] + v1.f4[0], v0.f4[1] + v1.f4[1], v0.f4[2] + v1.f4[2], v0.f4[3] + v1.f4[3]);
-}
-
-inline Scalar4f operator-(const Scalar4f& v)
-{
- return Scalar4f(-v.f4[0], -v.f4[1], -v.f4[2], -v.f4[3]);
-}
-
-inline Scalar4f operator-(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(v0.f4[0] - v1.f4[0], v0.f4[1] - v1.f4[1], v0.f4[2] - v1.f4[2], v0.f4[3] - v1.f4[3]);
-}
-
-inline Scalar4f operator*(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(v0.f4[0] * v1.f4[0], v0.f4[1] * v1.f4[1], v0.f4[2] * v1.f4[2], v0.f4[3] * v1.f4[3]);
-}
-
-inline Scalar4f operator/(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(v0.f4[0] / v1.f4[0], v0.f4[1] / v1.f4[1], v0.f4[2] / v1.f4[2], v0.f4[3] / v1.f4[3]);
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// function implementations
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-inline Scalar4f simd4f(const Scalar4i& v)
-{
- return reinterpret_cast<const Scalar4f&>(v);
-}
-
-inline Scalar4f convert(const Scalar4i& v)
-{
- return Scalar4f(float(v.i4[0]), float(v.i4[1]), float(v.i4[2]), float(v.i4[3]));
-}
-
-inline float (&array(Scalar4f& v))[4]
-{
- return v.f4;
-}
-
-inline const float (&array(const Scalar4f& v))[4]
-{
- return v.f4;
-}
-
-inline void store(float* ptr, const Scalar4f& v)
-{
- ptr[0] = v.f4[0];
- ptr[1] = v.f4[1];
- ptr[2] = v.f4[2];
- ptr[3] = v.f4[3];
-}
-
-inline void storeAligned(float* ptr, const Scalar4f& v)
-{
- store(ptr, v);
-}
-
-inline void store3(float* ptr, const Scalar4f& v)
-{
- ptr[0] = v.f4[0];
- ptr[1] = v.f4[1];
- ptr[2] = v.f4[2];
-}
-
-inline void storeAligned(float* ptr, unsigned int offset, const Scalar4f& v)
-{
- storeAligned(reinterpret_cast<float*>(reinterpret_cast<char*>(ptr) + offset), v);
-}
-
-template <size_t i>
-inline Scalar4f splat(const Scalar4f& v)
-{
- return Scalar4f(v.f4[i], v.f4[i], v.f4[i], v.f4[i]);
-}
-
-inline Scalar4f select(const Scalar4f& mask, const Scalar4f& v0, const Scalar4f& v1)
-{
- return ((v0 ^ v1) & mask) ^ v1;
-}
-
-inline Scalar4f abs(const Scalar4f& v)
-{
- return Scalar4f(::fabsf(v.f4[0]), ::fabsf(v.f4[1]), ::fabsf(v.f4[2]), ::fabsf(v.f4[3]));
-}
-
-inline Scalar4f floor(const Scalar4f& v)
-{
- return Scalar4f(::floorf(v.f4[0]), ::floorf(v.f4[1]), ::floorf(v.f4[2]), ::floorf(v.f4[3]));
-}
-
-#if !defined max
-inline Scalar4f max(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(std::max(v0.f4[0], v1.f4[0]), std::max(v0.f4[1], v1.f4[1]), std::max(v0.f4[2], v1.f4[2]),
- std::max(v0.f4[3], v1.f4[3]));
-}
-#endif
-
-#if !defined min
-inline Scalar4f min(const Scalar4f& v0, const Scalar4f& v1)
-{
- return Scalar4f(std::min(v0.f4[0], v1.f4[0]), std::min(v0.f4[1], v1.f4[1]), std::min(v0.f4[2], v1.f4[2]),
- std::min(v0.f4[3], v1.f4[3]));
-}
-#endif
-
-inline Scalar4f recip(const Scalar4f& v)
-{
- return Scalar4f(1 / v.f4[0], 1 / v.f4[1], 1 / v.f4[2], 1 / v.f4[3]);
-}
-
-template <int n>
-inline Scalar4f recip(const Scalar4f& v)
-{
- return recip(v);
-}
-
-inline Scalar4f sqrt(const Scalar4f& v)
-{
- return Scalar4f(::sqrtf(v.f4[0]), ::sqrtf(v.f4[1]), ::sqrtf(v.f4[2]), ::sqrtf(v.f4[3]));
-}
-
-inline Scalar4f rsqrt(const Scalar4f& v)
-{
- return recip(sqrt(v));
-}
-
-template <int n>
-inline Scalar4f rsqrt(const Scalar4f& v)
-{
- return rsqrt(v);
-}
-
-inline Scalar4f exp2(const Scalar4f& v)
-{
- float scale = 0.69314718055994531f; // ::logf(2.0f);
- return Scalar4f(::expf(v.f4[0] * scale), ::expf(v.f4[1] * scale), ::expf(v.f4[2] * scale), ::expf(v.f4[3] * scale));
-}
-
-inline Scalar4f log2(const Scalar4f& v)
-{
- float scale = 1.44269504088896341f; // 1/ln(2)
- return Scalar4f(::logf(v.f4[0]) * scale, ::logf(v.f4[1]) * scale, ::logf(v.f4[2]) * scale, ::logf(v.f4[3]) * scale);
-}
-
-inline Scalar4f dot3(const Scalar4f& v0, const Scalar4f& v1)
-{
- return simd4f(v0.f4[0] * v1.f4[0] + v0.f4[1] * v1.f4[1] + v0.f4[2] * v1.f4[2]);
-}
-
-inline Scalar4f cross3(const Scalar4f& v0, const Scalar4f& v1)
-{
- return simd4f(v0.f4[1] * v1.f4[2] - v0.f4[2] * v1.f4[1], v0.f4[2] * v1.f4[0] - v0.f4[0] * v1.f4[2],
- v0.f4[0] * v1.f4[1] - v0.f4[1] * v1.f4[0], 0.0f);
-}
-
-inline void transpose(Scalar4f& x, Scalar4f& y, Scalar4f& z, Scalar4f& w)
-{
- float x1 = x.f4[1], x2 = x.f4[2], x3 = x.f4[3];
- float y2 = y.f4[2], y3 = y.f4[3], z3 = z.f4[3];
-
- x.f4[1] = y.f4[0];
- x.f4[2] = z.f4[0];
- x.f4[3] = w.f4[0];
- y.f4[0] = x1;
- y.f4[2] = z.f4[1];
- y.f4[3] = w.f4[1];
- z.f4[0] = x2;
- z.f4[1] = y2;
- z.f4[3] = w.f4[2];
- w.f4[0] = x3;
- w.f4[1] = y3;
- w.f4[2] = z3;
-}
-
-inline void zip(Scalar4f& v0, Scalar4f& v1)
-{
- float z0 = v0.f4[2];
- v0.f4[2] = v0.f4[1];
- v0.f4[1] = v1.f4[0];
- v1.f4[0] = z0;
-
- float z1 = v1.f4[2];
- v1.f4[2] = v0.f4[3];
- v0.f4[3] = v1.f4[1];
- v1.f4[1] = z1;
-}
-
-inline void unzip(Scalar4f& v0, Scalar4f& v1)
-{
- float z0 = v0.f4[2];
- v0.f4[2] = v1.f4[0];
- v1.f4[0] = v0.f4[1];
- v0.f4[1] = z0;
-
- float z1 = v1.f4[2];
- v1.f4[2] = v1.f4[1];
- v1.f4[1] = v0.f4[3];
- v0.f4[3] = z1;
-}
-
-inline Scalar4f swaphilo(const Scalar4f& v)
-{
- return Scalar4f(v.f4[2], v.f4[3], v.f4[0], v.f4[1]);
-}
-
-inline int allEqual(const Scalar4f& v0, const Scalar4f& v1)
-{
- return v0.f4[0] == v1.f4[0] && v0.f4[1] == v1.f4[1] && v0.f4[2] == v1.f4[2] && v0.f4[3] == v1.f4[3];
-}
-
-inline int allEqual(const Scalar4f& v0, const Scalar4f& v1, Scalar4f& outMask)
-{
- bool b0 = v0.f4[0] == v1.f4[0], b1 = v0.f4[1] == v1.f4[1], b2 = v0.f4[2] == v1.f4[2], b3 = v0.f4[3] == v1.f4[3];
- outMask = Scalar4f(b0, b1, b2, b3);
- return b0 && b1 && b2 && b3;
-}
-
-inline int anyEqual(const Scalar4f& v0, const Scalar4f& v1)
-{
- return v0.f4[0] == v1.f4[0] || v0.f4[1] == v1.f4[1] || v0.f4[2] == v1.f4[2] || v0.f4[3] == v1.f4[3];
-}
-
-inline int anyEqual(const Scalar4f& v0, const Scalar4f& v1, Scalar4f& outMask)
-{
- bool b0 = v0.f4[0] == v1.f4[0], b1 = v0.f4[1] == v1.f4[1], b2 = v0.f4[2] == v1.f4[2], b3 = v0.f4[3] == v1.f4[3];
- outMask = Scalar4f(b0, b1, b2, b3);
- return b0 || b1 || b2 || b3;
-}
-
-inline int allGreater(const Scalar4f& v0, const Scalar4f& v1)
-{
- return v0.f4[0] > v1.f4[0] && v0.f4[1] > v1.f4[1] && v0.f4[2] > v1.f4[2] && v0.f4[3] > v1.f4[3];
-}
-
-inline int allGreater(const Scalar4f& v0, const Scalar4f& v1, Scalar4f& outMask)
-{
- bool b0 = v0.f4[0] > v1.f4[0], b1 = v0.f4[1] > v1.f4[1], b2 = v0.f4[2] > v1.f4[2], b3 = v0.f4[3] > v1.f4[3];
- outMask = Scalar4f(b0, b1, b2, b3);
- return b0 && b1 && b2 && b3;
-}
-
-inline int anyGreater(const Scalar4f& v0, const Scalar4f& v1)
-{
- return v0.f4[0] > v1.f4[0] || v0.f4[1] > v1.f4[1] || v0.f4[2] > v1.f4[2] || v0.f4[3] > v1.f4[3];
-}
-
-inline int anyGreater(const Scalar4f& v0, const Scalar4f& v1, Scalar4f& outMask)
-{
- bool b0 = v0.f4[0] > v1.f4[0], b1 = v0.f4[1] > v1.f4[1], b2 = v0.f4[2] > v1.f4[2], b3 = v0.f4[3] > v1.f4[3];
- outMask = Scalar4f(b0, b1, b2, b3);
- return b0 || b1 || b2 || b3;
-}
-
-inline int allGreaterEqual(const Scalar4f& v0, const Scalar4f& v1)
-{
- return v0.f4[0] >= v1.f4[0] && v0.f4[1] >= v1.f4[1] && v0.f4[2] >= v1.f4[2] && v0.f4[3] >= v1.f4[3];
-}
-
-inline int allGreaterEqual(const Scalar4f& v0, const Scalar4f& v1, Scalar4f& outMask)
-{
- bool b0 = v0.f4[0] >= v1.f4[0], b1 = v0.f4[1] >= v1.f4[1], b2 = v0.f4[2] >= v1.f4[2], b3 = v0.f4[3] >= v1.f4[3];
- outMask = Scalar4f(b0, b1, b2, b3);
- return b0 && b1 && b2 && b3;
-}
-
-inline int anyGreaterEqual(const Scalar4f& v0, const Scalar4f& v1)
-{
- return v0.f4[0] >= v1.f4[0] || v0.f4[1] >= v1.f4[1] || v0.f4[2] >= v1.f4[2] || v0.f4[3] >= v1.f4[3];
-}
-
-inline int anyGreaterEqual(const Scalar4f& v0, const Scalar4f& v1, Scalar4f& outMask)
-{
- bool b0 = v0.f4[0] >= v1.f4[0], b1 = v0.f4[1] >= v1.f4[1], b2 = v0.f4[2] >= v1.f4[2], b3 = v0.f4[3] >= v1.f4[3];
- outMask = Scalar4f(b0, b1, b2, b3);
- return b0 || b1 || b2 || b3;
-}
-
-inline int allTrue(const Scalar4f& v)
-{
- return v.i4[0] & v.i4[1] & v.i4[2] & v.i4[3];
-}
-
-inline int anyTrue(const Scalar4f& v)
-{
- return v.i4[0] | v.i4[1] | v.i4[2] | v.i4[3];
-}
-
-NV_SIMD_NAMESPACE_END
diff --git a/PxShared/src/NvSimd/include/scalar/NvScalarSimd4i.h b/PxShared/src/NvSimd/include/scalar/NvScalarSimd4i.h
deleted file mode 100644
index 5f99e208..00000000
--- a/PxShared/src/NvSimd/include/scalar/NvScalarSimd4i.h
+++ /dev/null
@@ -1,272 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-#pragma once
-
-NV_SIMD_NAMESPACE_BEGIN
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// factory implementation
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4iZeroFactory::operator Scalar4i() const
-{
- return Scalar4i(0, 0, 0, 0);
-}
-
-Simd4iScalarFactory::operator Scalar4i() const
-{
- return Scalar4i(value, value, value, value);
-}
-
-Simd4iTupleFactory::operator Scalar4i() const
-{
- return reinterpret_cast<const Scalar4i&>(tuple);
-}
-
-Simd4iLoadFactory::operator Scalar4i() const
-{
- return Scalar4i(ptr[0], ptr[1], ptr[2], ptr[3]);
-}
-
-Simd4iAlignedLoadFactory::operator Scalar4i() const
-{
- return Scalar4i(ptr[0], ptr[1], ptr[2], ptr[3]);
-}
-
-Simd4iOffsetLoadFactory::operator Scalar4i() const
-{
- return Simd4iAlignedLoadFactory(reinterpret_cast<const int*>(reinterpret_cast<const char*>(ptr) + offset));
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// expression template
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-template <>
-inline ComplementExpr<Scalar4i>::operator Scalar4i() const
-{
- return Scalar4i(~v.u4[0], ~v.u4[1], ~v.u4[2], ~v.u4[3]);
-}
-
-template <>
-inline Scalar4i operator&(const ComplementExpr<Scalar4i>& complement, const Scalar4i& v)
-{
- return Scalar4i(v.u4[0] & ~complement.v.u4[0], v.u4[1] & ~complement.v.u4[1], v.u4[2] & ~complement.v.u4[2],
- v.u4[3] & ~complement.v.u4[3]);
-}
-
-template <>
-inline Scalar4i operator&(const Scalar4i& v, const ComplementExpr<Scalar4i>& complement)
-{
- return Scalar4i(v.u4[0] & ~complement.v.u4[0], v.u4[1] & ~complement.v.u4[1], v.u4[2] & ~complement.v.u4[2],
- v.u4[3] & ~complement.v.u4[3]);
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// operator implementations
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-inline ComplementExpr<Scalar4i> operator~(const Scalar4i& v)
-{
- return ComplementExpr<Scalar4i>(v);
-}
-
-inline Scalar4i operator&(const Scalar4i& v0, const Scalar4i& v1)
-{
- return Scalar4i(v0.u4[0] & v1.u4[0], v0.u4[1] & v1.u4[1], v0.u4[2] & v1.u4[2], v0.u4[3] & v1.u4[3]);
-}
-
-inline Scalar4i operator|(const Scalar4i& v0, const Scalar4i& v1)
-{
- return Scalar4i(v0.u4[0] | v1.u4[0], v0.u4[1] | v1.u4[1], v0.u4[2] | v1.u4[2], v0.u4[3] | v1.u4[3]);
-}
-
-inline Scalar4i operator^(const Scalar4i& v0, const Scalar4i& v1)
-{
- return Scalar4i(v0.u4[0] ^ v1.u4[0], v0.u4[1] ^ v1.u4[1], v0.u4[2] ^ v1.u4[2], v0.u4[3] ^ v1.u4[3]);
-}
-
-inline Scalar4i operator<<(const Scalar4i& v, int shift)
-{
- return Scalar4i(v.u4[0] << shift, v.u4[1] << shift, v.u4[2] << shift, v.u4[3] << shift);
-}
-
-inline Scalar4i operator>>(const Scalar4i& v, int shift)
-{
- return Scalar4i(v.u4[0] >> shift, v.u4[1] >> shift, v.u4[2] >> shift, v.u4[3] >> shift);
-}
-
-inline Scalar4i operator==(const Scalar4i& v0, const Scalar4i& v1)
-{
- return Scalar4i(v0.i4[0] == v1.i4[0], v0.i4[1] == v1.i4[1], v0.i4[2] == v1.i4[2], v0.i4[3] == v1.i4[3]);
-}
-
-inline Scalar4i operator<(const Scalar4i& v0, const Scalar4i& v1)
-{
- return Scalar4i(v0.i4[0] < v1.i4[0], v0.i4[1] < v1.i4[1], v0.i4[2] < v1.i4[2], v0.i4[3] < v1.i4[3]);
-}
-
-inline Scalar4i operator>(const Scalar4i& v0, const Scalar4i& v1)
-{
- return Scalar4i(v0.i4[0] > v1.i4[0], v0.i4[1] > v1.i4[1], v0.i4[2] > v1.i4[2], v0.i4[3] > v1.i4[3]);
-}
-
-inline Scalar4i operator+(const Scalar4i& v)
-{
- return v;
-}
-
-inline Scalar4i operator+(const Scalar4i& v0, const Scalar4i& v1)
-{
- return Scalar4i(v0.i4[0] + v1.i4[0], v0.i4[1] + v1.i4[1], v0.i4[2] + v1.i4[2], v0.i4[3] + v1.i4[3]);
-}
-
-inline Scalar4i operator-(const Scalar4i& v)
-{
- return Scalar4i(-v.i4[0], -v.i4[1], -v.i4[2], -v.i4[3]);
-}
-
-inline Scalar4i operator-(const Scalar4i& v0, const Scalar4i& v1)
-{
- return Scalar4i(v0.i4[0] - v1.i4[0], v0.i4[1] - v1.i4[1], v0.i4[2] - v1.i4[2], v0.i4[3] - v1.i4[3]);
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// function implementations
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-inline Scalar4i simd4i(const Scalar4f& v)
-{
- return reinterpret_cast<const Scalar4i&>(v);
-}
-
-inline Scalar4i truncate(const Scalar4f& v)
-{
- return Scalar4i(int(v.f4[0]), int(v.f4[1]), int(v.f4[2]), int(v.f4[3]));
-}
-
-inline int (&array(Scalar4i& v))[4]
-{
- return v.i4;
-}
-
-inline const int (&array(const Scalar4i& v))[4]
-{
- return v.i4;
-}
-
-inline void store(int* ptr, const Scalar4i& v)
-{
- ptr[0] = v.i4[0];
- ptr[1] = v.i4[1];
- ptr[2] = v.i4[2];
- ptr[3] = v.i4[3];
-}
-
-inline void storeAligned(int* ptr, const Scalar4i& v)
-{
- store(ptr, v);
-}
-
-inline void storeAligned(int* ptr, unsigned int offset, const Scalar4i& v)
-{
- store(reinterpret_cast<int*>(reinterpret_cast<char*>(ptr) + offset), v);
-}
-
-template <size_t i>
-inline Scalar4i splat(const Scalar4i& v)
-{
- return Scalar4i(v.u4[i], v.u4[i], v.u4[i], v.u4[i]);
-}
-
-inline Scalar4i select(const Scalar4i& mask, const Scalar4i& v0, const Scalar4i& v1)
-{
- return ((v0 ^ v1) & mask) ^ v1;
-}
-
-inline int allEqual(const Scalar4i& v0, const Scalar4i& v1)
-{
- return v0.i4[0] == v1.i4[0] && v0.i4[1] == v1.i4[1] && v0.i4[2] == v1.i4[2] && v0.i4[3] == v1.i4[3];
-}
-
-inline int allEqual(const Scalar4i& v0, const Scalar4i& v1, Scalar4i& outMask)
-{
- bool b0 = v0.i4[0] == v1.i4[0], b1 = v0.i4[1] == v1.i4[1], b2 = v0.i4[2] == v1.i4[2], b3 = v0.i4[3] == v1.i4[3];
- outMask = Scalar4i(b0, b1, b2, b3);
- return b0 && b1 && b2 && b3;
-}
-
-inline int anyEqual(const Scalar4i& v0, const Scalar4i& v1)
-{
- return v0.i4[0] == v1.i4[0] || v0.i4[1] == v1.i4[1] || v0.i4[2] == v1.i4[2] || v0.i4[3] == v1.i4[3];
-}
-
-inline int anyEqual(const Scalar4i& v0, const Scalar4i& v1, Scalar4i& outMask)
-{
- bool b0 = v0.i4[0] == v1.i4[0], b1 = v0.i4[1] == v1.i4[1], b2 = v0.i4[2] == v1.i4[2], b3 = v0.i4[3] == v1.i4[3];
- outMask = Scalar4i(b0, b1, b2, b3);
- return b0 || b1 || b2 || b3;
-}
-
-inline int allGreater(const Scalar4i& v0, const Scalar4i& v1)
-{
- return v0.i4[0] > v1.i4[0] && v0.i4[1] > v1.i4[1] && v0.i4[2] > v1.i4[2] && v0.i4[3] > v1.i4[3];
-}
-
-inline int allGreater(const Scalar4i& v0, const Scalar4i& v1, Scalar4i& outMask)
-{
- bool b0 = v0.i4[0] > v1.i4[0], b1 = v0.i4[1] > v1.i4[1], b2 = v0.i4[2] > v1.i4[2], b3 = v0.i4[3] > v1.i4[3];
- outMask = Scalar4i(b0, b1, b2, b3);
- return b0 && b1 && b2 && b3;
-}
-
-inline int anyGreater(const Scalar4i& v0, const Scalar4i& v1)
-{
- return v0.i4[0] > v1.i4[0] || v0.i4[1] > v1.i4[1] || v0.i4[2] > v1.i4[2] || v0.i4[3] > v1.i4[3];
-}
-
-inline int anyGreater(const Scalar4i& v0, const Scalar4i& v1, Scalar4i& outMask)
-{
- bool b0 = v0.i4[0] > v1.i4[0], b1 = v0.i4[1] > v1.i4[1], b2 = v0.i4[2] > v1.i4[2], b3 = v0.i4[3] > v1.i4[3];
- outMask = Scalar4i(b0, b1, b2, b3);
- return b0 || b1 || b2 || b3;
-}
-
-inline int allTrue(const Scalar4i& v)
-{
- return v.i4[0] & v.i4[1] & v.i4[2] & v.i4[3];
-}
-
-inline int anyTrue(const Scalar4i& v)
-{
- return v.i4[0] | v.i4[1] | v.i4[2] | v.i4[3];
-}
-
-NV_SIMD_NAMESPACE_END
diff --git a/PxShared/src/NvSimd/include/scalar/NvScalarSimdTypes.h b/PxShared/src/NvSimd/include/scalar/NvScalarSimdTypes.h
deleted file mode 100644
index ac4239e9..00000000
--- a/PxShared/src/NvSimd/include/scalar/NvScalarSimdTypes.h
+++ /dev/null
@@ -1,154 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-#pragma once
-
-#if NV_WIIU
-#pragma ghs nowarning 193 // warning #193-D: zero used for undefined preprocessing identifier
-#endif
-
-#include <algorithm>
-
-#if NV_WIIU
-#pragma ghs endnowarning
-#endif
-
-#ifdef _MSC_VER
-typedef __int32 int32_t;
-typedef unsigned __int32 uint32_t;
-#endif
-
-NV_SIMD_NAMESPACE_BEGIN
-
-/** \brief Scalar fallback for SIMD containing 4 floats */
-union Scalar4f
-{
- Scalar4f()
- {
- }
-
- Scalar4f(float x, float y, float z, float w)
- {
- f4[0] = x;
- f4[1] = y;
- f4[2] = z;
- f4[3] = w;
- }
-
- Scalar4f(uint32_t x, uint32_t y, uint32_t z, uint32_t w)
- {
- u4[0] = x;
- u4[1] = y;
- u4[2] = z;
- u4[3] = w;
- }
-
- Scalar4f(bool x, bool y, bool z, bool w)
- {
- u4[0] = ~(uint32_t(x) - 1);
- u4[1] = ~(uint32_t(y) - 1);
- u4[2] = ~(uint32_t(z) - 1);
- u4[3] = ~(uint32_t(w) - 1);
- }
-
- Scalar4f(const Scalar4f& other)
- {
- f4[0] = other.f4[0];
- f4[1] = other.f4[1];
- f4[2] = other.f4[2];
- f4[3] = other.f4[3];
- }
-
- Scalar4f& operator=(const Scalar4f& other)
- {
- f4[0] = other.f4[0];
- f4[1] = other.f4[1];
- f4[2] = other.f4[2];
- f4[3] = other.f4[3];
- return *this;
- }
-
- float f4[4];
- int32_t i4[4];
- uint32_t u4[4];
-};
-
-/** \brief Scalar fallback for SIMD containing 4 integers */
-union Scalar4i
-{
- Scalar4i()
- {
- }
-
- Scalar4i(int32_t x, int32_t y, int32_t z, int32_t w)
- {
- i4[0] = x;
- i4[1] = y;
- i4[2] = z;
- i4[3] = w;
- }
-
- Scalar4i(uint32_t x, uint32_t y, uint32_t z, uint32_t w)
- {
- u4[0] = x;
- u4[1] = y;
- u4[2] = z;
- u4[3] = w;
- }
-
- Scalar4i(bool x, bool y, bool z, bool w)
- {
- u4[0] = ~(uint32_t(x) - 1);
- u4[1] = ~(uint32_t(y) - 1);
- u4[2] = ~(uint32_t(z) - 1);
- u4[3] = ~(uint32_t(w) - 1);
- }
-
- Scalar4i(const Scalar4i& other)
- {
- u4[0] = other.u4[0];
- u4[1] = other.u4[1];
- u4[2] = other.u4[2];
- u4[3] = other.u4[3];
- }
-
- Scalar4i& operator=(const Scalar4i& other)
- {
- u4[0] = other.u4[0];
- u4[1] = other.u4[1];
- u4[2] = other.u4[2];
- u4[3] = other.u4[3];
- return *this;
- }
-
- int32_t i4[4];
- uint32_t u4[4];
-};
-
-NV_SIMD_NAMESPACE_END
diff --git a/PxShared/src/NvSimd/include/sse2/NvSse2Simd4f.h b/PxShared/src/NvSimd/include/sse2/NvSse2Simd4f.h
deleted file mode 100644
index 20a2e247..00000000
--- a/PxShared/src/NvSimd/include/sse2/NvSse2Simd4f.h
+++ /dev/null
@@ -1,471 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-#pragma once
-
-NV_SIMD_NAMESPACE_BEGIN
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// factory implementation
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4fZeroFactory::operator Simd4f() const
-{
- return _mm_setzero_ps();
-}
-
-Simd4fOneFactory::operator Simd4f() const
-{
- return _mm_set1_ps(1.0f);
-}
-
-Simd4fScalarFactory::operator Simd4f() const
-{
- return _mm_set1_ps(value);
-}
-
-Simd4fTupleFactory::operator Simd4f() const
-{
- return reinterpret_cast<const Simd4f&>(tuple);
-}
-
-Simd4fLoadFactory::operator Simd4f() const
-{
- return _mm_loadu_ps(ptr);
-}
-
-Simd4fLoad3Factory::operator Simd4f() const
-{
- /* [f0 f1 f2 f3] = [ptr[0] ptr[1] 0 0] */
- __m128i xy = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(ptr));
- __m128 z = _mm_load_ss(ptr + 2);
- return _mm_movelh_ps(_mm_castsi128_ps(xy), z);
-}
-
-Simd4fLoad3SetWFactory::operator Simd4f() const
-{
- __m128 z = _mm_load_ss(ptr + 2);
- __m128 wTmp = _mm_load_ss(&w);
-
- __m128i xy = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(ptr));
- __m128 zw = _mm_movelh_ps(z, wTmp);
-
- return _mm_shuffle_ps(_mm_castsi128_ps(xy), zw, _MM_SHUFFLE(2, 0, 1, 0));
-}
-
-Simd4fAlignedLoadFactory::operator Simd4f() const
-{
- return _mm_load_ps(ptr);
-}
-
-Simd4fOffsetLoadFactory::operator Simd4f() const
-{
- return _mm_load_ps(reinterpret_cast<const float*>(reinterpret_cast<const char*>(ptr) + offset));
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// expression template
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-template <>
-inline ComplementExpr<Simd4f>::operator Simd4f() const
-{
- return _mm_andnot_ps(v, _mm_castsi128_ps(_mm_set1_epi32(-1)));
-}
-
-template <>
-inline Simd4f operator&(const ComplementExpr<Simd4f>& complement, const Simd4f& v)
-{
- return _mm_andnot_ps(complement.v, v);
-}
-
-template <>
-inline Simd4f operator&(const Simd4f& v, const ComplementExpr<Simd4f>& complement)
-{
- return _mm_andnot_ps(complement.v, v);
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// operator implementations
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4f operator==(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_cmpeq_ps(v0, v1);
-}
-
-Simd4f operator<(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_cmplt_ps(v0, v1);
-}
-
-Simd4f operator<=(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_cmple_ps(v0, v1);
-}
-
-Simd4f operator>(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_cmpgt_ps(v0, v1);
-}
-
-Simd4f operator>=(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_cmpge_ps(v0, v1);
-}
-
-ComplementExpr<Simd4f> operator~(const Simd4f& v)
-{
- return ComplementExpr<Simd4f>(v);
-}
-
-Simd4f operator&(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_and_ps(v0, v1);
-}
-
-Simd4f operator|(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_or_ps(v0, v1);
-}
-
-Simd4f operator^(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_xor_ps(v0, v1);
-}
-
-Simd4f operator<<(const Simd4f& v, int shift)
-{
- return _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(v), shift));
-}
-
-Simd4f operator>>(const Simd4f& v, int shift)
-{
- return _mm_castsi128_ps(_mm_srli_epi32(_mm_castps_si128(v), shift));
-}
-
-Simd4f operator+(const Simd4f& v)
-{
- return v;
-}
-
-Simd4f operator+(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_add_ps(v0, v1);
-}
-
-Simd4f operator-(const Simd4f& v)
-{
- return _mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), v);
-}
-
-Simd4f operator-(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_sub_ps(v0, v1);
-}
-
-Simd4f operator*(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_mul_ps(v0, v1);
-}
-
-Simd4f operator/(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_div_ps(v0, v1);
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// function implementations
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4f simd4f(const Simd4i& v)
-{
- return _mm_castsi128_ps(v);
-}
-
-Simd4f convert(const Simd4i& v)
-{
- return _mm_cvtepi32_ps(v);
-}
-
-float (&array(Simd4f& v))[4]
-{
- return reinterpret_cast<float(&)[4]>(v);
-}
-
-const float (&array(const Simd4f& v))[4]
-{
- return reinterpret_cast<const float(&)[4]>(v);
-}
-
-void store(float* ptr, Simd4f const& v)
-{
- _mm_storeu_ps(ptr, v);
-}
-
-void storeAligned(float* ptr, Simd4f const& v)
-{
- _mm_store_ps(ptr, v);
-}
-
-void store3(float* dst, const Simd4f& v)
-{
- const float* __restrict src = array(v);
- dst[0] = src[0];
- dst[1] = src[1];
- dst[2] = src[2];
-}
-
-void storeAligned(float* ptr, unsigned int offset, Simd4f const& v)
-{
- _mm_store_ps(reinterpret_cast<float*>(reinterpret_cast<char*>(ptr) + offset), v);
-}
-
-template <size_t i>
-Simd4f splat(Simd4f const& v)
-{
- return _mm_shuffle_ps(v, v, _MM_SHUFFLE(i, i, i, i));
-}
-
-Simd4f select(Simd4f const& mask, Simd4f const& v0, Simd4f const& v1)
-{
- return _mm_xor_ps(v1, _mm_and_ps(mask, _mm_xor_ps(v1, v0)));
-}
-
-Simd4f abs(const Simd4f& v)
-{
- return _mm_andnot_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), v);
-}
-
-Simd4f floor(const Simd4f& v)
-{
- // SSE 4.1: return _mm_floor_ps(v);
- Simd4i i = _mm_cvttps_epi32(v);
- Simd4i s = _mm_castps_si128(_mm_cmpgt_ps(_mm_cvtepi32_ps(i), v));
- return _mm_cvtepi32_ps(_mm_sub_epi32(i, _mm_srli_epi32(s, 31)));
-}
-
-#if !defined max
-Simd4f max(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_max_ps(v0, v1);
-}
-#endif
-
-#if !defined min
-Simd4f min(const Simd4f& v0, const Simd4f& v1)
-{
- return _mm_min_ps(v0, v1);
-}
-#endif
-
-Simd4f recip(const Simd4f& v)
-{
- return _mm_rcp_ps(v);
-}
-
-template <int n>
-Simd4f recip(const Simd4f& v)
-{
- Simd4f two = simd4f(2.0f);
- Simd4f r = recip(v);
- for(int i = 0; i < n; ++i)
- r = r * (two - v * r);
- return r;
-}
-
-Simd4f sqrt(const Simd4f& v)
-{
- return _mm_sqrt_ps(v);
-}
-
-Simd4f rsqrt(const Simd4f& v)
-{
- return _mm_rsqrt_ps(v);
-}
-
-template <int n>
-Simd4f rsqrt(const Simd4f& v)
-{
- Simd4f halfV = v * simd4f(0.5f);
- Simd4f threeHalf = simd4f(1.5f);
- Simd4f r = rsqrt(v);
- for(int i = 0; i < n; ++i)
- r = r * (threeHalf - halfV * r * r);
- return r;
-}
-
-Simd4f exp2(const Simd4f& v)
-{
- // http://www.netlib.org/cephes/
-
- Simd4f limit = simd4f(127.4999f);
- Simd4f x = min(max(-limit, v), limit);
-
- // separate into integer and fractional part
-
- Simd4f fx = x + simd4f(0.5f);
- Simd4i ix = _mm_sub_epi32(_mm_cvttps_epi32(fx), _mm_srli_epi32(_mm_castps_si128(fx), 31));
- fx = x - Simd4f(_mm_cvtepi32_ps(ix));
-
- // exp2(fx) ~ 1 + 2*P(fx) / (Q(fx) - P(fx))
-
- Simd4f fx2 = fx * fx;
-
- Simd4f px = fx * (simd4f(1.51390680115615096133e+3f) +
- fx2 * (simd4f(2.02020656693165307700e+1f) + fx2 * simd4f(2.30933477057345225087e-2f)));
- Simd4f qx = simd4f(4.36821166879210612817e+3f) + fx2 * (simd4f(2.33184211722314911771e+2f) + fx2);
-
- Simd4f exp2fx = px * recip(qx - px);
- exp2fx = gSimd4fOne + exp2fx + exp2fx;
-
- // exp2(ix)
-
- Simd4f exp2ix = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ix, _mm_set1_epi32(0x7f)), 23));
-
- return exp2fx * exp2ix;
-}
-
-Simd4f log2(const Simd4f& v)
-{
- // todo: fast approximate implementation like exp2
- Simd4f scale = simd4f(1.44269504088896341f); // 1/ln(2)
- const float* ptr = array(v);
- return simd4f(::logf(ptr[0]), ::logf(ptr[1]), ::logf(ptr[2]), ::logf(ptr[3])) * scale;
-}
-
-Simd4f dot3(const Simd4f& v0, const Simd4f& v1)
-{
- Simd4f tmp = v0 * v1;
- return splat<0>(tmp) + splat<1>(tmp) + splat<2>(tmp);
-}
-
-Simd4f cross3(const Simd4f& v0, const Simd4f& v1)
-{
- Simd4f t0 = _mm_shuffle_ps(v0, v0, 0xc9); // w z y x -> w x z y
- Simd4f t1 = _mm_shuffle_ps(v1, v1, 0xc9);
- Simd4f tmp = v0 * t1 - t0 * v1;
- return _mm_shuffle_ps(tmp, tmp, 0xc9);
-}
-
-void transpose(Simd4f& x, Simd4f& y, Simd4f& z, Simd4f& w)
-{
- _MM_TRANSPOSE4_PS(x, y, z, w);
-}
-
-void zip(Simd4f& v0, Simd4f& v1)
-{
- Simd4f t0 = v0;
- v0 = _mm_unpacklo_ps(v0, v1);
- v1 = _mm_unpackhi_ps(t0, v1);
-}
-
-void unzip(Simd4f& v0, Simd4f& v1)
-{
- Simd4f t0 = v0;
- v0 = _mm_shuffle_ps(v0, v1, _MM_SHUFFLE(2, 0, 2, 0));
- v1 = _mm_shuffle_ps(t0, v1, _MM_SHUFFLE(3, 1, 3, 1));
-}
-
-Simd4f swaphilo(const Simd4f& v)
-{
- return _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 0, 3, 2));
-}
-
-int allEqual(const Simd4f& v0, const Simd4f& v1)
-{
- return allTrue(v0 == v1);
-}
-
-int allEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask)
-{
- return allTrue(outMask = v0 == v1);
-}
-
-int anyEqual(const Simd4f& v0, const Simd4f& v1)
-{
- return anyTrue(v0 == v1);
-}
-
-int anyEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask)
-{
- return anyTrue(outMask = v0 == v1);
-}
-
-int allGreater(const Simd4f& v0, const Simd4f& v1)
-{
- return allTrue(v0 > v1);
-}
-
-int allGreater(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask)
-{
- return allTrue(outMask = v0 > v1);
-}
-
-int anyGreater(const Simd4f& v0, const Simd4f& v1)
-{
- return anyTrue(v0 > v1);
-}
-
-int anyGreater(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask)
-{
- return anyTrue(outMask = v0 > v1);
-}
-
-int allGreaterEqual(const Simd4f& v0, const Simd4f& v1)
-{
- return allTrue(v0 >= v1);
-}
-
-int allGreaterEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask)
-{
- return allTrue(outMask = v0 >= v1);
-}
-
-int anyGreaterEqual(const Simd4f& v0, const Simd4f& v1)
-{
- return anyTrue(v0 >= v1);
-}
-
-int anyGreaterEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask)
-{
- return anyTrue(outMask = v0 >= v1);
-}
-
-int allTrue(const Simd4f& v)
-{
- return _mm_movemask_ps(v) == 0xf;
-}
-
-int anyTrue(const Simd4f& v)
-{
- return _mm_movemask_ps(v);
-}
-
-NV_SIMD_NAMESPACE_END
diff --git a/PxShared/src/NvSimd/include/sse2/NvSse2Simd4i.h b/PxShared/src/NvSimd/include/sse2/NvSse2Simd4i.h
deleted file mode 100644
index 86a1848d..00000000
--- a/PxShared/src/NvSimd/include/sse2/NvSse2Simd4i.h
+++ /dev/null
@@ -1,259 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-#pragma once
-
-NV_SIMD_NAMESPACE_BEGIN
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// factory implementation
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4iZeroFactory::operator Simd4i() const
-{
- return _mm_setzero_si128();
-}
-
-Simd4iScalarFactory::operator Simd4i() const
-{
- return _mm_set1_epi32(value);
-}
-
-Simd4iTupleFactory::operator Simd4i() const
-{
- return reinterpret_cast<const Simd4i&>(tuple);
-}
-
-Simd4iLoadFactory::operator Simd4i() const
-{
- return _mm_loadu_si128(reinterpret_cast<const __m128i*>(ptr));
-}
-
-Simd4iAlignedLoadFactory::operator Simd4i() const
-{
- return _mm_load_si128(reinterpret_cast<const __m128i*>(ptr));
-}
-
-Simd4iOffsetLoadFactory::operator Simd4i() const
-{
- return _mm_load_si128(reinterpret_cast<const __m128i*>(reinterpret_cast<const char*>(ptr) + offset));
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// expression template
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-template <>
-inline ComplementExpr<Simd4i>::operator Simd4i() const
-{
- return _mm_andnot_si128(v, _mm_set1_epi32(0xffffffff));
-}
-
-template <>
-inline Simd4i operator&(const ComplementExpr<Simd4i>& complement, const Simd4i& v)
-{
- return _mm_andnot_si128(complement.v, v);
-}
-
-template <>
-inline Simd4i operator&(const Simd4i& v, const ComplementExpr<Simd4i>& complement)
-{
- return _mm_andnot_si128(complement.v, v);
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// operator implementations
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4i operator==(const Simd4i& v0, const Simd4i& v1)
-{
- return _mm_cmpeq_epi32(v0, v1);
-}
-
-Simd4i operator<(const Simd4i& v0, const Simd4i& v1)
-{
- return _mm_cmplt_epi32(v0, v1);
-}
-
-Simd4i operator>(const Simd4i& v0, const Simd4i& v1)
-{
- return _mm_cmpgt_epi32(v0, v1);
-}
-
-ComplementExpr<Simd4i> operator~(const Simd4i& v)
-{
- return ComplementExpr<Simd4i>(v);
-}
-
-Simd4i operator&(const Simd4i& v0, const Simd4i& v1)
-{
- return _mm_and_si128(v0, v1);
-}
-
-Simd4i operator|(const Simd4i& v0, const Simd4i& v1)
-{
- return _mm_or_si128(v0, v1);
-}
-
-Simd4i operator^(const Simd4i& v0, const Simd4i& v1)
-{
- return _mm_xor_si128(v0, v1);
-}
-
-Simd4i operator<<(const Simd4i& v, int shift)
-{
- return _mm_slli_epi32(v, shift);
-}
-
-Simd4i operator>>(const Simd4i& v, int shift)
-{
- return _mm_srli_epi32(v, shift);
-}
-
-Simd4i operator+(const Simd4i& v)
-{
- return v;
-}
-
-Simd4i operator+(const Simd4i& v0, const Simd4i& v1)
-{
- return _mm_add_epi32(v0, v1);
-}
-
-Simd4i operator-(const Simd4i& v)
-{
- return _mm_sub_epi32(_mm_setzero_si128(), v);
-}
-
-Simd4i operator-(const Simd4i& v0, const Simd4i& v1)
-{
- return _mm_sub_epi32(v0, v1);
-}
-
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-// function implementations
-// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-
-Simd4i simd4i(const Simd4f& v)
-{
- return _mm_castps_si128(v);
-}
-
-Simd4i truncate(const Simd4f& v)
-{
- return _mm_cvttps_epi32(v);
-}
-
-int (&array(Simd4i& v))[4]
-{
- return reinterpret_cast<int(&)[4]>(v);
-}
-
-const int (&array(const Simd4i& v))[4]
-{
- return reinterpret_cast<const int(&)[4]>(v);
-}
-
-void store(int* ptr, const Simd4i& v)
-{
- _mm_storeu_si128(reinterpret_cast<__m128i*>(ptr), v);
-}
-
-void storeAligned(int* ptr, const Simd4i& v)
-{
- _mm_store_si128(reinterpret_cast<__m128i*>(ptr), v);
-}
-
-void storeAligned(int* ptr, unsigned int offset, const Simd4i& v)
-{
- _mm_store_si128(reinterpret_cast<__m128i*>(reinterpret_cast<char*>(ptr) + offset), v);
-}
-
-template <size_t i>
-Simd4i splat(const Simd4i& v)
-{
- return _mm_shuffle_epi32(v, _MM_SHUFFLE(i, i, i, i));
-}
-
-Simd4i select(const Simd4i& mask, const Simd4i& v0, const Simd4i& v1)
-{
- return _mm_xor_si128(v1, _mm_and_si128(mask, _mm_xor_si128(v1, v0)));
-}
-
-int allEqual(const Simd4i& v0, const Simd4i& v1)
-{
- return allTrue(operator==(v0, v1));
-}
-
-int allEqual(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask)
-{
- return allTrue(outMask = operator==(v0, v1));
-}
-
-int anyEqual(const Simd4i& v0, const Simd4i& v1)
-{
- return anyTrue(operator==(v0, v1));
-}
-
-int anyEqual(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask)
-{
- return anyTrue(outMask = operator==(v0, v1));
-}
-
-int allGreater(const Simd4i& v0, const Simd4i& v1)
-{
- return allTrue(operator>(v0, v1));
-}
-
-int allGreater(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask)
-{
- return allTrue(outMask = operator>(v0, v1));
-}
-
-int anyGreater(const Simd4i& v0, const Simd4i& v1)
-{
- return anyTrue(operator>(v0, v1));
-}
-
-int anyGreater(const Simd4i& v0, const Simd4i& v1, Simd4i& outMask)
-{
- return anyTrue(outMask = operator>(v0, v1));
-}
-
-int allTrue(const Simd4i& v)
-{
- return _mm_movemask_ps(_mm_castsi128_ps(v)) == 0xf;
-}
-
-int anyTrue(const Simd4i& v)
-{
- return _mm_movemask_ps(_mm_castsi128_ps(v));
-}
-
-NV_SIMD_NAMESPACE_END
diff --git a/PxShared/src/NvSimd/include/sse2/NvSse2SimdTypes.h b/PxShared/src/NvSimd/include/sse2/NvSse2SimdTypes.h
deleted file mode 100644
index 353f17ae..00000000
--- a/PxShared/src/NvSimd/include/sse2/NvSse2SimdTypes.h
+++ /dev/null
@@ -1,95 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
-// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
-
-#pragma once
-
-// SSE + SSE2 (don't include intrin.h!)
-#include <emmintrin.h>
-
-#if defined _MSC_VER && !(defined NV_SIMD_USE_NAMESPACE && NV_SIMD_USE_NAMESPACE)
-
-// SIMD libarary lives in global namespace and Simd4f is
-// typedef'd to __m128 so it can be passed by value on MSVC.
-
-typedef __m128 Simd4f;
-typedef __m128i Simd4i;
-
-#else
-
-NV_SIMD_NAMESPACE_BEGIN
-
-/** \brief SIMD type containing 4 floats */
-struct Simd4f
-{
- Simd4f()
- {
- }
- Simd4f(__m128 x) : m128(x)
- {
- }
-
- operator __m128&()
- {
- return m128;
- }
- operator const __m128&() const
- {
- return m128;
- }
-
- private:
- __m128 m128;
-};
-
-/** \brief SIMD type containing 4 integers */
-struct Simd4i
-{
- Simd4i()
- {
- }
- Simd4i(__m128i x) : m128i(x)
- {
- }
-
- operator __m128i&()
- {
- return m128i;
- }
- operator const __m128i&() const
- {
- return m128i;
- }
-
- private:
- __m128i m128i;
-};
-
-NV_SIMD_NAMESPACE_END
-
-#endif