aboutsummaryrefslogtreecommitdiff
path: root/APEX_1.4/module/clothing/embedded/LowLevelCloth/src/Simd4f.h
diff options
context:
space:
mode:
authorgit perforce import user <a@b>2016-10-25 12:29:14 -0600
committerSheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>2016-10-25 18:56:37 -0500
commit3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
treefa6485c169e50d7415a651bf838f5bcd0fd3bfbd /APEX_1.4/module/clothing/embedded/LowLevelCloth/src/Simd4f.h
downloadphysx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
Initial commit:
PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
Diffstat (limited to 'APEX_1.4/module/clothing/embedded/LowLevelCloth/src/Simd4f.h')
-rw-r--r--APEX_1.4/module/clothing/embedded/LowLevelCloth/src/Simd4f.h478
1 files changed, 478 insertions, 0 deletions
diff --git a/APEX_1.4/module/clothing/embedded/LowLevelCloth/src/Simd4f.h b/APEX_1.4/module/clothing/embedded/LowLevelCloth/src/Simd4f.h
new file mode 100644
index 00000000..8755a010
--- /dev/null
+++ b/APEX_1.4/module/clothing/embedded/LowLevelCloth/src/Simd4f.h
@@ -0,0 +1,478 @@
+/*
+ * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
+ * and proprietary rights in and to this software, related documentation
+ * and any modifications thereto. Any use, reproduction, disclosure or
+ * distribution of this software and related documentation without an express
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ */
+
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#pragma once
+
+#include "SimdTypes.h"
+
+#if NVMATH_FUSE_MULTIPLY_ADD
+
+/*! \brief Expression template to fuse multiply-adds.
+ * \relates Simd4f */
+struct ProductExpr
+{
+ inline ProductExpr(Simd4f const& v0_, Simd4f const& v1_) : v0(v0_), v1(v1_)
+ {
+ }
+ inline operator Simd4f() const;
+ const Simd4f v0, v1;
+
+ private:
+ ProductExpr& operator=(const ProductExpr&); // not implemented
+};
+
+inline Simd4f operator+(const ProductExpr&, const Simd4f&);
+inline Simd4f operator+(const Simd4f& v, const ProductExpr&);
+inline Simd4f operator+(const ProductExpr&, const ProductExpr&);
+inline Simd4f operator-(const Simd4f& v, const ProductExpr&);
+inline Simd4f operator-(const ProductExpr&, const ProductExpr&);
+
+#else // NVMATH_FUSE_MULTIPLY_ADD
+typedef Simd4f ProductExpr;
+#endif // NVMATH_FUSE_MULTIPLY_ADD
+
+template <typename T>
+struct Simd4fFactory
+{
+ Simd4fFactory(T v_) : v(v_)
+ {
+ }
+ inline operator Simd4f() const;
+ inline operator Scalar4f() const;
+ Simd4fFactory& operator=(const Simd4fFactory&); // not implemented
+ T v;
+};
+
+template <>
+struct Simd4fFactory<detail::FourTuple>
+{
+ Simd4fFactory(float x, float y, float z, float w)
+ {
+ v[0] = x, v[1] = y, v[2] = z, v[3] = w;
+ }
+ Simd4fFactory(const Simd4fFactory<const float&>& f)
+ {
+ v[3] = v[2] = v[1] = v[0] = f.v;
+ }
+ inline operator Simd4f() const;
+ inline operator Scalar4f() const;
+ Simd4fFactory& operator=(const Simd4fFactory&); // not implemented
+ PX_ALIGN(16, float) v[4];
+};
+
+template <int i>
+struct Simd4fFactory<detail::IntType<i> >
+{
+ inline operator Simd4f() const;
+ inline operator Scalar4f() const;
+};
+
+// forward declaration
+template <typename>
+struct Simd4iFactory;
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// expression template
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if NVMATH_SIMD
+inline Simd4f operator&(const ComplementExpr<Simd4f>&, const Simd4f&);
+inline Simd4f operator&(const Simd4f&, const ComplementExpr<Simd4f>&);
+#endif
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// operators
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+// note: operator?= missing because they don't have corresponding intrinsics.
+
+/*! \brief Test for equality of two vectors.
+* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
+* \note QNaPs aren't handled on SPU: comparing two QNaPs will return true.
+* \relates Simd4f */
+inline Simd4f operator==(const Simd4f& v0, const Simd4f& v1);
+
+// no operator!= because VMX128 does not support it, use ~operator== and handle QNaPs
+
+/*! \brief Less-compare all elements of two vectors.
+* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
+* \note QNaPs aren't handled on SPU: comparisons against QNaPs don't necessarily return false.
+* \relates Simd4f */
+inline Simd4f operator<(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Less-or-equal-compare all elements of two vectors.
+* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
+* \note QNaPs aren't handled on SPU: comparisons against QNaPs don't necessarily return false.
+* \relates Simd4f */
+inline Simd4f operator<=(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Greater-compare all elements of two vectors.
+* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
+* \note QNaPs aren't handled on SPU: comparisons against QNaPs don't necessarily return false.
+* \relates Simd4f */
+inline Simd4f operator>(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Greater-or-equal-compare all elements of two vectors.
+* \return Vector of per element result mask (all bits set for 'true', none set for 'false').
+* \note QNaPs aren't handled on SPU: comparisons against QNaPs don't necessarily return false.
+* \relates Simd4f */
+inline Simd4f operator>=(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Vector bit-wise NOT operator
+* \return A vector holding the bit-negate of \a v.
+* \relates Simd4f */
+inline ComplementExpr<Simd4f> operator~(const Simd4f& v);
+
+/*! \brief Vector bit-wise AND operator
+* \return A vector holding the bit-wise AND of \a v0 and \a v1.
+* \relates Simd4f */
+inline Simd4f operator&(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Vector bit-wise OR operator
+* \return A vector holding the bit-wise OR of \a v0 and \a v1.
+* \relates Simd4f */
+inline Simd4f operator|(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Vector bit-wise XOR operator
+* \return A vector holding the bit-wise XOR of \a v0 and \a v1.
+* \relates Simd4f */
+inline Simd4f operator^(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Vector logical left shift.
+* \return A vector with 4 elements of \a v0, each shifted left by \a shift bits.
+* \relates Simd4f */
+inline Simd4f operator<<(const Simd4f& v, int shift);
+
+/*! \brief Vector logical right shift.
+* \return A vector with 4 elements of \a v0, each shifted right by \a shift bits.
+* \relates Simd4f */
+inline Simd4f operator>>(const Simd4f& v, int shift);
+
+#if NVMATH_SHIFT_BY_VECTOR
+/*! \brief Vector logical left shift.
+* \return A vector with 4 elements of \a v0, each shifted left by \a shift bits.
+* \relates Simd4f */
+inline Simd4f operator<<(const Simd4f& v, const Simd4f& shift);
+
+/*! \brief Vector logical right shift.
+* \return A vector with 4 elements of \a v0, each shifted right by \a shift bits.
+* \relates Simd4f */
+inline Simd4f operator>>(const Simd4f& v, const Simd4f& shift);
+#endif
+
+/*! \brief Unary vector addition operator.
+* \return A vector holding the component-wise copy of \a v.
+* \relates Simd4f */
+inline Simd4f operator+(const Simd4f& v);
+
+/*! \brief Vector addition operator
+* \return A vector holding the component-wise sum of \a v0 and \a v1.
+* \relates Simd4f */
+inline Simd4f operator+(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Unary vector negation operator.
+* \return A vector holding the component-wise negation of \a v.
+* \relates Simd4f */
+inline Simd4f operator-(const Simd4f& v);
+
+/*! \brief Vector subtraction operator.
+* \return A vector holding the component-wise difference of \a v0 and \a v1.
+* \relates Simd4f */
+inline Simd4f operator-(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Vector multiplication.
+* \return Element-wise product of \a v0 and \a v1.
+* \note For VMX, returns expression template to fuse multiply-add.
+* \relates Simd4f */
+inline ProductExpr operator*(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Vector division.
+* \return Element-wise division of \a v0 and \a v1.
+* \relates Simd4f */
+inline Simd4f operator/(const Simd4f& v0, const Simd4f& v1);
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// functions
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+/*! \brief Load float value into all vector components.
+* \relates Simd4f */
+inline Simd4fFactory<const float&> simd4f(const float& s)
+{
+ return Simd4fFactory<const float&>(s);
+}
+
+/*! \brief Load 4 float values into vector.
+* \relates Simd4f */
+inline Simd4fFactory<detail::FourTuple> simd4f(float x, float y, float z, float w)
+{
+ return Simd4fFactory<detail::FourTuple>(x, y, z, w);
+}
+
+/*! \brief Create vector from literal.
+* \return Vector with all elements set to i.
+* \relates Simd4f */
+template <int i>
+inline Simd4fFactory<detail::IntType<i> > simd4f(detail::IntType<i> const&)
+{
+ return Simd4fFactory<detail::IntType<i> >();
+}
+
+/*! \brief Reinterpret Simd4i as Simd4f.
+* \return A copy of \a v, but cast as Simd4f.
+* \relates Simd4f */
+inline Simd4f simd4f(const Simd4i& v);
+
+/*! \brief Reinterpret Simd4iFactory as Simd4fFactory.
+* \relates Simd4f */
+template <typename T>
+inline Simd4fFactory<T> simd4f(const Simd4iFactory<T>& v)
+{
+ return reinterpret_cast<const Simd4fFactory<T>&>(v);
+}
+
+/*! \brief return reference to contiguous array of vector elements
+* \relates Simd4f */
+inline float (&array(Simd4f& v))[4];
+
+/*! \brief return constant reference to contiguous array of vector elements
+* \relates Simd4f */
+inline const float (&array(const Simd4f& v))[4];
+
+/*! \brief Create vector from float array.
+* \relates Simd4f */
+inline Simd4fFactory<const float*> load(const float* ptr)
+{
+ return ptr;
+}
+
+/*! \brief Create vector from aligned float array.
+* \note \a ptr needs to be 16 byte aligned.
+* \relates Simd4f */
+inline Simd4fFactory<detail::AlignedPointer<float> > loadAligned(const float* ptr)
+{
+ return detail::AlignedPointer<float>(ptr);
+}
+
+/*! \brief Create vector from aligned float array.
+* \param offset pointer offset in bytes.
+* \note \a ptr+offset needs to be 16 byte aligned.
+* \relates Simd4f */
+inline Simd4fFactory<detail::OffsetPointer<float> > loadAligned(const float* ptr, unsigned int offset)
+{
+ return detail::OffsetPointer<float>(ptr, offset);
+}
+
+/*! \brief Store vector \a v to float array \a ptr.
+* \relates Simd4f */
+inline void store(float* ptr, Simd4f const& v);
+
+/*! \brief Store vector \a v to aligned float array \a ptr.
+* \note \a ptr needs to be 16 byte aligned.
+* \relates Simd4f */
+inline void storeAligned(float* ptr, Simd4f const& v);
+
+/*! \brief Store vector \a v to aligned float array \a ptr.
+* \param offset pointer offset in bytes.
+* \note \a ptr+offset needs to be 16 byte aligned.
+* \relates Simd4f */
+inline void storeAligned(float* ptr, unsigned int offset, Simd4f const& v);
+
+/*! \brief replicate i-th component into all vector components.
+* \return Vector with all elements set to \a v[i].
+* \relates Simd4f */
+template <size_t i>
+inline Simd4f splat(Simd4f const& v);
+
+/*! \brief Select \a v0 or \a v1 based on \a mask.
+* \return mask ? v0 : v1
+* \relates Simd4f */
+inline Simd4f select(Simd4f const& mask, Simd4f const& v0, Simd4f const& v1);
+
+/*! \brief Per element absolute value.
+* \return Vector with absolute values of \a v.
+* \relates Simd4f */
+inline Simd4f abs(const Simd4f& v);
+
+/*! \brief Per element floor value.
+* \note Result undefined for QNaN elements.
+* \note On SSE and NEON, returns v-1 if v is negative integer value
+* \relates Simd4f */
+inline Simd4f floor(const Simd4f& v);
+
+/*! \brief Per-component minimum of two vectors
+* \note Result undefined for QNaN elements.
+* \relates Simd4f */
+inline Simd4f max(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Per-component minimum of two vectors
+* \note Result undefined for QNaN elements.
+* \relates Simd4f */
+inline Simd4f min(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Return reciprocal estimate of a vector.
+* \return Vector of per-element reciprocal estimate.
+* \relates Simd4f */
+inline Simd4f recip(const Simd4f& v);
+
+/*! \brief Return reciprocal of a vector.
+* \return Vector of per-element reciprocal.
+* \note Performs \a n Newton-Raphson iterations on initial estimate.
+* \relates Simd4f */
+template <int n>
+inline Simd4f recipT(const Simd4f& v);
+
+/*! \brief Return square root of a vector.
+* \return Vector of per-element square root.
+* \note The behavior is undefined for negative elements.
+* \relates Simd4f */
+inline Simd4f sqrt(const Simd4f& v);
+
+/*! \brief Return inverse square root estimate of a vector.
+* \return Vector of per-element inverse square root estimate.
+* \note The behavior is undefined for negative, zero, and infinity elements.
+* \relates Simd4f */
+inline Simd4f rsqrt(const Simd4f& v);
+
+/*! \brief Return inverse square root of a vector.
+* \return Vector of per-element inverse square root.
+* \note Performs \a n Newton-Raphson iterations on initial estimate.
+* \note The behavior is undefined for negative and infinity elements.
+* \relates Simd4f */
+template <int n>
+inline Simd4f rsqrtT(const Simd4f& v);
+
+/*! \brief Return 2 raised to the power of v.
+* \note Result undefined for QNaN elements.
+* \relates Simd4f */
+inline Simd4f exp2(const Simd4f& v);
+
+#if NVMATH_SIMD
+namespace simdf
+{
+// PSP2 is confused resolving about exp2, forwarding works
+inline Simd4f exp2(const Simd4f& v)
+{
+ return ::exp2(v);
+}
+}
+#endif
+
+/*! \brief Return logarithm of v to base 2.
+* \note Result undefined for QNaN elements.
+* \relates Simd4f */
+inline Simd4f log2(const Simd4f& v);
+
+/*! \brief Return dot product of two 3-vectors.
+* \note The result is replicated across all 4 components.
+* \relates Simd4f */
+inline Simd4f dot3(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Return cross product of two 3-vectors.
+* \note The 4th component is undefined.
+* \relates Simd4f */
+inline Simd4f cross3(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief Transposes 4x4 matrix represented by \a x, \a y, \a z, and \a w.
+* \relates Simd4f */
+inline void transpose(Simd4f& x, Simd4f& y, Simd4f& z, Simd4f& w);
+
+/*! \brief returns non-zero if all elements or \a v0 and \a v1 are equal
+* \note QNaPs aren't handled on SPU: comparing two QNaPs will return true.
+* \relates Simd4f */
+inline int allEqual(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief returns non-zero if all elements or \a v0 and \a v1 are equal
+* \param outMask holds the result of \a v0 == \a v1.
+* \note QNaPs aren't handled on SPU: comparing two QNaPs will return true.
+* \relates Simd4f */
+inline int allEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask);
+
+/*! \brief returns non-zero if any elements or \a v0 and \a v1 are equal
+* \note QNaPs aren't handled on SPU: comparing two QNaPs will return true.
+* \relates Simd4f */
+inline int anyEqual(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief returns non-zero if any elements or \a v0 and \a v1 are equal
+* \param outMask holds the result of \a v0 == \a v1.
+* \note QNaPs aren't handled on SPU: comparing two QNaPs will return true.
+* \relates Simd4f */
+inline int anyEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask);
+
+/*! \brief returns non-zero if all elements or \a v0 and \a v1 are greater
+* \note QNaPs aren't handled on SPU: comparisons against QNaPs don't necessarily return false.
+* \relates Simd4f */
+inline int allGreater(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief returns non-zero if all elements or \a v0 and \a v1 are greater
+* \param outMask holds the result of \a v0 == \a v1.
+* \note QNaPs aren't handled on SPU: comparisons against QNaPs don't necessarily return false.
+* \relates Simd4f */
+inline int allGreater(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask);
+
+/*! \brief returns non-zero if any elements or \a v0 and \a v1 are greater
+* \note QNaPs aren't handled on SPU: comparisons against QNaPs don't necessarily return false.
+* \relates Simd4f */
+inline int anyGreater(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief returns non-zero if any elements or \a v0 and \a v1 are greater
+* \param outMask holds the result of \a v0 == \a v1.
+* \note QNaPs aren't handled on SPU: comparisons against QNaPs don't necessarily return false.
+* \relates Simd4f */
+inline int anyGreater(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask);
+
+/*! \brief returns non-zero if all elements or \a v0 and \a v1 are greater or equal
+* \note QNaPs aren't handled on SPU: comparisons against QNaPs don't necessarily return false.
+* \relates Simd4f */
+inline int allGreaterEqual(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief returns non-zero if all elements or \a v0 and \a v1 are greater or equal
+* \param outMask holds the result of \a v0 == \a v1.
+* \note QNaPs aren't handled on SPU: comparisons against QNaPs don't necessarily return false.
+* \relates Simd4f */
+inline int allGreaterEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask);
+
+/*! \brief returns non-zero if any elements or \a v0 and \a v1 are greater or equal
+* \note QNaPs aren't handled on SPU: comparisons against QNaPs don't necessarily return false.
+* \relates Simd4f */
+inline int anyGreaterEqual(const Simd4f& v0, const Simd4f& v1);
+
+/*! \brief returns non-zero if any elements or \a v0 and \a v1 are greater or equal
+* \param outMask holds the result of \a v0 == \a v1.
+* \note QNaPs aren't handled on SPU: comparisons against QNaPs don't necessarily return false.
+* \relates Simd4f */
+inline int anyGreaterEqual(const Simd4f& v0, const Simd4f& v1, Simd4f& outMask);
+
+/*! \brief returns non-zero if all elements are true
+* \note Undefined if parameter is not result of a comparison.
+* \relates Simd4f */
+inline int allTrue(const Simd4f& v);
+
+/*! \brief returns non-zero if any element is true
+* \note Undefined if parameter is not result of a comparison.
+* \relates Simd4f */
+inline int anyTrue(const Simd4f& v);
+
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+// platform specific includes
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+#if NVMATH_SSE2
+#include "sse2/Simd4f.h"
+#elif NVMATH_NEON
+#include "neon/Simd4f.h"
+#endif
+
+#if NVMATH_SCALAR
+#include "scalar/Simd4f.h"
+#endif