diff options
Diffstat (limited to 'APEX_1.4/common/include/ApexSimdMath.h')
| -rw-r--r-- | APEX_1.4/common/include/ApexSimdMath.h | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/APEX_1.4/common/include/ApexSimdMath.h b/APEX_1.4/common/include/ApexSimdMath.h new file mode 100644 index 00000000..9c807046 --- /dev/null +++ b/APEX_1.4/common/include/ApexSimdMath.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ + +#ifndef APEX_SIMD_MATH_H +#define APEX_SIMD_MATH_H + +#include "PxMat44.h" +#include "PxVec3.h" +#include "PsMathUtils.h" +#include "NvSimd4f.h" + +namespace nvidia +{ + /** Normalization of the (a[0], a[1], a[2]) vector + * @param a input vector + * @return normalized vector + */ + inline Simd4f normalizeSimd3f(const Simd4f& a) + { + return a * rsqrt(dot3(a, a)); + } + + /** Create simd 4-float tuple from vec3 and wComponent + * @param vec3 vector with 3 components + * @param wComponent with this value final element will be initialized + * @return filled simd 4-float tuple + */ + inline Simd4f createSimd3f(const physx::PxVec3& vec3, float wComponent = 0.0f) + { + return Simd4fLoad3SetWFactory(&vec3.x, wComponent); + } + + /** Apply affine transform to position. Algorithm is not sensitive to pos.w. + * @param transformAlignMemLayout transform + * @param pos input position. + * @return transformed position. With pos.w setuped to one. + */ + inline Simd4f applyAffineTransform(const physx::PxMat44& transformAlignMemLayout, const Simd4f& pos) + { + const physx::PxMat44& tr = transformAlignMemLayout; + + const Simd4f& col0 = Simd4fAlignedLoadFactory(&tr.column0.x); + const Simd4f xMultiplier = splat<0>(pos); + + const Simd4f& col1 = Simd4fAlignedLoadFactory(&tr.column1.x); + const Simd4f yMultiplier = splat<1>(pos); + + const Simd4f& col2 = Simd4fAlignedLoadFactory(&tr.column2.x); + const Simd4f zMultiplier = splat<2>(pos); + + Simd4f result = xMultiplier * col0 + yMultiplier * col1 + zMultiplier * col2 + Simd4fAlignedLoadFactory(&tr.column3.x); + + array(result)[3] = 1.0f; + + return result; + } + + /** Apply linear transform to position or more probability to the vector(direction) + * @param transformAlignMemLayout transform + * @param pos input position. Algo does not sensitive to pos.w. + * @return transformed position. With pos.w setuped to zero. + */ + inline Simd4f applyLinearTransform(const physx::PxMat44& transformAlignMemLayout, const Simd4f& direction) + { + const physx::PxMat44& tr = transformAlignMemLayout; + const Simd4f& col0 = Simd4fAlignedLoadFactory(&tr.column0.x); + const Simd4f xMultiplier = splat<0>(direction); + const Simd4f& col1 = Simd4fAlignedLoadFactory(&tr.column1.x); + const Simd4f yMultiplier = splat<1>(direction); + const Simd4f& col2 = Simd4fAlignedLoadFactory(&tr.column2.x); + const Simd4f zMultiplier = splat<2>(direction); + Simd4f result = xMultiplier * col0 + yMultiplier * col1 + zMultiplier * col2; + result = result & gSimd4fMaskXYZ; + + return result; + } + + /** Apply transpose of matrix consisted of col0, col1, col2, col3. + * Ported version of V4Transpose() from PxShared\*\foundation\include\PsVecMathAoSScalarInline.h + * @param col0 input column of the matrix, and output column of the result matrix + * @param col1 input column of the matrix, and output column of the result matrix + * @param col2 input column of the matrix, and output column of the result matrix + * @param col3 input column of the matrix, and output column of the result matrix + * @return None + */ + inline void applyTranspose(Simd4f& col0, Simd4f& col1, Simd4f& col2, Simd4f& col3) + { + /* + col0 col1 col2 col3 + 0 col0[0] col0[1] col0[1] col0[1] + 1 col0[1] col0[1] col0[1] col0[1] + 2 col0[2] col0[1] col0[1] col0[1] + 3 col0[3] col0[1] col0[1] col0[1] + */ + + float* arrayCol0 = array(col0); + float* arrayCol1 = array(col1); + float* arrayCol2 = array(col2); + float* arrayCol3 = array(col3); + + using physx::PxF32; + const PxF32 t01 = arrayCol0[1]; + const PxF32 t02 = arrayCol0[2]; + const PxF32 t03 = arrayCol0[3]; + const PxF32 t12 = arrayCol1[2]; + const PxF32 t13 = arrayCol1[3]; + const PxF32 t23 = arrayCol2[3]; + + // x -- 0, y -- 1, z -- 2, w -- 3 + arrayCol0[1] = arrayCol1[0]; arrayCol0[2] = arrayCol2[0]; arrayCol0[3] = arrayCol3[0]; + arrayCol1[2] = arrayCol2[1]; arrayCol1[3] = arrayCol3[1]; + arrayCol2[3] = arrayCol3[2]; + + arrayCol1[0] = t01; arrayCol2[0] = t02; arrayCol3[0] = t03; + arrayCol2[1] = t12; arrayCol3[1] = t13; + arrayCol3[2] = t23; + } +} + +#endif // APEX_SIND_MATH_H |