diff options
Diffstat (limited to 'PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h')
| -rw-r--r-- | PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h | 258 |
1 files changed, 258 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h b/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h new file mode 100644 index 00000000..9ded6d26 --- /dev/null +++ b/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h @@ -0,0 +1,258 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef GU_RAWQUERY_TESTS_SIMD_H
#define GU_RAWQUERY_TESTS_SIMD_H

#include "foundation/PxTransform.h"
#include "foundation/PxBounds3.h"
#include "CmPhysXCommon.h"
#include "PxBoxGeometry.h"
#include "PxSphereGeometry.h"
#include "PxCapsuleGeometry.h"
#include "PsVecMath.h"

namespace physx
{
namespace Gu
{

// Ray (optionally inflated) versus AABB overlap test with all per-ray data precomputed.
// The AABB is passed to check() as a center / extents pair.
// The constructor caches the origin, the direction, the YZX-permuted direction, the inflation,
// the absolute direction (and its YZX permutation) and the min/max corners of the ray segment.
struct RayAABBTest
{
	// origin_    : start point of the ray.
	// unitDir_   : ray direction (named as a unit vector; it is not normalized here).
	// maxDist    : length of the ray segment; values >= PX_MAX_F32 take the special path below.
	// inflation_ : per-axis amount added to the box extents by check<true>().
	PX_FORCE_INLINE RayAABBTest(const PxVec3& origin_, const PxVec3& unitDir_, const PxReal maxDist, const PxVec3& inflation_)
	: mOrigin(V3LoadU(origin_))
	, mDir(V3LoadU(unitDir_))
	, mDirYZX(V3PermYZX(mDir))
	, mInflation(V3LoadU(inflation_))
	, mAbsDir(V3Abs(mDir))
	, mAbsDirYZX(V3PermYZX(mAbsDir))
	{
		// For maxDist >= PX_MAX_F32 the end point is built per component instead of evaluating
		// origin_ + unitDir_ * maxDist: a zero direction component keeps the origin's coordinate,
		// any other component becomes +/-PX_MAX_F32 according to its sign.
		// NOTE(review): presumably this avoids overflow in the multiply/add - confirm.
		const PxVec3 ext = maxDist >= PX_MAX_F32 ? PxVec3(	unitDir_.x == 0 ? origin_.x : PxSign(unitDir_.x)*PX_MAX_F32,
															unitDir_.y == 0 ? origin_.y : PxSign(unitDir_.y)*PX_MAX_F32,
															unitDir_.z == 0 ? origin_.z : PxSign(unitDir_.z)*PX_MAX_F32)
												 : origin_ + unitDir_ * maxDist;
		mRayMin = V3Min(mOrigin, V3LoadU(ext));
		mRayMax = V3Max(mOrigin, V3LoadU(ext));
	}

	// Recomputes mRayMin / mRayMax for a new segment length.
	// Unlike the constructor, there is no special case for distance >= PX_MAX_F32 here.
	PX_FORCE_INLINE void setDistance(PxReal distance)
	{
		// presumably mDir * distance + mOrigin, i.e. the new end point - confirm against PsVecMath
		const Vec3V ext = V3ScaleAdd(mDir, FLoad(distance), mOrigin);
		mRayMin = V3Min(mOrigin, ext);
		mRayMax = V3Max(mOrigin, ext);
	}

	// Tests the ray segment against the box given by center / extents.
	// TInflate : when true, mInflation is added to extents before testing.
	// Returns the result of BAllEqTTTT on the combined mask
	// (presumably non-zero only when every lane passed - confirm).
	template<bool TInflate>
	PX_FORCE_INLINE PxU32 check(const Vec3V center, const Vec3V extents) const
	{
		const Vec3V iExt = TInflate ? V3Add(extents, mInflation) : extents;

		// coordinate axes
		const Vec3V nodeMax = V3Add(center, iExt);
		const Vec3V nodeMin = V3Sub(center, iExt);

		// cross axes
		const Vec3V offset = V3Sub(mOrigin, center);
		const Vec3V offsetYZX = V3PermYZX(offset);
		const Vec3V iExtYZX = V3PermYZX(iExt);

		// f combines mDir * offsetYZX with mDirYZX * offset
		// (presumably their difference, i.e. the components of a cross product of direction and offset - confirm);
		// g is the matching combination of extents and absolute direction that |f| is compared against.
		const Vec3V f = V3NegMulSub(mDirYZX, offset, V3Mul(mDir, offsetYZX));
		const Vec3V g = V3MulAdd(iExt, mAbsDirYZX, V3Mul(iExtYZX, mAbsDir));

		// maskA / maskB : the segment's bounds [mRayMin, mRayMax] overlap the box on the coordinate axes.
		// maskC         : the cross-axis test, g >= |f|.
		const BoolV
			maskA = V3IsGrtrOrEq(nodeMax, mRayMin),
			maskB = V3IsGrtrOrEq(mRayMax, nodeMin),
			maskC = V3IsGrtrOrEq(g, V3Abs(f));
		const BoolV andABCMasks = BAnd(BAnd(maskA, maskB), maskC);

		return BAllEqTTTT(andABCMasks);
	}

	// Fixed at construction.
	const Vec3V mOrigin, mDir, mDirYZX, mInflation, mAbsDir, mAbsDirYZX;
	// Corners of the ray segment's bounds; rewritten by setDistance().
	Vec3V mRayMin, mRayMax;
protected:
	// Declared but not defined in this header.
	RayAABBTest& operator=(const RayAABBTest&);
};

// AABB versus AABB overlap test; the query box is stored as center / extents.
// probably not worth having a SIMD version of this unless the traversal passes Vec3Vs
struct AABBAABBTest
{
	// Builds the query box from a transform and a box geometry.
	// Only the translation t.p is read; the rotation of t is not used.
	PX_FORCE_INLINE AABBAABBTest(const PxTransform&t, const PxBoxGeometry&b)
	: mCenter(V3LoadU(t.p))
	, mExtents(V3LoadU(b.halfExtents))
	{ }

	// Builds the query box from bounds, using their center and extents.
	PX_FORCE_INLINE AABBAABBTest(const PxBounds3& b)
	: mCenter(V3LoadU(b.getCenter()))
	, mExtents(V3LoadU(b.getExtents()))
	{ }

	// Compares the summed extents with the absolute center distance:
	// mExtents + extents >= |center - mCenter|, evaluated by V3AllGrtrOrEq.
	// The commented-out lines below are the scalar form of the same test.
	PX_FORCE_INLINE Ps::IntBool operator()(const Vec3V center, const Vec3V extents) const
	{
		//PxVec3 c; PxVec3_From_Vec3V(center, c);
		//PxVec3 e; PxVec3_From_Vec3V(extents, e);
		//if(PxAbs(c.x - mCenter.x) > mExtents.x + e.x) return Ps::IntFalse;
		//if(PxAbs(c.y - mCenter.y) > mExtents.y + e.y) return Ps::IntFalse;
		//if(PxAbs(c.z - mCenter.z) > mExtents.z + e.z) return Ps::IntFalse;
		//return Ps::IntTrue;
		return Ps::IntBool(V3AllGrtrOrEq(V3Add(mExtents, extents), V3Abs(V3Sub(center, mCenter))));
	}

private:
	// Declared but not defined in this header.
	AABBAABBTest& operator=(const AABBAABBTest&);
	const Vec3V mCenter, mExtents;
};

// Sphere versus AABB overlap test; stores the sphere center and the squared radius.
struct SphereAABBTest
{
	// Builds the sphere from a transform (only t.p is read) and a sphere geometry.
	PX_FORCE_INLINE SphereAABBTest(const PxTransform& t, const PxSphereGeometry& s)
	: mCenter(V3LoadU(t.p))
	, mRadius2(FLoad(s.radius * s.radius))
	{}

	// Builds the sphere from an explicit center and radius.
	PX_FORCE_INLINE SphereAABBTest(const PxVec3& center, PxF32 radius)
	: mCenter(V3LoadU(center))
	, mRadius2(FLoad(radius * radius))
	{}

	// Clamps the sphere center (relative to the box center) to [-boxExtents, boxExtents]
	// and compares the squared length of the remainder with the squared radius.
	PX_FORCE_INLINE Ps::IntBool operator()(const Vec3V boxCenter, const Vec3V boxExtents) const
	{
		const Vec3V offset = V3Sub(mCenter, boxCenter);
		const Vec3V closest = V3Clamp(offset, V3Neg(boxExtents), boxExtents);
		const Vec3V d = V3Sub(offset, closest);
		return Ps::IntBool(BAllEqTTTT(FIsGrtrOrEq(mRadius2, V3Dot(d, d))));
	}

private:
	// Declared but not defined in this header.
	SphereAABBTest& operator=(const SphereAABBTest&);
	const Vec3V mCenter;
	const FloatV mRadius2;
};

// The Opcode capsule-AABB traversal test seems to be *exactly* the same as the ray-box test inflated by the capsule radius (so not a true capsule/box test)
// and the code for the ray-box test is better. TODO: check the zero length case and use the sphere traversal if this one fails.
// (OTOH it's not that hard to adapt the Ray-AABB test to a capsule test)

// Capsule versus AABB test implemented as the inflated ray test: all arguments are
// forwarded to RayAABBTest and operator() calls check<true>().
struct CapsuleAABBTest: private RayAABBTest
{
	PX_FORCE_INLINE CapsuleAABBTest(const PxVec3& origin, const PxVec3& unitDir, const PxReal length, const PxVec3& inflation)
	: RayAABBTest(origin, unitDir, length, inflation)
	{}

	// Returns RayAABBTest::check<true>(center, extents) converted to Ps::IntBool.
	PX_FORCE_INLINE Ps::IntBool operator()(const Vec3VArg center, const Vec3VArg extents) const
	{
		return Ps::IntBool(RayAABBTest::check<true>(center, extents));
	}
};

// Oriented box versus AABB overlap test using separating axes.
// fullTest : when false, operator() stops after the class I and class II axes and reports
//            an overlap; when true, the class III edge cross-product axes are tested as well.
template<bool fullTest>
struct OBBAABBTests
{
	// pos                 : translation of the OBB.
	// rot                 : rotation of the OBB; its transpose is what gets stored.
	// halfExtentsInflated : extents of the OBB (any inflation is expected to be applied by the caller, per the name).
	OBBAABBTests(const PxVec3& pos, const PxMat33& rot, const PxVec3& halfExtentsInflated)
	{
		// NOTE(review): presumably guards the cross-axis tests against near-parallel edges - confirm
		const Vec3V eps = V3Load(1e-6f);

		mT = V3LoadU(pos);
		mExtents = V3LoadU(halfExtentsInflated);

		// storing the transpose matrices yields a simpler SIMD test
		mRT = Mat33V_From_PxMat33(rot.getTranspose());
		// column-wise absolute value of mRT, each column padded by eps
		mART = Mat33V(V3Add(V3Abs(mRT.col0), eps), V3Add(V3Abs(mRT.col1), eps), V3Add(V3Abs(mRT.col2), eps));
		mBB_xyz = M33TrnspsMulV3(mART, mExtents);

		// mBB_123 / mBB_456 / mBB_789 are only read by the class III test, so they are
		// left unset when fullTest is false.
		if(fullTest)
		{
			const Vec3V eYZX = V3PermYZX(mExtents), eZXY = V3PermZXY(mExtents);

			mBB_123 = V3MulAdd(eYZX, V3PermZXY(mART.col0), V3Mul(eZXY, V3PermYZX(mART.col0)));
			mBB_456 = V3MulAdd(eYZX, V3PermZXY(mART.col1), V3Mul(eZXY, V3PermYZX(mART.col1)));
			mBB_789 = V3MulAdd(eYZX, V3PermZXY(mART.col2), V3Mul(eZXY, V3PermYZX(mART.col2)));
		}
	}

	// Tests the stored OBB against the AABB given by center / extents.
	// Returns Ps::IntFalse as soon as a class I or class II test reports out-of-bounds.
	// TODO: force inline it?
	Ps::IntBool operator()(const Vec3V center, const Vec3V extents) const
	{
		const Vec3V t = V3Sub(mT, center);

		// class I - axes of AABB
		// (V3OutOfBounds presumably reports any component of t outside +/- the given bounds - confirm)
		if(V3OutOfBounds(t, V3Add(extents, mBB_xyz)))
			return Ps::IntFalse;

		const Vec3V rX = mRT.col0, rY = mRT.col1, rZ = mRT.col2;
		const Vec3V arX = mART.col0, arY = mART.col1, arZ = mART.col2;

		const FloatV eX = V3GetX(extents), eY = V3GetY(extents), eZ = V3GetZ(extents);
		const FloatV tX = V3GetX(t), tY = V3GetY(t), tZ = V3GetZ(t);

		// class II - axes of OBB
		{
			// v  : t combined with the columns of mRT (presumably mRT * t - confirm)
			// v2 : mExtents plus the AABB extents combined with the columns of mART
			const Vec3V v = V3ScaleAdd(rZ, tZ, V3ScaleAdd(rY, tY, V3Scale(rX, tX)));
			const Vec3V v2 = V3ScaleAdd(arZ, eZ, V3ScaleAdd(arY, eY, V3ScaleAdd(arX, eX, mExtents)));
			if(V3OutOfBounds(v, v2))
				return Ps::IntFalse;
		}

		if(!fullTest)
			return Ps::IntTrue;

		// class III - edge cross products. Almost all OBB tests early-out with type I or type II,
		// so early-outs here probably aren't useful (TODO: profile)

		// Each group below builds a value (va/vb/vc) and a bound (va2/vb2/vc2); the mask is set
		// where the value lies outside [-bound, bound].
		const Vec3V va = V3NegScaleSub(rZ, tY, V3Scale(rY, tZ));
		const Vec3V va2 = V3ScaleAdd(arY, eZ, V3ScaleAdd(arZ, eY, mBB_123));
		const BoolV ba = BOr(V3IsGrtr(va, va2), V3IsGrtr(V3Neg(va2), va));

		const Vec3V vb = V3NegScaleSub(rX, tZ, V3Scale(rZ, tX));
		const Vec3V vb2 = V3ScaleAdd(arX, eZ, V3ScaleAdd(arZ, eX, mBB_456));
		const BoolV bb = BOr(V3IsGrtr(vb, vb2), V3IsGrtr(V3Neg(vb2), vb));

		const Vec3V vc = V3NegScaleSub(rY, tX, V3Scale(rX, tY));
		const Vec3V vc2 = V3ScaleAdd(arX, eY, V3ScaleAdd(arY, eX, mBB_789));
		const BoolV bc = BOr(V3IsGrtr(vc, vc2), V3IsGrtr(V3Neg(vc2), vc));

		// The three masks are OR-ed and passed to BAllEqFFFF
		// (presumably true only when no lane is set, i.e. no separating axis was found - confirm).
		return Ps::IntBool(BAllEqFFFF(BOr(ba, BOr(bb,bc))));
	}

	Vec3V mExtents; // extents of OBB
	Vec3V mT; // translation of OBB
	Mat33V mRT; // transpose of rotation matrix of OBB
	Mat33V mART; // component-wise absolute value of mRT, padded by epsilon

	Vec3V mBB_xyz; // extents of OBB along coordinate axes
	Vec3V mBB_123; // projections of extents onto edge-cross axes
	Vec3V mBB_456;
	Vec3V mBB_789;
};

// Default OBB test: the full variant including the class III axes.
typedef OBBAABBTests<true> OBBAABBTest;

}
}
#endif