// Source path: root/PhysX_3.4/Source/SceneQuery/src/SqPrunerTestsSIMD.h
// Blob: 9cdeb277bb3756a47eb4bcf6cd52ee1d51ceb55b
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2018 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.  

#ifndef GU_RAWQUERY_TESTS_SIMD_H
#define GU_RAWQUERY_TESTS_SIMD_H

#include "foundation/PxTransform.h"
#include "foundation/PxBounds3.h"
#include "CmPhysXCommon.h"
#include "PxBoxGeometry.h"
#include "PxSphereGeometry.h"
#include "PxCapsuleGeometry.h"
#include "PsVecMath.h"

namespace physx
{
namespace Gu
{

// Overlap test between a ray segment and axis-aligned boxes supplied in
// center/extents form. The segment is fixed at construction (its length can be
// changed later through setDistance()); check() is then called once per box.
struct RayAABBTest
{
	// origin_    : start point of the ray.
	// unitDir_   : ray direction. NOTE(review): the name suggests it is normalized; nothing here verifies that.
	// maxDist    : ray length. Values >= PX_MAX_F32 take the "unbounded" path below.
	// inflation_ : per-axis amount added to the box extents by check<true>().
	PX_FORCE_INLINE RayAABBTest(const PxVec3& origin_, const PxVec3& unitDir_, const PxReal maxDist, const PxVec3& inflation_)
	: mOrigin(V3LoadU(origin_))
	, mDir(V3LoadU(unitDir_))
	, mDirYZX(V3PermYZX(mDir))
	, mInflation(V3LoadU(inflation_))
	, mAbsDir(V3Abs(mDir))
	, mAbsDirYZX(V3PermYZX(mAbsDir))
	{
		PxVec3 farPoint;
		if(maxDist >= PX_MAX_F32)
		{
			// Unbounded ray: origin + dir * maxDist is not evaluated. Each component of the
			// far point is the origin's own coordinate when the direction component is exactly
			// zero, and PxSign(direction component) * PX_MAX_F32 otherwise.
			farPoint = PxVec3(	unitDir_.x == 0 ? origin_.x : PxSign(unitDir_.x)*PX_MAX_F32,
								unitDir_.y == 0 ? origin_.y : PxSign(unitDir_.y)*PX_MAX_F32,
								unitDir_.z == 0 ? origin_.z : PxSign(unitDir_.z)*PX_MAX_F32);
		}
		else
		{
			farPoint = origin_ + unitDir_ * maxDist;
		}

		// Bounds of the segment [origin, farPoint], used by the coordinate-axis part of check().
		const Vec3V farPointV = V3LoadU(farPoint);
		mRayMin = V3Min(mOrigin, farPointV);
		mRayMax = V3Max(mOrigin, farPointV);
	}

	// Recomputes the segment bounds for a new ray length. Unlike the constructor there is
	// no special handling of very large distances: the end point is always origin + dir * distance.
	PX_FORCE_INLINE void setDistance(PxReal distance)
	{
		const FloatV length = FLoad(distance);
		const Vec3V endPoint = V3ScaleAdd(mDir, length, mOrigin);
		mRayMin = V3Min(mOrigin, endPoint);
		mRayMax = V3Max(mOrigin, endPoint);
	}

	// Tests the ray segment against the box (center, extents).
	// TInflate : when true, mInflation is added to the extents before testing.
	// Returns BAllEqTTTT() of the AND of the three comparison masks computed below.
	template<bool TInflate>
	PX_FORCE_INLINE PxU32 check(const Vec3V center, const Vec3V extents) const
	{
		const Vec3V halfSize = TInflate ? V3Add(extents, mInflation) : extents;

		// coordinate axes: the box must overlap the bounds of the ray segment
		const Vec3V boxMin = V3Sub(center, halfSize);
		const Vec3V boxMax = V3Add(center, halfSize);
		const BoolV aboveRayMin = V3IsGrtrOrEq(boxMax, mRayMin);
		const BoolV belowRayMax = V3IsGrtrOrEq(mRayMax, boxMin);

		// cross axes: |mDir * offsetYZX - mDirYZX * offset| is compared against the box
		// extents weighted by the absolute direction components
		const Vec3V toOrigin = V3Sub(mOrigin, center);
		const Vec3V toOriginYZX = V3PermYZX(toOrigin);
		const Vec3V halfSizeYZX = V3PermYZX(halfSize);

		const Vec3V crossTerm = V3NegMulSub(mDirYZX, toOrigin, V3Mul(mDir, toOriginYZX));
		const Vec3V crossLimit = V3MulAdd(halfSize, mAbsDirYZX, V3Mul(halfSizeYZX, mAbsDir));
		const BoolV withinCrossLimit = V3IsGrtrOrEq(crossLimit, V3Abs(crossTerm));

		const BoolV combined = BAnd(BAnd(aboveRayMin, belowRayMax), withinCrossLimit);
		return BAllEqTTTT(combined);
	}

	// Ray description, fixed at construction. The declaration order matters: the
	// constructor's initializer list derives the later members from the earlier ones.
	const Vec3V mOrigin, mDir, mDirYZX, mInflation, mAbsDir, mAbsDirYZX;
	// Bounds of the current ray segment; rewritten by setDistance().
	Vec3V mRayMin, mRayMax;
protected:
	// Declared but not defined here: assignment is not available (the struct has const members).
	RayAABBTest& operator=(const RayAABBTest&);
};

// probably not worth having a SIMD version of this unless the traversal passes Vec3Vs
struct AABBAABBTest
{
	// Builds the query box from a transform and a box geometry.
	// Only the translation t.p and b.halfExtents are read; the orientation of t is not used.
	PX_FORCE_INLINE AABBAABBTest(const PxTransform&t, const PxBoxGeometry&b)
	: mCenter(V3LoadU(t.p))
	, mExtents(V3LoadU(b.halfExtents))
	{
	}

	// Builds the query box from bounds, using b.getCenter() and b.getExtents().
	PX_FORCE_INLINE AABBAABBTest(const PxBounds3& b)
	: mCenter(V3LoadU(b.getCenter()))
	, mExtents(V3LoadU(b.getExtents()))
	{
	}

	// Tests the stored box against the box (center, extents).
	// Per axis this checks |center - mCenter| <= mExtents + extents; the result is the
	// value of V3AllGrtrOrEq() over the three axes, converted to Ps::IntBool.
	PX_FORCE_INLINE Ps::IntBool operator()(const Vec3V center, const Vec3V extents) const		
	{	
		const Vec3V summedExtents = V3Add(mExtents, extents);
		const Vec3V centerDistance = V3Abs(V3Sub(center, mCenter));
		return Ps::IntBool(V3AllGrtrOrEq(summedExtents, centerDistance));
	}

private:
	// Declared but not defined here: assignment is not available (the members are const).
	AABBAABBTest& operator=(const AABBAABBTest&);
	// Center and half extents of the query box.
	const Vec3V mCenter, mExtents;
};

// Overlap test between one fixed sphere and axis-aligned boxes supplied in
// center/extents form. The squared radius is stored so that the test needs no square root.
struct SphereAABBTest
{
	// Builds the sphere from the translation t.p and the radius s.radius.
	PX_FORCE_INLINE SphereAABBTest(const PxTransform& t, const PxSphereGeometry& s)
	: mCenter(V3LoadU(t.p))
	, mRadius2(FLoad(s.radius * s.radius))
	{
	}

	// Builds the sphere from an explicit center and radius.
	PX_FORCE_INLINE SphereAABBTest(const PxVec3& center, PxF32 radius)
	: mCenter(V3LoadU(center))
	, mRadius2(FLoad(radius * radius))
	{
	}
	
	// Tests the sphere against the box (boxCenter, boxExtents): the squared distance from
	// the sphere center to the box is compared against the squared radius (<= passes).
	PX_FORCE_INLINE Ps::IntBool operator()(const Vec3V boxCenter, const Vec3V boxExtents) const		
	{	
		// Sphere center relative to the box center.
		const Vec3V relCenter = V3Sub(mCenter, boxCenter);

		// Clamping that point to [-boxExtents, boxExtents] gives the point of the box nearest to it.
		const Vec3V negExtents = V3Neg(boxExtents);
		const Vec3V nearest = V3Clamp(relCenter, negExtents, boxExtents);

		const Vec3V separation = V3Sub(relCenter, nearest);
		const FloatV distance2 = V3Dot(separation, separation);
		const BoolV withinRadius = FIsGrtrOrEq(mRadius2, distance2);
		return Ps::IntBool(BAllEqTTTT(withinRadius));
	}

private:
	// Declared but not defined here: assignment is not available (the members are const).
	SphereAABBTest& operator=(const SphereAABBTest&);
	// Sphere center.
	const Vec3V mCenter;
	// Squared sphere radius.
	const FloatV mRadius2;
};

// The Opcode capsule-AABB traversal test seems to be *exactly* the same as the ray-box test inflated by the capsule radius (so not a true capsule/box test)
// and the code for the ray-box test is better. TODO: check the zero length case and use the sphere traversal if this one fails.
// (OTOH it's not that hard to adapt the Ray-AABB test to a capsule test)

// Capsule-vs-AABB traversal test implemented on top of RayAABBTest: the capsule is
// treated as a ray segment and every tested box is grown by 'inflation'.
struct CapsuleAABBTest: private RayAABBTest
{
	// All arguments are forwarded unchanged to RayAABBTest (origin, direction, length, box inflation).
	PX_FORCE_INLINE CapsuleAABBTest(const PxVec3& origin, const PxVec3& unitDir, const PxReal length, const PxVec3& inflation)
		: RayAABBTest(origin, unitDir, length, inflation)
	{
	}

	// Runs the inflated ray test against the box (center, extents) and converts the result to Ps::IntBool.
	PX_FORCE_INLINE Ps::IntBool operator()(const Vec3VArg center, const Vec3VArg extents) const
	{
		const PxU32 overlap = RayAABBTest::check<true>(center, extents);
		return Ps::IntBool(overlap);
	}
};

template<bool fullTest>
struct OBBAABBTests
{
	OBBAABBTests(const PxVec3& pos, const PxMat33& rot, const PxVec3& halfExtentsInflated)
	{
		const Vec3V eps = V3Load(1e-6f);

		mT = V3LoadU(pos);
		mExtents = V3LoadU(halfExtentsInflated);

		// storing the transpose matrices yields a simpler SIMD test
		mRT = Mat33V_From_PxMat33(rot.getTranspose());
		mART = Mat33V(V3Add(V3Abs(mRT.col0), eps), V3Add(V3Abs(mRT.col1), eps), V3Add(V3Abs(mRT.col2), eps));
		mBB_xyz = M33TrnspsMulV3(mART, mExtents);

		if(fullTest)
		{
			const Vec3V eYZX = V3PermYZX(mExtents), eZXY = V3PermZXY(mExtents);

			mBB_123 = V3MulAdd(eYZX, V3PermZXY(mART.col0), V3Mul(eZXY, V3PermYZX(mART.col0)));
			mBB_456 = V3MulAdd(eYZX, V3PermZXY(mART.col1), V3Mul(eZXY, V3PermYZX(mART.col1)));
			mBB_789 = V3MulAdd(eYZX, V3PermZXY(mART.col2), V3Mul(eZXY, V3PermYZX(mART.col2)));
		}
	}

	// TODO: force inline it?
	Ps::IntBool operator()(const Vec3V center, const Vec3V extents) const
	{	
		const Vec3V t = V3Sub(mT, center);

		// class I - axes of AABB
		if(V3OutOfBounds(t, V3Add(extents, mBB_xyz)))
			return Ps::IntFalse;

		const Vec3V rX = mRT.col0, rY = mRT.col1, rZ = mRT.col2;
		const Vec3V arX = mART.col0, arY = mART.col1, arZ = mART.col2;

		const FloatV eX = V3GetX(extents), eY = V3GetY(extents), eZ = V3GetZ(extents);
		const FloatV tX = V3GetX(t), tY = V3GetY(t), tZ = V3GetZ(t);

		// class II - axes of OBB
		{
			const Vec3V v = V3ScaleAdd(rZ, tZ, V3ScaleAdd(rY, tY, V3Scale(rX, tX)));
			const Vec3V v2 = V3ScaleAdd(arZ, eZ, V3ScaleAdd(arY, eY, V3ScaleAdd(arX, eX, mExtents)));
			if(V3OutOfBounds(v, v2))
				return Ps::IntFalse;
		}

		if(!fullTest)
			return Ps::IntTrue;

		// class III - edge cross products. Almost all OBB tests early-out with type I or type II,
		// so early-outs here probably aren't useful (TODO: profile)

		const Vec3V va = V3NegScaleSub(rZ, tY, V3Scale(rY, tZ));
		const Vec3V va2 = V3ScaleAdd(arY, eZ, V3ScaleAdd(arZ, eY, mBB_123));
		const BoolV ba = BOr(V3IsGrtr(va, va2), V3IsGrtr(V3Neg(va2), va));
	
		const Vec3V vb = V3NegScaleSub(rX, tZ, V3Scale(rZ, tX));
		const Vec3V vb2 = V3ScaleAdd(arX, eZ, V3ScaleAdd(arZ, eX, mBB_456));
		const BoolV bb = BOr(V3IsGrtr(vb, vb2), V3IsGrtr(V3Neg(vb2), vb));
		
		const Vec3V vc = V3NegScaleSub(rY, tX, V3Scale(rX, tY));
		const Vec3V vc2 = V3ScaleAdd(arX, eY, V3ScaleAdd(arY, eX, mBB_789));
		const BoolV bc = BOr(V3IsGrtr(vc, vc2), V3IsGrtr(V3Neg(vc2), vc));

		return Ps::IntBool(BAllEqFFFF(BOr(ba, BOr(bb,bc))));
	}

	Vec3V		mExtents;	// extents of OBB
	Vec3V		mT;			// translation of OBB
	Mat33V		mRT;		// transpose of rotation matrix of OBB
	Mat33V		mART;		// transpose of mRT, padded by epsilon

	Vec3V		mBB_xyz;	// extents of OBB along coordinate axes
	Vec3V		mBB_123;	// projections of extents onto edge-cross axes
	Vec3V		mBB_456;
	Vec3V		mBB_789;
};

// Convenience name for the OBB-vs-AABB test with the edge cross-product axes enabled (fullTest == true).
typedef OBBAABBTests<true> OBBAABBTest;

}
}
#endif