1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
|
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2008-2018 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef GU_RAWQUERY_TESTS_SIMD_H
#define GU_RAWQUERY_TESTS_SIMD_H
#include "foundation/PxTransform.h"
#include "foundation/PxBounds3.h"
#include "CmPhysXCommon.h"
#include "PxBoxGeometry.h"
#include "PxSphereGeometry.h"
#include "PxCapsuleGeometry.h"
#include "PsVecMath.h"
namespace physx
{
namespace Gu
{
struct RayAABBTest
{
PX_FORCE_INLINE RayAABBTest(const PxVec3& origin_, const PxVec3& unitDir_, const PxReal maxDist, const PxVec3& inflation_)
: mOrigin(V3LoadU(origin_))
, mDir(V3LoadU(unitDir_))
, mDirYZX(V3PermYZX(mDir))
, mInflation(V3LoadU(inflation_))
, mAbsDir(V3Abs(mDir))
, mAbsDirYZX(V3PermYZX(mAbsDir))
{
const PxVec3 ext = maxDist >= PX_MAX_F32 ? PxVec3( unitDir_.x == 0 ? origin_.x : PxSign(unitDir_.x)*PX_MAX_F32,
unitDir_.y == 0 ? origin_.y : PxSign(unitDir_.y)*PX_MAX_F32,
unitDir_.z == 0 ? origin_.z : PxSign(unitDir_.z)*PX_MAX_F32)
: origin_ + unitDir_ * maxDist;
mRayMin = V3Min(mOrigin, V3LoadU(ext));
mRayMax = V3Max(mOrigin, V3LoadU(ext));
}
PX_FORCE_INLINE void setDistance(PxReal distance)
{
const Vec3V ext = V3ScaleAdd(mDir, FLoad(distance), mOrigin);
mRayMin = V3Min(mOrigin, ext);
mRayMax = V3Max(mOrigin, ext);
}
template<bool TInflate>
PX_FORCE_INLINE PxU32 check(const Vec3V center, const Vec3V extents) const
{
const Vec3V iExt = TInflate ? V3Add(extents, mInflation) : extents;
// coordinate axes
const Vec3V nodeMax = V3Add(center, iExt);
const Vec3V nodeMin = V3Sub(center, iExt);
// cross axes
const Vec3V offset = V3Sub(mOrigin, center);
const Vec3V offsetYZX = V3PermYZX(offset);
const Vec3V iExtYZX = V3PermYZX(iExt);
const Vec3V f = V3NegMulSub(mDirYZX, offset, V3Mul(mDir, offsetYZX));
const Vec3V g = V3MulAdd(iExt, mAbsDirYZX, V3Mul(iExtYZX, mAbsDir));
const BoolV
maskA = V3IsGrtrOrEq(nodeMax, mRayMin),
maskB = V3IsGrtrOrEq(mRayMax, nodeMin),
maskC = V3IsGrtrOrEq(g, V3Abs(f));
const BoolV andABCMasks = BAnd(BAnd(maskA, maskB), maskC);
return BAllEqTTTT(andABCMasks);
}
const Vec3V mOrigin, mDir, mDirYZX, mInflation, mAbsDir, mAbsDirYZX;
Vec3V mRayMin, mRayMax;
protected:
RayAABBTest& operator=(const RayAABBTest&);
};
// probably not worth having a SIMD version of this unless the traversal passes Vec3Vs
struct AABBAABBTest
{
PX_FORCE_INLINE AABBAABBTest(const PxTransform&t, const PxBoxGeometry&b)
: mCenter(V3LoadU(t.p))
, mExtents(V3LoadU(b.halfExtents))
{ }
PX_FORCE_INLINE AABBAABBTest(const PxBounds3& b)
: mCenter(V3LoadU(b.getCenter()))
, mExtents(V3LoadU(b.getExtents()))
{ }
PX_FORCE_INLINE Ps::IntBool operator()(const Vec3V center, const Vec3V extents) const
{
//PxVec3 c; PxVec3_From_Vec3V(center, c);
//PxVec3 e; PxVec3_From_Vec3V(extents, e);
//if(PxAbs(c.x - mCenter.x) > mExtents.x + e.x) return Ps::IntFalse;
//if(PxAbs(c.y - mCenter.y) > mExtents.y + e.y) return Ps::IntFalse;
//if(PxAbs(c.z - mCenter.z) > mExtents.z + e.z) return Ps::IntFalse;
//return Ps::IntTrue;
return Ps::IntBool(V3AllGrtrOrEq(V3Add(mExtents, extents), V3Abs(V3Sub(center, mCenter))));
}
private:
AABBAABBTest& operator=(const AABBAABBTest&);
const Vec3V mCenter, mExtents;
};
struct SphereAABBTest
{
PX_FORCE_INLINE SphereAABBTest(const PxTransform& t, const PxSphereGeometry& s)
: mCenter(V3LoadU(t.p))
, mRadius2(FLoad(s.radius * s.radius))
{}
PX_FORCE_INLINE SphereAABBTest(const PxVec3& center, PxF32 radius)
: mCenter(V3LoadU(center))
, mRadius2(FLoad(radius * radius))
{}
PX_FORCE_INLINE Ps::IntBool operator()(const Vec3V boxCenter, const Vec3V boxExtents) const
{
const Vec3V offset = V3Sub(mCenter, boxCenter);
const Vec3V closest = V3Clamp(offset, V3Neg(boxExtents), boxExtents);
const Vec3V d = V3Sub(offset, closest);
return Ps::IntBool(BAllEqTTTT(FIsGrtrOrEq(mRadius2, V3Dot(d, d))));
}
private:
SphereAABBTest& operator=(const SphereAABBTest&);
const Vec3V mCenter;
const FloatV mRadius2;
};
// The Opcode capsule-AABB traversal test seems to be *exactly* the same as the ray-box test inflated by the capsule radius (so not a true capsule/box test)
// and the code for the ray-box test is better. TODO: check the zero length case and use the sphere traversal if this one fails.
// (OTOH it's not that hard to adapt the Ray-AABB test to a capsule test)
struct CapsuleAABBTest: private RayAABBTest
{
PX_FORCE_INLINE CapsuleAABBTest(const PxVec3& origin, const PxVec3& unitDir, const PxReal length, const PxVec3& inflation)
: RayAABBTest(origin, unitDir, length, inflation)
{}
PX_FORCE_INLINE Ps::IntBool operator()(const Vec3VArg center, const Vec3VArg extents) const
{
return Ps::IntBool(RayAABBTest::check<true>(center, extents));
}
};
template<bool fullTest>
struct OBBAABBTests
{
OBBAABBTests(const PxVec3& pos, const PxMat33& rot, const PxVec3& halfExtentsInflated)
{
const Vec3V eps = V3Load(1e-6f);
mT = V3LoadU(pos);
mExtents = V3LoadU(halfExtentsInflated);
// storing the transpose matrices yields a simpler SIMD test
mRT = Mat33V_From_PxMat33(rot.getTranspose());
mART = Mat33V(V3Add(V3Abs(mRT.col0), eps), V3Add(V3Abs(mRT.col1), eps), V3Add(V3Abs(mRT.col2), eps));
mBB_xyz = M33TrnspsMulV3(mART, mExtents);
if(fullTest)
{
const Vec3V eYZX = V3PermYZX(mExtents), eZXY = V3PermZXY(mExtents);
mBB_123 = V3MulAdd(eYZX, V3PermZXY(mART.col0), V3Mul(eZXY, V3PermYZX(mART.col0)));
mBB_456 = V3MulAdd(eYZX, V3PermZXY(mART.col1), V3Mul(eZXY, V3PermYZX(mART.col1)));
mBB_789 = V3MulAdd(eYZX, V3PermZXY(mART.col2), V3Mul(eZXY, V3PermYZX(mART.col2)));
}
}
// TODO: force inline it?
Ps::IntBool operator()(const Vec3V center, const Vec3V extents) const
{
const Vec3V t = V3Sub(mT, center);
// class I - axes of AABB
if(V3OutOfBounds(t, V3Add(extents, mBB_xyz)))
return Ps::IntFalse;
const Vec3V rX = mRT.col0, rY = mRT.col1, rZ = mRT.col2;
const Vec3V arX = mART.col0, arY = mART.col1, arZ = mART.col2;
const FloatV eX = V3GetX(extents), eY = V3GetY(extents), eZ = V3GetZ(extents);
const FloatV tX = V3GetX(t), tY = V3GetY(t), tZ = V3GetZ(t);
// class II - axes of OBB
{
const Vec3V v = V3ScaleAdd(rZ, tZ, V3ScaleAdd(rY, tY, V3Scale(rX, tX)));
const Vec3V v2 = V3ScaleAdd(arZ, eZ, V3ScaleAdd(arY, eY, V3ScaleAdd(arX, eX, mExtents)));
if(V3OutOfBounds(v, v2))
return Ps::IntFalse;
}
if(!fullTest)
return Ps::IntTrue;
// class III - edge cross products. Almost all OBB tests early-out with type I or type II,
// so early-outs here probably aren't useful (TODO: profile)
const Vec3V va = V3NegScaleSub(rZ, tY, V3Scale(rY, tZ));
const Vec3V va2 = V3ScaleAdd(arY, eZ, V3ScaleAdd(arZ, eY, mBB_123));
const BoolV ba = BOr(V3IsGrtr(va, va2), V3IsGrtr(V3Neg(va2), va));
const Vec3V vb = V3NegScaleSub(rX, tZ, V3Scale(rZ, tX));
const Vec3V vb2 = V3ScaleAdd(arX, eZ, V3ScaleAdd(arZ, eX, mBB_456));
const BoolV bb = BOr(V3IsGrtr(vb, vb2), V3IsGrtr(V3Neg(vb2), vb));
const Vec3V vc = V3NegScaleSub(rY, tX, V3Scale(rX, tY));
const Vec3V vc2 = V3ScaleAdd(arX, eY, V3ScaleAdd(arY, eX, mBB_789));
const BoolV bc = BOr(V3IsGrtr(vc, vc2), V3IsGrtr(V3Neg(vc2), vc));
return Ps::IntBool(BAllEqFFFF(BOr(ba, BOr(bb,bc))));
}
Vec3V mExtents; // extents of OBB
Vec3V mT; // translation of OBB
Mat33V mRT; // transpose of rotation matrix of OBB
Mat33V mART; // transpose of mRT, padded by epsilon
Vec3V mBB_xyz; // extents of OBB along coordinate axes
Vec3V mBB_123; // projections of extents onto edge-cross axes
Vec3V mBB_456;
Vec3V mBB_789;
};
typedef OBBAABBTests<true> OBBAABBTest;
}
}
#endif
|