diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/GeomUtils/src/distance/GuDistanceSegmentSegment.cpp | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/GeomUtils/src/distance/GuDistanceSegmentSegment.cpp')
| -rw-r--r-- | PhysX_3.4/Source/GeomUtils/src/distance/GuDistanceSegmentSegment.cpp | 576 |
1 files changed, 576 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/GeomUtils/src/distance/GuDistanceSegmentSegment.cpp b/PhysX_3.4/Source/GeomUtils/src/distance/GuDistanceSegmentSegment.cpp new file mode 100644 index 00000000..bb9fd314 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/distance/GuDistanceSegmentSegment.cpp @@ -0,0 +1,576 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "GuDistanceSegmentSegment.h" +#include "GuDistanceSegmentSegmentSIMD.h" + +using namespace physx; +using namespace Ps; +using namespace aos; + +static const float ZERO_TOLERANCE = 1e-06f; + +// S0 = origin + extent * dir; +// S1 = origin - extent * dir; +PxReal Gu::distanceSegmentSegmentSquared( const PxVec3& origin0, const PxVec3& dir0, PxReal extent0, + const PxVec3& origin1, const PxVec3& dir1, PxReal extent1, + PxReal* param0, PxReal* param1) +{ + const PxVec3 kDiff = origin0 - origin1; + const PxReal fA01 = -dir0.dot(dir1); + const PxReal fB0 = kDiff.dot(dir0); + const PxReal fB1 = -kDiff.dot(dir1); + const PxReal fC = kDiff.magnitudeSquared(); + const PxReal fDet = PxAbs(1.0f - fA01*fA01); + PxReal fS0, fS1, fSqrDist, fExtDet0, fExtDet1, fTmpS0, fTmpS1; + + if (fDet >= ZERO_TOLERANCE) + { + // segments are not parallel + fS0 = fA01*fB1-fB0; + fS1 = fA01*fB0-fB1; + fExtDet0 = extent0*fDet; + fExtDet1 = extent1*fDet; + + if (fS0 >= -fExtDet0) + { + if (fS0 <= fExtDet0) + { + if (fS1 >= -fExtDet1) + { + if (fS1 <= fExtDet1) // region 0 (interior) + { + // minimum at two interior points of 3D lines + PxReal fInvDet = 1.0f/fDet; + fS0 *= fInvDet; + fS1 *= fInvDet; + fSqrDist = fS0*(fS0+fA01*fS1+2.0f*fB0) + fS1*(fA01*fS0+fS1+2.0f*fB1)+fC; + } + else // region 3 (side) + { + fS1 = extent1; + fTmpS0 = -(fA01*fS1+fB0); + if (fTmpS0 < -extent0) + { + fS0 = -extent0; + fSqrDist = fS0*(fS0-2.0f*fTmpS0) + fS1*(fS1+2.0f*fB1)+fC; + } + else if (fTmpS0 <= extent0) + { + fS0 = fTmpS0; + fSqrDist = -fS0*fS0+fS1*(fS1+2.0f*fB1)+fC; + } + else + { + fS0 = extent0; + fSqrDist = fS0*(fS0-2.0f*fTmpS0) + fS1*(fS1+2.0f*fB1)+fC; + } + } + } + else // region 7 (side) + { + fS1 = -extent1; + fTmpS0 = -(fA01*fS1+fB0); + if (fTmpS0 < -extent0) + { + fS0 = -extent0; + fSqrDist = fS0*(fS0-2.0f*fTmpS0) + fS1*(fS1+2.0f*fB1)+fC; + } + else if (fTmpS0 <= extent0) + { + fS0 = fTmpS0; + fSqrDist = -fS0*fS0+fS1*(fS1+2.0f*fB1)+fC; + } + else + { + fS0 = extent0; + fSqrDist = fS0*(fS0-2.0f*fTmpS0) + fS1*(fS1+2.0f*fB1)+fC; + } + } + } + else + { + if (fS1 >= -fExtDet1) + { + if (fS1 <= fExtDet1) // region 1 (side) + { + fS0 = extent0; + fTmpS1 = -(fA01*fS0+fB1); + if (fTmpS1 < -extent1) + { + fS1 = -extent1; + fSqrDist = fS1*(fS1-2.0f*fTmpS1) + fS0*(fS0+2.0f*fB0)+fC; + } + else if (fTmpS1 <= extent1) + { + fS1 = fTmpS1; + fSqrDist = -fS1*fS1+fS0*(fS0+2.0f*fB0)+fC; + } + else + { + fS1 = extent1; + fSqrDist = fS1*(fS1-2.0f*fTmpS1) + fS0*(fS0+2.0f*fB0)+fC; + } + } + else // region 2 (corner) + { + fS1 = extent1; + fTmpS0 = -(fA01*fS1+fB0); + if (fTmpS0 < -extent0) + { + fS0 = -extent0; + fSqrDist = fS0*(fS0-2.0f*fTmpS0) + fS1*(fS1+2.0f*fB1)+fC; + } + else if (fTmpS0 <= extent0) + { + fS0 = fTmpS0; + fSqrDist = -fS0*fS0+fS1*(fS1+2.0f*fB1)+fC; + } + else + { + fS0 = extent0; + fTmpS1 = -(fA01*fS0+fB1); + if (fTmpS1 < -extent1) + { + fS1 = -extent1; + fSqrDist = fS1*(fS1-2.0f*fTmpS1) + fS0*(fS0+2.0f*fB0)+fC; + } + else if (fTmpS1 <= extent1) + { + fS1 = fTmpS1; + fSqrDist = -fS1*fS1+fS0*(fS0+2.0f*fB0) + fC; + } + else + { + fS1 = extent1; + fSqrDist = fS1*(fS1-2.0f*fTmpS1) + fS0*(fS0+2.0f*fB0)+fC; + } + } + } + } + else // region 8 (corner) + { + fS1 = -extent1; + fTmpS0 = -(fA01*fS1+fB0); + if (fTmpS0 < -extent0) + { + fS0 = -extent0; + fSqrDist = fS0*(fS0-2.0f*fTmpS0) + fS1*(fS1+2.0f*fB1)+fC; + } + else if (fTmpS0 <= extent0) + { + fS0 = fTmpS0; + fSqrDist = -fS0*fS0+fS1*(fS1+2.0f*fB1)+fC; + } + else + { + fS0 = extent0; + fTmpS1 = -(fA01*fS0+fB1); + if (fTmpS1 > extent1) + { + fS1 = extent1; + fSqrDist = fS1*(fS1-2.0f*fTmpS1) + fS0*(fS0+2.0f*fB0)+fC; + } + else if (fTmpS1 >= -extent1) + { + fS1 = fTmpS1; + fSqrDist = -fS1*fS1+fS0*(fS0+2.0f*fB0) + fC; + } + else + { + fS1 = -extent1; + fSqrDist = fS1*(fS1-2.0f*fTmpS1) + fS0*(fS0+2.0f*fB0)+fC; + } + } + } + } + } + else + { + if (fS1 >= -fExtDet1) + { + if (fS1 <= fExtDet1) // region 5 (side) + { + fS0 = -extent0; + fTmpS1 = -(fA01*fS0+fB1); + if (fTmpS1 < -extent1) + { + fS1 = -extent1; + fSqrDist = fS1*(fS1-2.0f*fTmpS1) + fS0*(fS0+2.0f*fB0)+fC; + } + else if (fTmpS1 <= extent1) + { + fS1 = fTmpS1; + fSqrDist = -fS1*fS1+fS0*(fS0+2.0f*fB0)+fC; + } + else + { + fS1 = extent1; + fSqrDist = fS1*(fS1-2.0f*fTmpS1) + fS0*(fS0+2.0f*fB0)+fC; + } + } + else // region 4 (corner) + { + fS1 = extent1; + fTmpS0 = -(fA01*fS1+fB0); + if (fTmpS0 > extent0) + { + fS0 = extent0; + fSqrDist = fS0*(fS0-2.0f*fTmpS0) + fS1*(fS1+2.0f*fB1)+fC; + } + else if (fTmpS0 >= -extent0) + { + fS0 = fTmpS0; + fSqrDist = -fS0*fS0+fS1*(fS1+2.0f*fB1)+fC; + } + else + { + fS0 = -extent0; + fTmpS1 = -(fA01*fS0+fB1); + if (fTmpS1 < -extent1) + { + fS1 = -extent1; + fSqrDist = fS1*(fS1-2.0f*fTmpS1) + fS0*(fS0+2.0f*fB0)+fC; + } + else if (fTmpS1 <= extent1) + { + fS1 = fTmpS1; + fSqrDist = -fS1*fS1+fS0*(fS0+2.0f*fB0) + fC; + } + else + { + fS1 = extent1; + fSqrDist = fS1*(fS1-2.0f*fTmpS1) + fS0*(fS0+2.0f*fB0)+fC; + } + } + } + } + else // region 6 (corner) + { + fS1 = -extent1; + fTmpS0 = -(fA01*fS1+fB0); + if (fTmpS0 > extent0) + { + fS0 = extent0; + fSqrDist = fS0*(fS0-2.0f*fTmpS0) + fS1*(fS1+2.0f*fB1)+fC; + } + else if (fTmpS0 >= -extent0) + { + fS0 = fTmpS0; + fSqrDist = -fS0*fS0+fS1*(fS1+2.0f*fB1)+fC; + } + else + { + fS0 = -extent0; + fTmpS1 = -(fA01*fS0+fB1); + if (fTmpS1 < -extent1) + { + fS1 = -extent1; + fSqrDist = fS1*(fS1-2.0f*fTmpS1) + fS0*(fS0+2.0f*fB0)+fC; + } + else if (fTmpS1 <= extent1) + { + fS1 = fTmpS1; + fSqrDist = -fS1*fS1+fS0*(fS0+2.0f*fB0) + fC; + } + else + { + fS1 = extent1; + fSqrDist = fS1*(fS1-2.0f*fTmpS1) + fS0*(fS0+2.0f*fB0)+fC; + } + } + } + } + } + else + { + // The segments are parallel. + PxReal fE0pE1 = extent0 + extent1; + PxReal fSign = (fA01 > 0.0f ? -1.0f : 1.0f); + PxReal b0Avr = 0.5f*(fB0 - fSign*fB1); + PxReal fLambda = -b0Avr; + if(fLambda < -fE0pE1) + { + fLambda = -fE0pE1; + } + else if(fLambda > fE0pE1) + { + fLambda = fE0pE1; + } + + fS1 = -fSign*fLambda*extent1/fE0pE1; + fS0 = fLambda + fSign*fS1; + fSqrDist = fLambda*(fLambda + 2.0f*b0Avr) + fC; + } + + if(param0) + *param0 = fS0; + if(param1) + *param1 = fS1; + + // account for numerical round-off error + return physx::intrinsics::selectMax(0.0f, fSqrDist); +} + +PxReal Gu::distanceSegmentSegmentSquared( const PxVec3& origin0, const PxVec3& extent0, + const PxVec3& origin1, const PxVec3& extent1, + PxReal* param0, + PxReal* param1) +{ + // Some conversion is needed between the old & new code + // Old: + // segment (s0, s1) + // origin = s0 + // extent = s1 - s0 + // + // New: + // s0 = origin + extent * dir; + // s1 = origin - extent * dir; + + // dsequeira: is this really sensible? We use a highly optimized Wild Magic routine, + // then use a segment representation that requires an expensive conversion to/from... + + PxVec3 dir0 = extent0; + const PxVec3 center0 = origin0 + extent0*0.5f; + PxReal length0 = extent0.magnitude(); //AM: change to make it work for degenerate (zero length) segments. + const bool b0 = length0 != 0.0f; + PxReal oneOverLength0 = 0.0f; + if(b0) + { + oneOverLength0 = 1.0f / length0; + dir0 *= oneOverLength0; + length0 *= 0.5f; + } + + PxVec3 dir1 = extent1; + const PxVec3 center1 = origin1 + extent1*0.5f; + PxReal length1 = extent1.magnitude(); + const bool b1 = length1 != 0.0f; + PxReal oneOverLength1 = 0.0f; + if(b1) + { + oneOverLength1 = 1.0f / length1; + dir1 *= oneOverLength1; + length1 *= 0.5f; + } + + // the return param vals have -extent = s0, extent = s1 + + const PxReal d2 = distanceSegmentSegmentSquared(center0, dir0, length0, + center1, dir1, length1, + param0, param1); + + //ML : This is wrong for some reason, I guess it has precision issue + //// renormalize into the 0 = s0, 1 = s1 range + //if (param0) + // *param0 = b0 ? ((*param0) * oneOverLength0 * 0.5f + 0.5f) : 0.0f; + //if (param1) + // *param1 = b1 ? ((*param1) * oneOverLength1 * 0.5f + 0.5f) : 0.0f; + + if(param0) + *param0 = b0 ? ((length0 + (*param0))*oneOverLength0) : 0.0f; + if(param1) + *param1 = b1 ? ((length1 + (*param1))*oneOverLength1) : 0.0f; + + return d2; +} + +/* + S0 = origin + extent * dir; + S1 = origin + extent * dir; + dir is the vector from start to end point + p1 is the start point of segment1 + d1 is the direction vector(q1 - p1) + p2 is the start point of segment2 + d2 is the direction vector(q2 - p2) +*/ + +FloatV Gu::distanceSegmentSegmentSquared( const Vec3VArg p1, + const Vec3VArg d1, + const Vec3VArg p2, + const Vec3VArg d2, + FloatV& s, + FloatV& t) +{ + const FloatV zero = FZero(); + const FloatV one = FOne(); + const FloatV eps = FEps(); + + const Vec3V r = V3Sub(p1, p2); + const Vec4V combinedDot = V3Dot4(d1, d1, d2, d2, d1, d2, d1, r); + const Vec4V combinedRecip = V4Sel(V4IsGrtr(combinedDot, V4Splat(eps)), V4Recip(combinedDot), V4Splat(zero)); + const FloatV a = V4GetX(combinedDot); + const FloatV e = V4GetY(combinedDot); + const FloatV b = V4GetZ(combinedDot); + const FloatV c = V4GetW(combinedDot); + const FloatV aRecip = V4GetX(combinedRecip);//FSel(FIsGrtr(a, eps), FRecip(a), zero); + const FloatV eRecip = V4GetY(combinedRecip);//FSel(FIsGrtr(e, eps), FRecip(e), zero); + + const FloatV f = V3Dot(d2, r); + + /* + s = (b*f - c*e)/(a*e - b*b); + t = (a*f - b*c)/(a*e - b*b); + + s = (b*t - c)/a; + t = (b*s + f)/e; + */ + + //if segments not parallel, the general non-degenerated case, compute closest point on two segments and clamp to segment1 + const FloatV denom = FSub(FMul(a, e), FMul(b, b)); + const FloatV temp = FSub(FMul(b, f), FMul(c, e)); + const FloatV s0 = FClamp(FDiv(temp, denom), zero, one); + + //if segment is parallel, demon < eps + const BoolV con2 = FIsGrtr(eps, denom);//FIsEq(denom, zero); + const FloatV sTmp = FSel(con2, FHalf(), s0); + + //compute point on segment2 closest to segment1 + //const FloatV tTmp = FMul(FAdd(FMul(b, sTmp), f), eRecip); + const FloatV tTmp = FMul(FScaleAdd(b, sTmp, f), eRecip); + + //if t is in [zero, one], done. otherwise clamp t + const FloatV t2 = FClamp(tTmp, zero, one); + + //recompute s for the new value + const FloatV comp = FMul(FSub(FMul(b,t2), c), aRecip); + const FloatV s2 = FClamp(comp, zero, one); + + s = s2; + t = t2; + + const Vec3V closest1 = V3ScaleAdd(d1, s2, p1);//V3Add(p1, V3Scale(d1, tempS)); + const Vec3V closest2 = V3ScaleAdd(d2, t2, p2);//V3Add(p2, V3Scale(d2, tempT)); + const Vec3V vv = V3Sub(closest1, closest2); + return V3Dot(vv, vv); +} + + + + +/* + segment (p, d) and segment (p02, d02) + segment (p, d) and segment (p12, d12) + segment (p, d) and segment (p22, d22) + segment (p, d) and segment (p32, d32) +*/ +Vec4V Gu::distanceSegmentSegmentSquared4( const Vec3VArg p, const Vec3VArg d0, + const Vec3VArg p02, const Vec3VArg d02, + const Vec3VArg p12, const Vec3VArg d12, + const Vec3VArg p22, const Vec3VArg d22, + const Vec3VArg p32, const Vec3VArg d32, + Vec4V& s, Vec4V& t) +{ + const Vec4V zero = V4Zero(); + const Vec4V one = V4One(); + const Vec4V eps = V4Eps(); + const Vec4V half = V4Splat(FHalf()); + + const Vec4V d0X = V4Splat(V3GetX(d0)); + const Vec4V d0Y = V4Splat(V3GetY(d0)); + const Vec4V d0Z = V4Splat(V3GetZ(d0)); + const Vec4V pX = V4Splat(V3GetX(p)); + const Vec4V pY = V4Splat(V3GetY(p)); + const Vec4V pZ = V4Splat(V3GetZ(p)); + + Vec4V d024 = Vec4V_From_Vec3V(d02); + Vec4V d124 = Vec4V_From_Vec3V(d12); + Vec4V d224 = Vec4V_From_Vec3V(d22); + Vec4V d324 = Vec4V_From_Vec3V(d32); + + Vec4V p024 = Vec4V_From_Vec3V(p02); + Vec4V p124 = Vec4V_From_Vec3V(p12); + Vec4V p224 = Vec4V_From_Vec3V(p22); + Vec4V p324 = Vec4V_From_Vec3V(p32); + + Vec4V d0123X, d0123Y, d0123Z; + Vec4V p0123X, p0123Y, p0123Z; + + PX_TRANSPOSE_44_34(d024, d124, d224, d324, d0123X, d0123Y, d0123Z); + PX_TRANSPOSE_44_34(p024, p124, p224, p324, p0123X, p0123Y, p0123Z); + + const Vec4V rX = V4Sub(pX, p0123X); + const Vec4V rY = V4Sub(pY, p0123Y); + const Vec4V rZ = V4Sub(pZ, p0123Z); + + //TODO - store this in a transposed state and avoid so many dot products? + + const FloatV dd = V3Dot(d0, d0); + + const Vec4V e = V4MulAdd(d0123Z, d0123Z, V4MulAdd(d0123X, d0123X, V4Mul(d0123Y, d0123Y))); + const Vec4V b = V4MulAdd(d0Z, d0123Z, V4MulAdd(d0X, d0123X, V4Mul(d0Y, d0123Y))); + const Vec4V c = V4MulAdd(d0Z, rZ, V4MulAdd(d0X, rX, V4Mul(d0Y, rY))); + const Vec4V f = V4MulAdd(d0123Z, rZ, V4MulAdd(d0123X, rX, V4Mul(d0123Y, rY))); + + const Vec4V a(V4Splat(dd)); + + const Vec4V aRecip(V4Recip(a)); + const Vec4V eRecip(V4Recip(e)); + + //if segments not parallell, compute closest point on two segments and clamp to segment1 + const Vec4V denom = V4Sub(V4Mul(a, e), V4Mul(b, b)); + const Vec4V temp = V4Sub(V4Mul(b, f), V4Mul(c, e)); + const Vec4V s0 = V4Clamp(V4Div(temp, denom), zero, one); + + //test whether segments are parallel + const BoolV con2 = V4IsGrtrOrEq(eps, denom); + const Vec4V sTmp = V4Sel(con2, half, s0); + + //compute point on segment2 closest to segment1 + const Vec4V tTmp = V4Mul(V4Add(V4Mul(b, sTmp), f), eRecip); + + //if t is in [zero, one], done. otherwise clamp t + const Vec4V t2 = V4Clamp(tTmp, zero, one); + + //recompute s for the new value + const Vec4V comp = V4Mul(V4Sub(V4Mul(b,t2), c), aRecip); + const BoolV aaNearZero = V4IsGrtrOrEq(eps, a); // check if aRecip is valid (aa>eps) + const Vec4V s2 = V4Sel(aaNearZero, V4Zero(), V4Clamp(comp, zero, one)); + + /* s = V4Sel(con0, zero, V4Sel(con1, cd, s2)); + t = V4Sel(con1, zero, V4Sel(con0, cg, t2)); */ + s = s2; + t = t2; + + const Vec4V closest1X = V4MulAdd(d0X, s2, pX); + const Vec4V closest1Y = V4MulAdd(d0Y, s2, pY); + const Vec4V closest1Z = V4MulAdd(d0Z, s2, pZ); + + const Vec4V closest2X = V4MulAdd(d0123X, t2, p0123X); + const Vec4V closest2Y = V4MulAdd(d0123Y, t2, p0123Y); + const Vec4V closest2Z = V4MulAdd(d0123Z, t2, p0123Z); + + const Vec4V vvX = V4Sub(closest1X, closest2X); + const Vec4V vvY = V4Sub(closest1Y, closest2Y); + const Vec4V vvZ = V4Sub(closest1Z, closest2Z); + + const Vec4V vd = V4MulAdd(vvX, vvX, V4MulAdd(vvY, vvY, V4Mul(vvZ, vvZ))); + + return vd; +} |