aboutsummaryrefslogtreecommitdiff
path: root/sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp')
-rwxr-xr-x[-rw-r--r--]sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp2044
1 files changed, 1022 insertions, 1022 deletions
diff --git a/sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp b/sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp
index 14537ed..e69d039 100644..100755
--- a/sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp
+++ b/sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp
@@ -1,1022 +1,1022 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2016-2018 NVIDIA Corporation. All rights reserved.
-
-
-#include "NvBlastExtApexSharedParts.h"
-
-#include "PxMat44.h"
-#include "PxBounds3.h"
-#include "PxFoundation.h"
-#include "PxPhysics.h"
-#include "PsVecMath.h"
-#include <vector>
-
-using namespace physx;
-using namespace physx::shdfnd::aos;
-
-
-namespace Nv
-{
-namespace Blast
-{
-
-PX_NOALIAS PX_FORCE_INLINE BoolV PointOutsideOfPlane4(const Vec3VArg _a, const Vec3VArg _b, const Vec3VArg _c, const Vec3VArg _d)
-{
- // this is not 0 because of the following scenario:
- // All the points lie on the same plane and the plane goes through the origin (0,0,0).
- // On the Wii U, the math below has the problem that when point A gets projected on the
- // plane cumputed by A, B, C, the distance to the plane might not be 0 for the mentioned
- // scenario but a small positive or negative value. This can lead to the wrong boolean
- // results. Using a small negative value as threshold is more conservative but safer.
- const Vec4V zero = V4Load(-1e-6);
-
- const Vec3V ab = V3Sub(_b, _a);
- const Vec3V ac = V3Sub(_c, _a);
- const Vec3V ad = V3Sub(_d, _a);
- const Vec3V bd = V3Sub(_d, _b);
- const Vec3V bc = V3Sub(_c, _b);
-
- const Vec3V v0 = V3Cross(ab, ac);
- const Vec3V v1 = V3Cross(ac, ad);
- const Vec3V v2 = V3Cross(ad, ab);
- const Vec3V v3 = V3Cross(bd, bc);
-
- const FloatV signa0 = V3Dot(v0, _a);
- const FloatV signa1 = V3Dot(v1, _a);
- const FloatV signa2 = V3Dot(v2, _a);
- const FloatV signd3 = V3Dot(v3, _a);
-
- const FloatV signd0 = V3Dot(v0, _d);
- const FloatV signd1 = V3Dot(v1, _b);
- const FloatV signd2 = V3Dot(v2, _c);
- const FloatV signa3 = V3Dot(v3, _b);
-
- const Vec4V signa = V4Merge(signa0, signa1, signa2, signa3);
- const Vec4V signd = V4Merge(signd0, signd1, signd2, signd3);
- return V4IsGrtrOrEq(V4Mul(signa, signd), zero);//same side, outside of the plane
-}
-
-PX_NOALIAS PX_FORCE_INLINE Vec3V closestPtPointSegment(const Vec3VArg a, const Vec3VArg b)
-{
- const FloatV zero = FZero();
- const FloatV one = FOne();
-
- //Test degenerated case
- const Vec3V ab = V3Sub(b, a);
- const FloatV denom = V3Dot(ab, ab);
- const Vec3V ap = V3Neg(a);//V3Sub(origin, a);
- const FloatV nom = V3Dot(ap, ab);
- const BoolV con = FIsEq(denom, zero);
- const FloatV tValue = FClamp(FDiv(nom, denom), zero, one);
- const FloatV t = FSel(con, zero, tValue);
-
- return V3Sel(con, a, V3ScaleAdd(ab, t, a));
-}
-
-PX_NOALIAS PX_FORCE_INLINE Vec3V closestPtPointSegment(const Vec3VArg Q0, const Vec3VArg Q1, const Vec3VArg A0, const Vec3VArg A1,
- const Vec3VArg B0, const Vec3VArg B1, PxU32& size, Vec3V& closestA, Vec3V& closestB)
-{
- const Vec3V a = Q0;
- const Vec3V b = Q1;
-
- const BoolV bTrue = BTTTT();
- const FloatV zero = FZero();
- const FloatV one = FOne();
-
- //Test degenerated case
- const Vec3V ab = V3Sub(b, a);
- const FloatV denom = V3Dot(ab, ab);
- const Vec3V ap = V3Neg(a);//V3Sub(origin, a);
- const FloatV nom = V3Dot(ap, ab);
- const BoolV con = FIsEq(denom, zero);
-
- if (BAllEq(con, bTrue))
- {
- size = 1;
- closestA = A0;
- closestB = B0;
- return Q0;
- }
-
- const Vec3V v = V3Sub(A1, A0);
- const Vec3V w = V3Sub(B1, B0);
- const FloatV tValue = FClamp(FDiv(nom, denom), zero, one);
- const FloatV t = FSel(con, zero, tValue);
-
- const Vec3V tempClosestA = V3ScaleAdd(v, t, A0);
- const Vec3V tempClosestB = V3ScaleAdd(w, t, B0);
- closestA = tempClosestA;
- closestB = tempClosestB;
- return V3Sub(tempClosestA, tempClosestB);
-}
-
-PX_NOALIAS Vec3V closestPtPointSegmentTesselation(const Vec3VArg Q0, const Vec3VArg Q1, const Vec3VArg A0, const Vec3VArg A1,
- const Vec3VArg B0, const Vec3VArg B1, PxU32& size, Vec3V& closestA, Vec3V& closestB)
-{
- const FloatV half = FHalf();
-
- const FloatV targetSegmentLengthSq = FLoad(10000.f);//100 unit
-
- Vec3V q0 = Q0;
- Vec3V q1 = Q1;
- Vec3V a0 = A0;
- Vec3V a1 = A1;
- Vec3V b0 = B0;
- Vec3V b1 = B1;
-
- for (;;)
- {
- const Vec3V midPoint = V3Scale(V3Add(q0, q1), half);
- const Vec3V midA = V3Scale(V3Add(a0, a1), half);
- const Vec3V midB = V3Scale(V3Add(b0, b1), half);
-
- const Vec3V v = V3Sub(midPoint, q0);
- const FloatV sqV = V3Dot(v, v);
- if (FAllGrtr(targetSegmentLengthSq, sqV))
- break;
- //split the segment into half
- const Vec3V tClos0 = closestPtPointSegment(q0, midPoint);
- const FloatV sqDist0 = V3Dot(tClos0, tClos0);
-
- const Vec3V tClos1 = closestPtPointSegment(q1, midPoint);
- const FloatV sqDist1 = V3Dot(tClos1, tClos1);
- //const BoolV con = FIsGrtr(sqDist0, sqDist1);
- if (FAllGrtr(sqDist0, sqDist1))
- {
- //segment [m, q1]
- q0 = midPoint;
- a0 = midA;
- b0 = midB;
- }
- else
- {
- //segment [q0, m]
- q1 = midPoint;
- a1 = midA;
- b1 = midB;
- }
-
- }
-
- return closestPtPointSegment(q0, q1, a0, a1, b0, b1, size, closestA, closestB);
-}
-
-PX_NOALIAS Vec3V closestPtPointTriangleTesselation(const Vec3V* PX_RESTRICT Q, const Vec3V* PX_RESTRICT A, const Vec3V* PX_RESTRICT B, const PxU32* PX_RESTRICT indices, PxU32& size, Vec3V& closestA, Vec3V& closestB)
-{
- size = 3;
- const FloatV zero = FZero();
- const FloatV eps = FEps();
- const FloatV half = FHalf();
- const BoolV bTrue = BTTTT();
- const FloatV four = FLoad(4.f);
- const FloatV sixty = FLoad(100.f);
-
- const PxU32 ind0 = indices[0];
- const PxU32 ind1 = indices[1];
- const PxU32 ind2 = indices[2];
-
- const Vec3V a = Q[ind0];
- const Vec3V b = Q[ind1];
- const Vec3V c = Q[ind2];
-
- Vec3V ab_ = V3Sub(b, a);
- Vec3V ac_ = V3Sub(c, a);
- Vec3V bc_ = V3Sub(b, c);
-
- const FloatV dac_ = V3Dot(ac_, ac_);
- const FloatV dbc_ = V3Dot(bc_, bc_);
- if (FAllGrtrOrEq(eps, FMin(dac_, dbc_)))
- {
- //degenerate
- size = 2;
- return closestPtPointSegment(Q[ind0], Q[ind1], A[ind0], A[ind1], B[ind0], B[ind1], size, closestA, closestB);
- }
-
- Vec3V ap = V3Neg(a);
- Vec3V bp = V3Neg(b);
- Vec3V cp = V3Neg(c);
-
- FloatV d1 = V3Dot(ab_, ap); // snom
- FloatV d2 = V3Dot(ac_, ap); // tnom
- FloatV d3 = V3Dot(ab_, bp); // -sdenom
- FloatV d4 = V3Dot(ac_, bp); // unom = d4 - d3
- FloatV d5 = V3Dot(ab_, cp); // udenom = d5 - d6
- FloatV d6 = V3Dot(ac_, cp); // -tdenom
- /* FloatV unom = FSub(d4, d3);
- FloatV udenom = FSub(d5, d6);*/
-
- FloatV va = FNegScaleSub(d5, d4, FMul(d3, d6));//edge region of BC
- FloatV vb = FNegScaleSub(d1, d6, FMul(d5, d2));//edge region of AC
- FloatV vc = FNegScaleSub(d3, d2, FMul(d1, d4));//edge region of AB
-
- //check if p in vertex region outside a
- const BoolV con00 = FIsGrtrOrEq(zero, d1); // snom <= 0
- const BoolV con01 = FIsGrtrOrEq(zero, d2); // tnom <= 0
- const BoolV con0 = BAnd(con00, con01); // vertex region a
- if (BAllEq(con0, bTrue))
- {
- //size = 1;
- closestA = A[ind0];
- closestB = B[ind0];
- return Q[ind0];
- }
-
- //check if p in vertex region outside b
- const BoolV con10 = FIsGrtrOrEq(d3, zero);
- const BoolV con11 = FIsGrtrOrEq(d3, d4);
- const BoolV con1 = BAnd(con10, con11); // vertex region b
- if (BAllEq(con1, bTrue))
- {
- /*size = 1;
- indices[0] = ind1;*/
- closestA = A[ind1];
- closestB = B[ind1];
- return Q[ind1];
- }
-
-
- //check if p in vertex region outside of c
- const BoolV con20 = FIsGrtrOrEq(d6, zero);
- const BoolV con21 = FIsGrtrOrEq(d6, d5);
- const BoolV con2 = BAnd(con20, con21); // vertex region c
- if (BAllEq(con2, bTrue))
- {
- closestA = A[ind2];
- closestB = B[ind2];
- return Q[ind2];
- }
-
- //check if p in edge region of AB
- const BoolV con30 = FIsGrtrOrEq(zero, vc);
- const BoolV con31 = FIsGrtrOrEq(d1, zero);
- const BoolV con32 = FIsGrtrOrEq(zero, d3);
- const BoolV con3 = BAnd(con30, BAnd(con31, con32));
-
- if (BAllEq(con3, bTrue))
- {
- //size = 2;
- //p in edge region of AB, split AB
- return closestPtPointSegmentTesselation(Q[ind0], Q[ind1], A[ind0], A[ind1], B[ind0], B[ind1], size, closestA, closestB);
- }
-
- //check if p in edge region of BC
- const BoolV con40 = FIsGrtrOrEq(zero, va);
- const BoolV con41 = FIsGrtrOrEq(d4, d3);
- const BoolV con42 = FIsGrtrOrEq(d5, d6);
- const BoolV con4 = BAnd(con40, BAnd(con41, con42));
-
- if (BAllEq(con4, bTrue))
- {
- //p in edge region of BC, split BC
- return closestPtPointSegmentTesselation(Q[ind1], Q[ind2], A[ind1], A[ind2], B[ind1], B[ind2], size, closestA, closestB);
- }
-
- //check if p in edge region of AC
- const BoolV con50 = FIsGrtrOrEq(zero, vb);
- const BoolV con51 = FIsGrtrOrEq(d2, zero);
- const BoolV con52 = FIsGrtrOrEq(zero, d6);
- const BoolV con5 = BAnd(con50, BAnd(con51, con52));
-
- if (BAllEq(con5, bTrue))
- {
- //p in edge region of AC, split AC
- return closestPtPointSegmentTesselation(Q[ind0], Q[ind2], A[ind0], A[ind2], B[ind0], B[ind2], size, closestA, closestB);
- }
-
- size = 3;
-
- Vec3V q0 = Q[ind0];
- Vec3V q1 = Q[ind1];
- Vec3V q2 = Q[ind2];
- Vec3V a0 = A[ind0];
- Vec3V a1 = A[ind1];
- Vec3V a2 = A[ind2];
- Vec3V b0 = B[ind0];
- Vec3V b1 = B[ind1];
- Vec3V b2 = B[ind2];
-
- for (;;)
- {
-
- const Vec3V ab = V3Sub(q1, q0);
- const Vec3V ac = V3Sub(q2, q0);
- const Vec3V bc = V3Sub(q2, q1);
-
- const FloatV dab = V3Dot(ab, ab);
- const FloatV dac = V3Dot(ac, ac);
- const FloatV dbc = V3Dot(bc, bc);
-
- const FloatV fMax = FMax(dab, FMax(dac, dbc));
- const FloatV fMin = FMin(dab, FMin(dac, dbc));
-
- const Vec3V w = V3Cross(ab, ac);
-
- const FloatV area = V3Length(w);
- const FloatV ratio = FDiv(FSqrt(fMax), FSqrt(fMin));
- if (FAllGrtr(four, ratio) && FAllGrtr(sixty, area))
- break;
-
- //calculate the triangle normal
- const Vec3V triNormal = V3Normalize(w);
-
- PX_ASSERT(V3AllEq(triNormal, V3Zero()) == 0);
-
-
- //split the longest edge
- if (FAllGrtrOrEq(dab, dac) && FAllGrtrOrEq(dab, dbc))
- {
- //split edge q0q1
- const Vec3V midPoint = V3Scale(V3Add(q0, q1), half);
- const Vec3V midA = V3Scale(V3Add(a0, a1), half);
- const Vec3V midB = V3Scale(V3Add(b0, b1), half);
-
- const Vec3V v = V3Sub(midPoint, q2);
- const Vec3V n = V3Normalize(V3Cross(v, triNormal));
-
- const FloatV d = FNeg(V3Dot(n, midPoint));
- const FloatV dp = FAdd(V3Dot(n, q0), d);
- const FloatV sum = FMul(d, dp);
-
- if (FAllGrtr(sum, zero))
- {
- //q0 and origin at the same side, split triangle[q0, m, q2]
- q1 = midPoint;
- a1 = midA;
- b1 = midB;
- }
- else
- {
- //q1 and origin at the same side, split triangle[m, q1, q2]
- q0 = midPoint;
- a0 = midA;
- b0 = midB;
- }
-
- }
- else if (FAllGrtrOrEq(dac, dbc))
- {
- //split edge q0q2
- const Vec3V midPoint = V3Scale(V3Add(q0, q2), half);
- const Vec3V midA = V3Scale(V3Add(a0, a2), half);
- const Vec3V midB = V3Scale(V3Add(b0, b2), half);
-
- const Vec3V v = V3Sub(midPoint, q1);
- const Vec3V n = V3Normalize(V3Cross(v, triNormal));
-
- const FloatV d = FNeg(V3Dot(n, midPoint));
- const FloatV dp = FAdd(V3Dot(n, q0), d);
- const FloatV sum = FMul(d, dp);
-
- if (FAllGrtr(sum, zero))
- {
- //q0 and origin at the same side, split triangle[q0, q1, m]
- q2 = midPoint;
- a2 = midA;
- b2 = midB;
- }
- else
- {
- //q2 and origin at the same side, split triangle[m, q1, q2]
- q0 = midPoint;
- a0 = midA;
- b0 = midB;
- }
- }
- else
- {
- //split edge q1q2
- const Vec3V midPoint = V3Scale(V3Add(q1, q2), half);
- const Vec3V midA = V3Scale(V3Add(a1, a2), half);
- const Vec3V midB = V3Scale(V3Add(b1, b2), half);
-
- const Vec3V v = V3Sub(midPoint, q0);
- const Vec3V n = V3Normalize(V3Cross(v, triNormal));
-
- const FloatV d = FNeg(V3Dot(n, midPoint));
- const FloatV dp = FAdd(V3Dot(n, q1), d);
- const FloatV sum = FMul(d, dp);
-
- if (FAllGrtr(sum, zero))
- {
- //q1 and origin at the same side, split triangle[q0, q1, m]
- q2 = midPoint;
- a2 = midA;
- b2 = midB;
- }
- else
- {
- //q2 and origin at the same side, split triangle[q0, m, q2]
- q1 = midPoint;
- a1 = midA;
- b1 = midB;
- }
-
-
- }
- }
-
- //P must project inside face region. Compute Q using Barycentric coordinates
- ab_ = V3Sub(q1, q0);
- ac_ = V3Sub(q2, q0);
- ap = V3Neg(q0);
- bp = V3Neg(q1);
- cp = V3Neg(q2);
-
- d1 = V3Dot(ab_, ap); // snom
- d2 = V3Dot(ac_, ap); // tnom
- d3 = V3Dot(ab_, bp); // -sdenom
- d4 = V3Dot(ac_, bp); // unom = d4 - d3
- d5 = V3Dot(ab_, cp); // udenom = d5 - d6
- d6 = V3Dot(ac_, cp); // -tdenom
-
- va = FNegScaleSub(d5, d4, FMul(d3, d6));//edge region of BC
- vb = FNegScaleSub(d1, d6, FMul(d5, d2));//edge region of AC
- vc = FNegScaleSub(d3, d2, FMul(d1, d4));//edge region of AB
-
- const FloatV toRecipD = FAdd(va, FAdd(vb, vc));
- const FloatV denom = FRecip(toRecipD);//V4GetW(recipTmp);
- const Vec3V v0 = V3Sub(a1, a0);
- const Vec3V v1 = V3Sub(a2, a0);
- const Vec3V w0 = V3Sub(b1, b0);
- const Vec3V w1 = V3Sub(b2, b0);
-
- const FloatV t = FMul(vb, denom);
- const FloatV w = FMul(vc, denom);
- const Vec3V vA1 = V3Scale(v1, w);
- const Vec3V vB1 = V3Scale(w1, w);
- const Vec3V tempClosestA = V3Add(a0, V3ScaleAdd(v0, t, vA1));
- const Vec3V tempClosestB = V3Add(b0, V3ScaleAdd(w0, t, vB1));
- closestA = tempClosestA;
- closestB = tempClosestB;
- return V3Sub(tempClosestA, tempClosestB);
-}
-
-PX_NOALIAS Vec3V closestPtPointTetrahedronTesselation(Vec3V* PX_RESTRICT Q, Vec3V* PX_RESTRICT A, Vec3V* PX_RESTRICT B, PxU32& size, Vec3V& closestA, Vec3V& closestB)
-{
- const FloatV eps = FEps();
- const Vec3V zeroV = V3Zero();
- PxU32 tempSize = size;
-
- FloatV bestSqDist = FLoad(PX_MAX_REAL);
- const Vec3V a = Q[0];
- const Vec3V b = Q[1];
- const Vec3V c = Q[2];
- const Vec3V d = Q[3];
- const BoolV bTrue = BTTTT();
- const BoolV bFalse = BFFFF();
-
- //degenerated
- const Vec3V ad = V3Sub(d, a);
- const Vec3V bd = V3Sub(d, b);
- const Vec3V cd = V3Sub(d, c);
- const FloatV dad = V3Dot(ad, ad);
- const FloatV dbd = V3Dot(bd, bd);
- const FloatV dcd = V3Dot(cd, cd);
- const FloatV fMin = FMin(dad, FMin(dbd, dcd));
- if (FAllGrtr(eps, fMin))
- {
- size = 3;
- PxU32 tempIndices[] = { 0, 1, 2 };
- return closestPtPointTriangleTesselation(Q, A, B, tempIndices, size, closestA, closestB);
- }
-
- Vec3V _Q[] = { Q[0], Q[1], Q[2], Q[3] };
- Vec3V _A[] = { A[0], A[1], A[2], A[3] };
- Vec3V _B[] = { B[0], B[1], B[2], B[3] };
-
- PxU32 indices[3] = { 0, 1, 2 };
-
- const BoolV bIsOutside4 = PointOutsideOfPlane4(a, b, c, d);
-
- if (BAllEq(bIsOutside4, bFalse))
- {
- //origin is inside the tetrahedron, we are done
- return zeroV;
- }
-
- Vec3V result = zeroV;
- Vec3V tempClosestA, tempClosestB;
-
- if (BAllEq(BGetX(bIsOutside4), bTrue))
- {
-
- PxU32 tempIndices[] = { 0, 1, 2 };
- PxU32 _size = 3;
-
- result = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB);
-
- const FloatV sqDist = V3Dot(result, result);
- bestSqDist = sqDist;
-
- indices[0] = tempIndices[0];
- indices[1] = tempIndices[1];
- indices[2] = tempIndices[2];
-
- tempSize = _size;
- closestA = tempClosestA;
- closestB = tempClosestB;
- }
-
- if (BAllEq(BGetY(bIsOutside4), bTrue))
- {
-
- PxU32 tempIndices[] = { 0, 2, 3 };
-
- PxU32 _size = 3;
-
- const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB);
-
- const FloatV sqDist = V3Dot(q, q);
- const BoolV con = FIsGrtr(bestSqDist, sqDist);
- if (BAllEq(con, bTrue))
- {
- result = q;
- bestSqDist = sqDist;
- indices[0] = tempIndices[0];
- indices[1] = tempIndices[1];
- indices[2] = tempIndices[2];
-
- tempSize = _size;
- closestA = tempClosestA;
- closestB = tempClosestB;
- }
- }
-
- if (BAllEq(BGetZ(bIsOutside4), bTrue))
- {
-
- PxU32 tempIndices[] = { 0, 3, 1 };
- PxU32 _size = 3;
-
- const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB);
-
- const FloatV sqDist = V3Dot(q, q);
- const BoolV con = FIsGrtr(bestSqDist, sqDist);
- if (BAllEq(con, bTrue))
- {
- result = q;
- bestSqDist = sqDist;
- indices[0] = tempIndices[0];
- indices[1] = tempIndices[1];
- indices[2] = tempIndices[2];
- tempSize = _size;
- closestA = tempClosestA;
- closestB = tempClosestB;
- }
-
- }
-
- if (BAllEq(BGetW(bIsOutside4), bTrue))
- {
-
- PxU32 tempIndices[] = { 1, 3, 2 };
- PxU32 _size = 3;
-
- const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB);
-
- const FloatV sqDist = V3Dot(q, q);
- const BoolV con = FIsGrtr(bestSqDist, sqDist);
-
- if (BAllEq(con, bTrue))
- {
- result = q;
- bestSqDist = sqDist;
-
- indices[0] = tempIndices[0];
- indices[1] = tempIndices[1];
- indices[2] = tempIndices[2];
-
- tempSize = _size;
- closestA = tempClosestA;
- closestB = tempClosestB;
- }
- }
-
- A[0] = _A[indices[0]]; A[1] = _A[indices[1]]; A[2] = _A[indices[2]];
- B[0] = _B[indices[0]]; B[1] = _B[indices[1]]; B[2] = _B[indices[2]];
- Q[0] = _Q[indices[0]]; Q[1] = _Q[indices[1]]; Q[2] = _Q[indices[2]];
-
-
- size = tempSize;
- return result;
-}
-
-PX_NOALIAS PX_FORCE_INLINE Vec3V doTesselation(Vec3V* PX_RESTRICT Q, Vec3V* PX_RESTRICT A, Vec3V* PX_RESTRICT B,
- const Vec3VArg support, const Vec3VArg supportA, const Vec3VArg supportB, PxU32& size, Vec3V& closestA, Vec3V& closestB)
-{
- switch (size)
- {
- case 1:
- {
- closestA = supportA;
- closestB = supportB;
- return support;
- }
- case 2:
- {
- return closestPtPointSegmentTesselation(Q[0], support, A[0], supportA, B[0], supportB, size, closestA, closestB);
- }
- case 3:
- {
-
- PxU32 tempIndices[3] = { 0, 1, 2 };
- return closestPtPointTriangleTesselation(Q, A, B, tempIndices, size, closestA, closestB);
- }
- case 4:
- {
- return closestPtPointTetrahedronTesselation(Q, A, B, size, closestA, closestB);
- }
- default:
- PX_ASSERT(0);
- }
- return support;
-}
-
-
-
-
-enum Status
-{
- STATUS_NON_INTERSECT,
- STATUS_CONTACT,
- STATUS_DEGENERATE,
-};
-
-struct Output
-{
- /// Get the normal to push apart in direction from A to B
- PX_FORCE_INLINE Vec3V getNormal() const { return V3Normalize(V3Sub(mClosestB, mClosestA)); }
- Vec3V mClosestA; ///< Closest point on A
- Vec3V mClosestB; ///< Closest point on B
- FloatV mDistSq;
-};
-
-struct ConvexV
-{
- void calcExtent(const Vec3V& dir, PxF32& minOut, PxF32& maxOut) const
- {
- // Expand
- const Vec4V x = Vec4V_From_FloatV(V3GetX(dir));
- const Vec4V y = Vec4V_From_FloatV(V3GetY(dir));
- const Vec4V z = Vec4V_From_FloatV(V3GetZ(dir));
-
- const Vec4V* src = mAovVertices;
- const Vec4V* end = src + mNumAovVertices * 3;
-
- // Do first step
- Vec4V max = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2])));
- Vec4V min = max;
- src += 3;
- // Do the rest
- for (; src < end; src += 3)
- {
- const Vec4V dot = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2])));
- max = V4Max(dot, max);
- min = V4Min(dot, min);
- }
- FStore(V4ExtractMax(max), &maxOut);
- FStore(V4ExtractMin(min), &minOut);
- }
- Vec3V calcSupport(const Vec3V& dir) const
- {
- // Expand
- const Vec4V x = Vec4V_From_FloatV(V3GetX(dir));
- const Vec4V y = Vec4V_From_FloatV(V3GetY(dir));
- const Vec4V z = Vec4V_From_FloatV(V3GetZ(dir));
-
- PX_ALIGN(16, static const PxF32 index4const[]) = { 0.0f, 1.0f, 2.0f, 3.0f };
- Vec4V index4 = *(const Vec4V*)index4const;
- PX_ALIGN(16, static const PxF32 delta4const[]) = { 4.0f, 4.0f, 4.0f, 4.0f };
- const Vec4V delta4 = *(const Vec4V*)delta4const;
-
- const Vec4V* src = mAovVertices;
- const Vec4V* end = src + mNumAovVertices * 3;
-
- // Do first step
- Vec4V max = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2])));
- Vec4V maxIndex = index4;
- index4 = V4Add(index4, delta4);
- src += 3;
- // Do the rest
- for (; src < end; src += 3)
- {
- const Vec4V dot = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2])));
- const BoolV cmp = V4IsGrtr(dot, max);
- max = V4Max(dot, max);
- maxIndex = V4Sel(cmp, index4, maxIndex);
- index4 = V4Add(index4, delta4);
- }
- Vec4V horiMax = Vec4V_From_FloatV(V4ExtractMax(max));
- PxU32 mask = BGetBitMask(V4IsEq(horiMax, max));
- const PxU32 simdIndex = (0x12131210 >> (mask + mask)) & PxU32(3);
-
- /// NOTE! Could be load hit store
- /// Would be better to have all simd.
- PX_ALIGN(16, PxF32 f[4]);
- V4StoreA(maxIndex, f);
- PxU32 index = PxU32(PxI32(f[simdIndex]));
-
- const Vec4V* aovIndex = (mAovVertices + (index >> 2) * 3);
- const PxF32* aovOffset = ((const PxF32*)aovIndex) + (index & 3);
-
- return Vec3V_From_Vec4V(V4LoadXYZW(aovOffset[0], aovOffset[4], aovOffset[8], 1.0f));
- }
-
- const Vec4V* mAovVertices; ///< Vertices storex x,x,x,x, y,y,y,y, z,z,z,z
- PxU32 mNumAovVertices; ///< Number of groups of 4 of vertices
-};
-
-Status Collide(const Vec3V& initialDir, const ConvexV& convexA, const Mat34V& bToA, const ConvexV& convexB, Output& out)
-{
- Vec3V Q[4];
- Vec3V A[4];
- Vec3V B[4];
-
- Mat33V aToB = M34Trnsps33(bToA);
-
- PxU32 size = 0;
-
- const Vec3V zeroV = V3Zero();
- const BoolV bTrue = BTTTT();
-
- //Vec3V v = V3UnitX();
- Vec3V v = V3Sel(FIsGrtr(V3Dot(initialDir, initialDir), FZero()), initialDir, V3UnitX());
-
- //const FloatV minMargin = zero;
- //const FloatV eps2 = FMul(minMargin, FLoad(0.01f));
- //FloatV eps2 = zero;
- FloatV eps2 = FLoad(1e-6f);
- const FloatV epsRel = FLoad(0.000225f);
-
- Vec3V closA(zeroV), closB(zeroV);
- FloatV sDist = FMax();
- FloatV minDist = sDist;
- Vec3V closAA = zeroV;
- Vec3V closBB = zeroV;
-
- BoolV bNotTerminated = bTrue;
- BoolV bCon = bTrue;
-
- do
- {
- minDist = sDist;
- closAA = closA;
- closBB = closB;
-
- PxU32 index = size++;
- PX_ASSERT(index < 4);
-
- const Vec3V supportA = convexA.calcSupport(V3Neg(v));
- const Vec3V supportB = M34MulV3(bToA, convexB.calcSupport(M33MulV3(aToB, v)));
- const Vec3V support = Vec3V_From_Vec4V(Vec4V_From_Vec3V(V3Sub(supportA, supportB)));
-
- A[index] = supportA;
- B[index] = supportB;
- Q[index] = support;
-
- const FloatV signDist = V3Dot(v, support);
- const FloatV tmp0 = FSub(sDist, signDist);
- if (FAllGrtr(FMul(epsRel, sDist), tmp0))
- {
- out.mClosestA = closA;
- out.mClosestB = closB;
- out.mDistSq = sDist;
- return STATUS_NON_INTERSECT;
- }
-
- //calculate the closest point between two convex hull
- v = doTesselation(Q, A, B, support, supportA, supportB, size, closA, closB);
- sDist = V3Dot(v, v);
- bCon = FIsGrtr(minDist, sDist);
-
- bNotTerminated = BAnd(FIsGrtr(sDist, eps2), bCon);
- } while (BAllEq(bNotTerminated, bTrue));
-
- out.mClosestA = V3Sel(bCon, closA, closAA);
- out.mClosestB = V3Sel(bCon, closB, closBB);
- out.mDistSq = FSel(bCon, sDist, minDist);
- return Status(BAllEq(bCon, bTrue) == 1 ? STATUS_CONTACT : STATUS_DEGENERATE);
-}
-
-static void _calcSeparation(const ConvexV& convexA, const physx::PxTransform& aToWorldIn, const Mat34V& bToA, ConvexV& convexB, Output& out, Separation& sep)
-{
-
- Mat33V aToB = M34Trnsps33(bToA);
- Vec3V normalA = out.getNormal();
-
- convexA.calcExtent(normalA, sep.min0, sep.max0);
- Vec3V normalB = M33MulV3(aToB, normalA);
- convexB.calcExtent(normalB, sep.min1, sep.max1);
-
- {
- // Offset the min max taking into account transform
- // Distance of origin from B's space in As space in direction of the normal in As space should fix it...
- PxF32 fix;
- FStore(V3Dot(bToA.col3, normalA), &fix);
- sep.min1 += fix;
- sep.max1 += fix;
- }
-
- // Looks like it's the plane at the midpoint
- Vec3V center = V3Scale(V3Add(out.mClosestA, out.mClosestB), FLoad(0.5f));
- // Transform to world space
- Mat34V aToWorld;
- *(PxMat44*)&aToWorld = aToWorldIn;
- // Put the normal in world space
- Vec3V worldCenter = M34MulV3(aToWorld, center);
- Vec3V worldNormal = M34Mul33V3(aToWorld, normalA);
-
- FloatV dist = V3Dot(worldNormal, worldCenter);
- V3StoreU(worldNormal, sep.plane.n);
- FStore(dist, &sep.plane.d);
- sep.plane.d = -sep.plane.d;
-}
-
-static void _arrayVec3ToVec4(const PxVec3* src, Vec4V* dst, PxU32 num)
-{
- const PxU32 num4 = num >> 2;
- for (PxU32 i = 0; i < num4; i++, dst += 3, src += 4)
- {
- Vec3V v0 = V3LoadU(&src[0].x);
- Vec3V v1 = V3LoadU(&src[1].x);
- Vec3V v2 = V3LoadU(&src[2].x);
- Vec3V v3 = V3LoadU(&src[3].x);
- // Transpose
- V4Transpose(v0, v1, v2, v3);
- // Save
- dst[0] = v0;
- dst[1] = v1;
- dst[2] = v2;
- }
- const PxU32 remain = num & 3;
- if (remain)
- {
- Vec3V work[4];
- PxU32 i = 0;
- for (; i < remain; i++) work[i] = V3LoadU(&src[i].x);
- for (; i < 4; i++) work[i] = work[remain - 1];
- V4Transpose(work[0], work[1], work[2], work[3]);
- dst[0] = work[0];
- dst[1] = work[1];
- dst[2] = work[2];
- }
-}
-
-
-static void _arrayVec3ToVec4(const PxVec3* src, const Vec3V& scale, Vec4V* dst, PxU32 num)
-{
- // If no scale - use the faster version
- if (V3AllEq(scale, V3One()))
- {
- return _arrayVec3ToVec4(src, dst, num);
- }
-
- const PxU32 num4 = num >> 2;
- for (PxU32 i = 0; i < num4; i++, dst += 3, src += 4)
- {
- Vec3V v0 = V3Mul(scale, V3LoadU(&src[0].x));
- Vec3V v1 = V3Mul(scale, V3LoadU(&src[1].x));
- Vec3V v2 = V3Mul(scale, V3LoadU(&src[2].x));
- Vec3V v3 = V3Mul(scale, V3LoadU(&src[3].x));
- // Transpose
- V4Transpose(v0, v1, v2, v3);
- // Save
- dst[0] = v0;
- dst[1] = v1;
- dst[2] = v2;
- }
- const PxU32 remain = num & 3;
- if (remain)
- {
- Vec3V work[4];
- PxU32 i = 0;
- for (; i < remain; i++) work[i] = V3Mul(scale, V3LoadU(&src[i].x));
- for (; i < 4; i++) work[i] = work[remain - 1];
- V4Transpose(work[0], work[1], work[2], work[3]);
- dst[0] = work[0];
- dst[1] = work[1];
- dst[2] = work[2];
- }
-}
-
-
-bool importerHullsInProximityApexFree(uint32_t hull0Count, const PxVec3* hull0, PxBounds3& hull0Bounds, const physx::PxTransform& localToWorldRT0In, const physx::PxVec3& scale0In,
- uint32_t hull1Count, const PxVec3* hull1, PxBounds3& hull1Bounds, const physx::PxTransform& localToWorldRT1In, const physx::PxVec3& scale1In,
- physx::PxF32 maxDistance, Separation* separation)
-{
-
-
- const PxU32 numVerts0 = static_cast<PxU32>(hull0Count);
- const PxU32 numVerts1 = static_cast<PxU32>(hull1Count);
- const PxU32 numAov0 = (numVerts0 + 3) >> 2;
- const PxU32 numAov1 = (numVerts1 + 3) >> 2;
- Vec4V* verts0 = (Vec4V*)alloca((numAov0 + numAov1) * sizeof(Vec4V) * 3);
-
- // Make sure it's aligned
- PX_ASSERT((size_t(verts0) & 0xf) == 0);
-
- Vec4V* verts1 = verts0 + (numAov0 * 3);
-
- const Vec3V scale0 = V3LoadU(&scale0In.x);
- const Vec3V scale1 = V3LoadU(&scale1In.x);
- std::vector<PxVec3> vert0(numVerts0);
- for (uint32_t i = 0; i < numVerts0; ++i)
- {
- vert0[i] = hull0[i];
- }
- std::vector<PxVec3> vert1(numVerts1);
- for (uint32_t i = 0; i < numVerts1; ++i)
- {
- vert1[i] = hull1[i];
- }
-
- _arrayVec3ToVec4(vert0.data(), scale0, verts0, numVerts0);
- _arrayVec3ToVec4(vert1.data(), scale1, verts1, numVerts1);
-
- const PxTransform trans1To0 = localToWorldRT0In.transformInv(localToWorldRT1In);
-
- // Load into simd mat
- Mat34V bToA;
- *(PxMat44*)&bToA = trans1To0;
- (*(PxMat44*)&bToA).column3.w = 0.0f; // AOS wants the 4th component of Vec3V to be 0 to work properly
-
- ConvexV convexA;
- ConvexV convexB;
-
- convexA.mNumAovVertices = numAov0;
- convexA.mAovVertices = verts0;
-
- convexB.mNumAovVertices = numAov1;
- convexB.mAovVertices = verts1;
-
- // Take the origin of B in As space as the inital direction as it is 'the difference in transform origins B-A in A's space'
- // Should be a good first guess
- const Vec3V initialDir = bToA.col3;
- Output output;
- Status status = Collide(initialDir, convexA, bToA, convexB, output);
-
- if (status == STATUS_DEGENERATE)
- {
- // Calculate the tolerance from the extents
- const PxVec3 extents0 = hull0Bounds.getExtents();
- const PxVec3 extents1 = hull1Bounds.getExtents();
-
- const FloatV tolerance0 = V3ExtractMin(V3Mul(V3LoadU(&extents0.x), scale0));
- const FloatV tolerance1 = V3ExtractMin(V3Mul(V3LoadU(&extents1.x), scale1));
-
- const FloatV tolerance = FMul(FAdd(tolerance0, tolerance1), FLoad(0.01f));
- const FloatV sqTolerance = FMul(tolerance, tolerance);
-
- status = FAllGrtr(sqTolerance, output.mDistSq) ? STATUS_CONTACT : STATUS_NON_INTERSECT;
- }
-
- switch (status)
- {
- case STATUS_CONTACT:
- {
- if (separation)
- {
- _calcSeparation(convexA, localToWorldRT0In, bToA, convexB, output, *separation);
- }
- return true;
- }
- default:
- case STATUS_NON_INTERSECT:
- {
- if (separation)
- {
- _calcSeparation(convexA, localToWorldRT0In, bToA, convexB, output, *separation);
- }
- PxF32 val;
- FStore(output.mDistSq, &val);
- return val < (maxDistance * maxDistance);
- }
- }
-}
-
-} // namespace Blast
-} // namespace Nv
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2016-2018 NVIDIA Corporation. All rights reserved.
+
+
+#include "NvBlastExtApexSharedParts.h"
+
+#include "PxMat44.h"
+#include "PxBounds3.h"
+#include "PxFoundation.h"
+#include "PxPhysics.h"
+#include "PsVecMath.h"
+#include <vector>
+
+using namespace physx;
+using namespace physx::shdfnd::aos;
+
+
+namespace Nv
+{
+namespace Blast
+{
+
+PX_NOALIAS PX_FORCE_INLINE BoolV PointOutsideOfPlane4(const Vec3VArg _a, const Vec3VArg _b, const Vec3VArg _c, const Vec3VArg _d)
+{
+ // this is not 0 because of the following scenario:
+ // All the points lie on the same plane and the plane goes through the origin (0,0,0).
+ // On the Wii U, the math below has the problem that when point A gets projected on the
+ // plane cumputed by A, B, C, the distance to the plane might not be 0 for the mentioned
+ // scenario but a small positive or negative value. This can lead to the wrong boolean
+ // results. Using a small negative value as threshold is more conservative but safer.
+ const Vec4V zero = V4Load(-1e-6);
+
+ const Vec3V ab = V3Sub(_b, _a);
+ const Vec3V ac = V3Sub(_c, _a);
+ const Vec3V ad = V3Sub(_d, _a);
+ const Vec3V bd = V3Sub(_d, _b);
+ const Vec3V bc = V3Sub(_c, _b);
+
+ const Vec3V v0 = V3Cross(ab, ac);
+ const Vec3V v1 = V3Cross(ac, ad);
+ const Vec3V v2 = V3Cross(ad, ab);
+ const Vec3V v3 = V3Cross(bd, bc);
+
+ const FloatV signa0 = V3Dot(v0, _a);
+ const FloatV signa1 = V3Dot(v1, _a);
+ const FloatV signa2 = V3Dot(v2, _a);
+ const FloatV signd3 = V3Dot(v3, _a);
+
+ const FloatV signd0 = V3Dot(v0, _d);
+ const FloatV signd1 = V3Dot(v1, _b);
+ const FloatV signd2 = V3Dot(v2, _c);
+ const FloatV signa3 = V3Dot(v3, _b);
+
+ const Vec4V signa = V4Merge(signa0, signa1, signa2, signa3);
+ const Vec4V signd = V4Merge(signd0, signd1, signd2, signd3);
+ return V4IsGrtrOrEq(V4Mul(signa, signd), zero);//same side, outside of the plane
+}
+
+PX_NOALIAS PX_FORCE_INLINE Vec3V closestPtPointSegment(const Vec3VArg a, const Vec3VArg b)
+{
+ const FloatV zero = FZero();
+ const FloatV one = FOne();
+
+ //Test degenerated case
+ const Vec3V ab = V3Sub(b, a);
+ const FloatV denom = V3Dot(ab, ab);
+ const Vec3V ap = V3Neg(a);//V3Sub(origin, a);
+ const FloatV nom = V3Dot(ap, ab);
+ const BoolV con = FIsEq(denom, zero);
+ const FloatV tValue = FClamp(FDiv(nom, denom), zero, one);
+ const FloatV t = FSel(con, zero, tValue);
+
+ return V3Sel(con, a, V3ScaleAdd(ab, t, a));
+}
+
+PX_NOALIAS PX_FORCE_INLINE Vec3V closestPtPointSegment(const Vec3VArg Q0, const Vec3VArg Q1, const Vec3VArg A0, const Vec3VArg A1,
+ const Vec3VArg B0, const Vec3VArg B1, PxU32& size, Vec3V& closestA, Vec3V& closestB)
+{
+ const Vec3V a = Q0;
+ const Vec3V b = Q1;
+
+ const BoolV bTrue = BTTTT();
+ const FloatV zero = FZero();
+ const FloatV one = FOne();
+
+ //Test degenerated case
+ const Vec3V ab = V3Sub(b, a);
+ const FloatV denom = V3Dot(ab, ab);
+ const Vec3V ap = V3Neg(a);//V3Sub(origin, a);
+ const FloatV nom = V3Dot(ap, ab);
+ const BoolV con = FIsEq(denom, zero);
+
+ if (BAllEq(con, bTrue))
+ {
+ size = 1;
+ closestA = A0;
+ closestB = B0;
+ return Q0;
+ }
+
+ const Vec3V v = V3Sub(A1, A0);
+ const Vec3V w = V3Sub(B1, B0);
+ const FloatV tValue = FClamp(FDiv(nom, denom), zero, one);
+ const FloatV t = FSel(con, zero, tValue);
+
+ const Vec3V tempClosestA = V3ScaleAdd(v, t, A0);
+ const Vec3V tempClosestB = V3ScaleAdd(w, t, B0);
+ closestA = tempClosestA;
+ closestB = tempClosestB;
+ return V3Sub(tempClosestA, tempClosestB);
+}
+
+PX_NOALIAS Vec3V closestPtPointSegmentTesselation(const Vec3VArg Q0, const Vec3VArg Q1, const Vec3VArg A0, const Vec3VArg A1,
+ const Vec3VArg B0, const Vec3VArg B1, PxU32& size, Vec3V& closestA, Vec3V& closestB)
+{
+ const FloatV half = FHalf();
+
+ const FloatV targetSegmentLengthSq = FLoad(10000.f);//100 unit
+
+ Vec3V q0 = Q0;
+ Vec3V q1 = Q1;
+ Vec3V a0 = A0;
+ Vec3V a1 = A1;
+ Vec3V b0 = B0;
+ Vec3V b1 = B1;
+
+ for (;;)
+ {
+ const Vec3V midPoint = V3Scale(V3Add(q0, q1), half);
+ const Vec3V midA = V3Scale(V3Add(a0, a1), half);
+ const Vec3V midB = V3Scale(V3Add(b0, b1), half);
+
+ const Vec3V v = V3Sub(midPoint, q0);
+ const FloatV sqV = V3Dot(v, v);
+ if (FAllGrtr(targetSegmentLengthSq, sqV))
+ break;
+ //split the segment into half
+ const Vec3V tClos0 = closestPtPointSegment(q0, midPoint);
+ const FloatV sqDist0 = V3Dot(tClos0, tClos0);
+
+ const Vec3V tClos1 = closestPtPointSegment(q1, midPoint);
+ const FloatV sqDist1 = V3Dot(tClos1, tClos1);
+ //const BoolV con = FIsGrtr(sqDist0, sqDist1);
+ if (FAllGrtr(sqDist0, sqDist1))
+ {
+ //segment [m, q1]
+ q0 = midPoint;
+ a0 = midA;
+ b0 = midB;
+ }
+ else
+ {
+ //segment [q0, m]
+ q1 = midPoint;
+ a1 = midA;
+ b1 = midB;
+ }
+
+ }
+
+ return closestPtPointSegment(q0, q1, a0, a1, b0, b1, size, closestA, closestB);
+}
+
+PX_NOALIAS Vec3V closestPtPointTriangleTesselation(const Vec3V* PX_RESTRICT Q, const Vec3V* PX_RESTRICT A, const Vec3V* PX_RESTRICT B, const PxU32* PX_RESTRICT indices, PxU32& size, Vec3V& closestA, Vec3V& closestB)
+{
+ size = 3;
+ const FloatV zero = FZero();
+ const FloatV eps = FEps();
+ const FloatV half = FHalf();
+ const BoolV bTrue = BTTTT();
+ const FloatV four = FLoad(4.f);
+ const FloatV sixty = FLoad(100.f);
+
+ const PxU32 ind0 = indices[0];
+ const PxU32 ind1 = indices[1];
+ const PxU32 ind2 = indices[2];
+
+ const Vec3V a = Q[ind0];
+ const Vec3V b = Q[ind1];
+ const Vec3V c = Q[ind2];
+
+ Vec3V ab_ = V3Sub(b, a);
+ Vec3V ac_ = V3Sub(c, a);
+ Vec3V bc_ = V3Sub(b, c);
+
+ const FloatV dac_ = V3Dot(ac_, ac_);
+ const FloatV dbc_ = V3Dot(bc_, bc_);
+ if (FAllGrtrOrEq(eps, FMin(dac_, dbc_)))
+ {
+ //degenerate
+ size = 2;
+ return closestPtPointSegment(Q[ind0], Q[ind1], A[ind0], A[ind1], B[ind0], B[ind1], size, closestA, closestB);
+ }
+
+ Vec3V ap = V3Neg(a);
+ Vec3V bp = V3Neg(b);
+ Vec3V cp = V3Neg(c);
+
+ FloatV d1 = V3Dot(ab_, ap); // snom
+ FloatV d2 = V3Dot(ac_, ap); // tnom
+ FloatV d3 = V3Dot(ab_, bp); // -sdenom
+ FloatV d4 = V3Dot(ac_, bp); // unom = d4 - d3
+ FloatV d5 = V3Dot(ab_, cp); // udenom = d5 - d6
+ FloatV d6 = V3Dot(ac_, cp); // -tdenom
+ /* FloatV unom = FSub(d4, d3);
+ FloatV udenom = FSub(d5, d6);*/
+
+ FloatV va = FNegScaleSub(d5, d4, FMul(d3, d6));//edge region of BC
+ FloatV vb = FNegScaleSub(d1, d6, FMul(d5, d2));//edge region of AC
+ FloatV vc = FNegScaleSub(d3, d2, FMul(d1, d4));//edge region of AB
+
+ //check if p in vertex region outside a
+ const BoolV con00 = FIsGrtrOrEq(zero, d1); // snom <= 0
+ const BoolV con01 = FIsGrtrOrEq(zero, d2); // tnom <= 0
+ const BoolV con0 = BAnd(con00, con01); // vertex region a
+ if (BAllEq(con0, bTrue))
+ {
+ //size = 1;
+ closestA = A[ind0];
+ closestB = B[ind0];
+ return Q[ind0];
+ }
+
+ //check if p in vertex region outside b
+ const BoolV con10 = FIsGrtrOrEq(d3, zero);
+ const BoolV con11 = FIsGrtrOrEq(d3, d4);
+ const BoolV con1 = BAnd(con10, con11); // vertex region b
+ if (BAllEq(con1, bTrue))
+ {
+ /*size = 1;
+ indices[0] = ind1;*/
+ closestA = A[ind1];
+ closestB = B[ind1];
+ return Q[ind1];
+ }
+
+
+ //check if p in vertex region outside of c
+ const BoolV con20 = FIsGrtrOrEq(d6, zero);
+ const BoolV con21 = FIsGrtrOrEq(d6, d5);
+ const BoolV con2 = BAnd(con20, con21); // vertex region c
+ if (BAllEq(con2, bTrue))
+ {
+ closestA = A[ind2];
+ closestB = B[ind2];
+ return Q[ind2];
+ }
+
+ //check if p in edge region of AB
+ const BoolV con30 = FIsGrtrOrEq(zero, vc);
+ const BoolV con31 = FIsGrtrOrEq(d1, zero);
+ const BoolV con32 = FIsGrtrOrEq(zero, d3);
+ const BoolV con3 = BAnd(con30, BAnd(con31, con32));
+
+ if (BAllEq(con3, bTrue))
+ {
+ //size = 2;
+ //p in edge region of AB, split AB
+ return closestPtPointSegmentTesselation(Q[ind0], Q[ind1], A[ind0], A[ind1], B[ind0], B[ind1], size, closestA, closestB);
+ }
+
+ //check if p in edge region of BC
+ const BoolV con40 = FIsGrtrOrEq(zero, va);
+ const BoolV con41 = FIsGrtrOrEq(d4, d3);
+ const BoolV con42 = FIsGrtrOrEq(d5, d6);
+ const BoolV con4 = BAnd(con40, BAnd(con41, con42));
+
+ if (BAllEq(con4, bTrue))
+ {
+ //p in edge region of BC, split BC
+ return closestPtPointSegmentTesselation(Q[ind1], Q[ind2], A[ind1], A[ind2], B[ind1], B[ind2], size, closestA, closestB);
+ }
+
+ //check if p in edge region of AC
+ const BoolV con50 = FIsGrtrOrEq(zero, vb);
+ const BoolV con51 = FIsGrtrOrEq(d2, zero);
+ const BoolV con52 = FIsGrtrOrEq(zero, d6);
+ const BoolV con5 = BAnd(con50, BAnd(con51, con52));
+
+ if (BAllEq(con5, bTrue))
+ {
+ //p in edge region of AC, split AC
+ return closestPtPointSegmentTesselation(Q[ind0], Q[ind2], A[ind0], A[ind2], B[ind0], B[ind2], size, closestA, closestB);
+ }
+
+ size = 3;
+
+ Vec3V q0 = Q[ind0];
+ Vec3V q1 = Q[ind1];
+ Vec3V q2 = Q[ind2];
+ Vec3V a0 = A[ind0];
+ Vec3V a1 = A[ind1];
+ Vec3V a2 = A[ind2];
+ Vec3V b0 = B[ind0];
+ Vec3V b1 = B[ind1];
+ Vec3V b2 = B[ind2];
+
+ for (;;)
+ {
+
+ const Vec3V ab = V3Sub(q1, q0);
+ const Vec3V ac = V3Sub(q2, q0);
+ const Vec3V bc = V3Sub(q2, q1);
+
+ const FloatV dab = V3Dot(ab, ab);
+ const FloatV dac = V3Dot(ac, ac);
+ const FloatV dbc = V3Dot(bc, bc);
+
+ const FloatV fMax = FMax(dab, FMax(dac, dbc));
+ const FloatV fMin = FMin(dab, FMin(dac, dbc));
+
+ const Vec3V w = V3Cross(ab, ac);
+
+ const FloatV area = V3Length(w);
+ const FloatV ratio = FDiv(FSqrt(fMax), FSqrt(fMin));
+ if (FAllGrtr(four, ratio) && FAllGrtr(sixty, area))
+ break;
+
+ //calculate the triangle normal
+ const Vec3V triNormal = V3Normalize(w);
+
+ PX_ASSERT(V3AllEq(triNormal, V3Zero()) == 0);
+
+
+ //split the longest edge
+ if (FAllGrtrOrEq(dab, dac) && FAllGrtrOrEq(dab, dbc))
+ {
+ //split edge q0q1
+ const Vec3V midPoint = V3Scale(V3Add(q0, q1), half);
+ const Vec3V midA = V3Scale(V3Add(a0, a1), half);
+ const Vec3V midB = V3Scale(V3Add(b0, b1), half);
+
+ const Vec3V v = V3Sub(midPoint, q2);
+ const Vec3V n = V3Normalize(V3Cross(v, triNormal));
+
+ const FloatV d = FNeg(V3Dot(n, midPoint));
+ const FloatV dp = FAdd(V3Dot(n, q0), d);
+ const FloatV sum = FMul(d, dp);
+
+ if (FAllGrtr(sum, zero))
+ {
+ //q0 and origin at the same side, split triangle[q0, m, q2]
+ q1 = midPoint;
+ a1 = midA;
+ b1 = midB;
+ }
+ else
+ {
+ //q1 and origin at the same side, split triangle[m, q1, q2]
+ q0 = midPoint;
+ a0 = midA;
+ b0 = midB;
+ }
+
+ }
+ else if (FAllGrtrOrEq(dac, dbc))
+ {
+ //split edge q0q2
+ const Vec3V midPoint = V3Scale(V3Add(q0, q2), half);
+ const Vec3V midA = V3Scale(V3Add(a0, a2), half);
+ const Vec3V midB = V3Scale(V3Add(b0, b2), half);
+
+ const Vec3V v = V3Sub(midPoint, q1);
+ const Vec3V n = V3Normalize(V3Cross(v, triNormal));
+
+ const FloatV d = FNeg(V3Dot(n, midPoint));
+ const FloatV dp = FAdd(V3Dot(n, q0), d);
+ const FloatV sum = FMul(d, dp);
+
+ if (FAllGrtr(sum, zero))
+ {
+ //q0 and origin at the same side, split triangle[q0, q1, m]
+ q2 = midPoint;
+ a2 = midA;
+ b2 = midB;
+ }
+ else
+ {
+ //q2 and origin at the same side, split triangle[m, q1, q2]
+ q0 = midPoint;
+ a0 = midA;
+ b0 = midB;
+ }
+ }
+ else
+ {
+ //split edge q1q2
+ const Vec3V midPoint = V3Scale(V3Add(q1, q2), half);
+ const Vec3V midA = V3Scale(V3Add(a1, a2), half);
+ const Vec3V midB = V3Scale(V3Add(b1, b2), half);
+
+ const Vec3V v = V3Sub(midPoint, q0);
+ const Vec3V n = V3Normalize(V3Cross(v, triNormal));
+
+ const FloatV d = FNeg(V3Dot(n, midPoint));
+ const FloatV dp = FAdd(V3Dot(n, q1), d);
+ const FloatV sum = FMul(d, dp);
+
+ if (FAllGrtr(sum, zero))
+ {
+ //q1 and origin at the same side, split triangle[q0, q1, m]
+ q2 = midPoint;
+ a2 = midA;
+ b2 = midB;
+ }
+ else
+ {
+ //q2 and origin at the same side, split triangle[q0, m, q2]
+ q1 = midPoint;
+ a1 = midA;
+ b1 = midB;
+ }
+
+
+ }
+ }
+
+ //P must project inside face region. Compute Q using Barycentric coordinates
+ ab_ = V3Sub(q1, q0);
+ ac_ = V3Sub(q2, q0);
+ ap = V3Neg(q0);
+ bp = V3Neg(q1);
+ cp = V3Neg(q2);
+
+ d1 = V3Dot(ab_, ap); // snom
+ d2 = V3Dot(ac_, ap); // tnom
+ d3 = V3Dot(ab_, bp); // -sdenom
+ d4 = V3Dot(ac_, bp); // unom = d4 - d3
+ d5 = V3Dot(ab_, cp); // udenom = d5 - d6
+ d6 = V3Dot(ac_, cp); // -tdenom
+
+ va = FNegScaleSub(d5, d4, FMul(d3, d6));//edge region of BC
+ vb = FNegScaleSub(d1, d6, FMul(d5, d2));//edge region of AC
+ vc = FNegScaleSub(d3, d2, FMul(d1, d4));//edge region of AB
+
+ const FloatV toRecipD = FAdd(va, FAdd(vb, vc));
+ const FloatV denom = FRecip(toRecipD);//V4GetW(recipTmp);
+ const Vec3V v0 = V3Sub(a1, a0);
+ const Vec3V v1 = V3Sub(a2, a0);
+ const Vec3V w0 = V3Sub(b1, b0);
+ const Vec3V w1 = V3Sub(b2, b0);
+
+ const FloatV t = FMul(vb, denom);
+ const FloatV w = FMul(vc, denom);
+ const Vec3V vA1 = V3Scale(v1, w);
+ const Vec3V vB1 = V3Scale(w1, w);
+ const Vec3V tempClosestA = V3Add(a0, V3ScaleAdd(v0, t, vA1));
+ const Vec3V tempClosestB = V3Add(b0, V3ScaleAdd(w0, t, vB1));
+ closestA = tempClosestA;
+ closestB = tempClosestB;
+ return V3Sub(tempClosestA, tempClosestB);
+}
+
+PX_NOALIAS Vec3V closestPtPointTetrahedronTesselation(Vec3V* PX_RESTRICT Q, Vec3V* PX_RESTRICT A, Vec3V* PX_RESTRICT B, PxU32& size, Vec3V& closestA, Vec3V& closestB)
+{
+ const FloatV eps = FEps();
+ const Vec3V zeroV = V3Zero();
+ PxU32 tempSize = size;
+
+ FloatV bestSqDist = FLoad(PX_MAX_REAL);
+ const Vec3V a = Q[0];
+ const Vec3V b = Q[1];
+ const Vec3V c = Q[2];
+ const Vec3V d = Q[3];
+ const BoolV bTrue = BTTTT();
+ const BoolV bFalse = BFFFF();
+
+ //degenerated
+ const Vec3V ad = V3Sub(d, a);
+ const Vec3V bd = V3Sub(d, b);
+ const Vec3V cd = V3Sub(d, c);
+ const FloatV dad = V3Dot(ad, ad);
+ const FloatV dbd = V3Dot(bd, bd);
+ const FloatV dcd = V3Dot(cd, cd);
+ const FloatV fMin = FMin(dad, FMin(dbd, dcd));
+ if (FAllGrtr(eps, fMin))
+ {
+ size = 3;
+ PxU32 tempIndices[] = { 0, 1, 2 };
+ return closestPtPointTriangleTesselation(Q, A, B, tempIndices, size, closestA, closestB);
+ }
+
+ Vec3V _Q[] = { Q[0], Q[1], Q[2], Q[3] };
+ Vec3V _A[] = { A[0], A[1], A[2], A[3] };
+ Vec3V _B[] = { B[0], B[1], B[2], B[3] };
+
+ PxU32 indices[3] = { 0, 1, 2 };
+
+ const BoolV bIsOutside4 = PointOutsideOfPlane4(a, b, c, d);
+
+ if (BAllEq(bIsOutside4, bFalse))
+ {
+ //origin is inside the tetrahedron, we are done
+ return zeroV;
+ }
+
+ Vec3V result = zeroV;
+ Vec3V tempClosestA, tempClosestB;
+
+ if (BAllEq(BGetX(bIsOutside4), bTrue))
+ {
+
+ PxU32 tempIndices[] = { 0, 1, 2 };
+ PxU32 _size = 3;
+
+ result = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB);
+
+ const FloatV sqDist = V3Dot(result, result);
+ bestSqDist = sqDist;
+
+ indices[0] = tempIndices[0];
+ indices[1] = tempIndices[1];
+ indices[2] = tempIndices[2];
+
+ tempSize = _size;
+ closestA = tempClosestA;
+ closestB = tempClosestB;
+ }
+
+ if (BAllEq(BGetY(bIsOutside4), bTrue))
+ {
+
+ PxU32 tempIndices[] = { 0, 2, 3 };
+
+ PxU32 _size = 3;
+
+ const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB);
+
+ const FloatV sqDist = V3Dot(q, q);
+ const BoolV con = FIsGrtr(bestSqDist, sqDist);
+ if (BAllEq(con, bTrue))
+ {
+ result = q;
+ bestSqDist = sqDist;
+ indices[0] = tempIndices[0];
+ indices[1] = tempIndices[1];
+ indices[2] = tempIndices[2];
+
+ tempSize = _size;
+ closestA = tempClosestA;
+ closestB = tempClosestB;
+ }
+ }
+
+ if (BAllEq(BGetZ(bIsOutside4), bTrue))
+ {
+
+ PxU32 tempIndices[] = { 0, 3, 1 };
+ PxU32 _size = 3;
+
+ const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB);
+
+ const FloatV sqDist = V3Dot(q, q);
+ const BoolV con = FIsGrtr(bestSqDist, sqDist);
+ if (BAllEq(con, bTrue))
+ {
+ result = q;
+ bestSqDist = sqDist;
+ indices[0] = tempIndices[0];
+ indices[1] = tempIndices[1];
+ indices[2] = tempIndices[2];
+ tempSize = _size;
+ closestA = tempClosestA;
+ closestB = tempClosestB;
+ }
+
+ }
+
+ if (BAllEq(BGetW(bIsOutside4), bTrue))
+ {
+
+ PxU32 tempIndices[] = { 1, 3, 2 };
+ PxU32 _size = 3;
+
+ const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB);
+
+ const FloatV sqDist = V3Dot(q, q);
+ const BoolV con = FIsGrtr(bestSqDist, sqDist);
+
+ if (BAllEq(con, bTrue))
+ {
+ result = q;
+ bestSqDist = sqDist;
+
+ indices[0] = tempIndices[0];
+ indices[1] = tempIndices[1];
+ indices[2] = tempIndices[2];
+
+ tempSize = _size;
+ closestA = tempClosestA;
+ closestB = tempClosestB;
+ }
+ }
+
+ A[0] = _A[indices[0]]; A[1] = _A[indices[1]]; A[2] = _A[indices[2]];
+ B[0] = _B[indices[0]]; B[1] = _B[indices[1]]; B[2] = _B[indices[2]];
+ Q[0] = _Q[indices[0]]; Q[1] = _Q[indices[1]]; Q[2] = _Q[indices[2]];
+
+
+ size = tempSize;
+ return result;
+}
+
+PX_NOALIAS PX_FORCE_INLINE Vec3V doTesselation(Vec3V* PX_RESTRICT Q, Vec3V* PX_RESTRICT A, Vec3V* PX_RESTRICT B,
+ const Vec3VArg support, const Vec3VArg supportA, const Vec3VArg supportB, PxU32& size, Vec3V& closestA, Vec3V& closestB)
+{
+ switch (size)
+ {
+ case 1:
+ {
+ closestA = supportA;
+ closestB = supportB;
+ return support;
+ }
+ case 2:
+ {
+ return closestPtPointSegmentTesselation(Q[0], support, A[0], supportA, B[0], supportB, size, closestA, closestB);
+ }
+ case 3:
+ {
+
+ PxU32 tempIndices[3] = { 0, 1, 2 };
+ return closestPtPointTriangleTesselation(Q, A, B, tempIndices, size, closestA, closestB);
+ }
+ case 4:
+ {
+ return closestPtPointTetrahedronTesselation(Q, A, B, size, closestA, closestB);
+ }
+ default:
+ PX_ASSERT(0);
+ }
+ return support;
+}
+
+
+
+
+enum Status
+{
+ STATUS_NON_INTERSECT,
+ STATUS_CONTACT,
+ STATUS_DEGENERATE,
+};
+
+struct Output
+{
+ /// Get the normal to push apart in direction from A to B
+ PX_FORCE_INLINE Vec3V getNormal() const { return V3Normalize(V3Sub(mClosestB, mClosestA)); }
+ Vec3V mClosestA; ///< Closest point on A
+ Vec3V mClosestB; ///< Closest point on B
+ FloatV mDistSq;
+};
+
+struct ConvexV
+{
+ void calcExtent(const Vec3V& dir, PxF32& minOut, PxF32& maxOut) const
+ {
+ // Expand
+ const Vec4V x = Vec4V_From_FloatV(V3GetX(dir));
+ const Vec4V y = Vec4V_From_FloatV(V3GetY(dir));
+ const Vec4V z = Vec4V_From_FloatV(V3GetZ(dir));
+
+ const Vec4V* src = mAovVertices;
+ const Vec4V* end = src + mNumAovVertices * 3;
+
+ // Do first step
+ Vec4V max = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2])));
+ Vec4V min = max;
+ src += 3;
+ // Do the rest
+ for (; src < end; src += 3)
+ {
+ const Vec4V dot = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2])));
+ max = V4Max(dot, max);
+ min = V4Min(dot, min);
+ }
+ FStore(V4ExtractMax(max), &maxOut);
+ FStore(V4ExtractMin(min), &minOut);
+ }
+ Vec3V calcSupport(const Vec3V& dir) const
+ {
+ // Expand
+ const Vec4V x = Vec4V_From_FloatV(V3GetX(dir));
+ const Vec4V y = Vec4V_From_FloatV(V3GetY(dir));
+ const Vec4V z = Vec4V_From_FloatV(V3GetZ(dir));
+
+ PX_ALIGN(16, static const PxF32 index4const[]) = { 0.0f, 1.0f, 2.0f, 3.0f };
+ Vec4V index4 = *(const Vec4V*)index4const;
+ PX_ALIGN(16, static const PxF32 delta4const[]) = { 4.0f, 4.0f, 4.0f, 4.0f };
+ const Vec4V delta4 = *(const Vec4V*)delta4const;
+
+ const Vec4V* src = mAovVertices;
+ const Vec4V* end = src + mNumAovVertices * 3;
+
+ // Do first step
+ Vec4V max = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2])));
+ Vec4V maxIndex = index4;
+ index4 = V4Add(index4, delta4);
+ src += 3;
+ // Do the rest
+ for (; src < end; src += 3)
+ {
+ const Vec4V dot = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2])));
+ const BoolV cmp = V4IsGrtr(dot, max);
+ max = V4Max(dot, max);
+ maxIndex = V4Sel(cmp, index4, maxIndex);
+ index4 = V4Add(index4, delta4);
+ }
+ Vec4V horiMax = Vec4V_From_FloatV(V4ExtractMax(max));
+ PxU32 mask = BGetBitMask(V4IsEq(horiMax, max));
+ const PxU32 simdIndex = (0x12131210 >> (mask + mask)) & PxU32(3);
+
+ /// NOTE! Could be load hit store
+ /// Would be better to have all simd.
+ PX_ALIGN(16, PxF32 f[4]);
+ V4StoreA(maxIndex, f);
+ PxU32 index = PxU32(PxI32(f[simdIndex]));
+
+ const Vec4V* aovIndex = (mAovVertices + (index >> 2) * 3);
+ const PxF32* aovOffset = ((const PxF32*)aovIndex) + (index & 3);
+
+ return Vec3V_From_Vec4V(V4LoadXYZW(aovOffset[0], aovOffset[4], aovOffset[8], 1.0f));
+ }
+
+ const Vec4V* mAovVertices; ///< Vertices storex x,x,x,x, y,y,y,y, z,z,z,z
+ PxU32 mNumAovVertices; ///< Number of groups of 4 of vertices
+};
+
+Status Collide(const Vec3V& initialDir, const ConvexV& convexA, const Mat34V& bToA, const ConvexV& convexB, Output& out)
+{
+ Vec3V Q[4];
+ Vec3V A[4];
+ Vec3V B[4];
+
+ Mat33V aToB = M34Trnsps33(bToA);
+
+ PxU32 size = 0;
+
+ const Vec3V zeroV = V3Zero();
+ const BoolV bTrue = BTTTT();
+
+ //Vec3V v = V3UnitX();
+ Vec3V v = V3Sel(FIsGrtr(V3Dot(initialDir, initialDir), FZero()), initialDir, V3UnitX());
+
+ //const FloatV minMargin = zero;
+ //const FloatV eps2 = FMul(minMargin, FLoad(0.01f));
+ //FloatV eps2 = zero;
+ FloatV eps2 = FLoad(1e-6f);
+ const FloatV epsRel = FLoad(0.000225f);
+
+ Vec3V closA(zeroV), closB(zeroV);
+ FloatV sDist = FMax();
+ FloatV minDist = sDist;
+ Vec3V closAA = zeroV;
+ Vec3V closBB = zeroV;
+
+ BoolV bNotTerminated = bTrue;
+ BoolV bCon = bTrue;
+
+ do
+ {
+ minDist = sDist;
+ closAA = closA;
+ closBB = closB;
+
+ PxU32 index = size++;
+ PX_ASSERT(index < 4);
+
+ const Vec3V supportA = convexA.calcSupport(V3Neg(v));
+ const Vec3V supportB = M34MulV3(bToA, convexB.calcSupport(M33MulV3(aToB, v)));
+ const Vec3V support = Vec3V_From_Vec4V(Vec4V_From_Vec3V(V3Sub(supportA, supportB)));
+
+ A[index] = supportA;
+ B[index] = supportB;
+ Q[index] = support;
+
+ const FloatV signDist = V3Dot(v, support);
+ const FloatV tmp0 = FSub(sDist, signDist);
+ if (FAllGrtr(FMul(epsRel, sDist), tmp0))
+ {
+ out.mClosestA = closA;
+ out.mClosestB = closB;
+ out.mDistSq = sDist;
+ return STATUS_NON_INTERSECT;
+ }
+
+ //calculate the closest point between two convex hull
+ v = doTesselation(Q, A, B, support, supportA, supportB, size, closA, closB);
+ sDist = V3Dot(v, v);
+ bCon = FIsGrtr(minDist, sDist);
+
+ bNotTerminated = BAnd(FIsGrtr(sDist, eps2), bCon);
+ } while (BAllEq(bNotTerminated, bTrue));
+
+ out.mClosestA = V3Sel(bCon, closA, closAA);
+ out.mClosestB = V3Sel(bCon, closB, closBB);
+ out.mDistSq = FSel(bCon, sDist, minDist);
+ return Status(BAllEq(bCon, bTrue) == 1 ? STATUS_CONTACT : STATUS_DEGENERATE);
+}
+
+static void _calcSeparation(const ConvexV& convexA, const physx::PxTransform& aToWorldIn, const Mat34V& bToA, ConvexV& convexB, Output& out, Separation& sep)
+{
+
+ Mat33V aToB = M34Trnsps33(bToA);
+ Vec3V normalA = out.getNormal();
+
+ convexA.calcExtent(normalA, sep.min0, sep.max0);
+ Vec3V normalB = M33MulV3(aToB, normalA);
+ convexB.calcExtent(normalB, sep.min1, sep.max1);
+
+ {
+ // Offset the min max taking into account transform
+ // Distance of origin from B's space in As space in direction of the normal in As space should fix it...
+ PxF32 fix;
+ FStore(V3Dot(bToA.col3, normalA), &fix);
+ sep.min1 += fix;
+ sep.max1 += fix;
+ }
+
+ // Looks like it's the plane at the midpoint
+ Vec3V center = V3Scale(V3Add(out.mClosestA, out.mClosestB), FLoad(0.5f));
+ // Transform to world space
+ Mat34V aToWorld;
+ *(PxMat44*)&aToWorld = aToWorldIn;
+ // Put the normal in world space
+ Vec3V worldCenter = M34MulV3(aToWorld, center);
+ Vec3V worldNormal = M34Mul33V3(aToWorld, normalA);
+
+ FloatV dist = V3Dot(worldNormal, worldCenter);
+ V3StoreU(worldNormal, sep.plane.n);
+ FStore(dist, &sep.plane.d);
+ sep.plane.d = -sep.plane.d;
+}
+
+static void _arrayVec3ToVec4(const PxVec3* src, Vec4V* dst, PxU32 num)
+{
+ const PxU32 num4 = num >> 2;
+ for (PxU32 i = 0; i < num4; i++, dst += 3, src += 4)
+ {
+ Vec3V v0 = V3LoadU(&src[0].x);
+ Vec3V v1 = V3LoadU(&src[1].x);
+ Vec3V v2 = V3LoadU(&src[2].x);
+ Vec3V v3 = V3LoadU(&src[3].x);
+ // Transpose
+ V4Transpose(v0, v1, v2, v3);
+ // Save
+ dst[0] = v0;
+ dst[1] = v1;
+ dst[2] = v2;
+ }
+ const PxU32 remain = num & 3;
+ if (remain)
+ {
+ Vec3V work[4];
+ PxU32 i = 0;
+ for (; i < remain; i++) work[i] = V3LoadU(&src[i].x);
+ for (; i < 4; i++) work[i] = work[remain - 1];
+ V4Transpose(work[0], work[1], work[2], work[3]);
+ dst[0] = work[0];
+ dst[1] = work[1];
+ dst[2] = work[2];
+ }
+}
+
+
+static void _arrayVec3ToVec4(const PxVec3* src, const Vec3V& scale, Vec4V* dst, PxU32 num)
+{
+ // If no scale - use the faster version
+ if (V3AllEq(scale, V3One()))
+ {
+ return _arrayVec3ToVec4(src, dst, num);
+ }
+
+ const PxU32 num4 = num >> 2;
+ for (PxU32 i = 0; i < num4; i++, dst += 3, src += 4)
+ {
+ Vec3V v0 = V3Mul(scale, V3LoadU(&src[0].x));
+ Vec3V v1 = V3Mul(scale, V3LoadU(&src[1].x));
+ Vec3V v2 = V3Mul(scale, V3LoadU(&src[2].x));
+ Vec3V v3 = V3Mul(scale, V3LoadU(&src[3].x));
+ // Transpose
+ V4Transpose(v0, v1, v2, v3);
+ // Save
+ dst[0] = v0;
+ dst[1] = v1;
+ dst[2] = v2;
+ }
+ const PxU32 remain = num & 3;
+ if (remain)
+ {
+ Vec3V work[4];
+ PxU32 i = 0;
+ for (; i < remain; i++) work[i] = V3Mul(scale, V3LoadU(&src[i].x));
+ for (; i < 4; i++) work[i] = work[remain - 1];
+ V4Transpose(work[0], work[1], work[2], work[3]);
+ dst[0] = work[0];
+ dst[1] = work[1];
+ dst[2] = work[2];
+ }
+}
+
+
+bool importerHullsInProximityApexFree(uint32_t hull0Count, const PxVec3* hull0, PxBounds3& hull0Bounds, const physx::PxTransform& localToWorldRT0In, const physx::PxVec3& scale0In,
+ uint32_t hull1Count, const PxVec3* hull1, PxBounds3& hull1Bounds, const physx::PxTransform& localToWorldRT1In, const physx::PxVec3& scale1In,
+ physx::PxF32 maxDistance, Separation* separation)
+{
+
+
+ const PxU32 numVerts0 = static_cast<PxU32>(hull0Count);
+ const PxU32 numVerts1 = static_cast<PxU32>(hull1Count);
+ const PxU32 numAov0 = (numVerts0 + 3) >> 2;
+ const PxU32 numAov1 = (numVerts1 + 3) >> 2;
+ Vec4V* verts0 = (Vec4V*)alloca((numAov0 + numAov1) * sizeof(Vec4V) * 3);
+
+ // Make sure it's aligned
+ PX_ASSERT((size_t(verts0) & 0xf) == 0);
+
+ Vec4V* verts1 = verts0 + (numAov0 * 3);
+
+ const Vec3V scale0 = V3LoadU(&scale0In.x);
+ const Vec3V scale1 = V3LoadU(&scale1In.x);
+ std::vector<PxVec3> vert0(numVerts0);
+ for (uint32_t i = 0; i < numVerts0; ++i)
+ {
+ vert0[i] = hull0[i];
+ }
+ std::vector<PxVec3> vert1(numVerts1);
+ for (uint32_t i = 0; i < numVerts1; ++i)
+ {
+ vert1[i] = hull1[i];
+ }
+
+ _arrayVec3ToVec4(vert0.data(), scale0, verts0, numVerts0);
+ _arrayVec3ToVec4(vert1.data(), scale1, verts1, numVerts1);
+
+ const PxTransform trans1To0 = localToWorldRT0In.transformInv(localToWorldRT1In);
+
+ // Load into simd mat
+ Mat34V bToA;
+ *(PxMat44*)&bToA = trans1To0;
+ (*(PxMat44*)&bToA).column3.w = 0.0f; // AOS wants the 4th component of Vec3V to be 0 to work properly
+
+ ConvexV convexA;
+ ConvexV convexB;
+
+ convexA.mNumAovVertices = numAov0;
+ convexA.mAovVertices = verts0;
+
+ convexB.mNumAovVertices = numAov1;
+ convexB.mAovVertices = verts1;
+
+ // Take the origin of B in As space as the inital direction as it is 'the difference in transform origins B-A in A's space'
+ // Should be a good first guess
+ const Vec3V initialDir = bToA.col3;
+ Output output;
+ Status status = Collide(initialDir, convexA, bToA, convexB, output);
+
+ if (status == STATUS_DEGENERATE)
+ {
+ // Calculate the tolerance from the extents
+ const PxVec3 extents0 = hull0Bounds.getExtents();
+ const PxVec3 extents1 = hull1Bounds.getExtents();
+
+ const FloatV tolerance0 = V3ExtractMin(V3Mul(V3LoadU(&extents0.x), scale0));
+ const FloatV tolerance1 = V3ExtractMin(V3Mul(V3LoadU(&extents1.x), scale1));
+
+ const FloatV tolerance = FMul(FAdd(tolerance0, tolerance1), FLoad(0.01f));
+ const FloatV sqTolerance = FMul(tolerance, tolerance);
+
+ status = FAllGrtr(sqTolerance, output.mDistSq) ? STATUS_CONTACT : STATUS_NON_INTERSECT;
+ }
+
+ switch (status)
+ {
+ case STATUS_CONTACT:
+ {
+ if (separation)
+ {
+ _calcSeparation(convexA, localToWorldRT0In, bToA, convexB, output, *separation);
+ }
+ return true;
+ }
+ default:
+ case STATUS_NON_INTERSECT:
+ {
+ if (separation)
+ {
+ _calcSeparation(convexA, localToWorldRT0In, bToA, convexB, output, *separation);
+ }
+ PxF32 val;
+ FStore(output.mDistSq, &val);
+ return val < (maxDistance * maxDistance);
+ }
+ }
+}
+
+} // namespace Blast
+} // namespace Nv