From 7115f60b91b5717d90f643fd692010905c7004db Mon Sep 17 00:00:00 2001 From: Bryan Galdrikian Date: Thu, 31 May 2018 11:36:08 -0700 Subject: Blast 1.1.3. See docs/release_notes.txt. --- .../authoring/source/NvBlastExtApexSharedParts.cpp | 2044 ++++++++++---------- 1 file changed, 1022 insertions(+), 1022 deletions(-) mode change 100644 => 100755 sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp (limited to 'sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp') diff --git a/sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp b/sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp old mode 100644 new mode 100755 index 14537ed..e69d039 --- a/sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp +++ b/sdk/extensions/authoring/source/NvBlastExtApexSharedParts.cpp @@ -1,1022 +1,1022 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2016-2018 NVIDIA Corporation. All rights reserved. - - -#include "NvBlastExtApexSharedParts.h" - -#include "PxMat44.h" -#include "PxBounds3.h" -#include "PxFoundation.h" -#include "PxPhysics.h" -#include "PsVecMath.h" -#include - -using namespace physx; -using namespace physx::shdfnd::aos; - - -namespace Nv -{ -namespace Blast -{ - -PX_NOALIAS PX_FORCE_INLINE BoolV PointOutsideOfPlane4(const Vec3VArg _a, const Vec3VArg _b, const Vec3VArg _c, const Vec3VArg _d) -{ - // this is not 0 because of the following scenario: - // All the points lie on the same plane and the plane goes through the origin (0,0,0). - // On the Wii U, the math below has the problem that when point A gets projected on the - // plane cumputed by A, B, C, the distance to the plane might not be 0 for the mentioned - // scenario but a small positive or negative value. This can lead to the wrong boolean - // results. Using a small negative value as threshold is more conservative but safer. - const Vec4V zero = V4Load(-1e-6); - - const Vec3V ab = V3Sub(_b, _a); - const Vec3V ac = V3Sub(_c, _a); - const Vec3V ad = V3Sub(_d, _a); - const Vec3V bd = V3Sub(_d, _b); - const Vec3V bc = V3Sub(_c, _b); - - const Vec3V v0 = V3Cross(ab, ac); - const Vec3V v1 = V3Cross(ac, ad); - const Vec3V v2 = V3Cross(ad, ab); - const Vec3V v3 = V3Cross(bd, bc); - - const FloatV signa0 = V3Dot(v0, _a); - const FloatV signa1 = V3Dot(v1, _a); - const FloatV signa2 = V3Dot(v2, _a); - const FloatV signd3 = V3Dot(v3, _a); - - const FloatV signd0 = V3Dot(v0, _d); - const FloatV signd1 = V3Dot(v1, _b); - const FloatV signd2 = V3Dot(v2, _c); - const FloatV signa3 = V3Dot(v3, _b); - - const Vec4V signa = V4Merge(signa0, signa1, signa2, signa3); - const Vec4V signd = V4Merge(signd0, signd1, signd2, signd3); - return V4IsGrtrOrEq(V4Mul(signa, signd), zero);//same side, outside of the plane -} - -PX_NOALIAS PX_FORCE_INLINE Vec3V closestPtPointSegment(const Vec3VArg a, const Vec3VArg b) -{ - const FloatV zero = FZero(); - const FloatV one = FOne(); - - //Test degenerated case - const Vec3V ab = V3Sub(b, a); - const FloatV denom = V3Dot(ab, ab); - const Vec3V ap = V3Neg(a);//V3Sub(origin, a); - const FloatV nom = V3Dot(ap, ab); - const BoolV con = FIsEq(denom, zero); - const FloatV tValue = FClamp(FDiv(nom, denom), zero, one); - const FloatV t = FSel(con, zero, tValue); - - return V3Sel(con, a, V3ScaleAdd(ab, t, a)); -} - -PX_NOALIAS PX_FORCE_INLINE Vec3V closestPtPointSegment(const Vec3VArg Q0, const Vec3VArg Q1, const Vec3VArg A0, const Vec3VArg A1, - const Vec3VArg B0, const Vec3VArg B1, PxU32& size, Vec3V& closestA, Vec3V& closestB) -{ - const Vec3V a = Q0; - const Vec3V b = Q1; - - const BoolV bTrue = BTTTT(); - const FloatV zero = FZero(); - const FloatV one = FOne(); - - //Test degenerated case - const Vec3V ab = V3Sub(b, a); - const FloatV denom = V3Dot(ab, ab); - const Vec3V ap = V3Neg(a);//V3Sub(origin, a); - const FloatV nom = V3Dot(ap, ab); - const BoolV con = FIsEq(denom, zero); - - if (BAllEq(con, bTrue)) - { - size = 1; - closestA = A0; - closestB = B0; - return Q0; - } - - const Vec3V v = V3Sub(A1, A0); - const Vec3V w = V3Sub(B1, B0); - const FloatV tValue = FClamp(FDiv(nom, denom), zero, one); - const FloatV t = FSel(con, zero, tValue); - - const Vec3V tempClosestA = V3ScaleAdd(v, t, A0); - const Vec3V tempClosestB = V3ScaleAdd(w, t, B0); - closestA = tempClosestA; - closestB = tempClosestB; - return V3Sub(tempClosestA, tempClosestB); -} - -PX_NOALIAS Vec3V closestPtPointSegmentTesselation(const Vec3VArg Q0, const Vec3VArg Q1, const Vec3VArg A0, const Vec3VArg A1, - const Vec3VArg B0, const Vec3VArg B1, PxU32& size, Vec3V& closestA, Vec3V& closestB) -{ - const FloatV half = FHalf(); - - const FloatV targetSegmentLengthSq = FLoad(10000.f);//100 unit - - Vec3V q0 = Q0; - Vec3V q1 = Q1; - Vec3V a0 = A0; - Vec3V a1 = A1; - Vec3V b0 = B0; - Vec3V b1 = B1; - - for (;;) - { - const Vec3V midPoint = V3Scale(V3Add(q0, q1), half); - const Vec3V midA = V3Scale(V3Add(a0, a1), half); - const Vec3V midB = V3Scale(V3Add(b0, b1), half); - - const Vec3V v = V3Sub(midPoint, q0); - const FloatV sqV = V3Dot(v, v); - if (FAllGrtr(targetSegmentLengthSq, sqV)) - break; - //split the segment into half - const Vec3V tClos0 = closestPtPointSegment(q0, midPoint); - const FloatV sqDist0 = V3Dot(tClos0, tClos0); - - const Vec3V tClos1 = closestPtPointSegment(q1, midPoint); - const FloatV sqDist1 = V3Dot(tClos1, tClos1); - //const BoolV con = FIsGrtr(sqDist0, sqDist1); - if (FAllGrtr(sqDist0, sqDist1)) - { - //segment [m, q1] - q0 = midPoint; - a0 = midA; - b0 = midB; - } - else - { - //segment [q0, m] - q1 = midPoint; - a1 = midA; - b1 = midB; - } - - } - - return closestPtPointSegment(q0, q1, a0, a1, b0, b1, size, closestA, closestB); -} - -PX_NOALIAS Vec3V closestPtPointTriangleTesselation(const Vec3V* PX_RESTRICT Q, const Vec3V* PX_RESTRICT A, const Vec3V* PX_RESTRICT B, const PxU32* PX_RESTRICT indices, PxU32& size, Vec3V& closestA, Vec3V& closestB) -{ - size = 3; - const FloatV zero = FZero(); - const FloatV eps = FEps(); - const FloatV half = FHalf(); - const BoolV bTrue = BTTTT(); - const FloatV four = FLoad(4.f); - const FloatV sixty = FLoad(100.f); - - const PxU32 ind0 = indices[0]; - const PxU32 ind1 = indices[1]; - const PxU32 ind2 = indices[2]; - - const Vec3V a = Q[ind0]; - const Vec3V b = Q[ind1]; - const Vec3V c = Q[ind2]; - - Vec3V ab_ = V3Sub(b, a); - Vec3V ac_ = V3Sub(c, a); - Vec3V bc_ = V3Sub(b, c); - - const FloatV dac_ = V3Dot(ac_, ac_); - const FloatV dbc_ = V3Dot(bc_, bc_); - if (FAllGrtrOrEq(eps, FMin(dac_, dbc_))) - { - //degenerate - size = 2; - return closestPtPointSegment(Q[ind0], Q[ind1], A[ind0], A[ind1], B[ind0], B[ind1], size, closestA, closestB); - } - - Vec3V ap = V3Neg(a); - Vec3V bp = V3Neg(b); - Vec3V cp = V3Neg(c); - - FloatV d1 = V3Dot(ab_, ap); // snom - FloatV d2 = V3Dot(ac_, ap); // tnom - FloatV d3 = V3Dot(ab_, bp); // -sdenom - FloatV d4 = V3Dot(ac_, bp); // unom = d4 - d3 - FloatV d5 = V3Dot(ab_, cp); // udenom = d5 - d6 - FloatV d6 = V3Dot(ac_, cp); // -tdenom - /* FloatV unom = FSub(d4, d3); - FloatV udenom = FSub(d5, d6);*/ - - FloatV va = FNegScaleSub(d5, d4, FMul(d3, d6));//edge region of BC - FloatV vb = FNegScaleSub(d1, d6, FMul(d5, d2));//edge region of AC - FloatV vc = FNegScaleSub(d3, d2, FMul(d1, d4));//edge region of AB - - //check if p in vertex region outside a - const BoolV con00 = FIsGrtrOrEq(zero, d1); // snom <= 0 - const BoolV con01 = FIsGrtrOrEq(zero, d2); // tnom <= 0 - const BoolV con0 = BAnd(con00, con01); // vertex region a - if (BAllEq(con0, bTrue)) - { - //size = 1; - closestA = A[ind0]; - closestB = B[ind0]; - return Q[ind0]; - } - - //check if p in vertex region outside b - const BoolV con10 = FIsGrtrOrEq(d3, zero); - const BoolV con11 = FIsGrtrOrEq(d3, d4); - const BoolV con1 = BAnd(con10, con11); // vertex region b - if (BAllEq(con1, bTrue)) - { - /*size = 1; - indices[0] = ind1;*/ - closestA = A[ind1]; - closestB = B[ind1]; - return Q[ind1]; - } - - - //check if p in vertex region outside of c - const BoolV con20 = FIsGrtrOrEq(d6, zero); - const BoolV con21 = FIsGrtrOrEq(d6, d5); - const BoolV con2 = BAnd(con20, con21); // vertex region c - if (BAllEq(con2, bTrue)) - { - closestA = A[ind2]; - closestB = B[ind2]; - return Q[ind2]; - } - - //check if p in edge region of AB - const BoolV con30 = FIsGrtrOrEq(zero, vc); - const BoolV con31 = FIsGrtrOrEq(d1, zero); - const BoolV con32 = FIsGrtrOrEq(zero, d3); - const BoolV con3 = BAnd(con30, BAnd(con31, con32)); - - if (BAllEq(con3, bTrue)) - { - //size = 2; - //p in edge region of AB, split AB - return closestPtPointSegmentTesselation(Q[ind0], Q[ind1], A[ind0], A[ind1], B[ind0], B[ind1], size, closestA, closestB); - } - - //check if p in edge region of BC - const BoolV con40 = FIsGrtrOrEq(zero, va); - const BoolV con41 = FIsGrtrOrEq(d4, d3); - const BoolV con42 = FIsGrtrOrEq(d5, d6); - const BoolV con4 = BAnd(con40, BAnd(con41, con42)); - - if (BAllEq(con4, bTrue)) - { - //p in edge region of BC, split BC - return closestPtPointSegmentTesselation(Q[ind1], Q[ind2], A[ind1], A[ind2], B[ind1], B[ind2], size, closestA, closestB); - } - - //check if p in edge region of AC - const BoolV con50 = FIsGrtrOrEq(zero, vb); - const BoolV con51 = FIsGrtrOrEq(d2, zero); - const BoolV con52 = FIsGrtrOrEq(zero, d6); - const BoolV con5 = BAnd(con50, BAnd(con51, con52)); - - if (BAllEq(con5, bTrue)) - { - //p in edge region of AC, split AC - return closestPtPointSegmentTesselation(Q[ind0], Q[ind2], A[ind0], A[ind2], B[ind0], B[ind2], size, closestA, closestB); - } - - size = 3; - - Vec3V q0 = Q[ind0]; - Vec3V q1 = Q[ind1]; - Vec3V q2 = Q[ind2]; - Vec3V a0 = A[ind0]; - Vec3V a1 = A[ind1]; - Vec3V a2 = A[ind2]; - Vec3V b0 = B[ind0]; - Vec3V b1 = B[ind1]; - Vec3V b2 = B[ind2]; - - for (;;) - { - - const Vec3V ab = V3Sub(q1, q0); - const Vec3V ac = V3Sub(q2, q0); - const Vec3V bc = V3Sub(q2, q1); - - const FloatV dab = V3Dot(ab, ab); - const FloatV dac = V3Dot(ac, ac); - const FloatV dbc = V3Dot(bc, bc); - - const FloatV fMax = FMax(dab, FMax(dac, dbc)); - const FloatV fMin = FMin(dab, FMin(dac, dbc)); - - const Vec3V w = V3Cross(ab, ac); - - const FloatV area = V3Length(w); - const FloatV ratio = FDiv(FSqrt(fMax), FSqrt(fMin)); - if (FAllGrtr(four, ratio) && FAllGrtr(sixty, area)) - break; - - //calculate the triangle normal - const Vec3V triNormal = V3Normalize(w); - - PX_ASSERT(V3AllEq(triNormal, V3Zero()) == 0); - - - //split the longest edge - if (FAllGrtrOrEq(dab, dac) && FAllGrtrOrEq(dab, dbc)) - { - //split edge q0q1 - const Vec3V midPoint = V3Scale(V3Add(q0, q1), half); - const Vec3V midA = V3Scale(V3Add(a0, a1), half); - const Vec3V midB = V3Scale(V3Add(b0, b1), half); - - const Vec3V v = V3Sub(midPoint, q2); - const Vec3V n = V3Normalize(V3Cross(v, triNormal)); - - const FloatV d = FNeg(V3Dot(n, midPoint)); - const FloatV dp = FAdd(V3Dot(n, q0), d); - const FloatV sum = FMul(d, dp); - - if (FAllGrtr(sum, zero)) - { - //q0 and origin at the same side, split triangle[q0, m, q2] - q1 = midPoint; - a1 = midA; - b1 = midB; - } - else - { - //q1 and origin at the same side, split triangle[m, q1, q2] - q0 = midPoint; - a0 = midA; - b0 = midB; - } - - } - else if (FAllGrtrOrEq(dac, dbc)) - { - //split edge q0q2 - const Vec3V midPoint = V3Scale(V3Add(q0, q2), half); - const Vec3V midA = V3Scale(V3Add(a0, a2), half); - const Vec3V midB = V3Scale(V3Add(b0, b2), half); - - const Vec3V v = V3Sub(midPoint, q1); - const Vec3V n = V3Normalize(V3Cross(v, triNormal)); - - const FloatV d = FNeg(V3Dot(n, midPoint)); - const FloatV dp = FAdd(V3Dot(n, q0), d); - const FloatV sum = FMul(d, dp); - - if (FAllGrtr(sum, zero)) - { - //q0 and origin at the same side, split triangle[q0, q1, m] - q2 = midPoint; - a2 = midA; - b2 = midB; - } - else - { - //q2 and origin at the same side, split triangle[m, q1, q2] - q0 = midPoint; - a0 = midA; - b0 = midB; - } - } - else - { - //split edge q1q2 - const Vec3V midPoint = V3Scale(V3Add(q1, q2), half); - const Vec3V midA = V3Scale(V3Add(a1, a2), half); - const Vec3V midB = V3Scale(V3Add(b1, b2), half); - - const Vec3V v = V3Sub(midPoint, q0); - const Vec3V n = V3Normalize(V3Cross(v, triNormal)); - - const FloatV d = FNeg(V3Dot(n, midPoint)); - const FloatV dp = FAdd(V3Dot(n, q1), d); - const FloatV sum = FMul(d, dp); - - if (FAllGrtr(sum, zero)) - { - //q1 and origin at the same side, split triangle[q0, q1, m] - q2 = midPoint; - a2 = midA; - b2 = midB; - } - else - { - //q2 and origin at the same side, split triangle[q0, m, q2] - q1 = midPoint; - a1 = midA; - b1 = midB; - } - - - } - } - - //P must project inside face region. Compute Q using Barycentric coordinates - ab_ = V3Sub(q1, q0); - ac_ = V3Sub(q2, q0); - ap = V3Neg(q0); - bp = V3Neg(q1); - cp = V3Neg(q2); - - d1 = V3Dot(ab_, ap); // snom - d2 = V3Dot(ac_, ap); // tnom - d3 = V3Dot(ab_, bp); // -sdenom - d4 = V3Dot(ac_, bp); // unom = d4 - d3 - d5 = V3Dot(ab_, cp); // udenom = d5 - d6 - d6 = V3Dot(ac_, cp); // -tdenom - - va = FNegScaleSub(d5, d4, FMul(d3, d6));//edge region of BC - vb = FNegScaleSub(d1, d6, FMul(d5, d2));//edge region of AC - vc = FNegScaleSub(d3, d2, FMul(d1, d4));//edge region of AB - - const FloatV toRecipD = FAdd(va, FAdd(vb, vc)); - const FloatV denom = FRecip(toRecipD);//V4GetW(recipTmp); - const Vec3V v0 = V3Sub(a1, a0); - const Vec3V v1 = V3Sub(a2, a0); - const Vec3V w0 = V3Sub(b1, b0); - const Vec3V w1 = V3Sub(b2, b0); - - const FloatV t = FMul(vb, denom); - const FloatV w = FMul(vc, denom); - const Vec3V vA1 = V3Scale(v1, w); - const Vec3V vB1 = V3Scale(w1, w); - const Vec3V tempClosestA = V3Add(a0, V3ScaleAdd(v0, t, vA1)); - const Vec3V tempClosestB = V3Add(b0, V3ScaleAdd(w0, t, vB1)); - closestA = tempClosestA; - closestB = tempClosestB; - return V3Sub(tempClosestA, tempClosestB); -} - -PX_NOALIAS Vec3V closestPtPointTetrahedronTesselation(Vec3V* PX_RESTRICT Q, Vec3V* PX_RESTRICT A, Vec3V* PX_RESTRICT B, PxU32& size, Vec3V& closestA, Vec3V& closestB) -{ - const FloatV eps = FEps(); - const Vec3V zeroV = V3Zero(); - PxU32 tempSize = size; - - FloatV bestSqDist = FLoad(PX_MAX_REAL); - const Vec3V a = Q[0]; - const Vec3V b = Q[1]; - const Vec3V c = Q[2]; - const Vec3V d = Q[3]; - const BoolV bTrue = BTTTT(); - const BoolV bFalse = BFFFF(); - - //degenerated - const Vec3V ad = V3Sub(d, a); - const Vec3V bd = V3Sub(d, b); - const Vec3V cd = V3Sub(d, c); - const FloatV dad = V3Dot(ad, ad); - const FloatV dbd = V3Dot(bd, bd); - const FloatV dcd = V3Dot(cd, cd); - const FloatV fMin = FMin(dad, FMin(dbd, dcd)); - if (FAllGrtr(eps, fMin)) - { - size = 3; - PxU32 tempIndices[] = { 0, 1, 2 }; - return closestPtPointTriangleTesselation(Q, A, B, tempIndices, size, closestA, closestB); - } - - Vec3V _Q[] = { Q[0], Q[1], Q[2], Q[3] }; - Vec3V _A[] = { A[0], A[1], A[2], A[3] }; - Vec3V _B[] = { B[0], B[1], B[2], B[3] }; - - PxU32 indices[3] = { 0, 1, 2 }; - - const BoolV bIsOutside4 = PointOutsideOfPlane4(a, b, c, d); - - if (BAllEq(bIsOutside4, bFalse)) - { - //origin is inside the tetrahedron, we are done - return zeroV; - } - - Vec3V result = zeroV; - Vec3V tempClosestA, tempClosestB; - - if (BAllEq(BGetX(bIsOutside4), bTrue)) - { - - PxU32 tempIndices[] = { 0, 1, 2 }; - PxU32 _size = 3; - - result = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB); - - const FloatV sqDist = V3Dot(result, result); - bestSqDist = sqDist; - - indices[0] = tempIndices[0]; - indices[1] = tempIndices[1]; - indices[2] = tempIndices[2]; - - tempSize = _size; - closestA = tempClosestA; - closestB = tempClosestB; - } - - if (BAllEq(BGetY(bIsOutside4), bTrue)) - { - - PxU32 tempIndices[] = { 0, 2, 3 }; - - PxU32 _size = 3; - - const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB); - - const FloatV sqDist = V3Dot(q, q); - const BoolV con = FIsGrtr(bestSqDist, sqDist); - if (BAllEq(con, bTrue)) - { - result = q; - bestSqDist = sqDist; - indices[0] = tempIndices[0]; - indices[1] = tempIndices[1]; - indices[2] = tempIndices[2]; - - tempSize = _size; - closestA = tempClosestA; - closestB = tempClosestB; - } - } - - if (BAllEq(BGetZ(bIsOutside4), bTrue)) - { - - PxU32 tempIndices[] = { 0, 3, 1 }; - PxU32 _size = 3; - - const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB); - - const FloatV sqDist = V3Dot(q, q); - const BoolV con = FIsGrtr(bestSqDist, sqDist); - if (BAllEq(con, bTrue)) - { - result = q; - bestSqDist = sqDist; - indices[0] = tempIndices[0]; - indices[1] = tempIndices[1]; - indices[2] = tempIndices[2]; - tempSize = _size; - closestA = tempClosestA; - closestB = tempClosestB; - } - - } - - if (BAllEq(BGetW(bIsOutside4), bTrue)) - { - - PxU32 tempIndices[] = { 1, 3, 2 }; - PxU32 _size = 3; - - const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB); - - const FloatV sqDist = V3Dot(q, q); - const BoolV con = FIsGrtr(bestSqDist, sqDist); - - if (BAllEq(con, bTrue)) - { - result = q; - bestSqDist = sqDist; - - indices[0] = tempIndices[0]; - indices[1] = tempIndices[1]; - indices[2] = tempIndices[2]; - - tempSize = _size; - closestA = tempClosestA; - closestB = tempClosestB; - } - } - - A[0] = _A[indices[0]]; A[1] = _A[indices[1]]; A[2] = _A[indices[2]]; - B[0] = _B[indices[0]]; B[1] = _B[indices[1]]; B[2] = _B[indices[2]]; - Q[0] = _Q[indices[0]]; Q[1] = _Q[indices[1]]; Q[2] = _Q[indices[2]]; - - - size = tempSize; - return result; -} - -PX_NOALIAS PX_FORCE_INLINE Vec3V doTesselation(Vec3V* PX_RESTRICT Q, Vec3V* PX_RESTRICT A, Vec3V* PX_RESTRICT B, - const Vec3VArg support, const Vec3VArg supportA, const Vec3VArg supportB, PxU32& size, Vec3V& closestA, Vec3V& closestB) -{ - switch (size) - { - case 1: - { - closestA = supportA; - closestB = supportB; - return support; - } - case 2: - { - return closestPtPointSegmentTesselation(Q[0], support, A[0], supportA, B[0], supportB, size, closestA, closestB); - } - case 3: - { - - PxU32 tempIndices[3] = { 0, 1, 2 }; - return closestPtPointTriangleTesselation(Q, A, B, tempIndices, size, closestA, closestB); - } - case 4: - { - return closestPtPointTetrahedronTesselation(Q, A, B, size, closestA, closestB); - } - default: - PX_ASSERT(0); - } - return support; -} - - - - -enum Status -{ - STATUS_NON_INTERSECT, - STATUS_CONTACT, - STATUS_DEGENERATE, -}; - -struct Output -{ - /// Get the normal to push apart in direction from A to B - PX_FORCE_INLINE Vec3V getNormal() const { return V3Normalize(V3Sub(mClosestB, mClosestA)); } - Vec3V mClosestA; ///< Closest point on A - Vec3V mClosestB; ///< Closest point on B - FloatV mDistSq; -}; - -struct ConvexV -{ - void calcExtent(const Vec3V& dir, PxF32& minOut, PxF32& maxOut) const - { - // Expand - const Vec4V x = Vec4V_From_FloatV(V3GetX(dir)); - const Vec4V y = Vec4V_From_FloatV(V3GetY(dir)); - const Vec4V z = Vec4V_From_FloatV(V3GetZ(dir)); - - const Vec4V* src = mAovVertices; - const Vec4V* end = src + mNumAovVertices * 3; - - // Do first step - Vec4V max = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2]))); - Vec4V min = max; - src += 3; - // Do the rest - for (; src < end; src += 3) - { - const Vec4V dot = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2]))); - max = V4Max(dot, max); - min = V4Min(dot, min); - } - FStore(V4ExtractMax(max), &maxOut); - FStore(V4ExtractMin(min), &minOut); - } - Vec3V calcSupport(const Vec3V& dir) const - { - // Expand - const Vec4V x = Vec4V_From_FloatV(V3GetX(dir)); - const Vec4V y = Vec4V_From_FloatV(V3GetY(dir)); - const Vec4V z = Vec4V_From_FloatV(V3GetZ(dir)); - - PX_ALIGN(16, static const PxF32 index4const[]) = { 0.0f, 1.0f, 2.0f, 3.0f }; - Vec4V index4 = *(const Vec4V*)index4const; - PX_ALIGN(16, static const PxF32 delta4const[]) = { 4.0f, 4.0f, 4.0f, 4.0f }; - const Vec4V delta4 = *(const Vec4V*)delta4const; - - const Vec4V* src = mAovVertices; - const Vec4V* end = src + mNumAovVertices * 3; - - // Do first step - Vec4V max = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2]))); - Vec4V maxIndex = index4; - index4 = V4Add(index4, delta4); - src += 3; - // Do the rest - for (; src < end; src += 3) - { - const Vec4V dot = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2]))); - const BoolV cmp = V4IsGrtr(dot, max); - max = V4Max(dot, max); - maxIndex = V4Sel(cmp, index4, maxIndex); - index4 = V4Add(index4, delta4); - } - Vec4V horiMax = Vec4V_From_FloatV(V4ExtractMax(max)); - PxU32 mask = BGetBitMask(V4IsEq(horiMax, max)); - const PxU32 simdIndex = (0x12131210 >> (mask + mask)) & PxU32(3); - - /// NOTE! Could be load hit store - /// Would be better to have all simd. - PX_ALIGN(16, PxF32 f[4]); - V4StoreA(maxIndex, f); - PxU32 index = PxU32(PxI32(f[simdIndex])); - - const Vec4V* aovIndex = (mAovVertices + (index >> 2) * 3); - const PxF32* aovOffset = ((const PxF32*)aovIndex) + (index & 3); - - return Vec3V_From_Vec4V(V4LoadXYZW(aovOffset[0], aovOffset[4], aovOffset[8], 1.0f)); - } - - const Vec4V* mAovVertices; ///< Vertices storex x,x,x,x, y,y,y,y, z,z,z,z - PxU32 mNumAovVertices; ///< Number of groups of 4 of vertices -}; - -Status Collide(const Vec3V& initialDir, const ConvexV& convexA, const Mat34V& bToA, const ConvexV& convexB, Output& out) -{ - Vec3V Q[4]; - Vec3V A[4]; - Vec3V B[4]; - - Mat33V aToB = M34Trnsps33(bToA); - - PxU32 size = 0; - - const Vec3V zeroV = V3Zero(); - const BoolV bTrue = BTTTT(); - - //Vec3V v = V3UnitX(); - Vec3V v = V3Sel(FIsGrtr(V3Dot(initialDir, initialDir), FZero()), initialDir, V3UnitX()); - - //const FloatV minMargin = zero; - //const FloatV eps2 = FMul(minMargin, FLoad(0.01f)); - //FloatV eps2 = zero; - FloatV eps2 = FLoad(1e-6f); - const FloatV epsRel = FLoad(0.000225f); - - Vec3V closA(zeroV), closB(zeroV); - FloatV sDist = FMax(); - FloatV minDist = sDist; - Vec3V closAA = zeroV; - Vec3V closBB = zeroV; - - BoolV bNotTerminated = bTrue; - BoolV bCon = bTrue; - - do - { - minDist = sDist; - closAA = closA; - closBB = closB; - - PxU32 index = size++; - PX_ASSERT(index < 4); - - const Vec3V supportA = convexA.calcSupport(V3Neg(v)); - const Vec3V supportB = M34MulV3(bToA, convexB.calcSupport(M33MulV3(aToB, v))); - const Vec3V support = Vec3V_From_Vec4V(Vec4V_From_Vec3V(V3Sub(supportA, supportB))); - - A[index] = supportA; - B[index] = supportB; - Q[index] = support; - - const FloatV signDist = V3Dot(v, support); - const FloatV tmp0 = FSub(sDist, signDist); - if (FAllGrtr(FMul(epsRel, sDist), tmp0)) - { - out.mClosestA = closA; - out.mClosestB = closB; - out.mDistSq = sDist; - return STATUS_NON_INTERSECT; - } - - //calculate the closest point between two convex hull - v = doTesselation(Q, A, B, support, supportA, supportB, size, closA, closB); - sDist = V3Dot(v, v); - bCon = FIsGrtr(minDist, sDist); - - bNotTerminated = BAnd(FIsGrtr(sDist, eps2), bCon); - } while (BAllEq(bNotTerminated, bTrue)); - - out.mClosestA = V3Sel(bCon, closA, closAA); - out.mClosestB = V3Sel(bCon, closB, closBB); - out.mDistSq = FSel(bCon, sDist, minDist); - return Status(BAllEq(bCon, bTrue) == 1 ? STATUS_CONTACT : STATUS_DEGENERATE); -} - -static void _calcSeparation(const ConvexV& convexA, const physx::PxTransform& aToWorldIn, const Mat34V& bToA, ConvexV& convexB, Output& out, Separation& sep) -{ - - Mat33V aToB = M34Trnsps33(bToA); - Vec3V normalA = out.getNormal(); - - convexA.calcExtent(normalA, sep.min0, sep.max0); - Vec3V normalB = M33MulV3(aToB, normalA); - convexB.calcExtent(normalB, sep.min1, sep.max1); - - { - // Offset the min max taking into account transform - // Distance of origin from B's space in As space in direction of the normal in As space should fix it... - PxF32 fix; - FStore(V3Dot(bToA.col3, normalA), &fix); - sep.min1 += fix; - sep.max1 += fix; - } - - // Looks like it's the plane at the midpoint - Vec3V center = V3Scale(V3Add(out.mClosestA, out.mClosestB), FLoad(0.5f)); - // Transform to world space - Mat34V aToWorld; - *(PxMat44*)&aToWorld = aToWorldIn; - // Put the normal in world space - Vec3V worldCenter = M34MulV3(aToWorld, center); - Vec3V worldNormal = M34Mul33V3(aToWorld, normalA); - - FloatV dist = V3Dot(worldNormal, worldCenter); - V3StoreU(worldNormal, sep.plane.n); - FStore(dist, &sep.plane.d); - sep.plane.d = -sep.plane.d; -} - -static void _arrayVec3ToVec4(const PxVec3* src, Vec4V* dst, PxU32 num) -{ - const PxU32 num4 = num >> 2; - for (PxU32 i = 0; i < num4; i++, dst += 3, src += 4) - { - Vec3V v0 = V3LoadU(&src[0].x); - Vec3V v1 = V3LoadU(&src[1].x); - Vec3V v2 = V3LoadU(&src[2].x); - Vec3V v3 = V3LoadU(&src[3].x); - // Transpose - V4Transpose(v0, v1, v2, v3); - // Save - dst[0] = v0; - dst[1] = v1; - dst[2] = v2; - } - const PxU32 remain = num & 3; - if (remain) - { - Vec3V work[4]; - PxU32 i = 0; - for (; i < remain; i++) work[i] = V3LoadU(&src[i].x); - for (; i < 4; i++) work[i] = work[remain - 1]; - V4Transpose(work[0], work[1], work[2], work[3]); - dst[0] = work[0]; - dst[1] = work[1]; - dst[2] = work[2]; - } -} - - -static void _arrayVec3ToVec4(const PxVec3* src, const Vec3V& scale, Vec4V* dst, PxU32 num) -{ - // If no scale - use the faster version - if (V3AllEq(scale, V3One())) - { - return _arrayVec3ToVec4(src, dst, num); - } - - const PxU32 num4 = num >> 2; - for (PxU32 i = 0; i < num4; i++, dst += 3, src += 4) - { - Vec3V v0 = V3Mul(scale, V3LoadU(&src[0].x)); - Vec3V v1 = V3Mul(scale, V3LoadU(&src[1].x)); - Vec3V v2 = V3Mul(scale, V3LoadU(&src[2].x)); - Vec3V v3 = V3Mul(scale, V3LoadU(&src[3].x)); - // Transpose - V4Transpose(v0, v1, v2, v3); - // Save - dst[0] = v0; - dst[1] = v1; - dst[2] = v2; - } - const PxU32 remain = num & 3; - if (remain) - { - Vec3V work[4]; - PxU32 i = 0; - for (; i < remain; i++) work[i] = V3Mul(scale, V3LoadU(&src[i].x)); - for (; i < 4; i++) work[i] = work[remain - 1]; - V4Transpose(work[0], work[1], work[2], work[3]); - dst[0] = work[0]; - dst[1] = work[1]; - dst[2] = work[2]; - } -} - - -bool importerHullsInProximityApexFree(uint32_t hull0Count, const PxVec3* hull0, PxBounds3& hull0Bounds, const physx::PxTransform& localToWorldRT0In, const physx::PxVec3& scale0In, - uint32_t hull1Count, const PxVec3* hull1, PxBounds3& hull1Bounds, const physx::PxTransform& localToWorldRT1In, const physx::PxVec3& scale1In, - physx::PxF32 maxDistance, Separation* separation) -{ - - - const PxU32 numVerts0 = static_cast(hull0Count); - const PxU32 numVerts1 = static_cast(hull1Count); - const PxU32 numAov0 = (numVerts0 + 3) >> 2; - const PxU32 numAov1 = (numVerts1 + 3) >> 2; - Vec4V* verts0 = (Vec4V*)alloca((numAov0 + numAov1) * sizeof(Vec4V) * 3); - - // Make sure it's aligned - PX_ASSERT((size_t(verts0) & 0xf) == 0); - - Vec4V* verts1 = verts0 + (numAov0 * 3); - - const Vec3V scale0 = V3LoadU(&scale0In.x); - const Vec3V scale1 = V3LoadU(&scale1In.x); - std::vector vert0(numVerts0); - for (uint32_t i = 0; i < numVerts0; ++i) - { - vert0[i] = hull0[i]; - } - std::vector vert1(numVerts1); - for (uint32_t i = 0; i < numVerts1; ++i) - { - vert1[i] = hull1[i]; - } - - _arrayVec3ToVec4(vert0.data(), scale0, verts0, numVerts0); - _arrayVec3ToVec4(vert1.data(), scale1, verts1, numVerts1); - - const PxTransform trans1To0 = localToWorldRT0In.transformInv(localToWorldRT1In); - - // Load into simd mat - Mat34V bToA; - *(PxMat44*)&bToA = trans1To0; - (*(PxMat44*)&bToA).column3.w = 0.0f; // AOS wants the 4th component of Vec3V to be 0 to work properly - - ConvexV convexA; - ConvexV convexB; - - convexA.mNumAovVertices = numAov0; - convexA.mAovVertices = verts0; - - convexB.mNumAovVertices = numAov1; - convexB.mAovVertices = verts1; - - // Take the origin of B in As space as the inital direction as it is 'the difference in transform origins B-A in A's space' - // Should be a good first guess - const Vec3V initialDir = bToA.col3; - Output output; - Status status = Collide(initialDir, convexA, bToA, convexB, output); - - if (status == STATUS_DEGENERATE) - { - // Calculate the tolerance from the extents - const PxVec3 extents0 = hull0Bounds.getExtents(); - const PxVec3 extents1 = hull1Bounds.getExtents(); - - const FloatV tolerance0 = V3ExtractMin(V3Mul(V3LoadU(&extents0.x), scale0)); - const FloatV tolerance1 = V3ExtractMin(V3Mul(V3LoadU(&extents1.x), scale1)); - - const FloatV tolerance = FMul(FAdd(tolerance0, tolerance1), FLoad(0.01f)); - const FloatV sqTolerance = FMul(tolerance, tolerance); - - status = FAllGrtr(sqTolerance, output.mDistSq) ? STATUS_CONTACT : STATUS_NON_INTERSECT; - } - - switch (status) - { - case STATUS_CONTACT: - { - if (separation) - { - _calcSeparation(convexA, localToWorldRT0In, bToA, convexB, output, *separation); - } - return true; - } - default: - case STATUS_NON_INTERSECT: - { - if (separation) - { - _calcSeparation(convexA, localToWorldRT0In, bToA, convexB, output, *separation); - } - PxF32 val; - FStore(output.mDistSq, &val); - return val < (maxDistance * maxDistance); - } - } -} - -} // namespace Blast -} // namespace Nv +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2016-2018 NVIDIA Corporation. All rights reserved. + + +#include "NvBlastExtApexSharedParts.h" + +#include "PxMat44.h" +#include "PxBounds3.h" +#include "PxFoundation.h" +#include "PxPhysics.h" +#include "PsVecMath.h" +#include + +using namespace physx; +using namespace physx::shdfnd::aos; + + +namespace Nv +{ +namespace Blast +{ + +PX_NOALIAS PX_FORCE_INLINE BoolV PointOutsideOfPlane4(const Vec3VArg _a, const Vec3VArg _b, const Vec3VArg _c, const Vec3VArg _d) +{ + // this is not 0 because of the following scenario: + // All the points lie on the same plane and the plane goes through the origin (0,0,0). + // On the Wii U, the math below has the problem that when point A gets projected on the + // plane cumputed by A, B, C, the distance to the plane might not be 0 for the mentioned + // scenario but a small positive or negative value. This can lead to the wrong boolean + // results. Using a small negative value as threshold is more conservative but safer. + const Vec4V zero = V4Load(-1e-6); + + const Vec3V ab = V3Sub(_b, _a); + const Vec3V ac = V3Sub(_c, _a); + const Vec3V ad = V3Sub(_d, _a); + const Vec3V bd = V3Sub(_d, _b); + const Vec3V bc = V3Sub(_c, _b); + + const Vec3V v0 = V3Cross(ab, ac); + const Vec3V v1 = V3Cross(ac, ad); + const Vec3V v2 = V3Cross(ad, ab); + const Vec3V v3 = V3Cross(bd, bc); + + const FloatV signa0 = V3Dot(v0, _a); + const FloatV signa1 = V3Dot(v1, _a); + const FloatV signa2 = V3Dot(v2, _a); + const FloatV signd3 = V3Dot(v3, _a); + + const FloatV signd0 = V3Dot(v0, _d); + const FloatV signd1 = V3Dot(v1, _b); + const FloatV signd2 = V3Dot(v2, _c); + const FloatV signa3 = V3Dot(v3, _b); + + const Vec4V signa = V4Merge(signa0, signa1, signa2, signa3); + const Vec4V signd = V4Merge(signd0, signd1, signd2, signd3); + return V4IsGrtrOrEq(V4Mul(signa, signd), zero);//same side, outside of the plane +} + +PX_NOALIAS PX_FORCE_INLINE Vec3V closestPtPointSegment(const Vec3VArg a, const Vec3VArg b) +{ + const FloatV zero = FZero(); + const FloatV one = FOne(); + + //Test degenerated case + const Vec3V ab = V3Sub(b, a); + const FloatV denom = V3Dot(ab, ab); + const Vec3V ap = V3Neg(a);//V3Sub(origin, a); + const FloatV nom = V3Dot(ap, ab); + const BoolV con = FIsEq(denom, zero); + const FloatV tValue = FClamp(FDiv(nom, denom), zero, one); + const FloatV t = FSel(con, zero, tValue); + + return V3Sel(con, a, V3ScaleAdd(ab, t, a)); +} + +PX_NOALIAS PX_FORCE_INLINE Vec3V closestPtPointSegment(const Vec3VArg Q0, const Vec3VArg Q1, const Vec3VArg A0, const Vec3VArg A1, + const Vec3VArg B0, const Vec3VArg B1, PxU32& size, Vec3V& closestA, Vec3V& closestB) +{ + const Vec3V a = Q0; + const Vec3V b = Q1; + + const BoolV bTrue = BTTTT(); + const FloatV zero = FZero(); + const FloatV one = FOne(); + + //Test degenerated case + const Vec3V ab = V3Sub(b, a); + const FloatV denom = V3Dot(ab, ab); + const Vec3V ap = V3Neg(a);//V3Sub(origin, a); + const FloatV nom = V3Dot(ap, ab); + const BoolV con = FIsEq(denom, zero); + + if (BAllEq(con, bTrue)) + { + size = 1; + closestA = A0; + closestB = B0; + return Q0; + } + + const Vec3V v = V3Sub(A1, A0); + const Vec3V w = V3Sub(B1, B0); + const FloatV tValue = FClamp(FDiv(nom, denom), zero, one); + const FloatV t = FSel(con, zero, tValue); + + const Vec3V tempClosestA = V3ScaleAdd(v, t, A0); + const Vec3V tempClosestB = V3ScaleAdd(w, t, B0); + closestA = tempClosestA; + closestB = tempClosestB; + return V3Sub(tempClosestA, tempClosestB); +} + +PX_NOALIAS Vec3V closestPtPointSegmentTesselation(const Vec3VArg Q0, const Vec3VArg Q1, const Vec3VArg A0, const Vec3VArg A1, + const Vec3VArg B0, const Vec3VArg B1, PxU32& size, Vec3V& closestA, Vec3V& closestB) +{ + const FloatV half = FHalf(); + + const FloatV targetSegmentLengthSq = FLoad(10000.f);//100 unit + + Vec3V q0 = Q0; + Vec3V q1 = Q1; + Vec3V a0 = A0; + Vec3V a1 = A1; + Vec3V b0 = B0; + Vec3V b1 = B1; + + for (;;) + { + const Vec3V midPoint = V3Scale(V3Add(q0, q1), half); + const Vec3V midA = V3Scale(V3Add(a0, a1), half); + const Vec3V midB = V3Scale(V3Add(b0, b1), half); + + const Vec3V v = V3Sub(midPoint, q0); + const FloatV sqV = V3Dot(v, v); + if (FAllGrtr(targetSegmentLengthSq, sqV)) + break; + //split the segment into half + const Vec3V tClos0 = closestPtPointSegment(q0, midPoint); + const FloatV sqDist0 = V3Dot(tClos0, tClos0); + + const Vec3V tClos1 = closestPtPointSegment(q1, midPoint); + const FloatV sqDist1 = V3Dot(tClos1, tClos1); + //const BoolV con = FIsGrtr(sqDist0, sqDist1); + if (FAllGrtr(sqDist0, sqDist1)) + { + //segment [m, q1] + q0 = midPoint; + a0 = midA; + b0 = midB; + } + else + { + //segment [q0, m] + q1 = midPoint; + a1 = midA; + b1 = midB; + } + + } + + return closestPtPointSegment(q0, q1, a0, a1, b0, b1, size, closestA, closestB); +} + +PX_NOALIAS Vec3V closestPtPointTriangleTesselation(const Vec3V* PX_RESTRICT Q, const Vec3V* PX_RESTRICT A, const Vec3V* PX_RESTRICT B, const PxU32* PX_RESTRICT indices, PxU32& size, Vec3V& closestA, Vec3V& closestB) +{ + size = 3; + const FloatV zero = FZero(); + const FloatV eps = FEps(); + const FloatV half = FHalf(); + const BoolV bTrue = BTTTT(); + const FloatV four = FLoad(4.f); + const FloatV sixty = FLoad(100.f); + + const PxU32 ind0 = indices[0]; + const PxU32 ind1 = indices[1]; + const PxU32 ind2 = indices[2]; + + const Vec3V a = Q[ind0]; + const Vec3V b = Q[ind1]; + const Vec3V c = Q[ind2]; + + Vec3V ab_ = V3Sub(b, a); + Vec3V ac_ = V3Sub(c, a); + Vec3V bc_ = V3Sub(b, c); + + const FloatV dac_ = V3Dot(ac_, ac_); + const FloatV dbc_ = V3Dot(bc_, bc_); + if (FAllGrtrOrEq(eps, FMin(dac_, dbc_))) + { + //degenerate + size = 2; + return closestPtPointSegment(Q[ind0], Q[ind1], A[ind0], A[ind1], B[ind0], B[ind1], size, closestA, closestB); + } + + Vec3V ap = V3Neg(a); + Vec3V bp = V3Neg(b); + Vec3V cp = V3Neg(c); + + FloatV d1 = V3Dot(ab_, ap); // snom + FloatV d2 = V3Dot(ac_, ap); // tnom + FloatV d3 = V3Dot(ab_, bp); // -sdenom + FloatV d4 = V3Dot(ac_, bp); // unom = d4 - d3 + FloatV d5 = V3Dot(ab_, cp); // udenom = d5 - d6 + FloatV d6 = V3Dot(ac_, cp); // -tdenom + /* FloatV unom = FSub(d4, d3); + FloatV udenom = FSub(d5, d6);*/ + + FloatV va = FNegScaleSub(d5, d4, FMul(d3, d6));//edge region of BC + FloatV vb = FNegScaleSub(d1, d6, FMul(d5, d2));//edge region of AC + FloatV vc = FNegScaleSub(d3, d2, FMul(d1, d4));//edge region of AB + + //check if p in vertex region outside a + const BoolV con00 = FIsGrtrOrEq(zero, d1); // snom <= 0 + const BoolV con01 = FIsGrtrOrEq(zero, d2); // tnom <= 0 + const BoolV con0 = BAnd(con00, con01); // vertex region a + if (BAllEq(con0, bTrue)) + { + //size = 1; + closestA = A[ind0]; + closestB = B[ind0]; + return Q[ind0]; + } + + //check if p in vertex region outside b + const BoolV con10 = FIsGrtrOrEq(d3, zero); + const BoolV con11 = FIsGrtrOrEq(d3, d4); + const BoolV con1 = BAnd(con10, con11); // vertex region b + if (BAllEq(con1, bTrue)) + { + /*size = 1; + indices[0] = ind1;*/ + closestA = A[ind1]; + closestB = B[ind1]; + return Q[ind1]; + } + + + //check if p in vertex region outside of c + const BoolV con20 = FIsGrtrOrEq(d6, zero); + const BoolV con21 = FIsGrtrOrEq(d6, d5); + const BoolV con2 = BAnd(con20, con21); // vertex region c + if (BAllEq(con2, bTrue)) + { + closestA = A[ind2]; + closestB = B[ind2]; + return Q[ind2]; + } + + //check if p in edge region of AB + const BoolV con30 = FIsGrtrOrEq(zero, vc); + const BoolV con31 = FIsGrtrOrEq(d1, zero); + const BoolV con32 = FIsGrtrOrEq(zero, d3); + const BoolV con3 = BAnd(con30, BAnd(con31, con32)); + + if (BAllEq(con3, bTrue)) + { + //size = 2; + //p in edge region of AB, split AB + return closestPtPointSegmentTesselation(Q[ind0], Q[ind1], A[ind0], A[ind1], B[ind0], B[ind1], size, closestA, closestB); + } + + //check if p in edge region of BC + const BoolV con40 = FIsGrtrOrEq(zero, va); + const BoolV con41 = FIsGrtrOrEq(d4, d3); + const BoolV con42 = FIsGrtrOrEq(d5, d6); + const BoolV con4 = BAnd(con40, BAnd(con41, con42)); + + if (BAllEq(con4, bTrue)) + { + //p in edge region of BC, split BC + return closestPtPointSegmentTesselation(Q[ind1], Q[ind2], A[ind1], A[ind2], B[ind1], B[ind2], size, closestA, closestB); + } + + //check if p in edge region of AC + const BoolV con50 = FIsGrtrOrEq(zero, vb); + const BoolV con51 = FIsGrtrOrEq(d2, zero); + const BoolV con52 = FIsGrtrOrEq(zero, d6); + const BoolV con5 = BAnd(con50, BAnd(con51, con52)); + + if (BAllEq(con5, bTrue)) + { + //p in edge region of AC, split AC + return closestPtPointSegmentTesselation(Q[ind0], Q[ind2], A[ind0], A[ind2], B[ind0], B[ind2], size, closestA, closestB); + } + + size = 3; + + Vec3V q0 = Q[ind0]; + Vec3V q1 = Q[ind1]; + Vec3V q2 = Q[ind2]; + Vec3V a0 = A[ind0]; + Vec3V a1 = A[ind1]; + Vec3V a2 = A[ind2]; + Vec3V b0 = B[ind0]; + Vec3V b1 = B[ind1]; + Vec3V b2 = B[ind2]; + + for (;;) + { + + const Vec3V ab = V3Sub(q1, q0); + const Vec3V ac = V3Sub(q2, q0); + const Vec3V bc = V3Sub(q2, q1); + + const FloatV dab = V3Dot(ab, ab); + const FloatV dac = V3Dot(ac, ac); + const FloatV dbc = V3Dot(bc, bc); + + const FloatV fMax = FMax(dab, FMax(dac, dbc)); + const FloatV fMin = FMin(dab, FMin(dac, dbc)); + + const Vec3V w = V3Cross(ab, ac); + + const FloatV area = V3Length(w); + const FloatV ratio = FDiv(FSqrt(fMax), FSqrt(fMin)); + if (FAllGrtr(four, ratio) && FAllGrtr(sixty, area)) + break; + + //calculate the triangle normal + const Vec3V triNormal = V3Normalize(w); + + PX_ASSERT(V3AllEq(triNormal, V3Zero()) == 0); + + + //split the longest edge + if (FAllGrtrOrEq(dab, dac) && FAllGrtrOrEq(dab, dbc)) + { + //split edge q0q1 + const Vec3V midPoint = V3Scale(V3Add(q0, q1), half); + const Vec3V midA = V3Scale(V3Add(a0, a1), half); + const Vec3V midB = V3Scale(V3Add(b0, b1), half); + + const Vec3V v = V3Sub(midPoint, q2); + const Vec3V n = V3Normalize(V3Cross(v, triNormal)); + + const FloatV d = FNeg(V3Dot(n, midPoint)); + const FloatV dp = FAdd(V3Dot(n, q0), d); + const FloatV sum = FMul(d, dp); + + if (FAllGrtr(sum, zero)) + { + //q0 and origin at the same side, split triangle[q0, m, q2] + q1 = midPoint; + a1 = midA; + b1 = midB; + } + else + { + //q1 and origin at the same side, split triangle[m, q1, q2] + q0 = midPoint; + a0 = midA; + b0 = midB; + } + + } + else if (FAllGrtrOrEq(dac, dbc)) + { + //split edge q0q2 + const Vec3V midPoint = V3Scale(V3Add(q0, q2), half); + const Vec3V midA = V3Scale(V3Add(a0, a2), half); + const Vec3V midB = V3Scale(V3Add(b0, b2), half); + + const Vec3V v = V3Sub(midPoint, q1); + const Vec3V n = V3Normalize(V3Cross(v, triNormal)); + + const FloatV d = FNeg(V3Dot(n, midPoint)); + const FloatV dp = FAdd(V3Dot(n, q0), d); + const FloatV sum = FMul(d, dp); + + if (FAllGrtr(sum, zero)) + { + //q0 and origin at the same side, split triangle[q0, q1, m] + q2 = midPoint; + a2 = midA; + b2 = midB; + } + else + { + //q2 and origin at the same side, split triangle[m, q1, q2] + q0 = midPoint; + a0 = midA; + b0 = midB; + } + } + else + { + //split edge q1q2 + const Vec3V midPoint = V3Scale(V3Add(q1, q2), half); + const Vec3V midA = V3Scale(V3Add(a1, a2), half); + const Vec3V midB = V3Scale(V3Add(b1, b2), half); + + const Vec3V v = V3Sub(midPoint, q0); + const Vec3V n = V3Normalize(V3Cross(v, triNormal)); + + const FloatV d = FNeg(V3Dot(n, midPoint)); + const FloatV dp = FAdd(V3Dot(n, q1), d); + const FloatV sum = FMul(d, dp); + + if (FAllGrtr(sum, zero)) + { + //q1 and origin at the same side, split triangle[q0, q1, m] + q2 = midPoint; + a2 = midA; + b2 = midB; + } + else + { + //q2 and origin at the same side, split triangle[q0, m, q2] + q1 = midPoint; + a1 = midA; + b1 = midB; + } + + + } + } + + //P must project inside face region. Compute Q using Barycentric coordinates + ab_ = V3Sub(q1, q0); + ac_ = V3Sub(q2, q0); + ap = V3Neg(q0); + bp = V3Neg(q1); + cp = V3Neg(q2); + + d1 = V3Dot(ab_, ap); // snom + d2 = V3Dot(ac_, ap); // tnom + d3 = V3Dot(ab_, bp); // -sdenom + d4 = V3Dot(ac_, bp); // unom = d4 - d3 + d5 = V3Dot(ab_, cp); // udenom = d5 - d6 + d6 = V3Dot(ac_, cp); // -tdenom + + va = FNegScaleSub(d5, d4, FMul(d3, d6));//edge region of BC + vb = FNegScaleSub(d1, d6, FMul(d5, d2));//edge region of AC + vc = FNegScaleSub(d3, d2, FMul(d1, d4));//edge region of AB + + const FloatV toRecipD = FAdd(va, FAdd(vb, vc)); + const FloatV denom = FRecip(toRecipD);//V4GetW(recipTmp); + const Vec3V v0 = V3Sub(a1, a0); + const Vec3V v1 = V3Sub(a2, a0); + const Vec3V w0 = V3Sub(b1, b0); + const Vec3V w1 = V3Sub(b2, b0); + + const FloatV t = FMul(vb, denom); + const FloatV w = FMul(vc, denom); + const Vec3V vA1 = V3Scale(v1, w); + const Vec3V vB1 = V3Scale(w1, w); + const Vec3V tempClosestA = V3Add(a0, V3ScaleAdd(v0, t, vA1)); + const Vec3V tempClosestB = V3Add(b0, V3ScaleAdd(w0, t, vB1)); + closestA = tempClosestA; + closestB = tempClosestB; + return V3Sub(tempClosestA, tempClosestB); +} + +PX_NOALIAS Vec3V closestPtPointTetrahedronTesselation(Vec3V* PX_RESTRICT Q, Vec3V* PX_RESTRICT A, Vec3V* PX_RESTRICT B, PxU32& size, Vec3V& closestA, Vec3V& closestB) +{ + const FloatV eps = FEps(); + const Vec3V zeroV = V3Zero(); + PxU32 tempSize = size; + + FloatV bestSqDist = FLoad(PX_MAX_REAL); + const Vec3V a = Q[0]; + const Vec3V b = Q[1]; + const Vec3V c = Q[2]; + const Vec3V d = Q[3]; + const BoolV bTrue = BTTTT(); + const BoolV bFalse = BFFFF(); + + //degenerated + const Vec3V ad = V3Sub(d, a); + const Vec3V bd = V3Sub(d, b); + const Vec3V cd = V3Sub(d, c); + const FloatV dad = V3Dot(ad, ad); + const FloatV dbd = V3Dot(bd, bd); + const FloatV dcd = V3Dot(cd, cd); + const FloatV fMin = FMin(dad, FMin(dbd, dcd)); + if (FAllGrtr(eps, fMin)) + { + size = 3; + PxU32 tempIndices[] = { 0, 1, 2 }; + return closestPtPointTriangleTesselation(Q, A, B, tempIndices, size, closestA, closestB); + } + + Vec3V _Q[] = { Q[0], Q[1], Q[2], Q[3] }; + Vec3V _A[] = { A[0], A[1], A[2], A[3] }; + Vec3V _B[] = { B[0], B[1], B[2], B[3] }; + + PxU32 indices[3] = { 0, 1, 2 }; + + const BoolV bIsOutside4 = PointOutsideOfPlane4(a, b, c, d); + + if (BAllEq(bIsOutside4, bFalse)) + { + //origin is inside the tetrahedron, we are done + return zeroV; + } + + Vec3V result = zeroV; + Vec3V tempClosestA, tempClosestB; + + if (BAllEq(BGetX(bIsOutside4), bTrue)) + { + + PxU32 tempIndices[] = { 0, 1, 2 }; + PxU32 _size = 3; + + result = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB); + + const FloatV sqDist = V3Dot(result, result); + bestSqDist = sqDist; + + indices[0] = tempIndices[0]; + indices[1] = tempIndices[1]; + indices[2] = tempIndices[2]; + + tempSize = _size; + closestA = tempClosestA; + closestB = tempClosestB; + } + + if (BAllEq(BGetY(bIsOutside4), bTrue)) + { + + PxU32 tempIndices[] = { 0, 2, 3 }; + + PxU32 _size = 3; + + const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB); + + const FloatV sqDist = V3Dot(q, q); + const BoolV con = FIsGrtr(bestSqDist, sqDist); + if (BAllEq(con, bTrue)) + { + result = q; + bestSqDist = sqDist; + indices[0] = tempIndices[0]; + indices[1] = tempIndices[1]; + indices[2] = tempIndices[2]; + + tempSize = _size; + closestA = tempClosestA; + closestB = tempClosestB; + } + } + + if (BAllEq(BGetZ(bIsOutside4), bTrue)) + { + + PxU32 tempIndices[] = { 0, 3, 1 }; + PxU32 _size = 3; + + const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB); + + const FloatV sqDist = V3Dot(q, q); + const BoolV con = FIsGrtr(bestSqDist, sqDist); + if (BAllEq(con, bTrue)) + { + result = q; + bestSqDist = sqDist; + indices[0] = tempIndices[0]; + indices[1] = tempIndices[1]; + indices[2] = tempIndices[2]; + tempSize = _size; + closestA = tempClosestA; + closestB = tempClosestB; + } + + } + + if (BAllEq(BGetW(bIsOutside4), bTrue)) + { + + PxU32 tempIndices[] = { 1, 3, 2 }; + PxU32 _size = 3; + + const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB); + + const FloatV sqDist = V3Dot(q, q); + const BoolV con = FIsGrtr(bestSqDist, sqDist); + + if (BAllEq(con, bTrue)) + { + result = q; + bestSqDist = sqDist; + + indices[0] = tempIndices[0]; + indices[1] = tempIndices[1]; + indices[2] = tempIndices[2]; + + tempSize = _size; + closestA = tempClosestA; + closestB = tempClosestB; + } + } + + A[0] = _A[indices[0]]; A[1] = _A[indices[1]]; A[2] = _A[indices[2]]; + B[0] = _B[indices[0]]; B[1] = _B[indices[1]]; B[2] = _B[indices[2]]; + Q[0] = _Q[indices[0]]; Q[1] = _Q[indices[1]]; Q[2] = _Q[indices[2]]; + + + size = tempSize; + return result; +} + +PX_NOALIAS PX_FORCE_INLINE Vec3V doTesselation(Vec3V* PX_RESTRICT Q, Vec3V* PX_RESTRICT A, Vec3V* PX_RESTRICT B, + const Vec3VArg support, const Vec3VArg supportA, const Vec3VArg supportB, PxU32& size, Vec3V& closestA, Vec3V& closestB) +{ + switch (size) + { + case 1: + { + closestA = supportA; + closestB = supportB; + return support; + } + case 2: + { + return closestPtPointSegmentTesselation(Q[0], support, A[0], supportA, B[0], supportB, size, closestA, closestB); + } + case 3: + { + + PxU32 tempIndices[3] = { 0, 1, 2 }; + return closestPtPointTriangleTesselation(Q, A, B, tempIndices, size, closestA, closestB); + } + case 4: + { + return closestPtPointTetrahedronTesselation(Q, A, B, size, closestA, closestB); + } + default: + PX_ASSERT(0); + } + return support; +} + + + + +enum Status +{ + STATUS_NON_INTERSECT, + STATUS_CONTACT, + STATUS_DEGENERATE, +}; + +struct Output +{ + /// Get the normal to push apart in direction from A to B + PX_FORCE_INLINE Vec3V getNormal() const { return V3Normalize(V3Sub(mClosestB, mClosestA)); } + Vec3V mClosestA; ///< Closest point on A + Vec3V mClosestB; ///< Closest point on B + FloatV mDistSq; +}; + +struct ConvexV +{ + void calcExtent(const Vec3V& dir, PxF32& minOut, PxF32& maxOut) const + { + // Expand + const Vec4V x = Vec4V_From_FloatV(V3GetX(dir)); + const Vec4V y = Vec4V_From_FloatV(V3GetY(dir)); + const Vec4V z = Vec4V_From_FloatV(V3GetZ(dir)); + + const Vec4V* src = mAovVertices; + const Vec4V* end = src + mNumAovVertices * 3; + + // Do first step + Vec4V max = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2]))); + Vec4V min = max; + src += 3; + // Do the rest + for (; src < end; src += 3) + { + const Vec4V dot = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2]))); + max = V4Max(dot, max); + min = V4Min(dot, min); + } + FStore(V4ExtractMax(max), &maxOut); + FStore(V4ExtractMin(min), &minOut); + } + Vec3V calcSupport(const Vec3V& dir) const + { + // Expand + const Vec4V x = Vec4V_From_FloatV(V3GetX(dir)); + const Vec4V y = Vec4V_From_FloatV(V3GetY(dir)); + const Vec4V z = Vec4V_From_FloatV(V3GetZ(dir)); + + PX_ALIGN(16, static const PxF32 index4const[]) = { 0.0f, 1.0f, 2.0f, 3.0f }; + Vec4V index4 = *(const Vec4V*)index4const; + PX_ALIGN(16, static const PxF32 delta4const[]) = { 4.0f, 4.0f, 4.0f, 4.0f }; + const Vec4V delta4 = *(const Vec4V*)delta4const; + + const Vec4V* src = mAovVertices; + const Vec4V* end = src + mNumAovVertices * 3; + + // Do first step + Vec4V max = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2]))); + Vec4V maxIndex = index4; + index4 = V4Add(index4, delta4); + src += 3; + // Do the rest + for (; src < end; src += 3) + { + const Vec4V dot = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2]))); + const BoolV cmp = V4IsGrtr(dot, max); + max = V4Max(dot, max); + maxIndex = V4Sel(cmp, index4, maxIndex); + index4 = V4Add(index4, delta4); + } + Vec4V horiMax = Vec4V_From_FloatV(V4ExtractMax(max)); + PxU32 mask = BGetBitMask(V4IsEq(horiMax, max)); + const PxU32 simdIndex = (0x12131210 >> (mask + mask)) & PxU32(3); + + /// NOTE! Could be load hit store + /// Would be better to have all simd. + PX_ALIGN(16, PxF32 f[4]); + V4StoreA(maxIndex, f); + PxU32 index = PxU32(PxI32(f[simdIndex])); + + const Vec4V* aovIndex = (mAovVertices + (index >> 2) * 3); + const PxF32* aovOffset = ((const PxF32*)aovIndex) + (index & 3); + + return Vec3V_From_Vec4V(V4LoadXYZW(aovOffset[0], aovOffset[4], aovOffset[8], 1.0f)); + } + + const Vec4V* mAovVertices; ///< Vertices storex x,x,x,x, y,y,y,y, z,z,z,z + PxU32 mNumAovVertices; ///< Number of groups of 4 of vertices +}; + +Status Collide(const Vec3V& initialDir, const ConvexV& convexA, const Mat34V& bToA, const ConvexV& convexB, Output& out) +{ + Vec3V Q[4]; + Vec3V A[4]; + Vec3V B[4]; + + Mat33V aToB = M34Trnsps33(bToA); + + PxU32 size = 0; + + const Vec3V zeroV = V3Zero(); + const BoolV bTrue = BTTTT(); + + //Vec3V v = V3UnitX(); + Vec3V v = V3Sel(FIsGrtr(V3Dot(initialDir, initialDir), FZero()), initialDir, V3UnitX()); + + //const FloatV minMargin = zero; + //const FloatV eps2 = FMul(minMargin, FLoad(0.01f)); + //FloatV eps2 = zero; + FloatV eps2 = FLoad(1e-6f); + const FloatV epsRel = FLoad(0.000225f); + + Vec3V closA(zeroV), closB(zeroV); + FloatV sDist = FMax(); + FloatV minDist = sDist; + Vec3V closAA = zeroV; + Vec3V closBB = zeroV; + + BoolV bNotTerminated = bTrue; + BoolV bCon = bTrue; + + do + { + minDist = sDist; + closAA = closA; + closBB = closB; + + PxU32 index = size++; + PX_ASSERT(index < 4); + + const Vec3V supportA = convexA.calcSupport(V3Neg(v)); + const Vec3V supportB = M34MulV3(bToA, convexB.calcSupport(M33MulV3(aToB, v))); + const Vec3V support = Vec3V_From_Vec4V(Vec4V_From_Vec3V(V3Sub(supportA, supportB))); + + A[index] = supportA; + B[index] = supportB; + Q[index] = support; + + const FloatV signDist = V3Dot(v, support); + const FloatV tmp0 = FSub(sDist, signDist); + if (FAllGrtr(FMul(epsRel, sDist), tmp0)) + { + out.mClosestA = closA; + out.mClosestB = closB; + out.mDistSq = sDist; + return STATUS_NON_INTERSECT; + } + + //calculate the closest point between two convex hull + v = doTesselation(Q, A, B, support, supportA, supportB, size, closA, closB); + sDist = V3Dot(v, v); + bCon = FIsGrtr(minDist, sDist); + + bNotTerminated = BAnd(FIsGrtr(sDist, eps2), bCon); + } while (BAllEq(bNotTerminated, bTrue)); + + out.mClosestA = V3Sel(bCon, closA, closAA); + out.mClosestB = V3Sel(bCon, closB, closBB); + out.mDistSq = FSel(bCon, sDist, minDist); + return Status(BAllEq(bCon, bTrue) == 1 ? STATUS_CONTACT : STATUS_DEGENERATE); +} + +static void _calcSeparation(const ConvexV& convexA, const physx::PxTransform& aToWorldIn, const Mat34V& bToA, ConvexV& convexB, Output& out, Separation& sep) +{ + + Mat33V aToB = M34Trnsps33(bToA); + Vec3V normalA = out.getNormal(); + + convexA.calcExtent(normalA, sep.min0, sep.max0); + Vec3V normalB = M33MulV3(aToB, normalA); + convexB.calcExtent(normalB, sep.min1, sep.max1); + + { + // Offset the min max taking into account transform + // Distance of origin from B's space in As space in direction of the normal in As space should fix it... + PxF32 fix; + FStore(V3Dot(bToA.col3, normalA), &fix); + sep.min1 += fix; + sep.max1 += fix; + } + + // Looks like it's the plane at the midpoint + Vec3V center = V3Scale(V3Add(out.mClosestA, out.mClosestB), FLoad(0.5f)); + // Transform to world space + Mat34V aToWorld; + *(PxMat44*)&aToWorld = aToWorldIn; + // Put the normal in world space + Vec3V worldCenter = M34MulV3(aToWorld, center); + Vec3V worldNormal = M34Mul33V3(aToWorld, normalA); + + FloatV dist = V3Dot(worldNormal, worldCenter); + V3StoreU(worldNormal, sep.plane.n); + FStore(dist, &sep.plane.d); + sep.plane.d = -sep.plane.d; +} + +static void _arrayVec3ToVec4(const PxVec3* src, Vec4V* dst, PxU32 num) +{ + const PxU32 num4 = num >> 2; + for (PxU32 i = 0; i < num4; i++, dst += 3, src += 4) + { + Vec3V v0 = V3LoadU(&src[0].x); + Vec3V v1 = V3LoadU(&src[1].x); + Vec3V v2 = V3LoadU(&src[2].x); + Vec3V v3 = V3LoadU(&src[3].x); + // Transpose + V4Transpose(v0, v1, v2, v3); + // Save + dst[0] = v0; + dst[1] = v1; + dst[2] = v2; + } + const PxU32 remain = num & 3; + if (remain) + { + Vec3V work[4]; + PxU32 i = 0; + for (; i < remain; i++) work[i] = V3LoadU(&src[i].x); + for (; i < 4; i++) work[i] = work[remain - 1]; + V4Transpose(work[0], work[1], work[2], work[3]); + dst[0] = work[0]; + dst[1] = work[1]; + dst[2] = work[2]; + } +} + + +static void _arrayVec3ToVec4(const PxVec3* src, const Vec3V& scale, Vec4V* dst, PxU32 num) +{ + // If no scale - use the faster version + if (V3AllEq(scale, V3One())) + { + return _arrayVec3ToVec4(src, dst, num); + } + + const PxU32 num4 = num >> 2; + for (PxU32 i = 0; i < num4; i++, dst += 3, src += 4) + { + Vec3V v0 = V3Mul(scale, V3LoadU(&src[0].x)); + Vec3V v1 = V3Mul(scale, V3LoadU(&src[1].x)); + Vec3V v2 = V3Mul(scale, V3LoadU(&src[2].x)); + Vec3V v3 = V3Mul(scale, V3LoadU(&src[3].x)); + // Transpose + V4Transpose(v0, v1, v2, v3); + // Save + dst[0] = v0; + dst[1] = v1; + dst[2] = v2; + } + const PxU32 remain = num & 3; + if (remain) + { + Vec3V work[4]; + PxU32 i = 0; + for (; i < remain; i++) work[i] = V3Mul(scale, V3LoadU(&src[i].x)); + for (; i < 4; i++) work[i] = work[remain - 1]; + V4Transpose(work[0], work[1], work[2], work[3]); + dst[0] = work[0]; + dst[1] = work[1]; + dst[2] = work[2]; + } +} + + +bool importerHullsInProximityApexFree(uint32_t hull0Count, const PxVec3* hull0, PxBounds3& hull0Bounds, const physx::PxTransform& localToWorldRT0In, const physx::PxVec3& scale0In, + uint32_t hull1Count, const PxVec3* hull1, PxBounds3& hull1Bounds, const physx::PxTransform& localToWorldRT1In, const physx::PxVec3& scale1In, + physx::PxF32 maxDistance, Separation* separation) +{ + + + const PxU32 numVerts0 = static_cast(hull0Count); + const PxU32 numVerts1 = static_cast(hull1Count); + const PxU32 numAov0 = (numVerts0 + 3) >> 2; + const PxU32 numAov1 = (numVerts1 + 3) >> 2; + Vec4V* verts0 = (Vec4V*)alloca((numAov0 + numAov1) * sizeof(Vec4V) * 3); + + // Make sure it's aligned + PX_ASSERT((size_t(verts0) & 0xf) == 0); + + Vec4V* verts1 = verts0 + (numAov0 * 3); + + const Vec3V scale0 = V3LoadU(&scale0In.x); + const Vec3V scale1 = V3LoadU(&scale1In.x); + std::vector vert0(numVerts0); + for (uint32_t i = 0; i < numVerts0; ++i) + { + vert0[i] = hull0[i]; + } + std::vector vert1(numVerts1); + for (uint32_t i = 0; i < numVerts1; ++i) + { + vert1[i] = hull1[i]; + } + + _arrayVec3ToVec4(vert0.data(), scale0, verts0, numVerts0); + _arrayVec3ToVec4(vert1.data(), scale1, verts1, numVerts1); + + const PxTransform trans1To0 = localToWorldRT0In.transformInv(localToWorldRT1In); + + // Load into simd mat + Mat34V bToA; + *(PxMat44*)&bToA = trans1To0; + (*(PxMat44*)&bToA).column3.w = 0.0f; // AOS wants the 4th component of Vec3V to be 0 to work properly + + ConvexV convexA; + ConvexV convexB; + + convexA.mNumAovVertices = numAov0; + convexA.mAovVertices = verts0; + + convexB.mNumAovVertices = numAov1; + convexB.mAovVertices = verts1; + + // Take the origin of B in As space as the inital direction as it is 'the difference in transform origins B-A in A's space' + // Should be a good first guess + const Vec3V initialDir = bToA.col3; + Output output; + Status status = Collide(initialDir, convexA, bToA, convexB, output); + + if (status == STATUS_DEGENERATE) + { + // Calculate the tolerance from the extents + const PxVec3 extents0 = hull0Bounds.getExtents(); + const PxVec3 extents1 = hull1Bounds.getExtents(); + + const FloatV tolerance0 = V3ExtractMin(V3Mul(V3LoadU(&extents0.x), scale0)); + const FloatV tolerance1 = V3ExtractMin(V3Mul(V3LoadU(&extents1.x), scale1)); + + const FloatV tolerance = FMul(FAdd(tolerance0, tolerance1), FLoad(0.01f)); + const FloatV sqTolerance = FMul(tolerance, tolerance); + + status = FAllGrtr(sqTolerance, output.mDistSq) ? STATUS_CONTACT : STATUS_NON_INTERSECT; + } + + switch (status) + { + case STATUS_CONTACT: + { + if (separation) + { + _calcSeparation(convexA, localToWorldRT0In, bToA, convexB, output, *separation); + } + return true; + } + default: + case STATUS_NON_INTERSECT: + { + if (separation) + { + _calcSeparation(convexA, localToWorldRT0In, bToA, convexB, output, *separation); + } + PxF32 val; + FStore(output.mDistSq, &val); + return val < (maxDistance * maxDistance); + } + } +} + +} // namespace Blast +} // namespace Nv -- cgit v1.2.3