// This code contains NVIDIA Confidential Information and is disclosed to you // under a form of NVIDIA software license agreement provided separately to you. // // Notice // NVIDIA Corporation and its licensors retain all intellectual property and // proprietary rights in and to this software and related documentation and // any modifications thereto. Any use, reproduction, disclosure, or // distribution of this software and related documentation without an express // license agreement from NVIDIA Corporation is strictly prohibited. // // ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES // NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO // THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, // MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. // // Information and code furnished is believed to be accurate and reliable. // However, NVIDIA Corporation assumes no responsibility for the consequences of use of such // information or for any infringement of patents or other rights of third parties that may // result from its use. No license is granted by implication or otherwise under any patent // or patent rights of NVIDIA Corporation. Details are subject to change without notice. // This code supersedes and replaces all information previously supplied. // NVIDIA Corporation products are not authorized for use as critical // components in life support devices or systems without express written approval of // NVIDIA Corporation. // // Copyright (c) 2016-2020 NVIDIA Corporation. All rights reserved. #include "NvBlastExtApexSharedParts.h" #include "PxMat44.h" #include "PxBounds3.h" #include "PxFoundation.h" #include "PsVecMath.h" #include using namespace physx; using namespace physx::shdfnd::aos; namespace Nv { namespace Blast { PX_NOALIAS PX_FORCE_INLINE BoolV PointOutsideOfPlane4(const Vec3VArg _a, const Vec3VArg _b, const Vec3VArg _c, const Vec3VArg _d) { // this is not 0 because of the following scenario: // All the points lie on the same plane and the plane goes through the origin (0,0,0). // On the Wii U, the math below has the problem that when point A gets projected on the // plane cumputed by A, B, C, the distance to the plane might not be 0 for the mentioned // scenario but a small positive or negative value. This can lead to the wrong boolean // results. Using a small negative value as threshold is more conservative but safer. const Vec4V zero = V4Load(-1e-6); const Vec3V ab = V3Sub(_b, _a); const Vec3V ac = V3Sub(_c, _a); const Vec3V ad = V3Sub(_d, _a); const Vec3V bd = V3Sub(_d, _b); const Vec3V bc = V3Sub(_c, _b); const Vec3V v0 = V3Cross(ab, ac); const Vec3V v1 = V3Cross(ac, ad); const Vec3V v2 = V3Cross(ad, ab); const Vec3V v3 = V3Cross(bd, bc); const FloatV signa0 = V3Dot(v0, _a); const FloatV signa1 = V3Dot(v1, _a); const FloatV signa2 = V3Dot(v2, _a); const FloatV signd3 = V3Dot(v3, _a); const FloatV signd0 = V3Dot(v0, _d); const FloatV signd1 = V3Dot(v1, _b); const FloatV signd2 = V3Dot(v2, _c); const FloatV signa3 = V3Dot(v3, _b); const Vec4V signa = V4Merge(signa0, signa1, signa2, signa3); const Vec4V signd = V4Merge(signd0, signd1, signd2, signd3); return V4IsGrtrOrEq(V4Mul(signa, signd), zero);//same side, outside of the plane } PX_NOALIAS PX_FORCE_INLINE Vec3V closestPtPointSegment(const Vec3VArg a, const Vec3VArg b) { const FloatV zero = FZero(); const FloatV one = FOne(); //Test degenerated case const Vec3V ab = V3Sub(b, a); const FloatV denom = V3Dot(ab, ab); const Vec3V ap = V3Neg(a);//V3Sub(origin, a); const FloatV nom = V3Dot(ap, ab); const BoolV con = FIsEq(denom, zero); const FloatV tValue = FClamp(FDiv(nom, denom), zero, one); const FloatV t = FSel(con, zero, tValue); return V3Sel(con, a, V3ScaleAdd(ab, t, a)); } PX_NOALIAS PX_FORCE_INLINE Vec3V closestPtPointSegment(const Vec3VArg Q0, const Vec3VArg Q1, const Vec3VArg A0, const Vec3VArg A1, const Vec3VArg B0, const Vec3VArg B1, PxU32& size, Vec3V& closestA, Vec3V& closestB) { const Vec3V a = Q0; const Vec3V b = Q1; const BoolV bTrue = BTTTT(); const FloatV zero = FZero(); const FloatV one = FOne(); //Test degenerated case const Vec3V ab = V3Sub(b, a); const FloatV denom = V3Dot(ab, ab); const Vec3V ap = V3Neg(a);//V3Sub(origin, a); const FloatV nom = V3Dot(ap, ab); const BoolV con = FIsEq(denom, zero); if (BAllEq(con, bTrue)) { size = 1; closestA = A0; closestB = B0; return Q0; } const Vec3V v = V3Sub(A1, A0); const Vec3V w = V3Sub(B1, B0); const FloatV tValue = FClamp(FDiv(nom, denom), zero, one); const FloatV t = FSel(con, zero, tValue); const Vec3V tempClosestA = V3ScaleAdd(v, t, A0); const Vec3V tempClosestB = V3ScaleAdd(w, t, B0); closestA = tempClosestA; closestB = tempClosestB; return V3Sub(tempClosestA, tempClosestB); } PX_NOALIAS Vec3V closestPtPointSegmentTesselation(const Vec3VArg Q0, const Vec3VArg Q1, const Vec3VArg A0, const Vec3VArg A1, const Vec3VArg B0, const Vec3VArg B1, PxU32& size, Vec3V& closestA, Vec3V& closestB) { const FloatV half = FHalf(); const FloatV targetSegmentLengthSq = FLoad(10000.f);//100 unit Vec3V q0 = Q0; Vec3V q1 = Q1; Vec3V a0 = A0; Vec3V a1 = A1; Vec3V b0 = B0; Vec3V b1 = B1; for (;;) { const Vec3V midPoint = V3Scale(V3Add(q0, q1), half); const Vec3V midA = V3Scale(V3Add(a0, a1), half); const Vec3V midB = V3Scale(V3Add(b0, b1), half); const Vec3V v = V3Sub(midPoint, q0); const FloatV sqV = V3Dot(v, v); if (FAllGrtr(targetSegmentLengthSq, sqV)) break; //split the segment into half const Vec3V tClos0 = closestPtPointSegment(q0, midPoint); const FloatV sqDist0 = V3Dot(tClos0, tClos0); const Vec3V tClos1 = closestPtPointSegment(q1, midPoint); const FloatV sqDist1 = V3Dot(tClos1, tClos1); //const BoolV con = FIsGrtr(sqDist0, sqDist1); if (FAllGrtr(sqDist0, sqDist1)) { //segment [m, q1] q0 = midPoint; a0 = midA; b0 = midB; } else { //segment [q0, m] q1 = midPoint; a1 = midA; b1 = midB; } } return closestPtPointSegment(q0, q1, a0, a1, b0, b1, size, closestA, closestB); } PX_NOALIAS Vec3V closestPtPointTriangleTesselation(const Vec3V* PX_RESTRICT Q, const Vec3V* PX_RESTRICT A, const Vec3V* PX_RESTRICT B, const PxU32* PX_RESTRICT indices, PxU32& size, Vec3V& closestA, Vec3V& closestB) { size = 3; const FloatV zero = FZero(); const FloatV eps = FEps(); const FloatV half = FHalf(); const BoolV bTrue = BTTTT(); const FloatV four = FLoad(4.f); const FloatV sixty = FLoad(100.f); const PxU32 ind0 = indices[0]; const PxU32 ind1 = indices[1]; const PxU32 ind2 = indices[2]; const Vec3V a = Q[ind0]; const Vec3V b = Q[ind1]; const Vec3V c = Q[ind2]; Vec3V ab_ = V3Sub(b, a); Vec3V ac_ = V3Sub(c, a); Vec3V bc_ = V3Sub(b, c); const FloatV dac_ = V3Dot(ac_, ac_); const FloatV dbc_ = V3Dot(bc_, bc_); if (FAllGrtrOrEq(eps, FMin(dac_, dbc_))) { //degenerate size = 2; return closestPtPointSegment(Q[ind0], Q[ind1], A[ind0], A[ind1], B[ind0], B[ind1], size, closestA, closestB); } Vec3V ap = V3Neg(a); Vec3V bp = V3Neg(b); Vec3V cp = V3Neg(c); FloatV d1 = V3Dot(ab_, ap); // snom FloatV d2 = V3Dot(ac_, ap); // tnom FloatV d3 = V3Dot(ab_, bp); // -sdenom FloatV d4 = V3Dot(ac_, bp); // unom = d4 - d3 FloatV d5 = V3Dot(ab_, cp); // udenom = d5 - d6 FloatV d6 = V3Dot(ac_, cp); // -tdenom /* FloatV unom = FSub(d4, d3); FloatV udenom = FSub(d5, d6);*/ FloatV va = FNegScaleSub(d5, d4, FMul(d3, d6));//edge region of BC FloatV vb = FNegScaleSub(d1, d6, FMul(d5, d2));//edge region of AC FloatV vc = FNegScaleSub(d3, d2, FMul(d1, d4));//edge region of AB //check if p in vertex region outside a const BoolV con00 = FIsGrtrOrEq(zero, d1); // snom <= 0 const BoolV con01 = FIsGrtrOrEq(zero, d2); // tnom <= 0 const BoolV con0 = BAnd(con00, con01); // vertex region a if (BAllEq(con0, bTrue)) { //size = 1; closestA = A[ind0]; closestB = B[ind0]; return Q[ind0]; } //check if p in vertex region outside b const BoolV con10 = FIsGrtrOrEq(d3, zero); const BoolV con11 = FIsGrtrOrEq(d3, d4); const BoolV con1 = BAnd(con10, con11); // vertex region b if (BAllEq(con1, bTrue)) { /*size = 1; indices[0] = ind1;*/ closestA = A[ind1]; closestB = B[ind1]; return Q[ind1]; } //check if p in vertex region outside of c const BoolV con20 = FIsGrtrOrEq(d6, zero); const BoolV con21 = FIsGrtrOrEq(d6, d5); const BoolV con2 = BAnd(con20, con21); // vertex region c if (BAllEq(con2, bTrue)) { closestA = A[ind2]; closestB = B[ind2]; return Q[ind2]; } //check if p in edge region of AB const BoolV con30 = FIsGrtrOrEq(zero, vc); const BoolV con31 = FIsGrtrOrEq(d1, zero); const BoolV con32 = FIsGrtrOrEq(zero, d3); const BoolV con3 = BAnd(con30, BAnd(con31, con32)); if (BAllEq(con3, bTrue)) { //size = 2; //p in edge region of AB, split AB return closestPtPointSegmentTesselation(Q[ind0], Q[ind1], A[ind0], A[ind1], B[ind0], B[ind1], size, closestA, closestB); } //check if p in edge region of BC const BoolV con40 = FIsGrtrOrEq(zero, va); const BoolV con41 = FIsGrtrOrEq(d4, d3); const BoolV con42 = FIsGrtrOrEq(d5, d6); const BoolV con4 = BAnd(con40, BAnd(con41, con42)); if (BAllEq(con4, bTrue)) { //p in edge region of BC, split BC return closestPtPointSegmentTesselation(Q[ind1], Q[ind2], A[ind1], A[ind2], B[ind1], B[ind2], size, closestA, closestB); } //check if p in edge region of AC const BoolV con50 = FIsGrtrOrEq(zero, vb); const BoolV con51 = FIsGrtrOrEq(d2, zero); const BoolV con52 = FIsGrtrOrEq(zero, d6); const BoolV con5 = BAnd(con50, BAnd(con51, con52)); if (BAllEq(con5, bTrue)) { //p in edge region of AC, split AC return closestPtPointSegmentTesselation(Q[ind0], Q[ind2], A[ind0], A[ind2], B[ind0], B[ind2], size, closestA, closestB); } size = 3; Vec3V q0 = Q[ind0]; Vec3V q1 = Q[ind1]; Vec3V q2 = Q[ind2]; Vec3V a0 = A[ind0]; Vec3V a1 = A[ind1]; Vec3V a2 = A[ind2]; Vec3V b0 = B[ind0]; Vec3V b1 = B[ind1]; Vec3V b2 = B[ind2]; for (;;) { const Vec3V ab = V3Sub(q1, q0); const Vec3V ac = V3Sub(q2, q0); const Vec3V bc = V3Sub(q2, q1); const FloatV dab = V3Dot(ab, ab); const FloatV dac = V3Dot(ac, ac); const FloatV dbc = V3Dot(bc, bc); const FloatV fMax = FMax(dab, FMax(dac, dbc)); const FloatV fMin = FMin(dab, FMin(dac, dbc)); const Vec3V w = V3Cross(ab, ac); const FloatV area = V3Length(w); const FloatV ratio = FDiv(FSqrt(fMax), FSqrt(fMin)); if (FAllGrtr(four, ratio) && FAllGrtr(sixty, area)) break; //calculate the triangle normal const Vec3V triNormal = V3Normalize(w); PX_ASSERT(V3AllEq(triNormal, V3Zero()) == 0); //split the longest edge if (FAllGrtrOrEq(dab, dac) && FAllGrtrOrEq(dab, dbc)) { //split edge q0q1 const Vec3V midPoint = V3Scale(V3Add(q0, q1), half); const Vec3V midA = V3Scale(V3Add(a0, a1), half); const Vec3V midB = V3Scale(V3Add(b0, b1), half); const Vec3V v = V3Sub(midPoint, q2); const Vec3V n = V3Normalize(V3Cross(v, triNormal)); const FloatV d = FNeg(V3Dot(n, midPoint)); const FloatV dp = FAdd(V3Dot(n, q0), d); const FloatV sum = FMul(d, dp); if (FAllGrtr(sum, zero)) { //q0 and origin at the same side, split triangle[q0, m, q2] q1 = midPoint; a1 = midA; b1 = midB; } else { //q1 and origin at the same side, split triangle[m, q1, q2] q0 = midPoint; a0 = midA; b0 = midB; } } else if (FAllGrtrOrEq(dac, dbc)) { //split edge q0q2 const Vec3V midPoint = V3Scale(V3Add(q0, q2), half); const Vec3V midA = V3Scale(V3Add(a0, a2), half); const Vec3V midB = V3Scale(V3Add(b0, b2), half); const Vec3V v = V3Sub(midPoint, q1); const Vec3V n = V3Normalize(V3Cross(v, triNormal)); const FloatV d = FNeg(V3Dot(n, midPoint)); const FloatV dp = FAdd(V3Dot(n, q0), d); const FloatV sum = FMul(d, dp); if (FAllGrtr(sum, zero)) { //q0 and origin at the same side, split triangle[q0, q1, m] q2 = midPoint; a2 = midA; b2 = midB; } else { //q2 and origin at the same side, split triangle[m, q1, q2] q0 = midPoint; a0 = midA; b0 = midB; } } else { //split edge q1q2 const Vec3V midPoint = V3Scale(V3Add(q1, q2), half); const Vec3V midA = V3Scale(V3Add(a1, a2), half); const Vec3V midB = V3Scale(V3Add(b1, b2), half); const Vec3V v = V3Sub(midPoint, q0); const Vec3V n = V3Normalize(V3Cross(v, triNormal)); const FloatV d = FNeg(V3Dot(n, midPoint)); const FloatV dp = FAdd(V3Dot(n, q1), d); const FloatV sum = FMul(d, dp); if (FAllGrtr(sum, zero)) { //q1 and origin at the same side, split triangle[q0, q1, m] q2 = midPoint; a2 = midA; b2 = midB; } else { //q2 and origin at the same side, split triangle[q0, m, q2] q1 = midPoint; a1 = midA; b1 = midB; } } } //P must project inside face region. Compute Q using Barycentric coordinates ab_ = V3Sub(q1, q0); ac_ = V3Sub(q2, q0); ap = V3Neg(q0); bp = V3Neg(q1); cp = V3Neg(q2); d1 = V3Dot(ab_, ap); // snom d2 = V3Dot(ac_, ap); // tnom d3 = V3Dot(ab_, bp); // -sdenom d4 = V3Dot(ac_, bp); // unom = d4 - d3 d5 = V3Dot(ab_, cp); // udenom = d5 - d6 d6 = V3Dot(ac_, cp); // -tdenom va = FNegScaleSub(d5, d4, FMul(d3, d6));//edge region of BC vb = FNegScaleSub(d1, d6, FMul(d5, d2));//edge region of AC vc = FNegScaleSub(d3, d2, FMul(d1, d4));//edge region of AB const FloatV toRecipD = FAdd(va, FAdd(vb, vc)); const FloatV denom = FRecip(toRecipD);//V4GetW(recipTmp); const Vec3V v0 = V3Sub(a1, a0); const Vec3V v1 = V3Sub(a2, a0); const Vec3V w0 = V3Sub(b1, b0); const Vec3V w1 = V3Sub(b2, b0); const FloatV t = FMul(vb, denom); const FloatV w = FMul(vc, denom); const Vec3V vA1 = V3Scale(v1, w); const Vec3V vB1 = V3Scale(w1, w); const Vec3V tempClosestA = V3Add(a0, V3ScaleAdd(v0, t, vA1)); const Vec3V tempClosestB = V3Add(b0, V3ScaleAdd(w0, t, vB1)); closestA = tempClosestA; closestB = tempClosestB; return V3Sub(tempClosestA, tempClosestB); } PX_NOALIAS Vec3V closestPtPointTetrahedronTesselation(Vec3V* PX_RESTRICT Q, Vec3V* PX_RESTRICT A, Vec3V* PX_RESTRICT B, PxU32& size, Vec3V& closestA, Vec3V& closestB) { const FloatV eps = FEps(); const Vec3V zeroV = V3Zero(); PxU32 tempSize = size; FloatV bestSqDist = FLoad(PX_MAX_REAL); const Vec3V a = Q[0]; const Vec3V b = Q[1]; const Vec3V c = Q[2]; const Vec3V d = Q[3]; const BoolV bTrue = BTTTT(); const BoolV bFalse = BFFFF(); //degenerated const Vec3V ad = V3Sub(d, a); const Vec3V bd = V3Sub(d, b); const Vec3V cd = V3Sub(d, c); const FloatV dad = V3Dot(ad, ad); const FloatV dbd = V3Dot(bd, bd); const FloatV dcd = V3Dot(cd, cd); const FloatV fMin = FMin(dad, FMin(dbd, dcd)); if (FAllGrtr(eps, fMin)) { size = 3; PxU32 tempIndices[] = { 0, 1, 2 }; return closestPtPointTriangleTesselation(Q, A, B, tempIndices, size, closestA, closestB); } Vec3V _Q[] = { Q[0], Q[1], Q[2], Q[3] }; Vec3V _A[] = { A[0], A[1], A[2], A[3] }; Vec3V _B[] = { B[0], B[1], B[2], B[3] }; PxU32 indices[3] = { 0, 1, 2 }; const BoolV bIsOutside4 = PointOutsideOfPlane4(a, b, c, d); if (BAllEq(bIsOutside4, bFalse)) { //origin is inside the tetrahedron, we are done return zeroV; } Vec3V result = zeroV; Vec3V tempClosestA, tempClosestB; if (BAllEq(BGetX(bIsOutside4), bTrue)) { PxU32 tempIndices[] = { 0, 1, 2 }; PxU32 _size = 3; result = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB); const FloatV sqDist = V3Dot(result, result); bestSqDist = sqDist; indices[0] = tempIndices[0]; indices[1] = tempIndices[1]; indices[2] = tempIndices[2]; tempSize = _size; closestA = tempClosestA; closestB = tempClosestB; } if (BAllEq(BGetY(bIsOutside4), bTrue)) { PxU32 tempIndices[] = { 0, 2, 3 }; PxU32 _size = 3; const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB); const FloatV sqDist = V3Dot(q, q); const BoolV con = FIsGrtr(bestSqDist, sqDist); if (BAllEq(con, bTrue)) { result = q; bestSqDist = sqDist; indices[0] = tempIndices[0]; indices[1] = tempIndices[1]; indices[2] = tempIndices[2]; tempSize = _size; closestA = tempClosestA; closestB = tempClosestB; } } if (BAllEq(BGetZ(bIsOutside4), bTrue)) { PxU32 tempIndices[] = { 0, 3, 1 }; PxU32 _size = 3; const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB); const FloatV sqDist = V3Dot(q, q); const BoolV con = FIsGrtr(bestSqDist, sqDist); if (BAllEq(con, bTrue)) { result = q; bestSqDist = sqDist; indices[0] = tempIndices[0]; indices[1] = tempIndices[1]; indices[2] = tempIndices[2]; tempSize = _size; closestA = tempClosestA; closestB = tempClosestB; } } if (BAllEq(BGetW(bIsOutside4), bTrue)) { PxU32 tempIndices[] = { 1, 3, 2 }; PxU32 _size = 3; const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB); const FloatV sqDist = V3Dot(q, q); const BoolV con = FIsGrtr(bestSqDist, sqDist); if (BAllEq(con, bTrue)) { result = q; bestSqDist = sqDist; indices[0] = tempIndices[0]; indices[1] = tempIndices[1]; indices[2] = tempIndices[2]; tempSize = _size; closestA = tempClosestA; closestB = tempClosestB; } } A[0] = _A[indices[0]]; A[1] = _A[indices[1]]; A[2] = _A[indices[2]]; B[0] = _B[indices[0]]; B[1] = _B[indices[1]]; B[2] = _B[indices[2]]; Q[0] = _Q[indices[0]]; Q[1] = _Q[indices[1]]; Q[2] = _Q[indices[2]]; size = tempSize; return result; } PX_NOALIAS PX_FORCE_INLINE Vec3V doTesselation(Vec3V* PX_RESTRICT Q, Vec3V* PX_RESTRICT A, Vec3V* PX_RESTRICT B, const Vec3VArg support, const Vec3VArg supportA, const Vec3VArg supportB, PxU32& size, Vec3V& closestA, Vec3V& closestB) { switch (size) { case 1: { closestA = supportA; closestB = supportB; return support; } case 2: { return closestPtPointSegmentTesselation(Q[0], support, A[0], supportA, B[0], supportB, size, closestA, closestB); } case 3: { PxU32 tempIndices[3] = { 0, 1, 2 }; return closestPtPointTriangleTesselation(Q, A, B, tempIndices, size, closestA, closestB); } case 4: { return closestPtPointTetrahedronTesselation(Q, A, B, size, closestA, closestB); } default: PX_ASSERT(0); } return support; } enum Status { STATUS_NON_INTERSECT, STATUS_CONTACT, STATUS_DEGENERATE, }; struct Output { /// Get the normal to push apart in direction from A to B PX_FORCE_INLINE Vec3V getNormal() const { return V3Normalize(V3Sub(mClosestB, mClosestA)); } Vec3V mClosestA; ///< Closest point on A Vec3V mClosestB; ///< Closest point on B FloatV mDistSq; }; struct ConvexV { void calcExtent(const Vec3V& dir, PxF32& minOut, PxF32& maxOut) const { // Expand const Vec4V x = Vec4V_From_FloatV(V3GetX(dir)); const Vec4V y = Vec4V_From_FloatV(V3GetY(dir)); const Vec4V z = Vec4V_From_FloatV(V3GetZ(dir)); const Vec4V* src = mAovVertices; const Vec4V* end = src + mNumAovVertices * 3; // Do first step Vec4V max = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2]))); Vec4V min = max; src += 3; // Do the rest for (; src < end; src += 3) { const Vec4V dot = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2]))); max = V4Max(dot, max); min = V4Min(dot, min); } FStore(V4ExtractMax(max), &maxOut); FStore(V4ExtractMin(min), &minOut); } Vec3V calcSupport(const Vec3V& dir) const { // Expand const Vec4V x = Vec4V_From_FloatV(V3GetX(dir)); const Vec4V y = Vec4V_From_FloatV(V3GetY(dir)); const Vec4V z = Vec4V_From_FloatV(V3GetZ(dir)); PX_ALIGN(16, static const PxF32 index4const[]) = { 0.0f, 1.0f, 2.0f, 3.0f }; Vec4V index4 = *(const Vec4V*)index4const; PX_ALIGN(16, static const PxF32 delta4const[]) = { 4.0f, 4.0f, 4.0f, 4.0f }; const Vec4V delta4 = *(const Vec4V*)delta4const; const Vec4V* src = mAovVertices; const Vec4V* end = src + mNumAovVertices * 3; // Do first step Vec4V max = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2]))); Vec4V maxIndex = index4; index4 = V4Add(index4, delta4); src += 3; // Do the rest for (; src < end; src += 3) { const Vec4V dot = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2]))); const BoolV cmp = V4IsGrtr(dot, max); max = V4Max(dot, max); maxIndex = V4Sel(cmp, index4, maxIndex); index4 = V4Add(index4, delta4); } Vec4V horiMax = Vec4V_From_FloatV(V4ExtractMax(max)); PxU32 mask = BGetBitMask(V4IsEq(horiMax, max)); const PxU32 simdIndex = (0x12131210 >> (mask + mask)) & PxU32(3); /// NOTE! Could be load hit store /// Would be better to have all simd. PX_ALIGN(16, PxF32 f[4]); V4StoreA(maxIndex, f); PxU32 index = PxU32(PxI32(f[simdIndex])); const Vec4V* aovIndex = (mAovVertices + (index >> 2) * 3); const PxF32* aovOffset = ((const PxF32*)aovIndex) + (index & 3); return Vec3V_From_Vec4V(V4LoadXYZW(aovOffset[0], aovOffset[4], aovOffset[8], 1.0f)); } const Vec4V* mAovVertices; ///< Vertices storex x,x,x,x, y,y,y,y, z,z,z,z PxU32 mNumAovVertices; ///< Number of groups of 4 of vertices }; Status Collide(const Vec3V& initialDir, const ConvexV& convexA, const Mat34V& bToA, const ConvexV& convexB, Output& out) { Vec3V Q[4]; Vec3V A[4]; Vec3V B[4]; Mat33V aToB = M34Trnsps33(bToA); PxU32 size = 0; const Vec3V zeroV = V3Zero(); const BoolV bTrue = BTTTT(); //Vec3V v = V3UnitX(); Vec3V v = V3Sel(FIsGrtr(V3Dot(initialDir, initialDir), FZero()), initialDir, V3UnitX()); //const FloatV minMargin = zero; //const FloatV eps2 = FMul(minMargin, FLoad(0.01f)); //FloatV eps2 = zero; FloatV eps2 = FLoad(1e-6f); const FloatV epsRel = FLoad(0.000225f); Vec3V closA(zeroV), closB(zeroV); FloatV sDist = FMax(); FloatV minDist = sDist; Vec3V closAA = zeroV; Vec3V closBB = zeroV; BoolV bNotTerminated = bTrue; BoolV bCon = bTrue; do { minDist = sDist; closAA = closA; closBB = closB; PxU32 index = size++; PX_ASSERT(index < 4); const Vec3V supportA = convexA.calcSupport(V3Neg(v)); const Vec3V supportB = M34MulV3(bToA, convexB.calcSupport(M33MulV3(aToB, v))); const Vec3V support = Vec3V_From_Vec4V(Vec4V_From_Vec3V(V3Sub(supportA, supportB))); A[index] = supportA; B[index] = supportB; Q[index] = support; const FloatV signDist = V3Dot(v, support); const FloatV tmp0 = FSub(sDist, signDist); if (FAllGrtr(FMul(epsRel, sDist), tmp0)) { out.mClosestA = closA; out.mClosestB = closB; out.mDistSq = sDist; return STATUS_NON_INTERSECT; } //calculate the closest point between two convex hull v = doTesselation(Q, A, B, support, supportA, supportB, size, closA, closB); sDist = V3Dot(v, v); bCon = FIsGrtr(minDist, sDist); bNotTerminated = BAnd(FIsGrtr(sDist, eps2), bCon); } while (BAllEq(bNotTerminated, bTrue)); out.mClosestA = V3Sel(bCon, closA, closAA); out.mClosestB = V3Sel(bCon, closB, closBB); out.mDistSq = FSel(bCon, sDist, minDist); return Status(BAllEq(bCon, bTrue) == 1 ? STATUS_CONTACT : STATUS_DEGENERATE); } static void _calcSeparation(const ConvexV& convexA, const physx::PxTransform& aToWorldIn, const Mat34V& bToA, ConvexV& convexB, const Vec3V& centroidAToB, Output& out, Separation& sep) { Mat33V aToB = M34Trnsps33(bToA); Vec3V normalA = out.getNormal(); FloatV vEpsilon = FLoad(1e-6f); if (BAllEqFFFF(FIsGrtr(out.mDistSq, vEpsilon))) { if (BAllEqTTTT(FIsGrtr(V3Dot(centroidAToB, centroidAToB), vEpsilon))) { normalA = V3Normalize(centroidAToB); } else { normalA = V3UnitX(); } } convexA.calcExtent(normalA, sep.min0, sep.max0); Vec3V normalB = M33MulV3(aToB, normalA); convexB.calcExtent(normalB, sep.min1, sep.max1); { // Offset the min max taking into account transform // Distance of origin from B's space in As space in direction of the normal in As space should fix it... PxF32 fix; FStore(V3Dot(bToA.col3, normalA), &fix); sep.min1 += fix; sep.max1 += fix; } // Looks like it's the plane at the midpoint Vec3V center = V3Scale(V3Add(out.mClosestA, out.mClosestB), FLoad(0.5f)); // Transform to world space Mat34V aToWorld; *(PxMat44*)&aToWorld = aToWorldIn; // Put the normal in world space Vec3V worldCenter = M34MulV3(aToWorld, center); Vec3V worldNormal = M34Mul33V3(aToWorld, normalA); FloatV dist = V3Dot(worldNormal, worldCenter); V3StoreU(worldNormal, sep.plane.n); FStore(dist, &sep.plane.d); sep.plane.d = -sep.plane.d; } static void _arrayVec3ToVec4(const PxVec3* src, Vec4V* dst, PxU32 num) { const PxU32 num4 = num >> 2; for (PxU32 i = 0; i < num4; i++, dst += 3, src += 4) { Vec3V v0 = V3LoadU(&src[0].x); Vec3V v1 = V3LoadU(&src[1].x); Vec3V v2 = V3LoadU(&src[2].x); Vec3V v3 = V3LoadU(&src[3].x); // Transpose V4Transpose(v0, v1, v2, v3); // Save dst[0] = v0; dst[1] = v1; dst[2] = v2; } const PxU32 remain = num & 3; if (remain) { Vec3V work[4]; PxU32 i = 0; for (; i < remain; i++) work[i] = V3LoadU(&src[i].x); for (; i < 4; i++) work[i] = work[remain - 1]; V4Transpose(work[0], work[1], work[2], work[3]); dst[0] = work[0]; dst[1] = work[1]; dst[2] = work[2]; } } static void _arrayVec3ToVec4(const PxVec3* src, const Vec3V& scale, Vec4V* dst, PxU32 num) { // If no scale - use the faster version if (V3AllEq(scale, V3One())) { return _arrayVec3ToVec4(src, dst, num); } const PxU32 num4 = num >> 2; for (PxU32 i = 0; i < num4; i++, dst += 3, src += 4) { Vec3V v0 = V3Mul(scale, V3LoadU(&src[0].x)); Vec3V v1 = V3Mul(scale, V3LoadU(&src[1].x)); Vec3V v2 = V3Mul(scale, V3LoadU(&src[2].x)); Vec3V v3 = V3Mul(scale, V3LoadU(&src[3].x)); // Transpose V4Transpose(v0, v1, v2, v3); // Save dst[0] = v0; dst[1] = v1; dst[2] = v2; } const PxU32 remain = num & 3; if (remain) { Vec3V work[4]; PxU32 i = 0; for (; i < remain; i++) work[i] = V3Mul(scale, V3LoadU(&src[i].x)); for (; i < 4; i++) work[i] = work[remain - 1]; V4Transpose(work[0], work[1], work[2], work[3]); dst[0] = work[0]; dst[1] = work[1]; dst[2] = work[2]; } } bool importerHullsInProximityApexFree(uint32_t hull0Count, const PxVec3* hull0, PxBounds3& hull0Bounds, const physx::PxTransform& localToWorldRT0In, const physx::PxVec3& scale0In, uint32_t hull1Count, const PxVec3* hull1, PxBounds3& hull1Bounds, const physx::PxTransform& localToWorldRT1In, const physx::PxVec3& scale1In, physx::PxF32 maxDistance, Separation* separation) { const PxU32 numVerts0 = static_cast(hull0Count); const PxU32 numVerts1 = static_cast(hull1Count); const PxU32 numAov0 = (numVerts0 + 3) >> 2; const PxU32 numAov1 = (numVerts1 + 3) >> 2; Vec4V* verts0 = (Vec4V*)alloca((numAov0 + numAov1) * sizeof(Vec4V) * 3); // Make sure it's aligned PX_ASSERT((size_t(verts0) & 0xf) == 0); Vec4V* verts1 = verts0 + (numAov0 * 3); const Vec3V scale0 = V3LoadU(&scale0In.x); const Vec3V scale1 = V3LoadU(&scale1In.x); std::vector vert0(numVerts0); for (uint32_t i = 0; i < numVerts0; ++i) { vert0[i] = hull0[i]; } std::vector vert1(numVerts1); for (uint32_t i = 0; i < numVerts1; ++i) { vert1[i] = hull1[i]; } _arrayVec3ToVec4(vert0.data(), scale0, verts0, numVerts0); _arrayVec3ToVec4(vert1.data(), scale1, verts1, numVerts1); const PxTransform trans1To0 = localToWorldRT0In.transformInv(localToWorldRT1In); // Load into simd mat Mat34V bToA; *(PxMat44*)&bToA = trans1To0; (*(PxMat44*)&bToA).column3.w = 0.0f; // AOS wants the 4th component of Vec3V to be 0 to work properly ConvexV convexA; ConvexV convexB; convexA.mNumAovVertices = numAov0; convexA.mAovVertices = verts0; convexB.mNumAovVertices = numAov1; convexB.mAovVertices = verts1; const physx::PxVec3 hullACenter = hull0Bounds.getCenter(); const physx::PxVec3 hullBCenter = hull1Bounds.getCenter(); const Vec3V centroidA = V3LoadU(&hullACenter.x); const Vec3V centroidB = M34MulV3(bToA, V3LoadU(&hullBCenter.x)); // Take the origin of B in As space as the inital direction as it is 'the difference in transform origins B-A in A's space' // Should be a good first guess // Use centroid information const Vec3V initialDir = V3Sub(centroidB, centroidA); Output output; Status status = Collide(initialDir, convexA, bToA, convexB, output); if (status == STATUS_DEGENERATE) { // Calculate the tolerance from the extents const PxVec3 extents0 = hull0Bounds.getExtents(); const PxVec3 extents1 = hull1Bounds.getExtents(); const FloatV tolerance0 = V3ExtractMin(V3Mul(V3LoadU(&extents0.x), scale0)); const FloatV tolerance1 = V3ExtractMin(V3Mul(V3LoadU(&extents1.x), scale1)); const FloatV tolerance = FMul(FAdd(tolerance0, tolerance1), FLoad(0.01f)); const FloatV sqTolerance = FMul(tolerance, tolerance); status = FAllGrtr(sqTolerance, output.mDistSq) ? STATUS_CONTACT : STATUS_NON_INTERSECT; } switch (status) { case STATUS_CONTACT: { if (separation) { _calcSeparation(convexA, localToWorldRT0In, bToA, convexB, initialDir, output, *separation); } return true; } default: case STATUS_NON_INTERSECT: { if (separation) { _calcSeparation(convexA, localToWorldRT0In, bToA, convexB, initialDir, output, *separation); } PxF32 val; FStore(output.mDistSq, &val); return val < (maxDistance * maxDistance); } } } } // namespace Blast } // namespace Nv