Initial commit:

PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
author: git perforce import user <a@b> 2016-10-25 12:29:14 -0600
committer: Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> 2016-10-25 18:56:37 -0500
commit: 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree: fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /APEX_1.4/common/src/ApexSharedUtils.cpp
download: physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
1 files changed, 4177 insertions, 0 deletions
diff --git a/APEX_1.4/common/src/ApexSharedUtils.cpp b/APEX_1.4/common/src/ApexSharedUtils.cpp
new file mode 100644
index 00000000..9cdc2a30
--- /dev/null
+++ b/APEX_1.4/common/src/ApexSharedUtils.cpp
@@ -0,0 +1,4177 @@
+/*
+ * Copyright (c) 2008-2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
+ * and proprietary rights in and to this software, related documentation
+ * and any modifications thereto.  Any use, reproduction, disclosure or
+ * distribution of this software and related documentation without an express
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ */
+
+
+#include "Apex.h"
+#include "ApexSharedUtils.h"
+#include "ApexRand.h"
+#include "PsMemoryBuffer.h"
+
+#if PX_PHYSICS_VERSION_MAJOR == 3
+#include "PxPhysics.h"
+#include "cooking/PxConvexMeshDesc.h"
+#include "cooking/PxCooking.h"
+#include <PxShape.h>
+#endif
+
+#include "PsSort.h"
+#include "PsBitUtils.h"
+
+#include "ApexSDKIntl.h"
+
+#include "Cof44.h"
+#include "PxPlane.h"
+
+#include "PsVecMath.h"
+#include "PsMathUtils.h"
+
+#include "../../shared/general/stan_hull/include/StanHull.h"
+
+#define REDUCE_CONVEXHULL_INPUT_POINT_SET	0
+
+namespace nvidia
+{
+namespace apex
+{
+// Local utilities
+
+// copied from //sw/physx/PhysXSDK/3.3/trunk/Samples/SampleBase/RenderClothActor.cpp
+float det(PxVec4 v0, PxVec4 v1, PxVec4 v2, PxVec4 v3)
+{
+	const PxVec3& d0 = reinterpret_cast<const PxVec3&>(v0);
+	const PxVec3& d1 = reinterpret_cast<const PxVec3&>(v1);
+	const PxVec3& d2 = reinterpret_cast<const PxVec3&>(v2);
+	const PxVec3& d3 = reinterpret_cast<const PxVec3&>(v3);
+
+	return v0.w * d1.cross(d2).dot(d3)
+		- v1.w * d0.cross(d2).dot(d3)
+		+ v2.w * d0.cross(d1).dot(d3)
+		- v3.w * d0.cross(d1).dot(d2);
+}
+
+PX_INLINE float det(const PxMat44& m)
+{
+	return det(m.column0, m.column1, m.column2, m.column3);
+}
+
+PX_INLINE float extentDistance(float min0, float max0, float min1, float max1)
+{
+	return PxMax(min0 - max1, min1 - max0);
+}
+
+PX_INLINE float extentDistanceAndNormalDirection(float min0, float max0, float min1, float max1, float& midpoint, bool& normalPointsFrom0to1)
+{
+	const float d01 = min1 - max0;
+	const float d10 = min0 - max1;
+
+	normalPointsFrom0to1 = d01 > d10;
+
+	if (normalPointsFrom0to1)
+	{
+		midpoint = 0.5f*(min1 + max0);
+		return d01;
+	}
+	else
+	{
+		midpoint = 0.5f*(min0 + max1);
+		return d10;
+	}
+}
+
+PX_INLINE void extentOverlapTimeInterval(float& tIn, float& tOut, float vel0, float min0, float max0, float min1, float max1)
+{
+	if (PxAbs(vel0) < PX_EPS_F32)
+	{
+		// Not moving
+		if (extentDistance(min0, max0, min1, max1) < 0.0f)
+		{
+			// Always overlap
+			tIn = -PX_MAX_F32;
+			tOut = PX_MAX_F32;
+		}
+		else
+		{
+			// Never overlap
+			tIn = PX_MAX_F32;
+			tOut = -PX_MAX_F32;
+		}
+		return;
+	}
+
+	const float recipVel0 = 1.0f / vel0;
+
+	if (vel0 > 0.0f)
+	{
+		tIn = (min1 - max0) * recipVel0;
+		tOut = (max1 - min0) * recipVel0;
+	}
+	else
+	{
+		tIn = (max1 - min0) * recipVel0;
+		tOut = (min1 - max0) * recipVel0;
+	}
+}
+
+PX_INLINE bool updateTimeIntervalAndNormal(float& in, float& out, float& tNormal, PxVec3* normal, float tIn, float tOut, const PxVec3& testNormal)
+{
+	if (tIn >= out || tOut <= in)
+	{
+		return false;	// No intersection will occur
+	}
+
+	if (normal != NULL && tIn > tNormal)
+	{
+		tNormal = tIn;
+		*normal = testNormal;
+	}
+
+	in = PxMax(tIn, in);
+	out = PxMin(tOut, out);
+
+	return true;
+}
+
+PX_INLINE void getExtent(float& min, float& max, const void* points, uint32_t pointCount, const uint32_t pointByteStride, const PxVec3& normal)
+{
+	min = PX_MAX_F32;
+	max = -PX_MAX_F32;
+	const PxVec3* p = (const PxVec3*)points;
+	for (uint32_t i = 0; i < pointCount; ++i, p = (const PxVec3*)(((const uint8_t*)p) + pointByteStride))
+	{
+		const float d = normal.dot(*p);
+		if (d < min)
+		{
+			min = d;
+		}
+		if (d > max)
+		{
+			max = d;
+		}
+	}
+}
+
+PX_INLINE bool intersectPlanes(PxVec3& point, const PxPlane& p0, const PxPlane& p1, const PxPlane& p2)
+{
+	const PxVec3 p1xp2 = p1.n.cross(p2.n);
+	const float det = p0.n.dot(p1xp2);
+	if (PxAbs(det) < 1.0e-18)
+	{
+		return false;	// No intersection
+	}
+	point = (-p0.d * p1xp2 - p1.d * (p2.n.cross(p0.n)) - p2.d * (p0.n.cross(p1.n))) / det;
+	return true;
+}
+
+PX_INLINE bool pointInsidePlanes(const PxVec3& point, const PxPlane* planes, uint32_t numPlanes, float eps)
+{
+	for (uint32_t i = 0; i < numPlanes; ++i)
+	{
+		const PxPlane& plane = planes[i];
+		if (plane.distance(point) > eps)
+		{
+			return false;
+		}
+	}
+	return true;
+}
+
+static void findPrincipleComponents(PxVec3& mean, PxVec3& variance, PxMat33& axes, const PxVec3* points, uint32_t pointCount)
+{
+	// Find the average of the points
+	mean = PxVec3(0.0f);
+	for (uint32_t i = 0; i < pointCount; ++i)
+	{
+		mean += points[i];
+	}
+
+	PxMat33 cov = PxMat33(PxZero);
+
+	if (pointCount > 0)
+	{
+		mean *= 1.0f/pointCount;
+
+		// Now subtract the mean and add to the covariance matrix cov
+		for (uint32_t i = 0; i < pointCount; ++i)
+		{
+			const PxVec3 relativePoint = points[i] - mean;
+			for (uint32_t j = 0; j < 3; ++j)
+			{
+				for (uint32_t k = 0; k < 3; ++k)
+				{
+					cov(j,k) += relativePoint[j]*relativePoint[k];
+				}
+			}
+		}
+		cov *= 1.0f/pointCount;
+	}
+
+	// Now diagonalize cov
+	variance = diagonalizeSymmetric(axes, cov);
+}
+
+#if 0
+/* John's k-means clustering function, slightly specialized */
+static uint32_t kmeans_cluster(const PxVec3* input,
+								   uint32_t inputCount,
+								   PxVec3* clusters,
+								   uint32_t clumpCount,
+								   float threshold = 0.01f, // controls how long it works to converge towards a least errors solution.
+								   float collapseDistance = 0.0001f) // distance between clumps to consider them to be essentially equal.
+{
+	uint32_t convergeCount = 64; // maximum number of iterations attempting to converge to a solution..
+	physx::Array<uint32_t> counts(clumpCount);
+	float error = 0.0f;
+	if (inputCount <= clumpCount) // if the number of input points is less than our clumping size, just return the input points.
+	{
+		clumpCount = inputCount;
+		for (uint32_t i = 0; i < inputCount; ++i)
+		{
+			clusters[i] = input[i];
+			counts[i] = 1;
+		}
+	}
+	else
+	{
+		physx::Array<PxVec3> centroids(clumpCount);
+
+		// Take a sampling of the input points as initial centroid estimates.
+		for (uint32_t i = 0; i < clumpCount; ++i)
+		{
+			uint32_t index = (i*inputCount)/clumpCount;
+			PX_ASSERT( index < inputCount );
+			clusters[i] = input[index];
+		}
+
+		// Here is the main convergence loop
+		float old_error = PX_MAX_F32;	// old and initial error estimates are max float
+		error = PX_MAX_F32;
+		do
+		{
+			old_error = error;	// preserve the old error
+			// reset the counts and centroids to current cluster location
+			for (uint32_t i = 0; i < clumpCount; ++i)
+			{
+				counts[i] = 0;
+				centroids[i] = PxVec3(0.0f);
+			}
+			error = 0.0f;
+			// For each input data point, figure out which cluster it is closest too and add it to that cluster.
+			for (uint32_t i = 0; i < inputCount; ++i)
+			{
+				float min_distance2 = PX_MAX_F32;
+				uint32_t j_nearest = 0;
+				// find the nearest clump to this point.
+				for (uint32_t j = 0; j < clumpCount; ++j)
+				{
+					const float distance2 = (input[i]-clusters[j]).magnitudeSquared();
+					if (distance2 < min_distance2)
+					{
+						min_distance2 = distance2;
+						j_nearest = j; // save which clump this point indexes
+					}
+				}
+				centroids[j_nearest] += input[i];
+				++counts[j_nearest];	// increment the counter indicating how many points are in this clump.
+				error += min_distance2; // save the error accumulation
+			}
+			// Now, for each clump, compute the mean and store the result.
+			for (uint32_t i = 0; i < clumpCount; ++i)
+			{
+				if (counts[i]) // if this clump got any points added to it...
+				{
+					centroids[i] /= (float)counts[i];	// compute the average center of the points in this clump.
+					clusters[i] = centroids[i]; // store it as the new cluster.
+				}
+			}
+			// decrement the convergence counter and bail if it is taking too long to converge to a solution.
+			--convergeCount;
+			if (convergeCount == 0)
+			{
+				break;
+			}
+			if (error < threshold) // early exit if our first guess is already good enough (if all input points are the same)
+			{
+				break;
+			}
+		} while (PxAbs(error - old_error) > threshold); // keep going until the error is reduced by this threshold amount.
+	}
+
+	// ok..now we prune the clumps if necessary.
+	// The rules are; first, if a clump has no 'counts' then we prune it as it's unused.
+	// The second, is if the centroid of this clump is essentially  the same (based on the distance tolerance)
+	// as an existing clump, then it is pruned and all indices which used to point to it, now point to the one
+	// it is closest too.
+	uint32_t outCount = 0; // number of clumps output after pruning performed.
+	float d2 = collapseDistance*collapseDistance; // squared collapse distance.
+	for (uint32_t i = 0; i < clumpCount; ++i)
+	{
+		if (counts[i] == 0) // if no points ended up in this clump, eliminate it.
+		{
+			continue;
+		}
+		// see if this clump is too close to any already accepted clump.
+		bool add = true;
+		for (uint32_t j = 0; j < outCount; ++j)
+		{
+			if ((clusters[i]-clusters[j]).magnitudeSquared() < d2)
+			{
+				add = false; // we do not add this clump
+				break;
+			}
+		}
+		if (add)
+		{
+			clusters[outCount++] = clusters[i];
+		}
+	}
+
+	clumpCount = outCount;
+
+	return clumpCount;
+};
+#endif
+
+// barycentric utilities
+void generateBarycentricCoordinatesTri(const PxVec3& pa, const PxVec3& pb, const PxVec3& pc, const PxVec3& p, float& s, float& t)
+{
+	// pe = pb-ba, pf = pc-pa
+	// Barycentric coordinates: (s,t) -> pa + s*pe + t*pf
+	// Minimize: [s*pe + t*pf - (p-pa)]^2
+	// Minimize: a s^2 + 2 b s t + c t^2 + 2 d s + 2 e t + f
+	// Minimization w.r.t s and t :
+	//    as + bt = -d
+	//    bs + ct = -e
+
+	PxVec3 pe = pb - pa;
+	PxVec3 pf = pc - pa;
+	PxVec3 pg = pa - p;
+
+	float a = pe.dot(pe);
+	float b = pe.dot(pf);
+	float c = pf.dot(pf);
+	float d = pe.dot(pg);
+	float e = pf.dot(pg);
+
+	float det = a * c - b * b;
+	if (det != 0.0f)
+	{
+		s = (b * e - c * d) / det;
+		t = (b * d - a * e) / det;
+	}
+}
+
+void generateBarycentricCoordinatesTet(const PxVec3& pa, const PxVec3& pb, const PxVec3& pc, const PxVec3& pd, const PxVec3& p, PxVec3& bary)
+{
+	PxVec3 q  = p - pd;
+	PxVec3 q0 = pa - pd;
+	PxVec3 q1 = pb - pd;
+	PxVec3 q2 = pc - pd;
+	PxMat33 m(q0,q1,q2);
+	float det = m.getDeterminant();
+	m.column0 = q;
+	bary.x = m.getDeterminant();
+	m.column0 = q0;
+	m.column1 = q;
+	bary.y = m.getDeterminant();
+	m.column1 = q1;
+	m.column2 = q;
+	bary.z = m.getDeterminant();
+	if (det != 0.0f)
+	{
+		bary /= det;
+	}
+}
+
+/* TriangleFrame */
+
+size_t TriangleFrame::s_offsets[TriangleFrame::VertexFieldCount];
+
+static TriangleFrameBuilder sTriangleFrameBuilder;
+
+/*
+	File-local functions and definitions
+ */
+
+struct Marker
+{
+	float	pos;
+	uint32_t	id;	// lsb = type (0 = max, 1 = min), other bits used for object index
+
+	void	set(float _pos, int32_t _id)
+	{
+		pos = _pos;
+		id = (uint32_t)_id;
+	}
+};
+
+static int compareMarkers(const void* A, const void* B)
+{
+	// Sorts by value.  If values equal, sorts min types greater than max types, to reduce the # of overlaps
+	const float delta = ((Marker*)A)->pos - ((Marker*)B)->pos;
+	return delta != 0 ? (delta < 0 ? -1 : 1) : ((int)(((Marker*)A)->id & 1) - (int)(((Marker*)B)->id & 1));
+}
+
+struct EdgeWithFace
+{
+	EdgeWithFace() {}
+	EdgeWithFace(uint32_t v0, uint32_t v1, uint32_t face, uint32_t oppositeFace, bool sort = false) : faceIndex(face), v0Index(v0), v1Index(v1), opposite(oppositeFace)
+	{
+		if (sort)
+		{
+			if (v1Index < v0Index)
+			{
+				nvidia::swap(v0Index, v1Index);
+			}
+		}
+	}
+
+	bool operator == (const EdgeWithFace& e)
+	{
+		return v0Index == e.v0Index && v1Index == e.v1Index;
+	}
+
+	uint32_t	faceIndex;
+	uint32_t	v0Index;
+	uint32_t	v1Index;
+	uint32_t	opposite;
+};
+
+/*
+	Global utilities
+ */
+
+void boundsCalculateOverlaps(physx::Array<IntPair>& overlaps, Bounds3Axes axesToUse, const BoundsRep* bounds, uint32_t boundsCount, uint32_t boundsByteStride,
+                             const BoundsInteractions& interactions, bool append)
+{
+	if (!append)
+	{
+		overlaps.reset();
+	}
+
+	uint32_t D = 0;
+	uint32_t axisNums[3];
+	for (unsigned i = 0; i < 3; ++i)
+	{
+		if ((axesToUse >> i) & 1)
+		{
+			axisNums[D++] = i;
+		}
+	}
+
+	if (D == 0 || D > 3)
+	{
+		return;
+	}
+
+	physx::Array< physx::Array<Marker> > axes;
+	axes.resize(D);
+	uint32_t overlapCount[3];
+
+	for (uint32_t n = 0; n < D; ++n)
+	{
+		const uint32_t axisNum = axisNums[n];
+		physx::Array<Marker>& axis = axes[n];
+		overlapCount[n] = 0;
+		axis.resize(2 * boundsCount);
+		uint8_t* boundsPtr = (uint8_t*)bounds;
+		for (uint32_t i = 0; i < boundsCount; ++i, boundsPtr += boundsByteStride)
+		{
+			const BoundsRep& boundsRep = *(const BoundsRep*)boundsPtr;
+			const PxBounds3& box = boundsRep.aabb;
+			float min = box.minimum[axisNum];
+			float max = box.maximum[axisNum];
+			if (min >= max)
+			{
+				const float mid = 0.5f * (min + max);
+				float pad = 0.000001f * fabsf(mid);
+				min = mid - pad;
+				max = mid + pad;
+			}
+			axis[i << 1].set(min, (int32_t)i << 1 | 1);
+			axis[i << 1 | 1].set(max, (int32_t)i << 1);
+		}
+		qsort(axis.begin(), axis.size(), sizeof(Marker), compareMarkers);
+		uint32_t localOverlapCount = 0;
+		for (uint32_t i = 0; i < axis.size(); ++i)
+		{
+			Marker& marker = axis[i];
+			if (marker.id & 1)
+			{
+				overlapCount[n] += localOverlapCount;
+				++localOverlapCount;
+			}
+			else
+			{
+				--localOverlapCount;
+			}
+		}
+	}
+
+	unsigned int axis0;
+	unsigned int axis1;
+	unsigned int axis2;
+	unsigned int maxBin;
+	if (D == 1)
+	{
+		maxBin = 0;
+		axis0 = axisNums[0];
+		axis1 = axis0;
+		axis2 = axis0;
+	}
+	else if (D == 2)
+	{
+		if (overlapCount[0] < overlapCount[1])
+		{
+			maxBin = 0;
+			axis0 = axisNums[0];
+			axis1 = axisNums[1];
+			axis2 = axis0;
+		}
+		else
+		{
+			maxBin = 1;
+			axis0 = axisNums[1];
+			axis1 = axisNums[0];
+			axis2 = axis0;
+		}
+	}
+	else
+	{
+		maxBin = overlapCount[0] < overlapCount[1] ? (overlapCount[0] < overlapCount[2] ? 0U : 2U) : (overlapCount[1] < overlapCount[2] ? 1U : 2U);
+		axis0 = axisNums[maxBin];
+		axis1 = (axis0 + 1) % 3;
+		axis2 = (axis0 + 2) % 3;
+	}
+
+	const uint64_t interactionBits = interactions.bits;
+
+	IndexBank<uint32_t> localOverlaps(boundsCount);
+	physx::Array<Marker>& axis = axes[maxBin];
+	float boxMin1 = 0.0f;
+	float boxMax1 = 0.0f;
+	float boxMin2 = 0.0f;
+	float boxMax2 = 0.0f;
+
+	for (uint32_t i = 0; i < axis.size(); ++i)
+	{
+		Marker& marker = axis[i];
+		const uint32_t index = marker.id >> 1;
+		if (marker.id & 1)
+		{
+			const BoundsRep& boundsRep = *(const BoundsRep*)((uint8_t*)bounds + index*boundsByteStride);
+			const uint8_t interaction = (uint8_t)((interactionBits >> (boundsRep.type << 3)) & 0xFF);
+			const PxBounds3& box = boundsRep.aabb;
+			// These conditionals compile out with optimization:
+			if (D > 1)
+			{
+				boxMin1 = box.minimum[axis1];
+				boxMax1 = box.maximum[axis1];
+				if (D == 3)
+				{
+					boxMin2 = box.minimum[axis2];
+					boxMax2 = box.maximum[axis2];
+				}
+			}
+			const uint32_t localOverlapCount = localOverlaps.usedCount();
+			const uint32_t* localOverlapIndices = localOverlaps.usedIndices();
+			for (uint32_t j = 0; j < localOverlapCount; ++j)
+			{
+				const uint32_t overlapIndex = localOverlapIndices[j];
+				const BoundsRep& overlapBoundsRep = *(const BoundsRep*)((uint8_t*)bounds + overlapIndex*boundsByteStride);
+				if ((interaction >> overlapBoundsRep.type) & 1)
+				{
+					const PxBounds3& overlapBox = overlapBoundsRep.aabb;
+					// These conditionals compile out with optimization:
+					if (D > 1)
+					{
+						if (boxMin1 >= overlapBox.maximum[axis1] || boxMax1 <= overlapBox.minimum[axis1])
+						{
+							continue;
+						}
+						if (D == 3)
+						{
+							if (boxMin2 >= overlapBox.maximum[axis2] || boxMax2 <= overlapBox.minimum[axis2])
+							{
+								continue;
+							}
+						}
+					}
+					// Add overlap
+					IntPair& pair = overlaps.insert();
+					pair.i0 = (int32_t)index;
+					pair.i1 = (int32_t)overlapIndex;
+				}
+			}
+			PX_ASSERT(localOverlaps.isValid(index));
+			PX_ASSERT(!localOverlaps.isUsed(index));
+			localOverlaps.use(index);
+		}
+		else
+		{
+			// Remove local overlap
+			PX_ASSERT(localOverlaps.isValid(index));
+			localOverlaps.free(index);
+		}
+	}
+}
+
+
+/*
+	ConvexHullImpl functions
+ */
+
+ConvexHullImpl::ConvexHullImpl() : mParams(NULL), mOwnsParams(false)
+{
+}
+
+ConvexHullImpl::~ConvexHullImpl()
+{
+	term();
+}
+
+// If params == NULL, ConvexHullImpl will build (and own) its own internal parameters
+void ConvexHullImpl::init(NvParameterized::Interface* params)
+{
+	term();
+
+	if (params != NULL)
+	{
+		mParams = DYNAMIC_CAST(ConvexHullParameters*)(params);
+		if (mParams == NULL)
+		{
+			PX_ASSERT(!"params must be ConvexHullParameters");
+		}
+	}
+	else
+	{
+		NvParameterized::Traits* traits = GetInternalApexSDK()->getParameterizedTraits();
+		mParams = DYNAMIC_CAST(ConvexHullParameters*)(traits->createNvParameterized(ConvexHullParameters::staticClassName()));
+		PX_ASSERT(mParams != NULL);
+		if (mParams != NULL)
+		{
+			mOwnsParams = true;
+			setEmpty();
+		}
+	}
+}
+
+
+NvParameterized::Interface*	ConvexHullImpl::giveOwnersipOfNvParameters()
+{
+	if (mOwnsParams)
+	{
+		mOwnsParams = false;
+		return mParams;
+	}
+
+	return NULL;
+}
+
+// Releases parameters if it owns them
+void ConvexHullImpl::term()
+{
+	if (mParams != NULL && mOwnsParams)
+	{
+		mParams->destroy();
+	}
+	mParams = NULL;
+	mOwnsParams = false;
+}
+
+static int compareEdgesWithFaces(const void* A, const void* B)
+{
+	const EdgeWithFace& eA = *(const EdgeWithFace*)A;
+	const EdgeWithFace& eB = *(const EdgeWithFace*)B;
+	if (eA.v0Index != eB.v0Index)
+	{
+		return (int)eA.v0Index - (int)eB.v0Index;
+	}
+	if (eA.v1Index != eB.v1Index)
+	{
+		return (int)eA.v1Index - (int)eB.v1Index;
+	}
+	return (int)eA.faceIndex - (int)eB.faceIndex;
+}
+
+void ConvexHullImpl::buildFromDesc(const ConvexHullDesc& desc)
+{
+	if (!desc.isValid())
+	{
+		setEmpty();
+		return;
+	}
+
+	setEmpty();
+
+	Array<PxPlane>	uniquePlanes;
+	Array<uint32_t>	edges;
+	Array<float>	widths;
+
+	NvParameterized::Handle handle(*mParams);
+
+	// Copy vertices and compute bounds
+	mParams->getParameterHandle("vertices", handle);
+	mParams->resizeArray(handle, (int32_t)desc.numVertices);
+	const uint8_t* vertPointer = (const uint8_t*)desc.vertices;
+	for (uint32_t i = 0; i < desc.numVertices; ++i, vertPointer += desc.vertexStrideBytes)
+	{
+		mParams->vertices.buf[i] = *(const PxVec3*)vertPointer;
+		mParams->bounds.include(mParams->vertices.buf[i]);
+	}
+
+	PxVec3 center;
+	center = mParams->bounds.getCenter();
+
+	// Find faces and compute volume
+	physx::Array<EdgeWithFace> fEdges;
+	const uint32_t* indices = desc.indices;
+	mParams->volume = 0.0f;
+	for (uint32_t fI = 0; fI < desc.numFaces; ++fI)
+	{
+		const uint32_t indexCount = (const uint32_t)desc.faceIndexCounts[fI];
+		const PxVec3& vI0 = mParams->vertices.buf[indices[0]];
+		PxVec3 normal(0.0f);
+		for (uint32_t pI = 1; pI < indexCount-1; ++pI)
+		{
+			const uint32_t i1 = indices[pI];
+			const uint32_t i2 = indices[pI+1];
+			const PxVec3& vI1 = mParams->vertices.buf[i1];
+			const PxVec3& vI2 = mParams->vertices.buf[i2];
+			const PxVec3 eI1 = vI1 - vI0;
+			const PxVec3 eI2 = vI2 - vI0;
+			PxVec3 normalI = eI1.cross(eI2);
+			mParams->volume += (normalI.dot(vI0 - center));
+			normal += normalI;
+		}
+
+		const bool normalValid = normal.normalize() > 0.0f;
+
+		float d = 0.0f;
+		for (uint32_t pI = 0; pI < indexCount; ++pI)
+		{
+			d -= normal.dot(mParams->vertices.buf[indices[pI]]);
+		}
+		d /= indexCount;
+
+		uint32_t oppositeFace = 0;
+		uint32_t faceN = uniquePlanes.size();	// unless we find an opposite
+
+		if (normalValid)
+		{
+			// See if this face is opposite an existing one
+			for (uint32_t j = 0; j < uniquePlanes.size(); ++j)
+			{
+				if (normal.dot(uniquePlanes[j].n) < -0.999999f && widths[j] == PX_MAX_F32)
+				{
+					oppositeFace = 1;
+					faceN = j;
+					widths[j] = -d - uniquePlanes[j].d;
+					break;
+				}
+			}
+		}
+
+		if (!oppositeFace)
+		{
+			uniquePlanes.pushBack(PxPlane(normal, d));
+			widths.pushBack(PX_MAX_F32);
+		}
+
+		for (uint32_t pI = 0; pI < indexCount; ++pI)
+		{
+			fEdges.pushBack(EdgeWithFace(indices[pI], indices[(pI+1)%indexCount], faceN, oppositeFace, true));
+		}
+
+		indices += indexCount;
+	}
+
+	// Create a map from current plane indices to the arrangement we'll have after the following loop
+	// This map will be its inverse
+	physx::Array<uint32_t> invMap;
+	invMap.resize(2*widths.size());
+	for (uint32_t i = 0; i < invMap.size(); ++i)
+	{
+		invMap[i] = i;
+	}
+
+	// This ensures that all the slabs are represented first in the planes list
+	uint32_t slabCount = widths.size();
+	for (uint32_t i = widths.size(); i--;)
+	{
+		if (widths[i] == PX_MAX_F32)
+		{
+			--slabCount;
+			nvidia::swap(widths[i], widths[slabCount]);
+			nvidia::swap(uniquePlanes[i], uniquePlanes[slabCount]);
+			nvidia::swap(invMap[i], invMap[slabCount]);
+		}
+	}
+
+	// Now invert invMap to get the map
+	physx::Array<uint32_t> map;
+	map.resize(invMap.size());
+	for (uint32_t i = 0; i < map.size(); ++i)
+	{
+		map[invMap[i]] = i;
+	}
+
+	// Plane indices have been rearranged, need to make corresponding rearrangements to fEdges' faceIndex value.
+	for (uint32_t i = 0; i < fEdges.size(); ++i)
+	{
+		fEdges[i].faceIndex = map[fEdges[i].faceIndex];
+		if (fEdges[i].opposite)
+		{
+			fEdges[i].faceIndex += uniquePlanes.size();
+		}
+	}
+
+	// Total plane count, with non-unique (back-facing) normals
+	mParams->planeCount = uniquePlanes.size() + slabCount;
+
+	// Fill in remaining widths
+	for (uint32_t fI = mParams->planeCount - uniquePlanes.size(); fI < widths.size(); ++fI)
+	{
+		widths[fI] = 0.0f;
+		for (uint32_t vI = 0; vI < desc.numVertices; ++vI)
+		{
+			const float vDepth = -uniquePlanes[fI].distance(mParams->vertices.buf[vI]);
+			if (vDepth > widths[fI])
+			{
+				widths[fI] = vDepth;
+			}
+		}
+	}
+
+	// Record faceIndex pairs in adjacentFaces
+	physx::Array<uint32_t> adjacentFaces;
+
+	// Eliminate redundant edges
+	qsort(fEdges.begin(), fEdges.size(), sizeof(EdgeWithFace), compareEdgesWithFaces);
+	for (uint32_t eI = 0; eI < fEdges.size();)
+	{
+		uint32_t i0 = fEdges[eI].v0Index;
+		uint32_t i1 = fEdges[eI].v1Index;
+		PX_ASSERT(i0 < 65536 && i1 < 65536);
+		if (eI < fEdges.size() - 1 && fEdges[eI] == fEdges[eI + 1])
+		{
+			if (fEdges[eI].faceIndex != fEdges[eI + 1].faceIndex)
+			{
+				edges.pushBack(i0 << 16 | i1);
+				adjacentFaces.pushBack(fEdges[eI].faceIndex << 16 | fEdges[eI + 1].faceIndex);
+			}
+			eI += 2;
+		}
+		else
+		{
+			edges.pushBack(i0 << 16 | i1);
+			adjacentFaces.pushBack(0xFFFF0000 | fEdges[eI].faceIndex);
+			++eI;
+		}
+	}
+
+	// Find unique edge directions, put them at the front of the edge list
+	const uint32_t edgeCount = edges.size();
+	if (edgeCount > 0)
+	{
+		mParams->uniqueEdgeDirectionCount = 1;
+		for (uint32_t testEdgeIndex = 1; testEdgeIndex < edgeCount; ++testEdgeIndex)
+		{
+			const uint32_t testEdgeEndpointIndices = edges[testEdgeIndex];
+			const PxVec3 testEdge = mParams->vertices.buf[testEdgeEndpointIndices & 0xFFFF] - mParams->vertices.buf[testEdgeEndpointIndices >> 16];
+			const float testEdge2 = testEdge.magnitudeSquared();
+			uint32_t uniqueEdgeIndex = 0;
+			for (; uniqueEdgeIndex < mParams->uniqueEdgeDirectionCount; ++uniqueEdgeIndex)
+			{
+				const uint32_t uniqueEdgeEndpointIndices = edges[uniqueEdgeIndex];
+				const PxVec3 uniqueEdge = mParams->vertices.buf[uniqueEdgeEndpointIndices & 0xFFFF] - mParams->vertices.buf[uniqueEdgeEndpointIndices >> 16];
+				if (uniqueEdge.cross(testEdge).magnitudeSquared() < testEdge2 * uniqueEdge.magnitudeSquared()*PX_EPS_F32 * PX_EPS_F32)
+				{
+					break;
+				}
+			}
+			if (uniqueEdgeIndex == mParams->uniqueEdgeDirectionCount)
+			{
+				nvidia::swap(edges[mParams->uniqueEdgeDirectionCount], edges[testEdgeIndex]);
+				nvidia::swap(adjacentFaces[mParams->uniqueEdgeDirectionCount], adjacentFaces[testEdgeIndex]);
+				++mParams->uniqueEdgeDirectionCount;
+			}
+		}
+	}
+
+	mParams->volume *= 0.1666666667f;
+
+	// Transfer from temporary arrays into mParams
+
+	// uniquePlanes
+	mParams->getParameterHandle("uniquePlanes", handle);
+	mParams->resizeArray(handle, (int32_t)uniquePlanes.size());
+	for (uint32_t i = 0; i < uniquePlanes.size(); ++i)
+	{
+		PxPlane& plane = uniquePlanes[i];
+		ConvexHullParametersNS::Plane_Type& paramPlane = mParams->uniquePlanes.buf[i];
+		paramPlane.normal = plane.n;
+		paramPlane.d = plane.d;
+	}
+
+	// edges
+	mParams->getParameterHandle("edges", handle);
+	mParams->resizeArray(handle, (int32_t)edges.size());
+	mParams->setParamU32Array(handle, edges.begin(), (int32_t)edges.size());
+
+	// adjacentFaces
+	mParams->getParameterHandle("adjacentFaces", handle);
+	mParams->resizeArray(handle, (int32_t)adjacentFaces.size());
+	mParams->setParamU32Array(handle, adjacentFaces.begin(), (int32_t)adjacentFaces.size());
+
+	// widths
+	mParams->getParameterHandle("widths", handle);
+	mParams->resizeArray(handle, (int32_t)widths.size());
+	mParams->setParamF32Array(handle, widths.begin(), (int32_t)widths.size());
+
+}
+
+void ConvexHullImpl::buildFromPoints(const void* points, uint32_t numPoints, uint32_t pointStrideBytes)
+{
+	if (numPoints < 4)
+	{
+		setEmpty();
+		return;
+	}
+
+	// First try stanhull
+
+	// We will transform the points of the hull such that they fit well into a rotated 2x2x2 cube.
+	// To find a suitable set of axes for this rotated cube, we will find the  of the point distribution.
+
+	// Create an array of points which we will transform
+	physx::Array<PxVec3> transformedPoints(numPoints);
+	uint8_t* pointPtr = (uint8_t*)points;
+	for (uint32_t i = 0; i < numPoints; ++i, pointPtr += pointStrideBytes)
+	{
+		transformedPoints[i] = *(PxVec3*)pointPtr;
+	}
+
+	// Optionally reduce the point set
+#if REDUCE_CONVEXHULL_INPUT_POINT_SET
+	const uint32_t maxSetSize = 400;
+	physx::Array<PxVec3> reducedPointSet(numPoints);
+	const uint32_t reducedSetSize = kmeans_cluster(&transformedPoints[0], numPoints, &reducedPointSet[0], maxSetSize);
+	PX_ASSERT(reducedSetSize <= 400 && reducedSetSize >= 4);
+	transformedPoints = reducedPointSet;
+#endif // REDUCE_CONVEXHULL_INPUT_POINT_SET
+
+	PxVec3 mean;
+	PxVec3 variance;
+	PxMat33 axes;
+	findPrincipleComponents(mean, variance, axes, &transformedPoints[0], numPoints);
+
+	// Now subtract the mean from the points and rotate the points into the frame of the axes
+	for (uint32_t i = 0; i < numPoints; ++i)
+	{
+		transformedPoints[i] = axes.transformTranspose(transformedPoints[i] - mean);
+	}
+
+	// Finally find a scale such that the maximum absolute coordinate on each axis is 1
+	PxVec3 scale(0.0f);
+	for (uint32_t i = 0; i < numPoints; ++i)
+	{
+		for (unsigned j = 0; j < 3; ++j)
+		{
+			scale[j] = PxMax(scale[j], PxAbs(transformedPoints[i][j]));
+		}
+	}
+	if (scale.x*scale.y*scale.z == 0.0f)
+	{
+		// We have a degeneracy, planar (or colinear or coincident) points
+		setEmpty();
+		return;
+	}
+
+	// Now scale the points
+	const PxVec3 recipScale(1.0f/scale.x, 1.0f/scale.y, 1.0f/scale.z);
+	for (uint32_t i = 0; i < numPoints; ++i)
+	{
+		transformedPoints[i] = transformedPoints[i].multiply(recipScale);
+	}
+
+	// The points are transformed, and for completeness let us now build the transform which restores them
+	const PxMat44 tm(axes.column0*scale.x, axes.column1*scale.y, axes.column2*scale.z, mean);
+
+#if 0
+	// Now find the convex hull of the transformed points
+	physx::stanhull::HullDesc hullDesc;
+	hullDesc.mVertices = (const float*)&transformedPoints[0];
+	hullDesc.mVcount = numPoints;
+	hullDesc.mVertexStride = sizeof(PxVec3);
+	hullDesc.mSkinWidth = 0.0f;
+
+	physx::stanhull::HullLibrary hulllib;
+	physx::stanhull::HullResult hull;
+	if (physx::stanhull::QE_OK == hulllib.CreateConvexHull(hullDesc, hull))
+	{	// Success
+		ConvexHullDesc desc;
+		desc.vertices = hull.mOutputVertices;
+		desc.numVertices = hull.mNumOutputVertices;
+		desc.vertexStrideBytes = 3*sizeof(float);
+		desc.indices = hull.mIndices;
+		desc.numIndices = hull.mNumIndices;
+		desc.faceIndexCounts = hull.mFaces;
+		desc.numFaces = hull.mNumFaces;
+		buildFromDesc(desc);
+		hulllib.ReleaseResult(hull);
+
+		// Transform our hull back before returning
+		applyTransformation(tm);
+
+		return;
+	}
+#endif
+
+#if PX_PHYSICS_VERSION_MAJOR == 3
+	// Stanhull failed, try physx sdk cooker
+	ApexSDK* sdk = GetApexSDK();
+	if (sdk && sdk->getCookingInterface() && sdk->getPhysXSDK())
+	{
+
+		physx::PxConvexMeshDesc convexMeshDesc;
+		convexMeshDesc.points.count = numPoints;
+		convexMeshDesc.points.data = &transformedPoints[0];
+		convexMeshDesc.points.stride = pointStrideBytes;
+		convexMeshDesc.flags = physx::PxConvexFlag::eCOMPUTE_CONVEX;
+
+		nvidia::PsMemoryBuffer stream;
+		stream.setEndianMode(PxFileBuf::ENDIAN_NONE);
+		PxStreamFromFileBuf nvs(stream);
+		if (sdk->getCookingInterface()->cookConvexMesh(convexMeshDesc, nvs))
+		{
+			physx::PxConvexMesh* mesh = sdk->getPhysXSDK()->createConvexMesh(nvs);
+			if (mesh)
+			{
+				buildFromConvexMesh(mesh);
+				mesh->release();
+				// Transform our hull back before returning
+				applyTransformation(tm);
+				return;
+			}
+		}
+	}
+#endif
+
+	// No luck
+	setEmpty();
+}
+
+void ConvexHullImpl::buildFromPlanes(const PxPlane* planes, uint32_t numPlanes, float eps)
+{
+	physx::Array<PxVec3> points;
+	points.reserve((numPlanes * (numPlanes - 1) * (numPlanes - 2)) / 6);
+	for (uint32_t i = 0; i < numPlanes; ++i)
+	{
+		const PxPlane& planeI = planes[i];
+		for (uint32_t j = i + 1; j < numPlanes; ++j)
+		{
+			const PxPlane& planeJ = planes[j];
+			for (uint32_t k = j + 1; k < numPlanes; ++k)
+			{
+				const PxPlane& planeK = planes[k];
+				PxVec3 point;
+				if (intersectPlanes(point, planeI, planeJ, planeK))
+				{
+					if (pointInsidePlanes(point, planes, i, eps) &&
+					        pointInsidePlanes(point, planes + i + 1, j - (i + 1), eps) &&
+					        pointInsidePlanes(point, planes + j + 1, k - (j + 1), eps) &&
+					        pointInsidePlanes(point, planes + k + 1, numPlanes - (k + 1), eps))
+					{
+						uint32_t m = 0;
+						for (; m < points.size(); ++m)
+						{
+							if ((point - points[m]).magnitudeSquared() < eps)
+							{
+								break;
+							}
+						}
+						if (m == points.size())
+						{
+							points.pushBack(point);
+						}
+					}
+				}
+			}
+		}
+	}
+
+	buildFromPoints(points.begin(), points.size(), sizeof(PxVec3));
+}
+
+
+#if PX_PHYSICS_VERSION_MAJOR == 3
+
+void ConvexHullImpl::buildFromConvexMesh(const ConvexMesh* mesh)
+{
+	setEmpty();
+
+	if (!mesh)
+	{
+		return;
+	}
+
+	const uint32_t numVerts = mesh->getNbVertices();
+	const PxVec3* verts = (const PxVec3*)mesh->getVertices();
+	const uint32_t numPolygons = mesh->getNbPolygons();
+	const uint8_t* index = (const uint8_t*)mesh->getIndexBuffer();
+
+
+	Array<PxPlane>	uniquePlanes;
+	Array<uint32_t>	edges;
+	Array<float>	widths;
+
+	NvParameterized::Handle handle(*mParams);
+
+	// Copy vertices and compute bounds
+	mParams->getParameterHandle("vertices", handle);
+	mParams->resizeArray(handle, (int32_t)numVerts);
+	for (uint32_t i = 0; i < numVerts; ++i)
+	{
+		mParams->vertices.buf[i] = verts[i];
+		mParams->bounds.include(verts[i]);
+	}
+
+	PxVec3 center;
+	center = mParams->bounds.getCenter();
+
+	PxMat33 localInertia;
+	PxVec3 localCenterOfMass;
+	mesh->getMassInformation(mParams->volume, localInertia, localCenterOfMass);
+
+	// Find planes and compute volume
+	physx::Array<EdgeWithFace> fEdges;
+
+	for (uint32_t i = 0; i < numPolygons; i++)
+	{
+		physx::PxHullPolygon data;
+		bool status = mesh->getPolygonData(i, data);
+		PX_ASSERT(status);
+		if (!status)
+		{
+			continue;
+		}
+
+		const uint32_t numIndices = data.mNbVerts;
+		for (uint32_t vI = 0; vI < numIndices; ++vI)
+		{
+			const uint16_t i0 = (uint16_t)index[data.mIndexBase + vI];
+			const uint16_t i1 = (uint16_t)index[data.mIndexBase + ((vI+1)%numIndices)];
+			const PxVec3& vI0 = verts[i0];
+			const PxVec3 normal(data.mPlane[0], data.mPlane[1], data.mPlane[2]);
+			PX_ASSERT(PxEquals(normal.magnitudeSquared(), 1.0f, 0.000001f));
+			uint32_t faceIndex;
+			uint32_t oppositeFace = 0;
+			for (faceIndex = 0; faceIndex < uniquePlanes.size(); ++faceIndex)
+			{
+				const float cosTheta = normal.dot(uniquePlanes[faceIndex].n);
+				oppositeFace = (uint32_t)(cosTheta < 0.0f);
+				if (cosTheta * cosTheta > 0.999999f)
+				{
+					break;
+				}
+			}
+			if (faceIndex == uniquePlanes.size())
+			{
+				uniquePlanes.pushBack(PxPlane(vI0, normal));
+				widths.pushBack(PX_MAX_F32);
+				oppositeFace = 0;
+			}
+			else if (oppositeFace && widths[faceIndex] == PX_MAX_F32)
+			{
+				// New slab
+				widths[faceIndex] = -uniquePlanes[faceIndex].distance(vI0);
+			}
+			fEdges.pushBack(EdgeWithFace(i0, i1, faceIndex, oppositeFace, true));
+		}
+	}
+
+	// Create a map from current plane indices to the arrangement we'll have after the following loop
+	// This map will be its inverse
+	physx::Array<uint32_t> invMap;
+	invMap.resize(2*widths.size());
+	for (uint32_t i = 0; i < invMap.size(); ++i)
+	{
+		invMap[i] = i;
+	}
+
+	// This ensures that all the slabs are represented first in the planes list
+	uint32_t slabCount = widths.size();
+	for (uint32_t i = widths.size(); i--;)
+	{
+		if (widths[i] == PX_MAX_F32)
+		{
+			--slabCount;
+			nvidia::swap(widths[i], widths[slabCount]);
+			nvidia::swap(uniquePlanes[i], uniquePlanes[slabCount]);
+			nvidia::swap(invMap[i], invMap[slabCount]);
+		}
+	}
+
+	// Now invert invMap to get the map
+	physx::Array<uint32_t> map;
+	map.resize(invMap.size());
+	for (uint32_t i = 0; i < map.size(); ++i)
+	{
+		map[invMap[i]] = i;
+	}
+
+	// Plane indices have been rearranged, need to make corresponding rearrangements to fEdges' faceIndex value.
+	for (uint32_t i = 0; i < fEdges.size(); ++i)
+	{
+		fEdges[i].faceIndex = map[fEdges[i].faceIndex];
+		if (fEdges[i].opposite)
+		{
+			fEdges[i].faceIndex += uniquePlanes.size();
+		}
+	}
+
+	// Total plane count, with non-unique (back-facing) normals
+	mParams->planeCount = uniquePlanes.size() + slabCount;
+
+	// Fill in remaining widths
+	for (uint32_t pI = mParams->planeCount - uniquePlanes.size(); pI < widths.size(); ++pI)
+	{
+		widths[pI] = 0.0f;
+		for (uint32_t i = 0; i < numPolygons; i++)
+		{
+			physx::PxHullPolygon data;
+			bool status = mesh->getPolygonData(i, data);
+			PX_ASSERT(status);
+			if (!status)
+			{
+				continue;
+			}
+
+			const uint32_t numIndices = (uint32_t)data.mNbVerts - 2;
+			for (uint32_t vI = 0; vI < numIndices; ++vI)
+			{
+				const float vDepth = -uniquePlanes[pI].distance(verts[index[data.mIndexBase + vI]]);
+				if (vDepth > widths[pI])
+				{
+					widths[pI] = vDepth;
+				}
+			}
+		}
+	}
+
+	// Record faceIndex pairs in adjacentFaces
+	physx::Array<uint32_t> adjacentFaces;
+
+	// Eliminate redundant edges
+	qsort(fEdges.begin(), fEdges.size(), sizeof(EdgeWithFace), compareEdgesWithFaces);
+	for (uint32_t eI = 0; eI < fEdges.size();)
+	{
+		uint32_t i0 = fEdges[eI].v0Index;
+		uint32_t i1 = fEdges[eI].v1Index;
+		PX_ASSERT(i0 < 65536 && i1 < 65536);
+		if (eI < fEdges.size() - 1 && fEdges[eI] == fEdges[eI + 1])
+		{
+			if (fEdges[eI].faceIndex != fEdges[eI + 1].faceIndex)
+			{
+				edges.pushBack(i0 << 16 | i1);
+				adjacentFaces.pushBack(fEdges[eI].faceIndex << 16 | fEdges[eI + 1].faceIndex);
+			}
+			eI += 2;
+		}
+		else
+		{
+			edges.pushBack(i0 << 16 | i1);
+			adjacentFaces.pushBack(0xFFFF0000 | fEdges[eI].faceIndex);
+			++eI;
+		}
+	}
+
+	// Find unique edge directions, put them at the front of the edge list
+	const uint32_t edgeCount = edges.size();
+	if (edgeCount > 0)
+	{
+		mParams->uniqueEdgeDirectionCount = 1;
+		for (uint32_t testEdgeIndex = 1; testEdgeIndex < edgeCount; ++testEdgeIndex)
+		{
+			const PxVec3 testEdge = verts[edges[testEdgeIndex] & 0xFFFF] - verts[edges[testEdgeIndex] >> 16];
+			const float testEdge2 = testEdge.magnitudeSquared();
+			uint32_t uniqueEdgeIndex = 0;
+			for (; uniqueEdgeIndex < mParams->uniqueEdgeDirectionCount; ++uniqueEdgeIndex)
+			{
+				const PxVec3 uniqueEdge = verts[edges[uniqueEdgeIndex] & 0xFFFF] - verts[edges[uniqueEdgeIndex] >> 16];
+				if (uniqueEdge.cross(testEdge).magnitudeSquared() < testEdge2 * uniqueEdge.magnitudeSquared()*PX_EPS_F32 * PX_EPS_F32)
+				{
+					break;
+				}
+			}
+			if (uniqueEdgeIndex == mParams->uniqueEdgeDirectionCount)
+			{
+				nvidia::swap(edges[mParams->uniqueEdgeDirectionCount], edges[testEdgeIndex]);
+				nvidia::swap(adjacentFaces[mParams->uniqueEdgeDirectionCount], adjacentFaces[testEdgeIndex]);
+				++mParams->uniqueEdgeDirectionCount;
+			}
+		}
+	}
+
+	// Transfer from temporary arrays into mParams
+
+	// uniquePlanes
+	mParams->getParameterHandle("uniquePlanes", handle);
+	mParams->resizeArray(handle, (int32_t)uniquePlanes.size());
+	for (uint32_t i = 0; i < uniquePlanes.size(); ++i)
+	{
+		PxPlane& plane = uniquePlanes[i];
+		ConvexHullParametersNS::Plane_Type& paramPlane = mParams->uniquePlanes.buf[i];
+		paramPlane.normal = plane.n;
+		paramPlane.d = plane.d;
+	}
+
+	// edges
+	mParams->getParameterHandle("edges", handle);
+	mParams->resizeArray(handle, (int32_t)edges.size());
+	mParams->setParamU32Array(handle, edges.begin(), (int32_t)edges.size());
+
+	// adjacentFaces
+	mParams->getParameterHandle("adjacentFaces", handle);
+	mParams->resizeArray(handle, (int32_t)adjacentFaces.size());
+	mParams->setParamU32Array(handle, adjacentFaces.begin(), (int32_t)adjacentFaces.size());
+
+	// widths
+	mParams->getParameterHandle("widths", handle);
+	mParams->resizeArray(handle, (int32_t)widths.size());
+	mParams->setParamF32Array(handle, widths.begin(), (int32_t)widths.size());
+}
+#endif
+
+
+
+void ConvexHullImpl::buildFromAABB(const PxBounds3& aabb)
+{
+	PxVec3 center, extent;
+	center = aabb.getCenter();
+	extent = aabb.getExtents();
+
+	NvParameterized::Handle handle(*mParams);
+
+	// Copy vertices and compute bounds
+	mParams->getParameterHandle("vertices", handle);
+	mParams->resizeArray(handle, 8);
+	for (int i = 0; i < 8; ++i)
+	{
+		mParams->vertices.buf[i] = center + PxVec3((2.0f * (i & 1) - 1.0f) * extent.x, ((float)(i & 2) - 1.0f) * extent.y, (0.5f * (i & 4) - 1.0f) * extent.z);
+	}
+
+	mParams->getParameterHandle("uniquePlanes", handle);
+	mParams->resizeArray(handle, 3);
+	mParams->getParameterHandle("widths", handle);
+	mParams->resizeArray(handle, 3);
+	for (unsigned i = 0; i < 3; ++i)
+	{
+		ConvexHullParametersNS::Plane_Type& plane = mParams->uniquePlanes.buf[i];
+		plane.normal = PxVec3(0.0f);
+		plane.normal[i] = -1.0f;
+		plane.d = -aabb.maximum[i];
+		mParams->widths.buf[i] = aabb.maximum[i] - aabb.minimum[i];
+	}
+	mParams->planeCount = 6;
+
+	mParams->getParameterHandle("edges", handle);
+	mParams->resizeArray(handle, 12);
+	mParams->getParameterHandle("adjacentFaces", handle);
+	mParams->resizeArray(handle, 12);
+	for (uint32_t i = 0; i < 3; ++i)
+	{
+		uint32_t i1 = ((i + 1) % 3);
+		uint32_t i2 = ((i + 2) % 3);
+		uint32_t vOffset1 = 1u << i1;
+		uint32_t vOffset2 = 1u << i2;
+		for (int j = 0; j < 4; ++j)
+		{
+			uint32_t v0 = 0;
+			uint32_t v1 = 1u << i;
+			uint32_t f0 = i1;
+			uint32_t f1 = i2;
+			if (j & 1)
+			{
+				v0 += vOffset1;
+				v1 += vOffset1;
+				f0 += 3;
+			}
+			if (j & 2)
+			{
+				v0 += vOffset2;
+				v1 += vOffset2;
+				f1 += 3;
+			}
+			if (j == 1 || j == 2)
+			{
+				nvidia::swap(f0, f1);
+			}
+			uint32_t index = i + 3 * j;
+			mParams->edges.buf[index] = v0 << 16 | v1;
+			mParams->adjacentFaces.buf[i] = f0 << 16 | f1;
+		}
+	}
+	mParams->uniqueEdgeDirectionCount = 3;
+
+	mParams->bounds = aabb;
+
+	const PxVec3 diag = mParams->bounds.maximum - mParams->bounds.minimum;
+	mParams->volume = diag.x * diag.y * diag.z;
+}
+
+void ConvexHullImpl::buildKDOP(const void* points, uint32_t numPoints, uint32_t pointStrideBytes, const PxVec3* directions, uint32_t numDirections)
+{
+	setEmpty();
+
+	if (numPoints == 0)
+	{
+		return;
+	}
+
+	float size = 0;
+
+	physx::Array<PxPlane> planes;
+	planes.reserve(2 * numDirections);
+	for (uint32_t i = 0; i < numDirections; ++i)
+	{
+		PxVec3 dir = directions[i];
+		dir.normalize();
+		float min, max;
+		getExtent(min, max, points, numPoints, pointStrideBytes, dir);
+		planes.pushBack(PxPlane(dir, -max));
+		planes.pushBack(PxPlane(-dir, min));
+		size = PxMax(size, max - min);
+	}
+
+	buildFromPlanes(planes.begin(), planes.size(), 0.00001f * size);
+}
+
+void ConvexHullImpl::intersectPlaneSide(const PxPlane& plane)
+{
+	const float size = (mParams->bounds.maximum - mParams->bounds.minimum).magnitude();
+	const float eps = 0.00001f * size;
+
+	const uint32_t oldVertexCount = (uint32_t)mParams->vertices.arraySizes[0];
+
+	Array<PxVec3> vertices;
+	vertices.resize(oldVertexCount);
+	NvParameterized::Handle handle(*mParams);
+	mParams->getParameterHandle("vertices", handle);
+	mParams->getParamVec3Array(handle, vertices.begin(), (int32_t)vertices.size());
+
+	// Throw out all vertices on the + side of the plane
+	for (uint32_t i = oldVertexCount; i--;)
+	{
+		if (plane.distance(vertices[i]) > eps)
+		{
+			vertices.replaceWithLast(i);
+		}
+	}
+
+	if (vertices.size() == 0)
+	{
+		// plane excludes this whole hull.  Delete everything.
+		setEmpty();
+		return;
+	}
+
+	if (vertices.size() == oldVertexCount)
+	{
+		// plane includes this whole hull.  Do nothing.
+		return;
+	}
+
+	// Intersect new plane with all pairs of old planes.
+	const uint32_t planeCount = getPlaneCount();
+	for (uint32_t j = 1; j < planeCount; ++j)
+	{
+		const PxPlane planeJ = getPlane(j);
+		for (uint32_t i = 0; i < j; ++i)
+		{
+			const PxPlane planeI = getPlane(i);
+			PxVec3 point;
+			if (intersectPlanes(point, plane, planeI, planeJ))
+			{
+				// See if point is excluded by any of the other planes
+				uint32_t k;
+				for (k = 0; k < planeCount; ++k)
+				{
+					if (k != i && k != j && getPlane(k).distance(point) > eps)
+					{
+						break;
+					}
+				}
+				if (k == planeCount)
+				{
+					// Point is part of the new intersected hull
+					uint32_t m = 0;
+					for (; m < vertices.size(); ++m)
+					{
+						if ((point - vertices[m]).magnitudeSquared() < eps)
+						{
+							break;
+						}
+					}
+					if (m == vertices.size())
+					{
+						vertices.pushBack(point);
+					}
+				}
+			}
+		}
+	}
+
+	buildFromPoints(vertices.begin(), vertices.size(), sizeof(vertices[0]));
+}
+
+void ConvexHullImpl::intersectHull(const ConvexHullImpl& hull)
+{
+	if (hull.getPlaneCount() == 0)
+	{
+		setEmpty();
+		return;
+	}
+
+	physx::Array<PxPlane> planes;
+	planes.reserve(getPlaneCount() + hull.getPlaneCount());
+	for (uint32_t planeN = 0; planeN < getPlaneCount(); ++planeN)
+	{
+		planes.pushBack(getPlane(planeN));
+	}
+	for (uint32_t planeN = 0; planeN < hull.getPlaneCount(); ++planeN)
+	{
+		planes.pushBack(hull.getPlane(planeN));
+	}
+
+	const float size = (mParams->bounds.maximum - mParams->bounds.minimum).magnitude();
+
+	buildFromPlanes(planes.begin(), planes.size(), 0.00001f * size);
+}
+
+// Returns true iff distance does not exceed result and maxDistance
+PX_INLINE bool updateResultAndSeparation(bool& updated, bool& normalPointsFrom0to1, float& result, ConvexHullImpl::Separation* separation,
+										 float min0, float max0, float min1, float max1, const PxVec3& n, float maxDistance)
+{
+	float midpoint;
+	const float dist = extentDistanceAndNormalDirection(min0, max0, min1, max1, midpoint, normalPointsFrom0to1);
+	updated = dist > result;
+	if (updated)
+	{
+		if (dist > maxDistance)
+		{
+			return false;
+		}
+		result = dist;
+		if (separation != NULL)
+		{
+			if (normalPointsFrom0to1)
+			{
+				separation->plane = PxPlane(n, -midpoint);
+				separation->min0 = min0;
+				separation->max0 = max0;
+				separation->min1 = min1;
+				separation->max1 = max1;
+			}
+			else
+			{
+				separation->plane = PxPlane(-n, midpoint);
+				separation->min0 = -max0;
+				separation->max0 = -min0;
+				separation->min1 = -max1;
+				separation->max1 = -min1;
+			}
+		}
+	}
+	return true;
+}
+
+
+namespace GjkInternal
+{
+using namespace physx::aos;
+
+PX_NOALIAS PX_FORCE_INLINE BoolV PointOutsideOfPlane4(const Vec3VArg _a, const Vec3VArg _b, const Vec3VArg _c, const Vec3VArg _d)
+{
+	// this is not 0 because of the following scenario:
+	// All the points lie on the same plane and the plane goes through the origin (0,0,0).
+	// On the Wii U, the math below has the problem that when point A gets projected on the
+	// plane cumputed by A, B, C, the distance to the plane might not be 0 for the mentioned
+	// scenario but a small positive or negative value. This can lead to the wrong boolean
+	// results. Using a small negative value as threshold is more conservative but safer.
+	const Vec4V zero = V4Load(-1e-6);
+
+	const Vec3V ab = V3Sub(_b, _a);
+	const Vec3V ac = V3Sub(_c, _a);
+	const Vec3V ad = V3Sub(_d, _a);
+	const Vec3V bd = V3Sub(_d, _b);
+	const Vec3V bc = V3Sub(_c, _b);
+
+	const Vec3V v0 = V3Cross(ab, ac);
+	const Vec3V v1 = V3Cross(ac, ad);
+	const Vec3V v2 = V3Cross(ad, ab);
+	const Vec3V v3 = V3Cross(bd, bc);
+
+	const FloatV signa0 = V3Dot(v0, _a);
+	const FloatV signa1 = V3Dot(v1, _a);
+	const FloatV signa2 = V3Dot(v2, _a);
+	const FloatV signd3 = V3Dot(v3, _a);
+
+	const FloatV signd0 = V3Dot(v0, _d);
+	const FloatV signd1 = V3Dot(v1, _b);
+	const FloatV signd2 = V3Dot(v2, _c);
+	const FloatV signa3 = V3Dot(v3, _b);
+
+	const Vec4V signa = V4Merge(signa0, signa1, signa2, signa3);
+	const Vec4V signd = V4Merge(signd0, signd1, signd2, signd3);
+	return V4IsGrtrOrEq(V4Mul(signa, signd), zero);//same side, outside of the plane
+}
+
+PX_NOALIAS PX_FORCE_INLINE Vec3V closestPtPointSegment(const Vec3VArg a, const Vec3VArg b)
+{
+	const FloatV zero = FZero();
+	const FloatV one = FOne();
+
+	//Test degenerated case
+	const Vec3V ab = V3Sub(b, a);
+	const FloatV denom = V3Dot(ab, ab);
+	const Vec3V ap = V3Neg(a);//V3Sub(origin, a);
+	const FloatV nom = V3Dot(ap, ab);
+	const BoolV con = FIsEq(denom, zero);
+	const FloatV tValue = FClamp(FDiv(nom, denom), zero, one);
+	const FloatV t = FSel(con, zero, tValue);
+
+	return V3Sel(con, a, V3ScaleAdd(ab, t, a));
+}
+
+PX_NOALIAS PX_FORCE_INLINE Vec3V closestPtPointSegment(const Vec3VArg Q0, const Vec3VArg Q1, const Vec3VArg A0, const Vec3VArg A1,
+	const Vec3VArg B0, const Vec3VArg B1, PxU32& size, Vec3V& closestA, Vec3V& closestB)
+{
+	const Vec3V a = Q0;
+	const Vec3V b = Q1;
+
+	const BoolV bTrue = BTTTT();
+	const FloatV zero = FZero();
+	const FloatV one = FOne();
+
+	//Test degenerated case
+	const Vec3V ab = V3Sub(b, a);
+	const FloatV denom = V3Dot(ab, ab);
+	const Vec3V ap = V3Neg(a);//V3Sub(origin, a);
+	const FloatV nom = V3Dot(ap, ab);
+	const BoolV con = FIsEq(denom, zero);
+
+	if (BAllEq(con, bTrue))
+	{
+		size = 1;
+		closestA = A0;
+		closestB = B0;
+		return Q0;
+	}
+
+	const Vec3V v = V3Sub(A1, A0);
+	const Vec3V w = V3Sub(B1, B0);
+	const FloatV tValue = FClamp(FDiv(nom, denom), zero, one);
+	const FloatV t = FSel(con, zero, tValue);
+
+	const Vec3V tempClosestA = V3ScaleAdd(v, t, A0);
+	const Vec3V tempClosestB = V3ScaleAdd(w, t, B0);
+	closestA = tempClosestA;
+	closestB = tempClosestB;
+	return V3Sub(tempClosestA, tempClosestB);
+}
+
+PX_NOALIAS Vec3V closestPtPointSegmentTesselation(const Vec3VArg Q0, const Vec3VArg Q1, const Vec3VArg A0, const Vec3VArg A1,
+	const Vec3VArg B0, const Vec3VArg B1, PxU32& size, Vec3V& closestA, Vec3V& closestB)
+{
+	const FloatV half = FHalf();
+
+	const FloatV targetSegmentLengthSq = FLoad(10000.f);//100 unit
+
+	Vec3V q0 = Q0;
+	Vec3V q1 = Q1;
+	Vec3V a0 = A0;
+	Vec3V a1 = A1;
+	Vec3V b0 = B0;
+	Vec3V b1 = B1;
+
+	do
+	{
+		const Vec3V midPoint = V3Scale(V3Add(q0, q1), half);
+		const Vec3V midA = V3Scale(V3Add(a0, a1), half);
+		const Vec3V midB = V3Scale(V3Add(b0, b1), half);
+
+		const Vec3V v = V3Sub(midPoint, q0);
+		const FloatV sqV = V3Dot(v, v);
+		if (FAllGrtr(targetSegmentLengthSq, sqV))
+			break;
+		//split the segment into half
+		const Vec3V tClos0 = closestPtPointSegment(q0, midPoint);
+		const FloatV sqDist0 = V3Dot(tClos0, tClos0);
+
+		const Vec3V tClos1 = closestPtPointSegment(q1, midPoint);
+		const FloatV sqDist1 = V3Dot(tClos1, tClos1);
+		//const BoolV con = FIsGrtr(sqDist0, sqDist1);
+		if (FAllGrtr(sqDist0, sqDist1))
+		{
+			//segment [m, q1]
+			q0 = midPoint;
+			a0 = midA;
+			b0 = midB;
+		}
+		else
+		{
+			//segment [q0, m]
+			q1 = midPoint;
+			a1 = midA;
+			b1 = midB;
+		}
+
+	} while (1);
+
+	return closestPtPointSegment(q0, q1, a0, a1, b0, b1, size, closestA, closestB);
+}
+
+PX_NOALIAS Vec3V closestPtPointTriangleTesselation(const Vec3V* PX_RESTRICT Q, const Vec3V* PX_RESTRICT A, const Vec3V* PX_RESTRICT B, const PxU32* PX_RESTRICT indices, PxU32& size, Vec3V& closestA, Vec3V& closestB)
+{
+	size = 3;
+	const FloatV zero = FZero();
+	const FloatV eps = FEps();
+	const FloatV half = FHalf();
+	const BoolV bTrue = BTTTT();
+	const FloatV four = FLoad(4.f);
+	const FloatV sixty = FLoad(100.f);
+
+	const PxU32 ind0 = indices[0];
+	const PxU32 ind1 = indices[1];
+	const PxU32 ind2 = indices[2];
+
+	const Vec3V a = Q[ind0];
+	const Vec3V b = Q[ind1];
+	const Vec3V c = Q[ind2];
+
+	Vec3V ab_ = V3Sub(b, a);
+	Vec3V ac_ = V3Sub(c, a);
+	Vec3V bc_ = V3Sub(b, c);
+
+	const FloatV dac_ = V3Dot(ac_, ac_);
+	const FloatV dbc_ = V3Dot(bc_, bc_);
+	if (FAllGrtrOrEq(eps, FMin(dac_, dbc_)))
+	{
+		//degenerate
+		size = 2;
+		return closestPtPointSegment(Q[ind0], Q[ind1], A[ind0], A[ind1], B[ind0], B[ind1], size, closestA, closestB);
+	}
+
+	Vec3V ap = V3Neg(a);
+	Vec3V bp = V3Neg(b);
+	Vec3V cp = V3Neg(c);
+
+	FloatV d1 = V3Dot(ab_, ap); //  snom
+	FloatV d2 = V3Dot(ac_, ap); //  tnom
+	FloatV d3 = V3Dot(ab_, bp); // -sdenom
+	FloatV d4 = V3Dot(ac_, bp); //  unom = d4 - d3
+	FloatV d5 = V3Dot(ab_, cp); //  udenom = d5 - d6
+	FloatV d6 = V3Dot(ac_, cp); // -tdenom
+	/*	FloatV unom = FSub(d4, d3);
+	FloatV udenom = FSub(d5, d6);*/
+
+	FloatV va = FNegScaleSub(d5, d4, FMul(d3, d6));//edge region of BC
+	FloatV vb = FNegScaleSub(d1, d6, FMul(d5, d2));//edge region of AC
+	FloatV vc = FNegScaleSub(d3, d2, FMul(d1, d4));//edge region of AB
+
+	//check if p in vertex region outside a
+	const BoolV con00 = FIsGrtrOrEq(zero, d1); // snom <= 0
+	const BoolV con01 = FIsGrtrOrEq(zero, d2); // tnom <= 0
+	const BoolV con0 = BAnd(con00, con01); // vertex region a
+	if (BAllEq(con0, bTrue))
+	{
+		//size = 1;
+		closestA = A[ind0];
+		closestB = B[ind0];
+		return Q[ind0];
+	}
+
+	//check if p in vertex region outside b
+	const BoolV con10 = FIsGrtrOrEq(d3, zero);
+	const BoolV con11 = FIsGrtrOrEq(d3, d4);
+	const BoolV con1 = BAnd(con10, con11); // vertex region b
+	if (BAllEq(con1, bTrue))
+	{
+		/*size = 1;
+		indices[0] = ind1;*/
+		closestA = A[ind1];
+		closestB = B[ind1];
+		return Q[ind1];
+	}
+
+
+	//check if p in vertex region outside of c
+	const BoolV con20 = FIsGrtrOrEq(d6, zero);
+	const BoolV con21 = FIsGrtrOrEq(d6, d5);
+	const BoolV con2 = BAnd(con20, con21); // vertex region c
+	if (BAllEq(con2, bTrue))
+	{
+		closestA = A[ind2];
+		closestB = B[ind2];
+		return Q[ind2];
+	}
+
+	//check if p in edge region of AB
+	const BoolV con30 = FIsGrtrOrEq(zero, vc);
+	const BoolV con31 = FIsGrtrOrEq(d1, zero);
+	const BoolV con32 = FIsGrtrOrEq(zero, d3);
+	const BoolV con3 = BAnd(con30, BAnd(con31, con32));
+
+	if (BAllEq(con3, bTrue))
+	{
+		//size = 2;
+		//p in edge region of AB, split AB
+		return closestPtPointSegmentTesselation(Q[ind0], Q[ind1], A[ind0], A[ind1], B[ind0], B[ind1], size, closestA, closestB);
+	}
+
+	//check if p in edge region of BC
+	const BoolV con40 = FIsGrtrOrEq(zero, va);
+	const BoolV con41 = FIsGrtrOrEq(d4, d3);
+	const BoolV con42 = FIsGrtrOrEq(d5, d6);
+	const BoolV con4 = BAnd(con40, BAnd(con41, con42));
+
+	if (BAllEq(con4, bTrue))
+	{
+		//p in edge region of BC, split BC
+		return closestPtPointSegmentTesselation(Q[ind1], Q[ind2], A[ind1], A[ind2], B[ind1], B[ind2], size, closestA, closestB);
+	}
+
+	//check if p in edge region of AC
+	const BoolV con50 = FIsGrtrOrEq(zero, vb);
+	const BoolV con51 = FIsGrtrOrEq(d2, zero);
+	const BoolV con52 = FIsGrtrOrEq(zero, d6);
+	const BoolV con5 = BAnd(con50, BAnd(con51, con52));
+
+	if (BAllEq(con5, bTrue))
+	{
+		//p in edge region of AC, split AC
+		return closestPtPointSegmentTesselation(Q[ind0], Q[ind2], A[ind0], A[ind2], B[ind0], B[ind2], size, closestA, closestB);
+	}
+
+	size = 3;
+
+	Vec3V q0 = Q[ind0];
+	Vec3V q1 = Q[ind1];
+	Vec3V q2 = Q[ind2];
+	Vec3V a0 = A[ind0];
+	Vec3V a1 = A[ind1];
+	Vec3V a2 = A[ind2];
+	Vec3V b0 = B[ind0];
+	Vec3V b1 = B[ind1];
+	Vec3V b2 = B[ind2];
+
+	do
+	{
+
+		const Vec3V ab = V3Sub(q1, q0);
+		const Vec3V ac = V3Sub(q2, q0);
+		const Vec3V bc = V3Sub(q2, q1);
+
+		const FloatV dab = V3Dot(ab, ab);
+		const FloatV dac = V3Dot(ac, ac);
+		const FloatV dbc = V3Dot(bc, bc);
+
+		const FloatV fMax = FMax(dab, FMax(dac, dbc));
+		const FloatV fMin = FMin(dab, FMin(dac, dbc));
+
+		const Vec3V w = V3Cross(ab, ac);
+
+		const FloatV area = V3Length(w);
+		const FloatV ratio = FDiv(FSqrt(fMax), FSqrt(fMin));
+		if (FAllGrtr(four, ratio) && FAllGrtr(sixty, area))
+			break;
+
+		//calculate the triangle normal
+		const Vec3V triNormal = V3Normalize(w);
+
+		PX_ASSERT(V3AllEq(triNormal, V3Zero()) == 0);
+
+
+		//split the longest edge
+		if (FAllGrtrOrEq(dab, dac) && FAllGrtrOrEq(dab, dbc))
+		{
+			//split edge q0q1
+			const Vec3V midPoint = V3Scale(V3Add(q0, q1), half);
+			const Vec3V midA = V3Scale(V3Add(a0, a1), half);
+			const Vec3V midB = V3Scale(V3Add(b0, b1), half);
+
+			const Vec3V v = V3Sub(midPoint, q2);
+			const Vec3V n = V3Normalize(V3Cross(v, triNormal));
+
+			const FloatV d = FNeg(V3Dot(n, midPoint));
+			const FloatV dp = FAdd(V3Dot(n, q0), d);
+			const FloatV sum = FMul(d, dp);
+
+			if (FAllGrtr(sum, zero))
+			{
+				//q0 and origin at the same side, split triangle[q0, m, q2]
+				q1 = midPoint;
+				a1 = midA;
+				b1 = midB;
+			}
+			else
+			{
+				//q1 and origin at the same side, split triangle[m, q1, q2]
+				q0 = midPoint;
+				a0 = midA;
+				b0 = midB;
+			}
+
+		}
+		else if (FAllGrtrOrEq(dac, dbc))
+		{
+			//split edge q0q2
+			const Vec3V midPoint = V3Scale(V3Add(q0, q2), half);
+			const Vec3V midA = V3Scale(V3Add(a0, a2), half);
+			const Vec3V midB = V3Scale(V3Add(b0, b2), half);
+
+			const Vec3V v = V3Sub(midPoint, q1);
+			const Vec3V n = V3Normalize(V3Cross(v, triNormal));
+
+			const FloatV d = FNeg(V3Dot(n, midPoint));
+			const FloatV dp = FAdd(V3Dot(n, q0), d);
+			const FloatV sum = FMul(d, dp);
+
+			if (FAllGrtr(sum, zero))
+			{
+				//q0 and origin at the same side, split triangle[q0, q1, m]
+				q2 = midPoint;
+				a2 = midA;
+				b2 = midB;
+			}
+			else
+			{
+				//q2 and origin at the same side, split triangle[m, q1, q2]
+				q0 = midPoint;
+				a0 = midA;
+				b0 = midB;
+			}
+		}
+		else
+		{
+			//split edge q1q2
+			const Vec3V midPoint = V3Scale(V3Add(q1, q2), half);
+			const Vec3V midA = V3Scale(V3Add(a1, a2), half);
+			const Vec3V midB = V3Scale(V3Add(b1, b2), half);
+
+			const Vec3V v = V3Sub(midPoint, q0);
+			const Vec3V n = V3Normalize(V3Cross(v, triNormal));
+
+			const FloatV d = FNeg(V3Dot(n, midPoint));
+			const FloatV dp = FAdd(V3Dot(n, q1), d);
+			const FloatV sum = FMul(d, dp);
+
+			if (FAllGrtr(sum, zero))
+			{
+				//q1 and origin at the same side, split triangle[q0, q1, m]
+				q2 = midPoint;
+				a2 = midA;
+				b2 = midB;
+			}
+			else
+			{
+				//q2 and origin at the same side, split triangle[q0, m, q2]
+				q1 = midPoint;
+				a1 = midA;
+				b1 = midB;
+			}
+
+
+		}
+	} while (1);
+
+	//P must project inside face region. Compute Q using Barycentric coordinates
+	ab_ = V3Sub(q1, q0);
+	ac_ = V3Sub(q2, q0);
+	ap = V3Neg(q0);
+	bp = V3Neg(q1);
+	cp = V3Neg(q2);
+
+	d1 = V3Dot(ab_, ap); //  snom
+	d2 = V3Dot(ac_, ap); //  tnom
+	d3 = V3Dot(ab_, bp); // -sdenom
+	d4 = V3Dot(ac_, bp); //  unom = d4 - d3
+	d5 = V3Dot(ab_, cp); //  udenom = d5 - d6
+	d6 = V3Dot(ac_, cp); // -tdenom
+
+	va = FNegScaleSub(d5, d4, FMul(d3, d6));//edge region of BC
+	vb = FNegScaleSub(d1, d6, FMul(d5, d2));//edge region of AC
+	vc = FNegScaleSub(d3, d2, FMul(d1, d4));//edge region of AB
+
+	const FloatV toRecipD = FAdd(va, FAdd(vb, vc));
+	const FloatV denom = FRecip(toRecipD);//V4GetW(recipTmp);
+	const Vec3V v0 = V3Sub(a1, a0);
+	const Vec3V v1 = V3Sub(a2, a0);
+	const Vec3V w0 = V3Sub(b1, b0);
+	const Vec3V w1 = V3Sub(b2, b0);
+
+	const FloatV t = FMul(vb, denom);
+	const FloatV w = FMul(vc, denom);
+	const Vec3V vA1 = V3Scale(v1, w);
+	const Vec3V vB1 = V3Scale(w1, w);
+	const Vec3V tempClosestA = V3Add(a0, V3ScaleAdd(v0, t, vA1));
+	const Vec3V tempClosestB = V3Add(b0, V3ScaleAdd(w0, t, vB1));
+	closestA = tempClosestA;
+	closestB = tempClosestB;
+	return V3Sub(tempClosestA, tempClosestB);
+}
+
+PX_NOALIAS Vec3V closestPtPointTetrahedronTesselation(Vec3V* PX_RESTRICT Q, Vec3V* PX_RESTRICT A, Vec3V* PX_RESTRICT B, PxU32& size, Vec3V& closestA, Vec3V& closestB)
+{
+	const FloatV eps = FEps();
+	const Vec3V zeroV = V3Zero();
+	PxU32 tempSize = size;
+
+	FloatV bestSqDist = FLoad(PX_MAX_REAL);
+	const Vec3V a = Q[0];
+	const Vec3V b = Q[1];
+	const Vec3V c = Q[2];
+	const Vec3V d = Q[3];
+	const BoolV bTrue = BTTTT();
+	const BoolV bFalse = BFFFF();
+
+	//degenerated
+	const Vec3V ad = V3Sub(d, a);
+	const Vec3V bd = V3Sub(d, b);
+	const Vec3V cd = V3Sub(d, c);
+	const FloatV dad = V3Dot(ad, ad);
+	const FloatV dbd = V3Dot(bd, bd);
+	const FloatV dcd = V3Dot(cd, cd);
+	const FloatV fMin = FMin(dad, FMin(dbd, dcd));
+	if (FAllGrtr(eps, fMin))
+	{
+		size = 3;
+		PxU32 tempIndices[] = { 0, 1, 2 };
+		return closestPtPointTriangleTesselation(Q, A, B, tempIndices, size, closestA, closestB);
+	}
+
+	Vec3V _Q[] = { Q[0], Q[1], Q[2], Q[3] };
+	Vec3V _A[] = { A[0], A[1], A[2], A[3] };
+	Vec3V _B[] = { B[0], B[1], B[2], B[3] };
+
+	PxU32 indices[3] = { 0, 1, 2 };
+
+	const BoolV bIsOutside4 = PointOutsideOfPlane4(a, b, c, d);
+
+	if (BAllEq(bIsOutside4, bFalse))
+	{
+		//origin is inside the tetrahedron, we are done
+		return zeroV;
+	}
+
+	Vec3V result = zeroV;
+	Vec3V tempClosestA, tempClosestB;
+
+	if (BAllEq(BGetX(bIsOutside4), bTrue))
+	{
+
+		PxU32 tempIndices[] = { 0, 1, 2 };
+		PxU32 _size = 3;
+
+		result = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB);
+
+		const FloatV sqDist = V3Dot(result, result);
+		bestSqDist = sqDist;
+
+		indices[0] = tempIndices[0];
+		indices[1] = tempIndices[1];
+		indices[2] = tempIndices[2];
+
+		tempSize = _size;
+		closestA = tempClosestA;
+		closestB = tempClosestB;
+	}
+
+	if (BAllEq(BGetY(bIsOutside4), bTrue))
+	{
+
+		PxU32 tempIndices[] = { 0, 2, 3 };
+
+		PxU32 _size = 3;
+
+		const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB);
+
+		const FloatV sqDist = V3Dot(q, q);
+		const BoolV con = FIsGrtr(bestSqDist, sqDist);
+		if (BAllEq(con, bTrue))
+		{
+			result = q;
+			bestSqDist = sqDist;
+			indices[0] = tempIndices[0];
+			indices[1] = tempIndices[1];
+			indices[2] = tempIndices[2];
+
+			tempSize = _size;
+			closestA = tempClosestA;
+			closestB = tempClosestB;
+		}
+	}
+
+	if (BAllEq(BGetZ(bIsOutside4), bTrue))
+	{
+
+		PxU32 tempIndices[] = { 0, 3, 1 };
+		PxU32 _size = 3;
+
+		const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB);
+
+		const FloatV sqDist = V3Dot(q, q);
+		const BoolV con = FIsGrtr(bestSqDist, sqDist);
+		if (BAllEq(con, bTrue))
+		{
+			result = q;
+			bestSqDist = sqDist;
+			indices[0] = tempIndices[0];
+			indices[1] = tempIndices[1];
+			indices[2] = tempIndices[2];
+			tempSize = _size;
+			closestA = tempClosestA;
+			closestB = tempClosestB;
+		}
+
+	}
+
+	if (BAllEq(BGetW(bIsOutside4), bTrue))
+	{
+
+		PxU32 tempIndices[] = { 1, 3, 2 };
+		PxU32 _size = 3;
+
+		const Vec3V q = closestPtPointTriangleTesselation(_Q, _A, _B, tempIndices, _size, tempClosestA, tempClosestB);
+
+		const FloatV sqDist = V3Dot(q, q);
+		const BoolV con = FIsGrtr(bestSqDist, sqDist);
+
+		if (BAllEq(con, bTrue))
+		{
+			result = q;
+			bestSqDist = sqDist;
+
+			indices[0] = tempIndices[0];
+			indices[1] = tempIndices[1];
+			indices[2] = tempIndices[2];
+
+			tempSize = _size;
+			closestA = tempClosestA;
+			closestB = tempClosestB;
+		}
+	}
+
+	A[0] = _A[indices[0]]; A[1] = _A[indices[1]]; A[2] = _A[indices[2]];
+	B[0] = _B[indices[0]]; B[1] = _B[indices[1]]; B[2] = _B[indices[2]];
+	Q[0] = _Q[indices[0]]; Q[1] = _Q[indices[1]]; Q[2] = _Q[indices[2]];
+
+
+	size = tempSize;
+	return result;
+}
+
+PX_NOALIAS PX_FORCE_INLINE Vec3V doTesselation(Vec3V* PX_RESTRICT Q, Vec3V* PX_RESTRICT A, Vec3V* PX_RESTRICT B,
+	const Vec3VArg support, const Vec3VArg supportA, const Vec3VArg supportB, PxU32& size, Vec3V& closestA, Vec3V& closestB)
+{
+	switch (size)
+	{
+	case 1:
+	{
+		closestA = supportA;
+		closestB = supportB;
+		return support;
+	}
+	case 2:
+	{
+		return closestPtPointSegmentTesselation(Q[0], support, A[0], supportA, B[0], supportB, size, closestA, closestB);
+	}
+	case 3:
+	{
+
+		PxU32 tempIndices[3] = { 0, 1, 2 };
+		return closestPtPointTriangleTesselation(Q, A, B, tempIndices, size, closestA, closestB);
+	}
+	case 4:
+	{
+		return closestPtPointTetrahedronTesselation(Q, A, B, size, closestA, closestB);
+	}
+	default:
+		PX_ASSERT(0);
+	}
+	return support;
+}
+
+struct ConvexV
+{
+	void calcExtent(const Vec3V& dir, PxF32& minOut, PxF32& maxOut) const
+	{
+		// Expand 
+		const Vec4V x = Vec4V_From_FloatV(V3GetX(dir));
+		const Vec4V y = Vec4V_From_FloatV(V3GetY(dir));
+		const Vec4V z = Vec4V_From_FloatV(V3GetZ(dir));
+
+		const Vec4V* src = mAovVertices;
+		const Vec4V* end = src + mNumAovVertices * 3;
+
+		// Do first step
+		Vec4V max = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2])));
+		Vec4V min = max;
+		src += 3;
+		// Do the rest
+		for (; src < end; src += 3)
+		{
+			const Vec4V dot = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2])));
+			max = V4Max(dot, max);
+			min = V4Min(dot, min);
+		}
+		FStore(V4ExtractMax(max), &maxOut);
+		FStore(V4ExtractMin(min), &minOut);
+	}
+	Vec3V calcSupport(const Vec3V& dir) const
+	{
+		// Expand 
+		const Vec4V x = Vec4V_From_FloatV(V3GetX(dir));
+		const Vec4V y = Vec4V_From_FloatV(V3GetY(dir));
+		const Vec4V z = Vec4V_From_FloatV(V3GetZ(dir));
+
+		PX_ALIGN(16, static const PxF32 index4const[]) = { 0.0f, 1.0f, 2.0f, 3.0f };
+		Vec4V index4 = *(const Vec4V*)index4const;
+		PX_ALIGN(16, static const PxF32 delta4const[]) = { 4.0f, 4.0f, 4.0f, 4.0f };
+		const Vec4V delta4 = *(const Vec4V*)delta4const;
+
+		const Vec4V* src = mAovVertices;
+		const Vec4V* end = src + mNumAovVertices * 3;
+
+		// Do first step
+		Vec4V max = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2])));
+		Vec4V maxIndex = index4;
+		index4 = V4Add(index4, delta4);
+		src += 3;
+		// Do the rest
+		for (; src < end; src += 3)
+		{
+			const Vec4V dot = V4MulAdd(x, src[0], V4MulAdd(y, src[1], V4Mul(z, src[2])));
+			const BoolV cmp = V4IsGrtr(dot, max);
+			max = V4Max(dot, max);
+			maxIndex = V4Sel(cmp, index4, maxIndex);
+			index4 = V4Add(index4, delta4);
+		}
+		Vec4V horiMax = Vec4V_From_FloatV(V4ExtractMax(max));
+		PxU32 mask = BGetBitMask(V4IsEq(horiMax, max));
+		const PxU32 simdIndex = (0x12131210 >> (mask + mask)) & PxU32(3);
+
+		/// NOTE! Could be load hit store
+		/// Would be better to have all simd. 
+		PX_ALIGN(16, PxF32 f[4]);
+		V4StoreA(maxIndex, f);
+		PxU32 index = PxU32(PxI32(f[simdIndex]));
+
+		const Vec4V* aovIndex = (mAovVertices + (index >> 2) * 3);
+		const PxF32* aovOffset = ((const PxF32*)aovIndex) + (index & 3);
+
+		return Vec3V_From_Vec4V(V4LoadXYZW(aovOffset[0], aovOffset[4], aovOffset[8], 1.0f));
+	}
+
+	const Vec4V* mAovVertices;			///< Vertices storex x,x,x,x, y,y,y,y, z,z,z,z
+	PxU32 mNumAovVertices;			///< Number of groups of 4 of vertices
+};
+
+struct Output
+{
+		/// Get the normal to push apart in direction from A to B
+	PX_FORCE_INLINE Vec3V getNormal() const { const Vec3V dir = V3Sub(mClosestB, mClosestA);  if (!V3AllGrtr(V3Eps(), V3Abs(dir))) return V3Normalize(dir); return V3Zero(); }
+	Vec3V mClosestA;				///< Closest point on A
+	Vec3V mClosestB;				///< Closest point on B
+	FloatV mDistSq;
+};
+
+enum Status
+{
+	STATUS_NON_INTERSECT,
+	STATUS_CONTACT,
+	STATUS_DEGENERATE,
+};
+
+Status Collide(const Vec3V& initialDir, const ConvexV& convexA, const Mat34V& bToA, const ConvexV& convexB, Output& out)
+{
+	Vec3V Q[4];
+	Vec3V A[4];
+	Vec3V B[4];
+
+	Mat33V aToB = M34Trnsps33(bToA);
+
+	PxU32 size = 0;
+
+	const Vec3V zeroV = V3Zero();
+	const BoolV bTrue = BTTTT();
+	
+	//Vec3V v = V3UnitX();
+	Vec3V v = V3Sel(FIsGrtr(V3Dot(initialDir, initialDir), FZero()), initialDir, V3UnitX());
+
+	//const FloatV minMargin = zero;
+	//const FloatV eps2 = FMul(minMargin, FLoad(0.01f));
+	//FloatV eps2 = zero;
+	FloatV eps2 = FLoad(1e-6f);
+	const FloatV epsRel = FLoad(0.000225f);
+
+	Vec3V closA(zeroV), closB(zeroV);
+	FloatV sDist = FMax();
+	FloatV minDist = sDist;
+	Vec3V closAA = zeroV;
+	Vec3V closBB = zeroV;
+
+	BoolV bNotTerminated = bTrue;
+	BoolV bCon = bTrue;
+
+	do
+	{
+		minDist = sDist;
+		closAA = closA;
+		closBB = closB;
+
+		PxU32 index = size++;
+		PX_ASSERT(index < 4);
+
+		const Vec3V supportA = convexA.calcSupport(V3Neg(v));
+		const Vec3V supportB = M34MulV3(bToA, convexB.calcSupport(M33MulV3(aToB, v)));
+		const Vec3V support = Vec3V_From_Vec4V(Vec4V_From_Vec3V(V3Sub(supportA, supportB)));
+		
+		A[index] = supportA;
+		B[index] = supportB;
+		Q[index] = support;
+
+		const FloatV signDist = V3Dot(v, support);
+		const FloatV tmp0 = FSub(sDist, signDist);
+		if (FAllGrtr(FMul(epsRel, sDist), tmp0))
+		{
+			out.mClosestA = closA;
+			out.mClosestB = closB;
+			out.mDistSq = sDist;
+			return STATUS_NON_INTERSECT;
+		}
+
+		//calculate the closest point between two convex hull
+		v = doTesselation(Q, A, B, support, supportA, supportB, size, closA, closB);
+		sDist = V3Dot(v, v);
+		bCon = FIsGrtr(minDist, sDist);
+
+		bNotTerminated = BAnd(FIsGrtr(sDist, eps2), bCon);
+	} while (BAllEq(bNotTerminated, bTrue));
+
+	out.mClosestA = V3Sel(bCon, closA, closAA);
+	out.mClosestB = V3Sel(bCon, closB, closBB);
+	out.mDistSq = FSel(bCon, sDist, minDist);
+	return Status(BAllEq(bCon, bTrue) == 1 ? STATUS_CONTACT : STATUS_DEGENERATE);
+}
+
+Status CollidePoint(const ConvexV& convexA, const Vec3V& pointB, Output& out)
+{
+	Vec3V Q[4];
+	Vec3V A[4];
+	Vec3V B[4];
+
+	PxU32 size = 0;
+
+	const Vec3V zeroV = V3Zero();
+	//const FloatV zero = FZero();
+	const BoolV bTrue = BTTTT();
+
+	Vec3V v = V3UnitX();
+
+	//const FloatV minMargin = zero;
+	//const FloatV eps2 = FMul(minMargin, FLoad(0.01f));
+	//FloatV eps2 = zero;
+	FloatV eps2 = FLoad(1e-6f);
+	const FloatV epsRel = FLoad(0.000225f);
+
+	Vec3V closA(zeroV), closB(zeroV);
+	FloatV sDist = FMax();
+	FloatV minDist = sDist;
+	Vec3V closAA = zeroV;
+	Vec3V closBB = zeroV;
+
+	BoolV bNotTerminated = bTrue;
+	BoolV bCon = bTrue;
+
+	do
+	{
+		minDist = sDist;
+		closAA = closA;
+		closBB = closB;
+
+		PxU32 index = size++;
+		PX_ASSERT(index < 4);
+
+		const Vec3V supportA = convexA.calcSupport(V3Neg(v));
+		const Vec3V supportB = pointB; 
+		const Vec3V support = Vec3V_From_Vec4V(Vec4V_From_Vec3V(V3Sub(supportA, supportB)));
+
+		A[index] = supportA;
+		B[index] = supportB;
+		Q[index] = support;
+
+		const FloatV signDist = V3Dot(v, support);
+		const FloatV tmp0 = FSub(sDist, signDist);
+		if (FAllGrtr(FMul(epsRel, sDist), tmp0))
+		{
+			out.mClosestA = closA;
+			out.mClosestB = closB;
+			out.mDistSq = sDist;
+			return STATUS_NON_INTERSECT;
+		}
+
+		//calculate the closest point between two convex hull
+		v = doTesselation(Q, A, B, support, supportA, supportB, size, closA, closB);
+		sDist = V3Dot(v, v);
+		bCon = FIsGrtr(minDist, sDist);
+
+		bNotTerminated = BAnd(FIsGrtr(sDist, eps2), bCon);
+	} while (BAllEq(bNotTerminated, bTrue));
+
+	out.mClosestA = V3Sel(bCon, closA, closAA);
+	out.mClosestB = V3Sel(bCon, closB, closBB);
+	out.mDistSq = FSel(bCon, sDist, minDist);
+	return Status(BAllEq(bCon, bTrue) == 1 ? STATUS_CONTACT : STATUS_DEGENERATE);
+}
+
+
+} // namespace GjkInternal
+
+static void _arrayVec3ToVec4(const PxVec3* src, physx::aos::Vec4V* dst, PxU32 num)
+{
+	using namespace physx::aos;
+	const PxU32 num4 = num >> 2;
+	for (PxU32 i = 0; i < num4; i++, dst += 3, src += 4)
+	{
+		Vec3V v0 = V3LoadU(&src[0].x);
+		Vec3V v1 = V3LoadU(&src[1].x);
+		Vec3V v2 = V3LoadU(&src[2].x);
+		Vec3V v3 = V3LoadU(&src[3].x);
+		// Transpose
+		V4Transpose(v0, v1, v2, v3);
+		// Save 
+		dst[0] = v0;
+		dst[1] = v1;
+		dst[2] = v2;
+	}
+	const PxU32 remain = num & 3;
+	if (remain)
+	{
+		Vec3V work[4];
+		PxU32 i = 0;
+		for (; i < remain; i++) work[i] = V3LoadU(&src[i].x);
+		for (; i < 4; i++) work[i] = work[remain - 1];
+		V4Transpose(work[0], work[1], work[2], work[3]);
+		dst[0] = work[0];
+		dst[1] = work[1];
+		dst[2] = work[2];
+	}
+}
+
+static void _arrayVec3ToVec4(const PxVec3* src, const physx::aos::Vec3V& scale, physx::aos::Vec4V* dst, PxU32 num)
+{
+	using namespace physx::aos;
+	
+	// If no scale - use the faster version
+	if (V3AllEq(scale, V3One()))
+	{
+		return _arrayVec3ToVec4(src, dst, num);
+	}
+
+	const PxU32 num4 = num >> 2;
+	for (PxU32 i = 0; i < num4; i++, dst += 3, src += 4)
+	{
+		Vec3V v0 = V3Mul(scale, V3LoadU(&src[0].x));
+		Vec3V v1 = V3Mul(scale, V3LoadU(&src[1].x));
+		Vec3V v2 = V3Mul(scale, V3LoadU(&src[2].x));
+		Vec3V v3 = V3Mul(scale, V3LoadU(&src[3].x));
+		// Transpose
+		V4Transpose(v0, v1, v2, v3);
+		// Save 
+		dst[0] = v0;
+		dst[1] = v1;
+		dst[2] = v2;
+	}
+	const PxU32 remain = num & 3;
+	if (remain)
+	{
+		Vec3V work[4];
+		PxU32 i = 0;
+		for (; i < remain; i++) work[i] = V3Mul(scale, V3LoadU(&src[i].x));
+		for (; i < 4; i++) work[i] = work[remain - 1];
+		V4Transpose(work[0], work[1], work[2], work[3]);
+		dst[0] = work[0];
+		dst[1] = work[1];
+		dst[2] = work[2];
+	}
+}
+
+static void _calcSeparation(const GjkInternal::ConvexV& convexA, const physx::PxTransform& aToWorldIn, const physx::aos::Mat34V& bToA, const GjkInternal::ConvexV& convexB, const GjkInternal::Output& out, ConvexHullImpl::Separation& sep)
+{
+	using namespace physx::aos;
+
+	Mat33V aToB = M34Trnsps33(bToA);
+	Vec3V normalA = out.getNormal();
+
+	convexA.calcExtent(normalA, sep.min0, sep.max0);
+	Vec3V normalB = M33MulV3(aToB, normalA);
+	convexB.calcExtent(normalB, sep.min1, sep.max1);
+
+	{
+		// Offset the min max taking into account transform
+		// Distance of origin from B's space in As space in direction of the normal in As space should fix it...
+		PxF32 fix;
+		FStore(V3Dot(bToA.col3, normalA), &fix);
+		sep.min1 += fix;
+		sep.max1 += fix;
+	}
+
+	// Looks like it's the plane at the midpoint
+	Vec3V center = V3Scale(V3Add(out.mClosestA, out.mClosestB), FLoad(0.5f));
+	// Transform to world space
+	Mat34V aToWorld;
+	*(PxMat44*)&aToWorld = aToWorldIn;
+	// Put the normal in world space
+	Vec3V worldCenter = M34MulV3(aToWorld, center);
+	Vec3V worldNormal = M34Mul33V3(aToWorld, normalA);
+
+	FloatV dist = V3Dot(worldNormal, worldCenter);
+	V3StoreU(worldNormal, sep.plane.n);
+	FStore(dist, &sep.plane.d);
+	sep.plane.d = -sep.plane.d;
+}
+
+bool ConvexHullImpl::hullsInProximity(const ConvexHullImpl& hull0, const physx::PxTransform& localToWorldRT0In, const physx::PxVec3& scale0In,
+	const ConvexHullImpl& hull1, const physx::PxTransform& localToWorldRT1In, const physx::PxVec3& scale1In,
+	physx::PxF32 maxDistance, Separation* separation)
+{
+	using namespace physx::aos;
+
+	const PxU32 numVerts0 = hull0.getVertexCount();
+	const PxU32 numVerts1 = hull1.getVertexCount();
+	const PxU32 numAov0 = (numVerts0 + 3) >> 2;
+	const PxU32 numAov1 = (numVerts1 + 3) >> 2;
+
+#if PX_ANDROID == 1
+	Vec4V* verts0 = (Vec4V*)PxAllocaAligned((numAov0 + numAov1) * sizeof(Vec4V) * 3, 16);
+#else
+	Vec4V* verts0 = (Vec4V*)PxAlloca((numAov0 + numAov1) * sizeof(Vec4V) * 3);
+#endif
+	// Make sure it's aligned
+	PX_ASSERT((size_t(verts0) & 0xf) == 0);
+
+	Vec4V* verts1 = verts0 + (numAov0 * 3);
+
+	const Vec3V scale0 = V3LoadU(&scale0In.x);
+	const Vec3V scale1 = V3LoadU(&scale1In.x);
+
+	_arrayVec3ToVec4(hull0.mParams->vertices.buf, scale0, verts0, numVerts0);
+	_arrayVec3ToVec4(hull1.mParams->vertices.buf, scale1, verts1, numVerts1);
+
+	const PxTransform trans1To0 = localToWorldRT0In.transformInv(localToWorldRT1In);
+
+	// Load into simd mat
+	Mat34V bToA;
+	*(PxMat44*)&bToA = trans1To0;
+	(*(PxMat44*)&bToA).column3.w = 0.0f; // AOS wants the 4th component of Vec3V to be 0 to work properly
+
+	GjkInternal::ConvexV convexA;
+	GjkInternal::ConvexV convexB;
+
+	convexA.mNumAovVertices = numAov0;
+	convexA.mAovVertices = verts0;
+
+	convexB.mNumAovVertices = numAov1;
+	convexB.mAovVertices = verts1;
+
+	// Take the origin of B in As space as the inital direction as it is 'the difference in transform origins B-A in A's space'
+	// Should be a good first guess
+	const Vec3V initialDir = bToA.col3;
+	GjkInternal::Output output;
+	GjkInternal::Status status = GjkInternal::Collide(initialDir, convexA, bToA, convexB, output);
+
+	if (status == GjkInternal::STATUS_DEGENERATE)
+	{
+		// Calculate the tolerance from the extents
+		const PxVec3 extents0 = hull0.getBounds().getExtents();
+		const PxVec3 extents1 = hull1.getBounds().getExtents();
+
+		const FloatV tolerance0 = V3ExtractMin(V3Mul(V3LoadU(&extents0.x), scale0));
+		const FloatV tolerance1 = V3ExtractMin(V3Mul(V3LoadU(&extents1.x), scale1));
+
+		const FloatV tolerance = FMul(FAdd(tolerance0, tolerance1), FLoad(0.01f));
+		const FloatV sqTolerance = FMul(tolerance, tolerance);
+
+		status = FAllGrtr(sqTolerance, output.mDistSq) ? GjkInternal::STATUS_CONTACT : GjkInternal::STATUS_NON_INTERSECT;
+	}
+
+	switch (status)
+	{
+		case GjkInternal::STATUS_CONTACT:
+		{ 
+			if (separation)
+			{
+				_calcSeparation(convexA, localToWorldRT0In, bToA, convexB, output, *separation);
+			}
+			return true;
+		}
+		default:
+		case GjkInternal::STATUS_NON_INTERSECT:
+		{
+			if (separation)
+			{
+				_calcSeparation(convexA, localToWorldRT0In, bToA, convexB, output, *separation);
+			}
+			PxF32 val;
+			FStore(output.mDistSq, &val);
+			return val < (maxDistance * maxDistance);
+		}
+	}
+}
+
+static void _calcSeparation(const GjkInternal::ConvexV& convex, const physx::PxTransform& hullLocalToWorldRT, const PxVec3& sphereCenterWorld, PxF32 sphereRadius, const GjkInternal::Output& gjkOut, ConvexHullImpl::Separation& sep)
+{
+	using namespace physx::aos;
+
+	const Vec3V normal = gjkOut.getNormal();
+
+	// Calc the plane normal (in world space)
+	V3StoreU(normal, sep.plane.n);
+	sep.plane.n = hullLocalToWorldRT.rotate(sep.plane.n);
+
+	const PxF32 originOffset = sep.plane.n.dot(hullLocalToWorldRT.p);
+	convex.calcExtent(normal, sep.min0, sep.max0);
+	// The min and mix are in local space -> fix by taking into account world space origin
+	sep.min0 += originOffset;
+	sep.max0 += originOffset;
+
+	// Calculate the sphere radius
+	const PxF32 centerDist = sep.plane.n.dot(sphereCenterWorld);
+	// Work out the spheres 
+	sep.min1 = centerDist - sphereRadius;
+	sep.max1 = centerDist + sphereRadius;
+	// Set the plane distance
+	sep.plane.d = (sep.max0 + sep.min1) * -0.5f;
+}
+
+bool ConvexHullImpl::sphereInProximity(const physx::PxTransform& hullLocalToWorldRT, const physx::PxVec3& hullScaleIn,
+								   const physx::PxVec3& sphereWorldCenterIn, physx::PxF32 sphereRadius,
+								   physx::PxF32 maxDistance, Separation* separation)
+{
+	using namespace physx::aos;
+
+	const PxU32 numVerts = getVertexCount();
+	const PxU32 numAov = (numVerts + 3) >> 2;
+	
+#if PX_ANDROID == 1
+	Vec4V* verts = (Vec4V*)PxAllocaAligned(numAov * sizeof(Vec4V) * 3, 16);
+#else
+	Vec4V* verts = (Vec4V*)PxAlloca(numAov * sizeof(Vec4V) * 3);
+#endif
+	// Make sure it's aligned
+	PX_ASSERT((size_t(verts) & 0xf) == 0);
+
+	const Vec3V hullScale = V3LoadU(&hullScaleIn.x);
+
+	_arrayVec3ToVec4(mParams->vertices.buf, hullScale, verts, numVerts);
+
+	// Put the spheres center in the hulls space
+	Vec3V sphereCenterLocal = V3LoadU(hullLocalToWorldRT.transformInv(sphereWorldCenterIn));
+
+	// Load into simd mat
+	GjkInternal::ConvexV convex;
+
+	convex.mNumAovVertices = numAov;
+	convex.mAovVertices = verts;
+
+	// Calculate distance to center of sphere
+	GjkInternal::Output gjkOutput;
+	GjkInternal::Status status = GjkInternal::CollidePoint(convex, sphereCenterLocal, gjkOutput);
+
+	if (status == GjkInternal::STATUS_DEGENERATE)
+	{
+		FloatV sphereRadiusV = FLoad(sphereRadius);
+		status = FAllGrtr(gjkOutput.mDistSq, FMul(sphereRadiusV, sphereRadiusV)) ? GjkInternal::STATUS_NON_INTERSECT : GjkInternal::STATUS_CONTACT;
+	}
+
+	switch (status)
+	{
+		case GjkInternal::STATUS_CONTACT:
+		{
+			if (separation)
+			{
+				_calcSeparation(convex, hullLocalToWorldRT, sphereWorldCenterIn, sphereRadius, gjkOutput, *separation);
+			}
+			return true;
+		}
+		default:
+		case GjkInternal::STATUS_NON_INTERSECT:
+		{
+			if (separation)
+			{
+				_calcSeparation(convex, hullLocalToWorldRT, sphereWorldCenterIn, sphereRadius, gjkOutput, *separation);
+			}
+			const PxF32 maxTotal = maxDistance + sphereRadius;
+			PxF32 val;
+			FStore(gjkOutput.mDistSq, &val);
+			return val < (maxTotal * maxTotal);
+		}
+	}
+}
+
+#if PX_PHYSICS_VERSION_MAJOR == 3
+bool ConvexHullImpl::intersects(const PxShape& shape, const PxTransform& localToWorldRT, const PxVec3& scale, float padding) const
+{
+	PX_UNUSED(localToWorldRT);
+	PX_UNUSED(scale);
+	PX_UNUSED(padding);
+
+	switch (shape.getGeometryType())
+	{
+	case PxGeometryType::ePLANE:	// xxx todo - implement
+		return true;
+	case PxGeometryType::eSPHERE:	// xxx todo - implement
+		return true;
+	case PxGeometryType::eBOX:	// xxx todo - implement
+		return true;
+	case PxGeometryType::eCAPSULE:	// xxx todo - implement
+		return true;
+	case PxGeometryType::eCONVEXMESH:	// xxx todo - implement
+		return true;
+	case PxGeometryType::eTRIANGLEMESH:	// xxx todo - implement
+		return true;
+	case PxGeometryType::eHEIGHTFIELD:	// xxx todo - implement
+		return true;
+	default:
+		return false;
+	}
+}
+#endif
+
+
+
+bool ConvexHullImpl::rayCast(float& in, float& out, const PxVec3& orig, const PxVec3& dir, const PxTransform& localToWorldRT, const PxVec3& scale, PxVec3* normal) const
+{
+	const uint32_t numSlabs = getUniquePlaneNormalCount();
+
+	if (numSlabs == 0)
+	{
+		return false;
+	}
+
+	// Create hull-local ray
+	PxVec3 localorig, localdir;
+	if (!worldToLocalRay(localorig, localdir, orig, dir, localToWorldRT, scale))
+	{
+		return false;
+	}
+
+	// Intersect with hull - local and world intersect 'times' are equal
+
+	if (normal != NULL)
+	{
+		*normal = PxVec3(0.0f);	// This will be the value if the ray origin is within the hull
+	}
+
+	const float tol2 = (float)1.0e-14 * localdir.magnitudeSquared();
+
+	for (uint32_t slabN = 0; slabN < numSlabs; ++slabN)
+	{
+		ConvexHullParametersNS::Plane_Type& paramPlane = mParams->uniquePlanes.buf[slabN];
+		const PxPlane plane(paramPlane.normal, paramPlane.d);
+		const float num0 = -plane.distance(localorig);
+		const float num1 = mParams->widths.buf[slabN] - num0;
+		const float den = localdir.dot(plane.n);
+		if (den* den <= tol2)	// Needs to be <=, so that localdir = (0,0,0) will act as a point check
+		{
+			if (num0 < 0 || num1 < 0)
+			{
+				return false;
+			}
+		}
+		else
+		{
+			if (den > 0)
+			{
+				if (num0 < in * den || num1 < out * (-den))
+				{
+					return false;
+				}
+				const float recipDen = 1.0f / den;
+				const float slabIn = -num1 * recipDen;
+				if (slabIn > in)
+				{
+					in = slabIn;
+					if (normal != NULL)
+					{
+						*normal = -plane.n;
+					}
+				}
+				out = PxMin(num0 * recipDen, out);
+			}
+			else
+			{
+				if (num0 < out * den || num1 < in * (-den))
+				{
+					return false;
+				}
+				const float recipDen = 1.0f / den;
+				const float slabIn = num0 * recipDen;
+				if (slabIn > in)
+				{
+					in = slabIn;
+					if (normal != NULL)
+					{
+						*normal = plane.n;
+					}
+				}
+				out = PxMin(-num1 * recipDen, out);
+			}
+		}
+	}
+
+	if (normal != NULL)
+	{
+		Cof44 cof(localToWorldRT, scale);
+		*normal = cof.getBlock33().transform(*normal);
+		normal->normalize();
+	}
+
+	return true;
+}
+
+bool ConvexHullImpl::obbSweep(float& in, float& out, const PxVec3& worldBoxCenter, const PxVec3& worldBoxExtents, const PxVec3 worldBoxAxes[3],
+                          const PxVec3& worldDisp, const PxTransform& localToWorldRT, const PxVec3& scale, PxVec3* normal) const
+{
+	float tNormal = -PX_MAX_F32;
+
+	// Leave hull untransformed
+	const uint32_t numHullSlabs = getUniquePlaneNormalCount();
+	const uint32_t numHullVerts = getVertexCount();
+	ConvexHullParametersNS::Plane_Type* paramPlanes = mParams->uniquePlanes.buf;
+	const PxVec3* hullVerts = mParams->vertices.buf;
+	const float* hullWidths = mParams->widths.buf;
+
+	// Create inverse transform for box and displacement
+	const float detS = scale.x * scale.y * scale.z;
+	if (detS == 0.0f)
+	{
+		return false;	// Not handling singular TMs
+	}
+	const float recipDetS = 1.0f / detS;
+	const PxVec3 invS(scale.y * scale.z * recipDetS, scale.z * scale.x * recipDetS, scale.x * scale.y * recipDetS);
+
+	// Create box directions and displacement - the box will become a parallelepiped in general.  For brevity we'll still call it a box.
+	PxVec3 disp = localToWorldRT.rotateInv(worldDisp);
+	disp = invS.multiply(disp);
+
+	PxVec3 boxCenter = localToWorldRT.transformInv(worldBoxCenter);
+	boxCenter = invS.multiply(boxCenter);
+
+	PxVec3 boxAxes[3];	// Will not be orthonormal in general
+	for (uint32_t i = 0; i < 3; ++i)
+	{
+		boxAxes[i] = localToWorldRT.rotateInv(worldBoxAxes[i]);
+		boxAxes[i] *= worldBoxExtents[i];
+		boxAxes[i] = invS.multiply(boxAxes[i]);
+	}
+
+	PxVec3 boxFaceNormals[3];
+	float boxRadii[3];
+	const float octantVol = boxAxes[0].dot(boxAxes[1].cross(boxAxes[2]));
+	for (uint32_t i = 0; i < 3; ++i)
+	{
+		boxFaceNormals[i] = boxAxes[(1 << i) & 3].cross(boxAxes[(3 >> i) ^ 1]);
+		const float norm = PxRecipSqrt(boxFaceNormals[i].magnitudeSquared());
+		boxFaceNormals[i] *= norm;
+		boxRadii[i] = octantVol * norm;
+	}
+
+	// Test box against slabs of hull
+	for (uint32_t slabIndex = 0; slabIndex < numHullSlabs; ++slabIndex)
+	{
+		ConvexHullParametersNS::Plane_Type& paramPlane = paramPlanes[slabIndex];
+		const PxPlane plane(paramPlane.normal, paramPlane.d);
+		const float projectedRadius = PxAbs(plane.n.dot(boxAxes[0])) + PxAbs(plane.n.dot(boxAxes[1])) + PxAbs(plane.n.dot(boxAxes[2]));
+		const float projectedCenter = plane.n.dot(boxCenter);
+		const float vel0 = disp.dot(plane.n);
+		float tIn, tOut;
+		extentOverlapTimeInterval(tIn, tOut, vel0, projectedCenter - projectedRadius, projectedCenter + projectedRadius, -plane.d - hullWidths[slabIndex], -plane.d);
+		if (!updateTimeIntervalAndNormal(in, out, tNormal, normal, tIn, tOut, (-PxSign(vel0))*plane.n))
+		{
+			return false;	// No intersection within the input time interval
+		}
+	}
+
+	// Test hull against box face directions
+	for (uint32_t faceIndex = 0; faceIndex < 3; ++faceIndex)
+	{
+		const PxVec3& faceNormal = boxFaceNormals[faceIndex];
+		float min, max;
+		getExtent(min, max, hullVerts, numHullVerts, sizeof(PxVec3), faceNormal);
+		const float projectedRadius = boxRadii[faceIndex];
+		const float projectedCenter = faceNormal.dot(boxCenter);
+		const float vel0 = disp.dot(faceNormal);
+		float tIn, tOut;
+		extentOverlapTimeInterval(tIn, tOut, vel0, projectedCenter - projectedRadius, projectedCenter + projectedRadius, min, max);
+		PxVec3 testFace = (-PxSign(vel0)) * faceNormal;
+		if (!updateTimeIntervalAndNormal(in, out, tNormal, normal, tIn, tOut, testFace))
+		{
+			return false;	// No intersection within the input time interval
+		}
+	}
+
+	// Test hulls against cross-edge planes
+	const uint32_t numHullEdges = getUniqueEdgeDirectionCount();
+	for (uint32_t hullEdgeIndex = 0; hullEdgeIndex < numHullEdges; ++hullEdgeIndex)
+	{
+		const PxVec3 hullEdge = getEdgeDirection(hullEdgeIndex);
+		for (uint32_t boxEdgeIndex = 0; boxEdgeIndex < 3; ++boxEdgeIndex)
+		{
+			PxVec3 n = hullEdge.cross(boxAxes[boxEdgeIndex]);
+			const float n2 = n.magnitudeSquared();
+			if (n2 < PX_EPS_F32 * PX_EPS_F32)
+			{
+				continue;
+			}
+			n *= nvidia::recipSqrtFast(n2);
+			float vel0 = disp.dot(n);
+			// Choose normal direction such that the normal component of velocity is negative
+			if (vel0 > 0.0f)
+			{
+				vel0 = -vel0;
+				n *= -1.0f;
+			}
+			const float projectedRadius = PxAbs(n.dot(boxAxes[(1 << boxEdgeIndex) & 3])) +
+			                                     PxAbs(n.dot(boxAxes[(3 >> boxEdgeIndex) ^ 1]));
+			const float projectedCenter = n.dot(boxCenter);
+			float min, max;
+			getExtent(min, max, hullVerts, numHullVerts, sizeof(PxVec3), n);
+			float tIn, tOut;
+			extentOverlapTimeInterval(tIn, tOut, vel0, projectedCenter - projectedRadius, projectedCenter + projectedRadius, min, max);
+			if (!updateTimeIntervalAndNormal(in, out, tNormal, normal, tIn, tOut, n))
+			{
+				return false;	// No intersection within the input time interval
+			}
+		}
+	}
+
+	if (normal != NULL)
+	{
+		Cof44 cof(localToWorldRT, scale);
+		*normal = cof.getBlock33().transform(*normal);
+		normal->normalize();
+	}
+
+	return true;
+}
+
+void ConvexHullImpl::extent(float& min, float& max, const PxVec3& normal) const
+{
+	getExtent(min, max, mParams->vertices.buf, (uint32_t)mParams->vertices.arraySizes[0], sizeof(PxVec3), normal);
+}
+
+void ConvexHullImpl::fill(physx::Array<PxVec3>& outPoints, const PxTransform& localToWorldRT, const PxVec3& scale,
+                      float spacing, float jitter, uint32_t maxPoints, bool adjustSpacing) const
+{
+	if (!maxPoints)
+	{
+		return;
+	}
+
+	const uint32_t numPlanes = getPlaneCount();
+	PxPlane* hull = (PxPlane*)PxAlloca(numPlanes * sizeof(PxPlane));
+	float* recipDens = (float*)PxAlloca(numPlanes * sizeof(float));
+
+	const Cof44 cof(localToWorldRT, scale);	// matrix of cofactors to correctly transform planes
+	PxMat44 srt(localToWorldRT);
+	srt.scale(PxVec4(scale, 1.f));
+
+	PxBounds3 bounds;
+	bounds.setEmpty();
+	for (uint32_t vertN = 0; vertN < getVertexCount(); ++vertN)
+	{
+		PxVec3 worldVert = srt.transform(getVertex(vertN));
+		bounds.include(worldVert);
+	}
+
+	PxVec3 center, extents;
+	center = bounds.getCenter();
+	extents = bounds.getExtents();
+
+	const PxVec3 areas(extents[1]*extents[2], extents[2]*extents[0], extents[0]*extents[1]);
+	const uint32_t axisN = areas[0] < areas[1] ? (areas[0] < areas[2] ? 0u : 2u) : (areas[1] < areas[2] ? 1u : 2u);
+	const uint32_t axisN1 = (axisN + 1) % 3;
+	const uint32_t axisN2 = (axisN + 2) % 3;
+
+	if (adjustSpacing)
+	{
+		const float boxVolume = 8 * extents[0] * areas[0];
+		const float cellVolume = spacing * spacing * spacing;
+		if (boxVolume > maxPoints * cellVolume)
+		{
+			spacing = PxPow(boxVolume / maxPoints, 0.333333333f);
+		}
+	}
+
+	for (uint32_t planeN = 0; planeN < numPlanes; ++planeN)
+	{
+		cof.transform(getPlane(planeN), hull[planeN]);
+		recipDens[planeN] = PxAbs(hull[planeN].n[axisN]) > 1.0e-7 ? 1.0f / hull[planeN].n[axisN] : 0.0f;
+	}
+
+	const float recipSpacing = 1.0f / spacing;
+	const int32_t num1 = (int32_t)(extents[axisN1] * recipSpacing);
+	const int32_t num2 = (int32_t)(extents[axisN2] * recipSpacing);
+
+	QDSRand rnd;
+	const float scaledJitter = jitter * spacing;
+
+	PxVec3 orig;
+	for (int32_t i1 = -num1; i1 <= num1; ++i1)
+	{
+		orig[axisN1] = i1 * spacing + center[axisN1];
+		for (int32_t i2 = -num2; i2 <= num2; ++i2)
+		{
+			orig[axisN2] = i2 * spacing + center[axisN2];
+
+			float out = extents[axisN];
+			float in = -out;
+
+			orig[axisN] = center[axisN];
+
+			uint32_t planeN;
+			for (planeN = 0; planeN < numPlanes; ++planeN)
+			{
+				const PxPlane& plane = hull[planeN];
+				const float recipDen = recipDens[planeN];
+				const float num = -plane.distance(orig);
+				if (recipDen == 0.0f)
+				{
+					if (num < 0)
+					{
+						break;
+					}
+				}
+				else
+				{
+					const float t = num * recipDen;
+					if (recipDen > 0)
+					{
+						if (t < in)
+						{
+							break;
+						}
+						out = PxMin(t, out);
+					}
+					else
+					{
+						if (t > out)
+						{
+							break;
+						}
+						in = PxMax(t, in);
+					}
+				}
+			}
+
+			if (planeN == numPlanes)
+			{
+				const float depth = out - in;
+				const float stop = orig[axisN] + out;
+				orig[axisN] += in + 0.5f * (depth - spacing * (int)(depth * recipSpacing));
+				do
+				{
+					outPoints.pushBack(orig + scaledJitter * PxVec3(rnd.getNext(), rnd.getNext(), rnd.getNext()));
+					if (!--maxPoints)
+					{
+						return;
+					}
+				}
+				while ((orig[axisN] += spacing) <= stop);
+			}
+		}
+	}
+}
+
+#if PX_PHYSICS_VERSION_MAJOR == 3
+uint32_t ConvexHullImpl::calculateCookedSizes(uint32_t& vertexCount, uint32_t& faceCount, bool inflated) const
+{
+	PX_UNUSED(inflated);
+	vertexCount = 0;
+	faceCount = 0;
+	nvidia::PsMemoryBuffer memStream;
+	memStream.setEndianMode(PxFileBuf::ENDIAN_NONE);
+	PxStreamFromFileBuf nvs(memStream);
+	physx::PxConvexMeshDesc meshDesc;
+	meshDesc.points.count = getVertexCount();
+	meshDesc.points.data = &getVertex(0);
+	meshDesc.points.stride = sizeof(PxVec3);
+	meshDesc.flags = physx::PxConvexFlag::eCOMPUTE_CONVEX;
+	const float skinWidth = GetApexSDK()->getCookingInterface() != NULL ? GetApexSDK()->getCookingInterface()->getParams().skinWidth : 0.0f;
+	if (inflated && skinWidth > 0.0f)
+	{
+		meshDesc.flags |= physx::PxConvexFlag::eINFLATE_CONVEX;
+	}
+	if (GetApexSDK()->getCookingInterface()->cookConvexMesh(meshDesc, nvs))
+	{
+		PxConvexMesh* convexMesh = GetApexSDK()->getPhysXSDK()->createConvexMesh(nvs);
+		if (convexMesh != NULL)
+		{
+			vertexCount = convexMesh->getNbVertices();
+			faceCount = convexMesh->getNbPolygons();
+		}
+		convexMesh->release();
+	}
+
+	return memStream.getFileLength();
+}
+
+bool ConvexHullImpl::reduceByCooking()
+{
+	bool reduced = false;
+	nvidia::PsMemoryBuffer memStream;
+	memStream.setEndianMode(PxFileBuf::ENDIAN_NONE);
+	PxStreamFromFileBuf nvs(memStream);
+	physx::PxConvexMeshDesc meshDesc;
+	meshDesc.points.count = getVertexCount();
+	meshDesc.points.data = &getVertex(0);
+	meshDesc.points.stride = sizeof(PxVec3);
+	meshDesc.flags = physx::PxConvexFlag::eCOMPUTE_CONVEX;
+	if (GetApexSDK()->getCookingInterface()->cookConvexMesh(meshDesc, nvs))
+	{
+		PxConvexMesh* convexMesh = GetApexSDK()->getPhysXSDK()->createConvexMesh(nvs);
+		if (convexMesh != NULL)
+		{
+			const uint32_t vertexCount = convexMesh->getNbVertices();
+			reduced = vertexCount < getVertexCount();
+			if (reduced)
+			{
+				buildFromConvexMesh(convexMesh);
+			}
+		}
+		convexMesh->release();
+	}
+
+	return reduced;
+}
+#endif
+
+struct IndexedAngle
+{
+	float angle;
+	uint32_t index;
+
+	struct LT
+	{
+		PX_INLINE bool operator()(const IndexedAngle& a, const IndexedAngle& b) const
+		{
+			return a.angle < b.angle;
+		}
+	};
+};
+
+#if PX_PHYSICS_VERSION_MAJOR == 3
+bool ConvexHullImpl::reduceHull(uint32_t maxVertexCount, uint32_t maxEdgeCount, uint32_t maxFaceCount, bool inflated)
+{
+	// Translate max = 0 => max = "infinity"
+	if (maxVertexCount == 0)
+	{
+		maxVertexCount = 0xFFFFFFFF;
+	}
+
+	if (maxEdgeCount == 0)
+	{
+		maxEdgeCount = 0xFFFFFFFF;
+	}
+
+	if (maxFaceCount == 0)
+	{
+		maxFaceCount = 0xFFFFFFFF;
+	}
+
+	while (reduceByCooking()) {}
+
+	// Calculate sizes
+	uint32_t cookedVertexCount;
+	uint32_t cookedFaceCount;
+	calculateCookedSizes(cookedVertexCount, cookedFaceCount, inflated);
+	uint32_t cookedEdgeCount = cookedVertexCount + cookedFaceCount - 2;
+
+	// Return successfully if we've met the required limits
+	if (cookedVertexCount <= maxVertexCount && cookedEdgeCount <= maxEdgeCount && cookedFaceCount <= maxFaceCount)
+	{
+		return true;
+	}
+
+	NvParameterized::Handle handle(*mParams, "vertices");
+	uint32_t vertexCount = 0;
+	mParams->getArraySize(handle, (int32_t&)vertexCount);
+	if (vertexCount < 4)
+	{
+		return false;
+	}
+	physx::Array<PxVec3> vertices(vertexCount);
+	mParams->getParamVec3Array(handle, &vertices[0], (int32_t)vertexCount);
+
+	// We have at least four vertices in our hull.  Find the tetrahedron with the largest volume.
+	PxMat44 tetrahedron;
+	float maxTetVolume = 0.0f;
+	uint32_t tetIndices[4] = {0,1,2,3};
+	for (uint32_t i = 0; i < vertexCount-3; ++i)
+	{
+		tetrahedron.column0 = PxVec4(vertices[i], 1.0f);
+		for (uint32_t j = i+1; j < vertexCount-2; ++j)
+		{
+			tetrahedron.column1 = PxVec4(vertices[j], 1.0f);
+			for (uint32_t k = j+1; k < vertexCount-1; ++k)
+			{
+				tetrahedron.column2 = PxVec4(vertices[k], 1.0f);
+				for (uint32_t l = k+1; l < vertexCount; ++l)
+				{
+					tetrahedron.column3 = PxVec4(vertices[l], 1.0f);
+					const float v = PxAbs(det(tetrahedron));
+					if (v > maxTetVolume)
+					{
+						maxTetVolume = v;
+						tetIndices[0] = i;
+						tetIndices[1] = j;
+						tetIndices[2] = k;
+						tetIndices[3] = l;
+					}
+				}
+			}
+		}
+	}
+
+	// Remove tetradhedral vertices from vertices array, put in reducedVertices array
+	physx::Array<PxVec3> reducedVertices(4);
+	for (uint32_t vNum = 4; vNum--;)
+	{
+		reducedVertices[vNum] = vertices[tetIndices[vNum]];
+		vertices.replaceWithLast(tetIndices[vNum]);	// Works because tetIndices[i] < tetIndices[j] if i < j
+	}
+	buildFromPoints(&reducedVertices[0], 4, sizeof(PxVec3));
+
+	calculateCookedSizes(cookedVertexCount, cookedFaceCount, inflated);
+	cookedEdgeCount = cookedVertexCount + cookedFaceCount - 2;
+	if (cookedVertexCount > maxVertexCount || cookedEdgeCount > maxEdgeCount || cookedFaceCount > maxFaceCount)
+	{
+		return false;	// Even a tetrahedron exceeds our limits
+	}
+
+	physx::Array<uint32_t> faceEdges;
+	while (vertices.size())
+	{
+		float maxVolumeIncrease = 0.0f;
+		uint32_t bestVertexIndex = 0;
+		for (uint32_t testVertexIndex = 0; testVertexIndex < vertices.size(); ++testVertexIndex)
+		{
+			const PxVec3& testVertex = vertices[testVertexIndex];
+			float volumeIncrease = 0.0f;
+			PxMat44 addedTet;
+			addedTet.column0 = PxVec4(testVertex, 1.0f);
+			for (uint32_t planeIndex = 0; planeIndex < getPlaneCount(); ++planeIndex)
+			{
+				const PxPlane plane = getPlane(planeIndex);
+				if (plane.distance(testVertex) <= 0.0f)
+				{
+					continue;
+				}
+				faceEdges.resize(0);
+				PxVec3 faceCenter(0.0f);
+				for (uint32_t edgeIndex = 0; edgeIndex < getEdgeCount(); ++edgeIndex)
+				{
+					if (getEdgeAdjacentFaceIndex(edgeIndex, 0) == planeIndex || getEdgeAdjacentFaceIndex(edgeIndex, 1) == planeIndex)
+					{
+						faceCenter += getVertex(getEdgeEndpointIndex(edgeIndex, 0)) + getVertex(getEdgeEndpointIndex(edgeIndex, 1));
+						faceEdges.pushBack(edgeIndex);
+					}
+				}
+				if (faceEdges.size())
+				{
+					faceCenter *= 0.5f/(float)faceEdges.size();
+				}
+				addedTet.column1 = PxVec4(faceCenter, 1.0f);
+				for (uint32_t edgeNum = 0; edgeNum < faceEdges.size(); ++edgeNum)
+				{
+					const uint32_t edgeIndex = faceEdges[edgeNum];
+					addedTet.column2 = PxVec4(getVertex(getEdgeEndpointIndex(edgeIndex,0)), 1.0f);
+					addedTet.column3 = PxVec4(getVertex(getEdgeEndpointIndex(edgeIndex,1)), 1.0f);
+					volumeIncrease += PxAbs(det(addedTet));
+				}
+			}
+			if (volumeIncrease > maxVolumeIncrease)
+			{
+				maxVolumeIncrease = volumeIncrease;
+				bestVertexIndex = testVertexIndex;
+			}
+		}
+
+		reducedVertices.pushBack(vertices[bestVertexIndex]);
+		vertices.replaceWithLast(bestVertexIndex);
+		buildFromPoints(&reducedVertices[0], reducedVertices.size(), sizeof(PxVec3));
+
+		calculateCookedSizes(cookedVertexCount, cookedFaceCount, inflated);
+		cookedEdgeCount = cookedVertexCount + cookedFaceCount - 2;
+		if (cookedVertexCount > maxVertexCount || cookedEdgeCount > maxEdgeCount || cookedFaceCount > maxFaceCount)
+		{
+			// Exceeded limits, remove last
+			reducedVertices.popBack();
+			buildFromPoints(&reducedVertices[0], reducedVertices.size(), sizeof(PxVec3));
+			break;
+		}
+	}
+
+	return true;
+}
+#endif
+
+bool ConvexHullImpl::createKDOPDirections(physx::Array<PxVec3>& directions, ConvexHullMethod::Enum method)
+{
+	uint32_t dirs;
+	switch (method)
+	{
+	case ConvexHullMethod::USE_6_DOP:
+		dirs = 0;
+		break;
+	case ConvexHullMethod::USE_10_DOP_X:
+		dirs = 1;
+		break;
+	case ConvexHullMethod::USE_10_DOP_Y:
+		dirs = 2;
+		break;
+	case ConvexHullMethod::USE_10_DOP_Z:
+		dirs = 4;
+		break;
+	case ConvexHullMethod::USE_14_DOP_XY:
+		dirs = 3;
+		break;
+	case ConvexHullMethod::USE_14_DOP_YZ:
+		dirs = 6;
+		break;
+	case ConvexHullMethod::USE_14_DOP_ZX:
+		dirs = 5;
+		break;
+	case ConvexHullMethod::USE_18_DOP:
+		dirs = 7;
+		break;
+	case ConvexHullMethod::USE_26_DOP:
+		dirs = 15;
+		break;
+	default:
+		return false;
+	}
+	directions.reset();
+	directions.pushBack(PxVec3(1, 0, 0));
+	directions.pushBack(PxVec3(0, 1, 0));
+	directions.pushBack(PxVec3(0, 0, 1));
+	if (dirs & 1)
+	{
+		directions.pushBack(PxVec3(0, 1, 1));
+		directions.pushBack(PxVec3(0, -1, 1));
+	}
+	if (dirs & 2)
+	{
+		directions.pushBack(PxVec3(1, 0, 1));
+		directions.pushBack(PxVec3(1, 0, -1));
+	}
+	if (dirs & 4)
+	{
+		directions.pushBack(PxVec3(1, 1, 0));
+		directions.pushBack(PxVec3(-1, 1, 0));
+	}
+	if (dirs & 8)
+	{
+		directions.pushBack(PxVec3(1, 1, 1));
+		directions.pushBack(PxVec3(-1, 1, 1));
+		directions.pushBack(PxVec3(1, -1, 1));
+		directions.pushBack(PxVec3(1, 1, -1));
+	}
+	return true;
+}
+
+
+void createIndexStartLookup(physx::Array<uint32_t>& lookup, int32_t indexBase, uint32_t indexRange, int32_t* indexSource, uint32_t indexCount, uint32_t indexByteStride)
+{
+	if (indexRange == 0)
+	{
+		lookup.resize(PxMax(indexRange + 1, 2u));
+		lookup[0] = 0;
+		lookup[1] = indexCount;
+	}
+	else
+	{
+		lookup.resize(indexRange + 1);
+		uint32_t indexPos = 0;
+		for (uint32_t i = 0; i < indexRange; ++i)
+		{
+			for (; indexPos < indexCount; ++indexPos, indexSource = (int32_t*)((uintptr_t)indexSource + indexByteStride))
+			{
+				if (*indexSource >= (int32_t)i + indexBase)
+				{
+					lookup[i] = indexPos;
+					break;
+				}
+			}
+			if (indexPos == indexCount)
+			{
+				lookup[i] = indexPos;
+			}
+		}
+		lookup[indexRange] = indexCount;
+	}
+}
+
+void findIslands(physx::Array< physx::Array<uint32_t> >& islands, const physx::Array<IntPair>& overlaps, uint32_t indexRange)
+{
+	// symmetrize the overlap list
+	physx::Array<IntPair> symOverlaps;
+	symOverlaps.reserve(2 * overlaps.size());
+	for (uint32_t i = 0; i < overlaps.size(); ++i)
+	{
+		const IntPair& pair = overlaps[i];
+		symOverlaps.pushBack(pair);
+		IntPair& reversed = symOverlaps.insert();
+		reversed.set(pair.i1, pair.i0);
+	}
+	// Sort symmetrized list
+	qsort(symOverlaps.begin(), symOverlaps.size(), sizeof(IntPair), IntPair::compare);
+	// Create overlap look-up
+	physx::Array<uint32_t> overlapStarts;
+	createIndexStartLookup(overlapStarts, 0, indexRange, &symOverlaps.begin()->i0, symOverlaps.size(), sizeof(IntPair));
+	// Find islands
+	IndexBank<uint32_t> objectIndices(indexRange);
+	objectIndices.lockCapacity(true);
+	uint32_t seedIndex = UINT32_MAX;
+	while (objectIndices.useNextFree(seedIndex))
+	{
+		physx::Array<uint32_t>& island = islands.insert();
+		island.pushBack(seedIndex);
+		for (uint32_t i = 0; i < island.size(); ++i)
+		{
+			const uint32_t index = island[i];
+			for (uint32_t j = overlapStarts[index]; j < overlapStarts[index + 1]; ++j)
+			{
+				PX_ASSERT(symOverlaps[j].i0 == (int32_t)index);
+				const uint32_t overlapIndex = (uint32_t)symOverlaps[j].i1;
+				if (objectIndices.use(overlapIndex))
+				{
+					island.pushBack(overlapIndex);
+				}
+			}
+		}
+	}
+}
+
+
+// Neighbor-finding utility class
+
+void NeighborLookup::setBounds(const BoundsRep* bounds, uint32_t boundsCount, uint32_t boundsByteStride)
+{
+	m_neighbors.resize(0);
+	m_firstNeighbor.resize(0);
+
+	if (bounds != NULL && boundsCount > 0 && boundsByteStride >= sizeof(BoundsRep))
+	{
+		physx::Array<IntPair> overlaps;
+		boundsCalculateOverlaps(overlaps, Bounds3XYZ, bounds, boundsCount, boundsByteStride);
+		// symmetrize the overlap list
+		const uint32_t oldSize = overlaps.size();
+		overlaps.resize(2 * oldSize);
+		for (uint32_t i = 0; i < oldSize; ++i)
+		{
+			const IntPair& pair = overlaps[i];
+			overlaps[i+oldSize].set(pair.i1, pair.i0);
+		}
+		// Sort symmetrized list
+		qsort(overlaps.begin(), overlaps.size(), sizeof(IntPair), IntPair::compare);
+		// Create overlap look-up
+		if (overlaps.size() > 0)
+		{
+			createIndexStartLookup(m_firstNeighbor, 0, boundsCount, &overlaps[0].i0, overlaps.size(), sizeof(IntPair));
+			m_neighbors.resize(overlaps.size());
+			for (uint32_t i = 0; i < overlaps.size(); ++i)
+			{
+				m_neighbors[i] = (uint32_t)overlaps[i].i1;
+			}
+		}
+	}
+}
+
+uint32_t NeighborLookup::getNeighborCount(const uint32_t index) const
+{
+	if (index + 1 >= m_firstNeighbor.size())
+	{
+		return 0;	// Invalid neighbor list or index
+	}
+
+	return m_firstNeighbor[index+1] - m_firstNeighbor[index];
+}
+
+const uint32_t* NeighborLookup::getNeighbors(const uint32_t index) const
+{
+	if (index + 1 >= m_firstNeighbor.size())
+	{
+		return 0;	// Invalid neighbor list or index
+	}
+
+	return &m_neighbors[m_firstNeighbor[index]];
+}
+
+
+// Data conversion macros
+
+#define copyBuffer( _DstFormat, _SrcFormat ) \
+	{ \
+		uint32_t _numVertices = numVertices; \
+		int32_t* _invMap = invMap; \
+		_DstFormat##_TYPE* _dst = (_DstFormat##_TYPE*)dst; \
+		const _SrcFormat##_TYPE* _src = (const _SrcFormat##_TYPE*)src; \
+		if( _invMap == NULL ) \
+		{ \
+			while( _numVertices-- ) \
+			{ \
+				convert_##_DstFormat##_from_##_SrcFormat( *_dst, *_src ); \
+				((uint8_t*&)_dst) += dstStride; \
+				((const uint8_t*&)_src) += srcStride; \
+			} \
+		} \
+		else \
+		{ \
+			while( _numVertices-- ) \
+			{ \
+				const int32_t invMapValue = *_invMap++; \
+				if (invMapValue >= 0) \
+				{ \
+					_DstFormat##_TYPE* _dstElem = (_DstFormat##_TYPE*)((uint8_t*)_dst + invMapValue*dstStride); \
+					convert_##_DstFormat##_from_##_SrcFormat( *_dstElem, *_src ); \
+				} \
+				((const uint8_t*&)_src) += srcStride; \
+			} \
+		} \
+	}
+
+
+#define HANDLE_COPY1( _DstFormat, _SrcFormat ) \
+	case RenderDataFormat::_DstFormat : \
+		if( srcFormat == RenderDataFormat::_SrcFormat ) \
+		{ \
+			copyBuffer( _DstFormat, _SrcFormat ); \
+		} \
+		break;
+
+#define HANDLE_COPY2( _DstFormat, _SrcFormat1, _SrcFormat2 ) \
+	case RenderDataFormat::_DstFormat : \
+		if( srcFormat == RenderDataFormat::_SrcFormat1 ) \
+		{ \
+			copyBuffer( _DstFormat, _SrcFormat1 ); \
+		} \
+		else if( srcFormat == RenderDataFormat::_SrcFormat2 ) \
+		{ \
+			copyBuffer( _DstFormat, _SrcFormat2 ); \
+		} \
+		break;
+
+#define HANDLE_COPY3( _DstFormat, _SrcFormat1, _SrcFormat2, _SrcFormat3 ) \
+	case RenderDataFormat::_DstFormat : \
+		if( srcFormat == RenderDataFormat::_SrcFormat1 ) \
+		{ \
+			copyBuffer( _DstFormat, _SrcFormat1 ); \
+		} \
+		else if( srcFormat == RenderDataFormat::_SrcFormat2 ) \
+		{ \
+			copyBuffer( _DstFormat, _SrcFormat2 ); \
+		} \
+		else if( srcFormat == RenderDataFormat::_SrcFormat3 ) \
+		{ \
+			copyBuffer( _DstFormat, _SrcFormat3 ); \
+		} \
+		break;
+
+// ... etc.
+
+
+bool copyRenderVertexBuffer(void* dst, RenderDataFormat::Enum dstFormat, uint32_t dstStride, uint32_t dstStart, const void* src, RenderDataFormat::Enum srcFormat, uint32_t srcStride, uint32_t srcStart, uint32_t numVertices, int32_t* invMap)
+{
+	const uint32_t dstDataSize = RenderDataFormat::getFormatDataSize(dstFormat);
+	if (dstStride == 0)
+	{
+		dstStride = dstDataSize;
+	}
+
+	if (dstStride < dstDataSize)
+	{
+		return false;
+	}
+
+	// advance src pointer
+	((const uint8_t*&)src) += srcStart * srcStride;
+
+	PX_ASSERT((dstStart == 0) || (invMap == NULL)); // can only be one of them, won't work if its both!
+
+	if (dstFormat == srcFormat)
+	{
+		if (dstFormat != RenderDataFormat::UNSPECIFIED)
+		{
+			// Direct data copy
+
+			// advance dst pointer
+			((const uint8_t*&)dst) += dstStart * dstStride;
+
+			if (invMap == NULL)
+			{
+				while (numVertices--)
+				{
+					memcpy(dst, src, dstDataSize);
+					((uint8_t*&)dst) += dstStride;
+					((const uint8_t*&)src) += srcStride;
+				}
+			}
+			else
+			{
+				while (numVertices--)
+				{
+					const int32_t invMapValue = *invMap++;
+					if (invMapValue >= 0)
+					{
+						void* mappedDst = (void*)((uint8_t*)dst + dstStride * (invMapValue));
+						memcpy(mappedDst, src, dstDataSize);
+					}
+					((const uint8_t*&)src) += srcStride;
+				}
+			}
+		}
+
+		return true;
+	}
+
+	// advance dst pointer
+	((const uint8_t*&)dst) += dstStart * dstStride;
+
+	switch (dstFormat)
+	{
+	case RenderDataFormat::UNSPECIFIED:
+		// Handle unspecified by doing nothing (still no error!)
+		break;
+
+		// Put format converters here
+		HANDLE_COPY1(USHORT1, UINT1)
+		HANDLE_COPY1(USHORT2, UINT2)
+		HANDLE_COPY1(USHORT3, UINT3)
+		HANDLE_COPY1(USHORT4, UINT4)
+
+		HANDLE_COPY1(UINT1, USHORT1)
+		HANDLE_COPY1(UINT2, USHORT2)
+		HANDLE_COPY1(UINT3, USHORT3)
+		HANDLE_COPY1(UINT4, USHORT4)
+
+		HANDLE_COPY1(BYTE_SNORM1, FLOAT1)
+		HANDLE_COPY1(BYTE_SNORM2, FLOAT2)
+		HANDLE_COPY1(BYTE_SNORM3, FLOAT3)
+		HANDLE_COPY1(BYTE_SNORM4, FLOAT4)
+		HANDLE_COPY1(BYTE_SNORM4_QUATXYZW, FLOAT4_QUAT)
+		HANDLE_COPY1(SHORT_SNORM1, FLOAT1)
+		HANDLE_COPY1(SHORT_SNORM2, FLOAT2)
+		HANDLE_COPY1(SHORT_SNORM3, FLOAT3)
+		HANDLE_COPY1(SHORT_SNORM4, FLOAT4)
+		HANDLE_COPY1(SHORT_SNORM4_QUATXYZW, FLOAT4_QUAT)
+
+		HANDLE_COPY2(FLOAT1, BYTE_SNORM1, SHORT_SNORM1)
+		HANDLE_COPY2(FLOAT2, BYTE_SNORM2, SHORT_SNORM2)
+		HANDLE_COPY2(FLOAT3, BYTE_SNORM3, SHORT_SNORM3)
+		HANDLE_COPY2(FLOAT4, BYTE_SNORM4, SHORT_SNORM4)
+		HANDLE_COPY2(FLOAT4_QUAT, BYTE_SNORM4_QUATXYZW, SHORT_SNORM4_QUATXYZW)
+
+		HANDLE_COPY3(R8G8B8A8, B8G8R8A8, R32G32B32A32_FLOAT, B32G32R32A32_FLOAT)
+		HANDLE_COPY3(B8G8R8A8, R8G8B8A8, R32G32B32A32_FLOAT, B32G32R32A32_FLOAT)
+		HANDLE_COPY3(R32G32B32A32_FLOAT, R8G8B8A8, B8G8R8A8, B32G32R32A32_FLOAT)
+		HANDLE_COPY3(B32G32R32A32_FLOAT, R8G8B8A8, B8G8R8A8, R32G32B32A32_FLOAT)
+
+	default:
+	{
+		PX_ALWAYS_ASSERT();	// Format conversion not handled
+		return false;
+	}
+	}
+
+
+	return true;
+}
+
+
+
+/************************************************************************/
+// Convex Hull from Planes
+/************************************************************************/
+
+// copied from //sw/physx/PhysXSDK/3.2/trunk/Source/Common/src/CmMathUtils.cpp
+PxQuat PxShortestRotation(const PxVec3& v0, const PxVec3& v1)
+{
+	const float d = v0.dot(v1);
+	const PxVec3 cross = v0.cross(v1);
+
+	PxQuat q = d>-1 ? PxQuat(cross.x, cross.y, cross.z, 1+d) 
+		: PxAbs(v0.x)<0.1f ? PxQuat(0.0f, v0.z, -v0.y, 0.0f) : PxQuat(v0.y, -v0.x, 0.0f, 0.0f);
+
+	return q.getNormalized();
+}
+
+PxTransform PxTransformFromPlaneEquation(const PxPlane& plane)
+{
+	PxPlane p = plane; 
+	p.normalize();
+
+	// special case handling for axis aligned planes
+	const float halfsqrt2 = 0.707106781;
+	PxQuat q;
+	if(2 == (p.n.x == 0.0f) + (p.n.y == 0.0f) + (p.n.z == 0.0f)) // special handling for axis aligned planes
+	{
+		if(p.n.x > 0)		q = PxQuat(PxIdentity);
+		else if(p.n.x < 0)	q = PxQuat(0, 0, 1, 0);
+		else				q = PxQuat(0.0f, -p.n.z, p.n.y, 1) * halfsqrt2;
+	}
+	else q = PxShortestRotation(PxVec3(1,0,0), p.n);
+
+	return PxTransform(-p.n * p.d, q);
+}
+
+
+
+PxVec3 intersect(PxVec4 p0, PxVec4 p1, PxVec4 p2)
+{
+	const PxVec3& d0 = reinterpret_cast<const PxVec3&>(p0);
+	const PxVec3& d1 = reinterpret_cast<const PxVec3&>(p1);
+	const PxVec3& d2 = reinterpret_cast<const PxVec3&>(p2);
+
+	return (p0.w * d1.cross(d2) 
+		  + p1.w * d2.cross(d0) 
+		  + p2.w * d0.cross(d1))
+		  / d0.dot(d2.cross(d1));
+}
+
+const uint16_t sInvalid = uint16_t(-1);
+
+// restriction: only supports a single patch per vertex.
+struct HalfedgeMesh
+{
+	struct Halfedge
+	{
+		Halfedge(uint16_t vertex = sInvalid, uint16_t face = sInvalid, 
+			uint16_t next = sInvalid, uint16_t prev = sInvalid)
+			: mVertex(vertex), mFace(face), mNext(next), mPrev(prev)
+		{}
+
+		uint16_t mVertex; // to
+		uint16_t mFace; // left
+		uint16_t mNext; // ccw
+		uint16_t mPrev; // cw
+	};
+
+	HalfedgeMesh() : mNumTriangles(0) {}
+
+	uint16_t findHalfedge(uint16_t v0, uint16_t v1)
+	{
+		uint16_t h = mVertices[v0], start = h;
+		while(h != sInvalid && mHalfedges[h].mVertex != v1)
+		{
+			h = mHalfedges[(uint32_t)h ^ 1].mNext;
+			if(h == start) 
+				return sInvalid;
+		}
+		return h;
+	}
+
+	void connect(uint16_t h0, uint16_t h1)
+	{
+		mHalfedges[h0].mNext = h1;
+		mHalfedges[h1].mPrev = h0;
+	}
+
+	void addTriangle(uint16_t v0, uint16_t v1, uint16_t v2)
+	{
+		// add new vertices
+		uint16_t n = (uint16_t)(PxMax(v0, PxMax(v1, v2))+1);
+		if(mVertices.size() < n)
+			mVertices.resize(n, sInvalid);
+
+		// collect halfedges, prev and next of triangle
+		uint16_t verts[] = { v0, v1, v2 };
+		uint16_t handles[3], prev[3], next[3];
+		for(uint16_t i=0; i<3; ++i)
+		{
+			uint16_t j = (uint16_t)((i+1)%3);
+			uint16_t h = findHalfedge(verts[i], verts[j]);
+			if(h == sInvalid)
+			{
+				// add new edge
+				PX_ASSERT(mHalfedges.size() <= UINT16_MAX);
+				h = (uint16_t)mHalfedges.size();
+				mHalfedges.pushBack(Halfedge(verts[j]));
+				mHalfedges.pushBack(Halfedge(verts[i]));
+			}
+			handles[i] = h;
+			prev[i] = mHalfedges[h].mPrev;
+			next[i] = mHalfedges[h].mNext;
+		}
+
+		// patch connectivity
+		for(uint16_t i=0; i<3; ++i)
+		{
+			uint16_t j = (uint16_t)((i+1)%3);
+
+			PX_ASSERT(mFaces.size() <= UINT16_MAX);
+			mHalfedges[handles[i]].mFace = (uint16_t)mFaces.size();
+
+			// connect prev and next
+			connect(handles[i], handles[j]);
+
+			if(next[j] == sInvalid) // new next edge, connect opposite
+				connect((uint32_t)(handles[j]^1), next[i]!=sInvalid ? next[i] : (uint32_t)(handles[i]^1));
+
+			if(prev[i] == sInvalid) // new prev edge, connect opposite
+				connect(prev[j]!=sInvalid ? prev[j] : (uint32_t)(handles[j]^1), (uint32_t)(handles[i]^1));
+
+			// prev is boundary, update middle vertex
+			if(mHalfedges[(uint32_t)(handles[i]^1)].mFace == sInvalid)
+				mVertices[verts[j]] = (uint32_t)(handles[i]^1);
+		}
+
+		mFaces.pushBack(handles[2]);
+		++mNumTriangles;
+	}
+
+	uint16_t removeTriangle(uint16_t f)
+	{
+		uint16_t result = sInvalid;
+
+		for(uint16_t i=0, h = mFaces[f]; i<3; ++i)
+		{
+			uint16_t v0 = mHalfedges[(uint32_t)(h^1)].mVertex;
+			uint16_t v1 = mHalfedges[h].mVertex;
+
+			mHalfedges[h].mFace = sInvalid;
+
+			if(mHalfedges[(uint32_t)(h^1)].mFace == sInvalid) // was boundary edge, remove
+			{
+				uint16_t v0Prev = mHalfedges[h  ].mPrev;
+				uint16_t v0Next = mHalfedges[(uint32_t)(h^1)].mNext;
+				uint16_t v1Prev = mHalfedges[(uint32_t)(h^1)].mPrev;
+				uint16_t v1Next = mHalfedges[h  ].mNext;
+
+				// update halfedge connectivity
+				connect(v0Prev, v0Next);
+				connect(v1Prev, v1Next);
+
+				// update vertex boundary or delete
+				mVertices[v0] = (v0Prev^1) == v0Next ? sInvalid : v0Next;
+				mVertices[v1] = (v1Prev^1) == v1Next ? sInvalid : v1Next;
+			} 
+			else 
+			{
+				mVertices[v0] = h; // update vertex boundary
+				result = v1;
+			}
+
+			h = mHalfedges[h].mNext;
+		}
+
+		mFaces[f] = sInvalid;
+		--mNumTriangles;
+
+		return result;
+	}
+
+	// true if vertex v is in front of face f
+	bool visible(uint16_t v, uint16_t f)
+	{
+		uint16_t h = mFaces[f];
+		if(h == sInvalid)
+			return false;
+
+		uint16_t v0 = mHalfedges[h].mVertex;
+		h = mHalfedges[h].mNext;
+		uint16_t v1 = mHalfedges[h].mVertex;
+		h = mHalfedges[h].mNext;
+		uint16_t v2 = mHalfedges[h].mVertex;
+		h = mHalfedges[h].mNext;
+
+		return det(mPoints[v], mPoints[v0], mPoints[v1], mPoints[v2]) < -1e-5f;
+	}
+
+	/*
+	void print() const
+	{
+		for(uint32_t i=0; i<mFaces.size(); ++i)
+		{
+			printf("f%u: ", i);
+			uint16_t h = mFaces[i];
+			if(h == sInvalid)
+			{
+				printf("deleted\n");
+				continue;
+			}
+
+			for(int j=0; j<3; ++j)
+			{
+				printf("h%u -> v%u -> ", uint32_t(h), uint32_t(mHalfedges[h].mVertex));
+				h = mHalfedges[h].mNext;
+			}
+
+			printf("\n");
+		}
+
+		for(uint32_t i=0; i<mVertices.size(); ++i)
+		{
+			printf("v%u: ", i);
+			uint16_t h = mVertices[i];
+			if(h == sInvalid)
+			{
+				printf("deleted\n");
+				continue;
+			}
+			
+			uint16_t start = h;
+			do {
+				printf("h%u -> v%u, ", uint32_t(h), uint32_t(mHalfedges[h].mVertex));
+				h = mHalfedges[h^1].mNext;
+			} while (h != start);
+
+			printf("\n");
+		}
+
+		for(uint32_t i=0; i<mHalfedges.size(); ++i)
+			printf("h%u: v%u, f%u, p%u, n%u\n", i, uint32_t(mHalfedges[i].mVertex),
+				uint32_t(mHalfedges[i].mFace), uint32_t(mHalfedges[i].mPrev), uint32_t(mHalfedges[i].mNext));
+	}
+	*/
+
+	nvidia::Array<Halfedge> mHalfedges;
+	nvidia::Array<uint16_t> mVertices; // vertex -> (boundary) halfedge
+	nvidia::Array<uint16_t> mFaces; // face -> halfedge
+	nvidia::Array<PxVec4> mPoints;
+	uint16_t mNumTriangles;
+};
+
+
+
+void ConvexMeshBuilder::operator()(uint32_t planeMask, float scale)
+{
+	uint32_t numPlanes = shdfnd::bitCount(planeMask);
+
+	if (numPlanes == 1)
+	{
+		PxTransform t = nvidia::apex::PxTransformFromPlaneEquation(reinterpret_cast<const PxPlane&>(mPlanes[lowestSetBit(planeMask)]));
+
+		if (!t.isValid())
+			return;
+
+		const uint16_t indices[] = { 0, 1, 2, 0, 2, 3 };
+		const PxVec3 vertices[] = { 
+			PxVec3(0.0f,  scale,  scale), 
+			PxVec3(0.0f, -scale,  scale),
+			PxVec3(0.0f, -scale, -scale),			
+			PxVec3(0.0f,  scale, -scale) };
+
+			PX_ASSERT(mVertices.size() <= UINT16_MAX);
+			uint16_t baseIndex = (uint16_t)mVertices.size();
+
+			for (uint32_t i=0; i < 4; ++i)
+				mVertices.pushBack(t.transform(vertices[i]));
+
+			for (uint32_t i=0; i < 6; ++i)
+				mIndices.pushBack((uint16_t)(indices[i] + baseIndex));
+
+			return;
+	}
+
+	if(numPlanes < 4)
+		return; // todo: handle degenerate cases
+
+	HalfedgeMesh mesh;
+
+	// gather points (planes, that is)
+	mesh.mPoints.reserve(numPlanes);
+	for(; planeMask; planeMask &= planeMask-1)
+		mesh.mPoints.pushBack(mPlanes[shdfnd::lowestSetBit(planeMask)]);
+
+	// initialize to tetrahedron
+	mesh.addTriangle(0, 1, 2);
+	mesh.addTriangle(0, 3, 1);
+	mesh.addTriangle(1, 3, 2);
+	mesh.addTriangle(2, 3, 0);
+
+	// flip if inside-out
+	if(mesh.visible(3, 0))
+		shdfnd::swap(mesh.mPoints[0], mesh.mPoints[1]);
+
+	// iterate through remaining points
+	for(uint16_t i=4; i<mesh.mPoints.size(); ++i)
+	{
+		// remove any visible triangle
+		uint16_t v0 = sInvalid;
+		for(uint16_t j=0; j<mesh.mFaces.size(); ++j)
+		{
+			if(mesh.visible(i, j))
+				v0 = PxMin(v0, mesh.removeTriangle(j));
+		}
+
+		if(v0 == sInvalid)
+			continue; // no triangle removed
+
+		if(!mesh.mNumTriangles)
+			return; // empty mesh
+
+		// find non-deleted boundary vertex
+		for(uint16_t h=0; mesh.mVertices[v0] == sInvalid; h+=2)
+		{
+			if ((mesh.mHalfedges[h  ].mFace == sInvalid) ^ 
+				(mesh.mHalfedges[(uint32_t)(h+1)].mFace == sInvalid))
+			{
+				v0 = mesh.mHalfedges[h].mVertex;
+			}
+		}
+
+		// tesselate hole
+		uint16_t start = v0;
+		do {
+			uint16_t h = mesh.mVertices[v0];
+			uint16_t v1 = mesh.mHalfedges[h].mVertex;
+			if(mesh.mFaces.size() == UINT16_MAX)
+				break; // safety net
+			mesh.addTriangle(v0, v1, i);
+			v0 = v1;
+		} while(v0 != start);
+
+		bool noHole = true;
+		for(uint16_t h=0; noHole && h < mesh.mHalfedges.size(); h+=2)
+		{
+			if ((mesh.mHalfedges[h  ].mFace == sInvalid) ^ 
+				(mesh.mHalfedges[(uint32_t)(h+1)].mFace == sInvalid))
+				noHole = false;
+		}
+
+		if(!noHole || mesh.mFaces.size() == UINT16_MAX)
+		{
+			mesh.mFaces.resize(0);
+			mesh.mVertices.resize(0);
+			break;
+		}
+	}
+
+	// convert triangles to vertices (intersection of 3 planes)
+	nvidia::Array<uint32_t> face2Vertex(mesh.mFaces.size());
+	for(uint32_t i=0; i<mesh.mFaces.size(); ++i)
+	{
+		face2Vertex[i] = mVertices.size();
+
+		uint16_t h = mesh.mFaces[i];
+		if(h == sInvalid)
+			continue;
+
+		uint16_t v0 = mesh.mHalfedges[h].mVertex;
+		h = mesh.mHalfedges[h].mNext;
+		uint16_t v1 = mesh.mHalfedges[h].mVertex;
+		h = mesh.mHalfedges[h].mNext;
+		uint16_t v2 = mesh.mHalfedges[h].mVertex;
+
+		mVertices.pushBack(intersect(mesh.mPoints[v0], mesh.mPoints[v1], mesh.mPoints[v2]));
+	}
+
+	// convert vertices to polygons (face one-ring)
+	for(uint32_t i=0; i<mesh.mVertices.size(); ++i)
+	{
+		uint16_t h = mesh.mVertices[i];
+		if(h == sInvalid)
+			continue;
+
+		uint32_t v0 = face2Vertex[mesh.mHalfedges[h].mFace];
+		h = (uint16_t)(mesh.mHalfedges[h].mPrev^1);
+		uint32_t v1 = face2Vertex[mesh.mHalfedges[h].mFace];
+
+		while(true)
+		{
+			h = (uint16_t)(mesh.mHalfedges[h].mPrev^1);
+			uint32_t v2 = face2Vertex[mesh.mHalfedges[h].mFace];
+
+			if(v0 == v2) 
+				break;
+			
+			PX_ASSERT(v0 <= UINT16_MAX);
+			PX_ASSERT(v1 <= UINT16_MAX);
+			PX_ASSERT(v2 <= UINT16_MAX);
+			mIndices.pushBack((uint16_t)v0);
+			mIndices.pushBack((uint16_t)v2);
+			mIndices.pushBack((uint16_t)v1);
+
+			v1 = v2; 
+		}
+
+	}
+}
+
+// Diagonalize a symmetric 3x3 matrix.  Returns the eigenvectors in the first parameter, eigenvalues as the return value.
+PxVec3 diagonalizeSymmetric(PxMat33& eigenvectors, const PxMat33& m)
+{
+	// jacobi rotation using quaternions (from an idea of Stan Melax, with fix for precision issues)
+
+	const uint32_t MAX_ITERS = 24;
+
+	PxQuat q = PxQuat(PxIdentity);
+
+	PxMat33 d;
+	for(uint32_t i=0; i < MAX_ITERS;i++)
+	{
+		eigenvectors = PxMat33(q);
+		d = eigenvectors.getTranspose() * m * eigenvectors;
+
+		const float d0 = PxAbs(d[1][2]), d1 = PxAbs(d[0][2]), d2 = PxAbs(d[0][1]);
+		const uint32_t a = d0 > d1 && d0 > d2 ? 0u : d1 > d2 ? 1u : 2u;						// rotation axis index, from largest off-diagonal element
+
+		const uint32_t a1 = (a+1)%3;
+		const uint32_t a2 = (a+2)%3;											
+		if(d[a1][a2] == 0.0f || PxAbs(d[a1][a1]-d[a2][a2]) > 2e6*PxAbs(2.0f*d[a1][a2]))
+		{
+			break;
+		}
+
+		const float w = (d[a1][a1]-d[a2][a2]) / (2.0f*d[a1][a2]);					// cot(2 * phi), where phi is the rotation angle
+		const float absw = PxAbs(w);
+
+		float c, s;
+		if(absw>1000)
+		{
+			c = 1;
+			s = 1/(4*w);														// h will be very close to 1, so use small angle approx instead
+		}
+		else
+		{
+			float t = 1 / (absw + PxSqrt(w*w+1));								// absolute value of tan phi
+			float h = 1 / PxSqrt(t*t+1);										// absolute value of cos phi
+			PX_ASSERT(h!=1);													// |w|<1000 guarantees this with typical IEEE754 machine eps (approx 6e-8)
+			c = PxSqrt((1+h)/2);
+			s = PxSqrt((1-h)/2) * PxSign(w);
+		}
+
+		PxQuat r(0,0,0,c);
+		reinterpret_cast<PxVec4&>(r)[a] = s;
+
+		q = (q*r).getNormalized();
+	}
+
+	return PxVec3(d.column0.x, d.column1.y, d.column2.z);
+}
+
+
+
+}
+} // end namespace nvidia::apex
author	git perforce import user <a@b>	2016-10-25 12:29:14 -0600
committer	Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>	2016-10-25 18:56:37 -0500
commit	3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree	fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /APEX_1.4/common/src/ApexSharedUtils.cpp
download	physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip