diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/GeomUtils/src/mesh | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/GeomUtils/src/mesh')
58 files changed, 15683 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32.cpp new file mode 100644 index 00000000..1ee2a683 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32.cpp @@ -0,0 +1,277 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. 
All rights reserved. + +#include "foundation/PxMemory.h" +#include "GuBV32.h" +#include "GuSerialize.h" +#include "CmUtils.h" +#include "PsUtilities.h" + +using namespace physx; +using namespace Gu; + +#define DELETEARRAY(x) if (x) { delete []x; x = NULL; } + + +BV32Tree::BV32Tree(SourceMesh* meshInterface, const PxBounds3& localBounds) +{ + reset(); + init(meshInterface, localBounds); +} + +BV32Tree::BV32Tree() +{ + reset(); +} + +void BV32Tree::release() +{ + if (!mUserAllocated) + { + DELETEARRAY(mNodes); + PX_FREE_AND_RESET(mPackedNodes); + } + mNodes = NULL; + mNbNodes = 0; +} + +BV32Tree::~BV32Tree() +{ + release(); +} + +void BV32Tree::reset() +{ + mMeshInterface = NULL; + mNbNodes = 0; + mNodes = NULL; + mNbPackedNodes = 0; + mPackedNodes = NULL; + mInitData = 0; + mUserAllocated = false; +} + +void BV32Tree::operator=(BV32Tree& v) +{ + mMeshInterface = v.mMeshInterface; + mLocalBounds = v.mLocalBounds; + mNbNodes = v.mNbNodes; + mNodes = v.mNodes; + mInitData = v.mInitData; + mUserAllocated = v.mUserAllocated; + v.reset(); +} + +bool BV32Tree::init(SourceMesh* meshInterface, const PxBounds3& localBounds) +{ + mMeshInterface = meshInterface; + mLocalBounds.init(localBounds); + return true; +} + +// PX_SERIALIZATION +BV32Tree::BV32Tree(const PxEMPTY) +{ + mUserAllocated = true; +} + +void BV32Tree::exportExtraData(PxSerializationContext& stream) +{ + stream.alignData(16); + stream.writeData(mNodes, mNbNodes*sizeof(BVDataPacked)); +} + +void BV32Tree::importExtraData(PxDeserializationContext& context) +{ + context.alignExtraData(16); + mNodes = context.readExtraData<BV32Data>(mNbNodes); +} +//~PX_SERIALIZATION + +bool BV32Tree::load(PxInputStream& stream, PxU32 meshVersion) +{ + PX_ASSERT(!mUserAllocated); + PX_UNUSED(meshVersion); + + release(); + + PxI8 a, b, c, d; + readChunk(a, b, c, d, stream); + if (a != 'B' || b != 'V' || c != '3' || d != '2') + return false; + + const PxU32 version = 1; + const bool mismatch = (shdfnd::littleEndian() == 1); + if 
(readDword(mismatch, stream) != version) + return false; + + mLocalBounds.mCenter.x = readFloat(mismatch, stream); + mLocalBounds.mCenter.y = readFloat(mismatch, stream); + mLocalBounds.mCenter.z = readFloat(mismatch, stream); + mLocalBounds.mExtentsMagnitude = readFloat(mismatch, stream); + + mInitData = readDword(mismatch, stream); + + /*const PxU32 nbNodes = readDword(mismatch, stream); + mNbNodes = nbNodes; + + if (nbNodes) + { + BV32Data* nodes = PX_NEW(BV32Data)[nbNodes]; + + mNodes = nodes; + Cm::markSerializedMem(nodes, sizeof(BV32Data)*nbNodes); + + for (PxU32 i = 0; i<nbNodes; i++) + { + BV32Data& node = nodes[i]; + + readFloatBuffer(&node.mCenter.x, 3, mismatch, stream); + node.mData = readDword(mismatch, stream); + readFloatBuffer(&node.mExtents.x, 3, mismatch, stream); + } + }*/ + + + //read SOA format node data + const PxU32 nbPackedNodes = readDword(mismatch, stream); + mNbPackedNodes = nbPackedNodes; + + if (nbPackedNodes) + { + mPackedNodes = reinterpret_cast<BV32DataPacked*>(PX_ALLOC(sizeof(BV32DataPacked)*nbPackedNodes, "BV32DataPacked")); + + Cm::markSerializedMem(mPackedNodes, sizeof(BV32DataPacked)*nbPackedNodes); + + for (PxU32 i = 0; i < nbPackedNodes; ++i) + { + BV32DataPacked& node = mPackedNodes[i]; + node.mNbNodes = readDword(mismatch, stream); + PX_ASSERT(node.mNbNodes > 0); + ReadDwordBuffer(node.mData, node.mNbNodes, mismatch, stream); + const PxU32 nbElements = 4 * node.mNbNodes; + readFloatBuffer(&node.mCenter[0].x, nbElements, mismatch, stream); + readFloatBuffer(&node.mExtents[0].x, nbElements, mismatch, stream); + + } + } + + return true; +} + + +void BV32Tree::calculateLeafNode(BV32Data& node) +{ + if (!node.isLeaf()) + { + const PxU32 nbChildren = node.getNbChildren(); + const PxU32 offset = node.getChildOffset(); + //calcualte how many children nodes are leaf nodes + PxU32 nbLeafNodes = 0; + for (PxU32 i = 0; i < nbChildren; ++i) + { + BV32Data& child = mNodes[offset + i]; + + if (child.isLeaf()) + { + nbLeafNodes++; + } + } + 
+ node.mNbLeafNodes = nbLeafNodes; + for (PxU32 i = 0; i < nbChildren; ++i) + { + BV32Data& child = mNodes[offset + i]; + calculateLeafNode(child); + } + + } +} + + + +void BV32Tree::createSOAformatNode(BV32DataPacked& packedData, const BV32Data& node, const PxU32 childOffset, PxU32& currentIndex, PxU32& nbPackedNodes) +{ + + //found the next 32 nodes and fill it in SOA format + + const PxU32 nbChildren = node.getNbChildren(); + const PxU32 offset = node.getChildOffset(); + + + for (PxU32 i = 0; i < nbChildren; ++i) + { + BV32Data& child = mNodes[offset + i]; + + packedData.mCenter[i] = PxVec4(child.mCenter, 0.f); + packedData.mExtents[i] = PxVec4(child.mExtents, 0.f); + packedData.mData[i] = PxU32(child.mData); + } + + packedData.mNbNodes = nbChildren; + + PxU32 NbToGo = 0; + PxU32 NextIDs[32]; + memset(NextIDs, PX_INVALID_U32, sizeof(PxU32) * 32); + const BV32Data* ChildNodes[32]; + memset(ChildNodes, 0, sizeof(BV32Data*) * 32); + + + for (PxU32 i = 0; i< nbChildren; i++) + { + BV32Data& child = mNodes[offset + i]; + + if (!child.isLeaf()) + { + const PxU32 NextID = currentIndex; + + const PxU32 ChildSize = child.getNbChildren() - child.mNbLeafNodes; + currentIndex += ChildSize; + + //packedData.mData[i] = (packedData.mData[i] & ((1 << GU_BV4_CHILD_OFFSET_SHIFT_COUNT) - 1)) | (NextID << GU_BV4_CHILD_OFFSET_SHIFT_COUNT); + packedData.mData[i] = (packedData.mData[i] & ((1 << GU_BV4_CHILD_OFFSET_SHIFT_COUNT) - 1)) | ((childOffset + NbToGo) << GU_BV4_CHILD_OFFSET_SHIFT_COUNT); + + NextIDs[NbToGo] = NextID; + ChildNodes[NbToGo] = &child; + NbToGo++; + } + } + + nbPackedNodes += NbToGo; + for (PxU32 i = 0; i < NbToGo; ++i) + { + const BV32Data& child = *ChildNodes[i]; + + BV32DataPacked& childData = mPackedNodes[childOffset+i]; + + createSOAformatNode(childData, child, NextIDs[i], currentIndex, nbPackedNodes); + + } + +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32.h new file mode 100644 index 00000000..4caf67d5 
--- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32.h @@ -0,0 +1,146 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef GU_BV32_H
#define GU_BV32_H

#include "foundation/PxBounds3.h"
#include "PxSerialFramework.h"
#include "PsUserAllocated.h"
#include "GuBV4.h"
#include "CmPhysXCommon.h"
#include "PsArray.h"
#include "foundation/PxVec4.h"

namespace physx
{
	namespace Gu
	{
		// One node of a 32-wide bounding-volume tree (AOS layout).
		// mData packs leaf/internal info in its low bits:
		//   bit 0    : 1 = leaf, 0 = internal node
		//   leaf     : bits 1..6 = referenced triangle count, bits 7.. = triangle start index
		//   internal : bits 1..(GU_BV4_CHILD_OFFSET_SHIFT_COUNT-1) = child count,
		//              bits GU_BV4_CHILD_OFFSET_SHIFT_COUNT.. = child node offset
		struct BV32Data : public physx::shdfnd::UserAllocated
		{
			PxVec3	mCenter;       // AABB center
			PxU32	mNbLeafNodes;  // number of direct children that are leaves (filled by BV32Tree::calculateLeafNode)
			PxVec3	mExtents;      // AABB half-extents (negative => empty, see setEmpty)
			size_t	mData;         // packed leaf/child data, layout above


			PX_FORCE_INLINE BV32Data() : mNbLeafNodes(0), mData(PX_INVALID_U32)
			{
				setEmpty();
			}

			// Non-zero when this node is a leaf (tests bit 0).
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 isLeaf() const { return mData & 1; }

			// if the node is leaf,
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getNbReferencedTriangles() const { PX_ASSERT(isLeaf()); return PxU32((mData >>1)&63); }
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getTriangleStartIndex() const { PX_ASSERT(isLeaf()); return PxU32(mData >> 7); }

			// if the node isn't leaf, we will get the childOffset
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getChildOffset() const { PX_ASSERT(!isLeaf()); return PxU32(mData >> GU_BV4_CHILD_OFFSET_SHIFT_COUNT); }
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getNbChildren() const { PX_ASSERT(!isLeaf()); return ((mData) & ((1 << GU_BV4_CHILD_OFFSET_SHIFT_COUNT) - 1))>>1; }

			// Reconstruct the AABB min/max corners from center/extents.
			PX_CUDA_CALLABLE PX_FORCE_INLINE void getMinMax(PxVec3& min, PxVec3& max) const
			{
				min = mCenter - mExtents;
				max = mCenter + mExtents;
			}

			// Mark the node's bounds as empty (negative extents).
			PX_FORCE_INLINE void setEmpty()
			{
				mCenter = PxVec3(0.0f, 0.0f, 0.0f);
				mExtents = PxVec3(-1.0f, -1.0f, -1.0f);
			}

		};

		// SOA-packed block of up to 32 sibling nodes: all centers, then all
		// extents, then all packed data words. 16-byte aligned for vector loads.
		// mData[i] uses the same bit layout as BV32Data::mData (truncated to 32 bits).
		PX_ALIGN_PREFIX(16)
		struct BV32DataPacked
		{
			PxVec4 mCenter[32];   // child AABB centers (w unused)
			PxVec4 mExtents[32];  // child AABB half-extents (w unused)
			PxU32 mData[32];      // packed leaf/child data per child
			PxU32 mNbNodes;       // number of valid children in this block
			PxU32 pad[3];         // keep the struct 16-byte aligned in size

			PX_CUDA_CALLABLE PX_FORCE_INLINE BV32DataPacked() : mNbNodes(0)
			{
			}

			// Non-zero when child 'index' is a leaf (tests bit 0).
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 isLeaf(const PxU32 index) const { return mData[index] & 1; }
			// if the node is leaf,
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getNbReferencedTriangles(const PxU32 index) const { PX_ASSERT(isLeaf(index)); return (mData[index] >> 1) & 63; }
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getTriangleStartIndex(const PxU32 index) const { PX_ASSERT(isLeaf(index)); return (mData[index] >> 7); }
			// if the node isn't leaf, we will get the childOffset
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getChildOffset(const PxU32 index) const { PX_ASSERT(!isLeaf(index)); return mData[index] >> GU_BV4_CHILD_OFFSET_SHIFT_COUNT; }
			PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 getNbChildren(const PxU32 index) const { PX_ASSERT(!isLeaf(index)); return ((mData[index])& ((1 << GU_BV4_CHILD_OFFSET_SHIFT_COUNT) - 1)) >> 1; }
		}
		PX_ALIGN_SUFFIX(16);

		// 32-wide BV tree over a triangle mesh. Holds both the AOS node array
		// (mNodes) and the SOA packed blocks (mPackedNodes) built from it.
		class BV32Tree : public physx::shdfnd::UserAllocated
		{
		public:
			// PX_SERIALIZATION
			BV32Tree(const PxEMPTY);
			void exportExtraData(PxSerializationContext&);
			void importExtraData(PxDeserializationContext& context);
			static void getBinaryMetaData(PxOutputStream& stream);
			//~PX_SERIALIZATION

			PX_PHYSX_COMMON_API BV32Tree();
			PX_PHYSX_COMMON_API BV32Tree(SourceMesh* meshInterface, const PxBounds3& localBounds);
			PX_PHYSX_COMMON_API ~BV32Tree();

			bool load(PxInputStream& stream, PxU32 meshVersion); // converts to proper endian at load time

			void calculateLeafNode(BV32Data& node);
			void createSOAformatNode(BV32DataPacked& packedData, const BV32Data& node, const PxU32 childOffset, PxU32& currentIndex, PxU32& nbPackedNodes);

			void reset();
			void operator = (BV32Tree& v);

			bool init(SourceMesh* meshInterface, const PxBounds3& localBounds);
			void release();

			SourceMesh* mMeshInterface;  // mesh this tree indexes into (not owned)
			LocalBounds mLocalBounds;    // tree bounds in mesh-local space

			PxU32 mNbNodes;              // number of AOS nodes
			BV32Data* mNodes;            // AOS node array (owned unless mUserAllocated)
			BV32DataPacked* mPackedNodes;// SOA packed node blocks (owned unless mUserAllocated)
			PxU32 mNbPackedNodes;        // number of SOA blocks
			PxU32 mInitData;             // root child count + 1 (flattening start index)
			// NOTE(review): original comment claimed a BV4-style layout
			// constraint ("right after mCenterOrMinCoeff/mExtentsOrMaxCoeff")
			// that does not match this struct — confirm before reordering members.
			bool mUserAllocated;         // true when node memory is serialized/user-owned
			bool mPadding[3];            // pad to 4-byte boundary
		};

	} // namespace Gu
}

#endif // GU_BV32_H
a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32Build.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32Build.cpp new file mode 100644 index 00000000..da62280f --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32Build.cpp @@ -0,0 +1,530 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. 
All rights reserved. + +#include "foundation/PxVec4.h" +#include "GuBV32Build.h" +#include "GuBV32.h" +#include "PxTriangle.h" +#include "CmPhysXCommon.h" +#include "PsBasicTemplates.h" +#include "GuCenterExtents.h" +#include "GuBV4Build.h" +#include "PsAllocator.h" + +using namespace physx; +using namespace Gu; + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#define DELETESINGLE(x) if (x) { delete x; x = NULL; } +#define DELETEARRAY(x) if (x) { delete []x; x = NULL; } + +struct BV32Node : public physx::shdfnd::UserAllocated +{ + BV32Node() : mNbChildBVNodes(0) + {} + + BV32Data mBVData[32]; + PxU32 mNbChildBVNodes; + + PX_FORCE_INLINE size_t isLeaf(PxU32 i) const { return mBVData[i].mData & 1; } + PX_FORCE_INLINE PxU32 getPrimitive(PxU32 i) const { return PxU32(mBVData[i].mData >> 1); } + PX_FORCE_INLINE const BV32Node* getChild(PxU32 i) const { return reinterpret_cast<BV32Node*>(mBVData[i].mData); } + + + PxU32 getSize() const + { + return sizeof(BV32Data)*mNbChildBVNodes; + } +}; + + +static void fillInNodes(const AABBTreeNode* current_node, const PxU32 startIndex, const PxU32 endIndex, const AABBTreeNode** NODES, PxU32& stat) +{ + + if (startIndex + 1 == endIndex) + { + //fill in nodes + const AABBTreeNode* P = current_node->getPos(); + const AABBTreeNode* N = current_node->getNeg(); + NODES[startIndex] = P; + NODES[endIndex] = N; + stat += 2; + } + else + { + const AABBTreeNode* P = current_node->getPos(); + const AABBTreeNode* N = current_node->getNeg(); + const PxU32 midIndex = startIndex + ((endIndex - startIndex) / 2); + if (!P->isLeaf()) + fillInNodes(P, startIndex, midIndex, NODES, stat); + else + { + NODES[startIndex] = P; + stat++; + } + + if (!N->isLeaf()) + fillInNodes(N, midIndex + 1, endIndex, NODES, stat); + else + { + NODES[midIndex + 1] = N; + stat++; + } + } +} + + + +static void setPrimitive(const AABBTree& source, BV32Node* node32, PxU32 i, const AABBTreeNode* node, float epsilon) +{ + const PxU32 nbPrims = 
node->getNbPrimitives(); + PX_ASSERT(nbPrims<=32); + const PxU32* indexBase = source.getIndices(); + const PxU32* prims = node->getPrimitives(); + const PxU32 offset = PxU32(prims - indexBase); + +#if BV32_VALIDATE + for (PxU32 j = 0; j<nbPrims; j++) + { + PX_ASSERT(prims[j] == offset + j); + } +#endif + const PxU32 primitiveIndex = (offset << 6) | (nbPrims & 63); + + node32->mBVData[i].mCenter = node->getAABB().getCenter(); + node32->mBVData[i].mExtents = node->getAABB().getExtents(); + if (epsilon != 0.0f) + node32->mBVData[i].mExtents += PxVec3(epsilon, epsilon, epsilon); + node32->mBVData[i].mData = (primitiveIndex << 1) | 1; +} + +static BV32Node* setNode(const AABBTree& source, BV32Node* node32, PxU32 i, const AABBTreeNode* node, float epsilon) +{ + BV32Node* child = NULL; + + if (node) + { + if (node->isLeaf()) + { + setPrimitive(source, node32, i, node, epsilon); + } + else + { + node32->mBVData[i].mCenter = node->getAABB().getCenter(); + node32->mBVData[i].mExtents = node->getAABB().getExtents(); + if (epsilon != 0.0f) + node32->mBVData[i].mExtents += PxVec3(epsilon, epsilon, epsilon); + + child = PX_NEW(BV32Node); + node32->mBVData[i].mData = size_t(child); + } + } + + return child; +} + + +static void _BuildBV32(const AABBTree& source, BV32Node* tmp, const AABBTreeNode* current_node, float epsilon, PxU32& nbNodes) +{ + PX_ASSERT(!current_node->isLeaf()); + + const AABBTreeNode* NODES[32]; + memset(NODES, 0, sizeof(AABBTreeNode*) * 32); + + fillInNodes(current_node, 0, 31, NODES, tmp->mNbChildBVNodes); + + PxU32 left = 0; + PxU32 right = 31; + + while (left < right) + { + + //sweep from the front + while (left<right) + { + //found a hole + if (NODES[left] == NULL) + break; + left++; + } + + //sweep from the back + while (left < right) + { + //found a node + if (NODES[right]) + break; + right--; + } + + if (left != right) + { + //swap left and right + const AABBTreeNode* node = NODES[right]; + NODES[right] = NODES[left]; + NODES[left] = node; + } + + } + + 
nbNodes += tmp->mNbChildBVNodes; + + for (PxU32 i = 0; i < tmp->mNbChildBVNodes; ++i) + { + const AABBTreeNode* tempNode = NODES[i]; + BV32Node* Child = setNode(source, tmp, i, tempNode, epsilon); + if (Child) + { + _BuildBV32(source, Child, tempNode, epsilon, nbNodes); + } + } + +} + +// +//static void validateTree(const AABBTree& Source, const AABBTreeNode* currentNode) +//{ +// if (currentNode->isLeaf()) +// { +// const PxU32* indexBase = Source.getIndices(); +// const PxU32* prims = currentNode->getPrimitives(); +// const PxU32 offset = PxU32(prims - indexBase); +// const PxU32 nbPrims = currentNode->getNbPrimitives(); +// for (PxU32 j = 0; j<nbPrims; j++) +// { +// PX_ASSERT(prims[j] == offset + j); +// } +// } +// else +// { +// const AABBTreeNode* pos = currentNode->getPos(); +// validateTree(Source, pos); +// const AABBTreeNode* neg = currentNode->getNeg(); +// validateTree(Source, neg); +// } +//} + +#if BV32_VALIDATE +static void validateNodeBound(const BV32Node* currentNode, SourceMesh* mesh) +{ + const PxU32 nbNodes = currentNode->mNbChildBVNodes; + for (PxU32 i = 0; i < nbNodes; ++i) + { + const BV32Node* node = currentNode->getChild(i); + if (currentNode->isLeaf(i)) + { + BV32Data data = currentNode->mBVData[i]; + PxU32 nbTriangles = data.getNbReferencedTriangles(); + PxU32 startIndex = data.getTriangleStartIndex(); + const IndTri32* triIndices = mesh->getTris32(); + const PxVec3* verts = mesh->getVerts(); + PxVec3 min(PX_MAX_F32, PX_MAX_F32, PX_MAX_F32); + PxVec3 max(-PX_MAX_F32, -PX_MAX_F32, -PX_MAX_F32); + for (PxU32 j = 0; j < nbTriangles; ++j) + { + IndTri32 index = triIndices[startIndex + j]; + + for (PxU32 k = 0; k < 3; ++k) + { + const PxVec3& v = verts[index.mRef[k]]; + + min.x = (min.x > v.x) ? v.x : min.x; + min.y = (min.y > v.y) ? v.y : min.y; + min.z = (min.z > v.z) ? v.z : min.z; + + max.x = (max.x < v.x) ? v.x : max.x; + max.y = (max.y > v.y) ? v.y : max.y; + max.z = (max.z > v.z) ? 
v.z : max.z; + } + } + + PxVec3 dMin, dMax; + data.getMinMax(dMin, dMax); + PX_ASSERT(dMin.x <= min.x && dMin.y <= min.y && dMin.z <= min.z); + PX_ASSERT(dMax.x >= max.x && dMax.y >= max.y && dMax.z >= min.z); + + } + else + { + validateNodeBound(node, mesh); + } + } +} +#endif + +static bool BuildBV32Internal(BV32Tree& bv32Tree, const AABBTree& Source, SourceMesh* mesh, float epsilon) +{ + if (mesh->getNbTriangles() <= 32) + { + bv32Tree.mNbPackedNodes = 1; + bv32Tree.mPackedNodes = reinterpret_cast<BV32DataPacked*>(PX_ALLOC(sizeof(BV32DataPacked), "BV32DataPacked")); + BV32DataPacked& packedData = bv32Tree.mPackedNodes[0]; + packedData.mNbNodes = 1; + packedData.mCenter[0] = PxVec4(Source.getBV().getCenter(), 0.f); + packedData.mExtents[0] = PxVec4(Source.getBV().getExtents(), 0.f); + packedData.mData[0] = (mesh->getNbTriangles() << 1) | 1; + return bv32Tree.init(mesh, Source.getBV()); + } + + { + struct Local + { + static void _CheckMD(const AABBTreeNode* current_node, PxU32& md, PxU32& cd) + { + cd++; + md = PxMax(md, cd); + + if (current_node->getPos()) { _CheckMD(current_node->getPos(), md, cd); cd--; } + if (current_node->getNeg()) { _CheckMD(current_node->getNeg(), md, cd); cd--; } + } + + static void _Check(AABBTreeNode* current_node) + { + if (current_node->isLeaf()) + return; + + AABBTreeNode* P = const_cast<AABBTreeNode*>(current_node->getPos()); + AABBTreeNode* N = const_cast<AABBTreeNode*>(current_node->getNeg()); + { + PxU32 MDP = 0; PxU32 CDP = 0; _CheckMD(P, MDP, CDP); + PxU32 MDN = 0; PxU32 CDN = 0; _CheckMD(N, MDN, CDN); + + if (MDP>MDN) + // if(MDP<MDN) + { + Ps::swap(*P, *N); + Ps::swap(P, N); + } + } + _Check(P); + _Check(N); + } + }; + Local::_Check(const_cast<AABBTreeNode*>(Source.getNodes())); + } + + + PxU32 nbNodes = 1; + BV32Node* Root32 = PX_NEW(BV32Node); + + + _BuildBV32(Source, Root32, Source.getNodes(), epsilon, nbNodes); + +#if BV32_VALIDATE + validateNodeBound(Root32, mesh); +#endif + + if (!bv32Tree.init(mesh, Source.getBV())) + 
return false; + BV32Tree* T = &bv32Tree; + + // Version with variable-sized nodes in single stream + { + struct Local + { + static void _Flatten(BV32Data* const dest, const PxU32 box_id, PxU32& current_id, const BV32Node* current, PxU32& max_depth, PxU32& current_depth, const PxU32 nb_nodes) + { + // Entering a new node => increase depth + current_depth++; + // Keep track of max depth + if (current_depth>max_depth) + max_depth = current_depth; + + for (PxU32 i = 0; i<current->mNbChildBVNodes; i++) + { + dest[box_id + i].mCenter = current->mBVData[i].mCenter; + dest[box_id + i].mExtents = current->mBVData[i].mExtents; + dest[box_id + i].mData = PxU32(current->mBVData[i].mData); + + PX_ASSERT(box_id + i < nb_nodes); + } + + PxU32 NbToGo = 0; + PxU32 NextIDs[32]; + memset(NextIDs, PX_INVALID_U32, sizeof(PxU32)*32); + const BV32Node* ChildNodes[32]; + memset(ChildNodes, 0, sizeof(BV32Node*)*32); + + BV32Data* data = dest + box_id; + for (PxU32 i = 0; i<current->mNbChildBVNodes; i++) + { + PX_ASSERT(current->mBVData[i].mData != PX_INVALID_U32); + + if (!current->isLeaf(i)) + { + + const BV32Node* ChildNode = current->getChild(i); + + const PxU32 NextID = current_id; + + const PxU32 ChildSize = ChildNode->mNbChildBVNodes; + current_id += ChildSize; + + const PxU32 ChildType = ChildNode->mNbChildBVNodes << 1; + data[i].mData = size_t(ChildType + (NextID << GU_BV4_CHILD_OFFSET_SHIFT_COUNT)); + //PX_ASSERT(data[i].mData == size_t(ChildType+(NextID<<3))); + + PX_ASSERT(box_id + i < nb_nodes); + + NextIDs[NbToGo] = NextID; + ChildNodes[NbToGo] = ChildNode; + NbToGo++; + } + } + + + + for (PxU32 i = 0; i<NbToGo; i++) + { + _Flatten(dest, NextIDs[i], current_id, ChildNodes[i], max_depth, current_depth, nb_nodes); + current_depth--; + } + + DELETESINGLE(current); + } + }; + + + PxU32 CurID = Root32->mNbChildBVNodes+1; + + BV32Data* Nodes = PX_NEW(BV32Data)[nbNodes]; + Nodes[0].mCenter = Source.getBV().getCenter(); + Nodes[0].mExtents = Source.getBV().getExtents(); + + const 
PxU32 ChildType = Root32->mNbChildBVNodes << 1; + Nodes[0].mData = size_t(ChildType + (1 << GU_BV4_CHILD_OFFSET_SHIFT_COUNT)); + + const PxU32 nbChilden = Nodes[0].getNbChildren(); + + PX_UNUSED(nbChilden); + + + T->mInitData = CurID; + PxU32 MaxDepth = 0; + PxU32 CurrentDepth = 0; + + Local::_Flatten(Nodes, 1, CurID, Root32, MaxDepth, CurrentDepth, nbNodes); + + PX_ASSERT(CurID == nbNodes); + + T->mNbNodes = nbNodes; + + T->mNodes = Nodes; + } + + + bv32Tree.calculateLeafNode(bv32Tree.mNodes[0]); + + bv32Tree.mPackedNodes = reinterpret_cast<BV32DataPacked*>(PX_ALLOC(sizeof(BV32DataPacked)*nbNodes, "BV32DataPacked")); + bv32Tree.mNbPackedNodes = nbNodes; + + PxU32 nbPackedNodes = 1; + PxU32 currentIndex = bv32Tree.mNodes[0].getNbChildren() - bv32Tree.mNodes[0].mNbLeafNodes + 1; + BV32DataPacked& packedData = bv32Tree.mPackedNodes[0]; + bv32Tree.createSOAformatNode(packedData, bv32Tree.mNodes[0], 1, currentIndex, nbPackedNodes); + + bv32Tree.mNbPackedNodes = nbPackedNodes; + + PX_ASSERT(nbPackedNodes == currentIndex); + PX_ASSERT(nbPackedNodes > 0); + + return true; +} + +///// + +struct ReorderData32 +{ + const SourceMesh* mMesh; + PxU32* mOrder; + PxU32 mNbTrisPerLeaf; + PxU32 mIndex; + PxU32 mNbTris; + PxU32 mStats[32]; +}; + +static bool gReorderCallback(const AABBTreeNode* current, PxU32 /*depth*/, void* userData) +{ + ReorderData32* Data = reinterpret_cast<ReorderData32*>(userData); + if (current->isLeaf()) + { + const PxU32 n = current->getNbPrimitives(); + PX_ASSERT(n > 0); + PX_ASSERT(n <= Data->mNbTrisPerLeaf); + Data->mStats[n-1]++; + PxU32* Prims = const_cast<PxU32*>(current->getPrimitives()); + + for (PxU32 i = 0; i<n; i++) + { + PX_ASSERT(Prims[i]<Data->mNbTris); + Data->mOrder[Data->mIndex] = Prims[i]; + PX_ASSERT(Data->mIndex<Data->mNbTris); + Prims[i] = Data->mIndex; + Data->mIndex++; + } + } + return true; +} + + +bool physx::Gu::BuildBV32Ex(BV32Tree& tree, SourceMesh& mesh, float epsilon, PxU32 nbTrisPerLeaf) +{ + const PxU32 nbTris = 
mesh.mNbTris; + + AABBTree Source; + if (!Source.buildFromMesh(mesh, nbTrisPerLeaf)) + return false; + + + { + PxU32* order = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbTris, "BV32")); + ReorderData32 RD; + RD.mMesh = &mesh; + RD.mOrder = order; + RD.mNbTrisPerLeaf = nbTrisPerLeaf; + RD.mIndex = 0; + RD.mNbTris = nbTris; + for (PxU32 i = 0; i<32; i++) + RD.mStats[i] = 0; + Source.walk(gReorderCallback, &RD); + PX_ASSERT(RD.mIndex == nbTris); + mesh.remapTopology(order); + PX_FREE(order); + // for(PxU32 i=0;i<16;i++) + // printf("%d: %d\n", i, RD.mStats[i]); + } + + + //if (mesh.getNbTriangles() <= nbTrisPerLeaf) + // return tree.init(&mesh, Source.getBV()); + + return BuildBV32Internal(tree, Source, &mesh, epsilon); +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32Build.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32Build.h new file mode 100644 index 00000000..68b8ebaf --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV32Build.h @@ -0,0 +1,50 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV32_BUILD_H +#define GU_BV32_BUILD_H + +#include "foundation/PxSimpleTypes.h" +#include "common/PxPhysXCommonConfig.h" + +#define BV32_VALIDATE 0 + +namespace physx +{ + namespace Gu + { + class BV32Tree; + class SourceMesh; + + PX_PHYSX_COMMON_API bool BuildBV32Ex(BV32Tree& tree, SourceMesh& mesh, float epsilon, PxU32 nbTrisPerLeaf); + + } // namespace Gu +} + +#endif // GU_BV32_BUILD_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4.cpp new file mode 100644 index 00000000..b7e0f4d0 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4.cpp @@ -0,0 +1,261 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxMemory.h" +#include "GuBV4.h" +#include "GuSerialize.h" +#include "CmUtils.h" +#include "PsUtilities.h" + +using namespace physx; +using namespace Gu; + +#define DELETEARRAY(x) if (x) { delete []x; x = NULL; } + +SourceMesh::SourceMesh() +{ + reset(); +} + +SourceMesh::~SourceMesh() +{ + PX_FREE_AND_RESET(mRemap); +} + +void SourceMesh::reset() +{ + mNbVerts = 0; + mVerts = NULL; + mNbTris = 0; + mTriangles32 = NULL; + mTriangles16 = NULL; + mRemap = NULL; +} + +void SourceMesh::operator=(SourceMesh& v) +{ + mNbVerts = v.mNbVerts; + mVerts = v.mVerts; + mNbTris = v.mNbTris; + mTriangles32 = v.mTriangles32; + mTriangles16 = v.mTriangles16; + mRemap = v.mRemap; + v.reset(); +} + +void SourceMesh::remapTopology(const PxU32* order) +{ + if(!mNbTris) + return; + + if(mTriangles32) + { + IndTri32* newTopo = PX_NEW(IndTri32)[mNbTris]; + for(PxU32 i=0;i<mNbTris;i++) + newTopo[i] = mTriangles32[order[i]]; + + PxMemCopy(mTriangles32, newTopo, sizeof(IndTri32)*mNbTris); + DELETEARRAY(newTopo); + } + else + { + PX_ASSERT(mTriangles16); + IndTri16* newTopo = PX_NEW(IndTri16)[mNbTris]; + for(PxU32 i=0;i<mNbTris;i++) + newTopo[i] = mTriangles16[order[i]]; + + PxMemCopy(mTriangles16, newTopo, sizeof(IndTri16)*mNbTris); + DELETEARRAY(newTopo); + } + + { + PxU32* newMap = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*mNbTris, "OPC2")); + for(PxU32 i=0;i<mNbTris;i++) + newMap[i] = mRemap ? 
mRemap[order[i]] : order[i]; + + PX_FREE_AND_RESET(mRemap); + mRemap = newMap; + } +} + +bool SourceMesh::isValid() const +{ + if(!mNbTris || !mNbVerts) return false; + if(!mVerts) return false; + if(!mTriangles32 && !mTriangles16) return false; + return true; +} + +///// + +BV4Tree::BV4Tree(SourceMesh* meshInterface, const PxBounds3& localBounds) +{ + reset(); + init(meshInterface, localBounds); +} + +BV4Tree::BV4Tree() +{ + reset(); +} + +void BV4Tree::release() +{ + if(!mUserAllocated) + { +#ifdef GU_BV4_USE_SLABS + PX_DELETE_AND_RESET(mNodes); +#else + DELETEARRAY(mNodes); +#endif + } + + mNodes = NULL; + mNbNodes = 0; +} + +BV4Tree::~BV4Tree() +{ + release(); +} + +void BV4Tree::reset() +{ + mMeshInterface = NULL; + mNbNodes = 0; + mNodes = NULL; + mInitData = 0; +#ifdef GU_BV4_QUANTIZED_TREE + mCenterOrMinCoeff = PxVec3(0.0f); + mExtentsOrMaxCoeff = PxVec3(0.0f); +#endif + mUserAllocated = false; +} + +void BV4Tree::operator=(BV4Tree& v) +{ + mMeshInterface = v.mMeshInterface; + mLocalBounds = v.mLocalBounds; + mNbNodes = v.mNbNodes; + mNodes = v.mNodes; + mInitData = v.mInitData; +#ifdef GU_BV4_QUANTIZED_TREE + mCenterOrMinCoeff = v.mCenterOrMinCoeff; + mExtentsOrMaxCoeff = v.mExtentsOrMaxCoeff; +#endif + mUserAllocated = v.mUserAllocated; + v.reset(); +} + +bool BV4Tree::init(SourceMesh* meshInterface, const PxBounds3& localBounds) +{ + mMeshInterface = meshInterface; + mLocalBounds.init(localBounds); + return true; +} + +// PX_SERIALIZATION +BV4Tree::BV4Tree(const PxEMPTY) +{ + mUserAllocated = true; +} + +void BV4Tree::exportExtraData(PxSerializationContext& stream) +{ + stream.alignData(16); + stream.writeData(mNodes, mNbNodes*sizeof(BVDataPacked)); +} + +void BV4Tree::importExtraData(PxDeserializationContext& context) +{ + context.alignExtraData(16); + mNodes = context.readExtraData<BVDataPacked>(mNbNodes); +} +//~PX_SERIALIZATION + +bool BV4Tree::load(PxInputStream& stream, PxU32 meshVersion) +{ + PX_ASSERT(!mUserAllocated); + PX_UNUSED(meshVersion); + 
+ release(); + + PxI8 a, b, c, d; + readChunk(a, b, c, d, stream); + if(a!='B' || b!='V' || c!='4' || d!=' ') + return false; + + const PxU32 version = 1; + const bool mismatch = (shdfnd::littleEndian() == 1); + if(readDword(mismatch, stream) != version) + return false; + + mLocalBounds.mCenter.x = readFloat(mismatch, stream); + mLocalBounds.mCenter.y = readFloat(mismatch, stream); + mLocalBounds.mCenter.z = readFloat(mismatch, stream); + mLocalBounds.mExtentsMagnitude = readFloat(mismatch, stream); + + mInitData = readDword(mismatch, stream); + +#ifdef GU_BV4_QUANTIZED_TREE + mCenterOrMinCoeff.x = readFloat(mismatch, stream); + mCenterOrMinCoeff.y = readFloat(mismatch, stream); + mCenterOrMinCoeff.z = readFloat(mismatch, stream); + mExtentsOrMaxCoeff.x = readFloat(mismatch, stream); + mExtentsOrMaxCoeff.y = readFloat(mismatch, stream); + mExtentsOrMaxCoeff.z = readFloat(mismatch, stream); +#endif + const PxU32 nbNodes = readDword(mismatch, stream); + mNbNodes = nbNodes; + + if(nbNodes) + { +#ifdef GU_BV4_USE_SLABS + BVDataPacked* nodes = reinterpret_cast<BVDataPacked*>(PX_ALLOC(sizeof(BVDataPacked)*nbNodes, "BV4 nodes")); // PT: PX_NEW breaks alignment here +#else + BVDataPacked* nodes = PX_NEW(BVDataPacked)[nbNodes]; +#endif + mNodes = nodes; + Cm::markSerializedMem(nodes, sizeof(BVDataPacked)*nbNodes); + + for(PxU32 i=0;i<nbNodes;i++) + { + BVDataPacked& node = nodes[i]; +#ifdef GU_BV4_QUANTIZED_TREE + readWordBuffer(&node.mAABB.mData[0].mExtents, 6, mismatch, stream); +#else + readFloatBuffer(&node.mAABB.mCenter.x, 6, mismatch, stream); +#endif + node.mData = readDword(mismatch, stream); + } + } + else mNodes = NULL; + + return true; +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4.h new file mode 100644 index 00000000..8746ef08 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4.h @@ -0,0 +1,254 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of 
NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_BV4_H +#define GU_BV4_H + +#include "foundation/PxBounds3.h" +#include "PxSerialFramework.h" +#include "PsUserAllocated.h" +#include "GuBV4Settings.h" + +#define V4LoadU_Safe V4LoadU +#define V4LoadA_Safe V4LoadA +#define V4StoreA_Safe V4StoreA +#define V4StoreU_Safe V4StoreU + +namespace physx +{ +namespace Gu +{ + + struct VertexPointers + { + const PxVec3* Vertex[3]; + }; + + class IndTri32 : public physx::shdfnd::UserAllocated + { + public: + PX_FORCE_INLINE IndTri32() {} + PX_FORCE_INLINE IndTri32(PxU32 r0, PxU32 r1, PxU32 r2) { mRef[0]=r0; mRef[1]=r1; mRef[2]=r2; } + PX_FORCE_INLINE IndTri32(const IndTri32& triangle) + { + mRef[0] = triangle.mRef[0]; + mRef[1] = triangle.mRef[1]; + mRef[2] = triangle.mRef[2]; + } + PX_FORCE_INLINE ~IndTri32() {} + PxU32 mRef[3]; + }; + PX_COMPILE_TIME_ASSERT(sizeof(IndTri32)==12); + + class IndTri16 : public physx::shdfnd::UserAllocated + { + public: + PX_FORCE_INLINE IndTri16() {} + PX_FORCE_INLINE IndTri16(PxU16 r0, PxU16 r1, PxU16 r2) { mRef[0]=r0; mRef[1]=r1; mRef[2]=r2; } + PX_FORCE_INLINE IndTri16(const IndTri16& triangle) + { + mRef[0] = triangle.mRef[0]; + mRef[1] = triangle.mRef[1]; + mRef[2] = triangle.mRef[2]; + } + PX_FORCE_INLINE ~IndTri16() {} + PxU16 mRef[3]; + }; + PX_COMPILE_TIME_ASSERT(sizeof(IndTri16)==6); + + PX_FORCE_INLINE void getVertexReferences(PxU32& vref0, PxU32& vref1, PxU32& vref2, PxU32 index, const IndTri32* T32, const IndTri16* T16) + { + if(T32) + { + const IndTri32* PX_RESTRICT tri = T32 + index; + vref0 = tri->mRef[0]; + vref1 = tri->mRef[1]; + vref2 = tri->mRef[2]; + } + else + { + const IndTri16* PX_RESTRICT tri = T16 + index; + vref0 = tri->mRef[0]; + vref1 = tri->mRef[1]; + vref2 = tri->mRef[2]; + } + } + + class SourceMesh : public physx::shdfnd::UserAllocated + { + public: + PX_PHYSX_COMMON_API SourceMesh(); + PX_PHYSX_COMMON_API ~SourceMesh(); + // PX_SERIALIZATION + SourceMesh(const PxEMPTY) {} + static void getBinaryMetaData(PxOutputStream& stream); + 
//~PX_SERIALIZATION + + void reset(); + void operator = (SourceMesh& v); + + PxU32 mNbVerts; + const PxVec3* mVerts; + PxU32 mNbTris; + IndTri32* mTriangles32; + IndTri16* mTriangles16; + + PX_FORCE_INLINE PxU32 getNbTriangles() const { return mNbTris; } + PX_FORCE_INLINE PxU32 getNbVertices() const { return mNbVerts; } + PX_FORCE_INLINE const IndTri32* getTris32() const { return mTriangles32; } + PX_FORCE_INLINE const IndTri16* getTris16() const { return mTriangles16; } + PX_FORCE_INLINE const PxVec3* getVerts() const { return mVerts; } + + PX_FORCE_INLINE void setNbTriangles(PxU32 nb) { mNbTris = nb; } + PX_FORCE_INLINE void setNbVertices(PxU32 nb) { mNbVerts = nb; } + + PX_FORCE_INLINE void setPointers(IndTri32* tris32, IndTri16* tris16, const PxVec3* verts) + { + mTriangles32 = tris32; + mTriangles16 = tris16; + mVerts = verts; + } + + PX_FORCE_INLINE void initRemap() { mRemap = NULL; } + PX_FORCE_INLINE const PxU32* getRemap() const { return mRemap; } + PX_FORCE_INLINE void releaseRemap() { PX_FREE_AND_RESET(mRemap); } + void remapTopology(const PxU32* order); + + bool isValid() const; + + PX_FORCE_INLINE void getTriangle(VertexPointers& vp, PxU32 index) const + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, index, mTriangles32, mTriangles16); + vp.Vertex[0] = mVerts + VRef0; + vp.Vertex[1] = mVerts + VRef1; + vp.Vertex[2] = mVerts + VRef2; + } + private: + PxU32* mRemap; + }; + + struct LocalBounds + { + LocalBounds() : mCenter(PxVec3(0.0f)), mExtentsMagnitude(0.0f) {} + + PxVec3 mCenter; + float mExtentsMagnitude; + + PX_FORCE_INLINE void init(const PxBounds3& bounds) + { + mCenter = bounds.getCenter(); + // PT: TODO: compute mag first, then multiplies by 0.5f (TA34704) + mExtentsMagnitude = bounds.getExtents().magnitude(); + } + }; + +#ifdef GU_BV4_QUANTIZED_TREE + class QuantizedAABB + { + public: + + struct Data + { + PxU16 mExtents; //!< Quantized extents + PxI16 mCenter; //!< Quantized center + }; + Data mData[3]; + }; + 
PX_COMPILE_TIME_ASSERT(sizeof(QuantizedAABB)==12); +#endif + + ///// + + #define GU_BV4_CHILD_OFFSET_SHIFT_COUNT 11 + + struct BVDataPacked : public physx::shdfnd::UserAllocated + { +#ifdef GU_BV4_QUANTIZED_TREE + QuantizedAABB mAABB; +#else + CenterExtents mAABB; +#endif + PxU32 mData; + + PX_FORCE_INLINE PxU32 isLeaf() const { return mData&1; } + PX_FORCE_INLINE PxU32 getPrimitive() const { return mData>>1; } + PX_FORCE_INLINE PxU32 getChildOffset() const { return mData>>GU_BV4_CHILD_OFFSET_SHIFT_COUNT;} + PX_FORCE_INLINE PxU32 getChildType() const { return (mData>>1)&3; } + PX_FORCE_INLINE PxU32 getChildData() const { return mData; } + + PX_FORCE_INLINE void encodePNS(PxU32 code) + { + PX_ASSERT(code<256); + mData |= code<<3; + } + PX_FORCE_INLINE PxU32 decodePNSNoShift() const { return mData; } + }; + + // PT: TODO: align class to 16? (TA34704) + class BV4Tree : public physx::shdfnd::UserAllocated + { + public: + // PX_SERIALIZATION + BV4Tree(const PxEMPTY); + void exportExtraData(PxSerializationContext&); + void importExtraData(PxDeserializationContext& context); + static void getBinaryMetaData(PxOutputStream& stream); + //~PX_SERIALIZATION + + PX_PHYSX_COMMON_API BV4Tree(); + PX_PHYSX_COMMON_API BV4Tree(SourceMesh* meshInterface, const PxBounds3& localBounds); + PX_PHYSX_COMMON_API ~BV4Tree(); + + bool load(PxInputStream& stream, PxU32 meshVersion); // converts to proper endian at load time + + void reset(); + void operator = (BV4Tree& v); + + bool init(SourceMesh* meshInterface, const PxBounds3& localBounds); + void release(); + + SourceMesh* mMeshInterface; + LocalBounds mLocalBounds; + + PxU32 mNbNodes; + BVDataPacked* mNodes; + PxU32 mInitData; +#ifdef GU_BV4_QUANTIZED_TREE + PxVec3 mCenterOrMinCoeff; // PT: dequantization coeff, either for Center or Min (depending on AABB format) + PxVec3 mExtentsOrMaxCoeff; // PT: dequantization coeff, either for Extents or Max (depending on AABB format) +#endif + bool mUserAllocated; // PT: please keep these 4 bytes 
right after mCenterOrMinCoeff/mExtentsOrMaxCoeff for safe V4 loading + bool mPadding[3]; + }; + +} // namespace Gu +} + +#endif // GU_BV4_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp new file mode 100644 index 00000000..fbe97042 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.cpp @@ -0,0 +1,1294 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. 
+// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxVec4.h" +#include "GuBV4Build.h" +#include "GuBV4.h" +#include "PxTriangle.h" +#include "CmPhysXCommon.h" +#include "PsBasicTemplates.h" +#include "GuCenterExtents.h" + +using namespace physx; +using namespace Gu; + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#define GU_BV4_USE_NODE_POOLS + +#define DELETESINGLE(x) if (x) { delete x; x = NULL; } +#define DELETEARRAY(x) if (x) { delete []x; x = NULL; } + +static PX_FORCE_INLINE PxU32 largestAxis(const PxVec4& v) +{ + const float* Vals = &v.x; + PxU32 m = 0; + if(Vals[1] > Vals[m]) m = 1; + if(Vals[2] > Vals[m]) m = 2; + return m; +} + +AABBTree::AABBTree() : mIndices(NULL), mPool(NULL), mTotalNbNodes(0) +{ +} + +AABBTree::~AABBTree() +{ + release(); +} + +void AABBTree::release() +{ + DELETEARRAY(mPool); + PX_FREE_AND_RESET(mIndices); +} + +static PxU32 local_Split(const AABBTreeNode* PX_RESTRICT node, const PxBounds3* PX_RESTRICT /*Boxes*/, const PxVec3* PX_RESTRICT centers, PxU32 axis) +{ + const PxU32 nb = node->mNbPrimitives; + PxU32* PX_RESTRICT prims = node->mNodePrimitives; + + // Get node split value + const float splitValue = node->mBV.getCenter(axis); + + PxU32 nbPos = 0; + // Loop through all node-related primitives. Their indices range from mNodePrimitives[0] to mNodePrimitives[mNbPrimitives-1]. + // Those indices map the global list in the tree builder. + const size_t ptrValue = size_t(centers) + axis*sizeof(float); + const PxVec3* PX_RESTRICT centersX = reinterpret_cast<const PxVec3*>(ptrValue); + + for(PxU32 i=0;i<nb;i++) + { + // Get index in global list + const PxU32 index = prims[i]; + + // Test against the splitting value. The primitive value is tested against the enclosing-box center. 
+ // [We only need an approximate partition of the enclosing box here.] + const float primitiveValue = centersX[index].x; + + // Reorganize the list of indices in this order: positive - negative. + if(primitiveValue > splitValue) + { + // Swap entries + prims[i] = prims[nbPos]; + prims[nbPos] = index; + // Count primitives assigned to positive space + nbPos++; + } + } + return nbPos; +} + +static bool local_Subdivide(AABBTreeNode* PX_RESTRICT node, const PxBounds3* PX_RESTRICT boxes, const PxVec3* PX_RESTRICT centers, BuildStats& stats, const AABBTreeNode* const PX_RESTRICT node_base, PxU32 limit) +{ + const PxU32* PX_RESTRICT prims = node->mNodePrimitives; + const PxU32 nb = node->mNbPrimitives; + + // Compute bv & means at the same time + Vec4V meansV; + { + Vec4V minV = V4LoadU(&boxes[prims[0]].minimum.x); + Vec4V maxV = V4LoadU(&boxes[prims[0]].maximum.x); + meansV = V4LoadU(¢ers[prims[0]].x); + + for(PxU32 i=1;i<nb;i++) + { + const PxU32 index = prims[i]; + minV = V4Min(minV, V4LoadU(&boxes[index].minimum.x)); + maxV = V4Max(maxV, V4LoadU(&boxes[index].maximum.x)); + meansV = V4Add(meansV, V4LoadU(¢ers[index].x)); + } + const float coeffNb = 1.0f/float(nb); + meansV = V4Scale(meansV, FLoad(coeffNb)); + +// BV4_ALIGN16(PxVec4 mergedMin); +// BV4_ALIGN16(PxVec4 mergedMax); + PX_ALIGN_PREFIX(16) PxVec4 mergedMin PX_ALIGN_SUFFIX(16); + PX_ALIGN_PREFIX(16) PxVec4 mergedMax PX_ALIGN_SUFFIX(16); + + V4StoreA_Safe(minV, &mergedMin.x); + V4StoreA_Safe(maxV, &mergedMax.x); + node->mBV.minimum = PxVec3(mergedMin.x, mergedMin.y, mergedMin.z); + node->mBV.maximum = PxVec3(mergedMax.x, mergedMax.y, mergedMax.z); + } + +// // Stop subdividing if we reach a leaf node. This is always performed here, +// // else we could end in trouble if user overrides this. 
+// if(nb==1) +// return false; + if(nb<=limit) + return false; + + bool validSplit = true; + PxU32 nbPos; + { + // Compute variances + Vec4V varsV = V4Zero(); + for(PxU32 i=0;i<nb;i++) + { + const PxU32 index = prims[i]; + Vec4V centerV = V4LoadU(¢ers[index].x); + centerV = V4Sub(centerV, meansV); + centerV = V4Mul(centerV, centerV); + varsV = V4Add(varsV, centerV); + } + const float coeffNb1 = 1.0f/float(nb-1); + varsV = V4Scale(varsV, FLoad(coeffNb1)); + +// BV4_ALIGN16(PxVec4 vars); + PX_ALIGN_PREFIX(16) PxVec4 vars PX_ALIGN_SUFFIX(16); + V4StoreA_Safe(varsV, &vars.x); + + // Choose axis with greatest variance + const PxU32 axis = largestAxis(vars); + + // Split along the axis + nbPos = local_Split(node, boxes, centers, axis); + + // Check split validity + if(!nbPos || nbPos==nb) + validSplit = false; + } + + // Check the subdivision has been successful + if(!validSplit) + { + // Here, all boxes lie in the same sub-space. Two strategies: + // - if the tree *must* be complete, make an arbitrary 50-50 split + // - else stop subdividing +// if(nb>limit) + { + nbPos = node->mNbPrimitives>>1; + + if(1) + { + // Test 3 axis, take the best + float results[3]; + nbPos = local_Split(node, boxes, centers, 0); results[0] = float(nbPos)/float(node->mNbPrimitives); + nbPos = local_Split(node, boxes, centers, 1); results[1] = float(nbPos)/float(node->mNbPrimitives); + nbPos = local_Split(node, boxes, centers, 2); results[2] = float(nbPos)/float(node->mNbPrimitives); + results[0]-=0.5f; results[0]*=results[0]; + results[1]-=0.5f; results[1]*=results[1]; + results[2]-=0.5f; results[2]*=results[2]; + PxU32 Min=0; + if(results[1]<results[Min]) Min = 1; + if(results[2]<results[Min]) Min = 2; + + // Split along the axis + nbPos = local_Split(node, boxes, centers, Min); + + // Check split validity + if(!nbPos || nbPos==node->mNbPrimitives) + nbPos = node->mNbPrimitives>>1; + } + } + //else return + } + + // Now create children and assign their pointers. 
+ // We use a pre-allocated linear pool for complete trees [Opcode 1.3] + const PxU32 count = stats.getCount(); + node->mPos = size_t(node_base + count); + + // Update stats + stats.increaseCount(2); + + // Assign children + AABBTreeNode* pos = const_cast<AABBTreeNode*>(node->getPos()); + AABBTreeNode* neg = const_cast<AABBTreeNode*>(node->getNeg()); + pos->mNodePrimitives = node->mNodePrimitives; + pos->mNbPrimitives = nbPos; + neg->mNodePrimitives = node->mNodePrimitives + nbPos; + neg->mNbPrimitives = node->mNbPrimitives - nbPos; + return true; +} + +static void local_BuildHierarchy(AABBTreeNode* PX_RESTRICT node, const PxBounds3* PX_RESTRICT Boxes, const PxVec3* PX_RESTRICT centers, BuildStats& stats, const AABBTreeNode* const PX_RESTRICT node_base, PxU32 limit) +{ + if(local_Subdivide(node, Boxes, centers, stats, node_base, limit)) + { + AABBTreeNode* pos = const_cast<AABBTreeNode*>(node->getPos()); + AABBTreeNode* neg = const_cast<AABBTreeNode*>(node->getNeg()); + local_BuildHierarchy(pos, Boxes, centers, stats, node_base, limit); + local_BuildHierarchy(neg, Boxes, centers, stats, node_base, limit); + } +} + +bool AABBTree::buildFromMesh(SourceMesh& mesh, PxU32 limit) +{ + const PxU32 nbBoxes = mesh.getNbTriangles(); + if(!nbBoxes) + return false; + PxBounds3* boxes = reinterpret_cast<PxBounds3*>(PX_ALLOC(sizeof(PxBounds3)*(nbBoxes+1), "BV4")); // PT: +1 to safely V4Load/V4Store the last element + PxVec3* centers = reinterpret_cast<PxVec3*>(PX_ALLOC(sizeof(PxVec3)*(nbBoxes+1), "BV4")); // PT: +1 to safely V4Load/V4Store the last element + const FloatV halfV = FLoad(0.5f); + for(PxU32 i=0;i<nbBoxes;i++) + { + VertexPointers VP; + mesh.getTriangle(VP, i); + + const Vec4V v0V = V4LoadU(&VP.Vertex[0]->x); + const Vec4V v1V = V4LoadU(&VP.Vertex[1]->x); + const Vec4V v2V = V4LoadU(&VP.Vertex[2]->x); + Vec4V minV = V4Min(v0V, v1V); + minV = V4Min(minV, v2V); + Vec4V maxV = V4Max(v0V, v1V); + maxV = V4Max(maxV, v2V); + V4StoreU_Safe(minV, &boxes[i].minimum.x); // PT: 
safe because 'maximum' follows 'minimum' + V4StoreU_Safe(maxV, &boxes[i].maximum.x); // PT: safe because we allocated one more box + + const Vec4V centerV = V4Scale(V4Add(maxV, minV), halfV); + V4StoreU_Safe(centerV, ¢ers[i].x); // PT: safe because we allocated one more PxVec3 + } + + { + // Release previous tree + release(); + + // Init stats + BuildStats Stats; + Stats.setCount(1); + + // Initialize indices. This list will be modified during build. + mIndices = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbBoxes, "BV4 indices")); + // Identity permutation + for(PxU32 i=0;i<nbBoxes;i++) + mIndices[i] = i; + + // Use a linear array for complete trees (since we can predict the final number of nodes) [Opcode 1.3] + // Allocate a pool of nodes + // PT: TODO: optimize memory here (TA34704) + mPool = PX_NEW(AABBTreeNode)[nbBoxes*2 - 1]; + + // Setup initial node. Here we have a complete permutation of the app's primitives. + mPool->mNodePrimitives = mIndices; + mPool->mNbPrimitives = nbBoxes; + + // Build the hierarchy + local_BuildHierarchy(mPool, boxes, centers, Stats, mPool, limit); + + // Get back total number of nodes + mTotalNbNodes = Stats.getCount(); + } + + PX_FREE(centers); + PX_FREE(boxes); + return true; +} + +PxU32 AABBTree::walk(WalkingCallback cb, void* userData) const +{ + // Call it without callback to compute max depth + PxU32 maxDepth = 0; + PxU32 currentDepth = 0; + + struct Local + { + static void _Walk(const AABBTreeNode* current_node, PxU32& max_depth, PxU32& current_depth, WalkingCallback callback, void* userData_) + { + // Checkings + if(!current_node) + return; + // Entering a new node => increase depth + current_depth++; + // Keep track of max depth + if(current_depth>max_depth) + max_depth = current_depth; + + // Callback + if(callback && !(callback)(current_node, current_depth, userData_)) + return; + + // Recurse + if(current_node->getPos()) { _Walk(current_node->getPos(), max_depth, current_depth, callback, userData_); current_depth--; 
} + if(current_node->getNeg()) { _Walk(current_node->getNeg(), max_depth, current_depth, callback, userData_); current_depth--; } + } + }; + Local::_Walk(mPool, maxDepth, currentDepth, cb, userData); + return maxDepth; +} + + + +#include "GuBV4_Internal.h" + +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT +// PT: see http://www.codercorner.com/blog/?p=734 +static PxU32 precomputeNodeSorting(const PxBounds3& box0, const PxBounds3& box1) +{ + const PxVec3 C0 = box0.getCenter(); + const PxVec3 C1 = box1.getCenter(); + + PxVec3 dirPPP(1.0f, 1.0f, 1.0f); dirPPP.normalize(); + PxVec3 dirPPN(1.0f, 1.0f, -1.0f); dirPPN.normalize(); + PxVec3 dirPNP(1.0f, -1.0f, 1.0f); dirPNP.normalize(); + PxVec3 dirPNN(1.0f, -1.0f, -1.0f); dirPNN.normalize(); + PxVec3 dirNPP(-1.0f, 1.0f, 1.0f); dirNPP.normalize(); + PxVec3 dirNPN(-1.0f, 1.0f, -1.0f); dirNPN.normalize(); + PxVec3 dirNNP(-1.0f, -1.0f, 1.0f); dirNNP.normalize(); + PxVec3 dirNNN(-1.0f, -1.0f, -1.0f); dirNNN.normalize(); + + const PxVec3 deltaC = C0 - C1; + const bool bPPP = deltaC.dot(dirPPP)<0.0f; + const bool bPPN = deltaC.dot(dirPPN)<0.0f; + const bool bPNP = deltaC.dot(dirPNP)<0.0f; + const bool bPNN = deltaC.dot(dirPNN)<0.0f; + const bool bNPP = deltaC.dot(dirNPP)<0.0f; + const bool bNPN = deltaC.dot(dirNPN)<0.0f; + const bool bNNP = deltaC.dot(dirNNP)<0.0f; + const bool bNNN = deltaC.dot(dirNNN)<0.0f; + + PxU32 code = 0; + if(!bPPP) + code |= (1<<7); // Bit 0: PPP + if(!bPPN) + code |= (1<<6); // Bit 1: PPN + if(!bPNP) + code |= (1<<5); // Bit 2: PNP + if(!bPNN) + code |= (1<<4); // Bit 3: PNN + if(!bNPP) + code |= (1<<3); // Bit 4: NPP + if(!bNPN) + code |= (1<<2); // Bit 5: NPN + if(!bNNP) + code |= (1<<1); // Bit 6: NNP + if(!bNNN) + code |= (1<<0); // Bit 7: NNN + return code; +} +#endif + +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Common.h" +#endif + +static void setEmpty(CenterExtents& box) +{ + box.mCenter = PxVec3(0.0f, 0.0f, 0.0f); + box.mExtents = PxVec3(-1.0f, -1.0f, -1.0f); +} + +// Data: +// 1 bit for leaf/no leaf +// 
2 bits for child-node type +// 8 bits for PNS +// => 32 - 1 - 2 - 8 = 21 bits left for encoding triangle index or node *offset* +// => limited to 2.097.152 triangles +// => and 2Mb-large trees (this one may not work out well in practice) +// ==> lines marked with //* have been changed to address this. Now we don't store offsets in bytes directly +// but in BVData indices. There's more work at runtime calculating addresses, but now the format can support +// 2 million single nodes. +// +// That being said we only need 3*8 = 24 bits in total, so that could be only 6 bits in each BVData. +// For type0: we have 2 nodes, we need 8 bits => 6 bits/node = 12 bits available, ok +// For type1: we have 3 nodes, we need 8*2 = 16 bits => 6 bits/node = 18 bits available, ok +// For type2: we have 4 nodes, we need 8*3 = 24 bits => 6 bits/node = 24 bits available, ok +//#pragma pack(1) +struct BVData : public physx::shdfnd::UserAllocated +{ + BVData(); + CenterExtents mAABB; + size_t mData; +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + PxU32 mTempPNS; +#endif +}; +//#pragma pack() + +BVData::BVData() : mData(PX_INVALID_U32) +{ + setEmpty(mAABB); +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + mTempPNS = 0; +#endif +} + +struct BV4Node : public physx::shdfnd::UserAllocated +{ + PX_FORCE_INLINE BV4Node() {} + PX_FORCE_INLINE ~BV4Node() {} + + BVData mBVData[4]; + + PX_FORCE_INLINE size_t isLeaf(PxU32 i) const { return mBVData[i].mData&1; } + PX_FORCE_INLINE PxU32 getPrimitive(PxU32 i) const { return PxU32(mBVData[i].mData>>1); } + PX_FORCE_INLINE const BV4Node* getChild(PxU32 i) const { return reinterpret_cast<BV4Node*>(mBVData[i].mData); } + + PxU32 getType() const + { + PxU32 Nb=0; + for(PxU32 i=0;i<4;i++) + { + if(mBVData[i].mData!=PX_INVALID_U32) + Nb++; + } + return Nb; + } + + PxU32 getSize() const + { + const PxU32 type = getType(); + return sizeof(BVData)*type; + } +}; + +#define NB_NODES_PER_SLAB 256 +struct BV4BuildParams +{ + PX_FORCE_INLINE BV4BuildParams(float epsilon) : 
mEpsilon(epsilon) +#ifdef GU_BV4_USE_NODE_POOLS + ,mTop(NULL) +#endif + {} + ~BV4BuildParams(); + + // Stats + PxU32 mNbNodes; + PxU32 mStats[4]; + + // + float mEpsilon; + +#ifdef GU_BV4_USE_NODE_POOLS + // + struct Slab : public physx::shdfnd::UserAllocated + { + BV4Node mNodes[NB_NODES_PER_SLAB]; + PxU32 mNbUsedNodes; + Slab* mNext; + }; + Slab* mTop; + + BV4Node* allocateNode(); + void releaseNodes(); +#endif +}; + +BV4BuildParams::~BV4BuildParams() +{ +#ifdef GU_BV4_USE_NODE_POOLS + releaseNodes(); +#endif +} + +#ifdef GU_BV4_USE_NODE_POOLS +BV4Node* BV4BuildParams::allocateNode() +{ + if(!mTop || mTop->mNbUsedNodes==NB_NODES_PER_SLAB) + { + Slab* newSlab = PX_NEW(Slab); + newSlab->mNbUsedNodes = 0; + newSlab->mNext = mTop; + mTop = newSlab; + } + return &mTop->mNodes[mTop->mNbUsedNodes++]; +} + +void BV4BuildParams::releaseNodes() +{ + Slab* current = mTop; + while(current) + { + Slab* next = current->mNext; + PX_DELETE(current); + current = next; + } + mTop = NULL; +} +#endif + +static void setPrimitive(const AABBTree& source, BV4Node* node4, PxU32 i, const AABBTreeNode* node, float epsilon) +{ + const PxU32 nbPrims = node->getNbPrimitives(); + PX_ASSERT(nbPrims<16); + const PxU32* indexBase = source.getIndices(); + const PxU32* prims = node->getPrimitives(); + const PxU32 offset = PxU32(prims - indexBase); + for(PxU32 j=0;j<nbPrims;j++) + { + PX_ASSERT(prims[j] == offset+j); + } + const PxU32 primitiveIndex = (offset<<4)|(nbPrims&15); + + node4->mBVData[i].mAABB = node->getAABB(); + if(epsilon!=0.0f) + node4->mBVData[i].mAABB.mExtents += PxVec3(epsilon, epsilon, epsilon); + node4->mBVData[i].mData = (primitiveIndex<<1)|1; +} + +static BV4Node* setNode(const AABBTree& source, BV4Node* node4, PxU32 i, const AABBTreeNode* node, BV4BuildParams& params) +{ + BV4Node* child = NULL; + if(node->isLeaf()) + { + setPrimitive(source, node4, i, node, params.mEpsilon); + } + else + { + node4->mBVData[i].mAABB = node->getAABB(); + if(params.mEpsilon!=0.0f) + 
node4->mBVData[i].mAABB.mExtents += PxVec3(params.mEpsilon); + + params.mNbNodes++; +#ifdef GU_BV4_USE_NODE_POOLS + child = params.allocateNode(); +#else + child = PX_NEW(BV4Node); +#endif + node4->mBVData[i].mData = size_t(child); + } + return child; +} + +static void _BuildBV4(const AABBTree& source, BV4Node* tmp, const AABBTreeNode* current_node, BV4BuildParams& params) +{ + PX_ASSERT(!current_node->isLeaf()); + + // In the regular tree we have current node A, and: + // ____A____ + // P N + // __|__ __|__ + // PP PN NP NN + // + // For PNS we have: + // bit0 to sort P|N + // bit1 to sort PP|PN + // bit2 to sort NP|NN + // + // As much as possible we need to preserve the original order in BV4, if we want to reuse the same PNS bits. + // + // bit0|bit1|bit2 Order 8bits code + // 0 0 0 PP PN NP NN 0 1 2 3 + // 0 0 1 PP PN NN NP 0 1 3 2 + // 0 1 0 PN PP NP NN 1 0 2 3 + // 0 1 1 PN PP NN NP 1 0 3 2 + // 1 0 0 NP NN PP PN 2 3 0 1 + // 1 0 1 NN NP PP PN 3 2 0 1 + // 1 1 0 NP NN PN PP 2 3 1 0 + // 1 1 1 NN NP PN PP 3 2 1 0 + // + // So we can fetch/compute the sequence from the bits, combine it with limitations from the node type, and process the nodes in order. In theory. + // 8*8bits => the whole thing fits in a single 64bit register, so we could potentially use a "register LUT" here. 
+ + const AABBTreeNode* P = current_node->getPos(); + const AABBTreeNode* N = current_node->getNeg(); + + const bool PLeaf = P->isLeaf(); + const bool NLeaf = N->isLeaf(); + + if(PLeaf) + { + if(NLeaf) + { + // Case 1: P and N are both leaves: + // ____A____ + // P N + // => store as (P,N) and keep bit0 + params.mStats[0]++; + // PN leaves => store 2 triangle pointers, lose 50% of node space + setPrimitive(source, tmp, 0, P, params.mEpsilon); + setPrimitive(source, tmp, 1, N, params.mEpsilon); + +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV); +#endif + } + else + { + // Case 2: P leaf, N no leaf + // ____A____ + // P N + // __|__ + // NP NN + // => store as (P,NP,NN), keep bit0 and bit2 + params.mStats[1]++; + // P leaf => store 1 triangle pointers and 2 node pointers + // => 3 slots used, 25% wasted + setPrimitive(source, tmp, 0, P, params.mEpsilon); + + // + + const AABBTreeNode* NP = N->getPos(); + const AABBTreeNode* NN = N->getNeg(); + +//#define NODE_FUSION +#ifdef NODE_FUSION + PxU32 c=0; + BV4Node* ChildNP; + if(!NP->isLeaf() && NP->getPos()->isLeaf() && NP->getNeg()->isLeaf()) + { + // Drag the terminal leaves directly into this BV4 node, drop internal node NP + setPrimitive(source, tmp, 1, NP->getPos(), params.mEpsilon); + setPrimitive(source, tmp, 2, NP->getNeg(), params.mEpsilon); + ChildNP = NULL; + params.mStats[1]--; + params.mStats[3]++; + c=1; + } + else + { + ChildNP = setNode(source, tmp, 1, NP, params); + } + + BV4Node* ChildNN; + if(c==0 && !NN->isLeaf() && NN->getPos()->isLeaf() && NN->getNeg()->isLeaf()) + { + // Drag the terminal leaves directly into this BV4 node, drop internal node NN + setPrimitive(source, tmp, 2, NN->getPos(), params.mEpsilon); + setPrimitive(source, tmp, 3, NN->getNeg(), params.mEpsilon); + ChildNN = NULL; + params.mStats[1]--; + params.mStats[3]++; + } + else + { + ChildNN = setNode(source, tmp, 2+c, NN, params); + } + + //BV4Node* ChildNN = setNode(tmp, 2+c, NN, 
epsilon, params); +#else + BV4Node* ChildNP = setNode(source, tmp, 1, NP, params); + BV4Node* ChildNN = setNode(source, tmp, 2, NN, params); +#endif + +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV); + tmp->mBVData[2].mTempPNS = precomputeNodeSorting(NP->mBV, NN->mBV); +#endif + if(ChildNP) + _BuildBV4(source, ChildNP, NP, params); + if(ChildNN) + _BuildBV4(source, ChildNN, NN, params); + } + } + else + { + if(NLeaf) + { + // Case 3: P no leaf, N leaf + // ____A____ + // P N + // __|__ + // PP PN + // => store as (PP,PN,N), keep bit0 and bit1 + params.mStats[2]++; + + // N leaf => store 1 triangle pointers and 2 node pointers + // => 3 slots used, 25% wasted + setPrimitive(source, tmp, 2, N, params.mEpsilon); + + // + + const AABBTreeNode* PP = P->getPos(); + const AABBTreeNode* PN = P->getNeg(); + + BV4Node* ChildPP = setNode(source, tmp, 0, PP, params); + BV4Node* ChildPN = setNode(source, tmp, 1, PN, params); + +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV); + tmp->mBVData[1].mTempPNS = precomputeNodeSorting(PP->mBV, PN->mBV); +#endif + if(ChildPP) + _BuildBV4(source, ChildPP, PP, params); + if(ChildPN) + _BuildBV4(source, ChildPN, PN, params); + } + else + { + // Case 4: P and N are no leaves: + // => store as (PP,PN,NP,NN), keep bit0/bit1/bit2 + params.mStats[3]++; + + // No leaves => store 4 node pointers + const AABBTreeNode* PP = P->getPos(); + const AABBTreeNode* PN = P->getNeg(); + const AABBTreeNode* NP = N->getPos(); + const AABBTreeNode* NN = N->getNeg(); + + BV4Node* ChildPP = setNode(source, tmp, 0, PP, params); + BV4Node* ChildPN = setNode(source, tmp, 1, PN, params); + BV4Node* ChildNP = setNode(source, tmp, 2, NP, params); + BV4Node* ChildNN = setNode(source, tmp, 3, NN, params); + +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + tmp->mBVData[0].mTempPNS = precomputeNodeSorting(P->mBV, N->mBV); + tmp->mBVData[1].mTempPNS = 
precomputeNodeSorting(PP->mBV, PN->mBV); + tmp->mBVData[2].mTempPNS = precomputeNodeSorting(NP->mBV, NN->mBV); +#endif + if(ChildPP) + _BuildBV4(source, ChildPP, PP, params); + if(ChildPN) + _BuildBV4(source, ChildPN, PN, params); + if(ChildNP) + _BuildBV4(source, ChildNP, NP, params); + if(ChildNN) + _BuildBV4(source, ChildNN, NN, params); + } + } +} + +static bool BuildBV4Internal(BV4Tree& tree, const AABBTree& Source, SourceMesh* mesh, float epsilon) +{ + if(mesh->getNbTriangles()<=4) + return tree.init(mesh, Source.getBV()); + + { + struct Local + { + static void _CheckMD(const AABBTreeNode* current_node, PxU32& md, PxU32& cd) + { + cd++; + md = PxMax(md, cd); + + if(current_node->getPos()) { _CheckMD(current_node->getPos(), md, cd); cd--; } + if(current_node->getNeg()) { _CheckMD(current_node->getNeg(), md, cd); cd--; } + } + + static void _Check(AABBTreeNode* current_node) + { + if(current_node->isLeaf()) + return; + + AABBTreeNode* P = const_cast<AABBTreeNode*>(current_node->getPos()); + AABBTreeNode* N = const_cast<AABBTreeNode*>(current_node->getNeg()); + { + PxU32 MDP = 0; PxU32 CDP = 0; _CheckMD(P, MDP, CDP); + PxU32 MDN = 0; PxU32 CDN = 0; _CheckMD(N, MDN, CDN); + + if(MDP>MDN) +// if(MDP<MDN) + { + Ps::swap(*P, *N); + Ps::swap(P, N); + } + } + _Check(P); + _Check(N); + } + }; + Local::_Check(const_cast<AABBTreeNode*>(Source.getNodes())); + } + + BV4BuildParams Params(epsilon); + Params.mNbNodes=1; // Root node + Params.mStats[0]=0; + Params.mStats[1]=0; + Params.mStats[2]=0; + Params.mStats[3]=0; + +#ifdef GU_BV4_USE_NODE_POOLS + BV4Node* Root = Params.allocateNode(); +#else + BV4Node* Root = PX_NEW(BV4Node); +#endif + _BuildBV4(Source, Root, Source.getNodes(), Params); + + if(!tree.init(mesh, Source.getBV())) + return false; + BV4Tree* T = &tree; + + // Version with variable-sized nodes in single stream + { + struct Local + { +#ifdef GU_BV4_QUANTIZED_TREE + #ifdef GU_BV4_USE_SLABS + static void _ComputeMaxValues(const BV4Node* current, PxVec3& MinMax, 
PxVec3& MaxMax) + { + for(PxU32 i=0;i<4;i++) + { + if(current->mBVData[i].mData!=PX_INVALID_U32) + { + const CenterExtents& Box = current->mBVData[i].mAABB; + const PxVec3 Min = Box.mCenter - Box.mExtents; + const PxVec3 Max = Box.mCenter + Box.mExtents; + if(fabsf(Min.x)>MinMax.x) MinMax.x = fabsf(Min.x); + if(fabsf(Min.y)>MinMax.y) MinMax.y = fabsf(Min.y); + if(fabsf(Min.z)>MinMax.z) MinMax.z = fabsf(Min.z); + if(fabsf(Max.x)>MaxMax.x) MaxMax.x = fabsf(Max.x); + if(fabsf(Max.y)>MaxMax.y) MaxMax.y = fabsf(Max.y); + if(fabsf(Max.z)>MaxMax.z) MaxMax.z = fabsf(Max.z); + if(!current->isLeaf(i)) + { + const BV4Node* ChildNode = current->getChild(i); + _ComputeMaxValues(ChildNode, MinMax, MaxMax); + } + } + } + } + #else + static void _ComputeMaxValues(const BV4Node* current, PxVec3& CMax, PxVec3& EMax) + { + for(PxU32 i=0;i<4;i++) + { + if(current->mBVData[i].mData!=PX_INVALID_U32) + { + const CenterExtents& Box = current->mBVData[i].mAABB; + if(fabsf(Box.mCenter.x)>CMax.x) CMax.x = fabsf(Box.mCenter.x); + if(fabsf(Box.mCenter.y)>CMax.y) CMax.y = fabsf(Box.mCenter.y); + if(fabsf(Box.mCenter.z)>CMax.z) CMax.z = fabsf(Box.mCenter.z); + if(fabsf(Box.mExtents.x)>EMax.x) EMax.x = fabsf(Box.mExtents.x); + if(fabsf(Box.mExtents.y)>EMax.y) EMax.y = fabsf(Box.mExtents.y); + if(fabsf(Box.mExtents.z)>EMax.z) EMax.z = fabsf(Box.mExtents.z); + + if(!current->isLeaf(i)) + { + const BV4Node* ChildNode = current->getChild(i); + _ComputeMaxValues(ChildNode, CMax, EMax); + } + } + } + } + #endif +#endif + + static void _Flatten(BVDataPacked* const dest, const PxU32 box_id, PxU32& current_id, const BV4Node* current, PxU32& max_depth, PxU32& current_depth +#ifdef GU_BV4_QUANTIZED_TREE + , const PxVec3& CQuantCoeff, const PxVec3& EQuantCoeff, + const PxVec3& mCenterCoeff, const PxVec3& mExtentsCoeff +#endif + ) + { + // Entering a new node => increase depth + current_depth++; + // Keep track of max depth + if(current_depth>max_depth) + max_depth = current_depth; + +// dest[box_id] = 
*current; + const PxU32 CurrentType = current->getType(); + for(PxU32 i=0;i<CurrentType;i++) + { +#ifdef GU_BV4_QUANTIZED_TREE + const CenterExtents& Box = current->mBVData[i].mAABB; + #ifdef GU_BV4_USE_SLABS + const PxVec3 m = Box.mCenter - Box.mExtents; + const PxVec3 M = Box.mCenter + Box.mExtents; + + dest[box_id+i].mAABB.mData[0].mCenter = PxI16(m.x * CQuantCoeff.x); + dest[box_id+i].mAABB.mData[1].mCenter = PxI16(m.y * CQuantCoeff.y); + dest[box_id+i].mAABB.mData[2].mCenter = PxI16(m.z * CQuantCoeff.z); + dest[box_id+i].mAABB.mData[0].mExtents = PxU16(PxI16(M.x * EQuantCoeff.x)); + dest[box_id+i].mAABB.mData[1].mExtents = PxU16(PxI16(M.y * EQuantCoeff.y)); + dest[box_id+i].mAABB.mData[2].mExtents = PxU16(PxI16(M.z * EQuantCoeff.z)); + + if(1) + { + for(PxU32 j=0;j<3;j++) + { + // Dequantize the min/max +// const float qmin = float(dest[box_id+i].mAABB.mData[j].mCenter) * mCenterCoeff[j]; +// const float qmax = float(PxI16(dest[box_id+i].mAABB.mData[j].mExtents)) * mExtentsCoeff[j]; + // Compare real & dequantized values +/* if(qmax<M[j] || qmin>m[j]) + { + int stop=1; + }*/ + bool CanLeave; + do + { + CanLeave=true; + const float qmin = float(dest[box_id+i].mAABB.mData[j].mCenter) * mCenterCoeff[j]; + const float qmax = float(PxI16(dest[box_id+i].mAABB.mData[j].mExtents)) * mExtentsCoeff[j]; + + if(qmax<M[j]) + { +// if(dest[box_id+i].mAABB.mData[j].mExtents!=0xffff) + if(dest[box_id+i].mAABB.mData[j].mExtents!=0x7fff) + { + dest[box_id+i].mAABB.mData[j].mExtents++; + CanLeave = false; + } + } + if(qmin>m[j]) + { + if(dest[box_id+i].mAABB.mData[j].mCenter) + { + dest[box_id+i].mAABB.mData[j].mCenter--; + CanLeave = false; + } + } + }while(!CanLeave); + } + } + #else + dest[box_id+i].mAABB.mData[0].mCenter = PxI16(Box.mCenter.x * CQuantCoeff.x); + dest[box_id+i].mAABB.mData[1].mCenter = PxI16(Box.mCenter.y * CQuantCoeff.y); + dest[box_id+i].mAABB.mData[2].mCenter = PxI16(Box.mCenter.z * CQuantCoeff.z); + dest[box_id+i].mAABB.mData[0].mExtents = 
PxU16(Box.mExtents.x * EQuantCoeff.x); + dest[box_id+i].mAABB.mData[1].mExtents = PxU16(Box.mExtents.y * EQuantCoeff.y); + dest[box_id+i].mAABB.mData[2].mExtents = PxU16(Box.mExtents.z * EQuantCoeff.z); + + // Fix quantized boxes + if(1) + { + // Make sure the quantized box is still valid + const PxVec3 Max = Box.mCenter + Box.mExtents; + const PxVec3 Min = Box.mCenter - Box.mExtents; + // For each axis + for(PxU32 j=0;j<3;j++) + { // Dequantize the box center + const float qc = float(dest[box_id+i].mAABB.mData[j].mCenter) * mCenterCoeff[j]; + bool FixMe=true; + do + { // Dequantize the box extent + const float qe = float(dest[box_id+i].mAABB.mData[j].mExtents) * mExtentsCoeff[j]; + // Compare real & dequantized values + if(qc+qe<Max[j] || qc-qe>Min[j]) dest[box_id+i].mAABB.mData[j].mExtents++; + else FixMe=false; + // Prevent wrapping + if(!dest[box_id+i].mAABB.mData[j].mExtents) + { + dest[box_id+i].mAABB.mData[j].mExtents=0xffff; + FixMe=false; + } + }while(FixMe); + } + } + #endif +#else + #ifdef GU_BV4_USE_SLABS + // Compute min & max right here. 
Store temp as Center/Extents = Min/Max + const CenterExtents& Box = current->mBVData[i].mAABB; + dest[box_id+i].mAABB.mCenter = Box.mCenter - Box.mExtents; + dest[box_id+i].mAABB.mExtents = Box.mCenter + Box.mExtents; + #else + dest[box_id+i].mAABB = current->mBVData[i].mAABB; + #endif +#endif + dest[box_id+i].mData = PxU32(current->mBVData[i].mData); +// dest[box_id+i].encodePNS(current->mBVData[i].mTempPNS); + } + + PxU32 NbToGo=0; + PxU32 NextIDs[4] = {PX_INVALID_U32, PX_INVALID_U32, PX_INVALID_U32, PX_INVALID_U32}; + const BV4Node* ChildNodes[4] = {NULL,NULL,NULL,NULL}; + + BVDataPacked* data = dest+box_id; + for(PxU32 i=0;i<4;i++) + { + if(current->mBVData[i].mData!=PX_INVALID_U32 && !current->isLeaf(i)) + { + const BV4Node* ChildNode = current->getChild(i); + + const PxU32 NextID = current_id; +#ifdef GU_BV4_USE_SLABS + current_id += 4; +#else + const PxU32 ChildSize = ChildNode->getType(); + current_id += ChildSize; +#endif + const PxU32 ChildType = (ChildNode->getType()-2)<<1; + data[i].mData = size_t(ChildType+(NextID<<GU_BV4_CHILD_OFFSET_SHIFT_COUNT)); + //PX_ASSERT(data[i].mData == size_t(ChildType+(NextID<<3))); + + NextIDs[NbToGo] = NextID; + ChildNodes[NbToGo] = ChildNode; + NbToGo++; + +#ifdef GU_BV4_PRECOMPUTED_NODE_SORT + data[i].encodePNS(current->mBVData[i].mTempPNS); +#endif +//#define DEPTH_FIRST +#ifdef DEPTH_FIRST + _Flatten(dest, NextID, current_id, ChildNode, max_depth, current_depth + #ifdef GU_BV4_QUANTIZED_TREE + , CQuantCoeff, EQuantCoeff, mCenterCoeff, mExtentsCoeff + #endif + ); + current_depth--; +#endif + } +#ifdef GU_BV4_USE_SLABS + if(current->mBVData[i].mData==PX_INVALID_U32) + { + #ifdef GU_BV4_QUANTIZED_TREE + data[i].mAABB.mData[0].mExtents = 0; + data[i].mAABB.mData[1].mExtents = 0; + data[i].mAABB.mData[2].mExtents = 0; + data[i].mAABB.mData[0].mCenter = 0; + data[i].mAABB.mData[1].mCenter = 0; + data[i].mAABB.mData[2].mCenter = 0; + #else + data[i].mAABB.mCenter = PxVec3(0.0f); + data[i].mAABB.mExtents = PxVec3(0.0f); + 
#endif + data[i].mData = PX_INVALID_U32; + } +#endif + } + +#ifndef DEPTH_FIRST + for(PxU32 i=0;i<NbToGo;i++) + { + _Flatten(dest, NextIDs[i], current_id, ChildNodes[i], max_depth, current_depth + #ifdef GU_BV4_QUANTIZED_TREE + , CQuantCoeff, EQuantCoeff, mCenterCoeff, mExtentsCoeff + #endif + ); + current_depth--; + } +#endif +#ifndef GU_BV4_USE_NODE_POOLS + DELETESINGLE(current); +#endif + } + }; + + const PxU32 NbSingleNodes = Params.mStats[0]*2+(Params.mStats[1]+Params.mStats[2])*3+Params.mStats[3]*4; + + PxU32 CurID = Root->getType(); + PxU32 InitData = PX_INVALID_U32; +#ifdef GU_BV4_USE_SLABS + PX_UNUSED(NbSingleNodes); + const PxU32 NbNeeded = (Params.mStats[0]+Params.mStats[1]+Params.mStats[2]+Params.mStats[3])*4; + BVDataPacked* Nodes = reinterpret_cast<BVDataPacked*>(PX_ALLOC(sizeof(BVDataPacked)*NbNeeded, "BV4 nodes")); // PT: PX_NEW breaks alignment here +// BVDataPacked* Nodes = PX_NEW(BVDataPacked)[NbNeeded]; + + if(CurID==2) + { + InitData = 0; + } + else if(CurID==3) + { + InitData = 2; + } + else if(CurID==4) + { + InitData = 4; + } + + CurID = 4; +// PxU32 CurID = 4; +// PxU32 InitData = 4; +#else + BVDataPacked* Nodes = PX_NEW(BVDataPacked)[NbSingleNodes]; + + if(CurID==2) + { + InitData = 0; + } + else if(CurID==3) + { + InitData = 2; + } + else if(CurID==4) + { + InitData = 4; + } +#endif + + T->mInitData = InitData; + PxU32 MaxDepth = 0; + PxU32 CurrentDepth = 0; +#ifdef GU_BV4_QUANTIZED_TREE + #ifdef GU_BV4_USE_SLABS + PxVec3 MinQuantCoeff, MaxQuantCoeff; + { + // Get max values + PxVec3 MinMax(-FLT_MAX); + PxVec3 MaxMax(-FLT_MAX); + Local::_ComputeMaxValues(Root, MinMax, MaxMax); + + const PxU32 nbm=15; + + // Compute quantization coeffs + const float MinCoeff = float((1<<nbm)-1); + const float MaxCoeff = float((1<<nbm)-1); + MinQuantCoeff.x = MinMax.x!=0.0f ? MinCoeff/MinMax.x : 0.0f; + MinQuantCoeff.y = MinMax.y!=0.0f ? MinCoeff/MinMax.y : 0.0f; + MinQuantCoeff.z = MinMax.z!=0.0f ? 
MinCoeff/MinMax.z : 0.0f; + MaxQuantCoeff.x = MaxMax.x!=0.0f ? MaxCoeff/MaxMax.x : 0.0f; + MaxQuantCoeff.y = MaxMax.y!=0.0f ? MaxCoeff/MaxMax.y : 0.0f; + MaxQuantCoeff.z = MaxMax.z!=0.0f ? MaxCoeff/MaxMax.z : 0.0f; + // Compute and save dequantization coeffs + T->mCenterOrMinCoeff.x = MinMax.x/MinCoeff; + T->mCenterOrMinCoeff.y = MinMax.y/MinCoeff; + T->mCenterOrMinCoeff.z = MinMax.z/MinCoeff; + T->mExtentsOrMaxCoeff.x = MaxMax.x/MaxCoeff; + T->mExtentsOrMaxCoeff.y = MaxMax.y/MaxCoeff; + T->mExtentsOrMaxCoeff.z = MaxMax.z/MaxCoeff; + } + Local::_Flatten(Nodes, 0, CurID, Root, MaxDepth, CurrentDepth, MinQuantCoeff, MaxQuantCoeff, T->mCenterOrMinCoeff, T->mExtentsOrMaxCoeff); + #else + PxVec3 CQuantCoeff, EQuantCoeff; + { + // Get max values + PxVec3 CMax(-FLT_MAX); + PxVec3 EMax(-FLT_MAX); + Local::_ComputeMaxValues(Root, CMax, EMax); + + const PxU32 nbc=15; + const PxU32 nbe=16; +// const PxU32 nbc=7; +// const PxU32 nbe=8; + + const float UnitQuantError = 2.0f/65535.0f; + EMax.x += CMax.x*UnitQuantError; + EMax.y += CMax.y*UnitQuantError; + EMax.z += CMax.z*UnitQuantError; + + // Compute quantization coeffs + const float CCoeff = float((1<<nbc)-1); + CQuantCoeff.x = CMax.x!=0.0f ? CCoeff/CMax.x : 0.0f; + CQuantCoeff.y = CMax.y!=0.0f ? CCoeff/CMax.y : 0.0f; + CQuantCoeff.z = CMax.z!=0.0f ? CCoeff/CMax.z : 0.0f; + const float ECoeff = float((1<<nbe)-32); + EQuantCoeff.x = EMax.x!=0.0f ? ECoeff/EMax.x : 0.0f; + EQuantCoeff.y = EMax.y!=0.0f ? ECoeff/EMax.y : 0.0f; + EQuantCoeff.z = EMax.z!=0.0f ? 
ECoeff/EMax.z : 0.0f; + // Compute and save dequantization coeffs + T->mCenterOrMinCoeff.x = CMax.x/CCoeff; + T->mCenterOrMinCoeff.y = CMax.y/CCoeff; + T->mCenterOrMinCoeff.z = CMax.z/CCoeff; + T->mExtentsOrMaxCoeff.x = EMax.x/ECoeff; + T->mExtentsOrMaxCoeff.y = EMax.y/ECoeff; + T->mExtentsOrMaxCoeff.z = EMax.z/ECoeff; + } + Local::_Flatten(Nodes, 0, CurID, Root, MaxDepth, CurrentDepth, CQuantCoeff, EQuantCoeff, T->mCenterOrMinCoeff, T->mExtentsOrMaxCoeff); + #endif +#else + Local::_Flatten(Nodes, 0, CurID, Root, MaxDepth, CurrentDepth); +#endif + +#ifdef GU_BV4_USE_NODE_POOLS + Params.releaseNodes(); +#endif + +#ifdef GU_BV4_USE_SLABS + { + PX_ASSERT(sizeof(BVDataSwizzled)==sizeof(BVDataPacked)*4); + BVDataPacked* Copy = PX_NEW(BVDataPacked)[NbNeeded]; + memcpy(Copy, Nodes, sizeof(BVDataPacked)*NbNeeded); + for(PxU32 i=0;i<NbNeeded/4;i++) + { + const BVDataPacked* Src = Copy + i*4; + BVDataSwizzled* Dst = reinterpret_cast<BVDataSwizzled*>(Nodes + i*4); + for(PxU32 j=0;j<4;j++) + { + // We previously stored m/M within c/e so we just need to swizzle now + #ifdef GU_BV4_QUANTIZED_TREE + const QuantizedAABB& Box = Src[j].mAABB; + Dst->mX[j].mMin = Box.mData[0].mCenter; + Dst->mY[j].mMin = Box.mData[1].mCenter; + Dst->mZ[j].mMin = Box.mData[2].mCenter; + Dst->mX[j].mMax = PxI16(Box.mData[0].mExtents); + Dst->mY[j].mMax = PxI16(Box.mData[1].mExtents); + Dst->mZ[j].mMax = PxI16(Box.mData[2].mExtents); + #else + const CenterExtents& Box = Src[j].mAABB; + Dst->mMinX[j] = Box.mCenter.x; + Dst->mMinY[j] = Box.mCenter.y; + Dst->mMinZ[j] = Box.mCenter.z; + Dst->mMaxX[j] = Box.mExtents.x; + Dst->mMaxY[j] = Box.mExtents.y; + Dst->mMaxZ[j] = Box.mExtents.z; + #endif + Dst->mData[j] = Src[j].mData; + } + } + DELETEARRAY(Copy); + } + T->mNbNodes = NbNeeded; +#else + PX_ASSERT(CurID==NbSingleNodes); + T->mNbNodes = NbSingleNodes; +#endif + T->mNodes = Nodes; + } + return true; +} + +///// + +struct ReorderData +{ + const SourceMesh* mMesh; + PxU32* mOrder; + PxU32 mNbTrisPerLeaf; + 
PxU32 mIndex; + PxU32 mNbTris; + PxU32 mStats[16]; +}; +static bool gReorderCallback(const AABBTreeNode* current, PxU32 /*depth*/, void* userData) +{ + ReorderData* Data = reinterpret_cast<ReorderData*>(userData); + if(current->isLeaf()) + { + const PxU32 n = current->getNbPrimitives(); + PX_ASSERT(n<=Data->mNbTrisPerLeaf); + Data->mStats[n]++; + PxU32* Prims = const_cast<PxU32*>(current->getPrimitives()); + + for(PxU32 i=0;i<n;i++) + { + PX_ASSERT(Prims[i]<Data->mNbTris); + Data->mOrder[Data->mIndex] = Prims[i]; + PX_ASSERT(Data->mIndex<Data->mNbTris); + Prims[i] = Data->mIndex; + Data->mIndex++; + } + } + return true; +} + +bool physx::Gu::BuildBV4Ex(BV4Tree& tree, SourceMesh& mesh, float epsilon, PxU32 nbTrisPerLeaf) +{ + const PxU32 nbTris = mesh.mNbTris; + + AABBTree Source; + if(!Source.buildFromMesh(mesh, nbTrisPerLeaf)) + return false; + + { + PxU32* order = reinterpret_cast<PxU32*>(PX_ALLOC(sizeof(PxU32)*nbTris, "BV4")); + ReorderData RD; + RD.mMesh = &mesh; + RD.mOrder = order; + RD.mNbTrisPerLeaf = nbTrisPerLeaf; + RD.mIndex = 0; + RD.mNbTris = nbTris; + for(PxU32 i=0;i<16;i++) + RD.mStats[i] = 0; + Source.walk(gReorderCallback, &RD); + PX_ASSERT(RD.mIndex==nbTris); + mesh.remapTopology(order); + PX_FREE(order); +// for(PxU32 i=0;i<16;i++) +// printf("%d: %d\n", i, RD.mStats[i]); + } + + if(mesh.getNbTriangles()<=nbTrisPerLeaf) + return tree.init(&mesh, Source.getBV()); + + return BuildBV4Internal(tree, Source, &mesh, epsilon); +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.h new file mode 100644 index 00000000..eb2d9e99 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Build.h @@ -0,0 +1,125 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV4_BUILD_H +#define GU_BV4_BUILD_H + +#include "foundation/PxSimpleTypes.h" +#include "GuBV4.h" + +namespace physx +{ +namespace Gu +{ + class BV4Tree; + class SourceMesh; + + //! 
Contains AABB-tree build statistics + // PT: TODO: this is a duplicate of the SQ structure (TA34704) + struct BuildStats + { + BuildStats() : mCount(0), mTotalPrims(0) {} + + PxU32 mCount; //!< Number of nodes created + PxU32 mTotalPrims; //!< Total accumulated number of primitives. Should be much higher than the source + //!< number of prims, since it accumulates all prims covered by each node (i.e. internal + //!< nodes too, not just leaf ones) + + PX_FORCE_INLINE void reset() { mCount = mTotalPrims = 0; } + + PX_FORCE_INLINE void setCount(PxU32 nb) { mCount=nb; } + PX_FORCE_INLINE void increaseCount(PxU32 nb) { mCount+=nb; } + PX_FORCE_INLINE PxU32 getCount() const { return mCount; } + }; + + // PT: TODO: refactor with SQ version (TA34704) + class AABBTreeNode : public physx::shdfnd::UserAllocated + { + public: + PX_FORCE_INLINE AABBTreeNode() : mPos(0), mNodePrimitives(NULL), mNbPrimitives(0) + { + } + PX_FORCE_INLINE ~AABBTreeNode() + { + mPos = 0; + mNodePrimitives = NULL; // This was just a shortcut to the global list => no release + mNbPrimitives = 0; + } + // Data access + PX_FORCE_INLINE const PxBounds3& getAABB() const { return mBV; } + + PX_FORCE_INLINE const AABBTreeNode* getPos() const { return reinterpret_cast<const AABBTreeNode*>(mPos); } + PX_FORCE_INLINE const AABBTreeNode* getNeg() const { const AABBTreeNode* P = getPos(); return P ? 
P+1 : NULL; } + + PX_FORCE_INLINE bool isLeaf() const { return !getPos(); } + + PxBounds3 mBV; // Global bounding-volume enclosing all the node-related primitives + size_t mPos; // "Positive" & "Negative" children + + // Data access + PX_FORCE_INLINE const PxU32* getPrimitives() const { return mNodePrimitives; } + PX_FORCE_INLINE PxU32 getNbPrimitives() const { return mNbPrimitives; } + + PxU32* mNodePrimitives; //!< Node-related primitives (shortcut to a position in mIndices below) + PxU32 mNbPrimitives; //!< Number of primitives for this node + }; + + typedef bool (*WalkingCallback) (const AABBTreeNode* current, PxU32 depth, void* userData); + + // PT: TODO: refactor with SQ version (TA34704) + class AABBTree : public physx::shdfnd::UserAllocated + { + public: + AABBTree(); + ~AABBTree(); + + bool buildFromMesh(SourceMesh& mesh, PxU32 limit); + void release(); + + PX_FORCE_INLINE const PxU32* getIndices() const { return mIndices; } //!< Catch the indices + PX_FORCE_INLINE PxU32 getNbNodes() const { return mTotalNbNodes; } //!< Catch the number of nodes + + PX_FORCE_INLINE const PxU32* getPrimitives() const { return mPool->mNodePrimitives; } + PX_FORCE_INLINE PxU32 getNbPrimitives() const { return mPool->mNbPrimitives; } + PX_FORCE_INLINE const AABBTreeNode* getNodes() const { return mPool; } + PX_FORCE_INLINE const PxBounds3& getBV() const { return mPool->mBV; } + + PxU32 walk(WalkingCallback callback, void* userData) const; + private: + PxU32* mIndices; //!< Indices in the app list. Indices are reorganized during build (permutation). + AABBTreeNode* mPool; //!< Linear pool of nodes for complete trees. Null otherwise. [Opcode 1.3] + PxU32 mTotalNbNodes; //!< Number of nodes in the tree. 
+ }; + + PX_PHYSX_COMMON_API bool BuildBV4Ex(BV4Tree& tree, SourceMesh& mesh, float epsilon, PxU32 nbTrisPerLeaf); + +} // namespace Gu +} + +#endif // GU_BV4_BUILD_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Settings.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Settings.h new file mode 100644 index 00000000..9807e526 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4Settings.h @@ -0,0 +1,39 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. 
+// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV4_SETTINGS_H +#define GU_BV4_SETTINGS_H + + // PT: "BV4" ported from "Opcode 2.0". Available compile-time options are: + #define GU_BV4_STACK_SIZE 256 // Default size of local stacks for non-recursive traversals. + #define GU_BV4_PRECOMPUTED_NODE_SORT // Use node sorting or not. This should probably always be enabled. + #define GU_BV4_QUANTIZED_TREE // Use AABB quantization/compression or not. + #define GU_BV4_USE_SLABS // Use swizzled data format or not. Swizzled = faster raycasts, but slower overlaps & larger trees. + +#endif // GU_BV4_SETTINGS_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_AABBAABBSweepTest.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_AABBAABBSweepTest.h new file mode 100644 index 00000000..1131edad --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_AABBAABBSweepTest.h @@ -0,0 +1,114 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
#ifndef GU_BV4_AABB_AABB_SWEEP_TEST_H
#define GU_BV4_AABB_AABB_SWEEP_TEST_H

// NOTE(review): this whole header is only compiled when the slab format is
// disabled (GU_BV4_USE_SLABS undefined) on Intel-family targets.
#ifndef GU_BV4_USE_SLABS
#if PX_INTEL_FAMILY
	// Swept-box vs node-box overlap test (SSE version).
	// Separating-axis test between the node box (inflated by the swept box
	// extents 'extents2') and the motion segment stored in 'params':
	// first the 3 axis-aligned axes, then the 3 (direction x axis) cross axes
	// via the swizzled |f| > fg formulation. Returns 1 on overlap, 0 if a
	// separating axis was found.
	PX_FORCE_INLINE Ps::IntBool BV4_SegmentAABBOverlap(const PxVec3& center, const PxVec3& extents, const PxVec3& extents2, const RayParams* PX_RESTRICT params)
	{
		const PxU32 maskI = 0x7fffffff;	// Clears the sign bit => fabsf on 4 lanes
		const Vec4V fdirV = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
		const Vec4V extentsV = V4Add(V4LoadU(&extents.x), V4LoadU(&extents2.x));
		const Vec4V DV = V4Sub(V4LoadA_Safe(&params->mData2_PaddedAligned.x), V4LoadU(&center.x));
		__m128 absDV = _mm_and_ps(DV, _mm_load1_ps((float*)&maskI));
		absDV = _mm_cmpgt_ps(absDV, V4Add(extentsV, fdirV));
		const PxU32 test = (PxU32)_mm_movemask_ps(absDV);
		if(test&7)
			return 0;	// Separated on an axis-aligned axis

		{
			// Cross-axes test: f = data.zyx * D.xzy - data.xzy * D.zyx
			const Vec4V dataZYX_V = V4LoadA_Safe(&params->mData_PaddedAligned.x);
			const __m128 dataXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dataZYX_V), _MM_SHUFFLE(3,0,2,1)));
			const __m128 DXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(DV), _MM_SHUFFLE(3,0,2,1)));
			const Vec4V fV = V4Sub(V4Mul(dataZYX_V, DXZY_V), V4Mul(dataXZY_V, DV));

			const Vec4V fdirZYX_V = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
			const __m128 fdirXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(fdirZYX_V), _MM_SHUFFLE(3,0,2,1)));
			const __m128 extentsXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1)));
			// PT: TODO: use V4MulAdd here (TA34704)
			const Vec4V fg = V4Add(V4Mul(extentsV, fdirXZY_V), V4Mul(extentsXZY_V, fdirZYX_V));

			__m128 absfV = _mm_and_ps(fV, _mm_load1_ps((float*)&maskI));
			absfV = _mm_cmpgt_ps(absfV, fg);
			const PxU32 test2 = (PxU32)_mm_movemask_ps(absfV);
			if(test2&7)
				return 0;	// Separated on a cross axis
			return 1;
		}
	}

#ifdef GU_BV4_QUANTIZED_TREE
	// Same test as above, but the node box is dequantized on the fly:
	// each 32-bit lane of node->mAABB.mData packs the quantized extent (max)
	// in the low 16 bits and the quantized center (min) in the high 16 bits.
	template<class T>
	PX_FORCE_INLINE Ps::IntBool BV4_SegmentAABBOverlap(const T* PX_RESTRICT node, const PxVec3& extents2, const RayParams* PX_RESTRICT params)
	{
		const __m128i testV = _mm_load_si128((__m128i*)node->mAABB.mData);
		const __m128i qextentsV = _mm_and_si128(testV, _mm_set1_epi32(0x0000ffff));
		const __m128i qcenterV = _mm_srai_epi32(testV, 16);
		const Vec4V centerV0 = V4Mul(_mm_cvtepi32_ps(qcenterV), V4LoadA_Safe(&params->mCenterOrMinCoeff_PaddedAligned.x));
		const Vec4V extentsV0 = V4Mul(_mm_cvtepi32_ps(qextentsV), V4LoadA_Safe(&params->mExtentsOrMaxCoeff_PaddedAligned.x));

		const PxU32 maskI = 0x7fffffff;	// Clears the sign bit => fabsf on 4 lanes
		const Vec4V fdirV = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
		const Vec4V extentsV = V4Add(extentsV0, V4LoadU(&extents2.x));
		const Vec4V DV = V4Sub(V4LoadA_Safe(&params->mData2_PaddedAligned.x), centerV0);
		__m128 absDV = _mm_and_ps(DV, _mm_load1_ps((float*)&maskI));
		absDV = _mm_cmpgt_ps(absDV, V4Add(extentsV, fdirV));
		const PxU32 test = (PxU32)_mm_movemask_ps(absDV);
		if(test&7)
			return 0;	// Separated on an axis-aligned axis

		{
			// Cross-axes test, identical to the non-quantized version.
			const Vec4V dataZYX_V = V4LoadA_Safe(&params->mData_PaddedAligned.x);
			const __m128 dataXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dataZYX_V), _MM_SHUFFLE(3,0,2,1)));
			const __m128 DXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(DV), _MM_SHUFFLE(3,0,2,1)));
			const Vec4V fV = V4Sub(V4Mul(dataZYX_V, DXZY_V), V4Mul(dataXZY_V, DV));

			const Vec4V fdirZYX_V = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
			const __m128 fdirXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(fdirZYX_V), _MM_SHUFFLE(3,0,2,1)));
			const __m128 extentsXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1)));
			// PT: TODO: use V4MulAdd here (TA34704)
			const Vec4V fg = V4Add(V4Mul(extentsV, fdirXZY_V), V4Mul(extentsXZY_V, fdirZYX_V));

			__m128 absfV = _mm_and_ps(fV, _mm_load1_ps((float*)&maskI));
			absfV = _mm_cmpgt_ps(absfV, fg);
			const PxU32 test2 = (PxU32)_mm_movemask_ps(absfV);
			if(test2&7)
				return 0;	// Separated on a cross axis
			return 1;
		}
	}
#endif
#endif
#endif

#endif // GU_BV4_AABB_AABB_SWEEP_TEST_H
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "GuBV4.h" +using namespace physx; +using namespace Gu; + +#if PX_INTEL_FAMILY +#define SWEEP_AABB_IMPL +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; +#include "GuBV4_BoxSweep_Internal.h" +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxBoxOverlapTest.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxBoxOverlapTest.h new file mode 100644 index 00000000..ff696a38 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxBoxOverlapTest.h @@ -0,0 +1,201 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_BV4_BOX_BOX_OVERLAP_TEST_H +#define GU_BV4_BOX_BOX_OVERLAP_TEST_H + +#if PX_INTEL_FAMILY +#ifndef GU_BV4_USE_SLABS + PX_FORCE_INLINE Ps::IntBool BV4_BoxBoxOverlap(const PxVec3& extents, const PxVec3& center, const OBBTestParams* PX_RESTRICT params) + { + const PxU32 maskI = 0x7fffffff; + + const Vec4V extentsV = V4LoadU(&extents.x); + + const Vec4V TV = V4Sub(V4LoadA_Safe(¶ms->mTBoxToModel_PaddedAligned.x), V4LoadU(¢er.x)); + { + __m128 absTV = _mm_and_ps(TV, _mm_load1_ps((float*)&maskI)); + absTV = _mm_cmpgt_ps(absTV, V4Add(extentsV, V4LoadA_Safe(¶ms->mBB_PaddedAligned.x))); + const PxU32 test = (PxU32)_mm_movemask_ps(absTV); + if(test&7) + return 0; + } + + __m128 tV; + { + const __m128 T_YZX_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(TV), _MM_SHUFFLE(3,0,2,1))); + const __m128 T_ZXY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(TV), _MM_SHUFFLE(3,1,0,2))); + + tV = V4Mul(TV, V4LoadA_Safe(¶ms->mPreca0_PaddedAligned.x)); + tV = V4Add(tV, V4Mul(T_YZX_V, V4LoadA_Safe(¶ms->mPreca1_PaddedAligned.x))); + tV = V4Add(tV, V4Mul(T_ZXY_V, V4LoadA_Safe(¶ms->mPreca2_PaddedAligned.x))); + } + + __m128 t2V; + { + const __m128 extents_YZX_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1))); + const __m128 extents_ZXY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,1,0,2))); + + t2V = V4Mul(extentsV, V4LoadA_Safe(¶ms->mPreca0b_PaddedAligned.x)); + t2V = V4Add(t2V, V4Mul(extents_YZX_V, V4LoadA_Safe(¶ms->mPreca1b_PaddedAligned.x))); + t2V = V4Add(t2V, V4Mul(extents_ZXY_V, V4LoadA_Safe(¶ms->mPreca2b_PaddedAligned.x))); + t2V = V4Add(t2V, V4LoadA_Safe(¶ms->mBoxExtents_PaddedAligned.x)); + } + + { + __m128 abstV = _mm_and_ps(tV, _mm_load1_ps((float*)&maskI)); + abstV = _mm_cmpgt_ps(abstV, t2V); + const PxU32 test = (PxU32)_mm_movemask_ps(abstV); + if(test&7) + return 0; + } + return 1; + } + +#ifdef GU_BV4_QUANTIZED_TREE + template<class T> + PX_FORCE_INLINE Ps::IntBool 
BV4_BoxBoxOverlap(const T* PX_RESTRICT node, const OBBTestParams* PX_RESTRICT params) + { +#define NEW_VERSION +#ifdef NEW_VERSION + SSE_CONST4(maskV, 0x7fffffff); + SSE_CONST4(maskQV, 0x0000ffff); +#else + const PxU32 maskI = 0x7fffffff; +#endif + + Vec4V centerV = V4LoadA((float*)node->mAABB.mData); +#ifdef NEW_VERSION + __m128 extentsV = _mm_castsi128_ps(_mm_and_si128(_mm_castps_si128(centerV), SSE_CONST(maskQV))); +#else + __m128 extentsV = _mm_castsi128_ps(_mm_and_si128(_mm_castps_si128(centerV), _mm_set1_epi32(0x0000ffff))); +#endif + extentsV = V4Mul(_mm_cvtepi32_ps(_mm_castps_si128(extentsV)), V4LoadA_Safe(¶ms->mExtentsOrMaxCoeff_PaddedAligned.x)); + centerV = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(centerV), 16)); + centerV = V4Mul(_mm_cvtepi32_ps(_mm_castps_si128(centerV)), V4LoadA_Safe(¶ms->mCenterOrMinCoeff_PaddedAligned.x)); + + const Vec4V TV = V4Sub(V4LoadA_Safe(¶ms->mTBoxToModel_PaddedAligned.x), centerV); + { +#ifdef NEW_VERSION + __m128 absTV = _mm_and_ps(TV, SSE_CONSTF(maskV)); +#else + __m128 absTV = _mm_and_ps(TV, _mm_load1_ps((float*)&maskI)); +#endif + + absTV = _mm_cmpgt_ps(absTV, V4Add(extentsV, V4LoadA_Safe(¶ms->mBB_PaddedAligned.x))); + const PxU32 test = (PxU32)_mm_movemask_ps(absTV); + if(test&7) + return 0; + } + + __m128 tV; + { + const __m128 T_YZX_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(TV), _MM_SHUFFLE(3,0,2,1))); + const __m128 T_ZXY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(TV), _MM_SHUFFLE(3,1,0,2))); + + tV = V4Mul(TV, V4LoadA_Safe(¶ms->mPreca0_PaddedAligned.x)); + tV = V4Add(tV, V4Mul(T_YZX_V, V4LoadA_Safe(¶ms->mPreca1_PaddedAligned.x))); + tV = V4Add(tV, V4Mul(T_ZXY_V, V4LoadA_Safe(¶ms->mPreca2_PaddedAligned.x))); + } + + __m128 t2V; + { + const __m128 extents_YZX_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1))); + const __m128 extents_ZXY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,1,0,2))); + + t2V = 
V4Mul(extentsV, V4LoadA_Safe(¶ms->mPreca0b_PaddedAligned.x)); + t2V = V4Add(t2V, V4Mul(extents_YZX_V, V4LoadA_Safe(¶ms->mPreca1b_PaddedAligned.x))); + t2V = V4Add(t2V, V4Mul(extents_ZXY_V, V4LoadA_Safe(¶ms->mPreca2b_PaddedAligned.x))); + t2V = V4Add(t2V, V4LoadA_Safe(¶ms->mBoxExtents_PaddedAligned.x)); + } + + { +#ifdef NEW_VERSION + __m128 abstV = _mm_and_ps(tV, SSE_CONSTF(maskV)); +#else + __m128 abstV = _mm_and_ps(tV, _mm_load1_ps((float*)&maskI)); +#endif + abstV = _mm_cmpgt_ps(abstV, t2V); + const PxU32 test = (PxU32)_mm_movemask_ps(abstV); + if(test&7) + return 0; + } + return 1; + } +#endif // GU_BV4_QUANTIZED_TREE +#endif // GU_BV4_USE_SLABS + +#ifdef GU_BV4_USE_SLABS + PX_FORCE_INLINE Ps::IntBool BV4_BoxBoxOverlap(const __m128 boxCenter, const __m128 extentsV, const OBBTestParams* PX_RESTRICT params) + { + const PxU32 maskI = 0x7fffffff; + + const Vec4V TV = V4Sub(V4LoadA_Safe(¶ms->mTBoxToModel_PaddedAligned.x), boxCenter); + { + __m128 absTV = _mm_and_ps(TV, _mm_load1_ps(reinterpret_cast<const float*>(&maskI))); + absTV = _mm_cmpgt_ps(absTV, V4Add(extentsV, V4LoadA_Safe(¶ms->mBB_PaddedAligned.x))); + const PxU32 test = PxU32(_mm_movemask_ps(absTV)); + if(test&7) + return 0; + } + + __m128 tV; + { + const __m128 T_YZX_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(TV), _MM_SHUFFLE(3,0,2,1))); + const __m128 T_ZXY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(TV), _MM_SHUFFLE(3,1,0,2))); + + tV = V4Mul(TV, V4LoadA_Safe(¶ms->mPreca0_PaddedAligned.x)); + tV = V4Add(tV, V4Mul(T_YZX_V, V4LoadA_Safe(¶ms->mPreca1_PaddedAligned.x))); + tV = V4Add(tV, V4Mul(T_ZXY_V, V4LoadA_Safe(¶ms->mPreca2_PaddedAligned.x))); + } + + __m128 t2V; + { + const __m128 extents_YZX_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1))); + const __m128 extents_ZXY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,1,0,2))); + + t2V = V4Mul(extentsV, V4LoadA_Safe(¶ms->mPreca0b_PaddedAligned.x)); + t2V = 
V4Add(t2V, V4Mul(extents_YZX_V, V4LoadA_Safe(¶ms->mPreca1b_PaddedAligned.x))); + t2V = V4Add(t2V, V4Mul(extents_ZXY_V, V4LoadA_Safe(¶ms->mPreca2b_PaddedAligned.x))); + t2V = V4Add(t2V, V4LoadA_Safe(¶ms->mBoxExtents_PaddedAligned.x)); + } + + { + __m128 abstV = _mm_and_ps(tV, _mm_load1_ps(reinterpret_cast<const float*>(&maskI))); + abstV = _mm_cmpgt_ps(abstV, t2V); + const PxU32 test = PxU32(_mm_movemask_ps(abstV)); + if(test&7) + return 0; + } + return 1; + } +#endif // GU_BV4_USE_SLABS +#endif // PX_INTEL_FAMILY + +#endif // GU_BV4_BOX_BOX_OVERLAP_TEST_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxOverlap.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxOverlap.cpp new file mode 100644 index 00000000..febf7261 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxOverlap.cpp @@ -0,0 +1,473 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "GuBV4.h" +using namespace physx; +using namespace Gu; + +#if PX_INTEL_FAMILY + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#include "GuInternal.h" +#include "GuDistancePointSegment.h" +#include "GuIntersectionCapsuleTriangle.h" +#include "GuIntersectionTriangleBox.h" + +#include "GuBV4_BoxOverlap_Internal.h" +#include "GuBV4_BoxBoxOverlapTest.h" + +// Box overlap any + +struct OBBParams : OBBTestParams +{ + const IndTri32* PX_RESTRICT mTris32; + const IndTri16* PX_RESTRICT mTris16; + const PxVec3* PX_RESTRICT mVerts; + + PxMat33 mRModelToBox_Padded; //!< Rotation from model space to obb space + Vec3p mTModelToBox_Padded; //!< Translation from model space to obb space +}; + +// PT: TODO: this used to be inlined so we lost some perf by moving to PhysX's version. Revisit. 
(TA34704) +Ps::IntBool intersectTriangleBoxBV4(const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, + const PxMat33& rotModelToBox, const PxVec3& transModelToBox, const PxVec3& extents); +namespace +{ +class LeafFunction_BoxOverlapAny +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(const OBBParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + if(intersectTriangleBoxBV4(params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params->mRModelToBox_Padded, params->mTModelToBox_Padded, params->mBoxExtents_PaddedAligned)) + return 1; + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +template<class ParamsT> +static PX_FORCE_INLINE void setupBoxParams(ParamsT* PX_RESTRICT params, const Box& localBox, const BV4Tree* PX_RESTRICT tree, const SourceMesh* PX_RESTRICT mesh) +{ + invertBoxMatrix(params->mRModelToBox_Padded, params->mTModelToBox_Padded, localBox); + params->mTBoxToModel_PaddedAligned = localBox.center; + + setupMeshPointersAndQuantizedCoeffs(params, mesh, tree); + + params->precomputeBoxData(localBox.extents, &localBox.rot); +} + +/////////////////////////////////////////////////////////////////////////////// + +#include "GuBV4_Internal.h" +#include "GuBV4_BoxBoxOverlapTest.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs.h" +#endif +#include "GuBV4_ProcessStreamNoOrder_OBBOBB.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs_SwizzledNoOrder.h" +#endif + +Ps::IntBool BV4_OverlapBoxAny(const Box& box, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + Box localBox; + computeLocalBox(localBox, box, worldm_Aligned); + + OBBParams Params; + setupBoxParams(&Params, localBox, &tree, mesh); + + if(tree.mNodes) + { + return 
processStreamNoOrder<LeafFunction_BoxOverlapAny>(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + return LeafFunction_BoxOverlapAny::doLeafTest(&Params, nbTris); + } +} + + +// Box overlap all + +struct OBBParamsAll : OBBParams +{ + PxU32 mNbHits; + PxU32 mMaxNbHits; + PxU32* mHits; +}; + +namespace +{ +class LeafFunction_BoxOverlapAll +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(OBBParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + if(intersectTriangleBoxBV4(params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params->mRModelToBox_Padded, params->mTModelToBox_Padded, params->mBoxExtents_PaddedAligned)) + { + OBBParamsAll* ParamsAll = static_cast<OBBParamsAll*>(params); + ParamsAll->mHits[ParamsAll->mNbHits] = primIndex; + ParamsAll->mNbHits++; + if(ParamsAll->mNbHits==ParamsAll->mMaxNbHits) + return 1; + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; + +} + +PxU32 BV4_OverlapBoxAll(const Box& box, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxU32* results, PxU32 size, bool& overflow) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + Box localBox; + computeLocalBox(localBox, box, worldm_Aligned); + + OBBParamsAll Params; + Params.mNbHits = 0; + Params.mMaxNbHits = size; + Params.mHits = results; + setupBoxParams(&Params, localBox, &tree, mesh); + + if(tree.mNodes) + { + overflow = processStreamNoOrder<LeafFunction_BoxOverlapAll>(tree.mNodes, tree.mInitData, &Params)!=0; + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + overflow = LeafFunction_BoxOverlapAll::doLeafTest(&Params, nbTris)!=0; + } + return Params.mNbHits; +} + +// Box overlap - callback version + +struct OBBParamsCB : OBBParams +{ + 
MeshOverlapCallback mCallback; + void* mUserData; +}; + +namespace +{ +class LeafFunction_BoxOverlapCB +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(const OBBParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + if(intersectTriangleBoxBV4(params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params->mRModelToBox_Padded, params->mTModelToBox_Padded, params->mBoxExtents_PaddedAligned)) + { + const PxU32 vrefs[3] = { VRef0, VRef1, VRef2 }; + if((params->mCallback)(params->mUserData, params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], primIndex, vrefs)) + return 1; + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +void BV4_OverlapBoxCB(const Box& localBox, const BV4Tree& tree, MeshOverlapCallback callback, void* userData) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + OBBParamsCB Params; + Params.mCallback = callback; + Params.mUserData = userData; + setupBoxParams(&Params, localBox, &tree, mesh); + + if(tree.mNodes) + { + processStreamNoOrder<LeafFunction_BoxOverlapCB>(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + LeafFunction_BoxOverlapCB::doLeafTest(&Params, nbTris); + } +} + +// Capsule overlap any + +struct CapsuleParamsAny : OBBParams +{ + Capsule mLocalCapsule; // Capsule in mesh space + CapsuleTriangleOverlapData mData; +}; + +// PT: TODO: try to refactor this one with the PhysX version (TA34704) +static bool CapsuleVsTriangle_SAT(const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, const CapsuleParamsAny* PX_RESTRICT params) +{ +// PX_ASSERT(capsule.p0!=capsule.p1); + + { + const PxReal d2 = distancePointSegmentSquaredInternal(params->mLocalCapsule.p0, params->mData.mCapsuleDir, p0); + 
if(d2<=params->mLocalCapsule.radius*params->mLocalCapsule.radius) + return 1; + } + + const PxVec3 N = (p0 - p1).cross(p0 - p2); + + if(!testAxis(p0, p1, p2, params->mLocalCapsule, N)) + return 0; + + const float BDotB = params->mData.mBDotB; + const float oneOverBDotB = params->mData.mOneOverBDotB; + const PxVec3& capP0 = params->mLocalCapsule.p0; + const PxVec3& capDir = params->mData.mCapsuleDir; + + if(!testAxis(p0, p1, p2, params->mLocalCapsule, computeEdgeAxis(p0, p1 - p0, capP0, capDir, BDotB, oneOverBDotB))) + return 0; + + if(!testAxis(p0, p1, p2, params->mLocalCapsule, computeEdgeAxis(p1, p2 - p1, capP0, capDir, BDotB, oneOverBDotB))) + return 0; + + if(!testAxis(p0, p1, p2, params->mLocalCapsule, computeEdgeAxis(p2, p0 - p2, capP0, capDir, BDotB, oneOverBDotB))) + return 0; + + return 1; +} + +static Ps::IntBool PX_FORCE_INLINE __CapsuleTriangle(const CapsuleParamsAny* PX_RESTRICT params, PxU32 primIndex) +{ + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + return CapsuleVsTriangle_SAT(params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params); +} + +namespace +{ +class LeafFunction_CapsuleOverlapAny +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(const OBBParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(__CapsuleTriangle(static_cast<const CapsuleParamsAny*>(params), primIndex)) + return 1; + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +template<class ParamsT> +static PX_FORCE_INLINE void setupCapsuleParams(ParamsT* PX_RESTRICT params, const Capsule& capsule, const BV4Tree* PX_RESTRICT tree, const PxMat44* PX_RESTRICT worldm_Aligned, const SourceMesh* PX_RESTRICT mesh) +{ + computeLocalCapsule(params->mLocalCapsule, capsule, worldm_Aligned); + + params->mData.init(params->mLocalCapsule); + + Box localBox; + computeBoxAroundCapsule(params->mLocalCapsule, localBox); + + 
setupBoxParams(params, localBox, tree, mesh); +} + +Ps::IntBool BV4_OverlapCapsuleAny(const Capsule& capsule, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + CapsuleParamsAny Params; + setupCapsuleParams(&Params, capsule, &tree, worldm_Aligned, mesh); + + if(tree.mNodes) + { + return processStreamNoOrder<LeafFunction_CapsuleOverlapAny>(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + return LeafFunction_CapsuleOverlapAny::doLeafTest(&Params, nbTris); + } +} + + +// Capsule overlap all + +struct CapsuleParamsAll : CapsuleParamsAny +{ + PxU32 mNbHits; + PxU32 mMaxNbHits; + PxU32* mHits; +}; + +namespace +{ +class LeafFunction_CapsuleOverlapAll +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(OBBParams* PX_RESTRICT params, PxU32 primIndex) + { + CapsuleParamsAll* ParamsAll = static_cast<CapsuleParamsAll*>(params); + + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(__CapsuleTriangle(ParamsAll, primIndex)) + { + ParamsAll->mHits[ParamsAll->mNbHits] = primIndex; + ParamsAll->mNbHits++; + if(ParamsAll->mNbHits==ParamsAll->mMaxNbHits) + return 1; + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +PxU32 BV4_OverlapCapsuleAll(const Capsule& capsule, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxU32* results, PxU32 size, bool& overflow) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + CapsuleParamsAll Params; + Params.mNbHits = 0; + Params.mMaxNbHits = size; + Params.mHits = results; + setupCapsuleParams(&Params, capsule, &tree, worldm_Aligned, mesh); + + if(tree.mNodes) + { + overflow = processStreamNoOrder<LeafFunction_CapsuleOverlapAll>(tree.mNodes, tree.mInitData, &Params)!=0; + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + overflow = LeafFunction_CapsuleOverlapAll::doLeafTest(&Params, nbTris)!=0; + } 
+ return Params.mNbHits; +} + +// Capsule overlap - callback version + +struct CapsuleParamsCB : CapsuleParamsAny +{ + MeshOverlapCallback mCallback; + void* mUserData; +}; + +namespace +{ +class LeafFunction_CapsuleOverlapCB +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(const CapsuleParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + if(CapsuleVsTriangle_SAT(p0, p1, p2, params)) + { + const PxU32 vrefs[3] = { VRef0, VRef1, VRef2 }; + if((params->mCallback)(params->mUserData, p0, p1, p2, primIndex, vrefs)) + return 1; + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +// PT: this one is currently not used +void BV4_OverlapCapsuleCB(const Capsule& capsule, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, MeshOverlapCallback callback, void* userData) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + CapsuleParamsCB Params; + Params.mCallback = callback; + Params.mUserData = userData; + setupCapsuleParams(&Params, capsule, &tree, worldm_Aligned, mesh); + + if(tree.mNodes) + { + processStreamNoOrder<LeafFunction_CapsuleOverlapCB>(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + LeafFunction_CapsuleOverlapCB::doLeafTest(&Params, nbTris); + } +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxOverlap_Internal.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxOverlap_Internal.h new file mode 100644 index 00000000..410af5b8 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxOverlap_Internal.h @@ -0,0 +1,105 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a 
form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_BV4_BOX_OVERLAP_INTERNAL_H +#define GU_BV4_BOX_OVERLAP_INTERNAL_H + +#include "GuBV4_Common.h" + + template<class ParamsT> + PX_FORCE_INLINE void precomputeData(ParamsT* PX_RESTRICT dst, PxMat33* PX_RESTRICT absRot, const PxMat33* PX_RESTRICT boxToModelR) + { + // Precompute absolute box-to-model rotation matrix + dst->mPreca0_PaddedAligned.x = boxToModelR->column0.x; + dst->mPreca0_PaddedAligned.y = boxToModelR->column1.y; + dst->mPreca0_PaddedAligned.z = boxToModelR->column2.z; + + dst->mPreca1_PaddedAligned.x = boxToModelR->column0.y; + dst->mPreca1_PaddedAligned.y = boxToModelR->column1.z; + dst->mPreca1_PaddedAligned.z = boxToModelR->column2.x; + + dst->mPreca2_PaddedAligned.x = boxToModelR->column0.z; + dst->mPreca2_PaddedAligned.y = boxToModelR->column1.x; + dst->mPreca2_PaddedAligned.z = boxToModelR->column2.y; + + // Epsilon value prevents floating-point inaccuracies (strategy borrowed from RAPID) + const PxReal epsilon = 1e-6f; + absRot->column0.x = dst->mPreca0b_PaddedAligned.x = epsilon + fabsf(boxToModelR->column0.x); + absRot->column0.y = dst->mPreca1b_PaddedAligned.x = epsilon + fabsf(boxToModelR->column0.y); + absRot->column0.z = dst->mPreca2b_PaddedAligned.x = epsilon + fabsf(boxToModelR->column0.z); + + absRot->column1.x = dst->mPreca2b_PaddedAligned.y = epsilon + fabsf(boxToModelR->column1.x); + absRot->column1.y = dst->mPreca0b_PaddedAligned.y = epsilon + fabsf(boxToModelR->column1.y); + absRot->column1.z = dst->mPreca1b_PaddedAligned.y = epsilon + fabsf(boxToModelR->column1.z); + + absRot->column2.x = dst->mPreca1b_PaddedAligned.z = epsilon + fabsf(boxToModelR->column2.x); + absRot->column2.y = dst->mPreca2b_PaddedAligned.z = epsilon + fabsf(boxToModelR->column2.y); + absRot->column2.z = dst->mPreca0b_PaddedAligned.z = epsilon + fabsf(boxToModelR->column2.z); + } + + template<class ParamsT> + PX_FORCE_INLINE void setupBoxData(ParamsT* PX_RESTRICT dst, const PxVec3& extents, const PxMat33* PX_RESTRICT mAR) + { + 
dst->mBoxExtents_PaddedAligned = extents; + + const float Ex = extents.x; + const float Ey = extents.y; + const float Ez = extents.z; + dst->mBB_PaddedAligned.x = Ex*mAR->column0.x + Ey*mAR->column1.x + Ez*mAR->column2.x; + dst->mBB_PaddedAligned.y = Ex*mAR->column0.y + Ey*mAR->column1.y + Ez*mAR->column2.y; + dst->mBB_PaddedAligned.z = Ex*mAR->column0.z + Ey*mAR->column1.z + Ez*mAR->column2.z; + } + + struct OBBTestParams // Data needed to perform the OBB-OBB overlap test + { +#ifdef GU_BV4_QUANTIZED_TREE + BV4_ALIGN16(Vec3p mCenterOrMinCoeff_PaddedAligned); + BV4_ALIGN16(Vec3p mExtentsOrMaxCoeff_PaddedAligned); +#endif + BV4_ALIGN16(Vec3p mTBoxToModel_PaddedAligned); //!< Translation from obb space to model space + BV4_ALIGN16(Vec3p mBB_PaddedAligned); + BV4_ALIGN16(Vec3p mBoxExtents_PaddedAligned); + + BV4_ALIGN16(Vec3p mPreca0_PaddedAligned); + BV4_ALIGN16(Vec3p mPreca1_PaddedAligned); + BV4_ALIGN16(Vec3p mPreca2_PaddedAligned); + BV4_ALIGN16(Vec3p mPreca0b_PaddedAligned); + BV4_ALIGN16(Vec3p mPreca1b_PaddedAligned); + BV4_ALIGN16(Vec3p mPreca2b_PaddedAligned); + + PX_FORCE_INLINE void precomputeBoxData(const PxVec3& extents, const PxMat33* PX_RESTRICT box_to_model) + { + PxMat33 absRot; //!< Absolute rotation matrix + precomputeData(this, &absRot, box_to_model); + + setupBoxData(this, extents, &absRot); + } + }; + +#endif // GU_BV4_BOX_OVERLAP_INTERNAL_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxSweep_Internal.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxSweep_Internal.h new file mode 100644 index 00000000..ed595e39 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxSweep_Internal.h @@ -0,0 +1,512 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "GuSweepTriangleUtils.h" +#include "GuSweepBoxTriangle_FeatureBased.h" +#include "GuSweepBoxTriangle_SAT.h" +#include "GuBV4_BoxOverlap_Internal.h" + +// PT: for box-sweeps please refer to \\sw\physx\PhysXSDK\3.4\trunk\InternalDocumentation\GU\Sweep strategies.ppt. 
+// We use: +// - method 3 if the box is an AABB (SWEEP_AABB_IMPL is defined) +// - method 2 if the box is an OBB (SWEEP_AABB_IMPL is undefined) + +#ifdef SWEEP_AABB_IMPL + // PT: TODO: refactor structure (TA34704) + struct RayParams + { + #ifdef GU_BV4_QUANTIZED_TREE + BV4_ALIGN16(Vec3p mCenterOrMinCoeff_PaddedAligned); + BV4_ALIGN16(Vec3p mExtentsOrMaxCoeff_PaddedAligned); + #endif + #ifndef GU_BV4_USE_SLABS + BV4_ALIGN16(Vec3p mData2_PaddedAligned); + BV4_ALIGN16(Vec3p mFDir_PaddedAligned); + BV4_ALIGN16(Vec3p mData_PaddedAligned); + BV4_ALIGN16(Vec3p mLocalDir_PaddedAligned); + #endif + BV4_ALIGN16(Vec3p mOrigin_Padded); // PT: TODO: this one could be switched to PaddedAligned & V4LoadA (TA34704) + }; + + #include "GuBV4_AABBAABBSweepTest.h" +#else + #include "GuBV4_BoxBoxOverlapTest.h" +#endif + +#include "GuBV4_BoxSweep_Params.h" + +static PX_FORCE_INLINE Vec4V multiply3x3V(const Vec4V p, const PxMat33& mat_Padded) +{ + const FloatV xxxV = V4GetX(p); + const FloatV yyyV = V4GetY(p); + const FloatV zzzV = V4GetZ(p); + + Vec4V ResV = V4Scale(V4LoadU_Safe(&mat_Padded.column0.x), xxxV); + ResV = V4Add(ResV, V4Scale(V4LoadU_Safe(&mat_Padded.column1.x), yyyV)); + ResV = V4Add(ResV, V4Scale(V4LoadU_Safe(&mat_Padded.column2.x), zzzV)); + + return ResV; +} + +// PT: TODO: __fastcall removed to make it compile everywhere. Revisit. 
+static bool /*__fastcall*/ triBoxSweep(BoxSweepParams* PX_RESTRICT params, PxU32 primIndex, bool nodeSorting=true) +{ + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + // Don't bother doing the actual sweep test if the triangle is too far away + if(1) + { + const float dp0 = p0.dot(params->mLocalDir_Padded); + const float dp1 = p1.dot(params->mLocalDir_Padded); + const float dp2 = p2.dot(params->mLocalDir_Padded); + + float TriMin = PxMin(dp0, dp1); + TriMin = PxMin(TriMin, dp2); + + if(TriMin >= params->mOffset + params->mStabbedFace.mDistance) + return false; + } + + TrianglePadded triBoxSpace; + const Vec4V transModelToBoxV = V4LoadU_Safe(¶ms->mTModelToBox_Padded.x); + const Vec4V v0V = V4Add(multiply3x3V(V4LoadU_Safe(&p0.x), params->mRModelToBox_Padded), transModelToBoxV); + V4StoreU_Safe(v0V, &triBoxSpace.verts[0].x); + const Vec4V v1V = V4Add(multiply3x3V(V4LoadU_Safe(&p1.x), params->mRModelToBox_Padded), transModelToBoxV); + V4StoreU_Safe(v1V, &triBoxSpace.verts[1].x); + const Vec4V v2V = V4Add(multiply3x3V(V4LoadU_Safe(&p2.x), params->mRModelToBox_Padded), transModelToBoxV); + V4StoreU_Safe(v2V, &triBoxSpace.verts[2].x); + + float Dist; + if(triBoxSweepTestBoxSpace_inlined(triBoxSpace, params->mOriginalExtents_Padded, params->mOriginalDir_Padded*params->mStabbedFace.mDistance, params->mOneOverDir_Padded, 1.0f, Dist, params->mBackfaceCulling)) + { + // PT: TODO: these muls & divs may not be needed at all - we just pass the unit dir/inverse dir to the sweep code. Revisit. 
(TA34704) + Dist *= params->mStabbedFace.mDistance; + params->mOneOverDir_Padded = params->mOneOverOriginalDir / Dist; + params->mStabbedFace.mDistance = Dist; + params->mStabbedFace.mTriangleID = primIndex; + // PT: TODO: revisit this (TA34704) + params->mP0 = triBoxSpace.verts[0]; + params->mP1 = triBoxSpace.verts[1]; + params->mP2 = triBoxSpace.verts[2]; +// V4StoreU_Safe(v0V, ¶ms->mP0.x); +// V4StoreU_Safe(v1V, ¶ms->mP1.x); +// V4StoreU_Safe(v2V, ¶ms->mP2.x); + + if(nodeSorting) + { +#ifdef SWEEP_AABB_IMPL + #ifndef GU_BV4_USE_SLABS + setupRayData(params, Dist, params->mOrigin_Padded, params->mLocalDir_PaddedAligned); + #endif +#else + params->ShrinkOBB(Dist); +#endif + } + return true; + } + return false; +} + +namespace +{ +class LeafFunction_BoxSweepClosest +{ +public: + static PX_FORCE_INLINE void doLeafTest(BoxSweepParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + triBoxSweep(params, primIndex); + primIndex++; + }while(nbToGo--); + } +}; + +class LeafFunction_BoxSweepAny +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(BoxSweepParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(triBoxSweep(params, primIndex)) + return 1; + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +// PT: TODO: refactor with sphere/capsule versions (TA34704) +static PX_FORCE_INLINE bool computeImpactData(const Box& box, const PxVec3& dir, SweepHit* PX_RESTRICT hit, const BoxSweepParams* PX_RESTRICT params, bool isDoubleSided, bool meshBothSides) +{ + if(params->mStabbedFace.mTriangleID==PX_INVALID_U32) + return false; // We didn't touch any triangle + + if(hit) + { + const float t = params->mStabbedFace.mDistance; + hit->mTriangleID = params->mStabbedFace.mTriangleID; + hit->mDistance = t; + + if(t==0.0f) + { + hit->mPos = PxVec3(0.0f); + hit->mNormal = -dir; + } + else + { + // PT: TODO: revisit/optimize/use this (TA34704) + const PxTriangle 
triInBoxSpace(params->mP0, params->mP1, params->mP2); + PxHitFlags outFlags = PxHitFlag::Enum(0); + computeBoxLocalImpact(hit->mPos, hit->mNormal, outFlags, box, params->mOriginalDir_Padded, triInBoxSpace, PxHitFlag::ePOSITION|PxHitFlag::eNORMAL, isDoubleSided, meshBothSides, t); + } + } + return true; +} + +template<class ParamsT> +static PX_FORCE_INLINE void setupBoxSweepParams(ParamsT* PX_RESTRICT params, const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree* PX_RESTRICT tree, const SourceMesh* PX_RESTRICT mesh, PxU32 flags) +{ + params->mStabbedFace.mTriangleID = PX_INVALID_U32; + setupParamsFlags(params, flags); + + setupMeshPointersAndQuantizedCoeffs(params, mesh, tree); + + prepareSweepData(localBox, localDir, maxDist, params); + +#ifdef SWEEP_AABB_IMPL + params->mOrigin_Padded = localBox.center; + #ifndef GU_BV4_USE_SLABS + params->mLocalDir_PaddedAligned = localDir; + setupRayData(params, maxDist, localBox.center, localDir); + #endif +#endif +} + +#include "GuBV4_Internal.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs.h" +#endif +#ifdef SWEEP_AABB_IMPL + #include "GuBV4_ProcessStreamOrdered_SegmentAABB_Inflated.h" + #include "GuBV4_ProcessStreamNoOrder_SegmentAABB_Inflated.h" + #ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs_KajiyaNoOrder.h" + #include "GuBV4_Slabs_KajiyaOrdered.h" + #endif +#else + #include "GuBV4_ProcessStreamOrdered_OBBOBB.h" + #include "GuBV4_ProcessStreamNoOrder_OBBOBB.h" + #ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs_SwizzledNoOrder.h" + #include "GuBV4_Slabs_SwizzledOrdered.h" + #endif +#endif + +#ifdef SWEEP_AABB_IMPL +Ps::IntBool Sweep_AABB_BV4(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags) +#else +Ps::IntBool Sweep_OBB_BV4(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags) +#endif +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + BoxSweepParams 
Params; + setupBoxSweepParams(&Params, localBox, localDir, maxDist, &tree, mesh, flags); + + if(tree.mNodes) + { +#ifdef SWEEP_AABB_IMPL + if(Params.mEarlyExit) + processStreamRayNoOrder(1, LeafFunction_BoxSweepAny)(tree.mNodes, tree.mInitData, &Params); + else + processStreamRayOrdered(1, LeafFunction_BoxSweepClosest)(tree.mNodes, tree.mInitData, &Params); +#else + if(Params.mEarlyExit) + processStreamNoOrder<LeafFunction_BoxSweepAny>(tree.mNodes, tree.mInitData, &Params); + else + processStreamOrdered<LeafFunction_BoxSweepClosest>(tree.mNodes, tree.mInitData, &Params); +#endif + } + else + doBruteForceTests<LeafFunction_BoxSweepAny, LeafFunction_BoxSweepClosest>(mesh->getNbTriangles(), &Params); + + return computeImpactData(localBox, localDir, hit, &Params, (flags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (flags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); +} + + + +// PT: box sweep callback version - currently not used + +namespace +{ + struct BoxSweepParamsCB : BoxSweepParams + { + // PT: these new members are only here to call computeImpactData during traversal :( + // PT: TODO: most of them may not be needed + Box mBoxCB; // Box in original space (maybe not local/mesh space) + PxVec3 mDirCB; // Dir in original space (maybe not local/mesh space) + const PxMat44* mWorldm_Aligned; + PxU32 mFlags; + + SweepUnlimitedCallback mCallback; + void* mUserData; + float mMaxDist; + bool mNodeSorting; + }; + +class LeafFunction_BoxSweepCB +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(BoxSweepParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(triBoxSweep(params, primIndex, params->mNodeSorting)) + { + // PT: TODO: in this version we must compute the impact data immediately, + // which is a terrible idea in general, but I'm not sure what else I can do. 
+ SweepHit hit; + const bool b = computeImpactData(params->mBoxCB, params->mDirCB, &hit, params, (params->mFlags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (params->mFlags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); + PX_ASSERT(b); + + // PT: then replicate part from BV4_BoxSweepSingle: + if(b && params->mWorldm_Aligned) + { + // Move to world space + // PT: TODO: optimize (TA34704) + hit.mPos = params->mWorldm_Aligned->transform(hit.mPos); + hit.mNormal = params->mWorldm_Aligned->rotate(hit.mNormal); + } + + reportUnlimitedCallbackHit(params, hit); + } + + primIndex++; + }while(nbToGo--); + + return 0; + } +}; + +} + +// PT: for design decisions in this function, refer to the comments of BV4_GenericSweepCB(). +// PT: 'worldm_Aligned' is only here to move back results to world space, but input is already in local space. +#ifdef SWEEP_AABB_IMPL +void Sweep_AABB_BV4_CB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting) +#else +void Sweep_OBB_BV4_CB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting) +#endif +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + BoxSweepParamsCB Params; + Params.mBoxCB = localBox; + Params.mDirCB = localDir; + Params.mWorldm_Aligned = worldm_Aligned; + Params.mFlags = flags; + + Params.mCallback = callback; + Params.mUserData = userData; + Params.mMaxDist = maxDist; + Params.mNodeSorting = nodeSorting; + setupBoxSweepParams(&Params, localBox, localDir, maxDist, &tree, mesh, flags); + + PX_ASSERT(!Params.mEarlyExit); + + if(tree.mNodes) + { + if(nodeSorting) + { +#ifdef SWEEP_AABB_IMPL + processStreamRayOrdered(1, LeafFunction_BoxSweepCB)(tree.mNodes, tree.mInitData, &Params); +#else + 
processStreamOrdered<LeafFunction_BoxSweepCB>(tree.mNodes, tree.mInitData, &Params); +#endif + } + else + { +#ifdef SWEEP_AABB_IMPL + processStreamRayNoOrder(1, LeafFunction_BoxSweepCB)(tree.mNodes, tree.mInitData, &Params); +#else + processStreamNoOrder<LeafFunction_BoxSweepCB>(tree.mNodes, tree.mInitData, &Params); +#endif + } + } + else + doBruteForceTests<LeafFunction_BoxSweepCB, LeafFunction_BoxSweepCB>(mesh->getNbTriangles(), &Params); +} + + + + +// New callback-based box sweeps. Reuses code above, allow early exits. Some init code may be done in vain +// since the leaf tests are not performed (we don't do box-sweeps-vs-tri since the box is only a BV around +// the actual shape, say a convex) + +namespace +{ +struct GenericSweepParamsCB : BoxSweepParams +{ + MeshSweepCallback mCallback; + void* mUserData; +}; + +class LeafFunction_BoxSweepClosestCB +{ +public: + static PX_FORCE_INLINE void doLeafTest(GenericSweepParamsCB* PX_RESTRICT params, PxU32 prim_index) + { + PxU32 nbToGo = getNbPrimitives(prim_index); + do + { + // PT: in the regular version we'd do a box-vs-triangle sweep test here + // Instead we just grab the triangle and send it to the callback + // + // This can be used for regular "closest hit" sweeps, when the scale is not identity or + // when the box is just around a more complex shape (e.g. convex). In this case we want + // the calling code to compute a convex-triangle distance, and then we want to shrink + // the ray/box while doing an ordered traversal. + // + // For "sweep all" or "sweep any" purposes we want to either report all hits or early exit + // as soon as we find one. There is no need for shrinking or ordered traversals here. 
+ + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, prim_index, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + // Don't bother doing the actual sweep test if the triangle is too far away + const float dp0 = p0.dot(params->mLocalDir_Padded); + const float dp1 = p1.dot(params->mLocalDir_Padded); + const float dp2 = p2.dot(params->mLocalDir_Padded); + + float TriMin = PxMin(dp0, dp1); + TriMin = PxMin(TriMin, dp2); + + if(TriMin < params->mOffset + params->mStabbedFace.mDistance) + { +// const PxU32 vrefs[3] = { VRef0, VRef1, VRef2 }; + float Dist = params->mStabbedFace.mDistance; + if((params->mCallback)(params->mUserData, p0, p1, p2, prim_index, /*vrefs,*/ Dist)) + return; // PT: TODO: we return here but the ordered path doesn't really support early exits (TA34704) + + if(Dist<params->mStabbedFace.mDistance) + { + params->mStabbedFace.mDistance = Dist; + params->mStabbedFace.mTriangleID = prim_index; +#ifdef SWEEP_AABB_IMPL + #ifndef GU_BV4_USE_SLABS + setupRayData(params, Dist, params->mOrigin_Padded, params->mLocalDir_PaddedAligned); + #endif +#else + params->ShrinkOBB(Dist); +#endif + } + } + + prim_index++; + }while(nbToGo--); + } +}; + +class LeafFunction_BoxSweepAnyCB +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(GenericSweepParamsCB* PX_RESTRICT params, PxU32 prim_index) + { + PxU32 nbToGo = getNbPrimitives(prim_index); + do + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, prim_index, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + { +// const PxU32 vrefs[3] = { VRef0, VRef1, VRef2 }; + float Dist = params->mStabbedFace.mDistance; + if((params->mCallback)(params->mUserData, p0, p1, p2, prim_index, /*vrefs,*/ Dist)) + return 1; + } + + prim_index++; + 
}while(nbToGo--); + + return 0; + } +}; +} + +#ifdef SWEEP_AABB_IMPL +void GenericSweep_AABB_CB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, MeshSweepCallback callback, void* userData, PxU32 flags) +#else +void GenericSweep_OBB_CB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, MeshSweepCallback callback, void* userData, PxU32 flags) +#endif +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + GenericSweepParamsCB Params; + Params.mCallback = callback; + Params.mUserData = userData; + setupBoxSweepParams(&Params, localBox, localDir, maxDist, &tree, mesh, flags); + + if(tree.mNodes) + { +#ifdef SWEEP_AABB_IMPL + if(Params.mEarlyExit) + processStreamRayNoOrder(1, LeafFunction_BoxSweepAnyCB)(tree.mNodes, tree.mInitData, &Params); + else + processStreamRayOrdered(1, LeafFunction_BoxSweepClosestCB)(tree.mNodes, tree.mInitData, &Params); +#else + if(Params.mEarlyExit) + processStreamNoOrder<LeafFunction_BoxSweepAnyCB>(tree.mNodes, tree.mInitData, &Params); + else + processStreamOrdered<LeafFunction_BoxSweepClosestCB>(tree.mNodes, tree.mInitData, &Params); +#endif + } + else + doBruteForceTests<LeafFunction_BoxSweepAnyCB, LeafFunction_BoxSweepClosestCB>(mesh->getNbTriangles(), &Params); +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxSweep_Params.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxSweep_Params.h new file mode 100644 index 00000000..6869783f --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_BoxSweep_Params.h @@ -0,0 +1,211 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +// This is used by the box-sweep & capsule-sweep code + +#if PX_VC + #pragma warning(disable: 4505) // unreferenced local function has been removed +#endif + +#include "PsBasicTemplates.h" + +namespace +{ +#ifdef SWEEP_AABB_IMPL +struct BoxSweepParams : RayParams +#else +struct BoxSweepParams : OBBTestParams +#endif +{ + const IndTri32* PX_RESTRICT mTris32; + const IndTri16* PX_RESTRICT mTris16; + const PxVec3* PX_RESTRICT mVerts; + +#ifndef SWEEP_AABB_IMPL + Box mLocalBox; +#endif + PxVec3 mLocalDir_Padded; + RaycastHitInternal mStabbedFace; + + PxU32 mBackfaceCulling; + PxU32 mEarlyExit; + + PxVec3 mP0, mP1, mP2; + PxVec3 mBestTriNormal; + + float mOffset; + PxVec3 mProj; + PxVec3 mDP; + +#ifndef SWEEP_AABB_IMPL + PxMat33 mAR; //!< Absolute rotation matrix +#endif + + PxMat33 mRModelToBox_Padded; //!< Rotation from model space to obb space + PxVec3 mTModelToBox_Padded; //!< Translation from model space to obb space + PxVec3 mOriginalExtents_Padded; + PxVec3 mOriginalDir_Padded; + PxVec3 mOneOverDir_Padded; + PxVec3 mOneOverOriginalDir; + +#ifndef SWEEP_AABB_IMPL + PX_FORCE_INLINE void ShrinkOBB(float d) + { + const PxVec3 BoxExtents = mDP + d * mProj; + mTBoxToModel_PaddedAligned = mLocalBox.center + mLocalDir_Padded*d*0.5f; + + setupBoxData(this, BoxExtents, &mAR); + } +#endif +}; +} + +// PT: TODO: check asm again in PhysX version, compare to original (TA34704) +static void prepareSweepData(const Box& box, const PxVec3& dir, float maxDist, BoxSweepParams* PX_RESTRICT params) +{ + invertBoxMatrix(params->mRModelToBox_Padded, params->mTModelToBox_Padded, box); + + params->mOriginalExtents_Padded = box.extents; + + const PxVec3 OriginalDir = params->mRModelToBox_Padded.transform(dir); + params->mOriginalDir_Padded = OriginalDir; + + const PxVec3 OneOverOriginalDir(OriginalDir.x!=0.0f ? 1.0f/OriginalDir.x : 0.0f, + OriginalDir.y!=0.0f ? 1.0f/OriginalDir.y : 0.0f, + OriginalDir.z!=0.0f ? 
1.0f/OriginalDir.z : 0.0f); + + params->mOneOverOriginalDir = OneOverOriginalDir; + params->mOneOverDir_Padded = OneOverOriginalDir / maxDist; + + { + const Box& LocalBox = box; + const PxVec3& LocalDir = dir; + + params->mLocalDir_Padded = LocalDir; + params->mStabbedFace.mDistance = maxDist; +#ifndef SWEEP_AABB_IMPL + params->mLocalBox = LocalBox; // PT: TODO: check asm for operator= +#endif + + PxMat33 boxToModelR; + + // Original code: + // OBB::CreateOBB(LocalBox, LocalDir, 0.5f) + { + PxVec3 R1, R2; + { + float dd[3]; + dd[0] = fabsf(LocalBox.rot.column0.dot(LocalDir)); + dd[1] = fabsf(LocalBox.rot.column1.dot(LocalDir)); + dd[2] = fabsf(LocalBox.rot.column2.dot(LocalDir)); + float dmax = dd[0]; + PxU32 ax0=1; + PxU32 ax1=2; + if(dd[1]>dmax) + { + dmax=dd[1]; + ax0=0; + ax1=2; + } + if(dd[2]>dmax) + { + dmax=dd[2]; + ax0=0; + ax1=1; + } + if(dd[ax1]<dd[ax0]) + Ps::swap(ax0, ax1); + + R1 = LocalBox.rot[ax0]; + R1 -= R1.dot(LocalDir)*LocalDir; // Project to plane whose normal is dir + R1.normalize(); + R2 = LocalDir.cross(R1); + } + // Original code: + // mRot = params->mRBoxToModel + boxToModelR.column0 = LocalDir; + boxToModelR.column1 = R1; + boxToModelR.column2 = R2; + + // Original code: + // float Offset[3]; + // 0.5f comes from the Offset[r]*0.5f, doesn't mean 'd' is 0.5f + params->mProj.x = 0.5f; + params->mProj.y = LocalDir.dot(R1)*0.5f; + params->mProj.z = LocalDir.dot(R2)*0.5f; + + // Original code: + //mExtents[r] = Offset[r]*0.5f + fabsf(box.mRot[0]|R)*box.mExtents.x + fabsf(box.mRot[1]|R)*box.mExtents.y + fabsf(box.mRot[2]|R)*box.mExtents.z; + // => we store the first part of the computation, minus 'Offset[r]*0.5f' + for(PxU32 r=0;r<3;r++) + { + const PxVec3& R = boxToModelR[r]; + params->mDP[r] = fabsf(LocalBox.rot.column0.dot(R)*LocalBox.extents.x) + + fabsf(LocalBox.rot.column1.dot(R)*LocalBox.extents.y) + + fabsf(LocalBox.rot.column2.dot(R)*LocalBox.extents.z); + } + // In the original code, both mCenter & mExtents depend on 'd', and thus we 
will need to recompute these two members. + // + // For mExtents we have: + // + // float Offset[3]; + // Offset[0] = d; + // Offset[1] = d*(dir|R1); + // Offset[2] = d*(dir|R2); + // + // mExtents[r] = Offset[r]*0.5f + fabsf(box.mRot[0]|R)*box.mExtents.x + fabsf(box.mRot[1]|R)*box.mExtents.y + fabsf(box.mRot[2]|R)*box.mExtents.z; + // <=> mExtents[r] = Offset[r]*0.5f + Params.mDP[r]; We precompute the second part that doesn't depend on d, stored in mDP + // <=> mExtents[r] = Params.mProj[r]*d + Params.mDP[r]; We extract d from the first part, store what is left in mProj + // + // Thus in ShrinkOBB the code needed to update the extents is just: + // mBoxExtents = mDP + d * mProj; + // + // For mCenter we have: + // + // mCenter = box.mCenter + dir*d*0.5f; + // + // So we simply use this formula directly, with the new d. Result is stored in 'mTBoxToModel' +/* + PX_FORCE_INLINE void ShrinkOBB(float d) + { + mBoxExtents = mDP + d * mProj; + mTBoxToModel = mLocalBox.mCenter + mLocalDir*d*0.5f; +*/ + } + + // This one is for culling tris, unrelated to CreateOBB + params->mOffset = params->mDP.x + LocalBox.center.dot(LocalDir); + +#ifndef SWEEP_AABB_IMPL + precomputeData(params, ¶ms->mAR, &boxToModelR); + + params->ShrinkOBB(maxDist); +#endif + } +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweep.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweep.cpp new file mode 100644 index 00000000..c8d8a5c2 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweep.cpp @@ -0,0 +1,173 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "GuBV4.h" +#include "GuSweepSphereTriangle.h" +using namespace physx; +using namespace Gu; + +#if PX_INTEL_FAMILY + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#include "GuSIMDHelpers.h" +#include "GuInternal.h" + +#include "GuBV4_BoxOverlap_Internal.h" +#include "GuBV4_BoxSweep_Params.h" + +namespace +{ + struct CapsuleSweepParams : BoxSweepParams + { + Capsule mLocalCapsule; + PxVec3 mCapsuleCenter; + PxVec3 mExtrusionDir; + PxU32 mEarlyExit; + float mBestAlignmentValue; + float mBestDistance; + float mMaxDist; + }; +} + +#include "GuBV4_CapsuleSweep_Internal.h" +#include "GuBV4_Internal.h" + +#include "GuBV4_BoxBoxOverlapTest.h" + +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs.h" +#endif +#include "GuBV4_ProcessStreamOrdered_OBBOBB.h" +#include "GuBV4_ProcessStreamNoOrder_OBBOBB.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs_SwizzledNoOrder.h" + #include "GuBV4_Slabs_SwizzledOrdered.h" +#endif + +Ps::IntBool BV4_CapsuleSweepSingle(const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + CapsuleSweepParams Params; + setupCapsuleParams(&Params, capsule, dir, maxDist, &tree, mesh, flags); + + if(tree.mNodes) + { + if(Params.mEarlyExit) + processStreamNoOrder<LeafFunction_CapsuleSweepAny>(tree.mNodes, tree.mInitData, &Params); + else + processStreamOrdered<LeafFunction_CapsuleSweepClosest>(tree.mNodes, tree.mInitData, &Params); + } + else + doBruteForceTests<LeafFunction_CapsuleSweepAny, LeafFunction_CapsuleSweepClosest>(mesh->getNbTriangles(), &Params); + + return computeImpactDataT<ImpactFunctionCapsule>(capsule, dir, hit, &Params, NULL, (flags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (flags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); +} + +// PT: capsule sweep callback version - currently not used + +namespace +{ + struct CapsuleSweepParamsCB : CapsuleSweepParams + { + // PT: these new members are only here 
to call computeImpactDataT during traversal :( + // PT: TODO: most of them may not be needed + // PT: TODO: for example mCapsuleCB probably dup of mLocalCapsule + Capsule mCapsuleCB; // Capsule in original space (maybe not local/mesh space) + PxVec3 mDirCB; // Dir in original space (maybe not local/mesh space) + const PxMat44* mWorldm_Aligned; + PxU32 mFlags; + + SweepUnlimitedCallback mCallback; + void* mUserData; + float mMaxDist; + bool mNodeSorting; + }; + +class LeafFunction_CapsuleSweepCB +{ +public: + + static PX_FORCE_INLINE Ps::IntBool doLeafTest(CapsuleSweepParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(triCapsuleSweep(params, primIndex, params->mNodeSorting)) + { + // PT: TODO: in this version we must compute the impact data immediately, + // which is a terrible idea in general, but I'm not sure what else I can do. + SweepHit hit; + const bool b = computeImpactDataT<ImpactFunctionCapsule>(params->mCapsuleCB, params->mDirCB, &hit, params, params->mWorldm_Aligned, (params->mFlags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (params->mFlags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); + PX_ASSERT(b); + PX_UNUSED(b); + + reportUnlimitedCallbackHit(params, hit); + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +// PT: for design decisions in this function, refer to the comments of BV4_GenericSweepCB(). 
+void BV4_CapsuleSweepCB(const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + CapsuleSweepParamsCB Params; + Params.mCapsuleCB = capsule; + Params.mDirCB = dir; + Params.mWorldm_Aligned = worldm_Aligned; + Params.mFlags = flags; + + Params.mCallback = callback; + Params.mUserData = userData; + Params.mMaxDist = maxDist; + Params.mNodeSorting = nodeSorting; + setupCapsuleParams(&Params, capsule, dir, maxDist, &tree, mesh, flags); + + PX_ASSERT(!Params.mEarlyExit); + + if(tree.mNodes) + { + if(nodeSorting) + processStreamOrdered<LeafFunction_CapsuleSweepCB>(tree.mNodes, tree.mInitData, &Params); + else + processStreamNoOrder<LeafFunction_CapsuleSweepCB>(tree.mNodes, tree.mInitData, &Params); + } + else + doBruteForceTests<LeafFunction_CapsuleSweepCB, LeafFunction_CapsuleSweepCB>(mesh->getNbTriangles(), &Params); +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweepAA.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweepAA.cpp new file mode 100644 index 00000000..1fd6aa05 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweepAA.cpp @@ -0,0 +1,111 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "GuBV4.h" +#include "GuSweepSphereTriangle.h" +using namespace physx; +using namespace Gu; + +#if PX_INTEL_FAMILY + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#include "GuBV4_Common.h" +#include "GuInternal.h" + +#define SWEEP_AABB_IMPL + + // PT: TODO: refactor structure (TA34704) + struct RayParams + { + #ifdef GU_BV4_QUANTIZED_TREE + BV4_ALIGN16(Vec3p mCenterOrMinCoeff_PaddedAligned); + BV4_ALIGN16(Vec3p mExtentsOrMaxCoeff_PaddedAligned); + #endif + #ifndef GU_BV4_USE_SLABS + BV4_ALIGN16(Vec3p mData2_PaddedAligned); + BV4_ALIGN16(Vec3p mFDir_PaddedAligned); + BV4_ALIGN16(Vec3p mData_PaddedAligned); + BV4_ALIGN16(Vec3p mLocalDir_PaddedAligned); + #endif + BV4_ALIGN16(Vec3p mOrigin_Padded); // PT: TODO: this one could be switched to PaddedAligned & V4LoadA (TA34704) + }; + +#include "GuBV4_BoxSweep_Params.h" + +namespace +{ + struct CapsuleSweepParams : BoxSweepParams + { + Capsule mLocalCapsule; + PxVec3 mCapsuleCenter; + PxVec3 mExtrusionDir; + PxU32 mEarlyExit; + float mBestAlignmentValue; + float mBestDistance; + float mMaxDist; + }; +} + +#include "GuBV4_CapsuleSweep_Internal.h" +#include "GuBV4_Internal.h" + +#include "GuBV4_AABBAABBSweepTest.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs.h" +#endif +#include "GuBV4_ProcessStreamOrdered_SegmentAABB_Inflated.h" +#include "GuBV4_ProcessStreamNoOrder_SegmentAABB_Inflated.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs_KajiyaNoOrder.h" + #include "GuBV4_Slabs_KajiyaOrdered.h" +#endif + +Ps::IntBool BV4_CapsuleSweepSingleAA(const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + CapsuleSweepParams Params; + setupCapsuleParams(&Params, capsule, dir, maxDist, &tree, mesh, flags); + + if(tree.mNodes) + { + if(Params.mEarlyExit) + processStreamRayNoOrder(1, LeafFunction_CapsuleSweepAny)(tree.mNodes, tree.mInitData, &Params); + else + 
processStreamRayOrdered(1, LeafFunction_CapsuleSweepClosest)(tree.mNodes, tree.mInitData, &Params); + } + else + doBruteForceTests<LeafFunction_CapsuleSweepAny, LeafFunction_CapsuleSweepClosest>(mesh->getNbTriangles(), &Params); + + return computeImpactDataT<ImpactFunctionCapsule>(capsule, dir, hit, &Params, NULL, (flags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (flags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweep_Internal.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweep_Internal.h new file mode 100644 index 00000000..260ba0af --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_CapsuleSweep_Internal.h @@ -0,0 +1,434 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. 
Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV4_CAPSULE_SWEEP_INTERNAL_H +#define GU_BV4_CAPSULE_SWEEP_INTERNAL_H + +// PT: for capsule-sweeps please refer to \\sw\physx\PhysXSDK\3.4\trunk\InternalDocumentation\GU\Sweep strategies.ppt. +// We use: +// - method 3 if the capsule is axis-aligned (SWEEP_AABB_IMPL is defined) +// - method 2 otherwise (SWEEP_AABB_IMPL is undefined) + +// PT: TODO: get rid of that one +static PX_FORCE_INLINE bool sweepSphereVSTriangle( const PxVec3& center, const float radius, + const PxVec3* PX_RESTRICT triVerts, const PxVec3& triUnitNormal, + const PxVec3& unitDir, + float& curT, bool& directHit) +{ + float currentDistance; + if(!sweepSphereVSTri(triVerts, triUnitNormal, center, radius, unitDir, currentDistance, directHit, true)) + return false; + + // PT: using ">" or ">=" is enough to block the CCT or not in the DE5967 visual test. Change to ">=" if a repro is needed. + if(currentDistance > curT) + return false; + curT = currentDistance; + return true; +} + +static PX_FORCE_INLINE bool sweepSphereVSQuad( const PxVec3& center, const float radius, + const PxVec3* PX_RESTRICT quadVerts, const PxVec3& quadUnitNormal, + const PxVec3& unitDir, + float& curT) +{ + float currentDistance; + if(!sweepSphereVSQuad(quadVerts, quadUnitNormal, center, radius, unitDir, currentDistance)) + return false; + + // PT: using ">" or ">=" is enough to block the CCT or not in the DE5967 visual test. Change to ">=" if a repro is needed. 
+ if(currentDistance > curT) + return false; + curT = currentDistance; + return true; +} + +/////////////////////////////////////////////////////////////////////////////// + +// PT: TODO: __fastcall removed to make it compile everywhere. Revisit. +static bool /*__fastcall*/ testTri( const CapsuleSweepParams* PX_RESTRICT params, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, const PxVec3& N, + const PxVec3& unitDir, const float capsuleRadius, const float dpc0, float& curT, bool& status) +{ + // PT: TODO: check the assembly here (TA34704) + PxVec3 currentTri[3]; + // PT: TODO: optimize this copy (TA34704) + currentTri[0] = p0; + currentTri[1] = p1; + currentTri[2] = p2; + + // PT: beware, culling is only ok on the sphere I think + if(rejectTriangle(params->mCapsuleCenter, unitDir, curT, capsuleRadius, currentTri, dpc0)) + return false; + + float magnitude = N.magnitude(); + if(magnitude==0.0f) + return false; + + PxVec3 triNormal = N / magnitude; + + bool DirectHit; + if(sweepSphereVSTriangle(params->mCapsuleCenter, capsuleRadius, currentTri, triNormal, unitDir, curT, DirectHit)) + { + status = true; + } + return DirectHit; +} + +// PT: TODO: __fastcall removed to make it compile everywhere. Revisit. 
+static void /*__fastcall*/ testQuad(const CapsuleSweepParams* PX_RESTRICT params, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, const PxVec3& p3, const PxVec3& N, + const PxVec3& unitDir, const float capsuleRadius, const float dpc0, float& curT, bool& status) +{ + // PT: TODO: optimize this copy (TA34704) + PxVec3 currentQuad[4]; + currentQuad[0] = p0; + currentQuad[1] = p1; + currentQuad[2] = p2; + currentQuad[3] = p3; + + // PT: beware, culling is only ok on the sphere I think + if(rejectQuad(params->mCapsuleCenter, unitDir, curT, capsuleRadius, currentQuad, dpc0)) + return; + + float magnitude = N.magnitude(); + if(magnitude==0.0f) + return; + + PxVec3 triNormal = N / magnitude; + + if(sweepSphereVSQuad(params->mCapsuleCenter, capsuleRadius, currentQuad, triNormal, unitDir, curT)) + { + status = true; + } +} + +static PX_FORCE_INLINE float Set2(const PxVec3& p0, const PxVec3& n, const PxVec3& p) +{ + return (p-p0).dot(n); +} + +static PX_FORCE_INLINE bool sweepCapsuleVsTriangle(const CapsuleSweepParams* PX_RESTRICT params, const PxTriangle& triangle, float& t, bool isDoubleSided, PxVec3& normal) +{ + const PxVec3& unitDir = params->mLocalDir_Padded; + + // Create triangle normal + PxVec3 denormalizedNormal = (triangle.verts[0] - triangle.verts[1]).cross(triangle.verts[0] - triangle.verts[2]); + + normal = denormalizedNormal; + + // Backface culling + const bool culled = denormalizedNormal.dot(unitDir) > 0.0f; + if(culled) + { + if(!isDoubleSided) + return false; + + denormalizedNormal = -denormalizedNormal; + } + + const float capsuleRadius = params->mLocalCapsule.radius; + float curT = params->mStabbedFace.mDistance; + const float dpc0 = params->mCapsuleCenter.dot(unitDir); + + bool status = false; + + // Extrude mesh on the fly + const PxVec3 p0 = triangle.verts[0] - params->mExtrusionDir; + const PxVec3 p1 = triangle.verts[1+culled] - params->mExtrusionDir; + const PxVec3 p2 = triangle.verts[2-culled] - params->mExtrusionDir; + + const PxVec3 p0b = 
triangle.verts[0] + params->mExtrusionDir; + const PxVec3 p1b = triangle.verts[1+culled] + params->mExtrusionDir; + const PxVec3 p2b = triangle.verts[2-culled] + params->mExtrusionDir; + + const float extrusionSign = denormalizedNormal.dot(params->mExtrusionDir); + + const PxVec3 p2b_p1b = p2b - p1b; + const PxVec3 p0b_p1b = p0b - p1b; + const PxVec3 p2b_p2 = 2.0f * params->mExtrusionDir; + const PxVec3 p1_p1b = -p2b_p2; + + const PxVec3 N1 = p2b_p1b.cross(p0b_p1b); + const float dp0 = Set2(p0b, N1, params->mCapsuleCenter); + + const PxVec3 N2 = (p2 - p1).cross(p0 - p1); + const float dp1 = -Set2(p0, N2, params->mCapsuleCenter); + + bool directHit; + if(extrusionSign >= 0.0f) + directHit = testTri(params, p0b, p1b, p2b, N1, unitDir, capsuleRadius, dpc0, curT, status); + else + directHit = testTri(params, p0, p1, p2, N2, unitDir, capsuleRadius, dpc0, curT, status); + + const PxVec3 N3 = p2b_p1b.cross(p1_p1b); + const float dp2 = -Set2(p1, N3, params->mCapsuleCenter); + if(!directHit) + { + const float dp = N3.dot(unitDir); + if(dp*extrusionSign>=0.0f) + testQuad(params, p1, p1b, p2, p2b, N3, unitDir, capsuleRadius, dpc0, curT, status); + } + + const PxVec3 N5 = p2b_p2.cross(p0 - p2); + const float dp3 = -Set2(p0, N5, params->mCapsuleCenter); + if(!directHit) + { + const float dp = N5.dot(unitDir); + if(dp*extrusionSign>=0.0f) + testQuad(params, p2, p2b, p0, p0b, N5, unitDir, capsuleRadius, dpc0, curT, status); + } + + const PxVec3 N7 = p1_p1b.cross(p0b_p1b); + const float dp4 = -Set2(p0b, N7, params->mCapsuleCenter); + if(!directHit) + { + const float dp = N7.dot(unitDir); + if(dp*extrusionSign>=0.0f) + testQuad(params, p0, p0b, p1, p1b, N7, unitDir, capsuleRadius, dpc0, curT, status); + } + + if(1) + { + bool originInside = true; + if(extrusionSign<0.0f) + { + if(dp0<0.0f || dp1<0.0f || dp2<0.0f || dp3<0.0f || dp4<0.0f) + originInside = false; + } + else + { + if(dp0>0.0f || dp1>0.0f || dp2>0.0f || dp3>0.0f || dp4>0.0f) + originInside = false; + } + 
if(originInside) + { + t = 0.0f; + return true; + } + } + + if(!status) + return false; // We didn't touch any triangle + + t = curT; + + return true; +} + +// PT: TODO: __fastcall removed to make it compile everywhere. Revisit. +static bool /*__fastcall*/ triCapsuleSweep(CapsuleSweepParams* PX_RESTRICT params, PxU32 primIndex, bool nodeSorting=true) +{ + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + const PxTriangle Tri(p0, p1, p2); // PT: TODO: check calls to empty ctor/dtor here (TA34704) + + const bool isDoubleSided = params->mBackfaceCulling==0; + + float Dist; + PxVec3 denormalizedNormal; + if(sweepCapsuleVsTriangle(params, Tri, Dist, isDoubleSided, denormalizedNormal)) + { + const PxReal distEpsilon = GU_EPSILON_SAME_DISTANCE; // pick a farther hit within distEpsilon that is more opposing than the previous closest hit + const PxReal alignmentValue = computeAlignmentValue(denormalizedNormal, params->mLocalDir_Padded); + + if(keepTriangle(Dist, alignmentValue, params->mBestDistance, params->mBestAlignmentValue, params->mMaxDist, distEpsilon)) + { + params->mStabbedFace.mDistance = Dist; + params->mStabbedFace.mTriangleID = primIndex; + + params->mP0 = p0; + params->mP1 = p1; + params->mP2 = p2; + + params->mBestDistance = PxMin(params->mBestDistance, Dist); // exact lower bound + params->mBestAlignmentValue = alignmentValue; + params->mBestTriNormal = denormalizedNormal; + + if(nodeSorting) + { +#ifdef SWEEP_AABB_IMPL + #ifndef GU_BV4_USE_SLABS + setupRayData(params, Dist, params->mOrigin_Padded, params->mLocalDir_PaddedAligned); + #endif +#else + params->ShrinkOBB(Dist); +#endif + } + return true; + } + } + return false; +} + +#include "GuDistanceSegmentTriangleSIMD.h" + +namespace +{ +class LeafFunction_CapsuleSweepClosest +{ +public: + static 
PX_FORCE_INLINE void doLeafTest(CapsuleSweepParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + triCapsuleSweep(params, primIndex); + primIndex++; + }while(nbToGo--); + } +}; + +class LeafFunction_CapsuleSweepAny +{ +public: + + static PX_FORCE_INLINE Ps::IntBool doLeafTest(CapsuleSweepParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(triCapsuleSweep(params, primIndex)) + return 1; + primIndex++; + }while(nbToGo--); + + return 0; + } +}; + +class ImpactFunctionCapsule +{ +public: + static PX_FORCE_INLINE void computeImpact(PxVec3& impactPos, PxVec3& impactNormal, const Capsule& capsule, const PxVec3& dir, const PxReal t, const TrianglePadded& triangle) + { + const PxVec3 delta = dir * t; + const Vec3p P0 = capsule.p0 + delta; + const Vec3p P1 = capsule.p1 + delta; + Vec3V pointOnSeg, pointOnTri; + distanceSegmentTriangleSquared( + // PT: we use Vec3p so it is safe to V4LoadU P0 and P1 + V3LoadU_SafeReadW(P0), V3LoadU_SafeReadW(P1), + // PT: we use TrianglePadded so it is safe to V4LoadU the triangle vertices + V3LoadU_SafeReadW(triangle.verts[0]), V3LoadU_SafeReadW(triangle.verts[1]), V3LoadU_SafeReadW(triangle.verts[2]), + pointOnSeg, pointOnTri); + + PxVec3 localImpactPos, tmp; + V3StoreU(pointOnTri, localImpactPos); + V3StoreU(pointOnSeg, tmp); + + // PT: TODO: refactor with computeSphereTriImpactData (TA34704) + PxVec3 localImpactNormal = tmp - localImpactPos; + const float M = localImpactNormal.magnitude(); + if(M<1e-3f) + { + localImpactNormal = (triangle.verts[0] - triangle.verts[1]).cross(triangle.verts[0] - triangle.verts[2]); + localImpactNormal.normalize(); + } + else + localImpactNormal /= M; + + impactPos = localImpactPos; + impactNormal = localImpactNormal; + } +}; +} + +static void computeBoxAroundCapsule(const Capsule& capsule, Box& box, PxVec3& extrusionDir) +{ + // Box center = center of the two capsule's endpoints + box.center = 
capsule.computeCenter();

	// Half extrusion vector: from the segment midpoint towards p0 (length = half segment length).
	extrusionDir = (capsule.p0 - capsule.p1)*0.5f;
	const PxF32 d = extrusionDir.magnitude();

	// Box extents
	// x: radius + half segment length (x axis is aligned with the segment, see below)
	// y/z: radius
	box.extents.x = capsule.radius + d;
	box.extents.y = capsule.radius;
	box.extents.z = capsule.radius;

	// Box orientation
	if(d==0.0f)
	{
		// Degenerate capsule (p0==p1): it's a sphere, any orientation works.
		box.rot = PxMat33(PxIdentity);
	}
	else
	{
		PxVec3 dir, right, up;
		Ps::computeBasis(capsule.p0, capsule.p1, dir, right, up);
		box.setAxes(dir, right, up);
	}
}

// Initializes the shared capsule-sweep parameters:
// - best-hit tracking (distance / normal alignment) and query flags
// - mesh pointers (and quantization coeffs for quantized trees)
// - the local capsule, its bounding box and extrusion vector
// - the traversal data: inflated AABB sweep under SWEEP_AABB_IMPL, OBB sweep otherwise
template<class ParamsT>
static PX_FORCE_INLINE void setupCapsuleParams(ParamsT* PX_RESTRICT params, const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree* PX_RESTRICT tree, const SourceMesh* PX_RESTRICT mesh, PxU32 flags)
{
	params->mStabbedFace.mTriangleID = PX_INVALID_U32;
	// NOTE(review): 2.0f presumably exceeds any computeAlignmentValue() result so the first
	// kept hit always wins — TODO confirm against computeAlignmentValue.
	params->mBestAlignmentValue = 2.0f;
	// Start slightly beyond maxDist so hits exactly at maxDist are still kept.
	params->mBestDistance = maxDist + GU_EPSILON_SAME_DISTANCE;
	params->mMaxDist = maxDist;

	setupParamsFlags(params, flags);

	setupMeshPointersAndQuantizedCoeffs(params, mesh, tree);

	params->mLocalCapsule = capsule;

	Box localBox;
	computeBoxAroundCapsule(capsule, localBox, params->mExtrusionDir);

	params->mCapsuleCenter = localBox.center;

	const PxVec3& localDir = dir;

#ifdef SWEEP_AABB_IMPL
	// Axis-aligned codepath: sweep an AABB inflated by the radius around the capsule segment.
	const PxVec3& localP0 = params->mLocalCapsule.p0;
	const PxVec3& localP1 = params->mLocalCapsule.p1;
	const PxVec3 sweepOrigin = (localP0+localP1)*0.5f;
	const PxVec3 sweepExtents = PxVec3(params->mLocalCapsule.radius) + (localP0-localP1).abs()*0.5f;

	#ifndef GU_BV4_USE_SLABS
	params->mLocalDir_PaddedAligned = localDir;
	#endif
	params->mOrigin_Padded = sweepOrigin;

	const Box aabb(sweepOrigin, sweepExtents, PxMat33(PxIdentity));
	prepareSweepData(aabb, localDir, maxDist, params);	// PT: TODO: optimize this call for idt rotation (TA34704)

	#ifndef GU_BV4_USE_SLABS
	setupRayData(params, maxDist, sweepOrigin, localDir);
	#endif
#else
	// General codepath: sweep the capsule's OBB through the tree.
	prepareSweepData(localBox, localDir, maxDist, params);
#endif
}

#endif // GU_BV4_CAPSULE_SWEEP_INTERNAL_H

diff --git 
a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Common.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Common.h new file mode 100644 index 00000000..2596785f --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Common.h @@ -0,0 +1,437 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. 
All rights reserved. + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Include Guard +#ifndef GU_BV4_COMMON_H +#define GU_BV4_COMMON_H + +#include "foundation/PxMat44.h" +#include "GuBox.h" +#include "GuSphere.h" +#include "GuCapsule.h" +#include "GuSIMDHelpers.h" + +#define BV4_ALIGN16(x) PX_ALIGN_PREFIX(16) x PX_ALIGN_SUFFIX(16) + +namespace physx +{ +namespace Gu +{ + enum QueryModifierFlag + { + QUERY_MODIFIER_ANY_HIT = (1<<0), + QUERY_MODIFIER_DOUBLE_SIDED = (1<<1), + QUERY_MODIFIER_MESH_BOTH_SIDES = (1<<2) + }; + + template<class ParamsT> + PX_FORCE_INLINE void setupParamsFlags(ParamsT* PX_RESTRICT params, PxU32 flags) + { + params->mBackfaceCulling = (flags & (QUERY_MODIFIER_DOUBLE_SIDED|QUERY_MODIFIER_MESH_BOTH_SIDES)) ? 0 : 1u; + params->mEarlyExit = flags & QUERY_MODIFIER_ANY_HIT; + } + + enum HitCode + { + HIT_NONE = 0, //!< No hit + HIT_CONTINUE = 1, //!< Hit found, but keep looking for closer one + HIT_EXIT = 2 //!< Hit found, you can early-exit (raycast any) + }; + + class RaycastHitInternal : public physx::shdfnd::UserAllocated + { + public: + PX_FORCE_INLINE RaycastHitInternal() {} + PX_FORCE_INLINE ~RaycastHitInternal() {} + + float mDistance; + PxU32 mTriangleID; + }; + + class SweepHit : public physx::shdfnd::UserAllocated + { + public: + PX_FORCE_INLINE SweepHit() {} + PX_FORCE_INLINE ~SweepHit() {} + + PxU32 mTriangleID; //!< Index of touched face + float mDistance; //!< Impact distance + + PxVec3 mPos; + PxVec3 mNormal; + }; + + typedef HitCode (*MeshRayCallback) (void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, float dist, float u, float v); + typedef bool (*MeshOverlapCallback) (void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, const PxU32* vertexIndices); + typedef bool (*MeshSweepCallback) (void* 
userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, /*const PxU32* vertexIndices,*/ float& dist); + typedef bool (*SweepUnlimitedCallback) (void* userData, const SweepHit& hit); + + template<class ParamsT> + PX_FORCE_INLINE void reportUnlimitedCallbackHit(ParamsT* PX_RESTRICT params, const SweepHit& hit) + { + // PT: we can't reuse the MeshSweepCallback here since it's designed for doing the sweep test inside the callback + // (in the user's code) rather than inside the traversal code. So we use the SweepUnlimitedCallback instead to + // report the already fully computed hit to users. + // PT: TODO: this may not be very efficient, since computing the full hit is expensive. If we use this codepath + // to implement the Epic Tweak, the resulting code will not be optimal. + (params->mCallback)(params->mUserData, hit); + + // PT: the existing traversal code already shrunk the ray. For real "sweep all" calls we must undo that by reseting the max dist. + // (params->mStabbedFace.mDistance is used in computeImpactDataX code, so we need it before that point - we can't simply avoid + // modifying this value before this point). 
+ if(!params->mNodeSorting) + params->mStabbedFace.mDistance = params->mMaxDist; + } + + PX_FORCE_INLINE void invertPRMatrix(PxMat44* PX_RESTRICT dest, const PxMat44* PX_RESTRICT src) + { + const float m30 = src->column3.x; + const float m31 = src->column3.y; + const float m32 = src->column3.z; + + const float m00 = src->column0.x; + const float m01 = src->column0.y; + const float m02 = src->column0.z; + + dest->column0.x = m00; + dest->column1.x = m01; + dest->column2.x = m02; + dest->column3.x = -(m30*m00 + m31*m01 + m32*m02); + + const float m10 = src->column1.x; + const float m11 = src->column1.y; + const float m12 = src->column1.z; + + dest->column0.y = m10; + dest->column1.y = m11; + dest->column2.y = m12; + dest->column3.y = -(m30*m10 + m31*m11 + m32*m12); + + const float m20 = src->column2.x; + const float m21 = src->column2.y; + const float m22 = src->column2.z; + + dest->column0.z = m20; + dest->column1.z = m21; + dest->column2.z = m22; + dest->column3.z = -(m30*m20 + m31*m21 + m32*m22); + + dest->column0.w = 0.0f; + dest->column1.w = 0.0f; + dest->column2.w = 0.0f; + dest->column3.w = 1.0f; + } + + PX_FORCE_INLINE void invertBoxMatrix(PxMat33& m, PxVec3& t, const Gu::Box& box) + { + const float m30 = box.center.x; + const float m31 = box.center.y; + const float m32 = box.center.z; + + const float m00 = box.rot.column0.x; + const float m01 = box.rot.column0.y; + const float m02 = box.rot.column0.z; + + m.column0.x = m00; + m.column1.x = m01; + m.column2.x = m02; + t.x = -(m30*m00 + m31*m01 + m32*m02); + + const float m10 = box.rot.column1.x; + const float m11 = box.rot.column1.y; + const float m12 = box.rot.column1.z; + + m.column0.y = m10; + m.column1.y = m11; + m.column2.y = m12; + t.y = -(m30*m10 + m31*m11 + m32*m12); + + const float m20 = box.rot.column2.x; + const float m21 = box.rot.column2.y; + const float m22 = box.rot.column2.z; + + m.column0.z = m20; + m.column1.z = m21; + m.column2.z = m22; + t.z = -(m30*m20 + m31*m21 + m32*m22); + } + +#ifdef 
GU_BV4_USE_SLABS + // PT: this class moved here to make things compile with pedantic compilers. + struct BVDataSwizzled : public physx::shdfnd::UserAllocated + { + #ifdef GU_BV4_QUANTIZED_TREE + struct Data + { + PxI16 mMin; //!< Quantized min + PxI16 mMax; //!< Quantized max + }; + + Data mX[4]; + Data mY[4]; + Data mZ[4]; + #else + float mMinX[4]; + float mMinY[4]; + float mMinZ[4]; + float mMaxX[4]; + float mMaxY[4]; + float mMaxZ[4]; + #endif + PxU32 mData[4]; + + PX_FORCE_INLINE PxU32 isLeaf(PxU32 i) const { return mData[i]&1; } + PX_FORCE_INLINE PxU32 getPrimitive(PxU32 i) const { return mData[i]>>1; } + PX_FORCE_INLINE PxU32 getChildOffset(PxU32 i) const { return mData[i]>>GU_BV4_CHILD_OFFSET_SHIFT_COUNT; } + PX_FORCE_INLINE PxU32 getChildType(PxU32 i) const { return (mData[i]>>1)&3; } + PX_FORCE_INLINE PxU32 getChildData(PxU32 i) const { return mData[i]; } + PX_FORCE_INLINE PxU32 decodePNSNoShift(PxU32 i) const { return mData[i]; } + }; +#else + #define SSE_CONST4(name, val) static const __declspec(align(16)) PxU32 name[4] = { (val), (val), (val), (val) } + #define SSE_CONST(name) *(const __m128i *)&name + #define SSE_CONSTF(name) *(const __m128 *)&name +#endif + + PX_FORCE_INLINE PxU32 getNbPrimitives(PxU32& primIndex) + { + PxU32 NbToGo = (primIndex & 15)-1; + primIndex>>=4; + return NbToGo; + } + + template<class ParamsT> + PX_FORCE_INLINE void setupMeshPointersAndQuantizedCoeffs(ParamsT* PX_RESTRICT params, const SourceMesh* PX_RESTRICT mesh, const BV4Tree* PX_RESTRICT tree) + { + params->mTris32 = mesh->getTris32(); + params->mTris16 = mesh->getTris16(); + params->mVerts = mesh->getVerts(); + +#ifdef GU_BV4_QUANTIZED_TREE + V4StoreA_Safe(V4LoadU_Safe(&tree->mCenterOrMinCoeff.x), ¶ms->mCenterOrMinCoeff_PaddedAligned.x); + V4StoreA_Safe(V4LoadU_Safe(&tree->mExtentsOrMaxCoeff.x), ¶ms->mExtentsOrMaxCoeff_PaddedAligned.x); +#else + PX_UNUSED(tree); +#endif + } + + PX_FORCE_INLINE void rotateBox(Gu::Box& dst, const PxMat44& m, const Gu::Box& src) + { + // 
The extents remain constant + dst.extents = src.extents; + // The center gets x-formed + dst.center = m.transform(src.center); + // Combine rotations + // PT: TODO: revisit.. this is awkward... grab 3x3 part of 4x4 matrix (TA34704) + const PxMat33 tmp( PxVec3(m.column0.x, m.column0.y, m.column0.z), + PxVec3(m.column1.x, m.column1.y, m.column1.z), + PxVec3(m.column2.x, m.column2.y, m.column2.z)); + dst.rot = tmp * src.rot; + } + + PX_FORCE_INLINE PxVec3 inverseRotate(const PxMat44* PX_RESTRICT src, const PxVec3& p) + { + const float m00 = src->column0.x; + const float m01 = src->column0.y; + const float m02 = src->column0.z; + + const float m10 = src->column1.x; + const float m11 = src->column1.y; + const float m12 = src->column1.z; + + const float m20 = src->column2.x; + const float m21 = src->column2.y; + const float m22 = src->column2.z; + + return PxVec3( m00*p.x + m01*p.y + m02*p.z, + m10*p.x + m11*p.y + m12*p.z, + m20*p.x + m21*p.y + m22*p.z); + } + + PX_FORCE_INLINE PxVec3 inverseTransform(const PxMat44* PX_RESTRICT src, const PxVec3& p) + { + const float m30 = src->column3.x; + const float m31 = src->column3.y; + const float m32 = src->column3.z; + + const float m00 = src->column0.x; + const float m01 = src->column0.y; + const float m02 = src->column0.z; + + const float m10 = src->column1.x; + const float m11 = src->column1.y; + const float m12 = src->column1.z; + + const float m20 = src->column2.x; + const float m21 = src->column2.y; + const float m22 = src->column2.z; + + return PxVec3( m00*p.x + m01*p.y + m02*p.z -(m30*m00 + m31*m01 + m32*m02), + m10*p.x + m11*p.y + m12*p.z -(m30*m10 + m31*m11 + m32*m12), + m20*p.x + m21*p.y + m22*p.z -(m30*m20 + m31*m21 + m32*m22)); + } + + PX_FORCE_INLINE void computeLocalRay(PxVec3& localDir, PxVec3& localOrigin, const PxVec3& dir, const PxVec3& origin, const PxMat44* PX_RESTRICT worldm_Aligned) + { + if(worldm_Aligned) + { + localDir = inverseRotate(worldm_Aligned, dir); + localOrigin = 
inverseTransform(worldm_Aligned, origin); + } + else + { + localDir = dir; + localOrigin = origin; + } + } + + PX_FORCE_INLINE void computeLocalSphere(float& radius2, PxVec3& local_center, const Sphere& sphere, const PxMat44* PX_RESTRICT worldm_Aligned) + { + radius2 = sphere.radius * sphere.radius; + if(worldm_Aligned) + { + local_center = inverseTransform(worldm_Aligned, sphere.center); + } + else + { + local_center = sphere.center; + } + } + + PX_FORCE_INLINE void computeLocalCapsule(Capsule& localCapsule, const Capsule& capsule, const PxMat44* PX_RESTRICT worldm_Aligned) + { + localCapsule.radius = capsule.radius; + if(worldm_Aligned) + { + localCapsule.p0 = inverseTransform(worldm_Aligned, capsule.p0); + localCapsule.p1 = inverseTransform(worldm_Aligned, capsule.p1); + } + else + { + localCapsule.p0 = capsule.p0; + localCapsule.p1 = capsule.p1; + } + } + + PX_FORCE_INLINE void computeLocalBox(Gu::Box& dst, const Gu::Box& src, const PxMat44* PX_RESTRICT worldm_Aligned) + { + if(worldm_Aligned) + { + PxMat44 invWorldM; + invertPRMatrix(&invWorldM, worldm_Aligned); + + rotateBox(dst, invWorldM, src); + } + else + { + dst = src; // PT: TODO: check asm for operator= (TA34704) + } + } + + template<class ImpactFunctionT, class ShapeT, class ParamsT> + static PX_FORCE_INLINE bool computeImpactDataT(const ShapeT& shape, const PxVec3& dir, SweepHit* PX_RESTRICT hit, const ParamsT* PX_RESTRICT params, const PxMat44* PX_RESTRICT worldm, bool isDoubleSided, bool meshBothSides) + { + if(params->mStabbedFace.mTriangleID==PX_INVALID_U32) + return false; // We didn't touch any triangle + + if(hit) + { + const float t = params->mStabbedFace.mDistance; + hit->mTriangleID = params->mStabbedFace.mTriangleID; + hit->mDistance = t; + + if(t==0.0f) + { + hit->mPos = PxVec3(0.0f); + hit->mNormal = -dir; + } + else + { + // PT: TODO: we shouldn't compute impact in world space, and in fact moving this to local space is necessary if we want to reuse this for box-sweeps (TA34704) + 
TrianglePadded WP; + if(worldm) + { + WP.verts[0] = worldm->transform(params->mP0); + WP.verts[1] = worldm->transform(params->mP1); + WP.verts[2] = worldm->transform(params->mP2); + } + else + { + WP.verts[0] = params->mP0; + WP.verts[1] = params->mP1; + WP.verts[2] = params->mP2; + } + + PxVec3 impactNormal; + ImpactFunctionT::computeImpact(hit->mPos, impactNormal, shape, dir, t, WP); + + // PT: by design, returned normal is opposed to the sweep direction. + if(shouldFlipNormal(impactNormal, meshBothSides, isDoubleSided, params->mBestTriNormal, dir)) + impactNormal = -impactNormal; + + hit->mNormal = impactNormal; + } + } + return true; + } + + // PT: we don't create a structure for small meshes with just a few triangles. We use brute-force tests on these. + template<class LeafFunction_AnyT, class LeafFunction_ClosestT, class ParamsT> + static void doBruteForceTests(PxU32 nbTris, ParamsT* PX_RESTRICT params) + { + PX_ASSERT(nbTris<16); + if(params->mEarlyExit) + LeafFunction_AnyT::doLeafTest(params, nbTris); + else + LeafFunction_ClosestT::doLeafTest(params, nbTris); + } + +#if PX_INTEL_FAMILY +#ifndef GU_BV4_USE_SLABS + template<class ParamsT> + PX_FORCE_INLINE void setupRayData(ParamsT* PX_RESTRICT params, float max_dist, const PxVec3& origin, const PxVec3& dir) + { + const float Half = 0.5f*max_dist; + const FloatV HalfV = FLoad(Half); + const Vec4V DataV = V4Scale(V4LoadU(&dir.x), HalfV); + const Vec4V Data2V = V4Add(V4LoadU(&origin.x), DataV); + const PxU32 MaskI = 0x7fffffff; + const Vec4V FDirV = _mm_and_ps(_mm_load1_ps((float*)&MaskI), DataV); + V4StoreA_Safe(DataV, ¶ms->mData_PaddedAligned.x); + V4StoreA_Safe(Data2V, ¶ms->mData2_PaddedAligned.x); + V4StoreA_Safe(FDirV, ¶ms->mFDir_PaddedAligned.x); + } +#endif +#endif + +} +} + +#endif // GU_BV4_COMMON_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Internal.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Internal.h new file mode 100644 index 00000000..07df2109 --- /dev/null +++ 
b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Internal.h @@ -0,0 +1,265 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef GU_BV4_INTERNAL_H
#define GU_BV4_INTERNAL_H

#include "CmPhysXCommon.h"
#include "PsFPU.h"

	// Child pointers are packed into a single 32-bit value: the node offset in
	// the upper bits, and a 2-bit "type" in bits 1-2. The type gates how many of
	// the 4 children are processed (see nodeType usage in the traversals below).
	static PX_FORCE_INLINE PxU32 getChildOffset(PxU32 data)	{ return data>>GU_BV4_CHILD_OFFSET_SHIFT_COUNT;	}
	static PX_FORCE_INLINE PxU32 getChildType(PxU32 data)	{ return (data>>1)&3;	}

	// PT: the general structure is that there is a root "process stream" function which is the entry point for the query.
	// It then calls "process node" functions for each traversed node, except for the Slabs-based raycast versions that deal
	// with 4 nodes at a time within the "process stream" function itself. When a leaf is found, "doLeafTest" functors
	// passed to the "process stream" entry point are called.
#ifdef GU_BV4_USE_SLABS
	#define processStreamNoOrder	BV4_ProcessStreamSwizzledNoOrder
	#define processStreamOrdered	BV4_ProcessStreamSwizzledOrdered
	#define processStreamRayNoOrder(a, b)	BV4_ProcessStreamKajiyaNoOrder<a, b>
	#define processStreamRayOrdered(a, b)	BV4_ProcessStreamKajiyaOrdered<a, b>
#else
	#define processStreamNoOrder	BV4_ProcessStreamNoOrder
	#define processStreamOrdered	BV4_ProcessStreamOrdered2
	#define processStreamRayNoOrder(a, b)	BV4_ProcessStreamNoOrder<b>
	#define processStreamRayOrdered(a, b)	BV4_ProcessStreamOrdered2<b>
#endif

#ifndef GU_BV4_USE_SLABS
#ifdef GU_BV4_PRECOMPUTED_NODE_SORT
	// PT: see http://www.codercorner.com/blog/?p=734

	// PT: TODO: refactor with dup in bucket pruner (TA34704)
	// Builds an 8-bit one-hot mask from the sign bits of the direction vector.
	// NOTE(review): assumes PX_IR yields an unsigned reinterpretation of the
	// float, so >>31 produces 0/1 - confirm against PsFPU.h.
	PX_FORCE_INLINE PxU32 computeDirMask(const PxVec3& dir)
	{
		// XYZ
		// ---
		// --+
		// -+-
		// -++
		// +--
		// +-+
		// ++-
		// +++

		const PxU32 X = PX_IR(dir.x)>>31;
		const PxU32 Y = PX_IR(dir.y)>>31;
		const PxU32 Z = PX_IR(dir.z)>>31;
		const PxU32 bitIndex = Z|(Y<<1)|(X<<2);
		return 1u<<bitIndex;
	}

	// Precomputed child-visit orders, indexed by the 3-bit code from decodePNS():
	// 0 0 0	PP PN NP NN		0 1 2 3
	// 0 0 1	PP PN NN NP		0 1 3 2
	// 0 1 0	PN PP NP NN		1 0 2 3
	// 0 1 1	PN PP NN NP		1 0 3 2
	// 1 0 0	NP NN PP PN		2 3 0 1
	// 1 0 1	NN NP PP PN		3 2 0 1
	// 1 1 0	NP NN PN PP		2 3 1 0
	// 1 1 1	NN NP PN PP		3 2 1 0
	static const PxU8 order[] = {
		0,1,2,3,
		0,1,3,2,
		1,0,2,3,
		1,0,3,2,
		2,3,0,1,
		3,2,0,1,
		2,3,1,0,
		3,2,1,0,
	};

	// Extracts the 3-bit PNS code for a node by testing the stored PNS bits of
	// its first three children against the direction mask.
	PX_FORCE_INLINE PxU32 decodePNS(const BVDataPacked* PX_RESTRICT node, const PxU32 dirMask)
	{
		const PxU32 bit0 = (node[0].decodePNSNoShift() & dirMask) ? 1u : 0;
		const PxU32 bit1 = (node[1].decodePNSNoShift() & dirMask) ? 1u : 0;
		const PxU32 bit2 = (node[2].decodePNSNoShift() & dirMask) ? 1u : 0;	//### potentially reads past the end of the stream here!
		return bit2|(bit1<<1)|(bit0<<2);
	}
#endif	// GU_BV4_PRECOMPUTED_NODE_SORT

	// Three variants of the "push overlapped children (flagged in 'code') onto
	// the stack" snippet: PNS_BLOCK/PNS_BLOCK1 are switch-case forms,
	// PNS_BLOCK2 is a plain block (used by the FOURTH_TEST implementation).
	#define PNS_BLOCK(i, a, b, c, d)	\
	case i:							\
	{								\
		if(code & (1<<a))	{ stack[nb++] = node[a].getChildData();	}	\
		if(code & (1<<b))	{ stack[nb++] = node[b].getChildData();	}	\
		if(code & (1<<c))	{ stack[nb++] = node[c].getChildData();	}	\
		if(code & (1<<d))	{ stack[nb++] = node[d].getChildData();	}	\
	}break;

	#define PNS_BLOCK1(i, a, b, c, d)	\
	case i:								\
	{									\
		stack[nb] = node[a].getChildData();	nb += (code & (1<<a))?1:0;	\
		stack[nb] = node[b].getChildData();	nb += (code & (1<<b))?1:0;	\
		stack[nb] = node[c].getChildData();	nb += (code & (1<<c))?1:0;	\
		stack[nb] = node[d].getChildData();	nb += (code & (1<<d))?1:0;	\
	}break;

	#define PNS_BLOCK2(a, b, c, d)	{							\
		if(code & (1<<a))	{ stack[nb++] = node[a].getChildData();	}	\
		if(code & (1<<b))	{ stack[nb++] = node[b].getChildData();	}	\
		if(code & (1<<c))	{ stack[nb++] = node[c].getChildData();	}	\
		if(code & (1<<d))	{ stack[nb++] = node[d].getChildData();	}	}	\

#if PX_INTEL_FAMILY
	// Any-hit, unordered traversal: returns 1 as soon as a leaf test succeeds.
	// Children 0 and 1 are always processed; 2 and 3 only when nodeType allows.
	template<class LeafTestT, class ParamsT>
	static Ps::IntBool BV4_ProcessStreamNoOrder(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params)
	{
		const BVDataPacked* root = node;

		PxU32 nb=1;
		PxU32 stack[GU_BV4_STACK_SIZE];
		stack[0] = initData;

		do
		{
			const PxU32 childData = stack[--nb];
			node = root + getChildOffset(childData);
			const PxU32 nodeType = 
getChildType(childData); + + if(nodeType>1 && BV4_ProcessNodeNoOrder<LeafTestT, 3>(stack, nb, node, params)) + return 1; + if(nodeType>0 && BV4_ProcessNodeNoOrder<LeafTestT, 2>(stack, nb, node, params)) + return 1; + if(BV4_ProcessNodeNoOrder<LeafTestT, 1>(stack, nb, node, params)) + return 1; + if(BV4_ProcessNodeNoOrder<LeafTestT, 0>(stack, nb, node, params)) + return 1; + + }while(nb); + + return 0; + } + + template<class LeafTestT, class ParamsT> + static void BV4_ProcessStreamOrdered(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + const PxU32 dirMask = computeDirMask(params->mLocalDir)<<3; + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + + const PxU8* PX_RESTRICT ord = order + decodePNS(node, dirMask)*4; + const PxU32 limit = 2 + getChildType(childData); + + BV4_ProcessNodeOrdered<LeafTestT>(stack, nb, node, params, ord[0], limit); + BV4_ProcessNodeOrdered<LeafTestT>(stack, nb, node, params, ord[1], limit); + BV4_ProcessNodeOrdered<LeafTestT>(stack, nb, node, params, ord[2], limit); + BV4_ProcessNodeOrdered<LeafTestT>(stack, nb, node, params, ord[3], limit); + }while(Nb); + } + + // Alternative, experimental version using PNS + template<class LeafTestT, class ParamsT> + static void BV4_ProcessStreamOrdered2(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + const PxU32 X = PX_IR(params->mLocalDir_Padded.x)>>31; + const PxU32 Y = PX_IR(params->mLocalDir_Padded.y)>>31; + const PxU32 Z = PX_IR(params->mLocalDir_Padded.z)>>31; + const PxU32 bitIndex = 3+(Z|(Y<<1)|(X<<2)); + const PxU32 dirMask = 1u<<bitIndex; + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + const PxU32 nodeType = 
getChildType(childData);

			// Test the (up to 4) children; overlapped internal children set their
			// bit in "code", overlapped leaves are tested immediately.
			PxU32 code = 0;
			BV4_ProcessNodeOrdered2<LeafTestT, 0>(code, node, params);
			BV4_ProcessNodeOrdered2<LeafTestT, 1>(code, node, params);
			if(nodeType>0)
				BV4_ProcessNodeOrdered2<LeafTestT, 2>(code, node, params);
			if(nodeType>1)
				BV4_ProcessNodeOrdered2<LeafTestT, 3>(code, node, params);

			if(code)
			{
				// PT: TODO: check which implementation is best on each platform (TA34704)
#define FOURTH_TEST	// Version avoids computing the PNS index, and also avoids all non-constant shifts. Full of branches though. Fastest on Win32.
#ifdef FOURTH_TEST
				{
					// Branchy equivalent of "order[decodePNS(...)*4]": the PNS bits
					// of children 0..2 select one of the 8 precomputed push orders
					// (the // N comments give the corresponding order[] row).
					if(node[0].decodePNSNoShift() & dirMask)	// Bit2
					{
						if(node[1].decodePNSNoShift() & dirMask)	// Bit1
						{
							if(node[2].decodePNSNoShift() & dirMask)	// Bit0
								PNS_BLOCK2(3,2,1,0)	// 7
							else
								PNS_BLOCK2(2,3,1,0)	// 6
						}
						else
						{
							if(node[2].decodePNSNoShift() & dirMask)	// Bit0
								PNS_BLOCK2(3,2,0,1)	// 5
							else
								PNS_BLOCK2(2,3,0,1)	// 4
						}
					}
					else
					{
						if(node[1].decodePNSNoShift() & dirMask)	// Bit1
						{
							if(node[2].decodePNSNoShift() & dirMask)	// Bit0
								PNS_BLOCK2(1,0,3,2)	// 3
							else
								PNS_BLOCK2(1,0,2,3)	// 2
						}
						else
						{
							if(node[2].decodePNSNoShift() & dirMask)	// Bit0
								PNS_BLOCK2(0,1,3,2)	// 1
							else
								PNS_BLOCK2(0,1,2,3)	// 0
						}
					}
				}
#endif
			}
		}while(nb);
	}
#endif	// PX_INTEL_FAMILY
#endif	// GU_BV4_USE_SLABS

#endif // GU_BV4_INTERNAL_H
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_OBBSweep.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_OBBSweep.cpp
new file mode 100644
index 00000000..c578b359
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_OBBSweep.cpp
@@ -0,0 +1,170 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "GuBV4.h"
using namespace physx;
using namespace Gu;

#if PX_INTEL_FAMILY
#include "PsVecMath.h"
using namespace physx::shdfnd::aos;
#include "GuBV4_BoxSweep_Internal.h"

// Forward declarations of the AABB-specialized sweep codepaths (defined elsewhere).
Ps::IntBool Sweep_AABB_BV4(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags);
void GenericSweep_AABB_CB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, MeshSweepCallback callback, void* userData, PxU32 flags);
void Sweep_AABB_BV4_CB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting);

// PT: TODO: optimize this (TA34704)
// Moves the swept box and the sweep direction into mesh-local space by
// inverting the (position+rotation) world matrix; passthrough when no matrix.
static PX_FORCE_INLINE void computeLocalData(Box& localBox, PxVec3& localDir, const Box& box, const PxVec3& dir, const PxMat44* PX_RESTRICT worldm_Aligned)
{
	if(worldm_Aligned)
	{
		PxMat44 IWM;
		invertPRMatrix(&IWM, worldm_Aligned);

		localDir = IWM.rotate(dir);

		rotateBox(localBox, IWM, box);
	}
	else
	{
		localDir = dir;
		localBox = box;	// PT: TODO: check asm for operator= (TA34704)
	}
}

// True when each component's magnitude is within 1e-3 of 0 or of 1, i.e. the
// axis is (numerically) aligned with one of the coordinate axes.
static PX_FORCE_INLINE bool isAxisAligned(const PxVec3& axis)
{
	const PxReal minLimit = 1e-3f;
	const PxReal maxLimit = 1.0f - 1e-3f;

	const PxReal absX = PxAbs(axis.x);
	if(absX>minLimit && absX<maxLimit)
		return false;

	const PxReal absY = PxAbs(axis.y);
	if(absY>minLimit && absY<maxLimit)
		return false;

	const PxReal absZ = PxAbs(axis.z);
	if(absZ>minLimit && absZ<maxLimit)
		return false;

	return true;
}

// True when all three rotation columns are axis-aligned, i.e. the OBB is
// effectively an AABB and the cheaper AABB codepath can be used.
static PX_FORCE_INLINE bool isAABB(const Box& box)
{
	if(!isAxisAligned(box.rot.column0))
		return false;
	if(!isAxisAligned(box.rot.column1))
		return false;
	if(!isAxisAligned(box.rot.column2))
		return false;
	return true;
}

// Closest-hit box sweep against the BV4 tree. The query runs in mesh-local
// space; when a world matrix is given, the returned hit position/normal are
// transformed back to world space.
Ps::IntBool BV4_BoxSweepSingle(const Box& box, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepHit* PX_RESTRICT hit, PxU32 flags)
{
	Box localBox;
	PxVec3 localDir;
	computeLocalData(localBox, localDir, box, dir, worldm_Aligned);

	Ps::IntBool Status;
	if(isAABB(localBox))
		Status = Sweep_AABB_BV4(localBox, localDir, maxDist, tree, hit, flags);
	else
		Status = Sweep_OBB_BV4(localBox, localDir, maxDist, tree, hit, flags);
	if(Status && worldm_Aligned)
	{
		// Move to world space
		// PT: TODO: optimize (TA34704)
		hit->mPos = worldm_Aligned->transform(hit->mPos);
		hit->mNormal = worldm_Aligned->rotate(hit->mNormal);
	}
	return Status;
}

// PT: for design decisions in this function, refer to the comments of BV4_GenericSweepCB().
// "Sweep all" box sweep: every hit is reported through the callback.
void BV4_BoxSweepCB(const Box& box, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting)
{
	Box localBox;
	PxVec3 localDir;
	computeLocalData(localBox, localDir, box, dir, worldm_Aligned);

	if(isAABB(localBox))
		Sweep_AABB_BV4_CB(localBox, localDir, maxDist, tree, worldm_Aligned, callback, userData, flags, nodeSorting);
	else
		Sweep_OBB_BV4_CB(localBox, localDir, maxDist, tree, worldm_Aligned, callback, userData, flags, nodeSorting);
}


// PT: this generic sweep uses an OBB because this is the most versatile volume, but it does not mean this function is
// a "box sweep function" per-se. In fact it could be used all alone to implement all sweeps in the SDK (but that would
// have an impact on performance).
//
// So the idea here is simply to provide and use a generic function for everything that the BV4 code does not support directly.
// In particular this should be used:
// - for convex sweeps (where the OBB is the box around the swept convex)
// - for non-trivial sphere/capsule/box sweeps where mesh scaling or inflation is involved
//
// By design we don't do leaf tests inside the BV4 traversal code here (because we don't support them, e.g. convex
// sweeps. If we could do them inside the BV4 traversal code, like we do for regular sweeps, then this would not be a generic
// sweep function, but instead a built-in, natively supported query). So the leaf tests are performed outside of BV4, in the
// client code, through MeshSweepCallback. This has a direct impact on the design & parameters of MeshSweepCallback.
//
// On the other hand this is used for "regular sweeps with shapes we don't natively support", i.e. SweepSingle kind of queries.
// This means that we need to support an early-exit codepath (without node-sorting) and a regular sweep single codepath (with
// node sorting) for this generic function. The leaf tests are external, but everything traversal-related should be exactly the
// same as the regular box-sweep function otherwise.
//
// As a consequence, this function is not well-suited to implement "unlimited results" kind of queries, a.k.a. "sweep all":
//
// - for regular sphere/capsule/box "sweep all" queries, the leaf tests should be internal (same as sweep single queries). This
// means the existing MeshSweepCallback can't be reused.
//
// - there is no need to support "sweep any" (it is already supported by the other sweep functions).
//
// - there may be no need for ordered traversal/node sorting/ray shrinking, since we want to return all results anyway. But this
// may not be true if the "sweep all" function is used to emulate the Epic Tweak. In that case we still want to shrink the ray
// and use node sorting. Since both versions are useful, we should probably have a bool param to enable/disable node sorting.
//
// - we are interested in all hits so we can't delay the computation of impact data (computing it only once in the end, for the
// closest hit). We actually need to compute the data for all hits, possibly within the traversal code.
void BV4_GenericSweepCB(const Box& localBox, const PxVec3& localDir, float maxDist, const BV4Tree& tree, MeshSweepCallback callback, void* userData, bool anyHit)
{
	const PxU32 flags = anyHit ? PxU32(QUERY_MODIFIER_ANY_HIT) : 0;

	if(isAABB(localBox))
		GenericSweep_AABB_CB(localBox, localDir, maxDist, tree, callback, userData, flags);
	else
		GenericSweep_OBB_CB(localBox, localDir, maxDist, tree, callback, userData, flags);
}

#endif
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_OBBOBB.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_OBBOBB.h
new file mode 100644
index 00000000..9c55cd66
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_OBBOBB.h
@@ -0,0 +1,73 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use.
// No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef GU_BV4_PROCESS_STREAM_NOORDER_OBB_OBB_H
#define GU_BV4_PROCESS_STREAM_NOORDER_OBB_OBB_H

#ifdef GU_BV4_USE_SLABS
	// OBB-vs-node-bounds test for the swizzled (slabs) node layout. An
	// overlapped leaf is tested immediately (early-out when the leaf test
	// reports a hit); an overlapped internal child is pushed on the stack.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE Ps::IntBool BV4_ProcessNodeNoOrder_Swizzled(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataSwizzled* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
		OPC_SLABS_GET_CE(i)	// brings centerV/extentsV for child i into scope

		if(BV4_BoxBoxOverlap(centerV, extentsV, params))
		{
			if(node->isLeaf(i))
			{
				if(LeafTestT::doLeafTest(params, node->getPrimitive(i)))
					return 1;
			}
			else
				Stack[Nb++] = node->getChildData(i);
		}
		return 0;
	}
#else
	// Same logic for the packed node layout. NOTE the (extents, center)
	// argument order of this BV4_BoxBoxOverlap overload.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE Ps::IntBool BV4_ProcessNodeNoOrder(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_BoxBoxOverlap(node+i, params))
	#else
		if(BV4_BoxBoxOverlap(node[i].mAABB.mExtents, node[i].mAABB.mCenter, params))
	#endif
		{
			if(node[i].isLeaf())
			{
				if(LeafTestT::doLeafTest(params, node[i].getPrimitive()))
					return 1;
			}
			else
				Stack[Nb++] = node[i].getChildData();
		}
		return 0;
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_NOORDER_OBB_OBB_H
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SegmentAABB.h 
b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SegmentAABB.h new file mode 100644 index 00000000..86ea5f97 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SegmentAABB.h @@ -0,0 +1,55 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. 
// All rights reserved.

#ifndef GU_BV4_PROCESS_STREAM_NOORDER_SEGMENT_AABB_H
#define GU_BV4_PROCESS_STREAM_NOORDER_SEGMENT_AABB_H

#ifndef GU_BV4_USE_SLABS
	// Segment (raycast)-vs-node-AABB, unordered variant: an overlapped leaf is
	// tested immediately (early-out when the leaf test reports a hit); an
	// overlapped internal child is pushed on the traversal stack.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE Ps::IntBool BV4_ProcessNodeNoOrder(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_SegmentAABBOverlap(node+i, params))
	#else
		if(BV4_SegmentAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params))
	#endif
		{
			if(node[i].isLeaf())
			{
				if(LeafTestT::doLeafTest(params, node[i].getPrimitive()))
					return 1;
			}
			else
				Stack[Nb++] = node[i].getChildData();
		}
		return 0;
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_NOORDER_SEGMENT_AABB_H
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SegmentAABB_Inflated.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SegmentAABB_Inflated.h
new file mode 100644
index 00000000..7bf3285c
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SegmentAABB_Inflated.h
@@ -0,0 +1,55 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef GU_BV4_PROCESS_STREAM_NOORDER_SEGMENT_AABB_INFLATED_H
#define GU_BV4_PROCESS_STREAM_NOORDER_SEGMENT_AABB_INFLATED_H

#ifndef GU_BV4_USE_SLABS
	// Same as the plain segment-AABB variant, but the overlap test additionally
	// takes params->mOriginalExtents_Padded - presumably the swept shape's
	// extents, inflating the node bounds (confirm against BV4_SegmentAABBOverlap).
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE Ps::IntBool BV4_ProcessNodeNoOrder(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_SegmentAABBOverlap(node+i, params->mOriginalExtents_Padded, params))
	#else
		if(BV4_SegmentAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params->mOriginalExtents_Padded, params))
	#endif
		{
			if(node[i].isLeaf())
			{
				if(LeafTestT::doLeafTest(params, node[i].getPrimitive()))
					return 1;
			}
			else
				Stack[Nb++] = node[i].getChildData();
		}
		return 0;
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_NOORDER_SEGMENT_AABB_INFLATED_H
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SphereAABB.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SphereAABB.h
new file mode 100644
index 00000000..52d1dce5
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamNoOrder_SphereAABB.h @@ -0,0 +1,74 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef GU_BV4_PROCESS_STREAM_NOORDER_SPHERE_AABB_H
#define GU_BV4_PROCESS_STREAM_NOORDER_SPHERE_AABB_H

#ifdef GU_BV4_USE_SLABS
	// Sphere-vs-node-bounds test, swizzled (slabs) layout. Uses the CE2 variant
	// of the center/extents extraction macro (the CE variant is kept commented
	// out); both are expected to bring centerV/extentsV into scope.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE Ps::IntBool BV4_ProcessNodeNoOrder_Swizzled(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataSwizzled* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
//		OPC_SLABS_GET_CE(i)
		OPC_SLABS_GET_CE2(i)

		if(BV4_SphereAABBOverlap(centerV, extentsV, params))
		{
			if(node->isLeaf(i))
			{
				if(LeafTestT::doLeafTest(params, node->getPrimitive(i)))
					return 1;
			}
			else
				Stack[Nb++] = node->getChildData(i);
		}
		return 0;
	}
#else
	// Packed layout version; early-outs as soon as a leaf test reports a hit.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE Ps::IntBool BV4_ProcessNodeNoOrder(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_SphereAABBOverlap(node+i, params))
	#else
		if(BV4_SphereAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params))
	#endif
		{
			if(node[i].isLeaf())
			{
				if(LeafTestT::doLeafTest(params, node[i].getPrimitive()))
					return 1;
			}
			else
				Stack[Nb++] = node[i].getChildData();
		}
		return 0;
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_NOORDER_SPHERE_AABB_H
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamOrdered_OBBOBB.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamOrdered_OBBOBB.h
new file mode 100644
index 00000000..24dd9757
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_ProcessStreamOrdered_OBBOBB.h
@@ -0,0 +1,81 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef GU_BV4_PROCESS_STREAM_ORDERED_OBB_OBB_H
#define GU_BV4_PROCESS_STREAM_ORDERED_OBB_OBB_H

// Per-node visitors for the *ordered* BV4 stream traversal, specialized for
// OBB-vs-AABB culling (used by box overlap/sweep queries). "Ordered" traversal
// visits children by a precomputed near-to-far order; the leaf test here returns
// void — closest-hit shrinking happens inside the leaf functor itself.

#ifdef GU_BV4_USE_SLABS
	// Swizzled-node variant: tests child i and ORs bit i into 'code' when the
	// child is an overlapping internal node; leaves are tested immediately.
	// OPC_SLABS_GET_CE(i) expands to declarations of centerV/extentsV
	// ((max+min)/2 and (max-min)/2, see GuBV4_Slabs.h).
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered2_Swizzled(PxU32& code, const BVDataSwizzled* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
		OPC_SLABS_GET_CE(i)

		if(BV4_BoxBoxOverlap(centerV, extentsV, params))
		{
			if(node->isLeaf(i))
				LeafTestT::doLeafTest(params, node->getPrimitive(i));
			else
				code |= 1<<i;	// remember overlapping internal child for ordered push
		}
	}
#else
	// Packed-node variant with runtime child index i; i<limit guards against
	// reading past the node group's child count.
	// NOTE(review): the non-quantized call passes (mExtents, mCenter) — reversed
	// relative to the segment/sphere headers, which pass center first. This
	// presumably matches the non-quantized BV4_BoxBoxOverlap overload's parameter
	// order — confirm against its declaration before "fixing".
	template<class LeafTestT, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params, PxU32 i, PxU32 limit)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(i<limit && BV4_BoxBoxOverlap(node+i, params))
	#else
		if(i<limit && BV4_BoxBoxOverlap(node[i].mAABB.mExtents, node[i].mAABB.mCenter, params))
	#endif
		{
			if(node[i].isLeaf())
				LeafTestT::doLeafTest(params, node[i].getPrimitive());
			else
				Stack[Nb++] = node[i].getChildData();
		}
	}

	// Compile-time-index variant: accumulates overlapping internal children as
	// bits in 'code' instead of pushing them directly.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered2(PxU32& code, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_BoxBoxOverlap(node+i, params))
	#else
		if(BV4_BoxBoxOverlap(node[i].mAABB.mExtents, node[i].mAABB.mCenter, params))
	#endif
		{
			if(node[i].isLeaf())
				LeafTestT::doLeafTest(params, node[i].getPrimitive());
			else
				code |= 1<<i;
		}
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_ORDERED_OBB_OBB_H
under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef GU_BV4_PROCESS_STREAM_ORDERED_SEGMENT_AABB_H
#define GU_BV4_PROCESS_STREAM_ORDERED_SEGMENT_AABB_H

// Per-node visitors for the *ordered* BV4 stream traversal, specialized for
// segment-vs-AABB culling (raycast closest-hit). Only compiled when the slabs
// traversal is disabled: with GU_BV4_USE_SLABS the ray path uses the Kajiya
// slab kernels instead (see GuBV4_Slabs_KajiyaOrdered.h includes in GuBV4_Raycast.cpp).

#ifndef GU_BV4_USE_SLABS
	// Runtime-index variant; i<limit guards the node group's child count.
	// Overlapping internal children are pushed on Stack (Nb in/out); leaves are
	// tested immediately via LeafTestT.
	template<class LeafTestT, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params, PxU32 i, PxU32 limit)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(i<limit && BV4_SegmentAABBOverlap(node+i, params))
	#else
		if(i<limit && BV4_SegmentAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params))
	#endif
		{
			if(node[i].isLeaf())
				LeafTestT::doLeafTest(params, node[i].getPrimitive());
			else
				Stack[Nb++] = node[i].getChildData();
		}
	}

	// Compile-time-index variant: records overlapping internal children as bit i
	// of 'code' so the caller can push them in its preferred (sorted) order.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered2(PxU32& code, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_SegmentAABBOverlap(node+i, params))
	#else
		if(BV4_SegmentAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params))
	#endif
		{
			if(node[i].isLeaf())
				LeafTestT::doLeafTest(params, node[i].getPrimitive());
			else
				code |= 1<<i;
		}
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_ORDERED_SEGMENT_AABB_H
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef GU_BV4_PROCESS_STREAM_ORDERED_SEGMENT_AABB_INFLATED_H
#define GU_BV4_PROCESS_STREAM_ORDERED_SEGMENT_AABB_INFLATED_H

// Per-node visitors for the *ordered* BV4 stream traversal, specialized for an
// *inflated* segment-vs-AABB test: node bounds are grown by the query volume's
// extents (swept-box / fat raycast style queries). Only compiled when the slabs
// traversal is disabled.

#ifndef GU_BV4_USE_SLABS
	// Runtime-index variant. The extra argument passes the inflation extents
	// (params->mOriginalExtents) into the overlap test; i<limit guards the
	// node group's child count.
	template<class LeafTestT, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered(PxU32* PX_RESTRICT Stack, PxU32& Nb, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params, PxU32 i, PxU32 limit)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(i<limit && BV4_SegmentAABBOverlap(node+i, params->mOriginalExtents, params))
	#else
		if(i<limit && BV4_SegmentAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params->mOriginalExtents, params))
	#endif
		{
			if(node[i].isLeaf())
				LeafTestT::doLeafTest(params, node[i].getPrimitive());
			else
				Stack[Nb++] = node[i].getChildData();
		}
	}

	// Compile-time-index variant: accumulates overlapping internal children as
	// bits in 'code'.
	// NOTE(review): this overload reads mOriginalExtents_Padded while the one
	// above reads mOriginalExtents — presumably both fields exist on ParamsT and
	// alias the same value with different padding guarantees; confirm the
	// asymmetry is intentional before unifying.
	template<class LeafTestT, int i, class ParamsT>
	PX_FORCE_INLINE void BV4_ProcessNodeOrdered2(PxU32& code, const BVDataPacked* PX_RESTRICT node, ParamsT* PX_RESTRICT params)
	{
	#ifdef GU_BV4_QUANTIZED_TREE
		if(BV4_SegmentAABBOverlap(node+i, params->mOriginalExtents_Padded, params))
	#else
		if(BV4_SegmentAABBOverlap(node[i].mAABB.mCenter, node[i].mAABB.mExtents, params->mOriginalExtents_Padded, params))
	#endif
		{
			if(node[i].isLeaf())
				LeafTestT::doLeafTest(params, node[i].getPrimitive());
			else
				code |= 1<<i;
		}
	}
#endif

#endif // GU_BV4_PROCESS_STREAM_ORDERED_SEGMENT_AABB_INFLATED_H
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "GuBV4.h"
using namespace physx;
using namespace Gu;

#if PX_INTEL_FAMILY

#include "PxQueryReport.h"
#include "GuInternal.h"

#include "GuIntersectionRayTriangle.h"

#include "PsVecMath.h"
using namespace physx::shdfnd::aos;

#include "GuBV4_Common.h"

// Internal raycast hit record extended with barycentric (u, v) coordinates, so
// the best hit's impact data can be recomputed once at the end of the query.
class RaycastHitInternalUV : public RaycastHitInternal
{
	public:
	PX_FORCE_INLINE		RaycastHitInternalUV()	{}
	PX_FORCE_INLINE		~RaycastHitInternalUV()	{}

	float	mU, mV;		// barycentric hit coordinates within the triangle
};

// Ray-vs-triangle test (Moller-Trumbore style), templated over the params
// struct so both plain and callback raycast paths can share it.
// On hit, fills mStabbedFace.distance/u/v and returns 1; returns 0 on miss,
// on rays parallel to the triangle plane (|det| below the culling epsilon),
// and on intersections behind the ray origin (d < 0).
// params->mBackfaceCulling selects the one-sided branch (which also defers the
// costly 1/det divide until after all rejection tests); params->mGeomEpsilon
// enlarges the accepted (u, v) range to tolerate hits on triangle borders.
template<class T>
PX_FORCE_INLINE Ps::IntBool RayTriOverlapT(PxRaycastHit& mStabbedFace, const PxVec3& vert0, const PxVec3& vert1, const PxVec3& vert2, const T* PX_RESTRICT params)
{
	// Find vectors for two edges sharing vert0
	const PxVec3 edge1 = vert1 - vert0;
	const PxVec3 edge2 = vert2 - vert0;

	// Begin calculating determinant - also used to calculate U parameter
	const PxVec3 pvec = params->mLocalDir_Padded.cross(edge2);

	// If determinant is near zero, ray lies in plane of triangle
	const float det = edge1.dot(pvec);

	if(params->mBackfaceCulling)
	{
		if(det<GU_CULLING_EPSILON_RAY_TRIANGLE)
			return 0;

		// Calculate distance from vert0 to ray origin
		const PxVec3 tvec = params->mOrigin_Padded - vert0;

		// Calculate U parameter and test bounds
		const float u = tvec.dot(pvec);

		// Scale the epsilon by det so u/v can be range-tested before dividing
		const PxReal enlargeCoeff = params->mGeomEpsilon*det;
		const PxReal uvlimit = -enlargeCoeff;
		const PxReal uvlimit2 = det + enlargeCoeff;

		if(u < uvlimit || u > uvlimit2)
			return 0;

		// Prepare to test V parameter
		const PxVec3 qvec = tvec.cross(edge1);

		// Calculate V parameter and test bounds
		const float v = params->mLocalDir_Padded.dot(qvec);
		if(v < uvlimit || (u + v) > uvlimit2)
			return 0;

		// Calculate t, scale parameters, ray intersects triangle
		const float d = edge2.dot(qvec);
		// Det > 0 so we can early exit here
		// Intersection point is valid if distance is positive (else it can just be a face behind the orig point)
		if(d<0.0f)
			return 0;

		// Else go on
		const float OneOverDet = 1.0f / det;
		mStabbedFace.distance = d * OneOverDet;
		mStabbedFace.u = u * OneOverDet;
		mStabbedFace.v = v * OneOverDet;
	}
	else
	{
		if(PxAbs(det)<GU_CULLING_EPSILON_RAY_TRIANGLE)
			return 0;

		const float OneOverDet = 1.0f / det;

		const PxVec3 tvec = params->mOrigin_Padded - vert0;

		const float u = tvec.dot(pvec) * OneOverDet;
		if(u<-params->mGeomEpsilon || u>1.0f+params->mGeomEpsilon)
			return 0;

		// prepare to test V parameter
		const PxVec3 qvec = tvec.cross(edge1);

		// Calculate V parameter and test bounds
		const float v = params->mLocalDir_Padded.dot(qvec) * OneOverDet;
		if(v < -params->mGeomEpsilon || (u + v) > 1.0f + params->mGeomEpsilon)
			return 0;

		// Calculate t, ray intersects triangle
		const float d = edge2.dot(qvec) * OneOverDet;
		// Intersection point is valid if distance is positive (else it can just be a face behind the orig point)
		if(d<0.0f)
			return 0;
		mStabbedFace.distance = d;
		mStabbedFace.u = u;
		mStabbedFace.v = v;
	}
	return 1;
}

#if PX_VC
#pragma warning ( disable : 4324 )	// structure was padded due to alignment specifier (expected: aligned members below)
#endif

// Shared parameter block for all BV4 raycast variants. Holds the local-space
// ray, mesh topology/vertex pointers, the best hit so far, and the SIMD-aligned
// scratch data consumed by the segment-vs-AABB node tests.
struct RayParams
{
#ifdef GU_BV4_QUANTIZED_TREE
	// Dequantization coefficients for quantized node bounds
	BV4_ALIGN16(Vec3p	mCenterOrMinCoeff_PaddedAligned);
	BV4_ALIGN16(Vec3p	mExtentsOrMaxCoeff_PaddedAligned);
#endif
// Organized in the order they are accessed
#ifndef GU_BV4_USE_SLABS
	// Precomputed ray data used by the non-slabs BV4_SegmentAABBOverlap test
	// (set up by setupRayData; refreshed when the best hit distance shrinks)
	BV4_ALIGN16(Vec3p	mData2_PaddedAligned);
	BV4_ALIGN16(Vec3p	mFDir_PaddedAligned);
	BV4_ALIGN16(Vec3p	mData_PaddedAligned);
#endif
	const IndTri32* PX_RESTRICT	mTris32;	// 32-bit triangle indices (or null)
	const IndTri16* PX_RESTRICT	mTris16;	// 16-bit triangle indices (or null)
	const PxVec3* PX_RESTRICT	mVerts;		// mesh vertices
	PxVec3	mLocalDir_Padded;				// ray direction in mesh-local space
	PxVec3	mOrigin_Padded;					// ray origin in mesh-local space

	float	mGeomEpsilon;					// uv-range tolerance for border hits
	PxU32	mBackfaceCulling;				// nonzero => one-sided triangle test

	RaycastHitInternalUV	mStabbedFace;	// best hit so far (mDistance doubles as the clip distance)
	PxU32					mEarlyExit;		// nonzero => any-hit query

	PxVec3	mOriginalExtents_Padded;	// Added to please the slabs code

	// Vertices of the best-hit triangle, kept so computeImpactData can rebuild
	// the impact position/normal once at the end
	BV4_ALIGN16(Vec3p	mP0_PaddedAligned);
	BV4_ALIGN16(Vec3p	mP1_PaddedAligned);
	BV4_ALIGN16(Vec3p	mP2_PaddedAligned);
};
///////////////////////////////////////////////////////////////////////////////

// Records a new best hit: caches the triangle's vertices (for later impact
// reconstruction) and copies index/distance/uv into params->mStabbedFace.
static PX_FORCE_INLINE void updateParamsAfterImpact(RayParams* PX_RESTRICT params, PxU32 primIndex, PxU32 VRef0, PxU32 VRef1, PxU32 VRef2, const PxRaycastHit& StabbedFace)
{
	// Unaligned vertex loads, aligned stores into the padded scratch slots
	V4StoreA_Safe(V4LoadU_Safe(&params->mVerts[VRef0].x), &params->mP0_PaddedAligned.x);
	V4StoreA_Safe(V4LoadU_Safe(&params->mVerts[VRef1].x), &params->mP1_PaddedAligned.x);
	V4StoreA_Safe(V4LoadU_Safe(&params->mVerts[VRef2].x), &params->mP2_PaddedAligned.x);

	params->mStabbedFace.mTriangleID = primIndex;
	params->mStabbedFace.mDistance = StabbedFace.distance;
	params->mStabbedFace.mU = StabbedFace.u;
	params->mStabbedFace.mV = StabbedFace.v;
}

namespace
{
// Leaf functor for closest-hit raycasts: tests every triangle in the leaf,
// keeps the nearest hit, and shrinks the traversal's clip distance. Always
// returns 0 (never aborts the traversal).
class LeafFunction_RaycastClosest
{
public:
	static /*PX_FORCE_INLINE*/ Ps::IntBool doLeafTest(RayParams* PX_RESTRICT params, PxU32 primIndex)
	{
		// Aligned stack buffer reused for each candidate hit
		PX_ALIGN_PREFIX(16) char buffer[sizeof(PxRaycastHit)] PX_ALIGN_SUFFIX(16);
		PxRaycastHit& StabbedFace = reinterpret_cast<PxRaycastHit&>(buffer);

		// primIndex packs the leaf's primitive count in its high bits
		PxU32 nbToGo = getNbPrimitives(primIndex);
		do
		{
			PxU32 VRef0, VRef1, VRef2;
			getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16);

			if(RayTriOverlapT<RayParams>(StabbedFace, params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params))
			{
				if(StabbedFace.distance<params->mStabbedFace.mDistance)	//### just for a corner case UT in PhysX :(
				{
					updateParamsAfterImpact(params, primIndex, VRef0, VRef1, VRef2, StabbedFace);

#ifndef GU_BV4_USE_SLABS
					// Refresh precomputed ray data with the shorter clip distance
					setupRayData(params, StabbedFace.distance, params->mOrigin_Padded, params->mLocalDir_Padded);
#endif
				}
			}

			primIndex++;
		}while(nbToGo--);

		return 0;
	}
};

// Leaf functor for any-hit raycasts: returns 1 on the first accepted hit so
// the unordered traversal can abort immediately.
class LeafFunction_RaycastAny
{
public:
	static /*PX_FORCE_INLINE*/ Ps::IntBool doLeafTest(RayParams* PX_RESTRICT params, PxU32 primIndex)
	{
		PxU32 nbToGo = getNbPrimitives(primIndex);
		do
		{
			PxU32 VRef0, VRef1, VRef2;
			getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16);

			PX_ALIGN_PREFIX(16) char buffer[sizeof(PxRaycastHit)] PX_ALIGN_SUFFIX(16);
			PxRaycastHit& StabbedFace = reinterpret_cast<PxRaycastHit&>(buffer);
			if(RayTriOverlapT<RayParams>(StabbedFace, params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params))
			{
				if(StabbedFace.distance<params->mStabbedFace.mDistance)	//### just for a corner case UT in PhysX :(
				{
					// Hit data is still recorded: callers read it even for any-hit
					updateParamsAfterImpact(params, primIndex, VRef0, VRef1, VRef2, StabbedFace);
					return 1;
				}
			}

			primIndex++;
		}while(nbToGo--);

		return 0;
	}
};
}

// Rotates p by the 3x3 part of mat (columns must be 16-byte aligned);
// translation is deliberately not applied (used for normals as well as points).
static PX_FORCE_INLINE Vec4V multiply3x3V_Aligned(const Vec4V p, const PxMat44* PX_RESTRICT mat)
{
	const FloatV xxxV = V4GetX(p);
	const FloatV yyyV = V4GetY(p);
	const FloatV zzzV = V4GetZ(p);

	Vec4V ResV = V4Scale(V4LoadA(&mat->column0.x), xxxV);
	ResV = V4Add(ResV, V4Scale(V4LoadA(&mat->column1.x), yyyV));
	ResV = V4Add(ResV, V4Scale(V4LoadA(&mat->column2.x), zzzV));
	return ResV;
}

// Converts the best recorded hit into a full PxRaycastHit: interpolates the
// impact position from the cached triangle vertices and barycentrics, derives
// the (normalized) triangle normal, and transforms both to world space when
// worldm_Aligned is provided. Returns whether a hit was recorded at all.
static PX_FORCE_INLINE Ps::IntBool computeImpactData(PxRaycastHit* PX_RESTRICT hit, const RayParams* PX_RESTRICT params, const PxMat44* PX_RESTRICT worldm_Aligned, PxHitFlags /*hitFlags*/)
{
	if(params->mStabbedFace.mTriangleID!=PX_INVALID_U32 /*&& !params->mEarlyExit*/)	//### PhysX needs the raycast data even for "any hit" :(
	{
		const float u = params->mStabbedFace.mU;
		const float v = params->mStabbedFace.mV;
		const float d = params->mStabbedFace.mDistance;
		const PxU32 id = params->mStabbedFace.mTriangleID;
		hit->u = u;
		hit->v = v;
		hit->distance = d;
		hit->faceIndex = id;

		{
			const Vec4V P0V = V4LoadA_Safe(&params->mP0_PaddedAligned.x);
			const Vec4V P1V = V4LoadA_Safe(&params->mP1_PaddedAligned.x);
			const Vec4V P2V = V4LoadA_Safe(&params->mP2_PaddedAligned.x);

			const FloatV uV = FLoad(params->mStabbedFace.mU);
			const FloatV vV = FLoad(params->mStabbedFace.mV);
			const float w = 1.0f - params->mStabbedFace.mU - params->mStabbedFace.mV;
			const FloatV wV = FLoad(w);
			//pt = (1.0f - u - v)*p0 + u*p1 + v*p2;
			Vec4V LocalPtV = V4Scale(P1V, uV);
			LocalPtV = V4Add(LocalPtV, V4Scale(P2V, vV));
			LocalPtV = V4Add(LocalPtV, V4Scale(P0V, wV));

			const Vec4V LocalNormalV = V4Cross(V4Sub(P0V, P1V), V4Sub(P0V, P2V));

			BV4_ALIGN16(Vec3p tmp_PaddedAligned);
			if(worldm_Aligned)
			{
				const Vec4V TransV = V4LoadA(&worldm_Aligned->column3.x);
				V4StoreU_Safe(V4Add(multiply3x3V_Aligned(LocalPtV, worldm_Aligned), TransV), &hit->position.x);
				V4StoreA_Safe(multiply3x3V_Aligned(LocalNormalV, worldm_Aligned), &tmp_PaddedAligned.x);
			}
			else
			{
				V4StoreU_Safe(LocalPtV, &hit->position.x);
				V4StoreA_Safe(LocalNormalV, &tmp_PaddedAligned.x);
			}
			tmp_PaddedAligned.normalize();
			hit->normal = tmp_PaddedAligned;	// PT: TODO: check asm here (TA34704)
		}
	}
	return params->mStabbedFace.mTriangleID!=PX_INVALID_U32;
}

// Returns a conservative upper bound on how far the ray can travel while still
// intersecting the mesh's local bounds, by projecting origin and bounds onto
// the ray direction; used to clip the initial raycast distance.
static PX_FORCE_INLINE float clipRay(const PxVec3& ray_orig, const PxVec3& ray_dir, const LocalBounds& local_bounds)
{
	const float dpc = local_bounds.mCenter.dot(ray_dir);
	const float dpMin = dpc - local_bounds.mExtentsMagnitude;
	const float dpMax = dpc + local_bounds.mExtentsMagnitude;
	const float dpO = ray_orig.dot(ray_dir);
	const float boxLength = local_bounds.mExtentsMagnitude * 2.0f;
	const float distToBox = PxMin(fabsf(dpMin - dpO), fabsf(dpMax - dpO));
	return distToBox + boxLength * 2.0f;
}

// Common setup for all raycast entry points: transforms the ray into mesh-local
// space, clips maxDist against the mesh bounds, resets the best-hit record, and
// wires up mesh/coefficient pointers (plus the non-slabs precomputed ray data).
template<class ParamsT>
static PX_FORCE_INLINE void setupRayParams(ParamsT* PX_RESTRICT params, const PxVec3& origin, const PxVec3& dir, const BV4Tree* PX_RESTRICT tree, const PxMat44* PX_RESTRICT world, const SourceMesh* PX_RESTRICT mesh, float maxDist, float geomEpsilon, PxU32 flags)
{
	params->mGeomEpsilon = geomEpsilon;
	setupParamsFlags(params, flags);

	computeLocalRay(params->mLocalDir_Padded, params->mOrigin_Padded, dir, origin, world);

	// PT: TODO: clipRay may not be needed with GU_BV4_USE_SLABS (TA34704)
	const float MaxDist = clipRay(params->mOrigin_Padded, params->mLocalDir_Padded, tree->mLocalBounds);
	maxDist = PxMin(maxDist, MaxDist);
	params->mStabbedFace.mDistance = maxDist;
	params->mStabbedFace.mTriangleID = PX_INVALID_U32;	// "no hit yet"

	setupMeshPointersAndQuantizedCoeffs(params, mesh, tree);

#ifndef GU_BV4_USE_SLABS
	setupRayData(params, maxDist, params->mOrigin_Padded, params->mLocalDir_Padded);
#endif
}

#include "GuBV4_Internal.h"
#ifdef GU_BV4_USE_SLABS
	#include "GuBV4_Slabs.h"
#endif
#include "GuBV4_ProcessStreamOrdered_SegmentAABB.h"
#ifdef GU_BV4_USE_SLABS
	#include "GuBV4_Slabs_KajiyaNoOrder.h"
	#include "GuBV4_Slabs_KajiyaOrdered.h"
#endif

#ifndef GU_BV4_USE_SLABS
#ifdef GU_BV4_QUANTIZED_TREE

#define NEW_VERSION

// Segment-vs-AABB separating-axis test against a *quantized* node: dequantizes
// center/extents in-register (low 16 bits = extents, high 16 = center, scaled
// by the tree coefficients), then tests the 3 axis-aligned axes followed by the
// 3 cross-product axes. Returns 1 on overlap.
static PX_FORCE_INLINE /*PX_NOINLINE*/ Ps::IntBool BV4_SegmentAABBOverlap(const BVDataPacked* PX_RESTRICT node, const RayParams* PX_RESTRICT params)
{
#ifdef NEW_VERSION
	SSE_CONST4(maskV, 0x7fffffff);	// clears sign bits => fabs
	SSE_CONST4(maskQV, 0x0000ffff);	// isolates the quantized extents halfword
#else
	const PxU32 maskI = 0x7fffffff;
#endif

	Vec4V centerV = V4LoadA((float*)node->mAABB.mData);
#ifdef NEW_VERSION
	__m128 extentsV = _mm_castsi128_ps(_mm_and_si128(_mm_castps_si128(centerV), SSE_CONST(maskQV)));
#else
	__m128 extentsV = _mm_castsi128_ps(_mm_and_si128(_mm_castps_si128(centerV), _mm_set1_epi32(0x0000ffff)));
#endif
	extentsV = V4Mul(_mm_cvtepi32_ps(_mm_castps_si128(extentsV)), V4LoadA_Safe(&params->mExtentsOrMaxCoeff_PaddedAligned.x));
	// Arithmetic shift recovers the signed 16-bit center values
	centerV = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(centerV), 16));
	centerV = V4Mul(_mm_cvtepi32_ps(_mm_castps_si128(centerV)), V4LoadA_Safe(&params->mCenterOrMinCoeff_PaddedAligned.x));

	const Vec4V fdirV = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
	const Vec4V DV = V4Sub(V4LoadA_Safe(&params->mData2_PaddedAligned.x), centerV);

#ifdef NEW_VERSION
	__m128 absDV = _mm_and_ps(DV, SSE_CONSTF(maskV));
#else
	__m128 absDV = _mm_and_ps(DV, _mm_load1_ps((float*)&maskI));
#endif

	// Axis-aligned axes: separated when |D| > extents + fdir on any of x/y/z
	// (movemask picks up the sign bit of (extents+fdir) - |D|)
	absDV = V4Sub(V4Add(extentsV, fdirV), absDV);
	const PxU32 test = (PxU32)_mm_movemask_ps(absDV);
	if(test&7)
		return 0;

	if(1)
	{
		// Cross-product axes (dir x axis): |f| > g => separated
		const Vec4V dataZYX_V = V4LoadA_Safe(&params->mData_PaddedAligned.x);
		const __m128 dataXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dataZYX_V), _MM_SHUFFLE(3,0,2,1)));
		const __m128 DXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(DV), _MM_SHUFFLE(3,0,2,1)));
		const Vec4V fV = V4Sub(V4Mul(dataZYX_V, DXZY_V), V4Mul(dataXZY_V, DV));

		const Vec4V fdirZYX_V = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
		const __m128 fdirXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(fdirZYX_V), _MM_SHUFFLE(3,0,2,1)));
		const __m128 extentsXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1)));
		// PT: TODO: use V4MulAdd here (TA34704)
		const Vec4V fg = V4Add(V4Mul(extentsV, fdirXZY_V), V4Mul(extentsXZY_V, fdirZYX_V));

#ifdef NEW_VERSION
		__m128 absfV = _mm_and_ps(fV, SSE_CONSTF(maskV));
#else
		__m128 absfV = _mm_and_ps(fV, _mm_load1_ps((float*)&maskI));
#endif
		absfV = V4Sub(fg, absfV);
		const PxU32 test2 = (PxU32)_mm_movemask_ps(absfV);

		if(test2&7)
			return 0;
		return 1;
	}
}
#else
// Segment-vs-AABB separating-axis test against float center/extents: same axis
// tests as the quantized version but uses cmpgt instead of sign-bit subtraction.
static PX_FORCE_INLINE /*PX_NOINLINE*/ Ps::IntBool BV4_SegmentAABBOverlap(const PxVec3& center, const PxVec3& extents, const RayParams* PX_RESTRICT params)
{
	const PxU32 maskI = 0x7fffffff;

	const Vec4V fdirV = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
	const Vec4V extentsV = V4LoadU(&extents.x);

	const Vec4V DV = V4Sub(V4LoadA_Safe(&params->mData2_PaddedAligned.x), V4LoadU(&center.x));	//###center should be aligned

	__m128 absDV = _mm_and_ps(DV, _mm_load1_ps((float*)&maskI));

	// Axis-aligned axes
	absDV = _mm_cmpgt_ps(absDV, V4Add(extentsV, fdirV));
	const PxU32 test = (PxU32)_mm_movemask_ps(absDV);
	if(test&7)
		return 0;

	if(1)
	{
		// Cross-product axes
		const Vec4V dataZYX_V = V4LoadA_Safe(&params->mData_PaddedAligned.x);
		const __m128 dataXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(dataZYX_V), _MM_SHUFFLE(3,0,2,1)));
		const __m128 DXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(DV), _MM_SHUFFLE(3,0,2,1)));
		const Vec4V fV = V4Sub(V4Mul(dataZYX_V, DXZY_V), V4Mul(dataXZY_V, DV));

		const Vec4V fdirZYX_V = V4LoadA_Safe(&params->mFDir_PaddedAligned.x);
		const __m128 fdirXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(fdirZYX_V), _MM_SHUFFLE(3,0,2,1)));
		const __m128 extentsXZY_V = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(extentsV), _MM_SHUFFLE(3,0,2,1)));
		// PT: TODO: use V4MulAdd here (TA34704)
		const Vec4V fg = V4Add(V4Mul(extentsV, fdirXZY_V), V4Mul(extentsXZY_V, fdirZYX_V));

		__m128 absfV = _mm_and_ps(fV, _mm_load1_ps((float*)&maskI));
		absfV = _mm_cmpgt_ps(absfV, fg);
		const PxU32 test2 = (PxU32)_mm_movemask_ps(absfV);
		if(test2&7)
			return 0;
		return 1;
	}
}
#endif
#endif

// Main single-hit raycast entry point. Chooses the unordered traversal with an
// early-exit leaf functor for any-hit queries (mEarlyExit), or the ordered
// traversal for closest-hit; falls back to brute force for trees without nodes.
// Returns whether a hit was found; *hit is filled via computeImpactData.
Ps::IntBool BV4_RaycastSingle(const PxVec3& origin, const PxVec3& dir, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxRaycastHit* PX_RESTRICT hit, float maxDist, float geomEpsilon, PxU32 flags, PxHitFlags hitFlags)
{
	const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface;

	RayParams Params;
	setupRayParams(&Params, origin, dir, &tree, worldm_Aligned, mesh, maxDist, geomEpsilon, flags);

	if(tree.mNodes)
	{
		if(Params.mEarlyExit)
			processStreamRayNoOrder(0, LeafFunction_RaycastAny)(tree.mNodes, tree.mInitData, &Params);
		else
			processStreamRayOrdered(0, LeafFunction_RaycastClosest)(tree.mNodes, tree.mInitData, &Params);
	}
	else
		doBruteForceTests<LeafFunction_RaycastAny, LeafFunction_RaycastClosest>(mesh->getNbTriangles(), &Params);

	return computeImpactData(hit, &Params, worldm_Aligned, hitFlags);
}



// Callback-based version

namespace
{

// Raycast params extended with a user callback invoked per intersected triangle.
struct RayParamsCB : RayParams
{
	MeshRayCallback	mCallback;
	void*			mUserData;
};

// Leaf functor for the callback raycast: reports every intersected triangle to
// the user callback; returns 1 (abort traversal) when the callback says HIT_EXIT.
class LeafFunction_RaycastCB
{
public:
	static Ps::IntBool doLeafTest(RayParamsCB* PX_RESTRICT params, PxU32 primIndex)
	{
		PxU32 nbToGo = getNbPrimitives(primIndex);
		do
		{
			PxU32 VRef0, VRef1, VRef2;
			getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16);

			const PxVec3& p0 = params->mVerts[VRef0];
			const PxVec3& p1 = params->mVerts[VRef1];
			const PxVec3& p2 = params->mVerts[VRef2];

			PX_ALIGN_PREFIX(16) char buffer[sizeof(PxRaycastHit)] PX_ALIGN_SUFFIX(16);
			PxRaycastHit& StabbedFace = reinterpret_cast<PxRaycastHit&>(buffer);
			if(RayTriOverlapT<RayParams>(StabbedFace, p0, p1, p2, params))
			{
				HitCode Code = (params->mCallback)(params->mUserData, p0, p1, p2, primIndex, StabbedFace.distance, StabbedFace.u, StabbedFace.v);
				if(Code==HIT_EXIT)
					return 1;

				// PT: TODO: no shrinking here? (TA34704)
			}

			primIndex++;
		}while(nbToGo--);

		return 0;
	}
};

}

#include "GuBV4_ProcessStreamNoOrder_SegmentAABB.h"

// Callback raycast entry point: reports each intersected triangle to 'callback'
// via the unordered traversal (no hit sorting, no distance shrinking).
void BV4_RaycastCB(const PxVec3& origin, const PxVec3& dir, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, float maxDist, float geomEpsilon, PxU32 flags, MeshRayCallback callback, void* userData)
{
	const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface;

	//### beware, some parameters in the struct aren't used
	RayParamsCB Params;
	Params.mCallback = callback;
	Params.mUserData = userData;
	setupRayParams(&Params, origin, dir, &tree, worldm_Aligned, mesh, maxDist, geomEpsilon, flags);

	if(tree.mNodes)
	{
		processStreamRayNoOrder(0, LeafFunction_RaycastCB)(tree.mNodes, tree.mInitData, &Params);
	}
	else
	{
		// No tree: the whole mesh is one implicit leaf (small meshes only)
		const PxU32 nbTris = mesh->getNbTriangles();
		PX_ASSERT(nbTris<16);
//		if(Params.mEarlyExit)
//			LeafFunction_BoxSweepAnyCB::doLeafTest(&Params, nbTris);
//		else
			LeafFunction_RaycastCB::doLeafTest(&Params, nbTris);
	}
}

// Raycast all

namespace
{
// Raycast params extended with a user-supplied hit buffer for multi-hit queries.
struct RayParamsAll : RayParams
{
	PxU32			mNbHits;		// hits written so far
	PxU32			mMaxNbHits;		// capacity of mHits
	PxRaycastHit*	mHits;			// user-supplied output buffer
	const PxMat44*	mWorld_Aligned;	// for per-hit impact transform
	PxHitFlags		mHitFlags;
};

// Leaf functor for multi-hit raycasts: appends every intersected triangle to
// the hit buffer (with full impact data); returns 1 to abort when full.
class LeafFunction_RaycastAll
{
public:
	static /*PX_FORCE_INLINE*/ Ps::IntBool doLeafTest(RayParams* PX_RESTRICT p, PxU32 primIndex)
	{
		RayParamsAll* params = static_cast<RayParamsAll*>(p);

		PxU32 nbToGo = getNbPrimitives(primIndex);
		do
		{
			PxU32 VRef0, VRef1, VRef2;
			getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16);

			// Write the candidate hit directly into the next output slot
			PxRaycastHit& StabbedFace = params->mHits[params->mNbHits];
			if(RayTriOverlapT<RayParams>(StabbedFace, params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], params))
			{
				updateParamsAfterImpact(params, primIndex, VRef0, VRef1, VRef2, StabbedFace);

				computeImpactData(&StabbedFace, params, params->mWorld_Aligned, params->mHitFlags);

				params->mNbHits++;
				if(params->mNbHits==params->mMaxNbHits)
					return 1;	// buffer full — stop traversal
			}
			primIndex++;
		}while(nbToGo--);

		return 0;
	}
};
}

// PT: this function is not used yet, but eventually it should be
// Multi-hit raycast: collects up to maxNbHits hits (unsorted) into 'hits'.
// Returns the number of hits written. Requires a built tree (no brute-force path).
PxU32 BV4_RaycastAll(const PxVec3& origin, const PxVec3& dir, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxRaycastHit* PX_RESTRICT hits, PxU32 maxNbHits, float maxDist, float geomEpsilon, PxU32 flags, PxHitFlags hitFlags)
{
	const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface;

	RayParamsAll Params;
	Params.mNbHits = 0;
	Params.mMaxNbHits = maxNbHits;
	Params.mHits = hits;
	Params.mWorld_Aligned = worldm_Aligned;
	Params.mHitFlags = hitFlags;
	setupRayParams(&Params, origin, dir, &tree, worldm_Aligned, mesh, maxDist, geomEpsilon, flags);

	if(tree.mNodes)
	{
		processStreamRayNoOrder(0, LeafFunction_RaycastAll)(tree.mNodes, tree.mInitData, &Params);
	}
	else
	{
		PX_ASSERT(0);	// brute-force path not implemented for "raycast all"
	}
	return Params.mNbHits;
}

#endif
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV4_SLABS_H +#define GU_BV4_SLABS_H + +#include "PsFPU.h" +#include "GuBV4_Common.h" + +#ifdef GU_BV4_USE_SLABS + + // PT: contains code for tree-traversal using the swizzled format. + // PT: ray traversal based on Kay & Kajiya's slab intersection code, but using SIMD to do 4 ray-vs-AABB tests at a time. 
+ // PT: other (ordered or unordered) traversals just process one node at a time, similar to the non-swizzled format. + + #define BV4_SLABS_FIX + #define BV4_SLABS_SORT + + #define PNS_BLOCK3(a, b, c, d) { \ + if(code2 & (1<<a)) { stack[nb++] = tn->getChildData(a); } \ + if(code2 & (1<<b)) { stack[nb++] = tn->getChildData(b); } \ + if(code2 & (1<<c)) { stack[nb++] = tn->getChildData(c); } \ + if(code2 & (1<<d)) { stack[nb++] = tn->getChildData(d); } } \ + + #ifdef GU_BV4_QUANTIZED_TREE + #define OPC_SLABS_GET_MIN_MAX(i) \ + const __m128i minVi = _mm_set_epi32(0, node->mZ[i].mMin, node->mY[i].mMin, node->mX[i].mMin); \ + const Vec4V minCoeffV = V4LoadA_Safe(¶ms->mCenterOrMinCoeff_PaddedAligned.x); \ + Vec4V minV = V4Mul(_mm_cvtepi32_ps(minVi), minCoeffV); \ + const __m128i maxVi = _mm_set_epi32(0, node->mZ[i].mMax, node->mY[i].mMax, node->mX[i].mMax); \ + const Vec4V maxCoeffV = V4LoadA_Safe(¶ms->mExtentsOrMaxCoeff_PaddedAligned.x); \ + Vec4V maxV = V4Mul(_mm_cvtepi32_ps(maxVi), maxCoeffV); \ + + #define OPC_SLABS_GET_CE(i) \ + OPC_SLABS_GET_MIN_MAX(i) \ + const FloatV HalfV = FLoad(0.5f); \ + const Vec4V centerV = V4Scale(V4Add(maxV, minV), HalfV); \ + const Vec4V extentsV = V4Scale(V4Sub(maxV, minV), HalfV); + + #define OPC_SLABS_GET_CE2(i) \ + OPC_SLABS_GET_MIN_MAX(i) \ + const Vec4V centerV = V4Add(maxV, minV); \ + const Vec4V extentsV = V4Sub(maxV, minV); + #else + #define OPC_SLABS_GET_CE(i) \ + const FloatV HalfV = FLoad(0.5f); \ + const Vec4V minV = _mm_set_ps(0.0f, node->mMinZ[i], node->mMinY[i], node->mMinX[i]); \ + const Vec4V maxV = _mm_set_ps(0.0f, node->mMaxZ[i], node->mMaxY[i], node->mMaxX[i]); \ + const Vec4V centerV = V4Scale(V4Add(maxV, minV), HalfV); \ + const Vec4V extentsV = V4Scale(V4Sub(maxV, minV), HalfV); + + #define OPC_SLABS_GET_CE2(i) \ + const Vec4V minV = _mm_set_ps(0.0f, node->mMinZ[i], node->mMinY[i], node->mMinX[i]); \ + const Vec4V maxV = _mm_set_ps(0.0f, node->mMaxZ[i], node->mMaxY[i], node->mMaxX[i]); \ + const Vec4V centerV = 
V4Add(maxV, minV); \ + const Vec4V extentsV = V4Sub(maxV, minV); + #endif // GU_BV4_QUANTIZED_TREE + +#if PX_PS4 + // PT: TODO: for some reason using the intrinsics directly produces a compile error on PS4. TODO: find a better fix. + PX_FORCE_INLINE __m128i my_mm_srai_epi32(__m128i a, int count) + { + return _mm_srai_epi32(a, count); + } + + PX_FORCE_INLINE __m128i my_mm_slli_epi32(__m128i a, int count) + { + return _mm_slli_epi32(a, count); + } +#else + #define my_mm_srai_epi32 _mm_srai_epi32 + #define my_mm_slli_epi32 _mm_slli_epi32 +#endif + +#define OPC_DEQ4(part2xV, part1xV, mMember, minCoeff, maxCoeff) \ +{ \ + part2xV = V4LoadA(reinterpret_cast<const float*>(tn->mMember)); \ + part1xV = _mm_castsi128_ps(_mm_and_si128(_mm_castps_si128(part2xV), _mm_set1_epi32(0x0000ffff))); \ + part1xV = _mm_castsi128_ps(my_mm_srai_epi32(my_mm_slli_epi32(_mm_castps_si128(part1xV), 16), 16)); \ + part1xV = V4Mul(_mm_cvtepi32_ps(_mm_castps_si128(part1xV)), minCoeff); \ + part2xV = _mm_castsi128_ps(my_mm_srai_epi32(_mm_castps_si128(part2xV), 16)); \ + part2xV = V4Mul(_mm_cvtepi32_ps(_mm_castps_si128(part2xV)), maxCoeff); \ +} + +#define SLABS_INIT\ + Vec4V maxT4 = V4Load(params->mStabbedFace.mDistance);\ + const Vec4V rayP = V4LoadU_Safe(¶ms->mOrigin_Padded.x);\ + Vec4V rayD = V4LoadU_Safe(¶ms->mLocalDir_Padded.x);\ + const VecU32V raySign = V4U32and(VecU32V_ReinterpretFrom_Vec4V(rayD), signMask);\ + const Vec4V rayDAbs = V4Abs(rayD);\ + Vec4V rayInvD = Vec4V_ReinterpretFrom_VecU32V(V4U32or(raySign, VecU32V_ReinterpretFrom_Vec4V(V4Max(rayDAbs, epsFloat4))));\ + rayD = rayInvD;\ + rayInvD = V4RecipFast(rayInvD);\ + rayInvD = V4Mul(rayInvD, V4NegMulSub(rayD, rayInvD, twos));\ + const Vec4V rayPinvD = V4NegMulSub(rayInvD, rayP, zeroes);\ + const Vec4V rayInvDsplatX = V4SplatElement<0>(rayInvD);\ + const Vec4V rayInvDsplatY = V4SplatElement<1>(rayInvD);\ + const Vec4V rayInvDsplatZ = V4SplatElement<2>(rayInvD);\ + const Vec4V rayPinvDsplatX = V4SplatElement<0>(rayPinvD);\ + const 
Vec4V rayPinvDsplatY = V4SplatElement<1>(rayPinvD);\ + const Vec4V rayPinvDsplatZ = V4SplatElement<2>(rayPinvD); + +#define SLABS_TEST\ + const Vec4V tminxa0 = V4MulAdd(minx4a, rayInvDsplatX, rayPinvDsplatX);\ + const Vec4V tminya0 = V4MulAdd(miny4a, rayInvDsplatY, rayPinvDsplatY);\ + const Vec4V tminza0 = V4MulAdd(minz4a, rayInvDsplatZ, rayPinvDsplatZ);\ + const Vec4V tmaxxa0 = V4MulAdd(maxx4a, rayInvDsplatX, rayPinvDsplatX);\ + const Vec4V tmaxya0 = V4MulAdd(maxy4a, rayInvDsplatY, rayPinvDsplatY);\ + const Vec4V tmaxza0 = V4MulAdd(maxz4a, rayInvDsplatZ, rayPinvDsplatZ);\ + const Vec4V tminxa = V4Min(tminxa0, tmaxxa0);\ + const Vec4V tmaxxa = V4Max(tminxa0, tmaxxa0);\ + const Vec4V tminya = V4Min(tminya0, tmaxya0);\ + const Vec4V tmaxya = V4Max(tminya0, tmaxya0);\ + const Vec4V tminza = V4Min(tminza0, tmaxza0);\ + const Vec4V tmaxza = V4Max(tminza0, tmaxza0);\ + const Vec4V maxOfNeasa = V4Max(V4Max(tminxa, tminya), tminza);\ + const Vec4V minOfFarsa = V4Min(V4Min(tmaxxa, tmaxya), tmaxza);\ + + #define SLABS_TEST2\ + __m128 ignore4a = _mm_cmpgt_ps(epsFloat4, minOfFarsa); /* if tfar is negative, ignore since its a ray, not a line */\ + ignore4a = _mm_or_ps(ignore4a, _mm_cmpgt_ps(maxOfNeasa, maxT4)); /* if tnear is over maxT, ignore this result */\ + __m128 resa4 = _mm_cmpgt_ps(maxOfNeasa, minOfFarsa); /* if 1 => fail */\ + resa4 = _mm_or_ps(resa4, ignore4a);\ + const int code = _mm_movemask_ps(resa4);\ + if(code==15)\ + continue; + +#define SLABS_PNS \ + if(code2) \ + { \ + if(tn->decodePNSNoShift(0) & dirMask) \ + { \ + if(tn->decodePNSNoShift(1) & dirMask) \ + { \ + if(tn->decodePNSNoShift(2) & dirMask) \ + PNS_BLOCK3(3,2,1,0) \ + else \ + PNS_BLOCK3(2,3,1,0) \ + } \ + else \ + { \ + if(tn->decodePNSNoShift(2) & dirMask) \ + PNS_BLOCK3(3,2,0,1) \ + else \ + PNS_BLOCK3(2,3,0,1) \ + } \ + } \ + else \ + { \ + if(tn->decodePNSNoShift(1) & dirMask) \ + { \ + if(tn->decodePNSNoShift(2) & dirMask) \ + PNS_BLOCK3(1,0,3,2) \ + else \ + PNS_BLOCK3(1,0,2,3) \ + } \ + else \ 
+ { \ + if(tn->decodePNSNoShift(2) & dirMask) \ + PNS_BLOCK3(0,1,3,2) \ + else \ + PNS_BLOCK3(0,1,2,3) \ + } \ + } \ + } + +#if PX_INTEL_FAMILY +namespace +{ + const VecU32V signMask = U4LoadXYZW((PxU32(1)<<31), (PxU32(1)<<31), (PxU32(1)<<31), (PxU32(1)<<31)); + const Vec4V epsFloat4 = V4Load(1e-9f); + const Vec4V zeroes = V4Zero(); + const Vec4V twos = V4Load(2.0f); + const Vec4V epsInflateFloat4 = V4Load(1e-7f); +} +#endif // PX_INTEL_FAMILY + +#endif // GU_BV4_USE_SLABS + +#endif // GU_BV4_SLABS_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_KajiyaNoOrder.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_KajiyaNoOrder.h new file mode 100644 index 00000000..45f4e4a9 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_KajiyaNoOrder.h @@ -0,0 +1,136 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_BV4_SLABS_KAJIYA_NO_ORDER_H +#define GU_BV4_SLABS_KAJIYA_NO_ORDER_H + + // Kajiya, no sort + template<int inflateT, class LeafTestT, class ParamsT> + static Ps::IntBool BV4_ProcessStreamKajiyaNoOrder(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + /// + + Vec4V fattenAABBsX, fattenAABBsY, fattenAABBsZ; + if(inflateT) + { + Vec4V fattenAABBs4 = V4LoadU_Safe(¶ms->mOriginalExtents_Padded.x); + fattenAABBs4 = V4Add(fattenAABBs4, epsInflateFloat4); // US2385 - shapes are "closed" meaning exactly touching shapes should report overlap + fattenAABBsX = V4SplatElement<0>(fattenAABBs4); + fattenAABBsY = V4SplatElement<1>(fattenAABBs4); + fattenAABBsZ = V4SplatElement<2>(fattenAABBs4); + } + + /// + + SLABS_INIT + +#ifdef GU_BV4_QUANTIZED_TREE + const Vec4V minCoeffV = V4LoadA_Safe(¶ms->mCenterOrMinCoeff_PaddedAligned.x); + const Vec4V maxCoeffV = V4LoadA_Safe(¶ms->mExtentsOrMaxCoeff_PaddedAligned.x); + const Vec4V minCoeffxV = V4SplatElement<0>(minCoeffV); + const Vec4V minCoeffyV = V4SplatElement<1>(minCoeffV); + const Vec4V minCoeffzV = V4SplatElement<2>(minCoeffV); + const Vec4V maxCoeffxV = V4SplatElement<0>(maxCoeffV); + const Vec4V maxCoeffyV = V4SplatElement<1>(maxCoeffV); + const Vec4V maxCoeffzV 
= V4SplatElement<2>(maxCoeffV); +#endif + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + + const BVDataSwizzled* tn = reinterpret_cast<const BVDataSwizzled*>(node); + +#ifdef GU_BV4_QUANTIZED_TREE + Vec4V minx4a; + Vec4V maxx4a; + OPC_DEQ4(maxx4a, minx4a, mX, minCoeffxV, maxCoeffxV) + + Vec4V miny4a; + Vec4V maxy4a; + OPC_DEQ4(maxy4a, miny4a, mY, minCoeffyV, maxCoeffyV) + + Vec4V minz4a; + Vec4V maxz4a; + OPC_DEQ4(maxz4a, minz4a, mZ, minCoeffzV, maxCoeffzV) +#else + Vec4V minx4a = V4LoadA(tn->mMinX); + Vec4V miny4a = V4LoadA(tn->mMinY); + Vec4V minz4a = V4LoadA(tn->mMinZ); + + Vec4V maxx4a = V4LoadA(tn->mMaxX); + Vec4V maxy4a = V4LoadA(tn->mMaxY); + Vec4V maxz4a = V4LoadA(tn->mMaxZ); +#endif + if(inflateT) + { + maxx4a = V4Add(maxx4a, fattenAABBsX); maxy4a = V4Add(maxy4a, fattenAABBsY); maxz4a = V4Add(maxz4a, fattenAABBsZ); + minx4a = V4Sub(minx4a, fattenAABBsX); miny4a = V4Sub(miny4a, fattenAABBsY); minz4a = V4Sub(minz4a, fattenAABBsZ); + } + + SLABS_TEST + + SLABS_TEST2 + +#define DO_LEAF_TEST(x) \ + {if(tn->isLeaf(x)) \ + { \ + if(LeafTestT::doLeafTest(params, tn->getPrimitive(x))) \ + return 1; \ + } \ + else \ + stack[nb++] = tn->getChildData(x);} + + const PxU32 nodeType = getChildType(childData); + if(!(code&8) && nodeType>1) + DO_LEAF_TEST(3) + + if(!(code&4) && nodeType>0) + DO_LEAF_TEST(2) + + if(!(code&2)) + DO_LEAF_TEST(1) + + if(!(code&1)) + DO_LEAF_TEST(0) + + }while(nb); + + return 0; + } +#undef DO_LEAF_TEST + +#endif // GU_BV4_SLABS_KAJIYA_NO_ORDER_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_KajiyaOrdered.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_KajiyaOrdered.h new file mode 100644 index 00000000..4bdcee3a --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_KajiyaOrdered.h @@ -0,0 +1,240 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_BV4_SLABS_KAJIYA_ORDERED_H +#define GU_BV4_SLABS_KAJIYA_ORDERED_H + + // Kajiya + PNS + template<const int inflateT, class LeafTestT, class ParamsT> + static void BV4_ProcessStreamKajiyaOrdered(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + +#ifdef BV4_SLABS_SORT + const PxU32* tmp = reinterpret_cast<const PxU32*>(¶ms->mLocalDir_Padded); + const PxU32 X = tmp[0]>>31; + const PxU32 Y = tmp[1]>>31; + const PxU32 Z = tmp[2]>>31; +// const PxU32 X = PX_IR(params->mLocalDir_Padded.x)>>31; +// const PxU32 Y = PX_IR(params->mLocalDir_Padded.y)>>31; +// const PxU32 Z = PX_IR(params->mLocalDir_Padded.z)>>31; + const PxU32 bitIndex = 3+(Z|(Y<<1)|(X<<2)); + const PxU32 dirMask = 1u<<bitIndex; +#endif + +#ifdef BV4_SLABS_FIX + BV4_ALIGN16(float distances4[4]); +#endif + /// + + Vec4V fattenAABBsX, fattenAABBsY, fattenAABBsZ; + if(inflateT) + { + Vec4V fattenAABBs4 = V4LoadU_Safe(¶ms->mOriginalExtents_Padded.x); + fattenAABBs4 = V4Add(fattenAABBs4, epsInflateFloat4); // US2385 - shapes are "closed" meaning exactly touching shapes should report overlap + fattenAABBsX = V4SplatElement<0>(fattenAABBs4); + fattenAABBsY = V4SplatElement<1>(fattenAABBs4); + fattenAABBsZ = V4SplatElement<2>(fattenAABBs4); + } + + /// + + SLABS_INIT + +#ifdef GU_BV4_QUANTIZED_TREE + const Vec4V minCoeffV = V4LoadA_Safe(¶ms->mCenterOrMinCoeff_PaddedAligned.x); + const Vec4V maxCoeffV = V4LoadA_Safe(¶ms->mExtentsOrMaxCoeff_PaddedAligned.x); + const Vec4V minCoeffxV = V4SplatElement<0>(minCoeffV); + const Vec4V minCoeffyV = V4SplatElement<1>(minCoeffV); + const Vec4V minCoeffzV = V4SplatElement<2>(minCoeffV); + const Vec4V maxCoeffxV = V4SplatElement<0>(maxCoeffV); + const Vec4V maxCoeffyV = V4SplatElement<1>(maxCoeffV); + const Vec4V maxCoeffzV = V4SplatElement<2>(maxCoeffV); +#endif + + do + { + const PxU32 childData = stack[--nb]; + node = 
root + getChildOffset(childData); + + const BVDataSwizzled* tn = reinterpret_cast<const BVDataSwizzled*>(node); + +#ifdef GU_BV4_QUANTIZED_TREE + Vec4V minx4a; + Vec4V maxx4a; + OPC_DEQ4(maxx4a, minx4a, mX, minCoeffxV, maxCoeffxV) + + Vec4V miny4a; + Vec4V maxy4a; + OPC_DEQ4(maxy4a, miny4a, mY, minCoeffyV, maxCoeffyV) + + Vec4V minz4a; + Vec4V maxz4a; + OPC_DEQ4(maxz4a, minz4a, mZ, minCoeffzV, maxCoeffzV) +#else + Vec4V minx4a = V4LoadA(tn->mMinX); + Vec4V miny4a = V4LoadA(tn->mMinY); + Vec4V minz4a = V4LoadA(tn->mMinZ); + + Vec4V maxx4a = V4LoadA(tn->mMaxX); + Vec4V maxy4a = V4LoadA(tn->mMaxY); + Vec4V maxz4a = V4LoadA(tn->mMaxZ); +#endif + if(inflateT) + { + maxx4a = V4Add(maxx4a, fattenAABBsX); maxy4a = V4Add(maxy4a, fattenAABBsY); maxz4a = V4Add(maxz4a, fattenAABBsZ); + minx4a = V4Sub(minx4a, fattenAABBsX); miny4a = V4Sub(miny4a, fattenAABBsY); minz4a = V4Sub(minz4a, fattenAABBsZ); + } + + SLABS_TEST + +#ifdef BV4_SLABS_FIX + if(inflateT) + _mm_store_ps(distances4, maxOfNeasa); +#endif + + SLABS_TEST2 + +#ifdef BV4_SLABS_SORT + #ifdef BV4_SLABS_FIX + // PT: for some unknown reason the PS4/Linux/OSX compilers fail to understand this version +/* #define DO_LEAF_TEST(x) \ + { \ + if(!inflateT) \ + { \ + if(tn->isLeaf(x)) \ + { \ + LeafTestT::doLeafTest(params, tn->getPrimitive(x)); \ + maxT4 = V4Load(params->mStabbedFace.mDistance); \ + } \ + else \ + { \ + code2 |= 1<<x; \ + } \ + } \ + else \ + { \ + if(distances4[x]<params->mStabbedFace.mDistance) \ + { \ + if(tn->isLeaf(x)) \ + { \ + LeafTestT::doLeafTest(params, tn->getPrimitive(x)); \ + maxT4 = V4Load(params->mStabbedFace.mDistance); \ + } \ + else \ + { \ + code2 |= 1<<x; \ + } \ + } \ + } \ + }*/ + + // PT: TODO: check that this version compiles to the same code as above. Redo benchmarks. 
+ #define DO_LEAF_TEST(x) \ + { \ + if(!inflateT || distances4[x]<params->mStabbedFace.mDistance + GU_EPSILON_SAME_DISTANCE) \ + { \ + if(tn->isLeaf(x)) \ + { \ + LeafTestT::doLeafTest(params, tn->getPrimitive(x)); \ + maxT4 = V4Load(params->mStabbedFace.mDistance); \ + } \ + else \ + { \ + code2 |= 1<<x; \ + } \ + } \ + } + + #else + #define DO_LEAF_TEST(x) \ + { \ + if(tn->isLeaf(x)) \ + { \ + LeafTestT::doLeafTest(params, tn->getPrimitive(x)); \ + maxT4 = V4Load(params->mStabbedFace.mDistance); \ + } \ + else \ + { \ + code2 |= 1<<x; \ + } \ + } + #endif + PxU32 code2 = 0; + const PxU32 nodeType = getChildType(childData); + + if(!(code&8) && nodeType>1) + DO_LEAF_TEST(3) + + if(!(code&4) && nodeType>0) + DO_LEAF_TEST(2) + + if(!(code&2)) + DO_LEAF_TEST(1) + + if(!(code&1)) + DO_LEAF_TEST(0) + + SLABS_PNS +#else + #define DO_LEAF_TEST(x) \ + {if(tn->isLeaf(x)) \ + { \ + LeafTestT::doLeafTest(params, tn->getPrimitive(x)); \ + maxT4 = V4Load(params->mStabbedFace.mDistance); \ + } \ + else \ + { \ + stack[nb++] = tn->getChildData(x); \ + }} + + + const PxU32 nodeType = getChildType(childData); + if(!(code&8) && nodeType>1) + DO_LEAF_TEST(3) + + if(!(code&4) && nodeType>0) + DO_LEAF_TEST(2) + + if(!(code&2)) + DO_LEAF_TEST(1) + + if(!(code&1)) + DO_LEAF_TEST(0) +#endif + + }while(nb); + } +#undef DO_LEAF_TEST + +#endif // GU_BV4_SLABS_KAJIYA_ORDERED_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_SwizzledNoOrder.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_SwizzledNoOrder.h new file mode 100644 index 00000000..a7717d7c --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_SwizzledNoOrder.h @@ -0,0 +1,66 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_BV4_SLABS_SWIZZLED_NO_ORDER_H +#define GU_BV4_SLABS_SWIZZLED_NO_ORDER_H + + // Generic, no sort + template<class LeafTestT, class ParamsT> + static Ps::IntBool BV4_ProcessStreamSwizzledNoOrder(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + + const BVDataSwizzled* tn = reinterpret_cast<const BVDataSwizzled*>(node); + + const PxU32 nodeType = getChildType(childData); + + if(nodeType>1 && BV4_ProcessNodeNoOrder_Swizzled<LeafTestT, 3>(stack, nb, tn, params)) + return 1; + if(nodeType>0 && BV4_ProcessNodeNoOrder_Swizzled<LeafTestT, 2>(stack, nb, tn, params)) + return 1; + if(BV4_ProcessNodeNoOrder_Swizzled<LeafTestT, 1>(stack, nb, tn, params)) + return 1; + if(BV4_ProcessNodeNoOrder_Swizzled<LeafTestT, 0>(stack, nb, tn, params)) + return 1; + + }while(nb); + + return 0; + } + +#endif // GU_BV4_SLABS_SWIZZLED_NO_ORDER_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_SwizzledOrdered.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_SwizzledOrdered.h new file mode 100644 index 00000000..4be851e1 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_Slabs_SwizzledOrdered.h @@ -0,0 +1,74 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_BV4_SLABS_SWIZZLED_ORDERED_H +#define GU_BV4_SLABS_SWIZZLED_ORDERED_H + + // Generic + PNS + template<class LeafTestT, class ParamsT> + static void BV4_ProcessStreamSwizzledOrdered(const BVDataPacked* PX_RESTRICT node, PxU32 initData, ParamsT* PX_RESTRICT params) + { + const BVDataPacked* root = node; + + PxU32 nb=1; + PxU32 stack[GU_BV4_STACK_SIZE]; + stack[0] = initData; + + const PxU32* tmp = reinterpret_cast<const PxU32*>(¶ms->mLocalDir_Padded); + const PxU32 X = tmp[0]>>31; + const PxU32 Y = tmp[1]>>31; + const PxU32 Z = tmp[2]>>31; +// const PxU32 X = PX_IR(params->mLocalDir_Padded.x)>>31; +// const PxU32 Y = PX_IR(params->mLocalDir_Padded.y)>>31; +// const PxU32 Z = PX_IR(params->mLocalDir_Padded.z)>>31; + const PxU32 bitIndex = 3+(Z|(Y<<1)|(X<<2)); + const PxU32 dirMask = 1u<<bitIndex; + + do + { + const PxU32 childData = stack[--nb]; + node = root + getChildOffset(childData); + const PxU32 nodeType = getChildType(childData); + + const BVDataSwizzled* tn = reinterpret_cast<const BVDataSwizzled*>(node); + + PxU32 code2 = 0; + BV4_ProcessNodeOrdered2_Swizzled<LeafTestT, 0>(code2, tn, params); + BV4_ProcessNodeOrdered2_Swizzled<LeafTestT, 1>(code2, tn, params); + if(nodeType>0) + BV4_ProcessNodeOrdered2_Swizzled<LeafTestT, 2>(code2, tn, params); + if(nodeType>1) + BV4_ProcessNodeOrdered2_Swizzled<LeafTestT, 3>(code2, tn, params); + + SLABS_PNS + + }while(nb); + } + +#endif // GU_BV4_SLABS_SWIZZLED_ORDERED_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_SphereOverlap.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_SphereOverlap.cpp new file mode 100644 index 00000000..d709e273 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_SphereOverlap.cpp @@ -0,0 +1,330 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "GuBV4.h" +using namespace physx; +using namespace Gu; + +#if PX_INTEL_FAMILY + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#include "GuBV4_Common.h" +#include "GuSphere.h" +#include "GuDistancePointTriangle.h" +#include "PsVecMath.h" + +using namespace physx::shdfnd::aos; + +#if PX_VC +#pragma warning ( disable : 4324 ) +#endif + +// Sphere overlap any + +struct SphereParams +{ + const IndTri32* PX_RESTRICT mTris32; + const IndTri16* PX_RESTRICT mTris16; + const PxVec3* PX_RESTRICT mVerts; + +#ifdef GU_BV4_QUANTIZED_TREE + BV4_ALIGN16(Vec3p mCenterOrMinCoeff_PaddedAligned); + BV4_ALIGN16(Vec3p mExtentsOrMaxCoeff_PaddedAligned); +#endif + + BV4_ALIGN16(PxVec3 mCenter_PaddedAligned); float mRadius2; +#ifdef GU_BV4_USE_SLABS + BV4_ALIGN16(PxVec3 mCenter_PaddedAligned2); float mRadius22; +#endif +}; + +#ifndef GU_BV4_QUANTIZED_TREE +// PT: TODO: refactor with bucket pruner code (TA34704) +static PX_FORCE_INLINE Ps::IntBool BV4_SphereAABBOverlap(const PxVec3& center, const PxVec3& extents, const SphereParams* PX_RESTRICT params) +{ + const Vec4V mCenter = V4LoadA_Safe(¶ms->mCenter_PaddedAligned.x); + const FloatV mRadius2 = FLoad(params->mRadius2); + + const Vec4V boxCenter = V4LoadU(¢er.x); + const Vec4V boxExtents = V4LoadU(&extents.x); + + const Vec4V offset = V4Sub(mCenter, boxCenter); + const Vec4V closest = V4Clamp(offset, V4Neg(boxExtents), boxExtents); + const Vec4V d = V4Sub(offset, closest); + + const PxU32 test = (PxU32)_mm_movemask_ps(FIsGrtrOrEq(mRadius2, V4Dot3(d, d))); + return (test & 0x7) == 0x7; +} +#endif + +static PX_FORCE_INLINE Ps::IntBool __SphereTriangle(const SphereParams* PX_RESTRICT params, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2) +{ + { + const float sqrDist = (p0 - params->mCenter_PaddedAligned).magnitudeSquared(); + if(sqrDist <= params->mRadius2) + return 1; + } + + const PxVec3 edge10 = p1 - p0; + const PxVec3 edge20 = p2 - p0; + const PxVec3 cp = 
closestPtPointTriangle2(params->mCenter_PaddedAligned, p0, p1, p2, edge10, edge20); + const float sqrDist = (cp - params->mCenter_PaddedAligned).magnitudeSquared(); + return sqrDist <= params->mRadius2; +} + +// PT: TODO: evaluate if SIMD distance function would be faster here (TA34704) +// PT: TODO: __fastcall removed to make it compile everywhere. Revisit. +static /*PX_FORCE_INLINE*/ Ps::IntBool /*__fastcall*/ __SphereTriangle(const SphereParams* PX_RESTRICT params, PxU32 primIndex) +{ + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + return __SphereTriangle(params, params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2]); +} + +namespace +{ +class LeafFunction_SphereOverlapAny +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(const SphereParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(__SphereTriangle(params, primIndex)) + return 1; + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +template<class ParamsT> +static PX_FORCE_INLINE void setupSphereParams(ParamsT* PX_RESTRICT params, const Sphere& sphere, const BV4Tree* PX_RESTRICT tree, const PxMat44* PX_RESTRICT worldm_Aligned, const SourceMesh* PX_RESTRICT mesh) +{ + computeLocalSphere(params->mRadius2, params->mCenter_PaddedAligned, sphere, worldm_Aligned); + +#ifdef GU_BV4_USE_SLABS + params->mCenter_PaddedAligned2 = params->mCenter_PaddedAligned*2.0f; + params->mRadius22 = params->mRadius2*4.0f; +#endif + + setupMeshPointersAndQuantizedCoeffs(params, mesh, tree); +} + +#include "GuBV4_Internal.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs.h" + + static PX_FORCE_INLINE Ps::IntBool BV4_SphereAABBOverlap(const Vec4V boxCenter, const Vec4V boxExtents, const SphereParams* PX_RESTRICT params) + { + const Vec4V mCenter = V4LoadA_Safe(¶ms->mCenter_PaddedAligned2.x); + const FloatV mRadius2 = FLoad(params->mRadius22); + + const Vec4V offset = 
V4Sub(mCenter, boxCenter); + const Vec4V closest = V4Clamp(offset, V4Neg(boxExtents), boxExtents); + const Vec4V d = V4Sub(offset, closest); + + const PxU32 test = PxU32(_mm_movemask_ps(FIsGrtrOrEq(mRadius2, V4Dot3(d, d)))); + return (test & 0x7) == 0x7; + } +#else + #ifdef GU_BV4_QUANTIZED_TREE + static PX_FORCE_INLINE Ps::IntBool BV4_SphereAABBOverlap(const BVDataPacked* PX_RESTRICT node, const SphereParams* PX_RESTRICT params) + { + const __m128i testV = _mm_load_si128((__m128i*)node->mAABB.mData); + const __m128i qextentsV = _mm_and_si128(testV, _mm_set1_epi32(0x0000ffff)); + const __m128i qcenterV = _mm_srai_epi32(testV, 16); + const Vec4V boxCenter = V4Mul(_mm_cvtepi32_ps(qcenterV), V4LoadA_Safe(¶ms->mCenterOrMinCoeff_PaddedAligned.x)); + const Vec4V boxExtents = V4Mul(_mm_cvtepi32_ps(qextentsV), V4LoadA_Safe(¶ms->mExtentsOrMaxCoeff_PaddedAligned.x)); + + const Vec4V mCenter = V4LoadA_Safe(¶ms->mCenter_PaddedAligned.x); + const FloatV mRadius2 = FLoad(params->mRadius2); + + const Vec4V offset = V4Sub(mCenter, boxCenter); + const Vec4V closest = V4Clamp(offset, V4Neg(boxExtents), boxExtents); + const Vec4V d = V4Sub(offset, closest); + + const PxU32 test = (PxU32)_mm_movemask_ps(FIsGrtrOrEq(mRadius2, V4Dot3(d, d))); + return (test & 0x7) == 0x7; + } + #endif +#endif + +#include "GuBV4_ProcessStreamNoOrder_SphereAABB.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs_SwizzledNoOrder.h" +#endif + +Ps::IntBool BV4_OverlapSphereAny(const Sphere& sphere, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + SphereParams Params; + setupSphereParams(&Params, sphere, &tree, worldm_Aligned, mesh); + + if(tree.mNodes) + { + return processStreamNoOrder<LeafFunction_SphereOverlapAny>(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + return LeafFunction_SphereOverlapAny::doLeafTest(&Params, nbTris); + } +} + +// Sphere 
overlap all + +struct SphereParamsAll : SphereParams +{ + PxU32 mNbHits; + PxU32 mMaxNbHits; + PxU32* mHits; +}; + +namespace +{ +class LeafFunction_SphereOverlapAll +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(SphereParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(__SphereTriangle(params, primIndex)) + { + SphereParamsAll* ParamsAll = static_cast<SphereParamsAll*>(params); + ParamsAll->mHits[ParamsAll->mNbHits] = primIndex; + ParamsAll->mNbHits++; + if(ParamsAll->mNbHits==ParamsAll->mMaxNbHits) + return 1; + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +PxU32 BV4_OverlapSphereAll(const Sphere& sphere, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxU32* results, PxU32 size, bool& overflow) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + SphereParamsAll Params; + Params.mNbHits = 0; + Params.mMaxNbHits = size; + Params.mHits = results; + + setupSphereParams(&Params, sphere, &tree, worldm_Aligned, mesh); + + if(tree.mNodes) + { + overflow = processStreamNoOrder<LeafFunction_SphereOverlapAll>(tree.mNodes, tree.mInitData, &Params)!=0; + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + overflow = LeafFunction_SphereOverlapAll::doLeafTest(&Params, nbTris)!=0; + } + return Params.mNbHits; +} + + +// Sphere overlap - callback version + +struct SphereParamsCB : SphereParams +{ + MeshOverlapCallback mCallback; + void* mUserData; +}; + +namespace +{ +class LeafFunction_SphereOverlapCB +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(const SphereParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + 
if(__SphereTriangle(params, p0, p1, p2)) + { + const PxU32 vrefs[3] = { VRef0, VRef1, VRef2 }; + if((params->mCallback)(params->mUserData, p0, p1, p2, primIndex, vrefs)) + return 1; + } + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +// PT: this one is currently not used +void BV4_OverlapSphereCB(const Sphere& sphere, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, MeshOverlapCallback callback, void* userData) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + SphereParamsCB Params; + Params.mCallback = callback; + Params.mUserData = userData; + setupSphereParams(&Params, sphere, &tree, worldm_Aligned, mesh); + + if(tree.mNodes) + { + processStreamNoOrder<LeafFunction_SphereOverlapCB>(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + LeafFunction_SphereOverlapCB::doLeafTest(&Params, nbTris); + } +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_SphereSweep.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_SphereSweep.cpp new file mode 100644 index 00000000..c955c7f5 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuBV4_SphereSweep.cpp @@ -0,0 +1,388 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxSimpleTypes.h" +#include "foundation/PxMat44.h" +#include "GuBV4.h" +#include "GuBox.h" +#include "GuSphere.h" +#include "GuSIMDHelpers.h" +#include "GuSweepSphereTriangle.h" + +using namespace physx; +using namespace Gu; + +#if PX_INTEL_FAMILY + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#include "GuBV4_Common.h" + +// PT: for sphere-sweeps we use method 3 in \\sw\physx\PhysXSDK\3.4\trunk\InternalDocumentation\GU\Sweep strategies.ppt + +namespace +{ + // PT: TODO: refactor structure (TA34704) + struct RayParams + { +#ifdef GU_BV4_QUANTIZED_TREE + BV4_ALIGN16(Vec3p mCenterOrMinCoeff_PaddedAligned); + BV4_ALIGN16(Vec3p mExtentsOrMaxCoeff_PaddedAligned); +#endif +#ifndef GU_BV4_USE_SLABS + BV4_ALIGN16(Vec3p mData2_PaddedAligned); + BV4_ALIGN16(Vec3p mFDir_PaddedAligned); + BV4_ALIGN16(Vec3p mData_PaddedAligned); +#endif + BV4_ALIGN16(Vec3p mLocalDir_Padded); // PT: TODO: this one could be switched to PaddedAligned & V4LoadA (TA34704) + BV4_ALIGN16(Vec3p mOrigin_Padded); // PT: TODO: this one could be switched to PaddedAligned & V4LoadA (TA34704) + }; + + struct SphereSweepParams : RayParams + { + const IndTri32* PX_RESTRICT mTris32; + const IndTri16* PX_RESTRICT mTris16; + const PxVec3* PX_RESTRICT mVerts; + + PxVec3 mOriginalExtents_Padded; + + RaycastHitInternal mStabbedFace; + PxU32 mBackfaceCulling; + PxU32 mEarlyExit; + + PxVec3 mP0, mP1, mP2; + PxVec3 mBestTriNormal; + float mBestAlignmentValue; + float mBestDistance; + float mMaxDist; + }; +} + +#include "GuBV4_AABBAABBSweepTest.h" + +// PT: TODO: __fastcall removed to make it compile everywhere. Revisit. 
+static bool /*__fastcall*/ triSphereSweep(SphereSweepParams* PX_RESTRICT params, PxU32 primIndex, bool nodeSorting=true) +{ + PxU32 VRef0, VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + const PxVec3& p0 = params->mVerts[VRef0]; + const PxVec3& p1 = params->mVerts[VRef1]; + const PxVec3& p2 = params->mVerts[VRef2]; + + PxVec3 normal = (p1 - p0).cross(p2 - p0); + + // Backface culling + const bool culled = params->mBackfaceCulling && normal.dot(params->mLocalDir_Padded) > 0.0f; + if(culled) + return false; + + const PxTriangle T(p0, p1, p2); // PT: TODO: check potential bad ctor/dtor here (TA34704) <= or avoid creating the tri, not needed anymore + + normal.normalize(); + + // PT: TODO: we lost some perf when switching to PhysX version. Revisit/investigate. (TA34704) + float dist; + bool directHit; + if(!sweepSphereVSTri(T.verts, normal, params->mOrigin_Padded, params->mOriginalExtents_Padded.x, params->mLocalDir_Padded, dist, directHit, true)) + return false; + + const PxReal distEpsilon = GU_EPSILON_SAME_DISTANCE; // pick a farther hit within distEpsilon that is more opposing than the previous closest hit + const PxReal alignmentValue = computeAlignmentValue(normal, params->mLocalDir_Padded); + if(keepTriangle(dist, alignmentValue, params->mBestDistance, params->mBestAlignmentValue, params->mMaxDist, distEpsilon)) + { + params->mStabbedFace.mDistance = dist; + params->mStabbedFace.mTriangleID = primIndex; + params->mP0 = p0; + params->mP1 = p1; + params->mP2 = p2; + params->mBestDistance = PxMin(params->mBestDistance, dist); // exact lower bound + params->mBestAlignmentValue = alignmentValue; + params->mBestTriNormal = normal; + if(nodeSorting) + { +#ifndef GU_BV4_USE_SLABS + setupRayData(params, dist, params->mOrigin_Padded, params->mLocalDir_Padded); +#endif + } + return true; + } + return false; +} + +namespace +{ +class LeafFunction_SphereSweepClosest +{ +public: + static PX_FORCE_INLINE void 
doLeafTest(SphereSweepParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + triSphereSweep(params, primIndex); + primIndex++; + }while(nbToGo--); + } +}; + +class LeafFunction_SphereSweepAny +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(SphereSweepParams* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(triSphereSweep(params, primIndex)) + return 1; + + primIndex++; + }while(nbToGo--); + + return 0; + } +}; + +class ImpactFunctionSphere +{ +public: + static PX_FORCE_INLINE void computeImpact(PxVec3& impactPos, PxVec3& impactNormal, const Sphere& sphere, const PxVec3& dir, const PxReal t, const TrianglePadded& triangle) + { + computeSphereTriImpactData(impactPos, impactNormal, sphere.center, dir, t, triangle); + } +}; +} + +template<class ParamsT> +static PX_FORCE_INLINE void setupSphereParams(ParamsT* PX_RESTRICT params, const Sphere& sphere, const PxVec3& dir, float maxDist, const BV4Tree* PX_RESTRICT tree, const PxMat44* PX_RESTRICT worldm_Aligned, const SourceMesh* PX_RESTRICT mesh, PxU32 flags) +{ + params->mOriginalExtents_Padded = PxVec3(sphere.radius); + params->mStabbedFace.mTriangleID = PX_INVALID_U32; + params->mStabbedFace.mDistance = maxDist; + params->mBestDistance = PX_MAX_REAL; + params->mBestAlignmentValue = 2.0f; + params->mMaxDist = maxDist; + setupParamsFlags(params, flags); + + setupMeshPointersAndQuantizedCoeffs(params, mesh, tree); + + computeLocalRay(params->mLocalDir_Padded, params->mOrigin_Padded, dir, sphere.center, worldm_Aligned); + +#ifndef GU_BV4_USE_SLABS + setupRayData(params, maxDist, params->mOrigin_Padded, params->mLocalDir_Padded); +#endif +} + +#include "GuBV4_Internal.h" +#ifdef GU_BV4_USE_SLABS + #include "GuBV4_Slabs.h" +#endif +#include "GuBV4_ProcessStreamOrdered_SegmentAABB_Inflated.h" +#include "GuBV4_ProcessStreamNoOrder_SegmentAABB_Inflated.h" +#ifdef GU_BV4_USE_SLABS + #include 
"GuBV4_Slabs_KajiyaNoOrder.h" + #include "GuBV4_Slabs_KajiyaOrdered.h" +#endif + +Ps::IntBool BV4_SphereSweepSingle(const Sphere& sphere, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepHit* PX_RESTRICT hit, PxU32 flags) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + SphereSweepParams Params; + setupSphereParams(&Params, sphere, dir, maxDist, &tree, worldm_Aligned, mesh, flags); + + if(tree.mNodes) + { + if(Params.mEarlyExit) + processStreamRayNoOrder(1, LeafFunction_SphereSweepAny)(tree.mNodes, tree.mInitData, &Params); + else + processStreamRayOrdered(1, LeafFunction_SphereSweepClosest)(tree.mNodes, tree.mInitData, &Params); + } + else + doBruteForceTests<LeafFunction_SphereSweepAny, LeafFunction_SphereSweepClosest>(mesh->getNbTriangles(), &Params); + + return computeImpactDataT<ImpactFunctionSphere>(sphere, dir, hit, &Params, worldm_Aligned, (flags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (flags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); +} + +// PT: sphere sweep callback version - currently not used + +namespace +{ + struct SphereSweepParamsCB : SphereSweepParams + { + // PT: these new members are only here to call computeImpactDataT during traversal :( + // PT: TODO: most of them may not be needed if we just move sphere to local space before traversal + Sphere mSphere; // Sphere in original space (maybe not local/mesh space) + PxVec3 mDir; // Dir in original space (maybe not local/mesh space) + const PxMat44* mWorldm_Aligned; + PxU32 mFlags; + + SweepUnlimitedCallback mCallback; + void* mUserData; + float mMaxDist; + bool mNodeSorting; + }; + +class LeafFunction_SphereSweepCB +{ +public: + static PX_FORCE_INLINE Ps::IntBool doLeafTest(SphereSweepParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + if(triSphereSweep(params, primIndex, params->mNodeSorting)) + { + // PT: TODO: in this version we must compute the impact data immediately, + // 
which is a terrible idea in general, but I'm not sure what else I can do. + SweepHit hit; + const bool b = computeImpactDataT<ImpactFunctionSphere>(params->mSphere, params->mDir, &hit, params, params->mWorldm_Aligned, (params->mFlags & QUERY_MODIFIER_DOUBLE_SIDED)!=0, (params->mFlags & QUERY_MODIFIER_MESH_BOTH_SIDES)!=0); + PX_ASSERT(b); + PX_UNUSED(b); + + reportUnlimitedCallbackHit(params, hit); + } + + primIndex++; + }while(nbToGo--); + + return 0; + } +}; +} + +// PT: for design decisions in this function, refer to the comments of BV4_GenericSweepCB(). +void BV4_SphereSweepCB(const Sphere& sphere, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting) +{ + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + SphereSweepParamsCB Params; + Params.mSphere = sphere; + Params.mDir = dir; + Params.mWorldm_Aligned = worldm_Aligned; + Params.mFlags = flags; + + Params.mCallback = callback; + Params.mUserData = userData; + Params.mMaxDist = maxDist; + Params.mNodeSorting = nodeSorting; + setupSphereParams(&Params, sphere, dir, maxDist, &tree, worldm_Aligned, mesh, flags); + + PX_ASSERT(!Params.mEarlyExit); + + if(tree.mNodes) + { + if(nodeSorting) + processStreamRayOrdered(1, LeafFunction_SphereSweepCB)(tree.mNodes, tree.mInitData, &Params); + else + processStreamRayNoOrder(1, LeafFunction_SphereSweepCB)(tree.mNodes, tree.mInitData, &Params); + } + else + doBruteForceTests<LeafFunction_SphereSweepCB, LeafFunction_SphereSweepCB>(mesh->getNbTriangles(), &Params); +} + + +// Old box sweep callback version, using sphere code + +namespace +{ +struct BoxSweepParamsCB : SphereSweepParams +{ + MeshSweepCallback mCallback; + void* mUserData; +}; + +class ExLeafTestSweepCB +{ +public: + static PX_FORCE_INLINE void doLeafTest(BoxSweepParamsCB* PX_RESTRICT params, PxU32 primIndex) + { + PxU32 nbToGo = getNbPrimitives(primIndex); + do + { + PxU32 VRef0, 
VRef1, VRef2; + getVertexReferences(VRef0, VRef1, VRef2, primIndex, params->mTris32, params->mTris16); + + { +// const PxU32 vrefs[3] = { VRef0, VRef1, VRef2 }; + float dist = params->mStabbedFace.mDistance; + if((params->mCallback)(params->mUserData, params->mVerts[VRef0], params->mVerts[VRef1], params->mVerts[VRef2], primIndex, /*vrefs,*/ dist)) + return; + + if(dist<params->mStabbedFace.mDistance) + { + params->mStabbedFace.mDistance = dist; +#ifndef GU_BV4_USE_SLABS + setupRayData(params, dist, params->mOrigin_Padded, params->mLocalDir_Padded); +#endif + } + } + + primIndex++; + }while(nbToGo--); + } +}; +} + +void BV4_GenericSweepCB_Old(const PxVec3& origin, const PxVec3& extents, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, MeshSweepCallback callback, void* userData) +{ + BoxSweepParamsCB Params; + Params.mCallback = callback; + Params.mUserData = userData; + Params.mOriginalExtents_Padded = extents; + + Params.mStabbedFace.mTriangleID = PX_INVALID_U32; + Params.mStabbedFace.mDistance = maxDist; + + computeLocalRay(Params.mLocalDir_Padded, Params.mOrigin_Padded, dir, origin, worldm_Aligned); + +#ifndef GU_BV4_USE_SLABS + setupRayData(&Params, maxDist, Params.mOrigin_Padded, Params.mLocalDir_Padded); +#endif + + const SourceMesh* PX_RESTRICT mesh = tree.mMeshInterface; + + setupMeshPointersAndQuantizedCoeffs(&Params, mesh, &tree); + + if(tree.mNodes) + { + processStreamRayOrdered(1, ExLeafTestSweepCB)(tree.mNodes, tree.mInitData, &Params); + } + else + { + const PxU32 nbTris = mesh->getNbTriangles(); + PX_ASSERT(nbTris<16); + ExLeafTestSweepCB::doLeafTest(&Params, nbTris); + } +} + +#endif + diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuMeshData.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMeshData.h new file mode 100644 index 00000000..37cdbcfc --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMeshData.h @@ -0,0 +1,298 @@ +// This code contains NVIDIA Confidential Information and is disclosed to 
you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_MESH_DATA_H +#define GU_MESH_DATA_H + +#include "foundation/PxSimpleTypes.h" +#include "foundation/PxVec4.h" +#include "foundation/PxBounds3.h" +#include "PsUserAllocated.h" +#include "CmPhysXCommon.h" +#include "PsAllocator.h" +#include "PxTriangleMesh.h" +#include "GuRTree.h" +#include "GuBV4.h" +#include "GuBV32.h" + +namespace physx +{ + +#define RTREE_COOK_VERSION 1 + +namespace Gu { + +// 1: support stackless collision trees for non-recursive collision queries +// 2: height field functionality not supported anymore +// 3: mass struct removed +// 4: bounding sphere removed +// 5: RTree added, opcode tree still in the binary image, physx 3.0 +// 6: opcode tree removed from binary image +// 7: convex decomposition is out +// 8: adjacency information added +// 9: removed leaf triangles and most of opcode data, changed rtree layout +// 10: float rtrees +// 11: new build, isLeaf added to page +// 12: isLeaf is now the lowest bit in ptrs +// 13: TA30159 removed deprecated convexEdgeThreshold and bumped version +// 14: added midphase ID + +#define PX_MESH_VERSION 14 + +// these flags are used to indicate/validate the contents of a cooked mesh file +enum InternalMeshSerialFlag +{ + IMSF_MATERIALS = (1<<0), //!< if set, the cooked mesh file contains per-triangle material indices + IMSF_FACE_REMAP = (1<<1), //!< if set, the cooked mesh file contains a remap table + IMSF_8BIT_INDICES = (1<<2), //!< if set, the cooked mesh file contains 8bit indices (topology) + IMSF_16BIT_INDICES = (1<<3), //!< if set, the cooked mesh file contains 16bit indices (topology) + IMSF_ADJACENCIES = (1<<4), //!< if set, the cooked mesh file contains adjacency structures + IMSF_GRB_DATA = (1<<5) //!< if set, the cooked mesh file contains GRB data structures +}; + + + +#if PX_VC +#pragma warning(push) +#pragma warning(disable: 4324) // Padding was added at the end of a structure because of a __declspec(align) value. 
+#endif + + class TriangleMeshData : public Ps::UserAllocated + { + public: + PxMeshMidPhase::Enum mType; + + PxU32 mNbVertices; + PxU32 mNbTriangles; + PxVec3* mVertices; + void* mTriangles; + + PxBounds3 mAABB; + PxU8* mExtraTrigData; + PxReal mGeomEpsilon; + + PxU8 mFlags; + PxU16* mMaterialIndices; + PxU32* mFaceRemap; + PxU32* mAdjacencies; + + // GRB data ------------------------- + void * mGRB_triIndices; //!< GRB: GPU-friendly tri indices(uint3) + + // TODO avoroshilov: adjacency info - duplicated, remove it and use 'mAdjacencies' and 'mExtraTrigData' see GuTriangleMesh.cpp:325 + void * mGRB_triAdjacencies; //!< GRB: adjacency data, with BOUNDARY and NONCONVEX flags (flags replace adj indices where applicable) [uin4] + PxU32 * mGRB_vertValency; //!< GRB: number of adjacent vertices to a vertex + PxU32 * mGRB_adjVertStart; //!< GRB: offset for each vertex in the adjacency list + PxU32 * mGRB_adjVertices; //!< GRB: list of adjacent vertices + + PxU32 mGRB_meshAdjVerticiesTotal; //!< GRB: total number of indices in the 'mGRB_adjVertices' + PxU32* mGRB_faceRemap; //!< GRB: this remap the GPU triangle indices to CPU triangle indices + + void* mGRB_BV32Tree; + // End of GRB data ------------------ + + TriangleMeshData() : + mNbVertices (0), + mNbTriangles (0), + mVertices (NULL), + mTriangles (NULL), + mAABB (PxBounds3::empty()), + mExtraTrigData (NULL), + mGeomEpsilon (0.0f), + mFlags (0), + mMaterialIndices (NULL), + mFaceRemap (NULL), + mAdjacencies (NULL), + + mGRB_triIndices (NULL), + mGRB_triAdjacencies (NULL), + mGRB_vertValency (NULL), + mGRB_adjVertStart (NULL), + mGRB_adjVertices (NULL), + + mGRB_meshAdjVerticiesTotal (0), + mGRB_faceRemap (NULL), + mGRB_BV32Tree (NULL) + + { + } + + virtual ~TriangleMeshData() + { + if(mVertices) + PX_FREE(mVertices); + if(mTriangles) + PX_FREE(mTriangles); + if(mMaterialIndices) + PX_DELETE_POD(mMaterialIndices); + if(mFaceRemap) + PX_DELETE_POD(mFaceRemap); + if(mAdjacencies) + PX_DELETE_POD(mAdjacencies); + 
if(mExtraTrigData) + PX_DELETE_POD(mExtraTrigData); + + + if (mGRB_triIndices) + PX_FREE(mGRB_triIndices); + if (mGRB_triAdjacencies) + PX_DELETE_POD(mGRB_triAdjacencies); + if (mGRB_vertValency) + PX_DELETE_POD(mGRB_vertValency); + if (mGRB_adjVertStart) + PX_DELETE_POD(mGRB_adjVertStart); + if (mGRB_adjVertices) + PX_DELETE_POD(mGRB_adjVertices); + + if (mGRB_faceRemap) + PX_DELETE_POD(mGRB_faceRemap); + + if (mGRB_BV32Tree) + { + Gu::BV32Tree* bv32Tree = reinterpret_cast<BV32Tree*>(mGRB_BV32Tree); + PX_DELETE(bv32Tree); + mGRB_BV32Tree = NULL; + } + + + } + + + PxVec3* allocateVertices(PxU32 nbVertices) + { + PX_ASSERT(!mVertices); + // PT: we allocate one more vertex to make sure it's safe to V4Load the last one + const PxU32 nbAllocatedVerts = nbVertices + 1; + mVertices = reinterpret_cast<PxVec3*>(PX_ALLOC(nbAllocatedVerts * sizeof(PxVec3), "PxVec3")); + mNbVertices = nbVertices; + return mVertices; + } + + void* allocateTriangles(PxU32 nbTriangles, bool force32Bit, PxU32 allocateGPUData = 0) + { + PX_ASSERT(mNbVertices); + PX_ASSERT(!mTriangles); + + bool index16 = mNbVertices <= 0xffff && !force32Bit; + if(index16) + mFlags |= PxTriangleMeshFlag::e16_BIT_INDICES; + + mTriangles = PX_ALLOC(nbTriangles * (index16 ? sizeof(PxU16) : sizeof(PxU32)) * 3, "mTriangles"); + if (allocateGPUData) + mGRB_triIndices = PX_ALLOC(nbTriangles * (index16 ? 
sizeof(PxU16) : sizeof(PxU32)) * 3, "mGRB_triIndices"); + mNbTriangles = nbTriangles; + return mTriangles; + } + + PxU16* allocateMaterials() + { + PX_ASSERT(mNbTriangles); + PX_ASSERT(!mMaterialIndices); + mMaterialIndices = PX_NEW(PxU16)[mNbTriangles]; + return mMaterialIndices; + } + + PxU32* allocateAdjacencies() + { + PX_ASSERT(mNbTriangles); + PX_ASSERT(!mAdjacencies); + mAdjacencies = PX_NEW(PxU32)[mNbTriangles*3]; + mFlags |= PxTriangleMeshFlag::eADJACENCY_INFO; + return mAdjacencies; + } + + PxU32* allocateFaceRemap() + { + PX_ASSERT(mNbTriangles); + PX_ASSERT(!mFaceRemap); + mFaceRemap = PX_NEW(PxU32)[mNbTriangles]; + return mFaceRemap; + } + + PxU8* allocateExtraTrigData() + { + PX_ASSERT(mNbTriangles); + PX_ASSERT(!mExtraTrigData); + mExtraTrigData = PX_NEW(PxU8)[mNbTriangles]; + return mExtraTrigData; + } + + PX_FORCE_INLINE void setTriangleAdjacency(PxU32 triangleIndex, PxU32 adjacency, PxU32 offset) + { + PX_ASSERT(mAdjacencies); + mAdjacencies[triangleIndex*3 + offset] = adjacency; + } + + PX_FORCE_INLINE bool has16BitIndices() const + { + return (mFlags & PxTriangleMeshFlag::e16_BIT_INDICES) ? 
true : false; + } + }; + + class RTreeTriangleData : public TriangleMeshData + { + public: + RTreeTriangleData() { mType = PxMeshMidPhase::eBVH33; } + virtual ~RTreeTriangleData() {} + + Gu::RTree mRTree; + }; + + class BV4TriangleData : public TriangleMeshData + { + public: + BV4TriangleData() { mType = PxMeshMidPhase::eBVH34; } + virtual ~BV4TriangleData() {} + + Gu::SourceMesh mMeshInterface; + Gu::BV4Tree mBV4Tree; + }; + + + class BV32TriangleData : public TriangleMeshData + { + public: + //using the same type as BV4 + BV32TriangleData() { mType = PxMeshMidPhase::eBVH34; } + virtual ~BV32TriangleData() {} + + Gu::SourceMesh mMeshInterface; + Gu::BV32Tree mBV32Tree; + }; + +#if PX_VC +#pragma warning(pop) +#endif + + +} // namespace Gu + +} + +#endif // #ifdef GU_MESH_DATA_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuMeshQuery.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMeshQuery.cpp new file mode 100644 index 00000000..74b352d5 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMeshQuery.cpp @@ -0,0 +1,312 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "foundation/PxProfiler.h" +#include "PxMeshQuery.h" +#include "GuInternal.h" +#include "PxSphereGeometry.h" +#include "PxGeometryQuery.h" +#include "GuEntityReport.h" +#include "GuHeightFieldUtil.h" +#include "GuBoxConversion.h" +#include "GuIntersectionTriangleBox.h" +#include "CmScaling.h" +#include "GuSweepTests.h" +#include "GuSIMDHelpers.h" +#include "GuMidphaseInterface.h" +#include "PsFPU.h" + +using namespace physx; +using namespace Gu; + +namespace { + + class HfTrianglesEntityReport2 : public EntityReport<PxU32>, public LimitedResults + { + public: + HfTrianglesEntityReport2( + PxU32* results, PxU32 maxResults, PxU32 startIndex, + HeightFieldUtil& hfUtil, + const PxVec3& boxCenter, const PxVec3& boxExtents, const PxQuat& boxRot, + bool aabbOverlap) : + LimitedResults (results, maxResults, startIndex), + mHfUtil (hfUtil), + mAABBOverlap (aabbOverlap) + { + buildFrom(mBox2Hf, boxCenter, boxExtents, boxRot); + } + + virtual bool onEvent(PxU32 nbEntities, PxU32* entities) + { + if(mAABBOverlap) + { + while(nbEntities--) + if(!add(*entities++)) + return false; + } + else + { + const PxTransform idt(PxIdentity); + for(PxU32 i=0; 
i<nbEntities; i++) + { + TrianglePadded tri; + mHfUtil.getTriangle(idt, tri, NULL, NULL, entities[i], false, false); // First parameter not needed if local space triangle is enough + + // PT: this one is safe because triangle class is padded + if(intersectTriangleBox(mBox2Hf, tri.verts[0], tri.verts[1], tri.verts[2])) + { + if(!add(entities[i])) + return false; + } + } + } + return true; + } + + HeightFieldUtil& mHfUtil; + BoxPadded mBox2Hf; + bool mAABBOverlap; + + private: + HfTrianglesEntityReport2& operator=(const HfTrianglesEntityReport2&); + }; + + +} // namespace + +void physx::PxMeshQuery::getTriangle(const PxTriangleMeshGeometry& triGeom, const PxTransform& globalPose, PxTriangleID triangleIndex, PxTriangle& triangle, PxU32* vertexIndices, PxU32* adjacencyIndices) +{ + TriangleMesh* tm = static_cast<TriangleMesh*>(triGeom.triangleMesh); + + PX_CHECK_AND_RETURN(triangleIndex<tm->getNbTriangles(), "PxMeshQuery::getTriangle: triangle index is out of bounds"); + + if(adjacencyIndices && !tm->getAdjacencies()) + Ps::getFoundation().error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, "Adjacency information not created. 
Set buildTriangleAdjacencies on Cooking params."); + + const Cm::Matrix34 vertex2worldSkew = globalPose * triGeom.scale; + tm->computeWorldTriangle(triangle, triangleIndex, vertex2worldSkew, triGeom.scale.hasNegativeDeterminant(), vertexIndices, adjacencyIndices); +} + +/////////////////////////////////////////////////////////////////////////////// + +void physx::PxMeshQuery::getTriangle(const PxHeightFieldGeometry& hfGeom, const PxTransform& globalPose, PxTriangleID triangleIndex, PxTriangle& triangle, PxU32* vertexIndices, PxU32* adjacencyIndices) +{ + HeightFieldUtil hfUtil(hfGeom); + + hfUtil.getTriangle(globalPose, triangle, vertexIndices, adjacencyIndices, triangleIndex, true, true); +} + +/////////////////////////////////////////////////////////////////////////////// + +PxU32 physx::PxMeshQuery::findOverlapTriangleMesh( + const PxGeometry& geom, const PxTransform& geomPose, + const PxTriangleMeshGeometry& meshGeom, const PxTransform& meshPose, + PxU32* results, PxU32 maxResults, PxU32 startIndex, bool& overflow) +{ + PX_SIMD_GUARD; + + LimitedResults limitedResults(results, maxResults, startIndex); + + TriangleMesh* tm = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + + switch(geom.getType()) + { + case PxGeometryType::eBOX: + { + const PxBoxGeometry& boxGeom = static_cast<const PxBoxGeometry&>(geom); + + Box box; + buildFrom(box, geomPose.p, boxGeom.halfExtents, geomPose.q); + + Midphase::intersectBoxVsMesh(box, *tm, meshPose, meshGeom.scale, &limitedResults); + break; + } + + case PxGeometryType::eCAPSULE: + { + const PxCapsuleGeometry& capsGeom = static_cast<const PxCapsuleGeometry&>(geom); + + Capsule capsule; + getCapsule(capsule, capsGeom, geomPose); + + Midphase::intersectCapsuleVsMesh(capsule, *tm, meshPose, meshGeom.scale, &limitedResults); + break; + } + + case PxGeometryType::eSPHERE: + { + const PxSphereGeometry& sphereGeom = static_cast<const PxSphereGeometry&>(geom); + Midphase::intersectSphereVsMesh(Sphere(geomPose.p, sphereGeom.radius), 
*tm, meshPose, meshGeom.scale, &limitedResults); + break; + } + + case PxGeometryType::ePLANE: + case PxGeometryType::eCONVEXMESH: + case PxGeometryType::eTRIANGLEMESH: + case PxGeometryType::eHEIGHTFIELD: + case PxGeometryType::eGEOMETRY_COUNT: + case PxGeometryType::eINVALID: + { + PX_CHECK_MSG(false, "findOverlapTriangleMesh: Only box, capsule and sphere geometries are supported."); + } + } + + overflow = limitedResults.mOverflow; + return limitedResults.mNbResults; +} + +/////////////////////////////////////////////////////////////////////////////// + +PxU32 physx::PxMeshQuery::findOverlapHeightField( const PxGeometry& geom, const PxTransform& geomPose, + const PxHeightFieldGeometry& hfGeom, const PxTransform& hfPose, + PxU32* results, PxU32 maxResults, PxU32 startIndex, bool& overflow) +{ + PX_SIMD_GUARD; + const PxTransform localPose0 = hfPose.transformInv(geomPose); + PxBoxGeometry boxGeom; + + switch(geom.getType()) + { + case PxGeometryType::eCAPSULE: + { + const PxCapsuleGeometry& cap = static_cast<const PxCapsuleGeometry&>(geom); + boxGeom.halfExtents = PxVec3(cap.halfHeight+cap.radius, cap.radius, cap.radius); + } + break; + case PxGeometryType::eSPHERE: + { + const PxSphereGeometry& sph = static_cast<const PxSphereGeometry&>(geom); + boxGeom.halfExtents = PxVec3(sph.radius, sph.radius, sph.radius); + } + break; + case PxGeometryType::eBOX: + boxGeom = static_cast<const PxBoxGeometry&>(geom); + break; + case PxGeometryType::ePLANE: + case PxGeometryType::eCONVEXMESH: + case PxGeometryType::eTRIANGLEMESH: + case PxGeometryType::eHEIGHTFIELD: + case PxGeometryType::eGEOMETRY_COUNT: + case PxGeometryType::eINVALID: + { + overflow = false; + PX_CHECK_AND_RETURN_VAL(false, "findOverlapHeightField: Only box, sphere and capsule queries are supported.", false); + } + } + + const bool isAABB = ((localPose0.q.x == 0.0f) && (localPose0.q.y == 0.0f) && (localPose0.q.z == 0.0f)); + + PxBounds3 bounds; + if (isAABB) + bounds = PxBounds3::centerExtents(localPose0.p, 
boxGeom.halfExtents); + else + bounds = PxBounds3::poseExtent(localPose0, boxGeom.halfExtents); // box.halfExtents is really extent + + HeightFieldUtil hfUtil(hfGeom); + HfTrianglesEntityReport2 entityReport(results, maxResults, startIndex, hfUtil, localPose0.p, boxGeom.halfExtents, localPose0.q, isAABB); + + hfUtil.overlapAABBTriangles(hfPose, bounds, 0, &entityReport); + overflow = entityReport.mOverflow; + return entityReport.mNbResults; +} + +/////////////////////////////////////////////////////////////////////////////// + +bool physx::PxMeshQuery::sweep( const PxVec3& unitDir, const PxReal maxDistance, + const PxGeometry& geom, const PxTransform& pose, + PxU32 triangleCount, const PxTriangle* triangles, + PxSweepHit& sweepHit, PxHitFlags hitFlags, + const PxU32* cachedIndex, const PxReal inflation, bool doubleSided) +{ + PX_SIMD_GUARD; + PX_CHECK_AND_RETURN_VAL(pose.isValid(), "PxMeshQuery::sweep(): pose is not valid.", false); + PX_CHECK_AND_RETURN_VAL(unitDir.isFinite(), "PxMeshQuery::sweep(): unitDir is not valid.", false); + PX_CHECK_AND_RETURN_VAL(PxIsFinite(maxDistance), "PxMeshQuery::sweep(): distance is not valid.", false); + PX_CHECK_AND_RETURN_VAL(maxDistance > 0, "PxMeshQuery::sweep(): sweep distance must be greater than 0.", false); + + PX_PROFILE_ZONE("MeshQuery.sweep", 0); + + const PxReal distance = PxMin(maxDistance, PX_MAX_SWEEP_DISTANCE); + + switch(geom.getType()) + { + case PxGeometryType::eSPHERE: + { + const PxSphereGeometry& sphereGeom = static_cast<const PxSphereGeometry&>(geom); + + // PT: TODO: technically this capsule with 0.0 half-height is invalid ("isValid" returns false) + const PxCapsuleGeometry capsuleGeom(sphereGeom.radius, 0.0f); + + return sweepCapsuleTriangles( triangleCount, triangles, doubleSided, capsuleGeom, pose, unitDir, distance, + sweepHit, cachedIndex, inflation, hitFlags); + } + + case PxGeometryType::eCAPSULE: + { + const PxCapsuleGeometry& capsuleGeom = static_cast<const PxCapsuleGeometry&>(geom); + + return 
sweepCapsuleTriangles( triangleCount, triangles, doubleSided, capsuleGeom, pose, unitDir, distance, + sweepHit, cachedIndex, inflation, hitFlags); + } + + case PxGeometryType::eBOX: + { + const PxBoxGeometry& boxGeom = static_cast<const PxBoxGeometry&>(geom); + + if(hitFlags & PxHitFlag::ePRECISE_SWEEP) + { + return sweepBoxTriangles_Precise( triangleCount, triangles, doubleSided, boxGeom, pose, unitDir, distance, sweepHit, cachedIndex, + inflation, hitFlags); + } + else + { + return sweepBoxTriangles( triangleCount, triangles, doubleSided, boxGeom, pose, unitDir, distance, sweepHit, cachedIndex, + inflation, hitFlags); + } + } + case PxGeometryType::ePLANE: + case PxGeometryType::eCONVEXMESH: + case PxGeometryType::eTRIANGLEMESH: + case PxGeometryType::eHEIGHTFIELD: + case PxGeometryType::eGEOMETRY_COUNT: + case PxGeometryType::eINVALID: + PX_CHECK_MSG(false, "PxMeshQuery::sweep(): geometry object parameter must be sphere, capsule or box geometry."); + } + return false; +} + +/////////////////////////////////////////////////////////////////////////////// + +// Exposing wrapper for Midphase::intersectOBB just for particles in order to avoid DelayLoad performance problem. 
This should be removed with particles in PhysX 3.5 (US16993) +void physx::Gu::intersectOBB_Particles(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned) +{ + Midphase::intersectOBB(mesh, obb, callback, bothTriangleSidesCollide, checkObbIsAligned); +} + +/////////////////////////////////////////////////////////////////////////////// diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseBV4.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseBV4.cpp new file mode 100644 index 00000000..1cab487f --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseBV4.cpp @@ -0,0 +1,999 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "GuBV4.h" +using namespace physx; +using namespace Gu; + +#include "PsVecMath.h" +using namespace physx::shdfnd::aos; + +#include "GuSweepMesh.h" +#include "GuBV4Build.h" +#include "GuBV4_Common.h" +#include "GuSphere.h" +#include "GuCapsule.h" +#include "GuBoxConversion.h" +#include "GuConvexUtilsInternal.h" +#include "GuVecTriangle.h" +#include "GuIntersectionTriangleBox.h" +#include "GuIntersectionCapsuleTriangle.h" +#include "GuIntersectionRayBox.h" +#include "PxTriangleMeshGeometry.h" +#include "CmScaling.h" +#include "GuTriangleMeshBV4.h" + +// This file contains code specific to the BV4 midphase. 
+ +// PT: TODO: revisit/inline static sweep functions (TA34704) + +using namespace physx; +using namespace Gu; +using namespace Cm; + +#if PX_INTEL_FAMILY +Ps::IntBool BV4_RaycastSingle (const PxVec3& origin, const PxVec3& dir, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxRaycastHit* PX_RESTRICT hit, float maxDist, float geomEpsilon, PxU32 flags, PxHitFlags hitFlags); +PxU32 BV4_RaycastAll (const PxVec3& origin, const PxVec3& dir, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxRaycastHit* PX_RESTRICT hits, PxU32 maxNbHits, float maxDist, float geomEpsilon, PxU32 flags, PxHitFlags hitFlags); +void BV4_RaycastCB (const PxVec3& origin, const PxVec3& dir, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, float maxDist, float geomEpsilon, PxU32 flags, MeshRayCallback callback, void* userData); + +Ps::IntBool BV4_OverlapSphereAny (const Sphere& sphere, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned); +PxU32 BV4_OverlapSphereAll (const Sphere& sphere, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxU32* results, PxU32 size, bool& overflow); +void BV4_OverlapSphereCB (const Sphere& sphere, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, MeshOverlapCallback callback, void* userData); + +Ps::IntBool BV4_OverlapBoxAny (const Box& box, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned); +PxU32 BV4_OverlapBoxAll (const Box& box, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxU32* results, PxU32 size, bool& overflow); +void BV4_OverlapBoxCB (const Box& box, const BV4Tree& tree, MeshOverlapCallback callback, void* userData); + +Ps::IntBool BV4_OverlapCapsuleAny (const Capsule& capsule, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned); +PxU32 BV4_OverlapCapsuleAll (const Capsule& capsule, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, PxU32* results, PxU32 size, bool& overflow); +void BV4_OverlapCapsuleCB (const 
Capsule& capsule, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, MeshOverlapCallback callback, void* userData); + +Ps::IntBool BV4_SphereSweepSingle (const Sphere& sphere, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepHit* PX_RESTRICT hit, PxU32 flags); +void BV4_SphereSweepCB (const Sphere& sphere, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting); + +Ps::IntBool BV4_BoxSweepSingle (const Box& box, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepHit* PX_RESTRICT hit, PxU32 flags); +void BV4_BoxSweepCB (const Box& box, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags, bool nodeSorting); + +Ps::IntBool BV4_CapsuleSweepSingle (const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags); +Ps::IntBool BV4_CapsuleSweepSingleAA(const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, SweepHit* PX_RESTRICT hit, PxU32 flags); +void BV4_CapsuleSweepCB (const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags); +void BV4_CapsuleSweepAACB (const Capsule& capsule, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, SweepUnlimitedCallback callback, void* userData, PxU32 flags); + +void BV4_GenericSweepCB_Old (const PxVec3& origin, const PxVec3& extents, const PxVec3& dir, float maxDist, const BV4Tree& tree, const PxMat44* PX_RESTRICT worldm_Aligned, MeshSweepCallback callback, void* userData); +void BV4_GenericSweepCB (const Box& box, const PxVec3& dir, float maxDist, 
const BV4Tree& tree, MeshSweepCallback callback, void* userData, bool anyHit); + +static PX_FORCE_INLINE void setIdentity(PxMat44& m) +{ + m.column0 = PxVec4(1.0f, 0.0f, 0.0f, 0.0f); + m.column1 = PxVec4(0.0f, 1.0f, 0.0f, 0.0f); + m.column2 = PxVec4(0.0f, 0.0f, 1.0f, 0.0f); + m.column3 = PxVec4(0.0f, 0.0f, 0.0f, 1.0f); +} + +static PX_FORCE_INLINE void setRotation(PxMat44& m, const PxQuat& q) +{ + const PxReal x = q.x; + const PxReal y = q.y; + const PxReal z = q.z; + const PxReal w = q.w; + + const PxReal x2 = x + x; + const PxReal y2 = y + y; + const PxReal z2 = z + z; + + const PxReal xx = x2*x; + const PxReal yy = y2*y; + const PxReal zz = z2*z; + + const PxReal xy = x2*y; + const PxReal xz = x2*z; + const PxReal xw = x2*w; + + const PxReal yz = y2*z; + const PxReal yw = y2*w; + const PxReal zw = z2*w; + + m.column0 = PxVec4(1.0f - yy - zz, xy + zw, xz - yw, 0.0f); + m.column1 = PxVec4(xy - zw, 1.0f - xx - zz, yz + xw, 0.0f); + m.column2 = PxVec4(xz + yw, yz - xw, 1.0f - xx - yy, 0.0f); +} + +#define IEEE_1_0 0x3f800000 //!< integer representation of 1.0 +static PX_FORCE_INLINE const PxMat44* setupWorldMatrix(PxMat44& world, const float* meshPos, const float* meshRot) +{ +// world = PxMat44(PxIdentity); + setIdentity(world); + + bool isIdt = true; + if(meshRot) + { + const PxU32* Bin = reinterpret_cast<const PxU32*>(meshRot); + if(Bin[0]!=0 || Bin[1]!=0 || Bin[2]!=0 || Bin[3]!=IEEE_1_0) + { +// const PxQuat Q(meshRot[0], meshRot[1], meshRot[2], meshRot[3]); +// world = PxMat44(Q); + setRotation(world, PxQuat(meshRot[0], meshRot[1], meshRot[2], meshRot[3])); + isIdt = false; + } + } + + if(meshPos) + { + const PxU32* Bin = reinterpret_cast<const PxU32*>(meshPos); + if(Bin[0]!=0 || Bin[1]!=0 || Bin[2]!=0) + { +// world.setPosition(PxVec3(meshPos[0], meshPos[1], meshPos[2])); + world.column3.x = meshPos[0]; + world.column3.y = meshPos[1]; + world.column3.z = meshPos[2]; + isIdt = false; + } + } + return isIdt ? 
NULL : &world; +} + +static PX_FORCE_INLINE PxU32 setupFlags(bool anyHit, bool doubleSided, bool meshBothSides) +{ + PxU32 flags = 0; + if(anyHit) + flags |= QUERY_MODIFIER_ANY_HIT; + if(doubleSided) + flags |= QUERY_MODIFIER_DOUBLE_SIDED; + if(meshBothSides) + flags |= QUERY_MODIFIER_MESH_BOTH_SIDES; + return flags; +} + +static Ps::IntBool boxSweepVsMesh(SweepHit& h, const BV4Tree& tree, const float* meshPos, const float* meshRot, const Box& box, const PxVec3& dir, float maxDist, bool anyHit, bool doubleSided, bool meshBothSides) +{ + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, meshPos, meshRot); + + const PxU32 flags = setupFlags(anyHit, doubleSided, meshBothSides); + return BV4_BoxSweepSingle(box, dir, maxDist, tree, TM, &h, flags); +} + +static Ps::IntBool sphereSweepVsMesh(SweepHit& h, const BV4Tree& tree, const PxVec3& center, float radius, const PxVec3& dir, float maxDist, const PxMat44* TM, const PxU32 flags) +{ + // PT: TODO: avoid this copy (TA34704) + const Sphere tmp(center, radius); + + return BV4_SphereSweepSingle(tmp, dir, maxDist, tree, TM, &h, flags); +} + +static bool capsuleSweepVsMesh(SweepHit& h, const BV4Tree& tree, const Capsule& capsule, const PxVec3& dir, float maxDist, const PxMat44* TM, const PxU32 flags) +{ + Capsule localCapsule; + computeLocalCapsule(localCapsule, capsule, TM); + + // PT: TODO: optimize + PxVec3 localDir, unused; + computeLocalRay(localDir, unused, dir, dir, TM); + + const PxVec3 capsuleDir = localCapsule.p1 - localCapsule.p0; + PxU32 nbNullComponents = 0; + const float epsilon = 1e-3f; + if(PxAbs(capsuleDir.x)<epsilon) + nbNullComponents++; + if(PxAbs(capsuleDir.y)<epsilon) + nbNullComponents++; + if(PxAbs(capsuleDir.z)<epsilon) + nbNullComponents++; + + // PT: TODO: consider passing TM to BV4_CapsuleSweepSingleXX just to do the final transforms there instead + // of below. 
It would make the parameters slightly inconsistent (local input + world TM) but it might make + // the code better overall, more aligned with the "unlimited results" version. + Ps::IntBool status; + if(nbNullComponents==2) + { + status = BV4_CapsuleSweepSingleAA(localCapsule, localDir, maxDist, tree, &h, flags); + } + else + { + status = BV4_CapsuleSweepSingle(localCapsule, localDir, maxDist, tree, &h, flags); + } + if(status && TM) + { + h.mPos = TM->transform(h.mPos); + h.mNormal = TM->rotate(h.mNormal); + } + return status!=0; +} + +static PX_FORCE_INLINE void boxSweepVsMeshCBOld(const BV4Tree& tree, const float* meshPos, const float* meshRot, const PxVec3& center, const PxVec3& extents, const PxVec3& dir, float maxDist, MeshSweepCallback callback, void* userData) +{ + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, meshPos, meshRot); + + BV4_GenericSweepCB_Old(center, extents, dir, maxDist, tree, TM, callback, userData); +} + +// + +static PX_FORCE_INLINE bool raycastVsMesh(PxRaycastHit& hitData, const BV4Tree& tree, const float* meshPos, const float* meshRot, const PxVec3& orig, const PxVec3& dir, float maxDist, float geomEpsilon, bool doubleSided, PxHitFlags hitFlags) +{ + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, meshPos, meshRot); + + const bool anyHit = hitFlags & PxHitFlag::eMESH_ANY; + const PxU32 flags = setupFlags(anyHit, doubleSided, false); + + if(!BV4_RaycastSingle(orig, dir, tree, TM, &hitData, maxDist, geomEpsilon, flags, hitFlags)) + return false; + + return true; +} + +/*static PX_FORCE_INLINE PxU32 raycastVsMeshAll(PxRaycastHit* hits, PxU32 maxNbHits, const BV4Tree& tree, const float* meshPos, const float* meshRot, const PxVec3& orig, const PxVec3& dir, float maxDist, float geomEpsilon, bool doubleSided, PxHitFlags hitFlags) +{ + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, meshPos, meshRot); + + const bool anyHit = hitFlags & PxHitFlag::eMESH_ANY; + const 
PxU32 flags = setupFlags(anyHit, doubleSided, false); + + return BV4_RaycastAll(orig, dir, tree, TM, hits, maxNbHits, maxDist, geomEpsilon, flags, hitFlags); +}*/ + +static PX_FORCE_INLINE void raycastVsMeshCB(const BV4Tree& tree, const PxVec3& orig, const PxVec3& dir, float maxDist, float geomEpsilon, bool doubleSided, MeshRayCallback callback, void* userData) +{ + const PxU32 flags = setupFlags(false, doubleSided, false); + BV4_RaycastCB(orig, dir, tree, NULL, maxDist, geomEpsilon, flags, callback, userData); +} + +struct BV4RaycastCBParams +{ + PX_FORCE_INLINE BV4RaycastCBParams( PxRaycastHit* hits, PxU32 maxHits, const PxMeshScale* scale, const PxTransform* pose, + const Cm::Matrix34* world2vertexSkew, PxU32 hitFlags, + const PxVec3& rayDir, bool isDoubleSided, float distCoeff) : + mDstBase (hits), + mHitNum (0), + mMaxHits (maxHits), + mScale (scale), + mPose (pose), + mWorld2vertexSkew (world2vertexSkew), + mHitFlags (hitFlags), + mRayDir (rayDir), + mIsDoubleSided (isDoubleSided), + mDistCoeff (distCoeff) + { + } + + PxRaycastHit* mDstBase; + PxU32 mHitNum; + PxU32 mMaxHits; + const PxMeshScale* mScale; + const PxTransform* mPose; + const Cm::Matrix34* mWorld2vertexSkew; + PxU32 mHitFlags; + const PxVec3& mRayDir; + bool mIsDoubleSided; + float mDistCoeff; + +private: + BV4RaycastCBParams& operator=(const BV4RaycastCBParams&); +}; + +static PX_FORCE_INLINE PxVec3 processLocalNormal(const Cm::Matrix34* PX_RESTRICT world2vertexSkew, const PxTransform* PX_RESTRICT pose, const PxVec3& localNormal, const PxVec3& rayDir, const bool isDoubleSided) +{ + PxVec3 normal; + if(world2vertexSkew) + normal = world2vertexSkew->rotateTranspose(localNormal); + else + normal = pose->rotate(localNormal); + normal.normalize(); + + // PT: figure out correct normal orientation (DE7458) + // - if the mesh is single-sided the normal should be the regular triangle normal N, regardless of eMESH_BOTH_SIDES. + // - if the mesh is double-sided the correct normal can be either N or -N. 
We take the one opposed to ray direction. + if(isDoubleSided && normal.dot(rayDir) > 0.0f) + normal = -normal; + return normal; +} + +static HitCode gRayCallback(void* userData, const PxVec3& lp0, const PxVec3& lp1, const PxVec3& lp2, PxU32 triangleIndex, float dist, float u, float v) +{ + BV4RaycastCBParams* params = reinterpret_cast<BV4RaycastCBParams*>(userData); + +//const bool last = params->mHitNum == params->mMaxHits; + + //not worth concatenating to do 1 transform: PxMat34Legacy vertex2worldSkew = scaling.getVertex2WorldSkew(absPose); + // PT: TODO: revisit this for N hits + PX_ALIGN_PREFIX(16) char buffer[sizeof(PxRaycastHit)] PX_ALIGN_SUFFIX(16); + PxRaycastHit& hit = reinterpret_cast<PxRaycastHit&>(buffer); +//PxRaycastHit& hit = last ? (PxRaycastHit&)buffer : params->mDstBase[params->mHitNum]; + + hit.distance = dist * params->mDistCoeff; + hit.u = u; + hit.v = v; + hit.faceIndex = triangleIndex; + + PxVec3 localImpact = (1.0f - u - v)*lp0 + u*lp1 + v*lp2; + if(params->mWorld2vertexSkew) + { + localImpact = params->mScale->transform(localImpact); + if(params->mScale->hasNegativeDeterminant()) + Ps::swap<PxReal>(hit.u, hit.v); // have to swap the UVs though since they were computed in mesh local space + } + + hit.position = params->mPose->transform(localImpact); + hit.flags = PxHitFlag::ePOSITION|PxHitFlag::eDISTANCE|PxHitFlag::eUV|PxHitFlag::eFACE_INDEX; + + PxVec3 normal(0.0f); + // Compute additional information if needed + if(params->mHitFlags & PxHitFlag::eNORMAL) + { + const PxVec3 localNormal = (lp1 - lp0).cross(lp2 - lp0); + normal = processLocalNormal(params->mWorld2vertexSkew, params->mPose, localNormal, params->mRayDir, params->mIsDoubleSided); + hit.flags |= PxHitFlag::eNORMAL; + } + hit.normal = normal; + + // PT: no callback => store results in provided buffer + if(params->mHitNum == params->mMaxHits) +// if(last) + return HIT_EXIT; + + params->mDstBase[params->mHitNum++] = hit; +// params->mHitNum++; + + return HIT_NONE; +} + +PxU32 
physx::Gu::raycast_triangleMesh_BV4( const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose, + const PxVec3& rayOrigin, const PxVec3& rayDir, PxReal maxDist, + PxHitFlags hitFlags, PxU32 maxHits, PxRaycastHit* PX_RESTRICT hits) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4TriangleMesh* meshData = static_cast<const BV4TriangleMesh*>(mesh); + + const bool multipleHits = (maxHits > 1); + const bool idtScale = meshGeom.scale.isIdentity(); + + const bool isDoubleSided = meshGeom.meshFlags.isSet(PxMeshGeometryFlag::eDOUBLE_SIDED); + const bool bothSides = isDoubleSided || (hitFlags & PxHitFlag::eMESH_BOTH_SIDES); + + const BV4Tree& tree = static_cast<const BV4TriangleMesh*>(meshData)->getBV4Tree(); + if(idtScale && !multipleHits) + { + bool b = raycastVsMesh(*hits, tree, &pose.p.x, &pose.q.x, rayOrigin, rayDir, maxDist, meshData->getGeomEpsilon(), bothSides, hitFlags); + if(b) + { + PxHitFlags dstFlags = PxHitFlag::ePOSITION|PxHitFlag::eDISTANCE|PxHitFlag::eUV|PxHitFlag::eFACE_INDEX; + + // PT: TODO: pass flags to BV4 code (TA34704) + if(hitFlags & PxHitFlag::eNORMAL) + { + dstFlags |= PxHitFlag::eNORMAL; + if(isDoubleSided) + { + PxVec3 normal = hits->normal; + // PT: figure out correct normal orientation (DE7458) + // - if the mesh is single-sided the normal should be the regular triangle normal N, regardless of eMESH_BOTH_SIDES. + // - if the mesh is double-sided the correct normal can be either N or -N. We take the one opposed to ray direction. 
+ if(normal.dot(rayDir) > 0.0f) + normal = -normal; + hits->normal = normal; + } + } + else + { + hits->normal = PxVec3(0.0f); + } + hits->flags = dstFlags; + } + return PxU32(b); + } + +/* + if(idtScale && multipleHits) + { + PxU32 nbHits = raycastVsMeshAll(hits, maxHits, tree, &pose.p.x, &pose.q.x, rayOrigin, rayDir, maxDist, meshData->getGeomEpsilon(), bothSides, hitFlags); + + return nbHits; + } +*/ + + //scaling: transform the ray to vertex space + PxVec3 orig, dir; + Cm::Matrix34 world2vertexSkew; + Cm::Matrix34* world2vertexSkewP = NULL; + PxReal distCoeff = 1.0f; + if(idtScale) + { + orig = pose.transformInv(rayOrigin); + dir = pose.rotateInv(rayDir); + } + else + { + world2vertexSkew = meshGeom.scale.getInverse() * pose.getInverse(); + world2vertexSkewP = &world2vertexSkew; + orig = world2vertexSkew.transform(rayOrigin); + dir = world2vertexSkew.rotate(rayDir); + { + distCoeff = dir.normalize(); + maxDist *= distCoeff; + maxDist += 1e-3f; + distCoeff = 1.0f/distCoeff; + } + } + + if(!multipleHits) + { + bool b = raycastVsMesh(*hits, tree, NULL, NULL, orig, dir, maxDist, meshData->getGeomEpsilon(), bothSides, hitFlags); + if(b) + { + hits->distance *= distCoeff; + hits->position = pose.transform(meshGeom.scale.transform(hits->position)); + PxHitFlags dstFlags = PxHitFlag::ePOSITION|PxHitFlag::eDISTANCE|PxHitFlag::eUV|PxHitFlag::eFACE_INDEX; + + if(meshGeom.scale.hasNegativeDeterminant()) + Ps::swap<PxReal>(hits->u, hits->v); // have to swap the UVs though since they were computed in mesh local space + + // PT: TODO: pass flags to BV4 code (TA34704) + // Compute additional information if needed + if(hitFlags & PxHitFlag::eNORMAL) + { + dstFlags |= PxHitFlag::eNORMAL; + hits->normal = processLocalNormal(world2vertexSkewP, &pose, hits->normal, rayDir, isDoubleSided); + } + else + { + hits->normal = PxVec3(0.0f); + } + hits->flags = dstFlags; + } + return PxU32(b); + } + + BV4RaycastCBParams callback(hits, maxHits, &meshGeom.scale, &pose, world2vertexSkewP, 
hitFlags, rayDir, isDoubleSided, distCoeff); + + raycastVsMeshCB( static_cast<const BV4TriangleMesh*>(meshData)->getBV4Tree(), + orig, dir, + maxDist, meshData->getGeomEpsilon(), bothSides, + gRayCallback, &callback); + return callback.mHitNum; +} + +namespace +{ +struct IntersectShapeVsMeshCallback +{ + IntersectShapeVsMeshCallback(LimitedResults* results, bool flipNormal) : mResults(results), mAnyHits(false), mFlipNormal(flipNormal) {} + + LimitedResults* mResults; + bool mAnyHits; + bool mFlipNormal; + + PX_FORCE_INLINE bool recordHit(PxU32 faceIndex, Ps::IntBool hit) + { + if(hit) + { + mAnyHits = true; + if(mResults) + mResults->add(faceIndex); + else + return false; // abort traversal if we are only interested in firstContact (mResults is NULL) + } + return true; // if we are here, either no triangles were hit or multiple results are expected => continue traversal + } +}; + +// PT: TODO: get rid of this (TA34704) +struct IntersectSphereVsMeshCallback : IntersectShapeVsMeshCallback +{ + PX_FORCE_INLINE IntersectSphereVsMeshCallback(const PxMeshScale& meshScale, const PxTransform& meshTransform, const Sphere& sphere, LimitedResults* r, bool flipNormal) + : IntersectShapeVsMeshCallback(r, flipNormal) + { + mVertexToShapeSkew = meshScale.toMat33(); + mLocalCenter = meshTransform.transformInv(sphere.center); // sphereCenterInMeshSpace + mSphereRadius2 = sphere.radius*sphere.radius; + } + + PxMat33 mVertexToShapeSkew; + PxVec3 mLocalCenter; // PT: sphere center in local/mesh space + PxF32 mSphereRadius2; + + PX_FORCE_INLINE PxAgain processHit(PxU32 faceIndex, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2) + { + const Vec3V v0 = V3LoadU(mVertexToShapeSkew * av0); + const Vec3V v1 = V3LoadU(mVertexToShapeSkew * (mFlipNormal ? av2 : av1)); + const Vec3V v2 = V3LoadU(mVertexToShapeSkew * (mFlipNormal ? 
av1 : av2)); + + FloatV dummy1, dummy2; + Vec3V closestP; + PxReal dist2; + FStore(distancePointTriangleSquared(V3LoadU(mLocalCenter), v0, v1, v2, dummy1, dummy2, closestP), &dist2); + return recordHit(faceIndex, dist2 <= mSphereRadius2); + } +}; + +// PT: TODO: get rid of this (TA34704) +struct IntersectCapsuleVsMeshCallback : IntersectShapeVsMeshCallback +{ + PX_FORCE_INLINE IntersectCapsuleVsMeshCallback(const PxMeshScale& meshScale, const PxTransform& meshTransform, const Capsule& capsule, LimitedResults* r, bool flipNormal) + : IntersectShapeVsMeshCallback(r, flipNormal) + { + mVertexToShapeSkew = meshScale.toMat33(); + + // transform world capsule to mesh shape space + mLocalCapsule.p0 = meshTransform.transformInv(capsule.p0); + mLocalCapsule.p1 = meshTransform.transformInv(capsule.p1); + mLocalCapsule.radius = capsule.radius; + mParams.init(mLocalCapsule); + } + + PxMat33 mVertexToShapeSkew; + Capsule mLocalCapsule; // PT: capsule in mesh/local space + CapsuleTriangleOverlapData mParams; + + PX_FORCE_INLINE PxAgain processHit(PxU32 faceIndex, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2) + { + const PxVec3 v0 = mVertexToShapeSkew * av0; + const PxVec3 v1 = mVertexToShapeSkew * (mFlipNormal ? av2 : av1); + const PxVec3 v2 = mVertexToShapeSkew * (mFlipNormal ? 
av1 : av2); + const PxVec3 normal = (v0 - v1).cross(v0 - v2); + bool hit = intersectCapsuleTriangle(normal, v0, v1, v2, mLocalCapsule, mParams); + return recordHit(faceIndex, hit); + } +}; + +// PT: TODO: get rid of this (TA34704) +struct IntersectBoxVsMeshCallback : IntersectShapeVsMeshCallback +{ + PX_FORCE_INLINE IntersectBoxVsMeshCallback(const PxMeshScale& meshScale, const PxTransform& meshTransform, const Box& box, LimitedResults* r, bool flipNormal) + : IntersectShapeVsMeshCallback(r, flipNormal) + { + const PxMat33 vertexToShapeSkew = meshScale.toMat33(); + + // mesh scale needs to be included - inverse transform and optimize the box + const PxMat33 vertexToWorldSkew_Rot = PxMat33Padded(meshTransform.q) * vertexToShapeSkew; + const PxVec3& vertexToWorldSkew_Trans = meshTransform.p; + + Matrix34 tmp; + buildMatrixFromBox(tmp, box); + const Matrix34 inv = tmp.getInverseRT(); + const Matrix34 _vertexToWorldSkew(vertexToWorldSkew_Rot, vertexToWorldSkew_Trans); + + mVertexToBox = inv * _vertexToWorldSkew; + mBoxCenter = PxVec3(0.0f); + mBoxExtents = box.extents; // extents do not change + } + + Matrix34 mVertexToBox; + Vec3p mBoxExtents, mBoxCenter; + + PX_FORCE_INLINE PxAgain processHit(PxU32 faceIndex, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2) + { + const Vec3p v0 = mVertexToBox.transform(av0); + const Vec3p v1 = mVertexToBox.transform(mFlipNormal ? av2 : av1); + const Vec3p v2 = mVertexToBox.transform(mFlipNormal ? 
av1 : av2); + + // PT: this one is safe because we're using Vec3p for all parameters + const Ps::IntBool hit = intersectTriangleBox_Unsafe(mBoxCenter, mBoxExtents, v0, v1, v2); + return recordHit(faceIndex, hit); + } +}; +} + +static bool gSphereVsMeshCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, const PxU32* /*vertexIndices*/) +{ + IntersectSphereVsMeshCallback* callback = reinterpret_cast<IntersectSphereVsMeshCallback*>(userData); + return !callback->processHit(triangleIndex, p0, p1, p2); +} + +static bool gCapsuleVsMeshCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, const PxU32* /*vertexIndices*/) +{ + IntersectCapsuleVsMeshCallback* callback = reinterpret_cast<IntersectCapsuleVsMeshCallback*>(userData); + return !callback->processHit(triangleIndex, p0, p1, p2); +} + +static bool gBoxVsMeshCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, const PxU32* /*vertexIndices*/) +{ + IntersectBoxVsMeshCallback* callback = reinterpret_cast<IntersectBoxVsMeshCallback*>(userData); + return !callback->processHit(triangleIndex, p0, p1, p2); +} + +bool physx::Gu::intersectSphereVsMesh_BV4(const Sphere& sphere, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) +{ + PX_ASSERT(triMesh.getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4Tree& tree = static_cast<const BV4TriangleMesh&>(triMesh).getBV4Tree(); + + if(meshScale.isIdentity()) + { + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, &meshTransform.p.x, &meshTransform.q.x); + if(results) + { + const PxU32 nbResults = BV4_OverlapSphereAll(sphere, tree, TM, results->mResults, results->mMaxResults, results->mOverflow); + results->mNbResults = nbResults; + return nbResults!=0; + } + else + { + return BV4_OverlapSphereAny(sphere, tree, TM)!=0; + } + } + else + { + // 
PT: TODO: we don't need to use this callback here (TA34704) + IntersectSphereVsMeshCallback callback(meshScale, meshTransform, sphere, results, meshScale.hasNegativeDeterminant()); + + const Box worldOBB_(sphere.center, PxVec3(sphere.radius), PxMat33(PxIdentity)); + Box vertexOBB; + computeVertexSpaceOBB(vertexOBB, worldOBB_, meshTransform, meshScale); + + BV4_OverlapBoxCB(vertexOBB, tree, gSphereVsMeshCallback, &callback); + return callback.mAnyHits; + } +} + +bool physx::Gu::intersectBoxVsMesh_BV4(const Box& box, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) +{ + PX_ASSERT(triMesh.getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4Tree& tree = static_cast<const BV4TriangleMesh&>(triMesh).getBV4Tree(); + + if(meshScale.isIdentity()) + { + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, &meshTransform.p.x, &meshTransform.q.x); + if(results) + { + const PxU32 nbResults = BV4_OverlapBoxAll(box, tree, TM, results->mResults, results->mMaxResults, results->mOverflow); + results->mNbResults = nbResults; + return nbResults!=0; + } + else + { + return BV4_OverlapBoxAny(box, tree, TM)!=0; + } + } + else + { + // PT: TODO: we don't need to use this callback here (TA34704) + IntersectBoxVsMeshCallback callback(meshScale, meshTransform, box, results, meshScale.hasNegativeDeterminant()); + + Box vertexOBB; // query box in vertex space + computeVertexSpaceOBB(vertexOBB, box, meshTransform, meshScale); + + BV4_OverlapBoxCB(vertexOBB, tree, gBoxVsMeshCallback, &callback); + return callback.mAnyHits; + } +} + +bool physx::Gu::intersectCapsuleVsMesh_BV4(const Capsule& capsule, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) +{ + PX_ASSERT(triMesh.getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4Tree& tree = static_cast<const BV4TriangleMesh&>(triMesh).getBV4Tree(); + + 
if(meshScale.isIdentity()) + { + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, &meshTransform.p.x, &meshTransform.q.x); + if(results) + { + const PxU32 nbResults = BV4_OverlapCapsuleAll(capsule, tree, TM, results->mResults, results->mMaxResults, results->mOverflow); + results->mNbResults = nbResults; + return nbResults!=0; + } + else + { + return BV4_OverlapCapsuleAny(capsule, tree, TM)!=0; + } + } + else + { + // PT: TODO: we don't need to use this callback here (TA34704) + IntersectCapsuleVsMeshCallback callback(meshScale, meshTransform, capsule, results, meshScale.hasNegativeDeterminant()); + + // make vertex space OBB + Box vertexOBB; + Box worldOBB_; + worldOBB_.create(capsule); // AP: potential optimization (meshTransform.inverse is already in callback.mCapsule) + computeVertexSpaceOBB(vertexOBB, worldOBB_, meshTransform, meshScale); + + BV4_OverlapBoxCB(vertexOBB, tree, gCapsuleVsMeshCallback, &callback); + return callback.mAnyHits; + } +} + +// PT: TODO: get rid of this (TA34704) +static bool gVolumeCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, const PxU32* vertexIndices) +{ + MeshHitCallback<PxRaycastHit>* callback = reinterpret_cast<MeshHitCallback<PxRaycastHit>*>(userData); + PX_ALIGN_PREFIX(16) char buffer[sizeof(PxRaycastHit)] PX_ALIGN_SUFFIX(16); + PxRaycastHit& hit = reinterpret_cast<PxRaycastHit&>(buffer); + hit.faceIndex = triangleIndex; + PxReal dummy; + return !callback->processHit(hit, p0, p1, p2, dummy, vertexIndices); +} + +void physx::Gu::intersectOBB_BV4(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned) +{ + PX_UNUSED(checkObbIsAligned); + PX_UNUSED(bothTriangleSidesCollide); + BV4_OverlapBoxCB(obb, static_cast<const BV4TriangleMesh*>(mesh)->getBV4Tree(), gVolumeCallback, &callback); +} + + + + +#include "GuVecCapsule.h" +#include "GuSweepMTD.h" + +static bool 
gCapsuleMeshSweepCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, /*const PxU32* vertexIndices,*/ float& dist) +{ + SweepCapsuleMeshHitCallback* callback = reinterpret_cast<SweepCapsuleMeshHitCallback*>(userData); + PxRaycastHit meshHit; + meshHit.faceIndex = triangleIndex; + return !callback->SweepCapsuleMeshHitCallback::processHit(meshHit, p0, p1, p2, dist, NULL/*vertexIndices*/); +} + +// PT: TODO: refactor/share bits of this (TA34704) +bool physx::Gu::sweepCapsule_MeshGeom_BV4( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Capsule& lss, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4TriangleMesh* meshData = static_cast<const BV4TriangleMesh*>(mesh); + + const Capsule inflatedCapsule(lss.p0, lss.p1, lss.radius + inflation); + + const bool isIdentity = triMeshGeom.scale.isIdentity(); + bool isDoubleSided = (triMeshGeom.meshFlags & PxMeshGeometryFlag::eDOUBLE_SIDED); + const PxU32 meshBothSides = hitFlags & PxHitFlag::eMESH_BOTH_SIDES; + + if(isIdentity) + { + const BV4Tree& tree = meshData->getBV4Tree(); + const bool anyHit = hitFlags & PxHitFlag::eMESH_ANY; + + BV4_ALIGN16(PxMat44 World); + const PxMat44* TM = setupWorldMatrix(World, &pose.p.x, &pose.q.x); + + const PxU32 flags = setupFlags(anyHit, isDoubleSided, meshBothSides!=0); + + SweepHit hitData; + if(lss.p0==lss.p1) + { + if(!sphereSweepVsMesh(hitData, tree, inflatedCapsule.p0, inflatedCapsule.radius, unitDir, distance, TM, flags)) + return false; + } + else + { + if(!capsuleSweepVsMesh(hitData, tree, inflatedCapsule, unitDir, distance, TM, flags)) + return false; + } + + sweepHit.distance = hitData.mDistance; + sweepHit.position = hitData.mPos; + sweepHit.normal = hitData.mNormal; + sweepHit.faceIndex = hitData.mTriangleID; + + 
if(hitData.mDistance==0.0f) + { + sweepHit.flags = PxHitFlag::eDISTANCE | PxHitFlag::eNORMAL; + + if(meshBothSides) + isDoubleSided = true; + + // PT: TODO: consider using 'setInitialOverlapResults' here + bool hasContacts = false; + if(hitFlags & PxHitFlag::eMTD) + { + const Vec3V p0 = V3LoadU(inflatedCapsule.p0); + const Vec3V p1 = V3LoadU(inflatedCapsule.p1); + const FloatV radius = FLoad(lss.radius); + CapsuleV capsuleV; + capsuleV.initialize(p0, p1, radius); + + //we need to calculate the MTD + hasContacts = computeCapsule_TriangleMeshMTD(triMeshGeom, pose, capsuleV, inflatedCapsule.radius, isDoubleSided, sweepHit); + } + setupSweepHitForMTD(sweepHit, hasContacts, unitDir); + } + else + sweepHit.flags = PxHitFlag::eDISTANCE | PxHitFlag::ePOSITION | PxHitFlag::eNORMAL | PxHitFlag::eFACE_INDEX; + return true; + } + + // compute sweptAABB + const PxVec3 localP0 = pose.transformInv(inflatedCapsule.p0); + const PxVec3 localP1 = pose.transformInv(inflatedCapsule.p1); + PxVec3 sweepOrigin = (localP0+localP1)*0.5f; + PxVec3 sweepDir = pose.rotateInv(unitDir); + PxVec3 sweepExtents = PxVec3(inflatedCapsule.radius) + (localP0-localP1).abs()*0.5f; + PxReal distance1 = distance; + PxReal distCoef = 1.0f; + Matrix34 poseWithScale; + if(!isIdentity) + { + poseWithScale = pose * triMeshGeom.scale; + distance1 = computeSweepData(triMeshGeom, sweepOrigin, sweepExtents, sweepDir, distance); + distCoef = distance1 / distance; + } else + poseWithScale = Matrix34(pose); + + SweepCapsuleMeshHitCallback callback(sweepHit, poseWithScale, distance, isDoubleSided, inflatedCapsule, unitDir, hitFlags, triMeshGeom.scale.hasNegativeDeterminant(), distCoef); + + boxSweepVsMeshCBOld(meshData->getBV4Tree(), NULL, NULL, sweepOrigin, sweepExtents, sweepDir, distance1, gCapsuleMeshSweepCallback, &callback); + + if(meshBothSides) + isDoubleSided = true; + + return callback.finalizeHit(sweepHit, inflatedCapsule, triMeshGeom, pose, isDoubleSided); +} + +#include "GuSweepSharedTests.h" +static bool 
gBoxMeshSweepCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, /*const PxU32* vertexIndices,*/ float& dist) +{ + SweepBoxMeshHitCallback* callback = reinterpret_cast<SweepBoxMeshHitCallback*>(userData); + PxRaycastHit meshHit; + meshHit.faceIndex = triangleIndex; + return !callback->SweepBoxMeshHitCallback::processHit(meshHit, p0, p1, p2, dist, NULL/*vertexIndices*/); +} + +// PT: TODO: refactor/share bits of this (TA34704) +bool physx::Gu::sweepBox_MeshGeom_BV4( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Box& box, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4TriangleMesh* meshData = static_cast<const BV4TriangleMesh*>(mesh); + + const bool isIdentity = triMeshGeom.scale.isIdentity(); + + const bool meshBothSides = hitFlags & PxHitFlag::eMESH_BOTH_SIDES; + const bool isDoubleSided = triMeshGeom.meshFlags & PxMeshGeometryFlag::eDOUBLE_SIDED; + + if(isIdentity && inflation==0.0f) + { + const bool anyHit = hitFlags & PxHitFlag::eMESH_ANY; + + // PT: TODO: this is wrong, we shouldn't actually sweep the inflated version +// const PxVec3 inflated = (box.extents + PxVec3(inflation)) * 1.01f; + // PT: TODO: avoid this copy +// const Box tmp(box.center, inflated, box.rot); + + SweepHit hitData; +// if(!boxSweepVsMesh(hitData, meshData->getBV4Tree(), &pose.p.x, &pose.q.x, tmp, unitDir, distance, anyHit, isDoubleSided, meshBothSides)) + if(!boxSweepVsMesh(hitData, meshData->getBV4Tree(), &pose.p.x, &pose.q.x, box, unitDir, distance, anyHit, isDoubleSided, meshBothSides)) + return false; + + sweepHit.distance = hitData.mDistance; + sweepHit.position = hitData.mPos; + sweepHit.normal = hitData.mNormal; + sweepHit.faceIndex = hitData.mTriangleID; + + if(hitData.mDistance==0.0f) + { + sweepHit.flags = 
PxHitFlag::eDISTANCE | PxHitFlag::eNORMAL; + + const bool bothTriangleSidesCollide = isDoubleSided || meshBothSides; + const PxTransform boxTransform = box.getTransform(); + + bool hasContacts = false; + if(hitFlags & PxHitFlag::eMTD) + hasContacts = computeBox_TriangleMeshMTD(triMeshGeom, pose, box, boxTransform, inflation, bothTriangleSidesCollide, sweepHit); + + setupSweepHitForMTD(sweepHit, hasContacts, unitDir); + } + else + { + sweepHit.flags = PxHitFlag::eDISTANCE | PxHitFlag::ePOSITION | PxHitFlag::eNORMAL | PxHitFlag::eFACE_INDEX; + } + return true; + } + + // PT: TODO: revisit this codepath, we don't need to sweep an AABB all the time (TA34704) + + Matrix34 meshToWorldSkew; + PxVec3 sweptAABBMeshSpaceExtents, meshSpaceOrigin, meshSpaceDir; + + // Input sweep params: geom, pose, box, unitDir, distance + // We convert the origin from world space to mesh local space + // and convert the box+pose to mesh space AABB + if(isIdentity) + { + meshToWorldSkew = Matrix34(pose); + PxMat33 worldToMeshRot(pose.q.getConjugate()); // extract rotation matrix from pose.q + meshSpaceOrigin = worldToMeshRot.transform(box.center - pose.p); + meshSpaceDir = worldToMeshRot.transform(unitDir) * distance; + PxMat33 boxToMeshRot = worldToMeshRot * box.rot; + sweptAABBMeshSpaceExtents = boxToMeshRot.column0.abs() * box.extents.x + + boxToMeshRot.column1.abs() * box.extents.y + + boxToMeshRot.column2.abs() * box.extents.z; + } + else + { + meshToWorldSkew = pose * triMeshGeom.scale; + const PxMat33 meshToWorldSkew_Rot = PxMat33Padded(pose.q) * triMeshGeom.scale.toMat33(); + const PxVec3& meshToWorldSkew_Trans = pose.p; + + PxMat33 worldToVertexSkew_Rot; + PxVec3 worldToVertexSkew_Trans; + getInverse(worldToVertexSkew_Rot, worldToVertexSkew_Trans, meshToWorldSkew_Rot, meshToWorldSkew_Trans); + + //make vertex space OBB + Box vertexSpaceBox1; + const Matrix34 worldToVertexSkew(worldToVertexSkew_Rot, worldToVertexSkew_Trans); + vertexSpaceBox1 = transform(worldToVertexSkew, box); + // 
compute swept aabb + sweptAABBMeshSpaceExtents = vertexSpaceBox1.computeAABBExtent(); + + meshSpaceOrigin = worldToVertexSkew.transform(box.center); + meshSpaceDir = worldToVertexSkew.rotate(unitDir*distance); // also applies scale to direction/length + } + + sweptAABBMeshSpaceExtents += PxVec3(inflation); // inflate the bounds with additive inflation + sweptAABBMeshSpaceExtents *= 1.01f; // fatten the bounds to account for numerical discrepancies + + PxReal dirLen = PxMax(meshSpaceDir.magnitude(), 1e-5f); + PxReal distCoeff = 1.0f; + if (!isIdentity) + distCoeff = dirLen / distance; + + // Move to AABB space + Matrix34 worldToBox; + computeWorldToBoxMatrix(worldToBox, box); + + const bool bothTriangleSidesCollide = isDoubleSided || meshBothSides; + + const Matrix34Padded meshToBox = worldToBox*meshToWorldSkew; + const PxTransform boxTransform = box.getTransform(); // PT: TODO: this is not needed when there's no hit (TA34704) + + const PxVec3 localDir = worldToBox.rotate(unitDir); + const PxVec3 localDirDist = localDir*distance; + SweepBoxMeshHitCallback callback( // using eMULTIPLE with shrinkMaxT + CallbackMode::eMULTIPLE, meshToBox, distance, bothTriangleSidesCollide, box, localDirDist, localDir, unitDir, hitFlags, inflation, triMeshGeom.scale.hasNegativeDeterminant(), distCoeff); + + const PxVec3 dir = meshSpaceDir/dirLen; + boxSweepVsMeshCBOld(meshData->getBV4Tree(), NULL, NULL, meshSpaceOrigin, sweptAABBMeshSpaceExtents, dir, dirLen, gBoxMeshSweepCallback, &callback); + + return callback.finalizeHit(sweepHit, triMeshGeom, pose, boxTransform, localDir, meshBothSides, isDoubleSided); +} + +static bool gConvexVsMeshSweepCallback(void* userData, const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxU32 triangleIndex, /*const PxU32* vertexIndices,*/ float& dist) +{ + SweepConvexMeshHitCallback* callback = reinterpret_cast<SweepConvexMeshHitCallback*>(userData); + PX_ALIGN_PREFIX(16) char buffer[sizeof(PxRaycastHit)] PX_ALIGN_SUFFIX(16); + PxRaycastHit& hit = 
reinterpret_cast<PxRaycastHit&>(buffer); + hit.faceIndex = triangleIndex; + return !callback->SweepConvexMeshHitCallback::processHit(hit, p0, p1, p2, dist, NULL/*vertexIndices*/); +} + +void physx::Gu::sweepConvex_MeshGeom_BV4(const TriangleMesh* mesh, const Box& hullBox, const PxVec3& localDir, const PxReal distance, SweepConvexMeshHitCallback& callback, bool anyHit) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH34); + const BV4TriangleMesh* meshData = static_cast<const BV4TriangleMesh*>(mesh); + BV4_GenericSweepCB(hullBox, localDir, distance, meshData->getBV4Tree(), gConvexVsMeshSweepCallback, &callback, anyHit); +} + +#endif + diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseInterface.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseInterface.h new file mode 100644 index 00000000..b7cab6ef --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseInterface.h @@ -0,0 +1,420 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_MIDPHASE_INTERFACE_H +#define GU_MIDPHASE_INTERFACE_H + +#include "GuOverlapTests.h" +#include "GuRaycastTests.h" +#include "GuTriangleMesh.h" +#include "PsVecMath.h" + +// PT: this file contains the common interface for all midphase implementations. Specifically the Midphase namespace contains the +// midphase-related entry points, dispatching calls to the proper implementations depending on the triangle mesh's type. The rest of it +// is simply classes & structs shared by all implementations. 
+ +namespace physx +{ + class PxMeshScale; + class PxTriangleMeshGeometry; +namespace Cm +{ + class Matrix34; + class FastVertex2ShapeScaling; +} + +namespace Gu +{ + struct ConvexHullData; + + struct CallbackMode { enum Enum { eANY, eCLOSEST, eMULTIPLE }; }; + + template<typename HitType> + struct MeshHitCallback + { + CallbackMode::Enum mode; + + MeshHitCallback(CallbackMode::Enum aMode) : mode(aMode) {} + + PX_FORCE_INLINE bool inAnyMode() const { return mode == CallbackMode::eANY; } + PX_FORCE_INLINE bool inClosestMode() const { return mode == CallbackMode::eCLOSEST; } + PX_FORCE_INLINE bool inMultipleMode() const { return mode == CallbackMode::eMULTIPLE; } + + virtual PxAgain processHit( // all reported coords are in mesh local space including hit.position + const HitType& hit, const PxVec3& v0, const PxVec3& v1, const PxVec3& v2, PxReal& shrunkMaxT, const PxU32* vIndices) = 0; + + virtual ~MeshHitCallback() {} + }; + + struct SweepConvexMeshHitCallback; + + struct LimitedResults + { + PxU32* mResults; + PxU32 mNbResults; + PxU32 mMaxResults; + PxU32 mStartIndex; + PxU32 mNbSkipped; + bool mOverflow; + + PX_FORCE_INLINE LimitedResults(PxU32* results, PxU32 maxResults, PxU32 startIndex) + : mResults(results), mMaxResults(maxResults), mStartIndex(startIndex) + { + reset(); + } + + PX_FORCE_INLINE void reset() + { + mNbResults = 0; + mNbSkipped = 0; + mOverflow = false; + } + + PX_FORCE_INLINE bool add(PxU32 index) + { + if(mNbResults>=mMaxResults) + { + mOverflow = true; + return false; + } + + if(mNbSkipped>=mStartIndex) + mResults[mNbResults++] = index; + else + mNbSkipped++; + + return true; + } + }; + + // Exposing wrapper for Midphase::intersectOBB just for particles in order to avoid DelayLoad performance problem. 
This should be removed with particles in PhysX 3.5 (US16993) + PX_PHYSX_COMMON_API void intersectOBB_Particles(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned = true); + + // RTree forward declarations + PX_PHYSX_COMMON_API PxU32 raycast_triangleMesh_RTREE(const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose, + const PxVec3& rayOrigin, const PxVec3& rayDir, PxReal maxDist, + PxHitFlags hitFlags, PxU32 maxHits, PxRaycastHit* PX_RESTRICT hits); + PX_PHYSX_COMMON_API bool intersectSphereVsMesh_RTREE(const Sphere& sphere, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + PX_PHYSX_COMMON_API bool intersectBoxVsMesh_RTREE (const Box& box, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + PX_PHYSX_COMMON_API bool intersectCapsuleVsMesh_RTREE(const Capsule& capsule, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + PX_PHYSX_COMMON_API void intersectOBB_RTREE(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned); + PX_PHYSX_COMMON_API bool sweepCapsule_MeshGeom_RTREE( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Gu::Capsule& lss, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation); + PX_PHYSX_COMMON_API bool sweepBox_MeshGeom_RTREE( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Gu::Box& box, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation); + PX_PHYSX_COMMON_API void sweepConvex_MeshGeom_RTREE(const TriangleMesh* mesh, 
const Gu::Box& hullBox, const PxVec3& localDir, const PxReal distance, SweepConvexMeshHitCallback& callback, bool anyHit); + +#if PX_INTEL_FAMILY + // BV4 forward declarations + PX_PHYSX_COMMON_API PxU32 raycast_triangleMesh_BV4( const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose, + const PxVec3& rayOrigin, const PxVec3& rayDir, PxReal maxDist, + PxHitFlags hitFlags, PxU32 maxHits, PxRaycastHit* PX_RESTRICT hits); + PX_PHYSX_COMMON_API bool intersectSphereVsMesh_BV4 (const Sphere& sphere, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + PX_PHYSX_COMMON_API bool intersectBoxVsMesh_BV4 (const Box& box, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + PX_PHYSX_COMMON_API bool intersectCapsuleVsMesh_BV4 (const Capsule& capsule, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + PX_PHYSX_COMMON_API void intersectOBB_BV4(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned); + PX_PHYSX_COMMON_API bool sweepCapsule_MeshGeom_BV4( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Gu::Capsule& lss, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation); + PX_PHYSX_COMMON_API bool sweepBox_MeshGeom_BV4( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Gu::Box& box, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation); + PX_PHYSX_COMMON_API void sweepConvex_MeshGeom_BV4(const TriangleMesh* mesh, const Gu::Box& hullBox, const PxVec3& localDir, const PxReal distance, SweepConvexMeshHitCallback& callback, bool anyHit); +#endif 
+ + typedef PxU32 (*MidphaseRaycastFunction)( const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose, + const PxVec3& rayOrigin, const PxVec3& rayDir, PxReal maxDist, + PxHitFlags hitFlags, PxU32 maxHits, PxRaycastHit* PX_RESTRICT hits); + + typedef bool (*MidphaseSphereOverlapFunction) (const Sphere& sphere, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + typedef bool (*MidphaseBoxOverlapFunction) (const Box& box, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + typedef bool (*MidphaseCapsuleOverlapFunction) (const Capsule& capsule, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results); + typedef void (*MidphaseBoxCBOverlapFunction) (const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned); + + typedef bool (*MidphaseCapsuleSweepFunction)( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Gu::Capsule& lss, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation); + typedef bool (*MidphaseBoxSweepFunction)( const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Gu::Box& box, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation); + typedef void (*MidphaseConvexSweepFunction)( const TriangleMesh* mesh, const Gu::Box& hullBox, const PxVec3& localDir, const PxReal distance, SweepConvexMeshHitCallback& callback, bool anyHit); + +namespace Midphase +{ + PX_FORCE_INLINE bool outputError() + { + static bool reportOnlyOnce = false; + if(!reportOnlyOnce) + { + reportOnlyOnce = true; + Ps::getFoundation().error(PxErrorCode::eINVALID_OPERATION, 
__FILE__, __LINE__, "BV4 midphase only supported on Intel platforms."); + } + return false; + } +} + + #if PX_INTEL_FAMILY + #else + static PxU32 unsupportedMidphase( const TriangleMesh*, const PxTriangleMeshGeometry&, const PxTransform&, + const PxVec3&, const PxVec3&, PxReal, + PxHitFlags, PxU32, PxRaycastHit* PX_RESTRICT) + { + return PxU32(Midphase::outputError()); + } + static bool unsupportedSphereOverlapMidphase(const Sphere&, const TriangleMesh&, const PxTransform&, const PxMeshScale&, LimitedResults*) + { + return Midphase::outputError(); + } + static bool unsupportedBoxOverlapMidphase(const Box&, const TriangleMesh&, const PxTransform&, const PxMeshScale&, LimitedResults*) + { + return Midphase::outputError(); + } + static bool unsupportedCapsuleOverlapMidphase(const Capsule&, const TriangleMesh&, const PxTransform&, const PxMeshScale&, LimitedResults*) + { + return Midphase::outputError(); + } + static void unsupportedBoxCBOverlapMidphase(const TriangleMesh*, const Box&, MeshHitCallback<PxRaycastHit>&, bool, bool) + { + Midphase::outputError(); + } + static bool unsupportedBoxSweepMidphase(const TriangleMesh*, const PxTriangleMeshGeometry&, const PxTransform&, const Gu::Box&, const PxVec3&, const PxReal, PxSweepHit&, PxHitFlags, const PxReal) + { + return Midphase::outputError(); + } + static bool unsupportedCapsuleSweepMidphase(const TriangleMesh*, const PxTriangleMeshGeometry&, const PxTransform&, const Gu::Capsule&, const PxVec3&, const PxReal, PxSweepHit&, PxHitFlags, const PxReal) + { + return Midphase::outputError(); + } + static void unsupportedConvexSweepMidphase(const TriangleMesh*, const Gu::Box&, const PxVec3&, const PxReal, SweepConvexMeshHitCallback&, bool) + { + Midphase::outputError(); + } + #endif + + static const MidphaseRaycastFunction gMidphaseRaycastTable[PxMeshMidPhase::eLAST] = + { + raycast_triangleMesh_RTREE, + #if PX_INTEL_FAMILY + raycast_triangleMesh_BV4, + #else + unsupportedMidphase, + #endif + }; + + static const 
MidphaseSphereOverlapFunction gMidphaseSphereOverlapTable[PxMeshMidPhase::eLAST] = + { + intersectSphereVsMesh_RTREE, + #if PX_INTEL_FAMILY + intersectSphereVsMesh_BV4, + #else + unsupportedSphereOverlapMidphase, + #endif + }; + + static const MidphaseBoxOverlapFunction gMidphaseBoxOverlapTable[PxMeshMidPhase::eLAST] = + { + intersectBoxVsMesh_RTREE, + #if PX_INTEL_FAMILY + intersectBoxVsMesh_BV4, + #else + unsupportedBoxOverlapMidphase, + #endif + }; + + static const MidphaseCapsuleOverlapFunction gMidphaseCapsuleOverlapTable[PxMeshMidPhase::eLAST] = + { + intersectCapsuleVsMesh_RTREE, + #if PX_INTEL_FAMILY + intersectCapsuleVsMesh_BV4, + #else + unsupportedCapsuleOverlapMidphase, + #endif + }; + + static const MidphaseBoxCBOverlapFunction gMidphaseBoxCBOverlapTable[PxMeshMidPhase::eLAST] = + { + intersectOBB_RTREE, + #if PX_INTEL_FAMILY + intersectOBB_BV4, + #else + unsupportedBoxCBOverlapMidphase, + #endif + }; + + static const MidphaseBoxSweepFunction gMidphaseBoxSweepTable[PxMeshMidPhase::eLAST] = + { + sweepBox_MeshGeom_RTREE, + #if PX_INTEL_FAMILY + sweepBox_MeshGeom_BV4, + #else + unsupportedBoxSweepMidphase, + #endif + }; + + static const MidphaseCapsuleSweepFunction gMidphaseCapsuleSweepTable[PxMeshMidPhase::eLAST] = + { + sweepCapsule_MeshGeom_RTREE, + #if PX_INTEL_FAMILY + sweepCapsule_MeshGeom_BV4, + #else + unsupportedCapsuleSweepMidphase, + #endif + }; + + static const MidphaseConvexSweepFunction gMidphaseConvexSweepTable[PxMeshMidPhase::eLAST] = + { + sweepConvex_MeshGeom_RTREE, + #if PX_INTEL_FAMILY + sweepConvex_MeshGeom_BV4, + #else + unsupportedConvexSweepMidphase, + #endif + }; + +namespace Midphase +{ + // \param[in] mesh triangle mesh to raycast against + // \param[in] meshGeom geometry object associated with the mesh + // \param[in] meshTransform pose/transform of geometry object + // \param[in] rayOrigin ray's origin + // \param[in] rayDir ray's unit dir + // \param[in] maxDist ray's length/max distance + // \param[in] hitFlags query 
behavior flags + // \param[in] maxHits max number of hits = size of 'hits' buffer + // \param[out] hits result buffer where to write raycast hits + // \return number of hits written to 'hits' result buffer + // \note there's no mechanism to report overflow. Returned number of hits is just clamped to maxHits. + PX_FORCE_INLINE PxU32 raycastTriangleMesh( const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& meshTransform, + const PxVec3& rayOrigin, const PxVec3& rayDir, PxReal maxDist, + PxHitFlags hitFlags, PxU32 maxHits, PxRaycastHit* PX_RESTRICT hits) + { + const PxU32 index = PxU32(mesh->getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + return gMidphaseRaycastTable[index](mesh, meshGeom, meshTransform, rayOrigin, rayDir, maxDist, hitFlags, maxHits, hits); + } + + // \param[in] sphere sphere + // \param[in] mesh triangle mesh + // \param[in] meshTransform pose/transform of triangle mesh + // \param[in] meshScale mesh scale + // \param[out] results results object if multiple hits are needed, NULL if a simple boolean answer is enough + // \return true if at least one overlap has been found + PX_FORCE_INLINE bool intersectSphereVsMesh(const Sphere& sphere, const TriangleMesh& mesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) + { + const PxU32 index = PxU32(mesh.getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + return gMidphaseSphereOverlapTable[index](sphere, mesh, meshTransform, meshScale, results); + } + + // \param[in] box box + // \param[in] mesh triangle mesh + // \param[in] meshTransform pose/transform of triangle mesh + // \param[in] meshScale mesh scale + // \param[out] results results object if multiple hits are needed, NULL if a simple boolean answer is enough + // \return true if at least one overlap has been found + PX_FORCE_INLINE bool intersectBoxVsMesh(const Box& box, const TriangleMesh& mesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, 
LimitedResults* results) + { + const PxU32 index = PxU32(mesh.getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + return gMidphaseBoxOverlapTable[index](box, mesh, meshTransform, meshScale, results); + } + + // \param[in] capsule capsule + // \param[in] mesh triangle mesh + // \param[in] meshTransform pose/transform of triangle mesh + // \param[in] meshScale mesh scale + // \param[out] results results object if multiple hits are needed, NULL if a simple boolean answer is enough + // \return true if at least one overlap has been found + PX_FORCE_INLINE bool intersectCapsuleVsMesh(const Capsule& capsule, const TriangleMesh& mesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) + { + const PxU32 index = PxU32(mesh.getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + return gMidphaseCapsuleOverlapTable[index](capsule, mesh, meshTransform, meshScale, results); + } + + // \param[in] mesh triangle mesh + // \param[in] box box + // \param[in] callback callback object, called each time a hit is found + // \param[in] bothTriangleSidesCollide true for double-sided meshes + // \param[in] checkObbIsAligned true to use a dedicated codepath for axis-aligned boxes + PX_FORCE_INLINE void intersectOBB(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned = true) + { + const PxU32 index = PxU32(mesh->getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + gMidphaseBoxCBOverlapTable[index](mesh, obb, callback, bothTriangleSidesCollide, checkObbIsAligned); + } + + // \param[in] mesh triangle mesh + // \param[in] meshGeom geometry object associated with the mesh + // \param[in] meshTransform pose/transform of geometry object + // \param[in] capsule swept capsule + // \param[in] unitDir sweep's unit dir + // \param[in] distance sweep's length/max distance + // \param[out] sweepHit hit result + // \param[in] hitFlags query behavior flags + // 
\param[in] inflation optional inflation value for swept shape + // \return true if a hit was found, false otherwise + PX_FORCE_INLINE bool sweepCapsuleVsMesh(const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& meshTransform, + const Gu::Capsule& capsule, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation) + { + const PxU32 index = PxU32(mesh->getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + return gMidphaseCapsuleSweepTable[index](mesh, meshGeom, meshTransform, capsule, unitDir, distance, sweepHit, hitFlags, inflation); + } + + // \param[in] mesh triangle mesh + // \param[in] meshGeom geometry object associated with the mesh + // \param[in] meshTransform pose/transform of geometry object + // \param[in] box swept box + // \param[in] unitDir sweep's unit dir + // \param[in] distance sweep's length/max distance + // \param[out] sweepHit hit result + // \param[in] hitFlags query behavior flags + // \param[in] inflation optional inflation value for swept shape + // \return true if a hit was found, false otherwise + PX_FORCE_INLINE bool sweepBoxVsMesh(const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& meshTransform, + const Gu::Box& box, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation) + { + const PxU32 index = PxU32(mesh->getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + return gMidphaseBoxSweepTable[index](mesh, meshGeom, meshTransform, box, unitDir, distance, sweepHit, hitFlags, inflation); + } + + // \param[in] mesh triangle mesh + // \param[in] hullBox hull's bounding box + // \param[in] localDir sweep's unit dir, in local/mesh space + // \param[in] distance sweep's length/max distance + // \param[in] callback callback object, called each time a hit is found + // \param[in] anyHit true for PxHitFlag::eMESH_ANY queries + PX_FORCE_INLINE void 
sweepConvexVsMesh(const TriangleMesh* mesh, const Gu::Box& hullBox, const PxVec3& localDir, const PxReal distance, SweepConvexMeshHitCallback& callback, bool anyHit) + { + const PxU32 index = PxU32(mesh->getConcreteType() - PxConcreteType::eTRIANGLE_MESH_BVH33); + gMidphaseConvexSweepTable[index](mesh, hullBox, localDir, distance, callback, anyHit); + } +} +} +} +#endif // GU_MIDPHASE_INTERFACE_H diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseRTree.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseRTree.cpp new file mode 100644 index 00000000..6133f0b8 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuMidphaseRTree.cpp @@ -0,0 +1,886 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "GuSweepMesh.h" +#include "GuIntersectionRayTriangle.h" +#include "GuIntersectionCapsuleTriangle.h" +#include "GuIntersectionRayBox.h" +#include "GuIntersectionRayBoxSIMD.h" +#include "GuSphere.h" +#include "GuBoxConversion.h" +#include "GuConvexUtilsInternal.h" +#include "GuVecTriangle.h" +#include "GuIntersectionTriangleBox.h" +#include "GuSIMDHelpers.h" +#include "GuTriangleVertexPointers.h" +#include "GuTriangleMeshRTree.h" +#include "GuInternal.h" + +// This file contains code specific to the RTree midphase. 
+ +using namespace physx; +using namespace Cm; +using namespace Gu; +using namespace physx::shdfnd::aos; + +struct MeshRayCollider +{ + template <int tInflate, int tRayTest> + PX_PHYSX_COMMON_API static void collide( + const PxVec3& orig, const PxVec3& dir, // dir is not normalized (full length), both in mesh space (unless meshWorld is non-zero) + PxReal maxT, // maxT is from [0,1], if maxT is 0.0f, AABB traversal will be used + bool bothTriangleSidesCollide, const RTreeTriangleMesh* mesh, MeshHitCallback<PxRaycastHit>& callback, + const PxVec3* inflate = NULL); + + PX_PHYSX_COMMON_API static void collideOBB( + const Box& obb, bool bothTriangleSidesCollide, const RTreeTriangleMesh* mesh, MeshHitCallback<PxRaycastHit>& callback, + bool checkObbIsAligned = true); // perf hint, pass false if obb is rarely axis aligned +}; + +class SimpleRayTriOverlap +{ +public: + PX_FORCE_INLINE SimpleRayTriOverlap(const PxVec3& origin, const PxVec3& dir, bool bothSides, PxReal geomEpsilon) + : mOrigin(origin), mDir(dir), mBothSides(bothSides), mGeomEpsilon(geomEpsilon) + { + } + + PX_FORCE_INLINE Ps::IntBool overlap(const PxVec3& vert0, const PxVec3& vert1, const PxVec3& vert2, PxRaycastHit& hit) const + { + if(!intersectRayTriangle(mOrigin, mDir, vert0, vert1, vert2, hit.distance, hit.u, hit.v, !mBothSides, mGeomEpsilon)) + return false; + + if(hit.distance<-mGeomEpsilon) // test if the ray intersection t is really negative + return false; + + return true; + } + + PxVec3 mOrigin; + PxVec3 mDir; + bool mBothSides; + PxReal mGeomEpsilon; +}; + +using Gu::RTree; + +// This callback comes from RTree and decodes LeafTriangle indices stored in rtree into actual triangles +// This callback is needed because RTree doesn't know that it stores triangles since it's a general purpose spatial index + +#if PX_VC + #pragma warning(push) + #pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value. 
+#endif + +template <int tInflate, bool tRayTest> +struct RayRTreeCallback : RTree::CallbackRaycast, RTree::Callback +{ + MeshHitCallback<PxRaycastHit>& outerCallback; + PxI32 has16BitIndices; + const void* mTris; + const PxVec3* mVerts; + const PxVec3* mInflate; + const SimpleRayTriOverlap rayCollider; + PxReal maxT; + PxRaycastHit closestHit; // recorded closest hit over the whole traversal (only for callback mode eCLOSEST) + PxVec3 cv0, cv1, cv2; // PT: make sure these aren't last in the class, to safely V4Load them + PxU32 cis[3]; + bool hadClosestHit; + const bool closestMode; + Vec3V inflateV, rayOriginV, rayDirV; + + RayRTreeCallback( + PxReal geomEpsilon, MeshHitCallback<PxRaycastHit>& callback, + PxI32 has16BitIndices_, const void* tris, const PxVec3* verts, + const PxVec3& origin, const PxVec3& dir, PxReal maxT_, bool bothSides, const PxVec3* inflate) + : outerCallback(callback), has16BitIndices(has16BitIndices_), + mTris(tris), mVerts(verts), mInflate(inflate), rayCollider(origin, dir, bothSides, geomEpsilon), + maxT(maxT_), closestMode(callback.inClosestMode()) + { + PX_ASSERT(closestHit.distance == PX_MAX_REAL); + hadClosestHit = false; + if (tInflate) + inflateV = V3LoadU(*mInflate); + rayOriginV = V3LoadU(rayCollider.mOrigin); + rayDirV = V3LoadU(rayCollider.mDir); + } + + PX_FORCE_INLINE void getVertIndices(PxU32 triIndex, PxU32& i0, PxU32 &i1, PxU32 &i2) + { + if(has16BitIndices) + { + const PxU16* p = reinterpret_cast<const PxU16*>(mTris) + triIndex*3; + i0 = p[0]; i1 = p[1]; i2 = p[2]; + } + else + { + const PxU32* p = reinterpret_cast<const PxU32*>(mTris) + triIndex*3; + i0 = p[0]; i1 = p[1]; i2 = p[2]; + } + } + + virtual PX_FORCE_INLINE bool processResults(PxU32 NumTouched, PxU32* Touched, PxF32& newMaxT) + { + PX_ASSERT(NumTouched > 0); + // Loop through touched leaves + PxRaycastHit tempHit; + for(PxU32 leaf = 0; leaf<NumTouched; leaf++) + { + // Each leaf box has a set of triangles + LeafTriangles currentLeaf; + currentLeaf.Data = 
Touched[leaf]; + PxU32 nbLeafTris = currentLeaf.GetNbTriangles(); + PxU32 baseLeafTriIndex = currentLeaf.GetTriangleIndex(); + + for(PxU32 i = 0; i < nbLeafTris; i++) + { + PxU32 i0, i1, i2; + const PxU32 triangleIndex = baseLeafTriIndex+i; + getVertIndices(triangleIndex, i0, i1, i2); + + const PxVec3& v0 = mVerts[i0], &v1 = mVerts[i1], &v2 = mVerts[i2]; + const PxU32 vinds[3] = { i0, i1, i2 }; + + if (tRayTest) + { + Ps::IntBool overlap; + if (tInflate) + { + // AP: mesh skew is already included here (ray is pre-transformed) + Vec3V v0v = V3LoadU(v0), v1v = V3LoadU(v1), v2v = V3LoadU(v2); + Vec3V minB = V3Min(V3Min(v0v, v1v), v2v), maxB = V3Max(V3Max(v0v, v1v), v2v); + + // PT: we add an epsilon to max distance, to make sure we don't reject triangles that are just at the same + // distance as best triangle so far. We need to keep all of these to make sure we return the one with the + // best normal. + const float relativeEpsilon = GU_EPSILON_SAME_DISTANCE * PxMax(1.0f, maxT); + FloatV tNear, tFar; + overlap = intersectRayAABB2( + V3Sub(minB, inflateV), V3Add(maxB, inflateV), rayOriginV, rayDirV, FLoad(maxT+relativeEpsilon), tNear, tFar); + if (overlap) + { + // can't clip to tFar here because hitting the AABB doesn't guarantee that we can clip + // (since we can still miss the actual tri) + tempHit.distance = maxT; + tempHit.faceIndex = triangleIndex; + tempHit.u = tempHit.v = 0.0f; + } + } else + overlap = rayCollider.overlap(v0, v1, v2, tempHit) && tempHit.distance <= maxT; + if(!overlap) + continue; + } + tempHit.faceIndex = triangleIndex; + tempHit.flags = PxHitFlag::ePOSITION|PxHitFlag::eDISTANCE; + // Intersection point is valid if dist < segment's length + // We know dist>0 so we can use integers + if (closestMode) + { + if(tempHit.distance < closestHit.distance) + { + closestHit = tempHit; + newMaxT = PxMin(tempHit.distance, newMaxT); + cv0 = v0; cv1 = v1; cv2 = v2; + cis[0] = vinds[0]; cis[1] = vinds[1]; cis[2] = vinds[2]; + hadClosestHit = true; + } + } 
else + { + PxReal shrunkMaxT = newMaxT; + PxAgain again = outerCallback.processHit(tempHit, v0, v1, v2, shrunkMaxT, vinds); + if (!again) + return false; + if (shrunkMaxT < newMaxT) + { + newMaxT = shrunkMaxT; + maxT = shrunkMaxT; + } + } + + if (outerCallback.inAnyMode()) // early out if in ANY mode + return false; + } + + } // for(PxU32 leaf = 0; leaf<NumTouched; leaf++) + + return true; + } + + virtual bool processResults(PxU32 numTouched, PxU32* touched) + { + PxF32 dummy; + return RayRTreeCallback::processResults(numTouched, touched, dummy); + } + + + virtual ~RayRTreeCallback() + { + if (hadClosestHit) + { + PX_ASSERT(outerCallback.inClosestMode()); + outerCallback.processHit(closestHit, cv0, cv1, cv2, maxT, cis); + } + } + +private: + RayRTreeCallback& operator=(const RayRTreeCallback&); +}; + +#if PX_VC + #pragma warning(pop) +#endif + +void MeshRayCollider::collideOBB( + const Box& obb, bool bothTriangleSidesCollide, const RTreeTriangleMesh* mi, MeshHitCallback<PxRaycastHit>& callback, + bool checkObbIsAligned) +{ + const PxU32 maxResults = RTREE_N; // maxResults=rtree page size for more efficient early out + PxU32 buf[maxResults]; + RayRTreeCallback<false, false> rTreeCallback( + mi->getGeomEpsilon(), callback, mi->has16BitIndices(), mi->getTrianglesFast(), mi->getVerticesFast(), + PxVec3(0), PxVec3(0), 0.0f, bothTriangleSidesCollide, NULL); + if (checkObbIsAligned && PxAbs(PxQuat(obb.rot).w) > 0.9999f) + { + PxVec3 aabbExtents = obb.computeAABBExtent(); + mi->getRTree().traverseAABB(obb.center - aabbExtents, obb.center + aabbExtents, maxResults, buf, &rTreeCallback); + } else + mi->getRTree().traverseOBB(obb, maxResults, buf, &rTreeCallback); +} + +template <int tInflate, int tRayTest> +void MeshRayCollider::collide( + const PxVec3& orig, const PxVec3& dir, PxReal maxT, bool bothSides, + const RTreeTriangleMesh* mi, MeshHitCallback<PxRaycastHit>& callback, + const PxVec3* inflate) +{ + const PxU32 maxResults = RTREE_N; // maxResults=rtree page size for 
more efficient early out + PxU32 buf[maxResults]; + if (maxT == 0.0f) // AABB traversal path + { + RayRTreeCallback<tInflate, false> rTreeCallback( + mi->getGeomEpsilon(), callback, mi->has16BitIndices(), mi->getTrianglesFast(), mi->getVerticesFast(), + orig, dir, maxT, bothSides, inflate); + PxVec3 inflate1 = tInflate ? *inflate : PxVec3(0); // both maxT and inflate can be zero, so need to check tInflate + mi->getRTree().traverseAABB(orig-inflate1, orig+inflate1, maxResults, buf, &rTreeCallback); + } + else // ray traversal path + { + RayRTreeCallback<tInflate, tRayTest> rTreeCallback( + mi->getGeomEpsilon(), callback, mi->has16BitIndices(), mi->getTrianglesFast(), mi->getVerticesFast(), + orig, dir, maxT, bothSides, inflate); + mi->getRTree().traverseRay<tInflate>(orig, dir, maxResults, buf, &rTreeCallback, inflate, maxT); + } +} + + +#define TINST(a,b) \ +template void MeshRayCollider::collide<a,b>( \ + const PxVec3& orig, const PxVec3& dir, PxReal maxT, bool bothSides, const RTreeTriangleMesh* mesh, \ + MeshHitCallback<PxRaycastHit>& callback, const PxVec3* inflate); + +TINST(0,0) +TINST(1,0) +TINST(0,1) +TINST(1,1) + +#undef TINST + +#include "GuRaycastTests.h" +#include "PxTriangleMeshGeometry.h" +#include "GuTriangleMesh.h" +#include "CmScaling.h" + +struct RayMeshColliderCallback : public MeshHitCallback<PxRaycastHit> +{ + PxRaycastHit* mDstBase; + PxU32 mHitNum; + PxU32 mMaxHits; + const PxMeshScale* mScale; + const PxTransform* mPose; + const Matrix34* mWorld2vertexSkew; + PxU32 mHitFlags; + const PxVec3& mRayDir; + bool mIsDoubleSided; + float mDistCoeff; + + RayMeshColliderCallback( + CallbackMode::Enum mode_, PxRaycastHit* hits, PxU32 maxHits, const PxMeshScale* scale, const PxTransform* pose, + const Matrix34* world2vertexSkew, PxU32 hitFlags, const PxVec3& rayDir, bool isDoubleSided, float distCoeff) : + MeshHitCallback<PxRaycastHit> (mode_), + mDstBase (hits), + mHitNum (0), + mMaxHits (maxHits), + mScale (scale), + mPose (pose), + mWorld2vertexSkew 
(world2vertexSkew), + mHitFlags (hitFlags), + mRayDir (rayDir), + mIsDoubleSided (isDoubleSided), + mDistCoeff (distCoeff) + { + } + + // return false for early out + virtual bool processHit( + const PxRaycastHit& lHit, const PxVec3& lp0, const PxVec3& lp1, const PxVec3& lp2, PxReal&, const PxU32*) + { + const PxReal u = lHit.u, v = lHit.v; + const PxVec3 localImpact = (1.0f - u - v)*lp0 + u*lp1 + v*lp2; + + //not worth concatenating to do 1 transform: PxMat34Legacy vertex2worldSkew = scaling.getVertex2WorldSkew(absPose); + // PT: TODO: revisit this for N hits + PxRaycastHit hit = lHit; + hit.position = mPose->transform(mScale->transform(localImpact)); + hit.flags = PxHitFlag::ePOSITION|PxHitFlag::eDISTANCE|PxHitFlag::eUV|PxHitFlag::eFACE_INDEX; + hit.normal = PxVec3(0.0f); + hit.distance *= mDistCoeff; + + // Compute additional information if needed + if(mHitFlags & PxHitFlag::eNORMAL) + { + // User requested impact normal + const PxVec3 localNormal = (lp1 - lp0).cross(lp2 - lp0); + + if(mWorld2vertexSkew) + { + hit.normal = mWorld2vertexSkew->rotateTranspose(localNormal); + if (mScale->hasNegativeDeterminant()) + Ps::swap<PxReal>(hit.u, hit.v); // have to swap the UVs though since they were computed in mesh local space + } + else + hit.normal = hit.normal = mPose->rotate(localNormal); + hit.normal.normalize(); + + // PT: figure out correct normal orientation (DE7458) + // - if the mesh is single-sided the normal should be the regular triangle normal N, regardless of eMESH_BOTH_SIDES. + // - if the mesh is double-sided the correct normal can be either N or -N. We take the one opposed to ray direction. 
+ if(mIsDoubleSided && hit.normal.dot(mRayDir) > 0.0f) + hit.normal = -hit.normal; + + hit.flags |= PxHitFlag::eNORMAL; + } + + // PT: no callback => store results in provided buffer + if(mHitNum == mMaxHits) + return false; + + mDstBase[mHitNum++] = hit; + return true; + } + +private: + RayMeshColliderCallback& operator=(const RayMeshColliderCallback&); +}; + +PxU32 physx::Gu::raycast_triangleMesh_RTREE(const TriangleMesh* mesh, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose, + const PxVec3& rayOrigin, const PxVec3& rayDir, PxReal maxDist, + PxHitFlags hitFlags, PxU32 maxHits, PxRaycastHit* PX_RESTRICT hits) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH33); + + const RTreeTriangleMesh* meshData = static_cast<const RTreeTriangleMesh*>(mesh); + + //scaling: transform the ray to vertex space + + PxVec3 orig, dir; + Matrix34 world2vertexSkew; + Matrix34* world2vertexSkewP = NULL; + PxReal distCoeff = 1.0f; + if(meshGeom.scale.isIdentity()) + { + orig = pose.transformInv(rayOrigin); + dir = pose.rotateInv(rayDir); + } + else + { + world2vertexSkew = meshGeom.scale.getInverse() * pose.getInverse(); + world2vertexSkewP = &world2vertexSkew; + orig = world2vertexSkew.transform(rayOrigin); + dir = world2vertexSkew.rotate(rayDir); + { + distCoeff = dir.normalize(); + maxDist *= distCoeff; + maxDist += 1e-3f; + distCoeff = 1.0f / distCoeff; + } + } + + const bool isDoubleSided = meshGeom.meshFlags.isSet(PxMeshGeometryFlag::eDOUBLE_SIDED); + const bool bothSides = isDoubleSided || (hitFlags & PxHitFlag::eMESH_BOTH_SIDES); + + RayMeshColliderCallback callback( + (maxHits > 1) ? CallbackMode::eMULTIPLE : (hitFlags & PxHitFlag::eMESH_ANY ? 
CallbackMode::eANY : CallbackMode::eCLOSEST), + hits, maxHits, &meshGeom.scale, &pose, world2vertexSkewP, hitFlags, rayDir, isDoubleSided, distCoeff); + + MeshRayCollider::collide<0, 1>(orig, dir, maxDist, bothSides, static_cast<const RTreeTriangleMesh*>(meshData), callback, NULL); + return callback.mHitNum; +} + + +static PX_INLINE void computeSweptAABBAroundOBB( + const Box& obb, PxVec3& sweepOrigin, PxVec3& sweepExtents, PxVec3& sweepDir, PxReal& sweepLen) +{ + PxU32 other1, other2; + // largest axis of the OBB is the sweep direction, sum of abs of two other is the swept AABB extents + PxU32 lai = Ps::largestAxis(obb.extents, other1, other2); + PxVec3 longestAxis = obb.rot[lai]*obb.extents[lai]; + PxVec3 absOther1 = obb.rot[other1].abs()*obb.extents[other1]; + PxVec3 absOther2 = obb.rot[other2].abs()*obb.extents[other2]; + sweepOrigin = obb.center - longestAxis; + sweepExtents = absOther1 + absOther2 + PxVec3(GU_MIN_AABB_EXTENT); // see comments for GU_MIN_AABB_EXTENT + sweepLen = 2.0f; // length is already included in longestAxis + sweepDir = longestAxis; +} + +enum { eSPHERE, eCAPSULE, eBOX }; // values for tSCB + +#if PX_VC + #pragma warning(push) + #pragma warning( disable : 4324 ) // Padding was added at the end of a structure because of a __declspec(align) value. 
+ #pragma warning( disable : 4512 ) // assignment operator could not be generated +#endif + +namespace +{ +struct IntersectShapeVsMeshCallback : MeshHitCallback<PxRaycastHit> +{ + PX_NOCOPY(IntersectShapeVsMeshCallback) +public: + IntersectShapeVsMeshCallback(const PxMat33& vertexToShapeSkew, LimitedResults* results, bool flipNormal) + : MeshHitCallback<PxRaycastHit>(CallbackMode::eMULTIPLE), + mVertexToShapeSkew (vertexToShapeSkew), + mResults (results), + mAnyHits (false), + mFlipNormal (flipNormal) + { + } + virtual ~IntersectShapeVsMeshCallback(){} + + const PxMat33& mVertexToShapeSkew; // vertex to box without translation for boxes + LimitedResults* mResults; + bool mAnyHits; + bool mFlipNormal; + + PX_FORCE_INLINE bool recordHit(const PxRaycastHit& aHit, Ps::IntBool hit) + { + if(hit) + { + mAnyHits = true; + if(mResults) + mResults->add(aHit.faceIndex); + else + return false; // abort traversal if we are only interested in firstContact (mResults is NULL) + } + return true; // if we are here, either no triangles were hit or multiple results are expected => continue traversal + } +}; + +template<bool tScaleIsIdentity> +struct IntersectSphereVsMeshCallback : IntersectShapeVsMeshCallback +{ + IntersectSphereVsMeshCallback(const PxMat33& m, LimitedResults* r, bool flipNormal) : IntersectShapeVsMeshCallback(m, r, flipNormal) {} + virtual ~IntersectSphereVsMeshCallback(){} + PxF32 mMinDist2; + PxVec3 mLocalCenter; // PT: sphere center in local/mesh space + + virtual PxAgain processHit( // all reported coords are in mesh local space including hit.position + const PxRaycastHit& aHit, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2, PxReal&, const PxU32*) + { + const Vec3V v0 = V3LoadU(tScaleIsIdentity ? av0 : mVertexToShapeSkew * av0); + const Vec3V v1 = V3LoadU(tScaleIsIdentity ? av1 : mVertexToShapeSkew * (mFlipNormal ? av2 : av1)); + const Vec3V v2 = V3LoadU(tScaleIsIdentity ? av2 : mVertexToShapeSkew * (mFlipNormal ? 
av1 : av2)); + + FloatV dummy1, dummy2; + Vec3V closestP; + PxReal dist2; + FStore(distancePointTriangleSquared(V3LoadU(mLocalCenter), v0, v1, v2, dummy1, dummy2, closestP), &dist2); + return recordHit(aHit, dist2 <= mMinDist2); + } +}; + +template<bool tScaleIsIdentity> +struct IntersectCapsuleVsMeshCallback : IntersectShapeVsMeshCallback +{ + IntersectCapsuleVsMeshCallback(const PxMat33& m, LimitedResults* r, bool flipNormal) : IntersectShapeVsMeshCallback(m, r, flipNormal) {} + virtual ~IntersectCapsuleVsMeshCallback(){} + + Capsule mLocalCapsule; // PT: capsule in mesh/local space + CapsuleTriangleOverlapData mParams; + + virtual PxAgain processHit( // all reported coords are in mesh local space including hit.position + const PxRaycastHit& aHit, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2, PxReal&, const PxU32*) + { + bool hit; + if(tScaleIsIdentity) + { + const PxVec3 normal = (av0 - av1).cross(av0 - av2); + hit = intersectCapsuleTriangle(normal, av0, av1, av2, mLocalCapsule, mParams); + } + else + { + const PxVec3 v0 = mVertexToShapeSkew * av0; + const PxVec3 v1 = mVertexToShapeSkew * (mFlipNormal ? av2 : av1); + const PxVec3 v2 = mVertexToShapeSkew * (mFlipNormal ? 
av1 : av2); + const PxVec3 normal = (v0 - v1).cross(v0 - v2); + hit = intersectCapsuleTriangle(normal, v0, v1, v2, mLocalCapsule, mParams); + } + return recordHit(aHit, hit); + } +}; + +template<bool tScaleIsIdentity> +struct IntersectBoxVsMeshCallback : IntersectShapeVsMeshCallback +{ + IntersectBoxVsMeshCallback(const PxMat33& m, LimitedResults* r, bool flipNormal) : IntersectShapeVsMeshCallback(m, r, flipNormal) {} + virtual ~IntersectBoxVsMeshCallback(){} + + Matrix34 mVertexToBox; + Vec3p mBoxExtents, mBoxCenter; + + virtual PxAgain processHit( // all reported coords are in mesh local space including hit.position + const PxRaycastHit& aHit, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2, PxReal&, const PxU32*) + { + Vec3p v0, v1, v2; + if(tScaleIsIdentity) + { + v0 = mVertexToShapeSkew * av0; // transform from skewed mesh vertex to box space, + v1 = mVertexToShapeSkew * av1; // this includes inverse skew, inverse mesh shape transform and inverse box basis + v2 = mVertexToShapeSkew * av2; + } + else + { + v0 = mVertexToBox.transform(av0); + v1 = mVertexToBox.transform(mFlipNormal ? av2 : av1); + v2 = mVertexToBox.transform(mFlipNormal ? 
av1 : av2); + } + + // PT: this one is safe because we're using Vec3p for all parameters + const Ps::IntBool hit = intersectTriangleBox_Unsafe(mBoxCenter, mBoxExtents, v0, v1, v2); + return recordHit(aHit, hit); + } +}; +} + +#if PX_VC + #pragma warning(pop) +#endif + +template<int tSCB, bool idtMeshScale> +static bool intersectAnyVsMeshT( + const Sphere* worldSphere, const Capsule* worldCapsule, const Box* worldOBB, + const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, + LimitedResults* results) +{ + const bool flipNormal = meshScale.hasNegativeDeterminant(); + PxMat33 shapeToVertexSkew, vertexToShapeSkew; + if (!idtMeshScale && tSCB != eBOX) + { + vertexToShapeSkew = meshScale.toMat33(); + shapeToVertexSkew = vertexToShapeSkew.getInverse(); + } + + if (tSCB == eSPHERE) + { + IntersectSphereVsMeshCallback<idtMeshScale> callback(vertexToShapeSkew, results, flipNormal); + // transform sphere center from world to mesh shape space + const PxVec3 center = meshTransform.transformInv(worldSphere->center); + + // callback will transform verts + callback.mLocalCenter = center; + callback.mMinDist2 = worldSphere->radius*worldSphere->radius; + + PxVec3 sweepOrigin, sweepDir, sweepExtents; + PxReal sweepLen; + if (!idtMeshScale) + { + // AP: compute a swept AABB around an OBB around a skewed sphere + // TODO: we could do better than an AABB around OBB actually because we can slice off the corners.. 
+ const Box worldOBB_(worldSphere->center, PxVec3(worldSphere->radius), PxMat33(PxIdentity)); + Box vertexOBB; + computeVertexSpaceOBB(vertexOBB, worldOBB_, meshTransform, meshScale); + computeSweptAABBAroundOBB(vertexOBB, sweepOrigin, sweepExtents, sweepDir, sweepLen); + } else + { + sweepOrigin = center; + sweepDir = PxVec3(1.0f,0,0); + sweepLen = 0.0f; + sweepExtents = PxVec3(PxMax(worldSphere->radius, GU_MIN_AABB_EXTENT)); + } + + MeshRayCollider::collide<1, 1>(sweepOrigin, sweepDir, sweepLen, true, static_cast<const RTreeTriangleMesh*>(&triMesh), callback, &sweepExtents); + + return callback.mAnyHits; + } + else if (tSCB == eCAPSULE) + { + IntersectCapsuleVsMeshCallback<idtMeshScale> callback(vertexToShapeSkew, results, flipNormal); + const PxF32 radius = worldCapsule->radius; + + // transform world capsule to mesh shape space + callback.mLocalCapsule.p0 = meshTransform.transformInv(worldCapsule->p0); + callback.mLocalCapsule.p1 = meshTransform.transformInv(worldCapsule->p1); + callback.mLocalCapsule.radius = radius; + callback.mParams.init(callback.mLocalCapsule); + + if (idtMeshScale) + { + // traverse a sweptAABB around the capsule + const PxVec3 radius3(radius); + MeshRayCollider::collide<1, 0>(callback.mLocalCapsule.p0, callback.mLocalCapsule.p1-callback.mLocalCapsule.p0, 1.0f, true, static_cast<const RTreeTriangleMesh*>(&triMesh), callback, &radius3); + } + else + { + // make vertex space OBB + Box vertexOBB; + Box worldOBB_; + worldOBB_.create(*worldCapsule); // AP: potential optimization (meshTransform.inverse is already in callback.mCapsule) + computeVertexSpaceOBB(vertexOBB, worldOBB_, meshTransform, meshScale); + + MeshRayCollider::collideOBB(vertexOBB, true, static_cast<const RTreeTriangleMesh*>(&triMesh), callback); + } + return callback.mAnyHits; + } + else if (tSCB == eBOX) + { + Box vertexOBB; // query box in vertex space + if (idtMeshScale) + { + // mesh scale is identity - just inverse transform the box without optimization + vertexOBB = 
transformBoxOrthonormal(*worldOBB, meshTransform.getInverse()); + // mesh vertices will be transformed from skewed vertex space directly to box AABB space + // box inverse rotation is baked into the vertexToShapeSkew transform + // if meshScale is not identity, vertexOBB already effectively includes meshScale transform + PxVec3 boxCenter; + getInverse(vertexToShapeSkew, boxCenter, vertexOBB.rot, vertexOBB.center); + IntersectBoxVsMeshCallback<idtMeshScale> callback(vertexToShapeSkew, results, flipNormal); + + callback.mBoxCenter = -boxCenter; + callback.mBoxExtents = worldOBB->extents; // extents do not change + + MeshRayCollider::collideOBB(vertexOBB, true, static_cast<const RTreeTriangleMesh*>(&triMesh), callback); + + return callback.mAnyHits; + } else + { + computeVertexSpaceOBB(vertexOBB, *worldOBB, meshTransform, meshScale); + + // mesh scale needs to be included - inverse transform and optimize the box + const PxMat33 vertexToWorldSkew_Rot = PxMat33Padded(meshTransform.q) * meshScale.toMat33(); + const PxVec3& vertexToWorldSkew_Trans = meshTransform.p; + + Matrix34 tmp; + buildMatrixFromBox(tmp, *worldOBB); + const Matrix34 inv = tmp.getInverseRT(); + const Matrix34 _vertexToWorldSkew(vertexToWorldSkew_Rot, vertexToWorldSkew_Trans); + + IntersectBoxVsMeshCallback<idtMeshScale> callback(vertexToShapeSkew, results, flipNormal); + callback.mVertexToBox = inv * _vertexToWorldSkew; + callback.mBoxCenter = PxVec3(0.0f); + callback.mBoxExtents = worldOBB->extents; // extents do not change + + MeshRayCollider::collideOBB(vertexOBB, true, static_cast<const RTreeTriangleMesh*>(&triMesh), callback); + + return callback.mAnyHits; + } + } + else + { + PX_ASSERT(0); + return false; + } +} + +template<int tSCB> +static bool intersectAnyVsMesh( + const Sphere* worldSphere, const Capsule* worldCapsule, const Box* worldOBB, + const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, + LimitedResults* results) +{ + 
PX_ASSERT(triMesh.getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH33); + if (meshScale.isIdentity()) + return intersectAnyVsMeshT<tSCB, true>(worldSphere, worldCapsule, worldOBB, triMesh, meshTransform, meshScale, results); + else + return intersectAnyVsMeshT<tSCB, false>(worldSphere, worldCapsule, worldOBB, triMesh, meshTransform, meshScale, results); +} + +bool physx::Gu::intersectSphereVsMesh_RTREE(const Sphere& sphere, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) +{ + return intersectAnyVsMesh<eSPHERE>(&sphere, NULL, NULL, triMesh, meshTransform, meshScale, results); +} + +bool physx::Gu::intersectBoxVsMesh_RTREE(const Box& box, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) +{ + return intersectAnyVsMesh<eBOX>(NULL, NULL, &box, triMesh, meshTransform, meshScale, results); +} + +bool physx::Gu::intersectCapsuleVsMesh_RTREE(const Capsule& capsule, const TriangleMesh& triMesh, const PxTransform& meshTransform, const PxMeshScale& meshScale, LimitedResults* results) +{ + return intersectAnyVsMesh<eCAPSULE>(NULL, &capsule, NULL, triMesh, meshTransform, meshScale, results); +} + +void physx::Gu::intersectOBB_RTREE(const TriangleMesh* mesh, const Box& obb, MeshHitCallback<PxRaycastHit>& callback, bool bothTriangleSidesCollide, bool checkObbIsAligned) +{ + MeshRayCollider::collideOBB(obb, bothTriangleSidesCollide, static_cast<const RTreeTriangleMesh*>(mesh), callback, checkObbIsAligned); +} + +// PT: TODO: refactor/share bits of this +bool physx::Gu::sweepCapsule_MeshGeom_RTREE(const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Capsule& lss, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH33); + const RTreeTriangleMesh* meshData = 
static_cast<const RTreeTriangleMesh*>(mesh); + + const Capsule inflatedCapsule(lss.p0, lss.p1, lss.radius + inflation); + + const bool isIdentity = triMeshGeom.scale.isIdentity(); + bool isDoubleSided = (triMeshGeom.meshFlags & PxMeshGeometryFlag::eDOUBLE_SIDED); + const PxU32 meshBothSides = hitFlags & PxHitFlag::eMESH_BOTH_SIDES; + + // compute sweptAABB + const PxVec3 localP0 = pose.transformInv(inflatedCapsule.p0); + const PxVec3 localP1 = pose.transformInv(inflatedCapsule.p1); + PxVec3 sweepOrigin = (localP0+localP1)*0.5f; + PxVec3 sweepDir = pose.rotateInv(unitDir); + PxVec3 sweepExtents = PxVec3(inflatedCapsule.radius) + (localP0-localP1).abs()*0.5f; + PxReal distance1 = distance; + PxReal distCoeff = 1.0f; + Matrix34 poseWithScale; + if(!isIdentity) + { + poseWithScale = pose * triMeshGeom.scale; + distance1 = computeSweepData(triMeshGeom, sweepOrigin, sweepExtents, sweepDir, distance); + distCoeff = distance1 / distance; + } else + poseWithScale = Matrix34(pose); + + SweepCapsuleMeshHitCallback callback(sweepHit, poseWithScale, distance, isDoubleSided, inflatedCapsule, unitDir, hitFlags, triMeshGeom.scale.hasNegativeDeterminant(), distCoeff); + + MeshRayCollider::collide<1, 1>(sweepOrigin, sweepDir, distance1, true, meshData, callback, &sweepExtents); + + if(meshBothSides) + isDoubleSided = true; + + return callback.finalizeHit(sweepHit, inflatedCapsule, triMeshGeom, pose, isDoubleSided); +} + +#include "GuSweepSharedTests.h" + +// PT: TODO: refactor/share bits of this +bool physx::Gu::sweepBox_MeshGeom_RTREE(const TriangleMesh* mesh, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose, + const Box& box, const PxVec3& unitDir, const PxReal distance, + PxSweepHit& sweepHit, PxHitFlags hitFlags, const PxReal inflation) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH33); + const RTreeTriangleMesh* meshData = static_cast<const RTreeTriangleMesh*>(mesh); + + const bool isIdentity = triMeshGeom.scale.isIdentity(); + + 
const bool meshBothSides = hitFlags & PxHitFlag::eMESH_BOTH_SIDES; + const bool isDoubleSided = triMeshGeom.meshFlags & PxMeshGeometryFlag::eDOUBLE_SIDED; + + Matrix34 meshToWorldSkew; + PxVec3 sweptAABBMeshSpaceExtents, meshSpaceOrigin, meshSpaceDir; + + // Input sweep params: geom, pose, box, unitDir, distance + // We convert the origin from world space to mesh local space + // and convert the box+pose to mesh space AABB + if(isIdentity) + { + meshToWorldSkew = Matrix34(pose); + PxMat33 worldToMeshRot(pose.q.getConjugate()); // extract rotation matrix from pose.q + meshSpaceOrigin = worldToMeshRot.transform(box.center - pose.p); + meshSpaceDir = worldToMeshRot.transform(unitDir) * distance; + PxMat33 boxToMeshRot = worldToMeshRot * box.rot; + sweptAABBMeshSpaceExtents = boxToMeshRot.column0.abs() * box.extents.x + + boxToMeshRot.column1.abs() * box.extents.y + + boxToMeshRot.column2.abs() * box.extents.z; + } + else + { + meshToWorldSkew = pose * triMeshGeom.scale; + const PxMat33 meshToWorldSkew_Rot = PxMat33Padded(pose.q) * triMeshGeom.scale.toMat33(); + const PxVec3& meshToWorldSkew_Trans = pose.p; + + PxMat33 worldToVertexSkew_Rot; + PxVec3 worldToVertexSkew_Trans; + getInverse(worldToVertexSkew_Rot, worldToVertexSkew_Trans, meshToWorldSkew_Rot, meshToWorldSkew_Trans); + + //make vertex space OBB + Box vertexSpaceBox1; + const Matrix34 worldToVertexSkew(worldToVertexSkew_Rot, worldToVertexSkew_Trans); + vertexSpaceBox1 = transform(worldToVertexSkew, box); + // compute swept aabb + sweptAABBMeshSpaceExtents = vertexSpaceBox1.computeAABBExtent(); + + meshSpaceOrigin = worldToVertexSkew.transform(box.center); + meshSpaceDir = worldToVertexSkew.rotate(unitDir*distance); // also applies scale to direction/length + } + + sweptAABBMeshSpaceExtents += PxVec3(inflation); // inflate the bounds with additive inflation + sweptAABBMeshSpaceExtents *= 1.01f; // fatten the bounds to account for numerical discrepancies + + PxReal dirLen = PxMax(meshSpaceDir.magnitude(), 
1e-5f); + PxReal distCoeff = 1.0f; + if (!isIdentity) + distCoeff = dirLen / distance; + + // Move to AABB space + Matrix34 worldToBox; + computeWorldToBoxMatrix(worldToBox, box); + + const bool bothTriangleSidesCollide = isDoubleSided || meshBothSides; + + const Matrix34Padded meshToBox = worldToBox*meshToWorldSkew; + const PxTransform boxTransform = box.getTransform(); + + const PxVec3 localDir = worldToBox.rotate(unitDir); + const PxVec3 localDirDist = localDir*distance; + SweepBoxMeshHitCallback callback( // using eMULTIPLE with shrinkMaxT + CallbackMode::eMULTIPLE, meshToBox, distance, bothTriangleSidesCollide, box, localDirDist, localDir, unitDir, hitFlags, inflation, triMeshGeom.scale.hasNegativeDeterminant(), distCoeff); + + MeshRayCollider::collide<1, 1>(meshSpaceOrigin, meshSpaceDir/dirLen, dirLen, bothTriangleSidesCollide, meshData, callback, &sweptAABBMeshSpaceExtents); + + return callback.finalizeHit(sweepHit, triMeshGeom, pose, boxTransform, localDir, meshBothSides, isDoubleSided); +} + +#include "GuInternal.h" +void physx::Gu::sweepConvex_MeshGeom_RTREE(const TriangleMesh* mesh, const Box& hullBox, const PxVec3& localDir, const PxReal distance, SweepConvexMeshHitCallback& callback, bool) +{ + PX_ASSERT(mesh->getConcreteType()==PxConcreteType::eTRIANGLE_MESH_BVH33); + const RTreeTriangleMesh* meshData = static_cast<const RTreeTriangleMesh*>(mesh); + + // create temporal bounds + Box querySweptBox; + computeSweptBox(querySweptBox, hullBox.extents, hullBox.center, hullBox.rot, localDir, distance); + + MeshRayCollider::collideOBB(querySweptBox, true, meshData, callback); +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuOverlapTestsMesh.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuOverlapTestsMesh.cpp new file mode 100644 index 00000000..a44343e8 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuOverlapTestsMesh.cpp @@ -0,0 +1,241 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA 
software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "GuMidphaseInterface.h" +#include "CmScaling.h" +#include "GuSphere.h" +#include "GuInternal.h" +#include "GuConvexUtilsInternal.h" +#include "GuVecTriangle.h" +#include "GuVecConvexHull.h" +#include "GuConvexMesh.h" +#include "GuGJK.h" +#include "GuSweepSharedTests.h" + +using namespace physx; +using namespace Cm; +using namespace Gu; +using namespace physx::shdfnd::aos; + +// PT: TODO: remove this function, replace with Midphase:: call at calling sites (TA34704) +bool Gu::checkOverlapAABB_triangleGeom(const PxGeometry& geom, const PxTransform& pose, const PxBounds3& box) +{ + PX_ASSERT(geom.getType() == PxGeometryType::eTRIANGLEMESH); + const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom); + + // PT: TODO: pass AABB directly to interface + const Box obb(box.getCenter(), box.getExtents(), PxMat33(PxIdentity)); + + TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + return Midphase::intersectBoxVsMesh(obb, *meshData, pose, meshGeom.scale, NULL); +} + +bool GeomOverlapCallback_SphereMesh(GU_OVERLAP_FUNC_PARAMS) +{ + PX_ASSERT(geom0.getType()==PxGeometryType::eSPHERE); + PX_ASSERT(geom1.getType()==PxGeometryType::eTRIANGLEMESH); + PX_UNUSED(cache); + + const PxSphereGeometry& sphereGeom = static_cast<const PxSphereGeometry&>(geom0); + const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom1); + + const Sphere worldSphere(pose0.p, sphereGeom.radius); + + TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + return Midphase::intersectSphereVsMesh(worldSphere, *meshData, pose1, meshGeom.scale, NULL); +} + +bool GeomOverlapCallback_CapsuleMesh(GU_OVERLAP_FUNC_PARAMS) +{ + PX_ASSERT(geom0.getType()==PxGeometryType::eCAPSULE); + PX_ASSERT(geom1.getType()==PxGeometryType::eTRIANGLEMESH); + PX_UNUSED(cache); + + const PxCapsuleGeometry& capsuleGeom = static_cast<const PxCapsuleGeometry&>(geom0); + const PxTriangleMeshGeometry& meshGeom = 
static_cast<const PxTriangleMeshGeometry&>(geom1); + + TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + + Capsule capsule; + getCapsule(capsule, capsuleGeom, pose0); + return Midphase::intersectCapsuleVsMesh(capsule, *meshData, pose1, meshGeom.scale, NULL); +} + +bool GeomOverlapCallback_BoxMesh(GU_OVERLAP_FUNC_PARAMS) +{ + PX_ASSERT(geom0.getType()==PxGeometryType::eBOX); + PX_ASSERT(geom1.getType()==PxGeometryType::eTRIANGLEMESH); + PX_UNUSED(cache); + + const PxBoxGeometry& boxGeom = static_cast<const PxBoxGeometry&>(geom0); + const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom1); + + TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + + Box box; + buildFrom(box, pose0.p, boxGeom.halfExtents, pose0.q); + return Midphase::intersectBoxVsMesh(box, *meshData, pose1, meshGeom.scale, NULL); +} + +/////////////////////////////////////////////////////////////////////////////// +struct ConvexVsMeshOverlapCallback : MeshHitCallback<PxRaycastHit> +{ + PsMatTransformV MeshToBoxV; + Vec3V boxExtents; + + ConvexVsMeshOverlapCallback( + const ConvexMesh& cm, const PxMeshScale& convexScale, const FastVertex2ShapeScaling& meshScale, + const PxTransform& tr0, const PxTransform& tr1, bool identityScale, const Box& meshSpaceOBB) + : + MeshHitCallback<PxRaycastHit>(CallbackMode::eMULTIPLE), + mAnyHit (false), + mIdentityScale (identityScale) + { + if (!mIdentityScale) // not done in initializer list for performance + mMeshScale = Ps::aos::Mat33V( + V3LoadU(meshScale.getVertex2ShapeSkew().column0), + V3LoadU(meshScale.getVertex2ShapeSkew().column1), + V3LoadU(meshScale.getVertex2ShapeSkew().column2) ); + using namespace Ps::aos; + + const ConvexHullData* hullData = &cm.getHull(); + + const Vec3V vScale0 = V3LoadU_SafeReadW(convexScale.scale); // PT: safe because 'rotation' follows 'scale' in PxMeshScale + const QuatV vQuat0 = QuatVLoadU(&convexScale.rotation.x); + + mConvex = 
ConvexHullV(hullData, V3Zero(), vScale0, vQuat0, convexScale.isIdentity()); + aToB = PsMatTransformV(tr0.transformInv(tr1)); + + mIdentityScale = identityScale; + + { + // Move to AABB space + Matrix34 MeshToBox; + computeWorldToBoxMatrix(MeshToBox, meshSpaceOBB); + + const Vec3V base0 = V3LoadU(MeshToBox.m.column0); + const Vec3V base1 = V3LoadU(MeshToBox.m.column1); + const Vec3V base2 = V3LoadU(MeshToBox.m.column2); + const Mat33V matV(base0, base1, base2); + const Vec3V p = V3LoadU(MeshToBox.p); + MeshToBoxV = PsMatTransformV(p, matV); + boxExtents = V3LoadU(meshSpaceOBB.extents+PxVec3(0.001f)); + } + } + virtual ~ConvexVsMeshOverlapCallback() {} + + virtual PxAgain processHit( // all reported coords are in mesh local space including hit.position + const PxRaycastHit&, const PxVec3& v0a, const PxVec3& v1a, const PxVec3& v2a, PxReal&, const PxU32*) + { + using namespace Ps::aos; + Vec3V v0 = V3LoadU(v0a), v1 = V3LoadU(v1a), v2 = V3LoadU(v2a); + + // test triangle AABB in box space vs box AABB in box local space + const Vec3V triV0 = MeshToBoxV.transform(v0); // AP: MeshToBoxV already includes mesh scale so we have to use unscaled verts here + const Vec3V triV1 = MeshToBoxV.transform(v1); + const Vec3V triV2 = MeshToBoxV.transform(v2); + Vec3V triMn = V3Min(V3Min(triV0, triV1), triV2); + Vec3V triMx = V3Max(V3Max(triV0, triV1), triV2); + Vec3V negExtents = V3Neg(boxExtents); + BoolV minSeparated = V3IsGrtr(triMn, boxExtents), maxSeparated = V3IsGrtr(negExtents, triMx); + BoolV bSeparated = BAnyTrue3(BOr(minSeparated, maxSeparated)); + if (BAllEqTTTT(bSeparated)) + return true; // continue traversal + + if (!mIdentityScale) + { + v0 = M33MulV3(mMeshScale, v0); + v1 = M33MulV3(mMeshScale, v1); + v2 = M33MulV3(mMeshScale, v2); + } + + TriangleV triangle(v0, v1, v2); + Vec3V contactA, contactB, normal; + FloatV dist; + GjkStatus status; + RelativeConvex<TriangleV> convexA(triangle, aToB); + LocalConvex<ConvexHullV> convexB(mConvex); + status = gjk(convexA, convexB, 
aToB.p, FZero(), contactA, contactB, normal, dist); + if (status == GJK_CONTACT)// || FAllGrtrOrEq(mSqTolerance, sqDist)) + { + mAnyHit = true; + return false; // abort traversal + } + return true; // continue traversal + } + + ConvexHullV mConvex; + PsMatTransformV aToB; + Ps::aos::Mat33V mMeshScale; + bool mAnyHit; + bool mIdentityScale; + +private: + ConvexVsMeshOverlapCallback& operator=(const ConvexVsMeshOverlapCallback&); +}; + +// PT: TODO: refactor bits of this with convex-vs-mesh code +bool GeomOverlapCallback_ConvexMesh(GU_OVERLAP_FUNC_PARAMS) +{ + PX_ASSERT(geom0.getType()==PxGeometryType::eCONVEXMESH); + PX_ASSERT(geom1.getType()==PxGeometryType::eTRIANGLEMESH); + PX_UNUSED(cache); + + const PxConvexMeshGeometry& convexGeom = static_cast<const PxConvexMeshGeometry&>(geom0); + const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom1); + + ConvexMesh* cm = static_cast<ConvexMesh*>(convexGeom.convexMesh); + TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + + const bool idtScaleConvex = convexGeom.scale.isIdentity(); + const bool idtScaleMesh = meshGeom.scale.isIdentity(); + + FastVertex2ShapeScaling convexScaling; + if (!idtScaleConvex) + convexScaling.init(convexGeom.scale); + + FastVertex2ShapeScaling meshScaling; + if (!idtScaleMesh) + meshScaling.init(meshGeom.scale); + + const Matrix34 world0(pose0); + const Matrix34 world1(pose1); + + PX_ASSERT(!cm->getLocalBoundsFast().isEmpty()); + const PxBounds3 hullAABB = cm->getLocalBoundsFast().transformFast(convexScaling.getVertex2ShapeSkew()); + + Box hullOBB; + computeHullOBB(hullOBB, hullAABB, 0.0f, world0, world1, meshScaling, idtScaleMesh); + + ConvexVsMeshOverlapCallback cb(*cm, convexGeom.scale, meshScaling, pose0, pose1, idtScaleMesh, hullOBB); + Midphase::intersectOBB(meshData, hullOBB, cb, true, false); + + return cb.mAnyHit; +} + diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTree.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTree.cpp 
new file mode 100644 index 00000000..7556f4e0 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTree.cpp @@ -0,0 +1,466 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "foundation/PxPreprocessor.h" + +#define RTREE_TEXT_DUMP_ENABLE 0 +#if PX_P64_FAMILY +#define RTREE_PAGES_PER_POOL_SLAB 16384 // preallocate all pages in first batch to make sure we stay within 32 bits for relative pointers.. this is 2 megs +#else +#define RTREE_PAGES_PER_POOL_SLAB 128 +#endif + +#define INSERT_SCAN_LOOKAHEAD 1 // enable one level lookahead scan for determining which child page is best to insert a node into + +#define RTREE_INFLATION_EPSILON 5e-4f + +#include "GuRTree.h" +#include "PsSort.h" +#include "GuSerialize.h" +#include "CmUtils.h" +#include "PsUtilities.h" + +using namespace physx; +#if PX_ENABLE_DYNAMIC_MESH_RTREE +using namespace shdfnd::aos; +#endif +using Ps::Array; +using Ps::sort; +using namespace Gu; + +namespace physx +{ +namespace Gu { + +///////////////////////////////////////////////////////////////////////// +PxU32 RTree::mVersion = 1; + +bool RTree::save(PxOutputStream& stream) const +{ + // save the RTree root structure followed immediately by RTreePage pages to an output stream + bool mismatch = (Ps::littleEndian() == 1); + writeChunk('R', 'T', 'R', 'E', stream); + writeDword(mVersion, mismatch, stream); + writeFloatBuffer(&mBoundsMin.x, 4, mismatch, stream); + writeFloatBuffer(&mBoundsMax.x, 4, mismatch, stream); + writeFloatBuffer(&mInvDiagonal.x, 4, mismatch, stream); + writeFloatBuffer(&mDiagonalScaler.x, 4, mismatch, stream); + writeDword(mPageSize, mismatch, stream); + writeDword(mNumRootPages, mismatch, stream); + writeDword(mNumLevels, mismatch, stream); + writeDword(mTotalNodes, mismatch, stream); + writeDword(mTotalPages, mismatch, stream); + PxU32 unused = 0; // backwards compatibility + writeDword(unused, mismatch, stream); + for (PxU32 j = 0; j < mTotalPages; j++) + { + writeFloatBuffer(mPages[j].minx, RTREE_N, mismatch, stream); + writeFloatBuffer(mPages[j].miny, RTREE_N, mismatch, stream); + writeFloatBuffer(mPages[j].minz, RTREE_N, mismatch, stream); + writeFloatBuffer(mPages[j].maxx, RTREE_N, 
mismatch, stream); + writeFloatBuffer(mPages[j].maxy, RTREE_N, mismatch, stream); + writeFloatBuffer(mPages[j].maxz, RTREE_N, mismatch, stream); + WriteDwordBuffer(mPages[j].ptrs, RTREE_N, mismatch, stream); + } + + return true; +} + +///////////////////////////////////////////////////////////////////////// +bool RTree::load(PxInputStream& stream, PxU32 meshVersion) +{ + PX_UNUSED(meshVersion); + + release(); + + PxI8 a, b, c, d; + readChunk(a, b, c, d, stream); + if(a!='R' || b!='T' || c!='R' || d!='E') + return false; + + bool mismatch = (Ps::littleEndian() == 1); + if(readDword(mismatch, stream) != mVersion) + return false; + + readFloatBuffer(&mBoundsMin.x, 4, mismatch, stream); + readFloatBuffer(&mBoundsMax.x, 4, mismatch, stream); + readFloatBuffer(&mInvDiagonal.x, 4, mismatch, stream); + readFloatBuffer(&mDiagonalScaler.x, 4, mismatch, stream); + mPageSize = readDword(mismatch, stream); + mNumRootPages = readDword(mismatch, stream); + mNumLevels = readDword(mismatch, stream); + mTotalNodes = readDword(mismatch, stream); + mTotalPages = readDword(mismatch, stream); + PxU32 unused = readDword(mismatch, stream); PX_UNUSED(unused); // backwards compatibility + mPages = static_cast<RTreePage*>( + Ps::AlignedAllocator<128>().allocate(sizeof(RTreePage)*mTotalPages, __FILE__, __LINE__)); + Cm::markSerializedMem(mPages, sizeof(RTreePage)*mTotalPages); + for (PxU32 j = 0; j < mTotalPages; j++) + { + readFloatBuffer(mPages[j].minx, RTREE_N, mismatch, stream); + readFloatBuffer(mPages[j].miny, RTREE_N, mismatch, stream); + readFloatBuffer(mPages[j].minz, RTREE_N, mismatch, stream); + readFloatBuffer(mPages[j].maxx, RTREE_N, mismatch, stream); + readFloatBuffer(mPages[j].maxy, RTREE_N, mismatch, stream); + readFloatBuffer(mPages[j].maxz, RTREE_N, mismatch, stream); + ReadDwordBuffer(mPages[j].ptrs, RTREE_N, mismatch, stream); + } + + return true; +} + +///////////////////////////////////////////////////////////////////////// +PxU32 RTree::computeBottomLevelCount(PxU32 
multiplier) const +{ + PxU32 topCount = 0, curCount = mNumRootPages; + const RTreePage* rightMostPage = &mPages[mNumRootPages-1]; + PX_ASSERT(rightMostPage); + for (PxU32 level = 0; level < mNumLevels-1; level++) + { + topCount += curCount; + PxU32 nc = rightMostPage->nodeCount(); + PX_ASSERT(nc > 0 && nc <= RTREE_N); + // old version pointer, up to PX_MESH_VERSION 8 + PxU32 ptr = (rightMostPage->ptrs[nc-1]) * multiplier; + PX_ASSERT(ptr % sizeof(RTreePage) == 0); + const RTreePage* rightMostPageNext = mPages + (ptr / sizeof(RTreePage)); + curCount = PxU32(rightMostPageNext - rightMostPage); + rightMostPage = rightMostPageNext; + } + + return mTotalPages - topCount; +} + +///////////////////////////////////////////////////////////////////////// +RTree::RTree(const PxEMPTY) +{ + mFlags |= USER_ALLOCATED; +} + + +// PX_SERIALIZATION +///////////////////////////////////////////////////////////////////////// +void RTree::exportExtraData(PxSerializationContext& stream) +{ + stream.alignData(128); + stream.writeData(mPages, mTotalPages*sizeof(RTreePage)); +} + +///////////////////////////////////////////////////////////////////////// +void RTree::importExtraData(PxDeserializationContext& context) +{ + context.alignExtraData(128); + mPages = context.readExtraData<RTreePage>(mTotalPages); +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE PxU32 RTreePage::nodeCount() const +{ + for (int j = 0; j < RTREE_N; j ++) + if (minx[j] == MX) + return PxU32(j); + + return RTREE_N; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::clearNode(PxU32 nodeIndex) +{ + PX_ASSERT(nodeIndex < RTREE_N); + minx[nodeIndex] = miny[nodeIndex] = minz[nodeIndex] = MX; // initialize empty node with sentinels + maxx[nodeIndex] = maxy[nodeIndex] = maxz[nodeIndex] = MN; + ptrs[nodeIndex] = 0; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void 
RTreePage::getNode(const PxU32 nodeIndex, RTreeNodeQ& r) const +{ + PX_ASSERT(nodeIndex < RTREE_N); + r.minx = minx[nodeIndex]; + r.miny = miny[nodeIndex]; + r.minz = minz[nodeIndex]; + r.maxx = maxx[nodeIndex]; + r.maxy = maxy[nodeIndex]; + r.maxz = maxz[nodeIndex]; + r.ptr = ptrs[nodeIndex]; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::setEmpty(PxU32 startIndex) +{ + PX_ASSERT(startIndex < RTREE_N); + for (PxU32 j = startIndex; j < RTREE_N; j ++) + clearNode(j); +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::computeBounds(RTreeNodeQ& newBounds) +{ + RTreeValue _minx = MX, _miny = MX, _minz = MX, _maxx = MN, _maxy = MN, _maxz = MN; + for (PxU32 j = 0; j < RTREE_N; j++) + { + if (isEmpty(j)) + continue; + _minx = PxMin(_minx, minx[j]); + _miny = PxMin(_miny, miny[j]); + _minz = PxMin(_minz, minz[j]); + _maxx = PxMax(_maxx, maxx[j]); + _maxy = PxMax(_maxy, maxy[j]); + _maxz = PxMax(_maxz, maxz[j]); + } + newBounds.minx = _minx; + newBounds.miny = _miny; + newBounds.minz = _minz; + newBounds.maxx = _maxx; + newBounds.maxy = _maxy; + newBounds.maxz = _maxz; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::adjustChildBounds(PxU32 index, const RTreeNodeQ& adjChild) +{ + PX_ASSERT(index < RTREE_N); + minx[index] = adjChild.minx; + miny[index] = adjChild.miny; + minz[index] = adjChild.minz; + maxx[index] = adjChild.maxx; + maxy[index] = adjChild.maxy; + maxz[index] = adjChild.maxz; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::growChildBounds(PxU32 index, const RTreeNodeQ& child) +{ + PX_ASSERT(index < RTREE_N); + minx[index] = PxMin(minx[index], child.minx); + miny[index] = PxMin(miny[index], child.miny); + minz[index] = PxMin(minz[index], child.minz); + maxx[index] = PxMax(maxx[index], child.maxx); + 
maxy[index] = PxMax(maxy[index], child.maxy); + maxz[index] = PxMax(maxz[index], child.maxz); +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::copyNode(PxU32 targetIndex, const RTreePage& sourcePage, PxU32 sourceIndex) +{ + PX_ASSERT(targetIndex < RTREE_N); + PX_ASSERT(sourceIndex < RTREE_N); + minx[targetIndex] = sourcePage.minx[sourceIndex]; + miny[targetIndex] = sourcePage.miny[sourceIndex]; + minz[targetIndex] = sourcePage.minz[sourceIndex]; + maxx[targetIndex] = sourcePage.maxx[sourceIndex]; + maxy[targetIndex] = sourcePage.maxy[sourceIndex]; + maxz[targetIndex] = sourcePage.maxz[sourceIndex]; + ptrs[targetIndex] = sourcePage.ptrs[sourceIndex]; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreePage::setNode(PxU32 targetIndex, const RTreeNodeQ& sourceNode) +{ + PX_ASSERT(targetIndex < RTREE_N); + minx[targetIndex] = sourceNode.minx; + miny[targetIndex] = sourceNode.miny; + minz[targetIndex] = sourceNode.minz; + maxx[targetIndex] = sourceNode.maxx; + maxy[targetIndex] = sourceNode.maxy; + maxz[targetIndex] = sourceNode.maxz; + ptrs[targetIndex] = sourceNode.ptr; +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreeNodeQ::grow(const RTreePage& page, int nodeIndex) +{ + PX_ASSERT(nodeIndex < RTREE_N); + minx = PxMin(minx, page.minx[nodeIndex]); + miny = PxMin(miny, page.miny[nodeIndex]); + minz = PxMin(minz, page.minz[nodeIndex]); + maxx = PxMax(maxx, page.maxx[nodeIndex]); + maxy = PxMax(maxy, page.maxy[nodeIndex]); + maxz = PxMax(maxz, page.maxz[nodeIndex]); +} + +///////////////////////////////////////////////////////////////////////// +PX_FORCE_INLINE void RTreeNodeQ::grow(const RTreeNodeQ& node) +{ + minx = PxMin(minx, node.minx); miny = PxMin(miny, node.miny); minz = PxMin(minz, node.minz); + maxx = PxMax(maxx, node.maxx); maxy = PxMax(maxy, node.maxy); maxz = PxMax(maxz, node.maxz); +} 
+ +///////////////////////////////////////////////////////////////////////// +#if PX_ENABLE_DYNAMIC_MESH_RTREE +void RTree::validateRecursive(PxU32 level, RTreeNodeQ parentBounds, RTreePage* page, CallbackRefit* cbLeaf) +#else +void RTree::validateRecursive(PxU32 level, RTreeNodeQ parentBounds, RTreePage* page) +#endif +{ + PX_UNUSED(parentBounds); + + static PxU32 validateCounter = 0; // this is to suppress a warning that recursive call has no side effects + validateCounter++; + + RTreeNodeQ n; + PxU32 pageNodeCount = page->nodeCount(); + for (PxU32 j = 0; j < pageNodeCount; j++) + { + page->getNode(j, n); + if (page->isEmpty(j)) + continue; + PX_ASSERT(n.minx >= parentBounds.minx); PX_ASSERT(n.miny >= parentBounds.miny); PX_ASSERT(n.minz >= parentBounds.minz); + PX_ASSERT(n.maxx <= parentBounds.maxx); PX_ASSERT(n.maxy <= parentBounds.maxy); PX_ASSERT(n.maxz <= parentBounds.maxz); + if (!n.isLeaf()) + { + PX_ASSERT((n.ptr&1) == 0); + RTreePage* childPage = reinterpret_cast<RTreePage*>(size_t(mPages) + n.ptr); +#if PX_ENABLE_DYNAMIC_MESH_RTREE + validateRecursive(level+1, n, childPage, cbLeaf); + } else if (cbLeaf) + { + Vec3V mnv, mxv; + cbLeaf->recomputeBounds(page->ptrs[j] & ~1, mnv, mxv); + PxVec3 mn3, mx3; V3StoreU(mnv, mn3); V3StoreU(mxv, mx3); + const PxBounds3 lb(mn3, mx3); + const PxVec3& mn = lb.minimum; const PxVec3& mx = lb.maximum; PX_UNUSED(mn); PX_UNUSED(mx); + PX_ASSERT(mn.x >= n.minx); PX_ASSERT(mn.y >= n.miny); PX_ASSERT(mn.z >= n.minz); + PX_ASSERT(mx.x <= n.maxx); PX_ASSERT(mx.y <= n.maxy); PX_ASSERT(mx.z <= n.maxz); + } +#else + validateRecursive(level+1, n, childPage); + } +#endif + } + RTreeNodeQ recomputedBounds; + page->computeBounds(recomputedBounds); + PX_ASSERT((recomputedBounds.minx - parentBounds.minx)<=RTREE_INFLATION_EPSILON); + PX_ASSERT((recomputedBounds.miny - parentBounds.miny)<=RTREE_INFLATION_EPSILON); + PX_ASSERT((recomputedBounds.minz - parentBounds.minz)<=RTREE_INFLATION_EPSILON); + PX_ASSERT((recomputedBounds.maxx - 
parentBounds.maxx)<=RTREE_INFLATION_EPSILON); + PX_ASSERT((recomputedBounds.maxy - parentBounds.maxy)<=RTREE_INFLATION_EPSILON); + PX_ASSERT((recomputedBounds.maxz - parentBounds.maxz)<=RTREE_INFLATION_EPSILON); +} + +///////////////////////////////////////////////////////////////////////// +#if PX_ENABLE_DYNAMIC_MESH_RTREE +void RTree::validate(CallbackRefit* cbLeaf) +#else +void RTree::validate() +#endif +{ + for (PxU32 j = 0; j < mNumRootPages; j++) + { + RTreeNodeQ rootBounds; + mPages[j].computeBounds(rootBounds); +#if PX_ENABLE_DYNAMIC_MESH_RTREE + validateRecursive(0, rootBounds, mPages+j, cbLeaf); +#else + validateRecursive(0, rootBounds, mPages+j); +#endif + } +} + +#if PX_ENABLE_DYNAMIC_MESH_RTREE +void RTree::refitAllStaticTree(CallbackRefit& cb, PxBounds3* retBounds) +{ + PxU8* treeNodes8 = reinterpret_cast<PxU8*>(mPages); + + // since pages are ordered we can scan back to front and the hierarchy will be updated + for (PxI32 iPage = PxI32(mTotalPages)-1; iPage>=0; iPage--) + { + RTreePage& page = mPages[iPage]; + for (PxU32 j = 0; j < RTREE_N; j++) + { + if (page.isEmpty(j)) + continue; + if (page.isLeaf(j)) + { + Vec3V childMn, childMx; + cb.recomputeBounds(page.ptrs[j]-1, childMn, childMx); // compute the bound around triangles + PxVec3 mn3, mx3; + V3StoreU(childMn, mn3); + V3StoreU(childMx, mx3); + page.minx[j] = mn3.x; page.miny[j] = mn3.y; page.minz[j] = mn3.z; + page.maxx[j] = mx3.x; page.maxy[j] = mx3.y; page.maxz[j] = mx3.z; + } else + { + const RTreePage* child = reinterpret_cast<const RTreePage*>(treeNodes8 + page.ptrs[j]); + PX_COMPILE_TIME_ASSERT(RTREE_N == 4); + bool first = true; + for (PxU32 k = 0; k < RTREE_N; k++) + { + if (child->isEmpty(k)) + continue; + if (first) + { + page.minx[j] = child->minx[k]; page.miny[j] = child->miny[k]; page.minz[j] = child->minz[k]; + page.maxx[j] = child->maxx[k]; page.maxy[j] = child->maxy[k]; page.maxz[j] = child->maxz[k]; + first = false; + } else + { + page.minx[j] = PxMin(page.minx[j], 
child->minx[k]); + page.miny[j] = PxMin(page.miny[j], child->miny[k]); + page.minz[j] = PxMin(page.minz[j], child->minz[k]); + page.maxx[j] = PxMax(page.maxx[j], child->maxx[k]); + page.maxy[j] = PxMax(page.maxy[j], child->maxy[k]); + page.maxz[j] = PxMax(page.maxz[j], child->maxz[k]); + } + } + } + } + } + + if (retBounds) + { + RTreeNodeQ bound1; + for (PxU32 ii = 0; ii<mNumRootPages; ii++) + { + mPages[ii].computeBounds(bound1); + if (ii == 0) + { + retBounds->minimum = PxVec3(bound1.minx, bound1.miny, bound1.minz); + retBounds->maximum = PxVec3(bound1.maxx, bound1.maxy, bound1.maxz); + } else + { + retBounds->minimum = retBounds->minimum.minimum(PxVec3(bound1.minx, bound1.miny, bound1.minz)); + retBounds->maximum = retBounds->maximum.maximum(PxVec3(bound1.maxx, bound1.maxy, bound1.maxz)); + } + } + } + +#if PX_CHECKED + validate(&cb); +#endif +} +#endif // PX_ENABLE_DYNAMIC_MESH_RTREE + +//~PX_SERIALIZATION +const RTreeValue RTreePage::MN = -PX_MAX_F32; +const RTreeValue RTreePage::MX = PX_MAX_F32; + +} // namespace Gu + +} diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTree.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTree.h new file mode 100644 index 00000000..48c54fc5 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTree.h @@ -0,0 +1,304 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_RTREE_H +#define GU_RTREE_H + +#include "foundation/PxSimpleTypes.h" +#include "foundation/PxVec4.h" +#include "foundation/PxBounds3.h" +#include "foundation/PxAssert.h" +#include "PsUserAllocated.h" // for PxSerializationContext +#include "PxSerialFramework.h" +#include "PxTriangleMesh.h" +#include "PsAlignedMalloc.h" + + +#if PX_ENABLE_DYNAMIC_MESH_RTREE +#include "PsVecMath.h" +#endif + +#define RTREE_N 4 // changing this number will affect the mesh format +PX_COMPILE_TIME_ASSERT(RTREE_N == 4 || RTREE_N == 8); // using the low 5 bits for storage of index(childPtr) for dynamic rtree + +namespace physx +{ + + +#if PX_VC +#pragma warning(push) +#pragma warning(disable: 4324) // Padding was added at the end of a structure because of a __declspec(align) value. 
#endif

namespace Gu {

	class Box;
	struct RTreePage;

	// Scalar type stored in the quantized tree nodes (plain 32-bit float here).
	typedef PxF32 RTreeValue;

	/////////////////////////////////////////////////////////////////////////
	// quantized untransposed RTree node - used for offline build and dynamic insertion
	// One AABB plus a child/leaf reference. The low bit of 'ptr' distinguishes a
	// leaf payload from a child-page reference; the remaining bits are the
	// offset/handle (see RTreePage::ptrs for the encoding).
	struct RTreeNodeQ
	{
		RTreeValue minx, miny, minz, maxx, maxy, maxz;
		PxU32 ptr; // lowest bit is leaf flag

		PX_FORCE_INLINE void setLeaf(bool set) { if (set) ptr |= 1; else ptr &= ~1; }
		PX_FORCE_INLINE PxU32 isLeaf() const { return ptr & 1; }
		PX_FORCE_INLINE void setEmpty();
		PX_FORCE_INLINE void grow(const RTreePage& page, int nodeIndex);
		PX_FORCE_INLINE void grow(const RTreeNodeQ& node);
	};

	/////////////////////////////////////////////////////////////////////////
	// RTreePage data structure, holds RTREE_N transposed nodes
	// Bounds are stored in structure-of-arrays ("transposed") layout so a single
	// 4-wide SIMD load fetches the same coordinate of RTREE_N sibling nodes.

	// RTreePage data structure, holds 8 transposed nodes
	// NOTE(review): comment says 8 but the arrays are sized RTREE_N (currently 4) -
	// the "8" appears to be stale text from an earlier configuration.
	PX_ALIGN_PREFIX(16)
	struct RTreePage
	{
		//= ATTENTION! =====================================================================================
		// Changing the data layout of this class breaks the binary serialization format. See comments for
		// PX_BINARY_SERIAL_VERSION. If a modification is required, please adjust the getBinaryMetaData
		// function. If the modification is made on a custom branch, please change PX_BINARY_SERIAL_VERSION
		// accordingly.
		//==================================================================================================

		// Sentinel values: an empty slot is encoded as [min=MX, max=MN], i.e. an
		// inverted (degenerate) range, which isEmpty() detects via minx > maxx.
		static const RTreeValue MN, MX;

		RTreeValue minx[RTREE_N]; // [min=MX, max=MN] is used as a sentinel range for empty bounds
		RTreeValue miny[RTREE_N];
		RTreeValue minz[RTREE_N];
		RTreeValue maxx[RTREE_N];
		RTreeValue maxy[RTREE_N];
		RTreeValue maxz[RTREE_N];
		PxU32 ptrs[RTREE_N]; // for static rtree this is an offset relative to the first page divided by 16, for dynamics it's an absolute pointer divided by 16

		PX_FORCE_INLINE PxU32 nodeCount() const; // returns the number of occupied nodes in this page
		PX_FORCE_INLINE void setEmpty(PxU32 startIndex = 0);
		PX_FORCE_INLINE bool isEmpty(PxU32 index) const { return minx[index] > maxx[index]; }
		PX_FORCE_INLINE void copyNode(PxU32 targetIndex, const RTreePage& sourcePage, PxU32 sourceIndex);
		PX_FORCE_INLINE void setNode(PxU32 targetIndex, const RTreeNodeQ& node);
		PX_FORCE_INLINE void clearNode(PxU32 nodeIndex);
		PX_FORCE_INLINE void getNode(PxU32 nodeIndex, RTreeNodeQ& result) const;
		PX_FORCE_INLINE void computeBounds(RTreeNodeQ& bounds);
		PX_FORCE_INLINE void adjustChildBounds(PxU32 index, const RTreeNodeQ& adjustedChildBounds);
		PX_FORCE_INLINE void growChildBounds(PxU32 index, const RTreeNodeQ& adjustedChildBounds);
		PX_FORCE_INLINE PxU32 getNodeHandle(PxU32 index) const;
		PX_FORCE_INLINE PxU32 isLeaf(PxU32 index) const { return ptrs[index] & 1; }
	} PX_ALIGN_SUFFIX(16);

	/////////////////////////////////////////////////////////////////////////
	// RTree root data structure
	// Owns (or references, see USER_ALLOCATED) the flat array of RTreePages and
	// provides AABB / OBB / ray traversal queries plus serialization support.
	PX_ALIGN_PREFIX(16)
	struct RTree
	{
		//= ATTENTION! =====================================================================================
		// Changing the data layout of this class breaks the binary serialization format. See comments for
		// PX_BINARY_SERIAL_VERSION. If a modification is required, please adjust the getBinaryMetaData
		// function. If the modification is made on a custom branch, please change PX_BINARY_SERIAL_VERSION
		// accordingly.
		//==================================================================================================
		// PX_SERIALIZATION
		RTree(const PxEMPTY);
		void exportExtraData(PxSerializationContext&);
		void importExtraData(PxDeserializationContext& context);
		static void getBinaryMetaData(PxOutputStream& stream);
		//~PX_SERIALIZATION

		PX_INLINE RTree(); // offline static rtree constructor used with cooking

		~RTree() { release(); }

		PX_INLINE void release();
		bool save(PxOutputStream& stream) const; // always saves as big endian
		bool load(PxInputStream& stream, PxU32 meshVersion); // converts to proper endian at load time

		////////////////////////////////////////////////////////////////////////////
		// QUERIES
		struct Callback
		{
			// result buffer should have room for at least RTREE_N items
			// should return true to continue traversal. If false is returned, traversal is aborted
			virtual bool processResults(PxU32 count, PxU32* buf) = 0;
			virtual void profile() {}
			virtual ~Callback() {}
		};

		struct CallbackRaycast
		{
			// result buffer should have room for at least RTREE_N items
			// should return true to continue traversal. If false is returned, traversal is aborted
			// newMaxT serves as both input and output, as input it's the maxT so far
			// set it to a new value (which should be smaller) and it will become the new far clip t
			virtual bool processResults(PxU32 count, PxU32* buf, PxF32& newMaxT) = 0;
			virtual ~CallbackRaycast() {}
		};

		// callback will be issued as soon as the buffer overflows maxResultsPerBlock-RTreePage:SIZE entries
		// use maxResults = RTreePage:SIZE and return false from callback for "first hit" early out
		void traverseAABB(
			const PxVec3& boxMin, const PxVec3& boxMax,
			const PxU32 maxResultsPerBlock, PxU32* resultsBlockBuf, Callback* processResultsBlockCallback) const;
		void traverseOBB(
			const Gu::Box& obb,
			const PxU32 maxResultsPerBlock, PxU32* resultsBlockBuf, Callback* processResultsBlockCallback) const;
		template <int inflate>
		//PX_PHYSX_COMMON_API
		void traverseRay(
			const PxVec3& rayOrigin, const PxVec3& rayDir, // dir doesn't have to be normalized and is B-A for raySegment
			const PxU32 maxResults, PxU32* resultsPtr,
			Gu::RTree::CallbackRaycast* callback,
			const PxVec3* inflateAABBs, // inflate tree's AABBs by this amount. This function turns into AABB sweep.
			PxF32 maxT = PX_MAX_F32 // maximum ray t parameter, p(t)=origin+t*dir; use 1.0f for ray segment
			) const;

#if PX_ENABLE_DYNAMIC_MESH_RTREE
		struct CallbackRefit
		{
			// In this callback index is the number stored in the RTree, which is a LeafTriangles object for current PhysX mesh
			virtual void recomputeBounds(PxU32 index, shdfnd::aos::Vec3V& mn, shdfnd::aos::Vec3V& mx) = 0;
			virtual ~CallbackRefit() {}
		};
		void refitAllStaticTree(CallbackRefit& cb, PxBounds3* resultMeshBounds); // faster version of refit for static RTree only
#endif


		////////////////////////////////////////////////////////////////////////////
		// DEBUG HELPER FUNCTIONS
#if PX_ENABLE_DYNAMIC_MESH_RTREE
		PX_PHYSX_COMMON_API void validate(CallbackRefit* cb = NULL); // verify that all children are indeed included in parent bounds
#else
		PX_PHYSX_COMMON_API void validate(); // verify that all children are indeed included in parent bounds
#endif
		void openTextDump();
		void closeTextDump();
		void textDump(const char* prefix);
		void maxscriptExport();
		PxU32 computeBottomLevelCount(PxU32 storedToMemMultiplier) const;

		////////////////////////////////////////////////////////////////////////////
		// DATA
		// remember to update save() and load() when adding or removing data
		PxVec4 mBoundsMin, mBoundsMax, mInvDiagonal, mDiagonalScaler; // 16
		PxU32 mPageSize;      // nodes per page; runtime asserts check it is 4 or 8
		PxU32 mNumRootPages;
		PxU32 mNumLevels;
		PxU32 mTotalNodes; // 16
		PxU32 mTotalPages;
		// USER_ALLOCATED: mPages memory is owned externally and must not be freed
		// by release(); IS_EDGE_SET semantics are not visible from this header.
		PxU32 mFlags; enum { USER_ALLOCATED = 0x1, IS_EDGE_SET = 0x2 };
		RTreePage* mPages;

		static PxU32 mVersion;

	protected:
		typedef PxU32 NodeHandle;
#if PX_ENABLE_DYNAMIC_MESH_RTREE
		void validateRecursive(PxU32 level, RTreeNodeQ parentBounds, RTreePage* page, CallbackRefit* cb = NULL);
#else
		void validateRecursive(PxU32 level, RTreeNodeQ parentBounds, RTreePage* page);
#endif

		friend struct RTreePage;
	} PX_ALIGN_SUFFIX(16);

#if PX_VC
#pragma warning(pop)
#endif

	/////////////////////////////////////////////////////////////////////////
	// Default (cooking-time) constructor: zero-initializes the fields that the
	// builder fills in later. Note mNumRootPages/mTotalPages/bounds members are
	// NOT initialized here; presumably the cooking path sets them - TODO confirm.
	PX_INLINE RTree::RTree()
	{
		mFlags = 0;
		mPages = NULL;
		mTotalNodes = 0;
		mNumLevels = 0;
		mPageSize = RTREE_N;
	}

	/////////////////////////////////////////////////////////////////////////
	// Frees the page array unless it was supplied by the user (USER_ALLOCATED),
	// e.g. when the tree was deserialized in-place from a user buffer.
	PX_INLINE void RTree::release()
	{
		if ((mFlags & USER_ALLOCATED) == 0 && mPages)
		{
			physx::shdfnd::AlignedAllocator<128>().deallocate(mPages);
			mPages = NULL;
		}
	}

	// explicit instantiations for traverseRay
	// XXX: dima: g++ 4.4 won't compile this => skipping by PX_UNIX_FAMILY
#if PX_X86 && !PX_UNIX_FAMILY
	template
	//PX_PHYSX_COMMON_API
	void RTree::traverseRay<0>(
		const PxVec3&, const PxVec3&, const PxU32, PxU32*, Gu::RTree::CallbackRaycast*, const PxVec3*, PxF32 maxT) const;
	template
	//PX_PHYSX_COMMON_API
	void RTree::traverseRay<1>(
		const PxVec3&, const PxVec3&, const PxU32, PxU32*, Gu::RTree::CallbackRaycast*, const PxVec3*, PxF32 maxT) const;
#endif

	/////////////////////////////////////////////////////////////////////////
	// Marks the node empty using the inverted sentinel range [MX, MN], which
	// RTreePage::isEmpty() detects via min > max.
	PX_FORCE_INLINE void RTreeNodeQ::setEmpty()
	{
		minx = miny = minz = RTreePage::MX;
		maxx = maxy = maxz = RTreePage::MN;
	}


	// bit 1 is always expected to be set to differentiate between leaf and non-leaf node
	//
	// Leaf payload bit layout (32 bits):
	//   bit  0      : leaf flag, always 1 for a leaf
	//   bits 1..4   : triangle count minus one (so 1..16 triangles)
	//   bits 5..31  : first triangle index (must fit in 27 bits, see assert)
	PX_FORCE_INLINE PxU32 LeafGetNbTriangles(PxU32 Data) { return ((Data>>1) & 15)+1; }
	PX_FORCE_INLINE PxU32 LeafGetTriangleIndex(PxU32 Data) { return Data>>5; }
	PX_FORCE_INLINE PxU32 LeafSetData(PxU32 nb, PxU32 index)
	{
		PX_ASSERT(nb>0 && nb<=16); PX_ASSERT(index < (1<<27));
		return (index<<5)|(((nb-1)&15)<<1) | 1;
	}

	// Thin wrapper over the packed leaf word above; this is what a leaf 'ptr'
	// in the tree decodes to for PhysX triangle meshes.
	struct LeafTriangles
	{
		PxU32 Data;

		// Gets number of triangles in the leaf, returns the number of triangles N, with 0 < N <= 16
		PX_FORCE_INLINE PxU32 GetNbTriangles() const { return LeafGetNbTriangles(Data); }

		// Gets triangle index for this leaf. Indexed model's array of indices retrieved with RTreeMidphase::GetIndices()
		PX_FORCE_INLINE PxU32 GetTriangleIndex() const { return LeafGetTriangleIndex(Data); }
		PX_FORCE_INLINE void SetData(PxU32 nb, PxU32 index) { Data = LeafSetData(nb, index); }
	};

	PX_COMPILE_TIME_ASSERT(sizeof(LeafTriangles)==4); // RTree has space for 4 bytes

} // namespace Gu

}

#endif // #ifdef PX_COLLISION_RTREE
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTreeQueries.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTreeQueries.cpp
new file mode 100644
index 00000000..9d7bd57a
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuRTreeQueries.cpp
@@ -0,0 +1,581 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

/*
General notes:

	rtree depth-first traversal looks like this:
	push top level page onto stack

	pop page from stack
	for each node in page
		if node overlaps with testrect
			push node's subpage

	we want to efficiently keep track of current stack level to know if the current page is a leaf or not
	(since we don't store a flag with the page due to no space, we can't determine it just by looking at current page)
	since we traverse depth first, the levels for nodes on the stack look like this:
	l0 l0 l1 l2 l2 l3 l3 l3 l4

	we can encode this as an array of 4 bits per level count into a 32-bit integer
	to simplify the code->level computation we also keep track of current level by incrementing the level whenever any subpages
	from current test page are pushed onto the stack
	when we pop a page off the stack we use this encoding to determine if we should decrement the stack level
*/

#include "foundation/PxBounds3.h"
#include "GuRTree.h"
#include "PsIntrinsics.h"
#include "GuBox.h"
#include "PsVecMath.h"
#include "PxQueryReport.h" // for PxAgain
#include "PsBitUtils.h"

//#define VERIFY_RTREE
#ifdef VERIFY_RTREE
#include "GuIntersectionRayBox.h"
#include "GuIntersectionBoxBox.h"
#include "stdio.h"
#endif

using namespace physx;
using namespace physx::shdfnd;
using namespace Ps::aos;

namespace physx
{
namespace Gu {

using namespace Ps::aos;

// Local shorthands for the vector-math wrappers used below.
#define v_absm(a) V4Andc(a, signMask)   // clear the sign bits => componentwise |a|
#define V4FromF32A(x) V4LoadA(x)
#define PxF32FV(x) FStore(x)
#define CAST_U8(a) reinterpret_cast<PxU8*>(a)

/////////////////////////////////////////////////////////////////////////
// Depth-first AABB query. Each popped stack entry is a byte offset (relative
// to mPages) of an RTreePage; all RTREE_N node AABBs in the page are tested
// against the query box with one SIMD pass, then overlapping leaves are
// reported through 'callback' and overlapping interior children are pushed.
// NOTE(review): the 128-entry traversal stack has no overflow check;
// presumably tree depth*fanout is bounded so it cannot overflow - confirm.
void RTree::traverseAABB(const PxVec3& boxMin, const PxVec3& boxMax, const PxU32 maxResults, PxU32* resultsPtr, Callback* callback) const
{
	PX_UNUSED(resultsPtr);

	PX_ASSERT(callback);
	PX_ASSERT(maxResults >= mPageSize);
	PX_UNUSED(maxResults);

	const PxU32 maxStack = 128;
	PxU32 stack1[maxStack];
	// stack base is offset by one entry into stack1; NOTE(review): presumably
	// headroom related to the branchless push pattern used by traverseRay - confirm.
	PxU32* stack = stack1+1;

	PX_ASSERT(mPages);
	PX_ASSERT((uintptr_t(mPages) & 127) == 0);
	PX_ASSERT((uintptr_t(this) & 15) == 0);

	// conservatively quantize the input box
	// (NOTE(review): no quantization is visible here anymore - these are plain
	// loads; the comment appears to predate a format change.)
	Vec4V nqMin = Vec4V_From_PxVec3_WUndefined(boxMin);
	Vec4V nqMax = Vec4V_From_PxVec3_WUndefined(boxMax);

	// Broadcast each query-box coordinate across a 4-lane register so one
	// compare tests the same coordinate of 4 tree nodes at once.
	Vec4V nqMinx4 = V4SplatElement<0>(nqMin);
	Vec4V nqMiny4 = V4SplatElement<1>(nqMin);
	Vec4V nqMinz4 = V4SplatElement<2>(nqMin);
	Vec4V nqMaxx4 = V4SplatElement<0>(nqMax);
	Vec4V nqMaxy4 = V4SplatElement<1>(nqMax);
	Vec4V nqMaxz4 = V4SplatElement<2>(nqMax);

	// on 64-bit platforms the dynamic rtree pointer is also relative to mPages
	PxU8* treeNodes8 = CAST_U8(mPages);
	PxU32* stackPtr = stack;

	// AP potential perf optimization - fetch the top level right away
	PX_ASSERT(RTREE_N == 4 || RTREE_N == 8);
	PX_ASSERT(Ps::isPowerOfTwo(mPageSize));

	// Seed the stack with every root page (as byte offsets from mPages).
	for (PxI32 j = PxI32(mNumRootPages-1); j >= 0; j --)
		*stackPtr++ = j*sizeof(RTreePage);

	// Cache of the most recently pushed entry: avoids re-reading stack memory
	// that was just written (load-hit-store) on the immediately following pop.
	PxU32 cacheTopValid = true;
	PxU32 cacheTop = 0;

	do {
		stackPtr--;
		PxU32 top;
		if (cacheTopValid) // branch is faster than lhs
			top = cacheTop;
		else
			top = stackPtr[0];
		PX_ASSERT(!cacheTopValid || stackPtr[0] == cacheTop);
		RTreePage* PX_RESTRICT tn = reinterpret_cast<RTreePage*>(treeNodes8 + top);
		const PxU32* ptrs = (reinterpret_cast<RTreePage*>(tn))->ptrs;

		// SoA layout: one aligned load per coordinate covers all nodes in the page.
		Vec4V minx4 = V4LoadA(tn->minx);
		Vec4V miny4 = V4LoadA(tn->miny);
		Vec4V minz4 = V4LoadA(tn->minz);
		Vec4V maxx4 = V4LoadA(tn->maxx);
		Vec4V maxy4 = V4LoadA(tn->maxy);
		Vec4V maxz4 = V4LoadA(tn->maxz);

		// AABB/AABB overlap test
		// A lane's mask is set iff the boxes are separated along some axis,
		// so resa[i] != 0 means "node i does NOT overlap the query box".
		// Empty slots (min > max sentinel) fail the test automatically.
		BoolV res0 = V4IsGrtr(nqMinx4, maxx4); BoolV res1 = V4IsGrtr(nqMiny4, maxy4); BoolV res2 = V4IsGrtr(nqMinz4, maxz4);
		BoolV res3 = V4IsGrtr(minx4, nqMaxx4); BoolV res4 = V4IsGrtr(miny4, nqMaxy4); BoolV res5 = V4IsGrtr(minz4, nqMaxz4);
		BoolV resx = BOr(BOr(BOr(res0, res1), BOr(res2, res3)), BOr(res4, res5));
		PX_ALIGN_PREFIX(16) PxU32 resa[RTREE_N] PX_ALIGN_SUFFIX(16);

		VecU32V res4x = VecU32V_From_BoolV(resx);
		U4StoreA(res4x, resa);

		cacheTopValid = false;
		for (PxU32 i = 0; i < RTREE_N; i++)
		{
			PxU32 ptr = ptrs[i] & ~1; // clear the isLeaf bit
			if (resa[i])
				continue;
			if (tn->isLeaf(i))
			{
				// Leaves are reported one at a time (count == 1).
				if (!callback->processResults(1, &ptr))
					return;
			}
			else
			{
				*(stackPtr++) = ptr;
				cacheTop = ptr;
				cacheTopValid = true;
			}
		}
	} while (stackPtr > stack);
}

namespace
{
	// Shared SIMD constants for the traversal routines below.
	const VecU32V signMask = U4LoadXYZW((PxU32(1)<<31), (PxU32(1)<<31), (PxU32(1)<<31), (PxU32(1)<<31));
	const Vec4V epsFloat4 = V4Load(1e-9f);
	const Vec4V zeroes = V4Zero();
	const Vec4V twos = V4Load(2.0f);
	const Vec4V epsInflateFloat4 = V4Load(1e-7f);
}

/////////////////////////////////////////////////////////////////////////
// Ray (inflate == 0) or inflated-AABB sweep (inflate == 1) traversal using a
// 4-wide slab (Kay-Kajiya) test per page. Unlike traverseAABB, stack entries
// here can be leaf payloads directly: the low bit of a pushed value marks a
// leaf, so leaf handling happens on pop. The callback may shrink maxT to
// clip the remainder of the traversal ("first hit" style queries).
template <int inflate>
void RTree::traverseRay(
	const PxVec3& rayOrigin, const PxVec3& rayDir,
	const PxU32 maxResults, PxU32* resultsPtr, Gu::RTree::CallbackRaycast* callback,
	const PxVec3* fattenAABBs, PxF32 maxT) const
{
	// implements Kay-Kajiya 4-way SIMD test
	PX_UNUSED(resultsPtr);
	PX_UNUSED(maxResults);

	const PxU32 maxStack = 128;
	PxU32 stack1[maxStack];
	PxU32* stack = stack1+1;

	PX_ASSERT(mPages);
	PX_ASSERT((uintptr_t(mPages) & 127) == 0);
	PX_ASSERT((uintptr_t(this) & 15) == 0);

	PxU8* treeNodes8 = CAST_U8(mPages);

	Vec4V fattenAABBsX, fattenAABBsY, fattenAABBsZ;
	PX_UNUSED(fattenAABBsX); PX_UNUSED(fattenAABBsY); PX_UNUSED(fattenAABBsZ);
	if (inflate)
	{
		Vec4V fattenAABBs4 = Vec4V_From_PxVec3_WUndefined(*fattenAABBs);
		fattenAABBs4 = V4Add(fattenAABBs4, epsInflateFloat4); // US2385 - shapes are "closed" meaning exactly touching shapes should report overlap
		fattenAABBsX = V4SplatElement<0>(fattenAABBs4);
		fattenAABBsY = V4SplatElement<1>(fattenAABBs4);
		fattenAABBsZ = V4SplatElement<2>(fattenAABBs4);
	}

	Vec4V maxT4;
	maxT4 = V4Load(maxT);
	Vec4V rayP = Vec4V_From_PxVec3_WUndefined(rayOrigin);
	Vec4V rayD = Vec4V_From_PxVec3_WUndefined(rayDir);
	// Clamp each |direction| component up to epsilon while keeping its sign,
	// so the reciprocal below never divides by (near) zero.
	VecU32V raySign = V4U32and(VecU32V_ReinterpretFrom_Vec4V(rayD), signMask);
	Vec4V rayDAbs = V4Abs(rayD); // abs value of rayD
	Vec4V rayInvD = Vec4V_ReinterpretFrom_VecU32V(V4U32or(raySign, VecU32V_ReinterpretFrom_Vec4V(V4Max(rayDAbs, epsFloat4)))); // clamp near-zero components up to epsilon
	rayD = rayInvD;

	//rayInvD = V4Recip(rayInvD);
	// Newton-Raphson iteration for reciprocal (see wikipedia):
	// X[n+1] = X[n]*(2-original*X[n]), X[0] = V4RecipFast estimate
	//rayInvD = rayInvD*(twos-rayD*rayInvD);
	rayInvD = V4RecipFast(rayInvD); // initial estimate, not accurate enough
	rayInvD = V4Mul(rayInvD, V4NegMulSub(rayD, rayInvD, twos));

	// P+tD=a; t=(a-P)/D
	// t=(a - p.x)*1/d.x = a/d.x +(- p.x/d.x)
	Vec4V rayPinvD = V4NegMulSub(rayInvD, rayP, zeroes);
	Vec4V rayInvDsplatX = V4SplatElement<0>(rayInvD);
	Vec4V rayInvDsplatY = V4SplatElement<1>(rayInvD);
	Vec4V rayInvDsplatZ = V4SplatElement<2>(rayInvD);
	Vec4V rayPinvDsplatX = V4SplatElement<0>(rayPinvD);
	Vec4V rayPinvDsplatY = V4SplatElement<1>(rayPinvD);
	Vec4V rayPinvDsplatZ = V4SplatElement<2>(rayPinvD);

	PX_ASSERT(RTREE_N == 4 || RTREE_N == 8);
	PX_ASSERT(mNumRootPages > 0);

	PxU32 stackPtr = 0;
	for (PxI32 j = PxI32(mNumRootPages-1); j >= 0; j --)
		stack[stackPtr++] = j*sizeof(RTreePage);

	PX_ALIGN_PREFIX(16) PxU32 resa[4] PX_ALIGN_SUFFIX(16);

	while (stackPtr)
	{
		PxU32 top = stack[--stackPtr];
		if (top&1) // isLeaf test
		{
			top--;
			PxF32 newMaxT = maxT;
			if (!callback->processResults(1, &top, newMaxT))
				return;
			/* shrink the ray if newMaxT is reduced compared to the original maxT */
			if (maxT != newMaxT)
			{
				PX_ASSERT(newMaxT < maxT);
				maxT = newMaxT;
				maxT4 = V4Load(newMaxT);
			}
			continue;
		}

		RTreePage* PX_RESTRICT tn = reinterpret_cast<RTreePage*>(treeNodes8 + top);

		// 6i load
		Vec4V minx4a = V4LoadA(tn->minx), miny4a = V4LoadA(tn->miny), minz4a = V4LoadA(tn->minz);
		Vec4V maxx4a = V4LoadA(tn->maxx), maxy4a = V4LoadA(tn->maxy), maxz4a = V4LoadA(tn->maxz);

		// 1i disabled test
		// AP scaffold - optimization opportunity - can save 2 instructions here
		VecU32V ignore4a = V4IsGrtrV32u(minx4a, maxx4a); // 1 if degenerate box (empty slot in the page)

		if (inflate)
		{
			// 6i
			maxx4a = V4Add(maxx4a, fattenAABBsX); maxy4a = V4Add(maxy4a, fattenAABBsY); maxz4a = V4Add(maxz4a, fattenAABBsZ);
			minx4a = V4Sub(minx4a, fattenAABBsX); miny4a = V4Sub(miny4a, fattenAABBsY); minz4a = V4Sub(minz4a, fattenAABBsZ);
		}

		// P+tD=a; t=(a-P)/D
		// t=(a - p.x)*1/d.x = a/d.x +(- p.x/d.x)
		// 6i
		Vec4V tminxa0 = V4MulAdd(minx4a, rayInvDsplatX, rayPinvDsplatX);
		Vec4V tminya0 = V4MulAdd(miny4a, rayInvDsplatY, rayPinvDsplatY);
		Vec4V tminza0 = V4MulAdd(minz4a, rayInvDsplatZ, rayPinvDsplatZ);
		Vec4V tmaxxa0 = V4MulAdd(maxx4a, rayInvDsplatX, rayPinvDsplatX);
		Vec4V tmaxya0 = V4MulAdd(maxy4a, rayInvDsplatY, rayPinvDsplatY);
		Vec4V tmaxza0 = V4MulAdd(maxz4a, rayInvDsplatZ, rayPinvDsplatZ);

		// test half-spaces
		// P+tD=dN
		// t = (d(N,D)-(P,D))/(D,D) , (D,D)=1

		// compute 4x dot products (N,D) and (P,N) for each AABB in the page

		// 6i
		// now compute tnear and tfar for each pair of planes for each box
		Vec4V tminxa = V4Min(tminxa0, tmaxxa0); Vec4V tmaxxa = V4Max(tminxa0, tmaxxa0);
		Vec4V tminya = V4Min(tminya0, tmaxya0); Vec4V tmaxya = V4Max(tminya0, tmaxya0);
		Vec4V tminza = V4Min(tminza0, tmaxza0); Vec4V tmaxza = V4Max(tminza0, tmaxza0);

		// 8i
		Vec4V maxOfNeasa = V4Max(V4Max(tminxa, tminya), tminza);
		Vec4V minOfFarsa = V4Min(V4Min(tmaxxa, tmaxya), tmaxza);
		ignore4a = V4U32or(ignore4a, V4IsGrtrV32u(epsFloat4, minOfFarsa)); // if tfar is negative, ignore since its a ray, not a line
		// AP scaffold: update the build to eliminate 3 more instructions for ignore4a above
		//VecU32V ignore4a = V4IsGrtrV32u(epsFloat4, minOfFarsa); // if tfar is negative, ignore since its a ray, not a line
		ignore4a = V4U32or(ignore4a, V4IsGrtrV32u(maxOfNeasa, maxT4)); // if tnear is over maxT, ignore this result

		// 2i
		VecU32V resa4 = V4IsGrtrV32u(maxOfNeasa, minOfFarsa); // if 1 => fail
		resa4 = V4U32or(resa4, ignore4a);

		// 1i
		V4U32StoreAligned(resa4, reinterpret_cast<VecU32V*>(resa));

		PxU32* ptrs = (reinterpret_cast<RTreePage*>(tn))->ptrs;

		// Branchless conditional push: resa[i] is 0 on hit and 0xFFFFFFFF on
		// miss, so (1+resa[i]) is 1 or 0 (unsigned wrap) - a missed child is
		// written to the slot but immediately overwritten/ignored.
		stack[stackPtr] = ptrs[0]; stackPtr += (1+resa[0]); // AP scaffold TODO: use VecU32add
		stack[stackPtr] = ptrs[1]; stackPtr += (1+resa[1]);
		stack[stackPtr] = ptrs[2]; stackPtr += (1+resa[2]);
		stack[stackPtr] = ptrs[3]; stackPtr += (1+resa[3]);
	}
}

template void RTree::traverseRay<0>(
	const PxVec3&, const PxVec3&, const PxU32, PxU32*, Gu::RTree::CallbackRaycast*, const PxVec3*, PxF32 maxT) const;
template void RTree::traverseRay<1>(
	const PxVec3&, const PxVec3&, const PxU32, PxU32*, Gu::RTree::CallbackRaycast*, const PxVec3*, PxF32 maxT) const;

/////////////////////////////////////////////////////////////////////////
// OBB query. Per page, tests the OBB against 4 node AABBs at once using a
// separating-axis check on two candidate axes derived from closest-point
// clamping between the boxes (d1 = p1OBB-p1ABB, d2 = p1OBB-p2ABB).
// NOTE(review): only two axes are tested, so this looks like a conservative
// test - it may pass boxes that a full SAT would reject, which is acceptable
// for a midphase cull; confirm against the narrow phase's expectations.
void RTree::traverseOBB(
	const Gu::Box& obb, const PxU32 maxResults, PxU32* resultsPtr, Gu::RTree::Callback* callback) const
{
	PX_UNUSED(resultsPtr);
	PX_UNUSED(maxResults);

	const PxU32 maxStack = 128;
	PxU32 stack[maxStack];

	PX_ASSERT(mPages);
	PX_ASSERT((uintptr_t(mPages) & 127) == 0);
	PX_ASSERT((uintptr_t(this) & 15) == 0);

	PxU8* treeNodes8 = CAST_U8(mPages);
	PxU32* stackPtr = stack;

	Vec4V ones, halves, eps;
	ones = V4Load(1.0f);
	halves = V4Load(0.5f);
	eps = V4Load(1e-6f);

	PX_UNUSED(ones);

	Vec4V obbO = Vec4V_From_PxVec3_WUndefined(obb.center);
	Vec4V obbE = Vec4V_From_PxVec3_WUndefined(obb.extents);
	// Gu::Box::rot matrix columns are the OBB axes
	Vec4V obbX = Vec4V_From_PxVec3_WUndefined(obb.rot.column0);
	Vec4V obbY = Vec4V_From_PxVec3_WUndefined(obb.rot.column1);
	Vec4V obbZ = Vec4V_From_PxVec3_WUndefined(obb.rot.column2);

#if PX_WINDOWS || PX_XBOXONE
	// Visual Studio compiler hangs with #defines
	// On VMX platforms we use #defines in the other branch of this #ifdef to avoid register spills (LHS)
	Vec4V obbESplatX = V4SplatElement<0>(obbE);
	Vec4V obbESplatY = V4SplatElement<1>(obbE);
	Vec4V obbESplatZ = V4SplatElement<2>(obbE);
	Vec4V obbESplatNegX = V4Sub(zeroes, obbESplatX);
	Vec4V obbESplatNegY = V4Sub(zeroes, obbESplatY);
	Vec4V obbESplatNegZ = V4Sub(zeroes, obbESplatZ);
	Vec4V obbXE = V4MulAdd(obbX, obbESplatX, zeroes); // scale axii by E
	Vec4V obbYE = V4MulAdd(obbY, obbESplatY, zeroes); // scale axii by E
	Vec4V obbZE = V4MulAdd(obbZ, obbESplatZ, zeroes); // scale axii by E
	Vec4V obbOSplatX = V4SplatElement<0>(obbO);
	Vec4V obbOSplatY = V4SplatElement<1>(obbO);
	Vec4V obbOSplatZ = V4SplatElement<2>(obbO);
	Vec4V obbXSplatX = V4SplatElement<0>(obbX);
	Vec4V obbXSplatY = V4SplatElement<1>(obbX);
	Vec4V obbXSplatZ = V4SplatElement<2>(obbX);
	Vec4V obbYSplatX = V4SplatElement<0>(obbY);
	Vec4V obbYSplatY = V4SplatElement<1>(obbY);
	Vec4V obbYSplatZ = V4SplatElement<2>(obbY);
	Vec4V obbZSplatX = V4SplatElement<0>(obbZ);
	Vec4V obbZSplatY = V4SplatElement<1>(obbZ);
	Vec4V obbZSplatZ = V4SplatElement<2>(obbZ);
	Vec4V obbXESplatX = V4SplatElement<0>(obbXE);
	Vec4V obbXESplatY = V4SplatElement<1>(obbXE);
	Vec4V obbXESplatZ = V4SplatElement<2>(obbXE);
	Vec4V obbYESplatX = V4SplatElement<0>(obbYE);
	Vec4V obbYESplatY = V4SplatElement<1>(obbYE);
	Vec4V obbYESplatZ = V4SplatElement<2>(obbYE);
	Vec4V obbZESplatX = V4SplatElement<0>(obbZE);
	Vec4V obbZESplatY = V4SplatElement<1>(obbZE);
	Vec4V obbZESplatZ = V4SplatElement<2>(obbZE);
#else
	#define obbESplatX V4SplatElement<0>(obbE)
	#define obbESplatY V4SplatElement<1>(obbE)
	#define obbESplatZ V4SplatElement<2>(obbE)
	#define obbESplatNegX V4Sub(zeroes, obbESplatX)
	#define obbESplatNegY V4Sub(zeroes, obbESplatY)
	#define obbESplatNegZ V4Sub(zeroes, obbESplatZ)
	#define obbXE V4MulAdd(obbX, obbESplatX, zeroes)
	#define obbYE V4MulAdd(obbY, obbESplatY, zeroes)
	#define obbZE V4MulAdd(obbZ, obbESplatZ, zeroes)
	#define obbOSplatX V4SplatElement<0>(obbO)
	#define obbOSplatY V4SplatElement<1>(obbO)
	#define obbOSplatZ V4SplatElement<2>(obbO)
	#define obbXSplatX V4SplatElement<0>(obbX)
	#define obbXSplatY V4SplatElement<1>(obbX)
	#define obbXSplatZ V4SplatElement<2>(obbX)
	#define obbYSplatX V4SplatElement<0>(obbY)
	#define obbYSplatY V4SplatElement<1>(obbY)
	#define obbYSplatZ V4SplatElement<2>(obbY)
	#define obbZSplatX V4SplatElement<0>(obbZ)
	#define obbZSplatY V4SplatElement<1>(obbZ)
	#define obbZSplatZ V4SplatElement<2>(obbZ)
	#define obbXESplatX V4SplatElement<0>(obbXE)
	#define obbXESplatY V4SplatElement<1>(obbXE)
	#define obbXESplatZ V4SplatElement<2>(obbXE)
	#define obbYESplatX V4SplatElement<0>(obbYE)
	#define obbYESplatY V4SplatElement<1>(obbYE)
	#define obbYESplatZ V4SplatElement<2>(obbYE)
	#define obbZESplatX V4SplatElement<0>(obbZE)
	#define obbZESplatY V4SplatElement<1>(obbZE)
	#define obbZESplatZ V4SplatElement<2>(obbZE)
#endif

	PX_ASSERT(mPageSize == 4 || mPageSize == 8);
	PX_ASSERT(mNumRootPages > 0);

	for (PxI32 j = PxI32(mNumRootPages-1); j >= 0; j --)
		*stackPtr++ = j*sizeof(RTreePage);
	PxU32 cacheTopValid = true;
	PxU32 cacheTop = 0;

	PX_ALIGN_PREFIX(16) PxU32 resa_[4] PX_ALIGN_SUFFIX(16);

	do {
		stackPtr--;

		PxU32 top;
		if (cacheTopValid) // branch is faster than lhs
			top = cacheTop;
		else
			top = stackPtr[0];
		PX_ASSERT(!cacheTopValid || top == cacheTop);
		RTreePage* PX_RESTRICT tn = reinterpret_cast<RTreePage*>(treeNodes8 + top);

		const PxU32 offs = 0;
		PxU32* ptrs = (reinterpret_cast<RTreePage*>(tn))->ptrs;

		// 6i
		Vec4V minx4a = V4LoadA(tn->minx+offs);
		Vec4V miny4a = V4LoadA(tn->miny+offs);
		Vec4V minz4a = V4LoadA(tn->minz+offs);
		Vec4V maxx4a = V4LoadA(tn->maxx+offs);
		Vec4V maxy4a = V4LoadA(tn->maxy+offs);
		Vec4V maxz4a = V4LoadA(tn->maxz+offs);

		VecU32V noOverlapa;
		VecU32V resa4u;
		{
			// PRECOMPUTE FOR A BLOCK
			// 109 instr per 4 OBB/AABB
			// ABB iteration 1, start with OBB origin as other point -- 6
			// (clamp the OBB center to each AABB => closest point on the AABB)
			Vec4V p1ABBxa = V4Max(minx4a, V4Min(maxx4a, obbOSplatX));
			Vec4V p1ABBya = V4Max(miny4a, V4Min(maxy4a, obbOSplatY));
			Vec4V p1ABBza = V4Max(minz4a, V4Min(maxz4a, obbOSplatZ));

			// OBB iteration 1, move to OBB space first -- 12
			Vec4V p1ABBOxa = V4Sub(p1ABBxa, obbOSplatX);
			Vec4V p1ABBOya = V4Sub(p1ABBya, obbOSplatY);
			Vec4V p1ABBOza = V4Sub(p1ABBza, obbOSplatZ);
			Vec4V obbPrjXa = V4MulAdd(p1ABBOxa, obbXSplatX, V4MulAdd(p1ABBOya, obbXSplatY, V4MulAdd(p1ABBOza, obbXSplatZ, zeroes)));
			Vec4V obbPrjYa = V4MulAdd(p1ABBOxa, obbYSplatX, V4MulAdd(p1ABBOya, obbYSplatY, V4MulAdd(p1ABBOza, obbYSplatZ, zeroes)));
			Vec4V obbPrjZa = V4MulAdd(p1ABBOxa, obbZSplatX, V4MulAdd(p1ABBOya, obbZSplatY, V4MulAdd(p1ABBOza, obbZSplatZ, zeroes)));
			// clamp AABB point in OBB space to OBB extents. Since we scaled the axii, the extents are [-1,1] -- 6
			Vec4V pOBBxa = V4Max(obbESplatNegX, V4Min(obbPrjXa, obbESplatX));
			Vec4V pOBBya = V4Max(obbESplatNegY, V4Min(obbPrjYa, obbESplatY));
			Vec4V pOBBza = V4Max(obbESplatNegZ, V4Min(obbPrjZa, obbESplatZ));
			// go back to AABB space. we have x,y,z in obb space, need to multiply by axii -- 9
			Vec4V p1OBBxa = V4MulAdd(pOBBxa, obbXSplatX, V4MulAdd(pOBBya, obbYSplatX, V4MulAdd(pOBBza, obbZSplatX, obbOSplatX)));
			Vec4V p1OBBya = V4MulAdd(pOBBxa, obbXSplatY, V4MulAdd(pOBBya, obbYSplatY, V4MulAdd(pOBBza, obbZSplatY, obbOSplatY)));
			Vec4V p1OBBza = V4MulAdd(pOBBxa, obbXSplatZ, V4MulAdd(pOBBya, obbYSplatZ, V4MulAdd(pOBBza, obbZSplatZ, obbOSplatZ)));

			// ABB iteration 2 -- 6 instructions
			Vec4V p2ABBxa = V4Max(minx4a, V4Min(maxx4a, p1OBBxa));
			Vec4V p2ABBya = V4Max(miny4a, V4Min(maxy4a, p1OBBya));
			Vec4V p2ABBza = V4Max(minz4a, V4Min(maxz4a, p1OBBza));
			// above blocks add up to 12+12+15=39 instr
			// END PRECOMPUTE FOR A BLOCK

			// for AABBs precompute extents and center -- 9i
			Vec4V abbCxa = V4MulAdd(V4Add(maxx4a, minx4a), halves, zeroes);
			Vec4V abbCya = V4MulAdd(V4Add(maxy4a, miny4a), halves, zeroes);
			Vec4V abbCza = V4MulAdd(V4Add(maxz4a, minz4a), halves, zeroes);
			Vec4V abbExa = V4Sub(maxx4a, abbCxa);
			Vec4V abbEya = V4Sub(maxy4a, abbCya);
			Vec4V abbEza = V4Sub(maxz4a, abbCza);

			// now test separating axes D1 = p1OBB-p1ABB and D2 = p1OBB-p2ABB -- 37 instructions per axis
			// D1 first -- 3 instructions
			Vec4V d1xa = V4Sub(p1OBBxa, p1ABBxa), d1ya = V4Sub(p1OBBya, p1ABBya), d1za = V4Sub(p1OBBza, p1ABBza);

			// for AABB compute projections of extents and center -- 6
			Vec4V abbExd1Prja = V4MulAdd(d1xa, abbExa, zeroes);
			Vec4V abbEyd1Prja = V4MulAdd(d1ya, abbEya, zeroes);
			Vec4V abbEzd1Prja = V4MulAdd(d1za, abbEza, zeroes);
			Vec4V abbCd1Prja = V4MulAdd(d1xa, abbCxa, V4MulAdd(d1ya, abbCya, V4MulAdd(d1za, abbCza, zeroes)));

			// for obb project each halfaxis and origin and add abs values of half-axis projections -- 12 instructions
			Vec4V obbXEd1Prja = V4MulAdd(d1xa, obbXESplatX, V4MulAdd(d1ya, obbXESplatY, V4MulAdd(d1za, obbXESplatZ, zeroes)));
			Vec4V obbYEd1Prja = V4MulAdd(d1xa, obbYESplatX, V4MulAdd(d1ya, obbYESplatY, V4MulAdd(d1za, obbYESplatZ, zeroes)));
			Vec4V obbZEd1Prja = V4MulAdd(d1xa, obbZESplatX, V4MulAdd(d1ya, obbZESplatY, V4MulAdd(d1za, obbZESplatZ, zeroes)));
			Vec4V obbOd1Prja = V4MulAdd(d1xa, obbOSplatX, V4MulAdd(d1ya, obbOSplatY, V4MulAdd(d1za, obbOSplatZ, zeroes)));

			// compare lengths between projected centers with sum of projected radii -- 16i
			// epsNoOverlapd1a guards against a degenerate (near-zero) axis: the
			// separation only counts if the projected center distance exceeds eps.
			Vec4V originDiffd1a = v_absm(V4Sub(abbCd1Prja, obbOd1Prja));
			Vec4V absABBRd1a = V4Add(V4Add(v_absm(abbExd1Prja), v_absm(abbEyd1Prja)), v_absm(abbEzd1Prja));
			Vec4V absOBBRd1a = V4Add(V4Add(v_absm(obbXEd1Prja), v_absm(obbYEd1Prja)), v_absm(obbZEd1Prja));
			VecU32V noOverlapd1a = V4IsGrtrV32u(V4Sub(originDiffd1a, eps), V4Add(absABBRd1a, absOBBRd1a));
			VecU32V epsNoOverlapd1a = V4IsGrtrV32u(originDiffd1a, eps);

			// D2 next (35 instr)
			// 3i
			Vec4V d2xa = V4Sub(p1OBBxa, p2ABBxa), d2ya = V4Sub(p1OBBya, p2ABBya), d2za = V4Sub(p1OBBza, p2ABBza);
			// for AABB compute projections of extents and center -- 6
			Vec4V abbExd2Prja = V4MulAdd(d2xa, abbExa, zeroes);
			Vec4V abbEyd2Prja = V4MulAdd(d2ya, abbEya, zeroes);
			Vec4V abbEzd2Prja = V4MulAdd(d2za, abbEza, zeroes);
			Vec4V abbCd2Prja = V4MulAdd(d2xa, abbCxa, V4MulAdd(d2ya, abbCya, V4MulAdd(d2za, abbCza, zeroes)));
			// for obb project each halfaxis and origin and add abs values of half-axis projections -- 12i
			Vec4V obbXEd2Prja = V4MulAdd(d2xa, obbXESplatX, V4MulAdd(d2ya, obbXESplatY, V4MulAdd(d2za, obbXESplatZ, zeroes)));
			Vec4V obbYEd2Prja = V4MulAdd(d2xa, obbYESplatX, V4MulAdd(d2ya, obbYESplatY, V4MulAdd(d2za, obbYESplatZ, zeroes)));
			Vec4V obbZEd2Prja = V4MulAdd(d2xa, obbZESplatX, V4MulAdd(d2ya, obbZESplatY, V4MulAdd(d2za, obbZESplatZ, zeroes)));
			Vec4V obbOd2Prja = V4MulAdd(d2xa, obbOSplatX, V4MulAdd(d2ya, obbOSplatY, V4MulAdd(d2za, obbOSplatZ, zeroes)));
			// compare lengths between projected centers with sum of projected radii -- 16i
			Vec4V originDiffd2a = v_absm(V4Sub(abbCd2Prja, obbOd2Prja));
			Vec4V absABBRd2a = V4Add(V4Add(v_absm(abbExd2Prja), v_absm(abbEyd2Prja)), v_absm(abbEzd2Prja));
			Vec4V absOBBRd2a = V4Add(V4Add(v_absm(obbXEd2Prja), v_absm(obbYEd2Prja)), v_absm(obbZEd2Prja));
			VecU32V noOverlapd2a = V4IsGrtrV32u(V4Sub(originDiffd2a, eps), V4Add(absABBRd2a, absOBBRd2a));
			VecU32V epsNoOverlapd2a = V4IsGrtrV32u(originDiffd2a, eps);

			// 8i
			noOverlapa = V4U32or(V4U32and(noOverlapd1a, epsNoOverlapd1a), V4U32and(noOverlapd2a, epsNoOverlapd2a));
			VecU32V ignore4a = V4IsGrtrV32u(minx4a, maxx4a); // 1 if degenerate box (empty slot)
			noOverlapa = V4U32or(noOverlapa, ignore4a);
			resa4u = V4U32Andc(U4Load(1), noOverlapa); // 1 & ~noOverlap
			V4U32StoreAligned(resa4u, reinterpret_cast<VecU32V*>(resa_));
			///// 8+16+12+6+3+16+12+6+3+9+6+9+6+12+6+6=136i from load to result
		}

		cacheTopValid = false;
		for (PxU32 i = 0; i < 4; i++)
		{
			PxU32 ptr = ptrs[i+offs] & ~1; // clear the isLeaf bit
			// Note: opposite polarity to traverseAABB - resa_[i] == 1 means
			// "node i potentially overlaps the OBB".
			if (resa_[i])
			{
				if (tn->isLeaf(i))
				{
					if (!callback->processResults(1, &ptr))
						return;
				}
				else
				{
					*(stackPtr++) = ptr;
					cacheTop = ptr;
					cacheTopValid = true;
				}
			}
		}
	} while (stackPtr > stack);
}

} // namespace Gu

}
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepConvexTri.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepConvexTri.h
new file mode 100644
index 00000000..15263717
--- /dev/null
+++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepConvexTri.h
@@ -0,0 +1,103 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.".
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 

#ifndef GU_SWEEP_CONVEX_TRI
#define GU_SWEEP_CONVEX_TRI

#include "GuVecTriangle.h"
#include "GuVecConvexHull.h"
#include "GuConvexMesh.h"
#include "PxConvexMeshGeometry.h"
#include "GuGJKRaycast.h"

// Sweeps a convex hull against a single triangle using a GJK raycast, updating 'hit'
// if a closer impact than 'shrunkDistance' is found.
//
// v0, v1, v2         triangle vertices in mesh local space
// convexHull         the swept convex, in its own local space
// meshToConvex       transform taking mesh-space points into convex local space
// convexTransfV      convex-local -> world transform, used to report impact point/normal in world space
// convexSpaceDir     sweep motion vector expressed in convex local space (already scaled by distance, negated by caller)
// unitDir            world-space sweep direction (used for initial-overlap reporting)
// meshSpaceUnitDir   sweep direction in mesh space (used for backface culling; see reversed-test warning below)
// fullDistance       total sweep distance; the GJK toi in [0,1] is rescaled by this
// shrunkDistance     current best distance; only hits strictly closer than this are reported
// hit                [out] sweep hit, filled only when the function returns true
// isDoubleSided      if true, backface culling is disabled
// inflation          margin added around the convex for the GJK query
// initialOverlap     [out] set to true when the shapes already overlap at toi <= 0
// faceIndex          triangle index stored in hit.faceIndex
//
// return true if hit, false if no hit
static PX_FORCE_INLINE bool sweepConvexVsTriangle(
	const PxVec3& v0, const PxVec3& v1, const PxVec3& v2,
	ConvexHullV& convexHull, const Ps::aos::PsMatTransformV& meshToConvex, const Ps::aos::PsTransformV& convexTransfV,
	const Ps::aos::Vec3VArg convexSpaceDir, const PxVec3& unitDir, const PxVec3& meshSpaceUnitDir,
	const Ps::aos::FloatVArg fullDistance, PxReal shrunkDistance,
	PxSweepHit& hit, bool isDoubleSided, const PxReal inflation, bool& initialOverlap, PxU32 faceIndex)
{
	using namespace Ps::aos;
	// Create triangle normal (not normalized; only its sign vs the sweep dir matters here)
	const PxVec3 denormalizedNormal = (v1 - v0).cross(v2 - v1);

	// Backface culling
	// PT: WARNING, the test is reversed compared to usual because we pass -unitDir to this function
	const bool culled = !isDoubleSided && (denormalizedNormal.dot(meshSpaceUnitDir) <= 0.0f);
	if(culled)
		return false;

	const Vec3V zeroV = V3Zero();
	const FloatV zero = FZero();

	const Vec3V p0 = V3LoadU(v0);	// in mesh local space
	const Vec3V p1 = V3LoadU(v1);
	const Vec3V p2 = V3LoadU(v2);

	// transform triangle verts from mesh local to convex local space
	TriangleV triangleV(meshToConvex.transform(p0), meshToConvex.transform(p1), meshToConvex.transform(p2));

	FloatV toi;
	Vec3V closestA, normal;

	LocalConvex<TriangleV> convexA(triangleV);
	LocalConvex<ConvexHullV> convexB(convexHull);
	const Vec3V initialSearchDir = V3Sub(triangleV.getCenter(), convexHull.getCenter());
	// run GJK raycast
	// sweep triangle in convex local space vs convex, closestA will be the impact point in convex local space
	const bool gjkHit = gjkRaycastPenetration<LocalConvex<TriangleV>, LocalConvex<ConvexHullV> >(
		convexA, convexB, initialSearchDir, zero, zeroV, convexSpaceDir, toi, normal, closestA, inflation, false);
	if(!gjkHit)
		return false;

	const FloatV minDist = FLoad(shrunkDistance);
	// move impact point / normal from convex local space back to world space
	const Vec3V destWorldPointA = convexTransfV.transform(closestA);
	const Vec3V destNormal = V3Normalize(convexTransfV.rotate(normal));

	// toi <= 0 means the shapes already overlap at the sweep start
	if(FAllGrtrOrEq(zero, toi))
	{
		initialOverlap = true;	// PT: TODO: redundant with hit distance, consider removing
		return setInitialOverlapResults(hit, unitDir, faceIndex);
	}

	const FloatV dist = FMul(toi, fullDistance);	// scale the toi to original full sweep distance
	if(FAllGrtr(minDist, dist))	// is current dist < minDist?
	{
		hit.faceIndex	= faceIndex;
		hit.flags		= PxHitFlag::eDISTANCE | PxHitFlag::ePOSITION | PxHitFlag::eNORMAL | PxHitFlag::eFACE_INDEX;
		V3StoreU(destWorldPointA, hit.position);
		V3StoreU(destNormal, hit.normal);
		FStore(dist, &hit.distance);
		return true;	// report a hit
	}
	return false;	// report no hit
}

#endif
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepMesh.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepMesh.h new file mode 100644 index 00000000..c1f8521e --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepMesh.h @@ -0,0 +1,169 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.".
NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef GU_SWEEP_MESH_H
#define GU_SWEEP_MESH_H

#include "GuMidphaseInterface.h"
#include "GuVecConvexHull.h"

namespace physx
{

namespace Gu
{
	// PT: class to make sure we can safely V4Load Matrix34's last column
	class Matrix34Padded : public Cm::Matrix34
	{
	public:
		PX_FORCE_INLINE	Matrix34Padded(const Matrix34& src) : Matrix34(src)	{}
		PX_FORCE_INLINE	Matrix34Padded()									{}
		PX_FORCE_INLINE	~Matrix34Padded()									{}
		PxU32	padding;	// pads the last (translation) column so a 4-wide unaligned load past it stays in-bounds
	};
	// NOTE(review): this expression asserts sizeof != 16, which does not match the padding
	// comment above; upstream PhysX uses an alignment check here ("0==(sizeof(Matrix34Padded) & 0x0f)").
	// The "==16" form looks like a transcription artifact of this dump — verify against the real file.
	PX_COMPILE_TIME_ASSERT(0==(sizeof(Matrix34Padded)==16));

	// PT: intermediate class containing shared bits of code & members
	struct SweepShapeMeshHitCallback : MeshHitCallback<PxRaycastHit>
	{
		SweepShapeMeshHitCallback(CallbackMode::Enum mode, const PxHitFlags& hitFlags, bool flipNormal, float distCoef);

		const PxHitFlags	mHitFlags;			// query flags (MTD, PRECISE_SWEEP, MESH_ANY, ...)
		bool				mStatus;			// Default is false, set to true if a valid hit is found. Stays true once true.
		bool				mInitialOverlap;	// Default is false, set to true if an initial overlap hit is found. Reset for each hit.
		bool				mFlipNormal;		// If negative scale is used we need to flip normal
		PxReal				mDistCoeff;			// dist coeff from unscaled to scaled distance

		void operator=(const SweepShapeMeshHitCallback&) {}
	};

	// Midphase callback for capsule-vs-mesh sweeps; degenerates to a sphere sweep when p0 == p1.
	struct SweepCapsuleMeshHitCallback : SweepShapeMeshHitCallback
	{
		PxSweepHit&			mSweepHit;				// result hit, updated as closer triangles are found
		const Cm::Matrix34&	mVertexToWorldSkew;		// mesh vertex space -> world space (includes mesh scale)
		const PxReal		mTrueSweepDistance;		// max sweep distance that can be used
		PxReal				mBestAlignmentValue;	// best alignment value for triangle normal
		PxReal				mBestDist;				// best distance, not the same as sweepHit.distance, can be shorter by epsilon
		const Capsule&		mCapsule;
		const PxVec3&		mUnitDir;
		const bool			mMeshDoubleSided;		// PT: true if PxMeshGeometryFlag::eDOUBLE_SIDED
		const bool			mIsSphere;				// true when the capsule has zero length

		SweepCapsuleMeshHitCallback(PxSweepHit& sweepHit, const Cm::Matrix34& worldMatrix, PxReal distance, bool meshDoubleSided,
									const Capsule& capsule, const PxVec3& unitDir, const PxHitFlags& hitFlags, bool flipNormal, float distCoef);

		virtual PxAgain processHit(const PxRaycastHit& aHit, const PxVec3& v0, const PxVec3& v1, const PxVec3& v2, PxReal& shrunkMaxT, const PxU32*);

		// PT: TODO: unify these operators
		void operator=(const SweepCapsuleMeshHitCallback&) {}

		// Converts the accumulated state into the final sweep result (including optional MTD on initial overlap).
		bool finalizeHit(	PxSweepHit& sweepHit, const Capsule& lss, const PxTriangleMeshGeometry& triMeshGeom,
							const PxTransform& pose, bool isDoubleSided) const;
	};

#if PX_VC
	#pragma warning(push)
	#pragma warning( disable : 4324 )	// Padding was added at the end of a structure because of a __declspec(align) value.
#endif

	// Midphase callback for box-vs-mesh sweeps. Works in box local space; supports both a
	// precise SAT path (PxHitFlag::ePRECISE_SWEEP) and a GJK-raycast path.
	struct SweepBoxMeshHitCallback : SweepShapeMeshHitCallback
	{
		const Matrix34Padded&			mMeshToBox;		// mesh vertex space -> box local space (padded for SIMD loads)
		PxReal							mDist, mDist0;	// current best distance / initial distance
		physx::shdfnd::aos::FloatV		mDistV;			// SIMD mirror of mDist
		const Box&						mBox;
		const PxVec3&					mLocalDir;		// sweep dir in box space
		const PxVec3&					mWorldUnitDir;	// sweep dir in world space
		PxReal							mInflation;
		PxTriangle						mHitTriangle;	// best triangle so far (box space), kept for normal-flip logic
		physx::shdfnd::aos::Vec3V		mMinClosestA;	// impact point of best hit (box space)
		physx::shdfnd::aos::Vec3V		mMinNormal;		// normal of best hit (box space)
		physx::shdfnd::aos::Vec3V		mLocalMotionV;	// remaining motion vector, shrunk as hits are found
		PxU32							mMinTriangleIndex;
		PxVec3							mOneOverDir;	// per-component reciprocal of mLocalDir (0 where dir is 0)
		const bool						mBothTriangleSidesCollide;	// PT: true if PxMeshGeometryFlag::eDOUBLE_SIDED || PxHitFlag::eMESH_BOTH_SIDES

		SweepBoxMeshHitCallback(CallbackMode::Enum mode_, const Matrix34Padded& meshToBox, PxReal distance, bool bothTriangleSidesCollide,
								const Box& box, const PxVec3& localMotion, const PxVec3& localDir, const PxVec3& unitDir,
								const PxHitFlags& hitFlags, const PxReal inflation, bool flipNormal, float distCoef);

		virtual ~SweepBoxMeshHitCallback() {}

		virtual PxAgain processHit(const PxRaycastHit& meshHit, const PxVec3& lp0, const PxVec3& lp1, const PxVec3& lp2, PxReal& shrinkMaxT, const PxU32*);

		// Converts the accumulated state into the final sweep result (including optional MTD on initial overlap).
		bool finalizeHit(	PxSweepHit& sweepHit, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose,
							const PxTransform& boxTransform, const PxVec3& localDir,
							bool meshBothSides, bool isDoubleSided) const;

	private:
		SweepBoxMeshHitCallback& operator=(const SweepBoxMeshHitCallback&);
	};

	// Midphase callback for convex-vs-mesh sweeps; delegates per-triangle work to sweepConvexVsTriangle.
	struct SweepConvexMeshHitCallback : SweepShapeMeshHitCallback
	{
		PxTriangle							mHitTriangle;		// best triangle so far (mesh space), kept for normal-flip logic
		ConvexHullV							mConvexHull;		// swept convex in its local space
		physx::shdfnd::aos::PsMatTransformV	mMeshToConvex;		// mesh space -> convex local space
		physx::shdfnd::aos::PsTransformV	mConvexPoseV;		// convex local -> world
		const Cm::FastVertex2ShapeScaling&	mMeshScale;			// mesh vertex -> shape scaling
		PxSweepHit							mSweepHit;			// stores either the closest or any hit depending on value of mAnyHit
		physx::shdfnd::aos::FloatV			mInitialDistance;	// full sweep distance
		physx::shdfnd::aos::Vec3V			mConvexSpaceDir;	// convexPose.rotateInv(-unit*distance)
		PxVec3								mUnitDir;
		PxVec3								mMeshSpaceUnitDir;
		PxReal								mInflation;
		const bool							mAnyHit;			// PxHitFlag::eMESH_ANY: stop at the first hit
		const bool							mBothTriangleSidesCollide;	// PT: true if PxMeshGeometryFlag::eDOUBLE_SIDED || PxHitFlag::eMESH_BOTH_SIDES

		SweepConvexMeshHitCallback(	const ConvexHullData& hull, const PxMeshScale& convexScale, const Cm::FastVertex2ShapeScaling& meshScale,
									const PxTransform& convexPose, const PxTransform& meshPose,
									const PxVec3& unitDir, const PxReal distance, PxHitFlags hitFlags, const bool bothTriangleSidesCollide, const PxReal inflation,
									const bool anyHit, float distCoef);

		virtual ~SweepConvexMeshHitCallback() {}

		virtual PxAgain processHit(const PxRaycastHit& hit, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2, PxReal& shrunkMaxT, const PxU32*);

		// Converts the accumulated state into the final sweep result (including optional MTD on initial overlap).
		bool finalizeHit(	PxSweepHit& sweepHit, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose,
							const PxConvexMeshGeometry& convexGeom, const PxTransform& convexPose,
							const PxVec3& unitDir, PxReal inflation,
							bool isMtd, bool meshBothSides, bool isDoubleSided, bool bothTriangleSidesCollide);

	private:
		SweepConvexMeshHitCallback& operator=(const SweepConvexMeshHitCallback&);
	};

#if PX_VC
	#pragma warning(pop)
#endif

}
}

#endif
diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepsMesh.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepsMesh.cpp new file mode 100644 index 00000000..6efb85db --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuSweepsMesh.cpp @@ -0,0 +1,602 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#include "GuSweepTests.h"
#include "GuSweepMesh.h"
#include "GuInternal.h"
#include "GuConvexUtilsInternal.h"
#include "CmScaling.h"
#include "GuVecShrunkBox.h"
#include "GuSweepMTD.h"
#include "GuVecCapsule.h"
#include "GuSweepBoxTriangle_SAT.h"
#include "GuSweepCapsuleTriangle.h"
#include "GuSweepSphereTriangle.h"
#include "GuDistancePointTriangle.h"
#include "GuCapsule.h"

using namespace physx;
using namespace Gu;
using namespace Cm;
using namespace physx::shdfnd::aos;

#include "GuSweepConvexTri.h"

///////////////////////////////////////////////////////////////////////////////

// Sweeps a sphere against one triangle: handles the initial-overlap case inline
// (closest-point test against the triangle) then defers to sweepSphereTriangles.
// Returns true and fills 'hit'/'triNormalOut' when the sphere touches the triangle
// within 'distance' along 'unitDir'.
static bool sweepSphereTriangle(const PxTriangle& tri,
								const PxVec3& center, PxReal radius,
								const PxVec3& unitDir, const PxReal distance,
								PxSweepHit& hit, PxVec3& triNormalOut,
								PxHitFlags hitFlags, bool isDoubleSided)
{
	const bool meshBothSides = hitFlags & PxHitFlag::eMESH_BOTH_SIDES;
	if(!(hitFlags & PxHitFlag::eASSUME_NO_INITIAL_OVERLAP))
	{
		const bool doBackfaceCulling = !isDoubleSided && !meshBothSides;

		// PT: test if shapes initially overlap
		// PT: add culling here for now, but could be made more efficiently...

		// Create triangle normal
		PxVec3 denormalizedNormal;
		tri.denormalizedNormal(denormalizedNormal);

		// Backface culling
		if(doBackfaceCulling && (denormalizedNormal.dot(unitDir) > 0.0f))
			return false;

		float s_unused, t_unused;
		const PxVec3 cp = closestPtPointTriangle(center, tri.verts[0], tri.verts[1], tri.verts[2], s_unused, t_unused);
		const PxReal dist2 = (cp - center).magnitudeSquared();
		// sphere already touches the triangle at t=0 -> report initial overlap
		if(dist2<=radius*radius)
		{
			triNormalOut = denormalizedNormal.getNormalized();
			return setInitialOverlapResults(hit, unitDir, 0);
		}
	}

	return sweepSphereTriangles(1, &tri,
						center, radius,
						unitDir, distance,
						NULL,
						hit, triNormalOut,
						isDoubleSided, meshBothSides, false, false);
}

///////////////////////////////////////////////////////////////////////////////

// Shared-state ctor for all mesh-sweep callbacks: stores flags and resets hit status.
SweepShapeMeshHitCallback::SweepShapeMeshHitCallback(CallbackMode::Enum mode, const PxHitFlags& hitFlags, bool flipNormal, float distCoef) :
	MeshHitCallback<PxRaycastHit>	(mode),
	mHitFlags						(hitFlags),
	mStatus							(false),
	mInitialOverlap					(false),
	mFlipNormal						(flipNormal),
	mDistCoeff						(distCoef)
{
}

///////////////////////////////////////////////////////////////////////////////

// Capsule-vs-mesh callback ctor. 'mBestDist' starts a hair beyond the sweep distance so the
// first real hit always wins; 'mIsSphere' collapses the capsule path to the cheaper sphere path.
SweepCapsuleMeshHitCallback::SweepCapsuleMeshHitCallback(
	PxSweepHit& sweepHit, const Matrix34& worldMatrix, PxReal distance, bool meshDoubleSided,
	const Capsule& capsule, const PxVec3& unitDir, const PxHitFlags& hitFlags, bool flipNormal, float distCoef) :
	SweepShapeMeshHitCallback	(CallbackMode::eMULTIPLE, hitFlags, flipNormal, distCoef),
	mSweepHit					(sweepHit),
	mVertexToWorldSkew			(worldMatrix),
	mTrueSweepDistance			(distance),
	mBestAlignmentValue			(2.0f),
	mBestDist					(distance + GU_EPSILON_SAME_DISTANCE),
	mCapsule					(capsule),
	mUnitDir					(unitDir),
	mMeshDoubleSided			(meshDoubleSided),
	mIsSphere					(capsule.p0 == capsule.p1)
{
	mSweepHit.distance = mTrueSweepDistance;
}

// Per-triangle visitor: sweeps the capsule (or sphere) against one mesh triangle and keeps
// the hit if it is closer, or equally close but better aligned with the sweep direction.
PxAgain SweepCapsuleMeshHitCallback::processHit( // all reported coords are in mesh local space including hit.position
	const PxRaycastHit& aHit, const PxVec3& v0, const PxVec3& v1, const PxVec3& v2, PxReal& shrunkMaxT, const PxU32*)
{
	// mFlipNormal swaps v1/v2 to keep winding consistent under negative mesh scale
	const PxTriangle tmpt(	mVertexToWorldSkew.transform(v0),
							mVertexToWorldSkew.transform(mFlipNormal ? v2 : v1),
							mVertexToWorldSkew.transform(mFlipNormal ? v1 : v2));

	PxSweepHit localHit;	// PT: TODO: ctor!
	PxVec3 triNormal;
	// pick a farther hit within distEpsilon that is more opposing than the previous closest hit
	// make it a relative epsilon to make sure it still works with large distances
	const PxReal distEpsilon = GU_EPSILON_SAME_DISTANCE * PxMax(1.0f, mSweepHit.distance);
	const float minD = mSweepHit.distance + distEpsilon;
	if(mIsSphere)
	{
		if(!::sweepSphereTriangle(	tmpt,
									mCapsule.p0, mCapsule.radius,
									mUnitDir, minD,
									localHit, triNormal,
									mHitFlags, mMeshDoubleSided))
			return true;
	}
	else
	{
		// PT: this one is safe because cullbox is NULL (no need to allocate one more triangle)
		if(!sweepCapsuleTriangles_Precise(	1, &tmpt,
											mCapsule,
											mUnitDir, minD,
											NULL,
											localHit, triNormal,
											mHitFlags, mMeshDoubleSided,
											NULL))
			return true;
	}

	const PxReal alignmentValue = computeAlignmentValue(triNormal, mUnitDir);
	if(keepTriangle(localHit.distance, alignmentValue, mBestDist, mBestAlignmentValue, mTrueSweepDistance, distEpsilon))
	{
		mBestAlignmentValue = alignmentValue;

		// AP: need to shrink the sweep distance passed into sweepCapsuleTriangles for correctness so that next sweep is closer
		shrunkMaxT = localHit.distance * mDistCoeff;	// shrunkMaxT is scaled

		mBestDist = PxMin(mBestDist, localHit.distance);	// exact lower bound
		mSweepHit.flags		= localHit.flags;
		mSweepHit.distance	= localHit.distance;
		mSweepHit.normal	= localHit.normal;
		mSweepHit.position	= localHit.position;
		mSweepHit.faceIndex	= aHit.faceIndex;

		mStatus = true;
		//ML:this is the initial overlap condition
		if(localHit.distance == 0.0f)
		{
			mInitialOverlap = true;
			return false;	// abort traversal: nothing can beat distance 0
		}
		if(mHitFlags & PxHitFlag::eMESH_ANY)
			return false;	// abort traversal
	}
	return true;
}

// Finalizes the capsule sweep: on initial overlap optionally computes the MTD,
// otherwise just sets the standard result flags. Returns false if no hit was found.
bool SweepCapsuleMeshHitCallback::finalizeHit(	PxSweepHit& sweepHit, const Capsule& lss, const PxTriangleMeshGeometry& triMeshGeom,
												const PxTransform& pose, bool isDoubleSided) const
{
	if(!mStatus)
		return false;

	if(mInitialOverlap)
	{
		// PT: TODO: consider using 'setInitialOverlapResults' here
		bool hasContacts = false;
		if(mHitFlags & PxHitFlag::eMTD)
		{
			const Vec3V p0 = V3LoadU(mCapsule.p0);
			const Vec3V p1 = V3LoadU(mCapsule.p1);
			const FloatV radius = FLoad(lss.radius);
			CapsuleV capsuleV;
			capsuleV.initialize(p0, p1, radius);

			//we need to calculate the MTD
			hasContacts = computeCapsule_TriangleMeshMTD(triMeshGeom, pose, capsuleV, mCapsule.radius, isDoubleSided, sweepHit);
		}
		setupSweepHitForMTD(sweepHit, hasContacts, mUnitDir);
	}
	else
	{
		sweepHit.flags = PxHitFlag::eDISTANCE | PxHitFlag::eNORMAL | PxHitFlag::ePOSITION | PxHitFlag::eFACE_INDEX;
	}
	return true;
}

///////////////////////////////////////////////////////////////////////////////

// Entry point for capsule-vs-triangle-mesh sweeps: extracts the mesh and hands off to the midphase.
bool sweepCapsule_MeshGeom(GU_CAPSULE_SWEEP_FUNC_PARAMS)
{
	PX_UNUSED(capsuleGeom_);
	PX_UNUSED(capsulePose_);

	PX_ASSERT(geom.getType() == PxGeometryType::eTRIANGLEMESH);
	const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom);

	TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh);

	return Midphase::sweepCapsuleVsMesh(meshData, meshGeom, pose, lss, unitDir, distance, sweepHit, hitFlags, inflation);
}

///////////////////////////////////////////////////////////////////////////////

	// same as 'mat.transform(p)' but using SIMD
	static PX_FORCE_INLINE Vec4V transformV(const Vec4V p, const Matrix34Padded& mat)
	{
		Vec4V ResV = V4Scale(V4LoadU(&mat.m.column0.x), V4GetX(p));
		ResV = V4ScaleAdd(V4LoadU(&mat.m.column1.x), V4GetY(p), ResV);
		ResV = V4ScaleAdd(V4LoadU(&mat.m.column2.x), V4GetZ(p), ResV);
		ResV = V4Add(ResV, V4LoadU(&mat.p.x));	// PT: this load is safe thanks to padding
		return ResV;
	}

///////////////////////////////////////////////////////////////////////////////

// Box-vs-mesh callback ctor. Precomputes the reciprocal sweep direction for the SAT path
// (0 where a component of the direction is 0).
SweepBoxMeshHitCallback::SweepBoxMeshHitCallback(	CallbackMode::Enum mode_, const Matrix34Padded& meshToBox, PxReal distance, bool bothTriangleSidesCollide,
													const Box& box, const PxVec3& localMotion, const PxVec3& localDir, const PxVec3& unitDir,
													const PxHitFlags& hitFlags, const PxReal inflation, bool flipNormal, float distCoef) :
	SweepShapeMeshHitCallback	(mode_, hitFlags, flipNormal, distCoef),
	mMeshToBox					(meshToBox),
	mDist						(distance),
	mBox						(box),
	mLocalDir					(localDir),
	mWorldUnitDir				(unitDir),
	mInflation					(inflation),
	mBothTriangleSidesCollide	(bothTriangleSidesCollide)
{
	mLocalMotionV = V3LoadU(localMotion);
	mDistV = FLoad(distance);
	mDist0 = distance;
	mOneOverDir = PxVec3(
		mLocalDir.x!=0.0f ? 1.0f/mLocalDir.x : 0.0f,
		mLocalDir.y!=0.0f ? 1.0f/mLocalDir.y : 0.0f,
		mLocalDir.z!=0.0f ? 1.0f/mLocalDir.z : 0.0f);
}

// Per-triangle visitor for box sweeps. Two code paths:
// - PxHitFlag::ePRECISE_SWEEP: SAT-based triangle-vs-box sweep in box space;
// - default: GJK raycast (triangle vs box), with optional backface culling.
// Both shrink the remaining sweep distance as closer hits are found.
PxAgain SweepBoxMeshHitCallback::processHit( // all reported coords are in mesh local space including hit.position
	const PxRaycastHit& meshHit, const PxVec3& lp0, const PxVec3& lp1, const PxVec3& lp2, PxReal& shrinkMaxT, const PxU32*)
{
	if(mHitFlags & PxHitFlag::ePRECISE_SWEEP)
	{
		// mFlipNormal swaps the last two verts to keep winding consistent under negative mesh scale
		const PxTriangle currentTriangle(
			mMeshToBox.transform(lp0),
			mMeshToBox.transform(mFlipNormal ? lp2 : lp1),
			mMeshToBox.transform(mFlipNormal ? lp1 : lp2));

		PxF32 t = PX_MAX_REAL;	// PT: could be better!
		if(!triBoxSweepTestBoxSpace(currentTriangle, mBox.extents, mLocalDir, mOneOverDir, mDist, t, !mBothTriangleSidesCollide))
			return true;

		if(t <= mDist)
		{
			// PT: test if shapes initially overlap
			mDist = t;
			shrinkMaxT = t * mDistCoeff;	// shrunkMaxT is scaled
			mMinClosestA = V3LoadU(currentTriangle.verts[0]);	// PT: this is arbitrary
			mMinNormal = V3LoadU(-mWorldUnitDir);
			mStatus = true;
			mMinTriangleIndex = meshHit.faceIndex;
			mHitTriangle = currentTriangle;
			if(t == 0.0f)
			{
				mInitialOverlap = true;
				return false;	// abort traversal
			}
		}
	}
	else
	{
		const FloatV zero = FZero();

		// PT: SIMD code similar to:
		// const Vec3V triV0 = V3LoadU(mMeshToBox.transform(lp0));
		// const Vec3V triV1 = V3LoadU(mMeshToBox.transform(lp1));
		// const Vec3V triV2 = V3LoadU(mMeshToBox.transform(lp2));
		//
		// SIMD version works but we need to ensure all loads are safe.
		// For incoming vertices they should either come from the vertex array or from a binary deserialized file.
		// For the vertex array we can just allocate one more vertex. For the binary file it should be ok as soon
		// as vertices aren't the last thing serialized in the file.
		// For the matrix only the last column is a problem, and we can easily solve that with some padding in the local class.
		const Vec3V triV0 = Vec3V_From_Vec4V(transformV(V4LoadU(&lp0.x), mMeshToBox));
		const Vec3V triV1 = Vec3V_From_Vec4V(transformV(V4LoadU(mFlipNormal ? &lp2.x : &lp1.x), mMeshToBox));
		const Vec3V triV2 = Vec3V_From_Vec4V(transformV(V4LoadU(mFlipNormal ? &lp1.x : &lp2.x), mMeshToBox));

		// backface culling against the motion direction
		if(!mBothTriangleSidesCollide)
		{
			const Vec3V triNormal = V3Cross(V3Sub(triV2, triV1),V3Sub(triV0, triV1));
			if(FAllGrtrOrEq(V3Dot(triNormal, mLocalMotionV), zero))
				return true;
		}

		const Vec3V zeroV = V3Zero();
		const Vec3V boxExtents = V3LoadU(mBox.extents);
		const BoxV boxV(zeroV, boxExtents);

		const TriangleV triangleV(triV0, triV1, triV2);

		FloatV lambda;
		Vec3V closestA, normal;//closestA and normal is in the local space of convex hull
		LocalConvex<TriangleV> convexA(triangleV);
		LocalConvex<BoxV> convexB(boxV);
		const Vec3V initialSearchDir = V3Sub(triangleV.getCenter(), boxV.getCenter());
		if(!gjkRaycastPenetration< LocalConvex<TriangleV>, LocalConvex<BoxV> >(convexA, convexB, initialSearchDir, zero, zeroV, mLocalMotionV, lambda, normal, closestA, mInflation, false))
			return true;

		mStatus = true;
		mMinClosestA = closestA;
		mMinTriangleIndex = meshHit.faceIndex;
		if(FAllGrtrOrEq(zero, lambda))	// lambda < 0? => initial overlap
		{
			mInitialOverlap = true;
			shrinkMaxT = 0.0f;
			mDistV = zero;
			mDist = 0.0f;
			mMinNormal = V3LoadU(-mWorldUnitDir);
			return false;
		}

		PxF32 f;
		FStore(lambda, &f);
		mDist = f*mDist;	// shrink dist
		mLocalMotionV = V3Scale(mLocalMotionV, lambda);	// shrink localMotion
		mDistV = FMul(mDistV, lambda);	// shrink distV
		mMinNormal = normal;
		if(mDist * mDistCoeff < shrinkMaxT)	// shrink shrinkMaxT
			shrinkMaxT = mDist * mDistCoeff;	// shrunkMaxT is scaled

		//mHitTriangle = currentTriangle;
		V3StoreU(triV0, mHitTriangle.verts[0]);
		V3StoreU(triV1, mHitTriangle.verts[1]);
		V3StoreU(triV2, mHitTriangle.verts[2]);
	}
	return true;
}

// Finalizes the box sweep: MTD on initial overlap, otherwise converts the best box-space
// impact data back to world space, fixing up the normal orientation for the
// mesh-both-sides / double-sided conventions.
bool SweepBoxMeshHitCallback::finalizeHit(	PxSweepHit& sweepHit, const PxTriangleMeshGeometry& triMeshGeom, const PxTransform& pose,
											const PxTransform& boxTransform, const PxVec3& localDir,
											bool meshBothSides, bool isDoubleSided) const
{
	if(!mStatus)
		return false;

	Vec3V minClosestA = mMinClosestA;
	Vec3V minNormal = mMinNormal;
	sweepHit.faceIndex = mMinTriangleIndex;

	if(mInitialOverlap)
	{
		bool hasContacts = false;
		if(mHitFlags & PxHitFlag::eMTD)
			hasContacts = computeBox_TriangleMeshMTD(triMeshGeom, pose, mBox, boxTransform, mInflation, mBothTriangleSidesCollide, sweepHit);

		setupSweepHitForMTD(sweepHit, hasContacts, mWorldUnitDir);
	}
	else
	{
		sweepHit.distance = mDist;
		sweepHit.flags = PxHitFlag::eDISTANCE | PxHitFlag::eFACE_INDEX;

		// PT: we need the "best triangle" normal in order to call 'shouldFlipNormal'. We stored the best
		// triangle in both GJK & precise codepaths (in box space). We use a dedicated 'shouldFlipNormal'
		// function that delays computing the triangle normal.
		// TODO: would still be more efficient to store the best normal directly, it's already computed at least
		// in the GJK codepath.

		const Vec3V p0 = V3LoadU(&boxTransform.p.x);
		const QuatV q0 = QuatVLoadU(&boxTransform.q.x);
		const PsTransformV boxPos(p0, q0);

		if(mHitFlags & PxHitFlag::ePRECISE_SWEEP)
		{
			computeBoxLocalImpact(sweepHit.position, sweepHit.normal, sweepHit.flags, mBox, localDir, mHitTriangle, mHitFlags, isDoubleSided, meshBothSides, mDist);
		}
		else
		{
			sweepHit.flags |= PxHitFlag::eNORMAL|PxHitFlag::ePOSITION;

			// PT: now for the GJK path, we must first always negate the returned normal. Similar to what happens in the precise path,
			// we can't delay this anymore: our normal must be properly oriented in order to call 'shouldFlipNormal'.
			minNormal = V3Neg(minNormal);

			// PT: this one is to ensure the normal respects the mesh-both-sides/double-sided convention
			PxVec3 tmp;
			V3StoreU(minNormal, tmp);

			if(shouldFlipNormal(tmp, meshBothSides, isDoubleSided, mHitTriangle, localDir, NULL))
				minNormal = V3Neg(minNormal);

			// PT: finally, this moves everything back to world space
			V3StoreU(boxPos.rotate(minNormal), sweepHit.normal);
			V3StoreU(boxPos.transform(minClosestA), sweepHit.position);
		}
	}
	return true;
}

///////////////////////////////////////////////////////////////////////////////

// Entry point for box-vs-triangle-mesh sweeps: extracts the mesh and hands off to the midphase.
bool sweepBox_MeshGeom(GU_BOX_SWEEP_FUNC_PARAMS)
{
	PX_ASSERT(geom.getType() == PxGeometryType::eTRIANGLEMESH);
	PX_UNUSED(boxPose_);
	PX_UNUSED(boxGeom_);

	const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom);

	TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh);

	return Midphase::sweepBoxVsMesh(meshData, meshGeom, pose, box, unitDir, distance, sweepHit, hitFlags, inflation);
}

///////////////////////////////////////////////////////////////////////////////

// Convex-vs-mesh callback ctor. Precomputes all the space transforms needed by
// sweepConvexVsTriangle: mesh->convex transform, convex-space motion vector, and the
// scaled convex hull itself.
SweepConvexMeshHitCallback::SweepConvexMeshHitCallback(	const ConvexHullData& hull, const PxMeshScale& convexScale, const FastVertex2ShapeScaling& meshScale,
														const PxTransform& convexPose, const PxTransform& meshPose,
														const PxVec3& unitDir, const PxReal distance, PxHitFlags hitFlags, const bool bothTriangleSidesCollide, const PxReal inflation,
														const bool anyHit, float distCoef) :
	SweepShapeMeshHitCallback	(CallbackMode::eMULTIPLE, hitFlags, meshScale.flipsNormal(), distCoef),
	mMeshScale					(meshScale),
	mUnitDir					(unitDir),
	mInflation					(inflation),
	mAnyHit						(anyHit),
	mBothTriangleSidesCollide	(bothTriangleSidesCollide)
{
	mSweepHit.distance = distance;	// this will be shrinking progressively as we sweep and clip the sweep length
	mSweepHit.faceIndex = 0xFFFFFFFF;

	mMeshSpaceUnitDir = meshPose.rotateInv(unitDir);

	const Vec3V worldDir = V3LoadU(unitDir);
	const FloatV dist = FLoad(distance);
	const QuatV q0 = QuatVLoadU(&meshPose.q.x);
	const Vec3V p0 = V3LoadU(&meshPose.p.x);

	const QuatV q1 = QuatVLoadU(&convexPose.q.x);
	const Vec3V p1 = V3LoadU(&convexPose.p.x);

	const PsTransformV meshPoseV(p0, q0);
	const PsTransformV convexPoseV(p1, q1);

	mMeshToConvex = convexPoseV.transformInv(meshPoseV);
	mConvexPoseV = convexPoseV;
	mConvexSpaceDir = convexPoseV.rotateInv(V3Neg(V3Scale(worldDir, dist)));
	mInitialDistance = dist;

	const Vec3V vScale = V3LoadU_SafeReadW(convexScale.scale);	// PT: safe because 'rotation' follows 'scale' in PxMeshScale
	const QuatV vQuat = QuatVLoadU(&convexScale.rotation.x);
	mConvexHull.initialize(&hull, V3Zero(), vScale, vQuat, convexScale.isIdentity());
}

// Per-triangle visitor for convex sweeps: scales the triangle into mesh shape space, then
// delegates to sweepConvexVsTriangle which updates mSweepHit on a closer hit.
PxAgain SweepConvexMeshHitCallback::processHit( // all reported coords are in mesh local space including hit.position
	const PxRaycastHit& hit, const PxVec3& av0, const PxVec3& av1, const PxVec3& av2, PxReal& shrunkMaxT, const PxU32*)
{
	// mFlipNormal swaps the last two verts to keep winding consistent under negative mesh scale
	const PxVec3 v0 = mMeshScale * av0;
	const PxVec3 v1 = mMeshScale * (mFlipNormal ? av2 : av1);
	const PxVec3 v2 = mMeshScale * (mFlipNormal ? av1 : av2);

	// mSweepHit will be updated if sweep distance is < input mSweepHit.distance
	const PxReal oldDist = mSweepHit.distance;
	if(sweepConvexVsTriangle(
		v0, v1, v2, mConvexHull, mMeshToConvex, mConvexPoseV, mConvexSpaceDir,
		mUnitDir, mMeshSpaceUnitDir, mInitialDistance, oldDist, mSweepHit, mBothTriangleSidesCollide,
		mInflation, mInitialOverlap, hit.faceIndex))
	{
		mStatus = true;
		shrunkMaxT = mSweepHit.distance * mDistCoeff;	// shrunkMaxT is scaled

		// PT: added for 'shouldFlipNormal'
		mHitTriangle.verts[0] = v0;
		mHitTriangle.verts[1] = v1;
		mHitTriangle.verts[2] = v2;

		if(mAnyHit)
			return false;	// abort traversal

		if(mSweepHit.distance == 0.0f)
			return false;	// abort traversal: nothing can beat distance 0
	}
	return true;	// continue traversal
}

// Finalizes the convex sweep: MTD on initial overlap, otherwise copies the stored best hit,
// renormalizing and re-orienting the normal for the mesh-both-sides/double-sided convention.
bool SweepConvexMeshHitCallback::finalizeHit(	PxSweepHit& sweepHit, const PxTriangleMeshGeometry& meshGeom, const PxTransform& pose,
												const PxConvexMeshGeometry& convexGeom, const PxTransform& convexPose,
												const PxVec3& unitDir, PxReal inflation,
												bool isMtd, bool meshBothSides, bool isDoubleSided, bool bothTriangleSidesCollide)
{
	if(!mStatus)
		return false;

	if(mInitialOverlap)
	{
		bool hasContacts = false;
		if(isMtd)
			hasContacts = computeConvex_TriangleMeshMTD(meshGeom, pose, convexGeom, convexPose, inflation, bothTriangleSidesCollide, sweepHit);

		setupSweepHitForMTD(sweepHit, hasContacts, unitDir);

		sweepHit.faceIndex = mSweepHit.faceIndex;
	}
	else
	{
		sweepHit = mSweepHit;
		//sweepHit.position += unitDir * sweepHit.distance;
		sweepHit.normal = -sweepHit.normal;
		sweepHit.normal.normalize();

		// PT: this one is to ensure the normal respects the mesh-both-sides/double-sided convention
		// PT: beware, the best triangle is in mesh-space, but the impact data is in world-space already
		if(shouldFlipNormal(sweepHit.normal, meshBothSides, isDoubleSided, mHitTriangle, unitDir, &pose))
			sweepHit.normal = -sweepHit.normal;
	}
	return true;
}

+/////////////////////////////////////////////////////////////////////////////// + +bool sweepConvex_MeshGeom(GU_CONVEX_SWEEP_FUNC_PARAMS) +{ + PX_ASSERT(geom.getType() == PxGeometryType::eTRIANGLEMESH); + const PxTriangleMeshGeometry& meshGeom = static_cast<const PxTriangleMeshGeometry&>(geom); + + ConvexMesh* convexMesh = static_cast<ConvexMesh*>(convexGeom.convexMesh); + TriangleMesh* meshData = static_cast<TriangleMesh*>(meshGeom.triangleMesh); + + const bool idtScaleConvex = convexGeom.scale.isIdentity(); + const bool idtScaleMesh = meshGeom.scale.isIdentity(); + + FastVertex2ShapeScaling convexScaling; + if(!idtScaleConvex) + convexScaling.init(convexGeom.scale); + + FastVertex2ShapeScaling meshScaling; + if(!idtScaleMesh) + meshScaling.init(meshGeom.scale); + + PX_ASSERT(!convexMesh->getLocalBoundsFast().isEmpty()); + const PxBounds3 hullAABB = convexMesh->getLocalBoundsFast().transformFast(convexScaling.getVertex2ShapeSkew()); + + Box hullOBB; + computeHullOBB(hullOBB, hullAABB, 0.0f, Matrix34(convexPose), Matrix34(pose), meshScaling, idtScaleMesh); + + hullOBB.extents.x += inflation; + hullOBB.extents.y += inflation; + hullOBB.extents.z += inflation; + + const PxVec3 localDir = pose.rotateInv(unitDir); + + // inverse transform the sweep direction and distance to mesh space + PxVec3 meshSpaceSweepVector = meshScaling.getShape2VertexSkew().transform(localDir*distance); + const PxReal meshSpaceSweepDist = meshSpaceSweepVector.normalize(); + + PxReal distCoeff = 1.0f; + if (!idtScaleMesh) + distCoeff = meshSpaceSweepDist / distance; + + const bool meshBothSides = hitFlags & PxHitFlag::eMESH_BOTH_SIDES; + const bool isDoubleSided = meshGeom.meshFlags & PxMeshGeometryFlag::eDOUBLE_SIDED; + const bool bothTriangleSidesCollide = isDoubleSided || meshBothSides; + const bool anyHit = hitFlags & PxHitFlag::eMESH_ANY; + SweepConvexMeshHitCallback callback( + convexMesh->getHull(), convexGeom.scale, meshScaling, convexPose, pose, -unitDir, distance, hitFlags, + 
bothTriangleSidesCollide, inflation, anyHit, distCoeff); + + Midphase::sweepConvexVsMesh(meshData, hullOBB, meshSpaceSweepVector, meshSpaceSweepDist, callback, anyHit); + + const bool isMtd = hitFlags & PxHitFlag::eMTD; + return callback.finalizeHit(sweepHit, meshGeom, pose, convexGeom, convexPose, unitDir, inflation, isMtd, meshBothSides, isDoubleSided, bothTriangleSidesCollide); +} + +/////////////////////////////////////////////////////////////////////////////// + diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangle32.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangle32.h new file mode 100644 index 00000000..7607e730 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangle32.h @@ -0,0 +1,132 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. 
Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_TRIANGLE32_H +#define GU_TRIANGLE32_H + +#include "foundation/PxVec3.h" +#include "CmPhysXCommon.h" +#include "PsUtilities.h" + +namespace physx +{ +namespace Gu +{ + /** + \brief Structure used to store indices for a triangles points. T is either PxU32 or PxU16 + + */ + + template <class T> + struct TriangleT// : public Ps::UserAllocated + { + PX_INLINE TriangleT() {} + PX_INLINE TriangleT(T a, T b, T c) { v[0] = a; v[1] = b; v[2] = c; } + template <class TX> + PX_INLINE TriangleT(const TriangleT<TX>& other) { v[0] = other[0]; v[1] = other[1]; v[2] = other[2]; } + PX_INLINE T& operator[](T i) { return v[i]; } + template<class TX>//any type of TriangleT<>, possibly with different T + PX_INLINE TriangleT<T>& operator=(const TriangleT<TX>& i) { v[0]=i[0]; v[1]=i[1]; v[2]=i[2]; return *this; } + PX_INLINE const T& operator[](T i) const { return v[i]; } + + void flip() + { + Ps::swap(v[1], v[2]); + } + + PX_INLINE void center(const PxVec3* verts, PxVec3& center) const + { + const PxVec3& p0 = verts[v[0]]; + const PxVec3& p1 = verts[v[1]]; + const PxVec3& p2 = verts[v[2]]; + center = (p0+p1+p2)*0.33333333333333333333f; + } + + float area(const PxVec3* verts) const + { + const PxVec3& p0 = verts[v[0]]; + const PxVec3& p1 = verts[v[1]]; + const PxVec3& p2 = verts[v[2]]; + return ((p0-p1).cross(p0-p2)).magnitude() * 0.5f; + } + + PxU8 findEdge(T vref0, T vref1) const + { + if(v[0]==vref0 && v[1]==vref1) return 0; + else if(v[0]==vref1 && v[1]==vref0) return 
0; + else if(v[0]==vref0 && v[2]==vref1) return 1; + else if(v[0]==vref1 && v[2]==vref0) return 1; + else if(v[1]==vref0 && v[2]==vref1) return 2; + else if(v[1]==vref1 && v[2]==vref0) return 2; + return 0xff; + } + + // counter clock wise order + PxU8 findEdgeCCW(T vref0, T vref1) const + { + if(v[0]==vref0 && v[1]==vref1) return 0; + else if(v[0]==vref1 && v[1]==vref0) return 0; + else if(v[0]==vref0 && v[2]==vref1) return 2; + else if(v[0]==vref1 && v[2]==vref0) return 2; + else if(v[1]==vref0 && v[2]==vref1) return 1; + else if(v[1]==vref1 && v[2]==vref0) return 1; + return 0xff; + } + + bool replaceVertex(T oldref, T newref) + { + if(v[0]==oldref) { v[0] = newref; return true; } + else if(v[1]==oldref) { v[1] = newref; return true; } + else if(v[2]==oldref) { v[2] = newref; return true; } + return false; + } + + bool isDegenerate() const + { + if(v[0]==v[1]) return true; + if(v[1]==v[2]) return true; + if(v[2]==v[0]) return true; + return false; + } + + PX_INLINE void denormalizedNormal(const PxVec3* verts, PxVec3& normal) const + { + const PxVec3& p0 = verts[v[0]]; + const PxVec3& p1 = verts[v[1]]; + const PxVec3& p2 = verts[v[2]]; + normal = ((p2 - p1).cross(p0 - p1)); + } + + T v[3]; //vertex indices + }; +} + +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleCache.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleCache.h new file mode 100644 index 00000000..9dc2a453 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleCache.h @@ -0,0 +1,207 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef GU_TRIANGLE_CACHE_H +#define GU_TRIANGLE_CACHE_H +#include "PsHash.h" +#include "PsUtilities.h" + +namespace physx +{ + namespace Gu + { + struct CachedEdge + { + protected: + PxU32 mId0, mId1; + public: + CachedEdge(PxU32 i0, PxU32 i1) + { + mId0 = PxMin(i0, i1); + mId1 = PxMax(i0, i1); + } + + CachedEdge() + { + } + + PxU32 getId0() const { return mId0; } + PxU32 getId1() const { return mId1; } + + bool operator == (const CachedEdge& other) const + { + return mId0 == other.mId0 && mId1 == other.mId1; + } + + PxU32 getHashCode() const + { + return Ps::hash(mId0 << 16 | mId1); + } + }; + + struct CachedVertex + { + private: + PxU32 mId; + public: + CachedVertex(PxU32 id) + { + mId = id; + } + + CachedVertex() + { + } + + PxU32 getId() const { return mId; } + + PxU32 getHashCode() const + { + return mId; + } + + bool operator == (const CachedVertex& other) const + { + return mId == other.mId; + } + }; + + template <typename Elem, PxU32 MaxCount> + struct CacheMap + { + PX_COMPILE_TIME_ASSERT(MaxCount < 0xFF); + Elem mCache[MaxCount]; + PxU8 mNextInd[MaxCount]; + PxU8 mIndex[MaxCount]; + PxU32 mSize; + + CacheMap() : mSize(0) + { + for(PxU32 a = 0; a < MaxCount; ++a) + { + mIndex[a] = 0xFF; + } + } + + bool addData(const Elem& data) + { + if(mSize == MaxCount) + return false; + + const PxU8 hash = PxU8(data.getHashCode() % MaxCount); + + PxU8 index = hash; + PxU8 nextInd = mIndex[hash]; + while(nextInd != 0xFF) + { + index = nextInd; + if(mCache[index] == data) + return false; + nextInd = mNextInd[nextInd]; + } + + if(mIndex[hash] == 0xFF) + { + mIndex[hash] = Ps::to8(mSize); + } + else + { + mNextInd[index] = Ps::to8(mSize); + } + mNextInd[mSize] = 0xFF; + mCache[mSize++] = data; + return true; + } + + bool contains(const Elem& data) const + { + PxU32 hash = (data.getHashCode() % MaxCount); + PxU8 index = mIndex[hash]; + + while(index != 0xFF) + { + if(mCache[index] == data) + return true; + index = mNextInd[index]; + } + return false; + } + + const Elem* 
get(const Elem& data) const + { + PxU32 hash = (data.getHashCode() % MaxCount); + PxU8 index = mIndex[hash]; + + while(index != 0xFF) + { + if(mCache[index] == data) + return &mCache[index]; + index = mNextInd[index]; + } + return NULL; + } + }; + + template <PxU32 MaxTriangles> + struct TriangleCache + { + PxVec3 mVertices[3*MaxTriangles]; + PxU32 mIndices[3*MaxTriangles]; + PxU32 mTriangleIndex[MaxTriangles]; + PxU8 mEdgeFlags[MaxTriangles]; + PxU32 mNumTriangles; + + TriangleCache() : mNumTriangles(0) + { + } + + PX_FORCE_INLINE bool isEmpty() const { return mNumTriangles == 0; } + PX_FORCE_INLINE bool isFull() const { return mNumTriangles == MaxTriangles; } + PX_FORCE_INLINE void reset() { mNumTriangles = 0; } + + void addTriangle(const PxVec3* verts, const PxU32* indices, PxU32 triangleIndex, PxU8 edgeFlag) + { + PX_ASSERT(mNumTriangles < MaxTriangles); + PxU32 triInd = mNumTriangles++; + PxU32 triIndMul3 = triInd*3; + mVertices[triIndMul3] = verts[0]; + mVertices[triIndMul3+1] = verts[1]; + mVertices[triIndMul3+2] = verts[2]; + mIndices[triIndMul3] = indices[0]; + mIndices[triIndMul3+1] = indices[1]; + mIndices[triIndMul3+2] = indices[2]; + mTriangleIndex[triInd] = triangleIndex; + mEdgeFlags[triInd] = edgeFlag; + } + }; + } +} + +#endif + diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMesh.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMesh.cpp new file mode 100644 index 00000000..be47d3e1 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMesh.cpp @@ -0,0 +1,457 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ + +#include "PsIntrinsics.h" +#include "GuMidphaseInterface.h" +#include "GuSerialize.h" +#include "GuMeshFactory.h" +#include "CmRenderOutput.h" +#include "PxVisualizationParameter.h" +#include "GuConvexEdgeFlags.h" +#include "GuBox.h" +#include "PxMeshScale.h" +#include "CmUtils.h" + +using namespace physx; + +namespace physx +{ + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +static PxConcreteType::Enum gTable[] = { PxConcreteType::eTRIANGLE_MESH_BVH33, + PxConcreteType::eTRIANGLE_MESH_BVH34 + }; + +Gu::TriangleMesh::TriangleMesh(GuMeshFactory& factory, TriangleMeshData& d) +: PxTriangleMesh(PxType(gTable[d.mType]), PxBaseFlag::eOWNS_MEMORY | PxBaseFlag::eIS_RELEASABLE) +, mNbVertices (d.mNbVertices) +, mNbTriangles (d.mNbTriangles) +, mVertices (d.mVertices) +, mTriangles (d.mTriangles) +, mAABB (d.mAABB) +, mExtraTrigData (d.mExtraTrigData) +, mGeomEpsilon (d.mGeomEpsilon) +, mFlags (d.mFlags) +, mMaterialIndices (d.mMaterialIndices) +, mFaceRemap (d.mFaceRemap) +, mAdjacencies (d.mAdjacencies) + +, mMeshFactory (&factory) + +, mGRB_triIndices (d.mGRB_triIndices) + +, mGRB_triAdjacencies (d.mGRB_triAdjacencies) +, mGRB_vertValency (d.mGRB_vertValency) +, mGRB_adjVertStart (d.mGRB_adjVertStart) +, mGRB_adjVertices (d.mGRB_adjVertices) + +, mGRB_meshAdjVerticiesTotal (d.mGRB_meshAdjVerticiesTotal) +, mGRB_faceRemap (d.mGRB_faceRemap) +, mGRB_BV32Tree (d.mGRB_BV32Tree) +{ + // this constructor takes ownership of memory from the data object + d.mVertices = 0; + d.mTriangles = 0; + d.mExtraTrigData = 0; + d.mFaceRemap = 0; + d.mAdjacencies = 0; + d.mMaterialIndices = 0; + + d.mGRB_triIndices = 0; + + d.mGRB_triAdjacencies = 0; + d.mGRB_vertValency = 0; + d.mGRB_adjVertStart = 0; + d.mGRB_adjVertices = 0; + d.mGRB_faceRemap = 0; + d.mGRB_BV32Tree = 0; + + // PT: 'getPaddedBounds()' is only safe if we make sure 
the bounds member is followed by at least 32bits of data + PX_COMPILE_TIME_ASSERT(PX_OFFSET_OF(Gu::TriangleMesh, mExtraTrigData)>=PX_OFFSET_OF(Gu::TriangleMesh, mAABB)+4); + +} + +Gu::TriangleMesh::~TriangleMesh() +{ + if(getBaseFlags() & PxBaseFlag::eOWNS_MEMORY) + { + PX_FREE_AND_RESET(mExtraTrigData); + PX_FREE_AND_RESET(mFaceRemap); + PX_FREE_AND_RESET(mAdjacencies); + PX_FREE_AND_RESET(mMaterialIndices); + PX_FREE_AND_RESET(mTriangles); + PX_FREE_AND_RESET(mVertices); + + PX_FREE_AND_RESET(mGRB_triIndices); + + PX_FREE_AND_RESET(mGRB_triAdjacencies); + PX_FREE_AND_RESET(mGRB_vertValency); + PX_FREE_AND_RESET(mGRB_adjVertStart); + PX_FREE_AND_RESET(mGRB_adjVertices); + PX_FREE_AND_RESET(mGRB_faceRemap); + + BV32Tree* bv32Tree = reinterpret_cast<BV32Tree*>(mGRB_BV32Tree); + PX_DELETE_AND_RESET(bv32Tree); + + } +} + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// PT: used to be automatic but making it manual saves bytes in the internal mesh + +void Gu::TriangleMesh::exportExtraData(PxSerializationContext& stream) +{ + //PX_DEFINE_DYNAMIC_ARRAY(TriangleMesh, mVertices, PxField::eVEC3, mNbVertices, Ps::PxFieldFlag::eSERIALIZE), + if(mVertices) + { + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mVertices, mNbVertices * sizeof(PxVec3)); + } + + if(mTriangles) + { + const PxU32 triangleSize = mFlags & PxTriangleMeshFlag::e16_BIT_INDICES ? sizeof(PxU16) : sizeof(PxU32); + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mTriangles, mNbTriangles * 3 * triangleSize); + } + + //PX_DEFINE_DYNAMIC_ARRAY(TriangleMesh, mExtraTrigData, PxField::eBYTE, mNbTriangles, Ps::PxFieldFlag::eSERIALIZE), + if(mExtraTrigData) + { + // PT: it might not be needed to 16-byte align this array of PxU8.... 
+ stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mExtraTrigData, mNbTriangles * sizeof(PxU8)); + } + + if(mMaterialIndices) + { + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mMaterialIndices, mNbTriangles * sizeof(PxU16)); + } + + if(mFaceRemap) + { + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mFaceRemap, mNbTriangles * sizeof(PxU32)); + } + + if(mAdjacencies) + { + stream.alignData(PX_SERIAL_ALIGN); + stream.writeData(mAdjacencies, mNbTriangles * sizeof(PxU32) * 3); + } +} + +void Gu::TriangleMesh::importExtraData(PxDeserializationContext& context) +{ + // PT: vertices are followed by indices, so it will be safe to V4Load vertices from a deserialized binary file + if(mVertices) + mVertices = context.readExtraData<PxVec3, PX_SERIAL_ALIGN>(mNbVertices); + + if(mTriangles) + { + if(mFlags & PxTriangleMeshFlag::e16_BIT_INDICES) + mTriangles = context.readExtraData<PxU16, PX_SERIAL_ALIGN>(3*mNbTriangles); + else + mTriangles = context.readExtraData<PxU32, PX_SERIAL_ALIGN>(3*mNbTriangles); + } + + if(mExtraTrigData) + mExtraTrigData = context.readExtraData<PxU8, PX_SERIAL_ALIGN>(mNbTriangles); + + if(mMaterialIndices) + mMaterialIndices = context.readExtraData<PxU16, PX_SERIAL_ALIGN>(mNbTriangles); + + if(mFaceRemap) + mFaceRemap = context.readExtraData<PxU32, PX_SERIAL_ALIGN>(mNbTriangles); + + if(mAdjacencies) + mAdjacencies = context.readExtraData<PxU32, PX_SERIAL_ALIGN>(3*mNbTriangles); +} + +void Gu::TriangleMesh::onRefCountZero() +{ + if(mMeshFactory->removeTriangleMesh(*this)) + { + const PxType type = getConcreteType(); + GuMeshFactory* mf = mMeshFactory; + Cm::deletePxBase(this); + mf->notifyFactoryListener(this, type); + return; + } + + // PT: if we reach this point, we didn't find the mesh in the Physics object => don't delete! + // This prevents deleting the object twice. 
+ Ps::getFoundation().error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, "Gu::TriangleMesh::release: double deletion detected!"); +} +//~PX_SERIALIZATION + +void Gu::TriangleMesh::release() +{ + decRefCount(); +} + +#if PX_ENABLE_DYNAMIC_MESH_RTREE +PxVec3 * Gu::TriangleMesh::getVerticesForModification() +{ + Ps::getFoundation().error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, "PxTriangleMesh::getVerticesForModification() is only supported for meshes with PxMeshMidPhase::eBVHDynamic."); + + return NULL; +} + +PxBounds3 Gu::TriangleMesh::refitBVH() +{ + Ps::getFoundation().error(PxErrorCode::eINVALID_OPERATION, __FILE__, __LINE__, "PxTriangleMesh::refitBVH() is only supported for meshes with PxMeshMidPhase::eBVHDynamic."); + + return PxBounds3(mAABB.getMin(), mAABB.getMax()); +} +#endif + +#if PX_ENABLE_DEBUG_VISUALIZATION + +static void getTriangle(const Gu::TriangleMesh&, PxU32 i, PxVec3* wp, const PxVec3* vertices, const void* indices, bool has16BitIndices) +{ + PxU32 ref0, ref1, ref2; + + if(!has16BitIndices) + { + const PxU32* dtriangles = reinterpret_cast<const PxU32*>(indices); + ref0 = dtriangles[i*3+0]; + ref1 = dtriangles[i*3+1]; + ref2 = dtriangles[i*3+2]; + } + else + { + const PxU16* wtriangles = reinterpret_cast<const PxU16*>(indices); + ref0 = wtriangles[i*3+0]; + ref1 = wtriangles[i*3+1]; + ref2 = wtriangles[i*3+2]; + } + + wp[0] = vertices[ref0]; + wp[1] = vertices[ref1]; + wp[2] = vertices[ref2]; +} + +static void getTriangle(const Gu::TriangleMesh& mesh, PxU32 i, PxVec3* wp, const PxVec3* vertices, const void* indices, const Cm::Matrix34& absPose, bool has16BitIndices) +{ + PxVec3 localVerts[3]; + getTriangle(mesh, i, localVerts, vertices, indices, has16BitIndices); + + wp[0] = absPose.transform(localVerts[0]); + wp[1] = absPose.transform(localVerts[1]); + wp[2] = absPose.transform(localVerts[2]); +} + +static void visualizeActiveEdges(Cm::RenderOutput& out, const Gu::TriangleMesh& mesh, PxU32 nbTriangles, const PxU32* results, 
const Cm::Matrix34& absPose, const PxMat44& midt) +{ + const PxU8* extraTrigData = mesh.getExtraTrigData(); + PX_ASSERT(extraTrigData); + + const PxVec3* vertices = mesh.getVerticesFast(); + const void* indices = mesh.getTrianglesFast(); + + const PxU32 ecolor = PxU32(PxDebugColor::eARGB_YELLOW); + const bool has16Bit = mesh.has16BitIndices(); + for(PxU32 i=0; i<nbTriangles; i++) + { + const PxU32 index = results ? results[i] : i; + + PxVec3 wp[3]; + getTriangle(mesh, index, wp, vertices, indices, absPose, has16Bit); + + const PxU32 flags = extraTrigData[index]; + + if(flags & Gu::ETD_CONVEX_EDGE_01) + { + out << midt << ecolor << Cm::RenderOutput::LINES << wp[0] << wp[1]; + } + if(flags & Gu::ETD_CONVEX_EDGE_12) + { + out << midt << ecolor << Cm::RenderOutput::LINES << wp[1] << wp[2]; + } + if(flags & Gu::ETD_CONVEX_EDGE_20) + { + out << midt << ecolor << Cm::RenderOutput::LINES << wp[0] << wp[2]; + } + } +} + +void Gu::TriangleMesh::debugVisualize( + Cm::RenderOutput& out, const PxTransform& pose, const PxMeshScale& scaling, const PxBounds3& cullbox, + const PxU64 mask, const PxReal fscale, const PxU32 numMaterials) const +{ + PX_UNUSED(numMaterials); + + //bool cscale = !!(mask & ((PxU64)1 << PxVisualizationParameter::eCULL_BOX)); + const PxU64 cullBoxMask = PxU64(1) << PxVisualizationParameter::eCULL_BOX; + bool cscale = ((mask & cullBoxMask) == cullBoxMask); + + const PxMat44 midt(PxIdentity); + const Cm::Matrix34 absPose(PxMat33(pose.q) * scaling.toMat33(), pose.p); + + PxU32 nbTriangles = getNbTrianglesFast(); + const PxU32 nbVertices = getNbVerticesFast(); + const PxVec3* vertices = getVerticesFast(); + const void* indices = getTrianglesFast(); + + const PxDebugColor::Enum colors[] = + { + PxDebugColor::eARGB_BLACK, + PxDebugColor::eARGB_RED, + PxDebugColor::eARGB_GREEN, + PxDebugColor::eARGB_BLUE, + PxDebugColor::eARGB_YELLOW, + PxDebugColor::eARGB_MAGENTA, + PxDebugColor::eARGB_CYAN, + PxDebugColor::eARGB_WHITE, + PxDebugColor::eARGB_GREY, + 
PxDebugColor::eARGB_DARKRED, + PxDebugColor::eARGB_DARKGREEN, + PxDebugColor::eARGB_DARKBLUE, + }; + + const PxU32 colorCount = sizeof(colors)/sizeof(PxDebugColor::Enum); + + if(cscale) + { + const Gu::Box worldBox( + (cullbox.maximum + cullbox.minimum)*0.5f, + (cullbox.maximum - cullbox.minimum)*0.5f, + PxMat33(PxIdentity)); + + // PT: TODO: use the callback version here to avoid allocating this huge array + PxU32* results = reinterpret_cast<PxU32*>(PX_ALLOC_TEMP(sizeof(PxU32)*nbTriangles, "tmp triangle indices")); + LimitedResults limitedResults(results, nbTriangles, 0); + Midphase::intersectBoxVsMesh(worldBox, *this, pose, scaling, &limitedResults); + nbTriangles = limitedResults.mNbResults; + + if (fscale) + { + const PxU32 fcolor = PxU32(PxDebugColor::eARGB_DARKRED); + + for (PxU32 i=0; i<nbTriangles; i++) + { + const PxU32 index = results[i]; + PxVec3 wp[3]; + getTriangle(*this, index, wp, vertices, indices, absPose, has16BitIndices()); + + const PxVec3 center = (wp[0] + wp[1] + wp[2]) / 3.0f; + PxVec3 normal = (wp[0] - wp[1]).cross(wp[0] - wp[2]); + PX_ASSERT(!normal.isZero()); + normal = normal.getNormalized(); + + out << midt << fcolor << + Cm::DebugArrow(center, normal * fscale); + } + } + + if (mask & (PxU64(1) << PxVisualizationParameter::eCOLLISION_SHAPES)) + { + const PxU32 scolor = PxU32(PxDebugColor::eARGB_MAGENTA); + + out << midt << scolor; // PT: no need to output this for each segment! 
+ + PxDebugLine* segments = out.reserveSegments(nbTriangles*3); + for(PxU32 i=0; i<nbTriangles; i++) + { + const PxU32 index = results[i]; + PxVec3 wp[3]; + getTriangle(*this, index, wp, vertices, indices, absPose, has16BitIndices()); + segments[0] = PxDebugLine(wp[0], wp[1], scolor); + segments[1] = PxDebugLine(wp[1], wp[2], scolor); + segments[2] = PxDebugLine(wp[2], wp[0], scolor); + segments+=3; + } + } + + if ((mask & (PxU64(1) << PxVisualizationParameter::eCOLLISION_EDGES)) && mExtraTrigData) + visualizeActiveEdges(out, *this, nbTriangles, results, absPose, midt); + + PX_FREE(results); + } + else + { + if (fscale) + { + const PxU32 fcolor = PxU32(PxDebugColor::eARGB_DARKRED); + + for (PxU32 i=0; i<nbTriangles; i++) + { + PxVec3 wp[3]; + getTriangle(*this, i, wp, vertices, indices, absPose, has16BitIndices()); + + const PxVec3 center = (wp[0] + wp[1] + wp[2]) / 3.0f; + PxVec3 normal = (wp[0] - wp[1]).cross(wp[0] - wp[2]); + PX_ASSERT(!normal.isZero()); + normal = normal.getNormalized(); + + out << midt << fcolor << + Cm::DebugArrow(center, normal * fscale); + } + } + + if (mask & (PxU64(1) << PxVisualizationParameter::eCOLLISION_SHAPES)) + { + PxU32 scolor = PxU32(PxDebugColor::eARGB_MAGENTA); + + out << midt << scolor; // PT: no need to output this for each segment! 
+ + PxVec3* transformed = reinterpret_cast<PxVec3*>(PX_ALLOC(sizeof(PxVec3)*nbVertices, "PxVec3")); + for(PxU32 i=0;i<nbVertices;i++) + transformed[i] = absPose.transform(vertices[i]); + + PxDebugLine* segments = out.reserveSegments(nbTriangles*3); + for (PxU32 i=0; i<nbTriangles; i++) + { + PxVec3 wp[3]; + getTriangle(*this, i, wp, transformed, indices, has16BitIndices()); + const PxU32 localMaterialIndex = getTriangleMaterialIndex(i); + scolor = colors[localMaterialIndex % colorCount]; + + segments[0] = PxDebugLine(wp[0], wp[1], scolor); + segments[1] = PxDebugLine(wp[1], wp[2], scolor); + segments[2] = PxDebugLine(wp[2], wp[0], scolor); + segments+=3; + } + + PX_FREE(transformed); + } + + if ((mask & (PxU64(1) << PxVisualizationParameter::eCOLLISION_EDGES)) && mExtraTrigData) + visualizeActiveEdges(out, *this, nbTriangles, NULL, absPose, midt); + } +} + +#endif // #if PX_ENABLE_DEBUG_VISUALIZATION + +} // namespace physx diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMesh.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMesh.h new file mode 100644 index 00000000..854f43b5 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMesh.h @@ -0,0 +1,302 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_TRIANGLEMESH_H +#define GU_TRIANGLEMESH_H + +#include "foundation/PxIO.h" +#include "PxSimpleTriangleMesh.h" +#include "PxTriangleMeshGeometry.h" +#include "CmScaling.h" +#include "GuTriangleMesh.h" +#include "GuTriangle32.h" +#include "CmRefCountable.h" +#include "PxTriangle.h" +#include "PxTriangleMesh.h" +#include "CmRenderOutput.h" +#include "GuMeshData.h" +#include "GuCenterExtents.h" + +namespace physx +{ + +class GuMeshFactory; +class PxMeshScale; + +namespace Gu +{ + +#if PX_VC +#pragma warning(push) +#pragma warning(disable: 4324) // Padding was added at the end of a structure because of a __declspec(align) value. +#endif + +// Possible optimization: align the whole struct to cache line +class TriangleMesh : public PxTriangleMesh, public Ps::UserAllocated, public Cm::RefCountable +{ +//= ATTENTION! 
===================================================================================== +// Changing the data layout of this class breaks the binary serialization format. See comments for +// PX_BINARY_SERIAL_VERSION. If a modification is required, please adjust the getBinaryMetaData +// function. If the modification is made on a custom branch, please change PX_BINARY_SERIAL_VERSION +// accordingly. +//================================================================================================== +public: + +// PX_SERIALIZATION + TriangleMesh(PxBaseFlags baseFlags) : PxTriangleMesh(baseFlags), Cm::RefCountable(PxEmpty) {} + virtual void exportExtraData(PxSerializationContext& ctx); + void importExtraData(PxDeserializationContext&); + PX_PHYSX_COMMON_API static void getBinaryMetaData(PxOutputStream& stream); + virtual void release(); + + void resolveReferences(PxDeserializationContext& ) {} + virtual void requires(PxProcessPxBaseCallback&){} +//~PX_SERIALIZATION + +// Cm::RefCountable + virtual void onRefCountZero(); +//~Cm::RefCountable + + TriangleMesh(GuMeshFactory& factory, TriangleMeshData& data); + virtual ~TriangleMesh(); + +// PxTriangleMesh + virtual PxU32 getNbVertices() const { return mNbVertices; } + virtual const PxVec3* getVertices() const { return mVertices; } + virtual const PxU32* getTrianglesRemap() const { return mFaceRemap; } + virtual PxU32 getNbTriangles() const { return mNbTriangles; } + virtual const void* getTriangles() const { return mTriangles; } + virtual PxTriangleMeshFlags getTriangleMeshFlags() const { return PxTriangleMeshFlags(mFlags); } + virtual PxMaterialTableIndex getTriangleMaterialIndex(PxTriangleID triangleIndex) const { + return hasPerTriangleMaterials() ? 
getMaterials()[triangleIndex] : PxMaterialTableIndex(0xffff); } + +#if PX_ENABLE_DYNAMIC_MESH_RTREE + virtual PxVec3* getVerticesForModification(); + virtual PxBounds3 refitBVH(); +#endif + + virtual PxBounds3 getLocalBounds() const + { + PX_ASSERT(mAABB.isValid()); + return PxBounds3::centerExtents(mAABB.mCenter, mAABB.mExtents); + } + + virtual void acquireReference() { incRefCount(); } + virtual PxU32 getReferenceCount() const { return getRefCount(); } +//~PxTriangleMesh + // PT: this one is just to prevent instancing Gu::TriangleMesh. + // But you should use PxBase::getConcreteType() instead to avoid the virtual call. + virtual PxMeshMidPhase::Enum getMidphaseID() const = 0; + + PX_FORCE_INLINE const PxU32* getFaceRemap() const { return mFaceRemap; } + PX_FORCE_INLINE bool has16BitIndices() const { return (mFlags & PxMeshFlag::e16_BIT_INDICES) ? true : false; } + PX_FORCE_INLINE bool hasPerTriangleMaterials() const { return mMaterialIndices != NULL; } + PX_FORCE_INLINE PxU32 getNbVerticesFast() const { return mNbVertices; } + PX_FORCE_INLINE PxU32 getNbTrianglesFast() const { return mNbTriangles; } + PX_FORCE_INLINE const void* getTrianglesFast() const { return mTriangles; } + PX_FORCE_INLINE const PxVec3* getVerticesFast() const { return mVertices; } + PX_FORCE_INLINE const PxU32* getAdjacencies() const { return mAdjacencies; } + PX_FORCE_INLINE PxReal getGeomEpsilon() const { return mGeomEpsilon; } + PX_FORCE_INLINE const CenterExtents& getLocalBoundsFast() const { return mAABB; } + PX_FORCE_INLINE const PxU16* getMaterials() const { return mMaterialIndices; } + PX_FORCE_INLINE const PxU8* getExtraTrigData() const { return mExtraTrigData; } + + PX_FORCE_INLINE const CenterExtentsPadded& getPaddedBounds() const + { + // PT: see compile-time assert in cpp + return static_cast<const CenterExtentsPadded&>(mAABB); + } + + PX_FORCE_INLINE void computeWorldTriangle( + PxTriangle& worldTri, PxTriangleID triangleIndex, const Cm::Matrix34& worldMatrix, bool flipNormal 
= false, + PxU32* PX_RESTRICT vertexIndices=NULL, PxU32* PX_RESTRICT adjacencyIndices=NULL) const; + PX_FORCE_INLINE void getLocalTriangle(PxTriangle& localTri, PxTriangleID triangleIndex, bool flipNormal = false) const; + + void setMeshFactory(GuMeshFactory* factory) { mMeshFactory = factory; } + +protected: + PxU32 mNbVertices; + PxU32 mNbTriangles; + PxVec3* mVertices; + void* mTriangles; //!< 16 (<= 0xffff #vertices) or 32 bit trig indices (mNbTriangles * 3) + // 16 bytes block + + // PT: WARNING: bounds must be followed by at least 32bits of data for safe SIMD loading + CenterExtents mAABB; + PxU8* mExtraTrigData; //one per trig + PxReal mGeomEpsilon; //!< see comments in cooking code referencing this variable + // 16 bytes block + /* + low 3 bits (mask: 7) are the edge flags: + b001 = 1 = ignore edge 0 = edge v0-->v1 + b010 = 2 = ignore edge 1 = edge v0-->v2 + b100 = 4 = ignore edge 2 = edge v1-->v2 + */ + PxU8 mFlags; //!< Flag whether indices are 16 or 32 bits wide + //!< Flag whether triangle adajacencies are build + PxU16* mMaterialIndices; //!< the size of the array is numTriangles. + PxU32* mFaceRemap; //!< new faces to old faces mapping (after cleaning, etc). Usage: old = faceRemap[new] + PxU32* mAdjacencies; //!< Adjacency information for each face - 3 adjacent faces + //!< Set to 0xFFFFffff if no adjacent face + + GuMeshFactory* mMeshFactory; // PT: changed to pointer for serialization + +#if PX_ENABLE_DEBUG_VISUALIZATION +public: + /** + \brief Perform triangle mesh geometry debug visualization + + \param out Debug renderer. + \param pose World position. 
+ */ + void debugVisualize( Cm::RenderOutput& out, const PxTransform& pose, const PxMeshScale& scaling, const PxBounds3& cullbox, + const PxU64 mask, const PxReal fscale, const PxU32 numMaterials) const; +#endif + +public: + + // GRB data ------------------------- + void * mGRB_triIndices; //!< GRB: GPU-friendly tri indices [uint4] + + // TODO avoroshilov: cooking - adjacency info - duplicated, remove it and use 'mAdjacencies' and 'mExtraTrigData' see GuTriangleMesh.cpp:325 + void * mGRB_triAdjacencies; //!< GRB: adjacency data, with BOUNDARY and NONCONVEX flags (flags replace adj indices where applicable) + PxU32 * mGRB_vertValency; //!< GRB: number of adjacent vertices to a vertex + PxU32 * mGRB_adjVertStart; //!< GRB: offset for each vertex in the adjacency list + PxU32 * mGRB_adjVertices; //!< GRB: list of adjacent vertices + + PxU32 mGRB_meshAdjVerticiesTotal; //!< GRB: total number of indices in the 'mGRB_adjVertices' + PxU32* mGRB_faceRemap; //!< GRB : gpu to cpu triangle indice remap + void* mGRB_BV32Tree; //!< GRB: BV32 tree + // End of GRB data ------------------ + +}; + +#if PX_VC +#pragma warning(pop) +#endif + +} // namespace Gu + +PX_FORCE_INLINE void Gu::TriangleMesh::computeWorldTriangle(PxTriangle& worldTri, PxTriangleID triangleIndex, const Cm::Matrix34& worldMatrix, bool flipNormal, + PxU32* PX_RESTRICT vertexIndices, PxU32* PX_RESTRICT adjacencyIndices) const +{ + PxU32 vref0, vref1, vref2; + if(has16BitIndices()) + { + const Gu::TriangleT<PxU16>& T = (reinterpret_cast<const Gu::TriangleT<PxU16>*>(getTrianglesFast()))[triangleIndex]; + vref0 = T.v[0]; + vref1 = T.v[1]; + vref2 = T.v[2]; + } + else + { + const Gu::TriangleT<PxU32>& T = (reinterpret_cast<const Gu::TriangleT<PxU32>*>(getTrianglesFast()))[triangleIndex]; + vref0 = T.v[0]; + vref1 = T.v[1]; + vref2 = T.v[2]; + } + if (flipNormal) + Ps::swap<PxU32>(vref1, vref2); + const PxVec3* PX_RESTRICT vertices = getVerticesFast(); + worldTri.verts[0] = worldMatrix.transform(vertices[vref0]); + 
worldTri.verts[1] = worldMatrix.transform(vertices[vref1]); + worldTri.verts[2] = worldMatrix.transform(vertices[vref2]); + + if(vertexIndices) + { + vertexIndices[0] = vref0; + vertexIndices[1] = vref1; + vertexIndices[2] = vref2; + } + + if(adjacencyIndices) + { + if(getAdjacencies()) + { + adjacencyIndices[0] = flipNormal ? getAdjacencies()[triangleIndex*3 + 2] : getAdjacencies()[triangleIndex*3 + 0]; + adjacencyIndices[1] = getAdjacencies()[triangleIndex*3 + 1]; + adjacencyIndices[2] = flipNormal ? getAdjacencies()[triangleIndex*3 + 0] : getAdjacencies()[triangleIndex*3 + 2]; + } + else + { + adjacencyIndices[0] = 0xffffffff; + adjacencyIndices[1] = 0xffffffff; + adjacencyIndices[2] = 0xffffffff; + } + } +} + +PX_FORCE_INLINE void Gu::TriangleMesh::getLocalTriangle(PxTriangle& localTri, PxTriangleID triangleIndex, bool flipNormal) const +{ + PxU32 vref0, vref1, vref2; + if(has16BitIndices()) + { + const Gu::TriangleT<PxU16>& T = (reinterpret_cast<const Gu::TriangleT<PxU16>*>(getTrianglesFast()))[triangleIndex]; + vref0 = T.v[0]; + vref1 = T.v[1]; + vref2 = T.v[2]; + } + else + { + const Gu::TriangleT<PxU32>& T = (reinterpret_cast<const Gu::TriangleT<PxU32>*>(getTrianglesFast()))[triangleIndex]; + vref0 = T.v[0]; + vref1 = T.v[1]; + vref2 = T.v[2]; + } + if (flipNormal) + Ps::swap<PxU32>(vref1, vref2); + const PxVec3* PX_RESTRICT vertices = getVerticesFast(); + localTri.verts[0] = vertices[vref0]; + localTri.verts[1] = vertices[vref1]; + localTri.verts[2] = vertices[vref2]; +} + +PX_INLINE float computeSweepData(const PxTriangleMeshGeometry& triMeshGeom, /*const Cm::FastVertex2ShapeScaling& scaling,*/ PxVec3& sweepOrigin, PxVec3& sweepExtents, PxVec3& sweepDir, float distance) +{ + PX_ASSERT(!Cm::isEmpty(sweepOrigin, sweepExtents)); + + const PxVec3 endPt = sweepOrigin + sweepDir*distance; + PX_ASSERT(!Cm::isEmpty(endPt, sweepExtents)); + + const Cm::FastVertex2ShapeScaling meshScaling(triMeshGeom.scale.getInverse()); // shape to vertex transform + + const 
PxMat33& vertex2ShapeSkew = meshScaling.getVertex2ShapeSkew(); + + const PxVec3 originBoundsCenter = vertex2ShapeSkew * sweepOrigin; + const PxVec3 originBoundsExtents = Cm::basisExtent(vertex2ShapeSkew.column0, vertex2ShapeSkew.column1, vertex2ShapeSkew.column2, sweepExtents); + + sweepOrigin = originBoundsCenter; + sweepExtents = originBoundsExtents; + sweepDir = (vertex2ShapeSkew * endPt) - originBoundsCenter; + return sweepDir.normalizeSafe(); +} + +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshBV4.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshBV4.cpp new file mode 100644 index 00000000..f10409e2 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshBV4.cpp @@ -0,0 +1,76 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "GuTriangleMesh.h" +#include "GuTriangleMeshBV4.h" + +using namespace physx; + +namespace physx +{ + +Gu::BV4TriangleMesh::BV4TriangleMesh(GuMeshFactory& factory, TriangleMeshData& d) +: TriangleMesh(factory, d) +{ + PX_ASSERT(d.mType==PxMeshMidPhase::eBVH34); + + BV4TriangleData& bv4Data = static_cast<BV4TriangleData&>(d); + mMeshInterface = bv4Data.mMeshInterface; + mBV4Tree = bv4Data.mBV4Tree; + mBV4Tree.mMeshInterface = &mMeshInterface; +} + +Gu::TriangleMesh* Gu::BV4TriangleMesh::createObject(PxU8*& address, PxDeserializationContext& context) +{ + BV4TriangleMesh* obj = new (address) BV4TriangleMesh(PxBaseFlag::eIS_RELEASABLE); + address += sizeof(BV4TriangleMesh); + obj->importExtraData(context); + obj->resolveReferences(context); + return obj; +} + +void Gu::BV4TriangleMesh::exportExtraData(PxSerializationContext& stream) +{ + mBV4Tree.exportExtraData(stream); + TriangleMesh::exportExtraData(stream); +} + +void Gu::BV4TriangleMesh::importExtraData(PxDeserializationContext& context) +{ + mBV4Tree.importExtraData(context); + TriangleMesh::importExtraData(context); + + if(has16BitIndices()) + mMeshInterface.setPointers(NULL, const_cast<IndTri16*>(reinterpret_cast<const IndTri16*>(getTrianglesFast())), getVerticesFast()); + else + mMeshInterface.setPointers(const_cast<IndTri32*>(reinterpret_cast<const IndTri32*>(getTrianglesFast())), NULL, 
getVerticesFast()); + mBV4Tree.mMeshInterface = &mMeshInterface; +} + +} // namespace physx diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshBV4.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshBV4.h new file mode 100644 index 00000000..608f5d2d --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshBV4.h @@ -0,0 +1,76 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. 
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_TRIANGLEMESH_BV4_H +#define GU_TRIANGLEMESH_BV4_H + +#include "GuTriangleMesh.h" + +namespace physx +{ +class GuMeshFactory; + +namespace Gu +{ + +#if PX_VC +#pragma warning(push) +#pragma warning(disable: 4324) // Padding was added at the end of a structure because of a __declspec(align) value. +#endif + +class BV4TriangleMesh : public TriangleMesh +{ + public: + virtual const char* getConcreteTypeName() const { return "PxBVH34TriangleMesh"; } +// PX_SERIALIZATION + BV4TriangleMesh(PxBaseFlags baseFlags) : TriangleMesh(baseFlags), mMeshInterface(PxEmpty), mBV4Tree(PxEmpty) {} + PX_PHYSX_COMMON_API virtual void exportExtraData(PxSerializationContext& ctx); + void importExtraData(PxDeserializationContext&); + PX_PHYSX_COMMON_API static TriangleMesh* createObject(PxU8*& address, PxDeserializationContext& context); + PX_PHYSX_COMMON_API static void getBinaryMetaData(PxOutputStream& stream); +//~PX_SERIALIZATION + BV4TriangleMesh(GuMeshFactory& factory, TriangleMeshData& data); + virtual ~BV4TriangleMesh(){} + + virtual PxMeshMidPhase::Enum getMidphaseID() const { return PxMeshMidPhase::eBVH34; } + PX_FORCE_INLINE const Gu::BV4Tree& getBV4Tree() const { return mBV4Tree; } + private: + Gu::SourceMesh mMeshInterface; + Gu::BV4Tree mBV4Tree; +}; + +#if PX_VC +#pragma warning(pop) +#endif + +} // namespace Gu + +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshRTree.cpp b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshRTree.cpp new file mode 100644 index 00000000..ec5a1931 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshRTree.cpp @@ -0,0 +1,148 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "GuTriangleMesh.h" +#include "GuTriangleMeshRTree.h" +#if PX_ENABLE_DYNAMIC_MESH_RTREE +#include "GuConvexEdgeFlags.h" +#endif + +using namespace physx; + +namespace physx +{ + +Gu::RTreeTriangleMesh::RTreeTriangleMesh(GuMeshFactory& factory, TriangleMeshData& d) +: TriangleMesh(factory, d) +{ + PX_ASSERT(d.mType==PxMeshMidPhase::eBVH33); + + RTreeTriangleData& rtreeData = static_cast<RTreeTriangleData&>(d); + mRTree = rtreeData.mRTree; + rtreeData.mRTree.mPages = NULL; +} + +Gu::TriangleMesh* Gu::RTreeTriangleMesh::createObject(PxU8*& address, PxDeserializationContext& context) +{ + RTreeTriangleMesh* obj = new (address) RTreeTriangleMesh(PxBaseFlag::eIS_RELEASABLE); + address += sizeof(RTreeTriangleMesh); + obj->importExtraData(context); + obj->resolveReferences(context); + return obj; +} + +void Gu::RTreeTriangleMesh::exportExtraData(PxSerializationContext& stream) +{ + mRTree.exportExtraData(stream); + TriangleMesh::exportExtraData(stream); +} + +void Gu::RTreeTriangleMesh::importExtraData(PxDeserializationContext& context) +{ + mRTree.importExtraData(context); + TriangleMesh::importExtraData(context); +} + +#if PX_ENABLE_DYNAMIC_MESH_RTREE +PxVec3 * Gu::RTreeTriangleMesh::getVerticesForModification() +{ + return const_cast<PxVec3*>(getVertices()); +} + +template<typename IndexType> +struct RefitCallback : Gu::RTree::CallbackRefit +{ + const PxVec3* newPositions; + const IndexType* indices; + + RefitCallback(const PxVec3* aNewPositions, const IndexType* aIndices) : newPositions(aNewPositions), indices(aIndices) {} + PX_FORCE_INLINE ~RefitCallback() {} + + virtual void recomputeBounds(PxU32 index, shdfnd::aos::Vec3V& aMn, shdfnd::aos::Vec3V& aMx) + { + using namespace shdfnd::aos; + + // Each leaf box has a set of triangles + Gu::LeafTriangles currentLeaf; currentLeaf.Data = index; + PxU32 nbTris = currentLeaf.GetNbTriangles(); + PxU32 baseTri = currentLeaf.GetTriangleIndex(); + PX_ASSERT(nbTris > 0); + const IndexType* vInds = indices + 3 * baseTri; 
+ Vec3V vPos = V3LoadU(newPositions[vInds[0]]); + Vec3V mn = vPos, mx = vPos; + //PxBounds3 result(newPositions[vInds[0]], newPositions[vInds[0]]); + vPos = V3LoadU(newPositions[vInds[1]]); + mn = V3Min(mn, vPos); mx = V3Max(mx, vPos); + vPos = V3LoadU(newPositions[vInds[2]]); + mn = V3Min(mn, vPos); mx = V3Max(mx, vPos); + for (PxU32 i = 1; i < nbTris; i++) + { + const IndexType* vInds1 = indices + 3 * (baseTri + i); + vPos = V3LoadU(newPositions[vInds1[0]]); + mn = V3Min(mn, vPos); mx = V3Max(mx, vPos); + vPos = V3LoadU(newPositions[vInds1[1]]); + mn = V3Min(mn, vPos); mx = V3Max(mx, vPos); + vPos = V3LoadU(newPositions[vInds1[2]]); + mn = V3Min(mn, vPos); mx = V3Max(mx, vPos); + } + + aMn = mn; + aMx = mx; + } +}; + +PxBounds3 Gu::RTreeTriangleMesh::refitBVH() +{ + PxBounds3 meshBounds; + if (has16BitIndices()) + { + RefitCallback<PxU16> cb(mVertices, static_cast<const PxU16*>(mTriangles)); + mRTree.refitAllStaticTree(cb, &meshBounds); + } + else + { + RefitCallback<PxU32> cb(mVertices, static_cast<const PxU32*>(mTriangles)); + mRTree.refitAllStaticTree(cb, &meshBounds); + } + + // reset edge flags and remember we did that using a mesh flag (optimization) + if ((mRTree.mFlags & RTree::IS_EDGE_SET) == 0) + { + mRTree.mFlags |= RTree::IS_EDGE_SET; + const PxU32 nbTris = getNbTriangles(); + for (PxU32 i = 0; i < nbTris; i++) + mExtraTrigData[i] |= (Gu::ETD_CONVEX_EDGE_01 | Gu::ETD_CONVEX_EDGE_12 | Gu::ETD_CONVEX_EDGE_20); + } + + mAABB = meshBounds; + return meshBounds; +} +#endif + +} // namespace physx diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshRTree.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshRTree.h new file mode 100644 index 00000000..7c861663 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleMeshRTree.h @@ -0,0 +1,81 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_TRIANGLEMESH_RTREE_H +#define GU_TRIANGLEMESH_RTREE_H + +#include "GuTriangleMesh.h" + +namespace physx +{ +class GuMeshFactory; + +namespace Gu +{ + +#if PX_VC +#pragma warning(push) +#pragma warning(disable: 4324) // Padding was added at the end of a structure because of a __declspec(align) value. 
+#endif + +class RTreeTriangleMesh : public TriangleMesh +{ + public: + virtual const char* getConcreteTypeName() const { return "PxBVH33TriangleMesh"; } +// PX_SERIALIZATION + RTreeTriangleMesh(PxBaseFlags baseFlags) : TriangleMesh(baseFlags), mRTree(PxEmpty) {} + PX_PHYSX_COMMON_API virtual void exportExtraData(PxSerializationContext& ctx); + void importExtraData(PxDeserializationContext&); + PX_PHYSX_COMMON_API static TriangleMesh* createObject(PxU8*& address, PxDeserializationContext& context); + PX_PHYSX_COMMON_API static void getBinaryMetaData(PxOutputStream& stream); +//~PX_SERIALIZATION + RTreeTriangleMesh(GuMeshFactory& factory, TriangleMeshData& data); + virtual ~RTreeTriangleMesh(){} + + virtual PxMeshMidPhase::Enum getMidphaseID() const { return PxMeshMidPhase::eBVH33; } + +#if PX_ENABLE_DYNAMIC_MESH_RTREE + virtual PxVec3* getVerticesForModification(); + virtual PxBounds3 refitBVH(); +#endif + + PX_FORCE_INLINE const Gu::RTree& getRTree() const { return mRTree; } + private: + Gu::RTree mRTree; +}; + +#if PX_VC +#pragma warning(pop) +#endif + +} // namespace Gu + +} + +#endif diff --git a/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleVertexPointers.h b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleVertexPointers.h new file mode 100644 index 00000000..952f6998 --- /dev/null +++ b/PhysX_3.4/Source/GeomUtils/src/mesh/GuTriangleVertexPointers.h @@ -0,0 +1,65 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. 
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef GU_TRIANGLE_VERTEX_POINTERS_H +#define GU_TRIANGLE_VERTEX_POINTERS_H + +#include "PxTriangleMesh.h" +#include "GuTriangleMesh.h" + +namespace physx { + namespace Gu { + + // PT: TODO: replace with Gu::TriangleMesh::getLocalTriangle(...) 
+ struct TriangleVertexPointers + { + static void PX_FORCE_INLINE getTriangleVerts(const TriangleMesh* mesh, PxU32 triangleIndex, PxVec3& v0, PxVec3& v1, PxVec3& v2) + { + const PxVec3* verts = mesh->getVerticesFast(); + if(mesh->has16BitIndices()) + { + const PxU16* tris = reinterpret_cast<const PxU16*>(mesh->getTrianglesFast()); + const PxU16* inds = tris+triangleIndex*3; + v0 = verts[inds[0]]; + v1 = verts[inds[1]]; + v2 = verts[inds[2]]; + } + else + { + const PxU32* tris = reinterpret_cast<const PxU32*>(mesh->getTrianglesFast()); + const PxU32* inds = tris+triangleIndex*3; + v0 = verts[inds[0]]; + v1 = verts[inds[1]]; + v2 = verts[inds[2]]; + } + } + }; +} } // physx, Gu + +#endif |