Initial commit:

PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
author: git perforce import user <a@b> 2016-10-25 12:29:14 -0600
committer: Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> 2016-10-25 18:56:37 -0500
commit: 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree: fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelParticles/src
download: physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
41 files changed, 11803 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtBatcher.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtBatcher.cpp
new file mode 100644
index 00000000..11ff89c3
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtBatcher.cpp
@@ -0,0 +1,255 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtBatcher.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#if PX_SUPPORT_GPU_PHYSX
+#include "PxPhysXGpu.h"
+#endif
+
+#include "task/PxTask.h"
+#include "PtContext.h"
+#include "PtParticleSystemSim.h"
+#include "PtParticleSystemSimCpu.h"
+
+using namespace physx;
+using namespace Pt;
+
+namespace
+{
+// Partitions a batch in place so that all non-GPU particle systems precede all GPU particle systems.
+//
+// When PX_SUPPORT_GPU_PHYSX is disabled nothing is moved and the whole batch is reported as the CPU range.
+// CPU entries keep their relative order (the nearest following CPU entry is always the one swapped forward),
+// GPU entries may end up in a different relative order.
+//
+// particleSystems  Batch to partition; reordered in place.
+// inputs           Optional array parallel to particleSystems; when non-NULL its elements are swapped in
+//                  lock-step. Callers without such an array pass a typed NULL pointer.
+// batchSize        Number of entries in particleSystems (and in inputs, if given).
+// cpuOffset        [out] Start of the CPU range, always 0.
+// cpuCount         [out] Number of CPU entries.
+// gpuOffset        [out] Start of the GPU range, equal to cpuCount.
+// gpuCount         [out] Number of GPU entries.
+template <class T>
+static void sortBatchedInputs(ParticleSystemSim** particleSystems, T* inputs, PxU32 batchSize, PxU32& cpuOffset,
+                              PxU32& cpuCount, PxU32& gpuOffset, PxU32& gpuCount)
+{
+	// Both parameters are only read when PX_SUPPORT_GPU_PHYSX is enabled.
+	PX_UNUSED(particleSystems);
+	PX_UNUSED(inputs);
+
+	cpuOffset = 0;
+	gpuOffset = 0;
+
+	// in place sort of both arrays
+	// i: first position not yet known to hold a CPU entry, j: scan position for the next CPU entry
+	PxU32 i = 0;
+	PxU32 j = 0;
+
+	while((i < batchSize) && (j < batchSize))
+	{
+#if PX_SUPPORT_GPU_PHYSX
+		if(particleSystems[i]->isGpuV())
+		{
+			// Look for the next CPU entry behind the GPU entry at i.
+			j = i + 1;
+			while(j < batchSize && particleSystems[j]->isGpuV())
+			{
+				j++;
+			}
+
+			if(j < batchSize)
+			{
+				Ps::swap(particleSystems[i], particleSystems[j]);
+				if(inputs)
+				{
+					Ps::swap(inputs[i], inputs[j]);
+				}
+				i++;
+			}
+			// Otherwise j == batchSize: only GPU entries remain from i on, which ends the outer loop.
+		}
+		else
+#endif
+		{
+			i++;
+		}
+	}
+
+	// i now marks the boundary between the CPU range [0, i) and the GPU range [i, batchSize).
+	gpuOffset = i;
+	cpuCount = gpuOffset;
+	gpuCount = batchSize - cpuCount;
+}
+}
+
+// Constructs the batcher for the given particle context.
+// Each fan-out task is constructed with a string literal identifying its stage (presumably a
+// debug/profiling name - confirm against Cm::FanoutTask). Only a reference to _context is stored.
+Batcher::Batcher(class Context& _context)
+: shapeGenTask("Pt::Batcher::shapeGen")
+, dynamicsCpuTask("Pt::Batcher::dynamicsCpu")
+, collPrepTask("Pt::Batcher::collPrep")
+, collisionCpuTask("Pt::Batcher::collisionCpu")
+, context(_context)
+{
+}
+
+// Schedules the shape generation stage for a batch of particle systems.
+//
+// With PX_SUPPORT_GPU_PHYSX the batch is first partitioned by sortBatchedInputs(). The GPU range is handed to
+// the scene GPU object in a single call (skipped if no such object is available), while every CPU particle
+// system schedules its own packet shapes update. Each task obtained this way is added as a dependent of
+// shapeGenTask and then has one reference removed.
+//
+// particleSystems  Batch of particle systems; may be reordered in place.
+// inputs           Per-system inputs, parallel to particleSystems; reordered together with it.
+// batchSize        Number of entries in both arrays.
+// continuation     Task passed on to every scheduled sub task.
+//
+// Returns shapeGenTask if it holds at least one reference after scheduling; otherwise returns continuation
+// after adding a reference to it.
+PxBaseTask& Batcher::scheduleShapeGeneration(ParticleSystemSim** particleSystems, ParticleShapesUpdateInput* inputs,
+                                             PxU32 batchSize, PxBaseTask& continuation)
+{
+	// Defaults used when GPU support is compiled out: the whole batch is CPU work.
+	PxU32 cpuOffset = 0;
+	PxU32 cpuCount = batchSize;
+
+#if PX_SUPPORT_GPU_PHYSX
+	PxU32 gpuOffset, gpuCount;
+	sortBatchedInputs(particleSystems, inputs, batchSize, cpuOffset, cpuCount, gpuOffset, gpuCount);
+	if(context.getSceneGpuFast() && gpuCount > 0)
+	{
+		// One call covers the complete GPU range of the batch.
+		PxBaseTask& task = context.getSceneGpuFast()->scheduleParticleShapeUpdate(
+		    particleSystems + gpuOffset, inputs + gpuOffset, gpuCount, continuation);
+		shapeGenTask.addDependent(task);
+		task.removeReference();
+	}
+#endif
+	for(PxU32 i = cpuOffset; i < (cpuOffset + cpuCount); ++i)
+	{
+		// Entries in the CPU range are treated as ParticleSystemSimCpu instances.
+		PxBaseTask& task =
+		    static_cast<ParticleSystemSimCpu*>(particleSystems[i])->schedulePacketShapesUpdate(inputs[i], continuation);
+		shapeGenTask.addDependent(task);
+		task.removeReference();
+	}
+
+	// No reference on the fan-out task: hand the continuation back to the caller instead.
+	if(shapeGenTask.getReference() == 0)
+	{
+		continuation.addReference();
+		return continuation;
+	}
+
+	// Drop surplus references so that exactly one is left on the returned task.
+	while(shapeGenTask.getReference() > 1)
+		shapeGenTask.removeReference();
+
+	return shapeGenTask;
+}
+
+// Schedules the dynamics update for every CPU particle system of the batch.
+//
+// particleSystems  Batch of particle systems; reordered in place when GPU support is compiled in.
+// batchSize        Number of entries in particleSystems.
+// continuation     Task passed on to every scheduled dynamics update.
+//
+// Returns dynamicsCpuTask (reduced to a single reference) if it holds any reference after scheduling,
+// otherwise continuation with one reference added.
+PxBaseTask& Batcher::scheduleDynamicsCpu(ParticleSystemSim** particleSystems, PxU32 batchSize, PxBaseTask& continuation)
+{
+	// Without GPU support the complete batch forms the CPU range.
+	PxU32 cpuOffset = 0;
+	PxU32 cpuCount = batchSize;
+#if PX_SUPPORT_GPU_PHYSX
+	PxU32 gpuOffset, gpuCount;
+	sortBatchedInputs(particleSystems, (PxU8*)NULL, batchSize, cpuOffset, cpuCount, gpuOffset, gpuCount);
+#endif
+	const PxU32 cpuEnd = cpuOffset + cpuCount;
+	for(PxU32 idx = cpuOffset; idx < cpuEnd; ++idx)
+	{
+		ParticleSystemSimCpu* cpuSim = static_cast<ParticleSystemSimCpu*>(particleSystems[idx]);
+		PxBaseTask& dynamicsTask = cpuSim->scheduleDynamicsUpdate(continuation);
+		dynamicsCpuTask.addDependent(dynamicsTask);
+		dynamicsTask.removeReference();
+	}
+
+	if(dynamicsCpuTask.getReference() != 0)
+	{
+		// Leave exactly one reference on the task that is handed back.
+		while(dynamicsCpuTask.getReference() > 1)
+		{
+			dynamicsCpuTask.removeReference();
+		}
+		return dynamicsCpuTask;
+	}
+
+	// The fan-out task holds no reference: return the continuation itself.
+	continuation.addReference();
+	return continuation;
+}
+
+// Schedules the collision preparation stage for a batch of particle systems.
+//
+// With PX_SUPPORT_GPU_PHYSX the batch is partitioned by sortBatchedInputs(). If a scene GPU object is
+// available and GPU systems exist, a single GPU collision input task is scheduled and becomes the
+// continuation of the input prep tasks in the GPU range. Input prep tasks in the CPU range continue
+// directly into 'continuation'. Every input prep task is added as a dependent of collPrepTask and then
+// has one reference removed.
+//
+// particleSystems  Batch of particle systems; reordered in place when GPU support is compiled in.
+// inputPrepTasks   Per-system prep tasks, parallel to particleSystems; reordered together with it.
+// batchSize        Number of entries in both arrays.
+// continuation     Task that follows the CPU prep tasks and the GPU collision input task.
+//
+// Returns collPrepTask if it holds at least one reference after scheduling; otherwise returns continuation
+// after adding a reference to it.
+PxBaseTask& Batcher::scheduleCollisionPrep(ParticleSystemSim** particleSystems, PxLightCpuTask** inputPrepTasks,
+                                           PxU32 batchSize, PxBaseTask& continuation)
+{
+	// Defaults used when GPU support is compiled out: the whole batch is CPU work.
+	PxU32 cpuOffset = 0;
+	PxU32 cpuCount = batchSize;
+#if PX_SUPPORT_GPU_PHYSX
+	PxU32 gpuOffset, gpuCount;
+	sortBatchedInputs(particleSystems, inputPrepTasks, batchSize, cpuOffset, cpuCount, gpuOffset, gpuCount);
+	if(context.getSceneGpuFast() && gpuCount > 0)
+	{
+		PxBaseTask& gpuCollisionInputTask = context.getSceneGpuFast()->scheduleParticleCollisionInputUpdate(
+		    particleSystems + gpuOffset, gpuCount, continuation);
+		for(PxU32 i = gpuOffset; i < (gpuOffset + gpuCount); ++i)
+		{
+			// GPU prep tasks continue into the shared GPU collision input task.
+			inputPrepTasks[i]->setContinuation(&gpuCollisionInputTask);
+			collPrepTask.addDependent(*inputPrepTasks[i]);
+			inputPrepTasks[i]->removeReference();
+		}
+		gpuCollisionInputTask.removeReference();
+	}
+#else
+	PX_UNUSED(particleSystems);
+	PX_UNUSED(batchSize);
+#endif
+	for(PxU32 i = cpuOffset; i < (cpuOffset + cpuCount); ++i)
+	{
+		// CPU prep tasks continue directly into the caller supplied continuation.
+		inputPrepTasks[i]->setContinuation(&continuation);
+		collPrepTask.addDependent(*inputPrepTasks[i]);
+		inputPrepTasks[i]->removeReference();
+	}
+
+	// No reference on the fan-out task: hand the continuation back to the caller instead.
+	if(collPrepTask.getReference() == 0)
+	{
+		continuation.addReference();
+		return continuation;
+	}
+
+	// Drop surplus references so that exactly one is left on the returned task.
+	while(collPrepTask.getReference() > 1)
+		collPrepTask.removeReference();
+
+	return collPrepTask;
+}
+
+// Schedules the collision update for every CPU particle system of the batch.
+//
+// particleSystems  Batch of particle systems; reordered in place when GPU support is compiled in.
+// batchSize        Number of entries in particleSystems.
+// continuation     Task passed on to every scheduled collision update.
+//
+// Returns collisionCpuTask (reduced to a single reference) if it holds any reference after scheduling,
+// otherwise continuation with one reference added.
+PxBaseTask& Batcher::scheduleCollisionCpu(ParticleSystemSim** particleSystems, PxU32 batchSize, PxBaseTask& continuation)
+{
+	// Without GPU support the complete batch forms the CPU range.
+	PxU32 cpuOffset = 0;
+	PxU32 cpuCount = batchSize;
+#if PX_SUPPORT_GPU_PHYSX
+	PxU32 gpuOffset, gpuCount;
+	sortBatchedInputs(particleSystems, (PxU8*)NULL, batchSize, cpuOffset, cpuCount, gpuOffset, gpuCount);
+#endif
+	const PxU32 cpuEnd = cpuOffset + cpuCount;
+	for(PxU32 idx = cpuOffset; idx < cpuEnd; ++idx)
+	{
+		ParticleSystemSimCpu* cpuSim = static_cast<ParticleSystemSimCpu*>(particleSystems[idx]);
+		PxBaseTask& collisionTask = cpuSim->scheduleCollisionUpdate(continuation);
+		collisionCpuTask.addDependent(collisionTask);
+		collisionTask.removeReference();
+	}
+
+	if(collisionCpuTask.getReference() != 0)
+	{
+		// Leave exactly one reference on the task that is handed back.
+		while(collisionCpuTask.getReference() > 1)
+		{
+			collisionCpuTask.removeReference();
+		}
+		return collisionCpuTask;
+	}
+
+	// The fan-out task holds no reference: return the continuation itself.
+	continuation.addReference();
+	return continuation;
+}
+
+// Schedules the GPU particle pipeline for the GPU particle systems of the batch.
+//
+// particleSystems  Batch of particle systems; reordered in place when GPU support is compiled in.
+// batchSize        Number of entries in particleSystems.
+// continuation     Task passed on to the GPU pipeline.
+//
+// Returns the task obtained from the scene GPU object when GPU support is compiled in, a scene GPU object
+// is available and the batch contains GPU systems. In every other case continuation is returned after
+// adding a reference to it.
+PxBaseTask& Batcher::schedulePipelineGpu(ParticleSystemSim** particleSystems, PxU32 batchSize, PxBaseTask& continuation)
+{
+#if PX_SUPPORT_GPU_PHYSX
+	PxU32 cpuOffset, cpuCount, gpuOffset, gpuCount;
+	sortBatchedInputs(particleSystems, (PxU8*)NULL, batchSize, cpuOffset, cpuCount, gpuOffset, gpuCount);
+	if(context.getSceneGpuFast() && gpuCount > 0)
+	{
+		return context.getSceneGpuFast()->scheduleParticlePipeline(particleSystems + gpuOffset, gpuCount, continuation);
+	}
+#else
+	PX_UNUSED(batchSize);
+	PX_UNUSED(particleSystems);
+#endif
+	// Nothing was scheduled on the GPU.
+	continuation.addReference();
+	return continuation;
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtBatcher.h b/PhysX_3.4/Source/LowLevelParticles/src/PtBatcher.h
new file mode 100644
index 00000000..7ff534c0
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtBatcher.h
@@ -0,0 +1,99 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_BATCHER_H
+#define PT_BATCHER_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "CmTask.h"
+
+namespace physx
+{
+
+namespace Pt
+{
+
+/**
+Schedules the particle pipeline stages for batches of particle systems.
+
+Each schedule method receives an array of particle systems plus a continuation task and returns the task
+the caller should treat as the head of the scheduled stage. The passed arrays may be reordered in place
+when GPU support is compiled in. The class is non-copyable.
+*/
+class Batcher : public Ps::UserAllocated
+{
+  public:
+	/**
+	Creates a batcher bound to the given context. Only a reference is stored.
+	*/
+	Batcher(class Context& _context);
+
+	/**
+	Issues shape update stages for a batch of particle systems.
+	Ownership of Pt::ParticleShapesUpdateInput::shapes passed to callee!
+	*/
+	physx::PxBaseTask& scheduleShapeGeneration(class ParticleSystemSim** particleSystems,
+	                                           struct ParticleShapesUpdateInput* inputs, PxU32 batchSize,
+	                                           physx::PxBaseTask& continuation);
+
+	/**
+	Issues dynamics (SPH) update on CPUs.
+	*/
+	physx::PxBaseTask& scheduleDynamicsCpu(class ParticleSystemSim** particleSystems, PxU32 batchSize,
+	                                       physx::PxBaseTask& continuation);
+
+	/**
+	Schedules collision prep work.
+	inputPrepTasks is parallel to particleSystems and holds one prep task per particle system.
+	*/
+	physx::PxBaseTask& scheduleCollisionPrep(class ParticleSystemSim** particleSystems,
+	                                         physx::PxLightCpuTask** inputPrepTasks, PxU32 batchSize,
+	                                         physx::PxBaseTask& continuation);
+
+	/**
+	Schedules collision update stages for a batch of particle systems on CPU.
+	Ownership of Pt::ParticleCollisionUpdateInput::contactManagerStream passed to callee!
+	*/
+	physx::PxBaseTask& scheduleCollisionCpu(class ParticleSystemSim** particleSystems, PxU32 batchSize,
+	                                        physx::PxBaseTask& continuation);
+
+	/**
+	Schedule gpu pipeline.
+	*/
+	physx::PxBaseTask& schedulePipelineGpu(ParticleSystemSim** particleSystems, PxU32 batchSize,
+	                                       physx::PxBaseTask& continuation);
+
+	// One fan-out task per stage; the tasks scheduled by the matching method are added as its dependents.
+	Cm::FanoutTask shapeGenTask;
+	Cm::FanoutTask dynamicsCpuTask;
+	Cm::FanoutTask collPrepTask;
+	Cm::FanoutTask collisionCpuTask;
+
+	// Context the batcher was created for.
+	class Context& context;
+
+  private:
+	// Declared but not defined here: copying is disabled.
+	Batcher(const Batcher&);
+	Batcher& operator=(const Batcher&);
+};
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_BATCHER_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtBodyTransformVault.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtBodyTransformVault.cpp
new file mode 100644
index 00000000..de4c282d
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtBodyTransformVault.cpp
@@ -0,0 +1,241 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtBodyTransformVault.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "foundation/PxMemory.h"
+#include "PxvGeometry.h"
+#include "PxvDynamics.h"
+#include "PsHash.h"
+#include "PsFoundation.h"
+
+using namespace physx;
+using namespace Pt;
+
+// Creates an empty vault: no bodies are registered and every hash bucket is cleared to NULL.
+// NOTE(review): the meaning of the second pool argument (256) is defined by the pool type, which is not
+// visible here - presumably a slab/element count; confirm.
+BodyTransformVault::BodyTransformVault() : mBody2WorldPool("body2WorldPool", 256), mBodyCount(0)
+{
+	// Make sure the hash size is a power of 2
+	// (getHashIndex() masks with PT_BODY_TRANSFORM_HASH_SIZE - 1 instead of computing a modulo)
+	PX_ASSERT((((PT_BODY_TRANSFORM_HASH_SIZE - 1) ^ PT_BODY_TRANSFORM_HASH_SIZE) + 1) ==
+	          (2 * PT_BODY_TRANSFORM_HASH_SIZE));
+
+	PxMemSet(mBody2WorldHash, 0, PT_BODY_TRANSFORM_HASH_SIZE * sizeof(Body2World*));
+}
+
+// Performs no explicit cleanup; the members are destroyed by their own destructors.
+BodyTransformVault::~BodyTransformVault()
+{
+}
+
+// Maps a body to its bucket in mBody2WorldHash, based on the hash of the body's address.
+PX_FORCE_INLINE PxU32 BodyTransformVault::getHashIndex(const PxsBodyCore& body) const
+{
+	const PxU32 addressHash = Ps::hash(&body);
+	// The table size is a power of two, so masking with (size - 1) acts as a modulo.
+	const PxU32 bucket = addressHash & (PT_BODY_TRANSFORM_HASH_SIZE - 1);
+	return bucket;
+}
+
+// Registers a body with the vault.
+//
+// If the body is already stored only its reference count is incremented. Otherwise a new entry holding the
+// body's current body2World transform is created with a reference count of 1 and appended to the chain of
+// the body's hash bucket.
+//
+// If the entry cannot be created (createEntry() returns NULL) the vault is left unchanged; the body is then
+// not tracked.
+void BodyTransformVault::addBody(const PxsBodyCore& body)
+{
+	Body2World* entry;
+	Body2World* dummy;
+
+	if(findEntry(body, entry, dummy))
+	{
+		// Body is already tracked, just count the additional user.
+		entry->refCount++;
+		return;
+	}
+
+	Body2World* newEntry = createEntry(body);
+	if(!newEntry)
+	{
+		// createEntry() reports failure with NULL; do not dereference it or count the body.
+		return;
+	}
+	newEntry->refCount = 1;
+
+	if(entry)
+	{
+		// No entry for the given body but the hash bucket has other bodies:
+		// findEntry() returned the tail of the chain --> link the new entry behind it
+		entry->next = newEntry;
+	}
+	else
+	{
+		// No entry for the given body and the hash bucket is empty --> new entry becomes the head
+		mBody2WorldHash[getHashIndex(body)] = newEntry;
+	}
+	mBodyCount++;
+}
+
+// Releases one reference to a body.
+//
+// When the last reference is released the entry is unlinked from its hash bucket chain, returned to the
+// pool and the body count is decremented. Otherwise only the reference count is decremented.
+//
+// The body is expected to be in the vault (asserted). If it is not, the call is a no-op.
+void BodyTransformVault::removeBody(const PxsBodyCore& body)
+{
+	Body2World* entry;
+	Body2World* prevEntry;
+
+	const bool hasEntry = findEntry(body, entry, prevEntry);
+	PX_ASSERT(hasEntry);
+	if(!hasEntry)
+	{
+		// On a miss findEntry() leaves 'entry' either NULL or pointing at the tail of the bucket chain,
+		// i.e. at a different body. Touching it would crash or corrupt/destroy that other body's entry.
+		return;
+	}
+
+	if(entry->refCount == 1)
+	{
+		if(prevEntry)
+		{
+			prevEntry->next = entry->next;
+		}
+		else
+		{
+			// Entry was first in list, its successor becomes the new head of the bucket
+			PxU32 hashIndex = getHashIndex(body);
+
+			mBody2WorldHash[hashIndex] = entry->next;
+		}
+		mBody2WorldPool.destroy(entry);
+		PX_ASSERT(mBodyCount > 0);
+		mBodyCount--;
+	}
+	else
+	{
+		entry->refCount--;
+	}
+}
+
+// Overwrites the stored transform of a body with the body's current body2World transform.
+//
+// The body is expected to be in the vault (asserted). If it is not, or if body.body2World is not valid,
+// the stored data is left untouched.
+void BodyTransformVault::teleportBody(const PxsBodyCore& body)
+{
+	Body2World* entry;
+	Body2World* dummy;
+
+	const bool hasEntry = findEntry(body, entry, dummy);
+	PX_ASSERT(hasEntry);
+	if(!hasEntry)
+	{
+		// On a miss findEntry() leaves 'entry' either NULL or pointing at another body's entry;
+		// writing through it would crash or overwrite the wrong transform.
+		return;
+	}
+	PX_ASSERT(entry);
+
+	PX_CHECK_AND_RETURN(body.body2World.isValid(), "BodyTransformVault::teleportBody: body.body2World is not valid.");
+
+	entry->b2w = body.body2World;
+}
+
+// Looks up the transform stored for a body.
+// Returns a pointer to the stored transform, or NULL if the body is not in the vault
+// (a miss is not treated as an error here).
+const PxTransform* BodyTransformVault::getTransform(const PxsBodyCore& body) const
+{
+	Body2World* match;
+	Body2World* unusedPrev;
+
+	if(!findEntry(body, match, unusedPrev))
+	{
+		return NULL;
+	}
+
+	return &match->b2w;
+}
+
+// Refreshes every stored transform from the current body2World transform of its body.
+void BodyTransformVault::update()
+{
+	// Nothing stored, no need to walk the table.
+	if(!mBodyCount)
+		return;
+
+	for(PxU32 bucket = 0; bucket < PT_BODY_TRANSFORM_HASH_SIZE; bucket++)
+	{
+		// Walk the chain of this bucket.
+		for(Body2World* cur = mBody2WorldHash[bucket]; cur; cur = cur->next)
+		{
+			PX_ASSERT(cur->body);
+			cur->b2w = cur->body->body2World;
+		}
+	}
+}
+
+// Takes a new entry from the pool and initializes it for the given body: current body2World transform,
+// pointer to the body, no successor. The reference count is left for the caller to set.
+// Returns NULL if the pool did not deliver an entry.
+BodyTransformVault::Body2World* BodyTransformVault::createEntry(const PxsBodyCore& body)
+{
+	Body2World* created = mBody2WorldPool.construct();
+	if(!created)
+	{
+		return NULL;
+	}
+
+	created->b2w = body.body2World;
+	created->next = NULL;
+	created->body = &body;
+	return created;
+}
+
+// Tells whether an entry for the given body exists in the vault.
+bool BodyTransformVault::isInVaultInternal(const PxsBodyCore& body) const
+{
+	// Scan the complete chain of the body's bucket for an entry referring to this body.
+	for(const Body2World* cur = mBody2WorldHash[getHashIndex(body)]; cur; cur = cur->next)
+	{
+		if(cur->body == &body)
+		{
+			return true;
+		}
+	}
+
+	return false;
+}
+
+// Searches the hash bucket of a body for its entry.
+//
+// entry      [out] The entry of the body if found. On a miss: the last entry of the bucket chain, or NULL
+//                  if the bucket is empty.
+// prevEntry  [out] The chain predecessor of 'entry', or NULL if 'entry' is the head of the chain or the
+//                  bucket is empty.
+//
+// Returns true if an entry for the body exists.
+bool BodyTransformVault::findEntry(const PxsBodyCore& body, Body2World*& entry, Body2World*& prevEntry) const
+{
+	prevEntry = NULL;
+
+	Body2World* cursor = mBody2WorldHash[getHashIndex(body)];
+	if(!cursor)
+	{
+		// Empty bucket
+		entry = NULL;
+		return false;
+	}
+
+	// Advance until the body is found or the tail of the chain is reached.
+	while(cursor->body != &body && cursor->next)
+	{
+		prevEntry = cursor;
+		cursor = cursor->next;
+	}
+
+	entry = cursor;
+	return cursor->body == &body;
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollision.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollision.cpp
new file mode 100644
index 00000000..537c0112
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollision.cpp
@@ -0,0 +1,676 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtCollision.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PtConfig.h"
+#include "PtParticleSystemSimCpu.h"
+#include "PtParticleShapeCpu.h"
+#include "PtContext.h"
+#include "PtBodyTransformVault.h"
+#include "PtCollisionHelper.h"
+#include "PtParticleContactManagerStream.h"
+#include "GuConvexMeshData.h"
+#include "CmFlushPool.h"
+#include "PxvGeometry.h"
+
+using namespace physx;
+using namespace Pt;
+
+namespace physx
+{
+
+namespace Pt
+{
+
+// Task that runs the collision update for one slot of the Collision object's task data.
+class CollisionTask : public Cm::Task
+{
+  public:
+	// context        Collision object whose shape list is processed; only a reference is stored.
+	// taskDataIndex  Index of the task data slot handled by this task.
+	CollisionTask(Collision& context, PxU32 taskDataIndex) : mCollisionContext(context), mTaskDataIndex(taskDataIndex)
+	{
+	}
+
+	// Processes the shapes assigned to this task's slot.
+	virtual void runInternal()
+	{
+		mCollisionContext.processShapeListWithFilter(mTaskDataIndex);
+	}
+
+	virtual const char* getName() const
+	{
+		return "Collision.fluidCollision";
+	}
+
+  private:
+	// Declared but not defined here: assignment is disabled (the class holds a reference member).
+	CollisionTask& operator=(const CollisionTask&);
+	Collision& mCollisionContext;
+	PxU32 mTaskDataIndex;
+};
+
+} // namespace Pt
+} // namespace physx
+
+/*
+how to support dominance-driven one/two-way collision (search for 'todo dominance'):
+- add 2-bit flag to PxsBodyShapeRef which stores the dominance matrix values
+- store this flag when creating the shape ref in updateFluidBodyContactPair()
+- use this flag when copying impulse to collData.shapeImpulse
+*/
+// Binds the collision stage to its particle system (stored by reference) and sets up the merge task
+// with this object and the string "Collision.mergeResults".
+Collision::Collision(ParticleSystemSimCpu& particleSystem)
+: mParticleSystem(particleSystem), mMergeTask(this, "Collision.mergeResults")
+{
+}
+
+// Performs no explicit cleanup.
+Collision::~Collision()
+{
+}
+
+// Appends a temporary pair of transforms (w2sOld / w2sNew) for one contact manager to
+// taskData.tempContactManagers. Going by the names these are the old and new world-to-shape transforms.
+//
+// Dynamic contact managers get two different transforms: w2sOld is built from the inverse of *cm.w2sOld,
+// w2sNew from the inverse of the body's current body2World. Non-dynamic ones get the same transform for
+// both, built from the shape transform and the rigid core's body2World.
+void PX_FORCE_INLINE Collision::addTempW2STransform(TaskData& taskData, const ParticleStreamContactManager& cm)
+{
+	W2STransformTemp& cmTemp = taskData.tempContactManagers.insert();
+
+	if(cm.isDynamic)
+	{
+		// For dynamic contact managers the rigid core is treated as a PxsBodyCore.
+		const PxsBodyCore* bodyCore = static_cast<const PxsBodyCore*>(cm.rigidCore);
+		cmTemp.w2sOld = cm.shapeCore->transform.transformInv(bodyCore->getBody2Actor()).transform(cm.w2sOld->getInverse());
+		cmTemp.w2sNew =
+		    cm.shapeCore->transform.transformInv(bodyCore->getBody2Actor()).transform(bodyCore->body2World.getInverse());
+	}
+	else
+	{
+		// Old and new transform are identical in the non-dynamic case.
+		const PxTransform tmp = cm.shapeCore->transform.getInverse() * cm.rigidCore->body2World.getInverse();
+		cmTemp.w2sOld = tmp;
+		cmTemp.w2sNew = tmp;
+	}
+}
+
+// Splits the contact manager stream into at most PT_NUM_PACKETS_PARALLEL_COLLISION consecutive ranges and
+// spawns one CollisionTask per non-empty range. All collision tasks continue into mMergeTask, which in
+// turn continues into 'continuation'.
+//
+// contactManagerStream  Stream read through ParticleContactManagerStreamReader; one entry per particle shape.
+// continuation          Task set as continuation of the merge task.
+void Collision::updateCollision(const PxU8* contactManagerStream, physx::PxBaseTask& continuation)
+{
+	mMergeTask.setContinuation(&continuation);
+	PxU32 maxTasks = PT_NUM_PACKETS_PARALLEL_COLLISION;
+	PxU32 packetParticleIndicesCount = mParticleSystem.mNumPacketParticlesIndices;
+
+	// would be nice to get available thread count to decide on task decomposition
+	// mParticleSystem.getContext().getTaskManager().getCpuDispatcher();
+
+	// use number of particles for task decomposition
+
+	// Aim for an even split across the tasks, but never below the sub packet particle limit per task.
+	PxU32 targetParticleCountPerTask =
+	    PxMax(PxU32(packetParticleIndicesCount / maxTasks), PxU32(PT_SUBPACKET_PARTICLE_LIMIT_COLLISION));
+	ParticleContactManagerStreamReader cmStreamReader(contactManagerStream);
+	ParticleContactManagerStreamIterator cmStreamEnd = cmStreamReader.getEnd();
+	ParticleContactManagerStreamIterator cmStream = cmStreamReader.getBegin();
+	ParticleContactManagerStreamIterator cmStreamLast;
+
+	PxU32 numTasks = 0;
+	for(PxU32 i = 0; i < PT_NUM_PACKETS_PARALLEL_COLLISION; ++i)
+	{
+		TaskData& taskData = mTaskData[i];
+		// Bounds of every slot are reset, also for slots that end up without a task.
+		taskData.bounds.setEmpty();
+
+		// if this is the last iteration, we need to gather all remaining packets
+		if(i == maxTasks - 1)
+			targetParticleCountPerTask = 0xffffffff;
+
+		cmStreamLast = cmStream;
+		PxU32 currentParticleCount = 0;
+
+		// Consume shapes until the particle budget of this task is reached or the stream ends.
+		while(currentParticleCount < targetParticleCountPerTask && cmStream != cmStreamEnd)
+		{
+			ParticleStreamShape streamShape;
+			cmStream.getNext(streamShape);
+			const ParticleShapeCpu* particleShape = static_cast<const ParticleShapeCpu*>(streamShape.particleShape);
+			currentParticleCount += particleShape->getFluidPacket()->numParticles;
+		}
+
+		if(currentParticleCount > 0)
+		{
+			PX_ASSERT(cmStreamLast != cmStream);
+			taskData.packetBegin = cmStreamLast;
+			taskData.packetEnd = cmStream;
+			numTasks++;
+		}
+	}
+	PX_ASSERT(cmStream == cmStreamEnd);
+
+	// spawn tasks
+	for(PxU32 i = 0; i < numTasks; ++i)
+	{
+		// Task memory comes from the context's task pool; the task is constructed in place.
+		void* ptr = mParticleSystem.getContext().getTaskPool().allocate(sizeof(CollisionTask));
+		CollisionTask* task = PX_PLACEMENT_NEW(ptr, CollisionTask)(*this, i);
+		task->setContinuation(&mMergeTask);
+		task->removeReference();
+	}
+
+	mMergeTask.removeReference();
+}
+
+// Integrates the particles of the spatial hash's overflow packet: velocity and position are advanced by one
+// time step and, if plane projection is enabled, projected onto the projection plane. No collision is done.
+void Collision::updateOverflowParticles()
+{
+	// if no particles are present, the hash shouldn't be accessed, as it hasn't been updated.
+	if(!(mParticleSystem.mParticleState->getValidParticleRange() > 0))
+		return;
+
+	const Pt::ParticleCell& overflowPacket =
+	    mParticleSystem.mSpatialHash->getPackets()[PT_PARTICLE_SYSTEM_OVERFLOW_INDEX];
+	Pt::Particle* particleBuffer = mParticleSystem.mParticleState->getParticleBuffer();
+	PxU32* packetIndices = mParticleSystem.mPacketParticlesIndices;
+
+	for(PxU32 slot = overflowPacket.firstParticle; slot < overflowPacket.firstParticle + overflowPacket.numParticles;
+	    slot++)
+	{
+		Pt::Particle& current = particleBuffer[packetIndices[slot]];
+		PX_ASSERT((current.flags.api & PxParticleFlag::eSPATIAL_DATA_STRUCTURE_OVERFLOW) != 0);
+
+		// update velocity and position
+		// world bounds are not updated for overflow particles, to make it more consistent with GPU.
+		PxVec3 acceleration = mParams.externalAcceleration;
+		integrateParticleVelocity(current, mParams.maxMotionDistance, acceleration, mParams.dampingDtComp,
+		                          mParams.timeStep);
+		current.position = current.position + current.velocity * mParams.timeStep;
+
+		// adapted from updateParticle(...) in PxsFluidCollisionHelper.h
+		if((mParams.flags & PxParticleBaseFlag::ePROJECT_TO_PLANE) != 0)
+		{
+			// Remove the velocity component along the plane normal, then snap the position onto the plane.
+			const PxReal normalSpeed = mParams.projectionPlane.n.dot(current.velocity);
+			current.velocity = current.velocity - (mParams.projectionPlane.n * normalSpeed);
+			current.position = mParams.projectionPlane.project(current.position);
+		}
+		PX_ASSERT(current.position.isFinite());
+	}
+}
+
+// Runs the collision update for every particle shape in the stream range of one task data slot.
+//
+// taskDataIndex  Index into mTaskData selecting the stream range [packetBegin, packetEnd) to process.
+// skipNum        Shapes with fewer contact managers than this are skipped.
+void Collision::processShapeListWithFilter(PxU32 taskDataIndex, const PxU32 skipNum)
+{
+	TaskData& taskData = mTaskData[taskDataIndex];
+
+	ParticleContactManagerStreamIterator it = taskData.packetBegin;
+	while(it != taskData.packetEnd)
+	{
+		// getNext() is called before the filter below, so skipped shapes are consumed from the stream as well.
+		ParticleStreamShape streamShape;
+		it.getNext(streamShape);
+
+		if(streamShape.numContactManagers < skipNum)
+			continue;
+
+		const ParticleShapeCpu* particleShape = static_cast<const ParticleShapeCpu*>(streamShape.particleShape);
+		PX_ASSERT(particleShape);
+		PX_UNUSED(particleShape);
+
+		// Collect world to shape space transforms for all colliding rigid body shapes
+		// (the temporary list is rebuilt per shape, index i matches streamShape.contactManagers[i])
+		taskData.tempContactManagers.clear();
+		for(PxU32 i = 0; i < streamShape.numContactManagers; i++)
+		{
+			const ParticleStreamContactManager& cm = streamShape.contactManagers[i];
+			addTempW2STransform(taskData, cm);
+		}
+
+		updateFluidShapeCollision(
+		    mParticleSystem.mParticleState->getParticleBuffer(), mParticleSystem.mFluidTwoWayData,
+		    mParticleSystem.mTransientBuffer, mParticleSystem.mCollisionVelocities, mParticleSystem.mConstraintBuffers,
+		    mParticleSystem.mOpcodeCacheBuffer, taskData.bounds, mParticleSystem.mPacketParticlesIndices,
+		    mParticleSystem.mParticleState->getRestOffsetBuffer(), taskData.tempContactManagers.begin(), streamShape);
+	}
+}
+
+// Folds the bounds gathered in every task data slot into the world bounds of the particle state.
+// The continuation argument is not used.
+void Collision::mergeResults(physx::PxBaseTask* /*continuation*/)
+{
+	PxBounds3& mergedBounds = mParticleSystem.mParticleState->getWorldBounds();
+
+	PxU32 slot = 0;
+	while(slot < PT_NUM_PACKETS_PARALLEL_COLLISION)
+	{
+		mergedBounds.include(mTaskData[slot].bounds);
+		++slot;
+	}
+}
+
+// Runs the collision update for all particles of one particle shape (packet).
+//
+// The packet's particles are processed in sub packets of at most PT_SUBPACKET_PARTICLE_LIMIT_COLLISION
+// particles. For each sub packet the particles and rest offsets are copied into local buffers, handed to
+// updateSubPacket() and the particles are copied back afterwards.
+//
+// particles                  Particle buffer, indexed through fluidShapeParticleIndices; updated in place.
+// fluidTwoWayData, transientBuf, collisionVelocities, constraintBufs
+//                            Forwarded unchanged to updateSubPacket().
+// opcodeCache                If non-NULL, the shared scratch memory is used as a per particle opcode cache;
+//                            otherwise it backs the local cell hash.
+// worldBounds                Forwarded to updateSubPacket().
+// fluidShapeParticleIndices  Particle index array; the packet's range starts at packet.firstParticle.
+// restOffsets                Optional per particle rest offsets; if NULL, mParams.restOffset is used for all.
+// w2sTransforms              Temporary transforms, forwarded together with streamShape.contactManagers.
+// streamShape                Stream entry providing the particle shape and its contact managers.
+void Collision::updateFluidShapeCollision(Particle* particles, TwoWayData* fluidTwoWayData, PxVec3* transientBuf,
+                                          PxVec3* collisionVelocities, ConstraintBuffers& constraintBufs,
+                                          ParticleOpcodeCache* opcodeCache, PxBounds3& worldBounds,
+                                          const PxU32* fluidShapeParticleIndices, const PxF32* restOffsets,
+                                          const W2STransformTemp* w2sTransforms, const ParticleStreamShape& streamShape)
+{
+	const ParticleShapeCpu& particleShape = *static_cast<const ParticleShapeCpu*>(streamShape.particleShape);
+	PX_ASSERT(particleShape.getFluidPacket());
+
+	const ParticleCell& packet = *particleShape.getFluidPacket();
+
+	PxU32 numParticles = packet.numParticles;
+	PxU32 firstParticleIndex = packet.firstParticle;
+	const PxU32* packetParticleIndices = fluidShapeParticleIndices + firstParticleIndex;
+	const PxU32 numParticlesPerSubpacket = PT_SUBPACKET_PARTICLE_LIMIT_COLLISION;
+
+	// Local working copies for one sub packet.
+	PX_ALLOCA(particlesSp, Particle, numParticlesPerSubpacket);
+	PxF32 restOffsetsSp[numParticlesPerSubpacket];
+
+	const PxU32 numHashBuckets = PT_LOCAL_HASH_SIZE_MESH_COLLISION;
+
+	// One scratch block serves either as opcode cache or as local cell hash storage, so it is sized for
+	// the larger of the two.
+	PxU32 hashMemCount = numHashBuckets * sizeof(ParticleCell) + numParticlesPerSubpacket * sizeof(PxU32);
+	PxU32 cacheMemCount = numParticlesPerSubpacket * sizeof(ParticleOpcodeCache);
+	PX_ALLOCA(shareMem, PxU8, PxMax(hashMemCount, cacheMemCount));
+
+	ParticleOpcodeCache* perParticleCacheSp = NULL;
+	LocalCellHash localCellHash;
+	PxVec3 packetCorner;
+
+	if(opcodeCache)
+		perParticleCacheSp = reinterpret_cast<ParticleOpcodeCache*>(shareMem.mPointer);
+	else
+	{
+		// Make sure the number of hash buckets is a power of 2 (requirement for the used hash function)
+		PX_ASSERT((((numHashBuckets - 1) ^ numHashBuckets) + 1) == (2 * numHashBuckets));
+		PX_ASSERT(numHashBuckets > numParticlesPerSubpacket);
+		// Set the buffers for the local cell hash
+		// (layout: particle indices first, hash entries directly behind them)
+		localCellHash.particleIndices = reinterpret_cast<PxU32*>(shareMem.mPointer);
+		localCellHash.hashEntries =
+		    reinterpret_cast<ParticleCell*>(shareMem.mPointer + numParticlesPerSubpacket * sizeof(PxU32));
+		packetCorner =
+		    PxVec3(PxReal(packet.coords.x), PxReal(packet.coords.y), PxReal(packet.coords.z)) * mParams.packetSize;
+	}
+
+	// Divide the packet into subpackets that fit into local memory of processing unit.
+	// The remainder is in [1, numParticlesPerSubpacket] for numParticles > 0. For numParticles == 0 the
+	// unsigned subtraction wraps, but the loop below does not execute, so the value is never used.
+	PxU32 particlesRemainder = (numParticles - 1) % numParticlesPerSubpacket + 1;
+
+	PxU32 numProcessedParticles = 0;
+	PxU32 numParticlesSp = particlesRemainder; // We start with the smallest subpacket, i.e., the subpacket which does
+	// not reach its particle limit.
+	while(numProcessedParticles < numParticles)
+	{
+		const PxU32* particleIndicesSp = packetParticleIndices + numProcessedParticles;
+
+		// load particles (constraints are loaded on demand so far)
+		for(PxU32 p = 0; p < numParticlesSp; p++)
+		{
+			PxU32 particleIndex = particleIndicesSp[p];
+			particlesSp[p] = particles[particleIndex];
+		}
+
+		// load rest offsets: per particle values if available, the global parameter otherwise
+		if(restOffsets)
+		{
+			for(PxU32 p = 0; p < numParticlesSp; p++)
+			{
+				PxU32 particleIndex = particleIndicesSp[p];
+				restOffsetsSp[p] = restOffsets[particleIndex];
+			}
+		}
+		else
+		{
+			for(PxU32 p = 0; p < numParticlesSp; p++)
+				restOffsetsSp[p] = mParams.restOffset;
+		}
+
+		updateSubPacket(particlesSp, fluidTwoWayData, transientBuf, collisionVelocities, constraintBufs,
+		                perParticleCacheSp, opcodeCache, localCellHash, worldBounds, packetCorner, particleIndicesSp,
+		                numParticlesSp, streamShape.contactManagers, w2sTransforms, streamShape.numContactManagers,
+		                restOffsetsSp);
+
+		// store particles back
+		for(PxU32 p = 0; p < numParticlesSp; p++)
+		{
+			PxU32 particleIndex = particleIndicesSp[p];
+			particles[particleIndex] = particlesSp[p];
+		}
+
+		// Invalidate cached local cell hash
+		localCellHash.isHashValid = false;
+
+		// All following sub packets are full sized.
+		numProcessedParticles += numParticlesSp;
+		numParticlesSp = numParticlesPerSubpacket;
+	}
+}
+
+// Runs the collision update for a single subpacket of particles.
+//
+// Steps, in order:
+//  1. integrate the velocity of every particle and set up its collision record (constraints applied),
+//  2. collide against all dynamic shapes and apply an intermediate collision response,
+//  3. collide against all static shapes (triangle meshes use the per particle cache when one is supplied),
+//  4. apply the final collision response, clamp the motion and update the particles,
+//  5. write the optional per particle outputs (surface normals, collision velocities, two-way data).
+//
+// particlesSp, restOffsetsSp and perParticleCacheLocal are indexed by the subpacket-local particle number,
+// all other per particle buffers are indexed through particleIndicesSp.
+PX_FORCE_INLINE void
+Collision::updateSubPacket(Particle* particlesSp, TwoWayData* fluidTwoWayData, PxVec3* transientBuf,
+                           PxVec3* collisionVelocities, ConstraintBuffers& constraintBufs,
+                           ParticleOpcodeCache* perParticleCacheLocal, ParticleOpcodeCache* perParticleCacheGlobal,
+                           LocalCellHash& localCellHash, PxBounds3& worldBounds, const PxVec3& packetCorner,
+                           const PxU32* particleIndicesSp, const PxU32 numParticlesSp,
+                           const ParticleStreamContactManager* contactManagers, const W2STransformTemp* w2sTransforms,
+                           const PxU32 numContactManagers, const PxF32* restOffsetsSp)
+{
+	// One collision record per subpacket particle, released again at the end of this function.
+	ParticleCollData* collBuf =
+	    reinterpret_cast<ParticleCollData*>(PX_ALLOC(numParticlesSp * sizeof(ParticleCollData), "ParticleCollData"));
+
+	// Keeps every bit except the constraint valid/dynamic bits.
+	const PxU16 clearConstraintBits =
+	    PxU16(~(InternalParticleFlag::eCONSTRAINT_0_VALID | InternalParticleFlag::eCONSTRAINT_1_VALID |
+	            InternalParticleFlag::eCONSTRAINT_0_DYNAMIC | InternalParticleFlag::eCONSTRAINT_1_DYNAMIC));
+
+	//
+	// Integrate velocities and initialize the collision records
+	//
+	for(PxU32 i = 0; i < numParticlesSp; ++i)
+	{
+		const PxU32 globalIndex = particleIndicesSp[i];
+		Particle& part = particlesSp[i];
+		PX_ASSERT(part.position.isFinite() && part.velocity.isFinite());
+
+		ParticleCollData& cd = collBuf[i];
+		Ps::prefetchLine(&cd);
+		cd.c0 = constraintBufs.constraint0Buf + globalIndex;
+		cd.c1 = constraintBufs.constraint1Buf + globalIndex;
+		Ps::prefetchLine(cd.c0);
+		Ps::prefetchLine(cd.c1);
+
+		// Remember the velocity the particle had before integration.
+		const PxVec3 velBeforeIntegration = part.velocity;
+
+		// External acceleration, plus the per particle entry of transientBuf for SPH systems.
+		PxVec3 accel = mParams.externalAcceleration;
+		if(mParams.flags & InternalParticleSystemFlag::eSPH)
+			accel += transientBuf[globalIndex];
+
+		integrateParticleVelocity(part, mParams.maxMotionDistance, accel, mParams.dampingDtComp, mParams.timeStep);
+
+		// Velocities (and, with two-way data enabled, bodies) of the dynamic constraints of this particle.
+		PxVec3 vel0(0.0f);
+		PxVec3 vel1(0.0f);
+		const PxsBodyCore* body0 = NULL;
+		const PxsBodyCore* body1 = NULL;
+
+		if(part.flags.low & InternalParticleFlag::eCONSTRAINT_0_DYNAMIC)
+		{
+			const ConstraintDynamic& dyn0 = constraintBufs.constraint0DynamicBuf[globalIndex];
+			vel0 = dyn0.velocity;
+			if(fluidTwoWayData)
+				body0 = dyn0.twoWayBody;
+		}
+
+		if(part.flags.low & InternalParticleFlag::eCONSTRAINT_1_DYNAMIC)
+		{
+			const ConstraintDynamic& dyn1 = constraintBufs.constraint1DynamicBuf[globalIndex];
+			vel1 = dyn1.velocity;
+			if(fluidTwoWayData)
+				body1 = dyn1.twoWayBody;
+		}
+
+		initCollDataAndApplyConstraints(cd, part, velBeforeIntegration, restOffsetsSp[i], vel0, vel1, body0, body1,
+		                                globalIndex, mParams);
+
+		cd.particleFlags.low &= clearConstraintBits;
+	}
+
+	//
+	// Collide with dynamic shapes
+	//
+	bool hadDynamicShape = false;
+	for(PxU32 c = 0; c < numContactManagers; ++c)
+	{
+		const ParticleStreamContactManager& cm = contactManagers[c];
+		if(cm.isDynamic)
+		{
+			updateFluidBodyContactPair(particlesSp, numParticlesSp, collBuf, constraintBufs, perParticleCacheLocal,
+			                           localCellHash, packetCorner, cm, w2sTransforms[c]);
+			hadDynamicShape = true;
+		}
+	}
+
+	PxF32 maxMotionDistSq = mParams.maxMotionDistance * mParams.maxMotionDistance;
+
+	// Intermediate response after the dynamic pass; resets the collision state for the static pass.
+	if(hadDynamicShape)
+	{
+		const bool twoWay = (mParams.flags & PxParticleBaseFlag::eCOLLISION_TWOWAY) != 0;
+		for(PxU32 i = 0; i < numParticlesSp; ++i)
+		{
+			ParticleCollData& cd = collBuf[i];
+			collisionResponse(cd, twoWay, false, mParams);
+			clampToMaxMotion(cd.newPos, cd.oldPos, mParams.maxMotionDistance, maxMotionDistSq);
+			cd.flags &= ~ParticleCollisionFlags::CC;
+			cd.flags &= ~ParticleCollisionFlags::DC;
+			cd.flags |= ParticleCollisionFlags::RESET_SNORMAL;
+			cd.surfacePos = PxVec3(0);
+			// cd.surfaceVel is intentionally left untouched: the dynamic surface velocity is still needed
+			// for providing collision velocities in finalization.
+			cd.ccTime = 1.0f;
+		}
+	}
+
+	//
+	// Collide with static shapes
+	// (Static shapes need to be processed after dynamic shapes to avoid that dynamic shapes push
+	//  particles into static shapes)
+	//
+	bool cacheLoaded = false;
+	for(PxU32 c = 0; c < numContactManagers; ++c)
+	{
+		const ParticleStreamContactManager& cm = contactManagers[c];
+		if(cm.isDynamic)
+			continue;
+
+		// Fetch the per particle cache once, right before the first static triangle mesh is processed.
+		if(perParticleCacheLocal && !cacheLoaded &&
+		   (cm.shapeCore->geometry.getType() == PxGeometryType::eTRIANGLEMESH))
+		{
+			for(PxU32 i = 0; i < numParticlesSp; ++i)
+				perParticleCacheLocal[i] = perParticleCacheGlobal[particleIndicesSp[i]];
+			cacheLoaded = true;
+		}
+
+		updateFluidBodyContactPair(particlesSp, numParticlesSp, collBuf, constraintBufs, perParticleCacheLocal,
+		                           localCellHash, packetCorner, cm, w2sTransforms[c]);
+	}
+
+	// Write the per particle cache back if it was fetched above.
+	if(cacheLoaded)
+	{
+		for(PxU32 i = 0; i < numParticlesSp; ++i)
+			perParticleCacheGlobal[particleIndicesSp[i]] = perParticleCacheLocal[i];
+	}
+
+	//
+	// Final response and particle update
+	//
+	for(PxU32 i = 0; i < numParticlesSp; ++i)
+	{
+		ParticleCollData& cd = collBuf[i];
+
+		collisionResponse(cd, false, true, mParams);
+
+		// Clamp new particle position to maximum motion.
+		clampToMaxMotion(cd.newPos, cd.oldPos, mParams.maxMotionDistance, maxMotionDistSq);
+
+		updateParticle(particlesSp[i], cd, (mParams.flags & PxParticleBaseFlag::ePROJECT_TO_PLANE) != 0,
+		               mParams.projectionPlane, worldBounds);
+	}
+
+	// Optional output: surface normals
+	if(transientBuf)
+	{
+		for(PxU32 i = 0; i < numParticlesSp; ++i)
+		{
+			const ParticleCollData& cd = collBuf[i];
+			transientBuf[cd.origParticleIndex] = cd.surfaceNormal;
+		}
+	}
+
+	// Optional output: particle velocity relative to the surface velocity
+	if(collisionVelocities)
+	{
+		for(PxU32 i = 0; i < numParticlesSp; ++i)
+		{
+			const ParticleCollData& cd = collBuf[i];
+			collisionVelocities[cd.origParticleIndex] = particlesSp[i].velocity - cd.surfaceVel;
+		}
+	}
+
+	// Optional output: two-way interaction body and impulse
+	if(fluidTwoWayData)
+	{
+		for(PxU32 i = 0; i < numParticlesSp; ++i)
+		{
+			const ParticleCollData& cd = collBuf[i];
+			PX_ASSERT(!cd.twoWayBody || (particlesSp[i].flags.api & PxParticleFlag::eCOLLISION_WITH_DYNAMIC));
+			TwoWayData& twoWay = fluidTwoWayData[cd.origParticleIndex];
+			twoWay.body = cd.twoWayBody;
+			twoWay.impulse = cd.twoWayImpulse;
+		}
+	}
+
+	PX_FREE(collBuf);
+}
+
+// Collides a set of particles against the shape of a single contact manager.
+//
+// particles          Particles of the current subpacket (only read for building the local cell hash).
+// numParticles       Number of entries in particles / particleCollData / opcodeCacheLocal.
+// particleCollData   Per particle collision records; read and updated in place.
+// constraintBufs     Constraint buffers, indexed by ParticleCollData::origParticleIndex.
+// opcodeCacheLocal   Per particle mesh cache; when NULL, triangle meshes are collided per cell using
+//                    localCellHash instead.
+// localCellHash      Local cell hash, computed here on demand when it is not marked valid.
+// packetCorner       Passed through to the cell hash computation and the cell based mesh collision.
+// contactManager     Supplies the shape, the rigid core and the isDynamic / isDrain flags.
+// w2sTransform       World to shape transforms (new and old) for the shape.
+//
+// The particle positions are first transformed to shape space, then the geometry specific collision routine
+// is run and finally the results are merged back through updateCollDataStaticMesh / updateCollDataDynamic /
+// updateCollDataStatic. Particles touching a drain shape are flagged with eCOLLISION_WITH_DRAIN.
+void Collision::updateFluidBodyContactPair(const Particle* particles, PxU32 numParticles,
+                                           ParticleCollData* particleCollData, ConstraintBuffers& constraintBufs,
+                                           ParticleOpcodeCache* opcodeCacheLocal, LocalCellHash& localCellHash,
+                                           const PxVec3& packetCorner, const ParticleStreamContactManager& contactManager,
+                                           const W2STransformTemp& w2sTransform)
+{
+	PX_ASSERT(particles);
+	PX_ASSERT(particleCollData);
+
+	// Set for triangle meshes and height fields, which use their own result update below.
+	bool isStaticMeshType = false;
+
+	const Gu::GeometryUnion& shape = contactManager.shapeCore->geometry;
+	const PxsBodyCore* body = contactManager.isDynamic ? static_cast<const PxsBodyCore*>(contactManager.rigidCore) : NULL;
+
+	const PxTransform& world2Shape = w2sTransform.w2sNew;
+	const PxTransform& world2ShapeOld = w2sTransform.w2sOld;
+	const PxTransform shape2World = world2Shape.getInverse();
+
+	// Prepare the shape-local part of each collision record.
+	for(PxU32 p = 0; p < numParticles; p++)
+	{
+		ParticleCollData& collData = particleCollData[p];
+
+		// Only the continuous collision bit is carried over into the local flags.
+		collData.localFlags = (collData.flags & ParticleCollisionFlags::CC);
+		// Transform position from world to shape space
+		collData.localNewPos = world2Shape.transform(collData.newPos);
+		collData.localOldPos = world2ShapeOld.transform(collData.oldPos);
+		collData.c0 = constraintBufs.constraint0Buf + collData.origParticleIndex;
+		collData.c1 = constraintBufs.constraint1Buf + collData.origParticleIndex;
+		collData.localSurfaceNormal = PxVec3(0.0f);
+		collData.localSurfacePos = PxVec3(0.0f);
+	}
+
+	// Dispatch to the geometry specific collision routine.
+	switch(shape.getType())
+	{
+	case PxGeometryType::eSPHERE:
+	{
+		collideWithSphere(particleCollData, numParticles, shape, mParams.contactOffset);
+		break;
+	}
+	case PxGeometryType::ePLANE:
+	{
+		collideWithPlane(particleCollData, numParticles, shape, mParams.contactOffset);
+		break;
+	}
+	case PxGeometryType::eCAPSULE:
+	{
+		collideWithCapsule(particleCollData, numParticles, shape, mParams.contactOffset);
+		break;
+	}
+	case PxGeometryType::eBOX:
+	{
+		collideWithBox(particleCollData, numParticles, shape, mParams.contactOffset);
+		break;
+	}
+	case PxGeometryType::eCONVEXMESH:
+	{
+		const PxConvexMeshGeometryLL& convexShapeData = shape.get<const PxConvexMeshGeometryLL>();
+		const Gu::ConvexHullData* convexHullData = convexShapeData.hullData;
+		PX_ASSERT(convexHullData);
+
+		// Scratch buffer with one plane per hull polygon.
+		PX_ALLOCA(scaledPlanesBuf, PxPlane, convexHullData->mNbPolygons);
+		collideWithConvex(scaledPlanesBuf, particleCollData, numParticles, shape, mParams.contactOffset);
+		break;
+	}
+	case PxGeometryType::eTRIANGLEMESH:
+	{
+		if(opcodeCacheLocal)
+		{
+			collideWithStaticMesh(numParticles, particleCollData, opcodeCacheLocal, shape, world2Shape, shape2World,
+			                      mParams.cellSize, mParams.collisionRange, mParams.contactOffset);
+		}
+		else
+		{
+			// Compute cell hash if needed
+			if(!localCellHash.isHashValid)
+			{
+				// One hash key per particle, saved for the reorder. PX_ALLOCA takes an element count
+				// (as in the other PX_ALLOCA uses of this file), not a byte count.
+				PX_ALLOCA(hashKeyArray, PxU16, numParticles);
+				PX_ASSERT(hashKeyArray);
+				computeLocalCellHash(localCellHash, hashKeyArray, particles, numParticles, packetCorner,
+				                     mParams.cellSizeInv);
+			}
+
+			collideCellsWithStaticMesh(particleCollData, localCellHash, shape, world2Shape, shape2World,
+			                           mParams.cellSize, mParams.collisionRange, mParams.contactOffset, packetCorner);
+		}
+		isStaticMeshType = true;
+		break;
+	}
+	case PxGeometryType::eHEIGHTFIELD:
+	{
+		collideWithStaticHeightField(particleCollData, numParticles, shape, mParams.contactOffset, shape2World);
+		isStaticMeshType = true;
+		break;
+	}
+	case PxGeometryType::eGEOMETRY_COUNT:
+	case PxGeometryType::eINVALID:
+		PX_ASSERT(0);
+	}
+
+	// Merge the shape-local results back into the collision records.
+	if(isStaticMeshType)
+	{
+		for(PxU32 p = 0; p < numParticles; p++)
+		{
+			ParticleCollData& collData = particleCollData[p];
+			updateCollDataStaticMesh(collData, shape2World, mParams.timeStep);
+		}
+	}
+	else if(body)
+	{
+		for(PxU32 p = 0; p < numParticles; p++)
+		{
+			ParticleCollData& collData = particleCollData[p];
+			// Stand-in target for a dynamic constraint buffer that is not available.
+			// NOTE(review): if both dynamic buffers are NULL, c0Dynamic and c1Dynamic refer to the same
+			// temporary - confirm that updateCollDataDynamic tolerates this.
+			ConstraintDynamic cdTemp;
+			ConstraintDynamic& c0Dynamic = constraintBufs.constraint0DynamicBuf
+			                                   ? constraintBufs.constraint0DynamicBuf[collData.origParticleIndex]
+			                                   : cdTemp;
+			ConstraintDynamic& c1Dynamic = constraintBufs.constraint1DynamicBuf
+			                                   ? constraintBufs.constraint1DynamicBuf[collData.origParticleIndex]
+			                                   : cdTemp;
+			c0Dynamic.setEmpty();
+			c1Dynamic.setEmpty();
+			updateCollDataDynamic(collData, body->body2World, body->linearVelocity, body->angularVelocity, body,
+			                      shape2World, mParams.timeStep, c0Dynamic, c1Dynamic);
+		}
+	}
+	else
+	{
+		for(PxU32 p = 0; p < numParticles; p++)
+		{
+			ParticleCollData& collData = particleCollData[p];
+
+			updateCollDataStatic(collData, shape2World, mParams.timeStep);
+		}
+	}
+
+	// Flag every particle that registered any local collision with a drain shape.
+	if(contactManager.isDrain)
+	{
+		for(PxU32 p = 0; p < numParticles; p++)
+		{
+			ParticleCollData& collData = particleCollData[p];
+
+			if((collData.localFlags & ParticleCollisionFlags::L_ANY) != 0)
+			{
+				collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_DRAIN;
+			}
+		}
+	}
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollision.h b/PhysX_3.4/Source/LowLevelParticles/src/PtCollision.h
new file mode 100644
index 00000000..b1d0c640
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollision.h
@@ -0,0 +1,130 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_COLLISION_H
+#define PT_COLLISION_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "foundation/PxTransform.h"
+#include "PsBitUtils.h"
+#include "PtConfig.h"
+#include "PtCollisionData.h"
+#include "PtCollisionMethods.h"
+#include "PtParticle.h"
+#include "PtTwoWayData.h"
+#include "PtCollisionParameters.h"
+#include "PsAlignedMalloc.h"
+#include "CmTask.h"
+#include "PtParticleContactManagerStream.h"
+
+namespace physx
+{
+
+class PxsRigidBody;
+class PxBaseTask;
+
+namespace Pt
+{
+
+class ParticleShape;
+class BodyTransformVault;
+struct W2STransformTemp;
+
+// Collision update of a CPU particle system (ParticleSystemSimCpu).
+// Holds the collision parameters, a reference to the owning particle system and one TaskData slot per
+// parallel collision task.
+class Collision
+{
+  public:
+	Collision(class ParticleSystemSimCpu& particleSystem);
+	~Collision();
+
+	// Starts the collision update for the given contact manager stream; continuation is the task to run
+	// afterwards.
+	void updateCollision(const PxU8* contactManagerStream, PxBaseTask& continuation);
+
+	// Update position and velocity of particles that have PxParticleFlag::eSPATIAL_DATA_STRUCTURE_OVERFLOW set.
+	void updateOverflowParticles();
+
+	// Mutable access to the collision parameters.
+	PX_FORCE_INLINE CollisionParameters& getParameter() { return mParams; }
+
+  private:
+	// 16 byte aligned array of temporary world to shape transforms.
+	typedef Ps::Array<W2STransformTemp, shdfnd::AlignedAllocator<16, Ps::ReflectionAllocator<W2STransformTemp> > >
+	TempContactManagerArray;
+
+	// Per task working set: temporary transforms, the range of the contact manager stream and a bounds value.
+	struct TaskData
+	{
+		TempContactManagerArray tempContactManagers;
+		ParticleContactManagerStreamIterator packetBegin;
+		ParticleContactManagerStreamIterator packetEnd;
+		PxBounds3 bounds;
+	};
+
+	void processShapeListWithFilter(PxU32 taskDataIndex, const PxU32 skipNum = 0);
+	void mergeResults(PxBaseTask* continuation);
+
+	// Collision update for the particles of one particle shape.
+	void updateFluidShapeCollision(Particle* particles,
+	                               TwoWayData* fluidTwoWayData,
+	                               PxVec3* transientBuf,
+	                               PxVec3* collisionVelocities,
+	                               ConstraintBuffers& constraintBufs,
+	                               ParticleOpcodeCache* opcodeCache,
+	                               PxBounds3& worldBounds,
+	                               const PxU32* fluidShapeParticleIndices,
+	                               const PxF32* restOffsets,
+	                               const W2STransformTemp* w2sTransforms,
+	                               const ParticleStreamShape& streamShape);
+
+	// Collision update for one subpacket of a particle shape.
+	PX_FORCE_INLINE void updateSubPacket(Particle* particlesSp,
+	                                     TwoWayData* fluidTwoWayData,
+	                                     PxVec3* transientBuf,
+	                                     PxVec3* collisionVelocities,
+	                                     ConstraintBuffers& constraintBufs,
+	                                     ParticleOpcodeCache* perParticleCacheLocal,
+	                                     ParticleOpcodeCache* perParticleCacheGlobal,
+	                                     LocalCellHash& localCellHash,
+	                                     PxBounds3& worldBounds,
+	                                     const PxVec3& packetCorner,
+	                                     const PxU32* particleIndicesSp,
+	                                     const PxU32 numParticlesSp,
+	                                     const ParticleStreamContactManager* contactManagers,
+	                                     const W2STransformTemp* w2sTransforms,
+	                                     const PxU32 numContactManagers,
+	                                     const PxF32* restOffsetsSp);
+
+	// Collides a set of particles against the shape of a single contact manager.
+	void updateFluidBodyContactPair(const Particle* particles,
+	                                PxU32 numParticles,
+	                                ParticleCollData* particleCollData,
+	                                ConstraintBuffers& constraintBufs,
+	                                ParticleOpcodeCache* perParticleCacheLocal,
+	                                LocalCellHash& localCellHash,
+	                                const PxVec3& packetCorner,
+	                                const ParticleStreamContactManager& contactManager,
+	                                const W2STransformTemp& w2sTransform);
+
+	PX_FORCE_INLINE void addTempW2STransform(TaskData& taskData, const ParticleStreamContactManager& cm);
+
+	// Declared but not defined here: instances are not meant to be assigned.
+	Collision& operator=(const Collision&);
+
+	CollisionParameters mParams;
+	ParticleSystemSimCpu& mParticleSystem;
+	TaskData mTaskData[PT_NUM_PACKETS_PARALLEL_COLLISION];
+
+	// Task object that invokes mergeResults().
+	typedef Cm::DelegateTask<Collision, &Collision::mergeResults> MergeTask;
+	MergeTask mMergeTask;
+	friend class CollisionTask;
+};
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_COLLISION_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionBox.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionBox.cpp
new file mode 100644
index 00000000..8f9b90ba
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionBox.cpp
@@ -0,0 +1,135 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtCollisionMethods.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+// Collides particles (given in shape space) against a box.
+// The box is expressed as six axis aligned planes and handed to the convex plane collision routines.
+// Only particles whose motion bounds overlap the box bounds fattened by proxRadius are tested.
+void physx::Pt::collideWithBox(ParticleCollData* particleCollData, PxU32 numCollData, const Gu::GeometryUnion& boxShape,
+                               PxReal proxRadius)
+{
+	PX_ASSERT(particleCollData);
+	PX_ASSERT(boxShape.getType() == PxGeometryType::eBOX);
+
+	const PxVec3 halfExtents = boxShape.get<const PxBoxGeometry>().halfExtents;
+
+	// Culling volume: the box itself, enlarged by the proximity radius.
+	PxBounds3 cullBounds(halfExtents * -1.0f, halfExtents);
+	PX_ASSERT(!cullBounds.isEmpty());
+	cullBounds.fattenFast(proxRadius);
+
+	// Box to convex conversion: one plane per box face, ordered +x, -x, +y, -y, +z, -z.
+	PxPlane planes[6];
+
+	planes[0].n = PxVec3(1.0f, 0.0f, 0.0f);
+	planes[0].d = -halfExtents.x;
+	planes[1].n = PxVec3(-1.0f, 0.0f, 0.0f);
+	planes[1].d = -halfExtents.x;
+
+	planes[2].n = PxVec3(0.0f, 1.0f, 0.0f);
+	planes[2].d = -halfExtents.y;
+	planes[3].n = PxVec3(0.0f, -1.0f, 0.0f);
+	planes[3].d = -halfExtents.y;
+
+	planes[4].n = PxVec3(0.0f, 0.0f, 1.0f);
+	planes[4].d = -halfExtents.z;
+	planes[5].n = PxVec3(0.0f, 0.0f, -1.0f);
+	planes[5].d = -halfExtents.z;
+
+#if PT_USE_SIMD_CONVEX_COLLISION
+	// Candidates are gathered into batches of four for the SIMD routine.
+	ParticleCollDataV4 batch;
+	PxU32 batchCount = 0;
+
+	for(PxU32 i = 0; i < numCollData; ++i)
+	{
+		ParticleCollData& current = particleCollData[i];
+
+		PxBounds3 motionBounds = PxBounds3::boundsOfPoints(current.localOldPos, current.localNewPos);
+		if(motionBounds.intersects(cullBounds))
+		{
+			batch.localOldPos[batchCount].v3 = current.localOldPos;
+			batch.localOldPos[batchCount].pad = 0;
+			batch.localNewPos[batchCount].v3 = current.localNewPos;
+			batch.localNewPos[batchCount].pad = 0;
+			batch.localFlags[batchCount] = current.localFlags;
+			batch.restOffset[batchCount] = current.restOffset;
+			batch.ccTime[batchCount] = current.ccTime;
+			batch.collData[batchCount] = &current;
+			++batchCount;
+		}
+
+		if(batchCount == 4)
+		{
+			// sschirm: not processing with less than 4 elements to avoid uninitialized data reads
+			collideWithConvexPlanesSIMD(batch, planes, 6, proxRadius);
+
+			// Copy the results of every lane that carries any flag back to its particle.
+			for(PxU32 lane = 0; lane < batchCount; ++lane)
+			{
+				const PxU32 laneFlags = batch.localFlags[lane];
+				if(!laneFlags)
+					continue;
+
+				ParticleCollData* target = batch.collData[lane];
+				target->localFlags |= laneFlags;
+				target->ccTime = batch.ccTime[lane];
+				target->localSurfaceNormal = batch.localSurfaceNormal[lane].v3;
+				target->localSurfacePos = batch.localSurfacePos[lane].v3;
+			}
+			batchCount = 0;
+		}
+		else if(batchCount > 0 && (i == numCollData - 1))
+		{
+			// Incomplete batch left over at the last particle: use the scalar routine.
+			for(PxU32 lane = 0; lane < batchCount; ++lane)
+				collideWithConvexPlanes(*batch.collData[lane], planes, 6, proxRadius);
+		}
+	}
+#else
+	for(PxU32 i = 0; i < numCollData; ++i)
+	{
+		ParticleCollData& current = particleCollData[i];
+		PxBounds3 motionBounds = PxBounds3::boundsOfPoints(current.localOldPos, current.localNewPos);
+		if(motionBounds.intersects(cullBounds))
+			collideWithConvexPlanes(current, planes, 6, proxRadius);
+	}
+#endif
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionCapsule.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionCapsule.cpp
new file mode 100644
index 00000000..3add04a5
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionCapsule.cpp
@@ -0,0 +1,304 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtCollisionMethods.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+using namespace physx;
+using namespace Pt;
+
+namespace
+{
+
+// Discrete / proximity test of the position q against a capsule with half height h and radius r.
+// collData.localSurfaceNormal is always overwritten (unless a continuous collision is already flagged);
+// surface position and flags are only written when q is closer than r + proxRadius to the capsule axis
+// segment.
+void collideWithCapsuleNonContinuous(ParticleCollData& collData, const PxVec3& q, const PxReal& h, const PxReal& r,
+                                     const PxReal& proxRadius)
+{
+	// Discrete and proximity collisions are only applied as long as no continuous collision has been
+	// detected so far (for any colliding shape).
+	if(collData.localFlags & ParticleCollisionFlags::CC)
+		return;
+
+	// Point on the x-axis segment [-h, h] that is closest to q.
+	const PxVec3 axisPoint(PxMin(PxMax(q.x, -h), h), 0.0f, 0.0f);
+
+	collData.localSurfaceNormal = q - axisPoint;
+	const PxReal dist = collData.localSurfaceNormal.magnitude();
+	if(!(dist < (r + proxRadius)))
+		return;
+
+	if(dist == 0.0f)
+		collData.localSurfaceNormal = PxVec3(0);
+	else
+		collData.localSurfaceNormal *= (1.0f / dist);
+
+	// Push particle to surface such that the distance to the surface is equal to the collision radius
+	collData.localSurfacePos = axisPoint + (collData.localSurfaceNormal * (r + collData.restOffset));
+	collData.localFlags |= ParticleCollisionFlags::L_PROX;
+
+	// Closer than the rest offset: additionally a discrete collision.
+	if(dist < (r + collData.restOffset))
+		collData.localFlags |= ParticleCollisionFlags::L_DC;
+}
+
+// Tests the motion p -> q (direction d = q - p) against one end sphere of a capsule.
+// sphereH is the x coordinate of the sphere center; discS, aS and bS are the discriminant and the a / b
+// coefficients of the squared distance function for that sphere.
+// Without an intersection inside the p-q interval the discrete / proximity test is used instead; an
+// intersection is only recorded when it is earlier than the current collData.ccTime.
+void collideWithCapsuleTestSphere(ParticleCollData& collData, const PxVec3& p, const PxVec3& q, const PxVec3& d,
+                                  const PxReal& h, const PxReal& r, const PxReal& sphereH, const PxReal& discS,
+                                  const PxReal& aS, const PxReal& bS, const PxReal& proxRadius)
+{
+	PxReal t = 0.0f;
+	bool missed = (discS <= 0.0f || aS == 0.0f);
+	if(!missed)
+	{
+		t = -(bS + PxSqrt(discS)) / aS;
+		// intersection lies outside p-q interval
+		missed = (t < 0.0f || t > 1.0f);
+	}
+
+	if(missed)
+	{
+		collideWithCapsuleNonContinuous(collData, q, h, r, proxRadius);
+		return;
+	}
+
+	// An earlier (or equally early) continuous collision is already recorded.
+	if(!(t < collData.ccTime))
+		return;
+
+	// intersection point lies on sphere, add lcc
+	PxVec3 hitOffset = (d * t);
+
+	// Normal: from the sphere center to the intersection point, scaled by 1 / r.
+	PxVec3& normal = collData.localSurfaceNormal;
+	normal = p + hitOffset;
+	normal.x -= sphereH;
+	normal *= (1.0f / r);
+
+	computeContinuousTargetPosition(collData.localSurfacePos, p, hitOffset, normal, collData.restOffset);
+	collData.ccTime = t;
+	collData.localFlags |= ParticleCollisionFlags::L_CC;
+}
+
+// ----------------------------------------------------------------
+//
+//		Note: this code is based on the hardware implementation
+//
+//      Terminology:
+//      Starting point: p
+//      End point:      q
+//      Ray direction:  d
+//
+//      Infinite cylinder I:  all (y,z)   : y^2 + z^2 < r^2
+//      "Fat plane"       F:  all (x)     : -h < x < h
+//      Top sphere        S0: all (x,y,z) : y^2 + z^2 + (x-h)^2 < r^2
+//      Bottom sphere     S1: all (x,y,z) : y^2 + z^2 + (x+h)^2 < r^2
+//
+//      Cylinder          Z = (I & F)
+//      Capsule           C = Z | S0 | S1
+//
+//      coefficients a, b, c for the squared distance functions sqd(t) = a * t^2 + b * t + c, for I, S0 and S1:
+//
+//      aI =  d.y*d.y + d.z*d.z
+//      aS0 = d.y*d.y + d.z*d.z + d.x*d.x
+//      aS1 = d.y*d.y + d.z*d.z + d.x*d.x
+//
+//      bI =  d.y*p.y + d.z*p.z
+//      bS0 = d.y*p.y + d.z*p.z + d.x*p.x - h*d.x
+//      bS1 = d.y*p.y + d.z*p.z + d.x*p.x + h*d.x
+//
+//      cI =  p.y*p.y + p.z*p.z - r*r.
+//      cS0 = p.y*p.y + p.z*p.z - r*r + p.x*p.x + h*h - 2*h*p.x
+//      cS1 = p.y*p.y + p.z*p.z - r*r + p.x*p.x + h*h + 2*h*p.x
+//
+//      these will be treated in vectorized fashion:
+//      I  <--> .y
+//      S0 <--> .x
+//      S1 <--> .z
+//
+//      for p, we have sqd(0) = c
+//      ( for q, we have sqd(1) = a + b + c )
+//
+// ----------------------------------------------------------------
+// Collides one particle (old/new position given in capsule-local space) with a capsule.
+// A particle whose old position is inside the capsule gets a continuous collision at time 0 and is
+// pushed out to the surface. Otherwise the motion ray is intersected with the infinite cylinder and,
+// depending on where it hits, tested against one of the two cap spheres. Every case without a usable
+// ray hit is forwarded to collideWithCapsuleNonContinuous.
+PX_FORCE_INLINE void collideWithCapsule(ParticleCollData& collData, const PxCapsuleGeometry& capsuleShapeData,
+                                        PxReal proxRadius)
+{
+	// Capsule-local frame: the cylindrical part is wrapped around the x-axis.
+	PxVec3& oldPos = collData.localOldPos;
+	PxVec3& newPos = collData.localNewPos;
+
+	PxReal radius = capsuleShapeData.radius;
+	PxReal halfHeight = capsuleShapeData.halfHeight;
+
+	// Lane layout of the coefficient vectors: .y = infinite cylinder, .x = sphere 0, .z = sphere 1.
+	PxVec3 quadA, quadB, quadC;
+
+	// constant terms: value of each squared-distance function at the old position
+	quadC.y = oldPos.y * oldPos.y + oldPos.z * oldPos.z - radius * radius;
+	PxReal sphereBase = quadC.y + oldPos.x * oldPos.x + halfHeight * halfHeight;
+	quadC.x = sphereBase - 2 * halfHeight * oldPos.x;
+	quadC.z = sphereBase + 2 * halfHeight * oldPos.x;
+
+	const bool startsInInfiniteCylinder = quadC.y < 0.0f;
+	const bool startsInSphere0 = quadC.x < 0.0f;
+	const bool startsInSphere1 = quadC.z < 0.0f;
+	const bool startsPastPlusH = oldPos.x > halfHeight;
+	const bool startsPastMinusH = oldPos.x < -halfHeight;
+	const bool startsInCylinder = (!startsPastPlusH && !startsPastMinusH && startsInInfiniteCylinder);
+
+	if(startsInCylinder || startsInSphere0 || startsInSphere1)
+	{
+		// The old position is inside the capsule: report a continuous collision at time 0.
+
+		// closest point on the capsule's axis segment
+		PxVec3 axisPoint(PxMin(PxMax(oldPos.x, -halfHeight), halfHeight), 0.0f, 0.0f);
+		PxVec3 outward = oldPos - axisPoint;
+
+		// fall back to +y when the old position lies exactly on the axis
+		if(outward.isZero())
+			collData.localSurfaceNormal = PxVec3(0.0f, 1.0f, 0.0f);
+		else
+			collData.localSurfaceNormal = outward.getNormalized();
+
+		// Push particle to surface such that the distance to the surface is equal to the collision radius
+		collData.localSurfacePos = axisPoint + (collData.localSurfaceNormal * (radius + collData.restOffset));
+		collData.ccTime = 0.0f;
+		collData.localFlags |= ParticleCollisionFlags::L_CC;
+		return;
+	}
+
+	// From here on the old position is outside of the capsule.
+	PxVec3 motion = newPos - oldPos;
+
+	// linear terms
+	quadB.y = motion.y * oldPos.y + motion.z * oldPos.z;
+	PxReal linearBase = quadB.y + motion.x * oldPos.x;
+	quadB.x = linearBase - halfHeight * motion.x;
+	quadB.z = linearBase + halfHeight * motion.x;
+
+	// quadratic terms
+	quadA.y = motion.y * motion.y + motion.z * motion.z;
+	quadA.x = quadA.y + motion.x * motion.x;
+	quadA.z = quadA.x;
+
+	// discriminants b*b - a*c of all three primitives
+	PxVec3 discriminants = quadB.multiply(quadB) - quadC.multiply(quadA);
+
+	// ">=" instead of ">": the strict comparison made cases fail with motion.y == 0.0 and motion.z == 0.0
+	if(!(discriminants.y >= 0.0f))
+	{
+		// the ray does not intersect the infinite cylinder
+		collideWithCapsuleNonContinuous(collData, newPos, halfHeight, radius, proxRadius);
+		return;
+	}
+
+	if(startsInInfiniteCylinder)
+	{
+		// The old position is inside the infinite cylinder, i.e. beyond one of the two caps:
+		// test directly against the nearest sphere.
+		if(oldPos.x > 0)
+		{
+			// sphere 0
+			collideWithCapsuleTestSphere(collData, oldPos, newPos, motion, halfHeight, radius, halfHeight,
+			                             discriminants.x, quadA.x, quadB.x, proxRadius);
+		}
+		else
+		{
+			// sphere 1
+			collideWithCapsuleTestSphere(collData, oldPos, newPos, motion, halfHeight, radius, -halfHeight,
+			                             discriminants.z, quadA.z, quadB.z, proxRadius);
+		}
+		return;
+	}
+
+	if(discriminants.y <= 0.0f || quadA.y == 0.0f)
+	{
+		// the motion is zero or tangential to the cylinder surface
+		collideWithCapsuleNonContinuous(collData, newPos, halfHeight, radius, proxRadius);
+		return;
+	}
+
+	// The old position is outside of the infinite cylinder: compute the first intersection with it.
+	PxReal hitTime = -(quadB.y + PxSqrt(discriminants.y)) / quadA.y;
+	if(hitTime < 0.0f || hitTime > 1.0f)
+	{
+		// intersection lies outside of the old-new interval
+		collideWithCapsuleNonContinuous(collData, newPos, halfHeight, radius, proxRadius);
+		return;
+	}
+
+	PxVec3 travelled = (motion * hitTime);
+	PxVec3 impact = oldPos + travelled;
+
+	if(impact.x > halfHeight)
+	{
+		// impact beyond the +h end of the cylinder: check sphere 0
+		collideWithCapsuleTestSphere(collData, oldPos, newPos, motion, halfHeight, radius, halfHeight,
+		                             discriminants.x, quadA.x, quadB.x, proxRadius);
+		return;
+	}
+
+	if(impact.x < -halfHeight)
+	{
+		// impact beyond the -h end of the cylinder: check sphere 1
+		collideWithCapsuleTestSphere(collData, oldPos, newPos, motion, halfHeight, radius, -halfHeight,
+		                             discriminants.z, quadA.z, quadB.z, proxRadius);
+		return;
+	}
+
+	if(hitTime < collData.ccTime)
+	{
+		// The impact lies on the cylinder and is earlier than the stored ccTime: record it.
+		collData.localSurfaceNormal = impact / radius;
+		collData.localSurfaceNormal.x = 0.0f;
+		computeContinuousTargetPosition(collData.localSurfacePos, oldPos, travelled, collData.localSurfaceNormal,
+		                                collData.restOffset);
+		collData.ccTime = hitTime;
+		collData.localFlags |= ParticleCollisionFlags::L_CC;
+	}
+}
+
+} // namespace
+
+// Runs the file-local single-particle capsule test on each of the numCollData entries of collShapeData.
+void physx::Pt::collideWithCapsule(ParticleCollData* collShapeData, PxU32 numCollData,
+                                   const Gu::GeometryUnion& capsuleShape, PxReal proxRadius)
+{
+	PX_ASSERT(collShapeData);
+	PX_ASSERT(capsuleShape.getType() == PxGeometryType::eCAPSULE);
+
+	const PxCapsuleGeometry& capsule = capsuleShape.get<const PxCapsuleGeometry>();
+
+	ParticleCollData* const last = collShapeData + numCollData;
+	for(ParticleCollData* current = collShapeData; current != last; ++current)
+		::collideWithCapsule(*current, capsule, proxRadius);
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionConvex.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionConvex.cpp
new file mode 100644
index 00000000..a6d658ce
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionConvex.cpp
@@ -0,0 +1,553 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtCollisionMethods.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "foundation/PxMat33.h"
+#include "foundation/PxVec3.h"
+#include "foundation/PxPlane.h"
+#include "GuConvexMeshData.h"
+#include "CmPhysXCommon.h"
+#include "PsVecMath.h"
+
+#define FLCNVX_NO_DC (1 << 0) // bit 0; not referenced anywhere else in this file
+#define FLCNVX_NO_PARALLEL_CC (1 << 1) // bit 1; not referenced anywhere else in this file
+#define FLCNVX_NO_PROX (1 << 2) // bit 2; not referenced anywhere else in this file
+#define FLCNVX_NO_CONTAINMENT (1 << 3) // bit 3; not referenced anywhere else in this file
+#define PLCNVX_POTENTIAL_PROX (1 << 4) // bit 4; NOTE(review): "PLCNVX" prefix differs from the FLCNVX_* siblings - possibly a typo, confirm
+
+using namespace physx::shdfnd::aos;
+using namespace physx;
+using namespace Pt;
+
+namespace
+{
+
+// Writes one plane per hull polygon into scaledPlaneBuf: the polygon normal is transformed by the
+// transpose of invScaling and re-normalized, and the plane's d is divided by the length the
+// transformed normal had before normalization.
+void scalePlanes(PxPlane* scaledPlaneBuf, const Gu::ConvexHullData* convexHullData, const PxMat33& invScaling)
+{
+	const Gu::HullPolygonData* polygons = convexHullData->mPolygons;
+	const PxU32 planeCount = convexHullData->mNbPolygons;
+
+	for(PxU32 i = 0; i < planeCount; ++i)
+	{
+		PxVec3 scaledNormal = invScaling.transformTranspose(polygons[i].mPlane.n);
+		const PxF32 originalD = polygons[i].mPlane.d;
+		const PxReal length = scaledNormal.normalize();
+		scaledPlaneBuf[i] = PxPlane(scaledNormal, originalD / length);
+	}
+}
+
+} // namespace
+
+/**
+Collides a single particle with a convex given as an array of planes.
+
+One pass over the planes finds, for the old and for the new position, the plane with the largest signed
+distance, and clips the motion ray against the planes (latest entry / soonest exit). Depending on the
+result, collData receives a containment or continuous collision (L_CC), or - if no continuous collision
+is flagged in collData.localFlags - a proximity (L_PROX) and/or discrete (L_DC) result.
+*/
+void physx::Pt::collideWithConvexPlanes(ParticleCollData& collData, const PxPlane* convexPlanes, PxU32 numPlanes,
+                                        const PxReal proxRadius)
+{
+	PX_ASSERT(convexPlanes);
+
+	// Indices start at 0 so they can be used for array access without an extra validity test.
+	PxU32 farthestPlaneNew = 0;
+	PxU32 farthestPlaneOld = 0;
+	PxU32 entryPlane = 0;
+	bool newPosOutsideSeveral = false;
+
+	PxReal latestEntry = -FLT_MAX;
+	PxReal soonestExit = FLT_MAX;
+	PxReal maxDistNew = -FLT_MAX;
+	PxReal maxDistOld = -FLT_MAX;
+
+	PxVec3 motion = collData.localNewPos - collData.localOldPos;
+
+	for(PxU32 k = 0; k < numPlanes; k++)
+	{
+		const PxReal distNew = convexPlanes[k].distance(collData.localNewPos);
+		const PxReal distOld = convexPlanes[k].distance(collData.localOldPos);
+
+		// was the new position already outside of one of the planes visited before?
+		const bool outsideBefore = maxDistNew > 0.0f;
+
+		// the maximal plane distance is the minimal distance to the convex
+		if(distOld > maxDistOld)
+		{
+			maxDistOld = distOld;
+			farthestPlaneOld = k;
+		}
+		if(distNew > maxDistNew)
+		{
+			maxDistNew = distNew;
+			farthestPlaneNew = k;
+		}
+
+		const bool outsideNow = distNew > 0.0f;
+
+		// remember when the new position is outside of more than one plane
+		if(outsideBefore && outsideNow)
+			newPosOutsideSeveral = true;
+
+		// continuous collision
+		const PxReal approach = motion.dot(convexPlanes[k].n);
+
+		// a division by zero is harmless here: the result is only used when approach != 0.0f
+		const PxReal hitTime = -distOld / approach;
+
+		if(approach < 0.0f)
+		{
+			// ray enters the half space of this plane
+			if(hitTime > latestEntry)
+			{
+				latestEntry = hitTime;
+				entryPlane = k;
+			}
+		}
+		else if(approach > 0.0f)
+		{
+			// ray leaves the half space of this plane
+			if(hitTime < soonestExit)
+				soonestExit = hitTime;
+		}
+		else if(approach == 0.0f && outsideNow)
+		{
+			// motion parallel to the plane with the new position outside: rule out a continuous collision
+			latestEntry = FLT_MAX;
+		}
+	}
+
+	if(maxDistOld <= 0.0f)
+	{
+		// The old position is inside the convex: treat this as a continuous collision with time 0.
+		collData.localFlags |= ParticleCollisionFlags::L_CC;
+		collData.ccTime = 0.0f;
+		collData.localSurfaceNormal = convexPlanes[farthestPlaneOld].n;
+
+		// Push the particle to the surface (such that distance to surface is equal to the collision radius)
+		collData.localSurfacePos =
+		    collData.localOldPos + convexPlanes[farthestPlaneOld].n * (collData.restOffset - maxDistOld);
+		return;
+	}
+
+	// Continuous collision: the entry must lie at or after time 0, before the stored ccTime and not after
+	// the exit.
+	const bool ccHappened = (0.0f <= latestEntry) && (latestEntry < collData.ccTime) && (latestEntry <= soonestExit);
+	if(ccHappened)
+	{
+		collData.localSurfaceNormal = convexPlanes[entryPlane].n;
+		computeContinuousTargetPosition(collData.localSurfacePos, collData.localOldPos, motion * latestEntry,
+		                                convexPlanes[entryPlane].n, collData.restOffset);
+		collData.ccTime = latestEntry;
+		collData.localFlags |= ParticleCollisionFlags::L_CC;
+		return;
+	}
+
+	// Proximity / discrete results are only added while no continuous collision is flagged.
+	if(collData.localFlags & ParticleCollisionFlags::CC)
+		return;
+
+	const bool isProximity = (maxDistNew > 0.0f) && (maxDistNew <= proxRadius) && !newPosOutsideSeveral;
+	const bool isDc = maxDistNew <= collData.restOffset;
+
+	if(isProximity || isDc)
+	{
+		// both cases report the same normal and surface position, they only differ in the flag
+		collData.localSurfaceNormal = convexPlanes[farthestPlaneNew].n;
+		collData.localSurfacePos =
+		    collData.localNewPos + convexPlanes[farthestPlaneNew].n * (collData.restOffset - maxDistNew);
+
+		if(isProximity)
+			collData.localFlags |= ParticleCollisionFlags::L_PROX;
+		if(isDc)
+			collData.localFlags |= ParticleCollisionFlags::L_DC;
+	}
+}
+
+/**
+Four-wide variant of the convex plane test: processes the four particles packed in collDataV4 against
+the given planes at once.
+
+Results are written back into collDataV4: localSurfacePos, localSurfaceNormal and ccTime for all four
+lanes, and localFlags, which is overwritten with the state flags detected for this shape.
+
+\param collDataV4   packed input/output data of four particles
+\param convexPlanes planes of the convex
+\param numPlanes    number of entries in convexPlanes
+\param proxRadius   upper distance bound for reporting a proximity
+*/
+void physx::Pt::collideWithConvexPlanesSIMD(ParticleCollDataV4& collDataV4, const PxPlane* convexPlanes,
+                                            PxU32 numPlanes, const PxReal proxRadius)
+{
+	PX_ASSERT(convexPlanes);
+	Ps::prefetch(convexPlanes);
+
+	Vec4V latestEntry = V4Load(-FLT_MAX);
+	Vec4V soonestExit = V4Load(FLT_MAX);
+	Vec4V newPosClosestDist = V4Load(-FLT_MAX);
+	Vec4V oldPosClosestDist = V4Load(-FLT_MAX);
+	Vec4V discreteNormal[4] = { V4Zero(), V4Zero(), V4Zero(), V4Zero() };
+	Vec4V continuousNormal[4] = { V4Zero(), V4Zero(), V4Zero(), V4Zero() };
+	Vec4V containmentNormal[4] = { V4Zero(), V4Zero(), V4Zero(), V4Zero() };
+
+	Vec4V localNewPos0 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localNewPos[0]));
+	Vec4V localOldPos0 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localOldPos[0]));
+
+	Vec4V localNewPos1 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localNewPos[1]));
+	Vec4V localOldPos1 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localOldPos[1]));
+
+	Vec4V localNewPos2 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localNewPos[2]));
+	Vec4V localOldPos2 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localOldPos[2]));
+
+	Vec4V localNewPos3 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localNewPos[3]));
+	Vec4V localOldPos3 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localOldPos[3]));
+
+	Vec4V motion[4];
+	motion[0] = V4Sub(localNewPos0, localOldPos0);
+	motion[1] = V4Sub(localNewPos1, localOldPos1);
+	motion[2] = V4Sub(localNewPos2, localOldPos2);
+	motion[3] = V4Sub(localNewPos3, localOldPos3);
+
+	// Transpose so that each column holds one coordinate of all four particles.
+	const Mat44V newPos44(localNewPos0, localNewPos1, localNewPos2, localNewPos3);
+	const Mat44V oldPos44(localOldPos0, localOldPos1, localOldPos2, localOldPos3);
+	const Mat44V motion44(motion[0], motion[1], motion[2], motion[3]);
+
+	const Mat44V newPosTrans44 = M44Trnsps(newPos44);
+	const Mat44V oldPosTrans44 = M44Trnsps(oldPos44);
+	const Mat44V motionTrans44 = M44Trnsps(motion44);
+
+	BoolV newPosOutMask = BLoad(false);
+
+	const PxPlane* plane = convexPlanes;
+	for(PxU32 k = 0; k < numPlanes; k++)
+	{
+		Vec4V planeNormal = Vec4V_From_Vec3V(V3LoadU(plane->n));
+		Vec4V planeD = V4Load(plane->d);
+		plane++;
+		Ps::prefetch(plane);
+
+		const FloatV normalX = V4GetX(planeNormal);
+		const FloatV normalY = V4GetY(planeNormal);
+		const FloatV normalZ = V4GetZ(planeNormal);
+
+		// signed plane distance of the four new positions
+		Vec4V v1 = V4ScaleAdd(newPosTrans44.col0, normalX, planeD);
+		Vec4V v2 = V4ScaleAdd(newPosTrans44.col1, normalY, v1);
+		Vec4V planeDistNewPosV4 = V4ScaleAdd(newPosTrans44.col2, normalZ, v2);
+
+		// signed plane distance of the four old positions
+		v1 = V4ScaleAdd(oldPosTrans44.col0, normalX, planeD);
+		v2 = V4ScaleAdd(oldPosTrans44.col1, normalY, v1);
+		Vec4V planeDistOldPosV4 = V4ScaleAdd(oldPosTrans44.col2, normalZ, v2);
+
+		// containment: select the max distance plane
+		BoolV mask = V4IsGrtr(planeDistOldPosV4, oldPosClosestDist);
+		oldPosClosestDist = V4Sel(mask, planeDistOldPosV4, oldPosClosestDist);
+		containmentNormal[0] = V4Sel(BSplatElement<0>(mask), planeNormal, containmentNormal[0]);
+		containmentNormal[1] = V4Sel(BSplatElement<1>(mask), planeNormal, containmentNormal[1]);
+		containmentNormal[2] = V4Sel(BSplatElement<2>(mask), planeNormal, containmentNormal[2]);
+		containmentNormal[3] = V4Sel(BSplatElement<3>(mask), planeNormal, containmentNormal[3]);
+
+		// proximity and discrete: select the max distance planes
+		BoolV wasNewPosOutide = V4IsGrtr(newPosClosestDist, V4Zero());
+		BoolV isNewPosOutside = V4IsGrtr(planeDistNewPosV4, V4Zero());
+
+		mask = V4IsGrtr(planeDistNewPosV4, newPosClosestDist);
+		newPosClosestDist = V4Sel(mask, planeDistNewPosV4, newPosClosestDist);
+		discreteNormal[0] = V4Sel(BSplatElement<0>(mask), planeNormal, discreteNormal[0]);
+		discreteNormal[1] = V4Sel(BSplatElement<1>(mask), planeNormal, discreteNormal[1]);
+		discreteNormal[2] = V4Sel(BSplatElement<2>(mask), planeNormal, discreteNormal[2]);
+		discreteNormal[3] = V4Sel(BSplatElement<3>(mask), planeNormal, discreteNormal[3]);
+
+		// flagging cases where newPos is out multiple times
+		newPosOutMask = BOr(newPosOutMask, BAnd(wasNewPosOutide, isNewPosOutside));
+
+		// Test continuous collision
+		v1 = V4Scale(motionTrans44.col0, normalX);
+		v2 = V4ScaleAdd(motionTrans44.col1, normalY, v1);
+		Vec4V dotV4 = V4ScaleAdd(motionTrans44.col2, normalZ, v2);
+
+		Vec4V hitTime = V4Neg(V4Div(planeDistOldPosV4, dotV4));
+
+		BoolV exit = V4IsGrtr(dotV4, V4Zero());
+		mask = BAnd(exit, V4IsGrtr(soonestExit, hitTime));
+		soonestExit = V4Sel(mask, hitTime, soonestExit);
+
+		BoolV entry = V4IsGrtr(V4Zero(), dotV4);
+		mask = BAnd(entry, V4IsGrtr(hitTime, latestEntry));
+		latestEntry = V4Sel(mask, hitTime, latestEntry);
+		continuousNormal[0] = V4Sel(BSplatElement<0>(mask), planeNormal, continuousNormal[0]);
+		continuousNormal[1] = V4Sel(BSplatElement<1>(mask), planeNormal, continuousNormal[1]);
+		continuousNormal[2] = V4Sel(BSplatElement<2>(mask), planeNormal, continuousNormal[2]);
+		continuousNormal[3] = V4Sel(BSplatElement<3>(mask), planeNormal, continuousNormal[3]);
+
+		// mark parallel outside for no ccd in PxcFinalizeConvexCollision
+		mask = BAnd(isNewPosOutside, V4IsEq(V4Zero(), dotV4));
+		latestEntry = V4Sel(mask, V4One(), latestEntry);
+	}
+
+	VecU32V localFlags = U4LoadXYZW(collDataV4.localFlags[0], collDataV4.localFlags[1], collDataV4.localFlags[2],
+	                                collDataV4.localFlags[3]);
+	Vec4V proxRadiusV4 = V4Load(proxRadius);
+	Vec4V restOffsetV4 = V4LoadA(collDataV4.restOffset);
+
+	const VecU32V u4Zero = U4LoadXYZW(0, 0, 0, 0);
+	const VecU32V flagCC = U4LoadXYZW(ParticleCollisionFlags::CC, ParticleCollisionFlags::CC,
+	                                  ParticleCollisionFlags::CC, ParticleCollisionFlags::CC);
+	const BoolV noFlagCC = V4IsEqU32(V4U32and(flagCC, localFlags), u4Zero);
+
+	// proximity
+	const VecU32V flagLPROX = U4LoadXYZW(ParticleCollisionFlags::L_PROX, ParticleCollisionFlags::L_PROX,
+	                                     ParticleCollisionFlags::L_PROX, ParticleCollisionFlags::L_PROX);
+	const BoolV proximityV =
+	    BAnd(BAnd(BAnd(noFlagCC, V4IsGrtrOrEq(newPosClosestDist, V4Zero())), V4IsGrtr(proxRadiusV4, newPosClosestDist)),
+	         BNot(newPosOutMask));
+	VecU32V stateFlag = V4U32Sel(proximityV, flagLPROX, u4Zero);
+
+	// discrete
+	const VecU32V flagLDC = U4LoadXYZW(ParticleCollisionFlags::L_DC, ParticleCollisionFlags::L_DC,
+	                                   ParticleCollisionFlags::L_DC, ParticleCollisionFlags::L_DC);
+	const BoolV DCV =
+	    BAnd(BAnd(noFlagCC, V4IsGrtrOrEq(newPosClosestDist, V4Zero())), V4IsGrtr(restOffsetV4, newPosClosestDist));
+	stateFlag = V4U32or(stateFlag, V4U32Sel(DCV, flagLDC, u4Zero));
+
+	// cc
+	const VecU32V flagLCC = U4LoadXYZW(ParticleCollisionFlags::L_CC, ParticleCollisionFlags::L_CC,
+	                                   ParticleCollisionFlags::L_CC, ParticleCollisionFlags::L_CC);
+
+	Vec4V oldCCTimeV = V4LoadA(collDataV4.ccTime);
+	const BoolV ccHappenedV = BAnd(BAnd(V4IsGrtrOrEq(latestEntry, V4Zero()), V4IsGrtr(oldCCTimeV, latestEntry)),
+	                               V4IsGrtrOrEq(soonestExit, latestEntry));
+
+	// a continuous collision replaces any proximity/discrete state of the lane
+	stateFlag = V4U32Sel(ccHappenedV, flagLCC, stateFlag);
+	Vec4V localSurfaceNormal0 = V4Sel(BSplatElement<0>(ccHappenedV), continuousNormal[0], discreteNormal[0]);
+	Vec4V localSurfaceNormal1 = V4Sel(BSplatElement<1>(ccHappenedV), continuousNormal[1], discreteNormal[1]);
+	Vec4V localSurfaceNormal2 = V4Sel(BSplatElement<2>(ccHappenedV), continuousNormal[2], discreteNormal[2]);
+	Vec4V localSurfaceNormal3 = V4Sel(BSplatElement<3>(ccHappenedV), continuousNormal[3], discreteNormal[3]);
+
+	Vec4V ccTimeV = V4Sel(ccHappenedV, latestEntry, oldCCTimeV);
+	Vec4V distV = newPosClosestDist;
+
+	// Base position of the continuous-collision result. The PROJECT and IMPACT variants start from the
+	// impact point oldPos + motion * latestEntry (per-lane entry time splatted across the lane's vector).
+#if PT_CCD_MEDTHOD == PT_CCD_PROJECT
+	Vec4V projected0 = V4MulAdd(motion[0], V4SplatElement<0>(latestEntry), localOldPos0);
+	Vec4V projected1 = V4MulAdd(motion[1], V4SplatElement<1>(latestEntry), localOldPos1);
+	Vec4V projected2 = V4MulAdd(motion[2], V4SplatElement<2>(latestEntry), localOldPos2);
+	Vec4V projected3 = V4MulAdd(motion[3], V4SplatElement<3>(latestEntry), localOldPos3);
+	distV = V4Sel(ccHappenedV, V4Zero(), distV);
+
+#elif PT_CCD_MEDTHOD == PT_CCD_STAY
+	Vec4V projected0 = localOldPos0;
+	Vec4V projected1 = localOldPos1;
+	Vec4V projected2 = localOldPos2;
+	Vec4V projected3 = localOldPos3;
+	distV = V4Sel(ccHappenedV, restOffsetV4, distV);
+
+#elif PT_CCD_MEDTHOD == PT_CCD_IMPACT
+	Vec4V projected0 = V4MulAdd(motion[0], V4SplatElement<0>(latestEntry), localOldPos0);
+	Vec4V projected1 = V4MulAdd(motion[1], V4SplatElement<1>(latestEntry), localOldPos1);
+	Vec4V projected2 = V4MulAdd(motion[2], V4SplatElement<2>(latestEntry), localOldPos2);
+	Vec4V projected3 = V4MulAdd(motion[3], V4SplatElement<3>(latestEntry), localOldPos3);
+	distV = V4Sel(ccHappenedV, restOffsetV4, distV);
+#else
+	PX_ASSERT(0); // simd unsupported yet
+#endif
+
+	Vec4V localSurfacePos0 = V4Sel(BSplatElement<0>(ccHappenedV), projected0, localNewPos0);
+	Vec4V localSurfacePos1 = V4Sel(BSplatElement<1>(ccHappenedV), projected1, localNewPos1);
+	Vec4V localSurfacePos2 = V4Sel(BSplatElement<2>(ccHappenedV), projected2, localNewPos2);
+	Vec4V localSurfacePos3 = V4Sel(BSplatElement<3>(ccHappenedV), projected3, localNewPos3);
+
+	// contain: old position inside the convex overrides every other result of the lane
+	const BoolV containmentV = V4IsGrtrOrEq(V4Zero(), oldPosClosestDist);
+
+	stateFlag = V4U32Sel(containmentV, flagLCC, stateFlag);
+
+	localSurfaceNormal0 = V4Sel(BSplatElement<0>(containmentV), containmentNormal[0], localSurfaceNormal0);
+	localSurfaceNormal1 = V4Sel(BSplatElement<1>(containmentV), containmentNormal[1], localSurfaceNormal1);
+	localSurfaceNormal2 = V4Sel(BSplatElement<2>(containmentV), containmentNormal[2], localSurfaceNormal2);
+	localSurfaceNormal3 = V4Sel(BSplatElement<3>(containmentV), containmentNormal[3], localSurfaceNormal3);
+
+	localSurfacePos0 = V4Sel(BSplatElement<0>(containmentV), localOldPos0, localSurfacePos0);
+	localSurfacePos1 = V4Sel(BSplatElement<1>(containmentV), localOldPos1, localSurfacePos1);
+	localSurfacePos2 = V4Sel(BSplatElement<2>(containmentV), localOldPos2, localSurfacePos2);
+	localSurfacePos3 = V4Sel(BSplatElement<3>(containmentV), localOldPos3, localSurfacePos3);
+
+	distV = V4Sel(containmentV, oldPosClosestDist, distV);
+	ccTimeV = V4Sel(containmentV, V4Zero(), ccTimeV);
+
+	// localSurfacePos: move the base position along the normal by (restOffset - dist)
+	Vec4V reflectDistV = V4Sub(restOffsetV4, distV);
+	localSurfacePos0 = V4MulAdd(localSurfaceNormal0, V4SplatElement<0>(reflectDistV), localSurfacePos0);
+	localSurfacePos1 = V4MulAdd(localSurfaceNormal1, V4SplatElement<1>(reflectDistV), localSurfacePos1);
+	localSurfacePos2 = V4MulAdd(localSurfaceNormal2, V4SplatElement<2>(reflectDistV), localSurfacePos2);
+	localSurfacePos3 = V4MulAdd(localSurfaceNormal3, V4SplatElement<3>(reflectDistV), localSurfacePos3);
+
+	// store
+	V4StoreA(localSurfacePos0, reinterpret_cast<PxF32*>(&collDataV4.localSurfacePos[0]));
+	V4StoreA(localSurfacePos1, reinterpret_cast<PxF32*>(&collDataV4.localSurfacePos[1]));
+	V4StoreA(localSurfacePos2, reinterpret_cast<PxF32*>(&collDataV4.localSurfacePos[2]));
+	V4StoreA(localSurfacePos3, reinterpret_cast<PxF32*>(&collDataV4.localSurfacePos[3]));
+
+	V4StoreA(localSurfaceNormal0, reinterpret_cast<PxF32*>(&collDataV4.localSurfaceNormal[0]));
+	V4StoreA(localSurfaceNormal1, reinterpret_cast<PxF32*>(&collDataV4.localSurfaceNormal[1]));
+	V4StoreA(localSurfaceNormal2, reinterpret_cast<PxF32*>(&collDataV4.localSurfaceNormal[2]));
+	V4StoreA(localSurfaceNormal3, reinterpret_cast<PxF32*>(&collDataV4.localSurfaceNormal[3]));
+
+	V4StoreA(ccTimeV, collDataV4.ccTime);
+
+	V4U32StoreAligned(stateFlag, reinterpret_cast<VecU32V*>(collDataV4.localFlags));
+}
+
+/**
+Collides numCollData particles with a scaled convex mesh.
+
+Particles whose old/new position bounds do not overlap the convex bounds (scaled and fattened by
+proxRadius) are skipped. The scaled planes are only computed once the first overlapping particle is found.
+
+input scaledPlaneBuf needs a capacity of the number of planes in convexShape
+
+\param scaledPlaneBuf   scratch buffer receiving the scaled planes
+\param particleCollData particle collision data, updated in place
+\param numCollData      number of entries in particleCollData
+\param convexShape      geometry union holding the convex mesh geometry
+\param proxRadius       proximity radius, also used to fatten the shape bounds
+*/
+void physx::Pt::collideWithConvex(PxPlane* scaledPlaneBuf, ParticleCollData* particleCollData, PxU32 numCollData,
+                                  const Gu::GeometryUnion& convexShape, const PxReal proxRadius)
+{
+	PX_ASSERT(scaledPlaneBuf);
+	PX_ASSERT(particleCollData);
+
+	const PxConvexMeshGeometryLL& convexShapeData = convexShape.get<const PxConvexMeshGeometryLL>();
+	const Gu::ConvexHullData* convexHullData = convexShapeData.hullData;
+	PX_ASSERT(convexHullData);
+
+	// convex bounds in local space
+	PxMat33 scaling = convexShapeData.scale.toMat33(), invScaling;
+	invScaling = scaling.getInverse();
+
+	PX_ASSERT(!convexHullData->mAABB.isEmpty());
+	PxBounds3 shapeBounds = convexHullData->mAABB.transformFast(scaling);
+	PX_ASSERT(!shapeBounds.isEmpty());
+	shapeBounds.fattenFast(proxRadius);
+	bool scaledPlanes = false; // becomes true once scaledPlaneBuf has been filled
+
+#if PT_USE_SIMD_CONVEX_COLLISION
+	const Vec3V boundMin = V3LoadU(shapeBounds.minimum);
+	const Vec3V boundMax = V3LoadU(shapeBounds.maximum);
+	const Vec4V boundMinX = V4SplatElement<0>(Vec4V_From_Vec3V(boundMin));
+	const Vec4V boundMinY = V4SplatElement<1>(Vec4V_From_Vec3V(boundMin));
+	const Vec4V boundMinZ = V4SplatElement<2>(Vec4V_From_Vec3V(boundMin));
+	const Vec4V boundMaxX = V4SplatElement<0>(Vec4V_From_Vec3V(boundMax));
+	const Vec4V boundMaxY = V4SplatElement<1>(Vec4V_From_Vec3V(boundMax));
+	const Vec4V boundMaxZ = V4SplatElement<2>(Vec4V_From_Vec3V(boundMax));
+
+	ParticleCollDataV4 collDataV4;
+
+	const VecU32V u4Zero = U4LoadXYZW(0, 0, 0, 0);
+	const VecU32V u4One = U4LoadXYZW(1, 1, 1, 1);
+
+	// Stand-in for the missing lanes of an incomplete group of four. Its positions are FLT_MAX, so the
+	// strict bounds comparisons below never report an overlap for it.
+	PX_ALIGN(16, ParticleCollData fakeCsd);
+	fakeCsd.localOldPos = PxVec3(FLT_MAX, FLT_MAX, FLT_MAX);
+	fakeCsd.localNewPos = PxVec3(FLT_MAX, FLT_MAX, FLT_MAX);
+	PX_ALIGN(16, PxU32 overlapArray[128]);
+
+	PxU32 start = 0;
+	while(start < numCollData)
+	{
+		// pass 1: bounds overlap test for up to 128 particles, four at a time
+		const PxU32 batchSize = PxMin(numCollData - start, PxU32(128));
+		PxU32 v4Count = 0;
+		ParticleCollData* particleCollDataIt = &particleCollData[start];
+		for(PxU32 i = 0; i < batchSize; i += 4)
+		{
+			// i is relative to the batch, so the tail lanes have to be checked against batchSize.
+			// Comparing against numCollData reads past the end of particleCollData when start > 0.
+			ParticleCollData* collData[4];
+			collData[0] = particleCollDataIt++;
+			collData[1] = (i + 1 < batchSize) ? particleCollDataIt++ : &fakeCsd;
+			collData[2] = (i + 2 < batchSize) ? particleCollDataIt++ : &fakeCsd;
+			collData[3] = (i + 3 < batchSize) ? particleCollDataIt++ : &fakeCsd;
+
+			Vec4V oldPosV0 = V4LoadU(reinterpret_cast<PxF32*>(&collData[0]->localOldPos));
+			Vec4V newPosV0 = V4LoadU(reinterpret_cast<PxF32*>(&collData[0]->localNewPos));
+			Vec4V oldPosV1 = V4LoadU(reinterpret_cast<PxF32*>(&collData[1]->localOldPos));
+			Vec4V newPosV1 = V4LoadU(reinterpret_cast<PxF32*>(&collData[1]->localNewPos));
+			Vec4V oldPosV2 = V4LoadU(reinterpret_cast<PxF32*>(&collData[2]->localOldPos));
+			Vec4V newPosV2 = V4LoadU(reinterpret_cast<PxF32*>(&collData[2]->localNewPos));
+			Vec4V oldPosV3 = V4LoadU(reinterpret_cast<PxF32*>(&collData[3]->localOldPos));
+			Vec4V newPosV3 = V4LoadU(reinterpret_cast<PxF32*>(&collData[3]->localNewPos));
+
+			Vec4V particleMin0 = V4Min(oldPosV0, newPosV0);
+			Vec4V particleMax0 = V4Max(oldPosV0, newPosV0);
+			Vec4V particleMin1 = V4Min(oldPosV1, newPosV1);
+			Vec4V particleMax1 = V4Max(oldPosV1, newPosV1);
+			Vec4V particleMin2 = V4Min(oldPosV2, newPosV2);
+			Vec4V particleMax2 = V4Max(oldPosV2, newPosV2);
+			Vec4V particleMin3 = V4Min(oldPosV3, newPosV3);
+			Vec4V particleMax3 = V4Max(oldPosV3, newPosV3);
+
+			Mat44V particleMin44(particleMin0, particleMin1, particleMin2, particleMin3);
+			const Mat44V particleMinTrans44 = M44Trnsps(particleMin44);
+			Mat44V particleMax44(particleMax0, particleMax1, particleMax2, particleMax3);
+			const Mat44V particleMaxTrans44 = M44Trnsps(particleMax44);
+
+			BoolV mask = V4IsGrtr(boundMaxX, particleMinTrans44.col0);
+			mask = BAnd(V4IsGrtr(boundMaxY, particleMinTrans44.col1), mask);
+			mask = BAnd(V4IsGrtr(boundMaxZ, particleMinTrans44.col2), mask);
+			mask = BAnd(V4IsGrtr(particleMaxTrans44.col0, boundMinX), mask);
+			mask = BAnd(V4IsGrtr(particleMaxTrans44.col1, boundMinY), mask);
+			mask = BAnd(V4IsGrtr(particleMaxTrans44.col2, boundMinZ), mask);
+
+			VecU32V overlap4 = V4U32Sel(mask, u4One, u4Zero);
+			V4U32StoreAligned(overlap4, reinterpret_cast<VecU32V*>(&overlapArray[i]));
+		}
+
+		// pass 2: pack the overlapping particles into groups of four and run the plane test
+		particleCollDataIt = &particleCollData[start];
+		for(PxU32 k = 0; k < batchSize; k++, ++particleCollDataIt)
+		{
+			if(overlapArray[k])
+			{
+				if(!scaledPlanes)
+				{
+					scalePlanes(scaledPlaneBuf, convexHullData, invScaling);
+					scaledPlanes = true;
+				}
+
+				collDataV4.localOldPos[v4Count].v3 = particleCollDataIt->localOldPos;
+				collDataV4.localNewPos[v4Count].v3 = particleCollDataIt->localNewPos;
+				collDataV4.localFlags[v4Count] = particleCollDataIt->localFlags;
+				collDataV4.restOffset[v4Count] = particleCollDataIt->restOffset;
+				collDataV4.ccTime[v4Count] = particleCollDataIt->ccTime;
+				collDataV4.collData[v4Count] = particleCollDataIt;
+				v4Count++;
+			}
+
+			// flush when four lanes are filled, or at the end of the batch with a partial group
+			if(v4Count == 4 || (v4Count > 0 && (k == batchSize - 1)))
+			{
+				collideWithConvexPlanesSIMD(collDataV4, scaledPlaneBuf, convexHullData->mNbPolygons, proxRadius);
+
+				// only the first v4Count lanes hold particles of this group
+				for(PxU32 j = 0; j < v4Count; j++)
+				{
+					ParticleCollData* collData = collDataV4.collData[j];
+					PxU32 stateFlag = collDataV4.localFlags[j];
+					if(stateFlag)
+					{
+						collData->localFlags |= stateFlag;
+						collData->ccTime = collDataV4.ccTime[j];
+						collData->localSurfaceNormal = collDataV4.localSurfaceNormal[j].v3;
+						collData->localSurfacePos = collDataV4.localSurfacePos[j].v3;
+					}
+				}
+				v4Count = 0;
+			}
+		}
+		start += batchSize;
+	}
+#else
+	ParticleCollData* particleCollDataIt = particleCollData;
+	for(PxU32 i = 0; i < numCollData; ++i, ++particleCollDataIt)
+	{
+		PxBounds3 particleBounds =
+		    PxBounds3::boundsOfPoints(particleCollDataIt->localOldPos, particleCollDataIt->localNewPos);
+
+		if(particleBounds.intersects(shapeBounds))
+		{
+			if(!scaledPlanes)
+			{
+				scalePlanes(scaledPlaneBuf, convexHullData, invScaling);
+				scaledPlanes = true;
+			}
+
+			collideWithConvexPlanes(*particleCollDataIt, scaledPlaneBuf, convexHullData->mNbPolygons, proxRadius);
+		}
+	}
+#endif
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionData.h b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionData.h
new file mode 100644
index 00000000..d52ff29e
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionData.h
@@ -0,0 +1,271 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_COLLISION_DATA_H
+#define PT_COLLISION_DATA_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "foundation/PxVec3.h"
+#include "foundation/PxVec4.h"
+#include "foundation/PxTransform.h"
+#include "particles/PxParticleFlag.h"
+#include "PtConfig.h"
+
+namespace physx
+{
+
+struct PxsShapeCore;
+struct PxsBodyCore;
+
+namespace Pt
+{
+
+#define PT_CCD_PROJECT 0         // Occasional leaking at static interfaces
+#define PT_CCD_STAY 1            // Seems to work for static
+#define PT_CCD_IMPACT 2          // Doesn't work at all for static interfaces
+#define PT_CDD_BACKTRACK_SMALL 3 // Seems to work for static
+#define PT_CDD_BACKTRACK_LARGE 4 // Seems to work for static
+
+#define PT_CDD_BACKTRACK_SMALL_EPS 1e-4f // Backtrack length used when PT_CDD_BACKTRACK_SMALL is selected
+
+#define PT_CCD_MEDTHOD PT_CCD_STAY // Selected method. Maybe we'll need to do something else for dynamics
+
+PX_FORCE_INLINE void computeContinuousTargetPosition(PxVec3& surfacePos, const PxVec3& localOldPos,
+                                                     const PxVec3& relativePOSITION, const PxVec3& surfaceNormal,
+                                                     const PxF32 restOffset)
+{
+	PX_UNUSED(restOffset);       // Which of these three inputs is read depends on the
+	PX_UNUSED(surfaceNormal);    // PT_CCD_MEDTHOD chosen at compile time, so all of them
+	PX_UNUSED(relativePOSITION); // are marked as potentially unused.
+	// Writes the target position for a continuous collision to surfacePos.
+#if PT_CCD_MEDTHOD == PT_CCD_PROJECT
+	surfacePos = localOldPos + relativePOSITION + (surfaceNormal * restOffset);
+#elif PT_CCD_MEDTHOD == PT_CCD_STAY
+	surfacePos = localOldPos; // stay at the old position
+#elif PT_CCD_MEDTHOD == PT_CCD_IMPACT
+	surfacePos = localOldPos + relativePOSITION;
+#else
+	const PxF32 backtrackLength = (PT_CCD_MEDTHOD == PT_CDD_BACKTRACK_SMALL) ? PT_CDD_BACKTRACK_SMALL_EPS : restOffset;
+	const PxF32 relImpactLength = relativePOSITION.magnitude();
+	const PxF32 scale = (relImpactLength > 0.0f) ? PxMax(0.0f, relImpactLength - backtrackLength) / relImpactLength : 0.0f;
+	surfacePos = localOldPos + relativePOSITION * scale; // scale is 0 (not 0/0 = NaN) for a zero-length relativePOSITION
+#endif
+}
+
+/*!
+Collision constraint of a fluid particle, stored as a contact normal and an offset d along that normal
+*/
+struct Constraint
+{
+	PxVec3 normal; // Normal of the contact surface
+	PxF32 d;       // normal.dot(contact point), see the two-argument constructor
+	               // 16
+
+  public:
+	// Assigns nothing to normal or d.
+	Constraint()
+	{
+		// Open question kept from the original author: initialize on creation (setZero())?
+	}
+
+	// Builds the constraint from a contact normal and a contact point _p.
+	Constraint(const PxVec3& _normal, const PxVec3& _p) : normal(_normal), d(_normal.dot(_p))
+	{
+	}
+
+	// Returns p shifted along normal by (d - normal.dot(p)).
+	PX_FORCE_INLINE PxVec3 project(const PxVec3& p) const
+	{
+		return p + normal * (d - normal.dot(p));
+	}
+};
+
+/*!
+Extra data kept for a fluid particle collision constraint that comes from a dynamic rigid body
+*/
+struct ConstraintDynamic
+{
+	PxVec3 velocity;               // surface velocity handed to addConstraintDynamic
+	const PxsBodyCore* twoWayBody; // weak (non-owning) reference to the rigid body
+
+  public:
+	PX_FORCE_INLINE void setEmpty()
+	{
+		twoWayBody = NULL; // no body attached
+		velocity = PxVec3(0.0f);
+	}
+};
+
+/*!
+Fluid particle collision constraint buffers: raw pointers to the storage of both constraint slots
+*/
+struct ConstraintBuffers
+{
+	Constraint* constraint0Buf;               // slot 0 constraints (presumably one per particle - confirm)
+	Constraint* constraint1Buf;               // slot 1 constraints (presumably one per particle - confirm)
+	ConstraintDynamic* constraint0DynamicBuf; // dynamic-body data matching slot 0
+	ConstraintDynamic* constraint1DynamicBuf; // dynamic-body data matching slot 1
+};
+
+/*!
+Different types of collision (bit flags)
+*/
+struct ParticleCollisionFlags
+{
+	enum Enum
+	{
+		// Global collision flags. Used to track the latest collision status of a particle when
+		// testing against potentially colliding shapes
+		DC            = (1 << 0), // Discrete collision
+		CC            = (1 << 1), // Continuous collision
+		RESET_SNORMAL = (1 << 2), // Next discrete collision overwrites the stored normal instead of adding to it
+
+		// When testing a particle against a shape, the following collision flags might be used
+		L_CC          = (1 << 3), // Continuous collision: Predicted particle motion vector intersects shape region
+		L_DC          = (1 << 4), // Discrete collision: Predicted particle position inside discrete region of shape (shape
+		// region + collision radius)
+		L_PROX        = (1 << 5), // Proximity collision: Predicted particle position inside proximity region of shape (shape
+		// region + proximity radius)
+		L_CC_PROX     = (L_CC | L_PROX),       // mask: continuous or proximity result
+		L_ANY         = (L_CC | L_DC | L_PROX) // mask: any per-shape result
+	};
+};
+
+/*!
+Structure to track collision data for a fluid particle: accumulated results, particle state and per-shape local fields
+*/
+struct ParticleCollData
+{
+	PxVec3 surfaceNormal; // Contact normal [world space]
+	PxU32 flags;          // Latest collision status (ParticleCollisionFlags DC / CC / RESET_SNORMAL)
+	// 16 (the bare numbers look like running byte totals of the members above)
+
+	PxVec3 surfacePos; // Contact point on shape surface [world space]
+	PxF32 dcNum;       // Number of discrete collisions (sum of the dcNum weights passed to addDiscreteCollision*)
+	// 32
+
+	PxVec3 surfaceVel; // Velocity of contact point on shape surface [world space]
+	PxF32 ccTime;      // "Time of impact" for continuous collision
+	// 48
+
+	PxVec3 oldPos; // Old particle position
+	ParticleFlags particleFlags;
+	// 64
+
+	PxVec3 newPos; // New particle position
+	PxU32 origParticleIndex;
+	// 80
+
+	PxVec3 velocity; // Particle velocity
+	PxF32 restOffset;
+	// 96
+
+	PxVec3 twoWayImpulse;
+	const PxsBodyCore* twoWayBody; // Weak reference to colliding rigid body
+	// 112 (NOTE(review): this and the later totals only add up with 4-byte pointers - confirm)
+
+	PxVec3 localOldPos; // in
+	PxU32 localFlags;   // in/out
+	// 128
+
+	PxVec3 localNewPos; // in
+	Constraint* c0;     // in
+	// 144
+
+	PxVec3 localSurfaceNormal; // out
+	Constraint* c1;            // in
+	// 160
+
+	PxVec3 localSurfacePos; // out
+	PxF32 localDcNum;       // Number of discrete collisions of the per-shape result (read by updateCollDataStaticMesh)
+	                        // 176
+
+  public:
+	PX_FORCE_INLINE void init(const PxVec3& particlePos, const PxF32 particleRestOffset, const PxU32 particleIndex,
+	                          const ParticleFlags _particleFlags)
+	{
+		// Initialize values. newPos, velocity, c0/c1 and all local* members are not assigned here.
+
+		surfaceNormal = PxVec3(0);
+		flags = 0;
+
+		surfacePos = PxVec3(0);
+		dcNum = 0.0f;
+
+		surfaceVel = PxVec3(0);
+		ccTime = 1.0f; // No collision assumed.
+
+		restOffset = particleRestOffset;
+
+		oldPos = particlePos;
+
+		// Remove collision flags from previous time step
+		particleFlags.api = PxU16(_particleFlags.api & ((~PxU16(PxParticleFlag::eCOLLISION_WITH_STATIC)) &
+		                                                (~PxU16(PxParticleFlag::eCOLLISION_WITH_DYNAMIC))));
+
+		// Reduce cache bits (masked field shifted right by one, then masked again)
+		// 11 -> 01
+		// 01 -> 00
+		// 00 -> 00
+		PxU16 reducedCache = PxU16(((_particleFlags.low & InternalParticleFlag::eGEOM_CACHE_MASK) >> 1) &
+		                           InternalParticleFlag::eGEOM_CACHE_MASK);
+		particleFlags.low = PxU16((_particleFlags.low & ~PxU16(InternalParticleFlag::eGEOM_CACHE_MASK)) | reducedCache);
+
+		origParticleIndex = particleIndex;
+
+		twoWayBody = NULL;
+		twoWayImpulse = PxVec3(0);
+	}
+};
+
+struct PxVec3Pad // PxVec3 followed by one extra float
+{
+	PxVec3 v3; // payload vector
+	PxF32 pad; // padding; only v3 is accessed in the batching code that fills ParticleCollDataV4
+};
+
+struct ParticleCollDataV4 // up to four ParticleCollData entries gathered for collideWithConvexPlanesSIMD
+{
+	ParticleCollData* collData[4];                 // entries the lanes were filled from; results are copied back to them
+	PX_ALIGN(16, PxVec3Pad localOldPos[4]);        // in
+	PX_ALIGN(16, PxVec3Pad localNewPos[4]);        // in
+	PX_ALIGN(16, PxF32 restOffset[4]);             // in
+	PX_ALIGN(16, PxU32 localFlags[4]);             // in,out
+	PX_ALIGN(16, PxF32 ccTime[4]);                 // in,out (filled from the source entry before the SIMD call)
+	PX_ALIGN(16, PxVec3Pad localSurfaceNormal[4]); // out
+	PX_ALIGN(16, PxVec3Pad localSurfacePos[4]);    // out
+};
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_COLLISION_DATA_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionHelper.h b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionHelper.h
new file mode 100644
index 00000000..30a746a6
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionHelper.h
@@ -0,0 +1,860 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_COLLISION_HELPER_H
+#define PT_COLLISION_HELPER_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PxvDynamics.h"
+#include "PtSpatialHash.h"
+#include "PtConstants.h"
+
+namespace physx
+{
+
+struct PxsShapeCore;
+
+namespace Pt
+{
+
+#define RANDOMIZED_COLLISION_PROTOTYPE 0
+#define PXS_SURFACE_NORMAL_UNIT_TOLERANCE 5e-2f
+
+struct W2STransformTemp // pair of transforms kept together as temporary data
+{
+	PxTransform w2sOld; // presumably the world-to-shape transform of the old pose (going by the name) - confirm
+	PxTransform w2sNew; // presumably the world-to-shape transform of the new pose (going by the name) - confirm
+};
+
+PX_FORCE_INLINE PxF32 invertDcNum(PxF32 dcNum)
+{
+	// Returns the reciprocal of a discrete collision count; counts below three avoid the division.
+	PX_ASSERT(dcNum > 0.0f);
+	if(!(dcNum < 3.0f))
+	{
+		return 1.0f / dcNum;
+	}
+
+	// Only 1 and 2 are expected here: 0.5 is selected when dcNum - 1.5 >= 0, otherwise 1.0.
+	PX_ASSERT(dcNum == 1.0f || dcNum == 2.0f);
+	return physx::intrinsics::fsel(dcNum - 1.5f, 0.5f, 1.0f);
+}
+
+#if RANDOMIZED_COLLISION_PROTOTYPE // compiled out while the macro is defined as 0
+static bool rrEnabled = false;                // checked by reflectVelocity before using the random path
+static PxF32 rrVelocityThresholdSqr = 10.0f;  // squared speed above which the surface normal gets perturbed
+static PxF32 rrRestitutionMin = 0.3f;         // restitution is interpolated between min and max
+static PxF32 rrRestitutionMax = 0.5f;
+static PxF32 rrDynamicFrictionMin = 0.03f;    // dynamic friction is interpolated between min and max
+static PxF32 rrDynamicFrictionMax = 0.05f;
+static PxF32 rrStaticFrictionSqrMin = 1.0f;   // squared static friction is interpolated between min and max
+static PxF32 rrStaticFrictionSqrMax = 2.0f;
+static PxF32 rrAngleRadMax = physx::intrinsics::sin(PxPi / 16); // NOTE(review): holds sin(pi/16), not an angle
+// Picks pseudo-random restitution/friction values per particle and, for fast particles, a perturbed normal.
+static void selectRandomParameters(PxVec3& outSurfaceNormal, PxF32& outRestitution, PxF32& outDynamicFriction,
+                                   PxF32& outStaticFrictionSqr, const PxU32 particleIndex, const PxVec3& surfaceNormal,
+                                   const PxVec3& velocity, const CollisionParameters& params)
+{
+	static PxF32 noiseScale = (1.0f / 65536.0f); // maps a 16-bit value to [0, 1)
+	PxU32 noiseFactorP = particleIndex * particleIndex; // taking the square of the particleIndex yields better results.
+	PxU32 noiseFactorTP = params.temporalNoise * noiseFactorP; // additionally varies with params.temporalNoise
+
+	PxF32 noise0 = ((noiseFactorTP * 1735339) & 0xffff) * noiseScale; // low 16 bits of the product, scaled to [0, 1)
+	PxF32 noise1 = ((noiseFactorTP * 1335379) & 0xffff) * noiseScale;
+	PxF32 noise2 = ((noiseFactorP * 1235303) & 0xffff) * noiseScale; // uses noiseFactorP, i.e. no temporal term
+
+	outRestitution = (1.0f - noise0) * rrRestitutionMin + noise0 * rrRestitutionMax;
+	outDynamicFriction = (1.0f - noise1) * rrDynamicFrictionMin + noise1 * rrDynamicFrictionMax;
+	outStaticFrictionSqr = (1.0f - noise2) * rrStaticFrictionSqrMin + noise2 * rrStaticFrictionSqrMax;
+
+	if(velocity.magnitudeSquared() > rrVelocityThresholdSqr)
+	{
+		PxF32 noise3 = ((noiseFactorTP * 14699023) & 0xffff) * noiseScale;
+		PxF32 noise4 = ((noiseFactorTP * 16699087) & 0xffff) * noiseScale;
+		PxF32 noise5 = ((noiseFactorTP * 11999027) & 0xffff) * noiseScale;
+
+		PxVec3 tangent0, tangent1;
+		normalToTangents(surfaceNormal, tangent0, tangent1);
+
+		PxF32 angleNoise = noise3 * PxTwoPi; // direction of the perturbation within the tangent plane
+		PxF32 angleCosNoise = physx::intrinsics::cos(angleNoise);
+		PxF32 angleSinNoise = physx::intrinsics::sin(angleNoise);
+
+		// skew towards mean
+		PxF32 radiusNoise = noise4 * noise5;
+		PxVec3 tangent = tangent0 * angleCosNoise + tangent1 * angleSinNoise;
+
+		outSurfaceNormal = surfaceNormal + tangent * radiusNoise * rrAngleRadMax;
+		outSurfaceNormal.normalize();
+	}
+	else
+	{
+		outSurfaceNormal = surfaceNormal; // slow particles keep the unmodified normal
+	}
+}
+#endif
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+PX_FORCE_INLINE void clampVelocity(PxVec3& velocity, PxReal maxMotion, PxReal timeStep)
+{
+	// Rescale velocity so that the distance covered within timeStep does not exceed maxMotion.
+	const PxReal stepDistance = velocity.magnitude() * timeStep;
+	if(stepDistance > maxMotion)
+	{
+		velocity *= maxMotion / stepDistance;
+	}
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+PX_FORCE_INLINE void integrateParticleVelocity(Particle& particle, const PxF32 maxMotionDistance,
+                                               const PxVec3& acceleration, const PxF32 dampingDtComp,
+                                               const PxF32 timeStep)
+{
+	PxVec3& vel = particle.velocity;
+
+	// Apply the acceleration over the time step, then scale the result by the damping factor.
+	vel = (vel + acceleration * timeStep) * dampingDtComp;
+
+	// Limit the speed so that one step moves the particle no further than maxMotionDistance.
+	clampVelocity(vel, maxMotionDistance, timeStep);
+
+	// The small epsilon tolerates rounding in the clamp above.
+	PX_ASSERT((vel * timeStep).magnitude() <= maxMotionDistance + 1e-5f);
+}
+
+//-----------------------------------------------------------------------------------------------------------------------//
+
+PX_FORCE_INLINE void addDiscreteCollisionStatic(ParticleCollData& collData, const PxVec3& newSurfaceNormal,
+                                                const PxVec3& newSurfacePos, const PxF32& dcNum)
+{
+	// Discrete results are summed up (to be averaged); dcNum is the weight of this contribution, since the
+	// passed normal/position can themselves be sums of several normals/positions (for meshes for instance).
+	collData.surfacePos += newSurfacePos;
+	collData.dcNum += dcNum;
+
+	if(!(collData.flags & ParticleCollisionFlags::RESET_SNORMAL))
+	{
+		collData.surfaceNormal += newSurfaceNormal;
+	}
+	else
+	{
+		// RESET_SNORMAL is set: replace the stored normal instead of adding to it, then clear the flag.
+		collData.surfaceNormal = newSurfaceNormal;
+		collData.flags &= ~ParticleCollisionFlags::RESET_SNORMAL;
+	}
+	collData.flags |= ParticleCollisionFlags::DC;
+}
+
+//-----------------------------------------------------------------------------------------------------------------------//
+
+PX_FORCE_INLINE void addDiscreteCollisionDynamic(ParticleCollData& collData, const PxVec3& newSurfaceNormal,
+                                                 const PxVec3& newSurfacePos, const PxVec3& newSurfaceVel,
+                                                 const PxF32& dcNum)
+{
+	// Discrete results are summed up (to be averaged); dcNum is the weight of this contribution, since the
+	// passed normal/position/velocity can themselves be sums of several samples (for meshes for instance).
+	collData.surfacePos += newSurfacePos;
+	collData.dcNum += dcNum;
+
+	if(!(collData.flags & ParticleCollisionFlags::RESET_SNORMAL))
+	{
+		collData.surfaceNormal += newSurfaceNormal;
+		collData.surfaceVel += newSurfaceVel;
+	}
+	else
+	{
+		// RESET_SNORMAL is set: replace normal and velocity instead of adding to them, then clear the flag.
+		collData.surfaceNormal = newSurfaceNormal;
+		collData.surfaceVel = newSurfaceVel;
+		collData.flags &= ~ParticleCollisionFlags::RESET_SNORMAL;
+	}
+	collData.flags |= ParticleCollisionFlags::DC;
+}
+
+//-----------------------------------------------------------------------------------------------------------------------//
+
+PX_FORCE_INLINE void addContinuousCollisionStatic(ParticleCollData& collData, const PxVec3& newSurfaceNormal,
+                                                  const PxVec3& newSurfacePos)
+{
+	// A continuous collision takes precedence over discrete ones: drop DC, raise CC, overwrite the result.
+	collData.flags = (collData.flags & ~ParticleCollisionFlags::DC) | ParticleCollisionFlags::CC;
+
+	collData.surfacePos = newSurfacePos;
+	collData.surfaceNormal = newSurfaceNormal;
+}
+
+//-----------------------------------------------------------------------------------------------------------------------//
+
+PX_FORCE_INLINE void addContinuousCollisionDynamic(ParticleCollData& collData, const PxVec3& newSurfaceNormal,
+                                                   const PxVec3& newSurfacePos, const PxVec3& newSurfaceVel)
+{
+	// A continuous collision takes precedence over discrete ones: drop DC, raise CC, overwrite the result.
+	collData.flags = (collData.flags & ~ParticleCollisionFlags::DC) | ParticleCollisionFlags::CC;
+
+	collData.surfaceVel = newSurfaceVel;
+	collData.surfacePos = newSurfacePos;
+	collData.surfaceNormal = newSurfaceNormal;
+}
+
+//-----------------------------------------------------------------------------------------------------------------------//
+PX_FORCE_INLINE void addConstraint(ParticleCollData& collData, const PxVec3& newSurfaceNormal, const PxVec3& newSurfacePos)
+{
+	// The unit-length check is deliberately loose: as noted by sschirm, a perfectly normalized normal rotated by
+	// a quat that passes PxQuat::isSane() can still fail PxVec3::isNormalized().
+	PX_ASSERT(PxAbs(newSurfaceNormal.magnitude() - 1) < PXS_SURFACE_NORMAL_UNIT_TOLERANCE);
+	const Constraint candidate(newSurfaceNormal, newSurfacePos);
+
+	if(!(collData.particleFlags.low & InternalParticleFlag::eCONSTRAINT_0_VALID))
+	{
+		// Slot 0 is still free.
+		*collData.c0 = candidate;
+		collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_0_VALID;
+		return;
+	}
+	if(!(collData.particleFlags.low & InternalParticleFlag::eCONSTRAINT_1_VALID))
+	{
+		// Slot 1 is still free.
+		*collData.c1 = candidate;
+		collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_1_VALID;
+		return;
+	}
+
+	// Both slots are taken: the slot with the larger distance {old position} <--> {shape surface} may be replaced.
+	// The old position is used since the new position is corrected after each collision occurrence.
+	// Important: If this selection criterion changes, the fluid vs. static mesh code needs to be
+	//            adjusted accordingly.
+	const PxReal oldDist0 = collData.c0->normal.dot(collData.oldPos) - collData.c0->d;
+	const PxReal oldDist1 = collData.c1->normal.dot(collData.oldPos) - collData.c1->d;
+	const PxReal oldDistNew = candidate.normal.dot(collData.oldPos) - candidate.d;
+
+	const bool slot1IsFarther = oldDist0 < oldDist1;
+	if(slot1IsFarther && oldDistNew < oldDist1)
+	{
+		*collData.c1 = candidate;
+		collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_1_VALID;
+		collData.particleFlags.low &= PxU16(~InternalParticleFlag::eCONSTRAINT_1_DYNAMIC);
+	}
+	else if(!slot1IsFarther && oldDistNew < oldDist0)
+	{
+		*collData.c0 = candidate;
+		collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_0_VALID;
+		collData.particleFlags.low &= PxU16(~InternalParticleFlag::eCONSTRAINT_0_DYNAMIC);
+	}
+}
+
+PX_FORCE_INLINE void addConstraintDynamic(ParticleCollData& collData, const PxVec3& newSurfaceNormal,
+                                          const PxVec3& newSurfacePos, const PxVec3& newSurfaceVel,
+                                          const PxsBodyCore* body, ConstraintDynamic& c0Dynamic,
+                                          ConstraintDynamic& c1Dynamic)
+{
+	// The unit-length check is deliberately loose: as noted by sschirm, a perfectly normalized normal rotated by
+	// a quat that passes PxQuat::isSane() can still fail PxVec3::isNormalized().
+	PX_ASSERT(PxAbs(newSurfaceNormal.magnitude() - 1) < PXS_SURFACE_NORMAL_UNIT_TOLERANCE);
+	const Constraint candidate(newSurfaceNormal, newSurfacePos);
+
+	if(!(collData.particleFlags.low & InternalParticleFlag::eCONSTRAINT_0_VALID))
+	{
+		// Slot 0 is still free.
+		*collData.c0 = candidate;
+		c0Dynamic.velocity = newSurfaceVel;
+		c0Dynamic.twoWayBody = body;
+		collData.particleFlags.low |=
+		    (InternalParticleFlag::eCONSTRAINT_0_VALID | InternalParticleFlag::eCONSTRAINT_0_DYNAMIC);
+		return;
+	}
+	if(!(collData.particleFlags.low & InternalParticleFlag::eCONSTRAINT_1_VALID))
+	{
+		// Slot 1 is still free.
+		*collData.c1 = candidate;
+		c1Dynamic.velocity = newSurfaceVel;
+		c1Dynamic.twoWayBody = body;
+		collData.particleFlags.low |=
+		    (InternalParticleFlag::eCONSTRAINT_1_VALID | InternalParticleFlag::eCONSTRAINT_1_DYNAMIC);
+		return;
+	}
+
+	// Both slots are taken: the slot with the larger distance {old position} <--> {shape surface} may be replaced.
+	// The old position is used since the new position is corrected after each collision occurrence.
+	// Important: If this selection criterion changes, the fluid vs. static mesh code needs to be
+	//            adjusted accordingly.
+	const PxReal oldDist0 = collData.c0->normal.dot(collData.oldPos) - collData.c0->d;
+	const PxReal oldDist1 = collData.c1->normal.dot(collData.oldPos) - collData.c1->d;
+	const PxReal oldDistNew = candidate.normal.dot(collData.oldPos) - candidate.d;
+
+	const bool slot1IsFarther = oldDist0 < oldDist1;
+	if(slot1IsFarther && oldDistNew < oldDist1)
+	{
+		*collData.c1 = candidate;
+		c1Dynamic.velocity = newSurfaceVel;
+		c1Dynamic.twoWayBody = body;
+		collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_1_DYNAMIC;
+	}
+	else if(!slot1IsFarther && oldDistNew < oldDist0)
+	{
+		*collData.c0 = candidate;
+		c0Dynamic.velocity = newSurfaceVel;
+		c0Dynamic.twoWayBody = body;
+		collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_0_DYNAMIC;
+	}
+}
+
+/*!
+Reflect velocity on shape surface and write the result to reflectedVel.
+- To apply friction, the current velocity (inVel) is used; it also decides whether anything is reflected at all
+- For restitution a different velocity (oldVel) can be used
+(This can help to avoid jittering effects. After the fluid particle dynamics update, forces are applied
+to integrate the new velocities. If particle collision constraints work on these new velocities,
+jittering can result. Using the old velocities (before the forces were applied) to compute the
+normal impulse can solve this problem)
+*/
+PX_FORCE_INLINE void reflectVelocity(PxVec3& reflectedVel, const PxVec3& inVel, const PxVec3& oldVel,
+                                     const PxVec3& surfaceNormal, const PxVec3& surfaceVel, PxU32 particleIndex,
+                                     const CollisionParameters& params)
+{
+	PX_UNUSED(particleIndex); // only read by the randomized prototype path
+
+	PxVec3 relativeVel = inVel - surfaceVel; // work relative to the moving surface
+	PxReal projectedRelativeVel = surfaceNormal.dot(relativeVel);
+
+	if(projectedRelativeVel < 0.0f) // Particle is moving closer to surface (else the collision will be resolved)
+	{
+		PxF32 rDynamicFriction;
+		PxF32 rStaticFrictionSqr;
+		PxF32 rRestitution;
+		PxVec3 rSurfaceNormal;
+
+#if RANDOMIZED_COLLISION_PROTOTYPE
+		if(rrEnabled)
+		{
+			selectRandomParameters(rSurfaceNormal, rRestitution, rDynamicFriction, rStaticFrictionSqr, particleIndex,
+			                       surfaceNormal, relativeVel, params);
+		}
+		else
+#endif
+		{
+			rDynamicFriction = params.dynamicFriction;
+			rStaticFrictionSqr = params.staticFrictionSqr;
+			rRestitution = params.restitution;
+			rSurfaceNormal = surfaceNormal;
+		}
+
+		PxVec3 newNormalComponent = rSurfaceNormal * projectedRelativeVel;
+		PxVec3 newTangentialComponent = relativeVel - newNormalComponent;
+
+		PxVec3 oldRelativeVel = oldVel - surfaceVel;
+		PxReal oldProjectedRelativeVel = rSurfaceNormal.dot(oldRelativeVel);
+		PxVec3 oldNormalComponent = rSurfaceNormal * oldProjectedRelativeVel;
+
+		// static friction (this works based on the quotient between tangential and normal velocity magnitude).
+		PxVec3 diffNormalComponent = newNormalComponent - oldNormalComponent;
+
+		PxReal stictionSqr = rStaticFrictionSqr * diffNormalComponent.magnitudeSquared();
+
+		// Branch-free form (fsel keeps its second argument when diff >= 0, else takes the third) of:
+		// if (newTangentialComponent.magnitudeSquared() < stictionSqr) newTangentialComponent = PxVec3(0);
+		PxF32 diff = newTangentialComponent.magnitudeSquared() - stictionSqr;
+		newTangentialComponent.x = physx::intrinsics::fsel(diff, newTangentialComponent.x, 0.0f);
+		newTangentialComponent.y = physx::intrinsics::fsel(diff, newTangentialComponent.y, 0.0f);
+		newTangentialComponent.z = physx::intrinsics::fsel(diff, newTangentialComponent.z, 0.0f);
+
+		// pseudo dynamic friction (not dependent on normal component!)
+		reflectedVel = newTangentialComponent * (1.0f - rDynamicFriction);
+
+		// restitution is computed using the old velocity
+		// if (oldProjectedRelativeVel < 0.0f)
+		//	reflectedVel -= oldNormalComponent * mParams.restitution;
+		PxVec3 reflectedVelTmp = reflectedVel - oldNormalComponent * rRestitution;
+		reflectedVel.x = physx::intrinsics::fsel(oldProjectedRelativeVel, reflectedVel.x, reflectedVelTmp.x);
+		reflectedVel.y = physx::intrinsics::fsel(oldProjectedRelativeVel, reflectedVel.y, reflectedVelTmp.y);
+		reflectedVel.z = physx::intrinsics::fsel(oldProjectedRelativeVel, reflectedVel.z, reflectedVelTmp.z);
+
+		reflectedVel += surfaceVel; // back from surface-relative to absolute velocity
+	}
+	else
+		reflectedVel = inVel; // not approaching the surface: velocity is passed through unchanged
+}
+
+PX_FORCE_INLINE void updateParticle(Particle& particle, const ParticleCollData& collData, bool projection,
+                                    const PxPlane& projectionPlane, PxBounds3& worldBounds)
+{
+	// worldBounds is grown in here, next to the position write (original note: moved here to avoid LHS)
+	if(projection)
+	{
+		// Remove the velocity component along the plane normal and project the position onto the plane.
+		particle.velocity = collData.velocity - (projectionPlane.n * projectionPlane.n.dot(collData.velocity));
+		const PxVec3 projectedPos = projectionPlane.project(collData.newPos);
+		PX_ASSERT(projectedPos.isFinite());
+		particle.position = projectedPos;
+		worldBounds.include(projectedPos);
+	}
+	else
+	{
+		particle.velocity = collData.velocity;
+		particle.position = collData.newPos;
+		PX_ASSERT(particle.position.isFinite());
+		worldBounds.include(collData.newPos);
+	}
+	particle.flags = collData.particleFlags;
+}
+
+PX_FORCE_INLINE void clampToMaxMotion(PxVec3& newPos, const PxVec3& oldPos, PxF32 maxMotionDistance,
+                                      PxF32 maxMotionDistanceSqr)
+{
+	const PxVec3 delta = newPos - oldPos;
+	const PxReal deltaSqr = delta.magnitudeSquared();
+	if(!(deltaSqr > maxMotionDistanceSqr))
+		return;
+	// Too far: keep the direction of the move but shorten it to maxMotionDistance.
+	newPos = oldPos + (delta * maxMotionDistance * physx::intrinsics::recipSqrt(deltaSqr));
+}
+
+PX_FORCE_INLINE void updateCollDataDynamic(ParticleCollData& collData, const PxTransform& bodyToWorld,
+                                           const PxVec3& linearVel, const PxVec3& angularVel,
+                                           const PxsBodyCore* twoWayBody, const PxTransform& shapeToWorld,
+                                           const PxReal timeStep, ConstraintDynamic& c0Dynamic,
+                                           ConstraintDynamic& c1Dynamic)
+{
+	if(!(collData.localFlags & ParticleCollisionFlags::L_ANY))
+		return;
+
+	PxVec3 worldNormal = shapeToWorld.rotate(collData.localSurfaceNormal);
+	PxVec3 worldPos = shapeToWorld.transform(collData.localSurfacePos);
+
+	const PxVec3 bodyArm = worldPos - bodyToWorld.p;
+
+	const PxVec3 angularSurfaceVel = angularVel.cross(bodyArm);
+	const PxVec3 worldVel = angularSurfaceVel + linearVel;
+
+	if(collData.localFlags & ParticleCollisionFlags::L_CC)
+	{
+		addContinuousCollisionDynamic(collData, worldNormal, worldPos, worldVel);
+		// a previously stored body is replaced by the latest one
+		collData.twoWayBody = twoWayBody;
+		collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_DYNAMIC;
+	}
+	if(collData.localFlags & ParticleCollisionFlags::L_DC)
+	{
+		addDiscreteCollisionDynamic(collData, worldNormal, worldPos, worldVel, 1.f);
+		// a previously stored body is replaced by the latest one
+		collData.twoWayBody = twoWayBody;
+		collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_DYNAMIC;
+	}
+	if(collData.localFlags & ParticleCollisionFlags::L_CC_PROX)
+	{
+		// Try to predict the constraint for the next pose of the shape.
+
+		// Surface position: advance the arm by the angular surface velocity and restore its length
+		// (an approximation of the rotation; sschirm noted how many inverse square roots this costs).
+		const PxReal armLength = bodyArm.magnitude();
+		worldPos = (bodyArm + angularSurfaceVel * timeStep).getNormalized();
+		worldPos *= armLength;
+		// then move it to the body position expected after timeStep
+
+		worldPos += (bodyToWorld.p + (linearVel * timeStep));
+
+		// Surface normal: advanced and renormalized
+		// (again an approximation of the rotation)
+		worldNormal = (worldNormal + (angularVel.cross(worldNormal)) * timeStep).getNormalized();
+
+		addConstraintDynamic(collData, worldNormal, worldPos, worldVel, twoWayBody, c0Dynamic,
+		                     c1Dynamic);
+	}
+}
+
+PX_FORCE_INLINE void updateCollDataStatic(ParticleCollData& collData, const PxTransform& shapeToWorld,
+                                          const PxReal /*timeStep*/)
+{
+	if(!(collData.localFlags & ParticleCollisionFlags::L_ANY))
+		return;
+
+	const PxVec3 worldNormal = shapeToWorld.rotate(collData.localSurfaceNormal);
+	const PxVec3 worldPos = shapeToWorld.transform(collData.localSurfacePos);
+	// Each per-shape result flag is forwarded to the matching accumulator below.
+	if(collData.localFlags & ParticleCollisionFlags::L_CC)
+	{
+		addContinuousCollisionStatic(collData, worldNormal, worldPos);
+		collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_STATIC;
+	}
+	if(collData.localFlags & ParticleCollisionFlags::L_DC)
+	{
+		addDiscreteCollisionStatic(collData, worldNormal, worldPos, 1.f);
+		collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_STATIC;
+	}
+	if(collData.localFlags & ParticleCollisionFlags::L_CC_PROX)
+	{
+		addConstraint(collData, worldNormal, worldPos);
+	}
+}
+
+PX_FORCE_INLINE void updateCollDataStaticMesh(ParticleCollData& collData, const PxTransform& shapeToWorld,
+                                              const PxReal /*timeStep*/)
+{
+	if(!(collData.localFlags & ParticleCollisionFlags::L_ANY))
+		return;
+
+	if(collData.localFlags & ParticleCollisionFlags::L_CC)
+	{
+		const PxVec3 worldNormal(shapeToWorld.rotate(collData.localSurfaceNormal));
+
+		// For static meshes the old particle position is passed as surface position
+		addContinuousCollisionStatic(collData, worldNormal, collData.oldPos);
+		collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_STATIC;
+	}
+	if(collData.localFlags & ParticleCollisionFlags::L_DC)
+	{
+		// The local data is a sum over localDcNum collisions: average it, transform the average to
+		// world space and multiply by the count again so the data keeps its weight.
+		const PxReal weight = collData.localDcNum;
+		PX_ASSERT(weight > 0.0f);
+		const PxReal invWeight = invertDcNum(weight);
+
+		const PxVec3 worldNormalSum = shapeToWorld.rotate(collData.localSurfaceNormal * invWeight) * weight;
+		const PxVec3 worldPosSum = shapeToWorld.transform(collData.localSurfacePos * invWeight) * weight;
+
+		addDiscreteCollisionStatic(collData, worldNormalSum, worldPosSum, collData.localDcNum);
+		collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_STATIC;
+	}
+	// L_CC_PROX needs no handling here: mesh constraints were already written in the collision
+	// function
+}
+
+PX_FORCE_INLINE bool applyConstraints(const PxVec3& rayOrig, PxVec3& rayDir, const PxVec3& oldVelocity,
+                                      const PxsBodyCore*& twoWayBody, PxVec3& shapeNormal, PxVec3& shapeVelocity,
+                                      const Constraint* constr0, const Constraint* constr1,
+                                      const PxsBodyCore* constr0TwoWayBody, const PxsBodyCore* constr1TwoWayBody,
+                                      const PxVec3& constr0Velocity, const PxVec3& constr1Velocity,
+                                      const PxU32 particleIndex, const CollisionParameters& params,
+                                      const ParticleFlags& particleFlags)
+{ // Returns false (all outputs untouched) if rayOrig + rayDir violates no constraint plane; else rewrites rayDir.
+	PX_ASSERT(particleFlags.low & InternalParticleFlag::eCONSTRAINT_0_VALID); // Constraint 0 must be valid to get
+	// here; constraint 1 is optional and is checked below.
+	bool needsRescaling = false; // set when the particle had to be pushed out of a plane in the second pass
+	PxVec3 rayDirTmp = rayDir; // avoid LHS (presumably load-hit-store): work on a copy, written back at the end
+	PxVec3 newPos = rayOrig + rayDirTmp; // predicted end position before constraints are applied
+
+	if(!(particleFlags.low & InternalParticleFlag::eCONSTRAINT_1_VALID))
+	{ // single constraint case
+		PxReal projectedNewPosC0 = constr0->normal.dot(newPos);
+
+		if(projectedNewPosC0 < constr0->d) // predicted position lies behind the constraint plane
+		{
+			twoWayBody = constr0TwoWayBody;
+			shapeNormal = constr0->normal;
+			shapeVelocity = constr0Velocity;
+		}
+		else
+			return false;
+
+		PxVec3 velocity = rayDirTmp * params.invTimeStep; // displacement converted back to a velocity
+		reflectVelocity(rayDirTmp, velocity, oldVelocity, constr0->normal, constr0Velocity, particleIndex, params);
+
+		// rayDirTmp now holds the reflected velocity: convert it to a displacement and integrate the position
+		rayDirTmp *= params.timeStep;
+		newPos = rayOrig + rayDirTmp;
+
+		//
+		// Constraint has been applied. Do second pass using the modified particle velocity and position
+		//
+		// - Check if the modified particle position is still behind the constraint plane.
+		//   If this is the case then move the particle along the plane normal until it is slightly in
+		//   front of the plane.
+		//
+		projectedNewPosC0 = constr0->normal.dot(newPos);
+		if(constr0->d > projectedNewPosC0)
+		{
+			newPos = newPos + (constr0->normal * ((constr0->d - projectedNewPosC0) * 1.01f)); // Move particle along
+			// the surface normal by the missing distance plus 1%, so that it ends up
+			// slightly in front of the plane.
+			rayDirTmp = newPos - rayOrig;
+			needsRescaling = true;
+		}
+	}
+	else
+	{ // two constraints case
+		PxReal projectedNewPosC0 = constr0->normal.dot(newPos);
+		PxReal projectedNewPosC1 = constr1->normal.dot(newPos);
+
+		bool violateC0 = projectedNewPosC0 < constr0->d;
+		bool violateC1 = projectedNewPosC1 < constr1->d;
+
+		if(violateC0) // constraint 0 takes precedence for the reported body/normal/velocity when both are violated
+		{
+			twoWayBody = constr0TwoWayBody;
+			shapeNormal = constr0->normal;
+			shapeVelocity = constr0Velocity;
+		}
+		else if(violateC1)
+		{
+			twoWayBody = constr1TwoWayBody;
+			shapeNormal = constr1->normal;
+			shapeVelocity = constr1Velocity;
+		}
+		else
+			return false;
+
+		if(!(violateC0 && violateC1)) // exactly one constraint violated: reflect off that surface
+		{
+			PxVec3 velocity = rayDirTmp * params.invTimeStep;
+			reflectVelocity(rayDirTmp, velocity, oldVelocity, shapeNormal, shapeVelocity, particleIndex, params);
+
+			// rayDirTmp now holds the reflected velocity: convert it back to a displacement
+			rayDirTmp *= params.timeStep;
+		}
+		else
+		{
+			// removed reflection code for this case (leads to jittering on edges)
+			// missing restitution/static friction term might cause other artifacts though
+			rayDirTmp *= (1.0f - params.dynamicFriction); // only damp the displacement by dynamic friction
+		}
+		newPos = rayOrig + rayDirTmp;
+
+		//
+		// Constraint has been applied. Do second pass using the modified particle velocity and position
+		//
+		// - Check if the modified particle position is still behind one of the constraint planes.
+		//   If this is the case then move the particle out again, using one or both plane normals
+		//   depending on the cases below.
+		//
+
+		projectedNewPosC0 = constr0->normal.dot(newPos);
+
+		PxReal n0dotn1 = constr0->normal.dot(constr1->normal);
+
+		if(PxAbs(n0dotn1) > (1.0f - PT_COLL_VEL_PROJECTION_CROSS_EPSILON))
+		{
+			// the two normals are (anti-)parallel within tolerance: only constraint 0 is enforced here
+			if(projectedNewPosC0 < constr0->d)
+			{
+				newPos = newPos + (constr0->normal * ((constr0->d - projectedNewPosC0) * 1.01f)); // Move particle along
+				// the surface normal by the missing distance plus 1%, so that it ends up
+				// slightly in front of the plane.
+				rayDirTmp = newPos - rayOrig;
+				needsRescaling = true;
+			}
+		}
+		else
+		{
+			PxReal projectedNewPosC1_ = constr1->normal.dot(newPos);
+
+			PxReal distChange0 = constr0->d - projectedNewPosC0;
+			PxVec3 newPos0 = newPos + constr0->normal * distChange0; // newPos moved onto plane 0 along its normal
+
+			PxReal distChange1 = constr1->d - projectedNewPosC1_;
+			PxVec3 newPos1 = newPos + constr1->normal * distChange1; // newPos moved onto plane 1 along its normal
+
+			if(projectedNewPosC0 < constr0->d || projectedNewPosC1_ < constr1->d)
+			{
+				PxReal projectedNewPosC1nC0 = constr0->normal.dot(newPos1); // plane-1 candidate measured against plane 0
+				PxReal projectedNewPosC0nC1 = constr1->normal.dot(newPos0); // plane-0 candidate measured against plane 1
+
+				if(projectedNewPosC1nC0 < constr0->d && projectedNewPosC0nC1 < constr1->d)
+				{ // neither single push satisfies the other plane: solve for both planes at once
+					PxReal factor = 1.0f / (1.0f - (n0dotn1 * n0dotn1)); // |n0dotn1| is bounded away from 1 in this branch
+					PxReal a0 = (distChange0 - (n0dotn1 * distChange1)) * factor; // a0, a1: offsets along n0, n1 from the
+					PxReal a1 = (distChange1 - (n0dotn1 * distChange0)) * factor; // 2x2 system (assumes unit normals)
+					newPos += (constr0->normal * a0) + (constr1->normal * a1);
+
+					rayDirTmp = newPos - rayOrig;
+
+					PxVec3 epsVec = (constr0->normal + constr1->normal) * 0.5f * PT_COLL_VEL_PROJECTION_PROJ;
+					rayDirTmp += epsVec * (rayDirTmp.dot(rayDirTmp)); // extra push along the mean normal, scaled by |rayDirTmp|^2
+				}
+				else if(projectedNewPosC1nC0 < constr0->d)
+				{ // push along normal 0 only, slightly overshooting the plane
+					newPos = newPos + (constr0->normal * ((1.0f + PT_COLL_VEL_PROJECTION_PROJ) * distChange0));
+					rayDirTmp = newPos - rayOrig;
+				}
+				else
+				{ // push along normal 1 only, slightly overshooting the plane
+					newPos = newPos + (constr1->normal * ((1.0f + PT_COLL_VEL_PROJECTION_PROJ) * distChange1));
+					rayDirTmp = newPos - rayOrig;
+				}
+				needsRescaling = true;
+			}
+		}
+	}
+
+	// Clamp the corrected displacement to the length of the incoming rayDir (only after a push-out)
+	if(needsRescaling)
+	{
+		PxF32 originalLengthSqr = rayDir.magnitudeSquared();
+		PxF32 lengthSqr = rayDirTmp.magnitudeSquared();
+		if(lengthSqr > originalLengthSqr) // implies lengthSqr > 0, so recipSqrt below is well defined
+		{
+			rayDirTmp *= physx::intrinsics::sqrt(originalLengthSqr) * physx::intrinsics::recipSqrt(lengthSqr);
+		}
+	}
+	rayDir = rayDirTmp; // publish the constrained displacement
+	return true;
+}
+
+// Initializes the per-particle collision record from the particle state and, if the particle carries
+// constraints from the last simulation step, applies them to this step's motion. On exit
+// collData.newPos and collData.velocity hold the (possibly constrained) target position and velocity.
+PX_FORCE_INLINE void initCollDataAndApplyConstraints(ParticleCollData& collData, const Particle& particle,
+                                                     const PxVec3& oldVelocity, const PxF32 restOffset,
+                                                     const PxVec3& constr0Velocity, const PxVec3& constr1Velocity,
+                                                     const PxsBodyCore* constr0TwoWayBody,
+                                                     const PxsBodyCore* constr1TwoWayBody, PxU32 particleIndex,
+                                                     const CollisionParameters& params)
+{
+	PX_ASSERT(particle.flags.api & PxParticleFlag::eVALID);
+
+	collData.init(particle.position, restOffset, particleIndex, particle.flags);
+
+	// Displacement covered during this step if nothing constrains the particle.
+	PxVec3 displacement = particle.velocity * params.timeStep;
+
+	// Apply constraints from the last simulation step, if there are any.
+	if(particle.flags.low & InternalParticleFlag::eANY_CONSTRAINT_VALID)
+	{
+		const PxVec3 unconstrainedDisplacement = displacement;
+
+		const bool constraintHit = applyConstraints(
+		    collData.oldPos, displacement, oldVelocity, collData.twoWayBody, collData.surfaceNormal,
+		    collData.surfaceVel, collData.c0, collData.c1, constr0TwoWayBody, constr1TwoWayBody, constr0Velocity,
+		    constr1Velocity, particleIndex, params, particle.flags);
+
+		// A two-way body can only be reported together with a collision.
+		PX_ASSERT(constraintHit || !collData.twoWayBody);
+
+		if(constraintHit)
+		{
+			if(!collData.twoWayBody)
+			{
+				collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_STATIC;
+			}
+			else
+			{
+				// Computed whether or not PxParticleBaseFlag::eCOLLISION_TWOWAY is set in params.flags;
+				// it doesn't really matter if two way is off.
+				collData.twoWayImpulse = (unconstrainedDisplacement - displacement) * params.invTimeStep;
+				collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_DYNAMIC;
+			}
+			collData.flags |= ParticleCollisionFlags::RESET_SNORMAL;
+		}
+	}
+
+	collData.newPos = collData.oldPos + displacement;
+	collData.velocity = displacement * params.invTimeStep;
+}
+
+// Applies the collision accumulated in collData: moves newPos to the contact surface and reflects the
+// particle velocity.
+//
+// isStatic was reintroduced for a selective read of collData.surfaceVel (collisionVelocity feature):
+// when it is set, a zero vector is used as the initial surface velocity instead. At this point it would
+// probably be better to refactor collisionResponse to take individual particle data as input again
+// (as done for GPU).
+void collisionResponse(ParticleCollData& collData, bool twoWay, bool isStatic, CollisionParameters& params)
+{
+	const bool hasContinuous = (collData.flags & ParticleCollisionFlags::CC) > 0;
+	const bool hasDiscrete = (collData.flags & ParticleCollisionFlags::DC) > 0;
+
+	if(!hasContinuous && !hasDiscrete)
+	{
+		// Note: Proximity collisions have no immediate effect on the particle position,
+		//       they are only used to build constraints.
+		PX_ASSERT(!(collData.flags & (ParticleCollisionFlags::DC | ParticleCollisionFlags::CC)));
+		return; // It is important to return here if there is no collision
+	}
+
+	// Local working copies (avoid LHS)
+	PxVec3 contactNormal = collData.surfaceNormal;
+	PxVec3 contactVel = isStatic ? PxVec3(0) : collData.surfaceVel;
+
+	if(hasContinuous)
+	{
+		// Particle has penetrated the shape surface -> push the particle back to the surface
+		PX_ASSERT(!(collData.flags & ParticleCollisionFlags::DC));
+		PX_ASSERT(collData.ccTime < 1.0f);
+		PX_ASSERT(PxAbs(collData.surfaceNormal.magnitude() - 1) < PXS_SURFACE_NORMAL_UNIT_TOLERANCE);
+
+		collData.newPos = collData.surfacePos;
+	}
+	else
+	{
+		// Discrete collision: surfacePos and surfaceVel are divided by the number of contacts.
+		const PxReal invContactCount = invertDcNum(collData.dcNum);
+		collData.newPos = collData.surfacePos * invContactCount;
+		contactVel = collData.surfaceVel * invContactCount;
+		collData.surfaceVel = contactVel;
+
+		// Since normals have unit length, we do not need to average; it is enough to normalize the
+		// summed up contact normals. Nothing to do when the inverse count is exactly one.
+		if(invContactCount != 1.0f)
+		{
+			const PxReal invNormalLength = physx::intrinsics::recipSqrt(collData.surfaceNormal.magnitudeSquared());
+			contactNormal = collData.surfaceNormal * invNormalLength;
+			collData.surfaceNormal = contactNormal;
+		}
+
+		collData.dcNum = 0.0f;
+	}
+
+	PX_ASSERT(hasContinuous || hasDiscrete);
+
+	PxVec3 reflectedVel;
+	reflectVelocity(reflectedVel, collData.velocity, collData.velocity, contactNormal, contactVel,
+	                collData.origParticleIndex, params);
+
+	// If collData.twoWayBody is set, we have a collision with a dynamic rigid body.
+	if(twoWay && collData.twoWayBody)
+	{
+		collData.twoWayImpulse = collData.velocity - reflectedVel;
+	}
+
+	collData.velocity = reflectedVel;
+}
+
+// Builds the local cell hash of one subpacket: picks a power-of-two table size larger than the
+// particle count (capped at PT_LOCAL_HASH_SIZE_MESH_COLLISION), fills the table via
+// SpatialHash::buildLocalHash and marks the hash as valid.
+PX_FORCE_INLINE void computeLocalCellHash(LocalCellHash& localCellHash, PxU16* hashKeyArray, const Particle* particles,
+                                          PxU32 numParticles, const PxVec3& packetCorner, const PxReal cellSizeInv)
+{
+	PX_ASSERT(numParticles <= PT_SUBPACKET_PARTICLE_LIMIT_COLLISION);
+
+	const PxU32 tableSize =
+	    PxMin(PxU32(PT_LOCAL_HASH_SIZE_MESH_COLLISION), PxU32(Ps::nextPowerOfTwo(numParticles + 1)));
+
+	// The hash function requires a power-of-two table size, and at least one slot more than particles.
+	PX_ASSERT((tableSize & (tableSize - 1)) == 0);
+	PX_ASSERT(tableSize > numParticles);
+
+	// Get the local cell hash for the current subpacket
+	SpatialHash::buildLocalHash(particles, numParticles, localCellHash.hashEntries, localCellHash.particleIndices,
+	                            hashKeyArray, tableSize, cellSizeInv, packetCorner);
+
+	localCellHash.isHashValid = true;
+	localCellHash.numParticles = numParticles;
+	localCellHash.numHashEntries = tableSize;
+}
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_COLLISION_HELPER_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionMesh.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionMesh.cpp
new file mode 100644
index 00000000..b9f96e3e
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionMesh.cpp
@@ -0,0 +1,698 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtCollisionMethods.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PtHeightFieldAabbTest.h"
+#include "GuTriangleVertexPointers.h"
+#include "PtConstants.h"
+#include "GuBox.h"
+#include "GuMidphaseInterface.h"
+
+using namespace physx;
+using namespace Pt;
+using namespace Gu;
+
+//
+// Collide particle against mesh triangle
+//
+// Project particle on triangle plane, check if projected particle is inside triangle
+// using barycentric coordinates.
+//																			//
+//                 q2														//
+//                 *														//
+//               /   \														//
+//              /      \													//
+//             /         \													//
+//            /            \												//
+//        q0 *--------------* q1											//
+//
+// Triangle with points q0, q1, q2.
+//
+// Point p on plane defined by triangle:
+//
+//     p = q0 + (u * (q1 - q0)) + (v * (q2 - q0))
+//       = q0 + (u * e0) + (v * e1)
+//
+// ->  (p - q0) = (u * e0) + (v * e1)	// Subtract q0 from both sides
+//           e2 = (u * e0) + (v * e1)
+//
+// We have two unknowns (u and v) so we need two equations to solve for them. Dot both sides by e0 to get one and dot
+// both sides by
+// e1 to get a second.
+//
+//     e2 . e0 = ((u * e0) + (v * e1)) . e0		(1)
+//     e2 . e1 = ((u * e0) + (v * e1)) . e1		(2)
+//
+// Distribute e0 and e1
+//
+//     e2 . e0 = u * (e0 . e0) + v * (e1 . e0)	(1)
+//     e2 . e1 = u * (e0 . e1) + v * (e1 . e1)	(2)
+//
+// Solve for u, v
+//
+//     u = ((e1.e1)(e0.e2) - (e0.e1)(e1.e2))  /  ((e0.e0)(e1.e1) - (e0.e1)(e0.e1))
+//     v = ((e0.e0)(e1.e2) - (e0.e1)(e0.e2))  /  ((e0.e0)(e1.e1) - (e0.e1)(e0.e1))
+//
+// Setting a = e0.e0, b = e0.e1, c = e1.e1, d = e0.(-e2), e = e1.(-e2) we can write
+//
+//     u = (b*e - c*d) / (a*c - b*b)
+//     v = (b*d - a*e) / (a*c - b*b)
+//
+// If (u >= 0) and (v >= 0) and (u + v <= 1) the point lies inside the triangle.
+//
+// Note that u and v do not need to be computed in full to do the test.
+// Let's define the following substitutions:
+//     x = (b*e - c*d)
+//     y = (b*d - a*e)
+//     z = (a*c - b*b)  // Always positive!
+//
+// If (x >= 0) and (y >= 0) and (x + y <= z) the point lies inside the triangle.
+//
+//
+
+namespace physx
+{
+PX_FORCE_INLINE PxU32 collideWithMeshTriangle(PxVec3& surfaceNormal, PxVec3& surfacePos, PxVec3& proxSurfaceNormal,
+                                              PxVec3& proxSurfacePos, PxReal& ccTime, PxReal& distOldToSurface,
+                                              const PxVec3& oldPos, const PxVec3& newPos, const PxVec3& origin,
+                                              const PxVec3& e0, const PxVec3& e1, bool hasCC, const PxReal& collRadius,
+                                              const PxReal& proxRadius)
+{ // Tests a particle moving oldPos -> newPos against the triangle (origin, origin + e0, origin + e1).
+	PxU32 flags = 0; // return value: any of L_DC, L_PROX, L_CC; outputs are only written for the bits that get set
+
+	PxReal collisionRadius2 = collRadius * collRadius;
+	PxReal proximityRadius2 = proxRadius * proxRadius;
+
+	PxVec3 motion = newPos - oldPos;
+
+	// discrete collision (dc) and proximity tests, both based on newPos
+	PxVec3 tmpV = origin - newPos;
+
+	PxReal a = e0.dot(e0);
+	PxReal b = e0.dot(e1);
+	PxReal c = e1.dot(e1);
+	PxReal d = e0.dot(tmpV);
+	PxReal e = e1.dot(tmpV);
+	PxVec3 coords;
+	coords.x = b * e - c * d; // u * det (unnormalized barycentric coordinate along e0)
+	coords.y = b * d - a * e; // v * det (unnormalized barycentric coordinate along e1)
+	coords.z = a * c - b * b; // det
+
+	bool insideCase = false; // true if the projection of newPos falls inside the triangle
+	PxVec3 clampedCoords(PxVec3(0)); // (u, v) of the point used as closest point; z is never read
+	if(coords.x <= 0.0f)
+	{ // u <= 0: use the projection onto edge e1 (u = 0)
+		c = PxMax(c, FLT_MIN); // guards the division against a zero-length edge
+		clampedCoords.y = -e / c;
+	}
+	else if(coords.y <= 0.0f)
+	{ // v <= 0: use the projection onto edge e0 (v = 0)
+		a = PxMax(a, FLT_MIN); // guards the division against a zero-length edge
+		clampedCoords.x = -d / a;
+	}
+	else if(coords.x + coords.y > coords.z)
+	{ // u + v > 1: use the projection onto the edge opposite origin (u + v = 1)
+		PxReal denominator = a + c - b - b;
+		PxReal numerator = c + e - b - d;
+		denominator = PxMax(denominator, FLT_MIN);
+		clampedCoords.x = numerator / denominator;
+		clampedCoords.y = 1.0f - clampedCoords.x;
+	}
+	else // all inside
+	{
+		PxReal tmpF = PxMax(coords.z, FLT_MIN);
+		tmpF = 1.0f / tmpF;
+		clampedCoords.x = coords.x * tmpF;
+		clampedCoords.y = coords.y * tmpF;
+		insideCase = true;
+	}
+	clampedCoords.x = PxMax(clampedCoords.x, 0.0f); // clamp both coordinates to [0, 1]
+	clampedCoords.y = PxMax(clampedCoords.y, 0.0f);
+	clampedCoords.x = PxMin(clampedCoords.x, 1.0f);
+	clampedCoords.y = PxMin(clampedCoords.y, 1.0f);
+
+	// Point on the triangle used as closest point to the particle (from the clamped coordinates)
+	PxVec3 closest = origin + e0 * clampedCoords.x + e1 * clampedCoords.y;
+
+	PxVec3 triangleOffset = newPos - closest;
+	PxReal triangleDistance2 = triangleOffset.magnitudeSquared();
+
+	PxVec3 triangleNormal = e0.cross(e1); // not normalized
+	PxReal e0e1Span = triangleNormal.magnitude(); // |e0 x e1|, used below to normalize triangleNormal
+
+	bool isInFront = triangleOffset.dot(triangleNormal) > 0.0f; // newPos on the side the normal points to
+
+	// MS: Possible optimization
+	/*
+	if (isInFront && (triangleDistance2 >= proximityRadius2))
+	    return flags;
+	*/
+
+	bool isInProximity = insideCase && (triangleDistance2 < proximityRadius2) && isInFront;
+	bool isInDiscrete = (triangleDistance2 < collisionRadius2) && isInFront;
+
+	if(!hasCC)
+	{
+		// Only apply discrete and proximity collisions if no continuous collision was detected so far (for any
+		// colliding shape)
+
+		if(isInDiscrete)
+		{
+			if(triangleDistance2 > PT_COLL_TRI_DISTANCE)
+			{ // far enough from the closest point: use the direction from the closest point to the particle
+				surfaceNormal = triangleOffset * PxRecipSqrt(triangleDistance2);
+			}
+			else
+			{ // (nearly) on the triangle: fall back to the unit face normal
+				surfaceNormal = triangleNormal * (1.0f / e0e1Span);
+			}
+			surfacePos = closest + (surfaceNormal * collRadius);
+			flags |= ParticleCollisionFlags::L_DC;
+		}
+
+		if(isInProximity)
+		{
+			proxSurfaceNormal = triangleNormal * (1.0f / e0e1Span);
+			proxSurfacePos = closest + (proxSurfaceNormal * collRadius);
+			flags |= ParticleCollisionFlags::L_PROX;
+
+			tmpV = (oldPos - origin);                       // offset of the old position this time, not of newPos.
+			distOldToSurface = proxSurfaceNormal.dot(tmpV); // Need to return the distance to decide which constraints
+			                                                // should be thrown away
+		}
+	}
+
+	if(!isInDiscrete && !isInProximity)
+	{
+		// cc test (only executed if no discrete collision or proximity happened).
+		tmpV = origin - oldPos; // offset of the old position this time, not of newPos.
+		PxReal pDistN = triangleNormal.dot(tmpV); // <= 0 when oldPos is on or in front of the triangle plane
+		PxReal rLengthN = triangleNormal.dot(motion); // motion along the (unnormalized) normal
+
+		if(pDistN > 0.0f || rLengthN >= pDistN) // oldPos behind the plane, or the motion does not cross the plane
+			return flags;
+
+		// t = pDistN / rLengthN is in the half closed interval [0.0f, 1.0)
+
+		PxReal t = pDistN / rLengthN;
+		PX_ASSERT((t >= 0.0f) && (t < 1.0f));
+
+		PxVec3 relativePOSITION = (motion * t); // displacement from oldPos to the plane intersection
+		PxVec3 testPoint = oldPos + relativePOSITION;
+
+		// a, b, c and coords.z don't depend on the test point -> still valid
+		tmpV = origin - testPoint;
+		d = e0.dot(tmpV);
+		e = e1.dot(tmpV);
+		coords.x = b * e - c * d;
+		coords.y = b * d - a * e;
+
+		// tolerance on the inside test; maybe we don't need this for rare case leaking on triangle boundaries?
+		PxReal eps = coords.z * PT_COLL_RAY_EPSILON_FACTOR;
+
+		if((coords.x >= -eps) && (coords.y >= -eps) && (coords.x + coords.y <= coords.z + eps))
+		{
+			PxReal invLengthN = (1.0f / e0e1Span);
+			distOldToSurface = -pDistN * invLengthN; // Need to return the distance to decide which constraints should
+			// be thrown away
+			surfaceNormal = triangleNormal * invLengthN;
+			// previous form: surfacePos = testPoint + (surfaceNormal * collRadius);
+			computeContinuousTargetPosition(surfacePos, oldPos, relativePOSITION, surfaceNormal, collRadius);
+			ccTime = t;
+			flags |= ParticleCollisionFlags::L_CC;
+		}
+	}
+
+	return flags;
+}
+}
+
+// Stores a new constraint plane (given in shape space, converted to world space here) in one of the
+// two constraint slots of collData.
+//
+// A free slot is used if there is one (slot 0 first). If both slots are taken, the slot whose plane is
+// farther from the old particle position is replaced, but only if distToSurface is smaller than that
+// distance; otherwise nothing is changed. The written slot is flagged as valid and non-dynamic.
+PX_FORCE_INLINE void setConstraintData(ParticleCollData& collData, const PxReal& distToSurface, const PxVec3& normal,
+                                       const PxVec3& position, const PxTransform& shape2World)
+{
+	const bool slot0Taken = (collData.particleFlags.low & InternalParticleFlag::eCONSTRAINT_0_VALID) != 0;
+	const bool slot1Taken = (collData.particleFlags.low & InternalParticleFlag::eCONSTRAINT_1_VALID) != 0;
+
+	bool writeSlot0;
+	if(!slot0Taken)
+	{
+		writeSlot0 = true;
+	}
+	else if(!slot1Taken)
+	{
+		writeSlot0 = false;
+	}
+	else
+	{
+		// Both slots are in use: measure the old position against both existing planes.
+		PxVec3 oldWorldPos(shape2World.transform(collData.localOldPos));
+
+		const PxReal distToPlane0 = collData.c0->normal.dot(oldWorldPos) - collData.c0->d;
+		const PxReal distToPlane1 = collData.c1->normal.dot(oldWorldPos) - collData.c1->d;
+
+		// Candidate for replacement is the farther plane; slot 1 if plane 0 is strictly closer.
+		writeSlot0 = !(distToPlane0 < distToPlane1);
+		const PxReal replacedDist = writeSlot0 ? distToPlane0 : distToPlane1;
+
+		// Keep the existing constraints unless the new surface is closer than the candidate.
+		if(!(distToSurface < replacedDist))
+			return;
+	}
+
+	PxVec3 worldNormal(shape2World.rotate(normal));
+	PxVec3 worldPos(shape2World.transform(position));
+	Constraint worldConstraint(worldNormal, worldPos);
+
+	if(writeSlot0)
+	{
+		*collData.c0 = worldConstraint;
+		collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_0_VALID;
+		collData.particleFlags.low &= PxU16(~InternalParticleFlag::eCONSTRAINT_0_DYNAMIC);
+	}
+	else
+	{
+		*collData.c1 = worldConstraint;
+		collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_1_VALID;
+		collData.particleFlags.low &= PxU16(~InternalParticleFlag::eCONSTRAINT_1_DYNAMIC);
+	}
+}
+
+// Merges the result of one triangle test (collFlags plus the matching outputs) into the per-particle
+// shape-space collision data.
+//
+// A continuous hit keeps the earliest ccTime, always records a constraint and sets hasCC. Once hasCC
+// is set, proximity and discrete results are ignored. Otherwise a proximity result records a
+// constraint and a discrete result is added to the running sums (normal, position, count).
+PX_FORCE_INLINE void updateCollShapeData(ParticleCollData& collData, bool& hasCC, PxU32 collFlags, PxReal ccTime,
+                                         PxReal distOldToSurface, const PxVec3& surfaceNormal, const PxVec3& surfacePos,
+                                         const PxVec3& proxSurfaceNormal, const PxVec3& proxSurfacePos,
+                                         const PxTransform& shape2World)
+{
+	if(collFlags & ParticleCollisionFlags::L_CC)
+	{
+		// We want the collision that happened first
+		if(ccTime < collData.ccTime)
+		{
+			collData.ccTime = ccTime;
+			collData.localSurfaceNormal = surfaceNormal;
+			collData.localSurfacePos = surfacePos;
+			// Continuous collision should overwrite discrete collision (?)
+			collData.localFlags = ParticleCollisionFlags::L_CC;
+		}
+
+		setConstraintData(collData, distOldToSurface, surfaceNormal, surfacePos, shape2World);
+		hasCC = true;
+		return;
+	}
+
+	// A continuous collision was found earlier: discrete and proximity results no longer count.
+	if(hasCC)
+		return;
+
+	if(collFlags & ParticleCollisionFlags::L_PROX)
+	{
+		setConstraintData(collData, distOldToSurface, proxSurfaceNormal, proxSurfacePos, shape2World);
+		collData.localFlags |= ParticleCollisionFlags::L_PROX;
+	}
+
+	if(collFlags & ParticleCollisionFlags::L_DC)
+	{
+		// Accumulate sums; the contact count is kept in localDcNum.
+		collData.localDcNum += 1.0f;
+		collData.localSurfaceNormal += surfaceNormal;
+		collData.localSurfacePos += surfacePos;
+		collData.localFlags |= ParticleCollisionFlags::L_DC;
+	}
+}
+
+void collideCellWithMeshTriangles(ParticleCollData* collData, const PxU32* collDataIndices, PxU32 numCollDataIndices,
+                                  const TriangleMesh& meshData, const Cm::FastVertex2ShapeScaling& scale,
+                                  const PxVec3* triangleVerts, PxU32 numTriangles, PxReal proxRadius,
+                                  const PxTransform& shape2World); // forward declaration: defined below, called from PxcContactCellMeshCallback::processHit
+
+// Mesh hit callback used when colliding one particle cell against a triangle mesh: every reported
+// triangle is tested against all particles of the cell, and its face index is optionally added to
+// a ParticleOpcodeCache.
+struct PxcContactCellMeshCallback : MeshHitCallback<PxRaycastHit>
+{
+	ParticleCollData* collData;                    // collision data array, indexed through collDataIndices
+	const PxU32* collDataIndices;                  // indices of the particles belonging to the cell
+	PxU32 numCollDataIndices;                      // number of entries in collDataIndices
+	const TriangleMesh& meshData;
+	const Cm::FastVertex2ShapeScaling meshScaling; // stored by value
+	PxReal proxRadius;
+	ParticleOpcodeCache* cache;                    // may be NULL, see processHit
+	const PxTransform& shape2World;
+
+	// Stores the arguments and resets the shape-local discrete-collision accumulators (count, normal
+	// sum, position sum) of every particle referenced by collDataIndices_.
+	PxcContactCellMeshCallback(ParticleCollData* collData_, const PxU32* collDataIndices_, PxU32 numCollDataIndices_,
+	                           const TriangleMesh& meshData_, const Cm::FastVertex2ShapeScaling& meshScaling_,
+	                           PxReal proxRadius_, ParticleOpcodeCache* cache_, const PxTransform& shape2World_)
+	: MeshHitCallback<PxRaycastHit>(CallbackMode::eMULTIPLE)
+	, collData(collData_)
+	, collDataIndices(collDataIndices_)
+	, numCollDataIndices(numCollDataIndices_)
+	, meshData(meshData_)
+	, meshScaling(meshScaling_)
+	, proxRadius(proxRadius_)
+	, cache(cache_)
+	, shape2World(shape2World_)
+	{
+		PX_ASSERT(collData);
+		PX_ASSERT(collDataIndices);
+		PX_ASSERT(numCollDataIndices > 0);
+
+		for(PxU32 k = 0; k < numCollDataIndices_; ++k)
+		{
+			ParticleCollData& entry = collData_[collDataIndices_[k]];
+			entry.localDcNum = 0.0f;
+			entry.localSurfaceNormal = PxVec3(0);
+			entry.localSurfacePos = PxVec3(0);
+		}
+	}
+
+	virtual ~PxcContactCellMeshCallback()
+	{
+	}
+
+	// Called once per reported triangle; all reported coords are in mesh local space including
+	// hit.position. Always returns true.
+	virtual PxAgain processHit(const PxRaycastHit& hit, const PxVec3& v0, const PxVec3& v1, const PxVec3& v2, PxReal&,
+	                           const PxU32*)
+	{
+		const PxVec3 triangle[3] = { v0, v1, v2 };
+		collideCellWithMeshTriangles(collData, collDataIndices, numCollDataIndices, meshData, meshScaling, triangle,
+		                             1, proxRadius, shape2World);
+
+		if(cache != NULL)
+			cache->add(&hit.faceIndex, 1);
+
+		return true;
+	}
+
+  private:
+	// Declared but not defined: assignment is not supported.
+	PxcContactCellMeshCallback& operator=(const PxcContactCellMeshCallback&);
+};
+
+// Finds the mesh triangles overlapping worldBounds and reports them to callback.
+//
+// An OBB for the fluid particle cell is set up in the local space of the shape, assuming uniform
+// scaling in most cases and using the pose as box rotation. If scaling is non-uniform, the bounding
+// box is conservative.
+void testBoundsMesh(const TriangleMesh& meshData, const PxTransform& world2Shape,
+                    const Cm::FastVertex2ShapeScaling& meshScaling, bool idtScaleMesh, const PxBounds3& worldBounds,
+                    PxcContactCellMeshCallback& callback)
+{
+	Box queryBox;
+	computeVertexSpaceAABB(queryBox, worldBounds, world2Shape, meshScaling, idtScaleMesh);
+
+	Gu::intersectOBB_Particles(&meshData, queryBox, callback, true);
+}
+
+// Collides one particle against numTriangles triangles given as consecutive vertex triples in
+// triangleVerts. Vertices are scaled with `scale` first; if the scaling flips normals, the second and
+// third vertex are swapped. The result of each triangle test is merged into collisionShapeData.
+void collideWithMeshTriangles(ParticleCollData& collisionShapeData, const TriangleMesh& /*meshData*/,
+                              const Cm::FastVertex2ShapeScaling& scale, const PxVec3* triangleVerts, PxU32 numTriangles,
+                              PxReal proxRadius, const PxTransform& shape2World)
+{
+	bool hasCC = (collisionShapeData.localFlags & (ParticleCollisionFlags::CC | ParticleCollisionFlags::L_CC)) != 0;
+
+	// Per-triangle outputs; only the ones matching the returned flags are consumed.
+	PxVec3 hitNormal(0.0f);
+	PxVec3 hitPos(0.0f);
+	PxVec3 proxNormal(0.0f);
+	PxVec3 proxPos(0.0f);
+	PxReal hitTime(0.0f);
+	PxReal oldDistToSurface(0.0f);
+
+	// Same for every triangle: 1 swaps vertex 1 and 2, 0 keeps the stored order.
+	const PxI32 winding = scale.flipsNormal() ? 1 : 0;
+
+	for(PxU32 tri = 0; tri < numTriangles; ++tri)
+	{
+		const PxVec3* triVerts = triangleVerts + tri * 3;
+		PxVec3 v0 = scale * triVerts[0];
+		PxVec3 v1 = scale * triVerts[1 + winding];
+		PxVec3 v2 = scale * triVerts[2 - winding];
+
+		PxU32 triFlags = collideWithMeshTriangle(hitNormal, hitPos, proxNormal, proxPos, hitTime, oldDistToSurface,
+		                                         collisionShapeData.localOldPos, collisionShapeData.localNewPos, v0,
+		                                         v1 - v0, v2 - v0, hasCC, collisionShapeData.restOffset, proxRadius);
+
+		updateCollShapeData(collisionShapeData, hasCC, triFlags, hitTime, oldDistToSurface, hitNormal, hitPos,
+		                    proxNormal, proxPos, shape2World);
+	}
+}
+
+// Collides every particle of a cell (collData entries selected through collDataIndices) against the
+// given triangles.
+void collideCellWithMeshTriangles(ParticleCollData* collData, const PxU32* collDataIndices, PxU32 numCollDataIndices,
+                                  const TriangleMesh& meshData, const Cm::FastVertex2ShapeScaling& scale,
+                                  const PxVec3* triangleVerts, PxU32 numTriangles, PxReal proxRadius,
+                                  const PxTransform& shape2World)
+{
+	PX_ASSERT(collData);
+	PX_ASSERT(collDataIndices);
+	PX_ASSERT(numCollDataIndices > 0);
+	PX_ASSERT(triangleVerts);
+
+	for(PxU32 k = 0; k < numCollDataIndices; ++k)
+	{
+		collideWithMeshTriangles(collData[collDataIndices[k]], meshData, scale, triangleVerts, numTriangles,
+		                         proxRadius, shape2World);
+	}
+}
+
+// Collides the particles of a subpacket against a static triangle mesh, one hash cell at a time.
+// For each used cell the bounds of all old positions and of all new positions (the latter fattened
+// by proxRadius) are merged; cells whose bounds miss the mesh bounds are skipped, the others are
+// sent through the mesh query, which tests every reported triangle against the cell's particles.
+void physx::Pt::collideCellsWithStaticMesh(ParticleCollData* collData, const LocalCellHash& localCellHash,
+                                           const GeometryUnion& meshShape, const PxTransform& world2Shape,
+                                           const PxTransform& shape2World, PxReal /*cellSize*/,
+                                           PxReal /*collisionRange*/, PxReal proxRadius, const PxVec3& /*packetCorner*/)
+{
+	PX_ASSERT(collData);
+	PX_ASSERT(localCellHash.isHashValid);
+	PX_ASSERT(localCellHash.numParticles <= PT_SUBPACKET_PARTICLE_LIMIT_COLLISION);
+	PX_ASSERT(localCellHash.numHashEntries <= PT_LOCAL_HASH_SIZE_MESH_COLLISION);
+
+	const PxTriangleMeshGeometryLL& meshGeom = meshShape.get<const PxTriangleMeshGeometryLL>();
+
+	const TriangleMesh* meshData = meshGeom.meshData;
+	PX_ASSERT(meshData);
+
+	// mesh bounds in world space (conservative)
+	const PxBounds3 meshWorldBounds =
+	    meshData->getLocalBoundsFast().transformSafe(world2Shape.getInverse() * meshGeom.scale);
+
+	// The scaling helper is only initialized for non-identity scale.
+	const bool idtScaleMesh = meshGeom.scale.isIdentity();
+	Cm::FastVertex2ShapeScaling meshScaling;
+	if(!idtScaleMesh)
+		meshScaling.init(meshGeom.scale);
+
+	// process the particle cells
+	for(PxU32 cellIdx = 0; cellIdx < localCellHash.numHashEntries; ++cellIdx)
+	{
+		const ParticleCell& cell = localCellHash.hashEntries[cellIdx];
+
+		// unused hash entry
+		if(cell.numParticles == PX_INVALID_U32)
+			continue;
+
+		const PxU32* cellParticleIndices = localCellHash.particleIndices + cell.firstParticle;
+
+		PxBounds3 cellBounds(PxBounds3::empty());
+		PxBounds3 newPosBounds(PxBounds3::empty());
+		for(PxU32 p = 0; p < cell.numParticles; ++p)
+		{
+			const ParticleCollData& particle = collData[cellParticleIndices[p]];
+			cellBounds.include(particle.oldPos);
+			newPosBounds.include(particle.newPos);
+		}
+		PX_ASSERT(!newPosBounds.isEmpty());
+
+		// Only the new positions are inflated by the proximity radius.
+		newPosBounds.fattenFast(proxRadius);
+		cellBounds.include(newPosBounds);
+
+		// early out if (inflated) cell doesn't intersect mesh bounds
+		if(!cellBounds.intersects(meshWorldBounds))
+			continue;
+
+		// opcode query: cell bounds against shape bounds in unscaled mesh space
+		PxcContactCellMeshCallback callback(collData, cellParticleIndices, cell.numParticles, *meshData, meshScaling,
+		                                    proxRadius, NULL, shape2World);
+		testBoundsMesh(*meshData, world2Shape, meshScaling, idtScaleMesh, cellBounds, callback);
+	}
+}
+
+void physx::Pt::collideWithStaticMesh(PxU32 numParticles, ParticleCollData* collData, ParticleOpcodeCache* opcodeCaches,
+                                      const GeometryUnion& meshShape, const PxTransform& world2Shape,
+                                      const PxTransform& shape2World, PxReal /*cellSize*/, PxReal collisionRange,
+                                      PxReal proxRadius)
+{ // Collides particles one by one with the mesh; collData[i] is paired with opcodeCaches[i] for triangle caching.
+	PX_ASSERT(collData);
+	PX_ASSERT(opcodeCaches);
+
+	const PxTriangleMeshGeometryLL& meshShapeData = meshShape.get<const PxTriangleMeshGeometryLL>();
+
+	const bool idtScaleMesh = meshShapeData.scale.isIdentity();
+	Cm::FastVertex2ShapeScaling meshScaling; // left default-constructed when the mesh scale is identity
+	if(!idtScaleMesh)
+		meshScaling.init(meshShapeData.scale);
+
+	const PxF32 maxCacheBoundsExtent = 4 * collisionRange + proxRadius; // only used to derive the quantization params
+	const ParticleOpcodeCache::QuantizationParams quantizationParams =
+	    ParticleOpcodeCache::getQuantizationParams(maxCacheBoundsExtent);
+
+	const TriangleMesh* meshData = meshShapeData.meshData;
+	PX_ASSERT(meshData);
+
+	bool isSmallMesh = meshData->has16BitIndices(); // "small" here means the mesh uses 16-bit indices
+	PxU32 cachedTriangleBuffer[ParticleOpcodeCache::sMaxCachedTriangles]; // scratch buffer reused for every particle
+
+	PxVec3 extent(proxRadius); // half-extent of the box placed around each new position
+
+	for(PxU32 i = 0; i < numParticles; ++i)
+	{
+		// had to make this non-const to be able to update cache bits
+		ParticleCollData& particle = collData[i];
+		ParticleOpcodeCache& cache = opcodeCaches[i];
+
+		PxBounds3 bounds; // query bounds: box of size proxRadius around newPos, extended to contain oldPos
+		{
+			bounds = PxBounds3(particle.newPos - extent, particle.newPos + extent);
+			bounds.include(particle.oldPos);
+		}
+
+		PxU32 numTriangles = 0;
+		const PxU32* triangles = NULL;
+		bool isCached = cache.read(particle.particleFlags.low, numTriangles, cachedTriangleBuffer, bounds,
+		                           quantizationParams, &meshShape, isSmallMesh);
+
+		if(isCached) // cache hit: collide against the triangle indices read into cachedTriangleBuffer
+		{
+			triangles = cachedTriangleBuffer;
+			if(numTriangles > 0)
+			{
+				PxVec3 triangleVerts[ParticleOpcodeCache::sMaxCachedTriangles * 3]; // three vertices per cached triangle
+				const PxU32* triangleIndexIt = triangles;
+				for(PxU32 j = 0; j < numTriangles; ++j, ++triangleIndexIt)
+				{
+					TriangleVertexPointers::getTriangleVerts(meshData, *triangleIndexIt, triangleVerts[j * 3],
+					                                         triangleVerts[j * 3 + 1], triangleVerts[j * 3 + 2]);
+				}
+
+				collData[i].localDcNum = 0.0f; // these three local fields are zeroed before the triangle test below
+				collData[i].localSurfaceNormal = PxVec3(0);
+				collData[i].localSurfacePos = PxVec3(0);
+
+				collideWithMeshTriangles(collData[i], *meshData, meshScaling, triangleVerts, numTriangles, proxRadius,
+				                         shape2World);
+			}
+		}
+		else if((particle.particleFlags.low & InternalParticleFlag::eGEOM_CACHE_BIT_0) != 0 &&
+		        (particle.particleFlags.low & InternalParticleFlag::eGEOM_CACHE_BIT_1) != 0) // both geometry cache bits are set
+		{
+			// don't update the cache since it's already successfully in use
+			PxcContactCellMeshCallback callback(collData, &i, 1, *meshData, meshScaling, proxRadius, NULL, shape2World); // &i, 1: index list holding only this particle
+
+			testBoundsMesh(*meshData, world2Shape, meshScaling, idtScaleMesh, bounds, callback);
+		}
+		else
+		{
+			// compute new conservative bounds for cache
+			PxBounds3 cachedBounds;
+			{
+				PxVec3 predictedExtent(proxRadius * 1.5f); // box half-extent is 1.5x the one used for the plain query bounds
+
+				// add future newpos + extent
+				PxVec3 newPosPredicted = particle.newPos + 3.f * (particle.newPos - particle.oldPos); // extrapolates three more steps of the current displacement
+				cachedBounds = PxBounds3(newPosPredicted - predictedExtent, newPosPredicted + predictedExtent);
+
+				// add next oldpos + extent
+				cachedBounds.include(PxBounds3(particle.newPos - predictedExtent, particle.newPos + predictedExtent));
+
+				// add old pos
+				cachedBounds.include(particle.oldPos);
+			}
+
+			cache.init(cachedTriangleBuffer); // presumably makes the cache collect into the scratch buffer - confirm in PtParticleOpcodeCache.h
+
+			// the callback function will call collideWithMeshTriangles()
+			PxcContactCellMeshCallback callback(collData, &i, 1, *meshData, meshScaling, proxRadius, &cache, shape2World); // unlike the branch above, the cache is handed to the callback
+
+			// opcode query: cache bounds against shape bounds in unscaled mesh space
+			testBoundsMesh(*meshData, world2Shape, meshScaling, idtScaleMesh, cachedBounds, callback);
+
+			// update cache
+			cache.write(particle.particleFlags.low, cachedBounds, quantizationParams, meshShape, isSmallMesh);
+		}
+	}
+}
+
+void physx::Pt::collideWithStaticHeightField(ParticleCollData* particleCollData, PxU32 numCollData,
+                                             const GeometryUnion& heightFieldShape, PxReal proxRadius,
+                                             const PxTransform& shape2World)
+{ // Tests each particle's localOldPos -> localNewPos motion against the height-field triangles found by an AABB query.
+	PX_ASSERT(particleCollData);
+
+	const PxHeightFieldGeometryLL& hfGeom = heightFieldShape.get<const PxHeightFieldGeometryLL>();
+	const HeightFieldUtil hfUtil(hfGeom);
+
+	for(PxU32 p = 0; p < numCollData; p++)
+	{
+		ParticleCollData& collData = particleCollData[p];
+
+		PxBounds3 particleBounds = PxBounds3::boundsOfPoints(collData.localOldPos, collData.localNewPos); // box spanning the motion segment
+		PX_ASSERT(!particleBounds.isEmpty());
+		particleBounds.fattenFast(proxRadius); // inflate by proxRadius before querying
+
+		HeightFieldAabbTest test(particleBounds, hfUtil);
+		HeightFieldAabbTest::Iterator itBegin = test.begin();
+		HeightFieldAabbTest::Iterator itEnd = test.end();
+		PxVec3 triangle[3];
+
+		collData.localDcNum = 0.0f; // these three local fields are zeroed before the triangle loop
+		collData.localSurfaceNormal = PxVec3(0);
+		collData.localSurfacePos = PxVec3(0);
+		bool hasCC = (collData.localFlags & ParticleCollisionFlags::CC) > 0; // true if a CC bit is already set in localFlags
+
+		PxVec3 tmpSurfaceNormal(0.0f); // tmp* values are passed to both helpers in the loop below
+		PxVec3 tmpSurfacePos(0.0f);
+		PxVec3 tmpProxSurfaceNormal(0.0f);
+		PxVec3 tmpProxSurfacePos(0.0f);
+		PxReal tmpCCTime(collData.ccTime); // seeded with the particle's current ccTime
+		PxReal tmpDistOldToSurface(0.0f);
+
+		for(HeightFieldAabbTest::Iterator it = itBegin; it != itEnd; ++it)
+		{
+			it.getTriangleVertices(triangle);
+
+			const PxVec3& origin = triangle[0]; // triangle is handed over as vertex 0 plus two edge vectors
+			PxVec3 e0, e1;
+			e0 = triangle[1] - origin;
+			e1 = triangle[2] - origin;
+
+			PxU32 tmpFlags =
+			    collideWithMeshTriangle(tmpSurfaceNormal, tmpSurfacePos, tmpProxSurfaceNormal, tmpProxSurfacePos,
+			                            tmpCCTime, tmpDistOldToSurface, collData.localOldPos, collData.localNewPos,
+			                            origin, e0, e1, hasCC, collData.restOffset, proxRadius);
+
+			updateCollShapeData(collData, hasCC, tmpFlags, tmpCCTime, tmpDistOldToSurface, tmpSurfaceNormal,
+			                    tmpSurfacePos, tmpProxSurfaceNormal, tmpProxSurfacePos, shape2World); // hasCC is passed by the caller here; whether it is updated depends on the callee's signature
+		}
+	}
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionMethods.h b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionMethods.h
new file mode 100644
index 00000000..fdbbb191
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionMethods.h
@@ -0,0 +1,93 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_COLLISION_METHODS_H
+#define PT_COLLISION_METHODS_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "foundation/PxVec3.h"
+#include "PtConfig.h"
+#include "PtCollisionData.h"
+#include "PtSpatialHash.h"
+#include "PtParticleOpcodeCache.h"
+#include "GuGeometryUnion.h"
+
+namespace physx
+{
+
+namespace Pt
+{
+
+/*!
+Collision routines for fluid particles, declared per collision shape type.
+*/
+
+void collideWithPlane(ParticleCollData* particleCollData, PxU32 numCollData, const Gu::GeometryUnion& planeShape,
+                      PxReal proxRadius); // defined in PtCollisionPlane.cpp
+
+void collideWithConvexPlanes(ParticleCollData& collData, const PxPlane* planes, const PxU32 numPlanes,
+                             const PxReal proxRadius);
+void collideWithConvexPlanesSIMD(ParticleCollDataV4& collDataV4, const PxPlane* convexPlanes, PxU32 numPlanes,
+                                 const PxReal proxRadius);
+
+/**
+The caller-provided scaledPlaneBuf must have capacity for at least as many planes as convexShape contains.
+*/
+void collideWithConvex(PxPlane* scaledPlaneBuf, ParticleCollData* particleCollData, PxU32 numCollData,
+                       const Gu::GeometryUnion& convexShape, const PxReal proxRadius);
+
+void collideWithBox(ParticleCollData* particleCollData, PxU32 numCollData, const Gu::GeometryUnion& boxShape,
+                    PxReal proxRadius);
+
+void collideWithCapsule(ParticleCollData* particleCollData, PxU32 numCollData, const Gu::GeometryUnion& capsuleShape,
+                        PxReal proxRadius);
+
+void collideWithSphere(ParticleCollData* particleCollData, PxU32 numCollData, const Gu::GeometryUnion& sphereShape,
+                       PxReal proxRadius); // defined in PtCollisionSphere.cpp
+
+void collideCellsWithStaticMesh(ParticleCollData* particleCollData, const LocalCellHash& localCellHash,
+                                const Gu::GeometryUnion& meshShape, const PxTransform& world2Shape,
+                                const PxTransform& shape2World, PxReal cellSize, PxReal collisionRange,
+                                PxReal proxRadius, const PxVec3& packetCorner); // cellSize, collisionRange and packetCorner are unnamed (unused) in the definition
+
+void collideWithStaticMesh(PxU32 numParticles, ParticleCollData* particleCollData, ParticleOpcodeCache* opcodeCaches,
+                           const Gu::GeometryUnion& meshShape, const PxTransform& world2Shape,
+                           const PxTransform& shape2World, PxReal cellSize, PxReal collisionRange, PxReal proxRadius); // cellSize is unnamed (unused) in the definition
+
+void collideWithStaticHeightField(ParticleCollData* particleCollData, PxU32 numCollData,
+                                  const Gu::GeometryUnion& heightFieldShape, PxReal proxRadius,
+                                  const PxTransform& shape2World);
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_COLLISION_METHODS_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionParameters.h b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionParameters.h
new file mode 100644
index 00000000..736cfcd1
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionParameters.h
@@ -0,0 +1,70 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+#ifndef PT_COLLISION_PARAM_H
+#define PT_COLLISION_PARAM_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+namespace physx
+{
+
+namespace Pt
+{
+
+struct CollisionParameters
+{ // Flat bundle of collision settings; its size is checked by the compile-time assert that follows the struct.
+	PxVec3 externalAcceleration;
+	PxReal dampingDtComp;
+	PxReal restitution;
+	PxReal dynamicFriction;
+	PxReal staticFrictionSqr; // presumably the squared static friction coefficient - confirm where it is filled in
+	PxReal cellSize;
+	PxReal cellSizeInv; // presumably 1 / cellSize - confirm where it is filled in
+	PxU32 packetMultLog; // presumably log2(packetMult) - confirm where it is filled in
+	PxU32 packetMult;
+	PxReal packetSize;
+	PxReal restOffset;
+	PxReal contactOffset;
+	PxReal maxMotionDistance;
+	PxReal collisionRange;
+	PxReal timeStep;
+	PxReal invTimeStep; // presumably 1 / timeStep - confirm where it is filled in
+	PxPlane projectionPlane;
+	PxU32 flags;
+	PxU32 temporalNoise;
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(CollisionParameters) % 16 == 0); // adding or removing a field must keep the size a multiple of 16 bytes
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_COLLISION_PARAM_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionPlane.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionPlane.cpp
new file mode 100644
index 00000000..70d06af4
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionPlane.cpp
@@ -0,0 +1,157 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtCollisionMethods.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+using namespace physx;
+using namespace Pt;
+
+namespace
+{
+
+PX_FORCE_INLINE void collideWithPlane(ParticleCollData& collData, PxReal proxRadius)
+{ // Collides one particle with the plane x = 0 in plane space and records a CC, DC and/or proximity result in collData.
+	// In plane space the normal is (1,0,0) and d is 0. This simplifies the computations below.
+	PxReal entryTime = -FLT_MAX; // stays at -FLT_MAX unless the motion has a negative x component (see below)
+
+	PxReal planeDistNewPos = collData.localNewPos.x; // signed plane distance is simply the x coordinate
+	PxReal planeDistOldPos = collData.localOldPos.x;
+
+	bool isContained = false;
+	bool hasDC = false;
+	bool hasProx = false;
+	bool parallelMotion = false;
+
+	// Test the old pos for containment
+	if(planeDistOldPos <= 0.0f)
+		isContained = true;
+
+	// Test proximity
+	if(planeDistNewPos <= proxRadius)
+	{
+		if(planeDistNewPos > 0.0f)
+			hasProx = true;
+
+		// Test discrete collision
+		if(planeDistNewPos <= collData.restOffset)
+			hasDC = true;
+	}
+
+	if(!(hasProx || hasDC || isContained))
+		return; // We know that the old position is outside the surface and that the new position is
+	            // not within the proximity region.
+
+	PxVec3 planeNormal;
+	planeNormal = PxVec3(1.0f, 0.0f, 0.0f);
+
+	// Test continuous collision
+	PxVec3 motion = collData.localNewPos - collData.localOldPos;
+	PxReal projMotion = motion.x; // motion projected onto the plane normal (1,0,0)
+	if(projMotion == 0.0f) // parallel
+	{
+		if(planeDistNewPos > 0.0f)
+			parallelMotion = true;
+	}
+	else
+	{
+		PxReal hitTime = -planeDistOldPos / projMotion; // parameter t at which oldPos + t * motion reaches x = 0
+		if(projMotion < 0.0f) // entry point
+			entryTime = hitTime;
+	}
+
+	if(isContained)
+	{
+		// Treat the case where the old pos is inside the shape as
+		// a continuous collision with time 0
+
+		collData.localFlags |= ParticleCollisionFlags::L_CC;
+		collData.ccTime = 0.0f;
+		collData.localSurfaceNormal = planeNormal;
+
+		// Push the particle to the surface (such that distance to surface is equal to restOffset)
+		collData.localSurfacePos = collData.localOldPos;
+		collData.localSurfacePos.x += (collData.restOffset - planeDistOldPos);
+	}
+	else
+	{
+		// check for continuous collision
+		// only add a proximity/discrete case if there are no continuous collisions
+		// for this shape or any other shape before
+
+		bool ccHappened = ((0.0f <= entryTime) && (entryTime < collData.ccTime) && (!parallelMotion)); // entry must be earlier than the ccTime already stored
+		if(ccHappened)
+		{
+			collData.localSurfaceNormal = planeNormal;
+
+			// collData.localSurfacePos = collData.localOldPos + (motion*entryTime);
+			// collData.localSurfacePos.x += collData.restOffset;
+			PxVec3 relativePOSITION = motion * entryTime; // displacement from the old position to the hit point
+			computeContinuousTargetPosition(collData.localSurfacePos, collData.localOldPos, relativePOSITION,
+			                                collData.localSurfaceNormal, collData.restOffset);
+
+			collData.ccTime = entryTime;
+			collData.localFlags |= ParticleCollisionFlags::L_CC;
+		}
+		else if(!(collData.localFlags & ParticleCollisionFlags::CC))
+		{
+			// No other collision shape has caused a continuous collision so far
+
+			PX_ASSERT(hasProx | hasDC); // holds because isContained is false here and the early return above was not taken
+
+			if(hasProx) // proximity
+				collData.localFlags |= ParticleCollisionFlags::L_PROX;
+			if(hasDC) // discrete collision
+				collData.localFlags |= ParticleCollisionFlags::L_DC;
+
+			collData.localSurfaceNormal = planeNormal;
+
+			// Move contact point such that the projected distance to the surface is equal
+			// to restOffset
+			collData.localSurfacePos = collData.localNewPos;
+			collData.localSurfacePos.x += (collData.restOffset - planeDistNewPos);
+		}
+	}
+}
+}
+
+void physx::Pt::collideWithPlane(ParticleCollData* particleCollData, PxU32 numCollData,
+                                 const Gu::GeometryUnion& planeShape, PxReal proxRadius)
+{ // Applies the file-local single-particle plane test to the first numCollData entries of particleCollData.
+	PX_ASSERT(particleCollData);
+	PX_ASSERT(planeShape.getType() == PxGeometryType::ePLANE);
+	PX_UNUSED(planeShape); // planeShape is otherwise only referenced by the assert above
+
+	for(PxU32 p = 0; p < numCollData; p++)
+	{
+		::collideWithPlane(particleCollData[p], proxRadius); // '::' selects the overload in the unnamed namespace
+	}
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionSphere.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionSphere.cpp
new file mode 100644
index 00000000..cddce6af
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionSphere.cpp
@@ -0,0 +1,156 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtCollisionMethods.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+using namespace physx;
+using namespace Pt;
+
+namespace
+{
+
+void collideWithSphereNonContinuous(ParticleCollData& collData, const PxVec3& pos, const PxReal& radius,
+                                    const PxReal& proxRadius)
+{ // Discrete/proximity test of the single position 'pos' against a sphere centred at the origin of pos's frame.
+	if(collData.localFlags & ParticleCollisionFlags::CC)
+		return; // Only apply discrete and proximity collisions if no continuous collisions was detected so far (for any
+	// colliding shape)
+
+	PxReal dist = pos.magnitude(); // distance from the sphere centre
+	collData.localSurfaceNormal = pos; // NOTE: written even when the range test below fails
+	if(dist < (radius + proxRadius))
+	{
+		if(dist != 0.0f)
+			collData.localSurfaceNormal *= (1.0f / dist);
+		else
+			collData.localSurfaceNormal = PxVec3(0); // pos is exactly at the centre: no direction available, normal is zeroed
+
+		// Push particle to surface such that the distance to the surface is equal to restOffset
+		collData.localSurfacePos = collData.localSurfaceNormal * (radius + collData.restOffset);
+		collData.localFlags |= ParticleCollisionFlags::L_PROX; // set for every position within radius + proxRadius
+
+		if(dist < (radius + collData.restOffset))
+			collData.localFlags |= ParticleCollisionFlags::L_DC; // additionally set when within radius + restOffset
+	}
+}
+
+PX_FORCE_INLINE void collideWithSphere(ParticleCollData& collData, const PxSphereGeometry& sphereShapeData,
+                                       PxReal proxRadius)
+{ // Collides one particle's motion (localOldPos -> localNewPos) with a sphere centred at the local origin.
+	PxVec3& oldPos = collData.localOldPos;
+	PxVec3& newPos = collData.localNewPos;
+
+	PxReal radius = sphereShapeData.radius;
+
+	PxReal oldPosDist2 = oldPos.magnitudeSquared();
+	PxReal radius2 = radius * radius;
+
+	bool oldInSphere = (oldPosDist2 < radius2); // strict: a start point exactly on the surface counts as outside
+
+	if(oldInSphere)
+	{
+		// old position inside the shape
+		// add ccd with time 0.0
+
+		collData.localSurfaceNormal = oldPos;
+		if(oldPosDist2 > 0.0f)
+			collData.localSurfaceNormal *= PxRecipSqrt(oldPosDist2);
+		else
+			collData.localSurfaceNormal = PxVec3(0, 1.0f, 0); // old position is exactly at the centre: fall back to +y
+
+		// Push particle to surface such that the distance to the surface is equal to restOffset
+		collData.localSurfacePos = collData.localSurfaceNormal * (radius + collData.restOffset);
+		collData.ccTime = 0.0;
+		collData.localFlags |= ParticleCollisionFlags::L_CC;
+	}
+	else
+	{
+		// old position is outside of the shape
+
+		PxVec3 motion = newPos - oldPos;
+
+		// Discriminant of |oldPos + t*motion|^2 = radius^2, i.e. b^2 - 4ac with a = |motion|^2 (a2 = 2a) and c = oldPosDist2 - radius2
+		PxReal b = motion.dot(oldPos) * 2.0f;
+		PxReal a2 = 2.0f * motion.magnitudeSquared();
+		PxReal disc = (b * b) - (2.0f * a2 * (oldPosDist2 - radius2));
+
+		bool intersection = disc > 0.0f;
+
+		if((!intersection) || (a2 == 0.0f)) // a2 == 0 means the particle did not move
+		{
+			// the ray does not intersect the sphere
+			collideWithSphereNonContinuous(collData, newPos, radius, proxRadius);
+		}
+		else
+		{
+			// the ray intersects the sphere
+			PxReal t = -(b + PxSqrt(disc)) / a2; // Compute intersection point (the smaller of the two roots)
+
+			if(t < 0.0f || t > 1.0f)
+			{
+				// intersection point lies outside motion vector
+				collideWithSphereNonContinuous(collData, newPos, radius, proxRadius);
+			}
+			else if(t < collData.ccTime) // nothing is recorded when t is not earlier than the stored ccTime
+			{
+				// intersection point lies on sphere, add lcc
+				// collData.localSurfacePos = oldPos + (motion * t);
+				// collData.localSurfaceNormal = collData.localSurfacePos;
+				// collData.localSurfaceNormal *= (1.0f / radius);
+				// collData.localSurfacePos += (collData.localSurfaceNormal * collData.restOffset);
+				PxVec3 relativeImpact = motion * t; // displacement from the old position to the hit point
+				collData.localSurfaceNormal = oldPos + relativeImpact;
+				collData.localSurfaceNormal *= (1.0f / radius); // hit point lies on the sphere, so dividing by radius normalizes it
+				computeContinuousTargetPosition(collData.localSurfacePos, collData.localOldPos, relativeImpact,
+				                                collData.localSurfaceNormal, collData.restOffset);
+
+				collData.ccTime = t;
+				collData.localFlags |= ParticleCollisionFlags::L_CC;
+			}
+		}
+	}
+}
+
+} // namespace
+
+void physx::Pt::collideWithSphere(ParticleCollData* particleCollData, PxU32 numCollData,
+                                  const Gu::GeometryUnion& sphereShape, PxReal proxRadius)
+{ // Applies the file-local single-particle sphere test to the first numCollData entries of particleCollData.
+	PX_ASSERT(particleCollData);
+
+	const PxSphereGeometry& sphereShapeData = sphereShape.get<const PxSphereGeometry>(); // fetched once, shared by all particles
+
+	for(PxU32 p = 0; p < numCollData; p++)
+	{
+		::collideWithSphere(particleCollData[p], sphereShapeData, proxRadius); // '::' selects the overload in the unnamed namespace
+	}
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtConfig.h b/PhysX_3.4/Source/LowLevelParticles/src/PtConfig.h
new file mode 100644
index 00000000..224b5a8b
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtConfig.h
@@ -0,0 +1,121 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_CONFIG_H
+#define PT_CONFIG_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PtParticleSystemFlags.h"
+
+// Marker for fluid particles with no collision constraints
+#define PT_NO_CONSTRAINT PT_PARTICLE_SYSTEM_PARTICLE_LIMIT
+
+// Needs to be addressable with PxU16 Particle::hashKey
+// - Ps::nextPowerOf2((PT_PARTICLE_SYSTEM_HASH_KEY_LIMIT + 1)) must be addressable
+//   through PxU16 Particle::hashKey; see also PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY
+//   and Dynamics::updatePacket()
+#define PT_PARTICLE_SYSTEM_HASH_KEY_LIMIT 0x7ffe
+
+// Size of particle packet hash table.
+// - Must be a power of 2
+// - Must be at least as large as PT_PARTICLE_SYSTEM_PACKET_LIMIT (see further below), but should be larger for the hash
+// to be efficient.
+// - Must be addressable through PxU16 Pt::Particle::hashKey.
+#define PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE 1024
+
+// One larger than PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE to fit a special cell for overflow particles.
+#define PT_PARTICLE_SYSTEM_PACKET_HASH_BUFFER_SIZE 1025
+
+// Index of special overflow packet
+#define PT_PARTICLE_SYSTEM_OVERFLOW_INDEX 1024
+
+// Maximum number of particle packets (should be smaller than hash size since a full hash table is not efficient)
+#define PT_PARTICLE_SYSTEM_PACKET_LIMIT 924
+
+// Slack for building the triangle packet hash. Has to be bigger than any epsilons used in collision detection.
+#define PT_PARTICLE_SYSTEM_COLLISION_SLACK 1.0e-3f
+
+// Maximum number of fluid particles in a packet that can be handled at a time
+#define PT_SUBPACKET_PARTICLE_LIMIT 512
+// If the number of particles in a packet and the number of particles for each neighboring halo region
+// are below this threshold, then no local hash will be constructed and each particle of one packet will be
+// tested against each particle of the other packet (for particle-particle interaction only).
+//
+// Note: Has to be smaller than or equal to PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY
+#define PT_BRUTE_FORCE_PARTICLE_THRESHOLD 100
+// If the number of particles in a packet section and the number of particles in a neighboring halo
+// region are below this threshold, then no local hash will be constructed and each particle of the
+// packet section will be tested against each particle of the halo region (for particle-particle interaction only).
+//
+// Note: Has to be smaller than or equal to PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY
+#define PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION 200
+
+// Maximum number of fluid particles in a packet that can be handled at a time for dividing
+// a packet into sections and reordering the particles accordingly
+#define PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS PT_SUBPACKET_PARTICLE_LIMIT
+
+// Maximum number of fluid particles in a packet that can be handled at a time for SPH dynamics
+// calculations, i.e., computation of density & force
+// - Ps::nextPowerOf2((PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY + 1)) must be addressable
+//   through PxU16 Particle::hashKey, see Dynamics::updatePacket().
+#define PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY PT_SUBPACKET_PARTICLE_LIMIT
+
+// Local hash bucket size, should equal nextPowerOfTwo(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY + 1). NOTE(review): that would be 1024 for a limit of 512, but the value below is 512 - confirm which is intended.
+#define PT_SUBPACKET_PARTICLE_HASH_BUCKET_SIZE 512
+
+// Maximum number of parallel tasks created for sph computation
+#define PT_MAX_PARALLEL_TASKS_SPH 8
+
+// Maximum number of fluid particles in a packet that can be handled at a time for velocity
+// integration
+#define PT_SUBPACKET_PARTICLE_LIMIT_VEL_INTEGRATION PT_SUBPACKET_PARTICLE_LIMIT
+
+// Maximum number of fluid particles in a packet that can be handled at a time for
+// detecting and resolving collisions.
+// - Must be smaller than PT_LOCAL_HASH_SIZE_MESH_COLLISION.
+#define PT_SUBPACKET_PARTICLE_LIMIT_COLLISION 128
+
+// Hash size for the local particle cell hash.
+// - Must be larger than PT_SUBPACKET_PARTICLE_LIMIT_COLLISION
+// - Must be a power of 2
+// - Must be addressable with PxU16 Particle::hashKey
+#define PT_LOCAL_HASH_SIZE_MESH_COLLISION 256
+
+// Number of fluid packet shapes to run in parallel during collision update.
+#define PT_NUM_PACKETS_PARALLEL_COLLISION 8
+
+// Initial size of triangle mesh collision buffer (for storing indices of colliding triangles)
+#define PT_INITIAL_MESH_COLLISION_BUFFER_SIZE 1024
+
+#define PT_USE_SIMD_CONVEX_COLLISION 1
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_CONFIG_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtConstants.h b/PhysX_3.4/Source/LowLevelParticles/src/PtConstants.h
new file mode 100644
index 00000000..74726a5b
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtConstants.h
@@ -0,0 +1,45 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_CONSTANTS_H
+#define PT_CONSTANTS_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+// Threshold for the angle between two contact constraint surfaces. If the two surfaces form
+// a "steep valley" only one of the two constraints will be applied.
+#define PT_COLL_VEL_PROJECTION_CROSS_EPSILON 1e-6f
+
+#define PT_COLL_VEL_PROJECTION_PROJ 1e-4f
+#define PT_COLL_TRI_DISTANCE 1e-5f
+#define PT_COLL_RAY_EPSILON_FACTOR 1e-4f
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_CONSTANTS_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtContextCpu.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtContextCpu.cpp
new file mode 100644
index 00000000..e906a31b
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtContextCpu.cpp
@@ -0,0 +1,325 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtContextCpu.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#if PX_SUPPORT_GPU_PHYSX
+#include "task/PxGpuDispatcher.h"
+#include "PxvGlobals.h"
+#include "PxPhysXGpu.h"
+#include "PxSceneGpu.h"
+#include "gpu/PtRigidBodyAccessGpu.h"
+#endif
+
+#include "foundation/PxFoundation.h"
+#include "PtParticleData.h"
+#include "PtParticleSystemSimCpu.h"
+#include "PtParticleShapeCpu.h"
+#include "PtBatcher.h"
+#include "PtBodyTransformVault.h"
+#include "PsFoundation.h"
+
+using namespace physx::shdfnd;
+using namespace physx;
+using namespace Pt;
+
+namespace
+{
+// File-local dispatch pointers. They are assigned in ContextCpu::registerParticles(); the ContextCpu
+// and Pt entry points in this file forward through them instead of calling the implementations
+// directly.
+
+// Targets of ContextCpu::addParticleSystem() and ContextCpu::removeParticleSystem().
+ParticleSystemSim* (ContextCpu::*addParticleSystemFn)(ParticleData*, const ParticleSystemParameter&, bool);
+ParticleData* (ContextCpu::*removeParticleSystemFn)(ParticleSystemSim*, bool);
+// Factory used by Pt::createParticleContext(), which returns NULL while this pointer is null.
+Context* (*createContextFn)(physx::PxTaskManager*, Cm::FlushPool&);
+// Target of ContextCpu::destroy().
+void (ContextCpu::*destroyContextFn)();
+
+// Batcher scheduling entry points; the ContextCpu::schedule*() methods call these on mBatcher.
+PxBaseTask& (Batcher::*scheduleShapeGenerationFn)(ParticleSystemSim** particleSystems, ParticleShapesUpdateInput* inputs,
+                                                  PxU32 batchSize, PxBaseTask& continuation) = 0;
+PxBaseTask& (Batcher::*scheduleDynamicsCpuFn)(ParticleSystemSim** particleSystems, PxU32 batchSize,
+                                              PxBaseTask& continuation) = 0;
+PxBaseTask& (Batcher::*scheduleCollisionPrepFn)(ParticleSystemSim** particleSystems, PxLightCpuTask** inputPrepTasks,
+                                                PxU32 batchSize, PxBaseTask& continuation) = 0;
+PxBaseTask& (Batcher::*scheduleCollisionCpuFn)(ParticleSystemSim** particleSystems, PxU32 batchSize,
+                                               PxBaseTask& continuation) = 0;
+PxBaseTask& (Batcher::*schedulePipelineGpuFn)(ParticleSystemSim** particleSystems, PxU32 batchSize,
+                                              PxBaseTask& continuation) = 0;
+}
+
+namespace physx
+{
+namespace Pt
+{
+// Hooks the CPU particle implementation into the file-local dispatch pointers.
+void registerParticles()
+{
+	ContextCpu::registerParticles();
+}
+
+// Creates a particle context through the registered factory.
+// Yields NULL when no factory has been registered.
+Context* createParticleContext(class physx::PxTaskManager* taskManager, Cm::FlushPool& taskPool)
+{
+	return ::createContextFn ? ::createContextFn(taskManager, taskPool) : NULL;
+}
+} // namespace Pt
+} // namespace physx
+
+// Points every file-local dispatch pointer at its implementation: the ContextCpu factory, destroy and
+// add/remove functions, and the Batcher scheduling functions.
+void ContextCpu::registerParticles()
+{
+	::createContextFn = &ContextCpu::createContextImpl;
+	::destroyContextFn = &ContextCpu::destroyContextImpl;
+	::addParticleSystemFn = &ContextCpu::addParticleSystemImpl;
+	::removeParticleSystemFn = &ContextCpu::removeParticleSystemImpl;
+
+	::scheduleShapeGenerationFn = &Batcher::scheduleShapeGeneration;
+	::scheduleDynamicsCpuFn = &Batcher::scheduleDynamicsCpu;
+	::scheduleCollisionPrepFn = &Batcher::scheduleCollisionPrep;
+	::scheduleCollisionCpuFn = &Batcher::scheduleCollisionCpu;
+	::schedulePipelineGpuFn = &Batcher::schedulePipelineGpu;
+}
+
+// Factory installed as createContextFn: allocates a new ContextCpu with PX_NEW.
+// The matching release is destroyContextImpl(), which uses PX_DELETE.
+Context* ContextCpu::createContextImpl(PxTaskManager* taskManager, Cm::FlushPool& taskPool)
+{
+	return PX_NEW(ContextCpu)(taskManager, taskPool);
+}
+
+// Pt::Context implementation: forwards to the registered destroy function, which
+// registerParticles() sets to destroyContextImpl().
+void ContextCpu::destroy()
+{
+	(this->*destroyContextFn)();
+}
+
+// Deletes this context. The object must not be used after this call returns.
+void ContextCpu::destroyContextImpl()
+{
+	PX_DELETE(this);
+}
+
+// Pt::Context implementation: forwards to the registered add function, which
+// registerParticles() sets to addParticleSystemImpl().
+ParticleSystemSim* ContextCpu::addParticleSystem(ParticleData* particleData, const ParticleSystemParameter& parameter,
+                                                 bool useGpuSupport)
+{
+	return (this->*addParticleSystemFn)(particleData, parameter, useGpuSupport);
+}
+
+// Pt::Context implementation: forwards to the registered remove function, which
+// registerParticles() sets to removeParticleSystemImpl().
+ParticleData* ContextCpu::removeParticleSystem(ParticleSystemSim* particleSystem, bool acquireParticleData)
+{
+	return (this->*removeParticleSystemFn)(particleSystem, acquireParticleData);
+}
+
+// Sets up the particle system and particle shape pools, stores the task manager and task pool, and
+// allocates the Batcher and BodyTransformVault that the destructor deletes again.
+// mSceneGpu starts out as NULL; in GPU builds createOrGetSceneGpu() creates the scene on first use.
+ContextCpu::ContextCpu(PxTaskManager* taskManager, Cm::FlushPool& taskPool)
+: mParticleSystemPool("mParticleSystemPool", this, 16, 1024)
+, mParticleShapePool("mParticleShapePool", this, 256, 1024)
+, mBatcher(NULL)
+, mTaskManager(taskManager)
+, mTaskPool(taskPool)
+#if PX_SUPPORT_GPU_PHYSX
+, mGpuRigidBodyAccess(NULL)
+#endif
+{
+	mBatcher = PX_NEW(Batcher)(*this);
+	mBodyTransformVault = PX_NEW(BodyTransformVault);
+	mSceneGpu = NULL;
+}
+
+// Releases everything the context created: in GPU builds the GPU scene (if one was created) and the
+// GPU rigid body access object, then the Batcher and the BodyTransformVault.
+ContextCpu::~ContextCpu()
+{
+#if PX_SUPPORT_GPU_PHYSX
+	// The GPU scene is released before the rigid body access object it was created with.
+	if(mSceneGpu)
+	{
+		mSceneGpu->release();
+	}
+
+	if(mGpuRigidBodyAccess)
+	{
+		PX_DELETE(mGpuRigidBodyAccess);
+	}
+#endif
+
+	PX_DELETE(mBatcher);
+	PX_DELETE(mBodyTransformVault);
+}
+
+// Creates the simulation object for a particle system.
+//
+// GPU builds with useGpuSupport set: the particle state is handed to the GPU scene. On success
+// particleData is released and the GPU sim is returned; on any failure NULL is returned and
+// particleData is left untouched.
+//
+// All other cases: a ParticleSystemSimCpu is taken from the pool and initialized with particleData.
+//
+// Returns the new simulation object, or NULL if none could be created.
+ParticleSystemSim* ContextCpu::addParticleSystemImpl(ParticleData* particleData,
+                                                     const ParticleSystemParameter& parameter, bool useGpuSupport)
+{
+	PX_ASSERT(particleData);
+
+#if PX_SUPPORT_GPU_PHYSX
+	if(useGpuSupport)
+	{
+		PxSceneGpu* sceneGPU = createOrGetSceneGpu();
+		if(sceneGPU)
+		{
+			ParticleSystemStateDataDesc particles;
+			particleData->getParticlesV(particles, true, false);
+			ParticleSystemSim* sim = sceneGPU->addParticleSystem(particles, parameter);
+
+			if(sim)
+			{
+				particleData->release();
+				return sim;
+			}
+		}
+		return NULL;
+	}
+#else
+	PX_UNUSED(useGpuSupport);
+#endif
+
+	// CPU path, shared by both build configurations.
+	ParticleSystemSimCpu* sim = mParticleSystemPool.get();
+
+	// createParticleShape() treats a NULL result from the pool as possible; do the same here rather
+	// than dereferencing unconditionally. The caller then gets NULL, as on the GPU failure path.
+	if(sim)
+		sim->init(*particleData, parameter);
+
+	return sim;
+}
+
+// Removes a particle system from this context. If acquireParticleData is set, the particle state is
+// extracted first and handed back to the caller; otherwise NULL is returned.
+ParticleData* ContextCpu::removeParticleSystemImpl(ParticleSystemSim* particleSystem, bool acquireParticleData)
+{
+#if PX_SUPPORT_GPU_PHYSX
+	if(particleSystem->isGpuV())
+	{
+		PX_ASSERT(getSceneGpuFast());
+
+		// Copy the particle state into a new ParticleData before the system is removed from the scene.
+		ParticleData* gpuState = NULL;
+		if(acquireParticleData)
+		{
+			ParticleSystemStateDataDesc stateDesc;
+			particleSystem->getParticleStateV().getParticlesV(stateDesc, true, false);
+			gpuState = ParticleData::create(stateDesc, particleSystem->getParticleStateV().getWorldBoundsV());
+		}
+
+		getSceneGpuFast()->removeParticleSystem(particleSystem);
+		return gpuState;
+	}
+#endif
+
+	// CPU path: take over the state if requested, then clear the sim and return it to the pool.
+	ParticleSystemSimCpu* cpuSim = static_cast<ParticleSystemSimCpu*>(particleSystem);
+	ParticleData* cpuState = acquireParticleData ? cpuSim->obtainParticleState() : NULL;
+
+	cpuSim->clear();
+	mParticleSystemPool.put(cpuSim);
+	return cpuState;
+}
+
+// Takes a shape from the shape pool and initializes it for the given particle system and packet.
+// Returns NULL if the pool did not provide a shape.
+ParticleShapeCpu* ContextCpu::createParticleShape(ParticleSystemSimCpu* particleSystem, const ParticleCell* packet)
+{
+	// Different tasks may access mParticleShapePool concurrently, so hold the mutex for the whole call.
+	Ps::Mutex::ScopedLock poolGuard(mParticleShapePoolMutex);
+
+	ParticleShapeCpu* newShape = mParticleShapePool.get();
+	if(!newShape)
+		return NULL;
+
+	newShape->init(particleSystem, packet);
+	return newShape;
+}
+
+// Returns a shape to the shape pool, under the same mutex that createParticleShape() takes.
+void ContextCpu::releaseParticleShape(ParticleShapeCpu* shape)
+{
+	// for now just lock the mParticleShapePool for concurrent access from different tasks
+	Ps::Mutex::ScopedLock lock(mParticleShapePoolMutex);
+	mParticleShapePool.put(shape);
+}
+
+#if PX_SUPPORT_GPU_PHYSX
+
+// Returns the GPU scene of this context, creating it on the first call.
+//
+// Creation needs a task manager with a GPU dispatcher and a CUDA context manager, plus the PhysXGpu
+// interface. If any of these is missing, or scene creation fails, a debug warning is reported
+// through the foundation and NULL is returned. A later call then tries again, because mSceneGpu is
+// still NULL.
+PxSceneGpu* ContextCpu::createOrGetSceneGpu()
+{
+	if(mSceneGpu)
+		return mSceneGpu;
+
+	// get PxCudaContextManager
+
+	if(!mTaskManager || !mTaskManager->getGpuDispatcher() || !mTaskManager->getGpuDispatcher()->getCudaContextManager())
+	{
+		Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__,
+		                          "GPU operation failed. No PxCudaContextManager available.");
+		return NULL;
+	}
+	physx::PxCudaContextManager& contextManager = *mTaskManager->getGpuDispatcher()->getCudaContextManager();
+
+	// load PhysXGpu dll interface
+
+	PxPhysXGpu* physXGpu = PxvGetPhysXGpu(true);
+	if(!physXGpu)
+	{
+		getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__,
+		                      "GPU operation failed. PhysXGpu dll unavailable.");
+		return NULL;
+	}
+
+	// create PxsGpuRigidBodyAccess
+
+	PX_ASSERT(!mGpuRigidBodyAccess);
+	mGpuRigidBodyAccess = PX_NEW(RigidBodyAccessGpu)(*mBodyTransformVault);
+
+	// finally create PxSceneGpu
+	mSceneGpu = physXGpu->createScene(contextManager, *mGpuRigidBodyAccess);
+	if(!mSceneGpu)
+	{
+		// Scene creation failed: delete and reset the access object so that the assert above holds
+		// on the next attempt.
+		PX_DELETE_AND_RESET(mGpuRigidBodyAccess);
+		Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__,
+		                          "GPU operation failed. PxSceneGpu creation unsuccessful.");
+	}
+
+	return mSceneGpu;
+}
+#endif // PX_SUPPORT_GPU_PHYSX
+
+// Pt::Context implementation: forwards to mBatcher through scheduleShapeGenerationFn
+// (Batcher::scheduleShapeGeneration once registerParticles() has run) and returns its task.
+PxBaseTask& ContextCpu::scheduleShapeGeneration(class ParticleSystemSim** particleSystems,
+                                                struct ParticleShapesUpdateInput* inputs, PxU32 batchSize,
+                                                PxBaseTask& continuation)
+{
+	return (mBatcher->*::scheduleShapeGenerationFn)(particleSystems, inputs, batchSize, continuation);
+}
+
+// Pt::Context implementation: forwards to mBatcher through scheduleDynamicsCpuFn
+// (Batcher::scheduleDynamicsCpu once registerParticles() has run) and returns its task.
+PxBaseTask& ContextCpu::scheduleDynamicsCpu(class ParticleSystemSim** particleSystems, PxU32 batchSize,
+                                            PxBaseTask& continuation)
+{
+	return (mBatcher->*::scheduleDynamicsCpuFn)(particleSystems, batchSize, continuation);
+}
+
+// Pt::Context implementation: forwards to mBatcher through scheduleCollisionPrepFn
+// (Batcher::scheduleCollisionPrep once registerParticles() has run) and returns its task.
+PxBaseTask& ContextCpu::scheduleCollisionPrep(class ParticleSystemSim** particleSystems,
+                                              PxLightCpuTask** inputPrepTasks, PxU32 batchSize, PxBaseTask& continuation)
+{
+	return (mBatcher->*::scheduleCollisionPrepFn)(particleSystems, inputPrepTasks, batchSize, continuation);
+}
+
+// Pt::Context implementation: forwards to mBatcher through scheduleCollisionCpuFn
+// (Batcher::scheduleCollisionCpu once registerParticles() has run) and returns its task.
+PxBaseTask& ContextCpu::scheduleCollisionCpu(class ParticleSystemSim** particleSystems, PxU32 batchSize,
+                                             PxBaseTask& continuation)
+{
+	return (mBatcher->*::scheduleCollisionCpuFn)(particleSystems, batchSize, continuation);
+}
+
+// Pt::Context implementation: forwards to mBatcher through schedulePipelineGpuFn
+// (Batcher::schedulePipelineGpu once registerParticles() has run) and returns its task.
+PxBaseTask& ContextCpu::schedulePipelineGpu(ParticleSystemSim** particleSystems, PxU32 batchSize, PxBaseTask& continuation)
+{
+	return (mBatcher->*::schedulePipelineGpuFn)(particleSystems, batchSize, continuation);
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtContextCpu.h b/PhysX_3.4/Source/LowLevelParticles/src/PtContextCpu.h
new file mode 100644
index 00000000..e96e5a9b
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtContextCpu.h
@@ -0,0 +1,127 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_CONTEXT_CPU_H
+#define PT_CONTEXT_CPU_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "CmPool.h"
+#include "PtContext.h"
+
+namespace physx
+{
+
+class PxBaseTask;
+class PxLightCpuTask;
+class PxTaskManager;
+
+namespace Pt
+{
+
+class Batcher;
+class BodyTransformVault;
+class ParticleShapeCpu;
+class ParticleSystemSimCpu;
+struct ParticleCell;
+
+/**
+Per scene manager class for particle systems.
+*/
+// Instances are created only through the static createContextImpl() factory and destroyed through
+// destroy(); the constructor and destructor are private.
+class ContextCpu : public Context, Ps::UserAllocated
+{
+	PX_NOCOPY(ContextCpu)
+  public:
+	/**
+	Register particle functionality.
+	Not calling this should allow the code to be stripped at link time.
+	*/
+	static void registerParticles();
+
+	// Pt::Context implementation
+	virtual void destroy();
+	virtual ParticleSystemSim* addParticleSystem(class ParticleData* particleData,
+	                                             const ParticleSystemParameter& parameter, bool useGpuSupport);
+	virtual ParticleData* removeParticleSystem(ParticleSystemSim* system, bool acquireParticleData);
+	virtual PxBaseTask& scheduleShapeGeneration(class ParticleSystemSim** particleSystems,
+	                                            struct ParticleShapesUpdateInput* inputs, PxU32 batchSize,
+	                                            PxBaseTask& continuation);
+	virtual PxBaseTask& scheduleDynamicsCpu(class ParticleSystemSim** particleSystems, PxU32 batchSize,
+	                                        PxBaseTask& continuation);
+	virtual PxBaseTask& scheduleCollisionPrep(class ParticleSystemSim** particleSystems, PxLightCpuTask** inputPrepTasks,
+	                                          PxU32 batchSize, PxBaseTask& continuation);
+	virtual PxBaseTask& scheduleCollisionCpu(class ParticleSystemSim** particleSystems, PxU32 batchSize,
+	                                         PxBaseTask& continuation);
+	virtual PxBaseTask& schedulePipelineGpu(ParticleSystemSim** particleSystems, PxU32 batchSize,
+	                                        PxBaseTask& continuation);
+#if PX_SUPPORT_GPU_PHYSX
+	virtual class PxSceneGpu* createOrGetSceneGpu();
+#endif
+	//~Pt::Context implementation
+
+	// Shape pool access. Both functions lock mParticleShapePoolMutex internally.
+	// createParticleShape() returns NULL if the pool did not provide a shape.
+	ParticleShapeCpu* createParticleShape(ParticleSystemSimCpu* particleSystem, const ParticleCell* packet);
+	void releaseParticleShape(ParticleShapeCpu* shape);
+
+	// Task pool reference that was passed to the constructor.
+	Cm::FlushPool& getTaskPool()
+	{
+		return mTaskPool;
+	}
+
+  private:
+	ContextCpu(physx::PxTaskManager* taskManager, Cm::FlushPool& taskPool);
+
+	virtual ~ContextCpu();
+
+	// Implementations that the public add/remove entry points dispatch to.
+	ParticleSystemSim* addParticleSystemImpl(ParticleData* particleData, const ParticleSystemParameter& parameter,
+	                                         bool useGpuSupport);
+	ParticleData* removeParticleSystemImpl(ParticleSystemSim* system, bool acquireParticleData);
+
+	// Factory and its counterpart: allocate with PX_NEW, delete this with PX_DELETE.
+	static Context* createContextImpl(physx::PxTaskManager* taskManager, Cm::FlushPool& taskPool);
+
+	void destroyContextImpl();
+
+	// Pool of CPU particle system sims, used by addParticleSystemImpl() / removeParticleSystemImpl().
+	Cm::PoolList<ParticleSystemSimCpu, ContextCpu> mParticleSystemPool;
+	// Pool of particle shapes; every access is guarded by mParticleShapePoolMutex.
+	Cm::PoolList<ParticleShapeCpu, ContextCpu> mParticleShapePool;
+	Ps::Mutex mParticleShapePoolMutex;
+	// Created in the constructor and deleted in the destructor; target of the schedule*() calls.
+	Batcher* mBatcher;
+
+	// Not owned. In GPU builds it is used to reach the GPU dispatcher and CUDA context manager.
+	physx::PxTaskManager* mTaskManager;
+	Cm::FlushPool& mTaskPool;
+
+#if PX_SUPPORT_GPU_PHYSX
+	// Created together with the GPU scene in createOrGetSceneGpu(); deleted in the destructor.
+	class RigidBodyAccessGpu* mGpuRigidBodyAccess;
+#endif
+};
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_CONTEXT_CPU_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicHelper.h b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicHelper.h
new file mode 100644
index 00000000..5578a6c6
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicHelper.h
@@ -0,0 +1,320 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+#ifndef PT_DYNAMIC_HELPER_H
+#define PT_DYNAMIC_HELPER_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PtDynamicsKernels.h"
+#include "PtSpatialHash.h"
+#include "PtDynamicsTempBuffers.h"
+
+namespace physx
+{
+
+namespace Pt
+{
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+/*!
+Setup step before an SPH pass. For the density pass every particle density is reset to
+params.selfDensity and the matching force buffer entry is zeroed. For any other update type
+nothing is done.
+*/
+PX_FORCE_INLINE void updateParticlesPrePass(const SphUpdateType::Enum updateType, PxVec3* forceBuf, Particle* particles,
+                                            PxU32 numParticles, const DynamicsParameters& params)
+{
+	if(updateType != SphUpdateType::DENSITY)
+		return;
+
+	for(PxU32 idx = 0; idx < numParticles; ++idx)
+	{
+		// Start from the self density value.
+		particles[idx].density = params.selfDensity;
+		forceBuf[idx] = PxVec3(0);
+	}
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+/*!
+Finishing step after an SPH pass. For the force pass every force buffer entry is scaled by
+params.scaleToWorld divided by the density of the corresponding particle. For any other update
+type nothing is done.
+*/
+PX_FORCE_INLINE void updateParticlesPostPass(const SphUpdateType::Enum updateType, PxVec3* forceBuf,
+                                             Particle* particles, PxU32 numParticles, const DynamicsParameters& params)
+{
+	if(updateType != SphUpdateType::FORCE)
+		return;
+
+	for(PxU32 idx = 0; idx < numParticles; ++idx)
+		forceBuf[idx] *= params.scaleToWorld * (1.0f / particles[idx].density);
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+/*!
+Given a cell hash table, find neighboring cells and compute particle interactions.
+*/
+/*!
+Given a cell hash table, find neighboring cells and compute particle interactions.
+
+Every occupied bucket (numParticles != PX_INVALID_U32) is handled in two steps: first the
+interactions among the particles of the cell itself, then the interactions between the cell and the
+particles of its neighbor cells, whose indices are gathered into tempBuffers.mergedIndices.
+
+The function is declared inline because it is defined in a header; without it, including this header
+from more than one translation unit would produce multiple definitions.
+*/
+inline void updateCellsSubpacket(SphUpdateType::Enum updateType, PxVec3* __restrict forceBuf, Particle* __restrict particles,
+                                 const ParticleCell* __restrict cells, const PxU32* __restrict particleIndices,
+                                 const PxU32 numCellHashBuckets, const DynamicsParameters& params,
+                                 DynamicsTempBuffers& tempBuffers)
+{
+	PX_ASSERT(particles);
+	PX_ASSERT(cells);
+	PX_ASSERT(particleIndices);
+
+	//
+	// To process each pair of neighboring cells only once, a special neighborhood layout can be
+	// used. Thus, we do not need to consider all 26 neighbors of a cell but only half of them.
+	// Going through the list of cells, a cell X might not be aware of a neighboring cell Y with
+	// this layout, however, since cell Y in turn is aware of cell X the pair will still be processed
+	// at the end.
+	//
+	// The table holds the complete back plane (z - 1) followed by four cells of the cell's own plane.
+	// It is constant, so it is built once instead of once per bucket.
+	static const PxI16 neighborOffsets[13][3] = { { -1, -1, -1 }, { 0, -1, -1 }, { 1, -1, -1 },
+		                                          { -1, 0, -1 },  { 0, 0, -1 },  { 1, 0, -1 },
+		                                          { -1, 1, -1 },  { 0, 1, -1 },  { 1, 1, -1 },
+		                                          { 1, 0, 0 },    { -1, 1, 0 },  { 0, 1, 0 },
+		                                          { 1, 1, 0 } };
+
+	const bool isDensityUpdate = updateType == SphUpdateType::DENSITY;
+	const ParticleCell* neighborCells[13];
+
+	for(PxU32 c = 0; c < numCellHashBuckets; c++)
+	{
+		const ParticleCell& cell = cells[c];
+
+		// Unused hash bucket.
+		if(cell.numParticles == PX_INVALID_U32)
+			continue;
+
+		GridCellVector coords(cell.coords);
+
+		// Look up the neighbor cells; entries stay NULL where no cell was found.
+		// cellIdx only receives the bucket index output of findConstCell and is not used afterwards.
+		PxU32 cellIdx;
+		for(PxU32 n = 0; n < 13; n++)
+		{
+			neighborCells[n] = SpatialHash::findConstCell(
+			    cellIdx, GridCellVector(coords.x + neighborOffsets[n][0], coords.y + neighborOffsets[n][1],
+			                            coords.z + neighborOffsets[n][2]),
+			    cells, numCellHashBuckets);
+		}
+
+		// Compute interaction between particles inside the current cell: particle p - 1 against all
+		// particles after it, so that every pair within the cell is visited once.
+		// These calls still produce a lot of LHS. Going from two way to one way updates didn't help. TODO, more
+		// investigation.
+		for(PxU32 p = 1; p < cell.numParticles; p++)
+		{
+			updateParticleGroupPair(forceBuf, forceBuf, particles, particles,
+			                        particleIndices + cell.firstParticle + p - 1, 1,
+			                        particleIndices + cell.firstParticle + p, cell.numParticles - p, true,
+			                        isDensityUpdate, params, tempBuffers.simdPositionsSubpacket,
+			                        tempBuffers.indexStream);
+		}
+
+		// Compute interaction between particles of current cell and neighboring cells.
+		// The particle indices of all found neighbors are merged into one list first.
+		PxU32 srcIndexCount = 0;
+
+		for(PxU32 n = 0; n < 13; n++)
+		{
+			const ParticleCell* nCell = neighborCells[n];
+			if(!nCell)
+				continue;
+
+			for(PxU32 i = nCell->firstParticle, end = nCell->firstParticle + nCell->numParticles; i < end; i++)
+				tempBuffers.mergedIndices[srcIndexCount++] = particleIndices[i];
+		}
+
+		if(srcIndexCount > 0)
+		{
+			updateParticleGroupPair(forceBuf, forceBuf, particles, particles, particleIndices + cell.firstParticle,
+			                        cell.numParticles, tempBuffers.mergedIndices, srcIndexCount, true,
+			                        isDensityUpdate, params, tempBuffers.simdPositionsSubpacket,
+			                        tempBuffers.indexStream);
+		}
+	}
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+/*!
+Given two subpackets, i.e., their cell hash tables and particle arrays, find for each cell of the first subpacket
+the neighboring cells within the second subpacket and compute particle interactions for these neighboring cells.
+*/
+/*!
+Given two subpackets, i.e., their cell hash tables and particle arrays, find for each cell of one subpacket
+the neighboring cells within the other subpacket and compute particle interactions for these neighboring cells.
+
+swapAB selects the direction: if it is false the cells of subpacket A are iterated and their neighbors
+are looked up in subpacket B; if it is true the cells of B are iterated and A is searched. In both
+cases updateParticleGroupPair receives the A side first and the B side second.
+
+The function is declared inline because it is defined in a header; without it, including this header
+from more than one translation unit would produce multiple definitions.
+*/
+inline void updateCellsSubpacketPair(SphUpdateType::Enum updateType, PxVec3* __restrict forceBufA, PxVec3* __restrict forceBufB,
+                                     Particle* __restrict particlesSpA, Particle* __restrict particlesSpB,
+                                     const ParticleCell* __restrict cellsSpA, const ParticleCell* __restrict cellsSpB,
+                                     const PxU32* __restrict particleIndicesSpA, const PxU32* __restrict particleIndicesSpB,
+                                     const PxU32 numCellHashBucketsA, const PxU32 numCellHashBucketsB, bool twoWayUpdate,
+                                     const DynamicsParameters& params, DynamicsTempBuffers& tempBuffers, bool swapAB)
+{
+	PX_ASSERT(particlesSpA);
+	PX_ASSERT(particlesSpB);
+	PX_ASSERT(cellsSpA);
+	PX_ASSERT(cellsSpB);
+	PX_ASSERT(particleIndicesSpA);
+	PX_ASSERT(particleIndicesSpB);
+
+	// The 26 neighboring cells plus the cell with the same coordinates ({ 0, 0, 0 }) inside the other
+	// subpacket, ordered plane by plane (z - 1, z, z + 1). The table is constant, so it is built once
+	// instead of once per cell.
+	static const PxI16 neighborOffsets[27][3] = { { -1, -1, -1 }, { 0, -1, -1 }, { 1, -1, -1 },
+		                                          { -1, 0, -1 },  { 0, 0, -1 },  { 1, 0, -1 },
+		                                          { -1, 1, -1 },  { 0, 1, -1 },  { 1, 1, -1 },
+		                                          { -1, -1, 0 },  { 0, -1, 0 },  { 1, -1, 0 },
+		                                          { -1, 0, 0 },   { 0, 0, 0 },   { 1, 0, 0 },
+		                                          { -1, 1, 0 },   { 0, 1, 0 },   { 1, 1, 0 },
+		                                          { -1, -1, 1 },  { 0, -1, 1 },  { 1, -1, 1 },
+		                                          { -1, 0, 1 },   { 0, 0, 1 },   { 1, 0, 1 },
+		                                          { -1, 1, 1 },   { 0, 1, 1 },   { 1, 1, 1 } };
+
+	// "src" is the subpacket whose cells are iterated, "dst" the one that is searched for neighbors.
+	const ParticleCell* __restrict srcCell;
+	const ParticleCell* __restrict dstCell;
+	const PxU32* __restrict dstIndices;
+	PxU32 srcBuckets, dstBuckets;
+
+	if(swapAB)
+	{
+		srcCell = cellsSpB;
+		srcBuckets = numCellHashBucketsB;
+
+		dstCell = cellsSpA;
+		dstIndices = particleIndicesSpA;
+		dstBuckets = numCellHashBucketsA;
+	}
+	else
+	{
+		srcCell = cellsSpA;
+		srcBuckets = numCellHashBucketsA;
+
+		dstCell = cellsSpB;
+		dstIndices = particleIndicesSpB;
+		dstBuckets = numCellHashBucketsB;
+	}
+
+	const bool isDensityUpdate = updateType == SphUpdateType::DENSITY;
+	const ParticleCell* neighborCells[27];
+
+	// For the cells of the source subpacket find neighboring cells in the destination subpacket.
+	const ParticleCell* pcell_end = srcCell + srcBuckets;
+	for(const ParticleCell* pcell = srcCell; pcell < pcell_end; pcell++)
+	{
+		// Unused hash bucket.
+		if(pcell->numParticles == PX_INVALID_U32)
+			continue;
+
+		GridCellVector coords(pcell->coords);
+
+		// Look up the neighbor cells; entries stay NULL where no cell was found.
+		// cellIdx only receives the bucket index output of findConstCell and is not used afterwards.
+		PxU32 cellIdx;
+		for(PxU32 n = 0; n < 27; n++)
+		{
+			neighborCells[n] = SpatialHash::findConstCell(
+			    cellIdx,
+			    GridCellVector(coords.x + neighborOffsets[n][0], coords.y + neighborOffsets[n][1],
+			                   coords.z + neighborOffsets[n][2]),
+			    dstCell, dstBuckets);
+		}
+
+		// Merge the particle indices of all found neighbor cells into one list.
+		PxU32 indexCount = 0;
+
+		for(PxU32 n = 0; n < 27; n++)
+		{
+			const ParticleCell* nCell = neighborCells[n];
+			if(!nCell)
+				continue;
+
+			for(PxU32 i = nCell->firstParticle, end = nCell->firstParticle + nCell->numParticles; i < end; i++)
+				tempBuffers.mergedIndices[indexCount++] = dstIndices[i];
+		}
+
+		if(indexCount == 0)
+			continue;
+
+		// Compute interaction between particles of current cell and neighboring cells.
+		if(swapAB)
+		{
+			// The merged list holds A indices, the current cell belongs to B.
+			updateParticleGroupPair(forceBufA, forceBufB, particlesSpA, particlesSpB, tempBuffers.mergedIndices,
+			                        indexCount, particleIndicesSpB + pcell->firstParticle, pcell->numParticles,
+			                        twoWayUpdate, isDensityUpdate, params, tempBuffers.simdPositionsSubpacket,
+			                        tempBuffers.indexStream);
+		}
+		else
+		{
+			// The current cell belongs to A, the merged list holds B indices.
+			updateParticleGroupPair(forceBufA, forceBufB, particlesSpA, particlesSpB,
+			                        particleIndicesSpA + pcell->firstParticle, pcell->numParticles,
+			                        tempBuffers.mergedIndices, indexCount, twoWayUpdate, isDensityUpdate, params,
+			                        tempBuffers.simdPositionsSubpacket, tempBuffers.indexStream);
+		}
+	}
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+/*!
+Replaces particle.density by (density - selfDensity) * densityNormalizationFactor, i.e. the self
+density contribution is subtracted before the normalization factor is applied.
+*/
+PX_FORCE_INLINE void normalizeParticleDensity(Particle& particle, const PxF32 selfDensity,
+                                              const PxF32 densityNormalizationFactor)
+{
+	// normalize density
+	particle.density = (particle.density - selfDensity) * densityNormalizationFactor;
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_DYNAMIC_HELPER_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtDynamics.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamics.cpp
new file mode 100644
index 00000000..2d1fd82b
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamics.cpp
@@ -0,0 +1,828 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtDynamics.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PsBitUtils.h"
+#include "PsIntrinsics.h"
+#include "PsAllocator.h"
+#include "CmFlushPool.h"
+
+#include "PtDynamicHelper.h"
+#include "PtParticleSystemSimCpu.h"
+#include "PtContext.h"
+
+#define MERGE_HALO_REGIONS 0
+
+using namespace physx;
+using namespace Pt;
+
+// Brute-force interaction pass between the sections of one packet and the halo regions next to
+// them. For every non-empty section (indices 0..25) the particle indices of its neighboring halo
+// regions are gathered into tempBuffers.mergedIndices and passed to updateParticleGroupPair()
+// in batches. A batch is flushed early whenever appending the next halo region would exceed
+// PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY entries, and once more at the end if anything is
+// still pending.
+//
+// forceBuf / particles address the whole reordered particle set; the section side of each pair
+// is addressed relative to the section's first particle through the ordered index table.
+PX_FORCE_INLINE void Dynamics::updateParticlesBruteForceHalo(SphUpdateType::Enum updateType, PxVec3* forceBuf,
+                                                             Particle* particles, const PacketSections& packetSections,
+                                                             const PacketHaloRegions& haloRegions,
+                                                             DynamicsTempBuffers& tempBuffers)
+{
+	const bool isDensityUpdate = (updateType == SphUpdateType::DENSITY);
+
+	for(PxU32 section = 0; section < 26; ++section)
+	{
+		const PxU32 sectionCount = packetSections.numParticles[section];
+		if(sectionCount == 0)
+			continue;
+
+		// Particle and force storage of the current section.
+		const PxU32 sectionStart = packetSections.firstParticle[section];
+		Particle* sectionParticles = particles + sectionStart;
+		PxVec3* sectionForces = forceBuf + sectionStart;
+
+		// Halo regions adjacent to this section.
+		const PxU32 regionCount = sSectionToHaloTable[section].numHaloRegions;
+		const PxU32* regionList = sSectionToHaloTable[section].haloRegionIndices;
+
+		// Gather halo particle indices region by region; flush the batch whenever the next
+		// region would no longer fit into the merge buffer.
+		PxU32 pending = 0;
+		for(PxU32 r = 0; r < regionCount; ++r)
+		{
+			const PxU32 region = regionList[r];
+			const PxU32 regionParticles = haloRegions.numParticles[region];
+			if(regionParticles == 0)
+				continue;
+
+			if(pending + regionParticles > PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY)
+			{
+				updateParticleGroupPair(sectionForces, forceBuf, sectionParticles, particles,
+				                        tempBuffers.orderedIndicesSubpacket, sectionCount, tempBuffers.mergedIndices,
+				                        pending, false, isDensityUpdate, mParams, tempBuffers.simdPositionsSubpacket,
+				                        tempBuffers.indexStream);
+				pending = 0;
+			}
+
+			const PxU32 regionStart = haloRegions.firstParticle[region];
+			for(PxU32 k = 0; k < regionParticles; ++k)
+				tempBuffers.mergedIndices[pending++] = regionStart + k;
+		}
+
+		// Process whatever is left in the merge buffer.
+		if(pending > 0)
+		{
+			updateParticleGroupPair(sectionForces, forceBuf, sectionParticles, particles,
+			                        tempBuffers.orderedIndicesSubpacket, sectionCount, tempBuffers.mergedIndices,
+			                        pending, false, isDensityUpdate, mParams, tempBuffers.simdPositionsSubpacket,
+			                        tempBuffers.indexStream);
+		}
+	}
+}
+
+// For each of the 26 outer packet sections (every section except the one in the centre) this
+// table stores the number of neighboring halo regions, followed by the indices of those
+// neighboring halo regions. The trailing comment on each row is the section index.
+Dynamics::SectionToHaloTable Dynamics::sSectionToHaloTable[26] = {
+	{ 19, { 0, 2, 6, 8, 18, 20, 24, 26, 36, 38, 42, 44, 54, 56, 66, 68, 78, 80, 90 } },     // 0
+	{ 19, { 1, 2, 7, 8, 19, 20, 25, 26, 45, 47, 51, 53, 55, 56, 72, 74, 84, 86, 91 } },     // 1
+	{ 15, { 0, 1, 2, 6, 7, 8, 18, 19, 20, 24, 25, 26, 54, 55, 56 } },                       // 2
+	{ 19, { 3, 5, 6, 8, 27, 29, 33, 35, 37, 38, 43, 44, 60, 62, 67, 68, 81, 83, 92 } },     // 3
+	{ 19, { 4, 5, 7, 8, 28, 29, 34, 35, 46, 47, 52, 53, 61, 62, 73, 74, 87, 89, 93 } },     // 4
+	{ 15, { 3, 4, 5, 6, 7, 8, 27, 28, 29, 33, 34, 35, 60, 61, 62 } },                       // 5
+	{ 15, { 0, 2, 3, 5, 6, 8, 36, 37, 38, 42, 43, 44, 66, 67, 68 } },                       // 6
+	{ 15, { 1, 2, 4, 5, 7, 8, 45, 46, 47, 51, 52, 53, 72, 73, 74 } },                       // 7
+	{ 9, { 0, 1, 2, 3, 4, 5, 6, 7, 8 } },                                                   // 8
+	{ 19, { 9, 11, 15, 17, 21, 23, 24, 26, 39, 41, 42, 44, 57, 59, 69, 71, 79, 80, 94 } },  // 9
+	{ 19, { 10, 11, 16, 17, 22, 23, 25, 26, 48, 50, 51, 53, 58, 59, 75, 77, 85, 86, 95 } }, // 10
+	{ 15, { 9, 10, 11, 15, 16, 17, 21, 22, 23, 24, 25, 26, 57, 58, 59 } },                  // 11
+	{ 19, { 12, 14, 15, 17, 30, 32, 33, 35, 40, 41, 43, 44, 63, 65, 70, 71, 82, 83, 96 } }, // 12
+	{ 19, { 13, 14, 16, 17, 31, 32, 34, 35, 49, 50, 52, 53, 64, 65, 76, 77, 88, 89, 97 } }, // 13
+	{ 15, { 12, 13, 14, 15, 16, 17, 30, 31, 32, 33, 34, 35, 63, 64, 65 } },                 // 14
+	{ 15, { 9, 11, 12, 14, 15, 17, 39, 40, 41, 42, 43, 44, 69, 70, 71 } },                  // 15
+	{ 15, { 10, 11, 13, 14, 16, 17, 48, 49, 50, 51, 52, 53, 75, 76, 77 } },                 // 16
+	{ 9, { 9, 10, 11, 12, 13, 14, 15, 16, 17 } },                                           // 17
+	{ 15, { 18, 20, 21, 23, 24, 26, 36, 38, 39, 41, 42, 44, 78, 79, 80 } },                 // 18
+	{ 15, { 19, 20, 22, 23, 25, 26, 45, 47, 48, 50, 51, 53, 84, 85, 86 } },                 // 19
+	{ 9, { 18, 19, 20, 21, 22, 23, 24, 25, 26 } },                                          // 20
+	{ 15, { 27, 29, 30, 32, 33, 35, 37, 38, 40, 41, 43, 44, 81, 82, 83 } },                 // 21
+	{ 15, { 28, 29, 31, 32, 34, 35, 46, 47, 49, 50, 52, 53, 87, 88, 89 } },                 // 22
+	{ 9, { 27, 28, 29, 30, 31, 32, 33, 34, 35 } },                                          // 23
+	{ 9, { 36, 37, 38, 39, 40, 41, 42, 43, 44 } },                                          // 24
+	{ 9, { 45, 46, 47, 48, 49, 50, 51, 52, 53 } },                                          // 25
+};
+
+// Single shared instance of the identity index table; filled by its constructor below.
+Dynamics::OrderedIndexTable Dynamics::sOrderedIndexTable;
+
+// Fills the first PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY entries with the identity mapping
+// (indices[i] == i).
+Dynamics::OrderedIndexTable::OrderedIndexTable()
+{
+	PxU32 slot = 0;
+	while(slot < PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY)
+	{
+		indices[slot] = slot;
+		++slot;
+	}
+}
+
+namespace physx
+{
+
+namespace Pt
+{
+
+// Task that forwards its execution to Dynamics::processPacketRange() for a single task data
+// slot of the owning Dynamics object.
+class DynamicsSphTask : public Cm::Task
+{
+  public:
+	// dynamics: object whose packet range is processed; taskSlot: index of the task data entry.
+	DynamicsSphTask(Dynamics& dynamics, PxU32 taskSlot) : mDynamicsContext(dynamics), mTaskDataIndex(taskSlot)
+	{
+	}
+
+	// Name string reported for this task.
+	virtual const char* getName() const
+	{
+		return "Pt::Dynamics.sph";
+	}
+
+	// Processes the packet range stored under this task's data index.
+	virtual void runInternal()
+	{
+		mDynamicsContext.processPacketRange(mTaskDataIndex);
+	}
+
+  private:
+	// Declared privately and not defined in this class body: the class holds a reference
+	// member, so instances are not meant to be assigned.
+	DynamicsSphTask& operator=(const DynamicsSphTask&);
+
+	Dynamics& mDynamicsContext;
+	PxU32 mTaskDataIndex;
+};
+
+} // namespace Pt
+} // namespace physx
+
+// Binds the dynamics stage to the particle system it works on. Both temporary buffers start
+// out unallocated (NULL), no per-task temp buffer sets exist yet (mNumTempBuffers == 0), and
+// the two merge tasks are constructed with this object and their task name strings.
+Dynamics::Dynamics(ParticleSystemSimCpu& particleSystem)
+: mParticleSystem(particleSystem)
+, mTempReorderedParticles(NULL)
+, mTempParticleForceBuf(NULL)
+, mMergeDensityTask(this, "Pt::Dynamics.mergeDensity")
+, mMergeForceTask(this, "Pt::Dynamics.mergeForce")
+, mNumTempBuffers(0)
+{
+}
+
+// The destructor body is empty: it releases nothing itself. Temporary buffers are released by
+// clear() and mergeForce(), not here.
+Dynamics::~Dynamics()
+{
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+// Releases all temporary storage owned by the dynamics stage: the reordered particle copy,
+// the temporary force buffer (if it is still allocated) and every per-task temp buffer set.
+void Dynamics::clear()
+{
+	if(mTempReorderedParticles)
+	{
+		mParticleSystem.mAlign16.deallocate(mTempReorderedParticles);
+		mTempReorderedParticles = NULL;
+	}
+
+	// mergeForce() frees the force buffer at the end of a completed SPH update. Release it here
+	// as well so it does not leak when clear() runs after updateSph() allocated the buffer but
+	// before mergeForce() got to execute.
+	if(mTempParticleForceBuf)
+	{
+		mParticleSystem.mAlign16.deallocate(mTempParticleForceBuf);
+		mTempParticleForceBuf = NULL;
+	}
+
+	// Shrinking to zero frees all per-task temp buffers.
+	adjustTempBuffers(0);
+}
+
+// Resizes the set of per-task temp buffers to `count` entries.
+//  - Entries in [count, mNumTempBuffers) have each of their owned buffers freed.
+//  - Entries in [mNumTempBuffers, count) get a fresh set of buffers, sized from
+//    PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY (MAX_INDEX_STREAM_SIZE for the index stream).
+// `count` must not exceed PT_MAX_PARALLEL_TASKS_SPH; this is only checked by assertion.
+// Allocation results are not checked here.
+void Dynamics::adjustTempBuffers(PxU32 count)
+{
+	PX_ASSERT(count <= PT_MAX_PARALLEL_TASKS_SPH);
+	PX_ASSERT(mNumTempBuffers <= PT_MAX_PARALLEL_TASKS_SPH);
+	// Local 16-byte aligned allocator used for the two aligned buffers below.
+	Ps::AlignedAllocator<16, Ps::ReflectionAllocator<char> > align16;
+
+	// shrink
+	for(PxU32 i = count; i < mNumTempBuffers; ++i)
+	{
+		DynamicsTempBuffers& tempBuffers = mTempBuffers[i];
+
+		if(tempBuffers.indexStream)
+			PX_FREE_AND_RESET(tempBuffers.indexStream);
+
+		if(tempBuffers.hashKeys)
+			PX_FREE_AND_RESET(tempBuffers.hashKeys);
+
+		if(tempBuffers.mergedIndices)
+			PX_FREE_AND_RESET(tempBuffers.mergedIndices);
+
+		if(tempBuffers.indicesSubpacketA)
+			PX_FREE_AND_RESET(tempBuffers.indicesSubpacketA);
+
+		if(tempBuffers.indicesSubpacketB)
+			PX_FREE_AND_RESET(tempBuffers.indicesSubpacketB);
+
+		if(tempBuffers.cellHashTableSubpacketB)
+			PX_FREE_AND_RESET(tempBuffers.cellHashTableSubpacketB);
+
+		if(tempBuffers.cellHashTableSubpacketA)
+			PX_FREE_AND_RESET(tempBuffers.cellHashTableSubpacketA);
+
+		// The two buffers below were obtained from the aligned allocator, so they are returned
+		// to it instead of going through PX_FREE_AND_RESET.
+		if(tempBuffers.simdPositionsSubpacket)
+		{
+			align16.deallocate(tempBuffers.simdPositionsSubpacket);
+			tempBuffers.simdPositionsSubpacket = NULL;
+		}
+
+		if(tempBuffers.mergedHaloRegions)
+		{
+			align16.deallocate(tempBuffers.mergedHaloRegions);
+			tempBuffers.mergedHaloRegions = NULL;
+		}
+	}
+
+	// growing
+	for(PxU32 i = mNumTempBuffers; i < count; ++i)
+	{
+		DynamicsTempBuffers& tempBuffers = mTempBuffers[i];
+
+		// Make sure the number of hash buckets is a power of 2 (requirement for the used hash function)
+		tempBuffers.cellHashMaxSize = Ps::nextPowerOfTwo((PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY + 1));
+
+		// Local hash tables for particle cells (for two subpackets A and B).
+		tempBuffers.cellHashTableSubpacketA = reinterpret_cast<ParticleCell*>(
+		    PX_ALLOC(tempBuffers.cellHashMaxSize * sizeof(ParticleCell), "ParticleCell"));
+		tempBuffers.cellHashTableSubpacketB = reinterpret_cast<ParticleCell*>(
+		    PX_ALLOC(tempBuffers.cellHashMaxSize * sizeof(ParticleCell), "ParticleCell"));
+
+		// Particle index lists for local hash of particle cells (for two subpackets A and B).
+		tempBuffers.indicesSubpacketA = reinterpret_cast<PxU32*>(
+		    PX_ALLOC(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY * sizeof(PxU32), "Subpacket indices"));
+		tempBuffers.indicesSubpacketB = reinterpret_cast<PxU32*>(
+		    PX_ALLOC(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY * sizeof(PxU32), "Subpacket indices"));
+		// Index buffer used to batch particle indices gathered from several cells or halo regions.
+		tempBuffers.mergedIndices = reinterpret_cast<PxU32*>(
+		    PX_ALLOC(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY * sizeof(PxU32), "Subpacket merged indices"));
+		// Aligned particle buffer holding up to one subpacket worth of particles.
+		tempBuffers.mergedHaloRegions = reinterpret_cast<Particle*>(
+		    align16.allocate(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY * sizeof(Particle), __FILE__, __LINE__));
+
+		// One 16-bit hash key per particle of a subpacket.
+		tempBuffers.hashKeys = reinterpret_cast<PxU16*>(
+		    PX_ALLOC(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY * sizeof(PxU16), "Subpacket hashKeys"));
+
+		// SIMD buffer for storing intermediate particle positions of up to a subpacket size.
+		// Ceil up to multiple of four + 4 for safe unrolling.
+		// For 4 particles we need three Vec4V.
+		PxU32 paddedSubPacketMax = ((PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY + 3) & ~0x3) + 4;
+		tempBuffers.simdPositionsSubpacket =
+		    reinterpret_cast<PxU8*>(align16.allocate(3 * (paddedSubPacketMax / 4) * sizeof(Vec4V), __FILE__, __LINE__));
+
+		tempBuffers.indexStream =
+		    reinterpret_cast<PxU32*>(PX_ALLOC(MAX_INDEX_STREAM_SIZE * sizeof(PxU32), "indexStream"));
+		// Not allocated here: points at the shared static ordered index table, which is why the
+		// shrink loop above has no matching free for it.
+		tempBuffers.orderedIndicesSubpacket = sOrderedIndexTable.indices;
+	}
+
+	mNumTempBuffers = count;
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+// Entry point of the SPH update.
+//  1. Lazily allocates the reordered particle buffer and the zero-initialized force buffer.
+//  2. Gathers the particles referenced by mPacketParticlesIndices into mTempReorderedParticles.
+//  3. Partitions the packet hash table into at most PT_MAX_PARALLEL_TASKS_SPH contiguous packet
+//     ranges (mTaskData), aiming at a similar particle count per range.
+//  4. Makes sure there are at least as many temp buffer sets as tasks.
+//  5. Sets up the task chain
+//       density packet tasks -> mMergeDensityTask -> mMergeForceTask -> continuation
+//     and schedules the density pass.
+void Dynamics::updateSph(physx::PxBaseTask& continuation)
+{
+	Particle* particles = mParticleSystem.mParticleState->getParticleBuffer();
+	PxU32 numParticles = mParticleSystem.mNumPacketParticlesIndices;
+	const PxU32* particleIndices = mParticleSystem.mPacketParticlesIndices;
+	const ParticleCell* packets = mParticleSystem.mSpatialHash->getPackets();
+	const PacketSections* packetSections = mParticleSystem.mSpatialHash->getPacketSections();
+	PX_ASSERT(packets);
+	PX_ASSERT(packetSections);
+	PX_ASSERT(numParticles > 0);
+	// packetSections is only referenced by the assertion above.
+	PX_UNUSED(packetSections);
+
+	{
+		// sschirm: for now we reorder particles for sph exclusively, and scatter again after sph.
+		if(!mTempReorderedParticles)
+		{
+			PxU32 maxParticles = mParticleSystem.mParticleState->getMaxParticles();
+			mTempReorderedParticles = reinterpret_cast<Particle*>(
+			    mParticleSystem.mAlign16.allocate(maxParticles * sizeof(Particle), __FILE__, __LINE__));
+		}
+
+		// The force buffer is freed again in mergeForce(), so it is allocated here whenever it
+		// is found to be NULL.
+		if(!mTempParticleForceBuf)
+		{
+			PxU32 maxParticles = mParticleSystem.mParticleState->getMaxParticles();
+			// sschirm: Add extra float, since we are accessing this buffer later with: Vec4V_From_F32Array.
+			// The last 4 element would contain unallocated memory otherwise.
+			// Also initializing buffer that may only be used partially and non-contiguously with 0 to avoid
+			// simd operations to use bad values.
+			PxU32 byteSize = maxParticles * sizeof(PxVec3) + sizeof(PxF32);
+			mTempParticleForceBuf =
+			    reinterpret_cast<PxVec3*>(mParticleSystem.mAlign16.allocate(byteSize, __FILE__, __LINE__));
+			memset(mTempParticleForceBuf, 0, byteSize);
+		}
+
+		// Gather: slot i of the reordered buffer receives the particle referenced by the i-th
+		// packet particle index.
+		for(PxU32 i = 0; i < numParticles; ++i)
+		{
+			PxU32 particleIndex = particleIndices[i];
+			mTempReorderedParticles[i] = particles[particleIndex];
+		}
+
+		// would be nice to get available thread count to decide on task decomposition
+		// mParticleSystem.getContext().getTaskManager().getCpuDispatcher();
+
+		// use number of particles for task decomposition
+		PxU32 targetParticleCountPerTask =
+		    PxMax(PxU32(numParticles / PT_MAX_PARALLEL_TASKS_SPH), PxU32(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY));
+		PxU16 packetIndex = 0;
+		PxU16 lastPacketIndex = 0;
+		PxU32 numTasks = 0;
+		for(PxU32 i = 0; i < PT_MAX_PARALLEL_TASKS_SPH; ++i)
+		{
+			// if this is the last iteration, we need to gather all remaining packets
+			if(i == PT_MAX_PARALLEL_TASKS_SPH - 1)
+				targetParticleCountPerTask = 0xffffffff;
+
+			lastPacketIndex = packetIndex;
+			PxU32 currentParticleCount = 0;
+			// Extend the range packet by packet until the target count is reached or the hash
+			// table is exhausted; packets with numParticles == PX_INVALID_U32 contribute nothing.
+			while(currentParticleCount < targetParticleCountPerTask && packetIndex < PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE)
+			{
+				const ParticleCell& packet = packets[packetIndex];
+				currentParticleCount += (packet.numParticles != PX_INVALID_U32) ? packet.numParticles : 0;
+				packetIndex++;
+			}
+
+			if(currentParticleCount > 0)
+			{
+				PX_ASSERT(lastPacketIndex != packetIndex);
+				mTaskData[i].beginPacketIndex = lastPacketIndex;
+				mTaskData[i].endPacketIndex = packetIndex;
+				numTasks++;
+			}
+			else
+			{
+				// No particles left for this slot: mark it as unused.
+				mTaskData[i].beginPacketIndex = PX_INVALID_U16;
+				mTaskData[i].endPacketIndex = PX_INVALID_U16;
+			}
+		}
+		PX_ASSERT(packetIndex == PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE);
+
+		mNumTasks = numTasks;
+		// Only ever grows here: the buffer count never drops below the current one.
+		adjustTempBuffers(PxMax(numTasks, mNumTempBuffers));
+
+		mMergeForceTask.setContinuation(&continuation);
+		mMergeDensityTask.setContinuation(&mMergeForceTask);
+
+		schedulePackets(SphUpdateType::DENSITY, mMergeDensityTask);
+		mMergeDensityTask.removeReference();
+	}
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+// Step between the two SPH passes (presumably invoked through mMergeDensityTask once the
+// density packet tasks have finished -- confirm against the task delegate in the header).
+// Schedules the FORCE pass with mMergeForceTask as continuation, then removes the reference
+// held on mMergeForceTask. The continuation argument is unused.
+void Dynamics::mergeDensity(physx::PxBaseTask* /*continuation*/)
+{
+	schedulePackets(SphUpdateType::FORCE, mMergeForceTask);
+	mMergeForceTask.removeReference();
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+// Scatters the SPH results back to the particle system. For every packet particle the density
+// is normalized in the reordered copy, the particle is written back to the slot given by its
+// packet particle index, and its force is stored in the transient buffer at that same slot.
+// The temporary force buffer is released afterwards. The continuation argument is unused.
+void Dynamics::mergeForce(physx::PxBaseTask* /*continuation*/)
+{
+	const PxU32* scatterIndices = mParticleSystem.mPacketParticlesIndices;
+	Particle* dstParticles = mParticleSystem.mParticleState->getParticleBuffer();
+	PxVec3* dstForces = mParticleSystem.mTransientBuffer;
+	const PxU32 count = mParticleSystem.mNumPacketParticlesIndices;
+
+	for(PxU32 src = 0; src < count; ++src)
+	{
+		const PxU32 dst = scatterIndices[src];
+
+		// The normalization is applied to the reordered copy itself before it is written back.
+		normalizeParticleDensity(mTempReorderedParticles[src], mParams.selfDensity,
+		                         mParams.densityNormalizationFactor);
+
+		dstParticles[dst] = mTempReorderedParticles[src];
+		dstForces[dst] = mTempParticleForceBuf[src];
+	}
+
+	// The force buffer only lives for the duration of one SPH update.
+	mParticleSystem.mAlign16.deallocate(mTempParticleForceBuf);
+	mTempParticleForceBuf = NULL;
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+// Records the update type for the upcoming pass and creates one DynamicsSphTask per prepared
+// task data slot. Each task is placement-constructed in memory obtained from the context's
+// task pool, gets `continuation` as its continuation and then has its reference removed.
+void Dynamics::schedulePackets(SphUpdateType::Enum updateType, physx::PxBaseTask& continuation)
+{
+	mCurrentUpdateType = updateType;
+
+	PxU32 slot = 0;
+	while(slot < mNumTasks)
+	{
+		// Every slot below mNumTasks is expected to carry a valid packet range.
+		PX_ASSERT(mTaskData[slot].beginPacketIndex != PX_INVALID_U16 && mTaskData[slot].endPacketIndex != PX_INVALID_U16);
+
+		void* taskMemory = mParticleSystem.getContext().getTaskPool().allocate(sizeof(DynamicsSphTask));
+		DynamicsSphTask* sphTask = PX_PLACEMENT_NEW(taskMemory, DynamicsSphTask)(*this, slot);
+
+		sphTask->setContinuation(&continuation);
+		sphTask->removeReference();
+
+		++slot;
+	}
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+// Work function of one SPH task. Walks the packet range [beginPacketIndex, endPacketIndex)
+// stored in mTaskData[taskDataIndex], skips packets whose numParticles equals PX_INVALID_U32,
+// and for every other packet collects its halo regions and calls updatePacket() with the
+// current update type and the temp buffer set that belongs to this task index.
+void Dynamics::processPacketRange(PxU32 taskDataIndex)
+{
+	TaskData& range = mTaskData[taskDataIndex];
+
+	Particle* reorderedParticles = mTempReorderedParticles;
+	PxVec3* reorderedForces = mTempParticleForceBuf;
+
+	const ParticleCell* packetTable = mParticleSystem.mSpatialHash->getPackets();
+	const PacketSections* sectionTable = mParticleSystem.mSpatialHash->getPacketSections();
+
+	for(PxU16 packetIdx = range.beginPacketIndex; packetIdx < range.endPacketIndex; ++packetIdx)
+	{
+		const ParticleCell& current = packetTable[packetIdx];
+		if(current.numParticles != PX_INVALID_U32)
+		{
+			// Collect the halo regions holding the neighboring particles of this packet.
+			PacketHaloRegions halo;
+			SpatialHash::getHaloRegions(halo, current.coords, packetTable, sectionTable,
+			                            PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE);
+
+			updatePacket(mCurrentUpdateType, reorderedForces, reorderedParticles, current, sectionTable[packetIdx],
+			             halo, mTempBuffers[taskDataIndex]);
+		}
+	}
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+// Runs one SPH pass (selected by updateType) for a single packet:
+//  - pre pass over the packet's particles,
+//  - particle interactions, either by brute force (when both the packet and its largest halo
+//    region hold at most PT_BRUTE_FORCE_PARTICLE_THRESHOLD particles) or through the local
+//    cell hash,
+//  - post pass over the packet's particles.
+// forceBuf / particles address the whole reordered particle set; the packet's own range starts
+// at packet.firstParticle.
+void Dynamics::updatePacket(SphUpdateType::Enum updateType, PxVec3* forceBuf, Particle* particles,
+                            const ParticleCell& packet, const PacketSections& packetSections,
+                            const PacketHaloRegions& haloRegions, DynamicsTempBuffers& tempBuffers)
+{
+	PX_COMPILE_TIME_ASSERT(PT_BRUTE_FORCE_PARTICLE_THRESHOLD <= PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY);
+
+	Particle* packetParticles = particles + packet.firstParticle;
+	PxVec3* packetForceBuf = forceBuf + packet.firstParticle;
+
+	updateParticlesPrePass(updateType, packetForceBuf, packetParticles, packet.numParticles, mParams);
+
+	const bool packetIsSmall = (packet.numParticles <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD);
+	const bool useBruteForce = packetIsSmall && (haloRegions.maxNumParticles <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD);
+
+	if(useBruteForce)
+	{
+		// Too few particles in the packet and its neighbors to justify building the local cell
+		// hash, so every particle is tested against every other particle.
+		// sschirm: TODO check whether one way is faster (fewer function calls... more math)
+		const bool isDensityUpdate = (updateType == SphUpdateType::DENSITY);
+
+		// Pair particle `current` with all particles that follow it in the packet (two-way update).
+		for(PxU32 current = 0; current + 1 < packet.numParticles; ++current)
+		{
+			const PxU32 next = current + 1;
+			updateParticleGroupPair(packetForceBuf, packetForceBuf, packetParticles, packetParticles,
+			                        tempBuffers.orderedIndicesSubpacket + current, 1,
+			                        tempBuffers.orderedIndicesSubpacket + next, packet.numParticles - next, true,
+			                        isDensityUpdate, mParams, tempBuffers.simdPositionsSubpacket,
+			                        tempBuffers.indexStream);
+		}
+
+		// Interactions between particles of this packet and particles of neighboring packets.
+		updateParticlesBruteForceHalo(updateType, forceBuf, particles, packetSections, haloRegions, tempBuffers);
+	}
+	else
+	{
+		updatePacketLocalHash(updateType, forceBuf, particles, packet, packetSections, haloRegions, tempBuffers);
+	}
+
+	updateParticlesPostPass(updateType, packetForceBuf, packetParticles, packet.numParticles, mParams);
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+void Dynamics::updatePacketLocalHash(SphUpdateType::Enum updateType, PxVec3* forceBuf, Particle* particles,
+                                     const ParticleCell& packet, const PacketSections& packetSections,
+                                     const PacketHaloRegions& haloRegions, DynamicsTempBuffers& tempBuffers)
+{
+	// Particle index lists for local hash of particle cells (for two subpackets A and B).
+	PxU32* particleIndicesSpA = tempBuffers.indicesSubpacketA;
+	PxU32* particleIndicesSpB = tempBuffers.indicesSubpacketB;
+
+	// Local hash tables for particle cells (for two subpackets A and B).
+	ParticleCell* particleCellsSpA = tempBuffers.cellHashTableSubpacketA;
+	ParticleCell* particleCellsSpB = tempBuffers.cellHashTableSubpacketB;
+
+	PxVec3 packetCorner =
+	    PxVec3(PxReal(packet.coords.x), PxReal(packet.coords.y), PxReal(packet.coords.z)) * mParams.packetSize;
+
+	PxU32 particlesLeftA0 = packet.numParticles;
+	Particle* particlesSpA0 = particles + packet.firstParticle;
+	PxVec3* forceBufA0 = forceBuf + packet.firstParticle;
+
+	while(particlesLeftA0)
+	{
+		PxU32 numParticlesSpA = PxMin(particlesLeftA0, static_cast<PxU32>(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY));
+
+		// Make sure the number of hash buckets is a power of 2 (requirement for the used hash function)
+		const PxU32 numCellHashBucketsSpA = Ps::nextPowerOfTwo(numParticlesSpA + 1);
+		PX_ASSERT(numCellHashBucketsSpA <= tempBuffers.cellHashMaxSize);
+
+		// Get local cell hash for the current subpacket
+		SpatialHash::buildLocalHash(particlesSpA0, numParticlesSpA, particleCellsSpA, particleIndicesSpA,
+		                            tempBuffers.hashKeys, numCellHashBucketsSpA, mParams.cellSizeInv, packetCorner);
+
+		//---------------------------------------------------------------------------------------------------
+
+		//
+		// Compute particle interactions between particles within the current subpacket.
+		//
+
+		updateCellsSubpacket(updateType, forceBufA0, particlesSpA0, particleCellsSpA, particleIndicesSpA,
+		                     numCellHashBucketsSpA, mParams, tempBuffers);
+
+		//---------------------------------------------------------------------------------------------------
+
+		//
+		// Compute particle interactions between particles of current subpacket and particles
+		// of other subpackets within the same packet (i.e., we process all subpacket pairs).
+		//
+
+		PxU32 particlesLeftB = particlesLeftA0 - numParticlesSpA;
+		Particle* particlesSpB = particlesSpA0 + numParticlesSpA;
+		PxVec3* forceBufB = forceBufA0 + numParticlesSpA;
+
+		while(particlesLeftB)
+		{
+			PxU32 numParticlesSpB = PxMin(particlesLeftB, static_cast<PxU32>(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY));
+
+			// Make sure the number of hash buckets is a power of 2 (requirement for the used hash function)
+			const PxU32 numCellHashBucketsSpB = Ps::nextPowerOfTwo(numParticlesSpB + 1);
+			PX_ASSERT(numCellHashBucketsSpB <= tempBuffers.cellHashMaxSize);
+
+			// Get local cell hash for other subpacket
+			SpatialHash::buildLocalHash(particlesSpB, numParticlesSpB, particleCellsSpB, particleIndicesSpB,
+			                            tempBuffers.hashKeys, numCellHashBucketsSpB, mParams.cellSizeInv, packetCorner);
+
+			// For the cells of subpacket A, find neighboring cells in the subpacket B and compute particle
+			// interactions.
+			updateCellsSubpacketPair(updateType, forceBufA0, forceBufB, particlesSpA0, particlesSpB, particleCellsSpA,
+			                         particleCellsSpB, particleIndicesSpA, particleIndicesSpB, numCellHashBucketsSpA,
+			                         numCellHashBucketsSpB, true, mParams, tempBuffers,
+			                         numParticlesSpA < numParticlesSpB);
+
+			particlesLeftB -= numParticlesSpB;
+			particlesSpB += numParticlesSpB;
+			forceBufB += numParticlesSpB;
+		}
+
+		particlesLeftA0 -= numParticlesSpA;
+		particlesSpA0 += numParticlesSpA;
+		forceBufA0 += numParticlesSpA;
+	}
+
+	//---------------------------------------------------------------------------------------------------
+
+	//
+	// Compute particle interactions between particles of sections of the current packet and particles of neighboring
+	// halo regions
+	//
+
+	PX_ASSERT(PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION <= PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY);
+	if(haloRegions.maxNumParticles != 0)
+	{
+		for(PxU32 s = 0; s < 26; s++)
+		{
+			PxU32 numSectionParticles = packetSections.numParticles[s];
+			if(numSectionParticles == 0)
+				continue;
+
+			bool sectionEnablesBruteForce = (numSectionParticles <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION);
+
+			SectionToHaloTable& neighborHaloRegions = sSectionToHaloTable[s];
+			PxU32 numHaloNeighbors = neighborHaloRegions.numHaloRegions;
+
+			PxU32 particlesLeftA = numSectionParticles;
+			Particle* particlesSpA = particles + packetSections.firstParticle[s];
+			PxVec3* forceBufA = forceBuf + packetSections.firstParticle[s];
+
+			while(particlesLeftA)
+			{
+				PxU32 numParticlesSpA =
+				    PxMin(particlesLeftA, static_cast<PxU32>(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY));
+
+				// Compute particle interactions between particles of the current subpacket (of the section)
+				// and particles of neighboring halo regions relevant.
+
+				// Process halo regions which need local hash building first.
+				bool isLocalHashValid = false;
+
+				// Make sure the number of hash buckets is a power of 2 (requirement for the used hash function)
+				const PxU32 numCellHashBucketsSpA = Ps::nextPowerOfTwo(numParticlesSpA + 1);
+				PX_ASSERT(numCellHashBucketsSpA <= tempBuffers.cellHashMaxSize);
+#if MERGE_HALO_REGIONS
+				// Read halo region particles into temporary buffer
+				PxU32 numMergedHaloParticles = 0;
+				for(PxU32 h = 0; h < numHaloNeighbors; h++)
+				{
+					PxU32 haloRegionIdx = neighborHaloRegions.haloRegionIndices[h];
+					PxU32 numHaloParticles = haloRegions.numParticles[haloRegionIdx];
+
+					// chunk regions into subpackets!
+					PxU32 particlesLeftB = numHaloParticles;
+					Particle* particlesSpB = particles + haloRegions.firstParticle[haloRegionIdx];
+					PxVec3* forceBufB = forceBuf + haloRegions.firstParticle[haloRegionIdx];
+					while(particlesLeftB)
+					{
+						PxU32 numParticlesSpB =
+						    PxMin(particlesLeftB, static_cast<PxU32>(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY));
+
+						// if there are plenty of particles already, don't bother to do the copy for merging.
+						if(numParticlesSpB > PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION)
+						{
+							updateSubpacketPairHalo(forceBufA, particlesSpA, numParticlesSpA, particleCellsSpA,
+							                        particleIndicesSpA, isLocalHashValid, numCellHashBucketsSpA,
+							                        forceBufB, particlesSpB, numParticlesSpB, particleCellsSpB,
+							                        particleIndicesSpB, packetCorner, updateType, hashKeyArray,
+							                        tempBuffers);
+						}
+						else
+						{
+							if(numMergedHaloParticles + numParticlesSpB > PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY)
+							{
+								// flush
+								updateSubpacketPairHalo(forceBufA, particlesSpA, numParticlesSpA, particleCellsSpA,
+								                        particleIndicesSpA, isLocalHashValid, numCellHashBucketsSpA,
+								                        tempBuffers.mergedHaloRegions, numMergedHaloParticles,
+								                        particleCellsSpB, particleIndicesSpB, packetCorner, updateType,
+								                        hashKeyArray, tempBuffers);
+								numMergedHaloParticles = 0;
+							}
+
+							for(PxU32 k = 0; k < numParticlesSpB; ++k)
+								tempBuffers.mergedHaloRegions[numMergedHaloParticles++] = particlesSpB[k];
+						}
+
+						particlesLeftB -= numParticlesSpB;
+						particlesSpB += numParticlesSpB;
+					}
+				}
+
+				// flush
+				updateSubpacketPairHalo(forceBufA, particlesSpA, numParticlesSpA, particleCellsSpA, particleIndicesSpA,
+				                        isLocalHashValid, numCellHashBucketsSpA, tempBuffers.mergedHaloRegions,
+				                        numMergedHaloParticles, particleCellsSpB, particleIndicesSpB, packetCorner,
+				                        updateType, hashKeyArray, tempBuffers);
+#else  // MERGE_HALO_REGIONS
+				for(PxU32 h = 0; h < numHaloNeighbors; h++)
+				{
+					PxU32 haloRegionIdx = neighborHaloRegions.haloRegionIndices[h];
+					PxU32 numHaloParticles = haloRegions.numParticles[haloRegionIdx];
+
+					bool haloRegionEnablesBruteForce =
+					    (numHaloParticles <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION);
+
+					if(sectionEnablesBruteForce && haloRegionEnablesBruteForce)
+						continue;
+
+					if(!isLocalHashValid)
+					{
+						// Get local cell hash for the current subpacket
+						SpatialHash::buildLocalHash(particlesSpA, numParticlesSpA, particleCellsSpA, particleIndicesSpA,
+						                            tempBuffers.hashKeys, numCellHashBucketsSpA, mParams.cellSizeInv,
+						                            packetCorner);
+						isLocalHashValid = true;
+					}
+
+					PxU32 particlesLeftB = numHaloParticles;
+					Particle* particlesSpB = particles + haloRegions.firstParticle[haloRegionIdx];
+
+					while(particlesLeftB)
+					{
+						PxU32 numParticlesSpB =
+						    PxMin(particlesLeftB, static_cast<PxU32>(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY));
+
+						// It is important that no data is written to particles in halo regions since they belong to
+						// a neighboring packet. The interaction effect of the current packet on the neighboring packet
+						// will be
+						// considered when the neighboring packet is processed.
+
+						// Make sure the number of hash buckets is a power of 2 (requirement for the used hash function)
+						const PxU32 numCellHashBucketsSpB = Ps::nextPowerOfTwo(numParticlesSpB + 1);
+						PX_ASSERT(numCellHashBucketsSpB <= tempBuffers.cellHashMaxSize);
+
+						// Get local cell hash for other subpacket
+						SpatialHash::buildLocalHash(particlesSpB, numParticlesSpB, particleCellsSpB, particleIndicesSpB,
+						                            tempBuffers.hashKeys, numCellHashBucketsSpB, mParams.cellSizeInv,
+						                            packetCorner);
+
+						// For the cells of subpacket A, find neighboring cells in the subpacket B and compute particle
+						// interactions.
+						updateCellsSubpacketPair(updateType, forceBufA, NULL, particlesSpA, particlesSpB,
+						                         particleCellsSpA, particleCellsSpB, particleIndicesSpA,
+						                         particleIndicesSpB, numCellHashBucketsSpA, numCellHashBucketsSpB,
+						                         false, mParams, tempBuffers, numParticlesSpA > numParticlesSpB);
+
+						particlesLeftB -= numParticlesSpB;
+						particlesSpB += numParticlesSpB;
+					}
+				}
+
+				// Now process halo regions which don't need local hash building.
+				PxU32 mergedIndexCount = 0;
+				for(PxU32 h = 0; h < numHaloNeighbors; h++)
+				{
+					PxU32 haloRegionIdx = neighborHaloRegions.haloRegionIndices[h];
+					PxU32 numHaloParticles = haloRegions.numParticles[haloRegionIdx];
+					if(numHaloParticles == 0)
+						continue;
+
+					bool haloRegionEnablesBruteForce =
+					    (numHaloParticles <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION);
+
+					if(!sectionEnablesBruteForce || !haloRegionEnablesBruteForce)
+						continue;
+
+					// The section and the halo region do not have enough particles to make it worth
+					// building a local cell hash --> use brute force approach
+
+					// This is given by the brute force condition (haloRegionEnablesBruteForce). It is necessary
+					// to make sure a halo region alone fits into the merge buffer.
+					PX_ASSERT(numHaloParticles <= PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY);
+
+					if(mergedIndexCount + numHaloParticles > PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY)
+					{
+						updateParticleGroupPair(forceBufA, NULL, particlesSpA, particles,
+						                        tempBuffers.orderedIndicesSubpacket, numSectionParticles,
+						                        tempBuffers.mergedIndices, mergedIndexCount, false,
+						                        updateType == SphUpdateType::DENSITY, mParams,
+						                        tempBuffers.simdPositionsSubpacket, tempBuffers.indexStream);
+						mergedIndexCount = 0;
+					}
+
+					PxU32 hpIndex = haloRegions.firstParticle[haloRegionIdx];
+					for(PxU32 k = 0; k < numHaloParticles; k++)
+						tempBuffers.mergedIndices[mergedIndexCount++] = hpIndex++;
+				}
+
+				if(mergedIndexCount > 0)
+				{
+					updateParticleGroupPair(forceBufA, NULL, particlesSpA, particles, tempBuffers.orderedIndicesSubpacket,
+					                        numSectionParticles, tempBuffers.mergedIndices, mergedIndexCount, false,
+					                        updateType == SphUpdateType::DENSITY, mParams,
+					                        tempBuffers.simdPositionsSubpacket, tempBuffers.indexStream);
+				}
+#endif // MERGE_HALO_REGIONS
+
+				particlesLeftA -= numParticlesSpA;
+				particlesSpA += numParticlesSpA;
+				forceBufA += numParticlesSpA;
+			}
+		}
+	}
+}
+
+//-------------------------------------------------------------------------------------------------------------------//
+
+// Computes the one-way interaction of a halo-region subpacket (B) on a section subpacket (A).
+//
+// Only forceBufA / particlesSpA receive results: the second force buffer is passed as NULL and the
+// two-way flag as false, because halo particles belong to a neighboring packet whose own update
+// accounts for the opposite direction.
+//
+// If both subpackets are at or below PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION particles,
+// all pairs are tested directly. Otherwise local cell hashes are used; the hash of A is built at
+// most once and isLocalHashSpAValid is set so later calls for the same A can skip the rebuild.
+void Dynamics::updateSubpacketPairHalo(PxVec3* __restrict forceBufA, Particle* __restrict particlesSpA,
+                                       PxU32 numParticlesSpA, ParticleCell* __restrict particleCellsSpA,
+                                       PxU32* __restrict particleIndicesSpA, bool& isLocalHashSpAValid,
+                                       PxU32 numCellHashBucketsSpA, Particle* __restrict particlesSpB,
+                                       PxU32 numParticlesSpB, ParticleCell* __restrict particleCellsSpB,
+                                       PxU32* __restrict particleIndicesSpB, const PxVec3& packetCorner,
+                                       SphUpdateType::Enum updateType, PxU16* __restrict hashKeyArray,
+                                       DynamicsTempBuffers& tempBuffers)
+{
+	const bool smallSection = (numParticlesSpA <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION);
+	const bool smallHaloRegion = (numParticlesSpB <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION);
+
+	if(smallSection && smallHaloRegion)
+	{
+		// Too few particles on both sides to make a local cell hash worthwhile --> brute force.
+		// Both groups are addressed through the same ordered index table.
+		updateParticleGroupPair(forceBufA, NULL, particlesSpA, particlesSpB, tempBuffers.orderedIndicesSubpacket,
+		                        numParticlesSpA, tempBuffers.orderedIndicesSubpacket, numParticlesSpB, false,
+		                        updateType == SphUpdateType::DENSITY, mParams, tempBuffers.simdPositionsSubpacket,
+		                        tempBuffers.indexStream);
+		return;
+	}
+
+	// Hashed path: make sure subpacket A has a local cell hash (built lazily, reused by the caller).
+	if(!isLocalHashSpAValid)
+	{
+		SpatialHash::buildLocalHash(particlesSpA, numParticlesSpA, particleCellsSpA, particleIndicesSpA,
+		                            hashKeyArray, numCellHashBucketsSpA, mParams.cellSizeInv, packetCorner);
+		isLocalHashSpAValid = true;
+	}
+
+	// The hash function requires a power-of-two bucket count.
+	const PxU32 bucketCountSpB = Ps::nextPowerOfTwo(numParticlesSpB + 1);
+	PX_ASSERT(bucketCountSpB <= tempBuffers.cellHashMaxSize);
+
+	// The hash of subpacket B is rebuilt on every call.
+	SpatialHash::buildLocalHash(particlesSpB, numParticlesSpB, particleCellsSpB, particleIndicesSpB, hashKeyArray,
+	                            bucketCountSpB, mParams.cellSizeInv, packetCorner);
+
+	// For the cells of A, find neighboring cells in B and compute the particle interactions.
+	// NOTE(review): the meaning of the trailing bool (A smaller than B) is defined by
+	// updateCellsSubpacketPair, which is not visible here.
+	updateCellsSubpacketPair(updateType, forceBufA, NULL, particlesSpA, particlesSpB, particleCellsSpA,
+	                         particleCellsSpB, particleIndicesSpA, particleIndicesSpB, numCellHashBucketsSpA,
+	                         bucketCountSpB, false, mParams, tempBuffers, numParticlesSpA < numParticlesSpB);
+}
+//-------------------------------------------------------------------------------------------------------------------//
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtDynamics.h b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamics.h
new file mode 100644
index 00000000..0af21fa4
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamics.h
@@ -0,0 +1,144 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_DYNAMICS_H
+#define PT_DYNAMICS_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PtConfig.h"
+#include "PtParticle.h"
+#include "PtDynamicsParameters.h"
+#include "PtDynamicsTempBuffers.h"
+#include "CmBitMap.h"
+#include "CmTask.h"
+
+namespace physx
+{
+
+namespace Pt
+{
+
+struct ParticleCell;
+struct PacketSections;
+struct PacketHaloRegions;
+
+// Declares the SPH update entry point (updateSph) together with the packet-level helpers,
+// temporary buffers and merge tasks it uses. The implementation lives in the corresponding .cpp;
+// the comments below describe only what these declarations show.
+class Dynamics
+{
+  public:
+	// Binds this object to the given particle system (stored by reference in mParticleSystem).
+	Dynamics(class ParticleSystemSimCpu& particleSystem);
+	~Dynamics();
+
+	void clear();
+
+	// Entry point of the SPH update; 'continuation' is the task handed on to the scheduled work.
+	void updateSph(physx::PxBaseTask& continuation);
+
+	// Returns a mutable reference to the dynamics parameters owned by this object.
+	PX_FORCE_INLINE DynamicsParameters& getParameter()
+	{
+		return mParams;
+	}
+
+  private:
+	// Table to get the neighboring halo region indices for a packet section
+	struct SectionToHaloTable
+	{
+		PxU32 numHaloRegions;        // number of valid entries in haloRegionIndices
+		PxU32 haloRegionIndices[19]; // No packet section has more than 19 neighboring halo regions
+	};
+
+	// Index table with one entry per particle of a maximally sized subpacket.
+	// NOTE(review): presumably filled with 0..N-1 by the constructor -- confirm in the .cpp.
+	struct OrderedIndexTable
+	{
+		OrderedIndexTable();
+		PxU32 indices[PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY];
+	};
+
+	// Packet range assigned to one task.
+	// NOTE(review): looks like a half-open [begin, end) range -- confirm in processPacketRange.
+	struct TaskData
+	{
+		PxU16 beginPacketIndex;
+		PxU16 endPacketIndex;
+	};
+
+	void adjustTempBuffers(PxU32 count);
+
+	void schedulePackets(SphUpdateType::Enum updateType, physx::PxBaseTask& continuation);
+	// taskDataIndex selects an entry of mTaskData / mTempBuffers (presumably -- see the .cpp).
+	void processPacketRange(PxU32 taskDataIndex);
+
+	void updatePacket(SphUpdateType::Enum updateType, PxVec3* forceBuf, Particle* particles, const ParticleCell& packet,
+	                  const PacketSections& packetSections, const PacketHaloRegions& haloRegions,
+	                  struct DynamicsTempBuffers& tempBuffers);
+
+	void updatePacketLocalHash(SphUpdateType::Enum updateType, PxVec3* forceBuf, Particle* particles,
+	                           const ParticleCell& packet, const PacketSections& packetSections,
+	                           const PacketHaloRegions& haloRegions, DynamicsTempBuffers& tempBuffers);
+
+	// One-way interaction of halo subpacket B on section subpacket A. isLocalHashSpAValid is an
+	// in/out flag: it is set once the local hash of A has been built so it is not rebuilt.
+	void updateSubpacketPairHalo(PxVec3* __restrict forceBufA, Particle* __restrict particlesSpA, PxU32 numParticlesSpA,
+	                             ParticleCell* __restrict particleCellsSpA, PxU32* __restrict particleIndicesSpA,
+	                             bool& isLocalHashSpAValid, PxU32 numCellHashBucketsSpA,
+	                             Particle* __restrict particlesSpB, PxU32 numParticlesSpB,
+	                             ParticleCell* __restrict particleCellsSpB, PxU32* __restrict particleIndicesSpB,
+	                             const PxVec3& packetCorner, SphUpdateType::Enum updateType,
+	                             PxU16* __restrict hashKeyArray, DynamicsTempBuffers& tempBuffers);
+
+	PX_FORCE_INLINE void updateParticlesBruteForceHalo(SphUpdateType::Enum updateType, PxVec3* forceBuf,
+	                                                   Particle* particles, const PacketSections& packetSections,
+	                                                   const PacketHaloRegions& haloRegions,
+	                                                   DynamicsTempBuffers& tempBuffers);
+
+	// Callbacks wrapped by MergeDensityTask / MergeForceTask below.
+	void mergeDensity(physx::PxBaseTask* continuation);
+	void mergeForce(physx::PxBaseTask* continuation);
+
+  private:
+	// Declared private; presumably left undefined to forbid assignment (the class holds a
+	// reference member).
+	Dynamics& operator=(const Dynamics&);
+	static SectionToHaloTable sSectionToHaloTable[26]; // Halo region table for each packet section
+	static OrderedIndexTable sOrderedIndexTable;
+
+	// 16-byte aligned parameter block.
+	PX_ALIGN(16, DynamicsParameters mParams);
+	class ParticleSystemSimCpu& mParticleSystem;
+	Particle* mTempReorderedParticles;
+	PxVec3* mTempParticleForceBuf;
+
+	// Task types that invoke mergeDensity / mergeForce on this object.
+	typedef Cm::DelegateTask<Dynamics, &Dynamics::mergeDensity> MergeDensityTask;
+	typedef Cm::DelegateTask<Dynamics, &Dynamics::mergeForce> MergeForceTask;
+
+	MergeDensityTask mMergeDensityTask;
+	MergeForceTask mMergeForceTask;
+	PxU32 mNumTasks;
+	SphUpdateType::Enum mCurrentUpdateType;
+	PxU32 mNumTempBuffers;
+	// Fixed-size per-task storage, bounded by PT_MAX_PARALLEL_TASKS_SPH.
+	DynamicsTempBuffers mTempBuffers[PT_MAX_PARALLEL_TASKS_SPH];
+	TaskData mTaskData[PT_MAX_PARALLEL_TASKS_SPH];
+	friend class DynamicsSphTask;
+};
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_DYNAMICS_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsKernels.h b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsKernels.h
new file mode 100644
index 00000000..94494072
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsKernels.h
@@ -0,0 +1,1105 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_DYNAMICS_KERNELS_H
+#define PT_DYNAMICS_KERNELS_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PsFPU.h"
+#include "foundation/PxUnionCast.h"
+
+#include "PtDynamicsParameters.h"
+
+#define REFERENCE_KERNELS 0
+
+#if !REFERENCE_KERNELS
+#include "PsVecMath.h"
+#endif
+
+namespace physx
+{
+namespace Pt
+{
+
+using namespace Ps;
+using namespace aos;
+
+#define COMPILE_IN_SIMD_DENSITY 1
+#define PX_FORCE_INLINE_KERNELS PX_FORCE_INLINE
+
+#define MAX_INDEX_STREAM_SIZE 128
+#define PRESSURE_ORIGIN 1
+
+// Returns the density contribution for a particle pair at squared distance distSqr:
+// densityMultiplierStd * (radiusSqStd - distSqr * scaleSqToStd)^3.
+PX_FORCE_INLINE PxF32 calcDensity(const PxF32 distSqr, const DynamicsParameters& params)
+{
+	// squared distance converted with scaleSqToStd
+	const PxF32 distSqStd = distSqr * params.scaleSqToStd;
+	const PxF32 kernelTerm = params.radiusSqStd - distSqStd;
+	// cubed kernel term, multiplied left to right
+	return params.densityMultiplierStd * kernelTerm * kernelTerm * kernelTerm;
+}
+
+// Adds the density contribution of one neighbor at squared distance distSqr to particleDst.
+// distSqr must not exceed params.cellSizeSq (asserted).
+PX_FORCE_INLINE void addDensity(Particle& particleDst, const PxF32 distSqr, const DynamicsParameters& params)
+{
+	PX_ASSERT(distSqr <= params.cellSizeSq);
+	particleDst.density += calcDensity(distSqr, params);
+}
+
+// Adds the same density contribution to both particles of a pair.
+// distSqr must not exceed params.cellSizeSq (asserted).
+PX_FORCE_INLINE void addDensity_twoWay(Particle& particleA, Particle& particleB, const PxF32 distSqr,
+                                       const DynamicsParameters& params)
+{
+	PX_ASSERT(distSqr <= params.cellSizeSq);
+	const PxF32 pairDensity = calcDensity(distSqr, params);
+	particleA.density += pairDensity;
+	particleB.density += pairDensity;
+}
+
+// Returns the pressure plus viscosity force term for the pair (A, B), before the division by
+// density that addForce / addForce_twoWay apply.
+//   distVec : particleA.position - particleB.position as passed by the callers
+//   distSqr : squared length of distVec
+// The inverse distance comes from recipSqrtFast, so the result is an approximation.
+PX_FORCE_INLINE PxVec3 calcForce(const Particle& particleA, const Particle& particleB, const PxF32 distSqr,
+                                 const PxVec3& distVec, const DynamicsParameters& params)
+{
+	const PxReal distSqStd = distSqr * params.scaleSqToStd;
+
+	const PxReal invDistStd = physx::intrinsics::recipSqrtFast(distSqStd);
+	const PxReal distStd = distSqStd * invDistStd;
+
+// pressure term
+#if PRESSURE_ORIGIN
+	// each particle's pressure is clamped at zero before summing
+	const PxF32 excessA = PxMax(particleA.density - params.initialDensity, 0.0f);
+	const PxF32 excessB = PxMax(particleB.density - params.initialDensity, 0.0f);
+	const PxF32 pressureTotal = excessA + excessB;
+#else
+	// only the summed pressure is clamped at zero
+	const PxF32 pressureTotal = PxMax(particleA.density + particleB.density - 2 * params.initialDensity, 0.0f);
+#endif
+
+	const PxReal pressureScaleStd = (params.radiusSqStd * invDistStd - 2 * params.radiusStd + distStd) *
+	                                params.stiffMulPressureMultiplierStd * pressureTotal;
+
+	PxVec3 pairForce = distVec * pressureScaleStd * params.scaleToStd;
+
+	// viscosity term, proportional to the relative velocity of B with respect to A
+	const PxReal viscosityScaleStd = (params.radiusStd - distStd) * params.viscosityMultiplierStd;
+
+	const PxVec3 relVelocityStd = (particleB.velocity - particleA.velocity) * params.scaleToStd;
+	pairForce += (relVelocityStd * viscosityScaleStd);
+
+	return pairForce;
+}
+
+// One-way force update: accumulates into particleForceDst the pair force exerted by particleSrc on
+// particleDst, scaled by the (fast, approximate) reciprocal of the source density.
+PX_FORCE_INLINE void addForce(PxVec3& particleForceDst, const Particle& particleDst, const Particle& particleSrc,
+                              const PxF32 distSqr, const PxVec3& distVec, const DynamicsParameters& params)
+{
+	PX_ASSERT(distSqr <= params.cellSizeSq);
+	const PxVec3 pairForce = calcForce(particleDst, particleSrc, distSqr, distVec, params);
+	particleForceDst += (pairForce * physx::intrinsics::recipFast(particleSrc.density));
+}
+
+// Two-way force update: the pair force is added to A (scaled by 1/density of B) and subtracted
+// from B (scaled by 1/density of A). Both reciprocals use the fast approximation.
+PX_FORCE_INLINE void addForce_twoWay(PxVec3& particleAForce, PxVec3& particleBForce, const Particle& particleA,
+                                     const Particle& particleB, const PxF32 distSqr, const PxVec3& distVec,
+                                     const DynamicsParameters& params)
+{
+	PX_ASSERT(distSqr <= params.cellSizeSq);
+	const PxVec3 pairForce = calcForce(particleA, particleB, distSqr, distVec, params);
+	particleAForce += (pairForce * physx::intrinsics::recipFast(particleB.density));
+	particleBForce -= (pairForce * physx::intrinsics::recipFast(particleA.density));
+}
+
+#if REFERENCE_KERNELS
+
+// Reference (scalar) kernel: tests every particle of group A against every particle of group B
+// and accumulates density or force contributions for pairs closer than the cell size.
+//
+//   forceBufA / forceBufB : force accumulators indexed like particlesSpA / particlesSpB. They are
+//                           only accessed in force mode, and forceBufB only for two-way updates,
+//                           so one-way callers may pass NULL for forceBufB.
+//   particleIndicesSpA/B  : indices into particlesSpA / particlesSpB selecting the group members
+//   twoWayUpdate          : also apply the contribution to the particles of group B
+//   isDensityMode         : true -> density pass, false -> force pass
+//   tempSimdPositionBuffer, tempIndexStream : unused by the reference kernel
+PX_FORCE_INLINE void updateParticleGroupPair(PxVec3* __restrict forceBufA, PxVec3* __restrict forceBufB,
+                                             Particle* __restrict particlesSpA, Particle* __restrict particlesSpB,
+                                             const PxU32* __restrict particleIndicesSpA, const PxU32 numParticlesA,
+                                             const PxU32* __restrict particleIndicesSpB, const PxU32 numParticlesB,
+                                             const bool twoWayUpdate, const bool isDensityMode,
+                                             const DynamicsParameters& params, PxU8* tempSimdPositionBuffer,
+                                             PxU32* tempIndexStream)
+{
+	PX_UNUSED(tempSimdPositionBuffer);
+	PX_UNUSED(tempIndexStream);
+
+	// Check given particle against particles of another cell.
+
+	for(PxU32 pA = 0; pA < numParticlesA; pA++)
+	{
+		const PxU32 idxA = particleIndicesSpA[pA];
+		Particle& particleA = particlesSpA[idxA];
+
+		for(PxU32 pB = 0; pB < numParticlesB; pB++)
+		{
+			const PxU32 idxB = particleIndicesSpB[pB];
+			Particle& particleB = particlesSpB[idxB];
+
+			const PxVec3 distVec = particleA.position - particleB.position;
+			const PxReal distSqr = distVec.magnitudeSquared();
+
+			// Skip pairs outside the interaction range and coincident particles.
+			if(!(distSqr < params.cellSizeSq && distSqr > 0.0f))
+				continue;
+
+			if(isDensityMode)
+			{
+				if(!twoWayUpdate)
+					addDensity(particleA, distSqr, params);
+				else
+					addDensity_twoWay(particleA, particleB, distSqr, params);
+			}
+			else
+			{
+				// The force buffers are indexed only here. Binding a reference to forceBufB[idxB]
+				// up front would dereference the NULL buffer that one-way callers pass.
+				if(!twoWayUpdate)
+					addForce(forceBufA[idxA], particleA, particleB, distSqr, distVec, params);
+				else
+					addForce_twoWay(forceBufA[idxA], forceBufB[idxB], particleA, particleB, distSqr, distVec,
+					                params);
+			}
+		}
+	}
+}
+
+#else // REFERENCE_KERNELS
+
+// Empty tag types used as template arguments of Contribution: the first pair selects the pass
+// (density or force), the second pair selects whether both particles or only one are updated.
+class DensityPassType {};
+class ForcePassType {};
+class TwoWayUpdateType {};
+class OneWayUpdateType {};
+
+// Compile-time dispatch from a (pass, update) tag pair to the matching scalar kernel.
+// The primary template is intentionally empty; only the four specializations provide add().
+// All add() overloads share one parameter list so they can be called uniformly; arguments that a
+// given kernel does not need are left unnamed.
+template <typename PassType, typename UpdateType>
+struct Contribution
+{
+};
+
+// Density pass, both particles updated.
+template <>
+struct Contribution<DensityPassType, TwoWayUpdateType>
+{
+	static void add(PxVec3& /*forceA*/, PxVec3& /*forceB*/, PxReal distSqr, const PxVec3& /*distVec*/,
+	                Particle& pA, Particle& pB, const DynamicsParameters& params)
+	{
+		addDensity_twoWay(pA, pB, distSqr, params);
+	}
+};
+
+// Force pass, both particles updated.
+template <>
+struct Contribution<ForcePassType, TwoWayUpdateType>
+{
+	static void add(PxVec3& forceOnA, PxVec3& forceOnB, PxReal distSqr, const PxVec3& distVec, Particle& pA,
+	                Particle& pB, const DynamicsParameters& params)
+	{
+		addForce_twoWay(forceOnA, forceOnB, pA, pB, distSqr, distVec, params);
+	}
+};
+
+// Density pass, only particle A updated.
+template <>
+struct Contribution<DensityPassType, OneWayUpdateType>
+{
+	static void add(PxVec3& /*forceA*/, PxVec3& /*forceB*/, PxReal distSqr, const PxVec3& /*distVec*/,
+	                Particle& pA, Particle& /*pB*/, const DynamicsParameters& params)
+	{
+		addDensity(pA, distSqr, params);
+	}
+};
+
+// Force pass, only the force of particle A updated.
+template <>
+struct Contribution<ForcePassType, OneWayUpdateType>
+{
+	static void add(PxVec3& forceOnA, PxVec3& /*forceB*/, PxReal distSqr, const PxVec3& distVec, Particle& pA,
+	                Particle& pB, const DynamicsParameters& params)
+	{
+		addForce(forceOnA, pA, pB, distSqr, distVec, params);
+	}
+};
+
+// Parameters for simd kernel execution
+struct DynamicsParametersSIMD
+{
+	Ps::aos::Vec4V scaleToStd;
+	Ps::aos::Vec4V scaleSqToStd;
+	Ps::aos::Vec4V radiusStd;
+	Ps::aos::Vec4V radiusSqStd;
+	Ps::aos::Vec4V densityMultiplierStd;
+	Ps::aos::Vec4V stiffMulPressureMultiplierStd;
+	Ps::aos::Vec4V viscosityMultiplierStd;
+	Ps::aos::Vec4V initialDensity;
+	Ps::aos::Vec4V stiffnessStd;
+};
+
+#if COMPILE_IN_SIMD_DENSITY
+
+// Four-lane version of calcDensity: adds the density contributions of the four source particles
+// to the density column (col3) of posDensDstT, one source per lane.
+// posDensDstT holds the destination in transposed form (col0..col2 = x, y, z).
+// The aligned loads start at &position.x, so each source particle must be 16-byte aligned.
+PX_FORCE_INLINE void calcDensity4_onlyPtrs(Mat44V& posDensDstT, const Particle* __restrict pSrc0,
+                                           const Particle* __restrict pSrc1, const Particle* __restrict pSrc2,
+                                           const Particle* __restrict pSrc3, const DynamicsParametersSIMD& params)
+{
+	// Load one row per source particle, then transpose to get one column per component.
+	Mat44V srcRows(V4LoadA(&pSrc0->position.x), V4LoadA(&pSrc1->position.x), V4LoadA(&pSrc2->position.x),
+	               V4LoadA(&pSrc3->position.x));
+	Mat44V srcCols = M44Trnsps(srcRows);
+
+	// Per-lane difference vector dst - src.
+	Vec4V dx = V4Sub(posDensDstT.col0, srcCols.col0);
+	Vec4V dy = V4Sub(posDensDstT.col1, srcCols.col1);
+	Vec4V dz = V4Sub(posDensDstT.col2, srcCols.col2);
+
+	// Squared length, accumulated x -> y -> z.
+	Vec4V lenSq = V4Mul(dx, dx);
+	lenSq = V4MulAdd(dy, dy, lenSq);
+	lenSq = V4MulAdd(dz, dz, lenSq);
+
+	Vec4V lenSqStd = V4Mul(lenSq, params.scaleSqToStd);
+
+	// densityMultiplierStd * (radiusSqStd - lenSqStd)^3
+	Vec4V kernelTerm = V4Sub(params.radiusSqStd, lenSqStd);
+	Vec4V contribution = V4Mul(params.densityMultiplierStd, kernelTerm);
+	contribution = V4Mul(contribution, kernelTerm);
+	contribution = V4Mul(contribution, kernelTerm);
+
+	posDensDstT.col3 = V4Add(posDensDstT.col3, contribution);
+}
+
+// Two-way four-lane density kernel: computes the same per-lane contribution as
+// calcDensity4_onlyPtrs, adds it to the density of each of the four source particles, and also
+// accumulates it into the density column (col3) of posDensDstT.
+// The aligned loads start at &position.x, so each source particle must be 16-byte aligned.
+PX_FORCE_INLINE void calcDensity4_twoWay_onlyPtrs(Mat44V& posDensDstT, Particle* __restrict pSrc0,
+                                                  Particle* __restrict pSrc1, Particle* __restrict pSrc2,
+                                                  Particle* __restrict pSrc3, const DynamicsParametersSIMD& params)
+{
+	// Load one row per source particle, then transpose to get one column per component.
+	Mat44V srcRows(V4LoadA(&pSrc0->position.x), V4LoadA(&pSrc1->position.x), V4LoadA(&pSrc2->position.x),
+	               V4LoadA(&pSrc3->position.x));
+	Mat44V srcCols = M44Trnsps(srcRows);
+
+	// Per-lane difference vector dst - src.
+	Vec4V dx = V4Sub(posDensDstT.col0, srcCols.col0);
+	Vec4V dy = V4Sub(posDensDstT.col1, srcCols.col1);
+	Vec4V dz = V4Sub(posDensDstT.col2, srcCols.col2);
+
+	// Squared length, accumulated x -> y -> z.
+	Vec4V lenSq = V4Mul(dx, dx);
+	lenSq = V4MulAdd(dy, dy, lenSq);
+	lenSq = V4MulAdd(dz, dz, lenSq);
+
+	Vec4V lenSqStd = V4Mul(lenSq, params.scaleSqToStd);
+
+	// densityMultiplierStd * (radiusSqStd - lenSqStd)^3
+	Vec4V kernelTerm = V4Sub(params.radiusSqStd, lenSqStd);
+	Vec4V contribution = V4Mul(params.densityMultiplierStd, kernelTerm);
+	contribution = V4Mul(contribution, kernelTerm);
+	contribution = V4Mul(contribution, kernelTerm);
+
+	// Source side: spill the lanes to scalars and add one lane to each source particle.
+	PX_ALIGN(16, PxVec4 laneDensity);
+	V4StoreA(contribution, &laneDensity[0]);
+	pSrc0->density += laneDensity[0];
+	pSrc1->density += laneDensity[1];
+	pSrc2->density += laneDensity[2];
+	pSrc3->density += laneDensity[3];
+
+	// Destination side: stays in SIMD form, accumulated per lane.
+	posDensDstT.col3 = V4Add(posDensDstT.col3, contribution);
+}
+
+#endif // COMPILE_IN_SIMD_DENSITY
+
+// One-way four-lane force kernel. Per lane it evaluates the same pressure and viscosity terms as
+// the scalar calcForce, scales the result by the reciprocal of the source density (as addForce
+// does) and accumulates it into col0..col2 of forceDstT.
+//
+//   forceDstT    : per-lane force accumulator for the destination (x, y, z in col0..col2)
+//   pSrc0..pSrc3 : the four source particles, one per lane; read only
+//   posDensDstT  : destination position (col0..col2) and density (col3), transposed layout
+//   velPressDstT : destination velocity in col0..col2, transposed layout (col3 is not read here)
+//
+// The aligned loads start at &position.x and &velocity.x, so both members must be 16-byte
+// aligned; the fourth float loaded with the position is used as the source density.
+// V4RsqrtFast / V4RecipFast are approximations.
+PX_FORCE_INLINE void calcForce4_onlyPtrs(Mat44V& forceDstT, const Particle* __restrict pSrc0,
+                                         const Particle* __restrict pSrc1, const Particle* __restrict pSrc2,
+                                         const Particle* __restrict pSrc3, const Mat44V& posDensDstT,
+                                         const Mat44V& velPressDstT, const DynamicsParametersSIMD& params)
+{
+	// one row per source particle: (x, y, z, density)
+	Mat44V posDensSrc(V4LoadA(&pSrc0->position.x), V4LoadA(&pSrc1->position.x), V4LoadA(&pSrc2->position.x),
+	                  V4LoadA(&pSrc3->position.x));
+
+	// transpose to one column per component
+	Mat44V posDensSrcT = M44Trnsps(posDensSrc);
+
+	// per-lane difference vector dst - src
+	Vec4V distVec_x = V4Sub(posDensDstT.col0, posDensSrcT.col0);
+	Vec4V distVec_y = V4Sub(posDensDstT.col1, posDensSrcT.col1);
+	Vec4V distVec_z = V4Sub(posDensDstT.col2, posDensSrcT.col2);
+
+	Vec4V distSqr_x = V4Mul(distVec_x, distVec_x);
+	Vec4V distSqr_xy = V4MulAdd(distVec_y, distVec_y, distSqr_x);
+	Vec4V distSqr = V4MulAdd(distVec_z, distVec_z, distSqr_xy);
+
+	Vec4V distSqrStd = V4Mul(distSqr, params.scaleSqToStd);
+
+	// distance obtained as distSqr * (1 / sqrt(distSqr))
+	Vec4V recipDistStd = V4RsqrtFast(distSqrStd);
+	Vec4V distStd = V4Mul(distSqrStd, recipDistStd);
+	Vec4V radiusMinusDistStd = V4Sub(params.radiusStd, distStd);
+
+	// pressure force
+	Mat44V velPressSrc(V4LoadA(&pSrc0->velocity.x), V4LoadA(&pSrc1->velocity.x), V4LoadA(&pSrc2->velocity.x),
+	                   V4LoadA(&pSrc3->velocity.x));
+
+	Mat44V velPressSrcT = M44Trnsps(velPressSrc);
+
+	Vec4V pressureDst = V4Sub(posDensDstT.col3, params.initialDensity);
+	Vec4V pressureSrc = V4Sub(posDensSrcT.col3, params.initialDensity);
+#if PRESSURE_ORIGIN
+	// clamp each pressure at zero before summing
+	pressureDst = V4Max(pressureDst, V4Zero());
+	pressureSrc = V4Max(pressureSrc, V4Zero());
+	Vec4V pressureSum = V4Add(pressureDst, pressureSrc);
+#else
+	// clamp only the summed pressure at zero
+	Vec4V pressureSum = V4Add(pressureDst, pressureSrc);
+	pressureSum = V4Max(pressureSum, V4Zero());
+#endif
+
+	// (radiusSq / dist + dist - 2 * radius) * stiffMulPressureMultiplier * pressureSum
+	Vec4V radiusStd_x2 = V4Add(params.radiusStd, params.radiusStd);
+	Vec4V multiplierPressStd = V4MulAdd(params.radiusSqStd, recipDistStd, distStd);
+	multiplierPressStd = V4Sub(multiplierPressStd, radiusStd_x2);
+	multiplierPressStd = V4Mul(multiplierPressStd, params.stiffMulPressureMultiplierStd);
+	multiplierPressStd = V4Mul(multiplierPressStd, pressureSum);
+
+	Vec4V pressureForceMult = V4Mul(multiplierPressStd, params.scaleToStd);
+	Vec4V force_x = V4Mul(distVec_x, pressureForceMult);
+	Vec4V force_y = V4Mul(distVec_y, pressureForceMult);
+	Vec4V force_z = V4Mul(distVec_z, pressureForceMult);
+
+	// viscosity force
+	Vec4V multiplierViscStd = V4Mul(radiusMinusDistStd, params.viscosityMultiplierStd);
+
+	Vec4V viscossityForceMult = V4Mul(params.scaleToStd, multiplierViscStd);
+
+	// relative velocity src - dst
+	Vec4V vDiff_x = V4Sub(velPressSrcT.col0, velPressDstT.col0);
+	Vec4V vDiff_y = V4Sub(velPressSrcT.col1, velPressDstT.col1);
+	Vec4V vDiff_z = V4Sub(velPressSrcT.col2, velPressDstT.col2);
+
+	force_x = V4MulAdd(vDiff_x, viscossityForceMult, force_x);
+	force_y = V4MulAdd(vDiff_y, viscossityForceMult, force_y);
+	force_z = V4MulAdd(vDiff_z, viscossityForceMult, force_z);
+
+	// application of force
+	// scaled by the reciprocal of the source density
+	Vec4V invDensities = V4RecipFast(posDensSrcT.col3);
+	force_x = V4Mul(force_x, invDensities);
+	force_y = V4Mul(force_y, invDensities);
+	force_z = V4Mul(force_z, invDensities);
+
+	forceDstT.col0 = V4Add(forceDstT.col0, force_x);
+	forceDstT.col1 = V4Add(forceDstT.col1, force_y);
+	forceDstT.col2 = V4Add(forceDstT.col2, force_z);
+}
+
+// Two-way four-lane force kernel. Per lane it evaluates the same pressure and viscosity terms as
+// the scalar calcForce and applies them in both directions, like addForce_twoWay:
+//   forceSrcT -= force * invDensityDst        (reaction on the four source particles)
+//   forceDstT += force * (1 / source density) (action on the destination)
+//
+//   forceDstT     : per-lane force accumulator for the destination (col0..col2)
+//   forceSrcT     : force accumulator for the four source particles (col0..col2, one per lane)
+//   pSrc0..pSrc3  : the four source particles, one per lane; not written by this function
+//   posDensDstT   : destination position (col0..col2) and density (col3), transposed layout
+//   velPressDstT  : destination velocity in col0..col2, transposed layout (col3 is not read here)
+//   invDensityDst : reciprocal of the destination density, supplied by the caller
+//
+// The aligned loads start at &position.x and &velocity.x, so both members must be 16-byte
+// aligned; the fourth float loaded with the position is used as the source density.
+// V4RsqrtFast / V4RecipFast are approximations.
+PX_FORCE_INLINE void calcForce4_twoWay_onlyPtrs(Mat44V& forceDstT, Mat44V& forceSrcT, Particle* __restrict pSrc0,
+                                                Particle* __restrict pSrc1, Particle* __restrict pSrc2,
+                                                Particle* __restrict pSrc3, const Mat44V& posDensDstT,
+                                                const Mat44V& velPressDstT, const Vec4V& invDensityDst,
+                                                const DynamicsParametersSIMD& params)
+{
+	// one row per source particle: (x, y, z, density)
+	Mat44V posDensSrc(V4LoadA(&pSrc0->position.x), V4LoadA(&pSrc1->position.x), V4LoadA(&pSrc2->position.x),
+	                  V4LoadA(&pSrc3->position.x));
+
+	// transpose to one column per component
+	Mat44V posDensSrcT = M44Trnsps(posDensSrc);
+
+	// per-lane difference vector dst - src
+	Vec4V distVec_x = V4Sub(posDensDstT.col0, posDensSrcT.col0);
+	Vec4V distVec_y = V4Sub(posDensDstT.col1, posDensSrcT.col1);
+	Vec4V distVec_z = V4Sub(posDensDstT.col2, posDensSrcT.col2);
+
+	Vec4V distSqr_x = V4Mul(distVec_x, distVec_x);
+	Vec4V distSqr_xy = V4MulAdd(distVec_y, distVec_y, distSqr_x);
+	Vec4V distSqr = V4MulAdd(distVec_z, distVec_z, distSqr_xy);
+
+	Vec4V distSqrStd = V4Mul(distSqr, params.scaleSqToStd);
+
+	// distance obtained as distSqr * (1 / sqrt(distSqr))
+	Vec4V recipDistStd = V4RsqrtFast(distSqrStd);
+	Vec4V distStd = V4Mul(distSqrStd, recipDistStd);
+	Vec4V radiusMinusDistStd = V4Sub(params.radiusStd, distStd);
+
+	// pressure force
+	Mat44V velPressSrc(V4LoadA(&pSrc0->velocity.x), V4LoadA(&pSrc1->velocity.x), V4LoadA(&pSrc2->velocity.x),
+	                   V4LoadA(&pSrc3->velocity.x));
+
+	Mat44V velPressSrcT = M44Trnsps(velPressSrc);
+
+	Vec4V pressureDst = V4Sub(posDensDstT.col3, params.initialDensity);
+	Vec4V pressureSrc = V4Sub(posDensSrcT.col3, params.initialDensity);
+#if PRESSURE_ORIGIN
+	// clamp each pressure at zero before summing
+	pressureDst = V4Max(pressureDst, V4Zero());
+	pressureSrc = V4Max(pressureSrc, V4Zero());
+	Vec4V pressureSum = V4Add(pressureDst, pressureSrc);
+#else
+	// clamp only the summed pressure at zero
+	Vec4V pressureSum = V4Add(pressureDst, pressureSrc);
+	pressureSum = V4Max(pressureSum, V4Zero());
+#endif
+
+	// (radiusSq / dist + dist - 2 * radius) * stiffMulPressureMultiplier * pressureSum
+	Vec4V radiusStd_x2 = V4Add(params.radiusStd, params.radiusStd);
+	Vec4V multiplierPressStd = V4MulAdd(params.radiusSqStd, recipDistStd, distStd);
+	multiplierPressStd = V4Sub(multiplierPressStd, radiusStd_x2);
+	multiplierPressStd = V4Mul(multiplierPressStd, params.stiffMulPressureMultiplierStd);
+	multiplierPressStd = V4Mul(multiplierPressStd, pressureSum);
+
+	Vec4V pressureForceMult = V4Mul(multiplierPressStd, params.scaleToStd);
+	Vec4V force_x = V4Mul(distVec_x, pressureForceMult);
+	Vec4V force_y = V4Mul(distVec_y, pressureForceMult);
+	Vec4V force_z = V4Mul(distVec_z, pressureForceMult);
+
+	// viscosity force
+	Vec4V multiplierViscStd = V4Mul(radiusMinusDistStd, params.viscosityMultiplierStd);
+
+	Vec4V viscossityForceMult = V4Mul(params.scaleToStd, multiplierViscStd);
+
+	// relative velocity src - dst
+	Vec4V vDiff_x = V4Sub(velPressSrcT.col0, velPressDstT.col0);
+	Vec4V vDiff_y = V4Sub(velPressSrcT.col1, velPressDstT.col1);
+	Vec4V vDiff_z = V4Sub(velPressSrcT.col2, velPressDstT.col2);
+
+	force_x = V4MulAdd(vDiff_x, viscossityForceMult, force_x);
+	force_y = V4MulAdd(vDiff_y, viscossityForceMult, force_y);
+	force_z = V4MulAdd(vDiff_z, viscossityForceMult, force_z);
+
+	// apply to src particles (sschirm TODO:rename)
+	// forceSrcT -= force * invDensityDst
+	forceSrcT.col0 = V4NegMulSub(force_x, invDensityDst, forceSrcT.col0);
+	forceSrcT.col1 = V4NegMulSub(force_y, invDensityDst, forceSrcT.col1);
+	forceSrcT.col2 = V4NegMulSub(force_z, invDensityDst, forceSrcT.col2);
+
+	// apply to dst particle (sschirm TODO:rename)
+	// forceDstT += force * (1 / source density)
+	Vec4V invDensities = V4RecipFast(posDensSrcT.col3);
+	forceDstT.col0 = V4MulAdd(force_x, invDensities, forceDstT.col0);
+	forceDstT.col1 = V4MulAdd(force_y, invDensities, forceDstT.col1);
+	forceDstT.col2 = V4MulAdd(force_z, invDensities, forceDstT.col2);
+}
+
+#if !PX_IOS
+
+// One-way density update driven by an index stream.
+//
+// The stream is a sequence of records of the form
+//   [dstIdx, numInteractions, srcIdx_0, ..., srcIdx_(numInteractions-1)]
+// where dstIdx indexes particlesA and every srcIdx indexes particlesB. For each record the density
+// contributions of the listed source particles are added to particlesA[dstIdx]; particlesB is
+// only read. No distance test is done here (addDensity only asserts the range), so the stream is
+// presumably pre-filtered to pairs within the interaction radius -- confirm at the producer.
+//
+//   indexStreamSize : number of PxU32 entries in indexStream
+//   params          : scalar parameters used by the active scalar path
+//   simdParams      : used only by the disabled SIMD path
+static void updateStreamDensity(Particle* __restrict particlesA, const Particle* __restrict particlesB,
+                                const PxU32* indexStream, const PxU32 indexStreamSize, const DynamicsParameters& params,
+                                const DynamicsParametersSIMD& simdParams)
+{
+	PX_UNUSED(simdParams);
+	PxU32 s = 0; // read cursor into indexStream
+	while(s < indexStreamSize)
+	{
+		// record header
+		PxU32 dstIdx = indexStream[s++];
+		PxU32 numInteractions = indexStream[s++];
+
+		// the simd density code is currently disabled, since it's not a real win
+		if(1)
+		{
+			// scalar path: one source particle per iteration
+			for(PxU32 i = 0; i < numInteractions; ++i)
+			{
+				PxU32 srcIdx = indexStream[s++];
+				PX_ALIGN(16, PxVec3 distVec) = particlesA[dstIdx].position - particlesB[srcIdx].position;
+				PxF32 distSqr = distVec.magnitudeSquared();
+				addDensity(particlesA[dstIdx], distSqr, params);
+			}
+		}
+#if COMPILE_IN_SIMD_DENSITY
+		else
+		{
+			// SIMD path (compiled in but never taken because of the if(1) above): handles the
+			// sources in blocks of four, then the remainder one by one.
+			Particle* __restrict dstParticle = particlesA + dstIdx;
+			PxU32 blockCount = numInteractions / 4;
+
+			if(blockCount > 0)
+			{
+				// replicate the destination into all four lanes, transposed layout
+				Vec4V tmp = V4LoadA(&dstParticle->position.x);
+				Mat44V posDensDst(tmp, tmp, tmp, tmp);
+				Mat44V posDensDstT = M44Trnsps(posDensDst);
+
+				// set density to zero
+				posDensDstT.col3 = V4Zero();
+
+				for(PxU32 i = 0; i < blockCount; ++i)
+				{
+					PxU32 srcIdx0 = indexStream[s++];
+					PxU32 srcIdx1 = indexStream[s++];
+					PxU32 srcIdx2 = indexStream[s++];
+					PxU32 srcIdx3 = indexStream[s++];
+
+					calcDensity4_onlyPtrs(posDensDstT, particlesB + srcIdx0, particlesB + srcIdx1, particlesB + srcIdx2,
+					                      particlesB + srcIdx3, simdParams);
+				}
+
+				// simd to scalar
+				// the four lane sums are added onto the destination density
+				PX_ALIGN(16, PxVec4 density);
+				V4StoreA(posDensDstT.col3, &density[0]);
+				dstParticle->density += density[0] + density[1] + density[2] + density[3];
+			}
+
+			// remaining (numInteractions % 4) sources, handled with the scalar kernel
+			PxU32 numLeft = numInteractions - blockCount * 4;
+			for(PxU32 i = 0; i < numLeft; ++i)
+			{
+				PxU32 srcIdx = indexStream[s++];
+
+				// NOTE(review): PX_ALIGN is invoked differently here than in the scalar path above
+				// (type only vs. full declarator) -- confirm that both forms expand correctly on
+				// all supported compilers.
+				PX_ALIGN(16, PxVec3) distVec = particlesA[dstIdx].position - particlesB[srcIdx].position;
+				PxF32 distSqr = distVec.magnitudeSquared();
+				addDensity(particlesA[dstIdx], distSqr, params);
+			}
+		}
+#endif // COMPILE_IN_SIMD_DENSITY
+	}
+}
+
+/**
+Adds two-way density contributions described by an index stream.
+
+The stream is a sequence of records laid out as
+    [dstIdx, numInteractions, srcIdx_0 ... srcIdx_(numInteractions - 1)]
+where dstIdx indexes particlesA and each srcIdx indexes particlesB. Both particles of every
+pair are handed to addDensity_twoWay, so particlesB is not const here.
+
+indexStreamSize is the number of PxU32 entries in indexStream. simdParams is only used by
+the SIMD path, which is compiled in with COMPILE_IN_SIMD_DENSITY and even then is never
+taken because of the if(1) below.
+*/
+static void updateStreamDensityTwoWay(Particle* __restrict particlesA, Particle* __restrict particlesB,
+                                      const PxU32* indexStream, const PxU32 indexStreamSize,
+                                      const DynamicsParameters& params, const DynamicsParametersSIMD& simdParams)
+{
+	PX_UNUSED(simdParams);
+	PxU32 s = 0;
+	while(s < indexStreamSize)
+	{
+		// record header
+		PxU32 dstIdx = indexStream[s++];
+		PxU32 numInteractions = indexStream[s++];
+
+		// the simd density code is currently disabled, since it's not a real win
+		if(1)
+		{
+			for(PxU32 i = 0; i < numInteractions; ++i)
+			{
+				PxU32 srcIdx = indexStream[s++];
+				// PX_ALIGN wraps the whole declaration, as everywhere else in this file
+				PX_ALIGN(16, PxVec3 distVec) = particlesA[dstIdx].position - particlesB[srcIdx].position;
+				PxF32 distSqr = distVec.magnitudeSquared();
+				addDensity_twoWay(particlesA[dstIdx], particlesB[srcIdx], distSqr, params);
+			}
+		}
+#if COMPILE_IN_SIMD_DENSITY
+		else
+		{
+			Particle* __restrict dstParticle = particlesA + dstIdx;
+			PxU32 blockCount = numInteractions / 4;
+
+			if(blockCount > 0)
+			{
+				// replicate the dst particle into all four rows, then transpose
+				Vec4V tmp = V4LoadA(&dstParticle->position.x);
+				Mat44V posDensDst(tmp, tmp, tmp, tmp);
+				Mat44V posDensDstT = M44Trnsps(posDensDst);
+
+				// set density to zero (col3 is summed into the dst density after the loop)
+				posDensDstT.col3 = V4Zero();
+
+				for(PxU32 i = 0; i < blockCount; ++i)
+				{
+					PxU32 srcIdx0 = indexStream[s++];
+					PxU32 srcIdx1 = indexStream[s++];
+					PxU32 srcIdx2 = indexStream[s++];
+					PxU32 srcIdx3 = indexStream[s++];
+
+					calcDensity4_twoWay_onlyPtrs(posDensDstT, particlesB + srcIdx0, particlesB + srcIdx1,
+					                             particlesB + srcIdx2, particlesB + srcIdx3, simdParams);
+				}
+
+				// simd to scalar
+				PX_ALIGN(16, PxVec4 density);
+				V4StoreA(posDensDstT.col3, &density[0]);
+				dstParticle->density += density[0] + density[1] + density[2] + density[3];
+			}
+
+			// remaining (numInteractions % 4) interactions use the scalar path
+			PxU32 numLeft = numInteractions - blockCount * 4;
+			for(PxU32 i = 0; i < numLeft; ++i)
+			{
+				PxU32 srcIdx = indexStream[s++];
+
+				PX_ALIGN(16, PxVec3 distVec) = particlesA[dstIdx].position - particlesB[srcIdx].position;
+				PxF32 distSqr = distVec.magnitudeSquared();
+				addDensity_twoWay(particlesA[dstIdx], particlesB[srcIdx], distSqr, params);
+			}
+		}
+#endif // COMPILE_IN_SIMD_DENSITY
+	}
+}
+
+/**
+Accumulates one-way force contributions described by an index stream into forceBufA.
+
+The stream is a sequence of records laid out as
+    [dstIdx, numInteractions, srcIdx_0 ... srcIdx_(numInteractions - 1)]
+where dstIdx indexes particlesA / forceBufA and each srcIdx indexes particlesB. Source
+indices are consumed four at a time through calcForce4_onlyPtrs; the remaining
+(numInteractions % 4) pairs go through the scalar addForce.
+*/
+static void updateStreamForce(PxVec3* __restrict forceBufA, Particle* __restrict particlesA,
+                              const Particle* __restrict particlesB, const PxU32* indexStream,
+                              const PxU32 indexStreamSize, const DynamicsParameters& params,
+                              const DynamicsParametersSIMD& simdParams)
+{
+	PxU32 cursor = 0;
+	while(cursor < indexStreamSize)
+	{
+		// record header: destination index, then interaction count
+		const PxU32 dstIdx = indexStream[cursor];
+		const PxU32 numInteractions = indexStream[cursor + 1];
+		cursor += 2;
+
+		Particle* __restrict dst = particlesA + dstIdx;
+		const PxU32 numQuads = numInteractions / 4;
+
+		if(numQuads != 0)
+		{
+			// replicate the dst position / velocity rows four times and transpose them
+			Vec4V splat = V4LoadA(&dst->position.x);
+			Mat44V dstPosRows(splat, splat, splat, splat);
+			Mat44V dstPosT = M44Trnsps(dstPosRows);
+
+			splat = V4LoadA(&dst->velocity.x);
+			Mat44V dstVelRows(splat, splat, splat, splat);
+			Mat44V dstVelT = M44Trnsps(dstVelRows);
+
+			Mat44V forceAccumT(V4Zero(), V4Zero(), V4Zero(), V4Zero());
+
+			for(PxU32 quad = 0; quad < numQuads; ++quad, cursor += 4)
+			{
+				const PxU32 b0 = indexStream[cursor];
+				const PxU32 b1 = indexStream[cursor + 1];
+				const PxU32 b2 = indexStream[cursor + 2];
+				const PxU32 b3 = indexStream[cursor + 3];
+
+				calcForce4_onlyPtrs(forceAccumT, particlesB + b0, particlesB + b1, particlesB + b2, particlesB + b3,
+				                    dstPosT, dstVelT, simdParams);
+			}
+
+			// simd to scalar: transpose back and add the four rows as (0 + 1) + (2 + 3)
+			Mat44V forceAccum = M44Trnsps(forceAccumT);
+			Vec4V sum01 = V4Add(forceAccum.col0, forceAccum.col1);
+			Vec4V sum23 = V4Add(forceAccum.col2, forceAccum.col3);
+			Vec4V total = V4Add(sum01, sum23);
+			forceBufA[dstIdx] += V4ReadXYZ(total);
+		}
+
+		// scalar tail for the interactions that did not fill a block of four
+		for(PxU32 left = numInteractions - numQuads * 4; left != 0; --left)
+		{
+			const PxU32 srcIdx = indexStream[cursor++];
+
+			PX_ALIGN(16, PxVec3 delta) = particlesA[dstIdx].position - particlesB[srcIdx].position;
+			PxF32 deltaSqr = delta.magnitudeSquared();
+			addForce(forceBufA[dstIdx], particlesA[dstIdx], particlesB[srcIdx], deltaSqr, delta, params);
+		}
+	}
+}
+
+/**
+Accumulates two-way force contributions described by an index stream into forceBufA and
+forceBufB.
+
+The stream is a sequence of records laid out as
+    [dstIdx, numInteractions, srcIdx_0 ... srcIdx_(numInteractions - 1)]
+where dstIdx indexes particlesA / forceBufA and each srcIdx indexes particlesB / forceBufB.
+Source indices are consumed four at a time: the four forceBufB entries are gathered, updated
+by calcForce4_twoWay_onlyPtrs and written back. The remaining (numInteractions % 4) pairs go
+through the scalar addForce_twoWay. forceBufB must not be NULL.
+*/
+static void updateStreamForceTwoWay(PxVec3* __restrict forceBufA, PxVec3* __restrict forceBufB,
+                                    Particle* __restrict particlesA, Particle* __restrict particlesB,
+                                    const PxU32* indexStream, const PxU32 indexStreamSize,
+                                    const DynamicsParameters& params, const DynamicsParametersSIMD& simdParams)
+{
+	PX_ASSERT(forceBufB);
+
+	PxU32 cursor = 0;
+	while(cursor < indexStreamSize)
+	{
+		// record header: destination index, then interaction count
+		const PxU32 dstIdx = indexStream[cursor];
+		const PxU32 numInteractions = indexStream[cursor + 1];
+		cursor += 2;
+
+		Particle* __restrict dst = particlesA + dstIdx;
+		const PxU32 numQuads = numInteractions / 4;
+
+		if(numQuads != 0)
+		{
+			// replicate the dst position / velocity rows four times and transpose them
+			Vec4V splat = V4LoadA(&dst->position.x);
+			Mat44V dstPosRows(splat, splat, splat, splat);
+			Mat44V dstPosT = M44Trnsps(dstPosRows);
+
+			splat = V4LoadA(&dst->velocity.x);
+			Mat44V dstVelRows(splat, splat, splat, splat);
+			Mat44V dstVelT = M44Trnsps(dstVelRows);
+
+			// fast reciprocal of the dst density, broadcast to all lanes
+			Vec4V dstInvDensity = V4RecipFast(V4Load(dst->density));
+
+			Mat44V forceAccumT(V4Zero(), V4Zero(), V4Zero(), V4Zero());
+
+			for(PxU32 quad = 0; quad < numQuads; ++quad, cursor += 4)
+			{
+				const PxU32 b0 = indexStream[cursor];
+				const PxU32 b1 = indexStream[cursor + 1];
+				const PxU32 b2 = indexStream[cursor + 2];
+				const PxU32 b3 = indexStream[cursor + 3];
+
+				// gather the current src forces and transpose them
+				Vec4V srcForce0 = Vec4V_From_Vec3V(V3LoadU(&forceBufB[b0].x));
+				Vec4V srcForce1 = Vec4V_From_Vec3V(V3LoadU(&forceBufB[b1].x));
+				Vec4V srcForce2 = Vec4V_From_Vec3V(V3LoadU(&forceBufB[b2].x));
+				Vec4V srcForce3 = Vec4V_From_Vec3V(V3LoadU(&forceBufB[b3].x));
+				Mat44V srcForces(srcForce0, srcForce1, srcForce2, srcForce3);
+				Mat44V srcForcesT = M44Trnsps(srcForces);
+
+				calcForce4_twoWay_onlyPtrs(forceAccumT, srcForcesT, particlesB + b0, particlesB + b1, particlesB + b2,
+				                           particlesB + b3, dstPosT, dstVelT, dstInvDensity, simdParams);
+
+				// scatter the updated src forces back, in stream order
+				srcForces = M44Trnsps(srcForcesT);
+				forceBufB[b0] = V4ReadXYZ(srcForces.col0);
+				forceBufB[b1] = V4ReadXYZ(srcForces.col1);
+				forceBufB[b2] = V4ReadXYZ(srcForces.col2);
+				forceBufB[b3] = V4ReadXYZ(srcForces.col3);
+			}
+
+			// simd to scalar: transpose back and add the four rows as (0 + 1) + (2 + 3)
+			Mat44V forceAccum = M44Trnsps(forceAccumT);
+			Vec4V sum01 = V4Add(forceAccum.col0, forceAccum.col1);
+			Vec4V sum23 = V4Add(forceAccum.col2, forceAccum.col3);
+			Vec4V total = V4Add(sum01, sum23);
+			forceBufA[dstIdx] += V4ReadXYZ(total);
+		}
+
+		// scalar tail for the interactions that did not fill a block of four
+		for(PxU32 left = numInteractions - numQuads * 4; left != 0; --left)
+		{
+			const PxU32 srcIdx = indexStream[cursor++];
+
+			PX_ALIGN(16, PxVec3 delta) = particlesA[dstIdx].position - particlesB[srcIdx].position;
+			PxF32 deltaSqr = delta.magnitudeSquared();
+			addForce_twoWay(forceBufA[dstIdx], forceBufB[srcIdx], particlesA[dstIdx], particlesB[srcIdx], deltaSqr,
+			                delta, params);
+		}
+	}
+}
+
+#endif // !PX_IOS
+
+/**
+Brute-force pair update: tests every particle of group A against every particle of group B
+and calls Contribution<PassType, UpdateType>::add for each pair whose squared distance is
+non-zero and below params.cellSizeSq.
+
+Group B is walked four particles at a time, followed by a scalar loop over the remaining
+(numParticlesB % 4) particles. The range test compares the raw bit patterns of the squared
+distances as unsigned integers.
+*/
+template <typename PassType, typename UpdateType>
+PX_FORCE_INLINE_KERNELS static void updateParticleGroupPair_small_template(
+    PxVec3* __restrict forceBufA, PxVec3* __restrict forceBufB, Particle* __restrict particlesA,
+    Particle* __restrict particlesB, const PxU32* __restrict particleIndicesA, const PxU32 numParticlesA,
+    const PxU32* __restrict particleIndicesB, const PxU32 numParticlesB, const DynamicsParameters& params)
+{
+	typedef Contribution<PassType, UpdateType> PairKernel;
+
+	// number of B particles covered by the unrolled loop
+	const PxU32 unrolledEndB = (numParticlesB / 4) * 4;
+	const PxU32 cellSizeSqBits = PxUnionCast<PxU32, PxF32>(params.cellSizeSq);
+
+	for(PxU32 a = 0; a < numParticlesA; ++a)
+	{
+		const PxU32 idxA = particleIndicesA[a];
+		Particle& prtA = particlesA[idxA];
+		PxVec3& accumA = forceBufA[idxA];
+
+		PxU32 b = 0;
+		for(; b < unrolledEndB; b += 4)
+		{
+			const PxU32 i0 = particleIndicesB[b];
+			const PxU32 i1 = particleIndicesB[b + 1];
+			const PxU32 i2 = particleIndicesB[b + 2];
+			const PxU32 i3 = particleIndicesB[b + 3];
+
+			Particle& prtB0 = particlesB[i0];
+			Particle& prtB1 = particlesB[i1];
+			Particle& prtB2 = particlesB[i2];
+			Particle& prtB3 = particlesB[i3];
+
+			PxVec3& accumB0 = forceBufB[i0];
+			PxVec3& accumB1 = forceBufB[i1];
+			PxVec3& accumB2 = forceBufB[i2];
+			PxVec3& accumB3 = forceBufB[i3];
+
+			PX_ALIGN(16, PxVec3 delta0) = prtA.position - prtB0.position;
+			PX_ALIGN(16, PxVec3 delta1) = prtA.position - prtB1.position;
+			PX_ALIGN(16, PxVec3 delta2) = prtA.position - prtB2.position;
+			PX_ALIGN(16, PxVec3 delta3) = prtA.position - prtB3.position;
+
+			PxReal deltaSqr0 = delta0.magnitudeSquared();
+			PxReal deltaSqr1 = delta1.magnitudeSquared();
+			PxReal deltaSqr2 = delta2.magnitudeSquared();
+			PxReal deltaSqr3 = delta3.magnitudeSquared();
+
+			// marginally faster to do that test (not as good as in brute force):
+			// anyInRange becomes 1.0f as soon as one of the four differences is accepted by fsel
+			PxF32 anyInRange = physx::intrinsics::fsel(params.cellSizeSq - deltaSqr0, 1.0f, 0.0f);
+			anyInRange = physx::intrinsics::fsel(params.cellSizeSq - deltaSqr1, 1.0f, anyInRange);
+			anyInRange = physx::intrinsics::fsel(params.cellSizeSq - deltaSqr2, 1.0f, anyInRange);
+			anyInRange = physx::intrinsics::fsel(params.cellSizeSq - deltaSqr3, 1.0f, anyInRange);
+
+			if(anyInRange == 0.0f)
+				continue;
+
+			const PxU32 bits0 = PxUnionCast<PxU32, PxReal>(deltaSqr0);
+			const PxU32 bits1 = PxUnionCast<PxU32, PxReal>(deltaSqr1);
+			const PxU32 bits2 = PxUnionCast<PxU32, PxReal>(deltaSqr2);
+			const PxU32 bits3 = PxUnionCast<PxU32, PxReal>(deltaSqr3);
+
+			if(bits0 > 0 && bits0 < cellSizeSqBits)
+				PairKernel::add(accumA, accumB0, deltaSqr0, delta0, prtA, prtB0, params);
+
+			if(bits1 > 0 && bits1 < cellSizeSqBits)
+				PairKernel::add(accumA, accumB1, deltaSqr1, delta1, prtA, prtB1, params);
+
+			if(bits2 > 0 && bits2 < cellSizeSqBits)
+				PairKernel::add(accumA, accumB2, deltaSqr2, delta2, prtA, prtB2, params);
+
+			if(bits3 > 0 && bits3 < cellSizeSqBits)
+				PairKernel::add(accumA, accumB3, deltaSqr3, delta3, prtA, prtB3, params);
+		}
+
+		// b == unrolledEndB here: handle the B particles that did not fill a group of four
+		for(; b < numParticlesB; ++b)
+		{
+			const PxU32 idxB = particleIndicesB[b];
+			Particle& prtB = particlesB[idxB];
+			PxVec3& accumB = forceBufB[idxB];
+
+			PX_ALIGN(16, PxVec3 delta) = prtA.position - prtB.position;
+
+			PxReal deltaSqr = delta.magnitudeSquared();
+			const PxU32 bits = PxUnionCast<PxU32, PxReal>(deltaSqr);
+
+			if(bits > 0 && bits < cellSizeSqBits)
+				PairKernel::add(accumA, accumB, deltaSqr, delta, prtA, prtB, params);
+		}
+	}
+}
+
+#if !PX_IOS
+/**
+SIMD pair update between particle group A and particle group B.
+
+Works in two phases:
+ 1. The positions of group B are transposed into tempSimdPositionBuffer, three Vec4V
+    (x, y and z lanes) per four particles.
+ 2. For every particle of A, four B particles at a time are distance-tested and the indices
+    of the accepted ones are appended to tempIndexStream as records of the form
+    [idxA, count, idxB...]. The stream is handed to the updateStream* function selected by
+    isDensityMode / twoWayUpdate whenever it is about to run full, and once more at the end.
+
+particlesA, particlesB, particleIndicesA, particleIndicesB are guaranteed to be non-overlapping
+
+NOTE(review): tempSimdPositionBuffer presumably has to hold 3 * numParticles4B / 4 entries and
+tempIndexStream MAX_INDEX_STREAM_SIZE entries -- confirm against the allocation site.
+*/
+static void updateParticleGroupPair_simd_template(PxVec3* forceBufA, PxVec3* forceBufB, Particle* particlesA,
+                                                  Particle* particlesB, const PxU32* particleIndicesA,
+                                                  const PxU32 numParticlesA, const PxU32* particleIndicesB,
+                                                  const PxU32 numParticlesB, const DynamicsParameters& params,
+                                                  const bool isDensityMode, const bool twoWayUpdate,
+                                                  Vec4V* tempSimdPositionBuffer, PxU32* tempIndexStream)
+{
+	// the extra block of four exists because the test loop below always loads one block ahead
+	PxU32 numParticles4B = ((numParticlesB + 3) & ~0x3) + 4; // ceil up to multiple of four + 4 for safe unrolling
+
+	// stand-in for the padding slots past the end of group B
+	PX_ALIGN(16, Particle fakeParticle);
+	fakeParticle.position = PxVec3(FLT_MAX, FLT_MAX, FLT_MAX);
+	fakeParticle.density = FLT_MAX; // avoid uninitialized access by V4LoadA
+
+	// phase 1: transpose the positions of group B, four particles per iteration
+	const PxU32* __restrict idxB = particleIndicesB;
+	const PxU32* __restrict idxBEnd = particleIndicesB + numParticlesB;
+	for(PxU32 q = 0, v = 0; q < numParticles4B; q += 4, idxB += 4, v += 3)
+	{
+		// slots at or beyond numParticlesB read the fake particle instead of the index array
+		const Particle* prtB0 = (q < numParticlesB) ? particlesB + *(idxB) : &fakeParticle;
+		const Particle* prtB1 = (q + 1 < numParticlesB) ? particlesB + *(idxB + 1) : &fakeParticle;
+		const Particle* prtB2 = (q + 2 < numParticlesB) ? particlesB + *(idxB + 2) : &fakeParticle;
+		const Particle* prtB3 = (q + 3 < numParticlesB) ? particlesB + *(idxB + 3) : &fakeParticle;
+
+		Mat44V posDensB_N(V4LoadA(&prtB0->position.x), V4LoadA(&prtB1->position.x), V4LoadA(&prtB2->position.x),
+		                  V4LoadA(&prtB3->position.x));
+		Mat44V posDensTB_N = M44Trnsps(posDensB_N);
+
+		// only the first three transposed columns are stored
+		tempSimdPositionBuffer[v] = posDensTB_N.col0;
+		tempSimdPositionBuffer[v + 1] = posDensTB_N.col1;
+		tempSimdPositionBuffer[v + 2] = posDensTB_N.col2;
+	}
+
+	// broadcast the scalar parameters used by the SIMD kernels
+	DynamicsParametersSIMD simdParams;
+	simdParams.scaleToStd = V4Load(params.scaleToStd);
+	simdParams.scaleSqToStd = V4Load(params.scaleSqToStd);
+	simdParams.radiusStd = V4Load(params.radiusStd);
+	simdParams.radiusSqStd = V4Load(params.radiusSqStd);
+	simdParams.densityMultiplierStd = V4Load(params.densityMultiplierStd);
+	simdParams.stiffMulPressureMultiplierStd = V4Load(params.stiffMulPressureMultiplierStd);
+	simdParams.viscosityMultiplierStd = V4Load(params.viscosityMultiplierStd);
+	simdParams.initialDensity = V4Load(params.initialDensity);
+	Vec4V simdCellSizeSq = V4Load(params.cellSizeSq);
+	VecU32V simdIntOne = U4LoadXYZW(1, 1, 1, 1);
+	VecU32V simdIntZero = U4LoadXYZW(0, 0, 0, 0);
+
+	// phase 2: build interaction records per particle of A
+	PxU32 indexStreamSize = 0;
+	const PxU32* __restrict idxA = particleIndicesA;
+	for(PxU32 p = 0; p < numParticlesA; p++, idxA++)
+	{
+		Particle* __restrict prtA = particlesA + *idxA;
+
+		// open a new record: dst index, then a count slot that is filled in later
+		PX_ASSERT(MAX_INDEX_STREAM_SIZE - indexStreamSize >= 2);
+		tempIndexStream[indexStreamSize++] = *idxA;
+
+		PxU32* interactionCountPtr = tempIndexStream + indexStreamSize++;
+		PxU32 indexStreamSizeOld = indexStreamSize;
+
+		// two blocks of four flags: the block being consumed and the block computed ahead
+		PX_ALIGN(16, PxU32 isecs[8]);
+		idxB = particleIndicesB;
+
+		Vec4V tmp = V4LoadA(&prtA->position.x);
+		Mat44V posDensA(tmp, tmp, tmp, tmp);
+		Mat44V posDensTA = M44Trnsps(posDensA);
+
+		// distance test for the first block of B, done before entering the loop
+		const Vec4V* prtB = tempSimdPositionBuffer;
+		Vec4V posT0B = *prtB++;
+		Vec4V posT1B = *prtB++;
+		Vec4V posT2B = *prtB++;
+		Vec4V distVec_x = V4Sub(posDensTA.col0, posT0B);
+		Vec4V distVec_y = V4Sub(posDensTA.col1, posT1B);
+		Vec4V distVec_z = V4Sub(posDensTA.col2, posT2B);
+		Vec4V distSqr_x = V4Mul(distVec_x, distVec_x);
+		Vec4V distSqr_xy = V4MulAdd(distVec_y, distVec_y, distSqr_x);
+		Vec4V distSqr = V4MulAdd(distVec_z, distVec_z, distSqr_xy);
+		// flag is 1 where 0 < distSqr < cellSizeSq, 0 otherwise
+		BoolV isec_b = V4IsGrtr(simdCellSizeSq, distSqr);
+		isec_b = BAnd(isec_b, V4IsGrtr(distSqr, V4Zero()));
+		VecU32V isec = V4U32Sel(isec_b, simdIntOne, simdIntZero);
+
+		U4StoreA(isec, isecs);
+
+		for(PxU32 q = 0; q < numParticlesB; q += 4, idxB += 4)
+		{
+			// distance test for the NEXT block of B (this is what needs the padding block)
+			Vec4V posT0B_N = *prtB++;
+			Vec4V posT1B_N = *prtB++;
+			Vec4V posT2B_N = *prtB++;
+			Vec4V distVec_x_N = V4Sub(posDensTA.col0, posT0B_N);
+			Vec4V distVec_y_N = V4Sub(posDensTA.col1, posT1B_N);
+			Vec4V distVec_z_N = V4Sub(posDensTA.col2, posT2B_N);
+			Vec4V distSqr_x_N = V4Mul(distVec_x_N, distVec_x_N);
+			Vec4V distSqr_xy_N = V4MulAdd(distVec_y_N, distVec_y_N, distSqr_x_N);
+			Vec4V distSqr_N = V4MulAdd(distVec_z_N, distVec_z_N, distSqr_xy_N);
+			BoolV isec_b_N = V4IsGrtr(simdCellSizeSq, distSqr_N);
+			isec_b_N = BAnd(isec_b_N, V4IsGrtr(distSqr_N, V4Zero()));
+			VecU32V isec_N = V4U32Sel(isec_b_N, simdIntOne, simdIntZero);
+
+			// the two halves of isecs alternate: write the next block's flags into the other half
+			PxU32 base_write_index = (q + 4) & 7;
+			U4StoreA(isec_N, isecs + base_write_index);
+
+			// flags of the current block, computed in the previous iteration (or before the loop)
+			PxU32 base_read_index = q & 7;
+			PxU32 u_isec0 = isecs[base_read_index];
+			PxU32 u_isec1 = isecs[base_read_index + 1];
+			PxU32 u_isec2 = isecs[base_read_index + 2];
+			PxU32 u_isec3 = isecs[base_read_index + 3];
+
+			PX_ASSERT(MAX_INDEX_STREAM_SIZE - indexStreamSize >= 4);
+
+			// branchless append: each candidate index is written unconditionally and the write
+			// position only advances when its flag is 1, so rejected entries get overwritten
+			PX_ASSERT(indexStreamSize < MAX_INDEX_STREAM_SIZE);
+			PX_ASSERT(idxB < idxBEnd);
+			tempIndexStream[indexStreamSize] = *(idxB);
+			indexStreamSize += u_isec0;
+
+			// lanes past the end of particleIndicesB write a dummy 0 instead of reading out of bounds
+			PX_ASSERT(indexStreamSize < MAX_INDEX_STREAM_SIZE);
+			tempIndexStream[indexStreamSize] = ((idxB + 1) < idxBEnd) ? *(idxB + 1) : 0;
+			indexStreamSize += u_isec1;
+
+			PX_ASSERT(indexStreamSize < MAX_INDEX_STREAM_SIZE);
+			tempIndexStream[indexStreamSize] = ((idxB + 2) < idxBEnd) ? *(idxB + 2) : 0;
+			indexStreamSize += u_isec2;
+
+			PX_ASSERT(indexStreamSize < MAX_INDEX_STREAM_SIZE);
+			tempIndexStream[indexStreamSize] = ((idxB + 3) < idxBEnd) ? *(idxB + 3) : 0;
+			indexStreamSize += u_isec3;
+
+			// flush interactions
+			// (nothing to do while at least 4 + 2 free slots remain; the else branch flushes)
+			if(MAX_INDEX_STREAM_SIZE - indexStreamSize >= (4 + 2))
+				;
+			else // 4+2, since we potentially need to add the dst index + the src count as well.
+			{
+				// close the current record before handing the stream over
+				*interactionCountPtr = indexStreamSize - indexStreamSizeOld;
+				if(isDensityMode)
+				{
+					if(twoWayUpdate)
+						updateStreamDensityTwoWay(particlesA, particlesB, tempIndexStream, indexStreamSize, params,
+						                          simdParams);
+					else
+						updateStreamDensity(particlesA, particlesB, tempIndexStream, indexStreamSize, params, simdParams);
+				}
+				else
+				{
+					if(twoWayUpdate)
+						updateStreamForceTwoWay(forceBufA, forceBufB, particlesA, particlesB, tempIndexStream,
+						                        indexStreamSize, params, simdParams);
+					else
+						updateStreamForce(forceBufA, particlesA, particlesB, tempIndexStream, indexStreamSize, params,
+						                  simdParams);
+				}
+
+				// restart the stream with a fresh record for the same particle of A
+				indexStreamSize = 0;
+				tempIndexStream[indexStreamSize++] = *idxA;
+				interactionCountPtr = tempIndexStream + indexStreamSize++;
+				indexStreamSizeOld = indexStreamSize;
+			}
+		}
+
+		// close the record of this particle of A
+		*interactionCountPtr = indexStreamSize - indexStreamSizeOld;
+	}
+
+	// process whatever is left in the stream
+	if(indexStreamSize > 0)
+	{
+		if(isDensityMode)
+		{
+			if(twoWayUpdate)
+				updateStreamDensityTwoWay(particlesA, particlesB, tempIndexStream, indexStreamSize, params, simdParams);
+			else
+				updateStreamDensity(particlesA, particlesB, tempIndexStream, indexStreamSize, params, simdParams);
+		}
+		else
+		{
+			if(twoWayUpdate)
+				updateStreamForceTwoWay(forceBufA, forceBufB, particlesA, particlesB, tempIndexStream, indexStreamSize,
+				                        params, simdParams);
+			else
+				updateStreamForce(forceBufA, particlesA, particlesB, tempIndexStream, indexStreamSize, params,
+				                  simdParams);
+		}
+	}
+}
+
+#endif // !PX_IOS
+
+#define SIMD_THRESH_SRC 8
+
+/**
+Adds the contributions of particle group B to particle group A. With twoWayUpdate set,
+group B receives the contributions of group A as well.
+
+isDensityMode selects between the density pass and the force pass. Except on PX_IOS builds,
+groups B with at least SIMD_THRESH_SRC particles are handled by
+updateParticleGroupPair_simd_template, which uses tempSimdPositionBuffer and tempIndexStream
+as scratch memory; everything else goes through updateParticleGroupPair_small_template.
+*/
+PX_FORCE_INLINE_KERNELS static void
+updateParticleGroupPair(PxVec3* __restrict forceBufA, PxVec3* __restrict forceBufB, Particle* __restrict particlesA,
+                        Particle* __restrict particlesB, const PxU32* __restrict particleIndicesA,
+                        const PxU32 numParticlesA, const PxU32* __restrict particleIndicesB, const PxU32 numParticlesB,
+                        const bool twoWayUpdate, const bool isDensityMode, const DynamicsParameters& params,
+                        PxU8* tempSimdPositionBuffer, PxU32* tempIndexStream)
+{
+	PX_ASSERT(numParticlesA > 0);
+	PX_ASSERT(numParticlesB > 0);
+
+#if !PX_IOS
+	// large enough source groups take the SIMD path
+	if(numParticlesB >= SIMD_THRESH_SRC)
+	{
+		updateParticleGroupPair_simd_template(forceBufA, forceBufB, particlesA, particlesB, particleIndicesA,
+		                                      numParticlesA, particleIndicesB, numParticlesB, params, isDensityMode,
+		                                      twoWayUpdate, reinterpret_cast<Vec4V*>(tempSimdPositionBuffer),
+		                                      tempIndexStream);
+		return;
+	}
+#else
+	PX_UNUSED(tempSimdPositionBuffer);
+	PX_UNUSED(tempIndexStream);
+#endif
+
+	// brute-force path, instantiated per pass type and update direction
+	if(twoWayUpdate)
+	{
+		PX_ASSERT(forceBufB);
+		if(isDensityMode)
+		{
+			updateParticleGroupPair_small_template<DensityPassType, TwoWayUpdateType>(
+			    forceBufA, forceBufB, particlesA, particlesB, particleIndicesA, numParticlesA, particleIndicesB,
+			    numParticlesB, params);
+		}
+		else
+		{
+			updateParticleGroupPair_small_template<ForcePassType, TwoWayUpdateType>(
+			    forceBufA, forceBufB, particlesA, particlesB, particleIndicesA, numParticlesA, particleIndicesB,
+			    numParticlesB, params);
+		}
+	}
+	else
+	{
+		if(isDensityMode)
+		{
+			updateParticleGroupPair_small_template<DensityPassType, OneWayUpdateType>(
+			    forceBufA, forceBufB, particlesA, particlesB, particleIndicesA, numParticlesA, particleIndicesB,
+			    numParticlesB, params);
+		}
+		else
+		{
+			updateParticleGroupPair_small_template<ForcePassType, OneWayUpdateType>(
+			    forceBufA, forceBufB, particlesA, particlesB, particleIndicesA, numParticlesA, particleIndicesB,
+			    numParticlesB, params);
+		}
+	}
+}
+
+#endif // REFERENCE_KERNELS
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_DYNAMICS_KERNELS_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsParameters.h b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsParameters.h
new file mode 100644
index 00000000..42e3aa79
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsParameters.h
@@ -0,0 +1,83 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+#ifndef PT_DYNAMICS_PARAMETER_H
+#define PT_DYNAMICS_PARAMETER_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+namespace physx
+{
+
+namespace Pt
+{
+
+// Scoped enumeration of SPH update types (enum wrapped in a struct, used as SphUpdateType::Enum).
+// NOTE(review): DENSITY and FORCE presumably select the density and force passes of the dynamics
+// update, and EXIT presumably signals termination to whoever consumes this value -- confirm
+// against the users of this enum.
+struct SphUpdateType
+{
+	enum Enum
+	{
+		DENSITY,
+		FORCE,
+		EXIT
+	};
+};
+
+// Scalar parameter block for the particle dynamics code. The members are grouped in rows of
+// four 32-bit values and the size of the struct is asserted to be a multiple of 16 bytes
+// right after the definition.
+// NOTE(review): the "Std" suffix presumably denotes values expressed in a standardized
+// (rescaled) unit system, with scaleToStd / scaleToWorld converting between it and world
+// units -- confirm where the parameters are filled in.
+struct DynamicsParameters
+{
+	PxReal selfDensity;
+	PxReal particleMassStd;
+	PxReal cellSize;
+	PxReal cellSizeInv; // presumably 1 / cellSize -- TODO confirm
+
+	PxReal cellSizeSq; // presumably cellSize * cellSize -- TODO confirm
+	PxReal packetSize;
+	PxReal radiusStd;
+	PxReal radiusSqStd;
+
+	PxReal densityMultiplierStd;
+	PxReal stiffMulPressureMultiplierStd;
+	PxReal viscosityMultiplierStd;
+	PxReal initialDensity;
+
+	PxReal scaleToStd;
+	PxReal scaleSqToStd;
+	PxReal scaleToWorld;
+	PxReal densityNormalizationFactor;
+
+	PxU32 packetMultLog;
+	PxU32 pad[3]; // fills up the last row of four 32-bit values
+};
+
+PX_COMPILE_TIME_ASSERT(sizeof(DynamicsParameters) % 16 == 0);
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_DYNAMICS_PARAMETER_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsTempBuffers.h b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsTempBuffers.h
new file mode 100644
index 00000000..bafe3679
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsTempBuffers.h
@@ -0,0 +1,62 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+#ifndef PT_DYNAMICS_TEMP_BUFFERS_H
+#define PT_DYNAMICS_TEMP_BUFFERS_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PtParticleCell.h"
+
+namespace physx
+{
+
+namespace Pt
+{
+
+// Bundle of pointers to temporary buffers used by the particle dynamics code. The struct only
+// stores the pointers plus cellHashMaxSize; allocation and ownership are not visible here.
+// NOTE(review): simdPositionsSubpacket and indexStream presumably are the scratch buffers
+// handed to the pair update kernels for transposed SIMD positions and interaction index
+// streams -- confirm at the call sites.
+struct DynamicsTempBuffers
+{
+	PxU32* indicesSubpacketA;
+	PxU32* indicesSubpacketB;
+	PxU32* mergedIndices;
+	Particle* mergedHaloRegions;
+	ParticleCell* cellHashTableSubpacketA;
+	ParticleCell* cellHashTableSubpacketB;
+	PxU32 cellHashMaxSize; // presumably the capacity of the two cell hash tables -- TODO confirm
+	PxU8* simdPositionsSubpacket;
+	PxU32* indexStream;
+	const PxU32* orderedIndicesSubpacket;
+	PxU16* hashKeys;
+};
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_DYNAMICS_TEMP_BUFFERS_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtHeightFieldAabbTest.h b/PhysX_3.4/Source/LowLevelParticles/src/PtHeightFieldAabbTest.h
new file mode 100644
index 00000000..d750a363
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtHeightFieldAabbTest.h
@@ -0,0 +1,310 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_HEIGHT_FIELD_AABB_TEST_H
+#define PT_HEIGHT_FIELD_AABB_TEST_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+//----------------------------------------------------------------------------//
+
+#include "GuHeightField.h"
+#include "GuHeightFieldData.h"
+#include "GuHeightFieldUtil.h"
+#include "PsUtilities.h"
+
+namespace physx
+{
+
+namespace Pt
+{
+
+//----------------------------------------------------------------------------//
+
+/**
+Can be used for querying an AABB against a heightfield, without copying triangles to a temporary buffer.
+An iterator can be created to walk the triangles which intersect the AABB and do not have a hole material assigned.
+This isn't really optimized yet.
+*/
+class HeightFieldAabbTest
+{
+  public:
+	HeightFieldAabbTest(const PxBounds3& localBounds, const Gu::HeightFieldUtil& hfUtil)
+	: mHfUtil(hfUtil), mIsEmpty(false)
+	{
+		const PxHeightFieldGeometry& hfGeom = mHfUtil.getHeightFieldGeometry();
+		// Convert both corners of the query box with shape2hfp() before any index computation.
+		PxVec3 minimum = localBounds.minimum;
+		PxVec3 maximum = localBounds.maximum;
+		minimum = hfUtil.shape2hfp(minimum);
+		maximum = hfUtil.shape2hfp(maximum);
+		// Swap the x (row) / z (column) extents when the matching scale is negative, presumably to restore minimum <= maximum.
+		// if (heightField.getRowScale() < 0)
+		if(hfGeom.rowScale < 0)
+			Ps::swap(minimum.x, maximum.x);
+
+		// if (heightField.getColumnScale() < 0)
+		if(hfGeom.columnScale < 0)
+			Ps::swap(minimum.z, maximum.z);
+
+		// early exit for aabb does not overlap in XZ plane
+		// DO NOT MOVE: since rowScale / columnScale may be negative this has to be done after scaling the bounds
+		// if ((minimum.x > (heightField.getNbRowsFast()-1)) ||
+		if((minimum.x > (mHfUtil.getHeightField().getNbRowsFast() - 1)) ||
+		   //(minimum.z > (heightField.getNbColumnsFast()-1)) ||
+		   (minimum.z > (mHfUtil.getHeightField().getNbColumnsFast() - 1)) || (maximum.x < 0) || (maximum.z < 0))
+		{
+			mIsEmpty = true;
+			return;
+		}
+		// Row/column index bounds reported by the height field for the box extents.
+		mMinRow = mHfUtil.getHeightField().getMinRow(minimum.x);
+		mMaxRow = mHfUtil.getHeightField().getMaxRow(maximum.x);
+		mMinColumn = mHfUtil.getHeightField().getMinColumn(minimum.z);
+		mMaxColumn = mHfUtil.getHeightField().getMaxColumn(maximum.z);
+		// Equal min and max index on either axis is treated as an empty query.
+		if(mMinRow == mMaxRow || mMinColumn == mMaxColumn)
+		{
+			mIsEmpty = true;
+			return;
+		}
+		// Vertical extent of the converted box; intersectsSegment() compares it against the cell corner heights.
+		mMiny = minimum.y;
+		mMaxy = maximum.y;
+
+		// Check if thickness / vertical extent is negative or positive. Set the triangle vertex indices
+		// such that the collision triangles of the heightfield have the correct orientation, i.e., the correct normal
+		// -
+		// If the row and column scale have different signs, the orientation of the collision triangle vertices
+		// need to be swapped
+		mSwapVertIdx12 = ((mHfUtil.getHeightField().getThicknessFast() > 0.0f) !=
+		                  Ps::differentSign(hfGeom.rowScale, hfGeom.columnScale));
+	}
+
+	//----------------------------------------------------------------------------//
+	// Forward iterator over the triangles selected by a HeightFieldAabbTest; only begin()/end() can construct one.
+	class Iterator
+	{
+		// Iterators compare by (triangle index, cell offset) only; the owning test is not part of the comparison.
+	  public:
+		bool operator!=(const Iterator& it) const
+		{
+			return (it.mTri != mTri) || (it.mOffset != mOffset);
+		}
+
+		//----------------------------------------------------------------------------//
+		// Advances to the next non-hole triangle in a cell whose corner heights overlap the box, or to the end() position.
+		Iterator& operator++()
+		{
+			bool isec = (mTri == 1) || mTest.intersectsSegment(mOffset);
+			PX_ASSERT(!(mTri == 1) || mTest.intersectsSegment(mOffset));
+			// Offset carried by end() for a non-empty test; reaching it terminates the walk.
+			PxU32 endOffset = mTest.getMaxOffset();
+			while(mOffset < endOffset)
+			{
+				PX_ASSERT(mColumn < mTest.mMaxColumn);
+				PX_ASSERT(mRow < mTest.mMaxRow);
+				PX_ASSERT(mColumn >= mTest.mMinColumn);
+				PX_ASSERT(mRow >= mTest.mMinRow);
+				// Second triangle of the current cell is still pending: stop on it unless it is a hole.
+				if(mTri == 0 && isec)
+				{
+					mTri++;
+					if(mTest.isHole(mTri, mOffset))
+						continue;
+
+					return *this;
+				}
+				// Step to the first triangle of the next cell in this row.
+				mTri = 0;
+				mColumn++;
+				mOffset++;
+				// Past the last covered column: continue at the first covered column of the next row.
+				if(mColumn == mTest.mMaxColumn)
+				{
+					mRow++;
+					mOffset +=
+					    (mTest.mHfUtil.getHeightField().getNbColumnsFast() - (mTest.mMaxColumn - mTest.mMinColumn));
+					// Past the last covered row: move the offset exactly onto getMaxOffset() so the loop ends.
+					if(mRow == mTest.mMaxRow)
+					{
+						mOffset += (mTest.mMaxColumn - mTest.mMinColumn);
+						continue;
+					}
+					mColumn = mTest.mMinColumn;
+				}
+				// A hole first triangle in an overlapping cell falls into the mTri == 0 branch above on the next pass.
+				isec = mTest.intersectsSegment(mOffset);
+				if(!isec || mTest.isHole(mTri, mOffset))
+					continue;
+
+				return *this;
+			}
+			PX_ASSERT(mOffset == endOffset);
+			return *this;
+		}
+
+		//----------------------------------------------------------------------------//
+		// Writes the three vertices of the current triangle to triangle[0..2]; forwards to the owning test.
+		PX_INLINE void getTriangleVertices(PxVec3* triangle) const
+		{
+			mTest.getTriangleVertices(triangle, *this);
+		}
+
+		//----------------------------------------------------------------------------//
+
+	  private:
+		Iterator& operator=(const Iterator&); // declared only; mTest is a reference member
+		// Positions the iterator on the first triangle (mTri == 0) of cell (row, column).
+		Iterator(PxU32 row, PxU32 column, const HeightFieldAabbTest& test) : mRow(row), mColumn(column), mTest(test)
+		{
+			mTri = 0;
+			mOffset = mRow * mTest.mHfUtil.getHeightField().getNbColumnsFast() + mColumn;
+		}
+
+		//----------------------------------------------------------------------------//
+		// True when the current triangle is not a hole and its cell overlaps the box vertically.
+		bool isValid()
+		{
+			return !mTest.isHole(mTri, mOffset) && mTest.intersectsSegment(mOffset);
+		}
+
+		//----------------------------------------------------------------------------//
+
+		PxU32 mRow;    // row of the current cell
+		PxU32 mColumn; // column of the current cell
+		PxU32 mTri;    // 0 or 1: which of the two triangles of the cell
+		PxU32 mOffset; // mRow * number of columns + mColumn
+		const HeightFieldAabbTest& mTest; // owning test; must outlive the iterator
+
+		friend class HeightFieldAabbTest;
+	};
+
+	//----------------------------------------------------------------------------//
+	// End position. For an empty test begin() and end() both return the (0, 0) iterator, so a loop body never runs.
+	Iterator end() const
+	{
+		if(mIsEmpty)
+			return Iterator(0, 0, *this);
+
+		return Iterator(mMaxRow, mMaxColumn, *this);
+	}
+
+	//----------------------------------------------------------------------------//
+	// First qualifying triangle, found by advancing from cell (mMinRow, mMinColumn) when that position is not valid.
+	Iterator begin() const
+	{
+		if(mIsEmpty)
+			return Iterator(0, 0, *this);
+
+		Iterator itBegin(mMinRow, mMinColumn, *this);
+		if(itBegin != end() && !itBegin.isValid())
+			++itBegin;
+
+		return itBegin;
+	}
+
+  private:
+	HeightFieldAabbTest& operator=(const HeightFieldAabbTest&); // declared only; mHfUtil is a reference member
+	// Row-major offset of cell (mMinRow, mMinColumn).
+	PxU32 getMinOffset() const
+	{
+		return mMinRow * mHfUtil.getHeightField().getNbColumnsFast() + mMinColumn;
+	}
+
+	//----------------------------------------------------------------------------//
+	// Row-major offset of (mMaxRow, mMaxColumn); Iterator::operator++ stops when it reaches this value.
+	PxU32 getMaxOffset() const
+	{
+		return mMaxRow * mHfUtil.getHeightField().getNbColumnsFast() + mMaxColumn;
+	}
+
+	//----------------------------------------------------------------------------//
+	// True when triangle `triangleIndex` (0 or 1) of the cell at `offset` carries the hole material.
+	bool isHole(PxU32 triangleIndex, PxU32 offset) const
+	{
+		return mHfUtil.getHeightField().getTriangleMaterial((offset << 1) + triangleIndex) ==
+		       PxHeightFieldMaterial::eHOLE;
+	}
+
+	//----------------------------------------------------------------------------//
+	// False only when all four corner heights of the cell are strictly above mMaxy, or all strictly below mMiny.
+	bool intersectsSegment(PxU32 offset) const
+	{
+		// should we cache this?
+		PxReal h0 = mHfUtil.getHeightField().getHeight(offset);
+		PxReal h1 = mHfUtil.getHeightField().getHeight(offset + 1);
+		PxReal h2 = mHfUtil.getHeightField().getHeight(offset + mHfUtil.getHeightField().getNbColumnsFast());
+		PxReal h3 = mHfUtil.getHeightField().getHeight(offset + mHfUtil.getHeightField().getNbColumnsFast() + 1);
+
+		// Optimization: Could store the two left height field cell vertices and thus avoid some comparisons here
+		//               (if the bounds covers more than one height field cell)
+		return (!((mMaxy < h0 && mMaxy < h1 && mMaxy < h2 && mMaxy < h3) ||
+		          (mMiny > h0 && mMiny > h1 && mMiny > h2 && mMiny > h3)));
+	}
+
+	//----------------------------------------------------------------------------//
+	// Fetches the iterator's triangle, stores vertices 1/2 in the order chosen by mSwapVertIdx12, then applies hf2shapep().
+	void getTriangleVertices(PxVec3* triangleVertices, const Iterator& iterator) const
+	{
+		PX_ASSERT(iterator.mOffset != getMaxOffset());
+		PX_ASSERT(!isHole(iterator.mTri, iterator.mOffset));
+
+		PxU32 triangleIndex = (iterator.mOffset << 1) + iterator.mTri;
+		PxU32 vertIdx1 = PxU32(mSwapVertIdx12 ? 2 : 1);
+		PxU32 vertIdx2 = PxU32(mSwapVertIdx12 ? 1 : 2);
+
+		mHfUtil.getHeightField().getTriangleVertices(triangleIndex, iterator.mRow, iterator.mColumn, triangleVertices[0],
+		                                             triangleVertices[vertIdx1], triangleVertices[vertIdx2]);
+
+		triangleVertices[0] = mHfUtil.hf2shapep(triangleVertices[0]);
+		triangleVertices[1] = mHfUtil.hf2shapep(triangleVertices[1]);
+		triangleVertices[2] = mHfUtil.hf2shapep(triangleVertices[2]);
+	}
+
+	//----------------------------------------------------------------------------//
+
+	const Gu::HeightFieldUtil& mHfUtil; // held by reference, so the util must outlive this object
+	bool mIsEmpty; // set on the constructor's early exits; some of the members below are then left unassigned
+
+	PxU32 mMinRow; // iteration covers rows [mMinRow, mMaxRow) ...
+	PxU32 mMaxRow;
+	PxU32 mMinColumn; // ... and columns [mMinColumn, mMaxColumn), as asserted in Iterator::operator++
+	PxU32 mMaxColumn;
+	PxReal mMiny; // lower y of the box after shape2hfp()
+	PxReal mMaxy; // upper y of the box after shape2hfp()
+	bool mSwapVertIdx12; // when true, getTriangleVertices() exchanges output slots 1 and 2
+};
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_HEIGHT_FIELD_AABB_TEST_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtPacketSections.h b/PhysX_3.4/Source/LowLevelParticles/src/PtPacketSections.h
new file mode 100644
index 00000000..f3f24b82
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtPacketSections.h
@@ -0,0 +1,55 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+#ifndef PT_PACKETSECTIONS_H
+#define PT_PACKETSECTIONS_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+namespace physx
+{
+
+namespace Pt
+{
+
+// Structure describing boundary section (plus inner section) of a fluid packet.
+// This will be used for halo optimization, i.e., to reduce the number of particles
+// that have to be tested in neighboring packets.
+#define PT_PACKET_SECTIONS 27
+struct PacketSections // Two parallel arrays with PT_PACKET_SECTIONS entries each, one entry per packet section.
+{
+	PxU32 numParticles[PT_PACKET_SECTIONS];  //!< Number of particles in each packet section
+	PxU32 firstParticle[PT_PACKET_SECTIONS]; //!< Start index of the associated particle interval for each packet section
+};
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_PACKETSECTIONS_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtParticleCell.h b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleCell.h
new file mode 100644
index 00000000..f810daae
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleCell.h
@@ -0,0 +1,55 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+#ifndef PT_PARTILCECELL_H
+#define PT_PARTILCECELL_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PtGridCellVector.h"
+
+namespace physx
+{
+
+namespace Pt
+{
+
+// Structure describing a particle cell hash entry.
+struct ParticleCell // One hash entry: cell coordinates plus the start and length of its particle interval.
+{
+	GridCellVector coords; //!< The packet coordinates
+	PxU32 numParticles;    //!< Number of particles in the packet
+	PxU32 firstParticle;   //!< Start index of the associated particle interval
+};
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_PARTILCECELL_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtParticleData.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleData.cpp
new file mode 100644
index 00000000..408a1be9
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleData.cpp
@@ -0,0 +1,505 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtParticleData.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "foundation/PxIO.h"
+#include "particles/PxParticleCreationData.h"
+#include "PxMetaData.h"
+#include "PsAlignedMalloc.h"
+#include "CmUtils.h"
+#include "PtParticle.h"
+
+using namespace physx;
+using namespace Cm;
+using namespace Pt;
+//----------------------------------------------------------------------------//
+
+ParticleData::ParticleData(PxU32 maxParticles, bool perParticleRestOffset)
+{ // Creates an empty store. Expects `this` to head a block of getTotalSize() bytes, as arranged by create().
+	mOwnMemory = true;
+	mMaxParticles = maxParticles;
+	mHasRestOffsets = perParticleRestOffset;
+	mValidParticleCount = 0;
+	mValidParticleRange = 0;
+	mWorldBounds = PxBounds3::empty();
+	// fixupPointers() reads mMaxParticles and mHasRestOffsets, so both are assigned before this call.
+	fixupPointers();
+	mParticleMap.resizeAndClear(mMaxParticles);
+
+#if PX_CHECKED
+	{
+		PxU32 numWords = mMaxParticles * sizeof(Particle) >> 2; // particle buffer size in 32-bit words
+		for(PxU32 i = 0; i < numWords; ++i)
+			reinterpret_cast<PxU32*>(mParticleBuffer)[i] = 0xDEADBEEF; // checked builds only: recognizable fill pattern
+	}
+#endif
+}
+
+//----------------------------------------------------------------------------//
+
+ParticleData::ParticleData(ParticleSystemStateDataDesc& particles, const PxBounds3& bounds)
+{ // Builds a store from an existing description: copies position, velocity and (if present) rest offset of every particle marked in the bitmap.
+	mOwnMemory = true;
+	mMaxParticles = particles.maxParticles;
+	mHasRestOffsets = (particles.restOffsets.ptr() != NULL);
+	mValidParticleCount = particles.numParticles;
+	mValidParticleRange = particles.validParticleRange;
+	mWorldBounds = bounds;
+	// fixupPointers() reads mMaxParticles and mHasRestOffsets, so both are assigned before this call.
+	fixupPointers();
+	if(particles.bitMap)
+		mParticleMap.copy(*particles.bitMap);
+	else
+		mParticleMap.resizeAndClear(mMaxParticles);
+	// First mark every slot of the valid range as not valid, then overwrite the slots whose bitmap bit is set.
+	if(mValidParticleRange > 0)
+	{
+		for(PxU32 i = 0; i < mValidParticleRange; ++i)
+			mParticleBuffer[i].flags.api = PxParticleFlags(0);
+		// w walks the 32-bit bitmap words covering the valid range; b &= b - 1 clears the lowest set bit each step.
+		for(PxU32 w = 0; w <= (mValidParticleRange - 1) >> 5; w++)
+			for(PxU32 b = mParticleMap.getWords()[w]; b; b &= b - 1)
+			{
+				PxU32 index = (w << 5 | Ps::lowestSetBit(b));
+				Particle& dstParticle = mParticleBuffer[index];
+				dstParticle.position = particles.positions[index];
+				dstParticle.velocity = particles.velocities[index];
+				dstParticle.density = 0.0f;
+				dstParticle.flags.low = 0;
+				dstParticle.flags.api = PxParticleFlag::eVALID;
+			}
+
+		if(mHasRestOffsets)
+		{
+			PX_ASSERT(mRestOffsetBuffer);
+			for(PxU32 w = 0; w <= (mValidParticleRange - 1) >> 5; w++)
+				for(PxU32 b = mParticleMap.getWords()[w]; b; b &= b - 1)
+				{
+					PxU32 index = (w << 5 | Ps::lowestSetBit(b));
+					mRestOffsetBuffer[index] = particles.restOffsets[index];
+				}
+		}
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+ParticleData::ParticleData(PxU8* address)
+{ // Used by create(PxDeserializationContext&): `address` must equal `this`; only ownership and internal pointers are (re)set.
+	PX_ASSERT(address == reinterpret_cast<PxU8*>(this));
+	PX_UNUSED(address);
+	mOwnMemory = false; // release() will not free this object
+	fixupPointers(); // reads mMaxParticles / mHasRestOffsets, which this constructor does not assign -- presumably already present in the deserialized memory
+}
+
+//----------------------------------------------------------------------------//
+
+ParticleData::~ParticleData()
+{
+	// Intentionally empty. mParticleBuffer is not a separate allocation: fixupPointers() points it
+	// into the same memory block that starts at this object, so handing it to the aligned
+	// allocator would pass an address the allocator never returned. Blocks obtained through the
+	// allocating create() overloads are freed as a whole by release(); other objects own no memory.
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleData::fixupPointers()
+{ // Recomputes the internal pointers from `this`; layout: header | bitmap words (padded to 16 bytes) | particles | rest offsets (optional).
+	PX_ASSERT(size_t(this) % 16 == 0);
+	PxU8* address = reinterpret_cast<PxU8*>(this);
+
+	address += getHeaderSize();
+	PxU32 bitmapSize = getBitmapSize(mMaxParticles);
+	mParticleMap.importData(bitmapSize / 4, reinterpret_cast<PxU32*>(address)); // bitmapSize is in bytes; importData takes a count of 32-bit words
+	address += (bitmapSize + 15) & ~15; // round the bitmap size up to a multiple of 16
+	mParticleBuffer = reinterpret_cast<Particle*>(address);
+	address += getParticleBufferSize(mMaxParticles);
+	mRestOffsetBuffer = mHasRestOffsets ? reinterpret_cast<PxF32*>(address) : NULL;
+	address += getRestOffsetBufferSize(mMaxParticles, mHasRestOffsets); // NOTE(review): address is not read after this line
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleData::exportData(PxSerializationContext& stream)
+{ // Writes the object together with its trailing buffers as one 16-byte aligned blob of getTotalSize() bytes.
+	clearSimState(); // density and low-level flags are reset on the live object before it is written
+	stream.alignData(16);
+	stream.writeData(this, ParticleData::getTotalSize(mMaxParticles, mHasRestOffsets));
+}
+
+void ParticleData::getBinaryMetaData(PxOutputStream& stream)
+{ // Emits binary metadata records for ParticleFlags, Particle and ParticleData, then two extra-data records.
+	// define ParticleFlags
+	PX_DEF_BIN_METADATA_CLASS(stream, Pt::ParticleFlags)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleFlags, PxU16, api, 0)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleFlags, PxU16, low, 0)
+
+	// define Particle
+	PX_DEF_BIN_METADATA_CLASS(stream, Pt::Particle)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::Particle, PxVec3, position, 0)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::Particle, PxReal, density, 0)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::Particle, PxVec3, velocity, 0)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::Particle, Pt::ParticleFlags, flags, 0)
+
+	// define ParticleData
+	PX_DEF_BIN_METADATA_VCLASS(stream, Pt::ParticleData)
+	// NOTE(review): the item list presumably has to stay in sync with the member declarations -- confirm against PtParticleData.h
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, bool, mOwnMemory, 0)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, PxU32, mMaxParticles, 0)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, bool, mHasRestOffsets, 0)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, PxU32, mValidParticleRange, 0)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, PxU32, mValidParticleCount, 0)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, PxBounds3, mWorldBounds, 0)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, Pt::Particle, mParticleBuffer, PxMetaDataFlag::ePTR)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, PxReal, mRestOffsetBuffer, PxMetaDataFlag::ePTR)
+	PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, BitMap, mParticleMap, 0)
+
+	// extra data: the particle array, and the rest offset array whose record is controlled by mHasRestOffsets
+	PX_DEF_BIN_METADATA_EXTRA_ARRAY(stream, Pt::ParticleData, Pt::Particle, mMaxParticles, 16, 0)
+	PX_DEF_BIN_METADATA_EXTRA_ITEMS(stream, Pt::ParticleData, PxReal, mHasRestOffsets, mMaxParticles, 0, 16)
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleData::clearSimState()
+{
+	if(mValidParticleRange == 0)
+		return; // no slot in use, nothing to reset
+	// Walk the set bits of each bitmap word covering the valid range and reset density and low flags there.
+	const PxU32 lastWord = (mValidParticleRange - 1) >> 5;
+	for(PxU32 word = 0; word <= lastWord; ++word)
+		for(PxU32 bits = mParticleMap.getWords()[word]; bits != 0; bits &= bits - 1)
+		{
+			Particle& live = mParticleBuffer[(word << 5) | Ps::lowestSetBit(bits)];
+			live.flags.low = 0;
+			live.density = 0.0f;
+		}
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleData::onOriginShift(const PxVec3& shift)
+{
+	// Number of 32-bit bitmap words that cover the valid particle range (zero when the range is empty).
+	const PxU32 wordCount = (mValidParticleRange > 0) ? ((mValidParticleRange - 1) >> 5) + 1 : 0;
+	// Each set bit selects one particle slot; its position is moved by -shift.
+	for(PxU32 word = 0; word < wordCount; ++word)
+		for(PxU32 bits = mParticleMap.getWords()[word]; bits != 0; bits &= bits - 1)
+		{
+			const PxU32 slot = (word << 5) | Ps::lowestSetBit(bits);
+			mParticleBuffer[slot].position -= shift;
+		}
+
+	// The cached bounds are translated by the same amount as the particles.
+	mWorldBounds.minimum -= shift;
+	mWorldBounds.maximum -= shift;
+}
+
+//----------------------------------------------------------------------------//
+
+ParticleData* ParticleData::create(ParticleSystemStateDataDesc& particles, const PxBounds3& bounds)
+{ // Allocates one 16-byte aligned block for header plus buffers and constructs the store from `particles` in place.
+	Ps::AlignedAllocator<16, Ps::ReflectionAllocator<ParticleData> > align16;
+	PxU32 totalSize = getTotalSize(particles.maxParticles, particles.restOffsets.ptr() != NULL); // rest offset storage only if the description has rest offsets
+	ParticleData* mem = reinterpret_cast<ParticleData*>(align16.allocate(totalSize, __FILE__, __LINE__)); // NOTE(review): result is used without a null check
+	markSerializedMem(mem, totalSize);
+	PX_PLACEMENT_NEW(mem, ParticleData)(particles, bounds);
+	return mem; // this constructor sets mOwnMemory, so release() frees the block
+}
+
+//----------------------------------------------------------------------------//
+
+ParticleData* ParticleData::create(PxU32 maxParticles, bool perParticleRestOffsets)
+{ // Allocates one 16-byte aligned block for header plus buffers and constructs an empty store in place.
+	Ps::AlignedAllocator<16, Ps::ReflectionAllocator<ParticleData> > align16;
+	PxU32 totalSize = getTotalSize(maxParticles, perParticleRestOffsets);
+	ParticleData* mem = reinterpret_cast<ParticleData*>(align16.allocate(totalSize, __FILE__, __LINE__)); // NOTE(review): result is used without a null check
+	markSerializedMem(mem, totalSize);
+	PX_PLACEMENT_NEW(mem, ParticleData)(maxParticles, perParticleRestOffsets);
+	return mem; // this constructor sets mOwnMemory, so release() frees the block
+}
+
+//----------------------------------------------------------------------------//
+
+ParticleData* ParticleData::create(PxDeserializationContext& context)
+{ // Rebuilds a store in place on memory handed out by the deserialization context; nothing is allocated here.
+	ParticleData* mem = context.readExtraData<ParticleData, PX_SERIAL_ALIGN>();
+	new (mem) ParticleData(reinterpret_cast<PxU8*>(mem)); // pointer-fixup constructor; clears mOwnMemory
+	context.readExtraData<PxU8>(getDataSize(mem->getMaxParticles(), mem->getRestOffsetBuffer() != NULL)); // result unused: presumably just advances the context past the trailing buffers
+	return mem;
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleData::release()
+{
+	if(mOwnMemory) // memory not owned by this object (mOwnMemory == false) is left untouched
+	{
+		Ps::AlignedAllocator<16> blockAllocator;
+		blockAllocator.deallocate(this); // returns the whole block, object header included
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+bool ParticleData::addParticlesV(const PxParticleCreationData& creationData)
+{ // Adds creationData.numParticles particles at the caller-chosen slot indices; always returns true.
+	PX_ASSERT(creationData.numParticles <= mMaxParticles);
+	PX_ASSERT(creationData.indexBuffer.ptr() && creationData.positionBuffer.ptr());
+	PX_ASSERT((mRestOffsetBuffer != NULL) == (creationData.restOffsetBuffer.ptr() != NULL));
+	// Without a velocity buffer, a zero-stride iterator over zeroVector hands every particle a zero velocity.
+	const PxVec3 zeroVector(0.0f);
+
+	PxStrideIterator<const PxU32> indexIt = creationData.indexBuffer;
+	PxStrideIterator<const PxVec3> positionIt = creationData.positionBuffer;
+	PxStrideIterator<const PxVec3> velocityIt =
+	    creationData.velocityBuffer.ptr() ? creationData.velocityBuffer : PxStrideIterator<const PxVec3>(&zeroVector, 0);
+
+	for(PxU32 i = 0; i < creationData.numParticles; i++)
+	{
+		const PxU32 particleIndex = *indexIt;
+		PX_ASSERT(particleIndex < mMaxParticles); // valid slots are [0, mMaxParticles); mMaxParticles itself is out of range
+
+		Particle& particle = mParticleBuffer[particleIndex];
+		PX_ASSERT(!mParticleMap.test(particleIndex));
+		mParticleMap.set(particleIndex);
+		// A slot at or beyond the old valid range extends it; a slot below it is expected to carry a cleared VALID flag.
+		if(particleIndex + 1 > mValidParticleRange)
+		{
+			mValidParticleRange = particleIndex + 1;
+		}
+		else
+		{
+			PX_ASSERT(!(particle.flags.api & PxParticleFlag::eVALID));
+		}
+
+		particle.position = *positionIt;
+		particle.velocity = *velocityIt;
+		particle.flags.low = 0;
+		particle.flags.api = PxParticleFlag::eVALID;
+		particle.density = 0.0f;
+
+		mWorldBounds.include(particle.position);
+
+		positionIt++;
+		velocityIt++;
+		indexIt++;
+	}
+	// Rest offsets are copied in a second pass over the same index list, only when this store has a rest offset buffer.
+	if(mRestOffsetBuffer)
+	{
+		PxStrideIterator<const PxF32> restOffsetIt = creationData.restOffsetBuffer;
+		indexIt = creationData.indexBuffer;
+
+		for(PxU32 i = 0; i < creationData.numParticles; i++)
+		{
+			const PxU32 particleIndex = *indexIt;
+			mRestOffsetBuffer[particleIndex] = *restOffsetIt;
+			restOffsetIt++;
+			indexIt++;
+		}
+	}
+
+	mValidParticleCount += creationData.numParticles;
+	return true;
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleData::removeParticlesV(PxU32 count, const PxStrideIterator<const PxU32>& indices)
+{
+	for(PxU32 n = 0; n != count; ++n)
+		removeParticle(indices[n]);
+	mValidParticleCount -= count;
+	// With particles left, the valid range ends one past the last set bit of the bitmap; otherwise it is empty.
+	mValidParticleRange = (mValidParticleCount == 0) ? 0 : mParticleMap.findLast() + 1;
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleData::removeParticlesV()
+{
+	// Remove each particle the bitmap iterator reports, then reset range and count.
+	Cm::BitMap::Iterator liveIt(mParticleMap);
+	for(PxU32 slot = 0; (slot = liveIt.getNext()) != Cm::BitMap::Iterator::DONE;)
+		removeParticle(slot);
+	mValidParticleRange = 0;
+	mValidParticleCount = 0;
+	PX_ASSERT(mValidParticleCount == 0);
+}
+
+//----------------------------------------------------------------------------//
+
+PxU32 ParticleData::getParticleCountV() const
+{ // Returns mValidParticleCount, the counter maintained by the add/remove functions.
+	return mValidParticleCount;
+}
+
+//----------------------------------------------------------------------------//
+
+/**
+In the non-gpu implementation the full state is always available.
+*/
+void ParticleData::getParticlesV(ParticleSystemStateDataDesc& particles, bool /*fullState*/, bool) const
+{ // Fills `particles` with counts and strided read-only views into this object's buffers; nothing is copied, both bools are unused.
+	PX_ASSERT(mValidParticleCount <= mMaxParticles);
+
+	particles.bitMap = &mParticleMap;
+	particles.numParticles = mValidParticleCount;
+	particles.maxParticles = mMaxParticles;
+	particles.validParticleRange = mValidParticleRange;
+	// With no valid particles every view is a default-constructed iterator; otherwise the views step through the Particle array.
+	if(mValidParticleCount == 0)
+	{
+		particles.positions = PxStrideIterator<const PxVec3>();
+		particles.velocities = PxStrideIterator<const PxVec3>();
+		particles.flags = PxStrideIterator<const ParticleFlags>();
+		particles.restOffsets = PxStrideIterator<const PxF32>();
+	}
+	else
+	{
+		PX_ASSERT(mParticleBuffer);
+		particles.positions = PxStrideIterator<const PxVec3>(&mParticleBuffer->position, sizeof(Particle));
+		particles.velocities = PxStrideIterator<const PxVec3>(&mParticleBuffer->velocity, sizeof(Particle));
+		particles.flags = PxStrideIterator<const ParticleFlags>(&mParticleBuffer->flags, sizeof(Particle));
+		particles.restOffsets =
+		    mRestOffsetBuffer ? PxStrideIterator<const PxF32>(mRestOffsetBuffer) : PxStrideIterator<const PxF32>();
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleData::setPositionsV(PxU32 numParticles, const PxStrideIterator<const PxU32>& indices,
+                                 const PxStrideIterator<const PxVec3>& positions)
+{ // Overwrites the positions of the listed particles: the i-th position goes to the slot named by the i-th index.
+	PX_ASSERT(indices.ptr() && positions.ptr());
+
+	PxStrideIterator<const PxU32> indexIt(indices);
+	PxStrideIterator<const PxVec3> positionIt(positions);
+	// The world bounds are only extended here; this function never shrinks them.
+	for(PxU32 i = 0; i != numParticles; ++i)
+	{
+		PxU32 particleIndex = *indexIt++;
+		PX_ASSERT(particleIndex < mMaxParticles); // valid slots are [0, mMaxParticles)
+		PX_ASSERT(mParticleMap.test(particleIndex));
+		Particle& particle = mParticleBuffer[particleIndex];
+		particle.position = *positionIt++;
+		mWorldBounds.include(particle.position);
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleData::setVelocitiesV(PxU32 numParticles, const PxStrideIterator<const PxU32>& indices,
+                                  const PxStrideIterator<const PxVec3>& velocities)
+{ // Overwrites the velocities of the listed particles: the i-th velocity goes to the slot named by the i-th index.
+	PX_ASSERT(indices.ptr() && velocities.ptr());
+
+	PxStrideIterator<const PxU32> indexIt(indices);
+	PxStrideIterator<const PxVec3> velocityIt(velocities);
+
+	for(PxU32 i = 0; i != numParticles; ++i)
+	{
+		PxU32 particleIndex = *indexIt++;
+		PX_ASSERT(particleIndex < mMaxParticles); // valid slots are [0, mMaxParticles)
+		PX_ASSERT(mParticleMap.test(particleIndex));
+		Particle& particle = mParticleBuffer[particleIndex];
+		particle.velocity = *velocityIt++;
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleData::setRestOffsetsV(PxU32 numParticles, const PxStrideIterator<const PxU32>& indices,
+                                   const PxStrideIterator<const PxF32>& restOffsets)
+{ // Overwrites the rest offsets of the listed particles: the i-th value goes to the slot named by the i-th index.
+	PX_ASSERT(indices.ptr() && restOffsets.ptr());
+
+	PxStrideIterator<const PxU32> indexIt(indices);
+	PxStrideIterator<const PxF32> restOffsetIt(restOffsets);
+	// NOTE(review): mRestOffsetBuffer is NULL for stores built without rest offsets; callers presumably never call this then.
+	for(PxU32 i = 0; i != numParticles; ++i)
+	{
+		PxU32 particleIndex = *indexIt++;
+		PX_ASSERT(particleIndex < mMaxParticles); // valid slots are [0, mMaxParticles)
+		PX_ASSERT(mParticleMap.test(particleIndex));
+		mRestOffsetBuffer[particleIndex] = *restOffsetIt++;
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleData::addDeltaVelocitiesV(const Cm::BitMap& bufferMap, const PxVec3* buffer, PxReal multiplier)
+{
+	Cm::BitMap::Iterator deltaIt(bufferMap); // yields the slots for which `buffer` holds a delta
+	for(PxU32 slot = 0; (slot = deltaIt.getNext()) != Cm::BitMap::Iterator::DONE;)
+	{
+		PX_ASSERT(mParticleMap.boundedTest(slot));
+		mParticleBuffer[slot].velocity += buffer[slot] * multiplier; // buffer is indexed by particle slot, not by iteration order
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+PxBounds3 ParticleData::getWorldBoundsV() const
+{ // Returns a copy of the cached bounds (mWorldBounds); no recomputation happens here.
+	return mWorldBounds;
+}
+
+//----------------------------------------------------------------------------//
+
+PxU32 ParticleData::getMaxParticlesV() const
+{ // Returns the capacity (mMaxParticles) set at construction.
+	return mMaxParticles;
+}
+
+//----------------------------------------------------------------------------//
+
+PX_FORCE_INLINE void ParticleData::removeParticle(PxU32 particleIndex)
+{
+	PX_ASSERT(particleIndex < mMaxParticles); // valid slots are [0, mMaxParticles)
+	// Clears one slot: API flags and bitmap bit. Count and valid range are left to the caller.
+	Particle& particle = mParticleBuffer[particleIndex];
+	PX_ASSERT(particle.flags.api & PxParticleFlag::eVALID);
+	PX_ASSERT(mParticleMap.test(particleIndex));
+
+#if PX_CHECKED
+	for(PxU32 i = 0; i < (sizeof(Particle) >> 2); ++i) // checked builds only: overwrite the slot word by word
+		reinterpret_cast<PxU32*>(&particle)[i] = 0xDEADBEEF;
+#endif
+	particle.flags.api = PxParticleFlags(0);
+	mParticleMap.reset(particleIndex);
+}
+
+//----------------------------------------------------------------------------//
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtParticleOpcodeCache.h b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleOpcodeCache.h
new file mode 100644
index 00000000..0e77a146
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleOpcodeCache.h
@@ -0,0 +1,441 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_PARTICLE_OPCODE_CACHE_H
+#define PT_PARTICLE_OPCODE_CACHE_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "foundation/PxBounds3.h"
+#include "GuGeometryUnion.h"
+#include "PtParticleSystemFlags.h"
+#include "PsUtilities.h"
+
+namespace physx
+{
+
+namespace Pt
+{
+
+/**
+Represents a per particle opcode cache for collision with meshes.
+The cache contains
+- number of triangle indices
+- mesh pointer
+- triangle indices
+- bounds representing the volume within which the cache is valid
+
+The cache is always guaranteed to reference ALL triangles that are relevant for a given mesh for a given volume.
+
+There are four different data layouts to optimize access speed for fewer indices and maximize the amount of triangles
+that can be cached.
+1. regular PxBounds3 with up to 1 x 16 bit triangle indices.
+2. compressed volume with up to 6 x 16 bit triangle indices.
+3. compressed volume with up to 9 x 10 bit triangle indices. (The indices are compressed if the range of indices
+allows).
+4. compressed volume with up to 3 x 32 bit triangle indices. (For large meshes).
+
+Storage is one count byte (mTriangleCount), 27 payload bytes (mData) and the mesh pointer (mGeom).
+
+Usage is two phased: init() + add() collect raw triangle indices into a caller supplied scratch buffer
+(mGeom temporarily aliases that buffer), then write() packs them into mData and stores the real mesh pointer.
+read() unpacks them again if the cache is still valid for the queried bounds.
+*/
+struct ParticleOpcodeCache
+{
+	// Largest number of triangles any of the layouts can hold.
+	static const PxU32 sMaxCachedTriangles = 9;
+
+	// Scale factors used to convert extents to/from the 8 bit quantized representation.
+	struct QuantizationParams
+	{
+		PxF32 dequantizationMultiplier;
+		PxF32 quantizationMultiplier;
+	};
+
+	// Builds the quantization factors so that an extent of maxExtents maps to the quantized value 254.
+	static PX_FORCE_INLINE QuantizationParams getQuantizationParams(const PxF32 maxExtents)
+	{
+		QuantizationParams params;
+		params.quantizationMultiplier = 254 * (1.0f / maxExtents);
+		params.dequantizationMultiplier = (1.0f / 254) * maxExtents;
+		return params;
+	}
+
+	// Returns the stored mesh pointer. Between init() and write() this aliases the scratch index buffer instead.
+	PX_FORCE_INLINE const Gu::GeometryUnion* getGeometry() const
+	{
+		return mGeom;
+	}
+
+	// Word wise copy of the whole cache: 8 x 32 bit words, plus 2 more on 64 bit pointer platforms.
+	PX_FORCE_INLINE ParticleOpcodeCache& operator=(const ParticleOpcodeCache& p)
+	{
+		const PxU32* src = reinterpret_cast<const PxU32*>(&p);
+		PxU32* dst = reinterpret_cast<PxU32*>(this);
+		dst[0] = src[0];
+		dst[1] = src[1];
+		dst[2] = src[2];
+		dst[3] = src[3];
+		dst[4] = src[4];
+		dst[5] = src[5];
+		dst[6] = src[6];
+		dst[7] = src[7];
+
+#if PX_P64_FAMILY
+		dst[8] = src[8];
+		dst[9] = src[9];
+#endif
+		return *this;
+	}
+
+	// set mGeom to a temp mem to store the triangles index
+	// init for triangles mesh cache
+	// The buffer must be able to hold sMaxCachedTriangles indices, since add() writes up to that many.
+	PX_FORCE_INLINE void init(PxU32* triangles)
+	{
+		mTriangleCount = 0;
+		mGeom = reinterpret_cast<Gu::GeometryUnion*>(triangles);
+	}
+
+	// add triangles
+	// Appends numTriangles indices to the scratch buffer set by init(). If the total would exceed
+	// sMaxCachedTriangles nothing is copied and the count is set to PX_MAX_U8, which makes write() reject the cache.
+	PX_FORCE_INLINE void add(const PxU32* triangles, const PxU32 numTriangles)
+	{
+		const PxU32 end = mTriangleCount + numTriangles;
+		if(end <= sMaxCachedTriangles)
+		{
+			PxU32* tmp = const_cast<PxU32*>(reinterpret_cast<const PxU32*>(mGeom));
+			for(PxU32 i = mTriangleCount; i < end; i++)
+			{
+				tmp[i] = *triangles++;
+			}
+			mTriangleCount = Ps::to8(end);
+		}
+		else
+		{
+			PX_COMPILE_TIME_ASSERT(sMaxCachedTriangles < PX_MAX_U8);
+			// this result in marking the cache invalid
+			mTriangleCount = PX_MAX_U8;
+		}
+	}
+
+	// Packs the collected triangle indices and the validity bounds into mData, choosing the layout from
+	// isSmallMesh and the triangle count. On success both cache bits are set in internalParticleFlags and mGeom
+	// is pointed at mesh. If no layout fits, the cache bits are cleared and mGeom is left unchanged.
+	PX_FORCE_INLINE void write(PxU16& internalParticleFlags, const PxBounds3& bounds,
+	                           const QuantizationParams& quantizationParams, const Gu::GeometryUnion& mesh,
+	                           const bool isSmallMesh)
+	{
+		// mGeom still aliases the scratch index buffer handed to init()
+		PxU32* triangles = const_cast<PxU32*>(reinterpret_cast<const PxU32*>(mGeom));
+		if(isSmallMesh && mTriangleCount <= 1)
+		{
+			// Layout of mData:
+			// PxU8 pad
+			// PxU16 index
+			// PxBounds3 bounds
+			PxU8* ptr = mData + 1;
+			reinterpret_cast<PxU16&>(*ptr) = (mTriangleCount > 0) ? static_cast<PxU16>(triangles[0]) : PxU16(0);
+			ptr += sizeof(PxU16);
+			reinterpret_cast<PxBounds3&>(*ptr) = bounds;
+		}
+		else
+		{
+			// Layout of mData:
+			// PxU8 extentX, extentY, extentZ
+			// PxVec3 center
+			// PxU8[12] indexData
+			PxU8* ptr = mData;
+			PxU8& extentX = *ptr++;
+			PxU8& extentY = *ptr++;
+			PxU8& extentZ = *ptr++;
+			PxVec3& center = reinterpret_cast<PxVec3&>(*ptr);
+			ptr += sizeof(PxVec3);
+			quantizeBounds(center, extentX, extentY, extentZ, bounds, quantizationParams);
+
+			if(isSmallMesh && mTriangleCount <= 6)
+			{
+				writeTriangles_6xU16(ptr, triangles, mTriangleCount);
+			}
+			else if(isSmallMesh && mTriangleCount <= 9)
+			{
+				// only works if all indices lie within a 10 bit range of the smallest one
+				bool success = writeTriangles_BaseU16_9xU10(ptr, triangles, mTriangleCount);
+				if(!success)
+				{
+					internalParticleFlags &= ~PxU16(InternalParticleFlag::eGEOM_CACHE_MASK);
+					return;
+				}
+			}
+			else if(!isSmallMesh && mTriangleCount <= 3)
+			{
+				writeTriangles_3xU32(ptr, triangles, mTriangleCount);
+			}
+			else
+			{
+				// too many triangles (including the PX_MAX_U8 overflow marker set by add())
+				internalParticleFlags &= ~PxU16(InternalParticleFlag::eGEOM_CACHE_MASK);
+				return;
+			}
+		}
+
+		// refresh the cache flags
+		internalParticleFlags |= (InternalParticleFlag::eGEOM_CACHE_BIT_0 | InternalParticleFlag::eGEOM_CACHE_BIT_1);
+		mGeom = &mesh;
+	}
+
+	// Tries to fetch the cached triangle indices for mesh. Succeeds only if cache bit 0 is set, the cache was
+	// written for the same mesh pointer and bounds lies inside the cached volume. On success numTriangles and
+	// triangleBuffer are filled, both cache bits are set and true is returned. Otherwise numTriangles is set to 0
+	// and false is returned. triangleBuffer must have room for sMaxCachedTriangles entries (the 9 x 10 bit layout
+	// always writes all nine).
+	PX_FORCE_INLINE bool read(PxU16& internalParticleFlags, PxU32& numTriangles, PxU32* triangleBuffer,
+	                          const PxBounds3& bounds, const QuantizationParams& quantizationParams,
+	                          const Gu::GeometryUnion* mesh, const bool isSmallMesh) const
+	{
+		// cache bits:
+		// (00) -> no read (invalid)
+		// (01) -> read
+		// (11) -> no read (can't be the case with mGeom == mesh)
+		PX_ASSERT(mGeom != mesh || !((internalParticleFlags & InternalParticleFlag::eGEOM_CACHE_BIT_0) != 0 &&
+		                             (internalParticleFlags & InternalParticleFlag::eGEOM_CACHE_BIT_1) != 0));
+
+		if((internalParticleFlags & InternalParticleFlag::eGEOM_CACHE_BIT_0) != 0 && mGeom == mesh)
+		{
+			numTriangles = mTriangleCount;
+			if(isSmallMesh && numTriangles <= 1)
+			{
+				// Layout of mData:
+				// PxU8 pad
+				// PxU16 index
+				// PxBounds3 bounds
+				const PxU8* ptr = mData + 1;
+				*triangleBuffer = reinterpret_cast<const PxU16&>(*ptr);
+				ptr += sizeof(PxU16);
+				const PxBounds3& cachedBounds = reinterpret_cast<const PxBounds3&>(*ptr);
+
+				// if (bounds.isInside(cachedBounds)) //sschirm, we should implement the isInside to use fsels as well.
+				// each component is 0 when bounds is inside on that side, negative otherwise
+				PxVec3 dMin = (bounds.minimum - cachedBounds.minimum).minimum(PxVec3(0));
+				PxVec3 dMax = (cachedBounds.maximum - bounds.maximum).minimum(PxVec3(0));
+				PxF32 sum = dMin.x + dMin.y + dMin.z + dMax.x + dMax.y + dMax.z;
+				if(sum == 0.0f)
+				{
+					// refresh the cache bits (11)
+					internalParticleFlags |=
+					    (InternalParticleFlag::eGEOM_CACHE_BIT_0 | InternalParticleFlag::eGEOM_CACHE_BIT_1);
+					return true;
+				}
+			}
+			else
+			{
+				// Layout of mData:
+				// PxU8 extentX, extentY, extentZ
+				// PxVec3 center
+				// PxU8[12] indexData
+				const PxU8* ptr = mData;
+				const PxU8 extentX = *ptr++;
+				const PxU8 extentY = *ptr++;
+				const PxU8 extentZ = *ptr++;
+				const PxVec3& center = reinterpret_cast<const PxVec3&>(*ptr);
+				ptr += sizeof(PxVec3);
+				PX_ASSERT(!bounds.isEmpty());
+
+				// if (bounds.isInside(cachedBounds)) //sschirm, we should implement the isInside to use fsels as well.
+				PxVec3 diffMin = bounds.minimum - center;
+				PxVec3 diffMax = bounds.maximum - center;
+				PxF32 diffx = PxMax(PxAbs(diffMin.x), PxAbs(diffMax.x));
+				PxF32 diffy = PxMax(PxAbs(diffMin.y), PxAbs(diffMax.y));
+				PxF32 diffz = PxMax(PxAbs(diffMin.z), PxAbs(diffMax.z));
+				// Saturate instead of converting out-of-range floats: a query far away from the cached center
+				// must not wrap around into a small value and produce a false cache hit.
+				const PxU8 dX = quantizeToU8(diffx * quantizationParams.quantizationMultiplier);
+				const PxU8 dY = quantizeToU8(diffy * quantizationParams.quantizationMultiplier);
+				const PxU8 dZ = quantizeToU8(diffz * quantizationParams.quantizationMultiplier);
+				if((dX < extentX) && (dY < extentY) && (dZ < extentZ))
+				{
+					if(isSmallMesh && numTriangles <= 6)
+					{
+						readTriangles_6xU16(triangleBuffer, ptr, numTriangles);
+						// refresh the cache bits (11)
+						internalParticleFlags |=
+						    (InternalParticleFlag::eGEOM_CACHE_BIT_0 | InternalParticleFlag::eGEOM_CACHE_BIT_1);
+						return true;
+					}
+					else if(isSmallMesh && numTriangles <= 9)
+					{
+						readTriangles_BaseU16_9xU10(triangleBuffer, ptr, numTriangles);
+						// refresh the cache bits (11)
+						internalParticleFlags |=
+						    (InternalParticleFlag::eGEOM_CACHE_BIT_0 | InternalParticleFlag::eGEOM_CACHE_BIT_1);
+						return true;
+					}
+					else if(!isSmallMesh && numTriangles <= 3)
+					{
+						readTriangles_3xU32(triangleBuffer, ptr, numTriangles);
+						// refresh the cache bits (11)
+						internalParticleFlags |=
+						    (InternalParticleFlag::eGEOM_CACHE_BIT_0 | InternalParticleFlag::eGEOM_CACHE_BIT_1);
+						return true;
+					}
+				}
+			}
+		}
+
+		// cache invalid!
+		numTriangles = 0;
+		return false;
+	}
+
+  private:
+	PxU8 mTriangleCount;             // number of cached triangles, PX_MAX_U8 marks an overflowed collection
+	PxU8 mData[27];                  // packed bounds + indices, layout depends on mesh size and triangle count
+	const Gu::GeometryUnion* mGeom;  // mesh the cache belongs to (scratch index buffer between init() and write())
+
+	// Converts a non-negative float to PxU8, saturating at PX_MAX_U8. Values that do not compare below 255
+	// (including NaN) yield PX_MAX_U8, which avoids the undefined out-of-range float to integer conversion.
+	static PX_FORCE_INLINE PxU8 quantizeToU8(const PxF32 value)
+	{
+		return (value < 255.0f) ? PxU8(value) : PxU8(PX_MAX_U8);
+	}
+
+	// Stores the center of bounds and its extents quantized to 8 bit per axis (rounded up by adding 1).
+	// Empty bounds yield zero extents, so that read() can never report a hit for them.
+	// Extents too large for 8 bit saturate at 255, which only shrinks the cached volume.
+	static PX_FORCE_INLINE void quantizeBounds(PxVec3& center, PxU8& extentX, PxU8& extentY, PxU8& extentZ,
+	                                           const PxBounds3& bounds, const QuantizationParams& quantizationParams)
+	{
+		center = bounds.getCenter();
+		if(!bounds.isEmpty())
+		{
+			PxVec3 extents = bounds.getExtents();
+			extentX = quantizeToU8((extents.x * quantizationParams.quantizationMultiplier) + 1);
+			extentY = quantizeToU8((extents.y * quantizationParams.quantizationMultiplier) + 1);
+			extentZ = quantizeToU8((extents.z * quantizationParams.quantizationMultiplier) + 1);
+			PX_ASSERT(extentX != 0 && extentY != 0 && extentZ != 0);
+		}
+		else
+		{
+			extentX = 0;
+			extentY = 0;
+			extentZ = 0;
+		}
+	}
+
+	// Rebuilds bounds from a center and 8 bit quantized extents.
+	static PX_FORCE_INLINE void dequantizeBounds(PxBounds3& bounds, const PxVec3& center, const PxU8 extentX,
+	                                             const PxU8 extentY, const PxU8 extentZ,
+	                                             const QuantizationParams& quantizationParams)
+	{
+		PxVec3 extents(extentX * quantizationParams.dequantizationMultiplier,
+		               extentY * quantizationParams.dequantizationMultiplier,
+		               extentZ * quantizationParams.dequantizationMultiplier);
+		bounds = PxBounds3::centerExtents(center, extents);
+	}
+
+	// Stores up to 6 indices as consecutive 16 bit values.
+	static PX_FORCE_INLINE void writeTriangles_6xU16(PxU8* data, const PxU32* triangles, const PxU32 numTriangles)
+	{
+		PX_ASSERT(numTriangles <= 6);
+		PxU16* ptr = reinterpret_cast<PxU16*>(data);
+		for(PxU32 t = 0; t < numTriangles; ++t)
+			*ptr++ = Ps::to16(triangles[t]);
+	}
+
+	// Stores 7 to 9 indices as a 16 bit base (the smallest index) followed by eight 10 bit offsets from that base,
+	// 96 bits in total. Returns false without writing anything if the index range does not fit into 10 bits.
+	static PX_FORCE_INLINE bool writeTriangles_BaseU16_9xU10(PxU8* data, const PxU32* triangles, const PxU32 numTriangles)
+	{
+		PX_ASSERT(numTriangles <= 9);
+
+		// check index range
+		PxU32 min = 0xffffffff;
+		PxU32 max = 0;
+		PxU32 minIndex = 0xffffffff;
+		for(PxU32 i = 0; i < numTriangles; ++i)
+		{
+			if(triangles[i] < min)
+			{
+				min = triangles[i];
+				minIndex = i;
+			}
+
+			if(triangles[i] > max)
+				max = triangles[i];
+		}
+
+		PxU32 range = max - min;
+		if(range < (1 << 10))
+		{
+			// copy triangles to subtract base and remove 0 element
+			PX_ASSERT(numTriangles > 6 && numTriangles <= 9);
+			// zero initialised so that the unused tail slots packed below hold defined values
+			PxU16 triCopy[12] = { 0 };
+			{
+				for(PxU32 i = 0; i < numTriangles; ++i)
+					triCopy[i] = PxU16(triangles[i] - min);
+
+				// the base is stored separately, so its slot is reused for the last offset
+				PX_ASSERT(triCopy[minIndex] == 0);
+				triCopy[minIndex] = triCopy[numTriangles - 1];
+			}
+
+			PxU16* buffer = reinterpret_cast<PxU16*>(data);
+			buffer[0] = Ps::to16(min);
+			buffer[1] = PxU16((triCopy[0] << 6) | (triCopy[1] >> 4));
+			buffer[2] = PxU16((triCopy[1] << 12) | (triCopy[2] << 2) | (triCopy[3] >> 8));
+			buffer[3] = PxU16((triCopy[3] << 8) | (triCopy[4] >> 2));
+			buffer[4] = PxU16((triCopy[4] << 14) | (triCopy[5] << 4) | (triCopy[6] >> 6));
+			buffer[5] = PxU16((triCopy[6] << 10));
+
+			// slots beyond the valid offsets are packed as well; readers ignore them based on the triangle count
+			buffer[5] |= triCopy[7];
+
+			return true;
+		}
+		return false;
+	}
+
+	// Stores up to 3 indices as consecutive 32 bit values.
+	static PX_FORCE_INLINE void writeTriangles_3xU32(PxU8* data, const PxU32* triangles, const PxU32 numTriangles)
+	{
+		PX_ASSERT(numTriangles <= 3);
+		PxU32* ptr = reinterpret_cast<PxU32*>(data);
+		for(PxU32 t = 0; t < numTriangles; ++t)
+			*ptr++ = triangles[t];
+	}
+
+	// Reads numTriangles consecutive 16 bit indices and widens them to 32 bit.
+	static PX_FORCE_INLINE void readTriangles_6xU16(PxU32* triangleBuffer, const PxU8* data, const PxU32 numTriangles)
+	{
+		PX_ASSERT(numTriangles <= 6);
+		const PxU16* ptr = reinterpret_cast<const PxU16*>(data);
+		const PxU16* end = ptr + numTriangles;
+		PxU32 dstIndex = 0;
+		while(ptr != end)
+			triangleBuffer[dstIndex++] = *ptr++;
+	}
+
+	// Inverse of writeTriangles_BaseU16_9xU10. Always writes nine entries to triangleBuffer; only the first
+	// numTriangles of them are meaningful.
+	static PX_FORCE_INLINE void readTriangles_BaseU16_9xU10(PxU32* triangleBuffer, const PxU8* data,
+	                                                        const PxU32 numTriangles)
+	{
+		PX_ASSERT(numTriangles > 6 && numTriangles <= 9);
+		PX_UNUSED(numTriangles);
+
+		const PxU16* buffer = reinterpret_cast<const PxU16*>(data);
+		PxU32 offset = buffer[0];
+		// low 10 bits set
+		const PxU32 mask = 0xffffffff >> (6 + 16);
+		triangleBuffer[0] = offset;
+		triangleBuffer[1] = ((PxU32(buffer[1] >> 6)) & mask) + offset;
+		triangleBuffer[2] = ((PxU32(buffer[1] << 4) | PxU32(buffer[2] >> 12)) & mask) + offset;
+		triangleBuffer[3] = ((PxU32(buffer[2] >> 2)) & mask) + offset;
+		triangleBuffer[4] = ((PxU32(buffer[2] << 8) | PxU32(buffer[3] >> 8)) & mask) + offset;
+		triangleBuffer[5] = ((PxU32(buffer[3] << 2) | PxU32(buffer[4] >> 14)) & mask) + offset;
+		triangleBuffer[6] = ((PxU32(buffer[4] >> 4)) & mask) + offset;
+
+		// we can write the last two, even if they are rubbish.
+		triangleBuffer[7] = ((PxU32(buffer[4] << 6) | PxU32(buffer[5] >> 10)) & mask) + offset;
+		triangleBuffer[8] = (PxU32(buffer[5]) & mask) + offset;
+	}
+
+	// Reads numTriangles consecutive 32 bit indices.
+	static PX_FORCE_INLINE void readTriangles_3xU32(PxU32* triangleBuffer, const PxU8* data, const PxU32 numTriangles)
+	{
+		PX_ASSERT(numTriangles <= 3);
+		const PxU32* ptr = reinterpret_cast<const PxU32*>(data);
+		const PxU32* end = ptr + numTriangles;
+		PxU32 dstIndex = 0;
+		while(ptr != end)
+			triangleBuffer[dstIndex++] = *ptr++;
+	}
+};
+
+// Size guard: wherever pointers are 4 bytes or smaller, ParticleOpcodeCache must occupy exactly 32 bytes.
+// Platforms with wider pointers are exempt from the check.
+PX_COMPILE_TIME_ASSERT(sizeof(PxTriangleMeshGeometryLL*) > 4 || sizeof(ParticleOpcodeCache) == 32);
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_PARTICLE_OPCODE_CACHE_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtParticleShapeCpu.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleShapeCpu.cpp
new file mode 100644
index 00000000..44b82b3b
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleShapeCpu.cpp
@@ -0,0 +1,76 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtParticleShapeCpu.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PtContext.h"
+#include "PtParticleSystemSimCpu.h"
+#include "PtSpatialHash.h"
+
+using namespace physx;
+using namespace Pt;
+
+// Creates an unassigned shape: only the index is stored, all pointers start out NULL.
+// The context argument is unused. mBounds and mPacketCoordinates are not set here; init() fills them in.
+ParticleShapeCpu::ParticleShapeCpu(Context*, PxU32 index)
+: mIndex(index)
+, mParticleSystem(NULL)
+, mPacket(NULL)
+, mUserData(NULL)
+{
+}
+
+// Nothing to release here: the destructor body is intentionally empty.
+ParticleShapeCpu::~ParticleShapeCpu()
+{
+}
+
+// Binds this (so far unassigned) shape to a particle system and one of its packets, then caches the
+// packet coordinates and the packet's bounds.
+void ParticleShapeCpu::init(ParticleSystemSimCpu* particleSystem, const ParticleCell* packet)
+{
+	// the shape must not be in use yet
+	PX_ASSERT(mParticleSystem == NULL);
+	PX_ASSERT(mPacket == NULL);
+	PX_ASSERT(mUserData == NULL);
+
+	// both arguments are mandatory
+	PX_ASSERT(particleSystem);
+	PX_ASSERT(packet);
+
+	mPacket = packet;
+	mPacketCoordinates = packet->coords; // kept separately, the remapping process relies on it
+	mParticleSystem = particleSystem;
+
+	// ask the owning system for the AABB of the packet and store it in mBounds
+	particleSystem->getPacketBounds(mPacketCoordinates, mBounds);
+}
+
+// Hands this shape back to the context of its particle system and resets the shape to the unassigned state.
+void ParticleShapeCpu::destroyV()
+{
+	PX_ASSERT(mParticleSystem);
+	// must happen before mParticleSystem is cleared, since the context is reached through it
+	// NOTE(review): members are still written after releaseParticleShape(this); presumably the context only
+	// recycles the object and does not free its memory - confirm.
+	mParticleSystem->getContext().releaseParticleShape(this);
+
+	mParticleSystem = NULL;
+	mPacket = NULL;
+	mUserData = NULL;
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtParticleShapeCpu.h b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleShapeCpu.h
new file mode 100644
index 00000000..ccb525ea
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleShapeCpu.h
@@ -0,0 +1,114 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_PARTICLE_SHAPE_CPU_H
+#define PT_PARTICLE_SHAPE_CPU_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "foundation/PxTransform.h"
+#include "foundation/PxBounds3.h"
+#include "PtConfig.h"
+#include "PtSpatialHash.h"
+#include "PtParticleShape.h"
+
+namespace physx
+{
+
+namespace Pt
+{
+
+class Context;
+
+// CPU side particle shape: ties one packet (ParticleCell) of a ParticleSystemSimCpu to an index,
+// cached bounds and a user data pointer.
+class ParticleShapeCpu : public ParticleShape
+{
+  public:
+	// The context argument is not used by the constructor; index is stored and returned by getIndex().
+	ParticleShapeCpu(Context* context, PxU32 index);
+	virtual ~ParticleShapeCpu();
+
+	// Assigns the shape to a particle system and packet and caches the packet coordinates and bounds.
+	void init(class ParticleSystemSimCpu* particleSystem, const ParticleCell* packet);
+
+	// Implements ParticleShape
+	// Returns a copy of the bounds cached by init().
+	virtual PxBounds3 getBoundsV() const
+	{
+		return mBounds;
+	}
+	// Stores an opaque user pointer; the shape never dereferences it.
+	virtual void setUserDataV(void* data)
+	{
+		mUserData = data;
+	}
+	virtual void* getUserDataV() const
+	{
+		return mUserData;
+	}
+	// Releases the shape back to the context and clears its assignment.
+	virtual void destroyV();
+	//~Implements ParticleShape
+
+	// Replaces the packet pointer only; packet coordinates and bounds are left untouched.
+	PX_FORCE_INLINE void setFluidPacket(const ParticleCell* packet)
+	{
+		PX_ASSERT(packet);
+		mPacket = packet;
+	}
+	PX_FORCE_INLINE const ParticleCell* getFluidPacket() const
+	{
+		return mPacket;
+	}
+
+	PX_FORCE_INLINE PxU32 getIndex() const
+	{
+		return mIndex;
+	}
+	PX_FORCE_INLINE class ParticleSystemSimCpu* getParticleSystem()
+	{
+		return mParticleSystem;
+	}
+	PX_FORCE_INLINE const class ParticleSystemSimCpu* getParticleSystem() const
+	{
+		return mParticleSystem;
+	}
+	PX_FORCE_INLINE GridCellVector getPacketCoordinates() const
+	{
+		return mPacketCoordinates;
+	}
+
+  private:
+	PxU32 mIndex;                                // index passed to the constructor
+	class ParticleSystemSimCpu* mParticleSystem; // owning system, NULL while unassigned
+	PxBounds3 mBounds;                           // AABB of the assigned packet, set by init()
+	GridCellVector mPacketCoordinates; // This is needed for the remapping process.
+	const ParticleCell* mPacket;                 // assigned packet, NULL while unassigned
+	void* mUserData;                             // opaque user pointer
+};
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_PARTICLE_SHAPE_CPU_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtParticleSystemSimCpu.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleSystemSimCpu.cpp
new file mode 100644
index 00000000..5b9326bc
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleSystemSimCpu.cpp
@@ -0,0 +1,858 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtParticleSystemSimCpu.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "foundation/PxProfiler.h"
+#include "PxvGeometry.h"
+#include "PtContext.h"
+#include "PtParticleShapeCpu.h"
+
+//----------------------------------------------------------------------------//
+
+// Standard value for particle resolution
+#define PXN_FLUID_REST_PARTICLE_PER_UNIT_STD 10.0f
+
+// Macros to clamp dynamic friction and restitution (particle collision) to values that give stable results.
+#define DYNAMIC_FRICTION_CLAMP 0.001f
+#define RESTITUTION_CLAMP 0.05f
+
+#define CLAMP_DYNAMIC_FRICTION(t) PxClamp(t, DYNAMIC_FRICTION_CLAMP, 1.0f)
+#define CLAMP_RESTITUTION(t) PxClamp(t, 0.0f, 1.0f - RESTITUTION_CLAMP)
+
+using namespace physx;
+using namespace Pt;
+
+//----------------------------------------------------------------------------//
+
+// Gives access to the particle state object of this system. The state must exist.
+ParticleSystemState& ParticleSystemSimCpu::getParticleStateV()
+{
+	PX_ASSERT(mParticleState);
+	ParticleSystemState& state = *mParticleState;
+	return state;
+}
+
+//----------------------------------------------------------------------------//
+
+// Fills simParticleData with stride iterators over the per particle simulation results.
+// All iterators are reset first. They are only pointed at real data if there is at least one particle and
+// the system has been simulated; the density / collision normal / collision velocity iterators additionally
+// require the matching read data flag, the two way iterators require mFluidTwoWayData to be present.
+void ParticleSystemSimCpu::getSimParticleDataV(ParticleSystemSimDataDesc& simParticleData, bool) const
+{
+	// start from "no data available"
+	simParticleData.densities = PxStrideIterator<const PxF32>();
+	simParticleData.collisionNormals = PxStrideIterator<const PxVec3>();
+	simParticleData.collisionVelocities = PxStrideIterator<const PxVec3>();
+	simParticleData.twoWayImpluses = PxStrideIterator<const PxVec3>();
+	simParticleData.twoWayBodies = PxStrideIterator<BodyHandle>();
+
+	const bool hasResults = mParticleState->getParticleCount() > 0 && mSimulated;
+	if(!hasResults)
+		return;
+
+	if(mParameter->particleReadDataFlags & PxParticleReadDataFlag::eDENSITY_BUFFER)
+	{
+		// densities live inside the particle structs, hence the Particle sized stride
+		simParticleData.densities =
+		    PxStrideIterator<const PxF32>(&mParticleState->getParticleBuffer()->density, sizeof(Particle));
+	}
+
+	if(mParameter->particleReadDataFlags & PxParticleReadDataFlag::eCOLLISION_NORMAL_BUFFER)
+	{
+		simParticleData.collisionNormals = PxStrideIterator<const PxVec3>(mTransientBuffer, sizeof(PxVec3));
+	}
+
+	if(mParameter->particleReadDataFlags & PxParticleReadDataFlag::eCOLLISION_VELOCITY_BUFFER)
+	{
+		simParticleData.collisionVelocities = PxStrideIterator<const PxVec3>(mCollisionVelocities);
+	}
+
+	if(mFluidTwoWayData)
+	{
+		// impulse and body are interleaved in TwoWayData records
+		simParticleData.twoWayImpluses =
+		    PxStrideIterator<const PxVec3>(&mFluidTwoWayData->impulse, sizeof(TwoWayData));
+		simParticleData.twoWayBodies =
+		    PxStrideIterator<BodyHandle>(reinterpret_cast<BodyHandle*>(&mFluidTwoWayData->body), sizeof(TwoWayData));
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+/**
+Reports the shapes destroyed and created by the last shape update.
+Called from HL twice per step: once after the shape update at the start of the frame has been executed,
+and once after the particle pipeline has finished.
+
+Both lists share mCreatedDeletedParticleShapes: the destroyed shapes come first, the created ones follow.
+*/
+void ParticleSystemSimCpu::getShapesUpdateV(ParticleShapeUpdateResults& updateResults) const
+{
+	PX_ASSERT(mIsSimulated);
+
+	// created shapes start right behind the destroyed ones
+	updateResults.createdShapes = mCreatedDeletedParticleShapes + mNumDeletedParticleShapes;
+	updateResults.createdShapeCount = mNumCreatedParticleShapes;
+
+	updateResults.destroyedShapes = mCreatedDeletedParticleShapes;
+	updateResults.destroyedShapeCount = mNumDeletedParticleShapes;
+}
+
+//----------------------------------------------------------------------------//
+
+// Chains the packet shapes tasks in front of continuation (update -> finalization -> continuation),
+// stores input for the tasks to use and returns the first task of the chain.
+physx::PxBaseTask& ParticleSystemSimCpu::schedulePacketShapesUpdate(const ParticleShapesUpdateInput& input,
+                                                                    physx::PxBaseTask& continuation)
+{
+	mPacketShapesFinalizationTask.setContinuation(&continuation);
+	mPacketShapesUpdateTask.setContinuation(&mPacketShapesFinalizationTask);
+	// NOTE(review): presumably drops the reference taken when the finalization task got its continuation, so that
+	// only the update task keeps it alive - confirm against the task manager semantics.
+	mPacketShapesFinalizationTask.removeReference();
+	mPacketShapesUpdateTaskInput = input;
+	return mPacketShapesUpdateTask;
+}
+
+//----------------------------------------------------------------------------//
+
+// Returns the task to run for the dynamics stage. With SPH enabled this is the dynamics update task, chained
+// in front of continuation. Without SPH the stage is skipped: continuation itself is returned after
+// receiving one additional reference.
+physx::PxBaseTask& ParticleSystemSimCpu::scheduleDynamicsUpdate(physx::PxBaseTask& continuation)
+{
+	if(mParameter->flags & InternalParticleSystemFlag::eSPH)
+	{
+		mDynamicsUpdateTask.setContinuation(&continuation);
+		return mDynamicsUpdateTask;
+	}
+
+	// no dynamics work to do, pass the continuation straight through
+	continuation.addReference();
+	return continuation;
+}
+
+//----------------------------------------------------------------------------//
+
+// Chains the collision tasks in front of continuation (update -> finalization -> continuation) and returns
+// the first task of the chain.
+physx::PxBaseTask& ParticleSystemSimCpu::scheduleCollisionUpdate(physx::PxBaseTask& continuation)
+{
+	mCollisionFinalizationTask.setContinuation(&continuation);
+	mCollisionUpdateTask.setContinuation(&mCollisionFinalizationTask);
+	// NOTE(review): presumably drops the reference taken when the finalization task got its continuation, so that
+	// only the update task keeps it alive - confirm against the task manager semantics.
+	mCollisionFinalizationTask.removeReference();
+	return mCollisionUpdateTask;
+}
+
+//----------------------------------------------------------------------------//
+
+// Forwards to the spatial hash to rebuild the packet sections. Only expected to run for SPH systems.
+void ParticleSystemSimCpu::spatialHashUpdateSections(physx::PxBaseTask* continuation)
+{
+	PX_ASSERT(mParameter->flags & InternalParticleSystemFlag::eSPH);
+
+	// Split each packet into sections and reorder particles of a packet according to these sections
+
+	mSpatialHash->updatePacketSections(mPacketParticlesIndices, mParticleState->getParticleBuffer(), continuation);
+}
+
+// Resets the created/deleted shape counters and, if there are particles, hashes them to packets.
+// The index buffer is allocated lazily on first use. For SPH systems the packet hash update continues with the
+// section update task, otherwise directly with the packet shapes finalization task.
+void ParticleSystemSimCpu::packetShapesUpdate(physx::PxBaseTask*)
+{
+	PX_ASSERT(mIsSimulated);
+	PX_ASSERT(mSpatialHash);
+
+	// Init parameters for tracking of new/deleted fluid shapes
+	mNumDeletedParticleShapes = 0;
+	mNumCreatedParticleShapes = 0;
+
+	// nothing to hash without particles
+	if(!(mParticleState->getValidParticleRange() > 0))
+		return;
+
+	if(!mPacketParticlesIndices)
+	{
+		// one index per particle slot
+		mPacketParticlesIndices = reinterpret_cast<PxU32*>(
+		    mAlign16.allocate(mParticleState->getMaxParticles() * sizeof(PxU32), __FILE__, __LINE__));
+	}
+
+	physx::PxBaseTask* nextTask;
+	if(mParameter->flags & InternalParticleSystemFlag::eSPH)
+	{
+		// SPH needs the sections rebuilt before finalization can run
+		mSpatialHashUpdateSectionsTask.setContinuation(&mPacketShapesFinalizationTask);
+		nextTask = &mSpatialHashUpdateSectionsTask;
+	}
+	else
+	{
+		nextTask = &mPacketShapesFinalizationTask;
+		nextTask->addReference();
+	}
+
+	// Hash particles to packets and reorder particle indices
+	mSpatialHash->updatePacketHash(mNumPacketParticlesIndices, mPacketParticlesIndices,
+	                               mParticleState->getParticleBuffer(), mParticleState->getParticleMap(),
+	                               mParticleState->getValidParticleRange(), nextTask);
+}
+
+//----------------------------------------------------------------------------//
+
+// Final step of the packet shapes update: remaps the shapes handed in through the task input to the current
+// packets and then frees the shape array, whose ownership was transferred to this object.
+void ParticleSystemSimCpu::packetShapesFinalization(physx::PxBaseTask*)
+{
+	// - Find for each packet shape the related packet and adjust the mapping.
+	// - Track created / deleted packets.
+	remapShapesToPackets(mPacketShapesUpdateTaskInput.shapes, mPacketShapesUpdateTaskInput.shapeCount);
+
+	// release the shapes, since their ownership was transferred to us.
+	if(mPacketShapesUpdateTaskInput.shapes)
+	{
+		PX_FREE(mPacketShapesUpdateTaskInput.shapes);
+		// do not keep a dangling pointer (or a count describing it) around; a repeated finalization without a
+		// fresh input would otherwise free the same memory twice
+		mPacketShapesUpdateTaskInput.shapes = NULL;
+		mPacketShapesUpdateTaskInput.shapeCount = 0;
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+// Runs the SPH dynamics for the current step. Does nothing if no particles were hashed to packets.
+// The SPH flag is asserted and additionally checked at runtime before the dynamics are started.
+void ParticleSystemSimCpu::dynamicsUpdate(physx::PxBaseTask* continuation)
+{
+	PX_ASSERT(mParameter->flags & InternalParticleSystemFlag::eSPH);
+	PX_ASSERT(mIsSimulated);
+	PX_ASSERT(mSpatialHash);
+	PX_ASSERT(continuation);
+
+	if(!(mNumPacketParticlesIndices > 0))
+		return;
+
+	updateDynamicsParameter();
+
+	if(mParameter->flags & InternalParticleSystemFlag::eSPH)
+		mDynamics.updateSph(*continuation);
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleSystemSimCpu::collisionUpdate(physx::PxBaseTask* continuation)
+{
+	PX_ASSERT(mIsSimulated);
+	PX_ASSERT(mSpatialHash);
+	PX_ASSERT(mCollisionUpdateTaskInput.contactManagerStream); // input is stored by passCollisionInputV()
+	PX_ASSERT(continuation);
+
+	updateCollisionParameter(); // refresh time step, external acceleration and material values
+
+	mParticleState->getWorldBounds().setEmpty(); // world bounds are reset before the collision pass runs
+
+	mCollision.updateCollision(mCollisionUpdateTaskInput.contactManagerStream, *continuation);
+	mCollision.updateOverflowParticles();
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleSystemSimCpu::collisionFinalization(physx::PxBaseTask*)
+{
+	PX_FREE(mCollisionUpdateTaskInput.contactManagerStream); // the stream handed over for this step is freed here
+	mCollisionUpdateTaskInput.contactManagerStream = NULL; // passCollisionInputV() asserts that this is NULL
+
+	mSimulated = true; // set after a collision pass was finalized; reset to false by init() and clear()
+
+	// Clear the created / deleted shape counters of this update.
+	mNumDeletedParticleShapes = 0;
+	mNumCreatedParticleShapes = 0;
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleSystemSimCpu::setExternalAccelerationV(const PxVec3& v)
+{
+	mExternalAcceleration = v; // copied into the collision parameters by updateCollisionParameter()
+}
+
+//----------------------------------------------------------------------------//
+
+const PxVec3& ParticleSystemSimCpu::getExternalAccelerationV() const
+{
+	return mExternalAcceleration; // zero after init() until setExternalAccelerationV() is called
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleSystemSimCpu::setSimulationTimeStepV(PxReal value)
+{
+	PX_ASSERT(value >= 0.0f); // zero is accepted; updateCollisionParameter() then uses invTimeStep = 0
+
+	mSimulationTimeStep = value; // read by updateCollisionParameter() (damping factor, timeStep, invTimeStep)
+}
+
+//----------------------------------------------------------------------------//
+
+PxReal ParticleSystemSimCpu::getSimulationTimeStepV() const
+{
+	return mSimulationTimeStep; // 0.0f after init() until setSimulationTimeStepV() is called
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleSystemSimCpu::setSimulatedV(bool isSimulated)
+{
+	mIsSimulated = isSimulated; // note: distinct from mSimulated, which is set by collisionFinalization()
+	if(!isSimulated)
+		clearParticleConstraints(); // switching simulation off invalidates all per-particle constraints
+}
+
+//----------------------------------------------------------------------------//
+
+Ps::IntBool ParticleSystemSimCpu::isSimulatedV() const
+{
+	return mIsSimulated; // bool member (set by setSimulatedV()) converted to Ps::IntBool
+}
+
+//----------------------------------------------------------------------------//
+
+ParticleSystemSimCpu::ParticleSystemSimCpu(ContextCpu* context, PxU32 index)
+: mContext(*context) // context is dereferenced unconditionally, so it must not be NULL
+, mParticleState(NULL)
+, mSimulated(false)
+, mFluidTwoWayData(NULL)
+, mCreatedDeletedParticleShapes(NULL)
+, mPacketParticlesIndices(NULL)
+, mNumPacketParticlesIndices(0)
+, mOpcodeCacheBuffer(NULL)
+, mTransientBuffer(NULL)
+, mCollisionVelocities(NULL)
+, mDynamics(*this)
+, mCollision(*this)
+, mIndex(index)
+, mPacketShapesUpdateTask(this, "Pt::ParticleSystemSimCpu.packetShapesUpdate")
+, mPacketShapesFinalizationTask(this, "Pt::ParticleSystemSimCpu.packetShapesFinalization")
+, mDynamicsUpdateTask(this, "Pt::ParticleSystemSimCpu.dynamicsUpdate")
+, mCollisionUpdateTask(this, "Pt::ParticleSystemSimCpu.collisionUpdate")
+, mCollisionFinalizationTask(this, "Pt::ParticleSystemSimCpu.collisionFinalization")
+, mSpatialHashUpdateSectionsTask(this, "Pt::ParticleSystemSimCpu.spatialHashUpdateSections")
+{ // NOTE(review): mParameter, mSpatialHash, mSimulationTimeStep, mExternalAcceleration and the shape counters
+} // are first assigned in init(); mIsSimulated only in setSimulatedV(). Confirm nothing reads them earlier.
+
+//----------------------------------------------------------------------------//
+
+ParticleSystemSimCpu::~ParticleSystemSimCpu()
+{ // nothing is freed here; the buffers allocated by init() are released by clear()
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleSystemSimCpu::init(ParticleData& particleData, const ParticleSystemParameter& parameter)
+{
+	mParticleState = &particleData; // only the address is stored; clear() calls release() on it
+	mParticleState->clearSimState();
+	mParameter = &parameter; // only the address is stored, the parameter block is not copied
+	mSimulationTimeStep = 0.0f;
+	mExternalAcceleration = PxVec3(0);
+	mPacketParticlesIndices = NULL; // allocated on demand once there are valid particles
+
+	initializeParameter(); // must precede the SpatialHash creation below, which reads cellSizeInv
+
+	PxU32 maxParticles = mParticleState->getMaxParticles();
+
+	// Initialize buffers: every per-particle buffer below is sized for maxParticles entries.
+	mConstraintBuffers.constraint0Buf =
+	    reinterpret_cast<Constraint*>(mAlign16.allocate(maxParticles * sizeof(Constraint), __FILE__, __LINE__));
+	mConstraintBuffers.constraint1Buf =
+	    reinterpret_cast<Constraint*>(mAlign16.allocate(maxParticles * sizeof(Constraint), __FILE__, __LINE__));
+	if(mParameter->flags & PxParticleBaseFlag::eCOLLISION_WITH_DYNAMIC_ACTORS) // dynamic data only when requested
+	{
+		mConstraintBuffers.constraint0DynamicBuf = reinterpret_cast<ConstraintDynamic*>(
+		    mAlign16.allocate(maxParticles * sizeof(ConstraintDynamic), __FILE__, __LINE__));
+		mConstraintBuffers.constraint1DynamicBuf = reinterpret_cast<ConstraintDynamic*>(
+		    mAlign16.allocate(maxParticles * sizeof(ConstraintDynamic), __FILE__, __LINE__));
+	}
+	else
+	{
+		mConstraintBuffers.constraint0DynamicBuf = NULL;
+		mConstraintBuffers.constraint1DynamicBuf = NULL;
+	}
+
+	if((mParameter->flags & PxParticleBaseFlag::eCOLLISION_TWOWAY) &&
+	   (mParameter->flags & PxParticleBaseFlag::eCOLLISION_WITH_DYNAMIC_ACTORS)) // else keeps NULL from ctor / clear()
+		mFluidTwoWayData =
+		    reinterpret_cast<TwoWayData*>(mAlign16.allocate(maxParticles * sizeof(TwoWayData), __FILE__, __LINE__));
+
+#if PX_CHECKED
+	{
+		PxU32 numWords = maxParticles * sizeof(Constraint) >> 2; // buffer size in bytes / 4 = count of 32-bit words
+		for(PxU32 i = 0; i < numWords; ++i)
+		{
+			reinterpret_cast<PxU32*>(mConstraintBuffers.constraint0Buf)[i] = 0xDEADBEEF; // recognisable fill pattern
+			reinterpret_cast<PxU32*>(mConstraintBuffers.constraint1Buf)[i] = 0xDEADBEEF;
+		}
+	}
+#endif
+
+	if(mParameter->flags & PxParticleBaseFlag::ePER_PARTICLE_COLLISION_CACHE_HINT)
+	{
+		mOpcodeCacheBuffer = reinterpret_cast<ParticleOpcodeCache*>(
+		    mAlign16.allocate(maxParticles * sizeof(ParticleOpcodeCache), __FILE__, __LINE__));
+#if PX_CHECKED
+		// sschirm: avoid reading uninitialized mGeom in ParticleOpcodeCache::read in assert statement
+		PxMemZero(mOpcodeCacheBuffer, maxParticles * sizeof(ParticleOpcodeCache));
+#endif
+	}
+
+	if((mParameter->flags & InternalParticleSystemFlag::eSPH) ||
+	   (mParameter->particleReadDataFlags & PxParticleReadDataFlag::eCOLLISION_NORMAL_BUFFER)) // shared by both uses
+		mTransientBuffer =
+		    reinterpret_cast<PxVec3*>(mAlign16.allocate(maxParticles * sizeof(PxVec3), __FILE__, __LINE__));
+
+	if(mParameter->particleReadDataFlags & PxParticleReadDataFlag::eCOLLISION_VELOCITY_BUFFER)
+		mCollisionVelocities =
+		    reinterpret_cast<PxVec3*>(mAlign16.allocate(maxParticles * sizeof(PxVec3), __FILE__, __LINE__));
+
+	mCreatedDeletedParticleShapes = reinterpret_cast<ParticleShape**>( // deleted shapes first, created ones after
+	    PX_ALLOC(2 * PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE * sizeof(ParticleShape*), "ParticleShape*"));
+	mNumCreatedParticleShapes = 0;
+	mNumDeletedParticleShapes = 0;
+
+	// Create object for spatial hashing (placement new into PX_ALLOC'ed memory; destroyed explicitly in clear()).
+	mSpatialHash = reinterpret_cast<SpatialHash*>(PX_ALLOC(sizeof(SpatialHash), "SpatialHash"));
+	if(mSpatialHash) // NOTE(review): a failed allocation is skipped silently; later code only asserts on mSpatialHash
+	{
+		new (mSpatialHash) SpatialHash(PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE, mDynamics.getParameter().cellSizeInv,
+		                               mParameter->packetSizeMultiplierLog2,
+		                               (mParameter->flags & InternalParticleSystemFlag::eSPH) != 0);
+	}
+
+	mCollisionUpdateTaskInput.contactManagerStream = NULL; // passCollisionInputV() asserts on this being NULL
+
+	// Make sure we start deactivated.
+	mSimulated = false;
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleSystemSimCpu::clear()
+{
+	mDynamics.clear();
+
+	if(mSpatialHash)
+	{
+		mSpatialHash->~SpatialHash(); // was placement-new'ed in init(), so it is destroyed explicitly
+		PX_FREE(mSpatialHash);
+		mSpatialHash = NULL;
+	}
+
+	// Free particle buffers (init() allocates the two static constraint buffers unconditionally).
+	mAlign16.deallocate(mConstraintBuffers.constraint0Buf);
+	mConstraintBuffers.constraint0Buf = NULL;
+
+	mAlign16.deallocate(mConstraintBuffers.constraint1Buf);
+	mConstraintBuffers.constraint1Buf = NULL;
+
+	if(mConstraintBuffers.constraint0DynamicBuf) // the remaining buffers are optional, depending on the flags in init()
+	{
+		mAlign16.deallocate(mConstraintBuffers.constraint0DynamicBuf);
+		mConstraintBuffers.constraint0DynamicBuf = NULL;
+	}
+
+	if(mConstraintBuffers.constraint1DynamicBuf)
+	{
+		mAlign16.deallocate(mConstraintBuffers.constraint1DynamicBuf);
+		mConstraintBuffers.constraint1DynamicBuf = NULL;
+	}
+
+	if(mOpcodeCacheBuffer)
+	{
+		mAlign16.deallocate(mOpcodeCacheBuffer);
+		mOpcodeCacheBuffer = NULL;
+	}
+
+	if(mTransientBuffer)
+	{
+		mAlign16.deallocate(mTransientBuffer);
+		mTransientBuffer = NULL;
+	}
+
+	if(mCollisionVelocities)
+	{
+		mAlign16.deallocate(mCollisionVelocities);
+		mCollisionVelocities = NULL;
+	}
+
+	if(mCreatedDeletedParticleShapes)
+	{
+		PX_FREE(mCreatedDeletedParticleShapes); // allocated with PX_ALLOC, hence PX_FREE instead of mAlign16
+		mCreatedDeletedParticleShapes = NULL;
+	}
+
+	if(mPacketParticlesIndices)
+	{
+		mAlign16.deallocate(mPacketParticlesIndices);
+		mPacketParticlesIndices = NULL;
+	}
+	mNumPacketParticlesIndices = 0;
+
+	if(mFluidTwoWayData)
+	{
+		mAlign16.deallocate(mFluidTwoWayData);
+		mFluidTwoWayData = NULL;
+	}
+
+	mSimulated = false;
+
+	if(mParticleState) // NULL if obtainParticleState() already handed the particle data to the caller
+	{
+		mParticleState->release();
+		mParticleState = NULL;
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+ParticleData* ParticleSystemSimCpu::obtainParticleState()
+{
+	PX_ASSERT(mParticleState);
+	ParticleData* const handedOver = mParticleState; // the caller receives the particle data ...
+	mParticleState = NULL; // ... and clear() will therefore no longer release() it
+	return handedOver;
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleSystemSimCpu::remapShapesToPackets(ParticleShape* const* shapes, PxU32 numShapes)
+{
+	PX_ASSERT(mNumCreatedParticleShapes == 0); // both counters are expected to be reset before this call
+	PX_ASSERT(mNumDeletedParticleShapes == 0);
+
+	if(mParticleState->getValidParticleRange() > 0)
+	{
+		PX_ASSERT(mSpatialHash);
+
+		Cm::BitMap mappedFluidPackets; // Marks the fluid packets that are mapped to a fluid shape.
+		mappedFluidPackets.resizeAndClear(PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE);
+
+		// Find for each shape the corresponding packet. If it does not exist the shape has to be deleted.
+		for(PxU32 i = 0; i < numShapes; i++)
+		{
+			ParticleShapeCpu* shape = static_cast<ParticleShapeCpu*>(shapes[i]);
+
+			PxU32 hashIndex; // written by findCell(); only used when a packet was found
+			const ParticleCell* particlePacket = mSpatialHash->findCell(hashIndex, shape->getPacketCoordinates());
+			if(particlePacket)
+			{
+				shape->setFluidPacket(particlePacket);
+
+				// Mark packet as mapped.
+				mappedFluidPackets.set(hashIndex);
+			}
+			else
+			{
+				mCreatedDeletedParticleShapes[mNumDeletedParticleShapes++] = shape; // deleted shapes fill the front
+			}
+		}
+
+		// Check for each packet whether it is mapped to a fluid shape. If not, a new shape must be created.
+		const ParticleCell* fluidPackets = mSpatialHash->getPackets();
+		PX_ASSERT((mappedFluidPackets.getWordCount() << 5) >= PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE); // 32 bits per word
+		for(PxU32 p = 0; p < PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE; p++)
+		{
+			if((!mappedFluidPackets.test(p)) && (fluidPackets[p].numParticles != PX_INVALID_U32)) // skip unused entries
+			{
+				ParticleShapeCpu* shape = mContext.createParticleShape(this, &fluidPackets[p]);
+				if(shape) // a NULL result is skipped without further handling
+				{
+					mCreatedDeletedParticleShapes[mNumDeletedParticleShapes + mNumCreatedParticleShapes++] = shape;
+				}
+			}
+		}
+	}
+	else
+	{
+		// No valid particles: release all shapes.
+		for(PxU32 i = 0; i < numShapes; i++)
+		{
+			ParticleShapeCpu* shape = static_cast<ParticleShapeCpu*>(shapes[i]);
+			mCreatedDeletedParticleShapes[mNumDeletedParticleShapes++] = shape;
+		}
+	}
+}
+
+//----------------------------------------------------------------------------//
+// Body Shape Reference Invalidation
+//----------------------------------------------------------------------------//
+
+/**
+Marks the constraints of every particle in the particle map as invalid.
+Only the constraint-valid bits in Particle::flags.low are cleared; the constraint
+buffers themselves are not touched. Per the original note this is sufficient, since
+the bits are checked before references are copied from the constraints to the TwoWayData.
+*/
+void ParticleSystemSimCpu::clearParticleConstraints()
+{
+	Particle* const particles = mParticleState->getParticleBuffer();
+	Cm::BitMap::Iterator occupied(mParticleState->getParticleMap());
+
+	// getNext() yields Iterator::DONE once the iteration is exhausted.
+	PxU32 slot;
+	while((slot = occupied.getNext()) != Cm::BitMap::Iterator::DONE)
+		particles[slot].flags.low &= PxU16(~InternalParticleFlag::eANY_CONSTRAINT_VALID);
+}
+
+//----------------------------------------------------------------------------//
+
+/**
+Notification that the interaction between a particle packet shape and a rigid body shape is removed.
+Work is only done when the rigid body is dying (isDyingRb): per-particle data that may still
+refer to the body or to its triangle mesh geometry is invalidated. The last flag is unused.
+*/
+void ParticleSystemSimCpu::removeInteractionV(const ParticleShape& particleShape, ShapeHandle shape, BodyHandle body,
+                                              bool isDynamic, bool isDyingRb, bool)
+{
+	if(!isDyingRb)
+		return;
+
+	const ParticleShapeCpu& packetShape = static_cast<const ParticleShapeCpu&>(particleShape);
+
+	if(isDynamic)
+	{
+		// Two-way constraints may still point at the body. This may only be done while the packets cover
+		// the same particles as when the constraints were generated (which is the case with isDyingRb).
+		if(mFluidTwoWayData)
+			removeTwoWayRbReferences(packetShape, reinterpret_cast<const PxsBodyCore*>(body));
+		return;
+	}
+
+	// Non-dynamic body: the per-particle collision cache may still refer to a triangle mesh geometry.
+	// This may only be done while the packets cover the same particles as when the cache was used last
+	// (must be the last simulation step, since the cache gets invalidated after one step not being used).
+	const PxsShapeCore* shapeCore = reinterpret_cast<const PxsShapeCore*>(shape);
+	if(mOpcodeCacheBuffer && shapeCore->geometry.getType() == PxGeometryType::eTRIANGLEMESH)
+		setCollisionCacheInvalid(packetShape, shapeCore->geometry);
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleSystemSimCpu::onRbShapeChangeV(const ParticleShape& particleShape, ShapeHandle shape)
+{
+	// Without a collision cache buffer (see init()) there is nothing that could refer to the shape.
+	if(!mOpcodeCacheBuffer)
+		return;
+
+	// Only triangle meshes are handled. This may only be done while the packets cover the same particles as when
+	// the cache was used last (the last simulation step; the cache is invalidated after one step of not being used).
+	const PxsShapeCore* shapeCore = reinterpret_cast<const PxsShapeCore*>(shape);
+	if(shapeCore->geometry.getType() == PxGeometryType::eTRIANGLEMESH)
+		setCollisionCacheInvalid(static_cast<const ParticleShapeCpu&>(particleShape), shapeCore->geometry);
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleSystemSimCpu::passCollisionInputV(ParticleCollisionUpdateInput input)
+{
+	PX_ASSERT(mCollisionUpdateTaskInput.contactManagerStream == NULL); // previous stream must have been finalized
+	mCollisionUpdateTaskInput = input; // the contained stream is freed later by collisionFinalization()
+}
+
+//----------------------------------------------------------------------------//
+
+/**
+Removes references to a specific rigid body (PxsBodyCore) from the particles belonging to a certain shape.
+The constraint data itself needs to be accessed: when constraint 0 refers to the body while constraint 1 is still
+valid, constraint 1 is moved into slot 0, so that a single remaining constraint sits in the first slot of the pair.
+
+Should only be called when packets cover the same particles as when the constraints were generated!
+*/
+void ParticleSystemSimCpu::removeTwoWayRbReferences(const ParticleShapeCpu& particleShape, const PxsBodyCore* rigidBody)
+{
+	PX_ASSERT(mFluidTwoWayData);
+	PX_ASSERT(mConstraintBuffers.constraint0DynamicBuf);
+	PX_ASSERT(mConstraintBuffers.constraint1DynamicBuf);
+	PX_ASSERT(rigidBody);
+	PX_ASSERT(particleShape.getFluidPacket());
+	const ParticleCell* packet = particleShape.getFluidPacket();
+	Particle* particleBuffer = mParticleState->getParticleBuffer();
+
+	PxU32 endIndex = packet->firstParticle + packet->numParticles; // one past the packet's last sorted index
+	for(PxU32 i = packet->firstParticle; i < endIndex; ++i)
+	{
+		// update particles for shapes that have been deleted!
+		PxU32 particleIndex = mPacketParticlesIndices[i]; // sorted packet order -> index into the particle buffer
+		Particle& particle = particleBuffer[particleIndex];
+
+		// we need to skip invalid particles
+		// it may be that a particle has been deleted prior to the deletion of the RB
+		// it may also be that a particle has been re-added to the same index, in which case
+		// the particle.flags.low will have been overwritten
+		if(!(particle.flags.api & PxParticleFlag::eVALID))
+			continue;
+
+		if(!(particle.flags.low & InternalParticleFlag::eANY_CONSTRAINT_VALID)) // no constraint that could refer to it
+			continue;
+
+		Constraint& c0 = mConstraintBuffers.constraint0Buf[particleIndex];
+		Constraint& c1 = mConstraintBuffers.constraint1Buf[particleIndex];
+		ConstraintDynamic& cd0 = mConstraintBuffers.constraint0DynamicBuf[particleIndex];
+		ConstraintDynamic& cd1 = mConstraintBuffers.constraint1DynamicBuf[particleIndex];
+
+		if(reinterpret_cast<const PxsBodyCore*>(rigidBody) == cd1.twoWayBody) // cast is a no-op: type is already PxsBodyCore
+		{
+			particle.flags.low &=
+			    PxU16(~(InternalParticleFlag::eCONSTRAINT_1_VALID | InternalParticleFlag::eCONSTRAINT_1_DYNAMIC));
+		}
+
+		if(reinterpret_cast<const PxsBodyCore*>(rigidBody) == cd0.twoWayBody)
+		{
+			if(!(particle.flags.low & InternalParticleFlag::eCONSTRAINT_1_VALID)) // constraint 0 was the only one left
+			{
+				particle.flags.low &=
+				    PxU16(~(InternalParticleFlag::eCONSTRAINT_0_VALID | InternalParticleFlag::eCONSTRAINT_0_DYNAMIC));
+			}
+			else
+			{
+				c0 = c1; // constraint 1 survives: move it into slot 0 and invalidate slot 1
+				cd0 = cd1;
+				particle.flags.low &=
+				    PxU16(~(InternalParticleFlag::eCONSTRAINT_1_VALID | InternalParticleFlag::eCONSTRAINT_1_DYNAMIC));
+			}
+		}
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+/**
+Should only be called when packets cover the same particles as when the cache was used last.
+I.e. after the last collision update and before the next shape update.
+It's ok if particles were replaced or removed from the corresponding packet intervals,
+since the cache updates will not do any harm for those.
+*/
+void ParticleSystemSimCpu::setCollisionCacheInvalid(const ParticleShapeCpu& particleShape,
+                                                    const Gu::GeometryUnion& geometry)
+{
+	PX_ASSERT(mOpcodeCacheBuffer);
+	PX_ASSERT(particleShape.getFluidPacket());
+	const ParticleCell* packet = particleShape.getFluidPacket();
+	Particle* particleBuffer = mParticleState->getParticleBuffer();
+
+	PxU32 endIndex = packet->firstParticle + packet->numParticles; // one past the packet's last sorted index
+	for(PxU32 i = packet->firstParticle; i < endIndex; ++i)
+	{
+		// update particles for shapes that have been deleted!
+		PxU32 particleIndex = mPacketParticlesIndices[i]; // sorted packet order -> index into the particle buffer
+		Particle& particle = particleBuffer[particleIndex];
+
+		if((particle.flags.low & InternalParticleFlag::eGEOM_CACHE_MASK) != 0) // any geometry cache bit is set
+		{
+			ParticleOpcodeCache& cache = mOpcodeCacheBuffer[particleIndex];
+			if(cache.getGeometry() == &geometry) // compared by address: only caches of exactly this geometry object
+				particle.flags.low &= ~PxU16(InternalParticleFlag::eGEOM_CACHE_MASK);
+		}
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleSystemSimCpu::initializeParameter()
+{
+	// Derives the parameter sets of mDynamics and mCollision from *mParameter (called by init()).
+	const ParticleSystemParameter& parameter = *mParameter;
+
+	DynamicsParameters& dynamicsParams = mDynamics.getParameter();
+
+	// initialize dynamics parameter
+	{
+		// "Std" values refer to a standardized rest spacing; scaleToStd / scaleToWorld convert to and from world units.
+		PxReal restParticlesDistance = parameter.restParticleDistance;
+		PxReal restParticlesDistanceStd = 1.0f / PXN_FLUID_REST_PARTICLE_PER_UNIT_STD;
+		PxReal restParticlesDistanceStd3 = restParticlesDistanceStd * restParticlesDistanceStd * restParticlesDistanceStd;
+
+		dynamicsParams.initialDensity = parameter.restDensity;
+		dynamicsParams.particleMassStd = dynamicsParams.initialDensity * restParticlesDistanceStd3;
+		dynamicsParams.cellSize = parameter.kernelRadiusMultiplier * restParticlesDistance;
+		dynamicsParams.cellSizeInv = 1.0f / dynamicsParams.cellSize;
+		dynamicsParams.cellSizeSq = dynamicsParams.cellSize * dynamicsParams.cellSize;
+		dynamicsParams.packetSize = dynamicsParams.cellSize * (1 << parameter.packetSizeMultiplierLog2);
+		PxReal radiusStd = parameter.kernelRadiusMultiplier * restParticlesDistanceStd;
+		PxReal radius2Std = radiusStd * radiusStd;
+		PxReal radius6Std = radius2Std * radius2Std * radius2Std;
+		PxReal radius9Std = radius6Std * radius2Std * radiusStd;
+		PxReal wPoly6ScalarStd = 315.0f / (64.0f * PxPi * radius9Std);
+		PxReal wSpikyGradientScalarStd = 1.5f * 15.0f / (PxPi * radius6Std);
+
+		dynamicsParams.radiusStd = radiusStd;
+		dynamicsParams.radiusSqStd = radius2Std;
+		dynamicsParams.densityMultiplierStd = wPoly6ScalarStd * dynamicsParams.particleMassStd;
+		dynamicsParams.stiffMulPressureMultiplierStd =
+		    wSpikyGradientScalarStd * dynamicsParams.particleMassStd * parameter.stiffness;
+		dynamicsParams.selfDensity = dynamicsParams.densityMultiplierStd * radius2Std * radius2Std * radius2Std;
+		dynamicsParams.scaleToStd = restParticlesDistanceStd / restParticlesDistance;
+		dynamicsParams.scaleSqToStd = dynamicsParams.scaleToStd * dynamicsParams.scaleToStd;
+		dynamicsParams.scaleToWorld = 1.0f / dynamicsParams.scaleToStd;
+		dynamicsParams.packetMultLog = parameter.packetSizeMultiplierLog2;
+
+		// A non-positive offset between rest density and self density disables the normalization (factor 0).
+		PxReal densityRestOffset = (dynamicsParams.initialDensity - dynamicsParams.selfDensity);
+		dynamicsParams.densityNormalizationFactor = (densityRestOffset > 0.0f) ? (1.0f / densityRestOffset) : 0.0f;
+
+		updateDynamicsParameter();
+	}
+
+	CollisionParameters& collisionParams = mCollision.getParameter();
+
+	// initialize collision parameter: these partially depend on dynamics parameters!
+	{
+		collisionParams.cellSize = dynamicsParams.cellSize;
+		collisionParams.cellSizeInv = dynamicsParams.cellSizeInv;
+		collisionParams.packetMultLog = parameter.packetSizeMultiplierLog2;
+		collisionParams.packetMult = PxU32(1 << parameter.packetSizeMultiplierLog2);
+		collisionParams.packetSize = dynamicsParams.packetSize;
+		collisionParams.restOffset = parameter.restOffset;
+		collisionParams.contactOffset = parameter.contactOffset;
+		PX_ASSERT(collisionParams.contactOffset >= collisionParams.restOffset);
+		collisionParams.maxMotionDistance = parameter.maxMotionDistance;
+		collisionParams.collisionRange =
+		    collisionParams.maxMotionDistance + collisionParams.contactOffset + PT_PARTICLE_SYSTEM_COLLISION_SLACK;
+		updateCollisionParameter();
+	}
+}
+
+//----------------------------------------------------------------------------//
+
+PX_FORCE_INLINE PxF32 computeDampingFactor(PxF32 damping, PxF32 timeStep)
+{
+	// Per-step damping factor 1 - damping * timeStep, replaced by zero as soon as
+	// damping * timeStep reaches 1 (or is NaN), so the factor never goes negative.
+	const PxF32 dampingPerStep = damping * timeStep;
+	const bool belowFullDamping = dampingPerStep < 1.0f;
+	return belowFullDamping ? (1.0f - dampingPerStep) : 0.0f;
+}
+
+void ParticleSystemSimCpu::updateDynamicsParameter()
+{
+	const ParticleSystemParameter& parameter = *mParameter;
+	DynamicsParameters& dynamicsParams = mDynamics.getParameter();
+
+	PxReal restParticlesDistanceStd = 1.0f / PXN_FLUID_REST_PARTICLE_PER_UNIT_STD;
+	PxReal radiusStd = parameter.kernelRadiusMultiplier * restParticlesDistanceStd; // same value as in initializeParameter()
+	PxReal radius2Std = radiusStd * radiusStd;
+	PxReal radius6Std = radius2Std * radius2Std * radius2Std;
+
+	dynamicsParams.viscosityMultiplierStd = // the only dynamics value recomputed here; also called from dynamicsUpdate()
+	    computeViscosityMultiplier(parameter.viscosity, dynamicsParams.particleMassStd, radius6Std);
+}
+
+//----------------------------------------------------------------------------//
+
+void ParticleSystemSimCpu::updateCollisionParameter()
+{
+	const ParticleSystemParameter& parameter = *mParameter;
+	CollisionParameters& collisionParams = mCollision.getParameter();
+
+	collisionParams.dampingDtComp = computeDampingFactor(parameter.damping, mSimulationTimeStep); // value in [0, 1] for damping >= 0
+	collisionParams.externalAcceleration = mExternalAcceleration;
+
+	collisionParams.projectionPlane.n = parameter.projectionPlane.n;
+	collisionParams.projectionPlane.d = parameter.projectionPlane.d;
+	collisionParams.timeStep = mSimulationTimeStep;
+	collisionParams.invTimeStep = (mSimulationTimeStep > 0.0f) ? 1.0f / mSimulationTimeStep : 0.0f; // avoids division by zero
+
+	collisionParams.restitution = CLAMP_RESTITUTION(parameter.restitution);
+	collisionParams.dynamicFriction = CLAMP_DYNAMIC_FRICTION(parameter.dynamicFriction);
+	collisionParams.staticFrictionSqr = parameter.staticFriction * parameter.staticFriction;
+	collisionParams.temporalNoise = (parameter.noiseCounter * parameter.noiseCounter * 4999879) & 0xffff; // keeps the low 16 bits
+	collisionParams.flags = parameter.flags;
+}
+
+//----------------------------------------------------------------------------//
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtParticleSystemSimCpu.h b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleSystemSimCpu.h
new file mode 100644
index 00000000..381b80c4
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleSystemSimCpu.h
@@ -0,0 +1,239 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_PARTICLE_SYSTEM_SIM_CPU_H
+#define PT_PARTICLE_SYSTEM_SIM_CPU_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PtParticleSystemSim.h"
+#include "PtDynamics.h"
+#include "PtCollision.h"
+#include "PtGridCellVector.h"
+#include "PsAllocator.h"
+#include "PtParticleData.h"
+#include "CmTask.h"
+#include "PtContextCpu.h"
+
+namespace physx
+{
+
+class PxParticleDeviceExclusiveAccess;
+class PxBaseTask;
+
+namespace Pt
+{
+class Context;
+struct ConstraintPair;
+class SpatialHash;
+class ParticleShapeCpu;
+
+class ParticleSystemSimCpu : public ParticleSystemSim // CPU implementation of the ParticleSystemSim interface
+{
+	PX_NOCOPY(ParticleSystemSimCpu)
+  public:
+	//---------------------------
+	// Implements ParticleSystemSim
+	virtual ParticleSystemState& getParticleStateV();
+	virtual void getSimParticleDataV(ParticleSystemSimDataDesc& simParticleData, bool devicePtr) const;
+
+	virtual void getShapesUpdateV(ParticleShapeUpdateResults& updateResults) const;
+
+	virtual void setExternalAccelerationV(const PxVec3& v);
+	virtual const PxVec3& getExternalAccelerationV() const;
+
+	virtual void setSimulationTimeStepV(PxReal value);
+	virtual PxReal getSimulationTimeStepV() const;
+
+	virtual void setSimulatedV(bool);
+	virtual Ps::IntBool isSimulatedV() const;
+
+	virtual void addInteractionV(const ParticleShape&, ShapeHandle, BodyHandle, bool, bool)
+	{ // no work is done for added interactions in this implementation
+	}
+	virtual void removeInteractionV(const ParticleShape& particleShape, ShapeHandle shape, BodyHandle body,
+	                                bool isDynamic, bool isDyingRb, bool ccdBroadphase);
+	virtual void onRbShapeChangeV(const ParticleShape& particleShape, ShapeHandle shape);
+
+	virtual void flushBufferedInteractionUpdatesV()
+	{ // nothing is buffered in this implementation
+	}
+
+	virtual void passCollisionInputV(ParticleCollisionUpdateInput input);
+#if PX_SUPPORT_GPU_PHYSX
+	virtual Ps::IntBool isGpuV() const
+	{
+		return false;
+	}
+	virtual void enableDeviceExclusiveModeGpuV()
+	{
+		PX_ASSERT(0); // GPU-only entry point, must not be called on this class
+	}
+	virtual PxParticleDeviceExclusiveAccess* getDeviceExclusiveAccessGpuV() const
+	{
+		PX_ASSERT(0); // GPU-only entry point, must not be called on this class
+		return NULL;
+	}
+#endif
+
+	//~Implements ParticleSystemSim
+	//---------------------------
+
+	ParticleSystemSimCpu(ContextCpu* context, PxU32 index);
+	virtual ~ParticleSystemSimCpu();
+	void init(ParticleData& particleData, const ParticleSystemParameter& parameter); // allocates buffers and spatial hash
+	void clear(); // frees what init() allocated and releases the particle data if it is still held
+	ParticleData* obtainParticleState(); // hands the particle data to the caller; the member becomes NULL
+
+	PX_FORCE_INLINE ContextCpu& getContext() const
+	{
+		return mContext;
+	}
+
+	PX_FORCE_INLINE void getPacketBounds(const GridCellVector& coord, PxBounds3& bounds);
+
+	PX_FORCE_INLINE PxReal computeViscosityMultiplier(PxReal viscosityStd, PxReal particleMassStd, PxReal radius6Std);
+
+	PX_FORCE_INLINE PxU32 getIndex() const
+	{
+		return mIndex;
+	}
+
+	void packetShapesUpdate(physx::PxBaseTask* continuation); // these six are the bodies of the DelegateTask members below
+	void packetShapesFinalization(physx::PxBaseTask* continuation);
+	void dynamicsUpdate(physx::PxBaseTask* continuation);
+	void collisionUpdate(physx::PxBaseTask* continuation);
+	void collisionFinalization(physx::PxBaseTask* continuation);
+	void spatialHashUpdateSections(physx::PxBaseTask* continuation);
+
+	physx::PxBaseTask& schedulePacketShapesUpdate(const ParticleShapesUpdateInput& input,
+	                                              physx::PxBaseTask& continuation);
+	physx::PxBaseTask& scheduleDynamicsUpdate(physx::PxBaseTask& continuation);
+	physx::PxBaseTask& scheduleCollisionUpdate(physx::PxBaseTask& continuation);
+
+  private:
+	void remapShapesToPackets(ParticleShape* const* shapes, PxU32 numShapes);
+	void clearParticleConstraints();
+	void initializeParameter();
+	void updateDynamicsParameter();
+	void updateCollisionParameter();
+	void removeTwoWayRbReferences(const ParticleShapeCpu& particleShape, const PxsBodyCore* rigidBody);
+	void setCollisionCacheInvalid(const ParticleShapeCpu& particleShape, const Gu::GeometryUnion& geometry);
+
+  private:
+	ContextCpu& mContext;
+	ParticleData* mParticleState; // set by init(); NULL after clear() or obtainParticleState()
+	const ParticleSystemParameter* mParameter; // set by init(); only the address is stored
+
+	Ps::IntBool mSimulated; // set to true by collisionFinalization(); false after init() / clear()
+
+	TwoWayData* mFluidTwoWayData; // only allocated for two-way collision with dynamic actors
+
+	ParticleShape** mCreatedDeletedParticleShapes; // Handles of created and deleted particle packet shapes.
+	PxU32 mNumCreatedParticleShapes;
+	PxU32 mNumDeletedParticleShapes;
+	PxU32* mPacketParticlesIndices; // Dense array of sorted particle indices.
+	PxU32 mNumPacketParticlesIndices;
+
+	ConstraintBuffers mConstraintBuffers; // Particle constraints.
+
+	ParticleOpcodeCache* mOpcodeCacheBuffer; // Opcode cache.
+	PxVec3* mTransientBuffer;                // force in SPH , collision normal
+	PxVec3* mCollisionVelocities; // only allocated when the collision velocity read-data flag is set
+
+	// Spatial ordering, packet generation
+	SpatialHash* mSpatialHash;
+
+	// Dynamics update
+	Dynamics mDynamics;
+
+	// Collision update
+	Collision mCollision;
+
+	PxReal mSimulationTimeStep;
+	bool mIsSimulated; // written by setSimulatedV(), returned by isSimulatedV(); not the same as mSimulated
+
+	PxVec3 mExternalAcceleration; // This includes the gravity of the scene
+
+	PxU32 mIndex;
+
+	// pipeline tasks
+	typedef Cm::DelegateTask<ParticleSystemSimCpu, &ParticleSystemSimCpu::packetShapesUpdate> PacketShapesUpdateTask;
+	typedef Cm::DelegateTask<ParticleSystemSimCpu, &ParticleSystemSimCpu::packetShapesFinalization> PacketShapesFinalizationTask;
+	typedef Cm::DelegateTask<ParticleSystemSimCpu, &ParticleSystemSimCpu::dynamicsUpdate> DynamicsUpdateTask;
+	typedef Cm::DelegateTask<ParticleSystemSimCpu, &ParticleSystemSimCpu::collisionUpdate> CollisionUpdateTask;
+	typedef Cm::DelegateTask<ParticleSystemSimCpu, &ParticleSystemSimCpu::collisionFinalization> CollisionFinalizationTask;
+	typedef Cm::DelegateTask<ParticleSystemSimCpu, &ParticleSystemSimCpu::spatialHashUpdateSections> SpatialHashUpdateSectionsTask;
+
+	PacketShapesUpdateTask mPacketShapesUpdateTask;
+	PacketShapesFinalizationTask mPacketShapesFinalizationTask;
+	DynamicsUpdateTask mDynamicsUpdateTask;
+	CollisionUpdateTask mCollisionUpdateTask;
+	CollisionFinalizationTask mCollisionFinalizationTask;
+	SpatialHashUpdateSectionsTask mSpatialHashUpdateSectionsTask;
+
+	ParticleShapesUpdateInput mPacketShapesUpdateTaskInput; // its shapes array is freed by packetShapesFinalization()
+	ParticleCollisionUpdateInput mCollisionUpdateTaskInput; // its contact manager stream is freed by collisionFinalization()
+
+	Ps::AlignedAllocator<16, Ps::ReflectionAllocator<char> > mAlign16; // allocator for the per-particle buffers
+
+	friend class Collision;
+	friend class Dynamics;
+};
+
+//----------------------------------------------------------------------------//
+
+/*!
+Compute AABB of a packet given its coordinates.
+Enlarge the bounding box such that a particle on the current boundary could
+travel the maximum distance and would still be inside the enlarged volume.
+*/
+PX_FORCE_INLINE void ParticleSystemSimCpu::getPacketBounds(const GridCellVector& coord, PxBounds3& bounds)
+{
+	PxVec3 gridOrigin(static_cast<PxReal>(coord.x), static_cast<PxReal>(coord.y), static_cast<PxReal>(coord.z));
+	gridOrigin *= mCollision.getParameter().packetSize; // packet grid coordinates scaled to the packet's minimum corner
+
+	PxVec3 collisionRangeVec(mCollision.getParameter().collisionRange); // maxMotionDistance + contactOffset + slack
+	bounds.minimum = gridOrigin - collisionRangeVec;
+	bounds.maximum = gridOrigin + PxVec3(mCollision.getParameter().packetSize) + collisionRangeVec;
+}
+
+PX_FORCE_INLINE PxReal
+ParticleSystemSimCpu::computeViscosityMultiplier(PxReal viscosityStd, PxReal particleMassStd, PxReal radius6Std)
+{
+	PxReal wViscosityLaplacianScalarStd = 45.0f / (PxPi * radius6Std); // callers pass radiusStd^6; no guard against zero
+	return (wViscosityLaplacianScalarStd * viscosityStd * particleMassStd);
+}
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_PARTICLE_SYSTEM_SIM_CPU_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHash.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHash.cpp
new file mode 100644
index 00000000..16b6ca25
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHash.cpp
@@ -0,0 +1,514 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtSpatialHash.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PsAlloca.h"
+#include "CmTask.h"
+
+#include "PtParticleSystemSim.h"
+#include "PtSpatialHashHelper.h"
+#include "PtParticle.h"
+#include "PtCollisionData.h"
+#include "PsUtilities.h"
+#include "PsFoundation.h"
+
+using namespace physx;
+using namespace Pt;
+
+/*!
+Sets up the hash with an uninitialized cell table and, optionally, a packet section table.
+
+\param numHashBuckets   Number of regular hash buckets; one additional overflow cell is allocated.
+\param cellSizeInv      Inverse cell size, stored for later coordinate computations.
+\param packetMultLog    Log2 of the packet multiplier, stored as is.
+\param supportSections  If true, one PacketSections entry per hash bucket is allocated as well.
+*/
+SpatialHash::SpatialHash(PxU32 numHashBuckets, PxF32 cellSizeInv, PxU32 packetMultLog, bool supportSections)
+: mNumCells(0)
+, mNumHashBuckets(numHashBuckets)
+, mCellSizeInv(cellSizeInv)
+, mPacketMultLog(packetMultLog)
+, mPacketSections(NULL)
+{
+	// The cell table holds all hash buckets plus one trailing overflow cell.
+	const PxU32 numCellsWithOverflow = numHashBuckets + 1;
+	mCells = reinterpret_cast<ParticleCell*>(PX_ALLOC(numCellsWithOverflow * sizeof(ParticleCell), "ParticleCell"));
+
+	// Without section support mPacketSections stays NULL.
+	if(!supportSections)
+		return;
+
+	mPacketSections =
+	    reinterpret_cast<PacketSections*>(PX_ALLOC(numHashBuckets * sizeof(PacketSections), "PacketSections"));
+}
+
+/*!
+Releases the packet section table (if one was allocated) and the cell table.
+*/
+SpatialHash::~SpatialHash()
+{
+	// The section table only exists when the hash was created with section support.
+	if(mPacketSections != NULL)
+	{
+		PX_FREE(mPacketSections);
+	}
+
+	PX_FREE(mCells);
+}
+
+/*-------------------------------------------------------------------------*/
+
+/*!
+Builds the packet hash and reorders particle indices by packet.
+
+Steps:
+ 1. All regular hash entries are flagged unused, the overflow packet is reset to zero particles.
+ 2. Every particle set in particleMap is assigned to the packet containing its position. Particles
+    that carry the overflow flag, or that would require a new packet after the packet limit has been
+    reached, are counted in the overflow packet instead.
+ 3. Per packet particle counts are converted to start offsets and the particle indices are written
+    to sortedIndices grouped by packet.
+
+\param numSorted           Receives the number of particles assigned to regular (non overflow) packets.
+\param sortedIndices       Output array for the particle indices grouped by packet.
+\param particles           Particle array; flags of particles that newly overflow are modified.
+\param particleMap         Bitmap of the valid particle indices.
+\param validParticleRange  Only checked by an assert.
+\param continuation        Task whose reference is removed once the update has finished.
+*/
+void SpatialHash::updatePacketHash(PxU32& numSorted, PxU32* sortedIndices, Particle* particles,
+                                   const Cm::BitMap& particleMap, const PxU32 validParticleRange,
+                                   physx::PxBaseTask* continuation)
+{
+	PX_ASSERT(validParticleRange > 0);
+	PX_UNUSED(validParticleRange);
+
+	// Flag all regular packet hash entries as unused.
+	for(PxU32 bucket = 0; bucket < PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE; bucket++)
+		mCells[bucket].numParticles = PX_INVALID_U32;
+
+	// Initialize the overflow packet: it is always in use and starts out empty.
+	ParticleCell& overflowPacket = mCells[PT_PARTICLE_SYSTEM_OVERFLOW_INDEX];
+	overflowPacket.numParticles = 0;
+
+	const PxU32 cellsPerPacketEdge = PxU32(1 << mPacketMultLog);
+	const PxF32 invPacketSize = mCellSizeInv / cellsPerPacketEdge;
+
+	// Number of 32 bit words covered by the particle bitmap.
+	const PxU32 numMapWords = particleMap.size() >> 5;
+
+	// One hash key per particle index, remembered for the reorder pass below.
+	PxU16* packetKeys = reinterpret_cast<PxU16*>(PX_ALLOC(numMapWords * 32 * sizeof(PxU16), "hashKeys"));
+	PX_ASSERT(packetKeys);
+
+	PxU32 usedPackets = 0;
+	numSorted = 0;
+
+	// Pass 1: count the particles of each packet.
+	Cm::BitMap::Iterator mapIt(particleMap);
+	for(PxU32 idx = mapIt.getNext(); idx != Cm::BitMap::Iterator::DONE; idx = mapIt.getNext())
+	{
+		Particle& current = particles[idx];
+
+		// Particles which caused an overflow in the past are rejected right away.
+		if(current.flags.api & PxParticleFlag::eSPATIAL_DATA_STRUCTURE_OVERFLOW)
+		{
+			overflowPacket.numParticles++;
+			packetKeys[idx] = PT_PARTICLE_SYSTEM_OVERFLOW_INDEX;
+			continue;
+		}
+
+		// Packet coordinate of the particle position.
+		const GridCellVector packetCoords(current.position, invPacketSize);
+
+		PxU32 key;
+		ParticleCell* target = getCell(key, packetCoords);
+		PX_ASSERT(target);
+		PX_ASSERT(key < PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE);
+		packetKeys[idx] = Ps::to16(key);
+
+		if(target->numParticles == PX_INVALID_U32)
+		{
+			// The entry is unused so far, i.e. the particle opens a new packet.
+			if(usedPackets >= PT_PARTICLE_SYSTEM_PACKET_LIMIT)
+			{
+				// No packets left: flag the particle as overflowed and account for it in the overflow packet.
+				PX_WARN_ONCE("Particles: Spatial data structure overflow! Particles might miss collisions with the "
+				             "scene. See particle section of the guide for more information.");
+				current.flags.api |= PxParticleFlag::eSPATIAL_DATA_STRUCTURE_OVERFLOW;
+				current.flags.low &= PxU16(~InternalParticleFlag::eANY_CONSTRAINT_VALID);
+				overflowPacket.numParticles++;
+				packetKeys[idx] = PT_PARTICLE_SYSTEM_OVERFLOW_INDEX;
+				continue;
+			}
+
+			target->coords = packetCoords;
+			target->numParticles = 0;
+			usedPackets++;
+		}
+
+		PX_ASSERT(target->numParticles != PX_INVALID_U32);
+		target->numParticles++;
+		numSorted++;
+	}
+
+	mNumCells = usedPackets;
+
+	// Turn the counts into start offsets and clear the counters again, they are reused as write
+	// cursors by the reorder pass. The loop bound is the hash buffer size (not the hash size) so
+	// that the overflow packet is included.
+	PxU32 runningStart = 0;
+	for(PxU32 slot = 0; slot < PT_PARTICLE_SYSTEM_PACKET_HASH_BUFFER_SIZE; slot++)
+	{
+		ParticleCell& entry = mCells[slot];
+
+		if(entry.numParticles != PX_INVALID_U32)
+		{
+			entry.firstParticle = runningStart;
+			runningStart += entry.numParticles;
+			entry.numParticles = 0;
+		}
+	}
+
+	// Pass 2: scatter the particle indices into their packet intervals.
+	reorderParticleIndicesToPackets(sortedIndices, runningStart, particleMap, packetKeys);
+
+	PX_FREE(packetKeys);
+
+	continuation->removeReference();
+}
+
+/*!
+Writes every particle index set in particleMap into the index interval of its packet.
+
+Expects firstParticle of each used packet to hold the interval start and numParticles to be zero;
+numParticles is used as write cursor and ends up as the packet's particle count.
+
+\param sortedIndices  Output index array.
+\param numParticles   Total number of particles to be placed; only used by an assert.
+\param particleMap    Bitmap of the valid particle indices.
+\param hashKeyArray   Packet hash key for each particle index.
+*/
+void SpatialHash::reorderParticleIndicesToPackets(PxU32* sortedIndices, PxU32 numParticles,
+                                                  const Cm::BitMap& particleMap, PxU16* hashKeyArray)
+{
+	PX_UNUSED(numParticles);
+
+	Cm::BitMap::Iterator mapIt(particleMap);
+	PxU32 idx = mapIt.getNext();
+	while(idx != Cm::BitMap::Iterator::DONE)
+	{
+		// Packet the particle was assigned to while the hash was built.
+		ParticleCell* owner = mCells + hashKeyArray[idx];
+		PX_ASSERT(owner);
+		PX_ASSERT(owner->numParticles != PX_INVALID_U32);
+
+		// Next free slot within the packet's interval.
+		const PxU32 slot = owner->firstParticle + owner->numParticles;
+		PX_ASSERT(slot < numParticles);
+		sortedIndices[slot] = idx;
+		owner->numParticles++;
+
+		idx = mapIt.getNext();
+	}
+}
+
+/*!
+Builds the packet sections for every used, non-empty packet of the hash.
+
+\param particleIndices  Particle indices grouped by packet; reordered by section within each packet.
+\param particles        Particle array the indices refer to.
+\param continuation     Not used.
+*/
+void SpatialHash::updatePacketSections(PxU32* particleIndices, Particle* particles, physx::PxBaseTask* continuation)
+{
+	PX_ASSERT(mPacketSections);
+	PX_UNUSED(continuation);
+
+	// MS: For this task we could use multithreading, gather a couple of packets and run them in parallel.
+	//     Multiprocessor systems might take advantage of this but for the PC we will postpone this for now.
+
+	// Packets with no more particles than this are skipped.
+	const PxU32 skipSize = 0;
+
+	for(PxU32 bucket = 0; bucket < PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE; bucket++)
+	{
+		const ParticleCell& candidate = mCells[bucket];
+
+		const bool inUse = (candidate.numParticles != PX_INVALID_U32);
+		if(inUse && (candidate.numParticles > skipSize))
+			buildPacketSections(candidate, mPacketSections[bucket], mPacketMultLog, particles, particleIndices);
+	}
+}
+
+/*!
+Assigns each particle of a packet to a packet section and reorders the packet's particle indices
+by section.
+
+First the section of every particle is determined and the per section particle counts are
+accumulated. The counts are then turned into absolute start indices (sections.firstParticle) and
+the index interval of the packet is rewritten in section order.
+
+\param packet           Packet to process; firstParticle/numParticles describe its index interval.
+\param sections         Receives first particle index and particle count of each section.
+\param packetMultLog    Log2 of the number of cells along a packet edge, must be > 0.
+\param particles        Particle array the indices refer to; only positions are read here.
+\param particleIndices  Index array covering all packets; the interval of this packet gets reordered.
+*/
+void SpatialHash::buildPacketSections(const ParticleCell& packet, PacketSections& sections, PxU32 packetMultLog,
+                                      Particle* particles, PxU32* particleIndices)
+{
+	PX_ASSERT(packetMultLog > 0);
+
+	PxU32 packetMult = PxU32(1 << packetMultLog);
+
+	// Compute the smallest cell coordinate within the packet
+	GridCellVector packetMinCellCoords = packet.coords << packetMultLog;
+
+	// Clear packet section entries
+	PxMemSet(&sections, 0, sizeof(PacketSections));
+
+	// Divide the packet into subpackets that fit into local memory of processing unit.
+	PxU32 particlesRemainder = packet.numParticles % PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS;
+	if(particlesRemainder == 0)
+		particlesRemainder = PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS;
+
+	PxU32* packetParticleIndices = particleIndices + packet.firstParticle;
+
+	// PX_ALLOCA expects an element count and multiplies by sizeof(type) itself. Passing a byte size
+	// here over-allocated by a factor of sizeof(PxU16) and pushed small packets past the stack
+	// allocation threshold without need.
+	PX_ALLOCA(sectionIndexBuf, PxU16, packet.numParticles);
+	PX_ASSERT(sectionIndexBuf);
+
+	PxU32 startIdx = 0;
+	PxU32 endIdx = particlesRemainder; // We start with the smallest subpacket, i.e., the subpacket which does not reach
+	// its particle limit.
+	GridCellVector cellCoord;
+	PxU16* pSectionIndexBuf = sectionIndexBuf;
+	while(endIdx <= packet.numParticles)
+	{
+		// Loop over particles of the subpacket.
+		for(PxU32 p = startIdx; p < endIdx; p++)
+		{
+			PxU32 particleIndex = packetParticleIndices[p];
+			Particle& particle = particles[particleIndex];
+			// Find packet section the particle belongs to.
+			cellCoord.set(particle.position, mCellSizeInv);
+			PxU32 sectionIndex = getPacketSectionIndex(cellCoord, packetMinCellCoords, packetMult);
+			PX_ASSERT(sectionIndex < PT_PACKET_SECTIONS);
+
+			// Remember the section for the reorder pass, in the same order the particles are visited.
+			*pSectionIndexBuf++ = Ps::to16(sectionIndex);
+
+			// Increment particle count of the section the particle belongs to.
+			sections.numParticles[sectionIndex]++;
+		}
+
+		startIdx = endIdx;
+		endIdx += PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS;
+	}
+
+	// Set for each packet section the starting index of the associated particle interval.
+	// The start indices are absolute, i.e. they include packet.firstParticle.
+	PxU32 particleIndex = packet.firstParticle;
+	for(PxU32 s = 0; s < PT_PACKET_SECTIONS; s++)
+	{
+		sections.firstParticle[s] = particleIndex;
+		particleIndex += sections.numParticles[s];
+	}
+
+	// Simon: This is not yet chunked. Need to when porting.
+	// The reorder writes into packetParticleIndices, so the current order is copied to a temporary
+	// buffer first. As above, PX_ALLOCA takes the number of elements, not the number of bytes.
+	PX_ALLOCA(tmpIndexBuffer, PxU32, packet.numParticles);
+	PX_ASSERT(tmpIndexBuffer);
+	PxMemCopy(tmpIndexBuffer, packetParticleIndices, packet.numParticles * sizeof(PxU32));
+
+	reorderParticlesToPacketSections(packet, sections, particles, tmpIndexBuffer, packetParticleIndices, sectionIndexBuf);
+}
+
+/*!
+Scatters the particle indices of a packet into section order.
+
+sections.firstParticle must hold the absolute start index of each section. sections.numParticles
+is cleared and used as per section write cursor, so it holds the section sizes again on return.
+
+\param packet              Packet whose indices are reordered.
+\param sections            Section table of the packet.
+\param particles           Not accessed.
+\param inParticleIndices   Particle indices of the packet in their current order.
+\param outParticleIndices  Receives the indices in section order; starts at the packet start.
+\param sectionIndexBuf     Section index of each particle, in the order of inParticleIndices.
+*/
+void SpatialHash::reorderParticlesToPacketSections(const ParticleCell& packet, PacketSections& sections,
+                                                   const Particle* particles, const PxU32* inParticleIndices,
+                                                   PxU32* outParticleIndices, PxU16* sectionIndexBuf)
+{
+	PX_UNUSED(particles);
+
+	// The packet is processed in subpackets that fit into local memory of the processing unit.
+	// The first subpacket is the one which does not reach the particle limit.
+	PxU32 firstChunkSize = packet.numParticles % PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS;
+	if(firstChunkSize == 0)
+		firstChunkSize = PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS;
+
+	// Reset the per section counters, they serve as write cursors below.
+	PxMemSet(sections.numParticles, 0, (PT_PACKET_SECTIONS * sizeof(PxU32)));
+
+	PxU32 chunkBegin = 0;
+	for(PxU32 chunkEnd = firstChunkSize; chunkEnd <= packet.numParticles;
+	    chunkEnd += PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS)
+	{
+		for(PxU32 i = chunkBegin; i < chunkEnd; i++)
+		{
+			// Reorder particle according to packet section.
+			//
+			// It is important that particles inside the core section (the section that will not interact
+			// with neighbor packets) are moved to the end of the buffer. This way we can easily ignore
+			// these particles when testing against particles of neighboring packets.
+			const PxU32 section = *sectionIndexBuf++;
+			const PxU32 absoluteSlot = sections.firstParticle[section] + sections.numParticles[section];
+
+			// Section start indices are absolute, the output array starts at the packet start.
+			const PxU32 packetLocalSlot = absoluteSlot - packet.firstParticle;
+			PX_ASSERT(packetLocalSlot < packet.numParticles);
+			outParticleIndices[packetLocalSlot] = inParticleIndices[i];
+
+			sections.numParticles[section]++;
+		}
+
+		chunkBegin = chunkEnd;
+	}
+}
+
+/*
+To optimize particle interaction between particles of neighboring packets, each packet is split
+into 27 sections. Of these 27 sections, 26 are located at the surface of the packet, i.e., contain
+the outermost particle cells, and one section contains all the inner cells. If we want to compute
+the particle interactions between neighboring packets, we only want to work with the 26 "surface
+sections" of each packet, neglecting the inner sections. Thus, we need to find for a given packet
+all the relevant sections of the neighboring packets. These sections will be called halo regions.
+The following illustration specifies how these halo regions are indexed (there are 98 halo regions
+for a packet). The illustration shows the halo regions of a packet from a viewer perspective that
+looks from the outside at the different sides of a packet.
+
+    Left halo regions                 Front halo regions                 Top halo regions
+__________________________        __________________________        __________________________
+|92 |60 |   62   | 61| 93|        |93 |87 |   89   | 88| 97|        |92 |81 |   83   | 82| 96|
+|___|___|________|___|___|        |___|___|________|___|___|        |___|___|________|___|___|
+|67 | 3 |    5   |  4| 73|        |73 |46 |   52   | 49| 76|        |60 |27 |   33   | 30| 63|
+|___|___|________|___|___|        |___|___|________|___|___|        |___|___|________|___|___|
+|   |   |        |   |   |        |   |   |        |   |   |        |   |   |        |   |   |
+|   |   |        |   |   |        |   |   |        |   |   |        |   |   |        |   |   |
+|68 | 6 |    8   |  7| 74|        |74 |47 |   53   | 50| 77|        |62 |29 |   35   | 32| 65|
+|   |   |        |   |   |        |   |   |        |   |   |        |   |   |        |   |   |
+|___|___|________|___|___|        |___|___|________|___|___|        |___|___|________|___|___|
+|66 | 0 |    2   |  1| 72|        |72 |45 |   51   | 48| 75|        |61 |28 |   34   | 31| 64|
+|___|___|________|___|___|        |___|___|________|___|___|        |___|___|________|___|___|
+|90 |54 |   56   | 55| 91|        |91 |84 |   86   | 85| 95|        |93 |87 |   89   | 88| 97|
+|___|___|________|___|___|        |___|___|________|___|___|        |___|___|________|___|___|
+
+
+   Right halo regions                  Rear halo regions                Bottom halo regions
+__________________________        __________________________        __________________________
+|97 |64 |   65   | 63| 96|        |96 |82 |   83   | 81| 92|        |91 |84 |   86   | 85| 95|
+|___|___|________|___|___|        |___|___|________|___|___|        |___|___|________|___|___|
+|76 |13 |   14   | 12| 70|        |70 |40 |   43   | 37| 67|        |55 |19 |   25   | 22| 58|
+|___|___|________|___|___|        |___|___|________|___|___|        |___|___|________|___|___|
+|   |   |        |   |   |        |   |   |        |   |   |        |   |   |        |   |   |
+|   |   |        |   |   |        |   |   |        |   |   |        |   |   |        |   |   |
+|77 |16 |   17   | 15| 71|        |71 |41 |   44   | 38| 68|        |56 |20 |   26   | 23| 59|
+|   |   |        |   |   |        |   |   |        |   |   |        |   |   |        |   |   |
+|___|___|________|___|___|        |___|___|________|___|___|        |___|___|________|___|___|
+|75 |10 |   11   |  9| 69|        |69 |39 |   42   | 36| 66|        |54 |18 |   24   | 21| 57|
+|___|___|________|___|___|        |___|___|________|___|___|        |___|___|________|___|___|
+|95 |58 |   59   | 57| 94|        |94 |79 |   80   | 78| 90|        |90 |78 |   80   | 79| 94|
+|___|___|________|___|___|        |___|___|________|___|___|        |___|___|________|___|___|
+
+Implementation notes for getHaloRegions():
+
+ - packetHalo is zeroed first. A neighbor packet that is not found in the hash therefore leaves
+   its halo regions with numParticles == 0 and firstParticle == 0.
+ - For each of the 26 neighbor coordinates (packetCoords + (dx, dy, dz)) the neighbor is looked up
+   with findConstCell(). If it exists, first particle index and particle count of the listed
+   sections of that neighbor are copied to consecutive halo regions starting at startIdx.
+ - Halo regions 0..53 come from the 6 face neighbors (9 each), 54..89 from the 12 edge neighbors
+   (3 each) and 90..97 from the 8 corner neighbors (1 each).
+ - Finally packetHalo.maxNumParticles is set to the largest numParticles among all halo regions.
+
+The PXS_* helper macros defined inside the function read the parameters packetCoords, packets,
+packetSections, numHashBuckets and packetHalo, and write the locals coords, packet and packetIndex.
+They are not #undef'ed and stay defined for the rest of the translation unit.
+*/
+void SpatialHash::getHaloRegions(PacketHaloRegions& packetHalo, const GridCellVector& packetCoords,
+                                 const ParticleCell* packets, const PacketSections* packetSections, PxU32 numHashBuckets)
+{
+#define PXS_COPY_PARTICLE_INTERVAL(destIdx, srcIdx)                                                                    \
+	packetHalo.firstParticle[destIdx] = sections.firstParticle[srcIdx];                                                \
+	packetHalo.numParticles[destIdx] = sections.numParticles[srcIdx];
+
+#define PXS_GET_HALO_REGIONS_FACE_NEIGHBOR(dx, dy, dz, startIdx, idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9) \
+	coords.set(packetCoords.x + dx, packetCoords.y + dy, packetCoords.z + dz);                                         \
+	packet = findConstCell(packetIndex, coords, packets, numHashBuckets);                                              \
+	if(packet)                                                                                                         \
+	{                                                                                                                  \
+		const PacketSections& sections = packetSections[packetIndex];                                                  \
+                                                                                                                       \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx, idx1);                                                                    \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx + 1, idx2);                                                                \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx + 2, idx3);                                                                \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx + 3, idx4);                                                                \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx + 4, idx5);                                                                \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx + 5, idx6);                                                                \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx + 6, idx7);                                                                \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx + 7, idx8);                                                                \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx + 8, idx9);                                                                \
+	}
+
+#define PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(dx, dy, dz, startIdx, idx1, idx2, idx3)                                     \
+	coords.set(packetCoords.x + dx, packetCoords.y + dy, packetCoords.z + dz);                                         \
+	packet = findConstCell(packetIndex, coords, packets, numHashBuckets);                                              \
+	if(packet)                                                                                                         \
+	{                                                                                                                  \
+		const PacketSections& sections = packetSections[packetIndex];                                                  \
+                                                                                                                       \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx, idx1);                                                                    \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx + 1, idx2);                                                                \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx + 2, idx3);                                                                \
+	}
+
+#define PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(dx, dy, dz, startIdx, idx1)                                               \
+	coords.set(packetCoords.x + dx, packetCoords.y + dy, packetCoords.z + dz);                                         \
+	packet = findConstCell(packetIndex, coords, packets, numHashBuckets);                                              \
+	if(packet)                                                                                                         \
+	{                                                                                                                  \
+		const PacketSections& sections = packetSections[packetIndex];                                                  \
+                                                                                                                       \
+		PXS_COPY_PARTICLE_INTERVAL(startIdx, idx1);                                                                    \
+	}
+
+	PX_ASSERT(packets);
+	PX_ASSERT(packetSections);
+
+	// Clear halo information; regions of neighbors that do not exist keep these zero values.
+	PxMemSet(&packetHalo, 0, sizeof(PacketHaloRegions));
+
+	const ParticleCell* packet; // scratch: neighbor packet of the current lookup, NULL if not present
+	PxU32 packetIndex;          // scratch: hash index of the current neighbor, also indexes packetSections
+	GridCellVector coords;      // scratch: packet coordinate of the current neighbor
+
+	//
+	// Fill halo regions for the 6 neighbors which share a face with the packet.
+	//
+
+	// Left neighbor (x - 1): its sections 9..17 are contiguous and are copied to halo regions 0..8.
+	coords.set(packetCoords.x - 1, packetCoords.y, packetCoords.z);
+	packet = findConstCell(packetIndex, coords, packets, numHashBuckets);
+	if(packet)
+	{
+		const PacketSections& sections = packetSections[packetIndex];
+
+		PxMemCopy(&(packetHalo.firstParticle[0]), &(sections.firstParticle[9]), (9 * sizeof(PxU32)));
+		PxMemCopy(&(packetHalo.numParticles[0]), &(sections.numParticles[9]), (9 * sizeof(PxU32)));
+	}
+
+	// Right neighbor (x + 1): its sections 0..8 are contiguous and are copied to halo regions 9..17.
+	coords.set(packetCoords.x + 1, packetCoords.y, packetCoords.z);
+	packet = findConstCell(packetIndex, coords, packets, numHashBuckets);
+	if(packet)
+	{
+		const PacketSections& sections = packetSections[packetIndex];
+
+		PxMemCopy(&(packetHalo.firstParticle[9]), &(sections.firstParticle[0]), (9 * sizeof(PxU32)));
+		PxMemCopy(&(packetHalo.numParticles[9]), &(sections.numParticles[0]), (9 * sizeof(PxU32)));
+	}
+
+	// Bottom neighbor (y - 1): fills halo regions 18..26
+	PXS_GET_HALO_REGIONS_FACE_NEIGHBOR(0, -1, 0, 18, 3, 4, 5, 12, 13, 14, 21, 22, 23)
+
+	// Top neighbor (y + 1): fills halo regions 27..35
+	PXS_GET_HALO_REGIONS_FACE_NEIGHBOR(0, 1, 0, 27, 0, 1, 2, 9, 10, 11, 18, 19, 20)
+
+	// Rear neighbor (z - 1): fills halo regions 36..44
+	PXS_GET_HALO_REGIONS_FACE_NEIGHBOR(0, 0, -1, 36, 1, 4, 7, 10, 13, 16, 19, 22, 25)
+
+	// Front neighbor (z + 1): fills halo regions 45..53
+	PXS_GET_HALO_REGIONS_FACE_NEIGHBOR(0, 0, 1, 45, 0, 3, 6, 9, 12, 15, 18, 21, 24)
+
+	//
+	// Fill halo regions for the 12 neighbors which share an edge with the packet.
+	//
+
+	PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(-1, -1, 0, 54, 12, 13, 14)
+	PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(1, -1, 0, 57, 3, 4, 5)
+	PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(-1, 1, 0, 60, 9, 10, 11)
+	PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(1, 1, 0, 63, 0, 1, 2)
+
+	PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(-1, 0, -1, 66, 10, 13, 16)
+	PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(1, 0, -1, 69, 1, 4, 7)
+	PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(-1, 0, 1, 72, 9, 12, 15)
+	PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(1, 0, 1, 75, 0, 3, 6)
+
+	PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(0, -1, -1, 78, 4, 13, 22)
+	PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(0, 1, -1, 81, 1, 10, 19)
+	PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(0, -1, 1, 84, 3, 12, 21)
+	PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(0, 1, 1, 87, 0, 9, 18)
+
+	//
+	// Fill halo regions for the 8 neighbors which share a corner with the packet.
+	//
+
+	PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(-1, -1, -1, 90, 13)
+	PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(-1, -1, 1, 91, 12)
+	PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(-1, 1, -1, 92, 10)
+	PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(-1, 1, 1, 93, 9)
+	PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(1, -1, -1, 94, 4)
+	PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(1, -1, 1, 95, 3)
+	PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(1, 1, -1, 96, 1)
+	PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(1, 1, 1, 97, 0)
+
+	for(PxU32 i = 0; i < PT_PACKET_HALO_REGIONS; i++) // maxNumParticles starts at 0 due to the PxMemSet above
+		packetHalo.maxNumParticles = PxMax(packetHalo.maxNumParticles, packetHalo.numParticles[i]);
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHash.h b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHash.h
new file mode 100644
index 00000000..9d257851
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHash.h
@@ -0,0 +1,220 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_SPATIAL_HASH_H
+#define PT_SPATIAL_HASH_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "CmBitMap.h"
+#include "PtConfig.h"
+#include "PtParticleCell.h"
+#include "PtPacketSections.h"
+#include "PtSpatialHashHelper.h"
+#include "PtParticle.h"
+
+namespace physx
+{
+
+class PxBaseTask;
+
+namespace Pt
+{
+
+struct Particle;
+
+// Structure describing the regions around a packet which are relevant for particle interactions.
+// A packet has 26 neighbor packets:
+// - 6 of these neighbors share a face with the packet. Each of these neighbors provides 9 halo regions.
+// - 12 of these neighbors share an edge with the packet. Each of these neighbors provides 3 halo regions.
+// - 8 of these neighbors share a corner with the packet. Each of these neighbors provides 1 halo region.
+//
+// -> Number of halo regions for a packet:  6*9 + 12*3 + 8*1 = 98
+#define PT_PACKET_HALO_REGIONS 98
+struct PacketHaloRegions
+{
+	PxU32 numParticles[PT_PACKET_HALO_REGIONS];  //! Number of particles in each halo region
+	PxU32 firstParticle[PT_PACKET_HALO_REGIONS]; //! Index of the first particle of the particle interval
+	//! associated with each halo region
+	PxU32 maxNumParticles; //! Largest value stored in numParticles, i.e. the size of the biggest halo region
+};
+
+// Cache for a local cell hash that was computed for a set of particles.
+struct LocalCellHash
+{
+	PxU32 numParticles;        // Number of particles the cell hash is based on
+	PxU32* particleIndices;    // Particle indices (0..numParticles) with respect to the particle array that was used
+	                           // to build the cell hash. Indices are ordered according to cells.
+	PxU32 numHashEntries;      // Size of cell hash table
+	ParticleCell* hashEntries; // Hash entry for cells
+
+	bool isHashValid; // Marks whether the hash contains valid data or needs to be computed
+
+	// Creates an empty cache without buffers; the hash is flagged as not valid.
+	LocalCellHash()
+	: numParticles(0), particleIndices(NULL), numHashEntries(0), hashEntries(NULL), isHashValid(false)
+	{
+	}
+};
+
+/*!
+Spatial hash of particle cells. On the packet level it maps packet coordinates to ParticleCell
+entries, keeps one overflow cell behind the regular hash buckets and optionally a PacketSections
+entry per hash bucket.
+
+The class owns the cell table and the section table and releases them in its destructor, which is
+why it cannot be copied.
+*/
+class SpatialHash
+{
+  public:
+	/*!
+	Allocates the cell table (numHashBuckets entries plus one overflow cell) and, if supportSections
+	is set, one PacketSections entry per hash bucket.
+	*/
+	SpatialHash(PxU32 numHashBuckets, PxF32 cellSizeInv, PxU32 packetMultLog, bool supportSections);
+	~SpatialHash();
+
+	/*!
+	Looks up the cell for the given coordinate in the passed cell table. cellIndex receives the
+	index of the examined hash entry. Returns NULL if that entry is unused.
+	*/
+	static PX_FORCE_INLINE ParticleCell* findCell(PxU32& cellIndex, const GridCellVector& coord, ParticleCell* cells,
+	                                              PxU32 numHashBuckets);
+	static PX_FORCE_INLINE const ParticleCell* findConstCell(PxU32& cellIndex, const GridCellVector& coord,
+	                                                         const ParticleCell* cells, PxU32 numHashBuckets);
+
+	PX_FORCE_INLINE PxF32 getCellSizeInv() const
+	{
+		return mCellSizeInv;
+	}
+	PX_FORCE_INLINE PxU32 getPacketMultLog() const
+	{
+		return mPacketMultLog;
+	}
+
+	PX_FORCE_INLINE PxU32 getNumPackets() const
+	{
+		return mNumCells;
+	}
+	PX_FORCE_INLINE const ParticleCell* getPackets() const
+	{
+		return mCells;
+	}
+	//! Returns NULL if the hash was created without section support.
+	PX_FORCE_INLINE const PacketSections* getPacketSections() const
+	{
+		return mPacketSections;
+	}
+
+	//! Same as the static findCell(), applied to the cell table of this hash.
+	PX_FORCE_INLINE const ParticleCell* findCell(PxU32& cellIndex, const GridCellVector& coord);
+	//! Returns the hash entry for the coordinate without checking whether the entry is in use.
+	PX_FORCE_INLINE ParticleCell* getCell(PxU32& cellIndex, const GridCellVector& coord);
+
+	/*!
+	Given the coordinates of a specific packet, the packet table, the packet sections and the packet table
+	size, this function builds the halo region structure for the packet. The halo region specifies the relevant
+	particles of neighboring packets.
+	*/
+	static void getHaloRegions(PacketHaloRegions& packetHalo, const GridCellVector& packetCoords,
+	                           const ParticleCell* packets, const PacketSections* packetSections, PxU32 numHashBuckets);
+
+	/*!
+	Build local hash table for cells within a packet. Reorders a particle index array according to particle cells.
+
+	The cell entry array must have more entries than the number of particles passed. The particle index
+	table must have the size of the number of particles passed. The particles are passed as const; a
+	separate hashKeyArray is provided by the caller, presumably to hold the per particle hash keys.
+	*/
+	static void buildLocalHash(const Particle* particles, PxU32 numParticles, ParticleCell* cells, PxU32* particleIndices,
+	                           PxU16* hashKeyArray, PxU32 numHashBuckets, PxF32 cellSizeInv, const PxVec3& packetCorner);
+
+	/*!
+	Builds the packet hash and reorders particle indices to packets. Particles are not declared const since
+	the flags of particles which overflow the spatial data structure get updated. The reference on
+	continuation is removed when the update is done.
+	*/
+	void updatePacketHash(PxU32& numSorted, PxU32* sortedIndices, Particle* particles, const Cm::BitMap& particleMap,
+	                      const PxU32 validParticleRange, physx::PxBaseTask* continuation);
+
+	/*!
+	Divides each fluid packet into sections and reorders particle indices according to sections.
+	Requires that the hash was created with section support.
+	*/
+	void updatePacketSections(PxU32* particleIndices, Particle* particles, physx::PxBaseTask* continuation);
+
+  private:
+	// Not copyable: the destructor frees mCells and mPacketSections, so a memberwise copy would end
+	// in a double free. Declared but intentionally not defined.
+	SpatialHash(const SpatialHash&);
+	SpatialHash& operator=(const SpatialHash&);
+
+	static void reorderParticleIndicesToCells(const Particle* particles, PxU32 numParticles, ParticleCell* cells,
+	                                          PxU32* particleIndices, PxU32 numHashBuckets, PxU16* hashKeyArray);
+
+	void reorderParticleIndicesToPackets(PxU32* sortedIndices, PxU32 numHashedParticles, const Cm::BitMap& particleMap,
+	                                     PxU16* hashKeyArray);
+
+	/*!
+	Splits the specified packet into 26 boundary sections (plus one inner section) and reorders the particles
+	according to sections.
+	*/
+	void buildPacketSections(const ParticleCell& packet, PacketSections& sections, PxU32 packetMultLog,
+	                         Particle* particles, PxU32* particleIndices);
+
+	void reorderParticlesToPacketSections(const ParticleCell& packet, PacketSections& sections,
+	                                      const Particle* particles, const PxU32* inParticleIndices,
+	                                      PxU32* outParticleIndices, PxU16* sectionIndexBuf);
+
+  private:
+	ParticleCell* mCells;  // Cell table: hash buckets followed by the overflow cell (owned)
+	PxU32 mNumCells;       // Number of packets in use after the last updatePacketHash()
+	PxU32 mNumHashBuckets; // Number of regular hash buckets
+	PxF32 mCellSizeInv;    // Inverse cell size
+
+	// Packet Hash data
+	PxU32 mPacketMultLog;            // Log2 of the number of cells along a packet edge
+	PacketSections* mPacketSections; // One entry per hash bucket, NULL without section support (owned)
+};
+
+/*!
+Looks up the hash entry for coord in the given cell table.
+
+\param cellIndex       Receives the index returned by getCellIndex(), also if no cell is found.
+\param coord           Grid coordinate to look up.
+\param cells           Cell table to search.
+\param numHashBuckets  Number of hash buckets of the cell table.
+\return The entry at cellIndex, or NULL if that entry is flagged unused (numParticles == PX_INVALID_U32).
+*/
+PX_FORCE_INLINE const ParticleCell* SpatialHash::findConstCell(PxU32& cellIndex, const GridCellVector& coord,
+                                                               const ParticleCell* cells, PxU32 numHashBuckets)
+{
+	cellIndex = getCellIndex(coord, cells, numHashBuckets);
+
+	const ParticleCell& candidate = cells[cellIndex];
+	return (candidate.numParticles != PX_INVALID_U32) ? &candidate : NULL;
+}
+
+/*!
+Non-const variant of findConstCell(): same lookup, but the result may be modified by the caller.
+*/
+PX_FORCE_INLINE ParticleCell* SpatialHash::findCell(PxU32& cellIndex, const GridCellVector& coord, ParticleCell* cells,
+                                                    PxU32 numHashBuckets)
+{
+	// The table was passed in as non-const, so casting the constness away again is safe.
+	return const_cast<ParticleCell*>(findConstCell(cellIndex, coord, cells, numHashBuckets));
+}
+
+/*!
+Looks up coord in the cell table of this hash. Returns NULL if the entry at cellIndex is unused.
+*/
+PX_FORCE_INLINE const ParticleCell* SpatialHash::findCell(PxU32& cellIndex, const GridCellVector& coord)
+{
+	// The result is handed out as const, so the const lookup can be used directly.
+	return findConstCell(cellIndex, coord, mCells, mNumHashBuckets);
+}
+
+/*!
+Returns the hash entry getCellIndex() yields for coord, regardless of whether the entry is in use.
+cellIndex receives the index of that entry.
+*/
+PX_FORCE_INLINE ParticleCell* SpatialHash::getCell(PxU32& cellIndex, const GridCellVector& coord)
+{
+	const PxU32 bucket = getCellIndex(coord, mCells, mNumHashBuckets);
+	cellIndex = bucket;
+	return mCells + bucket;
+}
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_SPATIAL_HASH_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHashHelper.h b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHashHelper.h
new file mode 100644
index 00000000..ac4845ba
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHashHelper.h
@@ -0,0 +1,162 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+#ifndef PT_SPATIAL_HASH_HELPER_H
+#define PT_SPATIAL_HASH_HELPER_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PtParticleCell.h"
+
+namespace physx
+{
+
+namespace Pt
+{
+
+PX_FORCE_INLINE PxU32 hashFunction(const GridCellVector& coord, PxU32 numHashBuckets)
+{
+	// Masking with (numHashBuckets - 1) below needs a power-of-two bucket count, which this assert checks.
+	PX_ASSERT((((numHashBuckets - 1) ^ numHashBuckets) + 1) == (2 * numHashBuckets));
+	const PxU32 x = static_cast<PxU32>(coord.x), y = static_cast<PxU32>(coord.y), z = static_cast<PxU32>(coord.z);
+	return (x + 101 * y + 7919 * z) & (numHashBuckets - 1);
+	// sschirm: weird! The version that spreads all the coordinates is slower! Is the reason the additional
+	// multiplication?
+	// return ( (101*static_cast<PxU32>(coord.x) + 7919*static_cast<PxU32>(coord.y) +
+	// 73856093*static_cast<PxU32>(coord.z)) & (numHashBuckets - 1) );
+}
+
+PX_FORCE_INLINE PxU32 getCellIndex(const GridCellVector& coord, const ParticleCell* cells, PxU32 numHashBuckets)
+{
+#if PX_DEBUG
+	PxU32 tries = 0; // number of probe steps taken, consumed by the assert below only
+#endif
+	// Linear probing: begin at the hashed slot and walk forward, wrapping at the table end, until
+	// the slot is empty or already stores the requested cell coordinates.
+	PxU32 slot = hashFunction(coord, numHashBuckets);
+	for(;;)
+	{
+		const ParticleCell& candidate = cells[slot];
+		const bool takenByOtherCell = (candidate.numParticles != PX_INVALID_U32) && (coord != candidate.coords);
+		if(!takenByOtherCell)
+			return slot;
+
+		slot = (slot + 1) & (numHashBuckets - 1);
+#if PX_DEBUG
+		tries++;
+#endif
+		PX_ASSERT(tries < numHashBuckets); // a full lap means the table has no empty slot and no match
+	}
+}
+
+/*
+Compute packet section index for given cell coordinate. The packet sections are indexed as follows.
+
+Left packet boundary      Front packet boundary      Top packet boundary
+__________________        __________________         __________________
+| 3 |   5    | 4 |        | 4 |   22   |13 |         | 3 |   21   |12 |
+|___|________|___|        |___|________|___|         |___|________|___|
+|   |        |   |        |   |        |   |         |   |        |   |
+| 6 |   8    | 7 |        | 7 |   25   |16 |         | 5 |   23   |14 |
+|   |        |   |        |   |        |   |         |   |        |   |
+|___|________|___|        |___|________|___|         |___|________|___|
+| 0 |   2    | 1 |        | 1 |   19   |10 |         | 4 |   22   |13 |
+|___|________|___|        |___|________|___|         |___|________|___|
+
+Right packet boundary     Rear packet boundary       Bottom packet boundary
+__________________        __________________         __________________
+|13 |   14   |12 |        |12 |   21   | 3 |         | 1 |   19   |10 |
+|___|________|___|        |___|________|___|         |___|________|___|
+|   |        |   |        |   |        |   |         |   |        |   |
+|16 |   17   |15 |        |15 |   24   | 6 |         | 2 |   20   |11 |
+|   |        |   |        |   |        |   |         |   |        |   |
+|___|________|___|        |___|________|___|         |___|________|___|
+|10 |   11   | 9 |        |9  |   18   | 0 |         | 0 |   18   | 9 |
+|___|________|___|        |___|________|___|         |___|________|___|
+
+Note: One section is missing in this illustration. Section 26 is in the middle of the packet and
+      enclosed by the other sections. For particles in section 26 we know for sure that no interaction
+      with particles of neighboring packets occur.
+*/
+PX_FORCE_INLINE PxU32
+getPacketSectionIndex(const GridCellVector& cellCoords, const GridCellVector& packetMinCellCoords, PxU32 packetMult)
+{
+	// Returns the section index (0..26) of the cell 'cellCoords' within the packet whose minimal cell is
+	// 'packetMinCellCoords' and which spans 'packetMult' cells per axis. The numbering is the one shown in
+	// the illustration above this function.
+
+	// Shift the cell coordinates so that the packet's minimal cell sits at the origin (0,0,0).
+	GridCellVector local(cellCoords);
+	local -= packetMinCellCoords;
+
+	// Each axis adds one of three offsets, depending on where the cell lies along that axis:
+	//
+	//   axis | component + 1 == packetMult | other non-zero component | component == 0
+	//   -----+-----------------------------+--------------------------+----------------------
+	//    x   |  9  (right boundary)        | 18                       | 0  (left boundary)
+	//    y   |  3  (top boundary)          |  6                       | 0  (bottom boundary)
+	//    z   |  1  (front boundary)        |  2                       | 0  (rear boundary)
+	//
+	// The upper-boundary test comes first, so it takes precedence when packetMult is 1 and a
+	// component of 0 would satisfy both the upper- and the lower-boundary condition.
+
+	// x axis
+	PxU32 xOffset = 0;
+	if(PxU32(local.x + 1) == packetMult)
+		xOffset = 9;
+	else if(local.x != 0)
+		xOffset = 18;
+
+	//-----------
+
+	// y axis
+	PxU32 yOffset = 0;
+	if(PxU32(local.y + 1) == packetMult)
+		yOffset = 3;
+	else if(local.y != 0)
+		yOffset = 6;
+
+	//-----------
+
+	// z axis
+	PxU32 zOffset = 0;
+	if(PxU32(local.z + 1) == packetMult)
+		zOffset = 1;
+	else if(local.z != 0)
+		zOffset = 2;
+
+	// The section index is the sum of the three per-axis offsets.
+	return xOffset + yOffset + zOffset;
+}
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_SPATIAL_HASH_HELPER_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialLocalHash.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialLocalHash.cpp
new file mode 100644
index 00000000..82a3ac28
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialLocalHash.cpp
@@ -0,0 +1,173 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "PtSpatialHashHelper.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "PtSpatialHash.h"
+#include "PtParticle.h"
+#include "PsUtilities.h"
+
+/*! Builds the local cell hash for the given particles: counts the particles per grid cell, assigns every
+used cell its interval start (firstParticle) and finally reorders the particle index table by cell.
+'hashKeyArray' receives, per particle, the hash slot of the cell the particle falls into. */
+void physx::Pt::SpatialHash::buildLocalHash(const Particle* particles, PxU32 numParticles, ParticleCell* cells,
+                                            PxU32* particleIndices, PxU16* hashKeyArray, PxU32 numHashBuckets,
+                                            PxF32 cellSizeInv, const PxVec3& packetCorner)
+{
+	PX_ASSERT(particles);
+	PX_ASSERT(cells);
+	PX_ASSERT(particleIndices);
+	PX_ASSERT(numHashBuckets > numParticles); // Needs to be larger to have at least one empty hash bucket (required to
+	// detect invalid cells).
+
+	// Mark packet cell entries as empty.
+	for(PxU32 c = 0; c < numHashBuckets; c++)
+		cells[c].numParticles = PX_INVALID_U32;
+
+	PX_ALIGN(16, Particle fakeParticle); // stand-in read for slots past the last particle; its result is never used
+	fakeParticle.position = PxVec3(FLT_MAX, FLT_MAX, FLT_MAX);
+
+	PxU32 numParticles4 = ((numParticles + 3) & ~0x3) + 4; // ceil up to multiple of four + 4 for safe unrolling
+
+	// Add particles to cell hash
+	// The cell coordinates of the next four particles are always computed one iteration ahead of their use.
+	const Particle* prt0 = (0 < numParticles) ? particles : &fakeParticle; // never read 'particles' when it is empty
+	const Particle* prt1 = (1 < numParticles) ? particles + 1 : &fakeParticle;
+	const Particle* prt2 = (2 < numParticles) ? particles + 2 : &fakeParticle;
+	const Particle* prt3 = (3 < numParticles) ? particles + 3 : &fakeParticle;
+
+	struct Int32Vec3
+	{
+		PX_FORCE_INLINE void set(const PxVec3& realVec, const PxF32 scale)
+		{
+			x = static_cast<PxI32>(Ps::floor(realVec.x * scale));
+			y = static_cast<PxI32>(Ps::floor(realVec.y * scale));
+			z = static_cast<PxI32>(Ps::floor(realVec.z * scale));
+		}
+		PxI32 x;
+		PxI32 y;
+		PxI32 z;
+	};
+
+	PX_ALIGN(16, Int32Vec3 cellCoords[8]); // two batches of four: one being consumed, one being precomputed
+	cellCoords[0].set(prt0->position - packetCorner, cellSizeInv);
+	cellCoords[1].set(prt1->position - packetCorner, cellSizeInv);
+	cellCoords[2].set(prt2->position - packetCorner, cellSizeInv);
+	cellCoords[3].set(prt3->position - packetCorner, cellSizeInv);
+
+	for(PxU32 p = 0; p < numParticles4; p += 4)
+	{
+		const Particle* prt0_N = (p + 4 < numParticles) ? particles + p + 4 : &fakeParticle;
+		const Particle* prt1_N = (p + 5 < numParticles) ? particles + p + 5 : &fakeParticle;
+		const Particle* prt2_N = (p + 6 < numParticles) ? particles + p + 6 : &fakeParticle;
+		const Particle* prt3_N = (p + 7 < numParticles) ? particles + p + 7 : &fakeParticle;
+
+		PxU32 wIndex = (p + 4) & 7; // write position of the next batch: alternates between 4 and 0
+		cellCoords[wIndex].set(prt0_N->position - packetCorner, cellSizeInv);
+		cellCoords[wIndex + 1].set(prt1_N->position - packetCorner, cellSizeInv);
+		cellCoords[wIndex + 2].set(prt2_N->position - packetCorner, cellSizeInv);
+		cellCoords[wIndex + 3].set(prt3_N->position - packetCorner, cellSizeInv);
+
+		PxU32 rIndex = p & 7; // read position of the current batch: alternates between 0 and 4
+		for(PxU32 i = 0; i < 4; ++i)
+		{
+			if(p + i < numParticles)
+			{
+				const Int32Vec3& int32Vec3 = cellCoords[rIndex + i];
+				const GridCellVector cellCoord(PxI16(int32Vec3.x), PxI16(int32Vec3.y), PxI16(int32Vec3.z));
+				PxU32 hashKey = getCellIndex(cellCoord, cells, numHashBuckets);
+				PX_ASSERT(hashKey < PT_PARTICLE_SYSTEM_HASH_KEY_LIMIT);
+				ParticleCell* cell = &cells[hashKey];
+				hashKeyArray[p + i] = Ps::to16(hashKey);
+				PX_ASSERT(cell);
+
+				if(cell->numParticles == PX_INVALID_U32)
+				{
+					// Entry is empty -> Initialize new entry
+					cell->coords = cellCoord;
+					cell->numParticles = 1; // this avoids some LHS
+				}
+				else
+				{
+					cell->numParticles++; // this avoids some LHS
+				}
+				PX_ASSERT(cell->numParticles != PX_INVALID_U32);
+			}
+		}
+	}
+
+	// Set for each cell the starting index of the associated particle index interval.
+	PxU32 cellFirstParticle = 0;
+	for(PxU32 c = 0; c < numHashBuckets; c++)
+	{
+		ParticleCell& cell = cells[c];
+
+		if(cell.numParticles == PX_INVALID_U32)
+			continue;
+
+		cell.firstParticle = cellFirstParticle;
+		cellFirstParticle += cell.numParticles;
+	}
+
+	reorderParticleIndicesToCells(particles, numParticles, cells, particleIndices, numHashBuckets, hashKeyArray);
+}
+
+/*!
+Reorders particle indices to cells.
+*/
+void physx::Pt::SpatialHash::reorderParticleIndicesToCells(const Particle* /*particles*/, PxU32 numParticles,
+                                                           ParticleCell* cells, PxU32* particleIndices,
+                                                           PxU32 numHashBuckets, PxU16* hashKeyArray)
+{
+	// Pass 1: reset the particle count of every used cell so that it can act as a fill cursor in
+	// pass 2. Empty slots keep their PX_INVALID_U32 marker.
+	ParticleCell* const cellsEnd = cells + numHashBuckets;
+	for(ParticleCell* it = cells; it != cellsEnd; ++it)
+	{
+		if(it->numParticles != PX_INVALID_U32)
+			it->numParticles = 0;
+	}
+
+	// Pass 2: write each particle index to the next free position of its cell's index interval,
+	// which starts at firstParticle; the cell's count grows by one per particle placed.
+	for(PxU32 particleIdx = 0; particleIdx < numParticles; ++particleIdx)
+	{
+		ParticleCell* const target = cells + hashKeyArray[particleIdx];
+
+		PX_ASSERT(target);
+		PX_ASSERT(target->numParticles != PX_INVALID_U32);
+
+		const PxU32 writePos = target->firstParticle + target->numParticles;
+		particleIndices[writePos] = particleIdx;
+		target->numParticles++;
+	}
+}
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtTwoWayData.h b/PhysX_3.4/Source/LowLevelParticles/src/PtTwoWayData.h
new file mode 100644
index 00000000..dd9483c7
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtTwoWayData.h
@@ -0,0 +1,56 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+#ifndef PT_TWOWAYDATA_H
+#define PT_TWOWAYDATA_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+
+#include "foundation/PxVec3.h"
+#include "PtCollisionData.h"
+
+namespace physx
+{
+
+struct PxsBodyCore;
+
+namespace Pt
+{
+
+struct TwoWayData
+{
+	PxVec3 impulse; // used to accumulate impulse for two way interaction with RB
+	const PxsBodyCore* body; // body core this entry refers to - presumably the RB the impulse is meant for (confirm)
+};
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_TWOWAYDATA_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/gpu/PtRigidBodyAccessGpu.cpp b/PhysX_3.4/Source/LowLevelParticles/src/gpu/PtRigidBodyAccessGpu.cpp
new file mode 100644
index 00000000..5d7844d6
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/gpu/PtRigidBodyAccessGpu.cpp
@@ -0,0 +1,95 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "gpu/PtRigidBodyAccessGpu.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+#if PX_SUPPORT_GPU_PHYSX
+
+#include "PxvGeometry.h"
+#include "PxvDynamics.h"
+#include "PtBodyTransformVault.h"
+
+using namespace physx;
+using namespace Pt;
+
+void RigidBodyAccessGpu::copyShapeProperties(ShapeProperties& shapeProperties, const size_t shape, const size_t body) const
+{
+	// 'shape' and 'body' carry the addresses of a PxsShapeCore and a PxsRigidCore, respectively.
+	const PxsShapeCore& shapeCore = *reinterpret_cast<const PxsShapeCore*>(shape);
+	const PxsRigidCore& ownerCore = *reinterpret_cast<const PxsRigidCore*>(body);
+	*shapeProperties.geometry = shapeCore.geometry;
+	*shapeProperties.ownerToWorld = ownerCore.body2World;
+	*shapeProperties.shapeToOwner = shapeCore.transform;
+}
+
+void RigidBodyAccessGpu::copyBodyProperties(BodyProperties& bodyProperties, const size_t* bodies, PxU32 numBodies) const
+{
+	const PxsBodyCore* const* handleIt = reinterpret_cast<const PxsBodyCore* const*>(bodies);
+	PxStrideIterator<PxTransform> outCurrentPose(bodyProperties.currentTransforms);
+	PxStrideIterator<PxTransform> outPreviousPose(bodyProperties.previousTransforms);
+	PxStrideIterator<PxVec3> outLinVel(bodyProperties.linearVelocities);
+	PxStrideIterator<PxVec3> outAngVel(bodyProperties.angularVelocities);
+	PxStrideIterator<PxTransform> outBody2Actor(bodyProperties.body2ActorTransforms);
+	PxStrideIterator<size_t> outHandle(bodyProperties.cpuBodyHandle);
+
+	for(PxU32 remaining = numBodies; remaining > 0; --remaining)
+	{
+		const PxsBodyCore* const bodyCore = *handleIt;
+		*outCurrentPose = bodyCore->body2World;
+		const PxTransform* const storedPose = mTransformVault.getTransform(*bodyCore);
+		if(!storedPose)
+		{
+			PX_ASSERT(0); // the vault returned no transform for this body: unexpected, write neutral values
+			*outPreviousPose = PxTransform(PxIdentity);
+			*outLinVel = PxVec3(0.f);
+			*outAngVel = PxVec3(0.f);
+			*outBody2Actor = PxTransform(PxIdentity);
+			*outHandle = 0;
+		}
+		else
+		{
+			*outPreviousPose = *storedPose;
+			*outLinVel = bodyCore->linearVelocity;
+			*outAngVel = bodyCore->angularVelocity;
+			*outBody2Actor = bodyCore->getBody2Actor();
+			*outHandle = reinterpret_cast<size_t>(bodyCore); // the handle is the address of the body core
+		}
+
+		++handleIt;
+		++outCurrentPose;
+		++outPreviousPose;
+		++outLinVel;
+		++outAngVel;
+		++outBody2Actor;
+		++outHandle;
+	}
+}
+
+#endif // PX_SUPPORT_GPU_PHYSX
+#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/gpu/PtRigidBodyAccessGpu.h b/PhysX_3.4/Source/LowLevelParticles/src/gpu/PtRigidBodyAccessGpu.h
new file mode 100644
index 00000000..9caed154
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/gpu/PtRigidBodyAccessGpu.h
@@ -0,0 +1,72 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#ifndef PT_GPU_RIGID_BODY_ACCESS_H
+#define PT_GPU_RIGID_BODY_ACCESS_H
+
+#include "PxPhysXConfig.h"
+#if PX_USE_PARTICLE_SYSTEM_API
+#if PX_SUPPORT_GPU_PHYSX
+
+#include "PxRigidBodyAccessGpu.h"
+#include "PsUserAllocated.h"
+
+namespace physx
+{
+
+namespace Pt
+{
+
+class BodyTransformVault;
+
+class RigidBodyAccessGpu : public Ps::UserAllocated, public PxRigidBodyAccessGpu
+{
+  public: // presumably the PxRigidBodyAccessGpu interface; the size_t arguments are object handles
+	virtual void copyShapeProperties(ShapeProperties& shapeProperties, const size_t shape, const size_t body) const;
+	virtual void copyBodyProperties(BodyProperties& bodyProperties, const size_t* bodies, PxU32 numBodies) const;
+
+  public:
+	RigidBodyAccessGpu(const BodyTransformVault& transformVault) : mTransformVault(transformVault)
+	{ // only the reference is stored; no copy of the vault is made
+	}
+	virtual ~RigidBodyAccessGpu()
+	{
+	}
+
+  private:
+	RigidBodyAccessGpu& operator=(const RigidBodyAccessGpu&); // declared private so that assignment is not usable
+	const BodyTransformVault& mTransformVault; // queried via getTransform() when body properties are copied
+};
+
+} // namespace Pt
+} // namespace physx
+
+#endif // PX_SUPPORT_GPU_PHYSX
+#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_GPU_RIGID_BODY_ACCESS_H
author	git perforce import user <a@b>	2016-10-25 12:29:14 -0600
committer	Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>	2016-10-25 18:56:37 -0500
commit	3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree	fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelParticles/src
download	physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip