diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelParticles/src | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/LowLevelParticles/src')
41 files changed, 11803 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtBatcher.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtBatcher.cpp new file mode 100644 index 00000000..11ff89c3 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtBatcher.cpp @@ -0,0 +1,255 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. 
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PtBatcher.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#if PX_SUPPORT_GPU_PHYSX +#include "PxPhysXGpu.h" +#endif + +#include "task/PxTask.h" +#include "PtContext.h" +#include "PtParticleSystemSim.h" +#include "PtParticleSystemSimCpu.h" + +using namespace physx; +using namespace Pt; + +namespace +{ +template <class T> +static void sortBatchedInputs(ParticleSystemSim** particleSystems, T* inputs, PxU32 batchSize, PxU32& cpuOffset, + PxU32& cpuCount, PxU32& gpuOffset, PxU32& gpuCount) +{ + PX_UNUSED(particleSystems); + PX_UNUSED(inputs); + + cpuOffset = 0; + gpuOffset = 0; + + // in place sort of both arrays + PxU32 i = 0; + PxU32 j = 0; + + while((i < batchSize) && (j < batchSize)) + { +#if PX_SUPPORT_GPU_PHYSX + if(particleSystems[i]->isGpuV()) + { + j = i + 1; + while(j < batchSize && particleSystems[j]->isGpuV()) + { + j++; + } + + if(j < batchSize) + { + Ps::swap(particleSystems[i], particleSystems[j]); + if(inputs) + { + Ps::swap(inputs[i], inputs[j]); + } + i++; + } + } + else +#endif + { + i++; + } + } + + gpuOffset = i; + cpuCount = gpuOffset; + gpuCount = batchSize - cpuCount; +} +} + +Batcher::Batcher(class Context& _context) +: shapeGenTask("Pt::Batcher::shapeGen") +, dynamicsCpuTask("Pt::Batcher::dynamicsCpu") +, collPrepTask("Pt::Batcher::collPrep") +, collisionCpuTask("Pt::Batcher::collisionCpu") +, context(_context) +{ +} + +PxBaseTask& Batcher::scheduleShapeGeneration(ParticleSystemSim** particleSystems, ParticleShapesUpdateInput* inputs, + PxU32 batchSize, PxBaseTask& continuation) +{ + PxU32 cpuOffset = 0; + PxU32 cpuCount = batchSize; + +#if PX_SUPPORT_GPU_PHYSX + PxU32 gpuOffset, gpuCount; + sortBatchedInputs(particleSystems, inputs, batchSize, cpuOffset, cpuCount, gpuOffset, gpuCount); + if(context.getSceneGpuFast() && gpuCount > 0) + { + PxBaseTask& task = context.getSceneGpuFast()->scheduleParticleShapeUpdate( + particleSystems + gpuOffset, inputs + gpuOffset, gpuCount, 
continuation); + shapeGenTask.addDependent(task); + task.removeReference(); + } +#endif + for(PxU32 i = cpuOffset; i < (cpuOffset + cpuCount); ++i) + { + PxBaseTask& task = + static_cast<ParticleSystemSimCpu*>(particleSystems[i])->schedulePacketShapesUpdate(inputs[i], continuation); + shapeGenTask.addDependent(task); + task.removeReference(); + } + + if(shapeGenTask.getReference() == 0) + { + continuation.addReference(); + return continuation; + } + + while(shapeGenTask.getReference() > 1) + shapeGenTask.removeReference(); + + return shapeGenTask; +} + +PxBaseTask& Batcher::scheduleDynamicsCpu(ParticleSystemSim** particleSystems, PxU32 batchSize, PxBaseTask& continuation) +{ + PxU32 cpuOffset = 0; + PxU32 cpuCount = batchSize; +#if PX_SUPPORT_GPU_PHYSX + PxU32 gpuOffset, gpuCount; + sortBatchedInputs(particleSystems, (PxU8*)NULL, batchSize, cpuOffset, cpuCount, gpuOffset, gpuCount); +#endif + for(PxU32 i = cpuOffset; i < (cpuOffset + cpuCount); ++i) + { + PxBaseTask& task = static_cast<ParticleSystemSimCpu*>(particleSystems[i])->scheduleDynamicsUpdate(continuation); + dynamicsCpuTask.addDependent(task); + task.removeReference(); + } + + if(dynamicsCpuTask.getReference() == 0) + { + continuation.addReference(); + return continuation; + } + + while(dynamicsCpuTask.getReference() > 1) + dynamicsCpuTask.removeReference(); + + return dynamicsCpuTask; +} + +PxBaseTask& Batcher::scheduleCollisionPrep(ParticleSystemSim** particleSystems, PxLightCpuTask** inputPrepTasks, + PxU32 batchSize, PxBaseTask& continuation) +{ + PxU32 cpuOffset = 0; + PxU32 cpuCount = batchSize; +#if PX_SUPPORT_GPU_PHYSX + PxU32 gpuOffset, gpuCount; + sortBatchedInputs(particleSystems, inputPrepTasks, batchSize, cpuOffset, cpuCount, gpuOffset, gpuCount); + if(context.getSceneGpuFast() && gpuCount > 0) + { + PxBaseTask& gpuCollisionInputTask = context.getSceneGpuFast()->scheduleParticleCollisionInputUpdate( + particleSystems + gpuOffset, gpuCount, continuation); + for(PxU32 i = gpuOffset; i < 
(gpuOffset + gpuCount); ++i) + { + inputPrepTasks[i]->setContinuation(&gpuCollisionInputTask); + collPrepTask.addDependent(*inputPrepTasks[i]); + inputPrepTasks[i]->removeReference(); + } + gpuCollisionInputTask.removeReference(); + } +#else + PX_UNUSED(particleSystems); + PX_UNUSED(batchSize); +#endif + for(PxU32 i = cpuOffset; i < (cpuOffset + cpuCount); ++i) + { + inputPrepTasks[i]->setContinuation(&continuation); + collPrepTask.addDependent(*inputPrepTasks[i]); + inputPrepTasks[i]->removeReference(); + } + + if(collPrepTask.getReference() == 0) + { + continuation.addReference(); + return continuation; + } + + while(collPrepTask.getReference() > 1) + collPrepTask.removeReference(); + + return collPrepTask; +} + +PxBaseTask& Batcher::scheduleCollisionCpu(ParticleSystemSim** particleSystems, PxU32 batchSize, PxBaseTask& continuation) +{ + PxU32 cpuOffset = 0; + PxU32 cpuCount = batchSize; +#if PX_SUPPORT_GPU_PHYSX + PxU32 gpuOffset, gpuCount; + sortBatchedInputs(particleSystems, (PxU8*)NULL, batchSize, cpuOffset, cpuCount, gpuOffset, gpuCount); +#endif + for(PxU32 i = cpuOffset; i < (cpuOffset + cpuCount); ++i) + { + PxBaseTask& task = static_cast<ParticleSystemSimCpu*>(particleSystems[i])->scheduleCollisionUpdate(continuation); + collisionCpuTask.addDependent(task); + task.removeReference(); + } + + if(collisionCpuTask.getReference() == 0) + { + continuation.addReference(); + return continuation; + } + + while(collisionCpuTask.getReference() > 1) + collisionCpuTask.removeReference(); + + return collisionCpuTask; +} + +PxBaseTask& Batcher::schedulePipelineGpu(ParticleSystemSim** particleSystems, PxU32 batchSize, PxBaseTask& continuation) +{ +#if PX_SUPPORT_GPU_PHYSX + PxU32 cpuOffset, cpuCount, gpuOffset, gpuCount; + sortBatchedInputs(particleSystems, (PxU8*)NULL, batchSize, cpuOffset, cpuCount, gpuOffset, gpuCount); + if(context.getSceneGpuFast() && gpuCount > 0) + { + return context.getSceneGpuFast()->scheduleParticlePipeline(particleSystems + gpuOffset, 
gpuCount, continuation); + } +#else + PX_UNUSED(batchSize); + PX_UNUSED(particleSystems); +#endif + continuation.addReference(); + return continuation; +} + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtBatcher.h b/PhysX_3.4/Source/LowLevelParticles/src/PtBatcher.h new file mode 100644 index 00000000..7ff534c0 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtBatcher.h @@ -0,0 +1,99 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PT_BATCHER_H +#define PT_BATCHER_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "CmTask.h" + +namespace physx +{ + +namespace Pt +{ + +class Batcher : public Ps::UserAllocated +{ + public: + Batcher(class Context& _context); + + /** + Issues shape update stages for a batch of particle systems. + Ownership of Pt::ParticleShapeUpdateInput::shapes passed to callee! + */ + physx::PxBaseTask& scheduleShapeGeneration(class ParticleSystemSim** particleSystems, + struct ParticleShapesUpdateInput* inputs, PxU32 batchSize, + physx::PxBaseTask& continuation); + + /** + Issues dynamics (SPH) update on CPUs. + */ + physx::PxBaseTask& scheduleDynamicsCpu(class ParticleSystemSim** particleSystems, PxU32 batchSize, + physx::PxBaseTask& continuation); + + /** + Schedules collision prep work. + */ + physx::PxBaseTask& scheduleCollisionPrep(class ParticleSystemSim** particleSystems, + physx::PxLightCpuTask** inputPrepTasks, PxU32 batchSize, + physx::PxBaseTask& continuation); + + /** + Schedules collision update stages for a batch of particle systems on CPU. + Ownership of Pt::ParticleCollisionUpdateInput::contactManagerStream passed to callee! + */ + physx::PxBaseTask& scheduleCollisionCpu(class ParticleSystemSim** particleSystems, PxU32 batchSize, + physx::PxBaseTask& continuation); + + /** + Schedule gpu pipeline. 
+ */ + physx::PxBaseTask& schedulePipelineGpu(ParticleSystemSim** particleSystems, PxU32 batchSize, + physx::PxBaseTask& continuation); + + Cm::FanoutTask shapeGenTask; + Cm::FanoutTask dynamicsCpuTask; + Cm::FanoutTask collPrepTask; + Cm::FanoutTask collisionCpuTask; + + class Context& context; + + private: + Batcher(const Batcher&); + Batcher& operator=(const Batcher&); +}; + +} // namespace Pt +} // namespace physx + +#endif // PX_USE_PARTICLE_SYSTEM_API +#endif // PT_BATCHER_H diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtBodyTransformVault.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtBodyTransformVault.cpp new file mode 100644 index 00000000..de4c282d --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtBodyTransformVault.cpp @@ -0,0 +1,241 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PtBodyTransformVault.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "foundation/PxMemory.h" +#include "PxvGeometry.h" +#include "PxvDynamics.h" +#include "PsHash.h" +#include "PsFoundation.h" + +using namespace physx; +using namespace Pt; + +BodyTransformVault::BodyTransformVault() : mBody2WorldPool("body2WorldPool", 256), mBodyCount(0) +{ + // Make sure the hash size is a power of 2 + PX_ASSERT((((PT_BODY_TRANSFORM_HASH_SIZE - 1) ^ PT_BODY_TRANSFORM_HASH_SIZE) + 1) == + (2 * PT_BODY_TRANSFORM_HASH_SIZE)); + + PxMemSet(mBody2WorldHash, 0, PT_BODY_TRANSFORM_HASH_SIZE * sizeof(Body2World*)); +} + +BodyTransformVault::~BodyTransformVault() +{ +} + +PX_FORCE_INLINE PxU32 BodyTransformVault::getHashIndex(const PxsBodyCore& body) const +{ + PxU32 index = Ps::hash(&body); + return (index & (PT_BODY_TRANSFORM_HASH_SIZE - 1)); // Modulo hash size +} + +void BodyTransformVault::addBody(const PxsBodyCore& body) +{ + Body2World* entry; + Body2World* dummy; + + bool hasEntry = findEntry(body, entry, dummy); + if(!hasEntry) + { + Body2World* newEntry; + if(entry) + { + // No entry for the given body but the hash entry has other bodies + // --> create new entry, link into list + newEntry = createEntry(body); + entry->next = newEntry; + } + else + { + // No entry for the given body and no hash entry --> create new entry + PxU32 hashIndex = getHashIndex(body); 
+ newEntry = createEntry(body); + mBody2WorldHash[hashIndex] = newEntry; + } + newEntry->refCount = 1; + mBodyCount++; + } + else + { + entry->refCount++; + } +} + +void BodyTransformVault::removeBody(const PxsBodyCore& body) +{ + Body2World* entry; + Body2World* prevEntry; + + bool hasEntry = findEntry(body, entry, prevEntry); + PX_ASSERT(hasEntry); + PX_UNUSED(hasEntry); + + if(entry->refCount == 1) + { + if(prevEntry) + { + prevEntry->next = entry->next; + } + else + { + // Shape entry was first in list + PxU32 hashIndex = getHashIndex(body); + + mBody2WorldHash[hashIndex] = entry->next; + } + mBody2WorldPool.destroy(entry); + PX_ASSERT(mBodyCount > 0); + mBodyCount--; + } + else + { + entry->refCount--; + } +} + +void BodyTransformVault::teleportBody(const PxsBodyCore& body) +{ + Body2World* entry; + Body2World* dummy; + + bool hasEntry = findEntry(body, entry, dummy); + PX_ASSERT(hasEntry); + PX_ASSERT(entry); + PX_UNUSED(hasEntry); + + PX_CHECK_AND_RETURN(body.body2World.isValid(), "BodyTransformVault::teleportBody: body.body2World is not valid."); + + entry->b2w = body.body2World; +} + +const PxTransform* BodyTransformVault::getTransform(const PxsBodyCore& body) const +{ + Body2World* entry; + Body2World* dummy; + + bool hasEntry = findEntry(body, entry, dummy); + // PX_ASSERT(hasEntry); + // PX_UNUSED(hasEntry); + // PX_ASSERT(entry); + return hasEntry ? 
&entry->b2w : NULL; +} + +void BodyTransformVault::update() +{ + if(mBodyCount) + { + for(PxU32 i = 0; i < PT_BODY_TRANSFORM_HASH_SIZE; i++) + { + Body2World* entry = mBody2WorldHash[i]; + + while(entry) + { + PX_ASSERT(entry->body); + entry->b2w = entry->body->body2World; + entry = entry->next; + } + } + } +} + +BodyTransformVault::Body2World* BodyTransformVault::createEntry(const PxsBodyCore& body) +{ + Body2World* entry = mBody2WorldPool.construct(); + + if(entry) + { + entry->b2w = body.body2World; + entry->next = NULL; + entry->body = &body; + } + + return entry; +} + +bool BodyTransformVault::isInVaultInternal(const PxsBodyCore& body) const +{ + PxU32 hashIndex = getHashIndex(body); + + if(mBody2WorldHash[hashIndex]) + { + Body2World* curEntry = mBody2WorldHash[hashIndex]; + + while(curEntry->next) + { + if(curEntry->body == &body) + break; + + curEntry = curEntry->next; + } + + if(curEntry->body == &body) + return true; + } + + return false; +} + +bool BodyTransformVault::findEntry(const PxsBodyCore& body, Body2World*& entry, Body2World*& prevEntry) const +{ + PxU32 hashIndex = getHashIndex(body); + + prevEntry = NULL; + bool hasEntry = false; + if(mBody2WorldHash[hashIndex]) + { + Body2World* curEntry = mBody2WorldHash[hashIndex]; + + while(curEntry->next) + { + if(curEntry->body == &body) + break; + + prevEntry = curEntry; + curEntry = curEntry->next; + } + + entry = curEntry; + if(curEntry->body == &body) + { + // An entry already exists for the given body + hasEntry = true; + } + } + else + { + entry = NULL; + } + + return hasEntry; +} + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollision.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollision.cpp new file mode 100644 index 00000000..537c0112 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollision.cpp @@ -0,0 +1,676 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license 
agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "PtCollision.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "PtConfig.h" +#include "PtParticleSystemSimCpu.h" +#include "PtParticleShapeCpu.h" +#include "PtContext.h" +#include "PtBodyTransformVault.h" +#include "PtCollisionHelper.h" +#include "PtParticleContactManagerStream.h" +#include "GuConvexMeshData.h" +#include "CmFlushPool.h" +#include "PxvGeometry.h" + +using namespace physx; +using namespace Pt; + +namespace physx +{ + +namespace Pt +{ + +class CollisionTask : public Cm::Task +{ + public: + CollisionTask(Collision& context, PxU32 taskDataIndex) : mCollisionContext(context), mTaskDataIndex(taskDataIndex) + { + } + + virtual void runInternal() + { + mCollisionContext.processShapeListWithFilter(mTaskDataIndex); + } + + virtual const char* getName() const + { + return "Collision.fluidCollision"; + } + + private: + CollisionTask& operator=(const CollisionTask&); + Collision& mCollisionContext; + PxU32 mTaskDataIndex; +}; + +} // namespace Pt +} // namespace physx + +/* +how to support dominance-driven one/two-way collision (search for 'todo dominance'): +- add 2-bit flag to PxsBodyShapeRef which stores the dominance matrix values +- store this flag when creating the shape ref in updateFluidBodyContactPair() +- use this flag when copying impulse to collData.shapeImpulse +*/ +Collision::Collision(ParticleSystemSimCpu& particleSystem) +: mParticleSystem(particleSystem), mMergeTask(this, "Collision.mergeResults") +{ +} + +Collision::~Collision() +{ +} + +void PX_FORCE_INLINE Collision::addTempW2STransform(TaskData& taskData, const ParticleStreamContactManager& cm) +{ + W2STransformTemp& cmTemp = taskData.tempContactManagers.insert(); + + if(cm.isDynamic) + { + const PxsBodyCore* bodyCore = static_cast<const PxsBodyCore*>(cm.rigidCore); + cmTemp.w2sOld = cm.shapeCore->transform.transformInv(bodyCore->getBody2Actor()).transform(cm.w2sOld->getInverse()); + cmTemp.w2sNew = + 
cm.shapeCore->transform.transformInv(bodyCore->getBody2Actor()).transform(bodyCore->body2World.getInverse()); + } + else + { + const PxTransform tmp = cm.shapeCore->transform.getInverse() * cm.rigidCore->body2World.getInverse(); + cmTemp.w2sOld = tmp; + cmTemp.w2sNew = tmp; + } +} + +void Collision::updateCollision(const PxU8* contactManagerStream, physx::PxBaseTask& continuation) +{ + mMergeTask.setContinuation(&continuation); + PxU32 maxTasks = PT_NUM_PACKETS_PARALLEL_COLLISION; + PxU32 packetParticleIndicesCount = mParticleSystem.mNumPacketParticlesIndices; + + // would be nice to get available thread count to decide on task decomposition + // mParticleSystem.getContext().getTaskManager().getCpuDispatcher(); + + // use number of particles for task decomposition + + PxU32 targetParticleCountPerTask = + PxMax(PxU32(packetParticleIndicesCount / maxTasks), PxU32(PT_SUBPACKET_PARTICLE_LIMIT_COLLISION)); + ParticleContactManagerStreamReader cmStreamReader(contactManagerStream); + ParticleContactManagerStreamIterator cmStreamEnd = cmStreamReader.getEnd(); + ParticleContactManagerStreamIterator cmStream = cmStreamReader.getBegin(); + ParticleContactManagerStreamIterator cmStreamLast; + + PxU32 numTasks = 0; + for(PxU32 i = 0; i < PT_NUM_PACKETS_PARALLEL_COLLISION; ++i) + { + TaskData& taskData = mTaskData[i]; + taskData.bounds.setEmpty(); + + // if this is the last interation, we need to gather all remaining packets + if(i == maxTasks - 1) + targetParticleCountPerTask = 0xffffffff; + + cmStreamLast = cmStream; + PxU32 currentParticleCount = 0; + + while(currentParticleCount < targetParticleCountPerTask && cmStream != cmStreamEnd) + { + ParticleStreamShape streamShape; + cmStream.getNext(streamShape); + const ParticleShapeCpu* particleShape = static_cast<const ParticleShapeCpu*>(streamShape.particleShape); + currentParticleCount += particleShape->getFluidPacket()->numParticles; + } + + if(currentParticleCount > 0) + { + PX_ASSERT(cmStreamLast != cmStream); + 
taskData.packetBegin = cmStreamLast; + taskData.packetEnd = cmStream; + numTasks++; + } + } + PX_ASSERT(cmStream == cmStreamEnd); + + // spawn tasks + for(PxU32 i = 0; i < numTasks; ++i) + { + void* ptr = mParticleSystem.getContext().getTaskPool().allocate(sizeof(CollisionTask)); + CollisionTask* task = PX_PLACEMENT_NEW(ptr, CollisionTask)(*this, i); + task->setContinuation(&mMergeTask); + task->removeReference(); + } + + mMergeTask.removeReference(); +} + +void Collision::updateOverflowParticles() +{ + // if no particles are present, the hash shouldn't be accessed, as it hasn't been updated. + if(mParticleSystem.mParticleState->getValidParticleRange() > 0) + { + const Pt::ParticleCell& overflowCell = + mParticleSystem.mSpatialHash->getPackets()[PT_PARTICLE_SYSTEM_OVERFLOW_INDEX]; + Pt::Particle* particles = mParticleSystem.mParticleState->getParticleBuffer(); + PxU32* indices = mParticleSystem.mPacketParticlesIndices; + for(PxU32 i = overflowCell.firstParticle; i < overflowCell.firstParticle + overflowCell.numParticles; i++) + { + PxU32 index = indices[i]; + Pt::Particle& particle = particles[index]; + PX_ASSERT((particle.flags.api & PxParticleFlag::eSPATIAL_DATA_STRUCTURE_OVERFLOW) != 0); + + // update velocity and position + // world bounds are not updated for overflow particles, to make it more consistent with GPU. + { + PxVec3 acceleration = mParams.externalAcceleration; + integrateParticleVelocity(particle, mParams.maxMotionDistance, acceleration, mParams.dampingDtComp, + mParams.timeStep); + particle.position = particle.position + particle.velocity * mParams.timeStep; + + // adapted from updateParticle(...) 
in PxsFluidCollisionHelper.h + bool projection = (mParams.flags & PxParticleBaseFlag::ePROJECT_TO_PLANE) != 0; + if(projection) + { + const PxReal dist = mParams.projectionPlane.n.dot(particle.velocity); + particle.velocity = particle.velocity - (mParams.projectionPlane.n * dist); + particle.position = mParams.projectionPlane.project(particle.position); + } + PX_ASSERT(particle.position.isFinite()); + } + } + } +} + +void Collision::processShapeListWithFilter(PxU32 taskDataIndex, const PxU32 skipNum) +{ + TaskData& taskData = mTaskData[taskDataIndex]; + + ParticleContactManagerStreamIterator it = taskData.packetBegin; + while(it != taskData.packetEnd) + { + ParticleStreamShape streamShape; + it.getNext(streamShape); + + if(streamShape.numContactManagers < skipNum) + continue; + + const ParticleShapeCpu* particleShape = static_cast<const ParticleShapeCpu*>(streamShape.particleShape); + PX_ASSERT(particleShape); + PX_UNUSED(particleShape); + + // Collect world to shape space transforms for all colliding rigid body shapes + taskData.tempContactManagers.clear(); + for(PxU32 i = 0; i < streamShape.numContactManagers; i++) + { + const ParticleStreamContactManager& cm = streamShape.contactManagers[i]; + addTempW2STransform(taskData, cm); + } + + updateFluidShapeCollision( + mParticleSystem.mParticleState->getParticleBuffer(), mParticleSystem.mFluidTwoWayData, + mParticleSystem.mTransientBuffer, mParticleSystem.mCollisionVelocities, mParticleSystem.mConstraintBuffers, + mParticleSystem.mOpcodeCacheBuffer, taskData.bounds, mParticleSystem.mPacketParticlesIndices, + mParticleSystem.mParticleState->getRestOffsetBuffer(), taskData.tempContactManagers.begin(), streamShape); + } +} + +void Collision::mergeResults(physx::PxBaseTask* /*continuation*/) +{ + PxBounds3& worldBounds = mParticleSystem.mParticleState->getWorldBounds(); + for(PxU32 i = 0; i < PT_NUM_PACKETS_PARALLEL_COLLISION; ++i) + worldBounds.include(mTaskData[i].bounds); +} + +void 
Collision::updateFluidShapeCollision(Particle* particles, TwoWayData* fluidTwoWayData, PxVec3* transientBuf, + PxVec3* collisionVelocities, ConstraintBuffers& constraintBufs, + ParticleOpcodeCache* opcodeCache, PxBounds3& worldBounds, + const PxU32* fluidShapeParticleIndices, const PxF32* restOffsets, + const W2STransformTemp* w2sTransforms, const ParticleStreamShape& streamShape) +{ + const ParticleShapeCpu& particleShape = *static_cast<const ParticleShapeCpu*>(streamShape.particleShape); + PX_ASSERT(particleShape.getFluidPacket()); + + const ParticleCell& packet = *particleShape.getFluidPacket(); + + PxU32 numParticles = packet.numParticles; + PxU32 firstParticleIndex = packet.firstParticle; + const PxU32* packetParticleIndices = fluidShapeParticleIndices + firstParticleIndex; + const PxU32 numParticlesPerSubpacket = PT_SUBPACKET_PARTICLE_LIMIT_COLLISION; + + PX_ALLOCA(particlesSp, Particle, numParticlesPerSubpacket); + PxF32 restOffsetsSp[numParticlesPerSubpacket]; + + const PxU32 numHashBuckets = PT_LOCAL_HASH_SIZE_MESH_COLLISION; + + PxU32 hashMemCount = numHashBuckets * sizeof(ParticleCell) + numParticlesPerSubpacket * sizeof(PxU32); + PxU32 cacheMemCount = numParticlesPerSubpacket * sizeof(ParticleOpcodeCache); + PX_ALLOCA(shareMem, PxU8, PxMax(hashMemCount, cacheMemCount)); + + ParticleOpcodeCache* perParticleCacheSp = NULL; + LocalCellHash localCellHash; + PxVec3 packetCorner; + + if(opcodeCache) + perParticleCacheSp = reinterpret_cast<ParticleOpcodeCache*>(shareMem.mPointer); + else + { + // Make sure the number of hash buckets is a power of 2 (requirement for the used hash function) + PX_ASSERT((((numHashBuckets - 1) ^ numHashBuckets) + 1) == (2 * numHashBuckets)); + PX_ASSERT(numHashBuckets > numParticlesPerSubpacket); + // Set the buffers for the local cell hash + localCellHash.particleIndices = reinterpret_cast<PxU32*>(shareMem.mPointer); + localCellHash.hashEntries = + reinterpret_cast<ParticleCell*>(shareMem.mPointer + numParticlesPerSubpacket * 
sizeof(PxU32)); + packetCorner = + PxVec3(PxReal(packet.coords.x), PxReal(packet.coords.y), PxReal(packet.coords.z)) * mParams.packetSize; + } + + // Divide the packet into subpackets that fit into local memory of processing unit. + PxU32 particlesRemainder = (numParticles - 1) % numParticlesPerSubpacket + 1; + + PxU32 numProcessedParticles = 0; + PxU32 numParticlesSp = particlesRemainder; // We start with the smallest subpacket, i.e., the subpacket which does + // not reach its particle limit. + while(numProcessedParticles < numParticles) + { + const PxU32* particleIndicesSp = packetParticleIndices + numProcessedParticles; + + // load particles (constraints are loaded on demand so far) + for(PxU32 p = 0; p < numParticlesSp; p++) + { + PxU32 particleIndex = particleIndicesSp[p]; + particlesSp[p] = particles[particleIndex]; + } + + if(restOffsets) + { + for(PxU32 p = 0; p < numParticlesSp; p++) + { + PxU32 particleIndex = particleIndicesSp[p]; + restOffsetsSp[p] = restOffsets[particleIndex]; + } + } + else + { + for(PxU32 p = 0; p < numParticlesSp; p++) + restOffsetsSp[p] = mParams.restOffset; + } + + updateSubPacket(particlesSp, fluidTwoWayData, transientBuf, collisionVelocities, constraintBufs, + perParticleCacheSp, opcodeCache, localCellHash, worldBounds, packetCorner, particleIndicesSp, + numParticlesSp, streamShape.contactManagers, w2sTransforms, streamShape.numContactManagers, + restOffsetsSp); + + // store particles back + for(PxU32 p = 0; p < numParticlesSp; p++) + { + PxU32 particleIndex = particleIndicesSp[p]; + particles[particleIndex] = particlesSp[p]; + } + + // Invalidate cached local cell hash + localCellHash.isHashValid = false; + + numProcessedParticles += numParticlesSp; + numParticlesSp = numParticlesPerSubpacket; + } +} + +PX_FORCE_INLINE void +Collision::updateSubPacket(Particle* particlesSp, TwoWayData* fluidTwoWayData, PxVec3* transientBuf, + PxVec3* collisionVelocities, ConstraintBuffers& constraintBufs, + ParticleOpcodeCache* 
perParticleCacheLocal, ParticleOpcodeCache* perParticleCacheGlobal, + LocalCellHash& localCellHash, PxBounds3& worldBounds, const PxVec3& packetCorner, + const PxU32* particleIndicesSp, const PxU32 numParticlesSp, + const ParticleStreamContactManager* contactManagers, const W2STransformTemp* w2sTransforms, + const PxU32 numContactManagers, const PxF32* restOffsetsSp) +{ + ParticleCollData* collDataSp = + reinterpret_cast<ParticleCollData*>(PX_ALLOC(numParticlesSp * sizeof(ParticleCollData), "ParticleCollData")); + for(PxU32 p = 0; p < numParticlesSp; p++) + { + const PxU32 particleIndex = particleIndicesSp[p]; + Particle& particle = particlesSp[p]; + PX_ASSERT(particle.position.isFinite() && particle.velocity.isFinite()); + ParticleCollData& collData = collDataSp[p]; + Ps::prefetchLine(&collData); + collData.c0 = &constraintBufs.constraint0Buf[particleIndex]; + collData.c1 = &constraintBufs.constraint1Buf[particleIndex]; + Ps::prefetchLine(collData.c0); + Ps::prefetchLine(collData.c1); + const PxVec3 particleOldVel = particle.velocity; + + // integrate velocity + { + PxVec3 acceleration = mParams.externalAcceleration; + if(mParams.flags & InternalParticleSystemFlag::eSPH) + acceleration += transientBuf[particleIndex]; + + integrateParticleVelocity(particle, mParams.maxMotionDistance, acceleration, mParams.dampingDtComp, + mParams.timeStep); + } + + PxVec3 c0Velocity(0.0f); + PxVec3 c1Velocity(0.0f); + const PxsBodyCore* c0TwoWayBody = NULL; + const PxsBodyCore* c1TwoWayBody = NULL; + if(particle.flags.low & InternalParticleFlag::eCONSTRAINT_0_DYNAMIC) + { + c0Velocity = constraintBufs.constraint0DynamicBuf[particleIndex].velocity; + if(fluidTwoWayData) + c0TwoWayBody = constraintBufs.constraint0DynamicBuf[particleIndex].twoWayBody; + } + + if(particle.flags.low & InternalParticleFlag::eCONSTRAINT_1_DYNAMIC) + { + c1Velocity = constraintBufs.constraint1DynamicBuf[particleIndex].velocity; + if(fluidTwoWayData) + c1TwoWayBody = 
constraintBufs.constraint1DynamicBuf[particleIndex].twoWayBody; + } + + initCollDataAndApplyConstraints(collData, particle, particleOldVel, restOffsetsSp[p], c0Velocity, c1Velocity, + c0TwoWayBody, c1TwoWayBody, particleIndex, mParams); + + collData.particleFlags.low &= + PxU16(~(InternalParticleFlag::eCONSTRAINT_0_VALID | InternalParticleFlag::eCONSTRAINT_1_VALID | + InternalParticleFlag::eCONSTRAINT_0_DYNAMIC | InternalParticleFlag::eCONSTRAINT_1_DYNAMIC)); + } + + // + // Collide with dynamic shapes + + PxU32 numDynamicShapes = 0; + for(PxU32 i = 0; i < numContactManagers; i++) + { + const ParticleStreamContactManager& cm = contactManagers[i]; + if(!cm.isDynamic) + continue; + + updateFluidBodyContactPair(particlesSp, numParticlesSp, collDataSp, constraintBufs, perParticleCacheLocal, + localCellHash, packetCorner, cm, w2sTransforms[i]); + + numDynamicShapes++; + } + + PxF32 maxMotionDistanceSqr = mParams.maxMotionDistance * mParams.maxMotionDistance; + + if(numDynamicShapes > 0) + { + bool isTwoWay = (mParams.flags & PxParticleBaseFlag::eCOLLISION_TWOWAY) != 0; + for(PxU32 p = 0; p < numParticlesSp; p++) + { + ParticleCollData& collData = collDataSp[p]; + collisionResponse(collData, isTwoWay, false, mParams); + clampToMaxMotion(collData.newPos, collData.oldPos, mParams.maxMotionDistance, maxMotionDistanceSqr); + collData.flags &= ~ParticleCollisionFlags::CC; + collData.flags &= ~ParticleCollisionFlags::DC; + collData.flags |= ParticleCollisionFlags::RESET_SNORMAL; + collData.surfacePos = PxVec3(0); + // we need to keep the dynamic surface velocity for providing collision velocities in finalization + // collData.surfaceVel = PxVec3(0); + collData.ccTime = 1.0f; + } + } + + // + // Collide with static shapes + // (Static shapes need to be processed after dynamic shapes to avoid that dynamic shapes push + // particles into static shapes) + // + + bool loadedCache = false; + for(PxU32 i = 0; i < numContactManagers; i++) + { + const ParticleStreamContactManager& cm = 
contactManagers[i]; + if(cm.isDynamic) + continue; + + const Gu::GeometryUnion& shape = cm.shapeCore->geometry; + if(perParticleCacheLocal && (!loadedCache) && (shape.getType() == PxGeometryType::eTRIANGLEMESH)) + { + for(PxU32 p = 0; p < numParticlesSp; p++) + { + PxU32 particleIndex = particleIndicesSp[p]; + perParticleCacheLocal[p] = perParticleCacheGlobal[particleIndex]; + } + loadedCache = true; + } + + updateFluidBodyContactPair(particlesSp, numParticlesSp, collDataSp, constraintBufs, perParticleCacheLocal, + localCellHash, packetCorner, cm, w2sTransforms[i]); + } + + if(loadedCache) + { + for(PxU32 p = 0; p < numParticlesSp; p++) + { + PxU32 particleIndex = particleIndicesSp[p]; + perParticleCacheGlobal[particleIndex] = perParticleCacheLocal[p]; + } + } + + for(PxU32 p = 0; p < numParticlesSp; p++) + { + ParticleCollData& collData = collDataSp[p]; + Particle& particle = particlesSp[p]; + + collisionResponse(collData, false, true, mParams); + + // Clamp new particle position to maximum motion. 
+ clampToMaxMotion(collData.newPos, collData.oldPos, mParams.maxMotionDistance, maxMotionDistanceSqr); + + // Update particle + updateParticle(particle, collData, (mParams.flags & PxParticleBaseFlag::ePROJECT_TO_PLANE) != 0, + mParams.projectionPlane, worldBounds); + } + + if(transientBuf) + { + for(PxU32 p = 0; p < numParticlesSp; p++) + { + ParticleCollData& collData = collDataSp[p]; + transientBuf[collData.origParticleIndex] = collData.surfaceNormal; + } + } + + if(collisionVelocities) + { + for(PxU32 p = 0; p < numParticlesSp; p++) + { + ParticleCollData& collData = collDataSp[p]; + PxVec3 collisionVelocity = particlesSp[p].velocity - collData.surfaceVel; + collisionVelocities[collData.origParticleIndex] = collisionVelocity; + } + } + + if(fluidTwoWayData) + { + for(PxU32 p = 0; p < numParticlesSp; p++) + { + ParticleCollData& collData = collDataSp[p]; + PX_ASSERT(!collData.twoWayBody || (particlesSp[p].flags.api & PxParticleFlag::eCOLLISION_WITH_DYNAMIC)); + fluidTwoWayData[collData.origParticleIndex].body = collData.twoWayBody; + fluidTwoWayData[collData.origParticleIndex].impulse = collData.twoWayImpulse; + } + } + + PX_FREE(collDataSp); +} + +void Collision::updateFluidBodyContactPair(const Particle* particles, PxU32 numParticles, + ParticleCollData* particleCollData, ConstraintBuffers& constraintBufs, + ParticleOpcodeCache* opcodeCacheLocal, LocalCellHash& localCellHash, + const PxVec3& packetCorner, const ParticleStreamContactManager& contactManager, + const W2STransformTemp& w2sTransform) +{ + PX_ASSERT(particles); + PX_ASSERT(particleCollData); + + bool isStaticMeshType = false; + + const Gu::GeometryUnion& shape = contactManager.shapeCore->geometry; + const PxsBodyCore* body = contactManager.isDynamic ? 
static_cast<const PxsBodyCore*>(contactManager.rigidCore) : NULL; + + const PxTransform& world2Shape = w2sTransform.w2sNew; + const PxTransform& world2ShapeOld = w2sTransform.w2sOld; + const PxTransform shape2World = world2Shape.getInverse(); + + for(PxU32 p = 0; p < numParticles; p++) + { + ParticleCollData& collData = particleCollData[p]; + + collData.localFlags = (collData.flags & ParticleCollisionFlags::CC); + // Transform position from world to shape space + collData.localNewPos = world2Shape.transform(collData.newPos); + collData.localOldPos = world2ShapeOld.transform(collData.oldPos); + collData.c0 = constraintBufs.constraint0Buf + collData.origParticleIndex; + collData.c1 = constraintBufs.constraint1Buf + collData.origParticleIndex; + collData.localSurfaceNormal = PxVec3(0.0f); + collData.localSurfacePos = PxVec3(0.0f); + } + + switch(shape.getType()) + { + case PxGeometryType::eSPHERE: + { + collideWithSphere(particleCollData, numParticles, shape, mParams.contactOffset); + break; + } + case PxGeometryType::ePLANE: + { + collideWithPlane(particleCollData, numParticles, shape, mParams.contactOffset); + break; + } + case PxGeometryType::eCAPSULE: + { + collideWithCapsule(particleCollData, numParticles, shape, mParams.contactOffset); + break; + } + case PxGeometryType::eBOX: + { + collideWithBox(particleCollData, numParticles, shape, mParams.contactOffset); + break; + } + case PxGeometryType::eCONVEXMESH: + { + const PxConvexMeshGeometryLL& convexShapeData = shape.get<const PxConvexMeshGeometryLL>(); + const Gu::ConvexHullData* convexHullData = convexShapeData.hullData; + PX_ASSERT(convexHullData); + + PX_ALLOCA(scaledPlanesBuf, PxPlane, convexHullData->mNbPolygons); + collideWithConvex(scaledPlanesBuf, particleCollData, numParticles, shape, mParams.contactOffset); + break; + } + case PxGeometryType::eTRIANGLEMESH: + { + if(opcodeCacheLocal) + { + collideWithStaticMesh(numParticles, particleCollData, opcodeCacheLocal, shape, world2Shape, shape2World, + 
mParams.cellSize, mParams.collisionRange, mParams.contactOffset); + } + else + { + // Compute cell hash if needed + if(!localCellHash.isHashValid) + { + PX_ALLOCA(hashKeyArray, PxU16, numParticles * sizeof(PxU16)); // save the hashkey for reorder + PX_ASSERT(hashKeyArray); + computeLocalCellHash(localCellHash, hashKeyArray, particles, numParticles, packetCorner, + mParams.cellSizeInv); + } + + collideCellsWithStaticMesh(particleCollData, localCellHash, shape, world2Shape, shape2World, + mParams.cellSize, mParams.collisionRange, mParams.contactOffset, packetCorner); + } + isStaticMeshType = true; + break; + } + case PxGeometryType::eHEIGHTFIELD: + { + collideWithStaticHeightField(particleCollData, numParticles, shape, mParams.contactOffset, shape2World); + isStaticMeshType = true; + break; + } + case PxGeometryType::eGEOMETRY_COUNT: + case PxGeometryType::eINVALID: + PX_ASSERT(0); + } + + if(isStaticMeshType) + { + for(PxU32 p = 0; p < numParticles; p++) + { + ParticleCollData& collData = particleCollData[p]; + updateCollDataStaticMesh(collData, shape2World, mParams.timeStep); + } + } + else if(body) + { + for(PxU32 p = 0; p < numParticles; p++) + { + ParticleCollData& collData = particleCollData[p]; + ConstraintDynamic cdTemp; + ConstraintDynamic& c0Dynamic = constraintBufs.constraint0DynamicBuf + ? constraintBufs.constraint0DynamicBuf[collData.origParticleIndex] + : cdTemp; + ConstraintDynamic& c1Dynamic = constraintBufs.constraint1DynamicBuf + ? 
constraintBufs.constraint1DynamicBuf[collData.origParticleIndex] + : cdTemp; + c0Dynamic.setEmpty(); + c1Dynamic.setEmpty(); + updateCollDataDynamic(collData, body->body2World, body->linearVelocity, body->angularVelocity, body, + shape2World, mParams.timeStep, c0Dynamic, c1Dynamic); + } + } + else + { + for(PxU32 p = 0; p < numParticles; p++) + { + ParticleCollData& collData = particleCollData[p]; + + updateCollDataStatic(collData, shape2World, mParams.timeStep); + } + } + + if(contactManager.isDrain) + { + for(PxU32 p = 0; p < numParticles; p++) + { + ParticleCollData& collData = particleCollData[p]; + + if((collData.localFlags & ParticleCollisionFlags::L_ANY) != 0) + { + collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_DRAIN; + } + } + } +} + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollision.h b/PhysX_3.4/Source/LowLevelParticles/src/PtCollision.h new file mode 100644 index 00000000..b1d0c640 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollision.h @@ -0,0 +1,130 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PT_COLLISION_H
#define PT_COLLISION_H

#include "PxPhysXConfig.h"
#if PX_USE_PARTICLE_SYSTEM_API

#include "foundation/PxTransform.h"
#include "PsBitUtils.h"
#include "PtConfig.h"
#include "PtCollisionData.h"
#include "PtCollisionMethods.h"
#include "PtParticle.h"
#include "PtTwoWayData.h"
#include "PtCollisionParameters.h"
#include "PsAlignedMalloc.h"
#include "CmTask.h"
#include "PtParticleContactManagerStream.h"

namespace physx
{

class PxsRigidBody;
class PxBaseTask;

namespace Pt
{

class ParticleShape;
class BodyTransformVault;
struct W2STransformTemp;

// Particle-versus-shape collision stage of a ParticleSystemSimCpu.
//
// Holds the collision parameters (mParams), a fixed array of per-task scratch data (mTaskData) and a
// merge task that calls mergeResults(). The per-packet work is implemented by the private
// updateFluidShapeCollision / updateSubPacket / updateFluidBodyContactPair chain.
class Collision
{
  public:
	Collision(class ParticleSystemSimCpu& particleSystem);
	~Collision();

	// Entry point for a collision pass over the given contact manager stream.
	// NOTE(review): presumably schedules the per-packet tasks and the merge task ahead of
	// 'continuation' - the implementation is not part of this header; confirm in PtCollision.cpp.
	void updateCollision(const PxU8* contactManagerStream, physx::PxBaseTask& continuation);

	// Update position and velocity of particles that have PxParticleFlag::eSPATIAL_DATA_STRUCTURE_OVERFLOW set.
	void updateOverflowParticles();

	// Mutable access to the collision parameters used by all collision routines of this object.
	PX_FORCE_INLINE CollisionParameters& getParameter()
	{
		return mParams;
	}

  private:
	// 16-byte aligned array of world-to-shape transform pairs, one per contact manager.
	typedef Ps::Array<W2STransformTemp, shdfnd::AlignedAllocator<16, Ps::ReflectionAllocator<W2STransformTemp> > >
	TempContactManagerArray;

	// Scratch state of one parallel collision task: its transform array, the range of the contact
	// manager stream it covers, and a bounds value.
	// NOTE(review): 'bounds' looks like the per-task world bounds that mergeResults() combines - confirm.
	struct TaskData
	{
		TempContactManagerArray tempContactManagers;
		ParticleContactManagerStreamIterator packetBegin;
		ParticleContactManagerStreamIterator packetEnd;
		PxBounds3 bounds;
	};

	void processShapeListWithFilter(PxU32 taskDataIndex, const PxU32 skipNum = 0);
	// Target of MergeTask (see typedef below).
	void mergeResults(physx::PxBaseTask* continuation);

	// Collides all particles of the packet referenced by streamShape, in sub-packets.
	void updateFluidShapeCollision(Particle* particles, TwoWayData* fluidTwoWayData, PxVec3* transientBuf,
	                               PxVec3* collisionVelocities, ConstraintBuffers& constraintBufs,
	                               ParticleOpcodeCache* opcodeCache, PxBounds3& worldBounds,
	                               const PxU32* fluidShapeParticleIndices, const PxF32* restOffsets,
	                               const W2STransformTemp* w2sTransforms, const ParticleStreamShape& streamShape);

	// Collides one sub-packet (already copied into particlesSp) against all contact managers.
	PX_FORCE_INLINE void updateSubPacket(Particle* particlesSp, TwoWayData* fluidTwoWayData, PxVec3* transientBuf,
	                                     PxVec3* collisionVelocities, ConstraintBuffers& constraintBufs,
	                                     ParticleOpcodeCache* perParticleCacheLocal,
	                                     ParticleOpcodeCache* perParticleCacheGlobal, LocalCellHash& localCellHash,
	                                     PxBounds3& worldBounds, const PxVec3& packetCorner,
	                                     const PxU32* particleIndicesSp, const PxU32 numParticlesSp,
	                                     const ParticleStreamContactManager* contactManagers,
	                                     const W2STransformTemp* w2sTransforms, const PxU32 numContactManagers,
	                                     const PxF32* restOffsetsSp);

	// Collides a batch of particles against the single shape of one contact manager.
	void updateFluidBodyContactPair(const Particle* particles, PxU32 numParticles, ParticleCollData* particleCollData,
	                                ConstraintBuffers& constraintBufs, ParticleOpcodeCache* perParticleCacheLocal,
	                                LocalCellHash& localCellHash, const PxVec3& packetCorner,
	                                const ParticleStreamContactManager& contactManager,
	                                const W2STransformTemp& w2sTransform);

	void PX_FORCE_INLINE addTempW2STransform(TaskData& taskData, const ParticleStreamContactManager& cm);

  private:
	// Declared private; no definition is visible in this header, so instances are not meant to be assigned.
	Collision& operator=(const Collision&);
	CollisionParameters mParams;
	ParticleSystemSimCpu& mParticleSystem;
	TaskData mTaskData[PT_NUM_PACKETS_PARALLEL_COLLISION];

	// Task wrapper whose run step invokes Collision::mergeResults on this object.
	typedef Cm::DelegateTask<Collision, &Collision::mergeResults> MergeTask;
	MergeTask mMergeTask;
	friend class CollisionTask;
};

} // namespace Pt
} // namespace physx

#endif // PX_USE_PARTICLE_SYSTEM_API
#endif // PT_COLLISION_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionBox.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionBox.cpp
new file mode 100644
index 00000000..8f9b90ba
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionBox.cpp
@@ -0,0 +1,135 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation.
Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PtCollisionMethods.h" +#if PX_USE_PARTICLE_SYSTEM_API + +void physx::Pt::collideWithBox(ParticleCollData* particleCollData, PxU32 numCollData, const Gu::GeometryUnion& boxShape, + PxReal proxRadius) +{ + PX_ASSERT(particleCollData); + PX_ASSERT(boxShape.getType() == PxGeometryType::eBOX); + + const PxBoxGeometry& boxShapeData = boxShape.get<const PxBoxGeometry>(); + + PxVec3 boxExtent = boxShapeData.halfExtents; + PxBounds3 shapeBounds(boxExtent * -1.0f, boxExtent); + PX_ASSERT(!shapeBounds.isEmpty()); + shapeBounds.fattenFast(proxRadius); + + // Box to convex conversion. 
+ PxPlane planes[6]; + PxVec3 normal; + + normal = PxVec3(1.0f, 0.0f, 0.0f); + planes[0].n = normal; + planes[0].d = -boxExtent.x; + + normal = PxVec3(-1.0f, 0.0f, 0.0f); + planes[1].n = normal; + planes[1].d = -boxExtent.x; + + normal = PxVec3(0.0f, 1.0f, 0.0f); + planes[2].n = normal; + planes[2].d = -boxExtent.y; + + normal = PxVec3(0.0f, -1.0f, 0.0f); + planes[3].n = normal; + planes[3].d = -boxExtent.y; + + normal = PxVec3(0.0f, 0.0f, 1.0f); + planes[4].n = normal; + planes[4].d = -boxExtent.z; + + normal = PxVec3(0.0f, 0.0f, -1.0f); + planes[5].n = normal; + planes[5].d = -boxExtent.z; + +#if PT_USE_SIMD_CONVEX_COLLISION + ParticleCollDataV4 collDataV4; + PxU32 v4Count = 0; + + for(PxU32 p = 0; p < numCollData; p++) + { + ParticleCollData& collData = particleCollData[p]; + + PxBounds3 particleBounds = PxBounds3::boundsOfPoints(collData.localOldPos, collData.localNewPos); + if(particleBounds.intersects(shapeBounds)) + { + collDataV4.localOldPos[v4Count].v3 = collData.localOldPos; + collDataV4.localOldPos[v4Count].pad = 0; + collDataV4.localNewPos[v4Count].v3 = collData.localNewPos; + collDataV4.localNewPos[v4Count].pad = 0; + collDataV4.localFlags[v4Count] = collData.localFlags; + collDataV4.restOffset[v4Count] = collData.restOffset; + collDataV4.ccTime[v4Count] = collData.ccTime; + collDataV4.collData[v4Count] = &collData; + v4Count++; + } + + if(v4Count == 4) + { + // sschirm: not processing with less than 4 elements to avoid uninitialized data reads + collideWithConvexPlanesSIMD(collDataV4, planes, 6, proxRadius); + for(PxU32 j = 0; j < v4Count; j++) + { + ParticleCollData* collData1 = collDataV4.collData[j]; + PxU32 stateFlag = collDataV4.localFlags[j]; + if(stateFlag) + { + collData1->localFlags |= stateFlag; + collData1->ccTime = collDataV4.ccTime[j]; + collData1->localSurfaceNormal = collDataV4.localSurfaceNormal[j].v3; + collData1->localSurfacePos = collDataV4.localSurfacePos[j].v3; + } + } + v4Count = 0; + } + else if(v4Count > 0 && (p == numCollData 
- 1)) + { + for(PxU32 j = 0; j < v4Count; j++) + { + collideWithConvexPlanes(*collDataV4.collData[j], planes, 6, proxRadius); + } + } + } +#else + for(PxU32 p = 0; p < numCollData; p++) + { + ParticleCollData& collData = particleCollData[p]; + PxBounds3 particleBounds = PxBounds3::boundsOfPoints(collData.localOldPos, collData.localNewPos); + if(particleBounds.intersects(shapeBounds)) + { + collideWithConvexPlanes(collData, planes, 6, proxRadius); + } + } +#endif +} + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionCapsule.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionCapsule.cpp new file mode 100644 index 00000000..3add04a5 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionCapsule.cpp @@ -0,0 +1,304 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
// No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#include "PtCollisionMethods.h"
#if PX_USE_PARTICLE_SYSTEM_API

using namespace physx;
using namespace Pt;

namespace
{

// Discrete / proximity test of the new particle position q against a capsule of half height h and
// radius r whose axis is the local x-axis.
//
// Does nothing if a continuous collision is already flagged in collData.localFlags. Otherwise the
// closest point on the axis segment [-h, h] is computed; if q is closer than r + proxRadius the
// surface normal and target position are written and L_PROX is set, and L_DC is set in addition when
// q is closer than r + restOffset.
// Note: localSurfaceNormal is overwritten (unnormalized) even when q is out of range.
void collideWithCapsuleNonContinuous(ParticleCollData& collData, const PxVec3& q, const PxReal& h, const PxReal& r,
                                     const PxReal& proxRadius)
{
	if(collData.localFlags & ParticleCollisionFlags::CC)
		return; // Only apply discrete and proximity collisions if no continuous collision was detected so far (for
		        // any colliding shape)

	// closest point to q on the capsule axis segment
	PxVec3 segPoint;
	segPoint = PxVec3(q.x, 0.0f, 0.0f);
	segPoint.x = PxMax(segPoint.x, -h);
	segPoint.x = PxMin(segPoint.x, h);
	collData.localSurfaceNormal = q - segPoint;
	PxReal dist = collData.localSurfaceNormal.magnitude();
	if(dist < (r + proxRadius))
	{
		if(dist != 0.0f)
			collData.localSurfaceNormal *= (1.0f / dist);
		else
			// q lies exactly on the axis: no direction available, the normal stays zero.
			// NOTE(review): the continuous path below falls back to (0, 1, 0) in the same situation;
			// with a zero normal the target position ends up on the axis itself - confirm this is intended.
			collData.localSurfaceNormal = PxVec3(0);

		// Push particle to surface such that the distance to the surface is equal to the collision radius
		collData.localSurfacePos = segPoint + (collData.localSurfaceNormal * (r + collData.restOffset));
		collData.localFlags |= ParticleCollisionFlags::L_PROX;

		if(dist < (r + collData.restOffset))
			collData.localFlags |= ParticleCollisionFlags::L_DC;
	}
}

// Continuous test of the segment p -> q (direction d = q - p) against one end sphere of the capsule.
//
// sphereH is the x coordinate of the sphere centre (+h or -h). discS, aS and bS are the discriminant
// and the a / b coefficients of that sphere as computed by the caller. If there is no usable
// intersection in [0, 1] the discrete test is run instead; an intersection is only recorded when it
// happens earlier than the current collData.ccTime.
void collideWithCapsuleTestSphere(ParticleCollData& collData, const PxVec3& p, const PxVec3& q, const PxVec3& d,
                                  const PxReal& h, const PxReal& r, const PxReal& sphereH, const PxReal& discS,
                                  const PxReal& aS, const PxReal& bS, const PxReal& proxRadius)
{
	if(discS <= 0.0f || aS == 0.0f)
	{
		// no intersection, tangential contact, or zero-length motion
		collideWithCapsuleNonContinuous(collData, q, h, r, proxRadius);
	}
	else
	{
		// smaller root of the quadratic, i.e. the entry point
		PxReal t = -(bS + PxSqrt(discS)) / aS;
		if(t < 0.0f || t > 1.0f)
		{
			// intersection lies outside p-q interval
			collideWithCapsuleNonContinuous(collData, q, h, r, proxRadius);
		}
		else if(t < collData.ccTime)
		{
			// intersection point lies on sphere, add lcc
			// collData.localSurfacePos = p + (d * t);
			// collData.localSurfaceNormal = collData.localSurfacePos;
			// collData.localSurfaceNormal.x -= sphereH;
			// collData.localSurfaceNormal *= (1.0f / r);
			// collData.localSurfacePos += (collData.localSurfaceNormal * collData.restOffset);
			PxVec3 relativePOSITION = (d * t);
			// normal = (impact point - sphere centre) / r
			collData.localSurfaceNormal = p + relativePOSITION;
			collData.localSurfaceNormal.x -= sphereH;
			collData.localSurfaceNormal *= (1.0f / r);
			computeContinuousTargetPosition(collData.localSurfacePos, p, relativePOSITION, collData.localSurfaceNormal,
			                                collData.restOffset);
			collData.ccTime = t;
			collData.localFlags |= ParticleCollisionFlags::L_CC;
		}
	}
}

// ----------------------------------------------------------------
//
// Note: this code is based on the hardware implementation
//
// Terminology:
// Starting point: p
// End point: q
// Ray direction: d
//
// Infinite cylinder I: all (y,z) : y^2 + z^2 < r^2
// "Fat plane" F: all (x) : -h < x < h
// Top sphere S0: all (x,y,z) : y^2 + z^2 + (x-h)^2 < r^2
// Bottom sphere S1: all (x,y,z) : y^2 + z^2 + (x+h)^2 < r^2
//
// Cylinder Z = (I & F)
// Capsule C = Z | S0 | S1
//
// coefficients a, b, c for the squared distance functions sqd(t) = a * t^2 + 2 * b * t + c, for I, S0 and S1
// (b is half of the linear coefficient, so the discriminant used below is b^2 - a*c and the entry
// time is t = -(b + sqrt(b^2 - a*c)) / a):
//
// aI = d.y*d.y + d.z*d.z
// aS0 = d.y*d.y + d.z*d.z + d.x*d.x
// aS1 = d.y*d.y + d.z*d.z + d.x*d.x
//
// bI = d.y*p.y + d.z*p.z
// bS0 = d.y*p.y + d.z*p.z + d.x*p.x - h*d.x
// bS1 = d.y*p.y + d.z*p.z + d.x*p.x + h*d.x
//
// cI = p.y*p.y + p.z*p.z - r*r.
// cS0 = p.y*p.y + p.z*p.z - r*r + p.x*p.x + h*h - 2*h*p.x
// cS1 = p.y*p.y + p.z*p.z - r*r + p.x*p.x + h*h + 2*h*p.x
//
// these will be treated in vectorized fashion:
// I <--> .y
// S0 <--> .x
// S1 <--> .z
//
// for p, we have sqd(0) = c
// ( for q, we have sqd(1) = a + 2*b + c )
//
// ----------------------------------------------------------------
// Collides a single particle (old position p, new position q, both in capsule space) against the
// capsule. If p is already inside the capsule a continuous collision with time 0 is recorded;
// otherwise the segment p -> q is tested against the infinite cylinder and, where needed, the end
// spheres, falling back to the discrete test when no continuous hit is found.
PX_FORCE_INLINE void collideWithCapsule(ParticleCollData& collData, const PxCapsuleGeometry& capsuleShapeData,
                                        PxReal proxRadius)
{
	// Note: The local coordinate system of a capsule is defined such that the cylindrical part is
	// wrapped around the x-axis

	PxVec3& p = collData.localOldPos;
	PxVec3& q = collData.localNewPos;

	PxReal r = capsuleShapeData.radius;
	PxReal h = capsuleShapeData.halfHeight;

	// components: .x = sphere S0, .y = infinite cylinder I, .z = sphere S1
	PxVec3 a, b, c;

	// all c values
	PxReal tmp;
	c.y = p.y * p.y + p.z * p.z - r * r;
	tmp = c.y + p.x * p.x + h * h;
	c.x = tmp - 2 * h * p.x;
	c.z = tmp + 2 * h * p.x;

	bool pInI = c.y < 0.0f;  // Old particle position inside the infinite cylinder
	bool pInS0 = c.x < 0.0f; // Old particle position inside the right sphere
	bool pInS1 = c.z < 0.0f; // Old particle position inside the left sphere
	bool pRightOfH = p.x > h;
	bool pLeftOfMinusH = p.x < -h;
	bool pInZ = (!pRightOfH && !pLeftOfMinusH && pInI);

	if(pInZ || pInS0 || pInS1)
	{
		// p is inside the skeleton
		// add ccd with time 0.0

		PxVec3 segPoint;
		segPoint = PxVec3(p.x, 0.0f, 0.0f);
		segPoint.x = PxMax(segPoint.x, -h);
		segPoint.x = PxMin(segPoint.x, h);
		PxVec3 normal = p - segPoint;
		// p exactly on the axis has no radial direction; use +y as an arbitrary one
		collData.localSurfaceNormal = normal.isZero() ? PxVec3(0.0f, 1.0f, 0.0f) : normal.getNormalized();
		// Push particle to surface such that the distance to the surface is equal to the collision radius
		collData.localSurfacePos = segPoint + (collData.localSurfaceNormal * (r + collData.restOffset));
		collData.ccTime = 0.0;
		collData.localFlags |= ParticleCollisionFlags::L_CC;
	}
	else
	{
		// p is outside of the skeleton

		PxVec3 d = q - p;

		// all b values
		b.y = d.y * p.y + d.z * p.z;
		tmp = b.y + d.x * p.x;
		b.x = tmp - h * d.x;
		b.z = tmp + h * d.x;

		// all a values
		a.y = d.y * d.y + d.z * d.z;
		a.x = a.y + d.x * d.x;
		a.z = a.x;

		// all discriminants (b^2 - a*c, per component)
		PxVec3 tmpVec0, tmpVec1;
		tmpVec0 = b.multiply(b);
		tmpVec1 = c.multiply(a);
		PxVec3 discs = tmpVec0 - tmpVec1;

		// this made cases fail with d.y == 0.0 and d.z == 0.0
		// bool dInI = discs.y > 0.0f;
		bool dInI = discs.y >= 0.0f;

		// bool dInS0 = discs.x > 0.0f;
		// bool dInS1 = discs.z > 0.0f;

		if(!dInI)
		{
			// the ray does not intersect the infinite cylinder
			collideWithCapsuleNonContinuous(collData, q, h, r, proxRadius);
		}
		else
		{
			// d intersects the infinite cylinder
			if(pInI)
			{
				// p is contained in the infinite cylinder, either above the top sphere or below the bottom sphere.
				// -> directly test against the nearest sphere
				if(p.x > 0)
				{
					// check sphere 0
					collideWithCapsuleTestSphere(collData, p, q, d, h, r, h, discs.x, a.x, b.x, proxRadius);
				}
				else
				{
					// check sphere 1
					collideWithCapsuleTestSphere(collData, p, q, d, h, r, -h, discs.z, a.z, b.z, proxRadius);
				}
			}
			else if(discs.y <= 0.0f || a.y == 0.0f)
			{
				// d is zero or tangential to cylinder surface
				collideWithCapsuleNonContinuous(collData, q, h, r, proxRadius);
			}
			else
			{
				// p lies outside of infinite cylinder, compute intersection point with it
				PxReal t = -(b.y + PxSqrt(discs.y)) / a.y;
				if(t < 0.0f || t > 1.0f)
				{
					// intersection lies outside p-q interval
					collideWithCapsuleNonContinuous(collData, q, h, r, proxRadius);
				}
				else
				{
					PxVec3 relativePOSITION = (d * t);
					PxVec3 impact = p + relativePOSITION;
					if(impact.x > h)
					{
						// if above the actual cylinder, check sphere 0
						collideWithCapsuleTestSphere(collData, p, q, d, h, r, h, discs.x, a.x, b.x, proxRadius);
					}
					else if(impact.x < -h)
					{
						// if below the actual cylinder, check sphere 1
						collideWithCapsuleTestSphere(collData, p, q, d, h, r, -h, discs.z, a.z, b.z, proxRadius);
					}
					else if(t < collData.ccTime)
					{
						// intersection point lies on cylinder, add cc
						// collData.localSurfaceNormal = collData.localSurfacePos / r;
						// collData.localSurfaceNormal.x = 0.0f;
						// collData.localSurfacePos += (collData.localSurfaceNormal * collData.restOffset);
						// radial normal: impact point scaled by 1/r with the axial component removed
						collData.localSurfaceNormal = impact / r;
						collData.localSurfaceNormal.x = 0.0f;
						computeContinuousTargetPosition(collData.localSurfacePos, p, relativePOSITION,
						                                collData.localSurfaceNormal, collData.restOffset);
						collData.ccTime = t;
						collData.localFlags |= ParticleCollisionFlags::L_CC;
					}
				}
			}
		}
	}
}

} // namespace

// Collides numCollData particles (positions already in capsule space) against the capsule shape.
// proxRadius is the proximity distance handed to the per-particle test.
void physx::Pt::collideWithCapsule(ParticleCollData* collShapeData, PxU32 numCollData,
                                   const Gu::GeometryUnion& capsuleShape, PxReal proxRadius)
{
	PX_ASSERT(collShapeData);
	PX_ASSERT(capsuleShape.getType() == PxGeometryType::eCAPSULE);

	const PxCapsuleGeometry& capsuleShapeData = capsuleShape.get<const PxCapsuleGeometry>();

	for(PxU32 p = 0; p < numCollData; p++)
	{
		// '::' selects the file-local single-particle overload from the anonymous namespace
		::collideWithCapsule(collShapeData[p], capsuleShapeData, proxRadius);
	}
}

#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionConvex.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionConvex.cpp
new file mode 100644
index 00000000..a6d658ce
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionConvex.cpp
@@ -0,0 +1,553 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PtCollisionMethods.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "foundation/PxMat33.h" +#include "foundation/PxVec3.h" +#include "foundation/PxPlane.h" +#include "GuConvexMeshData.h" +#include "CmPhysXCommon.h" +#include "PsVecMath.h" + +#define FLCNVX_NO_DC (1 << 0) +#define FLCNVX_NO_PARALLEL_CC (1 << 1) +#define FLCNVX_NO_PROX (1 << 2) +#define FLCNVX_NO_CONTAINMENT (1 << 3) +#define PLCNVX_POTENTIAL_PROX (1 << 4) + +using namespace physx::shdfnd::aos; +using namespace physx; +using namespace Pt; + +namespace +{ + +void scalePlanes(PxPlane* scaledPlaneBuf, const Gu::ConvexHullData* convexHullData, const PxMat33& invScaling) +{ + PxU32 numPlanes = convexHullData->mNbPolygons; + PxPlane* planeIt = scaledPlaneBuf; + const Gu::HullPolygonData* polygonIt = convexHullData->mPolygons; + for(; numPlanes > 0; --numPlanes, ++planeIt, ++polygonIt) + { + PxVec3 normal = polygonIt->mPlane.n; + PxF32 d = polygonIt->mPlane.d; + normal = invScaling.transformTranspose(normal); + PxReal magnitude = normal.normalize(); + *planeIt = PxPlane(normal, d / magnitude); + } +} + +} // namespace + +void physx::Pt::collideWithConvexPlanes(ParticleCollData& collData, const PxPlane* convexPlanes, PxU32 numPlanes, + const PxReal proxRadius) +{ + PX_ASSERT(convexPlanes); + + // initializing these to 0 saves a test for accessing corresponding arrays + PxU32 newPosPlaneIndex = 0; + PxU32 oldPosPlaneIndex = 0; + PxU32 rayPlaneIndex = 0; + bool newPosOutMask = false; + + PxReal latestEntry = -FLT_MAX; + PxReal soonestExit = FLT_MAX; + PxReal newPosClosestDist = -FLT_MAX; + PxReal oldPosClosestDist = 
-FLT_MAX; + + PxVec3 motion = collData.localNewPos - collData.localOldPos; + + const PxPlane* plane = convexPlanes; + for(PxU32 k = 0; k < numPlanes; k++) + { + PxReal planeDistNewPos = plane[k].distance(collData.localNewPos); + PxReal planeDistOldPos = plane[k].distance(collData.localOldPos); + + bool wasNewPosOutide = newPosClosestDist > 0.0f; + + // maximize distance to planes to find minimal distance to convex + bool isOldPosFurther = planeDistOldPos > oldPosClosestDist; + oldPosClosestDist = isOldPosFurther ? planeDistOldPos : oldPosClosestDist; + oldPosPlaneIndex = isOldPosFurther ? k : oldPosPlaneIndex; + + bool isNewPosFurther = planeDistNewPos > newPosClosestDist; + newPosClosestDist = isNewPosFurther ? planeDistNewPos : newPosClosestDist; + newPosPlaneIndex = isNewPosFurther ? k : newPosPlaneIndex; + + bool isNewPosOutside = planeDistNewPos > 0.0f; + + // flagging cases where newPos it out multiple times + newPosOutMask |= (wasNewPosOutide & isNewPosOutside); + + // continuous collision + PxReal dot = motion.dot(plane[k].n); + + // div by zero shouldn't hurt, since dot == 0.0f case is masked out + PxReal hitTime = -planeDistOldPos / dot; + bool isEntry = (dot < 0.0f) & (hitTime > latestEntry); + bool isExit = (dot > 0.0f) & (hitTime < soonestExit); + + latestEntry = isEntry ? hitTime : latestEntry; + rayPlaneIndex = isEntry ? k : rayPlaneIndex; + soonestExit = isExit ? hitTime : soonestExit; + + // mark parallel outside for no ccd in PxcFinalizeConvexCollision + latestEntry = ((dot == 0.0f) & isNewPosOutside) ? 
FLT_MAX : latestEntry; + } + + bool isContained = oldPosClosestDist <= 0.0f; + bool isDc = newPosClosestDist <= collData.restOffset; + bool isProximity = (newPosClosestDist > 0.0f) && (newPosClosestDist <= proxRadius) && !newPosOutMask; + + if(isContained) + { + // Treat the case where the old pos is inside the skeleton as + // a continous collision with time 0 + + collData.localFlags |= ParticleCollisionFlags::L_CC; + collData.ccTime = 0.0f; + collData.localSurfaceNormal = plane[oldPosPlaneIndex].n; + + // Push the particle to the surface (such that distance to surface is equal to the collision radius) + collData.localSurfacePos = + collData.localOldPos + plane[oldPosPlaneIndex].n * (collData.restOffset - oldPosClosestDist); + } + else + { + // Check for continuous collision + // only add a proximity/discrete case if there are no continous collisions + // for this shape or any other shape before + + bool ccHappened = (0.0f <= latestEntry) && (latestEntry < collData.ccTime) && (latestEntry <= soonestExit); + if(ccHappened) + { + collData.localSurfaceNormal = plane[rayPlaneIndex].n; + // collData.localSurfacePos = collData.localOldPos + (motion * latestEntry) + (continuousNormal * + // collData.restOffset); + computeContinuousTargetPosition(collData.localSurfacePos, collData.localOldPos, motion * latestEntry, + plane[rayPlaneIndex].n, collData.restOffset); + collData.ccTime = latestEntry; + collData.localFlags |= ParticleCollisionFlags::L_CC; + } + else if(!(collData.localFlags & ParticleCollisionFlags::CC)) + { + // No other collision shape has caused a continuous collision so far + if(isProximity) // proximity + { + collData.localSurfaceNormal = plane[newPosPlaneIndex].n; + collData.localSurfacePos = + collData.localNewPos + plane[newPosPlaneIndex].n * (collData.restOffset - newPosClosestDist); + collData.localFlags |= ParticleCollisionFlags::L_PROX; + } + if(isDc) // discrete collision + { + collData.localSurfaceNormal = plane[newPosPlaneIndex].n; + 
collData.localSurfacePos = + collData.localNewPos + plane[newPosPlaneIndex].n * (collData.restOffset - newPosClosestDist); + collData.localFlags |= ParticleCollisionFlags::L_DC; + } + } + } +} + +void physx::Pt::collideWithConvexPlanesSIMD(ParticleCollDataV4& collDataV4, const PxPlane* convexPlanes, + PxU32 numPlanes, const PxReal proxRadius) +{ + PX_ASSERT(convexPlanes); + Ps::prefetch(convexPlanes); + + Vec4V latestEntry = V4Load(-FLT_MAX); + Vec4V soonestExit = V4Load(FLT_MAX); + Vec4V newPosClosestDist = V4Load(-FLT_MAX); + Vec4V oldPosClosestDist = V4Load(-FLT_MAX); + Vec4V discreteNormal[4] = { V4Zero(), V4Zero(), V4Zero(), V4Zero() }; + Vec4V continuousNormal[4] = { V4Zero(), V4Zero(), V4Zero(), V4Zero() }; + Vec4V containmentNormal[4] = { V4Zero(), V4Zero(), V4Zero(), V4Zero() }; + + Vec4V localNewPos0 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localNewPos[0])); + Vec4V localOldPos0 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localOldPos[0])); + + Vec4V localNewPos1 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localNewPos[1])); + Vec4V localOldPos1 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localOldPos[1])); + + Vec4V localNewPos2 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localNewPos[2])); + Vec4V localOldPos2 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localOldPos[2])); + + Vec4V localNewPos3 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localNewPos[3])); + Vec4V localOldPos3 = V4LoadA(reinterpret_cast<const PxF32*>(&collDataV4.localOldPos[3])); + + Vec4V motion[4]; + motion[0] = V4Sub(localNewPos0, localOldPos0); + motion[1] = V4Sub(localNewPos1, localOldPos1); + motion[2] = V4Sub(localNewPos2, localOldPos2); + motion[3] = V4Sub(localNewPos3, localOldPos3); + + const Mat44V newPos44(localNewPos0, localNewPos1, localNewPos2, localNewPos3); + const Mat44V oldPos44(localOldPos0, localOldPos1, localOldPos2, localOldPos3); + const Mat44V motion44(motion[0], motion[1], motion[2], motion[3]); + + 
const Mat44V newPosTrans44 = M44Trnsps(newPos44); + const Mat44V oldPosTrans44 = M44Trnsps(oldPos44); + const Mat44V motionTrans44 = M44Trnsps(motion44); + + BoolV newPosOutMask = BLoad(false); + + const PxPlane* plane = convexPlanes; + for(PxU32 k = 0; k < numPlanes; k++) + { + Vec4V planeNormal = Vec4V_From_Vec3V(V3LoadU(plane->n)); + Vec4V planeD = V4Load(plane->d); + plane++; + Ps::prefetch(plane); + + const FloatV normalX = V4GetX(planeNormal); + const FloatV normalY = V4GetY(planeNormal); + const FloatV normalZ = V4GetZ(planeNormal); + + Vec4V v1 = V4ScaleAdd(newPosTrans44.col0, normalX, planeD); + Vec4V v2 = V4ScaleAdd(newPosTrans44.col1, normalY, v1); + Vec4V planeDistNewPosV4 = V4ScaleAdd(newPosTrans44.col2, normalZ, v2); + + v1 = V4ScaleAdd(oldPosTrans44.col0, normalX, planeD); + v2 = V4ScaleAdd(oldPosTrans44.col1, normalY, v1); + Vec4V planeDistOldPosV4 = V4ScaleAdd(oldPosTrans44.col2, normalZ, v2); + + // containment: select the max distance plane + BoolV mask = V4IsGrtr(planeDistOldPosV4, oldPosClosestDist); + oldPosClosestDist = V4Sel(mask, planeDistOldPosV4, oldPosClosestDist); + containmentNormal[0] = V4Sel(BSplatElement<0>(mask), planeNormal, containmentNormal[0]); + containmentNormal[1] = V4Sel(BSplatElement<1>(mask), planeNormal, containmentNormal[1]); + containmentNormal[2] = V4Sel(BSplatElement<2>(mask), planeNormal, containmentNormal[2]); + containmentNormal[3] = V4Sel(BSplatElement<3>(mask), planeNormal, containmentNormal[3]); + + // proxmity and discrete: select the max distance planes + BoolV wasNewPosOutide = V4IsGrtr(newPosClosestDist, V4Zero()); + BoolV isNewPosOutside = V4IsGrtr(planeDistNewPosV4, V4Zero()); + + mask = V4IsGrtr(planeDistNewPosV4, newPosClosestDist); + newPosClosestDist = V4Sel(mask, planeDistNewPosV4, newPosClosestDist); + discreteNormal[0] = V4Sel(BSplatElement<0>(mask), planeNormal, discreteNormal[0]); + discreteNormal[1] = V4Sel(BSplatElement<1>(mask), planeNormal, discreteNormal[1]); + discreteNormal[2] = 
V4Sel(BSplatElement<2>(mask), planeNormal, discreteNormal[2]); + discreteNormal[3] = V4Sel(BSplatElement<3>(mask), planeNormal, discreteNormal[3]); + + // flagging cases where newPos it out multiple times + newPosOutMask = BOr(newPosOutMask, BAnd(wasNewPosOutide, isNewPosOutside)); + + // Test continuous collision + v1 = V4Scale(motionTrans44.col0, normalX); + v2 = V4ScaleAdd(motionTrans44.col1, normalY, v1); + Vec4V dotV4 = V4ScaleAdd(motionTrans44.col2, normalZ, v2); + + Vec4V hitTime = V4Neg(V4Div(planeDistOldPosV4, dotV4)); + + BoolV exit = V4IsGrtr(dotV4, V4Zero()); + mask = BAnd(exit, V4IsGrtr(soonestExit, hitTime)); + soonestExit = V4Sel(mask, hitTime, soonestExit); + + BoolV entry = V4IsGrtr(V4Zero(), dotV4); + mask = BAnd(entry, V4IsGrtr(hitTime, latestEntry)); + latestEntry = V4Sel(mask, hitTime, latestEntry); + continuousNormal[0] = V4Sel(BSplatElement<0>(mask), planeNormal, continuousNormal[0]); + continuousNormal[1] = V4Sel(BSplatElement<1>(mask), planeNormal, continuousNormal[1]); + continuousNormal[2] = V4Sel(BSplatElement<2>(mask), planeNormal, continuousNormal[2]); + continuousNormal[3] = V4Sel(BSplatElement<3>(mask), planeNormal, continuousNormal[3]); + + // mark parallel outside for no ccd in PxcFinalizeConvexCollision + mask = BAnd(isNewPosOutside, V4IsEq(V4Zero(), dotV4)); + latestEntry = V4Sel(mask, V4One(), latestEntry); + } + + VecU32V localFlags = U4LoadXYZW(collDataV4.localFlags[0], collDataV4.localFlags[1], collDataV4.localFlags[2], + collDataV4.localFlags[3]); + Vec4V proxRadiusV4 = V4Load(proxRadius); + Vec4V restOffsetV4 = V4LoadA(collDataV4.restOffset); + + const VecU32V u4Zero = U4LoadXYZW(0, 0, 0, 0); + const VecU32V flagCC = U4LoadXYZW(ParticleCollisionFlags::CC, ParticleCollisionFlags::CC, + ParticleCollisionFlags::CC, ParticleCollisionFlags::CC); + const BoolV noFlagCC = V4IsEqU32(V4U32and(flagCC, localFlags), u4Zero); + + // proximity + const VecU32V flagLPROX = U4LoadXYZW(ParticleCollisionFlags::L_PROX, 
ParticleCollisionFlags::L_PROX, + ParticleCollisionFlags::L_PROX, ParticleCollisionFlags::L_PROX); + const BoolV proximityV = + BAnd(BAnd(BAnd(noFlagCC, V4IsGrtrOrEq(newPosClosestDist, V4Zero())), V4IsGrtr(proxRadiusV4, newPosClosestDist)), + BNot(newPosOutMask)); + VecU32V stateFlag = V4U32Sel(proximityV, flagLPROX, u4Zero); + + // discrete + const VecU32V flagLDC = U4LoadXYZW(ParticleCollisionFlags::L_DC, ParticleCollisionFlags::L_DC, + ParticleCollisionFlags::L_DC, ParticleCollisionFlags::L_DC); + const BoolV DCV = + BAnd(BAnd(noFlagCC, V4IsGrtrOrEq(newPosClosestDist, V4Zero())), V4IsGrtr(restOffsetV4, newPosClosestDist)); + stateFlag = V4U32or(stateFlag, V4U32Sel(DCV, flagLDC, u4Zero)); + + // cc + const VecU32V flagLCC = U4LoadXYZW(ParticleCollisionFlags::L_CC, ParticleCollisionFlags::L_CC, + ParticleCollisionFlags::L_CC, ParticleCollisionFlags::L_CC); + + Vec4V oldCCTimeV = V4LoadA(collDataV4.ccTime); + const BoolV ccHappenedV = BAnd(BAnd(V4IsGrtrOrEq(latestEntry, V4Zero()), V4IsGrtr(oldCCTimeV, latestEntry)), + V4IsGrtrOrEq(soonestExit, latestEntry)); + + stateFlag = V4U32Sel(ccHappenedV, flagLCC, stateFlag); + Vec4V localSurfaceNormal0 = V4Sel(BSplatElement<0>(ccHappenedV), continuousNormal[0], discreteNormal[0]); + Vec4V localSurfaceNormal1 = V4Sel(BSplatElement<1>(ccHappenedV), continuousNormal[1], discreteNormal[1]); + Vec4V localSurfaceNormal2 = V4Sel(BSplatElement<2>(ccHappenedV), continuousNormal[2], discreteNormal[2]); + Vec4V localSurfaceNormal3 = V4Sel(BSplatElement<3>(ccHappenedV), continuousNormal[3], discreteNormal[3]); + + Vec4V ccTimeV = V4Sel(ccHappenedV, latestEntry, oldCCTimeV); + Vec4V distV = newPosClosestDist; + +#if PT_CCD_MEDTHOD == PT_CCD_PROJECT + Vec4V projected0 = V4MulAdd(motion0, V4U32SplatElement<0>(latestEntry), localOldPos0); + Vec4V projected1 = V4MulAdd(motion1, V4U32SplatElement<1>(latestEntry), localOldPos1); + Vec4V projected2 = V4MulAdd(motion2, V4U32SplatElement<2>(latestEntry), localOldPos2); + Vec4V projected2 = 
V4MulAdd(motion3, V4U32SplatElement<3>(latestEntry), localOldPos3); + distV = V4Sel(ccHappenedV, V4Zero(), distV); + +#elif PT_CCD_MEDTHOD == PT_CCD_STAY + Vec4V projected0 = localOldPos0; + Vec4V projected1 = localOldPos1; + Vec4V projected2 = localOldPos2; + Vec4V projected3 = localOldPos3; + distV = V4Sel(ccHappenedV, restOffsetV4, distV); + +#elif PT_CCD_MEDTHOD == PT_CCD_IMPACT + Vec4V projected0 = V4MulAdd(motion0, V4U32SplatElement<0>(latestEntry), localOldPos0); + Vec4V projected1 = V4MulAdd(motion1, V4U32SplatElement<1>(latestEntry), localOldPos1); + Vec4V projected2 = V4MulAdd(motion2, V4U32SplatElement<2>(latestEntry), localOldPos2); + Vec4V projected2 = V4MulAdd(motion3, V4U32SplatElement<3>(latestEntry), localOldPos3); + distV = V4Sel(ccHappenedV, restOffsetV4, distV); +#else + PX_ASSERT(0); // simd unspport yet +#endif + + Vec4V localSurfacePos0 = V4Sel(BSplatElement<0>(ccHappenedV), projected0, localNewPos0); + Vec4V localSurfacePos1 = V4Sel(BSplatElement<1>(ccHappenedV), projected1, localNewPos1); + Vec4V localSurfacePos2 = V4Sel(BSplatElement<2>(ccHappenedV), projected2, localNewPos2); + Vec4V localSurfacePos3 = V4Sel(BSplatElement<3>(ccHappenedV), projected3, localNewPos3); + + // contain + const BoolV containmentV = V4IsGrtrOrEq(V4Zero(), oldPosClosestDist); + + stateFlag = V4U32Sel(containmentV, flagLCC, stateFlag); + + localSurfaceNormal0 = V4Sel(BSplatElement<0>(containmentV), containmentNormal[0], localSurfaceNormal0); + localSurfaceNormal1 = V4Sel(BSplatElement<1>(containmentV), containmentNormal[1], localSurfaceNormal1); + localSurfaceNormal2 = V4Sel(BSplatElement<2>(containmentV), containmentNormal[2], localSurfaceNormal2); + localSurfaceNormal3 = V4Sel(BSplatElement<3>(containmentV), containmentNormal[3], localSurfaceNormal3); + + localSurfacePos0 = V4Sel(BSplatElement<0>(containmentV), localOldPos0, localSurfacePos0); + localSurfacePos1 = V4Sel(BSplatElement<1>(containmentV), localOldPos1, localSurfacePos1); + localSurfacePos2 = 
V4Sel(BSplatElement<2>(containmentV), localOldPos2, localSurfacePos2); + localSurfacePos3 = V4Sel(BSplatElement<3>(containmentV), localOldPos3, localSurfacePos3); + + distV = V4Sel(containmentV, oldPosClosestDist, distV); + ccTimeV = V4Sel(containmentV, V4Zero(), ccTimeV); + + // localSurfacePos + Vec4V reflectDistV = V4Sub(restOffsetV4, distV); + localSurfacePos0 = V4MulAdd(localSurfaceNormal0, V4SplatElement<0>(reflectDistV), localSurfacePos0); + localSurfacePos1 = V4MulAdd(localSurfaceNormal1, V4SplatElement<1>(reflectDistV), localSurfacePos1); + localSurfacePos2 = V4MulAdd(localSurfaceNormal2, V4SplatElement<2>(reflectDistV), localSurfacePos2); + localSurfacePos3 = V4MulAdd(localSurfaceNormal3, V4SplatElement<3>(reflectDistV), localSurfacePos3); + + // store + V4StoreA(localSurfacePos0, reinterpret_cast<PxF32*>(&collDataV4.localSurfacePos[0])); + V4StoreA(localSurfacePos1, reinterpret_cast<PxF32*>(&collDataV4.localSurfacePos[1])); + V4StoreA(localSurfacePos2, reinterpret_cast<PxF32*>(&collDataV4.localSurfacePos[2])); + V4StoreA(localSurfacePos3, reinterpret_cast<PxF32*>(&collDataV4.localSurfacePos[3])); + + V4StoreA(localSurfaceNormal0, reinterpret_cast<PxF32*>(&collDataV4.localSurfaceNormal[0])); + V4StoreA(localSurfaceNormal1, reinterpret_cast<PxF32*>(&collDataV4.localSurfaceNormal[1])); + V4StoreA(localSurfaceNormal2, reinterpret_cast<PxF32*>(&collDataV4.localSurfaceNormal[2])); + V4StoreA(localSurfaceNormal3, reinterpret_cast<PxF32*>(&collDataV4.localSurfaceNormal[3])); + + V4StoreA(ccTimeV, collDataV4.ccTime); + + V4U32StoreAligned(stateFlag, reinterpret_cast<VecU32V*>(collDataV4.localFlags)); +} + +/** +input scaledPlaneBuf needs a capacity of the number of planes in convexShape +*/ +void physx::Pt::collideWithConvex(PxPlane* scaledPlaneBuf, ParticleCollData* particleCollData, PxU32 numCollData, + const Gu::GeometryUnion& convexShape, const PxReal proxRadius) +{ + PX_ASSERT(scaledPlaneBuf); + PX_ASSERT(particleCollData); + + const PxConvexMeshGeometryLL& 
convexShapeData = convexShape.get<const PxConvexMeshGeometryLL>(); + const Gu::ConvexHullData* convexHullData = convexShapeData.hullData; + PX_ASSERT(convexHullData); + + // convex bounds in local space + PxMat33 scaling = convexShapeData.scale.toMat33(), invScaling; + invScaling = scaling.getInverse(); + + PX_ASSERT(!convexHullData->mAABB.isEmpty()); + PxBounds3 shapeBounds = convexHullData->mAABB.transformFast(scaling); + PX_ASSERT(!shapeBounds.isEmpty()); + shapeBounds.fattenFast(proxRadius); + bool scaledPlanes = false; + +#if PT_USE_SIMD_CONVEX_COLLISION + const Vec3V boundMin = V3LoadU(shapeBounds.minimum); + const Vec3V boundMax = V3LoadU(shapeBounds.maximum); + const Vec4V boundMinX = V4SplatElement<0>(Vec4V_From_Vec3V(boundMin)); + const Vec4V boundMinY = V4SplatElement<1>(Vec4V_From_Vec3V(boundMin)); + const Vec4V boundMinZ = V4SplatElement<2>(Vec4V_From_Vec3V(boundMin)); + const Vec4V boundMaxX = V4SplatElement<0>(Vec4V_From_Vec3V(boundMax)); + const Vec4V boundMaxY = V4SplatElement<1>(Vec4V_From_Vec3V(boundMax)); + const Vec4V boundMaxZ = V4SplatElement<2>(Vec4V_From_Vec3V(boundMax)); + + ParticleCollDataV4 collDataV4; + + const VecU32V u4Zero = U4LoadXYZW(0, 0, 0, 0); + const VecU32V u4One = U4LoadXYZW(1, 1, 1, 1); + PX_ALIGN(16, ParticleCollData fakeCsd); + fakeCsd.localOldPos = PxVec3(FLT_MAX, FLT_MAX, FLT_MAX); + fakeCsd.localNewPos = PxVec3(FLT_MAX, FLT_MAX, FLT_MAX); + PX_ALIGN(16, PxU32 overlapArray[128]); + + PxU32 start = 0; + while(start < numCollData) + { + const PxU32 batchSize = PxMin(numCollData - start, PxU32(128)); + PxU32 v4Count = 0; + ParticleCollData* particleCollDataIt = &particleCollData[start]; + for(PxU32 i = 0; i < batchSize; i += 4) + { + ParticleCollData* collData[4]; + collData[0] = particleCollDataIt++; + collData[1] = (i + 1 < numCollData) ? particleCollDataIt++ : &fakeCsd; + collData[2] = (i + 2 < numCollData) ? particleCollDataIt++ : &fakeCsd; + collData[3] = (i + 3 < numCollData) ? 
particleCollDataIt++ : &fakeCsd; + + Vec4V oldPosV0 = V4LoadU(reinterpret_cast<PxF32*>(&collData[0]->localOldPos)); + Vec4V newPosV0 = V4LoadU(reinterpret_cast<PxF32*>(&collData[0]->localNewPos)); + Vec4V oldPosV1 = V4LoadU(reinterpret_cast<PxF32*>(&collData[1]->localOldPos)); + Vec4V newPosV1 = V4LoadU(reinterpret_cast<PxF32*>(&collData[1]->localNewPos)); + Vec4V oldPosV2 = V4LoadU(reinterpret_cast<PxF32*>(&collData[2]->localOldPos)); + Vec4V newPosV2 = V4LoadU(reinterpret_cast<PxF32*>(&collData[2]->localNewPos)); + Vec4V oldPosV3 = V4LoadU(reinterpret_cast<PxF32*>(&collData[3]->localOldPos)); + Vec4V newPosV3 = V4LoadU(reinterpret_cast<PxF32*>(&collData[3]->localNewPos)); + + Vec4V particleMin0 = V4Min(oldPosV0, newPosV0); + Vec4V particleMax0 = V4Max(oldPosV0, newPosV0); + Vec4V particleMin1 = V4Min(oldPosV1, newPosV1); + Vec4V particleMax1 = V4Max(oldPosV1, newPosV1); + Vec4V particleMin2 = V4Min(oldPosV2, newPosV2); + Vec4V particleMax2 = V4Max(oldPosV2, newPosV2); + Vec4V particleMin3 = V4Min(oldPosV3, newPosV3); + Vec4V particleMax3 = V4Max(oldPosV3, newPosV3); + + Mat44V particleMin44(particleMin0, particleMin1, particleMin2, particleMin3); + const Mat44V particleMinTrans44 = M44Trnsps(particleMin44); + Mat44V particleMax44(particleMax0, particleMax1, particleMax2, particleMax3); + const Mat44V particleMaxTrans44 = M44Trnsps(particleMax44); + + BoolV mask = V4IsGrtr(boundMaxX, particleMinTrans44.col0); + mask = BAnd(V4IsGrtr(boundMaxY, particleMinTrans44.col1), mask); + mask = BAnd(V4IsGrtr(boundMaxZ, particleMinTrans44.col2), mask); + mask = BAnd(V4IsGrtr(particleMaxTrans44.col0, boundMinX), mask); + mask = BAnd(V4IsGrtr(particleMaxTrans44.col1, boundMinY), mask); + mask = BAnd(V4IsGrtr(particleMaxTrans44.col2, boundMinZ), mask); + + VecU32V overlap4 = V4U32Sel(mask, u4One, u4Zero); + V4U32StoreAligned(overlap4, reinterpret_cast<VecU32V*>(&overlapArray[i])); + } + + particleCollDataIt = &particleCollData[start]; + for(PxU32 k = 0; k < batchSize; k++, 
++particleCollDataIt) + { + if(overlapArray[k]) + { + if(!scaledPlanes) + { + scalePlanes(scaledPlaneBuf, convexHullData, invScaling); + scaledPlanes = true; + } + + collDataV4.localOldPos[v4Count].v3 = particleCollDataIt->localOldPos; + collDataV4.localNewPos[v4Count].v3 = particleCollDataIt->localNewPos; + collDataV4.localFlags[v4Count] = particleCollDataIt->localFlags; + collDataV4.restOffset[v4Count] = particleCollDataIt->restOffset; + collDataV4.ccTime[v4Count] = particleCollDataIt->ccTime; + collDataV4.collData[v4Count] = particleCollDataIt; + v4Count++; + } + + if(v4Count == 4 || (v4Count > 0 && (k == batchSize - 1))) + { + collideWithConvexPlanesSIMD(collDataV4, scaledPlaneBuf, convexHullData->mNbPolygons, proxRadius); + + for(PxU32 j = 0; j < v4Count; j++) + { + ParticleCollData* collData = collDataV4.collData[j]; + PxU32 stateFlag = collDataV4.localFlags[j]; + if(stateFlag) + { + collData->localFlags |= stateFlag; + collData->ccTime = collDataV4.ccTime[j]; + collData->localSurfaceNormal = collDataV4.localSurfaceNormal[j].v3; + collData->localSurfacePos = collDataV4.localSurfacePos[j].v3; + } + } + v4Count = 0; + } + } + start += batchSize; + } +#else + ParticleCollData* particleCollDataIt = particleCollData; + for(PxU32 i = 0; i < numCollData; ++i, ++particleCollDataIt) + { + PxBounds3 particleBounds = + PxBounds3::boundsOfPoints(particleCollDataIt->localOldPos, particleCollDataIt->localNewPos); + + if(particleBounds.intersects(shapeBounds)) + { + if(!scaledPlanes) + { + scalePlanes(scaledPlaneBuf, convexHullData, invScaling); + scaledPlanes = true; + } + + collideWithConvexPlanes(*particleCollDataIt, scaledPlaneBuf, convexHullData->mNbPolygons, proxRadius); + } + } +#endif +} + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionData.h b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionData.h new file mode 100644 index 00000000..d52ff29e --- /dev/null +++ 
b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionData.h @@ -0,0 +1,271 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PT_COLLISION_DATA_H +#define PT_COLLISION_DATA_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "foundation/PxVec3.h" +#include "foundation/PxVec4.h" +#include "foundation/PxTransform.h" +#include "particles/PxParticleFlag.h" +#include "PtConfig.h" + +namespace physx +{ + +struct PxsShapeCore; +struct PxsBodyCore; + +namespace Pt +{ + +#define PT_CCD_PROJECT 0 // Ocational leaking at static interfaces +#define PT_CCD_STAY 1 // Seems to work for static +#define PT_CCD_IMPACT 2 // Doesn't work at all for static interfaces +#define PT_CDD_BACKTRACK_SMALL 3 // Seems to work for static +#define PT_CDD_BACKTRACK_LARGE 4 // Seems to work for static + +#define PT_CDD_BACKTRACK_SMALL_EPS 1e-4f + +#define PT_CCD_MEDTHOD PT_CCD_STAY // Maybe we'll need to do something else for dynamics + +PX_FORCE_INLINE void computeContinuousTargetPosition(PxVec3& surfacePos, const PxVec3& localOldPos, + const PxVec3& relativePOSITION, const PxVec3& surfaceNormal, + const PxF32 restOffset) +{ + PX_UNUSED(restOffset); + PX_UNUSED(surfaceNormal); + PX_UNUSED(relativePOSITION); + +#if PT_CCD_MEDTHOD == PT_CCD_PROJECT + surfacePos = localOldPos + relativePOSITION + (surfaceNormal * restOffset); +#elif PT_CCD_MEDTHOD == PT_CCD_STAY + surfacePos = localOldPos; +#elif PT_CCD_MEDTHOD == PT_CCD_IMPACT + surfacePos = localOldPos + relativePOSITION; +#else + const PxF32 backtrackLength = (PT_CCD_MEDTHOD == PT_CDD_BACKTRACK_SMALL) ? PT_CDD_BACKTRACK_SMALL_EPS : restOffset; + PxF32 relImpactLength = relativePOSITION.magnitude(); + PxF32 backtrackParam = (relImpactLength > 0.0f) ? PxMax(0.0f, relImpactLength - backtrackLength) : 0.0f; + surfacePos = localOldPos + relativePOSITION * (backtrackParam / relImpactLength); +#endif +} + +/*! 
Fluid particle collision constraint: a plane stored as a normal and the offset d = normal . p
of a contact point p along that normal.
*/
struct Constraint
{
	PxVec3 normal; // Contact surface normal
	PxF32 d;       // Contact point projected on contact normal
	// 16 (presumably the running byte offset of the layout - TODO confirm)

  public:
	// Leaves normal and d uninitialized on purpose.
	Constraint()
	{
		// Do we want to initialize the constraints on creation?
		// setZero();
	}

	// Builds the constraint from a normal and a point _p; d becomes normal . _p.
	Constraint(const PxVec3& _normal, const PxVec3& _p)
	{
		normal = _normal;
		d = normal.dot(_p);
	}

	// Returns p shifted along normal by (d - normal . p). For a unit-length normal the result
	// lies on the constraint plane.
	PX_FORCE_INLINE PxVec3 project(const PxVec3& p) const
	{
		return (p + (normal * (d - normal.dot(p))));
	}
};

/*!
Fluid particle collision constraint data for dynamic rigid body
*/
struct ConstraintDynamic
{
	PxVec3 velocity;
	const PxsBodyCore* twoWayBody; // weak reference to rigid body.

  public:
	// Resets to "no dynamic body": zero velocity and NULL body reference.
	PX_FORCE_INLINE void setEmpty()
	{
		velocity = PxVec3(0);
		twoWayBody = NULL;
	}
};

/*!
Fluid particle collision constraint buffers.
Plain pointers; nothing in this struct allocates or frees them.
*/
struct ConstraintBuffers
{
	Constraint* constraint0Buf;
	Constraint* constraint1Buf;
	ConstraintDynamic* constraint0DynamicBuf;
	ConstraintDynamic* constraint1DynamicBuf;
};

/*!
Different types of collision
*/
struct ParticleCollisionFlags
{
	enum Enum
	{
		// Global collision flags. Used to track the latest collision status of a particle when
		// testing against potentially colliding shapes
		DC = (1 << 0),            // Discrete collision
		CC = (1 << 1),            // Continuous collision
		RESET_SNORMAL = (1 << 2), // Saves one PxVec3 in the ParticleCollData

		// When testing a particle against a shape, the following collision flags might be used
		L_CC = (1 << 3),   // Continuous collision: Predicted particle motion vector intersects shape region
		L_DC = (1 << 4),   // Discrete collision: Predicted particle position inside discrete region of shape (shape
		                   // region + collision radius)
		L_PROX = (1 << 5), // Proximity collision: Predicted particle position inside proximity region of shape (shape
		                   // region + proximity radius)
		L_CC_PROX = (L_CC | L_PROX),
		L_ANY = (L_CC | L_DC | L_PROX)
	};
};

/*!
Structure to track collision data for a fluid particle.
The bare numbers between member groups look like running byte offsets of the layout
(NOTE(review): they only add up if pointers are 4 bytes wide - confirm for 64 bit targets).
*/
struct ParticleCollData
{
	PxVec3 surfaceNormal; // Contact normal [world space]
	PxU32 flags;          // Latest collision status
	// 16

	PxVec3 surfacePos; // Contact point on shape surface [world space]
	PxF32 dcNum;       // Number of discrete collisions
	// 32

	PxVec3 surfaceVel; // Velocity of contact point on shape surface [world space]
	PxF32 ccTime;      // "Time of impact" for continuous collision
	// 48

	PxVec3 oldPos; // Old particle position
	ParticleFlags particleFlags;
	// 64

	PxVec3 newPos; // New particle position
	PxU32 origParticleIndex;
	// 80

	PxVec3 velocity; // Particle velocity
	PxF32 restOffset;
	// 96

	PxVec3 twoWayImpulse;
	const PxsBodyCore* twoWayBody; // Weak reference to colliding rigid body
	// 112

	PxVec3 localOldPos; // in
	PxU32 localFlags;   // in/out
	// 128

	PxVec3 localNewPos; // in
	Constraint* c0;     // in
	// 144

	PxVec3 localSurfaceNormal; // out
	Constraint* c1;            // in
	// 160

	PxVec3 localSurfacePos; // out
	PxF32 localDcNum;       // Number of discrete collisions
	// 176

  public:
	/*!
	Resets the accumulated collision state for a new time step.

	Only the members assigned below are touched; newPos, velocity, the local* members, c0, c1 and
	localDcNum are left as they are.

	\param particlePos         stored as oldPos
	\param particleRestOffset  stored as restOffset
	\param particleIndex       stored as origParticleIndex
	\param _particleFlags      source flags; collision bits are cleared and the geometry cache bits
	                           are shifted down before being stored in particleFlags
	*/
	PX_FORCE_INLINE void init(const PxVec3& particlePos, const PxF32 particleRestOffset, const PxU32 particleIndex,
	                          const ParticleFlags _particleFlags)
	{
		// Initialize values

		surfaceNormal = PxVec3(0);
		flags = 0;

		surfacePos = PxVec3(0);
		dcNum = 0.0f;

		surfaceVel = PxVec3(0);
		ccTime = 1.0f; // No collision assumed.

		restOffset = particleRestOffset;

		oldPos = particlePos;

		// Remove collision flags from previous time step
		particleFlags.api = PxU16(_particleFlags.api & ((~PxU16(PxParticleFlag::eCOLLISION_WITH_STATIC)) &
		                                                (~PxU16(PxParticleFlag::eCOLLISION_WITH_DYNAMIC))));

		// Reduce cache bits
		// 11 -> 01
		// 01 -> 00
		// 00 -> 00
		// (shift the masked cache bits right by one, then re-mask so nothing leaks below the field)
		PxU16 reducedCache = PxU16(((_particleFlags.low & InternalParticleFlag::eGEOM_CACHE_MASK) >> 1) &
		                           InternalParticleFlag::eGEOM_CACHE_MASK);
		particleFlags.low = PxU16((_particleFlags.low & ~PxU16(InternalParticleFlag::eGEOM_CACHE_MASK)) | reducedCache);

		origParticleIndex = particleIndex;

		twoWayBody = NULL;
		twoWayImpulse = PxVec3(0);
	}
};

// PxVec3 padded with one extra float (four floats in total), so that an array element can be
// loaded/stored as a whole 4-float vector.
struct PxVec3Pad
{
	PxVec3 v3;
	PxF32 pad;
};

// Data of up to four particles packed for the SIMD convex test; collData[] points back to the
// per-particle records the lanes were gathered from.
struct ParticleCollDataV4
{
	ParticleCollData* collData[4];
	PX_ALIGN(16, PxVec3Pad localOldPos[4]);        // in
	PX_ALIGN(16, PxVec3Pad localNewPos[4]);        // in
	PX_ALIGN(16, PxF32 restOffset[4]);             // in
	PX_ALIGN(16, PxU32 localFlags[4]);             // in,out
	PX_ALIGN(16, PxF32 ccTime[4]);                 // in,out (previous ccTime is read, updated one is written)
	PX_ALIGN(16, PxVec3Pad localSurfaceNormal[4]); // out
	PX_ALIGN(16, PxVec3Pad localSurfacePos[4]);    // out
};

} // namespace Pt
} // namespace physx

#endif // PX_USE_PARTICLE_SYSTEM_API
#endif // PT_COLLISION_DATA_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionHelper.h b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionHelper.h new file mode 100644 index 00000000..30a746a6 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionHelper.h @@ -0,0 +1,860 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. 
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PT_COLLISION_HELPER_H +#define PT_COLLISION_HELPER_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "PxvDynamics.h" +#include "PtSpatialHash.h" +#include "PtConstants.h" + +namespace physx +{ + +struct PxsShapeCore; + +namespace Pt +{ + +#define RANDOMIZED_COLLISION_PROTOTYPE 0 +#define PXS_SURFACE_NORMAL_UNIT_TOLERANCE 5e-2f + +struct W2STransformTemp +{ + PxTransform w2sOld; + PxTransform w2sNew; +}; + +PX_FORCE_INLINE PxF32 invertDcNum(PxF32 dcNum) +{ + PX_ASSERT(dcNum > 0.0f); + if(dcNum < 3.0f) + { + PX_ASSERT(dcNum == 1.0f || dcNum == 2.0f); + return physx::intrinsics::fsel(dcNum - 1.5f, 0.5f, 1.0f); + } + else + { + return 1.0f / dcNum; + } +} + +#if RANDOMIZED_COLLISION_PROTOTYPE +static bool rrEnabled = false; +static PxF32 rrVelocityThresholdSqr = 10.0f; +static PxF32 rrRestitutionMin = 0.3f; +static PxF32 rrRestitutionMax = 0.5f; +static PxF32 rrDynamicFrictionMin = 0.03f; +static PxF32 rrDynamicFrictionMax = 0.05f; +static PxF32 rrStaticFrictionSqrMin = 1.0f; +static PxF32 rrStaticFrictionSqrMax = 2.0f; +static PxF32 rrAngleRadMax = physx::intrinsics::sin(PxPi / 16); + +static void selectRandomParameters(PxVec3& outSurfaceNormal, PxF32& outRestitution, PxF32& outDynamicFriction, + PxF32& outStaticFrictionSqr, const PxU32 particleIndex, const PxVec3& surfaceNormal, + const PxVec3& velocity, const CollisionParameters& params) +{ + static PxF32 noiseScale = (1.0f / 65536.0f); + PxU32 noiseFactorP = particleIndex * particleIndex; // taking the square of the particleIndex yields better results. 
+ PxU32 noiseFactorTP = params.temporalNoise * noiseFactorP; + + PxF32 noise0 = ((noiseFactorTP * 1735339) & 0xffff) * noiseScale; + PxF32 noise1 = ((noiseFactorTP * 1335379) & 0xffff) * noiseScale; + PxF32 noise2 = ((noiseFactorP * 1235303) & 0xffff) * noiseScale; + + outRestitution = (1.0f - noise0) * rrRestitutionMin + noise0 * rrRestitutionMax; + outDynamicFriction = (1.0f - noise1) * rrDynamicFrictionMin + noise1 * rrDynamicFrictionMax; + outStaticFrictionSqr = (1.0f - noise2) * rrStaticFrictionSqrMin + noise2 * rrStaticFrictionSqrMax; + + if(velocity.magnitudeSquared() > rrVelocityThresholdSqr) + { + PxF32 noise3 = ((noiseFactorTP * 14699023) & 0xffff) * noiseScale; + PxF32 noise4 = ((noiseFactorTP * 16699087) & 0xffff) * noiseScale; + PxF32 noise5 = ((noiseFactorTP * 11999027) & 0xffff) * noiseScale; + + PxVec3 tangent0, tangent1; + normalToTangents(surfaceNormal, tangent0, tangent1); + + PxF32 angleNoise = noise3 * PxTwoPi; + PxF32 angleCosNoise = physx::intrinsics::cos(angleNoise); + PxF32 angleSinNoise = physx::intrinsics::sin(angleNoise); + + // skew towards mean + PxF32 radiusNoise = noise4 * noise5; + PxVec3 tangent = tangent0 * angleCosNoise + tangent1 * angleSinNoise; + + outSurfaceNormal = surfaceNormal + tangent * radiusNoise * rrAngleRadMax; + outSurfaceNormal.normalize(); + } + else + { + outSurfaceNormal = surfaceNormal; + } +} +#endif + +//-------------------------------------------------------------------------------------------------------------------// + +PX_FORCE_INLINE void clampVelocity(PxVec3& velocity, PxReal maxMotion, PxReal timeStep) +{ + PxReal velocityMagnitude = velocity.magnitude(); + if(velocityMagnitude * timeStep > maxMotion) + { + PxReal scaleFactor = maxMotion / (velocityMagnitude * timeStep); + velocity *= scaleFactor; + } +} + +//-------------------------------------------------------------------------------------------------------------------// + +PX_FORCE_INLINE void integrateParticleVelocity(Particle& particle, const 
PxF32 maxMotionDistance, + const PxVec3& acceleration, const PxF32 dampingDtComp, + const PxF32 timeStep) +{ + // Integrate + particle.velocity += acceleration * timeStep; + + // Damp + particle.velocity *= dampingDtComp; + + // Clamp velocity such that particle stays within maximum motion distance + clampVelocity(particle.velocity, maxMotionDistance, timeStep); + + PX_ASSERT((particle.velocity * timeStep).magnitude() <= maxMotionDistance + 1e-5f); +} + +//-----------------------------------------------------------------------------------------------------------------------// + +PX_FORCE_INLINE void addDiscreteCollisionStatic(ParticleCollData& collData, const PxVec3& newSurfaceNormal, + const PxVec3& newSurfacePos, const PxF32& dcNum) +{ + collData.flags |= ParticleCollisionFlags::DC; + + if(collData.flags & ParticleCollisionFlags::RESET_SNORMAL) + { + collData.surfaceNormal = newSurfaceNormal; + collData.flags &= ~ParticleCollisionFlags::RESET_SNORMAL; + } + else + { + collData.surfaceNormal += newSurfaceNormal; + } + + // Discrete collisions will be averaged + collData.surfacePos += newSurfacePos; + collData.dcNum += dcNum; // The passed surface normal/position/velocity can itself consist of + // summed up normals/positions/velocities (for meshes for instance). 
+} + +//-----------------------------------------------------------------------------------------------------------------------// + +PX_FORCE_INLINE void addDiscreteCollisionDynamic(ParticleCollData& collData, const PxVec3& newSurfaceNormal, + const PxVec3& newSurfacePos, const PxVec3& newSurfaceVel, + const PxF32& dcNum) +{ + collData.flags |= ParticleCollisionFlags::DC; + + // Discrete collisions will be averaged + if(collData.flags & ParticleCollisionFlags::RESET_SNORMAL) + { + collData.surfaceNormal = newSurfaceNormal; + collData.surfaceVel = newSurfaceVel; + collData.flags &= ~ParticleCollisionFlags::RESET_SNORMAL; + } + else + { + collData.surfaceNormal += newSurfaceNormal; + collData.surfaceVel += newSurfaceVel; + } + + collData.surfacePos += newSurfacePos; + collData.dcNum += dcNum; // The passed surface normal/position/velocity can itself consist of + // summed up normals/positions/velocities (for meshes for instance). +} + +//-----------------------------------------------------------------------------------------------------------------------// + +PX_FORCE_INLINE void addContinuousCollisionStatic(ParticleCollData& collData, const PxVec3& newSurfaceNormal, + const PxVec3& newSurfacePos) +{ + collData.flags &= ~ParticleCollisionFlags::DC; // Continuous collisions take precedence over discrete collisions + collData.flags |= ParticleCollisionFlags::CC; + + collData.surfaceNormal = newSurfaceNormal; + collData.surfacePos = newSurfacePos; +} + +//-----------------------------------------------------------------------------------------------------------------------// + +PX_FORCE_INLINE void addContinuousCollisionDynamic(ParticleCollData& collData, const PxVec3& newSurfaceNormal, + const PxVec3& newSurfacePos, const PxVec3& newSurfaceVel) +{ + collData.flags &= ~ParticleCollisionFlags::DC; // Continuous collisions take precedence over discrete collisions + collData.flags |= ParticleCollisionFlags::CC; + + collData.surfaceNormal = newSurfaceNormal; + 
collData.surfacePos = newSurfacePos; + collData.surfaceVel = newSurfaceVel; +} + +//-----------------------------------------------------------------------------------------------------------------------// +PX_FORCE_INLINE void addConstraint(ParticleCollData& collData, const PxVec3& newSurfaceNormal, const PxVec3& newSurfacePos) +{ + // sschirm: Turns out that there are cases where a perfectly normalized normal (-1,0,0) which is rotated by a + // quat with PxQuat::isSane(), has !PxVec3::isNormalized(). Therefore we intruduce a less conservative assert here. + PX_ASSERT(PxAbs(newSurfaceNormal.magnitude() - 1) < PXS_SURFACE_NORMAL_UNIT_TOLERANCE); + Constraint cN(newSurfaceNormal, newSurfacePos); + if(!(collData.particleFlags.low & InternalParticleFlag::eCONSTRAINT_0_VALID)) + { + *collData.c0 = cN; + collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_0_VALID; + } + else if(!(collData.particleFlags.low & InternalParticleFlag::eCONSTRAINT_1_VALID)) + { + *collData.c1 = cN; + collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_1_VALID; + } + else + { + // Important: If the criterion to select the overwrite constraint changes, the fluid vs. static + // mesh code needs to be adjusted accordingly. + + // Overwrite constraint with the largest distance {old position} <--> {shape surface}. + // The old position must be used since the new position is corrected after each collision occurrence. 
+ PxReal dist0 = collData.c0->normal.dot(collData.oldPos) - collData.c0->d; + PxReal dist1 = collData.c1->normal.dot(collData.oldPos) - collData.c1->d; + PxReal distN = cN.normal.dot(collData.oldPos) - cN.d; + + if(dist0 < dist1) + { + if(distN < dist1) + { + *collData.c1 = cN; + collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_1_VALID; + collData.particleFlags.low &= PxU16(~InternalParticleFlag::eCONSTRAINT_1_DYNAMIC); + } + } + else if(distN < dist0) + { + *collData.c0 = cN; + collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_0_VALID; + collData.particleFlags.low &= PxU16(~InternalParticleFlag::eCONSTRAINT_0_DYNAMIC); + } + } +} + +PX_FORCE_INLINE void addConstraintDynamic(ParticleCollData& collData, const PxVec3& newSurfaceNormal, + const PxVec3& newSurfacePos, const PxVec3& newSurfaceVel, + const PxsBodyCore* body, ConstraintDynamic& c0Dynamic, + ConstraintDynamic& c1Dynamic) +{ + // sschirm: Turns out that there are cases where a perfectly normalized normal (-1,0,0) which is rotated by a + // quat with PxQuat::isSane(), has !PxVec3::isNormalized(). Therefore we intruduce a less conservative assert here. 
+ PX_ASSERT(PxAbs(newSurfaceNormal.magnitude() - 1) < PXS_SURFACE_NORMAL_UNIT_TOLERANCE); + Constraint cN(newSurfaceNormal, newSurfacePos); + if(!(collData.particleFlags.low & InternalParticleFlag::eCONSTRAINT_0_VALID)) + { + *collData.c0 = cN; + c0Dynamic.velocity = newSurfaceVel; + c0Dynamic.twoWayBody = body; + collData.particleFlags.low |= + (InternalParticleFlag::eCONSTRAINT_0_VALID | InternalParticleFlag::eCONSTRAINT_0_DYNAMIC); + } + else if(!(collData.particleFlags.low & InternalParticleFlag::eCONSTRAINT_1_VALID)) + { + *collData.c1 = cN; + c1Dynamic.velocity = newSurfaceVel; + c1Dynamic.twoWayBody = body; + collData.particleFlags.low |= + (InternalParticleFlag::eCONSTRAINT_1_VALID | InternalParticleFlag::eCONSTRAINT_1_DYNAMIC); + } + else + { + // Important: If the criterion to select the overwrite constraint changes, the fluid vs. static + // mesh code needs to be adjusted accordingly. + + // Overwrite constraint with the largest distance {old position} <--> {shape surface}. + // The old position must be used since the new position is corrected after each collision occurrence. + PxReal dist0 = collData.c0->normal.dot(collData.oldPos) - collData.c0->d; + PxReal dist1 = collData.c1->normal.dot(collData.oldPos) - collData.c1->d; + PxReal distN = cN.normal.dot(collData.oldPos) - cN.d; + + if(dist0 < dist1) + { + if(distN < dist1) + { + *collData.c1 = cN; + c1Dynamic.velocity = newSurfaceVel; + c1Dynamic.twoWayBody = body; + collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_1_DYNAMIC; + } + } + else if(distN < dist0) + { + *collData.c0 = cN; + c0Dynamic.velocity = newSurfaceVel; + c0Dynamic.twoWayBody = body; + collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_0_DYNAMIC; + } + } +} + +/*! +Reflect velocity on shape surface. +- To apply friction, the current velocity is used +- For restitution a different velocity can be used +(This can help to avoid jittering effects. 
After the fluid particle dynamics update, forces are applied +to integrate the new velocities. If particle collision constraints work on these new velocities, +jittering can result. Using the old velocities (before the forces were applied) to compute the +normal impulse can solve this problem) +*/ +PX_FORCE_INLINE void reflectVelocity(PxVec3& reflectedVel, const PxVec3& inVel, const PxVec3& oldVel, + const PxVec3& surfaceNormal, const PxVec3& surfaceVel, PxU32 particleIndex, + const CollisionParameters& params) +{ + PX_UNUSED(particleIndex); + + PxVec3 relativeVel = inVel - surfaceVel; + PxReal projectedRelativeVel = surfaceNormal.dot(relativeVel); + + if(projectedRelativeVel < 0.0f) // Particle is moving closer to surface (else the collision will be resolved) + { + PxF32 rDynamicFriction; + PxF32 rStaticFrictionSqr; + PxF32 rRestitution; + PxVec3 rSurfaceNormal; + +#if RANDOMIZED_COLLISION_PROTOTYPE + if(rrEnabled) + { + selectRandomParameters(rSurfaceNormal, rRestitution, rDynamicFriction, rStaticFrictionSqr, particleIndex, + surfaceNormal, relativeVel, params); + } + else +#endif + { + rDynamicFriction = params.dynamicFriction; + rStaticFrictionSqr = params.staticFrictionSqr; + rRestitution = params.restitution; + rSurfaceNormal = surfaceNormal; + } + + PxVec3 newNormalComponent = rSurfaceNormal * projectedRelativeVel; + PxVec3 newTangentialComponent = relativeVel - newNormalComponent; + + PxVec3 oldRelativeVel = oldVel - surfaceVel; + PxReal oldProjectedRelativeVel = rSurfaceNormal.dot(oldRelativeVel); + PxVec3 oldNormalComponent = rSurfaceNormal * oldProjectedRelativeVel; + + // static friction (this works based on the quotient between tangential and normal velocity magnitude). 
+ PxVec3 diffNormalComponent = newNormalComponent - oldNormalComponent; + + PxReal stictionSqr = rStaticFrictionSqr * diffNormalComponent.magnitudeSquared(); + + // if (newTangentialComponent.magnitudeSquared() < stictionSqr) + // newTangentialComponent = PxVec3(0); + PxF32 diff = newTangentialComponent.magnitudeSquared() - stictionSqr; + newTangentialComponent.x = physx::intrinsics::fsel(diff, newTangentialComponent.x, 0.0f); + newTangentialComponent.y = physx::intrinsics::fsel(diff, newTangentialComponent.y, 0.0f); + newTangentialComponent.z = physx::intrinsics::fsel(diff, newTangentialComponent.z, 0.0f); + + // pseudo dynamic friction (not dependent on normal component!) + reflectedVel = newTangentialComponent * (1.0f - rDynamicFriction); + + // restitution is computed using the old velocity + // if (oldProjectedRelativeVel < 0.0f) + // reflectedVel -= oldNormalComponent * mParams.restitution; + PxVec3 reflectedVelTmp = reflectedVel - oldNormalComponent * rRestitution; + reflectedVel.x = physx::intrinsics::fsel(oldProjectedRelativeVel, reflectedVel.x, reflectedVelTmp.x); + reflectedVel.y = physx::intrinsics::fsel(oldProjectedRelativeVel, reflectedVel.y, reflectedVelTmp.y); + reflectedVel.z = physx::intrinsics::fsel(oldProjectedRelativeVel, reflectedVel.z, reflectedVelTmp.z); + + reflectedVel += surfaceVel; + } + else + reflectedVel = inVel; +} + +PX_FORCE_INLINE void updateParticle(Particle& particle, const ParticleCollData& collData, bool projection, + const PxPlane& projectionPlane, PxBounds3& worldBounds) +{ + // move worldBounds update here to avoid LHS + if(!projection) + { + particle.velocity = collData.velocity; + particle.position = collData.newPos; + PX_ASSERT(particle.position.isFinite()); + worldBounds.include(collData.newPos); + } + else + { + const PxReal dist = projectionPlane.n.dot(collData.velocity); + particle.velocity = collData.velocity - (projectionPlane.n * dist); + const PxVec3 pos = projectionPlane.project(collData.newPos); + 
PX_ASSERT(pos.isFinite()); + particle.position = pos; + worldBounds.include(pos); + } + particle.flags = collData.particleFlags; +} + +PX_FORCE_INLINE void clampToMaxMotion(PxVec3& newPos, const PxVec3& oldPos, PxF32 maxMotionDistance, + PxF32 maxMotionDistanceSqr) +{ + PxVec3 motionVec = newPos - oldPos; + PxReal motionDistanceSqr = motionVec.magnitudeSquared(); + if(motionDistanceSqr > maxMotionDistanceSqr) + { + newPos = oldPos + (motionVec * maxMotionDistance * physx::intrinsics::recipSqrt(motionDistanceSqr)); + } +} + +PX_FORCE_INLINE void updateCollDataDynamic(ParticleCollData& collData, const PxTransform& bodyToWorld, + const PxVec3& linearVel, const PxVec3& angularVel, + const PxsBodyCore* twoWayBody, const PxTransform& shapeToWorld, + const PxReal timeStep, ConstraintDynamic& c0Dynamic, + ConstraintDynamic& c1Dynamic) +{ + if(collData.localFlags & ParticleCollisionFlags::L_ANY) + { + PxVec3 newSurfaceNormal = shapeToWorld.rotate(collData.localSurfaceNormal); + PxVec3 newSurfacePos = shapeToWorld.transform(collData.localSurfacePos); + + PxVec3 rotatedSurfacePosBody = newSurfacePos - bodyToWorld.p; + + PxVec3 angularSurfaceVel = angularVel.cross(rotatedSurfacePosBody); + PxVec3 newSurfaceVel = angularSurfaceVel + linearVel; + + if(collData.localFlags & ParticleCollisionFlags::L_CC) + { + addContinuousCollisionDynamic(collData, newSurfaceNormal, newSurfacePos, newSurfaceVel); + // old body gets overwritten if a new one appears + collData.twoWayBody = twoWayBody; + collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_DYNAMIC; + } + if(collData.localFlags & ParticleCollisionFlags::L_DC) + { + addDiscreteCollisionDynamic(collData, newSurfaceNormal, newSurfacePos, newSurfaceVel, 1.f); + // old body gets overwritten if a new one appears + collData.twoWayBody = twoWayBody; + collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_DYNAMIC; + } + if(collData.localFlags & ParticleCollisionFlags::L_CC_PROX) + { + // Try to the predict the constraint for 
the next pose of the shape + + // sschirm: this code tries to call inv sqrt as much as possible it seems! + // Predict surface position (for the rotation part an approximation is used) + PxReal surfacePosDist = rotatedSurfacePosBody.magnitude(); + newSurfacePos = rotatedSurfacePosBody + angularSurfaceVel * timeStep; + newSurfacePos = newSurfacePos.getNormalized(); + newSurfacePos *= surfacePosDist; + + newSurfacePos += (bodyToWorld.p + (linearVel * timeStep)); + + // Predict surface normal (for the rotation an approximation is used) + newSurfaceNormal += (angularVel.cross(newSurfaceNormal)) * timeStep; + newSurfaceNormal = newSurfaceNormal.getNormalized(); + + addConstraintDynamic(collData, newSurfaceNormal, newSurfacePos, newSurfaceVel, twoWayBody, c0Dynamic, + c1Dynamic); + } + } +} + +PX_FORCE_INLINE void updateCollDataStatic(ParticleCollData& collData, const PxTransform& shapeToWorld, + const PxReal /*timeStep*/) +{ + if(collData.localFlags & ParticleCollisionFlags::L_ANY) + { + PxVec3 newSurfaceNormal = shapeToWorld.rotate(collData.localSurfaceNormal); + PxVec3 newSurfacePos = shapeToWorld.transform(collData.localSurfacePos); + + if(collData.localFlags & ParticleCollisionFlags::L_CC) + { + addContinuousCollisionStatic(collData, newSurfaceNormal, newSurfacePos); + collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_STATIC; + } + if(collData.localFlags & ParticleCollisionFlags::L_DC) + { + addDiscreteCollisionStatic(collData, newSurfaceNormal, newSurfacePos, 1.f); + collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_STATIC; + } + if(collData.localFlags & ParticleCollisionFlags::L_CC_PROX) + { + addConstraint(collData, newSurfaceNormal, newSurfacePos); + } + } +} + +PX_FORCE_INLINE void updateCollDataStaticMesh(ParticleCollData& collData, const PxTransform& shapeToWorld, + const PxReal /*timeStep*/) +{ + if(collData.localFlags & ParticleCollisionFlags::L_ANY) + { + if(collData.localFlags & ParticleCollisionFlags::L_CC) + { + PxVec3 
/*!
Applies up to two stored surface constraints to a particle's motion for this step.

A constraint is treated as a plane: a position x violates it when normal.dot(x) < d.
The target position rayOrig + rayDir is tested against the valid constraints:

- If no constraint is violated, the function returns false and leaves every output untouched.
- Otherwise the motion is reflected (or, when both constraints are violated at once, only damped by
  the dynamic friction), the resulting position is pushed back onto the allowed side of the
  plane(s), and the corrected motion is written to rayDir. If a push-back happened, the result is
  never longer than the incoming rayDir.

\param rayOrig           Start position of the particle.
\param rayDir            [in,out] Motion vector for this step (velocity * timeStep).
\param oldVelocity       Velocity forwarded to reflectVelocity() for the restitution term.
\param twoWayBody        [out] Body of the violated constraint (constraint 0 wins if both are violated).
\param shapeNormal       [out] Normal of the violated constraint.
\param shapeVelocity     [out] Surface velocity of the violated constraint.
\param constr0,constr1   Constraint planes; constr1 is only read if eCONSTRAINT_1_VALID is set.
\param constr0TwoWayBody,constr1TwoWayBody  Bodies associated with the constraints.
\param constr0Velocity,constr1Velocity      Surface velocities associated with the constraints.
\param particleIndex     Forwarded to reflectVelocity().
\param params            Provides timeStep, invTimeStep and dynamicFriction.
\param particleFlags     Flags telling which constraints are valid; constraint 0 must be valid.
\return true if a constraint was violated and rayDir has been updated, false otherwise.
*/
PX_FORCE_INLINE bool applyConstraints(const PxVec3& rayOrig, PxVec3& rayDir, const PxVec3& oldVelocity,
                                      const PxsBodyCore*& twoWayBody, PxVec3& shapeNormal, PxVec3& shapeVelocity,
                                      const Constraint* constr0, const Constraint* constr1,
                                      const PxsBodyCore* constr0TwoWayBody, const PxsBodyCore* constr1TwoWayBody,
                                      const PxVec3& constr0Velocity, const PxVec3& constr1Velocity,
                                      const PxU32 particleIndex, const CollisionParameters& params,
                                      const ParticleFlags& particleFlags)
{
    PX_ASSERT(particleFlags.low & InternalParticleFlag::eCONSTRAINT_0_VALID); // There must be one constraint to get
                                                                              // here
    bool needsRescaling = false;
    PxVec3 rayDirTmp = rayDir; // avoid LHS
    PxVec3 newPos = rayOrig + rayDirTmp;

    if(!(particleFlags.low & InternalParticleFlag::eCONSTRAINT_1_VALID))
    {
        // ---- Only constraint 0 is valid ----
        PxReal projectedNewPosC0 = constr0->normal.dot(newPos);

        if(projectedNewPosC0 < constr0->d)
        {
            // Target position is behind the plane: report constraint 0 as the colliding surface.
            twoWayBody = constr0TwoWayBody;
            shapeNormal = constr0->normal;
            shapeVelocity = constr0Velocity;
        }
        else
            return false;

        // Convert the motion vector to a velocity, reflect it, and convert back below.
        PxVec3 velocity = rayDirTmp * params.invTimeStep;
        reflectVelocity(rayDirTmp, velocity, oldVelocity, constr0->normal, constr0Velocity, particleIndex, params);

        // Compute motion direction of reflected particle and integrate position
        rayDirTmp *= params.timeStep;
        newPos = rayOrig + rayDirTmp;

        //
        // Constraint has been applied. Do second pass using the modified particle velocity and position
        //
        // - Check if modified particle is closer to the surface than in the last simulation step.
        //   If this is the case then move the particle such that the distance is at least as large as in the
        //   last step.
        //
        projectedNewPosC0 = constr0->normal.dot(newPos);
        if(constr0->d > projectedNewPosC0)
        {
            // The 1.01 factor pushes the particle slightly past the plane instead of exactly onto it.
            newPos = newPos + (constr0->normal * ((constr0->d - projectedNewPosC0) * 1.01f)); // Move particle in
                                                                                              // direction of surface
                                                                                              // normal
            rayDirTmp = newPos - rayOrig;
            needsRescaling = true;
        }
    }
    else
    {
        // ---- Both constraints are valid ----
        PxReal projectedNewPosC0 = constr0->normal.dot(newPos);
        PxReal projectedNewPosC1 = constr1->normal.dot(newPos);

        bool violateC0 = projectedNewPosC0 < constr0->d;
        bool violateC1 = projectedNewPosC1 < constr1->d;

        // Constraint 0 takes precedence when reporting the colliding surface.
        if(violateC0)
        {
            twoWayBody = constr0TwoWayBody;
            shapeNormal = constr0->normal;
            shapeVelocity = constr0Velocity;
        }
        else if(violateC1)
        {
            twoWayBody = constr1TwoWayBody;
            shapeNormal = constr1->normal;
            shapeVelocity = constr1Velocity;
        }
        else
            return false;

        if(!(violateC0 && violateC1))
        {
            // Exactly one constraint is violated: reflect on that surface.
            PxVec3 velocity = rayDirTmp * params.invTimeStep;
            reflectVelocity(rayDirTmp, velocity, oldVelocity, shapeNormal, shapeVelocity, particleIndex, params);

            // Compute motion direction of reflected particle and integrate position
            rayDirTmp *= params.timeStep;
        }
        else
        {
            // removed reflection code for this case (leads to jittering on edges)
            // missing restitution/static friction term might cause other artifacts though
            rayDirTmp *= (1.0f - params.dynamicFriction);
        }
        newPos = rayOrig + rayDirTmp;

        //
        // Constraint has been applied. Do second pass using the modified particle velocity and position
        //
        // - Check if modified particle is closer to the surface than in the last simulation step.
        //   If this is the case then move the particle such that the distance is at least as large as in the
        //   last step.
        //

        projectedNewPosC0 = constr0->normal.dot(newPos);

        PxReal n0dotn1 = constr0->normal.dot(constr1->normal);

        if(PxAbs(n0dotn1) > (1.0f - PT_COLL_VEL_PROJECTION_CROSS_EPSILON))
        {
            // The two normals are (anti-)parallel within the epsilon, so the two-plane projection below
            // would divide by (1 - n0dotn1^2) ~ 0. Only constraint 0 is enforced in this case.
            if(projectedNewPosC0 < constr0->d)
            {
                newPos = newPos + (constr0->normal * ((constr0->d - projectedNewPosC0) * 1.01f)); // Move particle in
                                                                                                  // direction of surface
                                                                                                  // normal
                rayDirTmp = newPos - rayOrig;
                needsRescaling = true;
            }
        }
        else
        {
            PxReal projectedNewPosC1_ = constr1->normal.dot(newPos);

            // Candidate positions obtained by projecting onto each plane individually.
            PxReal distChange0 = constr0->d - projectedNewPosC0;
            PxVec3 newPos0 = newPos + constr0->normal * distChange0; // Push particle in direction of surface normal

            PxReal distChange1 = constr1->d - projectedNewPosC1_;
            PxVec3 newPos1 = newPos + constr1->normal * distChange1; // Push particle in direction of surface normal

            if(projectedNewPosC0 < constr0->d || projectedNewPosC1_ < constr1->d)
            {
                // Check each single-plane projection against the other plane.
                PxReal projectedNewPosC1nC0 = constr0->normal.dot(newPos1);
                PxReal projectedNewPosC0nC1 = constr1->normal.dot(newPos0);

                if(projectedNewPosC1nC0 < constr0->d && projectedNewPosC0nC1 < constr1->d)
                {
                    // Neither single-plane projection satisfies the other plane: move along both normals
                    // at once. a0/a1 solve the 2x2 system that places newPos on both planes.
                    PxReal factor = 1.0f / (1.0f - (n0dotn1 * n0dotn1));
                    PxReal a0 = (distChange0 - (n0dotn1 * distChange1)) * factor;
                    PxReal a1 = (distChange1 - (n0dotn1 * distChange0)) * factor;
                    newPos += (constr0->normal * a0) + (constr1->normal * a1);

                    rayDirTmp = newPos - rayOrig;

                    // Small extra offset along the averaged normal, scaled by the squared motion length.
                    PxVec3 epsVec = (constr0->normal + constr1->normal) * 0.5f * PT_COLL_VEL_PROJECTION_PROJ;
                    rayDirTmp += epsVec * (rayDirTmp.dot(rayDirTmp));
                }
                else if(projectedNewPosC1nC0 < constr0->d)
                {
                    // Projecting onto plane 0 is sufficient (it also satisfies plane 1).
                    newPos = newPos + (constr0->normal * ((1.0f + PT_COLL_VEL_PROJECTION_PROJ) * distChange0));
                    rayDirTmp = newPos - rayOrig;
                }
                else
                {
                    // Projecting onto plane 1 is sufficient.
                    newPos = newPos + (constr1->normal * ((1.0f + PT_COLL_VEL_PROJECTION_PROJ) * distChange1));
                    rayDirTmp = newPos - rayOrig;
                }
                needsRescaling = true;
            }
        }
    }

    // Clamp velocity to magnitude of original velocity
    // (the push-back must not make the motion vector longer than the one passed in)
    if(needsRescaling)
    {
        PxF32 originalLengthSqr = rayDir.magnitudeSquared();
        PxF32 lengthSqr = rayDirTmp.magnitudeSquared();
        if(lengthSqr > originalLengthSqr)
        {
            rayDirTmp *= physx::intrinsics::sqrt(originalLengthSqr) * physx::intrinsics::recipSqrt(lengthSqr);
        }
    }
    rayDir = rayDirTmp;
    return true;
}
!collData.twoWayBody); + + if(isColliding) + { + if(collData.twoWayBody) + { + // params.flags & PxParticleBaseFlag::eCOLLISION_TWOWAY doesn't really matter to compute this if two way + // is off + collData.twoWayImpulse = (motionDirOld - motionDir) * params.invTimeStep; + collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_DYNAMIC; + } + else + { + collData.particleFlags.api |= PxParticleFlag::eCOLLISION_WITH_STATIC; + } + collData.flags |= ParticleCollisionFlags::RESET_SNORMAL; + } + } + + collData.newPos = collData.oldPos + motionDir; + collData.velocity = motionDir * params.invTimeStep; +} + +// had to reintroduce isStatic for selective read of collData.surfaceVel for the collisionVelocity feature. at this +// point +// it would probably be better to refactor collisionResponse to take individual particle data as input again (as done +// for GPU) +void collisionResponse(ParticleCollData& collData, bool twoWay, bool isStatic, CollisionParameters& params) +{ + bool continuousCollision = (collData.flags & ParticleCollisionFlags::CC) > 0; + bool discreteCollision = (collData.flags & ParticleCollisionFlags::DC) > 0; + + // update of newPos + PxVec3 surfaceNormal = collData.surfaceNormal; // avoid LHS + PxVec3 surfaceVel = isStatic ? PxVec3(0) : collData.surfaceVel; // avoid LHS + if(continuousCollision) + { + // Particle has penetrated shape surface -> Push particle back to surface + PX_ASSERT(!(collData.flags & ParticleCollisionFlags::DC)); + PX_ASSERT(collData.ccTime < 1.0f); + PX_ASSERT(PxAbs(collData.surfaceNormal.magnitude() - 1) < PXS_SURFACE_NORMAL_UNIT_TOLERANCE); + + collData.newPos = collData.surfacePos; + } + else if(discreteCollision) + { + PxReal invDCNum = invertDcNum(collData.dcNum); + collData.newPos = collData.surfacePos * invDCNum; + surfaceVel = collData.surfaceVel * invDCNum; + collData.surfaceVel = surfaceVel; + + // Since normals have unit length, we do not need to average, it is enough to + // normalize the summed up contact normals. 
+ if(invDCNum == 1.0f) + ; + else + { + surfaceNormal = + collData.surfaceNormal * physx::intrinsics::recipSqrt(collData.surfaceNormal.magnitudeSquared()); + collData.surfaceNormal = surfaceNormal; + } + + collData.dcNum = 0.0f; + } + else + { + // Note: Proximity collisions have no immediate effect on the particle position, + // they are only used to build constraints. + + PX_ASSERT(!(collData.flags & (ParticleCollisionFlags::DC | ParticleCollisionFlags::CC))); + return; // It is important to return here if there is no collision + } + + PX_ASSERT(continuousCollision || discreteCollision); + + PxVec3 newVel; + reflectVelocity(newVel, collData.velocity, collData.velocity, surfaceNormal, surfaceVel, collData.origParticleIndex, + params); + + // if the collData.twoWayShape is set, we have a collision with a dynamic rb. + if(twoWay && collData.twoWayBody) + { + collData.twoWayImpulse = collData.velocity - newVel; + } + + collData.velocity = newVel; +} + +PX_FORCE_INLINE void computeLocalCellHash(LocalCellHash& localCellHash, PxU16* hashKeyArray, const Particle* particles, + PxU32 numParticles, const PxVec3& packetCorner, const PxReal cellSizeInv) +{ + PX_ASSERT(numParticles <= PT_SUBPACKET_PARTICLE_LIMIT_COLLISION); + + PxU32 numHashEntries = Ps::nextPowerOfTwo(numParticles + 1); + numHashEntries = PxMin(PxU32(PT_LOCAL_HASH_SIZE_MESH_COLLISION), numHashEntries); + + // Make sure the number of hash entries is a power of 2 (requirement for the used hash function) + PX_ASSERT((((numHashEntries - 1) ^ numHashEntries) + 1) == (2 * numHashEntries)); + PX_ASSERT(numHashEntries > numParticles); + + // Get local cell hash for the current subpacket + SpatialHash::buildLocalHash(particles, numParticles, localCellHash.hashEntries, localCellHash.particleIndices, + hashKeyArray, numHashEntries, cellSizeInv, packetCorner); + + localCellHash.numHashEntries = numHashEntries; + localCellHash.numParticles = numParticles; + localCellHash.isHashValid = true; +} + +} // namespace Pt +} // 
namespace physx + +#endif // PX_USE_PARTICLE_SYSTEM_API +#endif // PT_COLLISION_HELPER_H diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionMesh.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionMesh.cpp new file mode 100644 index 00000000..b9f96e3e --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionMesh.cpp @@ -0,0 +1,698 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. 
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#include "PtCollisionMethods.h"
#if PX_USE_PARTICLE_SYSTEM_API

#include "PtHeightFieldAabbTest.h"
#include "GuTriangleVertexPointers.h"
#include "PtConstants.h"
#include "GuBox.h"
#include "GuMidphaseInterface.h"

using namespace physx;
using namespace Pt;
using namespace Gu;

//
// Collide particle against mesh triangle
//
// Project particle on triangle plane, check if projected particle is inside triangle
// using barycentric coordinates.
//                                     //
//                q2                   //
//                *                    //
//               / \                   //
//              /   \                  //
//             /     \                 //
//            /       \                //
//    q0 *--------------* q1           //
//
// Triangle with points q0, q1, q2.
//
// Point p on plane defined by triangle:
//
// p = q0 + (u * (q1 - q0)) + (v * (q2 - q0))
//   = q0 + (u * e0) + (v * e1)
//
// -> (p - q0) = (u * e0) + (v * e1)  // Subtract q0 from both sides
//          e2 = (u * e0) + (v * e1)
//
// We have two unknowns (u and v) so we need two equations to solve for them. Dot both sides by e0 to get one and
// dot both sides by e1 to get a second.
//
// e2 . e0 = ((u * e0) + (v * e1)) . e0    (1)
// e2 . e1 = ((u * e0) + (v * e1)) . e1    (2)
//
// Distribute e0 and e1
//
// e2 . e0 = u * (e0 . e0) + v * (e1 . e0) (1)
// e2 . e1 = u * (e0 . e1) + v * (e1 . e1) (2)
//
// Solve for u, v
//
// u = ((e1.e1)(e0.e2) - (e0.e1)(e1.e2)) / ((e0.e0)(e1.e1) - (e0.e1)(e0.e1))
// v = ((e0.e0)(e1.e2) - (e0.e1)(e0.e2)) / ((e0.e0)(e1.e1) - (e0.e1)(e0.e1))
//
// Setting a = e0.e0, b = e0.e1, c = e1.e1, d = e0.(-e2), e = e1.(-e2) we can write
//
// u = (b*e - c*d) / (a*c - b*b)
// v = (b*d - a*e) / (a*c - b*b)
//
// If (u >= 0) and (v >= 0) and (u + v <= 1) the point lies inside the triangle.
//
// Note that u and v do not need to be computed in full to do the test.
// Lets define the following substitutions:
// x = (b*e - c*d)
// y = (b*d - a*e)
// z = (a*c - b*b)  // Never negative (Cauchy-Schwarz); zero only for a degenerate triangle.
//
// If (x >= 0) and (y >= 0) and (x + y <= z) the point lies inside the triangle.
//
//

namespace physx
{
// Tests one particle motion segment (oldPos -> newPos) against the triangle (origin, origin + e0, origin + e1).
//
// Returns a bit mask built from ParticleCollisionFlags::L_DC, L_PROX and L_CC. The output references are only
// written for the cases that end up flagged:
//   L_DC   -> surfaceNormal, surfacePos
//   L_PROX -> proxSurfaceNormal, proxSurfacePos, distOldToSurface
//   L_CC   -> surfaceNormal, surfacePos, ccTime, distOldToSurface
// L_DC and L_PROX are only produced while hasCC is false. The continuous test only runs when neither the discrete
// nor the proximity condition holds for newPos, so L_CC is never combined with the other two flags.
//
// collRadius - distance threshold for the discrete test; also the offset along the normal at which the returned
//              surface positions are placed.
// proxRadius - distance threshold for the proximity test.
PX_FORCE_INLINE PxU32 collideWithMeshTriangle(PxVec3& surfaceNormal, PxVec3& surfacePos, PxVec3& proxSurfaceNormal,
                                              PxVec3& proxSurfacePos, PxReal& ccTime, PxReal& distOldToSurface,
                                              const PxVec3& oldPos, const PxVec3& newPos, const PxVec3& origin,
                                              const PxVec3& e0, const PxVec3& e1, bool hasCC, const PxReal& collRadius,
                                              const PxReal& proxRadius)
{
    PxU32 flags = 0;

    PxReal collisionRadius2 = collRadius * collRadius;
    PxReal proximityRadius2 = proxRadius * proxRadius;

    PxVec3 motion = newPos - oldPos;

    // dc and proximity tests
    // tmpV is -(newPos - origin), i.e. -e2 in the derivation above.
    PxVec3 tmpV = origin - newPos;

    PxReal a = e0.dot(e0);
    PxReal b = e0.dot(e1);
    PxReal c = e1.dot(e1);
    PxReal d = e0.dot(tmpV);
    PxReal e = e1.dot(tmpV);
    PxVec3 coords;
    coords.x = b * e - c * d; // s * det
    coords.y = b * d - a * e; // t * det
    coords.z = a * c - b * b; // det

    // Pick the triangle point used as "closest": clamp onto an edge when the projection is outside.
    // Note that a and c may be raised to FLT_MIN here; the continuous test below reuses them.
    bool insideCase = false;
    PxVec3 clampedCoords(PxVec3(0));
    if(coords.x <= 0.0f)
    {
        // x stays 0: point on the e1 edge
        c = PxMax(c, FLT_MIN);
        clampedCoords.y = -e / c;
    }
    else if(coords.y <= 0.0f)
    {
        // y stays 0: point on the e0 edge
        a = PxMax(a, FLT_MIN);
        clampedCoords.x = -d / a;
    }
    else if(coords.x + coords.y > coords.z)
    {
        // beyond the third edge: point with x + y == 1
        PxReal denominator = a + c - b - b;
        PxReal numerator = c + e - b - d;
        denominator = PxMax(denominator, FLT_MIN);
        clampedCoords.x = numerator / denominator;
        clampedCoords.y = 1.0f - clampedCoords.x;
    }
    else // all inside
    {
        PxReal tmpF = PxMax(coords.z, FLT_MIN);
        tmpF = 1.0f / tmpF;
        clampedCoords.x = coords.x * tmpF;
        clampedCoords.y = coords.y * tmpF;
        insideCase = true;
    }
    // Keep both coordinates in [0, 1].
    clampedCoords.x = PxMax(clampedCoords.x, 0.0f);
    clampedCoords.y = PxMax(clampedCoords.y, 0.0f);
    clampedCoords.x = PxMin(clampedCoords.x, 1.0f);
    clampedCoords.y = PxMin(clampedCoords.y, 1.0f);

    // Closest point to particle inside triangle
    PxVec3 closest = origin + e0 * clampedCoords.x + e1 * clampedCoords.y;

    PxVec3 triangleOffset = newPos - closest;
    PxReal triangleDistance2 = triangleOffset.magnitudeSquared();

    // Unnormalized triangle normal and its length.
    PxVec3 triangleNormal = e0.cross(e1);
    PxReal e0e1Span = triangleNormal.magnitude();

    // newPos lies on the side the normal (e0 x e1) points to.
    bool isInFront = triangleOffset.dot(triangleNormal) > 0.0f;

    // MS: Possible optimization
    /*
    if (isInFront && (triangleDistance2 >= proximityRadius2))
        return flags;
    */

    // Proximity additionally requires that the projection fell inside the triangle.
    bool isInProximity = insideCase && (triangleDistance2 < proximityRadius2) && isInFront;
    bool isInDiscrete = (triangleDistance2 < collisionRadius2) && isInFront;

    if(!hasCC)
    {
        // Only apply discrete and proximity collisions if no continuous collision was detected so far (for any
        // colliding shape)

        if(isInDiscrete)
        {
            // NOTE(review): PT_COLL_TRI_DISTANCE is compared against a squared distance -- confirm the constant is
            // meant as a squared value.
            if(triangleDistance2 > PT_COLL_TRI_DISTANCE)
            {
                surfaceNormal = triangleOffset * PxRecipSqrt(triangleDistance2);
            }
            else
            {
                // too close to the closest point to normalize the offset: fall back to the triangle normal
                surfaceNormal = triangleNormal * (1.0f / e0e1Span);
            }
            surfacePos = closest + (surfaceNormal * collRadius);
            flags |= ParticleCollisionFlags::L_DC;
        }

        if(isInProximity)
        {
            proxSurfaceNormal = triangleNormal * (1.0f / e0e1Span);
            proxSurfacePos = closest + (proxSurfaceNormal * collRadius);
            flags |= ParticleCollisionFlags::L_PROX;

            tmpV = (oldPos - origin); // this time it's not the newPosition offset.
            distOldToSurface = proxSurfaceNormal.dot(tmpV); // Need to return the distance to decide which constraints
                                                            // should be thrown away
        }
    }

    if(!isInDiscrete && !isInProximity)
    {
        // cc test (let's try only executing this if no discrete coll, or proximity happened).
        tmpV = origin - oldPos; // this time it's not the newPosition offset.
        PxReal pDistN = triangleNormal.dot(tmpV);
        PxReal rLengthN = triangleNormal.dot(motion);

        // Reject if oldPos is behind the triangle plane or the motion does not reach the plane.
        if(pDistN > 0.0f || rLengthN >= pDistN)
            return flags;

        // we are in the half closed interval [0.0f, 1.0)

        PxReal t = pDistN / rLengthN;
        PX_ASSERT((t >= 0.0f) && (t < 1.0f));

        // Intersection of the motion segment with the triangle plane.
        PxVec3 relativePOSITION = (motion * t);
        PxVec3 testPoint = oldPos + relativePOSITION;

        // a,b,c and coords.z don't depend on test point -> still valid
        tmpV = origin - testPoint;
        d = e0.dot(tmpV);
        e = e1.dot(tmpV);
        coords.x = b * e - c * d;
        coords.y = b * d - a * e;

        // maybe we don't need this for rare case leaking on triangle boundaries?
        PxReal eps = coords.z * PT_COLL_RAY_EPSILON_FACTOR;

        // Inside test from the derivation above, widened by eps on every edge.
        if((coords.x >= -eps) && (coords.y >= -eps) && (coords.x + coords.y <= coords.z + eps))
        {
            PxReal invLengthN = (1.0f / e0e1Span);
            distOldToSurface = -pDistN * invLengthN; // Need to return the distance to decide which constraints should
                                                     // be thrown away
            surfaceNormal = triangleNormal * invLengthN;
            // surfacePos = testPoint + (surfaceNormal * collRadius);
            computeContinuousTargetPosition(surfacePos, oldPos, relativePOSITION, surfaceNormal, collRadius);
            ccTime = t;
            flags |= ParticleCollisionFlags::L_CC;
        }
    }

    return flags;
}
}

// Stores a new constraint (built from normal / position, transformed by shape2World) in one of the two constraint
// slots collData.c0 / collData.c1.
//
// Slot choice: the first slot whose VALID flag is not set. If both are valid, the old particle position is
// measured against both stored constraints (normal . p - d) and the slot with the larger value is replaced, but
// only if distToSurface is smaller than that value; otherwise nothing is written.
// The chosen slot is flagged VALID and its DYNAMIC flag is cleared.
PX_FORCE_INLINE void setConstraintData(ParticleCollData& collData, const PxReal& distToSurface, const PxVec3& normal,
                                       const PxVec3& position, const PxTransform& shape2World)
{
    PxU32 i;

    if(!(collData.particleFlags.low & InternalParticleFlag::eCONSTRAINT_0_VALID))
    {
        i = 0;
    }
    else if(!(collData.particleFlags.low & InternalParticleFlag::eCONSTRAINT_1_VALID))
    {
        i = 1;
    }
    else
    {
        PxVec3 oldWorldSurfacePos(shape2World.transform(collData.localOldPos));

        PxReal dist0 = collData.c0->normal.dot(oldWorldSurfacePos) - collData.c0->d;
        PxReal dist1 = collData.c1->normal.dot(oldWorldSurfacePos) - collData.c1->d;

        if(dist0 < dist1)
        {
            if(distToSurface < dist1)
                i = 1;
            else
                return;
        }
        else if(distToSurface < dist0)
        {
            i = 0;
        }
        else
            return;
    }

    PxVec3 newSurfaceNormal(shape2World.rotate(normal));
    PxVec3 newSurfacePos(shape2World.transform(position));
    Constraint cN(newSurfaceNormal, newSurfacePos);

    if(i == 0)
    {
        *collData.c0 = cN;
        collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_0_VALID;
        collData.particleFlags.low &= PxU16(~InternalParticleFlag::eCONSTRAINT_0_DYNAMIC);
    }
    else
    {
        *collData.c1 = cN;
        collData.particleFlags.low |= InternalParticleFlag::eCONSTRAINT_1_VALID;
        collData.particleFlags.low &= PxU16(~InternalParticleFlag::eCONSTRAINT_1_DYNAMIC);
    }
}

// Merges the result of one triangle test (collFlags plus the matching outputs of collideWithMeshTriangle) into the
// per-shape data of collData.
//
// - L_CC: if ccTime is earlier than collData.ccTime the local surface data, ccTime and localFlags are overwritten
//   (localFlags becomes exactly L_CC). A constraint is always stored and hasCC is set to true.
// - otherwise, and only while hasCC is false: L_PROX stores a constraint and sets the flag; L_DC adds the surface
//   normal / position to the running sums and increments localDcNum.
PX_FORCE_INLINE void updateCollShapeData(ParticleCollData& collData, bool& hasCC, PxU32 collFlags, PxReal ccTime,
                                         PxReal distOldToSurface, const PxVec3& surfaceNormal, const PxVec3& surfacePos,
                                         const PxVec3& proxSurfaceNormal, const PxVec3& proxSurfacePos,
                                         const PxTransform& shape2World)
{
    if(collFlags & ParticleCollisionFlags::L_CC)
    {
        if(ccTime < collData.ccTime)
        {
            // We want the collision that happened first
            collData.localSurfaceNormal = surfaceNormal;
            collData.localSurfacePos = surfacePos;
            collData.ccTime = ccTime;
            collData.localFlags = ParticleCollisionFlags::L_CC; // Continuous collision should overwrite discrete
                                                                // collision (?)
        }

        setConstraintData(collData, distOldToSurface, surfaceNormal, surfacePos, shape2World);
        hasCC = true;
    }
    else if(!hasCC)
    {
        if(collFlags & ParticleCollisionFlags::L_PROX)
        {
            setConstraintData(collData, distOldToSurface, proxSurfaceNormal, proxSurfacePos, shape2World);

            collData.localFlags |= ParticleCollisionFlags::L_PROX;
        }

        if(collFlags & ParticleCollisionFlags::L_DC)
        {
            collData.localSurfaceNormal += surfaceNormal;
            collData.localSurfacePos += surfacePos;
            collData.localDcNum += 1.0f;
            collData.localFlags |= ParticleCollisionFlags::L_DC;
        }
    }
}

// Forward declaration: used by PxcContactCellMeshCallback::processHit() below.
void collideCellWithMeshTriangles(ParticleCollData* collData, const PxU32* collDataIndices, PxU32 numCollDataIndices,
                                  const TriangleMesh& meshData, const Cm::FastVertex2ShapeScaling& scale,
                                  const PxVec3* triangleVerts, PxU32 numTriangles, PxReal proxRadius,
                                  const PxTransform& shape2World);

// Mesh query callback that collides a set of particles (collData entries selected by collDataIndices) against every
// triangle reported by the query. If a cache is supplied, the face index of each reported triangle is added to it.
struct PxcContactCellMeshCallback : MeshHitCallback<PxRaycastHit>
{
    ParticleCollData* collData;
    const PxU32* collDataIndices;
    PxU32 numCollDataIndices;
    const TriangleMesh& meshData;
    const Cm::FastVertex2ShapeScaling meshScaling; // held by value: copied from the constructor argument
    PxReal proxRadius;
    ParticleOpcodeCache* cache; // may be NULL
    const PxTransform& shape2World;

    // Stores the arguments and resets the discrete-collision accumulators (localDcNum, localSurfaceNormal,
    // localSurfacePos) of every indexed particle.
    PxcContactCellMeshCallback(ParticleCollData* collData_, const PxU32* collDataIndices_, PxU32 numCollDataIndices_,
                               const TriangleMesh& meshData_, const Cm::FastVertex2ShapeScaling& meshScaling_,
                               PxReal proxRadius_, ParticleOpcodeCache* cache_, const PxTransform& shape2World_)
    : MeshHitCallback<PxRaycastHit>(CallbackMode::eMULTIPLE)
    , collData(collData_)
    , collDataIndices(collDataIndices_)
    , numCollDataIndices(numCollDataIndices_)
    , meshData(meshData_)
    , meshScaling(meshScaling_)
    , proxRadius(proxRadius_)
    , cache(cache_)
    , shape2World(shape2World_)
    {
        PX_ASSERT(collData);
        PX_ASSERT(collDataIndices);
        PX_ASSERT(numCollDataIndices > 0);

        // init
        const PxU32* collDataIndexIt = collDataIndices_;
        for(PxU32 i = 0; i < numCollDataIndices_; ++i, ++collDataIndexIt)
        {
            ParticleCollData& collisionShapeData = collData_[*collDataIndexIt];
            collisionShapeData.localDcNum = 0.0f;
            collisionShapeData.localSurfaceNormal = PxVec3(0);
            collisionShapeData.localSurfacePos = PxVec3(0);
        }
    }
    virtual ~PxcContactCellMeshCallback()
    {
    }

    // Called per reported triangle: collides all indexed particles with (v0, v1, v2) and records the face index in
    // the cache when one is set. Always returns true.
    virtual PxAgain processHit( // all reported coords are in mesh local space including hit.position
        const PxRaycastHit& hit, const PxVec3& v0, const PxVec3& v1, const PxVec3& v2, PxReal&, const PxU32*)
    {
        PxVec3 verts[3] = { v0, v1, v2 };
        collideCellWithMeshTriangles(collData, collDataIndices, numCollDataIndices, meshData, meshScaling, verts, 1,
                                     proxRadius, shape2World);

        if(cache)
            cache->add(&hit.faceIndex, 1);

        return true;
    }

  private:
    // declared but not defined: assignment is disabled (the struct has reference members)
    PxcContactCellMeshCallback& operator=(const PxcContactCellMeshCallback&);
};

// Runs the mesh query for the given world-space bounds: converts worldBounds into a box in mesh vertex space and
// reports the triangles found by Gu::intersectOBB_Particles() to the callback.
void testBoundsMesh(const TriangleMesh& meshData, const PxTransform& world2Shape,
                    const Cm::FastVertex2ShapeScaling& meshScaling, bool idtScaleMesh, const PxBounds3& worldBounds,
                    PxcContactCellMeshCallback& callback)
{
    // Find colliding triangles.
    // Setup an OBB for the fluid particle cell (in local space of shape)
    // assuming uniform scaling in most cases, using the pose as box rotation
    // if scaling is non-uniform, the bounding box is conservative

    Box vertexSpaceAABB;
    computeVertexSpaceAABB(vertexSpaceAABB, worldBounds, world2Shape, meshScaling, idtScaleMesh);

    Gu::intersectOBB_Particles(&meshData, vertexSpaceAABB, callback, true);
}

// Collides one particle with numTriangles triangles. triangleVerts holds three consecutive vertices per triangle;
// each vertex is multiplied by scale before the test. The per-triangle results are merged into
// collisionShapeData via updateCollShapeData().
void collideWithMeshTriangles(ParticleCollData& collisionShapeData, const TriangleMesh& /*meshData*/,
                              const Cm::FastVertex2ShapeScaling& scale, const PxVec3* triangleVerts, PxU32 numTriangles,
                              PxReal proxRadius, const PxTransform& shape2World)
{
    // This function can be called several times for the same particle, so a continuous hit recorded by an earlier
    // call (L_CC in localFlags) is taken into account as well.
    bool hasCC = ((collisionShapeData.localFlags & ParticleCollisionFlags::CC) ||
                  (collisionShapeData.localFlags & ParticleCollisionFlags::L_CC));

    PxVec3 tmpSurfaceNormal(0.0f);
    PxVec3 tmpSurfacePos(0.0f);
    PxVec3 tmpProxSurfaceNormal(0.0f);
    PxVec3 tmpProxSurfacePos(0.0f);
    PxReal tmpCCTime(0.0f);
    PxReal tmpDistOldToSurface(0.0f);

    for(PxU32 i = 0; i < numTriangles; ++i)
    {
        // When the scaling flips the normal, the second and third vertex are swapped.
        // (The value does not depend on i.)
        const PxI32 winding = scale.flipsNormal() ? 1 : 0;
        PxVec3 v0 = scale * triangleVerts[i * 3];
        PxVec3 v1 = scale * triangleVerts[i * 3 + 1 + winding];
        PxVec3 v2 = scale * triangleVerts[i * 3 + 2 - winding];

        PxU32 tmpFlags =
            collideWithMeshTriangle(tmpSurfaceNormal, tmpSurfacePos, tmpProxSurfaceNormal, tmpProxSurfacePos, tmpCCTime,
                                    tmpDistOldToSurface, collisionShapeData.localOldPos, collisionShapeData.localNewPos,
                                    v0, v1 - v0, v2 - v0, hasCC, collisionShapeData.restOffset, proxRadius);

        updateCollShapeData(collisionShapeData, hasCC, tmpFlags, tmpCCTime, tmpDistOldToSurface, tmpSurfaceNormal,
                            tmpSurfacePos, tmpProxSurfaceNormal, tmpProxSurfacePos, shape2World);
    }
}

// Collides every particle selected by collDataIndices with the given triangles (see collideWithMeshTriangles()).
void collideCellWithMeshTriangles(ParticleCollData* collData, const PxU32* collDataIndices, PxU32 numCollDataIndices,
                                  const TriangleMesh& meshData, const Cm::FastVertex2ShapeScaling& scale,
                                  const PxVec3* triangleVerts, PxU32 numTriangles, PxReal proxRadius,
                                  const PxTransform& shape2World)
{
    PX_ASSERT(collData);
    PX_ASSERT(collDataIndices);
    PX_ASSERT(numCollDataIndices > 0);
    PX_ASSERT(triangleVerts);

    const PxU32* collDataIndexIt = collDataIndices;
    for(PxU32 i = 0; i < numCollDataIndices; ++i, ++collDataIndexIt)
    {
        ParticleCollData& collisionShapeData = collData[*collDataIndexIt];
        collideWithMeshTriangles(collisionShapeData, meshData, scale, triangleVerts, numTriangles, proxRadius,
                                 shape2World);
    }
}

// Collides the particles of a local cell hash with a triangle mesh, one mesh query per occupied hash cell.
// The cellSize, collisionRange and packetCorner parameters are not used by this implementation.
void physx::Pt::collideCellsWithStaticMesh(ParticleCollData* collData, const LocalCellHash& localCellHash,
                                           const GeometryUnion& meshShape, const PxTransform& world2Shape,
                                           const PxTransform& shape2World, PxReal /*cellSize*/,
                                           PxReal /*collisionRange*/, PxReal proxRadius, const PxVec3& /*packetCorner*/)
{
    PX_ASSERT(collData);
    PX_ASSERT(localCellHash.isHashValid);
    PX_ASSERT(localCellHash.numParticles <= PT_SUBPACKET_PARTICLE_LIMIT_COLLISION);
    PX_ASSERT(localCellHash.numHashEntries <= PT_LOCAL_HASH_SIZE_MESH_COLLISION);

    const PxTriangleMeshGeometryLL& meshShapeData = meshShape.get<const PxTriangleMeshGeometryLL>();

    const TriangleMesh* meshData = meshShapeData.meshData;
    PX_ASSERT(meshData);

    // mesh bounds in world space (conservative)
    const PxBounds3 shapeBounds =
        meshData->getLocalBoundsFast().transformSafe(world2Shape.getInverse() * meshShapeData.scale);

    const bool idtScaleMesh = meshShapeData.scale.isIdentity();

    // meshScaling is left default-constructed when the mesh scale is identity.
    Cm::FastVertex2ShapeScaling meshScaling;
    if(!idtScaleMesh)
        meshScaling.init(meshShapeData.scale);

    // process the particle cells
    for(PxU32 c = 0; c < localCellHash.numHashEntries; c++)
    {
        const ParticleCell& cell = localCellHash.hashEntries[c];

        // skip hash entries marked with PX_INVALID_U32
        if(cell.numParticles == PX_INVALID_U32)
            continue;

        PxBounds3 cellBounds;

        // cellBounds collects the old positions; cellBoundsNew collects the new positions and is inflated by
        // proxRadius before both are merged.
        cellBounds.setEmpty();
        PxBounds3 cellBoundsNew(PxBounds3::empty());

        PxU32* it = localCellHash.particleIndices + cell.firstParticle;
        const PxU32* end = it + cell.numParticles;
        for(; it != end; it++)
        {
            const ParticleCollData& particle = collData[*it];
            cellBounds.include(particle.oldPos);
            cellBoundsNew.include(particle.newPos);
        }
        PX_ASSERT(!cellBoundsNew.isEmpty());
        cellBoundsNew.fattenFast(proxRadius);
        cellBounds.include(cellBoundsNew);

        if(!cellBounds.intersects(shapeBounds))
            continue; // early out if (inflated) cell doesn't intersect mesh bounds

        // opcode query: cell bounds against shape bounds in unscaled mesh space
        PxcContactCellMeshCallback callback(collData, &(localCellHash.particleIndices[cell.firstParticle]),
                                            cell.numParticles, *meshData, meshScaling, proxRadius, NULL, shape2World);
        testBoundsMesh(*meshData, world2Shape, meshScaling, idtScaleMesh, cellBounds, callback);
    }
}

// Collides numParticles particles with a triangle mesh, one particle at a time, using the per-particle triangle
// cache opcodeCaches[i]:
//  - cache readable for the particle bounds: collide against the cached triangles only;
//  - cache not readable but both eGEOM_CACHE_BIT_0 and eGEOM_CACHE_BIT_1 set: query the mesh without touching
//    the cache;
//  - otherwise: query the mesh with enlarged (predicted) bounds and write the result to the cache.
// The cellSize parameter is not used by this implementation.
void physx::Pt::collideWithStaticMesh(PxU32 numParticles, ParticleCollData* collData, ParticleOpcodeCache* opcodeCaches,
                                      const GeometryUnion& meshShape, const PxTransform& world2Shape,
                                      const PxTransform& shape2World, PxReal /*cellSize*/, PxReal collisionRange,
                                      PxReal proxRadius)
{
    PX_ASSERT(collData);
    PX_ASSERT(opcodeCaches);

    const PxTriangleMeshGeometryLL& meshShapeData = meshShape.get<const PxTriangleMeshGeometryLL>();

    const bool idtScaleMesh = meshShapeData.scale.isIdentity();
    Cm::FastVertex2ShapeScaling meshScaling;
    if(!idtScaleMesh)
        meshScaling.init(meshShapeData.scale);

    const PxF32 maxCacheBoundsExtent = 4 * collisionRange + proxRadius;
    const ParticleOpcodeCache::QuantizationParams quantizationParams =
        ParticleOpcodeCache::getQuantizationParams(maxCacheBoundsExtent);

    const TriangleMesh* meshData = meshShapeData.meshData;
    PX_ASSERT(meshData);

    bool isSmallMesh = meshData->has16BitIndices();
    // scratch buffer shared by all particles for reading / building the cached triangle index list
    PxU32 cachedTriangleBuffer[ParticleOpcodeCache::sMaxCachedTriangles];

    PxVec3 extent(proxRadius);

    for(PxU32 i = 0; i < numParticles; ++i)
    {
        // had to make this non-const to be able to update cache bits
        ParticleCollData& particle = collData[i];
        ParticleOpcodeCache& cache = opcodeCaches[i];

        // bounds of this step: newPos inflated by proxRadius, plus oldPos
        PxBounds3 bounds;
        {
            bounds = PxBounds3(particle.newPos - extent, particle.newPos + extent);
            bounds.include(particle.oldPos);
        }

        PxU32 numTriangles = 0;
        const PxU32* triangles = NULL;
        bool isCached = cache.read(particle.particleFlags.low, numTriangles, cachedTriangleBuffer, bounds,
                                   quantizationParams, &meshShape, isSmallMesh);

        if(isCached)
        {
            triangles = cachedTriangleBuffer;
            if(numTriangles > 0)
            {
                // fetch the vertices of all cached triangles
                PxVec3 triangleVerts[ParticleOpcodeCache::sMaxCachedTriangles * 3];
                const PxU32* triangleIndexIt = triangles;
                for(PxU32 j = 0; j < numTriangles; ++j, ++triangleIndexIt)
                {
                    TriangleVertexPointers::getTriangleVerts(meshData, *triangleIndexIt, triangleVerts[j * 3],
                                                             triangleVerts[j * 3 + 1], triangleVerts[j * 3 + 2]);
                }

                // same reset the callback constructor performs in the query paths below
                collData[i].localDcNum = 0.0f;
                collData[i].localSurfaceNormal = PxVec3(0);
                collData[i].localSurfacePos = PxVec3(0);

                collideWithMeshTriangles(collData[i], *meshData, meshScaling, triangleVerts, numTriangles, proxRadius,
                                         shape2World);
            }
        }
        else if((particle.particleFlags.low & InternalParticleFlag::eGEOM_CACHE_BIT_0) != 0 &&
                (particle.particleFlags.low & InternalParticleFlag::eGEOM_CACHE_BIT_1) != 0)
        {
            // don't update the cache since it's already successfully in use
            PxcContactCellMeshCallback callback(collData, &i, 1, *meshData, meshScaling, proxRadius, NULL, shape2World);

            testBoundsMesh(*meshData, world2Shape, meshScaling, idtScaleMesh, bounds, callback);
        }
        else
        {
            // compute new conservative bounds for cache
            PxBounds3 cachedBounds;
            {
                PxVec3 predictedExtent(proxRadius * 1.5f);

                // add future newpos + extent
                PxVec3 newPosPredicted = particle.newPos + 3.f * (particle.newPos - particle.oldPos);
                cachedBounds = PxBounds3(newPosPredicted - predictedExtent, newPosPredicted + predictedExtent);

                // add next oldpos + extent
                cachedBounds.include(PxBounds3(particle.newPos - predictedExtent, particle.newPos + predictedExtent));

                // add old pos
                cachedBounds.include(particle.oldPos);
            }

            cache.init(cachedTriangleBuffer);

            // the callback function will call collideWithMeshTriangles()
            PxcContactCellMeshCallback callback(collData, &i, 1, *meshData, meshScaling, proxRadius, &cache, shape2World);

            // opcode query: cache bounds against shape bounds in unscaled mesh space
            testBoundsMesh(*meshData, world2Shape, meshScaling, idtScaleMesh, cachedBounds, callback);

            // update cache
            cache.write(particle.particleFlags.low, cachedBounds, quantizationParams, meshShape, isSmallMesh);
        }
    }
}

// Collides numCollData particles with a height field. For each particle the triangles overlapping the bounds of
// its local old / new position (inflated by proxRadius) are visited and tested with collideWithMeshTriangle().
void physx::Pt::collideWithStaticHeightField(ParticleCollData* particleCollData, PxU32 numCollData,
                                             const GeometryUnion& heightFieldShape, PxReal proxRadius,
                                             const PxTransform& shape2World)
{
    PX_ASSERT(particleCollData);

    const PxHeightFieldGeometryLL& hfGeom = heightFieldShape.get<const PxHeightFieldGeometryLL>();
    const HeightFieldUtil hfUtil(hfGeom);

    for(PxU32 p = 0; p < numCollData; p++)
    {
        ParticleCollData& collData = particleCollData[p];

        PxBounds3 particleBounds = PxBounds3::boundsOfPoints(collData.localOldPos, collData.localNewPos);
        PX_ASSERT(!particleBounds.isEmpty());
        particleBounds.fattenFast(proxRadius);

        HeightFieldAabbTest test(particleBounds, hfUtil);
        HeightFieldAabbTest::Iterator itBegin = test.begin();
        HeightFieldAabbTest::Iterator itEnd = test.end();
        PxVec3 triangle[3];

        // reset the discrete-collision accumulators for this shape
        collData.localDcNum = 0.0f;
        collData.localSurfaceNormal = PxVec3(0);
        collData.localSurfacePos = PxVec3(0);
        // hasCC is updated by updateCollShapeData() while iterating the triangles below.
        bool hasCC = (collData.localFlags & ParticleCollisionFlags::CC) > 0;

        PxVec3 tmpSurfaceNormal(0.0f);
        PxVec3 tmpSurfacePos(0.0f);
        PxVec3 tmpProxSurfaceNormal(0.0f);
        PxVec3 tmpProxSurfacePos(0.0f);
        PxReal tmpCCTime(collData.ccTime);
        PxReal tmpDistOldToSurface(0.0f);

        for(HeightFieldAabbTest::Iterator it = itBegin; it != itEnd; ++it)
        {
            it.getTriangleVertices(triangle);

            // triangle as origin plus two edge vectors
            const PxVec3& origin = triangle[0];
            PxVec3 e0, e1;
            e0 = triangle[1] - origin;
            e1 = triangle[2] - origin;

            PxU32 tmpFlags =
                collideWithMeshTriangle(tmpSurfaceNormal, tmpSurfacePos, tmpProxSurfaceNormal, tmpProxSurfacePos,
                                        tmpCCTime, tmpDistOldToSurface, collData.localOldPos, collData.localNewPos,
                                        origin, e0, e1, hasCC, collData.restOffset, proxRadius);

            updateCollShapeData(collData, hasCC, tmpFlags, tmpCCTime, tmpDistOldToSurface, tmpSurfaceNormal,
                                tmpSurfacePos, tmpProxSurfaceNormal, tmpProxSurfacePos, shape2World);
        }
    }
}

#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionMethods.h b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionMethods.h new file mode 100644 index 00000000..fdbbb191 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionMethods.h @@ -0,0 +1,93 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PT_COLLISION_METHODS_H +#define PT_COLLISION_METHODS_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "foundation/PxVec3.h" +#include "PtConfig.h" +#include "PtCollisionData.h" +#include "PtSpatialHash.h" +#include "PtParticleOpcodeCache.h" +#include "GuGeometryUnion.h" + +namespace physx +{ + +namespace Pt +{ + +/*! 
Collision routines for fluid particles
*/

// The routines below share a calling pattern: an array of ParticleCollData with numCollData entries, the shape to
// collide against and the proximity radius. Only the mesh and height field implementations were reviewed for the
// notes in this header.

void collideWithPlane(ParticleCollData* particleCollData, PxU32 numCollData, const Gu::GeometryUnion& planeShape,
                      PxReal proxRadius);

void collideWithConvexPlanes(ParticleCollData& collData, const PxPlane* planes, const PxU32 numPlanes,
                             const PxReal proxRadius);
void collideWithConvexPlanesSIMD(ParticleCollDataV4& collDataV4, const PxPlane* convexPlanes, PxU32 numPlanes,
                                 const PxReal proxRadius);

/**
input scaledPlaneBuf needs a capacity of the number of planes in convexShape
*/
void collideWithConvex(PxPlane* scaledPlaneBuf, ParticleCollData* particleCollData, PxU32 numCollData,
                       const Gu::GeometryUnion& convexShape, const PxReal proxRadius);

void collideWithBox(ParticleCollData* particleCollData, PxU32 numCollData, const Gu::GeometryUnion& boxShape,
                    PxReal proxRadius);

void collideWithCapsule(ParticleCollData* particleCollData, PxU32 numCollData, const Gu::GeometryUnion& capsuleShape,
                        PxReal proxRadius);

void collideWithSphere(ParticleCollData* particleCollData, PxU32 numCollData, const Gu::GeometryUnion& sphereShape,
                       PxReal proxRadius);

/**
Collides the particles referenced by localCellHash with a triangle mesh, issuing one mesh query per occupied hash
cell. The implementation in PtCollisionMesh.cpp does not use cellSize, collisionRange or packetCorner.
*/
void collideCellsWithStaticMesh(ParticleCollData* particleCollData, const LocalCellHash& localCellHash,
                                const Gu::GeometryUnion& meshShape, const PxTransform& world2Shape,
                                const PxTransform& shape2World, PxReal cellSize, PxReal collisionRange,
                                PxReal proxRadius, const PxVec3& packetCorner);

/**
Collides numParticles particles with a triangle mesh, one particle at a time. opcodeCaches is indexed with the same
index as particleCollData. The implementation in PtCollisionMesh.cpp does not use cellSize.
*/
void collideWithStaticMesh(PxU32 numParticles, ParticleCollData* particleCollData, ParticleOpcodeCache* opcodeCaches,
                           const Gu::GeometryUnion& meshShape, const PxTransform& world2Shape,
                           const PxTransform& shape2World, PxReal cellSize, PxReal collisionRange, PxReal proxRadius);

/**
Collides numCollData particles with a height field.
*/
void collideWithStaticHeightField(ParticleCollData* particleCollData, PxU32 numCollData,
                                  const Gu::GeometryUnion& heightFieldShape, PxReal proxRadius,
                                  const PxTransform& shape2World);

} // namespace Pt
} // namespace physx

#endif // PX_USE_PARTICLE_SYSTEM_API
+#endif // PT_COLLISION_METHODS_H diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionParameters.h b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionParameters.h new file mode 100644 index 00000000..736cfcd1 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionParameters.h @@ -0,0 +1,70 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. 
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PT_COLLISION_PARAM_H
#define PT_COLLISION_PARAM_H

#include "PxPhysXConfig.h"
#if PX_USE_PARTICLE_SYSTEM_API

namespace physx
{

namespace Pt
{

// Plain aggregate of parameters handed to the particle collision code.
// The per-field notes below are inferred from the member names unless stated otherwise -- TODO confirm against the
// code that fills this struct.
struct CollisionParameters
{
    PxVec3 externalAcceleration;
    PxReal dampingDtComp;
    PxReal restitution;
    PxReal dynamicFriction;
    PxReal staticFrictionSqr; // presumably the squared static friction coefficient -- TODO confirm
    PxReal cellSize;
    PxReal cellSizeInv; // presumably 1 / cellSize -- TODO confirm
    PxU32 packetMultLog;
    PxU32 packetMult;
    PxReal packetSize;
    PxReal restOffset;
    PxReal contactOffset;
    PxReal maxMotionDistance;
    PxReal collisionRange;
    PxReal timeStep;
    PxReal invTimeStep; // presumably 1 / timeStep -- TODO confirm
    PxPlane projectionPlane;
    PxU32 flags;
    PxU32 temporalNoise;
};

// The struct size has to stay a multiple of 16 bytes; adding or removing members must preserve this.
PX_COMPILE_TIME_ASSERT(sizeof(CollisionParameters) % 16 == 0);

} // namespace Pt
} // namespace physx

#endif // PX_USE_PARTICLE_SYSTEM_API
#endif // PT_COLLISION_PARAM_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionPlane.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionPlane.cpp new file mode 100644 index 00000000..70d06af4 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionPlane.cpp @@ -0,0 +1,157 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PtCollisionMethods.h" +#if PX_USE_PARTICLE_SYSTEM_API + +using namespace physx; +using namespace Pt; + +namespace +{ + +PX_FORCE_INLINE void collideWithPlane(ParticleCollData& collData, PxReal proxRadius) +{ + // In plane space the normal is (1,0,0) and d is 0. This simplifies the computations below. 
+ PxReal entryTime = -FLT_MAX; + + PxReal planeDistNewPos = collData.localNewPos.x; + PxReal planeDistOldPos = collData.localOldPos.x; + + bool isContained = false; + bool hasDC = false; + bool hasProx = false; + bool parallelMotion = false; + + // Test the old pos for containment + if(planeDistOldPos <= 0.0f) + isContained = true; + + // Test proximity + if(planeDistNewPos <= proxRadius) + { + if(planeDistNewPos > 0.0f) + hasProx = true; + + // Test discrete collision + if(planeDistNewPos <= collData.restOffset) + hasDC = true; + } + + if(!(hasProx || hasDC || isContained)) + return; // We know that the old position is outside the surface and that the new position is + // not within the proximity region. + + PxVec3 planeNormal; + planeNormal = PxVec3(1.0f, 0.0f, 0.0f); + + // Test continuous collision + PxVec3 motion = collData.localNewPos - collData.localOldPos; + PxReal projMotion = motion.x; + if(projMotion == 0.0f) // parallel + { + if(planeDistNewPos > 0.0f) + parallelMotion = true; + } + else + { + PxReal hitTime = -planeDistOldPos / projMotion; + if(projMotion < 0.0f) // entry point + entryTime = hitTime; + } + + if(isContained) + { + // Treat the case where the old pos is inside the skeleton as + // a continous collision with time 0 + + collData.localFlags |= ParticleCollisionFlags::L_CC; + collData.ccTime = 0.0f; + collData.localSurfaceNormal = planeNormal; + + // Push the particle to the surface (such that distance to surface is equal to the collision radius) + collData.localSurfacePos = collData.localOldPos; + collData.localSurfacePos.x += (collData.restOffset - planeDistOldPos); + } + else + { + // check for continuous collision + // only add a proximity/discrete case if there are no continous collisions + // for this shape or any other shape before + + bool ccHappened = ((0.0f <= entryTime) && (entryTime < collData.ccTime) && (!parallelMotion)); + if(ccHappened) + { + collData.localSurfaceNormal = planeNormal; + + // collData.localSurfacePos = 
collData.localOldPos + (motion*entryTime); + // collData.localSurfacePos.x += collData.restOffset; + PxVec3 relativePOSITION = motion * entryTime; + computeContinuousTargetPosition(collData.localSurfacePos, collData.localOldPos, relativePOSITION, + collData.localSurfaceNormal, collData.restOffset); + + collData.ccTime = entryTime; + collData.localFlags |= ParticleCollisionFlags::L_CC; + } + else if(!(collData.localFlags & ParticleCollisionFlags::CC)) + { + // No other collision shape has caused a continuous collision so far + + PX_ASSERT(hasProx | hasDC); + + if(hasProx) // proximity + collData.localFlags |= ParticleCollisionFlags::L_PROX; + if(hasDC) // discrete collision + collData.localFlags |= ParticleCollisionFlags::L_DC; + + collData.localSurfaceNormal = planeNormal; + + // Move contact point such that the projected distance to the surface is equal + // to the collision radius + collData.localSurfacePos = collData.localNewPos; + collData.localSurfacePos.x += (collData.restOffset - planeDistNewPos); + } + } +} +} + +void physx::Pt::collideWithPlane(ParticleCollData* particleCollData, PxU32 numCollData, + const Gu::GeometryUnion& planeShape, PxReal proxRadius) +{ + PX_ASSERT(particleCollData); + PX_ASSERT(planeShape.getType() == PxGeometryType::ePLANE); + PX_UNUSED(planeShape); + + for(PxU32 p = 0; p < numCollData; p++) + { + ::collideWithPlane(particleCollData[p], proxRadius); + } +} + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionSphere.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionSphere.cpp new file mode 100644 index 00000000..cddce6af --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtCollisionSphere.cpp @@ -0,0 +1,156 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "PtCollisionMethods.h" +#if PX_USE_PARTICLE_SYSTEM_API + +using namespace physx; +using namespace Pt; + +namespace +{ + +void collideWithSphereNonContinuous(ParticleCollData& collData, const PxVec3& pos, const PxReal& radius, + const PxReal& proxRadius) +{ + if(collData.localFlags & ParticleCollisionFlags::CC) + return; // Only apply discrete and proximity collisions if no continuous collisions was detected so far (for any + // colliding shape) + + PxReal dist = pos.magnitude(); + collData.localSurfaceNormal = pos; + if(dist < (radius + proxRadius)) + { + if(dist != 0.0f) + collData.localSurfaceNormal *= (1.0f / dist); + else + collData.localSurfaceNormal = PxVec3(0); + + // Push particle to surface such that the distance to the surface is equal to the collision radius + collData.localSurfacePos = collData.localSurfaceNormal * (radius + collData.restOffset); + collData.localFlags |= ParticleCollisionFlags::L_PROX; + + if(dist < (radius + collData.restOffset)) + collData.localFlags |= ParticleCollisionFlags::L_DC; + } +} + +PX_FORCE_INLINE void collideWithSphere(ParticleCollData& collData, const PxSphereGeometry& sphereShapeData, + PxReal proxRadius) +{ + PxVec3& oldPos = collData.localOldPos; + PxVec3& newPos = collData.localNewPos; + + PxReal radius = sphereShapeData.radius; + + PxReal oldPosDist2 = oldPos.magnitudeSquared(); + PxReal radius2 = radius * radius; + + bool oldInSphere = (oldPosDist2 < radius2); + + if(oldInSphere) + { + // old position inside the skeleton + // add ccd with time 0.0 + + collData.localSurfaceNormal = oldPos; + if(oldPosDist2 > 0.0f) + collData.localSurfaceNormal *= PxRecipSqrt(oldPosDist2); + else + collData.localSurfaceNormal = PxVec3(0, 1.0f, 0); + + // Push particle to surface such that the distance to the surface is equal to the collision radius + collData.localSurfacePos = collData.localSurfaceNormal * (radius + collData.restOffset); + collData.ccTime = 0.0; + collData.localFlags |= ParticleCollisionFlags::L_CC; + } + 
else + { + // old position is outside of the skeleton + + PxVec3 motion = newPos - oldPos; + + // Discriminant + PxReal b = motion.dot(oldPos) * 2.0f; + PxReal a2 = 2.0f * motion.magnitudeSquared(); + PxReal disc = (b * b) - (2.0f * a2 * (oldPosDist2 - radius2)); + + bool intersection = disc > 0.0f; + + if((!intersection) || (a2 == 0.0f)) + { + // the ray does not intersect the sphere + collideWithSphereNonContinuous(collData, newPos, radius, proxRadius); + } + else + { + // the ray intersects the sphere + PxReal t = -(b + PxSqrt(disc)) / a2; // Compute intersection point + + if(t < 0.0f || t > 1.0f) + { + // intersection point lies outside motion vector + collideWithSphereNonContinuous(collData, newPos, radius, proxRadius); + } + else if(t < collData.ccTime) + { + // intersection point lies on sphere, add lcc + // collData.localSurfacePos = oldPos + (motion * t); + // collData.localSurfaceNormal = collData.localSurfacePos; + // collData.localSurfaceNormal *= (1.0f / radius); + // collData.localSurfacePos += (collData.localSurfaceNormal * collData.restOffset); + PxVec3 relativeImpact = motion * t; + collData.localSurfaceNormal = oldPos + relativeImpact; + collData.localSurfaceNormal *= (1.0f / radius); + computeContinuousTargetPosition(collData.localSurfacePos, collData.localOldPos, relativeImpact, + collData.localSurfaceNormal, collData.restOffset); + + collData.ccTime = t; + collData.localFlags |= ParticleCollisionFlags::L_CC; + } + } + } +} + +} // namespace + +void physx::Pt::collideWithSphere(ParticleCollData* particleCollData, PxU32 numCollData, + const Gu::GeometryUnion& sphereShape, PxReal proxRadius) +{ + PX_ASSERT(particleCollData); + + const PxSphereGeometry& sphereShapeData = sphereShape.get<const PxSphereGeometry>(); + + for(PxU32 p = 0; p < numCollData; p++) + { + ::collideWithSphere(particleCollData[p], sphereShapeData, proxRadius); + } +} + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtConfig.h 
b/PhysX_3.4/Source/LowLevelParticles/src/PtConfig.h new file mode 100644 index 00000000..224b5a8b --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtConfig.h @@ -0,0 +1,121 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PT_CONFIG_H +#define PT_CONFIG_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "PtParticleSystemFlags.h" + +// Marker for fluid particles with no collision constraints +#define PT_NO_CONSTRAINT PT_PARTICLE_SYSTEM_PARTICLE_LIMIT + +// Needs to be addressable with PxU16 Particle::hashKey +// - Ps::nextPowerOf2((PXD_PARTICLE_SYSTEM_HASH_KEY_LIMIT + 1)) must be addressable +// through PxU16 Particle::hashKey, see and PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY +// Dynamics::updatePacket() +#define PT_PARTICLE_SYSTEM_HASH_KEY_LIMIT 0x7ffe + +// Size of particle packet hash table. +// - Must be a power of 2 +// - Must be at least as large as PT_PARTICLE_SYSTEM_PACKET_LIMIT (see further below), but should be larger for the hash +// to be efficient. +// - Must to be addressable through PxU16 Pt::Particle::hashKey. +#define PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE 1024 + +// One larger than PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE to fit a special cell for overflow particles. +#define PT_PARTICLE_SYSTEM_PACKET_HASH_BUFFER_SIZE 1025 + +// Index of special overflow packet +#define PT_PARTICLE_SYSTEM_OVERFLOW_INDEX 1024 + +// Maximum number of particle packets (should be smaller than hash size since a full hash table is not efficient) +#define PT_PARTICLE_SYSTEM_PACKET_LIMIT 924 + +// Slack for building the triangle packet hash. Has to be bigger than any epsilons used in collision detection. +#define PT_PARTICLE_SYSTEM_COLLISION_SLACK 1.0e-3f + +// Maximum number of fluid particles in a packet that can be handled at a time +#define PT_SUBPACKET_PARTICLE_LIMIT 512 +// If the number of particles in a packet and the number of particles for each neighboring halo region +// are below this threshold, then no local hash will be constructed and each particle of one packet will be +// tested against each particle of the other packet (for particle-particle interaction only). 
+// +// Note: Has to be smaller or equal to PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY +#define PT_BRUTE_FORCE_PARTICLE_THRESHOLD 100 +// If the number of particles in a packet section and the number of particles in a neighboring halo +// region are below this threshold, then no local hash will be constructed and each particle of the +// packet section will be tested against each particle of the halo region (for particle-particle interaction only). +// +// Note: Has to be smaller or equal to PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY +#define PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION 200 + +// Maximum number of fluid particles in a packet that can be handled at a time for dividing +// a packet into sections and reordering the particles accordingly +#define PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS PT_SUBPACKET_PARTICLE_LIMIT + +// Maximum number of fluid particles in a packet that can be handled at a time for SPH dynamics +// calculations, i.e., computation of density & force +// - Ps::nextPowerOf2((PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY + 1)) must be addressable +// through PxU16 Particle::hashKey, see Dynamics::updatePacket(). +#define PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY PT_SUBPACKET_PARTICLE_LIMIT + +// loacl hash bucket size, should equal nextPowerOfTwo(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY + 1) +#define PT_SUBPACKET_PARTICLE_HASH_BUCKET_SIZE 512 + +// Maximum number of parallel tasks created for sph computation +#define PT_MAX_PARALLEL_TASKS_SPH 8 + +// Maximum number of fluid particles in a packet that can be handled at a time for velocity +// integration +#define PT_SUBPACKET_PARTICLE_LIMIT_VEL_INTEGRATION PT_SUBPACKET_PARTICLE_LIMIT + +// Maximum number of fluid particles in a packet that can be handled at a time for +// detecting and resolving collisions. +// - Must be smaller than PT_LOCAL_HASH_SIZE_MESH_COLLISION. +#define PT_SUBPACKET_PARTICLE_LIMIT_COLLISION 128 + +// Hash size for the local particle cell hash. 
+// - Must to be larger than PT_SUBPACKET_PARTICLE_LIMIT_COLLISION +// - Must be a power of 2 +// - Must be addressable with PxU16 Particle::hashKey +#define PT_LOCAL_HASH_SIZE_MESH_COLLISION 256 + +// Number of fluid packet shapes to run in parallel during collision update. +#define PT_NUM_PACKETS_PARALLEL_COLLISION 8 + +// Initial size of triangle mesh collision buffer (for storing indices of colliding triangles) +#define PT_INITIAL_MESH_COLLISION_BUFFER_SIZE 1024 + +#define PT_USE_SIMD_CONVEX_COLLISION 1 + +#endif // PX_USE_PARTICLE_SYSTEM_API +#endif // PT_CONFIG_H diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtConstants.h b/PhysX_3.4/Source/LowLevelParticles/src/PtConstants.h new file mode 100644 index 00000000..74726a5b --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtConstants.h @@ -0,0 +1,45 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. 
// No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PT_CONSTANTS_H
#define PT_CONSTANTS_H

#include "PxPhysXConfig.h"
#if PX_USE_PARTICLE_SYSTEM_API

// Threshold for the angle between two contact constraint surfaces. If the two surfaces form
// a "steep valley" only one of the two constraints will be applied.
#define PT_COLL_VEL_PROJECTION_CROSS_EPSILON 1e-6f

// NOTE(review): the three tolerances below carry no description here and their use sites are
// outside this header. Going by the names they are presumably a velocity projection tolerance,
// a triangle distance tolerance and a scale factor for a ray test epsilon - confirm against the
// collision code before changing any of them.
#define PT_COLL_VEL_PROJECTION_PROJ 1e-4f
#define PT_COLL_TRI_DISTANCE 1e-5f
#define PT_COLL_RAY_EPSILON_FACTOR 1e-4f

#endif // PX_USE_PARTICLE_SYSTEM_API
#endif // PT_CONSTANTS_H
// diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtContextCpu.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtContextCpu.cpp new file mode 100644 index 00000000..e906a31b --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtContextCpu.cpp @@ -0,0 +1,325 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
+// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "PtContextCpu.h"
#if PX_USE_PARTICLE_SYSTEM_API

#if PX_SUPPORT_GPU_PHYSX
#include "task/PxGpuDispatcher.h"
#include "PxvGlobals.h"
#include "PxPhysXGpu.h"
#include "PxSceneGpu.h"
#include "gpu/PtRigidBodyAccessGpu.h"
#endif

#include "foundation/PxFoundation.h"
#include "PtParticleData.h"
#include "PtParticleSystemSimCpu.h"
#include "PtParticleShapeCpu.h"
#include "PtBatcher.h"
#include "PtBodyTransformVault.h"
#include "PsFoundation.h"

using namespace physx::shdfnd;
using namespace physx;
using namespace Pt;

// File-local table of function pointers through which all particle functionality is reached.
// The entries are only assigned in ContextCpu::registerParticles(); the public entry points below
// call through them instead of calling the implementations directly.
namespace
{
ParticleSystemSim* (ContextCpu::*addParticleSystemFn)(ParticleData*, const ParticleSystemParameter&, bool);
ParticleData* (ContextCpu::*removeParticleSystemFn)(ParticleSystemSim*, bool);
Context* (*createContextFn)(physx::PxTaskManager*, Cm::FlushPool&);
void (ContextCpu::*destroyContextFn)();

PxBaseTask& (Batcher::*scheduleShapeGenerationFn)(ParticleSystemSim** particleSystems, ParticleShapesUpdateInput* inputs,
                                                  PxU32 batchSize, PxBaseTask& continuation) = 0;
PxBaseTask& (Batcher::*scheduleDynamicsCpuFn)(ParticleSystemSim** particleSystems, PxU32 batchSize,
                                              PxBaseTask& continuation) = 0;
PxBaseTask& (Batcher::*scheduleCollisionPrepFn)(ParticleSystemSim** particleSystems, PxLightCpuTask** inputPrepTasks,
                                                PxU32 batchSize, PxBaseTask& continuation) = 0;
PxBaseTask& (Batcher::*scheduleCollisionCpuFn)(ParticleSystemSim** particleSystems, PxU32 batchSize,
                                               PxBaseTask& continuation) = 0;
PxBaseTask& (Batcher::*schedulePipelineGpuFn)(ParticleSystemSim** particleSystems, PxU32 batchSize,
                                              PxBaseTask& continuation) = 0;
}

namespace physx
{
namespace Pt
{
// Registers the particle implementation by filling in the function pointer table above.
void registerParticles()
{
	ContextCpu::registerParticles();
}

// Creates a particle context through the registered factory.
// Returns NULL if no factory has been registered (createContextFn is still null).
Context* createParticleContext(class physx::PxTaskManager* taskManager, Cm::FlushPool& taskPool)
{
	if(::createContextFn)
	{
		return ::createContextFn(taskManager, taskPool);
	}
	return NULL;
}
} // namespace Pt
} // namespace physx

// Points every entry of the function pointer table at its ContextCpu / Batcher implementation.
void ContextCpu::registerParticles()
{
	::createContextFn = &ContextCpu::createContextImpl;
	::destroyContextFn = &ContextCpu::destroyContextImpl;
	::addParticleSystemFn = &ContextCpu::addParticleSystemImpl;
	::removeParticleSystemFn = &ContextCpu::removeParticleSystemImpl;

	::scheduleShapeGenerationFn = &Batcher::scheduleShapeGeneration;
	::scheduleDynamicsCpuFn = &Batcher::scheduleDynamicsCpu;
	::scheduleCollisionPrepFn = &Batcher::scheduleCollisionPrep;
	::scheduleCollisionCpuFn = &Batcher::scheduleCollisionCpu;
	::schedulePipelineGpuFn = &Batcher::schedulePipelineGpu;
}

// Factory stored in createContextFn: allocates a new ContextCpu with PX_NEW.
Context* ContextCpu::createContextImpl(PxTaskManager* taskManager, Cm::FlushPool& taskPool)
{
	return PX_NEW(ContextCpu)(taskManager, taskPool);
}

// Pt::Context implementation: forwards to the registered destroy function.
void ContextCpu::destroy()
{
	(this->*destroyContextFn)();
}

// Deletes this context; the object must not be used after the call.
void ContextCpu::destroyContextImpl()
{
	PX_DELETE(this);
}

// Pt::Context implementation: forwards to the registered add function.
ParticleSystemSim* ContextCpu::addParticleSystem(ParticleData* particleData, const ParticleSystemParameter& parameter,
                                                 bool useGpuSupport)
{
	return (this->*addParticleSystemFn)(particleData, parameter, useGpuSupport);
}

// Pt::Context implementation: forwards to the registered remove function.
ParticleData* ContextCpu::removeParticleSystem(ParticleSystemSim* particleSystem, bool acquireParticleData)
{
	return (this->*removeParticleSystemFn)(particleSystem, acquireParticleData);
}

// Sets up the two object pools and allocates the batcher and the body transform vault.
// The GPU scene pointer starts out as NULL.
ContextCpu::ContextCpu(PxTaskManager* taskManager, Cm::FlushPool& taskPool)
: mParticleSystemPool("mParticleSystemPool", this, 16, 1024)
, mParticleShapePool("mParticleShapePool", this, 256, 1024)
, mBatcher(NULL)
, mTaskManager(taskManager)
, mTaskPool(taskPool)
#if PX_SUPPORT_GPU_PHYSX
, mGpuRigidBodyAccess(NULL)
#endif
{
	mBatcher = PX_NEW(Batcher)(*this);
	mBodyTransformVault = PX_NEW(BodyTransformVault);
	mSceneGpu = NULL;
}

// Releases the GPU scene and rigid body access object (GPU builds only, if they exist), then
// deletes the batcher and the body transform vault allocated in the constructor.
ContextCpu::~ContextCpu()
{
#if PX_SUPPORT_GPU_PHYSX
	if(mSceneGpu)
	{
		mSceneGpu->release();
	}

	if(mGpuRigidBodyAccess)
	{
		PX_DELETE(mGpuRigidBodyAccess);
	}
#endif

	PX_DELETE(mBatcher);
	PX_DELETE(mBodyTransformVault);
}

+ParticleSystemSim* ContextCpu::addParticleSystemImpl(ParticleData* particleData, + const ParticleSystemParameter& parameter, bool useGpuSupport) +{ + PX_ASSERT(particleData); + +#if PX_SUPPORT_GPU_PHYSX + if(useGpuSupport) + { + PxSceneGpu* sceneGPU = createOrGetSceneGpu(); + if(sceneGPU) + { + ParticleSystemStateDataDesc particles; + particleData->getParticlesV(particles, true, false); + ParticleSystemSim* sim = sceneGPU->addParticleSystem(particles, parameter); + + if(sim) + { + particleData->release(); + return sim; + } + } + return NULL; + } + else + { + ParticleSystemSimCpu* sim = mParticleSystemPool.get(); + sim->init(*particleData, parameter); + return sim; + } +#else + PX_UNUSED(useGpuSupport); + ParticleSystemSimCpu* sim = mParticleSystemPool.get(); + sim->init(*particleData, parameter); + return sim; +#endif +} + +ParticleData* ContextCpu::removeParticleSystemImpl(ParticleSystemSim* particleSystem, bool acquireParticleData) +{ + ParticleData* particleData = NULL; + +#if PX_SUPPORT_GPU_PHYSX + if(particleSystem->isGpuV()) + { + PX_ASSERT(getSceneGpuFast()); + if(acquireParticleData) + { + ParticleSystemStateDataDesc particles; + particleSystem->getParticleStateV().getParticlesV(particles, true, false); + particleData = ParticleData::create(particles, particleSystem->getParticleStateV().getWorldBoundsV()); + } + getSceneGpuFast()->removeParticleSystem(particleSystem); + return particleData; + } +#endif + + ParticleSystemSimCpu& sim = *static_cast<ParticleSystemSimCpu*>(particleSystem); + + if(acquireParticleData) + particleData = sim.obtainParticleState(); + + sim.clear(); + mParticleSystemPool.put(&sim); + return particleData; +} + +ParticleShapeCpu* ContextCpu::createParticleShape(ParticleSystemSimCpu* particleSystem, const ParticleCell* packet) +{ + // for now just lock the mParticleShapePool for concurrent access from different tasks + Ps::Mutex::ScopedLock lock(mParticleShapePoolMutex); + ParticleShapeCpu* shape = mParticleShapePool.get(); + + 
if(shape) + shape->init(particleSystem, packet); + + return shape; +} + +void ContextCpu::releaseParticleShape(ParticleShapeCpu* shape) +{ + // for now just lock the mParticleShapePool for concurrent access from different tasks + Ps::Mutex::ScopedLock lock(mParticleShapePoolMutex); + mParticleShapePool.put(shape); +} + +#if PX_SUPPORT_GPU_PHYSX + +PxSceneGpu* ContextCpu::createOrGetSceneGpu() +{ + if(mSceneGpu) + return mSceneGpu; + + // get PxCudaContextManager + + if(!mTaskManager || !mTaskManager->getGpuDispatcher() || !mTaskManager->getGpuDispatcher()->getCudaContextManager()) + { + Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, + "GPU operation failed. No PxCudaContextManager available."); + return NULL; + } + physx::PxCudaContextManager& contextManager = *mTaskManager->getGpuDispatcher()->getCudaContextManager(); + + // load PhysXGpu dll interface + + PxPhysXGpu* physXGpu = PxvGetPhysXGpu(true); + if(!physXGpu) + { + getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, + "GPU operation failed. PhysXGpu dll unavailable."); + return NULL; + } + + // create PxsGpuRigidBodyAccess + + PX_ASSERT(!mGpuRigidBodyAccess); + mGpuRigidBodyAccess = PX_NEW(RigidBodyAccessGpu)(*mBodyTransformVault); + + // finally create PxSceneGpu + mSceneGpu = physXGpu->createScene(contextManager, *mGpuRigidBodyAccess); + if(!mSceneGpu) + { + PX_DELETE_AND_RESET(mGpuRigidBodyAccess); + Ps::getFoundation().error(PxErrorCode::eDEBUG_WARNING, __FILE__, __LINE__, + "GPU operation failed. 
PxSceneGpu creation unsuccessful."); + } + + return mSceneGpu; +} +#endif // PX_SUPPORT_GPU_PHYSX + +PxBaseTask& ContextCpu::scheduleShapeGeneration(class ParticleSystemSim** particleSystems, + struct ParticleShapesUpdateInput* inputs, PxU32 batchSize, + PxBaseTask& continuation) +{ + return (mBatcher->*::scheduleShapeGenerationFn)(particleSystems, inputs, batchSize, continuation); +} + +PxBaseTask& ContextCpu::scheduleDynamicsCpu(class ParticleSystemSim** particleSystems, PxU32 batchSize, + PxBaseTask& continuation) +{ + return (mBatcher->*::scheduleDynamicsCpuFn)(particleSystems, batchSize, continuation); +} + +PxBaseTask& ContextCpu::scheduleCollisionPrep(class ParticleSystemSim** particleSystems, + PxLightCpuTask** inputPrepTasks, PxU32 batchSize, PxBaseTask& continuation) +{ + return (mBatcher->*::scheduleCollisionPrepFn)(particleSystems, inputPrepTasks, batchSize, continuation); +} + +PxBaseTask& ContextCpu::scheduleCollisionCpu(class ParticleSystemSim** particleSystems, PxU32 batchSize, + PxBaseTask& continuation) +{ + return (mBatcher->*::scheduleCollisionCpuFn)(particleSystems, batchSize, continuation); +} + +PxBaseTask& ContextCpu::schedulePipelineGpu(ParticleSystemSim** particleSystems, PxU32 batchSize, PxBaseTask& continuation) +{ + return (mBatcher->*::schedulePipelineGpuFn)(particleSystems, batchSize, continuation); +} + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtContextCpu.h b/PhysX_3.4/Source/LowLevelParticles/src/PtContextCpu.h new file mode 100644 index 00000000..e96e5a9b --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtContextCpu.h @@ -0,0 +1,127 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PT_CONTEXT_CPU_H +#define PT_CONTEXT_CPU_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "CmPool.h" +#include "PtContext.h" + +namespace physx +{ + +class PxBaseTask; +class PxLightCpuTask; +class PxTaskManager; + +namespace Pt +{ + +class Batcher; +class BodyTransformVault; +class ParticleShapeCpu; +class ParticleSystemSimCpu; +struct ParticleCell; + +/** +Per scene manager class for particle systems. +*/ +class ContextCpu : public Context, Ps::UserAllocated +{ + PX_NOCOPY(ContextCpu) + public: + /** + Register particle functionality. + Not calling this should allow the code to be stripped at link time. + */ + static void registerParticles(); + + // Pt::Context implementation + virtual void destroy(); + virtual ParticleSystemSim* addParticleSystem(class ParticleData* particleData, + const ParticleSystemParameter& parameter, bool useGpuSupport); + virtual ParticleData* removeParticleSystem(ParticleSystemSim* system, bool acquireParticleData); + virtual PxBaseTask& scheduleShapeGeneration(class ParticleSystemSim** particleSystems, + struct ParticleShapesUpdateInput* inputs, PxU32 batchSize, + PxBaseTask& continuation); + virtual PxBaseTask& scheduleDynamicsCpu(class ParticleSystemSim** particleSystems, PxU32 batchSize, + PxBaseTask& continuation); + virtual PxBaseTask& scheduleCollisionPrep(class ParticleSystemSim** particleSystems, PxLightCpuTask** inputPrepTasks, + PxU32 batchSize, PxBaseTask& continuation); + virtual PxBaseTask& scheduleCollisionCpu(class ParticleSystemSim** particleSystems, PxU32 batchSize, + PxBaseTask& continuation); + virtual PxBaseTask& schedulePipelineGpu(ParticleSystemSim** particleSystems, PxU32 batchSize, + PxBaseTask& continuation); +#if PX_SUPPORT_GPU_PHYSX + virtual class PxSceneGpu* createOrGetSceneGpu(); +#endif + //~Pt::Context implementation + + ParticleShapeCpu* createParticleShape(ParticleSystemSimCpu* particleSystem, const ParticleCell* packet); + void 
releaseParticleShape(ParticleShapeCpu* shape); + + Cm::FlushPool& getTaskPool() + { + return mTaskPool; + } + + private: + ContextCpu(physx::PxTaskManager* taskManager, Cm::FlushPool& taskPool); + + virtual ~ContextCpu(); + + ParticleSystemSim* addParticleSystemImpl(ParticleData* particleData, const ParticleSystemParameter& parameter, + bool useGpuSupport); + ParticleData* removeParticleSystemImpl(ParticleSystemSim* system, bool acquireParticleData); + + static Context* createContextImpl(physx::PxTaskManager* taskManager, Cm::FlushPool& taskPool); + + void destroyContextImpl(); + + Cm::PoolList<ParticleSystemSimCpu, ContextCpu> mParticleSystemPool; + Cm::PoolList<ParticleShapeCpu, ContextCpu> mParticleShapePool; + Ps::Mutex mParticleShapePoolMutex; + Batcher* mBatcher; + + physx::PxTaskManager* mTaskManager; + Cm::FlushPool& mTaskPool; + +#if PX_SUPPORT_GPU_PHYSX + class RigidBodyAccessGpu* mGpuRigidBodyAccess; +#endif +}; + +} // namespace Pt +} // namespace physx + +#endif // PX_USE_PARTICLE_SYSTEM_API +#endif // PT_CONTEXT_CPU_H diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicHelper.h b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicHelper.h new file mode 100644 index 00000000..5578a6c6 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicHelper.h @@ -0,0 +1,320 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+#ifndef PT_DYNAMIC_HELPER_H +#define PT_DYNAMIC_HELPER_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "PtDynamicsKernels.h" +#include "PtSpatialHash.h" +#include "PtDynamicsTempBuffers.h" + +namespace physx +{ + +namespace Pt +{ + +//-------------------------------------------------------------------------------------------------------------------// + +PX_FORCE_INLINE void updateParticlesPrePass(const SphUpdateType::Enum updateType, PxVec3* forceBuf, Particle* particles, + PxU32 numParticles, const DynamicsParameters& params) +{ + if(updateType == SphUpdateType::DENSITY) + { + for(PxU32 i = 0; i < numParticles; ++i) + { + Pt::Particle& particle = particles[i]; + + // Initialize particle densities with self density value + particle.density = params.selfDensity; + forceBuf[i] = PxVec3(0); + } + } +} + +//-------------------------------------------------------------------------------------------------------------------// + +PX_FORCE_INLINE void updateParticlesPostPass(const SphUpdateType::Enum updateType, PxVec3* forceBuf, + Particle* particles, PxU32 numParticles, const DynamicsParameters& params) +{ + if(updateType == SphUpdateType::FORCE) + { + for(PxU32 i = 0; i < numParticles; ++i) + { + Particle& particle = particles[i]; + + forceBuf[i] *= params.scaleToWorld * (1.0f / particle.density); + } + } +} + +//-------------------------------------------------------------------------------------------------------------------// + +/*! +Given a cell hash table, find neighboring cells and compute particle interactions. 
+*/ +void updateCellsSubpacket(SphUpdateType::Enum updateType, PxVec3* __restrict forceBuf, Particle* __restrict particles, + const ParticleCell* __restrict cells, const PxU32* __restrict particleIndices, + const PxU32 numCellHashBuckets, const DynamicsParameters& params, + DynamicsTempBuffers& tempBuffers) +{ + PX_ASSERT(particles); + PX_ASSERT(cells); + PX_ASSERT(particleIndices); + + const ParticleCell* neighborCells[13]; + + for(PxU32 c = 0; c < numCellHashBuckets; c++) + { + const ParticleCell& cell = cells[c]; + + if(cell.numParticles == PX_INVALID_U32) + continue; + + GridCellVector coords(cell.coords); + + // + // To process each pair of neighboring cells only once, a special neighborhood layout can be + // used. Thus, we do not need to consider all 26 neighbors of a cell but only half of them. + // Going through the list of cells, a cell X might not be aware of a neighboring cell Y with + // this layout, however, since cell Y in turn is aware of cell X the pair will still be processed + // at the end. + // + + // Complete back plane + PxU32 cellIdx; + + PxI16 neighbor[13][3] = { { -1, -1, -1 }, + { 0, -1, -1 }, + { 1, -1, -1 }, + { -1, 0, -1 }, + { 0, 0, -1 }, + { 1, 0, -1 }, + { -1, 1, -1 }, + { 0, 1, -1 }, + { 1, 1, -1 }, + { 1, 0, 0 }, + { -1, 1, 0 }, + { 0, 1, 0 }, + { 1, 1, 0 } }; + + for(PxU32 n = 0; n < 13; n++) + { + neighborCells[n] = SpatialHash::findConstCell( + cellIdx, GridCellVector(coords.x + neighbor[n][0], coords.y + neighbor[n][1], coords.z + neighbor[n][2]), + cells, numCellHashBuckets); + } + + // Compute interaction between particles inside the current cell + // These calls still produce a lot of LHS. Going from two way to one way updates didn't help. TODO, more + // investigation. 
+ for(PxU32 p = 1; p < cell.numParticles; p++) + { + updateParticleGroupPair(forceBuf, forceBuf, particles, particles, + particleIndices + cell.firstParticle + p - 1, 1, + particleIndices + cell.firstParticle + p, cell.numParticles - p, true, + updateType == SphUpdateType::DENSITY, params, tempBuffers.simdPositionsSubpacket, + tempBuffers.indexStream); + } + + // Compute interaction between particles of current cell and neighboring cells + PxU32 srcIndexCount = 0; + + for(PxU32 n = 0; n < 13; n++) + { + if(!neighborCells[n]) + continue; + + const ParticleCell* nCell = neighborCells[n]; + + for(PxU32 i = nCell->firstParticle, end = nCell->firstParticle + nCell->numParticles; i < end; i++) + tempBuffers.mergedIndices[srcIndexCount++] = particleIndices[i]; + } + + if(srcIndexCount > 0) + { + updateParticleGroupPair(forceBuf, forceBuf, particles, particles, particleIndices + cell.firstParticle, + cell.numParticles, tempBuffers.mergedIndices, srcIndexCount, true, + updateType == SphUpdateType::DENSITY, params, tempBuffers.simdPositionsSubpacket, + tempBuffers.indexStream); + } + } +} + +//-------------------------------------------------------------------------------------------------------------------// + +/*! +Given two subpackets, i.e., their cell hash tables and particle arrays, find for each cell of the first subpacket +the neighboring cells within the second subpacket and compute particle interactions for these neighboring cells. 
+*/ +void updateCellsSubpacketPair(SphUpdateType::Enum updateType, PxVec3* __restrict forceBufA, PxVec3* __restrict forceBufB, + Particle* __restrict particlesSpA, Particle* __restrict particlesSpB, + const ParticleCell* __restrict cellsSpA, const ParticleCell* __restrict cellsSpB, + const PxU32* __restrict particleIndicesSpA, const PxU32* __restrict particleIndicesSpB, + const PxU32 numCellHashBucketsA, const PxU32 numCellHashBucketsB, bool twoWayUpdate, + const DynamicsParameters& params, DynamicsTempBuffers& tempBuffers, bool swapAB) +{ + PX_ASSERT(particlesSpA); + PX_ASSERT(particlesSpB); + PX_ASSERT(cellsSpA); + PX_ASSERT(cellsSpB); + PX_ASSERT(particleIndicesSpA); + PX_ASSERT(particleIndicesSpB); + + const ParticleCell* __restrict srcCell; + const ParticleCell* __restrict dstCell; + const PxU32* __restrict dstIndices; + PxU32 srcBuckets, dstBuckets; + + if(swapAB) + { + srcCell = cellsSpB; + srcBuckets = numCellHashBucketsB; + + dstCell = cellsSpA; + dstIndices = particleIndicesSpA; + dstBuckets = numCellHashBucketsA; + } + else + { + srcCell = cellsSpA; + srcBuckets = numCellHashBucketsA; + + dstCell = cellsSpB; + dstIndices = particleIndicesSpB; + dstBuckets = numCellHashBucketsB; + } + + const ParticleCell* neighborCells[27]; + + // For the cells of the subpacket A find neighboring cells in the subpacket B. 
+ const ParticleCell* pcell_end = srcCell + srcBuckets; + for(const ParticleCell* pcell = srcCell; pcell < pcell_end; pcell++) + { + if(pcell->numParticles != PX_INVALID_U32) + { + GridCellVector coords(pcell->coords); + + // + // Check the 26 neighboring cells plus the cell with the same coordinates but inside the other subpacket + // + + // Back plane + PxU32 cellIdx; + PxI16 neighbor[27][3] = { { -1, -1, -1 }, + { 0, -1, -1 }, + { 1, -1, -1 }, + { -1, 0, -1 }, + { 0, 0, -1 }, + { 1, 0, -1 }, + { -1, 1, -1 }, + { 0, 1, -1 }, + { 1, 1, -1 }, + { -1, -1, 0 }, + { 0, -1, 0 }, + { 1, -1, 0 }, + { -1, 0, 0 }, + { 0, 0, 0 }, + { 1, 0, 0 }, + { -1, 1, 0 }, + { 0, 1, 0 }, + { 1, 1, 0 }, + { -1, -1, 1 }, + { 0, -1, 1 }, + { 1, -1, 1 }, + { -1, 0, 1 }, + { 0, 0, 1 }, + { 1, 0, 1 }, + { -1, 1, 1 }, + { 0, 1, 1 }, + { 1, 1, 1 } }; + + for(PxU32 n = 0; n < 27; n++) + { + neighborCells[n] = SpatialHash::findConstCell( + cellIdx, + GridCellVector(coords.x + neighbor[n][0], coords.y + neighbor[n][1], coords.z + neighbor[n][2]), + dstCell, dstBuckets); + } + + // Compute interaction between particles of current cell and neighboring cells + PxU32 indexCount = 0; + + for(PxU32 n = 0; n < 27; n++) + { + if(!neighborCells[n]) + continue; + + const ParticleCell* nCell = neighborCells[n]; + + for(PxU32 i = nCell->firstParticle, end = nCell->firstParticle + nCell->numParticles; i < end; i++) + tempBuffers.mergedIndices[indexCount++] = dstIndices[i]; + } + + if(indexCount > 0) + { + + if(swapAB) + { + updateParticleGroupPair(forceBufA, forceBufB, particlesSpA, particlesSpB, tempBuffers.mergedIndices, + indexCount, particleIndicesSpB + pcell->firstParticle, pcell->numParticles, + twoWayUpdate, updateType == SphUpdateType::DENSITY, params, + tempBuffers.simdPositionsSubpacket, tempBuffers.indexStream); + } + else + { + updateParticleGroupPair(forceBufA, forceBufB, particlesSpA, particlesSpB, + particleIndicesSpA + pcell->firstParticle, pcell->numParticles, + tempBuffers.mergedIndices, 
indexCount, twoWayUpdate, + updateType == SphUpdateType::DENSITY, params, + tempBuffers.simdPositionsSubpacket, tempBuffers.indexStream); + } + } + } + } +} + +//-------------------------------------------------------------------------------------------------------------------// + +PX_FORCE_INLINE void normalizeParticleDensity(Particle& particle, const PxF32 selfDensity, + const PxF32 densityNormalizationFactor) +{ + // normalize density + particle.density = (particle.density - selfDensity) * densityNormalizationFactor; +} + +//-------------------------------------------------------------------------------------------------------------------// + +} // namespace Pt +} // namespace physx + +#endif // PX_USE_PARTICLE_SYSTEM_API +#endif // PT_DYNAMIC_HELPER_H diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtDynamics.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamics.cpp new file mode 100644 index 00000000..2d1fd82b --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamics.cpp @@ -0,0 +1,828 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PtDynamics.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "PsBitUtils.h" +#include "PsIntrinsics.h" +#include "PsAllocator.h" +#include "CmFlushPool.h" + +#include "PtDynamicHelper.h" +#include "PtParticleSystemSimCpu.h" +#include "PtContext.h" + +#define MERGE_HALO_REGIONS 0 + +using namespace physx; +using namespace Pt; + +PX_FORCE_INLINE void Dynamics::updateParticlesBruteForceHalo(SphUpdateType::Enum updateType, PxVec3* forceBuf, + Particle* particles, const PacketSections& packetSections, + const PacketHaloRegions& haloRegions, + DynamicsTempBuffers& tempBuffers) +{ + for(PxU32 i = 0; i < 26; i++) + { + if(packetSections.numParticles[i] == 0) + continue; + + Particle* particlesA = &particles[packetSections.firstParticle[i]]; + PxVec3* forceBufA = &forceBuf[packetSections.firstParticle[i]]; + + // + // Get neighboring halo regions for the packet section + // + PxU32 numHaloRegions = sSectionToHaloTable[i].numHaloRegions; + PxU32* haloRegionIndices = sSectionToHaloTable[i].haloRegionIndices; + PxU32 mergedIndexCount = 0; + // + // Iterate over neighboring halo regions and update particles + // + for(PxU32 j = 0; j < numHaloRegions; 
j++) + { + PxU32 idx = haloRegionIndices[j]; + + if(haloRegions.numParticles[idx] == 0) + continue; + + if(mergedIndexCount + haloRegions.numParticles[idx] > PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY) + { + updateParticleGroupPair(forceBufA, forceBuf, particlesA, particles, tempBuffers.orderedIndicesSubpacket, + packetSections.numParticles[i], tempBuffers.mergedIndices, mergedIndexCount, + false, updateType == SphUpdateType::DENSITY, mParams, + tempBuffers.simdPositionsSubpacket, tempBuffers.indexStream); + mergedIndexCount = 0; + } + PxU32 hpIndex = haloRegions.firstParticle[idx]; + for(PxU32 k = 0; k < haloRegions.numParticles[idx]; k++) + tempBuffers.mergedIndices[mergedIndexCount++] = hpIndex++; + } + + if(mergedIndexCount > 0) + { + updateParticleGroupPair(forceBufA, forceBuf, particlesA, particles, tempBuffers.orderedIndicesSubpacket, + packetSections.numParticles[i], tempBuffers.mergedIndices, mergedIndexCount, false, + updateType == SphUpdateType::DENSITY, mParams, tempBuffers.simdPositionsSubpacket, + tempBuffers.indexStream); + } + } +} + +// The following table defines for each packet section (except the one in the centre) the number +// of neighboring halo region as well as the indices of these neighboring halo region +Dynamics::SectionToHaloTable Dynamics::sSectionToHaloTable[26] = { + { 19, { 0, 2, 6, 8, 18, 20, 24, 26, 36, 38, 42, 44, 54, 56, 66, 68, 78, 80, 90 } }, // 0 + { 19, { 1, 2, 7, 8, 19, 20, 25, 26, 45, 47, 51, 53, 55, 56, 72, 74, 84, 86, 91 } }, // 1 + { 15, { 0, 1, 2, 6, 7, 8, 18, 19, 20, 24, 25, 26, 54, 55, 56 } }, // 2 + { 19, { 3, 5, 6, 8, 27, 29, 33, 35, 37, 38, 43, 44, 60, 62, 67, 68, 81, 83, 92 } }, // 3 + { 19, { 4, 5, 7, 8, 28, 29, 34, 35, 46, 47, 52, 53, 61, 62, 73, 74, 87, 89, 93 } }, // 4 + { 15, { 3, 4, 5, 6, 7, 8, 27, 28, 29, 33, 34, 35, 60, 61, 62 } }, // 5 + { 15, { 0, 2, 3, 5, 6, 8, 36, 37, 38, 42, 43, 44, 66, 67, 68 } }, // 6 + { 15, { 1, 2, 4, 5, 7, 8, 45, 46, 47, 51, 52, 53, 72, 73, 74 } }, // 7 + { 9, { 0, 1, 2, 3, 4, 
5, 6, 7, 8 } }, // 8 + { 19, { 9, 11, 15, 17, 21, 23, 24, 26, 39, 41, 42, 44, 57, 59, 69, 71, 79, 80, 94 } }, // 9 + { 19, { 10, 11, 16, 17, 22, 23, 25, 26, 48, 50, 51, 53, 58, 59, 75, 77, 85, 86, 95 } }, // 10 + { 15, { 9, 10, 11, 15, 16, 17, 21, 22, 23, 24, 25, 26, 57, 58, 59 } }, // 11 + { 19, { 12, 14, 15, 17, 30, 32, 33, 35, 40, 41, 43, 44, 63, 65, 70, 71, 82, 83, 96 } }, // 12 + { 19, { 13, 14, 16, 17, 31, 32, 34, 35, 49, 50, 52, 53, 64, 65, 76, 77, 88, 89, 97 } }, // 13 + { 15, { 12, 13, 14, 15, 16, 17, 30, 31, 32, 33, 34, 35, 63, 64, 65 } }, // 14 + { 15, { 9, 11, 12, 14, 15, 17, 39, 40, 41, 42, 43, 44, 69, 70, 71 } }, // 15 + { 15, { 10, 11, 13, 14, 16, 17, 48, 49, 50, 51, 52, 53, 75, 76, 77 } }, // 16 + { 9, { 9, 10, 11, 12, 13, 14, 15, 16, 17 } }, // 17 + { 15, { 18, 20, 21, 23, 24, 26, 36, 38, 39, 41, 42, 44, 78, 79, 80 } }, // 18 + { 15, { 19, 20, 22, 23, 25, 26, 45, 47, 48, 50, 51, 53, 84, 85, 86 } }, // 19 + { 9, { 18, 19, 20, 21, 22, 23, 24, 25, 26 } }, // 20 + { 15, { 27, 29, 30, 32, 33, 35, 37, 38, 40, 41, 43, 44, 81, 82, 83 } }, // 21 + { 15, { 28, 29, 31, 32, 34, 35, 46, 47, 49, 50, 52, 53, 87, 88, 89 } }, // 22 + { 9, { 27, 28, 29, 30, 31, 32, 33, 34, 35 } }, // 23 + { 9, { 36, 37, 38, 39, 40, 41, 42, 43, 44 } }, // 24 + { 9, { 45, 46, 47, 48, 49, 50, 51, 52, 53 } }, // 25 +}; + +Dynamics::OrderedIndexTable Dynamics::sOrderedIndexTable; + +Dynamics::OrderedIndexTable::OrderedIndexTable() +{ + for(PxU32 i = 0; i < PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY; ++i) + indices[i] = i; +} + +namespace physx +{ + +namespace Pt +{ + +class DynamicsSphTask : public Cm::Task +{ + public: + DynamicsSphTask(Dynamics& context, PxU32 taskDataIndex) : mDynamicsContext(context), mTaskDataIndex(taskDataIndex) + { + } + + virtual void runInternal() + { + mDynamicsContext.processPacketRange(mTaskDataIndex); + } + + virtual const char* getName() const + { + return "Pt::Dynamics.sph"; + } + + private: + DynamicsSphTask& operator=(const DynamicsSphTask&); + Dynamics& 
mDynamicsContext; + PxU32 mTaskDataIndex; +}; + +} // namespace Pt +} // namespace physx + +Dynamics::Dynamics(ParticleSystemSimCpu& particleSystem) +: mParticleSystem(particleSystem) +, mTempReorderedParticles(NULL) +, mTempParticleForceBuf(NULL) +, mMergeDensityTask(this, "Pt::Dynamics.mergeDensity") +, mMergeForceTask(this, "Pt::Dynamics.mergeForce") +, mNumTempBuffers(0) +{ +} + +Dynamics::~Dynamics() +{ +} + +//-------------------------------------------------------------------------------------------------------------------// + +void Dynamics::clear() +{ + if(mTempReorderedParticles) + { + mParticleSystem.mAlign16.deallocate(mTempReorderedParticles); + mTempReorderedParticles = NULL; + } + + adjustTempBuffers(0); +} + +void Dynamics::adjustTempBuffers(PxU32 count) +{ + PX_ASSERT(count <= PT_MAX_PARALLEL_TASKS_SPH); + PX_ASSERT(mNumTempBuffers <= PT_MAX_PARALLEL_TASKS_SPH); + Ps::AlignedAllocator<16, Ps::ReflectionAllocator<char> > align16; + + // shrink + for(PxU32 i = count; i < mNumTempBuffers; ++i) + { + DynamicsTempBuffers& tempBuffers = mTempBuffers[i]; + + if(tempBuffers.indexStream) + PX_FREE_AND_RESET(tempBuffers.indexStream); + + if(tempBuffers.hashKeys) + PX_FREE_AND_RESET(tempBuffers.hashKeys); + + if(tempBuffers.mergedIndices) + PX_FREE_AND_RESET(tempBuffers.mergedIndices); + + if(tempBuffers.indicesSubpacketA) + PX_FREE_AND_RESET(tempBuffers.indicesSubpacketA); + + if(tempBuffers.indicesSubpacketB) + PX_FREE_AND_RESET(tempBuffers.indicesSubpacketB); + + if(tempBuffers.cellHashTableSubpacketB) + PX_FREE_AND_RESET(tempBuffers.cellHashTableSubpacketB); + + if(tempBuffers.cellHashTableSubpacketA) + PX_FREE_AND_RESET(tempBuffers.cellHashTableSubpacketA); + + if(tempBuffers.simdPositionsSubpacket) + { + align16.deallocate(tempBuffers.simdPositionsSubpacket); + tempBuffers.simdPositionsSubpacket = NULL; + } + + if(tempBuffers.mergedHaloRegions) + { + align16.deallocate(tempBuffers.mergedHaloRegions); + tempBuffers.mergedHaloRegions = NULL; + } + } + + 
// growing + for(PxU32 i = mNumTempBuffers; i < count; ++i) + { + DynamicsTempBuffers& tempBuffers = mTempBuffers[i]; + + // Make sure the number of hash buckets is a power of 2 (requirement for the used hash function) + tempBuffers.cellHashMaxSize = Ps::nextPowerOfTwo((PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY + 1)); + + // Local hash tables for particle cells (for two subpackets A and B). + tempBuffers.cellHashTableSubpacketA = reinterpret_cast<ParticleCell*>( + PX_ALLOC(tempBuffers.cellHashMaxSize * sizeof(ParticleCell), "ParticleCell")); + tempBuffers.cellHashTableSubpacketB = reinterpret_cast<ParticleCell*>( + PX_ALLOC(tempBuffers.cellHashMaxSize * sizeof(ParticleCell), "ParticleCell")); + + // Particle index lists for local hash of particle cells (for two subpackets A and B). + tempBuffers.indicesSubpacketA = reinterpret_cast<PxU32*>( + PX_ALLOC(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY * sizeof(PxU32), "Subpacket indices")); + tempBuffers.indicesSubpacketB = reinterpret_cast<PxU32*>( + PX_ALLOC(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY * sizeof(PxU32), "Subpacket indices")); + tempBuffers.mergedIndices = reinterpret_cast<PxU32*>( + PX_ALLOC(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY * sizeof(PxU32), "Subpacket merged indices")); + tempBuffers.mergedHaloRegions = reinterpret_cast<Particle*>( + align16.allocate(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY * sizeof(Particle), __FILE__, __LINE__)); + + tempBuffers.hashKeys = reinterpret_cast<PxU16*>( + PX_ALLOC(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY * sizeof(PxU16), "Subpacket hashKeys")); + + // SIMD buffer for storing intermediate particle positions of up to a subpacket size. + // Ceil up to multiple of four + 4 for save unrolling. + // For 4 particles we need three Vec4V. 
+ PxU32 paddedSubPacketMax = ((PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY + 3) & ~0x3) + 4; + tempBuffers.simdPositionsSubpacket = + reinterpret_cast<PxU8*>(align16.allocate(3 * (paddedSubPacketMax / 4) * sizeof(Vec4V), __FILE__, __LINE__)); + + tempBuffers.indexStream = + reinterpret_cast<PxU32*>(PX_ALLOC(MAX_INDEX_STREAM_SIZE * sizeof(PxU32), "indexStream")); + tempBuffers.orderedIndicesSubpacket = sOrderedIndexTable.indices; + } + + mNumTempBuffers = count; +} + +//-------------------------------------------------------------------------------------------------------------------// + +void Dynamics::updateSph(physx::PxBaseTask& continuation) +{ + Particle* particles = mParticleSystem.mParticleState->getParticleBuffer(); + PxU32 numParticles = mParticleSystem.mNumPacketParticlesIndices; + const PxU32* particleIndices = mParticleSystem.mPacketParticlesIndices; + const ParticleCell* packets = mParticleSystem.mSpatialHash->getPackets(); + const PacketSections* packetSections = mParticleSystem.mSpatialHash->getPacketSections(); + PX_ASSERT(packets); + PX_ASSERT(packetSections); + PX_ASSERT(numParticles > 0); + PX_UNUSED(packetSections); + + { + // sschirm: for now we reorder particles for sph exclusively, and scatter again after sph. + if(!mTempReorderedParticles) + { + PxU32 maxParticles = mParticleSystem.mParticleState->getMaxParticles(); + mTempReorderedParticles = reinterpret_cast<Particle*>( + mParticleSystem.mAlign16.allocate(maxParticles * sizeof(Particle), __FILE__, __LINE__)); + } + + if(!mTempParticleForceBuf) + { + PxU32 maxParticles = mParticleSystem.mParticleState->getMaxParticles(); + // sschirm: Add extra float, since we are accessing this buffer later with: Vec4V_From_F32Array. + // The last 4 element would contain unallocated memory otherwise. + // Also initializing buffer that may only be used partially and non-contiguously with 0 to avoid + // simd operations to use bad values. 
+ PxU32 byteSize = maxParticles * sizeof(PxVec3) + sizeof(PxF32); + mTempParticleForceBuf = + reinterpret_cast<PxVec3*>(mParticleSystem.mAlign16.allocate(byteSize, __FILE__, __LINE__)); + memset(mTempParticleForceBuf, 0, byteSize); + } + + for(PxU32 i = 0; i < numParticles; ++i) + { + PxU32 particleIndex = particleIndices[i]; + mTempReorderedParticles[i] = particles[particleIndex]; + } + + // would be nice to get available thread count to decide on task decomposition + // mParticleSystem.getContext().getTaskManager().getCpuDispatcher(); + + // use number of particles for task decomposition + PxU32 targetParticleCountPerTask = + PxMax(PxU32(numParticles / PT_MAX_PARALLEL_TASKS_SPH), PxU32(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY)); + PxU16 packetIndex = 0; + PxU16 lastPacketIndex = 0; + PxU32 numTasks = 0; + for(PxU32 i = 0; i < PT_MAX_PARALLEL_TASKS_SPH; ++i) + { + // if this is the last interation, we need to gather all remaining packets + if(i == PT_MAX_PARALLEL_TASKS_SPH - 1) + targetParticleCountPerTask = 0xffffffff; + + lastPacketIndex = packetIndex; + PxU32 currentParticleCount = 0; + while(currentParticleCount < targetParticleCountPerTask && packetIndex < PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE) + { + const ParticleCell& packet = packets[packetIndex]; + currentParticleCount += (packet.numParticles != PX_INVALID_U32) ? 
packet.numParticles : 0; + packetIndex++; + } + + if(currentParticleCount > 0) + { + PX_ASSERT(lastPacketIndex != packetIndex); + mTaskData[i].beginPacketIndex = lastPacketIndex; + mTaskData[i].endPacketIndex = packetIndex; + numTasks++; + } + else + { + mTaskData[i].beginPacketIndex = PX_INVALID_U16; + mTaskData[i].endPacketIndex = PX_INVALID_U16; + } + } + PX_ASSERT(packetIndex == PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE); + + mNumTasks = numTasks; + adjustTempBuffers(PxMax(numTasks, mNumTempBuffers)); + + mMergeForceTask.setContinuation(&continuation); + mMergeDensityTask.setContinuation(&mMergeForceTask); + + schedulePackets(SphUpdateType::DENSITY, mMergeDensityTask); + mMergeDensityTask.removeReference(); + } +} + +//-------------------------------------------------------------------------------------------------------------------// + +void Dynamics::mergeDensity(physx::PxBaseTask* /*continuation*/) +{ + schedulePackets(SphUpdateType::FORCE, mMergeForceTask); + mMergeForceTask.removeReference(); +} + +//-------------------------------------------------------------------------------------------------------------------// + +void Dynamics::mergeForce(physx::PxBaseTask* /*continuation*/) +{ + PxU32 numParticles = mParticleSystem.mNumPacketParticlesIndices; + Particle* particles = mParticleSystem.mParticleState->getParticleBuffer(); + PxVec3* forces = mParticleSystem.mTransientBuffer; + const PxU32* particleIndices = mParticleSystem.mPacketParticlesIndices; + + // reorder and normalize density. 
+ for(PxU32 i = 0; i < numParticles; ++i) + { + PxU32 particleIndex = particleIndices[i]; + Particle& particle = mTempReorderedParticles[i]; + normalizeParticleDensity(particle, mParams.selfDensity, mParams.densityNormalizationFactor); + particles[particleIndex] = particle; + forces[particleIndex] = mTempParticleForceBuf[i]; + } + + mParticleSystem.mAlign16.deallocate(mTempParticleForceBuf); + mTempParticleForceBuf = NULL; +} + +//-------------------------------------------------------------------------------------------------------------------// + +// Stores updateType in mCurrentUpdateType and creates one DynamicsSphTask per entry of mTaskData (mNumTasks entries). +// Each task is placement-constructed in memory taken from the context's task pool, receives `continuation` as its +// continuation and then has one reference removed (presumably this hands it to the scheduler - confirm against PxBaseTask). +void Dynamics::schedulePackets(SphUpdateType::Enum updateType, physx::PxBaseTask& continuation) +{ + mCurrentUpdateType = updateType; + for(PxU32 i = 0; i < mNumTasks; ++i) + { + PX_ASSERT(mTaskData[i].beginPacketIndex != PX_INVALID_U16 && mTaskData[i].endPacketIndex != PX_INVALID_U16); + void* ptr = mParticleSystem.getContext().getTaskPool().allocate(sizeof(DynamicsSphTask)); + DynamicsSphTask* task = PX_PLACEMENT_NEW(ptr, DynamicsSphTask)(*this, i); + task->setContinuation(&continuation); + task->removeReference(); + } +} + +//-------------------------------------------------------------------------------------------------------------------// + +// Processes the packets [beginPacketIndex, endPacketIndex) of mTaskData[taskDataIndex], skipping packets whose +// numParticles equals PX_INVALID_U32. For every remaining packet the halo regions are gathered and updatePacket() +// is called with mCurrentUpdateType and the temp buffers stored at mTempBuffers[taskDataIndex]. +void Dynamics::processPacketRange(PxU32 taskDataIndex) +{ + const ParticleCell* packets = mParticleSystem.mSpatialHash->getPackets(); + const PacketSections* packetSections = mParticleSystem.mSpatialHash->getPacketSections(); + Particle* particles = mTempReorderedParticles; + PxVec3* forceBuf = mTempParticleForceBuf; + + TaskData& taskData = mTaskData[taskDataIndex]; + + for(PxU16 p = taskData.beginPacketIndex; p < taskData.endPacketIndex; ++p) + { + const ParticleCell& packet = packets[p]; + if(packet.numParticles == PX_INVALID_U32) + continue; + + // Get halo regions with neighboring particles + PacketHaloRegions haloRegions; + SpatialHash::getHaloRegions(haloRegions, packet.coords, packets, packetSections, + PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE); + 
updatePacket(mCurrentUpdateType, forceBuf, particles, packet, packetSections[p], haloRegions, + mTempBuffers[taskDataIndex]); + } +} + +//-------------------------------------------------------------------------------------------------------------------// + +// Runs one pass (selected by updateType) for a single packet: updateParticlesPrePass(), pair interactions, +// updateParticlesPostPass(). When packet.numParticles and haloRegions.maxNumParticles are both at most +// PT_BRUTE_FORCE_PARTICLE_THRESHOLD the pairs are tested by brute force; otherwise updatePacketLocalHash() is used. +void Dynamics::updatePacket(SphUpdateType::Enum updateType, PxVec3* forceBuf, Particle* particles, + const ParticleCell& packet, const PacketSections& packetSections, + const PacketHaloRegions& haloRegions, DynamicsTempBuffers& tempBuffers) +{ + PX_COMPILE_TIME_ASSERT(PT_BRUTE_FORCE_PARTICLE_THRESHOLD <= PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY); + + updateParticlesPrePass(updateType, forceBuf + packet.firstParticle, particles + packet.firstParticle, + packet.numParticles, mParams); + bool bruteForceApproach = ((packet.numParticles <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD) && + (haloRegions.maxNumParticles <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD)); + + if(bruteForceApproach) + { + // There are not enough particles in the packet and its neighbors to make it worth building the local cell hash. + // So, we do a brute force approach testing each particle against each particle. + // sschirm: TODO check whether one way is faster (fewer function calls... more math) + Particle* packetParticles = particles + packet.firstParticle; + PxVec3* packetForceBuf = forceBuf + packet.firstParticle; + for(PxU32 p = 1; p < packet.numParticles; p++) + { + updateParticleGroupPair(packetForceBuf, packetForceBuf, packetParticles, packetParticles, + tempBuffers.orderedIndicesSubpacket + p - 1, 1, + tempBuffers.orderedIndicesSubpacket + p, packet.numParticles - p, true, + updateType == SphUpdateType::DENSITY, mParams, tempBuffers.simdPositionsSubpacket, + tempBuffers.indexStream); + } + + // Compute particle interactions between particles of the current packet and particles of neighboring packets. 
+ updateParticlesBruteForceHalo(updateType, forceBuf, particles, packetSections, haloRegions, tempBuffers); + } + else + { + updatePacketLocalHash(updateType, forceBuf, particles, packet, packetSections, haloRegions, tempBuffers); + } + + updateParticlesPostPass(updateType, forceBuf + packet.firstParticle, particles + packet.firstParticle, + packet.numParticles, mParams); +} + +//-------------------------------------------------------------------------------------------------------------------// + +// Local-hash variant of the packet update. The packet is cut into subpackets of at most +// PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY particles; each subpacket A is hashed, processed against itself and then +// against every later subpacket B of the same packet (both force buffers are passed for these pairs). +// Afterwards each of the 26 packet sections is processed against its neighboring halo regions with NULL as the halo +// force buffer: by brute force when both the section and the halo region hold at most +// PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION particles, via local hashes otherwise. +void Dynamics::updatePacketLocalHash(SphUpdateType::Enum updateType, PxVec3* forceBuf, Particle* particles, + const ParticleCell& packet, const PacketSections& packetSections, + const PacketHaloRegions& haloRegions, DynamicsTempBuffers& tempBuffers) +{ + // Particle index lists for local hash of particle cells (for two subpackets A and B). + PxU32* particleIndicesSpA = tempBuffers.indicesSubpacketA; + PxU32* particleIndicesSpB = tempBuffers.indicesSubpacketB; + + // Local hash tables for particle cells (for two subpackets A and B). 
+ ParticleCell* particleCellsSpA = tempBuffers.cellHashTableSubpacketA; + ParticleCell* particleCellsSpB = tempBuffers.cellHashTableSubpacketB; + + PxVec3 packetCorner = + PxVec3(PxReal(packet.coords.x), PxReal(packet.coords.y), PxReal(packet.coords.z)) * mParams.packetSize; + + PxU32 particlesLeftA0 = packet.numParticles; + Particle* particlesSpA0 = particles + packet.firstParticle; + PxVec3* forceBufA0 = forceBuf + packet.firstParticle; + + while(particlesLeftA0) + { + PxU32 numParticlesSpA = PxMin(particlesLeftA0, static_cast<PxU32>(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY)); + + // Make sure the number of hash buckets is a power of 2 (requirement for the used hash function) + const PxU32 numCellHashBucketsSpA = Ps::nextPowerOfTwo(numParticlesSpA + 1); + PX_ASSERT(numCellHashBucketsSpA <= tempBuffers.cellHashMaxSize); + + // Get local cell hash for the current subpacket + SpatialHash::buildLocalHash(particlesSpA0, numParticlesSpA, particleCellsSpA, particleIndicesSpA, + tempBuffers.hashKeys, numCellHashBucketsSpA, mParams.cellSizeInv, packetCorner); + + //--------------------------------------------------------------------------------------------------- + + // + // Compute particle interactions between particles within the current subpacket. + // + + updateCellsSubpacket(updateType, forceBufA0, particlesSpA0, particleCellsSpA, particleIndicesSpA, + numCellHashBucketsSpA, mParams, tempBuffers); + + //--------------------------------------------------------------------------------------------------- + + // + // Compute particle interactions between particles of current subpacket and particles + // of other subpackets within the same packet (i.e., we process all subpacket pairs). 
+ // + + PxU32 particlesLeftB = particlesLeftA0 - numParticlesSpA; + Particle* particlesSpB = particlesSpA0 + numParticlesSpA; + PxVec3* forceBufB = forceBufA0 + numParticlesSpA; + + while(particlesLeftB) + { + PxU32 numParticlesSpB = PxMin(particlesLeftB, static_cast<PxU32>(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY)); + + // Make sure the number of hash buckets is a power of 2 (requirement for the used hash function) + const PxU32 numCellHashBucketsSpB = Ps::nextPowerOfTwo(numParticlesSpB + 1); + PX_ASSERT(numCellHashBucketsSpB <= tempBuffers.cellHashMaxSize); + + // Get local cell hash for other subpacket + SpatialHash::buildLocalHash(particlesSpB, numParticlesSpB, particleCellsSpB, particleIndicesSpB, + tempBuffers.hashKeys, numCellHashBucketsSpB, mParams.cellSizeInv, packetCorner); + + // For the cells of subpacket A, find neighboring cells in the subpacket B and compute particle + // interactions. + updateCellsSubpacketPair(updateType, forceBufA0, forceBufB, particlesSpA0, particlesSpB, particleCellsSpA, + particleCellsSpB, particleIndicesSpA, particleIndicesSpB, numCellHashBucketsSpA, + numCellHashBucketsSpB, true, mParams, tempBuffers, + numParticlesSpA < numParticlesSpB); + + particlesLeftB -= numParticlesSpB; + particlesSpB += numParticlesSpB; + forceBufB += numParticlesSpB; + } + + particlesLeftA0 -= numParticlesSpA; + particlesSpA0 += numParticlesSpA; + forceBufA0 += numParticlesSpA; + } + + //--------------------------------------------------------------------------------------------------- + + // + // Compute particle interactions between particles of sections of the current packet and particles of neighboring + // halo regions + // + + PX_ASSERT(PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION <= PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY); + if(haloRegions.maxNumParticles != 0) + { + for(PxU32 s = 0; s < 26; s++) + { + PxU32 numSectionParticles = packetSections.numParticles[s]; + if(numSectionParticles == 0) + continue; + + bool 
sectionEnablesBruteForce = (numSectionParticles <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION); + + SectionToHaloTable& neighborHaloRegions = sSectionToHaloTable[s]; + PxU32 numHaloNeighbors = neighborHaloRegions.numHaloRegions; + + PxU32 particlesLeftA = numSectionParticles; + Particle* particlesSpA = particles + packetSections.firstParticle[s]; + PxVec3* forceBufA = forceBuf + packetSections.firstParticle[s]; + + while(particlesLeftA) + { + PxU32 numParticlesSpA = + PxMin(particlesLeftA, static_cast<PxU32>(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY)); + + // Compute particle interactions between particles of the current subpacket (of the section) + // and particles of neighboring halo regions relevant. + + // Process halo regions which need local hash building first. + bool isLocalHashValid = false; + + // Make sure the number of hash buckets is a power of 2 (requirement for the used hash function) + const PxU32 numCellHashBucketsSpA = Ps::nextPowerOfTwo(numParticlesSpA + 1); + PX_ASSERT(numCellHashBucketsSpA <= tempBuffers.cellHashMaxSize); +#if MERGE_HALO_REGIONS + // NOTE(review): this branch looks stale - it uses hashKeyArray, which is not declared in this function, and its + // first updateSubpacketPairHalo() call passes forceBufB, which the declaration in PtDynamics.h does not accept. + // Confirm MERGE_HALO_REGIONS is disabled, or repair the branch before enabling it. + // Read halo region particles into temporary buffer + PxU32 numMergedHaloParticles = 0; + for(PxU32 h = 0; h < numHaloNeighbors; h++) + { + PxU32 haloRegionIdx = neighborHaloRegions.haloRegionIndices[h]; + PxU32 numHaloParticles = haloRegions.numParticles[haloRegionIdx]; + + // chunk regions into subpackets! + PxU32 particlesLeftB = numHaloParticles; + Particle* particlesSpB = particles + haloRegions.firstParticle[haloRegionIdx]; + PxVec3* forceBufB = forceBuf + haloRegions.firstParticle[haloRegionIdx]; + while(particlesLeftB) + { + PxU32 numParticlesSpB = + PxMin(particlesLeftB, static_cast<PxU32>(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY)); + + // if there are plenty of particles already, don't bother to do the copy for merging. 
+ if(numParticlesSpB > PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION) + { + updateSubpacketPairHalo(forceBufA, particlesSpA, numParticlesSpA, particleCellsSpA, + particleIndicesSpA, isLocalHashValid, numCellHashBucketsSpA, + forceBufB, particlesSpB, numParticlesSpB, particleCellsSpB, + particleIndicesSpB, packetCorner, updateType, hashKeyArray, + tempBuffers); + } + else + { + if(numMergedHaloParticles + numParticlesSpB > PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY) + { + // flush + updateSubpacketPairHalo(forceBufA, particlesSpA, numParticlesSpA, particleCellsSpA, + particleIndicesSpA, isLocalHashValid, numCellHashBucketsSpA, + tempBuffers.mergedHaloRegions, numMergedHaloParticles, + particleCellsSpB, particleIndicesSpB, packetCorner, updateType, + hashKeyArray, tempBuffers); + numMergedHaloParticles = 0; + } + + for(PxU32 k = 0; k < numParticlesSpB; ++k) + tempBuffers.mergedHaloRegions[numMergedHaloParticles++] = particlesSpB[k]; + } + + particlesLeftB -= numParticlesSpB; + particlesSpB += numParticlesSpB; + } + } + + // flush + updateSubpacketPairHalo(forceBufA, particlesSpA, numParticlesSpA, particleCellsSpA, particleIndicesSpA, + isLocalHashValid, numCellHashBucketsSpA, tempBuffers.mergedHaloRegions, + numMergedHaloParticles, particleCellsSpB, particleIndicesSpB, packetCorner, + updateType, hashKeyArray, tempBuffers); +#else // MERGE_HALO_REGIONS + for(PxU32 h = 0; h < numHaloNeighbors; h++) + { + PxU32 haloRegionIdx = neighborHaloRegions.haloRegionIndices[h]; + PxU32 numHaloParticles = haloRegions.numParticles[haloRegionIdx]; + + bool haloRegionEnablesBruteForce = + (numHaloParticles <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION); + + if(sectionEnablesBruteForce && haloRegionEnablesBruteForce) + continue; + + if(!isLocalHashValid) + { + // Get local cell hash for the current subpacket + SpatialHash::buildLocalHash(particlesSpA, numParticlesSpA, particleCellsSpA, particleIndicesSpA, + tempBuffers.hashKeys, numCellHashBucketsSpA, mParams.cellSizeInv, + 
packetCorner); + isLocalHashValid = true; + } + + PxU32 particlesLeftB = numHaloParticles; + Particle* particlesSpB = particles + haloRegions.firstParticle[haloRegionIdx]; + + while(particlesLeftB) + { + PxU32 numParticlesSpB = + PxMin(particlesLeftB, static_cast<PxU32>(PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY)); + + // It is important that no data is written to particles in halo regions since they belong to + // a neighboring packet. The interaction effect of the current packet on the neighboring packet + // will be + // considered when the neighboring packet is processed. + + // Make sure the number of hash buckets is a power of 2 (requirement for the used hash function) + const PxU32 numCellHashBucketsSpB = Ps::nextPowerOfTwo(numParticlesSpB + 1); + PX_ASSERT(numCellHashBucketsSpB <= tempBuffers.cellHashMaxSize); + + // Get local cell hash for other subpacket + SpatialHash::buildLocalHash(particlesSpB, numParticlesSpB, particleCellsSpB, particleIndicesSpB, + tempBuffers.hashKeys, numCellHashBucketsSpB, mParams.cellSizeInv, + packetCorner); + + // For the cells of subpacket A, find neighboring cells in the subpacket B and compute particle + // interactions. + updateCellsSubpacketPair(updateType, forceBufA, NULL, particlesSpA, particlesSpB, + particleCellsSpA, particleCellsSpB, particleIndicesSpA, + particleIndicesSpB, numCellHashBucketsSpA, numCellHashBucketsSpB, + false, mParams, tempBuffers, numParticlesSpA > numParticlesSpB); + + particlesLeftB -= numParticlesSpB; + particlesSpB += numParticlesSpB; + } + } + + // Now process halo regions which don't need local hash building. 
+ PxU32 mergedIndexCount = 0; + for(PxU32 h = 0; h < numHaloNeighbors; h++) + { + PxU32 haloRegionIdx = neighborHaloRegions.haloRegionIndices[h]; + PxU32 numHaloParticles = haloRegions.numParticles[haloRegionIdx]; + if(numHaloParticles == 0) + continue; + + bool haloRegionEnablesBruteForce = + (numHaloParticles <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION); + + if(!sectionEnablesBruteForce || !haloRegionEnablesBruteForce) + continue; + + // The section and the halo region do not have enough particles to make it worth + // building a local cell hash --> use brute force approach + + // This is given by the brute force condition (haloRegionEnablesBruteForce). It's necessary to + // make sure a halo region alone fits into the merge buffer. + PX_ASSERT(numHaloParticles <= PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY); + + if(mergedIndexCount + numHaloParticles > PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY) + { + updateParticleGroupPair(forceBufA, NULL, particlesSpA, particles, + tempBuffers.orderedIndicesSubpacket, numSectionParticles, + tempBuffers.mergedIndices, mergedIndexCount, false, + updateType == SphUpdateType::DENSITY, mParams, + tempBuffers.simdPositionsSubpacket, tempBuffers.indexStream); + mergedIndexCount = 0; + } + + PxU32 hpIndex = haloRegions.firstParticle[haloRegionIdx]; + for(PxU32 k = 0; k < numHaloParticles; k++) + tempBuffers.mergedIndices[mergedIndexCount++] = hpIndex++; + } + + if(mergedIndexCount > 0) + { + updateParticleGroupPair(forceBufA, NULL, particlesSpA, particles, tempBuffers.orderedIndicesSubpacket, + numSectionParticles, tempBuffers.mergedIndices, mergedIndexCount, false, + updateType == SphUpdateType::DENSITY, mParams, + tempBuffers.simdPositionsSubpacket, tempBuffers.indexStream); + } +#endif // MERGE_HALO_REGIONS + + particlesLeftA -= numParticlesSpA; + particlesSpA += numParticlesSpA; + forceBufA += numParticlesSpA; + } + } + } +} + 
+//-------------------------------------------------------------------------------------------------------------------// + +// Processes one subpacket A of a packet section against one chunk B of halo particles. NULL is passed as the force +// buffer for B and `false` for the two-way flag, so the update is requested for side A only. +// When both particle counts are at most PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION the pair is handled by +// updateParticleGroupPair(). Otherwise the local hash of A is built only if isLocalHashSpAValid is false (the flag is +// then set to true), the local hash of B is always rebuilt, and updateCellsSubpacketPair() is used. +void Dynamics::updateSubpacketPairHalo(PxVec3* __restrict forceBufA, Particle* __restrict particlesSpA, + PxU32 numParticlesSpA, ParticleCell* __restrict particleCellsSpA, + PxU32* __restrict particleIndicesSpA, bool& isLocalHashSpAValid, + PxU32 numCellHashBucketsSpA, Particle* __restrict particlesSpB, + PxU32 numParticlesSpB, ParticleCell* __restrict particleCellsSpB, + PxU32* __restrict particleIndicesSpB, const PxVec3& packetCorner, + SphUpdateType::Enum updateType, PxU16* __restrict hashKeyArray, + DynamicsTempBuffers& tempBuffers) +{ + bool sectionEnablesBruteForce = (numParticlesSpA <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION); + bool haloRegionEnablesBruteForce = (numParticlesSpB <= PT_BRUTE_FORCE_PARTICLE_THRESHOLD_HALO_VS_SECTION); + + // It is important that no data is written to particles in halo regions since they belong to + // a neighboring packet. The interaction effect of the current packet on the neighboring packet will be + // considered when the neighboring packet is processed. + + if(sectionEnablesBruteForce && haloRegionEnablesBruteForce) + { + // Now process halo regions which don't need local hash building. 
+ // The section and the halo region do not have enough particles to make it worth + // building a local cell hash --> use brute force approach + + updateParticleGroupPair(forceBufA, NULL, particlesSpA, particlesSpB, tempBuffers.orderedIndicesSubpacket, + numParticlesSpA, tempBuffers.orderedIndicesSubpacket, numParticlesSpB, false, + updateType == SphUpdateType::DENSITY, mParams, tempBuffers.simdPositionsSubpacket, + tempBuffers.indexStream); + } + else + { + if(!isLocalHashSpAValid) + { + // Get local cell hash for the current subpacket + SpatialHash::buildLocalHash(particlesSpA, numParticlesSpA, particleCellsSpA, particleIndicesSpA, + hashKeyArray, numCellHashBucketsSpA, mParams.cellSizeInv, packetCorner); + isLocalHashSpAValid = true; + } + + // Make sure the number of hash buckets is a power of 2 (requirement for the used hash function) + const PxU32 numCellHashBucketsSpB = Ps::nextPowerOfTwo(numParticlesSpB + 1); + PX_ASSERT(numCellHashBucketsSpB <= tempBuffers.cellHashMaxSize); + + // Get local cell hash for other subpacket + SpatialHash::buildLocalHash(particlesSpB, numParticlesSpB, particleCellsSpB, particleIndicesSpB, hashKeyArray, + numCellHashBucketsSpB, mParams.cellSizeInv, packetCorner); + + // For the cells of subpacket A, find neighboring cells in the subpacket B and compute particle interactions. 
+ updateCellsSubpacketPair(updateType, forceBufA, NULL, particlesSpA, particlesSpB, particleCellsSpA, + particleCellsSpB, particleIndicesSpA, particleIndicesSpB, numCellHashBucketsSpA, + numCellHashBucketsSpB, false, mParams, tempBuffers, numParticlesSpA < numParticlesSpB); + } +} +//-------------------------------------------------------------------------------------------------------------------// + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtDynamics.h b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamics.h new file mode 100644 index 00000000..0af21fa4 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamics.h @@ -0,0 +1,144 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. 
Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PT_DYNAMICS_H +#define PT_DYNAMICS_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "PtConfig.h" +#include "PtParticle.h" +#include "PtDynamicsParameters.h" +#include "PtDynamicsTempBuffers.h" +#include "CmBitMap.h" +#include "CmTask.h" + +namespace physx +{ + +namespace Pt +{ + +struct ParticleCell; +struct PacketSections; +struct PacketHaloRegions; + +// SPH dynamics update bound to one ParticleSystemSimCpu (held by reference in mParticleSystem). +// updateSph() is the public entry point. The private part keeps the packet range of each task in mTaskData, +// one DynamicsTempBuffers per possible parallel task (PT_MAX_PARALLEL_TASKS_SPH entries) and two delegate tasks +// that call mergeDensity() and mergeForce(). Copy assignment is declared private and left without a body here. +class Dynamics +{ + public: + Dynamics(class ParticleSystemSimCpu& particleSystem); + ~Dynamics(); + + void clear(); + + void updateSph(physx::PxBaseTask& continuation); + + PX_FORCE_INLINE DynamicsParameters& getParameter() + { + return mParams; + } + + private: + // Table to get the neighboring halo region indices for a packet section + struct SectionToHaloTable + { + PxU32 numHaloRegions; + PxU32 haloRegionIndices[19]; // No packet section has more than 19 neighboring halo regions + }; + + struct OrderedIndexTable + { + OrderedIndexTable(); + PxU32 indices[PT_SUBPACKET_PARTICLE_LIMIT_FORCE_DENSITY]; + }; + + struct TaskData + { + PxU16 beginPacketIndex; + PxU16 endPacketIndex; + }; + + void adjustTempBuffers(PxU32 count); + + void schedulePackets(SphUpdateType::Enum updateType, physx::PxBaseTask& continuation); + void processPacketRange(PxU32 taskDataIndex); + + void updatePacket(SphUpdateType::Enum updateType, PxVec3* forceBuf, Particle* particles, const ParticleCell& packet, + const PacketSections& packetSections, const PacketHaloRegions& haloRegions, + struct 
DynamicsTempBuffers& tempBuffers); + + void updatePacketLocalHash(SphUpdateType::Enum updateType, PxVec3* forceBuf, Particle* particles, + const ParticleCell& packet, const PacketSections& packetSections, + const PacketHaloRegions& haloRegions, DynamicsTempBuffers& tempBuffers); + + void updateSubpacketPairHalo(PxVec3* __restrict forceBufA, Particle* __restrict particlesSpA, PxU32 numParticlesSpA, + ParticleCell* __restrict particleCellsSpA, PxU32* __restrict particleIndicesSpA, + bool& isLocalHashSpAValid, PxU32 numCellHashBucketsSpA, + Particle* __restrict particlesSpB, PxU32 numParticlesSpB, + ParticleCell* __restrict particleCellsSpB, PxU32* __restrict particleIndicesSpB, + const PxVec3& packetCorner, SphUpdateType::Enum updateType, + PxU16* __restrict hashKeyArray, DynamicsTempBuffers& tempBuffers); + + PX_FORCE_INLINE void updateParticlesBruteForceHalo(SphUpdateType::Enum updateType, PxVec3* forceBuf, + Particle* particles, const PacketSections& packetSections, + const PacketHaloRegions& haloRegions, + DynamicsTempBuffers& tempBuffers); + + void mergeDensity(physx::PxBaseTask* continuation); + void mergeForce(physx::PxBaseTask* continuation); + + private: + Dynamics& operator=(const Dynamics&); + static SectionToHaloTable sSectionToHaloTable[26]; // Halo region table for each packet section + static OrderedIndexTable sOrderedIndexTable; + + PX_ALIGN(16, DynamicsParameters mParams); + class ParticleSystemSimCpu& mParticleSystem; + Particle* mTempReorderedParticles; + PxVec3* mTempParticleForceBuf; + + typedef Cm::DelegateTask<Dynamics, &Dynamics::mergeDensity> MergeDensityTask; + typedef Cm::DelegateTask<Dynamics, &Dynamics::mergeForce> MergeForceTask; + + MergeDensityTask mMergeDensityTask; + MergeForceTask mMergeForceTask; + PxU32 mNumTasks; + SphUpdateType::Enum mCurrentUpdateType; + PxU32 mNumTempBuffers; + DynamicsTempBuffers mTempBuffers[PT_MAX_PARALLEL_TASKS_SPH]; + TaskData mTaskData[PT_MAX_PARALLEL_TASKS_SPH]; + friend class DynamicsSphTask; +}; + +} 
// namespace Pt +} // namespace physx + +#endif // PX_USE_PARTICLE_SYSTEM_API +#endif // PT_DYNAMICS_H diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsKernels.h b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsKernels.h new file mode 100644 index 00000000..94494072 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsKernels.h @@ -0,0 +1,1105 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. 
All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PT_DYNAMICS_KERNELS_H +#define PT_DYNAMICS_KERNELS_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "PsFPU.h" +#include "foundation/PxUnionCast.h" + +#include "PtDynamicsParameters.h" + +#define REFERENCE_KERNELS 0 + +#if !REFERENCE_KERNELS +#include "PsVecMath.h" +#endif + +namespace physx +{ +namespace Pt +{ + +using namespace Ps; +using namespace aos; + +#define COMPILE_IN_SIMD_DENSITY 1 +#define PX_FORCE_INLINE_KERNELS PX_FORCE_INLINE + +#define MAX_INDEX_STREAM_SIZE 128 +#define PRESSURE_ORIGIN 1 + +// Density contribution of one neighbor at squared distance distSqr: +// densityMultiplierStd * (radiusSqStd - distSqr * scaleSqToStd)^3. No range check is done in this function. +PX_FORCE_INLINE PxF32 calcDensity(const PxF32 distSqr, const DynamicsParameters& params) +{ + PxF32 dist2Std = distSqr * params.scaleSqToStd; + PxF32 radius2MinusDist2Std = params.radiusSqStd - dist2Std; + PxF32 densityStd = params.densityMultiplierStd * radius2MinusDist2Std * radius2MinusDist2Std * radius2MinusDist2Std; + return densityStd; +} + +PX_FORCE_INLINE void addDensity(Particle& particleDst, const PxF32 distSqr, const DynamicsParameters& params) +{ + PX_ASSERT(distSqr <= params.cellSizeSq); + PxF32 densityStd = calcDensity(distSqr, params); + particleDst.density += densityStd; +} + +PX_FORCE_INLINE void addDensity_twoWay(Particle& particleA, Particle& particleB, const PxF32 distSqr, + const DynamicsParameters& params) +{ + PX_ASSERT(distSqr <= params.cellSizeSq); + PxF32 densityStd = calcDensity(distSqr, params); + particleA.density += densityStd; + particleB.density += densityStd; +} + +// Pair force between particle A and particle B before any density scaling: a pressure term along distVec plus a +// viscosity term along the velocity difference (B - A). addForce()/addForce_twoWay() multiply the result by the +// reciprocal of a particle density. distSqr is fed to a reciprocal square root, so callers must not pass 0. +PX_FORCE_INLINE PxVec3 calcForce(const Particle& particleA, const Particle& particleB, const PxF32 distSqr, + const PxVec3& distVec, const DynamicsParameters& params) +{ + PxReal dist2Std = distSqr * params.scaleSqToStd; + + PxReal recipDistStd = physx::intrinsics::recipSqrtFast(dist2Std); + PxReal distStd = dist2Std * recipDistStd; + + PxReal radiusMinusDistStd = params.radiusStd 
- distStd; + +// pressure force +#if PRESSURE_ORIGIN + PxF32 pressureA = PxMax(particleA.density - params.initialDensity, 0.0f); + PxF32 pressureB = PxMax(particleB.density - params.initialDensity, 0.0f); + PxF32 pressureSum = pressureA + pressureB; +#else + PxF32 pressureSum = PxMax(particleA.density + particleB.density - 2 * params.initialDensity, 0.0f); +#endif + + PxReal multiplierPressStd = (params.radiusSqStd * recipDistStd - 2 * params.radiusStd + distStd) * + params.stiffMulPressureMultiplierStd * pressureSum; + + PxVec3 force = distVec * multiplierPressStd * params.scaleToStd; + + // viscosity force + PxReal multiplierViscStd = radiusMinusDistStd * params.viscosityMultiplierStd; + + PxVec3 vDiff = (particleB.velocity - particleA.velocity) * params.scaleToStd; + force += (vDiff * multiplierViscStd); + + return force; +} + +PX_FORCE_INLINE void addForce(PxVec3& particleForceDst, const Particle& particleDst, const Particle& particleSrc, + const PxF32 distSqr, const PxVec3& distVec, const DynamicsParameters& params) +{ + PX_ASSERT(distSqr <= params.cellSizeSq); + PxVec3 force = calcForce(particleDst, particleSrc, distSqr, distVec, params); + particleForceDst += (force * physx::intrinsics::recipFast(particleSrc.density)); +} + +PX_FORCE_INLINE void addForce_twoWay(PxVec3& particleAForce, PxVec3& particleBForce, const Particle& particleA, + const Particle& particleB, const PxF32 distSqr, const PxVec3& distVec, + const DynamicsParameters& params) +{ + PX_ASSERT(distSqr <= params.cellSizeSq); + PxVec3 force = calcForce(particleA, particleB, distSqr, distVec, params); + particleAForce += (force * physx::intrinsics::recipFast(particleB.density)); + particleBForce -= (force * physx::intrinsics::recipFast(particleA.density)); +} + +#if REFERENCE_KERNELS + +// Scalar reference kernel: tests every indexed particle of group A against every indexed particle of group B and +// accumulates density or force for pairs with 0 < distSqr < cellSizeSq. +// NOTE(review): forceBufB[idxB] is evaluated even for one-way updates, while callers in PtDynamics.cpp pass NULL as +// forceBufB in that case - confirm this is safe before switching REFERENCE_KERNELS to 1. +PX_FORCE_INLINE void updateParticleGroupPair(PxVec3* __restrict forceBufA, PxVec3* __restrict forceBufB, + Particle* __restrict particlesSpA, Particle* __restrict particlesSpB, + const PxU32* __restrict 
particleIndicesSpA, const PxU32 numParticlesA, + const PxU32* __restrict particleIndicesSpB, const PxU32 numParticlesB, + const bool twoWayUpdate, const bool isDensityMode, + const DynamicsParameters& params, PxU8* tempSimdPositionBuffer, + PxU32* tempIndexStream) +{ + // Check given particle against particles of another cell. + + for(PxU32 pA = 0; pA < numParticlesA; pA++) + { + PxU32 idxA = particleIndicesSpA[pA]; + Particle& particleA = particlesSpA[idxA]; + PxVec3& forceA = forceBufA[idxA]; + + for(PxU32 pB = 0; pB < numParticlesB; pB++) + { + PxU32 idxB = particleIndicesSpB[pB]; + Particle& particleB = particlesSpB[idxB]; + PxVec3& forceB = forceBufB[idxB]; + + PxVec3 distVec = particleA.position - particleB.position; + PxReal distSqr = distVec.magnitudeSquared(); + + if(distSqr < params.cellSizeSq && distSqr > 0.0f) + { + if(isDensityMode) + { + if(!twoWayUpdate) + addDensity(particleA, distSqr, params); + else + addDensity_twoWay(particleA, particleB, distSqr, params); + } + else + { + if(!twoWayUpdate) + addForce(forceA, particleA, particleB, distSqr, distVec, params); + else + addForce_twoWay(forceA, forceB, particleA, particleB, distSqr, distVec, params); + } + } + } + } +} + +#else // REFERENCE_KERNELS + +class DensityPassType +{ +}; +class ForcePassType +{ +}; +class TwoWayUpdateType +{ +}; +class OneWayUpdateType +{ +}; + +// Compile-time dispatch: each Contribution<PassType, UpdateType>::add specialization below forwards to the matching +// scalar addDensity / addDensity_twoWay / addForce / addForce_twoWay function and ignores the arguments it does not need. +template <typename PassType, typename UpdateType> +struct Contribution +{ +}; + +template <> +struct Contribution<DensityPassType, TwoWayUpdateType> +{ + static void add(PxVec3&, PxVec3&, PxReal distSqr, const PxVec3&, Particle& particleA, Particle& particleB, + const DynamicsParameters& params) + { + addDensity_twoWay(particleA, particleB, distSqr, params); + } +}; + +template <> +struct Contribution<ForcePassType, TwoWayUpdateType> +{ + static void add(PxVec3& forceA, PxVec3& forceB, PxReal distSqr, const PxVec3& distVec, Particle& particleA, + Particle& particleB, const DynamicsParameters& params) + { + addForce_twoWay(forceA, 
forceB, particleA, particleB, distSqr, distVec, params); + } +}; + +template <> +struct Contribution<DensityPassType, OneWayUpdateType> +{ + static void add(PxVec3&, PxVec3&, PxReal distSqr, const PxVec3&, Particle& particleA, Particle&, + const DynamicsParameters& params) + { + addDensity(particleA, distSqr, params); + } +}; + +template <> +struct Contribution<ForcePassType, OneWayUpdateType> +{ + static void add(PxVec3& forceA, PxVec3&, PxReal distSqr, const PxVec3& distVec, Particle& particleA, + Particle& particleB, const DynamicsParameters& params) + { + addForce(forceA, particleA, particleB, distSqr, distVec, params); + } +}; + +// Parameters for simd kernel execution +struct DynamicsParametersSIMD +{ + Ps::aos::Vec4V scaleToStd; + Ps::aos::Vec4V scaleSqToStd; + Ps::aos::Vec4V radiusStd; + Ps::aos::Vec4V radiusSqStd; + Ps::aos::Vec4V densityMultiplierStd; + Ps::aos::Vec4V stiffMulPressureMultiplierStd; + Ps::aos::Vec4V viscosityMultiplierStd; + Ps::aos::Vec4V initialDensity; + Ps::aos::Vec4V stiffnessStd; +}; + +#if COMPILE_IN_SIMD_DENSITY + +// Four-lane version of calcDensity(): loads the four source particles starting at position.x, transposes them and +// adds the resulting densities lane-wise to posDensDstT.col3 (col0..col2 are used as x/y/z). The sources are only read. +// The aligned loads presumably require 16-byte aligned Particle storage - confirm against the Particle definition. +PX_FORCE_INLINE void calcDensity4_onlyPtrs(Mat44V& posDensDstT, const Particle* __restrict pSrc0, + const Particle* __restrict pSrc1, const Particle* __restrict pSrc2, + const Particle* __restrict pSrc3, const DynamicsParametersSIMD& params) +{ + Ps::aos::Mat44V posDensSrc(V4LoadA(&pSrc0->position.x), V4LoadA(&pSrc1->position.x), V4LoadA(&pSrc2->position.x), + V4LoadA(&pSrc3->position.x)); + + Mat44V posDensSrcT = M44Trnsps(posDensSrc); + + Vec4V distVec_x = V4Sub(posDensDstT.col0, posDensSrcT.col0); + Vec4V distVec_y = V4Sub(posDensDstT.col1, posDensSrcT.col1); + Vec4V distVec_z = V4Sub(posDensDstT.col2, posDensSrcT.col2); + + Vec4V distSqr_x = V4Mul(distVec_x, distVec_x); + Vec4V distSqr_xy = V4MulAdd(distVec_y, distVec_y, distSqr_x); + Vec4V distSqr = V4MulAdd(distVec_z, distVec_z, distSqr_xy); + + Vec4V distSqrStd = V4Mul(distSqr, params.scaleSqToStd); + + Vec4V radius2MinusDist2Std = V4Sub(params.radiusSqStd, 
distSqrStd); + Vec4V densityStd = V4Mul(params.densityMultiplierStd, radius2MinusDist2Std); + densityStd = V4Mul(densityStd, radius2MinusDist2Std); + densityStd = V4Mul(densityStd, radius2MinusDist2Std); + + posDensDstT.col3 = V4Add(posDensDstT.col3, densityStd); +} + +PX_FORCE_INLINE void calcDensity4_twoWay_onlyPtrs(Mat44V& posDensDstT, Particle* __restrict pSrc0, + Particle* __restrict pSrc1, Particle* __restrict pSrc2, + Particle* __restrict pSrc3, const DynamicsParametersSIMD& params) +{ + Mat44V posDensSrc(V4LoadA(&pSrc0->position.x), V4LoadA(&pSrc1->position.x), V4LoadA(&pSrc2->position.x), + V4LoadA(&pSrc3->position.x)); + + Mat44V posDensSrcT = M44Trnsps(posDensSrc); + + Vec4V distVec_x = V4Sub(posDensDstT.col0, posDensSrcT.col0); + Vec4V distVec_y = V4Sub(posDensDstT.col1, posDensSrcT.col1); + Vec4V distVec_z = V4Sub(posDensDstT.col2, posDensSrcT.col2); + + Vec4V distSqr_x = V4Mul(distVec_x, distVec_x); + Vec4V distSqr_xy = V4MulAdd(distVec_y, distVec_y, distSqr_x); + Vec4V distSqr = V4MulAdd(distVec_z, distVec_z, distSqr_xy); + + Vec4V distSqrStd = V4Mul(distSqr, params.scaleSqToStd); + + Vec4V radius2MinusDist2Std = V4Sub(params.radiusSqStd, distSqrStd); + Vec4V densityStd = V4Mul(params.densityMultiplierStd, radius2MinusDist2Std); + densityStd = V4Mul(densityStd, radius2MinusDist2Std); + densityStd = V4Mul(densityStd, radius2MinusDist2Std); + + // apply to srcParticles (sschirm TODO rename) + PX_ALIGN(16, PxVec4 density); + V4StoreA(densityStd, &density[0]); + pSrc0->density += density[0]; + pSrc1->density += density[1]; + pSrc2->density += density[2]; + pSrc3->density += density[3]; + + // apply to dstParticle (sschirm TODO rename) + posDensDstT.col3 = V4Add(posDensDstT.col3, densityStd); +} + +#endif // COMPILE_IN_SIMD_DENSITY + +PX_FORCE_INLINE void calcForce4_onlyPtrs(Mat44V& forceDstT, const Particle* __restrict pSrc0, + const Particle* __restrict pSrc1, const Particle* __restrict pSrc2, + const Particle* __restrict pSrc3, const Mat44V& 
posDensDstT, + const Mat44V& velPressDstT, const DynamicsParametersSIMD& params) +{ + Mat44V posDensSrc(V4LoadA(&pSrc0->position.x), V4LoadA(&pSrc1->position.x), V4LoadA(&pSrc2->position.x), + V4LoadA(&pSrc3->position.x)); + + Mat44V posDensSrcT = M44Trnsps(posDensSrc); + + Vec4V distVec_x = V4Sub(posDensDstT.col0, posDensSrcT.col0); + Vec4V distVec_y = V4Sub(posDensDstT.col1, posDensSrcT.col1); + Vec4V distVec_z = V4Sub(posDensDstT.col2, posDensSrcT.col2); + + Vec4V distSqr_x = V4Mul(distVec_x, distVec_x); + Vec4V distSqr_xy = V4MulAdd(distVec_y, distVec_y, distSqr_x); + Vec4V distSqr = V4MulAdd(distVec_z, distVec_z, distSqr_xy); + + Vec4V distSqrStd = V4Mul(distSqr, params.scaleSqToStd); + + Vec4V recipDistStd = V4RsqrtFast(distSqrStd); + Vec4V distStd = V4Mul(distSqrStd, recipDistStd); + Vec4V radiusMinusDistStd = V4Sub(params.radiusStd, distStd); + + // pressure force + Mat44V velPressSrc(V4LoadA(&pSrc0->velocity.x), V4LoadA(&pSrc1->velocity.x), V4LoadA(&pSrc2->velocity.x), + V4LoadA(&pSrc3->velocity.x)); + + Mat44V velPressSrcT = M44Trnsps(velPressSrc); + + Vec4V pressureDst = V4Sub(posDensDstT.col3, params.initialDensity); + Vec4V pressureSrc = V4Sub(posDensSrcT.col3, params.initialDensity); +#if PRESSURE_ORIGIN + pressureDst = V4Max(pressureDst, V4Zero()); + pressureSrc = V4Max(pressureSrc, V4Zero()); + Vec4V pressureSum = V4Add(pressureDst, pressureSrc); +#else + Vec4V pressureSum = V4Add(pressureDst, pressureSrc); + pressureSum = V4Max(pressureSum, V4Zero()); +#endif + + Vec4V radiusStd_x2 = V4Add(params.radiusStd, params.radiusStd); + Vec4V multiplierPressStd = V4MulAdd(params.radiusSqStd, recipDistStd, distStd); + multiplierPressStd = V4Sub(multiplierPressStd, radiusStd_x2); + multiplierPressStd = V4Mul(multiplierPressStd, params.stiffMulPressureMultiplierStd); + multiplierPressStd = V4Mul(multiplierPressStd, pressureSum); + + Vec4V pressureForceMult = V4Mul(multiplierPressStd, params.scaleToStd); + Vec4V force_x = V4Mul(distVec_x, pressureForceMult); + 
Vec4V force_y = V4Mul(distVec_y, pressureForceMult); + Vec4V force_z = V4Mul(distVec_z, pressureForceMult); + + // viscosity force + Vec4V multiplierViscStd = V4Mul(radiusMinusDistStd, params.viscosityMultiplierStd); + + Vec4V viscossityForceMult = V4Mul(params.scaleToStd, multiplierViscStd); + + Vec4V vDiff_x = V4Sub(velPressSrcT.col0, velPressDstT.col0); + Vec4V vDiff_y = V4Sub(velPressSrcT.col1, velPressDstT.col1); + Vec4V vDiff_z = V4Sub(velPressSrcT.col2, velPressDstT.col2); + + force_x = V4MulAdd(vDiff_x, viscossityForceMult, force_x); + force_y = V4MulAdd(vDiff_y, viscossityForceMult, force_y); + force_z = V4MulAdd(vDiff_z, viscossityForceMult, force_z); + + // application of force + Vec4V invDensities = V4RecipFast(posDensSrcT.col3); + force_x = V4Mul(force_x, invDensities); + force_y = V4Mul(force_y, invDensities); + force_z = V4Mul(force_z, invDensities); + + forceDstT.col0 = V4Add(forceDstT.col0, force_x); + forceDstT.col1 = V4Add(forceDstT.col1, force_y); + forceDstT.col2 = V4Add(forceDstT.col2, force_z); +} + +PX_FORCE_INLINE void calcForce4_twoWay_onlyPtrs(Mat44V& forceDstT, Mat44V& forceSrcT, Particle* __restrict pSrc0, + Particle* __restrict pSrc1, Particle* __restrict pSrc2, + Particle* __restrict pSrc3, const Mat44V& posDensDstT, + const Mat44V& velPressDstT, const Vec4V& invDensityDst, + const DynamicsParametersSIMD& params) +{ + Mat44V posDensSrc(V4LoadA(&pSrc0->position.x), V4LoadA(&pSrc1->position.x), V4LoadA(&pSrc2->position.x), + V4LoadA(&pSrc3->position.x)); + + Mat44V posDensSrcT = M44Trnsps(posDensSrc); + + Vec4V distVec_x = V4Sub(posDensDstT.col0, posDensSrcT.col0); + Vec4V distVec_y = V4Sub(posDensDstT.col1, posDensSrcT.col1); + Vec4V distVec_z = V4Sub(posDensDstT.col2, posDensSrcT.col2); + + Vec4V distSqr_x = V4Mul(distVec_x, distVec_x); + Vec4V distSqr_xy = V4MulAdd(distVec_y, distVec_y, distSqr_x); + Vec4V distSqr = V4MulAdd(distVec_z, distVec_z, distSqr_xy); + + Vec4V distSqrStd = V4Mul(distSqr, params.scaleSqToStd); + + Vec4V 
recipDistStd = V4RsqrtFast(distSqrStd); + Vec4V distStd = V4Mul(distSqrStd, recipDistStd); + Vec4V radiusMinusDistStd = V4Sub(params.radiusStd, distStd); + + // pressure force + Mat44V velPressSrc(V4LoadA(&pSrc0->velocity.x), V4LoadA(&pSrc1->velocity.x), V4LoadA(&pSrc2->velocity.x), + V4LoadA(&pSrc3->velocity.x)); + + Mat44V velPressSrcT = M44Trnsps(velPressSrc); + + Vec4V pressureDst = V4Sub(posDensDstT.col3, params.initialDensity); + Vec4V pressureSrc = V4Sub(posDensSrcT.col3, params.initialDensity); +#if PRESSURE_ORIGIN + pressureDst = V4Max(pressureDst, V4Zero()); + pressureSrc = V4Max(pressureSrc, V4Zero()); + Vec4V pressureSum = V4Add(pressureDst, pressureSrc); +#else + Vec4V pressureSum = V4Add(pressureDst, pressureSrc); + pressureSum = V4Max(pressureSum, V4Zero()); +#endif + + Vec4V radiusStd_x2 = V4Add(params.radiusStd, params.radiusStd); + Vec4V multiplierPressStd = V4MulAdd(params.radiusSqStd, recipDistStd, distStd); + multiplierPressStd = V4Sub(multiplierPressStd, radiusStd_x2); + multiplierPressStd = V4Mul(multiplierPressStd, params.stiffMulPressureMultiplierStd); + multiplierPressStd = V4Mul(multiplierPressStd, pressureSum); + + Vec4V pressureForceMult = V4Mul(multiplierPressStd, params.scaleToStd); + Vec4V force_x = V4Mul(distVec_x, pressureForceMult); + Vec4V force_y = V4Mul(distVec_y, pressureForceMult); + Vec4V force_z = V4Mul(distVec_z, pressureForceMult); + + // viscosity force + Vec4V multiplierViscStd = V4Mul(radiusMinusDistStd, params.viscosityMultiplierStd); + + Vec4V viscossityForceMult = V4Mul(params.scaleToStd, multiplierViscStd); + + Vec4V vDiff_x = V4Sub(velPressSrcT.col0, velPressDstT.col0); + Vec4V vDiff_y = V4Sub(velPressSrcT.col1, velPressDstT.col1); + Vec4V vDiff_z = V4Sub(velPressSrcT.col2, velPressDstT.col2); + + force_x = V4MulAdd(vDiff_x, viscossityForceMult, force_x); + force_y = V4MulAdd(vDiff_y, viscossityForceMult, force_y); + force_z = V4MulAdd(vDiff_z, viscossityForceMult, force_z); + + // apply to src particles (sschirm 
TODO:rename) + forceSrcT.col0 = V4NegMulSub(force_x, invDensityDst, forceSrcT.col0); + forceSrcT.col1 = V4NegMulSub(force_y, invDensityDst, forceSrcT.col1); + forceSrcT.col2 = V4NegMulSub(force_z, invDensityDst, forceSrcT.col2); + + // apply to dst particle (sschirm TODO:rename) + Vec4V invDensities = V4RecipFast(posDensSrcT.col3); + forceDstT.col0 = V4MulAdd(force_x, invDensities, forceDstT.col0); + forceDstT.col1 = V4MulAdd(force_y, invDensities, forceDstT.col1); + forceDstT.col2 = V4MulAdd(force_z, invDensities, forceDstT.col2); +} + +#if !PX_IOS + +static void updateStreamDensity(Particle* __restrict particlesA, const Particle* __restrict particlesB, + const PxU32* indexStream, const PxU32 indexStreamSize, const DynamicsParameters& params, + const DynamicsParametersSIMD& simdParams) +{ + PX_UNUSED(simdParams); + PxU32 s = 0; + while(s < indexStreamSize) + { + PxU32 dstIdx = indexStream[s++]; + PxU32 numInteractions = indexStream[s++]; + + // the simd density code is currently disabled, since it's not a real win + if(1) + { + for(PxU32 i = 0; i < numInteractions; ++i) + { + PxU32 srcIdx = indexStream[s++]; + PX_ALIGN(16, PxVec3 distVec) = particlesA[dstIdx].position - particlesB[srcIdx].position; + PxF32 distSqr = distVec.magnitudeSquared(); + addDensity(particlesA[dstIdx], distSqr, params); + } + } +#if COMPILE_IN_SIMD_DENSITY + else + { + Particle* __restrict dstParticle = particlesA + dstIdx; + PxU32 blockCount = numInteractions / 4; + + if(blockCount > 0) + { + Vec4V tmp = V4LoadA(&dstParticle->position.x); + Mat44V posDensDst(tmp, tmp, tmp, tmp); + Mat44V posDensDstT = M44Trnsps(posDensDst); + + // set density to zero + posDensDstT.col3 = V4Zero(); + + for(PxU32 i = 0; i < blockCount; ++i) + { + PxU32 srcIdx0 = indexStream[s++]; + PxU32 srcIdx1 = indexStream[s++]; + PxU32 srcIdx2 = indexStream[s++]; + PxU32 srcIdx3 = indexStream[s++]; + + calcDensity4_onlyPtrs(posDensDstT, particlesB + srcIdx0, particlesB + srcIdx1, particlesB + srcIdx2, + particlesB + 
srcIdx3, simdParams); + } + + // simd to scalar + PX_ALIGN(16, PxVec4 density); + V4StoreA(posDensDstT.col3, &density[0]); + dstParticle->density += density[0] + density[1] + density[2] + density[3]; + } + + PxU32 numLeft = numInteractions - blockCount * 4; + for(PxU32 i = 0; i < numLeft; ++i) + { + PxU32 srcIdx = indexStream[s++]; + + PX_ALIGN(16, PxVec3) distVec = particlesA[dstIdx].position - particlesB[srcIdx].position; + PxF32 distSqr = distVec.magnitudeSquared(); + addDensity(particlesA[dstIdx], distSqr, params); + } + } +#endif // COMPILE_IN_SIMD_DENSITY + } +} + +static void updateStreamDensityTwoWay(Particle* __restrict particlesA, Particle* __restrict particlesB, + const PxU32* indexStream, const PxU32 indexStreamSize, + const DynamicsParameters& params, const DynamicsParametersSIMD& simdParams) +{ + PX_UNUSED(simdParams); + PxU32 s = 0; + while(s < indexStreamSize) + { + PxU32 dstIdx = indexStream[s++]; + PxU32 numInteractions = indexStream[s++]; + + // the simd density code is currently disabled, since it's not a real win + if(1) + { + for(PxU32 i = 0; i < numInteractions; ++i) + { + PxU32 srcIdx = indexStream[s++]; + PX_ALIGN(16, PxVec3) distVec = particlesA[dstIdx].position - particlesB[srcIdx].position; + PxF32 distSqr = distVec.magnitudeSquared(); + addDensity_twoWay(particlesA[dstIdx], particlesB[srcIdx], distSqr, params); + } + } +#if COMPILE_IN_SIMD_DENSITY + else + { + Particle* __restrict dstParticle = particlesA + dstIdx; + PxU32 blockCount = numInteractions / 4; + + if(blockCount > 0) + { + Vec4V tmp = V4LoadA(&dstParticle->position.x); + Mat44V posDensDst(tmp, tmp, tmp, tmp); + Mat44V posDensDstT = M44Trnsps(posDensDst); + + // set density to zero + posDensDstT.col3 = V4Zero(); + + for(PxU32 i = 0; i < blockCount; ++i) + { + PxU32 srcIdx0 = indexStream[s++]; + PxU32 srcIdx1 = indexStream[s++]; + PxU32 srcIdx2 = indexStream[s++]; + PxU32 srcIdx3 = indexStream[s++]; + + calcDensity4_twoWay_onlyPtrs(posDensDstT, particlesB + srcIdx0, particlesB 
+ srcIdx1, + particlesB + srcIdx2, particlesB + srcIdx3, simdParams); + } + + // simd to scalar + PX_ALIGN(16, PxVec4 density); + V4StoreA(posDensDstT.col3, &density[0]); + dstParticle->density += density[0] + density[1] + density[2] + density[3]; + } + + PxU32 numLeft = numInteractions - blockCount * 4; + for(PxU32 i = 0; i < numLeft; ++i) + { + PxU32 srcIdx = indexStream[s++]; + + PX_ALIGN(16, PxVec3 distVec) = particlesA[dstIdx].position - particlesB[srcIdx].position; + PxF32 distSqr = distVec.magnitudeSquared(); + addDensity_twoWay(particlesA[dstIdx], particlesB[srcIdx], distSqr, params); + } + } +#endif // COMPILE_IN_SIMD_DENSITY + } +} + +static void updateStreamForce(PxVec3* __restrict forceBufA, Particle* __restrict particlesA, + const Particle* __restrict particlesB, const PxU32* indexStream, + const PxU32 indexStreamSize, const DynamicsParameters& params, + const DynamicsParametersSIMD& simdParams) +{ + PxU32 s = 0; + while(s < indexStreamSize) + { + PxU32 dstIdx = indexStream[s++]; + Particle* __restrict dstParticle = particlesA + dstIdx; + + PxU32 numInteractions = indexStream[s++]; + PxU32 blockCount = numInteractions / 4; + + if(blockCount > 0) + { + Vec4V tmp = V4LoadA(&dstParticle->position.x); + Mat44V posDensDst(tmp, tmp, tmp, tmp); + Mat44V posDensDstT = M44Trnsps(posDensDst); + + Mat44V forceDstT(V4Zero(), V4Zero(), V4Zero(), V4Zero()); + + tmp = V4LoadA(&dstParticle->velocity.x); + Mat44V velPressDst(tmp, tmp, tmp, tmp); + Mat44V velPressDstT = M44Trnsps(velPressDst); + + for(PxU32 i = 0; i < blockCount; ++i) + { + PxU32 srcIdx0 = indexStream[s++]; + PxU32 srcIdx1 = indexStream[s++]; + PxU32 srcIdx2 = indexStream[s++]; + PxU32 srcIdx3 = indexStream[s++]; + + calcForce4_onlyPtrs(forceDstT, particlesB + srcIdx0, particlesB + srcIdx1, particlesB + srcIdx2, + particlesB + srcIdx3, posDensDstT, velPressDstT, simdParams); + } + + // simd to scalar + Mat44V forceDst = M44Trnsps(forceDstT); + Vec4V forceTmp1 = V4Add(forceDst.col0, forceDst.col1); + 
Vec4V forceTmp2 = V4Add(forceDst.col2, forceDst.col3); + forceTmp1 = V4Add(forceTmp1, forceTmp2); + forceBufA[dstIdx] += V4ReadXYZ(forceTmp1); + } + + PxU32 numLeft = numInteractions - blockCount * 4; + for(PxU32 i = 0; i < numLeft; ++i) + { + PxU32 srcIdx = indexStream[s++]; + + PX_ALIGN(16, PxVec3 distVec) = particlesA[dstIdx].position - particlesB[srcIdx].position; + PxF32 distSqr = distVec.magnitudeSquared(); + addForce(forceBufA[dstIdx], particlesA[dstIdx], particlesB[srcIdx], distSqr, distVec, params); + } + } +} + +static void updateStreamForceTwoWay(PxVec3* __restrict forceBufA, PxVec3* __restrict forceBufB, + Particle* __restrict particlesA, Particle* __restrict particlesB, + const PxU32* indexStream, const PxU32 indexStreamSize, + const DynamicsParameters& params, const DynamicsParametersSIMD& simdParams) +{ + PX_ASSERT(forceBufB); + PxU32 s = 0; + while(s < indexStreamSize) + { + PxU32 dstIdx = indexStream[s++]; + Particle* __restrict dstParticle = particlesA + dstIdx; + + PxU32 numInteractions = indexStream[s++]; + PxU32 blockCount = numInteractions / 4; + + if(blockCount > 0) + { + Vec4V tmp = V4LoadA(&dstParticle->position.x); + Mat44V posDensDst(tmp, tmp, tmp, tmp); + Mat44V posDensDstT = M44Trnsps(posDensDst); + + Mat44V forceDstT(V4Zero(), V4Zero(), V4Zero(), V4Zero()); + + tmp = V4LoadA(&dstParticle->velocity.x); + Mat44V velPressDst(tmp, tmp, tmp, tmp); + Mat44V velPressDstT = M44Trnsps(velPressDst); + + tmp = V4Load(dstParticle->density); + Vec4V invDensityA = V4RecipFast(tmp); + + for(PxU32 i = 0; i < blockCount; ++i) + { + PxU32 srcIdx0 = indexStream[s++]; + PxU32 srcIdx1 = indexStream[s++]; + PxU32 srcIdx2 = indexStream[s++]; + PxU32 srcIdx3 = indexStream[s++]; + + Vec4V tmp0 = Vec4V_From_Vec3V(V3LoadU(&forceBufB[srcIdx0].x)); + Vec4V tmp1 = Vec4V_From_Vec3V(V3LoadU(&forceBufB[srcIdx1].x)); + Vec4V tmp2 = Vec4V_From_Vec3V(V3LoadU(&forceBufB[srcIdx2].x)); + Vec4V tmp3 = Vec4V_From_Vec3V(V3LoadU(&forceBufB[srcIdx3].x)); + Mat44V forceSrc(tmp0, 
tmp1, tmp2, tmp3); + Mat44V forceSrcT = M44Trnsps(forceSrc); + + calcForce4_twoWay_onlyPtrs(forceDstT, forceSrcT, particlesB + srcIdx0, particlesB + srcIdx1, + particlesB + srcIdx2, particlesB + srcIdx3, posDensDstT, velPressDstT, + invDensityA, simdParams); + + forceSrc = M44Trnsps(forceSrcT); + forceBufB[srcIdx0] = V4ReadXYZ(forceSrc.col0); + forceBufB[srcIdx1] = V4ReadXYZ(forceSrc.col1); + forceBufB[srcIdx2] = V4ReadXYZ(forceSrc.col2); + forceBufB[srcIdx3] = V4ReadXYZ(forceSrc.col3); + } + + // simd to scalar + Mat44V forceDst = M44Trnsps(forceDstT); + Vec4V forceTmp1 = V4Add(forceDst.col0, forceDst.col1); + Vec4V forceTmp2 = V4Add(forceDst.col2, forceDst.col3); + forceTmp1 = V4Add(forceTmp1, forceTmp2); + forceBufA[dstIdx] += V4ReadXYZ(forceTmp1); + } + + PxU32 numLeft = numInteractions - blockCount * 4; + for(PxU32 i = 0; i < numLeft; ++i) + { + PxU32 srcIdx = indexStream[s++]; + + PX_ALIGN(16, PxVec3 distVec) = particlesA[dstIdx].position - particlesB[srcIdx].position; + PxF32 distSqr = distVec.magnitudeSquared(); + addForce_twoWay(forceBufA[dstIdx], forceBufB[srcIdx], particlesA[dstIdx], particlesB[srcIdx], distSqr, + distVec, params); + } + } +} + +#endif // !PX_IOS + +template <typename PassType, typename UpdateType> +PX_FORCE_INLINE_KERNELS static void updateParticleGroupPair_small_template( + PxVec3* __restrict forceBufA, PxVec3* __restrict forceBufB, Particle* __restrict particlesA, + Particle* __restrict particlesB, const PxU32* __restrict particleIndicesA, const PxU32 numParticlesA, + const PxU32* __restrict particleIndicesB, const PxU32 numParticlesB, const DynamicsParameters& params) +{ + PxU32 num_loopB = 4 * (numParticlesB / 4); + PxU32 u_cellSizeSq = PxUnionCast<PxU32, PxF32>(params.cellSizeSq); + + for(PxU32 pA = 0; pA < numParticlesA; pA++) + { + PxU32 idxA = particleIndicesA[pA]; + Particle& particleA = particlesA[idxA]; + PxVec3& forceA = forceBufA[idxA]; + + for(PxU32 pB = 0; pB < num_loopB; pB += 4) + { + PxU32 idxB0 = particleIndicesB[pB]; 
+ PxU32 idxB1 = particleIndicesB[pB + 1]; + PxU32 idxB2 = particleIndicesB[pB + 2]; + PxU32 idxB3 = particleIndicesB[pB + 3]; + + Particle& particleB0 = particlesB[idxB0]; + Particle& particleB1 = particlesB[idxB1]; + Particle& particleB2 = particlesB[idxB2]; + Particle& particleB3 = particlesB[idxB3]; + + PxVec3& forceB0 = forceBufB[idxB0]; + PxVec3& forceB1 = forceBufB[idxB1]; + PxVec3& forceB2 = forceBufB[idxB2]; + PxVec3& forceB3 = forceBufB[idxB3]; + + PX_ALIGN(16, PxVec3 distVec0) = particleA.position - particleB0.position; + PX_ALIGN(16, PxVec3 distVec1) = particleA.position - particleB1.position; + PX_ALIGN(16, PxVec3 distVec2) = particleA.position - particleB2.position; + PX_ALIGN(16, PxVec3 distVec3) = particleA.position - particleB3.position; + + PxReal distSqr0 = distVec0.magnitudeSquared(); + PxReal distSqr1 = distVec1.magnitudeSquared(); + PxReal distSqr2 = distVec2.magnitudeSquared(); + PxReal distSqr3 = distVec3.magnitudeSquared(); + + // marginally faster to do that test (not as good as in brute force) + PxF32 isec = physx::intrinsics::fsel(params.cellSizeSq - distSqr0, 1.0f, 0.0f); + isec = physx::intrinsics::fsel(params.cellSizeSq - distSqr1, 1.0f, isec); + isec = physx::intrinsics::fsel(params.cellSizeSq - distSqr2, 1.0f, isec); + isec = physx::intrinsics::fsel(params.cellSizeSq - distSqr3, 1.0f, isec); + + if(isec == 0.0f) + continue; + + PxU32 u_distSqr0 = PxUnionCast<PxU32, PxReal>(distSqr0); + PxU32 u_distSqr1 = PxUnionCast<PxU32, PxReal>(distSqr1); + PxU32 u_distSqr2 = PxUnionCast<PxU32, PxReal>(distSqr2); + PxU32 u_distSqr3 = PxUnionCast<PxU32, PxReal>(distSqr3); + + if(u_distSqr0 < u_cellSizeSq && u_distSqr0 > 0) + { + Contribution<PassType, UpdateType>::add(forceA, forceB0, distSqr0, distVec0, particleA, particleB0, + params); + } + if(u_distSqr1 < u_cellSizeSq && u_distSqr1 > 0) + { + Contribution<PassType, UpdateType>::add(forceA, forceB1, distSqr1, distVec1, particleA, particleB1, + params); + } + if(u_distSqr2 < u_cellSizeSq && 
u_distSqr2 > 0) + { + Contribution<PassType, UpdateType>::add(forceA, forceB2, distSqr2, distVec2, particleA, particleB2, + params); + } + if(u_distSqr3 < u_cellSizeSq && u_distSqr3 > 0) + { + Contribution<PassType, UpdateType>::add(forceA, forceB3, distSqr3, distVec3, particleA, particleB3, + params); + } + } + + for(PxU32 pB = num_loopB; pB < numParticlesB; pB++) + { + PxU32 idxB = particleIndicesB[pB]; + Particle& particleB = particlesB[idxB]; + PxVec3& forceB = forceBufB[idxB]; + + PX_ALIGN(16, PxVec3 distVec) = particleA.position - particleB.position; + + PxReal distSqr = distVec.magnitudeSquared(); + PxU32 u_distSqr = PxUnionCast<PxU32, PxReal>(distSqr); + + if(u_distSqr < u_cellSizeSq && u_distSqr > 0) + { + Contribution<PassType, UpdateType>::add(forceA, forceB, distSqr, distVec, particleA, particleB, params); + } + } + } +} + +#if !PX_IOS +/** +particlesA, particlesB, particleIndicesA, particleIndicesB are guaranteed to be non-overlapping +*/ +static void updateParticleGroupPair_simd_template(PxVec3* forceBufA, PxVec3* forceBufB, Particle* particlesA, + Particle* particlesB, const PxU32* particleIndicesA, + const PxU32 numParticlesA, const PxU32* particleIndicesB, + const PxU32 numParticlesB, const DynamicsParameters& params, + const bool isDensityMode, const bool twoWayUpdate, + Vec4V* tempSimdPositionBuffer, PxU32* tempIndexStream) +{ + PxU32 numParticles4B = ((numParticlesB + 3) & ~0x3) + 4; // ceil up to multiple of four + 4 for safe unrolling + + PX_ALIGN(16, Particle fakeParticle); + fakeParticle.position = PxVec3(FLT_MAX, FLT_MAX, FLT_MAX); + fakeParticle.density = FLT_MAX; // avoid uninitialized access by V4LoadA + + const PxU32* __restrict idxB = particleIndicesB; + const PxU32* __restrict idxBEnd = particleIndicesB + numParticlesB; + for(PxU32 q = 0, v = 0; q < numParticles4B; q += 4, idxB += 4, v += 3) + { + const Particle* prtB0 = (q < numParticlesB) ? particlesB + *(idxB) : &fakeParticle; + const Particle* prtB1 = (q + 1 < numParticlesB) ? 
particlesB + *(idxB + 1) : &fakeParticle; + const Particle* prtB2 = (q + 2 < numParticlesB) ? particlesB + *(idxB + 2) : &fakeParticle; + const Particle* prtB3 = (q + 3 < numParticlesB) ? particlesB + *(idxB + 3) : &fakeParticle; + + Mat44V posDensB_N(V4LoadA(&prtB0->position.x), V4LoadA(&prtB1->position.x), V4LoadA(&prtB2->position.x), + V4LoadA(&prtB3->position.x)); + Mat44V posDensTB_N = M44Trnsps(posDensB_N); + + tempSimdPositionBuffer[v] = posDensTB_N.col0; + tempSimdPositionBuffer[v + 1] = posDensTB_N.col1; + tempSimdPositionBuffer[v + 2] = posDensTB_N.col2; + } + + DynamicsParametersSIMD simdParams; + simdParams.scaleToStd = V4Load(params.scaleToStd); + simdParams.scaleSqToStd = V4Load(params.scaleSqToStd); + simdParams.radiusStd = V4Load(params.radiusStd); + simdParams.radiusSqStd = V4Load(params.radiusSqStd); + simdParams.densityMultiplierStd = V4Load(params.densityMultiplierStd); + simdParams.stiffMulPressureMultiplierStd = V4Load(params.stiffMulPressureMultiplierStd); + simdParams.viscosityMultiplierStd = V4Load(params.viscosityMultiplierStd); + simdParams.initialDensity = V4Load(params.initialDensity); + Vec4V simdCellSizeSq = V4Load(params.cellSizeSq); + VecU32V simdIntOne = U4LoadXYZW(1, 1, 1, 1); + VecU32V simdIntZero = U4LoadXYZW(0, 0, 0, 0); + + PxU32 indexStreamSize = 0; + const PxU32* __restrict idxA = particleIndicesA; + for(PxU32 p = 0; p < numParticlesA; p++, idxA++) + { + Particle* __restrict prtA = particlesA + *idxA; + + PX_ASSERT(MAX_INDEX_STREAM_SIZE - indexStreamSize >= 2); + tempIndexStream[indexStreamSize++] = *idxA; + + PxU32* interactionCountPtr = tempIndexStream + indexStreamSize++; + PxU32 indexStreamSizeOld = indexStreamSize; + + PX_ALIGN(16, PxU32 isecs[8]); + idxB = particleIndicesB; + + Vec4V tmp = V4LoadA(&prtA->position.x); + Mat44V posDensA(tmp, tmp, tmp, tmp); + Mat44V posDensTA = M44Trnsps(posDensA); + + const Vec4V* prtB = tempSimdPositionBuffer; + Vec4V posT0B = *prtB++; + Vec4V posT1B = *prtB++; + Vec4V posT2B = 
*prtB++; + Vec4V distVec_x = V4Sub(posDensTA.col0, posT0B); + Vec4V distVec_y = V4Sub(posDensTA.col1, posT1B); + Vec4V distVec_z = V4Sub(posDensTA.col2, posT2B); + Vec4V distSqr_x = V4Mul(distVec_x, distVec_x); + Vec4V distSqr_xy = V4MulAdd(distVec_y, distVec_y, distSqr_x); + Vec4V distSqr = V4MulAdd(distVec_z, distVec_z, distSqr_xy); + BoolV isec_b = V4IsGrtr(simdCellSizeSq, distSqr); + isec_b = BAnd(isec_b, V4IsGrtr(distSqr, V4Zero())); + VecU32V isec = V4U32Sel(isec_b, simdIntOne, simdIntZero); + + U4StoreA(isec, isecs); + + for(PxU32 q = 0; q < numParticlesB; q += 4, idxB += 4) + { + Vec4V posT0B_N = *prtB++; + Vec4V posT1B_N = *prtB++; + Vec4V posT2B_N = *prtB++; + Vec4V distVec_x_N = V4Sub(posDensTA.col0, posT0B_N); + Vec4V distVec_y_N = V4Sub(posDensTA.col1, posT1B_N); + Vec4V distVec_z_N = V4Sub(posDensTA.col2, posT2B_N); + Vec4V distSqr_x_N = V4Mul(distVec_x_N, distVec_x_N); + Vec4V distSqr_xy_N = V4MulAdd(distVec_y_N, distVec_y_N, distSqr_x_N); + Vec4V distSqr_N = V4MulAdd(distVec_z_N, distVec_z_N, distSqr_xy_N); + BoolV isec_b_N = V4IsGrtr(simdCellSizeSq, distSqr_N); + isec_b_N = BAnd(isec_b_N, V4IsGrtr(distSqr_N, V4Zero())); + VecU32V isec_N = V4U32Sel(isec_b_N, simdIntOne, simdIntZero); + + PxU32 base_write_index = (q + 4) & 7; + U4StoreA(isec_N, isecs + base_write_index); + + PxU32 base_read_index = q & 7; + PxU32 u_isec0 = isecs[base_read_index]; + PxU32 u_isec1 = isecs[base_read_index + 1]; + PxU32 u_isec2 = isecs[base_read_index + 2]; + PxU32 u_isec3 = isecs[base_read_index + 3]; + + PX_ASSERT(MAX_INDEX_STREAM_SIZE - indexStreamSize >= 4); + + PX_ASSERT(indexStreamSize < MAX_INDEX_STREAM_SIZE); + PX_ASSERT(idxB < idxBEnd); + tempIndexStream[indexStreamSize] = *(idxB); + indexStreamSize += u_isec0; + + PX_ASSERT(indexStreamSize < MAX_INDEX_STREAM_SIZE); + tempIndexStream[indexStreamSize] = ((idxB + 1) < idxBEnd) ? 
*(idxB + 1) : 0; + indexStreamSize += u_isec1; + + PX_ASSERT(indexStreamSize < MAX_INDEX_STREAM_SIZE); + tempIndexStream[indexStreamSize] = ((idxB + 2) < idxBEnd) ? *(idxB + 2) : 0; + indexStreamSize += u_isec2; + + PX_ASSERT(indexStreamSize < MAX_INDEX_STREAM_SIZE); + tempIndexStream[indexStreamSize] = ((idxB + 3) < idxBEnd) ? *(idxB + 3) : 0; + indexStreamSize += u_isec3; + + // flush interactions + if(MAX_INDEX_STREAM_SIZE - indexStreamSize >= (4 + 2)) + ; + else // 4+2, since we potentially need to add the dst index + the src count as well. + { + *interactionCountPtr = indexStreamSize - indexStreamSizeOld; + if(isDensityMode) + { + if(twoWayUpdate) + updateStreamDensityTwoWay(particlesA, particlesB, tempIndexStream, indexStreamSize, params, + simdParams); + else + updateStreamDensity(particlesA, particlesB, tempIndexStream, indexStreamSize, params, simdParams); + } + else + { + if(twoWayUpdate) + updateStreamForceTwoWay(forceBufA, forceBufB, particlesA, particlesB, tempIndexStream, + indexStreamSize, params, simdParams); + else + updateStreamForce(forceBufA, particlesA, particlesB, tempIndexStream, indexStreamSize, params, + simdParams); + } + + indexStreamSize = 0; + tempIndexStream[indexStreamSize++] = *idxA; + interactionCountPtr = tempIndexStream + indexStreamSize++; + indexStreamSizeOld = indexStreamSize; + } + } + + *interactionCountPtr = indexStreamSize - indexStreamSizeOld; + } + + if(indexStreamSize > 0) + { + if(isDensityMode) + { + if(twoWayUpdate) + updateStreamDensityTwoWay(particlesA, particlesB, tempIndexStream, indexStreamSize, params, simdParams); + else + updateStreamDensity(particlesA, particlesB, tempIndexStream, indexStreamSize, params, simdParams); + } + else + { + if(twoWayUpdate) + updateStreamForceTwoWay(forceBufA, forceBufB, particlesA, particlesB, tempIndexStream, indexStreamSize, + params, simdParams); + else + updateStreamForce(forceBufA, particlesA, particlesB, tempIndexStream, indexStreamSize, params, + simdParams); + } + } +} + 
+#endif // !PX_IOS + +#define SIMD_THRESH_SRC 8 + +/** +Computes and adds contributions of particle group B to particle group A. If twoWayUpdate is true, +group B is updated with contributions from group A as well. +*/ +PX_FORCE_INLINE_KERNELS static void +updateParticleGroupPair(PxVec3* __restrict forceBufA, PxVec3* __restrict forceBufB, Particle* __restrict particlesA, + Particle* __restrict particlesB, const PxU32* __restrict particleIndicesA, + const PxU32 numParticlesA, const PxU32* __restrict particleIndicesB, const PxU32 numParticlesB, + const bool twoWayUpdate, const bool isDensityMode, const DynamicsParameters& params, + PxU8* tempSimdPositionBuffer, PxU32* tempIndexStream) +{ + PX_ASSERT(numParticlesA > 0); + PX_ASSERT(numParticlesB > 0); + +#if !PX_IOS + if(numParticlesB < SIMD_THRESH_SRC) +#endif + { + if(isDensityMode) + { + if(twoWayUpdate) + { + PX_ASSERT(forceBufB); + updateParticleGroupPair_small_template<DensityPassType, TwoWayUpdateType>( + forceBufA, forceBufB, particlesA, particlesB, particleIndicesA, numParticlesA, particleIndicesB, + numParticlesB, params); + } + else + { + updateParticleGroupPair_small_template<DensityPassType, OneWayUpdateType>( + forceBufA, forceBufB, particlesA, particlesB, particleIndicesA, numParticlesA, particleIndicesB, + numParticlesB, params); + } + } + else + { + if(twoWayUpdate) + { + PX_ASSERT(forceBufB); + updateParticleGroupPair_small_template<ForcePassType, TwoWayUpdateType>( + forceBufA, forceBufB, particlesA, particlesB, particleIndicesA, numParticlesA, particleIndicesB, + numParticlesB, params); + } + else + { + updateParticleGroupPair_small_template<ForcePassType, OneWayUpdateType>( + forceBufA, forceBufB, particlesA, particlesB, particleIndicesA, numParticlesA, particleIndicesB, + numParticlesB, params); + } + } + } +#if !PX_IOS + else + { + updateParticleGroupPair_simd_template(forceBufA, forceBufB, particlesA, particlesB, particleIndicesA, + numParticlesA, particleIndicesB, numParticlesB, params, 
isDensityMode, + twoWayUpdate, reinterpret_cast<Vec4V*>(tempSimdPositionBuffer), + tempIndexStream); + } +#else + PX_UNUSED(tempSimdPositionBuffer); + PX_UNUSED(tempIndexStream); +#endif +} + +#endif // REFERENCE_KERNELS + +} // namespace Pt +} // namespace physx + +#endif // PX_USE_PARTICLE_SYSTEM_API +#endif // PT_DYNAMICS_KERNELS_H diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsParameters.h b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsParameters.h new file mode 100644 index 00000000..42e3aa79 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsParameters.h @@ -0,0 +1,83 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. +#ifndef PT_DYNAMICS_PARAMETER_H +#define PT_DYNAMICS_PARAMETER_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +namespace physx +{ + +namespace Pt +{ + +struct SphUpdateType +{ + enum Enum + { + DENSITY, + FORCE, + EXIT + }; +}; + +struct DynamicsParameters +{ + PxReal selfDensity; + PxReal particleMassStd; + PxReal cellSize; + PxReal cellSizeInv; + + PxReal cellSizeSq; + PxReal packetSize; + PxReal radiusStd; + PxReal radiusSqStd; + + PxReal densityMultiplierStd; + PxReal stiffMulPressureMultiplierStd; + PxReal viscosityMultiplierStd; + PxReal initialDensity; + + PxReal scaleToStd; + PxReal scaleSqToStd; + PxReal scaleToWorld; + PxReal densityNormalizationFactor; + + PxU32 packetMultLog; + PxU32 pad[3]; +}; + +PX_COMPILE_TIME_ASSERT(sizeof(DynamicsParameters) % 16 == 0); + +} // namespace Pt +} // namespace physx + +#endif // PX_USE_PARTICLE_SYSTEM_API +#endif // PT_DYNAMICS_PARAMETER_H diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsTempBuffers.h b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsTempBuffers.h new file mode 100644 index 00000000..bafe3679 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtDynamicsTempBuffers.h @@ -0,0 +1,62 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. 
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef PT_DYNAMICS_TEMP_BUFFERS_H
#define PT_DYNAMICS_TEMP_BUFFERS_H

#include "PxPhysXConfig.h"
#if PX_USE_PARTICLE_SYSTEM_API

#include "PtParticleCell.h"

namespace physx
{

namespace Pt
{

// Bundle of raw pointers to scratch buffers used by the particle dynamics code.
//
// This struct only carries the pointers (plus one size); it declares no constructor
// or destructor, so allocation, sizing and release of the buffers happen elsewhere.
// NOTE(review): buffer sizes, ownership and which thread may touch which buffer are
// not visible in this header - confirm against the code that fills it in.
struct DynamicsTempBuffers
{
    PxU32* indicesSubpacketA;
    PxU32* indicesSubpacketB;
    PxU32* mergedIndices;
    Particle* mergedHaloRegions;
    ParticleCell* cellHashTableSubpacketA;
    ParticleCell* cellHashTableSubpacketB;
    PxU32 cellHashMaxSize; // NOTE(review): presumably the capacity of the two cell hash tables above - confirm
    PxU8* simdPositionsSubpacket;
    PxU32* indexStream;
    const PxU32* orderedIndicesSubpacket; // read-only through this struct (pointer to const)
    PxU16* hashKeys;
};

} // namespace Pt
} // namespace physx

#endif // PX_USE_PARTICLE_SYSTEM_API
#endif // PT_DYNAMICS_TEMP_BUFFERS_H
#ifndef PT_HEIGHT_FIELD_AABB_TEST_H
#define PT_HEIGHT_FIELD_AABB_TEST_H

#include "PxPhysXConfig.h"
#if PX_USE_PARTICLE_SYSTEM_API

//----------------------------------------------------------------------------//

#include "GuHeightField.h"
#include "GuHeightFieldData.h"
#include "GuHeightFieldUtil.h"
#include "PsUtilities.h"

namespace physx
{

namespace Pt
{

//----------------------------------------------------------------------------//

/**
Can be used for querying an AABB against a heightfield, without copying triangles to a temporary buffer.
An iterator can be created to walk the triangles which intersect the AABB and do not have a hole material assigned.
This isn't really optimized yet.

Usage pattern supported by the interface: construct the test, then loop
`for(it = test.begin(); it != test.end(); ++it)` and call it.getTriangleVertices().

The object stores a reference to the Gu::HeightFieldUtil passed to the constructor, and
every Iterator stores a reference to the test, so both must outlive their users.
*/
class HeightFieldAabbTest
{
  public:
    /**
    Converts the bounds into heightfield space and computes the row/column range to visit.

    \param localBounds  AABB to test; converted with hfUtil.shape2hfp(), i.e. expected in shape space.
    \param hfUtil       Heightfield helper; kept by reference.

    If the bounds miss the heightfield in the XZ plane, or the resulting row or column range
    is empty, the test is flagged empty and begin() == end(). In that case the range and
    y-extent members are left unset; they are only read through non-empty iteration.
    */
    HeightFieldAabbTest(const PxBounds3& localBounds, const Gu::HeightFieldUtil& hfUtil)
    : mHfUtil(hfUtil), mIsEmpty(false)
    {
        const PxHeightFieldGeometry& hfGeom = mHfUtil.getHeightFieldGeometry();

        PxVec3 minimum = localBounds.minimum;
        PxVec3 maximum = localBounds.maximum;
        minimum = hfUtil.shape2hfp(minimum);
        maximum = hfUtil.shape2hfp(maximum);

        // A negative scale flips the axis, so min and max trade places after the conversion.
        // if (heightField.getRowScale() < 0)
        if(hfGeom.rowScale < 0)
            Ps::swap(minimum.x, maximum.x);

        // if (heightField.getColumnScale() < 0)
        if(hfGeom.columnScale < 0)
            Ps::swap(minimum.z, maximum.z);

        // early exit for aabb does not overlap in XZ plane
        // DO NOT MOVE: since rowScale / columnScale may be negative this has to be done after scaling the bounds
        // if ((minimum.x > (heightField.getNbRowsFast()-1)) ||
        if((minimum.x > (mHfUtil.getHeightField().getNbRowsFast() - 1)) ||
           //(minimum.z > (heightField.getNbColumnsFast()-1)) ||
           (minimum.z > (mHfUtil.getHeightField().getNbColumnsFast() - 1)) || (maximum.x < 0) || (maximum.z < 0))
        {
            mIsEmpty = true;
            return;
        }

        mMinRow = mHfUtil.getHeightField().getMinRow(minimum.x);
        mMaxRow = mHfUtil.getHeightField().getMaxRow(maximum.x);
        mMinColumn = mHfUtil.getHeightField().getMinColumn(minimum.z);
        mMaxColumn = mHfUtil.getHeightField().getMaxColumn(maximum.z);

        // The iterator treats [mMinRow, mMaxRow) x [mMinColumn, mMaxColumn) as the cells to
        // visit, so an equal min/max in either direction means there is nothing to walk.
        if(mMinRow == mMaxRow || mMinColumn == mMaxColumn)
        {
            mIsEmpty = true;
            return;
        }

        mMiny = minimum.y;
        mMaxy = maximum.y;

        // Check if thickness / vertical extent is negative or positive. Set the triangle vertex indices
        // such that the collision triangles of the heightfield have the correct orientation, i.e., the correct normal
        // -
        // If the row and column scale have different signs, the orientation of the collision triangle vertices
        // need to be swapped
        mSwapVertIdx12 = ((mHfUtil.getHeightField().getThicknessFast() > 0.0f) !=
                          Ps::differentSign(hfGeom.rowScale, hfGeom.columnScale));
    }

    //----------------------------------------------------------------------------//

    /**
    Forward iterator over the triangles selected by a HeightFieldAabbTest.

    State is a cell position (mRow, mColumn, and the matching linear vertex offset mOffset)
    plus mTri (0 or 1), the triangle within that cell. Instances are created only by
    HeightFieldAabbTest::begin() / end() (private constructor, friend class).
    */
    class Iterator
    {

      public:
        // Two iterators differ if they point at a different cell offset or a different
        // triangle of the cell; row/column are not compared separately.
        bool operator!=(const Iterator& it) const
        {
            return (it.mTri != mTri) || (it.mOffset != mOffset);
        }

        //----------------------------------------------------------------------------//

        /**
        Advances to the next triangle that is not a hole and whose cell passes
        intersectsSegment(), or to the end position (mOffset == getMaxOffset(), mTri == 0)
        if there is none.

        Must not be called on the end iterator: the first statement already queries the
        heightfield at the current offset.
        */
        Iterator& operator++()
        {
            // If we are on triangle 1, the cell was already accepted when triangle 0 was considered.
            bool isec = (mTri == 1) || mTest.intersectsSegment(mOffset);
            PX_ASSERT(!(mTri == 1) || mTest.intersectsSegment(mOffset));

            PxU32 endOffset = mTest.getMaxOffset();
            while(mOffset < endOffset)
            {
                PX_ASSERT(mColumn < mTest.mMaxColumn);
                PX_ASSERT(mRow < mTest.mMaxRow);
                PX_ASSERT(mColumn >= mTest.mMinColumn);
                PX_ASSERT(mRow >= mTest.mMinRow);

                // First try the second triangle of the current cell.
                if(mTri == 0 && isec)
                {
                    mTri++;
                    if(mTest.isHole(mTri, mOffset))
                        continue; // mTri is now 1, so the next pass falls through to the cell advance

                    return *this;
                }

                // Move on to triangle 0 of the next cell in the row.
                mTri = 0;
                mColumn++;
                mOffset++;

                if(mColumn == mTest.mMaxColumn)
                {
                    // Row finished: jump to column mMinColumn of the next row.
                    mRow++;
                    mOffset +=
                        (mTest.mHfUtil.getHeightField().getNbColumnsFast() - (mTest.mMaxColumn - mTest.mMinColumn));

                    if(mRow == mTest.mMaxRow)
                    {
                        // Past the last row: land exactly on getMaxOffset() so that the result
                        // compares equal to end(); the loop condition then terminates.
                        mOffset += (mTest.mMaxColumn - mTest.mMinColumn);
                        continue;
                    }
                    mColumn = mTest.mMinColumn;
                }

                isec = mTest.intersectsSegment(mOffset);
                if(!isec || mTest.isHole(mTri, mOffset))
                    continue;

                return *this;
            }
            PX_ASSERT(mOffset == endOffset);
            return *this;
        }

        //----------------------------------------------------------------------------//

        // Writes the three vertices of the current triangle to triangle[0..2];
        // forwards to HeightFieldAabbTest::getTriangleVertices().
        PX_INLINE void getTriangleVertices(PxVec3* triangle) const
        {
            mTest.getTriangleVertices(triangle, *this);
        }

        //----------------------------------------------------------------------------//

      private:
        // Declared but not defined: assignment is disabled (the class holds a reference member).
        Iterator& operator=(const Iterator&);

        // Positions the iterator on triangle 0 of cell (row, column); the linear offset is
        // row * number-of-columns + column.
        Iterator(PxU32 row, PxU32 column, const HeightFieldAabbTest& test) : mRow(row), mColumn(column), mTest(test)
        {
            mTri = 0;
            mOffset = mRow * mTest.mHfUtil.getHeightField().getNbColumnsFast() + mColumn;
        }

        //----------------------------------------------------------------------------//

        // True if the current triangle is not a hole and its cell passes intersectsSegment().
        bool isValid()
        {
            return !mTest.isHole(mTri, mOffset) && mTest.intersectsSegment(mOffset);
        }

        //----------------------------------------------------------------------------//

        PxU32 mRow;
        PxU32 mColumn;
        PxU32 mTri;    // triangle within the cell: 0 or 1
        PxU32 mOffset; // mRow * nbColumns + mColumn
        const HeightFieldAabbTest& mTest;

        friend class HeightFieldAabbTest;
    };

    //----------------------------------------------------------------------------//

    // End position. For an empty test both begin() and end() return Iterator(0, 0),
    // so the iteration loop is skipped.
    Iterator end() const
    {
        if(mIsEmpty)
            return Iterator(0, 0, *this);

        return Iterator(mMaxRow, mMaxColumn, *this);
    }

    //----------------------------------------------------------------------------//

    // First valid triangle, or end() if there is none. Starts at (mMinRow, mMinColumn)
    // and advances once if that first triangle is a hole or its cell does not intersect.
    Iterator begin() const
    {
        if(mIsEmpty)
            return Iterator(0, 0, *this);

        Iterator itBegin(mMinRow, mMinColumn, *this);
        if(itBegin != end() && !itBegin.isValid())
            ++itBegin;

        return itBegin;
    }

  private:
    // Declared but not defined: assignment is disabled (the class holds a reference member).
    HeightFieldAabbTest& operator=(const HeightFieldAabbTest&);

    // Linear offset of the first cell of the range.
    PxU32 getMinOffset() const
    {
        return mMinRow * mHfUtil.getHeightField().getNbColumnsFast() + mMinColumn;
    }

    //----------------------------------------------------------------------------//

    // Linear offset used as the end marker by the iterator.
    PxU32 getMaxOffset() const
    {
        return mMaxRow * mHfUtil.getHeightField().getNbColumnsFast() + mMaxColumn;
    }

    //----------------------------------------------------------------------------//

    // True if triangle `triangleIndex` (0 or 1) of the cell at `offset` has the hole material.
    // Triangle ids are 2 * offset + triangleIndex.
    bool isHole(PxU32 triangleIndex, PxU32 offset) const
    {
        return mHfUtil.getHeightField().getTriangleMaterial((offset << 1) + triangleIndex) ==
               PxHeightFieldMaterial::eHOLE;
    }

    //----------------------------------------------------------------------------//

    // Vertical overlap test for the cell at `offset`: returns false only if the AABB's
    // [mMiny, mMaxy] range lies strictly below all four corner heights or strictly above
    // all four corner heights of the cell.
    bool intersectsSegment(PxU32 offset) const
    {
        // should we cache this?
        PxReal h0 = mHfUtil.getHeightField().getHeight(offset);
        PxReal h1 = mHfUtil.getHeightField().getHeight(offset + 1);
        PxReal h2 = mHfUtil.getHeightField().getHeight(offset + mHfUtil.getHeightField().getNbColumnsFast());
        PxReal h3 = mHfUtil.getHeightField().getHeight(offset + mHfUtil.getHeightField().getNbColumnsFast() + 1);

        // Optimization: Could store the two left height field cell vertices and thus avoid some comparisons here
        // (if the bounds covers more than one height field cell)
        return (!((mMaxy < h0 && mMaxy < h1 && mMaxy < h2 && mMaxy < h3) ||
                  (mMiny > h0 && mMiny > h1 && mMiny > h2 && mMiny > h3)));
    }

    //----------------------------------------------------------------------------//

    // Fetches the vertices of the iterator's current triangle and converts them with
    // mHfUtil.hf2shapep(). Vertices 1 and 2 are written in swapped order when
    // mSwapVertIdx12 is set (see the constructor for how that flag is derived).
    void getTriangleVertices(PxVec3* triangleVertices, const Iterator& iterator) const
    {
        PX_ASSERT(iterator.mOffset != getMaxOffset());
        PX_ASSERT(!isHole(iterator.mTri, iterator.mOffset));

        PxU32 triangleIndex = (iterator.mOffset << 1) + iterator.mTri;
        PxU32 vertIdx1 = PxU32(mSwapVertIdx12 ? 2 : 1);
        PxU32 vertIdx2 = PxU32(mSwapVertIdx12 ? 1 : 2);

        mHfUtil.getHeightField().getTriangleVertices(triangleIndex, iterator.mRow, iterator.mColumn, triangleVertices[0],
                                                     triangleVertices[vertIdx1], triangleVertices[vertIdx2]);

        triangleVertices[0] = mHfUtil.hf2shapep(triangleVertices[0]);
        triangleVertices[1] = mHfUtil.hf2shapep(triangleVertices[1]);
        triangleVertices[2] = mHfUtil.hf2shapep(triangleVertices[2]);
    }

    //----------------------------------------------------------------------------//

    const Gu::HeightFieldUtil& mHfUtil;
    bool mIsEmpty; // set when the constructor found nothing to iterate

    // Row/column range to visit; the iterator treats the max values as exclusive.
    PxU32 mMinRow;
    PxU32 mMaxRow;
    PxU32 mMinColumn;
    PxU32 mMaxColumn;
    // Vertical extent of the query bounds after shape2hfp().
    PxReal mMiny;
    PxReal mMaxy;
    bool mSwapVertIdx12; // swap output slots 1 and 2 in getTriangleVertices()
};

} // namespace Pt
} // namespace physx

#endif // PX_USE_PARTICLE_SYSTEM_API
#endif // PT_HEIGHT_FIELD_AABB_TEST_H
#ifndef PT_PACKETSECTIONS_H
#define PT_PACKETSECTIONS_H

#include "PxPhysXConfig.h"
#if PX_USE_PARTICLE_SYSTEM_API

namespace physx
{

namespace Pt
{

// Structure describing boundary section (plus inner section) of a fluid packet.
// This will be used for halo optimization, i.e., to reduce the number of particles
// that have to be tested in neighboring packets.
//
// Both arrays are indexed by section id in [0, PT_PACKET_SECTIONS).
// NOTE(review): 27 presumably corresponds to a 3x3x3 subdivision (26 boundary sections
// plus the inner one); the mapping from section id to location is not visible here.
#define PT_PACKET_SECTIONS 27
struct PacketSections
{
    PxU32 numParticles[PT_PACKET_SECTIONS];  //! Number of particles in each packet section
    PxU32 firstParticle[PT_PACKET_SECTIONS]; //! Start index of the associated particle interval for each packet section
};

} // namespace Pt
} // namespace physx

#endif // PX_USE_PARTICLE_SYSTEM_API
#endif // PT_PACKETSECTIONS_H
// NOTE(review): the guard macro is spelled "PARTILCE" (sic). It is harmless as long as it
// stays unique and is left unchanged here so that nothing depending on the name breaks.
#ifndef PT_PARTILCECELL_H
#define PT_PARTILCECELL_H

#include "PxPhysXConfig.h"
#if PX_USE_PARTICLE_SYSTEM_API

#include "PtGridCellVector.h"

namespace physx
{

namespace Pt
{

// Structure describing a particle cell hash entry.
// An entry pairs grid coordinates with a contiguous particle interval
// [firstParticle, firstParticle + numParticles).
// NOTE(review): the member comments below say "packet" although this struct describes a
// cell; they look copied from the packet structure - confirm the intended wording.
struct ParticleCell
{
    GridCellVector coords; //! The packet coordinates
    PxU32 numParticles;    //! Number of particles in the packet
    PxU32 firstParticle;   //! Start index of the associated particle interval
};

} // namespace Pt
} // namespace physx

#endif // PX_USE_PARTICLE_SYSTEM_API
#endif // PT_PARTILCECELL_H
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "PtParticleData.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "foundation/PxIO.h" +#include "particles/PxParticleCreationData.h" +#include "PxMetaData.h" +#include "PsAlignedMalloc.h" +#include "CmUtils.h" +#include "PtParticle.h" + +using namespace physx; +using namespace Cm; +using namespace Pt; +//----------------------------------------------------------------------------// + +ParticleData::ParticleData(PxU32 maxParticles, bool perParticleRestOffset) +{ + mOwnMemory = true; + mMaxParticles = maxParticles; + mHasRestOffsets = perParticleRestOffset; + mValidParticleCount = 0; + mValidParticleRange = 0; + mWorldBounds = PxBounds3::empty(); + + fixupPointers(); + mParticleMap.resizeAndClear(mMaxParticles); + +#if PX_CHECKED + { + PxU32 numWords = mMaxParticles * sizeof(Particle) >> 2; + for(PxU32 i = 0; i < numWords; ++i) + reinterpret_cast<PxU32*>(mParticleBuffer)[i] = 0xDEADBEEF; + } +#endif +} + +//----------------------------------------------------------------------------// + +ParticleData::ParticleData(ParticleSystemStateDataDesc& particles, const PxBounds3& bounds) +{ + mOwnMemory = true; + mMaxParticles = particles.maxParticles; + mHasRestOffsets = (particles.restOffsets.ptr() != NULL); + mValidParticleCount = particles.numParticles; + mValidParticleRange = particles.validParticleRange; + mWorldBounds = bounds; + + fixupPointers(); + if(particles.bitMap) + mParticleMap.copy(*particles.bitMap); + else + mParticleMap.resizeAndClear(mMaxParticles); + + if(mValidParticleRange > 0) + { + for(PxU32 i = 0; i < mValidParticleRange; ++i) + mParticleBuffer[i].flags.api = PxParticleFlags(0); + + for(PxU32 w = 0; w <= (mValidParticleRange - 1) >> 5; w++) + for(PxU32 b = mParticleMap.getWords()[w]; b; b &= b - 1) + { + PxU32 index = (w << 5 | Ps::lowestSetBit(b)); + Particle& dstParticle = mParticleBuffer[index]; + dstParticle.position = particles.positions[index]; + dstParticle.velocity = particles.velocities[index]; + dstParticle.density = 0.0f; + 
dstParticle.flags.low = 0; + dstParticle.flags.api = PxParticleFlag::eVALID; + } + + if(mHasRestOffsets) + { + PX_ASSERT(mRestOffsetBuffer); + for(PxU32 w = 0; w <= (mValidParticleRange - 1) >> 5; w++) + for(PxU32 b = mParticleMap.getWords()[w]; b; b &= b - 1) + { + PxU32 index = (w << 5 | Ps::lowestSetBit(b)); + mRestOffsetBuffer[index] = particles.restOffsets[index]; + } + } + } +} + +//----------------------------------------------------------------------------// + +ParticleData::ParticleData(PxU8* address) +{ + PX_ASSERT(address == reinterpret_cast<PxU8*>(this)); + PX_UNUSED(address); + mOwnMemory = false; + fixupPointers(); +} + +//----------------------------------------------------------------------------// + +ParticleData::~ParticleData() +{ + Ps::AlignedAllocator<16> align16; + + if(mParticleBuffer) + align16.deallocate(mParticleBuffer); +} + +//----------------------------------------------------------------------------// + +void ParticleData::fixupPointers() +{ + PX_ASSERT(size_t(this) % 16 == 0); + PxU8* address = reinterpret_cast<PxU8*>(this); + + address += getHeaderSize(); + PxU32 bitmapSize = getBitmapSize(mMaxParticles); + mParticleMap.importData(bitmapSize / 4, reinterpret_cast<PxU32*>(address)); + address += (bitmapSize + 15) & ~15; + mParticleBuffer = reinterpret_cast<Particle*>(address); + address += getParticleBufferSize(mMaxParticles); + mRestOffsetBuffer = mHasRestOffsets ? 
reinterpret_cast<PxF32*>(address) : NULL; + address += getRestOffsetBufferSize(mMaxParticles, mHasRestOffsets); +} + +//----------------------------------------------------------------------------// + +void ParticleData::exportData(PxSerializationContext& stream) +{ + clearSimState(); + stream.alignData(16); + stream.writeData(this, ParticleData::getTotalSize(mMaxParticles, mHasRestOffsets)); +} + +void ParticleData::getBinaryMetaData(PxOutputStream& stream) +{ + // define ParticleFlags + PX_DEF_BIN_METADATA_CLASS(stream, Pt::ParticleFlags) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleFlags, PxU16, api, 0) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleFlags, PxU16, low, 0) + + // define Particle + PX_DEF_BIN_METADATA_CLASS(stream, Pt::Particle) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::Particle, PxVec3, position, 0) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::Particle, PxReal, density, 0) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::Particle, PxVec3, velocity, 0) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::Particle, Pt::ParticleFlags, flags, 0) + + // define ParticleData + PX_DEF_BIN_METADATA_VCLASS(stream, Pt::ParticleData) + + PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, bool, mOwnMemory, 0) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, PxU32, mMaxParticles, 0) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, bool, mHasRestOffsets, 0) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, PxU32, mValidParticleRange, 0) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, PxU32, mValidParticleCount, 0) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, PxBounds3, mWorldBounds, 0) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, Pt::Particle, mParticleBuffer, PxMetaDataFlag::ePTR) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, PxReal, mRestOffsetBuffer, PxMetaDataFlag::ePTR) + PX_DEF_BIN_METADATA_ITEM(stream, Pt::ParticleData, BitMap, mParticleMap, 0) + + // extra data + PX_DEF_BIN_METADATA_EXTRA_ARRAY(stream, Pt::ParticleData, Pt::Particle, 
mMaxParticles, 16, 0) + PX_DEF_BIN_METADATA_EXTRA_ITEMS(stream, Pt::ParticleData, PxReal, mHasRestOffsets, mMaxParticles, 0, 16) +} + +//----------------------------------------------------------------------------// + +void ParticleData::clearSimState() +{ + if(mValidParticleRange > 0) + { + for(PxU32 w = 0; w <= (mValidParticleRange - 1) >> 5; w++) + for(PxU32 b = mParticleMap.getWords()[w]; b; b &= b - 1) + { + PxU32 index = (w << 5 | Ps::lowestSetBit(b)); + Particle& dstParticle = mParticleBuffer[index]; + dstParticle.flags.low = 0; + dstParticle.density = 0.0f; + } + } +} + +//----------------------------------------------------------------------------// + +void ParticleData::onOriginShift(const PxVec3& shift) +{ + if(mValidParticleRange > 0) + { + for(PxU32 w = 0; w <= (mValidParticleRange - 1) >> 5; w++) + for(PxU32 b = mParticleMap.getWords()[w]; b; b &= b - 1) + { + PxU32 index = (w << 5 | Ps::lowestSetBit(b)); + Particle& particle = mParticleBuffer[index]; + particle.position -= shift; + } + } + + mWorldBounds.minimum -= shift; + mWorldBounds.maximum -= shift; +} + +//----------------------------------------------------------------------------// + +ParticleData* ParticleData::create(ParticleSystemStateDataDesc& particles, const PxBounds3& bounds) +{ + Ps::AlignedAllocator<16, Ps::ReflectionAllocator<ParticleData> > align16; + PxU32 totalSize = getTotalSize(particles.maxParticles, particles.restOffsets.ptr() != NULL); + ParticleData* mem = reinterpret_cast<ParticleData*>(align16.allocate(totalSize, __FILE__, __LINE__)); + markSerializedMem(mem, totalSize); + PX_PLACEMENT_NEW(mem, ParticleData)(particles, bounds); + return mem; +} + +//----------------------------------------------------------------------------// + +ParticleData* ParticleData::create(PxU32 maxParticles, bool perParticleRestOffsets) +{ + Ps::AlignedAllocator<16, Ps::ReflectionAllocator<ParticleData> > align16; + PxU32 totalSize = getTotalSize(maxParticles, perParticleRestOffsets); + 
ParticleData* mem = reinterpret_cast<ParticleData*>(align16.allocate(totalSize, __FILE__, __LINE__)); + markSerializedMem(mem, totalSize); + PX_PLACEMENT_NEW(mem, ParticleData)(maxParticles, perParticleRestOffsets); + return mem; +} + +//----------------------------------------------------------------------------// + +ParticleData* ParticleData::create(PxDeserializationContext& context) +{ + ParticleData* mem = context.readExtraData<ParticleData, PX_SERIAL_ALIGN>(); + new (mem) ParticleData(reinterpret_cast<PxU8*>(mem)); + context.readExtraData<PxU8>(getDataSize(mem->getMaxParticles(), mem->getRestOffsetBuffer() != NULL)); + return mem; +} + +//----------------------------------------------------------------------------// + +void ParticleData::release() +{ + if(!mOwnMemory) + return; + + Ps::AlignedAllocator<16> align16; + align16.deallocate(this); +} + +//----------------------------------------------------------------------------// + +bool ParticleData::addParticlesV(const PxParticleCreationData& creationData) +{ + PX_ASSERT(creationData.numParticles <= mMaxParticles); + PX_ASSERT(creationData.indexBuffer.ptr() && creationData.positionBuffer.ptr()); + PX_ASSERT((mRestOffsetBuffer != NULL) == (creationData.restOffsetBuffer.ptr() != NULL)); + + const PxVec3 zeroVector(0.0f); + + PxStrideIterator<const PxU32> indexIt = creationData.indexBuffer; + PxStrideIterator<const PxVec3> positionIt = creationData.positionBuffer; + PxStrideIterator<const PxVec3> velocityIt = + creationData.velocityBuffer.ptr() ? 
creationData.velocityBuffer : PxStrideIterator<const PxVec3>(&zeroVector, 0); + + for(PxU32 i = 0; i < creationData.numParticles; i++) + { + const PxU32 particleIndex = *indexIt; + PX_ASSERT(particleIndex <= mMaxParticles); + + Particle& particle = mParticleBuffer[particleIndex]; + PX_ASSERT(!mParticleMap.test(particleIndex)); + mParticleMap.set(particleIndex); + + if(particleIndex + 1 > mValidParticleRange) + { + mValidParticleRange = particleIndex + 1; + } + else + { + PX_ASSERT(!(particle.flags.api & PxParticleFlag::eVALID)); + } + + particle.position = *positionIt; + particle.velocity = *velocityIt; + particle.flags.low = 0; + particle.flags.api = PxParticleFlag::eVALID; + particle.density = 0.0f; + + mWorldBounds.include(particle.position); + + positionIt++; + velocityIt++; + indexIt++; + } + + if(mRestOffsetBuffer) + { + PxStrideIterator<const PxF32> restOffsetIt = creationData.restOffsetBuffer; + indexIt = creationData.indexBuffer; + + for(PxU32 i = 0; i < creationData.numParticles; i++) + { + const PxU32 particleIndex = *indexIt; + mRestOffsetBuffer[particleIndex] = *restOffsetIt; + restOffsetIt++; + indexIt++; + } + } + + mValidParticleCount += creationData.numParticles; + return true; +} + +//----------------------------------------------------------------------------// + +void ParticleData::removeParticlesV(PxU32 count, const PxStrideIterator<const PxU32>& indices) +{ + for(PxU32 i = 0; i < count; ++i) + removeParticle(indices[i]); + + mValidParticleCount -= count; + mValidParticleRange = (mValidParticleCount > 0) ? 
mParticleMap.findLast() + 1 : 0; +} + +//----------------------------------------------------------------------------// + +void ParticleData::removeParticlesV() +{ + Cm::BitMap::Iterator it(mParticleMap); + for(PxU32 particleIndex = it.getNext(); particleIndex != Cm::BitMap::Iterator::DONE; particleIndex = it.getNext()) + removeParticle(particleIndex); + + mValidParticleCount = 0; + mValidParticleRange = 0; + PX_ASSERT(mValidParticleCount == 0); +} + +//----------------------------------------------------------------------------// + +PxU32 ParticleData::getParticleCountV() const +{ + return mValidParticleCount; +} + +//----------------------------------------------------------------------------// + +/** +In the non-gpu implementation the full state is always available. +*/ +void ParticleData::getParticlesV(ParticleSystemStateDataDesc& particles, bool /*fullState*/, bool) const +{ + PX_ASSERT(mValidParticleCount <= mMaxParticles); + + particles.bitMap = &mParticleMap; + particles.numParticles = mValidParticleCount; + particles.maxParticles = mMaxParticles; + particles.validParticleRange = mValidParticleRange; + + if(mValidParticleCount == 0) + { + particles.positions = PxStrideIterator<const PxVec3>(); + particles.velocities = PxStrideIterator<const PxVec3>(); + particles.flags = PxStrideIterator<const ParticleFlags>(); + particles.restOffsets = PxStrideIterator<const PxF32>(); + } + else + { + PX_ASSERT(mParticleBuffer); + particles.positions = PxStrideIterator<const PxVec3>(&mParticleBuffer->position, sizeof(Particle)); + particles.velocities = PxStrideIterator<const PxVec3>(&mParticleBuffer->velocity, sizeof(Particle)); + particles.flags = PxStrideIterator<const ParticleFlags>(&mParticleBuffer->flags, sizeof(Particle)); + particles.restOffsets = + mRestOffsetBuffer ? 
PxStrideIterator<const PxF32>(mRestOffsetBuffer) : PxStrideIterator<const PxF32>(); + } +} + +//----------------------------------------------------------------------------// + +void ParticleData::setPositionsV(PxU32 numParticles, const PxStrideIterator<const PxU32>& indices, + const PxStrideIterator<const PxVec3>& positions) +{ + PX_ASSERT(indices.ptr() && positions.ptr()); + + PxStrideIterator<const PxU32> indexIt(indices); + PxStrideIterator<const PxVec3> positionIt(positions); + + for(PxU32 i = 0; i != numParticles; ++i) + { + PxU32 particleIndex = *indexIt++; + PX_ASSERT(particleIndex <= mMaxParticles); + PX_ASSERT(mParticleMap.test(particleIndex)); + Particle& particle = mParticleBuffer[particleIndex]; + particle.position = *positionIt++; + mWorldBounds.include(particle.position); + } +} + +//----------------------------------------------------------------------------// + +void ParticleData::setVelocitiesV(PxU32 numParticles, const PxStrideIterator<const PxU32>& indices, + const PxStrideIterator<const PxVec3>& velocities) +{ + PX_ASSERT(indices.ptr() && velocities.ptr()); + + PxStrideIterator<const PxU32> indexIt(indices); + PxStrideIterator<const PxVec3> velocityIt(velocities); + + for(PxU32 i = 0; i != numParticles; ++i) + { + PxU32 particleIndex = *indexIt++; + PX_ASSERT(particleIndex <= mMaxParticles); + PX_ASSERT(mParticleMap.test(particleIndex)); + Particle& particle = mParticleBuffer[particleIndex]; + particle.velocity = *velocityIt++; + } +} + +//----------------------------------------------------------------------------// + +void ParticleData::setRestOffsetsV(PxU32 numParticles, const PxStrideIterator<const PxU32>& indices, + const PxStrideIterator<const PxF32>& restOffsets) +{ + PX_ASSERT(indices.ptr() && restOffsets.ptr()); + + PxStrideIterator<const PxU32> indexIt(indices); + PxStrideIterator<const PxF32> restOffsetIt(restOffsets); + + for(PxU32 i = 0; i != numParticles; ++i) + { + PxU32 particleIndex = *indexIt++; + PX_ASSERT(particleIndex <= 
mMaxParticles); + PX_ASSERT(mParticleMap.test(particleIndex)); + mRestOffsetBuffer[particleIndex] = *restOffsetIt++; + } +} + +//----------------------------------------------------------------------------// + +void ParticleData::addDeltaVelocitiesV(const Cm::BitMap& bufferMap, const PxVec3* buffer, PxReal multiplier) +{ + Cm::BitMap::Iterator it(bufferMap); + for(PxU32 particleIndex = it.getNext(); particleIndex != Cm::BitMap::Iterator::DONE; particleIndex = it.getNext()) + { + PX_ASSERT(mParticleMap.boundedTest(particleIndex)); + mParticleBuffer[particleIndex].velocity += buffer[particleIndex] * multiplier; + } +} + +//----------------------------------------------------------------------------// + +PxBounds3 ParticleData::getWorldBoundsV() const +{ + return mWorldBounds; +} + +//----------------------------------------------------------------------------// + +PxU32 ParticleData::getMaxParticlesV() const +{ + return mMaxParticles; +} + +//----------------------------------------------------------------------------// + +PX_FORCE_INLINE void ParticleData::removeParticle(PxU32 particleIndex) +{ + PX_ASSERT(particleIndex <= mMaxParticles); + + Particle& particle = mParticleBuffer[particleIndex]; + PX_ASSERT(particle.flags.api & PxParticleFlag::eVALID); + PX_ASSERT(mParticleMap.test(particleIndex)); + +#if PX_CHECKED + for(PxU32 i = 0; i<sizeof(Particle)>> 2; ++i) + reinterpret_cast<PxU32*>(&particle)[i] = 0xDEADBEEF; +#endif + particle.flags.api = PxParticleFlags(0); + mParticleMap.reset(particleIndex); +} + +//----------------------------------------------------------------------------// + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtParticleOpcodeCache.h b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleOpcodeCache.h new file mode 100644 index 00000000..0e77a146 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleOpcodeCache.h @@ -0,0 +1,441 @@ +// This code contains NVIDIA Confidential Information 
and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#ifndef PT_PARTICLE_OPCODE_CACHE_H +#define PT_PARTICLE_OPCODE_CACHE_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "foundation/PxBounds3.h" +#include "GuGeometryUnion.h" +#include "PtParticleSystemFlags.h" +#include "PsUtilities.h" + +namespace physx +{ + +namespace Pt +{ + +/** +Represents a per particle opcode cache for collision with meshes. +The cache contains +- number of triangle indices +- mesh pointer +- triangle indices +- bounds representing the volume within which the cache is valid + +The cache is always guaranteed to reference ALL triangles that are relevant for a given mesh for a given volume. + +There are four different data layouts to optimize access speed for fewer indices and maximize the amount of triangles +that can be cached. +1. regular PxBounds3 with up to 1 x 16 bit triangle indices. +2. compressed volume with up to 6 x 16 bit triangle indices. +3. compressed volume with up to 9 x 10 bit triangle indices. (The indices are compressed if the range of indices +allows). +4. compressed volume with up to 3 x 32 bit triangle indices. (For large meshes). 
+*/ +struct ParticleOpcodeCache +{ + static const PxU32 sMaxCachedTriangles = 9; + + struct QuantizationParams + { + PxF32 dequantizationMultiplier; + PxF32 quantizationMultiplier; + }; + + static PX_FORCE_INLINE QuantizationParams getQuantizationParams(const PxF32 maxExtents) + { + QuantizationParams params; + params.quantizationMultiplier = 254 * (1.0f / maxExtents); + params.dequantizationMultiplier = (1.0f / 254) * maxExtents; + return params; + } + + PX_FORCE_INLINE const Gu::GeometryUnion* getGeometry() + { + return mGeom; + } + + PX_FORCE_INLINE ParticleOpcodeCache& operator=(const ParticleOpcodeCache& p) + { + const PxU32* src = reinterpret_cast<const PxU32*>(&p); + PxU32* dist = reinterpret_cast<PxU32*>(this); + dist[0] = src[0]; + dist[1] = src[1]; + dist[2] = src[2]; + dist[3] = src[3]; + dist[4] = src[4]; + dist[5] = src[5]; + dist[6] = src[6]; + dist[7] = src[7]; + +#if PX_P64_FAMILY + dist[8] = src[8]; + dist[9] = src[9]; +#endif + return *this; + } + + // point mGeom at temporary memory that stores the triangle indices + // init for triangle mesh cache + PX_FORCE_INLINE void init(PxU32* triangles) + { + mTriangleCount = 0; + mGeom = reinterpret_cast<Gu::GeometryUnion*>(triangles); + } + + // add triangles + PX_FORCE_INLINE void add(const PxU32* triangles, const PxU32 numTriangles) + { + const PxU32 end = mTriangleCount + numTriangles; + if(end <= sMaxCachedTriangles) + { + PxU32* tmp = const_cast<PxU32*>(reinterpret_cast<const PxU32*>(mGeom)); + for(PxU32 i = mTriangleCount; i < end; i++) + { + tmp[i] = *triangles++; + } + mTriangleCount = Ps::to8(end); + } + else + { + PX_COMPILE_TIME_ASSERT(sMaxCachedTriangles < PX_MAX_U8); + // this results in marking the cache invalid + mTriangleCount = PX_MAX_U8; + } + } + + PX_FORCE_INLINE void write(PxU16& internalParticleFlags, const PxBounds3& bounds, + const QuantizationParams& quantizationParams, const Gu::GeometryUnion& mesh, + const bool isSmallMesh) + { + PxU32* triangles = const_cast<PxU32*>(reinterpret_cast<const 
PxU32*>(mGeom)); + if(isSmallMesh && mTriangleCount <= 1) + { + // Layout of mData: + // PxU8 pad + // PxU16 index + // PxBounds3 bounds + PxU8* ptr = mData + 1; + reinterpret_cast<PxU16&>(*ptr) = (mTriangleCount > 0) ? static_cast<PxU16>(triangles[0]) : PxU16(0); + ptr += sizeof(PxU16); + reinterpret_cast<PxBounds3&>(*ptr) = bounds; + } + else + { + // Layout of mData: + // PxU8 extentX, extentY, extentZ + // PxVec3 center + // PxU8[12] indexData + PxU8* ptr = mData; + PxU8& extentX = *ptr++; + PxU8& extentY = *ptr++; + PxU8& extentZ = *ptr++; + PxVec3& center = reinterpret_cast<PxVec3&>(*ptr); + ptr += sizeof(PxVec3); + quantizeBounds(center, extentX, extentY, extentZ, bounds, quantizationParams); + + if(isSmallMesh && mTriangleCount <= 6) + { + writeTriangles_6xU16(ptr, triangles, mTriangleCount); + } + else if(isSmallMesh && mTriangleCount <= 9) + { + bool success = writeTriangles_BaseU16_9xU10(ptr, triangles, mTriangleCount); + if(!success) + { + internalParticleFlags &= ~PxU16(InternalParticleFlag::eGEOM_CACHE_MASK); + return; + } + } + else if(!isSmallMesh && mTriangleCount <= 3) + { + writeTriangles_3xU32(ptr, triangles, mTriangleCount); + } + else + { + internalParticleFlags &= ~PxU16(InternalParticleFlag::eGEOM_CACHE_MASK); + return; + } + } + + // refresh the cache flags + internalParticleFlags |= (InternalParticleFlag::eGEOM_CACHE_BIT_0 | InternalParticleFlag::eGEOM_CACHE_BIT_1); + mGeom = &mesh; + } + + PX_FORCE_INLINE bool read(PxU16& internalParticleFlags, PxU32& numTriangles, PxU32* triangleBuffer, + const PxBounds3& bounds, const QuantizationParams& quantizationParams, + const Gu::GeometryUnion* mesh, const bool isSmallMesh) const + { + // cache bits: + // (00) -> no read (invalid) + // (01) -> read + // (11) -> no read (can't be the case with mGeom == mesh) + PX_ASSERT(mGeom != mesh || !((internalParticleFlags & InternalParticleFlag::eGEOM_CACHE_BIT_0) != 0 && + (internalParticleFlags & InternalParticleFlag::eGEOM_CACHE_BIT_1) != 0)); + + 
if((internalParticleFlags & InternalParticleFlag::eGEOM_CACHE_BIT_0) != 0 && mGeom == mesh) + { + numTriangles = mTriangleCount; + if(isSmallMesh && numTriangles <= 1) + { + // Layout of mData: + // PxU8 pad + // PxU16 index + // PxBounds3 bounds + const PxU8* ptr = mData + 1; + *triangleBuffer = reinterpret_cast<const PxU16&>(*ptr); + ptr += sizeof(PxU16); + const PxBounds3& cachedBounds = reinterpret_cast<const PxBounds3&>(*ptr); + + // if (bounds.isInside(cachedBounds)) //sschirm, we should implement the isInside to use fsels as well. + PxVec3 dMin = (bounds.minimum - cachedBounds.minimum).minimum(PxVec3(0)); + PxVec3 dMax = (cachedBounds.maximum - bounds.maximum).minimum(PxVec3(0)); + PxF32 sum = dMin.x + dMin.y + dMin.z + dMax.x + dMax.y + dMax.z; + if(sum == 0.0f) + { + // refresh the cache bits (11) + internalParticleFlags |= + (InternalParticleFlag::eGEOM_CACHE_BIT_0 | InternalParticleFlag::eGEOM_CACHE_BIT_1); + return true; + } + } + else + { + // Layout of mData: + // PxU8 extentX, extentY, extentZ + // PxVec3 center + // PxU8[12] indexData + const PxU8* ptr = mData; + const PxU8 extentX = *ptr++; + const PxU8 extentY = *ptr++; + const PxU8 extentZ = *ptr++; + const PxVec3& center = reinterpret_cast<const PxVec3&>(*ptr); + ptr += sizeof(PxVec3); + PX_ASSERT(!bounds.isEmpty()); + + // if (bounds.isInside(cachedBounds)) //sschirm, we should implement the isInside to use fsels as well. 
+ PxVec3 diffMin = bounds.minimum - center; + PxVec3 diffMax = bounds.maximum - center; + PxF32 diffx = PxMax(PxAbs(diffMin.x), PxAbs(diffMax.x)); + PxF32 diffy = PxMax(PxAbs(diffMin.y), PxAbs(diffMax.y)); + PxF32 diffz = PxMax(PxAbs(diffMin.z), PxAbs(diffMax.z)); + PxU8 dX = PxU8(diffx * quantizationParams.quantizationMultiplier); + PxU8 dY = PxU8(diffy * quantizationParams.quantizationMultiplier); + PxU8 dZ = PxU8(diffz * quantizationParams.quantizationMultiplier); + if((dX < extentX) && (dY < extentY) && (dZ < extentZ)) + { + if(isSmallMesh && numTriangles <= 6) + { + readTriangles_6xU16(triangleBuffer, ptr, numTriangles); + // refresh the cache bits (11) + internalParticleFlags |= + (InternalParticleFlag::eGEOM_CACHE_BIT_0 | InternalParticleFlag::eGEOM_CACHE_BIT_1); + return true; + } + else if(isSmallMesh && numTriangles <= 9) + { + readTriangles_BaseU16_9xU10(triangleBuffer, ptr, numTriangles); + // refresh the cache bits (11) + internalParticleFlags |= + (InternalParticleFlag::eGEOM_CACHE_BIT_0 | InternalParticleFlag::eGEOM_CACHE_BIT_1); + return true; + } + else if(!isSmallMesh && numTriangles <= 3) + { + readTriangles_3xU32(triangleBuffer, ptr, numTriangles); + // refresh the cache bits (11) + internalParticleFlags |= + (InternalParticleFlag::eGEOM_CACHE_BIT_0 | InternalParticleFlag::eGEOM_CACHE_BIT_1); + return true; + } + } + } + } + + // cache invalid! 
+ numTriangles = 0; + return false; + } + + private: + PxU8 mTriangleCount; + PxU8 mData[27]; + const Gu::GeometryUnion* mGeom; + + static PX_FORCE_INLINE void quantizeBounds(PxVec3& center, PxU8& extentX, PxU8& extentY, PxU8& extentZ, + const PxBounds3& bounds, const QuantizationParams& quantizationParams) + { + center = bounds.getCenter(); + if(!bounds.isEmpty()) + { + PxVec3 extents = bounds.getExtents(); + extentX = PxU8((extents.x * quantizationParams.quantizationMultiplier) + 1); + extentY = PxU8((extents.y * quantizationParams.quantizationMultiplier) + 1); + extentZ = PxU8((extents.z * quantizationParams.quantizationMultiplier) + 1); + PX_ASSERT(extentX != 0 && extentY != 0 && extentZ != 0); + } + else + { + extentX = 0; + extentY = 0; + extentZ = 0; + } + } + + static PX_FORCE_INLINE void dequantizeBounds(PxBounds3& bounds, const PxVec3& center, const PxU8 extentX, + const PxU8 extentY, const PxU8 extentZ, + const QuantizationParams& quantizationParams) + { + PxVec3 extents(extentX * quantizationParams.dequantizationMultiplier, + extentY * quantizationParams.dequantizationMultiplier, + extentZ * quantizationParams.dequantizationMultiplier); + bounds = PxBounds3::centerExtents(center, extents); + } + + static PX_FORCE_INLINE void writeTriangles_6xU16(PxU8* data, const PxU32* triangles, const PxU32 numTriangles) + { + PX_ASSERT(numTriangles <= 6); + PxU16* ptr = reinterpret_cast<PxU16*>(data); + for(PxU32 t = 0; t < numTriangles; ++t) + *ptr++ = Ps::to16(triangles[t]); + } + + static PX_FORCE_INLINE bool writeTriangles_BaseU16_9xU10(PxU8* data, const PxU32* triangles, const PxU32 numTriangles) + { + PX_ASSERT(numTriangles <= 9); + + // check index range + PxU32 min = 0xffffffff; + PxU32 max = 0; + PxU32 minIndex = 0xffffffff; + for(PxU32 i = 0; i < numTriangles; ++i) + { + if(triangles[i] < min) + { + min = triangles[i]; + minIndex = i; + } + + if(triangles[i] > max) + max = triangles[i]; + } + + PxU32 range = max - min; + if(range < (1 << 10)) + { + // copy 
triangles to subtract base and remove 0 element + PX_ASSERT(numTriangles > 6 && numTriangles <= 9); + PxU16 triCopy[12]; + { + for(PxU32 i = 0; i < numTriangles; ++i) + triCopy[i] = PxU16(triangles[i] - min); + + PX_ASSERT(triCopy[minIndex] == 0); + triCopy[minIndex] = triCopy[numTriangles - 1]; + } + + PxU16* buffer = reinterpret_cast<PxU16*>(data); + buffer[0] = Ps::to16(min); + buffer[1] = PxU16((triCopy[0] << 6) | (triCopy[1] >> 4)); + buffer[2] = PxU16((triCopy[1] << 12) | (triCopy[2] << 2) | (triCopy[3] >> 8)); + buffer[3] = PxU16((triCopy[3] << 8) | (triCopy[4] >> 2)); + buffer[4] = PxU16((triCopy[4] << 14) | (triCopy[5] << 4) | (triCopy[6] >> 6)); + buffer[5] = PxU16((triCopy[6] << 10)); + + // copy rubbish, doesn't hurt since we are reading from large enough buffer + buffer[5] |= triCopy[7]; + + return true; + } + return false; + } + + static PX_FORCE_INLINE void writeTriangles_3xU32(PxU8* data, const PxU32* triangles, const PxU32 numTriangles) + { + PX_ASSERT(numTriangles <= 3); + PxU32* ptr = reinterpret_cast<PxU32*>(data); + for(PxU32 t = 0; t < numTriangles; ++t) + *ptr++ = triangles[t]; + } + + static PX_FORCE_INLINE void readTriangles_6xU16(PxU32* triangleBuffer, const PxU8* data, const PxU32 numTriangles) + { + PX_ASSERT(numTriangles <= 6); + const PxU16* ptr = reinterpret_cast<const PxU16*>(data); + const PxU16* end = ptr + numTriangles; + PxU32 dstIndex = 0; + while(ptr != end) + triangleBuffer[dstIndex++] = *ptr++; + } + + static PX_FORCE_INLINE void readTriangles_BaseU16_9xU10(PxU32* triangleBuffer, const PxU8* data, + const PxU32 numTriangles) + { + PX_ASSERT(numTriangles > 6 && numTriangles <= 9); + PX_UNUSED(numTriangles); + + const PxU16* buffer = reinterpret_cast<const PxU16*>(data); + PxU32 offset = buffer[0]; + const PxU32 mask = 0xffffffff >> (6 + 16); + triangleBuffer[0] = offset; + triangleBuffer[1] = ((PxU32(buffer[1] >> 6)) & mask) + offset; + triangleBuffer[2] = ((PxU32(buffer[1] << 4) | PxU32(buffer[2] >> 12)) & mask) + offset; + 
triangleBuffer[3] = ((PxU32(buffer[2] >> 2)) & mask) + offset; + triangleBuffer[4] = ((PxU32(buffer[2] << 8) | PxU32(buffer[3] >> 8)) & mask) + offset; + triangleBuffer[5] = ((PxU32(buffer[3] << 2) | PxU32(buffer[4] >> 14)) & mask) + offset; + triangleBuffer[6] = ((PxU32(buffer[4] >> 4)) & mask) + offset; + + // we can write the last two, even if they are rubbish. + triangleBuffer[7] = ((PxU32(buffer[4] << 6) | PxU32(buffer[5] >> 10)) & mask) + offset; + triangleBuffer[8] = (PxU32(buffer[5]) & mask) + offset; + } + + static PX_FORCE_INLINE void readTriangles_3xU32(PxU32* triangleBuffer, const PxU8* data, const PxU32 numTriangles) + { + PX_ASSERT(numTriangles <= 3); + const PxU32* ptr = reinterpret_cast<const PxU32*>(data); + const PxU32* end = ptr + numTriangles; + PxU32 dstIndex = 0; + while(ptr != end) + triangleBuffer[dstIndex++] = *ptr++; + } +}; + +PX_COMPILE_TIME_ASSERT(sizeof(PxTriangleMeshGeometryLL*) > 4 || sizeof(ParticleOpcodeCache) == 32); + +} // namespace Pt +} // namespace physx + +#endif // PX_USE_PARTICLE_SYSTEM_API +#endif // PT_PARTICLE_OPCODE_CACHE_H diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtParticleShapeCpu.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleShapeCpu.cpp new file mode 100644 index 00000000..44b82b3b --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleShapeCpu.cpp @@ -0,0 +1,76 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PtParticleShapeCpu.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "PtContext.h" +#include "PtParticleSystemSimCpu.h" +#include "PtSpatialHash.h" + +using namespace physx; +using namespace Pt; + +ParticleShapeCpu::ParticleShapeCpu(Context*, PxU32 index) +: mIndex(index), mParticleSystem(NULL), mPacket(NULL), mUserData(NULL) +{ +} + +ParticleShapeCpu::~ParticleShapeCpu() +{ +} + +void ParticleShapeCpu::init(ParticleSystemSimCpu* particleSystem, const ParticleCell* packet) +{ + PX_ASSERT(mParticleSystem == NULL); + PX_ASSERT(mPacket == NULL); + PX_ASSERT(mUserData == NULL); + + PX_ASSERT(particleSystem); + PX_ASSERT(packet); + + mParticleSystem = particleSystem; + mPacket = packet; + mPacketCoordinates = packet->coords; // this is needed for the remapping process. 
+ + // Compute and store AABB of the assigned packet + mParticleSystem->getPacketBounds(mPacketCoordinates, mBounds); +} + +void ParticleShapeCpu::destroyV() +{ + PX_ASSERT(mParticleSystem); + mParticleSystem->getContext().releaseParticleShape(this); + + mParticleSystem = NULL; + mPacket = NULL; + mUserData = NULL; +} + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtParticleShapeCpu.h b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleShapeCpu.h new file mode 100644 index 00000000..ccb525ea --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleShapeCpu.h @@ -0,0 +1,114 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. 
+// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PT_PARTICLE_SHAPE_CPU_H +#define PT_PARTICLE_SHAPE_CPU_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "foundation/PxTransform.h" +#include "foundation/PxBounds3.h" +#include "PtConfig.h" +#include "PtSpatialHash.h" +#include "PtParticleShape.h" + +namespace physx +{ + +namespace Pt +{ + +class Context; + +class ParticleShapeCpu : public ParticleShape +{ + public: + ParticleShapeCpu(Context* context, PxU32 index); + virtual ~ParticleShapeCpu(); + + void init(class ParticleSystemSimCpu* particleSystem, const ParticleCell* packet); + + // Implements ParticleShapeCpu + virtual PxBounds3 getBoundsV() const + { + return mBounds; + } + virtual void setUserDataV(void* data) + { + mUserData = data; + } + virtual void* getUserDataV() const + { + return mUserData; + } + virtual void destroyV(); + //~Implements ParticleShapeCpu + + PX_FORCE_INLINE void setFluidPacket(const ParticleCell* packet) + { + PX_ASSERT(packet); + mPacket = packet; + } + PX_FORCE_INLINE const ParticleCell* getFluidPacket() const + { + return mPacket; + } + + PX_FORCE_INLINE PxU32 getIndex() const + { + return mIndex; + } + PX_FORCE_INLINE class ParticleSystemSimCpu* getParticleSystem() + { + return mParticleSystem; + } + PX_FORCE_INLINE const class ParticleSystemSimCpu* getParticleSystem() const + { + return mParticleSystem; + } + PX_FORCE_INLINE GridCellVector getPacketCoordinates() const + { + return mPacketCoordinates; + } + + private: + PxU32 mIndex; + class ParticleSystemSimCpu* mParticleSystem; + 
PxBounds3 mBounds; + GridCellVector mPacketCoordinates; // This is needed for the remapping process. + const ParticleCell* mPacket; + void* mUserData; +}; + +} // namespace Pt +} // namespace physx + +#endif // PX_USE_PARTICLE_SYSTEM_API +#endif // PT_PARTICLE_SHAPE_CPU_H diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtParticleSystemSimCpu.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleSystemSimCpu.cpp new file mode 100644 index 00000000..5b9326bc --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleSystemSimCpu.cpp @@ -0,0 +1,858 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
+// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PtParticleSystemSimCpu.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "foundation/PxProfiler.h" +#include "PxvGeometry.h" +#include "PtContext.h" +#include "PtParticleShapeCpu.h" + +//----------------------------------------------------------------------------// + +// Standard value for particle resolution +#define PXN_FLUID_REST_PARTICLE_PER_UNIT_STD 10.0f + +// Macros to clamp dynamic friction and restitution (particle collision) to values that give stable results. +#define DYNAMIC_FRICTION_CLAMP 0.001f +#define RESTITUTION_CLAMP 0.05f + +#define CLAMP_DYNAMIC_FRICTION(t) PxClamp(t, DYNAMIC_FRICTION_CLAMP, 1.0f) +#define CLAMP_RESTITUTION(t) PxClamp(t, 0.0f, 1.0f - RESTITUTION_CLAMP) + +using namespace physx; +using namespace Pt; + +//----------------------------------------------------------------------------// + +ParticleSystemState& ParticleSystemSimCpu::getParticleStateV() +{ + PX_ASSERT(mParticleState); + return *mParticleState; +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::getSimParticleDataV(ParticleSystemSimDataDesc& simParticleData, bool) const +{ + simParticleData.densities = PxStrideIterator<const PxF32>(); + simParticleData.collisionNormals = PxStrideIterator<const PxVec3>(); + simParticleData.collisionVelocities = PxStrideIterator<const PxVec3>(); + simParticleData.twoWayImpluses = PxStrideIterator<const PxVec3>(); + simParticleData.twoWayBodies = PxStrideIterator<BodyHandle>(); + + if(mParticleState->getParticleCount() > 0 && mSimulated) + { + if(mParameter->particleReadDataFlags & 
PxParticleReadDataFlag::eDENSITY_BUFFER) + simParticleData.densities = + PxStrideIterator<const PxF32>(&mParticleState->getParticleBuffer()->density, sizeof(Particle)); + + if(mParameter->particleReadDataFlags & PxParticleReadDataFlag::eCOLLISION_NORMAL_BUFFER) + simParticleData.collisionNormals = PxStrideIterator<const PxVec3>(mTransientBuffer, sizeof(PxVec3)); + + if(mParameter->particleReadDataFlags & PxParticleReadDataFlag::eCOLLISION_VELOCITY_BUFFER) + simParticleData.collisionVelocities = PxStrideIterator<const PxVec3>(mCollisionVelocities); + + if(mFluidTwoWayData) + { + simParticleData.twoWayImpluses = + PxStrideIterator<const PxVec3>(&mFluidTwoWayData->impulse, sizeof(TwoWayData)); + simParticleData.twoWayBodies = + PxStrideIterator<BodyHandle>(reinterpret_cast<BodyHandle*>(&mFluidTwoWayData->body), sizeof(TwoWayData)); + } + } +} + +//----------------------------------------------------------------------------// + +/** +Will be called from HL twice per step. Once after the shape update (at the start of the frame) has been executed, +and once after the particle pipeline has finished. 
+*/ +void ParticleSystemSimCpu::getShapesUpdateV(ParticleShapeUpdateResults& updateResults) const +{ + PX_ASSERT(mIsSimulated); + + updateResults.destroyedShapeCount = mNumDeletedParticleShapes; + updateResults.destroyedShapes = mCreatedDeletedParticleShapes; + + updateResults.createdShapeCount = mNumCreatedParticleShapes; + updateResults.createdShapes = mCreatedDeletedParticleShapes + mNumDeletedParticleShapes; +} + +//----------------------------------------------------------------------------// + +physx::PxBaseTask& ParticleSystemSimCpu::schedulePacketShapesUpdate(const ParticleShapesUpdateInput& input, + physx::PxBaseTask& continuation) +{ + mPacketShapesFinalizationTask.setContinuation(&continuation); + mPacketShapesUpdateTask.setContinuation(&mPacketShapesFinalizationTask); + mPacketShapesFinalizationTask.removeReference(); + mPacketShapesUpdateTaskInput = input; + return mPacketShapesUpdateTask; +} + +//----------------------------------------------------------------------------// + +physx::PxBaseTask& ParticleSystemSimCpu::scheduleDynamicsUpdate(physx::PxBaseTask& continuation) +{ + if(mParameter->flags & InternalParticleSystemFlag::eSPH) + { + mDynamicsUpdateTask.setContinuation(&continuation); + return mDynamicsUpdateTask; + } + else + { + continuation.addReference(); + return continuation; + } +} + +//----------------------------------------------------------------------------// + +physx::PxBaseTask& ParticleSystemSimCpu::scheduleCollisionUpdate(physx::PxBaseTask& continuation) +{ + mCollisionFinalizationTask.setContinuation(&continuation); + mCollisionUpdateTask.setContinuation(&mCollisionFinalizationTask); + mCollisionFinalizationTask.removeReference(); + return mCollisionUpdateTask; +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::spatialHashUpdateSections(physx::PxBaseTask* continuation) +{ + PX_ASSERT(mParameter->flags & InternalParticleSystemFlag::eSPH); + + // Split each packet 
into sections and reorder particles of a packet according to these sections + + mSpatialHash->updatePacketSections(mPacketParticlesIndices, mParticleState->getParticleBuffer(), continuation); +} + +void ParticleSystemSimCpu::packetShapesUpdate(physx::PxBaseTask*) +{ + PX_ASSERT(mIsSimulated); + PX_ASSERT(mSpatialHash); + + // Init parameters for tracking of new/deleted fluid shapes + mNumCreatedParticleShapes = 0; + mNumDeletedParticleShapes = 0; + + if(mParticleState->getValidParticleRange() > 0) + { + if(!mPacketParticlesIndices) + mPacketParticlesIndices = reinterpret_cast<PxU32*>( + mAlign16.allocate(mParticleState->getMaxParticles() * sizeof(PxU32), __FILE__, __LINE__)); + + physx::PxBaseTask* cont; + if(mParameter->flags & InternalParticleSystemFlag::eSPH) + { + cont = &mSpatialHashUpdateSectionsTask; + mSpatialHashUpdateSectionsTask.setContinuation(&mPacketShapesFinalizationTask); + } + else + { + cont = &mPacketShapesFinalizationTask; + cont->addReference(); + } + + // Hash particles to packets and reorder particle indices + + mSpatialHash->updatePacketHash(mNumPacketParticlesIndices, mPacketParticlesIndices, + mParticleState->getParticleBuffer(), mParticleState->getParticleMap(), + mParticleState->getValidParticleRange(), cont); + } +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::packetShapesFinalization(physx::PxBaseTask*) +{ + // - Find for each packet shape the related packet and adjust the mapping. + // - Track created / deleted packets. + remapShapesToPackets(mPacketShapesUpdateTaskInput.shapes, mPacketShapesUpdateTaskInput.shapeCount); + + // release the shapes, since their ownership was transferred to us. 
+ if(mPacketShapesUpdateTaskInput.shapes) + PX_FREE(mPacketShapesUpdateTaskInput.shapes); +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::dynamicsUpdate(physx::PxBaseTask* continuation) +{ + PX_ASSERT(mParameter->flags & InternalParticleSystemFlag::eSPH); + PX_ASSERT(mIsSimulated); + PX_ASSERT(mSpatialHash); + PX_ASSERT(continuation); + + if(mNumPacketParticlesIndices > 0) + { + updateDynamicsParameter(); + + if(mParameter->flags & InternalParticleSystemFlag::eSPH) + { + mDynamics.updateSph(*continuation); + } + } +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::collisionUpdate(physx::PxBaseTask* continuation) +{ + PX_ASSERT(mIsSimulated); + PX_ASSERT(mSpatialHash); + PX_ASSERT(mCollisionUpdateTaskInput.contactManagerStream); + PX_ASSERT(continuation); + + updateCollisionParameter(); + + mParticleState->getWorldBounds().setEmpty(); + + mCollision.updateCollision(mCollisionUpdateTaskInput.contactManagerStream, *continuation); + mCollision.updateOverflowParticles(); +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::collisionFinalization(physx::PxBaseTask*) +{ + PX_FREE(mCollisionUpdateTaskInput.contactManagerStream); + mCollisionUpdateTaskInput.contactManagerStream = NULL; + + mSimulated = true; + + // clear shape update + mNumDeletedParticleShapes = 0; + mNumCreatedParticleShapes = 0; +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::setExternalAccelerationV(const PxVec3& v) +{ + mExternalAcceleration = v; +} + +//----------------------------------------------------------------------------// + +const PxVec3& ParticleSystemSimCpu::getExternalAccelerationV() const +{ + return mExternalAcceleration; +} + +//----------------------------------------------------------------------------// + +void 
ParticleSystemSimCpu::setSimulationTimeStepV(PxReal value) +{ + PX_ASSERT(value >= 0.0f); + + mSimulationTimeStep = value; +} + +//----------------------------------------------------------------------------// + +PxReal ParticleSystemSimCpu::getSimulationTimeStepV() const +{ + return mSimulationTimeStep; +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::setSimulatedV(bool isSimulated) +{ + mIsSimulated = isSimulated; + if(!isSimulated) + clearParticleConstraints(); +} + +//----------------------------------------------------------------------------// + +Ps::IntBool ParticleSystemSimCpu::isSimulatedV() const +{ + return mIsSimulated; +} + +//----------------------------------------------------------------------------// + +ParticleSystemSimCpu::ParticleSystemSimCpu(ContextCpu* context, PxU32 index) +: mContext(*context) +, mParticleState(NULL) +, mSimulated(false) +, mFluidTwoWayData(NULL) +, mCreatedDeletedParticleShapes(NULL) +, mPacketParticlesIndices(NULL) +, mNumPacketParticlesIndices(0) +, mOpcodeCacheBuffer(NULL) +, mTransientBuffer(NULL) +, mCollisionVelocities(NULL) +, mDynamics(*this) +, mCollision(*this) +, mIndex(index) +, mPacketShapesUpdateTask(this, "Pt::ParticleSystemSimCpu.packetShapesUpdate") +, mPacketShapesFinalizationTask(this, "Pt::ParticleSystemSimCpu.packetShapesFinalization") +, mDynamicsUpdateTask(this, "Pt::ParticleSystemSimCpu.dynamicsUpdate") +, mCollisionUpdateTask(this, "Pt::ParticleSystemSimCpu.collisionUpdate") +, mCollisionFinalizationTask(this, "Pt::ParticleSystemSimCpu.collisionFinalization") +, mSpatialHashUpdateSectionsTask(this, "Pt::ParticleSystemSimCpu.spatialHashUpdateSections") +{ +} + +//----------------------------------------------------------------------------// + +ParticleSystemSimCpu::~ParticleSystemSimCpu() +{ +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::init(ParticleData& 
particleData, const ParticleSystemParameter& parameter) +{ + mParticleState = &particleData; + mParticleState->clearSimState(); + mParameter = ¶meter; + mSimulationTimeStep = 0.0f; + mExternalAcceleration = PxVec3(0); + mPacketParticlesIndices = NULL; + + initializeParameter(); + + PxU32 maxParticles = mParticleState->getMaxParticles(); + + // Initialize buffers + mConstraintBuffers.constraint0Buf = + reinterpret_cast<Constraint*>(mAlign16.allocate(maxParticles * sizeof(Constraint), __FILE__, __LINE__)); + mConstraintBuffers.constraint1Buf = + reinterpret_cast<Constraint*>(mAlign16.allocate(maxParticles * sizeof(Constraint), __FILE__, __LINE__)); + if(mParameter->flags & PxParticleBaseFlag::eCOLLISION_WITH_DYNAMIC_ACTORS) + { + mConstraintBuffers.constraint0DynamicBuf = reinterpret_cast<ConstraintDynamic*>( + mAlign16.allocate(maxParticles * sizeof(ConstraintDynamic), __FILE__, __LINE__)); + mConstraintBuffers.constraint1DynamicBuf = reinterpret_cast<ConstraintDynamic*>( + mAlign16.allocate(maxParticles * sizeof(ConstraintDynamic), __FILE__, __LINE__)); + } + else + { + mConstraintBuffers.constraint0DynamicBuf = NULL; + mConstraintBuffers.constraint1DynamicBuf = NULL; + } + + if((mParameter->flags & PxParticleBaseFlag::eCOLLISION_TWOWAY) && + (mParameter->flags & PxParticleBaseFlag::eCOLLISION_WITH_DYNAMIC_ACTORS)) + mFluidTwoWayData = + reinterpret_cast<TwoWayData*>(mAlign16.allocate(maxParticles * sizeof(TwoWayData), __FILE__, __LINE__)); + +#if PX_CHECKED + { + PxU32 numWords = maxParticles * sizeof(Constraint) >> 2; + for(PxU32 i = 0; i < numWords; ++i) + { + reinterpret_cast<PxU32*>(mConstraintBuffers.constraint0Buf)[i] = 0xDEADBEEF; + reinterpret_cast<PxU32*>(mConstraintBuffers.constraint1Buf)[i] = 0xDEADBEEF; + } + } +#endif + + if(mParameter->flags & PxParticleBaseFlag::ePER_PARTICLE_COLLISION_CACHE_HINT) + { + mOpcodeCacheBuffer = reinterpret_cast<ParticleOpcodeCache*>( + mAlign16.allocate(maxParticles * sizeof(ParticleOpcodeCache), __FILE__, __LINE__)); 
+#if PX_CHECKED + // sschirm: avoid reading uninitialized mGeom in ParticleOpcodeCache::read in assert statement + PxMemZero(mOpcodeCacheBuffer, maxParticles * sizeof(ParticleOpcodeCache)); +#endif + } + + if((mParameter->flags & InternalParticleSystemFlag::eSPH) || + (mParameter->particleReadDataFlags & PxParticleReadDataFlag::eCOLLISION_NORMAL_BUFFER)) + mTransientBuffer = + reinterpret_cast<PxVec3*>(mAlign16.allocate(maxParticles * sizeof(PxVec3), __FILE__, __LINE__)); + + if(mParameter->particleReadDataFlags & PxParticleReadDataFlag::eCOLLISION_VELOCITY_BUFFER) + mCollisionVelocities = + reinterpret_cast<PxVec3*>(mAlign16.allocate(maxParticles * sizeof(PxVec3), __FILE__, __LINE__)); + + mCreatedDeletedParticleShapes = reinterpret_cast<ParticleShape**>( + PX_ALLOC(2 * PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE * sizeof(ParticleShape*), "ParticleShape*")); + mNumCreatedParticleShapes = 0; + mNumDeletedParticleShapes = 0; + + // Create object for spatial hashing. + mSpatialHash = reinterpret_cast<SpatialHash*>(PX_ALLOC(sizeof(SpatialHash), "SpatialHash")); + if(mSpatialHash) + { + new (mSpatialHash) SpatialHash(PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE, mDynamics.getParameter().cellSizeInv, + mParameter->packetSizeMultiplierLog2, + (mParameter->flags & InternalParticleSystemFlag::eSPH) != 0); + } + + mCollisionUpdateTaskInput.contactManagerStream = NULL; + + // Make sure we start deactivated. 
+ mSimulated = false; +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::clear() +{ + mDynamics.clear(); + + if(mSpatialHash) + { + mSpatialHash->~SpatialHash(); + PX_FREE(mSpatialHash); + mSpatialHash = NULL; + } + + // Free particle buffers + mAlign16.deallocate(mConstraintBuffers.constraint0Buf); + mConstraintBuffers.constraint0Buf = NULL; + + mAlign16.deallocate(mConstraintBuffers.constraint1Buf); + mConstraintBuffers.constraint1Buf = NULL; + + if(mConstraintBuffers.constraint0DynamicBuf) + { + mAlign16.deallocate(mConstraintBuffers.constraint0DynamicBuf); + mConstraintBuffers.constraint0DynamicBuf = NULL; + } + + if(mConstraintBuffers.constraint1DynamicBuf) + { + mAlign16.deallocate(mConstraintBuffers.constraint1DynamicBuf); + mConstraintBuffers.constraint1DynamicBuf = NULL; + } + + if(mOpcodeCacheBuffer) + { + mAlign16.deallocate(mOpcodeCacheBuffer); + mOpcodeCacheBuffer = NULL; + } + + if(mTransientBuffer) + { + mAlign16.deallocate(mTransientBuffer); + mTransientBuffer = NULL; + } + + if(mCollisionVelocities) + { + mAlign16.deallocate(mCollisionVelocities); + mCollisionVelocities = NULL; + } + + if(mCreatedDeletedParticleShapes) + { + PX_FREE(mCreatedDeletedParticleShapes); + mCreatedDeletedParticleShapes = NULL; + } + + if(mPacketParticlesIndices) + { + mAlign16.deallocate(mPacketParticlesIndices); + mPacketParticlesIndices = NULL; + } + mNumPacketParticlesIndices = 0; + + if(mFluidTwoWayData) + { + mAlign16.deallocate(mFluidTwoWayData); + mFluidTwoWayData = NULL; + } + + mSimulated = false; + + if(mParticleState) + { + mParticleState->release(); + mParticleState = NULL; + } +} + +//----------------------------------------------------------------------------// + +ParticleData* ParticleSystemSimCpu::obtainParticleState() +{ + PX_ASSERT(mParticleState); + ParticleData* tmp = mParticleState; + mParticleState = NULL; + return tmp; +} + 
+//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::remapShapesToPackets(ParticleShape* const* shapes, PxU32 numShapes) +{ + PX_ASSERT(mNumCreatedParticleShapes == 0); + PX_ASSERT(mNumDeletedParticleShapes == 0); + + if(mParticleState->getValidParticleRange() > 0) + { + PX_ASSERT(mSpatialHash); + + Cm::BitMap mappedFluidPackets; // Marks the fluid packets that are mapped to a fluid shape. + mappedFluidPackets.resizeAndClear(PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE); + + // Find for each shape the corresponding packet. If it does not exist the shape has to be deleted. + for(PxU32 i = 0; i < numShapes; i++) + { + ParticleShapeCpu* shape = static_cast<ParticleShapeCpu*>(shapes[i]); + + PxU32 hashIndex; + const ParticleCell* particlePacket = mSpatialHash->findCell(hashIndex, shape->getPacketCoordinates()); + if(particlePacket) + { + shape->setFluidPacket(particlePacket); + + // Mark packet as mapped. + mappedFluidPackets.set(hashIndex); + } + else + { + mCreatedDeletedParticleShapes[mNumDeletedParticleShapes++] = shape; + } + } + + // Check for each packet whether it is mapped to a fluid shape. If not, a new shape must be created. + const ParticleCell* fluidPackets = mSpatialHash->getPackets(); + PX_ASSERT((mappedFluidPackets.getWordCount() << 5) >= PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE); + for(PxU32 p = 0; p < PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE; p++) + { + if((!mappedFluidPackets.test(p)) && (fluidPackets[p].numParticles != PX_INVALID_U32)) + { + ParticleShapeCpu* shape = mContext.createParticleShape(this, &fluidPackets[p]); + if(shape) + { + mCreatedDeletedParticleShapes[mNumDeletedParticleShapes + mNumCreatedParticleShapes++] = shape; + } + } + } + } + else + { + // Release all shapes. 
+ for(PxU32 i = 0; i < numShapes; i++) + { + ParticleShapeCpu* shape = static_cast<ParticleShapeCpu*>(shapes[i]); + mCreatedDeletedParticleShapes[mNumDeletedParticleShapes++] = shape; + } + } +} + +//----------------------------------------------------------------------------// +// Body Shape Reference Invalidation +//----------------------------------------------------------------------------// + +/** +Removes all BodyShape references. +Only the info in the Particle (constraint0Info, constraint1Info) need +to be cleared, since they are checked before copying references from the constraints +to the TwoWayData, where it is finally used for dereferencing. +*/ +void ParticleSystemSimCpu::clearParticleConstraints() +{ + Particle* particleBuffer = mParticleState->getParticleBuffer(); + Cm::BitMap::Iterator it(mParticleState->getParticleMap()); + for(PxU32 particleIndex = it.getNext(); particleIndex != Cm::BitMap::Iterator::DONE; particleIndex = it.getNext()) + { + Particle& particle = particleBuffer[particleIndex]; + particle.flags.low &= PxU16(~InternalParticleFlag::eANY_CONSTRAINT_VALID); + } +} + +//----------------------------------------------------------------------------// + +/** +Updates shape transform hash from context and removes references to a rigid body that was deleted. +*/ +void ParticleSystemSimCpu::removeInteractionV(const ParticleShape& particleShape, ShapeHandle shape, BodyHandle body, + bool isDynamic, bool isDyingRb, bool) +{ + const PxsShapeCore* pxsShape = reinterpret_cast<const PxsShapeCore*>(shape); + const ParticleShapeCpu& pxsParticleShape = static_cast<const ParticleShapeCpu&>(particleShape); + + if(isDyingRb) + { + if(isDynamic) + { + if(mFluidTwoWayData) + { + // just call when packets cover the same particles when constraints where + // generated (which is the case with isDyingRb). 
+ removeTwoWayRbReferences(pxsParticleShape, reinterpret_cast<const PxsBodyCore*>(body)); + } + } + else if(mOpcodeCacheBuffer && pxsShape->geometry.getType() == PxGeometryType::eTRIANGLEMESH) + { + // just call when packets cover the same particles when cache was used last (must be the last simulation + // step, + // since the cache gets invalidated after one step not being used). + setCollisionCacheInvalid(pxsParticleShape, pxsShape->geometry); + } + } +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::onRbShapeChangeV(const ParticleShape& particleShape, ShapeHandle shape) +{ + const PxsShapeCore* pxsShape = reinterpret_cast<const PxsShapeCore*>(shape); + const ParticleShapeCpu& pxsParticleShape = static_cast<const ParticleShapeCpu&>(particleShape); + + if(mOpcodeCacheBuffer && pxsShape->geometry.getType() == PxGeometryType::eTRIANGLEMESH) + { + // just call when packets cover the same particles when cache was used last (must be the last simulation step, + // since the cache gets invalidated after one step not being used). + setCollisionCacheInvalid(pxsParticleShape, pxsShape->geometry); + } +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::passCollisionInputV(ParticleCollisionUpdateInput input) +{ + PX_ASSERT(mCollisionUpdateTaskInput.contactManagerStream == NULL); + mCollisionUpdateTaskInput = input; +} + +//----------------------------------------------------------------------------// + +/** +Removes specific PxsShapeCore references from particles belonging to a certain shape. +The constraint data itself needs to be accessed, because it's assumed that if there +is only one constraint, it's in the slot 1 of the constraint pair. + +Should only be called when packets cover the same particles when constraints where generated! 
+*/ +void ParticleSystemSimCpu::removeTwoWayRbReferences(const ParticleShapeCpu& particleShape, const PxsBodyCore* rigidBody) +{ + PX_ASSERT(mFluidTwoWayData); + PX_ASSERT(mConstraintBuffers.constraint0DynamicBuf); + PX_ASSERT(mConstraintBuffers.constraint1DynamicBuf); + PX_ASSERT(rigidBody); + PX_ASSERT(particleShape.getFluidPacket()); + const ParticleCell* packet = particleShape.getFluidPacket(); + Particle* particleBuffer = mParticleState->getParticleBuffer(); + + PxU32 endIndex = packet->firstParticle + packet->numParticles; + for(PxU32 i = packet->firstParticle; i < endIndex; ++i) + { + // update particles for shapes that have been deleted! + PxU32 particleIndex = mPacketParticlesIndices[i]; + Particle& particle = particleBuffer[particleIndex]; + + // we need to skip invalid particles + // it may be that a particle has been deleted prior to the deletion of the RB + // it may also be that a particle has been re-added to the same index, in which case + // the particle.flags.low will have been overwritten + if(!(particle.flags.api & PxParticleFlag::eVALID)) + continue; + + if(!(particle.flags.low & InternalParticleFlag::eANY_CONSTRAINT_VALID)) + continue; + + Constraint& c0 = mConstraintBuffers.constraint0Buf[particleIndex]; + Constraint& c1 = mConstraintBuffers.constraint1Buf[particleIndex]; + ConstraintDynamic& cd0 = mConstraintBuffers.constraint0DynamicBuf[particleIndex]; + ConstraintDynamic& cd1 = mConstraintBuffers.constraint1DynamicBuf[particleIndex]; + + if(reinterpret_cast<const PxsBodyCore*>(rigidBody) == cd1.twoWayBody) + { + particle.flags.low &= + PxU16(~(InternalParticleFlag::eCONSTRAINT_1_VALID | InternalParticleFlag::eCONSTRAINT_1_DYNAMIC)); + } + + if(reinterpret_cast<const PxsBodyCore*>(rigidBody) == cd0.twoWayBody) + { + if(!(particle.flags.low & InternalParticleFlag::eCONSTRAINT_1_VALID)) + { + particle.flags.low &= + PxU16(~(InternalParticleFlag::eCONSTRAINT_0_VALID | InternalParticleFlag::eCONSTRAINT_0_DYNAMIC)); + } + else + { + c0 = c1; + 
cd0 = cd1; + particle.flags.low &= + PxU16(~(InternalParticleFlag::eCONSTRAINT_1_VALID | InternalParticleFlag::eCONSTRAINT_1_DYNAMIC)); + } + } + } +} + +//----------------------------------------------------------------------------// + +/** +Should only be called when packets cover the same particles when cache was used last. +I.e. after the last collision update and before the next shape update. +It's ok if particles where replaced or removed from the corresponding packet intervalls, +since the cache updates will not do any harm for those. +*/ +void ParticleSystemSimCpu::setCollisionCacheInvalid(const ParticleShapeCpu& particleShape, + const Gu::GeometryUnion& geometry) +{ + PX_ASSERT(mOpcodeCacheBuffer); + PX_ASSERT(particleShape.getFluidPacket()); + const ParticleCell* packet = particleShape.getFluidPacket(); + Particle* particleBuffer = mParticleState->getParticleBuffer(); + + PxU32 endIndex = packet->firstParticle + packet->numParticles; + for(PxU32 i = packet->firstParticle; i < endIndex; ++i) + { + // update particles for shapes that have been deleted! 
+ PxU32 particleIndex = mPacketParticlesIndices[i]; + Particle& particle = particleBuffer[particleIndex]; + + if((particle.flags.low & InternalParticleFlag::eGEOM_CACHE_MASK) != 0) + { + ParticleOpcodeCache& cache = mOpcodeCacheBuffer[particleIndex]; + if(cache.getGeometry() == &geometry) + particle.flags.low &= ~PxU16(InternalParticleFlag::eGEOM_CACHE_MASK); + } + } +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::initializeParameter() +{ + const ParticleSystemParameter& parameter = *mParameter; + + DynamicsParameters& dynamicsParams = mDynamics.getParameter(); + + // initialize dynamics parameter + { + PxReal restParticlesDistance = parameter.restParticleDistance; + PxReal restParticlesDistanceStd = 1.0f / PXN_FLUID_REST_PARTICLE_PER_UNIT_STD; + PxReal restParticlesDistance3 = restParticlesDistance * restParticlesDistance * restParticlesDistance; + PxReal restParticlesDistanceStd3 = restParticlesDistanceStd * restParticlesDistanceStd * restParticlesDistanceStd; + PX_UNUSED(restParticlesDistance3); + + dynamicsParams.initialDensity = parameter.restDensity; + dynamicsParams.particleMassStd = dynamicsParams.initialDensity * restParticlesDistanceStd3; + dynamicsParams.cellSize = parameter.kernelRadiusMultiplier * restParticlesDistance; + dynamicsParams.cellSizeInv = 1.0f / dynamicsParams.cellSize; + dynamicsParams.cellSizeSq = dynamicsParams.cellSize * dynamicsParams.cellSize; + dynamicsParams.packetSize = dynamicsParams.cellSize * (1 << parameter.packetSizeMultiplierLog2); + PxReal radiusStd = parameter.kernelRadiusMultiplier * restParticlesDistanceStd; + PxReal radius2Std = radiusStd * radiusStd; + PxReal radius6Std = radius2Std * radius2Std * radius2Std; + PxReal radius9Std = radius6Std * radius2Std * radiusStd; + PxReal wPoly6ScalarStd = 315.0f / (64.0f * PxPi * radius9Std); + PxReal wSpikyGradientScalarStd = 1.5f * 15.0f / (PxPi * radius6Std); + + dynamicsParams.radiusStd = radiusStd; + 
dynamicsParams.radiusSqStd = radius2Std; + dynamicsParams.densityMultiplierStd = wPoly6ScalarStd * dynamicsParams.particleMassStd; + dynamicsParams.stiffMulPressureMultiplierStd = + wSpikyGradientScalarStd * dynamicsParams.particleMassStd * parameter.stiffness; + dynamicsParams.selfDensity = dynamicsParams.densityMultiplierStd * radius2Std * radius2Std * radius2Std; + dynamicsParams.scaleToStd = restParticlesDistanceStd / restParticlesDistance; + dynamicsParams.scaleSqToStd = dynamicsParams.scaleToStd * dynamicsParams.scaleToStd; + dynamicsParams.scaleToWorld = 1.0f / dynamicsParams.scaleToStd; + dynamicsParams.packetMultLog = parameter.packetSizeMultiplierLog2; + + PxReal densityRestOffset = (dynamicsParams.initialDensity - dynamicsParams.selfDensity); + dynamicsParams.densityNormalizationFactor = (densityRestOffset > 0.0f) ? (1.0f / densityRestOffset) : 0.0f; + + updateDynamicsParameter(); + } + + CollisionParameters& collisionParams = mCollision.getParameter(); + + // initialize collision parameter: these partially depend on dynamics parameters! 
+ { + collisionParams.cellSize = dynamicsParams.cellSize; + collisionParams.cellSizeInv = dynamicsParams.cellSizeInv; + collisionParams.packetMultLog = parameter.packetSizeMultiplierLog2; + collisionParams.packetMult = PxU32(1 << parameter.packetSizeMultiplierLog2); + collisionParams.packetSize = dynamicsParams.packetSize; + collisionParams.restOffset = parameter.restOffset; + collisionParams.contactOffset = parameter.contactOffset; + PX_ASSERT(collisionParams.contactOffset >= collisionParams.restOffset); + collisionParams.maxMotionDistance = parameter.maxMotionDistance; + collisionParams.collisionRange = + collisionParams.maxMotionDistance + collisionParams.contactOffset + PT_PARTICLE_SYSTEM_COLLISION_SLACK; + updateCollisionParameter(); + } +} + +//----------------------------------------------------------------------------// + +PX_FORCE_INLINE PxF32 computeDampingFactor(PxF32 damping, PxF32 timeStep) +{ + PxF32 dampingDt = damping * timeStep; + if(dampingDt < 1.0f) + return 1.0f - dampingDt; + else + return 0.0f; +} + +void ParticleSystemSimCpu::updateDynamicsParameter() +{ + const ParticleSystemParameter& parameter = *mParameter; + DynamicsParameters& dynamicsParams = mDynamics.getParameter(); + + PxReal restParticlesDistanceStd = 1.0f / PXN_FLUID_REST_PARTICLE_PER_UNIT_STD; + PxReal radiusStd = parameter.kernelRadiusMultiplier * restParticlesDistanceStd; + PxReal radius2Std = radiusStd * radiusStd; + PxReal radius6Std = radius2Std * radius2Std * radius2Std; + + dynamicsParams.viscosityMultiplierStd = + computeViscosityMultiplier(parameter.viscosity, dynamicsParams.particleMassStd, radius6Std); +} + +//----------------------------------------------------------------------------// + +void ParticleSystemSimCpu::updateCollisionParameter() +{ + const ParticleSystemParameter& parameter = *mParameter; + CollisionParameters& collisionParams = mCollision.getParameter(); + + collisionParams.dampingDtComp = computeDampingFactor(parameter.damping, mSimulationTimeStep); + 
collisionParams.externalAcceleration = mExternalAcceleration; + + collisionParams.projectionPlane.n = parameter.projectionPlane.n; + collisionParams.projectionPlane.d = parameter.projectionPlane.d; + collisionParams.timeStep = mSimulationTimeStep; + collisionParams.invTimeStep = (mSimulationTimeStep > 0.0f) ? 1.0f / mSimulationTimeStep : 0.0f; + + collisionParams.restitution = CLAMP_RESTITUTION(parameter.restitution); + collisionParams.dynamicFriction = CLAMP_DYNAMIC_FRICTION(parameter.dynamicFriction); + collisionParams.staticFrictionSqr = parameter.staticFriction * parameter.staticFriction; + collisionParams.temporalNoise = (parameter.noiseCounter * parameter.noiseCounter * 4999879) & 0xffff; + collisionParams.flags = parameter.flags; +} + +//----------------------------------------------------------------------------// + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtParticleSystemSimCpu.h b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleSystemSimCpu.h new file mode 100644 index 00000000..381b80c4 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtParticleSystemSimCpu.h @@ -0,0 +1,239 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". 
// NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PT_PARTICLE_SYSTEM_SIM_CPU_H
#define PT_PARTICLE_SYSTEM_SIM_CPU_H

#include "PxPhysXConfig.h"
#if PX_USE_PARTICLE_SYSTEM_API

#include "PtParticleSystemSim.h"
#include "PtDynamics.h"
#include "PtCollision.h"
#include "PtGridCellVector.h"
#include "PsAllocator.h"
#include "PtParticleData.h"
#include "CmTask.h"
#include "PtContextCpu.h"

namespace physx
{

class PxParticleDeviceExclusiveAccess;
class PxBaseTask;

namespace Pt
{
class Context;
struct ConstraintPair;
class SpatialHash;
class ParticleShapeCpu;

/**
CPU implementation of the ParticleSystemSim interface.

Owns the per-particle working buffers, the spatial hash and the Dynamics / Collision helpers,
and exposes the simulation stages (packet shapes, dynamics, collision) as delegate tasks that
are chained together by the schedule*() functions.
*/
class ParticleSystemSimCpu : public ParticleSystemSim
{
    PX_NOCOPY(ParticleSystemSimCpu)
  public:
    //---------------------------
    // Implements ParticleSystemSim
    virtual ParticleSystemState& getParticleStateV();
    virtual void getSimParticleDataV(ParticleSystemSimDataDesc& simParticleData, bool devicePtr) const;

    virtual void getShapesUpdateV(ParticleShapeUpdateResults& updateResults) const;

    virtual void setExternalAccelerationV(const PxVec3& v);
    virtual const PxVec3& getExternalAccelerationV() const;

    virtual void setSimulationTimeStepV(PxReal value);
    virtual PxReal getSimulationTimeStepV() const;

    virtual void setSimulatedV(bool);
    virtual Ps::IntBool isSimulatedV() const;

    // Intentionally empty in the CPU implementation.
    virtual void addInteractionV(const ParticleShape&, ShapeHandle, BodyHandle, bool, bool)
    {
    }
    virtual void removeInteractionV(const ParticleShape& particleShape, ShapeHandle shape, BodyHandle body,
                                    bool isDynamic, bool isDyingRb, bool ccdBroadphase);
    virtual void onRbShapeChangeV(const ParticleShape& particleShape, ShapeHandle shape);

    // Intentionally empty in the CPU implementation.
    virtual void flushBufferedInteractionUpdatesV()
    {
    }

    virtual void passCollisionInputV(ParticleCollisionUpdateInput input);
#if PX_SUPPORT_GPU_PHYSX
    // GPU related entry points: this is the CPU implementation, so it reports "not GPU" and
    // asserts if the GPU-only functions are called.
    virtual Ps::IntBool isGpuV() const
    {
        return false;
    }
    virtual void enableDeviceExclusiveModeGpuV()
    {
        PX_ASSERT(0);
    }
    virtual PxParticleDeviceExclusiveAccess* getDeviceExclusiveAccessGpuV() const
    {
        PX_ASSERT(0);
        return NULL;
    }
#endif

    //~Implements ParticleSystemSim
    //---------------------------

    ParticleSystemSimCpu(ContextCpu* context, PxU32 index);
    virtual ~ParticleSystemSimCpu();
    // init() allocates the working buffers for the given particle data / parameters,
    // clear() releases them again.
    void init(ParticleData& particleData, const ParticleSystemParameter& parameter);
    void clear();
    // Returns the particle state and resets the internal pointer to NULL.
    ParticleData* obtainParticleState();

    PX_FORCE_INLINE ContextCpu& getContext() const
    {
        return mContext;
    }

    PX_FORCE_INLINE void getPacketBounds(const GridCellVector& coord, PxBounds3& bounds);

    PX_FORCE_INLINE PxReal computeViscosityMultiplier(PxReal viscosityStd, PxReal particleMassStd, PxReal radius6Std);

    PX_FORCE_INLINE PxU32 getIndex() const
    {
        return mIndex;
    }

    // Task bodies, invoked through the Cm::DelegateTask members declared below.
    void packetShapesUpdate(physx::PxBaseTask* continuation);
    void packetShapesFinalization(physx::PxBaseTask* continuation);
    void dynamicsUpdate(physx::PxBaseTask* continuation);
    void collisionUpdate(physx::PxBaseTask* continuation);
    void collisionFinalization(physx::PxBaseTask* continuation);
    void spatialHashUpdateSections(physx::PxBaseTask* continuation);

    // Set up the task chain of a stage and return the task that starts it.
    physx::PxBaseTask& schedulePacketShapesUpdate(const ParticleShapesUpdateInput& input,
                                                  physx::PxBaseTask& continuation);
    physx::PxBaseTask& scheduleDynamicsUpdate(physx::PxBaseTask& continuation);
    physx::PxBaseTask& scheduleCollisionUpdate(physx::PxBaseTask& continuation);

  private:
    void remapShapesToPackets(ParticleShape* const* shapes, PxU32 numShapes);
    void clearParticleConstraints();
    void initializeParameter();
    void updateDynamicsParameter();
    void updateCollisionParameter();
    void removeTwoWayRbReferences(const ParticleShapeCpu& particleShape, const PxsBodyCore* rigidBody);
    void setCollisionCacheInvalid(const ParticleShapeCpu& particleShape, const Gu::GeometryUnion& geometry);

  private:
    ContextCpu& mContext;
    ParticleData* mParticleState;
    const ParticleSystemParameter* mParameter;

    // Set to true at the end of a collision update, reset by init() and clear().
    // Not to be confused with mIsSimulated below, which is driven by setSimulatedV().
    Ps::IntBool mSimulated;

    TwoWayData* mFluidTwoWayData;

    ParticleShape** mCreatedDeletedParticleShapes; // Handles of created and deleted particle packet shapes.
    PxU32 mNumCreatedParticleShapes;
    PxU32 mNumDeletedParticleShapes;
    PxU32* mPacketParticlesIndices; // Dense array of sorted particle indices.
    PxU32 mNumPacketParticlesIndices;

    ConstraintBuffers mConstraintBuffers; // Particle constraints.

    ParticleOpcodeCache* mOpcodeCacheBuffer; // Opcode cache.
    PxVec3* mTransientBuffer;                // force in SPH , collision normal
    PxVec3* mCollisionVelocities;

    // Spatial ordering, packet generation
    SpatialHash* mSpatialHash;

    // Dynamics update
    Dynamics mDynamics;

    // Collision update
    Collision mCollision;

    PxReal mSimulationTimeStep;
    bool mIsSimulated; // Value set through setSimulatedV() and returned by isSimulatedV().

    PxVec3 mExternalAcceleration; // This includes the gravity of the scene

    PxU32 mIndex;

    // pipeline tasks
    typedef Cm::DelegateTask<ParticleSystemSimCpu, &ParticleSystemSimCpu::packetShapesUpdate> PacketShapesUpdateTask;
    typedef Cm::DelegateTask<ParticleSystemSimCpu, &ParticleSystemSimCpu::packetShapesFinalization> PacketShapesFinalizationTask;
    typedef Cm::DelegateTask<ParticleSystemSimCpu, &ParticleSystemSimCpu::dynamicsUpdate> DynamicsUpdateTask;
    typedef Cm::DelegateTask<ParticleSystemSimCpu, &ParticleSystemSimCpu::collisionUpdate> CollisionUpdateTask;
    typedef Cm::DelegateTask<ParticleSystemSimCpu, &ParticleSystemSimCpu::collisionFinalization> CollisionFinalizationTask;
    typedef Cm::DelegateTask<ParticleSystemSimCpu, &ParticleSystemSimCpu::spatialHashUpdateSections> SpatialHashUpdateSectionsTask;

    PacketShapesUpdateTask mPacketShapesUpdateTask;
    PacketShapesFinalizationTask mPacketShapesFinalizationTask;
    DynamicsUpdateTask mDynamicsUpdateTask;
    CollisionUpdateTask mCollisionUpdateTask;
    CollisionFinalizationTask mCollisionFinalizationTask;
    SpatialHashUpdateSectionsTask mSpatialHashUpdateSectionsTask;

    // Inputs handed over by schedulePacketShapesUpdate() / passCollisionInputV() and consumed by the tasks.
    ParticleShapesUpdateInput mPacketShapesUpdateTaskInput;
    ParticleCollisionUpdateInput mCollisionUpdateTaskInput;

    // Allocator used for the per-particle buffers.
    Ps::AlignedAllocator<16, Ps::ReflectionAllocator<char> > mAlign16;

    friend class Collision;
    friend class Dynamics;
};

//----------------------------------------------------------------------------//

/*!
Compute AABB of a packet given its coordinates.
Enlarge the bounding box such that a particle on the current boundary could
travel the maximum distance and would still be inside the enlarged volume.
*/
PX_FORCE_INLINE void ParticleSystemSimCpu::getPacketBounds(const GridCellVector& coord, PxBounds3& bounds)
{
    // Packet origin = integer packet coordinates scaled by the packet size.
    PxVec3 gridOrigin(static_cast<PxReal>(coord.x), static_cast<PxReal>(coord.y), static_cast<PxReal>(coord.z));
    gridOrigin *= mCollision.getParameter().packetSize;

    PxVec3 collisionRangeVec(mCollision.getParameter().collisionRange);
    bounds.minimum = gridOrigin - collisionRangeVec;
    bounds.maximum = gridOrigin + PxVec3(mCollision.getParameter().packetSize) + collisionRangeVec;
}

/*!
Returns 45 / (pi * radius6Std) * viscosityStd * particleMassStd.
*/
PX_FORCE_INLINE PxReal
ParticleSystemSimCpu::computeViscosityMultiplier(PxReal viscosityStd, PxReal particleMassStd, PxReal radius6Std)
{
    PxReal wViscosityLaplacianScalarStd = 45.0f / (PxPi * radius6Std);
    return (wViscosityLaplacianScalarStd * viscosityStd * particleMassStd);
}

} // namespace Pt
} // namespace physx

#endif // PX_USE_PARTICLE_SYSTEM_API
#endif // PT_PARTICLE_SYSTEM_SIM_CPU_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHash.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHash.cpp
new file mode 100644
index 00000000..16b6ca25
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHash.cpp
@@ -0,0 +1,514 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto. Any use, reproduction, disclosure, or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA Corporation is strictly prohibited.
//
// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
//
// Information and code furnished is believed to be accurate and reliable.
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#include "PtSpatialHash.h"
#if PX_USE_PARTICLE_SYSTEM_API

#include "PsAlloca.h"
#include "CmTask.h"

#include "PtParticleSystemSim.h"
#include "PtSpatialHashHelper.h"
#include "PtParticle.h"
#include "PtCollisionData.h"
#include "PsUtilities.h"
#include "PsFoundation.h"

using namespace physx;
using namespace Pt;

/*!
Allocates the cell table with (numHashBuckets + 1) entries; the extra entry is the overflow cell.
When supportSections is set, one PacketSections entry per hash bucket is allocated as well,
otherwise mPacketSections stays NULL.
*/
SpatialHash::SpatialHash(PxU32 numHashBuckets, PxF32 cellSizeInv, PxU32 packetMultLog, bool supportSections)
: mNumCells(0)
, mNumHashBuckets(numHashBuckets)
, mCellSizeInv(cellSizeInv)
, mPacketMultLog(packetMultLog)
, mPacketSections(NULL)
{
    //(numHashBuckets + 1): including overflow cell
    mCells = reinterpret_cast<ParticleCell*>(PX_ALLOC((numHashBuckets + 1) * sizeof(ParticleCell), "ParticleCell"));

    if(supportSections)
        mPacketSections =
            reinterpret_cast<PacketSections*>(PX_ALLOC(numHashBuckets * sizeof(PacketSections), "PacketSections"));
}

/*!
Releases the cell table and, if it was allocated, the packet section table.
*/
SpatialHash::~SpatialHash()
{
    PX_FREE(mCells);

    if(mPacketSections)
        PX_FREE(mPacketSections);
}

/*-------------------------------------------------------------------------*/

/*!
Builds the packet hash and reorders particles.

numSorted          - output: number of particles assigned to a regular packet. Particles routed to the
                     overflow cell are not counted.
sortedIndices      - output: particle indices grouped by packet, written by reorderParticleIndicesToPackets().
particles          - particle array addressed by the indices set in particleMap. A particle that would need a
                     new packet after the packet limit has been reached gets its overflow flag set and its
                     constraint-valid flags cleared.
particleMap        - bitmap of the particle indices to process; its size also determines the size of the
                     temporary hash key array.
validParticleRange - only checked by an assert (must be > 0).
continuation       - task whose reference is removed once the hash has been built.
*/
void SpatialHash::updatePacketHash(PxU32& numSorted, PxU32* sortedIndices, Particle* particles,
                                   const Cm::BitMap& particleMap, const PxU32 validParticleRange,
                                   physx::PxBaseTask* continuation)
{
    PX_ASSERT(validParticleRange > 0);
    PX_UNUSED(validParticleRange);

    // Mark packet hash entries as empty.
    for(PxU32 p = 0; p < PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE; p++)
    {
        ParticleCell& packet = mCells[p];
        packet.numParticles = PX_INVALID_U32;
    }

    // Initialize overflow packet
    mCells[PT_PARTICLE_SYSTEM_OVERFLOW_INDEX].numParticles = 0;

    // A packet spans (1 << mPacketMultLog) cells per axis, so the inverse packet size is the inverse cell
    // size divided by that factor.
    PxU32 packetMult = PxU32(1 << mPacketMultLog);
    const PxF32 packetSizeInv = mCellSizeInv / packetMult;

    const PxU32 validWordCount = particleMap.size() >> 5; //((validParticleRange + 0x1F) & ~0x1F) >> 5;

    {
        PxU32 numPackets = 0;
        numSorted = 0;

        // Add particles to packet hash
        // The hash key of every particle is saved here so that the reorder step does not have to recompute it.
        PxU16* hashKeyArray =
            reinterpret_cast<PxU16*>(PX_ALLOC(validWordCount * 32 * sizeof(PxU16), "hashKeys"));
        Cm::BitMap::Iterator particleIt(particleMap);
        PX_ASSERT(hashKeyArray);

        for(PxU32 particleIndex = particleIt.getNext(); particleIndex != Cm::BitMap::Iterator::DONE;
            particleIndex = particleIt.getNext())
        {
            Particle& particle = particles[particleIndex];

            // Particles which caused overflow in the past are rejected: they go straight to the overflow cell.
            if(particle.flags.api & PxParticleFlag::eSPATIAL_DATA_STRUCTURE_OVERFLOW)
            {
                mCells[PT_PARTICLE_SYSTEM_OVERFLOW_INDEX].numParticles++;
                hashKeyArray[particleIndex] = PT_PARTICLE_SYSTEM_OVERFLOW_INDEX;
                continue;
            }

            // Compute cell coordinate for particle
            // Transform cell to packet coordinate
            GridCellVector packetCoords(particle.position, packetSizeInv);

            PxU32 hashKey;
            ParticleCell* packet = getCell(hashKey, packetCoords);
            PX_ASSERT(packet);
            PX_ASSERT(hashKey < PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE);
            hashKeyArray[particleIndex] = Ps::to16(hashKey);

            if(packet->numParticles == PX_INVALID_U32)
            {
                // Entry is empty -> Initialize new entry

                if(numPackets >= PT_PARTICLE_SYSTEM_PACKET_LIMIT)
                {
                    // Reached maximum number of packets -> flag the particle as overflowed and move it to the
                    // overflow cell; the empty hash entry is left unclaimed.
                    PX_WARN_ONCE("Particles: Spatial data structure overflow! Particles might miss collisions with the "
                                 "scene. See particle section of the guide for more information.");
                    particle.flags.api |= PxParticleFlag::eSPATIAL_DATA_STRUCTURE_OVERFLOW;
                    particle.flags.low &= PxU16(~InternalParticleFlag::eANY_CONSTRAINT_VALID);
                    mCells[PT_PARTICLE_SYSTEM_OVERFLOW_INDEX].numParticles++;
                    hashKeyArray[particleIndex] = PT_PARTICLE_SYSTEM_OVERFLOW_INDEX;
                    continue;
                }

                packet->coords = packetCoords;
                packet->numParticles = 0;
                numPackets++;
            }

            PX_ASSERT(packet->numParticles != PX_INVALID_U32);
            packet->numParticles++;
            numSorted++;
        }

        mNumCells = numPackets;

        // Set for each packet the starting index of the associated particle interval and clear the
        // particle counter (preparation for reorder step).
        // include overflow packet.
        PxU32 numParticles = 0;
        for(PxU32 p = 0; p < PT_PARTICLE_SYSTEM_PACKET_HASH_BUFFER_SIZE; p++)
        {
            ParticleCell& packet = mCells[p];

            if(packet.numParticles == PX_INVALID_U32)
                continue;

            packet.firstParticle = numParticles;
            numParticles += packet.numParticles;
            packet.numParticles = 0;
        }

        reorderParticleIndicesToPackets(sortedIndices, numParticles, particleMap, hashKeyArray);

        PX_FREE(hashKeyArray);
    }

    continuation->removeReference();
}

/*!
Reorders particle indices to packets.
+*/ +void SpatialHash::reorderParticleIndicesToPackets(PxU32* sortedIndices, PxU32 numParticles, + const Cm::BitMap& particleMap, PxU16* hashKeyArray) +{ + Cm::BitMap::Iterator particleIt(particleMap); + for(PxU32 particleIndex = particleIt.getNext(); particleIndex != Cm::BitMap::Iterator::DONE; + particleIndex = particleIt.getNext()) + { + // Get packet for fluid + ParticleCell* packet = &mCells[hashKeyArray[particleIndex]]; + PX_ASSERT(packet); + PX_ASSERT(packet->numParticles != PX_INVALID_U32); + + PxU32 index = packet->firstParticle + packet->numParticles; + PX_ASSERT(index < numParticles); + PX_UNUSED(numParticles); + sortedIndices[index] = particleIndex; + packet->numParticles++; + } +} + +void SpatialHash::updatePacketSections(PxU32* particleIndices, Particle* particles, physx::PxBaseTask* continuation) +{ + PX_ASSERT(mPacketSections); + PX_UNUSED(continuation); + + // MS: For this task we could use multithreading, gather a couple of packets and run them in parallel. + // Multiprocessor systems might take advantage of this but for the PC we will postpone this for now. + PxU32 skipSize = 0; + + for(PxU32 p = 0; p < PT_PARTICLE_SYSTEM_PACKET_HASH_SIZE; p++) + { + ParticleCell& packet = mCells[p]; + + if((packet.numParticles == PX_INVALID_U32) || (packet.numParticles <= skipSize)) + continue; + + buildPacketSections(packet, mPacketSections[p], mPacketMultLog, particles, particleIndices); + } +} + +void SpatialHash::buildPacketSections(const ParticleCell& packet, PacketSections& sections, PxU32 packetMultLog, + Particle* particles, PxU32* particleIndices) +{ + PX_ASSERT(packetMultLog > 0); + + PxU32 packetMult = PxU32(1 << packetMultLog); + + // Compute the smallest cell coordinate within the packet + GridCellVector packetMinCellCoords = packet.coords << packetMultLog; + + // Clear packet section entries + PxMemSet(§ions, 0, sizeof(PacketSections)); + + // Divide the packet into subpackets that fit into local memory of processing unit. 
+ PxU32 particlesRemainder = packet.numParticles % PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS; + if(particlesRemainder == 0) + particlesRemainder = PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS; + + PxU32* packetParticleIndices = particleIndices + packet.firstParticle; + + PX_ALLOCA(sectionIndexBuf, PxU16, packet.numParticles * sizeof(PxU16)); + PX_ASSERT(sectionIndexBuf); + + PxU32 startIdx = 0; + PxU32 endIdx = particlesRemainder; // We start with the smallest subpacket, i.e., the subpacket which does not reach + // its particle limit. + GridCellVector cellCoord; + PxU16* pSectionIndexBuf = sectionIndexBuf; + while(endIdx <= packet.numParticles) + { + // Loop over particles of the subpacket. + for(PxU32 p = startIdx; p < endIdx; p++) + { + PxU32 particleIndex = packetParticleIndices[p]; + Particle& particle = particles[particleIndex]; + // Find packet section the particle belongs to. + cellCoord.set(particle.position, mCellSizeInv); + PxU32 sectionIndex = getPacketSectionIndex(cellCoord, packetMinCellCoords, packetMult); + PX_ASSERT(sectionIndex < PT_PACKET_SECTIONS); + + *pSectionIndexBuf++ = Ps::to16(sectionIndex); + + // Increment particle count of the section the particle belongs to. + sections.numParticles[sectionIndex]++; + } + + startIdx = endIdx; + endIdx += PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS; + } + + // Set for each packet section the starting index of the associated particle interval. + PxU32 particleIndex = packet.firstParticle; + for(PxU32 s = 0; s < PT_PACKET_SECTIONS; s++) + { + sections.firstParticle[s] = particleIndex; + particleIndex += sections.numParticles[s]; + } + + // Simon: This is not yet chunked. Need to when porting. 
+ PX_ALLOCA(tmpIndexBuffer, PxU32, packet.numParticles * sizeof(PxU32)); + PX_ASSERT(tmpIndexBuffer); + PxMemCopy(tmpIndexBuffer, packetParticleIndices, packet.numParticles * sizeof(PxU32)); + + reorderParticlesToPacketSections(packet, sections, particles, tmpIndexBuffer, packetParticleIndices, sectionIndexBuf); +} + +void SpatialHash::reorderParticlesToPacketSections(const ParticleCell& packet, PacketSections& sections, + const Particle* particles, const PxU32* inParticleIndices, + PxU32* outParticleIndices, PxU16* sectionIndexBuf) +{ + // Divide the packet into subpackets that fit into local memory of processing unit. + PxU32 particlesRemainder = packet.numParticles % PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS; + if(particlesRemainder == 0) + particlesRemainder = PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS; + + // Prepare section structure for reorder + PxMemSet(sections.numParticles, 0, (PT_PACKET_SECTIONS * sizeof(PxU32))); + + PxU32 startIdx = 0; + PxU32 endIdx = particlesRemainder; // We start with the smallest subpacket, i.e., the subpacket which does not reach + // its particle limit. + while(endIdx <= packet.numParticles) + { + // Loop over particles of the subpacket. + for(PxU32 p = startIdx; p < endIdx; p++) + { + PxU32 particleIndex = inParticleIndices[p]; + const Particle& particle = particles[particleIndex]; + PX_UNUSED(particle); + + // Reorder particle according to packet section. + // + // It is important that particles inside the core section (the section that will not interact with neighbor + // packets) + // are moved to the end of the buffer. This way we can easily ignore these particles when testing against + // particles of neighboring packets. + + PxU32 sectionIndex = *sectionIndexBuf++; + PxU32 outIndex = sections.firstParticle[sectionIndex] + sections.numParticles[sectionIndex]; + + // the output index array start at the packet start, unlike the section indices, which are absolute. 
+ PxU32 relativeOutIndex = outIndex - packet.firstParticle; + PX_ASSERT(relativeOutIndex < packet.numParticles); + outParticleIndices[relativeOutIndex] = particleIndex; + + sections.numParticles[sectionIndex]++; + } + + startIdx = endIdx; + endIdx += PT_SUBPACKET_PARTICLE_LIMIT_PACKET_SECTIONS; + } +} + +/* +To optimize particle interaction between particles of neighboring packets, each packet is split +into 27 sections. Of these 27 sections, 26 are located at the surface of the packet, i.e., contain +the outermost particle cells, and one section contains all the inner cells. If we want to compute +the particle interactions between neighboring packets, we only want to work with the 26 "surface +sections" of each packet, neglecting the inner sections. Thus, we need to find for a given packet +all the relevant sections of the neighboring packets. These sections will be called halo regions. +The following illustration specifies how these halo regions are indexed (there are 98 halo regions +for a packet). The illustration shows the halo regions of a packet from a viewer perspective that +looks from the outside at the different sides of a packet. 
+ + Left halo regions Front halo regions Top halo regions +__________________________ __________________________ __________________________ +|92 |60 | 62 | 61| 93| |93 |87 | 89 | 88| 97| |92 |81 | 83 | 82| 96| +|___|___|________|___|___| |___|___|________|___|___| |___|___|________|___|___| +|67 | 3 | 5 | 4| 73| |73 |46 | 52 | 49| 76| |60 |27 | 33 | 30| 63| +|___|___|________|___|___| |___|___|________|___|___| |___|___|________|___|___| +| | | | | | | | | | | | | | | | | | +| | | | | | | | | | | | | | | | | | +|68 | 6 | 8 | 7| 74| |74 |47 | 53 | 50| 77| |62 |29 | 35 | 32| 65| +| | | | | | | | | | | | | | | | | | +|___|___|________|___|___| |___|___|________|___|___| |___|___|________|___|___| +|66 | 0 | 2 | 1| 72| |72 |45 | 51 | 48| 75| |61 |28 | 34 | 31| 64| +|___|___|________|___|___| |___|___|________|___|___| |___|___|________|___|___| +|90 |54 | 56 | 55| 91| |91 |84 | 86 | 85| 95| |93 |87 | 89 | 88| 97| +|___|___|________|___|___| |___|___|________|___|___| |___|___|________|___|___| + + + Right halo regions Rear halo regions Bottom halo regions +__________________________ __________________________ __________________________ +|97 |64 | 65 | 63| 96| |96 |82 | 83 | 81| 92| |91 |84 | 86 | 85| 95| +|___|___|________|___|___| |___|___|________|___|___| |___|___|________|___|___| +|76 |13 | 14 | 12| 70| |70 |40 | 43 | 37| 67| |55 |19 | 25 | 22| 58| +|___|___|________|___|___| |___|___|________|___|___| |___|___|________|___|___| +| | | | | | | | | | | | | | | | | | +| | | | | | | | | | | | | | | | | | +|77 |16 | 17 | 15| 71| |71 |41 | 44 | 38| 68| |56 |20 | 26 | 23| 59| +| | | | | | | | | | | | | | | | | | +|___|___|________|___|___| |___|___|________|___|___| |___|___|________|___|___| +|75 |10 | 11 | 9| 69| |69 |39 | 42 | 36| 66| |54 |18 | 24 | 21| 57| +|___|___|________|___|___| |___|___|________|___|___| |___|___|________|___|___| +|95 |58 | 59 | 57| 94| |94 |79 | 80 | 78| 90| |90 |78 | 80 | 79| 94| +|___|___|________|___|___| |___|___|________|___|___| 
|___|___|________|___|___|

*/
/*
Fills packetHalo with the particle intervals (first particle, particle count) of those sections of the
26 neighbor packets that touch the packet at packetCoords.

packetHalo     - output; cleared first, so halo regions of neighbors that are not in the hash stay empty.
packetCoords   - coordinates of the packet whose halo is built.
packets        - packet hash table in which the neighbors are looked up.
packetSections - per-packet section table, indexed with the same index as packets.
numHashBuckets - size of the packet hash table.

Halo index layout as written below: 0-17 left/right neighbor, 18-53 bottom/top/rear/front neighbor,
54-89 edge neighbors (3 regions each), 90-97 corner neighbors (1 region each).
*/
void SpatialHash::getHaloRegions(PacketHaloRegions& packetHalo, const GridCellVector& packetCoords,
                                 const ParticleCell* packets, const PacketSections* packetSections, PxU32 numHashBuckets)
{
// Copies the particle interval of neighbor section srcIdx into halo region destIdx.
// Expects a variable named "sections" in scope.
#define PXS_COPY_PARTICLE_INTERVAL(destIdx, srcIdx) \
    packetHalo.firstParticle[destIdx] = sections.firstParticle[srcIdx]; \
    packetHalo.numParticles[destIdx] = sections.numParticles[srcIdx];

// Looks up the neighbor at offset (dx, dy, dz); if it exists, copies its nine sections idx1..idx9 into the
// halo regions startIdx..startIdx + 8.
#define PXS_GET_HALO_REGIONS_FACE_NEIGHBOR(dx, dy, dz, startIdx, idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9) \
    coords.set(packetCoords.x + dx, packetCoords.y + dy, packetCoords.z + dz); \
    packet = findConstCell(packetIndex, coords, packets, numHashBuckets); \
    if(packet) \
    { \
        const PacketSections& sections = packetSections[packetIndex]; \
        \
        PXS_COPY_PARTICLE_INTERVAL(startIdx, idx1); \
        PXS_COPY_PARTICLE_INTERVAL(startIdx + 1, idx2); \
        PXS_COPY_PARTICLE_INTERVAL(startIdx + 2, idx3); \
        PXS_COPY_PARTICLE_INTERVAL(startIdx + 3, idx4); \
        PXS_COPY_PARTICLE_INTERVAL(startIdx + 4, idx5); \
        PXS_COPY_PARTICLE_INTERVAL(startIdx + 5, idx6); \
        PXS_COPY_PARTICLE_INTERVAL(startIdx + 6, idx7); \
        PXS_COPY_PARTICLE_INTERVAL(startIdx + 7, idx8); \
        PXS_COPY_PARTICLE_INTERVAL(startIdx + 8, idx9); \
    }

// Same as the face variant, for a neighbor that contributes three sections.
#define PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(dx, dy, dz, startIdx, idx1, idx2, idx3) \
    coords.set(packetCoords.x + dx, packetCoords.y + dy, packetCoords.z + dz); \
    packet = findConstCell(packetIndex, coords, packets, numHashBuckets); \
    if(packet) \
    { \
        const PacketSections& sections = packetSections[packetIndex]; \
        \
        PXS_COPY_PARTICLE_INTERVAL(startIdx, idx1); \
        PXS_COPY_PARTICLE_INTERVAL(startIdx + 1, idx2); \
        PXS_COPY_PARTICLE_INTERVAL(startIdx + 2, idx3); \
    }

// Same as the face variant, for a neighbor that contributes a single section.
#define PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(dx, dy, dz, startIdx, idx1) \
    coords.set(packetCoords.x + dx, packetCoords.y + dy, packetCoords.z + dz); \
    packet = findConstCell(packetIndex, coords, packets, numHashBuckets); \
    if(packet) \
    { \
        const PacketSections& sections = packetSections[packetIndex]; \
        \
        PXS_COPY_PARTICLE_INTERVAL(startIdx, idx1); \
    }

    PX_ASSERT(packets);
    PX_ASSERT(packetSections);

    // Clear halo information
    PxMemSet(&packetHalo, 0, sizeof(PacketHaloRegions));

    const ParticleCell* packet;
    PxU32 packetIndex;
    GridCellVector coords;

    //
    // Fill halo regions for the 6 neighbors which share a face with the packet.
    //

    // Left neighbor
    // Its sections 9..17 are stored contiguously, so they are block-copied into halo regions 0..8.
    coords.set(packetCoords.x - 1, packetCoords.y, packetCoords.z);
    packet = findConstCell(packetIndex, coords, packets, numHashBuckets);
    if(packet)
    {
        const PacketSections& sections = packetSections[packetIndex];

        PxMemCopy(&(packetHalo.firstParticle[0]), &(sections.firstParticle[9]), (9 * sizeof(PxU32)));
        PxMemCopy(&(packetHalo.numParticles[0]), &(sections.numParticles[9]), (9 * sizeof(PxU32)));
    }

    // Right neighbor
    // Its sections 0..8 are block-copied into halo regions 9..17.
    coords.set(packetCoords.x + 1, packetCoords.y, packetCoords.z);
    packet = findConstCell(packetIndex, coords, packets, numHashBuckets);
    if(packet)
    {
        const PacketSections& sections = packetSections[packetIndex];

        PxMemCopy(&(packetHalo.firstParticle[9]), &(sections.firstParticle[0]), (9 * sizeof(PxU32)));
        PxMemCopy(&(packetHalo.numParticles[9]), &(sections.numParticles[0]), (9 * sizeof(PxU32)));
    }

    // Bottom neighbor
    PXS_GET_HALO_REGIONS_FACE_NEIGHBOR(0, -1, 0, 18, 3, 4, 5, 12, 13, 14, 21, 22, 23)

    // Top neighbor
    PXS_GET_HALO_REGIONS_FACE_NEIGHBOR(0, 1, 0, 27, 0, 1, 2, 9, 10, 11, 18, 19, 20)

    // Rear neighbor
    PXS_GET_HALO_REGIONS_FACE_NEIGHBOR(0, 0, -1, 36, 1, 4, 7, 10, 13, 16, 19, 22, 25)

    // Front neighbor
    PXS_GET_HALO_REGIONS_FACE_NEIGHBOR(0, 0, 1, 45, 0, 3, 6, 9, 12, 15, 18, 21, 24)

    //
    // Fill halo regions for the 12 neighbors which share an edge with the packet.
    //

    PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(-1, -1, 0, 54, 12, 13, 14)
    PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(1, -1, 0, 57, 3, 4, 5)
    PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(-1, 1, 0, 60, 9, 10, 11)
    PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(1, 1, 0, 63, 0, 1, 2)

    PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(-1, 0, -1, 66, 10, 13, 16)
    PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(1, 0, -1, 69, 1, 4, 7)
    PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(-1, 0, 1, 72, 9, 12, 15)
    PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(1, 0, 1, 75, 0, 3, 6)

    PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(0, -1, -1, 78, 4, 13, 22)
    PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(0, 1, -1, 81, 1, 10, 19)
    PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(0, -1, 1, 84, 3, 12, 21)
    PXS_GET_HALO_REGIONS_EDGE_NEIGHBOR(0, 1, 1, 87, 0, 9, 18)

    //
    // Fill halo regions for the 8 neighbors which share a corner with the packet.
    //

    PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(-1, -1, -1, 90, 13)
    PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(-1, -1, 1, 91, 12)
    PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(-1, 1, -1, 92, 10)
    PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(-1, 1, 1, 93, 9)
    PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(1, -1, -1, 94, 4)
    PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(1, -1, 1, 95, 3)
    PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(1, 1, -1, 96, 1)
    PXS_GET_HALO_REGIONS_CORNER_NEIGHBOR(1, 1, 1, 97, 0)

    // maxNumParticles starts at 0 (cleared above) and ends up as the largest halo region size.
    for(PxU32 i = 0; i < PT_PACKET_HALO_REGIONS; i++)
        packetHalo.maxNumParticles = PxMax(packetHalo.maxNumParticles, packetHalo.numParticles[i]);
}

#endif // PX_USE_PARTICLE_SYSTEM_API
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHash.h b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHash.h
new file mode 100644
index 00000000..9d257851
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHash.h
@@ -0,0 +1,220 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
+// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#ifndef PT_SPATIAL_HASH_H
#define PT_SPATIAL_HASH_H

#include "PxPhysXConfig.h"
#if PX_USE_PARTICLE_SYSTEM_API

#include "CmBitMap.h"
#include "PtConfig.h"
#include "PtParticleCell.h"
#include "PtPacketSections.h"
#include "PtSpatialHashHelper.h"
#include "PtParticle.h"

namespace physx
{

class PxBaseTask;

namespace Pt
{

struct Particle;

// Structure describing the regions around a packet which are relevant for particle interactions.
// A packet has 26 neighbor packets:
// - 6 of these neighbors share a face with the packet. Each of these neighbors provides 9 halo regions.
// - 12 of these neighbors share an edge with the packet. Each of these neighbors provides 3 halo regions.
// - 8 of these neighbors share a corner with the packet. Each of these neighbors provides 1 halo region.
//
// -> Number of halo regions for a packet: 6*9 + 12*3 + 8*1 = 98
#define PT_PACKET_HALO_REGIONS 98
struct PacketHaloRegions
{
    PxU32 numParticles[PT_PACKET_HALO_REGIONS];  //! Number of particles in each halo region
    PxU32 firstParticle[PT_PACKET_HALO_REGIONS]; //! Start index of the associated particle interval for each halo
                                                 //! region
    PxU32 maxNumParticles;                       //! Maximum number of particles among all halo regions
};

// Structure to cache a local cell hash that was computed for a set of particles
struct LocalCellHash
{
    PxU32 numParticles;        // Number of particles the cell hash is based on
    PxU32* particleIndices;    // Particle indices (0..numParticles) with respect to the particle array that was used
                               // to build the cell hash. Indices are ordered according to cells.
    PxU32 numHashEntries;      // Size of cell hash table
    ParticleCell* hashEntries; // Hash entry for cells

    bool isHashValid; // Marks whether the hash contains valid data or needs to be computed

    // Starts out empty and invalid; the pointers are only set to NULL here, nothing is allocated.
    LocalCellHash()
    {
        numParticles = 0;
        particleIndices = NULL;
        numHashEntries = 0;
        hashEntries = NULL;
        isHashValid = false;
    }
};

// Hash of particle packets (and, optionally, of the sections within each packet).
// NOTE(review): the declaration holds raw pointers (mCells, mPacketSections) and has a user-declared
// destructor but does not disable copying - presumably instances are never copied; confirm against callers.
class SpatialHash
{
  public:
    SpatialHash(PxU32 numHashBuckets, PxF32 cellSizeInv, PxU32 packetMultLog, bool supportSections);
    ~SpatialHash();

    // Looks up coord in the given cell table. cellIndex always receives the probed slot; the return value
    // is NULL when that slot is empty (numParticles == PX_INVALID_U32).
    static PX_FORCE_INLINE ParticleCell* findCell(PxU32& cellIndex, const GridCellVector& coord, ParticleCell* cells,
                                                  PxU32 numHashBuckets);
    static PX_FORCE_INLINE const ParticleCell* findConstCell(PxU32& cellIndex, const GridCellVector& coord,
                                                             const ParticleCell* cells, PxU32 numHashBuckets);

    PX_FORCE_INLINE PxF32 getCellSizeInv()
    {
        return mCellSizeInv;
    }
    PX_FORCE_INLINE PxU32 getPacketMultLog()
    {
        return mPacketMultLog;
    }

    PX_FORCE_INLINE PxU32 getNumPackets() const
    {
        return mNumCells;
    }
    PX_FORCE_INLINE const ParticleCell* getPackets()
    {
        return mCells;
    }
    PX_FORCE_INLINE const PacketSections* getPacketSections()
    {
        return mPacketSections;
    }

    // findCell: lookup in this hash's own table; returns NULL if the probed slot is empty.
    // getCell:  same probe, but always returns the slot (empty or not) so that the caller can claim it.
    PX_FORCE_INLINE const ParticleCell* findCell(PxU32& cellIndex, const GridCellVector& coord);
    PX_FORCE_INLINE ParticleCell* getCell(PxU32& cellIndex, const GridCellVector& coord);

    /*!
    Given the coordinates of a specific packet, the packet table, the packet sections and the packet table
    size, this function builds the halo region structure for the packet. The halo region specifies the relevant
    particles of neighboring packets.
    */
    static void getHaloRegions(PacketHaloRegions& packetHalo, const GridCellVector& packetCoords,
                               const ParticleCell* packets, const PacketSections* packetSections, PxU32 numHashBuckets);

    /*!
    Build local hash table for cells within a packet. Reorders a particle index array according to particle cells.

    The cell entry array must have more entries than the number of particles passed. The particle index
    table must have the size of the number of particles passed. The particle array is taken as const; a
    separate hashKeyArray is passed in alongside it.
    NOTE(review): this comment used to say the particle array is non-const because hash keys get stored in
    the particles, which contradicts the signature - presumably the keys now go to hashKeyArray; confirm
    against the implementation.
    */
    static void buildLocalHash(const Particle* particles, PxU32 numParticles, ParticleCell* cells, PxU32* particleIndices,
                               PxU16* hashKeyArray, PxU32 numHashBuckets, PxF32 cellSizeInv, const PxVec3& packetCorner);

    /*!
    Builds the packet hash and reorders particle indices to packets. Particles are not declared const since
    a particle that would require a new packet after the packet limit has been reached gets its overflow
    flag set and its constraint-valid flags cleared.
    */
    void updatePacketHash(PxU32& numSorted, PxU32* sortedIndices, Particle* particles, const Cm::BitMap& particleMap,
                          const PxU32 validParticleRange, physx::PxBaseTask* continuation);

    /*!
    Divides each fluid packet into sections and reorders particle indices according to sections.
    NOTE(review): the particles are passed non-const, but the implementation appears to only read particle
    positions and to keep the section indices in a temporary buffer - confirm before relying on it.
    */
    void updatePacketSections(PxU32* particleIndices, Particle* particles, physx::PxBaseTask* continuation);

  private:
    static void reorderParticleIndicesToCells(const Particle* particles, PxU32 numParticles, ParticleCell* cells,
                                              PxU32* particleIndices, PxU32 numHashBuckets, PxU16* hashKeyArray);

    void reorderParticleIndicesToPackets(PxU32* sortedIndices, PxU32 numHashedParticles, const Cm::BitMap& particleMap,
                                         PxU16* hashKeyArray);

    /*!
    Splits the specified packet into 26 boundary sections (plus one inner section) and reorders the particles
    according to sections.
    */
    void buildPacketSections(const ParticleCell& packet, PacketSections& sections, PxU32 packetMultLog,
                             Particle* particles, PxU32* particleIndices);

    void reorderParticlesToPacketSections(const ParticleCell& packet, PacketSections& sections,
                                          const Particle* particles, const PxU32* inParticleIndices,
                                          PxU32* outParticleIndices, PxU16* sectionIndexBuf);

  private:
    ParticleCell* mCells;
    PxU32 mNumCells;
    PxU32 mNumHashBuckets;
    PxF32 mCellSizeInv;

    // Packet Hash data
    PxU32 mPacketMultLog;
    PacketSections* mPacketSections;
};

// Probes the table for coord; an empty slot (numParticles == PX_INVALID_U32) means "not found".
PX_FORCE_INLINE const ParticleCell* SpatialHash::findConstCell(PxU32& cellIndex, const GridCellVector& coord,
                                                               const ParticleCell* cells, PxU32 numHashBuckets)
{
    cellIndex = getCellIndex(coord, cells, numHashBuckets);
    const ParticleCell* cell = &cells[cellIndex];

    if(cell->numParticles == PX_INVALID_U32)
        return NULL;
    else
        return cell;
}

// Non-const flavour of findConstCell; the const_cast is safe because cells was passed in non-const.
PX_FORCE_INLINE ParticleCell* SpatialHash::findCell(PxU32& cellIndex, const GridCellVector& coord, ParticleCell* cells,
                                                    PxU32 numHashBuckets)
{
    const ParticleCell* constCell = findConstCell(cellIndex, coord, cells, numHashBuckets);
    return const_cast<ParticleCell*>(constCell);
}

PX_FORCE_INLINE const ParticleCell* SpatialHash::findCell(PxU32& cellIndex, const GridCellVector& coord)
{
    return findCell(cellIndex, coord, mCells, mNumHashBuckets);
}

// Unlike findCell, returns the probed slot even when it is still empty.
PX_FORCE_INLINE ParticleCell* SpatialHash::getCell(PxU32& cellIndex, const GridCellVector& coord)
{
    cellIndex = getCellIndex(coord, mCells, mNumHashBuckets);
    return &mCells[cellIndex];
}

} // namespace Pt
} // namespace physx

#endif // PX_USE_PARTICLE_SYSTEM_API
#endif // PT_SPATIAL_HASH_H
diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHashHelper.h b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHashHelper.h
new file mode 100644
index 00000000..ac4845ba
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialHashHelper.h
@@ -0,0 +1,162 @@
// This code contains NVIDIA
Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+#ifndef PT_SPATIAL_HASH_HELPER_H +#define PT_SPATIAL_HASH_HELPER_H + +#include "PxPhysXConfig.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "PtParticleCell.h" + +namespace physx +{ + +namespace Pt +{ + +PX_FORCE_INLINE PxU32 hashFunction(const GridCellVector& coord, PxU32 numHashBuckets) +{ + PX_ASSERT((((numHashBuckets - 1) ^ numHashBuckets) + 1) == (2 * numHashBuckets)); + + return ((static_cast<PxU32>(coord.x) + 101 * static_cast<PxU32>(coord.y) + 7919 * static_cast<PxU32>(coord.z)) & + (numHashBuckets - 1)); + // sschirm: weird! The version that spreads all the coordinates is slower! Is the reason the additional + // multiplication? + // return ( (101*static_cast<PxU32>(coord.x) + 7919*static_cast<PxU32>(coord.y) + + // 73856093*static_cast<PxU32>(coord.z)) & (numHashBuckets - 1) ); +} + +PX_FORCE_INLINE PxU32 getCellIndex(const GridCellVector& coord, const ParticleCell* cells, PxU32 numHashBuckets) +{ +#if PX_DEBUG + PxU32 tries = 0; +#endif + + PxU32 key = hashFunction(coord, numHashBuckets); + const ParticleCell* cell = &cells[key]; + + while((cell->numParticles != PX_INVALID_U32) && (coord != cell->coords)) + { + key = (key + 1) & (numHashBuckets - 1); + cell = &cells[key]; + +#if PX_DEBUG + tries++; +#endif + PX_ASSERT(tries < numHashBuckets); + } + + return key; +} + +/* +Compute packet section index for given cell coordinate. The packet sections are indexed as follows. 
+ +Left packet boundary Front packet boundary Top packet boundary +__________________ __________________ __________________ +| 3 | 5 | 4 | | 4 | 22 |13 | | 3 | 21 |12 | +|___|________|___| |___|________|___| |___|________|___| +| | | | | | | | | | | | +| 6 | 8 | 7 | | 7 | 25 |16 | | 5 | 23 |14 | +| | | | | | | | | | | | +|___|________|___| |___|________|___| |___|________|___| +| 0 | 2 | 1 | | 1 | 19 |10 | | 4 | 22 |13 | +|___|________|___| |___|________|___| |___|________|___| + +Right packet boundary Rear packet boundary Bottom packet boundary +__________________ __________________ __________________ +|13 | 14 |12 | |12 | 21 | 3 | | 1 | 19 |10 | +|___|________|___| |___|________|___| |___|________|___| +| | | | | | | | | | | | +|16 | 17 |15 | |15 | 24 | 6 | | 2 | 20 |11 | +| | | | | | | | | | | | +|___|________|___| |___|________|___| |___|________|___| +|10 | 11 | 9 | |9 | 18 | 0 | | 0 | 18 | 9 | +|___|________|___| |___|________|___| |___|________|___| + +Note: One section is missing in this illustration. Section 26 is in the middle of the packet and + enclosed by the other sections. For particles in section 26 we know for sure that no interaction + with particles of neighboring packets occur. +*/ +PX_FORCE_INLINE PxU32 +getPacketSectionIndex(const GridCellVector& cellCoords, const GridCellVector& packetMinCellCoords, PxU32 packetMult) +{ + PxU32 sectionIndex = 0; + + // Translate cell coordinates such that the minimal cell coordinate of the packet is at the origin (0,0,0) + GridCellVector coord(cellCoords); + coord -= packetMinCellCoords; + + // Find section the particle cell belongs to. 
+ + if(PxU32(coord.x + 1) == packetMult) + { + // Right side boundary of packet + sectionIndex = 9; + } + else if(coord.x != 0) + { + sectionIndex = 18; + } + // else: Left side boundary of packet + + //----------- + + if(PxU32(coord.y + 1) == packetMult) + { + // Top boundary of packet + sectionIndex += 3; + } + else if(coord.y != 0) + { + sectionIndex += 6; + } + // else: Bottom boundary of packet + + //----------- + + if(PxU32(coord.z + 1) == packetMult) + { + // Front boundary of packet + sectionIndex += 1; + } + else if(coord.z != 0) + { + sectionIndex += 2; + } + // else: Rear boundary of packet + + return sectionIndex; +} + +} // namespace Pt +} // namespace physx + +#endif // PX_USE_PARTICLE_SYSTEM_API +#endif // PT_SPATIAL_HASH_HELPER_H diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialLocalHash.cpp b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialLocalHash.cpp new file mode 100644 index 00000000..82a3ac28 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtSpatialLocalHash.cpp @@ -0,0 +1,173 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PtSpatialHashHelper.h" +#if PX_USE_PARTICLE_SYSTEM_API + +#include "PtSpatialHash.h" +#include "PtParticle.h" +#include "PsUtilities.h" + +/*! +Builds local hash and reorders particle index table. +*/ +void physx::Pt::SpatialHash::buildLocalHash(const Particle* particles, PxU32 numParticles, ParticleCell* cells, + PxU32* particleIndices, PxU16* hashKeyArray, PxU32 numHashBuckets, + PxF32 cellSizeInv, const PxVec3& packetCorner) +{ + PX_ASSERT(particles); + PX_ASSERT(cells); + PX_ASSERT(particleIndices); + PX_ASSERT(numHashBuckets > numParticles); // Needs to be larger to have at least one empty hash bucket (required to + // detect invalid cells). + + // Mark packet cell entries as empty. + for(PxU32 c = 0; c < numHashBuckets; c++) + cells[c].numParticles = PX_INVALID_U32; + + PX_ALIGN(16, Particle fakeParticle); + fakeParticle.position = PxVec3(FLT_MAX, FLT_MAX, FLT_MAX); + + PxU32 numParticles4 = ((numParticles + 3) & ~0x3) + 4; // ceil up to multiple of four + 4 for save unrolling + + // Add particles to cell hash + + const Particle* prt0 = particles; + const Particle* prt1 = (1 < numParticles) ? 
particles + 1 : &fakeParticle; + const Particle* prt2 = (2 < numParticles) ? particles + 2 : &fakeParticle; + const Particle* prt3 = (3 < numParticles) ? particles + 3 : &fakeParticle; + + struct Int32Vec3 + { + PX_FORCE_INLINE void set(const PxVec3& realVec, const PxF32 scale) + { + x = static_cast<PxI32>(Ps::floor(realVec.x * scale)); + y = static_cast<PxI32>(Ps::floor(realVec.y * scale)); + z = static_cast<PxI32>(Ps::floor(realVec.z * scale)); + } + PxI32 x; + PxI32 y; + PxI32 z; + }; + + PX_ALIGN(16, Int32Vec3 cellCoords[8]); + cellCoords[0].set(prt0->position - packetCorner, cellSizeInv); + cellCoords[1].set(prt1->position - packetCorner, cellSizeInv); + cellCoords[2].set(prt2->position - packetCorner, cellSizeInv); + cellCoords[3].set(prt3->position - packetCorner, cellSizeInv); + + for(PxU32 p = 0; p < numParticles4; p += 4) + { + const Particle* prt0_N = (p + 4 < numParticles) ? particles + p + 4 : &fakeParticle; + const Particle* prt1_N = (p + 5 < numParticles) ? particles + p + 5 : &fakeParticle; + const Particle* prt2_N = (p + 6 < numParticles) ? particles + p + 6 : &fakeParticle; + const Particle* prt3_N = (p + 7 < numParticles) ? 
particles + p + 7 : &fakeParticle; + + PxU32 wIndex = (p + 4) & 7; + cellCoords[wIndex].set(prt0_N->position - packetCorner, cellSizeInv); + cellCoords[wIndex + 1].set(prt1_N->position - packetCorner, cellSizeInv); + cellCoords[wIndex + 2].set(prt2_N->position - packetCorner, cellSizeInv); + cellCoords[wIndex + 3].set(prt3_N->position - packetCorner, cellSizeInv); + + PxU32 rIndex = p & 7; + for(PxU32 i = 0; i < 4; ++i) + { + if(p + i < numParticles) + { + const Int32Vec3& int32Vec3 = cellCoords[rIndex + i]; + const GridCellVector cellCoord(PxI16(int32Vec3.x), PxI16(int32Vec3.y), PxI16(int32Vec3.z)); + PxU32 hashKey = getCellIndex(cellCoord, cells, numHashBuckets); + PX_ASSERT(hashKey < PT_PARTICLE_SYSTEM_HASH_KEY_LIMIT); + ParticleCell* cell = &cells[hashKey]; + hashKeyArray[p + i] = Ps::to16(hashKey); + PX_ASSERT(cell); + + if(cell->numParticles == PX_INVALID_U32) + { + // Entry is empty -> Initialize new entry + cell->coords = cellCoord; + cell->numParticles = 1; // this avoids some LHS + } + else + { + cell->numParticles++; // this avoids some LHS + } + PX_ASSERT(cell->numParticles != PX_INVALID_U32); + } + } + } + + // Set for each cell the starting index of the associated particle index interval. + PxU32 cellFirstParticle = 0; + for(PxU32 c = 0; c < numHashBuckets; c++) + { + ParticleCell& cell = cells[c]; + + if(cell.numParticles == PX_INVALID_U32) + continue; + + cell.firstParticle = cellFirstParticle; + cellFirstParticle += cell.numParticles; + } + + reorderParticleIndicesToCells(particles, numParticles, cells, particleIndices, numHashBuckets, hashKeyArray); +} + +/*! +Reorders particle indices to cells. 
+*/ +void physx::Pt::SpatialHash::reorderParticleIndicesToCells(const Particle* /*particles*/, PxU32 numParticles, + ParticleCell* cells, PxU32* particleIndices, + PxU32 numHashBuckets, PxU16* hashKeyArray) +{ + for(PxU32 c = 0; c < numHashBuckets; c++) + { + ParticleCell& cell = cells[c]; + if(cell.numParticles == PX_INVALID_U32) + continue; + + cell.numParticles = 0; + } + + // Reorder particle indices according to cells + for(PxU32 p = 0; p < numParticles; p++) + { + // Get cell for fluid + ParticleCell* cell; + cell = &cells[hashKeyArray[p]]; + + PX_ASSERT(cell); + PX_ASSERT(cell->numParticles != PX_INVALID_U32); + + particleIndices[cell->firstParticle + cell->numParticles] = p; + cell->numParticles++; + } +} + +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/PtTwoWayData.h b/PhysX_3.4/Source/LowLevelParticles/src/PtTwoWayData.h new file mode 100644 index 00000000..dd9483c7 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/PtTwoWayData.h @@ -0,0 +1,56 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. 
// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
// information or for any infringement of patents or other rights of third parties that may
// result from its use. No license is granted by implication or otherwise under any patent
// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
// This code supersedes and replaces all information previously supplied.
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
#ifndef PT_TWOWAYDATA_H
#define PT_TWOWAYDATA_H

#include "PxPhysXConfig.h"
#if PX_USE_PARTICLE_SYSTEM_API

#include "foundation/PxVec3.h"
#include "PtCollisionData.h"

namespace physx
{

// Only used through a pointer below, so a forward declaration is sufficient.
struct PxsBodyCore;

namespace Pt
{

// Record pairing an accumulated impulse with a rigid body core.
struct TwoWayData
{
	PxVec3 impulse; // used to accumulate impulse for two way interaction with RB
	// NOTE(review): presumably the rigid body the accumulated impulse belongs to; held as a
	// non-owning const pointer -- confirm against the code that fills this struct.
	const PxsBodyCore* body;
};

} // namespace Pt
} // namespace physx

#endif // PX_USE_PARTICLE_SYSTEM_API
#endif // PT_TWOWAYDATA_H

// diff --git a/PhysX_3.4/Source/LowLevelParticles/src/gpu/PtRigidBodyAccessGpu.cpp b/PhysX_3.4/Source/LowLevelParticles/src/gpu/PtRigidBodyAccessGpu.cpp
// new file mode 100644
// index 00000000..5d7844d6
// --- /dev/null
// +++ b/PhysX_3.4/Source/LowLevelParticles/src/gpu/PtRigidBodyAccessGpu.cpp
// @@ -0,0 +1,95 @@
// This code contains NVIDIA Confidential Information and is disclosed to you
// under a form of NVIDIA software license agreement provided separately to you.
//
// Notice
// NVIDIA Corporation and its licensors retain all intellectual property and
// proprietary rights in and to this software and related documentation and
// any modifications thereto.
Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
+ +#include "gpu/PtRigidBodyAccessGpu.h" +#if PX_USE_PARTICLE_SYSTEM_API +#if PX_SUPPORT_GPU_PHYSX + +#include "PxvGeometry.h" +#include "PxvDynamics.h" +#include "PtBodyTransformVault.h" + +using namespace physx; +using namespace Pt; + +void RigidBodyAccessGpu::copyShapeProperties(ShapeProperties& shapeProperties, const size_t shape, const size_t body) const +{ + const PxsShapeCore* shapeCore = reinterpret_cast<const PxsShapeCore*>(shape); + *shapeProperties.geometry = shapeCore->geometry; + + const PxsRigidCore* rigidCore = reinterpret_cast<const PxsRigidCore*>(body); + *shapeProperties.ownerToWorld = rigidCore->body2World; + *shapeProperties.shapeToOwner = shapeCore->transform; +} + +void RigidBodyAccessGpu::copyBodyProperties(BodyProperties& bodyProperties, const size_t* bodies, PxU32 numBodies) const +{ + const PxsBodyCore* const* bodyIt = reinterpret_cast<const PxsBodyCore* const*>(bodies); + PxStrideIterator<PxTransform> currentTransformIt(bodyProperties.currentTransforms); + PxStrideIterator<PxTransform> previousTransformIt(bodyProperties.previousTransforms); + PxStrideIterator<PxVec3> linearVelocityIt(bodyProperties.linearVelocities); + PxStrideIterator<PxVec3> angularVelocityIt(bodyProperties.angularVelocities); + PxStrideIterator<PxTransform> body2ActorTransformIt(bodyProperties.body2ActorTransforms); + PxStrideIterator<size_t> bodyHandleIt(bodyProperties.cpuBodyHandle); + + for(PxU32 i = 0; i < numBodies; ++i) + { + const PxsBodyCore& body = **bodyIt; + *currentTransformIt = body.body2World; + const PxTransform* preTransform = mTransformVault.getTransform(body); + if(preTransform) + { + *previousTransformIt = *preTransform; + *linearVelocityIt = body.linearVelocity; + *angularVelocityIt = body.angularVelocity; + *body2ActorTransformIt = body.getBody2Actor(); + *bodyHandleIt = (size_t) * bodyIt; + } + else + { + PX_ASSERT(0); + *previousTransformIt = PxTransform(PxIdentity); + *linearVelocityIt = PxVec3(0.f); + *angularVelocityIt = PxVec3(0.f); + 
*body2ActorTransformIt = PxTransform(PxIdentity); + *bodyHandleIt = 0; + } + + ++bodyIt; + ++currentTransformIt; + ++previousTransformIt; + ++linearVelocityIt; + ++angularVelocityIt; + ++body2ActorTransformIt; + ++bodyHandleIt; + } +} + +#endif // PX_SUPPORT_GPU_PHYSX +#endif // PX_USE_PARTICLE_SYSTEM_API diff --git a/PhysX_3.4/Source/LowLevelParticles/src/gpu/PtRigidBodyAccessGpu.h b/PhysX_3.4/Source/LowLevelParticles/src/gpu/PtRigidBodyAccessGpu.h new file mode 100644 index 00000000..9caed154 --- /dev/null +++ b/PhysX_3.4/Source/LowLevelParticles/src/gpu/PtRigidBodyAccessGpu.h @@ -0,0 +1,72 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. 
// NVIDIA Corporation products are not authorized for use as critical
// components in life support devices or systems without express written approval of
// NVIDIA Corporation.
//
// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.

#ifndef PT_GPU_RIGID_BODY_ACCESS_H
#define PT_GPU_RIGID_BODY_ACCESS_H

#include "PxPhysXConfig.h"
#if PX_USE_PARTICLE_SYSTEM_API
#if PX_SUPPORT_GPU_PHYSX

#include "PxRigidBodyAccessGpu.h"
#include "PsUserAllocated.h"

namespace physx
{

namespace Pt
{

// Only referenced below, so a forward declaration is sufficient in this header.
class BodyTransformVault;

// PxRigidBodyAccessGpu implementation that copies shape and body properties out of the low-level
// core objects. It keeps a reference to a BodyTransformVault, which therefore has to outlive this
// object.
class RigidBodyAccessGpu : public Ps::UserAllocated, public PxRigidBodyAccessGpu
{
  public:
	// Copies the properties of one shape and its owning body; both arguments are handles.
	virtual void copyShapeProperties(ShapeProperties& shapeProperties, const size_t shape, const size_t body) const;
	// Copies the properties of numBodies bodies, given as an array of handles.
	virtual void copyBodyProperties(BodyProperties& bodyProperties, const size_t* bodies, PxU32 numBodies) const;

  public:
	// Stores a reference to transformVault; no copy is made.
	RigidBodyAccessGpu(const BodyTransformVault& transformVault) : mTransformVault(transformVault)
	{
	}
	virtual ~RigidBodyAccessGpu()
	{
	}

  private:
	// Declared private with no definition visible in this header: assignment is not available to
	// users of the class (it holds a reference member).
	RigidBodyAccessGpu& operator=(const RigidBodyAccessGpu&);
	const BodyTransformVault& mTransformVault;
};

} // namespace Pt
} // namespace physx

#endif // PX_SUPPORT_GPU_PHYSX
#endif // PX_USE_PARTICLE_SYSTEM_API
#endif // PT_GPU_RIGID_BODY_ACCESS_H