diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelCloth/src/windows/CuCloth.cpp | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/LowLevelCloth/src/windows/CuCloth.cpp')
| -rw-r--r-- | PhysX_3.4/Source/LowLevelCloth/src/windows/CuCloth.cpp | 511 |
1 files changed, 511 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelCloth/src/windows/CuCloth.cpp b/PhysX_3.4/Source/LowLevelCloth/src/windows/CuCloth.cpp new file mode 100644 index 00000000..6ecd1aeb --- /dev/null +++ b/PhysX_3.4/Source/LowLevelCloth/src/windows/CuCloth.cpp @@ -0,0 +1,511 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "CuCloth.h" +#include "CuFabric.h" +#include "CuFactory.h" +#include "CuContextLock.h" +#include "CuCheckSuccess.h" +#include "CuClothData.h" +#include "CuSolver.h" +#include "TripletScheduler.h" +#include "ClothBase.h" +#include "Array.h" +#include "PsFoundation.h" + +#if PX_VC +#pragma warning(disable : 4365) // 'action' : conversion from 'type_1' to 'type_2', signed/unsigned mismatch +#endif + +namespace physx +{ +namespace cloth +{ +PhaseConfig transform(const PhaseConfig&); // from PhaseConfig.cpp +} +} + +using namespace physx; + +namespace +{ +bool isSelfCollisionEnabled(const cloth::CuCloth& cloth) +{ + return PxMin(cloth.mSelfCollisionDistance, -cloth.mSelfCollisionLogStiffness) > 0.0f; +} +} + +cloth::CuCloth::CuCloth(CuFactory& factory, CuFabric& fabric, Range<const PxVec4> particles) +: CuContextLock(factory) +, mFactory(factory) +, mFabric(fabric) +, mClothDataDirty(false) +, mNumParticles(uint32_t(particles.size())) +, mParticles(mFactory.mContextManager) +, mParticlesHostCopy(CuHostAllocator(mFactory.mContextManager, cudaHostAllocMapped)) +, mDeviceParticlesDirty(false) +, mHostParticlesDirty(true) +, mPhaseConfigs(mFactory.mContextManager) +, mMotionConstraints(mFactory.mContextManager) +, mSeparationConstraints(mFactory.mContextManager) +, mParticleAccelerations(mFactory.mContextManager) +, mParticleAccelerationsHostCopy(CuHostAllocator(mFactory.mContextManager, cudaHostAllocMapped)) +, mCapsuleIndices(getMappedAllocator<IndexPair>(mFactory.mContextManager)) +, mStartCollisionSpheres(getMappedAllocator<PxVec4>(mFactory.mContextManager)) +, mTargetCollisionSpheres(getMappedAllocator<PxVec4>(mFactory.mContextManager)) +, mConvexMasks(getMappedAllocator<uint32_t>(mFactory.mContextManager)) +, mStartCollisionPlanes(getMappedAllocator<PxVec4>(mFactory.mContextManager)) +, mTargetCollisionPlanes(getMappedAllocator<PxVec4>(mFactory.mContextManager)) +, mStartCollisionTriangles(getMappedAllocator<PxVec3>(mFactory.mContextManager)) +, mTargetCollisionTriangles(getMappedAllocator<PxVec3>(mFactory.mContextManager)) +, mVirtualParticleSetSizes(mFactory.mContextManager) +, mVirtualParticleIndices(mFactory.mContextManager) +, mVirtualParticleWeights(mFactory.mContextManager) +, mRestPositions(mFactory.mContextManager) +, mSelfCollisionIndices(mFactory.mContextManager) +, mSelfCollisionData(mFactory.mContextManager) +, mSharedMemorySize(0) +, mUserData(0) +{ + PX_ASSERT(!particles.empty()); + + initialize(*this, particles.begin(), particles.end()); + + mParticles.reserve(2 * mNumParticles); + mParticles.push_back(particles.begin(), particles.end()); + mParticles.push_back(particles.begin(), particles.end()); + mParticlesHostCopy.resizeUninitialized(2 * mNumParticles); + + mFabric.incRefCount(); + + CuContextLock::release(); +} + +cloth::CuCloth::CuCloth(CuFactory& factory, const CuCloth& cloth) +: CuContextLock(factory) +, mFactory(factory) +, mFabric(cloth.mFabric) +, mNumParticles(cloth.mNumParticles) +, mParticles(cloth.mParticles) +, mParticlesHostCopy(cloth.mParticlesHostCopy) +, mDeviceParticlesDirty(cloth.mDeviceParticlesDirty) +, mHostParticlesDirty(cloth.mHostParticlesDirty) +, mPhaseConfigs(cloth.mPhaseConfigs) +, mHostPhaseConfigs(cloth.mHostPhaseConfigs) +, mMotionConstraints(cloth.mMotionConstraints) +, mSeparationConstraints(cloth.mSeparationConstraints) +, mParticleAccelerations(cloth.mParticleAccelerations) +, mParticleAccelerationsHostCopy(cloth.mParticleAccelerationsHostCopy) +, mCapsuleIndices(cloth.mCapsuleIndices) +, mStartCollisionSpheres(cloth.mStartCollisionSpheres) +, mTargetCollisionSpheres(cloth.mTargetCollisionSpheres) +, mStartCollisionPlanes(cloth.mStartCollisionPlanes) +, mTargetCollisionPlanes(cloth.mTargetCollisionPlanes) +, mStartCollisionTriangles(cloth.mStartCollisionTriangles) +, mTargetCollisionTriangles(cloth.mTargetCollisionTriangles) +, mVirtualParticleSetSizes(cloth.mVirtualParticleSetSizes) +, mVirtualParticleIndices(cloth.mVirtualParticleIndices) +, mVirtualParticleWeights(cloth.mVirtualParticleWeights) +, mRestPositions(cloth.mRestPositions) +, mSelfCollisionIndices(cloth.mSelfCollisionIndices) +, mSelfCollisionData(mFactory.mContextManager) +, mSharedMemorySize(cloth.mSharedMemorySize) +, mUserData(cloth.mUserData) +{ + copy(*this, cloth); + + mFabric.incRefCount(); + + CuContextLock::release(); +} + +cloth::CuCloth::~CuCloth() +{ + CuContextLock::acquire(); + + mFabric.decRefCount(); +} + +void cloth::CuCloth::notifyChanged() +{ + mClothDataDirty = true; +} + +bool cloth::CuCloth::updateClothData(CuClothData& clothData) +{ + // test particle pointer to detect when cloth data array has been reordered + if(!mClothDataDirty && clothData.mParticles == array(*mParticles.begin().get())) + { + PX_ASSERT(mSharedMemorySize == getSharedMemorySize()); + return false; + } + + mSharedMemorySize = getSharedMemorySize(); + + if(mSelfCollisionData.empty() && isSelfCollisionEnabled(*this)) + { + uint32_t numSelfCollisionIndices = + mSelfCollisionIndices.empty() ? mNumParticles : uint32_t(mSelfCollisionIndices.size()); + + uint32_t particleSize = 4 * mNumParticles; + uint32_t keySize = 2 * numSelfCollisionIndices; // 2x for radix buffer + uint32_t cellStartSize = (129 + 128 * 128 + 130) / 2 + 1; // half because type is int16_t + + // use 16bit indices for cellStart array (128x128 grid) + mSelfCollisionData.resize(particleSize + keySize + cellStartSize); + checkSuccess(cuMemsetD32((mSelfCollisionData.begin() + particleSize + keySize).dev(), 0xffffffff, cellStartSize)); + } + + clothData = CuClothData(*this); + mClothDataDirty = false; + + return true; +} + +uint32_t cloth::CuCloth::getSharedMemorySize() const +{ + uint32_t numPhases = uint32_t(mPhaseConfigs.size()); + uint32_t numSpheres = uint32_t(mStartCollisionSpheres.size()); + uint32_t numCones = uint32_t(mCapsuleIndices.size()); + uint32_t numPlanes = uint32_t(mStartCollisionPlanes.size()); + uint32_t numConvexes = uint32_t(mConvexMasks.size()); + uint32_t numTriangles = uint32_t(mStartCollisionTriangles.size() / 3); + + uint32_t phaseConfigSize = numPhases * sizeof(CuPhaseConfig); + + bool storePrevCollisionData = mEnableContinuousCollision || mFriction > 0.0f; + uint32_t continuousCollisionSize = storePrevCollisionData ? 4 * numSpheres + 10 * numCones : 0; + continuousCollisionSize += 4 * numCones + numConvexes; // capsule and convex masks + uint32_t discreteCollisionSize = 4 * numSpheres + PxMax(10 * numCones + 96, 208u); + discreteCollisionSize = PxMax(discreteCollisionSize, PxMax(4 * numPlanes, 19 * numTriangles)); + + // scratch memory for prefix sum and histogram + uint32_t selfCollisionSize = isSelfCollisionEnabled(*this) ? 544 : 0; + + // see CuSolverKenel.cu::gSharedMemory comment for details + return phaseConfigSize + sizeof(float) * (continuousCollisionSize + PxMax(selfCollisionSize, discreteCollisionSize)); +} + +void cloth::CuCloth::setPhaseConfig(Range<const PhaseConfig> configs) +{ + mHostPhaseConfigs.assign(configs.begin(), configs.end()); + + Vector<CuPhaseConfig>::Type deviceConfigs; + deviceConfigs.reserve(configs.size()); + const PhaseConfig* cEnd = configs.end(); + for(const PhaseConfig* cIt = configs.begin(); cIt != cEnd; ++cIt) + { + CuPhaseConfig config; + + config.mStiffness = cIt->mStiffness; + config.mStiffnessMultiplier = cIt->mStiffnessMultiplier; + config.mCompressionLimit = cIt->mCompressionLimit; + config.mStretchLimit = cIt->mStretchLimit; + + uint16_t phaseIndex = cIt->mPhaseIndex; + config.mNumConstraints = mFabric.mNumConstraintsInPhase[phaseIndex]; + config.mRestvalues = mFabric.mRestvaluesInPhase[phaseIndex].get(); + config.mIndices = mFabric.mIndicesInPhase[phaseIndex].get(); + + deviceConfigs.pushBack(config); + } + + CuContextLock contextLock(mFactory); + mPhaseConfigs.assign(deviceConfigs.begin(), deviceConfigs.end()); +} + +cloth::Range<PxVec4> cloth::CuCloth::push(cloth::CuConstraints& constraints) +{ + if(!constraints.mTarget.capacity()) + { + CuContextLock contextLock(mFactory); + constraints.mTarget.reserve(mNumParticles); + } + if(constraints.mHostCopy.empty()) + constraints.mTarget.resize(mNumParticles); + + if(constraints.mStart.empty()) // initialize start first + constraints.mStart.swap(constraints.mTarget); + + if(!constraints.mHostCopy.capacity()) + { + CuContextLock contextLock(mFactory); + constraints.mHostCopy.reserve(mNumParticles); + } + constraints.mHostCopy.resizeUninitialized(mNumParticles); + + PxVec4* data = &constraints.mHostCopy.front(); + return Range<PxVec4>(data, data + constraints.mHostCopy.size()); +} + +void cloth::CuCloth::clear(cloth::CuConstraints& constraints) +{ + CuContextLock contextLock(mFactory); + CuDeviceVector<PxVec4>(mFactory.mContextManager).swap(constraints.mStart); + CuDeviceVector<PxVec4>(mFactory.mContextManager).swap(constraints.mTarget); +} + +void cloth::CuCloth::syncDeviceParticles() +{ + if(mDeviceParticlesDirty) + { + CuContextLock contextLock(mFactory); + checkSuccess( + cuMemcpyHtoD(mParticles.begin().dev(), mParticlesHostCopy.begin(), 2 * mNumParticles * sizeof(PxVec4))); + mDeviceParticlesDirty = false; + } +} + +void cloth::CuCloth::syncHostParticles() +{ + if(mHostParticlesDirty) + { + CuContextLock contextLock(mFactory); + const PxVec4* src = mParticles.begin().get(); + mFactory.copyToHost(src, src + 2 * mNumParticles, mParticlesHostCopy.begin()); + mHostParticlesDirty = false; + } +} + +cloth::Range<const PxVec3> cloth::CuCloth::clampTriangleCount(Range<const PxVec3> range, uint32_t replaceSize) +{ + // clamp to 500 triangles (1500 vertices) to prevent running out of shared memory + uint32_t removedSize = mStartCollisionTriangles.size() - replaceSize; + const PxVec3* clamp = range.begin() + 1500 - removedSize; + + if(range.end() > clamp) + { + shdfnd::getFoundation().error(PX_WARN, "Too many collision " + "triangles specified for cloth, dropping all but first 500.\n"); + } + + return Range<const PxVec3>(range.begin(), PxMin(range.end(), clamp)); +} + +#include "ClothImpl.h" + +namespace physx +{ +namespace cloth +{ + +// ClothImpl<CuCloth>::clone() implemented in CuClothClone.cpp + +template <> +uint32_t ClothImpl<CuCloth>::getNumParticles() const +{ + return mCloth.mNumParticles; +} + +template <> +void ClothImpl<CuCloth>::lockParticles() const +{ + const_cast<CuCloth&>(mCloth).syncHostParticles(); +} + +template <> +void ClothImpl<CuCloth>::unlockParticles() const +{ +} + +template <> +MappedRange<PxVec4> ClothImpl<CuCloth>::getCurrentParticles() +{ + mCloth.wakeUp(); + lockParticles(); + mCloth.mDeviceParticlesDirty = true; + return getMappedParticles(mCloth.mParticlesHostCopy.begin()); +} + +template <> +MappedRange<const PxVec4> ClothImpl<CuCloth>::getCurrentParticles() const +{ + lockParticles(); + return getMappedParticles(mCloth.mParticlesHostCopy.begin()); +} + +template <> +MappedRange<PxVec4> ClothImpl<CuCloth>::getPreviousParticles() +{ + mCloth.wakeUp(); + lockParticles(); + mCloth.mDeviceParticlesDirty = true; + return getMappedParticles(mCloth.mParticlesHostCopy.begin() + mCloth.mNumParticles); +} + +template <> +MappedRange<const PxVec4> ClothImpl<CuCloth>::getPreviousParticles() const +{ + lockParticles(); + return getMappedParticles(mCloth.mParticlesHostCopy.begin() + mCloth.mNumParticles); +} + +template <> +GpuParticles ClothImpl<CuCloth>::getGpuParticles() +{ + mCloth.syncDeviceParticles(); + mCloth.mHostParticlesDirty = true; + PxVec4* particles = mCloth.mParticles.begin().get(); + GpuParticles result = { particles, particles + mCloth.mNumParticles, 0 }; + return result; +} + +template <> +void ClothImpl<CuCloth>::setPhaseConfig(Range<const PhaseConfig> configs) +{ + Vector<PhaseConfig>::Type transformedConfigs; + transformedConfigs.reserve(configs.size()); + + // transform phase config to use in solver + for(; !configs.empty(); configs.popFront()) + if(configs.front().mStiffness > 0.0f) + transformedConfigs.pushBack(transform(configs.front())); + + mCloth.setPhaseConfig(Range<const PhaseConfig>(transformedConfigs.begin(), transformedConfigs.end())); + mCloth.notifyChanged(); + mCloth.wakeUp(); +} + +template <> +void ClothImpl<CuCloth>::setSelfCollisionIndices(Range<const uint32_t> indices) +{ + ContextLockType lock(mCloth.mFactory); + mCloth.mSelfCollisionIndices.assign(indices.begin(), indices.end()); + mCloth.mSelfCollisionIndicesHost.assign(indices.begin(), indices.end()); + mCloth.notifyChanged(); + mCloth.wakeUp(); +} + +template <> +uint32_t ClothImpl<CuCloth>::getNumVirtualParticles() const +{ + return uint32_t(mCloth.mVirtualParticleIndices.size()); +} + +template <> +Range<PxVec4> ClothImpl<CuCloth>::getParticleAccelerations() +{ + if(mCloth.mParticleAccelerations.empty()) + { + CuContextLock contextLock(mCloth.mFactory); + mCloth.mParticleAccelerations.resize(mCloth.mNumParticles); + } + + if(!mCloth.mParticleAccelerationsHostCopy.capacity()) + { + CuContextLock contextLock(mCloth.mFactory); + mCloth.mParticleAccelerationsHostCopy.reserve(mCloth.mNumParticles); + } + mCloth.mParticleAccelerationsHostCopy.resizeUninitialized(mCloth.mNumParticles); + + mCloth.wakeUp(); + + PxVec4* data = mCloth.mParticleAccelerationsHostCopy.begin(); + return Range<PxVec4>(data, mCloth.mParticleAccelerationsHostCopy.end()); +} + +template <> +void ClothImpl<CuCloth>::clearParticleAccelerations() +{ + CuContextLock contextLock(mCloth.mFactory); + CuDeviceVector<PxVec4>(mCloth.mFactory.mContextManager).swap(mCloth.mParticleAccelerations); + mCloth.mParticleAccelerationsHostCopy.reset(); + mCloth.wakeUp(); +} + +namespace +{ +uint32_t calculateNumReplays(const Vector<Vec4u>::Type& triplets, const Vector<uint32_t>::Type setSizes) +{ + uint32_t result = 0; + + Vector<Vec4u>::Type::ConstIterator tIt = triplets.begin(); + Vector<uint32_t>::Type::ConstIterator sIt, sEnd = setSizes.end(); + uint32_t index = 0; + for(sIt = setSizes.begin(); sIt != sEnd; ++sIt, ++index) + { + Vector<Vec4u>::Type::ConstIterator tEnd = tIt + *sIt, tLast = tIt; + while(tLast != tEnd) + { + uint8_t numConflicts[3][32] = {}; + uint8_t numReplays[3] = {}; + + for(tLast += PxMin(ptrdiff_t(32), tEnd - tLast); tIt != tLast; ++tIt) + for(int i = 0; i < 3; ++i) + numReplays[i] = PxMax(numReplays[i], ++numConflicts[i][(*tIt)[i] & 31]); + + result += numReplays[0] + numReplays[1] + numReplays[2]; + } + } + + return result; +} +} + +template <> +void ClothImpl<CuCloth>::setVirtualParticles(Range<const uint32_t[4]> indices, Range<const PxVec3> weights) +{ + // shuffle indices to form independent SIMD sets + TripletScheduler scheduler(indices); + scheduler.warp(mCloth.mNumParticles, 32); + + // convert to 16bit indices + Vector<Vec4us>::Type hostIndices; + hostIndices.reserve(indices.size()); + TripletScheduler::ConstTripletIter tIt = scheduler.mTriplets.begin(); + TripletScheduler::ConstTripletIter tEnd = scheduler.mTriplets.end(); + for(; tIt != tEnd; ++tIt) + hostIndices.pushBack(Vec4us(*tIt)); + + // printf("num sets = %u, num replays = %u\n", scheduler.mSetSizes.size(), + // calculateNumReplays(scheduler.mTriplets, scheduler.mSetSizes)); + + // add normalization weight + Vector<PxVec4>::Type hostWeights; + hostWeights.reserve(weights.size()); + for(; !weights.empty(); weights.popFront()) + { + PxVec3 w = reinterpret_cast<const PxVec3&>(weights.front()); + PxReal scale = 1 / w.magnitudeSquared(); + hostWeights.pushBack(PxVec4(w.x, w.y, w.z, scale)); + } + + CuContextLock contextLock(mCloth.mFactory); + + // todo: 'swap' these to force reallocation? + mCloth.mVirtualParticleIndices = hostIndices; + mCloth.mVirtualParticleSetSizes = scheduler.mSetSizes; + mCloth.mVirtualParticleWeights = hostWeights; + + mCloth.notifyChanged(); + mCloth.wakeUp(); +} + +} // namespace cloth +} // namespace physx |