// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of NVIDIA CORPORATION nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Copyright (c) 2008-2018 NVIDIA Corporation. All rights reserved. // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "CuCloth.h"
#include "CuFabric.h"
#include "CuFactory.h"
#include "CuContextLock.h"
#include "CuCheckSuccess.h"
#include "CuClothData.h"
#include "CuSolver.h"
#include "TripletScheduler.h"
#include "ClothBase.h"
#include "Array.h"
#include "PsFoundation.h"

#if PX_VC
#pragma warning(disable : 4365) // 'action' : conversion from 'type_1' to 'type_2', signed/unsigned mismatch
#endif

// NOTE(review): several template argument lists (on Range, MappedRange, Vector::Type,
// CuDeviceVector, ClothImpl and the const_cast / reinterpret_cast expressions) are
// absent in this file as found. They are left untouched here - confirm against the
// corresponding headers.

namespace physx
{
namespace cloth
{
PhaseConfig transform(const PhaseConfig&); // from PhaseConfig.cpp
}
}

using namespace physx;

namespace
{
// Self collision is treated as enabled only when both the self collision distance
// and the negated self collision log-stiffness are strictly positive.
bool isSelfCollisionEnabled(const cloth::CuCloth& cloth)
{
    return PxMin(cloth.mSelfCollisionDistance, -cloth.mSelfCollisionLogStiffness) > 0.0f;
}
}

// Creates a cloth for `fabric` from the given initial particles.
//
// The CuContextLock base is constructed from the factory and explicitly released at
// the end of the body. The device particle buffer receives the input particles twice;
// the accessors further down treat the first mNumParticles entries as the current
// particles and the following mNumParticles entries as the previous ones.
// The fabric's reference count is incremented; the destructor decrements it.
cloth::CuCloth::CuCloth(CuFactory& factory, CuFabric& fabric, Range particles)
: CuContextLock(factory)
, mFactory(factory)
, mFabric(fabric)
, mClothDataDirty(false)
, mNumParticles(uint32_t(particles.size()))
, mParticles(mFactory.mContextManager)
, mParticlesHostCopy(CuHostAllocator(mFactory.mContextManager, cudaHostAllocMapped))
, mDeviceParticlesDirty(false)
, mHostParticlesDirty(true)
, mPhaseConfigs(mFactory.mContextManager)
, mMotionConstraints(mFactory.mContextManager)
, mSeparationConstraints(mFactory.mContextManager)
, mParticleAccelerations(mFactory.mContextManager)
, mParticleAccelerationsHostCopy(CuHostAllocator(mFactory.mContextManager, cudaHostAllocMapped))
, mCapsuleIndices(getMappedAllocator(mFactory.mContextManager))
, mStartCollisionSpheres(getMappedAllocator(mFactory.mContextManager))
, mTargetCollisionSpheres(getMappedAllocator(mFactory.mContextManager))
, mConvexMasks(getMappedAllocator(mFactory.mContextManager))
, mStartCollisionPlanes(getMappedAllocator(mFactory.mContextManager))
, mTargetCollisionPlanes(getMappedAllocator(mFactory.mContextManager))
, mStartCollisionTriangles(getMappedAllocator(mFactory.mContextManager))
, mTargetCollisionTriangles(getMappedAllocator(mFactory.mContextManager))
, mVirtualParticleSetSizes(mFactory.mContextManager)
, mVirtualParticleIndices(mFactory.mContextManager)
, mVirtualParticleWeights(mFactory.mContextManager)
, mRestPositions(mFactory.mContextManager)
, mSelfCollisionIndices(mFactory.mContextManager)
, mSelfCollisionData(mFactory.mContextManager)
, mSharedMemorySize(0)
, mUserData(0)
{
    PX_ASSERT(!particles.empty());

    initialize(*this, particles.begin(), particles.end());

    // current and previous particles start out identical
    mParticles.reserve(2 * mNumParticles);
    mParticles.push_back(particles.begin(), particles.end());
    mParticles.push_back(particles.begin(), particles.end());
    mParticlesHostCopy.resizeUninitialized(2 * mNumParticles);

    mFabric.incRefCount();

    CuContextLock::release();
}

// Creates a copy of `cloth` that is owned by `factory`.
//
// All buffers listed below are copy-constructed from the source cloth, except
// mSelfCollisionData, which starts out empty (updateClothData() allocates it on
// demand). The remaining state is transferred by copy(*this, cloth).
//
// NOTE(review): mConvexMasks has no initializer here, so it is default-initialized
// rather than copied from `cloth` - confirm this is intended.
cloth::CuCloth::CuCloth(CuFactory& factory, const CuCloth& cloth)
: CuContextLock(factory)
, mFactory(factory)
, mFabric(cloth.mFabric)
// Previously left uninitialized although updateClothData() reads it. Starting out
// dirty makes the first updateClothData() call rebuild the device descriptor.
, mClothDataDirty(true)
, mNumParticles(cloth.mNumParticles)
, mParticles(cloth.mParticles)
, mParticlesHostCopy(cloth.mParticlesHostCopy)
, mDeviceParticlesDirty(cloth.mDeviceParticlesDirty)
, mHostParticlesDirty(cloth.mHostParticlesDirty)
, mPhaseConfigs(cloth.mPhaseConfigs)
, mHostPhaseConfigs(cloth.mHostPhaseConfigs)
, mMotionConstraints(cloth.mMotionConstraints)
, mSeparationConstraints(cloth.mSeparationConstraints)
, mParticleAccelerations(cloth.mParticleAccelerations)
, mParticleAccelerationsHostCopy(cloth.mParticleAccelerationsHostCopy)
, mCapsuleIndices(cloth.mCapsuleIndices)
, mStartCollisionSpheres(cloth.mStartCollisionSpheres)
, mTargetCollisionSpheres(cloth.mTargetCollisionSpheres)
, mStartCollisionPlanes(cloth.mStartCollisionPlanes)
, mTargetCollisionPlanes(cloth.mTargetCollisionPlanes)
, mStartCollisionTriangles(cloth.mStartCollisionTriangles)
, mTargetCollisionTriangles(cloth.mTargetCollisionTriangles)
, mVirtualParticleSetSizes(cloth.mVirtualParticleSetSizes)
, mVirtualParticleIndices(cloth.mVirtualParticleIndices)
, mVirtualParticleWeights(cloth.mVirtualParticleWeights)
, mRestPositions(cloth.mRestPositions)
, mSelfCollisionIndices(cloth.mSelfCollisionIndices)
, mSelfCollisionData(mFactory.mContextManager)
, mSharedMemorySize(cloth.mSharedMemorySize)
, mUserData(cloth.mUserData)
{
    copy(*this, cloth);

    mFabric.incRefCount();

    CuContextLock::release();
}

// Re-acquires the context lock base and drops the fabric reference taken by the
// constructors.
cloth::CuCloth::~CuCloth()
{
    CuContextLock::acquire();

    mFabric.decRefCount();
}

// Marks the cloth so that the next updateClothData() call rebuilds its descriptor.
void cloth::CuCloth::notifyChanged()
{
    mClothDataDirty = true;
}

// Refreshes `clothData` from this cloth if required.
//
// Returns false, leaving `clothData` untouched, when the cloth is not dirty and
// `clothData` still refers to this cloth's particle buffer. Otherwise the shared
// memory size is recomputed, the self collision scratch buffer is allocated if self
// collision is enabled and the buffer is still empty, `clothData` is rebuilt, the
// dirty flag is cleared and true is returned.
bool cloth::CuCloth::updateClothData(CuClothData& clothData)
{
    // test particle pointer to detect when cloth data array has been reordered
    if(!mClothDataDirty && clothData.mParticles == array(*mParticles.begin().get()))
    {
        PX_ASSERT(mSharedMemorySize == getSharedMemorySize());
        return false;
    }

    mSharedMemorySize = getSharedMemorySize();

    if(mSelfCollisionData.empty() && isSelfCollisionEnabled(*this))
    {
        // without explicit indices every particle takes part in self collision
        uint32_t numSelfCollisionIndices =
            mSelfCollisionIndices.empty() ? mNumParticles : uint32_t(mSelfCollisionIndices.size());

        uint32_t particleSize = 4 * mNumParticles;
        uint32_t keySize = 2 * numSelfCollisionIndices;           // 2x for radix buffer
        uint32_t cellStartSize = (129 + 128 * 128 + 130) / 2 + 1; // half because type is int16_t

        // use 16bit indices for cellStart array (128x128 grid)
        mSelfCollisionData.resize(particleSize + keySize + cellStartSize);
        // fill the trailing cellStart section with all-ones 32 bit words
        checkSuccess(cuMemsetD32((mSelfCollisionData.begin() + particleSize + keySize).dev(), 0xffffffff,
                                 cellStartSize));
    }

    clothData = CuClothData(*this);
    mClothDataDirty = false;

    return true;
}

// Computes the shared memory size, in bytes, derived from the current phase and
// collision shape counts: the phase configs plus sizeof(float) times the sum of the
// continuous collision part and the larger of the self collision and discrete
// collision parts.
uint32_t cloth::CuCloth::getSharedMemorySize() const
{
    uint32_t numPhases = uint32_t(mPhaseConfigs.size());
    uint32_t numSpheres = uint32_t(mStartCollisionSpheres.size());
    uint32_t numCones = uint32_t(mCapsuleIndices.size());
    uint32_t numPlanes = uint32_t(mStartCollisionPlanes.size());
    uint32_t numConvexes = uint32_t(mConvexMasks.size());
    uint32_t numTriangles = uint32_t(mStartCollisionTriangles.size() / 3);

    uint32_t phaseConfigSize = numPhases * sizeof(CuPhaseConfig);

    // previous-frame collision data is only accounted for with continuous collision or friction
    bool storePrevCollisionData = mEnableContinuousCollision || mFriction > 0.0f;
    uint32_t continuousCollisionSize = storePrevCollisionData ? 4 * numSpheres + 10 * numCones : 0;
    continuousCollisionSize += 4 * numCones + numConvexes; // capsule and convex masks
    uint32_t discreteCollisionSize = 4 * numSpheres + PxMax(10 * numCones + 96, 208u);
    discreteCollisionSize = PxMax(discreteCollisionSize, PxMax(4 * numPlanes, 19 * numTriangles));

    // scratch memory for prefix sum and histogram
    uint32_t selfCollisionSize = isSelfCollisionEnabled(*this) ? 544 : 0;

    // see CuSolverKernel.cu::gSharedMemory comment for details
    return phaseConfigSize + sizeof(float) * (continuousCollisionSize + PxMax(selfCollisionSize, discreteCollisionSize));
}

// Stores `configs` as the host-side phase configs and uploads a matching array of
// CuPhaseConfig entries. Each entry takes its constraint count, rest values and
// indices from the fabric phase selected by the config's mPhaseIndex.
void cloth::CuCloth::setPhaseConfig(Range configs)
{
    mHostPhaseConfigs.assign(configs.begin(), configs.end());

    Vector::Type deviceConfigs;
    deviceConfigs.reserve(configs.size());
    const PhaseConfig* cEnd = configs.end();
    for(const PhaseConfig* cIt = configs.begin(); cIt != cEnd; ++cIt)
    {
        CuPhaseConfig config;

        config.mStiffness = cIt->mStiffness;
        config.mStiffnessMultiplier = cIt->mStiffnessMultiplier;
        config.mCompressionLimit = cIt->mCompressionLimit;
        config.mStretchLimit = cIt->mStretchLimit;

        // mPhaseIndex is used unchecked as an index into the fabric's per-phase arrays
        uint16_t phaseIndex = cIt->mPhaseIndex;
        config.mNumConstraints = mFabric.mNumConstraintsInPhase[phaseIndex];
        config.mRestvalues = mFabric.mRestvaluesInPhase[phaseIndex].get();
        config.mIndices = mFabric.mIndicesInPhase[phaseIndex].get();

        deviceConfigs.pushBack(config);
    }

    CuContextLock contextLock(mFactory);
    mPhaseConfigs.assign(deviceConfigs.begin(), deviceConfigs.end());
}

// Prepares `constraints` for a new set of values and returns the host copy (sized to
// mNumParticles, contents uninitialized) as the range to fill in.
// Target and host buffers are reserved on first use; if no start values exist yet,
// the target buffer is swapped into the start slot.
cloth::Range cloth::CuCloth::push(cloth::CuConstraints& constraints)
{
    if(!constraints.mTarget.capacity())
    {
        CuContextLock contextLock(mFactory);
        constraints.mTarget.reserve(mNumParticles);
    }
    if(constraints.mHostCopy.empty())
        constraints.mTarget.resize(mNumParticles);

    if(constraints.mStart.empty()) // initialize start first
        constraints.mStart.swap(constraints.mTarget);

    if(!constraints.mHostCopy.capacity())
    {
        CuContextLock contextLock(mFactory);
        constraints.mHostCopy.reserve(mNumParticles);
    }
    constraints.mHostCopy.resizeUninitialized(mNumParticles);

    PxVec4* data = &constraints.mHostCopy.front();
    return Range(data, data + constraints.mHostCopy.size());
}

// Replaces the start and target buffers of `constraints` with freshly constructed
// vectors by swapping them with temporaries. mHostCopy is not touched.
void cloth::CuCloth::clear(cloth::CuConstraints& constraints)
{
    CuContextLock contextLock(mFactory);
    CuDeviceVector(mFactory.mContextManager).swap(constraints.mStart);
    CuDeviceVector(mFactory.mContextManager).swap(constraints.mTarget);
}

// Uploads the host particle copy (current and previous particles) to the device if
// the device data has been flagged dirty, then clears the flag.
void cloth::CuCloth::syncDeviceParticles()
{
    if(mDeviceParticlesDirty)
    {
        CuContextLock contextLock(mFactory);
        checkSuccess(
            cuMemcpyHtoD(mParticles.begin().dev(), mParticlesHostCopy.begin(), 2 * mNumParticles * sizeof(PxVec4)));
        mDeviceParticlesDirty = false;
    }
}

// Copies the device particles (current and previous) into the host copy if the host
// data has been flagged dirty, then clears the flag.
void cloth::CuCloth::syncHostParticles()
{
    if(mHostParticlesDirty)
    {
        CuContextLock contextLock(mFactory);
        const PxVec4* src = mParticles.begin().get();
        mFactory.copyToHost(src, src + 2 * mNumParticles, mParticlesHostCopy.begin());
        mHostParticlesDirty = false;
    }
}

// Limits `range` so that the vertices kept from the current triangle set plus the
// new ones do not exceed 1500 vertices. `replaceSize` is the number of currently
// stored vertices that are about to be replaced. A warning is reported when
// vertices have to be dropped.
cloth::Range cloth::CuCloth::clampTriangleCount(Range range, uint32_t replaceSize)
{
    // clamp to 500 triangles (1500 vertices) to prevent running out of shared memory
    const uint32_t maxVertices = 1500;

    // vertices of the current set that survive the replacement
    const uint32_t keptVertices = uint32_t(mStartCollisionTriangles.size()) - replaceSize;

    // Work with element counts rather than 'begin + 1500 - kept': that expression
    // forms a pointer outside the input array whenever the range is shorter than
    // 1500 vertices, and ends up before begin() if more than 1500 vertices are kept.
    const uint32_t budget = keptVertices < maxVertices ? maxVertices - keptVertices : 0;
    const uint32_t requested = uint32_t(range.size());

    if(requested > budget)
    {
        shdfnd::getFoundation().error(PX_WARN, "Too many collision "
                                               "triangles specified for cloth, dropping all but first 500.\n");
    }

    return Range(range.begin(), range.begin() + PxMin(requested, budget));
}

#include "ClothImpl.h"

namespace physx
{
namespace cloth
{

// ClothImpl::clone() implemented in CuClothClone.cpp

// Number of particles of the wrapped cloth.
template <>
uint32_t ClothImpl::getNumParticles() const
{
    return mCloth.mNumParticles;
}

// Brings the host particle copy up to date. Declared const, hence the const_cast.
template <>
void ClothImpl::lockParticles() const
{
    const_cast(mCloth).syncHostParticles();
}

// Nothing to do for this implementation.
template <>
void ClothImpl::unlockParticles() const
{
}

// Mutable access to the current particles on the host. Wakes the cloth and flags the
// device particles dirty so that the next syncDeviceParticles() uploads the host copy.
template <>
MappedRange ClothImpl::getCurrentParticles()
{
    mCloth.wakeUp();
    lockParticles();
    mCloth.mDeviceParticlesDirty = true;
    return getMappedParticles(mCloth.mParticlesHostCopy.begin());
}

// Read access to the current particles on the host; only syncs the host copy.
template <>
MappedRange ClothImpl::getCurrentParticles() const
{
    lockParticles();
    return getMappedParticles(mCloth.mParticlesHostCopy.begin());
}

// Mutable access to the previous particles, stored mNumParticles entries into the
// host copy. Wakes the cloth and flags the device particles dirty.
template <>
MappedRange ClothImpl::getPreviousParticles()
{
    mCloth.wakeUp();
    lockParticles();
    mCloth.mDeviceParticlesDirty = true;
    return getMappedParticles(mCloth.mParticlesHostCopy.begin() + mCloth.mNumParticles);
}

// Read access to the previous particles on the host; only syncs the host copy.
template <>
MappedRange ClothImpl::getPreviousParticles() const
{
    lockParticles();
    return getMappedParticles(mCloth.mParticlesHostCopy.begin() + mCloth.mNumParticles);
}

// Returns pointers into the device particle buffer: the current particles, the
// previous particles (mNumParticles further on) and 0 as the third member.
// Pending host changes are uploaded first, and the host copy is flagged dirty.
template <>
GpuParticles ClothImpl::getGpuParticles()
{
    mCloth.syncDeviceParticles();
    mCloth.mHostParticlesDirty = true;
    PxVec4* particles = mCloth.mParticles.begin().get();
    GpuParticles result = { particles, particles + mCloth.mNumParticles, 0 };
    return result;
}

// Passes on the transformed phase configs to the cloth, skipping every config whose
// stiffness is not positive, then flags the cloth as changed and wakes it.
template <>
void ClothImpl::setPhaseConfig(Range configs)
{
    Vector::Type transformedConfigs;
    transformedConfigs.reserve(configs.size());

    // transform phase config to use in solver
    for(; !configs.empty(); configs.popFront())
        if(configs.front().mStiffness > 0.0f)
            transformedConfigs.pushBack(transform(configs.front()));

    mCloth.setPhaseConfig(Range(transformedConfigs.begin(), transformedConfigs.end()));
    mCloth.notifyChanged();
    mCloth.wakeUp();
}

// Stores the self collision indices in both mSelfCollisionIndices and
// mSelfCollisionIndicesHost, then flags the cloth as changed and wakes it.
template <>
void ClothImpl::setSelfCollisionIndices(Range indices)
{
    ContextLockType lock(mCloth.mFactory);
    mCloth.mSelfCollisionIndices.assign(indices.begin(), indices.end());
    mCloth.mSelfCollisionIndicesHost.assign(indices.begin(), indices.end());
    mCloth.notifyChanged();
    mCloth.wakeUp();
}

// Number of entries in the virtual particle index buffer.
template <>
uint32_t ClothImpl::getNumVirtualParticles() const
{
    return uint32_t(mCloth.mVirtualParticleIndices.size());
}

// Returns the host copy of the particle accelerations, sized to mNumParticles with
// uninitialized contents on (re)size. The device buffer is created on first use and
// the cloth is woken up.
template <>
Range ClothImpl::getParticleAccelerations()
{
    if(mCloth.mParticleAccelerations.empty())
    {
        CuContextLock contextLock(mCloth.mFactory);
        mCloth.mParticleAccelerations.resize(mCloth.mNumParticles);
    }

    if(!mCloth.mParticleAccelerationsHostCopy.capacity())
    {
        CuContextLock contextLock(mCloth.mFactory);
        mCloth.mParticleAccelerationsHostCopy.reserve(mCloth.mNumParticles);
    }
    mCloth.mParticleAccelerationsHostCopy.resizeUninitialized(mCloth.mNumParticles);

    mCloth.wakeUp();

    PxVec4* data = mCloth.mParticleAccelerationsHostCopy.begin();
    return Range(data, mCloth.mParticleAccelerationsHostCopy.end());
}

// Swaps the device acceleration buffer with a freshly constructed one, resets the
// host copy and wakes the cloth.
template <>
void ClothImpl::clearParticleAccelerations()
{
    CuContextLock contextLock(mCloth.mFactory);
    CuDeviceVector(mCloth.mFactory.mContextManager).swap(mCloth.mParticleAccelerations);
    mCloth.mParticleAccelerationsHostCopy.reset();
    mCloth.wakeUp();
}

// Sets the virtual particles from index tuples and weights.
// The indices are reordered by a TripletScheduler and converted to Vec4us; each
// weight gets 1 / |w|^2 appended as its fourth component.
// NOTE(review): a weight of zero length divides by zero here - presumably callers
// never pass one; confirm.
template <>
void ClothImpl::setVirtualParticles(Range indices, Range weights)
{
    // shuffle indices to form independent SIMD sets
    TripletScheduler scheduler(indices);
    scheduler.warp(mCloth.mNumParticles, 32);

    // convert to 16bit indices
    Vector::Type hostIndices;
    hostIndices.reserve(indices.size());
    TripletScheduler::ConstTripletIter tIt = scheduler.mTriplets.begin();
    TripletScheduler::ConstTripletIter tEnd = scheduler.mTriplets.end();
    for(; tIt != tEnd; ++tIt)
        hostIndices.pushBack(Vec4us(*tIt));

    // printf("num sets = %u, num replays = %u\n", scheduler.mSetSizes.size(),
    // calculateNumReplays(scheduler.mTriplets, scheduler.mSetSizes));

    // add normalization weight
    Vector::Type hostWeights;
    hostWeights.reserve(weights.size());
    for(; !weights.empty(); weights.popFront())
    {
        PxVec3 w = reinterpret_cast(weights.front());
        PxReal scale = 1 / w.magnitudeSquared();
        hostWeights.pushBack(PxVec4(w.x, w.y, w.z, scale));
    }

    CuContextLock contextLock(mCloth.mFactory);

    // todo: 'swap' these to force reallocation?
    mCloth.mVirtualParticleIndices = hostIndices;
    mCloth.mVirtualParticleSetSizes = scheduler.mSetSizes;
    mCloth.mVirtualParticleWeights = hostWeights;

    mCloth.notifyChanged();
    mCloth.wakeUp();
}

} // namespace cloth
} // namespace physx