// This code contains NVIDIA Confidential Information and is disclosed to you // under a form of NVIDIA software license agreement provided separately to you. // // Notice // NVIDIA Corporation and its licensors retain all intellectual property and // proprietary rights in and to this software and related documentation and // any modifications thereto. Any use, reproduction, disclosure, or // distribution of this software and related documentation without an express // license agreement from NVIDIA Corporation is strictly prohibited. // // ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES // NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO // THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, // MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. // // Information and code furnished is believed to be accurate and reliable. // However, NVIDIA Corporation assumes no responsibility for the consequences of use of such // information or for any infringement of patents or other rights of third parties that may // result from its use. No license is granted by implication or otherwise under any patent // or patent rights of NVIDIA Corporation. Details are subject to change without notice. // This code supersedes and replaces all information previously supplied. // NVIDIA Corporation products are not authorized for use as critical // components in life support devices or systems without express written approval of // NVIDIA Corporation. // // Copyright (c) 2008-2020 NVIDIA Corporation. All rights reserved. // Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. // Copyright (c) 2001-2004 NovodeX AG. All rights reserved. 
#include "NvCloth/DxContextManagerCallback.h"
#include "DxFactory.h"
#include "DxFabric.h"
#include "DxCloth.h"
#include "DxSolver.h"
#include "DxCheckSuccess.h"
#include "DxContextLock.h"
#include "../ClothImpl.h"

#if NV_CLOTH_ENABLE_DX11

#include "DxSolverKernelBlob.h"

using namespace physx;
using namespace nv;

namespace nv
{
namespace cloth
{
// defined in Factory.cpp
uint32_t getNextFabricId();

typedef Vec4T<uint32_t> Vec4u;
}
}

// Logs an error, tagged with the reporting file and line, for any HRESULT
// other than S_OK. Does not abort or throw.
void cloth::checkSuccessImpl(HRESULT err, const char* file, const int line)
{
    if (err != S_OK)
    {
        NV_CLOTH_LOG_ERROR("direct compute error: %u at %s:%d", err, file, line);
    }
}

namespace
{
// returns max threads as specified by launch bounds in DxSolverKernel.hlsl
uint32_t getMaxThreadsPerBlock()
{
    return 1024;
}
}

// Builds a factory on top of the given context manager. Every batched storage
// member is bound to that context manager; host-visible members use the
// staging buffer policy. No D3D resource is created here: the staging buffer
// and the solver compute shader are created on first use.
cloth::DxFactory::DxFactory(DxContextManagerCallback* contextManager)
: mContextManager(contextManager)
, mStagingBuffer(nullptr)
, mSolverKernelComputeShader(nullptr)
, mNumThreadsPerBlock(getMaxThreadsPerBlock())
, mMaxThreadsPerBlock(mNumThreadsPerBlock)
, mConstraints(mContextManager)
, mConstraintsHostCopy(mContextManager, DxStagingBufferPolicy())
, mStiffnessValues(mContextManager)
, mTethers(mContextManager)
, mParticles(mContextManager, DxDefaultRawBufferPolicy())
, mParticlesHostCopy(mContextManager, DxStagingBufferPolicy())
, mParticleAccelerations(mContextManager)
, mParticleAccelerationsHostCopy(mContextManager, DxStagingBufferPolicy())
, mPhaseConfigs(mContextManager)
, mCapsuleIndices(mContextManager, DxStagingBufferPolicy())
, mCapsuleIndicesDeviceCopy(mContextManager)
, mCollisionSpheres(mContextManager, DxStagingBufferPolicy())
, mCollisionSpheresDeviceCopy(mContextManager)
, mConvexMasks(mContextManager, DxStagingBufferPolicy())
, mConvexMasksDeviceCopy(mContextManager)
, mCollisionPlanes(mContextManager, DxStagingBufferPolicy())
, mCollisionPlanesDeviceCopy(mContextManager)
, mCollisionTriangles(mContextManager, DxStagingBufferPolicy())
, mCollisionTrianglesDeviceCopy(mContextManager)
, mVirtualParticleSetSizes(mContextManager, DxStagingBufferPolicy())
, mVirtualParticleSetSizesDeviceCopy(mContextManager)
, mVirtualParticleIndices(mContextManager, DxStagingBufferPolicy())
, mVirtualParticleIndicesDeviceCopy(mContextManager)
, mVirtualParticleWeights(mContextManager, DxStagingBufferPolicy())
, mVirtualParticleWeightsDeviceCopy(mContextManager)
, mMotionConstraints(mContextManager)
, mSeparationConstraints(mContextManager)
, mRestPositions(mContextManager, DxStagingBufferPolicy())
, mRestPositionsDeviceCopy(mContextManager)
, mSelfCollisionIndices(mContextManager)
, mSelfCollisionParticles(mContextManager)
, mSelfCollisionData(mContextManager)
, mTriangles(mContextManager)
{
    if (mContextManager->synchronizeResources())
    {
        // allow particle interop with other device
        mParticles.mBuffer.mMiscFlag =
            D3D11_RESOURCE_MISC_FLAG(mParticles.mBuffer.mMiscFlag | D3D11_RESOURCE_MISC_SHARED_KEYEDMUTEX);
    }
}

// Releases the staging buffer and the solver compute shader. All fabrics
// created by this factory must already have been deleted.
cloth::DxFactory::~DxFactory()
{
    // The lock must be a named object: a bare `DxContextLock(*this);` creates a
    // temporary that is destroyed at the end of that statement, leaving the
    // Release() calls below unprotected.
    DxContextLock contextLock(*this);

    NV_CLOTH_ASSERT(("All fabrics created by this factory need to be deleted before this factory is destroyed.",mFabrics.size() == 0));

    if (mStagingBuffer)
        mStagingBuffer->Release();

    if (mSolverKernelComputeShader)
        mSolverKernelComputeShader->Release();
}

// Creates a DxFabric from the given description and assigns it the next
// global fabric id. The caller owns the returned object.
cloth::Fabric* cloth::DxFactory::createFabric(uint32_t numParticles, Range<const uint32_t> phaseIndices,
                                              Range<const uint32_t> sets, Range<const float> restvalues,
                                              Range<const float> stiffnessValues, Range<const uint32_t> indices,
                                              Range<const uint32_t> anchors, Range<const float> tetherLengths,
                                              Range<const uint32_t> triangles)
{
    return NV_CLOTH_NEW(DxFabric)(*this, numParticles, phaseIndices, sets, restvalues, stiffnessValues, indices,
                                  anchors, tetherLengths, triangles, getNextFabricId());
}

// Creates a DxCloth for the given particles. `fabric` is downcast to DxFabric
// without a runtime check, so it must have been created by a DxFactory.
cloth::Cloth* cloth::DxFactory::createCloth(Range<const PxVec4> particles, Fabric& fabric)
{
    return NV_CLOTH_NEW(DxCloth)(*this, static_cast<DxFabric&>(fabric), particles);
}

// Creates a DxSolver, creating the solver compute shader first if needed.
// Returns nullptr (and deletes the solver) when the solver reports an error.
cloth::Solver* cloth::DxFactory::createSolver()
{
    CompileComputeShaders(); // Make sure our compute shaders are ready
    DxSolver* solver = NV_CLOTH_NEW(DxSolver)(*this);

    if (solver->hasError())
    {
        NV_CLOTH_DELETE(solver);
        return nullptr;
    }

    return solver;
}

// DxFactory::clone() implemented in DxClothClone.cpp

// Copies `size` bytes starting at byte `offset` of `srcBuffer` into `dst`,
// going through the factory's staging buffer. Does nothing when `size` is 0.
// If the staging buffer cannot be created or mapped, the failure is logged by
// the helper and `dst` is left untouched.
void cloth::DxFactory::copyToHost(void* dst, ID3D11Buffer* srcBuffer, uint32_t offset, uint32_t size) const
{
    if (!size)
        return;

    DxContextLock contextLock(*this);

    // The staging buffer is a lazily grown scratch resource, hence the const_cast.
    const_cast<DxFactory*>(this)->reserveStagingBuffer(size);
    if (!mStagingBuffer)
        return; // creation failed; already reported by reserveStagingBuffer

    CD3D11_BOX box(offset, 0, 0, offset + size, 1, 1);
    mContextManager->getContext()->CopySubresourceRegion(mStagingBuffer, 0, 0, 0, 0, srcBuffer, 0, &box);

    void* mapIt = mapStagingBuffer(D3D11_MAP_READ);
    if (!mapIt)
        return; // mapping failed; already reported by mapStagingBuffer

    memcpy(dst, mapIt, size);
    unmapStagingBuffer();
}

// Copies the fabric description back into the caller supplied ranges. An empty
// range means "not requested". Every non-empty range must have exactly the
// size of the corresponding fabric data (asserted). Constraint, stiffness and
// tether data are read back from their device buffers; phases and sets are
// copied from the fabric's host-side vectors. Triangles are not extracted yet.
void cloth::DxFactory::extractFabricData(const Fabric& fabric, Range<uint32_t> phaseIndices, Range<uint32_t> sets,
                                         Range<float> restvalues, Range<float> stiffnessValues,
                                         Range<uint32_t> indices, Range<uint32_t> anchors,
                                         Range<float> tetherLengths, Range<uint32_t> triangles) const
{
    DxContextLock contextLock(*this);

    const DxFabric& dxFabric = static_cast<const DxFabric&>(fabric);

    if (!phaseIndices.empty())
    {
        NV_CLOTH_ASSERT(phaseIndices.size() == dxFabric.mPhases.size());
        memcpy(phaseIndices.begin(), dxFabric.mPhases.begin(), phaseIndices.size() * sizeof(uint32_t));
    }

    if (!restvalues.empty())
    {
        NV_CLOTH_ASSERT(restvalues.size() == dxFabric.mConstraints.size());
        Vector<DxConstraint>::Type hostConstraints(restvalues.size());
        copyToHost(hostConstraints.begin(), dxFabric.mConstraints.buffer(),
                   dxFabric.mConstraints.mOffset * sizeof(DxConstraint),
                   uint32_t(hostConstraints.size() * sizeof(DxConstraint)));
        for (uint32_t i = 0, n = restvalues.size(); i < n; ++i)
            restvalues[i] = hostConstraints[i].mRestvalue;
    }

    if (!stiffnessValues.empty())
    {
        NV_CLOTH_ASSERT(stiffnessValues.size() == dxFabric.mStiffnessValues.size());
        Vector<float>::Type hostStiffnessValues(stiffnessValues.size());
        copyToHost(hostStiffnessValues.begin(), dxFabric.mStiffnessValues.buffer(),
                   dxFabric.mStiffnessValues.mOffset * sizeof(float),
                   uint32_t(hostStiffnessValues.size() * sizeof(float)));
        for (uint32_t i = 0, n = stiffnessValues.size(); i < n; ++i)
            stiffnessValues[i] = hostStiffnessValues[i];
    }

    if (!sets.empty())
    {
        // we don't skip first element here
        NV_CLOTH_ASSERT(sets.size() == dxFabric.mSets.size());
        memcpy(sets.begin(), dxFabric.mSets.begin(), sets.size() * sizeof(uint32_t));
    }

    if (!indices.empty())
    {
        // two particle indices are written per constraint
        NV_CLOTH_ASSERT(indices.size() == dxFabric.mConstraints.size()*2);
        Vector<DxConstraint>::Type hostConstraints(dxFabric.mConstraints.size());
        copyToHost(hostConstraints.begin(), dxFabric.mConstraints.buffer(),
                   dxFabric.mConstraints.mOffset * sizeof(DxConstraint),
                   uint32_t(hostConstraints.size() * sizeof(DxConstraint)));

        auto cIt = hostConstraints.begin(), cEnd = hostConstraints.end();
        for (uint32_t* iIt = indices.begin(); cIt != cEnd; ++cIt)
        {
            *iIt++ = cIt->mFirstIndex;
            *iIt++ = cIt->mSecondIndex;
        }
    }

    if (!anchors.empty() || !tetherLengths.empty())
    {
        uint32_t numTethers = uint32_t(dxFabric.mTethers.size());
        Vector<DxTether>::Type tethers(numTethers, DxTether(0, 0));
        copyToHost(tethers.begin(), dxFabric.mTethers.buffer(), dxFabric.mTethers.mOffset * sizeof(DxTether),
                   uint32_t(tethers.size() * sizeof(DxTether)));

        NV_CLOTH_ASSERT(anchors.empty() || anchors.size() == tethers.size());
        for (uint32_t i = 0; !anchors.empty(); ++i, anchors.popFront())
            anchors.front() = tethers[i].mAnchor;

        // stored lengths are multiplied by the fabric's tether length scale on the way out
        NV_CLOTH_ASSERT(tetherLengths.empty() || tetherLengths.size() == tethers.size());
        for (uint32_t i = 0; !tetherLengths.empty(); ++i, tetherLengths.popFront())
            tetherLengths.front() = tethers[i].mLength * dxFabric.mTetherLengthScale;
    }

    if (!triangles.empty())
    {
        // todo triangles
    }
}

// Copies the cloth's collision shapes into the caller supplied ranges. An
// empty range means "not requested"; a non-empty range must match the size of
// the corresponding cloth data (asserted; `capsules` holds two indices per
// capsule). The copy size is always derived from the destination range so a
// mismatched request cannot write past the end of the caller's storage.
void cloth::DxFactory::extractCollisionData(const Cloth& cloth, Range<PxVec4> spheres, Range<uint32_t> capsules,
                                            Range<PxVec4> planes, Range<uint32_t> convexes,
                                            Range<PxVec3> triangles) const
{
    NV_CLOTH_ASSERT(&cloth.getFactory() == this);

    const DxCloth& dxCloth = static_cast<const DxCloth&>(cloth);

    NV_CLOTH_ASSERT(spheres.empty() || spheres.size() == dxCloth.mStartCollisionSpheres.size());
    NV_CLOTH_ASSERT(capsules.empty() || capsules.size() == dxCloth.mCapsuleIndices.size() * 2);
    NV_CLOTH_ASSERT(planes.empty() || planes.size() == dxCloth.mStartCollisionPlanes.size());
    NV_CLOTH_ASSERT(convexes.empty() || convexes.size() == dxCloth.mConvexMasks.size());
    NV_CLOTH_ASSERT(triangles.empty() || triangles.size() == dxCloth.mStartCollisionTriangles.size());

    // collision spheres are in pinned memory, so memcpy directly
    if (!dxCloth.mStartCollisionSpheres.empty() && !spheres.empty())
    {
        memcpy(spheres.begin(),
               DxCloth::MappedVec4fVectorType(const_cast<DxCloth&>(dxCloth).mStartCollisionSpheres).begin(),
               spheres.size() * sizeof(PxVec4));
    }

    if (!dxCloth.mCapsuleIndices.empty() && !capsules.empty())
    {
        memcpy(capsules.begin(),
               DxCloth::MappedIndexVectorType(const_cast<DxCloth&>(dxCloth).mCapsuleIndices).begin(),
               capsules.size() * sizeof(uint32_t));
    }

    if (!dxCloth.mStartCollisionPlanes.empty() && !planes.empty())
    {
        memcpy(planes.begin(),
               DxCloth::MappedVec4fVectorType(const_cast<DxCloth&>(dxCloth).mStartCollisionPlanes).begin(),
               planes.size() * sizeof(PxVec4));
    }

    if (!dxCloth.mConvexMasks.empty() && !convexes.empty())
    {
        memcpy(convexes.begin(),
               DxCloth::MappedMaskVectorType(const_cast<DxCloth&>(dxCloth).mConvexMasks).begin(),
               convexes.size() * sizeof(uint32_t));
    }

    if (!dxCloth.mStartCollisionTriangles.empty() && !triangles.empty())
    {
        memcpy(triangles.begin(),
               DxCloth::MappedVec3fVectorType(const_cast<DxCloth&>(dxCloth).mStartCollisionTriangles).begin(),
               triangles.size() * sizeof(PxVec3));
    }
}

// Copies the cloth's motion constraints into `destConstraints`, which must
// have exactly as many elements as the source (asserted). The host copy is
// used when it is non-empty; otherwise the data is read back from the device,
// preferring the target constraints over the start constraints.
void cloth::DxFactory::extractMotionConstraints(const Cloth& cloth, Range<PxVec4> destConstraints) const
{
    NV_CLOTH_ASSERT(&cloth.getFactory() == this);

    const DxCloth& dxCloth = static_cast<const DxCloth&>(cloth);

    if (dxCloth.mMotionConstraints.mHostCopy.size())
    {
        NV_CLOTH_ASSERT(destConstraints.size() == dxCloth.mMotionConstraints.mHostCopy.size());

        memcpy(destConstraints.begin(), dxCloth.mMotionConstraints.mHostCopy.begin(),
               sizeof(PxVec4) * dxCloth.mMotionConstraints.mHostCopy.size());
    }
    else
    {
        DxContextLock contextLock(*this);

        DxBatchedVector<PxVec4> const& srcConstraints = !dxCloth.mMotionConstraints.mTarget.empty()
                                                            ? dxCloth.mMotionConstraints.mTarget
                                                            : dxCloth.mMotionConstraints.mStart;

        NV_CLOTH_ASSERT(destConstraints.size() == srcConstraints.size());

        // NOTE(review): this reads from byte offset 0 of the buffer, whereas
        // extractFabricData uses `mOffset * sizeof(T)` for its batched vectors.
        // Confirm whether `srcConstraints.mOffset` should be applied here.
        copyToHost(destConstraints.begin(), srcConstraints.buffer(), 0,
                   uint32_t(destConstraints.size() * sizeof(PxVec4)));
    }
}

// Copies the cloth's separation constraints into `destConstraints`, which must
// have exactly as many elements as the source (asserted). The host copy is
// used when it is non-empty; otherwise the data is read back from the device,
// preferring the target constraints over the start constraints.
void cloth::DxFactory::extractSeparationConstraints(const Cloth& cloth, Range<PxVec4> destConstraints) const
{
    NV_CLOTH_ASSERT(&cloth.getFactory() == this);

    const DxCloth& dxCloth = static_cast<const DxCloth&>(cloth);

    if (dxCloth.mSeparationConstraints.mHostCopy.size())
    {
        NV_CLOTH_ASSERT(destConstraints.size() == dxCloth.mSeparationConstraints.mHostCopy.size());

        memcpy(destConstraints.begin(), dxCloth.mSeparationConstraints.mHostCopy.begin(),
               sizeof(PxVec4) * dxCloth.mSeparationConstraints.mHostCopy.size());
    }
    else
    {
        DxContextLock contextLock(*this);

        DxBatchedVector<PxVec4> const& srcConstraints = !dxCloth.mSeparationConstraints.mTarget.empty()
                                                            ? dxCloth.mSeparationConstraints.mTarget
                                                            : dxCloth.mSeparationConstraints.mStart;

        NV_CLOTH_ASSERT(destConstraints.size() == srcConstraints.size());

        // NOTE(review): this reads from byte offset 0 of the buffer, whereas
        // extractFabricData uses `mOffset * sizeof(T)` for its batched vectors.
        // Confirm whether `srcConstraints.mOffset` should be applied here.
        copyToHost(destConstraints.begin(), srcConstraints.buffer(), 0,
                   uint32_t(destConstraints.size() * sizeof(PxVec4)));
    }
}

// Not implemented for the DX backend: always asserts and writes nothing to
// `destAccelerations`. The disabled code below is kept for reference.
void cloth::DxFactory::extractParticleAccelerations(const Cloth& cloth, Range<PxVec4> destAccelerations) const
{
    /*
    NV_CLOTH_ASSERT(&cloth.getFactory() == this);

    const DxCloth& dxCloth = static_cast<const DxClothImpl&>(cloth).mCloth;

    if (dxCloth.mParticleAccelerationsHostCopy.size())
    {
        NV_CLOTH_ASSERT(dxCloth.mParticleAccelerationsHostCopy.size());

        memcpy(destAccelerations.begin(), dxCloth.mParticleAccelerationsHostCopy.begin(),
               sizeof(PxVec4) * dxCloth.mParticleAccelerationsHostCopy.size());
    }
    else
    {
        DxContextLock contextLock(*this);

        DxBatchedVector<PxVec4> const& srcAccelerations = dxCloth.mParticleAccelerations;

        NV_CLOTH_ASSERT(destAccelerations.size() == srcAccelerations.size());

        copyToHost(destAccelerations.begin(), srcAccelerations.buffer(), 0,
                   destAccelerations.size() * sizeof(PxVec4));
    }
    */
    PX_UNUSED(&cloth);
    PX_UNUSED(&destAccelerations);
    NV_CLOTH_ASSERT(0);
}

// Copies the cloth's virtual particle data into the caller supplied ranges.
// An empty range means "not requested". Weights are stored as PxVec4 and
// written out as PxVec3; indices are stored as Vec4us and widened to four
// 32 bit values per virtual particle.
void cloth::DxFactory::extractVirtualParticles(const Cloth& cloth, Range<uint32_t[4]> destIndices,
                                               Range<PxVec3> destWeights) const
{
    NV_CLOTH_ASSERT(&cloth.getFactory() == this);

    DxContextLock contextLock(*this);

    const DxCloth& dxCloth = static_cast<const DxCloth&>(cloth);

    if (destWeights.size() > 0)
    {
        uint32_t numWeights = cloth.getNumVirtualParticleWeights();

        Vector<PxVec4>::Type hostWeights(numWeights, PxVec4(0.0f));
        //copyToHost(hostWeights.begin(), dxCloth.mVirtualParticleWeights.mBuffer.mBuffer, 0,
        //           hostWeights.size() * sizeof(PxVec4));
        NV_CLOTH_ASSERT(hostWeights.size() == dxCloth.mVirtualParticleWeights.size());

        // Size the copy from the weight vector itself. It was previously sized
        // from destIndices, which copied nothing when only weights were requested.
        intrinsics::memCopy(hostWeights.begin(),
                            DxCloth::MappedVec4fVectorType(const_cast<DxCloth&>(dxCloth).mVirtualParticleWeights).begin(),
                            uint32_t(hostWeights.size() * sizeof(PxVec4)));

        // convert weights to Vec3f
        PxVec3* destIt = reinterpret_cast<PxVec3*>(destWeights.begin());
        Vector<PxVec4>::Type::ConstIterator srcIt = hostWeights.begin();
        Vector<PxVec4>::Type::ConstIterator srcEnd = srcIt + numWeights;
        for (; srcIt != srcEnd; ++srcIt, ++destIt)
            *destIt = reinterpret_cast<const PxVec3&>(*srcIt);

        NV_CLOTH_ASSERT(destIt <= destWeights.end());
    }

    if (destIndices.size() > 0)
    {
        uint32_t numIndices = cloth.getNumVirtualParticles();

        Vector<Vec4us>::Type hostIndices(numIndices);
        //copyToHost(hostIndices.begin(), dxCloth.mVirtualParticleIndices.mBuffer.mBuffer, 0,
        //           hostIndices.size() * sizeof(Vec4us));
        NV_CLOTH_ASSERT(hostIndices.size() == dxCloth.mVirtualParticleIndices.size());

        // Each source element is a Vec4us, so the byte count is
        // count * sizeof(Vec4us), not count * sizeof(uint32_t).
        intrinsics::memCopy(hostIndices.begin(),
                            DxCloth::MappedVec4usVectorType(const_cast<DxCloth&>(dxCloth).mVirtualParticleIndices).begin(),
                            uint32_t(hostIndices.size() * sizeof(Vec4us)));

        // convert indices to 32 bit
        Vec4u* destIt = reinterpret_cast<Vec4u*>(destIndices.begin());
        Vector<Vec4us>::Type::ConstIterator srcIt = hostIndices.begin();
        Vector<Vec4us>::Type::ConstIterator srcEnd = srcIt + numIndices;
        for (; srcIt != srcEnd; ++srcIt, ++destIt)
            *destIt = Vec4u(*srcIt);

        NV_CLOTH_ASSERT(&array(*destIt) <= destIndices.end());
    }
}

// Copies the cloth's host-side self collision indices into `destIndices`,
// which must match the cloth's self collision index count (asserted).
void cloth::DxFactory::extractSelfCollisionIndices(const Cloth& cloth, Range<uint32_t> destIndices) const
{
    const DxCloth& dxCloth = static_cast<const DxCloth&>(cloth);
    NV_CLOTH_ASSERT(destIndices.size() == dxCloth.mSelfCollisionIndices.size());
    intrinsics::memCopy(destIndices.begin(), dxCloth.mSelfCollisionIndicesHost.begin(),
                        destIndices.size() * sizeof(uint32_t));
}

// Copies the cloth's rest positions into `destRestPositions`, which must match
// the cloth's rest position count (asserted).
void cloth::DxFactory::extractRestPositions(const Cloth& cloth, Range<PxVec4> destRestPositions) const
{
    const DxCloth& dxCloth = static_cast<const DxCloth&>(cloth);
    NV_CLOTH_ASSERT(destRestPositions.size() == dxCloth.mRestPositions.size());
    intrinsics::memCopy(destRestPositions.begin(),
                        DxCloth::MappedVec4fVectorType(const_cast<DxCloth&>(dxCloth).mRestPositions).begin(),
                        destRestPositions.size() * sizeof(PxVec4));
}

// Ensures the staging buffer holds at least `size` bytes. An existing buffer
// that is large enough is reused; otherwise it is released and a new one is
// created. On creation failure the error is logged and mStagingBuffer is left
// null, never dangling.
void cloth::DxFactory::reserveStagingBuffer(uint32_t size)
{
    if (mStagingBuffer)
    {
        D3D11_BUFFER_DESC desc;
        mStagingBuffer->GetDesc(&desc);
        if (desc.ByteWidth >= size)
            return;
        mStagingBuffer->Release();
        mStagingBuffer = nullptr; // do not keep a pointer to the released buffer
    }

    CD3D11_BUFFER_DESC desc(size, 0, D3D11_USAGE_STAGING, D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE);
    const HRESULT result = mContextManager->getDevice()->CreateBuffer(&desc, nullptr, &mStagingBuffer);
    checkSuccessImpl(result, __FILE__, __LINE__);
    if (FAILED(result))
        mStagingBuffer = nullptr;
}

// Maps the staging buffer with the given map type and returns the mapped
// pointer. Returns nullptr, after logging, when the map call fails.
void* cloth::DxFactory::mapStagingBuffer(D3D11_MAP mapType) const
{
    D3D11_MAPPED_SUBRESOURCE mapped = {};
    const HRESULT result = mContextManager->getContext()->Map(mStagingBuffer, 0, mapType, 0, &mapped);
    checkSuccessImpl(result, __FILE__, __LINE__);
    return FAILED(result) ? nullptr : mapped.pData;
}

// Unmaps the staging buffer previously mapped with mapStagingBuffer.
void cloth::DxFactory::unmapStagingBuffer() const
{
    mContextManager->getContext()->Unmap(mStagingBuffer, 0);
}

// Creates the solver compute shader from the embedded kernel blob on first
// call; later calls are no-ops once the shader exists. A creation failure is
// logged and leaves the shader pointer as the device call left it.
void cloth::DxFactory::CompileComputeShaders()
{
    if (mSolverKernelComputeShader == nullptr)
    {
        // Named lock so the context stays acquired for the device call below.
        DxContextLock contextLock(*this);
        ID3D11Device* device = mContextManager->getDevice();
        const HRESULT result = device->CreateComputeShader(gDxSolverKernel, sizeof(gDxSolverKernel), nullptr,
                                                           &mSolverKernelComputeShader);
        checkSuccessImpl(result, __FILE__, __LINE__);
    }
}

#endif // NV_CLOTH_ENABLE_DX11