aboutsummaryrefslogtreecommitdiff
path: root/PhysX_3.4/Source/LowLevelCloth/src/windows/CuSolver.cpp
diff options
context:
space:
mode:
authorgit perforce import user <a@b>2016-10-25 12:29:14 -0600
committerSheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>2016-10-25 18:56:37 -0500
commit3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
treefa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PhysX_3.4/Source/LowLevelCloth/src/windows/CuSolver.cpp
downloadphysx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
Initial commit:
PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
Diffstat (limited to 'PhysX_3.4/Source/LowLevelCloth/src/windows/CuSolver.cpp')
-rw-r--r--PhysX_3.4/Source/LowLevelCloth/src/windows/CuSolver.cpp556
1 files changed, 556 insertions, 0 deletions
diff --git a/PhysX_3.4/Source/LowLevelCloth/src/windows/CuSolver.cpp b/PhysX_3.4/Source/LowLevelCloth/src/windows/CuSolver.cpp
new file mode 100644
index 00000000..68238664
--- /dev/null
+++ b/PhysX_3.4/Source/LowLevelCloth/src/windows/CuSolver.cpp
@@ -0,0 +1,556 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved.
+// Copyright (c) 2001-2004 NovodeX AG. All rights reserved.
+
+#include "foundation/PxProfiler.h"
+#include "CuSolver.h"
+#include "CuCloth.h"
+#include "ClothImpl.h"
+#include "CuFabric.h"
+#include "CuFactory.h"
+#include "CuSolverKernel.h"
+#include "CuContextLock.h"
+#include "CuCheckSuccess.h"
+#include "IterationState.h"
+#include "CudaKernelWrangler.h"
+#include "PsUtilities.h"
+#include "PsSort.h"
+#include "PsFoundation.h"
+
+#if PX_NVTX
+#include "nvToolsExt.h"
+#endif
+
+//#define ENABLE_CUDA_PRINTF PX_DEBUG // warning: not thread safe
+#define ENABLE_CUDA_PRINTF 0
+
+#if ENABLE_CUDA_PRINTF
+extern "C" cudaError_t cudaPrintfInit(CUmodule hmod, size_t bufferLen = 1048576);
+extern "C" void cudaPrintfEnd();
+extern "C" cudaError_t cudaPrintfDisplay(CUmodule hmod, void* outputFP = NULL, bool showThreadID = false);
+#endif
+
+using namespace physx;
+
namespace
{
// for KernelWrangler interface: the single kernel entry-point name handed to
// mKernelWrangler in the CuSolver constructor (as a one-element name array)
const char* gKernelName = cloth::getKernelFunctionName();
}
+
+namespace
+{
+template <typename T>
+struct CuDeviceAllocator
+{
+ CuDeviceAllocator(physx::PxCudaContextManager* ctx) : mManager(ctx->getMemoryManager())
+ {
+ }
+
+ T* allocate(size_t n)
+ {
+ return reinterpret_cast<T*>(mManager->alloc(physx::PxCudaBufferMemorySpace::T_GPU, n * sizeof(T)));
+ }
+
+ void deallocate(T* ptr)
+ {
+ mManager->free(physx::PxCudaBufferMemorySpace::T_GPU, reinterpret_cast<physx::PxCudaBufferPtr>(ptr));
+ }
+
+ physx::PxCudaMemoryManager* mManager;
+};
+}
+
// Constructs the CUDA cloth solver.
//
// The CuContextLock base acquires the CUDA context for the duration of the
// constructor. The kernel is loaded through mKernelWrangler (in the
// initializer list); the body then queries its static shared-memory usage,
// resolves the device-side CuKernelData global, and zero-initializes the
// device cloth index counter. If kernel loading failed, mCudaError is set
// and all setup is skipped — simulate() checks this flag and refuses to run.
cloth::CuSolver::CuSolver(CuFactory& factory)
: CuContextLock(factory)
, mFactory(factory)
, mClothData(mFactory.mContextManager)
, mClothDataHostCopy(CuHostAllocator(mFactory.mContextManager, cudaHostAllocWriteCombined))
, mClothDataDirty(false)
, mFrameData(getMappedAllocator<CuFrameData>(mFactory.mContextManager))
, mIterationData(getMappedAllocator<CuIterationData>(mFactory.mContextManager))
, mIterationDataBegin(0)
, mFrameDt(0.0f)
, mSharedMemorySize(0)
, mSharedMemoryLimit(0)
, mStartSimulationTask(&CuSolver::beginFrame, "cloth.CuSolver.startSimulation")
, mKernelSimulationTask(&CuSolver::executeKernel, "cloth.CuSolver.kernelSimulation")
, mEndSimulationTask(&CuSolver::endFrame, "cloth.CuSolver.endSimulation")
, mStream(0)
, mKernelModule(0)
, mKernelFunction(0)
, mKernelSharedMemorySize(0)
, mClothIndex(CuDeviceAllocator<uint32_t>(mFactory.mContextManager).allocate(1))
, mInterCollisionDistance(0.0f)
, mInterCollisionStiffness(1.0f)
, mInterCollisionIterations(1)
, mInterCollisionScratchMem(NULL)
, mInterCollisionScratchMemSize(0)
, mKernelWrangler(getDispatcher(), physx::shdfnd::getFoundation().getErrorCallback(), &gKernelName, 1)
, mSimulateNvtxRangeId(0)
, mCudaError(mKernelWrangler.hadError())
{
	// kernel module failed to load: drop the context lock and leave the
	// solver in its error state
	if(mCudaError)
	{
		CuContextLock::release();
		return;
	}

	// back-pointers so the task wrappers can call our member functions
	mStartSimulationTask.mSolver = this;
	mKernelSimulationTask.mSolver = this;
	mEndSimulationTask.mSolver = this;

	// create a private stream only when the context manager allows work to
	// run concurrently; otherwise stay on the default stream (mStream == 0)
	if(mFactory.mContextManager->getUsingConcurrentStreams())
		checkSuccess(cuStreamCreate(&mStream, 0));

	// developer toggle: flip to the else branch to load the kernel from a
	// PTX file on disk instead of the module embedded in mKernelWrangler
	if(1)
	{
		mKernelModule = mKernelWrangler.getCuModule(0);
		mKernelFunction = mKernelWrangler.getCuFunction(0);
	}
	else
	{
		// load from ptx instead of embedded SASS, for iterating without recompile
		checkSuccess(cuModuleLoad(&mKernelModule, "CuSolverKernel.ptx"));
		checkSuccess(cuModuleGetFunction(&mKernelFunction, mKernelModule, getKernelFunctionName()));
		shdfnd::getFoundation().error(PX_INFO, "Cloth kernel code loaded from CuSolverKernel.ptx");
	}

	// get amount of statically allocated shared memory
	checkSuccess(cuFuncGetAttribute(&mKernelSharedMemorySize, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, mKernelFunction));

	// extract CuKernelData device pointer (a global variable in the module)
	size_t size = 0;
	CUdeviceptr ptr = 0;
	checkSuccess(cuModuleGetGlobal(&ptr, &size, mKernelModule, getKernelDataName()));
	mKernelData = CuDevicePointer<CuKernelData>(reinterpret_cast<CuKernelData*>(ptr));

	// initialize cloth index (single uint32 counter on the device)
	checkSuccess(cuMemsetD32(mClothIndex.dev(), 0, 1));

	// done issuing CUDA calls; release the context for other threads
	CuContextLock::release();
}
+
// Tears down the solver. All cloths must already have been removed.
// Zeroes the device-side kernel data global, frees the device cloth index
// counter and the private stream, and releases the inter-collision scratch
// buffer. NOTE(review): the context acquired here is presumably released by
// the CuContextLock base destructor — confirm.
cloth::CuSolver::~CuSolver()
{
	PX_ASSERT(mCloths.empty());

	CuContextLock::acquire();

	// overwrite the kernel's global data block with zeros so no stale
	// device pointers remain in the module
	CuKernelData kernelData = {};
	*mKernelData = kernelData;

	CuDeviceAllocator<uint32_t>(mFactory.mContextManager).deallocate(mClothIndex.get());

	// mStream is non-zero only if a private stream was created
	if(mStream)
		checkSuccess(cuStreamDestroy(mStream));

	if(mInterCollisionScratchMem)
		PX_FREE(mInterCollisionScratchMem);
}
+
+void cloth::CuSolver::updateKernelData()
+{
+ CuKernelData kernelData;
+
+ kernelData.mClothIndex = mClothIndex.get();
+ kernelData.mClothData = mClothData.begin().get();
+ kernelData.mFrameData = getDevicePointer(mFrameData);
+
+ *mKernelData = kernelData;
+}
+
+physx::PxGpuDispatcher& cloth::CuSolver::getDispatcher() const
+{
+ return *mFactory.mContextManager->getGpuDispatcher();
+}
+
+namespace
+{
+struct ClothSimCostGreater
+{
+ bool operator()(const cloth::CuCloth* left, const cloth::CuCloth* right) const
+ {
+ return left->mNumParticles * left->mSolverFrequency > right->mNumParticles * right->mSolverFrequency;
+ }
+};
+}
+
+void cloth::CuSolver::addCloth(Cloth* cloth)
+{
+ CuCloth& cuCloth = static_cast<CuClothImpl&>(*cloth).mCloth;
+
+ PX_ASSERT(mCloths.find(&cuCloth) == mCloths.end());
+
+ mCloths.pushBack(&cuCloth);
+ // trigger update of mClothData array
+ cuCloth.notifyChanged();
+
+ // sort cloth instances by size
+ shdfnd::sort(mCloths.begin(), mCloths.size(), ClothSimCostGreater());
+
+ CuContextLock contextLock(mFactory);
+
+ // resize containers and update kernel data
+ mClothDataHostCopy.resize(mCloths.size());
+ mClothData.resize(mCloths.size());
+ mFrameData.resize(mCloths.size());
+ updateKernelData();
+}
+
+void cloth::CuSolver::removeCloth(Cloth* cloth)
+{
+ CuCloth& cuCloth = static_cast<CuClothImpl&>(*cloth).mCloth;
+
+ ClothVector::Iterator begin = mCloths.begin(), end = mCloths.end();
+ ClothVector::Iterator it = mCloths.find(&cuCloth);
+
+ if(it == end)
+ return; // not found
+
+ uint32_t index = uint32_t(it - begin);
+
+ mCloths.remove(index);
+ mClothDataHostCopy.remove(index);
+ mClothData.resize(mCloths.size());
+ mClothDataDirty = true;
+}
+
// Schedules one simulation frame of duration dt and returns the task the
// caller should run. The work is split across the GpuDispatcher's phases:
//   mStartSimulationTask (CPU prep, beginFrame)
//     -> dispatcher pre-launch -> mKernelSimulationTask (executeKernel)
//     -> dispatcher post-launch -> mEndSimulationTask (endFrame)
//     -> caller's continuation
// With no cloths, or after a CUDA error, the continuation is returned
// directly and no solver tasks run.
physx::PxBaseTask& cloth::CuSolver::simulate(float dt, physx::PxBaseTask& continuation)
{
	mFrameDt = dt;

	if(mCloths.empty() || mCudaError)
	{
		// give the caller back its continuation with an extra reference,
		// since we will not submit it through our task chain
		continuation.addReference();
		return continuation;
	}

	// wire the chain back to front so every task's successor is registered
	// before the predecessor can possibly run; the removeReference() calls
	// balance the references taken by setContinuation()/the dispatcher —
	// this follows the PxGpuDispatcher task protocol
	physx::PxGpuDispatcher& disp = getDispatcher();
	mEndSimulationTask.setContinuation(&continuation);
	disp.addPostLaunchDependent(mEndSimulationTask);
	mKernelSimulationTask.setContinuation(&disp.getPostLaunchTask());
	disp.getPostLaunchTask().removeReference();
	disp.addPreLaunchDependent(mKernelSimulationTask);
	mStartSimulationTask.setContinuation(&disp.getPreLaunchTask());
	disp.getPreLaunchTask().removeReference();

	mEndSimulationTask.removeReference();
	mKernelSimulationTask.removeReference();

	return mStartSimulationTask;
}
+
// CPU-side frame setup, run before the kernel launch.
//
// Refreshes the per-cloth data host copies, re-derives the kernel launch
// configuration (cloths per SM, threads per block, shared-memory budget)
// whenever cloth data changed, and fills the pinned per-frame and
// per-iteration arrays that the kernel reads through mapped memory.
void cloth::CuSolver::beginFrame()
{
	CuContextLock contextLock(mFactory);

	// opened here, closed cross-thread in endFrame() with the same name
	PX_PROFILE_START_CROSSTHREAD("cloth.CuSolver.simulate", 0);

	// remember where the pinned iteration array lived so pointers into it
	// can be rebased at the bottom if the vector reallocates
	CuIterationData* iterationDataBegin = mIterationData.empty() ? 0 : &mIterationData.front();

	mFrameData.resize(0);
	mIterationData.resize(0);

	// update cloth data
	ClothVector::Iterator cIt, cEnd = mCloths.end();
	CuPinnedVector<CuClothData>::Type::Iterator dIt = mClothDataHostCopy.begin();
	for(cIt = mCloths.begin(); cIt != cEnd; ++cIt, ++dIt)
		mClothDataDirty |= (*cIt)->updateClothData(*dIt);

	if(mClothDataDirty)
	{
		/* find optimal number of cloths per SM */

		// at least 192 threads per block (e.g. CuCollision::buildAcceleration)
		uint32_t numSMs = (uint32_t)mFactory.mContextManager->getMultiprocessorCount();
		uint32_t maxClothsPerSM = PxMin(mFactory.mMaxThreadsPerBlock / 192, (mCloths.size() + numSMs - 1) / numSMs);

		// tuning parameters: relative performance per numSharedPositions
		float weights[3] = { 0.4f, 0.8f, 1.0f };

		// try all possible number of cloths per SM and estimate performance
		float maxWeightSum = 0.0f;
		uint32_t numClothsPerSM = 0;
		for(uint32_t i = 1; i <= maxClothsPerSM; ++i)
		{
			// shared memory left per cloth when i cloths share one SM
			uint32_t sharedMemoryLimit = (mFactory.mContextManager->getSharedMemPerBlock() / i) - mKernelSharedMemorySize;

			float weightSum = 0.0f;
			for(cIt = mCloths.begin(); cIt != cEnd; ++cIt)
			{
				uint32_t sharedMemorySize = (*cIt)->mSharedMemorySize;
				uint32_t positionsSize = (*cIt)->mNumParticles * sizeof(PxVec4);

				// this cloth does not fit at all: configuration i is invalid
				if(sharedMemorySize > sharedMemoryLimit)
					break;

				// how many particle position arrays (0..2) also fit in shared memory
				uint32_t numSharedPositions = PxMin(2u, (sharedMemoryLimit - sharedMemorySize) / positionsSize);

				weightSum += weights[numSharedPositions] * positionsSize;
			}
			// tuning parameter: inverse performance for running i cloths per SM
			weightSum *= 2.0f + i;

			// only accept configurations where the inner loop completed,
			// i.e. every cloth fit into the shared memory budget
			if(cIt == cEnd && weightSum > maxWeightSum)
			{
				maxWeightSum = weightSum;
				numClothsPerSM = i;
			}
		}
		PX_ASSERT(numClothsPerSM);

		// update block size ('/' binds before '&': rounds down to a multiple of 32)
		uint32_t numThreadsPerBlock = mFactory.mMaxThreadsPerBlock / numClothsPerSM & ~31;

		// Workaround for nvbug 1709919: theoretically, register usage should allow us to launch at least
		// mFactory.mMaxThreadsPerBlock threads, because that value corresponds to __launch_bounds__(maxThreadsPerBlock).
		CUdevice device = 0;
		checkSuccess(cuCtxGetDevice(&device));
		int registersPerBlock = 0, kernelRegisterCount = 0;
		checkSuccess(cuDeviceGetAttribute(&registersPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, device));
		checkSuccess(cuFuncGetAttribute(&kernelRegisterCount, CU_FUNC_ATTRIBUTE_NUM_REGS, mKernelFunction));
		numThreadsPerBlock = PxMin(numThreadsPerBlock, uint32_t(registersPerBlock / kernelRegisterCount));
		PX_ASSERT(numThreadsPerBlock >= 192);

		if(mFactory.mNumThreadsPerBlock != numThreadsPerBlock)
		{
			checkSuccess(
			    cuFuncSetBlockShape(mKernelFunction, int(mFactory.mNumThreadsPerBlock = numThreadsPerBlock), 1, 1));
		}

		// remember num cloths per SM in terms of max shared memory per block
		mSharedMemoryLimit =
		    (mFactory.mContextManager->getSharedMemPerBlock() / numClothsPerSM) - mKernelSharedMemorySize;
	}

	// build per-frame and per-iteration data for every cloth
	uint32_t maxSharedMemorySize = 0;
	for(cIt = mCloths.begin(); cIt != cEnd; ++cIt)
	{
		CuCloth& cloth = **cIt;

		uint32_t sharedMemorySize = cloth.mSharedMemorySize;
		uint32_t positionsSize = cloth.mNumParticles * sizeof(PxVec4);

		// number of particle position arrays (0..2) cached in shared memory
		uint32_t numSharedPositions = PxMin(2u, (mSharedMemoryLimit - sharedMemorySize) / positionsSize);

		maxSharedMemorySize = PxMax(maxSharedMemorySize, sharedMemorySize + numSharedPositions * positionsSize);

		// expand the frame dt into per-iteration integration state
		IterationStateFactory factory(cloth, mFrameDt);
		IterationState<Simd4f> state = factory.create<Simd4f>(cloth);

		// frame data records where this cloth's iteration run starts
		mFrameData.pushBack(CuFrameData(cloth, numSharedPositions, state, mIterationDataBegin + mIterationData.size()));

		while(state.mRemainingIterations)
		{
			mIterationData.pushBack(CuIterationData(state));
			state.update();
		}
	}
	mSharedMemorySize = maxSharedMemorySize;

	// add dummy element because we read past the end
	mIterationData.pushBack(CuIterationData());

	if(&mIterationData.front() != iterationDataBegin)
	{
		// mIterationData grew, update pointers
		iterationDataBegin = getDevicePointer(mIterationData);

		// rebase every frame's iteration pointer by the relocation offset
		ptrdiff_t diff = (char*)iterationDataBegin - (char*)mIterationDataBegin;
		CuPinnedVector<CuFrameData>::Type::Iterator fIt = mFrameData.begin(), fEnd;
		for(fEnd = mFrameData.end(); fIt != fEnd; ++fIt)
			reinterpret_cast<const char*&>(fIt->mIterationData) += diff;

		mIterationDataBegin = iterationDataBegin;
	}
}
+
// Launches the cloth solver kernel (one block per cloth) on the solver's
// stream, uploading the per-cloth data array first if it changed. A failed
// launch puts the solver into the error state so cloth instances can be
// migrated to software simulation.
void cloth::CuSolver::executeKernel()
{
	CuContextLock contextLock(mFactory);

#if ENABLE_CUDA_PRINTF
	if(cudaError result = cudaPrintfInit(mKernelModule))
	{
		shdfnd::getFoundation().error(PxErrorCode::eINTERNAL_ERROR, __FILE__, __LINE__, "cudaPrintfInit() returned %u.",
		                              result);
	}
#endif

	// upload fresh cloth data to the device copy (async on our stream)
	if(mClothDataDirty)
	{
		PX_ASSERT(mClothDataHostCopy.size() == mClothData.size());
		size_t numBytes = mClothData.size() * sizeof(CuClothData);
		checkSuccess(cuMemcpyHtoDAsync(mClothData.begin().dev(), mClothDataHostCopy.begin(), numBytes, mStream));
		mClothDataDirty = false;
	}

#if 0
	// debugging aid, disabled: capture a replay after 100 frames
	static int frame = 0;
	if(++frame == 100)
		record(*this);
#endif

	// launch kernel: gridDim.x = cloth count, block size and dynamic shared
	// memory as computed in beginFrame()
	CUresult result = cuLaunchKernel(mKernelFunction, mCloths.size(), 1, 1, mFactory.mNumThreadsPerBlock, 1, 1,
	                                 mSharedMemorySize, mStream, 0, 0);

#if ENABLE_CUDA_PRINTF
	cudaPrintfDisplay(mKernelModule);
	cudaPrintfEnd();
#endif

#if PX_DEBUG
	// in debug builds check kernel result
	checkSuccess(result);
	checkSuccess(cuStreamSynchronize(mStream));
#endif

	// mark the solver as being in an error state
	// all cloth instances will be migrated to software
	if(result != CUDA_SUCCESS)
		mCudaError = true;
}
+
+void cloth::CuSolver::endFrame()
+{
+ CuPinnedVector<CuFrameData>::Type::ConstIterator fIt = mFrameData.begin();
+ ClothVector::Iterator cIt, cEnd = mCloths.end();
+ for(cIt = mCloths.begin(); cIt != cEnd; ++cIt, ++fIt)
+ {
+ CuCloth& cloth = **cIt;
+
+ cloth.mHostParticlesDirty = false;
+ cloth.mDeviceParticlesDirty = false;
+
+ cloth.mMotionConstraints.pop();
+ cloth.mMotionConstraints.mHostCopy.resize(0);
+
+ cloth.mSeparationConstraints.pop();
+ cloth.mSeparationConstraints.mHostCopy.resize(0);
+
+ if(!cloth.mTargetCollisionSpheres.empty())
+ {
+ shdfnd::swap(cloth.mStartCollisionSpheres, cloth.mTargetCollisionSpheres);
+ cloth.mTargetCollisionSpheres.resize(0);
+ }
+
+ if(!cloth.mTargetCollisionPlanes.empty())
+ {
+ shdfnd::swap(cloth.mStartCollisionPlanes, cloth.mTargetCollisionPlanes);
+ cloth.mTargetCollisionPlanes.resize(0);
+ }
+
+ if(!cloth.mTargetCollisionTriangles.empty())
+ {
+ shdfnd::swap(cloth.mStartCollisionTriangles, cloth.mTargetCollisionTriangles);
+ cloth.mTargetCollisionTriangles.resize(0);
+ }
+
+ for(uint32_t i = 0; i < 3; ++i)
+ {
+ float upper = fIt->mParticleBounds[i * 2 + 0];
+ float negativeLower = fIt->mParticleBounds[i * 2 + 1];
+ cloth.mParticleBoundsCenter[i] = (upper - negativeLower) * 0.5f;
+ cloth.mParticleBoundsHalfExtent[i] = (upper + negativeLower) * 0.5f;
+ }
+
+ cloth.mSleepPassCounter = fIt->mSleepPassCounter;
+ cloth.mSleepTestCounter = fIt->mSleepTestCounter;
+ }
+
+ interCollision();
+
+ PX_PROFILE_STOP_CROSSTHREAD("cloth::CuSolver::simulate", 0);
+}
+
// Runs CPU-side collision between different cloth instances, operating on
// the host particle copies. No-op unless both an inter-collision distance
// and an iteration count are configured. Because particles are modified on
// the host, each participating cloth's device copy is marked dirty.
void cloth::CuSolver::interCollision()
{
	if(!mInterCollisionIterations || mInterCollisionDistance == 0.0f)
		return;

	// local alias for the SIMD-specialized CPU inter-collision solver
	typedef SwInterCollision<Simd4f> SwInterCollision;

	// rebuild cloth instance array
	mInterCollisionInstances.resize(0);
	for(uint32_t i = 0, n = mCloths.size(); i < n; ++i)
	{
		CuCloth& cloth = *mCloths[i];

		// scale elasticity down by the number of iterations run this frame
		float elasticity = 1.0f / mFrameData[i].mNumIterations;
		// host particle copy must be current at this point (see endFrame)
		PX_ASSERT(!cloth.mHostParticlesDirty);
		PxVec4* particles = cloth.mParticlesHostCopy.begin();
		// default: all particles participate; otherwise restrict to the
		// cloth's self-collision index subset
		uint32_t* indices = NULL, numIndices = cloth.mNumParticles;
		if(!cloth.mSelfCollisionIndices.empty())
		{
			indices = cloth.mSelfCollisionIndicesHost.begin();
			numIndices = uint32_t(cloth.mSelfCollisionIndices.size());
		}

		mInterCollisionInstances.pushBack(SwInterCollisionData(
		    particles, particles + cloth.mNumParticles, numIndices, indices, cloth.mTargetMotion,
		    cloth.mParticleBoundsCenter, cloth.mParticleBoundsHalfExtent, elasticity, cloth.mUserData));

		// host-side modifications make the device particles stale
		cloth.mDeviceParticlesDirty = true;
	}

	uint32_t requiredTempMemorySize = uint32_t(
	    SwInterCollision::estimateTemporaryMemory(&mInterCollisionInstances[0], mInterCollisionInstances.size()));

	// realloc temp memory if necessary (grown, never shrunk; freed in dtor)
	if(mInterCollisionScratchMemSize < requiredTempMemorySize)
	{
		if(mInterCollisionScratchMem)
			PX_FREE(mInterCollisionScratchMem);

		mInterCollisionScratchMem = PX_ALLOC(requiredTempMemorySize, "cloth::SwSolver::mInterCollisionScratchMem");
		mInterCollisionScratchMemSize = requiredTempMemorySize;
	}

	SwKernelAllocator allocator(mInterCollisionScratchMem, mInterCollisionScratchMemSize);

	// run inter-collision
	SwInterCollision(mInterCollisionInstances.begin(), mInterCollisionInstances.size(), mInterCollisionDistance,
	                 mInterCollisionStiffness, mInterCollisionIterations, mInterCollisionFilter, allocator)();
}
+
// Task wrapper binding a CuSolver member function to a named task.
// mSolver starts null and is filled in by the CuSolver constructor before
// the task can run.
cloth::CuSolver::ClothSolverTask::ClothSolverTask(FunctionPtr functionPtr, const char* name)
: mSolver(0), mFunctionPtr(functionPtr), mName(name)
{
}
+
+void cloth::CuSolver::ClothSolverTask::runInternal()
+{
+ (mSolver->*mFunctionPtr)();
+}
+
+const char* cloth::CuSolver::ClothSolverTask::getName() const
+{
+ return mName;
+}