diff options
Diffstat (limited to 'NvCloth/src')
| -rw-r--r-- | NvCloth/src/Callbacks.cpp | 6 | ||||
| -rw-r--r-- | NvCloth/src/ClothImpl.h | 2 | ||||
| -rw-r--r-- | NvCloth/src/MovingAverage.h | 146 | ||||
| -rw-r--r-- | NvCloth/src/SwFactory.cpp | 2 | ||||
| -rw-r--r-- | NvCloth/src/SwSolverKernel.cpp | 2 | ||||
| -rw-r--r-- | NvCloth/src/cuda/CuSolverKernel.cu | 13 |
6 files changed, 95 insertions, 76 deletions
diff --git a/NvCloth/src/Callbacks.cpp b/NvCloth/src/Callbacks.cpp index 22ba03d..f15a53b 100644 --- a/NvCloth/src/Callbacks.cpp +++ b/NvCloth/src/Callbacks.cpp @@ -54,8 +54,8 @@ static NvClothContext sContext; NV_CLOTH_API(void) InitializeNvCloth(PxAllocatorCallback* allocatorCallback, PxErrorCallback* errorCallback, PxAssertHandler* assertHandler, PxProfilerCallback* profilerCallback, int autoDllIDCheck) { PX_UNUSED(autoDllIDCheck); - NV_CLOTH_ASSERT(("NvCloth dll id mismatch, ensure you compile with matching headers/run with matching dll.", NV_CLOTH_DLL_ID == autoDllIDCheck)); - NV_CLOTH_ASSERT(("NvCloth initialized with invalid allocator", allocatorCallback != nullptr)); + NV_CLOTH_ASSERT_WITH_MESSAGE("NvCloth dll id mismatch, ensure you compile with matching headers/run with matching dll.", NV_CLOTH_DLL_ID == autoDllIDCheck); + NV_CLOTH_ASSERT_WITH_MESSAGE("NvCloth initialized with invalid allocator", allocatorCallback != nullptr); sContext.mAllocator = allocatorCallback; sContext.mErrorCallback = errorCallback; sContext.mAssertHandler = assertHandler; @@ -67,7 +67,7 @@ NV_CLOTH_API(void) InitializeNvCloth(PxAllocatorCallback* allocatorCallback, PxE PxAllocatorCallback* GetNvClothAllocator() { - NV_CLOTH_ASSERT(("NvCloth used before calling InitializeNvCloth", nv::cloth::sContext.mAllocator != nullptr)); + NV_CLOTH_ASSERT_WITH_MESSAGE("NvCloth used before calling InitializeNvCloth", nv::cloth::sContext.mAllocator != nullptr); return nv::cloth::sContext.mAllocator; } diff --git a/NvCloth/src/ClothImpl.h b/NvCloth/src/ClothImpl.h index 1e8d9a1..4d7b28d 100644 --- a/NvCloth/src/ClothImpl.h +++ b/NvCloth/src/ClothImpl.h @@ -316,7 +316,7 @@ inline physx::PxVec3 ClothImpl<T>::getGravity() const inline float safeLog2(float x) { - NV_CLOTH_ASSERT(("safeLog2",x >= 0.0f)); + NV_CLOTH_ASSERT_WITH_MESSAGE("safeLog2",x >= 0.0f); return x > 0 ? physx::shdfnd::log2(x) : -FLT_MAX_EXP; } diff --git a/NvCloth/src/MovingAverage.h b/NvCloth/src/MovingAverage.h index f524d95..4b447c7 100644 --- a/NvCloth/src/MovingAverage.h +++ b/NvCloth/src/MovingAverage.h @@ -40,112 +40,124 @@ namespace cloth struct MovingAverage { - struct Element - { - uint32_t mCount; - float mValue; - }; - public: - MovingAverage(uint32_t n = 1) : mCount(0), mSize(n) + MovingAverage(uint32_t n = 1) : mBegin(0), mCount(0), mSize(n) + { + mData = reinterpret_cast<float*>(NV_CLOTH_ALLOC(mSize *sizeof(float), "MovingAverage")); + } + MovingAverage(const MovingAverage& other): mData(nullptr), mBegin(0), mCount(0), mSize(0) { + *this = other; } + MovingAverage& operator=(const MovingAverage& other) + { + mBegin = other.mBegin; + mCount = other.mCount; + mSize = other.mSize; + NV_CLOTH_FREE(mData); + mData = reinterpret_cast<float*>(NV_CLOTH_ALLOC(mSize * sizeof(float), "MovingAverage")); + memcpy(mData, other.mData, mSize * sizeof(float)); + return *this; + } + ~MovingAverage() { NV_CLOTH_FREE(mData); } bool empty() const { - return mData.empty(); + return mCount == 0; } uint32_t size() const { return mSize; } - + void resize(uint32_t n) { - NV_CLOTH_ASSERT(n); + float* newData = reinterpret_cast<float*>(NV_CLOTH_ALLOC(n * sizeof(float), "MovingAverage")); + + const int cutOffFront = std::max(mCount - static_cast<int32_t>(n), 0); + int index = (mBegin + cutOffFront) % mSize; + for(int i = 0; i < static_cast<int>(n); i++) + { + newData[i] = mData[index]; + index = (index + 1) % mSize; + } + + mCount -= cutOffFront; + + NV_CLOTH_FREE(mData); + mSize = n; - trim(); + mData = newData; + mBegin = 0; } - + void reset() { - mData.resize(0); mCount = 0; + mBegin = 0; } void push(uint32_t n, float value) { - n = std::min(n, mSize); - - if (mData.empty() || mData.back().mValue != value) + n = std::min(n, static_cast<uint32_t>(mSize)); + const int start = (mBegin + mCount) % mSize; + const int end = start + n; + const int end1 = std::min(end, mSize); + const int end2 = std::max(end - end1, 0); + for(int i = start; i < end1; i++) { - Element element = { n, value }; - mData.pushBack(element); + mData[i] = value; } - else + for(int i = 0; i < end2; i++) { - mData.back().mCount += n; + mData[i] = value; } - mCount += n; - trim(); + int newCount = std::min(mCount + static_cast<int32_t>(n), mSize); + mBegin = (mBegin + n-(newCount-mCount))%mSize; //move mBegin by the amount of replaced elements + mCount = newCount; } float average() const { - NV_CLOTH_ASSERT(!mData.empty()); + NV_CLOTH_ASSERT(!empty()); float sum = 0.0f; - Vector<Element>::Type::ConstIterator it = mData.begin(), end = mData.end(); - for (; it != end; ++it) - sum += it->mCount * it->mValue; - - // linear weight ramps at both ends for smoother average - uint32_t n = mCount / 8; - float ramp = 0.0f, temp = 0.0f; - uint32_t countLo = (it = mData.begin())->mCount; - uint32_t countHi = (--end)->mCount; - for (uint32_t i = 0; i < n; ++i) + int totalWeight = 0; { - if (i == countLo) - countLo += (++it)->mCount; - if (i == countHi) - countHi += (--end)->mCount; - - temp += it->mValue + end->mValue; - ramp += temp; + int count = 0; + int end = std::min(mBegin + mCount, mSize); + int rampSize = std::max(1,mCount / 8); + for(int i = mBegin; i < end; i++) + { + //ramp weight /''''''\ . + int weight = std::min( + std::min(count+1, rampSize), //left ramp /''' + std::min(mCount-(count), rampSize)); //right ramp '''\ . + sum += mData[i] * weight; + totalWeight += weight; + count++; + } + int leftOver = mCount-(end - mBegin); + for(int i = 0; i < leftOver; i++) + { + int weight = std::min(std::min(count + 1, rampSize), std::min(mCount - (count), rampSize)); + sum += mData[i] * weight; + totalWeight += weight; + count++; + } + NV_CLOTH_ASSERT(count == mCount); } - uint32_t num = (mCount - n) * (n + 1); - return (sum * (n + 1) - ramp) / num; + return sum / static_cast<float>(totalWeight); } - private: - // remove oldest (front) values until mCount<=mSize - void trim() - { - Vector<Element>::Type::Iterator it = mData.begin(); - for (uint32_t k = mSize; k < mCount; it += k <= mCount) - { - k += it->mCount; - it->mCount = k - mCount; - } - - if (it != mData.begin()) - { - Vector<Element>::Type tmp; - tmp.assign(it, mData.end()); - tmp.swap(mData); - } - - mCount = std::min(mCount, mSize); - } - - Vector<Element>::Type mData; - - uint32_t mCount; - uint32_t mSize; +private: + float* mData; //Ring buffer + int32_t mBegin; //Index to first element + int32_t mCount; //current number of elements + int32_t mSize; //max ringbuffer size }; } } diff --git a/NvCloth/src/SwFactory.cpp b/NvCloth/src/SwFactory.cpp index 6f46c96..5c4b625 100644 --- a/NvCloth/src/SwFactory.cpp +++ b/NvCloth/src/SwFactory.cpp @@ -51,7 +51,7 @@ cloth::SwFactory::SwFactory() cloth::SwFactory::~SwFactory() { - NV_CLOTH_ASSERT(("All fabrics created by this factory need to be deleted before this factory is destroyed.", mFabrics.size() == 0)); + NV_CLOTH_ASSERT_WITH_MESSAGE("All fabrics created by this factory need to be deleted before this factory is destroyed.", mFabrics.size() == 0); } cloth::Fabric* cloth::SwFactory::createFabric(uint32_t numParticles, Range<const uint32_t> phaseIndices, diff --git a/NvCloth/src/SwSolverKernel.cpp b/NvCloth/src/SwSolverKernel.cpp index dec46d7..eec7956 100644 --- a/NvCloth/src/SwSolverKernel.cpp +++ b/NvCloth/src/SwSolverKernel.cpp @@ -414,7 +414,7 @@ void applyWind(T4f* __restrict curIt, const T4f* __restrict prevIt, const uint16 T4f previous = oneThird * (p0 + p1 + p2); //offset of the triangle center, including wind - T4f delta = current - previous + wind; //wind is also already multiplied by dt in the iteration state so everything it in the same units + T4f delta = current - previous + wind; //wind is also already multiplied by dt in the iteration state so everything is in the same units if (IsTurning) { diff --git a/NvCloth/src/cuda/CuSolverKernel.cu b/NvCloth/src/cuda/CuSolverKernel.cu index edb66dc..1ad5896 100644 --- a/NvCloth/src/cuda/CuSolverKernel.cu +++ b/NvCloth/src/cuda/CuSolverKernel.cu @@ -849,10 +849,17 @@ __device__ float3 cross3(const float3& u, const float3& v) __device__ void applyImpulse(SharedParticleData::ParticleReferenceType pos, const float3& impulse) { float scale = -pos.mReferences[3]; + +#if CONVERT_ADDRESSES //Use this instead of atomicAdd function to work around compiler issue treating the pointer as global memory instead of shared memory - asm("red.shared.add.f32 [%0], %1;" :: POINTER_CONSTRAINT(pos.mReferences[0].mPtr), "f"(impulse.x * scale)); - asm("red.shared.add.f32 [%0], %1;" :: POINTER_CONSTRAINT(pos.mReferences[1].mPtr), "f"(impulse.y * scale)); - asm("red.shared.add.f32 [%0], %1;" :: POINTER_CONSTRAINT(pos.mReferences[2].mPtr), "f"(impulse.z * scale)); + asm("red.shared.add.f32 [%0], %1;" ::POINTER_CONSTRAINT(pos.mReferences[0].mPtr), "f"(impulse.x * scale)); + asm("red.shared.add.f32 [%0], %1;" ::POINTER_CONSTRAINT(pos.mReferences[1].mPtr), "f"(impulse.y * scale)); + asm("red.shared.add.f32 [%0], %1;" ::POINTER_CONSTRAINT(pos.mReferences[2].mPtr), "f"(impulse.z * scale)); +#else + atomicAdd(pos.mReferences[0].mPtr, impulse.x * scale); + atomicAdd(pos.mReferences[1].mPtr, impulse.y * scale); + atomicAdd(pos.mReferences[2].mPtr, impulse.z * scale); +#endif } __device__ void applyImpulse(GlobalParticleData::ParticleReferenceType pos, const float3& impulse) { |