diff options
| author | Marijn Tamis <[email protected]> | 2019-04-01 14:21:09 +0200 |
|---|---|---|
| committer | Marijn Tamis <[email protected]> | 2019-04-01 14:21:09 +0200 |
| commit | d243404d4ba88bcf53f7310cc8980b4efe38c19f (patch) | |
| tree | dcc8ce2904e9f813e03f71f825c4d3c9ec565d91 /NvCloth/include | |
| parent | Add new SetSpheres and SetPlanes api's to bring them in line with setTriangles. (diff) | |
| download | nvcloth-d243404d4ba88bcf53f7310cc8980b4efe38c19f.tar.xz nvcloth-d243404d4ba88bcf53f7310cc8980b4efe38c19f.zip | |
1.1.6 Release.1.1.6
Diffstat (limited to 'NvCloth/include')
20 files changed, 3870 insertions, 8 deletions
diff --git a/NvCloth/include/NvCloth/Allocator.h b/NvCloth/include/NvCloth/Allocator.h index ade0698..e66f34a 100644 --- a/NvCloth/include/NvCloth/Allocator.h +++ b/NvCloth/include/NvCloth/Allocator.h @@ -35,10 +35,10 @@ /** \cond HIDDEN_SYMBOLS */ #pragma once -#include <PsArray.h> -#include <PsHashMap.h> -#include <PsAlignedMalloc.h> +#include "NvCloth/ps/PsArray.h" +#include "NvCloth/ps/PsHashMap.h" #include "NvCloth/Callbacks.h" +#include "NvCloth/ps/PsAlignedMalloc.h" namespace nv { diff --git a/NvCloth/include/NvCloth/Callbacks.h b/NvCloth/include/NvCloth/Callbacks.h index 72ebe24..f5b54a9 100644 --- a/NvCloth/include/NvCloth/Callbacks.h +++ b/NvCloth/include/NvCloth/Callbacks.h @@ -36,7 +36,7 @@ #pragma once #include <foundation/PxPreprocessor.h> #include <foundation/PxProfiler.h> -#include <foundation/PxAssert.h> +#include <foundation/PxAllocatorCallback.h> #ifndef NV_CLOTH_IMPORT #define NV_CLOTH_IMPORT PX_DLL_IMPORT #endif @@ -79,6 +79,25 @@ NV_CLOTH_API(void) //Allocator NV_CLOTH_API(physx::PxAllocatorCallback*) GetNvClothAllocator(); //Only use internally +#if !PX_DOXYGEN +namespace physx +{ +#endif + +/* Base class to handle assert failures */ +class PxAssertHandler +{ +public: + virtual ~PxAssertHandler() + { + } + virtual void operator()(const char* exp, const char* file, int line, bool& ignore) = 0; +}; + +#if !PX_DOXYGEN +} // namespace physx +#endif + namespace nv { namespace cloth @@ -107,7 +126,7 @@ NV_CLOTH_API(physx::PxAssertHandler*) GetNvClothAssertHandler(); //This function #else #if PX_VC #define PX_CODE_ANALYSIS_ASSUME(exp) \ - __analysis_assume(!!(exp)) // This macro will be used to get rid of analysis warning messages if a PX_ASSERT is used + __analysis_assume(!!(exp)) // This macro will be used to get rid of analysis warning messages if a NV_CLOTH_ASSERT is used // to "guard" illegal mem access, for example. #else #define PX_CODE_ANALYSIS_ASSUME(exp) @@ -121,7 +140,7 @@ NV_CLOTH_API(physx::PxAssertHandler*) GetNvClothAssertHandler(); //This function #define NV_CLOTH_ASSERT_WITH_MESSAGE(message, exp) \ { \ static bool _ignore = false; \ - ((void)((!!(exp)) || (!_ignore && (physx::PxGetAssertHandler()(message, __FILE__, __LINE__, _ignore), false)))); \ + ((void)((!!(exp)) || (!_ignore && ((*nv::cloth::GetNvClothAssertHandler())(message, __FILE__, __LINE__, _ignore), false)))); \ PX_CODE_ANALYSIS_ASSUME(exp); \ } #endif diff --git a/NvCloth/include/NvCloth/Fabric.h b/NvCloth/include/NvCloth/Fabric.h index 02b1140..eb0fc5e 100644 --- a/NvCloth/include/NvCloth/Fabric.h +++ b/NvCloth/include/NvCloth/Fabric.h @@ -31,7 +31,7 @@ #include "NvCloth/Callbacks.h" #include "NvCloth/Allocator.h" -#include <PsAtomic.h> +#include "NvCloth/ps/PsAtomic.h" namespace nv { diff --git a/NvCloth/include/NvCloth/Solver.h b/NvCloth/include/NvCloth/Solver.h index 2ff0e9c..7ebe290 100644 --- a/NvCloth/include/NvCloth/Solver.h +++ b/NvCloth/include/NvCloth/Solver.h @@ -31,7 +31,7 @@ #include "NvCloth/Allocator.h" #include "NvCloth/Range.h" -#include "PsArray.h" +#include "NvCloth/ps/PsArray.h" namespace nv { diff --git a/NvCloth/include/NvCloth/ps/Ps.h b/NvCloth/include/NvCloth/ps/Ps.h new file mode 100644 index 0000000..89fc9c7 --- /dev/null +++ b/NvCloth/include/NvCloth/ps/Ps.h @@ -0,0 +1,70 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PS_H +#define PSFOUNDATION_PS_H + +/*! \file top level include file for shared foundation */ + +#include "foundation/Px.h" + +/** +Platform specific defines +*/ +#if PX_WINDOWS_FAMILY || PX_XBOXONE +#pragma intrinsic(memcmp) +#pragma intrinsic(memcpy) +#pragma intrinsic(memset) +#pragma intrinsic(abs) +#pragma intrinsic(labs) +#endif + +// An expression that should expand to nothing in non PX_CHECKED builds. +// We currently use this only for tagging the purpose of containers for memory use tracking. +#if PX_CHECKED +#define PX_DEBUG_EXP(x) (x) +#else +#define PX_DEBUG_EXP(x) +#endif + +#define PX_SIGN_BITMASK 0x80000000 + +namespace physx +{ +namespace shdfnd +{ +// Int-as-bool type - has some uses for efficiency and with SIMD +typedef int IntBool; +static const IntBool IntFalse = 0; +static const IntBool IntTrue = 1; +} + +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PS_H diff --git a/NvCloth/include/NvCloth/ps/PsAlignedMalloc.h b/NvCloth/include/NvCloth/ps/PsAlignedMalloc.h new file mode 100644 index 0000000..4be8409 --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsAlignedMalloc.h @@ -0,0 +1,88 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSALIGNEDMALLOC_H +#define PSFOUNDATION_PSALIGNEDMALLOC_H + +#include "PsUserAllocated.h" + +/*! +Allocate aligned memory. +Alignment must be a power of 2! +-- should be templated by a base allocator +*/ + +namespace physx +{ +namespace shdfnd +{ +/** +Allocator, which is used to access the global PxAllocatorCallback instance +(used for dynamic data types template instantiation), which can align memory +*/ + +// SCS: AlignedMalloc with 3 params not found, seems not used on PC either +// disabled for now to avoid GCC error + +template <uint32_t N, typename BaseAllocator = NonTrackingAllocator> +class AlignedAllocator : public BaseAllocator +{ + public: + AlignedAllocator(const BaseAllocator& base = BaseAllocator()) : BaseAllocator(base) + { + } + + void* allocate(size_t size, const char* file, int line) + { + size_t pad = N - 1 + sizeof(size_t); // store offset for delete. + uint8_t* base = reinterpret_cast<uint8_t*>(BaseAllocator::allocate(size + pad, file, line)); + if(!base) + return NULL; + + uint8_t* ptr = reinterpret_cast<uint8_t*>(size_t(base + pad) & ~(size_t(N) - 1)); // aligned pointer, ensuring N + // is a size_t + // wide mask + reinterpret_cast<size_t*>(ptr)[-1] = size_t(ptr - base); // store offset + + return ptr; + } + void deallocate(void* ptr) + { + if(ptr == NULL) + return; + + uint8_t* base = reinterpret_cast<uint8_t*>(ptr) - reinterpret_cast<size_t*>(ptr)[-1]; + BaseAllocator::deallocate(base); + } +}; + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSALIGNEDMALLOC_H diff --git a/NvCloth/include/NvCloth/ps/PsAllocator.cpp b/NvCloth/include/NvCloth/ps/PsAllocator.cpp new file mode 100644 index 0000000..4e6514f --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsAllocator.cpp @@ -0,0 +1,51 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#include "PsAllocator.h" +#include "NvCloth/Callbacks.h" + +namespace physx +{ +namespace shdfnd +{ + +void* Allocator::allocate(size_t size, const char* file, int line) +{ + if(!size) + return 0; + return GetNvClothAllocator()->allocate(size, "", file, line); +} +void Allocator::deallocate(void* ptr) +{ + if(ptr) + GetNvClothAllocator()->deallocate(ptr); +} + +} // namespace shdfnd +} // namespace physx diff --git a/NvCloth/include/NvCloth/ps/PsAllocator.h b/NvCloth/include/NvCloth/ps/PsAllocator.h new file mode 100644 index 0000000..37605ee --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsAllocator.h @@ -0,0 +1,343 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSALLOCATOR_H +#define PSFOUNDATION_PSALLOCATOR_H + +#include "foundation/PxAllocatorCallback.h" +//#include "foundation/PxFoundation.h" +#include "Ps.h" +#include "../Callbacks.h" + +#if(PX_WINDOWS_FAMILY || PX_XBOXONE) +#include <exception> +#include <typeinfo.h> +#endif +#if(PX_APPLE_FAMILY) +#include <typeinfo> +#endif + +#include <new> + +// Allocation macros going through user allocator +#define PX_ALLOC(n, name) physx::shdfnd::NonTrackingAllocator().allocate(n, __FILE__, __LINE__) +#define PX_ALLOC_TEMP(n, name) PX_ALLOC(n, name) +#define PX_FREE(x) physx::shdfnd::NonTrackingAllocator().deallocate(x) +#define PX_FREE_AND_RESET(x) \ + { \ + PX_FREE(x); \ + x = 0; \ + } + +// The following macros support plain-old-types and classes derived from UserAllocated. +#define PX_NEW(T) new (physx::shdfnd::ReflectionAllocator<T>(), __FILE__, __LINE__) T +#define PX_NEW_TEMP(T) PX_NEW(T) +#define PX_DELETE(x) delete x +#define PX_DELETE_AND_RESET(x) \ + { \ + PX_DELETE(x); \ + x = 0; \ + } +#define PX_DELETE_POD(x) \ + { \ + PX_FREE(x); \ + x = 0; \ + } +#define PX_DELETE_ARRAY(x) \ + { \ + PX_DELETE([] x); \ + x = 0; \ + } + +// aligned allocation +#define PX_ALIGNED16_ALLOC(n) physx::shdfnd::AlignedAllocator<16>().allocate(n, __FILE__, __LINE__) +#define PX_ALIGNED16_FREE(x) physx::shdfnd::AlignedAllocator<16>().deallocate(x) + +//! placement new macro to make it easy to spot bad use of 'new' +#define PX_PLACEMENT_NEW(p, T) new (p) T + +#if PX_DEBUG || PX_CHECKED +#define PX_USE_NAMED_ALLOCATOR 1 +#else +#define PX_USE_NAMED_ALLOCATOR 0 +#endif + +// Don't use inline for alloca !!! +#if PX_WINDOWS_FAMILY +#include <malloc.h> +#define PxAlloca(x) _alloca(x) +#elif PX_LINUX || PX_ANDROID +#include <malloc.h> +#define PxAlloca(x) alloca(x) +#elif PX_APPLE_FAMILY +#include <alloca.h> +#define PxAlloca(x) alloca(x) +#elif PX_PS4 +#include <memory.h> +#define PxAlloca(x) alloca(x) +#elif PX_XBOXONE +#include <malloc.h> +#define PxAlloca(x) alloca(x) +#elif PX_SWITCH +#include <malloc.h> +#define PxAlloca(x) alloca(x) +#endif + +#define PxAllocaAligned(x, alignment) ((size_t(PxAlloca(x + alignment)) + (alignment - 1)) & ~size_t(alignment - 1)) + +namespace physx +{ +namespace shdfnd +{ + +//NV_CLOTH_IMPORT PxAllocatorCallback& getAllocator(); + +/** +Allocator used to access the global PxAllocatorCallback instance without providing additional information. +*/ + +class NV_CLOTH_IMPORT Allocator +{ + public: + Allocator(const char* = 0) + { + } + void* allocate(size_t size, const char* file, int line); + void deallocate(void* ptr); +}; + +/* + * Bootstrap allocator using malloc/free. + * Don't use unless your objects get allocated before foundation is initialized. + */ +class RawAllocator +{ + public: + RawAllocator(const char* = 0) + { + } + void* allocate(size_t size, const char*, int) + { + // malloc returns valid pointer for size==0, no need to check + return ::malloc(size); + } + void deallocate(void* ptr) + { + // free(0) is guaranteed to have no side effect, no need to check + ::free(ptr); + } +}; + +/* + * Allocator that simply calls straight back to the application without tracking. + * This is used by the heap (Foundation::mNamedAllocMap) that tracks allocations + * because it needs to be able to grow as a result of an allocation. + * Making the hash table re-entrant to deal with this may not make sense. + */ +class NonTrackingAllocator +{ + public: + PX_FORCE_INLINE NonTrackingAllocator(const char* = 0) + { + } + PX_FORCE_INLINE void* allocate(size_t size, const char* file, int line) + { + return !size ? 0 : GetNvClothAllocator()->allocate(size, "NonTrackedAlloc", file, line); + } + PX_FORCE_INLINE void deallocate(void* ptr) + { + if(ptr) + GetNvClothAllocator()->deallocate(ptr); + } +}; + +/* +\brief Virtual allocator callback used to provide run-time defined allocators to foundation types like Array or Bitmap. + This is used by VirtualAllocator +*/ +class VirtualAllocatorCallback +{ + public: + VirtualAllocatorCallback() + { + } + virtual ~VirtualAllocatorCallback() + { + } + virtual void* allocate(const size_t size, const char* file, const int line) = 0; + virtual void deallocate(void* ptr) = 0; +}; + +/* +\brief Virtual allocator to be used by foundation types to provide run-time defined allocators. +Due to the fact that Array extends its allocator, rather than contains a reference/pointer to it, the VirtualAllocator +must +be a concrete type containing a pointer to a virtual callback. The callback may not be available at instantiation time, +therefore +methods are provided to set the callback later. +*/ +class VirtualAllocator +{ + public: + VirtualAllocator(VirtualAllocatorCallback* callback = NULL) : mCallback(callback) + { + } + + void* allocate(const size_t size, const char* file, const int line) + { + NV_CLOTH_ASSERT(mCallback); + if(size) + return mCallback->allocate(size, file, line); + return NULL; + } + void deallocate(void* ptr) + { + NV_CLOTH_ASSERT(mCallback); + if(ptr) + mCallback->deallocate(ptr); + } + + void setCallback(VirtualAllocatorCallback* callback) + { + mCallback = callback; + } + VirtualAllocatorCallback* getCallback() + { + return mCallback; + } + + private: + VirtualAllocatorCallback* mCallback; + VirtualAllocator& operator=(const VirtualAllocator&); +}; + +/** +Allocator used to access the global PxAllocatorCallback instance using a static name derived from T. +*/ + +template <typename T> +class ReflectionAllocator +{ + static const char* getName() + { + if(true) + return "<allocation names disabled>"; +#if PX_GCC_FAMILY + return __PRETTY_FUNCTION__; +#else + // name() calls malloc(), raw_name() wouldn't + return typeid(T).name(); +#endif + } + + public: + ReflectionAllocator(const PxEMPTY) + { + } + ReflectionAllocator(const char* = 0) + { + } + inline ReflectionAllocator(const ReflectionAllocator&) + { + } + void* allocate(size_t size, const char* filename, int line) + { + return size ? GetNvClothAllocator()->allocate(size, getName(), filename, line) : 0; + } + void deallocate(void* ptr) + { + if(ptr) + GetNvClothAllocator()->deallocate(ptr); + } +}; + +template <typename T> +struct AllocatorTraits +{ + typedef ReflectionAllocator<T> Type; +}; + +// if you get a build error here, you are trying to PX_NEW a class +// that is neither plain-old-type nor derived from UserAllocated +template <typename T, typename X> +union EnableIfPod +{ + int i; + T t; + typedef X Type; +}; + +} // namespace shdfnd +} // namespace physx + +// Global placement new for ReflectionAllocator templated by +// plain-old-type. Allows using PX_NEW for pointers and built-in-types. +// +// ATTENTION: You need to use PX_DELETE_POD or PX_FREE to deallocate +// memory, not PX_DELETE. PX_DELETE_POD redirects to PX_FREE. +// +// Rationale: PX_DELETE uses global operator delete(void*), which we dont' want to overload. +// Any other definition of PX_DELETE couldn't support array syntax 'PX_DELETE([]a);'. +// PX_DELETE_POD was preferred over PX_DELETE_ARRAY because it is used +// less often and applies to both single instances and arrays. +template <typename T> +PX_INLINE void* operator new(size_t size, physx::shdfnd::ReflectionAllocator<T> alloc, const char* fileName, + typename physx::shdfnd::EnableIfPod<T, int>::Type line) +{ + return alloc.allocate(size, fileName, line); +} + +template <typename T> +PX_INLINE void* operator new [](size_t size, physx::shdfnd::ReflectionAllocator<T> alloc, const char* fileName, + typename physx::shdfnd::EnableIfPod<T, int>::Type line) +{ return alloc.allocate(size, fileName, line); } + +// If construction after placement new throws, this placement delete is being called. +template <typename T> +PX_INLINE void operator delete(void* ptr, physx::shdfnd::ReflectionAllocator<T> alloc, const char* fileName, + typename physx::shdfnd::EnableIfPod<T, int>::Type line) +{ + PX_UNUSED(fileName); + PX_UNUSED(line); + + alloc.deallocate(ptr); +} + +// If construction after placement new throws, this placement delete is being called. +template <typename T> +PX_INLINE void operator delete [](void* ptr, physx::shdfnd::ReflectionAllocator<T> alloc, const char* fileName, + typename physx::shdfnd::EnableIfPod<T, int>::Type line) +{ + PX_UNUSED(fileName); + PX_UNUSED(line); + + alloc.deallocate(ptr); +} + +#endif // #ifndef PSFOUNDATION_PSALLOCATOR_H diff --git a/NvCloth/include/NvCloth/ps/PsArray.h b/NvCloth/include/NvCloth/ps/PsArray.h new file mode 100644 index 0000000..e5f6698 --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsArray.h @@ -0,0 +1,721 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSARRAY_H +#define PSFOUNDATION_PSARRAY_H + +#include "../Callbacks.h" +#include "foundation/PxIntrinsics.h" +#include "PsBasicTemplates.h" +#include "NvCloth/ps/PsAllocator.h" + +namespace physx +{ +namespace shdfnd +{ +template <class Serializer> +void exportArray(Serializer& stream, const void* data, uint32_t size, uint32_t sizeOfElement, uint32_t capacity); +char* importArray(char* address, void** data, uint32_t size, uint32_t sizeOfElement, uint32_t capacity); + +/*! +An array is a sequential container. + +Implementation note +* entries between 0 and size are valid objects +* we use inheritance to build this because the array is included inline in a lot + of objects and we want the allocator to take no space if it's not stateful, which + aggregation doesn't allow. Also, we want the metadata at the front for the inline + case where the allocator contains some inline storage space +*/ +template <class T, class Alloc = typename AllocatorTraits<T>::Type> +class Array : protected Alloc +{ + public: + typedef T* Iterator; + typedef const T* ConstIterator; + + explicit Array(const PxEMPTY v) : Alloc(v) + { + if(mData) + mCapacity |= PX_SIGN_BITMASK; + } + + /*! + Default array constructor. Initialize an empty array + */ + PX_INLINE explicit Array(const Alloc& alloc = Alloc()) : Alloc(alloc), mData(0), mSize(0), mCapacity(0) + { + } + + /*! + Initialize array with given capacity + */ + PX_INLINE explicit Array(uint32_t size, const T& a = T(), const Alloc& alloc = Alloc()) + : Alloc(alloc), mData(0), mSize(0), mCapacity(0) + { + resize(size, a); + } + + /*! + Copy-constructor. Copy all entries from other array + */ + template <class A> + PX_INLINE explicit Array(const Array<T, A>& other, const Alloc& alloc = Alloc()) + : Alloc(alloc) + { + copy(other); + } + + // This is necessary else the basic default copy constructor is used in the case of both arrays being of the same + // template instance + // The C++ standard clearly states that a template constructor is never a copy constructor [2]. In other words, + // the presence of a template constructor does not suppress the implicit declaration of the copy constructor. + // Also never make a copy constructor explicit, or copy-initialization* will no longer work. This is because + // 'binding an rvalue to a const reference requires an accessible copy constructor' (http://gcc.gnu.org/bugs/) + // *http://stackoverflow.com/questions/1051379/is-there-a-difference-in-c-between-copy-initialization-and-assignment-initializ + PX_INLINE Array(const Array& other, const Alloc& alloc = Alloc()) : Alloc(alloc) + { + copy(other); + } + + /*! + Initialize array with given length + */ + PX_INLINE explicit Array(const T* first, const T* last, const Alloc& alloc = Alloc()) + : Alloc(alloc), mSize(last < first ? 0 : uint32_t(last - first)), mCapacity(mSize) + { + mData = allocate(mSize); + copy(mData, mData + mSize, first); + } + + /*! + Destructor + */ + PX_INLINE ~Array() + { + destroy(mData, mData + mSize); + + if(capacity() && !isInUserMemory()) + deallocate(mData); + } + + /*! + Assignment operator. Copy content (deep-copy) + */ + template <class A> + PX_INLINE Array& operator=(const Array<T, A>& rhs) + { + if(&rhs == this) + return *this; + + clear(); + reserve(rhs.mSize); + copy(mData, mData + rhs.mSize, rhs.mData); + + mSize = rhs.mSize; + return *this; + } + + PX_INLINE Array& operator=(const Array& t) // Needs to be declared, see comment at copy-constructor + { + return operator=<Alloc>(t); + } + + /*! + Array indexing operator. + \param i + The index of the element that will be returned. + \return + The element i in the array. + */ + PX_FORCE_INLINE const T& operator[](uint32_t i) const + { + NV_CLOTH_ASSERT(i < mSize); + return mData[i]; + } + + /*! + Array indexing operator. + \param i + The index of the element that will be returned. + \return + The element i in the array. + */ + PX_FORCE_INLINE T& operator[](uint32_t i) + { + NV_CLOTH_ASSERT(i < mSize); + return mData[i]; + } + + /*! + Returns a pointer to the initial element of the array. + \return + a pointer to the initial element of the array. + */ + PX_FORCE_INLINE ConstIterator begin() const + { + return mData; + } + + PX_FORCE_INLINE Iterator begin() + { + return mData; + } + + /*! + Returns an iterator beyond the last element of the array. Do not dereference. + \return + a pointer to the element beyond the last element of the array. + */ + + PX_FORCE_INLINE ConstIterator end() const + { + return mData + mSize; + } + + PX_FORCE_INLINE Iterator end() + { + return mData + mSize; + } + + /*! + Returns a reference to the first element of the array. Undefined if the array is empty. + \return a reference to the first element of the array + */ + + PX_FORCE_INLINE const T& front() const + { + NV_CLOTH_ASSERT(mSize); + return mData[0]; + } + + PX_FORCE_INLINE T& front() + { + NV_CLOTH_ASSERT(mSize); + return mData[0]; + } + + /*! + Returns a reference to the last element of the array. Undefined if the array is empty + \return a reference to the last element of the array + */ + + PX_FORCE_INLINE const T& back() const + { + NV_CLOTH_ASSERT(mSize); + return mData[mSize - 1]; + } + + PX_FORCE_INLINE T& back() + { + NV_CLOTH_ASSERT(mSize); + return mData[mSize - 1]; + } + + /*! + Returns the number of entries in the array. This can, and probably will, + differ from the array capacity. + \return + The number of of entries in the array. + */ + PX_FORCE_INLINE uint32_t size() const + { + return mSize; + } + + /*! + Clears the array. + */ + PX_INLINE void clear() + { + destroy(mData, mData + mSize); + mSize = 0; + } + + /*! + Returns whether the array is empty (i.e. whether its size is 0). + \return + true if the array is empty + */ + PX_FORCE_INLINE bool empty() const + { + return mSize == 0; + } + + /*! + Finds the first occurrence of an element in the array. + \param a + The element to find. + */ + + PX_INLINE Iterator find(const T& a) + { + uint32_t index; + for(index = 0; index < mSize && mData[index] != a; index++) + ; + return mData + index; + } + + PX_INLINE ConstIterator find(const T& a) const + { + uint32_t index; + for(index = 0; index < mSize && mData[index] != a; index++) + ; + return mData + index; + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Adds one element to the end of the array. Operation is O(1). + \param a + The element that will be added to this array. + */ + ///////////////////////////////////////////////////////////////////////// + + PX_FORCE_INLINE T& pushBack(const T& a) + { + if(capacity() <= mSize) + return growAndPushBack(a); + + PX_PLACEMENT_NEW(reinterpret_cast<void*>(mData + mSize), T)(a); + + return mData[mSize++]; + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Returns the element at the end of the array. Only legal if the array is non-empty. + */ + ///////////////////////////////////////////////////////////////////////// + PX_INLINE T popBack() + { + NV_CLOTH_ASSERT(mSize); + T t = mData[mSize - 1]; + + mData[--mSize].~T(); + + return t; + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Construct one element at the end of the array. Operation is O(1). + */ + ///////////////////////////////////////////////////////////////////////// + PX_INLINE T& insert() + { + if(capacity() <= mSize) + grow(capacityIncrement()); + + T* ptr = mData + mSize++; + new (ptr) T; // not 'T()' because PODs should not get default-initialized. + return *ptr; + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Subtracts the element on position i from the array and replace it with + the last element. + Operation is O(1) + \param i + The position of the element that will be subtracted from this array. + */ + ///////////////////////////////////////////////////////////////////////// + PX_INLINE void replaceWithLast(uint32_t i) + { + NV_CLOTH_ASSERT(i < mSize); + mData[i] = mData[--mSize]; + + mData[mSize].~T(); + } + + PX_INLINE void replaceWithLast(Iterator i) + { + replaceWithLast(static_cast<uint32_t>(i - mData)); + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Replaces the first occurrence of the element a with the last element + Operation is O(n) + \param a + The position of the element that will be subtracted from this array. + \return true if the element has been removed. + */ + ///////////////////////////////////////////////////////////////////////// + + PX_INLINE bool findAndReplaceWithLast(const T& a) + { + uint32_t index = 0; + while(index < mSize && mData[index] != a) + ++index; + if(index == mSize) + return false; + replaceWithLast(index); + return true; + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Subtracts the element on position i from the array. Shift the entire + array one step. + Operation is O(n) + \param i + The position of the element that will be subtracted from this array. + */ + ///////////////////////////////////////////////////////////////////////// + PX_INLINE void remove(uint32_t i) + { + NV_CLOTH_ASSERT(i < mSize); + + T* it = mData + i; + it->~T(); + while (++i < mSize) + { + new (it) T(mData[i]); + ++it; + it->~T(); + } + --mSize; + } + + ///////////////////////////////////////////////////////////////////////// + /*! + Removes a range from the array. Shifts the array so order is maintained. + Operation is O(n) + \param begin + The starting position of the element that will be subtracted from this array. + \param count + The number of elments that will be subtracted from this array. + */ + ///////////////////////////////////////////////////////////////////////// + PX_INLINE void removeRange(uint32_t begin, uint32_t count) + { + NV_CLOTH_ASSERT(begin < mSize); + NV_CLOTH_ASSERT((begin + count) <= mSize); + + for(uint32_t i = 0; i < count; i++) + mData[begin + i].~T(); // call the destructor on the ones being removed first. + + T* dest = &mData[begin]; // location we are copying the tail end objects to + T* src = &mData[begin + count]; // start of tail objects + uint32_t move_count = mSize - (begin + count); // compute remainder that needs to be copied down + + for(uint32_t i = 0; i < move_count; i++) + { + new (dest) T(*src); // copy the old one to the new location + src->~T(); // call the destructor on the old location + dest++; + src++; + } + mSize -= count; + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Resize array + */ + ////////////////////////////////////////////////////////////////////////// + PX_NOINLINE void resize(const uint32_t size, const T& a = T()); + + PX_NOINLINE void resizeUninitialized(const uint32_t size); + + ////////////////////////////////////////////////////////////////////////// + /*! + Resize array such that only as much memory is allocated to hold the + existing elements + */ + ////////////////////////////////////////////////////////////////////////// + PX_INLINE void shrink() + { + recreate(mSize); + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Deletes all array elements and frees memory. + */ + ////////////////////////////////////////////////////////////////////////// + PX_INLINE void reset() + { + resize(0); + shrink(); + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Ensure that the array has at least size capacity. + */ + ////////////////////////////////////////////////////////////////////////// + PX_INLINE void reserve(const uint32_t capacity) + { + if(capacity > this->capacity()) + grow(capacity); + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Query the capacity(allocated mem) for the array. + */ + ////////////////////////////////////////////////////////////////////////// + PX_FORCE_INLINE uint32_t capacity() const + { + return mCapacity & ~PX_SIGN_BITMASK; + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Unsafe function to force the size of the array + */ + ////////////////////////////////////////////////////////////////////////// + PX_FORCE_INLINE void forceSize_Unsafe(uint32_t size) + { + NV_CLOTH_ASSERT(size <= mCapacity); + mSize = size; + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Swap contents of an array without allocating temporary storage + */ + ////////////////////////////////////////////////////////////////////////// + PX_INLINE void swap(Array<T, Alloc>& other) + { + shdfnd::swap(mData, other.mData); + shdfnd::swap(mSize, other.mSize); + shdfnd::swap(mCapacity, other.mCapacity); + } + + ////////////////////////////////////////////////////////////////////////// + /*! + Assign a range of values to this vector (resizes to length of range) + */ + ////////////////////////////////////////////////////////////////////////// + PX_INLINE void assign(const T* first, const T* last) + { + resizeUninitialized(uint32_t(last - first)); + copy(begin(), end(), first); + } + + // We need one bit to mark arrays that have been deserialized from a user-provided memory block. + // For alignment & memory saving purpose we store that bit in the rarely used capacity member. + PX_FORCE_INLINE uint32_t isInUserMemory() const + { + return mCapacity & PX_SIGN_BITMASK; + } + + /// return reference to allocator + PX_INLINE Alloc& getAllocator() + { + return *this; + } + + protected: + // constructor for where we don't own the memory + Array(T* memory, uint32_t size, uint32_t capacity, const Alloc& alloc = Alloc()) + : Alloc(alloc), mData(memory), mSize(size), mCapacity(capacity | PX_SIGN_BITMASK) + { + } + + template <class A> + PX_NOINLINE void copy(const Array<T, A>& other); + + PX_INLINE T* allocate(uint32_t size) + { + if(size > 0) + { + T* p = reinterpret_cast<T*>(Alloc::allocate(sizeof(T) * size, __FILE__, __LINE__)); +/** +Mark a specified amount of memory with 0xcd pattern. This is used to check that the meta data +definition for serialized classes is complete in checked builds. +*/ +#if PX_CHECKED + if(p) + { + for(uint32_t i = 0; i < (sizeof(T) * size); ++i) + reinterpret_cast<uint8_t*>(p)[i] = 0xcd; + } +#endif + return p; + } + return 0; + } + + PX_INLINE void deallocate(void* mem) + { + Alloc::deallocate(mem); + } + + static PX_INLINE void create(T* first, T* last, const T& a) + { + for(; first < last; ++first) + ::new (first) T(a); + } + + static PX_INLINE void copy(T* first, T* last, const T* src) + { + if(last <= first) + return; + + for(; first < last; ++first, ++src) + ::new (first) T(*src); + } + + static PX_INLINE void destroy(T* first, T* last) + { + for(; first < last; ++first) + first->~T(); + } + + /*! + Called when pushBack() needs to grow the array. + \param a The element that will be added to this array. + */ + PX_NOINLINE T& growAndPushBack(const T& a); + + /*! + Resizes the available memory for the array. + + \param capacity + The number of entries that the set should be able to hold. + */ + PX_INLINE void grow(uint32_t capacity) + { + NV_CLOTH_ASSERT(this->capacity() < capacity); + recreate(capacity); + } + + /*! + Creates a new memory block, copies all entries to the new block and destroys old entries. + + \param capacity + The number of entries that the set should be able to hold. + */ + PX_NOINLINE void recreate(uint32_t capacity); + + // The idea here is to prevent accidental bugs with pushBack or insert. Unfortunately + // it interacts badly with InlineArrays with smaller inline allocations. + // TODO(dsequeira): policy template arg, this is exactly what they're for. + PX_INLINE uint32_t capacityIncrement() const + { + const uint32_t capacity = this->capacity(); + return capacity == 0 ? 1 : capacity * 2; + } + + T* mData; + uint32_t mSize; + uint32_t mCapacity; +}; + +template <class T, class Alloc> +PX_NOINLINE void Array<T, Alloc>::resize(const uint32_t size, const T& a) +{ + reserve(size); + create(mData + mSize, mData + size, a); + destroy(mData + size, mData + mSize); + mSize = size; +} + +template <class T, class Alloc> +template <class A> +PX_NOINLINE void Array<T, Alloc>::copy(const Array<T, A>& other) +{ + if(!other.empty()) + { + mData = allocate(mSize = mCapacity = other.size()); + copy(mData, mData + mSize, other.begin()); + } + else + { + mData = NULL; + mSize = 0; + mCapacity = 0; + } + + // mData = allocate(other.mSize); + // mSize = other.mSize; + // mCapacity = other.mSize; + // copy(mData, mData + mSize, other.mData); +} + +template <class T, class Alloc> +PX_NOINLINE void Array<T, Alloc>::resizeUninitialized(const uint32_t size) +{ + reserve(size); + mSize = size; +} + +template <class T, class Alloc> +PX_NOINLINE T& Array<T, Alloc>::growAndPushBack(const T& a) +{ + uint32_t capacity = capacityIncrement(); + + T* newData = allocate(capacity); + NV_CLOTH_ASSERT((!capacity) || (newData && (newData != mData))); + copy(newData, newData + mSize, mData); + + // inserting element before destroying old array + // avoids referencing destroyed object when duplicating array element. + PX_PLACEMENT_NEW(reinterpret_cast<void*>(newData + mSize), T)(a); + + destroy(mData, mData + mSize); + if(!isInUserMemory()) + deallocate(mData); + + mData = newData; + mCapacity = capacity; + + return mData[mSize++]; +} + +template <class T, class Alloc> +PX_NOINLINE void Array<T, Alloc>::recreate(uint32_t capacity) +{ + T* newData = allocate(capacity); + NV_CLOTH_ASSERT((!capacity) || (newData && (newData != mData))); + + copy(newData, newData + mSize, mData); + destroy(mData, mData + mSize); + if(!isInUserMemory()) + deallocate(mData); + + mData = newData; + mCapacity = capacity; +} + +template <class T, class Alloc> +PX_INLINE void swap(Array<T, Alloc>& x, Array<T, Alloc>& y) +{ + x.swap(y); +} + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSARRAY_H diff --git a/NvCloth/include/NvCloth/ps/PsAtomic.h b/NvCloth/include/NvCloth/ps/PsAtomic.h new file mode 100644 index 0000000..dad18fc --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsAtomic.h @@ -0,0 +1,64 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSATOMIC_H +#define PSFOUNDATION_PSATOMIC_H + +#include "NvCloth/ps/Ps.h" +#include "NvCloth/Callbacks.h" + +namespace physx +{ +namespace shdfnd +{ +/* set *dest equal to val. Return the old value of *dest */ +NV_CLOTH_IMPORT int32_t atomicExchange(volatile int32_t* dest, int32_t val); + +/* if *dest == comp, replace with exch. Return original value of *dest */ +NV_CLOTH_IMPORT int32_t atomicCompareExchange(volatile int32_t* dest, int32_t exch, int32_t comp); + +/* if *dest == comp, replace with exch. Return original value of *dest */ +NV_CLOTH_IMPORT void* atomicCompareExchangePointer(volatile void** dest, void* exch, void* comp); + +/* increment the specified location. Return the incremented value */ +NV_CLOTH_IMPORT int32_t atomicIncrement(volatile int32_t* val); + +/* decrement the specified location. Return the decremented value */ +NV_CLOTH_IMPORT int32_t atomicDecrement(volatile int32_t* val); + +/* add delta to *val. Return the new value */ +NV_CLOTH_IMPORT int32_t atomicAdd(volatile int32_t* val, int32_t delta); + +/* compute the maximum of dest and val. Return the new value */ +NV_CLOTH_IMPORT int32_t atomicMax(volatile int32_t* val, int32_t val2); + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSATOMIC_H diff --git a/NvCloth/include/NvCloth/ps/PsBasicTemplates.h b/NvCloth/include/NvCloth/ps/PsBasicTemplates.h new file mode 100644 index 0000000..514da05 --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsBasicTemplates.h @@ -0,0 +1,146 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSBASICTEMPLATES_H +#define PSFOUNDATION_PSBASICTEMPLATES_H + +#include "Ps.h" + +namespace physx +{ +namespace shdfnd +{ +template <typename A> +struct Equal +{ + bool operator()(const A& a, const A& b) const + { + return a == b; + } +}; + +template <typename A> +struct Less +{ + bool operator()(const A& a, const A& b) const + { + return a < b; + } +}; + +template <typename A> +struct Greater +{ + bool operator()(const A& a, const A& b) const + { + return a > b; + } +}; + +template <class F, class S> +class Pair +{ + public: + F first; + S second; + Pair() : first(F()), second(S()) + { + } + Pair(const F& f, const S& s) : first(f), second(s) + { + } + Pair(const Pair& p) : first(p.first), second(p.second) + { + } + // CN - fix for /.../PsBasicTemplates.h(61) : warning C4512: 'physx::shdfnd::Pair<F,S>' : assignment operator could + // not be generated + Pair& operator=(const Pair& p) + { + first = p.first; + second = p.second; + return *this; + } + bool operator==(const Pair& p) const + { + return first == p.first && second == p.second; + } + bool operator<(const Pair& p) const + { + if(first < p.first) + return true; + else + return !(p.first < first) && (second < p.second); + } +}; + +template <unsigned int A> +struct LogTwo +{ + static const unsigned int value = LogTwo<(A >> 1)>::value + 1; +}; +template <> +struct LogTwo<1> +{ + static const unsigned int value = 0; +}; + +template <typename T> +struct UnConst +{ + typedef T Type; +}; +template <typename T> +struct UnConst<const T> +{ + typedef T Type; +}; + +template <typename T> +T pointerOffset(void* p, ptrdiff_t offset) +{ + return reinterpret_cast<T>(reinterpret_cast<char*>(p) + offset); +} +template <typename T> +T pointerOffset(const void* p, ptrdiff_t offset) +{ + return reinterpret_cast<T>(reinterpret_cast<const char*>(p) + offset); +} + +template <class T> +PX_CUDA_CALLABLE PX_INLINE void swap(T& x, T& y) +{ + const T tmp = x; + x = y; + y = tmp; +} + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSBASICTEMPLATES_H diff --git a/NvCloth/include/NvCloth/ps/PsBitUtils.h b/NvCloth/include/NvCloth/ps/PsBitUtils.h new file mode 100644 index 0000000..3d901ed --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsBitUtils.h @@ -0,0 +1,109 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSBITUTILS_H +#define PSFOUNDATION_PSBITUTILS_H + +#include "foundation/PxIntrinsics.h" +#include "../Callbacks.h" +#include "PsIntrinsics.h" +#include "Ps.h" + +namespace physx +{ +namespace shdfnd +{ +PX_INLINE uint32_t bitCount(uint32_t v) +{ + // from http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + uint32_t const w = v - ((v >> 1) & 0x55555555); + uint32_t const x = (w & 0x33333333) + ((w >> 2) & 0x33333333); + return (((x + (x >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; +} + +PX_INLINE bool isPowerOfTwo(uint32_t x) +{ + return x != 0 && (x & (x - 1)) == 0; +} + +// "Next Largest Power of 2 +// Given a binary integer value x, the next largest power of 2 can be computed by a SWAR algorithm +// that recursively "folds" the upper bits into the lower bits. This process yields a bit vector with +// the same most significant 1 as x, but all 1's below it. Adding 1 to that value yields the next +// largest power of 2. For a 32-bit value:" +PX_INLINE uint32_t nextPowerOfTwo(uint32_t x) +{ + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + return x + 1; +} + +/*! +Return the index of the highest set bit. Not valid for zero arg. +*/ + +PX_INLINE uint32_t lowestSetBit(uint32_t x) +{ + NV_CLOTH_ASSERT(x); + return lowestSetBitUnsafe(x); +} + +/*! +Return the index of the highest set bit. Not valid for zero arg. +*/ + +PX_INLINE uint32_t highestSetBit(uint32_t x) +{ + NV_CLOTH_ASSERT(x); + return highestSetBitUnsafe(x); +} + +// Helper function to approximate log2 of an integer value +// assumes that the input is actually power of two. +// todo: replace 2 usages with 'highestSetBit' +PX_INLINE uint32_t ilog2(uint32_t num) +{ + for(uint32_t i = 0; i < 32; i++) + { + num >>= 1; + if(num == 0) + return i; + } + + NV_CLOTH_ASSERT(0); + return uint32_t(-1); +} + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSBITUTILS_H diff --git a/NvCloth/include/NvCloth/ps/PsHash.h b/NvCloth/include/NvCloth/ps/PsHash.h new file mode 100644 index 0000000..6b74fb2 --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsHash.h @@ -0,0 +1,162 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSHASH_H +#define PSFOUNDATION_PSHASH_H + +#include "Ps.h" +#include "PsBasicTemplates.h" + +#if PX_VC +#pragma warning(push) +#pragma warning(disable : 4302) +#endif + +#if PX_LINUX +#include "foundation/PxSimpleTypes.h" +#endif + +/*! +Central definition of hash functions +*/ + +namespace physx +{ +namespace shdfnd +{ +// Hash functions + +// Thomas Wang's 32 bit mix +// http://www.cris.com/~Ttwang/tech/inthash.htm +PX_FORCE_INLINE uint32_t hash(const uint32_t key) +{ + uint32_t k = key; + k += ~(k << 15); + k ^= (k >> 10); + k += (k << 3); + k ^= (k >> 6); + k += ~(k << 11); + k ^= (k >> 16); + return uint32_t(k); +} + +PX_FORCE_INLINE uint32_t hash(const int32_t key) +{ + return hash(uint32_t(key)); +} + +// Thomas Wang's 64 bit mix +// http://www.cris.com/~Ttwang/tech/inthash.htm +PX_FORCE_INLINE uint32_t hash(const uint64_t key) +{ + uint64_t k = key; + k += ~(k << 32); + k ^= (k >> 22); + k += ~(k << 13); + k ^= (k >> 8); + k += (k << 3); + k ^= (k >> 15); + k += ~(k << 27); + k ^= (k >> 31); + return uint32_t(UINT32_MAX & k); +} + +#if PX_APPLE_FAMILY +// hash for size_t, to make gcc happy +PX_INLINE uint32_t hash(const size_t key) +{ +#if PX_P64_FAMILY + return hash(uint64_t(key)); +#else + return hash(uint32_t(key)); +#endif +} +#endif + +// Hash function for pointers +PX_INLINE uint32_t hash(const void* ptr) +{ +#if PX_P64_FAMILY + return hash(uint64_t(ptr)); +#else + return hash(uint32_t(UINT32_MAX & size_t(ptr))); +#endif +} + +// Hash function for pairs +template <typename F, typename S> +PX_INLINE uint32_t hash(const Pair<F, S>& p) +{ + uint32_t seed = 0x876543; + uint32_t m = 1000007; + return hash(p.second) ^ (m * (hash(p.first) ^ (m * seed))); +} + +// hash object for hash map template parameter +template <class Key> +struct Hash +{ + uint32_t operator()(const Key& k) const + { + return hash(k); + } + bool equal(const Key& k0, const Key& k1) const + { + return k0 == k1; + } +}; + +// specialization for strings +template <> +struct Hash<const char*> +{ + public: + uint32_t operator()(const char* _string) const + { + // "DJB" string hash + const uint8_t* string = reinterpret_cast<const uint8_t*>(_string); + uint32_t h = 5381; + for(const uint8_t* ptr = string; *ptr; ptr++) + h = ((h << 5) + h) ^ uint32_t(*ptr); + return h; + } + bool equal(const char* string0, const char* string1) const + { + return !strcmp(string0, string1); + } +}; + +} // namespace shdfnd +} // namespace physx + +#if PX_VC +#pragma warning(pop) +#endif + +#endif // #ifndef PSFOUNDATION_PSHASH_H diff --git a/NvCloth/include/NvCloth/ps/PsHashInternals.h b/NvCloth/include/NvCloth/ps/PsHashInternals.h new file mode 100644 index 0000000..ec0d3dd --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsHashInternals.h @@ -0,0 +1,795 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSHASHINTERNALS_H +#define PSFOUNDATION_PSHASHINTERNALS_H + +#include "PsBasicTemplates.h" +#include "PsArray.h" +#include "PsBitUtils.h" +#include "PsHash.h" +#include "foundation/PxIntrinsics.h" + +#if PX_VC +#pragma warning(push) +#pragma warning(disable : 4127) // conditional expression is constant +#endif +namespace physx +{ +namespace shdfnd +{ +namespace internal +{ +template <class Entry, class Key, class HashFn, class GetKey, class Allocator, bool compacting> +class HashBase : private Allocator +{ + void init(uint32_t initialTableSize, float loadFactor) + { + mBuffer = NULL; + mEntries = NULL; + mEntriesNext = NULL; + mHash = NULL; + mEntriesCapacity = 0; + mHashSize = 0; + mLoadFactor = loadFactor; + mFreeList = uint32_t(EOL); + mTimestamp = 0; + mEntriesCount = 0; + + if(initialTableSize) + reserveInternal(initialTableSize); + } + + public: + typedef Entry EntryType; + + HashBase(uint32_t initialTableSize = 64, float loadFactor = 0.75f) : Allocator(PX_DEBUG_EXP("hashBase")) + { + init(initialTableSize, loadFactor); + } + + HashBase(uint32_t initialTableSize, float loadFactor, const Allocator& alloc) : Allocator(alloc) + { + init(initialTableSize, loadFactor); + } + + HashBase(const Allocator& alloc) : Allocator(alloc) + { + init(64, 0.75f); + } + + ~HashBase() + { + destroy(); // No need to clear() + + if(mBuffer) + Allocator::deallocate(mBuffer); + } + + static const uint32_t EOL = 0xffffffff; + + PX_INLINE Entry* create(const Key& k, bool& exists) + { + uint32_t h = 0; + if(mHashSize) + { + h = hash(k); + uint32_t index = mHash[h]; + while(index != EOL && !HashFn().equal(GetKey()(mEntries[index]), k)) + index = mEntriesNext[index]; + exists = index != EOL; + if(exists) + return mEntries + index; + } + else + exists = false; + + if(freeListEmpty()) + { + grow(); + h = hash(k); + } + + uint32_t entryIndex = freeListGetNext(); + + mEntriesNext[entryIndex] = mHash[h]; + mHash[h] = entryIndex; + + mEntriesCount++; + mTimestamp++; + + return mEntries + entryIndex; + } + + PX_INLINE const Entry* find(const Key& k) const + { + if(!mEntriesCount) + return NULL; + + const uint32_t h = hash(k); + uint32_t index = mHash[h]; + while(index != EOL && !HashFn().equal(GetKey()(mEntries[index]), k)) + index = mEntriesNext[index]; + return index != EOL ? mEntries + index : NULL; + } + + PX_INLINE bool erase(const Key& k, Entry& e) + { + if(!mEntriesCount) + return false; + + const uint32_t h = hash(k); + uint32_t* ptr = mHash + h; + while(*ptr != EOL && !HashFn().equal(GetKey()(mEntries[*ptr]), k)) + ptr = mEntriesNext + *ptr; + + if(*ptr == EOL) + return false; + + PX_PLACEMENT_NEW(&e, Entry)(mEntries[*ptr]); + + return eraseInternal(ptr); + } + + PX_INLINE bool erase(const Key& k) + { + if(!mEntriesCount) + return false; + + const uint32_t h = hash(k); + uint32_t* ptr = mHash + h; + while(*ptr != EOL && !HashFn().equal(GetKey()(mEntries[*ptr]), k)) + ptr = mEntriesNext + *ptr; + + if(*ptr == EOL) + return false; + + return eraseInternal(ptr); + } + + PX_INLINE uint32_t size() const + { + return mEntriesCount; + } + + PX_INLINE uint32_t capacity() const + { + return mHashSize; + } + + void clear() + { + if(!mHashSize || mEntriesCount == 0) + return; + + destroy(); + + intrinsics::memSet(mHash, EOL, mHashSize * sizeof(uint32_t)); + + const uint32_t sizeMinus1 = mEntriesCapacity - 1; + for(uint32_t i = 0; i < sizeMinus1; i++) + { + prefetchLine(mEntriesNext + i, 128); + mEntriesNext[i] = i + 1; + } + mEntriesNext[mEntriesCapacity - 1] = uint32_t(EOL); + mFreeList = 0; + mEntriesCount = 0; + } + + void reserve(uint32_t size) + { + if(size > mHashSize) + reserveInternal(size); + } + + PX_INLINE const Entry* getEntries() const + { + return mEntries; + } + + PX_INLINE Entry* insertUnique(const Key& k) + { + NV_CLOTH_ASSERT(find(k) == NULL); + uint32_t h = hash(k); + + uint32_t entryIndex = freeListGetNext(); + + mEntriesNext[entryIndex] = mHash[h]; + mHash[h] = entryIndex; + + mEntriesCount++; + mTimestamp++; + + return mEntries + entryIndex; + } + + private: + void destroy() + { + for(uint32_t i = 0; i < mHashSize; i++) + { + for(uint32_t j = mHash[i]; j != EOL; j = mEntriesNext[j]) + mEntries[j].~Entry(); + } + } + + template <typename HK, typename GK, class A, bool comp> + PX_NOINLINE void copy(const HashBase<Entry, Key, HK, GK, A, comp>& other); + + // free list management - if we're coalescing, then we use mFreeList to hold + // the top of the free list and it should always be equal to size(). Otherwise, + // we build a free list in the next() pointers. + + PX_INLINE void freeListAdd(uint32_t index) + { + if(compacting) + { + mFreeList--; + NV_CLOTH_ASSERT(mFreeList == mEntriesCount); + } + else + { + mEntriesNext[index] = mFreeList; + mFreeList = index; + } + } + + PX_INLINE void freeListAdd(uint32_t start, uint32_t end) + { + if(!compacting) + { + for(uint32_t i = start; i < end - 1; i++) // add the new entries to the free list + mEntriesNext[i] = i + 1; + + // link in old free list + mEntriesNext[end - 1] = mFreeList; + NV_CLOTH_ASSERT(mFreeList != end - 1); + mFreeList = start; + } + else if(mFreeList == EOL) // don't reset the free ptr for the compacting hash unless it's empty + mFreeList = start; + } + + PX_INLINE uint32_t freeListGetNext() + { + NV_CLOTH_ASSERT(!freeListEmpty()); + if(compacting) + { + NV_CLOTH_ASSERT(mFreeList == mEntriesCount); + return mFreeList++; + } + else + { + uint32_t entryIndex = mFreeList; + mFreeList = mEntriesNext[mFreeList]; + return entryIndex; + } + } + + PX_INLINE bool freeListEmpty() const + { + if(compacting) + return mEntriesCount == mEntriesCapacity; + else + return mFreeList == EOL; + } + + PX_INLINE void replaceWithLast(uint32_t index) + { + PX_PLACEMENT_NEW(mEntries + index, Entry)(mEntries[mEntriesCount]); + mEntries[mEntriesCount].~Entry(); + mEntriesNext[index] = mEntriesNext[mEntriesCount]; + + uint32_t h = hash(GetKey()(mEntries[index])); + uint32_t* ptr; + for(ptr = mHash + h; *ptr != mEntriesCount; ptr = mEntriesNext + *ptr) + NV_CLOTH_ASSERT(*ptr != EOL); + *ptr = index; + } + + PX_INLINE uint32_t hash(const Key& k, uint32_t hashSize) const + { + return HashFn()(k) & (hashSize - 1); + } + + PX_INLINE uint32_t hash(const Key& k) const + { + return hash(k, mHashSize); + } + + PX_INLINE bool eraseInternal(uint32_t* ptr) + { + const uint32_t index = *ptr; + + *ptr = mEntriesNext[index]; + + mEntries[index].~Entry(); + + mEntriesCount--; + mTimestamp++; + + if (compacting && index != mEntriesCount) + replaceWithLast(index); + + freeListAdd(index); + return true; + } + + void reserveInternal(uint32_t size) + { + if(!isPowerOfTwo(size)) + size = nextPowerOfTwo(size); + + NV_CLOTH_ASSERT(!(size & (size - 1))); + + // decide whether iteration can be done on the entries directly + bool resizeCompact = compacting || freeListEmpty(); + + // define new table sizes + uint32_t oldEntriesCapacity = mEntriesCapacity; + uint32_t newEntriesCapacity = uint32_t(float(size) * mLoadFactor); + uint32_t newHashSize = size; + + // allocate new common buffer and setup pointers to new tables + uint8_t* newBuffer; + uint32_t* newHash; + uint32_t* newEntriesNext; + Entry* newEntries; + { + uint32_t newHashByteOffset = 0; + uint32_t newEntriesNextBytesOffset = newHashByteOffset + newHashSize * sizeof(uint32_t); + uint32_t newEntriesByteOffset = newEntriesNextBytesOffset + newEntriesCapacity * sizeof(uint32_t); + newEntriesByteOffset += (16 - (newEntriesByteOffset & 15)) & 15; + uint32_t newBufferByteSize = newEntriesByteOffset + newEntriesCapacity * sizeof(Entry); + + newBuffer = reinterpret_cast<uint8_t*>(Allocator::allocate(newBufferByteSize, __FILE__, __LINE__)); + NV_CLOTH_ASSERT(newBuffer); + + newHash = reinterpret_cast<uint32_t*>(newBuffer + newHashByteOffset); + newEntriesNext = reinterpret_cast<uint32_t*>(newBuffer + newEntriesNextBytesOffset); + newEntries = reinterpret_cast<Entry*>(newBuffer + newEntriesByteOffset); + } + + // initialize new hash table + intrinsics::memSet(newHash, uint32_t(EOL), newHashSize * sizeof(uint32_t)); + + // iterate over old entries, re-hash and create new entries + if(resizeCompact) + { + // check that old free list is empty - we don't need to copy the next entries + NV_CLOTH_ASSERT(compacting || mFreeList == EOL); + + for(uint32_t index = 0; index < mEntriesCount; ++index) + { + uint32_t h = hash(GetKey()(mEntries[index]), newHashSize); + newEntriesNext[index] = newHash[h]; + newHash[h] = index; + + PX_PLACEMENT_NEW(newEntries + index, Entry)(mEntries[index]); + mEntries[index].~Entry(); + } + } + else + { + // copy old free list, only required for non compact resizing + intrinsics::memCopy(newEntriesNext, mEntriesNext, mEntriesCapacity * sizeof(uint32_t)); + + for(uint32_t bucket = 0; bucket < mHashSize; bucket++) + { + uint32_t index = mHash[bucket]; + while(index != EOL) + { + uint32_t h = hash(GetKey()(mEntries[index]), newHashSize); + newEntriesNext[index] = newHash[h]; + NV_CLOTH_ASSERT(index != newHash[h]); + + newHash[h] = index; + + PX_PLACEMENT_NEW(newEntries + index, Entry)(mEntries[index]); + mEntries[index].~Entry(); + + index = mEntriesNext[index]; + } + } + } + + // swap buffer and pointers + Allocator::deallocate(mBuffer); + mBuffer = newBuffer; + mHash = newHash; + mHashSize = newHashSize; + mEntriesNext = newEntriesNext; + mEntries = newEntries; + mEntriesCapacity = newEntriesCapacity; + + freeListAdd(oldEntriesCapacity, newEntriesCapacity); + } + + void grow() + { + NV_CLOTH_ASSERT((mFreeList == EOL) || (compacting && (mEntriesCount == mEntriesCapacity))); + + uint32_t size = mHashSize == 0 ? 16 : mHashSize * 2; + reserve(size); + } + + uint8_t* mBuffer; + Entry* mEntries; + uint32_t* mEntriesNext; // same size as mEntries + uint32_t* mHash; + uint32_t mEntriesCapacity; + uint32_t mHashSize; + float mLoadFactor; + uint32_t mFreeList; + uint32_t mTimestamp; + uint32_t mEntriesCount; // number of entries + + public: + class Iter + { + public: + PX_INLINE Iter(HashBase& b) : mBucket(0), mEntry(uint32_t(b.EOL)), mTimestamp(b.mTimestamp), mBase(b) + { + if(mBase.mEntriesCapacity > 0) + { + mEntry = mBase.mHash[0]; + skip(); + } + } + + PX_INLINE void check() const + { + NV_CLOTH_ASSERT(mTimestamp == mBase.mTimestamp); + } + PX_INLINE const Entry& operator*() const + { + check(); + return mBase.mEntries[mEntry]; + } + PX_INLINE Entry& operator*() + { + check(); + return mBase.mEntries[mEntry]; + } + PX_INLINE const Entry* operator->() const + { + check(); + return mBase.mEntries + mEntry; + } + PX_INLINE Entry* operator->() + { + check(); + return mBase.mEntries + mEntry; + } + PX_INLINE Iter operator++() + { + check(); + advance(); + return *this; + } + PX_INLINE Iter operator++(int) + { + check(); + Iter i = *this; + advance(); + return i; + } + PX_INLINE bool done() const + { + check(); + return mEntry == mBase.EOL; + } + + private: + PX_INLINE void advance() + { + mEntry = mBase.mEntriesNext[mEntry]; + skip(); + } + PX_INLINE void skip() + { + while(mEntry == mBase.EOL) + { + if(++mBucket == mBase.mHashSize) + break; + mEntry = mBase.mHash[mBucket]; + } + } + + Iter& operator=(const Iter&); + + uint32_t mBucket; + uint32_t mEntry; + uint32_t mTimestamp; + HashBase& mBase; + }; + + /*! + Iterate over entries in a hash base and allow entry erase while iterating + */ + class EraseIterator + { + public: + PX_INLINE EraseIterator(HashBase& b): mBase(b) + { + reset(); + } + + PX_INLINE Entry* eraseCurrentGetNext(bool eraseCurrent) + { + if(eraseCurrent && mCurrentEntryIndexPtr) + { + mBase.eraseInternal(mCurrentEntryIndexPtr); + // if next was valid return the same ptr, if next was EOL search new hash entry + if(*mCurrentEntryIndexPtr != mBase.EOL) + return mBase.mEntries + *mCurrentEntryIndexPtr; + else + return traverseHashEntries(); + } + + // traverse mHash to find next entry + if(mCurrentEntryIndexPtr == NULL) + return traverseHashEntries(); + + const uint32_t index = *mCurrentEntryIndexPtr; + if(mBase.mEntriesNext[index] == mBase.EOL) + { + return traverseHashEntries(); + } + else + { + mCurrentEntryIndexPtr = mBase.mEntriesNext + index; + return mBase.mEntries + *mCurrentEntryIndexPtr; + } + } + + PX_INLINE void reset() + { + mCurrentHashIndex = 0; + mCurrentEntryIndexPtr = NULL; + } + + private: + PX_INLINE Entry* traverseHashEntries() + { + mCurrentEntryIndexPtr = NULL; + while (mCurrentEntryIndexPtr == NULL && mCurrentHashIndex < mBase.mHashSize) + { + if (mBase.mHash[mCurrentHashIndex] != mBase.EOL) + { + mCurrentEntryIndexPtr = mBase.mHash + mCurrentHashIndex; + mCurrentHashIndex++; + return mBase.mEntries + *mCurrentEntryIndexPtr; + } + else + { + mCurrentHashIndex++; + } + } + return NULL; + } + + EraseIterator& operator=(const EraseIterator&); + private: + uint32_t* mCurrentEntryIndexPtr; + uint32_t mCurrentHashIndex; + HashBase& mBase; + }; +}; + +template <class Entry, class Key, class HashFn, class GetKey, class Allocator, bool compacting> +template <typename HK, typename GK, class A, bool comp> +PX_NOINLINE void +HashBase<Entry, Key, HashFn, GetKey, Allocator, compacting>::copy(const HashBase<Entry, Key, HK, GK, A, comp>& other) +{ + reserve(other.mEntriesCount); + + for(uint32_t i = 0; i < other.mEntriesCount; i++) + { + for(uint32_t j = other.mHash[i]; j != EOL; j = other.mEntriesNext[j]) + { + const Entry& otherEntry = other.mEntries[j]; + + bool exists; + Entry* newEntry = create(GK()(otherEntry), exists); + NV_CLOTH_ASSERT(!exists); + + PX_PLACEMENT_NEW(newEntry, Entry)(otherEntry); + } + } +} + +template <class Key, class HashFn, class Allocator = typename AllocatorTraits<Key>::Type, bool Coalesced = false> +class HashSetBase +{ + PX_NOCOPY(HashSetBase) + public: + struct GetKey + { + PX_INLINE const Key& operator()(const Key& e) + { + return e; + } + }; + + typedef HashBase<Key, Key, HashFn, GetKey, Allocator, Coalesced> BaseMap; + typedef typename BaseMap::Iter Iterator; + + HashSetBase(uint32_t initialTableSize, float loadFactor, const Allocator& alloc) + : mBase(initialTableSize, loadFactor, alloc) + { + } + + HashSetBase(const Allocator& alloc) : mBase(64, 0.75f, alloc) + { + } + + HashSetBase(uint32_t initialTableSize = 64, float loadFactor = 0.75f) : mBase(initialTableSize, loadFactor) + { + } + + bool insert(const Key& k) + { + bool exists; + Key* e = mBase.create(k, exists); + if(!exists) + PX_PLACEMENT_NEW(e, Key)(k); + return !exists; + } + + PX_INLINE bool contains(const Key& k) const + { + return mBase.find(k) != 0; + } + PX_INLINE bool erase(const Key& k) + { + return mBase.erase(k); + } + PX_INLINE uint32_t size() const + { + return mBase.size(); + } + PX_INLINE uint32_t capacity() const + { + return mBase.capacity(); + } + PX_INLINE void reserve(uint32_t size) + { + mBase.reserve(size); + } + PX_INLINE void clear() + { + mBase.clear(); + } + + protected: + BaseMap mBase; +}; + +template <class Key, class Value, class HashFn, class Allocator = typename AllocatorTraits<Pair<const Key, Value> >::Type> +class HashMapBase +{ + PX_NOCOPY(HashMapBase) + public: + typedef Pair<const Key, Value> Entry; + + struct GetKey + { + PX_INLINE const Key& operator()(const Entry& e) + { + return e.first; + } + }; + + typedef HashBase<Entry, Key, HashFn, GetKey, Allocator, true> BaseMap; + typedef typename BaseMap::Iter Iterator; + typedef typename BaseMap::EraseIterator EraseIterator; + + HashMapBase(uint32_t initialTableSize, float loadFactor, const Allocator& alloc) + : mBase(initialTableSize, loadFactor, alloc) + { + } + + HashMapBase(const Allocator& alloc) : mBase(64, 0.75f, alloc) + { + } + + HashMapBase(uint32_t initialTableSize = 64, float loadFactor = 0.75f) : mBase(initialTableSize, loadFactor) + { + } + + bool insert(const Key /*&*/ k, const Value /*&*/ v) + { + bool exists; + Entry* e = mBase.create(k, exists); + if(!exists) + PX_PLACEMENT_NEW(e, Entry)(k, v); + return !exists; + } + + Value& operator[](const Key& k) + { + bool exists; + Entry* e = mBase.create(k, exists); + if(!exists) + PX_PLACEMENT_NEW(e, Entry)(k, Value()); + + return e->second; + } + + PX_INLINE const Entry* find(const Key& k) const + { + return mBase.find(k); + } + PX_INLINE bool erase(const Key& k) + { + return mBase.erase(k); + } + PX_INLINE bool erase(const Key& k, Entry& e) + { + return mBase.erase(k, e); + } + PX_INLINE uint32_t size() const + { + return mBase.size(); + } + PX_INLINE uint32_t capacity() const + { + return mBase.capacity(); + } + PX_INLINE Iterator getIterator() + { + return Iterator(mBase); + } + PX_INLINE EraseIterator getEraseIterator() + { + return EraseIterator(mBase); + } + PX_INLINE void reserve(uint32_t size) + { + mBase.reserve(size); + } + PX_INLINE void clear() + { + mBase.clear(); + } + + protected: + BaseMap mBase; +}; +} + +} // namespace shdfnd +} // namespace physx + +#if PX_VC +#pragma warning(pop) +#endif +#endif // #ifndef PSFOUNDATION_PSHASHINTERNALS_H diff --git a/NvCloth/include/NvCloth/ps/PsHashMap.h b/NvCloth/include/NvCloth/ps/PsHashMap.h new file mode 100644 index 0000000..5091dee --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsHashMap.h @@ -0,0 +1,118 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSHASHMAP_H +#define PSFOUNDATION_PSHASHMAP_H + +#include "PsHashInternals.h" + +// TODO: make this doxy-format +// +// This header defines two hash maps. Hash maps +// * support custom initial table sizes (rounded up internally to power-of-2) +// * support custom static allocator objects +// * auto-resize, based on a load factor (i.e. a 64-entry .75 load factor hash will resize +// when the 49th element is inserted) +// * are based on open hashing +// * have O(1) contains, erase +// +// Maps have STL-like copying semantics, and properly initialize and destruct copies of objects +// +// There are two forms of map: coalesced and uncoalesced. Coalesced maps keep the entries in the +// initial segment of an array, so are fast to iterate over; however deletion is approximately +// twice as expensive. +// +// HashMap<T>: +// bool insert(const Key& k, const Value& v) O(1) amortized (exponential resize policy) +// Value & operator[](const Key& k) O(1) for existing objects, else O(1) amortized +// const Entry * find(const Key& k); O(1) +// bool erase(const T& k); O(1) +// uint32_t size(); constant +// void reserve(uint32_t size); O(MAX(currentOccupancy,size)) +// void clear(); O(currentOccupancy) (with zero constant for objects +// without +// destructors) +// Iterator getIterator(); +// +// operator[] creates an entry if one does not exist, initializing with the default constructor. +// CoalescedHashMap<T> does not support getIterator, but instead supports +// const Key *getEntries(); +// +// Use of iterators: +// +// for(HashMap::Iterator iter = test.getIterator(); !iter.done(); ++iter) +// myFunction(iter->first, iter->second); + +namespace physx +{ +namespace shdfnd +{ +template <class Key, class Value, class HashFn = Hash<Key>, class Allocator = NonTrackingAllocator> +class HashMap : public internal::HashMapBase<Key, Value, HashFn, Allocator> +{ + public: + typedef internal::HashMapBase<Key, Value, HashFn, Allocator> HashMapBase; + typedef typename HashMapBase::Iterator Iterator; + + HashMap(uint32_t initialTableSize = 64, float loadFactor = 0.75f) : HashMapBase(initialTableSize, loadFactor) + { + } + HashMap(uint32_t initialTableSize, float loadFactor, const Allocator& alloc) + : HashMapBase(initialTableSize, loadFactor, alloc) + { + } + HashMap(const Allocator& alloc) : HashMapBase(64, 0.75f, alloc) + { + } + Iterator getIterator() + { + return Iterator(HashMapBase::mBase); + } +}; + +template <class Key, class Value, class HashFn = Hash<Key>, class Allocator = NonTrackingAllocator> +class CoalescedHashMap : public internal::HashMapBase<Key, Value, HashFn, Allocator> +{ + public: + typedef internal::HashMapBase<Key, Value, HashFn, Allocator> HashMapBase; + + CoalescedHashMap(uint32_t initialTableSize = 64, float loadFactor = 0.75f) + : HashMapBase(initialTableSize, loadFactor) + { + } + const Pair<const Key, Value>* getEntries() const + { + return HashMapBase::mBase.getEntries(); + } +}; + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSHASHMAP_H diff --git a/NvCloth/include/NvCloth/ps/PsIntrinsics.h b/NvCloth/include/NvCloth/ps/PsIntrinsics.h new file mode 100644 index 0000000..38b91ba --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsIntrinsics.h @@ -0,0 +1,47 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSINTRINSICS_H +#define PSFOUNDATION_PSINTRINSICS_H + +#include "foundation/PxPreprocessor.h" + +#if PX_WINDOWS_FAMILY +#include "windows/PsWindowsIntrinsics.h" +#elif(PX_LINUX || PX_ANDROID || PX_APPLE_FAMILY || PX_PS4) +#include "unix/PsUnixIntrinsics.h" +#elif PX_XBOXONE +#include "XboxOne/PsXboxOneIntrinsics.h" +#elif PX_SWITCH +#include "switch/PsSwitchIntrinsics.h" +#else +#error "Platform not supported!" +#endif + +#endif // #ifndef PSFOUNDATION_PSINTRINSICS_H diff --git a/NvCloth/include/NvCloth/ps/PsMathUtils.h b/NvCloth/include/NvCloth/ps/PsMathUtils.h new file mode 100644 index 0000000..aeaeb93 --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsMathUtils.h @@ -0,0 +1,695 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSMATHUTILS_H +#define PSFOUNDATION_PSMATHUTILS_H + +#include "foundation/PxPreprocessor.h" +#include "foundation/PxTransform.h" +#include "foundation/PxMat33.h" +#include "NvCloth/ps/Ps.h" +#include "NvCloth/ps/PsIntrinsics.h" +#include "NvCloth/Callbacks.h" + +// General guideline is: if it's an abstract math function, it belongs here. +// If it's a math function where the inputs have specific semantics (e.g. +// separateSwingTwist) it doesn't. + +namespace physx +{ +namespace shdfnd +{ +/** +\brief sign returns the sign of its argument. The sign of zero is undefined. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 sign(const PxF32 a) +{ + return intrinsics::sign(a); +} + +/** +\brief sign returns the sign of its argument. The sign of zero is undefined. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 sign(const PxF64 a) +{ + return (a >= 0.0) ? 1.0 : -1.0; +} + +/** +\brief sign returns the sign of its argument. The sign of zero is undefined. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxI32 sign(const PxI32 a) +{ + return (a >= 0) ? 1 : -1; +} + +/** +\brief Returns true if the two numbers are within eps of each other. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE bool equals(const PxF32 a, const PxF32 b, const PxF32 eps) +{ + return (PxAbs(a - b) < eps); +} + +/** +\brief Returns true if the two numbers are within eps of each other. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE bool equals(const PxF64 a, const PxF64 b, const PxF64 eps) +{ + return (PxAbs(a - b) < eps); +} + +/** +\brief The floor function returns a floating-point value representing the largest integer that is less than or equal to +x. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 floor(const PxF32 a) +{ + return floatFloor(a); +} + +/** +\brief The floor function returns a floating-point value representing the largest integer that is less than or equal to +x. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 floor(const PxF64 a) +{ + return ::floor(a); +} + +/** +\brief The ceil function returns a single value representing the smallest integer that is greater than or equal to x. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 ceil(const PxF32 a) +{ + return ::ceilf(a); +} + +/** +\brief The ceil function returns a double value representing the smallest integer that is greater than or equal to x. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 ceil(const PxF64 a) +{ + return ::ceil(a); +} + +/** +\brief mod returns the floating-point remainder of x / y. + +If the value of y is 0.0, mod returns a quiet NaN. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 mod(const PxF32 x, const PxF32 y) +{ + return PxF32(::fmodf(x, y)); +} + +/** +\brief mod returns the floating-point remainder of x / y. + +If the value of y is 0.0, mod returns a quiet NaN. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 mod(const PxF64 x, const PxF64 y) +{ + return ::fmod(x, y); +} + +/** +\brief Square. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 sqr(const PxF32 a) +{ + return a * a; +} + +/** +\brief Square. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 sqr(const PxF64 a) +{ + return a * a; +} + +/** +\brief Calculates x raised to the power of y. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 pow(const PxF32 x, const PxF32 y) +{ + return ::powf(x, y); +} + +/** +\brief Calculates x raised to the power of y. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 pow(const PxF64 x, const PxF64 y) +{ + return ::pow(x, y); +} + +/** +\brief Calculates e^n +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 exp(const PxF32 a) +{ + return ::expf(a); +} +/** + +\brief Calculates e^n +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 exp(const PxF64 a) +{ + return ::exp(a); +} + +/** +\brief Calculates 2^n +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 exp2(const PxF32 a) +{ + return ::expf(a * 0.693147180559945309417f); +} +/** + +\brief Calculates 2^n +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 exp2(const PxF64 a) +{ + return ::exp(a * 0.693147180559945309417); +} + +/** +\brief Calculates logarithms. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 logE(const PxF32 a) +{ + return ::logf(a); +} + +/** +\brief Calculates logarithms. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 logE(const PxF64 a) +{ + return ::log(a); +} + +/** +\brief Calculates logarithms. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 log2(const PxF32 a) +{ + return ::logf(a) / 0.693147180559945309417f; +} + +/** +\brief Calculates logarithms. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 log2(const PxF64 a) +{ + return ::log(a) / 0.693147180559945309417; +} + +/** +\brief Calculates logarithms. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 log10(const PxF32 a) +{ + return ::log10f(a); +} + +/** +\brief Calculates logarithms. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 log10(const PxF64 a) +{ + return ::log10(a); +} + +/** +\brief Converts degrees to radians. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 degToRad(const PxF32 a) +{ + return 0.01745329251994329547f * a; +} + +/** +\brief Converts degrees to radians. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 degToRad(const PxF64 a) +{ + return 0.01745329251994329547 * a; +} + +/** +\brief Converts radians to degrees. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 radToDeg(const PxF32 a) +{ + return 57.29577951308232286465f * a; +} + +/** +\brief Converts radians to degrees. +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF64 radToDeg(const PxF64 a) +{ + return 57.29577951308232286465 * a; +} + +//! \brief compute sine and cosine at the same time. There is a 'fsincos' on PC that we probably want to use here +PX_CUDA_CALLABLE PX_FORCE_INLINE void sincos(const PxF32 radians, PxF32& sin, PxF32& cos) +{ + /* something like: + _asm fld Local + _asm fsincos + _asm fstp LocalCos + _asm fstp LocalSin + */ + sin = PxSin(radians); + cos = PxCos(radians); +} + +/** +\brief uniform random number in [a,b] +*/ +PX_FORCE_INLINE PxI32 rand(const PxI32 a, const PxI32 b) +{ + return a + PxI32(::rand() % (b - a + 1)); +} + +/** +\brief uniform random number in [a,b] +*/ +PX_FORCE_INLINE PxF32 rand(const PxF32 a, const PxF32 b) +{ + return a + (b - a) * ::rand() / RAND_MAX; +} + +//! \brief return angle between two vectors in radians +PX_CUDA_CALLABLE PX_FORCE_INLINE PxF32 angle(const PxVec3& v0, const PxVec3& v1) +{ + const PxF32 cos = v0.dot(v1); // |v0|*|v1|*Cos(Angle) + const PxF32 sin = (v0.cross(v1)).magnitude(); // |v0|*|v1|*Sin(Angle) + return PxAtan2(sin, cos); +} + +//! If possible use instead fsel on the dot product /*fsel(d.dot(p),onething,anotherthing);*/ +//! Compares orientations (more readable, user-friendly function) +PX_CUDA_CALLABLE PX_FORCE_INLINE bool sameDirection(const PxVec3& d, const PxVec3& p) +{ + return d.dot(p) >= 0.0f; +} + +//! Checks 2 values have different signs +PX_CUDA_CALLABLE PX_FORCE_INLINE IntBool differentSign(PxReal f0, PxReal f1) +{ +#if !PX_EMSCRIPTEN + union + { + PxU32 u; + PxReal f; + } u1, u2; + u1.f = f0; + u2.f = f1; + return IntBool((u1.u ^ u2.u) & PX_SIGN_BITMASK); +#else + // javascript floats are 64-bits... + return IntBool( (f0*f1) < 0.0f ); +#endif +} + +PX_CUDA_CALLABLE PX_FORCE_INLINE PxMat33 star(const PxVec3& v) +{ + return PxMat33(PxVec3(0, v.z, -v.y), PxVec3(-v.z, 0, v.x), PxVec3(v.y, -v.x, 0)); +} + +PX_CUDA_CALLABLE PX_INLINE PxVec3 log(const PxQuat& q) +{ + const PxReal s = q.getImaginaryPart().magnitude(); + if(s < 1e-12f) + return PxVec3(0.0f); + // force the half-angle to have magnitude <= pi/2 + PxReal halfAngle = q.w < 0 ? PxAtan2(-s, -q.w) : PxAtan2(s, q.w); + NV_CLOTH_ASSERT(halfAngle >= -PxPi / 2 && halfAngle <= PxPi / 2); + + return q.getImaginaryPart().getNormalized() * 2.f * halfAngle; +} + +PX_CUDA_CALLABLE PX_INLINE PxQuat exp(const PxVec3& v) +{ + const PxReal m = v.magnitudeSquared(); + return m < 1e-24f ? PxQuat(PxIdentity) : PxQuat(PxSqrt(m), v * PxRecipSqrt(m)); +} + +// quat to rotate v0 t0 v1 +PX_CUDA_CALLABLE PX_INLINE PxQuat rotationArc(const PxVec3& v0, const PxVec3& v1) +{ + const PxVec3 cross = v0.cross(v1); + const PxReal d = v0.dot(v1); + if(d <= -0.99999f) + return (PxAbs(v0.x) < 0.1f ? PxQuat(0.0f, v0.z, -v0.y, 0.0f) : PxQuat(v0.y, -v0.x, 0.0, 0.0)).getNormalized(); + + const PxReal s = PxSqrt((1 + d) * 2), r = 1 / s; + + return PxQuat(cross.x * r, cross.y * r, cross.z * r, s * 0.5f).getNormalized(); +} + +/** +\brief returns largest axis +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 largestAxis(const PxVec3& v) +{ + PxU32 m = PxU32(v.y > v.x ? 1 : 0); + return v.z > v[m] ? 2 : m; +} + +/** +\brief returns indices for the largest axis and 2 other axii +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 largestAxis(const PxVec3& v, PxU32& other1, PxU32& other2) +{ + if(v.x >= PxMax(v.y, v.z)) + { + other1 = 1; + other2 = 2; + return 0; + } + else if(v.y >= v.z) + { + other1 = 0; + other2 = 2; + return 1; + } + else + { + other1 = 0; + other2 = 1; + return 2; + } +} + +/** +\brief returns axis with smallest absolute value +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE PxU32 closestAxis(const PxVec3& v) +{ + PxU32 m = PxU32(PxAbs(v.y) > PxAbs(v.x) ? 1 : 0); + return PxAbs(v.z) > PxAbs(v[m]) ? 2 : m; +} + +PX_CUDA_CALLABLE PX_INLINE PxU32 closestAxis(const PxVec3& v, PxU32& j, PxU32& k) +{ + // find largest 2D plane projection + const PxF32 absPx = PxAbs(v.x); + const PxF32 absNy = PxAbs(v.y); + const PxF32 absNz = PxAbs(v.z); + + PxU32 m = 0; // x biggest axis + j = 1; + k = 2; + if(absNy > absPx && absNy > absNz) + { + // y biggest + j = 2; + k = 0; + m = 1; + } + else if(absNz > absPx) + { + // z biggest + j = 0; + k = 1; + m = 2; + } + return m; +} + +/*! +Extend an edge along its length by a factor +*/ +PX_CUDA_CALLABLE PX_FORCE_INLINE void makeFatEdge(PxVec3& p0, PxVec3& p1, PxReal fatCoeff) +{ + PxVec3 delta = p1 - p0; + + const PxReal m = delta.magnitude(); + if(m > 0.0f) + { + delta *= fatCoeff / m; + p0 -= delta; + p1 += delta; + } +} + +//! Compute point as combination of barycentric coordinates +PX_CUDA_CALLABLE PX_FORCE_INLINE PxVec3 +computeBarycentricPoint(const PxVec3& p0, const PxVec3& p1, const PxVec3& p2, PxReal u, PxReal v) +{ + // This seems to confuse the compiler... + // return (1.0f - u - v)*p0 + u*p1 + v*p2; + const PxF32 w = 1.0f - u - v; + return PxVec3(w * p0.x + u * p1.x + v * p2.x, w * p0.y + u * p1.y + v * p2.y, w * p0.z + u * p1.z + v * p2.z); +} + +// generates a pair of quaternions (swing, twist) such that in = swing * twist, with +// swing.x = 0 +// twist.y = twist.z = 0, and twist is a unit quat +PX_FORCE_INLINE void separateSwingTwist(const PxQuat& q, PxQuat& swing, PxQuat& twist) +{ + twist = q.x != 0.0f ? PxQuat(q.x, 0, 0, q.w).getNormalized() : PxQuat(PxIdentity); + swing = q * twist.getConjugate(); +} + +// generate two tangent vectors to a given normal +PX_FORCE_INLINE void normalToTangents(const PxVec3& normal, PxVec3& tangent0, PxVec3& tangent1) +{ + tangent0 = PxAbs(normal.x) < 0.70710678f ? PxVec3(0, -normal.z, normal.y) : PxVec3(-normal.y, normal.x, 0); + tangent0.normalize(); + tangent1 = normal.cross(tangent0); +} + +/** +\brief computes a oriented bounding box around the scaled basis. +\param basis Input = skewed basis, Output = (normalized) orthogonal basis. +\return Bounding box extent. +*/ +NV_CLOTH_IMPORT PxVec3 optimizeBoundingBox(PxMat33& basis); + +NV_CLOTH_IMPORT PxQuat slerp(const PxReal t, const PxQuat& left, const PxQuat& right); + +PX_CUDA_CALLABLE PX_INLINE PxVec3 ellipseClamp(const PxVec3& point, const PxVec3& radii) +{ + // This function need to be implemented in the header file because + // it is included in a spu shader program. + + // finds the closest point on the ellipse to a given point + + // (p.y, p.z) is the input point + // (e.y, e.z) are the radii of the ellipse + + // lagrange multiplier method with Newton/Halley hybrid root-finder. + // see http://www.geometrictools.com/Documentation/DistancePointToEllipse2.pdf + // for proof of Newton step robustness and initial estimate. + // Halley converges much faster but sometimes overshoots - when that happens we take + // a newton step instead + + // converges in 1-2 iterations where D&C works well, and it's good with 4 iterations + // with any ellipse that isn't completely crazy + + const PxU32 MAX_ITERATIONS = 20; + const PxReal convergenceThreshold = 1e-4f; + + // iteration requires first quadrant but we recover generality later + + PxVec3 q(0, PxAbs(point.y), PxAbs(point.z)); + const PxReal tinyEps = 1e-6f; // very close to minor axis is numerically problematic but trivial + if(radii.y >= radii.z) + { + if(q.z < tinyEps) + return PxVec3(0, point.y > 0 ? radii.y : -radii.y, 0); + } + else + { + if(q.y < tinyEps) + return PxVec3(0, 0, point.z > 0 ? radii.z : -radii.z); + } + + PxVec3 denom, e2 = radii.multiply(radii), eq = radii.multiply(q); + + // we can use any initial guess which is > maximum(-e.y^2,-e.z^2) and for which f(t) is > 0. + // this guess works well near the axes, but is weak along the diagonals. + + PxReal t = PxMax(eq.y - e2.y, eq.z - e2.z); + + for(PxU32 i = 0; i < MAX_ITERATIONS; i++) + { + denom = PxVec3(0, 1 / (t + e2.y), 1 / (t + e2.z)); + PxVec3 denom2 = eq.multiply(denom); + + PxVec3 fv = denom2.multiply(denom2); + PxReal f = fv.y + fv.z - 1; + + // although in exact arithmetic we are guaranteed f>0, we can get here + // on the first iteration via catastrophic cancellation if the point is + // very close to the origin. In that case we just behave as if f=0 + + if(f < convergenceThreshold) + return e2.multiply(point).multiply(denom); + + PxReal df = fv.dot(denom) * -2.0f; + t = t - f / df; + } + + // we didn't converge, so clamp what we have + PxVec3 r = e2.multiply(point).multiply(denom); + return r * PxRecipSqrt(sqr(r.y / radii.y) + sqr(r.z / radii.z)); +} + +PX_CUDA_CALLABLE PX_INLINE PxReal tanHalf(PxReal sin, PxReal cos) +{ + return sin / (1 + cos); +} + +PX_INLINE PxQuat quatFromTanQVector(const PxVec3& v) +{ + PxReal v2 = v.dot(v); + if(v2 < 1e-12f) + return PxQuat(PxIdentity); + PxReal d = 1 / (1 + v2); + return PxQuat(v.x * 2, v.y * 2, v.z * 2, 1 - v2) * d; +} + +PX_FORCE_INLINE PxVec3 cross100(const PxVec3& b) +{ + return PxVec3(0.0f, -b.z, b.y); +} +PX_FORCE_INLINE PxVec3 cross010(const PxVec3& b) +{ + return PxVec3(b.z, 0.0f, -b.x); +} +PX_FORCE_INLINE PxVec3 cross001(const PxVec3& b) +{ + return PxVec3(-b.y, b.x, 0.0f); +} + +PX_INLINE void decomposeVector(PxVec3& normalCompo, PxVec3& tangentCompo, const PxVec3& outwardDir, + const PxVec3& outwardNormal) +{ + normalCompo = outwardNormal * (outwardDir.dot(outwardNormal)); + tangentCompo = outwardDir - normalCompo; +} + +//! \brief Return (i+1)%3 +// Avoid variable shift for XBox: +// PX_INLINE PxU32 Ps::getNextIndex3(PxU32 i) { return (1<<i) & 3; } +PX_INLINE PxU32 getNextIndex3(PxU32 i) +{ + return (i + 1 + (i >> 1)) & 3; +} + +PX_INLINE PxMat33 rotFrom2Vectors(const PxVec3& from, const PxVec3& to) +{ + // See bottom of http://www.euclideanspace.com/maths/algebra/matrix/orthogonal/rotation/index.htm + + // Early exit if to = from + if((from - to).magnitudeSquared() < 1e-4f) + return PxMat33(PxIdentity); + + // Early exit if to = -from + if((from + to).magnitudeSquared() < 1e-4f) + return PxMat33::createDiagonal(PxVec3(1.0f, -1.0f, -1.0f)); + + PxVec3 n = from.cross(to); + + PxReal C = from.dot(to), S = PxSqrt(1 - C * C), CC = 1 - C; + + PxReal xx = n.x * n.x, yy = n.y * n.y, zz = n.z * n.z, xy = n.x * n.y, yz = n.y * n.z, xz = n.x * n.z; + + PxMat33 R; + + R(0, 0) = 1 + CC * (xx - 1); + R(0, 1) = -n.z * S + CC * xy; + R(0, 2) = n.y * S + CC * xz; + + R(1, 0) = n.z * S + CC * xy; + R(1, 1) = 1 + CC * (yy - 1); + R(1, 2) = -n.x * S + CC * yz; + + R(2, 0) = -n.y * S + CC * xz; + R(2, 1) = n.x * S + CC * yz; + R(2, 2) = 1 + CC * (zz - 1); + + return R; +} + +NV_CLOTH_IMPORT void integrateTransform(const PxTransform& curTrans, const PxVec3& linvel, const PxVec3& angvel, + PxReal timeStep, PxTransform& result); + +PX_INLINE void computeBasis(const PxVec3& dir, PxVec3& right, PxVec3& up) +{ + // Derive two remaining vectors + if(PxAbs(dir.y) <= 0.9999f) + { + right = PxVec3(dir.z, 0.0f, -dir.x); + right.normalize(); + + // PT: normalize not needed for 'up' because dir & right are unit vectors, + // and by construction the angle between them is 90 degrees (i.e. sin(angle)=1) + up = PxVec3(dir.y * right.z, dir.z * right.x - dir.x * right.z, -dir.y * right.x); + } + else + { + right = PxVec3(1.0f, 0.0f, 0.0f); + + up = PxVec3(0.0f, dir.z, -dir.y); + up.normalize(); + } +} + +PX_INLINE void computeBasis(const PxVec3& p0, const PxVec3& p1, PxVec3& dir, PxVec3& right, PxVec3& up) +{ + // Compute the new direction vector + dir = p1 - p0; + dir.normalize(); + + // Derive two remaining vectors + computeBasis(dir, right, up); +} + +PX_FORCE_INLINE bool isAlmostZero(const PxVec3& v) +{ + if(PxAbs(v.x) > 1e-6f || PxAbs(v.y) > 1e-6f || PxAbs(v.z) > 1e-6f) + return false; + return true; +} + +} // namespace shdfnd +} // namespace physx + +#endif diff --git a/NvCloth/include/NvCloth/ps/PsUserAllocated.h b/NvCloth/include/NvCloth/ps/PsUserAllocated.h new file mode 100644 index 0000000..f41d29e --- /dev/null +++ b/NvCloth/include/NvCloth/ps/PsUserAllocated.h @@ -0,0 +1,92 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSUSERALLOCATED_H +#define PSFOUNDATION_PSUSERALLOCATED_H + +#include "PsAllocator.h" + +namespace physx +{ +namespace shdfnd +{ +/** +Provides new and delete using a UserAllocator. +Guarantees that 'delete x;' uses the UserAllocator too. +*/ +class UserAllocated +{ + public: + // PX_SERIALIZATION + PX_INLINE void* operator new(size_t, void* address) + { + return address; + } + //~PX_SERIALIZATION + // Matching operator delete to the above operator new. Don't ask me + // how this makes any sense - Nuernberger. + PX_INLINE void operator delete(void*, void*) + { + } + + template <typename Alloc> + PX_INLINE void* operator new(size_t size, Alloc alloc, const char* fileName, int line) + { + return alloc.allocate(size, fileName, line); + } + template <typename Alloc> + PX_INLINE void* operator new [](size_t size, Alloc alloc, const char* fileName, int line) + { return alloc.allocate(size, fileName, line); } + + // placement delete + template <typename Alloc> + PX_INLINE void operator delete(void* ptr, Alloc alloc, const char* fileName, int line) + { + PX_UNUSED(fileName); + PX_UNUSED(line); + alloc.deallocate(ptr); + } + template <typename Alloc> + PX_INLINE void operator delete [](void* ptr, Alloc alloc, const char* fileName, int line) + { + PX_UNUSED(fileName); + PX_UNUSED(line); + alloc.deallocate(ptr); + } PX_INLINE void + operator delete(void* ptr) + { + NonTrackingAllocator().deallocate(ptr); + } + PX_INLINE void operator delete [](void* ptr) + { NonTrackingAllocator().deallocate(ptr); } +}; +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSUSERALLOCATED_H diff --git a/NvCloth/include/NvCloth/ps/unix/PsUnixIntrinsics.h b/NvCloth/include/NvCloth/ps/unix/PsUnixIntrinsics.h new file mode 100644 index 0000000..93d20b5 --- /dev/null +++ b/NvCloth/include/NvCloth/ps/unix/PsUnixIntrinsics.h @@ -0,0 +1,153 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSUNIXINTRINSICS_H +#define PSFOUNDATION_PSUNIXINTRINSICS_H + +#include "NvCloth/ps/Ps.h" +#include "NvCloth/Callbacks.h" +#include <math.h> + +#if PX_ANDROID || (PX_LINUX && !(PX_X64 || PX_X64)) // x86[_64] Linux uses inline assembly for debug break +#include <signal.h> // for Ns::debugBreak() { raise(SIGTRAP); } +#endif + +#if 0 +#include <libkern/OSAtomic.h> +#endif + +// this file is for internal intrinsics - that is, intrinsics that are used in +// cross platform code but do not appear in the API + +#if !(PX_LINUX || PX_ANDROID || PX_PS4 || PX_APPLE_FAMILY) +#error "This file should only be included by unix builds!!" +#endif + +namespace physx +{ +namespace shdfnd +{ + +PX_FORCE_INLINE void memoryBarrier() +{ + __sync_synchronize(); +} + +/*! +Return the index of the highest set bit. Undefined for zero arg. +*/ +PX_INLINE uint32_t highestSetBitUnsafe(uint32_t v) +{ + + return 31 - __builtin_clz(v); +} + +/*! +Return the index of the highest set bit. Undefined for zero arg. +*/ +PX_INLINE int32_t lowestSetBitUnsafe(uint32_t v) +{ + return __builtin_ctz(v); +} + +/*! +Returns the index of the highest set bit. Returns 32 for v=0. +*/ +PX_INLINE uint32_t countLeadingZeros(uint32_t v) +{ + if(v) + return __builtin_clz(v); + else + return 32; +} + +/*! +Prefetch aligned 64B x86, 32b ARM around \c ptr+offset. +*/ +PX_FORCE_INLINE void prefetchLine(const void* ptr, uint32_t offset = 0) +{ + __builtin_prefetch(reinterpret_cast<const char* PX_RESTRICT>(ptr) + offset, 0, 3); +} + +/*! +Prefetch \c count bytes starting at \c ptr. +*/ +#if PX_ANDROID || PX_IOS +PX_FORCE_INLINE void prefetch(const void* ptr, uint32_t count = 1) +{ + const char* cp = static_cast<const char*>(ptr); + size_t p = reinterpret_cast<size_t>(ptr); + uint32_t startLine = uint32_t(p >> 5), endLine = uint32_t((p + count - 1) >> 5); + uint32_t lines = endLine - startLine + 1; + do + { + prefetchLine(cp); + cp += 32; + } while(--lines); +} +#else +PX_FORCE_INLINE void prefetch(const void* ptr, uint32_t count = 1) +{ + const char* cp = reinterpret_cast<const char*>(ptr); + uint64_t p = size_t(ptr); + uint64_t startLine = p >> 6, endLine = (p + count - 1) >> 6; + uint64_t lines = endLine - startLine + 1; + do + { + prefetchLine(cp); + cp += 64; + } while(--lines); +} +#endif + +//! \brief platform-specific reciprocal +PX_CUDA_CALLABLE PX_FORCE_INLINE float recipFast(float a) +{ + return 1.0f / a; +} + +//! \brief platform-specific fast reciprocal square root +PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrtFast(float a) +{ + return 1.0f / ::sqrtf(a); +} + +//! \brief platform-specific floor +PX_CUDA_CALLABLE PX_FORCE_INLINE float floatFloor(float x) +{ + return ::floorf(x); +} + +#define NS_EXPECT_TRUE(x) x +#define NS_EXPECT_FALSE(x) x + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSUNIXINTRINSICS_H diff --git a/NvCloth/include/NvCloth/ps/windows/PsWindowsIntrinsics.h b/NvCloth/include/NvCloth/ps/windows/PsWindowsIntrinsics.h new file mode 100644 index 0000000..ac81f02 --- /dev/null +++ b/NvCloth/include/NvCloth/ps/windows/PsWindowsIntrinsics.h @@ -0,0 +1,189 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. +// Copyright (c) 2004-2008 AGEIA Technologies, Inc. All rights reserved. +// Copyright (c) 2001-2004 NovodeX AG. All rights reserved. + +#ifndef PSFOUNDATION_PSWINDOWSINTRINSICS_H +#define PSFOUNDATION_PSWINDOWSINTRINSICS_H + +#include "Ps.h" + +// this file is for internal intrinsics - that is, intrinsics that are used in +// cross platform code but do not appear in the API + +#if !PX_WINDOWS_FAMILY +#error "This file should only be included by Windows builds!!" +#endif + +#pragma warning(push) +//'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives' +#pragma warning(disable : 4668) +#if PX_VC == 10 +#pragma warning(disable : 4987) // nonstandard extension used: 'throw (...)' +#endif +#include <intrin.h> +#pragma warning(pop) + +#pragma warning(push) +#pragma warning(disable : 4985) // 'symbol name': attributes not present on previous declaration +#include <math.h> +#pragma warning(pop) + +#include <float.h> +#include <mmintrin.h> + +#pragma intrinsic(_BitScanForward) +#pragma intrinsic(_BitScanReverse) + +namespace physx +{ +namespace shdfnd +{ + +/* +* Implements a memory barrier +*/ +PX_FORCE_INLINE void memoryBarrier() +{ + _ReadWriteBarrier(); + /* long Barrier; + __asm { + xchg Barrier, eax + }*/ +} + +/*! +Returns the index of the highest set bit. Not valid for zero arg. +*/ +PX_FORCE_INLINE uint32_t highestSetBitUnsafe(uint32_t v) +{ + unsigned long retval; + _BitScanReverse(&retval, v); + return retval; +} + +/*! +Returns the index of the highest set bit. Undefined for zero arg. +*/ +PX_FORCE_INLINE uint32_t lowestSetBitUnsafe(uint32_t v) +{ + unsigned long retval; + _BitScanForward(&retval, v); + return retval; +} + +/*! +Returns the number of leading zeros in v. Returns 32 for v=0. +*/ +PX_FORCE_INLINE uint32_t countLeadingZeros(uint32_t v) +{ + if(v) + { + unsigned long bsr = (unsigned long)-1; + _BitScanReverse(&bsr, v); + return 31 - bsr; + } + else + return 32; +} + +/*! +Prefetch aligned cache size around \c ptr+offset. +*/ +#if !PX_ARM +PX_FORCE_INLINE void prefetchLine(const void* ptr, uint32_t offset = 0) +{ + // cache line on X86/X64 is 64-bytes so a 128-byte prefetch would require 2 prefetches. + // However, we can only dispatch a limited number of prefetch instructions so we opt to prefetch just 1 cache line + /*_mm_prefetch(((const char*)ptr + offset), _MM_HINT_T0);*/ + // We get slightly better performance prefetching to non-temporal addresses instead of all cache levels + _mm_prefetch(((const char*)ptr + offset), _MM_HINT_NTA); +} +#else +PX_FORCE_INLINE void prefetchLine(const void* ptr, uint32_t offset = 0) +{ + // arm does have 32b cache line size + __prefetch(((const char*)ptr + offset)); +} +#endif + +/*! +Prefetch \c count bytes starting at \c ptr. +*/ +#if !PX_ARM +PX_FORCE_INLINE void prefetch(const void* ptr, uint32_t count = 1) +{ + const char* cp = (char*)ptr; + uint64_t p = size_t(ptr); + uint64_t startLine = p >> 6, endLine = (p + count - 1) >> 6; + uint64_t lines = endLine - startLine + 1; + do + { + prefetchLine(cp); + cp += 64; + } while(--lines); +} +#else +PX_FORCE_INLINE void prefetch(const void* ptr, uint32_t count = 1) +{ + const char* cp = (char*)ptr; + uint32_t p = size_t(ptr); + uint32_t startLine = p >> 5, endLine = (p + count - 1) >> 5; + uint32_t lines = endLine - startLine + 1; + do + { + prefetchLine(cp); + cp += 32; + } while(--lines); +} +#endif + +//! \brief platform-specific reciprocal +PX_CUDA_CALLABLE PX_FORCE_INLINE float recipFast(float a) +{ + return 1.0f / a; +} + +//! \brief platform-specific fast reciprocal square root +PX_CUDA_CALLABLE PX_FORCE_INLINE float recipSqrtFast(float a) +{ + return 1.0f / ::sqrtf(a); +} + +//! \brief platform-specific floor +PX_CUDA_CALLABLE PX_FORCE_INLINE float floatFloor(float x) +{ + return ::floorf(x); +} + +#define NS_EXPECT_TRUE(x) x +#define NS_EXPECT_FALSE(x) x + +} // namespace shdfnd +} // namespace physx + +#endif // #ifndef PSFOUNDATION_PSWINDOWSINTRINSICS_H |