//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//  * Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
//  * Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//  * Neither the name of NVIDIA CORPORATION nor the names of its
//    contributors may be used to endorse or promote products derived
//    from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Copyright (c) 2018 NVIDIA Corporation. All rights reserved.

#ifndef APEX_MIRRORED_H
#define APEX_MIRRORED_H

#include "ApexDefs.h"
#include "Apex.h"
#include "ApexCutil.h"
#include "SceneIntl.h"

#include "PxTaskManager.h"
#include "PxGpuDispatcher.h"

#if defined(__CUDACC__)
#error "Mirrored arrays should not be visible to CUDA code. Send device pointers to CUDA kernels."
#endif

#if APEX_CUDA_SUPPORT

#include "PxGpuCopyDesc.h"
#include "PxGpuCopyDescQueue.h"
#include "PxCudaContextManager.h"
#include "PxCudaMemoryManager.h"
//#include

#else

#define PX_ALLOC_INFO(name, ID) __FILE__, __LINE__, name, physx::PxAllocId::ID

#define PX_ALLOC_INFO_PARAMS_DECL(p0, p1, p2, p3)  const char* file = p0, int line = p1, const char* allocName = p2, physx::PxAllocId::Enum allocId = physx::PxAllocId::p3
#define PX_ALLOC_INFO_PARAMS_DEF()  const char* file, int line, const char* allocName, physx::PxAllocId::Enum allocId
#define PX_ALLOC_INFO_PARAMS_INPUT()  file, line, allocName, allocId
#define PX_ALLOC_INFO_PARAMS_INPUT_INFO(info)  info.getFileName(), info.getLine(), info.getAllocName(), info.getAllocId()
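
// Illustrative note (not part of the original header): with this fallback
// definition, a call site such as
//
//   PX_ALLOC_INFO("positions", PARTICLES)
//
// expands to
//
//   __FILE__, __LINE__, "positions", physx::PxAllocId::PARTICLES
//
// which matches the four allocation-tracking parameters declared by
// PX_ALLOC_INFO_PARAMS_DECL in the ApexMirrored constructor below.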

namespace physx
{

struct PxAllocId
{
	/**
	 * \brief ID of the feature which owns/allocated memory from the heap
	 */
	enum Enum
	{
		UNASSIGNED,	//!< default
		APEX,		//!< APEX stuff not further classified
		PARTICLES,	//!< all particle related
		GPU_UTIL,	//!< e.g. RadixSort (used in SPH and deformable self collision)
		CLOTH,		//!< all cloth related
		NUM_IDS		//!< number of IDs, be aware that ApexHeapStats contains PxAllocIdStats[NUM_IDS]
	};
};

/// \brief class to track allocation statistics, see PxgMirrored
class PxAllocInfo
{
public:
	/**
	 * \brief AllocInfo default constructor
	 */
	PxAllocInfo() {}

	/**
	 * \brief AllocInfo constructor that initializes all of the members
	 */
	PxAllocInfo(const char* file, int line, const char* allocName, PxAllocId::Enum allocId)
		: mFileName(file)
		, mLine(line)
		, mAllocName(allocName)
		, mAllocId(allocId)
	{
	}

	/// \brief get the allocation file name
	inline const char* getFileName() const
	{
		return mFileName;
	}

	/// \brief get the allocation line
	inline int getLine() const
	{
		return mLine;
	}

	/// \brief get the allocation name
	inline const char* getAllocName() const
	{
		return mAllocName;
	}

	/// \brief get the allocation ID
	inline PxAllocId::Enum getAllocId() const
	{
		return mAllocId;
	}

private:
	const char*		mFileName;
	int				mLine;
	const char*		mAllocName;
	PxAllocId::Enum	mAllocId;
};

} // namespace physx

#endif // APEX_CUDA_SUPPORT

namespace nvidia
{
namespace apex
{

struct ApexMirroredPlace
{
	enum Enum
	{
		DEFAULT = 0,
		CPU     = 0x01,
#if APEX_CUDA_SUPPORT
		GPU     = 0x02,
		CPU_GPU = (CPU | GPU),
#endif
	};
};

template <class T>
class ApexMirrored
{
	PX_NOCOPY(ApexMirrored);

public:
	ApexMirrored(SceneIntl& scene, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED))
		: mCpuPtr(0)
		, mByteCount(0)
		, mPlace(ApexMirroredPlace::CPU)
		, mAllocInfo(PX_ALLOC_INFO_PARAMS_INPUT())
#if APEX_CUDA_SUPPORT
		, mCpuBuffer(NULL)
		, mGpuPtr(0)
		, mGpuBuffer(NULL)
#endif
	{
		PX_UNUSED(scene);
#if APEX_CUDA_SUPPORT
		PxGpuDispatcher* gd = scene.getTaskManager()->getGpuDispatcher();
		if (gd)
		{
			mCtx = gd->getCudaContextManager();
		}
		else
		{
			mCtx = NULL;
			return;
		}
#endif
	}

	~ApexMirrored()
	{
	}

	// Operators for accessing the data pointed to on the host. Using these operators is
	// guaranteed to maintain the class invariants. Note that these operators are only ever
	// called on the host. The GPU never sees this class, as instances are converted to
	// regular pointers upon kernel invocation.
	PX_INLINE T& operator*()
	{
		return *getCpuPtr();
	}
	PX_INLINE const T& operator*() const
	{
		return *getCpuPtr();
	}
	PX_INLINE T* operator->()
	{
		return getCpuPtr();
	}
	PX_INLINE const T* operator->() const
	{
		return getCpuPtr();
	}
	PX_INLINE T& operator[](unsigned int i)
	{
		return getCpuPtr()[i];
	}

	// Methods for converting the mirrored pointer to a regular pointer for use on the CPU.
	// After a pointer has been obtained with these methods, the data can be accessed
	// multiple times with no extra cost. This is the fastest method for accessing the data
	// on the CPU.
	PX_INLINE T* getCpuPtr() const
	{
		return mCpuPtr;
	}

	/*!
	\return returns whether a CPU buffer has been allocated for this array
	*/
	PX_INLINE bool cpuPtrIsValid() const
	{
		return mCpuPtr != 0;
	}

	PX_INLINE size_t* getCpuHandle() const
	{
		return reinterpret_cast<size_t*>(&mCpuPtr);
	}

	PX_INLINE size_t getByteSize() const
	{
		return mByteCount;
	}
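
	// Host-side usage sketch (illustrative; 'scene' is assumed to be an existing
	// SceneIntl reference, and the name/ID arguments are placeholders):
	//
	//   ApexMirrored<uint32_t> counts(scene, PX_ALLOC_INFO("counts", APEX));
	//   counts.realloc(16 * sizeof(uint32_t), ApexMirroredPlace::CPU);
	//   counts[0] = 42;                       // operator[] goes through getCpuPtr()
	//   uint32_t* raw = counts.getCpuPtr();   // cache the raw pointer for repeated access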

#if APEX_CUDA_SUPPORT
	/*!
	\return returns whether a GPU buffer has been allocated for this array
	*/
	PX_INLINE bool gpuPtrIsValid() const
	{
		return mGpuPtr != 0;
	}

	PX_INLINE T* getGpuPtr() const
	{
		return mGpuPtr;
	}

	/*!
	Get an opaque handle to the underlying GPU or CPU memory. These handles must not be
	cast to a pointer or dereferenced; they should only be used to identify the memory
	region to the allocator.
	*/
	PX_INLINE size_t* getGpuHandle() const
	{
		return reinterpret_cast<size_t*>(&mGpuPtr);
	}

	PX_INLINE void copyDeviceToHostDesc(PxGpuCopyDesc& desc, size_t byteSize, size_t byteOffset) const
	{
		PX_ASSERT(mCpuPtr && mGpuPtr && mByteCount);
		desc.type = PxGpuCopyDesc::DeviceToHost;
		desc.bytes = byteSize;
		desc.source = ((size_t) mGpuPtr) + byteOffset;
		desc.dest = ((size_t) mCpuPtr) + byteOffset;
	}

	PX_INLINE void copyHostToDeviceDesc(PxGpuCopyDesc& desc, size_t byteSize, size_t byteOffset) const
	{
		PX_ASSERT(mCpuPtr && mGpuPtr && mByteCount);
		desc.type = PxGpuCopyDesc::HostToDevice;
		desc.bytes = byteSize;
		desc.source = ((size_t) mCpuPtr) + byteOffset;
		desc.dest = ((size_t) mGpuPtr) + byteOffset;
	}
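
	// Transfer sketch (illustrative): the copy*Desc helpers above only fill in a
	// PxGpuCopyDesc; submitting it, e.g. through a PxGpuCopyDescQueue obtained from
	// the GPU dispatcher, is left to the caller. Something along these lines:
	//
	//   PxGpuCopyDesc desc;
	//   mirrored.copyHostToDeviceDesc(desc, mirrored.getByteSize(), 0);
	//   copyQueue.enqueue(desc);   // 'copyQueue' is a hypothetical PxGpuCopyDescQueue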

	PX_INLINE void mallocGpu(size_t byteSize)
	{
		PxCudaBufferType bufferType(PxCudaBufferMemorySpace::T_GPU, PxCudaBufferFlags::F_READ_WRITE);
		PxCudaBuffer* buffer = mCtx->getMemoryManager()->alloc(bufferType, (uint32_t)byteSize);
		if (buffer)
		{
			// in case of realloc
			if (mGpuBuffer)
			{
				mGpuBuffer->free();
			}
			mGpuBuffer = buffer;
			mGpuPtr = reinterpret_cast<T*>(mGpuBuffer->getPtr());
			PX_ASSERT(mGpuPtr);
		}
		else
		{
			PX_ASSERT(!"Out of GPU Memory!");
		}
	}

	PX_INLINE void freeGpu()
	{
		if (mGpuBuffer)
		{
			bool success = mGpuBuffer->free();
			mGpuBuffer = NULL;
			mGpuPtr = NULL;
			PX_UNUSED(success);
			PX_ASSERT(success);
		}
	}

	PX_INLINE void mallocHost(size_t byteSize)
	{
		PxCudaBufferType bufferType(PxCudaBufferMemorySpace::T_PINNED_HOST, PxCudaBufferFlags::F_READ_WRITE);
		PxCudaBuffer* buffer = mCtx->getMemoryManager()->alloc(bufferType, (uint32_t)byteSize);
		if (buffer)
		{
			// in case of realloc
			if (mCpuBuffer)
			{
				mCpuBuffer->free();
			}
			mCpuBuffer = buffer;
			mCpuPtr = reinterpret_cast<T*>(mCpuBuffer->getPtr());
			PX_ASSERT(mCpuPtr);
		}
		else
		{
			PX_ASSERT(!"Out of Pinned Host Memory!");
		}
	}

	PX_INLINE void freeHost()
	{
		if (mCpuBuffer)
		{
			bool success = mCpuBuffer->free();
			mCpuBuffer = NULL;
			mCpuPtr = NULL;
			PX_UNUSED(success);
			PX_ASSERT(success);
		}
	}

	PX_INLINE void swapGpuPtr(ApexMirrored& other)
	{
		nvidia::swap(mGpuPtr, other.mGpuPtr);
		nvidia::swap(mGpuBuffer, other.mGpuBuffer);
	}
#endif

	PX_INLINE const PxAllocInfo& getAllocInfo() const
	{
		return mAllocInfo;
	}

	PX_INLINE void mallocCpu(size_t byteSize)
	{
		mCpuPtr = (T*)getAllocator().allocate(byteSize, mAllocInfo.getAllocName(), mAllocInfo.getFileName(), mAllocInfo.getLine());
		PX_ASSERT(mCpuPtr && "Out of CPU Memory!");
	}

	PX_INLINE void freeCpu()
	{
		if (mCpuPtr)
		{
			getAllocator().deallocate(mCpuPtr);
			mCpuPtr = NULL;
		}
	}

	PX_INLINE const char* getName() const
	{
		return mAllocInfo.getAllocName();
	}

	void realloc(size_t byteCount, ApexMirroredPlace::Enum place)
	{
		ApexMirroredPlace::Enum oldPlace = mPlace;
		ApexMirroredPlace::Enum newPlace = (place != ApexMirroredPlace::DEFAULT) ? place : oldPlace;
		if (oldPlace == newPlace && byteCount <= mByteCount)
		{
			return;
		}
		size_t newSize = PxMax(byteCount, mByteCount);
#if APEX_CUDA_SUPPORT
		if (oldPlace != ApexMirroredPlace::CPU && newPlace != ApexMirroredPlace::CPU)
		{
			PX_ASSERT(oldPlace != ApexMirroredPlace::CPU);
			PX_ASSERT(newPlace != ApexMirroredPlace::CPU);

			if ((mCpuPtr != NULL && byteCount > mByteCount) || (mCpuPtr == NULL && (place & ApexMirroredPlace::CPU) != 0))
			{
				PxCudaBuffer* oldCpuBuffer = mCpuBuffer;
				T* oldCpuPtr = mCpuPtr;

				// allocate the new pinned host buffer and copy the old contents over
				mCpuBuffer = NULL;
				mallocHost(newSize);
				PxCudaBuffer* newCpuBuffer = mCpuBuffer;
				T* newCpuPtr = mCpuPtr;
				if (oldCpuPtr != NULL && newCpuPtr != NULL && mByteCount > 0)
				{
					memcpy(mCpuPtr, oldCpuPtr, mByteCount);
				}

				// temporarily restore the old buffer so freeHost() releases it,
				// then switch to the new buffer
				mCpuBuffer = oldCpuBuffer;
				mCpuPtr = oldCpuPtr;
				freeHost();
				mCpuBuffer = newCpuBuffer;
				mCpuPtr = newCpuPtr;
			}

			if ((mGpuPtr != NULL && byteCount > mByteCount) || (mGpuPtr == NULL && (place & ApexMirroredPlace::GPU) != 0))
			{
				// we explicitly do not move old data to the new buffer
				freeGpu();
				mallocGpu(newSize);
			}
		}
		else
#endif
		{
			T* oldCpuPtr = mCpuPtr;
#if APEX_CUDA_SUPPORT
			if (newPlace != ApexMirroredPlace::CPU)
			{
				if (newPlace == ApexMirroredPlace::CPU_GPU)
				{
					mallocHost(newSize);
				}
				else
				{
					mCpuPtr = NULL;
				}
				mallocGpu(newSize);
			}
			else
#endif
			{
				mallocCpu(newSize);
			}
			T* newCpuPtr = mCpuPtr;
			if (oldCpuPtr != NULL && newCpuPtr != NULL && mByteCount > 0)
			{
				memcpy(newCpuPtr, oldCpuPtr, mByteCount);
			}

			// temporarily restore the old pointer so the matching free*() releases it
			mCpuPtr = oldCpuPtr;
#if APEX_CUDA_SUPPORT
			if (oldPlace != ApexMirroredPlace::CPU)
			{
				if (oldPlace == ApexMirroredPlace::CPU_GPU)
				{
					freeHost();
				}
				freeGpu();
			}
			else
#endif
			{
				freeCpu();
			}
			mCpuPtr = newCpuPtr;
		}
		mByteCount = newSize;
		mPlace = newPlace;
	}

	void free()
	{
		PX_ASSERT(mPlace != ApexMirroredPlace::DEFAULT);
#if APEX_CUDA_SUPPORT
		if (mPlace != ApexMirroredPlace::CPU)
		{
			freeHost();
			freeGpu();
		}
		else
#endif
		{
			freeCpu();
		}
		mByteCount = 0;
	}

private:
	mutable T*				mCpuPtr;
	size_t					mByteCount;
	ApexMirroredPlace::Enum	mPlace;
	PxAllocInfo				mAllocInfo;
#if APEX_CUDA_SUPPORT
	mutable PxCudaBuffer*	mCpuBuffer;
	mutable T*				mGpuPtr;
	mutable PxCudaBuffer*	mGpuBuffer;
	PxCudaContextManager*	mCtx;
#endif
};

}
} // end namespace nvidia::apex

#endif // APEX_MIRRORED_H