author    git perforce import user <a@b>                        2016-10-25 12:29:14 -0600
committer Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> 2016-10-25 18:56:37 -0500
commit    3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree      fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /APEX_1.4/common/include/ApexMirrored.h
Initial commit:
PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
Diffstat (limited to 'APEX_1.4/common/include/ApexMirrored.h')
-rw-r--r--  APEX_1.4/common/include/ApexMirrored.h  507
1 file changed, 507 insertions(+), 0 deletions(-)
diff --git a/APEX_1.4/common/include/ApexMirrored.h b/APEX_1.4/common/include/ApexMirrored.h
new file mode 100644
index 00000000..3ae808f7
--- /dev/null
+++ b/APEX_1.4/common/include/ApexMirrored.h
@@ -0,0 +1,507 @@
+/*
+ * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
+ * and proprietary rights in and to this software, related documentation
+ * and any modifications thereto. Any use, reproduction, disclosure or
+ * distribution of this software and related documentation without an express
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ */
+
+
+#ifndef APEX_MIRRORED_H
+#define APEX_MIRRORED_H
+
+#include "ApexDefs.h"
+
+#include "Apex.h"
+#include "ApexCutil.h"
+#include "SceneIntl.h"
+
+#include "PxTaskManager.h"
+#include "PxGpuDispatcher.h"
+#include "PxGpuCopyDesc.h"
+#include "PxGpuCopyDescQueue.h"
+#include "PxCudaContextManager.h"
+#include "PxCudaMemoryManager.h"
+//#include <cuda.h>
+
+#if defined(__CUDACC__)
+#error "Mirrored arrays should not be visible to CUDA code. Send device pointers to CUDA kernels."
+#endif
+
+
+#if !PX_SUPPORT_GPU_PHYSX
+#define PX_ALLOC_INFO(name, ID) __FILE__, __LINE__, name, physx::PxAllocId::ID
+#define PX_ALLOC_INFO_PARAMS_DECL(p0, p1, p2, p3) const char* file = p0, int line = p1, const char* allocName = p2, physx::PxAllocId::Enum allocId = physx::PxAllocId::p3
+#define PX_ALLOC_INFO_PARAMS_DEF() const char* file, int line, const char* allocName, physx::PxAllocId::Enum allocId
+#define PX_ALLOC_INFO_PARAMS_INPUT() file, line, allocName, allocId
+#define PX_ALLOC_INFO_PARAMS_INPUT_INFO(info) info.getFileName(), info.getLine(), info.getAllocName(), info.getAllocId()
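+
+// Usage sketch (illustrative, not part of the original header): PX_ALLOC_INFO
+// expands to __FILE__, __LINE__, plus a caller-supplied name and ID, matching
+// the four parameters declared by PX_ALLOC_INFO_PARAMS_DECL. A call site for
+// the ApexMirrored constructor below could therefore read:
+//
+//   ApexMirrored<float> buf(scene, PX_ALLOC_INFO("myParticleBuffer", PARTICLES));
+//
+// where "myParticleBuffer" is a hypothetical allocation name used for heap stats.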
+
+namespace physx
+{
+
+struct PxAllocId
+{
+ /**
+ * \brief ID of the Feature which owns/allocated memory from the heap
+ */
+ enum Enum
+ {
+ UNASSIGNED, //!< default
+ APEX, //!< APEX stuff not further classified
+ PARTICLES, //!< all particle related
+ GPU_UTIL, //!< e.g. RadixSort (used in SPH and deformable self collision)
+ CLOTH, //!< all cloth related
+ NUM_IDS //!< number of IDs, be aware that ApexHeapStats contains PxAllocIdStats[NUM_IDS]
+ };
+};
+
+/// \brief class to track allocation statistics, see ApexMirrored below
+class PxAllocInfo
+{
+public:
+ /**
+ * \brief PxAllocInfo default constructor
+ */
+ PxAllocInfo() {}
+
+ /**
+ * \brief PxAllocInfo constructor that initializes all of the members
+ */
+ PxAllocInfo(const char* file, int line, const char* allocName, PxAllocId::Enum allocId)
+ : mFileName(file)
+ , mLine(line)
+ , mAllocName(allocName)
+ , mAllocId(allocId)
+ {
+ }
+
+ /// \brief get the allocation file name
+ inline const char* getFileName() const
+ {
+ return mFileName;
+ }
+
+ /// \brief get the allocation line
+ inline int getLine() const
+ {
+ return mLine;
+ }
+
+ /// \brief get the allocation name
+ inline const char* getAllocName() const
+ {
+ return mAllocName;
+ }
+
+ /// \brief get the allocation ID
+ inline PxAllocId::Enum getAllocId() const
+ {
+ return mAllocId;
+ }
+
+private:
+ const char* mFileName;
+ int mLine;
+ const char* mAllocName;
+ PxAllocId::Enum mAllocId;
+};
+
+}
+
+#endif
+
+namespace nvidia
+{
+namespace apex
+{
+
+struct ApexMirroredPlace
+{
+ enum Enum
+ {
+ DEFAULT = 0,
+ CPU = 0x01,
+#if APEX_CUDA_SUPPORT
+ GPU = 0x02,
+ CPU_GPU = (CPU | GPU),
+#endif
+ };
+};
+
+
+template <class T>
+class ApexMirrored
+{
+ PX_NOCOPY(ApexMirrored);
+
+public:
+ ApexMirrored(SceneIntl& scene, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED))
+ : mCpuPtr(0)
+ , mByteCount(0)
+ , mPlace(ApexMirroredPlace::CPU)
+ , mAllocInfo(PX_ALLOC_INFO_PARAMS_INPUT())
+#if APEX_CUDA_SUPPORT
+ , mCpuBuffer(NULL)
+ , mGpuPtr(0)
+ , mGpuBuffer(NULL)
+#endif
+ {
+ PX_UNUSED(scene);
+#if APEX_CUDA_SUPPORT
+ PxGpuDispatcher* gd = scene.getTaskManager()->getGpuDispatcher();
+ if (gd)
+ {
+ mCtx = gd->getCudaContextManager();
+ }
+ else
+ {
+ mCtx = NULL;
+ return;
+ }
+#endif
+ }
+
+ ~ApexMirrored()
+ {
+ }
+
+ //Operators for accessing the data pointed to on the host. Using these operators is guaranteed
+ //to maintain the class invariants. Note that these operators are only ever called on the host.
+ //The GPU never sees this class as instances are converted to regular pointers upon kernel
+ //invocation.
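+ //
+ // A minimal host-side sketch (illustrative; 'scene' is a SceneIntl and 'n'
+ // an element count, both assumed):
+ //
+ //   ApexMirrored<PxVec3> positions(scene);
+ //   positions.realloc(n * sizeof(PxVec3), ApexMirroredPlace::CPU);
+ //   positions[0] = PxVec3(0.0f);   // host access through operator[]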
+
+ PX_INLINE T& operator*()
+ {
+ return *getCpuPtr();
+ }
+
+ PX_INLINE const T& operator*() const
+ {
+ return *getCpuPtr();
+ }
+
+ PX_INLINE T* operator->()
+ {
+ return getCpuPtr();
+ }
+
+ PX_INLINE const T* operator->() const
+ {
+ return getCpuPtr();
+ }
+
+ PX_INLINE T& operator[](unsigned int i)
+ {
+ return getCpuPtr()[i];
+ }
+
+ //Methods for obtaining a regular pointer for use on the CPU. After a
+ //pointer has been obtained with these methods, the data can be accessed
+ //multiple times at no extra cost. This is the fastest way to access the
+ //data on the CPU.
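+ //
+ // For example (illustrative, continuing the 'positions' sketch above):
+ //
+ //   PxVec3* p = positions.getCpuPtr();  // hoist the pointer out of the loop
+ //   for (uint32_t i = 0; i < n; ++i)
+ //   {
+ //       p[i] = PxVec3(0.0f);
+ //   }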
+
+ PX_INLINE T* getCpuPtr() const
+ {
+ return mCpuPtr;
+ }
+
+ /*!
+ \return whether a CPU buffer has been allocated for this array
+ */
+ PX_INLINE bool cpuPtrIsValid() const
+ {
+ return mCpuPtr != 0;
+ }
+
+ PX_INLINE size_t* getCpuHandle() const
+ {
+ return reinterpret_cast<size_t*>(&mCpuPtr);
+ }
+
+ PX_INLINE size_t getByteSize() const
+ {
+ return mByteCount;
+ }
+
+#if APEX_CUDA_SUPPORT
+ /*!
+ \return whether a GPU buffer has been allocated for this array
+ */
+ PX_INLINE bool gpuPtrIsValid() const
+ {
+ return mGpuPtr != 0;
+ }
+
+ PX_INLINE T* getGpuPtr() const
+ {
+ return mGpuPtr;
+ }
+
+ /*!
+ Get an opaque handle to the underlying GPU or CPU memory. These handles
+ must not be cast to pointers or dereferenced; they should only be used
+ to identify the memory region to the allocator.
+ */
+ PX_INLINE size_t* getGpuHandle() const
+ {
+ return reinterpret_cast<size_t*>(&mGpuPtr);
+ }
+
+ PX_INLINE void copyDeviceToHostDesc(PxGpuCopyDesc& desc, size_t byteSize, size_t byteOffset) const
+ {
+ PX_ASSERT(mCpuPtr && mGpuPtr && mByteCount);
+ desc.type = PxGpuCopyDesc::DeviceToHost;
+ desc.bytes = byteSize;
+ desc.source = ((size_t) mGpuPtr) + byteOffset;
+ desc.dest = ((size_t) mCpuPtr) + byteOffset;
+ }
+
+ PX_INLINE void copyHostToDeviceDesc(PxGpuCopyDesc& desc, size_t byteSize, size_t byteOffset) const
+ {
+ PX_ASSERT(mCpuPtr && mGpuPtr && mByteCount);
+ desc.type = PxGpuCopyDesc::HostToDevice;
+ desc.bytes = byteSize;
+ desc.source = ((size_t) mCpuPtr) + byteOffset;
+ desc.dest = ((size_t) mGpuPtr) + byteOffset;
+ }
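+
+ // Usage sketch (illustrative): fill a descriptor covering the whole mirrored
+ // range and hand it to the GPU dispatcher's copy queue. The PxGpuCopyDescQueue
+ // enqueue()/flushEnqueued() interface is an assumption here:
+ //
+ //   PxGpuCopyDesc desc;
+ //   positions.copyHostToDeviceDesc(desc, positions.getByteSize(), 0);
+ //   queue.enqueue(desc);   // batched; submitted when the queue is flushed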
+
+ PX_INLINE void mallocGpu(size_t byteSize)
+ {
+ PxCudaBufferType bufferType(PxCudaBufferMemorySpace::T_GPU, PxCudaBufferFlags::F_READ_WRITE);
+ PxCudaBuffer* buffer = mCtx->getMemoryManager()->alloc(bufferType, (uint32_t)byteSize);
+ if (buffer)
+ {
+ // in case of realloc
+ if (mGpuBuffer)
+ {
+ mGpuBuffer->free();
+ }
+ mGpuBuffer = buffer;
+ mGpuPtr = reinterpret_cast<T*>(mGpuBuffer->getPtr());
+ PX_ASSERT(mGpuPtr);
+ }
+ else
+ {
+ PX_ASSERT(!"Out of GPU Memory!");
+ }
+ }
+
+ PX_INLINE void freeGpu()
+ {
+ if (mGpuBuffer)
+ {
+ bool success = mGpuBuffer->free();
+ mGpuBuffer = NULL;
+ mGpuPtr = NULL;
+ PX_UNUSED(success);
+ PX_ASSERT(success);
+ }
+ }
+
+ PX_INLINE void mallocHost(size_t byteSize)
+ {
+ PxCudaBufferType bufferType(PxCudaBufferMemorySpace::T_PINNED_HOST, PxCudaBufferFlags::F_READ_WRITE);
+ PxCudaBuffer* buffer = mCtx->getMemoryManager()->alloc(bufferType, (uint32_t)byteSize);
+ if (buffer)
+ {
+ // in case of realloc
+ if (mCpuBuffer)
+ {
+ mCpuBuffer->free();
+ }
+ mCpuBuffer = buffer;
+ mCpuPtr = reinterpret_cast<T*>(mCpuBuffer->getPtr());
+ PX_ASSERT(mCpuPtr);
+ }
+ else
+ {
+ PX_ASSERT(!"Out of Pinned Host Memory!");
+ }
+ }
+
+ PX_INLINE void freeHost()
+ {
+ if (mCpuBuffer)
+ {
+ bool success = mCpuBuffer->free();
+ mCpuBuffer = NULL;
+ mCpuPtr = NULL;
+ PX_UNUSED(success);
+ PX_ASSERT(success);
+ }
+ }
+
+ PX_INLINE void swapGpuPtr(ApexMirrored<T>& other)
+ {
+ nvidia::swap(mGpuPtr, other.mGpuPtr);
+ nvidia::swap(mGpuBuffer, other.mGpuBuffer);
+ }
+#endif
+
+ PX_INLINE const PxAllocInfo& getAllocInfo() const
+ {
+ return mAllocInfo;
+ }
+
+ PX_INLINE void mallocCpu(size_t byteSize)
+ {
+ mCpuPtr = (T*)getAllocator().allocate(byteSize, mAllocInfo.getAllocName(), mAllocInfo.getFileName(), mAllocInfo.getLine());
+ PX_ASSERT(mCpuPtr && "Out of CPU Memory!");
+ }
+
+ PX_INLINE void freeCpu()
+ {
+ if (mCpuPtr)
+ {
+ getAllocator().deallocate(mCpuPtr);
+ mCpuPtr = NULL;
+ }
+ }
+
+
+ PX_INLINE const char* getName() const
+ {
+ return mAllocInfo.getAllocName();
+ }
+
+ void realloc(size_t byteCount, ApexMirroredPlace::Enum place)
+ {
+ ApexMirroredPlace::Enum oldPlace = mPlace;
+ ApexMirroredPlace::Enum newPlace = (place != ApexMirroredPlace::DEFAULT) ? place : oldPlace;
+ if (oldPlace == newPlace && byteCount <= mByteCount)
+ {
+ return;
+ }
+
+ size_t newSize = PxMax(byteCount, mByteCount);
+
+#if APEX_CUDA_SUPPORT
+ if (oldPlace != ApexMirroredPlace::CPU && newPlace != ApexMirroredPlace::CPU)
+ {
+ PX_ASSERT(oldPlace != ApexMirroredPlace::CPU);
+ PX_ASSERT(newPlace != ApexMirroredPlace::CPU);
+
+ if ((mCpuPtr != NULL && byteCount > mByteCount) ||
+ (mCpuPtr == NULL && (place & ApexMirroredPlace::CPU) != 0))
+ {
+ PxCudaBuffer* oldCpuBuffer = mCpuBuffer;
+ T* oldCpuPtr = mCpuPtr;
+
+ mCpuBuffer = NULL;
+
+ mallocHost(newSize);
+
+ PxCudaBuffer* newCpuBuffer = mCpuBuffer;
+ T* newCpuPtr = mCpuPtr;
+
+ if (oldCpuPtr != NULL && newCpuPtr != NULL && mByteCount > 0)
+ {
+ memcpy(newCpuPtr, oldCpuPtr, mByteCount);
+ }
+
+ // restore the old buffer/pointer pair so freeHost() releases the old
+ // allocation, then install the new pair
+ mCpuBuffer = oldCpuBuffer;
+ mCpuPtr = oldCpuPtr;
+
+ freeHost();
+
+ mCpuBuffer = newCpuBuffer;
+ mCpuPtr = newCpuPtr;
+ }
+ if ((mGpuPtr != NULL && byteCount > mByteCount) ||
+ (mGpuPtr == NULL && (place & ApexMirroredPlace::GPU) != 0))
+ {
+ // we explicitly do not move old data to the new buffer
+
+ freeGpu();
+ mallocGpu(newSize);
+ }
+ }
+ else
+#endif
+ {
+ T* oldCpuPtr = mCpuPtr;
+#if APEX_CUDA_SUPPORT
+ if (newPlace != ApexMirroredPlace::CPU)
+ {
+ if (newPlace == ApexMirroredPlace::CPU_GPU)
+ {
+ mallocHost(newSize);
+ }
+ else
+ {
+ mCpuPtr = NULL;
+ }
+ mallocGpu(newSize);
+ }
+ else
+#endif
+ {
+ mallocCpu(newSize);
+ }
+ T* newCpuPtr = mCpuPtr;
+
+ if (oldCpuPtr != NULL && newCpuPtr != NULL && mByteCount > 0)
+ {
+ memcpy(newCpuPtr, oldCpuPtr, mByteCount);
+ }
+
+ mCpuPtr = oldCpuPtr;
+#if APEX_CUDA_SUPPORT
+ if (oldPlace != ApexMirroredPlace::CPU)
+ {
+ if (oldPlace == ApexMirroredPlace::CPU_GPU)
+ {
+ freeHost();
+ }
+ freeGpu();
+ }
+ else
+#endif
+ {
+ freeCpu();
+ }
+ mCpuPtr = newCpuPtr;
+ }
+ mByteCount = newSize;
+ mPlace = newPlace;
+ }
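+
+ // Behavior sketch (illustrative): realloc() never shrinks; it grows the
+ // backing store and copies the old CPU-side contents into the new buffer:
+ //
+ //   arr.realloc(64, ApexMirroredPlace::CPU);      // allocates >= 64 bytes
+ //   arr.realloc(32, ApexMirroredPlace::DEFAULT);  // no-op: place and size fit
+ //   arr.realloc(128, ApexMirroredPlace::DEFAULT); // grows to >= 128, copies data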
+
+ void free()
+ {
+ PX_ASSERT(mPlace != ApexMirroredPlace::DEFAULT);
+#if APEX_CUDA_SUPPORT
+ if (mPlace != ApexMirroredPlace::CPU)
+ {
+ freeHost();
+ freeGpu();
+ }
+ else
+#endif
+ {
+ freeCpu();
+ }
+ mByteCount = 0;
+ }
+
+private:
+ mutable T* mCpuPtr;
+ size_t mByteCount;
+
+ ApexMirroredPlace::Enum mPlace;
+ PxAllocInfo mAllocInfo;
+
+#if APEX_CUDA_SUPPORT
+ mutable PxCudaBuffer* mCpuBuffer;
+ mutable T* mGpuPtr;
+ mutable PxCudaBuffer* mGpuBuffer;
+ PxCudaContextManager* mCtx;
+#endif
+};
+
+
+}
+} // end namespace nvidia::apex
+
+#endif