diff options
Diffstat (limited to 'PxShared/include/cudamanager')
| -rw-r--r-- | PxShared/include/cudamanager/PxCudaContextManager.h | 425 | ||||
| -rw-r--r-- | PxShared/include/cudamanager/PxCudaMemoryManager.h | 281 | ||||
| -rw-r--r-- | PxShared/include/cudamanager/PxGpuCopyDesc.h | 86 | ||||
| -rw-r--r-- | PxShared/include/cudamanager/PxGpuCopyDescQueue.h | 149 |
4 files changed, 0 insertions, 941 deletions
diff --git a/PxShared/include/cudamanager/PxCudaContextManager.h b/PxShared/include/cudamanager/PxCudaContextManager.h deleted file mode 100644 index aca1112..0000000 --- a/PxShared/include/cudamanager/PxCudaContextManager.h +++ /dev/null @@ -1,425 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - - -#ifndef PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H -#define PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H - -#include "foundation/PxPreprocessor.h" - -#if PX_SUPPORT_GPU_PHYSX - -#include "foundation/PxSimpleTypes.h" -#include "foundation/PxErrorCallback.h" -#include "foundation/PxFlags.h" -#include "task/PxTaskDefine.h" -#include "cudamanager/PxCudaMemoryManager.h" - -/* Forward decl to avoid inclusion of cuda.h */ -typedef struct CUctx_st *CUcontext; -typedef struct CUgraphicsResource_st *CUgraphicsResource; -typedef int CUdevice; - -namespace physx -{ - -class PxGpuDispatcher; - - -/** \brief Possible graphic/CUDA interoperability modes for context */ -struct PxCudaInteropMode -{ - /** - * \brief Possible graphic/CUDA interoperability modes for context - */ - enum Enum - { - NO_INTEROP = 0, - D3D10_INTEROP, - D3D11_INTEROP, - OGL_INTEROP, - - COUNT - }; -}; - -struct PxCudaInteropRegisterFlag -{ - enum Enum - { - eNONE = 0x00, - eREAD_ONLY = 0x01, - eWRITE_DISCARD = 0x02, - eSURFACE_LDST = 0x04, - eTEXTURE_GATHER = 0x08 - }; -}; - -/** -\brief collection of set bits defined in NxCudaInteropRegisterFlag. - -@see NxCudaInteropRegisterFlag -*/ -typedef PxFlags<PxCudaInteropRegisterFlag::Enum, uint32_t> PxCudaInteropRegisterFlags; -PX_FLAGS_OPERATORS(PxCudaInteropRegisterFlag::Enum, uint32_t) - -//! \brief Descriptor used to create a PxCudaContextManager -class PxCudaContextManagerDesc -{ -public: - /** - * \brief The CUDA context to manage - * - * If left NULL, the PxCudaContextManager will create a new context. If - * graphicsDevice is also not NULL, this new CUDA context will be bound to - * that graphics device, enabling the use of CUDA/Graphics interop features. - * - * If ctx is not NULL, the specified context must be applied to the thread - * that is allocating the PxCudaContextManager at creation time (aka, it - * cannot be popped). The PxCudaContextManager will take ownership of the - * context until the manager is released. All access to the context must be - * gated by lock acquisition. - * - * If the user provides a context for the PxCudaContextManager, the context - * _must_ have either been created on the GPU ordinal returned by - * PxGetSuggestedCudaDeviceOrdinal() or on your graphics device. - * - * It is perfectly acceptable to allocate device or host pinned memory from - * the context outside the scope of the PxCudaMemoryManager, so long as you - * manage its eventual cleanup. - */ - CUcontext *ctx; - - /** - * \brief D3D device pointer or OpenGl context handle - * - * Only applicable when ctx is NULL, thus forcing a new context to be - * created. In that case, the created context will be bound to this - * graphics device. - */ - void *graphicsDevice; - -#if PX_SUPPORT_GPU_PHYSX - /** - * \brief Application-specific GUID - * - * If your application employs PhysX modules that use CUDA you need to use a GUID - * so that patches for new architectures can be released for your game.You can obtain a GUID for your - * application from Nvidia. - */ - const char* appGUID; -#endif - /** - * \brief The CUDA/Graphics interop mode of this context - * - * If ctx is NULL, this value describes the nature of the graphicsDevice - * pointer provided by the user. Else it describes the nature of the - * context provided by the user. - */ - PxCudaInteropMode::Enum interopMode; - - - /** - * \brief Size of persistent memory - * - * This memory is allocated up front and stays allocated until the - * PxCudaContextManager is released. Size is in bytes, has to be power of two - * and bigger than the page size. Set to 0 to only use dynamic pages. - * - * Note: On Vista O/S and above, there is a per-memory allocation overhead - * to every CUDA work submission, so we recommend that you carefully tune - * this initial base memory size to closely approximate the amount of - * memory your application will consume. - - Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured - for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig. - */ - uint32_t memoryBaseSize[PxCudaBufferMemorySpace::COUNT]; - - /** - * \brief Size of memory pages - * - * The memory manager will dynamically grow and shrink in blocks multiple of - * this page size. Size has to be power of two and bigger than 0. - - Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured - for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig. - */ - uint32_t memoryPageSize[PxCudaBufferMemorySpace::COUNT]; - - /** - * \brief Maximum size of memory that the memory manager will allocate - - Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured - for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig. - */ - uint32_t maxMemorySize[PxCudaBufferMemorySpace::COUNT]; - - PX_INLINE PxCudaContextManagerDesc() - { - ctx = NULL; - interopMode = PxCudaInteropMode::NO_INTEROP; - graphicsDevice = 0; -#if PX_SUPPORT_GPU_PHYSX - appGUID = NULL; -#endif - for(uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) - { - memoryBaseSize[i] = 0; - memoryPageSize[i] = 2 * 1024*1024; - maxMemorySize[i] = UINT32_MAX; - } - } -}; - - -/** - * \brief Manages memory, thread locks, and task scheduling for a CUDA context - * - * A PxCudaContextManager manages access to a single CUDA context, allowing it to - * be shared between multiple scenes. Memory allocations are dynamic: starting - * with an initial heap size and growing on demand by a configurable page size. - * The context must be acquired from the manager before using any CUDA APIs. - * - * The PxCudaContextManager is based on the CUDA driver API and explictly does not - * support the CUDA runtime API (aka, CUDART). - * - * To enable CUDA use by an APEX scene, a PxCudaContextManager must be created - * (supplying your own CUDA context, or allowing a new context to be allocated - * for you), the PxGpuDispatcher for that context is retrieved via the - * getGpuDispatcher() method, and this is assigned to the TaskManager that is - * given to the scene via its NxApexSceneDesc. - */ -class PxCudaContextManager -{ -public: - /** - * \brief Acquire the CUDA context for the current thread - * - * Acquisitions are allowed to be recursive within a single thread. - * You can acquire the context multiple times so long as you release - * it the same count. - * - * The context must be acquired before using most CUDA functions. - * - * It is not necessary to acquire the CUDA context inside GpuTask - * launch functions, because the PxGpuDispatcher will have already - * acquired the context for its worker thread. However it is not - * harmfull to (re)acquire the context in code that is shared between - * GpuTasks and non-task functions. - */ - virtual void acquireContext() = 0; - - /** - * \brief Release the CUDA context from the current thread - * - * The CUDA context should be released as soon as practically - * possible, to allow other CPU threads (including the - * PxGpuDispatcher) to work efficiently. - */ - virtual void releaseContext() = 0; - - /** - * \brief Return the CUcontext - */ - virtual CUcontext getContext() = 0; - - /** - * \brief Return the PxCudaMemoryManager instance associated with this - * CUDA context - * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured - * for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig. - */ - virtual PxCudaMemoryManager *getMemoryManager() = 0; - - /** - * \brief Return the PxGpuDispatcher instance associated with this - * CUDA context - */ - virtual class physx::PxGpuDispatcher *getGpuDispatcher() = 0; - - /** - * \brief Context manager has a valid CUDA context - * - * This method should be called after creating a PxCudaContextManager, - * especially if the manager was responsible for allocating its own - * CUDA context (desc.ctx == NULL). If it returns false, there is - * no point in assigning this manager's PxGpuDispatcher to a - * TaskManager as it will be unable to execute GpuTasks. - */ - virtual bool contextIsValid() const = 0; - - /* Query CUDA context and device properties, without acquiring context */ - - virtual bool supportsArchSM10() const = 0; //!< G80 - virtual bool supportsArchSM11() const = 0; //!< G92 - virtual bool supportsArchSM12() const = 0; //!< GT200 - virtual bool supportsArchSM13() const = 0; //!< GT260 - virtual bool supportsArchSM20() const = 0; //!< GF100 - virtual bool supportsArchSM30() const = 0; //!< GK100 - virtual bool supportsArchSM35() const = 0; //!< GK110 - virtual bool supportsArchSM50() const = 0; //!< GM100 - virtual bool supportsArchSM52() const = 0; //!< GM200 - virtual bool supportsArchSM60() const = 0; //!< GP100 - virtual bool isIntegrated() const = 0; //!< true if GPU is an integrated (MCP) part - virtual bool canMapHostMemory() const = 0; //!< true if GPU map host memory to GPU (0-copy) - virtual int getDriverVersion() const = 0; //!< returns cached value of cuGetDriverVersion() - virtual size_t getDeviceTotalMemBytes() const = 0; //!< returns cached value of device memory size - virtual int getMultiprocessorCount() const = 0; //!< returns cache value of SM unit count - virtual unsigned int getClockRate() const = 0; //!< returns cached value of SM clock frequency - virtual int getSharedMemPerBlock() const = 0; //!< returns total amount of shared memory available per block in bytes - virtual int getSharedMemPerMultiprocessor() const = 0; //!< returns total amount of shared memory available per multiprocessor in bytes - virtual unsigned int getMaxThreadsPerBlock() const = 0; //!< returns the maximum number of threads per block - virtual const char *getDeviceName() const = 0; //!< returns device name retrieved from driver - virtual CUdevice getDevice() const = 0; //!< returns device handle retrieved from driver - virtual PxCudaInteropMode::Enum getInteropMode() const = 0; //!< interop mode the context was created with - - virtual void setUsingConcurrentStreams(bool) = 0; //!< turn on/off using concurrent streams for GPU work - virtual bool getUsingConcurrentStreams() const = 0; //!< true if GPU work can run in concurrent streams - /* End query methods that don't require context to be acquired */ - - /** - * \brief Register a rendering resource with CUDA - * - * This function is called to register render resources (allocated - * from OpenGL) with CUDA so that the memory may be shared - * between the two systems. This is only required for render - * resources that are designed for interop use. In APEX, each - * render resource descriptor that could support interop has a - * 'registerInCUDA' boolean variable. - * - * The function must be called again any time your graphics device - * is reset, to re-register the resource. - * - * Returns true if the registration succeeded. A registered - * resource must be unregistered before it can be released. - * - * \param resource [OUT] the handle to the resource that can be used with CUDA - * \param buffer [IN] GLuint buffer index to be mapped to cuda - * \param flags [IN] cuda interop registration flags - */ - virtual bool registerResourceInCudaGL(CUgraphicsResource &resource, uint32_t buffer, PxCudaInteropRegisterFlags flags = PxCudaInteropRegisterFlags()) = 0; - - /** - * \brief Register a rendering resource with CUDA - * - * This function is called to register render resources (allocated - * from Direct3D) with CUDA so that the memory may be shared - * between the two systems. This is only required for render - * resources that are designed for interop use. In APEX, each - * render resource descriptor that could support interop has a - * 'registerInCUDA' boolean variable. - * - * The function must be called again any time your graphics device - * is reset, to re-register the resource. - * - * Returns true if the registration succeeded. A registered - * resource must be unregistered before it can be released. - * - * \param resource [OUT] the handle to the resource that can be used with CUDA - * \param resourcePointer [IN] A pointer to either IDirect3DResource9, or ID3D10Device, or ID3D11Resource to be registered. - * \param flags [IN] cuda interop registration flags - */ - virtual bool registerResourceInCudaD3D(CUgraphicsResource &resource, void *resourcePointer, PxCudaInteropRegisterFlags flags = PxCudaInteropRegisterFlags()) = 0; - - /** - * \brief Unregister a rendering resource with CUDA - * - * If a render resource was successfully registered with CUDA using - * the registerResourceInCuda***() methods, this function must be called - * to unregister the resource before the it can be released. - */ - virtual bool unregisterResourceInCuda(CUgraphicsResource resource) = 0; - - /** - * \brief Determine if the user has configured a dedicated PhysX GPU in the NV Control Panel - * \note If using CUDA Interop, this will always return false - * \returns 1 if there is a dedicated GPU - * 0 if there is NOT a dedicated GPU - * -1 if the routine is not implemented - */ - virtual int usingDedicatedGPU() const = 0; - - /** - * \brief Release the PxCudaContextManager - * - * When the manager instance is released, it also releases its - * PxGpuDispatcher instance and PxCudaMemoryManager. Before the memory - * manager is released, it frees all allocated memory pages. If the - * PxCudaContextManager created the CUDA context it was responsible - * for, it also frees that context. - * - * Do not release the PxCudaContextManager if there are any scenes - * using its PxGpuDispatcher. Those scenes must be released first - * since there is no safe way to remove a PxGpuDispatcher from a - * TaskManager once the TaskManager has been given to a scene. - * - */ - virtual void release() = 0; - -protected: - - /** - * \brief protected destructor, use release() method - */ - virtual ~PxCudaContextManager() {} -}; - -/** - * \brief Convenience class for holding CUDA lock within a scope - */ -class PxScopedCudaLock -{ -public: - /** - * \brief ScopedCudaLock constructor - */ - PxScopedCudaLock(PxCudaContextManager& ctx) : mCtx(&ctx) - { - mCtx->acquireContext(); - } - - /** - * \brief ScopedCudaLock destructor - */ - ~PxScopedCudaLock() - { - mCtx->releaseContext(); - } - -protected: - - /** - * \brief CUDA context manager pointer (initialized in the constructor) - */ - PxCudaContextManager* mCtx; -}; - -} // end physx namespace - -#endif // PX_SUPPORT_GPU_PHYSX -#endif // PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H diff --git a/PxShared/include/cudamanager/PxCudaMemoryManager.h b/PxShared/include/cudamanager/PxCudaMemoryManager.h deleted file mode 100644 index c89b20d..0000000 --- a/PxShared/include/cudamanager/PxCudaMemoryManager.h +++ /dev/null @@ -1,281 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#ifndef PXCUDACONTEXTMANAGER_PXCUDAMEMORYMANAGER_H -#define PXCUDACONTEXTMANAGER_PXCUDAMEMORYMANAGER_H - -#include "foundation/PxPreprocessor.h" - -#if PX_SUPPORT_GPU_PHYSX - -#include "task/PxTaskDefine.h" - -// some macros to keep the source code more readable -#define PX_ALLOC_INFO(name, ID) __FILE__, __LINE__, name, physx::PxAllocId::ID -#define PX_ALLOC_INFO_PARAMS_DECL(p0, p1, p2, p3) const char* file = p0, int line = p1, const char* allocName = p2, physx::PxAllocId::Enum allocId = physx::PxAllocId::p3 -#define PX_ALLOC_INFO_PARAMS_DEF() const char* file, int line, const char* allocName, physx::PxAllocId::Enum allocId -#define PX_ALLOC_INFO_PARAMS_INPUT() file, line, allocName, allocId -#define PX_ALLOC_INFO_PARAMS_INPUT_INFO(info) info.getFileName(), info.getLine(), info.getAllocName(), info.getAllocId() - -#ifndef NULL // don't want to include <string.h> -#define NULL 0 -#endif - -namespace physx -{ - -PX_PUSH_PACK_DEFAULT - -/** \brief ID of the Feature which owns/allocated memory from the heap - * - * Maximum of 64k IDs allowed. - */ -struct PxAllocId -{ - /** - * \brief ID of the Feature which owns/allocated memory from the heap - */ - enum Enum - { - UNASSIGNED, //!< default - APEX, //!< APEX stuff not further classified - PARTICLES, //!< all particle related - GPU_UTIL, //!< e.g. RadixSort (used in SPH and deformable self collision) - CLOTH, //!< all cloth related - NUM_IDS //!< number of IDs, be aware that ApexHeapStats contains PxAllocIdStats[NUM_IDS] - }; -}; - -/// \brief memory type managed by a heap -struct PxCudaBufferMemorySpace -{ - /** - * \brief memory type managed by a heap - */ - enum Enum - { - T_GPU, - T_PINNED_HOST, - T_WRITE_COMBINED, - T_HOST, - COUNT - }; -}; - -/// \brief class to track allocation statistics, see PxgMirrored -class PxAllocInfo -{ -public: - /** - * \brief AllocInfo default constructor - */ - PxAllocInfo() {} - - /** - * \brief AllocInfo constructor that initializes all of the members - */ - PxAllocInfo(const char* file, int line, const char* allocName, PxAllocId::Enum allocId) - : mFileName(file) - , mLine(line) - , mAllocName(allocName) - , mAllocId(allocId) - {} - - /// \brief get the allocation file name - inline const char* getFileName() const - { - return mFileName; - } - - /// \brief get the allocation line - inline int getLine() const - { - return mLine; - } - - /// \brief get the allocation name - inline const char* getAllocName() const - { - return mAllocName; - } - - /// \brief get the allocation ID - inline PxAllocId::Enum getAllocId() const - { - return mAllocId; - } - -private: - const char* mFileName; - int mLine; - const char* mAllocName; - PxAllocId::Enum mAllocId; -}; - -/// \brief statistics collected per AllocationId by HeapManager. -struct PxAllocIdStats -{ - size_t size; //!< currently allocated memory by this ID - size_t maxSize; //!< max allocated memory by this ID - size_t elements; //!< number of current allocations by this ID - size_t maxElements; //!< max number of allocations by this ID -}; - -class PxCudaMemoryManager; -typedef size_t PxCudaBufferPtr; - -/// \brief Hint flag to tell how the buffer will be used -struct PxCudaBufferFlags -{ -/// \brief Enumerations for the hint flag to tell how the buffer will be used - enum Enum - { - F_READ = (1 << 0), - F_WRITE = (1 << 1), - F_READ_WRITE = F_READ | F_WRITE - }; -}; - - -/// \brief Memory statistics struct returned by CudaMemMgr::getStats() -struct PxCudaMemoryManagerStats -{ - - size_t heapSize; //!< Size of all pages allocated for this memory type (allocated + free). - size_t totalAllocated; //!< Size occupied by the current allocations. - size_t maxAllocated; //!< High water mark of allocations since the SDK was created. - PxAllocIdStats allocIdStats[PxAllocId::NUM_IDS]; //!< Stats for each allocation ID, see PxAllocIdStats -}; - - -/// \brief Buffer type: made of hint flags and the memory space (Device Memory, Pinned Host Memory, ...) -struct PxCudaBufferType -{ - /// \brief PxCudaBufferType copy constructor - PX_INLINE PxCudaBufferType(const PxCudaBufferType& t) - : memorySpace(t.memorySpace) - , flags(t.flags) - {} - - /// \brief PxCudaBufferType constructor to explicitely assign members - PX_INLINE PxCudaBufferType(PxCudaBufferMemorySpace::Enum _memSpace, PxCudaBufferFlags::Enum _flags) - : memorySpace(_memSpace) - , flags(_flags) - {} - - PxCudaBufferMemorySpace::Enum memorySpace; //!< specifies which memory space for the buffer - PxCudaBufferFlags::Enum flags; //!< specifies the usage flags for the buffer -}; - - -/// \brief Buffer which keeps informations about allocated piece of memory. -class PxCudaBuffer -{ -public: - /// Retrieves the manager over which the buffer was allocated. - virtual PxCudaMemoryManager* getCudaMemoryManager() const = 0; - - /// Releases the buffer and the memory it used, returns true if successful. - virtual bool free() = 0; - - /// Realloc memory. Use to shrink or resize the allocated chunk of memory of this buffer. - /// Returns true if successful. Fails if the operation would change the address and need a memcopy. - /// In that case the user has to allocate, copy and free the memory with separate steps. - /// Realloc to size 0 always returns false and doesn't change the state. - virtual bool realloc(size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) = 0; - - /// Returns the type of the allocated memory. - virtual const PxCudaBufferType& getType() const = 0; - - /// Returns the pointer to the allocated memory. - virtual PxCudaBufferPtr getPtr() const = 0; - - /// Returns the size of the allocated memory. - virtual size_t getSize() const = 0; - -protected: - /// \brief protected destructor - virtual ~PxCudaBuffer() {} -}; - - -/// \brief Allocator class for different kinds of CUDA related memory. -class PxCudaMemoryManager -{ -public: - /// Allocate memory of given type and size. Returns a CudaBuffer if successful. Returns NULL if failed. - virtual PxCudaBuffer* alloc(const PxCudaBufferType& type, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) = 0; - - /// Basic heap allocator without PxCudaBuffer - virtual PxCudaBufferPtr alloc(PxCudaBufferMemorySpace::Enum memorySpace, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) = 0; - - /// Basic heap deallocator without PxCudaBuffer - virtual bool free(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr) = 0; - - /// Basic heap realloc without PxCudaBuffer - virtual bool realloc(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) = 0; - - /// Retrieve stats for the memory of given type. See PxCudaMemoryManagerStats. - virtual void getStats(const PxCudaBufferType& type, PxCudaMemoryManagerStats& outStats) = 0; - - /// Ensure that a given amount of free memory is available. Triggers CUDA allocations in size of (2^n * pageSize) if necessary. - /// Returns false if page allocations failed. - virtual bool reserve(const PxCudaBufferType& type, size_t size) = 0; - - /// Set the page size. The managed memory grows by blocks 2^n * pageSize. Page allocations trigger CUDA driver allocations, - /// so the page size should be reasonably big. Returns false if input size was invalid, i.e. not power of two. - /// Default is 2 MB. - virtual bool setPageSize(const PxCudaBufferType& type, size_t size) = 0; - - /// Set the upper limit until which pages of a given memory type can be allocated. - /// Reducing the max when it is already hit does not shrink the memory until it is deallocated by releasing the buffers which own the memory. - virtual bool setMaxMemorySize(const PxCudaBufferType& type, size_t size) = 0; - - /// Returns the base size. The base memory block stays persistently allocated over the SDKs life time. - virtual size_t getBaseSize(const PxCudaBufferType& type) = 0; - - /// Returns the currently set page size. The memory grows and shrinks in blocks of size (2^n pageSize) - virtual size_t getPageSize(const PxCudaBufferType& type) = 0; - - /// Returns the upper limit until which the manager is allowed to allocate additional pages from the CUDA driver. - virtual size_t getMaxMemorySize(const PxCudaBufferType& type) = 0; - - /// Get device mapped pinned host mem ptr. Operation only valid for memory space PxCudaBufferMemorySpace::T_PINNED_HOST. - virtual PxCudaBufferPtr getMappedPinnedPtr(PxCudaBufferPtr hostPtr) = 0; - -protected: - /// \brief protected destructor - virtual ~PxCudaMemoryManager() {} -}; - -PX_POP_PACK - - -} // end physx namespace - -#endif // PX_SUPPORT_GPU_PHYSX -#endif // PXCUDACONTEXTMANAGER_PXCUDAMEMORYMANAGER_H diff --git a/PxShared/include/cudamanager/PxGpuCopyDesc.h b/PxShared/include/cudamanager/PxGpuCopyDesc.h deleted file mode 100644 index c6c240f..0000000 --- a/PxShared/include/cudamanager/PxGpuCopyDesc.h +++ /dev/null @@ -1,86 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#ifndef PXCUDACONTEXTMANAGER_PXGPUCOPYDESC_H -#define PXCUDACONTEXTMANAGER_PXGPUCOPYDESC_H - -#include "foundation/PxPreprocessor.h" - -#if PX_SUPPORT_GPU_PHYSX - -#include "task/PxTaskDefine.h" - -namespace physx -{ - -PX_PUSH_PACK_DEFAULT - -/** - * \brief Input descriptor for the GpuDispatcher's built-in copy kernel - * - * All host memory involved in copy transactions must be page-locked. - * If more than one descriptor is passed to the copy kernel in one launch, - * the descriptors themselves must be in page-locked memory. - */ -struct PxGpuCopyDesc -{ - /** - * \brief Input descriptor for the GpuDispatcher's built-in copy kernel - */ - enum CopyType - { - HostToDevice, - DeviceToHost, - DeviceToDevice, - DeviceMemset32 - }; - - size_t dest; //!< the destination - size_t source; //!< the source (32bit value when type == DeviceMemset) - size_t bytes; //!< the size in bytes - CopyType type; //!< the memory transaction type - - /** - * \brief Copy is optimally performed as 64bit words, requires 64bit alignment. But it can - * gracefully degrade to 32bit copies if necessary - */ - PX_INLINE bool isValid() - { - bool ok = true; - ok &= ((dest & 0x3) == 0); - ok &= ((type == DeviceMemset32) || (source & 0x3) == 0); - ok &= ((bytes & 0x3) == 0); - return ok; - } -}; - -PX_POP_PACK - -} // end physx namespace - -#endif // PX_SUPPORT_GPU_PHYSX -#endif // PXCUDACONTEXTMANAGER_PXGPUCOPYDESC_H diff --git a/PxShared/include/cudamanager/PxGpuCopyDescQueue.h b/PxShared/include/cudamanager/PxGpuCopyDescQueue.h deleted file mode 100644 index 4b6d58e..0000000 --- a/PxShared/include/cudamanager/PxGpuCopyDescQueue.h +++ /dev/null @@ -1,149 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#ifndef PXCUDACONTEXTMANAGER_PXGPUCOPYDESCQUEUE_H -#define PXCUDACONTEXTMANAGER_PXGPUCOPYDESCQUEUE_H - -#include "foundation/PxPreprocessor.h" - -#if PX_SUPPORT_GPU_PHYSX - -#include "foundation/PxAssert.h" -#include "task/PxTaskDefine.h" -#include "task/PxGpuDispatcher.h" -#include "cudamanager/PxGpuCopyDesc.h" -#include "cudamanager/PxCudaContextManager.h" - -/* forward decl to avoid including <cuda.h> */ -typedef struct CUstream_st* CUstream; - -namespace physx -{ - -PX_PUSH_PACK_DEFAULT - -/// \brief Container class for queueing PxGpuCopyDesc instances in pinned (non-pageable) CPU memory -class PxGpuCopyDescQueue -{ -public: - /// \brief PxGpuCopyDescQueue constructor - PxGpuCopyDescQueue(PxGpuDispatcher& d) - : mDispatcher(d) - , mBuffer(0) - , mStream(0) - , mReserved(0) - , mOccupancy(0) - , mFlushed(0) - { - } - - /// \brief PxGpuCopyDescQueue destructor - ~PxGpuCopyDescQueue() - { - if (mBuffer) - { - mDispatcher.getCudaContextManager()->getMemoryManager()->free(PxCudaBufferMemorySpace::T_PINNED_HOST, (size_t) mBuffer); - } - } - - /// \brief Reset the enqueued copy descriptor list - /// - /// Must be called at least once before any copies are enqueued, and each time the launched - /// copies are known to have been completed. The recommended use case is to call this at the - /// start of each simulation step. - void reset(CUstream stream, uint32_t reserveSize) - { - if (reserveSize > mReserved) - { - if (mBuffer) - { - mDispatcher.getCudaContextManager()->getMemoryManager()->free( - PxCudaBufferMemorySpace::T_PINNED_HOST, - (size_t) mBuffer); - mReserved = 0; - } - mBuffer = (PxGpuCopyDesc*) mDispatcher.getCudaContextManager()->getMemoryManager()->alloc( - PxCudaBufferMemorySpace::T_PINNED_HOST, - reserveSize * sizeof(PxGpuCopyDesc), - PX_ALLOC_INFO("PxGpuCopyDescQueue", GPU_UTIL)); - if (mBuffer) - { - mReserved = reserveSize; - } - } - - mOccupancy = 0; - mFlushed = 0; - mStream = stream; - } - - /// \brief Enqueue the specified copy descriptor, or launch immediately if no room is available - void enqueue(PxGpuCopyDesc& desc) - { - PX_ASSERT(desc.isValid()); - if (desc.bytes == 0) - { - return; - } - - if (mOccupancy < mReserved) - { - mBuffer[ mOccupancy++ ] = desc; - } - else - { - mDispatcher.launchCopyKernel(&desc, 1, mStream); - } - } - - /// \brief Launch all copies queued since the last flush or reset - void flushEnqueued() - { - if (mOccupancy > mFlushed) - { - mDispatcher.launchCopyKernel(mBuffer + mFlushed, mOccupancy - mFlushed, mStream); - mFlushed = mOccupancy; - } - } - -private: - PxGpuDispatcher& mDispatcher; - PxGpuCopyDesc* mBuffer; - CUstream mStream; - uint32_t mReserved; - uint32_t mOccupancy; - uint32_t mFlushed; - - void operator=(const PxGpuCopyDescQueue&); // prevent a warning... -}; - -PX_POP_PACK - -} // end physx namespace - -#endif // PX_SUPPORT_GPU_PHYSX -#endif // PXCUDACONTEXTMANAGER_PXGPUCOPYDESCQUEUE_H |