diff options
Diffstat (limited to 'PxShared/src/cudamanager')
18 files changed, 0 insertions, 6524 deletions
diff --git a/PxShared/src/cudamanager/include/CudaContextManager.h b/PxShared/src/cudamanager/include/CudaContextManager.h deleted file mode 100644 index 3d68f82..0000000 --- a/PxShared/src/cudamanager/include/CudaContextManager.h +++ /dev/null @@ -1,51 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#ifndef PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H -#define PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H - -#include "task/PxTaskDefine.h" - -#if PX_SUPPORT_GPU_PHYSX - -namespace physx -{ - -class PxCudaContextManager; -class PxCudaContextManagerDesc; -class PxErrorCallback; - -/** -Creates cuda context manager for PhysX and APEX. -*/ -PxCudaContextManager* createCudaContextManager(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback); - -} - -#endif - -#endif // PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H diff --git a/PxShared/src/cudamanager/include/CudaKernelWrangler.h b/PxShared/src/cudamanager/include/CudaKernelWrangler.h deleted file mode 100644 index 36a2cc8..0000000 --- a/PxShared/src/cudamanager/include/CudaKernelWrangler.h +++ /dev/null @@ -1,331 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#ifndef __CUDA_KERNEL_WRANGLER__ -#define __CUDA_KERNEL_WRANGLER__ - -// Make this header is safe for inclusion in headers that are shared with device code. -#if !defined(__CUDACC__) - -#include "task/PxTaskDefine.h" -#include "task/PxGpuDispatcher.h" - -#include "PsUserAllocated.h" -#include "PsArray.h" - -#include <cuda.h> - -namespace physx -{ - -class KernelWrangler : public shdfnd::UserAllocated -{ - PX_NOCOPY(KernelWrangler) -public: - KernelWrangler(PxGpuDispatcher& gd, PxErrorCallback& errorCallback, const char** funcNames, uint16_t numFuncs); - ~KernelWrangler(); - - CUfunction getCuFunction(uint16_t funcIndex) const - { - return mCuFunctions[ funcIndex ]; - } - - CUmodule getCuModule(uint16_t funcIndex) const - { - uint16_t modIndex = mCuFuncModIndex[ funcIndex ]; - return mCuModules[ modIndex ]; - } - - static void const* const* getImages(); - static int getNumImages(); - - bool hadError() const { return mError; } - -protected: - bool mError; - shdfnd::Array<CUfunction> mCuFunctions; - shdfnd::Array<uint16_t> mCuFuncModIndex; - shdfnd::Array<CUmodule> mCuModules; - PxGpuDispatcher& mGpuDispatcher; - PxErrorCallback& mErrorCallback; -}; - -/* SJB - These were "borrowed" from an Ignacio Llamas email to devtech-compute. - * If we feel this is too clumsy, we can steal the boost based bits from APEX - */ - -class ExplicitCudaFlush -{ -public: - ExplicitCudaFlush(int cudaFlushCount) : mCudaFlushCount(cudaFlushCount), mDefaultCudaFlushCount(mCudaFlushCount) {} - ~ExplicitCudaFlush() {} - - void setCudaFlushCount(int value) { mCudaFlushCount = mDefaultCudaFlushCount = value; } - unsigned int getCudaFlushCount() const { return (unsigned int)mCudaFlushCount; } - void resetCudaFlushCount() { mCudaFlushCount = mDefaultCudaFlushCount; } - - void decrementFlushCount() - { - if (mCudaFlushCount == 0) return; - - if (--mCudaFlushCount == 0) - { - CUresult ret = cuStreamQuery(0); // flushes current push buffer - PX_UNUSED(ret); - PX_ASSERT(ret == CUDA_SUCCESS || ret == CUDA_ERROR_NOT_READY); - - // For current implementation, disable resetting of cuda flush count - // reset cuda flush count - // mCudaFlushCount = mDefaultCudaFlushCount; - } - } - -private: - int mCudaFlushCount; - int mDefaultCudaFlushCount; -}; - -} - -template <typename T0> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0) -{ - void* kernelParams[] = - { - &v0, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1) -{ - void* kernelParams[] = - { - &v0, &v1, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, &v6, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, - typename T8> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11, typename T12> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11, typename T12, typename T13> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, - T13 v13) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, - T13 v13, T14 v14) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, - T13 v13, T14 v14, T15 v15) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15, - typename T16> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, - T13 v13, T14 v14, T15 v15, T16 v16) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15, - typename T16, typename T17> -PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream, - T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, - T13 v13, T14 v14, T15 v15, T16 v16, T17 v17) -{ - void* kernelParams[] = - { - &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16, &v17, - }; - return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); -} - -#endif - -#endif diff --git a/PxShared/src/cudamanager/include/GpuDispatcher.h b/PxShared/src/cudamanager/include/GpuDispatcher.h deleted file mode 100644 index 10c412f..0000000 --- a/PxShared/src/cudamanager/include/GpuDispatcher.h +++ /dev/null @@ -1,332 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#ifndef PXTASK_GPUDISPATCHER_H -#define PXTASK_GPUDISPATCHER_H - -#include "task/PxTask.h" -#include "task/PxTaskDefine.h" -#include "task/PxGpuTask.h" -#include "task/PxTaskManager.h" -#include "task/PxGpuDispatcher.h" -#include "foundation/PxProfiler.h" - -#include "PsUserAllocated.h" -#include "PsThread.h" -#include "PsAtomic.h" -#include "PsMutex.h" -#include "PsSync.h" -#include "PsArray.h" - -#include <cuda.h> - -namespace physx { - -typedef uint16_t EventID; - -void releaseGpuDispatcher(PxGpuDispatcher&); - -class KernelWrangler; -class BlockingWaitThread; -class FanoutTask; -class LaunchTask; -class BlockTask; -class PxGpuWorkerThread; - -class GpuDispatcherImpl : public PxGpuDispatcher, public shdfnd::UserAllocated -{ -public: - GpuDispatcherImpl(PxErrorCallback& errorCallback, PxCudaContextManager& ctx); - virtual ~GpuDispatcherImpl(); - - void start(); - void startSimulation(); - void startGroup(); - void submitTask(PxTask& task); - void finishGroup(); - void addCompletionPrereq(PxBaseTask& task); - bool failureDetected() const; - void forceFailureMode(); - void stopSimulation(); - void launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream); - - PxBaseTask& getPreLaunchTask(); - void addPreLaunchDependent(PxBaseTask& dependent); - - PxBaseTask& getPostLaunchTask(); - void addPostLaunchDependent(PxBaseTask& dependent); - - PxCudaContextManager* getCudaContextManager(); - - PxGpuWorkerThread* mDispatcher; - BlockingWaitThread* mBlockingThread; - LaunchTask* mLaunchTask; // predecessor of tasks launching kernels - BlockTask* mBlockTask; // continuation of tasks launching kernels - FanoutTask* mSyncTask; // predecessor of tasks waiting for cuda context synchronize -}; - -class JobQueue -{ - PX_NOCOPY(JobQueue) -public: - JobQueue() : taskarray(PX_DEBUG_EXP("PxTask*")) {} - void push(PxTask* t) - { - access.lock(); - taskarray.pushBack(t); - access.unlock(); - } - PxTask* popBack() - { - access.lock(); - PxTask* t = NULL; - if (taskarray.size()) - { - t = taskarray.popBack(); - } - access.unlock(); - return t; - } - uint32_t size() - { - return taskarray.size(); - } - bool empty() - { - return taskarray.size() == 0; - } - -private: - shdfnd::Array<PxTask*> taskarray; - shdfnd::Mutex access; -}; - -class EventPool -{ - PX_NOCOPY(EventPool) -public: - EventPool(uint32_t inflags) : flags(inflags), evarray(PX_DEBUG_EXP("CUevent")) {} - void add(CUevent ev) - { - access.lock(); - evarray.pushBack(ev); - access.unlock(); - } - CUevent get() - { - access.lock(); - CUevent ev; - if (evarray.size()) - { - ev = evarray.popBack(); - } - else - { - cuEventCreate(&ev, flags); - } - access.unlock(); - return ev; - } - bool empty() const - { - return evarray.size() == 0; - } - void clear() - { - access.lock(); - for (uint32_t i = 0; i < evarray.size(); i++) - { - cuEventDestroy(evarray[i]); - } - access.unlock(); - } - -private: - uint32_t flags; - shdfnd::Array<CUevent> evarray; - shdfnd::Mutex access; -}; - -class StreamCache -{ -public: - StreamCache() : sarray(PX_DEBUG_EXP("CUstream")), freeIndices(PX_DEBUG_EXP("freeIndices")) - { - } - CUstream get(uint32_t s) - { - PX_ASSERT(s); - return sarray[ s - 1 ]; - } - void push(uint32_t s) - { - freeIndices.pushBack(s); - } - uint32_t popBack() - { - if (freeIndices.size()) - { - return freeIndices.popBack(); - } - else - { - CUstream s; - cuStreamCreate(&s, 0); - sarray.pushBack(s); - return sarray.size(); - } - } - void reset() - { - freeIndices.resize(sarray.size()); - for (uint32_t i = 0 ; i < sarray.size() ; i++) - { - freeIndices[i] = i + 1; - } - } - bool empty() - { - return freeIndices.size() == 0; - } - -private: - shdfnd::Array<CUstream> sarray; - shdfnd::Array<uint32_t> freeIndices; -}; - -class KernelBar -{ -public: - KernelBar() - { - reset(); - } - void reset() - { - start = 0xffffffff; - stop = 0; - } - - uint32_t start; - uint32_t stop; -}; - -const int SIZE_COMPLETION_RING = 1024; - -struct CudaBatch -{ - CUevent blockingEvent; - CUstream blockingStream; // sync on stream instead of event if lsb is zero (faster) - PxBaseTask* continuationTask; -}; - -struct ReadyTask -{ - PxGpuTask* task; - uint32_t iteration; -}; - -class PxGpuWorkerThread : public shdfnd::Thread -{ - PX_NOCOPY(PxGpuWorkerThread) -public: - PxGpuWorkerThread(); - ~PxGpuWorkerThread(); - - void setCudaContext(PxCudaContextManager& ctx); - - /* API to TaskManager */ - void startSimulation(); - void stopSimulation(); - - /* API to GPU tasks */ - void addCompletionPrereq(PxBaseTask& task); - - /* PxGpuTask execution thread */ - void execute(); - void pollSubmitted(shdfnd::Array<ReadyTask> *ready); - void processActiveTasks(); - void flushBatch(CUevent endEvent, CUstream, PxBaseTask* task); - void launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream); - - /* Blocking wait thread */ - void blockingWaitFunc(); - - StreamCache mCachedStreams; - shdfnd::Array<PxBaseTask*> mCompletionTasks; - JobQueue mSubmittedTaskList; - volatile int mActiveGroups; - shdfnd::Sync mInputReady; - shdfnd::Sync mRecordEventQueued; - PxCudaContextManager* mCtxMgr; - bool mNewTasksSubmitted; - bool mFailureDetected; - - bool mUsingConcurrentStreams; - - CudaBatch mCompletionRing[ SIZE_COMPLETION_RING ]; - volatile int mCompletionRingPush; - volatile int mCompletionRingPop; - - EventPool mCachedBlockingEvents; - EventPool mCachedNonBlockingEvents; - - volatile int mCountActiveScenes; - - uint32_t* mSmStartTimes; - uint32_t mSmClockFreq; - - shdfnd::Array<ReadyTask> mReady[ PxGpuTaskHint::NUM_GPU_TASK_HINTS ]; - - KernelWrangler* mUtilKernelWrapper; - - CUevent mStartEvent; - - shdfnd::Mutex mMutex; -}; - -class BlockingWaitThread : public shdfnd::Thread -{ -public: - BlockingWaitThread(PxGpuWorkerThread& worker) : mWorker(worker) {} - ~BlockingWaitThread() {} - - void execute(); - -protected: - PxGpuWorkerThread& mWorker; - -private: - BlockingWaitThread& operator=(const BlockingWaitThread&); -}; - -#define GD_CHECK_CALL(call) { CUresult ret = call; \ - if( CUDA_SUCCESS != ret ) { mFailureDetected=true; PX_ASSERT(!ret); } } - -} - -#endif // PXTASK_GPUDISPATCHER_H diff --git a/PxShared/src/cudamanager/include/PhysXDeviceSettings.h b/PxShared/src/cudamanager/include/PhysXDeviceSettings.h deleted file mode 100644 index 5358915..0000000 --- a/PxShared/src/cudamanager/include/PhysXDeviceSettings.h +++ /dev/null @@ -1,56 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#ifndef PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H -#define PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H - -#include "task/PxTaskDefine.h" - -#if PX_SUPPORT_GPU_PHYSX - -namespace physx -{ - class PxErrorCallback; - - /** - Helper functions to expose control panel functionality - */ - class PhysXDeviceSettings - { - private: - PhysXDeviceSettings() {} - - public: - static int getSuggestedCudaDeviceOrdinal(PxErrorCallback& errc); - static int isUsingDedicatedGPU(); - static bool isSLIEnabled(void* graphicsDevice); - }; -} - -#endif - -#endif // PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H diff --git a/PxShared/src/cudamanager/src/BlockingWait.cpp b/PxShared/src/cudamanager/src/BlockingWait.cpp deleted file mode 100644 index 8a2cc44..0000000 --- a/PxShared/src/cudamanager/src/BlockingWait.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#include "task/PxTaskDefine.h" - -#if PX_SUPPORT_GPU_PHYSX - -#include "task/PxTask.h" -#include "task/PxGpuTask.h" -#include "cudamanager/PxCudaContextManager.h" - -#include "PsString.h" - -#if PX_SUPPORT_PXTASK_PROFILING -#include "foundation/PxFoundation.h" -#include "foundation/PxProfiler.h" -#endif - -#include "GpuDispatcher.h" - -using namespace physx; - -/* Blocking thread / GPU Profiling Event Code */ - -void PxGpuWorkerThread::blockingWaitFunc() -{ - mCtxMgr->acquireContext(); - - while (mCompletionRingPop != mCompletionRingPush) - { - CudaBatch& b = mCompletionRing[ mCompletionRingPop ]; - PxBaseTask* t = b.continuationTask; - - if (!b.blockingEvent) - { - PX_ASSERT(b.continuationTask != 0); - - /* No blocking necessary, just allow continuation task to run */ - } - else if (!mFailureDetected) - { -#if PX_SUPPORT_PXTASK_PROFILING - PX_PROFILE_ZONE("GpuDispatcher.BlockingWaitEvent", 0); -#endif - if (1 & ~intptr_t(b.blockingStream)) - { - GD_CHECK_CALL(cuStreamSynchronize(b.blockingStream)); - } - else - { - GD_CHECK_CALL(cuEventSynchronize(b.blockingEvent)); - } - } - - if (b.blockingEvent) - { - mCachedBlockingEvents.add(b.blockingEvent); - } - if (t) - { - t->removeReference(); - } - mCompletionRingPop = (mCompletionRingPop + 1) % SIZE_COMPLETION_RING; - } - - mCtxMgr->releaseContext(); -} - - -/* Blocking wait thread - - All this thread does is block waiting for CUDA Record Events to - be signaled. - */ - -void BlockingWaitThread::execute() -{ - setName("GpuDispatcher.BlockingWait"); - bool running = true; - while (running) - { - mWorker.mRecordEventQueued.wait(); - if (quitIsSignalled()) - { - running = false; - } - - mWorker.mRecordEventQueued.reset(); - mWorker.blockingWaitFunc(); - } - quit(); -} - -#endif - diff --git a/PxShared/src/cudamanager/src/CUDA/UtilKernels.cu b/PxShared/src/cudamanager/src/CUDA/UtilKernels.cu deleted file mode 100644 index 3c73364..0000000 --- a/PxShared/src/cudamanager/src/CUDA/UtilKernels.cu +++ /dev/null @@ -1,164 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#include "cudamanager/PxGpuCopyDesc.h" -#include "foundation/PxSimpleTypes.h" - -#include <cuda.h> - -using namespace physx; - -extern "C" __host__ void initUtilKernels() {} - -extern "C" __global__ -void Saturate( ) -{ - // NOP -} - -__device__ -void performCopy( const physx::PxGpuCopyDesc& desc, uint32_t totalBlocks ) -{ - if( desc.type == physx::PxGpuCopyDesc::DeviceMemset32 ) - { - uint32_t *dest = (uint32_t*) desc.dest; - uint32_t wordCount = desc.bytes >> 2; - size_t word = blockIdx.x * blockDim.x + threadIdx.x; - size_t stride = blockDim.x * totalBlocks; - for( ; word < wordCount ; word += stride ) - dest[ word ] = desc.source; - return; - } - - /* The idea here is to maximize throughput with minimal register and thread counts */ - /* Manually unrolled 4 times, the compiler refuses to do it for me */ - - if( (desc.source & 0x7) != 0 || (desc.dest & 0x7) != 0 || (desc.bytes & 0x7) != 0) - { - /* Input is word aligned */ - - uint32_t *dest = (uint32_t*) desc.dest; - uint32_t *source = (uint32_t*) desc.source; - uint32_t wordCount = desc.bytes >> 2; - size_t word = blockIdx.x * blockDim.x + threadIdx.x; - size_t stride = blockDim.x * totalBlocks; - while( word < wordCount ) - { - uint32_t a0, a1, a2, a3, a4, a5; - a0 = source[ word ]; - if( word + stride < wordCount ) - a1 = source[ word + stride ]; - if( word + stride*2 < wordCount ) - a2 = source[ word + stride*2 ]; - if( word + stride*3 < wordCount ) - a3 = source[ word + stride*3 ]; - if( word + stride*4 < wordCount ) - a4 = source[ word + stride*4 ]; - if( word + stride*5 < wordCount ) - a5 = source[ word + stride*5 ]; - - dest[ word ] = a0; - if( word + stride < wordCount ) - dest[ word + stride ] = a1; - if( word + stride*2 < wordCount ) - dest[ word + stride*2 ] = a2; - if( word + stride*3 < wordCount ) - dest[ word + stride*3 ] = a3; - if( word + stride*4 < wordCount ) - dest[ word + stride*4 ] = a4; - if( word + stride*5 < wordCount ) - dest[ word + stride*5 ] = a5; - - word += stride*6; - } - } - else - { - /* Input is DWord aligned */ - - uint2 *dest = (uint2*) desc.dest; - uint2 *source = (uint2*) desc.source; - uint32_t dwordCount = desc.bytes >> 3; - size_t word = blockIdx.x * blockDim.x + threadIdx.x; - size_t stride = blockDim.x * totalBlocks; - while( word < dwordCount ) - { - uint2 a0, a1, a2, a3, a4, a5; - a0 = source[ word ]; - if( word + stride < dwordCount ) - a1 = source[ word + stride ]; - if( word + stride*2 < dwordCount ) - a2 = source[ word + stride*2 ]; - if( word + stride*3 < dwordCount ) - a3 = source[ word + stride*3 ]; - if( word + stride*4 < dwordCount ) - a4 = source[ word + stride*4 ]; - if( word + stride*5 < dwordCount ) - a5 = source[ word + stride*5 ]; - - dest[ word ] = a0; - if( word + stride < dwordCount ) - dest[ word + stride ] = a1; - if( word + stride*2 < dwordCount ) - dest[ word + stride*2 ] = a2; - if( word + stride*3 < dwordCount ) - dest[ word + stride*3 ] = a3; - if( word + stride*4 < dwordCount ) - dest[ word + stride*4 ] = a4; - if( word + stride*5 < dwordCount ) - dest[ word + stride*5 ] = a5; - - word += stride*6; - } - } - - __threadfence_system(); -} - -extern "C" __global__ -void MemCopyAsync( physx::PxGpuCopyDesc desc ) -{ - performCopy( desc, gridDim.x ); -} - - -extern "C" __global__ -void MemCopyBatchedAsync( physx::PxGpuCopyDesc *desc ) -{ - __shared__ physx::PxGpuCopyDesc sdesc; - - if( threadIdx.x < sizeof(physx::PxGpuCopyDesc) / sizeof(uint32_t) ) - { - uint32_t *dest = (uint32_t*)&sdesc; - uint32_t *source = (uint32_t*)(desc + blockIdx.y); - dest[ threadIdx.x ] = source[ threadIdx.x ]; - __threadfence_block(); - } - __syncthreads(); - - performCopy( sdesc, gridDim.x ); -} diff --git a/PxShared/src/cudamanager/src/CudaContextManager.cpp b/PxShared/src/cudamanager/src/CudaContextManager.cpp deleted file mode 100644 index e05911e..0000000 --- a/PxShared/src/cudamanager/src/CudaContextManager.cpp +++ /dev/null @@ -1,823 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#include "foundation/PxAssert.h" -#include "foundation/PxErrorCallback.h" -#include "foundation/PxMath.h" -#include "foundation/PxPreprocessor.h" - -#include "cudamanager/PxCudaContextManager.h" -#include "task/PxGpuDispatcher.h" - -#include "CudaMemoryManager.h" -#include "GpuDispatcher.h" -#include "PhysXDeviceSettings.h" - -#include "PsMutex.h" -#include "PsThread.h" -#include "PsUserAllocated.h" -#include "PsString.h" - -#include <cuda.h> - -#if PX_WIN32 || PX_WIN64 - -#ifdef PX_SECURE_LOAD_LIBRARY -#include "nvSecureLoadLibrary.h" -#endif - -#pragma warning (push) -#pragma warning (disable : 4668) //'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives' -#include <windows.h> -#pragma warning (pop) - -class IDirect3DDevice9; -class IDirect3DResource9; -class IDirect3DVertexBuffer9; -#include <cudad3d9.h> - -class IDXGIAdapter; -class ID3D10Device; -class ID3D10Resource; -#include <cudad3d10.h> - -struct ID3D11Device; -struct ID3D11Resource; -#include <cudad3d11.h> - -#endif // PX_WINDOWS_FAMILY - -#if PX_LINUX -#include <dlfcn.h> -static void* GetProcAddress(void* handle, const char* name) { return dlsym(handle, name); } -#endif - -#include <GL/gl.h> -#include <cudaGL.h> -#include <assert.h> - -#include "foundation/PxErrors.h" -#include "foundation/PxErrorCallback.h" - -#define CU_INIT_UUID -#include "CudaNode3DLowLatencyInterface.h" - -#define ENABLE_DEVICE_INFO_BRINGUP 0 - -namespace physx -{ - -#if PX_VC -#pragma warning(disable: 4191) //'operator/operation' : unsafe conversion from 'type of expression' to 'type required' -#endif - -#define MIN_SM_MAJOR_VERSION 2 -#define MIN_SM_MINOR_VERSION 0 - -class CudaCtxMgr : public PxCudaContextManager, public shdfnd::UserAllocated -{ -public: - CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback); - ~CudaCtxMgr(); - - bool safeDelayImport(PxErrorCallback& errorCallback); - void acquireContext(); - void releaseContext(); - - /* All these methods can be called without acquiring the context */ - - PxCudaMemoryManager* getMemoryManager(); - PxGpuDispatcher* getGpuDispatcher(); - - bool contextIsValid() const; - bool supportsArchSM10() const; // G80 - bool supportsArchSM11() const; // G92 - bool supportsArchSM12() const; - bool supportsArchSM13() const; // GT200 - bool supportsArchSM20() const; // GF100 - bool supportsArchSM30() const; // GK100 - bool supportsArchSM35() const; // GK110 - bool supportsArchSM50() const; // GM100 - bool supportsArchSM52() const; // GM200 - bool supportsArchSM60() const; // GP100 - bool isIntegrated() const; // true if GPU is integrated (MCP) part - bool canMapHostMemory() const; // true if GPU map host memory to GPU - int getDriverVersion() const; - size_t getDeviceTotalMemBytes() const; - int getMultiprocessorCount() const; - int getSharedMemPerBlock() const; - int getSharedMemPerMultiprocessor() const; - unsigned int getMaxThreadsPerBlock() const; - unsigned int getClockRate() const; - - const char* getDeviceName() const; - CUdevice getDevice() const; - const CUdevprop* getDeviceProperties() const; - - PxCudaInteropMode::Enum getInteropMode() const; - - void setUsingConcurrentStreams(bool); - bool getUsingConcurrentStreams() const; - - bool registerResourceInCudaD3D(CUgraphicsResource& resource, void* resourcePointer, PxCudaInteropRegisterFlags flags); - bool registerResourceInCudaGL(CUgraphicsResource& resource, uint32_t buffer, PxCudaInteropRegisterFlags flags); - bool unregisterResourceInCuda(CUgraphicsResource resource); - - /* - \brief Determine if the user has configured a dedicated PhysX GPU in the NV Control Panel - \returns 1 if there is a dedicated PhysX GPU - \returns 0 if there is NOT a dedicated PhysX GPU - \returns -1 if the routine is not implemented - */ - int usingDedicatedGPU() const; - - void release(); - - CUcontext getContext() { return mCtx; } - -private: - - int mSceneCount; - bool mIsValid; - bool mOwnContext; - CUdevice mDevHandle; - CUcontext mCtx; - CudaMemMgr* mMemMgr; - - GpuDispatcherImpl* mDispatcher; - CUetblPhysXInterface* m_physXInterface; - - /* Cached device attributes, so threads can query w/o context */ - int mComputeCapMajor; - int mComputeCapMinor; - int mIsIntegrated; - int mCanMapHost; - int mDriverVersion; - size_t mTotalMemBytes; - int mMultiprocessorCount; - int mMaxThreadsPerBlock; - char mDeviceName[128]; - int mSharedMemPerBlock; - int mSharedMemPerMultiprocessor; - int mClockRate; - PxCudaInteropMode::Enum mInteropMode; - bool mUsingConcurrentStreams; - -#if PX_DEBUG - static uint32_t mManagerRefCount; - static uint32_t mContextRefCountTls; -#endif -}; - -#if PX_DEBUG -uint32_t CudaCtxMgr::mManagerRefCount = 0; -uint32_t CudaCtxMgr::mContextRefCountTls = 0; -#endif - -bool CudaCtxMgr::contextIsValid() const -{ - return mIsValid; -} -bool CudaCtxMgr::supportsArchSM10() const -{ - return mIsValid; -} -bool CudaCtxMgr::supportsArchSM11() const -{ - return mIsValid && (mComputeCapMinor >= 1 || mComputeCapMajor > 1); -} -bool CudaCtxMgr::supportsArchSM12() const -{ - return mIsValid && (mComputeCapMinor >= 2 || mComputeCapMajor > 1); -} -bool CudaCtxMgr::supportsArchSM13() const -{ - return mIsValid && (mComputeCapMinor >= 3 || mComputeCapMajor > 1); -} -bool CudaCtxMgr::supportsArchSM20() const -{ - return mIsValid && mComputeCapMajor >= 2; -} -bool CudaCtxMgr::supportsArchSM30() const -{ - return mIsValid && mComputeCapMajor >= 3; -} -bool CudaCtxMgr::supportsArchSM35() const -{ - return mIsValid && ((mComputeCapMajor > 3) || (mComputeCapMajor == 3 && mComputeCapMinor >= 5)); -} -bool CudaCtxMgr::supportsArchSM50() const -{ - return mIsValid && mComputeCapMajor >= 5; -} -bool CudaCtxMgr::supportsArchSM52() const -{ - return mIsValid && ((mComputeCapMajor > 5) || (mComputeCapMajor == 5 && mComputeCapMinor >= 2)); -} -bool CudaCtxMgr::supportsArchSM60() const -{ - return mIsValid && mComputeCapMajor >= 6; -} - -bool CudaCtxMgr::isIntegrated() const -{ - return mIsValid && mIsIntegrated; -} -bool CudaCtxMgr::canMapHostMemory() const -{ - return mIsValid && mCanMapHost; -} -int CudaCtxMgr::getDriverVersion() const -{ - return mDriverVersion; -} -size_t CudaCtxMgr::getDeviceTotalMemBytes() const -{ - return mTotalMemBytes; -} -int CudaCtxMgr::getMultiprocessorCount() const -{ - return mMultiprocessorCount; -} -int CudaCtxMgr::getSharedMemPerBlock() const -{ - return mSharedMemPerBlock; -} -int CudaCtxMgr::getSharedMemPerMultiprocessor() const -{ - return mSharedMemPerMultiprocessor; -} -unsigned int CudaCtxMgr::getMaxThreadsPerBlock() const -{ - return (unsigned int)mMaxThreadsPerBlock; -} -unsigned int CudaCtxMgr::getClockRate() const -{ - return (unsigned int)mClockRate; -} - -const char* CudaCtxMgr::getDeviceName() const -{ - if (mIsValid) - { - return mDeviceName; - } - else - { - return "Invalid"; - } -} - -CUdevice CudaCtxMgr::getDevice() const -{ - if (mIsValid) - { - return mDevHandle; - } - else - { - return -1; - } -} - -PxCudaInteropMode::Enum CudaCtxMgr::getInteropMode() const -{ - return mInteropMode; -} - -void CudaCtxMgr::setUsingConcurrentStreams(bool value) -{ - mUsingConcurrentStreams = value; -} - -bool CudaCtxMgr::getUsingConcurrentStreams() const -{ - return mUsingConcurrentStreams; -} - -PxCudaMemoryManager* CudaCtxMgr::getMemoryManager() -{ - if (mIsValid) - { - return mMemMgr; - } - else - { - return NULL; - } -} - -PxGpuDispatcher* CudaCtxMgr::getGpuDispatcher() -{ - if (mIsValid) - { - return mDispatcher; - } - else - { - return NULL; - } -} - -int CudaCtxMgr::usingDedicatedGPU() const -{ - if (PxCudaInteropMode::NO_INTEROP == getInteropMode()) - { - return PhysXDeviceSettings::isUsingDedicatedGPU(); - } - else - { - return 0; // not a dedicated GPU - } -} - -#define CUT_SAFE_CALL(call) { CUresult ret = call; \ - if( CUDA_SUCCESS != ret ) { PX_ASSERT(0); } } - -/* If a context is not provided, an ordinal must be given */ -CudaCtxMgr::CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback) - : mSceneCount(0) - , mOwnContext(false) - , mMemMgr(0) - , mDispatcher(0) - , m_physXInterface(0) - , mInteropMode(desc.interopMode) - , mUsingConcurrentStreams(true) -{ - CUresult status; - mIsValid = false; - mDeviceName[0] = 0; - - if (safeDelayImport(errorCallback) == false) - { - // The table where this info is found is here: https://wiki.nvidia.com/nvcompute/index.php/NVCompute#CUDA_Planning - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "NVIDIA Release 331 graphics driver and above is required for GPU acceleration.", __FILE__, __LINE__); - return; - } - - if (desc.ctx == 0) - { - int flags = CU_CTX_LMEM_RESIZE_TO_MAX | CU_CTX_SCHED_BLOCKING_SYNC | CU_CTX_MAP_HOST; - class FoundationErrorReporter : public PxErrorCallback - { - public: - FoundationErrorReporter(PxErrorCallback& ec) - : errorCallback(&ec) - { - } - - virtual void reportError(PxErrorCode::Enum code, const char* message, const char* file, int line) - { - errorCallback->reportError( code, message, file, line); - } - - PxErrorCallback* errorCallback; - } foundationErrorReporter(errorCallback); - - int devOrdinal = PhysXDeviceSettings::getSuggestedCudaDeviceOrdinal(foundationErrorReporter); - if (devOrdinal < 0) - { - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "No PhysX capable GPU suggested.", __FILE__, __LINE__); - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "If you have a PhysX capable GPU, verify that PhysX is not set to CPU in the NVIDIA Control Panel.", __FILE__, __LINE__); - return; - } - - status = cuInit(0); - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuInit failed", __FILE__, __LINE__); - return; - } - - // Try to create the context on Node3DLowLatency. - // If that does not work, try to create the cuda context using cuCtxCreatePhysX, - // since we must be on a driver that does not support cuCtxCreateOnNode3DLowLatency. - cuGetExportTable((const void**)&m_physXInterface, (const CUuuid*)&CU_ETID_PhysXInterface); - - // if using a dedicated GPU or SLI we disable D3D interop (which is not supported over multiple GPUs) - // this ensures the users control panel setting is always respected - bool sliEnabled = false; - if (mInteropMode != PxCudaInteropMode::NO_INTEROP && desc.graphicsDevice != NULL) - { - sliEnabled = PhysXDeviceSettings::isSLIEnabled(desc.graphicsDevice) == 1 ? true : false; - } - - if (PhysXDeviceSettings::isUsingDedicatedGPU() == 1 || sliEnabled) - { - if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP || - mInteropMode == PxCudaInteropMode::D3D11_INTEROP) - { - mInteropMode = PxCudaInteropMode::NO_INTEROP; - if (sliEnabled) - { - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "D3D/CUDA interop cannot be used in tandem with SLI, disabling interop. Query PxCudaContextManager::getInteropMode() for interop status.", - __FILE__,__LINE__); - } - } - } - - if (mInteropMode == PxCudaInteropMode::NO_INTEROP) - { - status = cuDeviceGet(&mDevHandle, devOrdinal); - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceGet failed",__FILE__,__LINE__); - return; - } - - if (m_physXInterface) - status = m_physXInterface->cuCtxCreateOnNode3DLowLatency(&mCtx, (unsigned int)flags, mDevHandle); - else - status = cuCtxCreate(&mCtx, (unsigned int)flags, mDevHandle); - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuCtxCreate failed",__FILE__,__LINE__); - return; - } - mOwnContext = true; - } - else if (mInteropMode == PxCudaInteropMode::OGL_INTEROP) - { - status = cuDeviceGet(&mDevHandle, devOrdinal); - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceGet failed",__FILE__,__LINE__); - return; - } - - status = cuGLCtxCreate(&mCtx, (unsigned int)flags, mDevHandle); - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuCtxGLCreate failed",__FILE__,__LINE__); - return; - } - - status = cuGLInit(); - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuGLInit failed",__FILE__,__LINE__); - return; - } - mOwnContext = true; - } -#if PX_WIN32 || PX_WIN64 - else if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP) - { - status = cuD3D10CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags, - reinterpret_cast<ID3D10Device*>(desc.graphicsDevice)); - - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuD3D10CtxCreate failed",__FILE__,__LINE__); - return; - } - mOwnContext = true; - } - else if (mInteropMode == PxCudaInteropMode::D3D11_INTEROP) - { - status = cuD3D11CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags, - reinterpret_cast<ID3D11Device*>(desc.graphicsDevice)); - - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuD3D11CtxCreate failed",__FILE__,__LINE__); - return; - } - mOwnContext = true; - } -#endif //PX_WIN32 || PX_WIN64 - else - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "Requested interop type is not supported!",__FILE__,__LINE__); - return; - } - } - else - { - mCtx = *desc.ctx; - status = cuCtxGetDevice(&mDevHandle); - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuCtxGetDevice failed",__FILE__,__LINE__); - return; - } - } - - // Verify we can at least allocate a CUDA event from this context - CUevent testEvent; - if (CUDA_SUCCESS == cuEventCreate(&testEvent, 0)) - { - cuEventDestroy(testEvent); - } - else - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "CUDA context validation failed",__FILE__,__LINE__); - return; - } - - status = cuDeviceGetName(mDeviceName, sizeof(mDeviceName), mDevHandle); - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceGetName failed",__FILE__,__LINE__); - return; - } - - cuDeviceGetAttribute(&mSharedMemPerBlock, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, mDevHandle); - cuDeviceGetAttribute(&mSharedMemPerMultiprocessor, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, mDevHandle); - cuDeviceGetAttribute(&mClockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, mDevHandle); - cuDeviceGetAttribute(&mComputeCapMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, mDevHandle); - cuDeviceGetAttribute(&mComputeCapMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, mDevHandle); - cuDeviceGetAttribute(&mIsIntegrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, mDevHandle); - cuDeviceGetAttribute(&mCanMapHost, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, mDevHandle); - cuDeviceGetAttribute(&mMultiprocessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, mDevHandle); - cuDeviceGetAttribute(&mMaxThreadsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, mDevHandle); - - status = cuDeviceTotalMem((size_t*)&mTotalMemBytes, mDevHandle); - if (CUDA_SUCCESS != status) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceTotalMem failed",__FILE__,__LINE__); - return; - } - - // minimum compute capability is MIN_SM_MAJOR_VERSION.MIN_SM_MINOR_VERSION - if ((mComputeCapMajor < MIN_SM_MAJOR_VERSION) || - (mComputeCapMajor == MIN_SM_MAJOR_VERSION && mComputeCapMinor < MIN_SM_MINOR_VERSION)) - { - char buffer[256]; - physx::shdfnd::snprintf(buffer, 256, "Minimum GPU compute capability %d.%d is required", MIN_SM_MAJOR_VERSION, MIN_SM_MINOR_VERSION); - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING,buffer,__FILE__,__LINE__); - return; - } - - mMemMgr = PX_NEW(CudaMemMgr)(*this, errorCallback); - if (mMemMgr == NULL) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "CudaMemMgr failed: Unable to allocate heaps",__FILE__,__LINE__); - return; - } - - bool succ = true; - for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) - { - PxCudaBufferType type(PxCudaBufferMemorySpace::Enum(i), PxCudaBufferFlags::F_READ_WRITE); - succ &= mMemMgr->setBaseSize(type, desc.memoryBaseSize[i]); - succ &= mMemMgr->setPageSize(type, desc.memoryPageSize[i]); - succ &= mMemMgr->setMaxMemorySize(type, desc.maxMemorySize[i]); - PX_ASSERT(succ); - if (!succ) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "CudaMemMgr failed: Invalid memory parameter",__FILE__,__LINE__); - return; - } - } - -#if PX_DEBUG - if(!mManagerRefCount++) - mContextRefCountTls = shdfnd::TlsAlloc(); - if(!shdfnd::TlsGet(mContextRefCountTls)) - CUT_SAFE_CALL(cuCtxSetCurrent(0)); -#endif - - mIsValid = true; - mDispatcher = PX_NEW(GpuDispatcherImpl)(errorCallback, *this); - if (!mDispatcher || mDispatcher->failureDetected()) - { - errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "Failed to create functional GPU dispatcher",__FILE__,__LINE__); - mIsValid = false; - return; - } - - mDispatcher->start(); - -#if ENABLE_DEVICE_INFO_BRINGUP - // Device info (Enable for Amodel and Emulator testing) - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Device Name: %s", mDeviceName); - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Shared Memory Per Block: %d", mSharedMemPerBlock); - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Shared Memory Per Multiprocessor: %d", mSharedMemPerMultiprocessor); - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Number of SM: %d", mMultiprocessorCount); - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Max Threads Per Block: %d", mMaxThreadsPerBlock); -#endif -} - -/* Some driver version mismatches can cause delay import crashes. Load NVCUDA.dll - * manually, verify its version number, then allow delay importing to bind all the - * APIs. - */ -bool CudaCtxMgr::safeDelayImport(PxErrorCallback& errorCallback) -{ -#if PX_WIN32 || PX_WIN64 -#ifdef PX_SECURE_LOAD_LIBRARY - HMODULE hCudaDriver = nvLoadSystemLibrary("nvcuda.dll"); -#else - HMODULE hCudaDriver = LoadLibrary("nvcuda.dll"); -#endif -#elif PX_LINUX - void* hCudaDriver = dlopen("libcuda.so", RTLD_NOW); -#endif - if (!hCudaDriver) - { - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "nvcuda.dll not found or could not be loaded.", __FILE__, __LINE__); - return false; - } - - typedef CUresult(CUDAAPI * pfnCuDriverGetVersion_t)(int*); - pfnCuDriverGetVersion_t pfnCuDriverGetVersion = (pfnCuDriverGetVersion_t) GetProcAddress(hCudaDriver, "cuDriverGetVersion"); - if (!pfnCuDriverGetVersion) - { - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "cuDriverGetVersion missing in nvcuda.dll.", __FILE__, __LINE__); - return false; - } - - CUresult status = pfnCuDriverGetVersion(&mDriverVersion); - if (status != CUDA_SUCCESS) - { - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Retrieving CUDA driver version failed.", __FILE__, __LINE__); - return false; - } - - /* Let's require a driver version >= to the version we compile against - * Currently, CUDA_VERSION is 6000 or 6.0, but APEX still uses CUDA 5.0 so we can't assert on 6.0 yet. - */ - PX_COMPILE_TIME_ASSERT(5000 <= CUDA_VERSION); - - if (mDriverVersion < CUDA_VERSION) - { - char buffer[256]; - physx::shdfnd::snprintf(buffer, 256, "CUDA driver version is %u, expected at least %u.", mDriverVersion, CUDA_VERSION); - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, buffer, __FILE__,__LINE__); - return false; - } - - /* Now trigger delay import and API binding */ - status = cuDriverGetVersion(&mDriverVersion); - if (status != CUDA_SUCCESS) - { - errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Failed to bind CUDA API.", __FILE__, __LINE__); - return false; - } - - /* Not strictly necessary, but good practice */ -#if PX_WIN32 | PX_WIN64 - FreeLibrary(hCudaDriver); -#elif PX_LINUX - dlclose(hCudaDriver); -#endif - - - return true; -} - -void CudaCtxMgr::release() -{ - delete this; -} - -CudaCtxMgr::~CudaCtxMgr() -{ - if (mDispatcher) - { - releaseGpuDispatcher(*mDispatcher); - } - if (mMemMgr) - { - delete mMemMgr; - } - if (mOwnContext) - { - CUT_SAFE_CALL(cuCtxDestroy(mCtx)); - } - -#if PX_DEBUG - if(!--mManagerRefCount) - shdfnd::TlsFree(mContextRefCountTls); -#endif -} - -bool CudaCtxMgr::registerResourceInCudaGL(CUgraphicsResource& resource, uint32_t buffer, PxCudaInteropRegisterFlags flags) -{ - CUresult ret = CUDA_ERROR_UNKNOWN; - - acquireContext(); - - PX_ASSERT(mInteropMode == PxCudaInteropMode::OGL_INTEROP); - - ret = cuGraphicsGLRegisterBuffer(&resource, (GLuint) buffer, uint32_t(flags)); - - releaseContext(); - - return ret == CUDA_SUCCESS; -} - -bool CudaCtxMgr::registerResourceInCudaD3D(CUgraphicsResource& resource, void* resourcePointer, PxCudaInteropRegisterFlags flags) -{ - CUresult ret = CUDA_ERROR_UNKNOWN; -#if PX_WINDOWS_FAMILY - acquireContext(); - - switch (mInteropMode) - { - case PxCudaInteropMode::D3D10_INTEROP: - ret = cuGraphicsD3D10RegisterResource(&resource, (ID3D10Resource*)resourcePointer, uint32_t(flags)); - break; - case PxCudaInteropMode::D3D11_INTEROP: - ret = cuGraphicsD3D11RegisterResource(&resource, (ID3D11Resource*)resourcePointer, uint32_t(flags)); - break; - case PxCudaInteropMode::NO_INTEROP: - case PxCudaInteropMode::OGL_INTEROP: - case PxCudaInteropMode::COUNT: - default: - PX_ALWAYS_ASSERT_MESSAGE("unexpected state in registerResourceInCuda3D"); - } - - releaseContext(); -#else - PX_UNUSED(resource); - PX_UNUSED(resourcePointer); - PX_UNUSED(flags); -#endif //PX_WINDOWS_FAMILY - return ret == CUDA_SUCCESS; -} - -bool CudaCtxMgr::unregisterResourceInCuda(CUgraphicsResource resource) -{ - CUresult ret = CUDA_ERROR_UNKNOWN; - - acquireContext(); - - ret = cuGraphicsUnregisterResource(resource); - - releaseContext(); - - return ret == CUDA_SUCCESS; -} - -void CudaCtxMgr::acquireContext() -{ - CUcontext ctx = 0; - CUT_SAFE_CALL(cuCtxGetCurrent(&ctx)); - - if (ctx != mCtx) - { -#if PX_DEBUG - PX_ASSERT(!shdfnd::TlsGet(mContextRefCountTls)); -#endif - CUT_SAFE_CALL(cuCtxSetCurrent(mCtx)); - } - -#if PX_DEBUG - char* refCount = (char*)shdfnd::TlsGet(mContextRefCountTls); - shdfnd::TlsSet(mContextRefCountTls, ++refCount); -#endif -} - -void CudaCtxMgr::releaseContext() -{ -#if PX_DEBUG - char* refCount = (char*)shdfnd::TlsGet(mContextRefCountTls); - shdfnd::TlsSet(mContextRefCountTls, --refCount); - // see DE8475 - if(!refCount) - CUT_SAFE_CALL(cuCtxSetCurrent(0)); -#endif -} - -#if PX_SUPPORT_GPU_PHYSX -extern "C" void initUtilKernels(); - -PxCudaContextManager* createCudaContextManager(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback) -{ - //this call is needed to force UtilKernels linkage in case someone links PxCudaContextManager as Static Library! - initUtilKernels(); - - return PX_NEW(CudaCtxMgr)(desc, errorCallback); -} - -#endif - -} // end physx namespace - - diff --git a/PxShared/src/cudamanager/src/CudaKernelWrangler.cpp b/PxShared/src/cudamanager/src/CudaKernelWrangler.cpp deleted file mode 100644 index 7579d63..0000000 --- a/PxShared/src/cudamanager/src/CudaKernelWrangler.cpp +++ /dev/null @@ -1,242 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#include "task/PxGpuDispatcher.h" -#include "cudamanager/PxCudaContextManager.h" - -#include "foundation/PxAssert.h" -#include "foundation/PxErrorCallback.h" - -#include "PsString.h" - -#include "CudaKernelWrangler.h" - -#include <cuda.h> -#include <texture_types.h> - -/** - * Workaround hacks for using nvcc --compiler output object files - * without linking with CUDART. We must implement our own versions - * of these functions that the object files are hard-coded to call into. - */ - -#define CUT_SAFE_CALL(call) { CUresult ret = call; if( CUDA_SUCCESS != ret ) { PX_ASSERT(!ret); } } - -#define MAX_MODULES 64 // Max number of .cu files you will build -static void* gModuleTable[ MAX_MODULES ]; -static int gNumModules = 0; - -#define MAX_FUNCTIONS 256 // Max number of kernel of entry points -typedef struct -{ - int moduleIndex; - const char* functionName; -} cuFuncDesc; -static cuFuncDesc gFunctionTable[ MAX_FUNCTIONS ]; -static int gNumFunctions = 0; - -using namespace physx::shdfnd; -using namespace physx; - -KernelWrangler::KernelWrangler(PxGpuDispatcher& gd, PxErrorCallback& errorCallback, const char** funcNames, uint16_t numFuncs) - : mError(false) - , mCuFunctions(PX_DEBUG_EXP("CuFunctions")) - , mCuModules(PX_DEBUG_EXP("CuModules")) - , mGpuDispatcher(gd) - , mErrorCallback(errorCallback) -{ - PxScopedCudaLock _lock_(*gd.getCudaContextManager()); - - /* Formally load the CUDA modules, get CUmodule handles */ - mCuModules.resize((uint32_t)gNumModules); - for (int i = 0 ; i < gNumModules ; ++i) - { - CUresult ret = cuModuleLoadDataEx(&mCuModules[(uint32_t)i], gModuleTable[i], 0, NULL, NULL); - if (ret != CUDA_SUCCESS && ret != CUDA_ERROR_NO_BINARY_FOR_GPU) - { - mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, "Failed to load CUDA module data.", __FILE__, __LINE__); - mError = true; - return; - } - } - - /* matchup funcNames to CUDA modules, get CUfunction handles */ - mCuFunctions.resize(numFuncs); - mCuFuncModIndex.resize(numFuncs); - for (uint32_t i = 0 ; i < numFuncs ; ++i) - { - for (int j = 0; ; ++j) - { - if(j == gNumFunctions) - { - char buffer[256]; - physx::shdfnd::snprintf(buffer, 256, "Could not find registered CUDA function '%s'.", funcNames[i]); - mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, buffer, __FILE__, __LINE__); - mError = true; - return; - } - - if (!physx::shdfnd::strcmp(gFunctionTable[j].functionName, funcNames[i])) - { - mCuFuncModIndex[i] = (uint16_t)gFunctionTable[j].moduleIndex; - CUresult ret = cuModuleGetFunction(&mCuFunctions[i], mCuModules[mCuFuncModIndex[i]], funcNames[i]); - if (ret != CUDA_SUCCESS) - { - char buffer[256]; - physx::shdfnd::snprintf(buffer, 256, "Could not find CUDA module containing function '%s'.", funcNames[i]); - mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, buffer, __FILE__, __LINE__); - mError = true; - return; - } - break; - } - } - } -} - -KernelWrangler::~KernelWrangler() -{ - if (mCuModules.size()) - { - PxScopedCudaLock _lock_(*mGpuDispatcher.getCudaContextManager()); - - for (uint32_t i = 0 ; i < mCuModules.size() ; i++) - if(mCuModules[i]) - CUT_SAFE_CALL(cuModuleUnload(mCuModules[i])); - } -} - -void const* const* KernelWrangler::getImages() -{ - return gModuleTable; -} - -int KernelWrangler::getNumImages() -{ - return gNumModules; -} - -/* - * These calls are all made _before_ main() during static initialization - * of this DLL. - */ - -#include <driver_types.h> - -#if PX_WINDOWS_FAMILY -#define CUDARTAPI __stdcall -#endif - -struct uint3; -struct dim3; - -extern "C" -void** CUDARTAPI __cudaRegisterFatBinary(void* fatBin) -{ - //HACK to get real fatbin in CUDA 4.0 - struct CUIfatbinStruct - { - int magic; - int version; - void *fatbinArray; - char *fatbinFile; - }; - const CUIfatbinStruct *fatbinStruct = (const CUIfatbinStruct *)fatBin; - if (fatbinStruct->magic == 0x466243B1) - { - fatBin = fatbinStruct->fatbinArray; - } - - if (gNumModules < MAX_MODULES) - { - gModuleTable[ gNumModules ] = fatBin; - return (void**)(size_t) gNumModules++; - } - return NULL; -} - -extern "C" -void CUDARTAPI __cudaUnregisterFatBinary(void** fatCubinHandle) -{ - gModuleTable[(int)(size_t) fatCubinHandle ] = 0; -} - -extern "C" -void CUDARTAPI __cudaRegisterTexture(void**, const struct textureReference*, const void**, const char*, int, int, int) -{ -} - -extern "C" void CUDARTAPI __cudaRegisterVar(void**, char*, char*, const char*, int, int, int, int) -{ -} - - -extern "C" void CUDARTAPI __cudaRegisterShared(void**, void**) -{ -} - -extern "C" -void CUDARTAPI __cudaRegisterFunction(void** fatCubinHandle, const char*, - char*, const char* deviceName, int, uint3*, uint3*, dim3*, dim3*, int*) -{ - if (gNumFunctions < MAX_FUNCTIONS) - { - // We need this association of function to module in order to find textures and globals - gFunctionTable[ gNumFunctions ].moduleIndex = (int)(size_t) fatCubinHandle; - gFunctionTable[ gNumFunctions ].functionName = deviceName; - gNumFunctions++; - } -} - -/* These functions are implemented just to resolve link dependencies */ - -extern "C" -cudaError_t CUDARTAPI cudaLaunch(const char* entry) -{ - PX_UNUSED(entry); - return cudaSuccess; -} - -extern "C" -cudaError_t CUDARTAPI cudaSetupArgument(const void*, size_t, size_t) -{ - return cudaSuccess; -} - -extern "C" -struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( - int x, int y, int z, int w, enum cudaChannelFormatKind f) -{ - struct cudaChannelFormatDesc desc; - desc.x = x; - desc.y = y; - desc.z = z; - desc.w = w; - desc.f = f; - return desc; -} - diff --git a/PxShared/src/cudamanager/src/CudaMemoryManager.cpp b/PxShared/src/cudamanager/src/CudaMemoryManager.cpp deleted file mode 100644 index b1c6f94..0000000 --- a/PxShared/src/cudamanager/src/CudaMemoryManager.cpp +++ /dev/null @@ -1,649 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#include "cudamanager/PxCudaContextManager.h" -#include "foundation/PxMath.h" -#include "foundation/PxMemory.h" - -#include "CudaMemoryManager.h" -#include "HeapManagerRef.h" - -#include <cuda.h> - -#define DEVICE_BASE_SIZE (0) -#define DEVICE_PAGE_SIZE ( 2 * 1024*1024) -#define PINNED_BASE_SIZE (0) -#define PINNED_PAGE_SIZE ( 2 * 1024*1024) -#define WC_BASE_SIZE (0) -#define WC_PAGE_SIZE ( 2 * 1024*1024) -#define MIN_BLOCK_SIZE 2048 - - -#define CMM_DELETE_SINGLE(x) { if(x) delete x; } -#define CMM_DELETE_ARRAY(x) { if(x) delete [] x; } - -using namespace physx; - -CudaMemMgr::CudaMemMgr(PxCudaContextManager& mgr, physx::PxErrorCallback& errorCallback) - : mErrorCallback(errorCallback) - , mBufferPool("mBufferPool", 1024) - , mInitialized(false) - , mMgr(mgr) - , mDebugDisableAllocs(false) -{ - for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) - { - mHeap[i] = NULL; - mMemoryAllocator[i] = NULL; - mMemoryMaxSize[i] = size_t(-1); - } - - mMemoryBaseSize[PxCudaBufferMemorySpace::T_GPU] = DEVICE_BASE_SIZE; - mMemoryBaseSize[PxCudaBufferMemorySpace::T_PINNED_HOST] = PINNED_BASE_SIZE; - mMemoryBaseSize[PxCudaBufferMemorySpace::T_WRITE_COMBINED] = WC_BASE_SIZE; - mMemoryBaseSize[PxCudaBufferMemorySpace::T_HOST] = 0; - - mMemoryPageSize[PxCudaBufferMemorySpace::T_GPU] = DEVICE_PAGE_SIZE; - mMemoryPageSize[PxCudaBufferMemorySpace::T_PINNED_HOST] = PINNED_PAGE_SIZE; - mMemoryPageSize[PxCudaBufferMemorySpace::T_WRITE_COMBINED] = WC_PAGE_SIZE; - mMemoryPageSize[PxCudaBufferMemorySpace::T_HOST] = PINNED_PAGE_SIZE; -} - - -CudaMemMgr::~CudaMemMgr() -{ - for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) - { - CMM_DELETE_SINGLE(mHeap[i]); - CMM_DELETE_SINGLE(mMemoryAllocator[i]); - } -} - - -PX_INLINE bool CudaMemMgr::initialize() -{ - if (mInitialized) - { - return true; - } - - for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) - { - mHeap[i] = PX_NEW(HeapManagerRef)(mErrorCallback, false); - PX_ASSERT(mHeap[i]); - } - - mMemoryAllocator[PxCudaBufferMemorySpace::T_GPU] = PX_NEW(DeviceMemAllocator)(mMgr, mMemoryMaxSize[PxCudaBufferMemorySpace::T_GPU]); - mMemoryAllocator[PxCudaBufferMemorySpace::T_PINNED_HOST] = PX_NEW(PinnedMemAllocator)(mMgr, mMemoryMaxSize[PxCudaBufferMemorySpace::T_PINNED_HOST]); - mMemoryAllocator[PxCudaBufferMemorySpace::T_WRITE_COMBINED] = PX_NEW(WriteCombinedMemAllocator)(mMgr, mMemoryMaxSize[PxCudaBufferMemorySpace::T_WRITE_COMBINED]); - mMemoryAllocator[PxCudaBufferMemorySpace::T_HOST] = PX_NEW(HostMemAllocator)(mMemoryMaxSize[PxCudaBufferMemorySpace::T_HOST]); - - bool succ = true; - for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) - { - succ &= mHeap[i]->init(mMemoryAllocator[i], mMemoryBaseSize[i], mMemoryPageSize[i], MIN_BLOCK_SIZE); - PX_ASSERT(succ); - } - - for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) - { - succ &= mHeap[i] && mMemoryAllocator[i]; - } - - if (!succ) - { - for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++) - { - CMM_DELETE_SINGLE(mHeap[i]); - CMM_DELETE_SINGLE(mMemoryAllocator[i]); - } - mInitialized = false; - } - - return mInitialized = succ;; -} - - -bool CudaMemMgr::setPageSize(const PxCudaBufferType& type, size_t size) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - if (!mInitialized) - { - mMemoryPageSize[type.memorySpace] = PxMax(mMemoryPageSize[type.memorySpace], size); - return true; - } - else - { - bool ret = mHeap[type.memorySpace]->setPageSize(size); - mMemoryPageSize[type.memorySpace] = ret ? size : mMemoryPageSize[type.memorySpace]; - return ret; - } -} - - -bool CudaMemMgr::setBaseSize(const PxCudaBufferType& type, size_t size) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - if (mInitialized || (((size - 1)&size) != 0)) - { - return false; - } - else - { - mMemoryBaseSize[type.memorySpace] = PxMax(mMemoryBaseSize[type.memorySpace], size); - return true; - } -} - - -size_t CudaMemMgr::getBaseSize(const PxCudaBufferType& type) -{ - return mMemoryBaseSize[type.memorySpace]; -} - - -size_t CudaMemMgr::getPageSize(const PxCudaBufferType& type) -{ - return mMemoryPageSize[type.memorySpace]; -} - - -bool CudaMemMgr::setMaxMemorySize(const PxCudaBufferType& type, size_t size) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - if (mInitialized) - { - switch (type.memorySpace) - { - case PxCudaBufferMemorySpace::T_GPU: - static_cast<DeviceMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size); - return true; - break; - case PxCudaBufferMemorySpace::T_PINNED_HOST: - static_cast<PinnedMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size); - return true; - break; - case PxCudaBufferMemorySpace::T_HOST: - static_cast<HostMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size); - return true; - break; - case PxCudaBufferMemorySpace::T_WRITE_COMBINED: - static_cast<WriteCombinedMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size); - return true; - break; - case PxCudaBufferMemorySpace::COUNT: - default: - PX_ASSERT(!"unknown memory type"); - break; - } - } - else - { - mMemoryMaxSize[type.memorySpace] = PxMax(mMemoryMaxSize[type.memorySpace], size); - return true; - } - - return false; -} - -size_t CudaMemMgr::getMaxMemorySize(const PxCudaBufferType& type) -{ - if (mInitialized) - { - switch (type.memorySpace) - { - case PxCudaBufferMemorySpace::T_GPU: - return static_cast<DeviceMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize(); - break; - case PxCudaBufferMemorySpace::T_PINNED_HOST: - return static_cast<PinnedMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize(); - break; - case PxCudaBufferMemorySpace::T_HOST: - return static_cast<HostMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize(); - break; - case PxCudaBufferMemorySpace::T_WRITE_COMBINED: - return static_cast<WriteCombinedMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize(); - break; - case PxCudaBufferMemorySpace::COUNT: - default: - PX_ASSERT(!"unknown memory type"); - break; - } - } - return 0; -} - -bool CudaMemMgr::reserve(const PxCudaBufferType& type, size_t size) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - if (!mInitialized) - { - if (!initialize()) - { - return false; - } - } - - return mHeap[type.memorySpace]->reserve(size); -} - - -PxCudaBuffer* CudaMemMgr::alloc(const PxCudaBufferType& type, size_t size, PX_ALLOC_INFO_PARAMS_DEF()) -{ - PxCudaBufferPtr addr = alloc(type.memorySpace, size, PX_ALLOC_INFO_PARAMS_INPUT()); - - shdfnd::Mutex::ScopedLock lock(mMutex); - CudaBuffer* buffer = NULL; - if (addr) - { - buffer = mBufferPool.construct(type); - if (buffer) - { - buffer->init(addr, size, *this, PX_ALLOC_INFO_PARAMS_INPUT()); - } - } - return buffer; -} - -PxCudaBufferPtr CudaMemMgr::alloc(PxCudaBufferMemorySpace::Enum memorySpace, size_t size, PX_ALLOC_INFO_PARAMS_DEF()) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - if (!mInitialized) - { - if (!initialize()) - { - return 0; - } - } - - if (mDebugDisableAllocs) - { - return 0; - } - - return reinterpret_cast<PxCudaBufferPtr>(mHeap[memorySpace]->alloc(size, PX_ALLOC_INFO_PARAMS_INPUT())); -} - -bool CudaMemMgr::free(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - if (!mInitialized) - { - return false; - } - - if (addr) - { - return mHeap[memorySpace]->free((void*)(addr)); - } - else - { - return false; - } -} - -bool CudaMemMgr::realloc(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr, size_t size, PX_ALLOC_INFO_PARAMS_DEF()) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - if (!mInitialized) - { - return false; - } - - if (!size) - { - return false; - } - - if (mDebugDisableAllocs) - { - return NULL; - } - - bool ret = false; - if (addr) - { - ret = mHeap[memorySpace]->realloc((void*)(addr), size, PX_ALLOC_INFO_PARAMS_INPUT()); - } - - return ret; -} - -void CudaMemMgr::getStats(const PxCudaBufferType& type, PxCudaMemoryManagerStats& outStats) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - if (!mInitialized) - { - return; - } - - ApexHeapStats hpStats; - mHeap[type.memorySpace]->getStats(hpStats, HeapStatsFlags::F_BASIC_STATS | HeapStatsFlags::F_ALLOC_ID_STATS); - - outStats.heapSize = hpStats.heapSize; - outStats.totalAllocated = hpStats.totalAllocated; - outStats.maxAllocated = hpStats.maxAllocated; - PxMemCopy(outStats.allocIdStats, hpStats.allocIdStats, sizeof(PxAllocIdStats)*PxAllocId::NUM_IDS); -} - - -bool CudaMemMgr::free(CudaBuffer& buffer) -{ - PxCudaBufferMemorySpace::Enum memSpace = buffer.getTypeFast().memorySpace; - PxCudaBufferPtr addr = buffer.getPtrFast(); - - { - shdfnd::Mutex::ScopedLock lock(mMutex); - mBufferPool.destroy(&buffer); - } - - return free(memSpace, addr); -} - - -bool CudaMemMgr::realloc(CudaBuffer& buffer, size_t size, PX_ALLOC_INFO_PARAMS_DEF()) -{ - return realloc(buffer.getTypeFast().memorySpace, buffer.getPtrFast(), size, PX_ALLOC_INFO_PARAMS_INPUT()); -} - -PxCudaBufferPtr CudaMemMgr::getMappedPinnedPtr(PxCudaBufferPtr hostPtr) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - void* base = mHeap[PxCudaBufferMemorySpace::T_PINNED_HOST]->findBaseAddress((void*)hostPtr); - if (base) - { - size_t offset = ((PinnedMemAllocator*)mMemoryAllocator[PxCudaBufferMemorySpace::T_PINNED_HOST])->getMappedPinnedOffset(base); - return hostPtr + offset; - } - return 0; -} - -bool CudaBuffer::free() -{ - return mMemManager->free(*this); -} - - -bool CudaBuffer::realloc(size_t size, PX_ALLOC_INFO_PARAMS_DEF()) -{ - return mMemManager->realloc(*this, size, PX_ALLOC_INFO_PARAMS_INPUT()); -} - - -DeviceMemAllocator::DeviceMemAllocator(PxCudaContextManager& mgr, size_t maxSize) - : mMgr(mgr) - , mMaxSize(maxSize) - , mAllocSize(0) -{} - - -DeviceMemAllocator::~DeviceMemAllocator() -{ - PX_ASSERT(mAllocSize == 0); -} - - -void* DeviceMemAllocator::alloc(const size_t size) -{ - if (mAllocSize + size > mMaxSize) - { - return NULL; - } - else - { - PxScopedCudaLock lock(mMgr); - CUdeviceptr dPtr; - CUresult result = cuMemAlloc(&dPtr, uint32_t(size)); - - if (result == CUDA_SUCCESS) - { - mAllocSize += size; - return (void*)(size_t)(dPtr); - } - else - { - PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate device memory."); - return NULL; - } - } -} - - -void DeviceMemAllocator::free(void* addr, const size_t size) -{ - PxScopedCudaLock lock(mMgr); - PX_ASSERT(mAllocSize >= size); - CUresult result = cuMemFree((CUdeviceptr)(size_t)(addr)); - PX_UNUSED(result); - PX_ASSERT(result == CUDA_SUCCESS); - mAllocSize -= size; -} - - -PinnedMemAllocator::PinnedMemAllocator(PxCudaContextManager& mgr, size_t maxSize) - : mMgr(mgr) - , mMaxSize(maxSize) - , mAllocSize(0) -{ -} - - -PinnedMemAllocator::~PinnedMemAllocator() -{ - PX_ASSERT(mAllocSize == 0); -} - - -void* PinnedMemAllocator::alloc(const size_t size) -{ - if (mAllocSize + size > mMaxSize) - { - return NULL; - } - else - { - PxScopedCudaLock lock(mMgr); - void* hPtr; - unsigned int flags = 0; - - if (mMgr.canMapHostMemory()) - { - flags |= CU_MEMHOSTALLOC_DEVICEMAP; - } - CUresult result = cuMemHostAlloc(&hPtr, uint32_t(size), flags); - - if (result == CUDA_SUCCESS) - { - if (hPtr) - { - mAllocSize += size; - } - - if (mMgr.canMapHostMemory()) - { - CUdeviceptr dptr = 0; - cuMemHostGetDevicePointer(&dptr, hPtr, 0); - mMappedPinnedPtrs.insert(hPtr, size_t(dptr)); - } - - return hPtr; - } - else - { - PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate pinned memory."); - return NULL; - } - } -} - - -void PinnedMemAllocator::free(void* addr, const size_t size) -{ - PxScopedCudaLock lock(mMgr); - PX_ASSERT(mAllocSize >= size); - - if (mMgr.canMapHostMemory()) - { - PX_ASSERT(mMappedPinnedPtrs.find(addr)); - mMappedPinnedPtrs.erase(addr); - } - - CUresult result = cuMemFreeHost(addr); - PX_UNUSED(result); - PX_ASSERT(result == CUDA_SUCCESS); - mAllocSize -= size; -} - - -WriteCombinedMemAllocator::WriteCombinedMemAllocator(PxCudaContextManager& mgr, size_t maxSize) - : mMgr(mgr) - , mMaxSize(maxSize) - , mAllocSize(0) -{ - mWcMemSupport = mMgr.getDriverVersion() >= 2020 ? WcMem::SUPPORTED : WcMem::NOT_SUPPORTED; -} - - -WriteCombinedMemAllocator::~WriteCombinedMemAllocator() -{ - PX_ASSERT(mAllocSize == 0); -} - - -bool WriteCombinedMemAllocator::isWcMemSupported() -{ - if (mWcMemSupport == WcMem::SUPPORTED) - { - return true; - } - else - { - PX_ASSERT(mWcMemSupport == WcMem::NOT_SUPPORTED); - return false; - } -} - - -void* WriteCombinedMemAllocator::alloc(const size_t size) -{ - if (mAllocSize + size > mMaxSize) - { - return NULL; - } - else - { - PxScopedCudaLock lock(mMgr); - void* hPtr = NULL; - - unsigned int flags = CU_MEMHOSTALLOC_WRITECOMBINED; - - if (mMgr.canMapHostMemory()) - { - flags |= CU_MEMHOSTALLOC_DEVICEMAP; - } - - bool success = isWcMemSupported() && (cuMemHostAlloc(&hPtr, size, flags) == CUDA_SUCCESS); - if (success) - { - if (hPtr) - { - mAllocSize += size; - } - - return hPtr; - } - else - { - PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate write combined memory."); - return NULL; - } - } -} - - -void WriteCombinedMemAllocator::free(void* addr, const size_t size) -{ - PxScopedCudaLock lock(mMgr); - PX_ASSERT(mAllocSize >= size); - CUresult result = cuMemFreeHost(addr); - PX_ASSERT(result == CUDA_SUCCESS); - PX_UNUSED(result); - mAllocSize -= size; -} - - -HostMemAllocator::HostMemAllocator(size_t maxSize) - : mMaxSize(maxSize) - , mAllocSize(0) -{ -} - - -HostMemAllocator::~HostMemAllocator() -{ - PX_ASSERT(mAllocSize == 0); -} - - -void* HostMemAllocator::alloc(const size_t size) -{ - if (mAllocSize + size > mMaxSize) - { - return NULL; - } - else - { - void* ret = PX_ALLOC(size, "host memory"); - if (ret) - { - mAllocSize += size; - return ret; - } - else - { - PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate host memory."); - return NULL; - } - } -} - - -void HostMemAllocator::free(void* addr, const size_t size) -{ - PX_ASSERT(mAllocSize >= size); - PX_FREE(addr); - mAllocSize -= size; -} - diff --git a/PxShared/src/cudamanager/src/CudaMemoryManager.h b/PxShared/src/cudamanager/src/CudaMemoryManager.h deleted file mode 100644 index 071b4ab..0000000 --- a/PxShared/src/cudamanager/src/CudaMemoryManager.h +++ /dev/null @@ -1,297 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#ifndef PXCUDACONTEXTMANAGER_CUDAMEMORYMANAGER_H -#define PXCUDACONTEXTMANAGER_CUDAMEMORYMANAGER_H - -#include "task/PxTaskDefine.h" -#include "HeapManagerInterface.h" - -#include "PsPool.h" -#include "PsMutex.h" -#include "PsUserAllocated.h" -#include "PsHashMap.h" - -namespace physx -{ - -class CudaBuffer; -class HeapManagerInterface; -class CudaMemMgr; -class PxCudaContextManager; - -class CudaBuffer: public PxCudaBuffer -{ -public: - PX_INLINE CudaBuffer(const PxCudaBufferType& type) - : mType(type) - {} - -// Ni Interface - bool free(); - bool realloc(size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); - const PxCudaBufferType& getType() const - { - return getTypeFast(); - } - PxCudaBufferPtr getPtr() const - { - return getPtrFast(); - } - size_t getSize() const - { - return getSizeFast(); - } - PxCudaMemoryManager* getCudaMemoryManager() const - { - return getCudaMemoryManagerFast(); - } -// - PX_INLINE const PxCudaBufferType& getTypeFast() const - { - return mType; - } - PX_INLINE PxCudaBufferPtr getPtrFast() const - { - return mPtr; - } - PX_INLINE void setPtr(PxCudaBufferPtr val) - { - mPtr = val; - } - PX_INLINE size_t getSizeFast() const - { - return mSize; - } - PX_INLINE void setSize(size_t val) - { - mSize = val; - } - PX_INLINE PxCudaMemoryManager* getCudaMemoryManagerFast() const - { - return reinterpret_cast<PxCudaMemoryManager*>(mMemManager); - } - PX_INLINE void init(PxCudaBufferPtr ptr, size_t size, CudaMemMgr& manager, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) - { - mPtr = ptr; - mSize = size; - mMemManager = &manager; - mAllocInfo = PxAllocInfo(PX_ALLOC_INFO_PARAMS_INPUT()); - } - - void operator=(const CudaBuffer& in) - { - const_cast<PxCudaBufferType&>(mType) = in.mType; - mPtr = in.mPtr; - mSize = in.mSize; - mMemManager = in.mMemManager; - } - -private: - const PxCudaBufferType mType; - PxCudaBufferPtr mPtr; - size_t mSize; - CudaMemMgr* mMemManager; - PxAllocInfo mAllocInfo; -}; - - -class CudaMemMgr: public PxCudaMemoryManager, public shdfnd::UserAllocated -{ - PX_NOCOPY(CudaMemMgr) -public: - CudaMemMgr(PxCudaContextManager& mMgr, physx::PxErrorCallback& errorCallback); - virtual ~CudaMemMgr(); - - PxCudaBuffer* alloc(const PxCudaBufferType& type, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); - PxCudaBufferPtr alloc(PxCudaBufferMemorySpace::Enum memorySpace, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); - bool free(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr); - bool realloc(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); - void getStats(const PxCudaBufferType& type, PxCudaMemoryManagerStats& outStats); - bool reserve(const PxCudaBufferType& type, size_t size); - bool setPageSize(const PxCudaBufferType& type, size_t size); - bool setMaxMemorySize(const PxCudaBufferType& type, size_t size); - size_t getBaseSize(const PxCudaBufferType& type); - size_t getPageSize(const PxCudaBufferType& type); - size_t getMaxMemorySize(const PxCudaBufferType& type); - void debugDisableAllocs() - { - mDebugDisableAllocs = true; - } - PxCudaBufferPtr getMappedPinnedPtr(PxCudaBufferPtr hostPtr); - - // internals - bool free(CudaBuffer& buffer); - bool realloc(CudaBuffer& buffer, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); - bool setBaseSize(const PxCudaBufferType& type, size_t size); - -private: - PX_INLINE bool initialize(); - physx::PxErrorCallback& mErrorCallback; - HeapManagerInterface* mHeap[PxCudaBufferMemorySpace::COUNT]; - HeapManagerInterface::Allocator* mMemoryAllocator[PxCudaBufferMemorySpace::COUNT]; - size_t mMemoryBaseSize[PxCudaBufferMemorySpace::COUNT]; - size_t mMemoryPageSize[PxCudaBufferMemorySpace::COUNT]; - size_t mMemoryMaxSize[PxCudaBufferMemorySpace::COUNT]; - shdfnd::Pool<CudaBuffer> mBufferPool; - bool mInitialized; - PxCudaContextManager& mMgr; - shdfnd::Mutex mMutex; - bool mDebugDisableAllocs; -}; - -// TODO, give MemoryAllocator prefix or namespace -class DeviceMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated -{ - PX_NOCOPY(DeviceMemAllocator) -public: - DeviceMemAllocator(PxCudaContextManager& mgr, size_t maxSize); - virtual ~DeviceMemAllocator(); - - virtual void* alloc(const size_t size); - virtual void free(void* addr, const size_t size); - - void setMaxSize(size_t maxSize) - { - mMaxSize = maxSize; - } - size_t getMaxSize() - { - return mMaxSize; - } - -private: - PxCudaContextManager& mMgr; - size_t mMaxSize; - size_t mAllocSize; -}; - - -class PinnedMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated -{ - PX_NOCOPY(PinnedMemAllocator) -public: - PinnedMemAllocator(PxCudaContextManager& mMgr, size_t maxSize); - virtual ~PinnedMemAllocator(); - - virtual void* alloc(const size_t size); - virtual void free(void* addr, const size_t size); - - void setMaxSize(size_t maxSize) - { - mMaxSize = maxSize; - } - size_t getMaxSize() - { - return mMaxSize; - } - - size_t getMappedPinnedOffset(void* base) - { - - PX_ASSERT(base); - const shdfnd::HashMap<void*, size_t>::Entry* entry = mMappedPinnedPtrs.find(base); - PX_ASSERT(entry); - return entry->second - size_t(base); - } - -private: - PxCudaContextManager& mMgr; - size_t mMaxSize; - size_t mAllocSize; - shdfnd::HashMap<void*, size_t> mMappedPinnedPtrs; -}; - - -class HostMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated -{ - PX_NOCOPY(HostMemAllocator) -public: - HostMemAllocator(size_t maxSize); - virtual ~HostMemAllocator(); - - virtual void* alloc(const size_t size); - virtual void free(void* addr, const size_t size); - - void setMaxSize(size_t maxSize) - { - mMaxSize = maxSize; - } - size_t getMaxSize() - { - return mMaxSize; - } - -private: - size_t mMaxSize; - size_t mAllocSize; -}; - - -class WriteCombinedMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated -{ - PX_NOCOPY(WriteCombinedMemAllocator) -public: - WriteCombinedMemAllocator(PxCudaContextManager& mgr, size_t maxSize); - virtual ~WriteCombinedMemAllocator(); - - virtual void* alloc(const size_t size); - virtual void free(void* addr, const size_t size); - - void setMaxSize(size_t maxSize) - { - mMaxSize = maxSize; - } - size_t getMaxSize() - { - return mMaxSize; - } - -private: - struct WcMem - { - enum Enum - { - NOT_CHECKED, - SUPPORTED, - NOT_SUPPORTED - }; - }; - - bool isWcMemSupported(); - -private: - int mCudaOrdinal; - PxCudaContextManager& mMgr; - WcMem::Enum mWcMemSupport; - size_t mMaxSize; - size_t mAllocSize; -}; - -} // end physx namespace - -#endif // PXCUDACONTEXTMANAGER_CUDAMEMORYMANAGER_H diff --git a/PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h b/PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h deleted file mode 100644 index f20d87a..0000000 --- a/PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h +++ /dev/null @@ -1,128 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#ifndef PXCUDACONTEXTMANAGER_CUDANODE3DLOWLATENCYINTERFACE_H -#define PXCUDACONTEXTMANAGER_CUDANODE3DLOWLATENCYINTERFACE_H - -#include <cuda.h> - -namespace physx -{ -// Utility macros for defining and using UUID values for use with -// the CUDA driver. -// -// CU_INIT_UUID must be #defined in exactly one translation unit -// per linkage unit (i.e. one .c or .cpp file per binary). This -// allows multiple .c and .cpp files to include headers that define -// UUIDs using CU_DEFINE_UUID: The translation unit that #defines -// CU_INIT_UUID will define and initialize the UUIDs, and all other -// translation units will link to that definition. - -// Define helper macro: CU_INIT_EXTERN_CONST -// In C, global consts have external linkage by default. In C++, -// global consts have internal linkage by default, and require the -// "extern" storage class specifier to have external linkage. C++ -// allows using "extern" with initializers, but it is illegal in C. -// Thus, there is no common syntax for C and C++ to declare and -// initialize global constants with external linkage. This macro -// helps reduce duplication of other macros by factoring out the -// C/C++ discrepancy. -#ifdef __cplusplus -#define CU_INIT_EXTERN_CONST extern const -#else -#define CU_INIT_EXTERN_CONST const -#endif - -// Define macro CU_DEFINE_UUID. The parameters are the commonly -// used "int-short-short-char[8]" style, which can be generated by -// Microsoft's guidgen.exe tool, Visual Studio's "Create GUID" -// option in the Tools menu (select style #2), and many web-based -// UUID generator tools. Here's an example of what "Create GUID" -// style #2 generates: -// -// DEFINE_GUID( <<name>>, -// 0x2446054, 0xbb8e, 0x4b2f, 0x8b, 0xfc, 0xa4, 0xfe, 0x44, 0x9, 0x8f, 0xb8); -// -// So to use one of these with CUDA, just change the macro to -// CU_DEFINE_UUID and choose a symbol name. For example: -// -// CU_DEFINE_UUID( MyUuid, -// 0x2446054, 0xbb8e, 0x4b2f, 0x8b, 0xfc, 0xa4, 0xfe, 0x44, 0x9, 0x8f, 0xb8); -// -#if defined(CU_INIT_UUID) -#define CU_CHAR(x) (char)(unsigned char)((x) & 0xff) -// Define the symbol as exportable to other translation units, and -// initialize the value. Inner set of parens is necessary because -// "bytes" array needs parens within the struct initializer, which -// also needs parens. -#define CU_DEFINE_UUID(name, a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ - CU_INIT_EXTERN_CONST CUuuid name = \ - { \ - { \ - CU_CHAR(a), CU_CHAR((a) >> 8), CU_CHAR((a) >> 16), CU_CHAR((a) >> 24), \ - CU_CHAR(b), CU_CHAR((b) >> 8), \ - CU_CHAR(c), CU_CHAR((c) >> 8), \ - CU_CHAR(d0), \ - CU_CHAR(d1), \ - CU_CHAR(d2), \ - CU_CHAR(d3), \ - CU_CHAR(d4), \ - CU_CHAR(d5), \ - CU_CHAR(d6), \ - CU_CHAR(d7) \ - } \ - } -#else -// Declare the symbol to be imported from another translation unit. -#define CU_DEFINE_UUID(name, a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \ - extern const CUuuid name -#endif - -//------------------------------------------------------------------ -// Cuda Private API Interfaces for PhysX -//------------------------------------------------------------------ - -// This provides backdoor interfaces used by PhysX -CU_DEFINE_UUID(CU_ETID_PhysXInterface, 0x8c0ba50c, 0x0410, 0x9a92, 0x89, 0xa7, 0xd0, 0xdf, 0x10, 0xe7, 0x72, 0x86); - -typedef struct CUetblPhysXInterface_st -{ - /* Size of this structure */ - size_t size; - - /* Create a new CUDA context on Node3dLowLatency. - * - will usually it will just call cuCtxCreateOnNode3DLowLatency. - */ - CUresult (CUDAAPI *cuCtxCreateOnNode3DLowLatency)( - CUcontext *pctx, - unsigned int flags, - CUdevice dev); - -} CUetblPhysXInterface; -} - -#endif // PXCUDACONTEXTMANAGER_CUDANODE3DLOWLATENCYINTERFACE_H diff --git a/PxShared/src/cudamanager/src/GpuDispatcher.cpp b/PxShared/src/cudamanager/src/GpuDispatcher.cpp deleted file mode 100644 index 432a0cd..0000000 --- a/PxShared/src/cudamanager/src/GpuDispatcher.cpp +++ /dev/null @@ -1,928 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#include "task/PxTaskDefine.h" - -#if PX_SUPPORT_GPU_PHYSX - -#include "task/PxGpuDispatcher.h" -#include "task/PxCpuDispatcher.h" -#include "cudamanager/PxCudaContextManager.h" -#include "cudamanager/PxGpuCopyDesc.h" - -#include "CudaKernelWrangler.h" -#include "GpuDispatcher.h" - -#if PX_SUPPORT_PXTASK_PROFILING -#include "foundation/PxProfiler.h" -#endif - -#include "PsArray.h" -#include "PsSync.h" -#include "PsInlineArray.h" -#include "PsUserAllocated.h" -#include "PsAtomic.h" - -#if PX_VC -#pragma warning(disable: 4062) //enumerator 'identifier' in switch of enum 'enumeration' is not handled -#endif - -using namespace physx; - -// hack to run CUDA calls in a single thread -// used to capture CUDA APIC traces -// #define SINGLE_CUDA_THREAD 1 - -/* Kernels in UtilsKernels.cu */ -const char* UtilKernelNames[] = -{ - "Saturate", - "MemCopyAsync", - "MemCopyBatchedAsync", -}; -enum -{ - KERNEL_SATURATE, - KERNEL_MEMCOPY, - KERNEL_MEMCOPY_BATCHED -}; - -/* ==================== LaunchTask =========================== */ - -/** -\brief A task that maintains a list of dependent tasks. - -This task maintains a list of dependent tasks that have their reference counts -reduced on completion of the task. - -The refcount is incremented every time a dependent task is added. -*/ -class physx::FanoutTask : public PxBaseTask, public shdfnd::UserAllocated -{ - PX_NOCOPY(FanoutTask) -public: - FanoutTask(const char* name) : mRefCount(0), mName(name), mNotifySubmission(false) {} - - virtual void run() {} - virtual const char* getName(void) const { return mName; } - - virtual void removeReference() - { - shdfnd::Mutex::ScopedLock lock(mMutex); - if (!shdfnd::atomicDecrement(&mRefCount)) - { - // prevents access to mReferencesToRemove until release - shdfnd::atomicIncrement(&mRefCount); - mNotifySubmission = false; - PX_ASSERT(mReferencesToRemove.empty()); - // save continuation and dependents - for (uint32_t i = 0; i < mDependents.size(); i++) - mReferencesToRemove.pushBack(mDependents[i]); - mDependents.clear(); - mTm->getCpuDispatcher()->submitTask(*this); - } - } - - /** - \brief Increases reference count - */ - virtual void addReference() - { - shdfnd::Mutex::ScopedLock lock(mMutex); - shdfnd::atomicIncrement(&mRefCount); - mNotifySubmission = true; - } - - /** - \brief Return the ref-count for this task - */ - PX_INLINE int32_t getReference() const - { - return mRefCount; - } - - /** - Adds a dependent task. It also sets the task manager querying it from the dependent task. - The refcount is incremented every time a dependent task is added. - */ - void addDependent(PxBaseTask& dependent) - { - shdfnd::Mutex::ScopedLock lock(mMutex); - shdfnd::atomicIncrement(&mRefCount); - mTm = dependent.getTaskManager(); - mDependents.pushBack(&dependent); - dependent.addReference(); - mNotifySubmission = true; - } - - /** - Reduces reference counts of the continuation task and the dependent tasks, also - clearing the copy of continuation and dependents task list. - */ - virtual void release() - { - shdfnd::InlineArray<PxBaseTask*, 10> referencesToRemove; - - { - shdfnd::Mutex::ScopedLock lock(mMutex); - - const uint32_t contCount = mReferencesToRemove.size(); - referencesToRemove.reserve(contCount); - for (uint32_t i=0; i < contCount; ++i) - referencesToRemove.pushBack(mReferencesToRemove[i]); - - mReferencesToRemove.clear(); - // allow access to mReferencesToRemove again - if (mNotifySubmission) - { - removeReference(); - } - else - { - physx::shdfnd::atomicDecrement(&mRefCount); - } - - // the scoped lock needs to get freed before the continuation tasks get (potentially) submitted because - // those continuation tasks might trigger events that delete this task and corrupt the memory of the - // mutex (for example, assume this task is a member of the scene then the submitted tasks cause the simulation - // to finish and then the scene gets released which in turn will delete this task. When this task then finally - // continues the heap memory will be corrupted. - } - - for (uint32_t i=0; i < referencesToRemove.size(); ++i) - referencesToRemove[i]->removeReference(); - } - -private: - friend class LaunchTask; - - volatile int32_t mRefCount; - const char* mName; - shdfnd::InlineArray<PxBaseTask*, 4> mDependents; - shdfnd::InlineArray<PxBaseTask*, 4> mReferencesToRemove; - bool mNotifySubmission; - shdfnd::Mutex mMutex; // guarding mDependents and mNotifySubmission -}; - -class physx::LaunchTask : public physx::FanoutTask -{ -public: - LaunchTask() : FanoutTask("GpuDispatcher.launch"), mIsBatchStarted(false) - { - } - - virtual void run() - { - if (mReferencesToRemove.size() >= 1) - mIsBatchStarted = true; - } - - bool mIsBatchStarted; -}; - -class physx::BlockTask : public PxLightCpuTask, public shdfnd::UserAllocated -{ - PX_NOCOPY(BlockTask) -public: - BlockTask(PxGpuWorkerThread* dispatcher, physx::LaunchTask* launchTask) - : mDispatcher(dispatcher), mLaunchTask(launchTask), mSyncTask(NULL) - {} - - virtual const char* getName(void) const - { - return "GpuDispatcher.block"; - } - - virtual void removeReference() - { - shdfnd::Mutex::ScopedLock lock(mMutex); - if (!physx::shdfnd::atomicDecrement(&mRefCount)) - { - // prevents access to mSyncTask until release - physx::shdfnd::atomicIncrement(&mRefCount); - PX_ASSERT(!mSyncTask); - shdfnd::swap(mSyncTask, mCont); - mTm->getCpuDispatcher()->submitTask(*this); - } - } - - virtual void run() - { - if (mLaunchTask->mIsBatchStarted) - { - mDispatcher->mCtxMgr->acquireContext(); - CUevent stopEv = mDispatcher->mCachedBlockingEvents.get(); - CUstream stream = (CUstream)0; - mSyncTask->addReference(); - mDispatcher->flushBatch(stopEv, stream, mSyncTask); - mDispatcher->mCtxMgr->releaseContext(); - mLaunchTask->mIsBatchStarted = false; - } - } - - virtual void setContinuation(PxBaseTask* continuation) - { - // this function is called multiple times, skip after first call - shdfnd::Mutex::ScopedLock lock(mMutex); - physx::shdfnd::atomicIncrement(&mRefCount); - if (!mCont) - { - mCont = continuation; - mTm = mCont->getTaskManager(); - mCont->addReference(); - } - } - - virtual void release() - { - shdfnd::Mutex::ScopedLock lock(mMutex); - mSyncTask->removeReference(); - mSyncTask = NULL; - // allow access to mSyncTask again - if (mCont) - { - removeReference(); - } - else - { - physx::shdfnd::atomicDecrement(&mRefCount); - } - } - - PxGpuWorkerThread* mDispatcher; - physx::LaunchTask* mLaunchTask; - PxBaseTask* mSyncTask; - shdfnd::Mutex mMutex; // guarding mCont -}; - -/* ==================== API functions =========================== */ - -void physx::releaseGpuDispatcher(PxGpuDispatcher& gd) -{ - GpuDispatcherImpl* impl = (GpuDispatcherImpl*) &gd; - delete impl; -} - -PxCudaContextManager* GpuDispatcherImpl::getCudaContextManager() -{ - return mDispatcher->mCtxMgr; -} - -GpuDispatcherImpl::GpuDispatcherImpl(PxErrorCallback& errorCallback, PxCudaContextManager& ctx) : - mDispatcher(NULL), - mBlockingThread(NULL), - mLaunchTask(NULL), - mBlockTask(NULL), - mSyncTask(NULL) -{ - mDispatcher = PX_NEW(PxGpuWorkerThread); - if (!mDispatcher) - { - forceFailureMode(); - return; - } - - mDispatcher->setCudaContext(ctx); - - mDispatcher->mUtilKernelWrapper = PX_NEW(KernelWrangler)(*this, errorCallback, UtilKernelNames, sizeof(UtilKernelNames) / sizeof(char*)); - if (!mDispatcher->mUtilKernelWrapper || mDispatcher->mUtilKernelWrapper->hadError()) - { - forceFailureMode(); - return; - } - - mLaunchTask = PX_NEW(LaunchTask); - mBlockTask = PX_NEW(BlockTask)(mDispatcher, mLaunchTask); - mSyncTask = PX_NEW(FanoutTask)("GpuDispatcher.sync"); -} - -GpuDispatcherImpl::~GpuDispatcherImpl() -{ - if (mBlockingThread) - { - mBlockingThread->signalQuit(); - PX_ASSERT(mDispatcher); - mDispatcher->mRecordEventQueued.set(); - mBlockingThread->waitForQuit(); - delete mBlockingThread; - } - - if (mDispatcher) - { - mDispatcher->signalQuit(); - mDispatcher->mInputReady.set(); - mDispatcher->waitForQuit(); - delete mDispatcher; - } - - if (mLaunchTask) - PX_DELETE(mLaunchTask); - - if (mBlockTask) - PX_DELETE(mBlockTask); - - if (mSyncTask) - PX_DELETE(mSyncTask); -} - -void GpuDispatcherImpl::start() -{ -#ifndef SINGLE_CUDA_THREAD - mDispatcher->start(shdfnd::Thread::getDefaultStackSize()); -#else - mDispatcher->execute(); -#endif - - mBlockingThread = PX_NEW(BlockingWaitThread)(*mDispatcher); -#ifndef SINGLE_CUDA_THREAD - mBlockingThread->start(shdfnd::Thread::getDefaultStackSize()); -#endif -} - -void GpuDispatcherImpl::startSimulation() -{ - mDispatcher->startSimulation(); -} - -void GpuDispatcherImpl::stopSimulation() -{ - mDispatcher->stopSimulation(); -} - -void GpuDispatcherImpl::startGroup() -{ - shdfnd::atomicIncrement(&mDispatcher->mActiveGroups); -} - -void GpuDispatcherImpl::submitTask(PxTask& task) -{ - mDispatcher->mSubmittedTaskList.push(&task); -} - -void GpuDispatcherImpl::finishGroup() -{ - if (!shdfnd::atomicDecrement(&mDispatcher->mActiveGroups)) - { -#ifdef SINGLE_CUDA_THREAD - mDispatcher->mCtxMgr->acquireContext(); - mDispatcher->processActiveTasks(); - mDispatcher->mCtxMgr->releaseContext(); - mDispatcher->blockingWaitFunc(); -#endif - mDispatcher->mInputReady.set(); - } -} - -void GpuDispatcherImpl::addCompletionPrereq(PxBaseTask& task) -{ - mDispatcher->addCompletionPrereq(task); -} - -bool GpuDispatcherImpl::failureDetected() const -{ - return mDispatcher->mFailureDetected; -} - -void GpuDispatcherImpl::forceFailureMode() -{ - mDispatcher->mFailureDetected = true; -} - -void GpuDispatcherImpl::launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream) -{ - mDispatcher->launchCopyKernel(desc, count, stream); -} - -PxBaseTask& GpuDispatcherImpl::getPreLaunchTask() -{ - PX_ASSERT(mLaunchTask); - return *mLaunchTask; -} - -void GpuDispatcherImpl::addPreLaunchDependent(PxBaseTask& dependent) -{ - PX_ASSERT(mLaunchTask); - mLaunchTask->addDependent(dependent); -} - -PxBaseTask& GpuDispatcherImpl::getPostLaunchTask() -{ - PX_ASSERT(mBlockTask); - return *mBlockTask; -} - -void GpuDispatcherImpl::addPostLaunchDependent(PxBaseTask& dependent) -{ - PX_ASSERT(mSyncTask && mBlockTask); - mSyncTask->addDependent(dependent); - mBlockTask->setContinuation(mSyncTask); - mSyncTask->removeReference(); -} - -/* ==================== Worker Thread =========================== */ - -PxGpuWorkerThread::PxGpuWorkerThread() - : mActiveGroups(0) - , mCtxMgr(NULL) - , mFailureDetected(false) - , mCompletionRingPush(0) - , mCompletionRingPop(0) - , mCachedBlockingEvents(CU_EVENT_BLOCKING_SYNC) - , mCachedNonBlockingEvents(0) - , mCountActiveScenes(0) - , mSmStartTimes(0) - , mUtilKernelWrapper(0) -{ -} - -void PxGpuWorkerThread::setCudaContext(PxCudaContextManager& ctx) -{ - mCtxMgr = &ctx; -} - -PxGpuWorkerThread::~PxGpuWorkerThread() -{ - mCtxMgr->acquireContext(); - mCachedBlockingEvents.clear(); - mCachedNonBlockingEvents.clear(); - mCachedStreams.reset(); - while (!mCachedStreams.empty()) - { - GD_CHECK_CALL(cuStreamDestroy(mCachedStreams.get(mCachedStreams.popBack()))); - } - mCtxMgr->releaseContext(); - - if (mSmStartTimes) - { - PX_FREE(mSmStartTimes); - } - if (mUtilKernelWrapper) - { - // will acquire the context for itself - PX_DELETE(mUtilKernelWrapper); - } -} - - -/* A TaskManager is informing us that its simulation is being stepped */ -void PxGpuWorkerThread::startSimulation() -{ - mUsingConcurrentStreams = mCtxMgr->getUsingConcurrentStreams(); - - shdfnd::atomicIncrement(&mCountActiveScenes); -} - - -/* A TaskManager is informing us that its simulation has ended */ -void PxGpuWorkerThread::stopSimulation() -{ - if (shdfnd::atomicDecrement(&mCountActiveScenes) == 0) - mCachedStreams.reset(); -} - - -/* PxGpuDispatcher worker thread main loop */ -void PxGpuWorkerThread::execute() -{ - setName("GpuDispatcher.Worker"); - -#ifndef SINGLE_CUDA_THREAD - bool running = true; - while (running) - { - mInputReady.wait(); - - if (quitIsSignalled()) - { - break; - } - - if (!mSubmittedTaskList.empty()) - { - mCtxMgr->acquireContext(); - processActiveTasks(); - mCtxMgr->releaseContext(); - } - } - - quit(); -#endif -} - -/* Expected to be called by a GPU task, or a function called by a GPU - * task. The context is important because I think it does not require - * any locking since only one GPU task's launchInstance() function will - * be running at a time, per GpuDispatcherImpl (per CUDA context). - */ -void PxGpuWorkerThread::addCompletionPrereq(PxBaseTask& task) -{ - if(mFailureDetected) - return; - -#if PX_SUPPORT_PXTASK_PROFILING - PX_PROFILE_ZONE("GpuDispatcher.AddCompletionEvent", task.getContextId()); -#endif - task.addReference(); - mCompletionTasks.pushBack(&task); -} - -namespace -{ - template <typename T0> - PX_NOINLINE CUresult launchKernelGrid(CUfunction func, unsigned int gridWidth, unsigned int gridHeight, unsigned int numThreads, unsigned int sharedMem, CUstream stream, T0 v0) - { - void* kernelParams[] = - { - &v0, - }; - return cuLaunchKernel(func, gridWidth, gridHeight, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); - } -} - -void PxGpuWorkerThread::launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream) -{ - if (!mCtxMgr->canMapHostMemory()) - { - for (uint32_t i = 0 ; i < count ; i++) - { - PX_ASSERT(desc->isValid()); - switch (desc->type) - { - case PxGpuCopyDesc::DeviceMemset32: - GD_CHECK_CALL(cuMemsetD32Async(desc->dest, (uint32_t) desc->source, desc->bytes >> 2, stream)); - break; - case PxGpuCopyDesc::DeviceToDevice: - GD_CHECK_CALL(cuMemcpyDtoDAsync(desc->dest, desc->source, desc->bytes, stream)); - break; - case PxGpuCopyDesc::DeviceToHost: - GD_CHECK_CALL(cuMemcpyDtoHAsync((void*) desc->dest, desc->source, desc->bytes, stream)); - break; - case PxGpuCopyDesc::HostToDevice: - GD_CHECK_CALL(cuMemcpyHtoDAsync(desc->dest, (void*) desc->source, desc->bytes, stream)); - break; - } - - desc++; - } - } - else if (count == 1) - { - CUfunction func = mUtilKernelWrapper->getCuFunction(KERNEL_MEMCOPY); - uint32_t smCount = (uint32_t)mCtxMgr->getMultiprocessorCount(); - - PX_ASSERT(desc->isValid()); - - CUdeviceptr dptr; - switch (desc->type) - { - case PxGpuCopyDesc::DeviceToHost: - dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc->dest)); - desc->dest = dptr; - break; - case PxGpuCopyDesc::HostToDevice: - dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc->source)); - desc->source = dptr; - break; - case PxGpuCopyDesc::DeviceMemset32: - case PxGpuCopyDesc::DeviceToDevice: - //do nothing, cases are here for GCCs warning system - break; - } - - uint32_t numThreads; - if (mCtxMgr->supportsArchSM20()) - { - numThreads = 256; - } - else - { - numThreads = 128; - } - uint32_t blocks = uint32_t(desc->bytes / (numThreads * 4 * 6)); - if (blocks == 0) - { - blocks = 1; - } - if (blocks > smCount) - { - blocks = smCount; - } - - GD_CHECK_CALL( - launchKernel(func, blocks, numThreads, 0, stream, *desc) - ); - } - else - { - CUfunction func = mUtilKernelWrapper->getCuFunction(KERNEL_MEMCOPY_BATCHED); - CUdeviceptr dptr; - - for (uint32_t i = 0 ; i < count ; i++) - { - PX_ASSERT(desc[i].isValid()); - - switch (desc[i].type) - { - case PxGpuCopyDesc::DeviceToHost: - dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc[i].dest)); - desc[i].dest = dptr; - break; - case PxGpuCopyDesc::HostToDevice: - dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc[i].source)); - desc[i].source = dptr; - break; - case PxGpuCopyDesc::DeviceMemset32: - case PxGpuCopyDesc::DeviceToDevice: - //do nothing, cases are here for GCCs warning system - break; - } - } - - uint32_t numThreads, numBlocksX; - if (mCtxMgr->supportsArchSM20()) - { - numThreads = 256; - numBlocksX = 1; - } - else - { - numThreads = 128; - numBlocksX = 2; - } - - dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc)); - - GD_CHECK_CALL( - launchKernelGrid(func, numBlocksX, count, numThreads, 0, stream, - dptr) - ); - } -} - -void PxGpuWorkerThread::flushBatch(CUevent endEvent, CUstream syncStream, PxBaseTask* task) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - if (mFailureDetected) - { - return; - } - - if (endEvent && (1 & intptr_t(syncStream))) - { - // blocking record event on stream 0, flushes current push buffer - GD_CHECK_CALL(cuEventRecord(endEvent, 0)); - } else { - CUresult ret = cuStreamQuery(0); // flushes current push buffer - PX_ASSERT(ret == CUDA_SUCCESS || ret == CUDA_ERROR_NOT_READY); - PX_UNUSED(ret); - } - - int next = (mCompletionRingPush + 1) % SIZE_COMPLETION_RING; - while (next == mCompletionRingPop) - { - // lordy, I hope we never get here. - PX_ALWAYS_ASSERT(); - mCtxMgr->releaseContext(); - sleep(1); - mCtxMgr->acquireContext(); - } - - CudaBatch& b = mCompletionRing[ mCompletionRingPush ]; - b.blockingEvent = endEvent; - b.blockingStream = syncStream; - b.continuationTask = task; - - mCompletionRingPush = next; - mRecordEventQueued.set(); -} - -// Take any submitted tasks into its appropriate read list -void PxGpuWorkerThread::pollSubmitted(shdfnd::Array<ReadyTask>* ready) -{ - mInputReady.reset(); - PxGpuTask* gt; - while ((gt = (PxGpuTask*) mSubmittedTaskList.popBack()) != 0) - { - PxGpuTaskHint::Enum type = gt->getTaskHint(); - ReadyTask* r = &ready[ type ].insert(); - r->task = gt; - r->iteration = 0; - if (mUsingConcurrentStreams && gt->mStreamIndex == 0) - { - gt->mStreamIndex = mCachedStreams.popBack(); - } - } -} - -void PxGpuWorkerThread::processActiveTasks() -{ -#if PX_SUPPORT_PXTASK_PROFILING - PX_PROFILE_ZONE("GpuDispatcher.ProcessTasksEvent", 0); // PT: TODO: fix invalid context -#endif - - if (mFailureDetected) - { - while (!mSubmittedTaskList.empty()) - { - mInputReady.reset(); - mSubmittedTaskList.popBack()->release(); - } - return; - } - - for (uint32_t i = 0; i < PxGpuTaskHint::NUM_GPU_TASK_HINTS; i++) - { - mReady[i].clear(); - } - - //OutputDebugString("batch beginning\n"); - - const CUevent nonBlockEv = mCachedNonBlockingEvents.get(); - bool workToFlush = false; - bool tasksRemain = false; - PxGpuTaskHint::Enum curMode = PxGpuTaskHint::HostToDevice; - - pollSubmitted(mReady); - - do - { - // cycle current run mode when necessary - if (mReady[ curMode ].size() == 0) - { - if (curMode == PxGpuTaskHint::HostToDevice) - { - curMode = PxGpuTaskHint::Kernel; - } - else if (curMode == PxGpuTaskHint::Kernel) - { - curMode = PxGpuTaskHint::DeviceToHost; - } - else - { - curMode = PxGpuTaskHint::HostToDevice; - } - } - - uint32_t singleStream = mReady[curMode].empty() ? 0 : mReady[curMode].front().task->mStreamIndex; - - while (mReady[ curMode ].size()) - { - bool needwfi = false; - for (uint32_t i = 0 ; i < mReady[ curMode ].size() ; i++) - { - ReadyTask& r = mReady[ curMode ][ i ]; - - if (r.task->mPreSyncRequired) - { - // If mPreSyncRequired is set *before* the task is run, it implies - // a WFI must be inserted before this task issues any work. Multiple - // ready tasks may have this flag, so to avoid inserting multiple WFI - // requests, we skip marked tasks in this pass and note a WFI is needed. - needwfi = true; - r.task->mPreSyncRequired = false; - } - else - { - const CUstream s = (r.task->mStreamIndex > 0) ? mCachedStreams.get(r.task->mStreamIndex) : 0; - - bool active; - { -#if PX_PROFILE -#if PX_SUPPORT_PXTASK_PROFILING - PX_PROFILE_ZONE(r.task->getName(), r.task->getContextId()); -#endif -#endif - active = r.task->launchInstance(s, int(r.iteration++)); - } - if(singleStream != r.task->mStreamIndex) - singleStream = 0; - - // If the launchInstance() call reported a non-recoverable error, gracefully - // release all scheduled tasks - if (mFailureDetected) - { - // Release all ready tasks - for (uint32_t h = 0; h < PxGpuTaskHint::NUM_GPU_TASK_HINTS; h++) - { - for (uint32_t j = 0 ; j < mReady[ h ].size() ; j++) - { - mReady[ h ][ j ].task->release(); - } - mReady[ h ].clear(); - } - - // Release all submitted tasks, until idle - while (!mSubmittedTaskList.empty()) - { - mInputReady.reset(); - mSubmittedTaskList.popBack()->release(); - } - return; - } - - workToFlush = true; - if (r.task->mPreSyncRequired) - { - // This task has asked for a sync point, meaning it has launched a copy - // or a kernel that must be completed before any later tasks are allowed - // to start. Insert a WFI and clear the needwfi flag - GD_CHECK_CALL(cuEventRecord(nonBlockEv, 0)); - needwfi = false; - r.task->mPreSyncRequired = false; - } - - if (!active) - { - r.task->release(); - mReady[ curMode ].replaceWithLast(i); - pollSubmitted(mReady); - i -= 1; - } - } - } - - if (needwfi) - { - GD_CHECK_CALL(cuEventRecord(nonBlockEv, 0)); - } - } - - /* We have completed one of the three phases */ - - tasksRemain = false; - for (int e = (int) PxGpuTaskHint::HostToDevice ; e != (int) PxGpuTaskHint::NUM_GPU_TASK_HINTS ; e++) - { - tasksRemain |= (mReady[ e ].size() != 0); - } - - if (!mCompletionTasks.empty()) - { - workToFlush = true; - } - - if (workToFlush && (tasksRemain == false || curMode == PxGpuTaskHint::DeviceToHost)) - { - //OutputDebugString("batch ending\n"); - - while (mCompletionTasks.size()) - { - PxBaseTask* t = mCompletionTasks.popBack(); - if (workToFlush) - { - CUevent stopEv = mCachedBlockingEvents.get(); - CUstream stream = singleStream ? mCachedStreams.get(singleStream) : (CUstream)1; - flushBatch(stopEv, stream, t); - workToFlush = false; - } - else - { - flushBatch(0, 0, t); - } - } - if (workToFlush) - { - /* Getting here is probably an indication of a bug in your task graph, - * but it is possible to get this warning if you have CPU tasks that - * can delay GpuTasks. So, consider this warning "training wheels" and - * disable it if you know your graph is correct. - */ - // SJB - Disabling this warning, APEX does this every frame because - // of how BasicIOS and IOFX interact. - //shdfnd::getFoundation().error(PX_WARN, - // "CUDA work generated without a completion dependency!"); - CUevent stopEv = mCachedBlockingEvents.get(); - flushBatch(stopEv, (CUstream)1, NULL); - } - } - } - while (tasksRemain); - - mCachedNonBlockingEvents.add(nonBlockEv); -} - -#endif diff --git a/PxShared/src/cudamanager/src/HeapManagerInterface.h b/PxShared/src/cudamanager/src/HeapManagerInterface.h deleted file mode 100644 index 7fe7f2e..0000000 --- a/PxShared/src/cudamanager/src/HeapManagerInterface.h +++ /dev/null @@ -1,156 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -//----------------------------------------------------------------------------// -// HeapManagerInterface.h -//----------------------------------------------------------------------------// - -#ifndef PXCUDACONTEXTMANAGER_HEAPMANAGERINTERFACE_H -#define PXCUDACONTEXTMANAGER_HEAPMANAGERINTERFACE_H - -#include "task/PxTaskDefine.h" -#include "foundation/PxSimpleTypes.h" -#include "cudamanager/PxCudaMemoryManager.h" - -#include <string.h> - -namespace physx -{ -struct HeapStatsFlags -{ - enum Enum - { - F_BASIC_STATS = 1 << 0, - F_INTERNAL_FRAGMENTATION = 1 << 1, - F_BIGGEST_FREE_BLOCK = 1 << 2, - F_HISTOGRAM = 1 << 3, - F_ALLOC_ID_STATS = 1 << 4, - F_ALL = 0xFFFFFFFF, - }; -}; - -#define BITSPERWORD sizeof(size_t)*8 - -class ApexHeapStats -{ -public: - ApexHeapStats(): - heapSize(0), - totalAllocated(0), - maxAllocated(0), - internalFragmentation(0), - maxInternalFragmentation(0), - biggestFreeBlock(0), - numEntries(0) - {} - - PX_INLINE void reset() - { - memset(this, 0, sizeof(ApexHeapStats)); - } - - // F_BASIC_STATS - size_t heapSize; - size_t totalAllocated; - size_t maxAllocated; - - // F_INTERNAL_FRAGMENTATION - size_t internalFragmentation; - size_t maxInternalFragmentation; - - // F_BIGGEST_FREE_BLOCK - size_t biggestFreeBlock; - - // F_HISTOGRAM - size_t freeBuddyHistogram[BITSPERWORD]; - size_t allocatedBuddyHistogram[BITSPERWORD]; - size_t numEntries; - - // F_ALLOC_ID_STATS - PxAllocIdStats allocIdStats[PxAllocId::NUM_IDS]; -}; - - -class HeapManagerInterface -{ -public: - // simple allocator interface over which the heap manager does its base allocation and allocates further pages - class Allocator - { - public: - virtual ~Allocator() {}; - - virtual void* alloc(const size_t size) = 0; - virtual void free(void* addr, const size_t size) = 0; - }; - - virtual ~HeapManagerInterface() {}; - - // INTERFACE METHODS - // init the HeapManager by passing it a block of memory and the smallest size of a memory block. - // returns true if init was successful - virtual bool init(Allocator* memAllocator, const size_t baseSize, const size_t pageSize, const size_t minBlockSize, const size_t maxIntFrag = size_t(-1)) = 0; - - // Changes the page size. The size of allocations over the supplied Allocator are a multiple of the pageSize. - // returns true if the page size was valid. (!0, >minBlockSize, pow2) - virtual bool setPageSize(size_t pageSize) = 0; - - // returns the address of an allocated block for the requested size. - // returns a NULL ptr if alloc failed. - virtual void* alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) = 0; - - // returns true if the block at the given address could be resized to size - // returns false if this failed. Manual free and alloc is still possible but needs a memcopy. - virtual bool realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) = 0; - - // frees a given block. - // returns true if the operation was successful - virtual bool free(void* addr) = 0; - - // deallocates all empty pages - virtual void freeEmptyPages() = 0; - - // ensures that there there is free memory of at least the requested size - // returns true if the operation was successful. Free memory was already big enough or new pages were allocated successfully. - virtual bool reserve(size_t size) = 0; - - // returns stats into a ApexHeapStats object, stats can be selected with HeapManagerStatsFlags. - // returns true if the operation was successful - virtual bool getStats(ApexHeapStats& stats, const uint32_t flags) = 0; - - // discretisize memory into an array such that it can be visualized - // returns true if the operation was successful - virtual bool visualizeMemory(uint8_t* array, const size_t size) = 0; - - // returns the base address of the page containing the memory block at addr. - // returns NULL if addr doesn't correspond to a page - virtual void* findBaseAddress(void* addr) = 0; -}; - -} // end physx namespace - -#endif // PXCUDACONTEXTMANAGER_HEAPMANAGERINTERFACE_H diff --git a/PxShared/src/cudamanager/src/HeapManagerLinkedList.h b/PxShared/src/cudamanager/src/HeapManagerLinkedList.h deleted file mode 100644 index 45a359d..0000000 --- a/PxShared/src/cudamanager/src/HeapManagerLinkedList.h +++ /dev/null @@ -1,204 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -//----------------------------------------------------------------------------// -// HeapManagerLinkedList.h -//----------------------------------------------------------------------------// - -#ifndef PXCUDACONTEXTMANAGER_HEAPMANAGERLINKEDLIST_H -#define PXCUDACONTEXTMANAGER_HEAPMANAGERLINKEDLIST_H - -#include "foundation/PxAssert.h" - -namespace physx -{ - -template <typename T> -class LinkedList -{ -public: - - PX_INLINE LinkedList() - : mHead(NULL) - , mNumElements(0) - { - } - - PX_INLINE void insert(T*& elt) - { - if (mHead) - { - elt->next = mHead->next; - } - mHead = elt; - mNumElements++; - } - - PX_INLINE void insertSorted(T*& elt) - { - if (!mHead) - { - mHead = elt; - mHead->next = NULL; - } - else if (!mHead->next || (mHead->addr > elt->addr)) - { - if (mHead->addr > elt->addr) - { - elt->next = mHead; - mHead = elt; - } - else - { - mHead->next = elt; - elt->next = NULL; - } - } - else - { - T* cur = mHead; - while (cur->next && (elt->addr > cur->next->addr)) - { - cur = cur->next; - } - elt->next = cur->next; - cur->next = elt; - } - mNumElements++; - } - - PX_INLINE T* pop() - { - if (mHead) - { - T* ret = mHead; - mHead = mHead->next; - mNumElements--; - return ret; - } - return NULL; - } - - PX_INLINE bool remove(const T* elt) - { - PX_ASSERT(elt); - if (mHead && mHead == elt) - { - mHead = mHead->next; - mNumElements--; - return true; - } - else - { - T* cur = mHead; - while (cur && cur->next != elt) - { - PX_ASSERT(cur->addr < elt->addr); // assert for sorted list property. - cur = cur->next; - } - if (cur && elt) - { - cur->next = elt->next; - mNumElements--; - return true; - } - } - return false; - } - - PX_INLINE T* find(const size_t addr) - { - T* cur = mHead; - while (cur && cur->addr < addr) - { - cur = cur->next; - } - - return cur && (cur->addr == addr) ? cur : NULL; - } - - PX_INLINE T* findAndPop(const size_t addr) - { - if (mHead == NULL) - { - return NULL; - } - - if (mHead->addr == addr) - { - return pop(); - } - - T* cur = mHead; - T* last = mHead; - while (cur) - { - if (cur->addr == addr) - { - last->next = cur->next; - mNumElements--; - return cur; - } - else if (cur->addr > addr) - { - return NULL; // because list is sorted. - } - else - { - last = cur; - cur = cur->next; - } - } - return NULL; - } - - PX_INLINE size_t getSize() - { - return mNumElements; - } - PX_INLINE T* getHead() - { - return mHead; - } - - // hacky - PX_INLINE void setSize(size_t s) - { - mNumElements = s; - } - PX_INLINE void setHead(T* h) - { - mHead = h; - } -private: - T* mHead; - size_t mNumElements; -}; - -} // end physx namespace - -#endif // PXCUDACONTEXTMANAGER_HEAPMANAGERLINKEDLIST_H diff --git a/PxShared/src/cudamanager/src/HeapManagerRef.cpp b/PxShared/src/cudamanager/src/HeapManagerRef.cpp deleted file mode 100644 index bf3847f..0000000 --- a/PxShared/src/cudamanager/src/HeapManagerRef.cpp +++ /dev/null @@ -1,1380 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#include "CudaMemoryManager.h" -#include "HeapManagerRef.h" - -#include "PsSort.h" -#include "PsArray.h" -#include "PsAllocator.h" -#include "PsString.h" - -#include "foundation/PxMath.h" -#include "foundation/PxErrorCallback.h" -#include "foundation/PxMemory.h" - - -#if DUMP_HEAP_USAGE_TO_FILE -#include "PsWindowsInclude.h" -#endif - -using namespace physx::shdfnd; -using namespace physx; - - -#define CMM_DELETE_SINGLE(x) { if(x) delete x; x = NULL; } -#define CMM_DELETE_ARRAY(x) { if(x) delete [] x; x = NULL; } - -HeapManagerRef::HeapManagerRef(physx::PxErrorCallback& errorCallback, bool enableMutex) - : mHeaps(PX_DEBUG_EXP("HeapManagerRef:mHeaps")) - , mBuddyPool("mBuddyPool", 1024) - , mPageSize(0) - , mMinBlockSize(0) - , mMaxIntFrag(size_t(-1)) - , mNewEmptyPage(false) - , mMemAllocator(NULL) - , mGlobalAllocMem(0) - , mGlobalMaxAllocMem(0) - , mGlobalInternalFragmentation(0) - , mGlobalMaxInternalFragmentation(0) - , mErrorCallback(errorCallback) - -{ - PX_UNUSED(enableMutex); // SJB: heap alloc of shdfnd::Mutex not working for me -} - -HeapManagerRef::~HeapManagerRef() -{ - for (uint32_t i = 0; i < mHeaps.size(); i++) - { - if (mMemAllocator && mHeaps[i].baseAddr) - { - mMemAllocator->free(reinterpret_cast<void*>(mHeaps[i].baseAddr), mHeaps[i].heap->getTotalMemorySize()); - } - CMM_DELETE_SINGLE(mHeaps[i].heap); - } -#if DUMP_HEAP_USAGE_TO_FILE - fclose(mLogFile); -#endif -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// INTERFACE METHODS -bool HeapManagerRef::init(Allocator* memAllocator, const size_t baseSize, const size_t pageSize, const size_t minBlockSize, const size_t maxIntFrag) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - // init Heap and do some basic checks. - - // init only once - if (mHeaps.size()) - { - return false; - } - - if (baseSize && (minBlockSize > baseSize)) - { - return false; - } - - if (minBlockSize > pageSize) - { - return false; - } - - if (baseSize && (baseSize % minBlockSize)) - { - return false; - } - - uint8_t minBlockSizeLog2; - if (minBlockSize != findNextPow2(minBlockSizeLog2, minBlockSize, 0, BITSPERWORD)) - { - return false; - } - - if (pageSize != findNextPow2(pageSize, minBlockSizeLog2, BITSPERWORD)) - { - return false; - } - - if (!memAllocator) - { - return false; - } - - mMemAllocator = memAllocator; - mPageSize = pageSize; - mMinBlockSize = minBlockSize; - mMaxIntFrag = maxIntFrag; - - memset(&mGlobalAllocIdStats, 0, sizeof(PxAllocIdStats)*PxAllocId::NUM_IDS); - -#if DUMP_HEAP_USAGE_TO_FILE - char fileName[1024]; - sprintf_s(fileName, 1024, "HeapLog_%p.txt", this); - fopen_s(&mLogFile, fileName, "w"); - fprintf(mLogFile, "HeapSize: %d, BlockSize: %d Addr: 0x0\n", baseSize, minBlockSize); - QueryPerformanceCounter((LARGE_INTEGER*)&m_qpc); - QueryPerformanceFrequency((LARGE_INTEGER*)&m_qpf); -#endif - - // init heap - if (baseSize) - { - return allocateNewHeap(baseSize, true) != NULL; - } - else - { - return true; - } -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool HeapManagerRef::setPageSize(size_t pageSize) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - if (pageSize == 0) - { - return false; - } - - if (mMinBlockSize > pageSize) - { - return false; - } - - if (pageSize != findNextPow2(pageSize, 0, BITSPERWORD)) - { - return false; - } - - mPageSize = pageSize; - return true; -} - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void* HeapManagerRef::alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DEF()) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - -#if DUMP_HEAP_USAGE_TO_FILE - unsigned __int64 qpc; - QueryPerformanceCounter((LARGE_INTEGER*)&qpc); - float dtime = (float)((double)(qpc - m_qpc) / (double)m_qpf); - fprintf(mLogFile, "alloc t: %f s: %d", dtime, size); -#endif - - void* ret = NULL; - // try to allocate it in one of the heaps/pages - for (uint32_t i = 0; !ret && i < mHeaps.size(); i++) - { - ret = mHeaps[i].heap->alloc(size, PX_ALLOC_INFO_PARAMS_INPUT()); - } - - // create a new page - if (!ret) - { - Heap* heap = allocateNewPages(size); - if (heap) - { - ret = heap->alloc(size, PX_ALLOC_INFO_PARAMS_INPUT()); - } - } - -#if DUMP_HEAP_USAGE_TO_FILE - fprintf(mLogFile, " a: 0x%p\n", ret); -#endif - - return ret; -} - -//(10/20/2009 feodorb) TODO: decide whether we move the binary search -//somewhere away from here. Stands here for std::lower_bound replacement -template<typename T> -static uint32_t findUpperBound(const physx::shdfnd::Array<T>& refArray, const T& refValue) -{ - uint32_t start = 0, end = refArray.size(); - while (end - start > 0) - { - uint32_t midPoint = start + ((end - start) >> 1); - - if (!(refValue < refArray[midPoint])) - { - start = midPoint + 1; - } - else - { - end = midPoint; - } - } - return start; -} - -Heap* HeapManagerRef::findHeap(void* addr) const -{ - HeapManagerPage searchPage; - searchPage.baseAddr = reinterpret_cast<size_t>(addr); - - uint32_t upperBound = findUpperBound(mHeaps, searchPage); - PX_ASSERT(upperBound == 0 || - (searchPage.baseAddr >= mHeaps[upperBound - 1].baseAddr && - searchPage.baseAddr < mHeaps[upperBound - 1].baseAddr + mHeaps[upperBound - 1].heap->getTotalMemorySize()) - ); - - return (upperBound > 0) ? mHeaps[upperBound - 1].heap : 0; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool HeapManagerRef::realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DEF()) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - -#if DUMP_HEAP_USAGE_TO_FILE - unsigned __int64 qpc; - QueryPerformanceCounter((LARGE_INTEGER*)&qpc); - float dtime = (float)((double)(qpc - m_qpc) / (double)m_qpf); - fprintf(mLogFile, "realloc t: %f s: %d, a: 0x%p\n", dtime, size, addr); -#endif - - Heap* heap = findHeap(addr); - - if (heap != 0) - { - bool ret = heap->realloc(addr, size, PX_ALLOC_INFO_PARAMS_INPUT()); - if (ret && size > 0 && mNewEmptyPage) - { - shrinkMemory(); - } - return ret; - } - return false; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool HeapManagerRef::free(void* addr) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - -#if DUMP_HEAP_USAGE_TO_FILE - unsigned __int64 qpc; - QueryPerformanceCounter((LARGE_INTEGER*)&qpc); - float dtime = (float)((double)(qpc - m_qpc) / (double)m_qpf); - fprintf(mLogFile, "free t: %f a: 0x%p\n", dtime, addr); -#endif - - if (addr == NULL) - { - return false; - } - - Heap* heap = findHeap(addr); - if (heap != 0) - { - bool ret = heap->free(addr); - if (ret && mNewEmptyPage) - { - shrinkMemory(); - } - return ret; - } - return false; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -static void addStats(ApexHeapStats& dst, const ApexHeapStats& src, uint32_t flags) -{ - if (flags & HeapStatsFlags::F_BASIC_STATS) - { - dst.heapSize += src.heapSize; - } - if (flags & HeapStatsFlags::F_BIGGEST_FREE_BLOCK) - { - dst.biggestFreeBlock = PxMax(dst.biggestFreeBlock, src.biggestFreeBlock); - } - if (flags & HeapStatsFlags::F_HISTOGRAM) - { - dst.numEntries = PxMax(dst.numEntries, src.numEntries); - for (uint32_t i = 0; i < BITSPERWORD; i++) - { - dst.freeBuddyHistogram[i] += src.freeBuddyHistogram[i]; - dst.allocatedBuddyHistogram[i] += src.allocatedBuddyHistogram[i]; - } - } -} - -PX_INLINE void HeapManagerRef::addToStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation) -{ - PxAllocIdStats& idStats = mGlobalAllocIdStats[id]; - idStats.elements++; - idStats.size += size; - idStats.maxElements = PxMax(idStats.maxElements, idStats.elements); - idStats.maxSize = PxMax(idStats.maxSize, idStats.size); - mGlobalAllocMem += size; - mGlobalMaxAllocMem = PxMax(mGlobalMaxAllocMem, mGlobalAllocMem); - mGlobalInternalFragmentation += fragmentation; - mGlobalMaxInternalFragmentation = PxMax(mGlobalMaxInternalFragmentation, mGlobalInternalFragmentation); -} - - -PX_INLINE void HeapManagerRef::removeFromStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation) -{ - PxAllocIdStats& idStats = mGlobalAllocIdStats[id]; - PX_ASSERT(idStats.elements); - PX_ASSERT(idStats.size >= size); - - idStats.elements--; - idStats.size -= size; - mGlobalAllocMem -= size; - mGlobalInternalFragmentation -= fragmentation; -} - -PX_INLINE void HeapManagerRef::incStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation) -{ - PxAllocIdStats& idStats = mGlobalAllocIdStats[id]; - idStats.size += change; - idStats.maxSize = PxMax(idStats.maxSize, idStats.size); - mGlobalAllocMem += change; - mGlobalMaxAllocMem = PxMax(mGlobalMaxAllocMem, mGlobalAllocMem); - mGlobalInternalFragmentation += fragmentation; - mGlobalMaxInternalFragmentation = PxMax(mGlobalMaxInternalFragmentation, mGlobalInternalFragmentation); -} - -PX_INLINE void HeapManagerRef::decStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation) -{ - PxAllocIdStats& idStats = mGlobalAllocIdStats[id]; - PX_ASSERT(idStats.size >= change); - idStats.size -= change; - mGlobalAllocMem += change; - mGlobalInternalFragmentation += fragmentation; -} - -bool HeapManagerRef::getStats(ApexHeapStats& stats, const uint32_t flags) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - ApexHeapStats tmpStats; - stats.reset(); - for (uint32_t i = 0; i < mHeaps.size(); i++) - { - mHeaps[i].heap->getStats(tmpStats, flags); - addStats(stats, tmpStats, flags); - } - if (flags & HeapStatsFlags::F_BASIC_STATS) - { - stats.totalAllocated = mGlobalAllocMem; - stats.maxAllocated = mGlobalMaxAllocMem; - - } - if (flags & HeapStatsFlags::F_INTERNAL_FRAGMENTATION) - { - stats.internalFragmentation = mGlobalInternalFragmentation; - stats.maxInternalFragmentation = mGlobalMaxInternalFragmentation; - } - if (flags & HeapStatsFlags::F_ALLOC_ID_STATS) - { - // stats per allocation ID - PxMemCopy(stats.allocIdStats, mGlobalAllocIdStats, sizeof(PxAllocIdStats)*PxAllocId::NUM_IDS); - } - return true; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool HeapManagerRef::visualizeMemory(uint8_t* array, const size_t arraySize) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - ApexHeapStats tmpStats; - getStats(tmpStats, HeapStatsFlags::F_BASIC_STATS); - float scale = float(arraySize) / float(tmpStats.heapSize); - uint8_t* start = array; - for (uint32_t i = 0; i < mHeaps.size(); i++) - { - size_t heapSize = mHeaps[i].heap->getTotalMemorySize(); - size_t numVis = size_t(float(heapSize) * scale); - PX_ASSERT(start + numVis <= array + arraySize); - mHeaps[i].heap->visualizeMemory(start, numVis); - start += numVis; - } - return true; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void* HeapManagerRef::findBaseAddress(void* addr) -{ - Heap* heap = findHeap(addr); - if (heap) - { - return heap->getBaseAddress(); - } - return NULL; -} - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -Heap* HeapManagerRef::allocateNewHeap(size_t heapSize, bool isPersistent) -{ - if (!mMemAllocator) - { - return NULL; - } - - void* newPage = mMemAllocator->alloc(heapSize); - if (newPage) - { - HeapManagerPage page; - page.baseAddr = reinterpret_cast<size_t>(newPage); - page.heap = PX_NEW(Heap)(*this, mErrorCallback); - page.isPersistent = isPersistent; - if (page.heap && page.heap->init(page.baseAddr, heapSize, mMinBlockSize, mMaxIntFrag)) - { - mHeaps.pushBack(page); - shdfnd::sort(mHeaps.begin(), (uint32_t) mHeaps.size()); - return page.heap; - } - else - { - mMemAllocator->free(newPage, page.heap->getTotalMemorySize()); - CMM_DELETE_SINGLE(page.heap); - } - } - return NULL; -} - -Heap* HeapManagerRef::allocateNewPages(size_t requestedSize) -{ - uint8_t pageSizeLog2; - uint8_t minBlockSizeLog2; - findNextPow2(minBlockSizeLog2, mMinBlockSize, 0, BITSPERWORD); - findNextPow2(pageSizeLog2, mPageSize, minBlockSizeLog2, BITSPERWORD); - const size_t allocSize = findNextPow2(requestedSize, pageSizeLog2, BITSPERWORD); - return allocateNewHeap(allocSize); -} - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void HeapManagerRef::removeDeletedHeapsFromList(uint32_t numDeletes) -{ - // remove pages from list, keeping it sorted. - if (numDeletes) - { - const uint32_t numEntries = (uint32_t) mHeaps.size(); - - //seek - uint32_t w = 0; - while (w < (numEntries) && mHeaps[w].heap != NULL) - { - w++; - } - - // remove holes - uint32_t r = w + 1; - while (r < numEntries) - { - if (mHeaps[r].heap == NULL) - { - r++; - } - else - { - mHeaps[w++] = mHeaps[r++]; - } - } - - mHeaps.resize(numEntries - numDeletes); - } -} - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void HeapManagerRef::resetHeap(HeapManagerPage& page) -{ - PX_ASSERT(page.heap->getAllocatedMemorySize() == 0); - Heap* newHeap = PX_NEW(Heap)(*this, mErrorCallback); - if (newHeap) - { - if (newHeap->init(page.baseAddr, page.heap->getTotalMemorySize(), mMinBlockSize, mMaxIntFrag)) - { - CMM_DELETE_SINGLE(page.heap); - page.heap = newHeap; - } - else - { - CMM_DELETE_SINGLE(newHeap); - } - } -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void HeapManagerRef::freeEmptyPages() -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - // release heaps - uint32_t numDeletes = 0; - const uint32_t numEntries = (uint32_t) mHeaps.size(); - for (uint32_t i = 0; i < numEntries; i++) - { - HeapManagerPage& page = mHeaps[i]; - PX_ASSERT(page.heap); - if (page.isPersistent) - { - // for persistent pages: reset without release. - if (page.heap->getAllocatedMemorySize() == 0) - { - resetHeap(page); - } - } - else if (page.heap->getAllocatedMemorySize() == 0) - { - mMemAllocator->free(reinterpret_cast<void*>(page.baseAddr), page.heap->getTotalMemorySize()); - CMM_DELETE_SINGLE(page.heap); - numDeletes++; - } - } - - if (numDeletes) - { - removeDeletedHeapsFromList(numDeletes); - } -} - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void HeapManagerRef::shrinkMemory() -{ - mNewEmptyPage = false; - - // collect stats - size_t totalAllocated = 0; - size_t totalPageSize = 0; - const uint32_t numEntries = (uint32_t) mHeaps.size(); - for (uint32_t i = 0; i < numEntries; i++) - { - HeapManagerPage& page = mHeaps[i]; - totalAllocated += page.heap->getAllocatedMemorySize(); - totalPageSize += page.heap->getTotalMemorySize(); - PX_ASSERT(totalAllocated <= totalPageSize); - } - - // shrink memory if free non-persistent space is half or more of the allocated pages. - // releasing from the back of address sorted list, other strategies like LRU, best fit are also possible. - if (totalPageSize) - { - float allocScale = float(totalAllocated) / float(totalPageSize); - if (allocScale <= 0.5f) - { - size_t sizeToRelease = totalAllocated ? (totalPageSize - totalAllocated) >> 1 : totalPageSize; - uint32_t numDeletes = 0; - for (uint32_t i = 0; i < numEntries; i++) - { - HeapManagerPage& page = mHeaps[numEntries - i - 1]; - PX_ASSERT(page.heap); - if (page.heap->getAllocatedMemorySize() == 0) - { - if (!page.isPersistent && page.heap->getTotalMemorySize() <= sizeToRelease) - { - mMemAllocator->free(reinterpret_cast<void*>(page.baseAddr), page.heap->getTotalMemorySize()); - sizeToRelease -= page.heap->getTotalMemorySize(); - CMM_DELETE_SINGLE(page.heap); - numDeletes++; - } - else - { - resetHeap(page); - } - } - } - - if (numDeletes) - { - removeDeletedHeapsFromList(numDeletes); - } - } - } -} - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool HeapManagerRef::reserve(size_t size) -{ - shdfnd::Mutex::ScopedLock lock(mMutex); - - size_t freeSize = 0; - for (uint32_t i = 0; i < mHeaps.size(); i++) - { - freeSize += mHeaps[i].heap->getTotalMemorySize() - mHeaps[i].heap->getAllocatedMemorySize(); - } - - if (freeSize < size) - { - return allocateNewPages(size - freeSize) != NULL; - } - else - { - return true; - } -} - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -PX_INLINE size_t HeapManagerRef::findNextPow2(const size_t val, const uint8_t min, const uint8_t max) -{ - size_t ret = val; - for (uint8_t i = min; i <= max; i++) - { - ret = size_t(1) << i; - if (ret >= val) - { - break; - } - } - return ret; -} - - -PX_INLINE size_t HeapManagerRef::findNextPow2(uint8_t& pow, const size_t val, const uint8_t min, const uint8_t max) -{ - size_t ret = val; - for (pow = min; pow <= max; pow++) - { - ret = size_t(1) << pow; - if (ret >= val) - { - break; - } - } - return ret; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void BuddyList::clear() -{ - Buddy* cur = pop(); - while (cur) - { - heap->getBuddyPool().destroy(cur); - cur = pop(); - } -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -Heap::~Heap() -{ - for (uint32_t i = 0; i <= mMaxLevel; i++) - { - mFreeBuddiesAtLevel[i].clear(); - } - CMM_DELETE_ARRAY(mFreeBuddiesAtLevel); - - for (Buddy* buddy = mAllocatedBuddies.getHead(); buddy != NULL; buddy = buddy->next) - { - void* address = reinterpret_cast<void*>((buddy->addr << mMinBlockLog2) + mBaseAddr); -#if KEEP_DEBUG_INFO - char buffer[256]; - physx::shdfnd::snprintf(buffer, 256, "Memory leak!\naddress %p file %s, line %d, name %s", address, buddy->file, buddy->line, buddy->allocName); - mErrorCallback.reportError(PxErrorCode::eDEBUG_WARNING, buffer, __FILE__, __LINE__); -#else - char buffer[256]; - physx::shdfnd::snprintf(buffer, 256, "Memory leak at address %p", address); - mErrorCallback.reportError(PxErrorCode::eDEBUG_WARNING, buffer, __FILE__, __LINE__); -#endif - } - - //clear it anyway - mAllocatedBuddies.clear(); -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool Heap::init(size_t baseAddr, const size_t baseSize, const size_t minBlockSize, const size_t maxIntFrag) -{ - if (minBlockSize != HeapManagerRef::findNextPow2(mMinBlockLog2, minBlockSize, 0, BITSPERWORD)) - { - return false; - } - - if ((maxIntFrag != size_t(-1)) && (maxIntFrag != HeapManagerRef::findNextPow2(maxIntFrag, 0, BITSPERWORD))) - { - return false; - } - - mMaxIntFrag = maxIntFrag; - - mMinBlockSize = minBlockSize; - mTotalSize = baseSize; - mBaseAddr = baseAddr; - - if (mBaseAddr == 0) - { - return false; - } - - size_t numBlocks = baseSize >> mMinBlockLog2; - // allow only memory blocks which have a power of 2 in size. and numblocks must be at least 1. - if (numBlocks != HeapManagerRef::findNextPow2(mMaxLevel, numBlocks, 0, sizeof(size_t) * 4)) - { - return false; - } - - mFreeBuddiesAtLevel = PX_NEW(BuddyList)[(unsigned int)(mMaxLevel + 1)]; - if (!mFreeBuddiesAtLevel) - { - return false; - } - - // init size of buddy arrays - for (uint32_t i = 0; i <= mMaxLevel; i++) - { - mFreeBuddiesAtLevel[i].buddySize = size_t(1) << i; - mFreeBuddiesAtLevel[i].heap = this; - } - mAllocatedBuddies.heap = this; - - Buddy* b = mManager.getBuddyPool().construct(); - if (!b) - { - CMM_DELETE_ARRAY(mFreeBuddiesAtLevel); - return false; - } - b->level = mMaxLevel; - - // add buddy to its array - mFreeBuddiesAtLevel[mMaxLevel].insert(b); - - return true; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -void* Heap::alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DEF()) -{ - /* - compute needed buddysize -> level - if(mFreeBuddiesAtLevel[level].array.size() > 0) - { - ret = array.popBack() - allocList.pushBack(ret) - return ret.adr+basePtr; - } - else - { - if(nonemptylist at higher levels) - ret = recursive split - else if(nonemptylist at lower levels ) - ret = recursive merge - else - ret = NULL; // heap full or to fragmented - } - */ - - PX_UNUSED(allocId); - PX_UNUSED(allocName); - PX_UNUSED(line); - PX_UNUSED(file); - - if (size == 0 || size > mTotalSize) - { - return NULL; - } - - PX_ASSERT(allocId < PxAllocId::NUM_IDS); - //PX_ASSERT(allocId != PxAllocId::UNASSIGNED); // enable to track unassigned memory - - // compute needed buddysize -> level - uint8_t level = 0; - HeapManagerRef::findNextPow2(level, size, mMinBlockLog2, BITSPERWORD); - level = uint8_t(level - mMinBlockLog2); - - Buddy* ret = NULL; - if (mFreeBuddiesAtLevel[level].getSize() > 0) - { - ret = mFreeBuddiesAtLevel[level].pop(); - } - else - { - // prefer splitting - if (level != mMaxLevel) - { - ret = findBySplitting(level); - } - // else try merging - if (!ret && level != 0) - { - ret = findByMerging(level); - } - } - - if (ret) - { - ret->occupiedSize = size; - size_t addr = ret->addr; - ret->allocId = uint16_t(allocId); -#if KEEP_DEBUG_INFO - ret->file = file; - ret->line = (uint32_t)line; - ret->allocName = allocName; -#endif - - size_t allocSize; - if (mMaxIntFrag != size_t(-1)) - { - allocSize = reduceIntFragment(*ret, mMaxIntFrag); // ret can be changed in here, that's why we store the address - } - else - { - allocSize = size_t(1) << (level + mMinBlockLog2); - mAllocatedBuddies.insertSorted(ret); - } - mAllocMem += allocSize; - mInternalFragmentation += allocSize - size; - mMaxAllocMem = PxMax(mAllocMem, mMaxAllocMem); - mMaxInternalFragmentation = PxMax(mInternalFragmentation, mMaxInternalFragmentation); - mManager.addToStats(allocId, allocSize, allocSize - size); - - PX_ASSERT(sanityTest()); - return reinterpret_cast<void*>((addr << mMinBlockLog2) + mBaseAddr); - } - else - { - PX_ASSERT(sanityTest()); - return NULL; - } -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool Heap::realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DEF()) -{ - PX_UNUSED(allocId); - PX_UNUSED(allocName); - PX_UNUSED(line); - PX_UNUSED(file); - - if (addr == NULL || size > mTotalSize) - { - return false; - } - - if (size == 0) - { - //realloc to 0 bytes can't keep the pointer as it was. - return false; - //return free(addr); - } - - size_t inernal_addr = reinterpret_cast<size_t>(addr) - mBaseAddr; - inernal_addr >>= mMinBlockLog2; - - // collect all buddies which are associated with this addr - shdfnd::Array<Buddy*, shdfnd::TempAllocator> budyList; - size_t totalAllocated = 0; - size_t buddyAllocated = 0; - Buddy* found = NULL; - do - { - found = mAllocatedBuddies.find(inernal_addr); - if (!found) - { - return false; - } - budyList.pushBack(found); - inernal_addr += size_t(1) << found->level; - totalAllocated += found->occupiedSize; - buddyAllocated += size_t(1) << (found->level + mMinBlockLog2); - } - while (found && !found->isLastBuddy); - - Buddy* cur = budyList.popBack(); - - // increase size - if (totalAllocated < size) - { - size_t leftSpace = (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize; - size_t neededSpace = size - totalAllocated; - if (neededSpace <= leftSpace) - { - cur->occupiedSize += neededSpace; -#if KEEP_DEBUG_INFO - cur->file = file; - cur->line = (uint32_t)line; - cur->allocName = allocName; -#endif - - mInternalFragmentation -= neededSpace; - mManager.decStats(PxAllocId::Enum(cur->allocId), 0, neededSpace); - - // replace - mAllocatedBuddies.remove(cur); - mAllocatedBuddies.insertSorted(cur); - PX_ASSERT(sanityTest()); - return true; - } - else - { - return false; -#ifdef UNREACHABLE - // TODO:try merge free buddies until big enough, - // then add buddy and do internal fragmentation reduction. - - // search for free blocks next to this one. - size_t addr = cur->addr + (size_t(1) << cur->level); - if (!mAllocatedBuddies.find(addr)) - { - return false; - } - - // if not found, return null, let user reallocate - PX_ASSERT(sanityTest()); - return false; -#endif - } - } - // reduce size - else - { - // succededly remove buddies until the requested size is reached. - // if internal fragmentation reduction is turned on, then an allocation can consist of multiple buddies. - mInternalFragmentation -= (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize; - mManager.decStats(PxAllocId::Enum(cur->allocId), 0, (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize); - size_t diff = totalAllocated - size; - while (diff >= cur->occupiedSize) - { - diff -= cur->occupiedSize; - cur->occupiedSize = 0; - bool succ = mAllocatedBuddies.remove(cur); - PX_UNUSED(succ); - PX_ASSERT(succ); - mFreeBuddiesAtLevel[cur->level].insertSorted(cur); - size_t allocSize = size_t(1) << (cur->level + mMinBlockLog2); - mAllocMem -= allocSize; - mManager.decStats(PxAllocId::Enum(cur->allocId), allocSize, 0); - cur = budyList.popBack(); - } - cur->isLastBuddy = true; - cur->occupiedSize -= diff; - -#if KEEP_DEBUG_INFO - cur->file = file; - cur->line =(uint32_t)line; - cur->allocName = allocName; -#endif - - // replace - bool succ = mAllocatedBuddies.remove(cur); - PX_UNUSED(succ); - PX_ASSERT(succ); - mAllocatedBuddies.insertSorted(cur); - mInternalFragmentation += (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize; - mManager.incStats(PxAllocId::Enum(cur->allocId), 0, (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize); - PX_ASSERT(sanityTest()); - return true; - } -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool Heap::free(void* addr) -{ - if (addr == NULL) - { - return false; - } - - size_t internalAddr = reinterpret_cast<size_t>(addr) - mBaseAddr; - internalAddr >>= mMinBlockLog2; - - bool ret = true; - - bool dummy = true; - while (dummy) - { - Buddy* b = mAllocatedBuddies.findAndPop(internalAddr); - - if (!b) - { - return false; - } - - size_t allocSize = size_t(1) << (b->level + mMinBlockLog2); - mAllocMem -= allocSize; - mInternalFragmentation -= allocSize - b->occupiedSize; - mManager.removeFromStats(PxAllocId::Enum(b->allocId), allocSize, allocSize - b->occupiedSize); - b->occupiedSize = 0; - - mFreeBuddiesAtLevel[b->level].insertSorted(b); - - // check if this memory block occupied another buddy - if (b->isLastBuddy) - { - break; - } - else - { - internalAddr += size_t(1) << b->level; - } - } - - if (mAllocMem == 0) - { - mManager.notifyEmptyPage(); - } - - PX_ASSERT(sanityTest()); - return ret; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool Heap::getStats(ApexHeapStats& stats, const uint32_t flags) -{ - if (flags & HeapStatsFlags::F_BASIC_STATS) - { - stats.heapSize = mTotalSize; - stats.totalAllocated = mAllocMem; - stats.maxAllocated = mMaxAllocMem; - } - if (flags & HeapStatsFlags::F_INTERNAL_FRAGMENTATION) - { - // internal fragmentation - stats.internalFragmentation = mInternalFragmentation; - stats.maxInternalFragmentation = mMaxInternalFragmentation; - } - if (flags & HeapStatsFlags::F_BIGGEST_FREE_BLOCK) - { - // bigggest free block - stats.biggestFreeBlock = 0; - uint8_t curLevel = mMaxLevel; - do - { - if (mFreeBuddiesAtLevel[curLevel].getSize()) - { - stats.biggestFreeBlock = mFreeBuddiesAtLevel[curLevel].buddySize << mMinBlockLog2; - break; - } - curLevel--; - } - while (curLevel != 0); - } - if (flags & HeapStatsFlags::F_HISTOGRAM) - { - // histograms - for (uint8_t i = 0; i <= mMaxLevel; i++) - { - stats.freeBuddyHistogram[i] = mFreeBuddiesAtLevel[i].getSize(); - stats.allocatedBuddyHistogram[i] = 0; - } - Buddy* b = mAllocatedBuddies.getHead(); - while (b) - { - stats.allocatedBuddyHistogram[b->level]++; - b = b->next; - } - stats.numEntries = size_t(mMaxLevel + 1); - } - return true; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool Heap::sanityTest() -{ - size_t sum = 0; - - for (uint8_t i = 0; i <= mMaxLevel; i++) - { - size_t buddiesAtLevel = 0; - for (Buddy* cur = mFreeBuddiesAtLevel[i].getHead(); cur; cur = cur->next) - { - if ((size_t(1) << cur->level != mFreeBuddiesAtLevel[i].buddySize) || - (cur->occupiedSize > size_t(1) << (cur->level + mMinBlockLog2))) - { - return false; - } - sum += mFreeBuddiesAtLevel[i].buddySize << mMinBlockLog2; - buddiesAtLevel++; - } - if (mFreeBuddiesAtLevel[i].getSize() != buddiesAtLevel || - (buddiesAtLevel > (size_t(1) << (mMaxLevel - i)))) - { - return false; - } - } - - size_t numAllocated = 0; - for (Buddy* cur = mAllocatedBuddies.getHead(); cur; cur = cur->next) - { - sum += size_t(1) << (cur->level + mMinBlockLog2); - numAllocated++; - } - - if (numAllocated != mAllocatedBuddies.getSize()) - { - return false; - } - - ptrdiff_t diff = ptrdiff_t(sum - (size_t(1) << (mMaxLevel + mMinBlockLog2))); - if (diff != 0) - { - return false; - } - else - { - return true; - } -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool Heap::visualizeMemory(uint8_t* array, const size_t size) -{ - float scale = (float)size / (size_t(1) << mMaxLevel); - - for (size_t i = 0; i < size; i++) - { - array[i] = 0; - } - for (Buddy* cur = mAllocatedBuddies.getHead(); cur; cur = cur->next) - { - size_t start = (size_t)((float)(cur->addr) * scale); - size_t end = (size_t)((float)(cur->addr + (size_t(1) << size_t(cur->level))) * scale); - PX_ASSERT(start <= size); - PX_ASSERT(end <= size); - for (size_t i = start; i < end; i++) - { - PX_ASSERT(i < size); - array[i] = uint8_t(cur->level + 1); - } - } - - return true; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool Heap::splitBuddy(Buddy* b) -{ - if (b->level == 0) - { - return false; - } - - b->level--; - size_t newSize = size_t(1) << b->level; - - Buddy* b0 = b; - Buddy* b1 = mManager.getBuddyPool().construct(*b); - PX_ASSERT(b0 && b1); - - b1->addr = b1->addr + newSize; - - mFreeBuddiesAtLevel[b0->level].insertSorted(b0); - mFreeBuddiesAtLevel[b1->level].insertSorted(b1); - return true; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -bool Heap::mergeBuddies(Buddy* b0, Buddy* b1) -{ - if (b0->level != b1->level || b0->level >= mMaxLevel || (b1->addr - b0->addr) != size_t(1) << size_t(b0->level)) - { - return false; - } - - Buddy* b = b0; - b->occupiedSize = 0; - b->isLastBuddy = true; - b->level++; - b->next = NULL; - mFreeBuddiesAtLevel[b->level].insertSorted(b); - - mManager.getBuddyPool().destroy(b1); - return true; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -Buddy* Heap::findBySplitting(uint8_t searchLevel) -{ - Buddy* ret = NULL; - - uint8_t curLevel = searchLevel; - - // walk through array of buddy lists and search for a free buddy which is at level >= searchLevel - for (; !mFreeBuddiesAtLevel[curLevel].getSize() && (curLevel < mMaxLevel); curLevel++) - { - ; - } - - // pop buddy at highest level and split until it has the correct level - ret = mFreeBuddiesAtLevel[curLevel].pop(); - for (; ret && (curLevel != searchLevel) && curLevel > 0; curLevel--) - { - splitBuddy(ret); - ret = mFreeBuddiesAtLevel[curLevel - 1].pop(); - } - return ret; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -Buddy* Heap::findByMerging(uint8_t searchLevel) -{ - /* - while there is no pair to merge on this level, ask lower level to merge - may ask lower level to merge more than 1 pair after each failure - or just merge all pairs of lower levels - */ - if (searchLevel == 0) - { - return NULL; - } - - uint8_t curLevel = uint8_t(searchLevel - 1); - bool dummy = true; - while (dummy) - { - int32_t shift = (mMaxLevel - (1 << (curLevel + 1))); - shift = shift >= 0 ? shift : 0; - size_t numToFind = size_t(1) << shift; - size_t found = findPairAndMerge(mFreeBuddiesAtLevel[curLevel], numToFind); - if (found) - { - if (curLevel == searchLevel - 1) - { - break; - } - curLevel++; - } - else - { - if (curLevel > 0) - { - curLevel--; - } - else - { - return NULL; - } - } - } - return mFreeBuddiesAtLevel[searchLevel].pop(); -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -size_t Heap::findPairAndMerge(BuddyList& list, size_t numToFind) -{ - size_t found = 0; - Buddy* cur = list.getHead(); - Buddy* last = list.getHead(); - size_t diff = list.buddySize; - while ((found != numToFind) && cur && cur->next) - { - // find buddy pair b0 and b1, b0 must be at an even address, and b0 and b1 must be neighbours in address space. - // since the list is sorted, we do only compare neighbours in the list. - if (((cur->addr & (size_t(1) << size_t(cur->level))) == 0) && (cur->next->addr - cur->addr == diff)) - { - Buddy* b0 = cur; - Buddy* b1 = cur->next; - - if (cur == list.getHead()) - { - list.setHead(cur->next->next); - cur = list.getHead(); - last = cur; - } - else - { - cur = cur->next->next; - last->next = cur; - } - list.setSize(list.getSize() - 2); - if (mergeBuddies(b0, b1)) - { - found++; - } - } - else - { - last = cur; - cur = cur->next; - } - } - return found; -} - - -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -size_t Heap::reduceIntFragment(Buddy& b, size_t threshold) -{ - /* - while internalFragmentation > threshold - cut buddy in the middle - if cut goes through occupied space - left buddy is full, can be pushed to alloc list clear last buddy flag - else - right buddy is free, push it to free list - */ - size_t allocSize = 0; - Buddy* curB = &b; - curB->isLastBuddy = false; - while (curB->level && ((size_t(1) << (curB->level + mMinBlockLog2)) - curB->occupiedSize) > threshold) - { - //split - Buddy* b0 = mManager.getBuddyPool().construct(*curB); - Buddy* b1 = curB; - b0->level--; - b1->level--; - b1->addr += size_t(1) << size_t(b1->level); - if ((size_t(1) << (b0->level + mMinBlockLog2)) < b0->occupiedSize) - { - b0->occupiedSize = size_t(1) << (b0->level + mMinBlockLog2); - b1->occupiedSize -= b0->occupiedSize; - mAllocatedBuddies.insertSorted(b0); - allocSize += size_t(1) << b1->level; - curB = b1; - } - else - { - b1->occupiedSize = 0; - mFreeBuddiesAtLevel[b1->level].insertSorted(b1); - curB = b0; - } - } - curB->isLastBuddy = true; - allocSize += size_t(1) << curB->level; - mAllocatedBuddies.insertSorted(curB); - return (allocSize << mMinBlockLog2); -} - - diff --git a/PxShared/src/cudamanager/src/HeapManagerRef.h b/PxShared/src/cudamanager/src/HeapManagerRef.h deleted file mode 100644 index e6e585e..0000000 --- a/PxShared/src/cudamanager/src/HeapManagerRef.h +++ /dev/null @@ -1,297 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -//----------------------------------------------------------------------------// -// HeapManagerRef.h -//----------------------------------------------------------------------------// - -#ifndef PXCUDACONTEXTMANAGER_HEAPMANAGERREF_H -#define PXCUDACONTEXTMANAGER_HEAPMANAGERREF_H - -#include "task/PxTaskDefine.h" - -#include "HeapManagerInterface.h" -#include "HeapManagerLinkedList.h" -#include "PsPool.h" -#include "PsMutex.h" -#include "PsArray.h" -#include "PsUserAllocated.h" - -#define DUMP_HEAP_USAGE_TO_FILE 0 - -#ifdef _DEBUG -#define KEEP_DEBUG_INFO 1 -#else -#define KEEP_DEBUG_INFO 0 -#endif - -#if DUMP_HEAP_USAGE_TO_FILE -#include "stdio.h" -#endif - -namespace physx -{ - -class Heap; -struct Buddy -{ - PX_INLINE Buddy() - : addr(0) - , next(0) - , occupiedSize(0) - , allocId(PxAllocId::UNASSIGNED) - , isLastBuddy(true) - , level(0) -#if KEEP_DEBUG_INFO - , file(NULL) - , allocName(NULL) - , line(0) -#endif - {} - - PX_INLINE Buddy(Buddy& b) - : addr(b.addr) - , next(b.next) - , occupiedSize(b.occupiedSize) - , allocId(b.allocId) - , isLastBuddy(b.isLastBuddy) - , level(b.level) -#if KEEP_DEBUG_INFO - , file(b.file) - , allocName(b.allocName) - , line(b.line) -#endif - {} - - size_t addr; - Buddy* next; - size_t occupiedSize; - uint16_t allocId; - uint8_t isLastBuddy; - uint8_t level; -#if KEEP_DEBUG_INFO - const char* file; - const char* allocName; - uint32_t line; -#endif -}; - -struct BuddyList: public LinkedList<Buddy>, public shdfnd::UserAllocated -{ - BuddyList() - : buddySize(0) - , heap(NULL) - {} - void clear(); - - size_t buddySize; // = 2^level - Heap* heap; -}; - -struct HeapManagerPage -{ - PX_INLINE bool operator < (const HeapManagerPage& p) const - { - return baseAddr < p.baseAddr; - } - - PX_INLINE bool operator > (const HeapManagerPage& p) const - { - return baseAddr > p.baseAddr; - } - - size_t baseAddr; - Heap* heap; - bool isPersistent; -}; - - -class HeapManagerRef: public HeapManagerInterface, public shdfnd::UserAllocated -{ - PX_NOCOPY(HeapManagerRef) -public: - HeapManagerRef(physx::PxErrorCallback& errorCallback, bool enableMutex = true); - virtual ~HeapManagerRef(); - - // INTERFACE METHODS - virtual bool init(Allocator* memAllocator, const size_t baseSize, const size_t pageSize, const size_t minBlockSize, const size_t maxIntFrag); - virtual bool setPageSize(size_t pageSize); - virtual void* alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); - virtual bool realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); - virtual bool free(void* addr); - virtual void freeEmptyPages(); - virtual bool reserve(size_t size); - virtual bool getStats(ApexHeapStats& stats, const uint32_t flags); - virtual bool visualizeMemory(uint8_t* array, const size_t size); - virtual void* findBaseAddress(void* addr); - - // INTERNALS - // searches 2^pow >= val, searches pow in [min, max] - static PX_INLINE size_t findNextPow2(const size_t val, const uint8_t min = 0, const uint8_t max = BITSPERWORD); - static PX_INLINE size_t findNextPow2(uint8_t& pow, const size_t val, const uint8_t min = 0, const uint8_t max = BITSPERWORD); - - PX_INLINE void addToStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation); - PX_INLINE void removeFromStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation); - PX_INLINE void incStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation); - PX_INLINE void decStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation); - - PX_INLINE void notifyEmptyPage() - { - mNewEmptyPage = true; - } - PX_INLINE shdfnd::Pool<Buddy>& getBuddyPool() - { - return mBuddyPool; - } - -private: - Heap* allocateNewHeap(size_t heapSize, bool isPersistent = false); - Heap* allocateNewPages(size_t requestedSize); - void resetHeap(HeapManagerPage& page); - void removeDeletedHeapsFromList(uint32_t numDeletes); - void shrinkMemory(); - - Heap* findHeap(void* addr) const; - -private: - // heaps - shdfnd::Array<HeapManagerPage> mHeaps; - shdfnd::Pool<Buddy> mBuddyPool; - size_t mPageSize; - size_t mMinBlockSize; - size_t mMaxIntFrag; - bool mNewEmptyPage; - // lock - shdfnd::Mutex mMutex; - // page allocator - Allocator* mMemAllocator; - // overall stats - size_t mGlobalAllocMem; - size_t mGlobalMaxAllocMem; - size_t mGlobalInternalFragmentation; - size_t mGlobalMaxInternalFragmentation; - // stats per allocation ID - PxAllocIdStats mGlobalAllocIdStats[PxAllocId::NUM_IDS]; - // error callback - physx::PxErrorCallback& mErrorCallback; - -#if DUMP_HEAP_USAGE_TO_FILE - FILE* mLogFile; - unsigned __int64 m_qpc; - unsigned __int64 m_qpf; -#endif -}; - - -class Heap : public shdfnd::UserAllocated -{ -public: - PX_INLINE Heap(HeapManagerRef& manager, physx::PxErrorCallback& errorCallback) - : mManager(manager) - , mErrorCallback(errorCallback) - , mBaseAddr(0) - , mMinBlockSize(0) - , mFreeBuddiesAtLevel(NULL) - , mMaxIntFrag(0) - , mTotalSize(0) - , mMaxLevel(0) - , mMinBlockLog2(0) - , mAllocMem(0) - , mMaxAllocMem(0) - , mInternalFragmentation(0) - , mMaxInternalFragmentation(0) - {} - - PX_INLINE ~Heap(); - - bool init(size_t baseAddr, const size_t baseSize, const size_t minBlockSize, const size_t maxIntFrag); - void* alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); - bool realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)); - bool free(void* addr); - bool getStats(ApexHeapStats& stats, const uint32_t flags); - bool visualizeMemory(uint8_t* array, const size_t size); - - PX_INLINE size_t getTotalMemorySize() - { - return mTotalSize; - } - PX_INLINE size_t getAllocatedMemorySize() - { - return mAllocMem; - } - PX_INLINE shdfnd::Pool<Buddy>& getBuddyPool() - { - return mManager.getBuddyPool(); - } - PX_INLINE void* getBaseAddress() - { - return (void*)mBaseAddr; - } - -private: - // split buddy b with size 2^level into two buddies with level 2^(level-1) and append those to the free list. deletes b, assumes that b was removed from the list before. - bool splitBuddy(Buddy* b); - // merge 2 buddies to next bigger one. deletes b0 and b1, assumes that they are already removed from their array. - bool mergeBuddies(Buddy* b0, Buddy* b1); - - // split of right, free children of a buddy if the internal fragmentation of a buddy is bigger than a threshold - // returns the size of all allocated buddies - size_t reduceIntFragment(Buddy& b, size_t threshold); - - // find a Buddy by splitting a Buddy at searchLevel - Buddy* findBySplitting(uint8_t searchLevel); - Buddy* findByMerging(uint8_t searchLevel); - size_t findPairAndMerge(BuddyList& list, size_t numToFind); - - bool sanityTest(); - - void operator=(const Heap&) - { - PX_ASSERT(0); - } - -private: - HeapManagerRef& mManager; - physx::PxErrorCallback& mErrorCallback; - size_t mBaseAddr; - size_t mMinBlockSize; - BuddyList* mFreeBuddiesAtLevel; - BuddyList mAllocatedBuddies; - size_t mMaxIntFrag; - size_t mTotalSize; - uint8_t mMaxLevel; // 2^maxLevel <= memorySize - uint8_t mMinBlockLog2; - - size_t mAllocMem; // fragmented - size_t mMaxAllocMem; - size_t mInternalFragmentation; - size_t mMaxInternalFragmentation; -}; - -} // end physx namespace - -#endif // PXCUDACONTEXTMANAGER_HEAPMANAGERREF_H diff --git a/PxShared/src/cudamanager/src/PhysXDevice.h b/PxShared/src/cudamanager/src/PhysXDevice.h deleted file mode 100644 index b066bdc..0000000 --- a/PxShared/src/cudamanager/src/PhysXDevice.h +++ /dev/null @@ -1,119 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#ifndef ___PHYS_X_DEVICE_ -#define ___PHYS_X_DEVICE_ - -#include "foundation/PxPreprocessor.h" - -#if PX_WINDOWS -# pragma warning (push) -# pragma warning (disable : 4668) //'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives' -# include "windows/PsWindowsInclude.h" -# pragma warning (pop) -#endif - -#if PX_WINDOWS - #define PHYSX_DEV_DLL_API extern "C" __declspec(dllexport) - #ifdef _DEBUG - # define PHYSX_DEV_DLL_PRIVATE_API extern "C" __declspec(dllexport) - #else - # define PHYSX_DEV_DLL_PRIVATE_API - #endif -#else - #define PHYSX_DEV_DLL_API - #define PHYSX_DEV_DLL_PRIVATE_API -#endif - -/** typedefs */ -typedef int PHYSX_DEV_STATUS; -typedef unsigned int PhysXDevHandle; - -/** PHYSX_DEV_STATUS values */ -enum -{ - PHYSX_DEV_OK = 0, - PHYSX_DEV_UNKNOWN_ERROR, - PHYSX_DEV_INVALID_HANDLE, - PHYSX_DEV_UNINITIALIZED, - PHYSX_DEV_NV_API_UNAVAILABLE, - PHYSX_DEV_CUDA_UNAVAILABLE, - PHYSX_DEV_CUDA_MEMORY_ALLOC_FAILURE, - PHYSX_DEV_LEGACY_MODE_GPU_HANDLE, - PHYSX_DEV_PHYSX_DEV_UNAVAILABLE, -}; - - -/** - * physxDevInit - * Initialize the PhysX Device information functions. - * Must be called before using any other API functions. - */ -PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevInit(); - -/** - * physxDevClose - * Call this when finished with the PhysX Device API, it - * frees memory that is allocated in physxDevInit - */ -PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevClose(); - -/** - * physxDevGetCudaOrdinal - * Returns the CUDA device ordinal for the given PhysX GPU device - */ -PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevGetCudaOrdinal(int* cudaDevOrdinal, PhysXDevHandle devHandle); -PHYSX_DEV_STATUS physxDevGetCudaOrdinalWrapper(int* cudaDevOrdinal); - -/** - * physxDevGet - * Returns the PhysX GPU device that the PhysX Engine - * will use. If the device is -1, the engine will - * automatically choose which GPU to use. - * - * This function handles the R177/R180 detection first, then decides accordingly - * - * if(180+) - * if(GPU Enabled) ? get NVAPI sel : -1 - * else (177) - * if regkey ? regkey value : -1 (PHYSX_DEV_LEGACY_MODE_GPU_HANDLE returned) - */ -PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevGet(PhysXDevHandle* devHandle); - -/** - * physxDevUsingDedicatedGPU - * Returns whether or not PhysX has a dedicated GPU (set by the user in the NV CPL) - */ -PHYSX_DEV_DLL_API bool physxDevUsingDedicatedGPU(); - -/** - * physxDevSLIEnabled - * Returns whether or not the device pointer specified (D3D device) is in an SLI group - */ -PHYSX_DEV_DLL_API bool physxDevSLIEnabled(void* graphicsDevice); - -#endif diff --git a/PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp b/PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp deleted file mode 100644 index 77896c2..0000000 --- a/PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp +++ /dev/null @@ -1,248 +0,0 @@ -// This code contains NVIDIA Confidential Information and is disclosed to you -// under a form of NVIDIA software license agreement provided separately to you. -// -// Notice -// NVIDIA Corporation and its licensors retain all intellectual property and -// proprietary rights in and to this software and related documentation and -// any modifications thereto. Any use, reproduction, disclosure, or -// distribution of this software and related documentation without an express -// license agreement from NVIDIA Corporation is strictly prohibited. -// -// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES -// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO -// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, -// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. -// -// Information and code furnished is believed to be accurate and reliable. -// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such -// information or for any infringement of patents or other rights of third parties that may -// result from its use. No license is granted by implication or otherwise under any patent -// or patent rights of NVIDIA Corporation. Details are subject to change without notice. -// This code supersedes and replaces all information previously supplied. -// NVIDIA Corporation products are not authorized for use as critical -// components in life support devices or systems without express written approval of -// NVIDIA Corporation. -// -// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved. - -#include "task/PxTaskDefine.h" - -#if PX_SUPPORT_GPU_PHYSX - -#include "foundation/PxErrorCallback.h" - -#include "PhysXDeviceSettings.h" -#include "PhysXDevice.h" - -#if PX_VC -#pragma warning(disable: 4191) //'operator/operation' : unsafe conversion from 'type of expression' to 'type required' -#endif - -namespace -{ -#if PX_WIN32 || PX_WIN64 - /** funcs for the dynamic loading of the PhysXDevice.dll file */ - typedef PHYSX_DEV_STATUS __cdecl physxDevInit_t(); - typedef PHYSX_DEV_STATUS __cdecl physxDevClose_t(); - typedef PHYSX_DEV_STATUS __cdecl physxDevGet_t(PhysXDevHandle* devHandle); - typedef PHYSX_DEV_STATUS __cdecl physxDevGetCudaOrdinal_t(int* cudaDevOrdinal, PhysXDevHandle devHandle); - typedef bool __cdecl physxDevUsingDedicatedGPU_t(); - typedef bool __cdecl physxDevSLIEnabled_t(void* graphicsDevice); - - /** globals for cuda functions */ - static physxDevInit_t* physxDevInit_f; - static physxDevClose_t* physxDevClose_f; - static physxDevGetCudaOrdinal_t* physxDevGetCudaOrdinal_f; - static physxDevGet_t* physxDevGet_f; - static physxDevUsingDedicatedGPU_t* physxDevUsingDedicatedGPU_f; - static physxDevSLIEnabled_t* physxDevSLIEnabled_f; - - /** globals */ - static HMODULE gPhysXDevModuleH; - - PHYSX_DEV_STATUS initPhysXDeviceLib() - { - PHYSX_DEV_STATUS status; -#if PX_X86 - gPhysXDevModuleH = LoadLibrary("PhysXDevice.dll"); -#else - gPhysXDevModuleH = LoadLibrary("PhysXDevice64.dll"); -#endif - if (!gPhysXDevModuleH) - { - return PHYSX_DEV_PHYSX_DEV_UNAVAILABLE; - } - - physxDevInit_f = (physxDevInit_t*)GetProcAddress(gPhysXDevModuleH, "physxDevInit"); - physxDevClose_f = (physxDevClose_t*)GetProcAddress(gPhysXDevModuleH, "physxDevClose"); - physxDevGetCudaOrdinal_f = (physxDevGetCudaOrdinal_t*)GetProcAddress(gPhysXDevModuleH, "physxDevGetCudaOrdinal"); - physxDevGet_f = (physxDevGet_t*)GetProcAddress(gPhysXDevModuleH, "physxDevGet"); - physxDevUsingDedicatedGPU_f = (physxDevUsingDedicatedGPU_t*)GetProcAddress(gPhysXDevModuleH, "physxDevUsingDedicatedGPU"); - physxDevSLIEnabled_f = (physxDevSLIEnabled_t*)GetProcAddress(gPhysXDevModuleH, "physxDevSLIEnabled"); - - if (!physxDevInit_f || - !physxDevClose_f || - !physxDevGetCudaOrdinal_f || - !physxDevGet_f) - { - FreeLibrary(gPhysXDevModuleH); - return PHYSX_DEV_CUDA_UNAVAILABLE; - } - - status = physxDevInit_f(); - if (PHYSX_DEV_OK != status) - { - FreeLibrary(gPhysXDevModuleH); - return status; - } - - return PHYSX_DEV_OK; - } -#endif // PX_WIN32 || PX_WIN64 - - PHYSX_DEV_STATUS getCudaOrdinal(int* cudaDevOrdinal) - { -#if PX_WIN32 || PX_WIN64 - PHYSX_DEV_STATUS status; - PhysXDevHandle selectedDev; - - status = initPhysXDeviceLib(); - - if (PHYSX_DEV_OK != status) - { - return status; - } - - status = physxDevGet_f(&selectedDev); - physxDevGetCudaOrdinal_f(cudaDevOrdinal, selectedDev); - - physxDevClose_f(); - FreeLibrary(gPhysXDevModuleH); - - if (status == PHYSX_DEV_LEGACY_MODE_GPU_HANDLE) // R177 installed - { - return PHYSX_DEV_LEGACY_MODE_GPU_HANDLE; - } - else - { - return PHYSX_DEV_OK; - } -#elif PX_LINUX - const char* deviceOrdinalString = ::getenv("PHYSX_GPU_DEVICE"); - if (!deviceOrdinalString) - *cudaDevOrdinal = 0; - else - *cudaDevOrdinal = atoi(deviceOrdinalString); - return PHYSX_DEV_OK; -#endif - } - -} - -namespace physx -{ - - int PhysXDeviceSettings::getSuggestedCudaDeviceOrdinal(physx::PxErrorCallback& errc) - { - int cudaDevOrdinal = -1; - switch (getCudaOrdinal(&cudaDevOrdinal)) - { - case PHYSX_DEV_OK: - break; - - case PHYSX_DEV_UNKNOWN_ERROR: - errc.reportError(PxErrorCode::eDEBUG_WARNING, "unknown error during CUDA device detection\n", __FILE__, __LINE__); - break; - - case PHYSX_DEV_NV_API_UNAVAILABLE: - errc.reportError(PxErrorCode::eDEBUG_WARNING, "NVAPI is not available\n", __FILE__, __LINE__); - break; - - case PHYSX_DEV_CUDA_UNAVAILABLE: - errc.reportError(PxErrorCode::eDEBUG_WARNING, "CUDA is not available\n", __FILE__, __LINE__); - break; - - case PHYSX_DEV_PHYSX_DEV_UNAVAILABLE: -#if PX_X86 - errc.reportError(PxErrorCode::eDEBUG_WARNING, "PhysXDevice.dll is not available\n", __FILE__, __LINE__); -#else - errc.reportError(PxErrorCode::eDEBUG_WARNING, "PhysXDevice64.dll is not available\n", __FILE__, __LINE__); -#endif - break; - - default: - errc.reportError(PxErrorCode::eDEBUG_WARNING, "unknown error during CUDA device detection\n", __FILE__, __LINE__); - break; - } - - return cudaDevOrdinal; - } - - int PhysXDeviceSettings::isUsingDedicatedGPU() - { -#if PX_WIN32 || PX_WIN64 - PHYSX_DEV_STATUS status; - bool dedicated = false; - - status = initPhysXDeviceLib(); - - if (PHYSX_DEV_OK != status) - { - return 0; - } - - if (physxDevUsingDedicatedGPU_f) - { - dedicated = physxDevUsingDedicatedGPU_f(); - physxDevClose_f(); - FreeLibrary(gPhysXDevModuleH); - return(dedicated); - } - else - { - physxDevClose_f(); - FreeLibrary(gPhysXDevModuleH); - return(-1); - } -#elif PX_LINUX - // need some way to set this - return 0; -#endif - } - - bool PhysXDeviceSettings::isSLIEnabled(void* graphicsDevice) - { -#if PX_WIN32 || PX_WIN64 - PHYSX_DEV_STATUS status; - status = initPhysXDeviceLib(); - - if (PHYSX_DEV_OK != status) - { - return false; - } - - if (physxDevSLIEnabled_f) - { - bool enabled = physxDevSLIEnabled_f(graphicsDevice); - physxDevClose_f(); - FreeLibrary(gPhysXDevModuleH); - return enabled; - } - else - { - physxDevClose_f(); - FreeLibrary(gPhysXDevModuleH); - return false; - } -#elif PX_LINUX - // Unimplemented for Linux because we don't need it, not because it's really always false. - PX_UNUSED(graphicsDevice); - return false; -#endif - } - -} // end physx namespace - -#endif // PX_SUPPORT_GPU_PHYSX - - |