aboutsummaryrefslogtreecommitdiff
path: root/PxShared/src/cudamanager
diff options
context:
space:
mode:
Diffstat (limited to 'PxShared/src/cudamanager')
-rw-r--r--PxShared/src/cudamanager/include/CudaContextManager.h51
-rw-r--r--PxShared/src/cudamanager/include/CudaKernelWrangler.h331
-rw-r--r--PxShared/src/cudamanager/include/GpuDispatcher.h332
-rw-r--r--PxShared/src/cudamanager/include/PhysXDeviceSettings.h56
-rw-r--r--PxShared/src/cudamanager/src/BlockingWait.cpp119
-rw-r--r--PxShared/src/cudamanager/src/CUDA/UtilKernels.cu164
-rw-r--r--PxShared/src/cudamanager/src/CudaContextManager.cpp823
-rw-r--r--PxShared/src/cudamanager/src/CudaKernelWrangler.cpp242
-rw-r--r--PxShared/src/cudamanager/src/CudaMemoryManager.cpp649
-rw-r--r--PxShared/src/cudamanager/src/CudaMemoryManager.h297
-rw-r--r--PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h128
-rw-r--r--PxShared/src/cudamanager/src/GpuDispatcher.cpp928
-rw-r--r--PxShared/src/cudamanager/src/HeapManagerInterface.h156
-rw-r--r--PxShared/src/cudamanager/src/HeapManagerLinkedList.h204
-rw-r--r--PxShared/src/cudamanager/src/HeapManagerRef.cpp1380
-rw-r--r--PxShared/src/cudamanager/src/HeapManagerRef.h297
-rw-r--r--PxShared/src/cudamanager/src/PhysXDevice.h119
-rw-r--r--PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp248
18 files changed, 0 insertions, 6524 deletions
diff --git a/PxShared/src/cudamanager/include/CudaContextManager.h b/PxShared/src/cudamanager/include/CudaContextManager.h
deleted file mode 100644
index 3d68f82..0000000
--- a/PxShared/src/cudamanager/include/CudaContextManager.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H
-#define PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H
-
-#include "task/PxTaskDefine.h"
-
-#if PX_SUPPORT_GPU_PHYSX
-
-namespace physx
-{
-
-class PxCudaContextManager;
-class PxCudaContextManagerDesc;
-class PxErrorCallback;
-
-/**
-Creates cuda context manager for PhysX and APEX.
-*/
-PxCudaContextManager* createCudaContextManager(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback);
-
-}
-
-#endif
-
-#endif // PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H
diff --git a/PxShared/src/cudamanager/include/CudaKernelWrangler.h b/PxShared/src/cudamanager/include/CudaKernelWrangler.h
deleted file mode 100644
index 36a2cc8..0000000
--- a/PxShared/src/cudamanager/include/CudaKernelWrangler.h
+++ /dev/null
@@ -1,331 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef __CUDA_KERNEL_WRANGLER__
-#define __CUDA_KERNEL_WRANGLER__
-
-// Make this header is safe for inclusion in headers that are shared with device code.
-#if !defined(__CUDACC__)
-
-#include "task/PxTaskDefine.h"
-#include "task/PxGpuDispatcher.h"
-
-#include "PsUserAllocated.h"
-#include "PsArray.h"
-
-#include <cuda.h>
-
-namespace physx
-{
-
-class KernelWrangler : public shdfnd::UserAllocated
-{
- PX_NOCOPY(KernelWrangler)
-public:
- KernelWrangler(PxGpuDispatcher& gd, PxErrorCallback& errorCallback, const char** funcNames, uint16_t numFuncs);
- ~KernelWrangler();
-
- CUfunction getCuFunction(uint16_t funcIndex) const
- {
- return mCuFunctions[ funcIndex ];
- }
-
- CUmodule getCuModule(uint16_t funcIndex) const
- {
- uint16_t modIndex = mCuFuncModIndex[ funcIndex ];
- return mCuModules[ modIndex ];
- }
-
- static void const* const* getImages();
- static int getNumImages();
-
- bool hadError() const { return mError; }
-
-protected:
- bool mError;
- shdfnd::Array<CUfunction> mCuFunctions;
- shdfnd::Array<uint16_t> mCuFuncModIndex;
- shdfnd::Array<CUmodule> mCuModules;
- PxGpuDispatcher& mGpuDispatcher;
- PxErrorCallback& mErrorCallback;
-};
-
-/* SJB - These were "borrowed" from an Ignacio Llamas email to devtech-compute.
- * If we feel this is too clumsy, we can steal the boost based bits from APEX
- */
-
-class ExplicitCudaFlush
-{
-public:
- ExplicitCudaFlush(int cudaFlushCount) : mCudaFlushCount(cudaFlushCount), mDefaultCudaFlushCount(mCudaFlushCount) {}
- ~ExplicitCudaFlush() {}
-
- void setCudaFlushCount(int value) { mCudaFlushCount = mDefaultCudaFlushCount = value; }
- unsigned int getCudaFlushCount() const { return (unsigned int)mCudaFlushCount; }
- void resetCudaFlushCount() { mCudaFlushCount = mDefaultCudaFlushCount; }
-
- void decrementFlushCount()
- {
- if (mCudaFlushCount == 0) return;
-
- if (--mCudaFlushCount == 0)
- {
- CUresult ret = cuStreamQuery(0); // flushes current push buffer
- PX_UNUSED(ret);
- PX_ASSERT(ret == CUDA_SUCCESS || ret == CUDA_ERROR_NOT_READY);
-
- // For current implementation, disable resetting of cuda flush count
- // reset cuda flush count
- // mCudaFlushCount = mDefaultCudaFlushCount;
- }
- }
-
-private:
- int mCudaFlushCount;
- int mDefaultCudaFlushCount;
-};
-
-}
-
-template <typename T0>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0)
-{
- void* kernelParams[] =
- {
- &v0,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1)
-{
- void* kernelParams[] =
- {
- &v0, &v1,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11, typename T12>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11, typename T12, typename T13>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14, T15 v15)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14, T15 v15, T16 v16)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14, T15 v15, T16 v16, T17 v17)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16, &v17,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-#endif
-
-#endif
diff --git a/PxShared/src/cudamanager/include/GpuDispatcher.h b/PxShared/src/cudamanager/include/GpuDispatcher.h
deleted file mode 100644
index 10c412f..0000000
--- a/PxShared/src/cudamanager/include/GpuDispatcher.h
+++ /dev/null
@@ -1,332 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef PXTASK_GPUDISPATCHER_H
-#define PXTASK_GPUDISPATCHER_H
-
-#include "task/PxTask.h"
-#include "task/PxTaskDefine.h"
-#include "task/PxGpuTask.h"
-#include "task/PxTaskManager.h"
-#include "task/PxGpuDispatcher.h"
-#include "foundation/PxProfiler.h"
-
-#include "PsUserAllocated.h"
-#include "PsThread.h"
-#include "PsAtomic.h"
-#include "PsMutex.h"
-#include "PsSync.h"
-#include "PsArray.h"
-
-#include <cuda.h>
-
-namespace physx {
-
-typedef uint16_t EventID;
-
-void releaseGpuDispatcher(PxGpuDispatcher&);
-
-class KernelWrangler;
-class BlockingWaitThread;
-class FanoutTask;
-class LaunchTask;
-class BlockTask;
-class PxGpuWorkerThread;
-
-class GpuDispatcherImpl : public PxGpuDispatcher, public shdfnd::UserAllocated
-{
-public:
- GpuDispatcherImpl(PxErrorCallback& errorCallback, PxCudaContextManager& ctx);
- virtual ~GpuDispatcherImpl();
-
- void start();
- void startSimulation();
- void startGroup();
- void submitTask(PxTask& task);
- void finishGroup();
- void addCompletionPrereq(PxBaseTask& task);
- bool failureDetected() const;
- void forceFailureMode();
- void stopSimulation();
- void launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream);
-
- PxBaseTask& getPreLaunchTask();
- void addPreLaunchDependent(PxBaseTask& dependent);
-
- PxBaseTask& getPostLaunchTask();
- void addPostLaunchDependent(PxBaseTask& dependent);
-
- PxCudaContextManager* getCudaContextManager();
-
- PxGpuWorkerThread* mDispatcher;
- BlockingWaitThread* mBlockingThread;
- LaunchTask* mLaunchTask; // predecessor of tasks launching kernels
- BlockTask* mBlockTask; // continuation of tasks launching kernels
- FanoutTask* mSyncTask; // predecessor of tasks waiting for cuda context synchronize
-};
-
-class JobQueue
-{
- PX_NOCOPY(JobQueue)
-public:
- JobQueue() : taskarray(PX_DEBUG_EXP("PxTask*")) {}
- void push(PxTask* t)
- {
- access.lock();
- taskarray.pushBack(t);
- access.unlock();
- }
- PxTask* popBack()
- {
- access.lock();
- PxTask* t = NULL;
- if (taskarray.size())
- {
- t = taskarray.popBack();
- }
- access.unlock();
- return t;
- }
- uint32_t size()
- {
- return taskarray.size();
- }
- bool empty()
- {
- return taskarray.size() == 0;
- }
-
-private:
- shdfnd::Array<PxTask*> taskarray;
- shdfnd::Mutex access;
-};
-
-class EventPool
-{
- PX_NOCOPY(EventPool)
-public:
- EventPool(uint32_t inflags) : flags(inflags), evarray(PX_DEBUG_EXP("CUevent")) {}
- void add(CUevent ev)
- {
- access.lock();
- evarray.pushBack(ev);
- access.unlock();
- }
- CUevent get()
- {
- access.lock();
- CUevent ev;
- if (evarray.size())
- {
- ev = evarray.popBack();
- }
- else
- {
- cuEventCreate(&ev, flags);
- }
- access.unlock();
- return ev;
- }
- bool empty() const
- {
- return evarray.size() == 0;
- }
- void clear()
- {
- access.lock();
- for (uint32_t i = 0; i < evarray.size(); i++)
- {
- cuEventDestroy(evarray[i]);
- }
- access.unlock();
- }
-
-private:
- uint32_t flags;
- shdfnd::Array<CUevent> evarray;
- shdfnd::Mutex access;
-};
-
-class StreamCache
-{
-public:
- StreamCache() : sarray(PX_DEBUG_EXP("CUstream")), freeIndices(PX_DEBUG_EXP("freeIndices"))
- {
- }
- CUstream get(uint32_t s)
- {
- PX_ASSERT(s);
- return sarray[ s - 1 ];
- }
- void push(uint32_t s)
- {
- freeIndices.pushBack(s);
- }
- uint32_t popBack()
- {
- if (freeIndices.size())
- {
- return freeIndices.popBack();
- }
- else
- {
- CUstream s;
- cuStreamCreate(&s, 0);
- sarray.pushBack(s);
- return sarray.size();
- }
- }
- void reset()
- {
- freeIndices.resize(sarray.size());
- for (uint32_t i = 0 ; i < sarray.size() ; i++)
- {
- freeIndices[i] = i + 1;
- }
- }
- bool empty()
- {
- return freeIndices.size() == 0;
- }
-
-private:
- shdfnd::Array<CUstream> sarray;
- shdfnd::Array<uint32_t> freeIndices;
-};
-
-class KernelBar
-{
-public:
- KernelBar()
- {
- reset();
- }
- void reset()
- {
- start = 0xffffffff;
- stop = 0;
- }
-
- uint32_t start;
- uint32_t stop;
-};
-
-const int SIZE_COMPLETION_RING = 1024;
-
-struct CudaBatch
-{
- CUevent blockingEvent;
- CUstream blockingStream; // sync on stream instead of event if lsb is zero (faster)
- PxBaseTask* continuationTask;
-};
-
-struct ReadyTask
-{
- PxGpuTask* task;
- uint32_t iteration;
-};
-
-class PxGpuWorkerThread : public shdfnd::Thread
-{
- PX_NOCOPY(PxGpuWorkerThread)
-public:
- PxGpuWorkerThread();
- ~PxGpuWorkerThread();
-
- void setCudaContext(PxCudaContextManager& ctx);
-
- /* API to TaskManager */
- void startSimulation();
- void stopSimulation();
-
- /* API to GPU tasks */
- void addCompletionPrereq(PxBaseTask& task);
-
- /* PxGpuTask execution thread */
- void execute();
- void pollSubmitted(shdfnd::Array<ReadyTask> *ready);
- void processActiveTasks();
- void flushBatch(CUevent endEvent, CUstream, PxBaseTask* task);
- void launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream);
-
- /* Blocking wait thread */
- void blockingWaitFunc();
-
- StreamCache mCachedStreams;
- shdfnd::Array<PxBaseTask*> mCompletionTasks;
- JobQueue mSubmittedTaskList;
- volatile int mActiveGroups;
- shdfnd::Sync mInputReady;
- shdfnd::Sync mRecordEventQueued;
- PxCudaContextManager* mCtxMgr;
- bool mNewTasksSubmitted;
- bool mFailureDetected;
-
- bool mUsingConcurrentStreams;
-
- CudaBatch mCompletionRing[ SIZE_COMPLETION_RING ];
- volatile int mCompletionRingPush;
- volatile int mCompletionRingPop;
-
- EventPool mCachedBlockingEvents;
- EventPool mCachedNonBlockingEvents;
-
- volatile int mCountActiveScenes;
-
- uint32_t* mSmStartTimes;
- uint32_t mSmClockFreq;
-
- shdfnd::Array<ReadyTask> mReady[ PxGpuTaskHint::NUM_GPU_TASK_HINTS ];
-
- KernelWrangler* mUtilKernelWrapper;
-
- CUevent mStartEvent;
-
- shdfnd::Mutex mMutex;
-};
-
-class BlockingWaitThread : public shdfnd::Thread
-{
-public:
- BlockingWaitThread(PxGpuWorkerThread& worker) : mWorker(worker) {}
- ~BlockingWaitThread() {}
-
- void execute();
-
-protected:
- PxGpuWorkerThread& mWorker;
-
-private:
- BlockingWaitThread& operator=(const BlockingWaitThread&);
-};
-
-#define GD_CHECK_CALL(call) { CUresult ret = call; \
- if( CUDA_SUCCESS != ret ) { mFailureDetected=true; PX_ASSERT(!ret); } }
-
-}
-
-#endif // PXTASK_GPUDISPATCHER_H
diff --git a/PxShared/src/cudamanager/include/PhysXDeviceSettings.h b/PxShared/src/cudamanager/include/PhysXDeviceSettings.h
deleted file mode 100644
index 5358915..0000000
--- a/PxShared/src/cudamanager/include/PhysXDeviceSettings.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H
-#define PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H
-
-#include "task/PxTaskDefine.h"
-
-#if PX_SUPPORT_GPU_PHYSX
-
-namespace physx
-{
- class PxErrorCallback;
-
- /**
- Helper functions to expose control panel functionality
- */
- class PhysXDeviceSettings
- {
- private:
- PhysXDeviceSettings() {}
-
- public:
- static int getSuggestedCudaDeviceOrdinal(PxErrorCallback& errc);
- static int isUsingDedicatedGPU();
- static bool isSLIEnabled(void* graphicsDevice);
- };
-}
-
-#endif
-
-#endif // PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H
diff --git a/PxShared/src/cudamanager/src/BlockingWait.cpp b/PxShared/src/cudamanager/src/BlockingWait.cpp
deleted file mode 100644
index 8a2cc44..0000000
--- a/PxShared/src/cudamanager/src/BlockingWait.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "task/PxTaskDefine.h"
-
-#if PX_SUPPORT_GPU_PHYSX
-
-#include "task/PxTask.h"
-#include "task/PxGpuTask.h"
-#include "cudamanager/PxCudaContextManager.h"
-
-#include "PsString.h"
-
-#if PX_SUPPORT_PXTASK_PROFILING
-#include "foundation/PxFoundation.h"
-#include "foundation/PxProfiler.h"
-#endif
-
-#include "GpuDispatcher.h"
-
-using namespace physx;
-
-/* Blocking thread / GPU Profiling Event Code */
-
-void PxGpuWorkerThread::blockingWaitFunc()
-{
- mCtxMgr->acquireContext();
-
- while (mCompletionRingPop != mCompletionRingPush)
- {
- CudaBatch& b = mCompletionRing[ mCompletionRingPop ];
- PxBaseTask* t = b.continuationTask;
-
- if (!b.blockingEvent)
- {
- PX_ASSERT(b.continuationTask != 0);
-
- /* No blocking necessary, just allow continuation task to run */
- }
- else if (!mFailureDetected)
- {
-#if PX_SUPPORT_PXTASK_PROFILING
- PX_PROFILE_ZONE("GpuDispatcher.BlockingWaitEvent", 0);
-#endif
- if (1 & ~intptr_t(b.blockingStream))
- {
- GD_CHECK_CALL(cuStreamSynchronize(b.blockingStream));
- }
- else
- {
- GD_CHECK_CALL(cuEventSynchronize(b.blockingEvent));
- }
- }
-
- if (b.blockingEvent)
- {
- mCachedBlockingEvents.add(b.blockingEvent);
- }
- if (t)
- {
- t->removeReference();
- }
- mCompletionRingPop = (mCompletionRingPop + 1) % SIZE_COMPLETION_RING;
- }
-
- mCtxMgr->releaseContext();
-}
-
-
-/* Blocking wait thread
-
- All this thread does is block waiting for CUDA Record Events to
- be signaled.
- */
-
-void BlockingWaitThread::execute()
-{
- setName("GpuDispatcher.BlockingWait");
- bool running = true;
- while (running)
- {
- mWorker.mRecordEventQueued.wait();
- if (quitIsSignalled())
- {
- running = false;
- }
-
- mWorker.mRecordEventQueued.reset();
- mWorker.blockingWaitFunc();
- }
- quit();
-}
-
-#endif
-
diff --git a/PxShared/src/cudamanager/src/CUDA/UtilKernels.cu b/PxShared/src/cudamanager/src/CUDA/UtilKernels.cu
deleted file mode 100644
index 3c73364..0000000
--- a/PxShared/src/cudamanager/src/CUDA/UtilKernels.cu
+++ /dev/null
@@ -1,164 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "cudamanager/PxGpuCopyDesc.h"
-#include "foundation/PxSimpleTypes.h"
-
-#include <cuda.h>
-
-using namespace physx;
-
-extern "C" __host__ void initUtilKernels() {}
-
-extern "C" __global__
-void Saturate( )
-{
- // NOP
-}
-
-__device__
-void performCopy( const physx::PxGpuCopyDesc& desc, uint32_t totalBlocks )
-{
- if( desc.type == physx::PxGpuCopyDesc::DeviceMemset32 )
- {
- uint32_t *dest = (uint32_t*) desc.dest;
- uint32_t wordCount = desc.bytes >> 2;
- size_t word = blockIdx.x * blockDim.x + threadIdx.x;
- size_t stride = blockDim.x * totalBlocks;
- for( ; word < wordCount ; word += stride )
- dest[ word ] = desc.source;
- return;
- }
-
- /* The idea here is to maximize throughput with minimal register and thread counts */
- /* Manually unrolled 4 times, the compiler refuses to do it for me */
-
- if( (desc.source & 0x7) != 0 || (desc.dest & 0x7) != 0 || (desc.bytes & 0x7) != 0)
- {
- /* Input is word aligned */
-
- uint32_t *dest = (uint32_t*) desc.dest;
- uint32_t *source = (uint32_t*) desc.source;
- uint32_t wordCount = desc.bytes >> 2;
- size_t word = blockIdx.x * blockDim.x + threadIdx.x;
- size_t stride = blockDim.x * totalBlocks;
- while( word < wordCount )
- {
- uint32_t a0, a1, a2, a3, a4, a5;
- a0 = source[ word ];
- if( word + stride < wordCount )
- a1 = source[ word + stride ];
- if( word + stride*2 < wordCount )
- a2 = source[ word + stride*2 ];
- if( word + stride*3 < wordCount )
- a3 = source[ word + stride*3 ];
- if( word + stride*4 < wordCount )
- a4 = source[ word + stride*4 ];
- if( word + stride*5 < wordCount )
- a5 = source[ word + stride*5 ];
-
- dest[ word ] = a0;
- if( word + stride < wordCount )
- dest[ word + stride ] = a1;
- if( word + stride*2 < wordCount )
- dest[ word + stride*2 ] = a2;
- if( word + stride*3 < wordCount )
- dest[ word + stride*3 ] = a3;
- if( word + stride*4 < wordCount )
- dest[ word + stride*4 ] = a4;
- if( word + stride*5 < wordCount )
- dest[ word + stride*5 ] = a5;
-
- word += stride*6;
- }
- }
- else
- {
- /* Input is DWord aligned */
-
- uint2 *dest = (uint2*) desc.dest;
- uint2 *source = (uint2*) desc.source;
- uint32_t dwordCount = desc.bytes >> 3;
- size_t word = blockIdx.x * blockDim.x + threadIdx.x;
- size_t stride = blockDim.x * totalBlocks;
- while( word < dwordCount )
- {
- uint2 a0, a1, a2, a3, a4, a5;
- a0 = source[ word ];
- if( word + stride < dwordCount )
- a1 = source[ word + stride ];
- if( word + stride*2 < dwordCount )
- a2 = source[ word + stride*2 ];
- if( word + stride*3 < dwordCount )
- a3 = source[ word + stride*3 ];
- if( word + stride*4 < dwordCount )
- a4 = source[ word + stride*4 ];
- if( word + stride*5 < dwordCount )
- a5 = source[ word + stride*5 ];
-
- dest[ word ] = a0;
- if( word + stride < dwordCount )
- dest[ word + stride ] = a1;
- if( word + stride*2 < dwordCount )
- dest[ word + stride*2 ] = a2;
- if( word + stride*3 < dwordCount )
- dest[ word + stride*3 ] = a3;
- if( word + stride*4 < dwordCount )
- dest[ word + stride*4 ] = a4;
- if( word + stride*5 < dwordCount )
- dest[ word + stride*5 ] = a5;
-
- word += stride*6;
- }
- }
-
- __threadfence_system();
-}
-
-extern "C" __global__
-void MemCopyAsync( physx::PxGpuCopyDesc desc )
-{
- performCopy( desc, gridDim.x );
-}
-
-
-extern "C" __global__
-void MemCopyBatchedAsync( physx::PxGpuCopyDesc *desc )
-{
- __shared__ physx::PxGpuCopyDesc sdesc;
-
- if( threadIdx.x < sizeof(physx::PxGpuCopyDesc) / sizeof(uint32_t) )
- {
- uint32_t *dest = (uint32_t*)&sdesc;
- uint32_t *source = (uint32_t*)(desc + blockIdx.y);
- dest[ threadIdx.x ] = source[ threadIdx.x ];
- __threadfence_block();
- }
- __syncthreads();
-
- performCopy( sdesc, gridDim.x );
-}
diff --git a/PxShared/src/cudamanager/src/CudaContextManager.cpp b/PxShared/src/cudamanager/src/CudaContextManager.cpp
deleted file mode 100644
index e05911e..0000000
--- a/PxShared/src/cudamanager/src/CudaContextManager.cpp
+++ /dev/null
@@ -1,823 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "foundation/PxAssert.h"
-#include "foundation/PxErrorCallback.h"
-#include "foundation/PxMath.h"
-#include "foundation/PxPreprocessor.h"
-
-#include "cudamanager/PxCudaContextManager.h"
-#include "task/PxGpuDispatcher.h"
-
-#include "CudaMemoryManager.h"
-#include "GpuDispatcher.h"
-#include "PhysXDeviceSettings.h"
-
-#include "PsMutex.h"
-#include "PsThread.h"
-#include "PsUserAllocated.h"
-#include "PsString.h"
-
-#include <cuda.h>
-
-#if PX_WIN32 || PX_WIN64
-
-#ifdef PX_SECURE_LOAD_LIBRARY
-#include "nvSecureLoadLibrary.h"
-#endif
-
-#pragma warning (push)
-#pragma warning (disable : 4668) //'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives'
-#include <windows.h>
-#pragma warning (pop)
-
-class IDirect3DDevice9;
-class IDirect3DResource9;
-class IDirect3DVertexBuffer9;
-#include <cudad3d9.h>
-
-class IDXGIAdapter;
-class ID3D10Device;
-class ID3D10Resource;
-#include <cudad3d10.h>
-
-struct ID3D11Device;
-struct ID3D11Resource;
-#include <cudad3d11.h>
-
-#endif // PX_WINDOWS_FAMILY
-
-#if PX_LINUX
-#include <dlfcn.h>
-static void* GetProcAddress(void* handle, const char* name) { return dlsym(handle, name); }
-#endif
-
-#include <GL/gl.h>
-#include <cudaGL.h>
-#include <assert.h>
-
-#include "foundation/PxErrors.h"
-#include "foundation/PxErrorCallback.h"
-
-#define CU_INIT_UUID
-#include "CudaNode3DLowLatencyInterface.h"
-
-#define ENABLE_DEVICE_INFO_BRINGUP 0
-
-namespace physx
-{
-
-#if PX_VC
-#pragma warning(disable: 4191) //'operator/operation' : unsafe conversion from 'type of expression' to 'type required'
-#endif
-
-#define MIN_SM_MAJOR_VERSION 2
-#define MIN_SM_MINOR_VERSION 0
-
-class CudaCtxMgr : public PxCudaContextManager, public shdfnd::UserAllocated
-{
-public:
- CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback);
- ~CudaCtxMgr();
-
- bool safeDelayImport(PxErrorCallback& errorCallback);
- void acquireContext();
- void releaseContext();
-
- /* All these methods can be called without acquiring the context */
-
- PxCudaMemoryManager* getMemoryManager();
- PxGpuDispatcher* getGpuDispatcher();
-
- bool contextIsValid() const;
- bool supportsArchSM10() const; // G80
- bool supportsArchSM11() const; // G92
- bool supportsArchSM12() const;
- bool supportsArchSM13() const; // GT200
- bool supportsArchSM20() const; // GF100
- bool supportsArchSM30() const; // GK100
- bool supportsArchSM35() const; // GK110
- bool supportsArchSM50() const; // GM100
- bool supportsArchSM52() const; // GM200
- bool supportsArchSM60() const; // GP100
- bool isIntegrated() const; // true if GPU is integrated (MCP) part
- bool canMapHostMemory() const; // true if GPU map host memory to GPU
- int getDriverVersion() const;
- size_t getDeviceTotalMemBytes() const;
- int getMultiprocessorCount() const;
- int getSharedMemPerBlock() const;
- int getSharedMemPerMultiprocessor() const;
- unsigned int getMaxThreadsPerBlock() const;
- unsigned int getClockRate() const;
-
- const char* getDeviceName() const;
- CUdevice getDevice() const;
- const CUdevprop* getDeviceProperties() const;
-
- PxCudaInteropMode::Enum getInteropMode() const;
-
- void setUsingConcurrentStreams(bool);
- bool getUsingConcurrentStreams() const;
-
- bool registerResourceInCudaD3D(CUgraphicsResource& resource, void* resourcePointer, PxCudaInteropRegisterFlags flags);
- bool registerResourceInCudaGL(CUgraphicsResource& resource, uint32_t buffer, PxCudaInteropRegisterFlags flags);
- bool unregisterResourceInCuda(CUgraphicsResource resource);
-
- /*
- \brief Determine if the user has configured a dedicated PhysX GPU in the NV Control Panel
- \returns 1 if there is a dedicated PhysX GPU
- \returns 0 if there is NOT a dedicated PhysX GPU
- \returns -1 if the routine is not implemented
- */
- int usingDedicatedGPU() const;
-
- void release();
-
- CUcontext getContext() { return mCtx; }
-
-private:
-
- int mSceneCount;
- bool mIsValid;
- bool mOwnContext;
- CUdevice mDevHandle;
- CUcontext mCtx;
- CudaMemMgr* mMemMgr;
-
- GpuDispatcherImpl* mDispatcher;
- CUetblPhysXInterface* m_physXInterface;
-
- /* Cached device attributes, so threads can query w/o context */
- int mComputeCapMajor;
- int mComputeCapMinor;
- int mIsIntegrated;
- int mCanMapHost;
- int mDriverVersion;
- size_t mTotalMemBytes;
- int mMultiprocessorCount;
- int mMaxThreadsPerBlock;
- char mDeviceName[128];
- int mSharedMemPerBlock;
- int mSharedMemPerMultiprocessor;
- int mClockRate;
- PxCudaInteropMode::Enum mInteropMode;
- bool mUsingConcurrentStreams;
-
-#if PX_DEBUG
- static uint32_t mManagerRefCount;
- static uint32_t mContextRefCountTls;
-#endif
-};
-
-#if PX_DEBUG
-uint32_t CudaCtxMgr::mManagerRefCount = 0;
-uint32_t CudaCtxMgr::mContextRefCountTls = 0;
-#endif
-
-bool CudaCtxMgr::contextIsValid() const
-{
- return mIsValid;
-}
-bool CudaCtxMgr::supportsArchSM10() const
-{
- return mIsValid;
-}
-bool CudaCtxMgr::supportsArchSM11() const
-{
- return mIsValid && (mComputeCapMinor >= 1 || mComputeCapMajor > 1);
-}
-bool CudaCtxMgr::supportsArchSM12() const
-{
- return mIsValid && (mComputeCapMinor >= 2 || mComputeCapMajor > 1);
-}
-bool CudaCtxMgr::supportsArchSM13() const
-{
- return mIsValid && (mComputeCapMinor >= 3 || mComputeCapMajor > 1);
-}
-bool CudaCtxMgr::supportsArchSM20() const
-{
- return mIsValid && mComputeCapMajor >= 2;
-}
-bool CudaCtxMgr::supportsArchSM30() const
-{
- return mIsValid && mComputeCapMajor >= 3;
-}
-bool CudaCtxMgr::supportsArchSM35() const
-{
- return mIsValid && ((mComputeCapMajor > 3) || (mComputeCapMajor == 3 && mComputeCapMinor >= 5));
-}
-bool CudaCtxMgr::supportsArchSM50() const
-{
- return mIsValid && mComputeCapMajor >= 5;
-}
-bool CudaCtxMgr::supportsArchSM52() const
-{
- return mIsValid && ((mComputeCapMajor > 5) || (mComputeCapMajor == 5 && mComputeCapMinor >= 2));
-}
-bool CudaCtxMgr::supportsArchSM60() const
-{
- return mIsValid && mComputeCapMajor >= 6;
-}
-
-bool CudaCtxMgr::isIntegrated() const
-{
- return mIsValid && mIsIntegrated;
-}
-bool CudaCtxMgr::canMapHostMemory() const
-{
- return mIsValid && mCanMapHost;
-}
-int CudaCtxMgr::getDriverVersion() const
-{
- return mDriverVersion;
-}
-size_t CudaCtxMgr::getDeviceTotalMemBytes() const
-{
- return mTotalMemBytes;
-}
-int CudaCtxMgr::getMultiprocessorCount() const
-{
- return mMultiprocessorCount;
-}
-int CudaCtxMgr::getSharedMemPerBlock() const
-{
- return mSharedMemPerBlock;
-}
-int CudaCtxMgr::getSharedMemPerMultiprocessor() const
-{
- return mSharedMemPerMultiprocessor;
-}
-unsigned int CudaCtxMgr::getMaxThreadsPerBlock() const
-{
- return (unsigned int)mMaxThreadsPerBlock;
-}
-unsigned int CudaCtxMgr::getClockRate() const
-{
- return (unsigned int)mClockRate;
-}
-
-const char* CudaCtxMgr::getDeviceName() const
-{
- if (mIsValid)
- {
- return mDeviceName;
- }
- else
- {
- return "Invalid";
- }
-}
-
-CUdevice CudaCtxMgr::getDevice() const
-{
- if (mIsValid)
- {
- return mDevHandle;
- }
- else
- {
- return -1;
- }
-}
-
-PxCudaInteropMode::Enum CudaCtxMgr::getInteropMode() const
-{
- return mInteropMode;
-}
-
-void CudaCtxMgr::setUsingConcurrentStreams(bool value)
-{
- mUsingConcurrentStreams = value;
-}
-
-bool CudaCtxMgr::getUsingConcurrentStreams() const
-{
- return mUsingConcurrentStreams;
-}
-
-PxCudaMemoryManager* CudaCtxMgr::getMemoryManager()
-{
- if (mIsValid)
- {
- return mMemMgr;
- }
- else
- {
- return NULL;
- }
-}
-
-PxGpuDispatcher* CudaCtxMgr::getGpuDispatcher()
-{
- if (mIsValid)
- {
- return mDispatcher;
- }
- else
- {
- return NULL;
- }
-}
-
-int CudaCtxMgr::usingDedicatedGPU() const
-{
- if (PxCudaInteropMode::NO_INTEROP == getInteropMode())
- {
- return PhysXDeviceSettings::isUsingDedicatedGPU();
- }
- else
- {
- return 0; // not a dedicated GPU
- }
-}
-
-#define CUT_SAFE_CALL(call) { CUresult ret = call; \
- if( CUDA_SUCCESS != ret ) { PX_ASSERT(0); } }
-
-/* If a context is not provided, an ordinal must be given */
-CudaCtxMgr::CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback)
- : mSceneCount(0)
- , mOwnContext(false)
- , mMemMgr(0)
- , mDispatcher(0)
- , m_physXInterface(0)
- , mInteropMode(desc.interopMode)
- , mUsingConcurrentStreams(true)
-{
- CUresult status;
- mIsValid = false;
- mDeviceName[0] = 0;
-
- if (safeDelayImport(errorCallback) == false)
- {
- // The table where this info is found is here: https://wiki.nvidia.com/nvcompute/index.php/NVCompute#CUDA_Planning
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "NVIDIA Release 331 graphics driver and above is required for GPU acceleration.", __FILE__, __LINE__);
- return;
- }
-
- if (desc.ctx == 0)
- {
- int flags = CU_CTX_LMEM_RESIZE_TO_MAX | CU_CTX_SCHED_BLOCKING_SYNC | CU_CTX_MAP_HOST;
- class FoundationErrorReporter : public PxErrorCallback
- {
- public:
- FoundationErrorReporter(PxErrorCallback& ec)
- : errorCallback(&ec)
- {
- }
-
- virtual void reportError(PxErrorCode::Enum code, const char* message, const char* file, int line)
- {
- errorCallback->reportError( code, message, file, line);
- }
-
- PxErrorCallback* errorCallback;
- } foundationErrorReporter(errorCallback);
-
- int devOrdinal = PhysXDeviceSettings::getSuggestedCudaDeviceOrdinal(foundationErrorReporter);
- if (devOrdinal < 0)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "No PhysX capable GPU suggested.", __FILE__, __LINE__);
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "If you have a PhysX capable GPU, verify that PhysX is not set to CPU in the NVIDIA Control Panel.", __FILE__, __LINE__);
- return;
- }
-
- status = cuInit(0);
- if (CUDA_SUCCESS != status)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuInit failed", __FILE__, __LINE__);
- return;
- }
-
- // Try to create the context on Node3DLowLatency.
- // If that does not work, try to create the cuda context using cuCtxCreatePhysX,
- // since we must be on a driver that does not support cuCtxCreateOnNode3DLowLatency.
- cuGetExportTable((const void**)&m_physXInterface, (const CUuuid*)&CU_ETID_PhysXInterface);
-
- // if using a dedicated GPU or SLI we disable D3D interop (which is not supported over multiple GPUs)
- // this ensures the users control panel setting is always respected
- bool sliEnabled = false;
- if (mInteropMode != PxCudaInteropMode::NO_INTEROP && desc.graphicsDevice != NULL)
- {
- sliEnabled = PhysXDeviceSettings::isSLIEnabled(desc.graphicsDevice) == 1 ? true : false;
- }
-
- if (PhysXDeviceSettings::isUsingDedicatedGPU() == 1 || sliEnabled)
- {
- if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP ||
- mInteropMode == PxCudaInteropMode::D3D11_INTEROP)
- {
- mInteropMode = PxCudaInteropMode::NO_INTEROP;
- if (sliEnabled)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "D3D/CUDA interop cannot be used in tandem with SLI, disabling interop. Query PxCudaContextManager::getInteropMode() for interop status.",
- __FILE__,__LINE__);
- }
- }
- }
-
- if (mInteropMode == PxCudaInteropMode::NO_INTEROP)
- {
- status = cuDeviceGet(&mDevHandle, devOrdinal);
- if (CUDA_SUCCESS != status)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceGet failed",__FILE__,__LINE__);
- return;
- }
-
- if (m_physXInterface)
- status = m_physXInterface->cuCtxCreateOnNode3DLowLatency(&mCtx, (unsigned int)flags, mDevHandle);
- else
- status = cuCtxCreate(&mCtx, (unsigned int)flags, mDevHandle);
- if (CUDA_SUCCESS != status)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuCtxCreate failed",__FILE__,__LINE__);
- return;
- }
- mOwnContext = true;
- }
- else if (mInteropMode == PxCudaInteropMode::OGL_INTEROP)
- {
- status = cuDeviceGet(&mDevHandle, devOrdinal);
- if (CUDA_SUCCESS != status)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceGet failed",__FILE__,__LINE__);
- return;
- }
-
- status = cuGLCtxCreate(&mCtx, (unsigned int)flags, mDevHandle);
- if (CUDA_SUCCESS != status)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuCtxGLCreate failed",__FILE__,__LINE__);
- return;
- }
-
- status = cuGLInit();
- if (CUDA_SUCCESS != status)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuGLInit failed",__FILE__,__LINE__);
- return;
- }
- mOwnContext = true;
- }
-#if PX_WIN32 || PX_WIN64
- else if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP)
- {
- status = cuD3D10CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags,
- reinterpret_cast<ID3D10Device*>(desc.graphicsDevice));
-
- if (CUDA_SUCCESS != status)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuD3D10CtxCreate failed",__FILE__,__LINE__);
- return;
- }
- mOwnContext = true;
- }
- else if (mInteropMode == PxCudaInteropMode::D3D11_INTEROP)
- {
- status = cuD3D11CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags,
- reinterpret_cast<ID3D11Device*>(desc.graphicsDevice));
-
- if (CUDA_SUCCESS != status)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuD3D11CtxCreate failed",__FILE__,__LINE__);
- return;
- }
- mOwnContext = true;
- }
-#endif //PX_WIN32 || PX_WIN64
- else
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "Requested interop type is not supported!",__FILE__,__LINE__);
- return;
- }
- }
- else
- {
- mCtx = *desc.ctx;
- status = cuCtxGetDevice(&mDevHandle);
- if (CUDA_SUCCESS != status)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuCtxGetDevice failed",__FILE__,__LINE__);
- return;
- }
- }
-
- // Verify we can at least allocate a CUDA event from this context
- CUevent testEvent;
- if (CUDA_SUCCESS == cuEventCreate(&testEvent, 0))
- {
- cuEventDestroy(testEvent);
- }
- else
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "CUDA context validation failed",__FILE__,__LINE__);
- return;
- }
-
- status = cuDeviceGetName(mDeviceName, sizeof(mDeviceName), mDevHandle);
- if (CUDA_SUCCESS != status)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceGetName failed",__FILE__,__LINE__);
- return;
- }
-
- cuDeviceGetAttribute(&mSharedMemPerBlock, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, mDevHandle);
- cuDeviceGetAttribute(&mSharedMemPerMultiprocessor, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, mDevHandle);
- cuDeviceGetAttribute(&mClockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, mDevHandle);
- cuDeviceGetAttribute(&mComputeCapMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, mDevHandle);
- cuDeviceGetAttribute(&mComputeCapMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, mDevHandle);
- cuDeviceGetAttribute(&mIsIntegrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, mDevHandle);
- cuDeviceGetAttribute(&mCanMapHost, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, mDevHandle);
- cuDeviceGetAttribute(&mMultiprocessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, mDevHandle);
- cuDeviceGetAttribute(&mMaxThreadsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, mDevHandle);
-
- status = cuDeviceTotalMem((size_t*)&mTotalMemBytes, mDevHandle);
- if (CUDA_SUCCESS != status)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceTotalMem failed",__FILE__,__LINE__);
- return;
- }
-
- // minimum compute capability is MIN_SM_MAJOR_VERSION.MIN_SM_MINOR_VERSION
- if ((mComputeCapMajor < MIN_SM_MAJOR_VERSION) ||
- (mComputeCapMajor == MIN_SM_MAJOR_VERSION && mComputeCapMinor < MIN_SM_MINOR_VERSION))
- {
- char buffer[256];
- physx::shdfnd::snprintf(buffer, 256, "Minimum GPU compute capability %d.%d is required", MIN_SM_MAJOR_VERSION, MIN_SM_MINOR_VERSION);
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING,buffer,__FILE__,__LINE__);
- return;
- }
-
- mMemMgr = PX_NEW(CudaMemMgr)(*this, errorCallback);
- if (mMemMgr == NULL)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "CudaMemMgr failed: Unable to allocate heaps",__FILE__,__LINE__);
- return;
- }
-
- bool succ = true;
- for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
- {
- PxCudaBufferType type(PxCudaBufferMemorySpace::Enum(i), PxCudaBufferFlags::F_READ_WRITE);
- succ &= mMemMgr->setBaseSize(type, desc.memoryBaseSize[i]);
- succ &= mMemMgr->setPageSize(type, desc.memoryPageSize[i]);
- succ &= mMemMgr->setMaxMemorySize(type, desc.maxMemorySize[i]);
- PX_ASSERT(succ);
- if (!succ)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "CudaMemMgr failed: Invalid memory parameter",__FILE__,__LINE__);
- return;
- }
- }
-
-#if PX_DEBUG
- if(!mManagerRefCount++)
- mContextRefCountTls = shdfnd::TlsAlloc();
- if(!shdfnd::TlsGet(mContextRefCountTls))
- CUT_SAFE_CALL(cuCtxSetCurrent(0));
-#endif
-
- mIsValid = true;
- mDispatcher = PX_NEW(GpuDispatcherImpl)(errorCallback, *this);
- if (!mDispatcher || mDispatcher->failureDetected())
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "Failed to create functional GPU dispatcher",__FILE__,__LINE__);
- mIsValid = false;
- return;
- }
-
- mDispatcher->start();
-
-#if ENABLE_DEVICE_INFO_BRINGUP
- // Device info (Enable for Amodel and Emulator testing)
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Device Name: %s", mDeviceName);
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Shared Memory Per Block: %d", mSharedMemPerBlock);
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Shared Memory Per Multiprocessor: %d", mSharedMemPerMultiprocessor);
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Number of SM: %d", mMultiprocessorCount);
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Max Threads Per Block: %d", mMaxThreadsPerBlock);
-#endif
-}
-
-/* Some driver version mismatches can cause delay import crashes. Load NVCUDA.dll
- * manually, verify its version number, then allow delay importing to bind all the
- * APIs.
- */
-bool CudaCtxMgr::safeDelayImport(PxErrorCallback& errorCallback)
-{
-#if PX_WIN32 || PX_WIN64
-#ifdef PX_SECURE_LOAD_LIBRARY
- HMODULE hCudaDriver = nvLoadSystemLibrary("nvcuda.dll");
-#else
- HMODULE hCudaDriver = LoadLibrary("nvcuda.dll");
-#endif
-#elif PX_LINUX
- void* hCudaDriver = dlopen("libcuda.so", RTLD_NOW);
-#endif
- if (!hCudaDriver)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "nvcuda.dll not found or could not be loaded.", __FILE__, __LINE__);
- return false;
- }
-
- typedef CUresult(CUDAAPI * pfnCuDriverGetVersion_t)(int*);
- pfnCuDriverGetVersion_t pfnCuDriverGetVersion = (pfnCuDriverGetVersion_t) GetProcAddress(hCudaDriver, "cuDriverGetVersion");
- if (!pfnCuDriverGetVersion)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "cuDriverGetVersion missing in nvcuda.dll.", __FILE__, __LINE__);
- return false;
- }
-
- CUresult status = pfnCuDriverGetVersion(&mDriverVersion);
- if (status != CUDA_SUCCESS)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Retrieving CUDA driver version failed.", __FILE__, __LINE__);
- return false;
- }
-
- /* Let's require a driver version >= to the version we compile against
- * Currently, CUDA_VERSION is 6000 or 6.0, but APEX still uses CUDA 5.0 so we can't assert on 6.0 yet.
- */
- PX_COMPILE_TIME_ASSERT(5000 <= CUDA_VERSION);
-
- if (mDriverVersion < CUDA_VERSION)
- {
- char buffer[256];
- physx::shdfnd::snprintf(buffer, 256, "CUDA driver version is %u, expected at least %u.", mDriverVersion, CUDA_VERSION);
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, buffer, __FILE__,__LINE__);
- return false;
- }
-
- /* Now trigger delay import and API binding */
- status = cuDriverGetVersion(&mDriverVersion);
- if (status != CUDA_SUCCESS)
- {
- errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Failed to bind CUDA API.", __FILE__, __LINE__);
- return false;
- }
-
- /* Not strictly necessary, but good practice */
-#if PX_WIN32 | PX_WIN64
- FreeLibrary(hCudaDriver);
-#elif PX_LINUX
- dlclose(hCudaDriver);
-#endif
-
-
- return true;
-}
-
-void CudaCtxMgr::release()
-{
- delete this;
-}
-
-CudaCtxMgr::~CudaCtxMgr()
-{
- if (mDispatcher)
- {
- releaseGpuDispatcher(*mDispatcher);
- }
- if (mMemMgr)
- {
- delete mMemMgr;
- }
- if (mOwnContext)
- {
- CUT_SAFE_CALL(cuCtxDestroy(mCtx));
- }
-
-#if PX_DEBUG
- if(!--mManagerRefCount)
- shdfnd::TlsFree(mContextRefCountTls);
-#endif
-}
-
-bool CudaCtxMgr::registerResourceInCudaGL(CUgraphicsResource& resource, uint32_t buffer, PxCudaInteropRegisterFlags flags)
-{
- CUresult ret = CUDA_ERROR_UNKNOWN;
-
- acquireContext();
-
- PX_ASSERT(mInteropMode == PxCudaInteropMode::OGL_INTEROP);
-
- ret = cuGraphicsGLRegisterBuffer(&resource, (GLuint) buffer, uint32_t(flags));
-
- releaseContext();
-
- return ret == CUDA_SUCCESS;
-}
-
-bool CudaCtxMgr::registerResourceInCudaD3D(CUgraphicsResource& resource, void* resourcePointer, PxCudaInteropRegisterFlags flags)
-{
- CUresult ret = CUDA_ERROR_UNKNOWN;
-#if PX_WINDOWS_FAMILY
- acquireContext();
-
- switch (mInteropMode)
- {
- case PxCudaInteropMode::D3D10_INTEROP:
- ret = cuGraphicsD3D10RegisterResource(&resource, (ID3D10Resource*)resourcePointer, uint32_t(flags));
- break;
- case PxCudaInteropMode::D3D11_INTEROP:
- ret = cuGraphicsD3D11RegisterResource(&resource, (ID3D11Resource*)resourcePointer, uint32_t(flags));
- break;
- case PxCudaInteropMode::NO_INTEROP:
- case PxCudaInteropMode::OGL_INTEROP:
- case PxCudaInteropMode::COUNT:
- default:
- PX_ALWAYS_ASSERT_MESSAGE("unexpected state in registerResourceInCuda3D");
- }
-
- releaseContext();
-#else
- PX_UNUSED(resource);
- PX_UNUSED(resourcePointer);
- PX_UNUSED(flags);
-#endif //PX_WINDOWS_FAMILY
- return ret == CUDA_SUCCESS;
-}
-
-bool CudaCtxMgr::unregisterResourceInCuda(CUgraphicsResource resource)
-{
- CUresult ret = CUDA_ERROR_UNKNOWN;
-
- acquireContext();
-
- ret = cuGraphicsUnregisterResource(resource);
-
- releaseContext();
-
- return ret == CUDA_SUCCESS;
-}
-
-void CudaCtxMgr::acquireContext()
-{
- CUcontext ctx = 0;
- CUT_SAFE_CALL(cuCtxGetCurrent(&ctx));
-
- if (ctx != mCtx)
- {
-#if PX_DEBUG
- PX_ASSERT(!shdfnd::TlsGet(mContextRefCountTls));
-#endif
- CUT_SAFE_CALL(cuCtxSetCurrent(mCtx));
- }
-
-#if PX_DEBUG
- char* refCount = (char*)shdfnd::TlsGet(mContextRefCountTls);
- shdfnd::TlsSet(mContextRefCountTls, ++refCount);
-#endif
-}
-
-void CudaCtxMgr::releaseContext()
-{
-#if PX_DEBUG
- char* refCount = (char*)shdfnd::TlsGet(mContextRefCountTls);
- shdfnd::TlsSet(mContextRefCountTls, --refCount);
- // see DE8475
- if(!refCount)
- CUT_SAFE_CALL(cuCtxSetCurrent(0));
-#endif
-}
-
-#if PX_SUPPORT_GPU_PHYSX
-extern "C" void initUtilKernels();
-
-PxCudaContextManager* createCudaContextManager(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback)
-{
- //this call is needed to force UtilKernels linkage in case someone links PxCudaContextManager as Static Library!
- initUtilKernels();
-
- return PX_NEW(CudaCtxMgr)(desc, errorCallback);
-}
-
-#endif
-
-} // end physx namespace
-
-
diff --git a/PxShared/src/cudamanager/src/CudaKernelWrangler.cpp b/PxShared/src/cudamanager/src/CudaKernelWrangler.cpp
deleted file mode 100644
index 7579d63..0000000
--- a/PxShared/src/cudamanager/src/CudaKernelWrangler.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "task/PxGpuDispatcher.h"
-#include "cudamanager/PxCudaContextManager.h"
-
-#include "foundation/PxAssert.h"
-#include "foundation/PxErrorCallback.h"
-
-#include "PsString.h"
-
-#include "CudaKernelWrangler.h"
-
-#include <cuda.h>
-#include <texture_types.h>
-
-/**
- * Workaround hacks for using nvcc --compiler output object files
- * without linking with CUDART. We must implement our own versions
- * of these functions that the object files are hard-coded to call into.
- */
-
-#define CUT_SAFE_CALL(call) { CUresult ret = call; if( CUDA_SUCCESS != ret ) { PX_ASSERT(!ret); } }
-
-#define MAX_MODULES 64 // Max number of .cu files you will build
-static void* gModuleTable[ MAX_MODULES ];
-static int gNumModules = 0;
-
-#define MAX_FUNCTIONS 256 // Max number of kernel of entry points
-typedef struct
-{
- int moduleIndex;
- const char* functionName;
-} cuFuncDesc;
-static cuFuncDesc gFunctionTable[ MAX_FUNCTIONS ];
-static int gNumFunctions = 0;
-
-using namespace physx::shdfnd;
-using namespace physx;
-
-KernelWrangler::KernelWrangler(PxGpuDispatcher& gd, PxErrorCallback& errorCallback, const char** funcNames, uint16_t numFuncs)
- : mError(false)
- , mCuFunctions(PX_DEBUG_EXP("CuFunctions"))
- , mCuModules(PX_DEBUG_EXP("CuModules"))
- , mGpuDispatcher(gd)
- , mErrorCallback(errorCallback)
-{
- PxScopedCudaLock _lock_(*gd.getCudaContextManager());
-
- /* Formally load the CUDA modules, get CUmodule handles */
- mCuModules.resize((uint32_t)gNumModules);
- for (int i = 0 ; i < gNumModules ; ++i)
- {
- CUresult ret = cuModuleLoadDataEx(&mCuModules[(uint32_t)i], gModuleTable[i], 0, NULL, NULL);
- if (ret != CUDA_SUCCESS && ret != CUDA_ERROR_NO_BINARY_FOR_GPU)
- {
- mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, "Failed to load CUDA module data.", __FILE__, __LINE__);
- mError = true;
- return;
- }
- }
-
- /* matchup funcNames to CUDA modules, get CUfunction handles */
- mCuFunctions.resize(numFuncs);
- mCuFuncModIndex.resize(numFuncs);
- for (uint32_t i = 0 ; i < numFuncs ; ++i)
- {
- for (int j = 0; ; ++j)
- {
- if(j == gNumFunctions)
- {
- char buffer[256];
- physx::shdfnd::snprintf(buffer, 256, "Could not find registered CUDA function '%s'.", funcNames[i]);
- mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, buffer, __FILE__, __LINE__);
- mError = true;
- return;
- }
-
- if (!physx::shdfnd::strcmp(gFunctionTable[j].functionName, funcNames[i]))
- {
- mCuFuncModIndex[i] = (uint16_t)gFunctionTable[j].moduleIndex;
- CUresult ret = cuModuleGetFunction(&mCuFunctions[i], mCuModules[mCuFuncModIndex[i]], funcNames[i]);
- if (ret != CUDA_SUCCESS)
- {
- char buffer[256];
- physx::shdfnd::snprintf(buffer, 256, "Could not find CUDA module containing function '%s'.", funcNames[i]);
- mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, buffer, __FILE__, __LINE__);
- mError = true;
- return;
- }
- break;
- }
- }
- }
-}
-
-KernelWrangler::~KernelWrangler()
-{
- if (mCuModules.size())
- {
- PxScopedCudaLock _lock_(*mGpuDispatcher.getCudaContextManager());
-
- for (uint32_t i = 0 ; i < mCuModules.size() ; i++)
- if(mCuModules[i])
- CUT_SAFE_CALL(cuModuleUnload(mCuModules[i]));
- }
-}
-
-void const* const* KernelWrangler::getImages()
-{
- return gModuleTable;
-}
-
-int KernelWrangler::getNumImages()
-{
- return gNumModules;
-}
-
-/*
- * These calls are all made _before_ main() during static initialization
- * of this DLL.
- */
-
-#include <driver_types.h>
-
-#if PX_WINDOWS_FAMILY
-#define CUDARTAPI __stdcall
-#endif
-
-struct uint3;
-struct dim3;
-
-extern "C"
-void** CUDARTAPI __cudaRegisterFatBinary(void* fatBin)
-{
- //HACK to get real fatbin in CUDA 4.0
- struct CUIfatbinStruct
- {
- int magic;
- int version;
- void *fatbinArray;
- char *fatbinFile;
- };
- const CUIfatbinStruct *fatbinStruct = (const CUIfatbinStruct *)fatBin;
- if (fatbinStruct->magic == 0x466243B1)
- {
- fatBin = fatbinStruct->fatbinArray;
- }
-
- if (gNumModules < MAX_MODULES)
- {
- gModuleTable[ gNumModules ] = fatBin;
- return (void**)(size_t) gNumModules++;
- }
- return NULL;
-}
-
-extern "C"
-void CUDARTAPI __cudaUnregisterFatBinary(void** fatCubinHandle)
-{
- gModuleTable[(int)(size_t) fatCubinHandle ] = 0;
-}
-
-extern "C"
-void CUDARTAPI __cudaRegisterTexture(void**, const struct textureReference*, const void**, const char*, int, int, int)
-{
-}
-
-extern "C" void CUDARTAPI __cudaRegisterVar(void**, char*, char*, const char*, int, int, int, int)
-{
-}
-
-
-extern "C" void CUDARTAPI __cudaRegisterShared(void**, void**)
-{
-}
-
-extern "C"
-void CUDARTAPI __cudaRegisterFunction(void** fatCubinHandle, const char*,
- char*, const char* deviceName, int, uint3*, uint3*, dim3*, dim3*, int*)
-{
- if (gNumFunctions < MAX_FUNCTIONS)
- {
- // We need this association of function to module in order to find textures and globals
- gFunctionTable[ gNumFunctions ].moduleIndex = (int)(size_t) fatCubinHandle;
- gFunctionTable[ gNumFunctions ].functionName = deviceName;
- gNumFunctions++;
- }
-}
-
-/* These functions are implemented just to resolve link dependencies */
-
-extern "C"
-cudaError_t CUDARTAPI cudaLaunch(const char* entry)
-{
- PX_UNUSED(entry);
- return cudaSuccess;
-}
-
-extern "C"
-cudaError_t CUDARTAPI cudaSetupArgument(const void*, size_t, size_t)
-{
- return cudaSuccess;
-}
-
-extern "C"
-struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc(
- int x, int y, int z, int w, enum cudaChannelFormatKind f)
-{
- struct cudaChannelFormatDesc desc;
- desc.x = x;
- desc.y = y;
- desc.z = z;
- desc.w = w;
- desc.f = f;
- return desc;
-}
-
diff --git a/PxShared/src/cudamanager/src/CudaMemoryManager.cpp b/PxShared/src/cudamanager/src/CudaMemoryManager.cpp
deleted file mode 100644
index b1c6f94..0000000
--- a/PxShared/src/cudamanager/src/CudaMemoryManager.cpp
+++ /dev/null
@@ -1,649 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "cudamanager/PxCudaContextManager.h"
-#include "foundation/PxMath.h"
-#include "foundation/PxMemory.h"
-
-#include "CudaMemoryManager.h"
-#include "HeapManagerRef.h"
-
-#include <cuda.h>
-
-#define DEVICE_BASE_SIZE (0)
-#define DEVICE_PAGE_SIZE ( 2 * 1024*1024)
-#define PINNED_BASE_SIZE (0)
-#define PINNED_PAGE_SIZE ( 2 * 1024*1024)
-#define WC_BASE_SIZE (0)
-#define WC_PAGE_SIZE ( 2 * 1024*1024)
-#define MIN_BLOCK_SIZE 2048
-
-
-#define CMM_DELETE_SINGLE(x) { if(x) delete x; }
-#define CMM_DELETE_ARRAY(x) { if(x) delete [] x; }
-
-using namespace physx;
-
-CudaMemMgr::CudaMemMgr(PxCudaContextManager& mgr, physx::PxErrorCallback& errorCallback)
- : mErrorCallback(errorCallback)
- , mBufferPool("mBufferPool", 1024)
- , mInitialized(false)
- , mMgr(mgr)
- , mDebugDisableAllocs(false)
-{
- for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
- {
- mHeap[i] = NULL;
- mMemoryAllocator[i] = NULL;
- mMemoryMaxSize[i] = size_t(-1);
- }
-
- mMemoryBaseSize[PxCudaBufferMemorySpace::T_GPU] = DEVICE_BASE_SIZE;
- mMemoryBaseSize[PxCudaBufferMemorySpace::T_PINNED_HOST] = PINNED_BASE_SIZE;
- mMemoryBaseSize[PxCudaBufferMemorySpace::T_WRITE_COMBINED] = WC_BASE_SIZE;
- mMemoryBaseSize[PxCudaBufferMemorySpace::T_HOST] = 0;
-
- mMemoryPageSize[PxCudaBufferMemorySpace::T_GPU] = DEVICE_PAGE_SIZE;
- mMemoryPageSize[PxCudaBufferMemorySpace::T_PINNED_HOST] = PINNED_PAGE_SIZE;
- mMemoryPageSize[PxCudaBufferMemorySpace::T_WRITE_COMBINED] = WC_PAGE_SIZE;
- mMemoryPageSize[PxCudaBufferMemorySpace::T_HOST] = PINNED_PAGE_SIZE;
-}
-
-
-CudaMemMgr::~CudaMemMgr()
-{
- for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
- {
- CMM_DELETE_SINGLE(mHeap[i]);
- CMM_DELETE_SINGLE(mMemoryAllocator[i]);
- }
-}
-
-
-PX_INLINE bool CudaMemMgr::initialize()
-{
- if (mInitialized)
- {
- return true;
- }
-
- for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
- {
- mHeap[i] = PX_NEW(HeapManagerRef)(mErrorCallback, false);
- PX_ASSERT(mHeap[i]);
- }
-
- mMemoryAllocator[PxCudaBufferMemorySpace::T_GPU] = PX_NEW(DeviceMemAllocator)(mMgr, mMemoryMaxSize[PxCudaBufferMemorySpace::T_GPU]);
- mMemoryAllocator[PxCudaBufferMemorySpace::T_PINNED_HOST] = PX_NEW(PinnedMemAllocator)(mMgr, mMemoryMaxSize[PxCudaBufferMemorySpace::T_PINNED_HOST]);
- mMemoryAllocator[PxCudaBufferMemorySpace::T_WRITE_COMBINED] = PX_NEW(WriteCombinedMemAllocator)(mMgr, mMemoryMaxSize[PxCudaBufferMemorySpace::T_WRITE_COMBINED]);
- mMemoryAllocator[PxCudaBufferMemorySpace::T_HOST] = PX_NEW(HostMemAllocator)(mMemoryMaxSize[PxCudaBufferMemorySpace::T_HOST]);
-
- bool succ = true;
- for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
- {
- succ &= mHeap[i]->init(mMemoryAllocator[i], mMemoryBaseSize[i], mMemoryPageSize[i], MIN_BLOCK_SIZE);
- PX_ASSERT(succ);
- }
-
- for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
- {
- succ &= mHeap[i] && mMemoryAllocator[i];
- }
-
- if (!succ)
- {
- for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
- {
- CMM_DELETE_SINGLE(mHeap[i]);
- CMM_DELETE_SINGLE(mMemoryAllocator[i]);
- }
- mInitialized = false;
- }
-
- return mInitialized = succ;;
-}
-
-
-bool CudaMemMgr::setPageSize(const PxCudaBufferType& type, size_t size)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- if (!mInitialized)
- {
- mMemoryPageSize[type.memorySpace] = PxMax(mMemoryPageSize[type.memorySpace], size);
- return true;
- }
- else
- {
- bool ret = mHeap[type.memorySpace]->setPageSize(size);
- mMemoryPageSize[type.memorySpace] = ret ? size : mMemoryPageSize[type.memorySpace];
- return ret;
- }
-}
-
-
-bool CudaMemMgr::setBaseSize(const PxCudaBufferType& type, size_t size)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- if (mInitialized || (((size - 1)&size) != 0))
- {
- return false;
- }
- else
- {
- mMemoryBaseSize[type.memorySpace] = PxMax(mMemoryBaseSize[type.memorySpace], size);
- return true;
- }
-}
-
-
-size_t CudaMemMgr::getBaseSize(const PxCudaBufferType& type)
-{
- return mMemoryBaseSize[type.memorySpace];
-}
-
-
-size_t CudaMemMgr::getPageSize(const PxCudaBufferType& type)
-{
- return mMemoryPageSize[type.memorySpace];
-}
-
-
-bool CudaMemMgr::setMaxMemorySize(const PxCudaBufferType& type, size_t size)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- if (mInitialized)
- {
- switch (type.memorySpace)
- {
- case PxCudaBufferMemorySpace::T_GPU:
- static_cast<DeviceMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size);
- return true;
- break;
- case PxCudaBufferMemorySpace::T_PINNED_HOST:
- static_cast<PinnedMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size);
- return true;
- break;
- case PxCudaBufferMemorySpace::T_HOST:
- static_cast<HostMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size);
- return true;
- break;
- case PxCudaBufferMemorySpace::T_WRITE_COMBINED:
- static_cast<WriteCombinedMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size);
- return true;
- break;
- case PxCudaBufferMemorySpace::COUNT:
- default:
- PX_ASSERT(!"unknown memory type");
- break;
- }
- }
- else
- {
- mMemoryMaxSize[type.memorySpace] = PxMax(mMemoryMaxSize[type.memorySpace], size);
- return true;
- }
-
- return false;
-}
-
-size_t CudaMemMgr::getMaxMemorySize(const PxCudaBufferType& type)
-{
- if (mInitialized)
- {
- switch (type.memorySpace)
- {
- case PxCudaBufferMemorySpace::T_GPU:
- return static_cast<DeviceMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize();
- break;
- case PxCudaBufferMemorySpace::T_PINNED_HOST:
- return static_cast<PinnedMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize();
- break;
- case PxCudaBufferMemorySpace::T_HOST:
- return static_cast<HostMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize();
- break;
- case PxCudaBufferMemorySpace::T_WRITE_COMBINED:
- return static_cast<WriteCombinedMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize();
- break;
- case PxCudaBufferMemorySpace::COUNT:
- default:
- PX_ASSERT(!"unknown memory type");
- break;
- }
- }
- return 0;
-}
-
-bool CudaMemMgr::reserve(const PxCudaBufferType& type, size_t size)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- if (!mInitialized)
- {
- if (!initialize())
- {
- return false;
- }
- }
-
- return mHeap[type.memorySpace]->reserve(size);
-}
-
-
-PxCudaBuffer* CudaMemMgr::alloc(const PxCudaBufferType& type, size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
- PxCudaBufferPtr addr = alloc(type.memorySpace, size, PX_ALLOC_INFO_PARAMS_INPUT());
-
- shdfnd::Mutex::ScopedLock lock(mMutex);
- CudaBuffer* buffer = NULL;
- if (addr)
- {
- buffer = mBufferPool.construct(type);
- if (buffer)
- {
- buffer->init(addr, size, *this, PX_ALLOC_INFO_PARAMS_INPUT());
- }
- }
- return buffer;
-}
-
-PxCudaBufferPtr CudaMemMgr::alloc(PxCudaBufferMemorySpace::Enum memorySpace, size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- if (!mInitialized)
- {
- if (!initialize())
- {
- return 0;
- }
- }
-
- if (mDebugDisableAllocs)
- {
- return 0;
- }
-
- return reinterpret_cast<PxCudaBufferPtr>(mHeap[memorySpace]->alloc(size, PX_ALLOC_INFO_PARAMS_INPUT()));
-}
-
-bool CudaMemMgr::free(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- if (!mInitialized)
- {
- return false;
- }
-
- if (addr)
- {
- return mHeap[memorySpace]->free((void*)(addr));
- }
- else
- {
- return false;
- }
-}
-
-bool CudaMemMgr::realloc(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr, size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- if (!mInitialized)
- {
- return false;
- }
-
- if (!size)
- {
- return false;
- }
-
- if (mDebugDisableAllocs)
- {
- return NULL;
- }
-
- bool ret = false;
- if (addr)
- {
- ret = mHeap[memorySpace]->realloc((void*)(addr), size, PX_ALLOC_INFO_PARAMS_INPUT());
- }
-
- return ret;
-}
-
-void CudaMemMgr::getStats(const PxCudaBufferType& type, PxCudaMemoryManagerStats& outStats)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- if (!mInitialized)
- {
- return;
- }
-
- ApexHeapStats hpStats;
- mHeap[type.memorySpace]->getStats(hpStats, HeapStatsFlags::F_BASIC_STATS | HeapStatsFlags::F_ALLOC_ID_STATS);
-
- outStats.heapSize = hpStats.heapSize;
- outStats.totalAllocated = hpStats.totalAllocated;
- outStats.maxAllocated = hpStats.maxAllocated;
- PxMemCopy(outStats.allocIdStats, hpStats.allocIdStats, sizeof(PxAllocIdStats)*PxAllocId::NUM_IDS);
-}
-
-
-bool CudaMemMgr::free(CudaBuffer& buffer)
-{
- PxCudaBufferMemorySpace::Enum memSpace = buffer.getTypeFast().memorySpace;
- PxCudaBufferPtr addr = buffer.getPtrFast();
-
- {
- shdfnd::Mutex::ScopedLock lock(mMutex);
- mBufferPool.destroy(&buffer);
- }
-
- return free(memSpace, addr);
-}
-
-
-bool CudaMemMgr::realloc(CudaBuffer& buffer, size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
- return realloc(buffer.getTypeFast().memorySpace, buffer.getPtrFast(), size, PX_ALLOC_INFO_PARAMS_INPUT());
-}
-
-PxCudaBufferPtr CudaMemMgr::getMappedPinnedPtr(PxCudaBufferPtr hostPtr)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
- void* base = mHeap[PxCudaBufferMemorySpace::T_PINNED_HOST]->findBaseAddress((void*)hostPtr);
- if (base)
- {
- size_t offset = ((PinnedMemAllocator*)mMemoryAllocator[PxCudaBufferMemorySpace::T_PINNED_HOST])->getMappedPinnedOffset(base);
- return hostPtr + offset;
- }
- return 0;
-}
-
-bool CudaBuffer::free()
-{
- return mMemManager->free(*this);
-}
-
-
-bool CudaBuffer::realloc(size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
- return mMemManager->realloc(*this, size, PX_ALLOC_INFO_PARAMS_INPUT());
-}
-
-
-DeviceMemAllocator::DeviceMemAllocator(PxCudaContextManager& mgr, size_t maxSize)
- : mMgr(mgr)
- , mMaxSize(maxSize)
- , mAllocSize(0)
-{}
-
-
-DeviceMemAllocator::~DeviceMemAllocator()
-{
- PX_ASSERT(mAllocSize == 0);
-}
-
-
-void* DeviceMemAllocator::alloc(const size_t size)
-{
- if (mAllocSize + size > mMaxSize)
- {
- return NULL;
- }
- else
- {
- PxScopedCudaLock lock(mMgr);
- CUdeviceptr dPtr;
- CUresult result = cuMemAlloc(&dPtr, uint32_t(size));
-
- if (result == CUDA_SUCCESS)
- {
- mAllocSize += size;
- return (void*)(size_t)(dPtr);
- }
- else
- {
- PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate device memory.");
- return NULL;
- }
- }
-}
-
-
-void DeviceMemAllocator::free(void* addr, const size_t size)
-{
- PxScopedCudaLock lock(mMgr);
- PX_ASSERT(mAllocSize >= size);
- CUresult result = cuMemFree((CUdeviceptr)(size_t)(addr));
- PX_UNUSED(result);
- PX_ASSERT(result == CUDA_SUCCESS);
- mAllocSize -= size;
-}
-
-
-PinnedMemAllocator::PinnedMemAllocator(PxCudaContextManager& mgr, size_t maxSize)
- : mMgr(mgr)
- , mMaxSize(maxSize)
- , mAllocSize(0)
-{
-}
-
-
-PinnedMemAllocator::~PinnedMemAllocator()
-{
- PX_ASSERT(mAllocSize == 0);
-}
-
-
-void* PinnedMemAllocator::alloc(const size_t size)
-{
- if (mAllocSize + size > mMaxSize)
- {
- return NULL;
- }
- else
- {
- PxScopedCudaLock lock(mMgr);
- void* hPtr;
- unsigned int flags = 0;
-
- if (mMgr.canMapHostMemory())
- {
- flags |= CU_MEMHOSTALLOC_DEVICEMAP;
- }
- CUresult result = cuMemHostAlloc(&hPtr, uint32_t(size), flags);
-
- if (result == CUDA_SUCCESS)
- {
- if (hPtr)
- {
- mAllocSize += size;
- }
-
- if (mMgr.canMapHostMemory())
- {
- CUdeviceptr dptr = 0;
- cuMemHostGetDevicePointer(&dptr, hPtr, 0);
- mMappedPinnedPtrs.insert(hPtr, size_t(dptr));
- }
-
- return hPtr;
- }
- else
- {
- PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate pinned memory.");
- return NULL;
- }
- }
-}
-
-
-void PinnedMemAllocator::free(void* addr, const size_t size)
-{
- PxScopedCudaLock lock(mMgr);
- PX_ASSERT(mAllocSize >= size);
-
- if (mMgr.canMapHostMemory())
- {
- PX_ASSERT(mMappedPinnedPtrs.find(addr));
- mMappedPinnedPtrs.erase(addr);
- }
-
- CUresult result = cuMemFreeHost(addr);
- PX_UNUSED(result);
- PX_ASSERT(result == CUDA_SUCCESS);
- mAllocSize -= size;
-}
-
-
-WriteCombinedMemAllocator::WriteCombinedMemAllocator(PxCudaContextManager& mgr, size_t maxSize)
- : mMgr(mgr)
- , mMaxSize(maxSize)
- , mAllocSize(0)
-{
- mWcMemSupport = mMgr.getDriverVersion() >= 2020 ? WcMem::SUPPORTED : WcMem::NOT_SUPPORTED;
-}
-
-
-WriteCombinedMemAllocator::~WriteCombinedMemAllocator()
-{
- PX_ASSERT(mAllocSize == 0);
-}
-
-
-bool WriteCombinedMemAllocator::isWcMemSupported()
-{
- if (mWcMemSupport == WcMem::SUPPORTED)
- {
- return true;
- }
- else
- {
- PX_ASSERT(mWcMemSupport == WcMem::NOT_SUPPORTED);
- return false;
- }
-}
-
-
-void* WriteCombinedMemAllocator::alloc(const size_t size)
-{
- if (mAllocSize + size > mMaxSize)
- {
- return NULL;
- }
- else
- {
- PxScopedCudaLock lock(mMgr);
- void* hPtr = NULL;
-
- unsigned int flags = CU_MEMHOSTALLOC_WRITECOMBINED;
-
- if (mMgr.canMapHostMemory())
- {
- flags |= CU_MEMHOSTALLOC_DEVICEMAP;
- }
-
- bool success = isWcMemSupported() && (cuMemHostAlloc(&hPtr, size, flags) == CUDA_SUCCESS);
- if (success)
- {
- if (hPtr)
- {
- mAllocSize += size;
- }
-
- return hPtr;
- }
- else
- {
- PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate write combined memory.");
- return NULL;
- }
- }
-}
-
-
-void WriteCombinedMemAllocator::free(void* addr, const size_t size)
-{
- PxScopedCudaLock lock(mMgr);
- PX_ASSERT(mAllocSize >= size);
- CUresult result = cuMemFreeHost(addr);
- PX_ASSERT(result == CUDA_SUCCESS);
- PX_UNUSED(result);
- mAllocSize -= size;
-}
-
-
-HostMemAllocator::HostMemAllocator(size_t maxSize)
- : mMaxSize(maxSize)
- , mAllocSize(0)
-{
-}
-
-
-HostMemAllocator::~HostMemAllocator()
-{
- PX_ASSERT(mAllocSize == 0);
-}
-
-
-void* HostMemAllocator::alloc(const size_t size)
-{
- if (mAllocSize + size > mMaxSize)
- {
- return NULL;
- }
- else
- {
- void* ret = PX_ALLOC(size, "host memory");
- if (ret)
- {
- mAllocSize += size;
- return ret;
- }
- else
- {
- PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate host memory.");
- return NULL;
- }
- }
-}
-
-
-void HostMemAllocator::free(void* addr, const size_t size)
-{
- PX_ASSERT(mAllocSize >= size);
- PX_FREE(addr);
- mAllocSize -= size;
-}
-
diff --git a/PxShared/src/cudamanager/src/CudaMemoryManager.h b/PxShared/src/cudamanager/src/CudaMemoryManager.h
deleted file mode 100644
index 071b4ab..0000000
--- a/PxShared/src/cudamanager/src/CudaMemoryManager.h
+++ /dev/null
@@ -1,297 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef PXCUDACONTEXTMANAGER_CUDAMEMORYMANAGER_H
-#define PXCUDACONTEXTMANAGER_CUDAMEMORYMANAGER_H
-
-#include "task/PxTaskDefine.h"
-#include "HeapManagerInterface.h"
-
-#include "PsPool.h"
-#include "PsMutex.h"
-#include "PsUserAllocated.h"
-#include "PsHashMap.h"
-
-namespace physx
-{
-
-class CudaBuffer;
-class HeapManagerInterface;
-class CudaMemMgr;
-class PxCudaContextManager;
-
-class CudaBuffer: public PxCudaBuffer
-{
-public:
- PX_INLINE CudaBuffer(const PxCudaBufferType& type)
- : mType(type)
- {}
-
-// Ni Interface
- bool free();
- bool realloc(size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
- const PxCudaBufferType& getType() const
- {
- return getTypeFast();
- }
- PxCudaBufferPtr getPtr() const
- {
- return getPtrFast();
- }
- size_t getSize() const
- {
- return getSizeFast();
- }
- PxCudaMemoryManager* getCudaMemoryManager() const
- {
- return getCudaMemoryManagerFast();
- }
-//
- PX_INLINE const PxCudaBufferType& getTypeFast() const
- {
- return mType;
- }
- PX_INLINE PxCudaBufferPtr getPtrFast() const
- {
- return mPtr;
- }
- PX_INLINE void setPtr(PxCudaBufferPtr val)
- {
- mPtr = val;
- }
- PX_INLINE size_t getSizeFast() const
- {
- return mSize;
- }
- PX_INLINE void setSize(size_t val)
- {
- mSize = val;
- }
- PX_INLINE PxCudaMemoryManager* getCudaMemoryManagerFast() const
- {
- return reinterpret_cast<PxCudaMemoryManager*>(mMemManager);
- }
- PX_INLINE void init(PxCudaBufferPtr ptr, size_t size, CudaMemMgr& manager, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED))
- {
- mPtr = ptr;
- mSize = size;
- mMemManager = &manager;
- mAllocInfo = PxAllocInfo(PX_ALLOC_INFO_PARAMS_INPUT());
- }
-
- void operator=(const CudaBuffer& in)
- {
- const_cast<PxCudaBufferType&>(mType) = in.mType;
- mPtr = in.mPtr;
- mSize = in.mSize;
- mMemManager = in.mMemManager;
- }
-
-private:
- const PxCudaBufferType mType;
- PxCudaBufferPtr mPtr;
- size_t mSize;
- CudaMemMgr* mMemManager;
- PxAllocInfo mAllocInfo;
-};
-
-
-class CudaMemMgr: public PxCudaMemoryManager, public shdfnd::UserAllocated
-{
- PX_NOCOPY(CudaMemMgr)
-public:
- CudaMemMgr(PxCudaContextManager& mMgr, physx::PxErrorCallback& errorCallback);
- virtual ~CudaMemMgr();
-
- PxCudaBuffer* alloc(const PxCudaBufferType& type, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
- PxCudaBufferPtr alloc(PxCudaBufferMemorySpace::Enum memorySpace, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
- bool free(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr);
- bool realloc(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
- void getStats(const PxCudaBufferType& type, PxCudaMemoryManagerStats& outStats);
- bool reserve(const PxCudaBufferType& type, size_t size);
- bool setPageSize(const PxCudaBufferType& type, size_t size);
- bool setMaxMemorySize(const PxCudaBufferType& type, size_t size);
- size_t getBaseSize(const PxCudaBufferType& type);
- size_t getPageSize(const PxCudaBufferType& type);
- size_t getMaxMemorySize(const PxCudaBufferType& type);
- void debugDisableAllocs()
- {
- mDebugDisableAllocs = true;
- }
- PxCudaBufferPtr getMappedPinnedPtr(PxCudaBufferPtr hostPtr);
-
- // internals
- bool free(CudaBuffer& buffer);
- bool realloc(CudaBuffer& buffer, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
- bool setBaseSize(const PxCudaBufferType& type, size_t size);
-
-private:
- PX_INLINE bool initialize();
- physx::PxErrorCallback& mErrorCallback;
- HeapManagerInterface* mHeap[PxCudaBufferMemorySpace::COUNT];
- HeapManagerInterface::Allocator* mMemoryAllocator[PxCudaBufferMemorySpace::COUNT];
- size_t mMemoryBaseSize[PxCudaBufferMemorySpace::COUNT];
- size_t mMemoryPageSize[PxCudaBufferMemorySpace::COUNT];
- size_t mMemoryMaxSize[PxCudaBufferMemorySpace::COUNT];
- shdfnd::Pool<CudaBuffer> mBufferPool;
- bool mInitialized;
- PxCudaContextManager& mMgr;
- shdfnd::Mutex mMutex;
- bool mDebugDisableAllocs;
-};
-
-// TODO, give MemoryAllocator prefix or namespace
-class DeviceMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated
-{
- PX_NOCOPY(DeviceMemAllocator)
-public:
- DeviceMemAllocator(PxCudaContextManager& mgr, size_t maxSize);
- virtual ~DeviceMemAllocator();
-
- virtual void* alloc(const size_t size);
- virtual void free(void* addr, const size_t size);
-
- void setMaxSize(size_t maxSize)
- {
- mMaxSize = maxSize;
- }
- size_t getMaxSize()
- {
- return mMaxSize;
- }
-
-private:
- PxCudaContextManager& mMgr;
- size_t mMaxSize;
- size_t mAllocSize;
-};
-
-
-class PinnedMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated
-{
- PX_NOCOPY(PinnedMemAllocator)
-public:
- PinnedMemAllocator(PxCudaContextManager& mMgr, size_t maxSize);
- virtual ~PinnedMemAllocator();
-
- virtual void* alloc(const size_t size);
- virtual void free(void* addr, const size_t size);
-
- void setMaxSize(size_t maxSize)
- {
- mMaxSize = maxSize;
- }
- size_t getMaxSize()
- {
- return mMaxSize;
- }
-
- size_t getMappedPinnedOffset(void* base)
- {
-
- PX_ASSERT(base);
- const shdfnd::HashMap<void*, size_t>::Entry* entry = mMappedPinnedPtrs.find(base);
- PX_ASSERT(entry);
- return entry->second - size_t(base);
- }
-
-private:
- PxCudaContextManager& mMgr;
- size_t mMaxSize;
- size_t mAllocSize;
- shdfnd::HashMap<void*, size_t> mMappedPinnedPtrs;
-};
-
-
-class HostMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated
-{
- PX_NOCOPY(HostMemAllocator)
-public:
- HostMemAllocator(size_t maxSize);
- virtual ~HostMemAllocator();
-
- virtual void* alloc(const size_t size);
- virtual void free(void* addr, const size_t size);
-
- void setMaxSize(size_t maxSize)
- {
- mMaxSize = maxSize;
- }
- size_t getMaxSize()
- {
- return mMaxSize;
- }
-
-private:
- size_t mMaxSize;
- size_t mAllocSize;
-};
-
-
-class WriteCombinedMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated
-{
- PX_NOCOPY(WriteCombinedMemAllocator)
-public:
- WriteCombinedMemAllocator(PxCudaContextManager& mgr, size_t maxSize);
- virtual ~WriteCombinedMemAllocator();
-
- virtual void* alloc(const size_t size);
- virtual void free(void* addr, const size_t size);
-
- void setMaxSize(size_t maxSize)
- {
- mMaxSize = maxSize;
- }
- size_t getMaxSize()
- {
- return mMaxSize;
- }
-
-private:
- struct WcMem
- {
- enum Enum
- {
- NOT_CHECKED,
- SUPPORTED,
- NOT_SUPPORTED
- };
- };
-
- bool isWcMemSupported();
-
-private:
- int mCudaOrdinal;
- PxCudaContextManager& mMgr;
- WcMem::Enum mWcMemSupport;
- size_t mMaxSize;
- size_t mAllocSize;
-};
-
-} // end physx namespace
-
-#endif // PXCUDACONTEXTMANAGER_CUDAMEMORYMANAGER_H
diff --git a/PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h b/PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h
deleted file mode 100644
index f20d87a..0000000
--- a/PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h
+++ /dev/null
@@ -1,128 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef PXCUDACONTEXTMANAGER_CUDANODE3DLOWLATENCYINTERFACE_H
-#define PXCUDACONTEXTMANAGER_CUDANODE3DLOWLATENCYINTERFACE_H
-
-#include <cuda.h>
-
-namespace physx
-{
-// Utility macros for defining and using UUID values for use with
-// the CUDA driver.
-//
-// CU_INIT_UUID must be #defined in exactly one translation unit
-// per linkage unit (i.e. one .c or .cpp file per binary). This
-// allows multiple .c and .cpp files to include headers that define
-// UUIDs using CU_DEFINE_UUID: The translation unit that #defines
-// CU_INIT_UUID will define and initialize the UUIDs, and all other
-// translation units will link to that definition.
-
-// Define helper macro: CU_INIT_EXTERN_CONST
-// In C, global consts have external linkage by default. In C++,
-// global consts have internal linkage by default, and require the
-// "extern" storage class specifier to have external linkage. C++
-// allows using "extern" with initializers, but it is illegal in C.
-// Thus, there is no common syntax for C and C++ to declare and
-// initialize global constants with external linkage. This macro
-// helps reduce duplication of other macros by factoring out the
-// C/C++ discrepancy.
-#ifdef __cplusplus
-#define CU_INIT_EXTERN_CONST extern const
-#else
-#define CU_INIT_EXTERN_CONST const
-#endif
-
-// Define macro CU_DEFINE_UUID. The parameters are the commonly
-// used "int-short-short-char[8]" style, which can be generated by
-// Microsoft's guidgen.exe tool, Visual Studio's "Create GUID"
-// option in the Tools menu (select style #2), and many web-based
-// UUID generator tools. Here's an example of what "Create GUID"
-// style #2 generates:
-//
-// DEFINE_GUID( <<name>>,
-// 0x2446054, 0xbb8e, 0x4b2f, 0x8b, 0xfc, 0xa4, 0xfe, 0x44, 0x9, 0x8f, 0xb8);
-//
-// So to use one of these with CUDA, just change the macro to
-// CU_DEFINE_UUID and choose a symbol name. For example:
-//
-// CU_DEFINE_UUID( MyUuid,
-// 0x2446054, 0xbb8e, 0x4b2f, 0x8b, 0xfc, 0xa4, 0xfe, 0x44, 0x9, 0x8f, 0xb8);
-//
-#if defined(CU_INIT_UUID)
-#define CU_CHAR(x) (char)(unsigned char)((x) & 0xff)
-// Define the symbol as exportable to other translation units, and
-// initialize the value. Inner set of parens is necessary because
-// "bytes" array needs parens within the struct initializer, which
-// also needs parens.
-#define CU_DEFINE_UUID(name, a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
- CU_INIT_EXTERN_CONST CUuuid name = \
- { \
- { \
- CU_CHAR(a), CU_CHAR((a) >> 8), CU_CHAR((a) >> 16), CU_CHAR((a) >> 24), \
- CU_CHAR(b), CU_CHAR((b) >> 8), \
- CU_CHAR(c), CU_CHAR((c) >> 8), \
- CU_CHAR(d0), \
- CU_CHAR(d1), \
- CU_CHAR(d2), \
- CU_CHAR(d3), \
- CU_CHAR(d4), \
- CU_CHAR(d5), \
- CU_CHAR(d6), \
- CU_CHAR(d7) \
- } \
- }
-#else
-// Declare the symbol to be imported from another translation unit.
-#define CU_DEFINE_UUID(name, a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
- extern const CUuuid name
-#endif
-
-//------------------------------------------------------------------
-// Cuda Private API Interfaces for PhysX
-//------------------------------------------------------------------
-
-// This provides backdoor interfaces used by PhysX
-CU_DEFINE_UUID(CU_ETID_PhysXInterface, 0x8c0ba50c, 0x0410, 0x9a92, 0x89, 0xa7, 0xd0, 0xdf, 0x10, 0xe7, 0x72, 0x86);
-
-typedef struct CUetblPhysXInterface_st
-{
- /* Size of this structure */
- size_t size;
-
- /* Create a new CUDA context on Node3dLowLatency.
- * - will usually it will just call cuCtxCreateOnNode3DLowLatency.
- */
- CUresult (CUDAAPI *cuCtxCreateOnNode3DLowLatency)(
- CUcontext *pctx,
- unsigned int flags,
- CUdevice dev);
-
-} CUetblPhysXInterface;
-}
-
-#endif // PXCUDACONTEXTMANAGER_CUDANODE3DLOWLATENCYINTERFACE_H
diff --git a/PxShared/src/cudamanager/src/GpuDispatcher.cpp b/PxShared/src/cudamanager/src/GpuDispatcher.cpp
deleted file mode 100644
index 432a0cd..0000000
--- a/PxShared/src/cudamanager/src/GpuDispatcher.cpp
+++ /dev/null
@@ -1,928 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "task/PxTaskDefine.h"
-
-#if PX_SUPPORT_GPU_PHYSX
-
-#include "task/PxGpuDispatcher.h"
-#include "task/PxCpuDispatcher.h"
-#include "cudamanager/PxCudaContextManager.h"
-#include "cudamanager/PxGpuCopyDesc.h"
-
-#include "CudaKernelWrangler.h"
-#include "GpuDispatcher.h"
-
-#if PX_SUPPORT_PXTASK_PROFILING
-#include "foundation/PxProfiler.h"
-#endif
-
-#include "PsArray.h"
-#include "PsSync.h"
-#include "PsInlineArray.h"
-#include "PsUserAllocated.h"
-#include "PsAtomic.h"
-
-#if PX_VC
-#pragma warning(disable: 4062) //enumerator 'identifier' in switch of enum 'enumeration' is not handled
-#endif
-
-using namespace physx;
-
-// hack to run CUDA calls in a single thread
-// used to capture CUDA APIC traces
-// #define SINGLE_CUDA_THREAD 1
-
-/* Kernels in UtilsKernels.cu */
-const char* UtilKernelNames[] =
-{
- "Saturate",
- "MemCopyAsync",
- "MemCopyBatchedAsync",
-};
-enum
-{
- KERNEL_SATURATE,
- KERNEL_MEMCOPY,
- KERNEL_MEMCOPY_BATCHED
-};
-
-/* ==================== LaunchTask =========================== */
-
-/**
-\brief A task that maintains a list of dependent tasks.
-
-This task maintains a list of dependent tasks that have their reference counts
-reduced on completion of the task.
-
-The refcount is incremented every time a dependent task is added.
-*/
-class physx::FanoutTask : public PxBaseTask, public shdfnd::UserAllocated
-{
- PX_NOCOPY(FanoutTask)
-public:
- FanoutTask(const char* name) : mRefCount(0), mName(name), mNotifySubmission(false) {}
-
- virtual void run() {}
- virtual const char* getName(void) const { return mName; }
-
- virtual void removeReference()
- {
- shdfnd::Mutex::ScopedLock lock(mMutex);
- if (!shdfnd::atomicDecrement(&mRefCount))
- {
- // prevents access to mReferencesToRemove until release
- shdfnd::atomicIncrement(&mRefCount);
- mNotifySubmission = false;
- PX_ASSERT(mReferencesToRemove.empty());
- // save continuation and dependents
- for (uint32_t i = 0; i < mDependents.size(); i++)
- mReferencesToRemove.pushBack(mDependents[i]);
- mDependents.clear();
- mTm->getCpuDispatcher()->submitTask(*this);
- }
- }
-
- /**
- \brief Increases reference count
- */
- virtual void addReference()
- {
- shdfnd::Mutex::ScopedLock lock(mMutex);
- shdfnd::atomicIncrement(&mRefCount);
- mNotifySubmission = true;
- }
-
- /**
- \brief Return the ref-count for this task
- */
- PX_INLINE int32_t getReference() const
- {
- return mRefCount;
- }
-
- /**
- Adds a dependent task. It also sets the task manager querying it from the dependent task.
- The refcount is incremented every time a dependent task is added.
- */
- void addDependent(PxBaseTask& dependent)
- {
- shdfnd::Mutex::ScopedLock lock(mMutex);
- shdfnd::atomicIncrement(&mRefCount);
- mTm = dependent.getTaskManager();
- mDependents.pushBack(&dependent);
- dependent.addReference();
- mNotifySubmission = true;
- }
-
- /**
- Reduces reference counts of the continuation task and the dependent tasks, also
- clearing the copy of continuation and dependents task list.
- */
- virtual void release()
- {
- shdfnd::InlineArray<PxBaseTask*, 10> referencesToRemove;
-
- {
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- const uint32_t contCount = mReferencesToRemove.size();
- referencesToRemove.reserve(contCount);
- for (uint32_t i=0; i < contCount; ++i)
- referencesToRemove.pushBack(mReferencesToRemove[i]);
-
- mReferencesToRemove.clear();
- // allow access to mReferencesToRemove again
- if (mNotifySubmission)
- {
- removeReference();
- }
- else
- {
- physx::shdfnd::atomicDecrement(&mRefCount);
- }
-
- // the scoped lock needs to get freed before the continuation tasks get (potentially) submitted because
- // those continuation tasks might trigger events that delete this task and corrupt the memory of the
- // mutex (for example, assume this task is a member of the scene then the submitted tasks cause the simulation
- // to finish and then the scene gets released which in turn will delete this task. When this task then finally
- // continues the heap memory will be corrupted.
- }
-
- for (uint32_t i=0; i < referencesToRemove.size(); ++i)
- referencesToRemove[i]->removeReference();
- }
-
-private:
- friend class LaunchTask;
-
- volatile int32_t mRefCount;
- const char* mName;
- shdfnd::InlineArray<PxBaseTask*, 4> mDependents;
- shdfnd::InlineArray<PxBaseTask*, 4> mReferencesToRemove;
- bool mNotifySubmission;
- shdfnd::Mutex mMutex; // guarding mDependents and mNotifySubmission
-};
-
-class physx::LaunchTask : public physx::FanoutTask
-{
-public:
- LaunchTask() : FanoutTask("GpuDispatcher.launch"), mIsBatchStarted(false)
- {
- }
-
- virtual void run()
- {
- if (mReferencesToRemove.size() >= 1)
- mIsBatchStarted = true;
- }
-
- bool mIsBatchStarted;
-};
-
-class physx::BlockTask : public PxLightCpuTask, public shdfnd::UserAllocated
-{
- PX_NOCOPY(BlockTask)
-public:
- BlockTask(PxGpuWorkerThread* dispatcher, physx::LaunchTask* launchTask)
- : mDispatcher(dispatcher), mLaunchTask(launchTask), mSyncTask(NULL)
- {}
-
- virtual const char* getName(void) const
- {
- return "GpuDispatcher.block";
- }
-
- virtual void removeReference()
- {
- shdfnd::Mutex::ScopedLock lock(mMutex);
- if (!physx::shdfnd::atomicDecrement(&mRefCount))
- {
- // prevents access to mSyncTask until release
- physx::shdfnd::atomicIncrement(&mRefCount);
- PX_ASSERT(!mSyncTask);
- shdfnd::swap(mSyncTask, mCont);
- mTm->getCpuDispatcher()->submitTask(*this);
- }
- }
-
- virtual void run()
- {
- if (mLaunchTask->mIsBatchStarted)
- {
- mDispatcher->mCtxMgr->acquireContext();
- CUevent stopEv = mDispatcher->mCachedBlockingEvents.get();
- CUstream stream = (CUstream)0;
- mSyncTask->addReference();
- mDispatcher->flushBatch(stopEv, stream, mSyncTask);
- mDispatcher->mCtxMgr->releaseContext();
- mLaunchTask->mIsBatchStarted = false;
- }
- }
-
- virtual void setContinuation(PxBaseTask* continuation)
- {
- // this function is called multiple times, skip after first call
- shdfnd::Mutex::ScopedLock lock(mMutex);
- physx::shdfnd::atomicIncrement(&mRefCount);
- if (!mCont)
- {
- mCont = continuation;
- mTm = mCont->getTaskManager();
- mCont->addReference();
- }
- }
-
- virtual void release()
- {
- shdfnd::Mutex::ScopedLock lock(mMutex);
- mSyncTask->removeReference();
- mSyncTask = NULL;
- // allow access to mSyncTask again
- if (mCont)
- {
- removeReference();
- }
- else
- {
- physx::shdfnd::atomicDecrement(&mRefCount);
- }
- }
-
- PxGpuWorkerThread* mDispatcher;
- physx::LaunchTask* mLaunchTask;
- PxBaseTask* mSyncTask;
- shdfnd::Mutex mMutex; // guarding mCont
-};
-
-/* ==================== API functions =========================== */
-
-void physx::releaseGpuDispatcher(PxGpuDispatcher& gd)
-{
- GpuDispatcherImpl* impl = (GpuDispatcherImpl*) &gd;
- delete impl;
-}
-
-PxCudaContextManager* GpuDispatcherImpl::getCudaContextManager()
-{
- return mDispatcher->mCtxMgr;
-}
-
-GpuDispatcherImpl::GpuDispatcherImpl(PxErrorCallback& errorCallback, PxCudaContextManager& ctx) :
- mDispatcher(NULL),
- mBlockingThread(NULL),
- mLaunchTask(NULL),
- mBlockTask(NULL),
- mSyncTask(NULL)
-{
- mDispatcher = PX_NEW(PxGpuWorkerThread);
- if (!mDispatcher)
- {
- forceFailureMode();
- return;
- }
-
- mDispatcher->setCudaContext(ctx);
-
- mDispatcher->mUtilKernelWrapper = PX_NEW(KernelWrangler)(*this, errorCallback, UtilKernelNames, sizeof(UtilKernelNames) / sizeof(char*));
- if (!mDispatcher->mUtilKernelWrapper || mDispatcher->mUtilKernelWrapper->hadError())
- {
- forceFailureMode();
- return;
- }
-
- mLaunchTask = PX_NEW(LaunchTask);
- mBlockTask = PX_NEW(BlockTask)(mDispatcher, mLaunchTask);
- mSyncTask = PX_NEW(FanoutTask)("GpuDispatcher.sync");
-}
-
-GpuDispatcherImpl::~GpuDispatcherImpl()
-{
- if (mBlockingThread)
- {
- mBlockingThread->signalQuit();
- PX_ASSERT(mDispatcher);
- mDispatcher->mRecordEventQueued.set();
- mBlockingThread->waitForQuit();
- delete mBlockingThread;
- }
-
- if (mDispatcher)
- {
- mDispatcher->signalQuit();
- mDispatcher->mInputReady.set();
- mDispatcher->waitForQuit();
- delete mDispatcher;
- }
-
- if (mLaunchTask)
- PX_DELETE(mLaunchTask);
-
- if (mBlockTask)
- PX_DELETE(mBlockTask);
-
- if (mSyncTask)
- PX_DELETE(mSyncTask);
-}
-
-void GpuDispatcherImpl::start()
-{
-#ifndef SINGLE_CUDA_THREAD
- mDispatcher->start(shdfnd::Thread::getDefaultStackSize());
-#else
- mDispatcher->execute();
-#endif
-
- mBlockingThread = PX_NEW(BlockingWaitThread)(*mDispatcher);
-#ifndef SINGLE_CUDA_THREAD
- mBlockingThread->start(shdfnd::Thread::getDefaultStackSize());
-#endif
-}
-
-void GpuDispatcherImpl::startSimulation()
-{
- mDispatcher->startSimulation();
-}
-
-void GpuDispatcherImpl::stopSimulation()
-{
- mDispatcher->stopSimulation();
-}
-
-void GpuDispatcherImpl::startGroup()
-{
- shdfnd::atomicIncrement(&mDispatcher->mActiveGroups);
-}
-
-void GpuDispatcherImpl::submitTask(PxTask& task)
-{
- mDispatcher->mSubmittedTaskList.push(&task);
-}
-
-void GpuDispatcherImpl::finishGroup()
-{
- if (!shdfnd::atomicDecrement(&mDispatcher->mActiveGroups))
- {
-#ifdef SINGLE_CUDA_THREAD
- mDispatcher->mCtxMgr->acquireContext();
- mDispatcher->processActiveTasks();
- mDispatcher->mCtxMgr->releaseContext();
- mDispatcher->blockingWaitFunc();
-#endif
- mDispatcher->mInputReady.set();
- }
-}
-
-void GpuDispatcherImpl::addCompletionPrereq(PxBaseTask& task)
-{
- mDispatcher->addCompletionPrereq(task);
-}
-
-bool GpuDispatcherImpl::failureDetected() const
-{
- return mDispatcher->mFailureDetected;
-}
-
-void GpuDispatcherImpl::forceFailureMode()
-{
- mDispatcher->mFailureDetected = true;
-}
-
-void GpuDispatcherImpl::launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream)
-{
- mDispatcher->launchCopyKernel(desc, count, stream);
-}
-
-PxBaseTask& GpuDispatcherImpl::getPreLaunchTask()
-{
- PX_ASSERT(mLaunchTask);
- return *mLaunchTask;
-}
-
-void GpuDispatcherImpl::addPreLaunchDependent(PxBaseTask& dependent)
-{
- PX_ASSERT(mLaunchTask);
- mLaunchTask->addDependent(dependent);
-}
-
-PxBaseTask& GpuDispatcherImpl::getPostLaunchTask()
-{
- PX_ASSERT(mBlockTask);
- return *mBlockTask;
-}
-
-void GpuDispatcherImpl::addPostLaunchDependent(PxBaseTask& dependent)
-{
- PX_ASSERT(mSyncTask && mBlockTask);
- mSyncTask->addDependent(dependent);
- mBlockTask->setContinuation(mSyncTask);
- mSyncTask->removeReference();
-}
-
-/* ==================== Worker Thread =========================== */
-
-PxGpuWorkerThread::PxGpuWorkerThread()
- : mActiveGroups(0)
- , mCtxMgr(NULL)
- , mFailureDetected(false)
- , mCompletionRingPush(0)
- , mCompletionRingPop(0)
- , mCachedBlockingEvents(CU_EVENT_BLOCKING_SYNC)
- , mCachedNonBlockingEvents(0)
- , mCountActiveScenes(0)
- , mSmStartTimes(0)
- , mUtilKernelWrapper(0)
-{
-}
-
-void PxGpuWorkerThread::setCudaContext(PxCudaContextManager& ctx)
-{
- mCtxMgr = &ctx;
-}
-
-PxGpuWorkerThread::~PxGpuWorkerThread()
-{
- mCtxMgr->acquireContext();
- mCachedBlockingEvents.clear();
- mCachedNonBlockingEvents.clear();
- mCachedStreams.reset();
- while (!mCachedStreams.empty())
- {
- GD_CHECK_CALL(cuStreamDestroy(mCachedStreams.get(mCachedStreams.popBack())));
- }
- mCtxMgr->releaseContext();
-
- if (mSmStartTimes)
- {
- PX_FREE(mSmStartTimes);
- }
- if (mUtilKernelWrapper)
- {
- // will acquire the context for itself
- PX_DELETE(mUtilKernelWrapper);
- }
-}
-
-
-/* A TaskManager is informing us that its simulation is being stepped */
-void PxGpuWorkerThread::startSimulation()
-{
- mUsingConcurrentStreams = mCtxMgr->getUsingConcurrentStreams();
-
- shdfnd::atomicIncrement(&mCountActiveScenes);
-}
-
-
-/* A TaskManager is informing us that its simulation has ended */
-void PxGpuWorkerThread::stopSimulation()
-{
- if (shdfnd::atomicDecrement(&mCountActiveScenes) == 0)
- mCachedStreams.reset();
-}
-
-
-/* PxGpuDispatcher worker thread main loop */
-void PxGpuWorkerThread::execute()
-{
- setName("GpuDispatcher.Worker");
-
-#ifndef SINGLE_CUDA_THREAD
- bool running = true;
- while (running)
- {
- mInputReady.wait();
-
- if (quitIsSignalled())
- {
- break;
- }
-
- if (!mSubmittedTaskList.empty())
- {
- mCtxMgr->acquireContext();
- processActiveTasks();
- mCtxMgr->releaseContext();
- }
- }
-
- quit();
-#endif
-}
-
-/* Expected to be called by a GPU task, or a function called by a GPU
- * task. The context is important because I think it does not require
- * any locking since only one GPU task's launchInstance() function will
- * be running at a time, per GpuDispatcherImpl (per CUDA context).
- */
-void PxGpuWorkerThread::addCompletionPrereq(PxBaseTask& task)
-{
- if(mFailureDetected)
- return;
-
-#if PX_SUPPORT_PXTASK_PROFILING
- PX_PROFILE_ZONE("GpuDispatcher.AddCompletionEvent", task.getContextId());
-#endif
- task.addReference();
- mCompletionTasks.pushBack(&task);
-}
-
-namespace
-{
- template <typename T0>
- PX_NOINLINE CUresult launchKernelGrid(CUfunction func, unsigned int gridWidth, unsigned int gridHeight, unsigned int numThreads, unsigned int sharedMem, CUstream stream, T0 v0)
- {
- void* kernelParams[] =
- {
- &v0,
- };
- return cuLaunchKernel(func, gridWidth, gridHeight, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
- }
-}
-
-void PxGpuWorkerThread::launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream)
-{
- if (!mCtxMgr->canMapHostMemory())
- {
- for (uint32_t i = 0 ; i < count ; i++)
- {
- PX_ASSERT(desc->isValid());
- switch (desc->type)
- {
- case PxGpuCopyDesc::DeviceMemset32:
- GD_CHECK_CALL(cuMemsetD32Async(desc->dest, (uint32_t) desc->source, desc->bytes >> 2, stream));
- break;
- case PxGpuCopyDesc::DeviceToDevice:
- GD_CHECK_CALL(cuMemcpyDtoDAsync(desc->dest, desc->source, desc->bytes, stream));
- break;
- case PxGpuCopyDesc::DeviceToHost:
- GD_CHECK_CALL(cuMemcpyDtoHAsync((void*) desc->dest, desc->source, desc->bytes, stream));
- break;
- case PxGpuCopyDesc::HostToDevice:
- GD_CHECK_CALL(cuMemcpyHtoDAsync(desc->dest, (void*) desc->source, desc->bytes, stream));
- break;
- }
-
- desc++;
- }
- }
- else if (count == 1)
- {
- CUfunction func = mUtilKernelWrapper->getCuFunction(KERNEL_MEMCOPY);
- uint32_t smCount = (uint32_t)mCtxMgr->getMultiprocessorCount();
-
- PX_ASSERT(desc->isValid());
-
- CUdeviceptr dptr;
- switch (desc->type)
- {
- case PxGpuCopyDesc::DeviceToHost:
- dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc->dest));
- desc->dest = dptr;
- break;
- case PxGpuCopyDesc::HostToDevice:
- dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc->source));
- desc->source = dptr;
- break;
- case PxGpuCopyDesc::DeviceMemset32:
- case PxGpuCopyDesc::DeviceToDevice:
- //do nothing, cases are here for GCCs warning system
- break;
- }
-
- uint32_t numThreads;
- if (mCtxMgr->supportsArchSM20())
- {
- numThreads = 256;
- }
- else
- {
- numThreads = 128;
- }
- uint32_t blocks = uint32_t(desc->bytes / (numThreads * 4 * 6));
- if (blocks == 0)
- {
- blocks = 1;
- }
- if (blocks > smCount)
- {
- blocks = smCount;
- }
-
- GD_CHECK_CALL(
- launchKernel(func, blocks, numThreads, 0, stream, *desc)
- );
- }
- else
- {
- CUfunction func = mUtilKernelWrapper->getCuFunction(KERNEL_MEMCOPY_BATCHED);
- CUdeviceptr dptr;
-
- for (uint32_t i = 0 ; i < count ; i++)
- {
- PX_ASSERT(desc[i].isValid());
-
- switch (desc[i].type)
- {
- case PxGpuCopyDesc::DeviceToHost:
- dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc[i].dest));
- desc[i].dest = dptr;
- break;
- case PxGpuCopyDesc::HostToDevice:
- dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc[i].source));
- desc[i].source = dptr;
- break;
- case PxGpuCopyDesc::DeviceMemset32:
- case PxGpuCopyDesc::DeviceToDevice:
- //do nothing, cases are here for GCCs warning system
- break;
- }
- }
-
- uint32_t numThreads, numBlocksX;
- if (mCtxMgr->supportsArchSM20())
- {
- numThreads = 256;
- numBlocksX = 1;
- }
- else
- {
- numThreads = 128;
- numBlocksX = 2;
- }
-
- dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc));
-
- GD_CHECK_CALL(
- launchKernelGrid(func, numBlocksX, count, numThreads, 0, stream,
- dptr)
- );
- }
-}
-
-void PxGpuWorkerThread::flushBatch(CUevent endEvent, CUstream syncStream, PxBaseTask* task)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- if (mFailureDetected)
- {
- return;
- }
-
- if (endEvent && (1 & intptr_t(syncStream)))
- {
- // blocking record event on stream 0, flushes current push buffer
- GD_CHECK_CALL(cuEventRecord(endEvent, 0));
- } else {
- CUresult ret = cuStreamQuery(0); // flushes current push buffer
- PX_ASSERT(ret == CUDA_SUCCESS || ret == CUDA_ERROR_NOT_READY);
- PX_UNUSED(ret);
- }
-
- int next = (mCompletionRingPush + 1) % SIZE_COMPLETION_RING;
- while (next == mCompletionRingPop)
- {
- // lordy, I hope we never get here.
- PX_ALWAYS_ASSERT();
- mCtxMgr->releaseContext();
- sleep(1);
- mCtxMgr->acquireContext();
- }
-
- CudaBatch& b = mCompletionRing[ mCompletionRingPush ];
- b.blockingEvent = endEvent;
- b.blockingStream = syncStream;
- b.continuationTask = task;
-
- mCompletionRingPush = next;
- mRecordEventQueued.set();
-}
-
-// Take any submitted tasks into its appropriate read list
-void PxGpuWorkerThread::pollSubmitted(shdfnd::Array<ReadyTask>* ready)
-{
- mInputReady.reset();
- PxGpuTask* gt;
- while ((gt = (PxGpuTask*) mSubmittedTaskList.popBack()) != 0)
- {
- PxGpuTaskHint::Enum type = gt->getTaskHint();
- ReadyTask* r = &ready[ type ].insert();
- r->task = gt;
- r->iteration = 0;
- if (mUsingConcurrentStreams && gt->mStreamIndex == 0)
- {
- gt->mStreamIndex = mCachedStreams.popBack();
- }
- }
-}
-
-void PxGpuWorkerThread::processActiveTasks()
-{
-#if PX_SUPPORT_PXTASK_PROFILING
- PX_PROFILE_ZONE("GpuDispatcher.ProcessTasksEvent", 0); // PT: TODO: fix invalid context
-#endif
-
- if (mFailureDetected)
- {
- while (!mSubmittedTaskList.empty())
- {
- mInputReady.reset();
- mSubmittedTaskList.popBack()->release();
- }
- return;
- }
-
- for (uint32_t i = 0; i < PxGpuTaskHint::NUM_GPU_TASK_HINTS; i++)
- {
- mReady[i].clear();
- }
-
- //OutputDebugString("batch beginning\n");
-
- const CUevent nonBlockEv = mCachedNonBlockingEvents.get();
- bool workToFlush = false;
- bool tasksRemain = false;
- PxGpuTaskHint::Enum curMode = PxGpuTaskHint::HostToDevice;
-
- pollSubmitted(mReady);
-
- do
- {
- // cycle current run mode when necessary
- if (mReady[ curMode ].size() == 0)
- {
- if (curMode == PxGpuTaskHint::HostToDevice)
- {
- curMode = PxGpuTaskHint::Kernel;
- }
- else if (curMode == PxGpuTaskHint::Kernel)
- {
- curMode = PxGpuTaskHint::DeviceToHost;
- }
- else
- {
- curMode = PxGpuTaskHint::HostToDevice;
- }
- }
-
- uint32_t singleStream = mReady[curMode].empty() ? 0 : mReady[curMode].front().task->mStreamIndex;
-
- while (mReady[ curMode ].size())
- {
- bool needwfi = false;
- for (uint32_t i = 0 ; i < mReady[ curMode ].size() ; i++)
- {
- ReadyTask& r = mReady[ curMode ][ i ];
-
- if (r.task->mPreSyncRequired)
- {
- // If mPreSyncRequired is set *before* the task is run, it implies
- // a WFI must be inserted before this task issues any work. Multiple
- // ready tasks may have this flag, so to avoid inserting multiple WFI
- // requests, we skip marked tasks in this pass and note a WFI is needed.
- needwfi = true;
- r.task->mPreSyncRequired = false;
- }
- else
- {
- const CUstream s = (r.task->mStreamIndex > 0) ? mCachedStreams.get(r.task->mStreamIndex) : 0;
-
- bool active;
- {
-#if PX_PROFILE
-#if PX_SUPPORT_PXTASK_PROFILING
- PX_PROFILE_ZONE(r.task->getName(), r.task->getContextId());
-#endif
-#endif
- active = r.task->launchInstance(s, int(r.iteration++));
- }
- if(singleStream != r.task->mStreamIndex)
- singleStream = 0;
-
- // If the launchInstance() call reported a non-recoverable error, gracefully
- // release all scheduled tasks
- if (mFailureDetected)
- {
- // Release all ready tasks
- for (uint32_t h = 0; h < PxGpuTaskHint::NUM_GPU_TASK_HINTS; h++)
- {
- for (uint32_t j = 0 ; j < mReady[ h ].size() ; j++)
- {
- mReady[ h ][ j ].task->release();
- }
- mReady[ h ].clear();
- }
-
- // Release all submitted tasks, until idle
- while (!mSubmittedTaskList.empty())
- {
- mInputReady.reset();
- mSubmittedTaskList.popBack()->release();
- }
- return;
- }
-
- workToFlush = true;
- if (r.task->mPreSyncRequired)
- {
- // This task has asked for a sync point, meaning it has launched a copy
- // or a kernel that must be completed before any later tasks are allowed
- // to start. Insert a WFI and clear the needwfi flag
- GD_CHECK_CALL(cuEventRecord(nonBlockEv, 0));
- needwfi = false;
- r.task->mPreSyncRequired = false;
- }
-
- if (!active)
- {
- r.task->release();
- mReady[ curMode ].replaceWithLast(i);
- pollSubmitted(mReady);
- i -= 1;
- }
- }
- }
-
- if (needwfi)
- {
- GD_CHECK_CALL(cuEventRecord(nonBlockEv, 0));
- }
- }
-
- /* We have completed one of the three phases */
-
- tasksRemain = false;
- for (int e = (int) PxGpuTaskHint::HostToDevice ; e != (int) PxGpuTaskHint::NUM_GPU_TASK_HINTS ; e++)
- {
- tasksRemain |= (mReady[ e ].size() != 0);
- }
-
- if (!mCompletionTasks.empty())
- {
- workToFlush = true;
- }
-
- if (workToFlush && (tasksRemain == false || curMode == PxGpuTaskHint::DeviceToHost))
- {
- //OutputDebugString("batch ending\n");
-
- while (mCompletionTasks.size())
- {
- PxBaseTask* t = mCompletionTasks.popBack();
- if (workToFlush)
- {
- CUevent stopEv = mCachedBlockingEvents.get();
- CUstream stream = singleStream ? mCachedStreams.get(singleStream) : (CUstream)1;
- flushBatch(stopEv, stream, t);
- workToFlush = false;
- }
- else
- {
- flushBatch(0, 0, t);
- }
- }
- if (workToFlush)
- {
- /* Getting here is probably an indication of a bug in your task graph,
- * but it is possible to get this warning if you have CPU tasks that
- * can delay GpuTasks. So, consider this warning "training wheels" and
- * disable it if you know your graph is correct.
- */
- // SJB - Disabling this warning, APEX does this every frame because
- // of how BasicIOS and IOFX interact.
- //shdfnd::getFoundation().error(PX_WARN,
- // "CUDA work generated without a completion dependency!");
- CUevent stopEv = mCachedBlockingEvents.get();
- flushBatch(stopEv, (CUstream)1, NULL);
- }
- }
- }
- while (tasksRemain);
-
- mCachedNonBlockingEvents.add(nonBlockEv);
-}
-
-#endif
diff --git a/PxShared/src/cudamanager/src/HeapManagerInterface.h b/PxShared/src/cudamanager/src/HeapManagerInterface.h
deleted file mode 100644
index 7fe7f2e..0000000
--- a/PxShared/src/cudamanager/src/HeapManagerInterface.h
+++ /dev/null
@@ -1,156 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-//----------------------------------------------------------------------------//
-// HeapManagerInterface.h
-//----------------------------------------------------------------------------//
-
-#ifndef PXCUDACONTEXTMANAGER_HEAPMANAGERINTERFACE_H
-#define PXCUDACONTEXTMANAGER_HEAPMANAGERINTERFACE_H
-
-#include "task/PxTaskDefine.h"
-#include "foundation/PxSimpleTypes.h"
-#include "cudamanager/PxCudaMemoryManager.h"
-
-#include <string.h>
-
-namespace physx
-{
-struct HeapStatsFlags
-{
- enum Enum
- {
- F_BASIC_STATS = 1 << 0,
- F_INTERNAL_FRAGMENTATION = 1 << 1,
- F_BIGGEST_FREE_BLOCK = 1 << 2,
- F_HISTOGRAM = 1 << 3,
- F_ALLOC_ID_STATS = 1 << 4,
- F_ALL = 0xFFFFFFFF,
- };
-};
-
-#define BITSPERWORD sizeof(size_t)*8
-
-class ApexHeapStats
-{
-public:
- ApexHeapStats():
- heapSize(0),
- totalAllocated(0),
- maxAllocated(0),
- internalFragmentation(0),
- maxInternalFragmentation(0),
- biggestFreeBlock(0),
- numEntries(0)
- {}
-
- PX_INLINE void reset()
- {
- memset(this, 0, sizeof(ApexHeapStats));
- }
-
- // F_BASIC_STATS
- size_t heapSize;
- size_t totalAllocated;
- size_t maxAllocated;
-
- // F_INTERNAL_FRAGMENTATION
- size_t internalFragmentation;
- size_t maxInternalFragmentation;
-
- // F_BIGGEST_FREE_BLOCK
- size_t biggestFreeBlock;
-
- // F_HISTOGRAM
- size_t freeBuddyHistogram[BITSPERWORD];
- size_t allocatedBuddyHistogram[BITSPERWORD];
- size_t numEntries;
-
- // F_ALLOC_ID_STATS
- PxAllocIdStats allocIdStats[PxAllocId::NUM_IDS];
-};
-
-
-class HeapManagerInterface
-{
-public:
- // simple allocator interface over which the heap manager does its base allocation and allocates further pages
- class Allocator
- {
- public:
- virtual ~Allocator() {};
-
- virtual void* alloc(const size_t size) = 0;
- virtual void free(void* addr, const size_t size) = 0;
- };
-
- virtual ~HeapManagerInterface() {};
-
- // INTERFACE METHODS
- // init the HeapManager by passing it a block of memory and the smallest size of a memory block.
- // returns true if init was successful
- virtual bool init(Allocator* memAllocator, const size_t baseSize, const size_t pageSize, const size_t minBlockSize, const size_t maxIntFrag = size_t(-1)) = 0;
-
- // Changes the page size. The size of allocations over the supplied Allocator are a multiple of the pageSize.
- // returns true if the page size was valid. (!0, >minBlockSize, pow2)
- virtual bool setPageSize(size_t pageSize) = 0;
-
- // returns the address of an allocated block for the requested size.
- // returns a NULL ptr if alloc failed.
- virtual void* alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) = 0;
-
- // returns true if the block at the given address could be resized to size
- // returns false if this failed. Manual free and alloc is still possible but needs a memcopy.
- virtual bool realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) = 0;
-
- // frees a given block.
- // returns true if the operation was successful
- virtual bool free(void* addr) = 0;
-
- // deallocates all empty pages
- virtual void freeEmptyPages() = 0;
-
- // ensures that there there is free memory of at least the requested size
- // returns true if the operation was successful. Free memory was already big enough or new pages were allocated successfully.
- virtual bool reserve(size_t size) = 0;
-
- // returns stats into a ApexHeapStats object, stats can be selected with HeapManagerStatsFlags.
- // returns true if the operation was successful
- virtual bool getStats(ApexHeapStats& stats, const uint32_t flags) = 0;
-
- // discretisize memory into an array such that it can be visualized
- // returns true if the operation was successful
- virtual bool visualizeMemory(uint8_t* array, const size_t size) = 0;
-
- // returns the base address of the page containing the memory block at addr.
- // returns NULL if addr doesn't correspond to a page
- virtual void* findBaseAddress(void* addr) = 0;
-};
-
-} // end physx namespace
-
-#endif // PXCUDACONTEXTMANAGER_HEAPMANAGERINTERFACE_H
diff --git a/PxShared/src/cudamanager/src/HeapManagerLinkedList.h b/PxShared/src/cudamanager/src/HeapManagerLinkedList.h
deleted file mode 100644
index 45a359d..0000000
--- a/PxShared/src/cudamanager/src/HeapManagerLinkedList.h
+++ /dev/null
@@ -1,204 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-//----------------------------------------------------------------------------//
-// HeapManagerLinkedList.h
-//----------------------------------------------------------------------------//
-
-#ifndef PXCUDACONTEXTMANAGER_HEAPMANAGERLINKEDLIST_H
-#define PXCUDACONTEXTMANAGER_HEAPMANAGERLINKEDLIST_H
-
-#include "foundation/PxAssert.h"
-
-namespace physx
-{
-
-template <typename T>
-class LinkedList
-{
-public:
-
- PX_INLINE LinkedList()
- : mHead(NULL)
- , mNumElements(0)
- {
- }
-
- PX_INLINE void insert(T*& elt)
- {
- if (mHead)
- {
- elt->next = mHead->next;
- }
- mHead = elt;
- mNumElements++;
- }
-
- PX_INLINE void insertSorted(T*& elt)
- {
- if (!mHead)
- {
- mHead = elt;
- mHead->next = NULL;
- }
- else if (!mHead->next || (mHead->addr > elt->addr))
- {
- if (mHead->addr > elt->addr)
- {
- elt->next = mHead;
- mHead = elt;
- }
- else
- {
- mHead->next = elt;
- elt->next = NULL;
- }
- }
- else
- {
- T* cur = mHead;
- while (cur->next && (elt->addr > cur->next->addr))
- {
- cur = cur->next;
- }
- elt->next = cur->next;
- cur->next = elt;
- }
- mNumElements++;
- }
-
- PX_INLINE T* pop()
- {
- if (mHead)
- {
- T* ret = mHead;
- mHead = mHead->next;
- mNumElements--;
- return ret;
- }
- return NULL;
- }
-
- PX_INLINE bool remove(const T* elt)
- {
- PX_ASSERT(elt);
- if (mHead && mHead == elt)
- {
- mHead = mHead->next;
- mNumElements--;
- return true;
- }
- else
- {
- T* cur = mHead;
- while (cur && cur->next != elt)
- {
- PX_ASSERT(cur->addr < elt->addr); // assert for sorted list property.
- cur = cur->next;
- }
- if (cur && elt)
- {
- cur->next = elt->next;
- mNumElements--;
- return true;
- }
- }
- return false;
- }
-
- PX_INLINE T* find(const size_t addr)
- {
- T* cur = mHead;
- while (cur && cur->addr < addr)
- {
- cur = cur->next;
- }
-
- return cur && (cur->addr == addr) ? cur : NULL;
- }
-
- PX_INLINE T* findAndPop(const size_t addr)
- {
- if (mHead == NULL)
- {
- return NULL;
- }
-
- if (mHead->addr == addr)
- {
- return pop();
- }
-
- T* cur = mHead;
- T* last = mHead;
- while (cur)
- {
- if (cur->addr == addr)
- {
- last->next = cur->next;
- mNumElements--;
- return cur;
- }
- else if (cur->addr > addr)
- {
- return NULL; // because list is sorted.
- }
- else
- {
- last = cur;
- cur = cur->next;
- }
- }
- return NULL;
- }
-
- PX_INLINE size_t getSize()
- {
- return mNumElements;
- }
- PX_INLINE T* getHead()
- {
- return mHead;
- }
-
- // hacky
- PX_INLINE void setSize(size_t s)
- {
- mNumElements = s;
- }
- PX_INLINE void setHead(T* h)
- {
- mHead = h;
- }
-private:
- T* mHead;
- size_t mNumElements;
-};
-
-} // end physx namespace
-
-#endif // PXCUDACONTEXTMANAGER_HEAPMANAGERLINKEDLIST_H
diff --git a/PxShared/src/cudamanager/src/HeapManagerRef.cpp b/PxShared/src/cudamanager/src/HeapManagerRef.cpp
deleted file mode 100644
index bf3847f..0000000
--- a/PxShared/src/cudamanager/src/HeapManagerRef.cpp
+++ /dev/null
@@ -1,1380 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "CudaMemoryManager.h"
-#include "HeapManagerRef.h"
-
-#include "PsSort.h"
-#include "PsArray.h"
-#include "PsAllocator.h"
-#include "PsString.h"
-
-#include "foundation/PxMath.h"
-#include "foundation/PxErrorCallback.h"
-#include "foundation/PxMemory.h"
-
-
-#if DUMP_HEAP_USAGE_TO_FILE
-#include "PsWindowsInclude.h"
-#endif
-
-using namespace physx::shdfnd;
-using namespace physx;
-
-
-#define CMM_DELETE_SINGLE(x) { if(x) delete x; x = NULL; }
-#define CMM_DELETE_ARRAY(x) { if(x) delete [] x; x = NULL; }
-
-HeapManagerRef::HeapManagerRef(physx::PxErrorCallback& errorCallback, bool enableMutex)
- : mHeaps(PX_DEBUG_EXP("HeapManagerRef:mHeaps"))
- , mBuddyPool("mBuddyPool", 1024)
- , mPageSize(0)
- , mMinBlockSize(0)
- , mMaxIntFrag(size_t(-1))
- , mNewEmptyPage(false)
- , mMemAllocator(NULL)
- , mGlobalAllocMem(0)
- , mGlobalMaxAllocMem(0)
- , mGlobalInternalFragmentation(0)
- , mGlobalMaxInternalFragmentation(0)
- , mErrorCallback(errorCallback)
-
-{
- PX_UNUSED(enableMutex); // SJB: heap alloc of shdfnd::Mutex not working for me
-}
-
-HeapManagerRef::~HeapManagerRef()
-{
- for (uint32_t i = 0; i < mHeaps.size(); i++)
- {
- if (mMemAllocator && mHeaps[i].baseAddr)
- {
- mMemAllocator->free(reinterpret_cast<void*>(mHeaps[i].baseAddr), mHeaps[i].heap->getTotalMemorySize());
- }
- CMM_DELETE_SINGLE(mHeaps[i].heap);
- }
-#if DUMP_HEAP_USAGE_TO_FILE
- fclose(mLogFile);
-#endif
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// INTERFACE METHODS
-bool HeapManagerRef::init(Allocator* memAllocator, const size_t baseSize, const size_t pageSize, const size_t minBlockSize, const size_t maxIntFrag)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
- // init Heap and do some basic checks.
-
- // init only once
- if (mHeaps.size())
- {
- return false;
- }
-
- if (baseSize && (minBlockSize > baseSize))
- {
- return false;
- }
-
- if (minBlockSize > pageSize)
- {
- return false;
- }
-
- if (baseSize && (baseSize % minBlockSize))
- {
- return false;
- }
-
- uint8_t minBlockSizeLog2;
- if (minBlockSize != findNextPow2(minBlockSizeLog2, minBlockSize, 0, BITSPERWORD))
- {
- return false;
- }
-
- if (pageSize != findNextPow2(pageSize, minBlockSizeLog2, BITSPERWORD))
- {
- return false;
- }
-
- if (!memAllocator)
- {
- return false;
- }
-
- mMemAllocator = memAllocator;
- mPageSize = pageSize;
- mMinBlockSize = minBlockSize;
- mMaxIntFrag = maxIntFrag;
-
- memset(&mGlobalAllocIdStats, 0, sizeof(PxAllocIdStats)*PxAllocId::NUM_IDS);
-
-#if DUMP_HEAP_USAGE_TO_FILE
- char fileName[1024];
- sprintf_s(fileName, 1024, "HeapLog_%p.txt", this);
- fopen_s(&mLogFile, fileName, "w");
- fprintf(mLogFile, "HeapSize: %d, BlockSize: %d Addr: 0x0\n", baseSize, minBlockSize);
- QueryPerformanceCounter((LARGE_INTEGER*)&m_qpc);
- QueryPerformanceFrequency((LARGE_INTEGER*)&m_qpf);
-#endif
-
- // init heap
- if (baseSize)
- {
- return allocateNewHeap(baseSize, true) != NULL;
- }
- else
- {
- return true;
- }
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool HeapManagerRef::setPageSize(size_t pageSize)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- if (pageSize == 0)
- {
- return false;
- }
-
- if (mMinBlockSize > pageSize)
- {
- return false;
- }
-
- if (pageSize != findNextPow2(pageSize, 0, BITSPERWORD))
- {
- return false;
- }
-
- mPageSize = pageSize;
- return true;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void* HeapManagerRef::alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
-#if DUMP_HEAP_USAGE_TO_FILE
- unsigned __int64 qpc;
- QueryPerformanceCounter((LARGE_INTEGER*)&qpc);
- float dtime = (float)((double)(qpc - m_qpc) / (double)m_qpf);
- fprintf(mLogFile, "alloc t: %f s: %d", dtime, size);
-#endif
-
- void* ret = NULL;
- // try to allocate it in one of the heaps/pages
- for (uint32_t i = 0; !ret && i < mHeaps.size(); i++)
- {
- ret = mHeaps[i].heap->alloc(size, PX_ALLOC_INFO_PARAMS_INPUT());
- }
-
- // create a new page
- if (!ret)
- {
- Heap* heap = allocateNewPages(size);
- if (heap)
- {
- ret = heap->alloc(size, PX_ALLOC_INFO_PARAMS_INPUT());
- }
- }
-
-#if DUMP_HEAP_USAGE_TO_FILE
- fprintf(mLogFile, " a: 0x%p\n", ret);
-#endif
-
- return ret;
-}
-
-//(10/20/2009 feodorb) TODO: decide whether we move the binary search
-//somewhere away from here. Stands here for std::lower_bound replacement
-template<typename T>
-static uint32_t findUpperBound(const physx::shdfnd::Array<T>& refArray, const T& refValue)
-{
- uint32_t start = 0, end = refArray.size();
- while (end - start > 0)
- {
- uint32_t midPoint = start + ((end - start) >> 1);
-
- if (!(refValue < refArray[midPoint]))
- {
- start = midPoint + 1;
- }
- else
- {
- end = midPoint;
- }
- }
- return start;
-}
-
-Heap* HeapManagerRef::findHeap(void* addr) const
-{
- HeapManagerPage searchPage;
- searchPage.baseAddr = reinterpret_cast<size_t>(addr);
-
- uint32_t upperBound = findUpperBound(mHeaps, searchPage);
- PX_ASSERT(upperBound == 0 ||
- (searchPage.baseAddr >= mHeaps[upperBound - 1].baseAddr &&
- searchPage.baseAddr < mHeaps[upperBound - 1].baseAddr + mHeaps[upperBound - 1].heap->getTotalMemorySize())
- );
-
- return (upperBound > 0) ? mHeaps[upperBound - 1].heap : 0;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool HeapManagerRef::realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
-#if DUMP_HEAP_USAGE_TO_FILE
- unsigned __int64 qpc;
- QueryPerformanceCounter((LARGE_INTEGER*)&qpc);
- float dtime = (float)((double)(qpc - m_qpc) / (double)m_qpf);
- fprintf(mLogFile, "realloc t: %f s: %d, a: 0x%p\n", dtime, size, addr);
-#endif
-
- Heap* heap = findHeap(addr);
-
- if (heap != 0)
- {
- bool ret = heap->realloc(addr, size, PX_ALLOC_INFO_PARAMS_INPUT());
- if (ret && size > 0 && mNewEmptyPage)
- {
- shrinkMemory();
- }
- return ret;
- }
- return false;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool HeapManagerRef::free(void* addr)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
-#if DUMP_HEAP_USAGE_TO_FILE
- unsigned __int64 qpc;
- QueryPerformanceCounter((LARGE_INTEGER*)&qpc);
- float dtime = (float)((double)(qpc - m_qpc) / (double)m_qpf);
- fprintf(mLogFile, "free t: %f a: 0x%p\n", dtime, addr);
-#endif
-
- if (addr == NULL)
- {
- return false;
- }
-
- Heap* heap = findHeap(addr);
- if (heap != 0)
- {
- bool ret = heap->free(addr);
- if (ret && mNewEmptyPage)
- {
- shrinkMemory();
- }
- return ret;
- }
- return false;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-static void addStats(ApexHeapStats& dst, const ApexHeapStats& src, uint32_t flags)
-{
- if (flags & HeapStatsFlags::F_BASIC_STATS)
- {
- dst.heapSize += src.heapSize;
- }
- if (flags & HeapStatsFlags::F_BIGGEST_FREE_BLOCK)
- {
- dst.biggestFreeBlock = PxMax(dst.biggestFreeBlock, src.biggestFreeBlock);
- }
- if (flags & HeapStatsFlags::F_HISTOGRAM)
- {
- dst.numEntries = PxMax(dst.numEntries, src.numEntries);
- for (uint32_t i = 0; i < BITSPERWORD; i++)
- {
- dst.freeBuddyHistogram[i] += src.freeBuddyHistogram[i];
- dst.allocatedBuddyHistogram[i] += src.allocatedBuddyHistogram[i];
- }
- }
-}
-
-PX_INLINE void HeapManagerRef::addToStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation)
-{
- PxAllocIdStats& idStats = mGlobalAllocIdStats[id];
- idStats.elements++;
- idStats.size += size;
- idStats.maxElements = PxMax(idStats.maxElements, idStats.elements);
- idStats.maxSize = PxMax(idStats.maxSize, idStats.size);
- mGlobalAllocMem += size;
- mGlobalMaxAllocMem = PxMax(mGlobalMaxAllocMem, mGlobalAllocMem);
- mGlobalInternalFragmentation += fragmentation;
- mGlobalMaxInternalFragmentation = PxMax(mGlobalMaxInternalFragmentation, mGlobalInternalFragmentation);
-}
-
-
-PX_INLINE void HeapManagerRef::removeFromStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation)
-{
- PxAllocIdStats& idStats = mGlobalAllocIdStats[id];
- PX_ASSERT(idStats.elements);
- PX_ASSERT(idStats.size >= size);
-
- idStats.elements--;
- idStats.size -= size;
- mGlobalAllocMem -= size;
- mGlobalInternalFragmentation -= fragmentation;
-}
-
-PX_INLINE void HeapManagerRef::incStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation)
-{
- PxAllocIdStats& idStats = mGlobalAllocIdStats[id];
- idStats.size += change;
- idStats.maxSize = PxMax(idStats.maxSize, idStats.size);
- mGlobalAllocMem += change;
- mGlobalMaxAllocMem = PxMax(mGlobalMaxAllocMem, mGlobalAllocMem);
- mGlobalInternalFragmentation += fragmentation;
- mGlobalMaxInternalFragmentation = PxMax(mGlobalMaxInternalFragmentation, mGlobalInternalFragmentation);
-}
-
-PX_INLINE void HeapManagerRef::decStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation)
-{
- PxAllocIdStats& idStats = mGlobalAllocIdStats[id];
- PX_ASSERT(idStats.size >= change);
- idStats.size -= change;
- mGlobalAllocMem += change;
- mGlobalInternalFragmentation += fragmentation;
-}
-
-bool HeapManagerRef::getStats(ApexHeapStats& stats, const uint32_t flags)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- ApexHeapStats tmpStats;
- stats.reset();
- for (uint32_t i = 0; i < mHeaps.size(); i++)
- {
- mHeaps[i].heap->getStats(tmpStats, flags);
- addStats(stats, tmpStats, flags);
- }
- if (flags & HeapStatsFlags::F_BASIC_STATS)
- {
- stats.totalAllocated = mGlobalAllocMem;
- stats.maxAllocated = mGlobalMaxAllocMem;
-
- }
- if (flags & HeapStatsFlags::F_INTERNAL_FRAGMENTATION)
- {
- stats.internalFragmentation = mGlobalInternalFragmentation;
- stats.maxInternalFragmentation = mGlobalMaxInternalFragmentation;
- }
- if (flags & HeapStatsFlags::F_ALLOC_ID_STATS)
- {
- // stats per allocation ID
- PxMemCopy(stats.allocIdStats, mGlobalAllocIdStats, sizeof(PxAllocIdStats)*PxAllocId::NUM_IDS);
- }
- return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool HeapManagerRef::visualizeMemory(uint8_t* array, const size_t arraySize)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- ApexHeapStats tmpStats;
- getStats(tmpStats, HeapStatsFlags::F_BASIC_STATS);
- float scale = float(arraySize) / float(tmpStats.heapSize);
- uint8_t* start = array;
- for (uint32_t i = 0; i < mHeaps.size(); i++)
- {
- size_t heapSize = mHeaps[i].heap->getTotalMemorySize();
- size_t numVis = size_t(float(heapSize) * scale);
- PX_ASSERT(start + numVis <= array + arraySize);
- mHeaps[i].heap->visualizeMemory(start, numVis);
- start += numVis;
- }
- return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void* HeapManagerRef::findBaseAddress(void* addr)
-{
- Heap* heap = findHeap(addr);
- if (heap)
- {
- return heap->getBaseAddress();
- }
- return NULL;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-Heap* HeapManagerRef::allocateNewHeap(size_t heapSize, bool isPersistent)
-{
- if (!mMemAllocator)
- {
- return NULL;
- }
-
- void* newPage = mMemAllocator->alloc(heapSize);
- if (newPage)
- {
- HeapManagerPage page;
- page.baseAddr = reinterpret_cast<size_t>(newPage);
- page.heap = PX_NEW(Heap)(*this, mErrorCallback);
- page.isPersistent = isPersistent;
- if (page.heap && page.heap->init(page.baseAddr, heapSize, mMinBlockSize, mMaxIntFrag))
- {
- mHeaps.pushBack(page);
- shdfnd::sort(mHeaps.begin(), (uint32_t) mHeaps.size());
- return page.heap;
- }
- else
- {
- mMemAllocator->free(newPage, page.heap->getTotalMemorySize());
- CMM_DELETE_SINGLE(page.heap);
- }
- }
- return NULL;
-}
-
-Heap* HeapManagerRef::allocateNewPages(size_t requestedSize)
-{
- uint8_t pageSizeLog2;
- uint8_t minBlockSizeLog2;
- findNextPow2(minBlockSizeLog2, mMinBlockSize, 0, BITSPERWORD);
- findNextPow2(pageSizeLog2, mPageSize, minBlockSizeLog2, BITSPERWORD);
- const size_t allocSize = findNextPow2(requestedSize, pageSizeLog2, BITSPERWORD);
- return allocateNewHeap(allocSize);
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void HeapManagerRef::removeDeletedHeapsFromList(uint32_t numDeletes)
-{
- // remove pages from list, keeping it sorted.
- if (numDeletes)
- {
- const uint32_t numEntries = (uint32_t) mHeaps.size();
-
- //seek
- uint32_t w = 0;
- while (w < (numEntries) && mHeaps[w].heap != NULL)
- {
- w++;
- }
-
- // remove holes
- uint32_t r = w + 1;
- while (r < numEntries)
- {
- if (mHeaps[r].heap == NULL)
- {
- r++;
- }
- else
- {
- mHeaps[w++] = mHeaps[r++];
- }
- }
-
- mHeaps.resize(numEntries - numDeletes);
- }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void HeapManagerRef::resetHeap(HeapManagerPage& page)
-{
- PX_ASSERT(page.heap->getAllocatedMemorySize() == 0);
- Heap* newHeap = PX_NEW(Heap)(*this, mErrorCallback);
- if (newHeap)
- {
- if (newHeap->init(page.baseAddr, page.heap->getTotalMemorySize(), mMinBlockSize, mMaxIntFrag))
- {
- CMM_DELETE_SINGLE(page.heap);
- page.heap = newHeap;
- }
- else
- {
- CMM_DELETE_SINGLE(newHeap);
- }
- }
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void HeapManagerRef::freeEmptyPages()
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- // release heaps
- uint32_t numDeletes = 0;
- const uint32_t numEntries = (uint32_t) mHeaps.size();
- for (uint32_t i = 0; i < numEntries; i++)
- {
- HeapManagerPage& page = mHeaps[i];
- PX_ASSERT(page.heap);
- if (page.isPersistent)
- {
- // for persistent pages: reset without release.
- if (page.heap->getAllocatedMemorySize() == 0)
- {
- resetHeap(page);
- }
- }
- else if (page.heap->getAllocatedMemorySize() == 0)
- {
- mMemAllocator->free(reinterpret_cast<void*>(page.baseAddr), page.heap->getTotalMemorySize());
- CMM_DELETE_SINGLE(page.heap);
- numDeletes++;
- }
- }
-
- if (numDeletes)
- {
- removeDeletedHeapsFromList(numDeletes);
- }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void HeapManagerRef::shrinkMemory()
-{
- mNewEmptyPage = false;
-
- // collect stats
- size_t totalAllocated = 0;
- size_t totalPageSize = 0;
- const uint32_t numEntries = (uint32_t) mHeaps.size();
- for (uint32_t i = 0; i < numEntries; i++)
- {
- HeapManagerPage& page = mHeaps[i];
- totalAllocated += page.heap->getAllocatedMemorySize();
- totalPageSize += page.heap->getTotalMemorySize();
- PX_ASSERT(totalAllocated <= totalPageSize);
- }
-
- // shrink memory if free non-persistent space is half or more of the allocated pages.
- // releasing from the back of address sorted list, other strategies like LRU, best fit are also possible.
- if (totalPageSize)
- {
- float allocScale = float(totalAllocated) / float(totalPageSize);
- if (allocScale <= 0.5f)
- {
- size_t sizeToRelease = totalAllocated ? (totalPageSize - totalAllocated) >> 1 : totalPageSize;
- uint32_t numDeletes = 0;
- for (uint32_t i = 0; i < numEntries; i++)
- {
- HeapManagerPage& page = mHeaps[numEntries - i - 1];
- PX_ASSERT(page.heap);
- if (page.heap->getAllocatedMemorySize() == 0)
- {
- if (!page.isPersistent && page.heap->getTotalMemorySize() <= sizeToRelease)
- {
- mMemAllocator->free(reinterpret_cast<void*>(page.baseAddr), page.heap->getTotalMemorySize());
- sizeToRelease -= page.heap->getTotalMemorySize();
- CMM_DELETE_SINGLE(page.heap);
- numDeletes++;
- }
- else
- {
- resetHeap(page);
- }
- }
- }
-
- if (numDeletes)
- {
- removeDeletedHeapsFromList(numDeletes);
- }
- }
- }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool HeapManagerRef::reserve(size_t size)
-{
- shdfnd::Mutex::ScopedLock lock(mMutex);
-
- size_t freeSize = 0;
- for (uint32_t i = 0; i < mHeaps.size(); i++)
- {
- freeSize += mHeaps[i].heap->getTotalMemorySize() - mHeaps[i].heap->getAllocatedMemorySize();
- }
-
- if (freeSize < size)
- {
- return allocateNewPages(size - freeSize) != NULL;
- }
- else
- {
- return true;
- }
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-PX_INLINE size_t HeapManagerRef::findNextPow2(const size_t val, const uint8_t min, const uint8_t max)
-{
- size_t ret = val;
- for (uint8_t i = min; i <= max; i++)
- {
- ret = size_t(1) << i;
- if (ret >= val)
- {
- break;
- }
- }
- return ret;
-}
-
-
-PX_INLINE size_t HeapManagerRef::findNextPow2(uint8_t& pow, const size_t val, const uint8_t min, const uint8_t max)
-{
- size_t ret = val;
- for (pow = min; pow <= max; pow++)
- {
- ret = size_t(1) << pow;
- if (ret >= val)
- {
- break;
- }
- }
- return ret;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void BuddyList::clear()
-{
- Buddy* cur = pop();
- while (cur)
- {
- heap->getBuddyPool().destroy(cur);
- cur = pop();
- }
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-Heap::~Heap()
-{
- for (uint32_t i = 0; i <= mMaxLevel; i++)
- {
- mFreeBuddiesAtLevel[i].clear();
- }
- CMM_DELETE_ARRAY(mFreeBuddiesAtLevel);
-
- for (Buddy* buddy = mAllocatedBuddies.getHead(); buddy != NULL; buddy = buddy->next)
- {
- void* address = reinterpret_cast<void*>((buddy->addr << mMinBlockLog2) + mBaseAddr);
-#if KEEP_DEBUG_INFO
- char buffer[256];
- physx::shdfnd::snprintf(buffer, 256, "Memory leak!\naddress %p file %s, line %d, name %s", address, buddy->file, buddy->line, buddy->allocName);
- mErrorCallback.reportError(PxErrorCode::eDEBUG_WARNING, buffer, __FILE__, __LINE__);
-#else
- char buffer[256];
- physx::shdfnd::snprintf(buffer, 256, "Memory leak at address %p", address);
- mErrorCallback.reportError(PxErrorCode::eDEBUG_WARNING, buffer, __FILE__, __LINE__);
-#endif
- }
-
- //clear it anyway
- mAllocatedBuddies.clear();
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::init(size_t baseAddr, const size_t baseSize, const size_t minBlockSize, const size_t maxIntFrag)
-{
- if (minBlockSize != HeapManagerRef::findNextPow2(mMinBlockLog2, minBlockSize, 0, BITSPERWORD))
- {
- return false;
- }
-
- if ((maxIntFrag != size_t(-1)) && (maxIntFrag != HeapManagerRef::findNextPow2(maxIntFrag, 0, BITSPERWORD)))
- {
- return false;
- }
-
- mMaxIntFrag = maxIntFrag;
-
- mMinBlockSize = minBlockSize;
- mTotalSize = baseSize;
- mBaseAddr = baseAddr;
-
- if (mBaseAddr == 0)
- {
- return false;
- }
-
- size_t numBlocks = baseSize >> mMinBlockLog2;
- // allow only memory blocks which have a power of 2 in size. and numblocks must be at least 1.
- if (numBlocks != HeapManagerRef::findNextPow2(mMaxLevel, numBlocks, 0, sizeof(size_t) * 4))
- {
- return false;
- }
-
- mFreeBuddiesAtLevel = PX_NEW(BuddyList)[(unsigned int)(mMaxLevel + 1)];
- if (!mFreeBuddiesAtLevel)
- {
- return false;
- }
-
- // init size of buddy arrays
- for (uint32_t i = 0; i <= mMaxLevel; i++)
- {
- mFreeBuddiesAtLevel[i].buddySize = size_t(1) << i;
- mFreeBuddiesAtLevel[i].heap = this;
- }
- mAllocatedBuddies.heap = this;
-
- Buddy* b = mManager.getBuddyPool().construct();
- if (!b)
- {
- CMM_DELETE_ARRAY(mFreeBuddiesAtLevel);
- return false;
- }
- b->level = mMaxLevel;
-
- // add buddy to its array
- mFreeBuddiesAtLevel[mMaxLevel].insert(b);
-
- return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void* Heap::alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
- /*
- compute needed buddysize -> level
- if(mFreeBuddiesAtLevel[level].array.size() > 0)
- {
- ret = array.popBack()
- allocList.pushBack(ret)
- return ret.adr+basePtr;
- }
- else
- {
- if(nonemptylist at higher levels)
- ret = recursive split
- else if(nonemptylist at lower levels )
- ret = recursive merge
- else
- ret = NULL; // heap full or to fragmented
- }
- */
-
- PX_UNUSED(allocId);
- PX_UNUSED(allocName);
- PX_UNUSED(line);
- PX_UNUSED(file);
-
- if (size == 0 || size > mTotalSize)
- {
- return NULL;
- }
-
- PX_ASSERT(allocId < PxAllocId::NUM_IDS);
- //PX_ASSERT(allocId != PxAllocId::UNASSIGNED); // enable to track unassigned memory
-
- // compute needed buddysize -> level
- uint8_t level = 0;
- HeapManagerRef::findNextPow2(level, size, mMinBlockLog2, BITSPERWORD);
- level = uint8_t(level - mMinBlockLog2);
-
- Buddy* ret = NULL;
- if (mFreeBuddiesAtLevel[level].getSize() > 0)
- {
- ret = mFreeBuddiesAtLevel[level].pop();
- }
- else
- {
- // prefer splitting
- if (level != mMaxLevel)
- {
- ret = findBySplitting(level);
- }
- // else try merging
- if (!ret && level != 0)
- {
- ret = findByMerging(level);
- }
- }
-
- if (ret)
- {
- ret->occupiedSize = size;
- size_t addr = ret->addr;
- ret->allocId = uint16_t(allocId);
-#if KEEP_DEBUG_INFO
- ret->file = file;
- ret->line = (uint32_t)line;
- ret->allocName = allocName;
-#endif
-
- size_t allocSize;
- if (mMaxIntFrag != size_t(-1))
- {
- allocSize = reduceIntFragment(*ret, mMaxIntFrag); // ret can be changed in here, that's why we store the address
- }
- else
- {
- allocSize = size_t(1) << (level + mMinBlockLog2);
- mAllocatedBuddies.insertSorted(ret);
- }
- mAllocMem += allocSize;
- mInternalFragmentation += allocSize - size;
- mMaxAllocMem = PxMax(mAllocMem, mMaxAllocMem);
- mMaxInternalFragmentation = PxMax(mInternalFragmentation, mMaxInternalFragmentation);
- mManager.addToStats(allocId, allocSize, allocSize - size);
-
- PX_ASSERT(sanityTest());
- return reinterpret_cast<void*>((addr << mMinBlockLog2) + mBaseAddr);
- }
- else
- {
- PX_ASSERT(sanityTest());
- return NULL;
- }
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
- PX_UNUSED(allocId);
- PX_UNUSED(allocName);
- PX_UNUSED(line);
- PX_UNUSED(file);
-
- if (addr == NULL || size > mTotalSize)
- {
- return false;
- }
-
- if (size == 0)
- {
- //realloc to 0 bytes can't keep the pointer as it was.
- return false;
- //return free(addr);
- }
-
- size_t inernal_addr = reinterpret_cast<size_t>(addr) - mBaseAddr;
- inernal_addr >>= mMinBlockLog2;
-
- // collect all buddies which are associated with this addr
- shdfnd::Array<Buddy*, shdfnd::TempAllocator> budyList;
- size_t totalAllocated = 0;
- size_t buddyAllocated = 0;
- Buddy* found = NULL;
- do
- {
- found = mAllocatedBuddies.find(inernal_addr);
- if (!found)
- {
- return false;
- }
- budyList.pushBack(found);
- inernal_addr += size_t(1) << found->level;
- totalAllocated += found->occupiedSize;
- buddyAllocated += size_t(1) << (found->level + mMinBlockLog2);
- }
- while (found && !found->isLastBuddy);
-
- Buddy* cur = budyList.popBack();
-
- // increase size
- if (totalAllocated < size)
- {
- size_t leftSpace = (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize;
- size_t neededSpace = size - totalAllocated;
- if (neededSpace <= leftSpace)
- {
- cur->occupiedSize += neededSpace;
-#if KEEP_DEBUG_INFO
- cur->file = file;
- cur->line = (uint32_t)line;
- cur->allocName = allocName;
-#endif
-
- mInternalFragmentation -= neededSpace;
- mManager.decStats(PxAllocId::Enum(cur->allocId), 0, neededSpace);
-
- // replace
- mAllocatedBuddies.remove(cur);
- mAllocatedBuddies.insertSorted(cur);
- PX_ASSERT(sanityTest());
- return true;
- }
- else
- {
- return false;
-#ifdef UNREACHABLE
- // TODO:try merge free buddies until big enough,
- // then add buddy and do internal fragmentation reduction.
-
- // search for free blocks next to this one.
- size_t addr = cur->addr + (size_t(1) << cur->level);
- if (!mAllocatedBuddies.find(addr))
- {
- return false;
- }
-
- // if not found, return null, let user reallocate
- PX_ASSERT(sanityTest());
- return false;
-#endif
- }
- }
- // reduce size
- else
- {
- // succededly remove buddies until the requested size is reached.
- // if internal fragmentation reduction is turned on, then an allocation can consist of multiple buddies.
- mInternalFragmentation -= (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize;
- mManager.decStats(PxAllocId::Enum(cur->allocId), 0, (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize);
- size_t diff = totalAllocated - size;
- while (diff >= cur->occupiedSize)
- {
- diff -= cur->occupiedSize;
- cur->occupiedSize = 0;
- bool succ = mAllocatedBuddies.remove(cur);
- PX_UNUSED(succ);
- PX_ASSERT(succ);
- mFreeBuddiesAtLevel[cur->level].insertSorted(cur);
- size_t allocSize = size_t(1) << (cur->level + mMinBlockLog2);
- mAllocMem -= allocSize;
- mManager.decStats(PxAllocId::Enum(cur->allocId), allocSize, 0);
- cur = budyList.popBack();
- }
- cur->isLastBuddy = true;
- cur->occupiedSize -= diff;
-
-#if KEEP_DEBUG_INFO
- cur->file = file;
- cur->line =(uint32_t)line;
- cur->allocName = allocName;
-#endif
-
- // replace
- bool succ = mAllocatedBuddies.remove(cur);
- PX_UNUSED(succ);
- PX_ASSERT(succ);
- mAllocatedBuddies.insertSorted(cur);
- mInternalFragmentation += (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize;
- mManager.incStats(PxAllocId::Enum(cur->allocId), 0, (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize);
- PX_ASSERT(sanityTest());
- return true;
- }
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::free(void* addr)
-{
- if (addr == NULL)
- {
- return false;
- }
-
- size_t internalAddr = reinterpret_cast<size_t>(addr) - mBaseAddr;
- internalAddr >>= mMinBlockLog2;
-
- bool ret = true;
-
- bool dummy = true;
- while (dummy)
- {
- Buddy* b = mAllocatedBuddies.findAndPop(internalAddr);
-
- if (!b)
- {
- return false;
- }
-
- size_t allocSize = size_t(1) << (b->level + mMinBlockLog2);
- mAllocMem -= allocSize;
- mInternalFragmentation -= allocSize - b->occupiedSize;
- mManager.removeFromStats(PxAllocId::Enum(b->allocId), allocSize, allocSize - b->occupiedSize);
- b->occupiedSize = 0;
-
- mFreeBuddiesAtLevel[b->level].insertSorted(b);
-
- // check if this memory block occupied another buddy
- if (b->isLastBuddy)
- {
- break;
- }
- else
- {
- internalAddr += size_t(1) << b->level;
- }
- }
-
- if (mAllocMem == 0)
- {
- mManager.notifyEmptyPage();
- }
-
- PX_ASSERT(sanityTest());
- return ret;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::getStats(ApexHeapStats& stats, const uint32_t flags)
-{
- if (flags & HeapStatsFlags::F_BASIC_STATS)
- {
- stats.heapSize = mTotalSize;
- stats.totalAllocated = mAllocMem;
- stats.maxAllocated = mMaxAllocMem;
- }
- if (flags & HeapStatsFlags::F_INTERNAL_FRAGMENTATION)
- {
- // internal fragmentation
- stats.internalFragmentation = mInternalFragmentation;
- stats.maxInternalFragmentation = mMaxInternalFragmentation;
- }
- if (flags & HeapStatsFlags::F_BIGGEST_FREE_BLOCK)
- {
- // bigggest free block
- stats.biggestFreeBlock = 0;
- uint8_t curLevel = mMaxLevel;
- do
- {
- if (mFreeBuddiesAtLevel[curLevel].getSize())
- {
- stats.biggestFreeBlock = mFreeBuddiesAtLevel[curLevel].buddySize << mMinBlockLog2;
- break;
- }
- curLevel--;
- }
- while (curLevel != 0);
- }
- if (flags & HeapStatsFlags::F_HISTOGRAM)
- {
- // histograms
- for (uint8_t i = 0; i <= mMaxLevel; i++)
- {
- stats.freeBuddyHistogram[i] = mFreeBuddiesAtLevel[i].getSize();
- stats.allocatedBuddyHistogram[i] = 0;
- }
- Buddy* b = mAllocatedBuddies.getHead();
- while (b)
- {
- stats.allocatedBuddyHistogram[b->level]++;
- b = b->next;
- }
- stats.numEntries = size_t(mMaxLevel + 1);
- }
- return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::sanityTest()
-{
- size_t sum = 0;
-
- for (uint8_t i = 0; i <= mMaxLevel; i++)
- {
- size_t buddiesAtLevel = 0;
- for (Buddy* cur = mFreeBuddiesAtLevel[i].getHead(); cur; cur = cur->next)
- {
- if ((size_t(1) << cur->level != mFreeBuddiesAtLevel[i].buddySize) ||
- (cur->occupiedSize > size_t(1) << (cur->level + mMinBlockLog2)))
- {
- return false;
- }
- sum += mFreeBuddiesAtLevel[i].buddySize << mMinBlockLog2;
- buddiesAtLevel++;
- }
- if (mFreeBuddiesAtLevel[i].getSize() != buddiesAtLevel ||
- (buddiesAtLevel > (size_t(1) << (mMaxLevel - i))))
- {
- return false;
- }
- }
-
- size_t numAllocated = 0;
- for (Buddy* cur = mAllocatedBuddies.getHead(); cur; cur = cur->next)
- {
- sum += size_t(1) << (cur->level + mMinBlockLog2);
- numAllocated++;
- }
-
- if (numAllocated != mAllocatedBuddies.getSize())
- {
- return false;
- }
-
- ptrdiff_t diff = ptrdiff_t(sum - (size_t(1) << (mMaxLevel + mMinBlockLog2)));
- if (diff != 0)
- {
- return false;
- }
- else
- {
- return true;
- }
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::visualizeMemory(uint8_t* array, const size_t size)
-{
- float scale = (float)size / (size_t(1) << mMaxLevel);
-
- for (size_t i = 0; i < size; i++)
- {
- array[i] = 0;
- }
- for (Buddy* cur = mAllocatedBuddies.getHead(); cur; cur = cur->next)
- {
- size_t start = (size_t)((float)(cur->addr) * scale);
- size_t end = (size_t)((float)(cur->addr + (size_t(1) << size_t(cur->level))) * scale);
- PX_ASSERT(start <= size);
- PX_ASSERT(end <= size);
- for (size_t i = start; i < end; i++)
- {
- PX_ASSERT(i < size);
- array[i] = uint8_t(cur->level + 1);
- }
- }
-
- return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::splitBuddy(Buddy* b)
-{
- if (b->level == 0)
- {
- return false;
- }
-
- b->level--;
- size_t newSize = size_t(1) << b->level;
-
- Buddy* b0 = b;
- Buddy* b1 = mManager.getBuddyPool().construct(*b);
- PX_ASSERT(b0 && b1);
-
- b1->addr = b1->addr + newSize;
-
- mFreeBuddiesAtLevel[b0->level].insertSorted(b0);
- mFreeBuddiesAtLevel[b1->level].insertSorted(b1);
- return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::mergeBuddies(Buddy* b0, Buddy* b1)
-{
- if (b0->level != b1->level || b0->level >= mMaxLevel || (b1->addr - b0->addr) != size_t(1) << size_t(b0->level))
- {
- return false;
- }
-
- Buddy* b = b0;
- b->occupiedSize = 0;
- b->isLastBuddy = true;
- b->level++;
- b->next = NULL;
- mFreeBuddiesAtLevel[b->level].insertSorted(b);
-
- mManager.getBuddyPool().destroy(b1);
- return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-Buddy* Heap::findBySplitting(uint8_t searchLevel)
-{
- Buddy* ret = NULL;
-
- uint8_t curLevel = searchLevel;
-
- // walk through array of buddy lists and search for a free buddy which is at level >= searchLevel
- for (; !mFreeBuddiesAtLevel[curLevel].getSize() && (curLevel < mMaxLevel); curLevel++)
- {
- ;
- }
-
- // pop buddy at highest level and split until it has the correct level
- ret = mFreeBuddiesAtLevel[curLevel].pop();
- for (; ret && (curLevel != searchLevel) && curLevel > 0; curLevel--)
- {
- splitBuddy(ret);
- ret = mFreeBuddiesAtLevel[curLevel - 1].pop();
- }
- return ret;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-Buddy* Heap::findByMerging(uint8_t searchLevel)
-{
- /*
- while there is no pair to merge on this level, ask lower level to merge
- may ask lower level to merge more than 1 pair after each failure
- or just merge all pairs of lower levels
- */
- if (searchLevel == 0)
- {
- return NULL;
- }
-
- uint8_t curLevel = uint8_t(searchLevel - 1);
- bool dummy = true;
- while (dummy)
- {
- int32_t shift = (mMaxLevel - (1 << (curLevel + 1)));
- shift = shift >= 0 ? shift : 0;
- size_t numToFind = size_t(1) << shift;
- size_t found = findPairAndMerge(mFreeBuddiesAtLevel[curLevel], numToFind);
- if (found)
- {
- if (curLevel == searchLevel - 1)
- {
- break;
- }
- curLevel++;
- }
- else
- {
- if (curLevel > 0)
- {
- curLevel--;
- }
- else
- {
- return NULL;
- }
- }
- }
- return mFreeBuddiesAtLevel[searchLevel].pop();
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-size_t Heap::findPairAndMerge(BuddyList& list, size_t numToFind)
-{
- size_t found = 0;
- Buddy* cur = list.getHead();
- Buddy* last = list.getHead();
- size_t diff = list.buddySize;
- while ((found != numToFind) && cur && cur->next)
- {
- // find buddy pair b0 and b1, b0 must be at an even address, and b0 and b1 must be neighbours in address space.
- // since the list is sorted, we do only compare neighbours in the list.
- if (((cur->addr & (size_t(1) << size_t(cur->level))) == 0) && (cur->next->addr - cur->addr == diff))
- {
- Buddy* b0 = cur;
- Buddy* b1 = cur->next;
-
- if (cur == list.getHead())
- {
- list.setHead(cur->next->next);
- cur = list.getHead();
- last = cur;
- }
- else
- {
- cur = cur->next->next;
- last->next = cur;
- }
- list.setSize(list.getSize() - 2);
- if (mergeBuddies(b0, b1))
- {
- found++;
- }
- }
- else
- {
- last = cur;
- cur = cur->next;
- }
- }
- return found;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-size_t Heap::reduceIntFragment(Buddy& b, size_t threshold)
-{
- /*
- while internalFragmentation > threshold
- cut buddy in the middle
- if cut goes through occupied space
- left buddy is full, can be pushed to alloc list clear last buddy flag
- else
- right buddy is free, push it to free list
- */
- size_t allocSize = 0;
- Buddy* curB = &b;
- curB->isLastBuddy = false;
- while (curB->level && ((size_t(1) << (curB->level + mMinBlockLog2)) - curB->occupiedSize) > threshold)
- {
- //split
- Buddy* b0 = mManager.getBuddyPool().construct(*curB);
- Buddy* b1 = curB;
- b0->level--;
- b1->level--;
- b1->addr += size_t(1) << size_t(b1->level);
- if ((size_t(1) << (b0->level + mMinBlockLog2)) < b0->occupiedSize)
- {
- b0->occupiedSize = size_t(1) << (b0->level + mMinBlockLog2);
- b1->occupiedSize -= b0->occupiedSize;
- mAllocatedBuddies.insertSorted(b0);
- allocSize += size_t(1) << b1->level;
- curB = b1;
- }
- else
- {
- b1->occupiedSize = 0;
- mFreeBuddiesAtLevel[b1->level].insertSorted(b1);
- curB = b0;
- }
- }
- curB->isLastBuddy = true;
- allocSize += size_t(1) << curB->level;
- mAllocatedBuddies.insertSorted(curB);
- return (allocSize << mMinBlockLog2);
-}
-
-
diff --git a/PxShared/src/cudamanager/src/HeapManagerRef.h b/PxShared/src/cudamanager/src/HeapManagerRef.h
deleted file mode 100644
index e6e585e..0000000
--- a/PxShared/src/cudamanager/src/HeapManagerRef.h
+++ /dev/null
@@ -1,297 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-//----------------------------------------------------------------------------//
-// HeapManagerRef.h
-//----------------------------------------------------------------------------//
-
-#ifndef PXCUDACONTEXTMANAGER_HEAPMANAGERREF_H
-#define PXCUDACONTEXTMANAGER_HEAPMANAGERREF_H
-
-#include "task/PxTaskDefine.h"
-
-#include "HeapManagerInterface.h"
-#include "HeapManagerLinkedList.h"
-#include "PsPool.h"
-#include "PsMutex.h"
-#include "PsArray.h"
-#include "PsUserAllocated.h"
-
-#define DUMP_HEAP_USAGE_TO_FILE 0
-
-#ifdef _DEBUG
-#define KEEP_DEBUG_INFO 1
-#else
-#define KEEP_DEBUG_INFO 0
-#endif
-
-#if DUMP_HEAP_USAGE_TO_FILE
-#include "stdio.h"
-#endif
-
-namespace physx
-{
-
-class Heap;
-struct Buddy
-{
- PX_INLINE Buddy()
- : addr(0)
- , next(0)
- , occupiedSize(0)
- , allocId(PxAllocId::UNASSIGNED)
- , isLastBuddy(true)
- , level(0)
-#if KEEP_DEBUG_INFO
- , file(NULL)
- , allocName(NULL)
- , line(0)
-#endif
- {}
-
- PX_INLINE Buddy(Buddy& b)
- : addr(b.addr)
- , next(b.next)
- , occupiedSize(b.occupiedSize)
- , allocId(b.allocId)
- , isLastBuddy(b.isLastBuddy)
- , level(b.level)
-#if KEEP_DEBUG_INFO
- , file(b.file)
- , allocName(b.allocName)
- , line(b.line)
-#endif
- {}
-
- size_t addr;
- Buddy* next;
- size_t occupiedSize;
- uint16_t allocId;
- uint8_t isLastBuddy;
- uint8_t level;
-#if KEEP_DEBUG_INFO
- const char* file;
- const char* allocName;
- uint32_t line;
-#endif
-};
-
-struct BuddyList: public LinkedList<Buddy>, public shdfnd::UserAllocated
-{
- BuddyList()
- : buddySize(0)
- , heap(NULL)
- {}
- void clear();
-
- size_t buddySize; // = 2^level
- Heap* heap;
-};
-
-struct HeapManagerPage
-{
- PX_INLINE bool operator < (const HeapManagerPage& p) const
- {
- return baseAddr < p.baseAddr;
- }
-
- PX_INLINE bool operator > (const HeapManagerPage& p) const
- {
- return baseAddr > p.baseAddr;
- }
-
- size_t baseAddr;
- Heap* heap;
- bool isPersistent;
-};
-
-
-class HeapManagerRef: public HeapManagerInterface, public shdfnd::UserAllocated
-{
- PX_NOCOPY(HeapManagerRef)
-public:
- HeapManagerRef(physx::PxErrorCallback& errorCallback, bool enableMutex = true);
- virtual ~HeapManagerRef();
-
- // INTERFACE METHODS
- virtual bool init(Allocator* memAllocator, const size_t baseSize, const size_t pageSize, const size_t minBlockSize, const size_t maxIntFrag);
- virtual bool setPageSize(size_t pageSize);
- virtual void* alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
- virtual bool realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
- virtual bool free(void* addr);
- virtual void freeEmptyPages();
- virtual bool reserve(size_t size);
- virtual bool getStats(ApexHeapStats& stats, const uint32_t flags);
- virtual bool visualizeMemory(uint8_t* array, const size_t size);
- virtual void* findBaseAddress(void* addr);
-
- // INTERNALS
- // searches 2^pow >= val, searches pow in [min, max]
- static PX_INLINE size_t findNextPow2(const size_t val, const uint8_t min = 0, const uint8_t max = BITSPERWORD);
- static PX_INLINE size_t findNextPow2(uint8_t& pow, const size_t val, const uint8_t min = 0, const uint8_t max = BITSPERWORD);
-
- PX_INLINE void addToStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation);
- PX_INLINE void removeFromStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation);
- PX_INLINE void incStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation);
- PX_INLINE void decStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation);
-
- PX_INLINE void notifyEmptyPage()
- {
- mNewEmptyPage = true;
- }
- PX_INLINE shdfnd::Pool<Buddy>& getBuddyPool()
- {
- return mBuddyPool;
- }
-
-private:
- Heap* allocateNewHeap(size_t heapSize, bool isPersistent = false);
- Heap* allocateNewPages(size_t requestedSize);
- void resetHeap(HeapManagerPage& page);
- void removeDeletedHeapsFromList(uint32_t numDeletes);
- void shrinkMemory();
-
- Heap* findHeap(void* addr) const;
-
-private:
- // heaps
- shdfnd::Array<HeapManagerPage> mHeaps;
- shdfnd::Pool<Buddy> mBuddyPool;
- size_t mPageSize;
- size_t mMinBlockSize;
- size_t mMaxIntFrag;
- bool mNewEmptyPage;
- // lock
- shdfnd::Mutex mMutex;
- // page allocator
- Allocator* mMemAllocator;
- // overall stats
- size_t mGlobalAllocMem;
- size_t mGlobalMaxAllocMem;
- size_t mGlobalInternalFragmentation;
- size_t mGlobalMaxInternalFragmentation;
- // stats per allocation ID
- PxAllocIdStats mGlobalAllocIdStats[PxAllocId::NUM_IDS];
- // error callback
- physx::PxErrorCallback& mErrorCallback;
-
-#if DUMP_HEAP_USAGE_TO_FILE
- FILE* mLogFile;
- unsigned __int64 m_qpc;
- unsigned __int64 m_qpf;
-#endif
-};
-
-
-class Heap : public shdfnd::UserAllocated
-{
-public:
- PX_INLINE Heap(HeapManagerRef& manager, physx::PxErrorCallback& errorCallback)
- : mManager(manager)
- , mErrorCallback(errorCallback)
- , mBaseAddr(0)
- , mMinBlockSize(0)
- , mFreeBuddiesAtLevel(NULL)
- , mMaxIntFrag(0)
- , mTotalSize(0)
- , mMaxLevel(0)
- , mMinBlockLog2(0)
- , mAllocMem(0)
- , mMaxAllocMem(0)
- , mInternalFragmentation(0)
- , mMaxInternalFragmentation(0)
- {}
-
- PX_INLINE ~Heap();
-
- bool init(size_t baseAddr, const size_t baseSize, const size_t minBlockSize, const size_t maxIntFrag);
- void* alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
- bool realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
- bool free(void* addr);
- bool getStats(ApexHeapStats& stats, const uint32_t flags);
- bool visualizeMemory(uint8_t* array, const size_t size);
-
- PX_INLINE size_t getTotalMemorySize()
- {
- return mTotalSize;
- }
- PX_INLINE size_t getAllocatedMemorySize()
- {
- return mAllocMem;
- }
- PX_INLINE shdfnd::Pool<Buddy>& getBuddyPool()
- {
- return mManager.getBuddyPool();
- }
- PX_INLINE void* getBaseAddress()
- {
- return (void*)mBaseAddr;
- }
-
-private:
- // split buddy b with size 2^level into two buddies with level 2^(level-1) and append those to the free list. deletes b, assumes that b was removed from the list before.
- bool splitBuddy(Buddy* b);
- // merge 2 buddies to next bigger one. deletes b0 and b1, assumes that they are already removed from their array.
- bool mergeBuddies(Buddy* b0, Buddy* b1);
-
- // split of right, free children of a buddy if the internal fragmentation of a buddy is bigger than a threshold
- // returns the size of all allocated buddies
- size_t reduceIntFragment(Buddy& b, size_t threshold);
-
- // find a Buddy by splitting a Buddy at searchLevel
- Buddy* findBySplitting(uint8_t searchLevel);
- Buddy* findByMerging(uint8_t searchLevel);
- size_t findPairAndMerge(BuddyList& list, size_t numToFind);
-
- bool sanityTest();
-
- void operator=(const Heap&)
- {
- PX_ASSERT(0);
- }
-
-private:
- HeapManagerRef& mManager;
- physx::PxErrorCallback& mErrorCallback;
- size_t mBaseAddr;
- size_t mMinBlockSize;
- BuddyList* mFreeBuddiesAtLevel;
- BuddyList mAllocatedBuddies;
- size_t mMaxIntFrag;
- size_t mTotalSize;
- uint8_t mMaxLevel; // 2^maxLevel <= memorySize
- uint8_t mMinBlockLog2;
-
- size_t mAllocMem; // fragmented
- size_t mMaxAllocMem;
- size_t mInternalFragmentation;
- size_t mMaxInternalFragmentation;
-};
-
-} // end physx namespace
-
-#endif // PXCUDACONTEXTMANAGER_HEAPMANAGERREF_H
diff --git a/PxShared/src/cudamanager/src/PhysXDevice.h b/PxShared/src/cudamanager/src/PhysXDevice.h
deleted file mode 100644
index b066bdc..0000000
--- a/PxShared/src/cudamanager/src/PhysXDevice.h
+++ /dev/null
@@ -1,119 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef ___PHYS_X_DEVICE_
-#define ___PHYS_X_DEVICE_
-
-#include "foundation/PxPreprocessor.h"
-
-#if PX_WINDOWS
-# pragma warning (push)
-# pragma warning (disable : 4668) //'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives'
-# include "windows/PsWindowsInclude.h"
-# pragma warning (pop)
-#endif
-
-#if PX_WINDOWS
- #define PHYSX_DEV_DLL_API extern "C" __declspec(dllexport)
- #ifdef _DEBUG
- # define PHYSX_DEV_DLL_PRIVATE_API extern "C" __declspec(dllexport)
- #else
- # define PHYSX_DEV_DLL_PRIVATE_API
- #endif
-#else
- #define PHYSX_DEV_DLL_API
- #define PHYSX_DEV_DLL_PRIVATE_API
-#endif
-
-/** typedefs */
-typedef int PHYSX_DEV_STATUS;
-typedef unsigned int PhysXDevHandle;
-
-/** PHYSX_DEV_STATUS values */
-enum
-{
- PHYSX_DEV_OK = 0,
- PHYSX_DEV_UNKNOWN_ERROR,
- PHYSX_DEV_INVALID_HANDLE,
- PHYSX_DEV_UNINITIALIZED,
- PHYSX_DEV_NV_API_UNAVAILABLE,
- PHYSX_DEV_CUDA_UNAVAILABLE,
- PHYSX_DEV_CUDA_MEMORY_ALLOC_FAILURE,
- PHYSX_DEV_LEGACY_MODE_GPU_HANDLE,
- PHYSX_DEV_PHYSX_DEV_UNAVAILABLE,
-};
-
-
-/**
- * physxDevInit
- * Initialize the PhysX Device information functions.
- * Must be called before using any other API functions.
- */
-PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevInit();
-
-/**
- * physxDevClose
- * Call this when finished with the PhysX Device API, it
- * frees memory that is allocated in physxDevInit
- */
-PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevClose();
-
-/**
- * physxDevGetCudaOrdinal
- * Returns the CUDA device ordinal for the given PhysX GPU device
- */
-PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevGetCudaOrdinal(int* cudaDevOrdinal, PhysXDevHandle devHandle);
-PHYSX_DEV_STATUS physxDevGetCudaOrdinalWrapper(int* cudaDevOrdinal);
-
-/**
- * physxDevGet
- * Returns the PhysX GPU device that the PhysX Engine
- * will use. If the device is -1, the engine will
- * automatically choose which GPU to use.
- *
- * This function handles the R177/R180 detection first, then decides accordingly
- *
- * if(180+)
- * if(GPU Enabled) ? get NVAPI sel : -1
- * else (177)
- * if regkey ? regkey value : -1 (PHYSX_DEV_LEGACY_MODE_GPU_HANDLE returned)
- */
-PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevGet(PhysXDevHandle* devHandle);
-
-/**
- * physxDevUsingDedicatedGPU
- * Returns whether or not PhysX has a dedicated GPU (set by the user in the NV CPL)
- */
-PHYSX_DEV_DLL_API bool physxDevUsingDedicatedGPU();
-
-/**
- * physxDevSLIEnabled
- * Returns whether or not the device pointer specified (D3D device) is in an SLI group
- */
-PHYSX_DEV_DLL_API bool physxDevSLIEnabled(void* graphicsDevice);
-
-#endif
diff --git a/PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp b/PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp
deleted file mode 100644
index 77896c2..0000000
--- a/PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "task/PxTaskDefine.h"
-
-#if PX_SUPPORT_GPU_PHYSX
-
-#include "foundation/PxErrorCallback.h"
-
-#include "PhysXDeviceSettings.h"
-#include "PhysXDevice.h"
-
-#if PX_VC
-#pragma warning(disable: 4191) //'operator/operation' : unsafe conversion from 'type of expression' to 'type required'
-#endif
-
-namespace
-{
-#if PX_WIN32 || PX_WIN64
- /** funcs for the dynamic loading of the PhysXDevice.dll file */
- typedef PHYSX_DEV_STATUS __cdecl physxDevInit_t();
- typedef PHYSX_DEV_STATUS __cdecl physxDevClose_t();
- typedef PHYSX_DEV_STATUS __cdecl physxDevGet_t(PhysXDevHandle* devHandle);
- typedef PHYSX_DEV_STATUS __cdecl physxDevGetCudaOrdinal_t(int* cudaDevOrdinal, PhysXDevHandle devHandle);
- typedef bool __cdecl physxDevUsingDedicatedGPU_t();
- typedef bool __cdecl physxDevSLIEnabled_t(void* graphicsDevice);
-
- /** globals for cuda functions */
- static physxDevInit_t* physxDevInit_f;
- static physxDevClose_t* physxDevClose_f;
- static physxDevGetCudaOrdinal_t* physxDevGetCudaOrdinal_f;
- static physxDevGet_t* physxDevGet_f;
- static physxDevUsingDedicatedGPU_t* physxDevUsingDedicatedGPU_f;
- static physxDevSLIEnabled_t* physxDevSLIEnabled_f;
-
- /** globals */
- static HMODULE gPhysXDevModuleH;
-
- PHYSX_DEV_STATUS initPhysXDeviceLib()
- {
- PHYSX_DEV_STATUS status;
-#if PX_X86
- gPhysXDevModuleH = LoadLibrary("PhysXDevice.dll");
-#else
- gPhysXDevModuleH = LoadLibrary("PhysXDevice64.dll");
-#endif
- if (!gPhysXDevModuleH)
- {
- return PHYSX_DEV_PHYSX_DEV_UNAVAILABLE;
- }
-
- physxDevInit_f = (physxDevInit_t*)GetProcAddress(gPhysXDevModuleH, "physxDevInit");
- physxDevClose_f = (physxDevClose_t*)GetProcAddress(gPhysXDevModuleH, "physxDevClose");
- physxDevGetCudaOrdinal_f = (physxDevGetCudaOrdinal_t*)GetProcAddress(gPhysXDevModuleH, "physxDevGetCudaOrdinal");
- physxDevGet_f = (physxDevGet_t*)GetProcAddress(gPhysXDevModuleH, "physxDevGet");
- physxDevUsingDedicatedGPU_f = (physxDevUsingDedicatedGPU_t*)GetProcAddress(gPhysXDevModuleH, "physxDevUsingDedicatedGPU");
- physxDevSLIEnabled_f = (physxDevSLIEnabled_t*)GetProcAddress(gPhysXDevModuleH, "physxDevSLIEnabled");
-
- if (!physxDevInit_f ||
- !physxDevClose_f ||
- !physxDevGetCudaOrdinal_f ||
- !physxDevGet_f)
- {
- FreeLibrary(gPhysXDevModuleH);
- return PHYSX_DEV_CUDA_UNAVAILABLE;
- }
-
- status = physxDevInit_f();
- if (PHYSX_DEV_OK != status)
- {
- FreeLibrary(gPhysXDevModuleH);
- return status;
- }
-
- return PHYSX_DEV_OK;
- }
-#endif // PX_WIN32 || PX_WIN64
-
- PHYSX_DEV_STATUS getCudaOrdinal(int* cudaDevOrdinal)
- {
-#if PX_WIN32 || PX_WIN64
- PHYSX_DEV_STATUS status;
- PhysXDevHandle selectedDev;
-
- status = initPhysXDeviceLib();
-
- if (PHYSX_DEV_OK != status)
- {
- return status;
- }
-
- status = physxDevGet_f(&selectedDev);
- physxDevGetCudaOrdinal_f(cudaDevOrdinal, selectedDev);
-
- physxDevClose_f();
- FreeLibrary(gPhysXDevModuleH);
-
- if (status == PHYSX_DEV_LEGACY_MODE_GPU_HANDLE) // R177 installed
- {
- return PHYSX_DEV_LEGACY_MODE_GPU_HANDLE;
- }
- else
- {
- return PHYSX_DEV_OK;
- }
-#elif PX_LINUX
- const char* deviceOrdinalString = ::getenv("PHYSX_GPU_DEVICE");
- if (!deviceOrdinalString)
- *cudaDevOrdinal = 0;
- else
- *cudaDevOrdinal = atoi(deviceOrdinalString);
- return PHYSX_DEV_OK;
-#endif
- }
-
-}
-
-namespace physx
-{
-
- int PhysXDeviceSettings::getSuggestedCudaDeviceOrdinal(physx::PxErrorCallback& errc)
- {
- int cudaDevOrdinal = -1;
- switch (getCudaOrdinal(&cudaDevOrdinal))
- {
- case PHYSX_DEV_OK:
- break;
-
- case PHYSX_DEV_UNKNOWN_ERROR:
- errc.reportError(PxErrorCode::eDEBUG_WARNING, "unknown error during CUDA device detection\n", __FILE__, __LINE__);
- break;
-
- case PHYSX_DEV_NV_API_UNAVAILABLE:
- errc.reportError(PxErrorCode::eDEBUG_WARNING, "NVAPI is not available\n", __FILE__, __LINE__);
- break;
-
- case PHYSX_DEV_CUDA_UNAVAILABLE:
- errc.reportError(PxErrorCode::eDEBUG_WARNING, "CUDA is not available\n", __FILE__, __LINE__);
- break;
-
- case PHYSX_DEV_PHYSX_DEV_UNAVAILABLE:
-#if PX_X86
- errc.reportError(PxErrorCode::eDEBUG_WARNING, "PhysXDevice.dll is not available\n", __FILE__, __LINE__);
-#else
- errc.reportError(PxErrorCode::eDEBUG_WARNING, "PhysXDevice64.dll is not available\n", __FILE__, __LINE__);
-#endif
- break;
-
- default:
- errc.reportError(PxErrorCode::eDEBUG_WARNING, "unknown error during CUDA device detection\n", __FILE__, __LINE__);
- break;
- }
-
- return cudaDevOrdinal;
- }
-
- int PhysXDeviceSettings::isUsingDedicatedGPU()
- {
-#if PX_WIN32 || PX_WIN64
- PHYSX_DEV_STATUS status;
- bool dedicated = false;
-
- status = initPhysXDeviceLib();
-
- if (PHYSX_DEV_OK != status)
- {
- return 0;
- }
-
- if (physxDevUsingDedicatedGPU_f)
- {
- dedicated = physxDevUsingDedicatedGPU_f();
- physxDevClose_f();
- FreeLibrary(gPhysXDevModuleH);
- return(dedicated);
- }
- else
- {
- physxDevClose_f();
- FreeLibrary(gPhysXDevModuleH);
- return(-1);
- }
-#elif PX_LINUX
- // need some way to set this
- return 0;
-#endif
- }
-
- bool PhysXDeviceSettings::isSLIEnabled(void* graphicsDevice)
- {
-#if PX_WIN32 || PX_WIN64
- PHYSX_DEV_STATUS status;
- status = initPhysXDeviceLib();
-
- if (PHYSX_DEV_OK != status)
- {
- return false;
- }
-
- if (physxDevSLIEnabled_f)
- {
- bool enabled = physxDevSLIEnabled_f(graphicsDevice);
- physxDevClose_f();
- FreeLibrary(gPhysXDevModuleH);
- return enabled;
- }
- else
- {
- physxDevClose_f();
- FreeLibrary(gPhysXDevModuleH);
- return false;
- }
-#elif PX_LINUX
- // Unimplemented for Linux because we don't need it, not because it's really always false.
- PX_UNUSED(graphicsDevice);
- return false;
-#endif
- }
-
-} // end physx namespace
-
-#endif // PX_SUPPORT_GPU_PHYSX
-
-