18 files changed, 0 insertions, 6524 deletions
diff --git a/PxShared/src/cudamanager/include/CudaContextManager.h b/PxShared/src/cudamanager/include/CudaContextManager.h
deleted file mode 100644
index 3d68f82..0000000
--- a/PxShared/src/cudamanager/include/CudaContextManager.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H
-#define PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H
-
-#include "task/PxTaskDefine.h"
-
-#if PX_SUPPORT_GPU_PHYSX
-
-namespace physx
-{
-
-class PxCudaContextManager;
-class PxCudaContextManagerDesc;
-class PxErrorCallback;
-
-/**
-Creates cuda context manager for PhysX and APEX.
-*/
-PxCudaContextManager* createCudaContextManager(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback);
-
-}
-
-#endif
-
-#endif // PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H
diff --git a/PxShared/src/cudamanager/include/CudaKernelWrangler.h b/PxShared/src/cudamanager/include/CudaKernelWrangler.h
deleted file mode 100644
index 36a2cc8..0000000
--- a/PxShared/src/cudamanager/include/CudaKernelWrangler.h
+++ /dev/null
@@ -1,331 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef __CUDA_KERNEL_WRANGLER__
-#define __CUDA_KERNEL_WRANGLER__
-
-// Make this header is safe for inclusion in headers that are shared with device code.
-#if !defined(__CUDACC__)
-
-#include "task/PxTaskDefine.h"
-#include "task/PxGpuDispatcher.h"
-
-#include "PsUserAllocated.h"
-#include "PsArray.h"
-
-#include <cuda.h>
-
-namespace physx
-{
-
-class KernelWrangler : public shdfnd::UserAllocated
-{
-	PX_NOCOPY(KernelWrangler)
-public:
-	KernelWrangler(PxGpuDispatcher& gd, PxErrorCallback& errorCallback, const char** funcNames, uint16_t numFuncs);
-	~KernelWrangler();
-
-	CUfunction getCuFunction(uint16_t funcIndex) const
-	{
-		return mCuFunctions[ funcIndex ];
-	}
-
-	CUmodule getCuModule(uint16_t funcIndex) const
-	{
-		uint16_t modIndex = mCuFuncModIndex[ funcIndex ];
-		return mCuModules[ modIndex ];
-	}
-
-	static void const* const* getImages();
-	static int getNumImages();
-
-	bool hadError() const { return mError; }
-
-protected:
-	bool						mError;
-	shdfnd::Array<CUfunction>	mCuFunctions;
-	shdfnd::Array<uint16_t>		mCuFuncModIndex;
-	shdfnd::Array<CUmodule>	    mCuModules;
-	PxGpuDispatcher&			mGpuDispatcher;
-	PxErrorCallback&			mErrorCallback;
-};
-
-/* SJB - These were "borrowed" from an Ignacio Llamas email to devtech-compute.
- * If we feel this is too clumsy, we can steal the boost based bits from APEX
- */
-
-class ExplicitCudaFlush
-{
-public:
-	ExplicitCudaFlush(int cudaFlushCount) : mCudaFlushCount(cudaFlushCount), mDefaultCudaFlushCount(mCudaFlushCount) {}
-	~ExplicitCudaFlush() {}
-
-	void setCudaFlushCount(int value) { mCudaFlushCount = mDefaultCudaFlushCount = value; }
-	unsigned int getCudaFlushCount() const	{ return (unsigned int)mCudaFlushCount; }
-	void resetCudaFlushCount() { mCudaFlushCount = mDefaultCudaFlushCount; }
-
-	void decrementFlushCount()
-	{
-		if (mCudaFlushCount == 0) return;
-
-		if (--mCudaFlushCount == 0)
-		{
-			CUresult ret = cuStreamQuery(0); // flushes current push buffer
-			PX_UNUSED(ret);
-			PX_ASSERT(ret == CUDA_SUCCESS || ret == CUDA_ERROR_NOT_READY);
-
-			// For current implementation, disable resetting of cuda flush count
-			// reset cuda flush count
-			// mCudaFlushCount = mDefaultCudaFlushCount;
-		}
-	}
-
-private:
-	int mCudaFlushCount;
-	int mDefaultCudaFlushCount;
-};
-
-}
-
-template <typename T0>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0)
-{
-	void* kernelParams[] =
-	{
-		&v0,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5, &v6,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
-          typename T8>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
-          typename T8, typename T9>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
-          typename T8, typename T9, typename T10>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
-          typename T8, typename T9, typename T10, typename T11>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
-          typename T8, typename T9, typename T10, typename T11, typename T12>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
-          typename T8, typename T9, typename T10, typename T11, typename T12, typename T13>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
-								  T13 v13)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
-          typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
-								  T13 v13, T14 v14)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
-          typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
-								  T13 v13, T14 v14, T15 v15)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
-          typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
-          typename T16>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
-								  T13 v13, T14 v14, T15 v15, T16 v16)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
-          typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
-          typename T16, typename T17>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
-								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
-								  T13 v13, T14 v14, T15 v15, T16 v16, T17 v17)
-{
-	void* kernelParams[] =
-	{
-		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16, &v17,
-	};
-	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-#endif
-
-#endif
diff --git a/PxShared/src/cudamanager/include/GpuDispatcher.h b/PxShared/src/cudamanager/include/GpuDispatcher.h
deleted file mode 100644
index 10c412f..0000000
--- a/PxShared/src/cudamanager/include/GpuDispatcher.h
+++ /dev/null
@@ -1,332 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef PXTASK_GPUDISPATCHER_H
-#define PXTASK_GPUDISPATCHER_H
-
-#include "task/PxTask.h"
-#include "task/PxTaskDefine.h"
-#include "task/PxGpuTask.h"
-#include "task/PxTaskManager.h"
-#include "task/PxGpuDispatcher.h"
-#include "foundation/PxProfiler.h"
-
-#include "PsUserAllocated.h"
-#include "PsThread.h"
-#include "PsAtomic.h"
-#include "PsMutex.h"
-#include "PsSync.h"
-#include "PsArray.h"
-
-#include <cuda.h>
-
-namespace physx { 
-
-typedef uint16_t EventID;
-
-void releaseGpuDispatcher(PxGpuDispatcher&);
-
-class KernelWrangler;
-class BlockingWaitThread;
-class FanoutTask;
-class LaunchTask;
-class BlockTask;
-class PxGpuWorkerThread;
-
-class GpuDispatcherImpl : public PxGpuDispatcher, public shdfnd::UserAllocated
-{
-public:
-	GpuDispatcherImpl(PxErrorCallback& errorCallback, PxCudaContextManager& ctx);
-	virtual ~GpuDispatcherImpl();
-
-	void	start();
-	void    startSimulation();
-	void	startGroup();
-	void    submitTask(PxTask& task);
-	void    finishGroup();
-	void    addCompletionPrereq(PxBaseTask& task);
-	bool	failureDetected() const;
-	void    forceFailureMode();
-	void    stopSimulation();
-	void    launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream);
-	
-	PxBaseTask&	getPreLaunchTask();
-	void		addPreLaunchDependent(PxBaseTask& dependent);
-
-	PxBaseTask&	getPostLaunchTask();
-	void		addPostLaunchDependent(PxBaseTask& dependent);
-
-	PxCudaContextManager* getCudaContextManager();
-
-	PxGpuWorkerThread* mDispatcher;
-	BlockingWaitThread* mBlockingThread;
-	LaunchTask* mLaunchTask; // predecessor of tasks launching kernels
-	BlockTask* mBlockTask; // continuation of tasks launching kernels
-	FanoutTask* mSyncTask; // predecessor of tasks waiting for cuda context synchronize
-};
-
-class JobQueue
-{
-	PX_NOCOPY(JobQueue)
-public:
-	JobQueue() : taskarray(PX_DEBUG_EXP("PxTask*")) {}
-	void push(PxTask* t)
-	{
-		access.lock();
-		taskarray.pushBack(t);
-		access.unlock();
-	}
-	PxTask* popBack()
-	{
-		access.lock();
-		PxTask* t = NULL;
-		if (taskarray.size())
-		{
-			t = taskarray.popBack();
-		}
-		access.unlock();
-		return t;
-	}
-	uint32_t size()
-	{
-		return taskarray.size();
-	}
-	bool empty()
-	{
-		return taskarray.size() == 0;
-	}
-
-private:
-	shdfnd::Array<PxTask*> taskarray;
-	shdfnd::Mutex		  access;
-};
-
-class EventPool
-{
-	PX_NOCOPY(EventPool)
-public:
-	EventPool(uint32_t inflags) : flags(inflags), evarray(PX_DEBUG_EXP("CUevent")) {}
-	void add(CUevent ev)
-	{
-		access.lock();
-		evarray.pushBack(ev);
-		access.unlock();
-	}
-	CUevent get()
-	{
-		access.lock();
-		CUevent ev;
-		if (evarray.size())
-		{
-			ev = evarray.popBack();
-		}
-		else
-		{
-			cuEventCreate(&ev, flags);
-		}
-		access.unlock();
-		return ev;
-	}
-	bool empty() const
-	{
-		return evarray.size() == 0;
-	}
-	void clear()
-	{
-		access.lock();
-		for (uint32_t i = 0; i < evarray.size(); i++)
-		{
-			cuEventDestroy(evarray[i]);
-		}
-		access.unlock();
-	}
-
-private:
-	uint32_t flags;
-	shdfnd::Array<CUevent> evarray;
-	shdfnd::Mutex access;
-};
-
-class StreamCache
-{
-public:
-	StreamCache() : sarray(PX_DEBUG_EXP("CUstream")), freeIndices(PX_DEBUG_EXP("freeIndices"))
-	{
-	}
-	CUstream get(uint32_t s)
-	{
-		PX_ASSERT(s);
-		return sarray[ s - 1 ];
-	}
-	void push(uint32_t s)
-	{
-		freeIndices.pushBack(s);
-	}
-	uint32_t popBack()
-	{
-		if (freeIndices.size())
-		{
-			return freeIndices.popBack();
-		}
-		else
-		{
-			CUstream s;
-			cuStreamCreate(&s, 0);
-			sarray.pushBack(s);
-			return sarray.size();
-		}
-	}
-	void reset()
-	{
-		freeIndices.resize(sarray.size());
-		for (uint32_t i = 0 ; i < sarray.size() ; i++)
-		{
-			freeIndices[i] = i + 1;
-		}
-	}
-	bool empty()
-	{
-		return freeIndices.size() == 0;
-	}
-
-private:
-	shdfnd::Array<CUstream>	 sarray;
-	shdfnd::Array<uint32_t>	 freeIndices;
-};
-
-class KernelBar
-{
-public:
-	KernelBar()
-	{
-		reset();
-	}
-	void reset()
-	{
-		start = 0xffffffff;
-		stop = 0;
-	}
-
-	uint32_t start;
-	uint32_t stop;
-};
-
-const int SIZE_COMPLETION_RING = 1024;
-
-struct CudaBatch
-{
-	CUevent		blockingEvent;
-	CUstream    blockingStream; // sync on stream instead of event if lsb is zero (faster)
-	PxBaseTask*   continuationTask;
-};
-
-struct ReadyTask
-{
-	PxGpuTask* 	task;
-	uint32_t       iteration;
-};
-
-class PxGpuWorkerThread : public shdfnd::Thread
-{
-	PX_NOCOPY(PxGpuWorkerThread)
-public:
-	PxGpuWorkerThread();
-	~PxGpuWorkerThread();
-
-	void					setCudaContext(PxCudaContextManager& ctx);
-
-	/* API to TaskManager */
-	void					startSimulation();
-	void					stopSimulation();
-
-	/* API to GPU tasks */
-	void					addCompletionPrereq(PxBaseTask& task);
-
-	/* PxGpuTask execution thread */
-	void					execute();
-	void					pollSubmitted(shdfnd::Array<ReadyTask> *ready);
-	void					processActiveTasks();
-	void					flushBatch(CUevent endEvent, CUstream, PxBaseTask* task);
-	void					launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream);
-
-	/* Blocking wait thread */
-	void					blockingWaitFunc();
-
-	StreamCache				mCachedStreams;
-	shdfnd::Array<PxBaseTask*> mCompletionTasks;
-	JobQueue      			mSubmittedTaskList;
-	volatile int			mActiveGroups;
-	shdfnd::Sync			mInputReady;
-	shdfnd::Sync			mRecordEventQueued;
-	PxCudaContextManager* 	mCtxMgr;
-	bool                    mNewTasksSubmitted;
-	bool                    mFailureDetected;
-
-	bool                    mUsingConcurrentStreams;
-
-	CudaBatch				mCompletionRing[ SIZE_COMPLETION_RING ];
-	volatile int            mCompletionRingPush;
-	volatile int            mCompletionRingPop;
-
-	EventPool               mCachedBlockingEvents;
-	EventPool               mCachedNonBlockingEvents;
-
-	volatile int			mCountActiveScenes;
-
-	uint32_t*				mSmStartTimes;
-	uint32_t                 mSmClockFreq;
-
-	shdfnd::Array<ReadyTask> mReady[ PxGpuTaskHint::NUM_GPU_TASK_HINTS ];
-
-	KernelWrangler*         mUtilKernelWrapper;
-
-	CUevent                 mStartEvent;
-
-	shdfnd::Mutex			mMutex;
-};
-
-class BlockingWaitThread : public shdfnd::Thread
-{
-public:
-	BlockingWaitThread(PxGpuWorkerThread& worker) : mWorker(worker) {}
-	~BlockingWaitThread() {}
-
-	void		    execute();
-
-protected:
-	PxGpuWorkerThread& mWorker;
-
-private:
-	BlockingWaitThread& operator=(const BlockingWaitThread&);
-};
-
-#define GD_CHECK_CALL(call)  { CUresult ret = call;                          \
-		if( CUDA_SUCCESS != ret ) { mFailureDetected=true; PX_ASSERT(!ret); } }
-
-}
-
-#endif // PXTASK_GPUDISPATCHER_H
diff --git a/PxShared/src/cudamanager/include/PhysXDeviceSettings.h b/PxShared/src/cudamanager/include/PhysXDeviceSettings.h
deleted file mode 100644
index 5358915..0000000
--- a/PxShared/src/cudamanager/include/PhysXDeviceSettings.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H
-#define PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H
-
-#include "task/PxTaskDefine.h"
-
-#if PX_SUPPORT_GPU_PHYSX
-
-namespace physx
-{
-	class PxErrorCallback;
-
-	/**
-	Helper functions to expose control panel functionality 
-	*/
-	class PhysXDeviceSettings
-	{
-	private:
-		PhysXDeviceSettings() {}
-
-	public:
-		static int getSuggestedCudaDeviceOrdinal(PxErrorCallback& errc);
-		static int isUsingDedicatedGPU();
-		static bool isSLIEnabled(void* graphicsDevice);
-	};
-}
-
-#endif
-
-#endif // PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H
diff --git a/PxShared/src/cudamanager/src/BlockingWait.cpp b/PxShared/src/cudamanager/src/BlockingWait.cpp
deleted file mode 100644
index 8a2cc44..0000000
--- a/PxShared/src/cudamanager/src/BlockingWait.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "task/PxTaskDefine.h"
-
-#if PX_SUPPORT_GPU_PHYSX
-
-#include "task/PxTask.h"
-#include "task/PxGpuTask.h"
-#include "cudamanager/PxCudaContextManager.h"
-
-#include "PsString.h"
-
-#if PX_SUPPORT_PXTASK_PROFILING
-#include "foundation/PxFoundation.h"
-#include "foundation/PxProfiler.h"
-#endif
-
-#include "GpuDispatcher.h"
-
-using namespace physx;
-
-/* Blocking thread / GPU Profiling Event Code */
-
-void PxGpuWorkerThread::blockingWaitFunc()
-{
-	mCtxMgr->acquireContext();
-
-	while (mCompletionRingPop != mCompletionRingPush)
-	{
-		CudaBatch& b = mCompletionRing[ mCompletionRingPop ];
-		PxBaseTask* t = b.continuationTask;
-
-		if (!b.blockingEvent)
-		{
-			PX_ASSERT(b.continuationTask != 0);
-
-			/* No blocking necessary, just allow continuation task to run */
-		}
-		else if (!mFailureDetected)
-		{
-#if PX_SUPPORT_PXTASK_PROFILING
-			PX_PROFILE_ZONE("GpuDispatcher.BlockingWaitEvent", 0);
-#endif
-			if (1 & ~intptr_t(b.blockingStream))
-			{
-				GD_CHECK_CALL(cuStreamSynchronize(b.blockingStream));
-			}
-			else
-			{
-				GD_CHECK_CALL(cuEventSynchronize(b.blockingEvent));
-			}
-		}
-
-		if (b.blockingEvent)
-		{
-			mCachedBlockingEvents.add(b.blockingEvent);
-		}
-		if (t)
-		{
-			t->removeReference();
-		}
-		mCompletionRingPop = (mCompletionRingPop + 1) % SIZE_COMPLETION_RING;
-	}
-
-	mCtxMgr->releaseContext();
-}
-
-
-/* Blocking wait thread
-
-   All this thread does is block waiting for CUDA Record Events to
-   be signaled.
- */
-
-void BlockingWaitThread::execute()
-{
-	setName("GpuDispatcher.BlockingWait");
-	bool running = true;
-	while (running)
-	{
-		mWorker.mRecordEventQueued.wait();
-		if (quitIsSignalled())
-		{
-			running = false;
-		}
-
-		mWorker.mRecordEventQueued.reset();
-		mWorker.blockingWaitFunc();
-	}
-	quit();
-}
-
-#endif
-
diff --git a/PxShared/src/cudamanager/src/CUDA/UtilKernels.cu b/PxShared/src/cudamanager/src/CUDA/UtilKernels.cu
deleted file mode 100644
index 3c73364..0000000
--- a/PxShared/src/cudamanager/src/CUDA/UtilKernels.cu
+++ /dev/null
@@ -1,164 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "cudamanager/PxGpuCopyDesc.h"
-#include "foundation/PxSimpleTypes.h"
-
-#include <cuda.h>
-
-using namespace physx;
-
-extern "C" __host__ void initUtilKernels() {}
-
-extern "C" __global__
-void Saturate( )
-{
-    // NOP
-}
-
-__device__
-void performCopy( const physx::PxGpuCopyDesc& desc, uint32_t totalBlocks )
-{
-	if( desc.type == physx::PxGpuCopyDesc::DeviceMemset32 )
-	{
-		uint32_t *dest = (uint32_t*) desc.dest;
-		uint32_t wordCount = desc.bytes >> 2;
-		size_t word = blockIdx.x * blockDim.x + threadIdx.x;
-		size_t stride = blockDim.x * totalBlocks;
-		for( ; word < wordCount ; word += stride )
-			dest[ word ] = desc.source;
-		return;
-	}
-
-	/* The idea here is to maximize throughput with minimal register and thread counts */
-	/* Manually unrolled 4 times, the compiler refuses to do it for me */
-
-	if( (desc.source & 0x7) != 0 || (desc.dest & 0x7) != 0 || (desc.bytes & 0x7) != 0)
-	{
-		/* Input is word aligned */
-
-		uint32_t *dest = (uint32_t*) desc.dest;
-		uint32_t *source = (uint32_t*) desc.source;
-		uint32_t wordCount = desc.bytes >> 2;
-		size_t word = blockIdx.x * blockDim.x + threadIdx.x;
-		size_t stride = blockDim.x * totalBlocks;
-		while( word < wordCount )
-		{
-			uint32_t a0, a1, a2, a3, a4, a5;
-			a0 = source[ word ];
-			if( word + stride < wordCount )
-				a1 = source[ word + stride ];
-			if( word + stride*2 < wordCount )
-				a2 = source[ word + stride*2 ];
-			if( word + stride*3 < wordCount )
-				a3 = source[ word + stride*3 ];
-			if( word + stride*4 < wordCount )
-				a4 = source[ word + stride*4 ];
-			if( word + stride*5 < wordCount )
-				a5 = source[ word + stride*5 ];
-
-			dest[ word ] = a0;
-			if( word + stride < wordCount )
-				dest[ word + stride ] = a1;
-			if( word + stride*2 < wordCount )
-				dest[ word + stride*2 ] = a2;
-			if( word + stride*3 < wordCount )
-				dest[ word + stride*3 ] = a3;
-			if( word + stride*4 < wordCount )
-				dest[ word + stride*4 ] = a4;
-			if( word + stride*5 < wordCount )
-				dest[ word + stride*5 ] = a5;
-
-			word += stride*6;
-		}
-	}
-	else
-	{
-		/* Input is DWord aligned */
-
-		uint2 *dest = (uint2*) desc.dest;
-		uint2 *source = (uint2*) desc.source;
-		uint32_t dwordCount = desc.bytes >> 3;
-		size_t word = blockIdx.x * blockDim.x + threadIdx.x;
-		size_t stride = blockDim.x * totalBlocks;
-		while( word < dwordCount )
-		{
-			uint2 a0, a1, a2, a3, a4, a5;
-			a0 = source[ word ];
-			if( word + stride < dwordCount )
-				a1 = source[ word + stride ];
-			if( word + stride*2 < dwordCount )
-				a2 = source[ word + stride*2 ];
-			if( word + stride*3 < dwordCount )
-				a3 = source[ word + stride*3 ];
-			if( word + stride*4 < dwordCount )
-				a4 = source[ word + stride*4 ];
-			if( word + stride*5 < dwordCount )
-				a5 = source[ word + stride*5 ];
-
-			dest[ word ] = a0;
-			if( word + stride < dwordCount )
-				dest[ word + stride ] = a1;
-			if( word + stride*2 < dwordCount )
-				dest[ word + stride*2 ] = a2;
-			if( word + stride*3 < dwordCount )
-				dest[ word + stride*3 ] = a3;
-			if( word + stride*4 < dwordCount )
-				dest[ word + stride*4 ] = a4;
-			if( word + stride*5 < dwordCount )
-				dest[ word + stride*5 ] = a5;
-
-			word += stride*6;
-		}
-	}
-
-	__threadfence_system();
-}
-
-extern "C" __global__
-void MemCopyAsync( physx::PxGpuCopyDesc desc )
-{
-	performCopy( desc, gridDim.x );
-}
-
-
-extern "C" __global__
-void MemCopyBatchedAsync( physx::PxGpuCopyDesc *desc )
-{
-	__shared__ physx::PxGpuCopyDesc sdesc;
-
-	if( threadIdx.x < sizeof(physx::PxGpuCopyDesc) / sizeof(uint32_t) )
-	{
-		uint32_t *dest = (uint32_t*)&sdesc;
-		uint32_t *source = (uint32_t*)(desc + blockIdx.y);
-		dest[ threadIdx.x ] = source[ threadIdx.x ];
-		__threadfence_block();
-	}
-	__syncthreads();
-
-	performCopy( sdesc, gridDim.x );
-}
diff --git a/PxShared/src/cudamanager/src/CudaContextManager.cpp b/PxShared/src/cudamanager/src/CudaContextManager.cpp
deleted file mode 100644
index e05911e..0000000
--- a/PxShared/src/cudamanager/src/CudaContextManager.cpp
+++ /dev/null
@@ -1,823 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "foundation/PxAssert.h"
-#include "foundation/PxErrorCallback.h"
-#include "foundation/PxMath.h"
-#include "foundation/PxPreprocessor.h"
-
-#include "cudamanager/PxCudaContextManager.h"
-#include "task/PxGpuDispatcher.h"
-
-#include "CudaMemoryManager.h"
-#include "GpuDispatcher.h"
-#include "PhysXDeviceSettings.h"
-
-#include "PsMutex.h"
-#include "PsThread.h"
-#include "PsUserAllocated.h"
-#include "PsString.h"
-
-#include <cuda.h>
-
-#if PX_WIN32 || PX_WIN64
-
-#ifdef PX_SECURE_LOAD_LIBRARY
-#include "nvSecureLoadLibrary.h"
-#endif
-
-#pragma warning (push)
-#pragma warning (disable : 4668) //'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives'
-#include <windows.h>
-#pragma warning (pop)
-
-class IDirect3DDevice9;
-class IDirect3DResource9;
-class IDirect3DVertexBuffer9;
-#include <cudad3d9.h>
-
-class IDXGIAdapter;
-class ID3D10Device;
-class ID3D10Resource;
-#include <cudad3d10.h>
-
-struct ID3D11Device;
-struct ID3D11Resource;
-#include <cudad3d11.h>
-
-#endif // PX_WINDOWS_FAMILY
-
-#if PX_LINUX
-#include <dlfcn.h>
-static void* GetProcAddress(void* handle, const char* name) { return dlsym(handle, name); }
-#endif
-
-#include <GL/gl.h>
-#include <cudaGL.h>
-#include <assert.h>
-
-#include "foundation/PxErrors.h"
-#include "foundation/PxErrorCallback.h"
-
-#define CU_INIT_UUID
-#include "CudaNode3DLowLatencyInterface.h"
-
-#define ENABLE_DEVICE_INFO_BRINGUP	0
-
-namespace physx
-{
-
-#if PX_VC
-#pragma warning(disable: 4191)	//'operator/operation' : unsafe conversion from 'type of expression' to 'type required'
-#endif
-
-#define MIN_SM_MAJOR_VERSION	2
-#define MIN_SM_MINOR_VERSION	0
-
-class CudaCtxMgr : public PxCudaContextManager, public shdfnd::UserAllocated
-{
-public:
-	CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback);
-	~CudaCtxMgr();
-
-	bool            safeDelayImport(PxErrorCallback& errorCallback);
-	void            acquireContext();
-	void            releaseContext();
-
-	/* All these methods can be called without acquiring the context */
-
-	PxCudaMemoryManager* getMemoryManager();
-	PxGpuDispatcher* getGpuDispatcher();
-
-	bool            contextIsValid() const;
-	bool            supportsArchSM10() const;  // G80
-	bool            supportsArchSM11() const;  // G92
-	bool            supportsArchSM12() const;
-	bool            supportsArchSM13() const;  // GT200
-	bool            supportsArchSM20() const;  // GF100
-	bool            supportsArchSM30() const;  // GK100
-	bool            supportsArchSM35() const;  // GK110
-	bool            supportsArchSM50() const;  // GM100
-	bool            supportsArchSM52() const;  // GM200
-	bool            supportsArchSM60() const;  // GP100
-	bool            isIntegrated() const;      // true if GPU is integrated (MCP) part
-	bool            canMapHostMemory() const;  // true if GPU map host memory to GPU
-	int             getDriverVersion() const;
-	size_t          getDeviceTotalMemBytes() const;
-	int				getMultiprocessorCount() const;
-	int             getSharedMemPerBlock() const;
-	int             getSharedMemPerMultiprocessor() const;
-	unsigned int	getMaxThreadsPerBlock() const;
-	unsigned int	getClockRate() const;
-
-	const char*     getDeviceName() const;
-	CUdevice		getDevice() const;
-	const CUdevprop* getDeviceProperties() const;
-
-	PxCudaInteropMode::Enum	getInteropMode() const;
-
-	void			setUsingConcurrentStreams(bool);
-	bool			getUsingConcurrentStreams() const;
-
-	bool registerResourceInCudaD3D(CUgraphicsResource& resource, void* resourcePointer, PxCudaInteropRegisterFlags flags);
-	bool registerResourceInCudaGL(CUgraphicsResource& resource, uint32_t buffer, PxCudaInteropRegisterFlags flags);
-	bool unregisterResourceInCuda(CUgraphicsResource resource);
-
-	/*
-	\brief Determine if the user has configured a dedicated PhysX GPU in the NV Control Panel
-	\returns 1 if there is a dedicated PhysX GPU
-	\returns 0 if there is NOT a dedicated PhysX GPU
-	\returns -1 if the routine is not implemented
-	*/
-	int				usingDedicatedGPU() const;
-
-	void            release();
-
-	CUcontext		getContext() { return mCtx; }
-
-private:
-
-	int             mSceneCount;
-	bool            mIsValid;
-	bool			mOwnContext;
-	CUdevice        mDevHandle;
-	CUcontext       mCtx;
-	CudaMemMgr* 	mMemMgr;
-
-	GpuDispatcherImpl* mDispatcher;
-    CUetblPhysXInterface* m_physXInterface;
-
-	/* Cached device attributes, so threads can query w/o context */
-	int             mComputeCapMajor;
-	int             mComputeCapMinor;
-	int				mIsIntegrated;
-	int				mCanMapHost;
-	int				mDriverVersion;
-	size_t			mTotalMemBytes;
-	int				mMultiprocessorCount;
-	int				mMaxThreadsPerBlock;
-	char            mDeviceName[128];
-	int				mSharedMemPerBlock;
-	int				mSharedMemPerMultiprocessor;
-	int				mClockRate;
-	PxCudaInteropMode::Enum mInteropMode;
-	bool            mUsingConcurrentStreams;
-
-#if PX_DEBUG
-	static uint32_t    mManagerRefCount;
-	static uint32_t    mContextRefCountTls;
-#endif
-};
-
-#if PX_DEBUG
-uint32_t CudaCtxMgr::mManagerRefCount = 0;
-uint32_t CudaCtxMgr::mContextRefCountTls = 0;
-#endif
-
-bool CudaCtxMgr::contextIsValid() const
-{
-	return mIsValid;
-}
-bool CudaCtxMgr::supportsArchSM10() const
-{
-	return mIsValid;
-}
-bool CudaCtxMgr::supportsArchSM11() const
-{
-	return mIsValid && (mComputeCapMinor >= 1 || mComputeCapMajor > 1);
-}
-bool CudaCtxMgr::supportsArchSM12() const
-{
-	return mIsValid && (mComputeCapMinor >= 2 || mComputeCapMajor > 1);
-}
-bool CudaCtxMgr::supportsArchSM13() const
-{
-	return mIsValid && (mComputeCapMinor >= 3 || mComputeCapMajor > 1);
-}
-bool CudaCtxMgr::supportsArchSM20() const
-{
-	return mIsValid && mComputeCapMajor >= 2;
-}
-bool CudaCtxMgr::supportsArchSM30() const
-{
-	return mIsValid && mComputeCapMajor >= 3;
-}
-bool CudaCtxMgr::supportsArchSM35() const
-{
-	return mIsValid && ((mComputeCapMajor > 3) || (mComputeCapMajor == 3 && mComputeCapMinor >= 5));
-}
-bool CudaCtxMgr::supportsArchSM50() const
-{
-	return mIsValid && mComputeCapMajor >= 5;
-}
-bool CudaCtxMgr::supportsArchSM52() const
-{
-	return mIsValid && ((mComputeCapMajor > 5) || (mComputeCapMajor == 5 && mComputeCapMinor >= 2));
-}
-bool CudaCtxMgr::supportsArchSM60() const
-{
-	return mIsValid && mComputeCapMajor >= 6;
-}
-
-bool CudaCtxMgr::isIntegrated() const
-{
-	return mIsValid && mIsIntegrated;
-}
-bool CudaCtxMgr::canMapHostMemory() const
-{
-	return mIsValid && mCanMapHost;
-}
-int  CudaCtxMgr::getDriverVersion() const
-{
-	return mDriverVersion;
-}
-size_t  CudaCtxMgr::getDeviceTotalMemBytes() const
-{
-	return mTotalMemBytes;
-}
-int	CudaCtxMgr::getMultiprocessorCount() const
-{
-	return mMultiprocessorCount;
-}
-int CudaCtxMgr::getSharedMemPerBlock() const
-{
-	return mSharedMemPerBlock;
-}
-int CudaCtxMgr::getSharedMemPerMultiprocessor() const
-{
-	return mSharedMemPerMultiprocessor;
-}
-unsigned int CudaCtxMgr::getMaxThreadsPerBlock() const
-{
-	return (unsigned int)mMaxThreadsPerBlock;
-}
-unsigned int CudaCtxMgr::getClockRate() const
-{
-	return (unsigned int)mClockRate;
-}
-
-const char* CudaCtxMgr::getDeviceName() const
-{
-	if (mIsValid)
-	{
-		return mDeviceName;
-	}
-	else
-	{
-		return "Invalid";
-	}
-}
-
-CUdevice CudaCtxMgr::getDevice() const
-{
-	if (mIsValid)
-	{
-		return mDevHandle;
-	}
-	else
-	{
-		return -1;
-	}
-}
-
-PxCudaInteropMode::Enum CudaCtxMgr::getInteropMode() const
-{
-	return mInteropMode;
-}
-
-void CudaCtxMgr::setUsingConcurrentStreams(bool value)
-{
-	mUsingConcurrentStreams = value;
-}
-
-bool CudaCtxMgr::getUsingConcurrentStreams() const
-{
-	return mUsingConcurrentStreams;
-}
-
-PxCudaMemoryManager* CudaCtxMgr::getMemoryManager()
-{
-	if (mIsValid)
-	{
-		return mMemMgr;
-	}
-	else
-	{
-		return NULL;
-	}
-}
-
-PxGpuDispatcher* CudaCtxMgr::getGpuDispatcher()
-{
-	if (mIsValid)
-	{
-		return mDispatcher;
-	}
-	else
-	{
-		return NULL;
-	}
-}
-
-int CudaCtxMgr::usingDedicatedGPU() const
-{
-	if (PxCudaInteropMode::NO_INTEROP == getInteropMode())
-	{
-		return PhysXDeviceSettings::isUsingDedicatedGPU();
-	}
-	else
-	{
-		return 0; // not a dedicated GPU
-	}
-}
-
-#define CUT_SAFE_CALL(call)  { CUresult ret = call;	\
-		if( CUDA_SUCCESS != ret ) { PX_ASSERT(0); } }
-
-/* If a context is not provided, an ordinal must be given */
-CudaCtxMgr::CudaCtxMgr(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback)
-	: mSceneCount(0)
-	, mOwnContext(false)
-	, mMemMgr(0)
-	, mDispatcher(0)
-	, m_physXInterface(0)
-	, mInteropMode(desc.interopMode)
-	, mUsingConcurrentStreams(true)
-{
-	CUresult status;
-	mIsValid = false;
-	mDeviceName[0] = 0;
-
-	if (safeDelayImport(errorCallback) == false)
-	{
-		// The table where this info is found is here: https://wiki.nvidia.com/nvcompute/index.php/NVCompute#CUDA_Planning
-		errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "NVIDIA Release 331 graphics driver and above is required for GPU acceleration.", __FILE__, __LINE__);
-		return;
-	}
-
-	if (desc.ctx == 0)
-	{
-		int flags = CU_CTX_LMEM_RESIZE_TO_MAX | CU_CTX_SCHED_BLOCKING_SYNC | CU_CTX_MAP_HOST;
-		class FoundationErrorReporter : public PxErrorCallback
-		{
-		public:
-			FoundationErrorReporter(PxErrorCallback& ec)
-			: errorCallback(&ec)
-			{
-			}
-
-			virtual void reportError(PxErrorCode::Enum code, const char* message, const char* file, int line)
-			{
-				errorCallback->reportError( code, message, file, line);
-			}
-
-			PxErrorCallback* errorCallback;
-		} foundationErrorReporter(errorCallback);
-
-		int devOrdinal = PhysXDeviceSettings::getSuggestedCudaDeviceOrdinal(foundationErrorReporter);
-		if (devOrdinal < 0)
-		{
-			errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "No PhysX capable GPU suggested.", __FILE__, __LINE__);
-			errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "If you have a PhysX capable GPU, verify that PhysX is not set to CPU in the NVIDIA Control Panel.", __FILE__, __LINE__);
-			return;
-		}
-
-		status = cuInit(0);
-		if (CUDA_SUCCESS != status)
-		{
-			errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuInit failed", __FILE__, __LINE__);
-			return;
-		}
-		
-		// Try to create the context on Node3DLowLatency.
-		// If that does not work, try to create the cuda context using cuCtxCreatePhysX,
-		// since we must be on a driver that does not support cuCtxCreateOnNode3DLowLatency.
-		cuGetExportTable((const void**)&m_physXInterface, (const CUuuid*)&CU_ETID_PhysXInterface);
-
-		// if using a dedicated GPU or SLI we disable D3D interop (which is not supported over multiple GPUs)
-		// this ensures the users control panel setting is always respected
-		bool sliEnabled = false;
-		if (mInteropMode != PxCudaInteropMode::NO_INTEROP && desc.graphicsDevice != NULL)
-		{
-			sliEnabled = PhysXDeviceSettings::isSLIEnabled(desc.graphicsDevice) == 1 ? true : false;
-		}
-
-		if (PhysXDeviceSettings::isUsingDedicatedGPU() == 1 || sliEnabled)
-		{
-			if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP ||
-				mInteropMode == PxCudaInteropMode::D3D11_INTEROP)
-			{
-				mInteropMode = PxCudaInteropMode::NO_INTEROP;
-				if (sliEnabled)
-				{
-					errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "D3D/CUDA interop cannot be used in tandem with SLI, disabling interop.  Query PxCudaContextManager::getInteropMode() for interop status.",
-						__FILE__,__LINE__);
-				}
-			}
-		}
-
-		if (mInteropMode == PxCudaInteropMode::NO_INTEROP)
-		{
-			status = cuDeviceGet(&mDevHandle, devOrdinal);
-			if (CUDA_SUCCESS != status)
-			{
-				errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceGet failed",__FILE__,__LINE__);
-				return;
-			}
-
-			if (m_physXInterface)
-				status = m_physXInterface->cuCtxCreateOnNode3DLowLatency(&mCtx, (unsigned int)flags, mDevHandle);
-			else
-				status = cuCtxCreate(&mCtx, (unsigned int)flags, mDevHandle);
-			if (CUDA_SUCCESS != status)
-			{
-				errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuCtxCreate failed",__FILE__,__LINE__);
-				return;
-			}
-			mOwnContext = true;
-		}
-		else if (mInteropMode == PxCudaInteropMode::OGL_INTEROP)
-		{
-			status = cuDeviceGet(&mDevHandle, devOrdinal);
-			if (CUDA_SUCCESS != status)
-			{
-				errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceGet failed",__FILE__,__LINE__);
-				return;
-			}
-
-			status = cuGLCtxCreate(&mCtx, (unsigned int)flags, mDevHandle);
-			if (CUDA_SUCCESS != status)
-			{
-				errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuCtxGLCreate failed",__FILE__,__LINE__);
-				return;
-			}
-
-			status = cuGLInit();
-			if (CUDA_SUCCESS != status)
-			{
-				errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuGLInit failed",__FILE__,__LINE__);
-				return;
-			}
-			mOwnContext = true;
-		}
-#if PX_WIN32 || PX_WIN64
-		else if (mInteropMode == PxCudaInteropMode::D3D10_INTEROP)
-		{
-			status = cuD3D10CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags,
-			                          reinterpret_cast<ID3D10Device*>(desc.graphicsDevice));
-
-			if (CUDA_SUCCESS != status)
-			{
-				errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuD3D10CtxCreate failed",__FILE__,__LINE__);
-				return;
-			}
-			mOwnContext = true;
-		}
-		else if (mInteropMode == PxCudaInteropMode::D3D11_INTEROP)
-		{
-			status = cuD3D11CtxCreate(&mCtx, &mDevHandle, (unsigned int)flags,
-			                          reinterpret_cast<ID3D11Device*>(desc.graphicsDevice));
-
-			if (CUDA_SUCCESS != status)
-			{
-				errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuD3D11CtxCreate failed",__FILE__,__LINE__);
-				return;
-			}
-			mOwnContext = true;
-		}
-#endif //PX_WIN32 || PX_WIN64
-		else
-		{
-			errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "Requested interop type is not supported!",__FILE__,__LINE__);
-			return;
-		}
-	}
-	else
-	{
-		mCtx = *desc.ctx;
-		status = cuCtxGetDevice(&mDevHandle);
-		if (CUDA_SUCCESS != status)
-		{
-			errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuCtxGetDevice failed",__FILE__,__LINE__);
-			return;
-		}
-	}
-
-	// Verify we can at least allocate a CUDA event from this context
-	CUevent testEvent;
-	if (CUDA_SUCCESS == cuEventCreate(&testEvent, 0))
-	{
-		cuEventDestroy(testEvent);
-	}
-	else
-	{
-		errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "CUDA context validation failed",__FILE__,__LINE__);
-		return;
-	}
-
-	status = cuDeviceGetName(mDeviceName, sizeof(mDeviceName), mDevHandle);
-	if (CUDA_SUCCESS != status)
-	{
-		errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceGetName failed",__FILE__,__LINE__);
-		return;
-	}
-
-	cuDeviceGetAttribute(&mSharedMemPerBlock, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, mDevHandle);
-	cuDeviceGetAttribute(&mSharedMemPerMultiprocessor, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR, mDevHandle);
-	cuDeviceGetAttribute(&mClockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, mDevHandle);
-	cuDeviceGetAttribute(&mComputeCapMajor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, mDevHandle);
-	cuDeviceGetAttribute(&mComputeCapMinor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, mDevHandle);
-	cuDeviceGetAttribute(&mIsIntegrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, mDevHandle);
-	cuDeviceGetAttribute(&mCanMapHost, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, mDevHandle);
-	cuDeviceGetAttribute(&mMultiprocessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, mDevHandle);
-	cuDeviceGetAttribute(&mMaxThreadsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, mDevHandle);
-
-	status = cuDeviceTotalMem((size_t*)&mTotalMemBytes, mDevHandle);
-	if (CUDA_SUCCESS != status)
-	{
-		errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "cuDeviceTotalMem failed",__FILE__,__LINE__);
-		return;
-	}
-
-	// minimum compute capability is MIN_SM_MAJOR_VERSION.MIN_SM_MINOR_VERSION
-	if ((mComputeCapMajor < MIN_SM_MAJOR_VERSION)	||
-		(mComputeCapMajor == MIN_SM_MAJOR_VERSION && mComputeCapMinor < MIN_SM_MINOR_VERSION))
-	{
-		char buffer[256];
-		physx::shdfnd::snprintf(buffer, 256, "Minimum GPU compute capability %d.%d is required", MIN_SM_MAJOR_VERSION, MIN_SM_MINOR_VERSION);
-		errorCallback.reportError(PxErrorCode::eDEBUG_WARNING,buffer,__FILE__,__LINE__);
-		return;
-	}
-
-	mMemMgr = PX_NEW(CudaMemMgr)(*this, errorCallback);
-	if (mMemMgr == NULL)
-	{
-		errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "CudaMemMgr failed: Unable to allocate heaps",__FILE__,__LINE__);
-		return;
-	}
-
-	bool succ = true;
-	for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
-	{
-		PxCudaBufferType type(PxCudaBufferMemorySpace::Enum(i), PxCudaBufferFlags::F_READ_WRITE);
-		succ &= mMemMgr->setBaseSize(type, desc.memoryBaseSize[i]);
-		succ &= mMemMgr->setPageSize(type, desc.memoryPageSize[i]);
-		succ &= mMemMgr->setMaxMemorySize(type, desc.maxMemorySize[i]);
-		PX_ASSERT(succ);
-		if (!succ)
-		{
-			errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "CudaMemMgr failed: Invalid memory parameter",__FILE__,__LINE__);
-			return;
-		}
-	}
-
-#if PX_DEBUG
-	if(!mManagerRefCount++)
-		mContextRefCountTls = shdfnd::TlsAlloc();
-	if(!shdfnd::TlsGet(mContextRefCountTls))
-		CUT_SAFE_CALL(cuCtxSetCurrent(0));
-#endif
-
-	mIsValid = true;
-	mDispatcher = PX_NEW(GpuDispatcherImpl)(errorCallback, *this);
-	if (!mDispatcher || mDispatcher->failureDetected())
-	{
-		errorCallback.reportError(PxErrorCode::eDEBUG_WARNING, "Failed to create functional GPU dispatcher",__FILE__,__LINE__);
-		mIsValid = false;
-		return;
-	}
-
-	mDispatcher->start();
-
-#if ENABLE_DEVICE_INFO_BRINGUP
-	// Device info (Enable for Amodel and Emulator testing)
-	errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Device Name: %s", mDeviceName);
-	errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Shared Memory Per Block: %d", mSharedMemPerBlock);
-	errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Shared Memory Per Multiprocessor: %d", mSharedMemPerMultiprocessor);
-	errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Number of SM: %d", mMultiprocessorCount);
-	errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Max Threads Per Block: %d", mMaxThreadsPerBlock);
-#endif
-}
-
-/* Some driver version mismatches can cause delay import crashes.  Load NVCUDA.dll
- * manually, verify its version number, then allow delay importing to bind all the
- * APIs.
- */
-bool CudaCtxMgr::safeDelayImport(PxErrorCallback& errorCallback)
-{
-#if PX_WIN32 || PX_WIN64
-#ifdef PX_SECURE_LOAD_LIBRARY
-	HMODULE hCudaDriver = nvLoadSystemLibrary("nvcuda.dll");
-#else
-	HMODULE hCudaDriver = LoadLibrary("nvcuda.dll");
-#endif
-#elif PX_LINUX
-	void*	hCudaDriver = dlopen("libcuda.so", RTLD_NOW);
-#endif
-	if (!hCudaDriver)
-	{
-		errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "nvcuda.dll not found or could not be loaded.", __FILE__, __LINE__);
-		return false;
-	}
-
-	typedef CUresult(CUDAAPI * pfnCuDriverGetVersion_t)(int*);
-	pfnCuDriverGetVersion_t pfnCuDriverGetVersion = (pfnCuDriverGetVersion_t) GetProcAddress(hCudaDriver, "cuDriverGetVersion");
-	if (!pfnCuDriverGetVersion)
-	{
-		errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "cuDriverGetVersion missing in nvcuda.dll.", __FILE__, __LINE__);
-		return false;
-	}
-
-	CUresult status = pfnCuDriverGetVersion(&mDriverVersion);
-	if (status != CUDA_SUCCESS)
-	{
-		errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Retrieving CUDA driver version failed.", __FILE__, __LINE__);
-		return false;
-	}
-
-	/* Let's require a driver version >= to the version we compile against
-	 * Currently, CUDA_VERSION is 6000 or 6.0, but APEX still uses CUDA 5.0 so we can't assert on 6.0 yet.
-	 */
-	PX_COMPILE_TIME_ASSERT(5000 <= CUDA_VERSION);
-
-	if (mDriverVersion < CUDA_VERSION)
-	{
-		char buffer[256];
-		physx::shdfnd::snprintf(buffer, 256, "CUDA driver version is %u, expected at least %u.", mDriverVersion, CUDA_VERSION);
-		errorCallback.reportError(PxErrorCode::eDEBUG_INFO, buffer, __FILE__,__LINE__);
-		return false;
-	}
-
-	/* Now trigger delay import and API binding */
-	status = cuDriverGetVersion(&mDriverVersion);
-	if (status != CUDA_SUCCESS)
-	{
-		errorCallback.reportError(PxErrorCode::eDEBUG_INFO, "Failed to bind CUDA API.", __FILE__, __LINE__);
-		return false;
-	}
-
-	/* Not strictly necessary, but good practice */
-#if PX_WIN32 | PX_WIN64
-	FreeLibrary(hCudaDriver);
-#elif PX_LINUX
-	dlclose(hCudaDriver);
-#endif
-	
-
-	return true;
-}
-
-void CudaCtxMgr::release()
-{
-	delete this;
-}
-
-CudaCtxMgr::~CudaCtxMgr()
-{
-	if (mDispatcher)
-	{
-		releaseGpuDispatcher(*mDispatcher);
-	}
-	if (mMemMgr)
-	{
-		delete mMemMgr;
-	}
-	if (mOwnContext)
-	{
-		CUT_SAFE_CALL(cuCtxDestroy(mCtx));
-	}
-
-#if PX_DEBUG
-	if(!--mManagerRefCount)
-		 shdfnd::TlsFree(mContextRefCountTls);
-#endif
-}
-
-bool CudaCtxMgr::registerResourceInCudaGL(CUgraphicsResource& resource, uint32_t buffer, PxCudaInteropRegisterFlags flags)
-{
-	CUresult ret = CUDA_ERROR_UNKNOWN;
-
-	acquireContext();
-
-	PX_ASSERT(mInteropMode == PxCudaInteropMode::OGL_INTEROP);
-
-	ret = cuGraphicsGLRegisterBuffer(&resource, (GLuint) buffer, uint32_t(flags));
-
-	releaseContext();
-
-	return ret == CUDA_SUCCESS;
-}
-
-bool CudaCtxMgr::registerResourceInCudaD3D(CUgraphicsResource& resource, void* resourcePointer, PxCudaInteropRegisterFlags flags)
-{
-	CUresult ret = CUDA_ERROR_UNKNOWN;
-#if PX_WINDOWS_FAMILY
-	acquireContext();
-
-	switch (mInteropMode)
-	{
-	case PxCudaInteropMode::D3D10_INTEROP:
-		ret = cuGraphicsD3D10RegisterResource(&resource, (ID3D10Resource*)resourcePointer, uint32_t(flags));
-		break;
-	case PxCudaInteropMode::D3D11_INTEROP:
-		ret = cuGraphicsD3D11RegisterResource(&resource, (ID3D11Resource*)resourcePointer, uint32_t(flags));
-		break;
-	case PxCudaInteropMode::NO_INTEROP:
-	case PxCudaInteropMode::OGL_INTEROP:
-	case PxCudaInteropMode::COUNT:
-	default:
-		PX_ALWAYS_ASSERT_MESSAGE("unexpected state in registerResourceInCuda3D");
-	}
-
-	releaseContext();
-#else
-	PX_UNUSED(resource);
-	PX_UNUSED(resourcePointer);
-	PX_UNUSED(flags);
-#endif //PX_WINDOWS_FAMILY
-	return ret == CUDA_SUCCESS;
-}
-
-bool CudaCtxMgr::unregisterResourceInCuda(CUgraphicsResource resource)
-{
-	CUresult ret = CUDA_ERROR_UNKNOWN;
-
-	acquireContext();
-
-	ret = cuGraphicsUnregisterResource(resource);
-
-	releaseContext();
-
-	return ret == CUDA_SUCCESS;
-}
-
-void CudaCtxMgr::acquireContext()
-{
-	CUcontext ctx = 0;
-	CUT_SAFE_CALL(cuCtxGetCurrent(&ctx));
-
-	if (ctx != mCtx)
-	{
-#if PX_DEBUG
-		PX_ASSERT(!shdfnd::TlsGet(mContextRefCountTls));
-#endif
-		CUT_SAFE_CALL(cuCtxSetCurrent(mCtx));
-	}
-
-#if PX_DEBUG
-	char* refCount = (char*)shdfnd::TlsGet(mContextRefCountTls);
-	shdfnd::TlsSet(mContextRefCountTls, ++refCount);
-#endif
-}
-
-void CudaCtxMgr::releaseContext()
-{
-#if PX_DEBUG
-	char* refCount = (char*)shdfnd::TlsGet(mContextRefCountTls);
-	shdfnd::TlsSet(mContextRefCountTls, --refCount);
-	// see DE8475
-	if(!refCount)
-		CUT_SAFE_CALL(cuCtxSetCurrent(0));
-#endif
-}
-
-#if PX_SUPPORT_GPU_PHYSX
-extern "C" void initUtilKernels();
-
-PxCudaContextManager* createCudaContextManager(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback)
-{
-	//this call is needed to force UtilKernels linkage in case someone links PxCudaContextManager as Static Library!
-	initUtilKernels();
-
-	return PX_NEW(CudaCtxMgr)(desc, errorCallback);
-}
-
-#endif
-
-} // end physx namespace
-
-
diff --git a/PxShared/src/cudamanager/src/CudaKernelWrangler.cpp b/PxShared/src/cudamanager/src/CudaKernelWrangler.cpp
deleted file mode 100644
index 7579d63..0000000
--- a/PxShared/src/cudamanager/src/CudaKernelWrangler.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "task/PxGpuDispatcher.h"
-#include "cudamanager/PxCudaContextManager.h"
-
-#include "foundation/PxAssert.h"
-#include "foundation/PxErrorCallback.h"
-
-#include "PsString.h"
-
-#include "CudaKernelWrangler.h"
-
-#include <cuda.h>
-#include <texture_types.h>
-
-/**
- * Workaround hacks for using nvcc --compiler output object files
- * without linking with CUDART.  We must implement our own versions
- * of these functions that the object files are hard-coded to call into.
- */
-
-#define CUT_SAFE_CALL(call)  { CUresult ret = call; if( CUDA_SUCCESS != ret ) { PX_ASSERT(!ret); } }
-
-#define MAX_MODULES					64  // Max number of .cu files you will build
-static void* gModuleTable[ MAX_MODULES ];
-static int gNumModules = 0;
-
-#define MAX_FUNCTIONS				256 // Max number of kernel of entry points
-typedef struct
-{
-	int moduleIndex;
-	const char* functionName;
-} cuFuncDesc;
-static cuFuncDesc gFunctionTable[ MAX_FUNCTIONS ];
-static int gNumFunctions = 0;
-
-using namespace physx::shdfnd;
-using namespace physx;
-		
-KernelWrangler::KernelWrangler(PxGpuDispatcher& gd, PxErrorCallback& errorCallback, const char** funcNames, uint16_t numFuncs)
-	: mError(false)
-	, mCuFunctions(PX_DEBUG_EXP("CuFunctions"))
-	, mCuModules(PX_DEBUG_EXP("CuModules"))
-	, mGpuDispatcher(gd)
-	, mErrorCallback(errorCallback)
-{
-	PxScopedCudaLock _lock_(*gd.getCudaContextManager());
-
-	/* Formally load the CUDA modules, get CUmodule handles */
-	mCuModules.resize((uint32_t)gNumModules);
-	for (int i = 0 ; i < gNumModules ; ++i)
-	{
-		CUresult ret = cuModuleLoadDataEx(&mCuModules[(uint32_t)i], gModuleTable[i], 0, NULL, NULL);
-		if (ret != CUDA_SUCCESS && ret != CUDA_ERROR_NO_BINARY_FOR_GPU)
-		{
-			mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, "Failed to load CUDA module data.", __FILE__, __LINE__);
-			mError = true;
-			return;
-		}
-	}
-
-	/* matchup funcNames to CUDA modules, get CUfunction handles */
-	mCuFunctions.resize(numFuncs);
-	mCuFuncModIndex.resize(numFuncs);
-	for (uint32_t i = 0 ; i < numFuncs ; ++i)
-	{
-		for (int j = 0; ; ++j)
-		{
-			if(j == gNumFunctions)
-			{
-				char buffer[256];
-				physx::shdfnd::snprintf(buffer, 256, "Could not find registered CUDA function '%s'.", funcNames[i]);
-				mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, buffer, __FILE__, __LINE__);
-				mError = true;
-				return;
-			}
-
-			if (!physx::shdfnd::strcmp(gFunctionTable[j].functionName, funcNames[i]))
-			{
-				mCuFuncModIndex[i] = (uint16_t)gFunctionTable[j].moduleIndex;
-				CUresult ret = cuModuleGetFunction(&mCuFunctions[i], mCuModules[mCuFuncModIndex[i]], funcNames[i]);
-				if (ret != CUDA_SUCCESS)
-				{
-					char buffer[256];
-					physx::shdfnd::snprintf(buffer, 256, "Could not find CUDA module containing function '%s'.", funcNames[i]);
-					mErrorCallback.reportError(PxErrorCode::eINTERNAL_ERROR, buffer, __FILE__, __LINE__);
-					mError = true;
-					return;
-				}
-				break;
-			}
-		}
-	}
-}
-
-KernelWrangler::~KernelWrangler()
-{
-	if (mCuModules.size())
-	{
-		PxScopedCudaLock _lock_(*mGpuDispatcher.getCudaContextManager());
-
-		for (uint32_t i = 0 ; i < mCuModules.size() ; i++)
-			if(mCuModules[i])
-				CUT_SAFE_CALL(cuModuleUnload(mCuModules[i]));
-	}
-}
-
-void const* const* KernelWrangler::getImages()
-{
-	return gModuleTable;
-}
-
-int KernelWrangler::getNumImages()
-{
-	return gNumModules;
-}
-
-/*
- * These calls are all made _before_ main() during static initialization
- * of this DLL.
- */
-
-#include <driver_types.h>
-
-#if PX_WINDOWS_FAMILY
-#define CUDARTAPI __stdcall
-#endif
-
-struct uint3;
-struct dim3;
-
-extern "C"
-void** CUDARTAPI __cudaRegisterFatBinary(void* fatBin)
-{
-	//HACK to get real fatbin in CUDA 4.0
-	struct CUIfatbinStruct
-	{
-		int magic;
-		int version;
-		void *fatbinArray;
-		char *fatbinFile;
-	};
-	const CUIfatbinStruct *fatbinStruct = (const CUIfatbinStruct *)fatBin;
-	if (fatbinStruct->magic == 0x466243B1)
-	{
-		fatBin = fatbinStruct->fatbinArray;
-	}
-
-	if (gNumModules < MAX_MODULES)
-	{
-		gModuleTable[ gNumModules ] = fatBin;
-		return (void**)(size_t) gNumModules++;
-	}
-	return NULL;
-}
-
-extern "C"
-void CUDARTAPI __cudaUnregisterFatBinary(void** fatCubinHandle)
-{
-	gModuleTable[(int)(size_t) fatCubinHandle ] = 0;
-}
-
-extern "C"
-void CUDARTAPI __cudaRegisterTexture(void**, const struct textureReference*, const void**, const char*, int, int, int)
-{
-}
-
-extern "C" void CUDARTAPI __cudaRegisterVar(void**, char*, char*, const char*, int, int, int, int)
-{
-}
-
-
-extern "C" void CUDARTAPI __cudaRegisterShared(void**, void**)
-{
-}
-
-extern "C"
-void CUDARTAPI __cudaRegisterFunction(void** fatCubinHandle, const char*, 
-	char*, const char* deviceName, int, uint3*, uint3*, dim3*, dim3*, int*)
-{
-	if (gNumFunctions < MAX_FUNCTIONS)
-	{
-		// We need this association of function to module in order to find textures and globals
-		gFunctionTable[ gNumFunctions ].moduleIndex = (int)(size_t) fatCubinHandle;
-		gFunctionTable[ gNumFunctions ].functionName = deviceName;
-		gNumFunctions++;
-	}
-}
-
-/* These functions are implemented just to resolve link dependencies */
-
-extern "C"
-cudaError_t CUDARTAPI cudaLaunch(const char* entry)
-{
-	PX_UNUSED(entry);
-	return cudaSuccess;
-}
-
-extern "C"
-cudaError_t CUDARTAPI cudaSetupArgument(const void*, size_t, size_t)
-{
-	return cudaSuccess;
-}
-
-extern "C"
-struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc(
-    int x, int y, int z, int w, enum cudaChannelFormatKind f)
-{
-	struct cudaChannelFormatDesc desc;
-	desc.x = x;
-	desc.y = y;
-	desc.z = z;
-	desc.w = w;
-	desc.f = f;
-	return desc;
-}
-
diff --git a/PxShared/src/cudamanager/src/CudaMemoryManager.cpp b/PxShared/src/cudamanager/src/CudaMemoryManager.cpp
deleted file mode 100644
index b1c6f94..0000000
--- a/PxShared/src/cudamanager/src/CudaMemoryManager.cpp
+++ /dev/null
@@ -1,649 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "cudamanager/PxCudaContextManager.h"
-#include "foundation/PxMath.h"
-#include "foundation/PxMemory.h"
-
-#include "CudaMemoryManager.h"
-#include "HeapManagerRef.h"
-
-#include <cuda.h>
-
-#define DEVICE_BASE_SIZE (0)
-#define DEVICE_PAGE_SIZE ( 2 * 1024*1024)
-#define PINNED_BASE_SIZE (0)
-#define PINNED_PAGE_SIZE ( 2 * 1024*1024)
-#define WC_BASE_SIZE (0)
-#define WC_PAGE_SIZE ( 2 * 1024*1024)
-#define MIN_BLOCK_SIZE 2048
-
-
-#define CMM_DELETE_SINGLE(x)	{ if(x) delete x; }
-#define CMM_DELETE_ARRAY(x)		{ if(x) delete [] x; }
-
-using namespace physx;
-
-CudaMemMgr::CudaMemMgr(PxCudaContextManager& mgr, physx::PxErrorCallback& errorCallback)
-	: mErrorCallback(errorCallback)
-	, mBufferPool("mBufferPool", 1024)
-	, mInitialized(false)
-	, mMgr(mgr)
-	, mDebugDisableAllocs(false)
-{
-	for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
-	{
-		mHeap[i] = NULL;
-		mMemoryAllocator[i] = NULL;
-		mMemoryMaxSize[i] = size_t(-1);
-	}
-
-	mMemoryBaseSize[PxCudaBufferMemorySpace::T_GPU] = DEVICE_BASE_SIZE;
-	mMemoryBaseSize[PxCudaBufferMemorySpace::T_PINNED_HOST] = PINNED_BASE_SIZE;
-	mMemoryBaseSize[PxCudaBufferMemorySpace::T_WRITE_COMBINED] = WC_BASE_SIZE;
-	mMemoryBaseSize[PxCudaBufferMemorySpace::T_HOST] = 0;
-
-	mMemoryPageSize[PxCudaBufferMemorySpace::T_GPU] = DEVICE_PAGE_SIZE;
-	mMemoryPageSize[PxCudaBufferMemorySpace::T_PINNED_HOST] = PINNED_PAGE_SIZE;
-	mMemoryPageSize[PxCudaBufferMemorySpace::T_WRITE_COMBINED] = WC_PAGE_SIZE;
-	mMemoryPageSize[PxCudaBufferMemorySpace::T_HOST] = PINNED_PAGE_SIZE;
-}
-
-
-CudaMemMgr::~CudaMemMgr()
-{
-	for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
-	{
-		CMM_DELETE_SINGLE(mHeap[i]);
-		CMM_DELETE_SINGLE(mMemoryAllocator[i]);
-	}
-}
-
-
-PX_INLINE bool CudaMemMgr::initialize()
-{
-	if (mInitialized)
-	{
-		return true;
-	}
-
-	for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
-	{
-		mHeap[i] = PX_NEW(HeapManagerRef)(mErrorCallback, false);
-		PX_ASSERT(mHeap[i]);
-	}
-
-	mMemoryAllocator[PxCudaBufferMemorySpace::T_GPU]				= PX_NEW(DeviceMemAllocator)(mMgr, mMemoryMaxSize[PxCudaBufferMemorySpace::T_GPU]);
-	mMemoryAllocator[PxCudaBufferMemorySpace::T_PINNED_HOST]		= PX_NEW(PinnedMemAllocator)(mMgr, mMemoryMaxSize[PxCudaBufferMemorySpace::T_PINNED_HOST]);
-	mMemoryAllocator[PxCudaBufferMemorySpace::T_WRITE_COMBINED]	= PX_NEW(WriteCombinedMemAllocator)(mMgr, mMemoryMaxSize[PxCudaBufferMemorySpace::T_WRITE_COMBINED]);
-	mMemoryAllocator[PxCudaBufferMemorySpace::T_HOST]				= PX_NEW(HostMemAllocator)(mMemoryMaxSize[PxCudaBufferMemorySpace::T_HOST]);
-
-	bool succ = true;
-	for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
-	{
-		succ &= mHeap[i]->init(mMemoryAllocator[i], mMemoryBaseSize[i], mMemoryPageSize[i], MIN_BLOCK_SIZE);
-		PX_ASSERT(succ);
-	}
-
-	for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
-	{
-		succ &= mHeap[i] && mMemoryAllocator[i];
-	}
-
-	if (!succ)
-	{
-		for (uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
-		{
-			CMM_DELETE_SINGLE(mHeap[i]);
-			CMM_DELETE_SINGLE(mMemoryAllocator[i]);
-		}
-		mInitialized = false;
-	}
-
-	return mInitialized = succ;;
-}
-
-
-bool CudaMemMgr::setPageSize(const PxCudaBufferType& type, size_t size)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	if (!mInitialized)
-	{
-		mMemoryPageSize[type.memorySpace] = PxMax(mMemoryPageSize[type.memorySpace], size);
-		return true;
-	}
-	else
-	{
-		bool ret = mHeap[type.memorySpace]->setPageSize(size);
-		mMemoryPageSize[type.memorySpace] = ret ? size : mMemoryPageSize[type.memorySpace];
-		return ret;
-	}
-}
-
-
-bool CudaMemMgr::setBaseSize(const PxCudaBufferType& type, size_t size)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	if (mInitialized || (((size - 1)&size) != 0))
-	{
-		return false;
-	}
-	else
-	{
-		mMemoryBaseSize[type.memorySpace] = PxMax(mMemoryBaseSize[type.memorySpace], size);
-		return true;
-	}
-}
-
-
-size_t CudaMemMgr::getBaseSize(const PxCudaBufferType& type)
-{
-	return mMemoryBaseSize[type.memorySpace];
-}
-
-
-size_t CudaMemMgr::getPageSize(const PxCudaBufferType& type)
-{
-	return mMemoryPageSize[type.memorySpace];
-}
-
-
-bool CudaMemMgr::setMaxMemorySize(const PxCudaBufferType& type, size_t size)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	if (mInitialized)
-	{
-		switch (type.memorySpace)
-		{
-		case PxCudaBufferMemorySpace::T_GPU:
-			static_cast<DeviceMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size);
-			return true;
-			break;
-		case PxCudaBufferMemorySpace::T_PINNED_HOST:
-			static_cast<PinnedMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size);
-			return true;
-			break;
-		case PxCudaBufferMemorySpace::T_HOST:
-			static_cast<HostMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size);
-			return true;
-			break;
-		case PxCudaBufferMemorySpace::T_WRITE_COMBINED:
-			static_cast<WriteCombinedMemAllocator*>(mMemoryAllocator[type.memorySpace])->setMaxSize(size);
-			return true;
-			break;
-		case PxCudaBufferMemorySpace::COUNT:
-		default:
-			PX_ASSERT(!"unknown memory type");
-			break;
-		}
-	}
-	else
-	{
-		mMemoryMaxSize[type.memorySpace] = PxMax(mMemoryMaxSize[type.memorySpace], size);
-		return true;
-	}
-
-	return false;
-}
-
-size_t CudaMemMgr::getMaxMemorySize(const PxCudaBufferType& type)
-{
-	if (mInitialized)
-	{
-		switch (type.memorySpace)
-		{
-		case PxCudaBufferMemorySpace::T_GPU:
-			return static_cast<DeviceMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize();
-			break;
-		case PxCudaBufferMemorySpace::T_PINNED_HOST:
-			return static_cast<PinnedMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize();
-			break;
-		case PxCudaBufferMemorySpace::T_HOST:
-			return static_cast<HostMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize();
-			break;
-		case PxCudaBufferMemorySpace::T_WRITE_COMBINED:
-			return static_cast<WriteCombinedMemAllocator*>(mMemoryAllocator[type.memorySpace])->getMaxSize();
-			break;
-		case PxCudaBufferMemorySpace::COUNT:
-		default:
-			PX_ASSERT(!"unknown memory type");
-			break;
-		}
-	}
-	return 0;
-}
-
-bool CudaMemMgr::reserve(const PxCudaBufferType& type, size_t size)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	if (!mInitialized)
-	{
-		if (!initialize())
-		{
-			return false;
-		}
-	}
-
-	return mHeap[type.memorySpace]->reserve(size);
-}
-
-
-PxCudaBuffer* CudaMemMgr::alloc(const PxCudaBufferType& type, size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
-	PxCudaBufferPtr addr = alloc(type.memorySpace, size, PX_ALLOC_INFO_PARAMS_INPUT());
-
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-	CudaBuffer* buffer = NULL;
-	if (addr)
-	{
-		buffer = mBufferPool.construct(type);
-		if (buffer)
-		{
-			buffer->init(addr, size, *this, PX_ALLOC_INFO_PARAMS_INPUT());
-		}
-	}
-	return buffer;
-}
-
-PxCudaBufferPtr CudaMemMgr::alloc(PxCudaBufferMemorySpace::Enum memorySpace, size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	if (!mInitialized)
-	{
-		if (!initialize())
-		{
-			return 0;
-		}
-	}
-
-	if (mDebugDisableAllocs)
-	{
-		return 0;
-	}
-
-	return reinterpret_cast<PxCudaBufferPtr>(mHeap[memorySpace]->alloc(size, PX_ALLOC_INFO_PARAMS_INPUT()));
-}
-
-bool CudaMemMgr::free(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	if (!mInitialized)
-	{
-		return false;
-	}
-
-	if (addr)
-	{
-		return mHeap[memorySpace]->free((void*)(addr));
-	}
-	else
-	{
-		return false;
-	}
-}
-
-bool CudaMemMgr::realloc(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr, size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	if (!mInitialized)
-	{
-		return false;
-	}
-
-	if (!size)
-	{
-		return false;
-	}
-
-	if (mDebugDisableAllocs)
-	{
-		return NULL;
-	}
-
-	bool ret = false;
-	if (addr)
-	{
-		ret = mHeap[memorySpace]->realloc((void*)(addr), size, PX_ALLOC_INFO_PARAMS_INPUT());
-	}
-
-	return ret;
-}
-
-void CudaMemMgr::getStats(const PxCudaBufferType& type, PxCudaMemoryManagerStats& outStats)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	if (!mInitialized)
-	{
-		return;
-	}
-
-	ApexHeapStats hpStats;
-	mHeap[type.memorySpace]->getStats(hpStats, HeapStatsFlags::F_BASIC_STATS | HeapStatsFlags::F_ALLOC_ID_STATS);
-
-	outStats.heapSize = hpStats.heapSize;
-	outStats.totalAllocated = hpStats.totalAllocated;
-	outStats.maxAllocated = hpStats.maxAllocated;
-	PxMemCopy(outStats.allocIdStats, hpStats.allocIdStats, sizeof(PxAllocIdStats)*PxAllocId::NUM_IDS);
-}
-
-
-bool CudaMemMgr::free(CudaBuffer& buffer)
-{
-	PxCudaBufferMemorySpace::Enum memSpace = buffer.getTypeFast().memorySpace;
-	PxCudaBufferPtr addr = buffer.getPtrFast();
-
-	{
-		shdfnd::Mutex::ScopedLock lock(mMutex);
-		mBufferPool.destroy(&buffer);
-	}
-
-	return free(memSpace, addr);
-}
-
-
-bool CudaMemMgr::realloc(CudaBuffer& buffer, size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
-	return realloc(buffer.getTypeFast().memorySpace, buffer.getPtrFast(), size, PX_ALLOC_INFO_PARAMS_INPUT());
-}
-
-PxCudaBufferPtr CudaMemMgr::getMappedPinnedPtr(PxCudaBufferPtr hostPtr)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-	void* base = mHeap[PxCudaBufferMemorySpace::T_PINNED_HOST]->findBaseAddress((void*)hostPtr);
-	if (base)
-	{
-		size_t offset = ((PinnedMemAllocator*)mMemoryAllocator[PxCudaBufferMemorySpace::T_PINNED_HOST])->getMappedPinnedOffset(base);
-		return hostPtr + offset;
-	}
-	return 0;
-}
-
-bool CudaBuffer::free()
-{
-	return mMemManager->free(*this);
-}
-
-
-bool CudaBuffer::realloc(size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
-	return mMemManager->realloc(*this, size, PX_ALLOC_INFO_PARAMS_INPUT());
-}
-
-
-DeviceMemAllocator::DeviceMemAllocator(PxCudaContextManager& mgr, size_t maxSize)
-	: mMgr(mgr)
-	, mMaxSize(maxSize)
-	, mAllocSize(0)
-{}
-
-
-DeviceMemAllocator::~DeviceMemAllocator()
-{
-	PX_ASSERT(mAllocSize == 0);
-}
-
-
-void* DeviceMemAllocator::alloc(const size_t size)
-{
-	if (mAllocSize + size > mMaxSize)
-	{
-		return NULL;
-	}
-	else
-	{
-		PxScopedCudaLock lock(mMgr);
-		CUdeviceptr dPtr;
-		CUresult result = cuMemAlloc(&dPtr,  uint32_t(size));
-
-		if (result == CUDA_SUCCESS)
-		{
-			mAllocSize += size;
-			return (void*)(size_t)(dPtr);
-		}
-		else
-		{
-			PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate device memory.");
-			return NULL;
-		}
-	}
-}
-
-
-void DeviceMemAllocator::free(void* addr, const size_t size)
-{
-	PxScopedCudaLock lock(mMgr);
-	PX_ASSERT(mAllocSize >= size);
-	CUresult result = cuMemFree((CUdeviceptr)(size_t)(addr));
-	PX_UNUSED(result);
-	PX_ASSERT(result == CUDA_SUCCESS);
-	mAllocSize -= size;
-}
-
-
-PinnedMemAllocator::PinnedMemAllocator(PxCudaContextManager& mgr, size_t maxSize)
-	: mMgr(mgr)
-	, mMaxSize(maxSize)
-	, mAllocSize(0)
-{
-}
-
-
-PinnedMemAllocator::~PinnedMemAllocator()
-{
-	PX_ASSERT(mAllocSize == 0);
-}
-
-
-void* PinnedMemAllocator::alloc(const size_t size)
-{
-	if (mAllocSize + size > mMaxSize)
-	{
-		return NULL;
-	}
-	else
-	{
-		PxScopedCudaLock lock(mMgr);
-		void* hPtr;
-		unsigned int flags = 0;
-
-		if (mMgr.canMapHostMemory())
-		{
-			flags |= CU_MEMHOSTALLOC_DEVICEMAP;
-		}
-		CUresult result = cuMemHostAlloc(&hPtr, uint32_t(size), flags);
-
-		if (result == CUDA_SUCCESS)
-		{
-			if (hPtr)
-			{
-				mAllocSize += size;
-			}
-
-			if (mMgr.canMapHostMemory())
-			{
-				CUdeviceptr dptr = 0;
-				cuMemHostGetDevicePointer(&dptr, hPtr, 0);
-				mMappedPinnedPtrs.insert(hPtr, size_t(dptr));
-			}
-
-			return hPtr;
-		}
-		else
-		{
-			PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate pinned memory.");			
-			return NULL;
-		}
-	}
-}
-
-
-void PinnedMemAllocator::free(void* addr, const size_t size)
-{
-	PxScopedCudaLock lock(mMgr);
-	PX_ASSERT(mAllocSize >= size);
-
-	if (mMgr.canMapHostMemory())
-	{
-		PX_ASSERT(mMappedPinnedPtrs.find(addr));
-		mMappedPinnedPtrs.erase(addr);
-	}
-	
-	CUresult result = cuMemFreeHost(addr);
-	PX_UNUSED(result);
-	PX_ASSERT(result == CUDA_SUCCESS);
-	mAllocSize -= size;
-}
-
-
-WriteCombinedMemAllocator::WriteCombinedMemAllocator(PxCudaContextManager& mgr, size_t maxSize)
-	: mMgr(mgr)
-	, mMaxSize(maxSize)
-	, mAllocSize(0)
-{
-	mWcMemSupport = mMgr.getDriverVersion() >= 2020 ? WcMem::SUPPORTED : WcMem::NOT_SUPPORTED;
-}
-
-
-WriteCombinedMemAllocator::~WriteCombinedMemAllocator()
-{
-	PX_ASSERT(mAllocSize == 0);
-}
-
-
-bool WriteCombinedMemAllocator::isWcMemSupported()
-{
-	if (mWcMemSupport == WcMem::SUPPORTED)
-	{
-		return true;
-	}
-	else
-	{
-		PX_ASSERT(mWcMemSupport == WcMem::NOT_SUPPORTED);
-		return false;
-	}
-}
-
-
-void* WriteCombinedMemAllocator::alloc(const size_t size)
-{
-	if (mAllocSize + size > mMaxSize)
-	{
-		return NULL;
-	}
-	else
-	{
-		PxScopedCudaLock lock(mMgr);
-		void* hPtr = NULL;
-
-		unsigned int flags = CU_MEMHOSTALLOC_WRITECOMBINED;
-
-		if (mMgr.canMapHostMemory())
-		{
-			flags |= CU_MEMHOSTALLOC_DEVICEMAP;
-		}
-
-		bool success = isWcMemSupported() && (cuMemHostAlloc(&hPtr, size, flags) == CUDA_SUCCESS);
-		if (success)
-		{
-			if (hPtr)
-			{
-				mAllocSize += size;
-			}
-
-			return hPtr;
-		}
-		else
-		{
-			PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate write combined memory.");			
-			return NULL;
-		}
-	}
-}
-
-
-void WriteCombinedMemAllocator::free(void* addr, const size_t size)
-{
-	PxScopedCudaLock lock(mMgr);
-	PX_ASSERT(mAllocSize >= size);
-	CUresult result = cuMemFreeHost(addr);
-	PX_ASSERT(result == CUDA_SUCCESS);
-	PX_UNUSED(result);
-	mAllocSize -= size;
-}
-
-
-HostMemAllocator::HostMemAllocator(size_t maxSize)
-	: mMaxSize(maxSize)
-	, mAllocSize(0)
-{
-}
-
-
-HostMemAllocator::~HostMemAllocator()
-{
-	PX_ASSERT(mAllocSize == 0);
-}
-
-
-void* HostMemAllocator::alloc(const size_t size)
-{
-	if (mAllocSize + size > mMaxSize)
-	{
-		return NULL;
-	}
-	else
-	{
-		void* ret = PX_ALLOC(size, "host memory");
-		if (ret)
-		{
-			mAllocSize += size;
-			return ret;
-		}
-		else
-		{
-			PX_ASSERT_WITH_MESSAGE(0, "Failed to allocate host memory.");			
-			return NULL;
-		}
-	}
-}
-
-
-void HostMemAllocator::free(void* addr, const size_t size)
-{
-	PX_ASSERT(mAllocSize >= size);
-	PX_FREE(addr);
-	mAllocSize -= size;
-}
-
diff --git a/PxShared/src/cudamanager/src/CudaMemoryManager.h b/PxShared/src/cudamanager/src/CudaMemoryManager.h
deleted file mode 100644
index 071b4ab..0000000
--- a/PxShared/src/cudamanager/src/CudaMemoryManager.h
+++ /dev/null
@@ -1,297 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef PXCUDACONTEXTMANAGER_CUDAMEMORYMANAGER_H
-#define PXCUDACONTEXTMANAGER_CUDAMEMORYMANAGER_H
-
-#include "task/PxTaskDefine.h"
-#include "HeapManagerInterface.h"
-
-#include "PsPool.h"
-#include "PsMutex.h"
-#include "PsUserAllocated.h"
-#include "PsHashMap.h"
-
-namespace physx
-{
-
-class CudaBuffer;
-class HeapManagerInterface;
-class CudaMemMgr;
-class PxCudaContextManager;
-
-class CudaBuffer: public PxCudaBuffer
-{
-public:
-	PX_INLINE CudaBuffer(const PxCudaBufferType& type)
-		: mType(type)
-	{}
-
-// Ni Interface
-	bool									free();
-	bool									realloc(size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
-	const PxCudaBufferType&					getType() const
-	{
-		return getTypeFast();
-	}
-	PxCudaBufferPtr							getPtr() const
-	{
-		return getPtrFast();
-	}
-	size_t									getSize() const
-	{
-		return getSizeFast();
-	}
-	PxCudaMemoryManager*					getCudaMemoryManager() const
-	{
-		return getCudaMemoryManagerFast();
-	}
-//
-	PX_INLINE	const PxCudaBufferType&		getTypeFast() const
-	{
-		return mType;
-	}
-	PX_INLINE	PxCudaBufferPtr				getPtrFast() const
-	{
-		return mPtr;
-	}
-	PX_INLINE	void						setPtr(PxCudaBufferPtr val)
-	{
-		mPtr = val;
-	}
-	PX_INLINE	size_t						getSizeFast() const
-	{
-		return mSize;
-	}
-	PX_INLINE	void						setSize(size_t val)
-	{
-		mSize = val;
-	}
-	PX_INLINE	PxCudaMemoryManager*			getCudaMemoryManagerFast() const
-	{
-		return reinterpret_cast<PxCudaMemoryManager*>(mMemManager);
-	}
-	PX_INLINE	void						init(PxCudaBufferPtr ptr, size_t size, CudaMemMgr& manager, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED))
-	{
-		mPtr = ptr;
-		mSize = size;
-		mMemManager = &manager;
-		mAllocInfo = PxAllocInfo(PX_ALLOC_INFO_PARAMS_INPUT());
-	}
-
-	void operator=(const CudaBuffer& in)
-	{
-		const_cast<PxCudaBufferType&>(mType) = in.mType;
-		mPtr = in.mPtr;
-		mSize = in.mSize;
-		mMemManager = in.mMemManager;
-	}
-
-private:
-	const PxCudaBufferType	mType;
-	PxCudaBufferPtr			mPtr;
-	size_t					mSize;
-	CudaMemMgr*				mMemManager;
-	PxAllocInfo				mAllocInfo;
-};
-
-
-class CudaMemMgr: public PxCudaMemoryManager, public shdfnd::UserAllocated
-{
-	PX_NOCOPY(CudaMemMgr)
-public:
-	CudaMemMgr(PxCudaContextManager& mMgr, physx::PxErrorCallback& errorCallback);
-	virtual ~CudaMemMgr();
-
-	PxCudaBuffer*				alloc(const PxCudaBufferType& type, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
-	PxCudaBufferPtr				alloc(PxCudaBufferMemorySpace::Enum memorySpace, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
-	bool						free(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr);
-	bool						realloc(PxCudaBufferMemorySpace::Enum memorySpace, PxCudaBufferPtr addr, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
-	void						getStats(const PxCudaBufferType& type, PxCudaMemoryManagerStats& outStats);
-	bool						reserve(const PxCudaBufferType& type, size_t size);
-	bool						setPageSize(const PxCudaBufferType& type, size_t size);
-	bool						setMaxMemorySize(const PxCudaBufferType& type, size_t size);
-	size_t						getBaseSize(const PxCudaBufferType& type);
-	size_t						getPageSize(const PxCudaBufferType& type);
-	size_t						getMaxMemorySize(const PxCudaBufferType& type);
-	void						debugDisableAllocs()
-	{
-		mDebugDisableAllocs = true;
-	}
-	PxCudaBufferPtr				getMappedPinnedPtr(PxCudaBufferPtr hostPtr);
-
-	// internals
-	bool						free(CudaBuffer& buffer);
-	bool						realloc(CudaBuffer& buffer, size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
-	bool						setBaseSize(const PxCudaBufferType& type, size_t size);
-
-private:
-	PX_INLINE	bool						initialize();
-	physx::PxErrorCallback&				mErrorCallback;
-	HeapManagerInterface*					mHeap[PxCudaBufferMemorySpace::COUNT];
-	HeapManagerInterface::Allocator*		mMemoryAllocator[PxCudaBufferMemorySpace::COUNT];
-	size_t									mMemoryBaseSize[PxCudaBufferMemorySpace::COUNT];
-	size_t									mMemoryPageSize[PxCudaBufferMemorySpace::COUNT];
-	size_t									mMemoryMaxSize[PxCudaBufferMemorySpace::COUNT];
-	shdfnd::Pool<CudaBuffer>				mBufferPool;
-	bool									mInitialized;
-	PxCudaContextManager&					mMgr;
-	shdfnd::Mutex							mMutex;
-	bool									mDebugDisableAllocs;	
-};
-
-// TODO, give MemoryAllocator prefix or namespace
-class DeviceMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated
-{
-	PX_NOCOPY(DeviceMemAllocator)
-public:
-	DeviceMemAllocator(PxCudaContextManager& mgr, size_t maxSize);
-	virtual ~DeviceMemAllocator();
-
-	virtual void*	alloc(const size_t size);
-	virtual void	free(void* addr, const size_t size);
-
-	void	setMaxSize(size_t maxSize)
-	{
-		mMaxSize = maxSize;
-	}
-	size_t	getMaxSize()
-	{
-		return mMaxSize;
-	}
-
-private:
-	PxCudaContextManager& mMgr;
-	size_t			mMaxSize;
-	size_t			mAllocSize;
-};
-
-
-class PinnedMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated
-{
-	PX_NOCOPY(PinnedMemAllocator)
-public:
-	PinnedMemAllocator(PxCudaContextManager& mMgr, size_t maxSize);
-	virtual ~PinnedMemAllocator();
-
-	virtual void*	alloc(const size_t size);
-	virtual void	free(void* addr, const size_t size);
-
-	void	setMaxSize(size_t maxSize)
-	{
-		mMaxSize = maxSize;
-	}
-	size_t	getMaxSize()
-	{
-		return mMaxSize;
-	}
-
-	size_t getMappedPinnedOffset(void* base)
-	{
-
-		PX_ASSERT(base);
-		const shdfnd::HashMap<void*, size_t>::Entry* entry = mMappedPinnedPtrs.find(base);
-		PX_ASSERT(entry);
-		return entry->second - size_t(base);
-	}
-
-private:
-	PxCudaContextManager& mMgr;
-	size_t			mMaxSize;
-	size_t			mAllocSize;
-	shdfnd::HashMap<void*, size_t> mMappedPinnedPtrs;
-};
-
-
-class HostMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated
-{
-	PX_NOCOPY(HostMemAllocator)
-public:
-	HostMemAllocator(size_t maxSize);
-	virtual ~HostMemAllocator();
-
-	virtual void*	alloc(const size_t size);
-	virtual void	free(void* addr, const size_t size);
-
-	void	setMaxSize(size_t maxSize)
-	{
-		mMaxSize = maxSize;
-	}
-	size_t	getMaxSize()
-	{
-		return mMaxSize;
-	}
-
-private:
-	size_t			mMaxSize;
-	size_t			mAllocSize;
-};
-
-
-class WriteCombinedMemAllocator: public HeapManagerInterface::Allocator, public shdfnd::UserAllocated
-{
-	PX_NOCOPY(WriteCombinedMemAllocator)
-public:
-	WriteCombinedMemAllocator(PxCudaContextManager& mgr, size_t maxSize);
-	virtual ~WriteCombinedMemAllocator();
-
-	virtual void*	alloc(const size_t size);
-	virtual void	free(void* addr, const size_t size);
-
-	void	setMaxSize(size_t maxSize)
-	{
-		mMaxSize = maxSize;
-	}
-	size_t	getMaxSize()
-	{
-		return mMaxSize;
-	}
-
-private:
-	struct WcMem
-	{
-		enum Enum
-		{
-			NOT_CHECKED,
-			SUPPORTED,
-			NOT_SUPPORTED
-		};
-	};
-
-	bool	isWcMemSupported();
-
-private:
-	int					mCudaOrdinal;
-	PxCudaContextManager& mMgr;
-	WcMem::Enum			mWcMemSupport;
-	size_t				mMaxSize;
-	size_t				mAllocSize;
-};
-
-} // end physx namespace
-
-#endif // PXCUDACONTEXTMANAGER_CUDAMEMORYMANAGER_H
diff --git a/PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h b/PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h
deleted file mode 100644
index f20d87a..0000000
--- a/PxShared/src/cudamanager/src/CudaNode3DLowLatencyInterface.h
+++ /dev/null
@@ -1,128 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef PXCUDACONTEXTMANAGER_CUDANODE3DLOWLATENCYINTERFACE_H
-#define PXCUDACONTEXTMANAGER_CUDANODE3DLOWLATENCYINTERFACE_H
-
-#include <cuda.h>
-
-namespace physx
-{
-// Utility macros for defining and using UUID values for use with
-// the CUDA driver.
-//
-// CU_INIT_UUID must be #defined in exactly one translation unit
-// per linkage unit (i.e. one .c or .cpp file per binary).  This
-// allows multiple .c and .cpp files to include headers that define
-// UUIDs using CU_DEFINE_UUID: The translation unit that #defines
-// CU_INIT_UUID will define and initialize the UUIDs, and all other
-// translation units will link to that definition.
-
-// Define helper macro: CU_INIT_EXTERN_CONST 
-// In C, global consts have external linkage by default.  In C++,
-// global consts have internal linkage by default, and require the
-// "extern" storage class specifier to have external linkage.  C++
-// allows using "extern" with initializers, but it is illegal in C.
-// Thus, there is no common syntax for C and C++ to declare and
-// initialize global constants with external linkage.  This macro
-// helps reduce duplication of other macros by factoring out the
-// C/C++ discrepancy.
-#ifdef __cplusplus
-#define CU_INIT_EXTERN_CONST extern const
-#else
-#define CU_INIT_EXTERN_CONST const
-#endif
-
-// Define macro CU_DEFINE_UUID.  The parameters are the commonly
-// used "int-short-short-char[8]" style, which can be generated by
-// Microsoft's guidgen.exe tool, Visual Studio's "Create GUID"
-// option in the Tools menu (select style #2), and many web-based
-// UUID generator tools.  Here's an example of what "Create GUID"
-// style #2 generates:
-//
-//   DEFINE_GUID( <<name>>, 
-//   0x2446054, 0xbb8e, 0x4b2f, 0x8b, 0xfc, 0xa4, 0xfe, 0x44, 0x9, 0x8f, 0xb8);
-//
-// So to use one of these with CUDA, just change the macro to
-// CU_DEFINE_UUID and choose a symbol name.  For example:
-//
-//   CU_DEFINE_UUID( MyUuid, 
-//   0x2446054, 0xbb8e, 0x4b2f, 0x8b, 0xfc, 0xa4, 0xfe, 0x44, 0x9, 0x8f, 0xb8);
-//
-#if defined(CU_INIT_UUID)
-#define CU_CHAR(x) (char)(unsigned char)((x) & 0xff)
-// Define the symbol as exportable to other translation units, and
-// initialize the value.  Inner set of parens is necessary because
-// "bytes" array needs parens within the struct initializer, which
-// also needs parens.  
-#define CU_DEFINE_UUID(name, a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)          \
-    CU_INIT_EXTERN_CONST CUuuid name =                                         \
-    {                                                                          \
-      {                                                                        \
-        CU_CHAR(a), CU_CHAR((a) >> 8), CU_CHAR((a) >> 16), CU_CHAR((a) >> 24), \
-        CU_CHAR(b), CU_CHAR((b) >> 8),                                         \
-        CU_CHAR(c), CU_CHAR((c) >> 8),                                         \
-        CU_CHAR(d0),                                                           \
-        CU_CHAR(d1),                                                           \
-        CU_CHAR(d2),                                                           \
-        CU_CHAR(d3),                                                           \
-        CU_CHAR(d4),                                                           \
-        CU_CHAR(d5),                                                           \
-        CU_CHAR(d6),                                                           \
-        CU_CHAR(d7)                                                            \
-      }                                                                        \
-    }
-#else
-// Declare the symbol to be imported from another translation unit.
-#define CU_DEFINE_UUID(name, a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
-    extern const CUuuid name
-#endif
-
-//------------------------------------------------------------------
-// Cuda Private API Interfaces for PhysX
-//------------------------------------------------------------------
-
-// This provides backdoor interfaces used by PhysX
-CU_DEFINE_UUID(CU_ETID_PhysXInterface, 0x8c0ba50c, 0x0410, 0x9a92, 0x89, 0xa7, 0xd0, 0xdf, 0x10, 0xe7, 0x72, 0x86);
-
-typedef struct CUetblPhysXInterface_st
-{
-    /* Size of this structure */
-    size_t size;
-    
-    /* Create a new CUDA context on Node3dLowLatency.
-     * - will usually it will just call cuCtxCreateOnNode3DLowLatency.
-     */
-    CUresult (CUDAAPI *cuCtxCreateOnNode3DLowLatency)(
-        CUcontext *pctx,
-        unsigned int flags,
-        CUdevice dev);
-
-}	CUetblPhysXInterface;
-}
-
-#endif // PXCUDACONTEXTMANAGER_CUDANODE3DLOWLATENCYINTERFACE_H
diff --git a/PxShared/src/cudamanager/src/GpuDispatcher.cpp b/PxShared/src/cudamanager/src/GpuDispatcher.cpp
deleted file mode 100644
index 432a0cd..0000000
--- a/PxShared/src/cudamanager/src/GpuDispatcher.cpp
+++ /dev/null
@@ -1,928 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "task/PxTaskDefine.h"
-
-#if PX_SUPPORT_GPU_PHYSX
-
-#include "task/PxGpuDispatcher.h"
-#include "task/PxCpuDispatcher.h"
-#include "cudamanager/PxCudaContextManager.h"
-#include "cudamanager/PxGpuCopyDesc.h"
-
-#include "CudaKernelWrangler.h"
-#include "GpuDispatcher.h"
-
-#if PX_SUPPORT_PXTASK_PROFILING
-#include "foundation/PxProfiler.h"
-#endif
-
-#include "PsArray.h"
-#include "PsSync.h"
-#include "PsInlineArray.h"
-#include "PsUserAllocated.h"
-#include "PsAtomic.h"
-
-#if PX_VC
-#pragma warning(disable: 4062)	//enumerator 'identifier' in switch of enum 'enumeration' is not handled
-#endif
-
-using namespace physx;
-
-// hack to run CUDA calls in a single thread
-// used to capture CUDA APIC traces
-// #define SINGLE_CUDA_THREAD 1
-
-/* Kernels in UtilsKernels.cu */
-const char* UtilKernelNames[] =
-{
-	"Saturate",
-	"MemCopyAsync",
-	"MemCopyBatchedAsync",
-};
-enum
-{
-	KERNEL_SATURATE,
-	KERNEL_MEMCOPY,
-	KERNEL_MEMCOPY_BATCHED
-};
-
-/* ==================== LaunchTask =========================== */
-
-/**
-\brief A task that maintains a list of dependent tasks.
-
-This task maintains a list of dependent tasks that have their reference counts 
-reduced on completion of the task.
-
-The refcount is incremented every time a dependent task is added.
-*/
-class physx::FanoutTask : public PxBaseTask, public shdfnd::UserAllocated
-{
-	PX_NOCOPY(FanoutTask)
-public:
-	FanoutTask(const char* name) : mRefCount(0), mName(name), mNotifySubmission(false) {}
-
-	virtual void run() {}
-	virtual const char* getName(void) const { return mName; }
-
-	virtual void removeReference()
-	{
-		shdfnd::Mutex::ScopedLock lock(mMutex);
-		if (!shdfnd::atomicDecrement(&mRefCount))
-		{
-			// prevents access to mReferencesToRemove until release
-			shdfnd::atomicIncrement(&mRefCount);
-			mNotifySubmission = false;
-			PX_ASSERT(mReferencesToRemove.empty());
-			// save continuation and dependents
-			for (uint32_t i = 0; i < mDependents.size(); i++)
-				mReferencesToRemove.pushBack(mDependents[i]);
-			mDependents.clear();
-			mTm->getCpuDispatcher()->submitTask(*this);
-		}
-	}
-
-	/** 
-	\brief Increases reference count
-	*/
-	virtual void addReference()
-	{
-		shdfnd::Mutex::ScopedLock lock(mMutex);
-		shdfnd::atomicIncrement(&mRefCount);
-		mNotifySubmission = true;
-	}
-
-	/**
-	\brief Return the ref-count for this task 
-	*/
-	PX_INLINE int32_t getReference() const
-	{
-		return mRefCount;
-	}
-
-	/**
-	Adds a dependent task. It also sets the task manager querying it from the dependent task.  
-	The refcount is incremented every time a dependent task is added.
-	*/
-	void addDependent(PxBaseTask& dependent)
-	{
-		shdfnd::Mutex::ScopedLock lock(mMutex);
-		shdfnd::atomicIncrement(&mRefCount);
-		mTm = dependent.getTaskManager();
-		mDependents.pushBack(&dependent);
-		dependent.addReference();
-		mNotifySubmission = true;
-	}
-
-	/**
-	Reduces reference counts of the continuation task and the dependent tasks, also 
-	clearing the copy of continuation and dependents task list.
-	*/
-	virtual void release()
-	{
-		shdfnd::InlineArray<PxBaseTask*, 10> referencesToRemove;
-
-		{
-			shdfnd::Mutex::ScopedLock lock(mMutex);
-
-			const uint32_t contCount = mReferencesToRemove.size(); 
-			referencesToRemove.reserve(contCount);
-			for (uint32_t i=0; i < contCount; ++i)
-				referencesToRemove.pushBack(mReferencesToRemove[i]);
-				
-			mReferencesToRemove.clear();
-			// allow access to mReferencesToRemove again
-			if (mNotifySubmission)
-			{
-				removeReference();
-			}
-			else
-			{
-				physx::shdfnd::atomicDecrement(&mRefCount);
-			}
-
-			// the scoped lock needs to get freed before the continuation tasks get (potentially) submitted because
-			// those continuation tasks might trigger events that delete this task and corrupt the memory of the
-			// mutex (for example, assume this task is a member of the scene then the submitted tasks cause the simulation 
-			// to finish and then the scene gets released which in turn will delete this task. When this task then finally
-			// continues the heap memory will be corrupted.
-		}
-
-		for (uint32_t i=0; i < referencesToRemove.size(); ++i)
-			referencesToRemove[i]->removeReference();
-	}
-
-private:
-	friend class LaunchTask;
-
-	volatile int32_t mRefCount;
-	const char* mName;
-	shdfnd::InlineArray<PxBaseTask*, 4> mDependents;
-	shdfnd::InlineArray<PxBaseTask*, 4> mReferencesToRemove;
-	bool mNotifySubmission;
-	shdfnd::Mutex mMutex; // guarding mDependents and mNotifySubmission
-};
-
-class physx::LaunchTask : public physx::FanoutTask
-{
-public:
-	LaunchTask() : FanoutTask("GpuDispatcher.launch"), mIsBatchStarted(false)
-	{
-	}
-
-	virtual void run() 
-	{
-		if (mReferencesToRemove.size() >= 1)
-			mIsBatchStarted = true;
-	}
-
-	bool mIsBatchStarted;
-};
-
-class physx::BlockTask : public PxLightCpuTask, public shdfnd::UserAllocated
-{
-	PX_NOCOPY(BlockTask)
-public:
-	BlockTask(PxGpuWorkerThread* dispatcher, physx::LaunchTask* launchTask) 
-		: mDispatcher(dispatcher), mLaunchTask(launchTask), mSyncTask(NULL)
-	{}
-
-	virtual const char* getName(void) const 
-	{ 
-		return "GpuDispatcher.block"; 
-	}
-
-	virtual void removeReference()
-	{
-		shdfnd::Mutex::ScopedLock lock(mMutex);
-		if (!physx::shdfnd::atomicDecrement(&mRefCount))
-		{
-			// prevents access to mSyncTask until release
-			physx::shdfnd::atomicIncrement(&mRefCount);
-			PX_ASSERT(!mSyncTask);
-			shdfnd::swap(mSyncTask, mCont);
-			mTm->getCpuDispatcher()->submitTask(*this);
-		}
-	}
-
-	virtual void run() 
-	{
-		if (mLaunchTask->mIsBatchStarted)
-		{
-			mDispatcher->mCtxMgr->acquireContext();
-			CUevent stopEv = mDispatcher->mCachedBlockingEvents.get();
-			CUstream stream = (CUstream)0;
-			mSyncTask->addReference();
-			mDispatcher->flushBatch(stopEv, stream, mSyncTask);
-			mDispatcher->mCtxMgr->releaseContext();
-			mLaunchTask->mIsBatchStarted = false;
-		}
-	}
-
-	virtual void setContinuation(PxBaseTask* continuation) 
-	{
-		// this function is called multiple times, skip after first call
-		shdfnd::Mutex::ScopedLock lock(mMutex);
-		physx::shdfnd::atomicIncrement(&mRefCount);
-		if (!mCont)
-		{
-			mCont = continuation;
-			mTm = mCont->getTaskManager();
-			mCont->addReference();
-		}
-	}
-
-	virtual void release()
-	{
-		shdfnd::Mutex::ScopedLock lock(mMutex);
-		mSyncTask->removeReference();
-		mSyncTask = NULL;
-		// allow access to mSyncTask again
-		if (mCont)
-		{
-			removeReference();
-		}
-		else
-		{
-			physx::shdfnd::atomicDecrement(&mRefCount);
-		}
-	}
-
-	PxGpuWorkerThread* mDispatcher;
-	physx::LaunchTask* mLaunchTask;
-	PxBaseTask* mSyncTask;
-	shdfnd::Mutex mMutex; // guarding mCont
-};
-
-/* ==================== API functions =========================== */
-
-void physx::releaseGpuDispatcher(PxGpuDispatcher& gd)
-{
-	GpuDispatcherImpl* impl = (GpuDispatcherImpl*) &gd;
-	delete impl;
-}
-
-PxCudaContextManager* GpuDispatcherImpl::getCudaContextManager()
-{
-	return mDispatcher->mCtxMgr;
-}
-
-GpuDispatcherImpl::GpuDispatcherImpl(PxErrorCallback& errorCallback, PxCudaContextManager& ctx) :
-	mDispatcher(NULL),
-	mBlockingThread(NULL),
-	mLaunchTask(NULL),
-	mBlockTask(NULL),
-	mSyncTask(NULL)
-{
-	mDispatcher = PX_NEW(PxGpuWorkerThread);
-	if (!mDispatcher)
-	{
-		forceFailureMode();
-		return;
-	}
-
-	mDispatcher->setCudaContext(ctx);
-
-	mDispatcher->mUtilKernelWrapper = PX_NEW(KernelWrangler)(*this, errorCallback, UtilKernelNames, sizeof(UtilKernelNames) / sizeof(char*));
-	if (!mDispatcher->mUtilKernelWrapper || mDispatcher->mUtilKernelWrapper->hadError())
-	{
-		forceFailureMode();
-		return;
-	}
-
-	mLaunchTask = PX_NEW(LaunchTask);
-	mBlockTask = PX_NEW(BlockTask)(mDispatcher, mLaunchTask);
-	mSyncTask = PX_NEW(FanoutTask)("GpuDispatcher.sync");
-}
-
-GpuDispatcherImpl::~GpuDispatcherImpl()
-{
-	if (mBlockingThread)
-	{
-		mBlockingThread->signalQuit();
-		PX_ASSERT(mDispatcher);
-		mDispatcher->mRecordEventQueued.set();
-		mBlockingThread->waitForQuit();
-		delete mBlockingThread;
-	}
-
-	if (mDispatcher)
-	{
-		mDispatcher->signalQuit();
-		mDispatcher->mInputReady.set();
-		mDispatcher->waitForQuit();
-		delete mDispatcher;
-	}
-
-	if (mLaunchTask)
-		PX_DELETE(mLaunchTask);
-
-	if (mBlockTask)
-		PX_DELETE(mBlockTask);
-
-	if (mSyncTask)
-		PX_DELETE(mSyncTask);
-}
-
-void GpuDispatcherImpl::start()
-{
-#ifndef SINGLE_CUDA_THREAD
-	mDispatcher->start(shdfnd::Thread::getDefaultStackSize());
-#else
-	mDispatcher->execute();
-#endif
-
-	mBlockingThread = PX_NEW(BlockingWaitThread)(*mDispatcher);
-#ifndef SINGLE_CUDA_THREAD
-	mBlockingThread->start(shdfnd::Thread::getDefaultStackSize());
-#endif
-}
-
-void GpuDispatcherImpl::startSimulation()
-{
-	mDispatcher->startSimulation();
-}
-
-void GpuDispatcherImpl::stopSimulation()
-{
-	mDispatcher->stopSimulation();
-}
-
-void GpuDispatcherImpl::startGroup()
-{
-	shdfnd::atomicIncrement(&mDispatcher->mActiveGroups);
-}
-
-void GpuDispatcherImpl::submitTask(PxTask& task)
-{
-	mDispatcher->mSubmittedTaskList.push(&task);
-}
-
-void GpuDispatcherImpl::finishGroup()
-{
-	if (!shdfnd::atomicDecrement(&mDispatcher->mActiveGroups))
-	{
-#ifdef SINGLE_CUDA_THREAD
-		mDispatcher->mCtxMgr->acquireContext();
-		mDispatcher->processActiveTasks();
-		mDispatcher->mCtxMgr->releaseContext();
-		mDispatcher->blockingWaitFunc();
-#endif
-		mDispatcher->mInputReady.set();
-	}
-}
-
-void GpuDispatcherImpl::addCompletionPrereq(PxBaseTask& task)
-{
-	mDispatcher->addCompletionPrereq(task);
-}
-
-bool GpuDispatcherImpl::failureDetected() const
-{
-	return mDispatcher->mFailureDetected;
-}
-
-void GpuDispatcherImpl::forceFailureMode()
-{
-	mDispatcher->mFailureDetected = true;
-}
-
-void GpuDispatcherImpl::launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream)
-{
-	mDispatcher->launchCopyKernel(desc, count, stream);
-}
-
-PxBaseTask& GpuDispatcherImpl::getPreLaunchTask()
-{
-	PX_ASSERT(mLaunchTask);
-	return *mLaunchTask;
-}
-
-void GpuDispatcherImpl::addPreLaunchDependent(PxBaseTask& dependent)
-{
-	PX_ASSERT(mLaunchTask);
-	mLaunchTask->addDependent(dependent);
-}
-
-PxBaseTask& GpuDispatcherImpl::getPostLaunchTask()
-{
-	PX_ASSERT(mBlockTask);
-	return *mBlockTask;
-}
-
-void GpuDispatcherImpl::addPostLaunchDependent(PxBaseTask& dependent)
-{
-	PX_ASSERT(mSyncTask && mBlockTask);
-	mSyncTask->addDependent(dependent);
-	mBlockTask->setContinuation(mSyncTask);
-	mSyncTask->removeReference();
-}
-
-/* ==================== Worker Thread =========================== */
-
-PxGpuWorkerThread::PxGpuWorkerThread()
-	: mActiveGroups(0)
-	, mCtxMgr(NULL)
-	, mFailureDetected(false)
-	, mCompletionRingPush(0)
-	, mCompletionRingPop(0)
-	, mCachedBlockingEvents(CU_EVENT_BLOCKING_SYNC)
-	, mCachedNonBlockingEvents(0)
-	, mCountActiveScenes(0)
-	, mSmStartTimes(0)
-	, mUtilKernelWrapper(0)
-{
-}
-
-void PxGpuWorkerThread::setCudaContext(PxCudaContextManager& ctx)
-{
-	mCtxMgr = &ctx;
-}
-
-PxGpuWorkerThread::~PxGpuWorkerThread()
-{
-	mCtxMgr->acquireContext();
-	mCachedBlockingEvents.clear();
-	mCachedNonBlockingEvents.clear();
-	mCachedStreams.reset();
-	while (!mCachedStreams.empty())
-	{
-		GD_CHECK_CALL(cuStreamDestroy(mCachedStreams.get(mCachedStreams.popBack())));
-	}
-	mCtxMgr->releaseContext();
-
-	if (mSmStartTimes)
-	{
-		PX_FREE(mSmStartTimes);
-	}
-	if (mUtilKernelWrapper)
-	{
-		// will acquire the context for itself
-		PX_DELETE(mUtilKernelWrapper);
-	}
-}
-
-
-/* A TaskManager is informing us that its simulation is being stepped */
-void PxGpuWorkerThread::startSimulation()
-{
-	mUsingConcurrentStreams = mCtxMgr->getUsingConcurrentStreams();
-
-	shdfnd::atomicIncrement(&mCountActiveScenes);
-}
-
-
-/* A TaskManager is informing us that its simulation has ended */
-void PxGpuWorkerThread::stopSimulation()
-{
-	if (shdfnd::atomicDecrement(&mCountActiveScenes) == 0)
-		mCachedStreams.reset();
-}
-
-
-/* PxGpuDispatcher worker thread main loop */
-void PxGpuWorkerThread::execute()
-{
-	setName("GpuDispatcher.Worker");
-
-#ifndef SINGLE_CUDA_THREAD
-	bool running = true;
-	while (running)
-	{
-		mInputReady.wait();
-
-		if (quitIsSignalled())
-		{
-			break;
-		}
-
-		if (!mSubmittedTaskList.empty())
-		{
-			mCtxMgr->acquireContext();
-			processActiveTasks();
-			mCtxMgr->releaseContext();
-		}
-	}
-
-	quit();
-#endif
-}
-
-/* Expected to be called by a GPU task, or a function called by a GPU
- * task.  The context is important because I think it does not require
- * any locking since only one GPU task's launchInstance() function will
- * be running at a time, per GpuDispatcherImpl (per CUDA context).
- */
-void PxGpuWorkerThread::addCompletionPrereq(PxBaseTask& task)
-{
-	if(mFailureDetected)
-		return;
-
-#if PX_SUPPORT_PXTASK_PROFILING
-	PX_PROFILE_ZONE("GpuDispatcher.AddCompletionEvent", task.getContextId());
-#endif
-	task.addReference();
-	mCompletionTasks.pushBack(&task);
-}
-
-namespace
-{
-	template <typename T0>
-	PX_NOINLINE CUresult launchKernelGrid(CUfunction func, unsigned int gridWidth, unsigned int gridHeight, unsigned int numThreads, unsigned int sharedMem, CUstream stream, T0 v0)
-	{
-		void* kernelParams[] =
-		{
-			&v0,
-		};
-		return cuLaunchKernel(func, gridWidth, gridHeight, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-	}
-}
-
-void PxGpuWorkerThread::launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream)
-{
-	if (!mCtxMgr->canMapHostMemory())
-	{
-		for (uint32_t i = 0 ; i < count ; i++)
-		{
-			PX_ASSERT(desc->isValid());
-			switch (desc->type)
-			{
-			case PxGpuCopyDesc::DeviceMemset32:
-				GD_CHECK_CALL(cuMemsetD32Async(desc->dest, (uint32_t) desc->source, desc->bytes >> 2, stream));
-				break;
-			case PxGpuCopyDesc::DeviceToDevice:
-				GD_CHECK_CALL(cuMemcpyDtoDAsync(desc->dest, desc->source, desc->bytes, stream));
-				break;
-			case PxGpuCopyDesc::DeviceToHost:
-				GD_CHECK_CALL(cuMemcpyDtoHAsync((void*) desc->dest, desc->source, desc->bytes, stream));
-				break;
-			case PxGpuCopyDesc::HostToDevice:
-				GD_CHECK_CALL(cuMemcpyHtoDAsync(desc->dest, (void*) desc->source, desc->bytes, stream));
-				break;
-			}
-
-			desc++;
-		}
-	}
-	else if (count == 1)
-	{
-		CUfunction func = mUtilKernelWrapper->getCuFunction(KERNEL_MEMCOPY);
-		uint32_t smCount = (uint32_t)mCtxMgr->getMultiprocessorCount();
-
-		PX_ASSERT(desc->isValid());
-
-		CUdeviceptr dptr;
-		switch (desc->type)
-		{
-		case PxGpuCopyDesc::DeviceToHost:
-			dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc->dest));
-			desc->dest = dptr;
-			break;
-		case PxGpuCopyDesc::HostToDevice:
-			dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc->source));
-			desc->source = dptr;
-			break;
-		case PxGpuCopyDesc::DeviceMemset32:
-		case PxGpuCopyDesc::DeviceToDevice:
-			//do nothing, cases are here for GCCs warning system
-			break;
-		}
-
-		uint32_t numThreads;
-		if (mCtxMgr->supportsArchSM20())
-		{
-			numThreads = 256;
-		}
-		else
-		{
-			numThreads = 128;
-		}
-		uint32_t blocks = uint32_t(desc->bytes / (numThreads * 4 * 6));
-		if (blocks == 0)
-		{
-			blocks = 1;
-		}
-		if (blocks > smCount)
-		{
-			blocks = smCount;
-		}
-
-		GD_CHECK_CALL(
-			launchKernel(func, blocks, numThreads, 0, stream, *desc)
-			);
-	}
-	else
-	{
-		CUfunction func = mUtilKernelWrapper->getCuFunction(KERNEL_MEMCOPY_BATCHED);
-		CUdeviceptr dptr;
-
-		for (uint32_t i = 0 ; i < count ; i++)
-		{
-			PX_ASSERT(desc[i].isValid());
-
-			switch (desc[i].type)
-			{
-			case PxGpuCopyDesc::DeviceToHost:
-				dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc[i].dest));
-				desc[i].dest = dptr;
-				break;
-			case PxGpuCopyDesc::HostToDevice:
-				dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc[i].source));
-				desc[i].source = dptr;
-				break;
-			case PxGpuCopyDesc::DeviceMemset32:
-			case PxGpuCopyDesc::DeviceToDevice:
-				//do nothing, cases are here for GCCs warning system
-				break;
-			}
-		}
-
-		uint32_t numThreads, numBlocksX;
-		if (mCtxMgr->supportsArchSM20())
-		{
-			numThreads = 256;
-			numBlocksX = 1;
-		}
-		else
-		{
-			numThreads = 128;
-			numBlocksX = 2;
-		}
-
-		dptr = mCtxMgr->getMemoryManager()->getMappedPinnedPtr(PxCudaBufferPtr(desc));
-
-		GD_CHECK_CALL(
-			launchKernelGrid(func, numBlocksX, count, numThreads, 0, stream, 
-				dptr)
-			);
-	}
-}
-
-void PxGpuWorkerThread::flushBatch(CUevent endEvent, CUstream syncStream, PxBaseTask* task)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	if (mFailureDetected)
-	{
-		return;
-	}
-
-	if (endEvent && (1 & intptr_t(syncStream)))
-	{
-		// blocking record event on stream 0, flushes current push buffer
-		GD_CHECK_CALL(cuEventRecord(endEvent, 0));
-	} else {
-		CUresult ret = cuStreamQuery(0); // flushes current push buffer
-		PX_ASSERT(ret == CUDA_SUCCESS || ret == CUDA_ERROR_NOT_READY);
-		PX_UNUSED(ret);
-	}
-	
-	int next = (mCompletionRingPush + 1) % SIZE_COMPLETION_RING;
-	while (next == mCompletionRingPop)
-	{
-		// lordy, I hope we never get here.
-		PX_ALWAYS_ASSERT();
-		mCtxMgr->releaseContext();
-		sleep(1);
-		mCtxMgr->acquireContext();
-	}
-
-	CudaBatch& b = mCompletionRing[ mCompletionRingPush ];
-	b.blockingEvent = endEvent;
-	b.blockingStream = syncStream;
-	b.continuationTask = task;
-
-	mCompletionRingPush = next;
-	mRecordEventQueued.set();
-}
-
-// Take any submitted tasks into its appropriate read list
-void PxGpuWorkerThread::pollSubmitted(shdfnd::Array<ReadyTask>* ready)
-{
-	mInputReady.reset();
-	PxGpuTask* gt;
-	while ((gt = (PxGpuTask*) mSubmittedTaskList.popBack()) != 0)
-	{
-		PxGpuTaskHint::Enum type = gt->getTaskHint();
-		ReadyTask* r = &ready[ type ].insert();
-		r->task = gt;
-		r->iteration = 0;
-		if (mUsingConcurrentStreams && gt->mStreamIndex == 0)
-		{
-			gt->mStreamIndex = mCachedStreams.popBack();
-		}
-	}
-}
-
-void PxGpuWorkerThread::processActiveTasks()
-{
-#if PX_SUPPORT_PXTASK_PROFILING
-	PX_PROFILE_ZONE("GpuDispatcher.ProcessTasksEvent", 0);	// PT: TODO: fix invalid context
-#endif
-	
-	if (mFailureDetected)
-	{
-		while (!mSubmittedTaskList.empty())
-		{
-			mInputReady.reset();
-			mSubmittedTaskList.popBack()->release();
-		}
-		return;
-	}
-
-	for (uint32_t i = 0; i < PxGpuTaskHint::NUM_GPU_TASK_HINTS; i++)
-	{
-		mReady[i].clear();
-	}
-
-	//OutputDebugString("batch beginning\n");
-
-	const CUevent nonBlockEv = mCachedNonBlockingEvents.get();
-	bool workToFlush = false;
-	bool tasksRemain = false;
-	PxGpuTaskHint::Enum curMode = PxGpuTaskHint::HostToDevice;
-
-	pollSubmitted(mReady);
-
-	do
-	{
-		// cycle current run mode when necessary
-		if (mReady[ curMode ].size() == 0)
-		{
-			if (curMode == PxGpuTaskHint::HostToDevice)
-			{
-				curMode = PxGpuTaskHint::Kernel;
-			}
-			else if (curMode == PxGpuTaskHint::Kernel)
-			{
-				curMode = PxGpuTaskHint::DeviceToHost;
-			}
-			else
-			{
-				curMode = PxGpuTaskHint::HostToDevice;
-			}
-		}
-
-		uint32_t singleStream = mReady[curMode].empty() ? 0 : mReady[curMode].front().task->mStreamIndex;
-
-		while (mReady[ curMode ].size())
-		{
-			bool needwfi = false;
-			for (uint32_t i = 0 ; i < mReady[ curMode ].size() ; i++)
-			{
-				ReadyTask& r = mReady[ curMode ][ i ];
-
-				if (r.task->mPreSyncRequired)
-				{
-					// If mPreSyncRequired is set *before* the task is run, it implies
-					// a WFI must be inserted before this task issues any work.  Multiple
-					// ready tasks may have this flag, so to avoid inserting multiple WFI
-					// requests, we skip marked tasks in this pass and note a WFI is needed.
-					needwfi = true;
-					r.task->mPreSyncRequired = false;
-				}
-				else
-				{
-					const CUstream s = (r.task->mStreamIndex > 0) ? mCachedStreams.get(r.task->mStreamIndex) : 0;
-
-					bool active;
-					{
-#if PX_PROFILE
-#if PX_SUPPORT_PXTASK_PROFILING
-						PX_PROFILE_ZONE(r.task->getName(), r.task->getContextId());
-#endif
-#endif
-						active = r.task->launchInstance(s, int(r.iteration++));
-					}
-					if(singleStream != r.task->mStreamIndex)
-						singleStream = 0;
-
-					// If the launchInstance() call reported a non-recoverable error, gracefully
-					// release all scheduled tasks
-					if (mFailureDetected)
-					{
-						// Release all ready tasks
-						for (uint32_t h = 0; h < PxGpuTaskHint::NUM_GPU_TASK_HINTS; h++)
-						{
-							for (uint32_t j = 0 ; j < mReady[ h ].size() ; j++)
-							{
-								mReady[ h ][ j ].task->release();
-							}
-							mReady[ h ].clear();
-						}
-
-						// Release all submitted tasks, until idle
-						while (!mSubmittedTaskList.empty())
-						{
-							mInputReady.reset();
-							mSubmittedTaskList.popBack()->release();
-						}
-						return;
-					}
-
-					workToFlush = true;
-					if (r.task->mPreSyncRequired)
-					{
-						// This task has asked for a sync point, meaning it has launched a copy
-						// or a kernel that must be completed before any later tasks are allowed
-						// to start.  Insert a WFI and clear the needwfi flag
-						GD_CHECK_CALL(cuEventRecord(nonBlockEv, 0));
-						needwfi = false;
-						r.task->mPreSyncRequired = false;
-					}
-
-					if (!active)
-					{
-						r.task->release();
-						mReady[ curMode ].replaceWithLast(i);
-						pollSubmitted(mReady);
-						i -= 1;
-					}
-				}
-			}
-
-			if (needwfi)
-			{
-				GD_CHECK_CALL(cuEventRecord(nonBlockEv, 0));
-			}
-		}
-
-		/* We have completed one of the three phases */
-
-		tasksRemain = false;
-		for (int e = (int) PxGpuTaskHint::HostToDevice ; e != (int) PxGpuTaskHint::NUM_GPU_TASK_HINTS ; e++)
-		{
-			tasksRemain |= (mReady[ e ].size() != 0);
-		}
-
-		if (!mCompletionTasks.empty())
-		{
-			workToFlush = true;
-		}
-
-		if (workToFlush && (tasksRemain == false || curMode == PxGpuTaskHint::DeviceToHost))
-		{
-			//OutputDebugString("batch ending\n");
-
-			while (mCompletionTasks.size())
-			{
-				PxBaseTask* t = mCompletionTasks.popBack();
-				if (workToFlush)
-				{
-					CUevent stopEv = mCachedBlockingEvents.get();
-					CUstream stream = singleStream ? mCachedStreams.get(singleStream) : (CUstream)1;
-					flushBatch(stopEv, stream, t);
-					workToFlush = false;
-				}
-				else
-				{
-					flushBatch(0, 0, t);
-				}
-			}
-			if (workToFlush)
-			{
-				/* Getting here is probably an indication of a bug in your task graph,
-				 * but it is possible to get this warning if you have CPU tasks that
-				 * can delay GpuTasks.  So, consider this warning "training wheels" and
-				 * disable it if you know your graph is correct.
-				 */
-				// SJB - Disabling this warning, APEX does this every frame because
-				// of how BasicIOS and IOFX interact.
-				//shdfnd::getFoundation().error(PX_WARN,
-				//	"CUDA work generated without a completion dependency!");
-				CUevent stopEv = mCachedBlockingEvents.get();
-				flushBatch(stopEv, (CUstream)1, NULL);
-			}
-		}
-	}
-	while (tasksRemain);
-
-	mCachedNonBlockingEvents.add(nonBlockEv);
-}
-
-#endif
diff --git a/PxShared/src/cudamanager/src/HeapManagerInterface.h b/PxShared/src/cudamanager/src/HeapManagerInterface.h
deleted file mode 100644
index 7fe7f2e..0000000
--- a/PxShared/src/cudamanager/src/HeapManagerInterface.h
+++ /dev/null
@@ -1,156 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-//----------------------------------------------------------------------------//
-// HeapManagerInterface.h
-//----------------------------------------------------------------------------//
-
-#ifndef PXCUDACONTEXTMANAGER_HEAPMANAGERINTERFACE_H
-#define PXCUDACONTEXTMANAGER_HEAPMANAGERINTERFACE_H
-
-#include "task/PxTaskDefine.h"
-#include "foundation/PxSimpleTypes.h"
-#include "cudamanager/PxCudaMemoryManager.h"
-
-#include <string.h>
-
-namespace physx
-{
-struct HeapStatsFlags
-{
-	enum Enum
-	{
-		F_BASIC_STATS				= 1 << 0,
-		F_INTERNAL_FRAGMENTATION	= 1 << 1,
-		F_BIGGEST_FREE_BLOCK		= 1 << 2,
-		F_HISTOGRAM					= 1 << 3,
-		F_ALLOC_ID_STATS			= 1 << 4,
-		F_ALL						= 0xFFFFFFFF,
-	};
-};
-
-#define BITSPERWORD sizeof(size_t)*8
-
-class ApexHeapStats
-{
-public:
-	ApexHeapStats():
-		heapSize(0),
-		totalAllocated(0),
-		maxAllocated(0),
-		internalFragmentation(0),
-		maxInternalFragmentation(0),
-		biggestFreeBlock(0),
-		numEntries(0)
-	{}
-
-	PX_INLINE void reset()
-	{
-		memset(this, 0, sizeof(ApexHeapStats));
-	}
-
-	// F_BASIC_STATS
-	size_t	heapSize;
-	size_t	totalAllocated;
-	size_t	maxAllocated;
-
-	// F_INTERNAL_FRAGMENTATION
-	size_t	internalFragmentation;
-	size_t	maxInternalFragmentation;
-
-	// F_BIGGEST_FREE_BLOCK
-	size_t	biggestFreeBlock;
-
-	// F_HISTOGRAM
-	size_t	freeBuddyHistogram[BITSPERWORD];
-	size_t	allocatedBuddyHistogram[BITSPERWORD];
-	size_t	numEntries;
-
-	// F_ALLOC_ID_STATS
-	PxAllocIdStats allocIdStats[PxAllocId::NUM_IDS];
-};
-
-
-class HeapManagerInterface
-{
-public:
-	// simple allocator interface over which the heap manager does its base allocation and allocates further pages
-	class Allocator
-	{
-	public:
-		virtual ~Allocator() {};
-
-		virtual		void*		alloc(const size_t size) = 0;
-		virtual		void		free(void* addr, const size_t size) = 0;
-	};
-
-	virtual ~HeapManagerInterface() {};
-
-	// INTERFACE METHODS
-	// init the HeapManager by passing it a block of memory and the smallest size of a memory block.
-	// returns true if init was successful
-	virtual		bool		init(Allocator* memAllocator, const size_t baseSize, const size_t pageSize, const size_t minBlockSize, const size_t maxIntFrag = size_t(-1)) = 0;
-
-	// Changes the page size. The size of allocations over the supplied Allocator are a multiple of the pageSize.
-	// returns true if the page size was valid. (!0, >minBlockSize, pow2)
-	virtual		bool		setPageSize(size_t pageSize) = 0;
-
-	// returns the address of an allocated block for the requested size.
-	// returns a NULL ptr if alloc failed.
-	virtual		void*		alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) = 0;
-
-	// returns true if the block at the given address could be resized to size
-	// returns false if this failed. Manual free and alloc is still possible but needs a memcopy.
-	virtual		bool		realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED)) = 0;
-
-	// frees a given block.
-	// returns true if the operation was successful
-	virtual		bool		free(void* addr) = 0;
-
-	// deallocates all empty pages
-	virtual		void		freeEmptyPages() = 0;
-
-	// ensures that there there is free memory of at least the requested size
-	// returns true if the operation was successful. Free memory was already big enough or new pages were allocated successfully.
-	virtual		bool		reserve(size_t size) = 0;
-
-	// returns stats into a ApexHeapStats object, stats can be selected with HeapManagerStatsFlags.
-	// returns true if the operation was successful
-	virtual		bool		getStats(ApexHeapStats& stats, const uint32_t flags) = 0;
-
-	// discretisize memory into an array such that it can be visualized
-	// returns true if the operation was successful
-	virtual		bool		visualizeMemory(uint8_t* array, const size_t size) = 0;
-
-	// returns the base address of the page containing the memory block at addr. 
-	// returns NULL if addr doesn't correspond to a page
-	virtual		void*		findBaseAddress(void* addr) = 0;
-};
-
-} // end physx namespace
-
-#endif // PXCUDACONTEXTMANAGER_HEAPMANAGERINTERFACE_H
diff --git a/PxShared/src/cudamanager/src/HeapManagerLinkedList.h b/PxShared/src/cudamanager/src/HeapManagerLinkedList.h
deleted file mode 100644
index 45a359d..0000000
--- a/PxShared/src/cudamanager/src/HeapManagerLinkedList.h
+++ /dev/null
@@ -1,204 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-//----------------------------------------------------------------------------//
-// HeapManagerLinkedList.h
-//----------------------------------------------------------------------------//
-
-#ifndef PXCUDACONTEXTMANAGER_HEAPMANAGERLINKEDLIST_H
-#define PXCUDACONTEXTMANAGER_HEAPMANAGERLINKEDLIST_H
-
-#include "foundation/PxAssert.h"
-
-namespace physx
-{
-
-template <typename T>
-class LinkedList
-{
-public:
-
-	PX_INLINE LinkedList()
-		: mHead(NULL)
-		, mNumElements(0)
-	{
-	}
-
-	PX_INLINE void insert(T*& elt)
-	{
-		if (mHead)
-		{
-			elt->next = mHead->next;
-		}
-		mHead = elt;
-		mNumElements++;
-	}
-
-	PX_INLINE void insertSorted(T*& elt)
-	{
-		if (!mHead)
-		{
-			mHead = elt;
-			mHead->next = NULL;
-		}
-		else if (!mHead->next || (mHead->addr > elt->addr))
-		{
-			if (mHead->addr > elt->addr)
-			{
-				elt->next = mHead;
-				mHead = elt;
-			}
-			else
-			{
-				mHead->next = elt;
-				elt->next = NULL;
-			}
-		}
-		else
-		{
-			T* cur = mHead;
-			while (cur->next && (elt->addr > cur->next->addr))
-			{
-				cur = cur->next;
-			}
-			elt->next = cur->next;
-			cur->next = elt;
-		}
-		mNumElements++;
-	}
-
-	PX_INLINE T* pop()
-	{
-		if (mHead)
-		{
-			T* ret = mHead;
-			mHead = mHead->next;
-			mNumElements--;
-			return ret;
-		}
-		return NULL;
-	}
-
-	PX_INLINE bool remove(const T* elt)
-	{
-		PX_ASSERT(elt);
-		if (mHead && mHead == elt)
-		{
-			mHead = mHead->next;
-			mNumElements--;
-			return true;
-		}
-		else
-		{
-			T* cur = mHead;
-			while (cur && cur->next != elt)
-			{
-				PX_ASSERT(cur->addr < elt->addr); // assert for sorted list property.
-				cur = cur->next;
-			}
-			if (cur && elt)
-			{
-				cur->next = elt->next;
-				mNumElements--;
-				return true;
-			}
-		}
-		return false;
-	}
-
-	PX_INLINE T* find(const size_t addr)
-	{
-		T* cur = mHead;
-		while (cur && cur->addr < addr)
-		{
-			cur = cur->next;
-		}
-
-		return cur && (cur->addr == addr) ? cur : NULL;
-	}
-
-	PX_INLINE T* findAndPop(const size_t addr)
-	{
-		if (mHead == NULL)
-		{
-			return NULL;
-		}
-
-		if (mHead->addr == addr)
-		{
-			return pop();
-		}
-
-		T* cur = mHead;
-		T* last = mHead;
-		while (cur)
-		{
-			if (cur->addr == addr)
-			{
-				last->next = cur->next;
-				mNumElements--;
-				return cur;
-			}
-			else if (cur->addr > addr)
-			{
-				return NULL; // because list is sorted.
-			}
-			else
-			{
-				last = cur;
-				cur = cur->next;
-			}
-		}
-		return NULL;
-	}
-
-	PX_INLINE	size_t	getSize()
-	{
-		return mNumElements;
-	}
-	PX_INLINE	T*		getHead()
-	{
-		return mHead;
-	}
-
-	// hacky
-	PX_INLINE	void	setSize(size_t s)
-	{
-		mNumElements = s;
-	}
-	PX_INLINE	void	setHead(T* h)
-	{
-		mHead = h;
-	}
-private:
-	T*		mHead;
-	size_t	mNumElements;
-};
-
-}  // end physx namespace
-
-#endif // PXCUDACONTEXTMANAGER_HEAPMANAGERLINKEDLIST_H
diff --git a/PxShared/src/cudamanager/src/HeapManagerRef.cpp b/PxShared/src/cudamanager/src/HeapManagerRef.cpp
deleted file mode 100644
index bf3847f..0000000
--- a/PxShared/src/cudamanager/src/HeapManagerRef.cpp
+++ /dev/null
@@ -1,1380 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "CudaMemoryManager.h"
-#include "HeapManagerRef.h"
-
-#include "PsSort.h"
-#include "PsArray.h"
-#include "PsAllocator.h"
-#include "PsString.h"
-
-#include "foundation/PxMath.h"
-#include "foundation/PxErrorCallback.h"
-#include "foundation/PxMemory.h"
-
-
-#if DUMP_HEAP_USAGE_TO_FILE
-#include "PsWindowsInclude.h"
-#endif
-
-using namespace physx::shdfnd;
-using namespace physx;
-
-
-#define CMM_DELETE_SINGLE(x)	{ if(x) delete x; x = NULL; }
-#define CMM_DELETE_ARRAY(x)		{ if(x) delete [] x; x = NULL; }
-
-HeapManagerRef::HeapManagerRef(physx::PxErrorCallback& errorCallback, bool enableMutex)
-	: mHeaps(PX_DEBUG_EXP("HeapManagerRef:mHeaps"))
-	, mBuddyPool("mBuddyPool", 1024)
-	, mPageSize(0)
-	, mMinBlockSize(0)
-	, mMaxIntFrag(size_t(-1))
-	, mNewEmptyPage(false)
-	, mMemAllocator(NULL)
-	, mGlobalAllocMem(0)
-	, mGlobalMaxAllocMem(0)
-	, mGlobalInternalFragmentation(0)
-	, mGlobalMaxInternalFragmentation(0)
-	, mErrorCallback(errorCallback)
-
-{
-	PX_UNUSED(enableMutex);  // SJB: heap alloc of shdfnd::Mutex not working for me
-}
-
-HeapManagerRef::~HeapManagerRef()
-{
-	for (uint32_t i = 0; i < mHeaps.size(); i++)
-	{
-		if (mMemAllocator && mHeaps[i].baseAddr)
-		{
-			mMemAllocator->free(reinterpret_cast<void*>(mHeaps[i].baseAddr), mHeaps[i].heap->getTotalMemorySize());
-		}
-		CMM_DELETE_SINGLE(mHeaps[i].heap);
-	}
-#if DUMP_HEAP_USAGE_TO_FILE
-	fclose(mLogFile);
-#endif
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// INTERFACE METHODS
-bool HeapManagerRef::init(Allocator* memAllocator, const size_t baseSize, const size_t pageSize, const size_t minBlockSize, const size_t maxIntFrag)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-	// init Heap and do some basic checks.
-
-	// init only once
-	if (mHeaps.size())
-	{
-		return false;
-	}
-
-	if (baseSize && (minBlockSize > baseSize))
-	{
-		return false;
-	}
-
-	if (minBlockSize > pageSize)
-	{
-		return false;
-	}
-
-	if (baseSize && (baseSize % minBlockSize))
-	{
-		return false;
-	}
-
-	uint8_t minBlockSizeLog2;
-	if (minBlockSize != findNextPow2(minBlockSizeLog2, minBlockSize, 0, BITSPERWORD))
-	{
-		return false;
-	}
-
-	if (pageSize != findNextPow2(pageSize, minBlockSizeLog2, BITSPERWORD))
-	{
-		return false;
-	}
-
-	if (!memAllocator)
-	{
-		return false;
-	}
-
-	mMemAllocator = memAllocator;
-	mPageSize = pageSize;
-	mMinBlockSize = minBlockSize;
-	mMaxIntFrag = maxIntFrag;
-
-	memset(&mGlobalAllocIdStats, 0, sizeof(PxAllocIdStats)*PxAllocId::NUM_IDS);
-
-#if DUMP_HEAP_USAGE_TO_FILE
-	char fileName[1024];
-	sprintf_s(fileName, 1024, "HeapLog_%p.txt", this);
-	fopen_s(&mLogFile, fileName, "w");
-	fprintf(mLogFile, "HeapSize: %d, BlockSize: %d Addr: 0x0\n", baseSize, minBlockSize);
-	QueryPerformanceCounter((LARGE_INTEGER*)&m_qpc);
-	QueryPerformanceFrequency((LARGE_INTEGER*)&m_qpf);
-#endif
-
-	// init heap
-	if (baseSize)
-	{
-		return allocateNewHeap(baseSize, true) != NULL;
-	}
-	else
-	{
-		return true;
-	}
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool HeapManagerRef::setPageSize(size_t pageSize)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	if (pageSize == 0)
-	{
-		return false;
-	}
-
-	if (mMinBlockSize > pageSize)
-	{
-		return false;
-	}
-
-	if (pageSize != findNextPow2(pageSize, 0, BITSPERWORD))
-	{
-		return false;
-	}
-
-	mPageSize = pageSize;
-	return true;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void* HeapManagerRef::alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-#if DUMP_HEAP_USAGE_TO_FILE
-	unsigned __int64 qpc;
-	QueryPerformanceCounter((LARGE_INTEGER*)&qpc);
-	float dtime = (float)((double)(qpc - m_qpc) / (double)m_qpf);
-	fprintf(mLogFile, "alloc t: %f s: %d", dtime, size);
-#endif
-
-	void* ret = NULL;
-	// try to allocate it in one of the heaps/pages
-	for (uint32_t i = 0; !ret && i < mHeaps.size(); i++)
-	{
-		ret = mHeaps[i].heap->alloc(size, PX_ALLOC_INFO_PARAMS_INPUT());
-	}
-
-	// create a new page
-	if (!ret)
-	{
-		Heap* heap = allocateNewPages(size);
-		if (heap)
-		{
-			ret = heap->alloc(size, PX_ALLOC_INFO_PARAMS_INPUT());
-		}
-	}
-
-#if DUMP_HEAP_USAGE_TO_FILE
-	fprintf(mLogFile, " a: 0x%p\n", ret);
-#endif
-
-	return ret;
-}
-
-//(10/20/2009 feodorb) TODO: decide whether we move the binary search
-//somewhere away from here. Stands here for std::lower_bound replacement
-template<typename T>
-static uint32_t findUpperBound(const physx::shdfnd::Array<T>& refArray, const T& refValue)
-{
-	uint32_t start = 0, end = refArray.size();
-	while (end - start > 0)
-	{
-		uint32_t midPoint = start + ((end - start) >> 1);
-
-		if (!(refValue < refArray[midPoint]))
-		{
-			start = midPoint + 1;
-		}
-		else
-		{
-			end = midPoint;
-		}
-	}
-	return start;
-}
-
-Heap* HeapManagerRef::findHeap(void* addr) const
-{
-	HeapManagerPage searchPage;
-	searchPage.baseAddr = reinterpret_cast<size_t>(addr);
-
-	uint32_t upperBound = findUpperBound(mHeaps, searchPage);
-	PX_ASSERT(upperBound == 0 ||
-	          (searchPage.baseAddr >= mHeaps[upperBound - 1].baseAddr &&
-	           searchPage.baseAddr < mHeaps[upperBound - 1].baseAddr + mHeaps[upperBound - 1].heap->getTotalMemorySize())
-	         );
-
-	return (upperBound > 0) ? mHeaps[upperBound - 1].heap : 0;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool HeapManagerRef::realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-#if DUMP_HEAP_USAGE_TO_FILE
-	unsigned __int64 qpc;
-	QueryPerformanceCounter((LARGE_INTEGER*)&qpc);
-	float dtime = (float)((double)(qpc - m_qpc) / (double)m_qpf);
-	fprintf(mLogFile, "realloc t: %f s: %d, a: 0x%p\n", dtime, size, addr);
-#endif
-
-	Heap* heap = findHeap(addr);
-
-	if (heap != 0)
-	{
-		bool ret = heap->realloc(addr, size, PX_ALLOC_INFO_PARAMS_INPUT());
-		if (ret && size > 0 && mNewEmptyPage)
-		{
-			shrinkMemory();
-		}
-		return ret;
-	}
-	return false;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool HeapManagerRef::free(void* addr)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-#if DUMP_HEAP_USAGE_TO_FILE
-	unsigned __int64 qpc;
-	QueryPerformanceCounter((LARGE_INTEGER*)&qpc);
-	float dtime = (float)((double)(qpc - m_qpc) / (double)m_qpf);
-	fprintf(mLogFile, "free t: %f a: 0x%p\n", dtime, addr);
-#endif
-
-	if (addr == NULL)
-	{
-		return false;
-	}
-
-	Heap* heap = findHeap(addr);
-	if (heap != 0)
-	{
-		bool ret = heap->free(addr);
-		if (ret && mNewEmptyPage)
-		{
-			shrinkMemory();
-		}
-		return ret;
-	}
-	return false;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-static void addStats(ApexHeapStats& dst, const ApexHeapStats& src, uint32_t flags)
-{
-	if (flags & HeapStatsFlags::F_BASIC_STATS)
-	{
-		dst.heapSize += src.heapSize;
-	}
-	if (flags & HeapStatsFlags::F_BIGGEST_FREE_BLOCK)
-	{
-		dst.biggestFreeBlock = PxMax(dst.biggestFreeBlock, src.biggestFreeBlock);
-	}
-	if (flags & HeapStatsFlags::F_HISTOGRAM)
-	{
-		dst.numEntries = PxMax(dst.numEntries, src.numEntries);
-		for (uint32_t i = 0; i < BITSPERWORD; i++)
-		{
-			dst.freeBuddyHistogram[i] += src.freeBuddyHistogram[i];
-			dst.allocatedBuddyHistogram[i] += src.allocatedBuddyHistogram[i];
-		}
-	}
-}
-
-PX_INLINE void HeapManagerRef::addToStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation)
-{
-	PxAllocIdStats& idStats = mGlobalAllocIdStats[id];
-	idStats.elements++;
-	idStats.size += size;
-	idStats.maxElements = PxMax(idStats.maxElements, idStats.elements);
-	idStats.maxSize = PxMax(idStats.maxSize, idStats.size);
-	mGlobalAllocMem += size;
-	mGlobalMaxAllocMem = PxMax(mGlobalMaxAllocMem, mGlobalAllocMem);
-	mGlobalInternalFragmentation += fragmentation;
-	mGlobalMaxInternalFragmentation = PxMax(mGlobalMaxInternalFragmentation, mGlobalInternalFragmentation);
-}
-
-
-PX_INLINE void HeapManagerRef::removeFromStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation)
-{
-	PxAllocIdStats& idStats = mGlobalAllocIdStats[id];
-	PX_ASSERT(idStats.elements);
-	PX_ASSERT(idStats.size >= size);
-
-	idStats.elements--;
-	idStats.size -= size;
-	mGlobalAllocMem -= size;
-	mGlobalInternalFragmentation -= fragmentation;
-}
-
-PX_INLINE void HeapManagerRef::incStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation)
-{
-	PxAllocIdStats& idStats = mGlobalAllocIdStats[id];
-	idStats.size += change;
-	idStats.maxSize = PxMax(idStats.maxSize, idStats.size);
-	mGlobalAllocMem += change;
-	mGlobalMaxAllocMem = PxMax(mGlobalMaxAllocMem, mGlobalAllocMem);
-	mGlobalInternalFragmentation += fragmentation;
-	mGlobalMaxInternalFragmentation = PxMax(mGlobalMaxInternalFragmentation, mGlobalInternalFragmentation);
-}
-
-PX_INLINE void HeapManagerRef::decStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation)
-{
-	PxAllocIdStats& idStats = mGlobalAllocIdStats[id];
-	PX_ASSERT(idStats.size >= change);
-	idStats.size -= change;
-	mGlobalAllocMem += change;
-	mGlobalInternalFragmentation += fragmentation;
-}
-
-bool HeapManagerRef::getStats(ApexHeapStats& stats, const uint32_t flags)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	ApexHeapStats tmpStats;
-	stats.reset();
-	for (uint32_t i = 0; i < mHeaps.size(); i++)
-	{
-		mHeaps[i].heap->getStats(tmpStats, flags);
-		addStats(stats, tmpStats, flags);
-	}
-	if (flags & HeapStatsFlags::F_BASIC_STATS)
-	{
-		stats.totalAllocated = mGlobalAllocMem;
-		stats.maxAllocated = mGlobalMaxAllocMem;
-
-	}
-	if (flags & HeapStatsFlags::F_INTERNAL_FRAGMENTATION)
-	{
-		stats.internalFragmentation = mGlobalInternalFragmentation;
-		stats.maxInternalFragmentation = mGlobalMaxInternalFragmentation;
-	}
-	if (flags & HeapStatsFlags::F_ALLOC_ID_STATS)
-	{
-		// stats per allocation ID
-		PxMemCopy(stats.allocIdStats, mGlobalAllocIdStats, sizeof(PxAllocIdStats)*PxAllocId::NUM_IDS);
-	}
-	return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool HeapManagerRef::visualizeMemory(uint8_t* array, const size_t arraySize)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	ApexHeapStats tmpStats;
-	getStats(tmpStats, HeapStatsFlags::F_BASIC_STATS);
-	float scale = float(arraySize) / float(tmpStats.heapSize);
-	uint8_t* start = array;
-	for (uint32_t i = 0; i < mHeaps.size(); i++)
-	{
-		size_t heapSize = mHeaps[i].heap->getTotalMemorySize();
-		size_t numVis = size_t(float(heapSize) * scale);
-		PX_ASSERT(start + numVis <= array + arraySize);
-		mHeaps[i].heap->visualizeMemory(start, numVis);
-		start += numVis;
-	}
-	return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void* HeapManagerRef::findBaseAddress(void* addr)
-{
-	Heap* heap = findHeap(addr);
-	if (heap)
-	{
-		return heap->getBaseAddress();
-	}
-	return NULL;
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-Heap* HeapManagerRef::allocateNewHeap(size_t heapSize, bool isPersistent)
-{
-	if (!mMemAllocator)
-	{
-		return NULL;
-	}
-
-	void* newPage = mMemAllocator->alloc(heapSize);
-	if (newPage)
-	{
-		HeapManagerPage page;
-		page.baseAddr = reinterpret_cast<size_t>(newPage);
-		page.heap = PX_NEW(Heap)(*this, mErrorCallback);
-		page.isPersistent = isPersistent;
-		if (page.heap && page.heap->init(page.baseAddr, heapSize, mMinBlockSize, mMaxIntFrag))
-		{
-			mHeaps.pushBack(page);
-			shdfnd::sort(mHeaps.begin(), (uint32_t) mHeaps.size());
-			return page.heap;
-		}
-		else
-		{
-			mMemAllocator->free(newPage, page.heap->getTotalMemorySize());
-			CMM_DELETE_SINGLE(page.heap);
-		}
-	}
-	return NULL;
-}
-
-Heap* HeapManagerRef::allocateNewPages(size_t requestedSize)
-{
-	uint8_t pageSizeLog2;
-	uint8_t minBlockSizeLog2;
-	findNextPow2(minBlockSizeLog2, mMinBlockSize, 0, BITSPERWORD);
-	findNextPow2(pageSizeLog2, mPageSize, minBlockSizeLog2, BITSPERWORD);
-	const size_t allocSize = findNextPow2(requestedSize, pageSizeLog2, BITSPERWORD);
-	return allocateNewHeap(allocSize);
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void HeapManagerRef::removeDeletedHeapsFromList(uint32_t numDeletes)
-{
-	// remove pages from list, keeping it sorted.
-	if (numDeletes)
-	{
-		const uint32_t numEntries = (uint32_t) mHeaps.size();
-
-		//seek
-		uint32_t w = 0;
-		while (w < (numEntries) && mHeaps[w].heap != NULL)
-		{
-			w++;
-		}
-
-		// remove holes
-		uint32_t r = w + 1;
-		while (r < numEntries)
-		{
-			if (mHeaps[r].heap == NULL)
-			{
-				r++;
-			}
-			else
-			{
-				mHeaps[w++] = mHeaps[r++];
-			}
-		}
-
-		mHeaps.resize(numEntries - numDeletes);
-	}
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void HeapManagerRef::resetHeap(HeapManagerPage& page)
-{
-	PX_ASSERT(page.heap->getAllocatedMemorySize() == 0);
-	Heap* newHeap = PX_NEW(Heap)(*this, mErrorCallback);
-	if (newHeap)
-	{
-		if (newHeap->init(page.baseAddr, page.heap->getTotalMemorySize(), mMinBlockSize, mMaxIntFrag))
-		{
-			CMM_DELETE_SINGLE(page.heap);
-			page.heap = newHeap;
-		}
-		else
-		{
-			CMM_DELETE_SINGLE(newHeap);
-		}
-	}
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void HeapManagerRef::freeEmptyPages()
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	// release heaps
-	uint32_t numDeletes = 0;
-	const uint32_t numEntries = (uint32_t) mHeaps.size();
-	for (uint32_t i = 0; i < numEntries; i++)
-	{
-		HeapManagerPage& page = mHeaps[i];
-		PX_ASSERT(page.heap);
-		if (page.isPersistent)
-		{
-			// for persistent pages: reset without release.
-			if (page.heap->getAllocatedMemorySize() == 0)
-			{
-				resetHeap(page);
-			}
-		}
-		else if (page.heap->getAllocatedMemorySize() == 0)
-		{
-			mMemAllocator->free(reinterpret_cast<void*>(page.baseAddr), page.heap->getTotalMemorySize());
-			CMM_DELETE_SINGLE(page.heap);
-			numDeletes++;
-		}
-	}
-
-	if (numDeletes)
-	{
-		removeDeletedHeapsFromList(numDeletes);
-	}
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void HeapManagerRef::shrinkMemory()
-{
-	mNewEmptyPage = false;
-
-	// collect stats
-	size_t totalAllocated = 0;
-	size_t totalPageSize = 0;
-	const uint32_t numEntries = (uint32_t) mHeaps.size();
-	for (uint32_t i = 0; i < numEntries; i++)
-	{
-		HeapManagerPage& page = mHeaps[i];
-		totalAllocated += page.heap->getAllocatedMemorySize();
-		totalPageSize += page.heap->getTotalMemorySize();
-		PX_ASSERT(totalAllocated <= totalPageSize);
-	}
-
-	// shrink memory if free non-persistent space is half or more of the allocated pages.
-	// releasing from the back of address sorted list, other strategies like LRU, best fit are also possible.
-	if (totalPageSize)
-	{
-		float allocScale = float(totalAllocated) / float(totalPageSize);
-		if (allocScale <= 0.5f)
-		{
-			size_t sizeToRelease = totalAllocated ? (totalPageSize - totalAllocated) >> 1 : totalPageSize;
-			uint32_t numDeletes = 0;
-			for (uint32_t i = 0; i < numEntries; i++)
-			{
-				HeapManagerPage& page = mHeaps[numEntries - i - 1];
-				PX_ASSERT(page.heap);
-				if (page.heap->getAllocatedMemorySize() == 0)
-				{
-					if (!page.isPersistent && page.heap->getTotalMemorySize() <= sizeToRelease)
-					{
-						mMemAllocator->free(reinterpret_cast<void*>(page.baseAddr), page.heap->getTotalMemorySize());
-						sizeToRelease -= page.heap->getTotalMemorySize();
-						CMM_DELETE_SINGLE(page.heap);
-						numDeletes++;
-					}
-					else
-					{
-						resetHeap(page);
-					}
-				}
-			}
-
-			if (numDeletes)
-			{
-				removeDeletedHeapsFromList(numDeletes);
-			}
-		}
-	}
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool HeapManagerRef::reserve(size_t size)
-{
-	shdfnd::Mutex::ScopedLock lock(mMutex);
-
-	size_t freeSize = 0;
-	for (uint32_t i = 0; i < mHeaps.size(); i++)
-	{
-		freeSize += mHeaps[i].heap->getTotalMemorySize() - mHeaps[i].heap->getAllocatedMemorySize();
-	}
-
-	if (freeSize < size)
-	{
-		return allocateNewPages(size - freeSize) != NULL;
-	}
-	else
-	{
-		return true;
-	}
-}
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-PX_INLINE size_t HeapManagerRef::findNextPow2(const size_t val, const uint8_t min, const uint8_t max)
-{
-	size_t ret = val;
-	for (uint8_t i = min; i <= max; i++)
-	{
-		ret = size_t(1) << i;
-		if (ret >= val)
-		{
-			break;
-		}
-	}
-	return ret;
-}
-
-
-PX_INLINE size_t HeapManagerRef::findNextPow2(uint8_t& pow, const size_t val, const uint8_t min, const uint8_t max)
-{
-	size_t ret = val;
-	for (pow = min; pow <= max; pow++)
-	{
-		ret = size_t(1) << pow;
-		if (ret >= val)
-		{
-			break;
-		}
-	}
-	return ret;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void BuddyList::clear()
-{
-	Buddy* cur = pop();
-	while (cur)
-	{
-		heap->getBuddyPool().destroy(cur);
-		cur = pop();
-	}
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-Heap::~Heap()
-{
-	for (uint32_t i = 0; i <= mMaxLevel; i++)
-	{
-		mFreeBuddiesAtLevel[i].clear();
-	}
-	CMM_DELETE_ARRAY(mFreeBuddiesAtLevel);
-
-	for (Buddy* buddy = mAllocatedBuddies.getHead(); buddy != NULL; buddy = buddy->next)
-	{
-		void* address = reinterpret_cast<void*>((buddy->addr << mMinBlockLog2) + mBaseAddr);
-#if KEEP_DEBUG_INFO
-		char buffer[256];
-		physx::shdfnd::snprintf(buffer, 256, "Memory leak!\naddress %p file %s, line %d, name %s",  address, buddy->file, buddy->line, buddy->allocName);
-		mErrorCallback.reportError(PxErrorCode::eDEBUG_WARNING, buffer, __FILE__, __LINE__);
-#else
-		char buffer[256];
-		physx::shdfnd::snprintf(buffer, 256, "Memory leak at address %p", address);
-		mErrorCallback.reportError(PxErrorCode::eDEBUG_WARNING, buffer, __FILE__, __LINE__);
-#endif
-	}
-
-	//clear it anyway
-	mAllocatedBuddies.clear();
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::init(size_t baseAddr, const size_t baseSize, const size_t minBlockSize, const size_t maxIntFrag)
-{
-	if (minBlockSize != HeapManagerRef::findNextPow2(mMinBlockLog2, minBlockSize, 0, BITSPERWORD))
-	{
-		return false;
-	}
-
-	if ((maxIntFrag != size_t(-1)) && (maxIntFrag != HeapManagerRef::findNextPow2(maxIntFrag, 0, BITSPERWORD)))
-	{
-		return false;
-	}
-
-	mMaxIntFrag = maxIntFrag;
-
-	mMinBlockSize = minBlockSize;
-	mTotalSize = baseSize;
-	mBaseAddr = baseAddr;
-
-	if (mBaseAddr == 0)
-	{
-		return false;
-	}
-
-	size_t numBlocks = baseSize >> mMinBlockLog2;
-	// allow only memory blocks which have a power of 2 in size. and numblocks must be at least 1.
-	if (numBlocks != HeapManagerRef::findNextPow2(mMaxLevel, numBlocks, 0, sizeof(size_t) * 4))
-	{
-		return false;
-	}
-
-	mFreeBuddiesAtLevel = PX_NEW(BuddyList)[(unsigned int)(mMaxLevel + 1)];
-	if (!mFreeBuddiesAtLevel)
-	{
-		return false;
-	}
-
-	// init size of buddy arrays
-	for (uint32_t i = 0; i <= mMaxLevel; i++)
-	{
-		mFreeBuddiesAtLevel[i].buddySize = size_t(1) << i;
-		mFreeBuddiesAtLevel[i].heap = this;
-	}
-	mAllocatedBuddies.heap = this;
-
-	Buddy* b = mManager.getBuddyPool().construct();
-	if (!b)
-	{
-		CMM_DELETE_ARRAY(mFreeBuddiesAtLevel);
-		return false;
-	}
-	b->level = mMaxLevel;
-
-	// add buddy to its array
-	mFreeBuddiesAtLevel[mMaxLevel].insert(b);
-
-	return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-void* Heap::alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
-	/*
-	compute needed buddysize -> level
-	if(mFreeBuddiesAtLevel[level].array.size() > 0)
-	{
-		ret =  array.popBack()
-		allocList.pushBack(ret)
-		return ret.adr+basePtr;
-	}
-	else
-	{
-		if(nonemptylist at higher levels)
-			ret = recursive split
-		else if(nonemptylist at lower levels )
-			ret = recursive merge
-		else
-			ret = NULL; // heap full or to fragmented
-	}
-	*/
-
-	PX_UNUSED(allocId);
-	PX_UNUSED(allocName);
-	PX_UNUSED(line);
-	PX_UNUSED(file);
-
-	if (size == 0 || size > mTotalSize)
-	{
-		return NULL;
-	}
-
-	PX_ASSERT(allocId < PxAllocId::NUM_IDS);
-	//PX_ASSERT(allocId != PxAllocId::UNASSIGNED);	// enable to track unassigned memory
-
-	// compute needed buddysize -> level
-	uint8_t level = 0;
-	HeapManagerRef::findNextPow2(level, size, mMinBlockLog2, BITSPERWORD);
-	level = uint8_t(level - mMinBlockLog2);
-
-	Buddy* ret = NULL;
-	if (mFreeBuddiesAtLevel[level].getSize() > 0)
-	{
-		ret = mFreeBuddiesAtLevel[level].pop();
-	}
-	else
-	{
-		// prefer splitting
-		if (level != mMaxLevel)
-		{
-			ret = findBySplitting(level);
-		}
-		// else try merging
-		if (!ret && level != 0)
-		{
-			ret = findByMerging(level);
-		}
-	}
-
-	if (ret)
-	{
-		ret->occupiedSize = size;
-		size_t addr = ret->addr;
-		ret->allocId = uint16_t(allocId);
-#if KEEP_DEBUG_INFO
-		ret->file = file;
-		ret->line = (uint32_t)line;
-		ret->allocName = allocName;
-#endif
-
-		size_t allocSize;
-		if (mMaxIntFrag != size_t(-1))
-		{
-			allocSize = reduceIntFragment(*ret, mMaxIntFrag); // ret can be changed in here, that's why we store the address
-		}
-		else
-		{
-			allocSize = size_t(1) << (level + mMinBlockLog2);
-			mAllocatedBuddies.insertSorted(ret);
-		}
-		mAllocMem += allocSize;
-		mInternalFragmentation += allocSize - size;
-		mMaxAllocMem = PxMax(mAllocMem, mMaxAllocMem);
-		mMaxInternalFragmentation = PxMax(mInternalFragmentation, mMaxInternalFragmentation);
-		mManager.addToStats(allocId, allocSize, allocSize - size);
-
-		PX_ASSERT(sanityTest());
-		return reinterpret_cast<void*>((addr << mMinBlockLog2) + mBaseAddr);
-	}
-	else
-	{
-		PX_ASSERT(sanityTest());
-		return 	NULL;
-	}
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DEF())
-{
-	PX_UNUSED(allocId);
-	PX_UNUSED(allocName);
-	PX_UNUSED(line);
-	PX_UNUSED(file);
-
-	if (addr == NULL || size > mTotalSize)
-	{
-		return false;
-	}
-
-	if (size == 0)
-	{
-		//realloc to 0 bytes can't keep the pointer as it was.
-		return false;
-		//return free(addr);
-	}
-
-	size_t inernal_addr = reinterpret_cast<size_t>(addr) - mBaseAddr;
-	inernal_addr >>= mMinBlockLog2;
-
-	// collect all buddies which are associated with this addr
-	shdfnd::Array<Buddy*, shdfnd::TempAllocator> budyList;
-	size_t totalAllocated = 0;
-	size_t buddyAllocated = 0;
-	Buddy* found = NULL;
-	do
-	{
-		found = mAllocatedBuddies.find(inernal_addr);
-		if (!found)
-		{
-			return false;
-		}
-		budyList.pushBack(found);
-		inernal_addr += size_t(1) << found->level;
-		totalAllocated += found->occupiedSize;
-		buddyAllocated += size_t(1) << (found->level + mMinBlockLog2);
-	}
-	while (found && !found->isLastBuddy);
-
-	Buddy* cur = budyList.popBack();
-
-	// increase size
-	if (totalAllocated < size)
-	{
-		size_t leftSpace = (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize;
-		size_t neededSpace = size - totalAllocated;
-		if (neededSpace <= leftSpace)
-		{
-			cur->occupiedSize += neededSpace;
-#if KEEP_DEBUG_INFO
-			cur->file = file;
-			cur->line = (uint32_t)line;
-			cur->allocName = allocName;
-#endif
-
-			mInternalFragmentation -= neededSpace;
-			mManager.decStats(PxAllocId::Enum(cur->allocId), 0, neededSpace);
-
-			// replace
-			mAllocatedBuddies.remove(cur);
-			mAllocatedBuddies.insertSorted(cur);
-			PX_ASSERT(sanityTest());
-			return true;
-		}
-		else
-		{
-			return false;
-#ifdef UNREACHABLE
-			// TODO:try merge free buddies until big enough,
-			// then add buddy and do internal fragmentation reduction.
-
-			// search for free blocks next to this one.
-			size_t addr = cur->addr + (size_t(1) << cur->level);
-			if (!mAllocatedBuddies.find(addr))
-			{
-				return false;
-			}
-
-			// if not found, return null, let user reallocate
-			PX_ASSERT(sanityTest());
-			return false;
-#endif
-		}
-	}
-	// reduce size
-	else
-	{
-		// succededly remove buddies until the requested size is reached.
-		// if internal fragmentation reduction is turned on, then an allocation can consist of multiple buddies.
-		mInternalFragmentation -= (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize;
-		mManager.decStats(PxAllocId::Enum(cur->allocId), 0, (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize);
-		size_t diff = totalAllocated - size;
-		while (diff >= cur->occupiedSize)
-		{
-			diff -= cur->occupiedSize;
-			cur->occupiedSize = 0;
-			bool succ = mAllocatedBuddies.remove(cur);
-			PX_UNUSED(succ);
-			PX_ASSERT(succ);
-			mFreeBuddiesAtLevel[cur->level].insertSorted(cur);
-			size_t allocSize = size_t(1) << (cur->level + mMinBlockLog2);
-			mAllocMem -= allocSize;
-			mManager.decStats(PxAllocId::Enum(cur->allocId), allocSize, 0);
-			cur = budyList.popBack();
-		}
-		cur->isLastBuddy = true;
-		cur->occupiedSize -= diff;
-
-#if KEEP_DEBUG_INFO
-		cur->file = file;
-		cur->line =(uint32_t)line;
-		cur->allocName = allocName;
-#endif
-
-		// replace
-		bool succ = mAllocatedBuddies.remove(cur);
-		PX_UNUSED(succ);
-		PX_ASSERT(succ);
-		mAllocatedBuddies.insertSorted(cur);
-		mInternalFragmentation += (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize;
-		mManager.incStats(PxAllocId::Enum(cur->allocId), 0, (size_t(1) << (cur->level + mMinBlockLog2)) - cur->occupiedSize);
-		PX_ASSERT(sanityTest());
-		return true;
-	}
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::free(void* addr)
-{
-	if (addr == NULL)
-	{
-		return false;
-	}
-
-	size_t internalAddr = reinterpret_cast<size_t>(addr) - mBaseAddr;
-	internalAddr >>= mMinBlockLog2;
-
-	bool ret = true;
-
-	bool dummy = true;
-	while (dummy)
-	{
-		Buddy* b = mAllocatedBuddies.findAndPop(internalAddr);
-
-		if (!b)
-		{
-			return false;
-		}
-
-		size_t allocSize = size_t(1) << (b->level + mMinBlockLog2);
-		mAllocMem -= allocSize;
-		mInternalFragmentation -= allocSize - b->occupiedSize;
-		mManager.removeFromStats(PxAllocId::Enum(b->allocId), allocSize, allocSize - b->occupiedSize);
-		b->occupiedSize = 0;
-
-		mFreeBuddiesAtLevel[b->level].insertSorted(b);
-
-		// check if this memory block occupied another buddy
-		if (b->isLastBuddy)
-		{
-			break;
-		}
-		else
-		{
-			internalAddr += size_t(1) << b->level;
-		}
-	}
-
-	if (mAllocMem == 0)
-	{
-		mManager.notifyEmptyPage();
-	}
-
-	PX_ASSERT(sanityTest());
-	return ret;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::getStats(ApexHeapStats& stats, const uint32_t flags)
-{
-	if (flags & HeapStatsFlags::F_BASIC_STATS)
-	{
-		stats.heapSize = mTotalSize;
-		stats.totalAllocated = mAllocMem;
-		stats.maxAllocated = mMaxAllocMem;
-	}
-	if (flags & HeapStatsFlags::F_INTERNAL_FRAGMENTATION)
-	{
-		// internal fragmentation
-		stats.internalFragmentation = mInternalFragmentation;
-		stats.maxInternalFragmentation = mMaxInternalFragmentation;
-	}
-	if (flags & HeapStatsFlags::F_BIGGEST_FREE_BLOCK)
-	{
-		// bigggest free block
-		stats.biggestFreeBlock = 0;
-		uint8_t curLevel = mMaxLevel;
-		do
-		{
-			if (mFreeBuddiesAtLevel[curLevel].getSize())
-			{
-				stats.biggestFreeBlock = mFreeBuddiesAtLevel[curLevel].buddySize << mMinBlockLog2;
-				break;
-			}
-			curLevel--;
-		}
-		while (curLevel != 0);
-	}
-	if (flags & HeapStatsFlags::F_HISTOGRAM)
-	{
-		// histograms
-		for (uint8_t i = 0; i <= mMaxLevel; i++)
-		{
-			stats.freeBuddyHistogram[i] = mFreeBuddiesAtLevel[i].getSize();
-			stats.allocatedBuddyHistogram[i] = 0;
-		}
-		Buddy* b = mAllocatedBuddies.getHead();
-		while (b)
-		{
-			stats.allocatedBuddyHistogram[b->level]++;
-			b = b->next;
-		}
-		stats.numEntries = size_t(mMaxLevel + 1);
-	}
-	return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::sanityTest()
-{
-	size_t sum = 0;
-
-	for (uint8_t i = 0; i <= mMaxLevel; i++)
-	{
-		size_t buddiesAtLevel = 0;
-		for (Buddy* cur = mFreeBuddiesAtLevel[i].getHead(); cur; cur = cur->next)
-		{
-			if ((size_t(1) << cur->level != mFreeBuddiesAtLevel[i].buddySize) ||
-			        (cur->occupiedSize > size_t(1) << (cur->level + mMinBlockLog2)))
-			{
-				return false;
-			}
-			sum += mFreeBuddiesAtLevel[i].buddySize << mMinBlockLog2;
-			buddiesAtLevel++;
-		}
-		if (mFreeBuddiesAtLevel[i].getSize() != buddiesAtLevel ||
-		        (buddiesAtLevel > (size_t(1) << (mMaxLevel - i))))
-		{
-			return false;
-		}
-	}
-
-	size_t numAllocated = 0;
-	for (Buddy* cur = mAllocatedBuddies.getHead(); cur; cur = cur->next)
-	{
-		sum += size_t(1) << (cur->level + mMinBlockLog2);
-		numAllocated++;
-	}
-
-	if (numAllocated != mAllocatedBuddies.getSize())
-	{
-		return false;
-	}
-
-	ptrdiff_t diff = ptrdiff_t(sum - (size_t(1) << (mMaxLevel + mMinBlockLog2)));
-	if (diff != 0)
-	{
-		return false;
-	}
-	else
-	{
-		return true;
-	}
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::visualizeMemory(uint8_t* array, const size_t size)
-{
-	float scale = (float)size / (size_t(1) << mMaxLevel);
-
-	for (size_t i = 0; i < size; i++)
-	{
-		array[i] = 0;
-	}
-	for (Buddy* cur = mAllocatedBuddies.getHead(); cur; cur = cur->next)
-	{
-		size_t start = (size_t)((float)(cur->addr) * scale);
-		size_t end = (size_t)((float)(cur->addr + (size_t(1) << size_t(cur->level))) * scale);
-		PX_ASSERT(start <= size);
-		PX_ASSERT(end <= size);
-		for (size_t i = start; i < end; i++)
-		{
-			PX_ASSERT(i < size);
-			array[i] = uint8_t(cur->level + 1);
-		}
-	}
-
-	return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::splitBuddy(Buddy* b)
-{
-	if (b->level == 0)
-	{
-		return false;
-	}
-
-	b->level--;
-	size_t newSize = size_t(1) << b->level;
-
-	Buddy* b0 = b;
-	Buddy* b1 = mManager.getBuddyPool().construct(*b);
-	PX_ASSERT(b0 && b1);
-
-	b1->addr = b1->addr + newSize;
-
-	mFreeBuddiesAtLevel[b0->level].insertSorted(b0);
-	mFreeBuddiesAtLevel[b1->level].insertSorted(b1);
-	return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-bool Heap::mergeBuddies(Buddy* b0, Buddy* b1)
-{
-	if (b0->level != b1->level || b0->level >= mMaxLevel || (b1->addr - b0->addr) != size_t(1) << size_t(b0->level))
-	{
-		return false;
-	}
-
-	Buddy* b = b0;
-	b->occupiedSize = 0;
-	b->isLastBuddy = true;
-	b->level++;
-	b->next = NULL;
-	mFreeBuddiesAtLevel[b->level].insertSorted(b);
-
-	mManager.getBuddyPool().destroy(b1);
-	return true;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-Buddy* Heap::findBySplitting(uint8_t searchLevel)
-{
-	Buddy* ret = NULL;
-
-	uint8_t curLevel = searchLevel;
-
-	// walk through array of buddy lists and search for a free buddy which is at level >= searchLevel
-	for (; !mFreeBuddiesAtLevel[curLevel].getSize() && (curLevel < mMaxLevel); curLevel++)
-	{
-		;
-	}
-
-	// pop buddy at highest level and split until it has the correct level
-	ret = mFreeBuddiesAtLevel[curLevel].pop();
-	for (; ret && (curLevel != searchLevel) && curLevel > 0; curLevel--)
-	{
-		splitBuddy(ret);
-		ret = mFreeBuddiesAtLevel[curLevel - 1].pop();
-	}
-	return ret;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-Buddy* Heap::findByMerging(uint8_t searchLevel)
-{
-	/*
-		while there is no pair to merge on this level, ask lower level to merge
-		may ask lower level to merge more than 1 pair after each failure
-		or just merge all pairs of lower levels
-	*/
-	if (searchLevel == 0)
-	{
-		return NULL;
-	}
-
-	uint8_t curLevel = uint8_t(searchLevel - 1);
-	bool dummy = true;
-	while (dummy)
-	{
-		int32_t shift = (mMaxLevel - (1 << (curLevel + 1)));
-		shift = shift >= 0 ? shift : 0;
-		size_t numToFind = size_t(1) << shift;
-		size_t found = findPairAndMerge(mFreeBuddiesAtLevel[curLevel], numToFind);
-		if (found)
-		{
-			if (curLevel == searchLevel - 1)
-			{
-				break;
-			}
-			curLevel++;
-		}
-		else
-		{
-			if (curLevel > 0)
-			{
-				curLevel--;
-			}
-			else
-			{
-				return NULL;
-			}
-		}
-	}
-	return mFreeBuddiesAtLevel[searchLevel].pop();
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-size_t Heap::findPairAndMerge(BuddyList& list, size_t numToFind)
-{
-	size_t found = 0;
-	Buddy* cur = list.getHead();
-	Buddy* last = list.getHead();
-	size_t diff = list.buddySize;
-	while ((found != numToFind) && cur && cur->next)
-	{
-		// find buddy pair b0 and b1, b0 must be at an even address, and b0 and b1 must be neighbours in address space.
-		// since the list is sorted, we do only compare neighbours in the list.
-		if (((cur->addr & (size_t(1) << size_t(cur->level))) == 0) && (cur->next->addr - cur->addr == diff))
-		{
-			Buddy* b0 = cur;
-			Buddy* b1 = cur->next;
-
-			if (cur == list.getHead())
-			{
-				list.setHead(cur->next->next);
-				cur = list.getHead();
-				last = cur;
-			}
-			else
-			{
-				cur = cur->next->next;
-				last->next = cur;
-			}
-			list.setSize(list.getSize() - 2);
-			if (mergeBuddies(b0, b1))
-			{
-				found++;
-			}
-		}
-		else
-		{
-			last = cur;
-			cur = cur->next;
-		}
-	}
-	return found;
-}
-
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-size_t Heap::reduceIntFragment(Buddy& b, size_t threshold)
-{
-	/*
-	while internalFragmentation > threshold
-		cut buddy in the middle
-		if cut goes through occupied space
-			left buddy is full, can be pushed to alloc list clear last buddy flag
-		else
-			right buddy is free, push it to free list
-	*/
-	size_t allocSize = 0;
-	Buddy* curB = &b;
-	curB->isLastBuddy = false;
-	while (curB->level && ((size_t(1) << (curB->level + mMinBlockLog2)) - curB->occupiedSize) > threshold)
-	{
-		//split
-		Buddy* b0 = mManager.getBuddyPool().construct(*curB);
-		Buddy* b1 = curB;
-		b0->level--;
-		b1->level--;
-		b1->addr += size_t(1) << size_t(b1->level);
-		if ((size_t(1) << (b0->level + mMinBlockLog2)) < b0->occupiedSize)
-		{
-			b0->occupiedSize = size_t(1) << (b0->level + mMinBlockLog2);
-			b1->occupiedSize -= b0->occupiedSize;
-			mAllocatedBuddies.insertSorted(b0);
-			allocSize += size_t(1) << b1->level;
-			curB = b1;
-		}
-		else
-		{
-			b1->occupiedSize = 0;
-			mFreeBuddiesAtLevel[b1->level].insertSorted(b1);
-			curB = b0;
-		}
-	}
-	curB->isLastBuddy = true;
-	allocSize += size_t(1) << curB->level;
-	mAllocatedBuddies.insertSorted(curB);
-	return (allocSize << mMinBlockLog2);
-}
-
-
diff --git a/PxShared/src/cudamanager/src/HeapManagerRef.h b/PxShared/src/cudamanager/src/HeapManagerRef.h
deleted file mode 100644
index e6e585e..0000000
--- a/PxShared/src/cudamanager/src/HeapManagerRef.h
+++ /dev/null
@@ -1,297 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-//----------------------------------------------------------------------------//
-// HeapManagerRef.h
-//----------------------------------------------------------------------------//
-
-#ifndef PXCUDACONTEXTMANAGER_HEAPMANAGERREF_H
-#define PXCUDACONTEXTMANAGER_HEAPMANAGERREF_H
-
-#include "task/PxTaskDefine.h"
-
-#include "HeapManagerInterface.h"
-#include "HeapManagerLinkedList.h"
-#include "PsPool.h"
-#include "PsMutex.h"
-#include "PsArray.h"
-#include "PsUserAllocated.h"
-
-#define DUMP_HEAP_USAGE_TO_FILE 0
-
-#ifdef _DEBUG
-#define KEEP_DEBUG_INFO 1
-#else
-#define KEEP_DEBUG_INFO 0
-#endif
-
-#if DUMP_HEAP_USAGE_TO_FILE
-#include "stdio.h"
-#endif
-
-namespace physx
-{
-
-class Heap;
-struct Buddy
-{
-	PX_INLINE Buddy()
-		: addr(0)
-		, next(0)
-		, occupiedSize(0)
-		, allocId(PxAllocId::UNASSIGNED)
-		, isLastBuddy(true)
-		, level(0)
-#if KEEP_DEBUG_INFO
-		, file(NULL)
-		, allocName(NULL)
-		, line(0)
-#endif
-	{}
-
-	PX_INLINE Buddy(Buddy& b)
-		: addr(b.addr)
-		, next(b.next)
-		, occupiedSize(b.occupiedSize)
-		, allocId(b.allocId)
-		, isLastBuddy(b.isLastBuddy)
-		, level(b.level)
-#if KEEP_DEBUG_INFO
-		, file(b.file)
-		, allocName(b.allocName)
-		, line(b.line)
-#endif
-	{}
-
-	size_t		addr;
-	Buddy*		next;
-	size_t		occupiedSize;
-	uint16_t	allocId;
-	uint8_t 	isLastBuddy;
-	uint8_t		level;
-#if KEEP_DEBUG_INFO
-	const char* file;
-	const char* allocName;
-	uint32_t	line;
-#endif
-};
-
-struct BuddyList: public LinkedList<Buddy>, public shdfnd::UserAllocated
-{
-	BuddyList()
-		: buddySize(0)
-		, heap(NULL)
-	{}
-	void clear();
-
-	size_t		buddySize;  // = 2^level
-	Heap*		heap;
-};
-
-struct HeapManagerPage
-{
-	PX_INLINE bool operator < (const HeapManagerPage& p) const
-	{
-		return baseAddr < p.baseAddr;
-	}
-
-	PX_INLINE bool operator > (const HeapManagerPage& p) const
-	{
-		return baseAddr > p.baseAddr;
-	}
-
-	size_t	baseAddr;
-	Heap*	heap;
-	bool	isPersistent;
-};
-
-
-class HeapManagerRef: public HeapManagerInterface, public shdfnd::UserAllocated
-{
-	PX_NOCOPY(HeapManagerRef)
-public:
-	HeapManagerRef(physx::PxErrorCallback& errorCallback, bool enableMutex = true);
-	virtual ~HeapManagerRef();
-
-	// INTERFACE METHODS
-	virtual		bool		init(Allocator* memAllocator, const size_t baseSize, const size_t pageSize, const size_t minBlockSize, const size_t maxIntFrag);
-	virtual		bool		setPageSize(size_t pageSize);
-	virtual		void*		alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
-	virtual		bool		realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
-	virtual		bool		free(void* addr);
-	virtual		void		freeEmptyPages();
-	virtual		bool		reserve(size_t size);
-	virtual		bool		getStats(ApexHeapStats& stats, const uint32_t flags);
-	virtual		bool		visualizeMemory(uint8_t* array, const size_t size);
-	virtual		void*		findBaseAddress(void* addr);
-
-	// INTERNALS
-	// searches 2^pow >= val,  searches pow in [min, max]
-	static PX_INLINE size_t	findNextPow2(const size_t val, const uint8_t min = 0, const uint8_t max = BITSPERWORD);
-	static PX_INLINE size_t	findNextPow2(uint8_t& pow, const size_t val, const uint8_t min = 0, const uint8_t max = BITSPERWORD);
-
-	PX_INLINE	void		addToStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation);
-	PX_INLINE	void		removeFromStats(PxAllocId::Enum id, const size_t size, const size_t fragmentation);
-	PX_INLINE	void		incStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation);
-	PX_INLINE	void		decStats(PxAllocId::Enum id, const size_t change, const size_t fragmentation);
-
-	PX_INLINE	void					notifyEmptyPage()
-	{
-		mNewEmptyPage = true;
-	}
-	PX_INLINE	shdfnd::Pool<Buddy>&	getBuddyPool()
-	{
-		return mBuddyPool;
-	}
-
-private:
-	Heap*	allocateNewHeap(size_t heapSize, bool isPersistent = false);
-	Heap*	allocateNewPages(size_t requestedSize);
-	void	resetHeap(HeapManagerPage& page);
-	void	removeDeletedHeapsFromList(uint32_t numDeletes);
-	void	shrinkMemory();
-
-	Heap*	findHeap(void* addr) const;
-
-private:
-	// heaps
-	shdfnd::Array<HeapManagerPage> mHeaps;
-	shdfnd::Pool<Buddy>		    mBuddyPool;
-	size_t						mPageSize;
-	size_t						mMinBlockSize;
-	size_t						mMaxIntFrag;
-	bool						mNewEmptyPage;
-	// lock
-	shdfnd::Mutex				mMutex;
-	// page allocator
-	Allocator*					mMemAllocator;
-	// overall stats
-	size_t						mGlobalAllocMem;
-	size_t						mGlobalMaxAllocMem;
-	size_t						mGlobalInternalFragmentation;
-	size_t						mGlobalMaxInternalFragmentation;
-	// stats per allocation ID
-	PxAllocIdStats				mGlobalAllocIdStats[PxAllocId::NUM_IDS];
-	// error callback
-	physx::PxErrorCallback&	mErrorCallback;
-
-#if DUMP_HEAP_USAGE_TO_FILE
-	FILE*				mLogFile;
-	unsigned __int64    m_qpc;
-	unsigned __int64    m_qpf;
-#endif
-};
-
-
-class Heap : public shdfnd::UserAllocated
-{
-public:
-	PX_INLINE Heap(HeapManagerRef& manager, physx::PxErrorCallback& errorCallback)
-		: mManager(manager)
-		, mErrorCallback(errorCallback)
-		, mBaseAddr(0)
-		, mMinBlockSize(0)
-		, mFreeBuddiesAtLevel(NULL)
-		, mMaxIntFrag(0)
-		, mTotalSize(0)
-		, mMaxLevel(0)
-		, mMinBlockLog2(0)
-		, mAllocMem(0)
-		, mMaxAllocMem(0)
-		, mInternalFragmentation(0)
-		, mMaxInternalFragmentation(0)
-	{}
-
-	PX_INLINE ~Heap();
-
-	bool	init(size_t baseAddr, const size_t baseSize, const size_t minBlockSize, const size_t maxIntFrag);
-	void*	alloc(const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
-	bool	realloc(void* addr, const size_t size, PX_ALLOC_INFO_PARAMS_DECL(NULL, 0, NULL, UNASSIGNED));
-	bool	free(void* addr);
-	bool	getStats(ApexHeapStats& stats, const uint32_t flags);
-	bool	visualizeMemory(uint8_t* array, const size_t size);
-
-	PX_INLINE	size_t					getTotalMemorySize()
-	{
-		return mTotalSize;
-	}
-	PX_INLINE	size_t					getAllocatedMemorySize()
-	{
-		return mAllocMem;
-	}
-	PX_INLINE	shdfnd::Pool<Buddy>&	getBuddyPool()
-	{
-		return mManager.getBuddyPool();
-	}
-	PX_INLINE	void*					getBaseAddress() 
-	{ 
-		return (void*)mBaseAddr; 
-	}
-
-private:
-	// split buddy b with size 2^level into two buddies with level 2^(level-1) and append those to the free list. deletes b, assumes that b was removed from the list before.
-	bool	splitBuddy(Buddy* b);
-	// merge 2 buddies to next bigger one. deletes b0 and b1, assumes that they are already removed from their array.
-	bool	mergeBuddies(Buddy* b0, Buddy* b1);
-
-	// split of right, free children of a buddy if the internal fragmentation of a buddy is bigger than a threshold
-	// returns the size of all allocated buddies
-	size_t	reduceIntFragment(Buddy& b, size_t threshold);
-
-	// find a Buddy by splitting a Buddy at searchLevel
-	Buddy*	findBySplitting(uint8_t searchLevel);
-	Buddy*	findByMerging(uint8_t searchLevel);
-	size_t	findPairAndMerge(BuddyList& list, size_t numToFind);
-
-	bool	sanityTest();
-
-	void operator=(const Heap&)
-	{
-		PX_ASSERT(0);
-	}
-
-private:
-	HeapManagerRef&			mManager;
-	physx::PxErrorCallback& mErrorCallback;
-	size_t					mBaseAddr;
-	size_t					mMinBlockSize;
-	BuddyList*				mFreeBuddiesAtLevel;
-	BuddyList				mAllocatedBuddies;
-	size_t					mMaxIntFrag;
-	size_t					mTotalSize;
-	uint8_t					mMaxLevel;		// 2^maxLevel <= memorySize
-	uint8_t					mMinBlockLog2;
-
-	size_t					mAllocMem;		// fragmented
-	size_t					mMaxAllocMem;
-	size_t					mInternalFragmentation;
-	size_t					mMaxInternalFragmentation;
-};
-
-} // end physx namespace
-
-#endif // PXCUDACONTEXTMANAGER_HEAPMANAGERREF_H
diff --git a/PxShared/src/cudamanager/src/PhysXDevice.h b/PxShared/src/cudamanager/src/PhysXDevice.h
deleted file mode 100644
index b066bdc..0000000
--- a/PxShared/src/cudamanager/src/PhysXDevice.h
+++ /dev/null
@@ -1,119 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef ___PHYS_X_DEVICE_
-#define ___PHYS_X_DEVICE_
-
-#include "foundation/PxPreprocessor.h"
-
-#if PX_WINDOWS
-#	pragma warning (push)
-#	pragma warning (disable : 4668) //'symbol' is not defined as a preprocessor macro, replacing with '0' for 'directives'
-#	include "windows/PsWindowsInclude.h"
-#	pragma warning (pop)
-#endif
-
-#if PX_WINDOWS
-	#define PHYSX_DEV_DLL_API extern "C" __declspec(dllexport)
-	#ifdef _DEBUG
-	#   define PHYSX_DEV_DLL_PRIVATE_API extern "C" __declspec(dllexport)
-	#else
-	#   define PHYSX_DEV_DLL_PRIVATE_API
-	#endif
-#else
-	#define PHYSX_DEV_DLL_API
-	#define PHYSX_DEV_DLL_PRIVATE_API
-#endif
-
-/** typedefs */
-typedef int PHYSX_DEV_STATUS;
-typedef unsigned int PhysXDevHandle;
-
-/** PHYSX_DEV_STATUS values */
-enum
-{
-	PHYSX_DEV_OK = 0,
-	PHYSX_DEV_UNKNOWN_ERROR,
-	PHYSX_DEV_INVALID_HANDLE,
-	PHYSX_DEV_UNINITIALIZED,
-	PHYSX_DEV_NV_API_UNAVAILABLE,
-	PHYSX_DEV_CUDA_UNAVAILABLE,
-	PHYSX_DEV_CUDA_MEMORY_ALLOC_FAILURE,
-	PHYSX_DEV_LEGACY_MODE_GPU_HANDLE,
-	PHYSX_DEV_PHYSX_DEV_UNAVAILABLE,
-};
-
-
-/**
- *  physxDevInit
- *  Initialize the PhysX Device information functions.
- *  Must be called before using any other API functions.
- */
-PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevInit();
-
-/**
- *  physxDevClose
- *  Call this when finished with the PhysX Device API, it
- *  frees memory that is allocated in physxDevInit
- */
-PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevClose();
-
-/**
- *  physxDevGetCudaOrdinal
- *  Returns the CUDA device ordinal for the given PhysX GPU device
- */
-PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevGetCudaOrdinal(int* cudaDevOrdinal, PhysXDevHandle devHandle);
-PHYSX_DEV_STATUS physxDevGetCudaOrdinalWrapper(int* cudaDevOrdinal);
-
-/**
- *  physxDevGet
- *  Returns the PhysX GPU device that the PhysX Engine
- *  will use.  If the device is -1, the engine will
- *  automatically choose which GPU to use.
- *
- *  This function handles the R177/R180 detection first, then decides accordingly
- *
- *  if(180+)
- *      if(GPU Enabled) ? get NVAPI sel : -1
- *  else (177)
- *      if regkey ? regkey value : -1 (PHYSX_DEV_LEGACY_MODE_GPU_HANDLE returned)
- */
-PHYSX_DEV_DLL_API PHYSX_DEV_STATUS physxDevGet(PhysXDevHandle* devHandle);
-
-/**
- *  physxDevUsingDedicatedGPU
- *  Returns whether or not PhysX has a dedicated GPU (set by the user in the NV CPL)
- */
-PHYSX_DEV_DLL_API bool physxDevUsingDedicatedGPU();
-
-/**
- *  physxDevSLIEnabled
- *  Returns whether or not the device pointer specified (D3D device) is in an SLI group
- */
-PHYSX_DEV_DLL_API bool physxDevSLIEnabled(void* graphicsDevice);
-
-#endif
diff --git a/PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp b/PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp
deleted file mode 100644
index 77896c2..0000000
--- a/PxShared/src/cudamanager/src/PhysXDeviceSettings.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#include "task/PxTaskDefine.h"
-
-#if PX_SUPPORT_GPU_PHYSX
-
-#include "foundation/PxErrorCallback.h"
-
-#include "PhysXDeviceSettings.h"
-#include "PhysXDevice.h"
-
-#if PX_VC
-#pragma warning(disable: 4191)	//'operator/operation' : unsafe conversion from 'type of expression' to 'type required'
-#endif
-
-namespace
-{
-#if PX_WIN32 || PX_WIN64
-	/** funcs for the dynamic loading of the PhysXDevice.dll file */
-	typedef PHYSX_DEV_STATUS __cdecl physxDevInit_t();
-	typedef PHYSX_DEV_STATUS __cdecl physxDevClose_t();
-	typedef PHYSX_DEV_STATUS __cdecl physxDevGet_t(PhysXDevHandle* devHandle);
-	typedef PHYSX_DEV_STATUS __cdecl physxDevGetCudaOrdinal_t(int* cudaDevOrdinal, PhysXDevHandle devHandle);
-	typedef bool __cdecl physxDevUsingDedicatedGPU_t();
-	typedef bool __cdecl physxDevSLIEnabled_t(void* graphicsDevice);
-
-	/** globals for cuda functions */
-	static physxDevInit_t* physxDevInit_f;
-	static physxDevClose_t* physxDevClose_f;
-	static physxDevGetCudaOrdinal_t* physxDevGetCudaOrdinal_f;
-	static physxDevGet_t* physxDevGet_f;
-	static physxDevUsingDedicatedGPU_t* physxDevUsingDedicatedGPU_f;
-	static physxDevSLIEnabled_t* physxDevSLIEnabled_f;
-
-	/** globals */
-	static HMODULE gPhysXDevModuleH;
-
-	PHYSX_DEV_STATUS initPhysXDeviceLib()
-	{
-		PHYSX_DEV_STATUS status;
-#if PX_X86
-		gPhysXDevModuleH = LoadLibrary("PhysXDevice.dll");
-#else
-		gPhysXDevModuleH = LoadLibrary("PhysXDevice64.dll");
-#endif
-		if (!gPhysXDevModuleH)
-		{
-			return PHYSX_DEV_PHYSX_DEV_UNAVAILABLE;
-		}
-
-		physxDevInit_f = (physxDevInit_t*)GetProcAddress(gPhysXDevModuleH, "physxDevInit");
-		physxDevClose_f = (physxDevClose_t*)GetProcAddress(gPhysXDevModuleH, "physxDevClose");
-		physxDevGetCudaOrdinal_f = (physxDevGetCudaOrdinal_t*)GetProcAddress(gPhysXDevModuleH, "physxDevGetCudaOrdinal");
-		physxDevGet_f = (physxDevGet_t*)GetProcAddress(gPhysXDevModuleH, "physxDevGet");
-		physxDevUsingDedicatedGPU_f = (physxDevUsingDedicatedGPU_t*)GetProcAddress(gPhysXDevModuleH, "physxDevUsingDedicatedGPU");
-		physxDevSLIEnabled_f = (physxDevSLIEnabled_t*)GetProcAddress(gPhysXDevModuleH, "physxDevSLIEnabled");
-
-		if (!physxDevInit_f ||
-			!physxDevClose_f ||
-			!physxDevGetCudaOrdinal_f ||
-			!physxDevGet_f)
-		{
-			FreeLibrary(gPhysXDevModuleH);
-			return PHYSX_DEV_CUDA_UNAVAILABLE;
-		}
-
-		status = physxDevInit_f();
-		if (PHYSX_DEV_OK != status)
-		{
-			FreeLibrary(gPhysXDevModuleH);
-			return status;
-		}
-
-		return PHYSX_DEV_OK;
-	}
-#endif // PX_WIN32 || PX_WIN64
-
-	PHYSX_DEV_STATUS getCudaOrdinal(int* cudaDevOrdinal)
-	{
-#if PX_WIN32 || PX_WIN64
-		PHYSX_DEV_STATUS status;
-		PhysXDevHandle selectedDev;
-
-		status = initPhysXDeviceLib();
-
-		if (PHYSX_DEV_OK != status)
-		{
-			return status;
-		}
-
-		status = physxDevGet_f(&selectedDev);
-		physxDevGetCudaOrdinal_f(cudaDevOrdinal, selectedDev);
-
-		physxDevClose_f();
-		FreeLibrary(gPhysXDevModuleH);
-
-		if (status == PHYSX_DEV_LEGACY_MODE_GPU_HANDLE) // R177 installed
-		{
-			return PHYSX_DEV_LEGACY_MODE_GPU_HANDLE;
-		}
-		else
-		{
-			return PHYSX_DEV_OK;
-		}
-#elif PX_LINUX
-		const char* deviceOrdinalString = ::getenv("PHYSX_GPU_DEVICE");
-		if (!deviceOrdinalString)
-			*cudaDevOrdinal = 0;
-		else
-			*cudaDevOrdinal = atoi(deviceOrdinalString);
-		return PHYSX_DEV_OK;
-#endif
-	}
-
-}
-
-namespace physx
-{
-
-	int PhysXDeviceSettings::getSuggestedCudaDeviceOrdinal(physx::PxErrorCallback& errc)
-	{
-		int cudaDevOrdinal = -1;
-		switch (getCudaOrdinal(&cudaDevOrdinal))
-		{
-		case PHYSX_DEV_OK:
-			break;
-
-		case PHYSX_DEV_UNKNOWN_ERROR:
-			errc.reportError(PxErrorCode::eDEBUG_WARNING, "unknown error during CUDA device detection\n", __FILE__, __LINE__);
-			break;
-
-		case PHYSX_DEV_NV_API_UNAVAILABLE:
-			errc.reportError(PxErrorCode::eDEBUG_WARNING, "NVAPI is not available\n", __FILE__, __LINE__);
-			break;
-
-		case PHYSX_DEV_CUDA_UNAVAILABLE:
-			errc.reportError(PxErrorCode::eDEBUG_WARNING, "CUDA is not available\n", __FILE__, __LINE__);
-			break;
-
-		case PHYSX_DEV_PHYSX_DEV_UNAVAILABLE:
-#if PX_X86
-			errc.reportError(PxErrorCode::eDEBUG_WARNING, "PhysXDevice.dll is not available\n", __FILE__, __LINE__);
-#else
-			errc.reportError(PxErrorCode::eDEBUG_WARNING, "PhysXDevice64.dll is not available\n", __FILE__, __LINE__);
-#endif
-			break;
-
-		default:
-			errc.reportError(PxErrorCode::eDEBUG_WARNING, "unknown error during CUDA device detection\n", __FILE__, __LINE__);
-			break;
-		}
-
-		return cudaDevOrdinal;
-	}
-
-	int PhysXDeviceSettings::isUsingDedicatedGPU()
-	{
-#if PX_WIN32 || PX_WIN64
-		PHYSX_DEV_STATUS status;
-		bool dedicated = false;
-
-		status = initPhysXDeviceLib();
-
-		if (PHYSX_DEV_OK != status)
-		{
-			return 0;
-		}
-
-		if (physxDevUsingDedicatedGPU_f)
-		{
-			dedicated = physxDevUsingDedicatedGPU_f();
-			physxDevClose_f();
-			FreeLibrary(gPhysXDevModuleH);
-			return(dedicated);
-		}
-		else
-		{
-			physxDevClose_f();
-			FreeLibrary(gPhysXDevModuleH);
-			return(-1);
-		}
-#elif PX_LINUX
-		// need some way to set this
-		return 0;
-#endif
-	}
-
-	bool PhysXDeviceSettings::isSLIEnabled(void* graphicsDevice)
-	{
-#if PX_WIN32 || PX_WIN64
-		PHYSX_DEV_STATUS status;
-		status = initPhysXDeviceLib();
-
-		if (PHYSX_DEV_OK != status)
-		{
-			return false;
-		}
-
-		if (physxDevSLIEnabled_f)
-		{
-			bool enabled = physxDevSLIEnabled_f(graphicsDevice);
-			physxDevClose_f();
-			FreeLibrary(gPhysXDevModuleH);
-			return enabled;
-		}
-		else
-		{
-			physxDevClose_f();
-			FreeLibrary(gPhysXDevModuleH);
-			return false;
-		}
-#elif PX_LINUX
-		// Unimplemented for Linux because we don't need it, not because it's really always false.
-		PX_UNUSED(graphicsDevice);
-		return false;
-#endif
-	}
-
-} // end physx namespace
-
-#endif // PX_SUPPORT_GPU_PHYSX
-
-