Initial commit:

PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
author: git perforce import user <a@b> 2016-10-25 12:29:14 -0600
committer: Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> 2016-10-25 18:56:37 -0500
commit: 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree: fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PxShared/src/cudamanager
download: physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
4 files changed, 772 insertions, 0 deletions
diff --git a/PxShared/src/cudamanager/include/CudaContextManager.h b/PxShared/src/cudamanager/include/CudaContextManager.h
new file mode 100644
index 00000000..3aff9581
--- /dev/null
+++ b/PxShared/src/cudamanager/include/CudaContextManager.h
@@ -0,0 +1,51 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H
+#define PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H
+
+#include "task/PxTaskDefine.h"
+
+#if PX_SUPPORT_GPU_PHYSX
+
+namespace physx
+{
+
+class PxCudaContextManager;
+class PxCudaContextManagerDesc;
+class PxErrorCallback;
+
+/**
+Creates a CUDA context manager for PhysX and APEX.
+*/
+PxCudaContextManager* createCudaContextManager(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback);
+
+}
+
+#endif
+
+#endif // PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H
diff --git a/PxShared/src/cudamanager/include/CudaKernelWrangler.h b/PxShared/src/cudamanager/include/CudaKernelWrangler.h
new file mode 100644
index 00000000..65a41ab4
--- /dev/null
+++ b/PxShared/src/cudamanager/include/CudaKernelWrangler.h
@@ -0,0 +1,331 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+
+#ifndef __CUDA_KERNEL_WRANGLER__
+#define __CUDA_KERNEL_WRANGLER__
+
+// Make this header safe for inclusion in headers that are shared with device code.
+#if !defined(__CUDACC__)
+
+#include "task/PxTaskDefine.h"
+#include "task/PxGpuDispatcher.h"
+
+#include "PsUserAllocated.h"
+#include "PsArray.h"
+
+#include <cuda.h>
+
+namespace physx
+{
+
+class KernelWrangler : public shdfnd::UserAllocated // stores CUfunction and CUmodule handles that are looked up by function index
+{
+	PX_NOCOPY(KernelWrangler)
+public:
+	KernelWrangler(PxGpuDispatcher& gd, PxErrorCallback& errorCallback, const char** funcNames, uint16_t numFuncs); // defined elsewhere; presumably resolves the numFuncs names in funcNames - confirm
+	~KernelWrangler();
+
+	CUfunction getCuFunction(uint16_t funcIndex) const // returns the handle stored at funcIndex; no range check is done here
+	{
+		return mCuFunctions[ funcIndex ];
+	}
+
+	CUmodule getCuModule(uint16_t funcIndex) const // returns the module that funcIndex maps to through mCuFuncModIndex
+	{
+		uint16_t modIndex = mCuFuncModIndex[ funcIndex ]; // per-function index into mCuModules
+		return mCuModules[ modIndex ];
+	}
+
+	static void const* const* getImages(); // defined elsewhere; NOTE(review): presumably the embedded module images - confirm
+	static int getNumImages(); // defined elsewhere; NOTE(review): presumably the length of getImages() - confirm
+
+	bool hadError() const { return mError; } // reports the mError flag
+
+protected:
+	bool						mError; // value returned by hadError(); written outside this header
+	shdfnd::Array<CUfunction>	mCuFunctions; // indexed by funcIndex
+	shdfnd::Array<uint16_t>		mCuFuncModIndex; // indexed by funcIndex, yields an index into mCuModules
+	shdfnd::Array<CUmodule>	    mCuModules; // indexed by the values held in mCuFuncModIndex
+	PxGpuDispatcher&			mGpuDispatcher;
+	PxErrorCallback&			mErrorCallback;
+};
+
+/* SJB - These were "borrowed" from an Ignacio Llamas email to devtech-compute.
+ * If we feel this is too clumsy, we can steal the boost based bits from APEX
+ */
+
+class ExplicitCudaFlush // countdown that issues one cuStreamQuery(0) when the counter reaches zero
+{
+public:
+	ExplicitCudaFlush(int cudaFlushCount) : mCudaFlushCount(cudaFlushCount), mDefaultCudaFlushCount(cudaFlushCount) {}
+	~ExplicitCudaFlush() {}
+
+	void setCudaFlushCount(int value) { mDefaultCudaFlushCount = value; mCudaFlushCount = value; }
+	unsigned int getCudaFlushCount() const	{ return static_cast<unsigned int>(mCudaFlushCount); }
+	void resetCudaFlushCount() { mCudaFlushCount = mDefaultCudaFlushCount; }
+
+	void decrementFlushCount()
+	{
+		// A counter that is already zero is left alone; otherwise it is decremented
+		// and the query below runs only when that decrement lands exactly on zero.
+		if (mCudaFlushCount != 0 && --mCudaFlushCount == 0)
+		{
+			const CUresult status = cuStreamQuery(0); // flushes current push buffer
+			PX_UNUSED(status);
+			PX_ASSERT(status == CUDA_SUCCESS || status == CUDA_ERROR_NOT_READY);
+
+			// The counter is intentionally not re-armed here in the current
+			// implementation; resetCudaFlushCount() must be called explicitly:
+			// mCudaFlushCount = mDefaultCudaFlushCount;
+		}
+	}
+
+private:
+	int mCudaFlushCount;
+	int mDefaultCudaFlushCount;
+};
+
+}
+
+template <typename T0> // 1-argument overload: forwards one by-value kernel argument to cuLaunchKernel
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1> // 2-argument overload: forwards two by-value kernel arguments to cuLaunchKernel
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2> // 3-argument overload: forwards three by-value kernel arguments to cuLaunchKernel
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3> // 4-argument overload: forwards four by-value kernel arguments to cuLaunchKernel
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4> // 5-argument overload: forwards five by-value kernel arguments to cuLaunchKernel
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> // 6-argument overload: forwards six by-value kernel arguments to cuLaunchKernel
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> // 7-argument overload: forwards seven by-value kernel arguments to cuLaunchKernel
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5, &v6,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> // 8-argument overload: forwards eight by-value kernel arguments to cuLaunchKernel
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, // 9-argument overload: forwards nine by-value kernel arguments to cuLaunchKernel
+          typename T8>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, // 10-argument overload: forwards ten by-value kernel arguments to cuLaunchKernel
+          typename T8, typename T9>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, // 11-argument overload: forwards eleven by-value kernel arguments to cuLaunchKernel
+          typename T8, typename T9, typename T10>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, // 12-argument overload: forwards twelve by-value kernel arguments to cuLaunchKernel
+          typename T8, typename T9, typename T10, typename T11>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, // 13-argument overload: forwards thirteen by-value kernel arguments to cuLaunchKernel
+          typename T8, typename T9, typename T10, typename T11, typename T12>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, // 14-argument overload: forwards fourteen by-value kernel arguments to cuLaunchKernel
+          typename T8, typename T9, typename T10, typename T11, typename T12, typename T13>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+								  T13 v13)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, // 15-argument overload: forwards fifteen by-value kernel arguments to cuLaunchKernel
+          typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+								  T13 v13, T14 v14)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, // 16-argument overload: forwards sixteen by-value kernel arguments to cuLaunchKernel
+          typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+								  T13 v13, T14 v14, T15 v15)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, // 17-argument overload: forwards seventeen by-value kernel arguments to cuLaunchKernel
+          typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
+          typename T16>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+								  T13 v13, T14 v14, T15 v15, T16 v16)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, // 18-argument overload: forwards eighteen by-value kernel arguments to cuLaunchKernel
+          typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
+          typename T16, typename T17>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+								  T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+								  T13 v13, T14 v14, T15 v15, T16 v16, T17 v17)
+{
+	void* kernelParams[] = // one pointer per argument, each pointing at this function's by-value copy
+	{
+		&v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16, &v17,
+	};
+	return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid (numBlocks,1,1), block (numThreads,1,1), "extra" is NULL
+}
+
+#endif
+
+#endif
diff --git a/PxShared/src/cudamanager/include/GpuDispatcher.h b/PxShared/src/cudamanager/include/GpuDispatcher.h
new file mode 100644
index 00000000..30e3fcfa
--- /dev/null
+++ b/PxShared/src/cudamanager/include/GpuDispatcher.h
@@ -0,0 +1,334 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXTASK_GPUDISPATCHER_H
+#define PXTASK_GPUDISPATCHER_H
+
+#include "task/PxTask.h"
+#include "task/PxTaskDefine.h"
+#include "task/PxGpuTask.h"
+#include "task/PxTaskManager.h"
+#include "task/PxGpuDispatcher.h"
+#include "foundation/PxProfiler.h"
+
+#include "PsUserAllocated.h"
+#include "PsThread.h"
+#include "PsAtomic.h"
+#include "PsMutex.h"
+#include "PsSync.h"
+#include "PsArray.h"
+
+#include <cuda.h>
+
+namespace physx { 
+
+typedef uint16_t EventID;
+
+void releaseGpuDispatcher(PxGpuDispatcher&);
+
+class KernelWrangler;
+class BlockingWaitThread;
+class FanoutTask;
+class LaunchTask;
+class BlockTask;
+class PxGpuWorkerThread;
+
+class GpuDispatcherImpl : public PxGpuDispatcher, public shdfnd::UserAllocated // concrete PxGpuDispatcher; every member function is defined outside this header
+{
+public:
+	GpuDispatcherImpl(PxErrorCallback& errorCallback, PxCudaContextManager& ctx);
+	virtual ~GpuDispatcherImpl();
+
+	void	start();
+	void    startSimulation();
+	void	startGroup();
+	void    submitTask(PxTask& task);
+	void    finishGroup();
+	void    addCompletionPrereq(PxBaseTask& task);
+	bool	failureDetected() const;
+	void    forceFailureMode();
+	void    stopSimulation();
+	void    launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream); // NOTE(review): presumably forwards to PxGpuWorkerThread::launchCopyKernel, which has the same signature - confirm
+	
+	PxBaseTask&	getPreLaunchTask();
+	void		addPreLaunchDependent(PxBaseTask& dependent);
+
+	PxBaseTask&	getPostLaunchTask();
+	void		addPostLaunchDependent(PxBaseTask& dependent);
+
+	PxCudaContextManager* getCudaContextManager();
+
+	PxGpuWorkerThread* mDispatcher; // worker thread object; ownership is not visible from this header
+	BlockingWaitThread* mBlockingThread; // NOTE(review): presumably constructed around *mDispatcher (BlockingWaitThread takes a PxGpuWorkerThread&) - confirm
+	LaunchTask* mLaunchTask; // predecessor of tasks launching kernels
+	BlockTask* mBlockTask; // continuation of tasks launching kernels
+	FanoutTask* mSyncTask; // predecessor of tasks waiting for cuda context synchronize
+};
+
+class JobQueue // array of PxTask pointers; push() and popBack() run under the mutex
+{
+	PX_NOCOPY(JobQueue)
+public:
+	JobQueue() : taskarray(PX_DEBUG_EXP("PxTask*")) {}
+	void push(PxTask* t) // appends t at the back
+	{
+		access.lock();
+		taskarray.pushBack(t);
+		access.unlock();
+	}
+	PxTask* popBack()
+	{
+		// Remove and return the most recently pushed task; NULL when nothing is queued.
+		access.lock();
+		PxTask* newest = NULL;
+		const bool hasWork = taskarray.size() != 0;
+		if (hasWork)
+			newest = taskarray.popBack();
+		access.unlock();
+		return newest;
+	}
+	uint32_t size()
+	{
+		return taskarray.size(); // read without taking the mutex
+	}
+	bool empty()
+	{
+		return !taskarray.size(); // read without taking the mutex
+	}
+
+private:
+	shdfnd::Array<PxTask*> taskarray;
+	shdfnd::Mutex		  access;
+};
+
+class EventPool // mutex-guarded cache of CUevent handles that are all created with one fixed flag set
+{
+	PX_NOCOPY(EventPool)
+public:
+	EventPool(uint32_t inflags) : flags(inflags), evarray(PX_DEBUG_EXP("CUevent")) {}
+	void add(CUevent ev) // puts an event into the pool so a later get() can hand it out again
+	{
+		access.lock();
+		evarray.pushBack(ev);
+		access.unlock();
+	}
+	// Hands out a cached event when one is available, otherwise creates a new
+	// one with the pool's flags. If cuEventCreate fails the result is NULL, so
+	// callers receive a well-defined value instead of an indeterminate handle.
+	// The whole operation, including event creation, runs under the mutex.
+	CUevent get()
+	{
+		access.lock();
+		CUevent ev = NULL;
+		if (evarray.size())
+			ev = evarray.popBack();
+		else if (cuEventCreate(&ev, flags) != CUDA_SUCCESS)
+			ev = NULL; // do not trust whatever a failed create left behind
+		access.unlock();
+		return ev;
+	}
+	bool empty() const // unsynchronized: reads the size without taking the mutex
+	{
+		return evarray.size() == 0;
+	}
+	// Destroys every cached event and removes it from the pool, so a later get()
+	// or a second clear() can never touch an already-destroyed handle.
+	void clear()
+	{
+		access.lock();
+		while (evarray.size())
+			cuEventDestroy(evarray.popBack());
+		access.unlock();
+	}
+
+private:
+	uint32_t flags; // flags passed to cuEventCreate for every event this pool creates
+	shdfnd::Array<CUevent> evarray; // events currently available for reuse
+	shdfnd::Mutex access; // guards evarray in add(), get() and clear()
+};
+
+class StreamCache // hands out 1-based ids for CUstream handles and recycles them through a free list
+{
+public:
+	StreamCache() : sarray(PX_DEBUG_EXP("CUstream")), freeIndices(PX_DEBUG_EXP("freeIndices"))
+	{
+	}
+	CUstream get(uint32_t s) // s is a 1-based id; 0 is rejected by the assert
+	{
+		PX_ASSERT(s != 0);
+		return sarray[ s - 1u ];
+	}
+	void push(uint32_t s) // marks id s as available again
+	{
+		freeIndices.pushBack(s);
+	}
+	uint32_t popBack()
+	{
+		// Prefer a recycled id when the free list has one.
+		if (freeIndices.size() != 0)
+		{
+			return freeIndices.popBack();
+		}
+
+		// Nothing to recycle: create another stream; its id is the new array size.
+		CUstream created;
+		cuStreamCreate(&created, 0); // NOTE(review): the CUresult is not checked
+		sarray.pushBack(created);
+		return sarray.size();
+	}
+	void reset()
+	{
+		// Refill the free list with every id that has been created so far: 1..count.
+		const uint32_t count = sarray.size();
+		freeIndices.resize(count);
+		for (uint32_t id = 1; id <= count; ++id)
+			freeIndices[id - 1] = id;
+	}
+	bool empty() // true when no recycled id is available
+	{
+		return !freeIndices.size();
+	}
+
+private:
+	shdfnd::Array<CUstream>	 sarray;
+	shdfnd::Array<uint32_t>	 freeIndices;
+};
+
+class KernelBar // start/stop pair; the reset state is start = 0xffffffff and stop = 0
+{
+public:
+	KernelBar()
+		: start(0xffffffff)
+		, stop(0)
+	{}
+	void reset()
+	{
+		stop = 0;
+		start = 0xffffffff;
+	}
+
+	uint32_t start;
+	uint32_t stop;
+};
+
+const int SIZE_COMPLETION_RING = 1024;
+
+struct CudaBatch // element type of PxGpuWorkerThread::mCompletionRing
+{
+	CUevent		blockingEvent; // NOTE(review): presumably waited on before continuationTask is released - confirm in the .cpp
+	CUstream    blockingStream; // sync on stream instead of event if lsb is zero (faster)
+	PxBaseTask*   continuationTask; // task associated with this batch; how it is released is not visible here
+};
+
+struct ReadyTask // element type of the PxGpuWorkerThread::mReady arrays and of pollSubmitted()'s output
+{
+	PxGpuTask* 	task; // the GPU task this entry refers to
+	uint32_t       iteration; // NOTE(review): meaning is not visible in this header - presumably a per-task launch pass counter, confirm
+};
+
+class PxGpuWorkerThread : public shdfnd::Thread // thread object holding the dispatcher state; all member functions are defined outside this header
+{
+	PX_NOCOPY(PxGpuWorkerThread)
+public:
+	PxGpuWorkerThread();
+	~PxGpuWorkerThread();
+
+	void					setCudaContext(PxCudaContextManager& ctx);
+	void					emitStartEvent(const char *id);
+	void					emitStopEvent(const char *id);
+
+	/* API to TaskManager */
+	void					startSimulation();
+	void					stopSimulation();
+
+	/* API to GPU tasks */
+	void					addCompletionPrereq(PxBaseTask& task);
+
+	/* PxGpuTask execution thread */
+	void					execute();
+	void					pollSubmitted(shdfnd::Array<ReadyTask> *ready);
+	void					processActiveTasks();
+	void					flushBatch(CUevent endEvent, CUstream, PxBaseTask* task);
+	void					launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream);
+
+	/* Blocking wait thread */
+	void					blockingWaitFunc(); // NOTE(review): presumably the body run by BlockingWaitThread::execute - confirm
+
+	StreamCache				mCachedStreams;
+	shdfnd::Array<PxBaseTask*> mCompletionTasks;
+	JobQueue      			mSubmittedTaskList;
+	volatile int			mActiveGroups;
+	shdfnd::Sync			mInputReady;
+	shdfnd::Sync			mRecordEventQueued;
+	PxCudaContextManager* 	mCtxMgr;
+	bool                    mNewTasksSubmitted;
+	bool                    mFailureDetected; // GD_CHECK_CALL assigns true to a name spelled mFailureDetected; presumably this member when used in member functions
+
+	bool                    mUsingConcurrentStreams;
+
+	CudaBatch				mCompletionRing[ SIZE_COMPLETION_RING ]; // fixed-size array of SIZE_COMPLETION_RING entries
+	volatile int            mCompletionRingPush; // NOTE(review): presumably the write position in mCompletionRing - confirm
+	volatile int            mCompletionRingPop; // NOTE(review): presumably the read position in mCompletionRing - confirm
+
+	EventPool               mCachedBlockingEvents;
+	EventPool               mCachedNonBlockingEvents;
+
+	volatile int			mCountActiveScenes;
+
+	uint32_t*				mSmStartTimes;
+	uint32_t                 mSmClockFreq;
+
+	shdfnd::Array<ReadyTask> mReady[ PxGpuTaskHint::NUM_GPU_TASK_HINTS ]; // one array per PxGpuTaskHint value
+
+	KernelWrangler*         mUtilKernelWrapper;
+
+	CUevent                 mStartEvent;
+
+	shdfnd::Mutex			mMutex;
+};
+
+class BlockingWaitThread : public shdfnd::Thread // thread object bound to one PxGpuWorkerThread for its whole lifetime
+{
+public:
+	BlockingWaitThread(PxGpuWorkerThread& worker) : mWorker(worker) {} // only stores the reference
+	~BlockingWaitThread() {}
+
+	void		    execute(); // defined elsewhere; NOTE(review): presumably calls mWorker.blockingWaitFunc() - confirm
+
+protected:
+	PxGpuWorkerThread& mWorker; // reference member, which is why assignment is disabled below
+
+private:
+	BlockingWaitThread& operator=(const BlockingWaitThread&); // declared but not defined here: assignment is not available to users
+};
+
+#define GD_CHECK_CALL(call)  { /* Runs a CUDA driver call; on failure sets mFailureDetected (must be in scope) and asserts. The local uses a collision-resistant name so 'call' may mention a caller variable named ret. */ CUresult gdCheckCallRet_ = (call); \
+		if( CUDA_SUCCESS != gdCheckCallRet_ ) { mFailureDetected=true; PX_ASSERT(!gdCheckCallRet_); } }
+
+}
+
+#endif // PXTASK_GPUDISPATCHER_H
diff --git a/PxShared/src/cudamanager/include/PhysXDeviceSettings.h b/PxShared/src/cudamanager/include/PhysXDeviceSettings.h
new file mode 100644
index 00000000..d73e7c75
--- /dev/null
+++ b/PxShared/src/cudamanager/include/PhysXDeviceSettings.h
@@ -0,0 +1,56 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H
+#define PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H
+
+#include "task/PxTaskDefine.h"
+
+#if PX_SUPPORT_GPU_PHYSX
+
+namespace physx
+{
+	class PxErrorCallback;
+
+	/**
+	Helper functions to expose control panel functionality 
+	*/
+	class PhysXDeviceSettings // static-only helper: the constructor is private, so it is used through its static members
+	{
+	private:
+		PhysXDeviceSettings() {} // private so that outside code cannot create instances
+
+	public:
+		static int getSuggestedCudaDeviceOrdinal(PxErrorCallback& errc); // defined elsewhere; NOTE(review): return value on failure is not visible here - confirm
+		static int isUsingDedicatedGPU(); // defined elsewhere; returns int, not bool - NOTE(review): confirm the meaning of each value
+		static bool isSLIEnabled(void* graphicsDevice); // defined elsewhere; NOTE(review): expected type behind graphicsDevice is not visible here
+	};
+}
+
+#endif
+
+#endif // PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H
author	git perforce import user <a@b>	2016-10-25 12:29:14 -0600
committer	Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>	2016-10-25 18:56:37 -0500
commit	3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree	fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PxShared/src/cudamanager
download	physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip