diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PxShared/src/cudamanager | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'PxShared/src/cudamanager')
| -rw-r--r-- | PxShared/src/cudamanager/include/CudaContextManager.h | 51 | ||||
| -rw-r--r-- | PxShared/src/cudamanager/include/CudaKernelWrangler.h | 331 | ||||
| -rw-r--r-- | PxShared/src/cudamanager/include/GpuDispatcher.h | 334 | ||||
| -rw-r--r-- | PxShared/src/cudamanager/include/PhysXDeviceSettings.h | 56 |
4 files changed, 772 insertions, 0 deletions
diff --git a/PxShared/src/cudamanager/include/CudaContextManager.h b/PxShared/src/cudamanager/include/CudaContextManager.h new file mode 100644 index 00000000..3aff9581 --- /dev/null +++ b/PxShared/src/cudamanager/include/CudaContextManager.h @@ -0,0 +1,51 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. 
+
+#ifndef PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H
+#define PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H
+
+#include "task/PxTaskDefine.h"
+
+#if PX_SUPPORT_GPU_PHYSX // the factory below is declared only when PX_SUPPORT_GPU_PHYSX is non-zero
+
+namespace physx
+{
+
+class PxCudaContextManager;
+class PxCudaContextManagerDesc;
+class PxErrorCallback;
+
+/**
+Creates a CUDA context manager for PhysX and APEX, configured by desc. NOTE(review): errorCallback is presumably where creation failures are reported - confirm in the implementation.
+*/
+PxCudaContextManager* createCudaContextManager(const PxCudaContextManagerDesc& desc, PxErrorCallback& errorCallback);
+
+}
+
+#endif // PX_SUPPORT_GPU_PHYSX
+
+#endif // PXCUDACONTEXTMANAGER_CUDACONTEXTMANAGER_H
diff --git a/PxShared/src/cudamanager/include/CudaKernelWrangler.h b/PxShared/src/cudamanager/include/CudaKernelWrangler.h
new file mode 100644
index 00000000..65a41ab4
--- /dev/null
+++ b/PxShared/src/cudamanager/include/CudaKernelWrangler.h
@@ -0,0 +1,331 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+
+#ifndef __CUDA_KERNEL_WRANGLER__
+#define __CUDA_KERNEL_WRANGLER__
+
+// Make this header safe for inclusion in headers that are shared with device code: everything below is compiled out when __CUDACC__ is defined.
+#if !defined(__CUDACC__)
+
+#include "task/PxTaskDefine.h"
+#include "task/PxGpuDispatcher.h"
+
+#include "PsUserAllocated.h"
+#include "PsArray.h"
+
+#include <cuda.h>
+
+namespace physx
+{
+
+class KernelWrangler : public shdfnd::UserAllocated // lookup table from a function index to its CUfunction and CUmodule handles
+{
+    PX_NOCOPY(KernelWrangler)
+public:
+    KernelWrangler(PxGpuDispatcher& gd, PxErrorCallback& errorCallback, const char** funcNames, uint16_t numFuncs); // NOTE(review): presumably resolves funcNames into mCuFunctions - defined outside this header, confirm there
+    ~KernelWrangler();
+
+    CUfunction getCuFunction(uint16_t funcIndex) const // handle stored for funcIndex; no range check is done in this header
+    {
+        return mCuFunctions[ funcIndex ];
+    }
+
+    CUmodule getCuModule(uint16_t funcIndex) const // module handle associated with function funcIndex
+    {
+        uint16_t modIndex = mCuFuncModIndex[ funcIndex ]; // function index -> index into mCuModules
+        return mCuModules[ modIndex ];
+    }
+
+    static void const* const* getImages();
+    static int getNumImages();
+
+    bool hadError() const { return mError; } // reports mError; where it gets set is not visible in this header
+
+protected:
+    bool mError;
+    shdfnd::Array<CUfunction> mCuFunctions; // indexed by function index
+    shdfnd::Array<uint16_t> mCuFuncModIndex; // indexed by function index, holds an index into mCuModules
+    shdfnd::Array<CUmodule> mCuModules;
+    PxGpuDispatcher& mGpuDispatcher;
+    PxErrorCallback& mErrorCallback;
+};
+
+/* SJB - These were "borrowed" from an Ignacio Llamas email to devtech-compute.
+ * If we feel this is too clumsy, we can steal the boost based bits from APEX
+ */
+
+class ExplicitCudaFlush // countdown helper: the decrementFlushCount() call that takes the counter to zero issues cuStreamQuery(0)
+{
+public:
+    ExplicitCudaFlush(int cudaFlushCount) : mCudaFlushCount(cudaFlushCount), mDefaultCudaFlushCount(mCudaFlushCount) {} // both counters start at cudaFlushCount
+    ~ExplicitCudaFlush() {}
+
+    void setCudaFlushCount(int value) { mCudaFlushCount = mDefaultCudaFlushCount = value; } // sets the live counter and the value used by resetCudaFlushCount()
+    unsigned int getCudaFlushCount() const { return (unsigned int)mCudaFlushCount; }
+    void resetCudaFlushCount() { mCudaFlushCount = mDefaultCudaFlushCount; } // re-arms the countdown
+
+    void decrementFlushCount()
+    {
+        if (mCudaFlushCount == 0) return; // zero: nothing left to count down (started at 0, or already fired and not re-armed)
+
+        if (--mCudaFlushCount == 0)
+        {
+            CUresult ret = cuStreamQuery(0); // flushes current push buffer
+            PX_UNUSED(ret);
+            PX_ASSERT(ret == CUDA_SUCCESS || ret == CUDA_ERROR_NOT_READY);
+
+            // For current implementation, disable resetting of cuda flush count
+            // reset cuda flush count
+            // mCudaFlushCount = mDefaultCudaFlushCount;
+        }
+    }
+
+private:
+    int mCudaFlushCount; // remaining decrements before the flush
+    int mDefaultCudaFlushCount; // value restored by resetCudaFlushCount()
+};
+
+}
+
+template <typename T0> // launchKernel overloads for 1 to 18 kernel arguments follow; each passes the addresses of its by-value arguments as kernelParams
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0)
+{
+    void* kernelParams[] =
+    {
+        &v0,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL); // grid = (numBlocks,1,1), block = (numThreads,1,1)
+}
+
+template <typename T0, typename T1>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5, &v6,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
+    typename T8>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
+    typename T8, typename T9>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
+    typename T8, typename T9, typename T10>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
+    typename T8, typename T9, typename T10, typename T11>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
+    typename T8, typename T9, typename T10, typename T11, typename T12>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
+    typename T8, typename T9, typename T10, typename T11, typename T12, typename T13>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+    T13 v13)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
+    typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+    T13 v13, T14 v14)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
+    typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+    T13 v13, T14 v14, T15 v15)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
+    typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+    T13 v13, T14 v14, T15 v15, T16 v16)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
+    typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
+    typename T16, typename T17>
+PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
+    T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
+    T13 v13, T14 v14, T15 v15, T16 v16, T17 v17)
+{
+    void* kernelParams[] =
+    {
+        &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16, &v17,
+    };
+    return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
+}
+
+#endif // !defined(__CUDACC__)
+
+#endif // __CUDA_KERNEL_WRANGLER__
diff --git a/PxShared/src/cudamanager/include/GpuDispatcher.h b/PxShared/src/cudamanager/include/GpuDispatcher.h
new file mode 100644
index 00000000..30e3fcfa
--- /dev/null
+++ b/PxShared/src/cudamanager/include/GpuDispatcher.h
@@ -0,0 +1,334 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXTASK_GPUDISPATCHER_H
+#define PXTASK_GPUDISPATCHER_H
+
+#include "task/PxTask.h"
+#include "task/PxTaskDefine.h"
+#include "task/PxGpuTask.h"
+#include "task/PxTaskManager.h"
+#include "task/PxGpuDispatcher.h"
+#include "foundation/PxProfiler.h"
+
+#include "PsUserAllocated.h"
+#include "PsThread.h"
+#include "PsAtomic.h"
+#include "PsMutex.h"
+#include "PsSync.h"
+#include "PsArray.h"
+
+#include <cuda.h>
+
+namespace physx {
+
+typedef uint16_t EventID;
+
+void releaseGpuDispatcher(PxGpuDispatcher&);
+
+class KernelWrangler;
+class BlockingWaitThread;
+class FanoutTask;
+class LaunchTask;
+class BlockTask;
+class PxGpuWorkerThread;
+
+class GpuDispatcherImpl : public PxGpuDispatcher, public shdfnd::UserAllocated // concrete PxGpuDispatcher; every member function is defined outside this header
+{
+public:
+    GpuDispatcherImpl(PxErrorCallback& errorCallback, PxCudaContextManager& ctx);
+    virtual ~GpuDispatcherImpl();
+
+    void start();
+    void startSimulation();
+    void startGroup();
+    void submitTask(PxTask& task);
+    void finishGroup();
+    void addCompletionPrereq(PxBaseTask& task);
+    bool failureDetected() const;
+    void forceFailureMode();
+    void stopSimulation();
+    void launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream);
+
+    PxBaseTask& getPreLaunchTask();
+    void addPreLaunchDependent(PxBaseTask& dependent);
+
+    PxBaseTask& getPostLaunchTask();
+    void addPostLaunchDependent(PxBaseTask& dependent);
+
+    PxCudaContextManager* getCudaContextManager();
+
+    PxGpuWorkerThread* mDispatcher;
+    BlockingWaitThread* mBlockingThread;
+    LaunchTask* mLaunchTask; // predecessor of tasks launching kernels
+    BlockTask* mBlockTask; // continuation of tasks launching kernels
+    FanoutTask* mSyncTask; // predecessor of tasks waiting for cuda context synchronize
+};
+
+class JobQueue // LIFO of PxTask pointers: push() appends, popBack() removes the newest entry, both under 'access'
+{
+    PX_NOCOPY(JobQueue)
+public:
+    JobQueue() : taskarray(PX_DEBUG_EXP("PxTask*")) {}
+    void push(PxTask* t)
+    {
+        access.lock();
+        taskarray.pushBack(t);
+        access.unlock();
+    }
+    PxTask* popBack() // returns NULL when the queue is empty
+    {
+        access.lock();
+        PxTask* t = NULL;
+        if (taskarray.size())
+        {
+            t = taskarray.popBack();
+        }
+        access.unlock();
+        return t;
+    }
+    uint32_t size() // NOTE(review): reads taskarray without taking 'access', unlike push()/popBack(); treat the value as a hint while other threads mutate the queue
+    {
+        return taskarray.size();
+    }
+    bool empty() // NOTE(review): unlocked read as well, same caveat as size()
+    {
+        return taskarray.size() == 0;
+    }
+
+private:
+    shdfnd::Array<PxTask*> taskarray;
+    shdfnd::Mutex access; // guards taskarray in push() and popBack()
+};
+
+class EventPool // pool of CUevent handles: get() reuses a pooled event or creates one with 'flags', add() returns one to the pool
+{
+    PX_NOCOPY(EventPool)
+public:
+    EventPool(uint32_t inflags) : flags(inflags), evarray(PX_DEBUG_EXP("CUevent")) {} // inflags is passed to cuEventCreate() for every event this pool creates
+    void add(CUevent ev)
+    {
+        access.lock();
+        evarray.pushBack(ev);
+        access.unlock();
+    }
+    CUevent get() // returns a pooled event if one exists, otherwise a newly created one (NULL if creation fails)
+    {
+        access.lock();
+        CUevent ev = NULL; // stays NULL if cuEventCreate() fails instead of returning an indeterminate handle
+        if (evarray.size())
+        {
+            ev = evarray.popBack();
+        }
+        else
+        {
+            cuEventCreate(&ev, flags);
+        }
+        access.unlock();
+        return ev;
+    }
+    bool empty() const // NOTE(review): reads evarray without taking 'access'
+    {
+        return evarray.size() == 0;
+    }
+    void clear() // destroys every pooled event and leaves the pool empty
+    {
+        access.lock();
+        while (evarray.size()) // pop each handle as it is destroyed so a dead event can never be handed out or destroyed twice
+        {
+            cuEventDestroy(evarray.popBack());
+        }
+        access.unlock();
+    }
+
+private:
+    uint32_t flags;
+    shdfnd::Array<CUevent> evarray;
+    shdfnd::Mutex access; // guards evarray in add(), get() and clear()
+};
+
+class StreamCache // streams are addressed by 1-based index: popBack() never returns 0 and get(s) reads sarray[s - 1]; no locking in this class
+{
+public:
+    StreamCache() : sarray(PX_DEBUG_EXP("CUstream")), freeIndices(PX_DEBUG_EXP("freeIndices"))
+    {
+    }
+    CUstream get(uint32_t s) // s must be a non-zero index obtained from popBack()
+    {
+        PX_ASSERT(s);
+        return sarray[ s - 1 ];
+    }
+    void push(uint32_t s) // marks index s as free for reuse
+    {
+        freeIndices.pushBack(s);
+    }
+    uint32_t popBack() // returns a free 1-based index, creating a new stream when none is free
+    {
+        if (freeIndices.size())
+        {
+            return freeIndices.popBack();
+        }
+        else
+        {
+            CUstream s = NULL; // stays NULL (the default stream handle) if cuStreamCreate() fails instead of caching an indeterminate handle
+            cuStreamCreate(&s, 0);
+            sarray.pushBack(s);
+            return sarray.size();
+        }
+    }
+    void reset() // marks every stream created so far as free again
+    {
+        freeIndices.resize(sarray.size());
+        for (uint32_t i = 0 ; i < sarray.size() ; i++)
+        {
+            freeIndices[i] = i + 1;
+        }
+    }
+    bool empty() // true when no free index is left, i.e. the next popBack() creates a stream
+    {
+        return freeIndices.size() == 0;
+    }
+
+private:
+    shdfnd::Array<CUstream> sarray; // every stream created so far; index + 1 is the public handle
+    shdfnd::Array<uint32_t> freeIndices; // 1-based indices currently available
+};
+
+class KernelBar // start/stop pair; NOTE(review): presumably a timing interval narrowed with min/max at the use sites - confirm there
+{
+public:
+    KernelBar()
+    {
+        reset();
+    }
+    void reset()
+    {
+        start = 0xffffffff; // largest uint32_t value
+        stop = 0;
+    }
+
+    uint32_t start;
+    uint32_t stop;
+};
+
+const int SIZE_COMPLETION_RING = 1024; // number of CudaBatch slots in PxGpuWorkerThread::mCompletionRing
+
+struct CudaBatch
+{
+    CUevent blockingEvent;
+    CUstream blockingStream; // sync on stream instead of event if lsb is zero (faster)
+    PxBaseTask* continuationTask;
+};
+
+struct ReadyTask
+{
+    PxGpuTask* task;
+    uint32_t iteration;
+};
+
+class PxGpuWorkerThread : public shdfnd::Thread // worker thread state; all member functions are defined outside this header
+{
+    PX_NOCOPY(PxGpuWorkerThread)
+public:
+    PxGpuWorkerThread();
+    ~PxGpuWorkerThread();
+
+    void setCudaContext(PxCudaContextManager& ctx);
+    void emitStartEvent(const char *id);
+    void emitStopEvent(const char *id);
+
+    /* API to TaskManager */
+    void startSimulation();
+    void stopSimulation();
+
+    /* API to GPU tasks */
+    void addCompletionPrereq(PxBaseTask& task);
+
+    /* PxGpuTask execution thread */
+    void execute();
+    void pollSubmitted(shdfnd::Array<ReadyTask> *ready);
+    void processActiveTasks();
+    void flushBatch(CUevent endEvent, CUstream, PxBaseTask* task);
+    void launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream);
+
+    /* Blocking wait thread */
+    void blockingWaitFunc();
+
+    StreamCache mCachedStreams;
+    shdfnd::Array<PxBaseTask*> mCompletionTasks;
+    JobQueue mSubmittedTaskList;
+    volatile int mActiveGroups;
+    shdfnd::Sync mInputReady;
+    shdfnd::Sync mRecordEventQueued;
+    PxCudaContextManager* mCtxMgr;
+    bool mNewTasksSubmitted;
+    bool mFailureDetected; // set to true by GD_CHECK_CALL when a CUDA call does not return CUDA_SUCCESS
+
+    bool mUsingConcurrentStreams;
+
+    CudaBatch mCompletionRing[ SIZE_COMPLETION_RING ];
+    volatile int mCompletionRingPush;
+    volatile int mCompletionRingPop;
+
+    EventPool mCachedBlockingEvents;
+    EventPool mCachedNonBlockingEvents;
+
+    volatile int mCountActiveScenes;
+
+    uint32_t* mSmStartTimes;
+    uint32_t mSmClockFreq;
+
+    shdfnd::Array<ReadyTask> mReady[ PxGpuTaskHint::NUM_GPU_TASK_HINTS ]; // one ready list per PxGpuTaskHint value
+
+    KernelWrangler* mUtilKernelWrapper;
+
+    CUevent mStartEvent;
+
+    shdfnd::Mutex mMutex;
+};
+
+class BlockingWaitThread : public shdfnd::Thread // thread object that keeps a reference to its PxGpuWorkerThread; execute() is defined outside this header
+{
+public:
+    BlockingWaitThread(PxGpuWorkerThread& worker) : mWorker(worker) {}
+    ~BlockingWaitThread() {}
+
+    void execute();
+
+protected:
+    PxGpuWorkerThread& mWorker;
+
+private:
+    BlockingWaitThread& operator=(const BlockingWaitThread&); // private and not defined in this header: assignment is not available to users
+};
+
+#define GD_CHECK_CALL(call) { CUresult ret = call; \
+    if( CUDA_SUCCESS != ret ) { mFailureDetected=true; PX_ASSERT(!ret); } } /* expands to a braced block; needs mFailureDetected in the calling scope */
+
+}
+
+#endif // PXTASK_GPUDISPATCHER_H
diff --git a/PxShared/src/cudamanager/include/PhysXDeviceSettings.h b/PxShared/src/cudamanager/include/PhysXDeviceSettings.h
new file mode 100644
index 00000000..d73e7c75
--- /dev/null
+++ b/PxShared/src/cudamanager/include/PhysXDeviceSettings.h
@@ -0,0 +1,56 @@
+// This code contains NVIDIA Confidential Information and is disclosed to you
+// under a form of NVIDIA software license agreement provided separately to you.
+//
+// Notice
+// NVIDIA Corporation and its licensors retain all intellectual property and
+// proprietary rights in and to this software and related documentation and
+// any modifications thereto. Any use, reproduction, disclosure, or
+// distribution of this software and related documentation without an express
+// license agreement from NVIDIA Corporation is strictly prohibited.
+//
+// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
+// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
+// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
+// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
+//
+// Information and code furnished is believed to be accurate and reliable.
+// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
+// information or for any infringement of patents or other rights of third parties that may
+// result from its use. No license is granted by implication or otherwise under any patent
+// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
+// This code supersedes and replaces all information previously supplied.
+// NVIDIA Corporation products are not authorized for use as critical
+// components in life support devices or systems without express written approval of
+// NVIDIA Corporation.
+//
+// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved.
+
+#ifndef PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H
+#define PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H
+
+#include "task/PxTaskDefine.h"
+
+#if PX_SUPPORT_GPU_PHYSX // the class below is declared only when PX_SUPPORT_GPU_PHYSX is non-zero
+
+namespace physx
+{
+    class PxErrorCallback;
+
+    /**
+    Helper functions to expose control panel functionality. All members are static; the functions are defined outside this header.
+    */
+    class PhysXDeviceSettings
+    {
+    private:
+        PhysXDeviceSettings() {} // private constructor: the class is used through its static functions only
+
+    public:
+        static int getSuggestedCudaDeviceOrdinal(PxErrorCallback& errc);
+        static int isUsingDedicatedGPU(); // NOTE(review): returns int rather than bool; the meaning of the values is set by the implementation - confirm there
+        static bool isSLIEnabled(void* graphicsDevice);
+    };
+}
+
+#endif // PX_SUPPORT_GPU_PHYSX
+
+#endif // PXCUDACONTEXTMANAGER_PHYSXDEVICESETTINGS_H |