diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /PxShared/src/cudamanager/include/GpuDispatcher.h | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'PxShared/src/cudamanager/include/GpuDispatcher.h')
| -rw-r--r-- | PxShared/src/cudamanager/include/GpuDispatcher.h | 334 |
1 file changed, 334 insertions, 0 deletions
diff --git a/PxShared/src/cudamanager/include/GpuDispatcher.h b/PxShared/src/cudamanager/include/GpuDispatcher.h new file mode 100644 index 00000000..30e3fcfa --- /dev/null +++ b/PxShared/src/cudamanager/include/GpuDispatcher.h @@ -0,0 +1,334 @@ +// This code contains NVIDIA Confidential Information and is disclosed to you +// under a form of NVIDIA software license agreement provided separately to you. +// +// Notice +// NVIDIA Corporation and its licensors retain all intellectual property and +// proprietary rights in and to this software and related documentation and +// any modifications thereto. Any use, reproduction, disclosure, or +// distribution of this software and related documentation without an express +// license agreement from NVIDIA Corporation is strictly prohibited. +// +// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES +// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO +// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, +// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. +// +// Information and code furnished is believed to be accurate and reliable. +// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such +// information or for any infringement of patents or other rights of third parties that may +// result from its use. No license is granted by implication or otherwise under any patent +// or patent rights of NVIDIA Corporation. Details are subject to change without notice. +// This code supersedes and replaces all information previously supplied. +// NVIDIA Corporation products are not authorized for use as critical +// components in life support devices or systems without express written approval of +// NVIDIA Corporation. +// +// Copyright (c) 2008-2016 NVIDIA Corporation. All rights reserved. 
+ +#ifndef PXTASK_GPUDISPATCHER_H +#define PXTASK_GPUDISPATCHER_H + +#include "task/PxTask.h" +#include "task/PxTaskDefine.h" +#include "task/PxGpuTask.h" +#include "task/PxTaskManager.h" +#include "task/PxGpuDispatcher.h" +#include "foundation/PxProfiler.h" + +#include "PsUserAllocated.h" +#include "PsThread.h" +#include "PsAtomic.h" +#include "PsMutex.h" +#include "PsSync.h" +#include "PsArray.h" + +#include <cuda.h> + +namespace physx { + +typedef uint16_t EventID; + +void releaseGpuDispatcher(PxGpuDispatcher&); + +class KernelWrangler; +class BlockingWaitThread; +class FanoutTask; +class LaunchTask; +class BlockTask; +class PxGpuWorkerThread; + +class GpuDispatcherImpl : public PxGpuDispatcher, public shdfnd::UserAllocated +{ +public: + GpuDispatcherImpl(PxErrorCallback& errorCallback, PxCudaContextManager& ctx); + virtual ~GpuDispatcherImpl(); + + void start(); + void startSimulation(); + void startGroup(); + void submitTask(PxTask& task); + void finishGroup(); + void addCompletionPrereq(PxBaseTask& task); + bool failureDetected() const; + void forceFailureMode(); + void stopSimulation(); + void launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream); + + PxBaseTask& getPreLaunchTask(); + void addPreLaunchDependent(PxBaseTask& dependent); + + PxBaseTask& getPostLaunchTask(); + void addPostLaunchDependent(PxBaseTask& dependent); + + PxCudaContextManager* getCudaContextManager(); + + PxGpuWorkerThread* mDispatcher; + BlockingWaitThread* mBlockingThread; + LaunchTask* mLaunchTask; // predecessor of tasks launching kernels + BlockTask* mBlockTask; // continuation of tasks launching kernels + FanoutTask* mSyncTask; // predecessor of tasks waiting for cuda context synchronize +}; + +class JobQueue +{ + PX_NOCOPY(JobQueue) +public: + JobQueue() : taskarray(PX_DEBUG_EXP("PxTask*")) {} + void push(PxTask* t) + { + access.lock(); + taskarray.pushBack(t); + access.unlock(); + } + PxTask* popBack() + { + access.lock(); + PxTask* t = NULL; + if 
(taskarray.size()) + { + t = taskarray.popBack(); + } + access.unlock(); + return t; + } + uint32_t size() + { + return taskarray.size(); + } + bool empty() + { + return taskarray.size() == 0; + } + +private: + shdfnd::Array<PxTask*> taskarray; + shdfnd::Mutex access; +}; + +class EventPool +{ + PX_NOCOPY(EventPool) +public: + EventPool(uint32_t inflags) : flags(inflags), evarray(PX_DEBUG_EXP("CUevent")) {} + void add(CUevent ev) + { + access.lock(); + evarray.pushBack(ev); + access.unlock(); + } + CUevent get() + { + access.lock(); + CUevent ev; + if (evarray.size()) + { + ev = evarray.popBack(); + } + else + { + cuEventCreate(&ev, flags); + } + access.unlock(); + return ev; + } + bool empty() const + { + return evarray.size() == 0; + } + void clear() + { + access.lock(); + for (uint32_t i = 0; i < evarray.size(); i++) + { + cuEventDestroy(evarray[i]); + } + access.unlock(); + } + +private: + uint32_t flags; + shdfnd::Array<CUevent> evarray; + shdfnd::Mutex access; +}; + +class StreamCache +{ +public: + StreamCache() : sarray(PX_DEBUG_EXP("CUstream")), freeIndices(PX_DEBUG_EXP("freeIndices")) + { + } + CUstream get(uint32_t s) + { + PX_ASSERT(s); + return sarray[ s - 1 ]; + } + void push(uint32_t s) + { + freeIndices.pushBack(s); + } + uint32_t popBack() + { + if (freeIndices.size()) + { + return freeIndices.popBack(); + } + else + { + CUstream s; + cuStreamCreate(&s, 0); + sarray.pushBack(s); + return sarray.size(); + } + } + void reset() + { + freeIndices.resize(sarray.size()); + for (uint32_t i = 0 ; i < sarray.size() ; i++) + { + freeIndices[i] = i + 1; + } + } + bool empty() + { + return freeIndices.size() == 0; + } + +private: + shdfnd::Array<CUstream> sarray; + shdfnd::Array<uint32_t> freeIndices; +}; + +class KernelBar +{ +public: + KernelBar() + { + reset(); + } + void reset() + { + start = 0xffffffff; + stop = 0; + } + + uint32_t start; + uint32_t stop; +}; + +const int SIZE_COMPLETION_RING = 1024; + +struct CudaBatch +{ + CUevent blockingEvent; + 
CUstream blockingStream; // sync on stream instead of event if lsb is zero (faster) + PxBaseTask* continuationTask; +}; + +struct ReadyTask +{ + PxGpuTask* task; + uint32_t iteration; +}; + +class PxGpuWorkerThread : public shdfnd::Thread +{ + PX_NOCOPY(PxGpuWorkerThread) +public: + PxGpuWorkerThread(); + ~PxGpuWorkerThread(); + + void setCudaContext(PxCudaContextManager& ctx); + void emitStartEvent(const char *id); + void emitStopEvent(const char *id); + + /* API to TaskManager */ + void startSimulation(); + void stopSimulation(); + + /* API to GPU tasks */ + void addCompletionPrereq(PxBaseTask& task); + + /* PxGpuTask execution thread */ + void execute(); + void pollSubmitted(shdfnd::Array<ReadyTask> *ready); + void processActiveTasks(); + void flushBatch(CUevent endEvent, CUstream, PxBaseTask* task); + void launchCopyKernel(PxGpuCopyDesc* desc, uint32_t count, CUstream stream); + + /* Blocking wait thread */ + void blockingWaitFunc(); + + StreamCache mCachedStreams; + shdfnd::Array<PxBaseTask*> mCompletionTasks; + JobQueue mSubmittedTaskList; + volatile int mActiveGroups; + shdfnd::Sync mInputReady; + shdfnd::Sync mRecordEventQueued; + PxCudaContextManager* mCtxMgr; + bool mNewTasksSubmitted; + bool mFailureDetected; + + bool mUsingConcurrentStreams; + + CudaBatch mCompletionRing[ SIZE_COMPLETION_RING ]; + volatile int mCompletionRingPush; + volatile int mCompletionRingPop; + + EventPool mCachedBlockingEvents; + EventPool mCachedNonBlockingEvents; + + volatile int mCountActiveScenes; + + uint32_t* mSmStartTimes; + uint32_t mSmClockFreq; + + shdfnd::Array<ReadyTask> mReady[ PxGpuTaskHint::NUM_GPU_TASK_HINTS ]; + + KernelWrangler* mUtilKernelWrapper; + + CUevent mStartEvent; + + shdfnd::Mutex mMutex; +}; + +class BlockingWaitThread : public shdfnd::Thread +{ +public: + BlockingWaitThread(PxGpuWorkerThread& worker) : mWorker(worker) {} + ~BlockingWaitThread() {} + + void execute(); + +protected: + PxGpuWorkerThread& mWorker; + +private: + BlockingWaitThread& 
operator=(const BlockingWaitThread&); +}; + +#define GD_CHECK_CALL(call) { CUresult ret = call; \ + if( CUDA_SUCCESS != ret ) { mFailureDetected=true; PX_ASSERT(!ret); } } + +} + +#endif // PXTASK_GPUDISPATCHER_H |