aboutsummaryrefslogtreecommitdiff
path: root/PxShared/src/cudamanager/include/CudaKernelWrangler.h
diff options
context:
space:
mode:
Diffstat (limited to 'PxShared/src/cudamanager/include/CudaKernelWrangler.h')
-rw-r--r--PxShared/src/cudamanager/include/CudaKernelWrangler.h331
1 files changed, 0 insertions, 331 deletions
diff --git a/PxShared/src/cudamanager/include/CudaKernelWrangler.h b/PxShared/src/cudamanager/include/CudaKernelWrangler.h
deleted file mode 100644
index 36a2cc8..0000000
--- a/PxShared/src/cudamanager/include/CudaKernelWrangler.h
+++ /dev/null
@@ -1,331 +0,0 @@
-// This code contains NVIDIA Confidential Information and is disclosed to you
-// under a form of NVIDIA software license agreement provided separately to you.
-//
-// Notice
-// NVIDIA Corporation and its licensors retain all intellectual property and
-// proprietary rights in and to this software and related documentation and
-// any modifications thereto. Any use, reproduction, disclosure, or
-// distribution of this software and related documentation without an express
-// license agreement from NVIDIA Corporation is strictly prohibited.
-//
-// ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES
-// NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO
-// THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT,
-// MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.
-//
-// Information and code furnished is believed to be accurate and reliable.
-// However, NVIDIA Corporation assumes no responsibility for the consequences of use of such
-// information or for any infringement of patents or other rights of third parties that may
-// result from its use. No license is granted by implication or otherwise under any patent
-// or patent rights of NVIDIA Corporation. Details are subject to change without notice.
-// This code supersedes and replaces all information previously supplied.
-// NVIDIA Corporation products are not authorized for use as critical
-// components in life support devices or systems without express written approval of
-// NVIDIA Corporation.
-//
-// Copyright (c) 2008-2017 NVIDIA Corporation. All rights reserved.
-
-#ifndef __CUDA_KERNEL_WRANGLER__
-#define __CUDA_KERNEL_WRANGLER__
-
-// Make this header is safe for inclusion in headers that are shared with device code.
-#if !defined(__CUDACC__)
-
-#include "task/PxTaskDefine.h"
-#include "task/PxGpuDispatcher.h"
-
-#include "PsUserAllocated.h"
-#include "PsArray.h"
-
-#include <cuda.h>
-
-namespace physx
-{
-
-class KernelWrangler : public shdfnd::UserAllocated
-{
- PX_NOCOPY(KernelWrangler)
-public:
- KernelWrangler(PxGpuDispatcher& gd, PxErrorCallback& errorCallback, const char** funcNames, uint16_t numFuncs);
- ~KernelWrangler();
-
- CUfunction getCuFunction(uint16_t funcIndex) const
- {
- return mCuFunctions[ funcIndex ];
- }
-
- CUmodule getCuModule(uint16_t funcIndex) const
- {
- uint16_t modIndex = mCuFuncModIndex[ funcIndex ];
- return mCuModules[ modIndex ];
- }
-
- static void const* const* getImages();
- static int getNumImages();
-
- bool hadError() const { return mError; }
-
-protected:
- bool mError;
- shdfnd::Array<CUfunction> mCuFunctions;
- shdfnd::Array<uint16_t> mCuFuncModIndex;
- shdfnd::Array<CUmodule> mCuModules;
- PxGpuDispatcher& mGpuDispatcher;
- PxErrorCallback& mErrorCallback;
-};
-
-/* SJB - These were "borrowed" from an Ignacio Llamas email to devtech-compute.
- * If we feel this is too clumsy, we can steal the boost based bits from APEX
- */
-
-class ExplicitCudaFlush
-{
-public:
- ExplicitCudaFlush(int cudaFlushCount) : mCudaFlushCount(cudaFlushCount), mDefaultCudaFlushCount(mCudaFlushCount) {}
- ~ExplicitCudaFlush() {}
-
- void setCudaFlushCount(int value) { mCudaFlushCount = mDefaultCudaFlushCount = value; }
- unsigned int getCudaFlushCount() const { return (unsigned int)mCudaFlushCount; }
- void resetCudaFlushCount() { mCudaFlushCount = mDefaultCudaFlushCount; }
-
- void decrementFlushCount()
- {
- if (mCudaFlushCount == 0) return;
-
- if (--mCudaFlushCount == 0)
- {
- CUresult ret = cuStreamQuery(0); // flushes current push buffer
- PX_UNUSED(ret);
- PX_ASSERT(ret == CUDA_SUCCESS || ret == CUDA_ERROR_NOT_READY);
-
- // For current implementation, disable resetting of cuda flush count
- // reset cuda flush count
- // mCudaFlushCount = mDefaultCudaFlushCount;
- }
- }
-
-private:
- int mCudaFlushCount;
- int mDefaultCudaFlushCount;
-};
-
-}
-
-template <typename T0>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0)
-{
- void* kernelParams[] =
- {
- &v0,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1)
-{
- void* kernelParams[] =
- {
- &v0, &v1,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11, typename T12>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11, typename T12, typename T13>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14, T15 v15)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14, T15 v15, T16 v16)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7,
- typename T8, typename T9, typename T10, typename T11, typename T12, typename T13, typename T14, typename T15,
- typename T16, typename T17>
-PX_NOINLINE CUresult launchKernel(CUfunction func, unsigned int numBlocks, unsigned int numThreads, unsigned int sharedMem, CUstream stream,
- T0 v0, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12,
- T13 v13, T14 v14, T15 v15, T16 v16, T17 v17)
-{
- void* kernelParams[] =
- {
- &v0, &v1, &v2, &v3, &v4, &v5, &v6, &v7, &v8, &v9, &v10, &v11, &v12, &v13, &v14, &v15, &v16, &v17,
- };
- return cuLaunchKernel(func, numBlocks, 1, 1, numThreads, 1, 1, sharedMem, stream, kernelParams, NULL);
-}
-
-#endif
-
-#endif