aboutsummaryrefslogtreecommitdiff
path: root/APEX_1.4/module/common/src/CudaModuleScene.cpp
diff options
context:
space:
mode:
authorgit perforce import user <a@b>2016-10-25 12:29:14 -0600
committerSheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>2016-10-25 18:56:37 -0500
commit3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
treefa6485c169e50d7415a651bf838f5bcd0fd3bfbd /APEX_1.4/module/common/src/CudaModuleScene.cpp
downloadphysx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
Initial commit:
PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
Diffstat (limited to 'APEX_1.4/module/common/src/CudaModuleScene.cpp')
-rw-r--r--APEX_1.4/module/common/src/CudaModuleScene.cpp545
1 files changed, 545 insertions, 0 deletions
diff --git a/APEX_1.4/module/common/src/CudaModuleScene.cpp b/APEX_1.4/module/common/src/CudaModuleScene.cpp
new file mode 100644
index 00000000..34a8fccd
--- /dev/null
+++ b/APEX_1.4/module/common/src/CudaModuleScene.cpp
@@ -0,0 +1,545 @@
+/*
+ * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
+ * and proprietary rights in and to this software, related documentation
+ * and any modifications thereto. Any use, reproduction, disclosure or
+ * distribution of this software and related documentation without an express
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ */
+
+
+#include "ApexDefs.h"
+#if APEX_CUDA_SUPPORT
+
+#include "Apex.h"
+#include "ApexSDKIntl.h"
+#include "SceneIntl.h"
+#include "ApexCutil.h"
+#include "CudaModuleScene.h"
+#include <cuda.h>
+#include <texture_types.h>
+
+#include "PxTaskManager.h"
+#include "PxGpuDispatcher.h"
+#include "PxCudaContextManager.h"
+
+#define CUDA_KERNEL_CHECK_ALWAYS 0
+
+namespace nvidia
+{
+class PhysXGpuIndicator;
+
+namespace apex
+{
+
+/**
+ * Workaround hacks for using nvcc --compiler output object files
+ * without linking with CUDART. We must implement our own versions
+ * of these functions that the object files are hard-coded to call into.
+ */
+
/*
 * Static registration tables filled in by the __cudaRegister* callbacks
 * further below. Those callbacks run during C++ static initialization
 * (before main), so these tables must be plain statics with no dynamic
 * setup. CudaModuleScene later resolves the recorded names against
 * driver-API modules built from the captured fatbin images.
 */

#define MAX_MODULES 64
// Fatbin image pointers captured by __cudaRegisterFatBinary; the pseudo-handle
// it returns is simply the index into this table.
static void* moduleTable[ MAX_MODULES ];
static int numRegisteredModules = 0;

#define MAX_FUNCTIONS 256
// Ties a registered kernel name to the module (moduleTable index) containing it.
typedef struct
{
	int modIndex;
	const char* funcName;
} cuFuncDesc;
static cuFuncDesc functionTable[ MAX_FUNCTIONS ];
static int numRegisteredFunctions = 0;

// NOTE(review): non-static, so this has external linkage unlike every other
// table here — presumably referenced from another translation unit; confirm
// before narrowing to static.
const char* funcNameTable[ MAX_FUNCTIONS ];


#define MAX_TEXTURES 256
// Texture-reference registration record (filled by __cudaRegisterTexture).
typedef struct
{
	int modIndex;
	const char* texRefName;
	const struct textureReference* texRefData;
	int dim;
	int read_normalized_float;
} cuTexRefDesc;
static cuTexRefDesc textureTable[ MAX_TEXTURES ];
static int numRegisteredTextures = 0;


#define MAX_SURFACES 256
// Surface-reference registration record (filled by __cudaRegisterSurface).
typedef struct
{
	int modIndex;
	const char* surfRefName;
	const struct surfaceReference* surfRefData;
	int dim;
} cuSurfRefDesc;
static cuSurfRefDesc surfaceTable[ MAX_SURFACES ];
static int numRegisteredSurfaces = 0;


#define MAX_VARIABLES 256
// __constant__ variable registration record (filled by __cudaRegisterVar).
typedef struct
{
	int modIndex;
	const char* varName;
	int size;
} cuVarDesc;
static cuVarDesc variableTable[ MAX_VARIABLES ];
static int numRegisteredVariables = 0;
+
+CudaModuleScene::CudaModuleScene(SceneIntl& scene, Module& module, const char* modulePrefix)
+ : mSceneIntl(scene)
+{
+ PX_UNUSED(modulePrefix);
+
+ PxTaskManager* tm = scene.getTaskManager();
+ PxGpuDispatcher* gd = tm->getGpuDispatcher();
+ PX_ASSERT(gd != NULL);
+ PxScopedCudaLock _lock_(*gd->getCudaContextManager());
+
+ ApexCudaObjManager::init(&module, &scene.getApexCudaTestManager(), gd);
+
+ mCudaModules.resize((uint32_t)numRegisteredModules);
+
+ ApexSDKIntl* apexSdk = GetInternalApexSDK();
+ mPhysXGpuIndicator = apexSdk->registerPhysXIndicatorGpuClient();
+}
+
+void CudaModuleScene::destroy(SceneIntl&)
+{
+ {
+ PxScopedCudaLock _lock_(*getGpuDispatcher()->getCudaContextManager());
+
+ ApexCudaObjManager::releaseAll();
+
+ for (uint32_t i = 0 ; i < mCudaModules.size() ; i++)
+ {
+ mCudaModules[i].release();
+ }
+ }
+
+ ApexSDKIntl* apexSdk = GetInternalApexSDK();
+ apexSdk->unregisterPhysXIndicatorGpuClient(mPhysXGpuIndicator);
+ mPhysXGpuIndicator = NULL;
+}
+
+void CudaModuleScene::onBeforeLaunchApexCudaFunc(const ApexCudaFunc& func, CUstream stream)
+{
+ if (mCudaProfileSession)
+ {
+ mCudaProfileSession->onFuncStart(func.getProfileId(), stream);
+ }
+}
+
// Post-launch hook: notifies the profiler and, when kernel checking is
// enabled, synchronizes the stream so a faulting kernel is reported against
// the function that launched it rather than surfacing at some later call.
void CudaModuleScene::onAfterLaunchApexCudaFunc(const ApexCudaFunc& func, CUstream stream)
{
	if (mCudaProfileSession)
	{
		mCudaProfileSession->onFuncFinish(func.getProfileId(), stream);
	}

	// With CUDA_KERNEL_CHECK_ALWAYS == 0 the sync only happens when the scene
	// requests it; otherwise the #if removes the condition and the block runs
	// unconditionally. Note the preprocessor guard deliberately leaves the
	// following brace-block attached to the if.
#if !CUDA_KERNEL_CHECK_ALWAYS
	if (mSceneIntl.getCudaKernelCheckEnabled())
#endif
	{
		CUresult ret = cuStreamSynchronize(stream);
		if ( CUDA_SUCCESS != ret )
		{
			APEX_INTERNAL_ERROR("Cuda Error %d after launch of func '%s'", ret, func.getName());
			PX_ALWAYS_ASSERT();
		}
	}
}
+
+ApexCudaModule* CudaModuleScene::getCudaModule(int modIndex)
+{
+ mCudaModules[(uint32_t)modIndex].init(moduleTable[(uint32_t)modIndex]);
+ return &mCudaModules[(uint32_t)modIndex];
+}
+
+void CudaModuleScene::initCudaObj(ApexCudaTexRef& texRef)
+{
+ const char* texRefName = texRef.getName();
+
+ for (int j = 0 ; j < numRegisteredTextures ; j++)
+ {
+ if (nvidia::strcmp(textureTable[j].texRefName, texRefName) == 0)
+ {
+ ApexCudaModule* cudaModule = getCudaModule(textureTable[j].modIndex);
+ PX_ASSERT(cudaModule->isValid());
+
+ CUtexref cuTexRef;
+ CUT_SAFE_CALL(cuModuleGetTexRef(&cuTexRef, cudaModule->getCuModule(), texRefName));
+
+ const struct textureReference* texRefData = textureTable[j].texRefData;
+
+ PX_ASSERT(texRefData->channelDesc.x > 0);
+ int numChannels = 1;
+ if (texRefData->channelDesc.y > 0)
+ {
+ PX_ASSERT(texRefData->channelDesc.y == texRefData->channelDesc.x);
+ ++numChannels;
+ }
+ if (texRefData->channelDesc.z > 0)
+ {
+ PX_ASSERT(texRefData->channelDesc.z == texRefData->channelDesc.x);
+ ++numChannels;
+ }
+ if (texRefData->channelDesc.w > 0)
+ {
+ PX_ASSERT(texRefData->channelDesc.w == texRefData->channelDesc.x);
+ ++numChannels;
+ }
+
+ CUarray_format cuFormat = CUarray_format(0);
+ switch (texRefData->channelDesc.f)
+ {
+ case cudaChannelFormatKindSigned:
+ switch (texRefData->channelDesc.x)
+ {
+ case 8:
+ cuFormat = CU_AD_FORMAT_SIGNED_INT8;
+ break;
+ case 16:
+ cuFormat = CU_AD_FORMAT_SIGNED_INT16;
+ break;
+ case 32:
+ cuFormat = CU_AD_FORMAT_SIGNED_INT32;
+ break;
+ }
+ break;
+ case cudaChannelFormatKindUnsigned:
+ switch (texRefData->channelDesc.x)
+ {
+ case 8:
+ cuFormat = CU_AD_FORMAT_UNSIGNED_INT8;
+ break;
+ case 16:
+ cuFormat = CU_AD_FORMAT_UNSIGNED_INT16;
+ break;
+ case 32:
+ cuFormat = CU_AD_FORMAT_UNSIGNED_INT32;
+ break;
+ }
+ break;
+ case cudaChannelFormatKindFloat:
+ cuFormat = CU_AD_FORMAT_FLOAT;
+ break;
+ default:
+ PX_ASSERT(0);
+ };
+ PX_ASSERT(cuFormat != 0);
+
+ int cuFlags = 0;
+ if (textureTable[j].read_normalized_float == 0)
+ {
+ cuFlags |= CU_TRSF_READ_AS_INTEGER;
+ }
+ if (textureTable[j].texRefData->normalized != 0)
+ {
+ cuFlags |= CU_TRSF_NORMALIZED_COORDINATES;
+ }
+
+ texRef.init(this, cuTexRef, cudaModule, cuFormat, numChannels, textureTable[j].dim, cuFlags);
+ break;
+ }
+ }
+}
+
+void CudaModuleScene::initCudaObj(ApexCudaVar& var)
+{
+ const char* varName = var.getName();
+
+ for (int j = 0 ; j < numRegisteredVariables ; j++)
+ {
+ if (nvidia::strcmp(variableTable[j].varName, varName) == 0)
+ {
+ ApexCudaModule* cudaModule = getCudaModule(variableTable[j].modIndex);
+ PX_ASSERT(cudaModule->isValid());
+
+ CUdeviceptr cuDevPtr;
+ size_t size;
+ cuModuleGetGlobal(&cuDevPtr, &size, cudaModule->getCuModule(), varName);
+
+ var.init(this, cudaModule, cuDevPtr, size, getGpuDispatcher()->getCudaContextManager());
+ break;
+ }
+ }
+}
+
+void CudaModuleScene::initCudaObj(ApexCudaFunc& func)
+{
+ for (int j = 0 ; j < numRegisteredFunctions ; j++)
+ {
+ const char* funcName = functionTable[j].funcName;
+ if (func.testNameMatch(funcName))
+ {
+ ApexCudaModule* cudaModule = getCudaModule(functionTable[j].modIndex);
+ PX_ASSERT(cudaModule->isValid());
+
+ CUfunction cuFunc = 0;
+ CUT_SAFE_CALL(cuModuleGetFunction(&cuFunc, cudaModule->getCuModule(), funcName));
+
+ func.init(this, funcName, cuFunc, cudaModule);
+ }
+ }
+}
+
+void CudaModuleScene::initCudaObj(ApexCudaSurfRef& surfRef)
+{
+ if (getGpuDispatcher()->getCudaContextManager()->supportsArchSM20() == false)
+ {
+ return;
+ }
+
+ const char* surfRefName = surfRef.getName();
+
+ for (int j = 0 ; j < numRegisteredSurfaces ; j++)
+ {
+ if (nvidia::strcmp(surfaceTable[j].surfRefName, surfRefName) == 0)
+ {
+ ApexCudaModule* cudaModule = getCudaModule(surfaceTable[j].modIndex);
+ PX_ASSERT(cudaModule->isValid());
+
+ CUsurfref cuSurfRef;
+ CUT_SAFE_CALL(cuModuleGetSurfRef(&cuSurfRef, cudaModule->getCuModule(), surfRefName));
+
+ surfRef.init(this, cuSurfRef, cudaModule);
+ break;
+ }
+ }
+
+}
+
+/*
+ * These calls are all made _before_ main() during static initialization
+ * of your APEX module. So calling into APEX Framework or other
+ * external code modules is out of the question.
+ */
+
+#include "driver_types.h"
+
+#define CUDARTAPI __stdcall
+
// Minimal stand-ins for CUDART's uint3/dim3 so __cudaRegisterFunction below
// can match the signature nvcc-generated object code expects without pulling
// in the CUDA runtime headers. Both are three unsigned ints, matching the
// layout the runtime uses.
typedef struct uint3_t
{
	unsigned int x, y, z;
} uint3;

typedef struct dim3_t
{
	unsigned int x, y, z;
} dim3;
+
+extern "C"
+void** CUDARTAPI __cudaRegisterFatBinary(void* fatBin)
+{
+ //HACK to get real fatbin in CUDA 4.0
+ struct CUIfatbinStruct
+ {
+ int magic;
+ int version;
+ void* fatbinArray;
+ char* fatbinFile;
+ };
+ const CUIfatbinStruct* fatbinStruct = (const CUIfatbinStruct*)fatBin;
+ if (fatbinStruct->magic == 0x466243B1)
+ {
+ fatBin = fatbinStruct->fatbinArray;
+ }
+
+ if (numRegisteredModules < MAX_MODULES)
+ {
+ moduleTable[ numRegisteredModules ] = fatBin;
+ return (void**)(size_t) numRegisteredModules++;
+ }
+ return NULL;
+}
+
+extern "C"
+void CUDARTAPI __cudaUnregisterFatBinary(void** fatCubinHandle)
+{
+ moduleTable[(int)(size_t) fatCubinHandle ] = 0;
+}
+
+extern "C"
+void CUDARTAPI __cudaRegisterTexture(
+ void** fatCubinHandle,
+ const struct textureReference* hostvar,
+ const void** deviceAddress,
+ const char* deviceName,
+ int dim,
+ int read_normalized_float,
+ int ext)
+{
+ PX_UNUSED(fatCubinHandle);
+ PX_UNUSED(hostvar);
+ PX_UNUSED(deviceAddress);
+ PX_UNUSED(deviceName);
+ PX_UNUSED(dim);
+ PX_UNUSED(read_normalized_float);
+ PX_UNUSED(ext);
+
+ if (numRegisteredTextures < MAX_TEXTURES)
+ {
+ //Fix for CUDA 5.5 - remove leading "::"
+ while (*deviceName == ':')
+ {
+ ++deviceName;
+ }
+
+ // We need this association of function to module in order to find textures and globals
+ textureTable[ numRegisteredTextures ].modIndex = (int)(size_t) fatCubinHandle;
+ textureTable[ numRegisteredTextures ].texRefName = deviceName;
+ textureTable[ numRegisteredTextures ].texRefData = hostvar;
+ textureTable[ numRegisteredTextures ].dim = dim;
+ textureTable[ numRegisteredTextures ].read_normalized_float = read_normalized_float;
+ numRegisteredTextures++;
+ }
+}
+
+extern "C"
+void CUDARTAPI __cudaRegisterSurface(
+ void** fatCubinHandle,
+ const struct surfaceReference* hostvar,
+ const void** deviceAddress,
+ const char* deviceName,
+ int dim,
+ int ext)
+{
+ PX_UNUSED(fatCubinHandle);
+ PX_UNUSED(hostvar);
+ PX_UNUSED(deviceAddress);
+ PX_UNUSED(deviceName);
+ PX_UNUSED(dim);
+ PX_UNUSED(ext);
+
+ if (numRegisteredSurfaces < MAX_SURFACES)
+ {
+ //Fix for CUDA 5.5 - remove leading "::"
+ while (*deviceName == ':')
+ {
+ ++deviceName;
+ }
+
+ surfaceTable[ numRegisteredSurfaces ].modIndex = (int)(size_t) fatCubinHandle;
+ surfaceTable[ numRegisteredSurfaces ].surfRefName = deviceName;
+ surfaceTable[ numRegisteredSurfaces ].surfRefData = hostvar;
+ surfaceTable[ numRegisteredSurfaces ].dim = dim;
+ numRegisteredSurfaces++;
+ }
+}
+
+extern "C" void CUDARTAPI __cudaRegisterVar(
+ void** fatCubinHandle,
+ char* hostVar,
+ char* deviceAddress,
+ const char* deviceName,
+ int ext,
+ int size,
+ int constant,
+ int global)
+{
+ PX_UNUSED(fatCubinHandle);
+ PX_UNUSED(hostVar);
+ PX_UNUSED(deviceAddress);
+ PX_UNUSED(deviceName);
+ PX_UNUSED(ext);
+ PX_UNUSED(size);
+ PX_UNUSED(constant);
+ PX_UNUSED(global);
+
+ if (constant != 0 && numRegisteredVariables < MAX_VARIABLES)
+ {
+ variableTable[ numRegisteredVariables ].modIndex = (int)(size_t) fatCubinHandle;
+ variableTable[ numRegisteredVariables ].varName = deviceName;
+ variableTable[ numRegisteredVariables ].size = size;
+ numRegisteredVariables++;
+ }
+}
+
+
+extern "C" void CUDARTAPI __cudaRegisterShared(
+ void** fatCubinHandle,
+ void** devicePtr
+)
+{
+ PX_UNUSED(fatCubinHandle);
+ PX_UNUSED(devicePtr);
+}
+
+
+
+extern "C"
+void CUDARTAPI __cudaRegisterFunction(
+ void** fatCubinHandle,
+ const char* hostFun,
+ char* deviceFun,
+ const char* deviceName,
+ int thread_limit,
+ uint3* tid,
+ uint3* bid,
+ dim3* bDim,
+ dim3* gDim,
+ int* wSize)
+{
+ PX_UNUSED(hostFun);
+ PX_UNUSED(deviceFun);
+ PX_UNUSED(thread_limit);
+ PX_UNUSED(tid);
+ PX_UNUSED(bid);
+ PX_UNUSED(bDim);
+ PX_UNUSED(gDim);
+ PX_UNUSED(wSize);
+
+ if (numRegisteredFunctions < MAX_FUNCTIONS)
+ {
+ // We need this association of function to module in order to find textures and globals
+ functionTable[ numRegisteredFunctions ].modIndex = (int)(size_t) fatCubinHandle;
+ functionTable[ numRegisteredFunctions ].funcName = deviceName;
+ funcNameTable[ numRegisteredFunctions ] = deviceName;
+ numRegisteredFunctions++;
+ }
+}
+
+/* These functions are implemented just to resolve link dependencies */
+
+extern "C"
+cudaError_t CUDARTAPI cudaLaunch(const char* entry)
+{
+ PX_UNUSED(entry);
+ return cudaSuccess;
+}
+
+extern "C"
+cudaError_t CUDARTAPI cudaSetupArgument(
+ const void* arg,
+ size_t size,
+ size_t offset)
+{
+ PX_UNUSED(arg);
+ PX_UNUSED(size);
+ PX_UNUSED(offset);
+ return cudaSuccess;
+}
+
+extern "C"
+struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc(
+ int x, int y, int z, int w, enum cudaChannelFormatKind f)
+{
+ struct cudaChannelFormatDesc desc;
+ desc.x = x;
+ desc.y = y;
+ desc.z = z;
+ desc.w = w;
+ desc.f = f;
+ return desc;
+}
+
+}
+} // namespace nvidia
+
+#endif