Initial commit:

PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
author: git perforce import user <a@b> 2016-10-25 12:29:14 -0600
committer: Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> 2016-10-25 18:56:37 -0500
commit: 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree: fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /APEX_1.4/common/src/ApexCudaProfile.cpp
download: physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
1 files changed, 332 insertions, 0 deletions
diff --git a/APEX_1.4/common/src/ApexCudaProfile.cpp b/APEX_1.4/common/src/ApexCudaProfile.cpp
new file mode 100644
index 00000000..cee4cfc2
--- /dev/null
+++ b/APEX_1.4/common/src/ApexCudaProfile.cpp
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2008-2015, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
+ * and proprietary rights in and to this software, related documentation
+ * and any modifications thereto.  Any use, reproduction, disclosure or
+ * distribution of this software and related documentation without an express
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ */
+
+
+#include "ApexDefs.h"
+#if APEX_CUDA_SUPPORT && !defined(INSTALLER)
+
+#include "ApexCudaProfile.h"
+#include "ApexCudaWrapper.h"
+#include <cuda.h>
+#include "ModuleIntl.h"
+#include "ApexSDKHelpers.h"
+
+namespace nvidia
+{
+namespace apex
+{
+
+	ApexCudaProfileSession::ApexCudaProfileSession() // Builds an idle session: no timer event yet and empty frame bounds.
+		: mTimer(NULL) // the reference event is created lazily by start()
+		, mFrameStart(PX_MAX_F32) // sentinel; lowered with PxMin in flushProfileInfo()
+		, mFrameFinish(0.f) // raised with PxMax in flushProfileInfo()
+	{
+		mMemBuf.setEndianMode(nvidia::PsMemoryBuffer::ENDIAN_LITTLE); // select little-endian mode for the session buffer
+	}
+	// Releases the reference timer event, if start() ever created one.
+	ApexCudaProfileSession::~ApexCudaProfileSession()
+	{
+		if (mTimer == NULL)
+		{
+			// start() never created the event, nothing to destroy.
+			return;
+		}
+		CUT_SAFE_CALL(cuEventDestroy((CUevent)mTimer));
+	}
+
+	/**
+	 * Flushes every kernel timing collected since the previous call into the
+	 * session buffer, then appends two synthetic events for the frame:
+	 *   - id 0 (registered as "Frame" by start()): spans from the earliest kernel
+	 *     start to the latest kernel stop;
+	 *   - id 1 (registered as "Summary of elapsed time" by start()): begins at the
+	 *     frame start and lasts for the sum of the individual kernel durations.
+	 * Each event is a pair of records (op 1 = begin, op 2 = end), each written as
+	 * uint32 op, uint64 time, uint32 id. Times are scaled by mManager->mTimeFormat.
+	 * The pending timing list is cleared afterwards.
+	 */
+	void ApexCudaProfileSession::nextFrame()
+	{
+		mFrameStart = PX_MAX_F32;
+		mFrameFinish = 0.f;
+		float sumElapsed = 0.f;
+		for (uint32_t i = 0; i < mProfileDataList.size(); i++)
+		{
+			sumElapsed += flushProfileInfo(mProfileDataList[i]);
+		}
+
+		// When nothing was recorded this frame, mFrameStart still holds the
+		// PX_MAX_F32 sentinel. Converting sentinel * mTimeFormat to uint64_t is an
+		// out-of-range float-to-integer conversion (undefined behaviour), and the
+		// frame would start after it finishes. Collapse the empty frame to a
+		// zero-length interval instead.
+		const float frameStart = (mProfileDataList.size() != 0) ? mFrameStart : mFrameFinish;
+
+		// Write frame as fictive event
+		uint32_t op = 1, id = 0;
+		uint64_t start = static_cast<uint64_t>(frameStart * mManager->mTimeFormat);
+		mMemBuf.write(&op, sizeof(op));
+		mMemBuf.write(&start, sizeof(start));
+		mMemBuf.write(&id, sizeof(id));
+
+		op = 2;
+		uint64_t stop = static_cast<uint64_t>(mFrameFinish * mManager->mTimeFormat);
+		mMemBuf.write(&op, sizeof(op));
+		mMemBuf.write(&stop, sizeof(stop));
+		mMemBuf.write(&id, sizeof(id));
+
+		// Write summary of elapsed gpu kernel time as event
+		op = 1, id = 1;
+		start = static_cast<uint64_t>(frameStart * mManager->mTimeFormat);
+		mMemBuf.write(&op, sizeof(op));
+		mMemBuf.write(&start, sizeof(start));
+		mMemBuf.write(&id, sizeof(id));
+
+		op = 2;
+		stop = static_cast<uint64_t>((frameStart + sumElapsed) * mManager->mTimeFormat);
+		mMemBuf.write(&op, sizeof(op));
+		mMemBuf.write(&stop, sizeof(stop));
+		mMemBuf.write(&id, sizeof(id));
+
+		mProfileDataList.clear();
+	}
+	
+	/**
+	 * Begins a new profiling session.
+	 *
+	 * Rewinds the session buffer and writes one name-registration record
+	 * (op 0: uint32 op, uint32 size, name bytes, uint32 id) for the two synthetic
+	 * events (id 0 "Frame", id 1 "Summary of elapsed time") and for every kernel
+	 * known to the manager. Each kernel found in its module scene is attached to
+	 * this session via setProfileSession(this). Finally the reference timer event
+	 * is created on first use and recorded on stream 0 under the CUDA context lock.
+	 *
+	 * Does nothing when the session has no manager or the manager has no scene.
+	 */
+	void ApexCudaProfileSession::start()
+	{
+		if (!mManager || !mManager->mApexScene) return;
+
+		mLock.lock();
+
+		mMemBuf.seekWrite(0);
+		uint32_t op = 0, sz, id = 0;
+
+		// The names are arrays, not pointers: sizeof on a `const char*` gives the
+		// pointer size (4 or 8), which over-read the "Frame" literal and truncated
+		// the summary name. sizeof on the array gives the real size including the
+		// terminating NUL.
+		// NOTE(review): confirm that including the NUL matches what
+		// ki.functionName.size() covers for the kernel names written below.
+		static const char frameEvent[] = "Frame";
+		sz = static_cast<uint32_t>(sizeof(frameEvent));
+		mMemBuf.write(&op, sizeof(op));
+		mMemBuf.write(&sz, sizeof(sz));
+		mMemBuf.write(frameEvent, sz);
+		mMemBuf.write(&id, sizeof(id));
+
+		static const char summaryElapsed[] = "Summary of elapsed time";
+		sz = static_cast<uint32_t>(sizeof(summaryElapsed));
+		id = 1;
+		mMemBuf.write(&op, sizeof(op));
+		mMemBuf.write(&sz, sizeof(sz));
+		mMemBuf.write(summaryElapsed, sz);
+		mMemBuf.write(&id, sizeof(id));
+
+		//Register kernels
+		for (uint32_t i = 0; i < mManager->mKernels.size(); i++)
+		{
+			ApexCudaProfileManager::KernelInfo& ki = mManager->mKernels[i];
+			sz = ki.functionName.size();
+			mMemBuf.write(&op, sizeof(op));
+			mMemBuf.write(&sz, sizeof(sz));
+			mMemBuf.write(ki.functionName.c_str(), sz);
+			mMemBuf.write(&ki.id, sizeof(ki.id));
+
+			// Walk the module's CUDA object list and attach the first function
+			// whose name matches this kernel entry.
+			ModuleSceneIntl* moduleScene = mManager->mApexScene->getInternalModuleScene(ki.moduleName.c_str());
+			ApexCudaObj* obj = NULL;
+			if (moduleScene)
+			{
+				obj = static_cast<ApexCudaObj*>(moduleScene->getHeadCudaObj());
+			}
+			while(obj)
+			{
+				if (obj->getType() == ApexCudaObj::FUNCTION)
+				{
+					if (ApexSimpleString(DYNAMIC_CAST(ApexCudaFunc*)(obj)->getName()) == ki.functionName)
+					{
+						DYNAMIC_CAST(ApexCudaFunc*)(obj)->setProfileSession(this);
+						break;
+					}
+				}
+				obj = obj->next();
+			}
+		}
+
+		{
+			PxCudaContextManager* ctx = mManager->mApexScene->getTaskManager()->getGpuDispatcher()->getCudaContextManager();
+			PxScopedCudaLock s(*ctx);
+
+			//Run timer
+			if (mTimer == NULL)
+			{
+				CUT_SAFE_CALL(cuEventCreate((CUevent*)&mTimer, CU_EVENT_DEFAULT));
+			}
+			// All later timestamps are measured relative to this event.
+			CUT_SAFE_CALL(cuEventRecord((CUevent)mTimer, 0));
+		}
+		mLock.unlock();
+	}
+
+	// Looks up the id assigned to the kernel (name, moduleName) in the manager's
+	// kernel list; yields 0 when the kernel is not registered.
+	uint32_t ApexCudaProfileSession::getProfileId(const char* name, const char* moduleName)
+	{
+		Array <ApexCudaProfileManager::KernelInfo>& kernels = mManager->mKernels;
+		Array <ApexCudaProfileManager::KernelInfo>::Iterator found
+			= kernels.find(ApexCudaProfileManager::KernelInfo(name, moduleName));
+		if (found == kernels.end())
+		{
+			return 0;
+		}
+		return found->id;
+	}
+
+	// Called before a profiled kernel launch: creates the begin/end event pair,
+	// records the begin event on `stream` and queues the pair for nextFrame().
+	void ApexCudaProfileSession::onFuncStart(uint32_t id, void* stream)
+	{
+		// Acquired here and released by the matching onFuncFinish() call.
+		mLock.lock();
+
+		CUevent beginEvent;
+		CUevent endEvent;
+		CUT_SAFE_CALL(cuEventCreate(&beginEvent, CU_EVENT_DEFAULT));
+		CUT_SAFE_CALL(cuEventCreate(&endEvent, CU_EVENT_DEFAULT));
+
+		CUT_SAFE_CALL(cuEventRecord(beginEvent, (CUstream)stream));
+
+		// The end event is only created here; onFuncFinish() records it.
+		ProfileData entry;
+		entry.id = id;
+		entry.start = beginEvent;
+		entry.stop = endEvent;
+		mProfileDataList.pushBack(entry);
+	}
+	// Called after a profiled kernel launch: records the end event of the most
+	// recently queued entry on `stream` and releases the lock taken in onFuncStart().
+	void ApexCudaProfileSession::onFuncFinish(uint32_t id, void* stream)
+	{
+		// `id` is only consumed by the assertion below.
+		PX_UNUSED(id);
+
+		ProfileData& latest = mProfileDataList.back();
+		PX_ASSERT(latest.id == id);
+		CUT_SAFE_CALL(cuEventRecord((CUevent)latest.stop, (CUstream)stream));
+
+		mLock.unlock();
+	}
+
+	// Waits for both events of `pd`, writes its begin (op 1) and end (op 2)
+	// records relative to the session timer, destroys the events, widens the
+	// current frame bounds and returns the elapsed time between the two events.
+	float ApexCudaProfileSession::flushProfileInfo(ProfileData& pd)
+	{
+		CUevent beginEvent = (CUevent)pd.start;
+		CUevent endEvent = (CUevent)pd.stop;
+
+		// Begin record: time of the begin event measured from mTimer.
+		float beginTime = 0.f;
+		CUT_SAFE_CALL(cuEventSynchronize(beginEvent));
+		CUT_SAFE_CALL(cuEventElapsedTime(&beginTime, (CUevent)mTimer, beginEvent));
+		uint32_t op = 1;
+		uint64_t beginStamp = static_cast<uint64_t>(beginTime * mManager->mTimeFormat);
+		mMemBuf.write(&op, sizeof(op));
+		mMemBuf.write(&beginStamp, sizeof(beginStamp));
+		mMemBuf.write(&pd.id, sizeof(pd.id));
+
+		// End record: time of the end event measured from mTimer.
+		float endTime = 0.f;
+		CUT_SAFE_CALL(cuEventSynchronize(endEvent));
+		CUT_SAFE_CALL(cuEventElapsedTime(&endTime, (CUevent)mTimer, endEvent));
+		op = 2;
+		uint64_t endStamp = static_cast<uint64_t>(endTime * mManager->mTimeFormat);
+		mMemBuf.write(&op, sizeof(op));
+		mMemBuf.write(&endStamp, sizeof(endStamp));
+		mMemBuf.write(&pd.id, sizeof(pd.id));
+
+		// The pair is single-use; release it once both timestamps are read.
+		CUT_SAFE_CALL(cuEventDestroy(beginEvent));
+		CUT_SAFE_CALL(cuEventDestroy(endEvent));
+
+		mFrameStart = PxMin(mFrameStart, beginTime);
+		mFrameFinish = PxMax(mFrameFinish, endTime);
+		return endTime - beginTime;
+	}
+
+	/**
+	 * Ends the session: detaches this session from every registered kernel and
+	 * dumps the session buffer to a file.
+	 *
+	 * The file name is mManager->mPath + "profileSesion_" (sic, the spelling is
+	 * part of the on-disk name) + the session counter formatted by
+	 * ApexSimpleString(mSessionCount, 3).
+	 *
+	 * @return true only if the file was opened, the whole buffer was written and
+	 *         the file was closed without error; false otherwise, including when
+	 *         the session has no manager or the manager has no scene.
+	 */
+	bool ApexCudaProfileSession::stopAndSave()
+	{
+		if (!mManager || !mManager->mApexScene) return false;
+
+		//unregister functions
+		for (uint32_t i = 0; i < mManager->mKernels.size(); i++)
+		{
+			ApexCudaProfileManager::KernelInfo& ki = mManager->mKernels[i];
+
+			ModuleSceneIntl* moduleScene = mManager->mApexScene->getInternalModuleScene(ki.moduleName.c_str());
+			ApexCudaObj* obj = NULL;
+			if (moduleScene)
+			{
+				obj = static_cast<ApexCudaObj*>(moduleScene->getHeadCudaObj());
+			}
+			while(obj)
+			{
+				if (obj->getType() == ApexCudaObj::FUNCTION)
+				{
+					if (ApexSimpleString(DYNAMIC_CAST(ApexCudaFunc*)(obj)->getName()) == ki.functionName)
+					{
+						DYNAMIC_CAST(ApexCudaFunc*)(obj)->setProfileSession(NULL);
+						break;
+					}
+				}
+				obj = obj->next();
+			}
+		}
+
+		//save to file
+		ApexSimpleString path(mManager->mPath);
+		path += ApexSimpleString("profileSesion_");
+		path += ApexSimpleString(mManager->mSessionCount, 3);
+		FILE* saveFile = fopen(path.c_str(), "wb");
+		if (!saveFile)
+		{
+			return false;
+		}
+
+		// A short or failed write must not be reported as success. An empty
+		// buffer is skipped because there is nothing to write.
+		const size_t byteCount = mMemBuf.getWriteBufferSize();
+		bool written = true;
+		if (byteCount != 0)
+		{
+			written = (fwrite(mMemBuf.getWriteBuffer(), 1, byteCount, saveFile) == byteCount);
+		}
+
+		// Always close the file, even after a failed write, so the handle is not leaked.
+		const bool closed = (fclose(saveFile) == 0);
+		return written && closed;
+	}
+
+	ApexCudaProfileManager::ApexCudaProfileManager() // Starts with profiling disabled and no sessions counted.
+		: mState(false)
+		, mTimeFormat(NANOSECOND)
+		, mSessionCount(0)
+		, mReservedId(2) // offset added to kernel ids in setKernel(); ids 0 and 1 are written by ApexCudaProfileSession::start() for the two synthetic events
+	{
+		mSession.init(this); // NOTE(review): mApexScene is not set here although setKernel() and nextFrame() read it - confirm it is initialized elsewhere
+	}
+
+	ApexCudaProfileManager::~ApexCudaProfileManager() // Empty body: no explicit cleanup is performed here.
+	{
+	}
+
+	/**
+	 * Adds a kernel to the set of profiled kernels.
+	 *
+	 * @param functionName kernel name, or "*" to add every CUDA function currently
+	 *                     listed in the module's scene.
+	 * @param moduleName   name of the module that owns the kernel.
+	 *
+	 * Each new entry gets the id mKernels.size() + mReservedId. Nothing happens if
+	 * (functionName, moduleName) is already registered. Otherwise profiling is
+	 * switched off through enable(false) after the update, even when the "*"
+	 * expansion added no kernel.
+	 */
+	void ApexCudaProfileManager::setKernel(const char* functionName, const char* moduleName)
+	{
+		if (mKernels.find(KernelInfo(functionName, moduleName)) == mKernels.end())
+		{
+			if (ApexSimpleString(functionName) == "*")
+			{
+				//Add all function registered in module
+				// The scene may not be attached yet. The other entry points check
+				// mApexScene before using it, so do the same here instead of
+				// dereferencing a null pointer.
+				ModuleSceneIntl* moduleScene = mApexScene ? mApexScene->getInternalModuleScene(moduleName) : NULL;
+				ApexCudaObj* obj = NULL;
+				if (moduleScene)
+				{
+					obj = static_cast<ApexCudaObj*>(moduleScene->getHeadCudaObj());
+				}
+				while(obj)
+				{
+					if (obj->getType() == ApexCudaObj::FUNCTION)
+					{
+						const char* name = DYNAMIC_CAST(ApexCudaFunc*)(obj)->getName();
+						// Skip functions that were already registered individually.
+						if (mKernels.find(KernelInfo(name, moduleName)) == mKernels.end())
+						{
+							mKernels.pushBack(KernelInfo(name, moduleName, mKernels.size() + mReservedId));
+						}
+					}
+					obj = obj->next();
+				}
+			}
+			else
+			{
+				mKernels.pushBack(KernelInfo(functionName, moduleName, mKernels.size() + mReservedId));
+			}
+			enable(false);
+		}
+	}
+
+	// Switches profiling on or off. Turning it on starts the session and bumps
+	// the session counter; turning it off stops the session and saves it.
+	// Requesting the state that is already active does nothing.
+	void ApexCudaProfileManager::enable(bool state)
+	{
+		if (state == mState)
+		{
+			return;
+		}
+
+		if (!state)
+		{
+			mSession.stopAndSave();
+		}
+		else
+		{
+			mSession.start();
+			mSessionCount++;
+		}
+		mState = state;
+	}
+
+	// Forwards the end-of-frame notification to the session, but only while
+	// profiling is enabled and a scene is attached.
+	void ApexCudaProfileManager::nextFrame()
+	{
+		if (!mApexScene || !mState)
+		{
+			return;
+		}
+		mSession.nextFrame();
+	}
+}
+} // namespace nvidia::apex
+
+#endif
author	git perforce import user <a@b>	2016-10-25 12:29:14 -0600
committer	Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>	2016-10-25 18:56:37 -0500
commit	3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
tree	fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /APEX_1.4/common/src/ApexCudaProfile.cpp
download	physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip