diff options
| author | git perforce import user <a@b> | 2016-10-25 12:29:14 -0600 |
|---|---|---|
| committer | Sheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees> | 2016-10-25 18:56:37 -0500 |
| commit | 3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch) | |
| tree | fa6485c169e50d7415a651bf838f5bcd0fd3bfbd /APEX_1.4/common/src/ApexCudaProfile.cpp | |
| download | physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip | |
Initial commit:
PhysX 3.4.0 Update @ 21294896
APEX 1.4.0 Update @ 21275617
[CL 21300167]
Diffstat (limited to 'APEX_1.4/common/src/ApexCudaProfile.cpp')
| -rw-r--r-- | APEX_1.4/common/src/ApexCudaProfile.cpp | 332 |
1 files changed, 332 insertions, 0 deletions
diff --git a/APEX_1.4/common/src/ApexCudaProfile.cpp b/APEX_1.4/common/src/ApexCudaProfile.cpp new file mode 100644 index 00000000..cee4cfc2 --- /dev/null +++ b/APEX_1.4/common/src/ApexCudaProfile.cpp @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ + + +#include "ApexDefs.h" +#if APEX_CUDA_SUPPORT && !defined(INSTALLER) + +#include "ApexCudaProfile.h" +#include "ApexCudaWrapper.h" +#include <cuda.h> +#include "ModuleIntl.h" +#include "ApexSDKHelpers.h" + +namespace nvidia +{ +namespace apex +{ + + ApexCudaProfileSession::ApexCudaProfileSession() + : mTimer(NULL) + , mFrameStart(PX_MAX_F32) + , mFrameFinish(0.f) + { + mMemBuf.setEndianMode(nvidia::PsMemoryBuffer::ENDIAN_LITTLE); + } + ApexCudaProfileSession::~ApexCudaProfileSession() + { + if (mTimer) + { + CUT_SAFE_CALL(cuEventDestroy((CUevent)mTimer)); + } + } + + void ApexCudaProfileSession::nextFrame() + { + mFrameStart = PX_MAX_F32; + mFrameFinish = 0.f; + float sumElapsed = 0.f; + for (uint32_t i = 0; i < mProfileDataList.size(); i++) + { + sumElapsed += flushProfileInfo(mProfileDataList[i]); + } + + // Write frame as fictive event + uint32_t op = 1, id = 0; + uint64_t start = static_cast<uint64_t>(mFrameStart * mManager->mTimeFormat); + mMemBuf.write(&op, sizeof(op)); + mMemBuf.write(&start, sizeof(start)); + mMemBuf.write(&id, sizeof(id)); + + op = 2; + uint64_t stop = static_cast<uint64_t>(mFrameFinish * mManager->mTimeFormat); + mMemBuf.write(&op, sizeof(op)); + mMemBuf.write(&stop, sizeof(stop)); + mMemBuf.write(&id, sizeof(id)); + + // Write summary of elapsed gpu kernel time as event + op = 1, id = 1; + start = static_cast<uint64_t>(mFrameStart * mManager->mTimeFormat); + mMemBuf.write(&op, sizeof(op)); + mMemBuf.write(&start, sizeof(start)); + mMemBuf.write(&id, sizeof(id)); + + op = 2; + stop = static_cast<uint64_t>((mFrameStart + sumElapsed) * mManager->mTimeFormat); + mMemBuf.write(&op, sizeof(op)); + mMemBuf.write(&stop, sizeof(stop)); + mMemBuf.write(&id, sizeof(id)); + + mProfileDataList.clear(); + } + + void ApexCudaProfileSession::start() + { + if (!mManager || !mManager->mApexScene) return; + + mLock.lock(); + + mMemBuf.seekWrite(0); + uint32_t op = 0, sz, id = 0; + const char* frameEvent = "Frame"; sz = sizeof(frameEvent); + mMemBuf.write(&op, sizeof(op)); + mMemBuf.write(&sz, sizeof(sz)); + mMemBuf.write(frameEvent, sz); + mMemBuf.write(&id, sizeof(id)); + + const char* summaryElapsed = "Summary of elapsed time"; sz = sizeof(summaryElapsed); + id = 1; + mMemBuf.write(&op, sizeof(op)); + mMemBuf.write(&sz, sizeof(sz)); + mMemBuf.write(summaryElapsed, sz); + mMemBuf.write(&id, sizeof(id)); + + //Register kernels + for (uint32_t i = 0; i < mManager->mKernels.size(); i++) + { + ApexCudaProfileManager::KernelInfo& ki = mManager->mKernels[i]; + sz = ki.functionName.size(); + mMemBuf.write(&op, sizeof(op)); + mMemBuf.write(&sz, sizeof(sz)); + mMemBuf.write(ki.functionName.c_str(), sz); + mMemBuf.write(&ki.id, sizeof(ki.id)); + + ModuleSceneIntl* moduleScene = mManager->mApexScene->getInternalModuleScene(ki.moduleName.c_str()); + ApexCudaObj* obj = NULL; + if (moduleScene) + { + obj = static_cast<ApexCudaObj*>(moduleScene->getHeadCudaObj()); + } + while(obj) + { + if (obj->getType() == ApexCudaObj::FUNCTION) + { + if (ApexSimpleString(DYNAMIC_CAST(ApexCudaFunc*)(obj)->getName()) == ki.functionName) + { + DYNAMIC_CAST(ApexCudaFunc*)(obj)->setProfileSession(this); + break; + } + } + obj = obj->next(); + } + } + + { + PxCudaContextManager* ctx = mManager->mApexScene->getTaskManager()->getGpuDispatcher()->getCudaContextManager(); + PxScopedCudaLock s(*ctx); + + //Run timer + if (mTimer == NULL) + { + CUT_SAFE_CALL(cuEventCreate((CUevent*)&mTimer, CU_EVENT_DEFAULT)); + } + CUT_SAFE_CALL(cuEventRecord((CUevent)mTimer, 0)); + } + mLock.unlock(); + } + + uint32_t ApexCudaProfileSession::getProfileId(const char* name, const char* moduleName) + { + Array <ApexCudaProfileManager::KernelInfo>::Iterator it + = mManager->mKernels.find(ApexCudaProfileManager::KernelInfo(name, moduleName)); + if (it != mManager->mKernels.end()) + { + return it->id; + } + return 0; + } + + void ApexCudaProfileSession::onFuncStart(uint32_t id, void* stream) + { + mLock.lock(); + CUevent start; + CUevent stop; + + CUT_SAFE_CALL(cuEventCreate(&start, CU_EVENT_DEFAULT)); + CUT_SAFE_CALL(cuEventCreate(&stop, CU_EVENT_DEFAULT)); + + CUT_SAFE_CALL(cuEventRecord(start, (CUstream)stream)); + + ProfileData data; + data.id = id; + data.start = start; + data.stop = stop; + mProfileDataList.pushBack(data); + + } + void ApexCudaProfileSession::onFuncFinish(uint32_t id, void* stream) + { + PX_UNUSED(id); + ProfileData& data = mProfileDataList.back(); + PX_ASSERT(data.id == id); + + CUT_SAFE_CALL(cuEventRecord((CUevent)data.stop, (CUstream)stream)); + + mLock.unlock(); + } + + float ApexCudaProfileSession::flushProfileInfo(ProfileData& pd) + { + CUevent start = (CUevent)pd.start; + CUevent stop = (CUevent)pd.stop; + + uint32_t op = 1; + float startTf = 0.f, stopTf = 0.f; + uint64_t startT = 0, stopT = 0; + CUT_SAFE_CALL(cuEventSynchronize(start)); + CUT_SAFE_CALL(cuEventElapsedTime(&startTf, (CUevent)mTimer, start)); + startT = static_cast<uint64_t>(startTf * mManager->mTimeFormat) ; + mMemBuf.write(&op, sizeof(op)); + mMemBuf.write(&startT, sizeof(startT)); + mMemBuf.write(&pd.id, sizeof(pd.id)); + + op = 2; + CUT_SAFE_CALL(cuEventSynchronize((CUevent)stop)); + CUT_SAFE_CALL(cuEventElapsedTime(&stopTf, (CUevent)mTimer, (CUevent)stop)); + stopT = static_cast<uint64_t>(stopTf * mManager->mTimeFormat); + mMemBuf.write(&op, sizeof(op)); + mMemBuf.write(&stopT, sizeof(stopT)); + mMemBuf.write(&pd.id, sizeof(pd.id)); + + CUT_SAFE_CALL(cuEventDestroy((CUevent)start)); + CUT_SAFE_CALL(cuEventDestroy((CUevent)stop)); + + mFrameStart = PxMin(mFrameStart, startTf); + mFrameFinish = PxMax(mFrameFinish, stopTf); + return stopTf - startTf; + } + + bool ApexCudaProfileSession::stopAndSave() + { + if (!mManager || !mManager->mApexScene) return false; + + //unregister functions + for (uint32_t i = 0; i < mManager->mKernels.size(); i++) + { + ApexCudaProfileManager::KernelInfo& ki = mManager->mKernels[i]; + + ModuleSceneIntl* moduleScene = mManager->mApexScene->getInternalModuleScene(ki.moduleName.c_str()); + ApexCudaObj* obj = NULL; + if (moduleScene) + { + obj = static_cast<ApexCudaObj*>(moduleScene->getHeadCudaObj()); + } + while(obj) + { + if (obj->getType() == ApexCudaObj::FUNCTION) + { + if (ApexSimpleString(DYNAMIC_CAST(ApexCudaFunc*)(obj)->getName()) == ki.functionName) + { + DYNAMIC_CAST(ApexCudaFunc*)(obj)->setProfileSession(NULL); + break; + } + } + obj = obj->next(); + } + } + + //save to file + ApexSimpleString path(mManager->mPath); + path += ApexSimpleString("profileSesion_"); + path += ApexSimpleString(mManager->mSessionCount, 3); + FILE* saveFile = fopen(path.c_str(), "wb"); + if (saveFile) + { + fwrite(mMemBuf.getWriteBuffer(), mMemBuf.getWriteBufferSize(), 1, saveFile); + return !fclose(saveFile); + } + return false; + } + + ApexCudaProfileManager::ApexCudaProfileManager() + : mState(false) + , mTimeFormat(NANOSECOND) + , mSessionCount(0) + , mReservedId(2) + { + mSession.init(this); + } + + ApexCudaProfileManager::~ApexCudaProfileManager() + { + } + + void ApexCudaProfileManager::setKernel(const char* functionName, const char* moduleName) + { + if (mKernels.find(KernelInfo(functionName, moduleName)) == mKernels.end()) + { + if (ApexSimpleString(functionName) == "*") + { + //Add all function registered in module + ModuleSceneIntl* moduleScene = mApexScene->getInternalModuleScene(moduleName); + ApexCudaObj* obj = NULL; + if (moduleScene) + { + obj = static_cast<ApexCudaObj*>(moduleScene->getHeadCudaObj()); + } + while(obj) + { + if (obj->getType() == ApexCudaObj::FUNCTION) + { + const char* name = DYNAMIC_CAST(ApexCudaFunc*)(obj)->getName(); + if (mKernels.find(KernelInfo(name, moduleName)) == mKernels.end()) + { + mKernels.pushBack(KernelInfo(name, moduleName, mKernels.size() + mReservedId)); + } + } + obj = obj->next(); + } + } + else + { + mKernels.pushBack(KernelInfo(functionName, moduleName, mKernels.size() + mReservedId)); + } + enable(false); + } + } + + void ApexCudaProfileManager::enable(bool state) + { + if (state != mState) + { + if (state) + { + mSession.start(); + mSessionCount++; + } + else + { + mSession.stopAndSave(); + } + } + mState = state; + } + + void ApexCudaProfileManager::nextFrame() + { + if (mApexScene && mState) + { + mSession.nextFrame(); + } + } +} +} // namespace nvidia::apex + +#endif |