aboutsummaryrefslogtreecommitdiff
path: root/APEX_1.4/common/include/ApexCudaTest.h
diff options
context:
space:
mode:
authorgit perforce import user <a@b>2016-10-25 12:29:14 -0600
committerSheikh Dawood Abdul Ajees <Sheikh Dawood Abdul Ajees>2016-10-25 18:56:37 -0500
commit3dfe2108cfab31ba3ee5527e217d0d8e99a51162 (patch)
treefa6485c169e50d7415a651bf838f5bcd0fd3bfbd /APEX_1.4/common/include/ApexCudaTest.h
downloadphysx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.tar.xz
physx-3.4-3dfe2108cfab31ba3ee5527e217d0d8e99a51162.zip
Initial commit:
PhysX 3.4.0 Update @ 21294896 APEX 1.4.0 Update @ 21275617 [CL 21300167]
Diffstat (limited to 'APEX_1.4/common/include/ApexCudaTest.h')
-rw-r--r--APEX_1.4/common/include/ApexCudaTest.h363
1 files changed, 363 insertions, 0 deletions
diff --git a/APEX_1.4/common/include/ApexCudaTest.h b/APEX_1.4/common/include/ApexCudaTest.h
new file mode 100644
index 00000000..43a0c6ea
--- /dev/null
+++ b/APEX_1.4/common/include/ApexCudaTest.h
@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
+ * and proprietary rights in and to this software, related documentation
+ * and any modifications thereto. Any use, reproduction, disclosure or
+ * distribution of this software and related documentation without an express
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ */
+
+
+#ifndef __APEX_CUDA_TEST__
+#define __APEX_CUDA_TEST__
+
+#include "ApexDefs.h"
+#include "CudaTestManager.h"
+
+#include "PsMemoryBuffer.h"
+#include "ApexString.h"
+#include "ApexMirroredArray.h"
+#include "SceneIntl.h"
+
+#include "ApexCudaDefs.h"
+
+namespace nvidia
+{
+namespace apex
+{
+
+struct ApexCudaMemRefBase;
+class ApexCudaObj;
+class ApexCudaFunc;
+struct ApexCudaFuncParams;
+class ApexCudaTexRef;
+class ApexCudaSurfRef;
+
+const uint32_t ApexCudaTestFileVersion = 103;
+
+namespace apexCudaTest
+{
+
+struct MemRef
+{
+ ApexSimpleString name;
+ const void* gpuPtr;
+ size_t size;
+ int32_t dataOffset;
+ uint32_t bufferOffset;
+ uint32_t fpType; // Floating point type, if 0 - not a float, else if 4 - float, else if 8 - double
+
+ MemRef(const void* gpuPtr, size_t size, int32_t dataOffset, uint32_t bufferOffset, uint32_t fpType = 0)
+ : gpuPtr(gpuPtr), size(size), dataOffset(dataOffset), bufferOffset(bufferOffset), fpType(fpType) {}
+};
+
+enum ObjTypeEnum
+{
+ OBJ_TYPE_NONE = 0,
+ OBJ_TYPE_TEX_REF_MEM = 1,
+ OBJ_TYPE_CONST_MEM = 2,
+ OBJ_TYPE_SURF_REF = 4,
+ OBJ_TYPE_TEX_REF_ARR = 5
+};
+
+enum KernelTypeEnum
+{
+ KT_SYNC,
+ KT_FREE,
+ KT_FREE2D,
+ KT_FREE3D,
+ KT_BOUND
+};
+
+}
+
+/** Read cuda kernel context from specified file. Run kernel ant compare output with results from file
+*/
+class ApexCudaTestKernelContextReader : public UserAllocated
+{
+ struct Dim3
+ {
+ int x,y,z;
+ };
+
+ struct TexRef
+ {
+ ApexCudaTexRef* cudaTexRef;
+ uint32_t memRefIdx;
+ ApexCudaArray* cudaArray;
+ };
+
+ struct SurfRef
+ {
+ ApexCudaSurfRef* cudaSurfRef;
+ ApexCudaArray* cudaArray;
+ ApexCudaMemFlags::Enum flags;
+ };
+
+ struct ArrayRef
+ {
+ ApexSimpleString name;
+ ApexCudaArray* cudaArray;
+ const uint8_t* bufferPtr;
+ uint32_t size;
+
+ ArrayRef(const char* name, ApexCudaArray* cudaArray, const uint8_t* bufferPtr, uint32_t size)
+ {
+ this->name = name;
+ this->cudaArray = cudaArray;
+ this->bufferPtr = bufferPtr;
+ this->size = size;
+ }
+ };
+
+ struct ParamRef
+ {
+ ApexSimpleString name;
+ uint32_t value;
+ };
+
+public:
+ ApexCudaTestKernelContextReader(const char* path, SceneIntl* scene);
+ ~ApexCudaTestKernelContextReader();
+
+ bool runKernel();
+
+private:
+ ApexCudaArray* loadCudaArray();
+
+ void loadContext(ApexCudaFuncParams& params);
+ void loadTexRef(uint32_t& memOffset, bool bBindToArray);
+ void loadSurfRef();
+ void loadConstMem();
+ uint32_t getParamSize();
+ void loadParam(uint32_t& memOffset, ApexCudaFuncParams& params);
+
+ bool compare(const uint8_t* resData, const uint8_t* refData, size_t size, uint32_t fpType, const char* name);
+ void dumpParams(char* str);
+
+ nvidia::PsMemoryBuffer* mMemBuf;
+
+ uint32_t mCudaObjOffset;
+ uint32_t mParamOffset;
+
+ int mCuOffset;
+ void* mCuStream;
+
+ ApexSimpleString mName;
+ ApexSimpleString mModuleName;
+ uint32_t mFrame;
+ uint32_t mCallPerFrame;
+
+ uint32_t mFuncInstId;
+ uint32_t mSharedSize;
+ Dim3 mBlockDim;
+ Dim3 mGridDim;
+ apexCudaTest::KernelTypeEnum mKernelType;
+ uint32_t mThreadCount[3];
+ uint32_t mBlockCountY;
+
+ ApexCudaObj* mHeadCudaObj;
+ ApexCudaFunc* mFunc;
+
+ SceneIntl* mApexScene;
+ Array <uint8_t*> mArgSeq;
+ ApexMirroredArray <uint8_t> mTmpArray;
+ PxGpuCopyDescQueue mCopyQueue;
+
+ Array <apexCudaTest::MemRef> mInMemRefs;
+ Array <apexCudaTest::MemRef> mOutMemRefs;
+ Array <ArrayRef> mInArrayRefs;
+ Array <ArrayRef> mOutArrayRefs;
+
+ Array <TexRef> mTexRefs;
+ Array <SurfRef> mSurfRefs;
+
+ uint32_t mCudaArrayCount;
+ ApexCudaArray* mCudaArrayList;
+
+ Array <ParamRef> mParamRefs;
+};
+
+/** Extract context data from CudaModuleScene about cuda kernel and save it to specified file
+*/
+class ApexCudaTestKernelContext : public UserAllocated
+{
+ struct ArrayRef
+ {
+ CUarray cuArray;
+ uint32_t bufferOffset;
+
+ ArrayRef(CUarray cuArray, uint32_t bufferOffset)
+ {
+ this->cuArray = cuArray;
+ this->bufferOffset = bufferOffset;
+ }
+ };
+
+public:
+ ApexCudaTestKernelContext(const char* path, const char* functionName, const char* moduleName, uint32_t frame, uint32_t callPerFrame, bool isWriteForNonSuccessfulKernel, bool isKernelForSave);
+ ~ApexCudaTestKernelContext();
+
+ bool saveToFile();
+
+ PX_INLINE void setCuStream(void* cuStream)
+ {
+ mCuStream = cuStream;
+ }
+
+ void startObjList();
+ void finishObjList();
+
+ void setFreeKernel(uint32_t threadCount);
+ void setFreeKernel(uint32_t threadCountX, uint32_t threadCountY);
+ void setFreeKernel(uint32_t threadCountX, uint32_t threadCountY, uint32_t threadCountZ, uint32_t blockCountY);
+ void setBoundKernel(uint32_t threadCount);
+ void setSyncKernel();
+
+ void setBlockDim(uint32_t x, uint32_t y, uint32_t z);
+ void setGridDim(uint32_t x, uint32_t y);
+
+ void setSharedSize(uint32_t size);
+ void setFuncInstId(int id);
+
+ void addParam(const char* name, uint32_t align, const void *val, size_t size, int isMemRef = 0, int dataOffset = 0, uint32_t fpType = 0);
+ void addTexRef(const char* name, const void* mem, size_t size, CUarray arr);
+ void addSurfRef(const char* name, CUarray arr, ApexCudaMemFlags::Enum flags);
+ void addConstMem(const char* name, const void* mem, size_t size);
+ void setKernelStatus();
+
+private:
+ void copyMemRefs();
+ void copyArrayRefs();
+
+ uint32_t addCuArray(CUarray cuArray);
+
+ void completeCudaObjsBlock();
+ void completeCallParamsBlock();
+
+ PX_INLINE uint32_t advanceMemBuf(uint32_t size, uint32_t align = 4);
+ PX_INLINE void copyToMemBuf(const apexCudaTest::MemRef& memRef);
+ PX_INLINE void copyToMemBuf(const ArrayRef& arrayRef);
+
+ void* mCuStream;
+
+ uint32_t mVersion;
+ uint32_t mFrame;
+ uint32_t mCallPerFrame;
+ ApexSimpleString mName;
+ ApexSimpleString mErrorCode;
+ ApexSimpleString mModuleName;
+ ApexSimpleString mPath;
+ nvidia::PsMemoryBuffer mMemBuf;
+
+ uint32_t mCudaObjsOffset;
+ uint32_t mCallParamsOffset;
+
+ uint32_t mCudaObjsCounter;
+ uint32_t mCallParamsCounter;
+
+ Array <ArrayRef> mArrayRefs;
+ Array <apexCudaTest::MemRef> mMemRefs;
+
+ bool mIsCompleteContext;
+ bool mIsWriteForNonSuccessfulKernel;
+ bool mIsContextForSave;
+};
+
+
+/** Class get information what kernels should be tested and give directive for creation ApexCudaTestContext
+ */
+class ApexCudaTestManager : public CudaTestManager, public UserAllocated
+{
+ struct KernelInfo
+ {
+ ApexSimpleString functionName;
+ ApexSimpleString moduleName;
+ uint32_t callCount;
+
+ KernelInfo(const char* functionName, const char* moduleName)
+ : functionName(functionName), moduleName(moduleName), callCount(0) {}
+
+ bool operator!= (const KernelInfo& ki)
+ {
+ return this->functionName != ki.functionName || this->moduleName != ki.moduleName;
+ }
+ };
+
+public:
+
+ ApexCudaTestManager();
+ virtual ~ApexCudaTestManager();
+
+ PX_INLINE void setInternalApexScene(SceneIntl* scene)
+ {
+ mApexScene = scene;
+ }
+ void nextFrame();
+ ApexCudaTestKernelContext* isTestKernel(const char* functionName, const char* moduleName);
+
+ // interface for CudaTestManager
+ PX_INLINE void setWritePath(const char* path)
+ {
+ mPath = ApexSimpleString(path);
+ }
+ void setWriteForFunction(const char* functionName, const char* moduleName);
+
+ PX_INLINE void setMaxSamples(uint32_t maxSamples)
+ {
+ mMaxSamples = maxSamples;
+ }
+ void setFrames(uint32_t numFrames, const uint32_t* frames)
+ {
+ for(uint32_t i = 0; i < numFrames && mSampledFrames.size() < mMaxSamples; i++)
+ {
+ if (frames == NULL) // write next numFrames frames after current
+ {
+ mSampledFrames.pushBack(mCurrentFrame + i + 1);
+ }
+ else
+ {
+ mSampledFrames.pushBack(frames[i]);
+ }
+ }
+ }
+ void setFramePeriod(uint32_t period)
+ {
+ mFramePeriod = period;
+ }
+ void setCallPerFrameMaxCount(uint32_t cpfMaxCount)
+ {
+ mCallPerFrameMaxCount = cpfMaxCount;
+ }
+ void setWriteForNotSuccessfulKernel(bool flag)
+ {
+ mIsWriteForNonSuccessfulKernel = flag;
+ }
+/* void setCallPerFrameSeries(uint32_t callsCount, const uint32_t* calls)
+ {
+ for(uint32_t i = 0; i < callsCount && mSampledCallsPerFrame.size() < mCallPerFrameMaxCount; i++)
+ {
+ mSampledCallsPerFrame.pushBack(calls[i]);
+ }
+ }*/
+ bool runKernel(const char* path);
+
+private:
+ SceneIntl* mApexScene;
+ uint32_t mCurrentFrame;
+ uint32_t mMaxSamples;
+ uint32_t mFramePeriod;
+ uint32_t mCallPerFrameMaxCount;
+ bool mIsWriteForNonSuccessfulKernel;
+ ApexSimpleString mPath;
+ Array <uint32_t> mSampledFrames;
+ //Array <uint32_t> mSampledCallsPerFrame;
+ Array <KernelInfo> mKernels;
+ Array <ApexCudaTestKernelContext*> mContexts;
+};
+
+}
+} // namespace nvidia::apex
+
+#endif // __APEX_CUDA_TEST__