// This code contains NVIDIA Confidential Information and is disclosed to you // under a form of NVIDIA software license agreement provided separately to you. // // Notice // NVIDIA Corporation and its licensors retain all intellectual property and // proprietary rights in and to this software and related documentation and // any modifications thereto. Any use, reproduction, disclosure, or // distribution of this software and related documentation without an express // license agreement from NVIDIA Corporation is strictly prohibited. // // ALL NVIDIA DESIGN SPECIFICATIONS, CODE ARE PROVIDED "AS IS.". NVIDIA MAKES // NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO // THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, // MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. // // Information and code furnished is believed to be accurate and reliable. // However, NVIDIA Corporation assumes no responsibility for the consequences of use of such // information or for any infringement of patents or other rights of third parties that may // result from its use. No license is granted by implication or otherwise under any patent // or patent rights of NVIDIA Corporation. Details are subject to change without notice. // This code supersedes and replaces all information previously supplied. // NVIDIA Corporation products are not authorized for use as critical // components in life support devices or systems without express written approval of // NVIDIA Corporation. // // Copyright (c) 2008-2018 NVIDIA Corporation. All rights reserved. #include #include #include #include #include #include #include #include #include #if defined(RENDERER_ENABLE_CUDA_INTEROP) #include #include // pre-compiled fatbin #if defined(_M_X64) #include "cuda/RenderClothNormals_x64.cuh" #else #include "cuda/RenderClothNormals_x86.cuh" #endif namespace { CUmodule gClothRenderModule; CUfunction gClothRenderFunction; void checkSuccess(CUresult r) { PX_ASSERT(r == CUDA_SUCCESS); } } #endif using namespace SampleRenderer; RendererClothShape::RendererClothShape( Renderer& renderer, const PxVec3* verts, PxU32 numVerts, const PxVec3* normals, const PxReal* uvs, const PxU16* faces, PxU32 numFaces, bool flipWinding, PxCudaContextManager* ctxMgr) : RendererShape(renderer), m_numFaces(numFaces), m_ctxMgr(ctxMgr) { PX_UNUSED(m_numFaces); { RendererVertexBufferDesc vbdesc; vbdesc.hint = RendererVertexBuffer::HINT_STATIC; vbdesc.semanticFormats[RendererVertexBuffer::SEMANTIC_TEXCOORD0] = RendererVertexBuffer::FORMAT_FLOAT2; vbdesc.maxVertices = numVerts; m_vertexBuffer[STATIC_VB] = m_renderer.createVertexBuffer(vbdesc); RENDERER_ASSERT(m_vertexBuffer[STATIC_VB], "Failed to create Vertex Buffer."); } { RendererVertexBufferDesc vbdesc; vbdesc.hint = RendererVertexBuffer::HINT_DYNAMIC; vbdesc.semanticFormats[RendererVertexBuffer::SEMANTIC_POSITION] = RendererVertexBuffer::FORMAT_FLOAT3; vbdesc.semanticFormats[RendererVertexBuffer::SEMANTIC_NORMAL] = RendererVertexBuffer::FORMAT_FLOAT3; vbdesc.maxVertices = numVerts; #if defined(RENDERER_ENABLE_CUDA_INTEROP) if (ctxMgr) { vbdesc.registerInCUDA = true; vbdesc.interopContext = ctxMgr; ctxMgr->acquireContext(); // find module containing our kernel CUresult success; success = cuModuleLoadDataEx(&gClothRenderModule, computeSmoothNormals, 0, NULL, NULL); RENDERER_ASSERT(success == CUDA_SUCCESS, "Could not load cloth render CUDA module."); success = cuModuleGetFunction(&gClothRenderFunction, gClothRenderModule, "computeSmoothNormals"); RENDERER_ASSERT(success == CUDA_SUCCESS, "Could not find cloth render CUDA function."); ctxMgr->releaseContext(); } #endif //RENDERER_ENABLE_CUDA_INTEROP m_vertexBuffer[DYNAMIC_VB] = m_renderer.createVertexBuffer(vbdesc); RENDERER_ASSERT(m_vertexBuffer[DYNAMIC_VB], "Failed to create Vertex Buffer."); } if(m_vertexBuffer[DYNAMIC_VB] && m_vertexBuffer[STATIC_VB]) { PxU32 positionStride = 0; void* vertPositions = m_vertexBuffer[DYNAMIC_VB]->lockSemantic(RendererVertexBuffer::SEMANTIC_POSITION, positionStride); PxU32 normalStride = 0; void* vertNormals = m_vertexBuffer[DYNAMIC_VB]->lockSemantic(RendererVertexBuffer::SEMANTIC_NORMAL, normalStride); PxU32 uvStride = 0; void* vertUVs = m_vertexBuffer[STATIC_VB]->lockSemantic(RendererVertexBuffer::SEMANTIC_TEXCOORD0, uvStride); if(vertPositions && vertNormals && vertUVs) { for(PxU32 i=0; iunlockSemantic(RendererVertexBuffer::SEMANTIC_NORMAL); m_vertexBuffer[DYNAMIC_VB]->unlockSemantic(RendererVertexBuffer::SEMANTIC_POSITION); m_vertexBuffer[STATIC_VB]->unlockSemantic(RendererVertexBuffer::SEMANTIC_TEXCOORD0); } const PxU32 numIndices = numFaces*3; RendererIndexBufferDesc ibdesc; ibdesc.hint = RendererIndexBuffer::HINT_STATIC; ibdesc.format = RendererIndexBuffer::FORMAT_UINT16; ibdesc.maxIndices = numIndices; #if defined(RENDERER_ENABLE_CUDA_INTEROP) if (ctxMgr) { ibdesc.registerInCUDA = true; ibdesc.interopContext = ctxMgr; ibdesc.hint = RendererIndexBuffer::HINT_DYNAMIC; } #endif // RENDERER_ENABLE_CUDA_INTEROP m_indexBuffer = m_renderer.createIndexBuffer(ibdesc); RENDERER_ASSERT(m_indexBuffer, "Failed to create Index Buffer."); if(m_indexBuffer) { PxU16* indices = (PxU16*)m_indexBuffer->lock(); if(indices) { if(flipWinding) { for(PxU32 i=0;iunlock(); } if(m_vertexBuffer[DYNAMIC_VB] && m_vertexBuffer[STATIC_VB] && m_indexBuffer) { RendererMeshDesc meshdesc; meshdesc.primitives = RendererMesh::PRIMITIVE_TRIANGLES; meshdesc.vertexBuffers = m_vertexBuffer; meshdesc.numVertexBuffers = NUM_VERTEX_BUFFERS; meshdesc.firstVertex = 0; meshdesc.numVertices = numVerts; meshdesc.indexBuffer = m_indexBuffer; meshdesc.firstIndex = 0; meshdesc.numIndices = numIndices; m_mesh = m_renderer.createMesh(meshdesc); RENDERER_ASSERT(m_mesh, "Failed to create Mesh."); } } namespace { #if defined(RENDERER_ENABLE_CUDA_INTEROP) PX_INLINE void cuuAlignUp(int* offset, int alignment) { *offset = ((*offset) + (alignment) - 1) & ~((alignment) - 1); } template CUresult cuuParamSet(CUfunction hf, int* offset, const T2& var) { cuuAlignUp(offset, __alignof(T)); CUresult err = cuParamSetv(hf, *offset, (void*)&var, sizeof(T)); *offset += sizeof(T); return err; } #endif // RENDERER_ENABLE_CUDA_INTEROP } // anonymous namespace bool RendererClothShape::update(CUdeviceptr srcVerts, PxU32 numVerts) { #if defined(RENDERER_ENABLE_CUDA_INTEROP) // note: CUDA context must be acquired, and buffers mapped at this point PX_ASSERT(m_ctxMgr); PX_ASSERT(srcVerts); // get vertex buffer CUgraphicsResource vbRes; CUdeviceptr destVerts; size_t destSize; m_vertexBuffer[DYNAMIC_VB]->getInteropResourceHandle(vbRes); cuGraphicsResourceGetMappedPointer(&destVerts, &destSize, vbRes); // get index buffer CUgraphicsResource ibRes; CUdeviceptr srcIndices; size_t indicesSize; m_indexBuffer->getInteropResourceHandle(ibRes); cuGraphicsResourceGetMappedPointer(&srcIndices, &indicesSize, ibRes); // if either buffers could not be mapped revert to CPU if (!destVerts || !srcIndices) return false; const size_t ptrSize = sizeof(CUdeviceptr); int offset = 0; checkSuccess(cuuParamSet(gClothRenderFunction, &offset, srcVerts)); checkSuccess(cuuParamSet(gClothRenderFunction, &offset, srcIndices)); checkSuccess(cuuParamSet(gClothRenderFunction, &offset, destVerts)); checkSuccess(cuuParamSet(gClothRenderFunction, &offset, m_numFaces)); checkSuccess(cuuParamSet(gClothRenderFunction, &offset, numVerts)); checkSuccess(cuParamSetSize(gClothRenderFunction, offset)); const PxU32 blockSize = 512; const PxU32 numBlocks = 1; checkSuccess(cuFuncSetBlockShape(gClothRenderFunction, blockSize, 1, 1)); checkSuccess(cuLaunchGridAsync(gClothRenderFunction, numBlocks, 1, 0)); m_mesh->setVertexBufferRange(0, numVerts); #else PX_ASSERT(0); #endif return true; } void RendererClothShape::mapShapes(RendererClothShape** shapes, PxU32 n) { PX_UNUSED(shapes); PX_UNUSED(n); #if defined(RENDERER_ENABLE_CUDA_INTEROP) if (n == 0) return; PX_PROFILE_ZONE("RendererClothShape::mapGraphicsResource",0); std::vector resources; for (PxU32 i=0; i < n; ++i) { CUgraphicsResource vbRes, ibRes; shapes[i]->m_vertexBuffer[DYNAMIC_VB]->getInteropResourceHandle(vbRes); shapes[i]->m_indexBuffer->getInteropResourceHandle(ibRes); resources.push_back(vbRes); resources.push_back(ibRes); } checkSuccess(cuGraphicsMapResources(unsigned int(resources.size()), &resources[0], 0)); #else PX_ASSERT(0); #endif } void RendererClothShape::unmapShapes(RendererClothShape** shapes, PxU32 n) { PX_UNUSED(shapes); PX_UNUSED(n); #if defined(RENDERER_ENABLE_CUDA_INTEROP) if (n == 0) return; PX_PROFILE_ZONE("RendererClothShape::unmapGraphicsResource",0); std::vector resources; for (PxU32 i=0; i < n; ++i) { CUgraphicsResource vbRes, ibRes; shapes[i]->m_vertexBuffer[DYNAMIC_VB]->getInteropResourceHandle(vbRes); shapes[i]->m_indexBuffer->getInteropResourceHandle(ibRes); resources.push_back(vbRes); resources.push_back(ibRes); } checkSuccess(cuGraphicsUnmapResources(unsigned int(resources.size()), &resources[0], 0)); #else PX_ASSERT(0); #endif } void RendererClothShape::update(const PxVec3* verts, PxU32 numVerts, const PxVec3* normals) { PxU32 positionStride = 0, normalStride = 0; PxU8 *locked_positions = static_cast(m_vertexBuffer[DYNAMIC_VB]->lockSemantic( RendererVertexBuffer::SEMANTIC_POSITION, positionStride)); PxU8 *locked_normals = static_cast(m_vertexBuffer[DYNAMIC_VB]->lockSemantic( RendererVertexBuffer::SEMANTIC_NORMAL, normalStride)); for(PxU32 i=0; iunlockSemantic(SampleRenderer::RendererVertexBuffer::SEMANTIC_NORMAL); m_vertexBuffer[DYNAMIC_VB]->unlockSemantic(SampleRenderer::RendererVertexBuffer::SEMANTIC_POSITION); m_mesh->setVertexBufferRange(0, numVerts); } RendererClothShape::~RendererClothShape(void) { for(PxU32 i = 0; i < NUM_VERTEX_BUFFERS; i++ ) { SAFE_RELEASE(m_vertexBuffer[i]); } SAFE_RELEASE(m_indexBuffer); SAFE_RELEASE(m_mesh); }