// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of NVIDIA CORPORATION nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Copyright (c) 2008-2018 NVIDIA Corporation. All rights reserved. 
#include #include #include #include #include #include #include #include #include #if defined(RENDERER_ENABLE_CUDA_INTEROP) #include #include // pre-compiled fatbin #if defined(_M_X64) #include "cuda/RenderClothNormals_x64.cuh" #else #include "cuda/RenderClothNormals_x86.cuh" #endif /* NOTE(review): this copy of the file looks damaged - text that stood between angle brackets seems to have been removed. Every #include above has lost its header name, and further down loop conditions, template argument lists and cast target types are missing. Restore those spans from the upstream file before building; the comments added here describe only what is still visible. */ /* File-local CUDA driver handles. Both are assigned in the RendererClothShape constructor (cuModuleLoadDataEx and cuModuleGetFunction) and read by the CUDA overload of update(). */ namespace { CUmodule gClothRenderModule; CUfunction gClothRenderFunction; /* Debug check for CUDA driver calls: PX_ASSERTs that r equals CUDA_SUCCESS and does nothing else with the result. */ void checkSuccess(CUresult r) { PX_ASSERT(r == CUDA_SUCCESS); } } #endif using namespace SampleRenderer; /* Builds the render resources for one cloth mesh. - STATIC_VB: HINT_STATIC vertex buffer, TEXCOORD0 as FLOAT2, numVerts entries. - DYNAMIC_VB: HINT_DYNAMIC vertex buffer, POSITION and NORMAL as FLOAT3, numVerts entries. - index buffer: FORMAT_UINT16, numFaces*3 indices, HINT_STATIC by default. - mesh: PRIMITIVE_TRIANGLES over both vertex buffers and the index buffer, created only if all three buffers exist. Parameters: renderer creates every resource; verts, normals, uvs and faces are the source arrays for the initial fill; numVerts and numFaces size the buffers; flipWinding selects the first of two index-fill branches; ctxMgr is stored in m_ctxMgr. When RENDERER_ENABLE_CUDA_INTEROP is defined and ctxMgr is non-null, DYNAMIC_VB and the index buffer are created with registerInCUDA set and interopContext = ctxMgr, the index buffer hint becomes HINT_DYNAMIC, and the computeSmoothNormals kernel is loaded. Failures are reported through RENDERER_ASSERT; later steps are guarded by null checks on the created buffers. NOTE(review): the per-vertex fill loop and both index-fill loops are damaged in this copy (everything from the loop condition up to the following unlock call is missing), so how verts, normals, uvs, faces and flipWinding are consumed cannot be read from here. */ RendererClothShape::RendererClothShape( Renderer& renderer, const PxVec3* verts, PxU32 numVerts, const PxVec3* normals, const PxReal* uvs, const PxU16* faces, PxU32 numFaces, bool flipWinding, PxCudaContextManager* ctxMgr) : RendererShape(renderer), m_numFaces(numFaces), m_ctxMgr(ctxMgr) { PX_UNUSED(m_numFaces); /* m_numFaces is otherwise read only inside the RENDERER_ENABLE_CUDA_INTEROP section of update(). */ /* Texture coordinates live in their own static buffer. */ { RendererVertexBufferDesc vbdesc; vbdesc.hint = RendererVertexBuffer::HINT_STATIC; vbdesc.semanticFormats[RendererVertexBuffer::SEMANTIC_TEXCOORD0] = RendererVertexBuffer::FORMAT_FLOAT2; vbdesc.maxVertices = numVerts; m_vertexBuffer[STATIC_VB] = m_renderer.createVertexBuffer(vbdesc); RENDERER_ASSERT(m_vertexBuffer[STATIC_VB], "Failed to create Vertex Buffer."); } /* Positions and normals share the dynamic buffer that both update() overloads write to. */ { RendererVertexBufferDesc vbdesc; vbdesc.hint = RendererVertexBuffer::HINT_DYNAMIC; vbdesc.semanticFormats[RendererVertexBuffer::SEMANTIC_POSITION] = RendererVertexBuffer::FORMAT_FLOAT3; vbdesc.semanticFormats[RendererVertexBuffer::SEMANTIC_NORMAL] = RendererVertexBuffer::FORMAT_FLOAT3; vbdesc.maxVertices = numVerts; #if defined(RENDERER_ENABLE_CUDA_INTEROP) if (ctxMgr) { vbdesc.registerInCUDA = true; vbdesc.interopContext = ctxMgr; ctxMgr->acquireContext(); // find module containing our kernel /* NOTE(review): executed for every shape built with a ctxMgr; each call overwrites the file-local module and function handles, and no cuModuleUnload is visible in this file. */ CUresult success; success = cuModuleLoadDataEx(&gClothRenderModule, computeSmoothNormals, 0, NULL, NULL); RENDERER_ASSERT(success == CUDA_SUCCESS, "Could not load cloth render CUDA module."); success = cuModuleGetFunction(&gClothRenderFunction, gClothRenderModule, "computeSmoothNormals"); RENDERER_ASSERT(success == CUDA_SUCCESS, "Could not find cloth 
render CUDA function."); ctxMgr->releaseContext(); } #endif //RENDERER_ENABLE_CUDA_INTEROP m_vertexBuffer[DYNAMIC_VB] = m_renderer.createVertexBuffer(vbdesc); RENDERER_ASSERT(m_vertexBuffer[DYNAMIC_VB], "Failed to create Vertex Buffer."); } if(m_vertexBuffer[DYNAMIC_VB] && m_vertexBuffer[STATIC_VB]) { PxU32 positionStride = 0; void* vertPositions = m_vertexBuffer[DYNAMIC_VB]->lockSemantic(RendererVertexBuffer::SEMANTIC_POSITION, positionStride); PxU32 normalStride = 0; void* vertNormals = m_vertexBuffer[DYNAMIC_VB]->lockSemantic(RendererVertexBuffer::SEMANTIC_NORMAL, normalStride); PxU32 uvStride = 0; void* vertUVs = m_vertexBuffer[STATIC_VB]->lockSemantic(RendererVertexBuffer::SEMANTIC_TEXCOORD0, uvStride); if(vertPositions && vertNormals && vertUVs) { for(PxU32 i=0; iunlockSemantic(RendererVertexBuffer::SEMANTIC_NORMAL); m_vertexBuffer[DYNAMIC_VB]->unlockSemantic(RendererVertexBuffer::SEMANTIC_POSITION); m_vertexBuffer[STATIC_VB]->unlockSemantic(RendererVertexBuffer::SEMANTIC_TEXCOORD0); } const PxU32 numIndices = numFaces*3; RendererIndexBufferDesc ibdesc; ibdesc.hint = RendererIndexBuffer::HINT_STATIC; ibdesc.format = RendererIndexBuffer::FORMAT_UINT16; ibdesc.maxIndices = numIndices; #if defined(RENDERER_ENABLE_CUDA_INTEROP) if (ctxMgr) { ibdesc.registerInCUDA = true; ibdesc.interopContext = ctxMgr; ibdesc.hint = RendererIndexBuffer::HINT_DYNAMIC; } #endif // RENDERER_ENABLE_CUDA_INTEROP m_indexBuffer = m_renderer.createIndexBuffer(ibdesc); RENDERER_ASSERT(m_indexBuffer, "Failed to create Index Buffer."); if(m_indexBuffer) { PxU16* indices = (PxU16*)m_indexBuffer->lock(); if(indices) { if(flipWinding) { for(PxU32 i=0;iunlock(); } if(m_vertexBuffer[DYNAMIC_VB] && m_vertexBuffer[STATIC_VB] && m_indexBuffer) { RendererMeshDesc meshdesc; meshdesc.primitives = RendererMesh::PRIMITIVE_TRIANGLES; meshdesc.vertexBuffers = m_vertexBuffer; meshdesc.numVertexBuffers = NUM_VERTEX_BUFFERS; meshdesc.firstVertex = 0; meshdesc.numVertices = numVerts; meshdesc.indexBuffer = 
m_indexBuffer; meshdesc.firstIndex = 0; meshdesc.numIndices = numIndices; m_mesh = m_renderer.createMesh(meshdesc); RENDERER_ASSERT(m_mesh, "Failed to create Mesh."); } } namespace { #if defined(RENDERER_ENABLE_CUDA_INTEROP) /* Rounds *offset up to the next multiple of alignment. The mask arithmetic is only correct when alignment is a power of two. */ PX_INLINE void cuuAlignUp(int* offset, int alignment) { *offset = ((*offset) + (alignment) - 1) & ~((alignment) - 1); } /* Appends one kernel argument: aligns *offset to __alignof(T), copies sizeof(T) bytes of var into the parameter space of hf with cuParamSetv, advances *offset by sizeof(T) and returns the cuParamSetv result. NOTE(review): the template parameter list is missing in this copy; T and T2 are used below but not declared. */ template CUresult cuuParamSet(CUfunction hf, int* offset, const T2& var) { cuuAlignUp(offset, __alignof(T)); CUresult err = cuParamSetv(hf, *offset, (void*)&var, sizeof(T)); *offset += sizeof(T); return err; } #endif // RENDERER_ENABLE_CUDA_INTEROP } // anonymous namespace /* GPU update path. Looks up the mapped device pointers of DYNAMIC_VB and the index buffer, passes srcVerts, srcIndices, destVerts, m_numFaces and numVerts to gClothRenderFunction, launches it with a block shape of 512x1x1 on a numBlocks x 1 grid (numBlocks is 1), then calls setVertexBufferRange(0, numVerts) on the mesh. Returns false when either mapped pointer is null (the existing comment says the caller then reverts to the CPU path); returns true otherwise. Without RENDERER_ENABLE_CUDA_INTEROP the body is PX_ASSERT(0) followed by return true. */ bool RendererClothShape::update(CUdeviceptr srcVerts, PxU32 numVerts) { #if defined(RENDERER_ENABLE_CUDA_INTEROP) // note: CUDA context must be acquired, and buffers mapped at this point PX_ASSERT(m_ctxMgr); PX_ASSERT(srcVerts); // get vertex buffer CUgraphicsResource vbRes; CUdeviceptr destVerts; size_t destSize; m_vertexBuffer[DYNAMIC_VB]->getInteropResourceHandle(vbRes); cuGraphicsResourceGetMappedPointer(&destVerts, &destSize, vbRes); // get index buffer CUgraphicsResource ibRes; CUdeviceptr srcIndices; size_t indicesSize; m_indexBuffer->getInteropResourceHandle(ibRes); cuGraphicsResourceGetMappedPointer(&srcIndices, &indicesSize, ibRes); // if either buffers could not be mapped revert to CPU /* NOTE(review): the CUresult of both cuGraphicsResourceGetMappedPointer calls is discarded and destVerts and srcIndices are not initialised, so this test presumably relies on the driver having written them - confirm. */ if (!destVerts || !srcIndices) return false; const size_t ptrSize = sizeof(CUdeviceptr); /* NOTE(review): ptrSize is not used in the visible code. */ int offset = 0; /* Arguments are packed in this order; the explicit template arguments of cuuParamSet look stripped at these call sites too - TODO confirm against upstream. */ checkSuccess(cuuParamSet(gClothRenderFunction, &offset, srcVerts)); checkSuccess(cuuParamSet(gClothRenderFunction, &offset, srcIndices)); checkSuccess(cuuParamSet(gClothRenderFunction, &offset, destVerts)); checkSuccess(cuuParamSet(gClothRenderFunction, &offset, m_numFaces)); checkSuccess(cuuParamSet(gClothRenderFunction, &offset, numVerts)); checkSuccess(cuParamSetSize(gClothRenderFunction, offset)); const PxU32 blockSize = 512; const PxU32 numBlocks = 1; checkSuccess(cuFuncSetBlockShape(gClothRenderFunction, blockSize, 1, 1)); 
checkSuccess(cuLaunchGridAsync(gClothRenderFunction, numBlocks, 1, 0)); m_mesh->setVertexBufferRange(0, numVerts); #else PX_ASSERT(0); #endif return true; } /* Maps the CUDA graphics resources of n shapes in one driver call. For every shape the interop handles of DYNAMIC_VB and of the index buffer are collected, then all handles are passed to cuGraphicsMapResources with 0 as the stream argument. In the interop build it returns early when n is 0. Without RENDERER_ENABLE_CUDA_INTEROP the body is PX_ASSERT(0). NOTE(review): the element type of the std::vector is missing in this copy, and the unsigned int(...) cast spelling is not accepted by every compiler. */ void RendererClothShape::mapShapes(RendererClothShape** shapes, PxU32 n) { PX_UNUSED(shapes); PX_UNUSED(n); #if defined(RENDERER_ENABLE_CUDA_INTEROP) if (n == 0) return; PX_PROFILE_ZONE("RendererClothShape::mapGraphicsResource",0); std::vector resources; for (PxU32 i=0; i < n; ++i) { CUgraphicsResource vbRes, ibRes; shapes[i]->m_vertexBuffer[DYNAMIC_VB]->getInteropResourceHandle(vbRes); shapes[i]->m_indexBuffer->getInteropResourceHandle(ibRes); resources.push_back(vbRes); resources.push_back(ibRes); } checkSuccess(cuGraphicsMapResources(unsigned int(resources.size()), &resources[0], 0)); #else PX_ASSERT(0); #endif } /* Counterpart of mapShapes: collects the same two interop handles per shape (DYNAMIC_VB, then index buffer) and releases them all with one cuGraphicsUnmapResources call, again with 0 as the stream argument. In the interop build it returns early when n is 0. Without RENDERER_ENABLE_CUDA_INTEROP the body is PX_ASSERT(0). NOTE(review): the std::vector element type is missing here as well. */ void RendererClothShape::unmapShapes(RendererClothShape** shapes, PxU32 n) { PX_UNUSED(shapes); PX_UNUSED(n); #if defined(RENDERER_ENABLE_CUDA_INTEROP) if (n == 0) return; PX_PROFILE_ZONE("RendererClothShape::unmapGraphicsResource",0); std::vector resources; for (PxU32 i=0; i < n; ++i) { CUgraphicsResource vbRes, ibRes; shapes[i]->m_vertexBuffer[DYNAMIC_VB]->getInteropResourceHandle(vbRes); shapes[i]->m_indexBuffer->getInteropResourceHandle(ibRes); resources.push_back(vbRes); resources.push_back(ibRes); } checkSuccess(cuGraphicsUnmapResources(unsigned int(resources.size()), &resources[0], 0)); #else PX_ASSERT(0); #endif } /* CPU update path. Locks the POSITION and NORMAL semantics of DYNAMIC_VB, runs the per-vertex loop, unlocks both semantics and calls setVertexBufferRange(0, numVerts) on the mesh. NOTE(review): the loop is damaged in this copy (its condition and body are missing, as are the static_cast target types); it presumably copies verts and normals into the locked memory using the returned strides, but that and whether a null normals pointer is tolerated cannot be read from here. Neither locked pointer is null-checked in the visible code. */ void RendererClothShape::update(const PxVec3* verts, PxU32 numVerts, const PxVec3* normals) { PxU32 positionStride = 0, normalStride = 0; PxU8 *locked_positions = static_cast(m_vertexBuffer[DYNAMIC_VB]->lockSemantic( RendererVertexBuffer::SEMANTIC_POSITION, positionStride)); PxU8 *locked_normals = static_cast(m_vertexBuffer[DYNAMIC_VB]->lockSemantic( RendererVertexBuffer::SEMANTIC_NORMAL, normalStride)); /* NOTE(review): damaged span starts at the loop condition below. */ for(PxU32 i=0; iunlockSemantic(SampleRenderer::RendererVertexBuffer::SEMANTIC_NORMAL); 
m_vertexBuffer[DYNAMIC_VB]->unlockSemantic(SampleRenderer::RendererVertexBuffer::SEMANTIC_POSITION); m_mesh->setVertexBufferRange(0, numVerts); } /* Releases both vertex buffers, then the index buffer, then the mesh, each through SAFE_RELEASE. */ RendererClothShape::~RendererClothShape(void) { for(PxU32 i = 0; i < NUM_VERTEX_BUFFERS; i++ ) { SAFE_RELEASE(m_vertexBuffer[i]); } SAFE_RELEASE(m_indexBuffer); SAFE_RELEASE(m_mesh); }