diff options
| author | sschirm <[email protected]> | 2016-12-23 14:20:36 +0100 |
|---|---|---|
| committer | sschirm <[email protected]> | 2016-12-23 14:56:17 +0100 |
| commit | ef6937e69e8ee3f409cf9d460d5ad300a65d5924 (patch) | |
| tree | 710426e8daa605551ce3f34b581897011101c30f /APEX_1.4/module/iofx/src/IofxManagerGPU.cpp | |
| parent | Initial commit: (diff) | |
| download | physx-3.4-ef6937e69e8ee3f409cf9d460d5ad300a65d5924.tar.xz physx-3.4-ef6937e69e8ee3f409cf9d460d5ad300a65d5924.zip | |
PhysX 3.4 / APEX 1.4 release candidate @21506124
Diffstat (limited to 'APEX_1.4/module/iofx/src/IofxManagerGPU.cpp')
| -rw-r--r-- | APEX_1.4/module/iofx/src/IofxManagerGPU.cpp | 1319 |
1 files changed, 0 insertions, 1319 deletions
diff --git a/APEX_1.4/module/iofx/src/IofxManagerGPU.cpp b/APEX_1.4/module/iofx/src/IofxManagerGPU.cpp deleted file mode 100644 index 06d1209a..00000000 --- a/APEX_1.4/module/iofx/src/IofxManagerGPU.cpp +++ /dev/null @@ -1,1319 +0,0 @@ -/* - * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. - * - * NVIDIA CORPORATION and its licensors retain all intellectual property - * and proprietary rights in and to this software, related documentation - * and any modifications thereto. Any use, reproduction, disclosure or - * distribution of this software and related documentation without an express - * license agreement from NVIDIA CORPORATION is strictly prohibited. - */ - - -#include "Apex.h" -#include "ApexDefs.h" - -#if APEX_CUDA_SUPPORT - -#include "ApexSDKIntl.h" -#include "SceneIntl.h" -#include "ModifierImpl.h" -#include "IofxActor.h" -#include "IofxManagerGPU.h" -#include "IofxAssetImpl.h" -#include "IofxSceneGPU.h" - -#include "ModuleIofxImpl.h" -#include "IofxActorGPU.h" - -#include "PxGpuTask.h" -#include "ApexCutil.h" - -#include "RandStateHelpers.h" - -#include "IofxRenderData.h" - -#define CUDA_OBJ(name) SCENE_CUDA_OBJ(mIofxScene, name) - -namespace nvidia -{ -namespace iofx -{ - -class IofxAssetSceneInstGPU : public IofxAssetSceneInst -{ -public: - IofxAssetSceneInstGPU(IofxAssetImpl* asset, uint32_t semantics, IofxScene* scene) - : IofxAssetSceneInst(asset, semantics) - , _constMemGroup(SCENE_CUDA_OBJ(*scene, modifierStorage)) - { - _totalRandomCount = 0; - - APEX_CUDA_CONST_MEM_GROUP_SCOPE(_constMemGroup) - - _storage_.alloc(_assetParamsHandle); - AssetParams assetParams; - buildModifierList(assetParams.spawnModifierList, _asset->mSpawnModifierStack); - buildModifierList(assetParams.continuousModifierList, _asset->mContinuousModifierStack); - _storage_.update(_assetParamsHandle, assetParams); - } - virtual ~IofxAssetSceneInstGPU() {} - - InplaceHandle<AssetParams> getAssetParamsHandle() const - { - return _assetParamsHandle; - } - -private: 
- - void buildModifierList(ModifierList& list, const ModifierStack& stack) - { - InplaceStorage& _storage_ = _constMemGroup.getStorage(); - - class Mapper : public ModifierParamsMapperGPU - { - public: - InplaceStorage* storage; - - InplaceHandleBase paramsHandle; - uint32_t paramsRandomCount; - - virtual InplaceStorage& getStorage() - { - return *storage; - } - - virtual void onParams(InplaceHandleBase handle, uint32_t randomCount) - { - paramsHandle = handle; - paramsRandomCount = randomCount; - } - - } mapper; - mapper.storage = &_storage_; - - list.resize(_storage_, stack.size()); - - uint32_t index = 0; - for (ModifierStack::ConstIterator it = stack.begin(); it != stack.end(); ++it) - { - uint32_t type = (*it)->getModifierType(); - //NxU32 usage = (*it)->getModifierUsage(); - //if ((usage & usageStage) == usageStage && (usage & usageClass) == usageClass) - { - const ModifierImpl* modifier = ModifierImpl::castFrom(*it); - modifier->mapParamsGPU(mapper); - - ModifierListElem listElem; - listElem.type = type; - listElem.paramsHandle = mapper.paramsHandle; - list.updateElem(_storage_, listElem, index); - - _totalRandomCount += mapper.paramsRandomCount; - } - ++index; - } - } - - ApexCudaConstMemGroup _constMemGroup; - InplaceHandle<AssetParams> _assetParamsHandle; - uint32_t _totalRandomCount; -}; - -class IofxManagerClientGPU : public IofxManagerClient -{ -public: - IofxManagerClientGPU(IofxAssetSceneInst* assetSceneInst, uint32_t actorClassID, const IofxManagerClientIntl::Params& params, IofxScene* scene) - : IofxManagerClient(assetSceneInst, actorClassID, params) - , _constMemGroup(SCENE_CUDA_OBJ(*scene, modifierStorage)) - { - setParamsGPU(); - } - - InplaceHandle<ClientParams> getClientParamsHandle() const - { - return _clientParamsHandle; - } - - // IofxManagerClientIntl interface - virtual void setParams(const IofxManagerClientIntl::Params& params) - { - IofxManagerClient::setParams(params); - setParamsGPU(); - } - -private: - void setParamsGPU() - { - 
APEX_CUDA_CONST_MEM_GROUP_SCOPE(_constMemGroup) - - ClientParams clientParams; - if (_clientParamsHandle.allocOrFetch(_storage_, clientParams)) - { - clientParams.assetParamsHandle = static_cast<IofxAssetSceneInstGPU*>(_assetSceneInst)->getAssetParamsHandle(); - } - clientParams.objectScale = _params.objectScale; - _clientParamsHandle.update(_storage_, clientParams); - } - - ApexCudaConstMemGroup _constMemGroup; - InplaceHandle<ClientParams> _clientParamsHandle; -}; - - -IofxManagerClient* IofxManagerGPU::createClient(IofxAssetSceneInst* assetSceneInst, uint32_t actorClassID, const IofxManagerClientIntl::Params& params) -{ - return PX_NEW(IofxManagerClientGPU)(assetSceneInst, actorClassID, params, &mIofxScene); -} - -IofxAssetSceneInst* IofxManagerGPU::createAssetSceneInst(IofxAssetImpl* asset,uint32_t semantics) -{ - return PX_NEW(IofxAssetSceneInstGPU)(asset, semantics, &mIofxScene); -} - -class IofxManagerLaunchTask : public PxGpuTask, public UserAllocated -{ -public: - IofxManagerLaunchTask(IofxManagerGPU* actor) : mActor(actor) {} - const char* getName() const - { - return "IofxManagerLaunchTask"; - } - void run() - { - PX_ALWAYS_ASSERT(); - } - bool launchInstance(CUstream stream, int kernelIndex) - { - return mActor->cudaLaunch(stream, kernelIndex); - } - PxGpuTaskHint::Enum getTaskHint() const - { - return PxGpuTaskHint::Kernel; - } - -protected: - IofxManagerGPU* mActor; -}; - -IofxManagerGPU::IofxManagerGPU(SceneIntl& scene, const IofxManagerDescIntl& desc, IofxManager& mgr, const ApexMirroredPlace::Enum defaultPlace) - : mManager(mgr) - , mIofxScene(*mgr.mIofxScene) - , mCopyQueue(*scene.getTaskManager()->getGpuDispatcher()) - , mDefaultPlace(defaultPlace) - , mCuSpawnScale(scene) - , mCuSpawnSeed(scene) - , mCuBlockPRNGs(scene) - , mCuSortedActorIDs(scene) - , mCuSortedStateIDs(scene) - , mCuSortTempKeys(scene) - , mCuSortTempValues(scene) - , mCuSortTemp(scene) - , mCuMinBounds(scene) - , mCuMaxBounds(scene) - , mCuTempMinBounds(scene) - , 
mCuTempMaxBounds(scene) - , mCuTempActorIDs(scene) - , mCuActorStart(scene) - , mCuActorEnd(scene) - , mCuActorVisibleEnd(scene) - , mCurSeed(0) - , mTargetBufDevPtr(NULL) - , mCountActorIDs(0) - , mNumberVolumes(0) - , mNumberActorClasses(0) - , mEmptySimulation(false) - , mVolumeConstMemGroup(CUDA_OBJ(migrationStorage)) - , mRemapConstMemGroup(CUDA_OBJ(remapStorage)) - , mModifierConstMemGroup(CUDA_OBJ(modifierStorage)) -{ - mTaskLaunch = PX_NEW(IofxManagerLaunchTask)(this); - - const uint32_t maxObjectCount = desc.maxObjectCount; - const uint32_t maxInStateCount = desc.maxInStateCount; - uint32_t usageClass = 0; - uint32_t blockSize = MAX_THREADS_PER_BLOCK; - - if (mManager.mIsMesh) - { - usageClass = ModifierUsage_Mesh; - //blockSize = CUDA_OBJ(meshModifiersKernel).getBlockDim().x; - } - else - { - usageClass = ModifierUsage_Sprite; - //blockSize = CUDA_OBJ(spriteModifiersKernel).getBlockDim().x; - } - - mCuSpawnScale.reserve(mManager.mOutStateOffset + maxObjectCount, ApexMirroredPlace::GPU); - mCuSpawnSeed.reserve(mManager.mOutStateOffset + maxObjectCount, ApexMirroredPlace::GPU); - - mCuSortedActorIDs.reserve(maxInStateCount, defaultPlace); - mCuSortedStateIDs.reserve(maxInStateCount, defaultPlace); - - mCuSortTempKeys.reserve(maxInStateCount, ApexMirroredPlace::GPU); - mCuSortTempValues.reserve(maxInStateCount, ApexMirroredPlace::GPU); - mCuSortTemp.reserve(MAX_BOUND_BLOCKS * NEW_SORT_KEY_DIGITS, ApexMirroredPlace::GPU); - - mCuTempMinBounds.reserve(WARP_SIZE * 2, ApexMirroredPlace::GPU); - mCuTempMaxBounds.reserve(WARP_SIZE * 2, ApexMirroredPlace::GPU); - mCuTempActorIDs.reserve(WARP_SIZE * 2, ApexMirroredPlace::GPU); - - // alloc volumeConstMem - { - APEX_CUDA_CONST_MEM_GROUP_SCOPE(mVolumeConstMemGroup) - - mVolumeParamsArrayHandle.alloc(_storage_); - mActorClassIDBitmapArrayHandle.alloc(_storage_); - } - - // alloc remapConstMem - { - APEX_CUDA_CONST_MEM_GROUP_SCOPE(mRemapConstMemGroup) - - mActorIDRemapArrayHandle.alloc(_storage_); - } - - // alloc 
modifierConstMem - { - APEX_CUDA_CONST_MEM_GROUP_SCOPE(mModifierConstMemGroup) - - mClientParamsHandleArrayHandle.alloc(_storage_); - - if (mManager.mIsMesh) - { - mMeshOutputLayoutHandle.alloc(_storage_); - } - else - { - mSpriteOutputLayoutHandle.alloc(_storage_); - } - } - - InitDevicePRNGs(scene, blockSize, mRandThreadLeap, mRandGridLeap, mCuBlockPRNGs); -} - -void IofxManagerGPU::release() -{ - delete this; -} - -IofxManagerGPU::~IofxManagerGPU() -{ - delete mTaskLaunch; -} - - -void IofxManagerGPU::submitTasks() -{ - mNumberActorClasses = mManager.mActorClassTable.size(); - mNumberVolumes = mManager.mVolumeTable.size(); - mCountActorIDs = mManager.mActorTable.size() * mNumberVolumes; - - // update volumeConstMem - if (mNumberVolumes) - { - APEX_CUDA_CONST_MEM_GROUP_SCOPE(mVolumeConstMemGroup) - - VolumeParamsArray volumeParamsArray; - _storage_.fetch(mVolumeParamsArrayHandle, volumeParamsArray); - volumeParamsArray.resize(_storage_, mNumberVolumes); - _storage_.update(mVolumeParamsArrayHandle, volumeParamsArray); - - - ActorClassIDBitmapArray actorClassIDBitmapArray; - _storage_.fetch(mActorClassIDBitmapArrayHandle, actorClassIDBitmapArray); - actorClassIDBitmapArray.resize(_storage_, mManager.mVolumeActorClassBitmap.size()); - _storage_.update(mActorClassIDBitmapArrayHandle, actorClassIDBitmapArray); - - actorClassIDBitmapArray.updateRange(_storage_, &mManager.mVolumeActorClassBitmap.front(), actorClassIDBitmapArray.getSize()); - - for (uint32_t i = 0 ; i < mNumberVolumes ; i++) - { - VolumeParams volumeParams; - IofxManager::VolumeData& vd = mManager.mVolumeTable[ i ]; - if (vd.vol) - { - volumeParams.bounds = vd.mBounds; - volumeParams.priority = vd.mPri; - } - else - { - volumeParams.bounds.setEmpty(); - volumeParams.priority = 0; - } - volumeParamsArray.updateElem(_storage_, volumeParams, i); - } - } - else - { - APEX_DEBUG_WARNING("IofxManager: There is no render volume!"); - } - - // update remapConstMem - { - 
APEX_CUDA_CONST_MEM_GROUP_SCOPE(mRemapConstMemGroup) - - ActorIDRemapArray actorIDRemapArray; - _storage_.fetch(mActorIDRemapArrayHandle, actorIDRemapArray); - actorIDRemapArray.resize(_storage_, mNumberActorClasses); - for (uint32_t i = 0 ; i < mNumberActorClasses ; ++i) - { - actorIDRemapArray.updateElem(_storage_, mManager.mActorClassTable[i].actorID, i); - } - _storage_.update(mActorIDRemapArrayHandle, actorIDRemapArray); - } - - // update modifierConstMem - { - APEX_CUDA_CONST_MEM_GROUP_SCOPE(mModifierConstMemGroup) - - ClientParamsHandleArray clientParamsHandleArray; - _storage_.fetch(mClientParamsHandleArrayHandle, clientParamsHandleArray); - clientParamsHandleArray.resize(_storage_, mNumberActorClasses); - for (uint32_t i = 0 ; i < mNumberActorClasses ; ++i) - { - InplaceHandle<ClientParams> clientParamsHandle; - IofxManagerClientGPU* clientGPU = static_cast<IofxManagerClientGPU*>(mManager.mActorClassTable[i].client); - if (clientGPU != NULL) - { - clientParamsHandle = clientGPU->getClientParamsHandle(); - } - clientParamsHandleArray.updateElem(_storage_, clientParamsHandle, i); - } - _storage_.update(mClientParamsHandleArrayHandle, clientParamsHandleArray); - - if (mManager.mIsMesh) - { - MeshOutputLayout meshOutputLayout; - - IosObjectGpuData* mWorkingData = DYNAMIC_CAST(IosObjectGpuData*)(mManager.mWorkingIosData); - IofxSharedRenderDataMeshImpl* meshRenderData = DYNAMIC_CAST(IofxSharedRenderDataMeshImpl*)(mWorkingData->renderData); - const IofxMeshRenderLayout& meshRenderLayout = meshRenderData->getRenderLayout(); - - mOutputDWords = meshRenderLayout.stride >> 2; - meshOutputLayout.stride = meshRenderLayout.stride; - ::memcpy(meshOutputLayout.offsets, meshRenderLayout.offsets, sizeof(meshOutputLayout.offsets)); - - _storage_.update(mMeshOutputLayoutHandle, meshOutputLayout); - } - else - { - SpriteOutputLayout spriteOutputLayout; - - IosObjectGpuData* mWorkingData = DYNAMIC_CAST(IosObjectGpuData*)(mManager.mWorkingIosData); - 
IofxSharedRenderDataSpriteImpl* spriteRenderData = DYNAMIC_CAST(IofxSharedRenderDataSpriteImpl*)(mWorkingData->renderData); - const IofxSpriteRenderLayout& spriteRenderLayout = spriteRenderData->getRenderLayout(); - - mOutputDWords = spriteRenderLayout.stride >> 2; - spriteOutputLayout.stride = spriteRenderLayout.stride; - ::memcpy(spriteOutputLayout.offsets, spriteRenderLayout.offsets, sizeof(spriteOutputLayout.offsets)); - - _storage_.update(mSpriteOutputLayoutHandle, spriteOutputLayout); - } - } - -} - - -#pragma warning(push) -#pragma warning(disable:4312) // conversion from 'CUdeviceptr' to 'uint32_t *' of greater size - -PxTaskID IofxManagerGPU::launchGpuTasks() -{ - PxTaskManager* tm = mIofxScene.mApexScene->getTaskManager(); - tm->submitUnnamedTask(*mTaskLaunch, PxTaskType::TT_GPU); - mTaskLaunch->finishBefore(mManager.mPostUpdateTaskID); - return mTaskLaunch->getTaskID(); -} - -void IofxManagerGPU::launchPrep() -{ - IosObjectGpuData* mWorkingData = DYNAMIC_CAST(IosObjectGpuData*)(mManager.mWorkingIosData); - - if (!mWorkingData->numParticles) - { - mEmptySimulation = true; - return; - } - - mCurSeed = static_cast<uint32_t>(mIofxScene.mApexScene->getSeed()); - - PxTaskManager* tm = mIofxScene.mApexScene->getTaskManager(); - PxCudaContextManager* ctx = tm->getGpuDispatcher()->getCudaContextManager(); - { - PxScopedCudaLock s(*ctx); - - mTargetTextureCount = 0; - mTargetBufDevPtr = 0; - if (!mManager.mIsMesh) - { - IofxSharedRenderDataSpriteImpl* spriteRenderData = DYNAMIC_CAST(IofxSharedRenderDataSpriteImpl*)(mWorkingData->renderData); - const IofxSpriteRenderLayout& spriteRenderLayout = spriteRenderData->getRenderLayout(); - - mTargetTextureCount = spriteRenderLayout.surfaceCount; - for( uint32_t i = 0; i < mTargetTextureCount; ++i ) - { - const CUarray cuArray = spriteRenderData->getSurfaceMappedCudaArray(i); - if (cuArray != NULL) - { - mTargetCudaArrayList[i].assign(cuArray, false); - } - else - { - CUarray_format format = CUarray_format(0); - uint32_t 
numChannels = 0; - /* Pick the CUDA array element format and channel count for this surface element. */ - switch (spriteRenderLayout.surfaceElements[i]) - { - case IofxSpriteRenderLayoutSurfaceElement::POSITION_FLOAT4: - case IofxSpriteRenderLayoutSurfaceElement::SCALE_ORIENT_SUBTEX_FLOAT4: - case IofxSpriteRenderLayoutSurfaceElement::COLOR_FLOAT4: - format = CU_AD_FORMAT_FLOAT; - numChannels = 4; - break; - case IofxSpriteRenderLayoutSurfaceElement::COLOR_RGBA8: - case IofxSpriteRenderLayoutSurfaceElement::COLOR_BGRA8: - /* These elements use one UNSIGNED_INT32 channel. The break keeps this handled case from falling through into the default assert. */ - format = CU_AD_FORMAT_UNSIGNED_INT32; - numChannels = 1; - break; - default: - /* Unhandled surface element: format and numChannels keep their zero initial values. */ - PX_ALWAYS_ASSERT(); - break; - } - const UserRenderSurfaceDesc& desc = spriteRenderLayout.surfaceDescs[i]; - mTargetCudaArrayList[i].create(format, numChannels, uint32_t(desc.width), uint32_t(desc.height), 0, true); - } - } - /* Release the array slots beyond the surfaces used by the current layout. */ - for( uint32_t i = mTargetTextureCount; i < IofxSpriteRenderLayout::MAX_SURFACE_COUNT; ++i ) { - mTargetCudaArrayList[i].release(); - } - } - - /* No surface targets: output goes to a linear buffer instead. */ - if (mTargetTextureCount == 0) - { - const CUdeviceptr cudaPtr = mWorkingData->renderData->getBufferMappedCudaPtr(); - if (cudaPtr != 0) - { - /* The render buffer exposes a CUDA pointer: write to it directly and drop our own buffer. */ - mTargetOutputBuffer.release(); - mTargetBufDevPtr = reinterpret_cast<uint32_t*>(cudaPtr); - } - else - { - /* No mapped CUDA pointer: use mTargetOutputBuffer as the output target (only when the render buffer size is non-zero). */ - const size_t size = mWorkingData->renderData->getRenderBufferSize(); - if (size > 0) - { - mTargetOutputBuffer.realloc(size, ctx); - mTargetBufDevPtr = static_cast<uint32_t*>( mTargetOutputBuffer.getGpuPtr() ); - } - } - } - } - - /* Per-actor-ID arrays are sized to mCountActorIDs + 2 entries. */ - const uint32_t numActorIDValues = mCountActorIDs + 2; - mCuActorStart.setSize(numActorIDValues, ApexMirroredPlace::CPU_GPU); - mCuActorEnd.setSize(numActorIDValues, ApexMirroredPlace::CPU_GPU); - mCuActorVisibleEnd.setSize(numActorIDValues, ApexMirroredPlace::CPU_GPU); - mCuMinBounds.setSize(numActorIDValues, ApexMirroredPlace::CPU_GPU); - mCuMaxBounds.setSize(numActorIDValues, ApexMirroredPlace::CPU_GPU); - - mCuSortedActorIDs.setSize(mWorkingData->maxStateID, mDefaultPlace); - mCuSortedStateIDs.setSize(mWorkingData->maxStateID, mDefaultPlace); - - mManager.positionMass.setSize(mWorkingData->maxInputID, 
ApexMirroredPlace::CPU_GPU); - mManager.velocityLife.setSize(mWorkingData->maxInputID, ApexMirroredPlace::CPU_GPU); - mManager.actorIdentifiers.setSize(mWorkingData->maxInputID, ApexMirroredPlace::CPU_GPU); - mManager.inStateToInput.setSize(mWorkingData->maxStateID, ApexMirroredPlace::CPU_GPU); - mManager.outStateToInput.setSize(mWorkingData->numParticles, ApexMirroredPlace::CPU_GPU); - if (mWorkingData->iosSupportsCollision) - { - mManager.collisionNormalFlags.setSize(mWorkingData->maxInputID, ApexMirroredPlace::CPU_GPU); - } - if (mWorkingData->iosSupportsDensity) - { - mManager.density.setSize(mWorkingData->maxInputID, ApexMirroredPlace::CPU_GPU); - } - if (mWorkingData->iosSupportsUserData) - { - mManager.userData.setSize(mWorkingData->maxInputID, ApexMirroredPlace::CPU_GPU); - } - - mEmptySimulation = false; -} - -#pragma warning(pop) - - -/// -PX_INLINE uint32_t getHighestBitShift(uint32_t x) -{ - PX_ASSERT(isPowerOfTwo(x)); - return highestSetBit(x); -} - -void IofxManagerGPU::cudaLaunchRadixSort(CUstream stream, unsigned int numElements, unsigned int keyBits, unsigned int startBit, bool useSyncKernels) -{ - if (useSyncKernels) - { - //we use OLD Radix Sort on Tesla (SM < 2), because it is faster - CUDA_OBJ(radixSortSyncKernel)( - stream, numElements, - mCuSortedActorIDs.getGpuPtr(), mCuSortedStateIDs.getGpuPtr(), - mCuSortTempKeys.getGpuPtr(), mCuSortTempValues.getGpuPtr(), - mCuSortTemp.getGpuPtr(), keyBits, startBit - ); - } - else - { -#if 1 - //NEW Radix Sort - unsigned int totalThreads = (numElements + NEW_SORT_VECTOR_SIZE - 1) / NEW_SORT_VECTOR_SIZE; - if (CUDA_OBJ(newRadixSortBlockKernel).isSingleBlock(totalThreads)) - { - //launch just a single block for small sizes - CUDA_OBJ(newRadixSortBlockKernel)( - stream, APEX_CUDA_SINGLE_BLOCK_LAUNCH, - numElements, keyBits, startBit, - mCuSortedActorIDs.getGpuPtr(), mCuSortedStateIDs.getGpuPtr() - ); - } - else - { - for (unsigned int bit = startBit; bit < startBit + keyBits; bit += RADIX_SORT_NBITS) - { - 
uint32_t gridSize = - CUDA_OBJ(newRadixSortStepKernel)( - stream, totalThreads, - numElements, bit, - mCuSortedActorIDs.getGpuPtr(), mCuSortedStateIDs.getGpuPtr(), - mCuSortTempKeys.getGpuPtr(), mCuSortTempValues.getGpuPtr(), - mCuSortTemp.getGpuPtr(), - 1, 0 - ); - - //launch just a single block - CUDA_OBJ(newRadixSortStepKernel)( - stream, APEX_CUDA_SINGLE_BLOCK_LAUNCH, - numElements, bit, - mCuSortedActorIDs.getGpuPtr(), mCuSortedStateIDs.getGpuPtr(), - mCuSortTempKeys.getGpuPtr(), mCuSortTempValues.getGpuPtr(), - mCuSortTemp.getGpuPtr(), - 2, gridSize - ); - - CUDA_OBJ(newRadixSortStepKernel)( - stream, totalThreads, - numElements, bit, - mCuSortedActorIDs.getGpuPtr(), mCuSortedStateIDs.getGpuPtr(), - mCuSortTempKeys.getGpuPtr(), mCuSortTempValues.getGpuPtr(), - mCuSortTemp.getGpuPtr(), - 3, 0 - ); - - mCuSortedActorIDs.swapGpuPtr(mCuSortTempKeys); - mCuSortedStateIDs.swapGpuPtr(mCuSortTempValues); - } - } -#else - //OLD Radix Sort - for (unsigned int startBit = 0; startBit < keyBits; startBit += RADIX_SORT_NBITS) - { - int gridSize = - CUDA_OBJ(radixSortStep1Kernel)( - stream, numElements, - mCuSortedActorIDs.getGpuPtr(), mCuSortedStateIDs.getGpuPtr(), - mCuSortTempKeys.getGpuPtr(), mCuSortTempValues.getGpuPtr(), - mCuSortTemp.getGpuPtr(), startBit - ); - - //launch just 1 block - CUDA_OBJ(radixSortStep2Kernel)( - stream, CUDA_OBJ(radixSortStep2Kernel).getBlockDim().x, - mCuSortTemp.getGpuPtr(), gridSize - ); - - CUDA_OBJ(radixSortStep3Kernel)( - stream, numElements, - mCuSortedActorIDs.getGpuPtr(), mCuSortedStateIDs.getGpuPtr(), - mCuSortTempKeys.getGpuPtr(), mCuSortTempValues.getGpuPtr(), - mCuSortTemp.getGpuPtr(), startBit - ); - } -#endif - } -} - -bool IofxManagerGPU::cudaLaunch(CUstream stream, int kernelIndex) -{ - PxTaskManager* tm = mIofxScene.mApexScene->getTaskManager(); - - if (mEmptySimulation) - { - return false; - } - - const uint32_t numActorIDValues = mCountActorIDs + 2; - //value < mCountActorIDs - valid particle with volume - //value == 
mCountActorIDs - homeless particle (no volume or invalid actor class) - //value == mCountActorIDs + 1 - NOT_A_PARTICLE - - - IofxSceneGPU* sceneGPU = static_cast<IofxSceneGPU*>(&mIofxScene); - bool useSyncKernels = !sceneGPU->getGpuDispatcher()->getCudaContextManager()->supportsArchSM20(); - - IosObjectGpuData* mWorkingData = DYNAMIC_CAST(IosObjectGpuData*)(mManager.mWorkingIosData); - - switch (kernelIndex) - { - case 0: - if (mManager.mOnStartCallback) - { - (*mManager.mOnStartCallback)(stream); - } - mCopyQueue.reset(stream, 24); - if (!mManager.mCudaIos && mWorkingData->maxInputID > 0) - { - mManager.positionMass.copyHostToDeviceQ(mCopyQueue); - mManager.velocityLife.copyHostToDeviceQ(mCopyQueue); - mManager.actorIdentifiers.copyHostToDeviceQ(mCopyQueue); - mManager.inStateToInput.copyHostToDeviceQ(mCopyQueue); - if (mWorkingData->iosSupportsCollision) - { - mManager.collisionNormalFlags.copyHostToDeviceQ(mCopyQueue); - } - if (mWorkingData->iosSupportsDensity) - { - mManager.density.copyHostToDeviceQ(mCopyQueue); - } - if (mWorkingData->iosSupportsUserData) - { - mManager.userData.copyHostToDeviceQ(mCopyQueue); - } - mCopyQueue.flushEnqueued(); - } - break; - - case 1: - /* Volume Migration (input space) */ - CUDA_OBJ(volumeMigrationKernel)(stream, - PxMax(mWorkingData->maxInputID, numActorIDValues), - mVolumeConstMemGroup.getStorage().mappedHandle(mVolumeParamsArrayHandle), - mVolumeConstMemGroup.getStorage().mappedHandle(mActorClassIDBitmapArrayHandle), - mNumberActorClasses, mNumberVolumes, numActorIDValues, - mManager.actorIdentifiers.getGpuPtr(), mWorkingData->maxInputID, - (const float4*)mManager.positionMass.getGpuPtr(), - mCuActorStart.getGpuPtr(), mCuActorEnd.getGpuPtr(), mCuActorVisibleEnd.getGpuPtr() - ); - break; - - case 2: - { - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefRemapPositions, mManager.positionMass) - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefRemapActorIDs, mManager.actorIdentifiers) - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefRemapInStateToInput, 
mManager.inStateToInput) - - /* if mDistanceSortingEnabled, sort on camera distance first, else directly make ActorID keys */ - CUDA_OBJ(makeSortKeys)(stream, mWorkingData->maxStateID, - mManager.inStateToInput.getGpuPtr(), mWorkingData->maxInputID, - mManager.mActorTable.size(), mCountActorIDs, - mRemapConstMemGroup.getStorage().mappedHandle(mActorIDRemapArrayHandle), - (const float4*)mManager.positionMass.getGpuPtr(), mManager.mDistanceSortingEnabled, - mWorkingData->eyePosition, mWorkingData->eyeDirection, mWorkingData->zNear, - mCuSortedActorIDs.getGpuPtr(), mCuSortedStateIDs.getGpuPtr()); - - if (mManager.mDistanceSortingEnabled) - { - cudaLaunchRadixSort(stream, mWorkingData->maxStateID, 32, 0, useSyncKernels); - - /* Generate ActorID sort keys, using distance sorted stateID values */ - CUDA_OBJ(remapKernel)(stream, mWorkingData->maxStateID, - mManager.inStateToInput.getGpuPtr(), mWorkingData->maxInputID, - mManager.mActorTable.size(), mCountActorIDs, - mRemapConstMemGroup.getStorage().mappedHandle(mActorIDRemapArrayHandle), - mCuSortedStateIDs.getGpuPtr(), mCuSortedActorIDs.getGpuPtr()); - } - } - break; - - case 3: - /* ActorID Sort (output state space) */ - // input: mCuSortedActorIDs == actorIDs, in distance sorted order - // input: mCuSortedStateIDs == stateIDs, in distance sorted order - - // output: mCuSortedActorIDs == sorted ActorIDs - // output: mCuSortedStateIDs == output-to-input state - { - //SortedActorIDs could contain values from 0 to mCountActorIDs + 1 (included), - //so keybits should cover at least mCountActorIDs + 2 numbers - uint32_t keybits = 0; - while ((1U << keybits) < numActorIDValues) - { - ++keybits; - } - - cudaLaunchRadixSort(stream, mWorkingData->maxStateID, keybits, 0, useSyncKernels); - } - break; - - case 4: - /* Per-IOFX actor particle range detection */ - CUDA_OBJ(actorRangeKernel)(stream, mWorkingData->maxStateID, - mCuSortedActorIDs.getGpuPtr(), mCountActorIDs, - mCuActorStart.getGpuPtr(), mCuActorEnd.getGpuPtr(), 
mCuActorVisibleEnd.getGpuPtr(), - mCuSortedStateIDs.getGpuPtr() - ); - break; - - case 5: - /* Modifiers (output state space) */ - { - PX_PROFILE_ZONE("IofxManagerGPUModifiers", GetInternalApexSDK()->getContextId()); - ModifierCommonParams commonParams = mWorkingData->getCommonParams(); - - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefPositionMass, mManager.positionMass) - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefVelocityLife, mManager.velocityLife) - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefInStateToInput, mManager.inStateToInput) - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefStateSpawnSeed, mCuSpawnSeed) - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefStateSpawnScale, mCuSpawnScale) - - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefActorIDs, mManager.actorIdentifiers) - - if (mWorkingData->iosSupportsCollision) - { - CUDA_OBJ(texRefCollisionNormalFlags).bindTo(mManager.collisionNormalFlags); - } - if (mWorkingData->iosSupportsDensity) - { - CUDA_OBJ(texRefDensity).bindTo(mManager.density); - } - if (mWorkingData->iosSupportsUserData) - { - CUDA_OBJ(texRefUserData).bindTo(mManager.userData); - } - - PRNGInfo rand; - rand.g_stateSpawnSeed = mCuSpawnSeed.getGpuPtr(); - rand.g_randBlock = mCuBlockPRNGs.getGpuPtr(); - rand.randGrid = mRandGridLeap; - rand.randThread = mRandThreadLeap; - rand.seed = mCurSeed; - - if (mManager.mIsMesh) - { - // 3x3 matrix => 9 float scalars => 3 slices - - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefMeshPrivState0, *mManager.privState.slices[0]); - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefMeshPrivState1, *mManager.privState.slices[1]); - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefMeshPrivState2, *mManager.privState.slices[2]); - - MeshPrivateStateArgs meshPrivStateArgs; - meshPrivStateArgs.g_state[0] = mManager.privState.a[0]; - meshPrivStateArgs.g_state[1] = mManager.privState.a[1]; - meshPrivStateArgs.g_state[2] = mManager.privState.a[2]; - - CUDA_OBJ(meshModifiersKernel)(ApexKernelConfig(MAX_SMEM_BANKS * mOutputDWords, WARP_SIZE * PxMax<uint32_t>(mOutputDWords, 4)), - stream, mWorkingData->numParticles, - 
mManager.mInStateOffset, mManager.mOutStateOffset, - mModifierConstMemGroup.getStorage().mappedHandle(mClientParamsHandleArrayHandle), - commonParams, - mCuSortedActorIDs.getGpuPtr(), mCuSortedStateIDs.getGpuPtr(), - mManager.outStateToInput.getGpuPtr(), - meshPrivStateArgs, mCuSpawnScale.getGpuPtr(), - rand, mTargetBufDevPtr, - mModifierConstMemGroup.getStorage().mappedHandle(mMeshOutputLayoutHandle) - ); - } - else - { - // 1 float scalar => 1 slice - - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefSpritePrivState0, *mManager.privState.slices[0]); - - SpritePrivateStateArgs spritePrivStateArgs; - spritePrivStateArgs.g_state[0] = mManager.privState.a[0]; - - IofxSharedRenderDataSpriteImpl* renderDataSprite = static_cast<IofxSharedRenderDataSpriteImpl*>(mWorkingData->renderData); - const IofxSpriteRenderLayout& spriteRenderLayout = renderDataSprite->getRenderLayout(); - - if (mTargetTextureCount > 0) - { - SpriteTextureOutputLayout outputLayout; - outputLayout.textureCount = mTargetTextureCount; - for (uint32_t i = 0; i < outputLayout.textureCount; ++i) - { - outputLayout.textureData[i].layout = static_cast<uint16_t>(spriteRenderLayout.surfaceElements[i]); - - uint32_t width = mTargetCudaArrayList[i].getWidth(); - //width should be a power of 2 and a multiply of WARP_SIZE - PX_ASSERT(isPowerOfTwo(width)); - PX_ASSERT((width & (WARP_SIZE - 1)) == 0); - outputLayout.textureData[i].widthShift = static_cast<uint8_t>(highestSetBit(width)); - - outputLayout.textureData[i].pitchShift = 0; //unused in GPU mode! - outputLayout.texturePtr[i] = NULL; //unused in GPU mode! 
- } - - if (0 < outputLayout.textureCount) APEX_CUDA_SURFACE_BIND(surfRefOutput0, mTargetCudaArrayList[0], ApexCudaMemFlags::OUT); - if (1 < outputLayout.textureCount) APEX_CUDA_SURFACE_BIND(surfRefOutput1, mTargetCudaArrayList[1], ApexCudaMemFlags::OUT); - if (2 < outputLayout.textureCount) APEX_CUDA_SURFACE_BIND(surfRefOutput2, mTargetCudaArrayList[2], ApexCudaMemFlags::OUT); - if (3 < outputLayout.textureCount) APEX_CUDA_SURFACE_BIND(surfRefOutput3, mTargetCudaArrayList[3], ApexCudaMemFlags::OUT); - - CUDA_OBJ(spriteTextureModifiersKernel)(stream, mWorkingData->numParticles, - mManager.mInStateOffset, mManager.mOutStateOffset, - mModifierConstMemGroup.getStorage().mappedHandle(mClientParamsHandleArrayHandle), - commonParams, - mCuSortedActorIDs.getGpuPtr(), mCuSortedStateIDs.getGpuPtr(), - mManager.outStateToInput.getGpuPtr(), - spritePrivStateArgs, mCuSpawnScale.getGpuPtr(), - rand, outputLayout - ); - - if (0 < outputLayout.textureCount) APEX_CUDA_SURFACE_UNBIND(surfRefOutput0); - if (1 < outputLayout.textureCount) APEX_CUDA_SURFACE_UNBIND(surfRefOutput1); - if (2 < outputLayout.textureCount) APEX_CUDA_SURFACE_UNBIND(surfRefOutput2); - if (3 < outputLayout.textureCount) APEX_CUDA_SURFACE_UNBIND(surfRefOutput3); - } - else - { - CUDA_OBJ(spriteModifiersKernel)(ApexKernelConfig(MAX_SMEM_BANKS * mOutputDWords, WARP_SIZE * PxMax<uint32_t>(mOutputDWords, 4)), - stream, mWorkingData->numParticles, - mManager.mInStateOffset, mManager.mOutStateOffset, - mModifierConstMemGroup.getStorage().mappedHandle(mClientParamsHandleArrayHandle), - commonParams, - mCuSortedActorIDs.getGpuPtr(), mCuSortedStateIDs.getGpuPtr(), - mManager.outStateToInput.getGpuPtr(), - spritePrivStateArgs, mCuSpawnScale.getGpuPtr(), - rand, mTargetBufDevPtr, - mModifierConstMemGroup.getStorage().mappedHandle(mSpriteOutputLayoutHandle) - ); - } - } - - if (mWorkingData->iosSupportsCollision) - { - CUDA_OBJ(texRefCollisionNormalFlags).unbind(); - } - if (mWorkingData->iosSupportsDensity) - { - 
CUDA_OBJ(texRefDensity).unbind(); - } - if (mWorkingData->iosSupportsUserData) - { - CUDA_OBJ(texRefUserData).unbind(); - } - } - break; - - case 6: - if (mCountActorIDs > 0) - { - /* Per-IOFX actor BBox generation */ - APEX_CUDA_TEXTURE_SCOPE_BIND(texRefBBoxPositions, mManager.positionMass) - - if (useSyncKernels) - { - CUDA_OBJ(bboxSyncKernel)( - stream, mWorkingData->numParticles, - mCuSortedActorIDs.getGpuPtr(), - mManager.outStateToInput.getGpuPtr(), - (const float4*)mManager.positionMass.getGpuPtr(), - (float4*)mCuMinBounds.getGpuPtr(), (float4*)mCuMaxBounds.getGpuPtr(), - mCuTempActorIDs.getGpuPtr(), - (float4*)mCuTempMinBounds.getGpuPtr(), (float4*)mCuTempMaxBounds.getGpuPtr() - ); - } - else - { - uint32_t bboxGridSize = - CUDA_OBJ(bboxKernel)( - stream, mWorkingData->numParticles, - mCuSortedActorIDs.getGpuPtr(), - mManager.outStateToInput.getGpuPtr(), - (const float4*)mManager.positionMass.getGpuPtr(), - (float4*)mCuMinBounds.getGpuPtr(), (float4*)mCuMaxBounds.getGpuPtr(), - mCuTempActorIDs.getGpuPtr(), - (float4*)mCuTempMinBounds.getGpuPtr(), (float4*)mCuTempMaxBounds.getGpuPtr(), - 1, 0 - ); - - CUDA_OBJ(bboxKernel)( - stream, APEX_CUDA_SINGLE_BLOCK_LAUNCH, - mCuSortedActorIDs.getGpuPtr(), - mManager.outStateToInput.getGpuPtr(), - (const float4*)mManager.positionMass.getGpuPtr(), - (float4*)mCuMinBounds.getGpuPtr(), (float4*)mCuMaxBounds.getGpuPtr(), - mCuTempActorIDs.getGpuPtr(), - (float4*)mCuTempMinBounds.getGpuPtr(), (float4*)mCuTempMaxBounds.getGpuPtr(), - 2, bboxGridSize - ); - } - } - break; - - case 7: - if (mTargetTextureCount > 0) - { - IofxSharedRenderDataSpriteImpl* spriteRenderData = DYNAMIC_CAST(IofxSharedRenderDataSpriteImpl*)(mWorkingData->renderData); - PX_ASSERT(spriteRenderData->getRenderLayout().surfaceCount == mTargetTextureCount); - - for (uint32_t i = 0; i < mTargetTextureCount; ++i) - { - UserRenderSurface::MappedInfo mappedInfo; - if (spriteRenderData->getSurfaceMappedInfo(i, mappedInfo)) - { - const size_t surfaceWidth = 
spriteRenderData->getRenderLayout().surfaceDescs[i].width; - size_t copyHeight = (mWorkingData->numParticles + surfaceWidth - 1) / surfaceWidth; - mTargetCudaArrayList[i].copyToHost(stream, mappedInfo.pData, mappedInfo.rowPitch, 0, 0, copyHeight); - } - } - } - else - { - void* mappedPtr = mWorkingData->renderData->getBufferMappedPtr(); - if (mappedPtr) - { - size_t size = (mOutputDWords << 2) * mWorkingData->numParticles; - mTargetOutputBuffer.copyToHost(stream, mappedPtr, size); - } - } - if (mCountActorIDs > 0) - { - mCuMinBounds.copyDeviceToHostQ(mCopyQueue); - mCuMaxBounds.copyDeviceToHostQ(mCopyQueue); - } - mCuActorStart.copyDeviceToHostQ(mCopyQueue); - mCuActorEnd.copyDeviceToHostQ(mCopyQueue); - mCuActorVisibleEnd.copyDeviceToHostQ(mCopyQueue); - - - if (mCuSortedActorIDs.cpuPtrIsValid()) - { - mManager.inStateToInput.copyDeviceToHostQ(mCopyQueue); - mManager.actorIdentifiers.copyDeviceToHostQ(mCopyQueue); - mManager.outStateToInput.copyDeviceToHostQ(mCopyQueue); - mManager.positionMass.copyDeviceToHostQ(mCopyQueue); - - mCuSortedActorIDs.copyDeviceToHostQ(mCopyQueue); - mCuSortedStateIDs.copyDeviceToHostQ(mCopyQueue); - } - else if (!mManager.mCudaIos) - { - mManager.actorIdentifiers.copyDeviceToHostQ(mCopyQueue); - mManager.outStateToInput.copyDeviceToHostQ(mCopyQueue); - } - - mCopyQueue.flushEnqueued(); - - if (mManager.mOnFinishCallback) - { - (*mManager.mOnFinishCallback)(stream); - } - - tm->getGpuDispatcher()->addCompletionPrereq(*tm->getTaskFromID(mManager.mPostUpdateTaskID)); - return false; - - default: - PX_ALWAYS_ASSERT(); - return false; - } - - return true; -} - -void IofxManagerGPU::fetchResults() -{ - IosObjectGpuData* mWorkingData = DYNAMIC_CAST(IosObjectGpuData*)(mManager.mWorkingIosData); - PX_UNUSED(mWorkingData); - -#if 0 - { - ApexMirroredArray<uint32_t> actorID(*mIofxScene.mApexScene); - ApexMirroredArray<PxVec4> outMinBounds(*mIofxScene.mApexScene); - ApexMirroredArray<PxVec4> outMaxBounds(*mIofxScene.mApexScene); - 
ApexMirroredArray<PxVec4> outDebugInfo(*mIofxScene.mApexScene); - ApexMirroredArray<uint32_t> tmpLastActorID(*mIofxScene.mApexScene); - tmpLastActorID.setSize(64, ApexMirroredPlace::CPU_GPU); - - const uint32_t NE = 2000; - actorID.setSize(NE, ApexMirroredPlace::CPU_GPU); - - Array<uint32_t> actorCounts; - actorCounts.reserve(1000); - - uint32_t NA = 0; - for (uint32_t ie = 0; ie < NE; ++NA) - { - uint32_t num_ie = rand(1, 100); // We need to use QDSRand here s.t. seed could be preset during tests! - uint32_t next_ie = PxMin(ie + num_ie, NE); - - actorCounts.pushBack(next_ie - ie); - - for (; ie < next_ie; ++ie) - { - actorID[ie] = NA; - } - } - outMinBounds.setSize(NA, ApexMirroredPlace::CPU_GPU); - outMaxBounds.setSize(NA, ApexMirroredPlace::CPU_GPU); - outDebugInfo.setSize(NA, ApexMirroredPlace::CPU_GPU); - - for (uint32_t ia = 0; ia < NA; ++ia) - { - outMinBounds[ia].setZero(); - outMaxBounds[ia].setZero(); - } - - PxTaskManager* tm = mIofxScene.mApexScene->getTaskManager(); - PxCudaContextManager* ctx = tm->getGpuDispatcher()->getCudaContextManager(); - PxScopedCudaLock s(*ctx); - - mCopyQueue.reset(0, 4); - - actorID.copyHostToDeviceQ(mCopyQueue); - outMinBounds.copyHostToDeviceQ(mCopyQueue); - outMaxBounds.copyHostToDeviceQ(mCopyQueue); - mCopyQueue.flushEnqueued(); - - CUDA_OBJ(bboxKernel2)(0, NE, actorID.getGpuPtr(), NULL, 0, (float4*)outDebugInfo.getGpuPtr(), (float4*)outMinBounds.getGpuPtr(), (float4*)outMaxBounds.getGpuPtr()/*, tmpLastActorID.getGpuPtr()*/); - - outMinBounds.copyDeviceToHostQ(mCopyQueue); - outMaxBounds.copyDeviceToHostQ(mCopyQueue); - outDebugInfo.copyDeviceToHostQ(mCopyQueue); - tmpLastActorID.copyDeviceToHostQ(mCopyQueue); - mCopyQueue.flushEnqueued(); - - CUT_SAFE_CALL(cuCtxSynchronize()); - - uint32_t errors = 0; - float totCount = 0; - for (uint32_t ie = 0; ie < NE; ++ie) - { - uint32_t id = actorID[ie]; - if (ie == 0 || actorID[ie - 1] != id) - { - uint32_t count = actorCounts[id]; - const PxVec4& bounds = outMinBounds[id]; - if 
(bounds.x != count) - { - ++errors; - } - if (bounds.y != count * 2) - { - ++errors; - } - if (bounds.z != count * 3) - { - ++errors; - } - totCount += count; - } - } - - } -#endif - -#if 0 - { - PxTaskManager* tm = mIofxScene.mApexScene->getTaskManager(); - PxCudaContextManager* ctx = tm->getGpuDispatcher()->getCudaContextManager(); - - PxScopedCudaLock s(*ctx); - - CUT_SAFE_CALL(cuCtxSynchronize()); - } -#endif -#if DEBUG_GPU - { - nvidia::Array<int> valuesCounters(mWorkingData->maxStateID, 0); - uint32_t lastKey = uint32_t(-1); - for (uint32_t i = 0; i < mWorkingData->maxStateID; ++i) - { - uint32_t currKey = mCuSortedActorIDs.get(i); - PX_ASSERT(currKey < mCountActorIDs + 2); - if (lastKey != uint32_t(-1)) - { - PX_ASSERT(lastKey <= currKey); - } - if (lastKey != currKey) - { - if (mCuActorStart[currKey] != i) - { - int temp = 0; - temp++; - } - PX_ASSERT(mCuActorStart[currKey] == i); - if (lastKey != uint32_t(-1)) - { - if (mCuActorEnd[lastKey] != i) - { - int temp = 0; - temp++; - } - PX_ASSERT(mCuActorEnd[lastKey] == i); - } - } - lastKey = currKey; - - uint32_t currValue = (mCuSortedStateIDs.get(i) & STATE_ID_MASK); - PX_ASSERT(currValue < mWorkingData->maxStateID); - if (currValue < mWorkingData->maxStateID) - { - valuesCounters[currValue] += 1; - } - } - if (lastKey != uint32_t(-1)) - { - PX_ASSERT(mCuActorEnd[lastKey] == mWorkingData->maxStateID); - } - for (uint32_t i = 0; i < mWorkingData->maxStateID; ++i) - { - PX_ASSERT(valuesCounters[i] == 1); - } - } -#endif - - /* Swap input/output state offsets */ - mManager.swapStates(); - - if (mEmptySimulation) - { - for (uint32_t i = 0 ; i < mNumberVolumes ; i++) - { - IofxManager::VolumeData& d = mManager.mVolumeTable[ i ]; - if (d.vol == 0) - { - continue; - } - - for (uint32_t j = 0 ; j < mManager.mActorTable.size() ; j++) - { - IofxActorImpl* iofx = d.mActors[ j ]; - if (iofx && iofx != DEFERRED_IOFX_ACTOR) - { - iofx->mResultBounds.setEmpty(); - iofx->mResultRange.startIndex = 0; - 
iofx->mResultRange.objectCount = 0; - iofx->mResultVisibleCount = 0; - } - } - } - } - else - { - PX_ASSERT(mCuActorStart.cpuPtrIsValid() && mCuActorEnd.cpuPtrIsValid()); - if (!mCuActorStart.cpuPtrIsValid() || !mCuActorEnd.cpuPtrIsValid()) - { - // Workaround for issue seen by a customer - APEX_INTERNAL_ERROR("Bad cpuPtr in IofxManagerGPU::fetchResults"); - return; - } -#ifndef NDEBUG - //check Actor Ranges - { - uint32_t totalCount = 0; - //range with the last index (= mCountActorIDs) contains homeless particles! - for (uint32_t i = 0 ; i <= mCountActorIDs ; i++) - { - const uint32_t rangeStart = mCuActorStart[ i ]; - const uint32_t rangeEnd = mCuActorEnd[ i ]; - const uint32_t rangeVisibleEnd = mCuActorVisibleEnd[ i ]; - - PX_ASSERT(rangeStart < mWorkingData->numParticles); - PX_ASSERT(rangeEnd <= mWorkingData->numParticles); - PX_ASSERT(rangeStart <= rangeEnd); - PX_ASSERT(rangeStart <= rangeVisibleEnd && rangeVisibleEnd <= rangeEnd); - PX_UNUSED(rangeVisibleEnd); - - const uint32_t rangeCount = rangeEnd - rangeStart; - totalCount += rangeCount; - } - PX_ASSERT(totalCount == mWorkingData->numParticles); - } -#endif - - uint32_t aid = 0; - for (uint32_t i = 0 ; i < mNumberVolumes ; i++) - { - IofxManager::VolumeData& d = mManager.mVolumeTable[ i ]; - if (d.vol == 0) - { - aid += mManager.mActorTable.size(); - continue; - } - - for (uint32_t j = 0 ; j < mManager.mActorTable.size() ; j++) - { - const uint32_t rangeStart = mCuActorStart[ aid ]; - const uint32_t rangeEnd = mCuActorEnd[ aid ]; - const uint32_t rangeVisibleEnd = mCuActorVisibleEnd[ aid ]; - - const uint32_t rangeCount = rangeEnd - rangeStart; - const uint32_t visibleCount = rangeVisibleEnd - rangeStart; - - if (d.mActors[ j ] == DEFERRED_IOFX_ACTOR && mManager.mActorTable[ j ] != NULL && - (mIofxScene.mModule->mDeferredDisabled || rangeCount)) - { - IofxActorImpl* iofxActor = PX_NEW(IofxActorGPU)(mManager.mActorTable[j]->getRenderResID(), &mIofxScene, mManager); - if (d.vol->addIofxActor(*iofxActor)) 
- { - d.mActors[ j ] = iofxActor; - - mManager.initIofxActor(iofxActor, j, d.vol); - - // lock this renderable because the APEX scene will unlock it after this method is called - iofxActor->renderDataLock(); - } - else - { - iofxActor->release(); - } - } - - IofxActorImpl* iofxActor = d.mActors[ j ]; - if (iofxActor && iofxActor != DEFERRED_IOFX_ACTOR) - { - iofxActor->mResultBounds.setEmpty(); - if (rangeCount > 0) - { - iofxActor->mResultBounds.minimum = mCuMinBounds[ aid ].getXYZ(); - iofxActor->mResultBounds.maximum = mCuMaxBounds[ aid ].getXYZ(); - } - PX_ASSERT(iofxActor->mRenderBounds.isFinite()); - iofxActor->mResultRange.startIndex = rangeStart; - iofxActor->mResultRange.objectCount = rangeCount; - iofxActor->mResultVisibleCount = visibleCount; - } - - aid++; - } - } - } - -} - - -/** - * Called from render thread context, just before renderer calls update/dispatch on any IOFX - * actors. Map/Unmap render resources as required. "Mapped" means the graphics buffer has been - * mapped into our CUDA context where our kernels can write directly into it. - */ -void IofxManager::fillMapUnmapArraysForInterop(nvidia::Array<CUgraphicsResource> &toMapArray, nvidia::Array<CUgraphicsResource> &toUnmapArray) -{ - if (mInteropFlags == RenderInteropFlags::CUDA_INTEROP) - { - mResultIosData->renderData->fillMapUnmapArraysForInterop(toMapArray, toUnmapArray); - mStagingIosData->renderData->fillMapUnmapArraysForInterop(toMapArray, toUnmapArray); - } -} - - -void IofxManager::mapBufferResultsForInterop(bool mapSuccess, bool unmapSuccess) -{ - if (mInteropFlags == RenderInteropFlags::CUDA_INTEROP) - { - mResultIosData->renderData->mapBufferResultsForInterop(mapSuccess, unmapSuccess); - mStagingIosData->renderData->mapBufferResultsForInterop(mapSuccess, unmapSuccess); - } -} - -} -} // namespace nvidia - -#endif |