diff options
| author | sschirm <[email protected]> | 2016-12-23 14:20:36 +0100 |
|---|---|---|
| committer | sschirm <[email protected]> | 2016-12-23 14:56:17 +0100 |
| commit | ef6937e69e8ee3f409cf9d460d5ad300a65d5924 (patch) | |
| tree | 710426e8daa605551ce3f34b581897011101c30f /APEX_1.4/module/fieldsampler/src/FieldSamplerQuery.cpp | |
| parent | Initial commit: (diff) | |
| download | physx-3.4-ef6937e69e8ee3f409cf9d460d5ad300a65d5924.tar.xz physx-3.4-ef6937e69e8ee3f409cf9d460d5ad300a65d5924.zip | |
PhysX 3.4 / APEX 1.4 release candidate @21506124
Diffstat (limited to 'APEX_1.4/module/fieldsampler/src/FieldSamplerQuery.cpp')
| -rw-r--r-- | APEX_1.4/module/fieldsampler/src/FieldSamplerQuery.cpp | 876 |
1 files changed, 0 insertions, 876 deletions
diff --git a/APEX_1.4/module/fieldsampler/src/FieldSamplerQuery.cpp b/APEX_1.4/module/fieldsampler/src/FieldSamplerQuery.cpp deleted file mode 100644 index bada10fc..00000000 --- a/APEX_1.4/module/fieldsampler/src/FieldSamplerQuery.cpp +++ /dev/null @@ -1,876 +0,0 @@ -/* - * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved. - * - * NVIDIA CORPORATION and its licensors retain all intellectual property - * and proprietary rights in and to this software, related documentation - * and any modifications thereto. Any use, reproduction, disclosure or - * distribution of this software and related documentation without an express - * license agreement from NVIDIA CORPORATION is strictly prohibited. - */ - - -#include "ApexDefs.h" -#include "Apex.h" -#include "FieldSamplerQuery.h" -#include "FieldSamplerManager.h" -#include "FieldSamplerWrapper.h" -#include "FieldSamplerSceneWrapper.h" -#include "FieldBoundaryWrapper.h" - -#include "SceneIntl.h" - -#if APEX_CUDA_SUPPORT -#include "PxGpuTask.h" -#endif - -#include "FieldSamplerCommon.h" - - -namespace nvidia -{ -namespace fieldsampler -{ - - -FieldSamplerQuery::FieldSamplerQuery(const FieldSamplerQueryDescIntl& desc, ResourceList& list, FieldSamplerManager* manager) - : mManager(manager) - , mQueryDesc(desc) - , mAccumVelocity(manager->getApexScene(), PX_ALLOC_INFO("mAccumVelocity", PARTICLES)) - , mOnStartCallback(NULL) - , mOnFinishCallback(NULL) -{ - list.add(*this); -} - -void FieldSamplerQuery::release() -{ - if (mInRelease) - { - return; - } - mInRelease = true; - destroy(); -} - -void FieldSamplerQuery::destroy() -{ - delete this; -} - - -FieldSamplerQuery::SceneInfo* FieldSamplerQuery::findSceneInfo(FieldSamplerSceneWrapper* sceneWrapper) const -{ - for (uint32_t i = 0; i < mSceneList.getSize(); ++i) - { - SceneInfo* sceneInfo = DYNAMIC_CAST(SceneInfo*)(mSceneList.getResource(i)); - if (sceneInfo->getSceneWrapper() == sceneWrapper) - { - return sceneInfo; - } - } - return NULL; -} - - -bool FieldSamplerQuery::addFieldSampler(FieldSamplerWrapper* fieldSamplerWrapper) -{ - const FieldSamplerDescIntl& fieldSamplerDesc = fieldSamplerWrapper->getInternalFieldSamplerDesc(); - float multiplier = 1.0f; - bool result = mManager->getFieldSamplerGroupsFiltering(mQueryDesc.samplerFilterData, fieldSamplerDesc.samplerFilterData, multiplier); - if (result) - { - FieldSamplerSceneWrapper* sceneWrapper = fieldSamplerWrapper->getFieldSamplerSceneWrapper(); - SceneInfo* sceneInfo = findSceneInfo(sceneWrapper); - if (sceneInfo == NULL) - { - sceneInfo = createSceneInfo(sceneWrapper); - } - sceneInfo->addFieldSampler(fieldSamplerWrapper, multiplier); - } - return result; -} - -bool FieldSamplerQuery::removeFieldSampler(FieldSamplerWrapper* fieldSamplerWrapper) -{ - FieldSamplerSceneWrapper* sceneWrapper = fieldSamplerWrapper->getFieldSamplerSceneWrapper(); - SceneInfo* sceneInfo = findSceneInfo(sceneWrapper); - return (sceneInfo != NULL) ? sceneInfo->removeFieldSampler(fieldSamplerWrapper) : false; -} - -void FieldSamplerQuery::clearAllFieldSamplers() -{ - for (uint32_t i = 0; i < mSceneList.getSize(); ++i) - { - SceneInfo* sceneInfo = DYNAMIC_CAST(SceneInfo*)(mSceneList.getResource(i)); - sceneInfo->clearAllFieldSamplers(); - } -} - -void FieldSamplerQuery::submitFieldSamplerQuery(const FieldSamplerQueryDataIntl& data, PxTask* task, PxTask* readyTask) -{ - PX_UNUSED(readyTask); - for (uint32_t i = 0; i < mSceneList.getSize(); ++i) - { - SceneInfo* sceneInfo = DYNAMIC_CAST(SceneInfo*)(mSceneList.getResource(i)); - FieldSamplerSceneIntl* niFieldSamplerScene = sceneInfo->getSceneWrapper()->getInternalFieldSamplerScene(); - const PxTask* fieldSamplerReadyTask = niFieldSamplerScene->onSubmitFieldSamplerQuery(data, readyTask); - if (fieldSamplerReadyTask != 0) - { - task->startAfter(fieldSamplerReadyTask->getTaskID()); - } - } -} - -void FieldSamplerQuery::update() -{ - mPrimarySceneList.clear(); - mSecondarySceneList.clear(); - - for (uint32_t i = 0; i < mSceneList.getSize(); ++i) - { - SceneInfo* sceneInfo = DYNAMIC_CAST(SceneInfo*)(mSceneList.getResource(i)); - sceneInfo->update(); - - if (sceneInfo->getEnabledFieldSamplerCount() > 0 && (sceneInfo->getSceneWrapper()->getInternalFieldSamplerScene() != mQueryDesc.ownerFieldSamplerScene)) - { - ((sceneInfo->getSceneWrapper()->getInternalFieldSamplerSceneDesc().isPrimary) ? mPrimarySceneList : mSecondarySceneList).pushBack(sceneInfo); - } - } -} - -bool FieldSamplerQuery::SceneInfo::update() -{ - mEnabledFieldSamplerCount = 0; - for (uint32_t i = 0; i < mFieldSamplerArray.size(); ++i) - { - if (mFieldSamplerArray[i].mFieldSamplerWrapper->isEnabled()) - { - ++mEnabledFieldSamplerCount; - } - if (mFieldSamplerArray[i].mFieldSamplerWrapper->isEnabledChanged()) - { - mFieldSamplerArrayChanged = true; - } - } - - if (mFieldSamplerArrayChanged) - { - mFieldSamplerArrayChanged = false; - return true; - } - return false; -} -/******************************** CPU Version ********************************/ -class TaskExecute : public PxTask, public UserAllocated -{ -public: - TaskExecute(FieldSamplerQueryCPU* query) : mQuery(query) {} - - const char* getName() const - { - return "FieldSamplerQueryCPU::TaskExecute"; - } - void run() - { - mQuery->execute(); - } - -protected: - FieldSamplerQueryCPU* mQuery; -}; - -FieldSamplerQueryCPU::FieldSamplerQueryCPU(const FieldSamplerQueryDescIntl& desc, ResourceList& list, FieldSamplerManager* manager) - : FieldSamplerQuery(desc, list, manager) -{ - mTaskExecute = PX_NEW(TaskExecute)(this); - - mExecuteCount = 256; -} - -FieldSamplerQueryCPU::~FieldSamplerQueryCPU() -{ - delete mTaskExecute; -} - -PxTaskID FieldSamplerQueryCPU::submitFieldSamplerQuery(const FieldSamplerQueryDataIntl& data, PxTaskID taskID) -{ - PX_ASSERT(data.isDataOnDevice == false); - PX_ASSERT(data.count <= mQueryDesc.maxCount); - if (data.count == 0) - { - return taskID; - } - mQueryData = data; - - mResultField.resize(mExecuteCount); - mWeights.resize(mExecuteCount); - mAccumVelocity.reserve(mQueryDesc.maxCount); - - PxTaskManager* tm = mManager->getApexScene().getTaskManager(); - tm->submitUnnamedTask(*mTaskExecute); - - FieldSamplerQuery::submitFieldSamplerQuery(data, mTaskExecute, NULL); - - mTaskExecute->finishBefore(taskID); - return mTaskExecute->getTaskID(); -} - -void FieldSamplerQueryCPU::execute() -{ - if (mOnStartCallback) - { - (*mOnStartCallback)(NULL); - } - - FieldSamplerIntl::ExecuteData executeData; - - executeData.position = mQueryData.pmaInPosition; - executeData.velocity = mQueryData.pmaInVelocity; - executeData.mass = mQueryData.pmaInMass;// + massOffset; - executeData.resultField = mResultField.begin(); - executeData.positionStride = mQueryData.positionStrideBytes; - executeData.velocityStride = mQueryData.velocityStrideBytes; - executeData.massStride = mQueryData.massStrideBytes; - executeData.indicesMask = 0; - - uint32_t beginIndex; - uint32_t* indices = &beginIndex; - if (mQueryData.pmaInIndices) - { - indices = mQueryData.pmaInIndices; - executeData.indicesMask = ~executeData.indicesMask; - } - - for (uint32_t executeOffset = 0; executeOffset < mQueryData.count; executeOffset += mExecuteCount) - { - const uint32_t positionStride = mQueryData.positionStrideBytes / 4; - const uint32_t velocityStride = mQueryData.velocityStrideBytes / 4; - const uint32_t massStride = mQueryData.massStrideBytes / 4; - //const uint32_t offset = executeOffset * stride; - //const uint32_t massOffset = executeOffset * massStride; - - beginIndex = executeOffset; - executeData.count = PxMin(mExecuteCount, mQueryData.count - executeOffset); - executeData.indices = indices + (executeOffset & executeData.indicesMask); - - PxVec4* accumField = (PxVec4*)(mQueryData.pmaOutField); - PxVec4* accumVelocity = mAccumVelocity.getPtr() + executeOffset; - //clear accum - for (uint32_t i = 0; i < executeData.count; ++i) - { - uint32_t j = executeData.indices[i & executeData.indicesMask] + (i & ~executeData.indicesMask); - accumField[j] = PxVec4(0.0f); - accumVelocity[i] = PxVec4(0.0f); - } - for (uint32_t sceneIdx = 0; sceneIdx < mPrimarySceneList.size(); ++sceneIdx) - { - executeScene(mPrimarySceneList[sceneIdx], executeData, accumField, accumVelocity, positionStride, velocityStride, massStride); - } - - //setup weights for secondary scenes - for (uint32_t i = 0; i < executeData.count; ++i) - { - uint32_t j = executeData.indices[i & executeData.indicesMask] + (i & ~executeData.indicesMask); - accumField[j].w = accumVelocity[i].w; - accumVelocity[i].w = 0.0f; - } - for (uint32_t sceneIdx = 0; sceneIdx < mSecondarySceneList.size(); ++sceneIdx) - { - executeScene(mSecondarySceneList[sceneIdx], executeData, accumField, accumVelocity, positionStride, velocityStride, massStride); - } - - //compose accum field - for (uint32_t i = 0; i < executeData.count; ++i) - { - uint32_t j = executeData.indices[i & executeData.indicesMask] + (i & ~executeData.indicesMask); - float blend = accumField[j].w; - float velW = accumVelocity[i].w; - float weight = blend + velW * (1 - blend); - if (weight >= VELOCITY_WEIGHT_THRESHOLD) - { - PxVec3 result = accumField[j].getXYZ(); - const PxVec3& velocity = *(PxVec3*)(executeData.velocity + j * velocityStride); - result += (accumVelocity[i].getXYZ() - weight * velocity); - accumField[j] = PxVec4(result, 0); - } - } - } - - if (mOnFinishCallback) - { - (*mOnFinishCallback)(NULL); - } -} - -void FieldSamplerQueryCPU::executeScene(const SceneInfo* sceneInfo, - const FieldSamplerIntl::ExecuteData& executeData, - PxVec4* accumField, - PxVec4* accumVelocity, - uint32_t positionStride, - uint32_t velocityStride, - uint32_t massStride) -{ - FieldSamplerExecuteArgs execArgs; - execArgs.elapsedTime = mQueryData.timeStep; - execArgs.totalElapsedMS = mManager->getApexScene().getTotalElapsedMS(); - - const nvidia::Array<FieldSamplerInfo>& fieldSamplerArray = sceneInfo->getFieldSamplerArray(); - for (uint32_t fieldSamplerIdx = 0; fieldSamplerIdx < fieldSamplerArray.size(); ++fieldSamplerIdx) - { - const FieldSamplerWrapperCPU* fieldSampler = DYNAMIC_CAST(FieldSamplerWrapperCPU*)(fieldSamplerArray[fieldSamplerIdx].mFieldSamplerWrapper); - if (fieldSampler->isEnabled()) - { - const float multiplier = fieldSamplerArray[fieldSamplerIdx].mMultiplier; - PX_UNUSED(multiplier); - - const FieldSamplerDescIntl& desc = fieldSampler->getInternalFieldSamplerDesc(); - if (desc.cpuSimulationSupport) - { - const FieldShapeDescIntl& shapeDesc = fieldSampler->getInternalFieldSamplerShape(); - PX_ASSERT(shapeDesc.weight >= 0.0f && shapeDesc.weight <= 1.0f); - - for (uint32_t i = 0; i < executeData.count; ++i) - { - mWeights[i] = 0; - } - - uint32_t boundaryCount = fieldSampler->getFieldBoundaryCount(); - for (uint32_t boundaryIndex = 0; boundaryIndex < boundaryCount; ++boundaryIndex) - { - FieldBoundaryWrapper* fieldBoundaryWrapper = fieldSampler->getFieldBoundaryWrapper(boundaryIndex); - - const nvidia::Array<FieldShapeDescIntl>& fieldShapes = fieldBoundaryWrapper->getFieldShapes(); - for (uint32_t shapeIndex = 0; shapeIndex < fieldShapes.size(); ++shapeIndex) - { - const FieldShapeDescIntl& boundaryShapeDesc = fieldShapes[shapeIndex]; - PX_ASSERT(boundaryShapeDesc.weight >= 0.0f && boundaryShapeDesc.weight <= 1.0f); - - for (uint32_t i = 0; i < executeData.count; ++i) - { - uint32_t j = executeData.indices[i & executeData.indicesMask] + (i & ~executeData.indicesMask); - PxVec3* pos = (PxVec3*)(executeData.position + j * positionStride); - const float excludeWeight = evalFade(evalDistInShape(boundaryShapeDesc, *pos), 0.0f) * boundaryShapeDesc.weight; - mWeights[i] = PxMax(mWeights[i], excludeWeight); - } - } - } - - for (uint32_t i = 0; i < executeData.count; ++i) - { - uint32_t j = executeData.indices[i & executeData.indicesMask] + (i & ~executeData.indicesMask); - PxVec3* pos = (PxVec3*)(executeData.position + j * positionStride); - const float includeWeight = evalFade(evalDistInShape(shapeDesc, *pos), desc.boundaryFadePercentage) * shapeDesc.weight; - const float excludeWeight = mWeights[i]; - mWeights[i] = includeWeight * (1.0f - excludeWeight); -#if FIELD_SAMPLER_MULTIPLIER == FIELD_SAMPLER_MULTIPLIER_WEIGHT - mWeights[i] *= multiplier; -#endif - } - - //execute field - fieldSampler->getInternalFieldSampler()->executeFieldSampler(executeData); - -#if FIELD_SAMPLER_MULTIPLIER == FIELD_SAMPLER_MULTIPLIER_VALUE - const float multiplier = fieldSamplerArray[fieldSamplerIdx].mMultiplier; - for (uint32_t i = 0; i < executeData.count; ++i) - { - executeData.resultField[i] *= multiplier; - } -#endif - - //accum field - switch (desc.type) - { - case FieldSamplerTypeIntl::FORCE: - for (uint32_t i = 0; i < executeData.count; ++i) - { - uint32_t j = executeData.indices[i & executeData.indicesMask] + (i & ~executeData.indicesMask); - execArgs.position = *(PxVec3*)(executeData.position + j * positionStride); - execArgs.velocity = *(PxVec3*)(executeData.velocity + j * velocityStride); - execArgs.mass = *(executeData.mass + massStride * j); - - accumFORCE(execArgs, executeData.resultField[i], mWeights[i], accumField[j], accumVelocity[i]); - } - break; - case FieldSamplerTypeIntl::ACCELERATION: - for (uint32_t i = 0; i < executeData.count; ++i) - { - uint32_t j = executeData.indices[i & executeData.indicesMask] + (i & ~executeData.indicesMask); - execArgs.position = *(PxVec3*)(executeData.position + j * positionStride); - execArgs.velocity = *(PxVec3*)(executeData.velocity + j * velocityStride); - execArgs.mass = *(executeData.mass + massStride * j); - - accumACCELERATION(execArgs, executeData.resultField[i], mWeights[i], accumField[j], accumVelocity[i]); - } - break; - case FieldSamplerTypeIntl::VELOCITY_DRAG: - for (uint32_t i = 0; i < executeData.count; ++i) - { - uint32_t j = executeData.indices[i & executeData.indicesMask] + (i & ~executeData.indicesMask); - execArgs.position = *(PxVec3*)(executeData.position + j * positionStride); - execArgs.velocity = *(PxVec3*)(executeData.velocity + j * velocityStride); - execArgs.mass = *(executeData.mass + massStride * j); - - accumVELOCITY_DRAG(execArgs, desc.dragCoeff, executeData.resultField[i], mWeights[i], accumField[j], accumVelocity[i]); - } - break; - case FieldSamplerTypeIntl::VELOCITY_DIRECT: - for (uint32_t i = 0; i < executeData.count; ++i) - { - uint32_t j = executeData.indices[i & executeData.indicesMask] + (i & ~executeData.indicesMask); - execArgs.position = *(PxVec3*)(executeData.position + j * positionStride); - execArgs.velocity = *(PxVec3*)(executeData.velocity + j * velocityStride); - execArgs.mass = *(executeData.mass + massStride * j); - - accumVELOCITY_DIRECT(execArgs, executeData.resultField[i], mWeights[i], accumField[j], accumVelocity[i]); - } - break; - }; - } - } - } - -} - - -/******************************** GPU Version ********************************/ -#if APEX_CUDA_SUPPORT - -class FieldSamplerQueryLaunchTask : public PxGpuTask, public UserAllocated -{ -public: - FieldSamplerQueryLaunchTask(FieldSamplerQueryGPU* query) : mQuery(query) {} - const char* getName() const - { - return "FieldSamplerQueryLaunchTask"; - } - void run() - { - PX_ALWAYS_ASSERT(); - } - bool launchInstance(CUstream stream, int kernelIndex) - { - return mQuery->launch(stream, kernelIndex); - } - PxGpuTaskHint::Enum getTaskHint() const - { - return PxGpuTaskHint::Kernel; - } - -protected: - FieldSamplerQueryGPU* mQuery; -}; - -class FieldSamplerQueryPrepareTask : public PxTask, public UserAllocated -{ -public: - FieldSamplerQueryPrepareTask(FieldSamplerQueryGPU* query) : mQuery(query) {} - - const char* getName() const - { - return "FieldSamplerQueryPrepareTask"; - } - void run() - { - mQuery->prepare(); - } - -protected: - FieldSamplerQueryGPU* mQuery; -}; - -class FieldSamplerQueryCopyTask : public PxGpuTask, public UserAllocated -{ -public: - FieldSamplerQueryCopyTask(FieldSamplerQueryGPU* query) : mQuery(query) {} - const char* getName() const - { - return "FieldSamplerQueryCopyTask"; - } - void run() - { - PX_ALWAYS_ASSERT(); - } - bool launchInstance(CUstream stream, int kernelIndex) - { - return mQuery->copy(stream, kernelIndex); - } - PxGpuTaskHint::Enum getTaskHint() const - { - return PxGpuTaskHint::Kernel; - } - -protected: - FieldSamplerQueryGPU* mQuery; -}; - -class FieldSamplerQueryFetchTask : public PxTask, public UserAllocated -{ -public: - FieldSamplerQueryFetchTask(FieldSamplerQueryGPU* query) : mQuery(query) {} - - const char* getName() const - { - return "FieldSamplerQueryFetchTask"; - } - void run() - { - mQuery->fetch(); - } - -protected: - FieldSamplerQueryGPU* mQuery; -}; - - -FieldSamplerQueryGPU::FieldSamplerQueryGPU(const FieldSamplerQueryDescIntl& desc, ResourceList& list, FieldSamplerManager* manager) - : FieldSamplerQueryCPU(desc, list, manager) - , mPositionMass(manager->getApexScene(), PX_ALLOC_INFO("mPositionMass", PARTICLES)) - , mVelocity(manager->getApexScene(), PX_ALLOC_INFO("mVelocity", PARTICLES)) - , mAccumField(manager->getApexScene(), PX_ALLOC_INFO("mAccumField", PARTICLES)) - , mCopyQueue(*manager->getApexScene().getTaskManager()->getGpuDispatcher()) -{ - mTaskLaunch = PX_NEW(FieldSamplerQueryLaunchTask)(this); - mTaskPrepare = PX_NEW(FieldSamplerQueryPrepareTask)(this); - mTaskCopy = PX_NEW(FieldSamplerQueryCopyTask)(this); - mTaskFetch = PX_NEW(FieldSamplerQueryFetchTask)(this); -} - -FieldSamplerQueryGPU::~FieldSamplerQueryGPU() -{ - PX_DELETE(mTaskFetch); - PX_DELETE(mTaskCopy); - PX_DELETE(mTaskPrepare); - PX_DELETE(mTaskLaunch); -} - -PxTaskID FieldSamplerQueryGPU::submitFieldSamplerQuery(const FieldSamplerQueryDataIntl& data, PxTaskID taskID) -{ - PX_ASSERT(data.count <= mQueryDesc.maxCount); - if (data.count == 0) - { - return taskID; - } - mQueryData = data; - - if (!data.isDataOnDevice) - { - bool isWorkOnCPU = true; - // try to find FieldSampler which has no CPU implemntation (Turbulence for example) - for (uint32_t sceneIdx = 0; (sceneIdx < mPrimarySceneList.size() + mSecondarySceneList.size()) && isWorkOnCPU; ++sceneIdx) - { - const nvidia::Array<FieldSamplerInfo>& fsArray = sceneIdx < mPrimarySceneList.size() - ? mPrimarySceneList[sceneIdx]->getFieldSamplerArray() - : mSecondarySceneList[sceneIdx-mPrimarySceneList.size()]->getFieldSamplerArray(); - for (uint32_t fsIdx = 0; fsIdx < fsArray.size() && isWorkOnCPU; fsIdx++) - { - if (fsArray[fsIdx].mFieldSamplerWrapper->isEnabled()) - { - isWorkOnCPU = fsArray[fsIdx].mFieldSamplerWrapper->getInternalFieldSamplerDesc().cpuSimulationSupport; - } - } - } - - // if all FSs can work on CPU we will execute FieldSamplerQuery on CPU - if (isWorkOnCPU) - { - return FieldSamplerQueryCPU::submitFieldSamplerQuery(data, taskID); - } - - mPositionMass.reserve(mQueryDesc.maxCount, ApexMirroredPlace::CPU_GPU); - mVelocity.reserve(mQueryDesc.maxCount, ApexMirroredPlace::CPU_GPU); - mAccumField.reserve(mQueryDesc.maxCount, ApexMirroredPlace::CPU_GPU); - } - mAccumVelocity.reserve(mQueryDesc.maxCount, ApexMirroredPlace::CPU_GPU); - - // if data on device or some FS can't work on CPU we will launch FieldSamplerQuery on GPU - PxTaskManager* tm = mManager->getApexScene().getTaskManager(); - tm->submitUnnamedTask(*mTaskLaunch, PxTaskType::TT_GPU); - - if (data.isDataOnDevice) - { - FieldSamplerQuery::submitFieldSamplerQuery(data, mTaskLaunch, NULL); - - mTaskLaunch->finishBefore(taskID); - return mTaskLaunch->getTaskID(); - } - else - { - FieldSamplerQueryDataIntl data4Device; - data4Device.timeStep = data.timeStep; - data4Device.count = data.count; - data4Device.isDataOnDevice = true; - data4Device.positionStrideBytes = sizeof(PxVec4); - data4Device.velocityStrideBytes = sizeof(PxVec4); - data4Device.massStrideBytes = sizeof(PxVec4); - data4Device.pmaInPosition = (float*)mPositionMass.getGpuPtr(); - data4Device.pmaInVelocity = (float*)mVelocity.getGpuPtr(); - data4Device.pmaInMass = &mPositionMass.getGpuPtr()->w; - data4Device.pmaOutField = mAccumField.getGpuPtr(); - data4Device.pmaInIndices = 0; - - FieldSamplerQuery::submitFieldSamplerQuery(data4Device, mTaskLaunch, mTaskCopy); - - tm->submitUnnamedTask(*mTaskPrepare); - tm->submitUnnamedTask(*mTaskCopy, PxTaskType::TT_GPU); - tm->submitUnnamedTask(*mTaskFetch); - - mTaskPrepare->finishBefore(mTaskCopy->getTaskID()); - mTaskCopy->finishBefore(mTaskLaunch->getTaskID()); - mTaskLaunch->finishBefore(mTaskFetch->getTaskID()); - mTaskFetch->finishBefore(taskID); - return mTaskPrepare->getTaskID(); - } -} - -void FieldSamplerQueryGPU::prepare() -{ - const uint32_t positionStride = mQueryData.positionStrideBytes / sizeof(float); - const uint32_t velocityStride = mQueryData.velocityStrideBytes / sizeof(float); - const uint32_t massStride = mQueryData.massStrideBytes / sizeof(float); - for (uint32_t idx = 0; idx < mQueryData.count; idx++) - { - mPositionMass[idx] = PxVec4(*(PxVec3*)(mQueryData.pmaInPosition + idx * positionStride), *(mQueryData.pmaInMass + idx * massStride)); - mVelocity[idx] = PxVec4(*(PxVec3*)(mQueryData.pmaInVelocity + idx * velocityStride), 0.f); - } -} - -void FieldSamplerQueryGPU::fetch() -{ - for (uint32_t idx = 0; idx < mQueryData.count; idx++) - { - mQueryData.pmaOutField[idx] = mAccumField[idx]; - } -} - -bool FieldSamplerQueryGPU::copy(CUstream stream, int kernelIndex) -{ - if (kernelIndex == 0) - { - mCopyQueue.reset(stream, 4); - mPositionMass.copyHostToDeviceQ(mCopyQueue, mQueryData.count); - mVelocity.copyHostToDeviceQ(mCopyQueue, mQueryData.count); - mCopyQueue.flushEnqueued(); - } - return false; -} - -bool FieldSamplerQueryGPU::launch(CUstream stream, int kernelIndex) -{ - FieldSamplerPointsKernelArgs args; - args.elapsedTime = mQueryData.timeStep; - args.totalElapsedMS = mManager->getApexScene().getTotalElapsedMS(); - if (mQueryData.isDataOnDevice) - { - args.positionMass = (float4*)mQueryData.pmaInPosition; - args.velocity = (float4*)mQueryData.pmaInVelocity; - args.accumField = (float4*)mQueryData.pmaOutField; - } - else - { - args.positionMass = (float4*)mPositionMass.getGpuPtr(); - args.velocity = (float4*)mVelocity.getGpuPtr(); - args.accumField = (float4*)mAccumField.getGpuPtr(); - } - args.accumVelocity = (float4*)mAccumVelocity.getGpuPtr(); - - FieldSamplerPointsKernelLaunchDataIntl launchData; - launchData.stream = stream; - launchData.kernelType = FieldSamplerKernelType::POINTS; - launchData.kernelArgs = &args; - launchData.threadCount = mQueryData.count; - launchData.memRefSize = mQueryData.count; - - if (kernelIndex == 0 && mOnStartCallback) - { - (*mOnStartCallback)(stream); - } - - if (kernelIndex == 0) - { - CUDA_OBJ(clearKernel)(stream, mQueryData.count, - createApexCudaMemRef(args.accumField, launchData.memRefSize, ApexCudaMemFlags::OUT), - createApexCudaMemRef(args.accumVelocity, launchData.memRefSize, ApexCudaMemFlags::OUT)); - return true; - } - --kernelIndex; - - const uint32_t bothSceneCount = mPrimarySceneList.size() + mSecondarySceneList.size(); - if (kernelIndex < (int) bothSceneCount) - { - SceneInfo* sceneInfo = (kernelIndex < (int) mPrimarySceneList.size()) - ? mPrimarySceneList[(uint32_t)kernelIndex] - : mSecondarySceneList[(uint32_t)kernelIndex - mPrimarySceneList.size()]; - SceneInfoGPU* sceneInfoGPU = DYNAMIC_CAST(SceneInfoGPU*)(sceneInfo); - - launchData.kernelMode = FieldSamplerKernelMode::DEFAULT; - if (kernelIndex == (int) mPrimarySceneList.size() - 1) - { - launchData.kernelMode = FieldSamplerKernelMode::FINISH_PRIMARY; - } - if ((kernelIndex == (int) bothSceneCount - 1)) - { - launchData.kernelMode = FieldSamplerKernelMode::FINISH_SECONDARY; - } - - FieldSamplerSceneWrapperGPU* sceneWrapper = DYNAMIC_CAST(FieldSamplerSceneWrapperGPU*)(sceneInfo->getSceneWrapper()); - - launchData.queryParamsHandle = sceneInfoGPU->getQueryParamsHandle(); - launchData.paramsExArrayHandle = sceneInfoGPU->getParamsHandle(); - launchData.fieldSamplerArray = &sceneInfo->getFieldSamplerArray(); - launchData.activeFieldSamplerCount = sceneInfo->getEnabledFieldSamplerCount(); - - sceneWrapper->getInternalFieldSamplerScene()->launchFieldSamplerCudaKernel(launchData); - return true; - } - kernelIndex -= bothSceneCount; - - if (kernelIndex == 0) - { - CUDA_OBJ(composeKernel)(stream, mQueryData.count, - createApexCudaMemRef(args.accumField, launchData.memRefSize, ApexCudaMemFlags::IN_OUT), - createApexCudaMemRef((const float4*)args.accumVelocity, launchData.memRefSize, ApexCudaMemFlags::IN), - createApexCudaMemRef(args.velocity, launchData.memRefSize, ApexCudaMemFlags::IN), - args.elapsedTime); - return true; - } - --kernelIndex; - - if (!mQueryData.isDataOnDevice) - { - mAccumField.copyDeviceToHostQ(mCopyQueue, mQueryData.count); - mCopyQueue.flushEnqueued(); - - PxTaskManager* tm = mManager->getApexScene().getTaskManager(); - tm->getGpuDispatcher()->addCompletionPrereq(*mTaskFetch); - } - - if (mOnFinishCallback) - { - (*mOnFinishCallback)(stream); - } - return false; -} - -FieldSamplerQueryGPU::SceneInfoGPU::SceneInfoGPU(ResourceList& list, FieldSamplerQuery* query, FieldSamplerSceneWrapper* sceneWrapper) - : SceneInfo(list, query, sceneWrapper) - , mConstMemGroup(DYNAMIC_CAST(FieldSamplerSceneWrapperGPU*)(sceneWrapper)->getConstStorage()) -{ - APEX_CUDA_CONST_MEM_GROUP_SCOPE(mConstMemGroup); - - mQueryParamsHandle.alloc(_storage_); -} - -bool FieldSamplerQueryGPU::SceneInfoGPU::update() -{ - if (FieldSamplerQuery::SceneInfo::update()) - { - APEX_CUDA_CONST_MEM_GROUP_SCOPE(mConstMemGroup); - - FieldSamplerParamsExArray paramsExArray; - mParamsExArrayHandle.allocOrFetch(_storage_, paramsExArray); - if (paramsExArray.resize(_storage_, mEnabledFieldSamplerCount)) - { - for (uint32_t i = 0, enabledIdx = 0; i < mFieldSamplerArray.size(); ++i) - { - FieldSamplerWrapperGPU* fieldSamplerWrapper = DYNAMIC_CAST(FieldSamplerWrapperGPU*)(mFieldSamplerArray[i].mFieldSamplerWrapper); - if (fieldSamplerWrapper->isEnabled()) - { - FieldSamplerParamsEx fsParamsEx; - fsParamsEx.paramsHandle = fieldSamplerWrapper->getParamsHandle(); - fsParamsEx.multiplier = mFieldSamplerArray[i].mMultiplier; - PX_ASSERT(enabledIdx < mEnabledFieldSamplerCount); - paramsExArray.updateElem(_storage_, fsParamsEx, enabledIdx++); - } - } - mParamsExArrayHandle.update(_storage_, paramsExArray); - } - return true; - } - return false; -} - -PxVec3 FieldSamplerQueryGPU::executeFieldSamplerQueryOnGrid(const FieldSamplerQueryGridDataIntl& data) -{ - FieldSamplerGridKernelArgs args; - - args.numX = data.numX; - args.numY = data.numY; - args.numZ = data.numZ; - - args.gridToWorld = data.gridToWorld; - - args.mass = data.mass; - args.elapsedTime = data.timeStep; - args.cellSize = data.cellSize; - args.totalElapsedMS = mManager->getApexScene().getTotalElapsedMS(); - - FieldSamplerGridKernelLaunchDataIntl launchData; - launchData.stream = data.stream; - launchData.kernelType = FieldSamplerKernelType::GRID; - launchData.kernelArgs = &args; - launchData.threadCountX = data.numX; - launchData.threadCountY = data.numY; - launchData.threadCountZ = data.numZ; - launchData.accumArray = data.resultVelocity; - - - { - APEX_CUDA_SURFACE_SCOPE_BIND(surfRefGridAccum, *launchData.accumArray, ApexCudaMemFlags::OUT); - - CUDA_OBJ(clearGridKernel)(data.stream, launchData.threadCountX, launchData.threadCountY, launchData.threadCountZ, - args.numX, args.numY, args.numZ); - } - - PxVec3 velocity(0.0f); - for (uint32_t i = 0; i < mSecondarySceneList.size(); ++i) - { - SceneInfoGPU* sceneInfo = DYNAMIC_CAST(SceneInfoGPU*)(mSecondarySceneList[i]); - FieldSamplerSceneWrapperGPU* sceneWrapper = DYNAMIC_CAST(FieldSamplerSceneWrapperGPU*)(sceneInfo->getSceneWrapper()); - - launchData.activeFieldSamplerCount = 0; - - const nvidia::Array<FieldSamplerInfo>& fieldSamplerArray = sceneInfo->getFieldSamplerArray(); - for (uint32_t fieldSamplerIdx = 0; fieldSamplerIdx < fieldSamplerArray.size(); ++fieldSamplerIdx) - { - const FieldSamplerWrapperGPU* wrapper = static_cast<const FieldSamplerWrapperGPU* >( fieldSamplerArray[fieldSamplerIdx].mFieldSamplerWrapper ); - if (wrapper->isEnabled()) - { - switch (wrapper->getInternalFieldSamplerDesc().gridSupportType) - { - case FieldSamplerGridSupportTypeIntl::SINGLE_VELOCITY: - { - const FieldSamplerIntl* fieldSampler = wrapper->getInternalFieldSampler(); - velocity += fieldSampler->queryFieldSamplerVelocity(); - } - break; - case FieldSamplerGridSupportTypeIntl::VELOCITY_PER_CELL: - { - launchData.activeFieldSamplerCount += 1; - } - break; - default: - break; - } - } - } - - if (launchData.activeFieldSamplerCount > 0) - { - launchData.queryParamsHandle = sceneInfo->getQueryParamsHandle(); - launchData.paramsExArrayHandle = sceneInfo->getParamsHandle(); - launchData.fieldSamplerArray = &sceneInfo->getFieldSamplerArray(); - launchData.kernelMode = FieldSamplerKernelMode::DEFAULT; - - sceneWrapper->getInternalFieldSamplerScene()->launchFieldSamplerCudaKernel(launchData); - } - } - return velocity; -} - - -#endif - -} -} // end namespace nvidia::apex - |