aboutsummaryrefslogtreecommitdiff
path: root/APEX_1.4/module/clothing/src/ClothingActorData.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'APEX_1.4/module/clothing/src/ClothingActorData.cpp')
-rw-r--r--APEX_1.4/module/clothing/src/ClothingActorData.cpp1161
1 files changed, 1161 insertions, 0 deletions
diff --git a/APEX_1.4/module/clothing/src/ClothingActorData.cpp b/APEX_1.4/module/clothing/src/ClothingActorData.cpp
new file mode 100644
index 00000000..a3084779
--- /dev/null
+++ b/APEX_1.4/module/clothing/src/ClothingActorData.cpp
@@ -0,0 +1,1161 @@
+/*
+ * Copyright (c) 2008-2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * NVIDIA CORPORATION and its licensors retain all intellectual property
+ * and proprietary rights in and to this software, related documentation
+ * and any modifications thereto. Any use, reproduction, disclosure or
+ * distribution of this software and related documentation without an express
+ * license agreement from NVIDIA CORPORATION is strictly prohibited.
+ */
+
+#include "ApexSimdMath.h"
+#include "PxPreprocessor.h"
+#include "RenderDataFormat.h"
+#include "ClothingActorData.h"
+#include "AbstractMeshDescription.h"
+#include "PsIntrinsics.h"
+#include "PxMat44.h"
+#include "ApexSDKIntl.h"
+
+#include "ClothingGlobals.h"
+
+#include "ProfilerCallback.h"
+
+using namespace physx::shdfnd;
+
+#pragma warning(disable : 4101 4127) // unreferenced local variable and conditional is constant
+
+#define NX_PARAMETERIZED_ONLY_LAYOUTS
+#include "ClothingGraphicalLodParameters.h"
+
+#include "PsIntrinsics.h"
+#include "PsVecMath.h"
+
+namespace nvidia
+{
+namespace clothing
+{
+
+
+ClothingActorData::ClothingActorData() : // Default constructor: only initializes members; the body is empty and nothing is allocated here.
+ mNewBounds(PxBounds3::empty()),
+
+ mGlobalPose(PxVec4(1.0f)), // built from an all-ones PxVec4 - presumably the diagonal constructor, i.e. identity; TODO confirm
+ mInternalGlobalPose(PxVec4(1.0f)),
+
+ mInternalBoneMatricesCur(NULL), // every buffer pointer below starts out NULL
+ mInternalBoneMatricesPrev(NULL),
+ mRenderingDataPosition(NULL),
+ mRenderingDataNormal(NULL),
+ mRenderingDataTangent(NULL),
+ mMorphDisplacementBuffer(NULL),
+ mSdkWritebackNormal(NULL),
+ mSdkWritebackPositions(NULL),
+ mSkinnedPhysicsPositions(NULL),
+ mSkinnedPhysicsNormals(NULL),
+
+ mInternalMatricesCount(0), // every counter / id below starts at 0
+ mMorphDisplacementBufferCount(0),
+ mSdkDeformableVerticesCount(0),
+ mSdkDeformableIndicesCount(0),
+ mCurrentGraphicalLodId(0),
+ mCurrentPhysicsSubmesh(0),
+
+ mActorScale(0.0f), // NOTE(review): skinPhysicsMaxDist0Normals_NoPhysx() computes 1.f / mActorScale, so this must be set to a non-zero value before skinning
+
+ bInternalFrozen(false), // every flag below starts out false
+ bShouldComputeRenderData(false),
+ bIsInitialized(false),
+ bIsSimulationMeshDirty(false),
+ bRecomputeNormals(false),
+ bRecomputeTangents(false),
+ bCorrectSimulationNormals(false),
+ bParallelCpuSkinning(false),
+ bIsClothingSimulationNull(false)
+{
+}
+
+
+
+/**
+ * Destructor. Performs no cleanup itself; it only checks (where PX_ASSERT is active)
+ * that the current bone matrix pointer has already been reset to NULL.
+ */
+ClothingActorData::~ClothingActorData()
+{
+    // A non-NULL pointer here means the matrices were not properly deallocated beforehand.
+    PX_ASSERT(NULL == mInternalBoneMatricesCur);
+}
+
+
+
+/**
+ * Acquires mRenderLock. Every call must be paired with renderDataUnLock().
+ */
+void ClothingActorData::renderDataLock()
+{
+    mRenderLock.lock();
+}
+
+
+
+/**
+ * Releases mRenderLock, the counterpart of renderDataLock().
+ */
+void ClothingActorData::renderDataUnLock()
+{
+    mRenderLock.unlock();
+}
+
+
+
+/**
+ * Overwrites the write-back normals of the last mMaxDistance0VerticesCount simulated vertices
+ * of the current physical mesh with normals derived from the mesh's skinning normals.
+ *
+ * Without bone matrices (or with zero bones per vertex) each normal is rotated by
+ * mInternalGlobalPose and, unless mActorScale is exactly 1, multiplied by 1 / mActorScale.
+ * Otherwise each normal is the weighted sum of the per-bone rotated normals, normalized.
+ *
+ * Does nothing when there is no write-back normal buffer, no skinning normals, or no such vertices.
+ */
+void ClothingActorData::skinPhysicsMaxDist0Normals_NoPhysx()
+{
+    if (mSdkWritebackNormal == NULL /*|| bInternalFrozen == 1*/)
+    {
+        return;
+    }
+
+    ClothingPhysicalMeshData* physicalMesh = mAsset.GetPhysicalMeshFromLod(mCurrentGraphicalLodId);
+    const PxVec3* PX_RESTRICT srcNormals = physicalMesh->mSkinningNormals;
+
+    if (srcNormals == NULL)
+    {
+        return;
+    }
+
+    if (physicalMesh->mMaxDistance0VerticesCount == 0)
+    {
+        return;
+    }
+
+    // The affected vertices are the tail of the simulated vertex range.
+    const uint32_t simulatedCount = physicalMesh->mSimulatedVertexCount;
+    const uint32_t firstVertex = simulatedCount - physicalMesh->mMaxDistance0VerticesCount;
+    const uint32_t bonesPerVertex = physicalMesh->mNumBonesPerVertex;
+    const uint32_t count = simulatedCount - firstVertex;
+
+    // Offset source and destination so that index 0 is the first affected vertex.
+    srcNormals += firstVertex;
+    PxVec3* PX_RESTRICT dstNormals = mSdkWritebackNormal + firstVertex;
+
+    if (mInternalBoneMatricesCur == NULL || bonesPerVertex == 0)
+    {
+        // No skinning information: use the global pose only.
+        if (mActorScale == 1.0f)
+        {
+            for (uint32_t v = 0; v < count; ++v)
+            {
+                dstNormals[v] = mInternalGlobalPose.rotate(srcNormals[v]);
+            }
+        }
+        else
+        {
+            const float invActorScale = 1.f / mActorScale;
+            for (uint32_t v = 0; v < count; ++v)
+            {
+                dstNormals[v] = mInternalGlobalPose.rotate(srcNormals[v]) * invActorScale;
+            }
+        }
+        return;
+    }
+
+    // Bone indices / weights are stored as bonesPerVertex consecutive entries per vertex.
+    const uint32_t firstBoneEntry = firstVertex * bonesPerVertex;
+    const uint16_t* PX_RESTRICT vertexBoneIndices = &physicalMesh->mBoneIndices[firstBoneEntry];
+    const float* PX_RESTRICT vertexBoneWeights = &physicalMesh->mBoneWeights[firstBoneEntry];
+    const PxMat44* const PX_RESTRICT matrices = (const PxMat44*)mInternalBoneMatricesCur;
+
+    for (uint32_t v = 0; v < count; ++v)
+    {
+        PxVec3 skinned(0.0f, 0.0f, 0.0f);
+        for (uint32_t b = 0; b < bonesPerVertex; ++b)
+        {
+            const float weight = vertexBoneWeights[b];
+            if (!(weight > 0.f))
+            {
+                // PH: Assuming sorted weights is faster
+                break;
+            }
+
+            PX_ASSERT(weight <= 1.0f);
+            const uint32_t boneIndex = vertexBoneIndices[b];
+            const PxMat44& bone = matrices[boneIndex];
+            skinned += bone.rotate(srcNormals[v]) * weight;
+        }
+
+        skinned.normalize();
+        dstNormals[v] = skinned;
+
+        vertexBoneIndices += bonesPerVertex;
+        vertexBoneWeights += bonesPerVertex;
+    }
+}
+
+
+/**
+ * Skins every submesh of every active graphical LOD to the bones via mAsset.skinToBones(),
+ * writing into the rendering position / normal (and, if present, tangent) buffers.
+ *
+ * When no bone matrices are available, a 16-byte aligned copy of mInternalGlobalPose is
+ * passed as the only matrix.
+ *
+ * @param fromFetchResults  if true, the render lock is not taken here (the original note says
+ *                          it is already acquired by ApexScene::fetchResults()); if false,
+ *                          this function locks and unlocks it itself.
+ */
+void ClothingActorData::skinToAnimation_NoPhysX(bool fromFetchResults)
+{
+    // An early-out on !bIsSimulationMeshDirty would only work if last frame's render data were
+    // still there (i.e. the same ClothingRenderProxy is reused), so it is intentionally not done.
+
+    PX_PROFILE_ZONE("ClothingActorImpl::skinToAnimation", GetInternalApexSDK()->getContextId());
+
+    const bool takeLock = !fromFetchResults;
+    if (takeLock)
+    {
+        renderDataLock();
+    }
+
+    for (uint32_t lodIndex = 0; lodIndex < mAsset.mGraphicalLodsCount; ++lodIndex)
+    {
+        ClothingMeshAssetData& lodMesh = *mAsset.GetLod(lodIndex);
+        if (lodMesh.bActive)
+        {
+            // Submeshes are laid out back to back in the rendering buffers.
+            uint32_t vertexBase = 0;
+
+            for (uint32_t subIndex = 0; subIndex < lodMesh.mSubMeshCount; ++subIndex)
+            {
+                AbstractMeshDescription target;
+                ClothingAssetSubMesh* subMesh = mAsset.GetSubmesh(&lodMesh, subIndex);
+
+                target.numVertices = subMesh->mVertexCount;
+                target.pPosition = mRenderingDataPosition + vertexBase;
+                target.pNormal = mRenderingDataNormal + vertexBase;
+                if (mRenderingDataTangent != NULL)
+                {
+                    target.pTangent4 = mRenderingDataTangent + vertexBase;
+                }
+
+                PX_ALIGN(16, PxMat44 alignedGlobalPose); // matrices must be 16 byte aligned!
+                PxMat44* matrices = NULL;
+                if (mInternalBoneMatricesCur != NULL)
+                {
+                    matrices = (PxMat44*)mInternalBoneMatricesCur;
+                    PX_ASSERT(matrices != NULL);
+                }
+                else
+                {
+                    alignedGlobalPose = mInternalGlobalPose;
+                    matrices = &alignedGlobalPose;
+                }
+
+                mAsset.skinToBones(target, subIndex, lodIndex, subMesh->mCurrentMaxVertexSimulation, matrices, mMorphDisplacementBuffer);
+
+                vertexBase += subMesh->mVertexCount;
+            }
+        }
+    }
+
+    if (takeLock)
+    {
+        renderDataUnLock();
+    }
+}
+
+template<bool computeNormals> /* Mesh-to-mesh skinning through a SkinClothMapD table.
+ *
+ * For every map entry the three referenced physical-mesh vertices (positions and normals from srcPM)
+ * are blended with the entry's barycentric coordinates; the z component of each stored barycentric
+ * triple is used as an offset along the interpolated normal, scaled by actorScale.
+ * The result is written to dstPositions[vertexIndexPlusOffset]; normals are written only in the
+ * computeNormals == true instantiation, tangents only when dstTangents is non-NULL.
+ *
+ * Entries whose destination index is >= numVertices are skipped. Entries that reference a physics
+ * vertex >= srcPM.numVertices are skipped as well and lower the return value.
+ *
+ * Returns the smallest vertexIndexPlusOffset among entries with out-of-range physics vertices,
+ * or numVerticesInMap if there was none.
+ */
+uint32_t ClothingAssetData::skinClothMap(PxVec3* dstPositions, PxVec3* dstNormals, PxVec4* dstTangents, uint32_t numVertices,
+ const AbstractMeshDescription& srcPM, ClothingGraphicalLodParametersNS::SkinClothMapD_Type* map,
+ uint32_t numVerticesInMap, float offsetAlongNormal, float actorScale) const
+{
+ PX_ASSERT(srcPM.numIndices % 3 == 0);
+
+ const ClothingGraphicalLodParametersNS::SkinClothMapD_Type* PX_RESTRICT pTCM = map;
+ nvidia::prefetchLine(pTCM);
+
+ const float invOffsetAlongNormal = 1.0f / offsetAlongNormal; // NOTE(review): no guard against offsetAlongNormal == 0
+
+ uint32_t numVerticesWritten = 0; // incremented below but never read in this function
+ uint32_t numTangentsWritten = 0; // NOTE(review): never incremented in this function, see the tangent w lookup below
+ const uint32_t numVerticesTotal = numVertices;
+
+ uint32_t firstMiss = numVerticesInMap; // return value, lowered whenever an entry references an out-of-range physics vertex
+
+ const uint32_t unrollCount = 256;
+
+ const uint32_t numIterations = (numVerticesInMap + unrollCount - 1) / unrollCount; // ceil(numVerticesInMap / unrollCount)
+
+ //uint32_t vertexIndex = 0;
+ for (uint32_t a = 0; a < numIterations; ++a)
+ {
+ const uint32_t numToProcess = PxMin(numVerticesInMap - (a * unrollCount), unrollCount);
+ const ClothingGraphicalLodParametersNS::SkinClothMapD_Type* PX_RESTRICT pTCMLocal =
+ (const ClothingGraphicalLodParametersNS::SkinClothMapD_Type * PX_RESTRICT)(void*)pTCM;
+
+ for (uint32_t j = 0; j < numToProcess; ++j)
+ {
+ nvidia::prefetchLine(pTCMLocal + 1);
+
+ //PX_ASSERT(vertexIndex == pTCMLocal->vertexIndexPlusOffset);
+ uint32_t vertexIndex = pTCMLocal->vertexIndexPlusOffset;
+ const uint32_t physVertIndex0 = pTCMLocal->vertexIndex0;
+ const uint32_t physVertIndex1 = pTCMLocal->vertexIndex1;
+ const uint32_t physVertIndex2 = pTCMLocal->vertexIndex2;
+
+ if (vertexIndex >= numVerticesTotal) // destination index out of range: skip this entry
+ {
+ pTCM++;
+ pTCMLocal++;
+ //vertexIndex++;
+ continue;
+ }
+
+ // TODO do only 1 test, make sure physVertIndex0 is the smallest index
+ if (physVertIndex0 >= srcPM.numVertices || physVertIndex1 >= srcPM.numVertices || physVertIndex2 >= srcPM.numVertices)
+ {
+ firstMiss = PxMin(firstMiss, vertexIndex);
+ pTCM++;
+ pTCMLocal++;
+ //vertexIndex++;
+ continue;
+ }
+
+ numVerticesWritten++;
+
+ //PX_ASSERT(!vertexWriteCache.IsStomped());
+
+ const PxVec3 vtx[3] =
+ {
+ *(PxVec3*)&srcPM.pPosition[physVertIndex0],
+ *(PxVec3*)&srcPM.pPosition[physVertIndex1],
+ *(PxVec3*)&srcPM.pPosition[physVertIndex2],
+ };
+
+ //PX_ASSERT(!vertexWriteCache.IsStomped());
+
+ const PxVec3 nrm[3] =
+ {
+ *(PxVec3*)&srcPM.pNormal[physVertIndex0],
+ *(PxVec3*)&srcPM.pNormal[physVertIndex1],
+ *(PxVec3*)&srcPM.pNormal[physVertIndex2],
+ };
+
+ //PX_ASSERT(!vertexWriteCache.IsStomped());
+
+ PxVec3 bary = pTCMLocal->vertexBary;
+ const float vHeight = bary.z * actorScale; // stored z is the offset along the interpolated normal
+ bary.z = 1.0f - bary.x - bary.y; // third barycentric weight is derived from the other two
+
+ const PxVec3 positionVertex = bary.x * vtx[0] + bary.y * vtx[1] + bary.z * vtx[2];
+ const PxVec3 positionNormal = (bary.x * nrm[0] + bary.y * nrm[1] + bary.z * nrm[2]) * vHeight;
+
+ const PxVec3 resultPosition = positionVertex + positionNormal;
+ //Write back - to use a DMA list
+
+ PxVec3* dstPosition = (PxVec3*)&dstPositions[vertexIndex];
+
+ *dstPosition = resultPosition;
+
+ PX_ASSERT(resultPosition.isFinite());
+
+ if (computeNormals) // template parameter, constant per instantiation
+ {
+ bary = pTCMLocal->normalBary;
+ const float nHeight = bary.z * actorScale;
+ bary.z = 1.0f - bary.x - bary.y;
+
+ const PxVec3 normalVertex = bary.x * vtx[0] + bary.y * vtx[1] + bary.z * vtx[2];
+ const PxVec3 normalNormal = (bary.x * nrm[0] + bary.y * nrm[1] + bary.z * nrm[2]) * nHeight;
+
+ PxVec3* dstNormal = (PxVec3*)&dstNormals[vertexIndex];
+
+ // we multiply in invOffsetAlongNormal in order to get a newNormal that is closer to size 1,
+ // so the normalize approximation will be better
+ PxVec3 newNormal = ((normalVertex + normalNormal) - (resultPosition)) * invOffsetAlongNormal; // direction from the skinned position to a second skinned point
+#if 1
+ // PH: Normally this is accurate enough. For testing we can also use the second
+ const PxVec3 resultNormal = newNormal * nvidia::recipSqrtFast(newNormal.magnitudeSquared());
+ *dstNormal = resultNormal;
+#else
+ newNormal.normalize();
+ *dstNormal = newNormal;
+#endif
+ }
+ if (dstTangents != NULL)
+ {
+ bary = pTCMLocal->tangentBary;
+ const float nHeight = bary.z * actorScale;
+ bary.z = 1.0f - bary.x - bary.y;
+
+ const PxVec3 tangentVertex = bary.x * vtx[0] + bary.y * vtx[1] + bary.z * vtx[2];
+ const PxVec3 tangentTangent = (bary.x * nrm[0] + bary.y * nrm[1] + bary.z * nrm[2]) * nHeight;
+
+ PxVec4* dstTangent = (PxVec4*)&dstTangents[vertexIndex];
+
+ // we multiply in invOffsetAlongNormal in order to get a newNormal that is closer to size 1,
+ // so the normalize approximation will be better
+ PxVec3 newTangent = ((tangentVertex + tangentTangent) - (resultPosition)) * invOffsetAlongNormal;
+#if 1
+ // PH: Normally this is accurate enough. For testing we can also use the second
+ const PxVec3 resultTangent = newTangent * nvidia::recipSqrtFast(newTangent.magnitudeSquared());
+
+ uint32_t arrayIndex = numTangentsWritten / 4; // NOTE(review): numTangentsWritten is still 0 here, so arrayIndex and offset are always 0
+ uint32_t offset = numTangentsWritten % 4; // NOTE(review): computeTangentSpaceUpdate() unpacks 32 sign bits per word; confirm the intended packing
+ float w = ((mCompressedTangentW[arrayIndex] >> offset) & 1) ? 1.f : -1.f; // effectively always bit 0 of mCompressedTangentW[0]
+
+ *dstTangent = PxVec4(resultTangent, w);
+#else
+ newTangent.normalize();
+ *dstTangent = newTangent;
+#endif
+ }
+
+ pTCM++;
+ pTCMLocal++;
+ //vertexIndex++;
+ }
+ }
+
+ return firstMiss;
+}
+
+
+#if PX_ANDROID || PX_LINUX // explicit instantiations of both skinClothMap variants, emitted only on these platforms
+template uint32_t ClothingAssetData::skinClothMap<true>(PxVec3* dstPositions, PxVec3* dstNormals, PxVec4* dstTangents, uint32_t numVertices,
+ const AbstractMeshDescription& srcPM, ClothingGraphicalLodParametersNS::SkinClothMapD_Type* map,
+ uint32_t numVerticesInMap, float offsetAlongNormal, float actorScale) const;
+
+template uint32_t ClothingAssetData::skinClothMap<false>(PxVec3* dstPositions, PxVec3* dstNormals, PxVec4* dstTangents, uint32_t numVertices,
+ const AbstractMeshDescription& srcPM, ClothingGraphicalLodParametersNS::SkinClothMapD_Type* map,
+ uint32_t numVerticesInMap, float offsetAlongNormal, float actorScale) const;
+#endif
+
+/**
+ * Copies physics positions straight into the rendering position buffer for every graphical
+ * vertex that has a usable immediate-map entry.
+ *
+ * An entry is usable when its index part (entry & ImmediateClothingReadMask) is below
+ * numSrcVertices_ and its ImmediateClothingInSkinFlag bit is clear. Other vertices are left untouched.
+ *
+ * @param immediateClothMap_     one map entry per graphical vertex
+ * @param numGraphicalVertices_  number of entries / destination vertices to visit
+ * @param numSrcVertices_        number of valid elements in srcPositions_
+ * @param srcPositions_          source (physics) positions
+ */
+void ClothingActorData::skinToImmediateMap(const uint32_t* immediateClothMap_, uint32_t numGraphicalVertices_, uint32_t numSrcVertices_,
+        const PxVec3* srcPositions_)
+{
+    const uint32_t* PX_RESTRICT mapEntries = immediateClothMap_;
+    const PxVec3* PX_RESTRICT physicsPositions = srcPositions_;
+    PxVec3* PX_RESTRICT renderPositions = mRenderingDataPosition;
+
+    for (uint32_t v = 0; v < numGraphicalVertices_; ++v)
+    {
+        const uint32_t entry = mapEntries[v];
+        const uint32_t srcIndex = entry & ClothingConstants::ImmediateClothingReadMask;
+        const uint32_t flagBits = entry & ~ClothingConstants::ImmediateClothingReadMask;
+
+        const bool inSkin = (flagBits & ClothingConstants::ImmediateClothingInSkinFlag) != 0;
+        if (srcIndex >= numSrcVertices_ || inSkin)
+        {
+            continue;
+        }
+
+        renderPositions[v] = physicsPositions[srcIndex];
+        PX_ASSERT(renderPositions[v].isFinite());
+    }
+}
+
+
+
+/**
+ * Same as the position-only overload, but also copies the physics normal into the rendering
+ * normal buffer, negated when the entry carries the ImmediateClothingInvertNormal flag.
+ *
+ * @param immediateClothMap_     one map entry per graphical vertex
+ * @param numGraphicalVertices_  number of entries / destination vertices to visit
+ * @param numSrcVertices_        number of valid elements in the source arrays
+ * @param srcPositions_          source (physics) positions
+ * @param srcNormals_            source (physics) normals
+ */
+void ClothingActorData::skinToImmediateMap(const uint32_t* immediateClothMap_, uint32_t numGraphicalVertices_, uint32_t numSrcVertices_,
+        const PxVec3* srcPositions_, const PxVec3* srcNormals_)
+{
+    const uint32_t* PX_RESTRICT mapEntries = immediateClothMap_;
+    const PxVec3* PX_RESTRICT physicsPositions = srcPositions_;
+    const PxVec3* PX_RESTRICT physicsNormals = srcNormals_;
+
+    PxVec3* PX_RESTRICT renderPositions = mRenderingDataPosition;
+    PxVec3* PX_RESTRICT renderNormals = mRenderingDataNormal;
+
+    for (uint32_t v = 0; v < numGraphicalVertices_; ++v)
+    {
+        const uint32_t entry = mapEntries[v];
+        const uint32_t srcIndex = entry & ClothingConstants::ImmediateClothingReadMask;
+        const uint32_t flagBits = entry & ~ClothingConstants::ImmediateClothingReadMask;
+
+        const bool inSkin = (flagBits & ClothingConstants::ImmediateClothingInSkinFlag) != 0;
+        if (srcIndex >= numSrcVertices_ || inSkin)
+        {
+            continue;
+        }
+
+        renderPositions[v] = physicsPositions[srcIndex];
+        PX_ASSERT(renderPositions[v].isFinite());
+
+        const PxVec3 physicsNormal = physicsNormals[srcIndex];
+        if (flagBits & ClothingConstants::ImmediateClothingInvertNormal)
+        {
+            renderNormals[v] = -physicsNormal;
+        }
+        else
+        {
+            renderNormals[v] = physicsNormal;
+        }
+        PX_ASSERT(renderNormals[v].isFinite());
+    }
+}
+
+
+
+/**
+ * Mesh-to-mesh skinning: drives the rendering buffers from the simulated physical mesh
+ * (mSdkWritebackPositions / mSdkWritebackNormal) for each active graphical LOD.
+ *
+ * Exactly one of skinClothMapB, skinClothMap or skinToTetraMesh is used, in that priority,
+ * depending on which map exists; afterwards the immediate map (if any) is applied on top.
+ * Normals are skinned only when bRecomputeNormals is false. Tangents are skinned only when
+ * bRecomputeTangents is false and every submesh of the LOD has tangents.
+ *
+ * NOTE(review): the maps are always taken from the LOD selected by mCurrentGraphicalLodId,
+ * while the vertex count comes from the active LOD being iterated. This is only consistent
+ * if the single active LOD is the current one - confirm.
+ *
+ * @param fromFetchResults  if true the render lock is assumed to be held by the caller;
+ *                          otherwise it is taken and released here.
+ */
+void ClothingActorData::skinToPhysicalMesh_NoPhysX(bool fromFetchResults)
+{
+    // An early-out on !bIsSimulationMeshDirty would only work if last frame's render data were
+    // still there (i.e. the same ClothingRenderProxy is reused), so it is intentionally not done.
+
+    PX_PROFILE_ZONE("ClothingActorImpl::meshMesh-Skinning", GetInternalApexSDK()->getContextId());
+
+    const ClothingMeshAssetData& graphicalLod = *mAsset.GetLod(mCurrentGraphicalLodId);
+    const ClothingPhysicalMeshData* physicalMesh = mAsset.GetPhysicalMeshFromLod(mCurrentGraphicalLodId);
+
+    // Description of the simulated (source) mesh.
+    AbstractMeshDescription simMesh;
+    simMesh.numVertices = mSdkDeformableVerticesCount;
+    simMesh.numIndices = mSdkDeformableIndicesCount;
+    simMesh.pPosition = mSdkWritebackPositions;
+    simMesh.pNormal = mSdkWritebackNormal;
+    simMesh.pIndices = physicalMesh->mIndices;
+    simMesh.avgEdgeLength = graphicalLod.mSkinClothMapThickness;
+
+    const bool skinNormals = !bRecomputeNormals;
+    const bool takeLock = !fromFetchResults;
+
+    if (takeLock)
+    {
+        renderDataLock();
+    }
+
+    uint32_t numActiveLods = 0;
+
+    for (uint32_t lodIndex = 0; lodIndex < mAsset.mGraphicalLodsCount; ++lodIndex)
+    {
+        const ClothingMeshAssetData& lod = *mAsset.GetLod(lodIndex);
+        if (!lod.bActive)
+        {
+            continue;
+        }
+        ++numActiveLods;
+
+        bool skinTangents = !bRecomputeTangents;
+
+        // Total render vertex count of this LOD; tangents need every submesh to provide them.
+        uint32_t renderVertexCount = 0;
+        for (uint32_t subIndex = 0; subIndex < lod.mSubMeshCount; ++subIndex)
+        {
+            ClothingAssetSubMesh* subMesh = mAsset.GetSubmesh(&lod, subIndex);
+            renderVertexCount += subMesh->mVertexCount; // only 1 part is supported
+
+            if (subMesh->mTangents == NULL)
+            {
+                skinTangents = false;
+            }
+        }
+
+        // Do mesh-to-mesh skinning here
+        if (graphicalLod.mSkinClothMapB != NULL)
+        {
+            mAsset.skinClothMapB(mRenderingDataPosition, mRenderingDataNormal, renderVertexCount, simMesh,
+                                 graphicalLod.mSkinClothMapB, graphicalLod.mSkinClothMapBCount, skinNormals);
+        }
+        else if (graphicalLod.mSkinClothMap != NULL)
+        {
+            PxVec4* tangentTarget = NULL;
+            if (skinTangents)
+            {
+                tangentTarget = mRenderingDataTangent;
+            }
+
+            if (skinNormals)
+            {
+                mAsset.skinClothMap<true>(mRenderingDataPosition, mRenderingDataNormal, tangentTarget, renderVertexCount, simMesh,
+                                          graphicalLod.mSkinClothMap, graphicalLod.mSkinClothMapCount, graphicalLod.mSkinClothMapOffset, mActorScale);
+            }
+            else
+            {
+                mAsset.skinClothMap<false>(mRenderingDataPosition, mRenderingDataNormal, tangentTarget, renderVertexCount, simMesh,
+                                           graphicalLod.mSkinClothMap, graphicalLod.mSkinClothMapCount, graphicalLod.mSkinClothMapOffset, mActorScale);
+            }
+        }
+        else if (graphicalLod.mTetraMap != NULL)
+        {
+            AbstractMeshDescription renderMesh;
+            renderMesh.pPosition = mRenderingDataPosition;
+            if (skinNormals)
+            {
+                renderMesh.pNormal = mRenderingDataNormal;
+            }
+            renderMesh.numVertices = renderVertexCount;
+            mAsset.skinToTetraMesh(renderMesh, simMesh, graphicalLod);
+        }
+
+        if (graphicalLod.mImmediateClothMap != NULL)
+        {
+            if (!skinNormals)
+            {
+                skinToImmediateMap(graphicalLod.mImmediateClothMap, renderVertexCount, simMesh.numVertices, simMesh.pPosition);
+            }
+            else
+            {
+                skinToImmediateMap(graphicalLod.mImmediateClothMap, renderVertexCount, simMesh.numVertices, simMesh.pPosition, simMesh.pNormal);
+            }
+        }
+    }
+
+    // At most one graphical LOD is expected to be active.
+    PX_ASSERT(numActiveLods < 2);
+
+    if (takeLock)
+    {
+        renderDataUnLock();
+    }
+}
+
+
+
+
+
+
+/**
+ * Final skinning pass over every submesh of every active graphical LOD:
+ *  - recomputes normals and/or tangents via computeTangentSpaceUpdate() when
+ *    bRecomputeNormals / bRecomputeTangents (and the LOD's bNeedsTangents) ask for it;
+ *  - rebuilds mNewBounds from scratch out of all rendering positions visited.
+ *
+ * @param fromFetchResults  if true the render lock is not taken here (the original note says
+ *                          it is already acquired by ApexScene::fetchResults()); otherwise
+ *                          it is taken and released by this function.
+ */
+void ClothingActorData::finalizeSkinning_NoPhysX(bool fromFetchResults)
+{
+    const bool takeLock = !fromFetchResults;
+    if (takeLock)
+    {
+        renderDataLock();
+    }
+
+    mNewBounds.setEmpty();
+
+    for (uint32_t lodIndex = 0; lodIndex < mAsset.mGraphicalLodsCount; ++lodIndex)
+    {
+        ClothingMeshAssetData& lodMesh = *mAsset.GetLod(lodIndex);
+        if (!lodMesh.bActive)
+        {
+            continue;
+        }
+
+        const uint32_t numSubmeshes = lodMesh.mSubMeshCount;
+
+        // Submeshes are laid out back to back in the rendering buffers.
+        uint32_t vertexBase = 0;
+        for (uint32_t subIndex = 0; subIndex < numSubmeshes; ++subIndex)
+        {
+            AbstractMeshDescription target;
+
+            ClothingAssetSubMesh* subMesh = mAsset.GetSubmesh(&lodMesh, subIndex);
+
+            target.numVertices = subMesh->mVertexCount;
+            target.pPosition = mRenderingDataPosition + vertexBase;
+
+            const bool updateTangents = bRecomputeTangents && lodMesh.bNeedsTangents;
+            if (bRecomputeNormals || updateTangents)
+            {
+                target.pNormal = mRenderingDataNormal + vertexBase;
+
+                const uint32_t* tangentSigns = NULL;
+                if (updateTangents)
+                {
+                    target.pTangent4 = mRenderingDataTangent + vertexBase;
+                    uint32_t mapSize = 0;
+                    tangentSigns = mAsset.getCompressedTangentW(lodIndex, subIndex, mapSize);
+                }
+
+                // Each branch opens its own profile zone, scoped to that branch.
+                if (!bRecomputeNormals)
+                {
+                    PX_PROFILE_ZONE("ClothingActorImpl::recomupteTangent", GetInternalApexSDK()->getContextId());
+                    computeTangentSpaceUpdate<false, true>(target, lodMesh, subIndex, tangentSigns);
+                }
+                else if (!updateTangents)
+                {
+                    PX_PROFILE_ZONE("ClothingActorImpl::recomupteNormal", GetInternalApexSDK()->getContextId());
+                    computeTangentSpaceUpdate<true, false>(target, lodMesh, subIndex, tangentSigns);
+                }
+                else
+                {
+                    PX_PROFILE_ZONE("ClothingActorImpl::recomupteNormalAndTangent", GetInternalApexSDK()->getContextId());
+                    computeTangentSpaceUpdate<true, true>(target, lodMesh, subIndex, tangentSigns);
+                }
+            }
+
+            // Grow the bounds by every position of this submesh.
+            const PxVec3* PX_RESTRICT positions = (const PxVec3 * PX_RESTRICT)(target.pPosition);
+            for (uint32_t v = 0; v < target.numVertices; ++v)
+            {
+                mNewBounds.include(positions[v]);
+            }
+
+            vertexBase += target.numVertices;
+        }
+    }
+
+    if (takeLock)
+    {
+        renderDataUnLock();
+    }
+}
+
+#define FLOAT_TANGENT_UPDATE 0 // NOTE(review): no use of this macro is visible in this part of the file - confirm before relying on or removing it
+
+
+/**
+ * Recomputes normals and/or tangents of one render submesh from the positions in destMesh.
+ *
+ * Normals (withNormals): the first mCurrentMaxVertexSimulation normals are zeroed, then the
+ * unnormalized face normal cross(P1 - P0, P2 - P0) of each of the first
+ * mCurrentMaxIndexSimulation / 3 triangles is accumulated into its three vertices.
+ * The accumulated normals are not normalized here.
+ *
+ * Tangents (withTangents): the first mCurrentMaxVertexSimulation tangents are zeroed, the
+ * per-triangle s-direction derived from positions and UVs is accumulated into xyz, and w of
+ * the first mCurrentMaxVertexAdditionalSimulation tangents is set to +1 / -1 from the sign
+ * bits in compressedTangentW (32 bits per word, vertex j uses bit j & 31 of word j >> 5).
+ * Tangents require FLOAT2 UVs; for any other UV format the tangent pass does nothing.
+ *
+ * The <true, true> instantiation simply runs <true, false> followed by <false, true>.
+ *
+ * Fix: the normals-only pass used to bail out on non-FLOAT2 UVs by calling
+ * computeTangentSpaceUpdate<true, false> again with identical arguments, which recursed
+ * without end. Normals do not read UVs, so that pass now simply proceeds.
+ *
+ * @param destMesh            positions (input) and normals / tangents (output) of the submesh
+ * @param rendermesh          LOD mesh data the submesh belongs to
+ * @param submeshIndex        index of the submesh inside rendermesh
+ * @param compressedTangentW  packed tangent sign bits; only read when withTangents is true
+ */
+template <bool withNormals, bool withTangents>
+void ClothingActorData::computeTangentSpaceUpdate(AbstractMeshDescription& destMesh,
+        const ClothingMeshAssetData& rendermesh, uint32_t submeshIndex, const uint32_t* compressedTangentW)
+{
+    ClothingAssetSubMesh* pSubMesh = mAsset.GetSubmesh(&rendermesh, submeshIndex);
+
+    if (withNormals && withTangents)
+    {
+        // Combined update = the two single-purpose passes back to back.
+        computeTangentSpaceUpdate<true, false>(destMesh, rendermesh, submeshIndex, compressedTangentW);
+        computeTangentSpaceUpdate<false, true>(destMesh, rendermesh, submeshIndex, compressedTangentW);
+        return;
+    }
+
+    const RenderDataFormat::Enum uvFormat = pSubMesh->mUvFormat;
+    if (withTangents && uvFormat != RenderDataFormat::FLOAT2)
+    {
+        // Tangents can only be derived from FLOAT2 UVs. The normal pass does not depend on
+        // UVs at all, so only the tangent pass is skipped.
+        return;
+    }
+
+    PX_ASSERT(pSubMesh->mCurrentMaxIndexSimulation <= pSubMesh->mIndicesCount);
+    const uint32_t numGraphicalVertexIndices = pSubMesh->mCurrentMaxIndexSimulation;
+    const uint32_t* indices = pSubMesh->mIndices;
+
+    const VertexUVLocal* PX_RESTRICT uvs = pSubMesh->mUvs;
+    PX_ASSERT(uvs != NULL || !withTangents); // UVs are only read by the tangent pass
+
+    const uint32_t numVertices = pSubMesh->mCurrentMaxVertexAdditionalSimulation;
+    const uint32_t numZeroVertices = pSubMesh->mCurrentMaxVertexSimulation;
+    PX_ASSERT(numVertices <= destMesh.numVertices);
+
+    PX_ASSERT(pSubMesh->mVertexCount == destMesh.numVertices);
+    PX_ASSERT(destMesh.pPosition != NULL);
+    PX_ASSERT(destMesh.pNormal != NULL);
+    PX_ASSERT(destMesh.pTangent4 != NULL || !withTangents);
+    PX_ASSERT(destMesh.pTangent == NULL);
+    PX_ASSERT(destMesh.pBitangent == NULL);
+
+    const PxVec3* PX_RESTRICT destPositions = (const PxVec3 * PX_RESTRICT)(destMesh.pPosition);
+
+    if (withNormals)
+    {
+        PxVec3* PX_RESTRICT destNormals = destMesh.pNormal;
+        for (uint32_t a = 0; a < numZeroVertices; ++a)
+        {
+            destNormals[a] = PxVec3(0.0f);
+        }
+
+        // Indices are consumed in blocks of 3 (one triangle each).
+        for (uint32_t i = 0; i < numGraphicalVertexIndices; i += 3)
+        {
+            const uint32_t i0 = indices[i + 0];
+            const uint32_t i1 = indices[i + 1];
+            const uint32_t i2 = indices[i + 2];
+
+            const Simd4f P0 = createSimd3f(destPositions[i0]);
+            const Simd4f P1 = createSimd3f(destPositions[i1]);
+            const Simd4f P2 = createSimd3f(destPositions[i2]);
+
+            const Simd4f X1 = P1 - P0;
+            const Simd4f X2 = P2 - P0;
+
+            const Simd4f FACENORMAL = cross3(X1, X2);
+
+            // Accumulate sequentially so that repeated indices within a triangle still add up.
+            Simd4f n1 = createSimd3f(destNormals[i0]);
+            n1 = n1 + FACENORMAL;
+            store3(&destNormals[i0].x, n1);
+
+            Simd4f n2 = createSimd3f(destNormals[i1]);
+            n2 = n2 + FACENORMAL;
+            store3(&destNormals[i1].x, n2);
+
+            Simd4f n3 = createSimd3f(destNormals[i2]);
+            n3 = n3 + FACENORMAL;
+            store3(&destNormals[i2].x, n3);
+        }
+    }
+
+    if (withTangents)
+    {
+        PX_ASSERT(compressedTangentW != NULL);
+
+        const Simd4f vZero = gSimd4fZero;
+        const VertexUVLocal* PX_RESTRICT uvLocal = (const VertexUVLocal * PX_RESTRICT)(void*)uvs;
+
+        PxVec4* PX_RESTRICT tangents = destMesh.pTangent4;
+        for (uint32_t a = 0; a < numZeroVertices; ++a)
+        {
+            tangents[a] = PxVec4(0.f);
+        }
+
+        for (uint32_t i = 0; i < numGraphicalVertexIndices; i += 3)
+        {
+            const uint32_t i0 = indices[i + 0];
+            const uint32_t i1 = indices[i + 1];
+            const uint32_t i2 = indices[i + 2];
+
+            const Simd4f P0 = createSimd3f(destPositions[i0]);
+            const Simd4f P1 = createSimd3f(destPositions[i1]);
+            const Simd4f P2 = createSimd3f(destPositions[i2]);
+
+            const Simd4f X1 = P1 - P0;
+            const Simd4f X2 = P2 - P0;
+
+            const VertexUVLocal& w0 = uvLocal[i0];
+            const VertexUVLocal& w1 = uvLocal[i1];
+            const VertexUVLocal& w2 = uvLocal[i2];
+
+            const Simd4f W0U = Simd4fScalarFactory(w0.u);
+            const Simd4f W1U = Simd4fScalarFactory(w1.u);
+            const Simd4f W2U = Simd4fScalarFactory(w2.u);
+            const Simd4f W0V = Simd4fScalarFactory(w0.v);
+            const Simd4f W1V = Simd4fScalarFactory(w1.v);
+            const Simd4f W2V = Simd4fScalarFactory(w2.v);
+
+            const Simd4f S1 = W1U - W0U;
+            const Simd4f S2 = W2U - W0U;
+            const Simd4f T1 = W1V - W0V;
+            const Simd4f T2 = W2V - W0V;
+
+            // invH = (s1 * t2 - s2 * t1);
+            const Simd4f S1T2 = S1 * T2;
+            const Simd4f invHR = S1T2 - S2 * T1;
+            const Simd4f HR = recip(invHR);
+            const Simd4f T2X1 = X1 * T2;
+            const Simd4f invHREqZero = (invHR == vZero);
+
+            const Simd4f T1X2MT2X1 = T2X1 - X2 * T1;
+
+            // Degenerate UV triangles (zero determinant) contribute nothing.
+            const Simd4f scale = select(invHREqZero, vZero, HR);
+
+            const Simd4f SDIR = T1X2MT2X1 * scale; // .w gets overwritten later on
+
+            PxVec4* PX_RESTRICT tangent0 = tangents + i0;
+            PxVec4* PX_RESTRICT tangent1 = tangents + i1;
+            PxVec4* PX_RESTRICT tangent2 = tangents + i2;
+            Simd4f t0 = Simd4fAlignedLoadFactory((float*)tangent0);
+            Simd4f t1 = Simd4fAlignedLoadFactory((float*)tangent1);
+            Simd4f t2 = Simd4fAlignedLoadFactory((float*)tangent2);
+
+            t0 = t0 + SDIR;
+            t1 = t1 + SDIR;
+            t2 = t2 + SDIR;
+
+            storeAligned((float*)tangent0, t0);
+            storeAligned((float*)tangent1, t1);
+            storeAligned((float*)tangent2, t2);
+        }
+
+        // Unpack the tangent sign bits: one bit per vertex, 32 vertices per word.
+        uint32_t tangentW = 0;
+
+        int32_t j = 0;
+#if 1
+        // This makes it quite a bit faster, but it also works without it.
+        for (; j < (int32_t)numVertices - 4; j += 4)
+        {
+            if ((j & 0x1f) == 0)
+            {
+                tangentW = compressedTangentW[j >> 5];
+            }
+
+            tangents[j].w = (tangentW & 0x1) ? 1.0f : -1.0f;
+            tangents[j + 1].w = (tangentW & 0x2) ? 1.0f : -1.0f;
+            tangents[j + 2].w = (tangentW & 0x4) ? 1.0f : -1.0f;
+            tangents[j + 3].w = (tangentW & 0x8) ? 1.0f : -1.0f;
+            tangentW >>= 4;
+        }
+#endif
+
+        // We need this loop to handle last vertices in tangents[], it shares the same j as previous loop
+        for (; j < (int32_t)numVertices; j++)
+        {
+            if ((j & 0x1f) == 0)
+            {
+                tangentW = compressedTangentW[j >> 5];
+            }
+
+            tangents[j].w = (tangentW & 0x1) ? 1.0f : -1.0f;
+            tangentW >>= 1;
+        }
+    }
+}
+
+
+/**
+ * Returns the bounds of the current graphical LOD transformed into the space of the root
+ * bone matrix (when bone matrices are available) or of mGlobalPose (otherwise).
+ *
+ * The result is the axis-aligned box around the transformed box: the center is transformed
+ * as a point and each extent component is the sum of the absolute contributions of the
+ * three scaled matrix columns. Empty bounds are returned unchanged.
+ */
+PxBounds3 ClothingActorData::getRenderMeshAssetBoundsTransformed()
+{
+    const PxBounds3 localBounds = mAsset.GetLod(mCurrentGraphicalLodId)->mBounds;
+
+    PxMat44 transformation;
+    if (mInternalBoneMatricesCur == NULL)
+    {
+        transformation = mGlobalPose;
+    }
+    else
+    {
+        transformation = mInternalBoneMatricesCur[mAsset.mRootBoneIndex];
+    }
+
+    if (localBounds.isEmpty())
+    {
+        return localBounds;
+    }
+
+    const PxVec3 worldCenter = transformation.transform(localBounds.getCenter());
+    const PxVec3 halfSize = localBounds.getExtents();
+
+    // Matrix columns scaled by the box half size along the matching local axis.
+    const PxVec3 axisX = transformation.column0.getXYZ() * halfSize.x;
+    const PxVec3 axisY = transformation.column1.getXYZ() * halfSize.y;
+    const PxVec3 axisZ = transformation.column2.getXYZ() * halfSize.z;
+
+    // Largest reach per world axis = sum of the absolute contributions.
+    PxVec3 worldExtent;
+    worldExtent.x = PxAbs(axisX.x) + PxAbs(axisY.x) + PxAbs(axisZ.x);
+    worldExtent.y = PxAbs(axisX.y) + PxAbs(axisY.y) + PxAbs(axisZ.y);
+    worldExtent.z = PxAbs(axisX.z) + PxAbs(axisY.z) + PxAbs(axisZ.z);
+
+    return PxBounds3::centerExtents(worldCenter, worldExtent);
+}
+
+
+/**
+ * Runs the skinning steps that follow fetchResults.
+ *
+ * Does nothing unless the actor data is initialized, a clothing simulation
+ * exists and render data has been requested. Otherwise it optionally corrects
+ * simulation normals, optionally skins to the animation (only when parallel
+ * CPU skinning is off), skins to the physical mesh and finalizes skinning.
+ */
+void ClothingActorData::tickSynchAfterFetchResults_LocksPhysX()
+{
+	// NOTE: a '!bInternalFrozen' condition is present in the code only as a disabled comment.
+	const bool renderDataWanted = bIsInitialized && !bIsClothingSimulationNull && bShouldComputeRenderData;
+	if (!renderDataWanted)
+	{
+		return;
+	}
+
+	// Overwrite a few writeback normals.
+	if (bCorrectSimulationNormals)
+	{
+		skinPhysicsMaxDist0Normals_NoPhysx();
+	}
+
+	// Animation skinning happens here only when it is not done in parallel.
+	if (!bParallelCpuSkinning)
+	{
+		skinToAnimation_NoPhysX(true);
+	}
+
+	// Mesh-to-mesh skinning (used with skin cloth), then the final pass.
+	skinToPhysicalMesh_NoPhysX(true);
+	finalizeSkinning_NoPhysX(true);
+
+	// After the passes above the new bounds are expected to be valid.
+	PX_ASSERT(!mNewBounds.isEmpty());
+	PX_ASSERT(mNewBounds.isFinite());
+}
+
+
+/**
+ * Reports whether the memory estimate from skinPhysicsSimpleMem() is strictly
+ * below BLOCK_SIZE_SKIN_PHYSICS.
+ *
+ * BLOCK_SIZE_SKIN_PHYSICS is the block size in SPU_ClothSkinPhysicsSimple.spu.cpp.
+ *
+ * Sizing example (figures carried over from earlier notes, not re-derived here):
+ *   BLOCK_SIZE_SKIN_PHYSICS (32768*6), 100 bones, 4 bone indices per vertex
+ *   => simple mesh is vertexCount < 3336
+ */
+bool ClothingActorData::calcIfSimplePhysicsMesh() const
+{
+	const uint32_t requiredBytes = skinPhysicsSimpleMem();
+	return requiredBytes < BLOCK_SIZE_SKIN_PHYSICS;
+}
+
+
+/**
+ * Estimates, in bytes, the data touched by skinPhysicsMeshSimple() for the
+ * physical mesh of the current graphical LOD.
+ *
+ * The total is the sum of:
+ *   - source positions and normals (one PxVec3 each per simulated vertex),
+ *   - bone indices (uint16_t) and bone weights (float) per vertex influence,
+ *   - the internal bone matrices (PxMat44 each),
+ *   - the optimization data (one byte per entry).
+ *
+ * @return the byte count as a 32 bit value.
+ */
+uint32_t ClothingActorData::skinPhysicsSimpleMem() const
+{
+	PX_ASSERT(bIsInitialized);
+
+	const ClothingPhysicalMeshData* mesh = mAsset.GetPhysicalMeshFromLod(mCurrentGraphicalLodId);
+	PX_ASSERT(mesh != NULL);
+
+	const uint32_t vertexCount = mesh->mSimulatedVertexCount;
+	const uint32_t bonesPerVertex = mesh->mNumBonesPerVertex;
+	const uint32_t influenceCount = bonesPerVertex * vertexCount;
+
+	// positions + normals
+	const uint32_t vertexStreamBytes = 2 * vertexCount * sizeof(PxVec3);
+
+	// bone indices + bone weights
+	const uint32_t skinningBytes = influenceCount * (sizeof(uint16_t) + sizeof(float));
+
+	const uint32_t matrixBytes = mInternalMatricesCount * sizeof(PxMat44);
+
+	// mOptimizationDataCount ~ numVertices
+	const uint32_t optimizationBytes = mesh->mOptimizationDataCount * sizeof(uint8_t);
+
+	return vertexStreamBytes + skinningBytes + matrixBytes + optimizationBytes;
+}
+
+
+/**
+ * Skins the simulated vertices of the current LOD's physical mesh with the
+ * current internal bone matrices and writes the results to
+ * mSkinnedPhysicsPositions / mSkinnedPhysicsNormals.
+ *
+ * For every vertex the weighted sum of the bone-transformed position and
+ * normal is accumulated over the bones listed for that vertex; the normal is
+ * normalized before it is stored. The per-vertex bone count comes from the
+ * mesh's optimization data (two vertices per byte, 3 bits used per nibble).
+ *
+ * Returns immediately when the actor data is not initialized.
+ */
+void ClothingActorData::skinPhysicsMeshSimple()
+{
+	if (!bIsInitialized)
+	{
+		return;
+	}
+
+	// with bones, no interpolated matrices, no backstop?
+
+	const ClothingPhysicalMeshData* mesh = mAsset.GetPhysicalMeshFromLod(mCurrentGraphicalLodId);
+	PX_ASSERT(mesh != NULL);
+
+	const uint32_t vertexCount = mesh->mSimulatedVertexCount;
+	const uint32_t influencesPerVertex = mesh->mNumBonesPerVertex;
+
+	// source streams
+	PxVec3* const PX_RESTRICT srcPositions = mesh->mVertices;
+	PxVec3* const PX_RESTRICT srcNormals = mesh->mNormals;
+	const uint16_t* const PX_RESTRICT boneIndices = mesh->mBoneIndices;
+	const float* const PX_RESTRICT boneWeights = mesh->mBoneWeights;
+	const uint8_t* const PX_RESTRICT bonesUsed = mesh->mOptimizationData;
+	const PxMat44* boneMatrices = mInternalBoneMatricesCur; // TODO interpolated matrices?
+
+	// destination streams
+	PxVec3* const PX_RESTRICT dstPositions = mSkinnedPhysicsPositions;
+	PxVec3* const PX_RESTRICT dstNormals = mSkinnedPhysicsNormals;
+
+	PX_ASSERT(bonesUsed != NULL);
+
+	for (uint32_t v = 0; v < vertexCount; ++v)
+	{
+		// Even vertices use the low nibble of their byte, odd vertices the high one.
+		const uint32_t nibbleShift = (v & 1u) * 4u;
+		const uint32_t boneCount = (uint32_t(bonesUsed[v >> 1]) >> nibbleShift) & 0x7u;
+
+		// First influence slot of this vertex in the index/weight arrays.
+		const uint32_t firstInfluence = v * influencesPerVertex;
+
+		Simd4f accumPosition = gSimd4fZero;
+		Simd4f accumNormal = gSimd4fZero;
+
+		for (uint32_t b = 0; b < boneCount; b++)
+		{
+			const uint32_t slot = firstInfluence + b;
+
+			const float weight = boneWeights[slot];
+			PX_ASSERT(weight <= 1.0f);
+
+			const uint32_t matrixIndex = boneIndices[slot];
+			PX_ASSERT(matrixIndex < mInternalMatricesCount);
+
+			/// PH: This might be faster without the reference, but on PC I can't tell
+			/// HL: Now with SIMD it's significantly faster as reference
+			const PxMat44& bone = boneMatrices[matrixIndex];
+
+			const Simd4f weightV = Simd4fScalarFactory(weight);
+
+			// position: full affine transform, then weight
+			Simd4f weightedPosition = applyAffineTransform(bone, createSimd3f(srcPositions[v]));
+			weightedPosition = weightedPosition * weightV;
+			accumPosition = accumPosition + weightedPosition;
+
+			///todo There are probably cases where we don't need the normal on the physics mesh
+			// normal: linear part only, then weight
+			Simd4f weightedNormal = applyLinearTransform(bone, createSimd3f(srcNormals[v]));
+			weightedNormal = weightedNormal * weightV;
+			accumNormal = accumNormal + weightedNormal;
+		}
+
+		accumNormal = normalizeSimd3f(accumNormal);
+		store3(&dstNormals[v].x, accumNormal);
+		store3(&dstPositions[v].x, accumPosition);
+	}
+}
+
+
+}
+} // namespace nvidia