diff options
Diffstat (limited to 'studiorender/r_studiodraw.cpp')
| -rw-r--r-- | studiorender/r_studiodraw.cpp | 2986 |
1 files changed, 2986 insertions, 0 deletions
diff --git a/studiorender/r_studiodraw.cpp b/studiorender/r_studiodraw.cpp new file mode 100644 index 0000000..5f3565e --- /dev/null +++ b/studiorender/r_studiodraw.cpp @@ -0,0 +1,2986 @@ +//========= Copyright Valve Corporation, All rights reserved. ============// +// +// Purpose: +// +//=====================================================================================// + +#include "studiorender.h" +#include "studio.h" +#include "materialsystem/imesh.h" +#include "materialsystem/imaterialsystemhardwareconfig.h" +#include "materialsystem/imaterialvar.h" +#include "materialsystem/imorph.h" +#include "materialsystem/itexture.h" +#include "materialsystem/imaterial.h" +#include "optimize.h" +#include "mathlib/mathlib.h" +#include "mathlib/vector.h" +#include <malloc.h> +#include "mathlib/vmatrix.h" +#include "studiorendercontext.h" +#include "tier2/tier2.h" +#include "tier0/vprof.h" + +//#define PROFILE_STUDIO VPROF +#define PROFILE_STUDIO + +// memdbgon must be the last include file in a .cpp file!!! +#include "tier0/memdbgon.h" + +typedef void (*SoftwareProcessMeshFunc_t)( const mstudio_meshvertexdata_t *, matrix3x4_t *pPoseToWorld, + CCachedRenderData &vertexCache, CMeshBuilder& meshBuilder, int numVertices, unsigned short* pGroupToMesh, unsigned int nAlphaMask, + IMaterial *pMaterial); + +//----------------------------------------------------------------------------- +// Forward declarations +//----------------------------------------------------------------------------- + +class IClientEntity; + + +static int boxpnt[6][4] = +{ + { 0, 4, 6, 2 }, // +X + { 0, 1, 5, 4 }, // +Y + { 0, 2, 3, 1 }, // +Z + { 7, 5, 1, 3 }, // -X + { 7, 3, 2, 6 }, // -Y + { 7, 6, 4, 5 }, // -Z +}; + +static TableVector hullcolor[8] = +{ + { 1.0, 1.0, 1.0 }, + { 1.0, 0.5, 0.5 }, + { 0.5, 1.0, 0.5 }, + { 1.0, 1.0, 0.5 }, + { 0.5, 0.5, 1.0 }, + { 1.0, 0.5, 1.0 }, + { 0.5, 1.0, 1.0 }, + { 1.0, 1.0, 1.0 } +}; + + +//----------------------------------------------------------------------------- +// +//----------------------------------------------------------------------------- +static unsigned int s_nTranslucentModelHullCache = 0; +static unsigned int s_nSolidModelHullCache = 0; +void CStudioRender::R_StudioDrawHulls( int hitboxset, bool translucent ) +{ + int i, j; +// float lv; + Vector tmp; + Vector p[8]; + mstudiobbox_t *pbbox; + IMaterialVar *colorVar; + + mstudiohitboxset_t *s = m_pStudioHdr->pHitboxSet( hitboxset ); + if ( !s ) + return; + + pbbox = s->pHitbox( 0 ); + if ( !pbbox ) + return; + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + if( translucent ) + { + pRenderContext->Bind( m_pMaterialTranslucentModelHulls ); + colorVar = m_pMaterialTranslucentModelHulls->FindVarFast( "$color", &s_nTranslucentModelHullCache ); + } + else + { + pRenderContext->Bind( m_pMaterialSolidModelHulls ); + colorVar = m_pMaterialSolidModelHulls->FindVarFast( "$color", &s_nSolidModelHullCache ); + } + + + for (i = 0; i < s->numhitboxes; i++) + { + for (j = 0; j < 8; j++) + { + tmp[0] = (j & 1) ? pbbox[i].bbmin[0] : pbbox[i].bbmax[0]; + tmp[1] = (j & 2) ? pbbox[i].bbmin[1] : pbbox[i].bbmax[1]; + tmp[2] = (j & 4) ? pbbox[i].bbmin[2] : pbbox[i].bbmax[2]; + + VectorTransform( tmp, m_pBoneToWorld[pbbox[i].bone], p[j] ); + } + + j = (pbbox[i].group % 8); + g_pMaterialSystem->Flush(); + if( colorVar ) + { + if( translucent ) + { + colorVar->SetVecValue( 0.2f * hullcolor[j].x, 0.2f * hullcolor[j].y, 0.2f * hullcolor[j].z ); + } + else + { + colorVar->SetVecValue( hullcolor[j].x, hullcolor[j].y, hullcolor[j].z ); + } + } + for (j = 0; j < 6; j++) + { +#if 0 + tmp[0] = tmp[1] = tmp[2] = 0; + tmp[j % 3] = (j < 3) ? 1.0 : -1.0; + // R_StudioLighting( &lv, pbbox[i].bone, 0, tmp ); // BUG: not updated +#endif + + IMesh* pMesh = pRenderContext->GetDynamicMesh(); + CMeshBuilder meshBuilder; + meshBuilder.Begin( pMesh, MATERIAL_QUADS, 1 ); + + for (int k = 0; k < 4; ++k) + { + meshBuilder.Position3fv( p[boxpnt[j][k]].Base() ); + meshBuilder.AdvanceVertex(); + } + + meshBuilder.End(); + pMesh->Draw(); + } + } +} + + +void CStudioRender::R_StudioDrawBones (void) +{ + int i, j, k; +// float lv; + Vector tmp; + Vector p[8]; + Vector up, right, forward; + Vector a1; + mstudiobone_t *pbones; + Vector positionArray[4]; + + pbones = m_pStudioHdr->pBone( 0 ); + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + for (i = 0; i < m_pStudioHdr->numbones; i++) + { + if (pbones[i].parent == -1) + continue; + + k = pbones[i].parent; + + a1[0] = a1[1] = a1[2] = 1.0; + up[0] = m_pBoneToWorld[i][0][3] - m_pBoneToWorld[k][0][3]; + up[1] = m_pBoneToWorld[i][1][3] - m_pBoneToWorld[k][1][3]; + up[2] = m_pBoneToWorld[i][2][3] - m_pBoneToWorld[k][2][3]; + if (up[0] > up[1]) + if (up[0] > up[2]) + a1[0] = 0.0; + else + a1[2] = 0.0; + else + if (up[1] > up[2]) + a1[1] = 0.0; + else + a1[2] = 0.0; + CrossProduct( up, a1, right ); + VectorNormalize( right ); + CrossProduct( up, right, forward ); + VectorNormalize( forward ); + VectorScale( right, 2.0, right ); + VectorScale( forward, 2.0, forward ); + + for (j = 0; j < 8; j++) + { + p[j][0] = m_pBoneToWorld[k][0][3]; + p[j][1] = m_pBoneToWorld[k][1][3]; + p[j][2] = m_pBoneToWorld[k][2][3]; + + if (j & 1) + { + VectorSubtract( p[j], right, p[j] ); + } + else + { + VectorAdd( p[j], right, p[j] ); + } + + if (j & 2) + { + VectorSubtract( p[j], forward, p[j] ); + } + else + { + VectorAdd( p[j], forward, p[j] ); + } + + if (j & 4) + { + } + else + { + VectorAdd( p[j], up, p[j] ); + } + } + + VectorNormalize( up ); + VectorNormalize( right ); + VectorNormalize( forward ); + + pRenderContext->Bind( m_pMaterialModelBones ); + + for (j = 0; j < 6; j++) + { + switch( j) + { + case 0: VectorCopy( right, tmp ); break; + case 1: VectorCopy( forward, tmp ); break; + case 2: VectorCopy( up, tmp ); break; + case 3: VectorScale( right, -1, tmp ); break; + case 4: VectorScale( forward, -1, tmp ); break; + case 5: VectorScale( up, -1, tmp ); break; + } + // R_StudioLighting( &lv, -1, 0, tmp ); // BUG: not updated + + IMesh* pMesh = pRenderContext->GetDynamicMesh(); + CMeshBuilder meshBuilder; + meshBuilder.Begin( pMesh, MATERIAL_QUADS, 1 ); + + for (int k = 0; k < 4; ++k) + { + meshBuilder.Position3fv( p[boxpnt[j][k]].Base() ); + meshBuilder.AdvanceVertex(); + } + + meshBuilder.End(); + pMesh->Draw(); + } + } +} + + +int CStudioRender::R_StudioRenderModel( IMatRenderContext *pRenderContext, int skin, + int body, int hitboxset, void /*IClientEntity*/ *pEntity, + IMaterial **ppMaterials, int *pMaterialFlags, int flags, int boneMask, int lod, ColorMeshInfo_t *pColorMeshes ) +{ + VPROF("CStudioRender::R_StudioRenderModel"); + + int nDrawGroup = flags & STUDIORENDER_DRAW_GROUP_MASK; + + if ( m_pRC->m_Config.drawEntities == 2 ) + { + if ( nDrawGroup != STUDIORENDER_DRAW_TRANSLUCENT_ONLY ) + { + R_StudioDrawBones( ); + } + return 0; + } + + if ( m_pRC->m_Config.drawEntities == 3 ) + { + if ( nDrawGroup != STUDIORENDER_DRAW_TRANSLUCENT_ONLY ) + { + R_StudioDrawHulls( hitboxset, false ); + } + return 0; + } + + // BUG: This method is crap, though less crap than before. It should just sort + // the materials though it'll need to sort at render time as "skin" + // can change what materials a given mesh may use + int numTrianglesRendered = 0; + + // don't try to use these if not supported + if ( IsPC() && !g_pMaterialSystemHardwareConfig->SupportsColorOnSecondStream() ) + { + pColorMeshes = NULL; + } + + // Build list of submodels + BodyPartInfo_t *pBodyPartInfo = (BodyPartInfo_t*)_alloca( m_pStudioHdr->numbodyparts * sizeof(BodyPartInfo_t) ); + for ( int i=0 ; i < m_pStudioHdr->numbodyparts; ++i ) + { + pBodyPartInfo[i].m_nSubModelIndex = R_StudioSetupModel( i, body, &pBodyPartInfo[i].m_pSubModel, m_pStudioHdr ); + } + + // mark possible translucent meshes + if ( nDrawGroup != STUDIORENDER_DRAW_TRANSLUCENT_ONLY ) + { + // we're going to render the opaque meshes, so these will get counted in that pass + m_bSkippedMeshes = false; + m_bDrawTranslucentSubModels = false; + numTrianglesRendered += R_StudioRenderFinal( pRenderContext, skin, m_pStudioHdr->numbodyparts, pBodyPartInfo, + pEntity, ppMaterials, pMaterialFlags, boneMask, lod, pColorMeshes ); + } + else + { + m_bSkippedMeshes = true; + } + + if ( m_bSkippedMeshes && nDrawGroup != STUDIORENDER_DRAW_OPAQUE_ONLY ) + { + m_bDrawTranslucentSubModels = true; + numTrianglesRendered += R_StudioRenderFinal( pRenderContext, skin, m_pStudioHdr->numbodyparts, pBodyPartInfo, + pEntity, ppMaterials, pMaterialFlags, boneMask, lod, pColorMeshes ); + } + return numTrianglesRendered; +} + + +//----------------------------------------------------------------------------- +// Generate morph accumulator +//----------------------------------------------------------------------------- +void CStudioRender::GenerateMorphAccumulator( mstudiomodel_t *pSubModel ) +{ + // Deal with all flexes + // FIXME: HW Morphing doesn't work with translucent models yet + if ( !m_pRC->m_Config.m_bEnableHWMorph || !m_pRC->m_Config.bFlex || m_bDrawTranslucentSubModels || + !g_pMaterialSystemHardwareConfig->HasFastVertexTextures() ) + return; + + int nActiveMeshCount = 0; + mstudiomesh_t *ppMeshes[512]; + + // First, build the list of meshes that need morphing + for ( int i = 0; i < pSubModel->nummeshes; ++i ) + { + mstudiomesh_t *pMesh = pSubModel->pMesh(i); + studiomeshdata_t *pMeshData = &m_pStudioMeshes[pMesh->meshid]; + Assert( pMeshData ); + + int nFlexCount = pMesh->numflexes; + if ( !nFlexCount ) + continue; + + for ( int j = 0; j < pMeshData->m_NumGroup; ++j ) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j]; + bool bIsDeltaFlexed = (pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED) != 0; + if ( !bIsDeltaFlexed ) + continue; + + ppMeshes[nActiveMeshCount++] = pMesh; + Assert( nActiveMeshCount < 512 ); + break; + } + } + + if ( nActiveMeshCount == 0 ) + return; + + // HACK - Just turn off scissor for this model if it is doing morph accumulation + DisableScissor(); + + // Next, accumulate morphs for appropriate meshes + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + pRenderContext->BeginMorphAccumulation(); + for ( int i = 0; i < nActiveMeshCount; ++i ) + { + mstudiomesh_t *pMesh = ppMeshes[i]; + studiomeshdata_t *pMeshData = &m_pStudioMeshes[pMesh->meshid]; + + int nFlexCount = pMesh->numflexes; + MorphWeight_t *pWeights = (MorphWeight_t*)_alloca( nFlexCount * sizeof(MorphWeight_t) ); + ComputeFlexWeights( nFlexCount, pMesh->pFlex(0), pWeights ); + + for ( int j = 0; j < pMeshData->m_NumGroup; ++j ) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j]; + if ( !pGroup->m_pMorph ) + continue; + + pRenderContext->AccumulateMorph( pGroup->m_pMorph, nFlexCount, pWeights ); + } + } + pRenderContext->EndMorphAccumulation(); +} + + +//----------------------------------------------------------------------------- +// Computes eyeball state +//----------------------------------------------------------------------------- +void CStudioRender::ComputeEyelidStateFACS( mstudiomodel_t *pSubModel ) +{ + for ( int j = 0; j < pSubModel->numeyeballs; j++ ) + { + // FIXME: This might not be necessary... + R_StudioEyeballPosition( pSubModel->pEyeball( j ), &m_pEyeballState[ j ] ); + R_StudioEyelidFACS( pSubModel->pEyeball(j), &m_pEyeballState[j] ); + } +} + + +/* +================ +R_StudioRenderFinal +inputs: +outputs: returns the number of triangles rendered. +================ +*/ +int CStudioRender::R_StudioRenderFinal( IMatRenderContext *pRenderContext, + int skin, int nBodyPartCount, BodyPartInfo_t *pBodyPartInfo, void /*IClientEntity*/ *pClientEntity, + IMaterial **ppMaterials, int *pMaterialFlags, int boneMask, int lod, ColorMeshInfo_t *pColorMeshes ) +{ + VPROF("CStudioRender::R_StudioRenderFinal"); + + int numTrianglesRendered = 0; + + for ( int i=0 ; i < nBodyPartCount; i++ ) + { + m_pSubModel = pBodyPartInfo[i].m_pSubModel; + + // NOTE: This has to run here because it effects flex targets, + // so therefore it must happen prior to GenerateMorphAccumulator. + ComputeEyelidStateFACS( m_pSubModel ); + GenerateMorphAccumulator( m_pSubModel ); + + // Set up SW flex + m_VertexCache.SetBodyPart( i ); + m_VertexCache.SetModel( pBodyPartInfo[i].m_nSubModelIndex ); + + numTrianglesRendered += R_StudioDrawPoints( pRenderContext, skin, pClientEntity, + ppMaterials, pMaterialFlags, boneMask, lod, pColorMeshes ); + } + return numTrianglesRendered; +} + +static ConVar r_flashlightscissor( "r_flashlightscissor", "1", 0 ); + +void CStudioRender::EnableScissor( FlashlightState_t *state ) +{ + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + // Only scissor into the backbuffer + if ( r_flashlightscissor.GetBool() && state->DoScissor() && ( pRenderContext->GetRenderTarget() == NULL ) ) + { + pRenderContext->SetScissorRect( state->GetLeft(), state->GetTop(), state->GetRight(), state->GetBottom(), true ); + } +} + +void CStudioRender::DisableScissor() +{ + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + // Scissor even if we're not shadow depth mapping + if ( r_flashlightscissor.GetBool() ) + { + pRenderContext->SetScissorRect( -1, -1, -1, -1, false ); + } +} + + +//----------------------------------------------------------------------------- +// Draw shadows +//----------------------------------------------------------------------------- +void CStudioRender::DrawShadows( const DrawModelInfo_t& info, int flags, int boneMask ) +{ + if ( !m_ShadowState.Count() ) + return; + + VPROF("CStudioRender::DrawShadows"); + + IMaterial* pForcedMat = m_pRC->m_pForcedMaterial; + OverrideType_t nForcedType = m_pRC->m_nForcedMaterialType; + + // Here, we have to redraw the model one time for each flashlight + // Having a material of NULL means that we are a light source. + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + + pRenderContext->SetFlashlightMode( true ); + int i; + for (i = 0; i < m_ShadowState.Count(); ++i ) + { + if( !m_ShadowState[i].m_pMaterial ) + { + Assert( m_ShadowState[i].m_pFlashlightState && m_ShadowState[i].m_pWorldToTexture ); + pRenderContext->SetFlashlightStateEx( *m_ShadowState[i].m_pFlashlightState, *m_ShadowState[i].m_pWorldToTexture, m_ShadowState[i].m_pFlashlightDepthTexture ); + + EnableScissor( m_ShadowState[i].m_pFlashlightState ); + + R_StudioRenderModel( pRenderContext, info.m_Skin, info.m_Body, info.m_HitboxSet, info.m_pClientEntity, + info.m_pHardwareData->m_pLODs[info.m_Lod].ppMaterials, + info.m_pHardwareData->m_pLODs[info.m_Lod].pMaterialFlags, flags, boneMask, info.m_Lod, info.m_pColorMeshes ); + + DisableScissor(); + } + } + pRenderContext->SetFlashlightMode( false ); + + // Here, we have to redraw the model one time for each shadow + for (int i = 0; i < m_ShadowState.Count(); ++i ) + { + if( m_ShadowState[i].m_pMaterial ) + { + m_pRC->m_pForcedMaterial = m_ShadowState[i].m_pMaterial; + m_pRC->m_nForcedMaterialType = OVERRIDE_NORMAL; + R_StudioRenderModel( pRenderContext, 0, info.m_Body, 0, m_ShadowState[i].m_pProxyData, + NULL, NULL, flags, boneMask, info.m_Lod, NULL ); + } + } + + // Restore the previous forced material + m_pRC->m_pForcedMaterial = pForcedMat; + m_pRC->m_nForcedMaterialType = nForcedType; +} + +void CStudioRender::DrawStaticPropShadows( const DrawModelInfo_t &info, const StudioRenderContext_t &rc, const matrix3x4_t& rootToWorld, int flags ) +{ + memcpy( &m_StaticPropRootToWorld, &rootToWorld, sizeof(matrix3x4_t) ); + memcpy( &m_PoseToWorld[0], &rootToWorld, sizeof(matrix3x4_t) ); + + m_pRC = const_cast< StudioRenderContext_t* >( &rc ); + m_pBoneToWorld = &m_StaticPropRootToWorld; + m_pStudioHdr = info.m_pStudioHdr; + m_pStudioMeshes = info.m_pHardwareData->m_pLODs[info.m_Lod].m_pMeshData; + DrawShadows( info, flags, BONE_USED_BY_ANYTHING ); + m_pRC = NULL; + m_pBoneToWorld = NULL; +} + +// Draw flashlight lighting on decals. +void CStudioRender::DrawFlashlightDecals( const DrawModelInfo_t& info, int lod ) +{ + if ( !m_ShadowState.Count() ) + return; + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + pRenderContext->SetFlashlightMode( true ); + int i; + for (i = 0; i < m_ShadowState.Count(); ++i ) + { + // This isn't clear. This means that this is a flashlight if the material is NULL. FLASHLIGHTFIXME + if( !m_ShadowState[i].m_pMaterial ) + { + Assert( m_ShadowState[i].m_pFlashlightState && m_ShadowState[i].m_pWorldToTexture ); + pRenderContext->SetFlashlightStateEx( *m_ShadowState[i].m_pFlashlightState, *m_ShadowState[i].m_pWorldToTexture, m_ShadowState[i].m_pFlashlightDepthTexture ); + + EnableScissor( m_ShadowState[i].m_pFlashlightState ); + + DrawDecal( info, lod, info.m_Body ); + + DisableScissor(); + } + } + pRenderContext->SetFlashlightMode( false ); +} + + +static matrix3x4_t *ComputeSkinMatrix( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &result ) +{ + float flWeight0, flWeight1, flWeight2; + + switch( boneweights.numbones ) + { + default: + case 1: + return &pPoseToWorld[(unsigned)boneweights.bone[0]]; + + case 2: + { + matrix3x4_t &boneMat0 = pPoseToWorld[(unsigned)boneweights.bone[0]]; + matrix3x4_t &boneMat1 = pPoseToWorld[(unsigned)boneweights.bone[1]]; + flWeight0 = boneweights.weight[0]; + flWeight1 = boneweights.weight[1]; + + // NOTE: Inlining here seems to make a fair amount of difference + result[0][0] = boneMat0[0][0] * flWeight0 + boneMat1[0][0] * flWeight1; + result[0][1] = boneMat0[0][1] * flWeight0 + boneMat1[0][1] * flWeight1; + result[0][2] = boneMat0[0][2] * flWeight0 + boneMat1[0][2] * flWeight1; + result[0][3] = boneMat0[0][3] * flWeight0 + boneMat1[0][3] * flWeight1; + result[1][0] = boneMat0[1][0] * flWeight0 + boneMat1[1][0] * flWeight1; + result[1][1] = boneMat0[1][1] * flWeight0 + boneMat1[1][1] * flWeight1; + result[1][2] = boneMat0[1][2] * flWeight0 + boneMat1[1][2] * flWeight1; + result[1][3] = boneMat0[1][3] * flWeight0 + boneMat1[1][3] * flWeight1; + result[2][0] = boneMat0[2][0] * flWeight0 + boneMat1[2][0] * flWeight1; + result[2][1] = boneMat0[2][1] * flWeight0 + boneMat1[2][1] * flWeight1; + result[2][2] = boneMat0[2][2] * flWeight0 + boneMat1[2][2] * flWeight1; + result[2][3] = boneMat0[2][3] * flWeight0 + boneMat1[2][3] * flWeight1; + } + return &result; + + case 3: + { + matrix3x4_t &boneMat0 = pPoseToWorld[(unsigned)boneweights.bone[0]]; + matrix3x4_t &boneMat1 = pPoseToWorld[(unsigned)boneweights.bone[1]]; + matrix3x4_t &boneMat2 = pPoseToWorld[(unsigned)boneweights.bone[2]]; + flWeight0 = boneweights.weight[0]; + flWeight1 = boneweights.weight[1]; + flWeight2 = boneweights.weight[2]; + + result[0][0] = boneMat0[0][0] * flWeight0 + boneMat1[0][0] * flWeight1 + boneMat2[0][0] * flWeight2; + result[0][1] = boneMat0[0][1] * flWeight0 + boneMat1[0][1] * flWeight1 + boneMat2[0][1] * flWeight2; + result[0][2] = boneMat0[0][2] * flWeight0 + boneMat1[0][2] * flWeight1 + boneMat2[0][2] * flWeight2; + result[0][3] = boneMat0[0][3] * flWeight0 + boneMat1[0][3] * flWeight1 + boneMat2[0][3] * flWeight2; + result[1][0] = boneMat0[1][0] * flWeight0 + boneMat1[1][0] * flWeight1 + boneMat2[1][0] * flWeight2; + result[1][1] = boneMat0[1][1] * flWeight0 + boneMat1[1][1] * flWeight1 + boneMat2[1][1] * flWeight2; + result[1][2] = boneMat0[1][2] * flWeight0 + boneMat1[1][2] * flWeight1 + boneMat2[1][2] * flWeight2; + result[1][3] = boneMat0[1][3] * flWeight0 + boneMat1[1][3] * flWeight1 + boneMat2[1][3] * flWeight2; + result[2][0] = boneMat0[2][0] * flWeight0 + boneMat1[2][0] * flWeight1 + boneMat2[2][0] * flWeight2; + result[2][1] = boneMat0[2][1] * flWeight0 + boneMat1[2][1] * flWeight1 + boneMat2[2][1] * flWeight2; + result[2][2] = boneMat0[2][2] * flWeight0 + boneMat1[2][2] * flWeight1 + boneMat2[2][2] * flWeight2; + result[2][3] = boneMat0[2][3] * flWeight0 + boneMat1[2][3] * flWeight1 + boneMat2[2][3] * flWeight2; + } + return &result; + + case 4: + Assert(0); +#if (MAX_NUM_BONES_PER_VERT > 3) + { + // Don't compile this if MAX_NUM_BONES_PER_VERT is too low + matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]]; + matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]]; + matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]]; + matrix3x4_t &boneMat3 = pPoseToWorld[boneweights.bone[3]]; + flWeight0 = boneweights.weight[0]; + flWeight1 = boneweights.weight[1]; + flWeight2 = boneweights.weight[2]; + float flWeight3 = boneweights.weight[3]; + + result[0][0] = boneMat0[0][0] * flWeight0 + boneMat1[0][0] * flWeight1 + boneMat2[0][0] * flWeight2 + boneMat3[0][0] * flWeight3; + result[0][1] = boneMat0[0][1] * flWeight0 + boneMat1[0][1] * flWeight1 + boneMat2[0][1] * flWeight2 + boneMat3[0][1] * flWeight3; + result[0][2] = boneMat0[0][2] * flWeight0 + boneMat1[0][2] * flWeight1 + boneMat2[0][2] * flWeight2 + boneMat3[0][2] * flWeight3; + result[0][3] = boneMat0[0][3] * flWeight0 + boneMat1[0][3] * flWeight1 + boneMat2[0][3] * flWeight2 + boneMat3[0][3] * flWeight3; + result[1][0] = boneMat0[1][0] * flWeight0 + boneMat1[1][0] * flWeight1 + boneMat2[1][0] * flWeight2 + boneMat3[1][0] * flWeight3; + result[1][1] = boneMat0[1][1] * flWeight0 + boneMat1[1][1] * flWeight1 + boneMat2[1][1] * flWeight2 + boneMat3[1][1] * flWeight3; + result[1][2] = boneMat0[1][2] * flWeight0 + boneMat1[1][2] * flWeight1 + boneMat2[1][2] * flWeight2 + boneMat3[1][2] * flWeight3; + result[1][3] = boneMat0[1][3] * flWeight0 + boneMat1[1][3] * flWeight1 + boneMat2[1][3] * flWeight2 + boneMat3[1][3] * flWeight3; + result[2][0] = boneMat0[2][0] * flWeight0 + boneMat1[2][0] * flWeight1 + boneMat2[2][0] * flWeight2 + boneMat3[2][0] * flWeight3; + result[2][1] = boneMat0[2][1] * flWeight0 + boneMat1[2][1] * flWeight1 + boneMat2[2][1] * flWeight2 + boneMat3[2][1] * flWeight3; + result[2][2] = boneMat0[2][2] * flWeight0 + boneMat1[2][2] * flWeight1 + boneMat2[2][2] * flWeight2 + boneMat3[2][2] * flWeight3; + result[2][3] = boneMat0[2][3] * flWeight0 + boneMat1[2][3] * flWeight1 + boneMat2[2][3] * flWeight2 + boneMat3[2][3] * flWeight3; + } + return &result; +#endif + } + + Assert(0); + return NULL; +} + + +static matrix3x4_t *ComputeSkinMatrixSSE( mstudioboneweight_t &boneweights, matrix3x4_t *pPoseToWorld, matrix3x4_t &result ) +{ + // NOTE: pPoseToWorld, being cache aligned, doesn't need explicit initialization +#if defined( _WIN32 ) && !defined( _X360 ) + switch( boneweights.numbones ) + { + default: + case 1: + return &pPoseToWorld[boneweights.bone[0]]; + + case 2: + { + matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]]; + matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]]; + float *pWeights = boneweights.weight; + + _asm + { + mov eax, DWORD PTR [pWeights] + movss xmm6, dword ptr[eax] ; boneweights.weight[0] + movss xmm7, dword ptr[eax + 4] ; boneweights.weight[1] + + mov eax, DWORD PTR [boneMat0] + mov ecx, DWORD PTR [boneMat1] + mov edi, DWORD PTR [result] + + // Fill xmm6, and 7 with all the bone weights + shufps xmm6, xmm6, 0 + shufps xmm7, xmm7, 0 + + // Load up all rows of the three matrices + movaps xmm0, XMMWORD PTR [eax] + movaps xmm1, XMMWORD PTR [ecx] + movaps xmm2, XMMWORD PTR [eax + 16] + movaps xmm3, XMMWORD PTR [ecx + 16] + movaps xmm4, XMMWORD PTR [eax + 32] + movaps xmm5, XMMWORD PTR [ecx + 32] + + // Multiply the rows by the weights + mulps xmm0, xmm6 + mulps xmm1, xmm7 + mulps xmm2, xmm6 + mulps xmm3, xmm7 + mulps xmm4, xmm6 + mulps xmm5, xmm7 + + addps xmm0, xmm1 + addps xmm2, xmm3 + addps xmm4, xmm5 + + movaps XMMWORD PTR [edi], xmm0 + movaps XMMWORD PTR [edi + 16], xmm2 + movaps XMMWORD PTR [edi + 32], xmm4 + } + } + return &result; + + case 3: + { + matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]]; + matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]]; + matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]]; + float *pWeights = boneweights.weight; + + _asm + { + mov eax, DWORD PTR [pWeights] + movss xmm5, dword ptr[eax] ; boneweights.weight[0] + movss xmm6, dword ptr[eax + 4] ; boneweights.weight[1] + movss xmm7, dword ptr[eax + 8] ; boneweights.weight[2] + + mov eax, DWORD PTR [boneMat0] + mov ecx, DWORD PTR [boneMat1] + mov edx, DWORD PTR [boneMat2] + mov edi, DWORD PTR [result] + + // Fill xmm5, 6, and 7 with all the bone weights + shufps xmm5, xmm5, 0 + shufps xmm6, xmm6, 0 + shufps xmm7, xmm7, 0 + + // Load up the first row of the three matrices + movaps xmm0, XMMWORD PTR [eax] + movaps xmm1, XMMWORD PTR [ecx] + movaps xmm2, XMMWORD PTR [edx] + + // Multiply the rows by the weights + mulps xmm0, xmm5 + mulps xmm1, xmm6 + mulps xmm2, xmm7 + + addps xmm0, xmm1 + addps xmm0, xmm2 + movaps XMMWORD PTR [edi], xmm0 + + // Load up the second row of the three matrices + movaps xmm0, XMMWORD PTR [eax + 16] + movaps xmm1, XMMWORD PTR [ecx + 16] + movaps xmm2, XMMWORD PTR [edx + 16] + + // Multiply the rows by the weights + mulps xmm0, xmm5 + mulps xmm1, xmm6 + mulps xmm2, xmm7 + + addps xmm0, xmm1 + addps xmm0, xmm2 + movaps XMMWORD PTR [edi + 16], xmm0 + + // Load up the third row of the three matrices + movaps xmm0, XMMWORD PTR [eax + 32] + movaps xmm1, XMMWORD PTR [ecx + 32] + movaps xmm2, XMMWORD PTR [edx + 32] + + // Multiply the rows by the weights + mulps xmm0, xmm5 + mulps xmm1, xmm6 + mulps xmm2, xmm7 + + addps xmm0, xmm1 + addps xmm0, xmm2 + movaps XMMWORD PTR [edi + 32], xmm0 + } + } + return &result; + + case 4: + Assert(0); +#if (MAX_NUM_BONES_PER_VERT > 3) + { + // Don't compile this if MAX_NUM_BONES_PER_VERT is too low + matrix3x4_t &boneMat0 = pPoseToWorld[boneweights.bone[0]]; + matrix3x4_t &boneMat1 = pPoseToWorld[boneweights.bone[1]]; + matrix3x4_t &boneMat2 = pPoseToWorld[boneweights.bone[2]]; + matrix3x4_t &boneMat3 = pPoseToWorld[boneweights.bone[3]]; + float *pWeights = boneweights.weight; + + _asm + { + mov eax, DWORD PTR [pWeights] + movss xmm4, dword ptr[eax] ; boneweights.weight[0] + movss xmm5, dword ptr[eax + 4] ; boneweights.weight[1] + movss xmm6, dword ptr[eax + 8] ; boneweights.weight[2] + movss xmm7, dword ptr[eax + 12] ; boneweights.weight[3] + + mov eax, DWORD PTR [boneMat0] + mov ecx, DWORD PTR [boneMat1] + mov edx, DWORD PTR [boneMat2] + mov esi, DWORD PTR [boneMat3] + mov edi, DWORD PTR [result] + + // Fill xmm5, 6, and 7 with all the bone weights + shufps xmm4, xmm4, 0 + shufps xmm5, xmm5, 0 + shufps xmm6, xmm6, 0 + shufps xmm7, xmm7, 0 + + // Load up the first row of the four matrices + movaps xmm0, XMMWORD PTR [eax] + movaps xmm1, XMMWORD PTR [ecx] + movaps xmm2, XMMWORD PTR [edx] + movaps xmm3, XMMWORD PTR [esi] + + // Multiply the rows by the weights + mulps xmm0, xmm4 + mulps xmm1, xmm5 + mulps xmm2, xmm6 + mulps xmm3, xmm7 + + addps xmm0, xmm1 + addps xmm2, xmm3 + addps xmm0, xmm2 + movaps XMMWORD PTR [edi], xmm0 + + // Load up the second row of the three matrices + movaps xmm0, XMMWORD PTR [eax + 16] + movaps xmm1, XMMWORD PTR [ecx + 16] + movaps xmm2, XMMWORD PTR [edx + 16] + movaps xmm3, XMMWORD PTR [esi + 16] + + // Multiply the rows by the weights + mulps xmm0, xmm4 + mulps xmm1, xmm5 + mulps xmm2, xmm6 + mulps xmm3, xmm7 + + addps xmm0, xmm1 + addps xmm2, xmm3 + addps xmm0, xmm2 + movaps XMMWORD PTR [edi + 16], xmm0 + + // Load up the third row of the three matrices + movaps xmm0, XMMWORD PTR [eax + 32] + movaps xmm1, XMMWORD PTR [ecx + 32] + movaps xmm2, XMMWORD PTR [edx + 32] + movaps xmm3, XMMWORD PTR [esi + 32] + + // Multiply the rows by the weights + mulps xmm0, xmm4 + mulps xmm1, xmm5 + mulps xmm2, xmm6 + mulps xmm3, xmm7 + + addps xmm0, xmm1 + addps xmm2, xmm3 + addps xmm0, xmm2 + movaps XMMWORD PTR [edi + 32], xmm0 + } + } + return &result; +#endif + } +#elif POSIX +#warning "ComputeSkinMatrixSSE C implementation only" + return ComputeSkinMatrix( boneweights, pPoseToWorld, result ); +#elif defined( _X360 ) + return ComputeSkinMatrix( boneweights, pPoseToWorld, result ); +#else + #error +#endif + + Assert( 0 ); + return NULL; +} + +//----------------------------------------------------------------------------- +// Designed for inter-module draw optimized calling, requires R_InitLightEffectWorld3() +// Compute the lighting at a point and normal +// Uses the set function pointer +// Final lighting is in gamma space +//----------------------------------------------------------------------------- +static lightpos_t lightpos[MAXLOCALLIGHTS]; +inline void CStudioRender::R_ComputeLightAtPoint3( const Vector &pos, const Vector &normal, Vector &color ) +{ + if ( m_pRC->m_Config.fullbright ) + { + color.Init( 1.0f, 1.0f, 1.0f ); + return; + } + + // Set up lightpos[i].dot, lightpos[i].falloff, and lightpos[i].delta for all lights + R_LightStrengthWorld( pos, m_pRC->m_NumLocalLights, m_pRC->m_LocalLights, lightpos ); + + // calculate ambient values from the ambient cube given a normal. + R_LightAmbient_4D( normal, m_pRC->m_LightBoxColors, color ); + + // Calculate color given lightpos_t lightpos, a normal, and the ambient + // color from the ambient cube calculated above. + Assert(R_LightEffectsWorld3); + R_LightEffectsWorld3( m_pRC->m_LocalLights, lightpos, normal, color ); +} + + +// define SPECIAL_SSE_MESH_PROCESSOR to enable code which contains a special optimized SSE lighting loop, significantly +// improving software vertex processing performace. +#if defined( _WIN32 ) && !defined( _X360 ) +#define SPECIAL_SSE_MESH_PROCESSOR +#endif + +#ifdef SPECIAL_SSE_MESH_PROCESSOR +//#define VERIFY_SSE_LIGHTING + +// false: MAX(0,L*N) true: .5*(L.N)+.5. set based on material +static bool SSELightingHalfLambert; + +// These variables are used by the special SSE lighting path. The +// lighting path calculates them everytime it processes a mesh so their +// is no need to keep them in sync with changes to the other light variables +static fltx4 OneOver_ThetaDot_Minus_PhiDot[MAXLOCALLIGHTS]; // 1/(theta-phi) + +void CStudioRender::R_MouthLighting( fltx4 fIllum, const FourVectors& normal, const FourVectors& forward, FourVectors &light ) +{ + fltx4 dot = SubSIMD(Four_Zeros,normal*forward); + dot=MaxSIMD(Four_Zeros,dot); + dot=MulSIMD(fIllum,dot); + light *= dot; +} + +inline void CStudioRender::R_ComputeLightAtPoints3( const FourVectors &pos, const FourVectors &normal, FourVectors &color ) +{ + if ( m_pRC->m_Config.fullbright ) + { + color.DuplicateVector( Vector( 1.0f, 1.0f, 1.0f ) ); + return; + } + + R_LightAmbient_4D( normal, m_pRC->m_LightBoxColors, color ); + // now, add in contribution from all lights + for ( int i = 0; i < m_pRC->m_NumLocalLights; i++) + { + FourVectors delta; + LightDesc_t const *wl = m_pRC->m_LocalLights+i; + Assert((wl->m_Type==MATERIAL_LIGHT_POINT) || (wl->m_Type==MATERIAL_LIGHT_SPOT) || (wl->m_Type==MATERIAL_LIGHT_DIRECTIONAL)); + switch (wl->m_Type) + { + case MATERIAL_LIGHT_POINT: + case MATERIAL_LIGHT_SPOT: + delta.DuplicateVector(wl->m_Position); + delta-=pos; + break; + + case MATERIAL_LIGHT_DIRECTIONAL: + delta.DuplicateVector(wl->m_Direction); + delta*=-1.0; + break; + + } + fltx4 falloff = R_WorldLightDistanceFalloff( wl, delta); + delta.VectorNormalizeFast(); + fltx4 strength=delta*normal; + if (SSELightingHalfLambert) + { + strength=AddSIMD(MulSIMD(strength,Four_PointFives),Four_PointFives); + } + else + strength=MaxSIMD(Four_Zeros,delta*normal); + + switch(wl->m_Type) + { + case MATERIAL_LIGHT_POINT: + // half-lambert + break; + + case MATERIAL_LIGHT_SPOT: + { + fltx4 dot2=SubSIMD(Four_Zeros,delta*wl->m_Direction); // dot position with spot light dir for cone falloff + + fltx4 cone_falloff_scale=MulSIMD(OneOver_ThetaDot_Minus_PhiDot[i], + SubSIMD(dot2,ReplicateX4(wl->m_PhiDot))); + cone_falloff_scale=MinSIMD(cone_falloff_scale,Four_Ones); + if ((wl->m_Falloff!=0.0) && (wl->m_Falloff!=1.0)) + { + // !!speed!! could compute integer exponent needed by powsimd and store in light + cone_falloff_scale=PowSIMD(cone_falloff_scale,wl->m_Falloff); + } + strength=MulSIMD(cone_falloff_scale,strength); + + // now, zero out lighting where dot2<phidot. This will mask out any invalid results + // from pow function, etc + fltx4 OutsideMask=CmpGtSIMD(dot2,ReplicateX4(wl->m_PhiDot)); // outside light cone? + strength=AndSIMD(OutsideMask,strength); + } + break; + + case MATERIAL_LIGHT_DIRECTIONAL: + break; + + } + strength=MulSIMD(strength,falloff); + color.x=AddSIMD(color.x,MulSIMD(strength,ReplicateX4(wl->m_Color.x))); + color.y=AddSIMD(color.y,MulSIMD(strength,ReplicateX4(wl->m_Color.y))); + color.z=AddSIMD(color.z,MulSIMD(strength,ReplicateX4(wl->m_Color.z))); + } +} + +#endif // SPECIAL_SSE_MESH_PROCESSOR + +//----------------------------------------------------------------------------- +// Optimized for low-end hardware +//----------------------------------------------------------------------------- +#pragma warning (disable:4701) + +// NOTE: I'm using this crazy wrapper because using straight template functions +// doesn't appear to work with function tables +template< int nHasTangentSpace, int nDoFlex, int nHasSIMD, int nLighting, int nDX8VertexFormat > +class CProcessMeshWrapper +{ +public: + static void R_PerformLighting( const Vector &forward, float fIllum, + const Vector &pos, const Vector &norm, unsigned int nAlphaMask, unsigned int *pColor ) + { + if ( nLighting == LIGHTING_SOFTWARE ) + { + Vector color; + g_StudioRender.R_ComputeLightAtPoint3( pos, norm, color ); + + unsigned char r = LinearToLightmap( color.x ); + unsigned char g = LinearToLightmap( color.y ); + unsigned char b = LinearToLightmap( color.z ); + + *pColor = b | (g << 8) | (r << 16) | nAlphaMask; + } + else if ( nLighting == LIGHTING_MOUTH ) + { + if ( fIllum != 0.0f ) + { + Vector color; + g_StudioRender.R_ComputeLightAtPoint3( pos, norm, color ); + g_StudioRender.R_MouthLighting( fIllum, norm, forward, color ); + + unsigned char r = LinearToLightmap( color.x ); + unsigned char g = LinearToLightmap( color.y ); + unsigned char b = LinearToLightmap( color.z ); + + *pColor = b | (g << 8) | (r << 16) | nAlphaMask; + } + else + { + *pColor = nAlphaMask; + } + } + } + + static void R_TransformVert( const Vector *pSrcPos, const Vector *pSrcNorm, const Vector4D *pSrcTangentS, + matrix3x4_t *pSkinMat, VectorAligned &pos, Vector &norm, Vector4DAligned &tangentS ) + { + // NOTE: Could add SSE stuff here, if we knew what SSE stuff could make it faster + + pos.x = pSrcPos->x * (*pSkinMat)[0][0] + pSrcPos->y * (*pSkinMat)[0][1] + pSrcPos->z * (*pSkinMat)[0][2] + (*pSkinMat)[0][3]; + norm.x = pSrcNorm->x * (*pSkinMat)[0][0] + pSrcNorm->y * (*pSkinMat)[0][1] + pSrcNorm->z * (*pSkinMat)[0][2]; + + pos.y = pSrcPos->x * (*pSkinMat)[1][0] + pSrcPos->y * (*pSkinMat)[1][1] + pSrcPos->z * (*pSkinMat)[1][2] + (*pSkinMat)[1][3]; + norm.y = pSrcNorm->x * (*pSkinMat)[1][0] + pSrcNorm->y * (*pSkinMat)[1][1] + pSrcNorm->z * (*pSkinMat)[1][2]; + + pos.z = pSrcPos->x * (*pSkinMat)[2][0] + pSrcPos->y * (*pSkinMat)[2][1] + pSrcPos->z * (*pSkinMat)[2][2] + (*pSkinMat)[2][3]; + norm.z = pSrcNorm->x * (*pSkinMat)[2][0] + pSrcNorm->y * (*pSkinMat)[2][1] + pSrcNorm->z * (*pSkinMat)[2][2]; + + if ( nHasTangentSpace ) + { + tangentS.x = pSrcTangentS->x * (*pSkinMat)[0][0] + pSrcTangentS->y * (*pSkinMat)[0][1] + pSrcTangentS->z * (*pSkinMat)[0][2]; + tangentS.y = pSrcTangentS->x * (*pSkinMat)[1][0] + pSrcTangentS->y * (*pSkinMat)[1][1] + pSrcTangentS->z * (*pSkinMat)[1][2]; + tangentS.z = pSrcTangentS->x * (*pSkinMat)[2][0] + pSrcTangentS->y * (*pSkinMat)[2][1] + pSrcTangentS->z * (*pSkinMat)[2][2]; + tangentS.w = pSrcTangentS->w; + } + } + + static void R_StudioSoftwareProcessMesh( const mstudio_meshvertexdata_t *vertData, matrix3x4_t *pPoseToWorld, + CCachedRenderData &vertexCache, CMeshBuilder& meshBuilder, int numVertices, unsigned short* pGroupToMesh, unsigned int nAlphaMask, + IMaterial* pMaterial) + { + Vector color; + Vector4D *pStudioTangentS; + Vector4DAligned tangentS; + Vector *pSrcPos; + Vector *pSrcNorm; + Vector4D *pSrcTangentS = NULL; + + ALIGN16 ModelVertexDX8_t dstVertex ALIGN16_POST; + dstVertex.m_flBoneWeights[0] = 1.0f; + dstVertex.m_flBoneWeights[1] = 0.0f; + dstVertex.m_nBoneIndices = 0; + dstVertex.m_nColor = 0xFFFFFFFF; + dstVertex.m_vecUserData.Init( 1.0f, 0.0f, 0.0f, 1.0f ); + + ALIGN16 matrix3x4_t temp ALIGN16_POST; + ALIGN16 matrix3x4_t *pSkinMat ALIGN16_POST; + + int ntemp[PREFETCH_VERT_COUNT]; + + Assert( numVertices > 0 ); + + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + + if (nHasTangentSpace) + { + pStudioTangentS = vertData->TangentS( 0 ); + Assert( pStudioTangentS->w == -1.0f || pStudioTangentS->w == 1.0f ); + } + + // Mouth related stuff... + float fIllum = 1.0f; + Vector forward; + if (nLighting == LIGHTING_MOUTH) + { + g_StudioRender.R_MouthComputeLightingValues( fIllum, forward ); + } + + if ((nLighting == LIGHTING_MOUTH) || (nLighting == LIGHTING_SOFTWARE)) + { + g_StudioRender.R_InitLightEffectsWorld3(); + } +#ifdef _DEBUG + // In debug, clear it out to ensure we aren't accidentially calling + // the last setup for R_ComputeLightForPoint3. + else + { + g_StudioRender.R_LightEffectsWorld3 = NULL; + } +#endif + +#if defined( _WIN32 ) && !defined( _X360 ) + if ( nHasSIMD ) + { + // Precaches the data + _mm_prefetch( (char*)((int)pGroupToMesh & (~0x1F)), _MM_HINT_NTA ); + } +#endif + for ( int i = 0; i < PREFETCH_VERT_COUNT; ++i ) + { + ntemp[i] = pGroupToMesh[i]; +#if defined( _WIN32 ) && !defined( _X360 ) + if ( nHasSIMD ) + { + char *pMem = (char*)&pVertices[ntemp[i]]; + _mm_prefetch( pMem, _MM_HINT_NTA ); + _mm_prefetch( pMem + 32, _MM_HINT_NTA ); + if ( nHasTangentSpace ) + { + _mm_prefetch( (char*)&pStudioTangentS[ntemp[i]], _MM_HINT_NTA ); + } + } +#endif + } + + int n, idx; + for ( int j=0; j < numVertices; ++j ) + { +#if defined( _WIN32 ) && !defined( _X360 ) + if ( nHasSIMD ) + { + char *pMem = (char*)&pGroupToMesh[j + PREFETCH_VERT_COUNT + 1]; + _mm_prefetch( (char*)((int)pMem & (~0x1F)), _MM_HINT_NTA ); + } +#endif + idx = j & (PREFETCH_VERT_COUNT-1); + n = ntemp[idx]; + + mstudiovertex_t &vert = pVertices[n]; + + ntemp[idx] = pGroupToMesh[j + PREFETCH_VERT_COUNT]; + + // Compute the skinning matrix + if ( nHasSIMD ) + { + pSkinMat = ComputeSkinMatrixSSE( vert.m_BoneWeights, pPoseToWorld, temp ); + } + else + { + pSkinMat = ComputeSkinMatrix( vert.m_BoneWeights, pPoseToWorld, temp ); + } + + // transform into world space + if (nDoFlex && vertexCache.IsVertexFlexed(n)) + { + CachedPosNormTan_t* pFlexedVertex = vertexCache.GetFlexVertex(n); + pSrcPos = &pFlexedVertex->m_Position; + pSrcNorm = &pFlexedVertex->m_Normal; + + if (nHasTangentSpace) + { + pSrcTangentS = &pFlexedVertex->m_TangentS; + Assert( pSrcTangentS->w == -1.0f || pSrcTangentS->w == 1.0f ); + } + } + else + { + pSrcPos = &vert.m_vecPosition; + pSrcNorm = &vert.m_vecNormal; + + if (nHasTangentSpace) + { + pSrcTangentS = &pStudioTangentS[n]; + Assert( pSrcTangentS->w == -1.0f || pSrcTangentS->w == 1.0f ); + } + } + + // Transform the vert into world space + R_TransformVert( pSrcPos, pSrcNorm, pSrcTangentS, pSkinMat, + *(VectorAligned*)&dstVertex.m_vecPosition, dstVertex.m_vecNormal, *(Vector4DAligned*)&dstVertex.m_vecUserData ); + +#if defined( _WIN32 ) && !defined( _X360 ) + if ( nHasSIMD ) + { + _mm_prefetch( (char*)&pVertices[ntemp[idx]], _MM_HINT_NTA); + _mm_prefetch( (char*)&pVertices[ntemp[idx]] + 32, _MM_HINT_NTA ); + if ( nHasTangentSpace ) + { + _mm_prefetch( (char*)&pStudioTangentS[ntemp[idx]], _MM_HINT_NTA ); + } + } +#endif + // Compute lighting + R_PerformLighting( forward, fIllum, dstVertex.m_vecPosition, dstVertex.m_vecNormal, nAlphaMask, &dstVertex.m_nColor ); + + dstVertex.m_vecTexCoord = vert.m_vecTexCoord; + + if ( IsX360() || nDX8VertexFormat ) + { +#if !defined( _X360 ) + Assert( dstVertex.m_vecUserData.w == -1.0f || dstVertex.m_vecUserData.w == 1.0f ); + if ( nHasSIMD ) + { + meshBuilder.FastVertexSSE( dstVertex ); + } + else + { + meshBuilder.FastVertex( dstVertex ); + } +#else + meshBuilder.VertexDX8ToX360( dstVertex ); +#endif + } + else + { + if ( nHasSIMD ) + { + meshBuilder.FastVertexSSE( *(ModelVertexDX7_t*)&dstVertex ); + } + else + { + meshBuilder.FastVertex( *(ModelVertexDX7_t*)&dstVertex ); + } + } + } + meshBuilder.FastAdvanceNVertices( numVertices ); + } + +#ifdef SPECIAL_SSE_MESH_PROCESSOR + +#ifdef VERIFY_SSE_LIGHTING + static int NotCloseEnough( float a, float b ) + { + // check if 2 linear lighting values are close enough between the sse and non see lighting model + // no point being more precise than 1% since it all maps to 8 bit anyway + float thresh=0.1f*fabs( a ); + if ( thresh < 0.1f ) + thresh = 0.1f; + return ( fabs( a-b ) > thresh ); + } +#endif + + // this special version of the vertex processor does 4 vertices at once, so that they can be lit using SSE instructions. This provides + // a >2x speedup in the lit case + static void R_PerformVectorizedLightingSSE( const FourVectors &forward, fltx4 fIllum, ModelVertexDX8_t *dst, unsigned int nAlphaMask) + { + if ( nLighting == LIGHTING_SOFTWARE ) + { +#ifdef VERIFY_SSE_LIGHTING +// if ( (g_StudioRender.m_NumLocalLights==1) && +// ( (g_StudioRender.m_LocalLights[0].m_Type==MATERIAL_LIGHT_SPOT))) +// { +// // ihvtest doesn't use different exponents for its spots, +// // so i mess with the exponents when testing +// static int ctr=0; +// static float exps[8]={0,1,2,3,4,4.5,5.25,2.5}; +// ctr=(ctr+1)&7; +// g_StudioRender.m_LocalLights[0].m_Falloff=exps[ctr]; +// } +#endif + FourVectors Position; + Position.LoadAndSwizzleAligned(dst[0].m_vecPosition,dst[1].m_vecPosition,dst[2].m_vecPosition,dst[3].m_vecPosition); + FourVectors Normal(dst[0].m_vecNormal,dst[1].m_vecNormal,dst[2].m_vecNormal,dst[3].m_vecNormal); + FourVectors Color; + g_StudioRender.R_ComputeLightAtPoints3( Position, Normal, Color); + + for (int i=0; i<4; i++) + { + Vector color; +#ifdef VERIFY_SSE_LIGHTING + // debug - check sse version against "real" version + g_StudioRender.R_ComputeLightAtPoint3( dst[i].m_vecPosition,dst[i].m_vecNormal, color ); + if ( NotCloseEnough(color.x,Color.X(i)) || + NotCloseEnough(color.y,Color.Y(i)) || + NotCloseEnough(color.z,Color.Z(i))) + { + Assert(0); + // recompute so can step in debugger + g_StudioRender.R_ComputeLightAtPoints3( Position,Normal,Color); + g_StudioRender.R_ComputeLightAtPoint3( dst[i].m_vecPosition,dst[i].m_vecNormal, color ); + } +#endif + unsigned char r = LinearToLightmap( Color.X(i) ); + unsigned char g = LinearToLightmap( Color.Y(i) ); + unsigned char b = LinearToLightmap( Color.Z(i) ); + + dst[i].m_nColor = b | (g << 8) | (r << 16) | nAlphaMask; + } + } + else if ( nLighting == LIGHTING_MOUTH ) + { + FourVectors Position; + Position.LoadAndSwizzleAligned(dst[0].m_vecPosition,dst[1].m_vecPosition,dst[2].m_vecPosition,dst[3].m_vecPosition); + FourVectors Normal(dst[0].m_vecNormal,dst[1].m_vecNormal,dst[2].m_vecNormal,dst[3].m_vecNormal); + FourVectors Color; + + g_StudioRender.R_ComputeLightAtPoints3( Position, Normal, Color); + g_StudioRender.R_MouthLighting( fIllum, Normal, forward, Color ); + for (int i=0; i<4; i++) + { + unsigned char r = LinearToLightmap( Color.X(i) ); + unsigned char g = LinearToLightmap( Color.Y(i) ); + unsigned char b = LinearToLightmap( Color.Z(i) ); + + dst[i].m_nColor = b | (g << 8) | (r << 16) | nAlphaMask; + } + } + } + + static void R_StudioSoftwareProcessMeshSSE_DX7( const mstudio_meshvertexdata_t *vertData, matrix3x4_t *pPoseToWorld, + CCachedRenderData &vertexCache, CMeshBuilder& meshBuilder, + int numVertices, unsigned short* pGroupToMesh, unsigned int nAlphaMask, + IMaterial* pMaterial) + { + Assert( numVertices > 0 ); + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + +#define N_VERTS_TO_DO_AT_ONCE 4 // for SSE processing + Assert(N_VERTS_TO_DO_AT_ONCE<=PREFETCH_VERT_COUNT); + + SSELightingHalfLambert=(pMaterial && (pMaterial->GetMaterialVarFlag( MATERIAL_VAR_HALFLAMBERT))); + Vector color; + Vector *pSrcPos; + Vector *pSrcNorm; + + ALIGN16 ModelVertexDX8_t dstVertexBuf[N_VERTS_TO_DO_AT_ONCE] ALIGN16_POST; + for(int i=0;i<N_VERTS_TO_DO_AT_ONCE;i++) + { + dstVertexBuf[i].m_flBoneWeights[0] = 1.0f; + dstVertexBuf[i].m_flBoneWeights[1] = 0.0f; + dstVertexBuf[i].m_nBoneIndices = 0; + dstVertexBuf[i].m_nColor = 0xFFFFFFFF; + dstVertexBuf[i].m_vecUserData.Init( 1.0f, 0.0f, 0.0f, 1.0f ); + } + + // do per-light precalcs. Better than doing them per vertex + for ( int l = 0; l < g_StudioRender.m_pRC->m_NumLocalLights; l++) + { + LightDesc_t *wl=g_StudioRender.m_pRC->m_LocalLights+l; + if (wl->m_Type==MATERIAL_LIGHT_SPOT) + { + float spread=wl->m_ThetaDot-wl->m_PhiDot; + if (spread>1.0e-10) + { + // note - this quantity is very sensitive to round off error. the sse + // reciprocal approximation won't cut it here. + OneOver_ThetaDot_Minus_PhiDot[l]=ReplicateX4(1.0/spread); + } + else + { + // hard falloff instead of divide by zero + OneOver_ThetaDot_Minus_PhiDot[l]=ReplicateX4(1.0); + } + } + } + + ALIGN16 matrix3x4_t temp ALIGN16_POST; + ALIGN16 matrix3x4_t *pSkinMat ALIGN16_POST; + + // Mouth related stuff... + float fIllum = 1.0f; + fltx4 fIllumReplicated; + + Vector forward; + FourVectors mouth_forward; + if (nLighting == LIGHTING_MOUTH) + { + g_StudioRender.R_MouthComputeLightingValues( fIllum, forward ); + mouth_forward.DuplicateVector(forward); + } + fIllumReplicated=ReplicateX4(fIllum); + + if ((nLighting == LIGHTING_MOUTH) || (nLighting == LIGHTING_SOFTWARE)) + { + g_StudioRender.R_InitLightEffectsWorld3(); + } +#ifdef _DEBUG + // In debug, clear it out to ensure we aren't accidentially calling + // the last setup for R_ComputeLightForPoint3. + else + { + g_StudioRender.R_LightEffectsWorld3 = NULL; + } +#endif + + int n_iters=numVertices; + + ModelVertexDX8_t *dst=dstVertexBuf; + while(1) + { + for(int subc=0;subc<4;subc++) + { + int n=*(pGroupToMesh++); + + mstudiovertex_t &vert = pVertices[n]; + + // Compute the skinning matrix + pSkinMat = ComputeSkinMatrixSSE( vert.m_BoneWeights, pPoseToWorld, temp ); + + // transform into world space + if (nDoFlex && vertexCache.IsVertexFlexed(n)) + { + CachedPosNormTan_t* pFlexedVertex = vertexCache.GetFlexVertex(n); + pSrcPos = &pFlexedVertex->m_Position; + pSrcNorm = &pFlexedVertex->m_Normal; + } + else + { + pSrcPos = &vert.m_vecPosition; + pSrcNorm = &vert.m_vecNormal; + + } + + // Transform the vert into world space + R_TransformVert( pSrcPos, pSrcNorm, 0, pSkinMat, + *(VectorAligned*)&dst->m_vecPosition, dst->m_vecNormal, *(Vector4DAligned*)&dst->m_vecUserData ); + + dst->m_vecTexCoord = vert.m_vecTexCoord; + dst++; + } + n_iters-=4; + dst=dstVertexBuf; + // Compute lighting + R_PerformVectorizedLightingSSE( mouth_forward, fIllumReplicated, dst, nAlphaMask); + if (n_iters<=0) // partial copy back? + { + // copy 1..3 verts + while(n_iters!=-4) + { + meshBuilder.FastVertexSSE( *(ModelVertexDX7_t*)dst ); + n_iters--; + dst++; + } + break; + } + else + { + meshBuilder.Fast4VerticesSSE( + (ModelVertexDX7_t*)&(dst[0]), + (ModelVertexDX7_t*)&(dst[1]), + (ModelVertexDX7_t*)&(dst[2]), + (ModelVertexDX7_t*)&(dst[3])); + } + } + meshBuilder.FastAdvanceNVertices( numVertices ); + } +#endif // SPECIAL_SSE_MESH_PROCESSOR +}; + +//----------------------------------------------------------------------------- +// Draws the mesh as tristrips using software +//----------------------------------------------------------------------------- +#if !defined( _X360 ) +typedef CProcessMeshWrapper< false, false, false, LIGHTING_HARDWARE, false > ProcessMesh000H7_t; +typedef CProcessMeshWrapper< false, false, false, LIGHTING_SOFTWARE, false > ProcessMesh000S7_t; +typedef CProcessMeshWrapper< false, false, false, LIGHTING_MOUTH, false > ProcessMesh000M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< false, false, true, LIGHTING_HARDWARE, false > ProcessMesh001H7_t; +typedef CProcessMeshWrapper< false, false, true, LIGHTING_SOFTWARE, false > ProcessMesh001S7_t; +typedef CProcessMeshWrapper< false, false, true, LIGHTING_MOUTH, false > ProcessMesh001M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< false, true, false, LIGHTING_HARDWARE, false > ProcessMesh010H7_t; +typedef CProcessMeshWrapper< false, true, false, LIGHTING_SOFTWARE, false > ProcessMesh010S7_t; +typedef CProcessMeshWrapper< false, true, false, LIGHTING_MOUTH, false > ProcessMesh010M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< false, true, true, LIGHTING_HARDWARE, false > ProcessMesh011H7_t; +typedef CProcessMeshWrapper< false, true, true, LIGHTING_SOFTWARE, false > ProcessMesh011S7_t; +typedef CProcessMeshWrapper< false, true, true, LIGHTING_MOUTH, false > ProcessMesh011M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< true, false, false, LIGHTING_HARDWARE, false > ProcessMesh100H7_t; +typedef CProcessMeshWrapper< true, false, false, LIGHTING_SOFTWARE, false > ProcessMesh100S7_t; +typedef CProcessMeshWrapper< true, false, false, LIGHTING_MOUTH, false > ProcessMesh100M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< true, false, true, LIGHTING_HARDWARE, false > ProcessMesh101H7_t; +typedef CProcessMeshWrapper< true, false, true, LIGHTING_SOFTWARE, false > ProcessMesh101S7_t; +typedef CProcessMeshWrapper< true, false, true, LIGHTING_MOUTH, false > ProcessMesh101M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< true, true, false, LIGHTING_HARDWARE, false > ProcessMesh110H7_t; +typedef CProcessMeshWrapper< true, true, false, LIGHTING_SOFTWARE, false > ProcessMesh110S7_t; +typedef CProcessMeshWrapper< true, true, false, LIGHTING_MOUTH, false > ProcessMesh110M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< true, true, true, LIGHTING_HARDWARE, false > ProcessMesh111H7_t; +typedef CProcessMeshWrapper< true, true, true, LIGHTING_SOFTWARE, false > ProcessMesh111S7_t; +typedef CProcessMeshWrapper< true, true, true, LIGHTING_MOUTH, false > ProcessMesh111M7_t; +#endif + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< false, false, false, LIGHTING_HARDWARE, true > ProcessMesh000H8_t; +typedef CProcessMeshWrapper< false, false, false, LIGHTING_SOFTWARE, true > ProcessMesh000S8_t; +typedef CProcessMeshWrapper< false, false, false, LIGHTING_MOUTH, true > ProcessMesh000M8_t; +#endif + +typedef CProcessMeshWrapper< false, false, true, LIGHTING_HARDWARE, true > ProcessMesh001H8_t; +typedef CProcessMeshWrapper< false, false, true, LIGHTING_SOFTWARE, true > ProcessMesh001S8_t; +typedef CProcessMeshWrapper< false, false, true, LIGHTING_MOUTH, true > ProcessMesh001M8_t; + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< false, true, false, LIGHTING_HARDWARE, true > ProcessMesh010H8_t; +typedef CProcessMeshWrapper< false, true, false, LIGHTING_SOFTWARE, true > ProcessMesh010S8_t; +typedef CProcessMeshWrapper< false, true, false, LIGHTING_MOUTH, true > ProcessMesh010M8_t; +#endif + +typedef CProcessMeshWrapper< false, true, true, LIGHTING_HARDWARE, true > ProcessMesh011H8_t; +typedef CProcessMeshWrapper< false, true, true, LIGHTING_SOFTWARE, true > ProcessMesh011S8_t; +typedef CProcessMeshWrapper< false, true, true, LIGHTING_MOUTH, true > ProcessMesh011M8_t; + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< true, false, false, LIGHTING_HARDWARE, true > ProcessMesh100H8_t; +typedef CProcessMeshWrapper< true, false, false, LIGHTING_SOFTWARE, true > ProcessMesh100S8_t; +typedef CProcessMeshWrapper< true, false, false, LIGHTING_MOUTH, true > ProcessMesh100M8_t; +#endif + +typedef CProcessMeshWrapper< true, false, true, LIGHTING_HARDWARE, true > ProcessMesh101H8_t; +typedef CProcessMeshWrapper< true, false, true, LIGHTING_SOFTWARE, true > ProcessMesh101S8_t; +typedef CProcessMeshWrapper< true, false, true, LIGHTING_MOUTH, true > ProcessMesh101M8_t; + +#if !defined( _X360 ) +typedef CProcessMeshWrapper< true, true, false, LIGHTING_HARDWARE, true > ProcessMesh110H8_t; +typedef CProcessMeshWrapper< true, true, false, LIGHTING_SOFTWARE, true > ProcessMesh110S8_t; +typedef CProcessMeshWrapper< true, true, false, LIGHTING_MOUTH, true > ProcessMesh110M8_t; +#endif + +typedef CProcessMeshWrapper< true, true, true, LIGHTING_HARDWARE, true > ProcessMesh111H8_t; +typedef CProcessMeshWrapper< true, true, true, LIGHTING_SOFTWARE, true > ProcessMesh111S8_t; +typedef CProcessMeshWrapper< true, true, true, LIGHTING_MOUTH, true > ProcessMesh111M8_t; + +static SoftwareProcessMeshFunc_t g_SoftwareProcessMeshFunc[] = +{ +#if !defined( _X360 ) + ProcessMesh000H7_t::R_StudioSoftwareProcessMesh, + ProcessMesh000S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh000M7_t::R_StudioSoftwareProcessMesh, + + ProcessMesh001H7_t::R_StudioSoftwareProcessMesh, +#ifdef SPECIAL_SSE_MESH_PROCESSOR + ProcessMesh001S7_t::R_StudioSoftwareProcessMeshSSE_DX7, + ProcessMesh001M7_t::R_StudioSoftwareProcessMeshSSE_DX7, +#else + ProcessMesh001S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh001M7_t::R_StudioSoftwareProcessMesh, +#endif + + ProcessMesh010H7_t::R_StudioSoftwareProcessMesh, + ProcessMesh010S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh010M7_t::R_StudioSoftwareProcessMesh, + + ProcessMesh011H7_t::R_StudioSoftwareProcessMesh, +#ifdef SPECIAL_SSE_MESH_PROCESSOR + ProcessMesh011S7_t::R_StudioSoftwareProcessMeshSSE_DX7, + ProcessMesh011M7_t::R_StudioSoftwareProcessMeshSSE_DX7, +#else + ProcessMesh011S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh011M7_t::R_StudioSoftwareProcessMesh, +#endif + + ProcessMesh100H7_t::R_StudioSoftwareProcessMesh, + ProcessMesh100S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh100M7_t::R_StudioSoftwareProcessMesh, + + ProcessMesh101H7_t::R_StudioSoftwareProcessMesh, + ProcessMesh101S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh101M7_t::R_StudioSoftwareProcessMesh, + + ProcessMesh110H7_t::R_StudioSoftwareProcessMesh, + ProcessMesh110S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh110M7_t::R_StudioSoftwareProcessMesh, + + ProcessMesh111H7_t::R_StudioSoftwareProcessMesh, + ProcessMesh111S7_t::R_StudioSoftwareProcessMesh, + ProcessMesh111M7_t::R_StudioSoftwareProcessMesh, +#endif + +#if !defined( _X360 ) + ProcessMesh000H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh000S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh000M8_t::R_StudioSoftwareProcessMesh, +#endif + ProcessMesh001H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh001S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh001M8_t::R_StudioSoftwareProcessMesh, +#if !defined( _X360 ) + ProcessMesh010H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh010S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh010M8_t::R_StudioSoftwareProcessMesh, +#endif + ProcessMesh011H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh011S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh011M8_t::R_StudioSoftwareProcessMesh, +#if !defined( _X360 ) + ProcessMesh100H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh100S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh100M8_t::R_StudioSoftwareProcessMesh, +#endif + ProcessMesh101H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh101S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh101M8_t::R_StudioSoftwareProcessMesh, +#if !defined( _X360 ) + ProcessMesh110H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh110S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh110M8_t::R_StudioSoftwareProcessMesh, +#endif + ProcessMesh111H8_t::R_StudioSoftwareProcessMesh, + ProcessMesh111S8_t::R_StudioSoftwareProcessMesh, + ProcessMesh111M8_t::R_StudioSoftwareProcessMesh, +}; + +inline const mstudio_meshvertexdata_t * GetFatVertexData( mstudiomesh_t * pMesh, studiohdr_t * pStudioHdr ) +{ + if ( !pMesh->pModel()->CacheVertexData( pStudioHdr ) ) + { + // not available yet + return NULL; + } + const mstudio_meshvertexdata_t *pVertData = pMesh->GetVertexData( pStudioHdr ); + Assert( pVertData ); + if ( !pVertData ) + { + static unsigned int warnCount = 0; + if ( warnCount++ < 20 ) + Warning( "ERROR: model verts have been compressed, cannot render! (use \"-no_compressed_vvds\")" ); + } + return pVertData; +} + +void CStudioRender::R_StudioSoftwareProcessMesh( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder, + int numVertices, unsigned short* pGroupToMesh, StudioModelLighting_t lighting, bool doFlex, float r_blend, + bool bNeedsTangentSpace, bool bDX8Vertex, IMaterial *pMaterial ) +{ + unsigned int nAlphaMask = RoundFloatToInt( r_blend * 255.0f ); + nAlphaMask = clamp( nAlphaMask, 0, 255 ); + nAlphaMask <<= 24; + + // FIXME: Use function pointers to simplify this?!? + int idx; + if ( IsPC() ) + { + idx = bDX8Vertex * 24 + bNeedsTangentSpace * 12 + doFlex * 6 + MathLib_SSEEnabled() * 3 + lighting; + } + else + { + idx = bNeedsTangentSpace * 6 + doFlex * 3 + lighting; + } + + const mstudio_meshvertexdata_t *pVertData = GetFatVertexData( pmesh, m_pStudioHdr ); + if ( pVertData ) + { + // invoke the software mesh processing handler + g_SoftwareProcessMeshFunc[idx]( pVertData, m_PoseToWorld, m_VertexCache, meshBuilder, numVertices, pGroupToMesh, nAlphaMask, pMaterial ); + } +} + +static void R_SlowTransformVert( const Vector *pSrcPos, const Vector *pSrcNorm, + matrix3x4_t *pSkinMat, VectorAligned &pos, VectorAligned &norm ) +{ + pos.x = pSrcPos->x * (*pSkinMat)[0][0] + pSrcPos->y * (*pSkinMat)[0][1] + pSrcPos->z * (*pSkinMat)[0][2] + (*pSkinMat)[0][3]; + norm.x = pSrcNorm->x * (*pSkinMat)[0][0] + pSrcNorm->y * (*pSkinMat)[0][1] + pSrcNorm->z * (*pSkinMat)[0][2]; + + pos.y = pSrcPos->x * (*pSkinMat)[1][0] + pSrcPos->y * (*pSkinMat)[1][1] + pSrcPos->z * (*pSkinMat)[1][2] + (*pSkinMat)[1][3]; + norm.y = pSrcNorm->x * (*pSkinMat)[1][0] + pSrcNorm->y * (*pSkinMat)[1][1] + pSrcNorm->z * (*pSkinMat)[1][2]; + + pos.z = pSrcPos->x * (*pSkinMat)[2][0] + pSrcPos->y * (*pSkinMat)[2][1] + pSrcPos->z * (*pSkinMat)[2][2] + (*pSkinMat)[2][3]; + norm.z = pSrcNorm->x * (*pSkinMat)[2][0] + pSrcNorm->y * (*pSkinMat)[2][1] + pSrcNorm->z * (*pSkinMat)[2][2]; +} + +static void R_SlowTransformVert( const Vector *pSrcPos, const Vector *pSrcNorm, const Vector4D *pSrcTangentS, + matrix3x4_t *pSkinMat, VectorAligned &pos, VectorAligned &norm, VectorAligned &tangentS ) +{ + pos.x = pSrcPos->x * (*pSkinMat)[0][0] + pSrcPos->y * (*pSkinMat)[0][1] + pSrcPos->z * (*pSkinMat)[0][2] + (*pSkinMat)[0][3]; + norm.x = pSrcNorm->x * (*pSkinMat)[0][0] + pSrcNorm->y * (*pSkinMat)[0][1] + pSrcNorm->z * (*pSkinMat)[0][2]; + tangentS.x = pSrcTangentS->x * (*pSkinMat)[0][0] + pSrcTangentS->y * (*pSkinMat)[0][1] + pSrcTangentS->z * (*pSkinMat)[0][2]; + + pos.y = pSrcPos->x * (*pSkinMat)[1][0] + pSrcPos->y * (*pSkinMat)[1][1] + pSrcPos->z * (*pSkinMat)[1][2] + (*pSkinMat)[1][3]; + norm.y = pSrcNorm->x * (*pSkinMat)[1][0] + pSrcNorm->y * (*pSkinMat)[1][1] + pSrcNorm->z * (*pSkinMat)[1][2]; + tangentS.y = pSrcTangentS->x * (*pSkinMat)[1][0] + pSrcTangentS->y * (*pSkinMat)[1][1] + pSrcTangentS->z * (*pSkinMat)[1][2]; + + pos.z = pSrcPos->x * (*pSkinMat)[2][0] + pSrcPos->y * (*pSkinMat)[2][1] + pSrcPos->z * (*pSkinMat)[2][2] + (*pSkinMat)[2][3]; + norm.z = pSrcNorm->x * (*pSkinMat)[2][0] + pSrcNorm->y * (*pSkinMat)[2][1] + pSrcNorm->z * (*pSkinMat)[2][2]; + tangentS.z = pSrcTangentS->x * (*pSkinMat)[2][0] + pSrcTangentS->y * (*pSkinMat)[2][1] + pSrcTangentS->z * (*pSkinMat)[2][2]; +} + +void CStudioRender::R_StudioSoftwareProcessMesh_Normals( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder, + int numVertices, unsigned short* pGroupToMesh, StudioModelLighting_t lighting, bool doFlex, float r_blend, + bool bShowNormals, bool bShowTangentFrame ) +{ + ALIGN16 matrix3x4_t temp ALIGN16_POST; + ALIGN16 matrix3x4_t *pSkinMat ALIGN16_POST; + + Vector *pSrcPos = NULL; + Vector *pSrcNorm = NULL; + Vector4D *pSrcTangentS = NULL; + VectorAligned norm, pos, tangentS, tangentT; + + // Gets at the vertex data + const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr ); + if ( !vertData ) + { + // not available + return; + } + + if ( bShowTangentFrame && !vertData->HasTangentData() ) + return; + + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + + Vector4D *pTangentS = NULL; + Vector4D tang; + if ( bShowTangentFrame ) + { + pTangentS = vertData->TangentS( 0 ); + } + + for ( int j=0; j < numVertices; j++ ) + { + int n = pGroupToMesh[j]; + + mstudiovertex_t &vert = pVertices[n]; + if ( bShowTangentFrame ) + { + tang = pTangentS[n]; + } + + pSkinMat = ComputeSkinMatrix( vert.m_BoneWeights, m_PoseToWorld, temp ); + + // transform into world space + if ( m_VertexCache.IsVertexFlexed(n) ) + { + CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex(n); + pSrcPos = &pFlexedVertex->m_Position; + pSrcNorm = &pFlexedVertex->m_Normal; + + if ( bShowTangentFrame ) + { + pSrcTangentS = &pFlexedVertex->m_TangentS; + } + } + else + { + pSrcPos = &vert.m_vecPosition; + pSrcNorm = &vert.m_vecNormal; + if ( bShowTangentFrame ) + { + pSrcTangentS = &tang; + } + } + + // Transform the vert into world space + if ( bShowTangentFrame && ( pSrcTangentS != NULL ) ) + { + R_SlowTransformVert( pSrcPos, pSrcNorm, pSrcTangentS, pSkinMat, pos, norm, tangentS ); + } + else + { + R_SlowTransformVert( pSrcPos, pSrcNorm, pSkinMat, pos, norm ); + } + + if ( bShowNormals ) + { + meshBuilder.Position3fv( pos.Base() ); + meshBuilder.Color3f( 0.0f, 0.0f, 1.0f ); + meshBuilder.AdvanceVertex(); + + Vector normalPos; + normalPos = pos + norm * 0.5f; + meshBuilder.Position3fv( normalPos.Base() ); + meshBuilder.Color3f( 0.0f, 0.0f, 1.0f ); + meshBuilder.AdvanceVertex(); + } + + if ( bShowTangentFrame && ( pSrcTangentS != NULL) ) + { + // TangentS + meshBuilder.Position3fv( pos.Base() ); + meshBuilder.Color3f( 1.0f, 0.0f, 0.0f ); + meshBuilder.AdvanceVertex(); + + Vector vTangentSPos; + vTangentSPos = pos + tangentS * 0.5f; + meshBuilder.Position3fv( vTangentSPos.Base() ); + meshBuilder.Color3f( 1.0f, 0.0f, 0.0f ); + meshBuilder.AdvanceVertex(); + + // TangentT + meshBuilder.Position3fv( pos.Base() ); + meshBuilder.Color3f( 0.0f, 1.0f, 0.0f ); + meshBuilder.AdvanceVertex(); + + // Compute tangentT from normal and tangentS + CrossProduct( norm, tangentS, tangentT ); + + Vector vTangentTPos; + vTangentTPos = pos + tangentT * 0.5f; + meshBuilder.Position3fv( vTangentTPos.Base() ); + meshBuilder.Color3f( 0.0f, 1.0f, 0.0f ); + meshBuilder.AdvanceVertex(); + + } // end tacking on tangentS and tangetT line segments + } +} + +#pragma warning (default:4701) + + + +template +void CCachedRenderData::ComputeFlexedVertex_StreamOffset<mstudiovertanim_t>( studiohdr_t *pStudioHdr, mstudioflex_t *pflex, + mstudiovertanim_t *pvanim, int vertCount, float w1, float w2, float w3, float w4 ); + + + +void CStudioRender::R_StudioProcessFlexedMesh_StreamOffset( mstudiomesh_t* pmesh, int lod ) +{ + VPROF_BUDGET( "ProcessFlexedMesh_SO", _T("HW Morphing") ); + + if ( m_VertexCache.IsFlexComputationDone() ) + return; + + int vertCount = pmesh->vertexdata.numLODVertexes[lod]; + m_VertexCache.SetupComputation( pmesh, true ); + mstudioflex_t *pflex = pmesh->pFlex( 0 ); + + for (int i = 0; i < pmesh->numflexes; i++) + { + float w1 = RampFlexWeight( pflex[i], m_pFlexWeights[ pflex[i].flexdesc ] ); + float w2 = RampFlexWeight( pflex[i], m_pFlexDelayedWeights[ pflex[i].flexdesc ] ); + + float w3, w4; + if ( pflex[i].flexpair != 0) + { + w3 = RampFlexWeight( pflex[i], m_pFlexWeights[ pflex[i].flexpair ] ); + w4 = RampFlexWeight( pflex[i], m_pFlexDelayedWeights[ pflex[i].flexpair ] ); + } + else + { + w3 = w1; + w4 = w2; + } + + // Move on if the weights for this flex are sufficiently small + if (w1 > -0.001 && w1 < 0.001 && w2 > -0.001 && w2 < 0.001) + { + if (w3 > -0.001 && w3 < 0.001 && w4 > -0.001 && w4 < 0.001) + { + continue; + } + } + +#ifdef PLATFORM_WINDOWS + if ( pflex[i].vertanimtype == STUDIO_VERT_ANIM_NORMAL ) + { + mstudiovertanim_t *pvanim = pflex[i].pVertanim( 0 ); + m_VertexCache.ComputeFlexedVertex_StreamOffset_Optimized( m_pStudioHdr, &pflex[i], pvanim, vertCount, w1, w2, w3, w4 ); + } + else + { + mstudiovertanim_wrinkle_t *pvanim = pflex[i].pVertanimWrinkle( 0 ); + m_VertexCache.ComputeFlexedVertexWrinkle_StreamOffset_Optimized( m_pStudioHdr, &pflex[i], pvanim, vertCount, w1, w2, w3, w4 ); + } +#else // PLATFORM_WINDOWS + if ( pflex[i].vertanimtype == STUDIO_VERT_ANIM_NORMAL ) + { + mstudiovertanim_t *pvanim = pflex[i].pVertanim( 0 ); + m_VertexCache.ComputeFlexedVertex_StreamOffset( m_pStudioHdr, &pflex[i], pvanim, vertCount, w1, w2, w3, w4 ); + } + else + { + mstudiovertanim_wrinkle_t *pvanim = pflex[i].pVertanimWrinkle( 0 ); + m_VertexCache.ComputeFlexedVertex_StreamOffset( m_pStudioHdr, &pflex[i], pvanim, vertCount, w1, w2, w3, w4 ); + } +#endif // PLATFORM_WINDOWS + } +} + + +//----------------------------------------------------------------------------- +// Purpose: +// +// ** Only execute this function if device supports stream offset ** +// +// Input : pGroup - pointer to a studio mesh group +// Output : none +//----------------------------------------------------------------------------- +void CStudioRender::R_StudioFlexMeshGroup( studiomeshgroup_t *pGroup ) +{ + VPROF_BUDGET( "R_StudioFlexMeshGroup", VPROF_BUDGETGROUP_MODEL_RENDERING ); + + CMeshBuilder meshBuilder; + int nVertexOffsetInBytes = 0; + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + IMesh *pMesh = pRenderContext->GetFlexMesh(); + meshBuilder.Begin( pMesh, MATERIAL_HETEROGENOUS, pGroup->m_NumVertices, 0, &nVertexOffsetInBytes ); + + // Just pos and norm deltas (tangents use same deltas as normals) + for ( int j=0; j < pGroup->m_NumVertices; j++) + { + int n = pGroup->m_pGroupIndexToMeshIndex[j]; + if ( m_VertexCache.IsThinVertexFlexed(n) ) + { + CachedPosNorm_t *pIn = m_VertexCache.GetThinFlexVertex(n); + meshBuilder.Position3fv( pIn->m_Position.Base() ); + meshBuilder.NormalDelta3fv( pIn->m_Normal.Base() ); + meshBuilder.Wrinkle1f( pIn->m_Position.w ); + } + else + { + meshBuilder.Position3f( 0.0f, 0.0f, 0.0f ); + meshBuilder.NormalDelta3f( 0.0f, 0.0f, 0.0f ); + meshBuilder.Wrinkle1f( 0.0f ); + } + meshBuilder.AdvanceVertex(); + } + + meshBuilder.End( false, false ); + + pGroup->m_pMesh->SetFlexMesh( pMesh, nVertexOffsetInBytes ); +} + +//----------------------------------------------------------------------------- +// Processes a flexed mesh to be hw skinned +//----------------------------------------------------------------------------- +void CStudioRender::R_StudioProcessFlexedMesh( mstudiomesh_t* pmesh, CMeshBuilder& meshBuilder, + int numVertices, unsigned short* pGroupToMesh ) +{ + PROFILE_STUDIO("FlexMeshBuilder"); + + Vector4D *pStudioTangentS; + + // get the vertex data + const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr ); + if ( !vertData ) + { + // not available + return; + } + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + + if (vertData->HasTangentData()) + { + pStudioTangentS = vertData->TangentS( 0 ); + Assert( pStudioTangentS->w == -1.0f || pStudioTangentS->w == 1.0f ); + + for ( int j=0; j < numVertices ; j++) + { + int n = pGroupToMesh[j]; + mstudiovertex_t &vert = pVertices[n]; + + // FIXME: For now, flexed hw-skinned meshes can only have one bone + // The data must exist in the 0th hardware matrix + + // Here, we are doing HW skinning, so we need to simply copy over the flex + if ( m_VertexCache.IsVertexFlexed(n) ) + { + CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex(n); + meshBuilder.Position3fv( pFlexedVertex->m_Position.Base() ); + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + meshBuilder.Normal3fv( pFlexedVertex->m_Normal.Base() ); + meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() ); + Assert( pFlexedVertex->m_TangentS.w == -1.0f || pFlexedVertex->m_TangentS.w == 1.0f ); + meshBuilder.UserData( pFlexedVertex->m_TangentS.Base() ); + } + else + { + meshBuilder.Position3fv( vert.m_vecPosition.Base() ); + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + meshBuilder.Normal3fv( vert.m_vecNormal.Base() ); + meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() ); + Assert( pStudioTangentS[n].w == -1.0f || pStudioTangentS[n].w == 1.0f ); + meshBuilder.UserData( pStudioTangentS[n].Base() ); + } + + meshBuilder.AdvanceVertex(); + } + } + else + { + // no TangentS, replicated code to save inner conditional + for ( int j=0; j < numVertices ; j++) + { + int n = pGroupToMesh[j]; + mstudiovertex_t &vert = pVertices[n]; + + // FIXME: For now, flexed hw-skinned meshes can only have one bone + // The data must exist in the 0th hardware matrix + + // Here, we are doing HW skinning, so we need to simply copy over the flex + if ( m_VertexCache.IsVertexFlexed(n) ) + { + CachedPosNormTan_t* pFlexedVertex = m_VertexCache.GetFlexVertex(n); + meshBuilder.Position3fv( pFlexedVertex->m_Position.Base() ); + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + meshBuilder.Normal3fv( pFlexedVertex->m_Normal.Base() ); + } + else + { + meshBuilder.Position3fv( vert.m_vecPosition.Base() ); + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + meshBuilder.Normal3fv( vert.m_vecNormal.Base() ); + } + meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() ); + meshBuilder.AdvanceVertex(); + } + } +} + +//----------------------------------------------------------------------------- +// Restores the static mesh +//----------------------------------------------------------------------------- +template<VertexCompressionType_t T> void CStudioRender::R_StudioRestoreMesh( mstudiomesh_t* pmesh, studiomeshgroup_t* pMeshData ) +{ + Vector4D *pStudioTangentS; + + if ( IsX360() ) + return; + + // get at the vertex data + const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr ); + if ( !vertData ) + { + // not available + return; + } + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + + if (vertData->HasTangentData()) + { + pStudioTangentS = vertData->TangentS( 0 ); + } + else + { + pStudioTangentS = NULL; + } + + CMeshBuilder meshBuilder; + + meshBuilder.BeginModify( pMeshData->m_pMesh ); + meshBuilder.SetCompressionType( T ); + for ( int j=0; j < meshBuilder.VertexCount() ; j++) + { + meshBuilder.SelectVertex(j); + int n = pMeshData->m_pGroupIndexToMeshIndex[j]; + mstudiovertex_t &vert = pVertices[n]; + + meshBuilder.Position3fv( vert.m_vecPosition.Base() ); + meshBuilder.CompressedNormal3fv<T>( vert.m_vecNormal.Base() ); + meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() ); + + if (pStudioTangentS) + { + Assert( pStudioTangentS[n].w == -1.0f || pStudioTangentS[n].w == 1.0f ); + meshBuilder.CompressedUserData<T>( pStudioTangentS[n].Base() ); + } + + meshBuilder.Color4ub( 255, 255, 255, 255 ); + } + meshBuilder.EndModify(); +} + +//----------------------------------------------------------------------------- +// Draws a mesh using hardware + software skinning +//----------------------------------------------------------------------------- +int CStudioRender::R_StudioDrawGroupHWSkin( IMatRenderContext *pRenderContext, studiomeshgroup_t* pGroup, IMesh* pMesh, ColorMeshInfo_t * pColorMeshInfo ) +{ + PROFILE_STUDIO("HwSkin"); + int numTrianglesRendered = 0; + +#if PIX_ENABLE + char szPIXEventName[128]; + sprintf( szPIXEventName, "R_StudioDrawGroupHWSkin (%s)", m_pStudioHdr->name ); // PIX + PIXEVENT( pRenderContext, szPIXEventName ); +#endif + + if ( m_pStudioHdr->numbones == 1 ) + { + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->LoadMatrix( m_PoseToWorld[0] ); + + // a single bone means all verts rigidly assigned + // any bonestatechange would needlessly re-load the same matrix + // xbox can skip further hw skinning, seems ok for pc too + pRenderContext->SetNumBoneWeights( 0 ); + } + + if ( pColorMeshInfo ) + pMesh->SetColorMesh( pColorMeshInfo->m_pMesh, pColorMeshInfo->m_nVertOffsetInBytes ); + else + pMesh->SetColorMesh( NULL, 0 ); + + for (int j = 0; j < pGroup->m_NumStrips; ++j) + { + OptimizedModel::StripHeader_t* pStrip = &pGroup->m_pStripData[j]; + + if ( m_pStudioHdr->numbones > 1 ) + { + // Reset bone state if we're hardware skinning + pRenderContext->SetNumBoneWeights( pStrip->numBones ); + + for (int k = 0; k < pStrip->numBoneStateChanges; ++k) + { + OptimizedModel::BoneStateChangeHeader_t* pStateChange = pStrip->pBoneStateChange(k); + if ( pStateChange->newBoneID < 0 ) + break; + + pRenderContext->LoadBoneMatrix( pStateChange->hardwareID, m_PoseToWorld[pStateChange->newBoneID] ); + } + } + + pMesh->SetPrimitiveType( pStrip->flags & OptimizedModel::STRIP_IS_TRISTRIP ? + MATERIAL_TRIANGLE_STRIP : MATERIAL_TRIANGLES ); + + pMesh->Draw( pStrip->indexOffset, pStrip->numIndices ); + numTrianglesRendered += pGroup->m_pUniqueTris[j]; + } + pMesh->SetColorMesh( NULL, 0 ); + + return numTrianglesRendered; +} + +int CStudioRender::R_StudioDrawGroupSWSkin( studiomeshgroup_t* pGroup, IMesh* pMesh ) +{ + int numTrianglesRendered = 0; + + CMatRenderContextPtr pRenderContext( g_pMaterialSystem ); + // Disable skinning + pRenderContext->SetNumBoneWeights( 0 ); + + for (int j = 0; j < pGroup->m_NumStrips; ++j) + { + OptimizedModel::StripHeader_t* pStrip = &pGroup->m_pStripData[j]; + + // Choose our primitive type + pMesh->SetPrimitiveType( pStrip->flags & OptimizedModel::STRIP_IS_TRISTRIP ? + MATERIAL_TRIANGLE_STRIP : MATERIAL_TRIANGLES ); + + pMesh->Draw( pStrip->indexOffset, pStrip->numIndices ); + numTrianglesRendered += pGroup->m_pUniqueTris[j]; + } + + return numTrianglesRendered; +} + + +//----------------------------------------------------------------------------- +// Sets up the hw flex mesh +//----------------------------------------------------------------------------- +void CStudioRender::ComputeFlexWeights( int nFlexCount, mstudioflex_t *pFlex, MorphWeight_t *pWeights ) +{ + for ( int i = 0; i < nFlexCount; ++i, ++pFlex ) + { + MorphWeight_t &weight = pWeights[i]; + + weight.m_pWeight[MORPH_WEIGHT] = RampFlexWeight( *pFlex, m_pFlexWeights[ pFlex->flexdesc ] ); + weight.m_pWeight[MORPH_WEIGHT_LAGGED] = RampFlexWeight( *pFlex, m_pFlexDelayedWeights[ pFlex->flexdesc ] ); + + if ( pFlex->flexpair != 0 ) + { + weight.m_pWeight[MORPH_WEIGHT_STEREO] = RampFlexWeight( *pFlex, m_pFlexWeights[ pFlex->flexpair ] ); + weight.m_pWeight[MORPH_WEIGHT_STEREO_LAGGED] = RampFlexWeight( *pFlex, m_pFlexDelayedWeights[ pFlex->flexpair ] ); + } + else + { + weight.m_pWeight[MORPH_WEIGHT_STEREO] = weight.m_pWeight[MORPH_WEIGHT]; + weight.m_pWeight[MORPH_WEIGHT_STEREO_LAGGED] = weight.m_pWeight[MORPH_WEIGHT_LAGGED]; + } + } +} + + +//----------------------------------------------------------------------------- +// Computes a vertex format to use +//----------------------------------------------------------------------------- +inline VertexFormat_t CStudioRender::ComputeSWSkinVertexFormat( IMaterial *pMaterial ) const +{ + bool bDX8OrHigherVertex = IsX360() || ( UserDataSize( pMaterial->GetVertexFormat() ) != 0 ); + VertexFormat_t fmt = VERTEX_POSITION | VERTEX_NORMAL | VERTEX_COLOR | VERTEX_BONE_INDEX | + VERTEX_BONEWEIGHT( 2 ) | VERTEX_TEXCOORD_SIZE( 0, 2 ); + if ( bDX8OrHigherVertex ) + { + fmt |= VERTEX_USERDATA_SIZE( 4 ); + } + return fmt; +} + + +//----------------------------------------------------------------------------- +// Draws the mesh as tristrips using hardware +//----------------------------------------------------------------------------- +int CStudioRender::R_StudioDrawStaticMesh( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, + studiomeshgroup_t* pGroup, StudioModelLighting_t lighting, + float r_blend, IMaterial* pMaterial, int lod, ColorMeshInfo_t *pColorMeshes ) +{ + MatSysQueueMark( g_pMaterialSystem, "R_StudioDrawStaticMesh\n" ); + VPROF( "R_StudioDrawStaticMesh" ); + + int numTrianglesRendered = 0; + + bool bDoSoftwareLighting = !pColorMeshes && + ((m_pRC->m_Config.bSoftwareSkin != 0) || m_pRC->m_Config.bDrawNormals || m_pRC->m_Config.bDrawTangentFrame || + (pMaterial ? pMaterial->NeedsSoftwareSkinning() : false) || + (m_pRC->m_Config.bSoftwareLighting != 0) || + ((lighting != LIGHTING_HARDWARE) && (lighting != LIGHTING_MOUTH) )); + + // software lighting case + if ( bDoSoftwareLighting || m_pRC->m_Config.m_bStatsMode == true ) + { + if ( m_pRC->m_Config.bNoSoftware ) + return 0; + + bool bNeedsTangentSpace = pMaterial ? pMaterial->NeedsTangentSpace() : false; + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->LoadIdentity(); + + // Hardcode the vertex format to a well-known format to make sw skin code faster + VertexFormat_t fmt = ComputeSWSkinVertexFormat( pMaterial ); + bool bDX8Vertex = ( UserDataSize( fmt ) != 0 ); + + if ( m_pRC->m_Config.m_bStatsMode == false ) + { + Assert( ( pGroup->m_Flags & ( MESHGROUP_IS_FLEXED | MESHGROUP_IS_DELTA_FLEXED ) ) == 0 ); + } + + CMeshBuilder meshBuilder; + IMesh* pMesh = pRenderContext->GetDynamicMeshEx( fmt, false, 0, pGroup->m_pMesh ); + meshBuilder.Begin( pMesh, MATERIAL_HETEROGENOUS, pGroup->m_NumVertices, 0 ); + + R_StudioSoftwareProcessMesh( pmesh, meshBuilder, + pGroup->m_NumVertices, pGroup->m_pGroupIndexToMeshIndex, + lighting, false, r_blend, bNeedsTangentSpace, bDX8Vertex, pMaterial); + + if ( m_pRC->m_Config.m_bStatsMode == true ) + { + R_GatherStats( pGroup, meshBuilder, pMesh, pMaterial ); + } + else + { + meshBuilder.End(); + + numTrianglesRendered = R_StudioDrawGroupSWSkin( pGroup, pMesh ); + } + + MatSysQueueMark( g_pMaterialSystem, "END R_StudioDrawStaticMesh\n" ); + return numTrianglesRendered; + } + + // Needed when we switch back and forth between hardware + software lighting + if ( IsPC() && pGroup->m_MeshNeedsRestore ) + { + VertexCompressionType_t compressionType = CompressionType( pGroup->m_pMesh->GetVertexFormat() ); + switch ( compressionType ) + { + case VERTEX_COMPRESSION_ON: + R_StudioRestoreMesh<VERTEX_COMPRESSION_ON>( pmesh, pGroup ); + case VERTEX_COMPRESSION_NONE: + default: + R_StudioRestoreMesh<VERTEX_COMPRESSION_NONE>( pmesh, pGroup ); + break; + } + pGroup->m_MeshNeedsRestore = false; + } + + // Build separate flex stream containing deltas, which will get copied into another vertex stream + bool bUseHWFlex = m_pRC->m_Config.m_bEnableHWMorph && pGroup->m_pMorph && !m_bDrawTranslucentSubModels; + bool bUseSOFlex = g_pMaterialSystemHardwareConfig->SupportsStreamOffset() && !bUseHWFlex; + if ( (pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED) && m_pRC->m_Config.bFlex ) + { + PIXEVENT( pRenderContext, "Delta Flex Processing" ); + if ( bUseHWFlex ) + { + pRenderContext->BindMorph( pGroup->m_pMorph ); + } + if ( bUseSOFlex ) + { + R_StudioProcessFlexedMesh_StreamOffset( pmesh, lod ); + R_StudioFlexMeshGroup( pGroup ); + } + } + + // Draw it baby + if ( pColorMeshes && ( pGroup->m_ColorMeshID != -1 ) ) + { + // draw using specified color mesh + numTrianglesRendered = R_StudioDrawGroupHWSkin( pRenderContext, pGroup, pGroup->m_pMesh, &(pColorMeshes[pGroup->m_ColorMeshID]) ); + } + else + { + numTrianglesRendered = R_StudioDrawGroupHWSkin( pRenderContext, pGroup, pGroup->m_pMesh, NULL ); + } + + if ( ( pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED ) && m_pRC->m_Config.bFlex ) + { + if ( bUseHWFlex ) + { + pRenderContext->BindMorph( NULL ); + } + if ( bUseSOFlex ) + { + pGroup->m_pMesh->DisableFlexMesh(); // clear flex stream + } + } + + MatSysQueueMark( g_pMaterialSystem, "END2 R_StudioDrawStaticMesh\n" ); + return numTrianglesRendered; +} + + +//----------------------------------------------------------------------------- +// Draws a dynamic mesh +//----------------------------------------------------------------------------- +int CStudioRender::R_StudioDrawDynamicMesh( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, + studiomeshgroup_t* pGroup, StudioModelLighting_t lighting, + float r_blend, IMaterial* pMaterial, int lod ) +{ + VPROF( "R_StudioDrawDynamicMesh" ); + + bool doFlex = ((pGroup->m_Flags & MESHGROUP_IS_FLEXED) != 0) && m_pRC->m_Config.bFlex; + + bool doSoftwareLighting = (m_pRC->m_Config.bSoftwareLighting != 0) || + ((lighting != LIGHTING_HARDWARE) && (lighting != LIGHTING_MOUTH) ); + + bool swSkin = doSoftwareLighting || m_pRC->m_Config.bDrawNormals || m_pRC->m_Config.bDrawTangentFrame || + ((pGroup->m_Flags & MESHGROUP_IS_HWSKINNED) == 0) || + m_pRC->m_Config.bSoftwareSkin || + ( pMaterial ? pMaterial->NeedsSoftwareSkinning() : false ); + + if ( !doFlex && !swSkin ) + { + return R_StudioDrawStaticMesh( pRenderContext, pmesh, pGroup, lighting, r_blend, pMaterial, lod, NULL ); + } + + // drawers before this might not need the vertexes, so don't pay the penalty of getting them + // everybody else past this point (flex or swskinning) expects to read vertexes + // get vertex data + const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr ); + if ( !vertData ) + { + // not available + return 0; + } + + MatSysQueueMark( g_pMaterialSystem, "R_StudioDrawDynamicMesh\n" ); + + int numTrianglesRendered = 0; + +#ifdef _DEBUG + const char *pDebugMaterialName = NULL; + if ( pMaterial ) + { + pDebugMaterialName = pMaterial->GetName(); + } +#endif + + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->LoadIdentity(); + + // Software flex verts (not a delta stream) + if ( doFlex ) + { + R_StudioFlexVerts( pmesh, lod ); + } + + IMesh* pMesh; + bool bNeedsTangentSpace = pMaterial ? pMaterial->NeedsTangentSpace() : false; + + VertexFormat_t fmt = ComputeSWSkinVertexFormat( pMaterial ); + bool bDX8Vertex = ( UserDataSize( fmt ) != 0 ); + + CMeshBuilder meshBuilder; + pMesh = pRenderContext->GetDynamicMeshEx( fmt, false, 0, pGroup->m_pMesh); + meshBuilder.Begin( pMesh, MATERIAL_HETEROGENOUS, pGroup->m_NumVertices, 0 ); + + if ( swSkin ) + { + R_StudioSoftwareProcessMesh( pmesh, meshBuilder, pGroup->m_NumVertices, + pGroup->m_pGroupIndexToMeshIndex, lighting, doFlex, r_blend, + bNeedsTangentSpace, bDX8Vertex, pMaterial ); + } + else if ( doFlex ) + { + R_StudioProcessFlexedMesh( pmesh, meshBuilder, pGroup->m_NumVertices, + pGroup->m_pGroupIndexToMeshIndex ); + } + + meshBuilder.End(); + + // Draw it baby + if ( !swSkin ) + { + numTrianglesRendered = R_StudioDrawGroupHWSkin( pRenderContext, pGroup, pMesh ); + } + else + { + numTrianglesRendered = R_StudioDrawGroupSWSkin( pGroup, pMesh ); + } + + if ( m_pRC->m_Config.bDrawNormals || m_pRC->m_Config.bDrawTangentFrame ) + { + pRenderContext->SetNumBoneWeights( 0 ); + pRenderContext->Bind( m_pMaterialTangentFrame ); + + CMeshBuilder meshBuilder; + pMesh = pRenderContext->GetDynamicMesh( false ); + meshBuilder.Begin( pMesh, MATERIAL_LINES, pGroup->m_NumVertices ); + + R_StudioSoftwareProcessMesh_Normals( pmesh, meshBuilder, pGroup->m_NumVertices, + pGroup->m_pGroupIndexToMeshIndex, lighting, doFlex, r_blend, m_pRC->m_Config.bDrawNormals, m_pRC->m_Config.bDrawTangentFrame ); + meshBuilder.End( ); + + pMesh->Draw(); + pRenderContext->Bind( pMaterial ); + } + + MatSysQueueMark( g_pMaterialSystem, "END R_StudioDrawDynamicMesh\n" ); + + return numTrianglesRendered; +} + + +//----------------------------------------------------------------------------- +// Sets the material vars for the eye vertex shader +//----------------------------------------------------------------------------- +static unsigned int eyeOriginCache = 0; +static unsigned int eyeUpCache = 0; +static unsigned int irisUCache = 0; +static unsigned int irisVCache = 0; +static unsigned int glintUCache = 0; +static unsigned int glintVCache = 0; +void CStudioRender::SetEyeMaterialVars( IMaterial* pMaterial, mstudioeyeball_t* peyeball, + Vector const& eyeOrigin, const matrix3x4_t& irisTransform, const matrix3x4_t& glintTransform ) +{ + if ( !pMaterial ) + return; + + IMaterialVar* pVar = pMaterial->FindVarFast( "$eyeorigin", &eyeOriginCache ); + if (pVar) + { + pVar->SetVecValue( eyeOrigin.Base(), 3 ); + } + + pVar = pMaterial->FindVarFast( "$eyeup", &eyeUpCache ); + if (pVar) + { + pVar->SetVecValue( peyeball->up.Base(), 3 ); + } + pVar = pMaterial->FindVarFast( "$irisu", &irisUCache ); + if (pVar) + { + pVar->SetVecValue( irisTransform[0], 4 ); + } + + pVar = pMaterial->FindVarFast( "$irisv", &irisVCache ); + if (pVar) + { + pVar->SetVecValue( irisTransform[1], 4 ); + } + + pVar = pMaterial->FindVarFast( "$glintu", &glintUCache ); + if (pVar) + { + pVar->SetVecValue( glintTransform[0], 4 ); + } + + pVar = pMaterial->FindVarFast( "$glintv", &glintVCache ); + if (pVar) + { + pVar->SetVecValue( glintTransform[1], 4 ); + } +} + + +//----------------------------------------------------------------------------- +// Specialized routine to draw the eyeball +//----------------------------------------------------------------------------- +static unsigned int glintCache = 0; +int CStudioRender::R_StudioDrawEyeball( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, studiomeshdata_t* pMeshData, + StudioModelLighting_t lighting, IMaterial *pMaterial, int lod ) +{ + if ( !m_pRC->m_Config.bEyes ) + { + return 0; + } + + // FIXME: We could compile a static vertex buffer in this case + // if there's no flexed verts. + const mstudio_meshvertexdata_t *vertData = GetFatVertexData( pmesh, m_pStudioHdr ); + if ( !vertData ) + { + // not available + return 0; + } + mstudiovertex_t *pVertices = vertData->Vertex( 0 ); + + int j; + int numTrianglesRendered = 0; + + // See if any meshes in the group want to go down the static path... + bool bIsDeltaFlexed = false; + bool bIsHardwareSkinnedData = false; + bool bIsFlexed = false; + for (j = 0; j < pMeshData->m_NumGroup; ++j) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j]; + + if ( ( pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED ) && g_pMaterialSystemHardwareConfig->SupportsStreamOffset() ) + bIsDeltaFlexed = true; + + if ( pGroup->m_Flags & MESHGROUP_IS_FLEXED ) + bIsFlexed = true; + + if ( pGroup->m_Flags & MESHGROUP_IS_HWSKINNED ) + bIsHardwareSkinnedData = true; + } + + // Take the static path for new flexed models on DX9 hardware + bool bFlexStatic = bIsDeltaFlexed && g_pMaterialSystemHardwareConfig->SupportsStreamOffset(); + bool bShouldHardwareSkin = bIsHardwareSkinnedData && ( !bIsFlexed || bFlexStatic ) && + ( lighting != LIGHTING_SOFTWARE ) && ( !m_pRC->m_Config.bSoftwareSkin ); + + pRenderContext->MatrixMode( MATERIAL_MODEL ); + pRenderContext->LoadIdentity(); + + // Software flex eyeball verts (not a delta stream) + if ( bIsFlexed && ( !bFlexStatic || !bShouldHardwareSkin ) ) + { + R_StudioFlexVerts( pmesh, lod ); + } + + mstudioeyeball_t *peyeball = m_pSubModel->pEyeball(pmesh->materialparam); + + // We'll need this to compute normals + Vector org; + VectorTransform( peyeball->org, m_pBoneToWorld[peyeball->bone], org ); + + // Compute the glint projection + matrix3x4_t glintMat; + ComputeGlintTextureProjection( &m_pEyeballState[pmesh->materialparam], m_pRC->m_ViewRight, m_pRC->m_ViewUp, glintMat ); + + if ( !m_pRC->m_Config.bWireframe ) + { + // Compute the glint procedural texture + IMaterialVar* pGlintVar = pMaterial->FindVarFast( "$glint", &glintCache ); + if (pGlintVar) + { + R_StudioEyeballGlint( &m_pEyeballState[pmesh->materialparam], pGlintVar, m_pRC->m_ViewRight, m_pRC->m_ViewUp, m_pRC->m_ViewOrigin ); + } + SetEyeMaterialVars( pMaterial, peyeball, org, m_pEyeballState[pmesh->materialparam].mat, glintMat ); + } + + if ( bShouldHardwareSkin ) + { + for ( j = 0; j < pMeshData->m_NumGroup; ++j ) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j]; + numTrianglesRendered += R_StudioDrawStaticMesh( pRenderContext, pmesh, pGroup, lighting, m_pRC->m_AlphaMod, pMaterial, lod, NULL ); + } + + return numTrianglesRendered; + } + + pRenderContext->SetNumBoneWeights( 0 ); + m_VertexCache.SetupComputation( pmesh ); + + int nAlpnaInt = RoundFloatToInt( m_pRC->m_AlphaMod * 255 ); + unsigned char a = clamp( nAlpnaInt, 0, 255 ); + + Vector position, normal, color; + + // setup the call + R_InitLightEffectsWorld3(); + + // Render the puppy + CMeshBuilder meshBuilder; + + bool useHWLighting = m_pRC->m_Config.m_bSupportsVertexAndPixelShaders && !m_pRC->m_Config.bSoftwareLighting; + // Draw all the various mesh groups... + for ( j = 0; j < pMeshData->m_NumGroup; ++j ) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j]; + + IMesh* pMesh = pRenderContext->GetDynamicMesh(false, 0, pGroup->m_pMesh); + + // garymcthack! need to look at the strip flags to figure out what it is. + meshBuilder.Begin( pMesh, MATERIAL_TRIANGLES, pmesh->numvertices, 0 ); +// meshBuilder.Begin( pMesh, MATERIAL_TRIANGLE_STRIP, pmesh->numvertices, 0 ); + //VPROF_INCREMENT_COUNTER( "TransformFlexVerts", pGroup->m_NumVertices ); + + for ( int i=0; i < pGroup->m_NumVertices; ++i) + { + int n = pGroup->m_pGroupIndexToMeshIndex[i]; + mstudiovertex_t &vert = pVertices[n]; + + CachedPosNorm_t* pWorldVert = m_VertexCache.CreateWorldVertex(n); + + // transform into world space + if ( m_VertexCache.IsVertexFlexed(n) ) + { + CachedPosNormTan_t* pFlexVert = m_VertexCache.GetFlexVertex(n); + R_StudioTransform( pFlexVert->m_Position, &vert.m_BoneWeights, pWorldVert->m_Position.AsVector3D() ); + R_StudioRotate( pFlexVert->m_Normal, &vert.m_BoneWeights, pWorldVert->m_Normal.AsVector3D() ); + Assert( pWorldVert->m_Normal.x >= -1.05f && pWorldVert->m_Normal.x <= 1.05f ); + Assert( pWorldVert->m_Normal.y >= -1.05f && pWorldVert->m_Normal.y <= 1.05f ); + Assert( pWorldVert->m_Normal.z >= -1.05f && pWorldVert->m_Normal.z <= 1.05f ); + } + else + { + R_StudioTransform( vert.m_vecPosition, &vert.m_BoneWeights, pWorldVert->m_Position.AsVector3D() ); + R_StudioRotate( vert.m_vecNormal, &vert.m_BoneWeights, pWorldVert->m_Normal.AsVector3D() ); + Assert( pWorldVert->m_Normal.x >= -1.05f && pWorldVert->m_Normal.x <= 1.05f ); + Assert( pWorldVert->m_Normal.y >= -1.05f && pWorldVert->m_Normal.y <= 1.05f ); + Assert( pWorldVert->m_Normal.z >= -1.05f && pWorldVert->m_Normal.z <= 1.05f ); + } + + // Don't bother to light in software when we've got vertex + pixel shaders. + meshBuilder.Position3fv( pWorldVert->m_Position.Base() ); + + if (useHWLighting) + { + meshBuilder.Normal3fv( pWorldVert->m_Normal.Base() ); + } + else + { + R_StudioEyeballNormal( peyeball, org, pWorldVert->m_Position.AsVector3D(), pWorldVert->m_Normal.AsVector3D() ); + + // This isn't really used, but since the meshbuilder checks for messed up + // normals, let's do this here in debug mode. + // WRONGO YOU FRIGGIN IDIOT!!!!!!!!!! + // DX7 needs these for the flashlight. + meshBuilder.Normal3fv( pWorldVert->m_Normal.Base() ); + R_ComputeLightAtPoint3( pWorldVert->m_Position.AsVector3D(), pWorldVert->m_Normal.AsVector3D(), color ); + + unsigned char r = LinearToLightmap( color.x ); + unsigned char g = LinearToLightmap( color.y ); + unsigned char b = LinearToLightmap( color.z ); + + meshBuilder.Color4ub( r, g, b, a ); + } + + meshBuilder.TexCoord2fv( 0, vert.m_vecTexCoord.Base() ); + + // FIXME: For now, flexed hw-skinned meshes can only have one bone + // The data must exist in the 0th hardware matrix + meshBuilder.BoneWeight( 0, 1.0f ); + meshBuilder.BoneWeight( 1, 0.0f ); + meshBuilder.BoneWeight( 2, 0.0f ); + meshBuilder.BoneWeight( 3, 0.0f ); + meshBuilder.BoneMatrix( 0, 0 ); + meshBuilder.BoneMatrix( 1, 0 ); + meshBuilder.BoneMatrix( 2, 0 ); + meshBuilder.BoneMatrix( 3, 0 ); + meshBuilder.AdvanceVertex(); + } + + meshBuilder.End(); + pMesh->Draw(); + + for (int k=0; k<pGroup->m_NumStrips; k++) + { + numTrianglesRendered += pGroup->m_pUniqueTris[k]; + } + + if ( m_pRC->m_Config.bDrawNormals || m_pRC->m_Config.bDrawTangentFrame ) + { + pRenderContext->SetNumBoneWeights( 0 ); + pRenderContext->Bind( m_pMaterialTangentFrame ); + + CMeshBuilder meshBuilder; + pMesh = pRenderContext->GetDynamicMesh( false ); + meshBuilder.Begin( pMesh, MATERIAL_LINES, pGroup->m_NumVertices ); + + bool doFlex = true; + bool r_blend = false; + R_StudioSoftwareProcessMesh_Normals( pmesh, meshBuilder, pGroup->m_NumVertices, + pGroup->m_pGroupIndexToMeshIndex, lighting, doFlex, r_blend, m_pRC->m_Config.bDrawNormals, m_pRC->m_Config.bDrawTangentFrame ); + meshBuilder.End( ); + + pMesh->Draw(); + pRenderContext->Bind( pMaterial ); + } + } + + return numTrianglesRendered; +} + + + +//----------------------------------------------------------------------------- +// Draws a mesh +//----------------------------------------------------------------------------- +int CStudioRender::R_StudioDrawMesh( IMatRenderContext *pRenderContext, mstudiomesh_t* pmesh, studiomeshdata_t* pMeshData, + StudioModelLighting_t lighting, IMaterial *pMaterial, + ColorMeshInfo_t *pColorMeshes, int lod ) +{ + VPROF( "R_StudioDrawMesh" ); + + int numTrianglesRendered = 0; + + // Draw all the various mesh groups... + for ( int j = 0; j < pMeshData->m_NumGroup; ++j ) + { + studiomeshgroup_t* pGroup = &pMeshData->m_pMeshGroup[j]; + + // Older models are merely flexed while new ones are also delta flexed + bool bIsFlexed = (pGroup->m_Flags & MESHGROUP_IS_FLEXED) != 0; + bool bIsDeltaFlexed = (pGroup->m_Flags & MESHGROUP_IS_DELTA_FLEXED) != 0; + + // Take the static path for new flexed models on DX9 hardware + bool bFlexStatic = ( bIsDeltaFlexed && g_pMaterialSystemHardwareConfig->SupportsStreamOffset() ); + + // Use the hardware if the mesh is hw skinned and we can put flexes on another stream + // Otherwise, we gotta do some expensive locks + bool bIsHardwareSkinnedData = ( pGroup->m_Flags & MESHGROUP_IS_HWSKINNED ) != 0; + bool bShouldHardwareSkin = bIsHardwareSkinnedData && ( !bIsFlexed || bFlexStatic ) && + ( lighting != LIGHTING_SOFTWARE ); + + if ( bShouldHardwareSkin && !m_pRC->m_Config.bDrawNormals && !m_pRC->m_Config.bDrawTangentFrame && !m_pRC->m_Config.bWireframe ) + { + if ( !m_pRC->m_Config.bNoHardware ) + { + numTrianglesRendered += R_StudioDrawStaticMesh( pRenderContext, pmesh, pGroup, lighting, m_pRC->m_AlphaMod, pMaterial, lod, pColorMeshes ); + } + } + else + { + if ( !m_pRC->m_Config.bNoSoftware ) + { + numTrianglesRendered += R_StudioDrawDynamicMesh( pRenderContext, pmesh, pGroup, lighting, m_pRC->m_AlphaMod, pMaterial, lod ); + } + } + } + return numTrianglesRendered; +} + + +//----------------------------------------------------------------------------- +// Inserts translucent mesh into list +//----------------------------------------------------------------------------- +template< class T > +void InsertRenderable( int mesh, T val, int count, int* pIndices, T* pValList ) +{ + // Compute insertion point... + int i; + for ( i = count; --i >= 0; ) + { + if (val < pValList[i]) + break; + + // Shift down + pIndices[i + 1] = pIndices[i]; + pValList[i+1] = pValList[i]; + } + + // Insert at insertion point + ++i; + pValList[i] = val; + pIndices[i] = mesh; +} + + +//----------------------------------------------------------------------------- +// Sorts the meshes +//----------------------------------------------------------------------------- +int CStudioRender::SortMeshes( int* pIndices, IMaterial **ppMaterials, + short* pskinref, Vector const& vforward, Vector const& r_origin ) +{ + int numMeshes = 0; + if (m_bDrawTranslucentSubModels) + { +// float* pDist = (float*)_alloca( m_pSubModel->nummeshes * sizeof(float) ); + + // Sort each model piece by it's center, if it's translucent + for (int i = 0; i < m_pSubModel->nummeshes; ++i) + { + // Don't add opaque materials + mstudiomesh_t* pmesh = m_pSubModel->pMesh(i); + IMaterial *pMaterial = ppMaterials[pskinref[pmesh->material]]; + if( !pMaterial || !pMaterial->IsTranslucent() ) + continue; + + // FIXME: put the "center" of the mesh into delta +// Vector delta; +// VectorSubtract( delta, r_origin, delta ); +// float dist = DotProduct( delta, vforward ); + + // Add it to our lists +// InsertRenderable( i, dist, numMeshes, pIndices, pDist ); + + // One more mesh + ++numMeshes; + } + } + else + { + IMaterial** ppMat = (IMaterial**)_alloca( m_pSubModel->nummeshes * sizeof(IMaterial*) ); + + // Sort by material type + for (int i = 0; i < m_pSubModel->nummeshes; ++i) + { + mstudiomesh_t* pmesh = m_pSubModel->pMesh(i); + IMaterial *pMaterial = ppMaterials[pskinref[pmesh->material]]; + if( !pMaterial ) + continue; + + // Don't add translucent materials + if (( !m_pRC->m_Config.bWireframe ) && pMaterial->IsTranslucent() ) + continue; + + // Add it to our lists + InsertRenderable( i, pMaterial, numMeshes, pIndices, ppMat ); + + // One more mesh + ++numMeshes; + } + } + + return numMeshes; +} + +//----------------------------------------------------------------------------- +// R_StudioDrawPoints +// +// Returns the number of triangles rendered. +//----------------------------------------------------------------------------- +#pragma warning (disable:4189) +int CStudioRender::R_StudioDrawPoints( IMatRenderContext *pRenderContext, int skin, void /*IClientEntity*/ *pClientEntity, + IMaterial **ppMaterials, int *pMaterialFlags, int boneMask, int lod, ColorMeshInfo_t *pColorMeshes ) +{ + VPROF( "R_StudioDrawPoints" ); + int i; + int numTrianglesRendered = 0; + +#if 0 // garymcthack + if ( m_pSubModel->numfaces == 0 ) + return 0; +#endif + + // happens when there's a model load failure + if ( m_pStudioMeshes == 0 ) + return 0; + + if ( m_pRC->m_Config.bWireframe && m_bDrawTranslucentSubModels ) + return 0; + + // ConDMsg("%d: %d %d\n", pimesh->numFaces, pimesh->numVertices, pimesh->numNormals ); + if ( m_pRC->m_Config.skin ) + { + skin = m_pRC->m_Config.skin; + if ( skin >= m_pStudioHdr->numskinfamilies ) + { + skin = 0; + } + } + + // get skinref array + short *pskinref = m_pStudioHdr->pSkinref( 0 ); + if ( skin > 0 && skin < m_pStudioHdr->numskinfamilies ) + { + pskinref += ( skin * m_pStudioHdr->numskinref ); + } + + // FIXME: Activate sorting on a mesh level +// int* pIndices = (int*)_alloca( m_pSubModel->nummeshes * sizeof(int) ); +// int numMeshes = SortMeshes( pIndices, ppMaterials, pskinref, vforward, r_origin ); + + // draw each mesh + for ( i = 0; i < m_pSubModel->nummeshes; ++i) + { + mstudiomesh_t *pmesh = m_pSubModel->pMesh(i); + studiomeshdata_t *pMeshData = &m_pStudioMeshes[pmesh->meshid]; + Assert( pMeshData ); + + if ( !pMeshData->m_NumGroup ) + continue; + + if ( !pMaterialFlags ) + continue; + + StudioModelLighting_t lighting = LIGHTING_HARDWARE; + int materialFlags = pMaterialFlags[pskinref[pmesh->material]]; + + IMaterial* pMaterial = R_StudioSetupSkinAndLighting( pRenderContext, pskinref[ pmesh->material ], ppMaterials, materialFlags, pClientEntity, pColorMeshes, lighting ); + if ( !pMaterial ) + continue; + +#ifdef _DEBUG + char const *materialName = pMaterial->GetName(); +#endif + // Set up flex data + m_VertexCache.SetMesh( i ); + + // The following are special cases that can't be covered with + // the normal static/dynamic methods due to optimization reasons + switch ( pmesh->materialtype ) + { + case 1: + // eyeballs + numTrianglesRendered += R_StudioDrawEyeball( pRenderContext, pmesh, pMeshData, lighting, pMaterial, lod ); + break; + + default: + numTrianglesRendered += R_StudioDrawMesh( pRenderContext, pmesh, pMeshData, lighting, pMaterial, pColorMeshes, lod ); + break; + } + } + + // Reset this state so it doesn't hose other parts of rendering + pRenderContext->SetNumBoneWeights( 0 ); + + return numTrianglesRendered; +} +#pragma warning (default:4189) |